mirror of https://github.com/kortix-ai/suna.git
feat: download pptx
This commit is contained in:
parent
7a67ebe746
commit
aacc6549fb
File diff suppressed because it is too large
Load Diff
|
@ -7,4 +7,5 @@ pytesseract==0.3.13
|
|||
pandas==2.3.0
|
||||
playwright>=1.40.0
|
||||
PyPDF2>=3.0.0
|
||||
bs4==0.0.2
|
||||
bs4==0.0.2
|
||||
python-pptx>=0.6.23
|
|
@ -5,9 +5,10 @@ import uvicorn
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Import PDF router and Visual HTML Editor router
|
||||
# Import PDF router, PPTX router, and Visual HTML Editor router
|
||||
from html_to_pdf_router import router as pdf_router
|
||||
from visual_html_editor_router import router as editor_router
|
||||
from html_to_pptx_router import router as pptx_router
|
||||
|
||||
# Ensure we're serving from the /workspace directory
|
||||
workspace_dir = "/workspace"
|
||||
|
@ -26,6 +27,7 @@ app.add_middleware(WorkspaceDirMiddleware)
|
|||
# Include routers
|
||||
app.include_router(pdf_router)
|
||||
app.include_router(editor_router)
|
||||
app.include_router(pptx_router)
|
||||
|
||||
# Create output directory for generated PDFs (needed by PDF router)
|
||||
output_dir = Path("generated_pdfs")
|
||||
|
|
|
@ -14,7 +14,7 @@ from fastapi import APIRouter, HTTPException
|
|||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from bs4 import BeautifulSoup, NavigableString, Comment
|
||||
|
||||
# Create router
|
||||
router = APIRouter(prefix="/api/html", tags=["visual-editor"])
|
||||
|
@ -78,6 +78,24 @@ async def get_editable_elements(file_path: str):
|
|||
# Find all elements that could contain text
|
||||
all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
|
||||
|
||||
# Filter out elements that only contain comments
|
||||
filtered_elements = []
|
||||
for element in all_elements:
|
||||
# Check if element only contains comments
|
||||
only_comments = True
|
||||
for child in element.children:
|
||||
if isinstance(child, Comment):
|
||||
continue
|
||||
if isinstance(child, NavigableString) and not child.strip():
|
||||
continue
|
||||
only_comments = False
|
||||
break
|
||||
|
||||
if not only_comments:
|
||||
filtered_elements.append(element)
|
||||
|
||||
all_elements = filtered_elements
|
||||
|
||||
for element in all_elements:
|
||||
# Strategy 1: Elements with ONLY text content (no child elements)
|
||||
if element.string and element.string.strip():
|
||||
|
@ -99,6 +117,9 @@ async def get_editable_elements(file_path: str):
|
|||
has_mixed_content = False
|
||||
# Process each child node
|
||||
for child in list(element.contents): # Use list() to avoid modification during iteration
|
||||
# Skip comment nodes (Comments are a subclass of NavigableString)
|
||||
if isinstance(child, Comment):
|
||||
continue
|
||||
# Check if it's a NavigableString (raw text) with actual content
|
||||
if (isinstance(child, NavigableString) and child.strip()):
|
||||
|
||||
|
@ -335,6 +356,24 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
|
|||
# Find all elements that could contain text
|
||||
all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
|
||||
|
||||
# Filter out elements that only contain comments
|
||||
filtered_elements = []
|
||||
for element in all_elements:
|
||||
# Check if element only contains comments
|
||||
only_comments = True
|
||||
for child in element.children:
|
||||
if isinstance(child, Comment):
|
||||
continue
|
||||
if isinstance(child, NavigableString) and not child.strip():
|
||||
continue
|
||||
only_comments = False
|
||||
break
|
||||
|
||||
if not only_comments:
|
||||
filtered_elements.append(element)
|
||||
|
||||
all_elements = filtered_elements
|
||||
|
||||
for element in all_elements:
|
||||
# Strategy 1: Elements with ONLY text content (no child elements)
|
||||
if element.string and element.string.strip():
|
||||
|
@ -347,6 +386,9 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
|
|||
has_mixed_content = False
|
||||
# Process each child node
|
||||
for child in list(element.contents): # Use list() to avoid modification during iteration
|
||||
# Skip comment nodes (Comments are a subclass of NavigableString)
|
||||
if isinstance(child, Comment):
|
||||
continue
|
||||
# Check if it's a NavigableString (raw text) with actual content
|
||||
if (isinstance(child, NavigableString) and child.strip()):
|
||||
|
||||
|
|
Loading…
Reference in New Issue