mirror of https://github.com/kortix-ai/suna.git
feat: download pptx
This commit is contained in:
parent
7a67ebe746
commit
aacc6549fb
File diff suppressed because it is too large
Load Diff
|
@ -7,4 +7,5 @@ pytesseract==0.3.13
|
||||||
pandas==2.3.0
|
pandas==2.3.0
|
||||||
playwright>=1.40.0
|
playwright>=1.40.0
|
||||||
PyPDF2>=3.0.0
|
PyPDF2>=3.0.0
|
||||||
bs4==0.0.2
|
bs4==0.0.2
|
||||||
|
python-pptx>=0.6.23
|
|
@ -5,9 +5,10 @@ import uvicorn
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Import PDF router and Visual HTML Editor router
|
# Import PDF router, PPTX router, and Visual HTML Editor router
|
||||||
from html_to_pdf_router import router as pdf_router
|
from html_to_pdf_router import router as pdf_router
|
||||||
from visual_html_editor_router import router as editor_router
|
from visual_html_editor_router import router as editor_router
|
||||||
|
from html_to_pptx_router import router as pptx_router
|
||||||
|
|
||||||
# Ensure we're serving from the /workspace directory
|
# Ensure we're serving from the /workspace directory
|
||||||
workspace_dir = "/workspace"
|
workspace_dir = "/workspace"
|
||||||
|
@ -26,6 +27,7 @@ app.add_middleware(WorkspaceDirMiddleware)
|
||||||
# Include routers
|
# Include routers
|
||||||
app.include_router(pdf_router)
|
app.include_router(pdf_router)
|
||||||
app.include_router(editor_router)
|
app.include_router(editor_router)
|
||||||
|
app.include_router(pptx_router)
|
||||||
|
|
||||||
# Create output directory for generated PDFs (needed by PDF router)
|
# Create output directory for generated PDFs (needed by PDF router)
|
||||||
output_dir = Path("generated_pdfs")
|
output_dir = Path("generated_pdfs")
|
||||||
|
|
|
@ -14,7 +14,7 @@ from fastapi import APIRouter, HTTPException
|
||||||
from fastapi.responses import HTMLResponse
|
from fastapi.responses import HTMLResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from bs4 import BeautifulSoup, NavigableString
|
from bs4 import BeautifulSoup, NavigableString, Comment
|
||||||
|
|
||||||
# Create router
|
# Create router
|
||||||
router = APIRouter(prefix="/api/html", tags=["visual-editor"])
|
router = APIRouter(prefix="/api/html", tags=["visual-editor"])
|
||||||
|
@ -78,6 +78,24 @@ async def get_editable_elements(file_path: str):
|
||||||
# Find all elements that could contain text
|
# Find all elements that could contain text
|
||||||
all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
|
all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
|
||||||
|
|
||||||
|
# Filter out elements that only contain comments
|
||||||
|
filtered_elements = []
|
||||||
|
for element in all_elements:
|
||||||
|
# Check if element only contains comments
|
||||||
|
only_comments = True
|
||||||
|
for child in element.children:
|
||||||
|
if isinstance(child, Comment):
|
||||||
|
continue
|
||||||
|
if isinstance(child, NavigableString) and not child.strip():
|
||||||
|
continue
|
||||||
|
only_comments = False
|
||||||
|
break
|
||||||
|
|
||||||
|
if not only_comments:
|
||||||
|
filtered_elements.append(element)
|
||||||
|
|
||||||
|
all_elements = filtered_elements
|
||||||
|
|
||||||
for element in all_elements:
|
for element in all_elements:
|
||||||
# Strategy 1: Elements with ONLY text content (no child elements)
|
# Strategy 1: Elements with ONLY text content (no child elements)
|
||||||
if element.string and element.string.strip():
|
if element.string and element.string.strip():
|
||||||
|
@ -99,6 +117,9 @@ async def get_editable_elements(file_path: str):
|
||||||
has_mixed_content = False
|
has_mixed_content = False
|
||||||
# Process each child node
|
# Process each child node
|
||||||
for child in list(element.contents): # Use list() to avoid modification during iteration
|
for child in list(element.contents): # Use list() to avoid modification during iteration
|
||||||
|
# Skip comment nodes (Comments are a subclass of NavigableString)
|
||||||
|
if isinstance(child, Comment):
|
||||||
|
continue
|
||||||
# Check if it's a NavigableString (raw text) with actual content
|
# Check if it's a NavigableString (raw text) with actual content
|
||||||
if (isinstance(child, NavigableString) and child.strip()):
|
if (isinstance(child, NavigableString) and child.strip()):
|
||||||
|
|
||||||
|
@ -335,6 +356,24 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
|
||||||
# Find all elements that could contain text
|
# Find all elements that could contain text
|
||||||
all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
|
all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
|
||||||
|
|
||||||
|
# Filter out elements that only contain comments
|
||||||
|
filtered_elements = []
|
||||||
|
for element in all_elements:
|
||||||
|
# Check if element only contains comments
|
||||||
|
only_comments = True
|
||||||
|
for child in element.children:
|
||||||
|
if isinstance(child, Comment):
|
||||||
|
continue
|
||||||
|
if isinstance(child, NavigableString) and not child.strip():
|
||||||
|
continue
|
||||||
|
only_comments = False
|
||||||
|
break
|
||||||
|
|
||||||
|
if not only_comments:
|
||||||
|
filtered_elements.append(element)
|
||||||
|
|
||||||
|
all_elements = filtered_elements
|
||||||
|
|
||||||
for element in all_elements:
|
for element in all_elements:
|
||||||
# Strategy 1: Elements with ONLY text content (no child elements)
|
# Strategy 1: Elements with ONLY text content (no child elements)
|
||||||
if element.string and element.string.strip():
|
if element.string and element.string.strip():
|
||||||
|
@ -347,6 +386,9 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
|
||||||
has_mixed_content = False
|
has_mixed_content = False
|
||||||
# Process each child node
|
# Process each child node
|
||||||
for child in list(element.contents): # Use list() to avoid modification during iteration
|
for child in list(element.contents): # Use list() to avoid modification during iteration
|
||||||
|
# Skip comment nodes (Comments are a subclass of NavigableString)
|
||||||
|
if isinstance(child, Comment):
|
||||||
|
continue
|
||||||
# Check if it's a NavigableString (raw text) with actual content
|
# Check if it's a NavigableString (raw text) with actual content
|
||||||
if (isinstance(child, NavigableString) and child.strip()):
|
if (isinstance(child, NavigableString) and child.strip()):
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue