feat: download pptx

2025-08-25 05:58:04 +05:30 · 2025-08-25 05:58:04 +05:30 · aacc6549fb
parent 7a67ebe746
commit aacc6549fb
4 changed files with 1171 additions and 3 deletions
--- a/backend/sandbox/docker/html_to_pptx_router.py
+++ b/backend/sandbox/docker/html_to_pptx_router.py
--- a/backend/sandbox/docker/requirements.txt
+++ b/backend/sandbox/docker/requirements.txt
@@ -7,4 +7,5 @@ pytesseract==0.3.13
 pandas==2.3.0
 playwright>=1.40.0
 PyPDF2>=3.0.0
-bs4==0.0.2
+bs4==0.0.2
+python-pptx>=0.6.23
--- a/backend/sandbox/docker/server.py
+++ b/backend/sandbox/docker/server.py
@@ -5,9 +5,10 @@ import uvicorn
 import os
 from pathlib import Path
-# Import PDF router and Visual HTML Editor router
+# Import PDF router, PPTX router, and Visual HTML Editor router
 from html_to_pdf_router import router as pdf_router
 from visual_html_editor_router import router as editor_router
+from html_to_pptx_router import router as pptx_router
 # Ensure we're serving from the /workspace directory
 workspace_dir = "/workspace"
@@ -26,6 +27,7 @@ app.add_middleware(WorkspaceDirMiddleware)
 # Include routers
 app.include_router(pdf_router)
 app.include_router(editor_router)
+app.include_router(pptx_router)
 # Create output directory for generated PDFs (needed by PDF router)
 output_dir = Path("generated_pdfs")
--- a/backend/sandbox/docker/visual_html_editor_router.py
+++ b/backend/sandbox/docker/visual_html_editor_router.py
@@ -14,7 +14,7 @@ from fastapi import APIRouter, HTTPException
 from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
-from bs4 import BeautifulSoup, NavigableString
+from bs4 import BeautifulSoup, NavigableString, Comment
 # Create router
 router = APIRouter(prefix="/api/html", tags=["visual-editor"])
@@ -78,6 +78,24 @@ async def get_editable_elements(file_path: str):
        # Find all elements that could contain text
        all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
        # Filter out elements that only contain comments
        filtered_elements = []
        for element in all_elements:
            # Check if element only contains comments
            only_comments = True
            for child in element.children:
                if isinstance(child, Comment):
                    continue
                if isinstance(child, NavigableString) and not child.strip():
                    continue
                only_comments = False
                break
            if not only_comments:
                filtered_elements.append(element)
        all_elements = filtered_elements
        for element in all_elements:
            # Strategy 1: Elements with ONLY text content (no child elements)
            if element.string and element.string.strip():
@@ -99,6 +117,9 @@ async def get_editable_elements(file_path: str):
                has_mixed_content = False
                # Process each child node
                for child in list(element.contents):  # Use list() to avoid modification during iteration
                    # Skip comment nodes (Comments are a subclass of NavigableString)
                    if isinstance(child, Comment):
                        continue
                    # Check if it's a NavigableString (raw text) with actual content
                    if (isinstance(child, NavigableString) and child.strip()):
@@ -335,6 +356,24 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
    # Find all elements that could contain text
    all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
    # Filter out elements that only contain comments
    filtered_elements = []
    for element in all_elements:
        # Check if element only contains comments
        only_comments = True
        for child in element.children:
            if isinstance(child, Comment):
                continue
            if isinstance(child, NavigableString) and not child.strip():
                continue
            only_comments = False
            break
        if not only_comments:
            filtered_elements.append(element)
    all_elements = filtered_elements
    for element in all_elements:
        # Strategy 1: Elements with ONLY text content (no child elements)
        if element.string and element.string.strip():
@@ -347,6 +386,9 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
            has_mixed_content = False
            # Process each child node
            for child in list(element.contents):  # Use list() to avoid modification during iteration
                # Skip comment nodes (Comments are a subclass of NavigableString)
                if isinstance(child, Comment):
                    continue
                # Check if it's a NavigableString (raw text) with actual content
                if (isinstance(child, NavigableString) and child.strip()):