feat: download pptx

2025-08-25 05:58:04 +05:30 · 2025-08-25 05:58:04 +05:30 · aacc6549fb
parent 7a67ebe746
commit aacc6549fb
4 changed files with 1171 additions and 3 deletions
--- a/backend/sandbox/docker/html_to_pptx_router.py
+++ b/backend/sandbox/docker/html_to_pptx_router.py
--- a/backend/sandbox/docker/requirements.txt
+++ b/backend/sandbox/docker/requirements.txt
@ -7,4 +7,5 @@ pytesseract==0.3.13
 pandas==2.3.0
 playwright>=1.40.0
 PyPDF2>=3.0.0
-bs4==0.0.2
+bs4==0.0.2
+python-pptx>=0.6.23
--- a/backend/sandbox/docker/server.py
+++ b/backend/sandbox/docker/server.py
@ -5,9 +5,10 @@ import uvicorn
 import os
 from pathlib import Path

-# Import PDF router and Visual HTML Editor router
+# Import PDF router, PPTX router, and Visual HTML Editor router
 from html_to_pdf_router import router as pdf_router
 from visual_html_editor_router import router as editor_router
+from html_to_pptx_router import router as pptx_router

 # Ensure we're serving from the /workspace directory
 workspace_dir = "/workspace"
@ -26,6 +27,7 @@ app.add_middleware(WorkspaceDirMiddleware)
 # Include routers
 app.include_router(pdf_router)
 app.include_router(editor_router)
+app.include_router(pptx_router)

 # Create output directory for generated PDFs (needed by PDF router)
 output_dir = Path("generated_pdfs")
--- a/backend/sandbox/docker/visual_html_editor_router.py
+++ b/backend/sandbox/docker/visual_html_editor_router.py
@ -14,7 +14,7 @@ from fastapi import APIRouter, HTTPException
 from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
-from bs4 import BeautifulSoup, NavigableString
+from bs4 import BeautifulSoup, NavigableString, Comment

 # Create router
 router = APIRouter(prefix="/api/html", tags=["visual-editor"])
@ -78,6 +78,24 @@ async def get_editable_elements(file_path: str):
        # Find all elements that could contain text
        all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
        
+        # Filter out elements with no real content: only comments, only whitespace, or no children at all
+        filtered_elements = []
+        for element in all_elements:
+            # Assume the element is empty until a child that is neither a comment nor blank text is found
+            only_comments = True
+            for child in element.children:
+                if isinstance(child, Comment):
+                    continue
+                if isinstance(child, NavigableString) and not child.strip():
+                    continue
+                only_comments = False
+                break
+                
+            if not only_comments:
+                filtered_elements.append(element)
+                
+        all_elements = filtered_elements
+        
        for element in all_elements:
            # Strategy 1: Elements with ONLY text content (no child elements)
            if element.string and element.string.strip():
@ -99,6 +117,9 @@ async def get_editable_elements(file_path: str):
                has_mixed_content = False
                # Process each child node
                for child in list(element.contents):  # Use list() to avoid modification during iteration
+                    # Skip comment nodes (Comments are a subclass of NavigableString)
+                    if isinstance(child, Comment):
+                        continue
                    # Check if it's a NavigableString (raw text) with actual content
                    if (isinstance(child, NavigableString) and child.strip()):
                        
@ -335,6 +356,24 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
    # Find all elements that could contain text
    all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
    
+    # Filter out elements with no real content: only comments, only whitespace, or no children at all
+    filtered_elements = []
+    for element in all_elements:
+        # Assume the element is empty until a child that is neither a comment nor blank text is found
+        only_comments = True
+        for child in element.children:
+            if isinstance(child, Comment):
+                continue
+            if isinstance(child, NavigableString) and not child.strip():
+                continue
+            only_comments = False
+            break
+            
+        if not only_comments:
+            filtered_elements.append(element)
+            
+    all_elements = filtered_elements
+    
    for element in all_elements:
        # Strategy 1: Elements with ONLY text content (no child elements)
        if element.string and element.string.strip():
@ -347,6 +386,9 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
            has_mixed_content = False
            # Process each child node
            for child in list(element.contents):  # Use list() to avoid modification during iteration
+                # Skip comment nodes (Comments are a subclass of NavigableString)
+                if isinstance(child, Comment):
+                    continue
                # Check if it's a NavigableString (raw text) with actual content
                if (isinstance(child, NavigableString) and child.strip()):