feat: download pptx

This commit is contained in:
Krishav Raj Singh 2025-08-25 05:58:04 +05:30
parent 7a67ebe746
commit aacc6549fb
4 changed files with 1171 additions and 3 deletions

File diff suppressed because it is too large Load Diff

View File

@ -7,4 +7,5 @@ pytesseract==0.3.13
pandas==2.3.0 pandas==2.3.0
playwright>=1.40.0 playwright>=1.40.0
PyPDF2>=3.0.0 PyPDF2>=3.0.0
bs4==0.0.2 bs4==0.0.2
python-pptx>=0.6.23

View File

@ -5,9 +5,10 @@ import uvicorn
import os import os
from pathlib import Path from pathlib import Path
# Import PDF router and Visual HTML Editor router # Import PDF router, PPTX router, and Visual HTML Editor router
from html_to_pdf_router import router as pdf_router from html_to_pdf_router import router as pdf_router
from visual_html_editor_router import router as editor_router from visual_html_editor_router import router as editor_router
from html_to_pptx_router import router as pptx_router
# Ensure we're serving from the /workspace directory # Ensure we're serving from the /workspace directory
workspace_dir = "/workspace" workspace_dir = "/workspace"
@ -26,6 +27,7 @@ app.add_middleware(WorkspaceDirMiddleware)
# Include routers # Include routers
app.include_router(pdf_router) app.include_router(pdf_router)
app.include_router(editor_router) app.include_router(editor_router)
app.include_router(pptx_router)
# Create output directory for generated PDFs (needed by PDF router) # Create output directory for generated PDFs (needed by PDF router)
output_dir = Path("generated_pdfs") output_dir = Path("generated_pdfs")

View File

@ -14,7 +14,7 @@ from fastapi import APIRouter, HTTPException
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel from pydantic import BaseModel
from bs4 import BeautifulSoup, NavigableString from bs4 import BeautifulSoup, NavigableString, Comment
# Create router # Create router
router = APIRouter(prefix="/api/html", tags=["visual-editor"]) router = APIRouter(prefix="/api/html", tags=["visual-editor"])
@ -78,6 +78,24 @@ async def get_editable_elements(file_path: str):
# Find all elements that could contain text # Find all elements that could contain text
all_elements = soup.find_all(TEXT_ELEMENTS + ['div']) all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
# Filter out elements whose children are only comments and/or whitespace-only text (this also drops elements with no children)
filtered_elements = []
for element in all_elements:
# Check whether every child is a comment or whitespace-only text
only_comments = True
for child in element.children:
if isinstance(child, Comment):
continue
if isinstance(child, NavigableString) and not child.strip():
continue
only_comments = False
break
if not only_comments:
filtered_elements.append(element)
all_elements = filtered_elements
for element in all_elements: for element in all_elements:
# Strategy 1: Elements with ONLY text content (no child elements) # Strategy 1: Elements with ONLY text content (no child elements)
if element.string and element.string.strip(): if element.string and element.string.strip():
@ -99,6 +117,9 @@ async def get_editable_elements(file_path: str):
has_mixed_content = False has_mixed_content = False
# Process each child node # Process each child node
for child in list(element.contents): # Use list() to avoid modification during iteration for child in list(element.contents): # Use list() to avoid modification during iteration
# Skip comment nodes (Comments are a subclass of NavigableString)
if isinstance(child, Comment):
continue
# Check if it's a NavigableString (raw text) with actual content # Check if it's a NavigableString (raw text) with actual content
if (isinstance(child, NavigableString) and child.strip()): if (isinstance(child, NavigableString) and child.strip()):
@ -335,6 +356,24 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
# Find all elements that could contain text # Find all elements that could contain text
all_elements = soup.find_all(TEXT_ELEMENTS + ['div']) all_elements = soup.find_all(TEXT_ELEMENTS + ['div'])
# Filter out elements whose children are only comments and/or whitespace-only text (this also drops elements with no children)
filtered_elements = []
for element in all_elements:
# Check whether every child is a comment or whitespace-only text
only_comments = True
for child in element.children:
if isinstance(child, Comment):
continue
if isinstance(child, NavigableString) and not child.strip():
continue
only_comments = False
break
if not only_comments:
filtered_elements.append(element)
all_elements = filtered_elements
for element in all_elements: for element in all_elements:
# Strategy 1: Elements with ONLY text content (no child elements) # Strategy 1: Elements with ONLY text content (no child elements)
if element.string and element.string.strip(): if element.string and element.string.strip():
@ -347,6 +386,9 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
has_mixed_content = False has_mixed_content = False
# Process each child node # Process each child node
for child in list(element.contents): # Use list() to avoid modification during iteration for child in list(element.contents): # Use list() to avoid modification during iteration
# Skip comment nodes (Comments are a subclass of NavigableString)
if isinstance(child, Comment):
continue
# Check if it's a NavigableString (raw text) with actual content # Check if it's a NavigableString (raw text) with actual content
if (isinstance(child, NavigableString) and child.strip()): if (isinstance(child, NavigableString) and child.strip()):