diff --git a/backend/sandbox/docker/html_to_pdf_router.py b/backend/sandbox/docker/html_to_pdf_router.py new file mode 100644 index 00000000..65995d9e --- /dev/null +++ b/backend/sandbox/docker/html_to_pdf_router.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 +""" +FastAPI HTML Presentation to PDF Converter Router + +Provides PDF conversion endpoints as a FastAPI router that can be included in other applications. +""" + +import json +import asyncio +from pathlib import Path +from typing import Dict, List +import tempfile + +from fastapi import APIRouter, HTTPException +from fastapi.responses import Response +from pydantic import BaseModel, Field + +try: + from playwright.async_api import async_playwright +except ImportError: + raise ImportError("Playwright is not installed. Please install it with: pip install playwright") + +try: + from PyPDF2 import PdfWriter, PdfReader +except ImportError: + raise ImportError("PyPDF2 is not installed. Please install it with: pip install PyPDF2") + + +# Create router +router = APIRouter(prefix="/presentation", tags=["pdf-conversion"]) + +# Create output directory for generated PDFs +output_dir = Path("generated_pdfs") +output_dir.mkdir(exist_ok=True) + + +class ConvertRequest(BaseModel): + presentation_path: str = Field(..., description="Path to the presentation folder containing metadata.json") + download: bool = Field(False, description="If true, returns the PDF file directly. If false, returns JSON with download URL.") + + +class ConvertResponse(BaseModel): + success: bool + message: str + pdf_url: str + filename: str + total_slides: int + + +class PresentationToPDFAPI: + def __init__(self, presentation_dir: str): + """Initialize the converter with presentation directory.""" + self.presentation_dir = Path(presentation_dir).resolve() + self.metadata_path = self.presentation_dir / "metadata.json" + self.metadata = None + self.slides_info = [] + + # Validate inputs + if not self.presentation_dir.exists(): + raise FileNotFoundError(f"Presentation directory not found: {self.presentation_dir}") + + if not self.metadata_path.exists(): + raise FileNotFoundError(f"metadata.json not found in: {self.presentation_dir}") + + def load_metadata(self) -> Dict: + """Load and parse metadata.json""" + try: + with open(self.metadata_path, 'r', encoding='utf-8') as f: + self.metadata = json.load(f) + + # Extract slide information and sort by slide number + slides = self.metadata.get('slides', {}) + self.slides_info = [] + + for slide_num, slide_data in slides.items(): + filename = slide_data.get('filename') + title = slide_data.get('title', f'Slide {slide_num}') + + if filename: + # Treat filename as absolute path only + html_path = Path(filename) + print(f"Using path: {html_path}") + + # Verify the path exists + if html_path.exists(): + self.slides_info.append({ + 'number': int(slide_num), + 'title': title, + 'filename': filename, + 'path': html_path + }) + print(f"Added slide {slide_num}: {html_path}") + else: + print(f"Warning: HTML file does not exist: {html_path}") + + # Sort slides by number + self.slides_info.sort(key=lambda x: x['number']) + + if not self.slides_info: + raise ValueError("No valid slides found in metadata.json") + + return self.metadata + + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in metadata.json: {e}") + except Exception as e: + raise ValueError(f"Error loading metadata: {e}") + + async def render_slide_to_pdf(self, browser, slide_info: Dict, temp_dir: Path) -> Path: + """Render a single HTML slide to PDF using Playwright.""" + html_path = slide_info['path'] + slide_num = slide_info['number'] + + print(f"Rendering slide {slide_num}: {slide_info['title']}") + + # Create new page with exact presentation dimensions + page = await browser.new_page() + + try: + # Set exact viewport to 1920x1080 + await page.set_viewport_size({"width": 1920, "height": 1080}) + await page.emulate_media(media='screen') + + # Override device pixel ratio for exact dimensions + await page.evaluate(""" + () => { + Object.defineProperty(window, 'devicePixelRatio', { + get: () => 1 + }); + } + """) + + # Navigate to the HTML file + file_url = f"file://{html_path.absolute()}" + await page.goto(file_url, wait_until="networkidle", timeout=30000) + + # Wait for fonts and dynamic content to load + await page.wait_for_timeout(3000) + + # Ensure exact slide dimensions + await page.evaluate(""" + () => { + const slideContainer = document.querySelector('.slide-container'); + if (slideContainer) { + slideContainer.style.width = '1920px'; + slideContainer.style.height = '1080px'; + slideContainer.style.transform = 'none'; + slideContainer.style.maxWidth = 'none'; + slideContainer.style.maxHeight = 'none'; + } + + document.body.style.margin = '0'; + document.body.style.padding = '0'; + document.body.style.width = '1920px'; + document.body.style.height = '1080px'; + document.body.style.overflow = 'hidden'; + } + """) + + await page.wait_for_timeout(1000) + + # Generate PDF for this slide + temp_pdf_path = temp_dir / f"slide_{slide_num:02d}.pdf" + + await page.pdf( + path=str(temp_pdf_path), + width="1920px", + height="1080px", + margin={"top": "0", "right": "0", "bottom": "0", "left": "0"}, + print_background=True, + prefer_css_page_size=False + ) + + print(f" ā Slide {slide_num} rendered") + return temp_pdf_path + + except Exception as e: + raise RuntimeError(f"Error rendering slide {slide_num}: {e}") + finally: + await page.close() + + def combine_pdfs(self, pdf_paths: List[Path], output_path: Path) -> None: + """Combine multiple PDF files into a single PDF.""" + print(f"Combining {len(pdf_paths)} PDFs...") + + pdf_writer = PdfWriter() + + try: + for pdf_path in pdf_paths: + if pdf_path.exists(): + with open(pdf_path, 'rb') as pdf_file: + pdf_reader = PdfReader(pdf_file) + for page in pdf_reader.pages: + pdf_writer.add_page(page) + + # Write the combined PDF + with open(output_path, 'wb') as output_file: + pdf_writer.write(output_file) + + print(f"ā PDF created: {output_path}") + + except Exception as e: + raise RuntimeError(f"Error combining PDFs: {e}") + + async def convert_to_pdf(self, store_locally: bool = True) -> tuple: + """Main conversion method with concurrent processing.""" + print("š Starting concurrent HTML to PDF conversion...") + + # Load metadata + self.load_metadata() + + # Create temporary directory for intermediate files + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Launch browser + async with async_playwright() as p: + print("š Launching browser...") + browser = await p.chromium.launch( + headless=True, + args=[ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--force-device-scale-factor=1', + '--disable-background-timer-throttling' + ] + ) + + try: + # Process all slides concurrently using asyncio.gather + print(f"š Processing {len(self.slides_info)} slides concurrently...") + + tasks = [ + self.render_slide_to_pdf(browser, slide_info, temp_path) + for slide_info in self.slides_info + ] + + # Wait for all slides to be processed concurrently + pdf_paths = await asyncio.gather(*tasks) + + finally: + await browser.close() + + # Create output path + presentation_name = self.metadata.get('presentation_name', 'presentation') + temp_output_path = temp_path / f"{presentation_name}.pdf" + + # Combine all PDFs (sort by slide number to maintain order) + sorted_pdf_paths = sorted(pdf_paths, key=lambda p: int(p.stem.split('_')[1])) + self.combine_pdfs(sorted_pdf_paths, temp_output_path) + + if store_locally: + # Store in the static files directory for URL serving + timestamp = int(asyncio.get_event_loop().time()) + filename = f"{presentation_name}_{timestamp}.pdf" + final_output = output_dir / filename + import shutil + shutil.copy2(temp_output_path, final_output) + return final_output, len(self.slides_info) + else: + # For direct download, read file content into memory (no local storage) + with open(temp_output_path, 'rb') as f: + pdf_content = f.read() + return pdf_content, len(self.slides_info), presentation_name + + +@router.post("/convert-to-pdf") +async def convert_presentation_to_pdf(request: ConvertRequest): + """ + Convert HTML presentation to PDF with concurrent processing. + + Takes a presentation folder path and returns either: + - PDF file directly (if download=true) - uses presentation name as filename + - JSON response with download URL (if download=false, default) + """ + try: + print(f"š„ Received conversion request for: {request.presentation_path}") + + # Create converter + converter = PresentationToPDFAPI(request.presentation_path) + + # If download is requested, don't store locally and return file directly + if request.download: + pdf_content, total_slides, presentation_name = await converter.convert_to_pdf(store_locally=False) + + print(f"⨠Direct download conversion completed for: {presentation_name}") + + return Response( + content=pdf_content, + media_type="application/pdf", + headers={"Content-Disposition": f"attachment; filename=\"{presentation_name}.pdf\""} + ) + + # Otherwise, store locally and return JSON with download URL + pdf_path, total_slides = await converter.convert_to_pdf(store_locally=True) + + print(f"⨠Conversion completed: {pdf_path}") + + pdf_url = f"/downloads/{pdf_path.name}" + + return ConvertResponse( + success=True, + message=f"PDF generated successfully with {total_slides} slides", + pdf_url=pdf_url, + filename=pdf_path.name, + total_slides=total_slides + ) + + except FileNotFoundError as e: + raise HTTPException(status_code=404, detail=str(e)) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + print(f"ā Conversion error: {e}") + raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") + + +@router.get("/health") +async def pdf_health_check(): + """PDF service health check endpoint.""" + return {"status": "healthy", "service": "HTML to PDF Converter"} \ No newline at end of file diff --git a/backend/sandbox/docker/presentation-processing-wip/convert_to_pptx_perfect.sh b/backend/sandbox/docker/presentation-processing-wip/convert_to_pptx_perfect.sh deleted file mode 100755 index e85383ad..00000000 --- a/backend/sandbox/docker/presentation-processing-wip/convert_to_pptx_perfect.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -# HTML to PPTX Perfect 1:1 Converter Setup and Execution Script -# This script provides PERFECT 1:1 conversion with complete background capture + editable text - -set -e # Exit on any error - -echo "šÆ HTML to PPTX Perfect 1:1 Converter" -echo "=====================================" -echo "šØ Perfect background capture + Editable text overlay" -echo "" - -# Check if Python is available -if ! command -v python3 &> /dev/null; then - echo "ā Python 3 is required but not found. Please install Python 3.7+ and try again." - exit 1 -fi - -echo "š§ Setting up dependencies..." - -# Install Python dependencies -if [ -f "requirements.txt" ]; then - echo "š¦ Installing Python packages..." - python3 -m pip install -r requirements.txt -else - echo "š¦ Installing Python packages individually..." - python3 -m pip install playwright python-pptx Pillow beautifulsoup4 lxml -fi - -# Install Playwright browsers -echo "š Installing Playwright browser..." -python3 -m playwright install chromium - -echo "ā Dependencies installed successfully!" -echo "" - -# Run the perfect conversion -echo "š Starting PERFECT 1:1 HTML to PPTX conversion..." -echo "š Method: Perfect background capture + Editable text overlay" -echo "" - -if [ $# -eq 0 ]; then - # No arguments, use current directory with perfect naming - python3 html_to_pptx_perfect.py -elif [ $# -eq 1 ]; then - # One argument (presentation directory or output file) - if [[ "$1" == *.pptx ]]; then - # If argument ends with .pptx, treat it as output filename - python3 html_to_pptx_perfect.py . "$1" - else - # Otherwise treat it as presentation directory - python3 html_to_pptx_perfect.py "$1" - fi -elif [ $# -eq 2 ]; then - # Two arguments (presentation directory and output file) - python3 html_to_pptx_perfect.py "$1" "$2" -else - echo "Usage: $0 [presentation_directory] [output_file.pptx]" - echo "" - echo "Examples:" - echo " $0 # Convert current directory (perfect mode)" - echo " $0 my_slides/ # Convert my_slides/ (perfect mode)" - echo " $0 perfect_output.pptx # Convert current directory to perfect_output.pptx" - echo " $0 my_slides/ perfect_output.pptx # Convert my_slides/ to perfect_output.pptx" - echo "" - echo "Perfect 1:1 Mode Features:" - echo " ā PERFECT visual fidelity (everything captured exactly)" - echo " ā All icons, gradients, decorations preserved" - echo " ā Fully editable text elements" - echo " ā True 1:1 conversion" - echo " ā Simple and reliable approach" - exit 1 -fi - -echo "" -echo "š PERFECT 1:1 HTML to PPTX conversion completed!" -echo "⨠Perfect backgrounds + Editable text!" diff --git a/backend/sandbox/docker/presentation-processing-wip/elon_musk_presentation.pdf b/backend/sandbox/docker/presentation-processing-wip/elon_musk_presentation.pdf new file mode 100644 index 00000000..1a6b1b6b Binary files /dev/null and b/backend/sandbox/docker/presentation-processing-wip/elon_musk_presentation.pdf differ diff --git a/backend/sandbox/docker/presentation-processing-wip/html_to_pdf.py b/backend/sandbox/docker/presentation-processing-wip/html_to_pdf.py deleted file mode 100644 index e83fbd12..00000000 --- a/backend/sandbox/docker/presentation-processing-wip/html_to_pdf.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -""" -HTML Presentation to PDF Converter - -This script converts HTML slides to a single PDF file based on metadata.json. -It uses Playwright to render each HTML slide at exactly 1920x1080 resolution -and combines them into a single PDF. - -Usage: - python html_to_pdf.py [presentation_directory] [output_pdf_path] - -Example: - python html_to_pdf.py . elon_musk_presentation.pdf - python html_to_pdf.py /path/to/presentation output.pdf -""" - -import json -import os -import sys -import asyncio -from pathlib import Path -from typing import Dict, List, Tuple -import tempfile -import subprocess - -try: - from playwright.async_api import async_playwright -except ImportError: - print("Error: Playwright is not installed. Please install it with:") - print("pip install playwright") - print("playwright install chromium") - sys.exit(1) - -try: - from PyPDF2 import PdfWriter, PdfReader -except ImportError: - print("Error: PyPDF2 is not installed. Please install it with:") - print("pip install PyPDF2") - sys.exit(1) - - -class PresentationToPDF: - def __init__(self, presentation_dir: str, output_path: str = None): - """ - Initialize the converter. - - Args: - presentation_dir: Directory containing metadata.json and HTML slides - output_path: Output PDF file path (optional, defaults to presentation_name.pdf) - """ - self.presentation_dir = Path(presentation_dir).resolve() - self.metadata_path = self.presentation_dir / "metadata.json" - self.output_path = output_path - self.metadata = None - self.slides_info = [] - - # Validate inputs - if not self.presentation_dir.exists(): - raise FileNotFoundError(f"Presentation directory not found: {self.presentation_dir}") - - if not self.metadata_path.exists(): - raise FileNotFoundError(f"metadata.json not found in: {self.presentation_dir}") - - def load_metadata(self) -> Dict: - """Load and parse metadata.json""" - try: - with open(self.metadata_path, 'r', encoding='utf-8') as f: - self.metadata = json.load(f) - - # Extract slide information and sort by slide number - slides = self.metadata.get('slides', {}) - self.slides_info = [] - - for slide_num, slide_data in slides.items(): - filename = slide_data.get('filename') - title = slide_data.get('title', f'Slide {slide_num}') - - if filename: - html_path = self.presentation_dir / filename - if html_path.exists(): - self.slides_info.append({ - 'number': int(slide_num), - 'title': title, - 'filename': filename, - 'path': html_path - }) - else: - print(f"Warning: HTML file not found: {html_path}") - - # Sort slides by number - self.slides_info.sort(key=lambda x: x['number']) - - if not self.slides_info: - raise ValueError("No valid slides found in metadata.json") - - # Set default output path if not provided - if not self.output_path: - presentation_name = self.metadata.get('presentation_name', 'presentation') - self.output_path = self.presentation_dir / f"{presentation_name}.pdf" - else: - self.output_path = Path(self.output_path).resolve() - - print(f"Loaded {len(self.slides_info)} slides from metadata") - return self.metadata - - except json.JSONDecodeError as e: - raise ValueError(f"Invalid JSON in metadata.json: {e}") - except Exception as e: - raise ValueError(f"Error loading metadata: {e}") - - async def render_slide_to_pdf(self, browser, slide_info: Dict, temp_dir: Path) -> Path: - """ - Render a single HTML slide to PDF using Playwright. - - Args: - browser: Playwright browser instance - slide_info: Slide information dictionary - temp_dir: Temporary directory for intermediate files - - Returns: - Path to the generated PDF file - """ - html_path = slide_info['path'] - slide_num = slide_info['number'] - - print(f"Rendering slide {slide_num}: {slide_info['title']}") - - # Create new page with exact presentation dimensions - page = await browser.new_page() - - try: - # CRITICAL: Set exact viewport to 1920x1080 - this is the key! - await page.set_viewport_size({"width": 1920, "height": 1080}) - - # Use screen media type for accurate rendering - await page.emulate_media(media='screen') - - # Disable device scale factor to ensure 1:1 pixel mapping - await page.evaluate(""" - () => { - // Override device pixel ratio to ensure exact dimensions - Object.defineProperty(window, 'devicePixelRatio', { - get: () => 1 - }); - } - """) - - # Navigate to the HTML file - file_url = f"file://{html_path.absolute()}" - await page.goto(file_url, wait_until="networkidle", timeout=30000) - - # Wait for fonts and dynamic content to fully load - await page.wait_for_timeout(3000) - - # Ensure the slide container is exactly 1920x1080 - await page.evaluate(""" - () => { - const slideContainer = document.querySelector('.slide-container'); - if (slideContainer) { - slideContainer.style.width = '1920px'; - slideContainer.style.height = '1080px'; - slideContainer.style.transform = 'none'; - slideContainer.style.maxWidth = 'none'; - slideContainer.style.maxHeight = 'none'; - } - - // Ensure body doesn't interfere with dimensions - document.body.style.margin = '0'; - document.body.style.padding = '0'; - document.body.style.width = '1920px'; - document.body.style.height = '1080px'; - document.body.style.overflow = 'hidden'; - } - """) - - # Wait a bit more for the layout adjustments - await page.wait_for_timeout(1000) - - # Generate PDF for this slide with exact dimensions - temp_pdf_path = temp_dir / f"slide_{slide_num:02d}.pdf" - - await page.pdf( - path=str(temp_pdf_path), - width="1920px", - height="1080px", - margin={"top": "0", "right": "0", "bottom": "0", "left": "0"}, - print_background=True, - prefer_css_page_size=False - ) - - print(f" ā Slide {slide_num} rendered at 1920x1080") - return temp_pdf_path - - except Exception as e: - raise RuntimeError(f"Error rendering slide {slide_num}: {e}") - finally: - await page.close() - - def combine_pdfs(self, pdf_paths: List[Path]) -> None: - """ - Combine multiple PDF files into a single PDF. - - Args: - pdf_paths: List of PDF file paths to combine - """ - print(f"Combining {len(pdf_paths)} PDFs into final output...") - - pdf_writer = PdfWriter() - - try: - for pdf_path in pdf_paths: - if not pdf_path.exists(): - print(f"Warning: PDF file not found: {pdf_path}") - continue - - with open(pdf_path, 'rb') as pdf_file: - pdf_reader = PdfReader(pdf_file) - for page in pdf_reader.pages: - pdf_writer.add_page(page) - - # Write the combined PDF - with open(self.output_path, 'wb') as output_file: - pdf_writer.write(output_file) - - print(f"ā PDF created successfully: {self.output_path}") - print(f"š Total pages: {len(pdf_writer.pages)}") - - except Exception as e: - raise RuntimeError(f"Error combining PDFs: {e}") - - async def convert_to_pdf(self) -> None: - """Main conversion method""" - print("š Starting HTML to PDF conversion...") - - # Load metadata - self.load_metadata() - - # Create temporary directory for intermediate files - with tempfile.TemporaryDirectory() as temp_dir: - temp_path = Path(temp_dir) - pdf_paths = [] - - # Launch browser with exact rendering settings - async with async_playwright() as p: - print("š Launching browser with 1920x1080 configuration...") - browser = await p.chromium.launch( - headless=True, - args=[ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-gpu', - '--no-first-run', - '--disable-default-apps', - '--disable-web-security', - '--disable-features=TranslateUI', - '--disable-ipc-flooding-protection', - # Force device scale factor to 1 for exact pixel mapping - '--force-device-scale-factor=1', - '--disable-background-timer-throttling', - '--disable-backgrounding-occluded-windows', - '--disable-renderer-backgrounding' - ] - ) - - try: - # Process each slide - for slide_info in self.slides_info: - pdf_path = await self.render_slide_to_pdf(browser, slide_info, temp_path) - pdf_paths.append(pdf_path) - - finally: - await browser.close() - - # Combine all PDFs - self.combine_pdfs(pdf_paths) - - print("⨠Conversion completed successfully!") - - -def check_dependencies(): - """Check if required dependencies are available""" - missing_deps = [] - - try: - import playwright - except ImportError: - missing_deps.append("playwright (pip install playwright)") - - try: - import PyPDF2 - except ImportError: - missing_deps.append("PyPDF2 (pip install PyPDF2)") - - if missing_deps: - print("ā Missing dependencies:") - for dep in missing_deps: - print(f" - {dep}") - print("\nPlease install missing dependencies and try again.") - return False - - # Check if Playwright browsers are installed - try: - result = subprocess.run(['playwright', 'install', '--dry-run'], - capture_output=True, text=True, timeout=10) - if "chromium" not in result.stdout.lower(): - print("ā ļø Playwright browser not found. Please run:") - print(" playwright install chromium") - return False - except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError): - print("ā ļø Could not verify Playwright installation. You may need to run:") - print(" playwright install chromium") - - return True - - -def main(): - """Main CLI entry point""" - print("š HTML Presentation to PDF Converter") - print("=" * 50) - - # Check dependencies - if not check_dependencies(): - sys.exit(1) - - # Parse command line arguments - if len(sys.argv) < 2: - presentation_dir = "." - output_path = None - elif len(sys.argv) == 2: - presentation_dir = sys.argv[1] - output_path = None - elif len(sys.argv) == 3: - presentation_dir = sys.argv[1] - output_path = sys.argv[2] - else: - print("Usage: python html_to_pdf.py [presentation_directory] [output_pdf_path]") - print("\nExamples:") - print(" python html_to_pdf.py") - print(" python html_to_pdf.py . my_presentation.pdf") - print(" python html_to_pdf.py /path/to/slides output.pdf") - sys.exit(1) - - try: - # Create converter and run - converter = PresentationToPDF(presentation_dir, output_path) - asyncio.run(converter.convert_to_pdf()) - - except KeyboardInterrupt: - print("\nā Conversion cancelled by user") - sys.exit(1) - except Exception as e: - print(f"ā Error: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/backend/sandbox/docker/presentation-processing-wip/html_to_pptx_perfect.py b/backend/sandbox/docker/presentation-processing-wip/html_to_pptx_perfect_wip.py similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/html_to_pptx_perfect.py rename to backend/sandbox/docker/presentation-processing-wip/html_to_pptx_perfect_wip.py diff --git a/backend/sandbox/docker/presentation-processing-wip/metadata.json b/backend/sandbox/docker/presentation-processing-wip/metadata.json deleted file mode 100644 index 871477b9..00000000 --- a/backend/sandbox/docker/presentation-processing-wip/metadata.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "presentation_name": "elon_musk", - "title": "Elon Musk: Visionary Entrepreneur", - "description": "", - "slides": { - "1": { - "title": "Title Slide", - "filename": "slide_01.html", - "file_path": "presentations/elon_musk/slide_01.html", - "preview_url": "/workspace/presentations/elon_musk/slide_01.html", - "created_at": "2025-08-20T23:16:46.862281" - }, - "2": { - "title": "Early Life & Background", - "filename": "slide_02.html", - "file_path": "presentations/elon_musk/slide_02.html", - "preview_url": "/workspace/presentations/elon_musk/slide_02.html", - "created_at": "2025-08-20T23:17:02.255166" - }, - "3": { - "title": "First Ventures", - "filename": "slide_03.html", - "file_path": "presentations/elon_musk/slide_03.html", - "preview_url": "/workspace/presentations/elon_musk/slide_03.html", - "created_at": "2025-08-20T23:17:22.873481" - } - }, - "created_at": "2025-08-20T23:16:46.285519", - "updated_at": "2025-08-20T23:20:04.399832" -} \ No newline at end of file diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_01.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_01.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_01.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_01.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_02.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_02.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_02.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_02.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_03.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_03.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_03.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_03.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_04.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_04.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_04.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_04.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_05.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_05.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_05.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_05.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_06.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_06.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_06.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_06.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_07.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_07.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_07.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_07.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_08.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_08.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_08.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_08.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_09.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_09.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_09.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_09.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_10.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_10.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_10.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_10.html diff --git a/backend/sandbox/docker/presentation-processing-wip/slide_11.html b/backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_11.html similarity index 100% rename from backend/sandbox/docker/presentation-processing-wip/slide_11.html rename to backend/sandbox/docker/presentation-processing-wip/workspace/presentations/slide_11.html diff --git a/backend/sandbox/docker/requirements.txt b/backend/sandbox/docker/requirements.txt index dc10dd11..3ebb1cff 100644 --- a/backend/sandbox/docker/requirements.txt +++ b/backend/sandbox/docker/requirements.txt @@ -4,4 +4,6 @@ pyautogui==0.9.54 pillow==10.2.0 pydantic==2.6.1 pytesseract==0.3.13 -pandas==2.3.0 \ No newline at end of file +pandas==2.3.0 +playwright>=1.40.0 +PyPDF2>=3.0.0 \ No newline at end of file diff --git a/backend/sandbox/docker/server.py b/backend/sandbox/docker/server.py index defa5f0a..d51411ea 100644 --- a/backend/sandbox/docker/server.py +++ b/backend/sandbox/docker/server.py @@ -3,6 +3,11 @@ from fastapi.staticfiles import StaticFiles from starlette.middleware.base import BaseHTTPMiddleware import uvicorn import os +from pathlib import Path + +# Import PDF router and Visual HTML Editor router +from html_to_pdf_router import router as pdf_router +from visual_html_editor_router import router as editor_router # Ensure we're serving from the /workspace directory workspace_dir = "/workspace" @@ -18,8 +23,254 @@ class WorkspaceDirMiddleware(BaseHTTPMiddleware): app = FastAPI() app.add_middleware(WorkspaceDirMiddleware) +# Include routers +app.include_router(pdf_router) +app.include_router(editor_router) + +# Create output directory for generated PDFs (needed by PDF router) +output_dir = Path("generated_pdfs") +output_dir.mkdir(exist_ok=True) + +# Mount static files for PDF downloads +app.mount("/downloads", StaticFiles(directory=str(output_dir)), name="downloads") + # Initial directory creation os.makedirs(workspace_dir, exist_ok=True) + +# Add visual HTML editor root endpoint +@app.get("/editor") +async def list_html_files(): + """List all HTML files in the workspace for easy access""" + from fastapi.responses import HTMLResponse + try: + html_files = [f for f in os.listdir(workspace_dir) if f.endswith('.html')] + + html_content = """ + + +
+ + +Click-to-edit any HTML file with live preview
+Add .html files to this directory to start editing
+Click-to-edit any HTML file with live preview
-Add .html files to this directory to start editing
-