presentation processing sandbox wip

This commit is contained in:
marko-kraemer 2025-08-23 02:38:47 -07:00
parent 81cf745e99
commit 253ff6557d
20 changed files with 614 additions and 739 deletions

View File

@ -0,0 +1,323 @@
#!/usr/bin/env python3
"""
FastAPI HTML Presentation to PDF Converter Router
Provides PDF conversion endpoints as a FastAPI router that can be included in other applications.
"""
import json
import asyncio
from pathlib import Path
from typing import Dict, List
import tempfile
from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field
try:
from playwright.async_api import async_playwright
except ImportError:
raise ImportError("Playwright is not installed. Please install it with: pip install playwright")
try:
from PyPDF2 import PdfWriter, PdfReader
except ImportError:
raise ImportError("PyPDF2 is not installed. Please install it with: pip install PyPDF2")
# Router exposing the PDF-conversion endpoints; every route registered on it
# is served under the /presentation prefix.
router = APIRouter(prefix="/presentation", tags=["pdf-conversion"])
# Directory that receives generated PDFs. The path is relative, so it is
# resolved against the process's working directory at the moment this module
# is imported (that is when mkdir runs).
output_dir = Path("generated_pdfs")
output_dir.mkdir(exist_ok=True)
class ConvertRequest(BaseModel):
    """Request body for the convert-to-pdf endpoint."""

    # Folder that must contain metadata.json (required field, no default).
    presentation_path: str = Field(..., description="Path to the presentation folder containing metadata.json")
    # Selects the response form: raw PDF bytes (True) or a JSON descriptor (False, default).
    download: bool = Field(False, description="If true, returns the PDF file directly. If false, returns JSON with download URL.")
class ConvertResponse(BaseModel):
    """JSON payload returned when the PDF is stored and served by URL."""

    # True when the conversion finished without raising.
    success: bool
    # Human-readable summary of the conversion.
    message: str
    # URL path under which the generated PDF can be fetched.
    pdf_url: str
    # File name of the generated PDF.
    filename: str
    # Number of slides that were rendered into the PDF.
    total_slides: int
class PresentationToPDFAPI:
    """Render the HTML slides listed in ``metadata.json`` into a single PDF.

    Each slide is printed to its own 1920x1080 PDF page with Playwright
    (Chromium) and the per-slide PDFs are then merged with PyPDF2.
    """

    def __init__(self, presentation_dir: str):
        """Initialize the converter with presentation directory.

        Args:
            presentation_dir: Folder that contains ``metadata.json``.

        Raises:
            FileNotFoundError: If the folder or its ``metadata.json`` is missing.
        """
        self.presentation_dir = Path(presentation_dir).resolve()
        self.metadata_path = self.presentation_dir / "metadata.json"
        self.metadata = None
        self.slides_info = []
        # Validate inputs
        if not self.presentation_dir.exists():
            raise FileNotFoundError(f"Presentation directory not found: {self.presentation_dir}")
        if not self.metadata_path.exists():
            raise FileNotFoundError(f"metadata.json not found in: {self.presentation_dir}")

    @staticmethod
    def _safe_file_stem(name) -> str:
        """Return *name* made safe for use as a single file-name component."""
        cleaned = "".join(
            "_" if (ch in "/\\" or ord(ch) < 32) else ch
            for ch in str(name)
        )
        return cleaned or "presentation"

    def _resolve_slide_path(self, filename: str) -> Path:
        """Locate a slide file named in the metadata.

        The path is first tried exactly as written (absolute, or relative to
        the working directory). If that does not exist, it is tried relative
        to the presentation directory, which covers metadata that stores bare
        file names such as ``slide_01.html``.
        """
        as_written = Path(filename)
        if as_written.exists():
            return as_written
        inside_presentation = self.presentation_dir / filename
        return inside_presentation if inside_presentation.exists() else as_written

    def load_metadata(self) -> Dict:
        """Load ``metadata.json`` and build the ordered slide list.

        Populates ``self.metadata`` and ``self.slides_info`` (sorted by slide
        number; entries whose HTML file cannot be found are skipped with a
        warning).

        Returns:
            The parsed metadata dictionary.

        Raises:
            ValueError: If the JSON is invalid, the metadata cannot be
                processed, or no slide file could be found.
        """
        self.slides_info = []
        collected = []
        try:
            with open(self.metadata_path, 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
            # Extract slide information
            slides = self.metadata.get('slides', {})
            for slide_num, slide_data in slides.items():
                filename = slide_data.get('filename')
                title = slide_data.get('title', f'Slide {slide_num}')
                if not filename:
                    continue
                html_path = self._resolve_slide_path(filename)
                print(f"Using path: {html_path}")
                # Verify the path exists
                if html_path.exists():
                    collected.append({
                        'number': int(slide_num),
                        'title': title,
                        'filename': filename,
                        'path': html_path
                    })
                    print(f"Added slide {slide_num}: {html_path}")
                else:
                    print(f"Warning: HTML file does not exist: {html_path}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in metadata.json: {e}") from e
        except Exception as e:
            raise ValueError(f"Error loading metadata: {e}") from e
        # Sort slides by number
        collected.sort(key=lambda x: x['number'])
        self.slides_info = collected
        # Raised outside the try block so the message is not re-wrapped.
        if not self.slides_info:
            raise ValueError("No valid slides found in metadata.json")
        return self.metadata

    async def render_slide_to_pdf(self, browser, slide_info: Dict, temp_dir: Path) -> Path:
        """Render a single HTML slide to PDF using Playwright.

        Args:
            browser: Playwright browser instance used to open a new page.
            slide_info: Entry from ``self.slides_info`` (number, title, path).
            temp_dir: Directory that receives the per-slide PDF.

        Returns:
            Path of the generated single-slide PDF.

        Raises:
            RuntimeError: If any step of rendering fails.
        """
        html_path = slide_info['path']
        slide_num = slide_info['number']
        print(f"Rendering slide {slide_num}: {slide_info['title']}")
        # Create new page with exact presentation dimensions
        page = await browser.new_page()
        try:
            # Set exact viewport to 1920x1080
            await page.set_viewport_size({"width": 1920, "height": 1080})
            await page.emulate_media(media='screen')
            # Override device pixel ratio for exact dimensions. This must be an
            # init script: a plain evaluate() before goto() runs in the blank
            # document and is discarded by the navigation.
            await page.add_init_script(
                script="Object.defineProperty(window, 'devicePixelRatio', { get: () => 1 });"
            )
            # Navigate to the HTML file; as_uri() percent-encodes spaces and
            # other characters that are not valid in a raw file:// URL.
            file_url = html_path.absolute().as_uri()
            await page.goto(file_url, wait_until="networkidle", timeout=30000)
            # Wait for fonts and dynamic content to load
            await page.wait_for_timeout(3000)
            # Ensure exact slide dimensions
            await page.evaluate("""
                () => {
                    const slideContainer = document.querySelector('.slide-container');
                    if (slideContainer) {
                        slideContainer.style.width = '1920px';
                        slideContainer.style.height = '1080px';
                        slideContainer.style.transform = 'none';
                        slideContainer.style.maxWidth = 'none';
                        slideContainer.style.maxHeight = 'none';
                    }
                    document.body.style.margin = '0';
                    document.body.style.padding = '0';
                    document.body.style.width = '1920px';
                    document.body.style.height = '1080px';
                    document.body.style.overflow = 'hidden';
                }
            """)
            await page.wait_for_timeout(1000)
            # Generate PDF for this slide
            temp_pdf_path = temp_dir / f"slide_{slide_num:02d}.pdf"
            await page.pdf(
                path=str(temp_pdf_path),
                width="1920px",
                height="1080px",
                margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                print_background=True,
                prefer_css_page_size=False
            )
            print(f" ✓ Slide {slide_num} rendered")
            return temp_pdf_path
        except Exception as e:
            raise RuntimeError(f"Error rendering slide {slide_num}: {e}") from e
        finally:
            await page.close()

    def combine_pdfs(self, pdf_paths: List[Path], output_path: Path) -> None:
        """Combine multiple PDF files into a single PDF.

        Args:
            pdf_paths: Per-slide PDFs, in the order they should appear.
            output_path: Destination of the merged PDF.

        Raises:
            RuntimeError: If reading or writing any PDF fails.
        """
        print(f"Combining {len(pdf_paths)} PDFs...")
        pdf_writer = PdfWriter()
        try:
            for pdf_path in pdf_paths:
                if not pdf_path.exists():
                    print(f"Warning: PDF file not found: {pdf_path}")
                    continue
                # Pass the path rather than an open handle so the reader does
                # not depend on a file object that is closed before write().
                pdf_reader = PdfReader(str(pdf_path))
                for page in pdf_reader.pages:
                    pdf_writer.add_page(page)
            # Write the combined PDF
            with open(output_path, 'wb') as output_file:
                pdf_writer.write(output_file)
            print(f"✅ PDF created: {output_path}")
        except Exception as e:
            raise RuntimeError(f"Error combining PDFs: {e}") from e

    async def convert_to_pdf(self, store_locally: bool = True) -> tuple:
        """Main conversion method with concurrent processing.

        Args:
            store_locally: When true, copy the merged PDF into ``output_dir``;
                otherwise return its bytes.

        Returns:
            ``(pdf_path, slide_count)`` when ``store_locally`` is true, else
            ``(pdf_bytes, slide_count, presentation_name)``.
        """
        import shutil
        import time

        print("🚀 Starting concurrent HTML to PDF conversion...")
        # Load metadata
        self.load_metadata()
        presentation_name = self.metadata.get('presentation_name', 'presentation')
        # The name comes from metadata; strip path separators before using it
        # as a file name so it cannot point outside the target directory.
        file_stem = self._safe_file_stem(presentation_name)
        # Create temporary directory for intermediate files
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            # Launch browser
            async with async_playwright() as p:
                print("🌐 Launching browser...")
                browser = await p.chromium.launch(
                    headless=True,
                    args=[
                        '--no-sandbox',
                        '--disable-setuid-sandbox',
                        '--disable-dev-shm-usage',
                        '--disable-gpu',
                        '--force-device-scale-factor=1',
                        '--disable-background-timer-throttling'
                    ]
                )
                try:
                    # Process all slides concurrently using asyncio.gather
                    print(f"📄 Processing {len(self.slides_info)} slides concurrently...")
                    tasks = [
                        self.render_slide_to_pdf(browser, slide_info, temp_path)
                        for slide_info in self.slides_info
                    ]
                    # gather() returns results in task order, and the tasks
                    # follow slides_info, which is already sorted by number.
                    pdf_paths = list(await asyncio.gather(*tasks))
                finally:
                    await browser.close()
            # Combine all PDFs
            temp_output_path = temp_path / f"{file_stem}.pdf"
            self.combine_pdfs(pdf_paths, temp_output_path)
            if store_locally:
                # Store in the static files directory for URL serving. Use
                # wall-clock time: the event loop clock is monotonic and may
                # repeat across process restarts.
                timestamp = int(time.time())
                filename = f"{file_stem}_{timestamp}.pdf"
                final_output = output_dir / filename
                shutil.copy2(temp_output_path, final_output)
                return final_output, len(self.slides_info)
            # For direct download, read file content into memory (no local storage)
            with open(temp_output_path, 'rb') as f:
                pdf_content = f.read()
            return pdf_content, len(self.slides_info), presentation_name
def _pdf_content_disposition(presentation_name) -> str:
    """Build a Content-Disposition value for ``<presentation_name>.pdf``.

    Plain names produce the same header as before. Names containing quotes,
    separators, control or non-ASCII characters get a sanitized ``filename``
    plus an RFC 5987 ``filename*`` carrying the percent-encoded original.
    """
    from urllib.parse import quote

    raw = str(presentation_name)
    fallback = "".join(
        ch if (32 <= ord(ch) < 127 and ch not in '"\\/') else "_"
        for ch in raw
    ) or "presentation"
    header = f"attachment; filename=\"{fallback}.pdf\""
    if fallback != raw:
        header += f"; filename*=UTF-8''{quote(raw, safe='')}.pdf"
    return header


@router.post("/convert-to-pdf")
async def convert_presentation_to_pdf(request: ConvertRequest):
    """
    Convert HTML presentation to PDF with concurrent processing.
    Takes a presentation folder path and returns either:
    - PDF file directly (if download=true) - uses presentation name as filename
    - JSON response with download URL (if download=false, default)

    Errors map to HTTP statuses: missing folder/metadata -> 404, invalid
    metadata -> 400, anything else -> 500.
    """
    # NOTE(review): presentation_path is used as given by the client; confirm
    # the service is only reachable from trusted callers or restrict the root.
    try:
        print(f"📥 Received conversion request for: {request.presentation_path}")
        # Create converter
        converter = PresentationToPDFAPI(request.presentation_path)
        # If download is requested, don't store locally and return file directly
        if request.download:
            pdf_content, total_slides, presentation_name = await converter.convert_to_pdf(store_locally=False)
            print(f"✨ Direct download conversion completed for: {presentation_name}")
            return Response(
                content=pdf_content,
                media_type="application/pdf",
                # The name comes from metadata.json, so it is sanitized before
                # being placed in a response header.
                headers={"Content-Disposition": _pdf_content_disposition(presentation_name)}
            )
        # Otherwise, store locally and return JSON with download URL
        pdf_path, total_slides = await converter.convert_to_pdf(store_locally=True)
        print(f"✨ Conversion completed: {pdf_path}")
        pdf_url = f"/downloads/{pdf_path.name}"
        return ConvertResponse(
            success=True,
            message=f"PDF generated successfully with {total_slides} slides",
            pdf_url=pdf_url,
            filename=pdf_path.name,
            total_slides=total_slides
        )
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        print(f"❌ Conversion error: {e}")
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
@router.get("/health")
async def pdf_health_check():
    """Report that the PDF conversion service is up (static payload)."""
    health_payload = {
        "status": "healthy",
        "service": "HTML to PDF Converter",
    }
    return health_payload

View File

@ -1,77 +0,0 @@
#!/bin/bash
# HTML to PPTX Perfect 1:1 Converter Setup and Execution Script
# Installs the Python and Playwright prerequisites, then forwards the
# command-line arguments to html_to_pptx_perfect.py.
set -e # Exit on any error

CONVERTER="html_to_pptx_perfect.py"

# Print the usage text shown when more than two arguments are supplied.
print_usage() {
    echo "Usage: $0 [presentation_directory] [output_file.pptx]"
    echo ""
    echo "Examples:"
    echo " $0 # Convert current directory (perfect mode)"
    echo " $0 my_slides/ # Convert my_slides/ (perfect mode)"
    echo " $0 perfect_output.pptx # Convert current directory to perfect_output.pptx"
    echo " $0 my_slides/ perfect_output.pptx # Convert my_slides/ to perfect_output.pptx"
    echo ""
    echo "Perfect 1:1 Mode Features:"
    echo " ✅ PERFECT visual fidelity (everything captured exactly)"
    echo " ✅ All icons, gradients, decorations preserved"
    echo " ✅ Fully editable text elements"
    echo " ✅ True 1:1 conversion"
    echo " ✅ Simple and reliable approach"
}

echo "🎯 HTML to PPTX Perfect 1:1 Converter"
echo "====================================="
echo "🎨 Perfect background capture + Editable text overlay"
echo ""

# Python 3 is a hard requirement; stop early when it is absent.
command -v python3 > /dev/null 2>&1 || {
    echo "❌ Python 3 is required but not found. Please install Python 3.7+ and try again."
    exit 1
}

echo "🔧 Setting up dependencies..."
# Prefer the pinned requirements file; otherwise install the packages by name.
if [[ -f requirements.txt ]]; then
    echo "📦 Installing Python packages..."
    python3 -m pip install -r requirements.txt
else
    echo "📦 Installing Python packages individually..."
    python3 -m pip install playwright python-pptx Pillow beautifulsoup4 lxml
fi

# Playwright needs its own Chromium download.
echo "🌐 Installing Playwright browser..."
python3 -m playwright install chromium
echo "✅ Dependencies installed successfully!"
echo ""

echo "🚀 Starting PERFECT 1:1 HTML to PPTX conversion..."
echo "📋 Method: Perfect background capture + Editable text overlay"
echo ""

# Dispatch on the number of arguments.
case $# in
    0)
        # No arguments: convert the current directory with default naming.
        python3 "$CONVERTER"
        ;;
    1)
        # A lone *.pptx argument is an output file; anything else is a directory.
        case "$1" in
            *.pptx) python3 "$CONVERTER" . "$1" ;;
            *) python3 "$CONVERTER" "$1" ;;
        esac
        ;;
    2)
        # Presentation directory followed by output file.
        python3 "$CONVERTER" "$1" "$2"
        ;;
    *)
        print_usage
        exit 1
        ;;
esac

echo ""
echo "🎉 PERFECT 1:1 HTML to PPTX conversion completed!"
echo "✨ Perfect backgrounds + Editable text!"

View File

@ -1,358 +0,0 @@
#!/usr/bin/env python3
"""
HTML Presentation to PDF Converter
This script converts HTML slides to a single PDF file based on metadata.json.
It uses Playwright to render each HTML slide at exactly 1920x1080 resolution
and combines them into a single PDF.
Usage:
python html_to_pdf.py [presentation_directory] [output_pdf_path]
Example:
python html_to_pdf.py . elon_musk_presentation.pdf
python html_to_pdf.py /path/to/presentation output.pdf
"""
import json
import os
import sys
import asyncio
from pathlib import Path
from typing import Dict, List, Tuple
import tempfile
import subprocess
try:
from playwright.async_api import async_playwright
except ImportError:
print("Error: Playwright is not installed. Please install it with:")
print("pip install playwright")
print("playwright install chromium")
sys.exit(1)
try:
from PyPDF2 import PdfWriter, PdfReader
except ImportError:
print("Error: PyPDF2 is not installed. Please install it with:")
print("pip install PyPDF2")
sys.exit(1)
class PresentationToPDF:
    """Convert the HTML slides listed in ``metadata.json`` into one PDF file.

    Slides are rendered one after another with Playwright (Chromium) at
    1920x1080 and the resulting single-page PDFs are merged with PyPDF2.
    """

    def __init__(self, presentation_dir: str, output_path: str = None):
        """
        Initialize the converter.

        Args:
            presentation_dir: Directory containing metadata.json and HTML slides
            output_path: Output PDF file path (optional, defaults to presentation_name.pdf)

        Raises:
            FileNotFoundError: If the directory or its metadata.json is missing.
        """
        self.presentation_dir = Path(presentation_dir).resolve()
        self.metadata_path = self.presentation_dir / "metadata.json"
        self.output_path = output_path
        self.metadata = None
        self.slides_info = []
        # Validate inputs
        if not self.presentation_dir.exists():
            raise FileNotFoundError(f"Presentation directory not found: {self.presentation_dir}")
        if not self.metadata_path.exists():
            raise FileNotFoundError(f"metadata.json not found in: {self.presentation_dir}")

    def load_metadata(self) -> Dict:
        """Load metadata.json, build the ordered slide list and resolve the output path.

        Returns:
            The parsed metadata dictionary.

        Raises:
            ValueError: If the JSON is invalid, the metadata cannot be
                processed, or none of the listed slide files exists.
        """
        self.slides_info = []
        collected = []
        try:
            with open(self.metadata_path, 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
            # Extract slide information
            slides = self.metadata.get('slides', {})
            for slide_num, slide_data in slides.items():
                filename = slide_data.get('filename')
                title = slide_data.get('title', f'Slide {slide_num}')
                if not filename:
                    continue
                html_path = self.presentation_dir / filename
                if html_path.exists():
                    collected.append({
                        'number': int(slide_num),
                        'title': title,
                        'filename': filename,
                        'path': html_path
                    })
                else:
                    print(f"Warning: HTML file not found: {html_path}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in metadata.json: {e}") from e
        except Exception as e:
            raise ValueError(f"Error loading metadata: {e}") from e
        # Sort slides by number
        collected.sort(key=lambda x: x['number'])
        self.slides_info = collected
        # Raised outside the try block so the message is not re-wrapped.
        if not self.slides_info:
            raise ValueError("No valid slides found in metadata.json")
        # Set default output path if not provided
        if not self.output_path:
            presentation_name = self.metadata.get('presentation_name', 'presentation')
            self.output_path = self.presentation_dir / f"{presentation_name}.pdf"
        else:
            self.output_path = Path(self.output_path).resolve()
        print(f"Loaded {len(self.slides_info)} slides from metadata")
        return self.metadata

    async def render_slide_to_pdf(self, browser, slide_info: Dict, temp_dir: Path) -> Path:
        """
        Render a single HTML slide to PDF using Playwright.

        Args:
            browser: Playwright browser instance
            slide_info: Slide information dictionary
            temp_dir: Temporary directory for intermediate files

        Returns:
            Path to the generated PDF file

        Raises:
            RuntimeError: If any step of rendering fails.
        """
        html_path = slide_info['path']
        slide_num = slide_info['number']
        print(f"Rendering slide {slide_num}: {slide_info['title']}")
        # Create new page with exact presentation dimensions
        page = await browser.new_page()
        try:
            # CRITICAL: Set exact viewport to 1920x1080 - this is the key!
            await page.set_viewport_size({"width": 1920, "height": 1080})
            # Use screen media type for accurate rendering
            await page.emulate_media(media='screen')
            # Override device pixel ratio to ensure exact dimensions. Registered
            # as an init script because an evaluate() issued before goto() runs
            # in the blank document and is discarded by the navigation.
            await page.add_init_script(
                script="Object.defineProperty(window, 'devicePixelRatio', { get: () => 1 });"
            )
            # Navigate to the HTML file; as_uri() percent-encodes characters
            # (spaces, '#', '?', ...) that would corrupt a hand-built file:// URL.
            file_url = html_path.absolute().as_uri()
            await page.goto(file_url, wait_until="networkidle", timeout=30000)
            # Wait for fonts and dynamic content to fully load
            await page.wait_for_timeout(3000)
            # Ensure the slide container is exactly 1920x1080
            await page.evaluate("""
                () => {
                    const slideContainer = document.querySelector('.slide-container');
                    if (slideContainer) {
                        slideContainer.style.width = '1920px';
                        slideContainer.style.height = '1080px';
                        slideContainer.style.transform = 'none';
                        slideContainer.style.maxWidth = 'none';
                        slideContainer.style.maxHeight = 'none';
                    }
                    // Ensure body doesn't interfere with dimensions
                    document.body.style.margin = '0';
                    document.body.style.padding = '0';
                    document.body.style.width = '1920px';
                    document.body.style.height = '1080px';
                    document.body.style.overflow = 'hidden';
                }
            """)
            # Wait a bit more for the layout adjustments
            await page.wait_for_timeout(1000)
            # Generate PDF for this slide with exact dimensions
            temp_pdf_path = temp_dir / f"slide_{slide_num:02d}.pdf"
            await page.pdf(
                path=str(temp_pdf_path),
                width="1920px",
                height="1080px",
                margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
                print_background=True,
                prefer_css_page_size=False
            )
            print(f" ✓ Slide {slide_num} rendered at 1920x1080")
            return temp_pdf_path
        except Exception as e:
            raise RuntimeError(f"Error rendering slide {slide_num}: {e}") from e
        finally:
            await page.close()

    def combine_pdfs(self, pdf_paths: List[Path]) -> None:
        """
        Combine multiple PDF files into a single PDF at ``self.output_path``.

        Args:
            pdf_paths: List of PDF file paths to combine

        Raises:
            RuntimeError: If reading or writing any PDF fails.
        """
        print(f"Combining {len(pdf_paths)} PDFs into final output...")
        pdf_writer = PdfWriter()
        try:
            for pdf_path in pdf_paths:
                if not pdf_path.exists():
                    print(f"Warning: PDF file not found: {pdf_path}")
                    continue
                # Pass the path rather than an open handle so the reader does
                # not depend on a file object that is closed before write().
                pdf_reader = PdfReader(str(pdf_path))
                for page in pdf_reader.pages:
                    pdf_writer.add_page(page)
            # Write the combined PDF
            with open(self.output_path, 'wb') as output_file:
                pdf_writer.write(output_file)
            print(f"✅ PDF created successfully: {self.output_path}")
            print(f"📊 Total pages: {len(pdf_writer.pages)}")
        except Exception as e:
            raise RuntimeError(f"Error combining PDFs: {e}") from e

    async def convert_to_pdf(self) -> None:
        """Main conversion method: load metadata, render every slide, merge the PDFs."""
        print("🚀 Starting HTML to PDF conversion...")
        # Load metadata
        self.load_metadata()
        # Create temporary directory for intermediate files
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            pdf_paths = []
            # Launch browser with exact rendering settings
            async with async_playwright() as p:
                print("🌐 Launching browser with 1920x1080 configuration...")
                browser = await p.chromium.launch(
                    headless=True,
                    args=[
                        '--no-sandbox',
                        '--disable-setuid-sandbox',
                        '--disable-dev-shm-usage',
                        '--disable-gpu',
                        '--no-first-run',
                        '--disable-default-apps',
                        '--disable-web-security',
                        '--disable-features=TranslateUI',
                        '--disable-ipc-flooding-protection',
                        # Force device scale factor to 1 for exact pixel mapping
                        '--force-device-scale-factor=1',
                        '--disable-background-timer-throttling',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding'
                    ]
                )
                try:
                    # Process each slide
                    for slide_info in self.slides_info:
                        pdf_path = await self.render_slide_to_pdf(browser, slide_info, temp_path)
                        pdf_paths.append(pdf_path)
                finally:
                    await browser.close()
            # Combine all PDFs (must happen before the temporary directory is removed)
            self.combine_pdfs(pdf_paths)
        print("✨ Conversion completed successfully!")
def check_dependencies():
    """Report whether the converter's prerequisites appear to be installed.

    Returns False when playwright or PyPDF2 cannot be imported, or when the
    Playwright dry-run output does not mention Chromium. Returns True when the
    checks pass, and also when the browser check itself cannot be run.
    """
    probes = (
        ("playwright", "playwright (pip install playwright)"),
        ("PyPDF2", "PyPDF2 (pip install PyPDF2)"),
    )
    missing_deps = []
    for module_name, install_hint in probes:
        try:
            __import__(module_name)
        except ImportError:
            missing_deps.append(install_hint)
    if missing_deps:
        print("❌ Missing dependencies:")
        for dep in missing_deps:
            print(f" - {dep}")
        print("\nPlease install missing dependencies and try again.")
        return False
    # Check if Playwright browsers are installed
    try:
        dry_run = subprocess.run(
            ['playwright', 'install', '--dry-run'],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError):
        # The probe could not run; warn, but do not block the conversion.
        print("⚠️ Could not verify Playwright installation. You may need to run:")
        print(" playwright install chromium")
        return True
    if "chromium" in dry_run.stdout.lower():
        return True
    print("⚠️ Playwright browser not found. Please run:")
    print(" playwright install chromium")
    return False
def main():
    """Main CLI entry point.

    Accepts up to two positional arguments (presentation directory, output
    PDF path), checks dependencies, and runs the conversion. Exits with
    status 1 on bad usage, missing dependencies, cancellation or any error.
    """
    print("📄 HTML Presentation to PDF Converter")
    print("=" * 50)
    # Check dependencies
    if not check_dependencies():
        sys.exit(1)
    # Parse command line arguments
    cli_args = sys.argv[1:]
    if len(cli_args) > 2:
        print("Usage: python html_to_pdf.py [presentation_directory] [output_pdf_path]")
        print("\nExamples:")
        print(" python html_to_pdf.py")
        print(" python html_to_pdf.py . my_presentation.pdf")
        print(" python html_to_pdf.py /path/to/slides output.pdf")
        sys.exit(1)
    # Missing arguments fall back to the current directory and a default output name.
    presentation_dir = cli_args[0] if cli_args else "."
    output_path = cli_args[1] if len(cli_args) == 2 else None
    try:
        # Create converter and run
        converter = PresentationToPDF(presentation_dir, output_path)
        asyncio.run(converter.convert_to_pdf())
    except KeyboardInterrupt:
        print("\n❌ Conversion cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
if __name__ == "__main__":
main()

View File

@ -1,30 +0,0 @@
{
"presentation_name": "elon_musk",
"title": "Elon Musk: Visionary Entrepreneur",
"description": "",
"slides": {
"1": {
"title": "Title Slide",
"filename": "slide_01.html",
"file_path": "presentations/elon_musk/slide_01.html",
"preview_url": "/workspace/presentations/elon_musk/slide_01.html",
"created_at": "2025-08-20T23:16:46.862281"
},
"2": {
"title": "Early Life & Background",
"filename": "slide_02.html",
"file_path": "presentations/elon_musk/slide_02.html",
"preview_url": "/workspace/presentations/elon_musk/slide_02.html",
"created_at": "2025-08-20T23:17:02.255166"
},
"3": {
"title": "First Ventures",
"filename": "slide_03.html",
"file_path": "presentations/elon_musk/slide_03.html",
"preview_url": "/workspace/presentations/elon_musk/slide_03.html",
"created_at": "2025-08-20T23:17:22.873481"
}
},
"created_at": "2025-08-20T23:16:46.285519",
"updated_at": "2025-08-20T23:20:04.399832"
}

View File

@ -4,4 +4,6 @@ pyautogui==0.9.54
pillow==10.2.0
pydantic==2.6.1
pytesseract==0.3.13
pandas==2.3.0
pandas==2.3.0
playwright>=1.40.0
PyPDF2>=3.0.0

View File

@ -3,6 +3,11 @@ from fastapi.staticfiles import StaticFiles
from starlette.middleware.base import BaseHTTPMiddleware
import uvicorn
import os
from pathlib import Path
# Import PDF router and Visual HTML Editor router
from html_to_pdf_router import router as pdf_router
from visual_html_editor_router import router as editor_router
# Ensure we're serving from the /workspace directory
workspace_dir = "/workspace"
@ -18,8 +23,254 @@ class WorkspaceDirMiddleware(BaseHTTPMiddleware):
app = FastAPI()
app.add_middleware(WorkspaceDirMiddleware)
# Include routers
app.include_router(pdf_router)
app.include_router(editor_router)
# Create output directory for generated PDFs (needed by PDF router)
output_dir = Path("generated_pdfs")
output_dir.mkdir(exist_ok=True)
# Mount static files for PDF downloads
app.mount("/downloads", StaticFiles(directory=str(output_dir)), name="downloads")
# Initial directory creation
os.makedirs(workspace_dir, exist_ok=True)
# Add visual HTML editor root endpoint
@app.get("/editor")
async def list_html_files():
    """List all HTML files in the workspace for easy access.

    Returns an HTML page with one row per ``*.html`` entry found directly in
    ``workspace_dir``, each with a "View" link and an "Edit" link to the
    visual editor. Any failure is reported as HTTP 500.
    """
    import html
    from urllib.parse import quote

    from fastapi.responses import HTMLResponse
    try:
        html_files = [f for f in os.listdir(workspace_dir) if f.endswith('.html')]
        html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Visual HTML Editor</title>
<style>
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, system-ui, sans-serif;
    background: white;
    color: black;
    line-height: 1.5;
    max-width: 900px;
    margin: 0 auto;
    padding: 40px 20px;
}
.header {
    text-align: center;
    margin-bottom: 32px;
    border-bottom: 1px solid #e4e4e7;
    padding-bottom: 24px;
}
.header h1 {
    font-size: 24px;
    font-weight: 600;
    letter-spacing: -0.025em;
    margin-bottom: 8px;
    color: #09090b;
}
.header p {
    font-size: 14px;
    color: #71717a;
    font-weight: 400;
}
.file-list {
    border: 1px solid #e4e4e7;
    border-radius: 8px;
    overflow: hidden;
}
.file-item {
    padding: 16px 20px;
    border-bottom: 1px solid #e4e4e7;
    display: flex;
    justify-content: space-between;
    align-items: center;
    transition: background-color 0.15s ease;
}
.file-item:hover {
    background: #f4f4f5;
}
.file-item:last-child {
    border-bottom: none;
}
.file-name {
    font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
    font-size: 14px;
    font-weight: 500;
    color: black;
}
.file-actions {
    display: flex;
    gap: 12px;
}
.btn {
    padding: 8px 16px;
    text-decoration: none;
    font-size: 13px;
    font-weight: 500;
    border: 1px solid #e4e4e7;
    color: #09090b;
    background: white;
    transition: all 0.15s ease;
    text-align: center;
    min-width: 60px;
    border-radius: 6px;
}
.btn:hover {
    background: #f4f4f5;
    border-color: #d4d4d8;
}
.btn-edit {
    background: #09090b;
    color: white;
    border-color: #09090b;
}
.btn-edit:hover {
    background: #18181b;
    border-color: #18181b;
}
.empty-state {
    text-align: center;
    padding: 64px 20px;
    color: #71717a;
    border: 1px solid #e4e4e7;
    border-radius: 8px;
}
.empty-state h3 {
    font-size: 16px;
    font-weight: 500;
    margin-bottom: 8px;
    color: #09090b;
}
.info {
    margin-top: 32px;
    padding: 20px;
    background: #fafafa;
    border: 1px solid #e4e4e7;
    border-radius: 8px;
}
.info h3 {
    font-size: 16px;
    font-weight: 500;
    margin-bottom: 12px;
}
.info-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 20px;
}
.info-item {
    font-size: 14px;
    line-height: 1.4;
}
.info-item strong {
    font-weight: 500;
}
@media (max-width: 600px) {
    .info-grid {
        grid-template-columns: 1fr;
    }
    .file-item {
        flex-direction: column;
        align-items: flex-start;
        gap: 12px;
    }
    .file-actions {
        width: 100%;
        justify-content: flex-end;
    }
}
</style>
</head>
<body>
<div class="header">
<h1>Visual HTML Editor</h1>
<p>Click-to-edit any HTML file with live preview</p>
</div>
<div class="file-list">
"""
        if html_files:
            for file in sorted(html_files):
                # File names come from disk and may contain markup or URL
                # metacharacters: escape them for text, percent-encode for links.
                label = html.escape(file)
                href = quote(file)
                html_content += f"""
<div class="file-item">
<div class="file-name">{label}</div>
<div class="file-actions">
<a href="/{href}" class="btn" target="_blank">View</a>
<a href="/api/html/{href}/editor" class="btn btn-edit" target="_blank">Edit</a>
</div>
</div>
"""
        else:
            html_content += """
<div class="empty-state">
<h3>No files found</h3>
<p>Add .html files to this directory to start editing</p>
</div>
"""
        html_content += """
</div>
<div class="info">
<h3>How to use</h3>
<div class="info-grid">
<div class="info-item">
<strong>Edit text:</strong> Hover over any text and click the edit icon
</div>
<div class="info-item">
<strong>Delete elements:</strong> Click the trash icon to remove content
</div>
<div class="info-item">
<strong>Save changes:</strong> Press Ctrl+Enter or click Save
</div>
<div class="info-item">
<strong>Cancel editing:</strong> Press Escape or click Cancel
</div>
</div>
</div>
</body>
</html>
"""
        return HTMLResponse(content=html_content)
    except Exception as e:
        print(f"❌ Error listing HTML files: {e}")
        from fastapi import HTTPException
        raise HTTPException(status_code=500, detail=str(e)) from e
# Serve HTML files directly at root level
@app.get("/{file_name}")
async def serve_html_file(file_name: str):
    """Serve HTML files directly for viewing.

    Args:
        file_name: Single path segment naming a ``.html`` file in ``workspace_dir``.

    Raises:
        HTTPException: 404 when the name does not end in ``.html``, contains a
            path separator, or does not name a regular file in the workspace.
    """
    # NOTE(review): this route matches every single-segment path, so requests
    # for top-level non-.html files are answered with 404 here rather than
    # reaching a static mount registered later - confirm that is intended.
    from fastapi import HTTPException
    from fastapi.responses import HTMLResponse
    if not file_name.endswith('.html'):
        raise HTTPException(status_code=404, detail="File must be .html")
    # Only bare file names are served; anything with a separator is refused so
    # the joined path cannot leave the workspace directory.
    if os.path.basename(file_name) != file_name:
        raise HTTPException(status_code=404, detail="File not found")
    file_path = os.path.join(workspace_dir, file_name)
    # isfile (not exists): a directory named "x.html" must yield 404, not a
    # 500 from open().
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    return HTMLResponse(content=content)
app.mount('/', StaticFiles(directory=workspace_dir, html=True), name='site')
# This is needed for the import string approach with uvicorn

View File

@ -1,17 +1,26 @@
from fastapi import FastAPI, Request, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, JSONResponse
from starlette.middleware.base import BaseHTTPMiddleware
from pydantic import BaseModel
from typing import Optional, Dict, Any
import uvicorn
#!/usr/bin/env python3
"""
Visual HTML Editor Router
Provides visual HTML editing endpoints as a FastAPI router that can be included in other applications.
"""
import os
import json
import re
from typing import Optional, Dict, Any
from pathlib import Path
from fastapi import APIRouter, HTTPException
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from bs4 import BeautifulSoup, NavigableString
# Use current directory as workspace (presentation-example folder)
workspace_dir = os.path.dirname(os.path.abspath(__file__))
# Create router
router = APIRouter(prefix="/api/html", tags=["visual-editor"])
# Use /workspace as the default workspace directory
workspace_dir = "/workspace"
# All text elements that should be editable
TEXT_ELEMENTS = [
@ -29,36 +38,28 @@ TEXT_ELEMENTS = [
'label', 'legend', # Form text
]
class WorkspaceDirMiddleware(BaseHTTPMiddleware):
    """Middleware that recreates the workspace directory if it has disappeared."""

    async def dispatch(self, request: Request, call_next):
        """Make sure ``workspace_dir`` exists, then pass the request on."""
        workspace_missing = not os.path.exists(workspace_dir)
        if workspace_missing:
            print(f"Workspace directory {workspace_dir} not found, recreating...")
            os.makedirs(workspace_dir, exist_ok=True)
        response = await call_next(request)
        return response
app = FastAPI(title="Visual HTML Editor", version="1.0.0")
app.add_middleware(WorkspaceDirMiddleware)
# ===== VISUAL HTML EDITOR API =====
class EditTextRequest(BaseModel):
    """Request body for the edit-text endpoint."""

    # HTML file to modify. NOTE(review): presumably relative to the workspace
    # directory - confirm against the handler.
    file_path: str
    element_selector: str  # CSS selector to identify element
    # Replacement text for the selected element.
    new_text: str
class DeleteElementRequest(BaseModel):
    """Request body for the delete-element endpoint."""

    # HTML file to modify. NOTE(review): presumably relative to the workspace
    # directory - confirm against the handler.
    file_path: str
    # Selector of the element to remove (same role as in EditTextRequest).
    element_selector: str
class SaveContentRequest(BaseModel):
    """Request body for the save-content endpoint."""

    # HTML file to write. NOTE(review): presumably relative to the workspace
    # directory - confirm against the handler.
    file_path: str
    # HTML content to save for the file.
    html_content: str
class GetEditableElementsResponse(BaseModel):
    """Response model holding the editable elements found in an HTML file."""

    # One dictionary per element; the keys are set by the handler, which is
    # not fully visible here.
    elements: list[Dict[str, Any]]
@app.get("/api/html/{file_path:path}/editable-elements")
@router.get("/{file_path:path}/editable-elements")
async def get_editable_elements(file_path: str):
"""Get all editable text elements from an HTML file"""
try:
@ -144,7 +145,8 @@ async def get_editable_elements(file_path: str):
print(f"Error getting editable elements: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/html/edit-text")
@router.post("/edit-text")
async def edit_text(request: EditTextRequest):
"""Edit text content of an element in an HTML file"""
try:
@ -187,7 +189,8 @@ async def edit_text(request: EditTextRequest):
print(f"❌ Error editing text: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/html/delete-element")
@router.post("/delete-element")
async def delete_element(request: DeleteElementRequest):
"""Delete an element from an HTML file"""
try:
@ -239,7 +242,8 @@ async def delete_element(request: DeleteElementRequest):
print(f"❌ Error deleting element: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/html/save-content")
@router.post("/save-content")
async def save_content(request: SaveContentRequest):
"""Save the entire HTML content to file"""
try:
@ -297,7 +301,8 @@ async def save_content(request: SaveContentRequest):
print(f"❌ Error saving content: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/html/{file_path:path}/editor")
@router.get("/{file_path:path}/editor")
async def get_html_editor(file_path: str):
"""Serve the visual editor for an HTML file"""
try:
@ -317,217 +322,6 @@ async def get_html_editor(file_path: str):
print(f"❌ Error serving editor: {e}")
raise HTTPException(status_code=500, detail=str(e))
# Static markup for the index page. Kept at module level so the request
# handler only has to render the per-file rows between head and foot.
_INDEX_PAGE_HEAD = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Visual HTML Editor</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, system-ui, sans-serif;
background: white;
color: black;
line-height: 1.5;
max-width: 900px;
margin: 0 auto;
padding: 40px 20px;
}
.header {
text-align: center;
margin-bottom: 32px;
border-bottom: 1px solid #e4e4e7;
padding-bottom: 24px;
}
.header h1 {
font-size: 24px;
font-weight: 600;
letter-spacing: -0.025em;
margin-bottom: 8px;
color: #09090b;
}
.header p {
font-size: 14px;
color: #71717a;
font-weight: 400;
}
.file-list {
border: 1px solid #e4e4e7;
border-radius: 8px;
overflow: hidden;
}
.file-item {
padding: 16px 20px;
border-bottom: 1px solid #e4e4e7;
display: flex;
justify-content: space-between;
align-items: center;
transition: background-color 0.15s ease;
}
.file-item:hover {
background: #f4f4f5;
}
.file-item:last-child {
border-bottom: none;
}
.file-name {
font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
font-size: 14px;
font-weight: 500;
color: black;
}
.file-actions {
display: flex;
gap: 12px;
}
.btn {
padding: 8px 16px;
text-decoration: none;
font-size: 13px;
font-weight: 500;
border: 1px solid #e4e4e7;
color: #09090b;
background: white;
transition: all 0.15s ease;
text-align: center;
min-width: 60px;
border-radius: 6px;
}
.btn:hover {
background: #f4f4f5;
border-color: #d4d4d8;
}
.btn-edit {
background: #09090b;
color: white;
border-color: #09090b;
}
.btn-edit:hover {
background: #18181b;
border-color: #18181b;
}
.empty-state {
text-align: center;
padding: 64px 20px;
color: #71717a;
border: 1px solid #e4e4e7;
border-radius: 8px;
}
.empty-state h3 {
font-size: 16px;
font-weight: 500;
margin-bottom: 8px;
color: #09090b;
}
.info {
margin-top: 32px;
padding: 20px;
background: #fafafa;
border: 1px solid #e4e4e7;
border-radius: 8px;
}
.info h3 {
font-size: 16px;
font-weight: 500;
margin-bottom: 12px;
}
.info-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
}
.info-item {
font-size: 14px;
line-height: 1.4;
}
.info-item strong {
font-weight: 500;
}
@media (max-width: 600px) {
.info-grid {
grid-template-columns: 1fr;
}
.file-item {
flex-direction: column;
align-items: flex-start;
gap: 12px;
}
.file-actions {
width: 100%;
justify-content: flex-end;
}
}
</style>
</head>
<body>
<div class="header">
<h1>Visual HTML Editor</h1>
<p>Click-to-edit any HTML file with live preview</p>
</div>
<div class="file-list">
"""

# Shown inside the file list when the workspace contains no .html files.
_INDEX_EMPTY_STATE = """
<div class="empty-state">
<h3>No files found</h3>
<p>Add .html files to this directory to start editing</p>
</div>
"""

# Closes the file list and appends the usage hints.
_INDEX_PAGE_FOOT = """
</div>
<div class="info">
<h3>How to use</h3>
<div class="info-grid">
<div class="info-item">
<strong>Edit text:</strong> Hover over any text and click the edit icon
</div>
<div class="info-item">
<strong>Delete elements:</strong> Click the trash icon to remove content
</div>
<div class="info-item">
<strong>Save changes:</strong> Press Ctrl+Enter or click Save
</div>
<div class="info-item">
<strong>Cancel editing:</strong> Press Escape or click Cancel
</div>
</div>
</div>
</body>
</html>
"""


def _render_file_row(file_name: str) -> str:
    """Return the index-page row for one file, with the name safely encoded."""
    # Imported locally so this block does not depend on the file's import header.
    from html import escape
    from urllib.parse import quote

    # The name comes from the filesystem and may contain HTML or URL
    # metacharacters: escape it for display, percent-encode it for the links.
    label = escape(file_name)
    href = quote(file_name)
    return f"""
<div class="file-item">
<div class="file-name">{label}</div>
<div class="file-actions">
<a href="/{href}" class="btn" target="_blank">View</a>
<a href="/api/html/{href}/editor" class="btn btn-edit" target="_blank">Edit</a>
</div>
</div>
"""


@app.get("/")
async def list_html_files():
    """List all HTML files in the workspace for easy access.

    Builds an index page with one row per ``.html`` file found directly in
    ``workspace_dir`` (sorted by name), each with "View" and "Edit" links,
    or an empty-state panel when there are none.

    Returns:
        An ``HTMLResponse`` containing the rendered index page.

    Raises:
        HTTPException: 500 with the error text if the listing or rendering
            fails.
    """
    try:
        html_files = sorted(
            name for name in os.listdir(workspace_dir) if name.endswith('.html')
        )
        if html_files:
            listing = "".join(_render_file_row(name) for name in html_files)
        else:
            listing = _INDEX_EMPTY_STATE
        return HTMLResponse(content=_INDEX_PAGE_HEAD + listing + _INDEX_PAGE_FOOT)
    except Exception as e:
        print(f"❌ Error listing HTML files: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
def inject_editor_functionality(html_content: str, file_path: str) -> str:
"""Inject visual editor functionality into existing HTML"""
@ -913,7 +707,7 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
# Add editor JavaScript
editor_js = f"""
<script>
const API_BASE = '';
const API_BASE = '/api/html';
const FILE_PATH = '{file_path}';
class VisualHtmlEditor {{
@ -1365,7 +1159,7 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
deleteElement(element) {{
const text = element.textContent.substring(0, 60);
if (!confirm('Delete this element?\\n\\n"' + text + '..."')) {{
if (!confirm('Delete this element?\\\\n\\\\n"' + text + '..."')) {{
return;
}}
@ -1431,7 +1225,7 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
// Confirm before saving (permanent action)
const changeCount = this.pendingChanges.size;
if (!confirm(`Save all ${{changeCount}} change${{changeCount === 1 ? '' : 's'}} to file?\\n\\nThis action cannot be undone.`)) {{
if (!confirm(`Save all ${{changeCount}} change${{changeCount === 1 ? '' : 's'}} to file?\\\\n\\\\nThis action cannot be undone.`)) {{
return;
}}
@ -1456,7 +1250,7 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
const currentHtml = document.documentElement.outerHTML;
// Send it to a new endpoint that replaces the file content
const response = await fetch('/api/html/save-content', {{
const response = await fetch(`${{API_BASE}}/save-content`, {{
method: 'POST',
headers: {{ 'Content-Type': 'application/json' }},
body: JSON.stringify({{
@ -1557,34 +1351,4 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str:
if soup.body:
soup.body.append(BeautifulSoup(editor_js, 'html.parser'))
return str(soup)
# ===== END VISUAL HTML EDITOR API =====
# Expose the files under workspace_dir through a StaticFiles app mounted at
# the /static URL prefix.
app.mount('/static', StaticFiles(directory=workspace_dir), name='static')
# Serve HTML files directly at root level
@app.get("/{file_name}")
async def serve_html_file(file_name: str):
    """Serve one workspace HTML file for viewing.

    Args:
        file_name: Name of an ``.html`` file located directly in
            ``workspace_dir``.

    Returns:
        An ``HTMLResponse`` with the file's text, decoded as UTF-8.

    Raises:
        HTTPException: 404 when the name does not end in ``.html``, contains
            a path component, or does not name an existing regular file.
    """
    if not file_name.endswith('.html'):
        raise HTTPException(status_code=404, detail="File must be .html")
    # The name comes from the URL: accept bare file names only, so a path
    # separator can never make the join below point outside workspace_dir.
    if os.path.basename(file_name) != file_name:
        raise HTTPException(status_code=404, detail="File not found")
    file_path = os.path.join(workspace_dir, file_name)
    # isfile (not exists) so a directory that happens to be named "*.html"
    # yields a 404 instead of an unhandled error from open().
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        # The file was removed between the check above and the open.
        raise HTTPException(status_code=404, detail="File not found") from None
    return HTMLResponse(content=content)
# Script entry point: print a short startup banner, then start uvicorn,
# handing it the application as a "module:attribute" import string.
if __name__ == '__main__':
    print("🚀 Starting Visual HTML Editor")
    print("📁 Workspace:", workspace_dir)
    print("🌐 Server will be available at: http://localhost:8080")
    print("✏️ Access editor at: http://localhost:8080/api/html/[filename]/editor")
    uvicorn.run(
        "visual-html-editor:app",
        reload=True,
        port=8080,
        host="0.0.0.0",
    )
return str(soup)