diff --git a/backend/sandbox/docker/html_to_pptx_router.py b/backend/sandbox/docker/html_to_pptx_router.py new file mode 100644 index 00000000..ecd8642c --- /dev/null +++ b/backend/sandbox/docker/html_to_pptx_router.py @@ -0,0 +1,1123 @@ +#!/usr/bin/env python3 +""" +FastAPI HTML Presentation to PPTX Converter Router + +Provides PPTX conversion endpoints as a FastAPI router that can be included in other applications. +Uses the perfect 1:1 conversion approach with async processing. +""" + +import json +import asyncio +from pathlib import Path +from typing import Dict, List, Optional +import tempfile +import shutil +from dataclasses import dataclass + +from fastapi import APIRouter, HTTPException +from fastapi.responses import Response +from pydantic import BaseModel, Field + +try: + from playwright.async_api import async_playwright +except ImportError: + raise ImportError("Playwright is not installed. Please install it with: pip install playwright") + +try: + from pptx import Presentation + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor +except ImportError as e: + raise ImportError(f"python-pptx is not installed. Please install it with: pip install python-pptx. Error: {e}") + +try: + from bs4 import BeautifulSoup +except ImportError: + raise ImportError("BeautifulSoup is not installed. Please install it with: pip install beautifulsoup4") + + +# Create router +router = APIRouter(prefix="/presentation", tags=["pptx-conversion"]) + +# Create output directory for generated PPTXs +output_dir = Path("generated_pptx") +output_dir.mkdir(exist_ok=True) + + +class ConvertRequest(BaseModel): + presentation_path: str = Field(..., description="Path to the presentation folder containing metadata.json") + download: bool = Field(False, description="If true, returns the PPTX file directly. If false, returns JSON with download URL.") + + +class ConvertResponse(BaseModel): + success: bool + message: str + pptx_url: str + filename: str + total_slides: int + + +@dataclass +class TextElement: + """Text element information for editable text boxes""" + text: str + x: float + y: float + width: float + height: float + font_family: str + font_size: float + font_weight: str + color: str + text_align: str + line_height: float + tag: str + + +class CSSParser: + """Parse CSS styles and convert values to appropriate units""" + + @staticmethod + def parse_color(color_str: str) -> tuple[int, int, int]: + """Parse CSS color string to RGB tuple""" + if not color_str: + return (0, 0, 0) + + color_str = color_str.strip().lower() + + # Handle hex colors + if color_str.startswith('#'): + hex_color = color_str[1:] + if len(hex_color) == 3: + hex_color = ''.join([c*2 for c in hex_color]) + try: + return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) + except ValueError: + return (0, 0, 0) + + # Handle rgb() colors + import re + rgb_match = re.match(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str) + if rgb_match: + return tuple(int(x) for x in rgb_match.groups()) + + # Handle rgba() colors (ignore alpha for now) + rgba_match = re.match(r'rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)', color_str) + if rgba_match: + return tuple(int(x) for x in rgba_match.groups()) + + # Named colors + named_colors = { + 'black': (0, 0, 0), 'white': (255, 255, 255), 'red': (255, 0, 0), + 'green': (0, 128, 0), 'blue': (0, 0, 255), 'yellow': (255, 255, 0), + 'cyan': (0, 255, 255), 'magenta': (255, 0, 255), 'gray': (128, 128, 128), + 'grey': (128, 128, 128), 'orange': (255, 165, 0), 'purple': (128, 0, 128) + } + + return named_colors.get(color_str, (0, 0, 0)) + + @staticmethod + def parse_font_weight(weight_str: str) -> bool: + """Parse font weight to bold boolean""" + if not weight_str: + return False + + weight_str = weight_str.strip().lower() + bold_weights = ['bold', 'bolder', '700', '800', '900'] + + return weight_str in bold_weights or (weight_str.isdigit() and int(weight_str) >= 700) + + +class PerfectHTMLToPPTXConverter: + def __init__(self, presentation_dir: str): + """Initialize the perfect converter.""" + self.presentation_dir = Path(presentation_dir).resolve() + self.metadata_path = self.presentation_dir / "metadata.json" + self.metadata = None + self.slides_info = [] + + # Validate inputs + if not self.presentation_dir.exists(): + raise FileNotFoundError(f"Presentation directory not found: {self.presentation_dir}") + + if not self.metadata_path.exists(): + raise FileNotFoundError(f"metadata.json not found in: {self.presentation_dir}") + + def load_metadata(self) -> Dict: + """Load and parse metadata.json""" + try: + with open(self.metadata_path, 'r', encoding='utf-8') as f: + self.metadata = json.load(f) + + # Extract slide information and sort by slide number + slides = self.metadata.get('slides', {}) + self.slides_info = [] + + for slide_num, slide_data in slides.items(): + filename = slide_data.get('filename') + file_path = slide_data.get('file_path') + title = slide_data.get('title', f'Slide {slide_num}') + + if file_path: + # Use the local presentation directory instead of hardcoded /workspace + html_path = Path(f"/workspace/{file_path}") + print(f"Using path: {html_path}") + + # Verify the path exists + if html_path.exists(): + self.slides_info.append({ + 'number': int(slide_num), + 'title': title, + 'filename': filename, + 'path': html_path + }) + print(f"Added slide {slide_num}: {html_path}") + else: + print(f"Warning: HTML file does not exist: {html_path}") + + # Sort slides by number + self.slides_info.sort(key=lambda x: x['number']) + + if not self.slides_info: + raise ValueError("No valid slides found in metadata.json") + + return self.metadata + + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in metadata.json: {e}") + except Exception as e: + raise ValueError(f"Error loading metadata: {e}") + + async def extract_visual_elements(self, browser, html_path: Path, temp_dir: Path) -> List[Dict]: + """Extract all visual elements (non-text) as individual images with positioning.""" + page = await browser.new_page() + visual_elements = [] + + try: + # Set exact viewport dimensions + await page.set_viewport_size({"width": 1920, "height": 1080}) + await page.emulate_media(media='screen') + + # Force device pixel ratio to 1 + await page.evaluate(r""" + () => { + Object.defineProperty(window, 'devicePixelRatio', { + get: () => 1 + }); + } + """) + + # Navigate to HTML file + file_url = f"file://{html_path.absolute()}" + await page.goto(file_url, wait_until="networkidle", timeout=30000) + + # Wait for fonts and content to load + await page.wait_for_timeout(5000) + + # Extract all visual elements with precise positioning + visual_data = await page.evaluate(r""" + () => { + function extractVisualElements(element, depth = 0) { + if (!element || element.nodeType !== Node.ELEMENT_NODE) return []; + + const computedStyle = window.getComputedStyle(element); + const rect = element.getBoundingClientRect(); + + // Skip hidden elements + if (computedStyle.display === 'none' || computedStyle.visibility === 'hidden') return []; + if (rect.width === 0 || rect.height === 0) return []; + + const results = []; + + // Check if this element has visual content (not just text) + const hasVisualContent = ( + computedStyle.backgroundImage !== 'none' || + computedStyle.backgroundColor !== 'rgba(0, 0, 0, 0)' || + computedStyle.borderStyle !== 'none' || + computedStyle.borderImage !== 'none' || + element.tagName === 'IMG' || + element.tagName === 'SVG' || + element.tagName === 'CANVAS' || + computedStyle.boxShadow !== 'none' || + computedStyle.borderRadius !== '0px' || + computedStyle.gradient !== 'none' + ); + + // Check if this element has meaningful text content + const hasText = element.textContent && element.textContent.trim().length > 0; + + // Skip very large elements that are likely the main background + const isLikelyBackground = rect.width > 1000 || rect.height > 800; + + // If element has visual content, is not just a text container, and is not the main background + if (hasVisualContent && !hasText && !isLikelyBackground) { + results.push({ + type: 'visual', + x: Math.round(rect.left * 100) / 100, + y: Math.round(rect.top * 100) / 100, + width: Math.round(rect.width * 100) / 100, + height: Math.round(rect.height * 100) / 100, + tag: element.tagName.toLowerCase(), + className: element.className, + id: element.id, + depth: depth + }); + } + + // Process children for nested elements + Array.from(element.children).forEach(child => { + results.push(...extractVisualElements(child, depth + 1)); + }); + + return results; + } + + // Start extraction from body + const allVisualElements = extractVisualElements(document.body); + + // Sort by depth (background elements first) then by position + allVisualElements.sort((a, b) => { + if (a.depth !== b.depth) return a.depth - b.depth; + if (Math.abs(a.y - b.y) < 5) return a.x - b.x; + return a.y - b.y; + }); + + return allVisualElements; + } + """) + + # Capture each visual element as an individual image + for i, data in enumerate(visual_data): + if data and data['type'] == 'visual': + # Make text transparent for this element only + await page.evaluate(r""" + (elementData) => { + const elements = document.querySelectorAll('*'); + for (let el of elements) { + const rect = el.getBoundingClientRect(); + if (Math.abs(rect.left - elementData.x) < 1 && + Math.abs(rect.top - elementData.y) < 1 && + Math.abs(rect.width - elementData.width) < 1 && + Math.abs(rect.height - elementData.height) < 1) { + + // Make text transparent in this element + if (el.textContent && el.textContent.trim()) { + el.style.color = 'transparent'; + el.style.textShadow = 'none'; + el.style.webkitTextStroke = 'none'; + } + break; + } + } + } + """, data) + + # Wait for changes to apply + await page.wait_for_timeout(500) + + # Capture this specific element + element_path = temp_dir / f"visual_element_{html_path.stem}_{i:03d}.png" + await page.screenshot( + path=str(element_path), + full_page=False, + clip={ + "x": data['x'], + "y": data['y'], + "width": data['width'], + "height": data['height'] + } + ) + + visual_element = { + 'type': 'visual', + 'x': data['x'], + 'y': data['y'], + 'width': data['width'], + 'height': data['height'], + 'tag': data['tag'], + 'image_path': element_path, + 'depth': data['depth'] + } + + visual_elements.append(visual_element) + print(f" āœ“ Captured visual element {i}: {data['tag']} at ({data['x']:.1f}, {data['y']:.1f})") + + print(f" āœ“ Extracted {len(visual_elements)} visual elements") + + # Also capture the full slide background to ensure we don't miss any background colors + print(" šŸŽØ Capturing full slide background as fallback...") + try: + # Make all text AND visual elements transparent temporarily + await page.evaluate(r""" + () => { + function makeAllContentTransparent(element) { + if (element.nodeType === Node.TEXT_NODE) return; + if (element.nodeType === Node.ELEMENT_NODE) { + // Make text transparent + if (element.textContent && element.textContent.trim()) { + element.style.color = 'transparent'; + element.style.textShadow = 'none'; + element.style.webkitTextStroke = 'none'; + } + + // Make visual elements transparent (icons, graphics, borders, etc.) + const computedStyle = window.getComputedStyle(element); + const hasVisualContent = ( + computedStyle.backgroundImage !== 'none' || + computedStyle.backgroundColor !== 'rgba(0, 0, 0, 0)' || + computedStyle.borderStyle !== 'none' || + computedStyle.borderImage !== 'none' || + element.tagName === 'IMG' || + element.tagName === 'SVG' || + element.tagName === 'CANVAS' || + computedStyle.boxShadow !== 'none' || + computedStyle.borderRadius !== '0px' || + computedStyle.gradient !== 'none' + ); + + // Make visual elements transparent but keep their space + if (hasVisualContent) { + element.style.opacity = '0'; + element.style.pointerEvents = 'none'; + } + + // Process children + Array.from(element.children).forEach(makeAllContentTransparent); + } + } + makeAllContentTransparent(document.body); + } + """) + + await page.wait_for_timeout(1000) + + # Capture full slide + full_background_path = temp_dir / f"full_background_{html_path.stem}.png" + await page.screenshot( + path=str(full_background_path), + full_page=False, + clip={"x": 0, "y": 0, "width": 1920, "height": 1080} + ) + + # Add full background as first visual element (will be placed behind everything) + full_background_element = { + 'type': 'background', + 'x': 0, + 'y': 0, + 'width': 1920, + 'height': 1080, + 'tag': 'full_background', + 'image_path': full_background_path, + 'depth': -1 # Lowest depth to place behind everything + } + + visual_elements.insert(0, full_background_element) + print(f" āœ… Added full background fallback") + + except Exception as e: + print(f" āš ļø Could not capture full background: {e}") + + return visual_elements + + except Exception as e: + raise RuntimeError(f"Error extracting visual elements: {e}") + finally: + await page.close() + + async def capture_clean_background(self, browser, html_path: Path, temp_dir: Path, visual_elements: List[Dict]) -> Path: + """Capture the clean background with visual elements temporarily hidden.""" + page = await browser.new_page() + + try: + # Set exact viewport dimensions + await page.set_viewport_size({"width": 1920, "height": 1080}) + await page.emulate_media(media='screen') + + # Force device pixel ratio to 1 for exact measurements + await page.evaluate(r""" + () => { + Object.defineProperty(window, 'devicePixelRatio', { + get: () => 1 + }); + } + """) + + # Navigate to HTML file + file_url = f"file://{html_path.absolute()}" + await page.goto(file_url, wait_until="networkidle", timeout=30000) + + # Wait for fonts and content to load + await page.wait_for_timeout(5000) + + # Make text transparent AND hide visual elements to get clean background + await page.evaluate(r""" + (visualElementsData) => { + // Function to make text transparent + function makeTextTransparent(element) { + if (element.nodeType === Node.TEXT_NODE) return; + if (element.nodeType === Node.ELEMENT_NODE) { + const hasText = element.textContent && element.textContent.trim(); + if (hasText) { + element.style.color = 'transparent'; + element.style.textShadow = 'none'; + element.style.webkitTextStroke = 'none'; + } + Array.from(element.children).forEach(makeTextTransparent); + } + } + + // Function to hide visual elements (but preserve main background) + function hideVisualElements() { + visualElementsData.forEach(visualData => { + // Skip very large elements that are likely the main background + if (visualData.width > 1000 || visualData.height > 800) { + console.log('Skipping large element (likely background):', visualData); + return; + } + + // Find elements that match the visual element criteria + const elements = document.querySelectorAll('*'); + for (let el of elements) { + const rect = el.getBoundingClientRect(); + if (Math.abs(rect.left - visualData.x) < 2 && + Math.abs(rect.top - visualData.y) < 2 && + Math.abs(rect.width - visualData.width) < 2 && + Math.abs(rect.height - visualData.height) < 2) { + + // Hide this visual element + el.style.visibility = 'hidden'; + el.setAttribute('data-temporarily-hidden', 'true'); + break; + } + } + }); + } + + // Apply both transformations + makeTextTransparent(document.body); + hideVisualElements(); + + console.log('Made text transparent and hid visual elements for clean background'); + } + """, visual_elements) + + # Wait for changes to apply + await page.wait_for_timeout(2000) + + # Take clean background screenshot + background_path = temp_dir / f"clean_background_{html_path.stem}.png" + await page.screenshot( + path=str(background_path), + full_page=False, + clip={"x": 0, "y": 0, "width": 1920, "height": 1080} + ) + + # Verify the captured image + if background_path.exists(): + file_size = background_path.stat().st_size + print(f" āœ“ Captured clean background: {background_path.name} ({file_size} bytes)") + + # Debug: check if image is mostly black + try: + from PIL import Image + with Image.open(background_path) as img: + # Convert to RGB if needed + if img.mode != 'RGB': + img = img.convert('RGB') + + # Sample some pixels to check if image is mostly black + pixels = list(img.getdata()) + black_pixels = sum(1 for p in pixels if p[0] < 50 and p[1] < 50 and p[2] < 50) + total_pixels = len(pixels) + black_percentage = (black_pixels / total_pixels) * 100 + + print(f" šŸ“Š Image analysis: {black_percentage:.1f}% black pixels") + + if black_percentage > 80: + print(f" āš ļø WARNING: Image appears to be mostly black!") + + except Exception as e: + print(f" āš ļø Could not analyze image: {e}") + else: + print(f" āŒ Background image was not created!") + + return background_path + + except Exception as e: + raise RuntimeError(f"Error capturing clean background: {e}") + finally: + await page.close() + + async def extract_text_elements(self, browser, html_path: Path) -> List[TextElement]: + """Extract all text elements with precise positioning for editable text boxes.""" + page = await browser.new_page() + text_elements = [] + + try: + # Set exact viewport dimensions + await page.set_viewport_size({"width": 1920, "height": 1080}) + await page.emulate_media(media='screen') + + # Force device pixel ratio to 1 + await page.evaluate(r""" + () => { + Object.defineProperty(window, 'devicePixelRatio', { + get: () => 1 + }); + } + """) + + # Navigate to HTML file + file_url = f"file://{html_path.absolute()}" + await page.goto(file_url, wait_until="networkidle", timeout=30000) + + # Wait for fonts and content to load + await page.wait_for_timeout(5000) + + # Extract all text elements with precise positioning + text_data = await page.evaluate(r""" + () => { + function extractTextFromElement(element) { + if (!element || element.nodeType !== Node.ELEMENT_NODE) return []; + + const computedStyle = window.getComputedStyle(element); + const rect = element.getBoundingClientRect(); + + // Skip hidden elements + if (computedStyle.display === 'none' || computedStyle.visibility === 'hidden') return []; + if (rect.width === 0 || rect.height === 0) return []; + + const results = []; + + // Get direct text content (not from children) + let directText = ''; + for (let node of element.childNodes) { + if (node.nodeType === Node.TEXT_NODE) { + directText += node.textContent; + } + } + directText = directText.trim(); + + // If this element has direct text content, extract it + if (directText && directText.length > 0) { + const fontSizeMatch = computedStyle.fontSize.match(/([0-9.]+)px/); + const actualFontSize = fontSizeMatch ? parseFloat(fontSizeMatch[1]) : 16; + + // Get the actual text node position, not the container + const textNodes = []; + for (let node of element.childNodes) { + if (node.nodeType === Node.TEXT_NODE && node.textContent.trim()) { + const range = document.createRange(); + range.selectNodeContents(node); + const textRect = range.getBoundingClientRect(); + textNodes.push({ + text: node.textContent.trim(), + rect: textRect + }); + } + } + + // Use text node position if available, otherwise fall back to container + if (textNodes.length > 0) { + textNodes.forEach(textNode => { + results.push({ + text: textNode.text, + x: Math.round(textNode.rect.left * 100) / 100, + y: Math.round(textNode.rect.top * 100) / 100, + width: Math.round(textNode.rect.width * 100) / 100, + height: Math.round(textNode.rect.height * 100) / 100, + fontFamily: computedStyle.fontFamily, + actualFontSizePx: actualFontSize, + fontWeight: computedStyle.fontWeight, + color: computedStyle.color, + textAlign: computedStyle.textAlign, + lineHeight: computedStyle.lineHeight, + tag: element.tagName.toLowerCase() + }); + }); + } else { + // Fallback to container position + results.push({ + text: directText, + x: Math.round(rect.left * 100) / 100, + y: Math.round(rect.top * 100) / 100, + width: Math.round(rect.width * 100) / 100, + height: Math.round(rect.height * 100) / 100, + fontFamily: computedStyle.fontFamily, + actualFontSizePx: actualFontSize, + fontWeight: computedStyle.fontWeight, + color: computedStyle.color, + textAlign: computedStyle.textAlign, + lineHeight: computedStyle.lineHeight, + tag: element.tagName.toLowerCase() + }); + } + } + + // Process children for nested text + Array.from(element.children).forEach(child => { + results.push(...extractTextFromElement(child)); + }); + + return results; + } + + // Start extraction from body + const allTextElements = extractTextFromElement(document.body); + + // Sort by position (top to bottom, left to right) + allTextElements.sort((a, b) => { + if (Math.abs(a.y - b.y) < 5) { // Same line + return a.x - b.x; + } + return a.y - b.y; + }); + + return allTextElements; + } + """) + + # Convert to TextElement objects + for data in text_data: + if data and data['text']: + # Parse font family + font_family = data['fontFamily'] + if font_family: + font_family = font_family.split(',')[0].strip().strip('"\'') + font_family_map = { + 'roboto': 'Roboto', 'arial': 'Arial', 'helvetica': 'Helvetica', + 'sans-serif': 'Arial', 'serif': 'Times New Roman', 'monospace': 'Courier New' + } + font_family = font_family_map.get(font_family.lower(), font_family) + else: + font_family = 'Arial' + + # Parse line height + line_height = 1.2 + if data['lineHeight'] and data['lineHeight'] != 'normal': + if data['lineHeight'].endswith('px'): + px_value = float(data['lineHeight'][:-2]) + line_height = px_value / data['actualFontSizePx'] + else: + try: + line_height = float(data['lineHeight']) + except: + line_height = 1.2 + + text_element = TextElement( + text=data['text'], + x=data['x'], + y=data['y'], + width=data['width'], + height=data['height'], + font_family=font_family, + font_size=data['actualFontSizePx'] * 0.75, # Convert px to points + font_weight=data['fontWeight'], + color=data['color'], + text_align=data['textAlign'], + line_height=line_height, + tag=data['tag'] + ) + + text_elements.append(text_element) + + print(f" āœ“ Extracted {len(text_elements)} text elements") + return text_elements + + except Exception as e: + raise RuntimeError(f"Error extracting text elements: {e}") + finally: + await page.close() + + def create_text_box(self, slide, text_element: TextElement) -> None: + """Create an editable text box in PowerPoint with exact positioning.""" + # Convert pixel coordinates to inches + left = Inches(text_element.x / 96.0) + top = Inches(text_element.y / 96.0) + + # Calculate proper width - ensure it's wide enough for the text content + # Add some padding to prevent text wrapping + text_width = text_element.width + if text_width < 50: # If width is too small, estimate based on text length + # Rough estimate: each character needs about 8-12px depending on font + estimated_width = len(text_element.text) * 10 # 10px per character + text_width = max(text_width, estimated_width) + + # Add padding to prevent text from being cut off + padding = 8 # 8px padding on each side + final_width = text_width + (padding * 2) + + width = Inches(final_width / 96.0) + height = Inches(max(text_element.height, 10) / 96.0) + + print(f" šŸ“ Text: '{text_element.text[:40]}...' at ({text_element.x:.1f}, {text_element.y:.1f})") + + # Create text box + textbox = slide.shapes.add_textbox(left, top, width, height) + text_frame = textbox.text_frame + text_frame.clear() + + # Set text frame properties for exact positioning + text_frame.margin_left = Pt(0) + text_frame.margin_right = Pt(0) + text_frame.margin_top = Pt(0) + text_frame.margin_bottom = Pt(0) + text_frame.word_wrap = True + text_frame.auto_size = None + + # Add paragraph + p = text_frame.paragraphs[0] + p.text = text_element.text + + # Set text alignment + alignment_map = { + 'left': PP_ALIGN.LEFT, 'center': PP_ALIGN.CENTER, 'centre': PP_ALIGN.CENTER, + 'right': PP_ALIGN.RIGHT, 'justify': PP_ALIGN.JUSTIFY, 'start': PP_ALIGN.LEFT, + 'end': PP_ALIGN.RIGHT + } + p.alignment = alignment_map.get(text_element.text_align.lower(), PP_ALIGN.LEFT) + + # Set spacing + p.space_before = Pt(0) + p.space_after = Pt(0) + if hasattr(p, 'line_spacing'): + p.line_spacing = text_element.line_height + + # Set font properties + font = p.font + font.name = text_element.font_family + font.size = Pt(max(text_element.font_size, 8)) + font.bold = CSSParser.parse_font_weight(text_element.font_weight) + + # Set font color + try: + r, g, b = CSSParser.parse_color(text_element.color) + font.color.rgb = RGBColor(r, g, b) + except Exception as e: + print(f" Warning: Could not parse color '{text_element.color}': {e}") + font.color.rgb = RGBColor(0, 0, 0) + + # Make textbox transparent (no background, no border) + textbox.fill.background() + textbox.line.fill.background() + + def add_visual_element_to_slide(self, slide, visual_element: Dict) -> None: + """Add a visual element as an image to the PowerPoint slide with exact positioning.""" + if visual_element['image_path'].exists(): + # Convert pixel coordinates to inches + left = Inches(visual_element['x'] / 96.0) + top = Inches(visual_element['y'] / 96.0) + width = Inches(visual_element['width'] / 96.0) + height = Inches(visual_element['height'] / 96.0) + + # Add the image to the slide + picture = slide.shapes.add_picture(str(visual_element['image_path']), left, top, width, height) + + # Special handling for background elements + if visual_element['tag'] == 'clean_background': + # Ensure background is sent to back + picture.z_order = 0 + print(f" šŸŽØ Added background: {visual_element['image_path'].name} (z-order: {picture.z_order})") + else: + print(f" āœ… Added visual element: {visual_element['tag']} at ({visual_element['x']:.1f}, {visual_element['y']:.1f})") + else: + print(f" āš ļø Visual element image not found: {visual_element['image_path']}") + # Debug: check what files exist in temp_dir + temp_dir = visual_element['image_path'].parent + if temp_dir.exists(): + print(f" šŸ“ Files in temp directory: {list(temp_dir.glob('*.png'))}") + + async def build_slide_from_analysis(self, presentation, slide_analysis: Dict, temp_dir: Path) -> None: + """Build a PowerPoint slide from pre-analyzed data (fast, no browser operations).""" + slide_info = slide_analysis['slide_info'] + visual_elements = slide_analysis['visual_elements'] + background_path = slide_analysis['background_path'] + text_elements = slide_analysis['text_elements'] + + slide_num = slide_info['number'] + print(f" šŸ—ļø Building slide {slide_num}: {slide_info['title']}") + + # Add blank slide + blank_slide_layout = presentation.slide_layouts[6] # Blank layout + slide = presentation.slides.add_slide(blank_slide_layout) + + # Step 1: Add the clean background as the base layer + print(f" šŸ–¼ļø Adding background...") + background_element = { + 'type': 'background', + 'x': 0, + 'y': 0, + 'width': 1920, + 'height': 1080, + 'tag': 'clean_background', + 'image_path': background_path, + 'depth': -1 + } + self.add_visual_element_to_slide(slide, background_element) + + # Step 2: Add individual visual elements on top of background + print(f" šŸ–¼ļø Adding {len(visual_elements)} visual elements...") + for visual_element in visual_elements: + # Skip the full background we already added + if visual_element['tag'] != 'full_background': + self.add_visual_element_to_slide(slide, visual_element) + + # Step 3: Create editable text boxes on top of everything + print(f" āœļø Adding {len(text_elements)} editable text elements...") + for text_element in text_elements: + self.create_text_box(slide, text_element) + + print(f" šŸŽ‰ Slide {slide_num} built successfully!") + + async def convert_slide_perfect(self, browser, slide_info: Dict, presentation, temp_dir: Path) -> None: + """Convert a single HTML slide using perfect 1:1 approach.""" + html_path = slide_info['path'] + slide_num = slide_info['number'] + + print(f"Converting slide {slide_num}: {slide_info['title']} (Perfect 1:1 Mode)") + + # Add blank slide + blank_slide_layout = presentation.slide_layouts[6] # Blank layout + slide = presentation.slides.add_slide(blank_slide_layout) + + # Step 1: Extract individual visual elements (icons, graphics, borders, etc.) + print(" šŸŽØ Extracting individual visual elements...") + visual_elements = await self.extract_visual_elements(browser, html_path, temp_dir) + + # Step 2: Capture clean background with visual elements temporarily hidden + print(" šŸŽØ Capturing clean background (visual elements hidden)...") + background_path = await self.capture_clean_background(browser, html_path, temp_dir, visual_elements) + + # Step 3: Add the clean background as the base layer + print(" šŸ–¼ļø Adding clean background as base layer...") + background_element = { + 'type': 'background', + 'x': 0, + 'y': 0, + 'width': 1920, + 'height': 1080, + 'tag': 'clean_background', + 'image_path': background_path, + 'depth': -1 + } + self.add_visual_element_to_slide(slide, background_element) + + # Step 4: Add individual visual elements on top of background + print(" šŸ–¼ļø Adding individual visual elements...") + for visual_element in visual_elements: + # Skip the full background we already added + if visual_element['tag'] != 'full_background': + self.add_visual_element_to_slide(slide, visual_element) + + # Step 5: Extract and add editable text elements + print(" šŸ“ Extracting editable text elements...") + text_elements = await self.extract_text_elements(browser, html_path) + + # Step 6: Create editable text boxes on top of everything + print(" āœļø Adding editable text overlays...") + for text_element in text_elements: + self.create_text_box(slide, text_element) + + print(f" šŸŽ‰ Slide {slide_num}: Perfect background + {len(visual_elements)} visual elements + {len(text_elements)} editable text elements") + + async def convert_to_pptx_perfect(self, store_locally: bool = True) -> tuple: + """Main perfect conversion method with concurrent processing.""" + print("šŸŽÆ Starting PERFECT 1:1 HTML to PPTX conversion...") + print("šŸ“‹ Method: Individual visual elements + Editable text overlay") + print("šŸ”„ Processing: Concurrent slide analysis + Sequential PPTX building") + print("=" * 80) + + # Load metadata + self.load_metadata() + + # Create temporary directory for images + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Launch browser for processing + async with async_playwright() as p: + print("🌐 Launching browser for concurrent processing...") + browser = await p.chromium.launch( + headless=True, + args=[ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-gpu', + '--force-device-scale-factor=1', + '--disable-background-timer-throttling', + '--disable-backgrounding-occluded-windows', + '--disable-renderer-backgrounding', + '--disable-features=VizDisplayCompositor' + ] + ) + + try: + # STEP 1: Concurrent slide analysis (extract visual elements, capture backgrounds, extract text) + print(f"šŸ“„ Processing {len(self.slides_info)} slides concurrently...") + + async def analyze_slide(slide_info: Dict) -> Dict: + """Analyze a single slide concurrently - extract all data needed for PPTX creation.""" + html_path = slide_info['path'] + slide_num = slide_info['number'] + + print(f" šŸ” Analyzing slide {slide_num}: {slide_info['title']}") + + # Extract visual elements + visual_elements = await self.extract_visual_elements(browser, html_path, temp_path) + + # Capture clean background + background_path = await self.capture_clean_background(browser, html_path, temp_path, visual_elements) + + # Extract text elements + text_elements = await self.extract_text_elements(browser, html_path) + + return { + 'slide_info': slide_info, + 'visual_elements': visual_elements, + 'background_path': background_path, + 'text_elements': text_elements + } + + # Process all slides concurrently using asyncio.gather + analysis_tasks = [ + analyze_slide(slide_info) for slide_info in self.slides_info + ] + + # Wait for all slides to be analyzed concurrently + slide_analyses = await asyncio.gather(*analysis_tasks) + + print(f"āœ… All {len(slide_analyses)} slides analyzed concurrently!") + + finally: + await browser.close() + + # STEP 2: Sequential PPTX building (using the analyzed data) + print("šŸ—ļø Building PowerPoint presentation from analyzed data...") + + # Create new PowerPoint presentation + presentation = Presentation() + + # Set slide dimensions to 1920x1080 (16:9) + presentation.slide_width = Inches(20) # 1920px at 96 DPI + presentation.slide_height = Inches(11.25) # 1080px at 96 DPI + + # Remove default slide + if len(presentation.slides) > 0: + xml_slides = presentation.slides._sldIdLst + xml_slides.remove(xml_slides[0]) + + # Build slides using the analyzed data (this is fast since all heavy work is done) + for slide_analysis in slide_analyses: + await self.build_slide_from_analysis(presentation, slide_analysis, temp_path) + + # Save PowerPoint presentation to temp (INSIDE the temp directory context) + presentation_name = self.metadata.get('presentation_name', 'presentation') + temp_output_path = temp_path / f"{presentation_name}.pptx" + + print(f" šŸ’¾ Saving presentation to temp: {temp_output_path}") + print(f" šŸ“ Temp directory contents before save: {list(temp_path.glob('*'))}") + + presentation.save(str(temp_output_path)) + + print(f" āœ… Presentation saved to temp") + print(f" šŸ“ Temp directory contents after save: {list(temp_path.glob('*'))}") + print(f" šŸ” Checking if temp file exists: {temp_output_path.exists()}") + if temp_output_path.exists(): + print(f" šŸ“ Temp file size: {temp_output_path.stat().st_size} bytes") + else: + print(f" āŒ Temp file does not exist!") + + if store_locally: + # Store in the static files directory for URL serving + timestamp = int(asyncio.get_event_loop().time()) + filename = f"{presentation_name}_{timestamp}.pptx" + final_output = output_dir / filename + final_output.parent.mkdir(exist_ok=True) + + # Copy from temp to final location + import shutil + shutil.copy2(temp_output_path, final_output) + + print(f"\nšŸŽ‰ PERFECT 1:1 PPTX created successfully: {final_output}") + print(f"šŸ“Š Total slides: {len(presentation.slides)}") + print(f"✨ Perfect visual fidelity + Fully editable text!") + print(f"šŸš€ Concurrent processing completed!") + + return final_output, len(presentation.slides) + else: + # For direct download, read content from temp file before cleanup + print(f" šŸ“– Reading file content for download...") + try: + with open(temp_output_path, 'rb') as f: + pptx_content = f.read() + print(f" āœ… File content read successfully: {len(pptx_content)} bytes") + except Exception as e: + print(f" āŒ Error reading file: {e}") + print(f" šŸ“ Final temp directory contents: {list(temp_path.glob('*'))}") + raise + + print(f"\nšŸŽ‰ PERFECT 1:1 PPTX content prepared for download") + print(f"šŸ“Š Total slides: {len(presentation.slides)}") + print(f"✨ Perfect visual fidelity + Fully editable text!") + print(f"šŸš€ Concurrent processing completed!") + + return pptx_content, len(presentation.slides), presentation_name + + +@router.post("/convert-to-pptx") +async def convert_presentation_to_pptx(request: ConvertRequest): + """ + Convert HTML presentation to PPTX with perfect 1:1 fidelity and async processing. + + Takes a presentation folder path and returns either: + - PPTX file directly (if download=true) - uses presentation name as filename + - JSON response with download URL (if download=false, default) + """ + try: + print(f"šŸ“„ Received PPTX conversion request for: {request.presentation_path}") + + # Create converter + converter = PerfectHTMLToPPTXConverter(request.presentation_path) + + # If download is requested, don't store locally and return file directly + if request.download: + pptx_content, total_slides, presentation_name = await converter.convert_to_pptx_perfect(store_locally=False) + + print(f"✨ Direct download conversion completed for: {presentation_name}") + + return Response( + content=pptx_content, + media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation", + headers={"Content-Disposition": f"attachment; filename=\"{presentation_name}.pptx\""} + ) + + # Otherwise, store locally and return JSON with download URL + pptx_path, total_slides = await converter.convert_to_pptx_perfect(store_locally=True) + + print(f"✨ Conversion completed: {pptx_path}") + + pptx_url = f"/downloads/{pptx_path.name}" + + return ConvertResponse( + success=True, + message=f"PPTX generated successfully with {total_slides} slides", + pptx_url=pptx_url, + filename=pptx_path.name, + total_slides=total_slides + ) + + except FileNotFoundError as e: + raise HTTPException(status_code=404, detail=str(e)) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + print(f"āŒ PPTX conversion error: {e}") + raise HTTPException(status_code=500, detail=f"PPTX conversion failed: {str(e)}") + + +@router.get("/health") +async def pptx_health_check(): + """PPTX service health check endpoint.""" + return {"status": "healthy", "service": "HTML to PPTX Converter"} diff --git a/backend/sandbox/docker/requirements.txt b/backend/sandbox/docker/requirements.txt index f906d34e..df40156b 100644 --- a/backend/sandbox/docker/requirements.txt +++ b/backend/sandbox/docker/requirements.txt @@ -7,4 +7,5 @@ pytesseract==0.3.13 pandas==2.3.0 playwright>=1.40.0 PyPDF2>=3.0.0 -bs4==0.0.2 \ No newline at end of file +bs4==0.0.2 +python-pptx>=0.6.23 \ No newline at end of file diff --git a/backend/sandbox/docker/server.py b/backend/sandbox/docker/server.py index d51411ea..24d93c80 100644 --- a/backend/sandbox/docker/server.py +++ b/backend/sandbox/docker/server.py @@ -5,9 +5,10 @@ import uvicorn import os from pathlib import Path -# Import PDF router and Visual HTML Editor router +# Import PDF router, PPTX router, and Visual HTML Editor router from html_to_pdf_router import router as pdf_router from visual_html_editor_router import router as editor_router +from html_to_pptx_router import router as pptx_router # Ensure we're serving from the /workspace directory workspace_dir = "/workspace" @@ -26,6 +27,7 @@ app.add_middleware(WorkspaceDirMiddleware) # Include routers app.include_router(pdf_router) app.include_router(editor_router) +app.include_router(pptx_router) # Create output directory for generated PDFs (needed by PDF router) output_dir = Path("generated_pdfs") diff --git a/backend/sandbox/docker/visual_html_editor_router.py b/backend/sandbox/docker/visual_html_editor_router.py index cbd229ae..ecf1a659 100644 --- a/backend/sandbox/docker/visual_html_editor_router.py +++ b/backend/sandbox/docker/visual_html_editor_router.py @@ -14,7 +14,7 @@ from fastapi import APIRouter, HTTPException from fastapi.responses import HTMLResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup, NavigableString, Comment # Create router router = APIRouter(prefix="/api/html", tags=["visual-editor"]) @@ -78,6 +78,24 @@ async def get_editable_elements(file_path: str): # Find all elements that could contain text all_elements = soup.find_all(TEXT_ELEMENTS + ['div']) + # Filter out elements that only contain comments + filtered_elements = [] + for element in all_elements: + # Check if element only contains comments + only_comments = True + for child in element.children: + if isinstance(child, Comment): + continue + if isinstance(child, NavigableString) and not child.strip(): + continue + only_comments = False + break + + if not only_comments: + filtered_elements.append(element) + + all_elements = filtered_elements + for element in all_elements: # Strategy 1: Elements with ONLY text content (no child elements) if element.string and element.string.strip(): @@ -99,6 +117,9 @@ async def get_editable_elements(file_path: str): has_mixed_content = False # Process each child node for child in list(element.contents): # Use list() to avoid modification during iteration + # Skip comment nodes (Comments are a subclass of NavigableString) + if isinstance(child, Comment): + continue # Check if it's a NavigableString (raw text) with actual content if (isinstance(child, NavigableString) and child.strip()): @@ -335,6 +356,24 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str: # Find all elements that could contain text all_elements = soup.find_all(TEXT_ELEMENTS + ['div']) + # Filter out elements that only contain comments + filtered_elements = [] + for element in all_elements: + # Check if element only contains comments + only_comments = True + for child in element.children: + if isinstance(child, Comment): + continue + if isinstance(child, NavigableString) and not child.strip(): + continue + only_comments = False + break + + if not only_comments: + filtered_elements.append(element) + + all_elements = filtered_elements + for element in all_elements: # Strategy 1: Elements with ONLY text content (no child elements) if element.string and element.string.strip(): @@ -347,6 +386,9 @@ def inject_editor_functionality(html_content: str, file_path: str) -> str: has_mixed_content = False # Process each child node for child in list(element.contents): # Use list() to avoid modification during iteration + # Skip comment nodes (Comments are a subclass of NavigableString) + if isinstance(child, Comment): + continue # Check if it's a NavigableString (raw text) with actual content if (isinstance(child, NavigableString) and child.strip()):