suna/backend/sandbox/docker/presentation-processing-wip/html_to_pptx_perfect_wip.py

690 lines
27 KiB
Python

#!/usr/bin/env python3
"""
HTML Presentation to PPTX Converter - Perfect 1:1 Approach
This script provides PERFECT 1:1 conversion by:
- Capturing the ENTIRE slide as a pixel-perfect background image (including all icons, gradients, decorations)
- Making text transparent for the background capture
- Extracting text elements separately for editable PowerPoint text boxes
- Overlaying editable text on the perfect background
Usage:
python html_to_pptx_perfect.py [presentation_directory] [output_pptx_path]
Example:
python html_to_pptx_perfect.py . perfect_presentation.pptx
"""
import json
import os
import sys
import re
import asyncio
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any
import tempfile
import subprocess
from dataclasses import dataclass
import base64
import io
# Third-party dependencies are imported one group at a time so that a missing
# package produces an install hint and a non-zero exit instead of a traceback.

# Playwright drives the headless Chromium used for rendering the slides.
try:
    from playwright.async_api import async_playwright
except ImportError:
    print("Error: Playwright is not installed. Please install it with:")
    print("pip install playwright")
    print("playwright install chromium")
    sys.exit(1)

# BeautifulSoup (HTML parsing).
try:
    from bs4 import BeautifulSoup, Tag
except ImportError:
    print("Error: BeautifulSoup is not installed. Please install it with:")
    print("pip install beautifulsoup4")
    sys.exit(1)

# python-pptx builds the output presentation.
try:
    from pptx import Presentation
    from pptx.util import Inches, Pt
    from pptx.enum.text import PP_ALIGN
    from pptx.dml.color import RGBColor
except ImportError as e:
    print("Error: python-pptx is not installed or has missing components. Please install it with:")
    print("pip install python-pptx")
    print(f"Import error: {e}")
    sys.exit(1)

# Pillow (image handling).
try:
    from PIL import Image, ImageDraw, ImageFont
except ImportError:
    print("Error: Pillow is not installed. Please install it with:")
    print("pip install Pillow")
    sys.exit(1)
@dataclass
class TextElement:
    """One run of slide text plus the geometry and styling needed to rebuild it.

    Instances are produced from the browser's layout data and consumed when
    the editable PowerPoint text boxes are created.
    """

    # The text content to place in the text box.
    text: str

    # Bounding box in CSS pixels, measured from the top-left of the viewport.
    x: float
    y: float
    width: float
    height: float

    # Primary font family name (first entry of the CSS font-family list).
    font_family: str
    # Font size in points (CSS pixels * 0.75).
    font_size: float
    # CSS font-weight as reported by the browser, e.g. "400" or "700".
    font_weight: str
    # CSS colour string as reported by the browser, e.g. "rgb(0, 0, 0)".
    color: str
    # CSS text-align keyword, e.g. "left", "center", "start".
    text_align: str
    # Line height expressed as a multiple of the font size.
    line_height: float
    # Lower-cased HTML tag name of the element the text came from.
    tag: str
class CSSParser:
    """Parse CSS style values into plain Python values.

    All parsers are forgiving: input that cannot be understood falls back to
    a neutral default (black / not bold) instead of raising.
    """

    # The small set of CSS colour keywords this converter recognises.
    _NAMED_COLORS = {
        'black': (0, 0, 0), 'white': (255, 255, 255), 'red': (255, 0, 0),
        'green': (0, 128, 0), 'blue': (0, 0, 255), 'yellow': (255, 255, 0),
        'cyan': (0, 255, 255), 'magenta': (255, 0, 255), 'gray': (128, 128, 128),
        'grey': (128, 128, 128), 'orange': (255, 165, 0), 'purple': (128, 0, 128),
    }

    _HEX_COLOR = re.compile(r'#([0-9a-f]+)')
    _RGB_FUNCTION = re.compile(r'rgba?\(([^)]*)\)')
    # rgb() arguments may be separated by commas, whitespace or "/" (alpha).
    _COMPONENT_SEPARATOR = re.compile(r'[\s,/]+')
    _NUMERIC_WEIGHT = re.compile(r'\d+(?:\.\d+)?')

    @staticmethod
    def _parse_channel(component: str) -> int:
        """Convert one rgb() argument ("128", "12.5", "50%") to an int in 0..255.

        Raises:
            ValueError / OverflowError: if the component is not a finite number.
        """
        if component.endswith('%'):
            value = float(component[:-1]) * 255.0 / 100.0
        else:
            value = float(component)
        return max(0, min(255, int(round(value))))

    @staticmethod
    def parse_color(color_str: str) -> Tuple[int, int, int]:
        """Parse a CSS color string to an (r, g, b) tuple of ints in 0..255.

        Supported forms: ``#rgb``, ``#rgba``, ``#rrggbb``, ``#rrggbbaa``,
        ``rgb()`` / ``rgba()`` with comma or space separated arguments
        (integers, decimals or percentages), and a handful of named colors.
        Any alpha component is ignored.

        Args:
            color_str: CSS color value; may be empty or None.

        Returns:
            The RGB triple, or ``(0, 0, 0)`` when the value is empty or not
            understood.
        """
        black = (0, 0, 0)
        if not color_str:
            return black
        color_str = color_str.strip().lower()

        # Hex notation.
        if color_str.startswith('#'):
            hex_match = CSSParser._HEX_COLOR.fullmatch(color_str)
            if not hex_match:
                return black
            digits = hex_match.group(1)
            if len(digits) in (3, 4):
                # Short form: each digit is doubled (#abc -> #aabbcc).
                digits = ''.join(c * 2 for c in digits)
            if len(digits) not in (6, 8):
                return black
            # A trailing alpha pair (8-digit form) is deliberately dropped.
            return (int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))

        # Functional notation: rgb(...) / rgba(...).
        func_match = CSSParser._RGB_FUNCTION.match(color_str)
        if func_match:
            components = [
                part
                for part in CSSParser._COMPONENT_SEPARATOR.split(func_match.group(1))
                if part
            ]
            if len(components) < 3:
                return black
            try:
                red, green, blue = (
                    CSSParser._parse_channel(part) for part in components[:3]
                )
            except (ValueError, OverflowError):
                return black
            return (red, green, blue)

        # Named colors.
        return CSSParser._NAMED_COLORS.get(color_str, black)

    @staticmethod
    def parse_font_weight(weight_str: str) -> bool:
        """Return True when a CSS font-weight should be rendered as bold.

        ``bold``, ``bolder`` and any numeric weight of 700 or more count as
        bold; everything else (including empty or unparseable input) does not.
        """
        if not weight_str:
            return False
        # str() tolerates callers that pass the weight as a number.
        weight = str(weight_str).strip().lower()
        if weight in ('bold', 'bolder'):
            return True
        if CSSParser._NUMERIC_WEIGHT.fullmatch(weight):
            return float(weight) >= 700
        return False
class PerfectHTMLToPPTXConverter:
    """Convert a directory of HTML slides into a PPTX presentation.

    Every slide is loaded twice in a headless browser: once with all text made
    transparent to capture a full-slide background image, and once to read the
    position and computed style of each text run. The image becomes the slide
    background and the text runs are overlaid on it as editable text boxes.
    """

    # Slides are rendered at a fixed CSS-pixel size and mapped to inches at
    # 96 px per inch, i.e. 1920x1080 px -> 20 x 11.25 in.
    SLIDE_WIDTH_PX = 1920
    SLIDE_HEIGHT_PX = 1080
    CSS_PX_PER_INCH = 96.0
    DEFAULT_LINE_HEIGHT = 1.2

    # CSS family name (lower-cased) -> font name written to the PPTX.
    _FONT_FAMILY_MAP = {
        'roboto': 'Roboto', 'arial': 'Arial', 'helvetica': 'Helvetica',
        'sans-serif': 'Arial', 'serif': 'Times New Roman', 'monospace': 'Courier New',
    }

    # Characters CSS treats as collapsible white space. U+00A0 (nbsp) is
    # intentionally absent so non-breaking spaces survive.
    _COLLAPSIBLE_WS = re.compile(r'[ \t\n\r\f]+')
    _INLINE_WS = re.compile(r'[ \t\r\f]+')

    # Registered as an init script so it runs in the slide document itself;
    # an override evaluated before navigation is discarded by page.goto().
    _FORCE_DPR_SCRIPT = (
        "Object.defineProperty(window, 'devicePixelRatio', { get: () => 1 });"
    )

    def __init__(self, presentation_dir: str, output_path: Optional[str] = None):
        """
        Initialize the perfect converter.

        Args:
            presentation_dir: Directory containing metadata.json and HTML slides
            output_path: Output PPTX file path (optional)

        Raises:
            FileNotFoundError: If the directory or its metadata.json is missing.
        """
        self.presentation_dir = Path(presentation_dir).resolve()
        self.metadata_path = self.presentation_dir / "metadata.json"
        self.output_path = output_path
        self.metadata = None
        self.slides_info = []

        # Validate inputs
        if not self.presentation_dir.exists():
            raise FileNotFoundError(f"Presentation directory not found: {self.presentation_dir}")
        if not self.metadata_path.exists():
            raise FileNotFoundError(f"metadata.json not found in: {self.presentation_dir}")

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------
    @classmethod
    def _normalize_font_family(cls, raw) -> str:
        """Return the first family of a CSS font-family list, mapped to a PPTX font name."""
        if not raw:
            return 'Arial'
        primary = raw.split(',')[0].strip().strip('"\'')
        if not primary:
            return 'Arial'
        return cls._FONT_FAMILY_MAP.get(primary.lower(), primary)

    @classmethod
    def _parse_line_height(cls, raw, font_size_px) -> float:
        """Convert a CSS line-height ("normal", "24px", "1.5") to a font-size multiple.

        Falls back to DEFAULT_LINE_HEIGHT when the value is missing,
        unparseable, non-positive, or the font size is zero.
        """
        if not raw or raw == 'normal':
            return cls.DEFAULT_LINE_HEIGHT
        try:
            if raw.endswith('px'):
                ratio = float(raw[:-2]) / font_size_px
            else:
                ratio = float(raw)
        except (ValueError, TypeError, ZeroDivisionError):
            return cls.DEFAULT_LINE_HEIGHT
        # The chained comparison also rejects NaN and infinity.
        if 0 < ratio < float('inf'):
            return ratio
        return cls.DEFAULT_LINE_HEIGHT

    @classmethod
    def _collapse_whitespace(cls, text: str, white_space: str = 'normal') -> str:
        """Apply CSS white-space collapsing to raw DOM text.

        DOM text keeps the newlines and indentation of the HTML source; the
        browser collapses them when rendering, so the text box must as well,
        otherwise source line wraps show up as hard line breaks.

        Args:
            text: Raw text taken from the element's text nodes.
            white_space: Computed CSS ``white-space`` value of the element.
        """
        mode = (white_space or 'normal').strip().lower()
        if mode in ('pre', 'pre-wrap', 'break-spaces'):
            return text
        if mode == 'pre-line':
            # Newlines are significant, runs of other white space are not.
            return '\n'.join(
                cls._INLINE_WS.sub(' ', line).strip(' ') for line in text.split('\n')
            )
        return cls._COLLAPSIBLE_WS.sub(' ', text).strip(' ')

    def _px_to_length(self, pixels: float):
        """Convert CSS pixels to a python-pptx length."""
        return Inches(pixels / self.CSS_PX_PER_INCH)

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------
    def load_metadata(self) -> Dict:
        """Load metadata.json, collect the slides that exist on disk and resolve the output path.

        Returns:
            The parsed metadata dictionary.

        Raises:
            ValueError: If the file cannot be read or parsed, has an
                unexpected structure, or yields no usable slide.
        """
        try:
            with open(self.metadata_path, 'r', encoding='utf-8') as f:
                self.metadata = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in metadata.json: {e}") from e
        except (OSError, UnicodeDecodeError) as e:
            raise ValueError(f"Error loading metadata: {e}") from e

        if not isinstance(self.metadata, dict):
            raise ValueError("Error loading metadata: top-level value must be a JSON object")
        slides = self.metadata.get('slides', {})
        if not isinstance(slides, dict):
            raise ValueError("Error loading metadata: 'slides' must be a JSON object")

        # Extract slide information
        self.slides_info = []
        for slide_num, slide_data in slides.items():
            if not isinstance(slide_data, dict):
                raise ValueError(f"Error loading metadata: slide {slide_num!r} must be a JSON object")
            filename = slide_data.get('filename')
            if not filename:
                continue
            html_path = self.presentation_dir / filename
            if not html_path.exists():
                print(f"Warning: HTML file not found: {html_path}")
                continue
            try:
                number = int(slide_num)
            except ValueError as e:
                raise ValueError(
                    f"Error loading metadata: slide key {slide_num!r} is not a number"
                ) from e
            self.slides_info.append({
                'number': number,
                'title': slide_data.get('title', f'Slide {slide_num}'),
                'filename': filename,
                'path': html_path,
            })

        # Sort slides by number
        self.slides_info.sort(key=lambda info: info['number'])
        if not self.slides_info:
            raise ValueError("No valid slides found in metadata.json")

        # Set default output path if not provided
        if not self.output_path:
            presentation_name = self.metadata.get('presentation_name', 'presentation')
            self.output_path = self.presentation_dir / f"{presentation_name}_perfect.pptx"
        else:
            self.output_path = Path(self.output_path).resolve()

        print(f"Loaded {len(self.slides_info)} slides from metadata")
        return self.metadata

    # ------------------------------------------------------------------
    # Browser work
    # ------------------------------------------------------------------
    async def _prepare_page(self, page, html_path: Path) -> None:
        """Size *page* to the slide viewport, load *html_path* and let it settle."""
        await page.set_viewport_size(
            {"width": self.SLIDE_WIDTH_PX, "height": self.SLIDE_HEIGHT_PX}
        )
        await page.emulate_media(media='screen')
        # Force device pixel ratio to 1 for exact measurements
        await page.add_init_script(script=self._FORCE_DPR_SCRIPT)
        # as_uri() percent-encodes spaces, '#', '?' and similar characters that
        # a hand-built "file://" string would leave raw.
        await page.goto(html_path.absolute().as_uri(), wait_until="networkidle", timeout=30000)
        # Wait for fonts and content to load
        await page.wait_for_timeout(5000)

    async def capture_perfect_background(self, browser, html_path: Path, temp_dir: Path) -> Path:
        """
        Capture the entire slide as a perfect background image with text made transparent.

        Args:
            browser: Playwright browser instance
            html_path: Path to HTML file
            temp_dir: Temporary directory for images

        Returns:
            Path to the perfect background image

        Raises:
            RuntimeError: If loading, restyling or the screenshot fails.
        """
        page = await browser.new_page()
        try:
            await self._prepare_page(page, html_path)

            # Make ALL text transparent while preserving layout and everything else.
            # NOTE(review): `color` is inherited, so glyph icons or `currentColor`
            # graphics nested inside a text-bearing container may turn transparent
            # too -- verify on slides that use icon fonts.
            await page.evaluate(r"""
                () => {
                    // Function to make text transparent while keeping all visual elements
                    function makeTextTransparent(element) {
                        if (element.nodeType === Node.TEXT_NODE) {
                            // Don't remove text nodes, just make them invisible
                            return;
                        } else if (element.nodeType === Node.ELEMENT_NODE) {
                            const computed = window.getComputedStyle(element);
                            // If this element contains text, make the text transparent
                            // but preserve all other styling (backgrounds, borders, etc.)
                            const hasText = element.textContent && element.textContent.trim();
                            if (hasText) {
                                // Store original color for later if needed
                                const originalColor = computed.color;
                                element.setAttribute('data-original-color', originalColor);
                                // Make text transparent but keep everything else
                                element.style.color = 'transparent';
                                element.style.textShadow = 'none';
                                element.style.webkitTextStroke = 'none';
                            }
                            // Process children
                            Array.from(element.children).forEach(makeTextTransparent);
                        }
                    }
                    // Apply to entire document
                    makeTextTransparent(document.body);
                    console.log('Made all text transparent while preserving visual elements');
                }
            """)

            # Wait for changes to apply
            await page.wait_for_timeout(2000)

            # Take perfect screenshot
            background_path = temp_dir / f"perfect_background_{html_path.stem}.png"
            await page.screenshot(
                path=str(background_path),
                full_page=False,
                clip={
                    "x": 0,
                    "y": 0,
                    "width": self.SLIDE_WIDTH_PX,
                    "height": self.SLIDE_HEIGHT_PX,
                },
            )
            print(f" ✓ Captured perfect background: {background_path.name}")
            return background_path
        except Exception as e:
            raise RuntimeError(f"Error capturing perfect background: {e}") from e
        finally:
            await page.close()

    async def extract_text_elements(self, browser, html_path: Path) -> List["TextElement"]:
        """
        Extract all text elements with precise positioning for editable text boxes.

        Only text that sits directly inside an element (not inside its
        children) is attributed to that element; children are visited
        separately.

        Args:
            browser: Playwright browser instance
            html_path: Path to HTML file

        Returns:
            List of TextElement objects, ordered top-to-bottom then left-to-right

        Raises:
            RuntimeError: If loading the page or reading its layout fails.
        """
        page = await browser.new_page()
        text_elements = []
        try:
            await self._prepare_page(page, html_path)

            # Extract all text elements with precise positioning
            text_data = await page.evaluate(r"""
                () => {
                    function extractTextFromElement(element) {
                        if (!element || element.nodeType !== Node.ELEMENT_NODE) return [];
                        const computedStyle = window.getComputedStyle(element);
                        const rect = element.getBoundingClientRect();
                        // Skip hidden elements
                        if (computedStyle.display === 'none' || computedStyle.visibility === 'hidden') return [];
                        if (rect.width === 0 || rect.height === 0) return [];
                        const results = [];
                        // Get direct text content (not from children)
                        let directText = '';
                        for (let node of element.childNodes) {
                            if (node.nodeType === Node.TEXT_NODE) {
                                directText += node.textContent;
                            }
                        }
                        directText = directText.trim();
                        // If this element has direct text content, extract it
                        if (directText && directText.length > 0) {
                            const fontSizeMatch = computedStyle.fontSize.match(/([0-9.]+)px/);
                            const actualFontSize = fontSizeMatch ? parseFloat(fontSizeMatch[1]) : 16;
                            results.push({
                                text: directText,
                                x: Math.round(rect.left * 100) / 100,
                                y: Math.round(rect.top * 100) / 100,
                                width: Math.round(rect.width * 100) / 100,
                                height: Math.round(rect.height * 100) / 100,
                                fontFamily: computedStyle.fontFamily,
                                actualFontSizePx: actualFontSize,
                                fontWeight: computedStyle.fontWeight,
                                color: computedStyle.color,
                                textAlign: computedStyle.textAlign,
                                lineHeight: computedStyle.lineHeight,
                                whiteSpace: computedStyle.whiteSpace,
                                tag: element.tagName.toLowerCase()
                            });
                        }
                        // Process children for nested text
                        Array.from(element.children).forEach(child => {
                            results.push(...extractTextFromElement(child));
                        });
                        return results;
                    }
                    // Start extraction from body
                    const allTextElements = extractTextFromElement(document.body);
                    // Sort by position (top to bottom, left to right)
                    allTextElements.sort((a, b) => {
                        if (Math.abs(a.y - b.y) < 5) { // Same line
                            return a.x - b.x;
                        }
                        return a.y - b.y;
                    });
                    return allTextElements;
                }
            """)

            # Convert to TextElement objects
            for data in text_data:
                if not data or not data['text']:
                    continue
                text = self._collapse_whitespace(data['text'], data.get('whiteSpace'))
                if not text:
                    continue
                font_size_px = data['actualFontSizePx']
                text_elements.append(TextElement(
                    text=text,
                    x=data['x'],
                    y=data['y'],
                    width=data['width'],
                    height=data['height'],
                    font_family=self._normalize_font_family(data['fontFamily']),
                    font_size=font_size_px * 0.75,  # Convert px to points
                    font_weight=data['fontWeight'],
                    color=data['color'],
                    text_align=data['textAlign'],
                    line_height=self._parse_line_height(data['lineHeight'], font_size_px),
                    tag=data['tag'],
                ))

            print(f" ✓ Extracted {len(text_elements)} text elements")
            return text_elements
        except Exception as e:
            raise RuntimeError(f"Error extracting text elements: {e}") from e
        finally:
            await page.close()

    # ------------------------------------------------------------------
    # PPTX assembly
    # ------------------------------------------------------------------
    def create_text_box(self, slide, text_element: "TextElement") -> None:
        """
        Create an editable text box in PowerPoint with exact positioning.

        Args:
            slide: PowerPoint slide object
            text_element: TextElement with positioning and styling
        """
        # Convert pixel coordinates to inches; boxes are at least 10 px in each direction.
        left = self._px_to_length(text_element.x)
        top = self._px_to_length(text_element.y)
        width = self._px_to_length(max(text_element.width, 10))
        height = self._px_to_length(max(text_element.height, 10))
        print(f" 📝 Text: '{text_element.text[:40]}...' at ({text_element.x:.1f}, {text_element.y:.1f})")

        # Create text box
        textbox = slide.shapes.add_textbox(left, top, width, height)
        text_frame = textbox.text_frame
        text_frame.clear()

        # Zero the frame margins so the text starts exactly at the box origin.
        text_frame.margin_left = Pt(0)
        text_frame.margin_right = Pt(0)
        text_frame.margin_top = Pt(0)
        text_frame.margin_bottom = Pt(0)
        text_frame.word_wrap = True
        text_frame.auto_size = None

        # Add paragraph
        p = text_frame.paragraphs[0]
        p.text = text_element.text

        # Set text alignment
        alignment_map = {
            'left': PP_ALIGN.LEFT, 'center': PP_ALIGN.CENTER, 'centre': PP_ALIGN.CENTER,
            'right': PP_ALIGN.RIGHT, 'justify': PP_ALIGN.JUSTIFY, 'start': PP_ALIGN.LEFT,
            'end': PP_ALIGN.RIGHT,
            # Values Chromium reports for legacy align attributes / <center>.
            '-webkit-left': PP_ALIGN.LEFT, '-webkit-center': PP_ALIGN.CENTER,
            '-webkit-right': PP_ALIGN.RIGHT,
        }
        p.alignment = alignment_map.get((text_element.text_align or '').lower(), PP_ALIGN.LEFT)

        # Set spacing
        p.space_before = Pt(0)
        p.space_after = Pt(0)
        if hasattr(p, 'line_spacing'):
            p.line_spacing = text_element.line_height

        # Set font properties (font size is floored at 8 pt)
        font = p.font
        font.name = text_element.font_family
        font.size = Pt(max(text_element.font_size, 8))
        font.bold = CSSParser.parse_font_weight(text_element.font_weight)

        # Set font color, falling back to black if it cannot be applied
        try:
            r, g, b = CSSParser.parse_color(text_element.color)
            font.color.rgb = RGBColor(r, g, b)
        except Exception as e:
            print(f" Warning: Could not parse color '{text_element.color}': {e}")
            font.color.rgb = RGBColor(0, 0, 0)

        # Make textbox transparent (no background, no border)
        textbox.fill.background()
        textbox.line.fill.background()

    async def convert_slide_perfect(self, browser, slide_info: Dict, presentation, temp_dir: Path) -> None:
        """
        Convert a single HTML slide using perfect 1:1 approach.

        Args:
            browser: Playwright browser instance
            slide_info: Slide information dictionary
            presentation: PowerPoint presentation object
            temp_dir: Temporary directory for images
        """
        html_path = slide_info['path']
        slide_num = slide_info['number']
        print(f"Converting slide {slide_num}: {slide_info['title']} (Perfect 1:1 Mode)")

        # Add blank slide
        blank_slide_layout = presentation.slide_layouts[6]  # Blank layout
        slide = presentation.slides.add_slide(blank_slide_layout)

        # Step 1: Capture perfect background (everything except text)
        print(" 🎨 Capturing PERFECT background with all visual elements...")
        background_image_path = await self.capture_perfect_background(browser, html_path, temp_dir)

        # Step 2: Add perfect background to slide, covering it edge to edge
        if background_image_path and background_image_path.exists():
            slide.shapes.add_picture(
                str(background_image_path),
                Inches(0),
                Inches(0),
                self._px_to_length(self.SLIDE_WIDTH_PX),
                self._px_to_length(self.SLIDE_HEIGHT_PX),
            )
            print(f" ✅ Perfect background added (1920x1080)")

        # Step 3: Extract and add editable text elements
        print(" 📝 Extracting editable text elements...")
        text_elements = await self.extract_text_elements(browser, html_path)

        # Step 4: Create editable text boxes on top of perfect background
        print(" ✍️ Adding editable text overlays...")
        for text_element in text_elements:
            self.create_text_box(slide, text_element)
        print(f" 🎉 Slide {slide_num}: PERFECT background + {len(text_elements)} editable text elements")

    async def convert_to_pptx_perfect(self) -> None:
        """Main perfect conversion method: load metadata, render every slide, save the PPTX."""
        print("🎯 Starting PERFECT 1:1 HTML to PPTX conversion...")
        print("📋 Method: Perfect background rasterization + Editable text overlay")
        print("=" * 80)

        # Load metadata
        self.load_metadata()

        # Create new PowerPoint presentation sized to the rendered slides (16:9)
        presentation = Presentation()
        presentation.slide_width = self._px_to_length(self.SLIDE_WIDTH_PX)
        presentation.slide_height = self._px_to_length(self.SLIDE_HEIGHT_PX)

        # Remove default slide
        if len(presentation.slides) > 0:
            xml_slides = presentation.slides._sldIdLst
            xml_slides.remove(xml_slides[0])

        # Create temporary directory for images
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            # Launch browser for processing
            async with async_playwright() as p:
                print("🌐 Launching browser for perfect processing...")
                browser = await p.chromium.launch(
                    headless=True,
                    args=[
                        '--no-sandbox',
                        '--disable-setuid-sandbox',
                        '--disable-dev-shm-usage',
                        '--disable-gpu',
                        '--force-device-scale-factor=1',
                        '--disable-background-timer-throttling',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding',
                        '--disable-features=VizDisplayCompositor',
                    ],
                )
                try:
                    # Process each slide
                    for slide_info in self.slides_info:
                        await self.convert_slide_perfect(browser, slide_info, presentation, temp_path)
                finally:
                    await browser.close()

            # Save PowerPoint presentation, creating the target directory if needed
            self.output_path.parent.mkdir(parents=True, exist_ok=True)
            presentation.save(str(self.output_path))

        print(f"\n🎉 PERFECT 1:1 PPTX created successfully: {self.output_path}")
        print(f"📊 Total slides: {len(presentation.slides)}")
        print(f"✨ Perfect visual fidelity + Fully editable text!")
def check_dependencies(required: Optional[Dict[str, str]] = None) -> bool:
    """Check if required dependencies are importable and report any that are not.

    Args:
        required: Optional mapping of importable module name to the
            human-readable hint printed when that module is missing. When
            omitted, the packages this script needs are checked.

    Returns:
        True if every module imports, otherwise False (after printing the
        list of missing dependencies).
    """
    import importlib

    if required is None:
        required = {
            "playwright": "playwright (pip install playwright)",
            "bs4": "beautifulsoup4 (pip install beautifulsoup4)",
            "pptx": "python-pptx (pip install python-pptx)",
            "PIL.Image": "Pillow (pip install Pillow)",
        }

    missing_deps = []
    for module_name, install_hint in required.items():
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing_deps.append(install_hint)

    if missing_deps:
        print("❌ Missing dependencies:")
        for dep in missing_deps:
            print(f" - {dep}")
        print("\nPlease install missing dependencies and try again.")
        return False
    return True
def main():
    """Main CLI entry point.

    Accepts up to two positional arguments: the presentation directory
    (defaults to the current directory) and the output PPTX path (defaults to
    a name derived from the metadata). Exits with status 1 on bad usage,
    missing dependencies, user interruption or any conversion error.
    """
    print("🎯 HTML Presentation to PPTX Converter - PERFECT 1:1 MODE")
    print("=" * 80)
    print("🎨 Perfect background capture + Editable text overlay")
    print()

    # Bail out early when a required package is unavailable.
    if not check_dependencies():
        sys.exit(1)

    # Everything after the script name is a positional argument.
    cli_args = sys.argv[1:]
    if len(cli_args) > 2:
        usage_lines = (
            "Usage: python html_to_pptx_perfect.py [presentation_directory] [output_pptx_path]",
            "\nExamples:",
            " python html_to_pptx_perfect.py",
            " python html_to_pptx_perfect.py . perfect_presentation.pptx",
            " python html_to_pptx_perfect.py /path/to/slides output.pptx",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    presentation_dir = cli_args[0] if cli_args else "."
    output_path = cli_args[1] if len(cli_args) == 2 else None

    try:
        # Build the converter and drive the async pipeline to completion.
        converter = PerfectHTMLToPPTXConverter(presentation_dir, output_path)
        asyncio.run(converter.convert_to_pptx_perfect())
    except KeyboardInterrupt:
        print("\n❌ Conversion cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()