Merge pull request #1734 from KrishavRajSingh/fix/upload_svg

load svg images
This commit is contained in:
Krishav 2025-09-26 22:26:58 +05:30 committed by GitHub
commit 18c6d452fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 261 additions and 14 deletions

View File

@ -20,7 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie
```
cd backend/sandbox/docker
docker compose build
docker push kortix/suna:0.1.3.19
docker push kortix/suna:0.1.3.20
```
3. Test your changes locally using docker-compose

View File

@ -31,6 +31,7 @@ class BrowserAutomation {
this.router.post('/screenshot', this.screenshot.bind(this));
this.router.post('/act', this.act.bind(this));
this.router.post('/extract', this.extract.bind(this));
this.router.post('/convert-svg', this.convertSvg.bind(this));
}
@ -306,6 +307,91 @@ class BrowserAutomation {
}
}
async convertSvg(req: express.Request, res: express.Response) {
console.log(`Converting SVG to PNG: ${JSON.stringify(req.body)}`);
try {
if (!this.browserInitialized || !this.page) {
res.status(500).json({
success: false,
message: "Browser not initialized",
error: "Browser must be initialized before converting SVG",
url: "",
title: ""
} as BrowserActionResult);
return;
}
const { svg_file_path } = req.body;
if (!svg_file_path) {
res.status(400).json({
success: false,
message: "SVG file path is required",
error: "svg_file_path parameter is missing",
url: "",
title: ""
} as BrowserActionResult);
return;
}
// Navigate to the SVG file
const fileUrl = `file://${svg_file_path}`;
await this.page.goto(fileUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });
// Wait for any potential loading/animations
await this.page.waitForTimeout(500);
let screenshot_base64: string;
// Try to get the SVG element and take a screenshot of just that element
const svgElement = await this.page.locator('svg').first();
const svgCount = await this.page.locator('svg').count();
if (svgCount > 0) {
// Get bounding box to check if element is visible
const bbox = await svgElement.boundingBox();
if (bbox && bbox.width > 0 && bbox.height > 0) {
// Take screenshot of just the SVG element
const screenshotBuffer = await svgElement.screenshot({ type: 'png' });
screenshot_base64 = screenshotBuffer.toString('base64');
} else {
// Fallback to full page screenshot
const screenshotBuffer = await this.page.screenshot({ fullPage: true, type: 'png' });
screenshot_base64 = screenshotBuffer.toString('base64');
}
} else {
// No SVG found, take full page screenshot anyway
const screenshotBuffer = await this.page.screenshot({ fullPage: true, type: 'png' });
screenshot_base64 = screenshotBuffer.toString('base64');
}
const page_info = await this.get_stagehand_state();
res.json({
success: true,
message: `Successfully converted SVG to PNG: ${svg_file_path}`,
url: page_info.url,
title: page_info.title,
screenshot_base64: screenshot_base64
} as BrowserActionResult);
} catch (error) {
console.error("Error converting SVG:", error);
const page_info = await this.get_stagehand_state();
res.status(500).json({
success: false,
message: "Failed to convert SVG",
url: page_info.url,
title: page_info.title,
screenshot_base64: page_info.screenshot_base64,
error: String(error)
} as BrowserActionResult);
}
}
}
const browserAutomation = new BrowserAutomation();

View File

@ -6,7 +6,7 @@ services:
dockerfile: ${DOCKERFILE:-Dockerfile}
args:
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
image: kortix/suna:0.1.3.19
image: kortix/suna:0.1.3.20
ports:
- "6080:6080" # noVNC web interface
- "5901:5901" # VNC port

View File

@ -10,7 +10,11 @@ from core.sandbox.tool_base import SandboxToolsBase
from core.agentpress.thread_manager import ThreadManager
from core.tools.image_context_manager import ImageContextManager
import json
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPM
import tempfile
import requests
from core.utils.config import config
# Add common image MIME types if mimetypes module is limited
mimetypes.add_type("image/webp", ".webp")
@ -39,7 +43,77 @@ class SandboxVisionTool(SandboxToolsBase):
self.thread_manager = thread_manager
self.image_context_manager = ImageContextManager(thread_manager)
def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
async def convert_svg_with_sandbox_browser(self, svg_full_path: str) -> Tuple[bytes, str]:
"""Convert SVG to PNG using sandbox browser API for better rendering support.
Args:
svg_full_path: Full path to SVG file in sandbox
Returns:
Tuple of (png_bytes, 'image/png')
"""
try:
# Ensure sandbox is initialized
await self._ensure_sandbox()
env_vars = {"GEMINI_API_KEY": config.GEMINI_API_KEY}
init_response = await self.sandbox.process.exec(
"curl -s -X POST 'http://localhost:8004/api/init' -H 'Content-Type: application/json' -d '{\"api_key\": \"'$GEMINI_API_KEY'\"}'",
timeout=30,
env=env_vars
)
if init_response.exit_code != 0:
raise Exception(f"Failed to initialize browser: {init_response.result}")
try:
init_data = json.loads(init_response.result)
if init_data.get("status") not in ["healthy", "initialized"]:
raise Exception(f"Browser initialization failed: {init_data}")
except json.JSONDecodeError:
# Assume success if we can't parse response
pass
# Now call the browser API conversion endpoint
params = {
"svg_file_path": svg_full_path
}
# Build curl command to call sandbox browser API
url = "http://localhost:8004/api/convert-svg"
json_data = json.dumps(params)
curl_cmd = f"curl -s -X POST '{url}' -H 'Content-Type: application/json' -d '{json_data}'"
# Execute the API call
response = await self.sandbox.process.exec(curl_cmd, timeout=30)
if response.exit_code == 0:
try:
response_data = json.loads(response.result)
if response_data.get("success"):
# Extract the base64 screenshot
screenshot_base64 = response_data.get("screenshot_base64")
if screenshot_base64:
png_bytes = base64.b64decode(screenshot_base64)
print(f"[SeeImage] Converted SVG '{os.path.basename(svg_full_path)}' to PNG using sandbox browser")
return png_bytes, 'image/png'
else:
raise Exception("No screenshot data in browser response")
else:
error_msg = response_data.get("error", "Unknown browser conversion error")
raise Exception(f"Browser conversion failed: {error_msg}")
except json.JSONDecodeError:
raise Exception(f"Invalid JSON response from browser API: {response.result}")
else:
raise Exception(f"Browser API call failed with exit code {response.exit_code}: {response.result}")
except Exception as e:
raise Exception(f"Sandbox browser-based SVG conversion failed: {str(e)}")
async def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
"""Compress an image to reduce its size while maintaining reasonable quality.
Args:
@ -51,6 +125,48 @@ class SandboxVisionTool(SandboxToolsBase):
Tuple of (compressed_bytes, new_mime_type)
"""
try:
# Handle SVG conversion first (before PIL processing)
if mime_type == 'image/svg+xml' or file_path.lower().endswith('.svg'):
# Try browser-based conversion first (better quality)
try:
# Construct full sandbox path from the relative file_path
full_svg_path = f"{self.workspace_path}/{file_path}"
png_bytes, png_mime = await self.convert_svg_with_sandbox_browser(full_svg_path)
image_bytes = png_bytes
mime_type = png_mime
except Exception as browser_error:
print(f"[SeeImage] Browser-based SVG conversion failed: {browser_error}")
# Fallback to svglib approach
try:
# Create temporary SVG file for svglib
with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as temp_svg:
temp_svg.write(image_bytes)
temp_svg_path = temp_svg.name
try:
# Convert SVG to PNG using svglib + reportlab
drawing = svg2rlg(temp_svg_path)
png_buffer = BytesIO()
renderPM.drawToFile(drawing, png_buffer, fmt='PNG')
png_bytes = png_buffer.getvalue()
print(f"[SeeImage] Converted SVG '{file_path}' to PNG using fallback method (svglib)")
# Update for PIL processing
image_bytes = png_bytes
mime_type = 'image/png'
finally:
# Clean up temporary file
os.unlink(temp_svg_path)
except ImportError:
print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'")
return image_bytes, mime_type
except Exception as e:
print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}")
return image_bytes, mime_type
# Open image from bytes
img = Image.open(BytesIO(image_bytes))
@ -85,7 +201,7 @@ class SandboxVisionTool(SandboxToolsBase):
img.save(output, format='PNG', optimize=True, compress_level=DEFAULT_PNG_COMPRESS_LEVEL)
output_mime = 'image/png'
else:
# Convert everything else to JPEG for better compression
# Convert everything else to JPEG for better compression (converted SVGs stay PNG above)
img.save(output, format='JPEG', quality=DEFAULT_JPEG_QUALITY, optimize=True)
output_mime = 'image/jpeg'
@ -151,7 +267,7 @@ class SandboxVisionTool(SandboxToolsBase):
"properties": {
"file_path": {
"type": "string",
"description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP. Max size: 10MB."
"description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP, SVG. Max size: 10MB. SVG files are automatically converted to PNG using browser rendering for best quality."
}
},
"required": ["file_path"]
@ -219,19 +335,35 @@ class SandboxVisionTool(SandboxToolsBase):
elif ext == '.png': mime_type = 'image/png'
elif ext == '.gif': mime_type = 'image/gif'
elif ext == '.webp': mime_type = 'image/webp'
elif ext == '.svg': mime_type = 'image/svg+xml'
else:
return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP.")
return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP, SVG.")
original_size = file_info.size
# Compress the image
compressed_bytes, compressed_mime_type = self.compress_image(image_bytes, mime_type, cleaned_path)
compressed_bytes, compressed_mime_type = await self.compress_image(image_bytes, mime_type, cleaned_path)
# Check if compressed image is still too large
if len(compressed_bytes) > MAX_COMPRESSED_SIZE:
return self.fail_response(f"Image file '{cleaned_path}' is still too large after compression ({len(compressed_bytes) / (1024*1024):.2f}MB). Maximum compressed size is {MAX_COMPRESSED_SIZE / (1024*1024)}MB.")
# For SVG files that were converted to PNG, save the converted PNG to sandbox
if (mime_type == 'image/svg+xml' or cleaned_path.lower().endswith('.svg')) and compressed_mime_type == 'image/png':
# Create PNG filename by replacing .svg extension
png_filename = cleaned_path.rsplit('.', 1)[0] + '_converted.png'
png_full_path = f"{self.workspace_path}/{png_filename}"
try:
# Save converted PNG to sandbox
await self.sandbox.fs.upload_file(compressed_bytes, png_full_path)
cleaned_path = png_filename
print(f"[SeeImage] Saved converted PNG to sandbox as '{png_filename}' for frontend display")
except Exception as e:
print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
# Continue with original path if save fails
# Convert to base64
base64_image = base64.b64encode(compressed_bytes).decode('utf-8')
@ -248,8 +380,13 @@ class SandboxVisionTool(SandboxToolsBase):
if not result:
return self.fail_response(f"Failed to add image '{cleaned_path}' to conversation context.")
# Inform the agent the image will be available next turn
return self.success_response(f"Successfully loaded and compressed the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).")
# Return structured output like other tools
result_data = {
"message": f"Successfully loaded a compressed version of the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).",
"file_path": cleaned_path,
}
return self.success_response(result_data)
except Exception as e:
return self.fail_response(f"An unexpected error occurred while trying to see the image: {str(e)}")

View File

@ -311,8 +311,8 @@ class Configuration:
STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
# Sandbox configuration
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.19"
SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.19"
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.20"
SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.20"
SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
# LangFuse configuration

View File

@ -45,8 +45,32 @@ const extractFromNewFormat = (content: any): {
}
}
// Extract display file path from structured output, fall back to args
let filePath = args.file_path || null;
const rawOutput = toolExecution.result?.output;
// Check if output has display_file_path (handles both object and string formats)
if (rawOutput) {
let outputData = rawOutput;
console.log('outputData', outputData);
// Parse string output if needed
if (typeof rawOutput === 'string') {
try {
outputData = JSON.parse(rawOutput);
} catch (e) {
// Not JSON, keep original
}
}
// Use display_file_path if available
if (outputData && typeof outputData === 'object' && outputData.file_path) {
filePath = outputData.file_path;
}
}
const extractedData = {
filePath: args.file_path || null,
filePath,
description: parsedContent.summary || null,
success: toolExecution.result?.success,
timestamp: toolExecution.execution_details?.timestamp,

View File

@ -686,9 +686,9 @@ class SetupWizard:
)
print_info("Create a snapshot with these exact settings:")
print_info(
f" - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}")
f" - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}")
print_info(
f" - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}")
f" - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}")
print_info(
f" - Entrypoint:\t{Colors.GREEN}/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf{Colors.ENDC}"
)