mirror of https://github.com/kortix-ai/suna.git
Merge pull request #1734 from KrishavRajSingh/fix/upload_svg
load svg images
This commit is contained in:
commit
18c6d452fb
|
@ -20,7 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie
|
|||
```
|
||||
cd backend/sandbox/docker
|
||||
docker compose build
|
||||
docker push kortix/suna:0.1.3.19
|
||||
docker push kortix/suna:0.1.3.20
|
||||
```
|
||||
3. Test your changes locally using docker-compose
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ class BrowserAutomation {
|
|||
this.router.post('/screenshot', this.screenshot.bind(this));
|
||||
this.router.post('/act', this.act.bind(this));
|
||||
this.router.post('/extract', this.extract.bind(this));
|
||||
this.router.post('/convert-svg', this.convertSvg.bind(this));
|
||||
|
||||
}
|
||||
|
||||
|
@ -306,6 +307,91 @@ class BrowserAutomation {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * POST /convert-svg handler: loads a local SVG file in the browser page and
 * responds with a PNG rendering of it, base64-encoded.
 *
 * Request body: `{ svg_file_path: string }` — filesystem path of the SVG.
 *
 * Responses:
 * - 200 with `screenshot_base64` on success.
 * - 400 when `svg_file_path` is missing, empty, or not a string.
 * - 500 when the browser is not initialized or navigation/screenshot fails.
 *
 * @param req Express request carrying the JSON body described above.
 * @param res Express response; exactly one JSON body is written to it.
 */
async convertSvg(req: express.Request, res: express.Response): Promise<void> {
  console.log(`Converting SVG to PNG: ${JSON.stringify(req.body)}`);

  try {
    const page = this.page;
    if (!this.browserInitialized || !page) {
      res.status(500).json({
        success: false,
        message: "Browser not initialized",
        error: "Browser must be initialized before converting SVG",
        url: "",
        title: ""
      } as BrowserActionResult);
      return;
    }

    // Read defensively: the body may be absent or carry a non-string value,
    // which must be rejected rather than interpolated into a URL.
    const svg_file_path: unknown = req.body?.svg_file_path;

    if (typeof svg_file_path !== 'string' || svg_file_path.length === 0) {
      res.status(400).json({
        success: false,
        message: "SVG file path is required",
        error: "svg_file_path parameter is missing",
        url: "",
        title: ""
      } as BrowserActionResult);
      return;
    }

    // Percent-encode each path segment so characters that are legal in file
    // names but special in URLs ('#', '?', '%', spaces) are not misread as a
    // fragment, query, or escape sequence. Separators are left untouched.
    const encodedPath = svg_file_path
      .split('/')
      .map((segment) => encodeURIComponent(segment))
      .join('/');
    const fileUrl = `file://${encodedPath}`;
    await page.goto(fileUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });

    // Wait for any potential loading/animations
    await page.waitForTimeout(500);

    // Prefer a screenshot cropped to the first <svg> element; fall back to a
    // full-page screenshot when there is no <svg> or it has no visible area.
    const svgElement = page.locator('svg').first();
    const svgCount = await page.locator('svg').count();
    const bbox = svgCount > 0 ? await svgElement.boundingBox() : null;
    const hasVisibleSvg = bbox !== null && bbox.width > 0 && bbox.height > 0;

    const screenshotBuffer = hasVisibleSvg
      ? await svgElement.screenshot({ type: 'png' })
      : await page.screenshot({ fullPage: true, type: 'png' });
    const screenshot_base64 = screenshotBuffer.toString('base64');

    const page_info = await this.get_stagehand_state();

    res.json({
      success: true,
      message: `Successfully converted SVG to PNG: ${svg_file_path}`,
      url: page_info.url,
      title: page_info.title,
      screenshot_base64: screenshot_base64
    } as BrowserActionResult);
  } catch (error) {
    console.error("Error converting SVG:", error);

    // Collecting page state can itself fail (e.g. the page crashed). Guard it
    // so the client always receives an error response instead of hanging.
    let failure: BrowserActionResult;
    try {
      const page_info = await this.get_stagehand_state();
      failure = {
        success: false,
        message: "Failed to convert SVG",
        url: page_info.url,
        title: page_info.title,
        screenshot_base64: page_info.screenshot_base64,
        error: String(error)
      } as BrowserActionResult;
    } catch (stateError) {
      console.error("Error reading page state after failed SVG conversion:", stateError);
      failure = {
        success: false,
        message: "Failed to convert SVG",
        url: "",
        title: "",
        error: String(error)
      } as BrowserActionResult;
    }

    res.status(500).json(failure);
  }
}
|
||||
|
||||
}
|
||||
|
||||
const browserAutomation = new BrowserAutomation();
|
||||
|
|
|
@ -6,7 +6,7 @@ services:
|
|||
dockerfile: ${DOCKERFILE:-Dockerfile}
|
||||
args:
|
||||
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
||||
image: kortix/suna:0.1.3.19
|
||||
image: kortix/suna:0.1.3.20
|
||||
ports:
|
||||
- "6080:6080" # noVNC web interface
|
||||
- "5901:5901" # VNC port
|
||||
|
|
|
@ -10,7 +10,11 @@ from core.sandbox.tool_base import SandboxToolsBase
|
|||
from core.agentpress.thread_manager import ThreadManager
|
||||
from core.tools.image_context_manager import ImageContextManager
|
||||
import json
|
||||
from svglib.svglib import svg2rlg
|
||||
from reportlab.graphics import renderPM
|
||||
import tempfile
|
||||
import requests
|
||||
from core.utils.config import config
|
||||
|
||||
# Add common image MIME types if mimetypes module is limited
|
||||
mimetypes.add_type("image/webp", ".webp")
|
||||
|
@ -39,7 +43,77 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
self.thread_manager = thread_manager
|
||||
self.image_context_manager = ImageContextManager(thread_manager)
|
||||
|
||||
def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
|
||||
async def convert_svg_with_sandbox_browser(self, svg_full_path: str) -> Tuple[bytes, str]:
    """Convert an SVG file to PNG by calling the browser API inside the sandbox.

    The method first posts to the browser API's ``/api/init`` endpoint, then
    posts the file path to ``/api/convert-svg`` and decodes the base64
    screenshot from the JSON response. Both calls are issued as ``curl``
    commands through ``self.sandbox.process.exec``.

    Args:
        svg_full_path: Full path to SVG file in sandbox

    Returns:
        Tuple of (png_bytes, 'image/png')

    Raises:
        Exception: If initialization or conversion fails for any reason. The
            message is prefixed with "Sandbox browser-based SVG conversion
            failed" and the original error is chained as ``__cause__``.
    """
    # Local import so this block does not depend on the file's import header.
    import shlex

    try:
        # Ensure sandbox is initialized
        await self._ensure_sandbox()

        env_vars = {"GEMINI_API_KEY": config.GEMINI_API_KEY}
        init_response = await self.sandbox.process.exec(
            "curl -s -X POST 'http://localhost:8004/api/init' -H 'Content-Type: application/json' -d '{\"api_key\": \"'$GEMINI_API_KEY'\"}'",
            timeout=30,
            env=env_vars
        )

        if init_response.exit_code != 0:
            raise Exception(f"Failed to initialize browser: {init_response.result}")

        try:
            init_data = json.loads(init_response.result)
        except json.JSONDecodeError:
            # Best effort: an unparseable init response is treated as success,
            # and any real problem will surface in the conversion call below.
            init_data = None

        if init_data is not None and init_data.get("status") not in ["healthy", "initialized"]:
            raise Exception(f"Browser initialization failed: {init_data}")

        # Build curl command to call sandbox browser API.
        # The payload contains a caller-supplied path, so it is shell-quoted:
        # a path containing a single quote would otherwise end the argument
        # early and corrupt the command or inject shell syntax.
        # NOTE(review): assumes process.exec runs the string through a POSIX
        # shell, as the quoting in the init command above implies.
        url = "http://localhost:8004/api/convert-svg"
        json_data = json.dumps({"svg_file_path": svg_full_path})
        curl_cmd = (
            f"curl -s -X POST {shlex.quote(url)} "
            f"-H 'Content-Type: application/json' -d {shlex.quote(json_data)}"
        )

        # Execute the API call
        response = await self.sandbox.process.exec(curl_cmd, timeout=30)

        if response.exit_code != 0:
            raise Exception(f"Browser API call failed with exit code {response.exit_code}: {response.result}")

        try:
            response_data = json.loads(response.result)
        except json.JSONDecodeError:
            raise Exception(f"Invalid JSON response from browser API: {response.result}")

        if not response_data.get("success"):
            error_msg = response_data.get("error", "Unknown browser conversion error")
            raise Exception(f"Browser conversion failed: {error_msg}")

        # Extract the base64 screenshot
        screenshot_base64 = response_data.get("screenshot_base64")
        if not screenshot_base64:
            raise Exception("No screenshot data in browser response")

        png_bytes = base64.b64decode(screenshot_base64)
        print(f"[SeeImage] Converted SVG '{os.path.basename(svg_full_path)}' to PNG using sandbox browser")
        return png_bytes, 'image/png'

    except Exception as e:
        # Chain the original error so the full traceback is preserved.
        raise Exception(f"Sandbox browser-based SVG conversion failed: {str(e)}") from e
|
||||
|
||||
async def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
|
||||
"""Compress an image to reduce its size while maintaining reasonable quality.
|
||||
|
||||
Args:
|
||||
|
@ -51,6 +125,48 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
Tuple of (compressed_bytes, new_mime_type)
|
||||
"""
|
||||
try:
|
||||
# Handle SVG conversion first (before PIL processing)
|
||||
if mime_type == 'image/svg+xml' or file_path.lower().endswith('.svg'):
|
||||
# Try browser-based conversion first (better quality)
|
||||
try:
|
||||
# Construct full sandbox path from the relative file_path
|
||||
full_svg_path = f"{self.workspace_path}/{file_path}"
|
||||
png_bytes, png_mime = await self.convert_svg_with_sandbox_browser(full_svg_path)
|
||||
image_bytes = png_bytes
|
||||
mime_type = png_mime
|
||||
except Exception as browser_error:
|
||||
print(f"[SeeImage] Browser-based SVG conversion failed: {browser_error}")
|
||||
|
||||
# Fallback to svglib approach
|
||||
try:
|
||||
|
||||
# Create temporary SVG file for svglib
|
||||
with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as temp_svg:
|
||||
temp_svg.write(image_bytes)
|
||||
temp_svg_path = temp_svg.name
|
||||
|
||||
try:
|
||||
# Convert SVG to PNG using svglib + reportlab
|
||||
drawing = svg2rlg(temp_svg_path)
|
||||
png_buffer = BytesIO()
|
||||
renderPM.drawToFile(drawing, png_buffer, fmt='PNG')
|
||||
png_bytes = png_buffer.getvalue()
|
||||
|
||||
print(f"[SeeImage] Converted SVG '{file_path}' to PNG using fallback method (svglib)")
|
||||
# Update for PIL processing
|
||||
image_bytes = png_bytes
|
||||
mime_type = 'image/png'
|
||||
finally:
|
||||
# Clean up temporary file
|
||||
os.unlink(temp_svg_path)
|
||||
|
||||
except ImportError:
|
||||
print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'")
|
||||
return image_bytes, mime_type
|
||||
except Exception as e:
|
||||
print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}")
|
||||
return image_bytes, mime_type
|
||||
|
||||
# Open image from bytes
|
||||
img = Image.open(BytesIO(image_bytes))
|
||||
|
||||
|
@ -85,7 +201,7 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
img.save(output, format='PNG', optimize=True, compress_level=DEFAULT_PNG_COMPRESS_LEVEL)
|
||||
output_mime = 'image/png'
|
||||
else:
|
||||
# Convert everything else to JPEG for better compression
|
||||
# Convert everything else to JPEG for better compression (converted SVGs stay PNG above)
|
||||
img.save(output, format='JPEG', quality=DEFAULT_JPEG_QUALITY, optimize=True)
|
||||
output_mime = 'image/jpeg'
|
||||
|
||||
|
@ -151,7 +267,7 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
"properties": {
|
||||
"file_path": {
|
||||
"type": "string",
|
||||
"description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP. Max size: 10MB."
|
||||
"description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP, SVG. Max size: 10MB. SVG files are automatically converted to PNG using browser rendering for best quality."
|
||||
}
|
||||
},
|
||||
"required": ["file_path"]
|
||||
|
@ -219,19 +335,35 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
elif ext == '.png': mime_type = 'image/png'
|
||||
elif ext == '.gif': mime_type = 'image/gif'
|
||||
elif ext == '.webp': mime_type = 'image/webp'
|
||||
elif ext == '.svg': mime_type = 'image/svg+xml'
|
||||
else:
|
||||
return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP.")
|
||||
return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP, SVG.")
|
||||
|
||||
original_size = file_info.size
|
||||
|
||||
|
||||
# Compress the image
|
||||
compressed_bytes, compressed_mime_type = self.compress_image(image_bytes, mime_type, cleaned_path)
|
||||
compressed_bytes, compressed_mime_type = await self.compress_image(image_bytes, mime_type, cleaned_path)
|
||||
|
||||
# Check if compressed image is still too large
|
||||
if len(compressed_bytes) > MAX_COMPRESSED_SIZE:
|
||||
return self.fail_response(f"Image file '{cleaned_path}' is still too large after compression ({len(compressed_bytes) / (1024*1024):.2f}MB). Maximum compressed size is {MAX_COMPRESSED_SIZE / (1024*1024)}MB.")
|
||||
|
||||
# For SVG files that were converted to PNG, save the converted PNG to sandbox
|
||||
if (mime_type == 'image/svg+xml' or cleaned_path.lower().endswith('.svg')) and compressed_mime_type == 'image/png':
|
||||
# Create PNG filename by replacing .svg extension
|
||||
png_filename = cleaned_path.rsplit('.', 1)[0] + '_converted.png'
|
||||
png_full_path = f"{self.workspace_path}/{png_filename}"
|
||||
|
||||
try:
|
||||
# Save converted PNG to sandbox
|
||||
await self.sandbox.fs.upload_file(compressed_bytes, png_full_path)
|
||||
cleaned_path = png_filename
|
||||
print(f"[SeeImage] Saved converted PNG to sandbox as '{png_filename}' for frontend display")
|
||||
except Exception as e:
|
||||
print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
|
||||
# Continue with original path if save fails
|
||||
|
||||
# Convert to base64
|
||||
base64_image = base64.b64encode(compressed_bytes).decode('utf-8')
|
||||
|
||||
|
@ -248,8 +380,13 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
if not result:
|
||||
return self.fail_response(f"Failed to add image '{cleaned_path}' to conversation context.")
|
||||
|
||||
# Inform the agent the image will be available next turn
|
||||
return self.success_response(f"Successfully loaded and compressed the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).")
|
||||
# Return structured output like other tools
|
||||
result_data = {
|
||||
"message": f"Successfully loaded a compressed version of the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).",
|
||||
"file_path": cleaned_path,
|
||||
}
|
||||
|
||||
return self.success_response(result_data)
|
||||
|
||||
except Exception as e:
|
||||
return self.fail_response(f"An unexpected error occurred while trying to see the image: {str(e)}")
|
||||
|
|
|
@ -311,8 +311,8 @@ class Configuration:
|
|||
STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
|
||||
|
||||
# Sandbox configuration
|
||||
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.19"
|
||||
SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.19"
|
||||
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.20"
|
||||
SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.20"
|
||||
SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
|
||||
|
||||
# LangFuse configuration
|
||||
|
|
|
@ -45,8 +45,32 @@ const extractFromNewFormat = (content: any): {
|
|||
}
|
||||
}
|
||||
|
||||
// Extract display file path from structured output, fall back to args
|
||||
let filePath = args.file_path || null;
|
||||
const rawOutput = toolExecution.result?.output;
|
||||
|
||||
// Check if output has file_path (handles both object and string formats)
|
||||
if (rawOutput) {
|
||||
let outputData = rawOutput;
|
||||
console.log('outputData', outputData);
|
||||
|
||||
// Parse string output if needed
|
||||
if (typeof rawOutput === 'string') {
|
||||
try {
|
||||
outputData = JSON.parse(rawOutput);
|
||||
} catch (e) {
|
||||
// Not JSON, keep original
|
||||
}
|
||||
}
|
||||
|
||||
// Use the output's file_path if available (e.g. the converted PNG for SVG inputs)
|
||||
if (outputData && typeof outputData === 'object' && outputData.file_path) {
|
||||
filePath = outputData.file_path;
|
||||
}
|
||||
}
|
||||
|
||||
const extractedData = {
|
||||
filePath: args.file_path || null,
|
||||
filePath,
|
||||
description: parsedContent.summary || null,
|
||||
success: toolExecution.result?.success,
|
||||
timestamp: toolExecution.execution_details?.timestamp,
|
||||
|
|
4
setup.py
4
setup.py
|
@ -686,9 +686,9 @@ class SetupWizard:
|
|||
)
|
||||
print_info("Create a snapshot with these exact settings:")
|
||||
print_info(
|
||||
f" - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}")
|
||||
f" - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}")
|
||||
print_info(
|
||||
f" - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}")
|
||||
f" - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}")
|
||||
print_info(
|
||||
f" - Entrypoint:\t{Colors.GREEN}/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf{Colors.ENDC}"
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue