diff --git a/backend/core/sandbox/README.md b/backend/core/sandbox/README.md index 2ea9ca51..2690026e 100644 --- a/backend/core/sandbox/README.md +++ b/backend/core/sandbox/README.md @@ -20,7 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie ``` cd backend/sandbox/docker docker compose build - docker push kortix/suna:0.1.3.19 + docker push kortix/suna:0.1.3.20 ``` 3. Test your changes locally using docker-compose diff --git a/backend/core/sandbox/docker/browserApi.ts b/backend/core/sandbox/docker/browserApi.ts index 966186e4..c4eb6093 100644 --- a/backend/core/sandbox/docker/browserApi.ts +++ b/backend/core/sandbox/docker/browserApi.ts @@ -31,6 +31,7 @@ class BrowserAutomation { this.router.post('/screenshot', this.screenshot.bind(this)); this.router.post('/act', this.act.bind(this)); this.router.post('/extract', this.extract.bind(this)); + this.router.post('/convert-svg', this.convertSvg.bind(this)); } @@ -306,6 +307,91 @@ class BrowserAutomation { } } + async convertSvg(req: express.Request, res: express.Response) { + console.log(`Converting SVG to PNG: ${JSON.stringify(req.body)}`); + + try { + if (!this.browserInitialized || !this.page) { + res.status(500).json({ + success: false, + message: "Browser not initialized", + error: "Browser must be initialized before converting SVG", + url: "", + title: "" + } as BrowserActionResult); + return; + } + + const { svg_file_path } = req.body; + + if (!svg_file_path) { + res.status(400).json({ + success: false, + message: "SVG file path is required", + error: "svg_file_path parameter is missing", + url: "", + title: "" + } as BrowserActionResult); + return; + } + + // Navigate to the SVG file + const fileUrl = `file://${svg_file_path}`; + await this.page.goto(fileUrl, { waitUntil: 'domcontentloaded', timeout: 10000 }); + + // Wait for any potential loading/animations + await this.page.waitForTimeout(500); + + let screenshot_base64: string; + + // Try to get the SVG element and take a screenshot of just that element + const svgElement = await this.page.locator('svg').first(); + const svgCount = await this.page.locator('svg').count(); + + if (svgCount > 0) { + // Get bounding box to check if element is visible + const bbox = await svgElement.boundingBox(); + + if (bbox && bbox.width > 0 && bbox.height > 0) { + // Take screenshot of just the SVG element + const screenshotBuffer = await svgElement.screenshot({ type: 'png' }); + screenshot_base64 = screenshotBuffer.toString('base64'); + } else { + // Fallback to full page screenshot + const screenshotBuffer = await this.page.screenshot({ fullPage: true, type: 'png' }); + screenshot_base64 = screenshotBuffer.toString('base64'); + } + } else { + // No SVG found, take full page screenshot anyway + const screenshotBuffer = await this.page.screenshot({ fullPage: true, type: 'png' }); + screenshot_base64 = screenshotBuffer.toString('base64'); + } + + const page_info = await this.get_stagehand_state(); + + res.json({ + success: true, + message: `Successfully converted SVG to PNG: ${svg_file_path}`, + url: page_info.url, + title: page_info.title, + screenshot_base64: screenshot_base64 + } as BrowserActionResult); + + } catch (error) { + console.error("Error converting SVG:", error); + const page_info = await this.get_stagehand_state(); + + res.status(500).json({ + success: false, + message: "Failed to convert SVG", + url: page_info.url, + title: page_info.title, + screenshot_base64: page_info.screenshot_base64, + error: String(error) + } as BrowserActionResult); + } + } + } const browserAutomation = new BrowserAutomation(); diff --git a/backend/core/sandbox/docker/docker-compose.yml b/backend/core/sandbox/docker/docker-compose.yml index 6fb0700f..8ee7cc6a 100644 --- a/backend/core/sandbox/docker/docker-compose.yml +++ b/backend/core/sandbox/docker/docker-compose.yml @@ -6,7 +6,7 @@ services: dockerfile: ${DOCKERFILE:-Dockerfile} args: TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64} - image: kortix/suna:0.1.3.19 + image: kortix/suna:0.1.3.20 ports: - "6080:6080" # noVNC web interface - "5901:5901" # VNC port diff --git a/backend/core/tools/sb_vision_tool.py b/backend/core/tools/sb_vision_tool.py index f0c7500a..f6190766 100644 --- a/backend/core/tools/sb_vision_tool.py +++ b/backend/core/tools/sb_vision_tool.py @@ -10,7 +10,11 @@ from core.sandbox.tool_base import SandboxToolsBase from core.agentpress.thread_manager import ThreadManager from core.tools.image_context_manager import ImageContextManager import json +from svglib.svglib import svg2rlg +from reportlab.graphics import renderPM +import tempfile import requests +from core.utils.config import config # Add common image MIME types if mimetypes module is limited mimetypes.add_type("image/webp", ".webp") @@ -39,7 +43,77 @@ class SandboxVisionTool(SandboxToolsBase): self.thread_manager = thread_manager self.image_context_manager = ImageContextManager(thread_manager) - def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]: + async def convert_svg_with_sandbox_browser(self, svg_full_path: str) -> Tuple[bytes, str]: + """Convert SVG to PNG using sandbox browser API for better rendering support. + + Args: + svg_full_path: Full path to SVG file in sandbox + + Returns: + Tuple of (png_bytes, 'image/png') + """ + try: + + # Ensure sandbox is initialized + await self._ensure_sandbox() + + env_vars = {"GEMINI_API_KEY": config.GEMINI_API_KEY} + init_response = await self.sandbox.process.exec( + "curl -s -X POST 'http://localhost:8004/api/init' -H 'Content-Type: application/json' -d '{\"api_key\": \"'$GEMINI_API_KEY'\"}'", + timeout=30, + env=env_vars + ) + + if init_response.exit_code != 0: + raise Exception(f"Failed to initialize browser: {init_response.result}") + + try: + init_data = json.loads(init_response.result) + if init_data.get("status") not in ["healthy", "initialized"]: + raise Exception(f"Browser initialization failed: {init_data}") + except json.JSONDecodeError: + # Assume success if we can't parse response + pass + + # Now call the browser API conversion endpoint + params = { + "svg_file_path": svg_full_path + } + + # Build curl command to call sandbox browser API + url = "http://localhost:8004/api/convert-svg" + json_data = json.dumps(params) + curl_cmd = f"curl -s -X POST '{url}' -H 'Content-Type: application/json' -d '{json_data}'" + + # Execute the API call + response = await self.sandbox.process.exec(curl_cmd, timeout=30) + + if response.exit_code == 0: + try: + response_data = json.loads(response.result) + + if response_data.get("success"): + # Extract the base64 screenshot + screenshot_base64 = response_data.get("screenshot_base64") + if screenshot_base64: + png_bytes = base64.b64decode(screenshot_base64) + print(f"[SeeImage] Converted SVG '{os.path.basename(svg_full_path)}' to PNG using sandbox browser") + return png_bytes, 'image/png' + else: + raise Exception("No screenshot data in browser response") + else: + error_msg = response_data.get("error", "Unknown browser conversion error") + raise Exception(f"Browser conversion failed: {error_msg}") + + except json.JSONDecodeError: + raise Exception(f"Invalid JSON response from browser API: {response.result}") + else: + raise Exception(f"Browser API call failed with exit code {response.exit_code}: {response.result}") + + except Exception as e: + raise Exception(f"Sandbox browser-based SVG conversion failed: {str(e)}") + + async def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]: """Compress an image to reduce its size while maintaining reasonable quality. Args: @@ -51,6 +125,48 @@ class SandboxVisionTool(SandboxToolsBase): Tuple of (compressed_bytes, new_mime_type) """ try: + # Handle SVG conversion first (before PIL processing) + if mime_type == 'image/svg+xml' or file_path.lower().endswith('.svg'): + # Try browser-based conversion first (better quality) + try: + # Construct full sandbox path from the relative file_path + full_svg_path = f"{self.workspace_path}/{file_path}" + png_bytes, png_mime = await self.convert_svg_with_sandbox_browser(full_svg_path) + image_bytes = png_bytes + mime_type = png_mime + except Exception as browser_error: + print(f"[SeeImage] Browser-based SVG conversion failed: {browser_error}") + + # Fallback to svglib approach + try: + + # Create temporary SVG file for svglib + with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as temp_svg: + temp_svg.write(image_bytes) + temp_svg_path = temp_svg.name + + try: + # Convert SVG to PNG using svglib + reportlab + drawing = svg2rlg(temp_svg_path) + png_buffer = BytesIO() + renderPM.drawToFile(drawing, png_buffer, fmt='PNG') + png_bytes = png_buffer.getvalue() + + print(f"[SeeImage] Converted SVG '{file_path}' to PNG using fallback method (svglib)") + # Update for PIL processing + image_bytes = png_bytes + mime_type = 'image/png' + finally: + # Clean up temporary file + os.unlink(temp_svg_path) + + except ImportError: + print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'") + return image_bytes, mime_type + except Exception as e: + print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}") + return image_bytes, mime_type + # Open image from bytes img = Image.open(BytesIO(image_bytes)) @@ -85,7 +201,7 @@ class SandboxVisionTool(SandboxToolsBase): img.save(output, format='PNG', optimize=True, compress_level=DEFAULT_PNG_COMPRESS_LEVEL) output_mime = 'image/png' else: - # Convert everything else to JPEG for better compression + # Convert everything else to JPEG for better compression (converted SVGs stay PNG above) img.save(output, format='JPEG', quality=DEFAULT_JPEG_QUALITY, optimize=True) output_mime = 'image/jpeg' @@ -151,7 +267,7 @@ class SandboxVisionTool(SandboxToolsBase): "properties": { "file_path": { "type": "string", - "description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP. Max size: 10MB." + "description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP, SVG. Max size: 10MB. SVG files are automatically converted to PNG using browser rendering for best quality." } }, "required": ["file_path"] @@ -219,19 +335,35 @@ class SandboxVisionTool(SandboxToolsBase): elif ext == '.png': mime_type = 'image/png' elif ext == '.gif': mime_type = 'image/gif' elif ext == '.webp': mime_type = 'image/webp' + elif ext == '.svg': mime_type = 'image/svg+xml' else: - return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP.") + return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP, SVG.") original_size = file_info.size # Compress the image - compressed_bytes, compressed_mime_type = self.compress_image(image_bytes, mime_type, cleaned_path) + compressed_bytes, compressed_mime_type = await self.compress_image(image_bytes, mime_type, cleaned_path) # Check if compressed image is still too large if len(compressed_bytes) > MAX_COMPRESSED_SIZE: return self.fail_response(f"Image file '{cleaned_path}' is still too large after compression ({len(compressed_bytes) / (1024*1024):.2f}MB). Maximum compressed size is {MAX_COMPRESSED_SIZE / (1024*1024)}MB.") + # For SVG files that were converted to PNG, save the converted PNG to sandbox + if (mime_type == 'image/svg+xml' or cleaned_path.lower().endswith('.svg')) and compressed_mime_type == 'image/png': + # Create PNG filename by replacing .svg extension + png_filename = cleaned_path.rsplit('.', 1)[0] + '_converted.png' + png_full_path = f"{self.workspace_path}/{png_filename}" + + try: + # Save converted PNG to sandbox + await self.sandbox.fs.upload_file(compressed_bytes, png_full_path) + cleaned_path = png_filename + print(f"[SeeImage] Saved converted PNG to sandbox as '{png_filename}' for frontend display") + except Exception as e: + print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}") + # Continue with original path if save fails + # Convert to base64 base64_image = base64.b64encode(compressed_bytes).decode('utf-8') @@ -248,8 +380,13 @@ class SandboxVisionTool(SandboxToolsBase): if not result: return self.fail_response(f"Failed to add image '{cleaned_path}' to conversation context.") - # Inform the agent the image will be available next turn - return self.success_response(f"Successfully loaded and compressed the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).") + # Return structured output like other tools + result_data = { + "message": f"Successfully loaded a compressed version of the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).", + "file_path": cleaned_path, + } + + return self.success_response(result_data) except Exception as e: return self.fail_response(f"An unexpected error occurred while trying to see the image: {str(e)}") diff --git a/backend/core/utils/config.py b/backend/core/utils/config.py index 711e3341..91e21f8f 100644 --- a/backend/core/utils/config.py +++ b/backend/core/utils/config.py @@ -311,8 +311,8 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.19" - SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.19" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.20" + SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.20" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" # LangFuse configuration diff --git a/frontend/src/components/thread/tool-views/see-image-tool/_utils.ts b/frontend/src/components/thread/tool-views/see-image-tool/_utils.ts index 56e15fb2..f1e6fa36 100644 --- a/frontend/src/components/thread/tool-views/see-image-tool/_utils.ts +++ b/frontend/src/components/thread/tool-views/see-image-tool/_utils.ts @@ -45,8 +45,32 @@ const extractFromNewFormat = (content: any): { } } + // Extract display file path from structured output, fall back to args + let filePath = args.file_path || null; + const rawOutput = toolExecution.result?.output; + + // Check if output has display_file_path (handles both object and string formats) + if (rawOutput) { + let outputData = rawOutput; + console.log('outputData', outputData); + + // Parse string output if needed + if (typeof rawOutput === 'string') { + try { + outputData = JSON.parse(rawOutput); + } catch (e) { + // Not JSON, keep original + } + } + + // Use display_file_path if available + if (outputData && typeof outputData === 'object' && outputData.file_path) { + filePath = outputData.file_path; + } + } + const extractedData = { - filePath: args.file_path || null, + filePath, description: parsedContent.summary || null, success: toolExecution.result?.success, timestamp: toolExecution.execution_details?.timestamp, diff --git a/setup.py b/setup.py index 4ce9af4e..02561231 100644 --- a/setup.py +++ b/setup.py @@ -686,9 +686,9 @@ class SetupWizard: ) print_info("Create a snapshot with these exact settings:") print_info( - f" - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}") + f" - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}") print_info( - f" - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}") + f" - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}") print_info( f" - Entrypoint:\t{Colors.GREEN}/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf{Colors.ENDC}" )