Merge pull request #1734 from KrishavRajSingh/fix/upload_svg

load svg images
2025-09-26 22:26:58 +05:30 · 2025-09-26 22:26:58 +05:30 · 18c6d452fb
parent 09d0263d33 8198ad0937
commit 18c6d452fb
7 changed files with 261 additions and 14 deletions
--- a/backend/core/sandbox/README.md
+++ b/backend/core/sandbox/README.md
@ -20,7 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie
   ```
   cd backend/sandbox/docker
   docker compose build
-   docker push kortix/suna:0.1.3.19
+   docker push kortix/suna:0.1.3.20
   ```
 3. Test your changes locally using docker-compose

--- a/backend/core/sandbox/docker/browserApi.ts
+++ b/backend/core/sandbox/docker/browserApi.ts
@ -31,6 +31,7 @@ class BrowserAutomation {
        this.router.post('/screenshot', this.screenshot.bind(this));
        this.router.post('/act', this.act.bind(this));
        this.router.post('/extract', this.extract.bind(this));
+        this.router.post('/convert-svg', this.convertSvg.bind(this));

    }

@ -306,6 +307,91 @@ class BrowserAutomation {
        }
    }

+    /**
+     * POST /convert-svg handler: loads an SVG file from the local filesystem in the
+     * browser page and responds with a base64-encoded PNG screenshot of it.
+     *
+     * Request body: `{ svg_file_path: string }`.
+     *
+     * Responses (all shaped as BrowserActionResult):
+     *   - 200 with `screenshot_base64` when the screenshot was taken
+     *   - 400 when `svg_file_path` is missing, empty, or not a string
+     *   - 500 when the browser is not initialized or navigation/screenshot fails
+     *
+     * Note: this navigates `this.page` to the file, replacing whatever the page
+     * was showing before.
+     */
+    async convertSvg(req: express.Request, res: express.Response) {
+        console.log(`Converting SVG to PNG: ${JSON.stringify(req.body)}`);
+
+        try {
+            if (!this.browserInitialized || !this.page) {
+                res.status(500).json({
+                    success: false,
+                    message: "Browser not initialized",
+                    error: "Browser must be initialized before converting SVG",
+                    url: "",
+                    title: ""
+                } as BrowserActionResult);
+                return;
+            }
+
+            // Tolerate a missing body so the caller gets a 400 instead of a thrown destructuring error.
+            const { svg_file_path } = req.body ?? {};
+
+            if (typeof svg_file_path !== 'string' || svg_file_path.length === 0) {
+                res.status(400).json({
+                    success: false,
+                    message: "SVG file path is required",
+                    error: "svg_file_path parameter is missing",
+                    url: "",
+                    title: ""
+                } as BrowserActionResult);
+                return;
+            }
+
+            // Percent-encode each path segment so characters such as spaces, '#', '?' and '%'
+            // are treated as part of the file name rather than as URL syntax.
+            const encodedPath = svg_file_path
+                .split('/')
+                .map((segment) => encodeURIComponent(segment))
+                .join('/');
+            const fileUrl = `file://${encodedPath}`;
+            await this.page.goto(fileUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });
+
+            // Wait for any potential loading/animations
+            await this.page.waitForTimeout(500);
+
+            // Prefer a screenshot cropped to the first <svg> element; fall back to a
+            // full-page screenshot when there is no <svg> or it has no visible area.
+            const svgLocator = this.page.locator('svg');
+            const svgElement = svgLocator.first();
+            const hasSvg = (await svgLocator.count()) > 0;
+            const bbox = hasSvg ? await svgElement.boundingBox() : null;
+            const svgIsVisible = bbox !== null && bbox.width > 0 && bbox.height > 0;
+
+            const screenshotBuffer = svgIsVisible
+                ? await svgElement.screenshot({ type: 'png' })
+                : await this.page.screenshot({ fullPage: true, type: 'png' });
+            const screenshot_base64 = screenshotBuffer.toString('base64');
+
+            const page_info = await this.get_stagehand_state();
+
+            res.json({
+                success: true,
+                message: `Successfully converted SVG to PNG: ${svg_file_path}`,
+                url: page_info.url,
+                title: page_info.title,
+                screenshot_base64: screenshot_base64
+            } as BrowserActionResult);
+
+        } catch (error) {
+            console.error("Error converting SVG:", error);
+
+            // Reading the page state can itself fail (e.g. the page crashed). Guard it so
+            // the client always receives a response instead of a hanging request.
+            const page_info = await this.get_stagehand_state().catch((stateError: unknown) => {
+                console.error("Error reading page state after SVG conversion failure:", stateError);
+                return undefined;
+            });
+
+            res.status(500).json({
+                success: false,
+                message: "Failed to convert SVG",
+                url: page_info?.url ?? "",
+                title: page_info?.title ?? "",
+                screenshot_base64: page_info?.screenshot_base64,
+                error: String(error)
+            } as BrowserActionResult);
+        }
+    }
+
 }

 const browserAutomation = new BrowserAutomation();
--- a/backend/core/sandbox/docker/docker-compose.yml
+++ b/backend/core/sandbox/docker/docker-compose.yml
@ -6,7 +6,7 @@ services:
      dockerfile: ${DOCKERFILE:-Dockerfile}
      args:
        TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortix/suna:0.1.3.19
+    image: kortix/suna:0.1.3.20
    ports:
      - "6080:6080"  # noVNC web interface
      - "5901:5901"  # VNC port
--- a/backend/core/tools/sb_vision_tool.py
+++ b/backend/core/tools/sb_vision_tool.py
@ -10,7 +10,11 @@ from core.sandbox.tool_base import SandboxToolsBase
 from core.agentpress.thread_manager import ThreadManager
 from core.tools.image_context_manager import ImageContextManager
 import json
+from svglib.svglib import svg2rlg
+from reportlab.graphics import renderPM
+import tempfile
 import requests
+from core.utils.config import config

 # Add common image MIME types if mimetypes module is limited
 mimetypes.add_type("image/webp", ".webp")
@ -39,7 +43,77 @@ class SandboxVisionTool(SandboxToolsBase):
        self.thread_manager = thread_manager
        self.image_context_manager = ImageContextManager(thread_manager)

-    def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
+    async def convert_svg_with_sandbox_browser(self, svg_full_path: str) -> Tuple[bytes, str]:
+        """Convert SVG to PNG using sandbox browser API for better rendering support.
+        
+        Args:
+            svg_full_path: Full path to SVG file in sandbox
+            
+        Returns:
+            Tuple of (png_bytes, 'image/png')
+        """
+        try:
+            
+            # Ensure sandbox is initialized
+            await self._ensure_sandbox()
+            
+            env_vars = {"GEMINI_API_KEY": config.GEMINI_API_KEY}
+            init_response = await self.sandbox.process.exec(
+                "curl -s -X POST 'http://localhost:8004/api/init' -H 'Content-Type: application/json' -d '{\"api_key\": \"'$GEMINI_API_KEY'\"}'",
+                timeout=30,
+                env=env_vars
+            )
+            
+            if init_response.exit_code != 0:
+                raise Exception(f"Failed to initialize browser: {init_response.result}")
+            
+            try:
+                init_data = json.loads(init_response.result)
+                if init_data.get("status") not in ["healthy", "initialized"]:
+                    raise Exception(f"Browser initialization failed: {init_data}")
+            except json.JSONDecodeError:
+                # Assume success if we can't parse response
+                pass
+            
+            # Now call the browser API conversion endpoint
+            params = {
+                "svg_file_path": svg_full_path
+            }
+            
+            # Build curl command to call sandbox browser API
+            url = "http://localhost:8004/api/convert-svg"
+            json_data = json.dumps(params)
+            curl_cmd = f"curl -s -X POST '{url}' -H 'Content-Type: application/json' -d '{json_data}'"
+            
+            # Execute the API call
+            response = await self.sandbox.process.exec(curl_cmd, timeout=30)
+            
+            if response.exit_code == 0:
+                try:
+                    response_data = json.loads(response.result)
+                    
+                    if response_data.get("success"):
+                        # Extract the base64 screenshot
+                        screenshot_base64 = response_data.get("screenshot_base64")
+                        if screenshot_base64:
+                            png_bytes = base64.b64decode(screenshot_base64)
+                            print(f"[SeeImage] Converted SVG '{os.path.basename(svg_full_path)}' to PNG using sandbox browser")
+                            return png_bytes, 'image/png'
+                        else:
+                            raise Exception("No screenshot data in browser response")
+                    else:
+                        error_msg = response_data.get("error", "Unknown browser conversion error")
+                        raise Exception(f"Browser conversion failed: {error_msg}")
+                        
+                except json.JSONDecodeError:
+                    raise Exception(f"Invalid JSON response from browser API: {response.result}")
+            else:
+                raise Exception(f"Browser API call failed with exit code {response.exit_code}: {response.result}")
+                
+        except Exception as e:
+            raise Exception(f"Sandbox browser-based SVG conversion failed: {str(e)}")
+    
+    async def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
        """Compress an image to reduce its size while maintaining reasonable quality.
        
        Args:
@ -51,6 +125,48 @@ class SandboxVisionTool(SandboxToolsBase):
            Tuple of (compressed_bytes, new_mime_type)
        """
        try:
+            # Handle SVG conversion first (before PIL processing)
+            if mime_type == 'image/svg+xml' or file_path.lower().endswith('.svg'):
+                # Try browser-based conversion first (better quality)
+                try:
+                    # Construct full sandbox path from the relative file_path
+                    full_svg_path = f"{self.workspace_path}/{file_path}"
+                    png_bytes, png_mime = await self.convert_svg_with_sandbox_browser(full_svg_path)
+                    image_bytes = png_bytes
+                    mime_type = png_mime
+                except Exception as browser_error:
+                    print(f"[SeeImage] Browser-based SVG conversion failed: {browser_error}")
+                    
+                    # Fallback to svglib approach
+                    try:
+                        
+                        # Create temporary SVG file for svglib
+                        with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as temp_svg:
+                            temp_svg.write(image_bytes)
+                            temp_svg_path = temp_svg.name
+                        
+                        try:
+                            # Convert SVG to PNG using svglib + reportlab
+                            drawing = svg2rlg(temp_svg_path)
+                            png_buffer = BytesIO()
+                            renderPM.drawToFile(drawing, png_buffer, fmt='PNG')
+                            png_bytes = png_buffer.getvalue()
+                            
+                            print(f"[SeeImage] Converted SVG '{file_path}' to PNG using fallback method (svglib)")
+                            # Update for PIL processing
+                            image_bytes = png_bytes
+                            mime_type = 'image/png'
+                        finally:
+                            # Clean up temporary file
+                            os.unlink(temp_svg_path)
+                            
+                    except ImportError:
+                        print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'")
+                        return image_bytes, mime_type
+                    except Exception as e:
+                        print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}")
+                        return image_bytes, mime_type
+            
            # Open image from bytes
            img = Image.open(BytesIO(image_bytes))
            
@ -85,7 +201,7 @@ class SandboxVisionTool(SandboxToolsBase):
                img.save(output, format='PNG', optimize=True, compress_level=DEFAULT_PNG_COMPRESS_LEVEL)
                output_mime = 'image/png'
            else:
-                # Convert everything else to JPEG for better compression
+                # Convert everything else to JPEG for better compression (converted SVGs stay PNG above)
                img.save(output, format='JPEG', quality=DEFAULT_JPEG_QUALITY, optimize=True)
                output_mime = 'image/jpeg'
            
@ -151,7 +267,7 @@ class SandboxVisionTool(SandboxToolsBase):
                "properties": {
                    "file_path": {
                        "type": "string",
-                        "description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP. Max size: 10MB."
+                        "description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP, SVG. Max size: 10MB. SVG files are automatically converted to PNG using browser rendering for best quality."
                    }
                },
                "required": ["file_path"]
@ -219,19 +335,35 @@ class SandboxVisionTool(SandboxToolsBase):
                    elif ext == '.png': mime_type = 'image/png'
                    elif ext == '.gif': mime_type = 'image/gif'
                    elif ext == '.webp': mime_type = 'image/webp'
+                    elif ext == '.svg': mime_type = 'image/svg+xml'
                    else:
-                        return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP.")
+                        return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP, SVG.")
                
                original_size = file_info.size
            

            # Compress the image
-            compressed_bytes, compressed_mime_type = self.compress_image(image_bytes, mime_type, cleaned_path)
+            compressed_bytes, compressed_mime_type = await self.compress_image(image_bytes, mime_type, cleaned_path)
            
            # Check if compressed image is still too large
            if len(compressed_bytes) > MAX_COMPRESSED_SIZE:
                return self.fail_response(f"Image file '{cleaned_path}' is still too large after compression ({len(compressed_bytes) / (1024*1024):.2f}MB). Maximum compressed size is {MAX_COMPRESSED_SIZE / (1024*1024)}MB.")

+            # For SVG files that were converted to PNG, save the converted PNG to sandbox
+            if (mime_type == 'image/svg+xml' or cleaned_path.lower().endswith('.svg')) and compressed_mime_type == 'image/png':
+                # Create PNG filename by replacing .svg extension
+                png_filename = cleaned_path.rsplit('.', 1)[0] + '_converted.png'
+                png_full_path = f"{self.workspace_path}/{png_filename}"
+                
+                try:
+                    # Save converted PNG to sandbox
+                    await self.sandbox.fs.upload_file(compressed_bytes, png_full_path)
+                    cleaned_path = png_filename
+                    print(f"[SeeImage] Saved converted PNG to sandbox as '{png_filename}' for frontend display")
+                except Exception as e:
+                    print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
+                    # Continue with original path if save fails
+
            # Convert to base64
            base64_image = base64.b64encode(compressed_bytes).decode('utf-8')

@ -248,8 +380,13 @@ class SandboxVisionTool(SandboxToolsBase):
            if not result:
                return self.fail_response(f"Failed to add image '{cleaned_path}' to conversation context.")

-            # Inform the agent the image will be available next turn
-            return self.success_response(f"Successfully loaded and compressed the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).")
+            # Return structured output like other tools
+            result_data = {
+                "message": f"Successfully loaded a compressed version of the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).",
+                "file_path": cleaned_path,
+            }
+            
+            return self.success_response(result_data)

        except Exception as e:
            return self.fail_response(f"An unexpected error occurred while trying to see the image: {str(e)}")
--- a/backend/core/utils/config.py
+++ b/backend/core/utils/config.py
@ -311,8 +311,8 @@ class Configuration:
    STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
    
    # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.19"
-    SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.19"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3.20"
+    SANDBOX_SNAPSHOT_NAME = "kortix/suna:0.1.3.20"
    SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"

    # LangFuse configuration
--- a/frontend/src/components/thread/tool-views/see-image-tool/_utils.ts
+++ b/frontend/src/components/thread/tool-views/see-image-tool/_utils.ts
@ -45,8 +45,32 @@ const extractFromNewFormat = (content: any): {
      }
    }

+    // Extract display file path from structured output, fall back to args
+    let filePath = args.file_path || null;
+    const rawOutput = toolExecution.result?.output;
+    
+    // Check if output has file_path (handles both object and string formats)
+    if (rawOutput) {
+      let outputData = rawOutput;
+      console.log('outputData', outputData);
+      
+      // Parse string output if needed
+      if (typeof rawOutput === 'string') {
+        try {
+          outputData = JSON.parse(rawOutput);
+        } catch (e) {
+          // Not JSON, keep original
+        }
+      }
+      
+      // Use the output's file_path if available
+      if (outputData && typeof outputData === 'object' && outputData.file_path) {
+        filePath = outputData.file_path;
+      }
+    }
+
    const extractedData = {
-      filePath: args.file_path || null,
+      filePath,
      description: parsedContent.summary || null,
      success: toolExecution.result?.success,
      timestamp: toolExecution.execution_details?.timestamp,
--- a/setup.py
+++ b/setup.py
@ -686,9 +686,9 @@ class SetupWizard:
        )
        print_info("Create a snapshot with these exact settings:")
        print_info(
-            f"   - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}")
+            f"   - Name:\t\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}")
        print_info(
-            f"   - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.19{Colors.ENDC}")
+            f"   - Snapshot name:\t{Colors.GREEN}kortix/suna:0.1.3.20{Colors.ENDC}")
        print_info(
            f"   - Entrypoint:\t{Colors.GREEN}/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf{Colors.ENDC}"
        )