diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index 844b821b..d297c3d5 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -31,7 +31,7 @@ class SandboxBrowserTool(SandboxToolsBase): await self._ensure_sandbox() # Build the curl command - url = f"http://localhost:8002/api/automation/{endpoint}" + url = f"http://localhost:8003/api/automation/{endpoint}" if method == "GET" and params: query_params = "&".join([f"{k}={v}" for k, v in params.items()]) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index 45ddb5ef..5608e335 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \ iputils-ping \ dnsutils \ sudo \ + # OCR Tools + tesseract-ocr \ + tesseract-ocr-eng \ && rm -rf /var/lib/apt/lists/* # Install Node.js and npm diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index c2fc5186..7fb5baf5 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -618,10 +618,29 @@ class BrowserAutomation: """Take a screenshot and return as base64 encoded string""" try: page = await self.get_current_page() - screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False) + + # Wait for network to be idle and DOM to be stable + try: + await page.wait_for_load_state("networkidle", timeout=60000) # Increased timeout to 60s + except Exception as e: + print(f"Warning: Network idle timeout, proceeding anyway: {e}") + + # Wait for any animations to complete + # await page.wait_for_timeout(1000) # Wait 1 second for animations + + # Take screenshot with increased timeout and better options + screenshot_bytes = await page.screenshot( + type='jpeg', + quality=60, + full_page=False, + timeout=60000, # Increased timeout to 60s + scale='device' # Use device scale factor + ) + return base64.b64encode(screenshot_bytes).decode('utf-8') except Exception as e: print(f"Error taking screenshot: {e}") + traceback.print_exc() # Return an empty string rather than failing return "" @@ -2065,4 +2084,4 @@ if __name__ == '__main__': asyncio.run(test_browser_api_2()) else: print("Starting API server") - uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8002) \ No newline at end of file + uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8003) \ No newline at end of file diff --git a/backend/utils/config.py b/backend/utils/config.py index c1392d07..ea683267 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.6" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.7" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property