From cbb8b7e65ab6c10af668d6fcef8518376a00003e Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 05:49:09 +0200
Subject: [PATCH 01/21] chore(docker): update Docker configurations and
 streamline browser handling

- Removed unnecessary comments and updated the Docker Compose files for backend and sandbox services.
- Changed `docker-compose` to `docker compose` in README for consistency.
- Updated Chromium dependencies and configurations in Dockerfile for improved stability.
- Enhanced browser automation logic to handle page navigation and state recovery more effectively.
- Adjusted environment variables for better performance and resource management.
---
 backend/docker-compose.yml                |   4 -
 backend/sandbox/README.md                 |   2 +-
 backend/sandbox/docker/Dockerfile         |  17 +++-
 backend/sandbox/docker/browser_api.py     | 112 +++++++++++++++++-----
 backend/sandbox/docker/docker-compose.yml |  16 +++-
 backend/utils/config.py                   |   4 +-
 6 files changed, 120 insertions(+), 35 deletions(-)

diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml
index f5f91540..094f94da 100644
--- a/backend/docker-compose.yml
+++ b/backend/docker-compose.yml
@@ -1,7 +1,3 @@
-# This is a Docker Compose file for the backend service. For self-hosting, look at the root docker-compose.yml file.
-
-version: "3.8"
-
 services:
   api:
     build:
diff --git a/backend/sandbox/README.md b/backend/sandbox/README.md
index cafa4b7b..f8c0d571 100644
--- a/backend/sandbox/README.md
+++ b/backend/sandbox/README.md
@@ -19,7 +19,7 @@ You can modify the sandbox environment for development or to add new capabilitie
 2. Build a custom image:
    ```
    cd backend/sandbox/docker
-   docker-compose build
+   docker compose build
    ```
 3. Test your changes locally using docker-compose
 
diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index 418fe524..f51d706b 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \
     iputils-ping \
     dnsutils \
     sudo \
+    # Chromium dependencies
+    chromium \
+    chromium-driver \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Node.js and npm
@@ -110,14 +113,20 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
-ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
+ENV CHROME_PATH=/usr/bin/chromium
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
-ENV RESOLUTION=1920x1080x24
+ENV RESOLUTION=1024x768x24
 ENV VNC_PASSWORD=vncpassword
 ENV CHROME_PERSISTENT_SESSION=true
-ENV RESOLUTION_WIDTH=1920
-ENV RESOLUTION_HEIGHT=1080
+ENV RESOLUTION_WIDTH=1024
+ENV RESOLUTION_HEIGHT=768
+# Add Chrome stability flags
+ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"
+# Add Playwright specific settings
+ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py
index f122cdfc..76d7b01d 100644
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@@ -311,7 +311,7 @@ class BrowserAutomation:
         
         # Tab management
         self.router.post("/automation/switch_tab")(self.switch_tab)
-        self.router.post("/automation/open_tab")(self.open_tab)
+        # self.router.post("/automation/open_tab")(self.open_tab)
         self.router.post("/automation/close_tab")(self.close_tab)
         
         # Content actions
@@ -337,7 +337,7 @@ class BrowserAutomation:
             playwright = await async_playwright().start()
             print("Playwright started, launching browser...")
             
-            # Use non-headless mode for testing with slower timeouts
+            # Use non-headless mode for testing with slower timeouts and additional options
             launch_options = {
                 "headless": False,
                 "timeout": 60000
@@ -354,21 +354,17 @@ class BrowserAutomation:
                 self.browser = await playwright.chromium.launch(**launch_options)
                 print("Browser launched with minimal options")
 
-            try:
-                await self.get_current_page()
-                print("Found existing page, using it")
-                self.current_page_index = 0
-            except Exception as page_error:
-                print(f"Error finding existing page, creating new one. ( {page_error})")
+            # Check if we already have pages
+            if not self.pages:
+                print("Creating initial page")
                 page = await self.browser.new_page()
-                print("New page created successfully")
                 self.pages.append(page)
                 self.current_page_index = 0
-                # Navigate to about:blank to ensure page is ready
-                # await page.goto("google.com", timeout=30000)
-                print("Navigated to google.com")
+            else:
+                print("Using existing page")
+                self.current_page_index = 0
                 
-                print("Browser initialization completed successfully")
+            print("Browser initialization completed successfully")
         except Exception as e:
             print(f"Browser startup error: {str(e)}")
             traceback.print_exc()
@@ -533,6 +529,18 @@ class BrowserAutomation:
         """Get the current DOM state including element tree and selector map"""
         try:
             page = await self.get_current_page()
+            
+            # First check if page is valid and has content
+            try:
+                current_url = page.url
+                if current_url == "about:blank":
+                    # If page is blank, try to recover by waiting for content
+                    await page.wait_for_load_state("domcontentloaded", timeout=5000)
+                    current_url = page.url
+            except Exception as e:
+                print(f"Error checking page URL: {e}")
+                current_url = "about:blank"
+            
             selector_map = await self.get_selector_map()
             
             # Create a root element
@@ -550,13 +558,12 @@ class BrowserAutomation:
                     root.children.append(element)
             
             # Get basic page info
-            url = page.url
             try:
                 title = await page.title()
             except:
                 title = "Unknown Title"
             
-            # Get more accurate scroll information - fix JavaScript syntax
+            # Get more accurate scroll information
             try:
                 scroll_info = await page.evaluate("""
                 () => {
@@ -587,7 +594,7 @@ class BrowserAutomation:
             return DOMState(
                 element_tree=root,
                 selector_map=selector_map,
-                url=url,
+                url=current_url,
                 title=title,
                 pixels_above=pixels_above,
                 pixels_below=pixels_below
@@ -595,7 +602,16 @@ class BrowserAutomation:
         except Exception as e:
             print(f"Error getting DOM state: {e}")
             traceback.print_exc()
-            # Return a minimal valid state to avoid breaking tests
+            
+            # Try to get at least the current URL before falling back
+            current_url = "about:blank"
+            try:
+                page = await self.get_current_page()
+                current_url = page.url
+            except:
+                pass
+                
+            # Return a minimal valid state with the actual URL if possible
             dummy_root = DOMElementNode(
                 is_visible=True,
                 tag_name="body",
@@ -606,7 +622,7 @@ class BrowserAutomation:
             return DOMState(
                 element_tree=dummy_root,
                 selector_map=dummy_map,
-                url=page.url if 'page' in locals() else "about:blank",
+                url=current_url,
                 title="Error page",
                 pixels_above=0,
                 pixels_below=0
@@ -860,10 +876,52 @@ class BrowserAutomation:
         """Navigate to a specified URL"""
         try:
             page = await self.get_current_page()
-            await page.goto(action.url, wait_until="domcontentloaded")
-            await page.wait_for_load_state("networkidle", timeout=10000)
             
-            # Get updated state after action
+            # First check if we're already on the target URL
+            current_url = page.url
+            if current_url == action.url:
+                print(f"Already on target URL: {action.url}")
+                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
+                return self.build_action_result(
+                    True,
+                    f"Already on {action.url}",
+                    dom_state,
+                    screenshot,
+                    elements,
+                    metadata,
+                    error="",
+                    content=None
+                )
+            
+            # Attempt navigation with retries
+            max_retries = 3
+            retry_count = 0
+            last_error = None
+            
+            while retry_count < max_retries:
+                try:
+                    print(f"Navigation attempt {retry_count + 1} to {action.url}")
+                    await page.goto(action.url, wait_until="domcontentloaded", timeout=30000)
+                    await page.wait_for_load_state("networkidle", timeout=10000)
+                    
+                    # Verify we actually navigated to the target URL
+                    new_url = page.url
+                    if new_url == "about:blank":
+                        raise Exception("Navigation resulted in blank page")
+                        
+                    print(f"Successfully navigated to {new_url}")
+                    break
+                except Exception as e:
+                    last_error = e
+                    retry_count += 1
+                    if retry_count < max_retries:
+                        print(f"Navigation attempt {retry_count} failed: {e}")
+                        await asyncio.sleep(1)  # Wait before retry
+                    else:
+                        print(f"All navigation attempts failed: {e}")
+                        raise
+            
+            # Get updated state after successful navigation
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
             
             result = self.build_action_result(
@@ -882,6 +940,7 @@ class BrowserAutomation:
         except Exception as e:
             print(f"Navigation error: {str(e)}")
             traceback.print_exc()
+            
             # Try to get some state info even after error
             try:
                 dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("navigate_error_recovery")
@@ -896,6 +955,14 @@ class BrowserAutomation:
                     content=None
                 )
             except:
+                # If we can't get state, at least try to get the current URL
+                current_url = "about:blank"
+                try:
+                    page = await self.get_current_page()
+                    current_url = page.url
+                except:
+                    pass
+                    
                 return self.build_action_result(
                     False,
                     str(e),
@@ -904,7 +971,8 @@ class BrowserAutomation:
                     "",
                     {},
                     error=str(e),
-                    content=None
+                    content=None,
+                    fallback_url=current_url
                 )
     
     async def search_google(self, action: SearchGoogleAction = Body(...)):
diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index 27432984..03024280 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -6,7 +6,7 @@ services:
       dockerfile: ${DOCKERFILE:-Dockerfile}
       args:
         TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortix/suna:0.1.2
+    image: kortix/suna:0.1.2.1
     ports:
       - "6080:6080"  # noVNC web interface
       - "5901:5901"  # VNC port
@@ -15,7 +15,7 @@ services:
       - "8080:8080"  # HTTP server port
     environment:
       - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
-      - CHROME_PATH=/usr/bin/google-chrome
+      - CHROME_PATH=/usr/bin/chromium
       - CHROME_USER_DATA=/app/data/chrome_data
       - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
       - CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
@@ -27,8 +27,13 @@ services:
       - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
       - CHROME_DEBUGGING_PORT=9222
       - CHROME_DEBUGGING_HOST=localhost
+      - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"}
+      - PLAYWRIGHT_SKIP_BROWSER_GC=1
+      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
+      - NODE_OPTIONS="--max-old-space-size=4096"
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix
+      - chrome_data:/app/data/chrome_data
     restart: unless-stopped
     shm_size: '2gb'
     cap_add:
@@ -42,3 +47,10 @@ services:
       interval: 10s
       timeout: 5s
       retries: 3
+    ulimits:
+      nofile:
+        soft: 65536
+        hard: 65536
+
+volumes:
+  chrome_data:
diff --git a/backend/utils/config.py b/backend/utils/config.py
index e08b3eab..c396e61b 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -155,8 +155,8 @@ class Configuration:
     STRIPE_DEFAULT_TRIAL_DAYS: int = 14
     
     # Stripe Product IDs
-    STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD'  # Production product ID
-    STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'  # Staging product ID
+    STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD'
+    STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
     SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2"

From 7b27f83a6d2dcce87353a147a1189bab133e6a96 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 05:55:02 +0200
Subject: [PATCH 02/21] use playwright chromium directly

---
 backend/sandbox/docker/Dockerfile         | 7 ++-----
 backend/sandbox/docker/docker-compose.yml | 4 ++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index f51d706b..c02f2f2e 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -68,9 +68,6 @@ RUN apt-get update && apt-get install -y \
     iputils-ping \
     dnsutils \
     sudo \
-    # Chromium dependencies
-    chromium \
-    chromium-driver \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Node.js and npm
@@ -113,7 +110,7 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
-ENV CHROME_PATH=/usr/bin/chromium
+ENV CHROME_PATH=/ms-playwright/chromium-*/chrome
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
 ENV RESOLUTION=1024x768x24
@@ -126,7 +123,7 @@ ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-s
 # Add Playwright specific settings
 ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
 ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
-ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
+ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index 03024280..136c972f 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -15,7 +15,7 @@ services:
       - "8080:8080"  # HTTP server port
     environment:
       - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
-      - CHROME_PATH=/usr/bin/chromium
+      - CHROME_PATH=/ms-playwright/chromium-*/chrome
       - CHROME_USER_DATA=/app/data/chrome_data
       - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
       - CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
@@ -29,7 +29,7 @@ services:
       - CHROME_DEBUGGING_HOST=localhost
       - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"}
       - PLAYWRIGHT_SKIP_BROWSER_GC=1
-      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
+      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome
       - NODE_OPTIONS="--max-old-space-size=4096"
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix

From 9d27ec2beadae4c1631bcd05c972429e52853501 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 06:15:09 +0200
Subject: [PATCH 03/21] wip: use chrome-linux/chrome Playwright path, add
 compose memory limits, bump sandbox image to 0.1.2.2

---
 backend/sandbox/docker/Dockerfile         |  4 ++--
 backend/sandbox/docker/docker-compose.yml | 12 +++++++++---
 backend/utils/config.py                   |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index c02f2f2e..c5b201f5 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -110,7 +110,7 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
-ENV CHROME_PATH=/ms-playwright/chromium-*/chrome
+ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
 ENV RESOLUTION=1024x768x24
@@ -123,7 +123,7 @@ ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-s
 # Add Playwright specific settings
 ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
 ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
-ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome
+ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index 136c972f..73729279 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -6,7 +6,7 @@ services:
       dockerfile: ${DOCKERFILE:-Dockerfile}
       args:
         TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortix/suna:0.1.2.1
+    image: kortix/suna:0.1.2
     ports:
       - "6080:6080"  # noVNC web interface
       - "5901:5901"  # VNC port
@@ -15,7 +15,7 @@ services:
       - "8080:8080"  # HTTP server port
     environment:
       - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
-      - CHROME_PATH=/ms-playwright/chromium-*/chrome
+      - CHROME_PATH=/usr/bin/google-chrome
       - CHROME_USER_DATA=/app/data/chrome_data
       - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
       - CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
@@ -29,7 +29,7 @@ services:
       - CHROME_DEBUGGING_HOST=localhost
       - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"}
       - PLAYWRIGHT_SKIP_BROWSER_GC=1
-      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome
+      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
       - NODE_OPTIONS="--max-old-space-size=4096"
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix
@@ -51,6 +51,12 @@ services:
       nofile:
         soft: 65536
         hard: 65536
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+        reservations:
+          memory: 2G
 
 volumes:
   chrome_data:
diff --git a/backend/utils/config.py b/backend/utils/config.py
index c396e61b..73f1cbd5 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -159,7 +159,7 @@ class Configuration:
     STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.2"
     SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
 
     @property

From 0ae9b1cd1093d672851db37f108b2fdc7e39e611 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 06:23:01 +0200
Subject: [PATCH 04/21] wip: revert sandbox Dockerfile and docker-compose.yml
 to their pre-series state

---
 backend/sandbox/docker/Dockerfile         | 12 +++---------
 backend/sandbox/docker/docker-compose.yml | 18 ------------------
 2 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index c5b201f5..418fe524 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -113,17 +113,11 @@ ENV PYTHONUNBUFFERED=1
 ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
-ENV RESOLUTION=1024x768x24
+ENV RESOLUTION=1920x1080x24
 ENV VNC_PASSWORD=vncpassword
 ENV CHROME_PERSISTENT_SESSION=true
-ENV RESOLUTION_WIDTH=1024
-ENV RESOLUTION_HEIGHT=768
-# Add Chrome stability flags
-ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"
-# Add Playwright specific settings
-ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
-ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
-ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
+ENV RESOLUTION_WIDTH=1920
+ENV RESOLUTION_HEIGHT=1080
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index 73729279..27432984 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -27,13 +27,8 @@ services:
       - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
       - CHROME_DEBUGGING_PORT=9222
       - CHROME_DEBUGGING_HOST=localhost
-      - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"}
-      - PLAYWRIGHT_SKIP_BROWSER_GC=1
-      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
-      - NODE_OPTIONS="--max-old-space-size=4096"
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix
-      - chrome_data:/app/data/chrome_data
     restart: unless-stopped
     shm_size: '2gb'
     cap_add:
@@ -47,16 +42,3 @@ services:
       interval: 10s
       timeout: 5s
       retries: 3
-    ulimits:
-      nofile:
-        soft: 65536
-        hard: 65536
-    deploy:
-      resources:
-        limits:
-          memory: 4G
-        reservations:
-          memory: 2G
-
-volumes:
-  chrome_data:

From 2af572ab035ed94d549fd36fbbced11b62238d8b Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 20:44:44 +0200
Subject: [PATCH 05/21] wip

---
 backend/agent/tools/sb_browser_tool.py |   2 +-
 backend/sandbox/docker/Dockerfile      |   6 +-
 backend/sandbox/docker/browser_api.py  | 257 +++++--------------------
 backend/utils/config.py                |   2 +-
 4 files changed, 55 insertions(+), 212 deletions(-)

diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py
index ce9130ec..59602db1 100644
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@@ -30,7 +30,7 @@ class SandboxBrowserTool(SandboxToolsBase):
             await self._ensure_sandbox()
             
             # Build the curl command
-            url = f"http://localhost:8002/api/automation/{endpoint}"
+            url = f"http://localhost:8003/api/automation/{endpoint}"
             
             if method == "GET" and params:
                 query_params = "&".join([f"{k}={v}" for k, v in params.items()])
diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index 418fe524..b8f74a52 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -113,11 +113,11 @@ ENV PYTHONUNBUFFERED=1
 ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
-ENV RESOLUTION=1920x1080x24
+ENV RESOLUTION=1024x768x24
 ENV VNC_PASSWORD=vncpassword
 ENV CHROME_PERSISTENT_SESSION=true
-ENV RESOLUTION_WIDTH=1920
-ENV RESOLUTION_HEIGHT=1080
+ENV RESOLUTION_WIDTH=1024
+ENV RESOLUTION_HEIGHT=768
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py
index 76d7b01d..1490bba9 100644
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@@ -15,8 +15,6 @@ import traceback
 import pytesseract
 from PIL import Image
 import io
-from utils.logger import logger
-from services.supabase import DBConnection
 
 #######################################################
 # Action model definitions
@@ -261,16 +259,15 @@ class BrowserActionResult(BaseModel):
     url: Optional[str] = None
     title: Optional[str] = None
     elements: Optional[str] = None  # Formatted string of clickable elements
-    screenshot_base64: Optional[str] = None  # For backward compatibility
-    screenshot_url: Optional[str] = None 
+    screenshot_base64: Optional[str] = None
     pixels_above: int = 0
     pixels_below: int = 0
     content: Optional[str] = None
-    ocr_text: Optional[str] = None
+    ocr_text: Optional[str] = None  # Added field for OCR text
     
     # Additional metadata
-    element_count: int = 0
-    interactive_elements: Optional[List[Dict[str, Any]]] = None
+    element_count: int = 0  # Number of interactive elements found
+    interactive_elements: Optional[List[Dict[str, Any]]] = None  # Simplified list of interactive elements
     viewport_width: Optional[int] = None
     viewport_height: Optional[int] = None
     
@@ -291,7 +288,6 @@ class BrowserAutomation:
         self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"]
         self.screenshot_dir = os.path.join(os.getcwd(), "screenshots")
         os.makedirs(self.screenshot_dir, exist_ok=True)
-        self.db = DBConnection()  # Initialize DB connection
         
         # Register routes
         self.router.on_startup.append(self.startup)
@@ -311,7 +307,7 @@ class BrowserAutomation:
         
         # Tab management
         self.router.post("/automation/switch_tab")(self.switch_tab)
-        # self.router.post("/automation/open_tab")(self.open_tab)
+        self.router.post("/automation/open_tab")(self.open_tab)
         self.router.post("/automation/close_tab")(self.close_tab)
         
         # Content actions
@@ -337,7 +333,7 @@ class BrowserAutomation:
             playwright = await async_playwright().start()
             print("Playwright started, launching browser...")
             
-            # Use non-headless mode for testing with slower timeouts and additional options
+            # Use non-headless mode for testing with slower timeouts
             launch_options = {
                 "headless": False,
                 "timeout": 60000
@@ -346,6 +342,19 @@ class BrowserAutomation:
             try:
                 self.browser = await playwright.chromium.launch(**launch_options)
                 print("Browser launched successfully")
+                
+                # Create a single context with viewport settings
+                self.context = await self.browser.new_context(
+                    viewport={'width': 1024, 'height': 768}
+                )
+                
+                # Create initial page and navigate to a default page
+                page = await self.context.new_page()
+                await page.goto("https://www.google.com", wait_until="domcontentloaded")
+                self.pages.append(page)
+                self.current_page_index = 0
+                print("Initial page created and navigated to Google")
+                
             except Exception as browser_error:
                 print(f"Failed to launch browser: {browser_error}")
                 # Try with minimal options
@@ -353,16 +362,16 @@ class BrowserAutomation:
                 launch_options = {"timeout": 90000}
                 self.browser = await playwright.chromium.launch(**launch_options)
                 print("Browser launched with minimal options")
-
-            # Check if we already have pages
-            if not self.pages:
-                print("Creating initial page")
-                page = await self.browser.new_page()
+                
+                # Create context and initial page even with minimal options
+                self.context = await self.browser.new_context(
+                    viewport={'width': 1024, 'height': 768}
+                )
+                page = await self.context.new_page()
+                await page.goto("https://www.google.com", wait_until="domcontentloaded")
                 self.pages.append(page)
                 self.current_page_index = 0
-            else:
-                print("Using existing page")
-                self.current_page_index = 0
+                print("Initial page created with minimal options")
                 
             print("Browser initialization completed successfully")
         except Exception as e:
@@ -529,18 +538,6 @@ class BrowserAutomation:
         """Get the current DOM state including element tree and selector map"""
         try:
             page = await self.get_current_page()
-            
-            # First check if page is valid and has content
-            try:
-                current_url = page.url
-                if current_url == "about:blank":
-                    # If page is blank, try to recover by waiting for content
-                    await page.wait_for_load_state("domcontentloaded", timeout=5000)
-                    current_url = page.url
-            except Exception as e:
-                print(f"Error checking page URL: {e}")
-                current_url = "about:blank"
-            
             selector_map = await self.get_selector_map()
             
             # Create a root element
@@ -558,12 +555,13 @@ class BrowserAutomation:
                     root.children.append(element)
             
             # Get basic page info
+            url = page.url
             try:
                 title = await page.title()
             except:
                 title = "Unknown Title"
             
-            # Get more accurate scroll information
+            # Get more accurate scroll information by evaluating JavaScript in the page
             try:
                 scroll_info = await page.evaluate("""
                 () => {
@@ -594,7 +592,7 @@ class BrowserAutomation:
             return DOMState(
                 element_tree=root,
                 selector_map=selector_map,
-                url=current_url,
+                url=url,
                 title=title,
                 pixels_above=pixels_above,
                 pixels_below=pixels_below
@@ -602,16 +600,7 @@ class BrowserAutomation:
         except Exception as e:
             print(f"Error getting DOM state: {e}")
             traceback.print_exc()
-            
-            # Try to get at least the current URL before falling back
-            current_url = "about:blank"
-            try:
-                page = await self.get_current_page()
-                current_url = page.url
-            except:
-                pass
-                
-            # Return a minimal valid state with the actual URL if possible
+            # Return a minimal valid state to avoid breaking tests
             dummy_root = DOMElementNode(
                 is_visible=True,
                 tag_name="body",
@@ -622,92 +611,22 @@ class BrowserAutomation:
             return DOMState(
                 element_tree=dummy_root,
                 selector_map=dummy_map,
-                url=current_url,
+                url=page.url if 'page' in locals() else "about:blank",
                 title="Error page",
                 pixels_above=0,
                 pixels_below=0
             )
     
     async def take_screenshot(self) -> str:
-        """Take a screenshot and return as base64 encoded string or S3 URL"""
+        """Take a screenshot and return as base64 encoded string"""
         try:
             page = await self.get_current_page()
             screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False)
-
-            client = await self.db.client
-            
-            if client:
-                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-                random_id = random.randint(1000, 9999)
-                filename = f"screenshot_{timestamp}_{random_id}.jpg"
-                
-                logger.info(f"Attempting to upload screenshot: {filename}")
-                result = await self.upload_to_storage(client, screenshot_bytes, filename)
-                
-                if isinstance(result, dict) and result.get("is_s3") and result.get("url"):
-                    if await self.verify_file_exists(client, filename):
-                        logger.info(f"Screenshot upload verified: {filename}")
-                    else:
-                        logger.error(f"Screenshot upload failed verification: {filename}")
-                        return base64.b64encode(screenshot_bytes).decode('utf-8')
-                
-                return result
-            else:
-                logger.warning("No Supabase client available, falling back to base64")
-                return base64.b64encode(screenshot_bytes).decode('utf-8')
+            return base64.b64encode(screenshot_bytes).decode('utf-8')
         except Exception as e:
-            logger.error(f"Error taking screenshot: {str(e)}")
-            traceback.print_exc()
+            print(f"Error taking screenshot: {e}")
+            # Return an empty string rather than failing
             return ""
-        
-    async def upload_to_storage(self, client, file_bytes: bytes, filename: str) -> str:
-        """Upload file to Supabase Storage and return the URL"""
-        try:
-            bucket_name = 'screenshots'
-            
-            buckets = client.storage.list_buckets()
-            if not any(bucket.name == bucket_name for bucket in buckets):
-                logger.info(f"Creating bucket: {bucket_name}")
-                try:
-                    client.storage.create_bucket(bucket_name)
-                    logger.info("Bucket created successfully")
-                except Exception as e:
-                    logger.error(f"Failed to create bucket: {str(e)}")
-                    raise
-
-            logger.info(f"Uploading file: {filename}")
-            try:
-                result = client.storage.from_(bucket_name).upload(
-                    path=filename,
-                    file=file_bytes,
-                    file_options={"content-type": "image/jpeg"}
-                )
-                logger.info("File upload successful")
-            except Exception as e:
-                logger.error(f"Failed to upload file: {str(e)}")
-                raise
-            
-            file_url = client.storage.from_(bucket_name).get_public_url(filename)
-            logger.info(f"Generated URL: {file_url}")
-            
-            return {"url": file_url, "is_s3": True}
-        except Exception as e:
-            logger.error(f"Error in upload_to_storage: {str(e)}")
-            traceback.print_exc()
-            return base64.b64encode(file_bytes).decode('utf-8')
-
-    async def verify_file_exists(self, client, filename: str) -> bool:
-        """Verify that a file exists in the storage bucket"""
-        logger.info(f"=== Verifying file exists: {filename} ===")
-        try:
-            bucket_name = 'screenshots'
-            files = client.storage.from_(bucket_name).list()
-            exists = any(f['name'] == filename for f in files)
-            logger.info(f"File verification result: {'exists' if exists else 'not found'}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error verifying file: {str(e)}")
-            return False
     
     async def save_screenshot_to_file(self) -> str:
         """Take a screenshot and save to file, returning the path"""
@@ -750,32 +669,20 @@ class BrowserAutomation:
         """Helper method to get updated browser state after any action
         Returns a tuple of (dom_state, screenshot, elements, metadata)
         """
-        logger.info(f"=== Starting get_updated_browser_state for action: {action_name} ===")
         try:
             # Wait a moment for any potential async processes to settle
-            logger.info("Waiting for async processes to settle")
             await asyncio.sleep(0.5)
             
             # Get updated state
-            logger.info("Getting current DOM state")
             dom_state = await self.get_current_dom_state()
-            logger.info(f"DOM state retrieved - URL: {dom_state.url}, Title: {dom_state.title}")
-            
-            logger.info("Taking screenshot")
             screenshot = await self.take_screenshot()
-            logger.info(f"Screenshot result type: {'dict' if isinstance(screenshot, dict) else 'base64 string'}")
-            if isinstance(screenshot, dict) and screenshot.get("url"):
-                logger.info(f"Screenshot URL: {screenshot['url']}")
             
             # Format elements for output
-            logger.info("Formatting clickable elements")
             elements = dom_state.element_tree.clickable_elements_to_string(
                 include_attributes=self.include_attributes
             )
-            logger.info(f"Found {len(dom_state.selector_map)} clickable elements")
             
             # Collect additional metadata
-            logger.info("Collecting metadata")
             page = await self.get_current_page()
             metadata = {}
             
@@ -801,9 +708,8 @@ class BrowserAutomation:
             
             metadata['interactive_elements'] = interactive_elements
             
-            # Get viewport dimensions
+                # Get viewport dimensions by evaluating JavaScript in the page
             try:
-                logger.info("Getting viewport dimensions")
                 viewport = await page.evaluate("""
                 () => {
                     return {
@@ -814,43 +720,33 @@ class BrowserAutomation:
                 """)
                 metadata['viewport_width'] = viewport.get('width', 0)
                 metadata['viewport_height'] = viewport.get('height', 0)
-                logger.info(f"Viewport dimensions: {metadata['viewport_width']}x{metadata['viewport_height']}")
             except Exception as e:
-                logger.error(f"Error getting viewport dimensions: {e}")
+                print(f"Error getting viewport dimensions: {e}")
                 metadata['viewport_width'] = 0
                 metadata['viewport_height'] = 0
             
             # Extract OCR text from screenshot if available
             ocr_text = ""
             if screenshot:
-                logger.info("Extracting OCR text from screenshot")
                 ocr_text = await self.extract_ocr_text_from_screenshot(screenshot)
                 metadata['ocr_text'] = ocr_text
-                logger.info(f"OCR text length: {len(ocr_text)} characters")
             
-            logger.info(f"=== Completed get_updated_browser_state for {action_name} ===")
+            print(f"Got updated state after {action_name}: {len(dom_state.selector_map)} elements")
             return dom_state, screenshot, elements, metadata
         except Exception as e:
-            logger.error(f"Error in get_updated_browser_state for {action_name}: {e}")
+            print(f"Error getting updated state after {action_name}: {e}")
             traceback.print_exc()
             # Return empty values in case of error
             return None, "", "", {}
 
     def build_action_result(self, success: bool, message: str, dom_state, screenshot: str, 
-                      elements: str, metadata: dict, error: str = "", content: str = None,
-                      fallback_url: str = None) -> BrowserActionResult:
+                              elements: str, metadata: dict, error: str = "", content: str = None,
+                              fallback_url: str = None) -> BrowserActionResult:
         """Helper method to build a consistent BrowserActionResult"""
+        # Ensure elements is never None to avoid display issues
         if elements is None:
             elements = ""
             
-        screenshot_base64 = None
-        screenshot_url = None
-        
-        if isinstance(screenshot, dict) and screenshot.get("is_s3"):
-            screenshot_url = screenshot.get("url")
-        else:
-            screenshot_base64 = screenshot
-            
         return BrowserActionResult(
             success=success,
             message=message,
@@ -858,8 +754,7 @@ class BrowserAutomation:
             url=dom_state.url if dom_state else fallback_url or "",
             title=dom_state.title if dom_state else "",
             elements=elements,
-            screenshot_base64=screenshot_base64,
-            screenshot_url=screenshot_url,
+            screenshot_base64=screenshot,
             pixels_above=dom_state.pixels_above if dom_state else 0,
             pixels_below=dom_state.pixels_below if dom_state else 0,
             content=content,
@@ -876,52 +771,10 @@ class BrowserAutomation:
         """Navigate to a specified URL"""
         try:
             page = await self.get_current_page()
+            await page.goto(action.url, wait_until="domcontentloaded")
+            await page.wait_for_load_state("networkidle", timeout=10000)
             
-            # First check if we're already on the target URL
-            current_url = page.url
-            if current_url == action.url:
-                print(f"Already on target URL: {action.url}")
-                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
-                return self.build_action_result(
-                    True,
-                    f"Already on {action.url}",
-                    dom_state,
-                    screenshot,
-                    elements,
-                    metadata,
-                    error="",
-                    content=None
-                )
-            
-            # Attempt navigation with retries
-            max_retries = 3
-            retry_count = 0
-            last_error = None
-            
-            while retry_count < max_retries:
-                try:
-                    print(f"Navigation attempt {retry_count + 1} to {action.url}")
-                    await page.goto(action.url, wait_until="domcontentloaded", timeout=30000)
-                    await page.wait_for_load_state("networkidle", timeout=10000)
-                    
-                    # Verify we actually navigated to the target URL
-                    new_url = page.url
-                    if new_url == "about:blank":
-                        raise Exception("Navigation resulted in blank page")
-                        
-                    print(f"Successfully navigated to {new_url}")
-                    break
-                except Exception as e:
-                    last_error = e
-                    retry_count += 1
-                    if retry_count < max_retries:
-                        print(f"Navigation attempt {retry_count} failed: {e}")
-                        await asyncio.sleep(1)  # Wait before retry
-                    else:
-                        print(f"All navigation attempts failed: {e}")
-                        raise
-            
-            # Get updated state after successful navigation
+            # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
             
             result = self.build_action_result(
@@ -940,7 +793,6 @@ class BrowserAutomation:
         except Exception as e:
             print(f"Navigation error: {str(e)}")
             traceback.print_exc()
-            
             # Try to get some state info even after error
             try:
                 dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("navigate_error_recovery")
@@ -955,14 +807,6 @@ class BrowserAutomation:
                     content=None
                 )
             except:
-                # If we can't get state, at least try to get the current URL
-                current_url = "about:blank"
-                try:
-                    page = await self.get_current_page()
-                    current_url = page.url
-                except:
-                    pass
-                    
                 return self.build_action_result(
                     False,
                     str(e),
@@ -971,8 +815,7 @@ class BrowserAutomation:
                     "",
                     {},
                     error=str(e),
-                    content=None,
-                    fallback_url=current_url
+                    content=None
                 )
     
     async def search_google(self, action: SearchGoogleAction = Body(...)):
@@ -1407,8 +1250,8 @@ class BrowserAutomation:
         """Open a new tab with the specified URL"""
         try:
             print(f"Attempting to open new tab with URL: {action.url}")
-            # Create new page in same browser instance
-            new_page = await self.browser.new_page()
+            # Create new page in the existing context
+            new_page = await self.context.new_page()
             print(f"New page created successfully")
             
             # Navigate to the URL
diff --git a/backend/utils/config.py b/backend/utils/config.py
index 73f1cbd5..7be5875b 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -159,7 +159,7 @@ class Configuration:
     STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.2"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.4"
     SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
 
     @property

From 0148c03ec5ff81ac5ac01f4790d65dcf035252e0 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 21:25:15 +0200
Subject: [PATCH 06/21] v1

---
 backend/agent/tools/sb_browser_tool.py |  2 +-
 backend/sandbox/README.md              | 13 +++++++++++++
 backend/utils/config.py                |  2 +-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py
index 59602db1..ce9130ec 100644
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@@ -30,7 +30,7 @@ class SandboxBrowserTool(SandboxToolsBase):
             await self._ensure_sandbox()
             
             # Build the curl command
-            url = f"http://localhost:8003/api/automation/{endpoint}"
+            url = f"http://localhost:8002/api/automation/{endpoint}"
             
             if method == "GET" and params:
                 query_params = "&".join([f"{k}={v}" for k, v in params.items()])
diff --git a/backend/sandbox/README.md b/backend/sandbox/README.md
index f8c0d571..0be85940 100644
--- a/backend/sandbox/README.md
+++ b/backend/sandbox/README.md
@@ -20,6 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie
    ```
    cd backend/sandbox/docker
    docker compose build
+   docker push kortix/suna:0.1.2
    ```
 3. Test your changes locally using docker-compose
 
@@ -30,3 +31,15 @@ To use your custom sandbox image:
 1. Change the `image` parameter in `docker-compose.yml` (that defines the image name `kortix/suna:___`)
 2. Update the same image name in `backend/sandbox/sandbox.py` in the `create_sandbox` function
 3. If using Daytona for deployment, update the image reference there as well
+
+## Publishing New Versions
+
+When publishing a new version of the sandbox:
+
+1. Update the version number in `docker-compose.yml` (e.g., from `0.1.2` to `0.1.3`)
+2. Build the new image: `docker compose build`
+3. Push the new version: `docker push kortix/suna:0.1.3`
+4. Update all references to the image version in:
+   - `backend/utils/config.py`
+   - Daytona images
+   - Any other services using this image
\ No newline at end of file
diff --git a/backend/utils/config.py b/backend/utils/config.py
index 7be5875b..dfd2f545 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -159,7 +159,7 @@ class Configuration:
     STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.4"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.5"
     SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
 
     @property

From 56b9dcad8f1c4223293f01990c9ec0cccb81a04c Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 21:44:54 +0200
Subject: [PATCH 07/21] wip

---
 backend/sandbox/docker/Dockerfile         |   2 +
 backend/sandbox/docker/browser_api.py     | 191 ++--------------------
 backend/sandbox/docker/docker-compose.yml |   1 +
 3 files changed, 21 insertions(+), 173 deletions(-)

diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index b8f74a52..45ddb5ef 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -118,6 +118,8 @@ ENV VNC_PASSWORD=vncpassword
 ENV CHROME_PERSISTENT_SESSION=true
 ENV RESOLUTION_WIDTH=1024
 ENV RESOLUTION_HEIGHT=768
+# Add Chrome flags to prevent multiple tabs/windows
+ENV CHROME_FLAGS="--single-process --no-first-run --no-default-browser-check --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-dev-shm-usage --disable-extensions --disable-features=TranslateUI --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkServiceInProcess2 --force-color-profile=srgb --metrics-recording-only --mute-audio --no-sandbox --disable-gpu"
 
 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py
index 1490bba9..0642fe1f 100644
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@@ -282,8 +282,8 @@ class BrowserAutomation:
     def __init__(self):
         self.router = APIRouter()
         self.browser: Browser = None
-        self.pages: List[Page] = []
-        self.current_page_index: int = 0
+        self.context = None
+        self.page = None  # Single page instance
         self.logger = logging.getLogger("browser_automation")
         self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"]
         self.screenshot_dir = os.path.join(os.getcwd(), "screenshots")
@@ -305,11 +305,6 @@ class BrowserAutomation:
         self.router.post("/automation/input_text")(self.input_text)
         self.router.post("/automation/send_keys")(self.send_keys)
         
-        # Tab management
-        self.router.post("/automation/switch_tab")(self.switch_tab)
-        self.router.post("/automation/open_tab")(self.open_tab)
-        self.router.post("/automation/close_tab")(self.close_tab)
-        
         # Content actions
         self.router.post("/automation/extract_content")(self.extract_content)
         self.router.post("/automation/save_pdf")(self.save_pdf)
@@ -348,12 +343,10 @@ class BrowserAutomation:
                     viewport={'width': 1024, 'height': 768}
                 )
                 
-                # Create initial page and navigate to a default page
-                page = await self.context.new_page()
-                await page.goto("https://www.google.com", wait_until="domcontentloaded")
-                self.pages.append(page)
-                self.current_page_index = 0
-                print("Initial page created and navigated to Google")
+                # Create single page and navigate to a neutral page
+                self.page = await self.context.new_page()
+                await self.page.goto("about:blank", wait_until="domcontentloaded")
+                print("Initial page created and navigated to about:blank")
                 
             except Exception as browser_error:
                 print(f"Failed to launch browser: {browser_error}")
@@ -367,10 +360,8 @@ class BrowserAutomation:
                 self.context = await self.browser.new_context(
                     viewport={'width': 1024, 'height': 768}
                 )
-                page = await self.context.new_page()
-                await page.goto("https://www.google.com", wait_until="domcontentloaded")
-                self.pages.append(page)
-                self.current_page_index = 0
+                self.page = await self.context.new_page()
+                await self.page.goto("about:blank", wait_until="domcontentloaded")
                 print("Initial page created with minimal options")
                 
             print("Browser initialization completed successfully")
@@ -385,10 +376,10 @@ class BrowserAutomation:
             await self.browser.close()
     
     async def get_current_page(self) -> Page:
-        """Get the current active page"""
-        if not self.pages:
-            raise HTTPException(status_code=500, detail="No browser pages available")
-        return self.pages[self.current_page_index]
+        """Get the current page"""
+        if not self.page:
+            raise HTTPException(status_code=500, detail="No browser page available")
+        return self.page
     
     async def get_selector_map(self) -> Dict[int, DOMElementNode]:
         """Get a map of selectable elements on the page"""
@@ -770,9 +761,8 @@ class BrowserAutomation:
     async def navigate_to(self, action: GoToUrlAction = Body(...)):
         """Navigate to a specified URL"""
         try:
-            page = await self.get_current_page()
-            await page.goto(action.url, wait_until="domcontentloaded")
-            await page.wait_for_load_state("networkidle", timeout=10000)
+            await self.page.goto(action.url, wait_until="domcontentloaded")
+            await self.page.wait_for_load_state("networkidle", timeout=10000)
             
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
@@ -821,10 +811,9 @@ class BrowserAutomation:
     async def search_google(self, action: SearchGoogleAction = Body(...)):
         """Search Google with the provided query"""
         try:
-            page = await self.get_current_page()
             search_url = f"https://www.google.com/search?q={action.query}"
-            await page.goto(search_url)
-            await page.wait_for_load_state()
+            await self.page.goto(search_url)
+            await self.page.wait_for_load_state()
             
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"search_google({action.query})")
@@ -870,9 +859,8 @@ class BrowserAutomation:
     async def go_back(self, _: NoParamsAction = Body(...)):
         """Navigate back in browser history"""
         try:
-            page = await self.get_current_page()
-            await page.go_back()
-            await page.wait_for_load_state()
+            await self.page.go_back()
+            await self.page.wait_for_load_state()
             
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("go_back")
@@ -1201,149 +1189,6 @@ class BrowserAutomation:
                 content=None
             )
     
-    # Tab Management Actions
-    
-    async def switch_tab(self, action: SwitchTabAction = Body(...)):
-        """Switch to a different tab by index"""
-        try:
-            if 0 <= action.page_id < len(self.pages):
-                self.current_page_index = action.page_id
-                page = await self.get_current_page()
-                await page.wait_for_load_state()
-                
-                # Get updated state after action
-                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"switch_tab({action.page_id})")
-                
-                return self.build_action_result(
-                    True,
-                    f"Switched to tab {action.page_id}",
-                    dom_state,
-                    screenshot,
-                    elements,
-                    metadata,
-                    error="",
-                    content=None
-                )
-            else:
-                return self.build_action_result(
-                    False,
-                    f"Tab {action.page_id} not found",
-                    None,
-                    "",
-                    "",
-                    {},
-                    error=f"Tab {action.page_id} not found"
-                )
-        except Exception as e:
-            return self.build_action_result(
-                False,
-                str(e),
-                None,
-                "",
-                "",
-                {},
-                error=str(e),
-                content=None
-            )
-    
-    async def open_tab(self, action: OpenTabAction = Body(...)):
-        """Open a new tab with the specified URL"""
-        try:
-            print(f"Attempting to open new tab with URL: {action.url}")
-            # Create new page in the existing context
-            new_page = await self.context.new_page()
-            print(f"New page created successfully")
-            
-            # Navigate to the URL
-            await new_page.goto(action.url, wait_until="domcontentloaded")
-            await new_page.wait_for_load_state("networkidle", timeout=10000)
-            print(f"Navigated to URL in new tab: {action.url}")
-            
-            # Add to page list and make it current
-            self.pages.append(new_page)
-            self.current_page_index = len(self.pages) - 1
-            print(f"New tab added as index {self.current_page_index}")
-            
-            # Get updated state after action
-            dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"open_tab({action.url})")
-            
-            return self.build_action_result(
-                True,
-                f"Opened new tab with URL: {action.url}",
-                dom_state,
-                screenshot,
-                elements,
-                metadata,
-                error="",
-                content=None
-            )
-        except Exception as e:
-            print("****"*10)
-            print(f"Error opening tab: {e}")
-            print(traceback.format_exc())
-            print("****"*10)
-            return self.build_action_result(
-                False,
-                str(e),
-                None,
-                "",
-                "",
-                {},
-                error=str(e),
-                content=None
-            )
-    
-    async def close_tab(self, action: CloseTabAction = Body(...)):
-        """Close a tab by index"""
-        try:
-            if 0 <= action.page_id < len(self.pages):
-                page = self.pages[action.page_id]
-                url = page.url
-                await page.close()
-                self.pages.pop(action.page_id)
-                
-                # Adjust current index if needed
-                if self.current_page_index >= len(self.pages):
-                    self.current_page_index = max(0, len(self.pages) - 1)
-                elif self.current_page_index >= action.page_id:
-                    self.current_page_index = max(0, self.current_page_index - 1)
-                
-                # Get updated state after action
-                page = await self.get_current_page()
-                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"close_tab({action.page_id})")
-                
-                return self.build_action_result(
-                    True,
-                    f"Closed tab {action.page_id} with URL: {url}",
-                    dom_state,
-                    screenshot,
-                    elements,
-                    metadata,
-                    error="",
-                    content=None
-                )
-            else:
-                return self.build_action_result(
-                    False,
-                    f"Tab {action.page_id} not found",
-                    None,
-                    "",
-                    "",
-                    {},
-                    error=f"Tab {action.page_id} not found"
-                )
-        except Exception as e:
-            return self.build_action_result(
-                False,
-                str(e),
-                None,
-                "",
-                "",
-                {},
-                error=str(e),
-                content=None
-            )
-    
     # Content Actions
     
     async def extract_content(self, goal: str = Body(...)):
diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index 27432984..48d2363b 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -27,6 +27,7 @@ services:
       - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
       - CHROME_DEBUGGING_PORT=9222
       - CHROME_DEBUGGING_HOST=localhost
+      - CHROME_FLAGS=${CHROME_FLAGS:-"--single-process --no-first-run --no-default-browser-check --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-dev-shm-usage --disable-extensions --disable-features=TranslateUI --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkServiceInProcess2 --force-color-profile=srgb --metrics-recording-only --mute-audio --no-sandbox --disable-gpu"}
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix
     restart: unless-stopped

From 952a2dd3bfe68ed6624b27dd8f8a7bbed0a8d330 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Sun, 18 May 2025 21:59:48 +0200
Subject: [PATCH 08/21] kortix/suna:0.1.2.7 working

---
 backend/sandbox/docker/docker-compose.yml | 2 +-
 backend/utils/config.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index 48d2363b..ff843624 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -6,7 +6,7 @@ services:
       dockerfile: ${DOCKERFILE:-Dockerfile}
       args:
         TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortix/suna:0.1.2
+    image: kortix/suna:0.1.2.7
     ports:
       - "6080:6080"  # noVNC web interface
       - "5901:5901"  # VNC port
diff --git a/backend/utils/config.py b/backend/utils/config.py
index dfd2f545..c1392d07 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -159,7 +159,7 @@ class Configuration:
     STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.5"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.6"
     SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
 
     @property

From 709b4595ccd24084a73bc473475637911f4838c1 Mon Sep 17 00:00:00 2001
From: Soumyadas15 <saumyadas2017@gmail.com>
Date: Mon, 19 May 2025 01:44:45 +0530
Subject: [PATCH 09/21] chore(dev): fix redundant billing checks

---
 frontend/src/contexts/BillingContext.tsx      | 80 +++++++++++++++++++
 .../react-query/threads/use-billing-status.ts | 15 +++-
 2 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 frontend/src/contexts/BillingContext.tsx

diff --git a/frontend/src/contexts/BillingContext.tsx b/frontend/src/contexts/BillingContext.tsx
new file mode 100644
index 00000000..f54fc478
--- /dev/null
+++ b/frontend/src/contexts/BillingContext.tsx
@@ -0,0 +1,80 @@
+'use client';
+
+import React, { createContext, useContext, useCallback, useEffect, useRef } from 'react';
+import { useBillingStatusQuery } from '@/hooks/react-query/threads/use-billing-status';
+import { BillingStatusResponse } from '@/lib/api';
+import { isLocalMode } from '@/lib/config';
+
+interface BillingContextType {
+  billingStatus: BillingStatusResponse | null; // latest query data, or null when none is loaded
+  isLoading: boolean;
+  error: Error | null;
+  checkBillingStatus: (force?: boolean) => Promise<boolean>; // resolves true when the user is blocked; force skips the 60s throttle
+  lastCheckTime: number | null; // Date.now() of the last completed check, as of the last render
+}
+
+const BillingContext = createContext<BillingContextType | null>(null);
+
+export function BillingProvider({ children }: { children: React.ReactNode }) {
+  const billingStatusQuery = useBillingStatusQuery();
+  const lastCheckRef = useRef<number | null>(null);
+  const checkInProgressRef = useRef<boolean>(false);
+  // Resolves true when the user is blocked (can_run is falsy); always false in local mode or on error.
+  const checkBillingStatus = useCallback(async (force = false): Promise<boolean> => {
+    if (isLocalMode()) {
+      console.log('Running in local development mode - billing checks are disabled');
+      return false;
+    }
+
+    if (checkInProgressRef.current) {
+      return !billingStatusQuery.data?.can_run;
+    }
+
+    const now = Date.now();
+    if (!force && lastCheckRef.current && now - lastCheckRef.current < 60000) {
+      return !billingStatusQuery.data?.can_run;
+    }
+    // Use what refetch() resolves with: this closure's query object predates the refetch.
+    try {
+      checkInProgressRef.current = true;
+      const latest = (force || billingStatusQuery.isStale)
+        ? (await billingStatusQuery.refetch()).data
+        : billingStatusQuery.data;
+      lastCheckRef.current = now;
+      return !latest?.can_run;
+    } catch (err) {
+      console.error('Error checking billing status:', err);
+      return false;
+    } finally {
+      checkInProgressRef.current = false;
+    }
+  }, [billingStatusQuery]);
+
+  useEffect(() => {
+    if (!billingStatusQuery.data) {
+      checkBillingStatus(true);
+    }
+  }, [checkBillingStatus, billingStatusQuery.data]);
+
+  const value = {
+    billingStatus: billingStatusQuery.data || null,
+    isLoading: billingStatusQuery.isLoading,
+    error: billingStatusQuery.error,
+    checkBillingStatus,
+    lastCheckTime: lastCheckRef.current,
+  };
+
+  return (
+    <BillingContext.Provider value={value}>
+      {children}
+    </BillingContext.Provider>
+  );
+}
+
+export function useBilling() {
+  const context = useContext(BillingContext);
+  if (!context) {
+    throw new Error('useBilling must be used within a BillingProvider');
+  }
+  return context;
+} 
\ No newline at end of file
diff --git a/frontend/src/hooks/react-query/threads/use-billing-status.ts b/frontend/src/hooks/react-query/threads/use-billing-status.ts
index a310784b..de43b3ea 100644
--- a/frontend/src/hooks/react-query/threads/use-billing-status.ts
+++ b/frontend/src/hooks/react-query/threads/use-billing-status.ts
@@ -1,6 +1,7 @@
 import { createQueryHook } from "@/hooks/use-query";
 import { threadKeys } from "./keys";
-import { checkBillingStatus } from "@/lib/api";
+import { checkBillingStatus, BillingStatusResponse } from "@/lib/api";
+import { Query } from "@tanstack/react-query";
 
 export const useBillingStatusQuery = (enabled = true) =>
   createQueryHook(
@@ -10,5 +11,17 @@ export const useBillingStatusQuery = (enabled = true) =>
       enabled,
       retry: 1,
       staleTime: 1000 * 60 * 5,
+      gcTime: 1000 * 60 * 10, // 10 minutes (using gcTime instead of cacheTime)
+      refetchOnWindowFocus: false, // Disable refetch on window focus
+      refetchOnMount: false, // Disable refetch on component mount
+      refetchOnReconnect: false, // Disable refetch on reconnect
+      // Poll only while the user is blocked (can_run is false); otherwise never refetch automatically
+      refetchInterval: (query: Query<BillingStatusResponse, Error>) => {
+        // If we have data and it indicates the user can't run, check more frequently
+        if (query.state.data && !query.state.data.can_run) {
+          return 1000 * 60; // Check every minute if user can't run
+        }
+        return false; // Don't refetch automatically otherwise
+      },
     }
   )();

From 70755d30745ff0c4e5828f287755657013620f15 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 01:06:44 +0200
Subject: [PATCH 10/21] save all browser states

---
 backend/agent/run.py                          |  1 -
 backend/agent/tools/sb_browser_tool.py        |  7 +---
 backend/agentpress/response_processor.py      |  8 ++--
 .../thread/tool-views/BrowserToolView.tsx     | 37 ++++++++++++-------
 4 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/backend/agent/run.py b/backend/agent/run.py
index 3301e498..59fd5e01 100644
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@@ -154,7 +154,6 @@ async def run_agent(
                 else:
                     logger.warning("Browser state found but no screenshot data.")
 
-                await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute()
             except Exception as e:
                 logger.error(f"Error parsing browser state: {e}")
 
diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py
index ce9130ec..eaeeac6a 100644
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@@ -59,7 +59,6 @@ class SandboxBrowserTool(SandboxToolsBase):
 
                     logger.info("Browser automation request completed successfully")
 
-                    # Add full result to thread messages for state tracking
                     added_message = await self.thread_manager.add_message(
                         thread_id=self.thread_id,
                         type="browser_state",
@@ -67,17 +66,13 @@ class SandboxBrowserTool(SandboxToolsBase):
                         is_llm_message=False
                     )
 
-                    # Return tool-specific success response
                     success_response = {
                         "success": True,
                         "message": result.get("message", "Browser action completed successfully")
                     }
 
-                    # Add message ID if available
                     if added_message and 'message_id' in added_message:
                         success_response['message_id'] = added_message['message_id']
-
-                    # Add relevant browser-specific info
                     if result.get("url"):
                         success_response["url"] = result["url"]
                     if result.get("title"):
@@ -86,7 +81,6 @@ class SandboxBrowserTool(SandboxToolsBase):
                         success_response["elements_found"] = result["element_count"]
                     if result.get("pixels_below"):
                         success_response["scrollable_content"] = result["pixels_below"] > 0
-                    # Add OCR text when available
                     if result.get("ocr_text"):
                         success_response["ocr_text"] = result["ocr_text"]
 
@@ -104,6 +98,7 @@ class SandboxBrowserTool(SandboxToolsBase):
             logger.debug(traceback.format_exc())
             return self.fail_response(f"Error executing browser action: {e}")
 
+
     @openapi_schema({
         "type": "function",
         "function": {
diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py
index ea6e028a..58cdaf83 100644
--- a/backend/agentpress/response_processor.py
+++ b/backend/agentpress/response_processor.py
@@ -978,7 +978,7 @@ class ResponseProcessor:
                         if value is not None:
                             params[mapping.param_name] = value
                             parsing_details["attributes"][mapping.param_name] = value # Store raw attribute
-                            logger.info(f"Found attribute {mapping.param_name}: {value}")
+                            # logger.info(f"Found attribute {mapping.param_name}: {value}")
                 
                     elif mapping.node_type == "element":
                         # Extract element content
@@ -986,7 +986,7 @@ class ResponseProcessor:
                         if content is not None:
                             params[mapping.param_name] = content.strip()
                             parsing_details["elements"][mapping.param_name] = content.strip() # Store raw element content
-                            logger.info(f"Found element {mapping.param_name}: {content.strip()}")
+                            # logger.info(f"Found element {mapping.param_name}: {content.strip()}")
                 
                     elif mapping.node_type == "text":
                         # Extract text content
@@ -994,7 +994,7 @@ class ResponseProcessor:
                         if content is not None:
                             params[mapping.param_name] = content.strip()
                             parsing_details["text_content"] = content.strip() # Store raw text content
-                            logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
+                            # logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
                 
                     elif mapping.node_type == "content":
                         # Extract root content
@@ -1002,7 +1002,7 @@ class ResponseProcessor:
                         if content is not None:
                             params[mapping.param_name] = content.strip()
                             parsing_details["root_content"] = content.strip() # Store raw root content
-                            logger.info(f"Found root content for {mapping.param_name}")
+                            # logger.info(f"Found root content for {mapping.param_name}")
                 
                 except Exception as e:
                     logger.error(f"Error processing mapping {mapping}: {e}")
diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx
index 2fcb45e9..d43453aa 100644
--- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx
+++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx
@@ -72,20 +72,31 @@ export function BrowserToolView({
 
   // Find the browser_state message and extract the screenshot
   let screenshotBase64: string | null = null;
-  if (browserStateMessageId && messages.length > 0) {
-    const browserStateMessage = messages.find(
-      (msg) =>
-        (msg.type as string) === 'browser_state' &&
-        msg.message_id === browserStateMessageId,
-    );
+  let latestBrowserState: any = null;
+  let latestTimestamp = 0;
 
-    if (browserStateMessage) {
-      const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>(
-        browserStateMessage.content,
-        {},
-      );
-      console.log('Browser state content: ', browserStateContent)
-      screenshotBase64 = browserStateContent?.screenshot_base64 || null;
+  if (messages.length > 0) {
+    // Find the latest browser_state message by comparing timestamps
+    messages.forEach((msg) => {
+      if ((msg.type as string) === 'browser_state') {
+        try {
+          const content = safeJsonParse<{ timestamp?: number }>(msg.content, {});
+          const timestamp = content?.timestamp || 0;
+          
+          if (timestamp > latestTimestamp) {
+            latestTimestamp = timestamp;
+            latestBrowserState = content;
+          }
+        } catch (error) {
+          console.error('[BrowserToolView] Error parsing browser state:', error);
+        }
+      }
+    });
+
+    // Use the latest browser state
+    if (latestBrowserState) {
+      screenshotBase64 = latestBrowserState.screenshot_base64 || null;
+      console.log('Latest browser state:', latestBrowserState);
     }
   }
 

From f22412b963b651c69a5e24603b26c133f38818c1 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 01:28:14 +0200
Subject: [PATCH 11/21] s3 url save instead of base64

---
 backend/agent/tools/sb_browser_tool.py | 14 +++++++
 backend/services/supabase.py           | 44 ++++++++++++++++++++++
 backend/utils/s3_upload_utils.py       | 51 ++++++++++++++++++++++++++
 3 files changed, 109 insertions(+)
 create mode 100644 backend/utils/s3_upload_utils.py

diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py
index eaeeac6a..844b821b 100644
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@@ -5,6 +5,7 @@ from agentpress.tool import ToolResult, openapi_schema, xml_schema
 from agentpress.thread_manager import ThreadManager
 from sandbox.tool_base import SandboxToolsBase
 from utils.logger import logger
+from utils.s3_upload_utils import upload_base64_image
 
 
 class SandboxBrowserTool(SandboxToolsBase):
@@ -59,6 +60,17 @@ class SandboxBrowserTool(SandboxToolsBase):
 
                     logger.info("Browser automation request completed successfully")
 
+                    if "screenshot_base64" in result:
+                        try:
+                            image_url = await upload_base64_image(result["screenshot_base64"])
+                            result["image_url"] = image_url
+                            # Remove base64 data from result to keep it clean
+                            del result["screenshot_base64"]
+                            logger.debug(f"Uploaded screenshot to {image_url}")
+                        except Exception as e:
+                            logger.error(f"Failed to upload screenshot: {e}")
+                            result["image_upload_error"] = str(e)
+
                     added_message = await self.thread_manager.add_message(
                         thread_id=self.thread_id,
                         type="browser_state",
@@ -83,6 +95,8 @@ class SandboxBrowserTool(SandboxToolsBase):
                         success_response["scrollable_content"] = result["pixels_below"] > 0
                     if result.get("ocr_text"):
                         success_response["ocr_text"] = result["ocr_text"]
+                    if result.get("image_url"):
+                        success_response["image_url"] = result["image_url"]
 
                     return self.success_response(success_response)
 
diff --git a/backend/services/supabase.py b/backend/services/supabase.py
index 0bb1419a..0a3f8558 100644
--- a/backend/services/supabase.py
+++ b/backend/services/supabase.py
@@ -6,6 +6,9 @@ from typing import Optional
 from supabase import create_async_client, AsyncClient
 from utils.logger import logger
 from utils.config import config
+import base64
+import uuid
+from datetime import datetime
 
 class DBConnection:
     """Singleton database connection manager using Supabase."""
@@ -66,4 +69,45 @@ class DBConnection:
             raise RuntimeError("Database not initialized")
         return self._client
 
+    async def upload_base64_image(self, base64_data: str, bucket_name: str = "browser-screenshots") -> str:
+        """Upload a base64 encoded image to Supabase storage and return the URL.
+        
+        Args:
+            base64_data (str): Base64 encoded image data (with or without data URL prefix)
+            bucket_name (str): Name of the storage bucket to upload to
+            
+        Returns:
+            str: Public URL of the uploaded image
+        """
+        try:
+            # Remove data URL prefix if present
+            if base64_data.startswith('data:'):
+                base64_data = base64_data.split(',')[1]
+            
+            # Decode base64 data
+            image_data = base64.b64decode(base64_data)
+            
+            # Generate unique filename
+            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+            unique_id = str(uuid.uuid4())[:8]
+            filename = f"image_{timestamp}_{unique_id}.png"
+            
+            # Upload to Supabase storage
+            client = await self.client
+            storage_response = await client.storage.from_(bucket_name).upload(
+                filename,
+                image_data,
+                {"content-type": "image/png"}
+            )
+            
+            # Get public URL
+            public_url = await client.storage.from_(bucket_name).get_public_url(filename)
+            
+            logger.debug(f"Successfully uploaded image to {public_url}")
+            return public_url
+            
+        except Exception as e:
+            logger.error(f"Error uploading base64 image: {e}")
+            raise RuntimeError(f"Failed to upload image: {str(e)}")
+
 
diff --git a/backend/utils/s3_upload_utils.py b/backend/utils/s3_upload_utils.py
new file mode 100644
index 00000000..65722640
--- /dev/null
+++ b/backend/utils/s3_upload_utils.py
@@ -0,0 +1,51 @@
+"""
+Utility functions for handling image operations.
+"""
+
+import base64
+import uuid
+from datetime import datetime
+from utils.logger import logger
+from services.supabase import DBConnection
+
+async def upload_base64_image(base64_data: str, bucket_name: str = "browser-screenshots") -> str:
+    """Upload a base64 encoded image to Supabase storage and return the URL.
+    
+    Args:
+        base64_data (str): Base64 encoded image data (with or without data URL prefix)
+        bucket_name (str): Name of the storage bucket to upload to
+        
+    Returns:
+        str: Public URL of the uploaded image
+    """
+    try:
+        # Remove data URL prefix if present
+        if base64_data.startswith('data:'):
+            base64_data = base64_data.split(',')[1]
+        
+        # Decode base64 data
+        image_data = base64.b64decode(base64_data)
+        
+        # Generate unique filename
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        unique_id = str(uuid.uuid4())[:8]
+        filename = f"image_{timestamp}_{unique_id}.png"
+        
+        # Upload to Supabase storage
+        db = DBConnection()
+        client = await db.client
+        storage_response = await client.storage.from_(bucket_name).upload(
+            filename,
+            image_data,
+            {"content-type": "image/png"}
+        )
+        
+        # Get public URL
+        public_url = await client.storage.from_(bucket_name).get_public_url(filename)
+        
+        logger.debug(f"Successfully uploaded image to {public_url}")
+        return public_url
+        
+    except Exception as e:
+        logger.error(f"Error uploading base64 image: {e}")
+        raise RuntimeError(f"Failed to upload image: {str(e)}") 
\ No newline at end of file

From dd3f04c4a553f72ec514f11b88dc1e8361b0493c Mon Sep 17 00:00:00 2001
From: sharath <29162020+tnfssc@users.noreply.github.com>
Date: Sun, 18 May 2025 23:46:08 +0000
Subject: [PATCH 12/21] fix(redis): service crash on redis client maxxing

---
 backend/docker-compose.yml         | 3 ++-
 backend/services/docker/redis.conf | 1 +
 docker-compose.yaml                | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 backend/services/docker/redis.conf

diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml
index f5f91540..eb46c224 100644
--- a/backend/docker-compose.yml
+++ b/backend/docker-compose.yml
@@ -133,10 +133,11 @@ services:
       - "127.0.0.1:6379:6379"
     volumes:
       - redis_data:/data
+      - ./services/docker/redis.conf:/usr/local/etc/redis/redis.conf:ro
     restart: unless-stopped
     networks:
       - app-network
-    command: redis-server --appendonly yes --bind 0.0.0.0 --protected-mode no --maxmemory 8gb --maxmemory-policy allkeys-lru
+    command: redis-server /usr/local/etc/redis/redis.conf --appendonly yes --bind 0.0.0.0 --protected-mode no --maxmemory 8gb --maxmemory-policy allkeys-lru
     healthcheck:
       test: ["CMD", "redis-cli", "ping"]
       interval: 10s
diff --git a/backend/services/docker/redis.conf b/backend/services/docker/redis.conf
new file mode 100644
index 00000000..b8b41800
--- /dev/null
+++ b/backend/services/docker/redis.conf
@@ -0,0 +1 @@
+timeout 120
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 61950eb1..4ace92bf 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -3,7 +3,8 @@ services:
     image: redis:7-alpine
     volumes:
       - redis_data:/data
-    command: redis-server --save 60 1 --loglevel warning
+      - ./backend/services/docker/redis.conf:/usr/local/etc/redis/redis.conf:ro
+    command: redis-server /usr/local/etc/redis/redis.conf --save 60 1 --loglevel warning
     healthcheck:
       test: ["CMD", "redis-cli", "ping"]
       interval: 10s

From f2e7b27e0287385788b87828c246aa9a072d2d7d Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 02:00:14 +0200
Subject: [PATCH 13/21] multi tab

---
 backend/sandbox/docker/browser_api.py | 218 ++++++++++++++++++++++----
 1 file changed, 185 insertions(+), 33 deletions(-)

diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py
index 0642fe1f..c2fc5186 100644
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@@ -282,8 +282,8 @@ class BrowserAutomation:
     def __init__(self):
         self.router = APIRouter()
         self.browser: Browser = None
-        self.context = None
-        self.page = None  # Single page instance
+        self.pages: List[Page] = []
+        self.current_page_index: int = 0
         self.logger = logging.getLogger("browser_automation")
         self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"]
         self.screenshot_dir = os.path.join(os.getcwd(), "screenshots")
@@ -305,6 +305,11 @@ class BrowserAutomation:
         self.router.post("/automation/input_text")(self.input_text)
         self.router.post("/automation/send_keys")(self.send_keys)
         
+        # Tab management
+        self.router.post("/automation/switch_tab")(self.switch_tab)
+        self.router.post("/automation/open_tab")(self.open_tab)
+        self.router.post("/automation/close_tab")(self.close_tab)
+        
         # Content actions
         self.router.post("/automation/extract_content")(self.extract_content)
         self.router.post("/automation/save_pdf")(self.save_pdf)
@@ -337,17 +342,6 @@ class BrowserAutomation:
             try:
                 self.browser = await playwright.chromium.launch(**launch_options)
                 print("Browser launched successfully")
-                
-                # Create a single context with viewport settings
-                self.context = await self.browser.new_context(
-                    viewport={'width': 1024, 'height': 768}
-                )
-                
-                # Create single page and navigate to a neutral page
-                self.page = await self.context.new_page()
-                await self.page.goto("about:blank", wait_until="domcontentloaded")
-                print("Initial page created and navigated to about:blank")
-                
             except Exception as browser_error:
                 print(f"Failed to launch browser: {browser_error}")
                 # Try with minimal options
@@ -355,16 +349,22 @@ class BrowserAutomation:
                 launch_options = {"timeout": 90000}
                 self.browser = await playwright.chromium.launch(**launch_options)
                 print("Browser launched with minimal options")
+
+            try:
+                await self.get_current_page()
+                print("Found existing page, using it")
+                self.current_page_index = 0
+            except Exception as page_error:
+                print(f"Error finding existing page, creating new one. ( {page_error})")
+                page = await self.browser.new_page(viewport={'width': 1024, 'height': 768})
+                print("New page created successfully")
+                self.pages.append(page)
+                self.current_page_index = 0
+                # Navigate directly to google.com instead of about:blank
+                await page.goto("https://www.google.com", wait_until="domcontentloaded", timeout=30000)
+                print("Navigated to google.com")
                 
-                # Create context and initial page even with minimal options
-                self.context = await self.browser.new_context(
-                    viewport={'width': 1024, 'height': 768}
-                )
-                self.page = await self.context.new_page()
-                await self.page.goto("about:blank", wait_until="domcontentloaded")
-                print("Initial page created with minimal options")
-                
-            print("Browser initialization completed successfully")
+                print("Browser initialization completed successfully")
         except Exception as e:
             print(f"Browser startup error: {str(e)}")
             traceback.print_exc()
@@ -376,10 +376,10 @@ class BrowserAutomation:
             await self.browser.close()
     
     async def get_current_page(self) -> Page:
-        """Get the current page"""
-        if not self.page:
-            raise HTTPException(status_code=500, detail="No browser page available")
-        return self.page
+        """Get the current active page"""
+        if not self.pages:
+            raise HTTPException(status_code=500, detail="No browser pages available")
+        return self.pages[self.current_page_index]
     
     async def get_selector_map(self) -> Dict[int, DOMElementNode]:
         """Get a map of selectable elements on the page"""
@@ -599,10 +599,16 @@ class BrowserAutomation:
                 is_top_element=True
             )
             dummy_map = {1: dummy_root}
+            current_url = "unknown"
+            try:
+                if 'page' in locals():
+                    current_url = page.url
+            except:
+                pass
             return DOMState(
                 element_tree=dummy_root,
                 selector_map=dummy_map,
-                url=page.url if 'page' in locals() else "about:blank",
+                url=current_url,
                 title="Error page",
                 pixels_above=0,
                 pixels_below=0
@@ -761,8 +767,9 @@ class BrowserAutomation:
     async def navigate_to(self, action: GoToUrlAction = Body(...)):
         """Navigate to a specified URL"""
         try:
-            await self.page.goto(action.url, wait_until="domcontentloaded")
-            await self.page.wait_for_load_state("networkidle", timeout=10000)
+            page = await self.get_current_page()
+            await page.goto(action.url, wait_until="domcontentloaded")
+            await page.wait_for_load_state("networkidle", timeout=10000)
             
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
@@ -811,9 +818,10 @@ class BrowserAutomation:
     async def search_google(self, action: SearchGoogleAction = Body(...)):
         """Search Google with the provided query"""
         try:
+            page = await self.get_current_page()
             search_url = f"https://www.google.com/search?q={action.query}"
-            await self.page.goto(search_url)
-            await self.page.wait_for_load_state()
+            await page.goto(search_url)
+            await page.wait_for_load_state()
             
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"search_google({action.query})")
@@ -859,8 +867,9 @@ class BrowserAutomation:
     async def go_back(self, _: NoParamsAction = Body(...)):
         """Navigate back in browser history"""
         try:
-            await self.page.go_back()
-            await self.page.wait_for_load_state()
+            page = await self.get_current_page()
+            await page.go_back()
+            await page.wait_for_load_state()
             
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("go_back")
@@ -1189,6 +1198,149 @@ class BrowserAutomation:
                 content=None
             )
     
+    # Tab Management Actions
+    
+    async def switch_tab(self, action: SwitchTabAction = Body(...)):
+        """Make the tab at index ``action.page_id`` the current one.
+
+        An index outside ``self.pages`` returns a "Tab N not found" result
+        instead of raising; any exception raised while switching is likewise
+        converted into a result carrying the error text.
+        """
+        try:
+            tab_index = action.page_id
+            # Guard clause: reject indices that do not name an open tab
+            if not (0 <= tab_index < len(self.pages)):
+                missing = f"Tab {tab_index} not found"
+                return self.build_action_result(
+                    False,
+                    missing,
+                    None,
+                    "",
+                    "",
+                    {},
+                    error=missing
+                )
+
+            self.current_page_index = tab_index
+            active_page = await self.get_current_page()
+            await active_page.wait_for_load_state()
+            # Capture the browser state of the newly selected tab
+            state_label = f"switch_tab({tab_index})"
+            dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(state_label)
+            return self.build_action_result(
+                True,
+                f"Switched to tab {tab_index}",
+                dom_state,
+                screenshot,
+                elements,
+                metadata,
+                error="",
+                content=None
+            )
+        except Exception as exc:
+            failure = str(exc)
+            return self.build_action_result(
+                False, failure, None, "", "", {}, error=failure, content=None
+            )
+    
+    async def open_tab(self, action: OpenTabAction = Body(...)):
+        """Open a new tab at ``action.url`` and make it the current tab.
+
+        The page joins ``self.pages`` only after navigation succeeded; when a
+        step before that fails, the half-opened page is closed so it does not
+        stay behind in the browser as a tab this class no longer tracks.
+        """
+        new_page = None
+        registered = False
+        try:
+            print(f"Attempting to open new tab with URL: {action.url}")
+            # Create new page in same browser instance
+            new_page = await self.browser.new_page()
+            print("New page created successfully")
+            await new_page.goto(action.url, wait_until="domcontentloaded")
+            await new_page.wait_for_load_state("networkidle", timeout=10000)
+            print(f"Navigated to URL in new tab: {action.url}")
+
+            # Add to page list and make it current
+            self.pages.append(new_page)
+            self.current_page_index = len(self.pages) - 1
+            registered = True
+            print(f"New tab added as index {self.current_page_index}")
+
+            # Get updated state after action
+            dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"open_tab({action.url})")
+            return self.build_action_result(
+                True, f"Opened new tab with URL: {action.url}",
+                dom_state, screenshot, elements, metadata,
+                error="", content=None
+            )
+        except Exception as e:
+            print("****"*10)
+            print(f"Error opening tab: {e}")
+            print(traceback.format_exc())
+            print("****"*10)
+            if new_page is not None and not registered:
+                # Failure happened before the page was tracked: close it
+                try:
+                    await new_page.close()
+                except Exception as close_error:
+                    print(f"Error closing failed tab: {close_error}")
+            return self.build_action_result(
+                False, str(e), None, "", "", {},
+                error=str(e), content=None
+            )
+    
+    async def close_tab(self, action: CloseTabAction = Body(...)):
+        """Close the tab at index ``action.page_id`` and re-point the current tab.
+
+        An index outside ``self.pages`` returns a "Tab N not found" result
+        instead of raising; any exception raised while closing is likewise
+        converted into a result carrying the error text.
+        """
+        try:
+            tab_index = action.page_id
+            # Guard clause: reject indices that do not name an open tab
+            if not (0 <= tab_index < len(self.pages)):
+                missing = f"Tab {tab_index} not found"
+                return self.build_action_result(
+                    False,
+                    missing,
+                    None,
+                    "",
+                    "",
+                    {},
+                    error=missing
+                )
+
+            closing_page = self.pages[tab_index]
+            closed_url = closing_page.url
+            await closing_page.close()
+            self.pages.pop(tab_index)
+
+            # Keep the current index valid: clamp it to the last remaining tab,
+            # otherwise step back one when the closed tab was at or before it
+            remaining = len(self.pages)
+            if self.current_page_index >= remaining:
+                self.current_page_index = max(0, remaining - 1)
+            elif self.current_page_index >= tab_index:
+                self.current_page_index = max(0, self.current_page_index - 1)
+
+            # Fetch the current page before reading state (result is unused)
+            await self.get_current_page()
+            state_label = f"close_tab({tab_index})"
+            dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(state_label)
+            return self.build_action_result(
+                True,
+                f"Closed tab {tab_index} with URL: {closed_url}",
+                dom_state, screenshot, elements, metadata,
+                error="", content=None
+            )
+        except Exception as exc:
+            failure = str(exc)
+            return self.build_action_result(
+                False, failure, None, "", "", {}, error=failure, content=None
+            )
+    
     # Content Actions
     
     async def extract_content(self, goal: str = Body(...)):

From c8826d520505994e5acf7255705899c002673b03 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 02:14:45 +0200
Subject: [PATCH 14/21] fix: expose RabbitMQ ports for local development

---
 docker-compose.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 61950eb1..46319c70 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -12,6 +12,9 @@ services:
 
   rabbitmq:
     image: rabbitmq
+    ports:
+      - "5672:5672"  
+      - "15672:15672"
     volumes:
       - rabbitmq_data:/var/lib/rabbitmq
     restart: unless-stopped

From 75372a94c038e33e08124bad84922b53043cc5f4 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 02:43:05 +0200
Subject: [PATCH 15/21] browser api fixes

---
 backend/agent/tools/sb_browser_tool.py |  2 +-
 backend/sandbox/docker/Dockerfile      |  3 +++
 backend/sandbox/docker/browser_api.py  | 23 +++++++++++++++++++++--
 backend/utils/config.py                |  2 +-
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py
index 844b821b..d297c3d5 100644
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@@ -31,7 +31,7 @@ class SandboxBrowserTool(SandboxToolsBase):
             await self._ensure_sandbox()
             
             # Build the curl command
-            url = f"http://localhost:8002/api/automation/{endpoint}"
+            url = f"http://localhost:8003/api/automation/{endpoint}"
             
             if method == "GET" and params:
                 query_params = "&".join([f"{k}={v}" for k, v in params.items()])
diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile
index 45ddb5ef..5608e335 100644
--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \
     iputils-ping \
     dnsutils \
     sudo \
+    # OCR Tools
+    tesseract-ocr \
+    tesseract-ocr-eng \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Node.js and npm
diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py
index c2fc5186..7fb5baf5 100644
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@@ -618,10 +618,29 @@ class BrowserAutomation:
         """Take a screenshot and return as base64 encoded string"""
         try:
             page = await self.get_current_page()
-            screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False)
+            
+            # Wait for network to be idle and DOM to be stable
+            try:
+                await page.wait_for_load_state("networkidle", timeout=60000)  # Increased timeout to 60s
+            except Exception as e:
+                print(f"Warning: Network idle timeout, proceeding anyway: {e}")
+            
+            # Wait for any animations to complete
+            # await page.wait_for_timeout(1000)  # Wait 1 second for animations
+            
+            # Take screenshot with increased timeout and better options
+            screenshot_bytes = await page.screenshot(
+                type='jpeg',
+                quality=60,
+                full_page=False,
+                timeout=60000,  # Increased timeout to 60s
+                scale='device'  # Use device scale factor
+            )
+            
             return base64.b64encode(screenshot_bytes).decode('utf-8')
         except Exception as e:
             print(f"Error taking screenshot: {e}")
+            traceback.print_exc()
             # Return an empty string rather than failing
             return ""
     
@@ -2065,4 +2084,4 @@ if __name__ == '__main__':
         asyncio.run(test_browser_api_2())
     else:
         print("Starting API server")
-        uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8002)
\ No newline at end of file
+        uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8003)
\ No newline at end of file
diff --git a/backend/utils/config.py b/backend/utils/config.py
index c1392d07..ea683267 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -159,7 +159,7 @@ class Configuration:
     STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.6"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.7"
     SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
 
     @property

From d15986b63e6c2346bc067ec5f0d378fe08403ac9 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 03:39:02 +0200
Subject: [PATCH 16/21] wip

---
 backend/sandbox/docker/docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml
index ff843624..08f0969d 100644
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@@ -6,7 +6,7 @@ services:
       dockerfile: ${DOCKERFILE:-Dockerfile}
       args:
         TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortix/suna:0.1.2.7
+    image: kortix/suna:0.1.2.8
     ports:
       - "6080:6080"  # noVNC web interface
       - "5901:5901"  # VNC port

From 74320d66406d78b55f30f56e2623dcac8e479566 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 03:54:33 +0200
Subject: [PATCH 17/21] kortix/suna:0.1.2.8 sandbox bump

---
 backend/utils/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/utils/config.py b/backend/utils/config.py
index ea683267..085cf041 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -159,7 +159,7 @@ class Configuration:
     STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
     
     # Sandbox configuration
-    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.7"
+    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.8"
     SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
 
     @property

From c1615b48ae7cba432170f125d56a6697253ecf27 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 04:24:21 +0200
Subject: [PATCH 18/21] fe change rev

---
 .../thread/tool-views/BrowserToolView.tsx     | 37 +++++++------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx
index d43453aa..2fcb45e9 100644
--- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx
+++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx
@@ -72,31 +72,20 @@ export function BrowserToolView({
 
   // Find the browser_state message and extract the screenshot
   let screenshotBase64: string | null = null;
-  let latestBrowserState: any = null;
-  let latestTimestamp = 0;
+  if (browserStateMessageId && messages.length > 0) {
+    const browserStateMessage = messages.find(
+      (msg) =>
+        (msg.type as string) === 'browser_state' &&
+        msg.message_id === browserStateMessageId,
+    );
 
-  if (messages.length > 0) {
-    // Find the latest browser_state message by comparing timestamps
-    messages.forEach((msg) => {
-      if ((msg.type as string) === 'browser_state') {
-        try {
-          const content = safeJsonParse<{ timestamp?: number }>(msg.content, {});
-          const timestamp = content?.timestamp || 0;
-          
-          if (timestamp > latestTimestamp) {
-            latestTimestamp = timestamp;
-            latestBrowserState = content;
-          }
-        } catch (error) {
-          console.error('[BrowserToolView] Error parsing browser state:', error);
-        }
-      }
-    });
-
-    // Use the latest browser state
-    if (latestBrowserState) {
-      screenshotBase64 = latestBrowserState.screenshot_base64 || null;
-      console.log('Latest browser state:', latestBrowserState);
+    if (browserStateMessage) {
+      const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>(
+        browserStateMessage.content,
+        {},
+      );
+      console.log('Browser state content: ', browserStateContent)
+      screenshotBase64 = browserStateContent?.screenshot_base64 || null;
     }
   }
 

From cea53931f74ef7d2b33f7f2104c1b969cd49a649 Mon Sep 17 00:00:00 2001
From: Soumyadas15 <saumyadas2017@gmail.com>
Date: Mon, 19 May 2025 10:43:53 +0530
Subject: [PATCH 19/21] chore(dev): second attempt to fix billing checks

---
 .../(dashboard)/agents/[threadId]/page.tsx    | 43 +++++++++----------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx
index 336af2a9..e7ead8b5 100644
--- a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx
+++ b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx
@@ -1013,38 +1013,35 @@ export default function ThreadPage({
     }
   }, [project?.account_id, billingStatusQuery]);
 
-  // Check billing when agent status changes
   useEffect(() => {
-    const previousStatus = previousAgentStatus.current;
+    let timeoutId: NodeJS.Timeout;
+    const shouldCheckBilling = 
+      project?.account_id && 
+      (initialLoadCompleted.current || 
+       (messagesLoadedRef.current && !isLoading) ||
+       (previousAgentStatus.current === 'running' && agentStatus === 'idle'));
 
-    // Check if agent just completed (status changed from running to idle)
-    if (previousStatus === 'running' && agentStatus === 'idle') {
-      checkBillingLimits();
+    if (shouldCheckBilling) {
+      timeoutId = setTimeout(() => {
+        checkBillingLimits();
+      }, 500);
     }
 
-    // Store current status for next comparison
     previousAgentStatus.current = agentStatus;
-  }, [agentStatus, checkBillingLimits]);
 
-  // Check billing on initial load
-  useEffect(() => {
-    if (project?.account_id && initialLoadCompleted.current) {
-      console.log('Checking billing status on page load');
-      checkBillingLimits();
-    }
-  }, [project?.account_id, checkBillingLimits, initialLoadCompleted]);
-
-  // Check billing after messages loaded
-  useEffect(() => {
-    if (messagesLoadedRef.current && project?.account_id && !isLoading) {
-      console.log('Checking billing status after messages loaded');
-      checkBillingLimits();
-    }
+    return () => {
+      if (timeoutId) {
+        clearTimeout(timeoutId);
+      }
+    };
+  // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [
-    messagesLoadedRef.current,
-    checkBillingLimits,
     project?.account_id,
+    initialLoadCompleted.current,
+    messagesLoadedRef.current,
     isLoading,
+    agentStatus,
+    checkBillingLimits
   ]);
 
   // Check for debug mode in URL on initial load and when URL changes

From f89d97568d3803309d8ae953c94682e578662920 Mon Sep 17 00:00:00 2001
From: Soumyadas15 <saumyadas2017@gmail.com>
Date: Mon, 19 May 2025 11:24:02 +0530
Subject: [PATCH 20/21] chore(dev): cleanup useeffect deps

---
 frontend/src/app/(dashboard)/agents/[threadId]/page.tsx | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx
index e7ead8b5..e1c9a80d 100644
--- a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx
+++ b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx
@@ -1034,11 +1034,8 @@ export default function ThreadPage({
         clearTimeout(timeoutId);
       }
     };
-  // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [
     project?.account_id,
-    initialLoadCompleted.current,
-    messagesLoadedRef.current,
     isLoading,
     agentStatus,
     checkBillingLimits

From 908208898ec4b1dff38564bc15d7d54b349ead82 Mon Sep 17 00:00:00 2001
From: Surajdusane <138127406+Surajdusane@users.noreply.github.com>
Date: Mon, 19 May 2025 12:04:18 +0530
Subject: [PATCH 21/21] fix: improve forgot password dialog opacity and remove
 duplicate close button

---
 frontend/src/app/auth/page.tsx           | 2 +-
 frontend/src/components/GoogleSignIn.tsx | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/frontend/src/app/auth/page.tsx b/frontend/src/app/auth/page.tsx
index aa5b6793..b90c143b 100644
--- a/frontend/src/app/auth/page.tsx
+++ b/frontend/src/app/auth/page.tsx
@@ -509,7 +509,7 @@ function LoginContent() {
 
       {/* Forgot Password Dialog */}
       <Dialog open={forgotPasswordOpen} onOpenChange={setForgotPasswordOpen}>
-        <DialogContent className="sm:max-w-md rounded-xl bg-[#F3F4F6] dark:bg-[#F9FAFB]/[0.02] border border-border">
+        <DialogContent className="sm:max-w-md rounded-xl bg-[#F3F4F6] dark:bg-[#17171A] border border-border [&>button]:hidden">
           <DialogHeader>
             <div className="flex items-center justify-between">
               <DialogTitle className="text-xl font-medium">
diff --git a/frontend/src/components/GoogleSignIn.tsx b/frontend/src/components/GoogleSignIn.tsx
index 1b6e3d87..a99eeb8d 100644
--- a/frontend/src/components/GoogleSignIn.tsx
+++ b/frontend/src/components/GoogleSignIn.tsx
@@ -3,6 +3,7 @@
 import { useEffect, useCallback, useRef, useState } from 'react';
 import Script from 'next/script';
 import { createClient } from '@/lib/supabase/client';
+import { useTheme } from 'next-themes';
 
 // Add type declarations for Google One Tap
 declare global {
@@ -68,6 +69,7 @@ interface GoogleSignInProps {
 export default function GoogleSignIn({ returnUrl }: GoogleSignInProps) {
   const googleClientId = process.env.NEXT_PUBLIC_GOOGLE_CLIENT_ID;
   const [isLoading, setIsLoading] = useState(false);
+  const { resolvedTheme } = useTheme();
 
   const handleGoogleSignIn = useCallback(
     async (response: GoogleSignInResponse) => {
@@ -184,7 +186,7 @@ export default function GoogleSignIn({ returnUrl }: GoogleSignInProps) {
             if (buttonContainer) {
               window.google.accounts.id.renderButton(buttonContainer, {
                 type: 'standard',
-                theme: 'outline',
+                theme: resolvedTheme === 'dark' ? 'filled_black' : 'outline',
                 size: 'large',
                 text: 'continue_with',
                 shape: 'pill',