chore(docker): update Docker configurations and streamline browser handling

- Removed unnecessary comments and updated the Docker Compose files for backend and sandbox services.
- Changed `docker-compose` to `docker compose` in README for consistency.
- Updated Chromium dependencies and configurations in Dockerfile for improved stability.
- Enhanced browser automation logic to handle page navigation and state recovery more effectively.
- Adjusted environment variables for better performance and resource management.
2025-05-18 05:49:09 +02:00 · 2025-05-18 05:49:09 +02:00 · cbb8b7e65a
parent 0e3f9d5a1f
commit cbb8b7e65a
6 changed files with 120 additions and 35 deletions
--- a/backend/docker-compose.yml
+++ b/backend/docker-compose.yml
@ -1,7 +1,3 @@
-# This is a Docker Compose file for the backend service. For self-hosting, look at the root docker-compose.yml file.
-
-version: "3.8"
-
 services:
  api:
    build:
--- a/backend/sandbox/README.md
+++ b/backend/sandbox/README.md
@ -19,7 +19,7 @@ You can modify the sandbox environment for development or to add new capabilitie
 2. Build a custom image:
   ```
   cd backend/sandbox/docker
-   docker-compose build
+   docker compose build
   ```
 3. Test your changes locally using docker-compose

--- a/backend/sandbox/docker/Dockerfile
+++ b/backend/sandbox/docker/Dockerfile
@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \
    iputils-ping \
    dnsutils \
    sudo \
+    # Chromium dependencies
+    chromium \
+    chromium-driver \
    && rm -rf /var/lib/apt/lists/*

 # Install Node.js and npm
@ -110,14 +113,20 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh

 # Set environment variables
 ENV PYTHONUNBUFFERED=1
-ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
+ENV CHROME_PATH=/usr/bin/chromium
 ENV ANONYMIZED_TELEMETRY=false
 ENV DISPLAY=:99
-ENV RESOLUTION=1920x1080x24
+ENV RESOLUTION=1024x768x24
 ENV VNC_PASSWORD=vncpassword
 ENV CHROME_PERSISTENT_SESSION=true
-ENV RESOLUTION_WIDTH=1920
-ENV RESOLUTION_HEIGHT=1080
+ENV RESOLUTION_WIDTH=1024
+ENV RESOLUTION_HEIGHT=768
+# Add Chrome stability flags
+ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"
+# Add Playwright specific settings
+ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium

 # Set up supervisor configuration
 RUN mkdir -p /var/log/supervisor
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@ -311,7 +311,7 @@ class BrowserAutomation:
        
        # Tab management
        self.router.post("/automation/switch_tab")(self.switch_tab)
-        self.router.post("/automation/open_tab")(self.open_tab)
+        # self.router.post("/automation/open_tab")(self.open_tab)
        self.router.post("/automation/close_tab")(self.close_tab)
        
        # Content actions
@ -337,7 +337,7 @@ class BrowserAutomation:
            playwright = await async_playwright().start()
            print("Playwright started, launching browser...")
            
-            # Use non-headless mode for testing with slower timeouts
+            # Use non-headless mode for testing with slower timeouts and additional options
            launch_options = {
                "headless": False,
                "timeout": 60000
@ -354,21 +354,17 @@ class BrowserAutomation:
                self.browser = await playwright.chromium.launch(**launch_options)
                print("Browser launched with minimal options")

-            try:
-                await self.get_current_page()
-                print("Found existing page, using it")
-                self.current_page_index = 0
-            except Exception as page_error:
-                print(f"Error finding existing page, creating new one. ( {page_error})")
+            # Check if we already have pages
+            if not self.pages:
+                print("Creating initial page")
                page = await self.browser.new_page()
-                print("New page created successfully")
                self.pages.append(page)
                self.current_page_index = 0
-                # Navigate to about:blank to ensure page is ready
-                # await page.goto("google.com", timeout=30000)
-                print("Navigated to google.com")
+            else:
+                print("Using existing page")
+                self.current_page_index = 0
                
-                print("Browser initialization completed successfully")
+            print("Browser initialization completed successfully")
        except Exception as e:
            print(f"Browser startup error: {str(e)}")
            traceback.print_exc()
@ -533,6 +529,18 @@ class BrowserAutomation:
        """Get the current DOM state including element tree and selector map"""
        try:
            page = await self.get_current_page()
+            
+            # First check if page is valid and has content
+            try:
+                current_url = page.url
+                if current_url == "about:blank":
+                    # If page is blank, try to recover by waiting for content
+                    await page.wait_for_load_state("domcontentloaded", timeout=5000)
+                    current_url = page.url
+            except Exception as e:
+                print(f"Error checking page URL: {e}")
+                current_url = "about:blank"
+            
            selector_map = await self.get_selector_map()
            
            # Create a root element
@ -550,13 +558,12 @@ class BrowserAutomation:
                    root.children.append(element)
            
            # Get basic page info
-            url = page.url
            try:
                title = await page.title()
            except:
                title = "Unknown Title"
            
-            # Get more accurate scroll information - fix JavaScript syntax
+            # Get more accurate scroll information
            try:
                scroll_info = await page.evaluate("""
                () => {
@ -587,7 +594,7 @@ class BrowserAutomation:
            return DOMState(
                element_tree=root,
                selector_map=selector_map,
-                url=url,
+                url=current_url,
                title=title,
                pixels_above=pixels_above,
                pixels_below=pixels_below
@ -595,7 +602,16 @@ class BrowserAutomation:
        except Exception as e:
            print(f"Error getting DOM state: {e}")
            traceback.print_exc()
-            # Return a minimal valid state to avoid breaking tests
+            
+            # Try to get at least the current URL before falling back
+            current_url = "about:blank"
+            try:
+                page = await self.get_current_page()
+                current_url = page.url
+            except:
+                pass
+                
+            # Return a minimal valid state with the actual URL if possible
            dummy_root = DOMElementNode(
                is_visible=True,
                tag_name="body",
@ -606,7 +622,7 @@ class BrowserAutomation:
            return DOMState(
                element_tree=dummy_root,
                selector_map=dummy_map,
-                url=page.url if 'page' in locals() else "about:blank",
+                url=current_url,
                title="Error page",
                pixels_above=0,
                pixels_below=0
@ -860,10 +876,52 @@ class BrowserAutomation:
        """Navigate to a specified URL"""
        try:
            page = await self.get_current_page()
-            await page.goto(action.url, wait_until="domcontentloaded")
-            await page.wait_for_load_state("networkidle", timeout=10000)
            
-            # Get updated state after action
+            # First check if we're already on the target URL
+            current_url = page.url
+            if current_url == action.url:
+                print(f"Already on target URL: {action.url}")
+                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
+                return self.build_action_result(
+                    True,
+                    f"Already on {action.url}",
+                    dom_state,
+                    screenshot,
+                    elements,
+                    metadata,
+                    error="",
+                    content=None
+                )
+            
+            # Attempt navigation with retries
+            max_retries = 3
+            retry_count = 0
+            last_error = None
+            
+            while retry_count < max_retries:
+                try:
+                    print(f"Navigation attempt {retry_count + 1} to {action.url}")
+                    await page.goto(action.url, wait_until="domcontentloaded", timeout=30000)
+                    await page.wait_for_load_state("networkidle", timeout=10000)
+                    
+                    # Verify we actually navigated to the target URL
+                    new_url = page.url
+                    if new_url == "about:blank":
+                        raise Exception("Navigation resulted in blank page")
+                        
+                    print(f"Successfully navigated to {new_url}")
+                    break
+                except Exception as e:
+                    last_error = e
+                    retry_count += 1
+                    if retry_count < max_retries:
+                        print(f"Navigation attempt {retry_count} failed: {e}")
+                        await asyncio.sleep(1)  # Wait before retry
+                    else:
+                        print(f"All navigation attempts failed: {e}")
+                        raise
+            
+            # Get updated state after successful navigation
            dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
            
            result = self.build_action_result(
@ -882,6 +940,7 @@ class BrowserAutomation:
        except Exception as e:
            print(f"Navigation error: {str(e)}")
            traceback.print_exc()
+            
            # Try to get some state info even after error
            try:
                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("navigate_error_recovery")
@ -896,6 +955,14 @@ class BrowserAutomation:
                    content=None
                )
            except:
+                # If we can't get state, at least try to get the current URL
+                current_url = "about:blank"
+                try:
+                    page = await self.get_current_page()
+                    current_url = page.url
+                except:
+                    pass
+                    
                return self.build_action_result(
                    False,
                    str(e),
@ -904,7 +971,8 @@ class BrowserAutomation:
                    "",
                    {},
                    error=str(e),
-                    content=None
+                    content=None,
+                    fallback_url=current_url
                )
    
    async def search_google(self, action: SearchGoogleAction = Body(...)):
--- a/backend/sandbox/docker/docker-compose.yml
+++ b/backend/sandbox/docker/docker-compose.yml
@ -6,7 +6,7 @@ services:
      dockerfile: ${DOCKERFILE:-Dockerfile}
      args:
        TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
-    image: kortix/suna:0.1.2
+    image: kortix/suna:0.1.2.1
    ports:
      - "6080:6080"  # noVNC web interface
      - "5901:5901"  # VNC port
@ -15,7 +15,7 @@ services:
      - "8080:8080"  # HTTP server port
    environment:
      - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
-      - CHROME_PATH=/usr/bin/google-chrome
+      - CHROME_PATH=/usr/bin/chromium
      - CHROME_USER_DATA=/app/data/chrome_data
      - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
      - CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
@ -27,8 +27,13 @@ services:
      - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
      - CHROME_DEBUGGING_PORT=9222
      - CHROME_DEBUGGING_HOST=localhost
+      - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"}
+      - PLAYWRIGHT_SKIP_BROWSER_GC=1
+      - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
+      - NODE_OPTIONS="--max-old-space-size=4096"
    volumes:
      - /tmp/.X11-unix:/tmp/.X11-unix
+      - chrome_data:/app/data/chrome_data
    restart: unless-stopped
    shm_size: '2gb'
    cap_add:
@ -42,3 +47,10 @@ services:
      interval: 10s
      timeout: 5s
      retries: 3
+    ulimits:
+      nofile:
+        soft: 65536
+        hard: 65536
+
+volumes:
+  chrome_data:
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@ -155,8 +155,8 @@ class Configuration:
    STRIPE_DEFAULT_TRIAL_DAYS: int = 14
    
    # Stripe Product IDs
-    STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD'  # Production product ID
-    STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'  # Staging product ID
+    STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD'
+    STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
    
    # Sandbox configuration
    SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2"