From cbb8b7e65ab6c10af668d6fcef8518376a00003e Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 05:49:09 +0200 Subject: [PATCH 01/21] chore(docker): update Docker configurations and streamline browser handling - Removed unnecessary comments and updated the Docker Compose files for backend and sandbox services. - Changed `docker-compose` to `docker compose` in README for consistency. - Updated Chromium dependencies and configurations in Dockerfile for improved stability. - Enhanced browser automation logic to handle page navigation and state recovery more effectively. - Adjusted environment variables for better performance and resource management. --- backend/docker-compose.yml | 4 - backend/sandbox/README.md | 2 +- backend/sandbox/docker/Dockerfile | 17 +++- backend/sandbox/docker/browser_api.py | 112 +++++++++++++++++----- backend/sandbox/docker/docker-compose.yml | 16 +++- backend/utils/config.py | 4 +- 6 files changed, 120 insertions(+), 35 deletions(-) diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index f5f91540..094f94da 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -1,7 +1,3 @@ -# This is a Docker Compose file for the backend service. For self-hosting, look at the root docker-compose.yml file. - -version: "3.8" - services: api: build: diff --git a/backend/sandbox/README.md b/backend/sandbox/README.md index cafa4b7b..f8c0d571 100644 --- a/backend/sandbox/README.md +++ b/backend/sandbox/README.md @@ -19,7 +19,7 @@ You can modify the sandbox environment for development or to add new capabilitie 2. Build a custom image: ``` cd backend/sandbox/docker - docker-compose build + docker compose build ``` 3. Test your changes locally using docker-compose diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index 418fe524..f51d706b 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \ iputils-ping \ dnsutils \ sudo \ + # Chromium dependencies + chromium \ + chromium-driver \ && rm -rf /var/lib/apt/lists/* # Install Node.js and npm @@ -110,14 +113,20 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh # Set environment variables ENV PYTHONUNBUFFERED=1 -ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome +ENV CHROME_PATH=/usr/bin/chromium ENV ANONYMIZED_TELEMETRY=false ENV DISPLAY=:99 -ENV RESOLUTION=1920x1080x24 +ENV RESOLUTION=1024x768x24 ENV VNC_PASSWORD=vncpassword ENV CHROME_PERSISTENT_SESSION=true -ENV RESOLUTION_WIDTH=1920 -ENV RESOLUTION_HEIGHT=1080 +ENV RESOLUTION_WIDTH=1024 +ENV RESOLUTION_HEIGHT=768 +# Add Chrome stability flags +ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio" +# Add Playwright specific settings +ENV PLAYWRIGHT_SKIP_BROWSER_GC=1 +ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright +ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index f122cdfc..76d7b01d 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -311,7 +311,7 @@ class BrowserAutomation: # Tab management self.router.post("/automation/switch_tab")(self.switch_tab) - self.router.post("/automation/open_tab")(self.open_tab) + # self.router.post("/automation/open_tab")(self.open_tab) self.router.post("/automation/close_tab")(self.close_tab) # Content actions @@ -337,7 +337,7 @@ class BrowserAutomation: playwright = await async_playwright().start() print("Playwright started, launching browser...") - # Use non-headless mode for testing with slower timeouts + # Use non-headless mode for testing with slower timeouts and additional options launch_options = { "headless": False, "timeout": 60000 @@ -354,21 +354,17 @@ class BrowserAutomation: self.browser = await playwright.chromium.launch(**launch_options) print("Browser launched with minimal options") - try: - await self.get_current_page() - print("Found existing page, using it") - self.current_page_index = 0 - except Exception as page_error: - print(f"Error finding existing page, creating new one. ( {page_error})") + # Check if we already have pages + if not self.pages: + print("Creating initial page") page = await self.browser.new_page() - print("New page created successfully") self.pages.append(page) self.current_page_index = 0 - # Navigate to about:blank to ensure page is ready - # await page.goto("google.com", timeout=30000) - print("Navigated to google.com") + else: + print("Using existing page") + self.current_page_index = 0 - print("Browser initialization completed successfully") + print("Browser initialization completed successfully") except Exception as e: print(f"Browser startup error: {str(e)}") traceback.print_exc() @@ -533,6 +529,18 @@ class BrowserAutomation: """Get the current DOM state including element tree and selector map""" try: page = await self.get_current_page() + + # First check if page is valid and has content + try: + current_url = page.url + if current_url == "about:blank": + # If page is blank, try to recover by waiting for content + await page.wait_for_load_state("domcontentloaded", timeout=5000) + current_url = page.url + except Exception as e: + print(f"Error checking page URL: {e}") + current_url = "about:blank" + selector_map = await self.get_selector_map() # Create a root element @@ -550,13 +558,12 @@ class BrowserAutomation: root.children.append(element) # Get basic page info - url = page.url try: title = await page.title() except: title = "Unknown Title" - # Get more accurate scroll information - fix JavaScript syntax + # Get more accurate scroll information try: scroll_info = await page.evaluate(""" () => { @@ -587,7 +594,7 @@ class BrowserAutomation: return DOMState( element_tree=root, selector_map=selector_map, - url=url, + url=current_url, title=title, pixels_above=pixels_above, pixels_below=pixels_below @@ -595,7 +602,16 @@ class BrowserAutomation: except Exception as e: print(f"Error getting DOM state: {e}") traceback.print_exc() - # Return a minimal valid state to avoid breaking tests + + # Try to get at least the current URL before falling back + current_url = "about:blank" + try: + page = await self.get_current_page() + current_url = page.url + except: + pass + + # Return a minimal valid state with the actual URL if possible dummy_root = DOMElementNode( is_visible=True, tag_name="body", @@ -606,7 +622,7 @@ class BrowserAutomation: return DOMState( element_tree=dummy_root, selector_map=dummy_map, - url=page.url if 'page' in locals() else "about:blank", + url=current_url, title="Error page", pixels_above=0, pixels_below=0 @@ -860,10 +876,52 @@ class BrowserAutomation: """Navigate to a specified URL""" try: page = await self.get_current_page() - await page.goto(action.url, wait_until="domcontentloaded") - await page.wait_for_load_state("networkidle", timeout=10000) - # Get updated state after action + # First check if we're already on the target URL + current_url = page.url + if current_url == action.url: + print(f"Already on target URL: {action.url}") + dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})") + return self.build_action_result( + True, + f"Already on {action.url}", + dom_state, + screenshot, + elements, + metadata, + error="", + content=None + ) + + # Attempt navigation with retries + max_retries = 3 + retry_count = 0 + last_error = None + + while retry_count < max_retries: + try: + print(f"Navigation attempt {retry_count + 1} to {action.url}") + await page.goto(action.url, wait_until="domcontentloaded", timeout=30000) + await page.wait_for_load_state("networkidle", timeout=10000) + + # Verify we actually navigated to the target URL + new_url = page.url + if new_url == "about:blank": + raise Exception("Navigation resulted in blank page") + + print(f"Successfully navigated to {new_url}") + break + except Exception as e: + last_error = e + retry_count += 1 + if retry_count < max_retries: + print(f"Navigation attempt {retry_count} failed: {e}") + await asyncio.sleep(1) # Wait before retry + else: + print(f"All navigation attempts failed: {e}") + raise + + # Get updated state after successful navigation dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})") result = self.build_action_result( @@ -882,6 +940,7 @@ class BrowserAutomation: except Exception as e: print(f"Navigation error: {str(e)}") traceback.print_exc() + # Try to get some state info even after error try: dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("navigate_error_recovery") @@ -896,6 +955,14 @@ class BrowserAutomation: content=None ) except: + # If we can't get state, at least try to get the current URL + current_url = "about:blank" + try: + page = await self.get_current_page() + current_url = page.url + except: + pass + return self.build_action_result( False, str(e), @@ -904,7 +971,8 @@ class BrowserAutomation: "", {}, error=str(e), - content=None + content=None, + fallback_url=current_url ) async def search_google(self, action: SearchGoogleAction = Body(...)): diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index 27432984..03024280 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -6,7 +6,7 @@ services: dockerfile: ${DOCKERFILE:-Dockerfile} args: TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64} - image: kortix/suna:0.1.2 + image: kortix/suna:0.1.2.1 ports: - "6080:6080" # noVNC web interface - "5901:5901" # VNC port @@ -15,7 +15,7 @@ services: - "8080:8080" # HTTP server port environment: - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false} - - CHROME_PATH=/usr/bin/google-chrome + - CHROME_PATH=/usr/bin/chromium - CHROME_USER_DATA=/app/data/chrome_data - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false} - CHROME_CDP=${CHROME_CDP:-http://localhost:9222} @@ -27,8 +27,13 @@ services: - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword} - CHROME_DEBUGGING_PORT=9222 - CHROME_DEBUGGING_HOST=localhost + - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"} + - PLAYWRIGHT_SKIP_BROWSER_GC=1 + - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium + - NODE_OPTIONS="--max-old-space-size=4096" volumes: - /tmp/.X11-unix:/tmp/.X11-unix + - chrome_data:/app/data/chrome_data restart: unless-stopped shm_size: '2gb' cap_add: @@ -42,3 +47,10 @@ services: interval: 10s timeout: 5s retries: 3 + ulimits: + nofile: + soft: 65536 + hard: 65536 + +volumes: + chrome_data: diff --git a/backend/utils/config.py b/backend/utils/config.py index e08b3eab..c396e61b 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -155,8 +155,8 @@ class Configuration: STRIPE_DEFAULT_TRIAL_DAYS: int = 14 # Stripe Product IDs - STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD' # Production product ID - STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Staging product ID + STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD' + STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2" From 7b27f83a6d2dcce87353a147a1189bab133e6a96 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 05:55:02 +0200 Subject: [PATCH 02/21] use playwright chromium directly --- backend/sandbox/docker/Dockerfile | 7 ++----- backend/sandbox/docker/docker-compose.yml | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index f51d706b..c02f2f2e 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -68,9 +68,6 @@ RUN apt-get update && apt-get install -y \ iputils-ping \ dnsutils \ sudo \ - # Chromium dependencies - chromium \ - chromium-driver \ && rm -rf /var/lib/apt/lists/* # Install Node.js and npm @@ -113,7 +110,7 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh # Set environment variables ENV PYTHONUNBUFFERED=1 -ENV CHROME_PATH=/usr/bin/chromium +ENV CHROME_PATH=/ms-playwright/chromium-*/chrome ENV ANONYMIZED_TELEMETRY=false ENV DISPLAY=:99 ENV RESOLUTION=1024x768x24 @@ -126,7 +123,7 @@ ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-s # Add Playwright specific settings ENV PLAYWRIGHT_SKIP_BROWSER_GC=1 ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright -ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium +ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index 03024280..136c972f 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -15,7 +15,7 @@ services: - "8080:8080" # HTTP server port environment: - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false} - - CHROME_PATH=/usr/bin/chromium + - CHROME_PATH=/ms-playwright/chromium-*/chrome - CHROME_USER_DATA=/app/data/chrome_data - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false} - CHROME_CDP=${CHROME_CDP:-http://localhost:9222} @@ -29,7 +29,7 @@ services: - CHROME_DEBUGGING_HOST=localhost - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"} - PLAYWRIGHT_SKIP_BROWSER_GC=1 - - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium + - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome - NODE_OPTIONS="--max-old-space-size=4096" volumes: - /tmp/.X11-unix:/tmp/.X11-unix From 9d27ec2beadae4c1631bcd05c972429e52853501 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 06:15:09 +0200 Subject: [PATCH 03/21] wip --- backend/sandbox/docker/Dockerfile | 4 ++-- backend/sandbox/docker/docker-compose.yml | 12 +++++++++--- backend/utils/config.py | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index c02f2f2e..c5b201f5 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -110,7 +110,7 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh # Set environment variables ENV PYTHONUNBUFFERED=1 -ENV CHROME_PATH=/ms-playwright/chromium-*/chrome +ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome ENV ANONYMIZED_TELEMETRY=false ENV DISPLAY=:99 ENV RESOLUTION=1024x768x24 @@ -123,7 +123,7 @@ ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-s # Add Playwright specific settings ENV PLAYWRIGHT_SKIP_BROWSER_GC=1 ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright -ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome +ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index 136c972f..73729279 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -6,7 +6,7 @@ services: dockerfile: ${DOCKERFILE:-Dockerfile} args: TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64} - image: kortix/suna:0.1.2.1 + image: kortix/suna:0.1.2 ports: - "6080:6080" # noVNC web interface - "5901:5901" # VNC port @@ -15,7 +15,7 @@ services: - "8080:8080" # HTTP server port environment: - ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false} - - CHROME_PATH=/ms-playwright/chromium-*/chrome + - CHROME_PATH=/usr/bin/google-chrome - CHROME_USER_DATA=/app/data/chrome_data - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false} - CHROME_CDP=${CHROME_CDP:-http://localhost:9222} @@ -29,7 +29,7 @@ services: - CHROME_DEBUGGING_HOST=localhost - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"} - PLAYWRIGHT_SKIP_BROWSER_GC=1 - - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome + - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome - NODE_OPTIONS="--max-old-space-size=4096" volumes: - /tmp/.X11-unix:/tmp/.X11-unix @@ -51,6 +51,12 @@ services: nofile: soft: 65536 hard: 65536 + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 2G volumes: chrome_data: diff --git a/backend/utils/config.py b/backend/utils/config.py index c396e61b..73f1cbd5 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.2" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property From 0ae9b1cd1093d672851db37f108b2fdc7e39e611 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 06:23:01 +0200 Subject: [PATCH 04/21] wip --- backend/sandbox/docker/Dockerfile | 12 +++--------- backend/sandbox/docker/docker-compose.yml | 18 ------------------ 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index c5b201f5..418fe524 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -113,17 +113,11 @@ ENV PYTHONUNBUFFERED=1 ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome ENV ANONYMIZED_TELEMETRY=false ENV DISPLAY=:99 -ENV RESOLUTION=1024x768x24 +ENV RESOLUTION=1920x1080x24 ENV VNC_PASSWORD=vncpassword ENV CHROME_PERSISTENT_SESSION=true -ENV RESOLUTION_WIDTH=1024 -ENV RESOLUTION_HEIGHT=768 -# Add Chrome stability flags -ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio" -# Add Playwright specific settings -ENV PLAYWRIGHT_SKIP_BROWSER_GC=1 -ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright -ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome +ENV RESOLUTION_WIDTH=1920 +ENV RESOLUTION_HEIGHT=1080 # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index 73729279..27432984 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -27,13 +27,8 @@ services: - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword} - CHROME_DEBUGGING_PORT=9222 - CHROME_DEBUGGING_HOST=localhost - - CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"} - - PLAYWRIGHT_SKIP_BROWSER_GC=1 - - PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/ms-playwright/chromium-*/chrome-linux/chrome - - NODE_OPTIONS="--max-old-space-size=4096" volumes: - /tmp/.X11-unix:/tmp/.X11-unix - - chrome_data:/app/data/chrome_data restart: unless-stopped shm_size: '2gb' cap_add: @@ -47,16 +42,3 @@ services: interval: 10s timeout: 5s retries: 3 - ulimits: - nofile: - soft: 65536 - hard: 65536 - deploy: - resources: - limits: - memory: 4G - reservations: - memory: 2G - -volumes: - chrome_data: From 2af572ab035ed94d549fd36fbbced11b62238d8b Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 20:44:44 +0200 Subject: [PATCH 05/21] wip --- backend/agent/tools/sb_browser_tool.py | 2 +- backend/sandbox/docker/Dockerfile | 6 +- backend/sandbox/docker/browser_api.py | 257 +++++-------------------- backend/utils/config.py | 2 +- 4 files changed, 55 insertions(+), 212 deletions(-) diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index ce9130ec..59602db1 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -30,7 +30,7 @@ class SandboxBrowserTool(SandboxToolsBase): await self._ensure_sandbox() # Build the curl command - url = f"http://localhost:8002/api/automation/{endpoint}" + url = f"http://localhost:8003/api/automation/{endpoint}" if method == "GET" and params: query_params = "&".join([f"{k}={v}" for k, v in params.items()]) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index 418fe524..b8f74a52 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -113,11 +113,11 @@ ENV PYTHONUNBUFFERED=1 ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome ENV ANONYMIZED_TELEMETRY=false ENV DISPLAY=:99 -ENV RESOLUTION=1920x1080x24 +ENV RESOLUTION=1024x768x24 ENV VNC_PASSWORD=vncpassword ENV CHROME_PERSISTENT_SESSION=true -ENV RESOLUTION_WIDTH=1920 -ENV RESOLUTION_HEIGHT=1080 +ENV RESOLUTION_WIDTH=1024 +ENV RESOLUTION_HEIGHT=768 # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index 76d7b01d..1490bba9 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -15,8 +15,6 @@ import traceback import pytesseract from PIL import Image import io -from utils.logger import logger -from services.supabase import DBConnection ####################################################### # Action model definitions @@ -261,16 +259,15 @@ class BrowserActionResult(BaseModel): url: Optional[str] = None title: Optional[str] = None elements: Optional[str] = None # Formatted string of clickable elements - screenshot_base64: Optional[str] = None # For backward compatibility - screenshot_url: Optional[str] = None + screenshot_base64: Optional[str] = None pixels_above: int = 0 pixels_below: int = 0 content: Optional[str] = None - ocr_text: Optional[str] = None + ocr_text: Optional[str] = None # Added field for OCR text # Additional metadata - element_count: int = 0 - interactive_elements: Optional[List[Dict[str, Any]]] = None + element_count: int = 0 # Number of interactive elements found + interactive_elements: Optional[List[Dict[str, Any]]] = None # Simplified list of interactive elements viewport_width: Optional[int] = None viewport_height: Optional[int] = None @@ -291,7 +288,6 @@ class BrowserAutomation: self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"] self.screenshot_dir = os.path.join(os.getcwd(), "screenshots") os.makedirs(self.screenshot_dir, exist_ok=True) - self.db = DBConnection() # Initialize DB connection # Register routes self.router.on_startup.append(self.startup) @@ -311,7 +307,7 @@ class BrowserAutomation: # Tab management self.router.post("/automation/switch_tab")(self.switch_tab) - # self.router.post("/automation/open_tab")(self.open_tab) + self.router.post("/automation/open_tab")(self.open_tab) self.router.post("/automation/close_tab")(self.close_tab) # Content actions @@ -337,7 +333,7 @@ class BrowserAutomation: playwright = await async_playwright().start() print("Playwright started, launching browser...") - # Use non-headless mode for testing with slower timeouts and additional options + # Use non-headless mode for testing with slower timeouts launch_options = { "headless": False, "timeout": 60000 @@ -346,6 +342,19 @@ class BrowserAutomation: try: self.browser = await playwright.chromium.launch(**launch_options) print("Browser launched successfully") + + # Create a single context with viewport settings + self.context = await self.browser.new_context( + viewport={'width': 1024, 'height': 768} + ) + + # Create initial page and navigate to a default page + page = await self.context.new_page() + await page.goto("https://www.google.com", wait_until="domcontentloaded") + self.pages.append(page) + self.current_page_index = 0 + print("Initial page created and navigated to Google") + except Exception as browser_error: print(f"Failed to launch browser: {browser_error}") # Try with minimal options @@ -353,16 +362,16 @@ class BrowserAutomation: launch_options = {"timeout": 90000} self.browser = await playwright.chromium.launch(**launch_options) print("Browser launched with minimal options") - - # Check if we already have pages - if not self.pages: - print("Creating initial page") - page = await self.browser.new_page() + + # Create context and initial page even with minimal options + self.context = await self.browser.new_context( + viewport={'width': 1024, 'height': 768} + ) + page = await self.context.new_page() + await page.goto("https://www.google.com", wait_until="domcontentloaded") self.pages.append(page) self.current_page_index = 0 - else: - print("Using existing page") - self.current_page_index = 0 + print("Initial page created with minimal options") print("Browser initialization completed successfully") except Exception as e: @@ -529,18 +538,6 @@ class BrowserAutomation: """Get the current DOM state including element tree and selector map""" try: page = await self.get_current_page() - - # First check if page is valid and has content - try: - current_url = page.url - if current_url == "about:blank": - # If page is blank, try to recover by waiting for content - await page.wait_for_load_state("domcontentloaded", timeout=5000) - current_url = page.url - except Exception as e: - print(f"Error checking page URL: {e}") - current_url = "about:blank" - selector_map = await self.get_selector_map() # Create a root element @@ -558,12 +555,13 @@ class BrowserAutomation: root.children.append(element) # Get basic page info + url = page.url try: title = await page.title() except: title = "Unknown Title" - # Get more accurate scroll information + # Get more accurate scroll information - fix JavaScript syntax try: scroll_info = await page.evaluate(""" () => { @@ -594,7 +592,7 @@ class BrowserAutomation: return DOMState( element_tree=root, selector_map=selector_map, - url=current_url, + url=url, title=title, pixels_above=pixels_above, pixels_below=pixels_below @@ -602,16 +600,7 @@ class BrowserAutomation: except Exception as e: print(f"Error getting DOM state: {e}") traceback.print_exc() - - # Try to get at least the current URL before falling back - current_url = "about:blank" - try: - page = await self.get_current_page() - current_url = page.url - except: - pass - - # Return a minimal valid state with the actual URL if possible + # Return a minimal valid state to avoid breaking tests dummy_root = DOMElementNode( is_visible=True, tag_name="body", @@ -622,92 +611,22 @@ class BrowserAutomation: return DOMState( element_tree=dummy_root, selector_map=dummy_map, - url=current_url, + url=page.url if 'page' in locals() else "about:blank", title="Error page", pixels_above=0, pixels_below=0 ) async def take_screenshot(self) -> str: - """Take a screenshot and return as base64 encoded string or S3 URL""" + """Take a screenshot and return as base64 encoded string""" try: page = await self.get_current_page() screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False) - - client = await self.db.client - - if client: - timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') - random_id = random.randint(1000, 9999) - filename = f"screenshot_{timestamp}_{random_id}.jpg" - - logger.info(f"Attempting to upload screenshot: {filename}") - result = await self.upload_to_storage(client, screenshot_bytes, filename) - - if isinstance(result, dict) and result.get("is_s3") and result.get("url"): - if await self.verify_file_exists(client, filename): - logger.info(f"Screenshot upload verified: {filename}") - else: - logger.error(f"Screenshot upload failed verification: {filename}") - return base64.b64encode(screenshot_bytes).decode('utf-8') - - return result - else: - logger.warning("No Supabase client available, falling back to base64") - return base64.b64encode(screenshot_bytes).decode('utf-8') + return base64.b64encode(screenshot_bytes).decode('utf-8') except Exception as e: - logger.error(f"Error taking screenshot: {str(e)}") - traceback.print_exc() + print(f"Error taking screenshot: {e}") + # Return an empty string rather than failing return "" - - async def upload_to_storage(self, client, file_bytes: bytes, filename: str) -> str: - """Upload file to Supabase Storage and return the URL""" - try: - bucket_name = 'screenshots' - - buckets = client.storage.list_buckets() - if not any(bucket.name == bucket_name for bucket in buckets): - logger.info(f"Creating bucket: {bucket_name}") - try: - client.storage.create_bucket(bucket_name) - logger.info("Bucket created successfully") - except Exception as e: - logger.error(f"Failed to create bucket: {str(e)}") - raise - - logger.info(f"Uploading file: {filename}") - try: - result = client.storage.from_(bucket_name).upload( - path=filename, - file=file_bytes, - file_options={"content-type": "image/jpeg"} - ) - logger.info("File upload successful") - except Exception as e: - logger.error(f"Failed to upload file: {str(e)}") - raise - - file_url = client.storage.from_(bucket_name).get_public_url(filename) - logger.info(f"Generated URL: {file_url}") - - return {"url": file_url, "is_s3": True} - except Exception as e: - logger.error(f"Error in upload_to_storage: {str(e)}") - traceback.print_exc() - return base64.b64encode(file_bytes).decode('utf-8') - - async def verify_file_exists(self, client, filename: str) -> bool: - """Verify that a file exists in the storage bucket""" - logger.info(f"=== Verifying file exists: {filename} ===") - try: - bucket_name = 'screenshots' - files = client.storage.from_(bucket_name).list() - exists = any(f['name'] == filename for f in files) - logger.info(f"File verification result: {'exists' if exists else 'not found'}") - return exists - except Exception as e: - logger.error(f"Error verifying file: {str(e)}") - return False async def save_screenshot_to_file(self) -> str: """Take a screenshot and save to file, returning the path""" @@ -750,32 +669,20 @@ class BrowserAutomation: """Helper method to get updated browser state after any action Returns a tuple of (dom_state, screenshot, elements, metadata) """ - logger.info(f"=== Starting get_updated_browser_state for action: {action_name} ===") try: # Wait a moment for any potential async processes to settle - logger.info("Waiting for async processes to settle") await asyncio.sleep(0.5) # Get updated state - logger.info("Getting current DOM state") dom_state = await self.get_current_dom_state() - logger.info(f"DOM state retrieved - URL: {dom_state.url}, Title: {dom_state.title}") - - logger.info("Taking screenshot") screenshot = await self.take_screenshot() - logger.info(f"Screenshot result type: {'dict' if isinstance(screenshot, dict) else 'base64 string'}") - if isinstance(screenshot, dict) and screenshot.get("url"): - logger.info(f"Screenshot URL: {screenshot['url']}") # Format elements for output - logger.info("Formatting clickable elements") elements = dom_state.element_tree.clickable_elements_to_string( include_attributes=self.include_attributes ) - logger.info(f"Found {len(dom_state.selector_map)} clickable elements") # Collect additional metadata - logger.info("Collecting metadata") page = await self.get_current_page() metadata = {} @@ -801,9 +708,8 @@ class BrowserAutomation: metadata['interactive_elements'] = interactive_elements - # Get viewport dimensions + # Get viewport dimensions - Fix syntax error in JavaScript try: - logger.info("Getting viewport dimensions") viewport = await page.evaluate(""" () => { return { @@ -814,43 +720,33 @@ class BrowserAutomation: """) metadata['viewport_width'] = viewport.get('width', 0) metadata['viewport_height'] = viewport.get('height', 0) - logger.info(f"Viewport dimensions: {metadata['viewport_width']}x{metadata['viewport_height']}") except Exception as e: - logger.error(f"Error getting viewport dimensions: {e}") + print(f"Error getting viewport dimensions: {e}") metadata['viewport_width'] = 0 metadata['viewport_height'] = 0 # Extract OCR text from screenshot if available ocr_text = "" if screenshot: - logger.info("Extracting OCR text from screenshot") ocr_text = await self.extract_ocr_text_from_screenshot(screenshot) metadata['ocr_text'] = ocr_text - logger.info(f"OCR text length: {len(ocr_text)} characters") - logger.info(f"=== Completed get_updated_browser_state for {action_name} ===") + print(f"Got updated state after {action_name}: {len(dom_state.selector_map)} elements") return dom_state, screenshot, elements, metadata except Exception as e: - logger.error(f"Error in get_updated_browser_state for {action_name}: {e}") + print(f"Error getting updated state after {action_name}: {e}") traceback.print_exc() # Return empty values in case of error return None, "", "", {} def build_action_result(self, success: bool, message: str, dom_state, screenshot: str, - elements: str, metadata: dict, error: str = "", content: str = None, - fallback_url: str = None) -> BrowserActionResult: + elements: str, metadata: dict, error: str = "", content: str = None, + fallback_url: str = None) -> BrowserActionResult: """Helper method to build a consistent BrowserActionResult""" + # Ensure elements is never None to avoid display issues if elements is None: elements = "" - screenshot_base64 = None - screenshot_url = None - - if isinstance(screenshot, dict) and screenshot.get("is_s3"): - screenshot_url = screenshot.get("url") - else: - screenshot_base64 = screenshot - return BrowserActionResult( success=success, message=message, @@ -858,8 +754,7 @@ class BrowserAutomation: url=dom_state.url if dom_state else fallback_url or "", title=dom_state.title if dom_state else "", elements=elements, - screenshot_base64=screenshot_base64, - screenshot_url=screenshot_url, + screenshot_base64=screenshot, pixels_above=dom_state.pixels_above if dom_state else 0, pixels_below=dom_state.pixels_below if dom_state else 0, content=content, @@ -876,52 +771,10 @@ class BrowserAutomation: """Navigate to a specified URL""" try: page = await self.get_current_page() + await page.goto(action.url, wait_until="domcontentloaded") + await page.wait_for_load_state("networkidle", timeout=10000) - # First check if we're already on the target URL - current_url = page.url - if current_url == action.url: - print(f"Already on target URL: {action.url}") - dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})") - return self.build_action_result( - True, - f"Already on {action.url}", - dom_state, - screenshot, - elements, - metadata, - error="", - content=None - ) - - # Attempt navigation with retries - max_retries = 3 - retry_count = 0 - last_error = None - - while retry_count < max_retries: - try: - print(f"Navigation attempt {retry_count + 1} to {action.url}") - await page.goto(action.url, wait_until="domcontentloaded", timeout=30000) - await page.wait_for_load_state("networkidle", timeout=10000) - - # Verify we actually navigated to the target URL - new_url = page.url - if new_url == "about:blank": - raise Exception("Navigation resulted in blank page") - - print(f"Successfully navigated to {new_url}") - break - except Exception as e: - last_error = e - retry_count += 1 - if retry_count < max_retries: - print(f"Navigation attempt {retry_count} failed: {e}") - await asyncio.sleep(1) # Wait before retry - else: - print(f"All navigation attempts failed: {e}") - raise - - # Get updated state after successful navigation + # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})") result = self.build_action_result( @@ -940,7 +793,6 @@ class BrowserAutomation: except Exception as e: print(f"Navigation error: {str(e)}") traceback.print_exc() - # Try to get some state info even after error try: dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("navigate_error_recovery") @@ -955,14 +807,6 @@ class BrowserAutomation: content=None ) except: - # If we can't get state, at least try to get the current URL - current_url = "about:blank" - try: - page = await self.get_current_page() - current_url = page.url - except: - pass - return self.build_action_result( False, str(e), @@ -971,8 +815,7 @@ class BrowserAutomation: "", {}, error=str(e), - content=None, - fallback_url=current_url + content=None ) async def search_google(self, action: SearchGoogleAction = Body(...)): @@ -1407,8 +1250,8 @@ class BrowserAutomation: """Open a new tab with the specified URL""" try: print(f"Attempting to open new tab with URL: {action.url}") - # Create new page in same browser instance - new_page = await self.browser.new_page() + # Create new page in the existing context + new_page = await self.context.new_page() print(f"New page created successfully") # Navigate to the URL diff --git a/backend/utils/config.py b/backend/utils/config.py index 73f1cbd5..7be5875b 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.2" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.4" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property From 0148c03ec5ff81ac5ac01f4790d65dcf035252e0 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 21:25:15 +0200 Subject: [PATCH 06/21] v1 --- backend/agent/tools/sb_browser_tool.py | 2 +- backend/sandbox/README.md | 13 +++++++++++++ backend/utils/config.py | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index 59602db1..ce9130ec 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -30,7 +30,7 @@ class SandboxBrowserTool(SandboxToolsBase): await self._ensure_sandbox() # Build the curl command - url = f"http://localhost:8003/api/automation/{endpoint}" + url = f"http://localhost:8002/api/automation/{endpoint}" if method == "GET" and params: query_params = "&".join([f"{k}={v}" for k, v in params.items()]) diff --git a/backend/sandbox/README.md b/backend/sandbox/README.md index f8c0d571..0be85940 100644 --- a/backend/sandbox/README.md +++ b/backend/sandbox/README.md @@ -20,6 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie ``` cd backend/sandbox/docker docker compose build + docker push kortix/suna:0.1.2 ``` 3. Test your changes locally using docker-compose @@ -30,3 +31,15 @@ To use your custom sandbox image: 1. Change the `image` parameter in `docker-compose.yml` (that defines the image name `kortix/suna:___`) 2. Update the same image name in `backend/sandbox/sandbox.py` in the `create_sandbox` function 3. If using Daytona for deployment, update the image reference there as well + +## Publishing New Versions + +When publishing a new version of the sandbox: + +1. Update the version number in `docker-compose.yml` (e.g., from `0.1.2` to `0.1.3`) +2. Build the new image: `docker compose build` +3. Push the new version: `docker push kortix/suna:0.1.3` +4. Update all references to the image version in: + - `backend/utils/config.py` + - Daytona images + - Any other services using this image \ No newline at end of file diff --git a/backend/utils/config.py b/backend/utils/config.py index 7be5875b..dfd2f545 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.4" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.5" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property From 56b9dcad8f1c4223293f01990c9ec0cccb81a04c Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 21:44:54 +0200 Subject: [PATCH 07/21] wip --- backend/sandbox/docker/Dockerfile | 2 + backend/sandbox/docker/browser_api.py | 191 ++-------------------- backend/sandbox/docker/docker-compose.yml | 1 + 3 files changed, 21 insertions(+), 173 deletions(-) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index b8f74a52..45ddb5ef 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -118,6 +118,8 @@ ENV VNC_PASSWORD=vncpassword ENV CHROME_PERSISTENT_SESSION=true ENV RESOLUTION_WIDTH=1024 ENV RESOLUTION_HEIGHT=768 +# Add Chrome flags to prevent multiple tabs/windows +ENV CHROME_FLAGS="--single-process --no-first-run --no-default-browser-check --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-dev-shm-usage --disable-extensions --disable-features=TranslateUI --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkServiceInProcess2 --force-color-profile=srgb --metrics-recording-only --mute-audio --no-sandbox --disable-gpu" # Set up supervisor configuration RUN mkdir -p /var/log/supervisor diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index 1490bba9..0642fe1f 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -282,8 +282,8 @@ class BrowserAutomation: def __init__(self): self.router = APIRouter() self.browser: Browser = None - self.pages: List[Page] = [] - self.current_page_index: int = 0 + self.context = None + self.page = None # Single page instance self.logger = logging.getLogger("browser_automation") self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"] self.screenshot_dir = os.path.join(os.getcwd(), "screenshots") @@ -305,11 +305,6 @@ class BrowserAutomation: self.router.post("/automation/input_text")(self.input_text) self.router.post("/automation/send_keys")(self.send_keys) - # Tab management - self.router.post("/automation/switch_tab")(self.switch_tab) - self.router.post("/automation/open_tab")(self.open_tab) - self.router.post("/automation/close_tab")(self.close_tab) - # Content actions self.router.post("/automation/extract_content")(self.extract_content) self.router.post("/automation/save_pdf")(self.save_pdf) @@ -348,12 +343,10 @@ class BrowserAutomation: viewport={'width': 1024, 'height': 768} ) - # Create initial page and navigate to a default page - page = await self.context.new_page() - await page.goto("https://www.google.com", wait_until="domcontentloaded") - self.pages.append(page) - self.current_page_index = 0 - print("Initial page created and navigated to Google") + # Create single page and navigate to a neutral page + self.page = await self.context.new_page() + await self.page.goto("about:blank", wait_until="domcontentloaded") + print("Initial page created and navigated to about:blank") except Exception as browser_error: print(f"Failed to launch browser: {browser_error}") @@ -367,10 +360,8 @@ class BrowserAutomation: self.context = await self.browser.new_context( viewport={'width': 1024, 'height': 768} ) - page = await self.context.new_page() - await page.goto("https://www.google.com", wait_until="domcontentloaded") - self.pages.append(page) - self.current_page_index = 0 + self.page = await self.context.new_page() + await self.page.goto("about:blank", wait_until="domcontentloaded") print("Initial page created with minimal options") print("Browser initialization completed successfully") @@ -385,10 +376,10 @@ class BrowserAutomation: await self.browser.close() async def get_current_page(self) -> Page: - """Get the current active page""" - if not self.pages: - raise HTTPException(status_code=500, detail="No browser pages available") - return self.pages[self.current_page_index] + """Get the current page""" + if not self.page: + raise HTTPException(status_code=500, detail="No browser page available") + return self.page async def get_selector_map(self) -> Dict[int, DOMElementNode]: """Get a map of selectable elements on the page""" @@ -770,9 +761,8 @@ class BrowserAutomation: async def navigate_to(self, action: GoToUrlAction = Body(...)): """Navigate to a specified URL""" try: - page = await self.get_current_page() - await page.goto(action.url, wait_until="domcontentloaded") - await page.wait_for_load_state("networkidle", timeout=10000) + await self.page.goto(action.url, wait_until="domcontentloaded") + await self.page.wait_for_load_state("networkidle", timeout=10000) # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})") @@ -821,10 +811,9 @@ class BrowserAutomation: async def search_google(self, action: SearchGoogleAction = Body(...)): """Search Google with the provided query""" try: - page = await self.get_current_page() search_url = f"https://www.google.com/search?q={action.query}" - await page.goto(search_url) - await page.wait_for_load_state() + await self.page.goto(search_url) + await self.page.wait_for_load_state() # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"search_google({action.query})") @@ -870,9 +859,8 @@ class BrowserAutomation: async def go_back(self, _: NoParamsAction = Body(...)): """Navigate back in browser history""" try: - page = await self.get_current_page() - await page.go_back() - await page.wait_for_load_state() + await self.page.go_back() + await self.page.wait_for_load_state() # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("go_back") @@ -1201,149 +1189,6 @@ class BrowserAutomation: content=None ) - # Tab Management Actions - - async def switch_tab(self, action: SwitchTabAction = Body(...)): - """Switch to a different tab by index""" - try: - if 0 <= action.page_id < len(self.pages): - self.current_page_index = action.page_id - page = await self.get_current_page() - await page.wait_for_load_state() - - # Get updated state after action - dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"switch_tab({action.page_id})") - - return self.build_action_result( - True, - f"Switched to tab {action.page_id}", - dom_state, - screenshot, - elements, - metadata, - error="", - content=None - ) - else: - return self.build_action_result( - False, - f"Tab {action.page_id} not found", - None, - "", - "", - {}, - error=f"Tab {action.page_id} not found" - ) - except Exception as e: - return self.build_action_result( - False, - str(e), - None, - "", - "", - {}, - error=str(e), - content=None - ) - - async def open_tab(self, action: OpenTabAction = Body(...)): - """Open a new tab with the specified URL""" - try: - print(f"Attempting to open new tab with URL: {action.url}") - # Create new page in the existing context - new_page = await self.context.new_page() - print(f"New page created successfully") - - # Navigate to the URL - await new_page.goto(action.url, wait_until="domcontentloaded") - await new_page.wait_for_load_state("networkidle", timeout=10000) - print(f"Navigated to URL in new tab: {action.url}") - - # Add to page list and make it current - self.pages.append(new_page) - self.current_page_index = len(self.pages) - 1 - print(f"New tab added as index {self.current_page_index}") - - # Get updated state after action - dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"open_tab({action.url})") - - return self.build_action_result( - True, - f"Opened new tab with URL: {action.url}", - dom_state, - screenshot, - elements, - metadata, - error="", - content=None - ) - except Exception as e: - print("****"*10) - print(f"Error opening tab: {e}") - print(traceback.format_exc()) - print("****"*10) - return self.build_action_result( - False, - str(e), - None, - "", - "", - {}, - error=str(e), - content=None - ) - - async def close_tab(self, action: CloseTabAction = Body(...)): - """Close a tab by index""" - try: - if 0 <= action.page_id < len(self.pages): - page = self.pages[action.page_id] - url = page.url - await page.close() - self.pages.pop(action.page_id) - - # Adjust current index if needed - if self.current_page_index >= len(self.pages): - self.current_page_index = max(0, len(self.pages) - 1) - elif self.current_page_index >= action.page_id: - self.current_page_index = max(0, self.current_page_index - 1) - - # Get updated state after action - page = await self.get_current_page() - dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"close_tab({action.page_id})") - - return self.build_action_result( - True, - f"Closed tab {action.page_id} with URL: {url}", - dom_state, - screenshot, - elements, - metadata, - error="", - content=None - ) - else: - return self.build_action_result( - False, - f"Tab {action.page_id} not found", - None, - "", - "", - {}, - error=f"Tab {action.page_id} not found" - ) - except Exception as e: - return self.build_action_result( - False, - str(e), - None, - "", - "", - {}, - error=str(e), - content=None - ) - # Content Actions async def extract_content(self, goal: str = Body(...)): diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index 27432984..48d2363b 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -27,6 +27,7 @@ services: - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword} - CHROME_DEBUGGING_PORT=9222 - CHROME_DEBUGGING_HOST=localhost + - CHROME_FLAGS=${CHROME_FLAGS:-"--single-process --no-first-run --no-default-browser-check --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-dev-shm-usage --disable-extensions --disable-features=TranslateUI --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkServiceInProcess2 --force-color-profile=srgb --metrics-recording-only --mute-audio --no-sandbox --disable-gpu"} volumes: - /tmp/.X11-unix:/tmp/.X11-unix restart: unless-stopped From 952a2dd3bfe68ed6624b27dd8f8a7bbed0a8d330 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Sun, 18 May 2025 21:59:48 +0200 Subject: [PATCH 08/21] kortix/suna:0.1.2.7 working --- backend/sandbox/docker/docker-compose.yml | 2 +- backend/utils/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index 48d2363b..ff843624 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -6,7 +6,7 @@ services: dockerfile: ${DOCKERFILE:-Dockerfile} args: TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64} - image: kortix/suna:0.1.2 + image: kortix/suna:0.1.2.7 ports: - "6080:6080" # noVNC web interface - "5901:5901" # VNC port diff --git a/backend/utils/config.py b/backend/utils/config.py index dfd2f545..c1392d07 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.5" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.6" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property From 709b4595ccd24084a73bc473475637911f4838c1 Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Mon, 19 May 2025 01:44:45 +0530 Subject: [PATCH 09/21] chore(dev): fix redundant billing checks --- frontend/src/contexts/BillingContext.tsx | 80 +++++++++++++++++++ .../react-query/threads/use-billing-status.ts | 15 +++- 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 frontend/src/contexts/BillingContext.tsx diff --git a/frontend/src/contexts/BillingContext.tsx b/frontend/src/contexts/BillingContext.tsx new file mode 100644 index 00000000..f54fc478 --- /dev/null +++ b/frontend/src/contexts/BillingContext.tsx @@ -0,0 +1,80 @@ +'use client'; + +import React, { createContext, useContext, useCallback, useEffect, useRef } from 'react'; +import { useBillingStatusQuery } from '@/hooks/react-query/threads/use-billing-status'; +import { BillingStatusResponse } from '@/lib/api'; +import { isLocalMode } from '@/lib/config'; + +interface BillingContextType { + billingStatus: BillingStatusResponse | null; + isLoading: boolean; + error: Error | null; + checkBillingStatus: () => Promise; + lastCheckTime: number | null; +} + +const BillingContext = createContext(null); + +export function BillingProvider({ children }: { children: React.ReactNode }) { + const billingStatusQuery = useBillingStatusQuery(); + const lastCheckRef = useRef(null); + const checkInProgressRef = useRef(false); + + const checkBillingStatus = useCallback(async (force = false): Promise => { + if (isLocalMode()) { + console.log('Running in local development mode - billing checks are disabled'); + return false; + } + + if (checkInProgressRef.current) { + return !billingStatusQuery.data?.can_run; + } + + const now = Date.now(); + if (!force && lastCheckRef.current && now - lastCheckRef.current < 60000) { + return !billingStatusQuery.data?.can_run; + } + + try { + checkInProgressRef.current = true; + if (force || billingStatusQuery.isStale) { + await billingStatusQuery.refetch(); + } + lastCheckRef.current = now; + return !billingStatusQuery.data?.can_run; + } catch (err) { + console.error('Error checking billing status:', err); + return false; + } finally { + checkInProgressRef.current = false; + } + }, [billingStatusQuery]); + + useEffect(() => { + if (!billingStatusQuery.data) { + checkBillingStatus(true); + } + }, [checkBillingStatus, billingStatusQuery.data]); + + const value = { + billingStatus: billingStatusQuery.data || null, + isLoading: billingStatusQuery.isLoading, + error: billingStatusQuery.error, + checkBillingStatus, + lastCheckTime: lastCheckRef.current, + }; + + return ( + + {children} + + ); +} + +export function useBilling() { + const context = useContext(BillingContext); + if (!context) { + throw new Error('useBilling must be used within a BillingProvider'); + } + return context; +} \ No newline at end of file diff --git a/frontend/src/hooks/react-query/threads/use-billing-status.ts b/frontend/src/hooks/react-query/threads/use-billing-status.ts index a310784b..de43b3ea 100644 --- a/frontend/src/hooks/react-query/threads/use-billing-status.ts +++ b/frontend/src/hooks/react-query/threads/use-billing-status.ts @@ -1,6 +1,7 @@ import { createQueryHook } from "@/hooks/use-query"; import { threadKeys } from "./keys"; -import { checkBillingStatus } from "@/lib/api"; +import { checkBillingStatus, BillingStatusResponse } from "@/lib/api"; +import { Query } from "@tanstack/react-query"; export const useBillingStatusQuery = (enabled = true) => createQueryHook( @@ -10,5 +11,17 @@ export const useBillingStatusQuery = (enabled = true) => enabled, retry: 1, staleTime: 1000 * 60 * 5, + gcTime: 1000 * 60 * 10, // 10 minutes (using gcTime instead of cacheTime) + refetchOnWindowFocus: false, // Disable refetch on window focus + refetchOnMount: false, // Disable refetch on component mount + refetchOnReconnect: false, // Disable refetch on reconnect + // Only refetch if the data is stale and the query is enabled + refetchInterval: (query: Query) => { + // If we have data and it indicates the user can't run, check more frequently + if (query.state.data && !query.state.data.can_run) { + return 1000 * 60; // Check every minute if user can't run + } + return false; // Don't refetch automatically otherwise + }, } )(); From 70755d30745ff0c4e5828f287755657013620f15 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 01:06:44 +0200 Subject: [PATCH 10/21] save all browser states --- backend/agent/run.py | 1 - backend/agent/tools/sb_browser_tool.py | 7 +--- backend/agentpress/response_processor.py | 8 ++-- .../thread/tool-views/BrowserToolView.tsx | 37 ++++++++++++------- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/backend/agent/run.py b/backend/agent/run.py index 3301e498..59fd5e01 100644 --- a/backend/agent/run.py +++ b/backend/agent/run.py @@ -154,7 +154,6 @@ async def run_agent( else: logger.warning("Browser state found but no screenshot data.") - await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute() except Exception as e: logger.error(f"Error parsing browser state: {e}") diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index ce9130ec..eaeeac6a 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -59,7 +59,6 @@ class SandboxBrowserTool(SandboxToolsBase): logger.info("Browser automation request completed successfully") - # Add full result to thread messages for state tracking added_message = await self.thread_manager.add_message( thread_id=self.thread_id, type="browser_state", @@ -67,17 +66,13 @@ class SandboxBrowserTool(SandboxToolsBase): is_llm_message=False ) - # Return tool-specific success response success_response = { "success": True, "message": result.get("message", "Browser action completed successfully") } - # Add message ID if available if added_message and 'message_id' in added_message: success_response['message_id'] = added_message['message_id'] - - # Add relevant browser-specific info if result.get("url"): success_response["url"] = result["url"] if result.get("title"): @@ -86,7 +81,6 @@ class SandboxBrowserTool(SandboxToolsBase): success_response["elements_found"] = result["element_count"] if result.get("pixels_below"): success_response["scrollable_content"] = result["pixels_below"] > 0 - # Add OCR text when available if result.get("ocr_text"): success_response["ocr_text"] = result["ocr_text"] @@ -104,6 +98,7 @@ class SandboxBrowserTool(SandboxToolsBase): logger.debug(traceback.format_exc()) return self.fail_response(f"Error executing browser action: {e}") + @openapi_schema({ "type": "function", "function": { diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py index ea6e028a..58cdaf83 100644 --- a/backend/agentpress/response_processor.py +++ b/backend/agentpress/response_processor.py @@ -978,7 +978,7 @@ class ResponseProcessor: if value is not None: params[mapping.param_name] = value parsing_details["attributes"][mapping.param_name] = value # Store raw attribute - logger.info(f"Found attribute {mapping.param_name}: {value}") + # logger.info(f"Found attribute {mapping.param_name}: {value}") elif mapping.node_type == "element": # Extract element content @@ -986,7 +986,7 @@ class ResponseProcessor: if content is not None: params[mapping.param_name] = content.strip() parsing_details["elements"][mapping.param_name] = content.strip() # Store raw element content - logger.info(f"Found element {mapping.param_name}: {content.strip()}") + # logger.info(f"Found element {mapping.param_name}: {content.strip()}") elif mapping.node_type == "text": # Extract text content @@ -994,7 +994,7 @@ class ResponseProcessor: if content is not None: params[mapping.param_name] = content.strip() parsing_details["text_content"] = content.strip() # Store raw text content - logger.info(f"Found text content for {mapping.param_name}: {content.strip()}") + # logger.info(f"Found text content for {mapping.param_name}: {content.strip()}") elif mapping.node_type == "content": # Extract root content @@ -1002,7 +1002,7 @@ class ResponseProcessor: if content is not None: params[mapping.param_name] = content.strip() parsing_details["root_content"] = content.strip() # Store raw root content - logger.info(f"Found root content for {mapping.param_name}") + # logger.info(f"Found root content for {mapping.param_name}") except Exception as e: logger.error(f"Error processing mapping {mapping}: {e}") diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx index 2fcb45e9..d43453aa 100644 --- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx +++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx @@ -72,20 +72,31 @@ export function BrowserToolView({ // Find the browser_state message and extract the screenshot let screenshotBase64: string | null = null; - if (browserStateMessageId && messages.length > 0) { - const browserStateMessage = messages.find( - (msg) => - (msg.type as string) === 'browser_state' && - msg.message_id === browserStateMessageId, - ); + let latestBrowserState: any = null; + let latestTimestamp = 0; - if (browserStateMessage) { - const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>( - browserStateMessage.content, - {}, - ); - console.log('Browser state content: ', browserStateContent) - screenshotBase64 = browserStateContent?.screenshot_base64 || null; + if (messages.length > 0) { + // Find the latest browser_state message by comparing timestamps + messages.forEach((msg) => { + if ((msg.type as string) === 'browser_state') { + try { + const content = safeJsonParse<{ timestamp?: number }>(msg.content, {}); + const timestamp = content?.timestamp || 0; + + if (timestamp > latestTimestamp) { + latestTimestamp = timestamp; + latestBrowserState = content; + } + } catch (error) { + console.error('[BrowserToolView] Error parsing browser state:', error); + } + } + }); + + // Use the latest browser state + if (latestBrowserState) { + screenshotBase64 = latestBrowserState.screenshot_base64 || null; + console.log('Latest browser state:', latestBrowserState); } } From f22412b963b651c69a5e24603b26c133f38818c1 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 01:28:14 +0200 Subject: [PATCH 11/21] s3 url save instead of base64 --- backend/agent/tools/sb_browser_tool.py | 14 +++++++ backend/services/supabase.py | 44 ++++++++++++++++++++++ backend/utils/s3_upload_utils.py | 51 ++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 backend/utils/s3_upload_utils.py diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index eaeeac6a..844b821b 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -5,6 +5,7 @@ from agentpress.tool import ToolResult, openapi_schema, xml_schema from agentpress.thread_manager import ThreadManager from sandbox.tool_base import SandboxToolsBase from utils.logger import logger +from utils.s3_upload_utils import upload_base64_image class SandboxBrowserTool(SandboxToolsBase): @@ -59,6 +60,17 @@ class SandboxBrowserTool(SandboxToolsBase): logger.info("Browser automation request completed successfully") + if "screenshot_base64" in result: + try: + image_url = await upload_base64_image(result["screenshot_base64"]) + result["image_url"] = image_url + # Remove base64 data from result to keep it clean + del result["screenshot_base64"] + logger.debug(f"Uploaded screenshot to {image_url}") + except Exception as e: + logger.error(f"Failed to upload screenshot: {e}") + result["image_upload_error"] = str(e) + added_message = await self.thread_manager.add_message( thread_id=self.thread_id, type="browser_state", @@ -83,6 +95,8 @@ class SandboxBrowserTool(SandboxToolsBase): success_response["scrollable_content"] = result["pixels_below"] > 0 if result.get("ocr_text"): success_response["ocr_text"] = result["ocr_text"] + if result.get("image_url"): + success_response["image_url"] = result["image_url"] return self.success_response(success_response) diff --git a/backend/services/supabase.py b/backend/services/supabase.py index 0bb1419a..0a3f8558 100644 --- a/backend/services/supabase.py +++ b/backend/services/supabase.py @@ -6,6 +6,9 @@ from typing import Optional from supabase import create_async_client, AsyncClient from utils.logger import logger from utils.config import config +import base64 +import uuid +from datetime import datetime class DBConnection: """Singleton database connection manager using Supabase.""" @@ -66,4 +69,45 @@ class DBConnection: raise RuntimeError("Database not initialized") return self._client + async def upload_base64_image(self, base64_data: str, bucket_name: str = "browser-screenshots") -> str: + """Upload a base64 encoded image to Supabase storage and return the URL. + + Args: + base64_data (str): Base64 encoded image data (with or without data URL prefix) + bucket_name (str): Name of the storage bucket to upload to + + Returns: + str: Public URL of the uploaded image + """ + try: + # Remove data URL prefix if present + if base64_data.startswith('data:'): + base64_data = base64_data.split(',')[1] + + # Decode base64 data + image_data = base64.b64decode(base64_data) + + # Generate unique filename + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + unique_id = str(uuid.uuid4())[:8] + filename = f"image_{timestamp}_{unique_id}.png" + + # Upload to Supabase storage + client = await self.client + storage_response = await client.storage.from_(bucket_name).upload( + filename, + image_data, + {"content-type": "image/png"} + ) + + # Get public URL + public_url = await client.storage.from_(bucket_name).get_public_url(filename) + + logger.debug(f"Successfully uploaded image to {public_url}") + return public_url + + except Exception as e: + logger.error(f"Error uploading base64 image: {e}") + raise RuntimeError(f"Failed to upload image: {str(e)}") + diff --git a/backend/utils/s3_upload_utils.py b/backend/utils/s3_upload_utils.py new file mode 100644 index 00000000..65722640 --- /dev/null +++ b/backend/utils/s3_upload_utils.py @@ -0,0 +1,51 @@ +""" +Utility functions for handling image operations. +""" + +import base64 +import uuid +from datetime import datetime +from utils.logger import logger +from services.supabase import DBConnection + +async def upload_base64_image(base64_data: str, bucket_name: str = "browser-screenshots") -> str: + """Upload a base64 encoded image to Supabase storage and return the URL. + + Args: + base64_data (str): Base64 encoded image data (with or without data URL prefix) + bucket_name (str): Name of the storage bucket to upload to + + Returns: + str: Public URL of the uploaded image + """ + try: + # Remove data URL prefix if present + if base64_data.startswith('data:'): + base64_data = base64_data.split(',')[1] + + # Decode base64 data + image_data = base64.b64decode(base64_data) + + # Generate unique filename + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + unique_id = str(uuid.uuid4())[:8] + filename = f"image_{timestamp}_{unique_id}.png" + + # Upload to Supabase storage + db = DBConnection() + client = await db.client + storage_response = await client.storage.from_(bucket_name).upload( + filename, + image_data, + {"content-type": "image/png"} + ) + + # Get public URL + public_url = await client.storage.from_(bucket_name).get_public_url(filename) + + logger.debug(f"Successfully uploaded image to {public_url}") + return public_url + + except Exception as e: + logger.error(f"Error uploading base64 image: {e}") + raise RuntimeError(f"Failed to upload image: {str(e)}") \ No newline at end of file From dd3f04c4a553f72ec514f11b88dc1e8361b0493c Mon Sep 17 00:00:00 2001 From: sharath <29162020+tnfssc@users.noreply.github.com> Date: Sun, 18 May 2025 23:46:08 +0000 Subject: [PATCH 12/21] fix(redis): service crash on redis client maxxing --- backend/docker-compose.yml | 3 ++- backend/services/docker/redis.conf | 1 + docker-compose.yaml | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 backend/services/docker/redis.conf diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index f5f91540..eb46c224 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -133,10 +133,11 @@ services: - "127.0.0.1:6379:6379" volumes: - redis_data:/data + - ./services/docker/redis.conf:/usr/local/etc/redis/redis.conf:ro restart: unless-stopped networks: - app-network - command: redis-server --appendonly yes --bind 0.0.0.0 --protected-mode no --maxmemory 8gb --maxmemory-policy allkeys-lru + command: redis-server /usr/local/etc/redis/redis.conf --appendonly yes --bind 0.0.0.0 --protected-mode no --maxmemory 8gb --maxmemory-policy allkeys-lru healthcheck: test: ["CMD", "redis-cli", "ping"] interval: 10s diff --git a/backend/services/docker/redis.conf b/backend/services/docker/redis.conf new file mode 100644 index 00000000..b8b41800 --- /dev/null +++ b/backend/services/docker/redis.conf @@ -0,0 +1 @@ +timeout 120 diff --git a/docker-compose.yaml b/docker-compose.yaml index 61950eb1..4ace92bf 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,7 +3,8 @@ services: image: redis:7-alpine volumes: - redis_data:/data - command: redis-server --save 60 1 --loglevel warning + - ./backend/services/docker/redis.conf:/usr/local/etc/redis/redis.conf:ro + command: redis-server /usr/local/etc/redis/redis.conf --save 60 1 --loglevel warning healthcheck: test: ["CMD", "redis-cli", "ping"] interval: 10s From f2e7b27e0287385788b87828c246aa9a072d2d7d Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 02:00:14 +0200 Subject: [PATCH 13/21] multi tab --- backend/sandbox/docker/browser_api.py | 218 ++++++++++++++++++++++---- 1 file changed, 185 insertions(+), 33 deletions(-) diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index 0642fe1f..c2fc5186 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -282,8 +282,8 @@ class BrowserAutomation: def __init__(self): self.router = APIRouter() self.browser: Browser = None - self.context = None - self.page = None # Single page instance + self.pages: List[Page] = [] + self.current_page_index: int = 0 self.logger = logging.getLogger("browser_automation") self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"] self.screenshot_dir = os.path.join(os.getcwd(), "screenshots") @@ -305,6 +305,11 @@ class BrowserAutomation: self.router.post("/automation/input_text")(self.input_text) self.router.post("/automation/send_keys")(self.send_keys) + # Tab management + self.router.post("/automation/switch_tab")(self.switch_tab) + self.router.post("/automation/open_tab")(self.open_tab) + self.router.post("/automation/close_tab")(self.close_tab) + # Content actions self.router.post("/automation/extract_content")(self.extract_content) self.router.post("/automation/save_pdf")(self.save_pdf) @@ -337,17 +342,6 @@ class BrowserAutomation: try: self.browser = await playwright.chromium.launch(**launch_options) print("Browser launched successfully") - - # Create a single context with viewport settings - self.context = await self.browser.new_context( - viewport={'width': 1024, 'height': 768} - ) - - # Create single page and navigate to a neutral page - self.page = await self.context.new_page() - await self.page.goto("about:blank", wait_until="domcontentloaded") - print("Initial page created and navigated to about:blank") - except Exception as browser_error: print(f"Failed to launch browser: {browser_error}") # Try with minimal options @@ -355,16 +349,22 @@ class BrowserAutomation: launch_options = {"timeout": 90000} self.browser = await playwright.chromium.launch(**launch_options) print("Browser launched with minimal options") + + try: + await self.get_current_page() + print("Found existing page, using it") + self.current_page_index = 0 + except Exception as page_error: + print(f"Error finding existing page, creating new one. ( {page_error})") + page = await self.browser.new_page(viewport={'width': 1024, 'height': 768}) + print("New page created successfully") + self.pages.append(page) + self.current_page_index = 0 + # Navigate directly to google.com instead of about:blank + await page.goto("https://www.google.com", wait_until="domcontentloaded", timeout=30000) + print("Navigated to google.com") - # Create context and initial page even with minimal options - self.context = await self.browser.new_context( - viewport={'width': 1024, 'height': 768} - ) - self.page = await self.context.new_page() - await self.page.goto("about:blank", wait_until="domcontentloaded") - print("Initial page created with minimal options") - - print("Browser initialization completed successfully") + print("Browser initialization completed successfully") except Exception as e: print(f"Browser startup error: {str(e)}") traceback.print_exc() @@ -376,10 +376,10 @@ class BrowserAutomation: await self.browser.close() async def get_current_page(self) -> Page: - """Get the current page""" - if not self.page: - raise HTTPException(status_code=500, detail="No browser page available") - return self.page + """Get the current active page""" + if not self.pages: + raise HTTPException(status_code=500, detail="No browser pages available") + return self.pages[self.current_page_index] async def get_selector_map(self) -> Dict[int, DOMElementNode]: """Get a map of selectable elements on the page""" @@ -599,10 +599,16 @@ class BrowserAutomation: is_top_element=True ) dummy_map = {1: dummy_root} + current_url = "unknown" + try: + if 'page' in locals(): + current_url = page.url + except: + pass return DOMState( element_tree=dummy_root, selector_map=dummy_map, - url=page.url if 'page' in locals() else "about:blank", + url=current_url, title="Error page", pixels_above=0, pixels_below=0 @@ -761,8 +767,9 @@ class BrowserAutomation: async def navigate_to(self, action: GoToUrlAction = Body(...)): """Navigate to a specified URL""" try: - await self.page.goto(action.url, wait_until="domcontentloaded") - await self.page.wait_for_load_state("networkidle", timeout=10000) + page = await self.get_current_page() + await page.goto(action.url, wait_until="domcontentloaded") + await page.wait_for_load_state("networkidle", timeout=10000) # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})") @@ -811,9 +818,10 @@ class BrowserAutomation: async def search_google(self, action: SearchGoogleAction = Body(...)): """Search Google with the provided query""" try: + page = await self.get_current_page() search_url = f"https://www.google.com/search?q={action.query}" - await self.page.goto(search_url) - await self.page.wait_for_load_state() + await page.goto(search_url) + await page.wait_for_load_state() # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"search_google({action.query})") @@ -859,8 +867,9 @@ class BrowserAutomation: async def go_back(self, _: NoParamsAction = Body(...)): """Navigate back in browser history""" try: - await self.page.go_back() - await self.page.wait_for_load_state() + page = await self.get_current_page() + await page.go_back() + await page.wait_for_load_state() # Get updated state after action dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("go_back") @@ -1189,6 +1198,149 @@ class BrowserAutomation: content=None ) + # Tab Management Actions + + async def switch_tab(self, action: SwitchTabAction = Body(...)): + """Switch to a different tab by index""" + try: + if 0 <= action.page_id < len(self.pages): + self.current_page_index = action.page_id + page = await self.get_current_page() + await page.wait_for_load_state() + + # Get updated state after action + dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"switch_tab({action.page_id})") + + return self.build_action_result( + True, + f"Switched to tab {action.page_id}", + dom_state, + screenshot, + elements, + metadata, + error="", + content=None + ) + else: + return self.build_action_result( + False, + f"Tab {action.page_id} not found", + None, + "", + "", + {}, + error=f"Tab {action.page_id} not found" + ) + except Exception as e: + return self.build_action_result( + False, + str(e), + None, + "", + "", + {}, + error=str(e), + content=None + ) + + async def open_tab(self, action: OpenTabAction = Body(...)): + """Open a new tab with the specified URL""" + try: + print(f"Attempting to open new tab with URL: {action.url}") + # Create new page in same browser instance + new_page = await self.browser.new_page() + print(f"New page created successfully") + + # Navigate to the URL + await new_page.goto(action.url, wait_until="domcontentloaded") + await new_page.wait_for_load_state("networkidle", timeout=10000) + print(f"Navigated to URL in new tab: {action.url}") + + # Add to page list and make it current + self.pages.append(new_page) + self.current_page_index = len(self.pages) - 1 + print(f"New tab added as index {self.current_page_index}") + + # Get updated state after action + dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"open_tab({action.url})") + + return self.build_action_result( + True, + f"Opened new tab with URL: {action.url}", + dom_state, + screenshot, + elements, + metadata, + error="", + content=None + ) + except Exception as e: + print("****"*10) + print(f"Error opening tab: {e}") + print(traceback.format_exc()) + print("****"*10) + return self.build_action_result( + False, + str(e), + None, + "", + "", + {}, + error=str(e), + content=None + ) + + async def close_tab(self, action: CloseTabAction = Body(...)): + """Close a tab by index""" + try: + if 0 <= action.page_id < len(self.pages): + page = self.pages[action.page_id] + url = page.url + await page.close() + self.pages.pop(action.page_id) + + # Adjust current index if needed + if self.current_page_index >= len(self.pages): + self.current_page_index = max(0, len(self.pages) - 1) + elif self.current_page_index >= action.page_id: + self.current_page_index = max(0, self.current_page_index - 1) + + # Get updated state after action + page = await self.get_current_page() + dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"close_tab({action.page_id})") + + return self.build_action_result( + True, + f"Closed tab {action.page_id} with URL: {url}", + dom_state, + screenshot, + elements, + metadata, + error="", + content=None + ) + else: + return self.build_action_result( + False, + f"Tab {action.page_id} not found", + None, + "", + "", + {}, + error=f"Tab {action.page_id} not found" + ) + except Exception as e: + return self.build_action_result( + False, + str(e), + None, + "", + "", + {}, + error=str(e), + content=None + ) + # Content Actions async def extract_content(self, goal: str = Body(...)): From c8826d520505994e5acf7255705899c002673b03 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 02:14:45 +0200 Subject: [PATCH 14/21] fix: expose RabbitMQ ports for local development --- docker-compose.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 61950eb1..46319c70 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -12,6 +12,9 @@ services: rabbitmq: image: rabbitmq + ports: + - "5672:5672" + - "15672:15672" volumes: - rabbitmq_data:/var/lib/rabbitmq restart: unless-stopped From 75372a94c038e33e08124bad84922b53043cc5f4 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 02:43:05 +0200 Subject: [PATCH 15/21] browser api fixes --- backend/agent/tools/sb_browser_tool.py | 2 +- backend/sandbox/docker/Dockerfile | 3 +++ backend/sandbox/docker/browser_api.py | 23 +++++++++++++++++++++-- backend/utils/config.py | 2 +- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index 844b821b..d297c3d5 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -31,7 +31,7 @@ class SandboxBrowserTool(SandboxToolsBase): await self._ensure_sandbox() # Build the curl command - url = f"http://localhost:8002/api/automation/{endpoint}" + url = f"http://localhost:8003/api/automation/{endpoint}" if method == "GET" and params: query_params = "&".join([f"{k}={v}" for k, v in params.items()]) diff --git a/backend/sandbox/docker/Dockerfile b/backend/sandbox/docker/Dockerfile index 45ddb5ef..5608e335 100644 --- a/backend/sandbox/docker/Dockerfile +++ b/backend/sandbox/docker/Dockerfile @@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \ iputils-ping \ dnsutils \ sudo \ + # OCR Tools + tesseract-ocr \ + tesseract-ocr-eng \ && rm -rf /var/lib/apt/lists/* # Install Node.js and npm diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index c2fc5186..7fb5baf5 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -618,10 +618,29 @@ class BrowserAutomation: """Take a screenshot and return as base64 encoded string""" try: page = await self.get_current_page() - screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False) + + # Wait for network to be idle and DOM to be stable + try: + await page.wait_for_load_state("networkidle", timeout=60000) # Increased timeout to 60s + except Exception as e: + print(f"Warning: Network idle timeout, proceeding anyway: {e}") + + # Wait for any animations to complete + # await page.wait_for_timeout(1000) # Wait 1 second for animations + + # Take screenshot with increased timeout and better options + screenshot_bytes = await page.screenshot( + type='jpeg', + quality=60, + full_page=False, + timeout=60000, # Increased timeout to 60s + scale='device' # Use device scale factor + ) + return base64.b64encode(screenshot_bytes).decode('utf-8') except Exception as e: print(f"Error taking screenshot: {e}") + traceback.print_exc() # Return an empty string rather than failing return "" @@ -2065,4 +2084,4 @@ if __name__ == '__main__': asyncio.run(test_browser_api_2()) else: print("Starting API server") - uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8002) \ No newline at end of file + uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8003) \ No newline at end of file diff --git a/backend/utils/config.py b/backend/utils/config.py index c1392d07..ea683267 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.6" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.7" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property From d15986b63e6c2346bc067ec5f0d378fe08403ac9 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 03:39:02 +0200 Subject: [PATCH 16/21] wip --- backend/sandbox/docker/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/sandbox/docker/docker-compose.yml b/backend/sandbox/docker/docker-compose.yml index ff843624..08f0969d 100644 --- a/backend/sandbox/docker/docker-compose.yml +++ b/backend/sandbox/docker/docker-compose.yml @@ -6,7 +6,7 @@ services: dockerfile: ${DOCKERFILE:-Dockerfile} args: TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64} - image: kortix/suna:0.1.2.7 + image: kortix/suna:0.1.2.8 ports: - "6080:6080" # noVNC web interface - "5901:5901" # VNC port From 74320d66406d78b55f30f56e2623dcac8e479566 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 03:54:33 +0200 Subject: [PATCH 17/21] kortix/suna:0.1.2.8 sandbox bump --- backend/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/utils/config.py b/backend/utils/config.py index ea683267..085cf041 100644 --- a/backend/utils/config.py +++ b/backend/utils/config.py @@ -159,7 +159,7 @@ class Configuration: STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Sandbox configuration - SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.7" + SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.8" SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" @property From c1615b48ae7cba432170f125d56a6697253ecf27 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 04:24:21 +0200 Subject: [PATCH 18/21] fe change rev --- .../thread/tool-views/BrowserToolView.tsx | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx index d43453aa..2fcb45e9 100644 --- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx +++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx @@ -72,31 +72,20 @@ export function BrowserToolView({ // Find the browser_state message and extract the screenshot let screenshotBase64: string | null = null; - let latestBrowserState: any = null; - let latestTimestamp = 0; + if (browserStateMessageId && messages.length > 0) { + const browserStateMessage = messages.find( + (msg) => + (msg.type as string) === 'browser_state' && + msg.message_id === browserStateMessageId, + ); - if (messages.length > 0) { - // Find the latest browser_state message by comparing timestamps - messages.forEach((msg) => { - if ((msg.type as string) === 'browser_state') { - try { - const content = safeJsonParse<{ timestamp?: number }>(msg.content, {}); - const timestamp = content?.timestamp || 0; - - if (timestamp > latestTimestamp) { - latestTimestamp = timestamp; - latestBrowserState = content; - } - } catch (error) { - console.error('[BrowserToolView] Error parsing browser state:', error); - } - } - }); - - // Use the latest browser state - if (latestBrowserState) { - screenshotBase64 = latestBrowserState.screenshot_base64 || null; - console.log('Latest browser state:', latestBrowserState); + if (browserStateMessage) { + const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>( + browserStateMessage.content, + {}, + ); + console.log('Browser state content: ', browserStateContent) + screenshotBase64 = browserStateContent?.screenshot_base64 || null; } } From cea53931f74ef7d2b33f7f2104c1b969cd49a649 Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Mon, 19 May 2025 10:43:53 +0530 Subject: [PATCH 19/21] chore(dev): second attempt to fix billing checks --- .../(dashboard)/agents/[threadId]/page.tsx | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx index 336af2a9..e7ead8b5 100644 --- a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx +++ b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx @@ -1013,38 +1013,35 @@ export default function ThreadPage({ } }, [project?.account_id, billingStatusQuery]); - // Check billing when agent status changes useEffect(() => { - const previousStatus = previousAgentStatus.current; + let timeoutId: NodeJS.Timeout; + const shouldCheckBilling = + project?.account_id && + (initialLoadCompleted.current || + (messagesLoadedRef.current && !isLoading) || + (previousAgentStatus.current === 'running' && agentStatus === 'idle')); - // Check if agent just completed (status changed from running to idle) - if (previousStatus === 'running' && agentStatus === 'idle') { - checkBillingLimits(); + if (shouldCheckBilling) { + timeoutId = setTimeout(() => { + checkBillingLimits(); + }, 500); } - // Store current status for next comparison previousAgentStatus.current = agentStatus; - }, [agentStatus, checkBillingLimits]); - // Check billing on initial load - useEffect(() => { - if (project?.account_id && initialLoadCompleted.current) { - console.log('Checking billing status on page load'); - checkBillingLimits(); - } - }, [project?.account_id, checkBillingLimits, initialLoadCompleted]); - - // Check billing after messages loaded - useEffect(() => { - if (messagesLoadedRef.current && project?.account_id && !isLoading) { - console.log('Checking billing status after messages loaded'); - checkBillingLimits(); - } + return () => { + if (timeoutId) { + clearTimeout(timeoutId); + } + }; + // eslint-disable-next-line react-hooks/exhaustive-deps }, [ - messagesLoadedRef.current, - checkBillingLimits, project?.account_id, + initialLoadCompleted.current, + messagesLoadedRef.current, isLoading, + agentStatus, + checkBillingLimits ]); // Check for debug mode in URL on initial load and when URL changes From f89d97568d3803309d8ae953c94682e578662920 Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Mon, 19 May 2025 11:24:02 +0530 Subject: [PATCH 20/21] chore(dev): cleanup useeffect deps --- frontend/src/app/(dashboard)/agents/[threadId]/page.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx index e7ead8b5..e1c9a80d 100644 --- a/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx +++ b/frontend/src/app/(dashboard)/agents/[threadId]/page.tsx @@ -1034,11 +1034,8 @@ export default function ThreadPage({ clearTimeout(timeoutId); } }; - // eslint-disable-next-line react-hooks/exhaustive-deps }, [ project?.account_id, - initialLoadCompleted.current, - messagesLoadedRef.current, isLoading, agentStatus, checkBillingLimits From 908208898ec4b1dff38564bc15d7d54b349ead82 Mon Sep 17 00:00:00 2001 From: Surajdusane <138127406+Surajdusane@users.noreply.github.com> Date: Mon, 19 May 2025 12:04:18 +0530 Subject: [PATCH 21/21] fix: improve forgot password dialog opacity and remove duplicate close button --- frontend/src/app/auth/page.tsx | 2 +- frontend/src/components/GoogleSignIn.tsx | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/src/app/auth/page.tsx b/frontend/src/app/auth/page.tsx index aa5b6793..b90c143b 100644 --- a/frontend/src/app/auth/page.tsx +++ b/frontend/src/app/auth/page.tsx @@ -509,7 +509,7 @@ function LoginContent() { {/* Forgot Password Dialog */} - +
diff --git a/frontend/src/components/GoogleSignIn.tsx b/frontend/src/components/GoogleSignIn.tsx index 1b6e3d87..a99eeb8d 100644 --- a/frontend/src/components/GoogleSignIn.tsx +++ b/frontend/src/components/GoogleSignIn.tsx @@ -3,6 +3,7 @@ import { useEffect, useCallback, useRef, useState } from 'react'; import Script from 'next/script'; import { createClient } from '@/lib/supabase/client'; +import { useTheme } from 'next-themes'; // Add type declarations for Google One Tap declare global { @@ -68,6 +69,7 @@ interface GoogleSignInProps { export default function GoogleSignIn({ returnUrl }: GoogleSignInProps) { const googleClientId = process.env.NEXT_PUBLIC_GOOGLE_CLIENT_ID; const [isLoading, setIsLoading] = useState(false); + const { resolvedTheme } = useTheme(); const handleGoogleSignIn = useCallback( async (response: GoogleSignInResponse) => { @@ -184,7 +186,7 @@ export default function GoogleSignIn({ returnUrl }: GoogleSignInProps) { if (buttonContainer) { window.google.accounts.id.renderButton(buttonContainer, { type: 'standard', - theme: 'outline', + theme: resolvedTheme === 'dark' ? 'filled_black' : 'outline', size: 'large', text: 'continue_with', shape: 'pill',