chore(docker): update Docker configurations and streamline browser handling

- Removed unnecessary comments and updated the Docker Compose files for backend and sandbox services.
- Changed `docker-compose` to `docker compose` in README for consistency.
- Updated Chromium dependencies and configurations in Dockerfile for improved stability.
- Enhanced browser automation logic to handle page navigation and state recovery more effectively.
- Adjusted environment variables for better performance and resource management.
This commit is contained in:
marko-kraemer 2025-05-18 05:49:09 +02:00
parent 0e3f9d5a1f
commit cbb8b7e65a
6 changed files with 120 additions and 35 deletions

View File

@ -1,7 +1,3 @@
# This is a Docker Compose file for the backend service. For self-hosting, look at the root docker-compose.yml file.
version: "3.8"
services:
api:
build:

View File

@ -19,7 +19,7 @@ You can modify the sandbox environment for development or to add new capabilitie
2. Build a custom image:
```
cd backend/sandbox/docker
docker-compose build
docker compose build
```
3. Test your changes locally using `docker compose`

View File

@ -68,6 +68,9 @@ RUN apt-get update && apt-get install -y \
iputils-ping \
dnsutils \
sudo \
# Chromium dependencies
chromium \
chromium-driver \
&& rm -rf /var/lib/apt/lists/*
# Install Node.js and npm
@ -110,14 +113,20 @@ RUN python -c "from playwright.sync_api import sync_playwright; print('Playwrigh
# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
ENV CHROME_PATH=/usr/bin/chromium
ENV ANONYMIZED_TELEMETRY=false
ENV DISPLAY=:99
ENV RESOLUTION=1920x1080x24
ENV RESOLUTION=1024x768x24
ENV VNC_PASSWORD=vncpassword
ENV CHROME_PERSISTENT_SESSION=true
ENV RESOLUTION_WIDTH=1920
ENV RESOLUTION_HEIGHT=1080
ENV RESOLUTION_WIDTH=1024
ENV RESOLUTION_HEIGHT=768
# Add Chrome stability flags
ENV CHROME_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"
# Add Playwright specific settings
ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
# Set up supervisor configuration
RUN mkdir -p /var/log/supervisor

View File

@ -311,7 +311,7 @@ class BrowserAutomation:
# Tab management
self.router.post("/automation/switch_tab")(self.switch_tab)
self.router.post("/automation/open_tab")(self.open_tab)
# self.router.post("/automation/open_tab")(self.open_tab)
self.router.post("/automation/close_tab")(self.close_tab)
# Content actions
@ -337,7 +337,7 @@ class BrowserAutomation:
playwright = await async_playwright().start()
print("Playwright started, launching browser...")
# Use non-headless mode for testing with slower timeouts
# Use non-headless mode for testing, with longer timeouts and additional options
launch_options = {
"headless": False,
"timeout": 60000
@ -354,21 +354,17 @@ class BrowserAutomation:
self.browser = await playwright.chromium.launch(**launch_options)
print("Browser launched with minimal options")
try:
await self.get_current_page()
print("Found existing page, using it")
self.current_page_index = 0
except Exception as page_error:
print(f"Error finding existing page, creating new one. ( {page_error})")
# Check if we already have pages
if not self.pages:
print("Creating initial page")
page = await self.browser.new_page()
print("New page created successfully")
self.pages.append(page)
self.current_page_index = 0
# Navigate to about:blank to ensure page is ready
# await page.goto("google.com", timeout=30000)
print("Navigated to google.com")
else:
print("Using existing page")
self.current_page_index = 0
print("Browser initialization completed successfully")
print("Browser initialization completed successfully")
except Exception as e:
print(f"Browser startup error: {str(e)}")
traceback.print_exc()
@ -533,6 +529,18 @@ class BrowserAutomation:
"""Get the current DOM state including element tree and selector map"""
try:
page = await self.get_current_page()
# First check if page is valid and has content
try:
current_url = page.url
if current_url == "about:blank":
# If page is blank, try to recover by waiting for content
await page.wait_for_load_state("domcontentloaded", timeout=5000)
current_url = page.url
except Exception as e:
print(f"Error checking page URL: {e}")
current_url = "about:blank"
selector_map = await self.get_selector_map()
# Create a root element
@ -550,13 +558,12 @@ class BrowserAutomation:
root.children.append(element)
# Get basic page info
url = page.url
try:
title = await page.title()
except:
title = "Unknown Title"
# Get more accurate scroll information - fix JavaScript syntax
# Get more accurate scroll information
try:
scroll_info = await page.evaluate("""
() => {
@ -587,7 +594,7 @@ class BrowserAutomation:
return DOMState(
element_tree=root,
selector_map=selector_map,
url=url,
url=current_url,
title=title,
pixels_above=pixels_above,
pixels_below=pixels_below
@ -595,7 +602,16 @@ class BrowserAutomation:
except Exception as e:
print(f"Error getting DOM state: {e}")
traceback.print_exc()
# Return a minimal valid state to avoid breaking tests
# Try to get at least the current URL before falling back
current_url = "about:blank"
try:
page = await self.get_current_page()
current_url = page.url
except:
pass
# Return a minimal valid state with the actual URL if possible
dummy_root = DOMElementNode(
is_visible=True,
tag_name="body",
@ -606,7 +622,7 @@ class BrowserAutomation:
return DOMState(
element_tree=dummy_root,
selector_map=dummy_map,
url=page.url if 'page' in locals() else "about:blank",
url=current_url,
title="Error page",
pixels_above=0,
pixels_below=0
@ -860,10 +876,52 @@ class BrowserAutomation:
"""Navigate to a specified URL"""
try:
page = await self.get_current_page()
await page.goto(action.url, wait_until="domcontentloaded")
await page.wait_for_load_state("networkidle", timeout=10000)
# Get updated state after action
# First check if we're already on the target URL
current_url = page.url
if current_url == action.url:
print(f"Already on target URL: {action.url}")
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
return self.build_action_result(
True,
f"Already on {action.url}",
dom_state,
screenshot,
elements,
metadata,
error="",
content=None
)
# Attempt navigation with retries
max_retries = 3
retry_count = 0
last_error = None
while retry_count < max_retries:
try:
print(f"Navigation attempt {retry_count + 1} to {action.url}")
await page.goto(action.url, wait_until="domcontentloaded", timeout=30000)
await page.wait_for_load_state("networkidle", timeout=10000)
# Verify we actually navigated to the target URL
new_url = page.url
if new_url == "about:blank":
raise Exception("Navigation resulted in blank page")
print(f"Successfully navigated to {new_url}")
break
except Exception as e:
last_error = e
retry_count += 1
if retry_count < max_retries:
print(f"Navigation attempt {retry_count} failed: {e}")
await asyncio.sleep(1) # Wait before retry
else:
print(f"All navigation attempts failed: {e}")
raise
# Get updated state after successful navigation
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"navigate_to({action.url})")
result = self.build_action_result(
@ -882,6 +940,7 @@ class BrowserAutomation:
except Exception as e:
print(f"Navigation error: {str(e)}")
traceback.print_exc()
# Try to get some state info even after error
try:
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("navigate_error_recovery")
@ -896,6 +955,14 @@ class BrowserAutomation:
content=None
)
except:
# If we can't get state, at least try to get the current URL
current_url = "about:blank"
try:
page = await self.get_current_page()
current_url = page.url
except:
pass
return self.build_action_result(
False,
str(e),
@ -904,7 +971,8 @@ class BrowserAutomation:
"",
{},
error=str(e),
content=None
content=None,
fallback_url=current_url
)
async def search_google(self, action: SearchGoogleAction = Body(...)):

View File

@ -6,7 +6,7 @@ services:
dockerfile: ${DOCKERFILE:-Dockerfile}
args:
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
image: kortix/suna:0.1.2
image: kortix/suna:0.1.2.1
ports:
- "6080:6080" # noVNC web interface
- "5901:5901" # VNC port
@ -15,7 +15,7 @@ services:
- "8080:8080" # HTTP server port
environment:
- ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
- CHROME_PATH=/usr/bin/google-chrome
- CHROME_PATH=/usr/bin/chromium
- CHROME_USER_DATA=/app/data/chrome_data
- CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
- CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
@ -27,8 +27,13 @@ services:
- VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
- CHROME_DEBUGGING_PORT=9222
- CHROME_DEBUGGING_HOST=localhost
- CHROME_FLAGS=${CHROME_FLAGS:-"--no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-extensions --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-breakpad --disable-component-extensions-with-background-pages --disable-features=TranslateUI,BlinkGenPropertyTrees --disable-ipc-flooding-protection --disable-renderer-backgrounding --enable-features=NetworkService,NetworkServiceInProcess --force-color-profile=srgb --metrics-recording-only --mute-audio"}
- PLAYWRIGHT_SKIP_BROWSER_GC=1
- PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
- NODE_OPTIONS="--max-old-space-size=4096"
volumes:
- /tmp/.X11-unix:/tmp/.X11-unix
- chrome_data:/app/data/chrome_data
restart: unless-stopped
shm_size: '2gb'
cap_add:
@ -42,3 +47,10 @@ services:
interval: 10s
timeout: 5s
retries: 3
ulimits:
nofile:
soft: 65536
hard: 65536
volumes:
chrome_data:

View File

@ -155,8 +155,8 @@ class Configuration:
STRIPE_DEFAULT_TRIAL_DAYS: int = 14
# Stripe Product IDs
STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD' # Production product ID
STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY' # Staging product ID
STRIPE_PRODUCT_ID_PROD: str = 'prod_SCl7AQ2C8kK1CD'
STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
# Sandbox configuration
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2"