mirror of https://github.com/kortix-ai/suna.git
Merge branch 'PRODUCTION' into sync/production
This commit is contained in:
commit
4ef4eeceb1
|
@ -290,7 +290,9 @@ async def run_agent(
|
|||
|
||||
latest_user_message = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'user').order('created_at', desc=True).limit(1).execute()
|
||||
if latest_user_message.data and len(latest_user_message.data) > 0:
|
||||
data = json.loads(latest_user_message.data[0]['content'])
|
||||
data = latest_user_message.data[0]['content']
|
||||
if isinstance(data, str):
|
||||
data = json.loads(data)
|
||||
trace.update(input=data['content'])
|
||||
|
||||
while continue_execution and iteration_count < max_iterations:
|
||||
|
@ -327,14 +329,16 @@ async def run_agent(
|
|||
latest_browser_state_msg = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
|
||||
if latest_browser_state_msg.data and len(latest_browser_state_msg.data) > 0:
|
||||
try:
|
||||
browser_content = json.loads(latest_browser_state_msg.data[0]["content"])
|
||||
browser_content = latest_browser_state_msg.data[0]["content"]
|
||||
if isinstance(browser_content, str):
|
||||
browser_content = json.loads(browser_content)
|
||||
screenshot_base64 = browser_content.get("screenshot_base64")
|
||||
screenshot_url = browser_content.get("screenshot_url")
|
||||
screenshot_url = browser_content.get("image_url")
|
||||
|
||||
# Create a copy of the browser state without screenshot data
|
||||
browser_state_text = browser_content.copy()
|
||||
browser_state_text.pop('screenshot_base64', None)
|
||||
browser_state_text.pop('screenshot_url', None)
|
||||
browser_state_text.pop('image_url', None)
|
||||
|
||||
if browser_state_text:
|
||||
temp_message_content_list.append({
|
||||
|
@ -348,6 +352,7 @@ async def run_agent(
|
|||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": screenshot_url,
|
||||
"format": "image/jpeg"
|
||||
}
|
||||
})
|
||||
elif screenshot_base64:
|
||||
|
@ -369,7 +374,7 @@ async def run_agent(
|
|||
latest_image_context_msg = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'image_context').order('created_at', desc=True).limit(1).execute()
|
||||
if latest_image_context_msg.data and len(latest_image_context_msg.data) > 0:
|
||||
try:
|
||||
image_context_content = json.loads(latest_image_context_msg.data[0]["content"])
|
||||
image_context_content = latest_image_context_msg.data[0]["content"] if isinstance(latest_image_context_msg.data[0]["content"], dict) else json.loads(latest_image_context_msg.data[0]["content"])
|
||||
base64_image = image_context_content.get("base64")
|
||||
mime_type = image_context_content.get("mime_type")
|
||||
file_path = image_context_content.get("file_path", "unknown file")
|
||||
|
|
|
@ -20,7 +20,7 @@ You can modify the sandbox environment for development or to add new capabilitie
|
|||
```
|
||||
cd backend/sandbox/docker
|
||||
docker compose build
|
||||
docker push kortix/suna:0.1.2
|
||||
docker push kortix/suna:0.1.3
|
||||
```
|
||||
3. Test your changes locally using docker-compose
|
||||
|
||||
|
|
|
@ -96,11 +96,6 @@ WORKDIR /app
|
|||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy server script
|
||||
COPY . /app
|
||||
COPY server.py /app/server.py
|
||||
COPY browser_api.py /app/browser_api.py
|
||||
|
||||
# Install Playwright and browsers with system dependencies
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
# Install Playwright package first
|
||||
|
@ -111,6 +106,11 @@ RUN playwright install chromium
|
|||
# Verify installation
|
||||
RUN python -c "from playwright.sync_api import sync_playwright; print('Playwright installation verified')"
|
||||
|
||||
# Copy server script
|
||||
COPY . /app
|
||||
COPY server.py /app/server.py
|
||||
COPY browser_api.py /app/browser_api.py
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from fastapi import FastAPI, APIRouter, HTTPException, Body
|
||||
from playwright.async_api import async_playwright, Browser, Page
|
||||
from playwright.async_api import async_playwright, Browser, BrowserContext, Page
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict, Any
|
||||
import asyncio
|
||||
|
@ -282,6 +282,7 @@ class BrowserAutomation:
|
|||
def __init__(self):
|
||||
self.router = APIRouter()
|
||||
self.browser: Browser = None
|
||||
self.browser_context: BrowserContext = None
|
||||
self.pages: List[Page] = []
|
||||
self.current_page_index: int = 0
|
||||
self.logger = logging.getLogger("browser_automation")
|
||||
|
@ -341,6 +342,7 @@ class BrowserAutomation:
|
|||
|
||||
try:
|
||||
self.browser = await playwright.chromium.launch(**launch_options)
|
||||
self.browser_context = await self.browser.new_context(viewport={'width': 1024, 'height': 768})
|
||||
print("Browser launched successfully")
|
||||
except Exception as browser_error:
|
||||
print(f"Failed to launch browser: {browser_error}")
|
||||
|
@ -348,6 +350,7 @@ class BrowserAutomation:
|
|||
print("Retrying with minimal options...")
|
||||
launch_options = {"timeout": 90000}
|
||||
self.browser = await playwright.chromium.launch(**launch_options)
|
||||
self.browser_context = await self.browser.new_context(viewport={'width': 1024, 'height': 768})
|
||||
print("Browser launched with minimal options")
|
||||
|
||||
try:
|
||||
|
@ -356,13 +359,20 @@ class BrowserAutomation:
|
|||
self.current_page_index = 0
|
||||
except Exception as page_error:
|
||||
print(f"Error finding existing page, creating new one. ( {page_error})")
|
||||
page = await self.browser.new_page(viewport={'width': 1024, 'height': 768})
|
||||
page = await self.browser_context.new_page()
|
||||
print("New page created successfully")
|
||||
self.pages.append(page)
|
||||
self.current_page_index = 0
|
||||
# Navigate directly to google.com instead of about:blank
|
||||
await page.goto("https://www.google.com", wait_until="domcontentloaded", timeout=30000)
|
||||
print("Navigated to google.com")
|
||||
|
||||
try:
|
||||
self.browser_context.on("page", self.handle_page_created)
|
||||
except Exception as e:
|
||||
print(f"Error setting up page event handler: {e}")
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
print("Browser initialization completed successfully")
|
||||
except Exception as e:
|
||||
|
@ -372,8 +382,17 @@ class BrowserAutomation:
|
|||
|
||||
async def shutdown(self):
    """Clean up the browser context and browser instance on shutdown.

    The context is closed first, then the browser. The browser is closed
    even when closing the context raises, so a failing context close cannot
    leave the browser open. Any exception raised while closing still
    propagates to the caller once cleanup has been attempted.
    """
    try:
        if self.browser_context:
            await self.browser_context.close()
    finally:
        # Runs on both the success and the failure path of the context close.
        if self.browser:
            await self.browser.close()
|
||||
|
||||
async def handle_page_created(self, page: "Page"):
    """Handle new page creation.

    Waits briefly, then records ``page`` in ``self.pages`` and makes it the
    current page. A page that is already tracked is not appended a second
    time; it is simply selected as the current page, so the list never
    holds the same page twice because of this handler.
    """
    # Short pause before registering the page
    # (presumably to let the new page start loading -- TODO confirm).
    await asyncio.sleep(0.5)
    if page in self.pages:
        # Already registered elsewhere: select it without duplicating it.
        self.current_page_index = self.pages.index(page)
    else:
        self.pages.append(page)
        self.current_page_index = len(self.pages) - 1
    print(f"Page created: {page.url}; current page index: {self.current_page_index}")
|
||||
|
||||
async def get_current_page(self) -> Page:
|
||||
"""Get the current active page"""
|
||||
|
@ -958,6 +977,7 @@ class BrowserAutomation:
|
|||
# Give time for any navigation or DOM updates to occur
|
||||
await page.wait_for_load_state("networkidle", timeout=5000)
|
||||
|
||||
await asyncio.sleep(1)
|
||||
# Get updated state after action
|
||||
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_coordinates({action.x}, {action.y})")
|
||||
|
||||
|
@ -977,6 +997,7 @@ class BrowserAutomation:
|
|||
|
||||
# Try to get state even after error
|
||||
try:
|
||||
await asyncio.sleep(1)
|
||||
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state("click_coordinates_error_recovery")
|
||||
return self.build_action_result(
|
||||
False,
|
||||
|
@ -1076,7 +1097,7 @@ class BrowserAutomation:
|
|||
await page.wait_for_load_state("networkidle", timeout=5000)
|
||||
except Exception as wait_error:
|
||||
print(f"Timeout or error waiting for network idle after click: {wait_error}")
|
||||
await asyncio.sleep(1) # Fallback wait
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# Get updated state after action
|
||||
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_element({action.index})")
|
||||
|
@ -1161,6 +1182,7 @@ class BrowserAutomation:
|
|||
# Fallback to xpath
|
||||
await page.fill(f"//{element.tag_name}[{action.index}]", action.text)
|
||||
|
||||
await asyncio.sleep(1)
|
||||
# Get updated state after action
|
||||
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"input_text({action.index}, '{action.text}')")
|
||||
|
||||
|
@ -1192,6 +1214,7 @@ class BrowserAutomation:
|
|||
page = await self.get_current_page()
|
||||
await page.keyboard.press(action.keys)
|
||||
|
||||
await asyncio.sleep(1)
|
||||
# Get updated state after action
|
||||
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"send_keys({action.keys})")
|
||||
|
||||
|
@ -1267,7 +1290,7 @@ class BrowserAutomation:
|
|||
try:
|
||||
print(f"Attempting to open new tab with URL: {action.url}")
|
||||
# Create new page in same browser instance
|
||||
new_page = await self.browser.new_page()
|
||||
new_page = await self.browser_context.new_page()
|
||||
print(f"New page created successfully")
|
||||
|
||||
# Navigate to the URL
|
||||
|
|
|
@ -6,12 +6,12 @@ services:
|
|||
dockerfile: ${DOCKERFILE:-Dockerfile}
|
||||
args:
|
||||
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
||||
image: kortix/suna:0.1.2.8
|
||||
image: kortix/suna:0.1.3
|
||||
ports:
|
||||
- "6080:6080" # noVNC web interface
|
||||
- "5901:5901" # VNC port
|
||||
- "9222:9222" # Chrome remote debugging port
|
||||
- "8000:8000" # API server port
|
||||
- "8003:8003" # API server port
|
||||
- "8080:8080" # HTTP server port
|
||||
environment:
|
||||
- ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
|
||||
|
|
|
@ -159,7 +159,7 @@ class Configuration:
|
|||
STRIPE_PRODUCT_ID_STAGING: str = 'prod_SCgIj3G7yPOAWY'
|
||||
|
||||
# Sandbox configuration
|
||||
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.2.8"
|
||||
SANDBOX_IMAGE_NAME = "kortix/suna:0.1.3"
|
||||
SANDBOX_ENTRYPOINT = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf"
|
||||
|
||||
# LangFuse configuration
|
||||
|
|
|
@ -115,7 +115,7 @@ As part of the setup, you'll need to:
|
|||
1. Create a Daytona account
|
||||
2. Generate an API key
|
||||
3. Create a Docker image:
|
||||
- Image name: `kortix/suna:0.1.2.8`
|
||||
- Image name: `kortix/suna:0.1.3`
|
||||
- Entrypoint: `/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf`
|
||||
|
||||
## Manual Configuration
|
||||
|
|
|
@ -65,7 +65,7 @@ export function BrowserToolView({
|
|||
const [imageError, setImageError] = React.useState(false);
|
||||
|
||||
try {
|
||||
const topLevelParsed = safeJsonParse<{ content?: string }>(toolContent, {});
|
||||
const topLevelParsed = safeJsonParse<{ content?: any }>(toolContent, {});
|
||||
const innerContentString = topLevelParsed?.content || toolContent;
|
||||
if (innerContentString && typeof innerContentString === 'string') {
|
||||
const toolResultMatch = innerContentString.match(/ToolResult\([^)]*output='([\s\S]*?)'(?:\s*,|\s*\))/);
|
||||
|
@ -116,7 +116,18 @@ export function BrowserToolView({
|
|||
screenshotUrl = finalParsedOutput?.image_url || null;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (innerContentString && typeof innerContentString === "object") {
|
||||
screenshotUrl = (() => {
|
||||
if (!innerContentString) return null;
|
||||
if (!("tool_execution" in innerContentString)) return null;
|
||||
if (!("result" in innerContentString.tool_execution)) return null;
|
||||
if (!("output" in innerContentString.tool_execution.result)) return null;
|
||||
if (!("image_url" in innerContentString.tool_execution.result.output)) return null;
|
||||
if (typeof innerContentString.tool_execution.result.output.image_url !== "string") return null;
|
||||
return innerContentString.tool_execution.result.output.image_url;
|
||||
})()
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
}
|
||||
|
||||
|
|
2
setup.py
2
setup.py
|
@ -237,7 +237,7 @@ def collect_daytona_info():
|
|||
print_info("Then, generate an API key from 'Keys' menu")
|
||||
print_info("After that, go to Images (https://app.daytona.io/dashboard/images)")
|
||||
print_info("Click '+ Create Image'")
|
||||
print_info(f"Enter 'kortix/suna:0.1.2.8' as the image name")
|
||||
print_info(f"Enter 'kortix/suna:0.1.3' as the image name")
|
||||
print_info(f"Set '/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf' as the Entrypoint")
|
||||
|
||||
input("Press Enter to continue once you've completed these steps...")
|
||||
|
|
Loading…
Reference in New Issue