mirror of https://github.com/kortix-ai/suna.git
preview
This commit is contained in:
parent
ad78a0d4f3
commit
c4d30e270b
|
@ -58,7 +58,8 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
|||
await client.table('projects').update({
|
||||
'sandbox': {
|
||||
'id': sandbox_id,
|
||||
'pass': sandbox_pass
|
||||
'pass': sandbox_pass,
|
||||
'vnc_preview': sandbox.get_preview_link(6080)
|
||||
}
|
||||
}).eq('project_id', project_id).execute()
|
||||
|
||||
|
@ -114,6 +115,12 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
|||
print(f"Last message was from assistant, stopping execution")
|
||||
continue_execution = False
|
||||
break
|
||||
# Get the latest message in the messages table whose type is browser_state
|
||||
latest_browser_state = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
|
||||
if latest_browser_state.data and len(latest_browser_state.data) > 0:
|
||||
temporary_message = latest_browser_state.data[0].get('content', '')
|
||||
else:
|
||||
temporary_message = None
|
||||
|
||||
response = await thread_manager.run_thread(
|
||||
thread_id=thread_id,
|
||||
|
@ -124,6 +131,7 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
|||
llm_max_tokens=64000,
|
||||
tool_choice="auto",
|
||||
max_xml_tool_calls=1,
|
||||
# temporary_message=
|
||||
processor_config=ProcessorConfig(
|
||||
xml_tool_calling=True,
|
||||
native_tool_calling=False,
|
||||
|
|
|
@ -30,9 +30,9 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
if method == "GET" and params:
|
||||
query_params = "&".join([f"{k}={v}" for k, v in params.items()])
|
||||
url = f"{url}?{query_params}"
|
||||
curl_cmd = f"curl -X {method} '{url}' -H 'Content-Type: application/json'"
|
||||
curl_cmd = f"curl -s -X {method} '{url}' -H 'Content-Type: application/json'"
|
||||
else:
|
||||
curl_cmd = f"curl -X {method} '{url}' -H 'Content-Type: application/json'"
|
||||
curl_cmd = f"curl -s -X {method} '{url}' -H 'Content-Type: application/json'"
|
||||
if params:
|
||||
json_data = json.dumps(params)
|
||||
curl_cmd += f" -d '{json_data}'"
|
||||
|
@ -46,7 +46,43 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
try:
|
||||
result = json.loads(response.result)
|
||||
logger.info("Browser automation request completed successfully")
|
||||
return self.success_response(result)
|
||||
|
||||
# Create a cleaned version of the result based on BrowserActionResult schema
|
||||
cleaned_result = {
|
||||
"success": result.get("success", False),
|
||||
"message": result.get("message", ""),
|
||||
"error": result.get("error", ""),
|
||||
"url": result.get("url"),
|
||||
"title": result.get("title"),
|
||||
"elements": result.get("elements"),
|
||||
"pixels_above": result.get("pixels_above", 0),
|
||||
"pixels_below": result.get("pixels_below", 0),
|
||||
"content": result.get("content"),
|
||||
"element_count": result.get("element_count", 0),
|
||||
"interactive_elements": result.get("interactive_elements"),
|
||||
"viewport_width": result.get("viewport_width"),
|
||||
"viewport_height": result.get("viewport_height")
|
||||
}
|
||||
|
||||
# Print screenshot info to console but don't return it
|
||||
if "screenshot_base64" in result:
|
||||
has_screenshot = bool(result.get("screenshot_base64"))
|
||||
print(f"\033[95mScreenshot captured: {has_screenshot}\033[0m")
|
||||
|
||||
# Print viewport info if available
|
||||
if cleaned_result["viewport_width"] and cleaned_result["viewport_height"]:
|
||||
print(f"\033[95mViewport size: {cleaned_result['viewport_width']}x{cleaned_result['viewport_height']}\033[0m")
|
||||
|
||||
# Print interactive elements count
|
||||
if cleaned_result["element_count"] > 0:
|
||||
print(f"\033[95mFound {cleaned_result['element_count']} interactive elements\033[0m")
|
||||
|
||||
print("************************************************")
|
||||
print(cleaned_result)
|
||||
print("************************************************")
|
||||
|
||||
return self.success_response(cleaned_result)
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"Failed to parse response JSON: {response.result}")
|
||||
return self.fail_response(f"Failed to parse response JSON: {response.result}")
|
||||
|
@ -99,45 +135,45 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
print(f"\033[95mNavigating to: {url}\033[0m")
|
||||
return await self._execute_browser_action("navigate_to", {"url": url})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_search_google",
|
||||
"description": "Search Google with the provided query",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The search query to use"
|
||||
}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-search-google",
|
||||
mappings=[
|
||||
{"param_name": "query", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-search-google>
|
||||
artificial intelligence news
|
||||
</browser-search-google>
|
||||
'''
|
||||
)
|
||||
async def browser_search_google(self, query: str) -> ToolResult:
|
||||
"""Search Google with the provided query
|
||||
# @openapi_schema({
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": "browser_search_google",
|
||||
# "description": "Search Google with the provided query",
|
||||
# "parameters": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "query": {
|
||||
# "type": "string",
|
||||
# "description": "The search query to use"
|
||||
# }
|
||||
# },
|
||||
# "required": ["query"]
|
||||
# }
|
||||
# }
|
||||
# })
|
||||
# @xml_schema(
|
||||
# tag_name="browser-search-google",
|
||||
# mappings=[
|
||||
# {"param_name": "query", "node_type": "content", "path": "."}
|
||||
# ],
|
||||
# example='''
|
||||
# <browser-search-google>
|
||||
# artificial intelligence news
|
||||
# </browser-search-google>
|
||||
# '''
|
||||
# )
|
||||
# async def browser_search_google(self, query: str) -> ToolResult:
|
||||
# """Search Google with the provided query
|
||||
|
||||
Args:
|
||||
query (str): The search query to use
|
||||
# Args:
|
||||
# query (str): The search query to use
|
||||
|
||||
Returns:
|
||||
dict: Result of the execution
|
||||
"""
|
||||
print(f"\033[95mSearching Google for: {query}\033[0m")
|
||||
return await self._execute_browser_action("search_google", {"query": query})
|
||||
# Returns:
|
||||
# dict: Result of the execution
|
||||
# """
|
||||
# print(f"\033[95mSearching Google for: {query}\033[0m")
|
||||
# return await self._execute_browser_action("search_google", {"query": query})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
|
@ -269,7 +305,7 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
@xml_schema(
|
||||
tag_name="browser-input-text",
|
||||
mappings=[
|
||||
{"param_name": "index", "node_type": "attribute", "path": "@index"},
|
||||
{"param_name": "index", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "text", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
|
@ -371,45 +407,45 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
print(f"\033[95mSwitching to tab: {page_id}\033[0m")
|
||||
return await self._execute_browser_action("switch_tab", {"page_id": page_id})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_open_tab",
|
||||
"description": "Open a new browser tab with the specified URL",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "The URL to open in the new tab"
|
||||
}
|
||||
},
|
||||
"required": ["url"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-open-tab",
|
||||
mappings=[
|
||||
{"param_name": "url", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-open-tab>
|
||||
https://example.com
|
||||
</browser-open-tab>
|
||||
'''
|
||||
)
|
||||
async def browser_open_tab(self, url: str) -> ToolResult:
|
||||
"""Open a new browser tab with the specified URL
|
||||
# @openapi_schema({
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": "browser_open_tab",
|
||||
# "description": "Open a new browser tab with the specified URL",
|
||||
# "parameters": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "url": {
|
||||
# "type": "string",
|
||||
# "description": "The URL to open in the new tab"
|
||||
# }
|
||||
# },
|
||||
# "required": ["url"]
|
||||
# }
|
||||
# }
|
||||
# })
|
||||
# @xml_schema(
|
||||
# tag_name="browser-open-tab",
|
||||
# mappings=[
|
||||
# {"param_name": "url", "node_type": "content", "path": "."}
|
||||
# ],
|
||||
# example='''
|
||||
# <browser-open-tab>
|
||||
# https://example.com
|
||||
# </browser-open-tab>
|
||||
# '''
|
||||
# )
|
||||
# async def browser_open_tab(self, url: str) -> ToolResult:
|
||||
# """Open a new browser tab with the specified URL
|
||||
|
||||
Args:
|
||||
url (str): The URL to open in the new tab
|
||||
# Args:
|
||||
# url (str): The URL to open in the new tab
|
||||
|
||||
Returns:
|
||||
dict: Result of the execution
|
||||
"""
|
||||
print(f"\033[95mOpening new tab with URL: {url}\033[0m")
|
||||
return await self._execute_browser_action("open_tab", {"url": url})
|
||||
# Returns:
|
||||
# dict: Result of the execution
|
||||
# """
|
||||
# print(f"\033[95mOpening new tab with URL: {url}\033[0m")
|
||||
# return await self._execute_browser_action("open_tab", {"url": url})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
|
@ -451,72 +487,64 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
print(f"\033[95mClosing tab: {page_id}\033[0m")
|
||||
return await self._execute_browser_action("close_tab", {"page_id": page_id})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_extract_content",
|
||||
"description": "Extract content from the current page based on the provided goal",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"goal": {
|
||||
"type": "string",
|
||||
"description": "The extraction goal (e.g., 'extract all links', 'find product information')"
|
||||
}
|
||||
},
|
||||
"required": ["goal"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-extract-content",
|
||||
mappings=[
|
||||
{"param_name": "goal", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-extract-content>
|
||||
Extract all links on the page
|
||||
</browser-extract-content>
|
||||
'''
|
||||
)
|
||||
async def browser_extract_content(self, goal: str) -> ToolResult:
|
||||
"""Extract content from the current page based on the provided goal
|
||||
# @openapi_schema({
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": "browser_extract_content",
|
||||
# "description": "Extract content from the current page based on the provided goal",
|
||||
# "parameters": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "goal": {
|
||||
# "type": "string",
|
||||
# "description": "The extraction goal (e.g., 'extract all links', 'find product information')"
|
||||
# }
|
||||
# },
|
||||
# "required": ["goal"]
|
||||
# }
|
||||
# }
|
||||
# })
|
||||
# @xml_schema(
|
||||
# tag_name="browser-extract-content",
|
||||
# mappings=[
|
||||
# {"param_name": "goal", "node_type": "content", "path": "."}
|
||||
# ],
|
||||
# example='''
|
||||
# <browser-extract-content>
|
||||
# Extract all links on the page
|
||||
# </browser-extract-content>
|
||||
# '''
|
||||
# )
|
||||
# async def browser_extract_content(self, goal: str) -> ToolResult:
|
||||
# """Extract content from the current page based on the provided goal
|
||||
|
||||
Args:
|
||||
goal (str): The extraction goal
|
||||
# Args:
|
||||
# goal (str): The extraction goal
|
||||
|
||||
Returns:
|
||||
dict: Result of the execution
|
||||
"""
|
||||
print(f"\033[95mExtracting content with goal: {goal}\033[0m")
|
||||
return await self._execute_browser_action("extract_content", {"goal": goal})
|
||||
# Returns:
|
||||
# dict: Result of the execution
|
||||
# """
|
||||
# print(f"\033[95mExtracting content with goal: {goal}\033[0m")
|
||||
# result = await self._execute_browser_action("extract_content", {"goal": goal})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_save_pdf",
|
||||
"description": "Save the current page as a PDF file",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {}
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-save-pdf",
|
||||
mappings=[],
|
||||
example='''
|
||||
<browser-save-pdf></browser-save-pdf>
|
||||
'''
|
||||
)
|
||||
async def browser_save_pdf(self) -> ToolResult:
|
||||
"""Save the current page as a PDF file
|
||||
# # Format content for better readability
|
||||
# if result.get("success"):
|
||||
# print(f"\033[92mContent extraction successful\033[0m")
|
||||
# content = result.data.get("content", "")
|
||||
# url = result.data.get("url", "")
|
||||
# title = result.data.get("title", "")
|
||||
|
||||
Returns:
|
||||
dict: Result of the execution
|
||||
"""
|
||||
print(f"\033[95mSaving current page as PDF\033[0m")
|
||||
return await self._execute_browser_action("save_pdf")
|
||||
# if content:
|
||||
# content_preview = content[:200] + "..." if len(content) > 200 else content
|
||||
# print(f"\033[95mExtracted content from {title} ({url}):\033[0m")
|
||||
# print(f"\033[96m{content_preview}\033[0m")
|
||||
# print(f"\033[95mTotal content length: {len(content)} characters\033[0m")
|
||||
# else:
|
||||
# print(f"\033[93mNo content extracted from {url}\033[0m")
|
||||
# else:
|
||||
# print(f"\033[91mFailed to extract content: {result.data.get('error', 'Unknown error')}\033[0m")
|
||||
|
||||
# return result
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
|
@ -712,7 +740,7 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
@xml_schema(
|
||||
tag_name="browser-select-dropdown-option",
|
||||
mappings=[
|
||||
{"param_name": "index", "node_type": "attribute", "path": "@index"},
|
||||
{"param_name": "index", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "text", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
|
@ -773,12 +801,12 @@ class SandboxBrowserTool(SandboxToolsBase):
|
|||
@xml_schema(
|
||||
tag_name="browser-drag-drop",
|
||||
mappings=[
|
||||
{"param_name": "element_source", "node_type": "attribute", "path": "@element_source"},
|
||||
{"param_name": "element_target", "node_type": "attribute", "path": "@element_target"},
|
||||
{"param_name": "coord_source_x", "node_type": "attribute", "path": "@coord_source_x"},
|
||||
{"param_name": "coord_source_y", "node_type": "attribute", "path": "@coord_source_y"},
|
||||
{"param_name": "coord_target_x", "node_type": "attribute", "path": "@coord_target_x"},
|
||||
{"param_name": "coord_target_y", "node_type": "attribute", "path": "@coord_target_y"}
|
||||
{"param_name": "element_source", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "element_target", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_source_x", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_source_y", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_target_x", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_target_y", "node_type": "attribute", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-drag-drop element_source="#draggable" element_target="#droppable"></browser-drag-drop>
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,272 +0,0 @@
|
|||
import asyncio
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
from enum import Enum
|
||||
from playwright.async_api import async_playwright, Browser, Page, Mouse, Keyboard
|
||||
import base64
|
||||
|
||||
class MouseButton(str, Enum):
    """Mouse buttons accepted by the mouse endpoints.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    left = "left"
    middle = "middle"
    right = "right"
|
||||
|
||||
class Position(BaseModel):
    """Coordinate pair consumed by ``move_mouse``; both fields default to None."""

    x: Optional[int] = None
    y: Optional[int] = None
|
||||
|
||||
class MouseAction(BaseModel):
    """Payload for the click / button-down / button-up mouse handlers."""

    # Target coordinates; only click_mouse reads them.
    x: Optional[int] = None
    y: Optional[int] = None
    # Number of clicks, forwarded as Playwright's click_count.
    clicks: Optional[int] = 1
    button: MouseButton = MouseButton.left
    # click_mouse multiplies this by 1000 before handing it to Playwright.
    delay: Optional[float] = 0.0
|
||||
|
||||
class KeyboardAction(BaseModel):
    """Single-key payload. No handler in this file takes it as a parameter."""

    key: str
|
||||
|
||||
class KeyboardPress(BaseModel):
    """Payload for ``press_key``: one key name or a list pressed in order."""

    keys: Union[str, List[str]]
    # press_key sleeps this long after each key when `keys` is a list.
    delay: Optional[float] = 0.0
|
||||
|
||||
class WriteAction(BaseModel):
    """Payload for ``write_text``: the text to type and an optional delay."""

    message: str
    # write_text multiplies this by 1000 before handing it to Playwright.
    delay: Optional[float] = 0.0
|
||||
|
||||
class HotkeyAction(BaseModel):
    """Payload for ``press_hotkey``: keys held down together, in order."""

    keys: List[str]
    # press_hotkey sleeps this long once, after all keys are released.
    delay: Optional[float] = 0.0
|
||||
|
||||
class BrowserAutomation:
    """HTTP routes that drive a single Playwright page.

    The instance owns an ``APIRouter`` with mouse, keyboard, navigation and
    screenshot endpoints under ``/automation``. ``startup`` attaches to a
    browser over CDP and opens the page every handler operates on;
    ``shutdown`` releases it again.

    Every action handler returns a dict instead of raising: ``{"success":
    True}`` on success, ``{"success": False, "error": <message>}`` on failure.
    """

    def __init__(self):
        """Create the router, clear the browser handles and register routes."""
        self.router = APIRouter()
        # Playwright driver handle, kept so shutdown() can stop it.
        self._playwright = None
        self.browser: Optional[Browser] = None
        self.page: Optional[Page] = None
        self.mouse: Optional[Mouse] = None
        self.keyboard: Optional[Keyboard] = None

        # Register lifecycle hooks
        self.router.on_startup.append(self.startup)
        self.router.on_shutdown.append(self.shutdown)

        # Register routes
        self.router.get("/automation/mouse/position")(self.get_mouse_position)
        self.router.post("/automation/mouse/move")(self.move_mouse)
        self.router.post("/automation/mouse/click")(self.click_mouse)
        self.router.post("/automation/mouse/down")(self.mouse_down)
        self.router.post("/automation/mouse/up")(self.mouse_up)
        self.router.post("/automation/keyboard/press")(self.press_key)
        self.router.post("/automation/keyboard/write")(self.write_text)
        self.router.post("/automation/keyboard/hotkey")(self.press_hotkey)
        self.router.post("/automation/navigate_to")(self.navigate_to)
        self.router.post("/automation/screenshot")(self.take_screenshot)

    async def startup(self):
        """Attach to the browser on port 9222 and open a fresh page."""
        self._playwright = await async_playwright().start()
        # Connect to the persistent browser running on port 9222
        self.browser = await self._playwright.chromium.connect_over_cdp("http://localhost:9222")
        self.page = await self.browser.new_page()
        self.mouse = self.page.mouse
        self.keyboard = self.page.keyboard

    async def shutdown(self):
        """Close the browser handle and stop the Playwright driver."""
        try:
            if self.browser:
                await self.browser.close()
        finally:
            # Stop the driver even if closing the browser failed, so the
            # handle started in startup() is not leaked.
            if self._playwright:
                await self._playwright.stop()
                self._playwright = None

    async def get_mouse_position(self):
        """Return a placeholder mouse position.

        No position is tracked here, so the result is always the origin.
        """
        return {"x": 0, "y": 0}  # Default position

    async def move_mouse(self, action: Position):
        """Move the mouse to ``(action.x, action.y)``."""
        try:
            await self.mouse.move(action.x, action.y)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def click_mouse(self, action: MouseAction):
        """Click at ``(action.x, action.y)`` with the requested button.

        ``action.delay`` is multiplied by 1000 before being passed to
        Playwright; a falsy delay is sent as ``None``.
        """
        try:
            await self.mouse.click(
                action.x,
                action.y,
                button=action.button,
                click_count=action.clicks,
                delay=action.delay * 1000 if action.delay else None,
            )
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def mouse_down(self, action: MouseAction):
        """Press ``action.button`` down at the current pointer location."""
        try:
            await self.mouse.down(button=action.button)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def mouse_up(self, action: MouseAction):
        """Release ``action.button``."""
        try:
            await self.mouse.up(button=action.button)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def press_key(self, action: KeyboardPress):
        """Press one key, or each key of a list in order.

        ``action.delay`` is only applied between keys of a list; it is
        ignored when ``action.keys`` is a single string.
        """
        try:
            if isinstance(action.keys, list):
                for key in action.keys:
                    await self.keyboard.press(key)
                    if action.delay:
                        await asyncio.sleep(action.delay)
            else:
                await self.keyboard.press(action.keys)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def write_text(self, action: WriteAction):
        """Type ``action.message`` through the page keyboard.

        ``action.delay`` is multiplied by 1000 before being passed to
        Playwright; a falsy delay is sent as ``None``.
        """
        try:
            # The original fell back to the undefined name `undefined` here,
            # which made every call without a delay fail with NameError.
            await self.keyboard.type(
                action.message,
                delay=action.delay * 1000 if action.delay else None,
            )
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def press_hotkey(self, action: HotkeyAction):
        """Hold all of ``action.keys`` down together, then release them."""
        try:
            # Press all keys in sequence
            for key in action.keys:
                await self.keyboard.down(key)

            # Release all keys in reverse order
            for key in reversed(action.keys):
                await self.keyboard.up(key)

            if action.delay:
                await asyncio.sleep(action.delay)

            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def navigate_to(self, url: str):
        """Navigate the page to ``url``."""
        try:
            await self.page.goto(url)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def take_screenshot(self) -> Dict[str, str]:
        """Screenshot the current page.

        Returns:
            ``{"image": <base64 string>}`` on success, ``{"error": <message>}``
            on failure.
        """
        try:
            screenshot_bytes = await self.page.screenshot()
            return {"image": base64.b64encode(screenshot_bytes).decode()}
        except Exception as e:
            return {"error": str(e)}
|
||||
|
||||
# Create a singleton instance
|
||||
automation_service = BrowserAutomation()
|
||||
|
||||
|
||||
async def run_demo():
    """Run a demonstration of browser automation capabilities.

    Starts a fresh ``BrowserAutomation`` service, walks through navigation,
    screenshots, mouse and keyboard actions against playwright.dev, and
    always shuts the service down afterwards. Any exception is reported on
    stderr and re-raised.
    """
    # Imported locally: the error handler below needs sys.stderr, and the
    # module never imported sys, so the handler itself used to raise NameError.
    import sys

    print("Starting browser automation demo...")

    # Initialize the automation service
    service = BrowserAutomation()
    await service.startup()

    try:
        # 1. Navigate to a test website
        await service.page.goto('https://playwright.dev')
        print("✓ Navigated to playwright.dev")
        await asyncio.sleep(2)

        # 2. Take a screenshot
        result = await service.take_screenshot()
        if 'image' in result:
            print("✓ Took initial screenshot")

        # 3. Move mouse to center and click
        center_pos = MouseAction(
            x=500,
            y=300,
            clicks=1,
        )
        await service.move_mouse(Position(x=center_pos.x, y=center_pos.y))
        print("✓ Moved mouse to center")
        await asyncio.sleep(1)

        await service.click_mouse(center_pos)
        print("✓ Clicked at center")
        await asyncio.sleep(1)

        # 4. Type some text into search box
        # First, click the search button
        await service.page.click('button[type="button"]:has-text("Search")')
        print("✓ Clicked search button")
        await asyncio.sleep(1)

        # Type search term
        write_action = WriteAction(
            message="browser automation",
            delay=0.1,
        )
        await service.write_text(write_action)
        print("✓ Typed search text")
        await asyncio.sleep(2)

        # 5. Press Enter
        enter_action = KeyboardPress(
            keys="Enter",
        )
        await service.press_key(enter_action)
        print("✓ Pressed Enter")
        await asyncio.sleep(2)

        # 6. Demonstrate hotkeys (e.g., Ctrl+A to select all)
        hotkey_action = HotkeyAction(
            keys=["Control", "a"],
        )
        await service.press_hotkey(hotkey_action)
        print("✓ Pressed Ctrl+A")
        await asyncio.sleep(1)

        # 7. Take another screenshot after interactions
        result = await service.take_screenshot()
        if 'image' in result:
            print("✓ Took final screenshot")

        print("\nDemo completed successfully! 🎉")

    except Exception as e:
        print(f"Error during demo: {str(e)}", file=sys.stderr)
        raise
    finally:
        # Clean up
        await service.shutdown()
        print("Browser closed.")
|
||||
|
||||
def main():
    """Print the demo banner, then run the async demo to completion."""
    for banner_line in ("Browser Automation Demo", "======================"):
        print(banner_line)
    asyncio.run(run_demo())
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -6,7 +6,7 @@ services:
|
|||
dockerfile: ${DOCKERFILE:-Dockerfile}
|
||||
args:
|
||||
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
||||
image: adamcohenhillel/kortix-suna:0.0.10
|
||||
image: adamcohenhillel/kortix-suna:0.0.13
|
||||
ports:
|
||||
- "6080:6080" # noVNC web interface
|
||||
- "5901:5901" # VNC port
|
||||
|
|
|
@ -78,7 +78,7 @@ def create_sandbox(password: str):
|
|||
logger.debug("OPENAI_API_KEY configured for sandbox")
|
||||
|
||||
sandbox = daytona.create(CreateSandboxParams(
|
||||
image="adamcohenhillel/kortix-suna:0.0.10",
|
||||
image="adamcohenhillel/kortix-suna:0.0.13",
|
||||
public=True,
|
||||
env_vars={
|
||||
"CHROME_PERSISTENT_SESSION": "true",
|
||||
|
|
|
@ -282,6 +282,12 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
part.isToolCall = !isUserMessage;
|
||||
part.status = part.isClosing ? 'completed' : 'running';
|
||||
|
||||
// Check if this is a browser-related tool and add VNC preview
|
||||
if (part.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
|
||||
console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${part.tagName}`);
|
||||
part.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
|
||||
}
|
||||
|
||||
// Use ID for deduplication
|
||||
if (!seenTagIds.has(part.id)) {
|
||||
seenTagIds.add(part.id);
|
||||
|
@ -307,6 +313,12 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
tag.isToolCall = !isUserMessage;
|
||||
tag.status = tag.isClosing ? 'completed' : 'running';
|
||||
|
||||
// Check if this is a browser-related tool and add VNC preview
|
||||
if (tag.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
|
||||
console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${tag.tagName}`);
|
||||
tag.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
|
||||
}
|
||||
|
||||
// Use ID for deduplication
|
||||
if (!seenTagIds.has(tag.id)) {
|
||||
seenTagIds.add(tag.id);
|
||||
|
@ -381,7 +393,7 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
|
||||
// Update tool calls in the shared context
|
||||
setToolCalls(pairedTags);
|
||||
}, [messages, streamContent, setToolCalls]);
|
||||
}, [messages, streamContent, setToolCalls, agent]);
|
||||
|
||||
// Scroll to bottom of messages
|
||||
const scrollToBottom = useCallback(() => {
|
||||
|
|
|
@ -4,7 +4,7 @@ import React from 'react';
|
|||
import { ParsedTag, ToolComponentProps } from '@/lib/types/tool-calls';
|
||||
import {
|
||||
File, FileText, Terminal, FolderPlus, Folder, Code, Search as SearchIcon,
|
||||
Bell, Replace, Plus, Minus
|
||||
Bell, Replace, Plus, Minus, Globe
|
||||
} from 'lucide-react';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { diffLines } from 'diff';
|
||||
|
@ -458,6 +458,69 @@ export const SearchCodeTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
|
|||
);
|
||||
};
|
||||
|
||||
/**
 * Browser Navigate Tool Component
 *
 * Compact mode renders a one-line summary. Otherwise it renders a card that
 * shows the tag content as the URL and, when `tag.vncPreview` is set, embeds
 * that URL in a half-scale iframe.
 */
export const BrowserNavigateTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
  const isRunning = tag.status === 'running';
  const url = tag.content || '';
  const actionLabel = isRunning ? 'Navigating to' : 'Navigated to';

  if (mode === 'compact') {
    return (
      <CompactToolDisplay
        icon={<Globe className="h-4 w-4 mr-2" />}
        name={actionLabel}
        input={url}
        isRunning={isRunning}
      />
    );
  }

  // Amber "Running" badge, shown only while the tool call is in progress.
  const runningBadge = isRunning && (
    <div className="flex items-center gap-2">
      <span className="text-amber-500">Running</span>
      <div className="h-2 w-2 rounded-full bg-amber-500 animate-pulse"></div>
    </div>
  );

  // The iframe is laid out at 200% and scaled by 0.5 so the remote screen
  // fits the 300px-high container.
  const vncFrame = tag.vncPreview && (
    <div className="mt-2 border border-subtle dark:border-white/10 rounded-md overflow-hidden">
      <div className="text-xs bg-black text-white p-1">VNC Preview</div>
      <div className="relative w-full h-[300px] overflow-hidden">
        <iframe
          src={tag.vncPreview}
          title="Browser preview"
          className="absolute top-0 left-0 border-0"
          style={{
            width: '200%',
            height: '200%',
            transform: 'scale(0.5)',
            transformOrigin: '0 0'
          }}
          sandbox="allow-same-origin allow-scripts"
        />
      </div>
    </div>
  );

  return (
    <div className="border rounded-lg overflow-hidden border-subtle dark:border-white/10">
      <div className="flex items-center px-2 py-1 text-xs font-medium border-b border-subtle dark:border-white/10 bg-background-secondary dark:bg-background-secondary text-foreground">
        <Globe className="h-4 w-4 mr-2" />
        <div className="flex-1">{actionLabel}: {url}</div>
        {runningBadge}
      </div>
      <div className="p-3 bg-card-bg dark:bg-background-secondary text-foreground">
        <div className="space-y-2">
          <div className="flex items-center gap-1 text-xs text-muted-foreground mb-1">
            <Globe className="h-3 w-3" />
            <span className="font-mono">{url}</span>
          </div>

          {/* Display VNC preview if available */}
          {vncFrame}
        </div>
      </div>
    </div>
  );
};
|
||||
|
||||
// Tool component registry
|
||||
export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>> = {
|
||||
'create-file': CreateFileTool,
|
||||
|
@ -471,6 +534,19 @@ export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>>
|
|||
'ask': NotifyTool, // Handle ask similar to notify for now
|
||||
'complete': NotifyTool, // Handle complete similar to notify for now
|
||||
'full-file-rewrite': FullFileRewriteTool,
|
||||
'browser-navigate-to': BrowserNavigateTool,
|
||||
'browser-click-element': BrowserNavigateTool,
|
||||
'browser-input-text': BrowserNavigateTool,
|
||||
'browser-go-back': BrowserNavigateTool,
|
||||
'browser-wait': BrowserNavigateTool,
|
||||
'browser-scroll-down': BrowserNavigateTool,
|
||||
'browser-scroll-up': BrowserNavigateTool,
|
||||
'browser-scroll-to-text': BrowserNavigateTool,
|
||||
'browser-switch-tab': BrowserNavigateTool,
|
||||
'browser-close-tab': BrowserNavigateTool,
|
||||
'browser-get-dropdown-options': BrowserNavigateTool,
|
||||
'browser-select-dropdown-option': BrowserNavigateTool,
|
||||
'browser-drag-drop': BrowserNavigateTool,
|
||||
};
|
||||
|
||||
// Helper function to get the appropriate component for a tag
|
||||
|
|
|
@ -80,8 +80,11 @@ export type Project = {
|
|||
description: string;
|
||||
account_id: string;
|
||||
created_at: string;
|
||||
sandbox_id?: string;
|
||||
sandbox_pass?: string;
|
||||
sandbox: {
|
||||
vnc_preview?: string;
|
||||
id?: string;
|
||||
pass?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export type Thread = {
|
||||
|
@ -214,7 +217,8 @@ export const createProject = async (
|
|||
name: data.name,
|
||||
description: data.description || '',
|
||||
account_id: data.account_id,
|
||||
created_at: data.created_at
|
||||
created_at: data.created_at,
|
||||
sandbox: { id: "", pass: "", vnc_preview: "" }
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -13,6 +13,9 @@ export interface ParsedTag {
|
|||
isToolCall?: boolean; // Whether this is a tool call (vs a result)
|
||||
isPaired?: boolean; // Whether this tag has been paired with its call/result
|
||||
status?: 'running' | 'completed' | 'error'; // Status of the tool call
|
||||
|
||||
// VNC preview for browser-related tools
|
||||
vncPreview?: string; // VNC preview image URL
|
||||
}
|
||||
|
||||
// Display mode for tool components
|
||||
|
@ -37,7 +40,20 @@ export const SUPPORTED_XML_TAGS = [
|
|||
'list-directory',
|
||||
'search-code',
|
||||
'complete',
|
||||
'full-file-rewrite'
|
||||
'full-file-rewrite',
|
||||
'browser-navigate-to',
|
||||
'browser-click-element',
|
||||
'browser-input-text',
|
||||
'browser-go-back',
|
||||
'browser-wait',
|
||||
'browser-scroll-down',
|
||||
'browser-scroll-up',
|
||||
'browser-scroll-to-text',
|
||||
'browser-switch-tab',
|
||||
'browser-close-tab',
|
||||
'browser-get-dropdown-options',
|
||||
'browser-select-dropdown-option',
|
||||
'browser-drag-drop'
|
||||
];
|
||||
|
||||
// Tool status labels
|
||||
|
|
Loading…
Reference in New Issue