This commit is contained in:
Adam Cohen Hillel 2025-04-15 15:34:26 +01:00
parent ad78a0d4f3
commit c4d30e270b
10 changed files with 1725 additions and 560 deletions

View File

@ -58,7 +58,8 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
await client.table('projects').update({
'sandbox': {
'id': sandbox_id,
'pass': sandbox_pass
'pass': sandbox_pass,
'vnc_preview': sandbox.get_preview_link(6080)
}
}).eq('project_id', project_id).execute()
@ -114,6 +115,12 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
print(f"Last message was from assistant, stopping execution")
continue_execution = False
break
# Get the latest message in the messages table whose type is browser_state
latest_browser_state = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
if latest_browser_state.data and len(latest_browser_state.data) > 0:
temporary_message = latest_browser_state.data[0].get('content', '')
else:
temporary_message = None
response = await thread_manager.run_thread(
thread_id=thread_id,
@ -124,6 +131,7 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
llm_max_tokens=64000,
tool_choice="auto",
max_xml_tool_calls=1,
# temporary_message=
processor_config=ProcessorConfig(
xml_tool_calling=True,
native_tool_calling=False,

View File

@ -30,9 +30,9 @@ class SandboxBrowserTool(SandboxToolsBase):
if method == "GET" and params:
query_params = "&".join([f"{k}={v}" for k, v in params.items()])
url = f"{url}?{query_params}"
curl_cmd = f"curl -X {method} '{url}' -H 'Content-Type: application/json'"
curl_cmd = f"curl -s -X {method} '{url}' -H 'Content-Type: application/json'"
else:
curl_cmd = f"curl -X {method} '{url}' -H 'Content-Type: application/json'"
curl_cmd = f"curl -s -X {method} '{url}' -H 'Content-Type: application/json'"
if params:
json_data = json.dumps(params)
curl_cmd += f" -d '{json_data}'"
@ -46,7 +46,43 @@ class SandboxBrowserTool(SandboxToolsBase):
try:
result = json.loads(response.result)
logger.info("Browser automation request completed successfully")
return self.success_response(result)
# Create a cleaned version of the result based on BrowserActionResult schema
cleaned_result = {
"success": result.get("success", False),
"message": result.get("message", ""),
"error": result.get("error", ""),
"url": result.get("url"),
"title": result.get("title"),
"elements": result.get("elements"),
"pixels_above": result.get("pixels_above", 0),
"pixels_below": result.get("pixels_below", 0),
"content": result.get("content"),
"element_count": result.get("element_count", 0),
"interactive_elements": result.get("interactive_elements"),
"viewport_width": result.get("viewport_width"),
"viewport_height": result.get("viewport_height")
}
# Print screenshot info to console but don't return it
if "screenshot_base64" in result:
has_screenshot = bool(result.get("screenshot_base64"))
print(f"\033[95mScreenshot captured: {has_screenshot}\033[0m")
# Print viewport info if available
if cleaned_result["viewport_width"] and cleaned_result["viewport_height"]:
print(f"\033[95mViewport size: {cleaned_result['viewport_width']}x{cleaned_result['viewport_height']}\033[0m")
# Print interactive elements count
if cleaned_result["element_count"] > 0:
print(f"\033[95mFound {cleaned_result['element_count']} interactive elements\033[0m")
print("************************************************")
print(cleaned_result)
print("************************************************")
return self.success_response(cleaned_result)
except json.JSONDecodeError:
logger.error(f"Failed to parse response JSON: {response.result}")
return self.fail_response(f"Failed to parse response JSON: {response.result}")
@ -99,45 +135,45 @@ class SandboxBrowserTool(SandboxToolsBase):
print(f"\033[95mNavigating to: {url}\033[0m")
return await self._execute_browser_action("navigate_to", {"url": url})
@openapi_schema({
"type": "function",
"function": {
"name": "browser_search_google",
"description": "Search Google with the provided query",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query to use"
}
},
"required": ["query"]
}
}
})
@xml_schema(
tag_name="browser-search-google",
mappings=[
{"param_name": "query", "node_type": "content", "path": "."}
],
example='''
<browser-search-google>
artificial intelligence news
</browser-search-google>
'''
)
async def browser_search_google(self, query: str) -> ToolResult:
"""Search Google with the provided query
# @openapi_schema({
# "type": "function",
# "function": {
# "name": "browser_search_google",
# "description": "Search Google with the provided query",
# "parameters": {
# "type": "object",
# "properties": {
# "query": {
# "type": "string",
# "description": "The search query to use"
# }
# },
# "required": ["query"]
# }
# }
# })
# @xml_schema(
# tag_name="browser-search-google",
# mappings=[
# {"param_name": "query", "node_type": "content", "path": "."}
# ],
# example='''
# <browser-search-google>
# artificial intelligence news
# </browser-search-google>
# '''
# )
# async def browser_search_google(self, query: str) -> ToolResult:
# """Search Google with the provided query
Args:
query (str): The search query to use
# Args:
# query (str): The search query to use
Returns:
dict: Result of the execution
"""
print(f"\033[95mSearching Google for: {query}\033[0m")
return await self._execute_browser_action("search_google", {"query": query})
# Returns:
# dict: Result of the execution
# """
# print(f"\033[95mSearching Google for: {query}\033[0m")
# return await self._execute_browser_action("search_google", {"query": query})
@openapi_schema({
"type": "function",
@ -269,7 +305,7 @@ class SandboxBrowserTool(SandboxToolsBase):
@xml_schema(
tag_name="browser-input-text",
mappings=[
{"param_name": "index", "node_type": "attribute", "path": "@index"},
{"param_name": "index", "node_type": "attribute", "path": "."},
{"param_name": "text", "node_type": "content", "path": "."}
],
example='''
@ -371,45 +407,45 @@ class SandboxBrowserTool(SandboxToolsBase):
print(f"\033[95mSwitching to tab: {page_id}\033[0m")
return await self._execute_browser_action("switch_tab", {"page_id": page_id})
@openapi_schema({
"type": "function",
"function": {
"name": "browser_open_tab",
"description": "Open a new browser tab with the specified URL",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to open in the new tab"
}
},
"required": ["url"]
}
}
})
@xml_schema(
tag_name="browser-open-tab",
mappings=[
{"param_name": "url", "node_type": "content", "path": "."}
],
example='''
<browser-open-tab>
https://example.com
</browser-open-tab>
'''
)
async def browser_open_tab(self, url: str) -> ToolResult:
"""Open a new browser tab with the specified URL
# @openapi_schema({
# "type": "function",
# "function": {
# "name": "browser_open_tab",
# "description": "Open a new browser tab with the specified URL",
# "parameters": {
# "type": "object",
# "properties": {
# "url": {
# "type": "string",
# "description": "The URL to open in the new tab"
# }
# },
# "required": ["url"]
# }
# }
# })
# @xml_schema(
# tag_name="browser-open-tab",
# mappings=[
# {"param_name": "url", "node_type": "content", "path": "."}
# ],
# example='''
# <browser-open-tab>
# https://example.com
# </browser-open-tab>
# '''
# )
# async def browser_open_tab(self, url: str) -> ToolResult:
# """Open a new browser tab with the specified URL
Args:
url (str): The URL to open in the new tab
# Args:
# url (str): The URL to open in the new tab
Returns:
dict: Result of the execution
"""
print(f"\033[95mOpening new tab with URL: {url}\033[0m")
return await self._execute_browser_action("open_tab", {"url": url})
# Returns:
# dict: Result of the execution
# """
# print(f"\033[95mOpening new tab with URL: {url}\033[0m")
# return await self._execute_browser_action("open_tab", {"url": url})
@openapi_schema({
"type": "function",
@ -451,72 +487,64 @@ class SandboxBrowserTool(SandboxToolsBase):
print(f"\033[95mClosing tab: {page_id}\033[0m")
return await self._execute_browser_action("close_tab", {"page_id": page_id})
@openapi_schema({
"type": "function",
"function": {
"name": "browser_extract_content",
"description": "Extract content from the current page based on the provided goal",
"parameters": {
"type": "object",
"properties": {
"goal": {
"type": "string",
"description": "The extraction goal (e.g., 'extract all links', 'find product information')"
}
},
"required": ["goal"]
}
}
})
@xml_schema(
tag_name="browser-extract-content",
mappings=[
{"param_name": "goal", "node_type": "content", "path": "."}
],
example='''
<browser-extract-content>
Extract all links on the page
</browser-extract-content>
'''
)
async def browser_extract_content(self, goal: str) -> ToolResult:
"""Extract content from the current page based on the provided goal
# @openapi_schema({
# "type": "function",
# "function": {
# "name": "browser_extract_content",
# "description": "Extract content from the current page based on the provided goal",
# "parameters": {
# "type": "object",
# "properties": {
# "goal": {
# "type": "string",
# "description": "The extraction goal (e.g., 'extract all links', 'find product information')"
# }
# },
# "required": ["goal"]
# }
# }
# })
# @xml_schema(
# tag_name="browser-extract-content",
# mappings=[
# {"param_name": "goal", "node_type": "content", "path": "."}
# ],
# example='''
# <browser-extract-content>
# Extract all links on the page
# </browser-extract-content>
# '''
# )
# async def browser_extract_content(self, goal: str) -> ToolResult:
# """Extract content from the current page based on the provided goal
Args:
goal (str): The extraction goal
# Args:
# goal (str): The extraction goal
Returns:
dict: Result of the execution
"""
print(f"\033[95mExtracting content with goal: {goal}\033[0m")
return await self._execute_browser_action("extract_content", {"goal": goal})
@openapi_schema({
"type": "function",
"function": {
"name": "browser_save_pdf",
"description": "Save the current page as a PDF file",
"parameters": {
"type": "object",
"properties": {}
}
}
})
@xml_schema(
tag_name="browser-save-pdf",
mappings=[],
example='''
<browser-save-pdf></browser-save-pdf>
'''
)
async def browser_save_pdf(self) -> ToolResult:
"""Save the current page as a PDF file
# Returns:
# dict: Result of the execution
# """
# print(f"\033[95mExtracting content with goal: {goal}\033[0m")
# result = await self._execute_browser_action("extract_content", {"goal": goal})
Returns:
dict: Result of the execution
"""
print(f"\033[95mSaving current page as PDF\033[0m")
return await self._execute_browser_action("save_pdf")
# # Format content for better readability
# if result.get("success"):
# print(f"\033[92mContent extraction successful\033[0m")
# content = result.data.get("content", "")
# url = result.data.get("url", "")
# title = result.data.get("title", "")
# if content:
# content_preview = content[:200] + "..." if len(content) > 200 else content
# print(f"\033[95mExtracted content from {title} ({url}):\033[0m")
# print(f"\033[96m{content_preview}\033[0m")
# print(f"\033[95mTotal content length: {len(content)} characters\033[0m")
# else:
# print(f"\033[93mNo content extracted from {url}\033[0m")
# else:
# print(f"\033[91mFailed to extract content: {result.data.get('error', 'Unknown error')}\033[0m")
# return result
@openapi_schema({
"type": "function",
@ -712,7 +740,7 @@ class SandboxBrowserTool(SandboxToolsBase):
@xml_schema(
tag_name="browser-select-dropdown-option",
mappings=[
{"param_name": "index", "node_type": "attribute", "path": "@index"},
{"param_name": "index", "node_type": "attribute", "path": "."},
{"param_name": "text", "node_type": "content", "path": "."}
],
example='''
@ -773,12 +801,12 @@ class SandboxBrowserTool(SandboxToolsBase):
@xml_schema(
tag_name="browser-drag-drop",
mappings=[
{"param_name": "element_source", "node_type": "attribute", "path": "@element_source"},
{"param_name": "element_target", "node_type": "attribute", "path": "@element_target"},
{"param_name": "coord_source_x", "node_type": "attribute", "path": "@coord_source_x"},
{"param_name": "coord_source_y", "node_type": "attribute", "path": "@coord_source_y"},
{"param_name": "coord_target_x", "node_type": "attribute", "path": "@coord_target_x"},
{"param_name": "coord_target_y", "node_type": "attribute", "path": "@coord_target_y"}
{"param_name": "element_source", "node_type": "attribute", "path": "."},
{"param_name": "element_target", "node_type": "attribute", "path": "."},
{"param_name": "coord_source_x", "node_type": "attribute", "path": "."},
{"param_name": "coord_source_y", "node_type": "attribute", "path": "."},
{"param_name": "coord_target_x", "node_type": "attribute", "path": "."},
{"param_name": "coord_target_y", "node_type": "attribute", "path": "."}
],
example='''
<browser-drag-drop element_source="#draggable" element_target="#droppable"></browser-drag-drop>

File diff suppressed because it is too large Load Diff

View File

@ -1,272 +0,0 @@
import asyncio
from typing import List, Dict, Any, Optional, Union
from fastapi import APIRouter
from pydantic import BaseModel
from enum import Enum
from playwright.async_api import async_playwright, Browser, Page, Mouse, Keyboard
import base64
class MouseButton(str, Enum):
    """Mouse buttons accepted by the automation request models.

    The class mixes in ``str``, so every member compares equal to its
    plain-text value (``MouseButton.left == "left"``).
    """

    left = "left"
    middle = "middle"
    right = "right"
class Position(BaseModel):
    """Request body carrying an optional pair of integer coordinates."""

    # Both coordinates default to None when the caller omits them.
    x: Optional[int] = None
    y: Optional[int] = None
class MouseAction(BaseModel):
    """Request body describing a mouse click / button action."""

    # Target coordinates; None when not supplied.
    x: Optional[int] = None
    y: Optional[int] = None
    # Number of clicks to perform (a single click unless overridden).
    clicks: Optional[int] = 1
    # Which button to use; the left button is the default.
    button: MouseButton = MouseButton.left
    # Multiplied by 1000 before being handed to Playwright's click,
    # so presumably expressed in seconds -- TODO confirm with API callers.
    delay: Optional[float] = 0.0
class KeyboardAction(BaseModel):
    """Request body naming a single keyboard key."""

    # Required key name; there is no default.
    key: str
class KeyboardPress(BaseModel):
    """Request body for pressing one key or a list of keys."""

    # Either a single key name or a list of key names.
    keys: Union[str, List[str]]
    # Pause in seconds slept after each key when ``keys`` is a list.
    delay: Optional[float] = 0.0
class WriteAction(BaseModel):
    """Request body for typing a piece of text."""

    # The text to type.
    message: str
    # Multiplied by 1000 before being passed to Playwright's keyboard typing,
    # so presumably expressed in seconds -- TODO confirm with API callers.
    delay: Optional[float] = 0.0
class HotkeyAction(BaseModel):
    """Request body for a key combination (e.g. ``["Control", "a"]``)."""

    # Keys that make up the combination, in press order.
    keys: List[str]
    # Pause in seconds slept once the whole combination has been released.
    delay: Optional[float] = 0.0
class BrowserAutomation:
    """Playwright-backed browser automation exposed through a FastAPI router.

    One instance owns a single browser connection, one page, and that page's
    mouse and keyboard. All ``/automation/...`` handlers are registered on
    ``self.router`` in ``__init__``; the browser is only attached when the
    router's startup hook (``startup``) runs.

    Action handlers do not raise: failures are reported as
    ``{"success": False, "error": "<message>"}`` so the HTTP caller always
    receives a JSON body.
    """

    def __init__(self):
        self.router = APIRouter()
        # Handle to the Playwright driver, kept so shutdown() can stop it.
        self._playwright = None
        self.browser: Optional[Browser] = None
        self.page: Optional[Page] = None
        self.mouse: Optional[Mouse] = None
        self.keyboard: Optional[Keyboard] = None

        # Register lifecycle hooks and routes
        self.router.on_startup.append(self.startup)
        self.router.on_shutdown.append(self.shutdown)

        self.router.get("/automation/mouse/position")(self.get_mouse_position)
        self.router.post("/automation/mouse/move")(self.move_mouse)
        self.router.post("/automation/mouse/click")(self.click_mouse)
        self.router.post("/automation/mouse/down")(self.mouse_down)
        self.router.post("/automation/mouse/up")(self.mouse_up)
        self.router.post("/automation/keyboard/press")(self.press_key)
        self.router.post("/automation/keyboard/write")(self.write_text)
        self.router.post("/automation/keyboard/hotkey")(self.press_hotkey)
        self.router.post("/automation/navigate_to")(self.navigate_to)
        self.router.post("/automation/screenshot")(self.take_screenshot)

    @staticmethod
    def _delay_ms(delay: Optional[float]) -> Optional[float]:
        """Scale a model ``delay`` by 1000 for Playwright; falsy values become None."""
        return delay * 1000 if delay else None

    async def startup(self):
        """Attach to the already-running browser and open a fresh page."""
        self._playwright = await async_playwright().start()
        # Connect to the persistent browser running on port 9222
        self.browser = await self._playwright.chromium.connect_over_cdp("http://localhost:9222")
        self.page = await self.browser.new_page()
        self.mouse = self.page.mouse
        self.keyboard = self.page.keyboard

    async def shutdown(self):
        """Close the browser connection and stop the Playwright driver."""
        try:
            if self.browser:
                await self.browser.close()
        finally:
            # Stop the driver even if closing the browser failed, so the
            # handle obtained in startup() is not leaked.
            if self._playwright:
                await self._playwright.stop()
                self._playwright = None

    async def get_mouse_position(self):
        """Return a mouse position; currently always the default ``(0, 0)``."""
        # Playwright doesn't provide direct mouse position, and no tracking of
        # the last known position is implemented, so report the default.
        return {"x": 0, "y": 0}

    async def move_mouse(self, action: Position):
        """Move mouse to specified position"""
        try:
            await self.mouse.move(action.x, action.y)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def click_mouse(self, action: MouseAction):
        """Click at the specified position"""
        try:
            await self.mouse.click(
                action.x,
                action.y,
                button=action.button,
                click_count=action.clicks,
                delay=self._delay_ms(action.delay),
            )
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def mouse_down(self, action: MouseAction):
        """Press mouse button down"""
        try:
            await self.mouse.down(button=action.button)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def mouse_up(self, action: MouseAction):
        """Release mouse button"""
        try:
            await self.mouse.up(button=action.button)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def press_key(self, action: KeyboardPress):
        """Press specified key(s)"""
        try:
            if isinstance(action.keys, list):
                for key in action.keys:
                    await self.keyboard.press(key)
                    if action.delay:
                        await asyncio.sleep(action.delay)
            else:
                await self.keyboard.press(action.keys)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def write_text(self, action: WriteAction):
        """Type specified text"""
        try:
            # A falsy delay must become None (Python has no ``undefined``);
            # otherwise every write using the default delay fails.
            await self.keyboard.type(action.message, delay=self._delay_ms(action.delay))
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def press_hotkey(self, action: HotkeyAction):
        """Press multiple keys simultaneously"""
        try:
            # Hold every key down in order...
            for key in action.keys:
                await self.keyboard.down(key)
            # ...then release them in reverse order
            for key in reversed(action.keys):
                await self.keyboard.up(key)
            if action.delay:
                await asyncio.sleep(action.delay)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def navigate_to(self, url: str):
        """Navigate to a specified URL"""
        try:
            await self.page.goto(url)
            return {"success": True}
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def take_screenshot(self) -> Dict[str, str]:
        """Take a screenshot of the current page.

        Returns ``{"image": <base64 text>}`` on success, or
        ``{"error": <message>}`` if the capture fails.
        """
        try:
            screenshot_bytes = await self.page.screenshot()
            return {"image": base64.b64encode(screenshot_bytes).decode()}
        except Exception as e:
            return {"error": str(e)}
# Module-level instance shared by importers. Constructing it registers the
# /automation/* routes and the startup/shutdown hooks on its router; the
# browser connection itself is only opened when the startup hook runs.
automation_service = BrowserAutomation()
async def run_demo():
    """Run a demonstration of browser automation capabilities.

    Creates its own ``BrowserAutomation`` instance, drives it through a fixed
    sequence (navigate, screenshot, mouse move/click, search, typing, Enter,
    Ctrl+A, screenshot) and prints a progress line after each step.

    Any exception is reported on stderr and re-raised; the service is shut
    down in all cases.
    """
    # Imported here because the error path below needs ``sys.stderr`` and the
    # module does not import ``sys`` at the top.
    import sys

    print("Starting browser automation demo...")

    # Initialize the automation service
    service = BrowserAutomation()
    await service.startup()

    try:
        # 1. Navigate to a test website
        await service.page.goto('https://playwright.dev')
        print("✓ Navigated to playwright.dev")
        await asyncio.sleep(2)

        # 2. Take a screenshot
        result = await service.take_screenshot()
        if 'image' in result:
            print("✓ Took initial screenshot")

        # 3. Move mouse to center and click
        center_pos = MouseAction(
            x=500,
            y=300,
            clicks=1
        )
        await service.move_mouse(Position(x=center_pos.x, y=center_pos.y))
        print("✓ Moved mouse to center")
        await asyncio.sleep(1)
        await service.click_mouse(center_pos)
        print("✓ Clicked at center")
        await asyncio.sleep(1)

        # 4. Type some text into search box
        # First, click the search button
        await service.page.click('button[type="button"]:has-text("Search")')
        print("✓ Clicked search button")
        await asyncio.sleep(1)

        # Type search term
        write_action = WriteAction(
            message="browser automation",
            delay=0.1
        )
        await service.write_text(write_action)
        print("✓ Typed search text")
        await asyncio.sleep(2)

        # 5. Press Enter
        enter_action = KeyboardPress(
            keys="Enter"
        )
        await service.press_key(enter_action)
        print("✓ Pressed Enter")
        await asyncio.sleep(2)

        # 6. Demonstrate hotkeys (e.g., Ctrl+A to select all)
        hotkey_action = HotkeyAction(
            keys=["Control", "a"]
        )
        await service.press_hotkey(hotkey_action)
        print("✓ Pressed Ctrl+A")
        await asyncio.sleep(1)

        # 7. Take another screenshot after interactions
        result = await service.take_screenshot()
        if 'image' in result:
            print("✓ Took final screenshot")

        print("\nDemo completed successfully! 🎉")
    except Exception as e:
        print(f"Error during demo: {str(e)}", file=sys.stderr)
        raise
    finally:
        # Clean up
        await service.shutdown()
        print("Browser closed.")
def main():
    """Print the demo banner, then run the async demo to completion."""
    banner = ("Browser Automation Demo", "======================")
    for line in banner:
        print(line)
    asyncio.run(run_demo())


# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@ -6,7 +6,7 @@ services:
dockerfile: ${DOCKERFILE:-Dockerfile}
args:
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
image: adamcohenhillel/kortix-suna:0.0.10
image: adamcohenhillel/kortix-suna:0.0.13
ports:
- "6080:6080" # noVNC web interface
- "5901:5901" # VNC port

View File

@ -78,7 +78,7 @@ def create_sandbox(password: str):
logger.debug("OPENAI_API_KEY configured for sandbox")
sandbox = daytona.create(CreateSandboxParams(
image="adamcohenhillel/kortix-suna:0.0.10",
image="adamcohenhillel/kortix-suna:0.0.13",
public=True,
env_vars={
"CHROME_PERSISTENT_SESSION": "true",

View File

@ -282,6 +282,12 @@ export default function AgentPage({ params }: AgentPageProps) {
part.isToolCall = !isUserMessage;
part.status = part.isClosing ? 'completed' : 'running';
// Check if this is a browser-related tool and add VNC preview
if (part.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${part.tagName}`);
part.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
}
// Use ID for deduplication
if (!seenTagIds.has(part.id)) {
seenTagIds.add(part.id);
@ -307,6 +313,12 @@ export default function AgentPage({ params }: AgentPageProps) {
tag.isToolCall = !isUserMessage;
tag.status = tag.isClosing ? 'completed' : 'running';
// Check if this is a browser-related tool and add VNC preview
if (tag.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${tag.tagName}`);
tag.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
}
// Use ID for deduplication
if (!seenTagIds.has(tag.id)) {
seenTagIds.add(tag.id);
@ -381,7 +393,7 @@ export default function AgentPage({ params }: AgentPageProps) {
// Update tool calls in the shared context
setToolCalls(pairedTags);
}, [messages, streamContent, setToolCalls]);
}, [messages, streamContent, setToolCalls, agent]);
// Scroll to bottom of messages
const scrollToBottom = useCallback(() => {

View File

@ -4,7 +4,7 @@ import React from 'react';
import { ParsedTag, ToolComponentProps } from '@/lib/types/tool-calls';
import {
File, FileText, Terminal, FolderPlus, Folder, Code, Search as SearchIcon,
Bell, Replace, Plus, Minus
Bell, Replace, Plus, Minus, Globe
} from 'lucide-react';
import { cn } from '@/lib/utils';
import { diffLines } from 'diff';
@ -458,6 +458,69 @@ export const SearchCodeTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
);
};
/**
 * Browser tool component.
 *
 * Renders a browser-related tool call: the tag's content is shown as the
 * target URL, a "Running" badge is shown while the call is in progress, and,
 * when the tag carries a `vncPreview` URL, that page is embedded in a
 * half-scale iframe. In compact mode only a one-line summary is rendered.
 */
export const BrowserNavigateTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
  const target = tag.content || '';
  const running = tag.status === 'running';
  const verb = running ? "Navigating to" : "Navigated to";

  // Compact mode: single-line summary, no preview.
  if (mode === 'compact') {
    return (
      <CompactToolDisplay
        icon={<Globe className="h-4 w-4 mr-2" />}
        name={verb}
        input={target}
        isRunning={running}
      />
    );
  }

  // Badge shown in the header only while the tool call is still running.
  const runningBadge = running && (
    <div className="flex items-center gap-2">
      <span className="text-amber-500">Running</span>
      <div className="h-2 w-2 rounded-full bg-amber-500 animate-pulse"></div>
    </div>
  );

  // The iframe is laid out at twice the container size and scaled by 0.5,
  // so the remote page is shown at half scale inside the 300px-high box.
  const frameStyle = {
    width: '200%',
    height: '200%',
    transform: 'scale(0.5)',
    transformOrigin: '0 0'
  };

  // Display VNC preview if available
  const vncPane = tag.vncPreview && (
    <div className="mt-2 border border-subtle dark:border-white/10 rounded-md overflow-hidden">
      <div className="text-xs bg-black text-white p-1">VNC Preview</div>
      <div className="relative w-full h-[300px] overflow-hidden">
        <iframe
          src={tag.vncPreview}
          title="Browser preview"
          className="absolute top-0 left-0 border-0"
          style={frameStyle}
          sandbox="allow-same-origin allow-scripts"
        />
      </div>
    </div>
  );

  return (
    <div className="border rounded-lg overflow-hidden border-subtle dark:border-white/10">
      <div className="flex items-center px-2 py-1 text-xs font-medium border-b border-subtle dark:border-white/10 bg-background-secondary dark:bg-background-secondary text-foreground">
        <Globe className="h-4 w-4 mr-2" />
        <div className="flex-1">{verb}: {target}</div>
        {runningBadge}
      </div>
      <div className="p-3 bg-card-bg dark:bg-background-secondary text-foreground">
        <div className="space-y-2">
          <div className="flex items-center gap-1 text-xs text-muted-foreground mb-1">
            <Globe className="h-3 w-3" />
            <span className="font-mono">{target}</span>
          </div>
          {vncPane}
        </div>
      </div>
    </div>
  );
};
// Tool component registry
export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>> = {
'create-file': CreateFileTool,
@ -471,6 +534,19 @@ export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>>
'ask': NotifyTool, // Handle ask similar to notify for now
'complete': NotifyTool, // Handle complete similar to notify for now
'full-file-rewrite': FullFileRewriteTool,
'browser-navigate-to': BrowserNavigateTool,
'browser-click-element': BrowserNavigateTool,
'browser-input-text': BrowserNavigateTool,
'browser-go-back': BrowserNavigateTool,
'browser-wait': BrowserNavigateTool,
'browser-scroll-down': BrowserNavigateTool,
'browser-scroll-up': BrowserNavigateTool,
'browser-scroll-to-text': BrowserNavigateTool,
'browser-switch-tab': BrowserNavigateTool,
'browser-close-tab': BrowserNavigateTool,
'browser-get-dropdown-options': BrowserNavigateTool,
'browser-select-dropdown-option': BrowserNavigateTool,
'browser-drag-drop': BrowserNavigateTool,
};
// Helper function to get the appropriate component for a tag

View File

@ -80,8 +80,11 @@ export type Project = {
description: string;
account_id: string;
created_at: string;
sandbox_id?: string;
sandbox_pass?: string;
sandbox: {
vnc_preview?: string;
id?: string;
pass?: string;
};
}
export type Thread = {
@ -214,7 +217,8 @@ export const createProject = async (
name: data.name,
description: data.description || '',
account_id: data.account_id,
created_at: data.created_at
created_at: data.created_at,
sandbox: { id: "", pass: "", vnc_preview: "" }
};
};

View File

@ -13,6 +13,9 @@ export interface ParsedTag {
isToolCall?: boolean; // Whether this is a tool call (vs a result)
isPaired?: boolean; // Whether this tag has been paired with its call/result
status?: 'running' | 'completed' | 'error'; // Status of the tool call
// VNC preview for browser-related tools
vncPreview?: string; // URL of the noVNC page embedded as an iframe preview
}
// Display mode for tool components
@ -37,7 +40,20 @@ export const SUPPORTED_XML_TAGS = [
'list-directory',
'search-code',
'complete',
'full-file-rewrite'
'full-file-rewrite',
'browser-navigate-to',
'browser-click-element',
'browser-input-text',
'browser-go-back',
'browser-wait',
'browser-scroll-down',
'browser-scroll-up',
'browser-scroll-to-text',
'browser-switch-tab',
'browser-close-tab',
'browser-get-dropdown-options',
'browser-select-dropdown-option',
'browser-drag-drop'
];
// Tool status labels