mirror of https://github.com/kortix-ai/suna.git
Merge pull request #36 from kortix-ai/bring-back-browser-use
Bring back using browser with pure playwright
This commit is contained in:
commit
f0d7392d3b
|
@ -57,6 +57,17 @@ You have the ability to execute operations using both Python and CLI tools:
|
|||
- Finding recent news, articles, and information beyond training data
|
||||
- Crawling webpage content for detailed information extraction
|
||||
|
||||
### 2.2.5 BROWSER TOOLS AND CAPABILITIES
|
||||
- BROWSER OPERATIONS:
|
||||
* Navigate to URLs and manage history
|
||||
* Fill forms and submit data
|
||||
* Click elements and interact with pages
|
||||
* Extract text and HTML content
|
||||
* Wait for elements to load
|
||||
* Scroll pages and handle infinite scroll
|
||||
* YOU CAN DO ANYTHING ON THE BROWSER - including clicking on elements, filling forms, submitting data, etc.
|
||||
* The browser is in a sandboxed environment, so nothing to worry about.
|
||||
|
||||
# 3. TOOLKIT & METHODOLOGY
|
||||
|
||||
## 3.1 TOOL SELECTION PRINCIPLES
|
||||
|
|
|
@ -12,6 +12,7 @@ from agentpress.thread_manager import ThreadManager
|
|||
from agentpress.response_processor import ProcessorConfig
|
||||
from agent.tools.sb_shell_tool import SandboxShellTool
|
||||
from agent.tools.sb_files_tool import SandboxFilesTool
|
||||
from agent.tools.sb_browser_tool import SandboxBrowserTool
|
||||
from agent.prompt import get_system_prompt
|
||||
from sandbox.sandbox import daytona, create_sandbox, get_or_start_sandbox
|
||||
from utils.billing import check_billing_status, get_account_id_from_thread
|
||||
|
@ -52,22 +53,28 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
|||
else:
|
||||
sandbox_pass = str(uuid4())
|
||||
sandbox = create_sandbox(sandbox_pass)
|
||||
print(f"\033[91m{sandbox.get_preview_link(6080)}/vnc_lite.html?password={sandbox_pass}\033[0m")
|
||||
sandbox_id = sandbox.id
|
||||
await client.table('projects').update({
|
||||
'sandbox': {
|
||||
'id': sandbox_id,
|
||||
'pass': sandbox_pass
|
||||
'pass': sandbox_pass,
|
||||
'vnc_preview': sandbox.get_preview_link(6080)
|
||||
}
|
||||
}).eq('project_id', project_id).execute()
|
||||
|
||||
# thread_manager.add_tool(SandboxBrowseTool, sandbox=sandbox)
|
||||
thread_manager.add_tool(SandboxShellTool, sandbox=sandbox)
|
||||
thread_manager.add_tool(SandboxFilesTool, sandbox=sandbox)
|
||||
thread_manager.add_tool(SandboxBrowserTool, sandbox=sandbox, thread_id=thread_id, thread_manager=thread_manager)
|
||||
thread_manager.add_tool(SandboxDeployTool, sandbox=sandbox)
|
||||
thread_manager.add_tool(MessageTool)
|
||||
thread_manager.add_tool(WebSearchTool)
|
||||
thread_manager.add_tool(SandboxDeployTool, sandbox=sandbox)
|
||||
|
||||
system_message = { "role": "system", "content": get_system_prompt() }
|
||||
xml_examples = ""
|
||||
for tag_name, example in thread_manager.tool_registry.get_xml_examples().items():
|
||||
xml_examples += f"{example}\n"
|
||||
|
||||
system_message = { "role": "system", "content": get_system_prompt() + "\n\n" + f"<tool_examples>\n{xml_examples}\n</tool_examples>" }
|
||||
|
||||
model_name = "anthropic/claude-3-7-sonnet-latest"
|
||||
# model_name = "groq/llama-3.3-70b-versatile"
|
||||
|
@ -108,6 +115,37 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
|||
print(f"Last message was from assistant, stopping execution")
|
||||
continue_execution = False
|
||||
break
|
||||
# Get the latest message of type browser_state for this thread from the messages table
|
||||
|
||||
latest_browser_state = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
|
||||
temporary_message = None
|
||||
if latest_browser_state.data and len(latest_browser_state.data) > 0:
|
||||
try:
|
||||
content = json.loads(latest_browser_state.data[0]["content"])
|
||||
screenshot_base64 = content["screenshot_base64"]
|
||||
# Create a copy of the browser state without screenshot
|
||||
browser_state = content.copy()
|
||||
browser_state.pop('screenshot_base64', None)
|
||||
browser_state.pop('screenshot_url', None)
|
||||
browser_state.pop('screenshot_url_base64', None)
|
||||
temporary_message = { "role": "user", "content": [] }
|
||||
if browser_state:
|
||||
temporary_message["content"].append({
|
||||
"type": "text",
|
||||
"text": f"The following is the current state of the browser:\n{browser_state}"
|
||||
})
|
||||
if screenshot_base64:
|
||||
temporary_message["content"].append({
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{screenshot_base64}",
|
||||
}
|
||||
})
|
||||
else:
|
||||
print("@@@@@ THIS TIME NO SCREENSHOT!!")
|
||||
except Exception as e:
|
||||
print(f"Error parsing browser state: {e}")
|
||||
# print(latest_browser_state.data[0])
|
||||
|
||||
response = await thread_manager.run_thread(
|
||||
thread_id=thread_id,
|
||||
|
@ -115,9 +153,10 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
|||
stream=stream,
|
||||
llm_model=model_name,
|
||||
llm_temperature=0,
|
||||
llm_max_tokens=64000,
|
||||
llm_max_tokens=128000,
|
||||
tool_choice="auto",
|
||||
max_xml_tool_calls=1,
|
||||
temporary_message=temporary_message,
|
||||
processor_config=ProcessorConfig(
|
||||
xml_tool_calling=True,
|
||||
native_tool_calling=False,
|
||||
|
|
|
@ -0,0 +1,846 @@
|
|||
import traceback
|
||||
import json
|
||||
|
||||
from agentpress.tool import ToolResult, openapi_schema, xml_schema
|
||||
from agentpress.thread_manager import ThreadManager
|
||||
from sandbox.sandbox import SandboxToolsBase, Sandbox
|
||||
from utils.logger import logger
|
||||
|
||||
|
||||
class SandboxBrowserTool(SandboxToolsBase):
|
||||
"""Tool for executing tasks in a Daytona sandbox with browser-use capabilities."""
|
||||
|
||||
def __init__(self, sandbox: Sandbox, thread_id: str, thread_manager: ThreadManager):
    """Set up the tool for one sandbox and one conversation thread.

    Args:
        sandbox (Sandbox): Sandbox passed on to the base-class initializer.
        thread_id (str): ID of the thread stored on the instance.
        thread_manager (ThreadManager): Thread manager stored on the instance.
    """
    super().__init__(sandbox)
    self.thread_manager, self.thread_id = thread_manager, thread_id
|
||||
|
||||
async def _execute_browser_action(self, endpoint: str, params: dict = None, method: str = "POST") -> ToolResult:
    """Call a browser automation endpoint with curl inside the sandbox.

    The command is run through ``self.sandbox.process.exec`` with a 30 second
    timeout. On success the parsed JSON reply is stored on the thread as a
    ``browser_state`` message and a trimmed summary is returned to the caller.

    Args:
        endpoint (str): Endpoint name appended to
            ``http://localhost:8002/api/automation/``.
        params (dict, optional): Parameters to send. For ``GET`` they become
            the URL query string; otherwise they are sent as a JSON body.
            Defaults to None.
        method (str, optional): HTTP method to use. Defaults to "POST".

    Returns:
        ToolResult: ``self.success_response(...)`` with ``success``,
            ``message`` and, when present in the reply, ``url``, ``title``,
            ``elements_found`` and ``scrollable_content``; otherwise
            ``self.fail_response(...)`` describing what went wrong.
    """
    # Imported locally so the block does not rely on edits to the file header.
    import shlex
    from urllib.parse import urlencode

    try:
        url = f"http://localhost:8002/api/automation/{endpoint}"
        json_body = None

        if method == "GET" and params:
            # urlencode escapes '&', '=', spaces, ... inside keys and values.
            url = f"{url}?{urlencode(params)}"
        elif params:
            json_body = json.dumps(params)

        curl_args = ["curl", "-s", "-X", method, url, "-H", "Content-Type: application/json"]
        if json_body is not None:
            curl_args += ["-d", json_body]

        # Quote every argument: the payload can contain arbitrary text
        # (quotes, '$', ';'), which must never be interpreted by the shell.
        curl_cmd = " ".join(shlex.quote(arg) for arg in curl_args)

        print(f"\033[95mExecuting curl command:\033[0m")
        print(f"{curl_cmd}")

        response = self.sandbox.process.exec(curl_cmd, timeout=30)

        if response.exit_code != 0:
            logger.error(f"Browser automation request failed: {response.result}")
            return self.fail_response(f"Browser automation request failed: {response.result}")

        try:
            result = json.loads(response.result)
        except json.JSONDecodeError:
            logger.error(f"Failed to parse response JSON: {response.result}")
            return self.fail_response(f"Failed to parse response JSON: {response.result}")

        # The code below treats the reply as a JSON object; reject anything else
        # with a clear message instead of an opaque TypeError.
        if not isinstance(result, dict):
            logger.error(f"Unexpected response payload: {response.result}")
            return self.fail_response(f"Unexpected response payload: {response.result}")

        result.setdefault("content", "")
        result.setdefault("role", "assistant")

        logger.info("Browser automation request completed successfully")

        # Add full result to thread messages for state tracking
        await self.thread_manager.add_message(
            thread_id=self.thread_id,
            type="browser_state",
            content=result,
            is_llm_message=False
        )

        # Return tool-specific success response
        success_response = {
            "success": True,
            "message": result.get("message", "Browser action completed successfully")
        }

        # Add relevant browser-specific info (only when the reply has a truthy value)
        for source_key, target_key in (("url", "url"), ("title", "title"), ("element_count", "elements_found")):
            if result.get(source_key):
                success_response[target_key] = result[source_key]
        if result.get("pixels_below"):
            success_response["scrollable_content"] = result["pixels_below"] > 0

        return self.success_response(success_response)

    except Exception as e:
        # Tool boundary: report any unexpected error as a failed tool result.
        logger.error(f"Error executing browser action: {e}")
        print(traceback.format_exc())
        return self.fail_response(f"Error executing browser action: {e}")
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_navigate_to",
|
||||
"description": "Navigate to a specific url",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "The url to navigate to"
|
||||
}
|
||||
},
|
||||
"required": ["url"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-navigate-to",
|
||||
mappings=[
|
||||
{"param_name": "url", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-navigate-to>
|
||||
https://example.com
|
||||
</browser-navigate-to>
|
||||
'''
|
||||
)
|
||||
async def browser_navigate_to(self, url: str) -> ToolResult:
    """Ask the browser automation API to navigate to a URL.

    Args:
        url (str): The url to navigate to.

    Returns:
        ToolResult: Result of the "navigate_to" action as produced by
            `_execute_browser_action`.
    """
    payload = {"url": url}
    print(f"\033[95mNavigating to: {url}\033[0m")
    return await self._execute_browser_action("navigate_to", payload)
|
||||
|
||||
# @openapi_schema({
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": "browser_search_google",
|
||||
# "description": "Search Google with the provided query",
|
||||
# "parameters": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "query": {
|
||||
# "type": "string",
|
||||
# "description": "The search query to use"
|
||||
# }
|
||||
# },
|
||||
# "required": ["query"]
|
||||
# }
|
||||
# }
|
||||
# })
|
||||
# @xml_schema(
|
||||
# tag_name="browser-search-google",
|
||||
# mappings=[
|
||||
# {"param_name": "query", "node_type": "content", "path": "."}
|
||||
# ],
|
||||
# example='''
|
||||
# <browser-search-google>
|
||||
# artificial intelligence news
|
||||
# </browser-search-google>
|
||||
# '''
|
||||
# )
|
||||
# async def browser_search_google(self, query: str) -> ToolResult:
|
||||
# """Search Google with the provided query
|
||||
|
||||
# Args:
|
||||
# query (str): The search query to use
|
||||
|
||||
# Returns:
|
||||
# dict: Result of the execution
|
||||
# """
|
||||
# print(f"\033[95mSearching Google for: {query}\033[0m")
|
||||
# return await self._execute_browser_action("search_google", {"query": query})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_go_back",
|
||||
"description": "Navigate back in browser history",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {}
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-go-back",
|
||||
mappings=[],
|
||||
example='''
|
||||
<browser-go-back></browser-go-back>
|
||||
'''
|
||||
)
|
||||
async def browser_go_back(self) -> ToolResult:
    """Ask the browser automation API to go back one step in history.

    Returns:
        ToolResult: Result of the "go_back" action as produced by
            `_execute_browser_action`.
    """
    print(f"\033[95mNavigating back in browser history\033[0m")
    no_params = {}
    return await self._execute_browser_action("go_back", no_params)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_wait",
|
||||
"description": "Wait for the specified number of seconds",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"seconds": {
|
||||
"type": "integer",
|
||||
"description": "Number of seconds to wait (default: 3)"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-wait",
|
||||
mappings=[
|
||||
{"param_name": "seconds", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-wait>
|
||||
5
|
||||
</browser-wait>
|
||||
'''
|
||||
)
|
||||
async def browser_wait(self, seconds: int = 3) -> ToolResult:
    """Ask the browser automation API to wait for a number of seconds.

    Args:
        seconds (int, optional): Number of seconds to wait. Defaults to 3.

    Returns:
        ToolResult: Result of the "wait" action as produced by
            `_execute_browser_action`.
    """
    payload = {"seconds": seconds}
    print(f"\033[95mWaiting for {seconds} seconds\033[0m")
    return await self._execute_browser_action("wait", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_click_element",
|
||||
"description": "Click on an element by index",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the element to click"
|
||||
}
|
||||
},
|
||||
"required": ["index"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-click-element",
|
||||
mappings=[
|
||||
{"param_name": "index", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-click-element>
|
||||
2
|
||||
</browser-click-element>
|
||||
'''
|
||||
)
|
||||
async def browser_click_element(self, index: int) -> ToolResult:
    """Ask the browser automation API to click the element with the given index.

    Args:
        index (int): The index of the element to click.

    Returns:
        ToolResult: Result of the "click_element" action as produced by
            `_execute_browser_action`.
    """
    payload = {"index": index}
    print(f"\033[95mClicking element with index: {index}\033[0m")
    return await self._execute_browser_action("click_element", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_input_text",
|
||||
"description": "Input text into an element",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the element to input text into"
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "The text to input"
|
||||
}
|
||||
},
|
||||
"required": ["index", "text"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-input-text",
|
||||
mappings=[
|
||||
{"param_name": "index", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "text", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-input-text index="2">
|
||||
Hello, world!
|
||||
</browser-input-text>
|
||||
'''
|
||||
)
|
||||
async def browser_input_text(self, index: int, text: str) -> ToolResult:
    """Ask the browser automation API to type text into an element.

    Args:
        index (int): The index of the element to input text into.
        text (str): The text to input.

    Returns:
        ToolResult: Result of the "input_text" action as produced by
            `_execute_browser_action`.
    """
    payload = {"index": index, "text": text}
    print(f"\033[95mInputting text into element {index}: {text}\033[0m")
    return await self._execute_browser_action("input_text", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_send_keys",
|
||||
"description": "Send keyboard keys such as Enter, Escape, or keyboard shortcuts",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"keys": {
|
||||
"type": "string",
|
||||
"description": "The keys to send (e.g., 'Enter', 'Escape', 'Control+a')"
|
||||
}
|
||||
},
|
||||
"required": ["keys"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-send-keys",
|
||||
mappings=[
|
||||
{"param_name": "keys", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-send-keys>
|
||||
Enter
|
||||
</browser-send-keys>
|
||||
'''
|
||||
)
|
||||
async def browser_send_keys(self, keys: str) -> ToolResult:
    """Ask the browser automation API to send keyboard keys.

    Args:
        keys (str): The keys to send (e.g., 'Enter', 'Escape', 'Control+a').

    Returns:
        ToolResult: Result of the "send_keys" action as produced by
            `_execute_browser_action`.
    """
    payload = {"keys": keys}
    print(f"\033[95mSending keys: {keys}\033[0m")
    return await self._execute_browser_action("send_keys", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_switch_tab",
|
||||
"description": "Switch to a different browser tab",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"page_id": {
|
||||
"type": "integer",
|
||||
"description": "The ID of the tab to switch to"
|
||||
}
|
||||
},
|
||||
"required": ["page_id"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-switch-tab",
|
||||
mappings=[
|
||||
{"param_name": "page_id", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-switch-tab>
|
||||
1
|
||||
</browser-switch-tab>
|
||||
'''
|
||||
)
|
||||
async def browser_switch_tab(self, page_id: int) -> ToolResult:
    """Ask the browser automation API to switch to another tab.

    Args:
        page_id (int): The ID of the tab to switch to.

    Returns:
        ToolResult: Result of the "switch_tab" action as produced by
            `_execute_browser_action`.
    """
    payload = {"page_id": page_id}
    print(f"\033[95mSwitching to tab: {page_id}\033[0m")
    return await self._execute_browser_action("switch_tab", payload)
|
||||
|
||||
# @openapi_schema({
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": "browser_open_tab",
|
||||
# "description": "Open a new browser tab with the specified URL",
|
||||
# "parameters": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "url": {
|
||||
# "type": "string",
|
||||
# "description": "The URL to open in the new tab"
|
||||
# }
|
||||
# },
|
||||
# "required": ["url"]
|
||||
# }
|
||||
# }
|
||||
# })
|
||||
# @xml_schema(
|
||||
# tag_name="browser-open-tab",
|
||||
# mappings=[
|
||||
# {"param_name": "url", "node_type": "content", "path": "."}
|
||||
# ],
|
||||
# example='''
|
||||
# <browser-open-tab>
|
||||
# https://example.com
|
||||
# </browser-open-tab>
|
||||
# '''
|
||||
# )
|
||||
# async def browser_open_tab(self, url: str) -> ToolResult:
|
||||
# """Open a new browser tab with the specified URL
|
||||
|
||||
# Args:
|
||||
# url (str): The URL to open in the new tab
|
||||
|
||||
# Returns:
|
||||
# dict: Result of the execution
|
||||
# """
|
||||
# print(f"\033[95mOpening new tab with URL: {url}\033[0m")
|
||||
# return await self._execute_browser_action("open_tab", {"url": url})
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_close_tab",
|
||||
"description": "Close a browser tab",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"page_id": {
|
||||
"type": "integer",
|
||||
"description": "The ID of the tab to close"
|
||||
}
|
||||
},
|
||||
"required": ["page_id"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-close-tab",
|
||||
mappings=[
|
||||
{"param_name": "page_id", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-close-tab>
|
||||
1
|
||||
</browser-close-tab>
|
||||
'''
|
||||
)
|
||||
async def browser_close_tab(self, page_id: int) -> ToolResult:
    """Ask the browser automation API to close a tab.

    Args:
        page_id (int): The ID of the tab to close.

    Returns:
        ToolResult: Result of the "close_tab" action as produced by
            `_execute_browser_action`.
    """
    payload = {"page_id": page_id}
    print(f"\033[95mClosing tab: {page_id}\033[0m")
    return await self._execute_browser_action("close_tab", payload)
|
||||
|
||||
# @openapi_schema({
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": "browser_extract_content",
|
||||
# "description": "Extract content from the current page based on the provided goal",
|
||||
# "parameters": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "goal": {
|
||||
# "type": "string",
|
||||
# "description": "The extraction goal (e.g., 'extract all links', 'find product information')"
|
||||
# }
|
||||
# },
|
||||
# "required": ["goal"]
|
||||
# }
|
||||
# }
|
||||
# })
|
||||
# @xml_schema(
|
||||
# tag_name="browser-extract-content",
|
||||
# mappings=[
|
||||
# {"param_name": "goal", "node_type": "content", "path": "."}
|
||||
# ],
|
||||
# example='''
|
||||
# <browser-extract-content>
|
||||
# Extract all links on the page
|
||||
# </browser-extract-content>
|
||||
# '''
|
||||
# )
|
||||
# async def browser_extract_content(self, goal: str) -> ToolResult:
|
||||
# """Extract content from the current page based on the provided goal
|
||||
|
||||
# Args:
|
||||
# goal (str): The extraction goal
|
||||
|
||||
# Returns:
|
||||
# dict: Result of the execution
|
||||
# """
|
||||
# print(f"\033[95mExtracting content with goal: {goal}\033[0m")
|
||||
# result = await self._execute_browser_action("extract_content", {"goal": goal})
|
||||
|
||||
# # Format content for better readability
|
||||
# if result.get("success"):
|
||||
# print(f"\033[92mContent extraction successful\033[0m")
|
||||
# content = result.data.get("content", "")
|
||||
# url = result.data.get("url", "")
|
||||
# title = result.data.get("title", "")
|
||||
|
||||
# if content:
|
||||
# content_preview = content[:200] + "..." if len(content) > 200 else content
|
||||
# print(f"\033[95mExtracted content from {title} ({url}):\033[0m")
|
||||
# print(f"\033[96m{content_preview}\033[0m")
|
||||
# print(f"\033[95mTotal content length: {len(content)} characters\033[0m")
|
||||
# else:
|
||||
# print(f"\033[93mNo content extracted from {url}\033[0m")
|
||||
# else:
|
||||
# print(f"\033[91mFailed to extract content: {result.data.get('error', 'Unknown error')}\033[0m")
|
||||
|
||||
# return result
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_scroll_down",
|
||||
"description": "Scroll down the page",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"amount": {
|
||||
"type": "integer",
|
||||
"description": "Pixel amount to scroll (if not specified, scrolls one page)"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-scroll-down",
|
||||
mappings=[
|
||||
{"param_name": "amount", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-scroll-down>
|
||||
500
|
||||
</browser-scroll-down>
|
||||
'''
|
||||
)
|
||||
async def browser_scroll_down(self, amount: int = None) -> ToolResult:
    """Ask the browser automation API to scroll the page down.

    Args:
        amount (int, optional): Pixel amount to scroll. When None, no amount
            is sent and the request carries an empty parameter set.

    Returns:
        ToolResult: Result of the "scroll_down" action as produced by
            `_execute_browser_action`.
    """
    if amount is None:
        payload = {}
        print(f"\033[95mScrolling down one page\033[0m")
    else:
        payload = {"amount": amount}
        print(f"\033[95mScrolling down by {amount} pixels\033[0m")

    return await self._execute_browser_action("scroll_down", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_scroll_up",
|
||||
"description": "Scroll up the page",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"amount": {
|
||||
"type": "integer",
|
||||
"description": "Pixel amount to scroll (if not specified, scrolls one page)"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-scroll-up",
|
||||
mappings=[
|
||||
{"param_name": "amount", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-scroll-up>
|
||||
500
|
||||
</browser-scroll-up>
|
||||
'''
|
||||
)
|
||||
async def browser_scroll_up(self, amount: int = None) -> ToolResult:
    """Ask the browser automation API to scroll the page up.

    Args:
        amount (int, optional): Pixel amount to scroll. When None, no amount
            is sent and the request carries an empty parameter set.

    Returns:
        ToolResult: Result of the "scroll_up" action as produced by
            `_execute_browser_action`.
    """
    if amount is None:
        payload = {}
        print(f"\033[95mScrolling up one page\033[0m")
    else:
        payload = {"amount": amount}
        print(f"\033[95mScrolling up by {amount} pixels\033[0m")

    return await self._execute_browser_action("scroll_up", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_scroll_to_text",
|
||||
"description": "Scroll to specific text on the page",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "The text to scroll to"
|
||||
}
|
||||
},
|
||||
"required": ["text"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-scroll-to-text",
|
||||
mappings=[
|
||||
{"param_name": "text", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-scroll-to-text>
|
||||
Contact Us
|
||||
</browser-scroll-to-text>
|
||||
'''
|
||||
)
|
||||
async def browser_scroll_to_text(self, text: str) -> ToolResult:
    """Ask the browser automation API to scroll to specific text on the page.

    Args:
        text (str): The text to scroll to.

    Returns:
        ToolResult: Result of the "scroll_to_text" action as produced by
            `_execute_browser_action`.
    """
    payload = {"text": text}
    print(f"\033[95mScrolling to text: {text}\033[0m")
    return await self._execute_browser_action("scroll_to_text", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_get_dropdown_options",
|
||||
"description": "Get all options from a dropdown element",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the dropdown element"
|
||||
}
|
||||
},
|
||||
"required": ["index"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-get-dropdown-options",
|
||||
mappings=[
|
||||
{"param_name": "index", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-get-dropdown-options>
|
||||
2
|
||||
</browser-get-dropdown-options>
|
||||
'''
|
||||
)
|
||||
async def browser_get_dropdown_options(self, index: int) -> ToolResult:
    """Ask the browser automation API for the options of a dropdown element.

    Args:
        index (int): The index of the dropdown element.

    Returns:
        ToolResult: Result of the "get_dropdown_options" action as produced by
            `_execute_browser_action`.
    """
    payload = {"index": index}
    print(f"\033[95mGetting options from dropdown with index: {index}\033[0m")
    return await self._execute_browser_action("get_dropdown_options", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_select_dropdown_option",
|
||||
"description": "Select an option from a dropdown by text",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the dropdown element"
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "The text of the option to select"
|
||||
}
|
||||
},
|
||||
"required": ["index", "text"]
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-select-dropdown-option",
|
||||
mappings=[
|
||||
{"param_name": "index", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "text", "node_type": "content", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-select-dropdown-option index="2">
|
||||
Option 1
|
||||
</browser-select-dropdown-option>
|
||||
'''
|
||||
)
|
||||
async def browser_select_dropdown_option(self, index: int, text: str) -> ToolResult:
    """Ask the browser automation API to select a dropdown option by its text.

    Args:
        index (int): The index of the dropdown element.
        text (str): The text of the option to select.

    Returns:
        ToolResult: Result of the "select_dropdown_option" action as produced
            by `_execute_browser_action`.
    """
    payload = {"index": index, "text": text}
    print(f"\033[95mSelecting option '{text}' from dropdown with index: {index}\033[0m")
    return await self._execute_browser_action("select_dropdown_option", payload)
|
||||
|
||||
@openapi_schema({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "browser_drag_drop",
|
||||
"description": "Perform drag and drop operation between elements or coordinates",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"element_source": {
|
||||
"type": "string",
|
||||
"description": "The source element selector"
|
||||
},
|
||||
"element_target": {
|
||||
"type": "string",
|
||||
"description": "The target element selector"
|
||||
},
|
||||
"coord_source_x": {
|
||||
"type": "integer",
|
||||
"description": "The source X coordinate"
|
||||
},
|
||||
"coord_source_y": {
|
||||
"type": "integer",
|
||||
"description": "The source Y coordinate"
|
||||
},
|
||||
"coord_target_x": {
|
||||
"type": "integer",
|
||||
"description": "The target X coordinate"
|
||||
},
|
||||
"coord_target_y": {
|
||||
"type": "integer",
|
||||
"description": "The target Y coordinate"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
@xml_schema(
|
||||
tag_name="browser-drag-drop",
|
||||
mappings=[
|
||||
{"param_name": "element_source", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "element_target", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_source_x", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_source_y", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_target_x", "node_type": "attribute", "path": "."},
|
||||
{"param_name": "coord_target_y", "node_type": "attribute", "path": "."}
|
||||
],
|
||||
example='''
|
||||
<browser-drag-drop element_source="#draggable" element_target="#droppable"></browser-drag-drop>
|
||||
'''
|
||||
)
|
||||
async def browser_drag_drop(self, element_source: str = None, element_target: str = None,
                            coord_source_x: int = None, coord_source_y: int = None,
                            coord_target_x: int = None, coord_target_y: int = None) -> ToolResult:
    """Ask the browser automation API to drag and drop between elements or coordinates.

    Element selectors take precedence: when both selectors are truthy only they
    are sent. Otherwise all four coordinates must be given (none of them None).

    Args:
        element_source (str, optional): The source element selector.
        element_target (str, optional): The target element selector.
        coord_source_x (int, optional): The source X coordinate.
        coord_source_y (int, optional): The source Y coordinate.
        coord_target_x (int, optional): The target X coordinate.
        coord_target_y (int, optional): The target Y coordinate.

    Returns:
        ToolResult: Result of the "drag_drop" action as produced by
            `_execute_browser_action`, or a failure response when neither a
            selector pair nor a full set of coordinates was supplied.
    """
    coordinates = {
        "coord_source_x": coord_source_x,
        "coord_source_y": coord_source_y,
        "coord_target_x": coord_target_x,
        "coord_target_y": coord_target_y,
    }

    if element_source and element_target:
        print(f"\033[95mDragging from element '{element_source}' to '{element_target}'\033[0m")
        payload = {"element_source": element_source, "element_target": element_target}
    elif not any(value is None for value in coordinates.values()):
        print(f"\033[95mDragging from coordinates ({coord_source_x}, {coord_source_y}) to ({coord_target_x}, {coord_target_y})\033[0m")
        payload = coordinates
    else:
        return self.fail_response("Must provide either element selectors or coordinates for drag and drop")

    return await self._execute_browser_action("drag_drop", payload)
|
|
@ -94,7 +94,9 @@ COPY requirements.txt .
|
|||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy server script
|
||||
COPY . /app
|
||||
COPY server.py /app/server.py
|
||||
COPY browser_api.py /app/browser_api.py
|
||||
|
||||
# Install Playwright and browsers with system dependencies
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
|
@ -106,9 +108,6 @@ RUN playwright install chromium
|
|||
# Verify installation
|
||||
RUN python -c "from playwright.sync_api import sync_playwright; print('Playwright installation verified')"
|
||||
|
||||
# Copy the application code
|
||||
# COPY . .
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
from fastapi import FastAPI
|
||||
from automation_service import automation_service
|
||||
|
||||
# Application object; uvicorn loads it through the "api:api_app" import string below.
api_app = FastAPI()


@api_app.get("/api")
async def health_check():
    """Return a static payload saying the API server is running."""
    payload = {"status": "ok", "message": "API server is running"}
    return payload


# Mount the automation service's routes under the /api prefix.
api_app.include_router(automation_service.router, prefix="/api")

# Script entry point: uvicorn is imported lazily and given the app as an import string.
if __name__ == "__main__":
    import uvicorn

    print("Starting API server")
    uvicorn.run("api:api_app", host="0.0.0.0", port=8000)
|
|
@ -1,195 +0,0 @@
|
|||
import pyautogui
|
||||
import time
|
||||
import os
|
||||
import sys
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
import io
|
||||
import base64
|
||||
from PIL import Image
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from enum import Enum
|
||||
|
||||
# Default the X display to :99 unless the environment already names one.
os.environ.setdefault('DISPLAY', ':99')

# Switch pyautogui's FAILSAFE flag off; if that fails, warn on stderr and carry on.
try:
    pyautogui.FAILSAFE = False
except Exception as exc:
    print(f"Warning: Could not initialize pyautogui: {exc}", file=sys.stderr)
    print("This may be due to X11 authentication issues. Continuing anyway.", file=sys.stderr)
|
||||
|
||||
## Input Models
|
||||
|
||||
# String-valued enum of the mouse buttons a MouseAction may name.
class MouseButton(str, Enum):
    left = "left"
    middle = "middle"
    right = "right"
|
||||
|
||||
# Request body for the mouse-move endpoint: a screen coordinate.
# Both fields default to None and are forwarded to pyautogui.moveTo unchanged.
class Position(BaseModel):
    x: Optional[int] = None
    y: Optional[int] = None
|
||||
|
||||
# Request body shared by the click, down, up, drag and scroll mouse endpoints.
# Each handler forwards only the fields its pyautogui call accepts.
class MouseAction(BaseModel):
    # Target coordinate; None is passed through to pyautogui as-is.
    x: Optional[int] = None
    y: Optional[int] = None
    # Click count for click_mouse; also used as the scroll amount by scroll_mouse.
    clicks: Optional[int] = 1
    # Passed as `interval` to pyautogui.click.
    interval: Optional[float] = 0.0
    button: MouseButton = MouseButton.left
    # Passed as `duration` to the click, mouseDown, mouseUp and dragTo calls.
    duration: Optional[float] = 0.0
|
||||
|
||||
# Request body for the key-down and key-up endpoints: the single key to act on.
class KeyboardAction(BaseModel):
    key: str
|
||||
|
||||
# Request body for the key-press endpoint; all fields go straight to pyautogui.press.
class KeyboardPress(BaseModel):
    # One key name or a list of key names.
    keys: Union[str, List[str]]
    presses: Optional[int] = 1
    interval: Optional[float] = 0.0
|
||||
|
||||
# Request body for the keyboard-write endpoint; both fields go to pyautogui.write.
class WriteAction(BaseModel):
    message: str
    interval: Optional[float] = 0.0
|
||||
|
||||
# Request body for the hotkey endpoint: `keys` is unpacked as the positional
# arguments of pyautogui.hotkey, with `interval` passed by keyword.
class HotkeyAction(BaseModel):
    keys: List[str]
    interval: Optional[float] = 0.0
|
||||
|
||||
|
||||
class AutomationService:
    """Router exposing pyautogui mouse, keyboard and screenshot actions.

    Every handler catches all exceptions and reports them inside the returned
    dict instead of raising.
    """

    def __init__(self):
        self.router = APIRouter()

        # Switch pyautogui's FAILSAFE flag off.
        pyautogui.FAILSAFE = False

        # Probe the screen size; if the probe fails, record fallback dimensions
        # and remember that X11 was not usable.
        try:
            self.screen_width, self.screen_height = pyautogui.size()
            print(f"Screen size detected: {self.screen_width}x{self.screen_height}")
            self.x11_available = True
        except Exception as exc:
            print(f"Warning: Could not get screen size: {exc}", file=sys.stderr)
            print("X11 functionality may be limited. Using fallback values.", file=sys.stderr)
            self.screen_width, self.screen_height = 1920, 1080
            self.x11_available = False

        # (router method, path, handler), registered in this order.
        endpoints = (
            ("get", "/automation/mouse/position", self.get_mouse_position),
            ("post", "/automation/mouse/move", self.move_mouse),
            ("post", "/automation/mouse/click", self.click_mouse),
            ("post", "/automation/mouse/down", self.mouse_down),
            ("post", "/automation/mouse/up", self.mouse_up),
            ("post", "/automation/mouse/drag", self.drag_mouse),
            ("post", "/automation/mouse/scroll", self.scroll_mouse),
            ("post", "/automation/keyboard/down", self.key_down),
            ("post", "/automation/keyboard/up", self.key_up),
            ("post", "/automation/keyboard/press", self.press_key),
            ("post", "/automation/keyboard/write", self.write_text),
            ("post", "/automation/keyboard/hotkey", self.press_hotkey),
            ("post", "/automation/screenshot", self.take_screenshot),
        )
        for verb, path, handler in endpoints:
            getattr(self.router, verb)(path)(handler)

    @staticmethod
    def _attempt(operation):
        """Call ``operation()`` and turn the outcome into a success/error dict."""
        try:
            operation()
        except Exception as exc:
            return {"success": False, "error": str(exc)}
        return {"success": True}

    async def get_mouse_position(self):
        """Return the pointer position, or an error with a (0, 0) position."""
        try:
            pointer_x, pointer_y = pyautogui.position()
        except Exception as exc:
            return {"error": str(exc), "x": 0, "y": 0}
        return {"x": pointer_x, "y": pointer_y}

    async def move_mouse(self, action: Position):
        """Move the pointer to ``(action.x, action.y)``."""
        return self._attempt(lambda: pyautogui.moveTo(x=action.x, y=action.y))

    async def click_mouse(self, action: MouseAction):
        """Click using the position, count, interval, button and duration given."""
        return self._attempt(
            lambda: pyautogui.click(
                x=action.x,
                y=action.y,
                clicks=action.clicks,
                interval=action.interval,
                button=action.button,
                duration=action.duration,
            )
        )

    async def mouse_down(self, action: MouseAction):
        """Press a mouse button down via ``pyautogui.mouseDown``."""
        return self._attempt(
            lambda: pyautogui.mouseDown(
                x=action.x, y=action.y, button=action.button, duration=action.duration
            )
        )

    async def mouse_up(self, action: MouseAction):
        """Release a mouse button via ``pyautogui.mouseUp``."""
        return self._attempt(
            lambda: pyautogui.mouseUp(
                x=action.x, y=action.y, button=action.button, duration=action.duration
            )
        )

    async def drag_mouse(self, action: MouseAction):
        """Drag to ``(action.x, action.y)`` via ``pyautogui.dragTo``."""
        return self._attempt(
            lambda: pyautogui.dragTo(
                x=action.x, y=action.y, duration=action.duration, button=action.button
            )
        )

    async def scroll_mouse(self, action: MouseAction):
        """Scroll by ``action.clicks`` at the given position."""
        return self._attempt(
            lambda: pyautogui.scroll(clicks=action.clicks, x=action.x, y=action.y)
        )

    async def key_down(self, action: KeyboardAction):
        """Hold ``action.key`` down."""
        return self._attempt(lambda: pyautogui.keyDown(action.key))

    async def key_up(self, action: KeyboardAction):
        """Release ``action.key``."""
        return self._attempt(lambda: pyautogui.keyUp(action.key))

    async def press_key(self, action: KeyboardPress):
        """Press the given key(s) ``action.presses`` times."""
        return self._attempt(
            lambda: pyautogui.press(
                keys=action.keys, presses=action.presses, interval=action.interval
            )
        )

    async def write_text(self, action: WriteAction):
        """Type ``action.message`` via ``pyautogui.write``."""
        return self._attempt(
            lambda: pyautogui.write(message=action.message, interval=action.interval)
        )

    async def press_hotkey(self, action: HotkeyAction):
        """Press the keys in ``action.keys`` as one hotkey combination."""
        return self._attempt(
            lambda: pyautogui.hotkey(*action.keys, interval=action.interval)
        )

    async def take_screenshot(self) -> Dict[str, str]:
        """Return a base64-encoded PNG screenshot under ``"image"``, or an ``"error"``."""
        try:
            capture = pyautogui.screenshot()
            png_buffer = io.BytesIO()
            capture.save(png_buffer, format='PNG')
            png_bytes = png_buffer.getvalue()
            return {"image": base64.b64encode(png_bytes).decode()}
        except Exception as exc:
            return {"error": str(exc)}
|
||||
|
||||
# Create a singleton instance
|
||||
automation_service = AutomationService()
|
File diff suppressed because it is too large
Load Diff
|
@ -6,7 +6,7 @@ services:
|
|||
dockerfile: ${DOCKERFILE:-Dockerfile}
|
||||
args:
|
||||
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
||||
image: kortixmarko/kortix-suna:0.0.5
|
||||
image: adamcohenhillel/kortix-suna:0.0.13
|
||||
ports:
|
||||
- "6080:6080" # noVNC web interface
|
||||
- "5901:5901" # VNC port
|
||||
|
|
|
@ -65,21 +65,6 @@ startretries=5
|
|||
startsecs=3
|
||||
depends_on=x11vnc
|
||||
|
||||
[program:persistent_browser]
|
||||
environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
|
||||
command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=350
|
||||
startretries=5
|
||||
startsecs=10
|
||||
stopsignal=TERM
|
||||
stopwaitsecs=15
|
||||
depends_on=novnc
|
||||
|
||||
[program:http_server]
|
||||
command=python /app/server.py
|
||||
directory=/app
|
||||
|
@ -94,8 +79,8 @@ startsecs=5
|
|||
stopsignal=TERM
|
||||
stopwaitsecs=10
|
||||
|
||||
[program:api_server]
|
||||
command=python /app/api.py
|
||||
[program:browser_api]
|
||||
command=python /app/browser_api.py
|
||||
directory=/app
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
|
|
|
@ -121,11 +121,12 @@ def prepare_params(
|
|||
logger.debug(f"Added {len(tools)} tools to API parameters")
|
||||
|
||||
# # Add Claude-specific headers
|
||||
# if "claude" in model_name.lower() or "anthropic" in model_name.lower():
|
||||
# params["extra_headers"] = {
|
||||
# "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
|
||||
# }
|
||||
# logger.debug("Added Claude-specific headers")
|
||||
if "claude" in model_name.lower() or "anthropic" in model_name.lower():
|
||||
params["extra_headers"] = {
|
||||
# "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
|
||||
"anthropic-beta": "output-128k-2025-02-19"
|
||||
}
|
||||
logger.debug("Added Claude-specific headers")
|
||||
|
||||
# Add OpenRouter-specific parameters
|
||||
if model_name.startswith("openrouter/"):
|
||||
|
|
|
@ -4,9 +4,9 @@ from services.supabase import DBConnection
|
|||
|
||||
# Define subscription tiers and their monthly hour limits
|
||||
SUBSCRIPTION_TIERS = {
|
||||
'price_1RDQbOG6l1KZGqIrgrYzMbnL': {'name': 'free', 'hours': 1},
|
||||
'price_1RC2PYG6l1KZGqIrpbzFB9Lp': {'name': 'base', 'hours': 1},
|
||||
'price_1RDQWqG6l1KZGqIrChli4Ys4': {'name': 'extra', 'hours': 1}
|
||||
'price_1RDQbOG6l1KZGqIrgrYzMbnL': {'name': 'free', 'hours': 100},
|
||||
'price_1RC2PYG6l1KZGqIrpbzFB9Lp': {'name': 'base', 'hours': 100},
|
||||
'price_1RDQWqG6l1KZGqIrChli4Ys4': {'name': 'extra', 'hours': 100}
|
||||
}
|
||||
|
||||
async def get_account_subscription(client, account_id: str) -> Optional[Dict]:
|
||||
|
|
|
@ -282,6 +282,12 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
part.isToolCall = !isUserMessage;
|
||||
part.status = part.isClosing ? 'completed' : 'running';
|
||||
|
||||
// Check if this is a browser-related tool and add VNC preview
|
||||
if (part.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
|
||||
console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${part.tagName}`);
|
||||
part.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
|
||||
}
|
||||
|
||||
// Use ID for deduplication
|
||||
if (!seenTagIds.has(part.id)) {
|
||||
seenTagIds.add(part.id);
|
||||
|
@ -307,6 +313,12 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
tag.isToolCall = !isUserMessage;
|
||||
tag.status = tag.isClosing ? 'completed' : 'running';
|
||||
|
||||
// Check if this is a browser-related tool and add VNC preview
|
||||
if (tag.tagName.includes('browser') && agent?.sandbox?.vnc_preview) {
|
||||
console.log(`[TOOLS] Adding VNC preview from sandbox to browser tool ${tag.tagName}`);
|
||||
tag.vncPreview = agent.sandbox.vnc_preview + "/vnc_lite.html?password=" + agent.sandbox.pass;
|
||||
}
|
||||
|
||||
// Use ID for deduplication
|
||||
if (!seenTagIds.has(tag.id)) {
|
||||
seenTagIds.add(tag.id);
|
||||
|
@ -381,7 +393,7 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
|
||||
// Update tool calls in the shared context
|
||||
setToolCalls(pairedTags);
|
||||
}, [messages, streamContent, setToolCalls]);
|
||||
}, [messages, streamContent, setToolCalls, agent]);
|
||||
|
||||
// Scroll to bottom of messages
|
||||
const scrollToBottom = useCallback(() => {
|
||||
|
@ -752,6 +764,10 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
<>
|
||||
{messages.map((message, index) => {
|
||||
// Skip messages containing "ToolResult("
|
||||
if (!message || !message?.content || !message?.role) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (message.content.includes("ToolResult(")) {
|
||||
return null;
|
||||
}
|
||||
|
@ -927,6 +943,9 @@ export default function AgentPage({ params }: AgentPageProps) {
|
|||
<>
|
||||
{messages.map((message, index) => {
|
||||
// Skip messages containing "ToolResult("
|
||||
if (!message || !message?.content || !message?.role) {
|
||||
return null;
|
||||
}
|
||||
if (message.content.includes("ToolResult(")) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -16,12 +16,12 @@ export const SUBSCRIPTION_PLANS = {
|
|||
const PLAN_DETAILS = {
|
||||
[SUBSCRIPTION_PLANS.FREE]: {
|
||||
name: 'Free',
|
||||
limit: 1,
|
||||
limit: 100,
|
||||
price: 0
|
||||
},
|
||||
[SUBSCRIPTION_PLANS.BASIC]: {
|
||||
name: 'Basic',
|
||||
limit: 10,
|
||||
limit: 100,
|
||||
price: 10
|
||||
},
|
||||
[SUBSCRIPTION_PLANS.PRO]: {
|
||||
|
|
|
@ -4,7 +4,7 @@ import React from 'react';
|
|||
import { ParsedTag, ToolComponentProps } from '@/lib/types/tool-calls';
|
||||
import {
|
||||
File, FileText, Terminal, FolderPlus, Folder, Code, Search as SearchIcon,
|
||||
Bell, Replace, Plus, Minus
|
||||
Bell, Replace, Plus, Minus, Globe, Search
|
||||
} from 'lucide-react';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { diffLines } from 'diff';
|
||||
|
@ -458,6 +458,128 @@ export const SearchCodeTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
|
|||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* Browser Navigate Tool Component
|
||||
*/
|
||||
export const BrowserNavigateTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
  // The tag's text content is displayed as the target URL.
  const targetUrl = tag.content || '';
  const running = tag.status === 'running';
  const statusLabel = running ? "Navigating to" : "Navigated to";

  // Compact mode: a single-line summary.
  if (mode === 'compact') {
    return (
      <CompactToolDisplay
        icon={<Globe className="h-4 w-4 mr-2" />}
        name={statusLabel}
        input={targetUrl}
        isRunning={running}
      />
    );
  }

  // Indicator shown in the header while the tool call is still running.
  const runningBadge = running && (
    <div className="flex items-center gap-2">
      <span className="text-amber-500">Running</span>
      <div className="h-2 w-2 rounded-full bg-amber-500 animate-pulse"></div>
    </div>
  );

  // Embedded VNC page, rendered at 200% size and scaled by 0.5 so the whole
  // view fits inside the 300px-high container.
  const vncPanel = tag.vncPreview && (
    <div className="mt-2 border border-subtle dark:border-white/10 rounded-md overflow-hidden">
      <div className="text-xs bg-black text-white p-1">VNC Preview</div>
      <div className="relative w-full h-[300px] overflow-hidden">
        <iframe
          src={tag.vncPreview}
          title="Browser preview"
          className="absolute top-0 left-0 border-0"
          style={{
            width: '200%',
            height: '200%',
            transform: 'scale(0.5)',
            transformOrigin: '0 0'
          }}
          sandbox="allow-same-origin allow-scripts"
        />
      </div>
    </div>
  );

  return (
    <div className="border rounded-lg overflow-hidden border-subtle dark:border-white/10">
      <div className="flex items-center px-2 py-1 text-xs font-medium border-b border-subtle dark:border-white/10 bg-background-secondary dark:bg-background-secondary text-foreground">
        <Globe className="h-4 w-4 mr-2" />
        <div className="flex-1">{statusLabel}: {targetUrl}</div>
        {runningBadge}
      </div>
      <div className="p-3 bg-card-bg dark:bg-background-secondary text-foreground">
        <div className="space-y-2">
          <div className="flex items-center gap-1 text-xs text-muted-foreground mb-1">
            <Globe className="h-3 w-3" />
            <span className="font-mono">{targetUrl}</span>
          </div>

          {vncPanel}
        </div>
      </div>
    </div>
  );
};
|
||||
|
||||
/**
|
||||
* Web Search Tool Component
|
||||
*/
|
||||
export const WebSearchTool: React.FC<ToolComponentProps> = ({ tag, mode }) => {
  const query = tag.attributes.query || '';
  const isRunning = tag.status === 'running';

  // Compact mode: a single-line summary; the result payload is not needed.
  if (mode === 'compact') {
    return (
      <CompactToolDisplay
        icon={<Search className="h-4 w-4 mr-2" />}
        name={isRunning ? "Web search in progress..." : "Web search complete"}
        input={query}
        isRunning={isRunning}
      />
    );
  }

  // The tool output is expected to be a JSON array of results. JSON.parse
  // throws on malformed input, and a non-array value would break `.map`
  // below; either would crash rendering, so fall back to "no results".
  let results: any[] = [];
  if (tag.result?.output) {
    try {
      const parsed = JSON.parse(tag.result.output);
      if (Array.isArray(parsed)) {
        results = parsed;
      } else {
        console.warn('Web search output is not an array; showing no results');
      }
    } catch (err) {
      console.warn('Could not parse web search output as JSON', err);
    }
  }

  return (
    <div className="border rounded-lg overflow-hidden border-subtle dark:border-white/10">
      <div className="flex items-center px-2 py-1 text-xs font-medium border-b border-subtle dark:border-white/10 bg-background-secondary dark:bg-background-secondary text-foreground">
        <Search className="h-4 w-4 mr-2" />
        <div className="flex-1">Web Search: {query}</div>
        {isRunning && (
          <div className="flex items-center gap-2">
            <span className="text-amber-500">Searching</span>
            <div className="h-2 w-2 rounded-full bg-amber-500 animate-pulse"></div>
          </div>
        )}
      </div>
      <div className="p-3 bg-card-bg dark:bg-background-secondary text-foreground">
        {results.length > 0 ? (
          <div className="space-y-3">
            {results.map((result: any, index: number) => (
              <div key={index} className="text-sm">
                <a href={result.URL} target="_blank" rel="noopener noreferrer" className="font-medium text-blue-600 hover:underline">
                  {result.Title}
                </a>
                <div className="text-xs text-muted-foreground mt-1">
                  {result.URL}
                  {result['Published Date'] && (
                    <span className="ml-2">
                      ({new Date(result['Published Date']).toLocaleDateString()})
                    </span>
                  )}
                </div>
              </div>
            ))}
          </div>
        ) : (
          <div className="text-sm text-muted-foreground">No results found</div>
        )}
      </div>
    </div>
  );
};
|
||||
|
||||
// Tool component registry
|
||||
export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>> = {
|
||||
'create-file': CreateFileTool,
|
||||
|
@ -471,10 +593,28 @@ export const ToolComponentRegistry: Record<string, React.FC<ToolComponentProps>>
|
|||
'ask': NotifyTool, // Handle ask similar to notify for now
|
||||
'complete': NotifyTool, // Handle complete similar to notify for now
|
||||
'full-file-rewrite': FullFileRewriteTool,
|
||||
'browser-navigate-to': BrowserNavigateTool,
|
||||
'browser-click-element': BrowserNavigateTool,
|
||||
'browser-input-text': BrowserNavigateTool,
|
||||
'browser-go-back': BrowserNavigateTool,
|
||||
'browser-wait': BrowserNavigateTool,
|
||||
'browser-scroll-down': BrowserNavigateTool,
|
||||
'browser-scroll-up': BrowserNavigateTool,
|
||||
'browser-scroll-to-text': BrowserNavigateTool,
|
||||
'browser-switch-tab': BrowserNavigateTool,
|
||||
'browser-close-tab': BrowserNavigateTool,
|
||||
'browser-get-dropdown-options': BrowserNavigateTool,
|
||||
'browser-select-dropdown-option': BrowserNavigateTool,
|
||||
'browser-drag-drop': BrowserNavigateTool,
|
||||
'web-search': WebSearchTool,
|
||||
};
|
||||
|
||||
// Helper function to get the appropriate component for a tag
|
||||
export function getComponentForTag(tag: ParsedTag): React.FC<ToolComponentProps> {
|
||||
console.log("getComponentForTag", tag);
|
||||
if (!tag || !tag?.tagName) {
|
||||
console.warn(`No tag name for tag: ${tag}`);
|
||||
}
|
||||
if (!ToolComponentRegistry[tag.tagName]) {
|
||||
console.warn(`No component registered for tag type: ${tag.tagName}`);
|
||||
}
|
||||
|
|
|
@ -175,29 +175,3 @@ export function useToolsPanel() {
|
|||
prevTool,
|
||||
};
|
||||
}
|
||||
|
||||
// Helper function to get a friendly title for a tool call
|
||||
function getToolTitle(tag: ParsedTag): string {
  // Tag name -> [title prefix, attribute whose value follows the prefix].
  const titleParts = new Map<string, [string, string]>([
    ['create-file', ['Creating file', 'file_path']],
    ['read-file', ['Reading file', 'file_path']],
    ['execute-command', ['Executing', 'command']],
    ['create-directory', ['Creating directory', 'path']],
    ['list-directory', ['Listing directory', 'path']],
    ['search-code', ['Searching code', 'query']],
    ['notify', ['Notification', 'message']],
    ['str-replace', ['String replace', 'pattern']],
    ['full-file-rewrite', ['Full file rewrite', 'file_path']],
  ]);

  const parts = titleParts.get(tag.tagName);
  if (!parts) {
    // Unknown tags get a generic title.
    return `${tag.tagName} operation`;
  }

  const [prefix, attributeName] = parts;
  return `${prefix}: ${tag.attributes[attributeName] || ''}`;
}
|
|
@ -80,8 +80,11 @@ export type Project = {
|
|||
description: string;
|
||||
account_id: string;
|
||||
created_at: string;
|
||||
sandbox_id?: string;
|
||||
sandbox_pass?: string;
|
||||
sandbox: {
|
||||
vnc_preview?: string;
|
||||
id?: string;
|
||||
pass?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export type Thread = {
|
||||
|
@ -214,7 +217,8 @@ export const createProject = async (
|
|||
name: data.name,
|
||||
description: data.description || '',
|
||||
account_id: data.account_id,
|
||||
created_at: data.created_at
|
||||
created_at: data.created_at,
|
||||
sandbox: { id: "", pass: "", vnc_preview: "" }
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -13,6 +13,9 @@ export interface ParsedTag {
|
|||
isToolCall?: boolean; // Whether this is a tool call (vs a result)
|
||||
isPaired?: boolean; // Whether this tag has been paired with its call/result
|
||||
status?: 'running' | 'completed' | 'error'; // Status of the tool call
|
||||
|
||||
// VNC preview for browser-related tools
|
||||
vncPreview?: string; // URL of the noVNC page (vnc_lite.html) used as the preview iframe src
|
||||
}
|
||||
|
||||
// Display mode for tool components
|
||||
|
@ -37,7 +40,21 @@ export const SUPPORTED_XML_TAGS = [
|
|||
'list-directory',
|
||||
'search-code',
|
||||
'complete',
|
||||
'full-file-rewrite'
|
||||
'full-file-rewrite',
|
||||
'browser-navigate-to',
|
||||
'browser-click-element',
|
||||
'browser-input-text',
|
||||
'browser-go-back',
|
||||
'browser-wait',
|
||||
'browser-scroll-down',
|
||||
'browser-scroll-up',
|
||||
'browser-scroll-to-text',
|
||||
'browser-switch-tab',
|
||||
'browser-close-tab',
|
||||
'browser-get-dropdown-options',
|
||||
'browser-select-dropdown-option',
|
||||
'browser-drag-drop',
|
||||
'web-search'
|
||||
];
|
||||
|
||||
// Tool status labels
|
||||
|
|
Loading…
Reference in New Issue