mirror of https://github.com/kortix-ai/suna.git
fuck yeah
This commit is contained in:
parent
176e28dc67
commit
e51b1076a7
|
@ -65,7 +65,8 @@ You have the ability to execute operations using both Python and CLI tools:
|
||||||
* Extract text and HTML content
|
* Extract text and HTML content
|
||||||
* Wait for elements to load
|
* Wait for elements to load
|
||||||
* Scroll pages and handle infinite scroll
|
* Scroll pages and handle infinite scroll
|
||||||
|
* YOU CAN DO ANYTHING ON THE BROWSER - including clicking on elements, filling forms, submitting data, etc.
|
||||||
|
* The browser is in a sandboxed environment, so nothing to worry about.
|
||||||
|
|
||||||
# 3. TOOLKIT & METHODOLOGY
|
# 3. TOOLKIT & METHODOLOGY
|
||||||
|
|
||||||
|
|
|
@ -63,12 +63,12 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
||||||
}
|
}
|
||||||
}).eq('project_id', project_id).execute()
|
}).eq('project_id', project_id).execute()
|
||||||
|
|
||||||
# thread_manager.add_tool(SandboxShellTool, sandbox=sandbox)
|
thread_manager.add_tool(SandboxShellTool, sandbox=sandbox)
|
||||||
# thread_manager.add_tool(SandboxFilesTool, sandbox=sandbox)
|
thread_manager.add_tool(SandboxFilesTool, sandbox=sandbox)
|
||||||
thread_manager.add_tool(SandboxBrowserTool, sandbox=sandbox)
|
thread_manager.add_tool(SandboxBrowserTool, sandbox=sandbox, thread_id=thread_id, thread_manager=thread_manager)
|
||||||
# thread_manager.add_tool(SandboxDeployTool, sandbox=sandbox)
|
thread_manager.add_tool(SandboxDeployTool, sandbox=sandbox)
|
||||||
# thread_manager.add_tool(MessageTool)
|
thread_manager.add_tool(MessageTool)
|
||||||
# thread_manager.add_tool(WebSearchTool)
|
thread_manager.add_tool(WebSearchTool)
|
||||||
|
|
||||||
xml_examples = ""
|
xml_examples = ""
|
||||||
for tag_name, example in thread_manager.tool_registry.get_xml_examples().items():
|
for tag_name, example in thread_manager.tool_registry.get_xml_examples().items():
|
||||||
|
@ -116,11 +116,36 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
||||||
continue_execution = False
|
continue_execution = False
|
||||||
break
|
break
|
||||||
# Get the latest message from the messages table whose type is browser_state
|
# Get the latest message from the messages table whose type is browser_state
|
||||||
|
|
||||||
latest_browser_state = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
|
latest_browser_state = await client.table('messages').select('*').eq('thread_id', thread_id).eq('type', 'browser_state').order('created_at', desc=True).limit(1).execute()
|
||||||
|
temporary_message = None
|
||||||
if latest_browser_state.data and len(latest_browser_state.data) > 0:
|
if latest_browser_state.data and len(latest_browser_state.data) > 0:
|
||||||
temporary_message = latest_browser_state.data[0].get('content', '')
|
try:
|
||||||
else:
|
content = json.loads(latest_browser_state.data[0]["content"])
|
||||||
temporary_message = None
|
screenshot_base64 = content["screenshot_base64"]
|
||||||
|
# Create a copy of the browser state without screenshot
|
||||||
|
browser_state = content.copy()
|
||||||
|
browser_state.pop('screenshot_base64', None)
|
||||||
|
browser_state.pop('screenshot_url', None)
|
||||||
|
browser_state.pop('screenshot_url_base64', None)
|
||||||
|
temporary_message = { "role": "user", "content": [] }
|
||||||
|
if browser_state:
|
||||||
|
temporary_message["content"].append({
|
||||||
|
"type": "text",
|
||||||
|
"text": f"The following is the current state of the browser:\n{browser_state}"
|
||||||
|
})
|
||||||
|
if screenshot_base64:
|
||||||
|
temporary_message["content"].append({
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": f"data:image/jpeg;base64,{screenshot_base64}",
|
||||||
|
}
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
print("@@@@@ THIS TIME NO SCREENSHOT!!")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error parsing browser state: {e}")
|
||||||
|
# print(latest_browser_state.data[0])
|
||||||
|
|
||||||
response = await thread_manager.run_thread(
|
response = await thread_manager.run_thread(
|
||||||
thread_id=thread_id,
|
thread_id=thread_id,
|
||||||
|
@ -131,7 +156,7 @@ async def run_agent(thread_id: str, project_id: str, stream: bool = True, thread
|
||||||
llm_max_tokens=64000,
|
llm_max_tokens=64000,
|
||||||
tool_choice="auto",
|
tool_choice="auto",
|
||||||
max_xml_tool_calls=1,
|
max_xml_tool_calls=1,
|
||||||
# temporary_message=
|
temporary_message=temporary_message,
|
||||||
processor_config=ProcessorConfig(
|
processor_config=ProcessorConfig(
|
||||||
xml_tool_calling=True,
|
xml_tool_calling=True,
|
||||||
native_tool_calling=False,
|
native_tool_calling=False,
|
||||||
|
|
|
@ -2,6 +2,7 @@ import traceback
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from agentpress.tool import ToolResult, openapi_schema, xml_schema
|
from agentpress.tool import ToolResult, openapi_schema, xml_schema
|
||||||
|
from agentpress.thread_manager import ThreadManager
|
||||||
from sandbox.sandbox import SandboxToolsBase, Sandbox
|
from sandbox.sandbox import SandboxToolsBase, Sandbox
|
||||||
from utils.logger import logger
|
from utils.logger import logger
|
||||||
|
|
||||||
|
@ -9,8 +10,10 @@ from utils.logger import logger
|
||||||
class SandboxBrowserTool(SandboxToolsBase):
|
class SandboxBrowserTool(SandboxToolsBase):
|
||||||
"""Tool for executing tasks in a Daytona sandbox with browser-use capabilities."""
|
"""Tool for executing tasks in a Daytona sandbox with browser-use capabilities."""
|
||||||
|
|
||||||
def __init__(self, sandbox: Sandbox):
|
def __init__(self, sandbox: Sandbox, thread_id: str, thread_manager: ThreadManager):
|
||||||
super().__init__(sandbox)
|
super().__init__(sandbox)
|
||||||
|
self.thread_id = thread_id
|
||||||
|
self.thread_manager = thread_manager
|
||||||
|
|
||||||
async def _execute_browser_action(self, endpoint: str, params: dict = None, method: str = "POST") -> ToolResult:
|
async def _execute_browser_action(self, endpoint: str, params: dict = None, method: str = "POST") -> ToolResult:
|
||||||
"""Execute a browser automation action through the API
|
"""Execute a browser automation action through the API
|
||||||
|
@ -45,43 +48,40 @@ class SandboxBrowserTool(SandboxToolsBase):
|
||||||
if response.exit_code == 0:
|
if response.exit_code == 0:
|
||||||
try:
|
try:
|
||||||
result = json.loads(response.result)
|
result = json.loads(response.result)
|
||||||
|
|
||||||
|
if not "content" in result:
|
||||||
|
result["content"] = ""
|
||||||
|
|
||||||
|
if not "role" in result:
|
||||||
|
result["role"] = "assistant"
|
||||||
|
|
||||||
logger.info("Browser automation request completed successfully")
|
logger.info("Browser automation request completed successfully")
|
||||||
|
|
||||||
# Create a cleaned version of the result based on BrowserActionResult schema
|
# Add full result to thread messages for state tracking
|
||||||
cleaned_result = {
|
await self.thread_manager.add_message(
|
||||||
"success": result.get("success", False),
|
thread_id=self.thread_id,
|
||||||
"message": result.get("message", ""),
|
type="browser_state",
|
||||||
"error": result.get("error", ""),
|
content=result,
|
||||||
"url": result.get("url"),
|
is_llm_message=False
|
||||||
"title": result.get("title"),
|
)
|
||||||
"elements": result.get("elements"),
|
|
||||||
"pixels_above": result.get("pixels_above", 0),
|
# Return tool-specific success response
|
||||||
"pixels_below": result.get("pixels_below", 0),
|
success_response = {
|
||||||
"content": result.get("content"),
|
"success": True,
|
||||||
"element_count": result.get("element_count", 0),
|
"message": result.get("message", "Browser action completed successfully")
|
||||||
"interactive_elements": result.get("interactive_elements"),
|
|
||||||
"viewport_width": result.get("viewport_width"),
|
|
||||||
"viewport_height": result.get("viewport_height")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Print screenshot info to console but don't return it
|
# Add relevant browser-specific info
|
||||||
if "screenshot_base64" in result:
|
if result.get("url"):
|
||||||
has_screenshot = bool(result.get("screenshot_base64"))
|
success_response["url"] = result["url"]
|
||||||
print(f"\033[95mScreenshot captured: {has_screenshot}\033[0m")
|
if result.get("title"):
|
||||||
|
success_response["title"] = result["title"]
|
||||||
|
if result.get("element_count"):
|
||||||
|
success_response["elements_found"] = result["element_count"]
|
||||||
|
if result.get("pixels_below"):
|
||||||
|
success_response["scrollable_content"] = result["pixels_below"] > 0
|
||||||
|
|
||||||
# Print viewport info if available
|
return self.success_response(success_response)
|
||||||
if cleaned_result["viewport_width"] and cleaned_result["viewport_height"]:
|
|
||||||
print(f"\033[95mViewport size: {cleaned_result['viewport_width']}x{cleaned_result['viewport_height']}\033[0m")
|
|
||||||
|
|
||||||
# Print interactive elements count
|
|
||||||
if cleaned_result["element_count"] > 0:
|
|
||||||
print(f"\033[95mFound {cleaned_result['element_count']} interactive elements\033[0m")
|
|
||||||
|
|
||||||
print("************************************************")
|
|
||||||
print(cleaned_result)
|
|
||||||
print("************************************************")
|
|
||||||
|
|
||||||
return self.success_response(cleaned_result)
|
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.error(f"Failed to parse response JSON: {response.result}")
|
logger.error(f"Failed to parse response JSON: {response.result}")
|
||||||
|
|
|
@ -764,6 +764,10 @@ export default function AgentPage({ params }: AgentPageProps) {
|
||||||
<>
|
<>
|
||||||
{messages.map((message, index) => {
|
{messages.map((message, index) => {
|
||||||
// Skip messages containing "ToolResult("
|
// Skip messages containing "ToolResult("
|
||||||
|
if (!message || !message?.content || !message?.role) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if (message.content.includes("ToolResult(")) {
|
if (message.content.includes("ToolResult(")) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -939,6 +943,9 @@ export default function AgentPage({ params }: AgentPageProps) {
|
||||||
<>
|
<>
|
||||||
{messages.map((message, index) => {
|
{messages.map((message, index) => {
|
||||||
// Skip messages containing "ToolResult("
|
// Skip messages containing "ToolResult("
|
||||||
|
if (!message || !message?.content || !message?.role) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
if (message.content.includes("ToolResult(")) {
|
if (message.content.includes("ToolResult(")) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,29 +175,3 @@ export function useToolsPanel() {
|
||||||
prevTool,
|
prevTool,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to get a friendly title for a tool call
|
|
||||||
function getToolTitle(tag: ParsedTag): string {
|
|
||||||
switch (tag.tagName) {
|
|
||||||
case 'create-file':
|
|
||||||
return `Creating file: ${tag.attributes.file_path || ''}`;
|
|
||||||
case 'read-file':
|
|
||||||
return `Reading file: ${tag.attributes.file_path || ''}`;
|
|
||||||
case 'execute-command':
|
|
||||||
return `Executing: ${tag.attributes.command || ''}`;
|
|
||||||
case 'create-directory':
|
|
||||||
return `Creating directory: ${tag.attributes.path || ''}`;
|
|
||||||
case 'list-directory':
|
|
||||||
return `Listing directory: ${tag.attributes.path || ''}`;
|
|
||||||
case 'search-code':
|
|
||||||
return `Searching code: ${tag.attributes.query || ''}`;
|
|
||||||
case 'notify':
|
|
||||||
return `Notification: ${tag.attributes.message || ''}`;
|
|
||||||
case 'str-replace':
|
|
||||||
return `String replace: ${tag.attributes.pattern || ''}`;
|
|
||||||
case 'full-file-rewrite':
|
|
||||||
return `Full file rewrite: ${tag.attributes.file_path || ''}`;
|
|
||||||
default:
|
|
||||||
return `${tag.tagName} operation`;
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue