From 70755d30745ff0c4e5828f287755657013620f15 Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 19 May 2025 01:06:44 +0200 Subject: [PATCH] save all browser states --- backend/agent/run.py | 1 - backend/agent/tools/sb_browser_tool.py | 7 +--- backend/agentpress/response_processor.py | 8 ++-- .../thread/tool-views/BrowserToolView.tsx | 37 ++++++++++++------- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/backend/agent/run.py b/backend/agent/run.py index 3301e498..59fd5e01 100644 --- a/backend/agent/run.py +++ b/backend/agent/run.py @@ -154,7 +154,6 @@ async def run_agent( else: logger.warning("Browser state found but no screenshot data.") - await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute() except Exception as e: logger.error(f"Error parsing browser state: {e}") diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py index ce9130ec..eaeeac6a 100644 --- a/backend/agent/tools/sb_browser_tool.py +++ b/backend/agent/tools/sb_browser_tool.py @@ -59,7 +59,6 @@ class SandboxBrowserTool(SandboxToolsBase): logger.info("Browser automation request completed successfully") - # Add full result to thread messages for state tracking added_message = await self.thread_manager.add_message( thread_id=self.thread_id, type="browser_state", @@ -67,17 +66,13 @@ class SandboxBrowserTool(SandboxToolsBase): is_llm_message=False ) - # Return tool-specific success response success_response = { "success": True, "message": result.get("message", "Browser action completed successfully") } - # Add message ID if available if added_message and 'message_id' in added_message: success_response['message_id'] = added_message['message_id'] - - # Add relevant browser-specific info if result.get("url"): success_response["url"] = result["url"] if result.get("title"): @@ -86,7 +81,6 @@ class SandboxBrowserTool(SandboxToolsBase): success_response["elements_found"] = result["element_count"] if result.get("pixels_below"): success_response["scrollable_content"] = result["pixels_below"] > 0 - # Add OCR text when available if result.get("ocr_text"): success_response["ocr_text"] = result["ocr_text"] @@ -104,6 +98,7 @@ class SandboxBrowserTool(SandboxToolsBase): logger.debug(traceback.format_exc()) return self.fail_response(f"Error executing browser action: {e}") + @openapi_schema({ "type": "function", "function": { diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py index ea6e028a..58cdaf83 100644 --- a/backend/agentpress/response_processor.py +++ b/backend/agentpress/response_processor.py @@ -978,7 +978,7 @@ class ResponseProcessor: if value is not None: params[mapping.param_name] = value parsing_details["attributes"][mapping.param_name] = value # Store raw attribute - logger.info(f"Found attribute {mapping.param_name}: {value}") + # logger.info(f"Found attribute {mapping.param_name}: {value}") elif mapping.node_type == "element": # Extract element content @@ -986,7 +986,7 @@ class ResponseProcessor: if content is not None: params[mapping.param_name] = content.strip() parsing_details["elements"][mapping.param_name] = content.strip() # Store raw element content - logger.info(f"Found element {mapping.param_name}: {content.strip()}") + # logger.info(f"Found element {mapping.param_name}: {content.strip()}") elif mapping.node_type == "text": # Extract text content @@ -994,7 +994,7 @@ class ResponseProcessor: if content is not None: params[mapping.param_name] = content.strip() parsing_details["text_content"] = content.strip() # Store raw text content - logger.info(f"Found text content for {mapping.param_name}: {content.strip()}") + # logger.info(f"Found text content for {mapping.param_name}: {content.strip()}") elif mapping.node_type == "content": # Extract root content @@ -1002,7 +1002,7 @@ class ResponseProcessor: if content is not None: params[mapping.param_name] = content.strip() parsing_details["root_content"] = content.strip() # Store raw root content - logger.info(f"Found root content for {mapping.param_name}") + # logger.info(f"Found root content for {mapping.param_name}") except Exception as e: logger.error(f"Error processing mapping {mapping}: {e}") diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx index 2fcb45e9..d43453aa 100644 --- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx +++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx @@ -72,20 +72,31 @@ export function BrowserToolView({ // Find the browser_state message and extract the screenshot let screenshotBase64: string | null = null; - if (browserStateMessageId && messages.length > 0) { - const browserStateMessage = messages.find( - (msg) => - (msg.type as string) === 'browser_state' && - msg.message_id === browserStateMessageId, - ); + let latestBrowserState: any = null; + let latestTimestamp = 0; - if (browserStateMessage) { - const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>( - browserStateMessage.content, - {}, - ); - console.log('Browser state content: ', browserStateContent) - screenshotBase64 = browserStateContent?.screenshot_base64 || null; + if (messages.length > 0) { + // Find the latest browser_state message by comparing timestamps + messages.forEach((msg) => { + if ((msg.type as string) === 'browser_state') { + try { + const content = safeJsonParse<{ timestamp?: number }>(msg.content, {}); + const timestamp = content?.timestamp || 0; + + if (timestamp > latestTimestamp) { + latestTimestamp = timestamp; + latestBrowserState = content; + } + } catch (error) { + console.error('[BrowserToolView] Error parsing browser state:', error); + } + } + }); + + // Use the latest browser state + if (latestBrowserState) { + screenshotBase64 = latestBrowserState.screenshot_base64 || null; + console.log('Latest browser state:', latestBrowserState); } }