From 70755d30745ff0c4e5828f287755657013620f15 Mon Sep 17 00:00:00 2001
From: marko-kraemer <markokraemer.mail@gmail.com>
Date: Mon, 19 May 2025 01:06:44 +0200
Subject: [PATCH] Keep browser_state messages; show latest in BrowserToolView

---
 backend/agent/run.py                          |  1 -
 backend/agent/tools/sb_browser_tool.py        |  7 +---
 backend/agentpress/response_processor.py      |  8 ++--
 .../thread/tool-views/BrowserToolView.tsx     | 37 ++++++++++++-------
 4 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/backend/agent/run.py b/backend/agent/run.py
index 3301e498..59fd5e01 100644
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@@ -154,7 +154,6 @@ async def run_agent(
                 else:
                     logger.warning("Browser state found but no screenshot data.")
 
-                await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute()
             except Exception as e:
                 logger.error(f"Error parsing browser state: {e}")
 
diff --git a/backend/agent/tools/sb_browser_tool.py b/backend/agent/tools/sb_browser_tool.py
index ce9130ec..eaeeac6a 100644
--- a/backend/agent/tools/sb_browser_tool.py
+++ b/backend/agent/tools/sb_browser_tool.py
@@ -59,7 +59,6 @@ class SandboxBrowserTool(SandboxToolsBase):
 
                     logger.info("Browser automation request completed successfully")
 
-                    # Add full result to thread messages for state tracking
                     added_message = await self.thread_manager.add_message(
                         thread_id=self.thread_id,
                         type="browser_state",
@@ -67,17 +66,13 @@ class SandboxBrowserTool(SandboxToolsBase):
                         is_llm_message=False
                     )
 
-                    # Return tool-specific success response
                     success_response = {
                         "success": True,
                         "message": result.get("message", "Browser action completed successfully")
                     }
 
-                    # Add message ID if available
                     if added_message and 'message_id' in added_message:
                         success_response['message_id'] = added_message['message_id']
-
-                    # Add relevant browser-specific info
                     if result.get("url"):
                         success_response["url"] = result["url"]
                     if result.get("title"):
@@ -86,7 +81,6 @@ class SandboxBrowserTool(SandboxToolsBase):
                         success_response["elements_found"] = result["element_count"]
                     if result.get("pixels_below"):
                         success_response["scrollable_content"] = result["pixels_below"] > 0
-                    # Add OCR text when available
                     if result.get("ocr_text"):
                         success_response["ocr_text"] = result["ocr_text"]
 
@@ -104,6 +98,7 @@ class SandboxBrowserTool(SandboxToolsBase):
             logger.debug(traceback.format_exc())
             return self.fail_response(f"Error executing browser action: {e}")
 
+
     @openapi_schema({
         "type": "function",
         "function": {
diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py
index ea6e028a..58cdaf83 100644
--- a/backend/agentpress/response_processor.py
+++ b/backend/agentpress/response_processor.py
@@ -978,7 +978,7 @@ class ResponseProcessor:
                         if value is not None:
                             params[mapping.param_name] = value
                             parsing_details["attributes"][mapping.param_name] = value # Store raw attribute
-                            logger.info(f"Found attribute {mapping.param_name}: {value}")
+                            # logger.info(f"Found attribute {mapping.param_name}: {value}")
                 
                     elif mapping.node_type == "element":
                         # Extract element content
@@ -986,7 +986,7 @@ class ResponseProcessor:
                         if content is not None:
                             params[mapping.param_name] = content.strip()
                             parsing_details["elements"][mapping.param_name] = content.strip() # Store raw element content
-                            logger.info(f"Found element {mapping.param_name}: {content.strip()}")
+                            # logger.info(f"Found element {mapping.param_name}: {content.strip()}")
                 
                     elif mapping.node_type == "text":
                         # Extract text content
@@ -994,7 +994,7 @@ class ResponseProcessor:
                         if content is not None:
                             params[mapping.param_name] = content.strip()
                             parsing_details["text_content"] = content.strip() # Store raw text content
-                            logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
+                            # logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
                 
                     elif mapping.node_type == "content":
                         # Extract root content
@@ -1002,7 +1002,7 @@ class ResponseProcessor:
                         if content is not None:
                             params[mapping.param_name] = content.strip()
                             parsing_details["root_content"] = content.strip() # Store raw root content
-                            logger.info(f"Found root content for {mapping.param_name}")
+                            # logger.info(f"Found root content for {mapping.param_name}")
                 
                 except Exception as e:
                     logger.error(f"Error processing mapping {mapping}: {e}")
diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx
index 2fcb45e9..d43453aa 100644
--- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx
+++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx
@@ -72,20 +72,31 @@ export function BrowserToolView({
 
   // Find the browser_state message and extract the screenshot
   let screenshotBase64: string | null = null;
-  if (browserStateMessageId && messages.length > 0) {
-    const browserStateMessage = messages.find(
-      (msg) =>
-        (msg.type as string) === 'browser_state' &&
-        msg.message_id === browserStateMessageId,
-    );
+  let latestBrowserState: any = null;
+  let latestTimestamp = 0;
 
-    if (browserStateMessage) {
-      const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>(
-        browserStateMessage.content,
-        {},
-      );
-      console.log('Browser state content: ', browserStateContent)
-      screenshotBase64 = browserStateContent?.screenshot_base64 || null;
+  if (messages.length > 0) {
+    // Find the latest browser_state message by comparing timestamps
+    messages.forEach((msg) => {
+      if ((msg.type as string) === 'browser_state') {
+        try {
+          const content = safeJsonParse<{ timestamp?: number }>(msg.content, {});
+          const timestamp = content?.timestamp || 0;
+          
+          if (timestamp > latestTimestamp) {
+            latestTimestamp = timestamp;
+            latestBrowserState = content;
+          }
+        } catch (error) {
+          console.error('[BrowserToolView] Error parsing browser state:', error);
+        }
+      }
+    });
+
+    // Use the latest browser state
+    if (latestBrowserState) {
+      screenshotBase64 = latestBrowserState.screenshot_base64 || null;
+      console.log('Latest browser state:', latestBrowserState);
     }
   }