save all browser states

This commit is contained in:
marko-kraemer 2025-05-19 01:06:44 +02:00
parent 952a2dd3bf
commit 70755d3074
4 changed files with 29 additions and 24 deletions

View File

@@ -154,7 +154,6 @@ async def run_agent(
 else:
 logger.warning("Browser state found but no screenshot data.")
-await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute()
 except Exception as e:
 logger.error(f"Error parsing browser state: {e}")

View File

@@ -59,7 +59,6 @@ class SandboxBrowserTool(SandboxToolsBase):
 logger.info("Browser automation request completed successfully")
-# Add full result to thread messages for state tracking
 added_message = await self.thread_manager.add_message(
 thread_id=self.thread_id,
 type="browser_state",
@@ -67,17 +66,13 @@ class SandboxBrowserTool(SandboxToolsBase):
 is_llm_message=False
 )
-# Return tool-specific success response
 success_response = {
 "success": True,
 "message": result.get("message", "Browser action completed successfully")
 }
-# Add message ID if available
 if added_message and 'message_id' in added_message:
 success_response['message_id'] = added_message['message_id']
-# Add relevant browser-specific info
 if result.get("url"):
 success_response["url"] = result["url"]
 if result.get("title"):
@@ -86,7 +81,6 @@ class SandboxBrowserTool(SandboxToolsBase):
 success_response["elements_found"] = result["element_count"]
 if result.get("pixels_below"):
 success_response["scrollable_content"] = result["pixels_below"] > 0
-# Add OCR text when available
 if result.get("ocr_text"):
 success_response["ocr_text"] = result["ocr_text"]
@ -104,6 +98,7 @@ class SandboxBrowserTool(SandboxToolsBase):
logger.debug(traceback.format_exc()) logger.debug(traceback.format_exc())
return self.fail_response(f"Error executing browser action: {e}") return self.fail_response(f"Error executing browser action: {e}")
@openapi_schema({ @openapi_schema({
"type": "function", "type": "function",
"function": { "function": {

View File

@@ -978,7 +978,7 @@ class ResponseProcessor:
 if value is not None:
 params[mapping.param_name] = value
 parsing_details["attributes"][mapping.param_name] = value # Store raw attribute
-logger.info(f"Found attribute {mapping.param_name}: {value}")
+# logger.info(f"Found attribute {mapping.param_name}: {value}")
 elif mapping.node_type == "element":
 # Extract element content
@@ -986,7 +986,7 @@ class ResponseProcessor:
 if content is not None:
 params[mapping.param_name] = content.strip()
 parsing_details["elements"][mapping.param_name] = content.strip() # Store raw element content
-logger.info(f"Found element {mapping.param_name}: {content.strip()}")
+# logger.info(f"Found element {mapping.param_name}: {content.strip()}")
 elif mapping.node_type == "text":
 # Extract text content
@@ -994,7 +994,7 @@ class ResponseProcessor:
 if content is not None:
 params[mapping.param_name] = content.strip()
 parsing_details["text_content"] = content.strip() # Store raw text content
-logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
+# logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
 elif mapping.node_type == "content":
 # Extract root content
@@ -1002,7 +1002,7 @@ class ResponseProcessor:
 if content is not None:
 params[mapping.param_name] = content.strip()
 parsing_details["root_content"] = content.strip() # Store raw root content
-logger.info(f"Found root content for {mapping.param_name}")
+# logger.info(f"Found root content for {mapping.param_name}")
 except Exception as e:
 logger.error(f"Error processing mapping {mapping}: {e}")

View File

@ -72,20 +72,31 @@ export function BrowserToolView({
// Find the browser_state message and extract the screenshot // Find the browser_state message and extract the screenshot
let screenshotBase64: string | null = null; let screenshotBase64: string | null = null;
if (browserStateMessageId && messages.length > 0) { let latestBrowserState: any = null;
const browserStateMessage = messages.find( let latestTimestamp = 0;
(msg) =>
(msg.type as string) === 'browser_state' &&
msg.message_id === browserStateMessageId,
);
if (browserStateMessage) { if (messages.length > 0) {
const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>( // Find the latest browser_state message by comparing timestamps
browserStateMessage.content, messages.forEach((msg) => {
{}, if ((msg.type as string) === 'browser_state') {
); try {
console.log('Browser state content: ', browserStateContent) const content = safeJsonParse<{ timestamp?: number }>(msg.content, {});
screenshotBase64 = browserStateContent?.screenshot_base64 || null; const timestamp = content?.timestamp || 0;
if (timestamp > latestTimestamp) {
latestTimestamp = timestamp;
latestBrowserState = content;
}
} catch (error) {
console.error('[BrowserToolView] Error parsing browser state:', error);
}
}
});
// Use the latest browser state
if (latestBrowserState) {
screenshotBase64 = latestBrowserState.screenshot_base64 || null;
console.log('Latest browser state:', latestBrowserState);
} }
} }