save all browser states

This commit is contained in:
marko-kraemer 2025-05-19 01:06:44 +02:00
parent 952a2dd3bf
commit 70755d3074
4 changed files with 29 additions and 24 deletions

View File

@@ -154,7 +154,6 @@ async def run_agent(
else:
logger.warning("Browser state found but no screenshot data.")
await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute()
except Exception as e:
logger.error(f"Error parsing browser state: {e}")

View File

@@ -59,7 +59,6 @@ class SandboxBrowserTool(SandboxToolsBase):
logger.info("Browser automation request completed successfully")
# Add full result to thread messages for state tracking
added_message = await self.thread_manager.add_message(
thread_id=self.thread_id,
type="browser_state",
@@ -67,17 +66,13 @@ class SandboxBrowserTool(SandboxToolsBase):
is_llm_message=False
)
# Return tool-specific success response
success_response = {
"success": True,
"message": result.get("message", "Browser action completed successfully")
}
# Add message ID if available
if added_message and 'message_id' in added_message:
success_response['message_id'] = added_message['message_id']
# Add relevant browser-specific info
if result.get("url"):
success_response["url"] = result["url"]
if result.get("title"):
@@ -86,7 +81,6 @@ class SandboxBrowserTool(SandboxToolsBase):
success_response["elements_found"] = result["element_count"]
if result.get("pixels_below"):
success_response["scrollable_content"] = result["pixels_below"] > 0
# Add OCR text when available
if result.get("ocr_text"):
success_response["ocr_text"] = result["ocr_text"]
@@ -104,6 +98,7 @@ class SandboxBrowserTool(SandboxToolsBase):
logger.debug(traceback.format_exc())
return self.fail_response(f"Error executing browser action: {e}")
@openapi_schema({
"type": "function",
"function": {

View File

@@ -978,7 +978,7 @@ class ResponseProcessor:
if value is not None:
params[mapping.param_name] = value
parsing_details["attributes"][mapping.param_name] = value # Store raw attribute
logger.info(f"Found attribute {mapping.param_name}: {value}")
# logger.info(f"Found attribute {mapping.param_name}: {value}")
elif mapping.node_type == "element":
# Extract element content
@@ -986,7 +986,7 @@ class ResponseProcessor:
if content is not None:
params[mapping.param_name] = content.strip()
parsing_details["elements"][mapping.param_name] = content.strip() # Store raw element content
logger.info(f"Found element {mapping.param_name}: {content.strip()}")
# logger.info(f"Found element {mapping.param_name}: {content.strip()}")
elif mapping.node_type == "text":
# Extract text content
@@ -994,7 +994,7 @@ class ResponseProcessor:
if content is not None:
params[mapping.param_name] = content.strip()
parsing_details["text_content"] = content.strip() # Store raw text content
logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
# logger.info(f"Found text content for {mapping.param_name}: {content.strip()}")
elif mapping.node_type == "content":
# Extract root content
@@ -1002,7 +1002,7 @@ class ResponseProcessor:
if content is not None:
params[mapping.param_name] = content.strip()
parsing_details["root_content"] = content.strip() # Store raw root content
logger.info(f"Found root content for {mapping.param_name}")
# logger.info(f"Found root content for {mapping.param_name}")
except Exception as e:
logger.error(f"Error processing mapping {mapping}: {e}")

View File

@@ -72,20 +72,31 @@ export function BrowserToolView({
// Find the browser_state message and extract the screenshot
let screenshotBase64: string | null = null;
if (browserStateMessageId && messages.length > 0) {
const browserStateMessage = messages.find(
(msg) =>
(msg.type as string) === 'browser_state' &&
msg.message_id === browserStateMessageId,
);
let latestBrowserState: any = null;
let latestTimestamp = 0;
if (browserStateMessage) {
const browserStateContent = safeJsonParse<{ screenshot_base64?: string }>(
browserStateMessage.content,
{},
);
console.log('Browser state content: ', browserStateContent)
screenshotBase64 = browserStateContent?.screenshot_base64 || null;
if (messages.length > 0) {
// Find the latest browser_state message by comparing timestamps
messages.forEach((msg) => {
if ((msg.type as string) === 'browser_state') {
try {
const content = safeJsonParse<{ timestamp?: number }>(msg.content, {});
const timestamp = content?.timestamp || 0;
if (timestamp > latestTimestamp) {
latestTimestamp = timestamp;
latestBrowserState = content;
}
} catch (error) {
console.error('[BrowserToolView] Error parsing browser state:', error);
}
}
});
// Use the latest browser state
if (latestBrowserState) {
screenshotBase64 = latestBrowserState.screenshot_base64 || null;
console.log('Latest browser state:', latestBrowserState);
}
}