From 674e4d92d057bdf7e09bd5adf1234c27cd11758e Mon Sep 17 00:00:00 2001 From: Krishav Raj Singh Date: Sat, 23 Aug 2025 04:22:56 +0530 Subject: [PATCH] listen to filechooser --- backend/agent/tools/browser_tool.py | 19 +++++++++++-- backend/sandbox/docker/browserApi.ts | 41 +++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/backend/agent/tools/browser_tool.py b/backend/agent/tools/browser_tool.py index f142a923..129f4bf1 100644 --- a/backend/agent/tools/browser_tool.py +++ b/backend/agent/tools/browser_tool.py @@ -327,7 +327,7 @@ class BrowserTool(SandboxToolsBase): "type": "function", "function": { "name": "browser_act", - "description": "Perform any browser action using natural language description. CRITICAL: This tool automatically provides a screenshot with every action. For data entry actions (filling forms, entering text, selecting options), you MUST review the provided screenshot to verify that displayed values exactly match what was intended. Report mismatches immediately.", + "description": "Perform any browser action using natural language description. CRITICAL: This tool automatically provides a screenshot with every action. For data entry actions (filling forms, entering text, selecting options), you MUST review the provided screenshot to verify that displayed values exactly match what was intended. Report mismatches immediately. CRITICAL FILE UPLOAD RULE: ANY action that involves clicking, interacting with, or locating upload buttons, file inputs, resume upload sections, or any element that might trigger a choose file dialog MUST include the filePath parameter with filePath. This includes actions like 'click upload button', 'locate resume section', 'find file input' etc. Always err on the side of caution - if there's any possibility the action might lead to a file dialog, include filePath. This prevents accidental file dialog triggers without proper file handling.", "parameters": { "type": "object", "properties": { @@ -345,6 +345,10 @@ class BrowserTool(SandboxToolsBase): "type": "boolean", "description": "Whether to include iframe content in the action. Set to true if the target element is inside an iframe.", "default": True + }, + "filePath": { + "type": "string", + "description": "CRITICAL: REQUIRED for ANY action that might involve file uploads. This includes: clicking upload buttons, locating resume sections, finding file inputs, scrolling to upload areas, or any action that could potentially trigger a file dialog. Always include this parameter when dealing with upload-related elements to prevent accidental file dialog triggers. The tool will automatically handle the file upload after the action is performed.", } }, "required": ["action"] @@ -359,11 +363,20 @@ class BrowserTool(SandboxToolsBase): true + + + + click on upload resume button + /workspace/downloads/document.pdf + + ''') - async def browser_act(self, action: str, variables: dict = None, iframes: bool = False) -> ToolResult: + async def browser_act(self, action: str, variables: dict = None, iframes: bool = False, filePath: dict = None) -> ToolResult: """Perform any browser action using Stagehand.""" - logger.debug(f"Browser acting: {action} (variables={'***' if variables else None}, iframes={iframes})") + logger.debug(f"Browser acting: {action} (variables={'***' if variables else None}, iframes={iframes}), filePath={filePath}") params = {"action": action, "iframes": iframes, "variables": variables} + if filePath: + params["filePath"] = filePath return await self._execute_stagehand_api("act", params) @openapi_schema({ diff --git a/backend/sandbox/docker/browserApi.ts b/backend/sandbox/docker/browserApi.ts index dad29a3b..c6696144 100644 --- a/backend/sandbox/docker/browserApi.ts +++ b/backend/sandbox/docker/browserApi.ts @@ -1,5 +1,6 @@ import express from 'express'; import { Stagehand, type LogLine, type Page } from '@browserbasehq/stagehand'; +import { FileChooser } from 'playwright'; const app = express(); app.use(express.json()); @@ -226,7 +227,45 @@ class BrowserAutomation { async act(req: express.Request, res: express.Response): Promise { try { if (this.page && this.browserInitialized) { - const { action, iframes, variables } = req.body; + const { action, iframes, variables, filePath } = req.body; + + let fileChooseHandler: ((fileChooser: FileChooser) => Promise) | null=null; + fileChooseHandler = async (fileChooser) => { + if(filePath){ + await fileChooser.setFiles(filePath); + } else { + await fileChooser.setFiles([]); + + await this.page?.evaluate(() => { + const toast = document.createElement('div'); + toast.style.cssText = ` + position: fixed; + top: 20px; + right: 20px; + background: #ff6b6b; + color: white; + padding: 12px 16px; + border-radius: 6px; + z-index: 10000; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; + font-size: 14px; + max-width: 300px; + box-shadow: 0 4px 12px rgba(0,0,0,0.15); + `; + toast.textContent = 'File upload cancelled - no file specified'; + document.body.appendChild(toast); + + setTimeout(() => { + if (toast.parentNode) { + toast.parentNode.removeChild(toast); + } + }, 3000); + }); + } + } + + this.page.on('filechooser', fileChooseHandler); + const result = await this.page.act({action, iframes: iframes || true, variables}); const page_info = await this.get_stagehand_state(); const response: BrowserActionResult = {