mirror of https://github.com/kortix-ai/suna.git
Merge pull request #67 from kortix-ai/fix_browser-use
gem2.5 fix the click element
This commit is contained in:
commit
c882df8cd6
|
@ -980,81 +980,94 @@ class BrowserAutomation:
|
||||||
"""Click on an element by index"""
|
"""Click on an element by index"""
|
||||||
try:
|
try:
|
||||||
page = await self.get_current_page()
|
page = await self.get_current_page()
|
||||||
selector_map = await self.get_selector_map()
|
|
||||||
|
# Get the current state and selector map *before* the click
|
||||||
|
initial_dom_state = await self.get_current_dom_state()
|
||||||
|
selector_map = initial_dom_state.selector_map
|
||||||
|
|
||||||
if action.index not in selector_map:
|
if action.index not in selector_map:
|
||||||
|
# Get updated state even if element not found initially
|
||||||
|
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_element_error (index {action.index} not found)")
|
||||||
return self.build_action_result(
|
return self.build_action_result(
|
||||||
False,
|
False,
|
||||||
f"Element with index {action.index} not found",
|
f"Element with index {action.index} not found",
|
||||||
None,
|
dom_state, # Use the latest state
|
||||||
"",
|
screenshot,
|
||||||
"",
|
elements,
|
||||||
{},
|
metadata,
|
||||||
error=f"Element with index {action.index} not found"
|
error=f"Element with index {action.index} not found"
|
||||||
)
|
)
|
||||||
|
|
||||||
# In a real implementation, we would use the selector map to get the element's
|
element_to_click = selector_map[action.index]
|
||||||
# properties and use them to find and click the element
|
print(f"Attempting to click element: {element_to_click}")
|
||||||
element = selector_map[action.index]
|
|
||||||
print(f"Clicking element: {element}")
|
|
||||||
|
|
||||||
# Use CSS selector or XPath to locate and click the element
|
# Construct a more reliable selector using JavaScript evaluation
|
||||||
await page.wait_for_timeout(500) # Small delay before clicking
|
# Find the element based on its properties captured in selector_map
|
||||||
|
js_selector_script = """
|
||||||
|
(targetElementInfo) => {
|
||||||
|
const interactiveElements = Array.from(document.querySelectorAll(
|
||||||
|
'a, button, input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"], [tabindex]:not([tabindex="-1"])'
|
||||||
|
));
|
||||||
|
|
||||||
|
const visibleElements = interactiveElements.filter(el => {
|
||||||
|
const style = window.getComputedStyle(el);
|
||||||
|
const rect = el.getBoundingClientRect();
|
||||||
|
return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0' && rect.width > 0 && rect.height > 0;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (targetElementInfo.index > 0 && targetElementInfo.index <= visibleElements.length) {
|
||||||
|
// Return the element at the specified index (1-based)
|
||||||
|
return visibleElements[targetElementInfo.index - 1];
|
||||||
|
}
|
||||||
|
return null; // Element not found at the expected index
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
element_info = {'index': action.index} # Pass the target index to the script
|
||||||
|
|
||||||
|
target_element_handle = await page.evaluate_handle(js_selector_script, element_info)
|
||||||
|
|
||||||
click_success = False
|
click_success = False
|
||||||
try:
|
error_message = ""
|
||||||
# Try different strategies to click the element
|
|
||||||
if element.attributes.get("id"):
|
|
||||||
await page.click(f"#{element.attributes['id']}")
|
|
||||||
click_success = True
|
|
||||||
elif element.attributes.get("class"):
|
|
||||||
class_selector = f".{element.attributes['class'].replace(' ', '.')}"
|
|
||||||
await page.click(class_selector)
|
|
||||||
click_success = True
|
|
||||||
else:
|
|
||||||
# Try text-based location
|
|
||||||
text = element.get_all_text_till_next_clickable_element()
|
|
||||||
if text:
|
|
||||||
await page.click(f"text={text}")
|
|
||||||
click_success = True
|
|
||||||
else:
|
|
||||||
# Generic xpath - not reliable but for demo purposes
|
|
||||||
await page.click(f"//{element.tag_name}[{action.index}]")
|
|
||||||
click_success = True
|
|
||||||
except Exception as click_error:
|
|
||||||
print(f"Error clicking element with standard methods: {click_error}")
|
|
||||||
# Fallback to JavaScript click
|
|
||||||
try:
|
|
||||||
js_click = f"""
|
|
||||||
(function() {{
|
|
||||||
const elements = document.querySelectorAll('{element.tag_name}');
|
|
||||||
if (elements.length >= {action.index}) {{
|
|
||||||
elements[{action.index-1}].click();
|
|
||||||
return true;
|
|
||||||
}}
|
|
||||||
return false;
|
|
||||||
}})()
|
|
||||||
"""
|
|
||||||
click_success = await page.evaluate(js_click)
|
|
||||||
except Exception as js_error:
|
|
||||||
print(f"Error with JavaScript click fallback: {js_error}")
|
|
||||||
|
|
||||||
# Give time for any navigation to occur
|
if await target_element_handle.evaluate("node => node !== null"):
|
||||||
|
try:
|
||||||
|
# Use Playwright's recommended way: click the handle
|
||||||
|
# Add timeout and wait for element to be stable
|
||||||
|
await target_element_handle.click(timeout=5000)
|
||||||
|
click_success = True
|
||||||
|
print(f"Successfully clicked element handle for index {action.index}")
|
||||||
|
except Exception as click_error:
|
||||||
|
error_message = f"Error clicking element handle: {click_error}"
|
||||||
|
print(error_message)
|
||||||
|
# Optional: Add fallback methods here if needed
|
||||||
|
# e.g., target_element_handle.dispatch_event('click')
|
||||||
|
else:
|
||||||
|
error_message = f"Could not locate the target element handle for index {action.index} using JS script."
|
||||||
|
print(error_message)
|
||||||
|
|
||||||
|
|
||||||
|
# Wait for potential page changes/network activity
|
||||||
|
try:
|
||||||
await page.wait_for_load_state("networkidle", timeout=5000)
|
await page.wait_for_load_state("networkidle", timeout=5000)
|
||||||
|
except Exception as wait_error:
|
||||||
|
print(f"Timeout or error waiting for network idle after click: {wait_error}")
|
||||||
|
await asyncio.sleep(1) # Fallback wait
|
||||||
|
|
||||||
# Get updated state after action
|
# Get updated state after action
|
||||||
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_element({action.index})")
|
dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_element({action.index})")
|
||||||
|
|
||||||
return self.build_action_result(
|
return self.build_action_result(
|
||||||
click_success,
|
click_success,
|
||||||
f"Clicked element with index {action.index}" if click_success else f"Attempted to click element {action.index} but may have failed",
|
f"Clicked element with index {action.index}" if click_success else f"Attempted to click element {action.index} but failed. Error: {error_message}",
|
||||||
dom_state,
|
dom_state,
|
||||||
screenshot,
|
screenshot,
|
||||||
elements,
|
elements,
|
||||||
metadata,
|
metadata,
|
||||||
error="",
|
error=error_message if not click_success else "",
|
||||||
content=None
|
content=None
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error in click_element: {e}")
|
print(f"Error in click_element: {e}")
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
@ -1072,15 +1085,22 @@ class BrowserAutomation:
|
||||||
content=None
|
content=None
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
|
# Fallback if getting state also fails
|
||||||
|
current_url = "unknown"
|
||||||
|
try:
|
||||||
|
current_url = page.url # Try to get at least the URL
|
||||||
|
except:
|
||||||
|
pass
|
||||||
return self.build_action_result(
|
return self.build_action_result(
|
||||||
False,
|
False,
|
||||||
str(e),
|
str(e),
|
||||||
None,
|
None, # No DOM state available
|
||||||
"",
|
"", # No screenshot
|
||||||
"",
|
"", # No elements string
|
||||||
{},
|
{}, # Empty metadata
|
||||||
error=str(e),
|
error=str(e),
|
||||||
content=None
|
content=None,
|
||||||
|
fallback_url=current_url
|
||||||
)
|
)
|
||||||
|
|
||||||
async def input_text(self, action: InputTextAction = Body(...)):
|
async def input_text(self, action: InputTextAction = Body(...)):
|
||||||
|
@ -1917,16 +1937,127 @@ async def test_browser_api():
|
||||||
await automation_service.shutdown()
|
await automation_service.shutdown()
|
||||||
print("Browser closed")
|
print("Browser closed")
|
||||||
|
|
||||||
|
def _print_element_report(result, elements_header, no_elements_msg,
                          summary_header, no_summary_msg=None):
    """Print the element listing and per-element summary carried by a result.

    Args:
        result: An action result exposing ``elements`` (a formatted string)
            and ``interactive_elements`` (an iterable of dicts with
            ``index``, ``tag_name`` and optionally ``text`` keys).
        elements_header: Line printed before the formatted element string.
        no_elements_msg: Line printed when the formatted string is empty
            or whitespace-only.
        summary_header: Line printed before the per-element summary.
        no_summary_msg: Line printed when there are no interactive
            elements; ``None`` prints nothing in that case.
    """
    if result.elements and result.elements.strip():
        print(elements_header)
        print(result.elements)
    else:
        print(no_elements_msg)

    if result.interactive_elements:
        print(summary_header)
        for el in result.interactive_elements:
            # Text is truncated to 30 characters to keep each entry on one line.
            print(f" [{el['index']}] <{el['tag_name']}> {el.get('text', '')[:30]}")
    elif no_summary_msg is not None:
        print(no_summary_msg)


async def test_browser_api_2(*, hold_open_seconds=100):
    """Test the browser automation API functionality on the chess page.

    Starts the browser, navigates to the chess test page, prints the
    discovered elements, clicks element 5 and then element 1, printing the
    element state after each click. The browser is always shut down in the
    ``finally`` clause, even when a step raises.

    Args:
        hold_open_seconds: How long to wait after the test completes before
            cleanup runs. Defaults to 100, the delay that was previously
            hard-coded; pass 0 to skip the wait.
    """
    try:
        # Initialize browser automation
        print("\n=== Starting Browser Automation Test 2 (Chess Page) ===")
        await automation_service.startup()
        print("✅ Browser started successfully")

        # Navigate to the chess test page
        print("\n--- Testing Navigation to Chess Page ---")
        test_url = "https://dat-lequoc.github.io/chess-for-suna/chess.html"
        result = await automation_service.navigate_to(GoToUrlAction(url=test_url))
        print(f"Navigation status: {'✅ Success' if result.success else '❌ Failed'}")
        if not result.success:
            # Nothing else can be exercised without a loaded page.
            print(f"Error: {result.error}")
            return

        print(f"URL: {result.url}")
        print(f"Title: {result.title}")

        # Check DOM state and elements
        print(f"\nFound {result.element_count} interactive elements")
        _print_element_report(
            result,
            "Elements:",
            "No formatted elements found, but DOM was processed",
            "\nInteractive elements summary:",
        )

        # Screenshot info
        print(f"\nScreenshot captured: {'Yes' if result.screenshot_base64 else 'No'}")
        print(f"Viewport size: {result.viewport_width}x{result.viewport_height}")

        await asyncio.sleep(2)

        # Test clicking on an element (e.g., a chess square)
        print("\n--- Testing Element Click (element 5) ---")
        if result.element_count > 4:  # Ensure element 5 exists
            click_index = 5
            click_result = await automation_service.click_element(ClickElementAction(index=click_index))
            print(f"Click status for element {click_index}: {'✅ Success' if click_result.success else '❌ Failed'}")
            print(f"Message: {click_result.message}")
            print(f"URL after click: {click_result.url}")

            # Retrieve and display elements again after click
            print(f"\n--- Retrieving elements after clicking element {click_index} ---")
            _print_element_report(
                click_result,
                "Updated Elements:",
                "No formatted elements found after click.",
                "\nUpdated interactive elements summary:",
                "No interactive elements found after click.",
            )

            # Test clicking element 1 after the first click
            print("\n--- Testing Element Click (element 1 after clicking 5) ---")
            if click_result.element_count > 0:  # Check if there are still elements
                click_index_2 = 1
                click_result_2 = await automation_service.click_element(ClickElementAction(index=click_index_2))
                print(f"Click status for element {click_index_2}: {'✅ Success' if click_result_2.success else '❌ Failed'}")
                print(f"Message: {click_result_2.message}")
                print(f"URL after click: {click_result_2.url}")

                # Retrieve and display elements again after the second click
                print(f"\n--- Retrieving elements after clicking element {click_index_2} ---")
                _print_element_report(
                    click_result_2,
                    "Elements after second click:",
                    "No formatted elements found after second click.",
                    "\nInteractive elements summary after second click:",
                    "No interactive elements found after second click.",
                )
            else:
                print("Skipping second element click test - no elements found after first click.")
        else:
            print("Skipping element click test - fewer than 5 elements found.")

        await asyncio.sleep(2)

        print("\n✅ Chess Page Test Completed!")
        # Delay cleanup by the configured time (previously a fixed 100 s).
        await asyncio.sleep(hold_open_seconds)

    except Exception as e:
        print(f"\n❌ Chess Page Test failed: {str(e)}")
        traceback.print_exc()
    finally:
        # Ensure browser is closed
        print("\n--- Cleaning up ---")
        await automation_service.shutdown()
        print("Browser closed")
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import uvicorn
|
import uvicorn
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Check if running in test mode
|
# Check command line arguments for test mode
|
||||||
test_mode = len(sys.argv) > 1 and sys.argv[1] == "--test"
|
test_mode_1 = "--test" in sys.argv
|
||||||
|
test_mode_2 = "--test2" in sys.argv
|
||||||
|
|
||||||
if test_mode:
|
if test_mode_1:
|
||||||
print("Running in test mode")
|
print("Running in test mode 1")
|
||||||
asyncio.run(test_browser_api())
|
asyncio.run(test_browser_api())
|
||||||
|
elif test_mode_2:
|
||||||
|
print("Running in test mode 2 (Chess Page)")
|
||||||
|
asyncio.run(test_browser_api_2())
|
||||||
else:
|
else:
|
||||||
print("Starting API server")
|
print("Starting API server")
|
||||||
uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8002)
|
uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8002)
|
Loading…
Reference in New Issue