From a6dcdbcff34def9d94568fd0d5e88a66685b532f Mon Sep 17 00:00:00 2001
From: LE Quoc Dat <quocdat.le.insacvl@gmail.com>
Date: Tue, 22 Apr 2025 14:29:40 +0100
Subject: [PATCH] gem2.5 fix the click element

---
 backend/sandbox/docker/browser_api.py | 257 +++++++++++++++++++-------
 1 file changed, 194 insertions(+), 63 deletions(-)

diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py
index 67a3bdb7..579c8458 100644
--- a/backend/sandbox/docker/browser_api.py
+++ b/backend/sandbox/docker/browser_api.py
@@ -980,81 +980,94 @@ class BrowserAutomation:
         """Click on an element by index"""
         try:
             page = await self.get_current_page()
-            selector_map = await self.get_selector_map()
+            
+            # Get the current state and selector map *before* the click
+            initial_dom_state = await self.get_current_dom_state()
+            selector_map = initial_dom_state.selector_map
             
             if action.index not in selector_map:
+                # Get updated state even if element not found initially
+                dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_element_error (index {action.index} not found)")
                 return self.build_action_result(
                     False,
                     f"Element with index {action.index} not found",
-                    None,
-                    "",
-                    "",
-                    {},
+                    dom_state, # Use the latest state
+                    screenshot,
+                    elements,
+                    metadata,
                     error=f"Element with index {action.index} not found"
                 )
+
+            element_to_click = selector_map[action.index]
+            print(f"Attempting to click element: {element_to_click}")
+
+            # Construct a more reliable selector using JavaScript evaluation
+            # Find the element based on its properties captured in selector_map
+            js_selector_script = """
+            (targetElementInfo) => {
+                const interactiveElements = Array.from(document.querySelectorAll(
+                    'a, button, input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"], [tabindex]:not([tabindex="-1"])'
+                ));
+                
+                const visibleElements = interactiveElements.filter(el => {
+                    const style = window.getComputedStyle(el);
+                    const rect = el.getBoundingClientRect();
+                    return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0' && rect.width > 0 && rect.height > 0;
+                });
+
+                if (targetElementInfo.index > 0 && targetElementInfo.index <= visibleElements.length) {
+                    // Return the element at the specified index (1-based)
+                    return visibleElements[targetElementInfo.index - 1];
+                }
+                return null; // Element not found at the expected index
+            }
+            """
             
-            # In a real implementation, we would use the selector map to get the element's
-            # properties and use them to find and click the element
-            element = selector_map[action.index]
-            print(f"Clicking element: {element}")
-            
-            # Use CSS selector or XPath to locate and click the element
-            await page.wait_for_timeout(500)  # Small delay before clicking
+            element_info = {'index': action.index} # Pass the target index to the script
             
+            target_element_handle = await page.evaluate_handle(js_selector_script, element_info)
+
             click_success = False
-            try:
-                # Try different strategies to click the element
-                if element.attributes.get("id"):
-                    await page.click(f"#{element.attributes['id']}")
-                    click_success = True
-                elif element.attributes.get("class"):
-                    class_selector = f".{element.attributes['class'].replace(' ', '.')}"
-                    await page.click(class_selector)
-                    click_success = True
-                else:
-                    # Try text-based location
-                    text = element.get_all_text_till_next_clickable_element()
-                    if text:
-                        await page.click(f"text={text}")
-                        click_success = True
-                    else:
-                        # Generic xpath - not reliable but for demo purposes
-                        await page.click(f"//{element.tag_name}[{action.index}]")
-                        click_success = True
-            except Exception as click_error:
-                print(f"Error clicking element with standard methods: {click_error}")
-                # Fallback to JavaScript click
+            error_message = ""
+
+            if await target_element_handle.evaluate("node => node !== null"):
                 try:
-                    js_click = f"""
-                    (function() {{
-                        const elements = document.querySelectorAll('{element.tag_name}');
-                        if (elements.length >= {action.index}) {{
-                            elements[{action.index-1}].click();
-                            return true;
-                        }}
-                        return false;
-                    }})()
-                    """
-                    click_success = await page.evaluate(js_click)
-                except Exception as js_error:
-                    print(f"Error with JavaScript click fallback: {js_error}")
-            
-            # Give time for any navigation to occur
-            await page.wait_for_load_state("networkidle", timeout=5000)
-            
+                    # Use Playwright's recommended way: click the handle
+                    # Add timeout and wait for element to be stable
+                    await target_element_handle.click(timeout=5000) 
+                    click_success = True
+                    print(f"Successfully clicked element handle for index {action.index}")
+                except Exception as click_error:
+                    error_message = f"Error clicking element handle: {click_error}"
+                    print(error_message)
+                    # Optional: Add fallback methods here if needed
+                    # e.g., target_element_handle.dispatch_event('click')
+            else:
+                 error_message = f"Could not locate the target element handle for index {action.index} using JS script."
+                 print(error_message)
+
+
+            # Wait for potential page changes/network activity
+            try:
+                await page.wait_for_load_state("networkidle", timeout=5000)
+            except Exception as wait_error:
+                print(f"Timeout or error waiting for network idle after click: {wait_error}")
+                await asyncio.sleep(1) # Fallback wait
+
             # Get updated state after action
             dom_state, screenshot, elements, metadata = await self.get_updated_browser_state(f"click_element({action.index})")
-            
+
             return self.build_action_result(
                 click_success,
-                f"Clicked element with index {action.index}" if click_success else f"Attempted to click element {action.index} but may have failed",
+                f"Clicked element with index {action.index}" if click_success else f"Attempted to click element {action.index} but failed. Error: {error_message}",
                 dom_state,
                 screenshot,
                 elements,
                 metadata,
-                error="",
+                error=error_message if not click_success else "",
                 content=None
             )
+            
         except Exception as e:
             print(f"Error in click_element: {e}")
             traceback.print_exc()
@@ -1072,15 +1085,22 @@ class BrowserAutomation:
                     content=None
                 )
             except:
+                # Fallback if getting state also fails
+                current_url = "unknown"
+                try:
+                   current_url = page.url # Try to get at least the URL
+                except:
+                    pass 
                 return self.build_action_result(
                     False,
                     str(e),
-                    None,
-                    "",
-                    "",
-                    {},
+                    None, # No DOM state available
+                    "",   # No screenshot
+                    "",   # No elements string
+                    {},   # Empty metadata
                     error=str(e),
-                    content=None
+                    content=None,
+                    fallback_url=current_url 
                 )
     
     async def input_text(self, action: InputTextAction = Body(...)):
@@ -1917,16 +1937,127 @@ async def test_browser_api():
         await automation_service.shutdown()
         print("Browser closed")
 
+async def test_browser_api_2(hold_seconds: float = 0):
+    """Smoke-test navigation and two consecutive clicks on the chess demo page; wait hold_seconds before shutdown."""
+    try:
+        # Initialize browser automation
+        print("\n=== Starting Browser Automation Test 2 (Chess Page) ===")
+        await automation_service.startup()
+        print("✅ Browser started successfully")
+
+        # Navigate to the chess test page
+        print("\n--- Testing Navigation to Chess Page ---")
+        test_url = "https://dat-lequoc.github.io/chess-for-suna/chess.html"
+        result = await automation_service.navigate_to(GoToUrlAction(url=test_url))
+        print(f"Navigation status: {'✅ Success' if result.success else '❌ Failed'}")
+        if not result.success:
+            print(f"Error: {result.error}")
+            return
+
+        print(f"URL: {result.url}")
+        print(f"Title: {result.title}")
+
+        # Report what the DOM processing returned for the freshly loaded page
+        print(f"\nFound {result.element_count} interactive elements")
+        if result.elements and result.elements.strip():
+            print("Elements:")
+            print(result.elements)
+        else:
+            print("No formatted elements found, but DOM was processed")
+
+        # Summarize interactive elements: index, tag and first 30 characters of text
+        if result.interactive_elements and len(result.interactive_elements) > 0:
+            print("\nInteractive elements summary:")
+            for el in result.interactive_elements:
+                print(f"  [{el['index']}] <{el['tag_name']}> {(el.get('text') or '')[:30]}")
+
+        # Screenshot info
+        print(f"\nScreenshot captured: {'Yes' if result.screenshot_base64 else 'No'}")
+        print(f"Viewport size: {result.viewport_width}x{result.viewport_height}")
+
+        await asyncio.sleep(2)
+
+        # Test clicking on an element (e.g., a chess square)
+        print("\n--- Testing Element Click (element 5) ---")
+        if result.element_count > 4: # Ensure element 5 exists
+            click_index = 5
+            click_result = await automation_service.click_element(ClickElementAction(index=click_index))
+            print(f"Click status for element {click_index}: {'✅ Success' if click_result.success else '❌ Failed'}")
+            print(f"Message: {click_result.message}")
+            print(f"URL after click: {click_result.url}")
+
+            # Show the element listing carried by the click result (state after the click)
+            print(f"\n--- Retrieving elements after clicking element {click_index} ---")
+            if click_result.elements and click_result.elements.strip():
+                print("Updated Elements:")
+                print(click_result.elements)
+            else:
+                print("No formatted elements found after click.")
+
+            if click_result.interactive_elements and len(click_result.interactive_elements) > 0:
+                print("\nUpdated interactive elements summary:")
+                for el in click_result.interactive_elements:
+                    print(f"  [{el['index']}] <{el['tag_name']}> {(el.get('text') or '')[:30]}")
+            else:
+                print("No interactive elements found after click.")
+
+            # Test clicking element 1 after the first click
+            print("\n--- Testing Element Click (element 1 after clicking 5) ---")
+            if click_result.element_count > 0: # Check if there are still elements
+                click_index_2 = 1
+                click_result_2 = await automation_service.click_element(ClickElementAction(index=click_index_2))
+                print(f"Click status for element {click_index_2}: {'✅ Success' if click_result_2.success else '❌ Failed'}")
+                print(f"Message: {click_result_2.message}")
+                print(f"URL after click: {click_result_2.url}")
+
+                # Show the element listing carried by the second click result
+                print(f"\n--- Retrieving elements after clicking element {click_index_2} ---")
+                if click_result_2.elements and click_result_2.elements.strip():
+                    print("Elements after second click:")
+                    print(click_result_2.elements)
+                else:
+                    print("No formatted elements found after second click.")
+
+                if click_result_2.interactive_elements and len(click_result_2.interactive_elements) > 0:
+                    print("\nInteractive elements summary after second click:")
+                    for el in click_result_2.interactive_elements:
+                        print(f"  [{el['index']}] <{el['tag_name']}> {(el.get('text') or '')[:30]}")
+                else:
+                    print("No interactive elements found after second click.")
+            else:
+                print("Skipping second element click test - no elements found after first click.")
+
+        else:
+            print("Skipping element click test - fewer than 5 elements found.")
+
+        await asyncio.sleep(2)
+
+        print("\n✅ Chess Page Test Completed!")
+        await asyncio.sleep(hold_seconds)  # optional pause (e.g. to inspect the browser); default 0 does not delay cleanup
+
+    except Exception as e:
+        print(f"\n❌ Chess Page Test failed: {str(e)}")
+        traceback.print_exc()
+    finally:
+        # Always shut the browser down, including after the early return on failed navigation
+        print("\n--- Cleaning up ---")
+        await automation_service.shutdown()
+        print("Browser closed")
+
 if __name__ == '__main__':
     import uvicorn
     import sys
     
-    # Check if running in test mode
-    test_mode = len(sys.argv) > 1 and sys.argv[1] == "--test"
+    # Pick a self-test from the CLI flags (--test is checked before --test2); with neither flag the API server starts
+    test_mode_1 = "--test" in sys.argv
+    test_mode_2 = "--test2" in sys.argv
     
-    if test_mode:
-        print("Running in test mode")
+    if test_mode_1:
+        print("Running in test mode 1")
         asyncio.run(test_browser_api())
+    elif test_mode_2:
+        print("Running in test mode 2 (Chess Page)")
+        asyncio.run(test_browser_api_2())
     else:
         print("Starting API server")
         uvicorn.run("browser_api:api_app", host="0.0.0.0", port=8002)
\ No newline at end of file