From a1b60e2c8f622edc50795c2f684499eccdf8cbfa Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Wed, 14 May 2025 10:58:11 +0530 Subject: [PATCH 1/2] chore(dev): upload images to s3 --- backend/agent/run.py | 19 ++++- backend/sandbox/docker/browser_api.py | 80 ++++++++++++++++--- .../thread/tool-views/BrowserToolView.tsx | 1 + 3 files changed, 84 insertions(+), 16 deletions(-) diff --git a/backend/agent/run.py b/backend/agent/run.py index 5ada055a..3301e498 100644 --- a/backend/agent/run.py +++ b/backend/agent/run.py @@ -122,18 +122,29 @@ async def run_agent( try: browser_content = json.loads(latest_browser_state_msg.data[0]["content"]) screenshot_base64 = browser_content.get("screenshot_base64") - # Create a copy of the browser state without screenshot + screenshot_url = browser_content.get("screenshot_url") + + # Create a copy of the browser state without screenshot data browser_state_text = browser_content.copy() browser_state_text.pop('screenshot_base64', None) browser_state_text.pop('screenshot_url', None) - browser_state_text.pop('screenshot_url_base64', None) if browser_state_text: temp_message_content_list.append({ "type": "text", "text": f"The following is the current state of the browser:\n{json.dumps(browser_state_text, indent=2)}" }) - if screenshot_base64: + + # Prioritize screenshot_url if available + if screenshot_url: + temp_message_content_list.append({ + "type": "image_url", + "image_url": { + "url": screenshot_url, + } + }) + elif screenshot_base64: + # Fallback to base64 if URL not available temp_message_content_list.append({ "type": "image_url", "image_url": { @@ -141,7 +152,7 @@ async def run_agent( } }) else: - logger.warning("Browser state found but no screenshot base64 data.") + logger.warning("Browser state found but no screenshot data.") await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute() except Exception as e: diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index 471fc6b0..6149e5e7 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -15,6 +15,7 @@ import traceback import pytesseract from PIL import Image import io +from utils.logger import logger ####################################################### # Action model definitions @@ -259,15 +260,16 @@ class BrowserActionResult(BaseModel): url: Optional[str] = None title: Optional[str] = None elements: Optional[str] = None # Formatted string of clickable elements - screenshot_base64: Optional[str] = None + screenshot_base64: Optional[str] = None # For backward compatibility + screenshot_url: Optional[str] = None pixels_above: int = 0 pixels_below: int = 0 content: Optional[str] = None - ocr_text: Optional[str] = None # Added field for OCR text + ocr_text: Optional[str] = None # Additional metadata - element_count: int = 0 # Number of interactive elements found - interactive_elements: Optional[List[Dict[str, Any]]] = None # Simplified list of interactive elements + element_count: int = 0 + interactive_elements: Optional[List[Dict[str, Any]]] = None viewport_width: Optional[int] = None viewport_height: Optional[int] = None @@ -609,15 +611,61 @@ class BrowserAutomation: ) async def take_screenshot(self) -> str: - """Take a screenshot and return as base64 encoded string""" + """Take a screenshot and return as base64 encoded string or S3 URL""" try: page = await self.get_current_page() screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False) - return base64.b64encode(screenshot_bytes).decode('utf-8') + + # If we have storage credentials, upload to S3 + if os.environ.get('SUPABASE_URL') and os.environ.get('SUPABASE_KEY'): + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + random_id = random.randint(1000, 9999) + filename = f"screenshot_{timestamp}_{random_id}.jpg" + + result = await self.upload_to_storage(screenshot_bytes, filename) + + # Verify the upload was successful if we got a URL + if isinstance(result, dict) and result.get("is_s3") and result.get("url"): + if await self.verify_file_exists(filename): + logger.info(f"Screenshot upload verified: {filename}") + else: + logger.error(f"Screenshot upload failed verification: {filename}") + return base64.b64encode(screenshot_bytes).decode('utf-8') + + return result + else: + return base64.b64encode(screenshot_bytes).decode('utf-8') except Exception as e: - print(f"Error taking screenshot: {e}") - # Return an empty string rather than failing + traceback.print_exc() return "" + + async def upload_to_storage(self, file_bytes: bytes, filename: str) -> str: + """Upload file to Supabase Storage and return the URL""" + try: + supabase_url = os.environ.get('SUPABASE_URL') + supabase_key = os.environ.get('SUPABASE_KEY') + + from supabase import create_client, Client + supabase_client: Client = create_client(supabase_url, supabase_key) + bucket_name = 'screenshots' + + buckets = supabase_client.storage.list_buckets() + if not any(bucket.name == bucket_name for bucket in buckets): + supabase_client.storage.create_bucket(bucket_name) + + result = supabase_client.storage.from_(bucket_name).upload( + path=filename, + file=file_bytes, + file_options={"content-type": "image/jpeg"} + ) + + # Get the public URL + file_url = supabase_client.storage.from_(bucket_name).get_public_url(filename) + + return {"url": file_url, "is_s3": True} + except Exception as e: + traceback.print_exc() + return base64.b64encode(file_bytes).decode('utf-8') async def save_screenshot_to_file(self) -> str: """Take a screenshot and save to file, returning the path""" @@ -731,13 +779,20 @@ class BrowserAutomation: return None, "", "", {} def build_action_result(self, success: bool, message: str, dom_state, screenshot: str, - elements: str, metadata: dict, error: str = "", content: str = None, - fallback_url: str = None) -> BrowserActionResult: + elements: str, metadata: dict, error: str = "", content: str = None, + fallback_url: str = None) -> BrowserActionResult: """Helper method to build a consistent BrowserActionResult""" - # Ensure elements is never None to avoid display issues if elements is None: elements = "" + screenshot_base64 = None + screenshot_url = None + + if isinstance(screenshot, dict) and screenshot.get("is_s3"): + screenshot_url = screenshot.get("url") + else: + screenshot_base64 = screenshot + return BrowserActionResult( success=success, message=message, @@ -745,7 +800,8 @@ class BrowserAutomation: url=dom_state.url if dom_state else fallback_url or "", title=dom_state.title if dom_state else "", elements=elements, - screenshot_base64=screenshot, + screenshot_base64=screenshot_base64, + screenshot_url=screenshot_url, pixels_above=dom_state.pixels_above if dom_state else 0, pixels_below=dom_state.pixels_below if dom_state else 0, content=content, diff --git a/frontend/src/components/thread/tool-views/BrowserToolView.tsx b/frontend/src/components/thread/tool-views/BrowserToolView.tsx index e18cf711..2fcb45e9 100644 --- a/frontend/src/components/thread/tool-views/BrowserToolView.tsx +++ b/frontend/src/components/thread/tool-views/BrowserToolView.tsx @@ -84,6 +84,7 @@ export function BrowserToolView({ browserStateMessage.content, {}, ); + console.log('Browser state content: ', browserStateContent) screenshotBase64 = browserStateContent?.screenshot_base64 || null; } } From e95118f26e095e7346e15b53972e9190ab96d097 Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Wed, 14 May 2025 16:07:15 +0530 Subject: [PATCH 2/2] chore(dev): upload images to s3 - draft --- backend/sandbox/docker/browser_api.py | 90 ++++++++++++++----- backend/utils/logger.py | 9 +- .../src/components/payment/paywall-dialog.tsx | 2 +- 3 files changed, 72 insertions(+), 29 deletions(-) diff --git a/backend/sandbox/docker/browser_api.py b/backend/sandbox/docker/browser_api.py index 6149e5e7..f122cdfc 100644 --- a/backend/sandbox/docker/browser_api.py +++ b/backend/sandbox/docker/browser_api.py @@ -16,6 +16,7 @@ import pytesseract from PIL import Image import io from utils.logger import logger +from services.supabase import DBConnection ####################################################### # Action model definitions @@ -290,6 +291,7 @@ class BrowserAutomation: self.include_attributes = ["id", "href", "src", "alt", "aria-label", "placeholder", "name", "role", "title", "value"] self.screenshot_dir = os.path.join(os.getcwd(), "screenshots") os.makedirs(self.screenshot_dir, exist_ok=True) + self.db = DBConnection() # Initialize DB connection # Register routes self.router.on_startup.append(self.startup) @@ -615,18 +617,19 @@ class BrowserAutomation: try: page = await self.get_current_page() screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False) + + client = await self.db.client - # If we have storage credentials, upload to S3 - if os.environ.get('SUPABASE_URL') and os.environ.get('SUPABASE_KEY'): + if client: timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') random_id = random.randint(1000, 9999) filename = f"screenshot_{timestamp}_{random_id}.jpg" - result = await self.upload_to_storage(screenshot_bytes, filename) + logger.info(f"Attempting to upload screenshot: {filename}") + result = await self.upload_to_storage(client, screenshot_bytes, filename) - # Verify the upload was successful if we got a URL if isinstance(result, dict) and result.get("is_s3") and result.get("url"): - if await self.verify_file_exists(filename): + if await self.verify_file_exists(client, filename): logger.info(f"Screenshot upload verified: {filename}") else: logger.error(f"Screenshot upload failed verification: {filename}") @@ -634,38 +637,61 @@ class BrowserAutomation: return result else: + logger.warning("No Supabase client available, falling back to base64") return base64.b64encode(screenshot_bytes).decode('utf-8') except Exception as e: + logger.error(f"Error taking screenshot: {str(e)}") traceback.print_exc() return "" - async def upload_to_storage(self, file_bytes: bytes, filename: str) -> str: + async def upload_to_storage(self, client, file_bytes: bytes, filename: str) -> str: """Upload file to Supabase Storage and return the URL""" try: - supabase_url = os.environ.get('SUPABASE_URL') - supabase_key = os.environ.get('SUPABASE_KEY') - - from supabase import create_client, Client - supabase_client: Client = create_client(supabase_url, supabase_key) bucket_name = 'screenshots' - buckets = supabase_client.storage.list_buckets() + buckets = client.storage.list_buckets() if not any(bucket.name == bucket_name for bucket in buckets): - supabase_client.storage.create_bucket(bucket_name) + logger.info(f"Creating bucket: {bucket_name}") + try: + client.storage.create_bucket(bucket_name) + logger.info("Bucket created successfully") + except Exception as e: + logger.error(f"Failed to create bucket: {str(e)}") + raise - result = supabase_client.storage.from_(bucket_name).upload( - path=filename, - file=file_bytes, - file_options={"content-type": "image/jpeg"} - ) + logger.info(f"Uploading file: {filename}") + try: + result = client.storage.from_(bucket_name).upload( + path=filename, + file=file_bytes, + file_options={"content-type": "image/jpeg"} + ) + logger.info("File upload successful") + except Exception as e: + logger.error(f"Failed to upload file: {str(e)}") + raise - # Get the public URL - file_url = supabase_client.storage.from_(bucket_name).get_public_url(filename) + file_url = client.storage.from_(bucket_name).get_public_url(filename) + logger.info(f"Generated URL: {file_url}") return {"url": file_url, "is_s3": True} except Exception as e: + logger.error(f"Error in upload_to_storage: {str(e)}") traceback.print_exc() return base64.b64encode(file_bytes).decode('utf-8') + + async def verify_file_exists(self, client, filename: str) -> bool: + """Verify that a file exists in the storage bucket""" + logger.info(f"=== Verifying file exists: {filename} ===") + try: + bucket_name = 'screenshots' + files = client.storage.from_(bucket_name).list() + exists = any(f['name'] == filename for f in files) + logger.info(f"File verification result: {'exists' if exists else 'not found'}") + return exists + except Exception as e: + logger.error(f"Error verifying file: {str(e)}") + return False async def save_screenshot_to_file(self) -> str: """Take a screenshot and save to file, returning the path""" @@ -708,20 +734,32 @@ class BrowserAutomation: """Helper method to get updated browser state after any action Returns a tuple of (dom_state, screenshot, elements, metadata) """ + logger.info(f"=== Starting get_updated_browser_state for action: {action_name} ===") try: # Wait a moment for any potential async processes to settle + logger.info("Waiting for async processes to settle") await asyncio.sleep(0.5) # Get updated state + logger.info("Getting current DOM state") dom_state = await self.get_current_dom_state() + logger.info(f"DOM state retrieved - URL: {dom_state.url}, Title: {dom_state.title}") + + logger.info("Taking screenshot") screenshot = await self.take_screenshot() + logger.info(f"Screenshot result type: {'dict' if isinstance(screenshot, dict) else 'base64 string'}") + if isinstance(screenshot, dict) and screenshot.get("url"): + logger.info(f"Screenshot URL: {screenshot['url']}") # Format elements for output + logger.info("Formatting clickable elements") elements = dom_state.element_tree.clickable_elements_to_string( include_attributes=self.include_attributes ) + logger.info(f"Found {len(dom_state.selector_map)} clickable elements") # Collect additional metadata + logger.info("Collecting metadata") page = await self.get_current_page() metadata = {} @@ -747,8 +785,9 @@ class BrowserAutomation: metadata['interactive_elements'] = interactive_elements - # Get viewport dimensions - Fix syntax error in JavaScript + # Get viewport dimensions try: + logger.info("Getting viewport dimensions") viewport = await page.evaluate(""" () => { return { @@ -759,21 +798,24 @@ class BrowserAutomation: """) metadata['viewport_width'] = viewport.get('width', 0) metadata['viewport_height'] = viewport.get('height', 0) + logger.info(f"Viewport dimensions: {metadata['viewport_width']}x{metadata['viewport_height']}") except Exception as e: - print(f"Error getting viewport dimensions: {e}") + logger.error(f"Error getting viewport dimensions: {e}") metadata['viewport_width'] = 0 metadata['viewport_height'] = 0 # Extract OCR text from screenshot if available ocr_text = "" if screenshot: + logger.info("Extracting OCR text from screenshot") ocr_text = await self.extract_ocr_text_from_screenshot(screenshot) metadata['ocr_text'] = ocr_text + logger.info(f"OCR text length: {len(ocr_text)} characters") - print(f"Got updated state after {action_name}: {len(dom_state.selector_map)} elements") + logger.info(f"=== Completed get_updated_browser_state for {action_name} ===") return dom_state, screenshot, elements, metadata except Exception as e: - print(f"Error getting updated state after {action_name}: {e}") + logger.error(f"Error in get_updated_browser_state for {action_name}: {e}") traceback.print_exc() # Return empty values in case of error return None, "", "", {} diff --git a/backend/utils/logger.py b/backend/utils/logger.py index 51574ed6..32fae2fd 100644 --- a/backend/utils/logger.py +++ b/backend/utils/logger.py @@ -100,22 +100,23 @@ def setup_logger(name: str = 'agentpress') -> logging.Logger: except Exception as e: print(f"Error setting up file handler: {e}") - # Console handler - WARNING in production, INFO in other environments + # Console handler - WARNING in production, DEBUG in other environments try: console_handler = logging.StreamHandler(sys.stdout) if config.ENV_MODE == EnvMode.PRODUCTION: console_handler.setLevel(logging.WARNING) else: - console_handler.setLevel(logging.INFO) + console_handler.setLevel(logging.DEBUG) console_formatter = logging.Formatter( - '%(asctime)s - %(levelname)s - %(message)s' + '%(asctime)s - %(levelname)s - %(name)s - %(message)s' ) console_handler.setFormatter(console_formatter) # Add console handler to logger logger.addHandler(console_handler) - print(f"Added console handler with level: {console_handler.level}") + logger.info(f"Added console handler with level: {console_handler.level}") + logger.info(f"Log file will be created at: {log_dir}") except Exception as e: print(f"Error setting up console handler: {e}") diff --git a/frontend/src/components/payment/paywall-dialog.tsx b/frontend/src/components/payment/paywall-dialog.tsx index 8b401684..1ce5a0bd 100644 --- a/frontend/src/components/payment/paywall-dialog.tsx +++ b/frontend/src/components/payment/paywall-dialog.tsx @@ -58,7 +58,7 @@ export const PaywallDialog: React.FC = ({ strayBackdrops.forEach(element => element.remove()); }; }, []); - + useEffect(() => { if (!open) { document.body.classList.remove('overflow-hidden');