mirror of https://github.com/kortix-ai/suna.git
chore(dev): upload images to s3
This commit is contained in:
parent
fe05ccfb4f
commit
a1b60e2c8f
|
@ -122,18 +122,29 @@ async def run_agent(
|
||||||
try:
|
try:
|
||||||
browser_content = json.loads(latest_browser_state_msg.data[0]["content"])
|
browser_content = json.loads(latest_browser_state_msg.data[0]["content"])
|
||||||
screenshot_base64 = browser_content.get("screenshot_base64")
|
screenshot_base64 = browser_content.get("screenshot_base64")
|
||||||
# Create a copy of the browser state without screenshot
|
screenshot_url = browser_content.get("screenshot_url")
|
||||||
|
|
||||||
|
# Create a copy of the browser state without screenshot data
|
||||||
browser_state_text = browser_content.copy()
|
browser_state_text = browser_content.copy()
|
||||||
browser_state_text.pop('screenshot_base64', None)
|
browser_state_text.pop('screenshot_base64', None)
|
||||||
browser_state_text.pop('screenshot_url', None)
|
browser_state_text.pop('screenshot_url', None)
|
||||||
browser_state_text.pop('screenshot_url_base64', None)
|
|
||||||
|
|
||||||
if browser_state_text:
|
if browser_state_text:
|
||||||
temp_message_content_list.append({
|
temp_message_content_list.append({
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": f"The following is the current state of the browser:\n{json.dumps(browser_state_text, indent=2)}"
|
"text": f"The following is the current state of the browser:\n{json.dumps(browser_state_text, indent=2)}"
|
||||||
})
|
})
|
||||||
if screenshot_base64:
|
|
||||||
|
# Prioritize screenshot_url if available
|
||||||
|
if screenshot_url:
|
||||||
|
temp_message_content_list.append({
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": screenshot_url,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
elif screenshot_base64:
|
||||||
|
# Fallback to base64 if URL not available
|
||||||
temp_message_content_list.append({
|
temp_message_content_list.append({
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {
|
||||||
|
@ -141,7 +152,7 @@ async def run_agent(
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
logger.warning("Browser state found but no screenshot base64 data.")
|
logger.warning("Browser state found but no screenshot data.")
|
||||||
|
|
||||||
await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute()
|
await client.table('messages').delete().eq('message_id', latest_browser_state_msg.data[0]["message_id"]).execute()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -15,6 +15,7 @@ import traceback
|
||||||
import pytesseract
|
import pytesseract
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import io
|
import io
|
||||||
|
from utils.logger import logger
|
||||||
|
|
||||||
#######################################################
|
#######################################################
|
||||||
# Action model definitions
|
# Action model definitions
|
||||||
|
@ -259,15 +260,16 @@ class BrowserActionResult(BaseModel):
|
||||||
url: Optional[str] = None
|
url: Optional[str] = None
|
||||||
title: Optional[str] = None
|
title: Optional[str] = None
|
||||||
elements: Optional[str] = None # Formatted string of clickable elements
|
elements: Optional[str] = None # Formatted string of clickable elements
|
||||||
screenshot_base64: Optional[str] = None
|
screenshot_base64: Optional[str] = None # For backward compatibility
|
||||||
|
screenshot_url: Optional[str] = None
|
||||||
pixels_above: int = 0
|
pixels_above: int = 0
|
||||||
pixels_below: int = 0
|
pixels_below: int = 0
|
||||||
content: Optional[str] = None
|
content: Optional[str] = None
|
||||||
ocr_text: Optional[str] = None # Added field for OCR text
|
ocr_text: Optional[str] = None
|
||||||
|
|
||||||
# Additional metadata
|
# Additional metadata
|
||||||
element_count: int = 0 # Number of interactive elements found
|
element_count: int = 0
|
||||||
interactive_elements: Optional[List[Dict[str, Any]]] = None # Simplified list of interactive elements
|
interactive_elements: Optional[List[Dict[str, Any]]] = None
|
||||||
viewport_width: Optional[int] = None
|
viewport_width: Optional[int] = None
|
||||||
viewport_height: Optional[int] = None
|
viewport_height: Optional[int] = None
|
||||||
|
|
||||||
|
@ -609,15 +611,61 @@ class BrowserAutomation:
|
||||||
)
|
)
|
||||||
|
|
||||||
async def take_screenshot(self) -> "str | Dict[str, Any]":
    """Capture the visible viewport as a JPEG and return it for the caller.

    Returns:
        * the value returned by ``upload_to_storage`` when storage
          credentials (``SUPABASE_URL`` and ``SUPABASE_KEY``) are set; on a
          verified upload that is ``{"url": ..., "is_s3": True}``;
        * the base64-encoded JPEG (``str``) when no credentials are set or
          when the uploaded file fails verification;
        * ``""`` when anything raises, so a failed screenshot never aborts
          the surrounding browser action.
    """
    # Local import: only needed here, to build collision-free object names.
    import uuid

    try:
        page = await self.get_current_page()
        screenshot_bytes = await page.screenshot(type='jpeg', quality=60, full_page=False)

        def as_base64() -> str:
            # Encoded lazily: the successful upload path never needs it.
            return base64.b64encode(screenshot_bytes).decode('utf-8')

        # Without storage credentials the image is returned inline.
        if not (os.environ.get('SUPABASE_URL') and os.environ.get('SUPABASE_KEY')):
            return as_base64()

        # A second-resolution timestamp alone is not unique; a uuid fragment
        # keeps two screenshots taken in the same second from sharing a name.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"screenshot_{timestamp}_{uuid.uuid4().hex[:8]}.jpg"

        result = await self.upload_to_storage(screenshot_bytes, filename)

        uploaded = isinstance(result, dict) and result.get("is_s3") and result.get("url")
        if not uploaded:
            # Not an upload record: hand back whatever upload_to_storage
            # produced (its own failure path returns base64 text).
            return result

        if await self.verify_file_exists(filename):
            logger.info(f"Screenshot upload verified: {filename}")
            return result

        # The URL was issued but the object is not there: fall back to inline data.
        logger.error(f"Screenshot upload failed verification: {filename}")
        return as_base64()
    except Exception as e:
        logger.error(f"Error taking screenshot: {e}")
        traceback.print_exc()
        # Return an empty string rather than failing the whole action.
        return ""
|
||||||
|
|
||||||
|
async def upload_to_storage(self, file_bytes: bytes, filename: str) -> "str | Dict[str, Any]":
    """Upload a file to the Supabase Storage ``screenshots`` bucket.

    Args:
        file_bytes: Raw file content; uploaded with content type ``image/jpeg``.
        filename: Object path to store the file under inside the bucket.

    Returns:
        ``{"url": <public URL>, "is_s3": True}`` when the upload succeeds.
        On any failure, including missing ``SUPABASE_URL`` / ``SUPABASE_KEY``,
        the base64-encoded content of ``file_bytes`` is returned as a string
        instead, so callers must check the type of the result.
    """
    supabase_url = os.environ.get('SUPABASE_URL')
    supabase_key = os.environ.get('SUPABASE_KEY')
    if not supabase_url or not supabase_key:
        # No credentials, so no upload is possible: go straight to the same
        # base64 fallback the failure path uses, without a traceback dump.
        return base64.b64encode(file_bytes).decode('utf-8')

    try:
        from supabase import create_client, Client
        supabase_client: Client = create_client(supabase_url, supabase_key)
        bucket_name = 'screenshots'
        storage = supabase_client.storage

        # NOTE(review): none of these client calls are awaited, so they
        # presumably block the event loop for the duration of each request.
        # Consider an async client or a worker thread; confirm against the
        # supabase client in use.
        buckets = storage.list_buckets()
        if not any(bucket.name == bucket_name for bucket in buckets):
            # NOTE(review): the bucket is created with default options while
            # a public URL is requested below. Verify the bucket is actually
            # publicly readable.
            storage.create_bucket(bucket_name)

        storage.from_(bucket_name).upload(
            path=filename,
            file=file_bytes,
            file_options={"content-type": "image/jpeg"}
        )

        # Get the public URL
        file_url = storage.from_(bucket_name).get_public_url(filename)

        return {"url": file_url, "is_s3": True}
    except Exception:
        # Best effort: the caller can still use the image inline.
        traceback.print_exc()
        return base64.b64encode(file_bytes).decode('utf-8')
|
||||||
|
|
||||||
async def save_screenshot_to_file(self) -> str:
|
async def save_screenshot_to_file(self) -> str:
|
||||||
"""Take a screenshot and save to file, returning the path"""
|
"""Take a screenshot and save to file, returning the path"""
|
||||||
|
@ -731,13 +779,20 @@ class BrowserAutomation:
|
||||||
return None, "", "", {}
|
return None, "", "", {}
|
||||||
|
|
||||||
def build_action_result(self, success: bool, message: str, dom_state, screenshot: str,
|
def build_action_result(self, success: bool, message: str, dom_state, screenshot: str,
|
||||||
elements: str, metadata: dict, error: str = "", content: str = None,
|
elements: str, metadata: dict, error: str = "", content: str = None,
|
||||||
fallback_url: str = None) -> BrowserActionResult:
|
fallback_url: str = None) -> BrowserActionResult:
|
||||||
"""Helper method to build a consistent BrowserActionResult"""
|
"""Helper method to build a consistent BrowserActionResult"""
|
||||||
# Ensure elements is never None to avoid display issues
|
|
||||||
if elements is None:
|
if elements is None:
|
||||||
elements = ""
|
elements = ""
|
||||||
|
|
||||||
|
screenshot_base64 = None
|
||||||
|
screenshot_url = None
|
||||||
|
|
||||||
|
if isinstance(screenshot, dict) and screenshot.get("is_s3"):
|
||||||
|
screenshot_url = screenshot.get("url")
|
||||||
|
else:
|
||||||
|
screenshot_base64 = screenshot
|
||||||
|
|
||||||
return BrowserActionResult(
|
return BrowserActionResult(
|
||||||
success=success,
|
success=success,
|
||||||
message=message,
|
message=message,
|
||||||
|
@ -745,7 +800,8 @@ class BrowserAutomation:
|
||||||
url=dom_state.url if dom_state else fallback_url or "",
|
url=dom_state.url if dom_state else fallback_url or "",
|
||||||
title=dom_state.title if dom_state else "",
|
title=dom_state.title if dom_state else "",
|
||||||
elements=elements,
|
elements=elements,
|
||||||
screenshot_base64=screenshot,
|
screenshot_base64=screenshot_base64,
|
||||||
|
screenshot_url=screenshot_url,
|
||||||
pixels_above=dom_state.pixels_above if dom_state else 0,
|
pixels_above=dom_state.pixels_above if dom_state else 0,
|
||||||
pixels_below=dom_state.pixels_below if dom_state else 0,
|
pixels_below=dom_state.pixels_below if dom_state else 0,
|
||||||
content=content,
|
content=content,
|
||||||
|
|
|
@ -84,6 +84,7 @@ export function BrowserToolView({
|
||||||
browserStateMessage.content,
|
browserStateMessage.content,
|
||||||
{},
|
{},
|
||||||
);
|
);
|
||||||
|
console.log('Browser state content: ', browserStateContent)
|
||||||
screenshotBase64 = browserStateContent?.screenshot_base64 || null;
|
screenshotBase64 = browserStateContent?.screenshot_base64 || null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue