2025-04-28 08:28:21 +08:00
import os
import base64
import mimetypes
2025-05-25 17:51:20 +08:00
from typing import Optional , Tuple
from io import BytesIO
from PIL import Image
2025-06-30 01:14:07 +08:00
from urllib . parse import urlparse
2025-07-31 04:12:11 +08:00
from agentpress . tool import ToolResult , openapi_schema , usage_example
2025-05-10 10:26:42 +08:00
from sandbox . tool_base import SandboxToolsBase
2025-04-28 08:28:21 +08:00
from agentpress . thread_manager import ThreadManager
import json
2025-06-30 01:14:07 +08:00
import requests
2025-04-28 08:28:21 +08:00
# Register common image MIME types explicitly, in case the platform's
# mimetypes table does not already know them.
mimetypes.add_type("image/webp", ".webp")
mimetypes.add_type("image/jpeg", ".jpg")
mimetypes.add_type("image/jpeg", ".jpeg")
mimetypes.add_type("image/png", ".png")
mimetypes.add_type("image/gif", ".gif")

# Size limits in bytes: 10MB for the original image, 5MB after compression.
MAX_IMAGE_SIZE = 10 * 1024 * 1024
MAX_COMPRESSED_SIZE = 5 * 1024 * 1024

# Compression settings
DEFAULT_MAX_WIDTH = 1920  # pixels; wider images are scaled down
DEFAULT_MAX_HEIGHT = 1080  # pixels; taller images are scaled down
DEFAULT_JPEG_QUALITY = 85  # passed as `quality` when saving JPEG
DEFAULT_PNG_COMPRESS_LEVEL = 6  # passed as `compress_level` when saving PNG
2025-04-28 08:28:21 +08:00
class SandboxVisionTool(SandboxToolsBase):
    """Tool for allowing the agent to 'see' images within the sandbox.

    An image is read either from the sandbox workspace or from an http(s)
    URL, re-encoded to reduce its size, base64-encoded, and stored on the
    thread as an ``image_context`` message.
    """

    # Settings used when fetching remote images.
    _REQUEST_TIMEOUT_SECONDS = 10
    _DOWNLOAD_CHUNK_SIZE = 64 * 1024
    _REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}  # Some servers block default Python

    def __init__(self, project_id: str, thread_id: str, thread_manager: ThreadManager):
        """Store the thread id and thread manager used to publish image messages."""
        super().__init__(project_id, thread_manager)
        self.thread_id = thread_id
        # Make thread_manager accessible within the tool instance
        self.thread_manager = thread_manager

    def compress_image(self, image_bytes: bytes, mime_type: str, file_path: str) -> Tuple[bytes, str]:
        """Compress an image to reduce its size while maintaining reasonable quality.

        Args:
            image_bytes: Original image bytes.
            mime_type: MIME type of the image.
            file_path: Path to the image file (used for logging only).

        Returns:
            Tuple of (compressed_bytes, new_mime_type). If the image cannot be
            processed, or re-encoding without a resize does not make it
            smaller, the original bytes and MIME type are returned unchanged.
        """
        try:
            # Open image from bytes
            img = Image.open(BytesIO(image_bytes))

            # Flatten any transparency onto a white background (required for
            # JPEG). Going through RGBA gives 'LA' and 'P' images a real alpha
            # band to use as the paste mask, so transparent areas become white.
            if img.mode in ('RGBA', 'LA', 'P'):
                rgba = img.convert('RGBA')
                background = Image.new('RGB', rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.split()[-1])
                img = background

            # Calculate new dimensions while maintaining aspect ratio
            width, height = img.size
            resized = False
            if width > DEFAULT_MAX_WIDTH or height > DEFAULT_MAX_HEIGHT:
                ratio = min(DEFAULT_MAX_WIDTH / width, DEFAULT_MAX_HEIGHT / height)
                # Clamp to 1px so extreme aspect ratios never yield a 0-sized image.
                new_width = max(1, int(width * ratio))
                new_height = max(1, int(height * ratio))
                img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                resized = True
                print(f"[SeeImage] Resized image from {width}x{height} to {new_width}x{new_height}")

            # Save to bytes with compression; output format follows the original mime type
            output = BytesIO()
            if mime_type == 'image/gif':
                # Stay in GIF format. Only a single frame is written here, so
                # animation is NOT preserved.
                img.save(output, format='GIF', optimize=True)
                output_mime = 'image/gif'
            elif mime_type == 'image/png':
                # Compress PNG
                img.save(output, format='PNG', optimize=True, compress_level=DEFAULT_PNG_COMPRESS_LEVEL)
                output_mime = 'image/png'
            else:
                # Convert everything else to JPEG for better compression
                img.save(output, format='JPEG', quality=DEFAULT_JPEG_QUALITY, optimize=True)
                output_mime = 'image/jpeg'

            compressed_bytes = output.getvalue()
            original_size = len(image_bytes)
            compressed_size = len(compressed_bytes)

            # Re-encoding alone can make an already well-compressed file
            # bigger; in that case the original is the better payload.
            if not resized and compressed_size >= original_size:
                print(f"[SeeImage] Re-encoding '{file_path}' did not reduce its size. Using original.")
                return image_bytes, mime_type

            # Log compression results
            compression_ratio = (1 - compressed_size / original_size) * 100
            print(f"[SeeImage] Compressed '{file_path}' from {original_size / 1024:.1f}KB to {compressed_size / 1024:.1f}KB ({compression_ratio:.1f}% reduction)")

            return compressed_bytes, output_mime

        except Exception as e:
            # Deliberate best-effort: any decoding/encoding failure falls back to the original.
            print(f"[SeeImage] Failed to compress image: {str(e)}. Using original.")
            return image_bytes, mime_type

    def is_url(self, file_path: str) -> bool:
        """Return True if ``file_path`` is an http or https URL."""
        try:
            parsed_url = urlparse(file_path)
        except ValueError:
            # Malformed input (e.g. a broken IPv6 literal) is not a usable URL.
            return False
        return parsed_url.scheme in ('http', 'https')

    def download_image_from_url(self, url: str) -> Tuple[bytes, str]:
        """Download an image from a URL.

        The response is streamed so that the Content-Type and declared size
        can be validated before the body is read, and the download is aborted
        as soon as it exceeds ``MAX_IMAGE_SIZE``.

        Args:
            url: http(s) URL of the image.

        Returns:
            Tuple of (image_bytes, mime_type), where mime_type is the
            Content-Type header lower-cased and stripped of any parameters.

        Raises:
            ValueError: If the response is not an image or is too large.
            requests.RequestException: On network errors or HTTP error statuses.

        NOTE(review): the URL is fetched as given, with no filtering of
        private/internal addresses - confirm outbound access is restricted
        where this runs.
        """
        max_size_mb = MAX_IMAGE_SIZE / (1024 * 1024)

        # The context manager releases the connection even when we bail out early.
        with requests.get(
            url,
            timeout=self._REQUEST_TIMEOUT_SECONDS,
            headers=self._REQUEST_HEADERS,
            stream=True,
        ) as response:
            response.raise_for_status()

            # Get MIME type, dropping parameters such as "; charset=binary"
            content_type = response.headers.get('Content-Type')
            mime_type = content_type.split(';', 1)[0].strip().lower() if content_type else ''
            if not mime_type.startswith('image/'):
                raise ValueError(f"URL does not point to an image (Content-Type: {content_type}): {url}")

            # Content-Length is optional and may be malformed; treat it as a hint only.
            try:
                content_length = int(response.headers.get('Content-Length', ''))
            except ValueError:
                content_length = None
            if content_length is not None and content_length > MAX_IMAGE_SIZE:
                raise ValueError(f"Image is too large ({content_length / (1024 * 1024):.2f}MB) for the maximum allowed size of {max_size_mb:.2f}MB")

            # Read the body in chunks, enforcing the limit as data arrives.
            buffer = bytearray()
            for chunk in response.iter_content(chunk_size=self._DOWNLOAD_CHUNK_SIZE):
                buffer.extend(chunk)
                if len(buffer) > MAX_IMAGE_SIZE:
                    raise ValueError(f"Downloaded image is too large (over {max_size_mb:.2f}MB). Maximum allowed size of {max_size_mb:.2f}MB")

        return bytes(buffer), mime_type

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "see_image",
            "description": "Allows the agent to 'see' an image file located in the /workspace directory or from a URL. Provide either a relative path to a local image or the URL to an image. The image will be compressed before sending to reduce token usage. The image content will be made available in the next turn's context.",
            "parameters": {
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        "description": "Either a relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png') or a URL to an image (e.g., 'https://example.com/image.jpg'). Supported formats: JPG, PNG, GIF, WEBP. Max size: 10MB."
                    }
                },
                "required": ["file_path"]
            }
        }
    })
    @usage_example('''
        <!-- Example: Request to see a local image named 'diagram.png' inside the 'docs' folder -->
        <function_calls>
        <invoke name="see_image">
        <parameter name="file_path">docs/diagram.png</parameter>
        </invoke>
        </function_calls>

        <!-- Example: Request to see an image from a URL -->
        <function_calls>
        <invoke name="see_image">
        <parameter name="file_path">https://example.com/image.jpg</parameter>
        </invoke>
        </function_calls>
        ''')
    async def see_image(self, file_path: str) -> ToolResult:
        """Load an image, compress it, and attach it to the thread.

        Reads an image file from the sandbox file system or from a URL,
        compresses it, converts it to base64, and adds it to the thread as an
        ``image_context`` message.

        Args:
            file_path: Path relative to the workspace, or an http(s) URL.

        Returns:
            A success ToolResult describing the size reduction, or a failure
            ToolResult explaining why the image could not be loaded.
        """
        # Function-scope import keeps this block self-contained.
        import asyncio

        try:
            if self.is_url(file_path):
                try:
                    # requests is blocking; run it in a worker thread so the
                    # event loop stays responsive during the download.
                    loop = asyncio.get_running_loop()
                    image_bytes, mime_type = await loop.run_in_executor(
                        None, self.download_image_from_url, file_path
                    )
                except Exception as e:
                    return self.fail_response(f"Failed to download image from URL: {str(e)}")
                original_size = len(image_bytes)
                cleaned_path = file_path
            else:
                # Ensure sandbox is initialized
                await self._ensure_sandbox()

                # Clean and construct full path
                cleaned_path = self.clean_path(file_path)
                full_path = f"{self.workspace_path}/{cleaned_path}"

                # Check if file exists and get info
                try:
                    file_info = await self.sandbox.fs.get_file_info(full_path)
                    if file_info.is_dir:
                        return self.fail_response(f"Path '{cleaned_path}' is a directory, not an image file.")
                except Exception:
                    return self.fail_response(f"Image file not found at path: '{cleaned_path}'")

                # Check file size
                if file_info.size > MAX_IMAGE_SIZE:
                    return self.fail_response(f"Image file '{cleaned_path}' is too large ({file_info.size / (1024*1024):.2f}MB). Maximum size is {MAX_IMAGE_SIZE / (1024*1024)}MB.")

                # Read image file content
                try:
                    image_bytes = await self.sandbox.fs.download_file(full_path)
                except Exception:
                    return self.fail_response(f"Could not read image file: {cleaned_path}")

                # Determine MIME type
                mime_type, _ = mimetypes.guess_type(full_path)
                if not mime_type or not mime_type.startswith('image/'):
                    # Basic fallback based on extension if mimetypes fails
                    ext = os.path.splitext(cleaned_path)[1].lower()
                    mime_type = {
                        '.jpg': 'image/jpeg',
                        '.jpeg': 'image/jpeg',
                        '.png': 'image/png',
                        '.gif': 'image/gif',
                        '.webp': 'image/webp',
                    }.get(ext)
                    if mime_type is None:
                        return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP.")

                original_size = file_info.size

            # Compress the image
            compressed_bytes, compressed_mime_type = self.compress_image(image_bytes, mime_type, cleaned_path)

            # Check if compressed image is still too large
            if len(compressed_bytes) > MAX_COMPRESSED_SIZE:
                return self.fail_response(f"Image file '{cleaned_path}' is still too large after compression ({len(compressed_bytes) / (1024*1024):.2f}MB). Maximum compressed size is {MAX_COMPRESSED_SIZE / (1024*1024)}MB.")

            # Convert to base64
            base64_image = base64.b64encode(compressed_bytes).decode('utf-8')

            # Prepare the temporary message content
            image_context_data = {
                "mime_type": compressed_mime_type,
                "base64": base64_image,
                "file_path": cleaned_path,  # Include path for context
                "original_size": original_size,
                "compressed_size": len(compressed_bytes)
            }

            # Add the temporary message using the thread_manager callback
            # Use a distinct type like 'image_context'
            await self.thread_manager.add_message(
                thread_id=self.thread_id,
                type="image_context",  # Use a specific type for this
                content=image_context_data,  # Store the dict directly
                is_llm_message=False  # This is context generated by a tool
            )

            # Inform the agent the image will be available next turn
            return self.success_response(f"Successfully loaded and compressed the image '{cleaned_path}' (reduced from {original_size / 1024:.1f}KB to {len(compressed_bytes) / 1024:.1f}KB).")

        except Exception as e:
            return self.fail_response(f"An unexpected error occurred while trying to see the image: {str(e)}")