2025-04-28 08:28:21 +08:00
import os
import base64
import mimetypes
from typing import Optional
from agentpress . tool import ToolResult , openapi_schema , xml_schema
2025-05-10 10:26:42 +08:00
from sandbox . tool_base import SandboxToolsBase
2025-04-28 08:28:21 +08:00
from agentpress . thread_manager import ThreadManager
import json
# Register the common image MIME types explicitly, in case the platform's
# mimetypes database is limited and does not know them.
mimetypes.add_type("image/webp", ".webp")
mimetypes.add_type("image/jpeg", ".jpg")
mimetypes.add_type("image/jpeg", ".jpeg")
mimetypes.add_type("image/png", ".png")
mimetypes.add_type("image/gif", ".gif")

# Maximum accepted image file size in bytes (10MB).
MAX_IMAGE_SIZE = 10 * 1024 * 1024
class SandboxVisionTool(SandboxToolsBase):
    """Tool for allowing the agent to 'see' images within the sandbox.

    The single tool method, ``see_image``, reads an image file from the
    sandbox workspace, base64-encodes it and stores it on the thread as an
    ``image_context`` message.
    """

    # Extension -> MIME type for the only formats this tool accepts.
    _MIME_BY_EXTENSION = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    def __init__(self, project_id: str, thread_id: str, thread_manager: ThreadManager):
        """Bind the tool to a project, a thread and the thread manager.

        Args:
            project_id: Forwarded to the ``SandboxToolsBase`` constructor.
            thread_id: Thread that receives the ``image_context`` messages.
            thread_manager: Forwarded to the base constructor and also kept on
                the instance so ``see_image`` can call ``add_message``.
        """
        super().__init__(project_id, thread_manager)
        self.thread_id = thread_id
        # Make thread_manager accessible within the tool instance
        self.thread_manager = thread_manager

    @classmethod
    def _resolve_mime_type(cls, path: str) -> Optional[str]:
        """Return the supported image MIME type for *path*, or None.

        The ``mimetypes`` guess is only trusted when it is one of the
        supported types; otherwise the lower-cased file extension is looked
        up directly. Other ``image/*`` types (SVG, BMP, TIFF, ...) are
        rejected so the behaviour matches the documented format list.
        """
        guessed, _ = mimetypes.guess_type(path)
        if guessed in cls._MIME_BY_EXTENSION.values():
            return guessed
        extension = os.path.splitext(path)[1].lower()
        return cls._MIME_BY_EXTENSION.get(extension)

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "see_image",
            "description": "Allows the agent to 'see' an image file located in the /workspace directory. Provide the relative path to the image. The image content will be made available in the next turn's context.",
            "parameters": {
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        # The advertised limit is derived from MAX_IMAGE_SIZE so
                        # the schema cannot drift from the enforced value.
                        "description": f"The relative path to the image file within the /workspace directory (e.g., 'screenshots/image.png'). Supported formats: JPG, PNG, GIF, WEBP. Max size: {MAX_IMAGE_SIZE // (1024 * 1024)}MB."
                    }
                },
                "required": ["file_path"]
            }
        }
    })
    @xml_schema(
        tag_name="see-image",
        mappings=[
            {"param_name": "file_path", "node_type": "attribute", "path": "."}
        ],
        example='''
        <!-- Example: Request to see an image named 'diagram.png' inside the 'docs' folder -->
        <see-image file_path="docs/diagram.png"></see-image>
        '''
    )
    async def see_image(self, file_path: str) -> ToolResult:
        """Read an image file, base64-encode it and add it to the thread.

        Args:
            file_path: Path of the image relative to the workspace directory.

        Returns:
            The result of ``success_response`` once the ``image_context``
            message has been added, or of ``fail_response`` when the path is
            missing, is a directory, exceeds ``MAX_IMAGE_SIZE``, has an
            unsupported format, cannot be read, or any other error occurs.
        """
        try:
            # Ensure sandbox is initialized
            await self._ensure_sandbox()

            # Clean and construct full path
            cleaned_path = self.clean_path(file_path)
            full_path = f"{self.workspace_path}/{cleaned_path}"

            # Check if file exists and get info; only the lookup itself is
            # treated as "not found" when it raises.
            try:
                file_info = self.sandbox.fs.get_file_info(full_path)
            except Exception:
                return self.fail_response(f"Image file not found at path: '{cleaned_path}'")

            if file_info.is_dir:
                return self.fail_response(f"Path '{cleaned_path}' is a directory, not an image file.")

            # Check file size
            if file_info.size > MAX_IMAGE_SIZE:
                return self.fail_response(f"Image file '{cleaned_path}' is too large ({file_info.size / (1024 * 1024):.2f}MB). Maximum size is {MAX_IMAGE_SIZE / (1024 * 1024)}MB.")

            # Validate the format before downloading, so unsupported files are
            # rejected without transferring and encoding their content.
            mime_type = self._resolve_mime_type(cleaned_path)
            if mime_type is None:
                return self.fail_response(f"Unsupported or unknown image format for file: '{cleaned_path}'. Supported: JPG, PNG, GIF, WEBP.")

            # Read image file content
            try:
                image_bytes = self.sandbox.fs.download_file(full_path)
            except Exception:
                return self.fail_response(f"Could not read image file: {cleaned_path}")

            # Convert to base64
            base64_image = base64.b64encode(image_bytes).decode('utf-8')

            # Prepare the temporary message content
            image_context_data = {
                "mime_type": mime_type,
                "base64": base64_image,
                "file_path": cleaned_path,  # Include path for context
            }

            # Add the message through the thread manager, using the distinct
            # 'image_context' type and storing the dict directly.
            await self.thread_manager.add_message(
                thread_id=self.thread_id,
                type="image_context",
                content=image_context_data,
                is_llm_message=False,  # This is context generated by a tool
            )

            return self.success_response(f"Successfully loaded the image '{cleaned_path}'.")
        except Exception as e:
            # Tool boundary: report any unexpected failure as a tool error
            # rather than letting the exception propagate.
            return self.fail_response(f"An unexpected error occurred while trying to see the image: {str(e)}")