2025-06-09 09:38:50 +08:00
from typing import Optional
2025-07-31 04:12:11 +08:00
from agentpress . tool import ToolResult , openapi_schema , usage_example
2025-06-09 09:38:50 +08:00
from sandbox . tool_base import SandboxToolsBase
from agentpress . thread_manager import ThreadManager
import httpx
from io import BytesIO
import uuid
2025-06-24 01:36:31 +08:00
from litellm import aimage_generation , aimage_edit
2025-07-05 20:39:55 +08:00
import base64
2025-06-09 09:38:50 +08:00
class SandboxImageEditTool(SandboxToolsBase):
    """Sandbox tool that creates or edits images with the ``gpt-image-1`` model.

    Requests are sent through LiteLLM's async helpers (``aimage_generation``
    and ``aimage_edit``); masks are not supported. The returned image is
    base64-decoded and uploaded into the sandbox workspace under a random
    ``generated_image_<hex>.png`` name.
    """

    # Shared by the generate and edit calls so the two cannot drift apart.
    _IMAGE_MODEL = "gpt-image-1"
    _IMAGE_SIZE = "1024x1024"

    def __init__(self, project_id: str, thread_id: str, thread_manager: ThreadManager):
        """Bind the tool to a project and a thread.

        Args:
            project_id: Forwarded to ``SandboxToolsBase.__init__``.
            thread_id: Stored on the instance as ``self.thread_id``.
            thread_manager: Forwarded to the base class and also stored as
                ``self.thread_manager``.
        """
        super().__init__(project_id, thread_manager)
        self.thread_id = thread_id
        self.thread_manager = thread_manager

    @openapi_schema(
        {
            "type": "function",
            "function": {
                "name": "image_edit_or_generate",
                "description": "Generate a new image from a prompt, or edit an existing image (no mask support) using OpenAI GPT Image 1 via OpenAI SDK. Stores the result in the thread context.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mode": {
                            "type": "string",
                            "enum": ["generate", "edit"],
                            "description": "'generate' to create a new image from a prompt, 'edit' to edit an existing image.",
                        },
                        "prompt": {
                            "type": "string",
                            "description": "Text prompt describing the desired image or edit.",
                        },
                        "image_path": {
                            "type": "string",
                            "description": "(edit mode only) Path to the image file to edit. Can be: 1) Relative path to /workspace (e.g., 'generated_image_abc123.png'), or 2) Full URL (e.g., 'https://example.com/image.png'). Required when mode='edit'.",
                        },
                    },
                    "required": ["mode", "prompt"],
                },
            },
        }
    )
    @usage_example("""
        Generate mode example (new image):
        <function_calls>
        <invoke name="image_edit_or_generate">
        <parameter name="mode">generate</parameter>
        <parameter name="prompt">A futuristic cityscape at sunset</parameter>
        </invoke>
        </function_calls>
        Edit mode example (modifying existing):
        <function_calls>
        <invoke name="image_edit_or_generate">
        <parameter name="mode">edit</parameter>
        <parameter name="prompt">Add a red hat to the person in the image</parameter>
        <parameter name="image_path">generated_image_abc123.png</parameter>
        </invoke>
        </function_calls>
        Multi-turn workflow (follow-up edits):
        1. User: "Create a logo" → generate mode
        2. User: "Make it more colorful" → edit mode (automatic)
        3. User: "Add text to it" → edit mode (automatic)
        """)
    async def image_edit_or_generate(
        self,
        mode: str,
        prompt: str,
        image_path: Optional[str] = None,
    ) -> ToolResult:
        """Generate a new image or edit an existing one and save it to the sandbox.

        Args:
            mode: ``"generate"`` to create an image from ``prompt``, or
                ``"edit"`` to modify the image at ``image_path``.
            prompt: Text describing the desired image or edit.
            image_path: Source image for edit mode; either an ``http(s)`` URL
                or a path resolved against the sandbox workspace. Required
                when ``mode == "edit"``.

        Returns:
            A success ``ToolResult`` naming the saved file, or a failure
            ``ToolResult`` describing what went wrong. Exceptions raised while
            talking to the sandbox or the image API are caught and reported
            as failures rather than propagated.
        """
        # Reject bad arguments up front so an invalid call does not pay for
        # sandbox start-up first.
        if mode not in ("generate", "edit"):
            return self.fail_response("Invalid mode. Use 'generate' or 'edit'.")
        if mode == "edit" and not image_path:
            return self.fail_response("'image_path' is required for edit mode.")

        try:
            await self._ensure_sandbox()

            if mode == "generate":
                response = await aimage_generation(
                    model=self._IMAGE_MODEL,
                    prompt=prompt,
                    n=1,
                    size=self._IMAGE_SIZE,
                )
            else:
                image_bytes = await self._get_image_bytes(image_path)
                if isinstance(image_bytes, ToolResult):  # Error occurred
                    return image_bytes

                # Wrap the bytes in a file-like object and give it a filename
                # so the upload gets a proper MIME type.
                # NOTE(review): the name is always "image.png" regardless of
                # the source format - confirm non-PNG inputs are accepted.
                image_io = BytesIO(image_bytes)
                image_io.name = "image.png"

                response = await aimage_edit(
                    image=[image_io],  # Type in the LiteLLM SDK is wrong
                    prompt=prompt,
                    model=self._IMAGE_MODEL,
                    n=1,
                    size=self._IMAGE_SIZE,
                )

            # Decode the returned image and store it in the sandbox.
            image_filename = await self._process_image_response(response)
            if isinstance(image_filename, ToolResult):  # Error occurred
                return image_filename

            return self.success_response(
                f"Successfully generated image using mode '{mode}'. Image saved as: {image_filename}. You can use the ask tool to display the image."
            )
        except Exception as e:
            return self.fail_response(
                f"An error occurred during image generation/editing: {str(e)}"
            )

    async def _get_image_bytes(self, image_path: str) -> bytes | ToolResult:
        """Return the image bytes for a URL or a sandbox path.

        Paths starting with ``http://`` or ``https://`` are downloaded; any
        other value is read from the sandbox filesystem. A failure
        ``ToolResult`` is returned instead of bytes when loading fails.
        """
        if image_path.startswith(("http://", "https://")):
            return await self._download_image_from_url(image_path)
        return await self._read_image_from_sandbox(image_path)

    async def _download_image_from_url(self, url: str) -> bytes | ToolResult:
        """Download an image over HTTP(S).

        Returns:
            The response body, or a failure ``ToolResult`` that includes the
            underlying error when the request fails or returns a non-success
            status.
        """
        # NOTE(review): the URL comes from tool input and is fetched as-is;
        # consider restricting targets if this can reach internal hosts.
        try:
            # httpx does not follow redirects unless asked to, and image hosts
            # commonly answer with one; without this raise_for_status() fails.
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url)
                response.raise_for_status()
                return response.content
        except Exception as e:
            # Include the cause, matching the sandbox-read error format.
            return self.fail_response(
                f"Could not download image from URL: {url} - {str(e)}"
            )

    async def _read_image_from_sandbox(self, image_path: str) -> bytes | ToolResult:
        """Read an image file from the sandbox filesystem.

        The path is normalised with ``self.clean_path`` and resolved against
        ``self.workspace_path``.

        Returns:
            The file contents, or a failure ``ToolResult`` when the path is a
            directory or the file cannot be read.
        """
        try:
            cleaned_path = self.clean_path(image_path)
            full_path = f"{self.workspace_path}/{cleaned_path}"

            # get_file_info also acts as the existence check: a missing file
            # is expected to raise and land in the except branch below.
            file_info = await self.sandbox.fs.get_file_info(full_path)
            if file_info.is_dir:
                return self.fail_response(
                    f"Path '{cleaned_path}' is a directory, not an image file."
                )

            return await self.sandbox.fs.download_file(full_path)
        except Exception as e:
            return self.fail_response(
                f"Could not read image file from sandbox: {image_path} - {str(e)}"
            )

    async def _process_image_response(self, response) -> str | ToolResult:
        """Decode the image in ``response`` and save it to the sandbox.

        Args:
            response: Image API response; the first entry of ``response.data``
                is expected to carry the image as ``b64_json``.

        Returns:
            The random filename (relative to the workspace) the image was
            saved under, or a failure ``ToolResult``.
        """
        try:
            # Fail with a clear message instead of an IndexError/TypeError when
            # the response has no entries or no inline base64 payload.
            data = getattr(response, "data", None)
            original_b64_str = getattr(data[0], "b64_json", None) if data else None
            if not original_b64_str:
                return self.fail_response(
                    "Failed to download and save image: response contained no base64 image data"
                )

            # Decode base64 image data
            image_data = base64.b64decode(original_b64_str)

            # Generate random filename
            random_filename = f"generated_image_{uuid.uuid4().hex[:8]}.png"
            sandbox_path = f"{self.workspace_path}/{random_filename}"

            # Save image to sandbox
            await self.sandbox.fs.upload_file(image_data, sandbox_path)
            return random_filename
        except Exception as e:
            return self.fail_response(f"Failed to download and save image: {str(e)}")