# suna/backend/agent/tools/sb_image_edit_tool.py

from typing import Optional
from agentpress.tool import ToolResult, openapi_schema, usage_example
from sandbox.tool_base import SandboxToolsBase
from agentpress.thread_manager import ThreadManager
import httpx
from io import BytesIO
import uuid
from litellm import aimage_generation, aimage_edit
import base64


class SandboxImageEditTool(SandboxToolsBase):
    """Generate or edit images with the ``gpt-image-1`` model through LiteLLM.

    Images are requested via LiteLLM's ``aimage_generation`` / ``aimage_edit``
    coroutines; the base64 payload of the first result is decoded and uploaded
    into the sandbox workspace under a random file name. Edit mode sends a
    single source image and no mask.
    """

    # Shared request settings so generate and edit cannot drift apart.
    _MODEL = "gpt-image-1"
    _SIZE = "1024x1024"

    def __init__(self, project_id: str, thread_id: str, thread_manager: ThreadManager):
        """Bind the tool to a project and a thread.

        Args:
            project_id: Forwarded to ``SandboxToolsBase.__init__``.
            thread_id: Identifier of the thread this tool instance serves.
            thread_manager: Forwarded to ``SandboxToolsBase.__init__`` and
                also kept on the instance.
        """
        super().__init__(project_id, thread_manager)
        self.thread_id = thread_id
        self.thread_manager = thread_manager

    @openapi_schema(
        {
            "type": "function",
            "function": {
                "name": "image_edit_or_generate",
                "description": "Generate a new image from a prompt, or edit an existing image (no mask support) using OpenAI GPT Image 1 via OpenAI SDK. Stores the result in the thread context.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mode": {
                            "type": "string",
                            "enum": ["generate", "edit"],
                            "description": "'generate' to create a new image from a prompt, 'edit' to edit an existing image.",
                        },
                        "prompt": {
                            "type": "string",
                            "description": "Text prompt describing the desired image or edit.",
                        },
                        "image_path": {
                            "type": "string",
                            "description": "(edit mode only) Path to the image file to edit. Can be: 1) Relative path to /workspace (e.g., 'generated_image_abc123.png'), or 2) Full URL (e.g., 'https://example.com/image.png'). Required when mode='edit'.",
                        },
                    },
                    "required": ["mode", "prompt"],
                },
            },
        }
    )
    @usage_example("""
        Generate mode example (new image):
        <function_calls>
        <invoke name="image_edit_or_generate">
        <parameter name="mode">generate</parameter>
        <parameter name="prompt">A futuristic cityscape at sunset</parameter>
        </invoke>
        </function_calls>
        
        Edit mode example (modifying existing):
        <function_calls>
        <invoke name="image_edit_or_generate">
        <parameter name="mode">edit</parameter>
        <parameter name="prompt">Add a red hat to the person in the image</parameter>
        <parameter name="image_path">generated_image_abc123.png</parameter>
        </invoke>
        </function_calls>
        
        Multi-turn workflow (follow-up edits):
        1. User: "Create a logo" → generate mode
        2. User: "Make it more colorful" → edit mode (automatic)
        3. User: "Add text to it" → edit mode (automatic)
        """)
    async def image_edit_or_generate(
        self,
        mode: str,
        prompt: str,
        image_path: Optional[str] = None,
    ) -> ToolResult:
        """Generate a new image or edit an existing one, then save it to the sandbox.

        Args:
            mode: ``"generate"`` to create an image from ``prompt`` or
                ``"edit"`` to modify the image at ``image_path``.
            prompt: Text describing the desired image or edit.
            image_path: Source image for edit mode: either an ``http(s)://``
                URL or a path resolved under ``workspace_path``. Required
                when ``mode`` is ``"edit"``.

        Returns:
            A success ``ToolResult`` naming the saved file, or a failure
            ``ToolResult`` describing what went wrong. Exceptions raised
            while using the sandbox or the image API are reported as failure
            results instead of propagating.
        """
        # Validate the arguments first so a malformed call fails fast and
        # never touches the sandbox.
        if mode not in ("generate", "edit"):
            return self.fail_response("Invalid mode. Use 'generate' or 'edit'.")
        if mode == "edit" and not image_path:
            return self.fail_response("'image_path' is required for edit mode.")

        try:
            await self._ensure_sandbox()

            if mode == "generate":
                response = await aimage_generation(
                    model=self._MODEL,
                    prompt=prompt,
                    n=1,
                    size=self._SIZE,
                )
            else:
                image_bytes = await self._get_image_bytes(image_path)
                if isinstance(image_bytes, ToolResult):  # Error occurred
                    return image_bytes

                # Give the in-memory buffer a file name so the upload carries
                # an image MIME type.
                image_io = BytesIO(image_bytes)
                image_io.name = "image.png"

                response = await aimage_edit(
                    image=[image_io],  # Type in the LiteLLM SDK is wrong
                    prompt=prompt,
                    model=self._MODEL,
                    n=1,
                    size=self._SIZE,
                )

            # Decode the returned image and store it in the sandbox workspace.
            image_filename = await self._process_image_response(response)
            if isinstance(image_filename, ToolResult):  # Error occurred
                return image_filename

            return self.success_response(
                f"Successfully generated image using mode '{mode}'. Image saved as: {image_filename}. You can use the ask tool to display the image."
            )

        except Exception as e:
            return self.fail_response(
                f"An error occurred during image generation/editing: {str(e)}"
            )

    async def _get_image_bytes(self, image_path: str) -> bytes | ToolResult:
        """Return the image bytes for a URL or a sandbox path.

        Anything starting with ``http://`` or ``https://`` is downloaded;
        every other value is read from the sandbox filesystem. A failure
        ``ToolResult`` is returned when the image cannot be obtained.
        """
        if image_path.startswith(("http://", "https://")):
            return await self._download_image_from_url(image_path)
        return await self._read_image_from_sandbox(image_path)

    async def _download_image_from_url(self, url: str) -> bytes | ToolResult:
        """Download an image over HTTP(S).

        Returns:
            The response body, or a failure ``ToolResult`` that includes the
            underlying error when the request fails or the final response has
            an error status.
        """
        try:
            # httpx does not follow redirects by default, and
            # raise_for_status() rejects 3xx responses, so redirecting image
            # URLs would otherwise always fail.
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url)
                response.raise_for_status()
                return response.content
        except Exception as e:
            return self.fail_response(
                f"Could not download image from URL: {url} - {str(e)}"
            )

    async def _read_image_from_sandbox(self, image_path: str) -> bytes | ToolResult:
        """Read an image file from the sandbox workspace.

        The path is normalised with ``clean_path`` and resolved under
        ``workspace_path``.

        Returns:
            The file contents, or a failure ``ToolResult`` when the path is a
            directory or the sandbox filesystem call raises.
        """
        try:
            cleaned_path = self.clean_path(image_path)
            full_path = f"{self.workspace_path}/{cleaned_path}"

            # Reject directories before attempting the download.
            file_info = await self.sandbox.fs.get_file_info(full_path)
            if file_info.is_dir:
                return self.fail_response(
                    f"Path '{cleaned_path}' is a directory, not an image file."
                )

            return await self.sandbox.fs.download_file(full_path)

        except Exception as e:
            return self.fail_response(
                f"Could not read image file from sandbox: {image_path} - {str(e)}"
            )

    async def _process_image_response(self, response) -> str | ToolResult:
        """Decode the first image of ``response`` and save it to the sandbox.

        Args:
            response: Image API result exposing ``data[0].b64_json``.

        Returns:
            The generated file name (relative to ``workspace_path``), or a
            failure ``ToolResult`` when the response carries no base64 image
            or the decode/upload fails.
        """
        try:
            images = getattr(response, "data", None)
            if not images:
                return self.fail_response(
                    "Failed to download and save image: response contained no image data"
                )

            original_b64_str = getattr(images[0], "b64_json", None)
            if not original_b64_str:
                return self.fail_response(
                    "Failed to download and save image: response did not include base64 image data"
                )

            image_data = base64.b64decode(original_b64_str)

            # A random suffix keeps successive results from overwriting each other.
            random_filename = f"generated_image_{uuid.uuid4().hex[:8]}.png"
            sandbox_path = f"{self.workspace_path}/{random_filename}"

            await self.sandbox.fs.upload_file(image_data, sandbox_path)
            return random_filename

        except Exception as e:
            return self.fail_response(f"Failed to download and save image: {str(e)}")
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`from typing import Optional`
update tool decorator to have usage_example 2025-07-31 04:12:11 +08:00			`from agentpress.tool import ToolResult, openapi_schema, usage_example`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`from sandbox.tool_base import SandboxToolsBase`
			`from agentpress.thread_manager import ThreadManager`
			`import httpx`
			`from io import BytesIO`
			`import uuid`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`from litellm import aimage_generation, aimage_edit`
fix(sb_image_edit_tool): update image processing to use base64 data instead of URL 2025-07-05 20:39:55 +08:00			`import base64`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00

			`class SandboxImageEditTool(SandboxToolsBase):`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`"""Tool for generating or editing images using OpenAI GPT Image 1 via OpenAI SDK (no mask support)."""`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00
			`def __init__(self, project_id: str, thread_id: str, thread_manager: ThreadManager):`
			`super().__init__(project_id, thread_manager)`
			`self.thread_id = thread_id`
			`self.thread_manager = thread_manager`

			`@openapi_schema(`
			`{`
			`"type": "function",`
			`"function": {`
			`"name": "image_edit_or_generate",`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`"description": "Generate a new image from a prompt, or edit an existing image (no mask support) using OpenAI GPT Image 1 via OpenAI SDK. Stores the result in the thread context.",`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`"parameters": {`
			`"type": "object",`
			`"properties": {`
			`"mode": {`
			`"type": "string",`
			`"enum": ["generate", "edit"],`
			`"description": "'generate' to create a new image from a prompt, 'edit' to edit an existing image.",`
			`},`
			`"prompt": {`
			`"type": "string",`
			`"description": "Text prompt describing the desired image or edit.",`
			`},`
			`"image_path": {`
			`"type": "string",`
standardise tool select component, fix sb image edit&gen tool 2025-08-15 09:01:24 +08:00			`"description": "(edit mode only) Path to the image file to edit. Can be: 1) Relative path to /workspace (e.g., 'generated_image_abc123.png'), or 2) Full URL (e.g., 'https://example.com/image.png'). Required when mode='edit'.",`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`},`
			`},`
			`"required": ["mode", "prompt"],`
			`},`
			`},`
			`}`
			`)`
update tool decorator to have usage_example 2025-07-31 04:12:11 +08:00			`@usage_example("""`
standardise tool select component, fix sb image edit&gen tool 2025-08-15 09:01:24 +08:00			`Generate mode example (new image):`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`<function_calls>`
			`<invoke name="image_edit_or_generate">`
			`<parameter name="mode">generate</parameter>`
			`<parameter name="prompt">A futuristic cityscape at sunset</parameter>`
			`</invoke>`
			`</function_calls>`
standardise tool select component, fix sb image edit&gen tool 2025-08-15 09:01:24 +08:00
			`Edit mode example (modifying existing):`
			`<function_calls>`
			`<invoke name="image_edit_or_generate">`
			`<parameter name="mode">edit</parameter>`
			`<parameter name="prompt">Add a red hat to the person in the image</parameter>`
			`<parameter name="image_path">generated_image_abc123.png</parameter>`
			`</invoke>`
			`</function_calls>`

			`Multi-turn workflow (follow-up edits):`
			`1. User: "Create a logo" → generate mode`
			`2. User: "Make it more colorful" → edit mode (automatic)`
			`3. User: "Add text to it" → edit mode (automatic)`
update tool decorator to have usage_example 2025-07-31 04:12:11 +08:00			`""")`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`async def image_edit_or_generate(`
			`self,`
			`mode: str,`
			`prompt: str,`
			`image_path: Optional[str] = None,`
			`) -> ToolResult:`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`"""Generate or edit images using OpenAI GPT Image 1 via OpenAI SDK (no mask support)."""`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`try:`
			`await self._ensure_sandbox()`

			`if mode == "generate":`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`response = await aimage_generation(`
			`model="gpt-image-1",`
			`prompt=prompt,`
			`n=1,`
			`size="1024x1024",`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`)`
			`elif mode == "edit":`
			`if not image_path:`
			`return self.fail_response("'image_path' is required for edit mode.")`

			`image_bytes = await self._get_image_bytes(image_path)`
			`if isinstance(image_bytes, ToolResult): # Error occurred`
			`return image_bytes`

			`# Create BytesIO object with proper filename to set MIME type`
			`image_io = BytesIO(image_bytes)`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`image_io.name = (`
			`"image.png" # Set filename to ensure proper MIME type detection`
			`)`

			`response = await aimage_edit(`
fix(sb_image_edit_tool): update image processing to use base64 data instead of URL 2025-07-05 20:39:55 +08:00			`image=[image_io], # Type in the LiteLLM SDK is wrong`
Update server configuration and dependencies - Increased the number of workers from 1 to 4 in the server startup configuration for improved performance. - Upgraded the OpenAI dependency from version 1.72.0 to 1.90.0 in `pyproject.toml` and `uv.lock`. - Refactored image editing tool to utilize the new OpenAI GPT Image 1 model, updating method calls and documentation accordingly. 2025-06-24 01:36:31 +08:00			`prompt=prompt,`
			`model="gpt-image-1",`
			`n=1,`
			`size="1024x1024",`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`)`
			`else:`
			`return self.fail_response("Invalid mode. Use 'generate' or 'edit'.")`

			`# Download and save the generated image to sandbox`
			`image_filename = await self._process_image_response(response)`
			`if isinstance(image_filename, ToolResult): # Error occurred`
			`return image_filename`

			`return self.success_response(`
			`f"Successfully generated image using mode '{mode}'. Image saved as: {image_filename}. You can use the ask tool to display the image."`
			`)`

			`except Exception as e:`
			`return self.fail_response(`
			`f"An error occurred during image generation/editing: {str(e)}"`
			`)`

			`async def _get_image_bytes(self, image_path: str) -> bytes \| ToolResult:`
			`"""Get image bytes from URL or local file path."""`
			`if image_path.startswith(("http://", "https://")):`
			`return await self._download_image_from_url(image_path)`
			`else:`
			`return await self._read_image_from_sandbox(image_path)`

			`async def _download_image_from_url(self, url: str) -> bytes \| ToolResult:`
			`"""Download image from URL."""`
			`try:`
			`async with httpx.AsyncClient() as client:`
			`response = await client.get(url)`
			`response.raise_for_status()`
			`return response.content`
			`except Exception:`
			`return self.fail_response(f"Could not download image from URL: {url}")`

			`async def _read_image_from_sandbox(self, image_path: str) -> bytes \| ToolResult:`
			`"""Read image from sandbox filesystem."""`
			`try:`
			`cleaned_path = self.clean_path(image_path)`
			`full_path = f"{self.workspace_path}/{cleaned_path}"`

			`# Check if file exists and is not a directory`
refactor(sb_image_edit_tool): convert file operations to async for improved performance 2025-07-05 19:00:00 +08:00			`file_info = await self.sandbox.fs.get_file_info(full_path)`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`if file_info.is_dir:`
			`return self.fail_response(`
			`f"Path '{cleaned_path}' is a directory, not an image file."`
			`)`

refactor(sb_image_edit_tool): convert file operations to async for improved performance 2025-07-05 19:00:00 +08:00			`return await self.sandbox.fs.download_file(full_path)`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00
			`except Exception as e:`
			`return self.fail_response(`
			`f"Could not read image file from sandbox: {image_path} - {str(e)}"`
			`)`

			`async def _process_image_response(self, response) -> str \| ToolResult:`
			`"""Download generated image and save to sandbox with random name."""`
			`try:`
fix(sb_image_edit_tool): update image processing to use base64 data instead of URL 2025-07-05 20:39:55 +08:00			`original_b64_str = response.data[0].b64_json`
			`# Decode base64 image data`
			`image_data = base64.b64decode(original_b64_str)`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00
			`# Generate random filename`
			`random_filename = f"generated_image_{uuid.uuid4().hex[:8]}.png"`
			`sandbox_path = f"{self.workspace_path}/{random_filename}"`

			`# Save image to sandbox`
fix(sb_image_edit_tool): update image processing to use base64 data instead of URL 2025-07-05 20:39:55 +08:00			`await self.sandbox.fs.upload_file(image_data, sandbox_path)`
feat(image-editing): introduce image generation and editing tool with updated documentation 2025-06-09 09:38:50 +08:00			`return random_filename`

			`except Exception as e:`
			`return self.fail_response(f"Failed to download and save image: {str(e)}")`