mirror of https://github.com/kortix-ai/suna.git
Remove image from context
This commit is contained in:
parent ae04dddf65
commit a5d8edabc9
|
@ -69,23 +69,79 @@ class ImageContextManager:
|
|||
}).execute()
|
||||
result = db_result.data[0] if db_result.data and len(db_result.data) > 0 else None
|
||||
|
||||
if result:
|
||||
logger.debug(f"Added image to context: {file_path}")
|
||||
return result
|
||||
else:
|
||||
if not result:
|
||||
logger.error("Failed to insert image message")
|
||||
return None
|
||||
|
||||
logger.debug(f"Added image to context: {file_path}")
|
||||
|
||||
# ===== CRITICAL: Add image context management instruction =====
|
||||
# This hardcoded instruction ensures images are properly managed and removed
|
||||
context_instruction = {
|
||||
"role": "user",
|
||||
"content": """⚠️ IMPORTANT - IMAGE CONTEXT MANAGEMENT:
|
||||
|
||||
You are now viewing an image that has been loaded into context. Due to context window limitations, this image WILL BE AUTOMATICALLY REMOVED after you analyze it.
|
||||
|
||||
REQUIRED ACTIONS:
|
||||
1. **Analyze the image thoroughly** - Look at all details, text, UI elements, colors, layout, etc.
|
||||
2. **Write a DETAILED SUMMARY** - Describe what you see in comprehensive detail so you can reference it later. Include:
|
||||
- All visible text and labels
|
||||
- UI components and their states
|
||||
- Colors, layout, and visual hierarchy
|
||||
- Any errors, warnings, or important information
|
||||
- Relationships between elements
|
||||
3. **Call clear_images_from_context** - You MUST call this tool after your analysis to free up context tokens
|
||||
|
||||
WHY THIS MATTERS:
|
||||
- Images consume significant context tokens
|
||||
- You will NOT see this image again after it's cleared (unless explicitly reloaded with load_image)
|
||||
- Your written summary is your only future reference to this image
|
||||
- Failing to clear images will cause context overflow
|
||||
|
||||
REMEMBER: Be thorough in your summary - it's your permanent record of what you saw!"""
|
||||
}
|
||||
|
||||
context_instruction_metadata = {
|
||||
"image_context": True,
|
||||
"instruction_type": "context_management",
|
||||
"related_file": file_path
|
||||
}
|
||||
|
||||
# Add the context management instruction
|
||||
if self.thread_manager:
|
||||
await self.thread_manager.add_message(
|
||||
thread_id=thread_id,
|
||||
type='user',
|
||||
content=context_instruction,
|
||||
is_llm_message=True,
|
||||
metadata=context_instruction_metadata
|
||||
)
|
||||
else:
|
||||
# Fallback to direct DB access
|
||||
client = await self.db.client
|
||||
await client.table('messages').insert({
|
||||
'thread_id': thread_id,
|
||||
'type': 'user',
|
||||
'content': context_instruction,
|
||||
'is_llm_message': True,
|
||||
'metadata': context_instruction_metadata
|
||||
}).execute()
|
||||
|
||||
logger.debug(f"Added context management instruction for image: {file_path}")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to add image to context: {str(e)}", exc_info=True)
|
||||
return None
|
||||
|
||||
async def clear_images_from_context(self, thread_id: str) -> int:
|
||||
"""Remove all image context messages from a thread."""
|
||||
"""Remove all image context messages from a thread, including images and their management instructions."""
|
||||
try:
|
||||
client = await self.db.client
|
||||
|
||||
# Delete all messages with image_context metadata
|
||||
# Delete all messages with image_context metadata (includes both images and instructions)
|
||||
result = await client.table('messages').delete().eq(
|
||||
'thread_id', thread_id
|
||||
).eq(
|
||||
|
@ -95,7 +151,7 @@ class ImageContextManager:
|
|||
).execute()
|
||||
|
||||
deleted_count = len(result.data) if result.data else 0
|
||||
logger.debug(f"Cleared {deleted_count} images from context")
|
||||
logger.debug(f"Cleared {deleted_count} image-related messages from context (images + instructions)")
|
||||
return deleted_count
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
@ -165,11 +165,9 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
os.unlink(temp_svg_path)
|
||||
|
||||
except ImportError:
|
||||
print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'")
|
||||
return image_bytes, mime_type
|
||||
raise Exception(f"SVG conversion libraries not available. Cannot display SVG file '{file_path}'. Please convert to PNG manually.")
|
||||
except Exception as e:
|
||||
print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}")
|
||||
return image_bytes, mime_type
|
||||
raise Exception(f"SVG conversion failed for '{file_path}': {str(e)}. Please convert to PNG manually.")
|
||||
|
||||
# Open image from bytes
|
||||
img = Image.open(BytesIO(image_bytes))
|
||||
|
@ -220,8 +218,14 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
return compressed_bytes, output_mime
|
||||
|
||||
except Exception as e:
|
||||
print(f"[SeeImage] Failed to compress image: {str(e)}. Using original.")
|
||||
return image_bytes, mime_type
|
||||
# CRITICAL: Never return unsupported formats
|
||||
# If compression fails, we need to ensure we still return a supported format
|
||||
if mime_type in ['image/jpeg', 'image/png', 'image/gif', 'image/webp']:
|
||||
print(f"[SeeImage] Failed to compress image: {str(e)}. Using original (format is supported).")
|
||||
return image_bytes, mime_type
|
||||
else:
|
||||
# Unsupported format and compression failed - must fail
|
||||
raise Exception(f"Failed to process image '{file_path}' with unsupported format '{mime_type}': {str(e)}")
|
||||
|
||||
def is_url(self, file_path: str) -> bool:
|
||||
"""check if the file path is url"""
|
||||
|
@ -265,7 +269,7 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
"type": "function",
|
||||
"function": {
|
||||
"name": "load_image",
|
||||
"description": "Loads an image file into conversation context from the /workspace directory or from a URL. Provide either a relative path to a local image or the URL to an image. The image will be compressed before sending to reduce token usage. IMPORTANT: If you previously loaded an image but cleared context, you can load it again by calling this tool with the same file path - no need to ask user to re-upload.",
|
||||
"description": "Loads an image file into conversation context from the /workspace directory or from a URL. CRITICAL: After loading, you MUST analyze the image thoroughly, write a detailed summary, and then call clear_images_from_context to free context tokens. Images consume significant tokens and must be actively managed. You can reload any image later with the same file path if needed.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -353,6 +357,15 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
|
||||
# Continue with original path if save fails
|
||||
|
||||
# CRITICAL: Validate MIME type before upload - Anthropic only accepts 4 formats
|
||||
SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
|
||||
if compressed_mime_type not in SUPPORTED_MIME_TYPES:
|
||||
return self.fail_response(
|
||||
f"Invalid image format '{compressed_mime_type}' after compression. "
|
||||
f"Only {', '.join(SUPPORTED_MIME_TYPES)} are supported for viewing by the AI. "
|
||||
f"Original file: '{cleaned_path}'. Please convert the image to a supported format."
|
||||
)
|
||||
|
||||
# Upload to Supabase Storage instead of base64
|
||||
try:
|
||||
# Generate unique filename
|
||||
|
@ -418,7 +431,7 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
"type": "function",
|
||||
"function": {
|
||||
"name": "clear_images_from_context",
|
||||
"description": "Clears all images from conversation memory. Use when done analyzing images or to free up context tokens. IMPORTANT: Files remain accessible - use load_image with the same path to load any image again instead of asking user to re-upload.",
|
||||
"description": "REQUIRED after viewing images: Removes all images and their instructions from context to free up tokens. You MUST call this after analyzing images. The image files remain accessible in the sandbox - you can reload them later with load_image if needed. This is critical for context management.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
|
@ -435,7 +448,13 @@ class SandboxVisionTool(SandboxToolsBase):
|
|||
deleted_count = await self.image_context_manager.clear_images_from_context(self.thread_id)
|
||||
|
||||
if deleted_count > 0:
|
||||
return self.success_response(f"Successfully cleared {deleted_count} image(s) from conversation context. Visual memory has been reset.")
|
||||
# Typically 2 messages per image: the image itself + the context instruction
|
||||
image_count = deleted_count // 2
|
||||
return self.success_response(
|
||||
f"Successfully cleared approximately {image_count} image(s) and their instructions from conversation context "
|
||||
f"({deleted_count} total messages removed). Context tokens freed up. "
|
||||
f"You can reload any image again using load_image if needed."
|
||||
)
|
||||
else:
|
||||
return self.success_response("No images found in conversation context to clear.")
|
||||
|
||||
|
|
Loading…
Reference in New Issue