mirror of https://github.com/kortix-ai/suna.git
Merge pull request #1786 from KrishavRajSingh/main
rm image from context
This commit is contained in:
commit
cc5cc54ad9
|
@ -69,23 +69,79 @@ class ImageContextManager:
|
||||||
}).execute()
|
}).execute()
|
||||||
result = db_result.data[0] if db_result.data and len(db_result.data) > 0 else None
|
result = db_result.data[0] if db_result.data and len(db_result.data) > 0 else None
|
||||||
|
|
||||||
if result:
|
if not result:
|
||||||
logger.debug(f"Added image to context: {file_path}")
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
logger.error("Failed to insert image message")
|
logger.error("Failed to insert image message")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
logger.debug(f"Added image to context: {file_path}")
|
||||||
|
|
||||||
|
# ===== CRITICAL: Add image context management instruction =====
|
||||||
|
# This hardcoded instruction ensures images are properly managed and removed
|
||||||
|
context_instruction = {
|
||||||
|
"role": "user",
|
||||||
|
"content": """⚠️ IMPORTANT - IMAGE CONTEXT MANAGEMENT:
|
||||||
|
|
||||||
|
You are now viewing an image that has been loaded into context. Due to context window limitations, this image WILL BE AUTOMATICALLY REMOVED after you analyze it.
|
||||||
|
|
||||||
|
REQUIRED ACTIONS:
|
||||||
|
1. **Analyze the image thoroughly** - Look at all details, text, UI elements, colors, layout, etc.
|
||||||
|
2. **Write a DETAILED SUMMARY** - Describe what you see in comprehensive detail so you can reference it later. Include:
|
||||||
|
- All visible text and labels
|
||||||
|
- UI components and their states
|
||||||
|
- Colors, layout, and visual hierarchy
|
||||||
|
- Any errors, warnings, or important information
|
||||||
|
- Relationships between elements
|
||||||
|
3. **Call clear_images_from_context** - You MUST call this tool after your analysis to free up context tokens
|
||||||
|
|
||||||
|
WHY THIS MATTERS:
|
||||||
|
- Images consume significant context tokens
|
||||||
|
- You will NOT see this image again after it's cleared (unless explicitly reloaded with load_image)
|
||||||
|
- Your written summary is your only future reference to this image
|
||||||
|
- Failing to clear images will cause context overflow
|
||||||
|
|
||||||
|
REMEMBER: Be thorough in your summary - it's your permanent record of what you saw!"""
|
||||||
|
}
|
||||||
|
|
||||||
|
context_instruction_metadata = {
|
||||||
|
"image_context": True,
|
||||||
|
"instruction_type": "context_management",
|
||||||
|
"related_file": file_path
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add the context management instruction
|
||||||
|
if self.thread_manager:
|
||||||
|
await self.thread_manager.add_message(
|
||||||
|
thread_id=thread_id,
|
||||||
|
type='user',
|
||||||
|
content=context_instruction,
|
||||||
|
is_llm_message=True,
|
||||||
|
metadata=context_instruction_metadata
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Fallback to direct DB access
|
||||||
|
client = await self.db.client
|
||||||
|
await client.table('messages').insert({
|
||||||
|
'thread_id': thread_id,
|
||||||
|
'type': 'user',
|
||||||
|
'content': context_instruction,
|
||||||
|
'is_llm_message': True,
|
||||||
|
'metadata': context_instruction_metadata
|
||||||
|
}).execute()
|
||||||
|
|
||||||
|
logger.debug(f"Added context management instruction for image: {file_path}")
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to add image to context: {str(e)}", exc_info=True)
|
logger.error(f"Failed to add image to context: {str(e)}", exc_info=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def clear_images_from_context(self, thread_id: str) -> int:
|
async def clear_images_from_context(self, thread_id: str) -> int:
|
||||||
"""Remove all image context messages from a thread."""
|
"""Remove all image context messages from a thread, including images and their management instructions."""
|
||||||
try:
|
try:
|
||||||
client = await self.db.client
|
client = await self.db.client
|
||||||
|
|
||||||
# Delete all messages with image_context metadata
|
# Delete all messages with image_context metadata (includes both images and instructions)
|
||||||
result = await client.table('messages').delete().eq(
|
result = await client.table('messages').delete().eq(
|
||||||
'thread_id', thread_id
|
'thread_id', thread_id
|
||||||
).eq(
|
).eq(
|
||||||
|
@ -95,7 +151,7 @@ class ImageContextManager:
|
||||||
).execute()
|
).execute()
|
||||||
|
|
||||||
deleted_count = len(result.data) if result.data else 0
|
deleted_count = len(result.data) if result.data else 0
|
||||||
logger.debug(f"Cleared {deleted_count} images from context")
|
logger.debug(f"Cleared {deleted_count} image-related messages from context (images + instructions)")
|
||||||
return deleted_count
|
return deleted_count
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@ -165,11 +165,9 @@ class SandboxVisionTool(SandboxToolsBase):
|
||||||
os.unlink(temp_svg_path)
|
os.unlink(temp_svg_path)
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'")
|
raise Exception(f"SVG conversion libraries not available. Cannot display SVG file '{file_path}'. Please convert to PNG manually.")
|
||||||
return image_bytes, mime_type
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}")
|
raise Exception(f"SVG conversion failed for '{file_path}': {str(e)}. Please convert to PNG manually.")
|
||||||
return image_bytes, mime_type
|
|
||||||
|
|
||||||
# Open image from bytes
|
# Open image from bytes
|
||||||
img = Image.open(BytesIO(image_bytes))
|
img = Image.open(BytesIO(image_bytes))
|
||||||
|
@ -220,8 +218,14 @@ class SandboxVisionTool(SandboxToolsBase):
|
||||||
return compressed_bytes, output_mime
|
return compressed_bytes, output_mime
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[SeeImage] Failed to compress image: {str(e)}. Using original.")
|
# CRITICAL: Never return unsupported formats
|
||||||
return image_bytes, mime_type
|
# If compression fails, we need to ensure we still return a supported format
|
||||||
|
if mime_type in ['image/jpeg', 'image/png', 'image/gif', 'image/webp']:
|
||||||
|
print(f"[SeeImage] Failed to compress image: {str(e)}. Using original (format is supported).")
|
||||||
|
return image_bytes, mime_type
|
||||||
|
else:
|
||||||
|
# Unsupported format and compression failed - must fail
|
||||||
|
raise Exception(f"Failed to process image '{file_path}' with unsupported format '{mime_type}': {str(e)}")
|
||||||
|
|
||||||
def is_url(self, file_path: str) -> bool:
|
def is_url(self, file_path: str) -> bool:
|
||||||
"""check if the file path is url"""
|
"""check if the file path is url"""
|
||||||
|
@ -265,7 +269,7 @@ class SandboxVisionTool(SandboxToolsBase):
|
||||||
"type": "function",
|
"type": "function",
|
||||||
"function": {
|
"function": {
|
||||||
"name": "load_image",
|
"name": "load_image",
|
||||||
"description": "Loads an image file into conversation context from the /workspace directory or from a URL. Provide either a relative path to a local image or the URL to an image. The image will be compressed before sending to reduce token usage. IMPORTANT: If you previously loaded an image but cleared context, you can load it again by calling this tool with the same file path - no need to ask user to re-upload.",
|
"description": "Loads an image file into conversation context from the /workspace directory or from a URL. CRITICAL: After loading, you MUST analyze the image thoroughly, write a detailed summary, and then call clear_images_from_context to free context tokens. Images consume significant tokens and must be actively managed. You can reload any image later with the same file path if needed.",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -353,6 +357,15 @@ class SandboxVisionTool(SandboxToolsBase):
|
||||||
print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
|
print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
|
||||||
# Continue with original path if save fails
|
# Continue with original path if save fails
|
||||||
|
|
||||||
|
# CRITICAL: Validate MIME type before upload - Anthropic only accepts 4 formats
|
||||||
|
SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
|
||||||
|
if compressed_mime_type not in SUPPORTED_MIME_TYPES:
|
||||||
|
return self.fail_response(
|
||||||
|
f"Invalid image format '{compressed_mime_type}' after compression. "
|
||||||
|
f"Only {', '.join(SUPPORTED_MIME_TYPES)} are supported for viewing by the AI. "
|
||||||
|
f"Original file: '{cleaned_path}'. Please convert the image to a supported format."
|
||||||
|
)
|
||||||
|
|
||||||
# Upload to Supabase Storage instead of base64
|
# Upload to Supabase Storage instead of base64
|
||||||
try:
|
try:
|
||||||
# Generate unique filename
|
# Generate unique filename
|
||||||
|
@ -418,7 +431,7 @@ class SandboxVisionTool(SandboxToolsBase):
|
||||||
"type": "function",
|
"type": "function",
|
||||||
"function": {
|
"function": {
|
||||||
"name": "clear_images_from_context",
|
"name": "clear_images_from_context",
|
||||||
"description": "Clears all images from conversation memory. Use when done analyzing images or to free up context tokens. IMPORTANT: Files remain accessible - use load_image with the same path to load any image again instead of asking user to re-upload.",
|
"description": "REQUIRED after viewing images: Removes all images and their instructions from context to free up tokens. You MUST call this after analyzing images. The image files remain accessible in the sandbox - you can reload them later with load_image if needed. This is critical for context management.",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {},
|
"properties": {},
|
||||||
|
@ -435,7 +448,13 @@ class SandboxVisionTool(SandboxToolsBase):
|
||||||
deleted_count = await self.image_context_manager.clear_images_from_context(self.thread_id)
|
deleted_count = await self.image_context_manager.clear_images_from_context(self.thread_id)
|
||||||
|
|
||||||
if deleted_count > 0:
|
if deleted_count > 0:
|
||||||
return self.success_response(f"Successfully cleared {deleted_count} image(s) from conversation context. Visual memory has been reset.")
|
# Typically 2 messages per image: the image itself + the context instruction
|
||||||
|
image_count = deleted_count // 2
|
||||||
|
return self.success_response(
|
||||||
|
f"Successfully cleared approximately {image_count} image(s) and their instructions from conversation context "
|
||||||
|
f"({deleted_count} total messages removed). Context tokens freed up. "
|
||||||
|
f"You can reload any image again using load_image if needed."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
return self.success_response("No images found in conversation context to clear.")
|
return self.success_response("No images found in conversation context to clear.")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue