Merge pull request #1786 from KrishavRajSingh/main

rm image from context
This commit is contained in:
Krishav 2025-10-08 07:43:20 +05:30 committed by GitHub
commit cc5cc54ad9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 91 additions and 16 deletions

View File

@ -69,23 +69,79 @@ class ImageContextManager:
}).execute()
result = db_result.data[0] if db_result.data and len(db_result.data) > 0 else None
if result:
logger.debug(f"Added image to context: {file_path}")
return result
else:
if not result:
logger.error("Failed to insert image message")
return None
logger.debug(f"Added image to context: {file_path}")
# ===== CRITICAL: Add image context management instruction =====
# This hardcoded instruction ensures images are properly managed and removed
context_instruction = {
"role": "user",
"content": """⚠️ IMPORTANT - IMAGE CONTEXT MANAGEMENT:
You are now viewing an image that has been loaded into context. Due to context window limitations, this image WILL BE AUTOMATICALLY REMOVED after you analyze it.
REQUIRED ACTIONS:
1. **Analyze the image thoroughly** - Look at all details, text, UI elements, colors, layout, etc.
2. **Write a DETAILED SUMMARY** - Describe what you see in comprehensive detail so you can reference it later. Include:
- All visible text and labels
- UI components and their states
- Colors, layout, and visual hierarchy
- Any errors, warnings, or important information
- Relationships between elements
3. **Call clear_images_from_context** - You MUST call this tool after your analysis to free up context tokens
WHY THIS MATTERS:
- Images consume significant context tokens
- You will NOT see this image again after it's cleared (unless explicitly reloaded with load_image)
- Your written summary is your only future reference to this image
- Failing to clear images will cause context overflow
REMEMBER: Be thorough in your summary - it's your permanent record of what you saw!"""
}
context_instruction_metadata = {
"image_context": True,
"instruction_type": "context_management",
"related_file": file_path
}
# Add the context management instruction
if self.thread_manager:
await self.thread_manager.add_message(
thread_id=thread_id,
type='user',
content=context_instruction,
is_llm_message=True,
metadata=context_instruction_metadata
)
else:
# Fallback to direct DB access
client = await self.db.client
await client.table('messages').insert({
'thread_id': thread_id,
'type': 'user',
'content': context_instruction,
'is_llm_message': True,
'metadata': context_instruction_metadata
}).execute()
logger.debug(f"Added context management instruction for image: {file_path}")
return result
except Exception as e:
logger.error(f"Failed to add image to context: {str(e)}", exc_info=True)
return None
async def clear_images_from_context(self, thread_id: str) -> int:
"""Remove all image context messages from a thread."""
"""Remove all image context messages from a thread, including images and their management instructions."""
try:
client = await self.db.client
# Delete all messages with image_context metadata
# Delete all messages with image_context metadata (includes both images and instructions)
result = await client.table('messages').delete().eq(
'thread_id', thread_id
).eq(
@ -95,7 +151,7 @@ class ImageContextManager:
).execute()
deleted_count = len(result.data) if result.data else 0
logger.debug(f"Cleared {deleted_count} images from context")
logger.debug(f"Cleared {deleted_count} image-related messages from context (images + instructions)")
return deleted_count
except Exception as e:

View File

@ -165,11 +165,9 @@ class SandboxVisionTool(SandboxToolsBase):
os.unlink(temp_svg_path)
except ImportError:
print(f"[SeeImage] SVG conversion not available - using original SVG file '{file_path}'")
return image_bytes, mime_type
raise Exception(f"SVG conversion libraries not available. Cannot display SVG file '{file_path}'. Please convert to PNG manually.")
except Exception as e:
print(f"[SeeImage] SVG conversion failed - using original SVG file '{file_path}': {str(e)}")
return image_bytes, mime_type
raise Exception(f"SVG conversion failed for '{file_path}': {str(e)}. Please convert to PNG manually.")
# Open image from bytes
img = Image.open(BytesIO(image_bytes))
@ -220,8 +218,14 @@ class SandboxVisionTool(SandboxToolsBase):
return compressed_bytes, output_mime
except Exception as e:
print(f"[SeeImage] Failed to compress image: {str(e)}. Using original.")
return image_bytes, mime_type
# CRITICAL: Never return unsupported formats
# If compression fails, we need to ensure we still return a supported format
if mime_type in ['image/jpeg', 'image/png', 'image/gif', 'image/webp']:
print(f"[SeeImage] Failed to compress image: {str(e)}. Using original (format is supported).")
return image_bytes, mime_type
else:
# Unsupported format and compression failed - must fail
raise Exception(f"Failed to process image '{file_path}' with unsupported format '{mime_type}': {str(e)}")
def is_url(self, file_path: str) -> bool:
"""check if the file path is url"""
@ -265,7 +269,7 @@ class SandboxVisionTool(SandboxToolsBase):
"type": "function",
"function": {
"name": "load_image",
"description": "Loads an image file into conversation context from the /workspace directory or from a URL. Provide either a relative path to a local image or the URL to an image. The image will be compressed before sending to reduce token usage. IMPORTANT: If you previously loaded an image but cleared context, you can load it again by calling this tool with the same file path - no need to ask user to re-upload.",
"description": "Loads an image file into conversation context from the /workspace directory or from a URL. CRITICAL: After loading, you MUST analyze the image thoroughly, write a detailed summary, and then call clear_images_from_context to free context tokens. Images consume significant tokens and must be actively managed. You can reload any image later with the same file path if needed.",
"parameters": {
"type": "object",
"properties": {
@ -353,6 +357,15 @@ class SandboxVisionTool(SandboxToolsBase):
print(f"[SeeImage] Warning: Could not save converted PNG to sandbox: {e}")
# Continue with original path if save fails
# CRITICAL: Validate MIME type before upload - Anthropic only accepts 4 formats
SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
if compressed_mime_type not in SUPPORTED_MIME_TYPES:
return self.fail_response(
f"Invalid image format '{compressed_mime_type}' after compression. "
f"Only {', '.join(SUPPORTED_MIME_TYPES)} are supported for viewing by the AI. "
f"Original file: '{cleaned_path}'. Please convert the image to a supported format."
)
# Upload to Supabase Storage instead of base64
try:
# Generate unique filename
@ -418,7 +431,7 @@ class SandboxVisionTool(SandboxToolsBase):
"type": "function",
"function": {
"name": "clear_images_from_context",
"description": "Clears all images from conversation memory. Use when done analyzing images or to free up context tokens. IMPORTANT: Files remain accessible - use load_image with the same path to load any image again instead of asking user to re-upload.",
"description": "REQUIRED after viewing images: Removes all images and their instructions from context to free up tokens. You MUST call this after analyzing images. The image files remain accessible in the sandbox - you can reload them later with load_image if needed. This is critical for context management.",
"parameters": {
"type": "object",
"properties": {},
@ -435,7 +448,13 @@ class SandboxVisionTool(SandboxToolsBase):
deleted_count = await self.image_context_manager.clear_images_from_context(self.thread_id)
if deleted_count > 0:
return self.success_response(f"Successfully cleared {deleted_count} image(s) from conversation context. Visual memory has been reset.")
# Typically 2 messages per image: the image itself + the context instruction
image_count = deleted_count // 2
return self.success_response(
f"Successfully cleared approximately {image_count} image(s) and their instructions from conversation context "
f"({deleted_count} total messages removed). Context tokens freed up. "
f"You can reload any image again using load_image if needed."
)
else:
return self.success_response("No images found in conversation context to clear.")