mirror of https://github.com/kortix-ai/suna.git
context window defs
parent 4cacf55c5b
commit 19765f56c0
@@ -11,6 +11,7 @@ from typing import List, Dict, Any, Optional, Union
 from litellm.utils import token_counter
 from services.supabase import DBConnection
 from utils.logger import logger
+from utils.constants import get_model_context_window
 
 DEFAULT_TOKEN_THRESHOLD = 120000
 
@@ -217,17 +218,22 @@ class ContextManager:
             token_threshold: Token threshold for individual message compression (must be a power of 2)
             max_iterations: Maximum number of compression iterations
         """
-        # Set model-specific token limits
-        if 'sonnet' in llm_model.lower():
-            max_tokens = 200 * 1000 - 64000 - 28000
-        elif 'gpt' in llm_model.lower():
-            max_tokens = 128 * 1000 - 28000
-        elif 'gemini' in llm_model.lower():
-            max_tokens = 1000 * 1000 - 300000
-        elif 'deepseek' in llm_model.lower():
-            max_tokens = 128 * 1000 - 28000
-        else:
-            max_tokens = 41 * 1000 - 10000
+        # Get model-specific token limits from constants
+        context_window = get_model_context_window(llm_model)
+
+        # Reserve tokens for output generation and safety margin
+        if context_window >= 1_000_000:  # Very large context models (Gemini)
+            max_tokens = context_window - 300_000  # Large safety margin for huge contexts
+        elif context_window >= 400_000:  # Large context models (GPT-5)
+            max_tokens = context_window - 64_000  # Reserve for output + margin
+        elif context_window >= 200_000:  # Medium context models (Claude Sonnet)
+            max_tokens = context_window - 32_000  # Reserve for output + margin
+        elif context_window >= 100_000:  # Standard large context models
+            max_tokens = context_window - 16_000  # Reserve for output + margin
+        else:  # Smaller context models
+            max_tokens = context_window - 8_000  # Reserve for output + margin
+
+        logger.debug(f"Model {llm_model}: context_window={context_window}, effective_limit={max_tokens}")
 
         result = messages
         result = self.remove_meta_messages(result)
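Back-of-the-envelope comparison of the two code paths for a 200k-window Claude Sonnet model, using only the constants visible in this hunk (the variable names below are illustrative, not from the repo):

    # Old hardcoded budget vs. new window-derived budget for Claude Sonnet
    old_limit = 200 * 1000 - 64000 - 28000   # 108,000 tokens
    new_limit = 200_000 - 32_000             # 168,000 tokens

So the refactor both centralizes the numbers in utils.constants and loosens the Sonnet input budget from 108k to 168k tokens.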
File diff suppressed because one or more lines are too long
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+"""
+Test script to demonstrate the context manager debug functionality.
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from agentpress.context_manager import ContextManager
+
+def test_context_compression():
+    """Test the context compression with debug output."""
+
+    # Create sample messages that will trigger compression
+    sample_messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful AI assistant.",
+            "message_id": "msg_001"
+        },
+        {
+            "role": "user",
+            "content": "Hello, can you help me with a complex task that involves analyzing a large dataset and generating comprehensive reports? " * 100,  # Make it long
+            "message_id": "msg_002"
+        },
+        {
+            "role": "assistant",
+            "content": "Certainly! I'd be happy to help you with your dataset analysis and report generation. Let me break this down into manageable steps... " * 200,  # Make it very long
+            "message_id": "msg_003"
+        },
+        {
+            "role": "user",
+            "content": "Great! Here's my dataset: " + "x" * 10000,  # Large content
+            "message_id": "msg_004"
+        }
+    ]
+
+    # Create context manager
+    cm = ContextManager()
+
+    # Test compression with debug
+    print("Testing context compression with debug output...")
+    print(f"Original messages count: {len(sample_messages)}")
+
+    compressed = cm.compress_messages(
+        messages=sample_messages,
+        llm_model="gpt-4",
+        thread_id="test_thread_123",
+        agent_run_id="test_run_456"
+    )
+
+    print(f"Compressed messages count: {len(compressed)}")
+    print(f"Debug files saved to: {cm.debug_dir}")
+
+    # List the debug files
+    import glob
+    debug_files = glob.glob(os.path.join(cm.debug_dir, "*.json"))
+    print(f"Debug files created: {len(debug_files)}")
+    for file in debug_files[-3:]:  # Show last 3 files
+        print(f"  - {os.path.basename(file)}")
+
+if __name__ == "__main__":
+    test_context_compression()
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Test script to verify model context window limits.
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from utils.constants import get_model_context_window, MODEL_CONTEXT_WINDOWS
+
+def test_model_limits():
+    """Test the model context window limits."""
+
+    print("=== All Model Context Windows ===")
+    for model, window in sorted(MODEL_CONTEXT_WINDOWS.items()):
+        print(f'{model}: {window:,} tokens')
+
+    print("\n=== Testing get_model_context_window function ===")
+    test_models = [
+        'gpt-5',
+        'sonnet-3.5',
+        'gemini-2.5-pro',
+        'claude-sonnet-4',
+        'grok-4',
+        'unknown-model',
+        'anthropic/claude-sonnet-4-20250514',
+        'openai/gpt-5-mini'
+    ]
+
+    for model in test_models:
+        window = get_model_context_window(model)
+        print(f'{model}: {window:,} tokens')
+
+    print("\n=== Context Manager Logic Simulation ===")
+    for model in ['gpt-5', 'anthropic/claude-sonnet-4', 'gemini/gemini-2.5-pro', 'unknown-model']:
+        context_window = get_model_context_window(model)
+
+        # Simulate the logic from context manager
+        if context_window >= 1_000_000:  # Very large context models (Gemini)
+            max_tokens = context_window - 300_000
+        elif context_window >= 400_000:  # Large context models (GPT-5)
+            max_tokens = context_window - 64_000
+        elif context_window >= 200_000:  # Medium context models (Claude Sonnet)
+            max_tokens = context_window - 32_000
+        elif context_window >= 100_000:  # Standard large context models
+            max_tokens = context_window - 16_000
+        else:  # Smaller context models
+            max_tokens = context_window - 8_000
+
+        print(f'{model}: context={context_window:,} → effective_limit={max_tokens:,} (reserved: {context_window-max_tokens:,})')
+
+if __name__ == "__main__":
+    test_model_limits()
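Worked by hand from the thresholds above, the simulation loop should print roughly the following, assuming the context windows defined in this commit (an expected-output sketch, not captured output):

    gpt-5: context=400,000 → effective_limit=336,000 (reserved: 64,000)
    anthropic/claude-sonnet-4: context=200,000 → effective_limit=168,000 (reserved: 32,000)
    gemini/gemini-2.5-pro: context=2,000,000 → effective_limit=1,700,000 (reserved: 300,000)
    unknown-model: context=31,000 → effective_limit=23,000 (reserved: 8,000)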
@@ -8,6 +8,7 @@ MODELS = {
             "input_cost_per_million_tokens": 3.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["paid"]
     },
     # "openrouter/deepseek/deepseek-chat": {
@@ -48,6 +49,7 @@ MODELS = {
             "input_cost_per_million_tokens": 1.00,
             "output_cost_per_million_tokens": 3.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["free", "paid"]
     },
     "xai/grok-4": {
@@ -56,6 +58,7 @@ MODELS = {
             "input_cost_per_million_tokens": 5.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 128_000,  # 128k tokens
         "tier_availability": ["paid"]
     },
 
@@ -66,6 +69,7 @@ MODELS = {
             "input_cost_per_million_tokens": 1.25,
             "output_cost_per_million_tokens": 10.00
         },
+        "context_window": 2_000_000,  # 2M tokens
         "tier_availability": ["paid"]
     },
     # "openai/gpt-4o": {
@@ -90,6 +94,7 @@ MODELS = {
             "input_cost_per_million_tokens": 1.25,
             "output_cost_per_million_tokens": 10.00
         },
+        "context_window": 400_000,  # 400k tokens
         "tier_availability": ["paid"]
     },
     "openai/gpt-5-mini": {
@@ -98,6 +103,7 @@ MODELS = {
             "input_cost_per_million_tokens": 0.25,
             "output_cost_per_million_tokens": 2.00
         },
+        "context_window": 400_000,  # 400k tokens
         "tier_availability": ["paid"]
     },
     # "openai/gpt-4.1-mini": {
@@ -114,6 +120,7 @@ MODELS = {
             "input_cost_per_million_tokens": 3.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["paid"]
     },
     "anthropic/claude-3-5-sonnet-latest": {
@@ -122,6 +129,7 @@ MODELS = {
             "input_cost_per_million_tokens": 3.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["paid"]
     },
 }
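Pieced together from the hunks above, a MODELS entry now looks roughly like this; the entry name is assumed from the alias hunk further down, and keys not shown in this diff (such as aliases) are omitted:

    "anthropic/claude-sonnet-4-20250514": {   # assumed entry name, for illustration
        "pricing": {
            "input_cost_per_million_tokens": 3.00,
            "output_cost_per_million_tokens": 15.00
        },
        "context_window": 200_000,  # 200k tokens -- the field this commit adds
        "tier_availability": ["paid"]
    },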
@@ -140,6 +148,9 @@ def _generate_model_structures():
     # Generate pricing
     pricing = {}
 
+    # Generate context window limits
+    context_windows = {}
+
     for model_name, config in MODELS.items():
         # Add to tier lists
         if "free" in config["tier_availability"]:
@@ -154,29 +165,43 @@
         # Add pricing
         pricing[model_name] = config["pricing"]
 
-        # Also add pricing for legacy model name variations
+        # Add context window limits
+        if "context_window" in config:
+            context_windows[model_name] = config["context_window"]
+
+        # Also add pricing and context windows for legacy model name variations
         if model_name.startswith("openrouter/deepseek/"):
             legacy_name = model_name.replace("openrouter/", "")
             pricing[legacy_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[legacy_name] = config["context_window"]
         elif model_name.startswith("openrouter/qwen/"):
             legacy_name = model_name.replace("openrouter/", "")
             pricing[legacy_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[legacy_name] = config["context_window"]
         elif model_name.startswith("gemini/"):
             legacy_name = model_name.replace("gemini/", "")
             pricing[legacy_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[legacy_name] = config["context_window"]
         elif model_name.startswith("anthropic/"):
             # Add anthropic/claude-sonnet-4 alias for claude-sonnet-4-20250514
             if "claude-sonnet-4-20250514" in model_name:
                 pricing["anthropic/claude-sonnet-4"] = config["pricing"]
+                if "context_window" in config:
+                    context_windows["anthropic/claude-sonnet-4"] = config["context_window"]
         elif model_name.startswith("xai/"):
             # Add pricing for OpenRouter x-ai models
             openrouter_name = model_name.replace("xai/", "openrouter/x-ai/")
             pricing[openrouter_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[openrouter_name] = config["context_window"]
 
-    return free_models, paid_models, aliases, pricing
+    return free_models, paid_models, aliases, pricing, context_windows
 
 # Generate all structures
-FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES = _generate_model_structures()
+FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES, MODEL_CONTEXT_WINDOWS = _generate_model_structures()
 
 MODEL_ACCESS_TIERS = {
     "free": FREE_TIER_MODELS,
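The net effect on the generated table, sketched as assertions (model names and values are taken from this diff; the assertions themselves are illustrative and not part of the commit):

    from utils.constants import MODEL_CONTEXT_WINDOWS

    assert MODEL_CONTEXT_WINDOWS["xai/grok-4"] == 128_000
    assert MODEL_CONTEXT_WINDOWS["openrouter/x-ai/grok-4"] == 128_000     # OpenRouter variant
    assert MODEL_CONTEXT_WINDOWS["anthropic/claude-sonnet-4"] == 200_000  # alias of the dated name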
@@ -191,3 +216,40 @@ MODEL_ACCESS_TIERS = {
     "tier_6_42_yearly_commitment": PAID_TIER_MODELS,
     "tier_12_84_yearly_commitment": PAID_TIER_MODELS,
 }
+
+def get_model_context_window(model_name: str, default: int = 31_000) -> int:
+    """
+    Get the context window size for a given model.
+
+    Args:
+        model_name: The model name or alias
+        default: Default context window if model not found
+
+    Returns:
+        Context window size in tokens
+    """
+    # Check direct model name first
+    if model_name in MODEL_CONTEXT_WINDOWS:
+        return MODEL_CONTEXT_WINDOWS[model_name]
+
+    # Check if it's an alias
+    if model_name in MODEL_NAME_ALIASES:
+        canonical_name = MODEL_NAME_ALIASES[model_name]
+        if canonical_name in MODEL_CONTEXT_WINDOWS:
+            return MODEL_CONTEXT_WINDOWS[canonical_name]
+
+    # Fallback patterns for common model naming variations
+    if 'sonnet' in model_name.lower():
+        return 200_000  # Claude Sonnet models
+    elif 'gpt-5' in model_name.lower():
+        return 400_000  # GPT-5 models
+    elif 'gemini' in model_name.lower():
+        return 2_000_000  # Gemini models
+    elif 'grok' in model_name.lower():
+        return 128_000  # Grok models
+    elif 'gpt' in model_name.lower():
+        return 128_000  # GPT-4 and variants
+    elif 'deepseek' in model_name.lower():
+        return 128_000  # DeepSeek models
+
+    return default
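A minimal usage sketch for the new helper, assuming the windows defined above; 'my-custom-sonnet-finetune' and 'totally-unknown' are made-up names chosen to exercise the fallback and default paths:

    from utils.constants import get_model_context_window

    get_model_context_window("openai/gpt-5-mini")          # 400_000 (direct lookup)
    get_model_context_window("my-custom-sonnet-finetune")  # 200_000 ('sonnet' substring fallback)
    get_model_context_window("totally-unknown")            # 31_000 (default)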