context window defs

marko-kraemer 2025-08-17 12:24:37 -07:00
parent 4cacf55c5b
commit 19765f56c0
34 changed files with 4522 additions and 14 deletions

View File: agentpress/context_manager.py

@@ -11,6 +11,7 @@ from typing import List, Dict, Any, Optional, Union
 from litellm.utils import token_counter
 from services.supabase import DBConnection
 from utils.logger import logger
+from utils.constants import get_model_context_window

 DEFAULT_TOKEN_THRESHOLD = 120000
@@ -217,17 +218,22 @@ class ContextManager:
             token_threshold: Token threshold for individual message compression (must be a power of 2)
             max_iterations: Maximum number of compression iterations
         """
-        # Set model-specific token limits
-        if 'sonnet' in llm_model.lower():
-            max_tokens = 200 * 1000 - 64000 - 28000
-        elif 'gpt' in llm_model.lower():
-            max_tokens = 128 * 1000 - 28000
-        elif 'gemini' in llm_model.lower():
-            max_tokens = 1000 * 1000 - 300000
-        elif 'deepseek' in llm_model.lower():
-            max_tokens = 128 * 1000 - 28000
-        else:
-            max_tokens = 41 * 1000 - 10000
+        # Get model-specific token limits from constants
+        context_window = get_model_context_window(llm_model)
+
+        # Reserve tokens for output generation and safety margin
+        if context_window >= 1_000_000:  # Very large context models (Gemini)
+            max_tokens = context_window - 300_000  # Large safety margin for huge contexts
+        elif context_window >= 400_000:  # Large context models (GPT-5)
+            max_tokens = context_window - 64_000  # Reserve for output + margin
+        elif context_window >= 200_000:  # Medium context models (Claude Sonnet)
+            max_tokens = context_window - 32_000  # Reserve for output + margin
+        elif context_window >= 100_000:  # Standard large context models
+            max_tokens = context_window - 16_000  # Reserve for output + margin
+        else:  # Smaller context models
+            max_tokens = context_window - 8_000  # Reserve for output + margin
+
+        logger.debug(f"Model {llm_model}: context_window={context_window}, effective_limit={max_tokens}")

         result = messages
         result = self.remove_meta_messages(result)
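
Worked through, the new tiers are easiest to see as a standalone sketch (a minimal rendering of the same thresholds; the helper name is hypothetical, since the commit inlines this logic):

# Hypothetical helper mirroring the reservation tiers added above.
def effective_token_limit(context_window: int) -> int:
    if context_window >= 1_000_000:   # Gemini-class windows
        return context_window - 300_000
    elif context_window >= 400_000:   # GPT-5-class windows
        return context_window - 64_000
    elif context_window >= 200_000:   # Claude Sonnet-class windows
        return context_window - 32_000
    elif context_window >= 100_000:
        return context_window - 16_000
    return context_window - 8_000

# effective_token_limit(200_000)   -> 168_000
# effective_token_limit(400_000)   -> 336_000
# effective_token_limit(2_000_000) -> 1_700_000
# effective_token_limit(31_000)    -> 23_000  (the default fallback window)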

File diff suppressed because one or more lines are too long (this notice appeared 30 times, once for each of the remaining changed files)

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python3
"""
Test script to demonstrate the context manager debug functionality.
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from agentpress.context_manager import ContextManager


def test_context_compression():
    """Test the context compression with debug output."""
    # Create sample messages that will trigger compression
    sample_messages = [
        {
            "role": "system",
            "content": "You are a helpful AI assistant.",
            "message_id": "msg_001"
        },
        {
            "role": "user",
            "content": "Hello, can you help me with a complex task that involves analyzing a large dataset and generating comprehensive reports? " * 100,  # Make it long
            "message_id": "msg_002"
        },
        {
            "role": "assistant",
            "content": "Certainly! I'd be happy to help you with your dataset analysis and report generation. Let me break this down into manageable steps... " * 200,  # Make it very long
            "message_id": "msg_003"
        },
        {
            "role": "user",
            "content": "Great! Here's my dataset: " + "x" * 10000,  # Large content
            "message_id": "msg_004"
        }
    ]

    # Create context manager
    cm = ContextManager()

    # Test compression with debug
    print("Testing context compression with debug output...")
    print(f"Original messages count: {len(sample_messages)}")

    compressed = cm.compress_messages(
        messages=sample_messages,
        llm_model="gpt-4",
        thread_id="test_thread_123",
        agent_run_id="test_run_456"
    )

    print(f"Compressed messages count: {len(compressed)}")
    print(f"Debug files saved to: {cm.debug_dir}")

    # List the debug files
    import glob
    debug_files = glob.glob(os.path.join(cm.debug_dir, "*.json"))
    print(f"Debug files created: {len(debug_files)}")
    for file in debug_files[-3:]:  # Show last 3 files
        print(f"  - {os.path.basename(file)}")


if __name__ == "__main__":
    test_context_compression()

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""
Test script to verify model context window limits.
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from utils.constants import get_model_context_window, MODEL_CONTEXT_WINDOWS


def test_model_limits():
    """Test the model context window limits."""
    print("=== All Model Context Windows ===")
    for model, window in sorted(MODEL_CONTEXT_WINDOWS.items()):
        print(f'{model}: {window:,} tokens')

    print("\n=== Testing get_model_context_window function ===")
    test_models = [
        'gpt-5',
        'sonnet-3.5',
        'gemini-2.5-pro',
        'claude-sonnet-4',
        'grok-4',
        'unknown-model',
        'anthropic/claude-sonnet-4-20250514',
        'openai/gpt-5-mini'
    ]
    for model in test_models:
        window = get_model_context_window(model)
        print(f'{model}: {window:,} tokens')

    print("\n=== Context Manager Logic Simulation ===")
    for model in ['gpt-5', 'anthropic/claude-sonnet-4', 'gemini/gemini-2.5-pro', 'unknown-model']:
        context_window = get_model_context_window(model)

        # Simulate the logic from context manager
        if context_window >= 1_000_000:  # Very large context models (Gemini)
            max_tokens = context_window - 300_000
        elif context_window >= 400_000:  # Large context models (GPT-5)
            max_tokens = context_window - 64_000
        elif context_window >= 200_000:  # Medium context models (Claude Sonnet)
            max_tokens = context_window - 32_000
        elif context_window >= 100_000:  # Standard large context models
            max_tokens = context_window - 16_000
        else:  # Smaller context models
            max_tokens = context_window - 8_000

        print(f'{model}: context={context_window:,} → effective_limit={max_tokens:,} (reserved: {context_window - max_tokens:,})')


if __name__ == "__main__":
    test_model_limits()
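
Given the constants added in this commit, the simulation section should print approximately the following (worked out by hand from the tiers above; exact alias resolution depends on MODEL_NAME_ALIASES, which this page does not show in full):

# gpt-5: context=400,000 → effective_limit=336,000 (reserved: 64,000)
# anthropic/claude-sonnet-4: context=200,000 → effective_limit=168,000 (reserved: 32,000)
# gemini/gemini-2.5-pro: context=2,000,000 → effective_limit=1,700,000 (reserved: 300,000)
# unknown-model: context=31,000 → effective_limit=23,000 (reserved: 8,000)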

View File: utils/constants.py

@@ -8,6 +8,7 @@ MODELS = {
             "input_cost_per_million_tokens": 3.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["paid"]
     },
     # "openrouter/deepseek/deepseek-chat": {
@@ -48,6 +49,7 @@ MODELS = {
             "input_cost_per_million_tokens": 1.00,
             "output_cost_per_million_tokens": 3.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["free", "paid"]
     },
     "xai/grok-4": {
@@ -56,6 +58,7 @@ MODELS = {
             "input_cost_per_million_tokens": 5.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 128_000,  # 128k tokens
         "tier_availability": ["paid"]
     },
@@ -66,6 +69,7 @@ MODELS = {
             "input_cost_per_million_tokens": 1.25,
             "output_cost_per_million_tokens": 10.00
         },
+        "context_window": 2_000_000,  # 2M tokens
         "tier_availability": ["paid"]
     },
     # "openai/gpt-4o": {
@@ -90,6 +94,7 @@ MODELS = {
             "input_cost_per_million_tokens": 1.25,
             "output_cost_per_million_tokens": 10.00
         },
+        "context_window": 400_000,  # 400k tokens
         "tier_availability": ["paid"]
     },
     "openai/gpt-5-mini": {
@@ -98,6 +103,7 @@ MODELS = {
             "input_cost_per_million_tokens": 0.25,
             "output_cost_per_million_tokens": 2.00
         },
+        "context_window": 400_000,  # 400k tokens
         "tier_availability": ["paid"]
     },
     # "openai/gpt-4.1-mini": {
@@ -114,6 +120,7 @@ MODELS = {
             "input_cost_per_million_tokens": 3.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["paid"]
     },
     "anthropic/claude-3-5-sonnet-latest": {
@@ -122,6 +129,7 @@ MODELS = {
             "input_cost_per_million_tokens": 3.00,
             "output_cost_per_million_tokens": 15.00
         },
+        "context_window": 200_000,  # 200k tokens
         "tier_availability": ["paid"]
     },
 }
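
Pieced together from the hunk fragments above, a post-change MODELS entry presumably has this shape (the nesting is inferred from the generator code below, which reads config["pricing"], config["tier_availability"], and now config["context_window"]; other keys may exist that this page does not show):

"anthropic/claude-3-5-sonnet-latest": {
    "pricing": {
        "input_cost_per_million_tokens": 3.00,
        "output_cost_per_million_tokens": 15.00
    },
    "context_window": 200_000,  # 200k tokens (new in this commit)
    "tier_availability": ["paid"]
},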
@@ -140,6 +148,9 @@ def _generate_model_structures():
     # Generate pricing
     pricing = {}

+    # Generate context window limits
+    context_windows = {}
+
     for model_name, config in MODELS.items():
         # Add to tier lists
         if "free" in config["tier_availability"]:
@@ -154,29 +165,43 @@ def _generate_model_structures():
         # Add pricing
         pricing[model_name] = config["pricing"]

-        # Also add pricing for legacy model name variations
+        # Add context window limits
+        if "context_window" in config:
+            context_windows[model_name] = config["context_window"]
+
+        # Also add pricing and context windows for legacy model name variations
         if model_name.startswith("openrouter/deepseek/"):
             legacy_name = model_name.replace("openrouter/", "")
             pricing[legacy_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[legacy_name] = config["context_window"]
         elif model_name.startswith("openrouter/qwen/"):
             legacy_name = model_name.replace("openrouter/", "")
             pricing[legacy_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[legacy_name] = config["context_window"]
         elif model_name.startswith("gemini/"):
             legacy_name = model_name.replace("gemini/", "")
             pricing[legacy_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[legacy_name] = config["context_window"]
         elif model_name.startswith("anthropic/"):
             # Add anthropic/claude-sonnet-4 alias for claude-sonnet-4-20250514
             if "claude-sonnet-4-20250514" in model_name:
                 pricing["anthropic/claude-sonnet-4"] = config["pricing"]
+                if "context_window" in config:
+                    context_windows["anthropic/claude-sonnet-4"] = config["context_window"]
         elif model_name.startswith("xai/"):
             # Add pricing for OpenRouter x-ai models
             openrouter_name = model_name.replace("xai/", "openrouter/x-ai/")
             pricing[openrouter_name] = config["pricing"]
+            if "context_window" in config:
+                context_windows[openrouter_name] = config["context_window"]

-    return free_models, paid_models, aliases, pricing
+    return free_models, paid_models, aliases, pricing, context_windows

 # Generate all structures
-FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES = _generate_model_structures()
+FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES, MODEL_CONTEXT_WINDOWS = _generate_model_structures()

 MODEL_ACCESS_TIERS = {
     "free": FREE_TIER_MODELS,
@@ -191,3 +216,40 @@ MODEL_ACCESS_TIERS = {
     "tier_6_42_yearly_commitment": PAID_TIER_MODELS,
     "tier_12_84_yearly_commitment": PAID_TIER_MODELS,
 }
+
+def get_model_context_window(model_name: str, default: int = 31_000) -> int:
+    """
+    Get the context window size for a given model.
+
+    Args:
+        model_name: The model name or alias
+        default: Default context window if model not found
+
+    Returns:
+        Context window size in tokens
+    """
+    # Check direct model name first
+    if model_name in MODEL_CONTEXT_WINDOWS:
+        return MODEL_CONTEXT_WINDOWS[model_name]
+
+    # Check if it's an alias
+    if model_name in MODEL_NAME_ALIASES:
+        canonical_name = MODEL_NAME_ALIASES[model_name]
+        if canonical_name in MODEL_CONTEXT_WINDOWS:
+            return MODEL_CONTEXT_WINDOWS[canonical_name]
+
+    # Fallback patterns for common model naming variations
+    if 'sonnet' in model_name.lower():
+        return 200_000  # Claude Sonnet models
+    elif 'gpt-5' in model_name.lower():
+        return 400_000  # GPT-5 models
+    elif 'gemini' in model_name.lower():
+        return 2_000_000  # Gemini models
+    elif 'grok' in model_name.lower():
+        return 128_000  # Grok models
+    elif 'gpt' in model_name.lower():
+        return 128_000  # GPT-4 and variants
+    elif 'deepseek' in model_name.lower():
+        return 128_000  # DeepSeek models
+
+    return default
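
A quick usage sketch of the new lookup, showing the resolution order (exact name → generated alias → substring fallback → default); the last two model names are made up for illustration:

from utils.constants import get_model_context_window

get_model_context_window("xai/grok-4")                 # 128_000 (direct entry)
get_model_context_window("anthropic/claude-sonnet-4")  # 200_000 (generated alias entry)
get_model_context_window("sonnet-custom-build")        # 200_000 ('sonnet' substring fallback; made-up name)
get_model_context_window("totally-unknown-model")      # 31_000 (default; made-up name)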