From aee59459078598544e1500578fbe8c4a93ae1f0a Mon Sep 17 00:00:00 2001
From: Sharath <29162020+tnfssc@users.noreply.github.com>
Date: Fri, 6 Jun 2025 12:36:11 +0530
Subject: [PATCH 1/7] hotfix(context-limit): truncate more content to allow more messages

---
 backend/agentpress/thread_manager.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/agentpress/thread_manager.py b/backend/agentpress/thread_manager.py
index e130b683..3c6dc465 100644
--- a/backend/agentpress/thread_manager.py
+++ b/backend/agentpress/thread_manager.py
@@ -345,11 +345,11 @@ Here are the XML tools available with examples:
                     if self._is_tool_result_message(msg): # Only compress ToolResult messages
                         _i += 1 # Count the number of ToolResult messages
                         msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
-                        if msg_token_count > 5000: # If the message is too long
+                        if msg_token_count > 1000: # If the message is too long
                             if _i > 1: # If this is not the most recent ToolResult message
                                 message_id = msg.get('message_id') # Get the message_id
                                 if message_id:
-                                    msg["content"] = msg["content"][:10000] + "... (truncated)" + f"\n\nThis message is too long, use the expand-message tool with message_id \"{message_id}\" to see the full message" # Truncate the message
+                                    msg["content"] = msg["content"][:3000] + "... (truncated)" + f"\n\nThis message is too long, use the expand-message tool with message_id \"{message_id}\" to see the full message" # Truncate the message
                             else:
                                 msg["content"] = msg["content"][:200000] + f"\n\nThis message is too long, repeat relevant information in your response to remember it" # Truncate to 200k characters to avoid overloading the context at once, but don't truncate otherwise

From 78b80bc2bc8c3354be28a9ec80fa19a1d5217ef2 Mon Sep 17 00:00:00 2001
From: Soumyadas15
Date: Fri, 6 Jun 2025 13:19:47 +0530
Subject: [PATCH 2/7] feat: ready for deployment

---
 test_agent_builder_response.py | 136 ---------------------------------
 1 file changed, 136 deletions(-)
 delete mode 100644 test_agent_builder_response.py

diff --git a/test_agent_builder_response.py b/test_agent_builder_response.py
deleted file mode 100644
index 3240cdd7..00000000
--- a/test_agent_builder_response.py
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script to verify enhanced response processor for agent builder tools.
-""" - -import asyncio -import json -from backend.agentpress.response_processor import ResponseProcessor -from backend.agentpress.tool_registry import ToolRegistry -from backend.agentpress.tool import ToolResult - -class MockTool: - """Mock tool for testing.""" - - def success_response(self, data): - return ToolResult(success=True, output=json.dumps(data, indent=2)) - - def fail_response(self, msg): - return ToolResult(success=False, output=msg) - -async def mock_add_message(thread_id, type, content, is_llm_message, metadata=None): - """Mock add message callback.""" - return { - "message_id": "test-message-id", - "thread_id": thread_id, - "type": type, - "content": content, - "is_llm_message": is_llm_message, - "metadata": metadata or {} - } - -def test_update_agent_response(): - """Test update_agent tool response formatting.""" - - # Create mock tool result for update_agent - mock_tool = MockTool() - update_result = mock_tool.success_response({ - "message": "Agent updated successfully", - "updated_fields": ["name", "description", "system_prompt"], - "agent": { - "agent_id": "test-agent-123", - "name": "Research Assistant", - "description": "An AI assistant specialized in research", - "system_prompt": "You are a research assistant with expertise in gathering, analyzing, and synthesizing information from various sources.", - "agentpress_tools": { - "web_search": {"enabled": True, "description": "Search the web"}, - "sb_files": {"enabled": True, "description": "File operations"} - }, - "configured_mcps": [ - {"name": "Exa Search", "qualifiedName": "exa", "enabledTools": ["search"]} - ], - "avatar": "🔬", - "avatar_color": "#4F46E5" - } - }) - - # Test with agent builder mode - tool_registry = ToolRegistry() - processor = ResponseProcessor( - tool_registry=tool_registry, - add_message_callback=mock_add_message, - is_agent_builder=True, - target_agent_id="test-agent-123" - ) - - tool_call = { - "function_name": "update_agent", - "xml_tag_name": "update_agent", - "arguments": {"name": "Research Assistant"} - } - - structured_result = processor._create_structured_tool_result(tool_call, update_result) - - print("=== Agent Builder Mode - Update Agent Tool Response ===") - print(structured_result["summary"]) - print("\n" + "="*60 + "\n") - - # Test without agent builder mode - processor_normal = ResponseProcessor( - tool_registry=tool_registry, - add_message_callback=mock_add_message, - is_agent_builder=False - ) - - structured_result_normal = processor_normal._create_structured_tool_result(tool_call, update_result) - - print("=== Normal Mode - Update Agent Tool Response ===") - print(structured_result_normal["summary"]) - print("\n" + "="*60 + "\n") - -def test_get_current_agent_config_response(): - """Test get_current_agent_config tool response formatting.""" - - mock_tool = MockTool() - config_result = mock_tool.success_response({ - "summary": "Agent 'Research Assistant' has 2 tools enabled and 1 MCP servers configured.", - "configuration": { - "agent_id": "test-agent-123", - "name": "Research Assistant", - "description": "An AI assistant specialized in research", - "system_prompt": "You are a research assistant with expertise in gathering, analyzing, and synthesizing information from various sources. 
Your approach is thorough and methodical.", - "agentpress_tools": { - "web_search": {"enabled": True, "description": "Search the web"}, - "sb_files": {"enabled": False, "description": "File operations"} - }, - "configured_mcps": [], - "avatar": "🔬", - "avatar_color": "#4F46E5" - } - }) - - tool_registry = ToolRegistry() - processor = ResponseProcessor( - tool_registry=tool_registry, - add_message_callback=mock_add_message, - is_agent_builder=True, - target_agent_id="test-agent-123" - ) - - tool_call = { - "function_name": "get_current_agent_config", - "xml_tag_name": "get_current_agent_config", - "arguments": {} - } - - structured_result = processor._create_structured_tool_result(tool_call, config_result) - - print("=== Agent Builder Mode - Get Current Agent Config Response ===") - print(structured_result["summary"]) - print("\n" + "="*60 + "\n") - -if __name__ == "__main__": - print("Testing Enhanced Response Processor for Agent Builder Tools\n") - test_update_agent_response() - test_get_current_agent_config_response() - print("✅ All tests completed!") \ No newline at end of file From 49c9e0b9faf0cb716bbce8163e7c15e742efc24e Mon Sep 17 00:00:00 2001 From: Sharath <29162020+tnfssc@users.noreply.github.com> Date: Fri, 6 Jun 2025 13:29:20 +0530 Subject: [PATCH 3/7] hotfix(langfuse): fix langfuse verison --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index cc58b88b..395edcae 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -32,7 +32,7 @@ stripe>=12.0.1 dramatiq>=1.17.1 pika>=1.3.2 prometheus-client>=0.21.1 -langfuse>=2.60.5 +langfuse==2.60.5 httpx>=0.24.0 Pillow>=10.0.0 sentry-sdk[fastapi]>=2.29.1 From 96ce252073358240638965c9d5b7bce5fc294e50 Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Fri, 6 Jun 2025 13:44:38 +0530 Subject: [PATCH 4/7] fix(ui): remove unnecessary redundant toast messages --- frontend/src/lib/api.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index fd0ce8da..f621ad26 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -1247,7 +1247,6 @@ export const listSandboxFiles = async ( return data.files || []; } catch (error) { console.error('Failed to list sandbox files:', error); - handleApiError(error, { operation: 'list files', resource: `directory ${path}` }); throw error; } }; @@ -1475,8 +1474,6 @@ export const checkApiHealth = async (): Promise => { return response.json(); } catch (error) { - console.error('API health check failed:', error); - handleApiError(error, { operation: 'check system health', resource: 'system status' }); throw error; } }; @@ -1783,7 +1780,6 @@ export const checkBillingStatus = async (): Promise => { return response.json(); } catch (error) { console.error('Failed to check billing status:', error); - handleApiError(error, { operation: 'check billing status', resource: 'account status' }); throw error; } }; From 7e463cdd10ae8b88321f7987eab975bcf823bd8f Mon Sep 17 00:00:00 2001 From: Soumyadas15 Date: Fri, 6 Jun 2025 13:46:21 +0530 Subject: [PATCH 5/7] fix(ui): remove unnecessary redundant toast messages --- frontend/src/lib/api.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index f621ad26..d9d1e5d0 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -1247,6 +1247,7 @@ export const listSandboxFiles = async ( return data.files || []; } catch (error) { console.error('Failed to list 
sandbox files:', error); + // handleApiError(error, { operation: 'list files', resource: `directory ${path}` }); throw error; } }; From 93660e4a6193110419a3aa5abbb90933c32087bd Mon Sep 17 00:00:00 2001 From: sharath <29162020+tnfssc@users.noreply.github.com> Date: Fri, 6 Jun 2025 08:56:14 +0000 Subject: [PATCH 6/7] feat(thread-manager): add message compression methods to handle long messages efficiently --- .gitignore | 2 + backend/agentpress/thread_manager.py | 138 +++++++++++++++++++++++---- 2 files changed, 120 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index ebee6baf..217b9662 100644 --- a/.gitignore +++ b/.gitignore @@ -198,3 +198,5 @@ rabbitmq_data .setup_progress .setup_env.json + +backend/.test_token_compression.py diff --git a/backend/agentpress/thread_manager.py b/backend/agentpress/thread_manager.py index b89db3a6..4851e2d1 100644 --- a/backend/agentpress/thread_manager.py +++ b/backend/agentpress/thread_manager.py @@ -25,6 +25,7 @@ from utils.logger import logger from langfuse.client import StatefulGenerationClient, StatefulTraceClient from services.langfuse import langfuse import datetime +from litellm import token_counter # Type alias for tool choice ToolChoice = Literal["auto", "required", "none"] @@ -74,6 +75,122 @@ class ThreadManager: except (json.JSONDecodeError, TypeError): pass return False + + def _compress_message(self, msg_content: Union[str, dict], message_id: Optional[str] = None, max_length: int = 3000) -> Union[str, dict]: + """Compress the message content.""" + # print("max_length", max_length) + if isinstance(msg_content, str): + if len(msg_content) > max_length: + return msg_content[:max_length] + "... (truncated)" + f"\n\nThis message is too long, use the expand-message tool with message_id \"{message_id}\" to see the full message" + else: + return msg_content + elif isinstance(msg_content, dict): + if len(json.dumps(msg_content)) > max_length: + return json.dumps(msg_content)[:max_length] + "... 
(truncated)" + f"\n\nThis message is too long, use the expand-message tool with message_id \"{message_id}\" to see the full message" + else: + return msg_content + + def _safe_truncate(self, msg_content: Union[str, dict], max_length: int = 200000) -> Union[str, dict]: + """Truncate the message content safely.""" + if isinstance(msg_content, str): + if len(msg_content) > max_length: + return msg_content[:max_length] + f"\n\nThis message is too long, repeat relevant information in your response to remember it" + else: + return msg_content + elif isinstance(msg_content, dict): + if len(json.dumps(msg_content)) > max_length: + return json.dumps(msg_content)[:max_length] + f"\n\nThis message is too long, repeat relevant information in your response to remember it" + else: + return msg_content + + def _compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: + """Compress the tool result messages except the most recent one.""" + uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) + + if uncompressed_total_token_count > (llm_max_tokens or (64 * 1000)): + _i = 0 # Count the number of ToolResult messages + for msg in reversed(messages): # Start from the end and work backwards + if self._is_tool_result_message(msg): # Only compress ToolResult messages + _i += 1 # Count the number of ToolResult messages + msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message + if msg_token_count > token_threshold: # If the message is too long + if _i > 1: # If this is not the most recent ToolResult message + message_id = msg.get('message_id') # Get the message_id + if message_id: + msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3) + else: + logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}") + else: + msg["content"] = self._safe_truncate(msg["content"]) + return messages + + def _compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: + """Compress the user messages except the most recent one.""" + uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) + + if uncompressed_total_token_count > (llm_max_tokens or (100 * 1000)): + _i = 0 # Count the number of User messages + for msg in reversed(messages): # Start from the end and work backwards + if msg.get('role') == 'user': # Only compress User messages + _i += 1 # Count the number of User messages + msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message + if msg_token_count > token_threshold: # If the message is too long + if _i > 1: # If this is not the most recent User message + message_id = msg.get('message_id') # Get the message_id + if message_id: + msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3) + else: + logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}") + else: + msg["content"] = self._safe_truncate(msg["content"]) + return messages + + def _compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: + """Compress the assistant messages except the most recent one.""" + uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) + if 
uncompressed_total_token_count > (llm_max_tokens or (100 * 1000)): + _i = 0 # Count the number of Assistant messages + for msg in reversed(messages): # Start from the end and work backwards + if msg.get('role') == 'assistant': # Only compress Assistant messages + _i += 1 # Count the number of Assistant messages + msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message + if msg_token_count > token_threshold: # If the message is too long + if _i > 1: # If this is not the most recent Assistant message + message_id = msg.get('message_id') # Get the message_id + if message_id: + msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3) + else: + logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}") + else: + msg["content"] = self._safe_truncate(msg["content"]) + + return messages + + def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]: + """Compress the messages. + token_threshold: must be a power of 2 + """ + if max_iterations <= 0: + logger.warning(f"_compress_messages: Max iterations reached, returning uncompressed messages") + return messages + + result = messages + + uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) + + result = self._compress_tool_result_messages(result, llm_model, llm_max_tokens, token_threshold) + result = self._compress_user_messages(result, llm_model, llm_max_tokens, token_threshold) + result = self._compress_assistant_messages(result, llm_model, llm_max_tokens, token_threshold) + + compressed_token_count = token_counter(model=llm_model, messages=result) + + logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later + + if (compressed_token_count > llm_max_tokens): + logger.warning(f"Further token compression is needed: {compressed_token_count} > {llm_max_tokens}") + result = self._compress_messages(messages, llm_model, llm_max_tokens, int(token_threshold / 2), max_iterations - 1) + + return result def add_tool(self, tool_class: Type[Tool], function_names: Optional[List[str]] = None, **kwargs): """Add a tool to the ThreadManager.""" @@ -287,7 +404,6 @@ Here are the XML tools available with examples: # 2. 
Check token count before proceeding token_count = 0 try: - from litellm import token_counter # Use the potentially modified working_system_prompt for token counting token_count = token_counter(model=llm_model, messages=[working_system_prompt] + messages) token_threshold = self.context_manager.token_threshold @@ -344,25 +460,7 @@ Here are the XML tools available with examples: openapi_tool_schemas = self.tool_registry.get_openapi_schemas() logger.debug(f"Retrieved {len(openapi_tool_schemas) if openapi_tool_schemas else 0} OpenAPI tool schemas") - - uncompressed_total_token_count = token_counter(model=llm_model, messages=prepared_messages) - - if uncompressed_total_token_count > (llm_max_tokens or (100 * 1000)): - _i = 0 # Count the number of ToolResult messages - for msg in reversed(prepared_messages): # Start from the end and work backwards - if self._is_tool_result_message(msg): # Only compress ToolResult messages - _i += 1 # Count the number of ToolResult messages - msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message - if msg_token_count > 1000: # If the message is too long - if _i > 1: # If this is not the most recent ToolResult message - message_id = msg.get('message_id') # Get the message_id - if message_id: - msg["content"] = msg["content"][:3000] + "... (truncated)" + f"\n\nThis message is too long, use the expand-message tool with message_id \"{message_id}\" to see the full message" # Truncate the message - else: - msg["content"] = msg["content"][:200000] + f"\n\nThis message is too long, repeat relevant information in your response to remember it" # Truncate to 300k characters to avoid overloading the context at once, but don't truncate otherwise - - compressed_total_token_count = token_counter(model=llm_model, messages=prepared_messages) - logger.info(f"token_compression: {uncompressed_total_token_count} -> {compressed_total_token_count}") # Log the token compression for debugging later + prepared_messages = self._compress_messages(prepared_messages, llm_model, llm_max_tokens) # 5. 
Make LLM API call logger.debug("Making LLM API call") From b5ae395fed375c8c4a2b9c5e1e1d910cd3d371b0 Mon Sep 17 00:00:00 2001 From: sharath <29162020+tnfssc@users.noreply.github.com> Date: Fri, 6 Jun 2025 09:18:34 +0000 Subject: [PATCH 7/7] refactor(thread-manager): standardize max_tokens parameter in message compression methods --- backend/agentpress/thread_manager.py | 46 ++++++++++++++++++---------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/backend/agentpress/thread_manager.py b/backend/agentpress/thread_manager.py index 4851e2d1..644a90d5 100644 --- a/backend/agentpress/thread_manager.py +++ b/backend/agentpress/thread_manager.py @@ -103,11 +103,11 @@ class ThreadManager: else: return msg_content - def _compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: + def _compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: """Compress the tool result messages except the most recent one.""" uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) - if uncompressed_total_token_count > (llm_max_tokens or (64 * 1000)): + if uncompressed_total_token_count > (max_tokens or (64 * 1000)): _i = 0 # Count the number of ToolResult messages for msg in reversed(messages): # Start from the end and work backwards if self._is_tool_result_message(msg): # Only compress ToolResult messages @@ -121,14 +121,14 @@ class ThreadManager: else: logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}") else: - msg["content"] = self._safe_truncate(msg["content"]) + msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2)) return messages - def _compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: + def _compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: """Compress the user messages except the most recent one.""" uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) - if uncompressed_total_token_count > (llm_max_tokens or (100 * 1000)): + if uncompressed_total_token_count > (max_tokens or (100 * 1000)): _i = 0 # Count the number of User messages for msg in reversed(messages): # Start from the end and work backwards if msg.get('role') == 'user': # Only compress User messages @@ -142,13 +142,13 @@ class ThreadManager: else: logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}") else: - msg["content"] = self._safe_truncate(msg["content"]) + msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2)) return messages - def _compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: + def _compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]: """Compress the assistant messages except the most recent one.""" uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) - if uncompressed_total_token_count > (llm_max_tokens or (100 * 1000)): + if 
uncompressed_total_token_count > (max_tokens or (100 * 1000)): _i = 0 # Count the number of Assistant messages for msg in reversed(messages): # Start from the end and work backwards if msg.get('role') == 'assistant': # Only compress Assistant messages @@ -162,14 +162,26 @@ class ThreadManager: else: logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}") else: - msg["content"] = self._safe_truncate(msg["content"]) + msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2)) return messages - def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, llm_max_tokens: Optional[int], token_threshold: Optional[int] = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]: + def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: Optional[int] = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]: """Compress the messages. token_threshold: must be a power of 2 """ + + if 'sonnet' in llm_model.lower(): + max_tokens = 200 * 1000 - 64000 + elif 'gpt' in llm_model.lower(): + max_tokens = 128 * 1000 - 28000 + elif 'gemini' in llm_model.lower(): + max_tokens = 1000 * 1000 - 300000 + elif 'deepseek' in llm_model.lower(): + max_tokens = 163 * 1000 - 32000 + else: + max_tokens = 41 * 1000 - 10000 + if max_iterations <= 0: logger.warning(f"_compress_messages: Max iterations reached, returning uncompressed messages") return messages @@ -178,17 +190,17 @@ class ThreadManager: uncompressed_total_token_count = token_counter(model=llm_model, messages=messages) - result = self._compress_tool_result_messages(result, llm_model, llm_max_tokens, token_threshold) - result = self._compress_user_messages(result, llm_model, llm_max_tokens, token_threshold) - result = self._compress_assistant_messages(result, llm_model, llm_max_tokens, token_threshold) + result = self._compress_tool_result_messages(result, llm_model, max_tokens, token_threshold) + result = self._compress_user_messages(result, llm_model, max_tokens, token_threshold) + result = self._compress_assistant_messages(result, llm_model, max_tokens, token_threshold) compressed_token_count = token_counter(model=llm_model, messages=result) logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later - if (compressed_token_count > llm_max_tokens): - logger.warning(f"Further token compression is needed: {compressed_token_count} > {llm_max_tokens}") - result = self._compress_messages(messages, llm_model, llm_max_tokens, int(token_threshold / 2), max_iterations - 1) + if (compressed_token_count > max_tokens): + logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}") + result = self._compress_messages(messages, llm_model, max_tokens, int(token_threshold / 2), max_iterations - 1) return result @@ -460,7 +472,7 @@ Here are the XML tools available with examples: openapi_tool_schemas = self.tool_registry.get_openapi_schemas() logger.debug(f"Retrieved {len(openapi_tool_schemas) if openapi_tool_schemas else 0} OpenAPI tool schemas") - prepared_messages = self._compress_messages(prepared_messages, llm_model, llm_max_tokens) + prepared_messages = self._compress_messages(prepared_messages, llm_model) # 5. Make LLM API call logger.debug("Making LLM API call")
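
Editor's note (not part of the patch series): the truncation pattern used in
patches 1 and 6 cuts an oversized message to a character budget and appends a
pointer so the model can fetch the full text later via the expand-message
tool. Below is a minimal standalone sketch of that pattern; the function name
truncate_with_pointer is illustrative, not from the codebase:

    def truncate_with_pointer(content: str, message_id: str, max_length: int = 3000) -> str:
        """Cut content to max_length characters and append a recovery hint."""
        if len(content) <= max_length:
            return content
        return (
            content[:max_length]
            + "... (truncated)"
            + f"\n\nThis message is too long, use the expand-message tool "
            + f"with message_id \"{message_id}\" to see the full message"
        )

The key design choice is that the truncation is recoverable from the model's
point of view: the original row is still in the database, so dropping context
only costs an extra tool call when the details turn out to matter.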
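The overall control flow of _compress_messages (patches 6 and 7) is: compress
tool-result, user, and assistant messages against a per-message token
threshold, then, if the conversation still exceeds the model budget, recurse
with the threshold halved, up to five iterations. The sketch below is a
self-contained re-implementation of that loop, assuming a crude
4-characters-per-token estimate in place of litellm's token_counter;
estimate_tokens and compress_messages are illustrative names:

    from typing import Any, Dict, List

    def estimate_tokens(messages: List[Dict[str, Any]]) -> int:
        # Stand-in for litellm's token_counter: roughly 4 characters per token.
        return sum(len(str(m.get("content", ""))) for m in messages) // 4

    def compress_messages(messages: List[Dict[str, Any]], max_tokens: int,
                          token_threshold: int = 4096,
                          max_iterations: int = 5) -> List[Dict[str, Any]]:
        # Truncate every oversized message except the most recent one, then
        # halve the per-message threshold and recurse while still over budget.
        if max_iterations <= 0 or estimate_tokens(messages) <= max_tokens:
            return messages
        for msg in messages[:-1]:  # spare the most recent message
            if estimate_tokens([msg]) > token_threshold:
                # ~3 characters per token, mirroring token_threshold * 3 in patch 6
                msg["content"] = str(msg["content"])[:token_threshold * 3] + "... (truncated)"
        return compress_messages(messages, max_tokens,
                                 token_threshold // 2, max_iterations - 1)

The real methods do this per role, sparing the most recent tool-result, user,
and assistant message separately. Because the threshold halves on every pass
and the iteration count is capped, the recursion always terminates, at the
cost of progressively more aggressive truncation on pathological histories.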
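Patch 7 derives max_tokens from the model name as roughly "context window
minus output headroom" (for example 200k - 64k for Sonnet). A table-driven
equivalent using the same numbers as the diff; MODEL_BUDGETS and
resolve_max_tokens are illustrative names, and the 31,000-token fallback
matches the final else branch:

    MODEL_BUDGETS = {
        "sonnet": 200_000 - 64_000,
        "gpt": 128_000 - 28_000,
        "gemini": 1_000_000 - 300_000,
        "deepseek": 163_000 - 32_000,
    }

    def resolve_max_tokens(llm_model: str, default: int = 41_000 - 10_000) -> int:
        # First substring match wins, mirroring the if/elif chain in patch 7.
        name = llm_model.lower()
        for key, budget in MODEL_BUDGETS.items():
            if key in name:
                return budget
        return default

Since Python dicts preserve insertion order, iterating over MODEL_BUDGETS
reproduces the first-match-wins behaviour of the original if/elif chain.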