disable context manager

LE Quoc Dat 2025-04-18 07:13:44 +01:00
parent aa8c5c6d78
commit 8669e40312
3 changed files with 27 additions and 15 deletions

View File

@@ -37,7 +37,8 @@ class AgentStartRequest(BaseModel):
model_name: Optional[str] = "anthropic/claude-3-7-sonnet-latest"
enable_thinking: Optional[bool] = False
reasoning_effort: Optional[str] = 'low'
-stream: Optional[bool] = False # Default stream to False for API
+stream: Optional[bool] = True
+enable_context_manager: Optional[bool] = False
def initialize(
_thread_manager: ThreadManager,
@@ -256,7 +257,7 @@ async def start_agent(
user_id: str = Depends(get_current_user_id)
):
"""Start an agent for a specific thread in the background."""
logger.info(f"Starting new agent for thread: {thread_id} with config: model={body.model_name}, thinking={body.enable_thinking}, effort={body.reasoning_effort}, stream={body.stream}")
logger.info(f"Starting new agent for thread: {thread_id} with config: model={body.model_name}, thinking={body.enable_thinking}, effort={body.reasoning_effort}, stream={body.stream}, context_manager={body.enable_context_manager}")
client = await db.client
# Verify user has access to this thread
@@ -340,7 +341,8 @@ async def start_agent(
model_name=MODEL_NAME_ALIASES.get(body.model_name, body.model_name),
enable_thinking=body.enable_thinking,
reasoning_effort=body.reasoning_effort,
-stream=body.stream # Pass stream parameter
+stream=body.stream,
+enable_context_manager=body.enable_context_manager
)
)
@@ -477,10 +479,11 @@ async def run_agent_background(
model_name: str,
enable_thinking: Optional[bool],
reasoning_effort: Optional[str],
-stream: bool # Add stream parameter
+stream: bool,
+enable_context_manager: bool
):
"""Run the agent in the background and handle status updates."""
logger.debug(f"Starting background agent run: {agent_run_id} for thread: {thread_id} (instance: {instance_id}) with model={model_name}, thinking={enable_thinking}, effort={reasoning_effort}, stream={stream}")
logger.debug(f"Starting background agent run: {agent_run_id} for thread: {thread_id} (instance: {instance_id}) with model={model_name}, thinking={enable_thinking}, effort={reasoning_effort}, stream={stream}, context_manager={enable_context_manager}")
client = await db.client
# Tracking variables
@@ -601,12 +604,13 @@ async def run_agent_background(
agent_gen = run_agent(
thread_id=thread_id,
project_id=project_id,
-stream=stream, # Pass stream parameter from API request
+stream=stream,
thread_manager=thread_manager,
sandbox=sandbox,
-model_name=model_name, # Pass model_name
-enable_thinking=enable_thinking, # Pass enable_thinking
-reasoning_effort=reasoning_effort # Pass reasoning_effort
+model_name=model_name,
+enable_thinking=enable_thinking,
+reasoning_effort=reasoning_effort,
+enable_context_manager=enable_context_manager
)
# Collect all responses to save to database
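
Net effect of this first file's changes: the API now defaults to streaming and exposes an opt-out flag, enable_context_manager, which defaults to False at the request level. A minimal sketch of the resulting request model and an opt-out call, using only the fields visible in the hunks above (the construction and print at the end are illustrative, not part of the commit):

    from typing import Optional
    from pydantic import BaseModel

    class AgentStartRequest(BaseModel):
        model_name: Optional[str] = "anthropic/claude-3-7-sonnet-latest"
        enable_thinking: Optional[bool] = False
        reasoning_effort: Optional[str] = 'low'
        stream: Optional[bool] = True                   # was False before this commit
        enable_context_manager: Optional[bool] = False  # new flag, off by default at the API

    # A request body that explicitly leaves automatic summarization off:
    body = AgentStartRequest(stream=True, enable_context_manager=False)
    print(body)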

View File

@@ -31,7 +31,8 @@ async def run_agent(
max_iterations: int = 150,
model_name: str = "anthropic/claude-3-7-sonnet-latest",
enable_thinking: Optional[bool] = False,
-reasoning_effort: Optional[str] = 'low'
+reasoning_effort: Optional[str] = 'low',
+enable_context_manager: bool = True
):
"""Run the development agent with specified configuration."""
@@ -142,7 +143,8 @@ async def run_agent(
native_max_auto_continues=native_max_auto_continues,
include_xml_examples=True,
enable_thinking=enable_thinking,
-reasoning_effort=reasoning_effort
+reasoning_effort=reasoning_effort,
+enable_context_manager=enable_context_manager
)
if isinstance(response, dict) and "status" in response and response["status"] == "error":
@@ -272,7 +274,8 @@ async def process_agent_response(
stream: bool = True,
model_name: str = "anthropic/claude-3-7-sonnet-latest",
enable_thinking: Optional[bool] = False,
-reasoning_effort: Optional[str] = 'low'
+reasoning_effort: Optional[str] = 'low',
+enable_context_manager: bool = True
):
"""Process the streaming response from the agent."""
chunk_counter = 0
@@ -293,7 +296,8 @@ async def process_agent_response(
native_max_auto_continues=25,
model_name=model_name,
enable_thinking=enable_thinking,
-reasoning_effort=reasoning_effort
+reasoning_effort=reasoning_effort,
+enable_context_manager=enable_context_manager
):
chunk_counter += 1
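
In this second file, run_agent and process_agent_response only accept and forward the new flag; both default it to True, so summarization stays on unless the caller turns it off. A self-contained sketch of that pass-through, with a stub ThreadManager standing in for the real class (every name beyond the parameters shown in the diff is an assumption):

    import asyncio

    class ThreadManager:  # stub for illustration; the real class is in the third changed file
        async def run_thread(self, *, enable_thinking=False, reasoning_effort='low',
                             enable_context_manager=True):
            return {"context_manager": enable_context_manager}

    async def run_agent(thread_manager, *, enable_thinking=False, reasoning_effort='low',
                        enable_context_manager=True):
        # run_agent does not interpret the flag; it forwards it to run_thread unchanged.
        return await thread_manager.run_thread(
            enable_thinking=enable_thinking,
            reasoning_effort=reasoning_effort,
            enable_context_manager=enable_context_manager,
        )

    print(asyncio.run(run_agent(ThreadManager(), enable_context_manager=False)))
    # prints: {'context_manager': False}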

View File

@@ -163,7 +163,8 @@ class ThreadManager:
max_xml_tool_calls: int = 0,
include_xml_examples: bool = False,
enable_thinking: Optional[bool] = False,
-reasoning_effort: Optional[str] = 'low'
+reasoning_effort: Optional[str] = 'low',
+enable_context_manager: bool = True
) -> Union[Dict[str, Any], AsyncGenerator]:
"""Run a conversation thread with LLM integration and tool execution.
@@ -183,6 +184,7 @@ class ThreadManager:
include_xml_examples: Whether to include XML tool examples in the system prompt
enable_thinking: Whether to enable thinking before making a decision
reasoning_effort: The effort level for reasoning
+enable_context_manager: Whether to enable automatic context summarization.
Returns:
An async generator yielding response chunks or error dict
@@ -249,7 +251,7 @@ Here are the XML tools available with examples:
logger.info(f"Thread {thread_id} token count: {token_count}/{token_threshold} ({(token_count/token_threshold)*100:.1f}%)")
# If we're over the threshold, summarize the thread
-if token_count >= token_threshold:
+if token_count >= token_threshold and enable_context_manager:
logger.info(f"Thread token count ({token_count}) exceeds threshold ({token_threshold}), summarizing...")
# Create summary using context manager
@@ -270,6 +272,8 @@ Here are the XML tools available with examples:
logger.info(f"After summarization: token count reduced from {token_count} to {new_token_count}")
else:
logger.warning("Summarization failed or wasn't needed - proceeding with original messages")
+else:
+logger.info("Automatic summarization disabled. Skipping token count check and summarization.")
except Exception as e:
logger.error(f"Error counting tokens or summarizing: {str(e)}")
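
The flag takes effect only here: the thread is summarized when the token count crosses the threshold and enable_context_manager is true; otherwise the new else-branch logs that summarization is disabled and the run proceeds with the original messages. A simplified sketch of the gate (the helper name should_summarize and its boolean return are illustrative; the real code calls the context manager and logs instead):

    def should_summarize(token_count: int, token_threshold: int,
                         enable_context_manager: bool) -> bool:
        # Mirrors the changed condition: summarize only when over the threshold
        # AND the caller left the context manager enabled.
        return token_count >= token_threshold and enable_context_manager

    print(should_summarize(120_000, 100_000, True))   # True  -> summarize the thread
    print(should_summarize(120_000, 100_000, False))  # False -> log that summarization is disabled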