add reasoning setup for sonnet

This commit is contained in:
LE Quoc Dat 2025-04-17 20:48:08 +01:00
parent e752e4381b
commit 460424dbe5
3 changed files with 29 additions and 5 deletions

View File

@@ -155,6 +155,10 @@ class ResponseProcessor:
if hasattr(chunk, 'choices') and chunk.choices:
delta = chunk.choices[0].delta if hasattr(chunk.choices[0], 'delta') else None
# Check for and log Anthropic thinking content
if delta and hasattr(delta, 'reasoning_content') and delta.reasoning_content:
logger.info(f"[THINKING]: {delta.reasoning_content}")
# Process content chunk
if delta and hasattr(delta, 'content') and delta.content:
chunk_content = delta.content

View File

@@ -1,6 +1,6 @@
streamlit-quill==0.0.3
python-dotenv==1.0.1
litellm>=1.44.0
litellm>=1.66.2
click==8.1.7
questionary==2.0.1
requests>=2.31.0

View File

@@ -28,7 +28,7 @@ RATE_LIMIT_DELAY = 30
RETRY_DELAY = 5
# Define debug log directory relative to this file's location
DEBUG_LOG_DIR = os.path.join(os.path.dirname(__file__), '..', 'debug_logs') # Assumes backend/debug_logs
DEBUG_LOG_DIR = os.path.join(os.path.dirname(__file__), 'debug_logs')
class LLMError(Exception):
"""Base exception for LLM-related errors."""
@@ -156,6 +156,22 @@ def prepare_params(
params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
logger.debug(f"Auto-set model_id for Claude 3.7 Sonnet: {params['model_id']}")
# --- Add Anthropic Thinking/Reasoning Effort ---
# Read environment variables for thinking/reasoning
enable_thinking_env = os.environ.get('ENABLE_THINKING', 'false').lower() == 'true'
# Check if the model is Anthropic
is_anthropic = "sonnet-3-7" in model_name.lower() or "anthropic" in model_name.lower()
# Add reasoning_effort parameter if enabled and applicable
if is_anthropic and enable_thinking_env:
reasoning_effort_env = os.environ.get('REASONING_EFFORT', 'low') # Default to 'low'
params["reasoning_effort"] = reasoning_effort_env
logger.info(f"Anthropic thinking enabled with reasoning_effort='{reasoning_effort_env}'")
# Anthropic requires temperature=1 when thinking/reasoning_effort is enabled
params["temperature"] = 1.0
return params
async def make_llm_api_call(
@@ -272,15 +288,19 @@ async def make_llm_api_call(
# Initialize log path to None, it will be set only if logging is enabled
response_log_path = None
enable_debug_logging = os.environ.get('ENABLE_LLM_DEBUG_LOGGING', 'false').lower() == 'true'
# enable_debug_logging = True
if enable_debug_logging:
try:
os.makedirs(DEBUG_LOG_DIR, exist_ok=True)
# save the model name too
model_name = params["model"]
timestamp = time.strftime("%Y%m%d_%H%M%S")
# Use a unique ID or counter if calls can happen in the same second
# For simplicity, using timestamp only for now
request_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_request_{timestamp}.json")
response_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_response_{timestamp}.json") # Set here if enabled
request_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_request_{timestamp}_{model_name}.json")
response_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_response_{timestamp}_{model_name}.json") # Set here if enabled
# Log the request parameters just before the attempt loop
logger.debug(f"Logging LLM request parameters to {request_log_path}")
@@ -300,7 +320,7 @@ async def make_llm_api_call(
for attempt in range(MAX_RETRIES):
try:
logger.debug(f"Attempt {attempt + 1}/{MAX_RETRIES}")
# print(params)
response = await litellm.acompletion(**params)
logger.debug(f"Successfully received API response from {model_name}")