add reasoning setup for sonnet

This commit is contained in:
LE Quoc Dat 2025-04-17 20:48:08 +01:00
parent e752e4381b
commit 460424dbe5
3 changed files with 29 additions and 5 deletions

View File

@@ -155,6 +155,10 @@ class ResponseProcessor:
if hasattr(chunk, 'choices') and chunk.choices:
delta = chunk.choices[0].delta if hasattr(chunk.choices[0], 'delta') else None
# Check for and log Anthropic thinking content
if delta and hasattr(delta, 'reasoning_content') and delta.reasoning_content:
logger.info(f"[THINKING]: {delta.reasoning_content}")
# Process content chunk
if delta and hasattr(delta, 'content') and delta.content:
chunk_content = delta.content

View File

@@ -1,6 +1,6 @@
streamlit-quill==0.0.3
python-dotenv==1.0.1
litellm>=1.44.0
litellm>=1.66.2
click==8.1.7
questionary==2.0.1
requests>=2.31.0

View File

@@ -28,7 +28,7 @@ RATE_LIMIT_DELAY = 30
RETRY_DELAY = 5
# Define debug log directory relative to this file's location
DEBUG_LOG_DIR = os.path.join(os.path.dirname(__file__), '..', 'debug_logs') # Assumes backend/debug_logs
DEBUG_LOG_DIR = os.path.join(os.path.dirname(__file__), 'debug_logs')
class LLMError(Exception):
"""Base exception for LLM-related errors."""
@@ -156,6 +156,22 @@ def prepare_params(
params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
logger.debug(f"Auto-set model_id for Claude 3.7 Sonnet: {params['model_id']}")
# --- Add Anthropic Thinking/Reasoning Effort ---
# Read environment variables for thinking/reasoning
enable_thinking_env = os.environ.get('ENABLE_THINKING', 'false').lower() == 'true'
# Check if the model is Anthropic
is_anthropic = "sonnet-3-7" in model_name.lower() or "anthropic" in model_name.lower()
# Add reasoning_effort parameter if enabled and applicable
if is_anthropic and enable_thinking_env:
reasoning_effort_env = os.environ.get('REASONING_EFFORT', 'low') # Default to 'low'
params["reasoning_effort"] = reasoning_effort_env
logger.info(f"Anthropic thinking enabled with reasoning_effort='{reasoning_effort_env}'")
# Anthropic requires temperature=1 when thinking/reasoning_effort is enabled
params["temperature"] = 1.0
return params
async def make_llm_api_call(
@@ -272,15 +288,19 @@ async def make_llm_api_call(
# Initialize log path to None, it will be set only if logging is enabled
response_log_path = None
enable_debug_logging = os.environ.get('ENABLE_LLM_DEBUG_LOGGING', 'false').lower() == 'true'
# enable_debug_logging = True
if enable_debug_logging:
try:
os.makedirs(DEBUG_LOG_DIR, exist_ok=True)
# save the model name too
model_name = params["model"]
timestamp = time.strftime("%Y%m%d_%H%M%S")
# Use a unique ID or counter if calls can happen in the same second
# For simplicity, using timestamp only for now
request_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_request_{timestamp}.json")
response_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_response_{timestamp}.json") # Set here if enabled
request_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_request_{timestamp}_{model_name}.json")
response_log_path = os.path.join(DEBUG_LOG_DIR, f"llm_response_{timestamp}_{model_name}.json") # Set here if enabled
# Log the request parameters just before the attempt loop
logger.debug(f"Logging LLM request parameters to {request_log_path}")
@@ -300,7 +320,7 @@ async def make_llm_api_call(
for attempt in range(MAX_RETRIES):
try:
logger.debug(f"Attempt {attempt + 1}/{MAX_RETRIES}")
# print(params)
response = await litellm.acompletion(**params)
logger.debug(f"Successfully received API response from {model_name}")