marko-kraemer 2025-09-29 13:07:05 +02:00
parent 6a114b1ef4
commit a1f4b17ad2
1 changed file with 34 additions and 36 deletions

@@ -90,12 +90,6 @@ def _configure_token_limits(params: Dict[str, Any], model_name: str, max_tokens:
         # logger.debug(f"No max_tokens specified, using provider defaults for model: {model_name}")
         return
-    if model_name.startswith("bedrock/") and "claude-3-7" in model_name:
-        # For Claude 3.7 in Bedrock, do not set max_tokens or max_tokens_to_sample
-        # as it causes errors with inference profiles
-        # logger.debug(f"Skipping max_tokens for Claude 3.7 model: {model_name}")
-        return
     is_openai_o_series = 'o1' in model_name
     is_openai_gpt5 = 'gpt-5' in model_name
     param_name = "max_completion_tokens" if (is_openai_o_series or is_openai_gpt5) else "max_tokens"
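For context on the surviving lines above: OpenAI's o-series and GPT-5 models expect max_completion_tokens where older chat models take max_tokens, which is what the param_name selection encodes. A minimal standalone sketch of that selection (the helper name and example model strings are illustrative, not part of the diff):

def _token_param_name(model_name: str) -> str:
    # o1-family and gpt-5-family models take max_completion_tokens;
    # every other model keeps the classic max_tokens parameter.
    if 'o1' in model_name or 'gpt-5' in model_name:
        return "max_completion_tokens"
    return "max_tokens"

assert _token_param_name("openai/gpt-5-mini") == "max_completion_tokens"
assert _token_param_name("bedrock/claude-sonnet-4-20250514-v1:0") == "max_tokens"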
@@ -132,6 +126,38 @@ def _configure_openrouter(params: Dict[str, Any], model_name: str) -> None:
         params["extra_headers"] = extra_headers
         # logger.debug(f"Added OpenRouter site URL and app name to headers")
+
+def _configure_bedrock(params: Dict[str, Any], resolved_model_name: str, model_id: Optional[str]) -> None:
+    """Configure Bedrock-specific parameters including inference profile ARNs."""
+    if not resolved_model_name.startswith("bedrock/"):
+        return
+
+    # Set inference profile ARNs if available and not already provided
+    from core.ai_models import model_manager
+    model_obj = model_manager.get_model(resolved_model_name)
+    if model_obj and not model_id:  # Only set if not already provided
+        if "claude-sonnet-4-20250514-v1:0" in resolved_model_name:
+            params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-sonnet-4-20250514-v1:0"
+            # logger.debug(f"Set Bedrock inference profile ARN for Claude Sonnet 4")
+        elif "claude-3-7-sonnet-20250219-v1:0" in resolved_model_name:
+            params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
+            # logger.debug(f"Set Bedrock inference profile ARN for Claude 3.7 Sonnet")
+
+def _configure_openai_compatible(params: Dict[str, Any], model_name: str, api_key: Optional[str], api_base: Optional[str]) -> None:
+    """Configure OpenAI-compatible provider setup."""
+    if not model_name.startswith("openai-compatible/"):
+        return
+
+    # Check that we have the required config, either from parameters or the environment
+    if (not api_key and not config.OPENAI_COMPATIBLE_API_KEY) or (
+        not api_base and not config.OPENAI_COMPATIBLE_API_BASE
+    ):
+        raise LLMError(
+            "OPENAI_COMPATIBLE_API_KEY and OPENAI_COMPATIBLE_API_BASE are required for openai-compatible models. "
+            "If you just updated the environment variables, wait a few minutes or restart the service to ensure they are loaded."
+        )
+
+    setup_provider_router(api_key, api_base)
+    logger.debug("Configured OpenAI-compatible provider with custom API base")
+
 def _configure_thinking(params: Dict[str, Any], model_name: str) -> None:
     """Configure reasoning/thinking parameters automatically based on model capabilities."""
     # Check if model supports thinking/reasoning
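The two new helpers are guard-clause no-ops for non-matching model prefixes, so they are safe to call unconditionally from prepare_params. A rough usage sketch (the argument values are made up; this assumes the module's imports shown above):

params: Dict[str, Any] = {}

# Non-Bedrock names return immediately and leave params untouched.
_configure_bedrock(params, "openrouter/anthropic/claude-sonnet-4", model_id=None)
assert "model_id" not in params

# A caller-supplied model_id wins: the helper only fills in the inference
# profile ARN when model_id is falsy.
_configure_bedrock(params, "bedrock/claude-sonnet-4-20250514-v1:0", model_id="arn:aws:custom")
assert "model_id" not in params  # prepare_params sets it from the argument instead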
@@ -194,11 +220,8 @@ def prepare_params(
         "num_retries": MAX_RETRIES,
     }

-    # Enable usage tracking for streaming requests
     if stream:
         params["stream_options"] = {"include_usage": True}
-        # logger.debug(f"Added stream_options for usage tracking: {params['stream_options']}")

     if api_key:
         params["api_key"] = api_key

     if api_base:
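The stream_options entry kept by this hunk asks the provider to emit a final usage chunk in streaming mode. A hedged sketch of what a consumer sees through LiteLLM (assumes litellm is installed and provider credentials are configured; the model string is an example):

import litellm

response = litellm.completion(
    model="openai/gpt-4o-mini",
    messages=[{"role": "user", "content": "ping"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in response:
    # Content chunks carry no usage; the final chunk carries token counts.
    usage = getattr(chunk, "usage", None)
    if usage is not None:
        print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)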
@@ -206,36 +229,12 @@
     if model_id:
         params["model_id"] = model_id

-    # For Bedrock models, set inference profile ARNs if available
-    if resolved_model_name.startswith("bedrock/"):
-        from core.ai_models import model_manager
-        model_obj = model_manager.get_model(resolved_model_name)
-        if model_obj and not model_id:  # Only set if not already provided
-            if "claude-sonnet-4-20250514-v1:0" in resolved_model_name:
-                params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-sonnet-4-20250514-v1:0"
-            elif "claude-3-7-sonnet-20250219-v1:0" in resolved_model_name:
-                params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
-
-    if model_name.startswith("openai-compatible/"):
-        # Check if have required config either from parameters or environment
-        if (not api_key and not config.OPENAI_COMPATIBLE_API_KEY) or (
-            not api_base and not config.OPENAI_COMPATIBLE_API_BASE
-        ):
-            raise LLMError(
-                "OPENAI_COMPATIBLE_API_KEY and OPENAI_COMPATIBLE_API_BASE is required for openai-compatible models. If just updated the environment variables, wait a few minutes or restart the service to ensure they are loaded."
-            )
-        setup_provider_router(api_key, api_base)
+    _configure_bedrock(params, resolved_model_name, model_id)
+    _configure_openai_compatible(params, model_name, api_key, api_base)

-    # Handle token limits
     _configure_token_limits(params, resolved_model_name, max_tokens)

-    # Add tools if provided
     _add_tools_config(params, tools, tool_choice)

-    # Add Anthropic-specific parameters
     _configure_anthropic(params, resolved_model_name)

-    # Add OpenRouter-specific parameters
     _configure_openrouter(params, resolved_model_name)

     # _configure_thinking(params, resolved_model_name)

     return params
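With the inline blocks extracted, prepare_params reduces to a flat pipeline of configurators over a single params dict. For illustration, a call might look like this (keyword names are inferred from the diff; the exact signature may differ):

params = prepare_params(
    model_name="bedrock/claude-sonnet-4-20250514-v1:0",
    stream=True,       # adds stream_options={"include_usage": True}
    max_tokens=1024,   # routed through _configure_token_limits
    model_id=None,     # lets _configure_bedrock supply the inference profile ARN
)
assert params["model_id"].startswith("arn:aws:bedrock:")
assert params["stream_options"] == {"include_usage": True}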
@@ -311,7 +310,6 @@ async def make_llm_api_call(
         ErrorProcessor.log_error(processed_error)
         raise LLMError(processed_error.message)

 async def _wrap_streaming_response(response) -> AsyncGenerator:
     """Wrap streaming response to handle errors during iteration."""
     try: