mirror of https://github.com/kortix-ai/suna.git
Merge pull request #935 from tnfssc/chore/xai-fallbacks
commit 5f8038490b
@@ -160,10 +160,6 @@ def prepare_params(
             # "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
             "anthropic-beta": "output-128k-2025-02-19"
         }
-        params["fallbacks"] = [{
-            "model": "openrouter/anthropic/claude-sonnet-4",
-            "messages": messages,
-        }]
         # params["mock_testing_fallback"] = True
         logger.debug("Added Claude-specific headers")
 
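The hunk above builds Claude-specific beta headers, but the assignment target sits outside the shown context. A minimal sketch of how such headers are commonly attached to the LiteLLM call parameters, assuming the dict shown ends up in params["extra_headers"] (an assumption; the exact key is not visible in this diff):

    # Hedged sketch -- assumes the header dict above is passed via LiteLLM's extra_headers.
    def add_claude_headers(params: dict, model_name: str) -> None:
        if "claude" in model_name.lower() or "anthropic" in model_name.lower():
            params["extra_headers"] = {
                # "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
                "anthropic-beta": "output-128k-2025-02-19",
            }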
@@ -191,6 +187,14 @@ def prepare_params(
         params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
         logger.debug(f"Auto-set model_id for Claude 3.7 Sonnet: {params['model_id']}")
 
+    fallback_model = get_openrouter_fallback(model_name)
+    if fallback_model:
+        params["fallbacks"] = [{
+            "model": fallback_model,
+            "messages": messages,
+        }]
+        logger.debug(f"Added OpenRouter fallback for model: {model_name} to {fallback_model}")
+
     # Apply Anthropic prompt caching (minimal implementation)
     # Check model name *after* potential modifications (like adding bedrock/ prefix)
     effective_model_name = params.get("model", model_name) # Use model from params if set, else original
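get_openrouter_fallback itself is not part of this diff. A hedged sketch of what such a lookup could look like; the mapping table and normalization below are illustrative assumptions, with only the Grok and Claude OpenRouter ids suggested by this PR:

    # Hypothetical sketch -- the real get_openrouter_fallback lives elsewhere in the repo.
    OPENROUTER_FALLBACKS = {
        "xai/grok-4": "openrouter/x-ai/grok-4",
        "anthropic/claude-sonnet-4": "openrouter/anthropic/claude-sonnet-4",
    }

    def get_openrouter_fallback(model_name: str) -> str | None:
        # Assumed normalization: drop a bedrock/ prefix before the lookup.
        normalized = model_name.removeprefix("bedrock/")
        if normalized.startswith("openrouter/"):
            return None  # already on OpenRouter, nothing to fall back to
        return OPENROUTER_FALLBACKS.get(normalized)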
@@ -320,27 +324,6 @@ async def make_llm_api_call(
             # logger.debug(f"Response: {response}")
             return response
 
-        except litellm.exceptions.InternalServerError as e:
-            # Check if it's an Anthropic overloaded error
-            if "Overloaded" in str(e) and "AnthropicException" in str(e):
-                fallback_model = get_openrouter_fallback(model_name)
-                if fallback_model and not params.get("model", "").startswith("openrouter/"):
-                    logger.warning(f"Anthropic overloaded, falling back to OpenRouter: {fallback_model}")
-                    params["model"] = fallback_model
-                    # Remove any model_id as it's specific to Bedrock
-                    params.pop("model_id", None)
-                    # Continue with next attempt using fallback model
-                    last_error = e
-                    await handle_error(e, attempt, MAX_RETRIES)
-                else:
-                    # No fallback available or already using OpenRouter
-                    last_error = e
-                    await handle_error(e, attempt, MAX_RETRIES)
-            else:
-                # Other internal server errors
-                last_error = e
-                await handle_error(e, attempt, MAX_RETRIES)
-
         except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
             last_error = e
             await handle_error(e, attempt, MAX_RETRIES)
 
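With the dedicated InternalServerError branch gone, overload handling leans on two things: the fallbacks list that prepare_params now builds, and the fact that LiteLLM's exceptions subclass the OpenAI error types, so an InternalServerError (e.g. an Anthropic "Overloaded") is still caught by the remaining generic handler, assuming OpenAIError here is openai.OpenAIError. A sketch of the simplified retry shape under those assumptions; the stand-ins for MAX_RETRIES and handle_error are illustrative, not the repo's code:

    import asyncio
    import json

    import litellm
    from openai import OpenAIError

    MAX_RETRIES = 3  # stand-in; the real constant lives in the surrounding module

    async def handle_error(e: Exception, attempt: int, max_retries: int) -> None:
        # Stand-in for the module's handle_error: back off before the next attempt.
        await asyncio.sleep(min(2 ** attempt, 10))

    async def make_llm_api_call_sketch(params: dict):
        last_error = None
        for attempt in range(MAX_RETRIES):
            try:
                # params may already carry the "fallbacks" list built in prepare_params
                return await litellm.acompletion(**params)
            except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
                # LiteLLM's InternalServerError subclasses the OpenAI error types,
                # so an Anthropic overload is retried here instead of in its own branch.
                last_error = e
                await handle_error(e, attempt, MAX_RETRIES)
        raise last_error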
@@ -94,8 +94,8 @@ MODELS = {
         "tier_availability": ["paid"]
     },
 
-    "openrouter/x-ai/grok-4": {
-        "aliases": ["grok-4"],
+    "xai/grok-4": {
+        "aliases": ["grok-4", "x-ai/grok-4"],
         "pricing": {
             "input_cost_per_million_tokens": 5.00,
             "output_cost_per_million_tokens": 15.00
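The config change moves the Grok entry from the OpenRouter-prefixed id to the native xai/ id and keeps the old id as an alias, so existing references keep resolving. A hedged sketch of how alias resolution against this MODELS dict typically works; resolve_model_name is an illustrative helper, not necessarily the repo's actual function:

    # Illustrative alias lookup over the registry entry shown above.
    MODELS = {
        "xai/grok-4": {
            "aliases": ["grok-4", "x-ai/grok-4"],
            "pricing": {
                "input_cost_per_million_tokens": 5.00,
                "output_cost_per_million_tokens": 15.00,
            },
        },
    }

    def resolve_model_name(name: str) -> str:
        # Return the canonical key for a name or any of its aliases; pass unknowns through.
        if name in MODELS:
            return name
        for canonical, entry in MODELS.items():
            if name in entry.get("aliases", []):
                return canonical
        return name

    # resolve_model_name("grok-4") == resolve_model_name("x-ai/grok-4") == "xai/grok-4"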