Merge pull request #935 from tnfssc/chore/xai-fallbacks

This commit is contained in:
Sharath 2025-07-11 20:49:34 +05:30 committed by GitHub
commit 5f8038490b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 10 additions and 27 deletions

View File

@@ -160,10 +160,6 @@ def prepare_params(
# "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
"anthropic-beta": "output-128k-2025-02-19"
}
params["fallbacks"] = [{
"model": "openrouter/anthropic/claude-sonnet-4",
"messages": messages,
}]
# params["mock_testing_fallback"] = True
logger.debug("Added Claude-specific headers")
@@ -191,6 +187,14 @@ def prepare_params(
params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
logger.debug(f"Auto-set model_id for Claude 3.7 Sonnet: {params['model_id']}")
fallback_model = get_openrouter_fallback(model_name)
if fallback_model:
params["fallbacks"] = [{
"model": fallback_model,
"messages": messages,
}]
logger.debug(f"Added OpenRouter fallback for model: {model_name} to {fallback_model}")
# Apply Anthropic prompt caching (minimal implementation)
# Check model name *after* potential modifications (like adding bedrock/ prefix)
effective_model_name = params.get("model", model_name) # Use model from params if set, else original
@@ -320,27 +324,6 @@ async def make_llm_api_call(
# logger.debug(f"Response: {response}")
return response
except litellm.exceptions.InternalServerError as e:
# Check if it's an Anthropic overloaded error
if "Overloaded" in str(e) and "AnthropicException" in str(e):
fallback_model = get_openrouter_fallback(model_name)
if fallback_model and not params.get("model", "").startswith("openrouter/"):
logger.warning(f"Anthropic overloaded, falling back to OpenRouter: {fallback_model}")
params["model"] = fallback_model
# Remove any model_id as it's specific to Bedrock
params.pop("model_id", None)
# Continue with next attempt using fallback model
last_error = e
await handle_error(e, attempt, MAX_RETRIES)
else:
# No fallback available or already using OpenRouter
last_error = e
await handle_error(e, attempt, MAX_RETRIES)
else:
# Other internal server errors
last_error = e
await handle_error(e, attempt, MAX_RETRIES)
except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
last_error = e
await handle_error(e, attempt, MAX_RETRIES)

View File

@@ -94,8 +94,8 @@ MODELS = {
"tier_availability": ["paid"]
},
"openrouter/x-ai/grok-4": {
"aliases": ["grok-4"],
"xai/grok-4": {
"aliases": ["grok-4", "x-ai/grok-4"],
"pricing": {
"input_cost_per_million_tokens": 5.00,
"output_cost_per_million_tokens": 15.00