mirror of https://github.com/kortix-ai/suna.git
model fallback, add xai grok 4
This commit is contained in:
parent
f08b37a96d
commit
957a2c9cbf
|
@ -13,8 +13,8 @@ from utils.config import config, EnvMode
|
|||
from services.supabase import DBConnection
|
||||
from utils.auth_utils import get_current_user_id_from_jwt
|
||||
from pydantic import BaseModel
|
||||
from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES
|
||||
from litellm import cost_per_token
|
||||
from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES
|
||||
from litellm.cost_calculator import cost_per_token
|
||||
import time
|
||||
|
||||
# Initialize Stripe
|
||||
|
@ -26,46 +26,6 @@ TOKEN_PRICE_MULTIPLIER = 1.5
|
|||
# Initialize router
|
||||
router = APIRouter(prefix="/billing", tags=["billing"])
|
||||
|
||||
# Hardcoded pricing for specific models (prices per million tokens)
|
||||
HARDCODED_MODEL_PRICES = {
|
||||
"openrouter/deepseek/deepseek-chat": {
|
||||
"input_cost_per_million_tokens": 0.38,
|
||||
"output_cost_per_million_tokens": 0.89
|
||||
},
|
||||
"deepseek/deepseek-chat": {
|
||||
"input_cost_per_million_tokens": 0.38,
|
||||
"output_cost_per_million_tokens": 0.89
|
||||
},
|
||||
"qwen/qwen3-235b-a22b": {
|
||||
"input_cost_per_million_tokens": 0.13,
|
||||
"output_cost_per_million_tokens": 0.60
|
||||
},
|
||||
"openrouter/qwen/qwen3-235b-a22b": {
|
||||
"input_cost_per_million_tokens": 0.13,
|
||||
"output_cost_per_million_tokens": 0.60
|
||||
},
|
||||
"google/gemini-2.5-flash-preview-05-20": {
|
||||
"input_cost_per_million_tokens": 0.15,
|
||||
"output_cost_per_million_tokens": 0.60
|
||||
},
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20": {
|
||||
"input_cost_per_million_tokens": 0.15,
|
||||
"output_cost_per_million_tokens": 0.60
|
||||
},
|
||||
"anthropic/claude-sonnet-4": {
|
||||
"input_cost_per_million_tokens": 3.00,
|
||||
"output_cost_per_million_tokens": 15.00,
|
||||
},
|
||||
"google/gemini-2.5-pro": {
|
||||
"input_cost_per_million_tokens": 1.25,
|
||||
"output_cost_per_million_tokens": 10.00,
|
||||
},
|
||||
"openrouter/google/gemini-2.5-pro": {
|
||||
"input_cost_per_million_tokens": 1.25,
|
||||
"output_cost_per_million_tokens": 10.00,
|
||||
},
|
||||
}
|
||||
|
||||
def get_model_pricing(model: str) -> tuple[float, float] | None:
|
||||
"""
|
||||
Get pricing for a model. Returns (input_cost_per_million, output_cost_per_million) or None.
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
LLM API interface for making calls to various language models.
|
||||
|
||||
This module provides a unified interface for making API calls to different LLM providers
|
||||
(OpenAI, Anthropic, Groq, etc.) using LiteLLM. It includes support for:
|
||||
(OpenAI, Anthropic, Groq, xAI, etc.) using LiteLLM. It includes support for:
|
||||
- Streaming responses
|
||||
- Tool calls and function calling
|
||||
- Retry logic with exponential backoff
|
||||
|
@ -16,6 +16,7 @@ import json
|
|||
import asyncio
|
||||
from openai import OpenAIError
|
||||
import litellm
|
||||
from litellm.files.main import ModelResponse
|
||||
from utils.logger import logger
|
||||
from utils.config import config
|
||||
|
||||
|
@ -37,7 +38,7 @@ class LLMRetryError(LLMError):
|
|||
|
||||
def setup_api_keys() -> None:
|
||||
"""Set up API keys from environment variables."""
|
||||
providers = ['OPENAI', 'ANTHROPIC', 'GROQ', 'OPENROUTER']
|
||||
providers = ['OPENAI', 'ANTHROPIC', 'GROQ', 'OPENROUTER', 'XAI']
|
||||
for provider in providers:
|
||||
key = getattr(config, f'{provider}_API_KEY')
|
||||
if key:
|
||||
|
@ -64,6 +65,36 @@ def setup_api_keys() -> None:
|
|||
else:
|
||||
logger.warning(f"Missing AWS credentials for Bedrock integration - access_key: {bool(aws_access_key)}, secret_key: {bool(aws_secret_key)}, region: {aws_region}")
|
||||
|
||||
def get_openrouter_fallback(model_name: str) -> Optional[str]:
    """Return the OpenRouter model to retry with when ``model_name`` fails.

    Resolution order:

    1. Names that already start with ``openrouter/`` have no fallback.
    2. An exact match in the explicit mapping below.
    3. The first mapping key that occurs as a substring of ``model_name``.
    4. A provider default chosen by case-insensitive keyword: names
       containing "claude" or "anthropic" fall back to Claude Sonnet 4,
       names containing "xai" or "grok" fall back to Grok 4.

    Args:
        model_name: The model identifier that was originally requested.

    Returns:
        The OpenRouter model identifier to use instead, or ``None`` when no
        fallback applies (including when ``model_name`` is empty or ``None``).
    """
    # This helper is invoked while an API error is being handled; a missing
    # model name must yield "no fallback" instead of raising and masking
    # the error that is currently being processed.
    if not model_name:
        return None

    # Skip if already using OpenRouter.
    if model_name.startswith("openrouter/"):
        return None

    # Explicit mapping of models to their OpenRouter equivalents.
    fallback_mapping = {
        "anthropic/claude-3-7-sonnet-latest": "openrouter/anthropic/claude-3.7-sonnet",
        "anthropic/claude-sonnet-4-20250514": "openrouter/anthropic/claude-sonnet-4",
        "xai/grok-4": "openrouter/x-ai/grok-4",
    }

    # An exact match wins over everything else.
    exact = fallback_mapping.get(model_name)
    if exact is not None:
        return exact

    # Partial match: a known key embedded in a longer identifier. Insertion
    # order of the mapping decides which key is tried first.
    for known_name, fallback in fallback_mapping.items():
        if known_name in model_name:
            return fallback

    # Provider-level defaults, matched case-insensitively.
    lowered = model_name.lower()
    if "claude" in lowered or "anthropic" in lowered:
        return "openrouter/anthropic/claude-sonnet-4"
    if "xai" in lowered or "grok" in lowered:
        return "openrouter/x-ai/grok-4"

    return None
|
||||
|
||||
async def handle_error(error: Exception, attempt: int, max_attempts: int) -> None:
|
||||
"""Handle API errors with appropriate delays and logging."""
|
||||
delay = RATE_LIMIT_DELAY if isinstance(error, litellm.exceptions.RateLimitError) else RETRY_DELAY
|
||||
|
@ -196,6 +227,7 @@ def prepare_params(
|
|||
# Add reasoning_effort for Anthropic models if enabled
|
||||
use_thinking = enable_thinking if enable_thinking is not None else False
|
||||
is_anthropic = "anthropic" in effective_model_name.lower() or "claude" in effective_model_name.lower()
|
||||
is_xai = "xai" in effective_model_name.lower() or model_name.startswith("xai/")
|
||||
|
||||
if is_anthropic and use_thinking:
|
||||
effort_level = reasoning_effort if reasoning_effort else 'low'
|
||||
|
@ -203,6 +235,17 @@ def prepare_params(
|
|||
params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
|
||||
logger.info(f"Anthropic thinking enabled with reasoning_effort='{effort_level}'")
|
||||
|
||||
# Add reasoning_effort for xAI models if enabled
|
||||
if is_xai and use_thinking:
|
||||
effort_level = reasoning_effort if reasoning_effort else 'low'
|
||||
params["reasoning_effort"] = effort_level
|
||||
logger.info(f"xAI thinking enabled with reasoning_effort='{effort_level}'")
|
||||
|
||||
# Add xAI-specific parameters
|
||||
if model_name.startswith("xai/"):
|
||||
logger.debug(f"Preparing xAI parameters for model: {model_name}")
|
||||
# xAI models support standard parameters, no special handling needed beyond reasoning_effort
|
||||
|
||||
return params
|
||||
|
||||
async def make_llm_api_call(
|
||||
|
@ -220,7 +263,7 @@ async def make_llm_api_call(
|
|||
model_id: Optional[str] = None,
|
||||
enable_thinking: Optional[bool] = False,
|
||||
reasoning_effort: Optional[str] = 'low'
|
||||
) -> Union[Dict[str, Any], AsyncGenerator]:
|
||||
) -> Union[Dict[str, Any], AsyncGenerator, ModelResponse]:
|
||||
"""
|
||||
Make an API call to a language model using LiteLLM.
|
||||
|
||||
|
@ -277,6 +320,27 @@ async def make_llm_api_call(
|
|||
# logger.debug(f"Response: {response}")
|
||||
return response
|
||||
|
||||
except litellm.exceptions.InternalServerError as e:
|
||||
# Check if it's an Anthropic overloaded error
|
||||
if "Overloaded" in str(e) and "AnthropicException" in str(e):
|
||||
fallback_model = get_openrouter_fallback(model_name)
|
||||
if fallback_model and not params.get("model", "").startswith("openrouter/"):
|
||||
logger.warning(f"Anthropic overloaded, falling back to OpenRouter: {fallback_model}")
|
||||
params["model"] = fallback_model
|
||||
# Remove any model_id as it's specific to Bedrock
|
||||
params.pop("model_id", None)
|
||||
# Continue with next attempt using fallback model
|
||||
last_error = e
|
||||
await handle_error(e, attempt, MAX_RETRIES)
|
||||
else:
|
||||
# No fallback available or already using OpenRouter
|
||||
last_error = e
|
||||
await handle_error(e, attempt, MAX_RETRIES)
|
||||
else:
|
||||
# Other internal server errors
|
||||
last_error = e
|
||||
await handle_error(e, attempt, MAX_RETRIES)
|
||||
|
||||
except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
|
||||
last_error = e
|
||||
await handle_error(e, attempt, MAX_RETRIES)
|
||||
|
|
|
@ -174,6 +174,7 @@ class Configuration:
|
|||
OPENAI_API_KEY: Optional[str] = None
|
||||
GROQ_API_KEY: Optional[str] = None
|
||||
OPENROUTER_API_KEY: Optional[str] = None
|
||||
XAI_API_KEY: Optional[str] = None
|
||||
OPENROUTER_API_BASE: Optional[str] = "https://openrouter.ai/api/v1"
|
||||
OR_SITE_URL: Optional[str] = "https://kortix.ai"
|
||||
OR_APP_NAME: Optional[str] = "Kortix AI"
|
||||
|
|
|
@ -1,181 +1,165 @@
|
|||
MODEL_ACCESS_TIERS = {
|
||||
"free": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
],
|
||||
"tier_2_20": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"tier_6_50": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"tier_12_100": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
# Master model configuration - single source of truth
|
||||
MODELS = {
|
||||
# Free tier models
|
||||
|
||||
"anthropic/claude-sonnet-4-20250514": {
|
||||
"aliases": ["claude-sonnet-4"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 3.00,
|
||||
"output_cost_per_million_tokens": 15.00
|
||||
},
|
||||
"tier_availability": ["free", "paid"]
|
||||
},
|
||||
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"tier_25_200": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"tier_50_400": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"tier_125_800": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"tier_200_1000": [
|
||||
"openrouter/deepseek/deepseek-chat",
|
||||
# "xai/grok-3-mini-fast-beta",
|
||||
"openai/gpt-4o",
|
||||
# "openai/gpt-4-turbo",
|
||||
# "xai/grok-3-fast-latest",
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
|
||||
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
|
||||
# "openai/gpt-4",
|
||||
"anthropic/claude-3-7-sonnet-latest",
|
||||
"anthropic/claude-3-5-sonnet-latest",
|
||||
"anthropic/claude-3-5-haiku-latest",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
"openai/gpt-4.1",
|
||||
"openai/gpt-4.1-mini",
|
||||
# "openrouter/deepseek/deepseek-r1",
|
||||
"openrouter/qwen/qwen3-235b-a22b",
|
||||
],
|
||||
"openrouter/deepseek/deepseek-chat": {
|
||||
"aliases": ["deepseek"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 0.38,
|
||||
"output_cost_per_million_tokens": 0.89
|
||||
},
|
||||
"tier_availability": ["free", "paid"]
|
||||
},
|
||||
"openrouter/qwen/qwen3-235b-a22b": {
|
||||
"aliases": ["qwen3"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 0.13,
|
||||
"output_cost_per_million_tokens": 0.60
|
||||
},
|
||||
"tier_availability": ["free", "paid"]
|
||||
},
|
||||
"openrouter/google/gemini-2.5-flash-preview-05-20": {
|
||||
"aliases": ["gemini-flash-2.5"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 0.15,
|
||||
"output_cost_per_million_tokens": 0.60
|
||||
},
|
||||
"tier_availability": ["free", "paid"]
|
||||
},
|
||||
|
||||
# Paid tier only models
|
||||
"openrouter/deepseek/deepseek-chat-v3-0324": {
|
||||
"aliases": ["deepseek/deepseek-chat-v3-0324"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 0.38,
|
||||
"output_cost_per_million_tokens": 0.89
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
"openrouter/google/gemini-2.5-pro": {
|
||||
"aliases": ["google/gemini-2.5-pro"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 1.25,
|
||||
"output_cost_per_million_tokens": 10.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
"openai/gpt-4o": {
|
||||
"aliases": ["gpt-4o"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 2.50,
|
||||
"output_cost_per_million_tokens": 10.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
"openai/gpt-4.1": {
|
||||
"aliases": ["gpt-4.1"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 15.00,
|
||||
"output_cost_per_million_tokens": 60.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
"openai/gpt-4.1-mini": {
|
||||
"aliases": ["gpt-4.1-mini"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 1.50,
|
||||
"output_cost_per_million_tokens": 6.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
"anthropic/claude-3-7-sonnet-latest": {
|
||||
"aliases": ["sonnet-3.7"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 3.00,
|
||||
"output_cost_per_million_tokens": 15.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
"anthropic/claude-3-5-sonnet-latest": {
|
||||
"aliases": ["sonnet-3.5"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 3.00,
|
||||
"output_cost_per_million_tokens": 15.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
|
||||
"openrouter/x-ai/grok-4": {
|
||||
"aliases": ["grok-4"],
|
||||
"pricing": {
|
||||
"input_cost_per_million_tokens": 5.00,
|
||||
"output_cost_per_million_tokens": 15.00
|
||||
},
|
||||
"tier_availability": ["paid"]
|
||||
},
|
||||
|
||||
}
|
||||
MODEL_NAME_ALIASES = {
|
||||
# Short names to full names
|
||||
"sonnet-3.7": "anthropic/claude-3-7-sonnet-latest",
|
||||
"sonnet-3.5": "anthropic/claude-3-5-sonnet-latest",
|
||||
"haiku-3.5": "anthropic/claude-3-5-haiku-latest",
|
||||
"claude-sonnet-4": "anthropic/claude-sonnet-4-20250514",
|
||||
# "gpt-4.1": "openai/gpt-4.1-2025-04-14", # Commented out in constants.py
|
||||
"gpt-4o": "openai/gpt-4o",
|
||||
"gpt-4.1": "openai/gpt-4.1",
|
||||
"gpt-4.1-mini": "openai/gpt-4.1-mini",
|
||||
# "gpt-4-turbo": "openai/gpt-4-turbo", # Commented out in constants.py
|
||||
# "gpt-4": "openai/gpt-4", # Commented out in constants.py
|
||||
# "gemini-flash-2.5": "openrouter/google/gemini-2.5-flash-preview", # Commented out in constants.py
|
||||
# "grok-3": "xai/grok-3-fast-latest", # Commented out in constants.py
|
||||
"deepseek": "openrouter/deepseek/deepseek-chat",
|
||||
# "deepseek-r1": "openrouter/deepseek/deepseek-r1",
|
||||
# "grok-3-mini": "xai/grok-3-mini-fast-beta", # Commented out in constants.py
|
||||
"qwen3": "openrouter/qwen/qwen3-235b-a22b", # Commented out in constants.py
|
||||
"gemini-flash-2.5": "openrouter/google/gemini-2.5-flash-preview-05-20",
|
||||
"gemini-2.5-flash:thinking": "openrouter/google/gemini-2.5-flash-preview-05-20:thinking",
|
||||
# "google/gemini-2.5-flash-preview":"openrouter/google/gemini-2.5-flash-preview",
|
||||
# "google/gemini-2.5-flash-preview:thinking":"openrouter/google/gemini-2.5-flash-preview:thinking",
|
||||
"google/gemini-2.5-pro": "openrouter/google/gemini-2.5-pro",
|
||||
"deepseek/deepseek-chat-v3-0324": "openrouter/deepseek/deepseek-chat-v3-0324",
|
||||
# Also include full names as keys to ensure they map to themselves
|
||||
# "anthropic/claude-3-7-sonnet-latest": "anthropic/claude-3-7-sonnet-latest",
|
||||
# "openai/gpt-4.1-2025-04-14": "openai/gpt-4.1-2025-04-14", # Commented out in constants.py
|
||||
# "openai/gpt-4o": "openai/gpt-4o",
|
||||
# "openai/gpt-4-turbo": "openai/gpt-4-turbo", # Commented out in constants.py
|
||||
# "openai/gpt-4": "openai/gpt-4", # Commented out in constants.py
|
||||
# "openrouter/google/gemini-2.5-flash-preview": "openrouter/google/gemini-2.5-flash-preview", # Commented out in constants.py
|
||||
# "xai/grok-3-fast-latest": "xai/grok-3-fast-latest", # Commented out in constants.py
|
||||
# "deepseek/deepseek-chat": "openrouter/deepseek/deepseek-chat",
|
||||
# "deepseek/deepseek-r1": "openrouter/deepseek/deepseek-r1",
|
||||
# "qwen/qwen3-235b-a22b": "openrouter/qwen/qwen3-235b-a22b",
|
||||
# "xai/grok-3-mini-fast-beta": "xai/grok-3-mini-fast-beta", # Commented out in constants.py
|
||||
|
||||
# Derived structures (auto-generated from MODELS)
|
||||
def _generate_model_structures():
|
||||
"""Generate all model structures from the master MODELS dictionary."""
|
||||
|
||||
# Generate tier lists
|
||||
free_models = []
|
||||
paid_models = []
|
||||
|
||||
# Generate aliases
|
||||
aliases = {}
|
||||
|
||||
# Generate pricing
|
||||
pricing = {}
|
||||
|
||||
for model_name, config in MODELS.items():
|
||||
# Add to tier lists
|
||||
if "free" in config["tier_availability"]:
|
||||
free_models.append(model_name)
|
||||
if "paid" in config["tier_availability"]:
|
||||
paid_models.append(model_name)
|
||||
|
||||
# Add aliases
|
||||
for alias in config["aliases"]:
|
||||
aliases[alias] = model_name
|
||||
|
||||
# Add pricing
|
||||
pricing[model_name] = config["pricing"]
|
||||
|
||||
# Also add pricing for legacy model name variations
|
||||
if model_name.startswith("openrouter/deepseek/"):
|
||||
legacy_name = model_name.replace("openrouter/", "")
|
||||
pricing[legacy_name] = config["pricing"]
|
||||
elif model_name.startswith("openrouter/qwen/"):
|
||||
legacy_name = model_name.replace("openrouter/", "")
|
||||
pricing[legacy_name] = config["pricing"]
|
||||
elif model_name.startswith("openrouter/google/"):
|
||||
legacy_name = model_name.replace("openrouter/", "")
|
||||
pricing[legacy_name] = config["pricing"]
|
||||
elif model_name.startswith("anthropic/"):
|
||||
# Add anthropic/claude-sonnet-4 alias for claude-sonnet-4-20250514
|
||||
if "claude-sonnet-4-20250514" in model_name:
|
||||
pricing["anthropic/claude-sonnet-4"] = config["pricing"]
|
||||
|
||||
return free_models, paid_models, aliases, pricing
|
||||
|
||||
# Generate all structures
|
||||
FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES = _generate_model_structures()
|
||||
|
||||
MODEL_ACCESS_TIERS = {
|
||||
"free": FREE_TIER_MODELS,
|
||||
"tier_2_20": PAID_TIER_MODELS,
|
||||
"tier_6_50": PAID_TIER_MODELS,
|
||||
"tier_12_100": PAID_TIER_MODELS,
|
||||
"tier_25_200": PAID_TIER_MODELS,
|
||||
"tier_50_400": PAID_TIER_MODELS,
|
||||
"tier_125_800": PAID_TIER_MODELS,
|
||||
"tier_200_1000": PAID_TIER_MODELS,
|
||||
}
|
||||
|
|
|
@ -28,139 +28,78 @@ export interface CustomModel {
|
|||
label: string;
|
||||
}
|
||||
|
||||
// SINGLE SOURCE OF TRUTH for all model data
|
||||
// SINGLE SOURCE OF TRUTH for all model data - aligned with backend constants
|
||||
export const MODELS = {
|
||||
// Premium high-priority models
|
||||
// Free tier models (available to all users)
|
||||
'claude-sonnet-4': {
|
||||
tier: 'free',
|
||||
priority: 100,
|
||||
recommended: true,
|
||||
lowQuality: false,
|
||||
description: 'Claude Sonnet 4 - Anthropic\'s latest and most advanced AI assistant'
|
||||
lowQuality: false
|
||||
},
|
||||
|
||||
'gemini-flash-2.5': {
|
||||
tier: 'free',
|
||||
priority: 70,
|
||||
recommended: false,
|
||||
lowQuality: false
|
||||
},
|
||||
'qwen3': {
|
||||
tier: 'free',
|
||||
priority: 60,
|
||||
recommended: false,
|
||||
lowQuality: false
|
||||
},
|
||||
|
||||
// Premium/Paid tier models (require subscription)
|
||||
'sonnet-3.7': {
|
||||
tier: 'premium',
|
||||
priority: 99,
|
||||
recommended: false,
|
||||
lowQuality: false
|
||||
},
|
||||
'grok-4': {
|
||||
tier: 'premium',
|
||||
priority: 98,
|
||||
recommended: false,
|
||||
lowQuality: false
|
||||
},
|
||||
'google/gemini-2.5-pro': {
|
||||
tier: 'premium',
|
||||
priority: 100,
|
||||
priority: 97,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'Gemini Pro 2.5 - Google\'s latest advanced model'
|
||||
},
|
||||
'sonnet-3.7': {
|
||||
tier: 'premium',
|
||||
priority: 95,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'Claude 3.7 - Anthropic\'s most powerful AI assistant'
|
||||
},
|
||||
'claude-sonnet-3.7-reasoning': {
|
||||
tier: 'premium',
|
||||
priority: 95,
|
||||
recommended: true,
|
||||
lowQuality: false,
|
||||
description: 'Claude 3.7 with enhanced reasoning capabilities'
|
||||
lowQuality: false
|
||||
},
|
||||
'gpt-4.1': {
|
||||
tier: 'premium',
|
||||
priority: 95,
|
||||
priority: 96,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'GPT-4.1 - OpenAI\'s most advanced model with enhanced reasoning'
|
||||
lowQuality: false
|
||||
},
|
||||
'claude-3.5': {
|
||||
tier: 'premium',
|
||||
priority: 90,
|
||||
recommended: true,
|
||||
lowQuality: false,
|
||||
description: 'Claude 3.5 - Anthropic\'s balanced model with solid capabilities'
|
||||
},
|
||||
'gemini-2.5-flash:thinking': {
|
||||
'sonnet-3.5': {
|
||||
tier: 'premium',
|
||||
priority: 90,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'Gemini Flash 2.5 - Google\'s fast, responsive AI model'
|
||||
lowQuality: false
|
||||
},
|
||||
'gpt-4o': {
|
||||
tier: 'premium',
|
||||
priority: 85,
|
||||
priority: 88,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'GPT-4o - Optimized for speed, reliability, and cost-effectiveness'
|
||||
lowQuality: false
|
||||
},
|
||||
'gpt-4-turbo': {
|
||||
'gemini-2.5-flash:thinking': {
|
||||
tier: 'premium',
|
||||
priority: 85,
|
||||
priority: 84,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'GPT-4 Turbo - OpenAI\'s powerful model with a great balance of performance and cost'
|
||||
},
|
||||
'gpt-4': {
|
||||
tier: 'premium',
|
||||
priority: 80,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'GPT-4 - OpenAI\'s highly capable model with advanced reasoning'
|
||||
lowQuality: false
|
||||
},
|
||||
'deepseek/deepseek-chat-v3-0324': {
|
||||
tier: 'premium',
|
||||
priority: 75,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'DeepSeek Chat - Advanced AI assistant with strong reasoning'
|
||||
lowQuality: false
|
||||
},
|
||||
|
||||
// Free tier models
|
||||
'deepseek-r1': {
|
||||
tier: 'free',
|
||||
priority: 60,
|
||||
recommended: false,
|
||||
lowQuality: false,
|
||||
description: 'DeepSeek R1 - Advanced model with enhanced reasoning and coding capabilities'
|
||||
},
|
||||
'deepseek': {
|
||||
tier: 'free',
|
||||
priority: 50,
|
||||
recommended: false,
|
||||
lowQuality: true,
|
||||
description: 'DeepSeek - Free tier model with good general capabilities'
|
||||
},
|
||||
'gemini-flash-2.5': {
|
||||
tier: 'free',
|
||||
priority: 50,
|
||||
recommended: false,
|
||||
lowQuality: true,
|
||||
description: 'Gemini Flash - Google\'s faster, more efficient model'
|
||||
},
|
||||
'grok-3-mini': {
|
||||
tier: 'free',
|
||||
priority: 45,
|
||||
recommended: false,
|
||||
lowQuality: true,
|
||||
description: 'Grok-3 Mini - Smaller, faster version of Grok-3 for simpler tasks'
|
||||
},
|
||||
'qwen3': {
|
||||
tier: 'free',
|
||||
priority: 40,
|
||||
recommended: false,
|
||||
lowQuality: true,
|
||||
description: 'Qwen3 - Alibaba\'s powerful multilingual language model'
|
||||
},
|
||||
};
|
||||
|
||||
// Model tier definitions
|
||||
export const MODEL_TIERS = {
|
||||
premium: {
|
||||
requiresSubscription: true,
|
||||
baseDescription: 'Advanced model with superior capabilities'
|
||||
},
|
||||
free: {
|
||||
requiresSubscription: false,
|
||||
baseDescription: 'Available to all users'
|
||||
},
|
||||
custom: {
|
||||
requiresSubscription: false,
|
||||
baseDescription: 'User-defined model'
|
||||
}
|
||||
};
|
||||
|
||||
// Helper to check if a user can access a model based on subscription status
|
||||
|
@ -224,6 +163,7 @@ const saveModelPreference = (modelId: string): void => {
|
|||
export const useModelSelection = () => {
|
||||
const [selectedModel, setSelectedModel] = useState(DEFAULT_FREE_MODEL_ID);
|
||||
const [customModels, setCustomModels] = useState<CustomModel[]>([]);
|
||||
const [hasInitialized, setHasInitialized] = useState(false);
|
||||
|
||||
const { data: subscriptionData } = useSubscription();
|
||||
const { data: modelsData, isLoading: isLoadingModels } = useAvailableModels({
|
||||
|
@ -258,14 +198,12 @@ export const useModelSelection = () => {
|
|||
id: DEFAULT_FREE_MODEL_ID,
|
||||
label: 'DeepSeek',
|
||||
requiresSubscription: false,
|
||||
description: MODELS[DEFAULT_FREE_MODEL_ID]?.description || MODEL_TIERS.free.baseDescription,
|
||||
priority: MODELS[DEFAULT_FREE_MODEL_ID]?.priority || 50
|
||||
},
|
||||
{
|
||||
id: DEFAULT_PREMIUM_MODEL_ID,
|
||||
label: 'Claude Sonnet 4',
|
||||
label: 'Sonnet 4',
|
||||
requiresSubscription: true,
|
||||
description: MODELS[DEFAULT_PREMIUM_MODEL_ID]?.description || MODEL_TIERS.premium.baseDescription,
|
||||
priority: MODELS[DEFAULT_PREMIUM_MODEL_ID]?.priority || 100
|
||||
},
|
||||
];
|
||||
|
@ -295,8 +233,6 @@ export const useModelSelection = () => {
|
|||
id: shortName,
|
||||
label: cleanLabel,
|
||||
requiresSubscription: isPremium,
|
||||
description: modelData.description ||
|
||||
(isPremium ? MODEL_TIERS.premium.baseDescription : MODEL_TIERS.free.baseDescription),
|
||||
top: modelData.priority >= 90, // Mark high-priority models as "top"
|
||||
priority: modelData.priority || 0,
|
||||
lowQuality: modelData.lowQuality || false,
|
||||
|
@ -311,7 +247,6 @@ export const useModelSelection = () => {
|
|||
id: model.id,
|
||||
label: model.label || formatModelName(model.id),
|
||||
requiresSubscription: false,
|
||||
description: MODEL_TIERS.custom.baseDescription,
|
||||
top: false,
|
||||
isCustom: true,
|
||||
priority: 30, // Low priority by default
|
||||
|
@ -323,13 +258,13 @@ export const useModelSelection = () => {
|
|||
}
|
||||
|
||||
// Sort models consistently in one place:
|
||||
// 1. First by free/premium (free first)
|
||||
// 1. First by recommended (recommended first)
|
||||
// 2. Then by priority (higher first)
|
||||
// 3. Finally by name (alphabetical)
|
||||
const sortedModels = models.sort((a, b) => {
|
||||
// First by free/premium status
|
||||
if (a.requiresSubscription !== b.requiresSubscription) {
|
||||
return a.requiresSubscription ? -1 : 1;
|
||||
// First by recommended status
|
||||
if (a.recommended !== b.recommended) {
|
||||
return a.recommended ? -1 : 1;
|
||||
}
|
||||
|
||||
// Then by priority (higher first)
|
||||
|
@ -352,66 +287,64 @@ export const useModelSelection = () => {
|
|||
);
|
||||
}, [MODEL_OPTIONS, subscriptionStatus]);
|
||||
|
||||
// Initialize selected model from localStorage or defaults
|
||||
// Initialize selected model from localStorage ONLY ONCE
|
||||
useEffect(() => {
|
||||
if (typeof window === 'undefined') return;
|
||||
if (typeof window === 'undefined' || hasInitialized) return;
|
||||
|
||||
console.log('Initializing model selection from localStorage...');
|
||||
|
||||
try {
|
||||
const savedModel = localStorage.getItem(STORAGE_KEY_MODEL);
|
||||
console.log('Saved model from localStorage:', savedModel);
|
||||
|
||||
// Local mode - allow any model
|
||||
if (isLocalMode()) {
|
||||
if (savedModel && MODEL_OPTIONS.find(option => option.id === savedModel)) {
|
||||
setSelectedModel(savedModel);
|
||||
} else {
|
||||
setSelectedModel(DEFAULT_PREMIUM_MODEL_ID);
|
||||
saveModelPreference(DEFAULT_PREMIUM_MODEL_ID);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Premium subscription - ALWAYS use premium model
|
||||
if (subscriptionStatus === 'active') {
|
||||
// If they had a premium model saved and it's still valid, use it
|
||||
const hasSavedPremiumModel = savedModel &&
|
||||
MODEL_OPTIONS.find(option =>
|
||||
option.id === savedModel &&
|
||||
option.requiresSubscription &&
|
||||
canAccessModel(subscriptionStatus, true)
|
||||
);
|
||||
|
||||
// Otherwise use the default premium model
|
||||
if (hasSavedPremiumModel) {
|
||||
setSelectedModel(savedModel!);
|
||||
} else {
|
||||
setSelectedModel(DEFAULT_PREMIUM_MODEL_ID);
|
||||
saveModelPreference(DEFAULT_PREMIUM_MODEL_ID);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// No subscription - use saved model if accessible (free tier), or default free
|
||||
// If we have a saved model, validate it's still available and accessible
|
||||
if (savedModel) {
|
||||
const modelOption = MODEL_OPTIONS.find(option => option.id === savedModel);
|
||||
if (modelOption && canAccessModel(subscriptionStatus, modelOption.requiresSubscription)) {
|
||||
setSelectedModel(savedModel);
|
||||
} else {
|
||||
setSelectedModel(DEFAULT_FREE_MODEL_ID);
|
||||
saveModelPreference(DEFAULT_FREE_MODEL_ID);
|
||||
// Wait for models to load before validating
|
||||
if (isLoadingModels) {
|
||||
console.log('Models still loading, waiting...');
|
||||
return;
|
||||
}
|
||||
|
||||
const modelOption = MODEL_OPTIONS.find(option => option.id === savedModel);
|
||||
const isCustomModel = isLocalMode() && customModels.some(model => model.id === savedModel);
|
||||
|
||||
// Check if saved model is still valid and accessible
|
||||
if (modelOption || isCustomModel) {
|
||||
const isAccessible = isLocalMode() ||
|
||||
canAccessModel(subscriptionStatus, modelOption?.requiresSubscription ?? false);
|
||||
|
||||
if (isAccessible) {
|
||||
console.log('Using saved model:', savedModel);
|
||||
setSelectedModel(savedModel);
|
||||
setHasInitialized(true);
|
||||
return;
|
||||
} else {
|
||||
console.log('Saved model not accessible, falling back to default');
|
||||
}
|
||||
} else {
|
||||
console.log('Saved model not found in available models, falling back to default');
|
||||
}
|
||||
} else {
|
||||
setSelectedModel(DEFAULT_FREE_MODEL_ID);
|
||||
saveModelPreference(DEFAULT_FREE_MODEL_ID);
|
||||
}
|
||||
|
||||
// Fallback to default model
|
||||
const defaultModel = subscriptionStatus === 'active' ? DEFAULT_PREMIUM_MODEL_ID : DEFAULT_FREE_MODEL_ID;
|
||||
console.log('Using default model:', defaultModel);
|
||||
setSelectedModel(defaultModel);
|
||||
saveModelPreference(defaultModel);
|
||||
setHasInitialized(true);
|
||||
|
||||
} catch (error) {
|
||||
console.warn('Failed to load preferences from localStorage:', error);
|
||||
setSelectedModel(DEFAULT_FREE_MODEL_ID);
|
||||
const defaultModel = subscriptionStatus === 'active' ? DEFAULT_PREMIUM_MODEL_ID : DEFAULT_FREE_MODEL_ID;
|
||||
setSelectedModel(defaultModel);
|
||||
saveModelPreference(defaultModel);
|
||||
setHasInitialized(true);
|
||||
}
|
||||
}, [subscriptionStatus, MODEL_OPTIONS]);
|
||||
}, [subscriptionStatus, MODEL_OPTIONS, isLoadingModels, customModels, hasInitialized]);
|
||||
|
||||
// Handle model selection change
|
||||
const handleModelChange = (modelId: string) => {
|
||||
console.log('handleModelChange', modelId);
|
||||
console.log('handleModelChange called with:', modelId);
|
||||
|
||||
// Refresh custom models from localStorage to ensure we have the latest
|
||||
if (isLocalMode()) {
|
||||
|
@ -441,7 +374,8 @@ export const useModelSelection = () => {
|
|||
console.warn('Model not accessible:', modelId);
|
||||
return;
|
||||
}
|
||||
console.log('setting selected model', modelId);
|
||||
|
||||
console.log('Setting selected model and saving to localStorage:', modelId);
|
||||
setSelectedModel(modelId);
|
||||
saveModelPreference(modelId);
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue