Add OpenRouter model fallback handling and xAI Grok 4 support

This commit is contained in:
marko-kraemer 2025-07-10 12:58:10 +02:00
parent f08b37a96d
commit 957a2c9cbf
5 changed files with 326 additions and 383 deletions

View File

@ -13,8 +13,8 @@ from utils.config import config, EnvMode
from services.supabase import DBConnection
from utils.auth_utils import get_current_user_id_from_jwt
from pydantic import BaseModel
from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES
from litellm import cost_per_token
from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES
from litellm.cost_calculator import cost_per_token
import time
# Initialize Stripe
@ -26,46 +26,6 @@ TOKEN_PRICE_MULTIPLIER = 1.5
# Initialize router
router = APIRouter(prefix="/billing", tags=["billing"])
# Hardcoded pricing for specific models (prices per million tokens)
# Prices are USD per million tokens. Several models are listed both with and
# without the "openrouter/" prefix so lookups succeed regardless of which
# routing form the caller used.
HARDCODED_MODEL_PRICES = {
    "openrouter/deepseek/deepseek-chat": {
        "input_cost_per_million_tokens": 0.38,
        "output_cost_per_million_tokens": 0.89
    },
    "deepseek/deepseek-chat": {
        "input_cost_per_million_tokens": 0.38,
        "output_cost_per_million_tokens": 0.89
    },
    "qwen/qwen3-235b-a22b": {
        "input_cost_per_million_tokens": 0.13,
        "output_cost_per_million_tokens": 0.60
    },
    "openrouter/qwen/qwen3-235b-a22b": {
        "input_cost_per_million_tokens": 0.13,
        "output_cost_per_million_tokens": 0.60
    },
    "google/gemini-2.5-flash-preview-05-20": {
        "input_cost_per_million_tokens": 0.15,
        "output_cost_per_million_tokens": 0.60
    },
    "openrouter/google/gemini-2.5-flash-preview-05-20": {
        "input_cost_per_million_tokens": 0.15,
        "output_cost_per_million_tokens": 0.60
    },
    "anthropic/claude-sonnet-4": {
        "input_cost_per_million_tokens": 3.00,
        "output_cost_per_million_tokens": 15.00,
    },
    "google/gemini-2.5-pro": {
        "input_cost_per_million_tokens": 1.25,
        "output_cost_per_million_tokens": 10.00,
    },
    "openrouter/google/gemini-2.5-pro": {
        "input_cost_per_million_tokens": 1.25,
        "output_cost_per_million_tokens": 10.00,
    },
}
def get_model_pricing(model: str) -> tuple[float, float] | None:
"""
Get pricing for a model. Returns (input_cost_per_million, output_cost_per_million) or None.

View File

@ -2,7 +2,7 @@
LLM API interface for making calls to various language models.
This module provides a unified interface for making API calls to different LLM providers
(OpenAI, Anthropic, Groq, etc.) using LiteLLM. It includes support for:
(OpenAI, Anthropic, Groq, xAI, etc.) using LiteLLM. It includes support for:
- Streaming responses
- Tool calls and function calling
- Retry logic with exponential backoff
@ -16,6 +16,7 @@ import json
import asyncio
from openai import OpenAIError
import litellm
from litellm.files.main import ModelResponse
from utils.logger import logger
from utils.config import config
@ -37,7 +38,7 @@ class LLMRetryError(LLMError):
def setup_api_keys() -> None:
"""Set up API keys from environment variables."""
providers = ['OPENAI', 'ANTHROPIC', 'GROQ', 'OPENROUTER']
providers = ['OPENAI', 'ANTHROPIC', 'GROQ', 'OPENROUTER', 'XAI']
for provider in providers:
key = getattr(config, f'{provider}_API_KEY')
if key:
@ -64,6 +65,36 @@ def setup_api_keys() -> None:
else:
logger.warning(f"Missing AWS credentials for Bedrock integration - access_key: {bool(aws_access_key)}, secret_key: {bool(aws_secret_key)}, region: {aws_region}")
def get_openrouter_fallback(model_name: str) -> Optional[str]:
    """Return the OpenRouter model to fall back to for *model_name*.

    Returns None when the model is already routed through OpenRouter or when
    no suitable fallback is known.
    """
    # Already on OpenRouter -- there is nothing further to fall back to.
    if model_name.startswith("openrouter/"):
        return None

    # Known OpenRouter equivalents for specific model identifiers.
    fallback_mapping = {
        "anthropic/claude-3-7-sonnet-latest": "openrouter/anthropic/claude-3.7-sonnet",
        "anthropic/claude-sonnet-4-20250514": "openrouter/anthropic/claude-sonnet-4",
        "xai/grok-4": "openrouter/x-ai/grok-4",
    }

    exact = fallback_mapping.get(model_name)
    if exact is not None:
        return exact

    # Substring matches cover provider-prefixed variants (e.g. Bedrock model IDs).
    for known_name, fallback in fallback_mapping.items():
        if known_name in model_name:
            return fallback

    # Provider-level defaults when no specific mapping applies.
    lowered = model_name.lower()
    if "claude" in lowered or "anthropic" in lowered:
        return "openrouter/anthropic/claude-sonnet-4"
    if "xai" in lowered or "grok" in lowered:
        return "openrouter/x-ai/grok-4"

    return None
async def handle_error(error: Exception, attempt: int, max_attempts: int) -> None:
"""Handle API errors with appropriate delays and logging."""
delay = RATE_LIMIT_DELAY if isinstance(error, litellm.exceptions.RateLimitError) else RETRY_DELAY
@ -196,6 +227,7 @@ def prepare_params(
# Add reasoning_effort for Anthropic models if enabled
use_thinking = enable_thinking if enable_thinking is not None else False
is_anthropic = "anthropic" in effective_model_name.lower() or "claude" in effective_model_name.lower()
is_xai = "xai" in effective_model_name.lower() or model_name.startswith("xai/")
if is_anthropic and use_thinking:
effort_level = reasoning_effort if reasoning_effort else 'low'
@ -203,6 +235,17 @@ def prepare_params(
params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
logger.info(f"Anthropic thinking enabled with reasoning_effort='{effort_level}'")
# Add reasoning_effort for xAI models if enabled
if is_xai and use_thinking:
effort_level = reasoning_effort if reasoning_effort else 'low'
params["reasoning_effort"] = effort_level
logger.info(f"xAI thinking enabled with reasoning_effort='{effort_level}'")
# Add xAI-specific parameters
if model_name.startswith("xai/"):
logger.debug(f"Preparing xAI parameters for model: {model_name}")
# xAI models support standard parameters, no special handling needed beyond reasoning_effort
return params
async def make_llm_api_call(
@ -220,7 +263,7 @@ async def make_llm_api_call(
model_id: Optional[str] = None,
enable_thinking: Optional[bool] = False,
reasoning_effort: Optional[str] = 'low'
) -> Union[Dict[str, Any], AsyncGenerator]:
) -> Union[Dict[str, Any], AsyncGenerator, ModelResponse]:
"""
Make an API call to a language model using LiteLLM.
@ -277,6 +320,27 @@ async def make_llm_api_call(
# logger.debug(f"Response: {response}")
return response
except litellm.exceptions.InternalServerError as e:
# Check if it's an Anthropic overloaded error
if "Overloaded" in str(e) and "AnthropicException" in str(e):
fallback_model = get_openrouter_fallback(model_name)
if fallback_model and not params.get("model", "").startswith("openrouter/"):
logger.warning(f"Anthropic overloaded, falling back to OpenRouter: {fallback_model}")
params["model"] = fallback_model
# Remove any model_id as it's specific to Bedrock
params.pop("model_id", None)
# Continue with next attempt using fallback model
last_error = e
await handle_error(e, attempt, MAX_RETRIES)
else:
# No fallback available or already using OpenRouter
last_error = e
await handle_error(e, attempt, MAX_RETRIES)
else:
# Other internal server errors
last_error = e
await handle_error(e, attempt, MAX_RETRIES)
except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
last_error = e
await handle_error(e, attempt, MAX_RETRIES)

View File

@ -174,6 +174,7 @@ class Configuration:
OPENAI_API_KEY: Optional[str] = None
GROQ_API_KEY: Optional[str] = None
OPENROUTER_API_KEY: Optional[str] = None
XAI_API_KEY: Optional[str] = None
OPENROUTER_API_BASE: Optional[str] = "https://openrouter.ai/api/v1"
OR_SITE_URL: Optional[str] = "https://kortix.ai"
OR_APP_NAME: Optional[str] = "Kortix AI"

View File

@ -1,181 +1,165 @@
MODEL_ACCESS_TIERS = {
"free": [
"openrouter/deepseek/deepseek-chat",
"openrouter/qwen/qwen3-235b-a22b",
"openrouter/google/gemini-2.5-flash-preview-05-20",
"anthropic/claude-sonnet-4-20250514",
],
"tier_2_20": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
"anthropic/claude-3-5-haiku-latest",
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-sonnet-4-20250514",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"openrouter/deepseek/deepseek-chat-v3-0324",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"tier_6_50": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"anthropic/claude-3-5-haiku-latest",
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-sonnet-4-20250514",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"openrouter/deepseek/deepseek-chat-v3-0324",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"tier_12_100": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
# Master model configuration - single source of truth
MODELS = {
# Free tier models
"anthropic/claude-sonnet-4-20250514": {
"aliases": ["claude-sonnet-4"],
"pricing": {
"input_cost_per_million_tokens": 3.00,
"output_cost_per_million_tokens": 15.00
},
"tier_availability": ["free", "paid"]
},
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
"openrouter/deepseek/deepseek-chat-v3-0324",
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-3-5-haiku-latest",
"anthropic/claude-sonnet-4-20250514",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"tier_25_200": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
"openrouter/deepseek/deepseek-chat-v3-0324",
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-sonnet-4-20250514",
"anthropic/claude-3-5-haiku-latest",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"tier_50_400": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-sonnet-4-20250514",
"anthropic/claude-3-5-haiku-latest",
"openrouter/deepseek/deepseek-chat-v3-0324",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"tier_125_800": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-3-5-haiku-latest",
"anthropic/claude-sonnet-4-20250514",
"openrouter/deepseek/deepseek-chat-v3-0324",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"tier_200_1000": [
"openrouter/deepseek/deepseek-chat",
# "xai/grok-3-mini-fast-beta",
"openai/gpt-4o",
# "openai/gpt-4-turbo",
# "xai/grok-3-fast-latest",
"openrouter/google/gemini-2.5-flash-preview-05-20", # Added
"openrouter/google/gemini-2.5-pro", # Added Gemini 2.5 Pro
# "openai/gpt-4",
"anthropic/claude-3-7-sonnet-latest",
"anthropic/claude-3-5-sonnet-latest",
"anthropic/claude-3-5-haiku-latest",
"anthropic/claude-sonnet-4-20250514",
"openrouter/deepseek/deepseek-chat-v3-0324",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
# "openrouter/deepseek/deepseek-r1",
"openrouter/qwen/qwen3-235b-a22b",
],
"openrouter/deepseek/deepseek-chat": {
"aliases": ["deepseek"],
"pricing": {
"input_cost_per_million_tokens": 0.38,
"output_cost_per_million_tokens": 0.89
},
"tier_availability": ["free", "paid"]
},
"openrouter/qwen/qwen3-235b-a22b": {
"aliases": ["qwen3"],
"pricing": {
"input_cost_per_million_tokens": 0.13,
"output_cost_per_million_tokens": 0.60
},
"tier_availability": ["free", "paid"]
},
"openrouter/google/gemini-2.5-flash-preview-05-20": {
"aliases": ["gemini-flash-2.5"],
"pricing": {
"input_cost_per_million_tokens": 0.15,
"output_cost_per_million_tokens": 0.60
},
"tier_availability": ["free", "paid"]
},
# Paid tier only models
"openrouter/deepseek/deepseek-chat-v3-0324": {
"aliases": ["deepseek/deepseek-chat-v3-0324"],
"pricing": {
"input_cost_per_million_tokens": 0.38,
"output_cost_per_million_tokens": 0.89
},
"tier_availability": ["paid"]
},
"openrouter/google/gemini-2.5-pro": {
"aliases": ["google/gemini-2.5-pro"],
"pricing": {
"input_cost_per_million_tokens": 1.25,
"output_cost_per_million_tokens": 10.00
},
"tier_availability": ["paid"]
},
"openai/gpt-4o": {
"aliases": ["gpt-4o"],
"pricing": {
"input_cost_per_million_tokens": 2.50,
"output_cost_per_million_tokens": 10.00
},
"tier_availability": ["paid"]
},
"openai/gpt-4.1": {
"aliases": ["gpt-4.1"],
"pricing": {
"input_cost_per_million_tokens": 15.00,
"output_cost_per_million_tokens": 60.00
},
"tier_availability": ["paid"]
},
"openai/gpt-4.1-mini": {
"aliases": ["gpt-4.1-mini"],
"pricing": {
"input_cost_per_million_tokens": 1.50,
"output_cost_per_million_tokens": 6.00
},
"tier_availability": ["paid"]
},
"anthropic/claude-3-7-sonnet-latest": {
"aliases": ["sonnet-3.7"],
"pricing": {
"input_cost_per_million_tokens": 3.00,
"output_cost_per_million_tokens": 15.00
},
"tier_availability": ["paid"]
},
"anthropic/claude-3-5-sonnet-latest": {
"aliases": ["sonnet-3.5"],
"pricing": {
"input_cost_per_million_tokens": 3.00,
"output_cost_per_million_tokens": 15.00
},
"tier_availability": ["paid"]
},
"openrouter/x-ai/grok-4": {
"aliases": ["grok-4"],
"pricing": {
"input_cost_per_million_tokens": 5.00,
"output_cost_per_million_tokens": 15.00
},
"tier_availability": ["paid"]
},
}
MODEL_NAME_ALIASES = {
# Short names to full names
"sonnet-3.7": "anthropic/claude-3-7-sonnet-latest",
"sonnet-3.5": "anthropic/claude-3-5-sonnet-latest",
"haiku-3.5": "anthropic/claude-3-5-haiku-latest",
"claude-sonnet-4": "anthropic/claude-sonnet-4-20250514",
# "gpt-4.1": "openai/gpt-4.1-2025-04-14", # Commented out in constants.py
"gpt-4o": "openai/gpt-4o",
"gpt-4.1": "openai/gpt-4.1",
"gpt-4.1-mini": "openai/gpt-4.1-mini",
# "gpt-4-turbo": "openai/gpt-4-turbo", # Commented out in constants.py
# "gpt-4": "openai/gpt-4", # Commented out in constants.py
# "gemini-flash-2.5": "openrouter/google/gemini-2.5-flash-preview", # Commented out in constants.py
# "grok-3": "xai/grok-3-fast-latest", # Commented out in constants.py
"deepseek": "openrouter/deepseek/deepseek-chat",
# "deepseek-r1": "openrouter/deepseek/deepseek-r1",
# "grok-3-mini": "xai/grok-3-mini-fast-beta", # Commented out in constants.py
"qwen3": "openrouter/qwen/qwen3-235b-a22b", # Commented out in constants.py
"gemini-flash-2.5": "openrouter/google/gemini-2.5-flash-preview-05-20",
"gemini-2.5-flash:thinking": "openrouter/google/gemini-2.5-flash-preview-05-20:thinking",
# "google/gemini-2.5-flash-preview":"openrouter/google/gemini-2.5-flash-preview",
# "google/gemini-2.5-flash-preview:thinking":"openrouter/google/gemini-2.5-flash-preview:thinking",
"google/gemini-2.5-pro": "openrouter/google/gemini-2.5-pro",
"deepseek/deepseek-chat-v3-0324": "openrouter/deepseek/deepseek-chat-v3-0324",
# Also include full names as keys to ensure they map to themselves
# "anthropic/claude-3-7-sonnet-latest": "anthropic/claude-3-7-sonnet-latest",
# "openai/gpt-4.1-2025-04-14": "openai/gpt-4.1-2025-04-14", # Commented out in constants.py
# "openai/gpt-4o": "openai/gpt-4o",
# "openai/gpt-4-turbo": "openai/gpt-4-turbo", # Commented out in constants.py
# "openai/gpt-4": "openai/gpt-4", # Commented out in constants.py
# "openrouter/google/gemini-2.5-flash-preview": "openrouter/google/gemini-2.5-flash-preview", # Commented out in constants.py
# "xai/grok-3-fast-latest": "xai/grok-3-fast-latest", # Commented out in constants.py
# "deepseek/deepseek-chat": "openrouter/deepseek/deepseek-chat",
# "deepseek/deepseek-r1": "openrouter/deepseek/deepseek-r1",
# "qwen/qwen3-235b-a22b": "openrouter/qwen/qwen3-235b-a22b",
# "xai/grok-3-mini-fast-beta": "xai/grok-3-mini-fast-beta", # Commented out in constants.py
# Derived structures (auto-generated from MODELS)
def _generate_model_structures():
"""Generate all model structures from the master MODELS dictionary."""
# Generate tier lists
free_models = []
paid_models = []
# Generate aliases
aliases = {}
# Generate pricing
pricing = {}
for model_name, config in MODELS.items():
# Add to tier lists
if "free" in config["tier_availability"]:
free_models.append(model_name)
if "paid" in config["tier_availability"]:
paid_models.append(model_name)
# Add aliases
for alias in config["aliases"]:
aliases[alias] = model_name
# Add pricing
pricing[model_name] = config["pricing"]
# Also add pricing for legacy model name variations
if model_name.startswith("openrouter/deepseek/"):
legacy_name = model_name.replace("openrouter/", "")
pricing[legacy_name] = config["pricing"]
elif model_name.startswith("openrouter/qwen/"):
legacy_name = model_name.replace("openrouter/", "")
pricing[legacy_name] = config["pricing"]
elif model_name.startswith("openrouter/google/"):
legacy_name = model_name.replace("openrouter/", "")
pricing[legacy_name] = config["pricing"]
elif model_name.startswith("anthropic/"):
# Add anthropic/claude-sonnet-4 alias for claude-sonnet-4-20250514
if "claude-sonnet-4-20250514" in model_name:
pricing["anthropic/claude-sonnet-4"] = config["pricing"]
return free_models, paid_models, aliases, pricing
# Generate all structures
# Tier lists, alias map, and pricing table are all derived from the MODELS
# registry so there is a single source of truth for model configuration.
FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES = _generate_model_structures()
# Maps subscription tier name -> list of model names available on that tier.
# Every paid tier currently exposes the same model list.
MODEL_ACCESS_TIERS = {
    "free": FREE_TIER_MODELS,
    "tier_2_20": PAID_TIER_MODELS,
    "tier_6_50": PAID_TIER_MODELS,
    "tier_12_100": PAID_TIER_MODELS,
    "tier_25_200": PAID_TIER_MODELS,
    "tier_50_400": PAID_TIER_MODELS,
    "tier_125_800": PAID_TIER_MODELS,
    "tier_200_1000": PAID_TIER_MODELS,
}

View File

@ -28,139 +28,78 @@ export interface CustomModel {
label: string;
}
// SINGLE SOURCE OF TRUTH for all model data
// SINGLE SOURCE OF TRUTH for all model data - aligned with backend constants
export const MODELS = {
// Premium high-priority models
// Free tier models (available to all users)
'claude-sonnet-4': {
tier: 'free',
priority: 100,
recommended: true,
lowQuality: false,
description: 'Claude Sonnet 4 - Anthropic\'s latest and most advanced AI assistant'
lowQuality: false
},
'gemini-flash-2.5': {
tier: 'free',
priority: 70,
recommended: false,
lowQuality: false
},
'qwen3': {
tier: 'free',
priority: 60,
recommended: false,
lowQuality: false
},
// Premium/Paid tier models (require subscription)
'sonnet-3.7': {
tier: 'premium',
priority: 99,
recommended: false,
lowQuality: false
},
'grok-4': {
tier: 'premium',
priority: 98,
recommended: false,
lowQuality: false
},
'google/gemini-2.5-pro': {
tier: 'premium',
priority: 100,
priority: 97,
recommended: false,
lowQuality: false,
description: 'Gemini Pro 2.5 - Google\'s latest advanced model'
},
'sonnet-3.7': {
tier: 'premium',
priority: 95,
recommended: false,
lowQuality: false,
description: 'Claude 3.7 - Anthropic\'s most powerful AI assistant'
},
'claude-sonnet-3.7-reasoning': {
tier: 'premium',
priority: 95,
recommended: true,
lowQuality: false,
description: 'Claude 3.7 with enhanced reasoning capabilities'
lowQuality: false
},
'gpt-4.1': {
tier: 'premium',
priority: 95,
priority: 96,
recommended: false,
lowQuality: false,
description: 'GPT-4.1 - OpenAI\'s most advanced model with enhanced reasoning'
lowQuality: false
},
'claude-3.5': {
tier: 'premium',
priority: 90,
recommended: true,
lowQuality: false,
description: 'Claude 3.5 - Anthropic\'s balanced model with solid capabilities'
},
'gemini-2.5-flash:thinking': {
'sonnet-3.5': {
tier: 'premium',
priority: 90,
recommended: false,
lowQuality: false,
description: 'Gemini Flash 2.5 - Google\'s fast, responsive AI model'
lowQuality: false
},
'gpt-4o': {
tier: 'premium',
priority: 85,
priority: 88,
recommended: false,
lowQuality: false,
description: 'GPT-4o - Optimized for speed, reliability, and cost-effectiveness'
lowQuality: false
},
'gpt-4-turbo': {
'gemini-2.5-flash:thinking': {
tier: 'premium',
priority: 85,
priority: 84,
recommended: false,
lowQuality: false,
description: 'GPT-4 Turbo - OpenAI\'s powerful model with a great balance of performance and cost'
},
'gpt-4': {
tier: 'premium',
priority: 80,
recommended: false,
lowQuality: false,
description: 'GPT-4 - OpenAI\'s highly capable model with advanced reasoning'
lowQuality: false
},
'deepseek/deepseek-chat-v3-0324': {
tier: 'premium',
priority: 75,
recommended: false,
lowQuality: false,
description: 'DeepSeek Chat - Advanced AI assistant with strong reasoning'
lowQuality: false
},
// Free tier models
'deepseek-r1': {
tier: 'free',
priority: 60,
recommended: false,
lowQuality: false,
description: 'DeepSeek R1 - Advanced model with enhanced reasoning and coding capabilities'
},
'deepseek': {
tier: 'free',
priority: 50,
recommended: false,
lowQuality: true,
description: 'DeepSeek - Free tier model with good general capabilities'
},
'gemini-flash-2.5': {
tier: 'free',
priority: 50,
recommended: false,
lowQuality: true,
description: 'Gemini Flash - Google\'s faster, more efficient model'
},
'grok-3-mini': {
tier: 'free',
priority: 45,
recommended: false,
lowQuality: true,
description: 'Grok-3 Mini - Smaller, faster version of Grok-3 for simpler tasks'
},
'qwen3': {
tier: 'free',
priority: 40,
recommended: false,
lowQuality: true,
description: 'Qwen3 - Alibaba\'s powerful multilingual language model'
},
};
// Model tier definitions
// Describes subscription gating per tier and the fallback description used
// when an individual model entry provides no description of its own.
export const MODEL_TIERS = {
  // Paid models: only selectable with an active subscription.
  premium: {
    requiresSubscription: true,
    baseDescription: 'Advanced model with superior capabilities'
  },
  // Free models: selectable by every user.
  free: {
    requiresSubscription: false,
    baseDescription: 'Available to all users'
  },
  // User-defined models (local mode); never gated by subscription.
  custom: {
    requiresSubscription: false,
    baseDescription: 'User-defined model'
  }
};
// Helper to check if a user can access a model based on subscription status
@ -224,6 +163,7 @@ const saveModelPreference = (modelId: string): void => {
export const useModelSelection = () => {
const [selectedModel, setSelectedModel] = useState(DEFAULT_FREE_MODEL_ID);
const [customModels, setCustomModels] = useState<CustomModel[]>([]);
const [hasInitialized, setHasInitialized] = useState(false);
const { data: subscriptionData } = useSubscription();
const { data: modelsData, isLoading: isLoadingModels } = useAvailableModels({
@ -258,14 +198,12 @@ export const useModelSelection = () => {
id: DEFAULT_FREE_MODEL_ID,
label: 'DeepSeek',
requiresSubscription: false,
description: MODELS[DEFAULT_FREE_MODEL_ID]?.description || MODEL_TIERS.free.baseDescription,
priority: MODELS[DEFAULT_FREE_MODEL_ID]?.priority || 50
},
{
id: DEFAULT_PREMIUM_MODEL_ID,
label: 'Claude Sonnet 4',
label: 'Sonnet 4',
requiresSubscription: true,
description: MODELS[DEFAULT_PREMIUM_MODEL_ID]?.description || MODEL_TIERS.premium.baseDescription,
priority: MODELS[DEFAULT_PREMIUM_MODEL_ID]?.priority || 100
},
];
@ -295,8 +233,6 @@ export const useModelSelection = () => {
id: shortName,
label: cleanLabel,
requiresSubscription: isPremium,
description: modelData.description ||
(isPremium ? MODEL_TIERS.premium.baseDescription : MODEL_TIERS.free.baseDescription),
top: modelData.priority >= 90, // Mark high-priority models as "top"
priority: modelData.priority || 0,
lowQuality: modelData.lowQuality || false,
@ -311,7 +247,6 @@ export const useModelSelection = () => {
id: model.id,
label: model.label || formatModelName(model.id),
requiresSubscription: false,
description: MODEL_TIERS.custom.baseDescription,
top: false,
isCustom: true,
priority: 30, // Low priority by default
@ -323,13 +258,13 @@ export const useModelSelection = () => {
}
// Sort models consistently in one place:
// 1. First by free/premium (free first)
// 1. First by recommended (recommended first)
// 2. Then by priority (higher first)
// 3. Finally by name (alphabetical)
const sortedModels = models.sort((a, b) => {
// First by free/premium status
if (a.requiresSubscription !== b.requiresSubscription) {
return a.requiresSubscription ? -1 : 1;
// First by recommended status
if (a.recommended !== b.recommended) {
return a.recommended ? -1 : 1;
}
// Then by priority (higher first)
@ -352,66 +287,64 @@ export const useModelSelection = () => {
);
}, [MODEL_OPTIONS, subscriptionStatus]);
// Initialize selected model from localStorage or defaults
// Initialize selected model from localStorage ONLY ONCE
useEffect(() => {
if (typeof window === 'undefined') return;
if (typeof window === 'undefined' || hasInitialized) return;
console.log('Initializing model selection from localStorage...');
try {
const savedModel = localStorage.getItem(STORAGE_KEY_MODEL);
console.log('Saved model from localStorage:', savedModel);
// Local mode - allow any model
if (isLocalMode()) {
if (savedModel && MODEL_OPTIONS.find(option => option.id === savedModel)) {
setSelectedModel(savedModel);
} else {
setSelectedModel(DEFAULT_PREMIUM_MODEL_ID);
saveModelPreference(DEFAULT_PREMIUM_MODEL_ID);
}
return;
}
// Premium subscription - ALWAYS use premium model
if (subscriptionStatus === 'active') {
// If they had a premium model saved and it's still valid, use it
const hasSavedPremiumModel = savedModel &&
MODEL_OPTIONS.find(option =>
option.id === savedModel &&
option.requiresSubscription &&
canAccessModel(subscriptionStatus, true)
);
// Otherwise use the default premium model
if (hasSavedPremiumModel) {
setSelectedModel(savedModel!);
} else {
setSelectedModel(DEFAULT_PREMIUM_MODEL_ID);
saveModelPreference(DEFAULT_PREMIUM_MODEL_ID);
}
return;
}
// No subscription - use saved model if accessible (free tier), or default free
// If we have a saved model, validate it's still available and accessible
if (savedModel) {
const modelOption = MODEL_OPTIONS.find(option => option.id === savedModel);
if (modelOption && canAccessModel(subscriptionStatus, modelOption.requiresSubscription)) {
setSelectedModel(savedModel);
} else {
setSelectedModel(DEFAULT_FREE_MODEL_ID);
saveModelPreference(DEFAULT_FREE_MODEL_ID);
// Wait for models to load before validating
if (isLoadingModels) {
console.log('Models still loading, waiting...');
return;
}
const modelOption = MODEL_OPTIONS.find(option => option.id === savedModel);
const isCustomModel = isLocalMode() && customModels.some(model => model.id === savedModel);
// Check if saved model is still valid and accessible
if (modelOption || isCustomModel) {
const isAccessible = isLocalMode() ||
canAccessModel(subscriptionStatus, modelOption?.requiresSubscription ?? false);
if (isAccessible) {
console.log('Using saved model:', savedModel);
setSelectedModel(savedModel);
setHasInitialized(true);
return;
} else {
console.log('Saved model not accessible, falling back to default');
}
} else {
console.log('Saved model not found in available models, falling back to default');
}
} else {
setSelectedModel(DEFAULT_FREE_MODEL_ID);
saveModelPreference(DEFAULT_FREE_MODEL_ID);
}
// Fallback to default model
const defaultModel = subscriptionStatus === 'active' ? DEFAULT_PREMIUM_MODEL_ID : DEFAULT_FREE_MODEL_ID;
console.log('Using default model:', defaultModel);
setSelectedModel(defaultModel);
saveModelPreference(defaultModel);
setHasInitialized(true);
} catch (error) {
console.warn('Failed to load preferences from localStorage:', error);
setSelectedModel(DEFAULT_FREE_MODEL_ID);
const defaultModel = subscriptionStatus === 'active' ? DEFAULT_PREMIUM_MODEL_ID : DEFAULT_FREE_MODEL_ID;
setSelectedModel(defaultModel);
saveModelPreference(defaultModel);
setHasInitialized(true);
}
}, [subscriptionStatus, MODEL_OPTIONS]);
}, [subscriptionStatus, MODEL_OPTIONS, isLoadingModels, customModels, hasInitialized]);
// Handle model selection change
const handleModelChange = (modelId: string) => {
console.log('handleModelChange', modelId);
console.log('handleModelChange called with:', modelId);
// Refresh custom models from localStorage to ensure we have the latest
if (isLocalMode()) {
@ -441,7 +374,8 @@ export const useModelSelection = () => {
console.warn('Model not accessible:', modelId);
return;
}
console.log('setting selected model', modelId);
console.log('Setting selected model and saving to localStorage:', modelId);
setSelectedModel(modelId);
saveModelPreference(modelId);
};