From 957a2c9cbf4c1b74435289028575ae5933accb40 Mon Sep 17 00:00:00 2001
From: marko-kraemer
Date: Thu, 10 Jul 2025 12:58:10 +0200
Subject: [PATCH] Add OpenRouter model fallback and xAI Grok 4

---
 backend/services/billing.py                   |  44 +--
 backend/services/llm.py                       |  70 +++-
 backend/utils/config.py                       |   1 +
 backend/utils/constants.py                    | 340 +++++++++---------
 .../thread/chat-input/_use-model-selection.ts | 254 +++++--------
 5 files changed, 326 insertions(+), 383 deletions(-)

diff --git a/backend/services/billing.py b/backend/services/billing.py
index 1f46a571..1b37a553 100644
--- a/backend/services/billing.py
+++ b/backend/services/billing.py
@@ -13,8 +13,8 @@ from utils.config import config, EnvMode
 from services.supabase import DBConnection
 from utils.auth_utils import get_current_user_id_from_jwt
 from pydantic import BaseModel
-from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES
-from litellm import cost_per_token
+from utils.constants import MODEL_ACCESS_TIERS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES
+from litellm.cost_calculator import cost_per_token
 import time
 
 # Initialize Stripe
@@ -26,46 +26,6 @@ TOKEN_PRICE_MULTIPLIER = 1.5
 # Initialize router
 router = APIRouter(prefix="/billing", tags=["billing"])
 
-# Hardcoded pricing for specific models (prices per million tokens)
-HARDCODED_MODEL_PRICES = {
-    "openrouter/deepseek/deepseek-chat": {
-        "input_cost_per_million_tokens": 0.38,
-        "output_cost_per_million_tokens": 0.89
-    },
-    "deepseek/deepseek-chat": {
-        "input_cost_per_million_tokens": 0.38,
-        "output_cost_per_million_tokens": 0.89
-    },
-    "qwen/qwen3-235b-a22b": {
-        "input_cost_per_million_tokens": 0.13,
-        "output_cost_per_million_tokens": 0.60
-    },
-    "openrouter/qwen/qwen3-235b-a22b": {
-        "input_cost_per_million_tokens": 0.13,
-        "output_cost_per_million_tokens": 0.60
-    },
-    "google/gemini-2.5-flash-preview-05-20": {
-        "input_cost_per_million_tokens": 0.15,
-        "output_cost_per_million_tokens": 0.60
-    },
-    "openrouter/google/gemini-2.5-flash-preview-05-20": {
-        "input_cost_per_million_tokens": 0.15,
-        "output_cost_per_million_tokens": 0.60
-    },
-    "anthropic/claude-sonnet-4": {
-        "input_cost_per_million_tokens": 3.00,
-        "output_cost_per_million_tokens": 15.00,
-    },
-    "google/gemini-2.5-pro": {
-        "input_cost_per_million_tokens": 1.25,
-        "output_cost_per_million_tokens": 10.00,
-    },
-    "openrouter/google/gemini-2.5-pro": {
-        "input_cost_per_million_tokens": 1.25,
-        "output_cost_per_million_tokens": 10.00,
-    },
-}
-
 def get_model_pricing(model: str) -> tuple[float, float] | None:
     """
     Get pricing for a model. Returns (input_cost_per_million, output_cost_per_million) or None.
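Reviewer note: with HARDCODED_MODEL_PRICES now generated in utils/constants.py (see the constants.py hunk below) and imported here, get_model_pricing reduces to a dictionary lookup. A minimal sketch of that lookup, assuming the function keeps the contract stated in its docstring (its body is outside this hunk):

```python
from utils.constants import HARDCODED_MODEL_PRICES

def get_model_pricing(model: str) -> tuple[float, float] | None:
    """Return (input_cost_per_million, output_cost_per_million) or None."""
    entry = HARDCODED_MODEL_PRICES.get(model)
    if entry is None:
        return None  # caller can fall back to litellm's cost_per_token
    return (
        entry["input_cost_per_million_tokens"],
        entry["output_cost_per_million_tokens"],
    )
```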
diff --git a/backend/services/llm.py b/backend/services/llm.py
index 99b5e6f2..fe52e87e 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -2,7 +2,7 @@
 LLM API interface for making calls to various language models.
 
 This module provides a unified interface for making API calls to different LLM providers
-(OpenAI, Anthropic, Groq, etc.) using LiteLLM. It includes support for:
+(OpenAI, Anthropic, Groq, xAI, etc.) using LiteLLM. It includes support for:
 - Streaming responses
 - Tool calls and function calling
 - Retry logic with exponential backoff
@@ -16,6 +16,7 @@
 import json
 import asyncio
 from openai import OpenAIError
 import litellm
+from litellm.files.main import ModelResponse
 from utils.logger import logger
 from utils.config import config
@@ -37,7 +38,7 @@ class LLMRetryError(LLMError):
 
 def setup_api_keys() -> None:
     """Set up API keys from environment variables."""
-    providers = ['OPENAI', 'ANTHROPIC', 'GROQ', 'OPENROUTER']
+    providers = ['OPENAI', 'ANTHROPIC', 'GROQ', 'OPENROUTER', 'XAI']
     for provider in providers:
         key = getattr(config, f'{provider}_API_KEY')
         if key:
@@ -64,6 +65,36 @@ def setup_api_keys() -> None:
     else:
         logger.warning(f"Missing AWS credentials for Bedrock integration - access_key: {bool(aws_access_key)}, secret_key: {bool(aws_secret_key)}, region: {aws_region}")
 
+def get_openrouter_fallback(model_name: str) -> Optional[str]:
+    """Get OpenRouter fallback model for a given model name."""
+    # Skip if already using OpenRouter
+    if model_name.startswith("openrouter/"):
+        return None
+
+    # Map models to their OpenRouter equivalents
+    fallback_mapping = {
+        "anthropic/claude-3-7-sonnet-latest": "openrouter/anthropic/claude-3.7-sonnet",
+        "anthropic/claude-sonnet-4-20250514": "openrouter/anthropic/claude-sonnet-4",
+        "xai/grok-4": "openrouter/x-ai/grok-4",
+    }
+
+    # Check for exact match first
+    if model_name in fallback_mapping:
+        return fallback_mapping[model_name]
+
+    # Check for partial matches (e.g., bedrock models)
+    for key, value in fallback_mapping.items():
+        if key in model_name:
+            return value
+
+    # Default fallbacks by provider
+    if "claude" in model_name.lower() or "anthropic" in model_name.lower():
+        return "openrouter/anthropic/claude-sonnet-4"
+    elif "xai" in model_name.lower() or "grok" in model_name.lower():
+        return "openrouter/x-ai/grok-4"
+
+    return None
+
 async def handle_error(error: Exception, attempt: int, max_attempts: int) -> None:
     """Handle API errors with appropriate delays and logging."""
     delay = RATE_LIMIT_DELAY if isinstance(error, litellm.exceptions.RateLimitError) else RETRY_DELAY
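The resolution order in get_openrouter_fallback is: exact match, then substring match against the mapping keys, then a provider-level default. A few illustrative calls (the Bedrock-style ID is hypothetical; note it resolves through the provider default, not the substring branch, because Bedrock IDs use dots where the mapping keys use slashes):

```python
get_openrouter_fallback("xai/grok-4")
# -> "openrouter/x-ai/grok-4" (exact match)

get_openrouter_fallback("bedrock/anthropic.claude-sonnet-4-20250514-v1:0")
# -> "openrouter/anthropic/claude-sonnet-4" (provider default: name contains "anthropic")

get_openrouter_fallback("openrouter/x-ai/grok-4")
# -> None (already on OpenRouter, nothing to fall back to)
```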
@@ -196,6 +227,7 @@ def prepare_params(
     # Add reasoning_effort for Anthropic models if enabled
     use_thinking = enable_thinking if enable_thinking is not None else False
     is_anthropic = "anthropic" in effective_model_name.lower() or "claude" in effective_model_name.lower()
+    is_xai = "xai" in effective_model_name.lower() or model_name.startswith("xai/")
 
     if is_anthropic and use_thinking:
         effort_level = reasoning_effort if reasoning_effort else 'low'
@@ -203,6 +235,17 @@
         params["temperature"] = 1.0  # Required by Anthropic when reasoning_effort is used
         logger.info(f"Anthropic thinking enabled with reasoning_effort='{effort_level}'")
 
+    # Add reasoning_effort for xAI models if enabled
+    if is_xai and use_thinking:
+        effort_level = reasoning_effort if reasoning_effort else 'low'
+        params["reasoning_effort"] = effort_level
+        logger.info(f"xAI thinking enabled with reasoning_effort='{effort_level}'")
+
+    # Add xAI-specific parameters
+    if model_name.startswith("xai/"):
+        logger.debug(f"Preparing xAI parameters for model: {model_name}")
+        # xAI models support standard parameters, no special handling needed beyond reasoning_effort
+
     return params
 
 async def make_llm_api_call(
@@ -220,7 +263,7 @@
     model_id: Optional[str] = None,
     enable_thinking: Optional[bool] = False,
     reasoning_effort: Optional[str] = 'low'
-) -> Union[Dict[str, Any], AsyncGenerator]:
+) -> Union[Dict[str, Any], AsyncGenerator, ModelResponse]:
     """
     Make an API call to a language model using LiteLLM.
@@ -277,6 +320,27 @@
             # logger.debug(f"Response: {response}")
             return response
 
+        except litellm.exceptions.InternalServerError as e:
+            # Check if it's an Anthropic overloaded error
+            if "Overloaded" in str(e) and "AnthropicException" in str(e):
+                fallback_model = get_openrouter_fallback(model_name)
+                if fallback_model and not params.get("model", "").startswith("openrouter/"):
+                    logger.warning(f"Anthropic overloaded, falling back to OpenRouter: {fallback_model}")
+                    params["model"] = fallback_model
+                    # Remove any model_id as it's specific to Bedrock
+                    params.pop("model_id", None)
+                    # Continue with next attempt using fallback model
+                    last_error = e
+                    await handle_error(e, attempt, MAX_RETRIES)
+                else:
+                    # No fallback available or already using OpenRouter
+                    last_error = e
+                    await handle_error(e, attempt, MAX_RETRIES)
+            else:
+                # Other internal server errors
+                last_error = e
+                await handle_error(e, attempt, MAX_RETRIES)
+
         except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
             last_error = e
             await handle_error(e, attempt, MAX_RETRIES)
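Because the handler mutates params["model"] in place, the next iteration of the retry loop automatically goes out over OpenRouter. A condensed sketch of that control flow (this is an illustration, not the function in the patch; MAX_RETRIES, handle_error, and LLMRetryError come from the surrounding module, and the loop body is abridged):

```python
async def _call_with_fallback(params: dict, model_name: str):
    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
            return await litellm.acompletion(**params)
        except litellm.exceptions.InternalServerError as e:
            if "Overloaded" in str(e) and "AnthropicException" in str(e):
                fallback = get_openrouter_fallback(model_name)
                if fallback and not params.get("model", "").startswith("openrouter/"):
                    params["model"] = fallback    # picked up on the next attempt
                    params.pop("model_id", None)  # Bedrock-specific, drop it
            last_error = e
            await handle_error(e, attempt, MAX_RETRIES)
    raise LLMRetryError(f"Failed after {MAX_RETRIES} attempts. Last error: {last_error}")
```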
diff --git a/backend/utils/config.py b/backend/utils/config.py
index 4128668e..5a63de6e 100644
--- a/backend/utils/config.py
+++ b/backend/utils/config.py
@@ -174,6 +174,7 @@ class Configuration:
     OPENAI_API_KEY: Optional[str] = None
     GROQ_API_KEY: Optional[str] = None
     OPENROUTER_API_KEY: Optional[str] = None
+    XAI_API_KEY: Optional[str] = None
    OPENROUTER_API_BASE: Optional[str] = "https://openrouter.ai/api/v1"
     OR_SITE_URL: Optional[str] = "https://kortix.ai"
     OR_APP_NAME: Optional[str] = "Kortix AI"
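With this field in place, the setup_api_keys loop shown earlier picks up XAI_API_KEY like any other provider key. The export step is outside the hunk above, so this is an assumption-labeled sketch of the round trip, based on LiteLLM's convention of reading provider keys from the environment:

```python
import os

# .env / environment:
#   XAI_API_KEY=xai-...

# setup_api_keys() iterates over ['OPENAI', ..., 'XAI'] and, presumably,
# exports each configured key for LiteLLM to find (not shown in the diff):
key = getattr(config, "XAI_API_KEY")
if key:
    os.environ["XAI_API_KEY"] = key  # LiteLLM reads this for xai/* models
```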
"output_cost_per_million_tokens": 0.89 + }, + "tier_availability": ["free", "paid"] + }, + "openrouter/qwen/qwen3-235b-a22b": { + "aliases": ["qwen3"], + "pricing": { + "input_cost_per_million_tokens": 0.13, + "output_cost_per_million_tokens": 0.60 + }, + "tier_availability": ["free", "paid"] + }, + "openrouter/google/gemini-2.5-flash-preview-05-20": { + "aliases": ["gemini-flash-2.5"], + "pricing": { + "input_cost_per_million_tokens": 0.15, + "output_cost_per_million_tokens": 0.60 + }, + "tier_availability": ["free", "paid"] + }, + + # Paid tier only models + "openrouter/deepseek/deepseek-chat-v3-0324": { + "aliases": ["deepseek/deepseek-chat-v3-0324"], + "pricing": { + "input_cost_per_million_tokens": 0.38, + "output_cost_per_million_tokens": 0.89 + }, + "tier_availability": ["paid"] + }, + "openrouter/google/gemini-2.5-pro": { + "aliases": ["google/gemini-2.5-pro"], + "pricing": { + "input_cost_per_million_tokens": 1.25, + "output_cost_per_million_tokens": 10.00 + }, + "tier_availability": ["paid"] + }, + "openai/gpt-4o": { + "aliases": ["gpt-4o"], + "pricing": { + "input_cost_per_million_tokens": 2.50, + "output_cost_per_million_tokens": 10.00 + }, + "tier_availability": ["paid"] + }, + "openai/gpt-4.1": { + "aliases": ["gpt-4.1"], + "pricing": { + "input_cost_per_million_tokens": 15.00, + "output_cost_per_million_tokens": 60.00 + }, + "tier_availability": ["paid"] + }, + "openai/gpt-4.1-mini": { + "aliases": ["gpt-4.1-mini"], + "pricing": { + "input_cost_per_million_tokens": 1.50, + "output_cost_per_million_tokens": 6.00 + }, + "tier_availability": ["paid"] + }, + "anthropic/claude-3-7-sonnet-latest": { + "aliases": ["sonnet-3.7"], + "pricing": { + "input_cost_per_million_tokens": 3.00, + "output_cost_per_million_tokens": 15.00 + }, + "tier_availability": ["paid"] + }, + "anthropic/claude-3-5-sonnet-latest": { + "aliases": ["sonnet-3.5"], + "pricing": { + "input_cost_per_million_tokens": 3.00, + "output_cost_per_million_tokens": 15.00 + }, + "tier_availability": ["paid"] + }, + + "openrouter/x-ai/grok-4": { + "aliases": ["grok-4"], + "pricing": { + "input_cost_per_million_tokens": 5.00, + "output_cost_per_million_tokens": 15.00 + }, + "tier_availability": ["paid"] + }, + } -MODEL_NAME_ALIASES = { - # Short names to full names - "sonnet-3.7": "anthropic/claude-3-7-sonnet-latest", - "sonnet-3.5": "anthropic/claude-3-5-sonnet-latest", - "haiku-3.5": "anthropic/claude-3-5-haiku-latest", - "claude-sonnet-4": "anthropic/claude-sonnet-4-20250514", - # "gpt-4.1": "openai/gpt-4.1-2025-04-14", # Commented out in constants.py - "gpt-4o": "openai/gpt-4o", - "gpt-4.1": "openai/gpt-4.1", - "gpt-4.1-mini": "openai/gpt-4.1-mini", - # "gpt-4-turbo": "openai/gpt-4-turbo", # Commented out in constants.py - # "gpt-4": "openai/gpt-4", # Commented out in constants.py - # "gemini-flash-2.5": "openrouter/google/gemini-2.5-flash-preview", # Commented out in constants.py - # "grok-3": "xai/grok-3-fast-latest", # Commented out in constants.py - "deepseek": "openrouter/deepseek/deepseek-chat", - # "deepseek-r1": "openrouter/deepseek/deepseek-r1", - # "grok-3-mini": "xai/grok-3-mini-fast-beta", # Commented out in constants.py - "qwen3": "openrouter/qwen/qwen3-235b-a22b", # Commented out in constants.py - "gemini-flash-2.5": "openrouter/google/gemini-2.5-flash-preview-05-20", - "gemini-2.5-flash:thinking": "openrouter/google/gemini-2.5-flash-preview-05-20:thinking", - # "google/gemini-2.5-flash-preview":"openrouter/google/gemini-2.5-flash-preview", - # 
"google/gemini-2.5-flash-preview:thinking":"openrouter/google/gemini-2.5-flash-preview:thinking", - "google/gemini-2.5-pro": "openrouter/google/gemini-2.5-pro", - "deepseek/deepseek-chat-v3-0324": "openrouter/deepseek/deepseek-chat-v3-0324", - # Also include full names as keys to ensure they map to themselves - # "anthropic/claude-3-7-sonnet-latest": "anthropic/claude-3-7-sonnet-latest", - # "openai/gpt-4.1-2025-04-14": "openai/gpt-4.1-2025-04-14", # Commented out in constants.py - # "openai/gpt-4o": "openai/gpt-4o", - # "openai/gpt-4-turbo": "openai/gpt-4-turbo", # Commented out in constants.py - # "openai/gpt-4": "openai/gpt-4", # Commented out in constants.py - # "openrouter/google/gemini-2.5-flash-preview": "openrouter/google/gemini-2.5-flash-preview", # Commented out in constants.py - # "xai/grok-3-fast-latest": "xai/grok-3-fast-latest", # Commented out in constants.py - # "deepseek/deepseek-chat": "openrouter/deepseek/deepseek-chat", - # "deepseek/deepseek-r1": "openrouter/deepseek/deepseek-r1", - # "qwen/qwen3-235b-a22b": "openrouter/qwen/qwen3-235b-a22b", - # "xai/grok-3-mini-fast-beta": "xai/grok-3-mini-fast-beta", # Commented out in constants.py + +# Derived structures (auto-generated from MODELS) +def _generate_model_structures(): + """Generate all model structures from the master MODELS dictionary.""" + + # Generate tier lists + free_models = [] + paid_models = [] + + # Generate aliases + aliases = {} + + # Generate pricing + pricing = {} + + for model_name, config in MODELS.items(): + # Add to tier lists + if "free" in config["tier_availability"]: + free_models.append(model_name) + if "paid" in config["tier_availability"]: + paid_models.append(model_name) + + # Add aliases + for alias in config["aliases"]: + aliases[alias] = model_name + + # Add pricing + pricing[model_name] = config["pricing"] + + # Also add pricing for legacy model name variations + if model_name.startswith("openrouter/deepseek/"): + legacy_name = model_name.replace("openrouter/", "") + pricing[legacy_name] = config["pricing"] + elif model_name.startswith("openrouter/qwen/"): + legacy_name = model_name.replace("openrouter/", "") + pricing[legacy_name] = config["pricing"] + elif model_name.startswith("openrouter/google/"): + legacy_name = model_name.replace("openrouter/", "") + pricing[legacy_name] = config["pricing"] + elif model_name.startswith("anthropic/"): + # Add anthropic/claude-sonnet-4 alias for claude-sonnet-4-20250514 + if "claude-sonnet-4-20250514" in model_name: + pricing["anthropic/claude-sonnet-4"] = config["pricing"] + + return free_models, paid_models, aliases, pricing + +# Generate all structures +FREE_TIER_MODELS, PAID_TIER_MODELS, MODEL_NAME_ALIASES, HARDCODED_MODEL_PRICES = _generate_model_structures() + +MODEL_ACCESS_TIERS = { + "free": FREE_TIER_MODELS, + "tier_2_20": PAID_TIER_MODELS, + "tier_6_50": PAID_TIER_MODELS, + "tier_12_100": PAID_TIER_MODELS, + "tier_25_200": PAID_TIER_MODELS, + "tier_50_400": PAID_TIER_MODELS, + "tier_125_800": PAID_TIER_MODELS, + "tier_200_1000": PAID_TIER_MODELS, } diff --git a/frontend/src/components/thread/chat-input/_use-model-selection.ts b/frontend/src/components/thread/chat-input/_use-model-selection.ts index 7d06cf8e..7687e1ed 100644 --- a/frontend/src/components/thread/chat-input/_use-model-selection.ts +++ b/frontend/src/components/thread/chat-input/_use-model-selection.ts @@ -28,139 +28,78 @@ export interface CustomModel { label: string; } -// SINGLE SOURCE OF TRUTH for all model data +// SINGLE SOURCE OF TRUTH for all model data - aligned with 
diff --git a/frontend/src/components/thread/chat-input/_use-model-selection.ts b/frontend/src/components/thread/chat-input/_use-model-selection.ts
index 7d06cf8e..7687e1ed 100644
--- a/frontend/src/components/thread/chat-input/_use-model-selection.ts
+++ b/frontend/src/components/thread/chat-input/_use-model-selection.ts
@@ -28,139 +28,78 @@ export interface CustomModel {
   label: string;
 }
 
-// SINGLE SOURCE OF TRUTH for all model data
+// SINGLE SOURCE OF TRUTH for all model data - aligned with backend constants
 export const MODELS = {
-  // Premium high-priority models
+  // Free tier models (available to all users)
   'claude-sonnet-4': {
     tier: 'free',
     priority: 100,
     recommended: true,
-    lowQuality: false,
-    description: 'Claude Sonnet 4 - Anthropic\'s latest and most advanced AI assistant'
+    lowQuality: false
+  },
+
+  'gemini-flash-2.5': {
+    tier: 'free',
+    priority: 70,
+    recommended: false,
+    lowQuality: false
+  },
+  'qwen3': {
+    tier: 'free',
+    priority: 60,
+    recommended: false,
+    lowQuality: false
+  },
+
+  // Premium/Paid tier models (require subscription)
+  'sonnet-3.7': {
+    tier: 'premium',
+    priority: 99,
+    recommended: false,
+    lowQuality: false
+  },
+  'grok-4': {
+    tier: 'premium',
+    priority: 98,
+    recommended: false,
+    lowQuality: false
   },
   'google/gemini-2.5-pro': {
     tier: 'premium',
-    priority: 100,
+    priority: 97,
     recommended: false,
-    lowQuality: false,
-    description: 'Gemini Pro 2.5 - Google\'s latest advanced model'
-  },
-  'sonnet-3.7': {
-    tier: 'premium',
-    priority: 95,
-    recommended: false,
-    lowQuality: false,
-    description: 'Claude 3.7 - Anthropic\'s most powerful AI assistant'
-  },
-  'claude-sonnet-3.7-reasoning': {
-    tier: 'premium',
-    priority: 95,
-    recommended: true,
-    lowQuality: false,
-    description: 'Claude 3.7 with enhanced reasoning capabilities'
+    lowQuality: false
   },
   'gpt-4.1': {
     tier: 'premium',
-    priority: 95,
+    priority: 96,
     recommended: false,
-    lowQuality: false,
-    description: 'GPT-4.1 - OpenAI\'s most advanced model with enhanced reasoning'
+    lowQuality: false
   },
-  'claude-3.5': {
-    tier: 'premium',
-    priority: 90,
-    recommended: true,
-    lowQuality: false,
-    description: 'Claude 3.5 - Anthropic\'s balanced model with solid capabilities'
-  },
-  'gemini-2.5-flash:thinking': {
+  'sonnet-3.5': {
     tier: 'premium',
     priority: 90,
     recommended: false,
-    lowQuality: false,
-    description: 'Gemini Flash 2.5 - Google\'s fast, responsive AI model'
+    lowQuality: false
   },
   'gpt-4o': {
     tier: 'premium',
-    priority: 85,
+    priority: 88,
     recommended: false,
-    lowQuality: false,
-    description: 'GPT-4o - Optimized for speed, reliability, and cost-effectiveness'
+    lowQuality: false
   },
-  'gpt-4-turbo': {
+  'gemini-2.5-flash:thinking': {
     tier: 'premium',
-    priority: 85,
+    priority: 84,
     recommended: false,
-    lowQuality: false,
-    description: 'GPT-4 Turbo - OpenAI\'s powerful model with a great balance of performance and cost'
-  },
-  'gpt-4': {
-    tier: 'premium',
-    priority: 80,
-    recommended: false,
-    lowQuality: false,
-    description: 'GPT-4 - OpenAI\'s highly capable model with advanced reasoning'
+    lowQuality: false
   },
   'deepseek/deepseek-chat-v3-0324': {
     tier: 'premium',
     priority: 75,
     recommended: false,
-    lowQuality: false,
-    description: 'DeepSeek Chat - Advanced AI assistant with strong reasoning'
+    lowQuality: false
   },
-
-  // Free tier models
-  'deepseek-r1': {
-    tier: 'free',
-    priority: 60,
-    recommended: false,
-    lowQuality: false,
-    description: 'DeepSeek R1 - Advanced model with enhanced reasoning and coding capabilities'
-  },
-  'deepseek': {
-    tier: 'free',
-    priority: 50,
-    recommended: false,
-    lowQuality: true,
-    description: 'DeepSeek - Free tier model with good general capabilities'
-  },
-  'gemini-flash-2.5': {
-    tier: 'free',
-    priority: 50,
-    recommended: false,
-    lowQuality: true,
-    description: 'Gemini Flash - Google\'s faster, more efficient model'
-  },
-  'grok-3-mini': {
-    tier: 'free',
-    priority: 45,
-    recommended: false,
-    lowQuality: true,
-    description: 'Grok-3 Mini - Smaller, faster version of Grok-3 for simpler tasks'
-  },
-  'qwen3': {
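Each key in this map is a short alias that the backend resolves through MODEL_NAME_ALIASES, and the tier flags mirror tier_availability in backend/utils/constants.py. One entry worth flagging: 'gemini-2.5-flash:thinking' survives this rewrite, but the regenerated backend alias map no longer defines it (the removed MODEL_NAME_ALIASES did). A quick consistency check along these lines could catch that in CI (a sketch; the frontend_ids list is transcribed by hand here, since the frontend keys are not importable from Python):

```python
from utils.constants import MODEL_NAME_ALIASES

# Short IDs used by the frontend MODELS map after this patch
frontend_ids = [
    "claude-sonnet-4", "gemini-flash-2.5", "qwen3",
    "sonnet-3.7", "grok-4", "google/gemini-2.5-pro", "gpt-4.1",
    "sonnet-3.5", "gpt-4o", "gemini-2.5-flash:thinking",
    "deepseek/deepseek-chat-v3-0324",
]

unresolved = [m for m in frontend_ids if m not in MODEL_NAME_ALIASES]
assert not unresolved, f"frontend models with no backend alias: {unresolved}"
# After this patch: fails with ['gemini-2.5-flash:thinking']
```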
    tier: 'free',
-    priority: 40,
-    recommended: false,
-    lowQuality: true,
-    description: 'Qwen3 - Alibaba\'s powerful multilingual language model'
-  },
-};
-
-// Model tier definitions
-export const MODEL_TIERS = {
-  premium: {
-    requiresSubscription: true,
-    baseDescription: 'Advanced model with superior capabilities'
-  },
-  free: {
-    requiresSubscription: false,
-    baseDescription: 'Available to all users'
-  },
-  custom: {
-    requiresSubscription: false,
-    baseDescription: 'User-defined model'
-  }
 };
 
 // Helper to check if a user can access a model based on subscription status
@@ -224,6 +163,7 @@ const saveModelPreference = (modelId: string): void => {
 export const useModelSelection = () => {
   const [selectedModel, setSelectedModel] = useState(DEFAULT_FREE_MODEL_ID);
   const [customModels, setCustomModels] = useState([]);
+  const [hasInitialized, setHasInitialized] = useState(false);
 
   const { data: subscriptionData } = useSubscription();
   const { data: modelsData, isLoading: isLoadingModels } = useAvailableModels({
@@ -258,14 +198,12 @@
         id: DEFAULT_FREE_MODEL_ID,
         label: 'DeepSeek',
         requiresSubscription: false,
-        description: MODELS[DEFAULT_FREE_MODEL_ID]?.description || MODEL_TIERS.free.baseDescription,
         priority: MODELS[DEFAULT_FREE_MODEL_ID]?.priority || 50
       },
       {
         id: DEFAULT_PREMIUM_MODEL_ID,
-        label: 'Claude Sonnet 4',
+        label: 'Sonnet 4',
         requiresSubscription: true,
-        description: MODELS[DEFAULT_PREMIUM_MODEL_ID]?.description || MODEL_TIERS.premium.baseDescription,
         priority: MODELS[DEFAULT_PREMIUM_MODEL_ID]?.priority || 100
       },
     ];
@@ -295,8 +233,6 @@
           id: shortName,
           label: cleanLabel,
           requiresSubscription: isPremium,
-          description: modelData.description ||
-            (isPremium ? MODEL_TIERS.premium.baseDescription : MODEL_TIERS.free.baseDescription),
           top: modelData.priority >= 90, // Mark high-priority models as "top"
           priority: modelData.priority || 0,
           lowQuality: modelData.lowQuality || false,
@@ -311,7 +247,6 @@
           id: model.id,
           label: model.label || formatModelName(model.id),
           requiresSubscription: false,
-          description: MODEL_TIERS.custom.baseDescription,
           top: false,
           isCustom: true,
           priority: 30, // Low priority by default
@@ -323,13 +258,13 @@
     }
 
     // Sort models consistently in one place:
-    // 1. First by free/premium (free first)
+    // 1. First by recommended (recommended first)
     // 2. Then by priority (higher first)
     // 3. Finally by name (alphabetical)
     const sortedModels = models.sort((a, b) => {
-      // First by free/premium status
-      if (a.requiresSubscription !== b.requiresSubscription) {
-        return a.requiresSubscription ? -1 : 1;
+      // First by recommended status
+      if (a.recommended !== b.recommended) {
+        return a.recommended ? -1 : 1;
       }
 
       // Then by priority (higher first)
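The comparator now orders by recommended, then priority, then label. The same ordering expressed as a Python sort key (an illustration of the intent, equivalent to the pairwise comparator above; the sample data is made up):

```python
models = [
    {"label": "Grok 4", "recommended": False, "priority": 98},
    {"label": "Sonnet 4", "recommended": True, "priority": 100},
    {"label": "GPT-4.1", "recommended": False, "priority": 96},
]

def sort_key(model: dict) -> tuple:
    return (
        not model.get("recommended", False),  # recommended first (False sorts first)
        -model.get("priority", 0),            # then higher priority first
        model.get("label", ""),               # then alphabetical
    )

models.sort(key=sort_key)
# -> Sonnet 4, Grok 4, GPT-4.1
```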
@@ -352,66 +287,64 @@
     );
   }, [MODEL_OPTIONS, subscriptionStatus]);
 
-  // Initialize selected model from localStorage or defaults
+  // Initialize selected model from localStorage ONLY ONCE
   useEffect(() => {
-    if (typeof window === 'undefined') return;
+    if (typeof window === 'undefined' || hasInitialized) return;
+
+    console.log('Initializing model selection from localStorage...');
 
     try {
       const savedModel = localStorage.getItem(STORAGE_KEY_MODEL);
+      console.log('Saved model from localStorage:', savedModel);
 
-      // Local mode - allow any model
-      if (isLocalMode()) {
-        if (savedModel && MODEL_OPTIONS.find(option => option.id === savedModel)) {
-          setSelectedModel(savedModel);
-        } else {
-          setSelectedModel(DEFAULT_PREMIUM_MODEL_ID);
-          saveModelPreference(DEFAULT_PREMIUM_MODEL_ID);
-        }
-        return;
-      }
-
-      // Premium subscription - ALWAYS use premium model
-      if (subscriptionStatus === 'active') {
-        // If they had a premium model saved and it's still valid, use it
-        const hasSavedPremiumModel = savedModel &&
-          MODEL_OPTIONS.find(option =>
-            option.id === savedModel &&
-            option.requiresSubscription &&
-            canAccessModel(subscriptionStatus, true)
-          );
-
-        // Otherwise use the default premium model
-        if (hasSavedPremiumModel) {
-          setSelectedModel(savedModel!);
-        } else {
-          setSelectedModel(DEFAULT_PREMIUM_MODEL_ID);
-          saveModelPreference(DEFAULT_PREMIUM_MODEL_ID);
-        }
-        return;
-      }
-
-      // No subscription - use saved model if accessible (free tier), or default free
+      // If we have a saved model, validate it's still available and accessible
       if (savedModel) {
-        const modelOption = MODEL_OPTIONS.find(option => option.id === savedModel);
-        if (modelOption && canAccessModel(subscriptionStatus, modelOption.requiresSubscription)) {
-          setSelectedModel(savedModel);
-        } else {
-          setSelectedModel(DEFAULT_FREE_MODEL_ID);
-          saveModelPreference(DEFAULT_FREE_MODEL_ID);
+        // Wait for models to load before validating
+        if (isLoadingModels) {
+          console.log('Models still loading, waiting...');
+          return;
+        }
+
+        const modelOption = MODEL_OPTIONS.find(option => option.id === savedModel);
+        const isCustomModel = isLocalMode() && customModels.some(model => model.id === savedModel);
+
+        // Check if saved model is still valid and accessible
+        if (modelOption || isCustomModel) {
+          const isAccessible = isLocalMode() ||
+            canAccessModel(subscriptionStatus, modelOption?.requiresSubscription ?? false);
+
+          if (isAccessible) {
+            console.log('Using saved model:', savedModel);
+            setSelectedModel(savedModel);
+            setHasInitialized(true);
+            return;
+          } else {
+            console.log('Saved model not accessible, falling back to default');
+          }
+        } else {
+          console.log('Saved model not found in available models, falling back to default');
         }
-      } else {
-        setSelectedModel(DEFAULT_FREE_MODEL_ID);
-        saveModelPreference(DEFAULT_FREE_MODEL_ID);
       }
+
+      // Fallback to default model
+      const defaultModel = subscriptionStatus === 'active' ? DEFAULT_PREMIUM_MODEL_ID : DEFAULT_FREE_MODEL_ID;
+      console.log('Using default model:', defaultModel);
+      setSelectedModel(defaultModel);
+      saveModelPreference(defaultModel);
+      setHasInitialized(true);
+
     } catch (error) {
       console.warn('Failed to load preferences from localStorage:', error);
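The rewritten effect now has three outcomes: wait (the model list is still loading), keep the saved model (present and accessible), or fall back to a tier-appropriate default. The hasInitialized flag stops later re-runs from clobbering an in-session choice. A condensed Python mirror of that decision logic (an illustration, not code from the patch; can_access stands in for canAccessModel, and the two default IDs are assumed values):

```python
DEFAULT_FREE_MODEL_ID = "deepseek"            # assumed default, mirroring the hook
DEFAULT_PREMIUM_MODEL_ID = "claude-sonnet-4"  # assumed default, mirroring the hook

def can_access(status: str, requires_subscription: bool) -> bool:
    """Stand-in for canAccessModel."""
    return not requires_subscription or status == "active"

def pick_initial_model(saved, options, status, models_loading, local_mode):
    if saved and models_loading:
        return None  # wait; the effect re-runs once the model list arrives
    option = next((o for o in options if o["id"] == saved), None)
    if saved and option and (local_mode or can_access(status, option["requiresSubscription"])):
        return saved
    return DEFAULT_PREMIUM_MODEL_ID if status == "active" else DEFAULT_FREE_MODEL_ID
```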
-      setSelectedModel(DEFAULT_FREE_MODEL_ID);
+      const defaultModel = subscriptionStatus === 'active' ? DEFAULT_PREMIUM_MODEL_ID : DEFAULT_FREE_MODEL_ID;
+      setSelectedModel(defaultModel);
+      saveModelPreference(defaultModel);
+      setHasInitialized(true);
     }
-  }, [subscriptionStatus, MODEL_OPTIONS]);
+  }, [subscriptionStatus, MODEL_OPTIONS, isLoadingModels, customModels, hasInitialized]);
 
   // Handle model selection change
   const handleModelChange = (modelId: string) => {
-    console.log('handleModelChange', modelId);
+    console.log('handleModelChange called with:', modelId);
 
     // Refresh custom models from localStorage to ensure we have the latest
     if (isLocalMode()) {
@@ -441,7 +374,8 @@
       console.warn('Model not accessible:', modelId);
       return;
     }
-    console.log('setting selected model', modelId);
+
+    console.log('Setting selected model and saving to localStorage:', modelId);
     setSelectedModel(modelId);
     saveModelPreference(modelId);
   };
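Taken together: the frontend sends a short alias such as 'grok-4', the backend resolves it through MODEL_NAME_ALIASES, and make_llm_api_call reroutes to OpenRouter when Anthropic reports an overload. An end-to-end sketch under stated assumptions: the signature hunk above shows only the trailing parameters, so the messages and model_name keyword names are assumed here.

```python
import asyncio

from services.llm import make_llm_api_call, setup_api_keys
from utils.constants import MODEL_NAME_ALIASES

async def main() -> None:
    setup_api_keys()
    # The frontend sends the short alias; the backend resolves it.
    model = MODEL_NAME_ALIASES.get("grok-4", "grok-4")  # -> "openrouter/x-ai/grok-4"
    response = await make_llm_api_call(
        messages=[{"role": "user", "content": "Hello, Grok."}],
        model_name=model,
        enable_thinking=True,      # mapped to reasoning_effort for xAI/Anthropic models
        reasoning_effort="low",
    )
    print(response)

asyncio.run(main())
```

Note that the Anthropic-overload fallback only engages for non-OpenRouter names (for example, a direct "xai/grok-4" or "anthropic/claude-sonnet-4-20250514" call); alias-resolved "openrouter/..." names skip it by design.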