From 79b71db25045026b2d5bc2c26f138405df048079 Mon Sep 17 00:00:00 2001
From: sharath <29162020+tnfssc@users.noreply.github.com>
Date: Thu, 26 Jun 2025 15:18:37 +0000
Subject: [PATCH] feat(billing): implement hardcoded model pricing and enhance
cost calculation logic
---
backend/services/billing.py | 200 ++++++++++++------
.../src/components/billing/usage-logs.tsx | 7 +-
.../home/sections/pricing-section.tsx | 3 +-
3 files changed, 138 insertions(+), 72 deletions(-)
diff --git a/backend/services/billing.py b/backend/services/billing.py
index 1547fedf..dcbfa923 100644
--- a/backend/services/billing.py
+++ b/backend/services/billing.py
@@ -23,6 +23,49 @@ stripe.api_key = config.STRIPE_SECRET_KEY
# Initialize router
router = APIRouter(prefix="/billing", tags=["billing"])
+# Hardcoded pricing overrides for specific models, checked before the litellm fallback (prices per million tokens)
+HARDCODED_MODEL_PRICES = {
+ "openrouter/deepseek/deepseek-chat": {
+ "input_cost_per_million_tokens": 0.38,
+ "output_cost_per_million_tokens": 0.89
+ },
+ "deepseek/deepseek-chat": {
+ "input_cost_per_million_tokens": 0.38,
+ "output_cost_per_million_tokens": 0.89
+ },
+ "qwen/qwen3-235b-a22b": {
+ "input_cost_per_million_tokens": 0.13,
+ "output_cost_per_million_tokens": 0.60
+ },
+ "openrouter/qwen/qwen3-235b-a22b": {
+ "input_cost_per_million_tokens": 0.13,
+ "output_cost_per_million_tokens": 0.60
+ },
+ "google/gemini-2.5-flash-preview-05-20": {
+ "input_cost_per_million_tokens": 0.15,
+ "output_cost_per_million_tokens": 0.60
+ },
+ "openrouter/google/gemini-2.5-flash-preview-05-20": {
+ "input_cost_per_million_tokens": 0.15,
+ "output_cost_per_million_tokens": 0.60
+ }
+}
+
+def get_model_pricing(model: str) -> tuple[float, float] | None:
+ """
+ Get pricing for a model. Returns (input_cost_per_million, output_cost_per_million) or None.
+
+ Args:
+ model: The model name to get pricing for
+
+ Returns:
+ Tuple of (input_cost_per_million_tokens, output_cost_per_million_tokens) or None if not found
+ """
+ if model in HARDCODED_MODEL_PRICES:
+ pricing = HARDCODED_MODEL_PRICES[model]
+ return pricing["input_cost_per_million_tokens"], pricing["output_cost_per_million_tokens"]
+ return None
+
SUBSCRIPTION_TIERS = {
config.STRIPE_FREE_TIER_ID: {'name': 'free', 'minutes': 60, 'cost': 5},
@@ -209,20 +252,34 @@ async def calculate_monthly_usage(client, user_id: str) -> float:
if not token_messages.data:
return 0.0
- # Calculate total minutes
- total_prompt_tokens = 0
- total_completion_tokens = 0
+ # Calculate total cost per message (to handle different models correctly)
+ total_cost = 0.0
for run in token_messages.data:
prompt_tokens = run['content']['usage']['prompt_tokens']
completion_tokens = run['content']['usage']['completion_tokens']
model = run['content']['model']
- total_prompt_tokens += prompt_tokens
- total_completion_tokens += completion_tokens
+ # Check if we have hardcoded pricing for this model
+ hardcoded_pricing = get_model_pricing(model)
+ if hardcoded_pricing:
+ input_cost_per_million, output_cost_per_million = hardcoded_pricing
+ input_cost = (prompt_tokens / 1_000_000) * input_cost_per_million
+ output_cost = (completion_tokens / 1_000_000) * output_cost_per_million
+ message_cost = input_cost + output_cost
+ else:
+ # Use litellm pricing as fallback
+ try:
+ prompt_token_cost, completion_token_cost = cost_per_token(model, int(prompt_tokens), int(completion_tokens))
+ message_cost = prompt_token_cost + completion_token_cost
+ except Exception as e:
+ logger.warning(f"Could not get pricing for model {model}: {str(e)}, skipping message")
+ continue
+
+ total_cost += message_cost
- prompt_token_cost, completion_token_cost = cost_per_token(model, int(total_prompt_tokens), int(total_completion_tokens))
- total_cost = (prompt_token_cost + completion_token_cost) * 2 # Return total cost * 2
+    # Apply the 2x markup multiplier, retained from the previous implementation's billing behavior
+ total_cost = total_cost * 2
logger.info(f"Total cost for user {user_id}: {total_cost}")
return total_cost
@@ -973,72 +1030,83 @@ async def get_available_models(
# Check if model is available with current subscription
is_available = model in allowed_models
- # Get pricing information from litellm using cost_per_token
+ # Get pricing information - check hardcoded prices first, then litellm
pricing_info = {}
- try:
- # Try to get pricing using cost_per_token function
- models_to_try = []
-
- # Add the original model name
- models_to_try.append(model)
-
- # Try to resolve the model name using MODEL_NAME_ALIASES
- if model in MODEL_NAME_ALIASES:
- resolved_model = MODEL_NAME_ALIASES[model]
- models_to_try.append(resolved_model)
- # Also try without provider prefix if it has one
- if '/' in resolved_model:
- models_to_try.append(resolved_model.split('/', 1)[1])
-
- # If model is a value in aliases, try to find a matching key
- for alias_key, alias_value in MODEL_NAME_ALIASES.items():
- if alias_value == model:
- models_to_try.append(alias_key)
- break
-
- # Also try without provider prefix for the original model
- if '/' in model:
- models_to_try.append(model.split('/', 1)[1])
-
- # Special handling for Google models accessed via OpenRouter
- if model.startswith('openrouter/google/'):
- google_model_name = model.replace('openrouter/', '')
- models_to_try.append(google_model_name)
-
- # Try each model name variation until we find one that works
- input_cost_per_token = None
- output_cost_per_token = None
-
- for model_name in models_to_try:
- try:
- # Use cost_per_token with sample token counts to get the per-token costs
- input_cost, output_cost = cost_per_token(model_name, 1000000, 1000000)
- if input_cost is not None and output_cost is not None:
- input_cost_per_token = input_cost
- output_cost_per_token = output_cost
+
+ # Check if we have hardcoded pricing for this model
+ hardcoded_pricing = get_model_pricing(model)
+ if hardcoded_pricing:
+ input_cost_per_million, output_cost_per_million = hardcoded_pricing
+ pricing_info = {
+ "input_cost_per_million_tokens": input_cost_per_million,
+ "output_cost_per_million_tokens": output_cost_per_million,
+ "max_tokens": None
+ }
+ else:
+ try:
+ # Try to get pricing using cost_per_token function
+ models_to_try = []
+
+ # Add the original model name
+ models_to_try.append(model)
+
+ # Try to resolve the model name using MODEL_NAME_ALIASES
+ if model in MODEL_NAME_ALIASES:
+ resolved_model = MODEL_NAME_ALIASES[model]
+ models_to_try.append(resolved_model)
+ # Also try without provider prefix if it has one
+ if '/' in resolved_model:
+ models_to_try.append(resolved_model.split('/', 1)[1])
+
+ # If model is a value in aliases, try to find a matching key
+ for alias_key, alias_value in MODEL_NAME_ALIASES.items():
+ if alias_value == model:
+ models_to_try.append(alias_key)
break
- except Exception:
- continue
-
- if input_cost_per_token is not None and output_cost_per_token is not None:
- pricing_info = {
- "input_cost_per_million_tokens": round(input_cost_per_token * 2, 2),
- "output_cost_per_million_tokens": round(output_cost_per_token * 2, 2),
- "max_tokens": None # cost_per_token doesn't provide max_tokens info
- }
- else:
+
+ # Also try without provider prefix for the original model
+ if '/' in model:
+ models_to_try.append(model.split('/', 1)[1])
+
+ # Special handling for Google models accessed via OpenRouter
+ if model.startswith('openrouter/google/'):
+ google_model_name = model.replace('openrouter/', '')
+ models_to_try.append(google_model_name)
+
+ # Try each model name variation until we find one that works
+ input_cost_per_token = None
+ output_cost_per_token = None
+
+ for model_name in models_to_try:
+ try:
+ # Use cost_per_token with sample token counts to get the per-token costs
+ input_cost, output_cost = cost_per_token(model_name, 1000000, 1000000)
+ if input_cost is not None and output_cost is not None:
+ input_cost_per_token = input_cost
+ output_cost_per_token = output_cost
+ break
+ except Exception:
+ continue
+
+ if input_cost_per_token is not None and output_cost_per_token is not None:
+ pricing_info = {
+ "input_cost_per_million_tokens": round(input_cost_per_token * 2, 2),
+ "output_cost_per_million_tokens": round(output_cost_per_token * 2, 2),
+ "max_tokens": None # cost_per_token doesn't provide max_tokens info
+ }
+ else:
+ pricing_info = {
+ "input_cost_per_million_tokens": None,
+ "output_cost_per_million_tokens": None,
+ "max_tokens": None
+ }
+ except Exception as e:
+ logger.warning(f"Could not get pricing for model {model}: {str(e)}")
pricing_info = {
"input_cost_per_million_tokens": None,
"output_cost_per_million_tokens": None,
"max_tokens": None
}
- except Exception as e:
- logger.warning(f"Could not get pricing for model {model}: {str(e)}")
- pricing_info = {
- "input_cost_per_million_tokens": None,
- "output_cost_per_million_tokens": None,
- "max_tokens": None
- }
model_info.append({
"id": model,
diff --git a/frontend/src/components/billing/usage-logs.tsx b/frontend/src/components/billing/usage-logs.tsx
index 0caf3680..032ebd1b 100644
--- a/frontend/src/components/billing/usage-logs.tsx
+++ b/frontend/src/components/billing/usage-logs.tsx
@@ -491,7 +491,7 @@ export default function UsageLogs({ accountId }: Props) {
Time
Model
- Total
+ Tokens
Cost
@@ -508,10 +508,7 @@ export default function UsageLogs({ accountId }: Props) {
).toLocaleTimeString()}
-
+
{log.content.model}
diff --git a/frontend/src/components/home/sections/pricing-section.tsx b/frontend/src/components/home/sections/pricing-section.tsx
index dbb7e0be..8c2d43c9 100644
--- a/frontend/src/components/home/sections/pricing-section.tsx
+++ b/frontend/src/components/home/sections/pricing-section.tsx
@@ -548,7 +548,7 @@ export function PricingSection({
const [deploymentType, setDeploymentType] = useState<'cloud' | 'self-hosted'>(
'cloud',
);
- const { data: subscriptionData, isLoading: isFetchingPlan, error: subscriptionQueryError } = useSubscription();
+ const { data: subscriptionData, isLoading: isFetchingPlan, error: subscriptionQueryError, refetch: refetchSubscription } = useSubscription();
// Derive authentication and subscription status from the hook data
const isAuthenticated = !!subscriptionData && subscriptionQueryError === null;
@@ -592,6 +592,7 @@ export function PricingSection({
};
const handleSubscriptionUpdate = () => {
+ refetchSubscription();
// The useSubscription hook will automatically refetch, so we just need to clear loading states
setTimeout(() => {
setPlanLoadingStates({});