feat(usage): add monthly usage script for user activity tracking

This commit is contained in:
user 2025-06-25 14:12:01 +00:00
parent 201af90a1f
commit 4d4faca12a
2 changed files with 249 additions and 1 deletions

View File

@ -0,0 +1,248 @@
"""
Monthly Usage Script
This script calculates the monthly usage (in agent run minutes) for a specific user during a specific month.
Usage:
python backend/utils/scripts/get_monthly_usage.py --user-id <USER_ID> --year <YEAR> --month <MONTH> [--verbose]
Arguments:
--user-id The user ID to get usage for (required)
--year The year (e.g., 2024) (required)
--month The month (1-12) (required)
--verbose Enable verbose logging (optional)
Examples:
# Get usage for December 2024
python backend/utils/scripts/get_monthly_usage.py --user-id "user123" --year 2024 --month 12
# Get usage with verbose logging
python backend/utils/scripts/get_monthly_usage.py --user-id "user123" --year 2024 --month 11 --verbose
Output:
The script will output:
- User information (email and ID)
- Month and year
- Total usage in minutes and hours
- Average usage per day (if any usage exists)
Example output:
=== Monthly Usage Report ===
User: user@example.com (user123)
Month: December 2024
Total Usage: 150.45 minutes
Total Usage: 2.51 hours
Average per day: 5.02 minutes
Features:
- Validates agent runs to exclude invalid durations (>2 hours)
- Handles incomplete agent runs appropriately
- Provides detailed logging for debugging
- Calculates usage only for the specified month
- Shows average daily usage
Notes:
- The script requires access to the Supabase database
- Make sure the .env file is properly configured
- The script uses the same logic as the billing system for consistency
"""
import asyncio
import argparse
from datetime import datetime, timezone
from dotenv import load_dotenv
load_dotenv(".env")
from services.supabase import DBConnection
from utils.logger import logger
db_connection = None
db = None
async def get_db():
global db_connection, db
if db_connection is None or db is None:
db_connection = DBConnection()
db = await db_connection.client
return db
async def get_user(user_id: str):
"""Get user information by user ID."""
db = await get_db()
user = await db.auth.admin.get_user_by_id(user_id)
return user.user.model_dump()
async def calculate_monthly_usage(client, user_id: str, year: int, month: int):
"""Calculate total agent run minutes for a specific month for a user."""
# Get start and end of specified month in UTC
start_of_month = datetime(year, month, 1, tzinfo=timezone.utc)
# Calculate start of next month for end boundary
if month == 12:
end_of_month = datetime(year + 1, 1, 1, tzinfo=timezone.utc)
else:
end_of_month = datetime(year, month + 1, 1, tzinfo=timezone.utc)
# First get all threads for this user
threads_result = (
await client.table("threads")
.select("thread_id")
.eq("account_id", user_id)
.execute()
)
if not threads_result.data:
return 0.0, []
thread_ids = [t["thread_id"] for t in threads_result.data]
logger.info(f"Found {len(thread_ids)} threads for user {user_id}")
# Then get all agent runs for these threads in specified month
runs_result = (
await client.table("agent_runs")
.select("id, started_at, completed_at, thread_id")
.in_("thread_id", thread_ids)
.gte("started_at", start_of_month.isoformat())
.lt("started_at", end_of_month.isoformat())
.execute()
)
if not runs_result.data:
return 0.0, []
logger.info(f"Found {len(runs_result.data)} agent runs in {year}-{month:02d}")
# Calculate total minutes and collect run details
total_seconds = 0
valid_runs = 0
run_details = []
for run in runs_result.data:
start_time = datetime.fromisoformat(
run["started_at"].replace("Z", "+00:00")
).timestamp()
if run["completed_at"]:
end_time = datetime.fromisoformat(
run["completed_at"].replace("Z", "+00:00")
).timestamp()
# Skip runs that seem invalid (more than 2 hours)
if start_time < end_time - 7200:
logger.warning(f"Skipping run with duration > 2 hours: {run}")
continue
status = "completed"
else:
# For incomplete runs, use end of month as boundary if run started in that month
end_time = min(
end_of_month.timestamp(), datetime.now(timezone.utc).timestamp()
)
# Skip runs that started more than 1 hour ago and are still incomplete
if start_time < datetime.now(timezone.utc).timestamp() - 3600:
logger.warning(f"Skipping incomplete run started > 1 hour ago: {run}")
continue
status = "incomplete"
duration = end_time - start_time
total_seconds += duration
valid_runs += 1
# Store run details
run_details.append(
{
"id": run["id"],
"thread_id": run["thread_id"],
"started_at": run["started_at"],
"completed_at": run["completed_at"],
"duration_minutes": duration / 60,
"status": status,
}
)
logger.debug(f"Run duration: {duration/60:.2f} minutes")
logger.info(
f"Processed {valid_runs} valid runs out of {len(runs_result.data)} total runs"
)
# Sort runs by duration (longest first)
run_details.sort(key=lambda x: x["duration_minutes"], reverse=True)
return total_seconds / 60, run_details # Convert to minutes
async def main():
"""Main function to run the script."""
# Parse command line arguments
parser = argparse.ArgumentParser(
description="Get monthly usage for a specific user and month"
)
parser.add_argument(
"--user-id", type=str, help="User ID to get usage for", required=True
)
parser.add_argument("--year", type=int, help="Year (e.g., 2024)", required=True)
parser.add_argument("--month", type=int, help="Month (1-12)", required=True)
parser.add_argument(
"--verbose", "-v", action="store_true", help="Enable verbose logging"
)
args = parser.parse_args()
# Validate month
if args.month < 1 or args.month > 12:
raise ValueError("Month must be between 1 and 12")
try:
# Get user information
try:
user = await get_user(args.user_id)
logger.info(f"User: {user['id']} ({user['email']})")
except Exception as e:
logger.warning(f"Could not fetch user details: {e}")
user = {"id": args.user_id, "email": "unknown"}
# Get database connection
db = await get_db()
# Calculate monthly usage
usage_minutes, run_details = await calculate_monthly_usage(
db, args.user_id, args.year, args.month
)
# Display results
month_name = datetime(args.year, args.month, 1).strftime("%B")
print(f"\n=== Monthly Usage Report ===")
print(f"User: {user['email']} ({user['id']})")
print(f"Month: {month_name} {args.year}")
print(f"Total Usage: {usage_minutes:.2f} minutes")
print(f"Total Usage: {usage_minutes/60:.2f} hours")
if usage_minutes > 0:
print(f"Average per day: {usage_minutes/30:.2f} minutes")
# Display top 10 runs
if run_details:
print(f"\n=== Top Longest Runs ===")
for i, run in enumerate(run_details, 1):
started_at = datetime.fromisoformat(
run["started_at"].replace("Z", "+00:00")
)
print(
f"{i:2d}. {run['duration_minutes']:6.2f} min | {started_at.strftime('%Y-%m-%d %H:%M')} | {run['status']:10} | Thread: {run['thread_id']} | Run: {run['id']}"
)
else:
print("\nNo runs found for this period.")
except Exception as e:
logger.error(f"Error: {e}")
raise e
finally:
await DBConnection.disconnect()
if __name__ == "__main__":
asyncio.run(main())

View File

@ -5,4 +5,4 @@ python = "3.11.10"
uv = "0.6.5" uv = "0.6.5"
[env] [env]
_.python.venv = { path = "backend/.venv", create = true } PYTHONPATH = "{{config_root}}/backend"