From 0507708fecd7417669022eed58f5571a11649cbb Mon Sep 17 00:00:00 2001
From: marko-kraemer
Date: Mon, 5 May 2025 02:20:45 +0200
Subject: [PATCH] archive script

---
 backend/agent/run.py                        |   2 +
 .../utils/scripts/archive_old_sandboxes.py  | 342 ++++++++++++++++++
 2 files changed, 344 insertions(+)
 create mode 100644 backend/utils/scripts/archive_old_sandboxes.py

diff --git a/backend/agent/run.py b/backend/agent/run.py
index 7e1dc6ce..e4ccad4d 100644
--- a/backend/agent/run.py
+++ b/backend/agent/run.py
@@ -173,6 +173,8 @@ async def run_agent(
 
     max_tokens = 64000 if "sonnet" in model_name.lower() else None
 
+    # model_name = "openrouter/qwen/qwen3-235b-a22b"
+
     response = await thread_manager.run_thread(
         thread_id=thread_id,
         system_prompt=system_message,
diff --git a/backend/utils/scripts/archive_old_sandboxes.py b/backend/utils/scripts/archive_old_sandboxes.py
new file mode 100644
index 00000000..00573943
--- /dev/null
+++ b/backend/utils/scripts/archive_old_sandboxes.py
@@ -0,0 +1,342 @@
+#!/usr/bin/env python
+"""
+Script to archive sandboxes for projects older than a configurable number of days (default: 1).
+
+Usage:
+    python archive_old_sandboxes.py [--days N] [--dry-run]
+
+This script:
+1. Gets all projects from the projects table
+2. Filters projects created more than N days ago (default: 1 day)
+3. Archives the sandboxes for those projects
+
+Make sure your environment variables are properly set:
+- SUPABASE_URL
+- SUPABASE_SERVICE_ROLE_KEY
+- DAYTONA_SERVER_URL
+"""
+
+import asyncio
+import sys
+import os
+import argparse
+from typing import List, Dict, Any
+from datetime import datetime, timedelta
+from dotenv import load_dotenv
+
+# Load environment variables from .env before importing the app modules below
+load_dotenv(".env")
+
+from services.supabase import DBConnection
+from sandbox.sandbox import daytona
+from utils.logger import logger
+
+# Global DB connection to reuse
+db_connection = None
+
+
+async def get_old_projects(days_threshold: int = 1) -> List[Dict[str, Any]]:
+    """
+    Query all projects created more than N days ago.
+
+    Args:
+        days_threshold: Number of days threshold (default: 1)
+
+    Returns:
+        List of projects with their sandbox information
+    """
+    global db_connection
+    if db_connection is None:
+        db_connection = DBConnection()
+
+    client = await db_connection.client
+
+    # Print the Supabase URL being used
+    print(f"Using Supabase URL: {os.getenv('SUPABASE_URL')}")
+
+    # Calculate the date threshold
+    threshold_date = (datetime.now() - timedelta(days=days_threshold)).isoformat()
+
+    # Initialize variables for pagination
+    all_projects = []
+    page_size = 1000
+    current_page = 0
+    has_more = True
+
+    logger.info(f"Starting to fetch projects older than {days_threshold} day(s)")
+    print(f"Looking for projects created before: {threshold_date}")
+
+    # Paginate through all projects
+    while has_more:
+        # Query projects with pagination
+        start_range = current_page * page_size
+        end_range = start_range + page_size - 1
+
+        logger.info(f"Fetching projects page {current_page+1} (range: {start_range}-{end_range})")
+
+        try:
+            result = await client.table('projects').select(
+                'project_id',
+                'name',
+                'created_at',
+                'account_id',
+                'sandbox'
+            ).range(start_range, end_range).execute()
+
+            # Debug info - print raw response
+            print(f"Response data length: {len(result.data)}")
+
+            if not result.data:
+                print("No more data returned from query, ending pagination")
+                has_more = False
+            else:
+                # Print a sample project to see the actual data structure
+                if current_page == 0 and result.data:
+                    print(f"Sample project data: {result.data[0]}")
+
+                all_projects.extend(result.data)
+                current_page += 1
+
+                # Progress update
+                logger.info(f"Loaded {len(all_projects)} projects so far")
+                print(f"Loaded {len(all_projects)} projects so far...")
+
+                # A short page means we have reached the end of the table
+                if len(result.data) < page_size:
+                    print(f"Got {len(result.data)} records which is less than page size {page_size}, ending pagination")
+                    has_more = False
+                else:
+                    print(f"Full page returned ({len(result.data)} records), continuing to next page")
+
+        except Exception as e:
+            logger.error(f"Error during pagination: {str(e)}")
+            print(f"Error during pagination: {str(e)}")
+            has_more = False  # Stop on error
+
+    # Print the query result summary
+    total_projects = len(all_projects)
+    print(f"Found {total_projects} total projects in database")
+    logger.info(f"Total projects found in database: {total_projects}")
+
+    if not all_projects:
+        logger.info("No projects found in database")
+        return []
+
+    # Filter projects that are older than the threshold and have sandbox information.
+    # Note: this compares ISO-8601 strings lexicographically, which is only valid
+    # while created_at and threshold_date share the same format and timezone.
+    old_projects_with_sandboxes = [
+        project for project in all_projects
+        if project.get('created_at') and project.get('created_at') < threshold_date
+        and project.get('sandbox') and project['sandbox'].get('id')
+    ]
+
+    logger.info(f"Found {len(old_projects_with_sandboxes)} old projects with sandboxes")
+
+    # Print a few sample old projects for debugging
+    if old_projects_with_sandboxes:
+        print("\nSample of old projects with sandboxes:")
+        for i, project in enumerate(old_projects_with_sandboxes[:3]):
+            print(f"  {i+1}. {project.get('name')} (Created: {project.get('created_at')})")
+            print(f"     Sandbox ID: {project['sandbox'].get('id')}")
+            if i >= 2:
+                break
+
+    return old_projects_with_sandboxes
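+
+
+# Reviewer sketch (not part of the original logic): the age filter above runs
+# client-side after fetching every project. If the Supabase client in use
+# supports postgrest filter chaining (the .range() call above suggests it
+# does), the cutoff could be pushed into the query itself, e.g.:
+#
+#     result = await client.table('projects').select(
+#         'project_id', 'name', 'created_at', 'account_id', 'sandbox'
+#     ).lt('created_at', threshold_date).range(start_range, end_range).execute()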
+
+
+async def archive_sandbox(project: Dict[str, Any], dry_run: bool) -> bool:
+    """
+    Archive a single sandbox.
+
+    Args:
+        project: Project information containing the sandbox to archive
+        dry_run: If True, only simulate archiving
+
+    Returns:
+        True if successful, False otherwise
+    """
+    sandbox_id = project['sandbox'].get('id')
+    project_name = project.get('name', 'Unknown')
+    project_id = project.get('project_id', 'Unknown')
+    created_at = project.get('created_at', 'Unknown')
+
+    try:
+        logger.info(f"Checking sandbox {sandbox_id} for project '{project_name}' (ID: {project_id}, Created: {created_at})")
+
+        if dry_run:
+            logger.info(f"DRY RUN: Would archive sandbox {sandbox_id}")
+            print(f"Would archive sandbox {sandbox_id} for project '{project_name}' (Created: {created_at})")
+            return True
+
+        # Get the sandbox
+        sandbox = daytona.get_current_sandbox(sandbox_id)
+
+        # Check sandbox state - it must be stopped before archiving
+        sandbox_info = sandbox.info()
+
+        # Log the current state
+        logger.info(f"Sandbox {sandbox_id} is in '{sandbox_info.state}' state")
+
+        # Only archive if the sandbox is in the stopped state
+        if sandbox_info.state == "stopped":
+            logger.info(f"Archiving sandbox {sandbox_id} as it is in stopped state")
+            sandbox.archive()
+            logger.info(f"Successfully archived sandbox {sandbox_id}")
+            return True
+        else:
+            # Not an error: sandboxes in any other state are skipped on purpose,
+            # so they still count as successfully processed.
+            logger.info(f"Skipping sandbox {sandbox_id} as it is not in stopped state (current: {sandbox_info.state})")
+            return True
+
+    except Exception as e:
+        import traceback
+        error_type = type(e).__name__
+        stack_trace = traceback.format_exc()
+
+        # Log detailed error information
+        logger.error(f"Error processing sandbox {sandbox_id}: {str(e)}")
+        logger.error(f"Error type: {error_type}")
+        logger.error(f"Stack trace:\n{stack_trace}")
+
+        # If the exception has a response attribute (like in HTTP errors), log it
+        if hasattr(e, 'response'):
+            try:
+                response_data = e.response.json() if hasattr(e.response, 'json') else str(e.response)
+                logger.error(f"Response data: {response_data}")
+            except Exception:
+                logger.error("Could not parse response data from error")
+
+        print(f"Failed to process sandbox {sandbox_id}: {error_type} - {str(e)}")
+        return False
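+
+
+# Reviewer sketch (assumption, not exercised by this change): archive_sandbox()
+# deliberately leaves non-stopped sandboxes alone. If stopping them first were
+# ever desired, and the Daytona SDK exposes a stop() call alongside the
+# archive() used above (an assumption, not verified here), the branch could
+# become a stop-then-archive variant:
+#
+#     if sandbox_info.state == "started":
+#         sandbox.stop()      # hypothetical call: bring it to the stopped state
+#     sandbox.archive()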
+
+
+async def process_sandboxes(old_projects: List[Dict[str, Any]], dry_run: bool) -> tuple[int, int]:
+    """
+    Process all sandboxes sequentially.
+
+    Args:
+        old_projects: List of projects older than the threshold
+        dry_run: Whether to actually archive sandboxes or just simulate
+
+    Returns:
+        Tuple of (processed_count, failed_count)
+    """
+    processed_count = 0
+    failed_count = 0
+
+    if dry_run:
+        logger.info(f"DRY RUN: Would archive {len(old_projects)} sandboxes")
+    else:
+        logger.info(f"Archiving {len(old_projects)} sandboxes")
+
+    print(f"Processing {len(old_projects)} sandboxes...")
+
+    # Process each sandbox sequentially
+    for i, project in enumerate(old_projects):
+        success = await archive_sandbox(project, dry_run)
+
+        if success:
+            processed_count += 1
+        else:
+            failed_count += 1
+
+        # Print progress periodically
+        if (i + 1) % 20 == 0 or (i + 1) == len(old_projects):
+            progress = (i + 1) / len(old_projects) * 100
+            print(f"Progress: {i + 1}/{len(old_projects)} sandboxes processed ({progress:.1f}%)")
+            print(f"  - Processed: {processed_count}, Failed: {failed_count}")
+
+    return processed_count, failed_count
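+
+
+# Reviewer sketch (not part of the original change): if sequential archiving
+# becomes too slow for large backlogs, the loop above could be replaced by a
+# bounded-concurrency variant using only standard asyncio primitives:
+#
+#     sem = asyncio.Semaphore(10)  # assumed limit; tune to what the API tolerates
+#
+#     async def archive_with_limit(project):
+#         async with sem:
+#             return await archive_sandbox(project, dry_run)
+#
+#     results = await asyncio.gather(*(archive_with_limit(p) for p in old_projects))
+#     processed_count = sum(1 for ok in results if ok)
+#     failed_count = len(results) - processed_count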
+
+
+async def main():
+    """Main function to run the script."""
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(description='Archive sandboxes for projects older than N days')
+    parser.add_argument('--days', type=int, default=1, help='Age threshold in days (default: 1)')
+    parser.add_argument('--dry-run', action='store_true', help='Show what would be archived without actually archiving')
+    args = parser.parse_args()
+
+    logger.info(f"Starting sandbox cleanup for projects older than {args.days} day(s)")
+    if args.dry_run:
+        logger.info("DRY RUN MODE - No sandboxes will be archived")
+
+    # Print environment info
+    print(f"Environment Mode: {os.getenv('ENV_MODE', 'Not set')}")
+    print(f"Daytona Server: {os.getenv('DAYTONA_SERVER_URL', 'Not set')}")
+
+    try:
+        # Initialize global DB connection
+        global db_connection
+        db_connection = DBConnection()
+
+        # Get all projects older than the threshold
+        old_projects = await get_old_projects(args.days)
+
+        if not old_projects:
+            logger.info(f"No projects older than {args.days} day(s) with sandboxes to process")
+            print(f"No projects older than {args.days} day(s) with sandboxes to archive.")
+            return
+
+        # Print summary of what will be processed
+        print("\n===== SANDBOX CLEANUP SUMMARY =====")
+        print(f"Projects older than {args.days} day(s): {len(old_projects)}")
+        print(f"Sandboxes that will be archived: {len(old_projects)}")
+        print("===================================")
+
+        logger.info(f"Found {len(old_projects)} projects older than {args.days} day(s)")
+
+        # Ask for confirmation before proceeding
+        if not args.dry_run:
+            print("\n⚠️  WARNING: You are about to archive sandboxes for old projects ⚠️")
+            print("This action cannot be undone!")
+            confirmation = input("\nAre you sure you want to proceed with archiving? (TRUE/FALSE): ").strip().upper()
+
+            if confirmation != "TRUE":
+                print("Archiving cancelled. Exiting script.")
+                logger.info("Archiving cancelled by user")
+                return
+
+            print("\nProceeding with sandbox archiving...\n")
+            logger.info("User confirmed sandbox archiving")
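+
+        # Reviewer note (assumption): the interactive input() above would hang
+        # if this script were run from cron or CI; a hypothetical --yes flag
+        # could bypass the prompt, e.g.:
+        #
+        #     parser.add_argument('--yes', action='store_true',
+        #                         help='Skip the interactive confirmation prompt')
+        #     ...
+        #     if not args.dry_run and not args.yes:
+        #         confirmation = input(...)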
+
+        # List a sample of projects to be processed
+        for i, project in enumerate(old_projects[:5]):  # Just show the first 5 for brevity
+            created_at = project.get('created_at', 'Unknown')
+            project_name = project.get('name', 'Unknown')
+            project_id = project.get('project_id', 'Unknown')
+            sandbox_id = project['sandbox'].get('id')
+
+            print(f"{i+1}. Project: {project_name}")
+            print(f"   Project ID: {project_id}")
+            print(f"   Created At: {created_at}")
+            print(f"   Sandbox ID: {sandbox_id}")
+
+        if len(old_projects) > 5:
+            print(f"   ... and {len(old_projects) - 5} more projects")
+
+        # Process all sandboxes
+        processed_count, failed_count = await process_sandboxes(old_projects, args.dry_run)
+
+        # Print final summary
+        print("\nSandbox Cleanup Summary:")
+        print(f"Total projects older than {args.days} day(s): {len(old_projects)}")
+        print(f"Total sandboxes processed: {processed_count + failed_count}")
+
+        if args.dry_run:
+            print("DRY RUN: No sandboxes were actually archived")
+        else:
+            print(f"Successfully processed: {processed_count}")
+            print(f"Failed to process: {failed_count}")
+
+        logger.info("Sandbox cleanup completed")
+
+    except Exception as e:
+        logger.error(f"Error during sandbox cleanup: {str(e)}")
+        sys.exit(1)
+    finally:
+        # Clean up database connection
+        if db_connection:
+            await DBConnection.disconnect()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file