2025-05-05 08:20:45 +08:00
#!/usr/bin/env python
"""
Script to archive sandboxes for projects that are older than 1 day .
Usage :
python archive_old_sandboxes . py [ - - days N ] [ - - dry - run ]
This script :
1. Gets all projects from the projects table
2. Filters projects created more than N days ago ( default : 1 day )
3. Archives the sandboxes for those projects
Make sure your environment variables are properly set :
- SUPABASE_URL
- SUPABASE_SERVICE_ROLE_KEY
- DAYTONA_SERVER_URL
"""
2025-05-08 08:00:50 +08:00
# TODO: SAVE THE LATEST SANDBOX STATE SOMEWHERE OR LIKE MASS CHECK THE STATE BEFORE STARTING TO ARCHIVE - AS ITS GOING TO GO OVER A BUNCH THAT ARE ALREADY ARCHIVED – MAYBE BEST TO GET ALL FROM DAYTONA AND THEN RUN THE ARCHIVE ONLY ON THE ONES THAT MEET THE CRITERIA (STOPPED STATE)
2025-05-05 08:20:45 +08:00
import asyncio
import sys
import os
import argparse
from typing import List , Dict , Any
from datetime import datetime , timedelta
from dotenv import load_dotenv
# Load script-specific environment variables
load_dotenv ( " .env " )
from services . supabase import DBConnection
from sandbox . sandbox import daytona
from utils . logger import logger
# Global DB connection to reuse
db_connection = None
async def get_old_projects ( days_threshold : int = 1 ) - > List [ Dict [ str , Any ] ] :
"""
Query all projects created more than N days ago .
Args :
days_threshold : Number of days threshold ( default : 1 )
Returns :
List of projects with their sandbox information
"""
global db_connection
if db_connection is None :
db_connection = DBConnection ( )
client = await db_connection . client
# Print the Supabase URL being used
print ( f " Using Supabase URL: { os . getenv ( ' SUPABASE_URL ' ) } " )
# Calculate the date threshold
threshold_date = ( datetime . now ( ) - timedelta ( days = days_threshold ) ) . isoformat ( )
# Initialize variables for pagination
all_projects = [ ]
page_size = 1000
current_page = 0
has_more = True
logger . info ( f " Starting to fetch projects older than { days_threshold } day(s) " )
print ( f " Looking for projects created before: { threshold_date } " )
# Paginate through all projects
while has_more :
# Query projects with pagination
start_range = current_page * page_size
end_range = start_range + page_size - 1
logger . info ( f " Fetching projects page { current_page + 1 } (range: { start_range } - { end_range } ) " )
try :
result = await client . table ( ' projects ' ) . select (
' project_id ' ,
' name ' ,
' created_at ' ,
' account_id ' ,
' sandbox '
2025-05-08 08:00:50 +08:00
) . order ( ' created_at ' , desc = True ) . range ( start_range , end_range ) . execute ( )
2025-05-05 08:20:45 +08:00
# Debug info - print raw response
print ( f " Response data length: { len ( result . data ) } " )
if not result . data :
print ( " No more data returned from query, ending pagination " )
has_more = False
else :
# Print a sample project to see the actual data structure
if current_page == 0 and result . data :
print ( f " Sample project data: { result . data [ 0 ] } " )
all_projects . extend ( result . data )
current_page + = 1
# Progress update
logger . info ( f " Loaded { len ( all_projects ) } projects so far " )
print ( f " Loaded { len ( all_projects ) } projects so far... " )
# Check if we've reached the end - if we got fewer results than the page size
if len ( result . data ) < page_size :
print ( f " Got { len ( result . data ) } records which is less than page size { page_size } , ending pagination " )
has_more = False
else :
print ( f " Full page returned ( { len ( result . data ) } records), continuing to next page " )
except Exception as e :
logger . error ( f " Error during pagination: { str ( e ) } " )
print ( f " Error during pagination: { str ( e ) } " )
has_more = False # Stop on error
# Print the query result summary
total_projects = len ( all_projects )
print ( f " Found { total_projects } total projects in database " )
logger . info ( f " Total projects found in database: { total_projects } " )
if not all_projects :
logger . info ( " No projects found in database " )
return [ ]
# Filter projects that are older than the threshold and have sandbox information
old_projects_with_sandboxes = [
project for project in all_projects
if project . get ( ' created_at ' ) and project . get ( ' created_at ' ) < threshold_date
and project . get ( ' sandbox ' ) and project [ ' sandbox ' ] . get ( ' id ' )
]
logger . info ( f " Found { len ( old_projects_with_sandboxes ) } old projects with sandboxes " )
# Print a few sample old projects for debugging
if old_projects_with_sandboxes :
print ( " \n Sample of old projects with sandboxes: " )
for i , project in enumerate ( old_projects_with_sandboxes [ : 3 ] ) :
print ( f " { i + 1 } . { project . get ( ' name ' ) } (Created: { project . get ( ' created_at ' ) } ) " )
print ( f " Sandbox ID: { project [ ' sandbox ' ] . get ( ' id ' ) } " )
if i > = 2 :
break
return old_projects_with_sandboxes
async def archive_sandbox ( project : Dict [ str , Any ] , dry_run : bool ) - > bool :
"""
Archive a single sandbox .
Args :
project : Project information containing sandbox to archive
dry_run : If True , only simulate archiving
Returns :
True if successful , False otherwise
"""
sandbox_id = project [ ' sandbox ' ] . get ( ' id ' )
project_name = project . get ( ' name ' , ' Unknown ' )
project_id = project . get ( ' project_id ' , ' Unknown ' )
created_at = project . get ( ' created_at ' , ' Unknown ' )
try :
logger . info ( f " Checking sandbox { sandbox_id } for project ' { project_name } ' (ID: { project_id } , Created: { created_at } ) " )
if dry_run :
logger . info ( f " DRY RUN: Would archive sandbox { sandbox_id } " )
print ( f " Would archive sandbox { sandbox_id } for project ' { project_name } ' (Created: { created_at } ) " )
return True
# Get the sandbox
2025-06-15 06:18:30 +08:00
sandbox = daytona . get ( sandbox_id )
2025-05-05 08:20:45 +08:00
# Check sandbox state - it must be stopped before archiving
sandbox_info = sandbox . info ( )
# Log the current state
logger . info ( f " Sandbox { sandbox_id } is in ' { sandbox_info . state } ' state " )
# Only archive if the sandbox is in the stopped state
if sandbox_info . state == " stopped " :
logger . info ( f " Archiving sandbox { sandbox_id } as it is in stopped state " )
sandbox . archive ( )
logger . info ( f " Successfully archived sandbox { sandbox_id } " )
return True
else :
logger . info ( f " Skipping sandbox { sandbox_id } as it is not in stopped state (current: { sandbox_info . state } ) " )
return True
except Exception as e :
import traceback
error_type = type ( e ) . __name__
stack_trace = traceback . format_exc ( )
# Log detailed error information
logger . error ( f " Error processing sandbox { sandbox_id } : { str ( e ) } " )
logger . error ( f " Error type: { error_type } " )
logger . error ( f " Stack trace: \n { stack_trace } " )
# If the exception has a response attribute (like in HTTP errors), log it
if hasattr ( e , ' response ' ) :
try :
response_data = e . response . json ( ) if hasattr ( e . response , ' json ' ) else str ( e . response )
logger . error ( f " Response data: { response_data } " )
except Exception :
logger . error ( f " Could not parse response data from error " )
print ( f " Failed to process sandbox { sandbox_id } : { error_type } - { str ( e ) } " )
return False
async def process_sandboxes ( old_projects : List [ Dict [ str , Any ] ] , dry_run : bool ) - > tuple [ int , int ] :
"""
Process all sandboxes sequentially .
Args :
old_projects : List of projects older than the threshold
dry_run : Whether to actually archive sandboxes or just simulate
Returns :
Tuple of ( processed_count , failed_count )
"""
processed_count = 0
failed_count = 0
if dry_run :
logger . info ( f " DRY RUN: Would archive { len ( old_projects ) } sandboxes " )
else :
logger . info ( f " Archiving { len ( old_projects ) } sandboxes " )
print ( f " Processing { len ( old_projects ) } sandboxes... " )
# Process each sandbox sequentially
for i , project in enumerate ( old_projects ) :
success = await archive_sandbox ( project , dry_run )
if success :
processed_count + = 1
else :
failed_count + = 1
# Print progress periodically
if ( i + 1 ) % 20 == 0 or ( i + 1 ) == len ( old_projects ) :
progress = ( i + 1 ) / len ( old_projects ) * 100
print ( f " Progress: { i + 1 } / { len ( old_projects ) } sandboxes processed ( { progress : .1f } %) " )
print ( f " - Processed: { processed_count } , Failed: { failed_count } " )
return processed_count , failed_count
async def main ( ) :
""" Main function to run the script. """
# Parse command line arguments
parser = argparse . ArgumentParser ( description = ' Archive sandboxes for projects older than N days ' )
parser . add_argument ( ' --days ' , type = int , default = 1 , help = ' Age threshold in days (default: 1) ' )
parser . add_argument ( ' --dry-run ' , action = ' store_true ' , help = ' Show what would be archived without actually archiving ' )
args = parser . parse_args ( )
logger . info ( f " Starting sandbox cleanup for projects older than { args . days } day(s) " )
if args . dry_run :
logger . info ( " DRY RUN MODE - No sandboxes will be archived " )
# Print environment info
print ( f " Environment Mode: { os . getenv ( ' ENV_MODE ' , ' Not set ' ) } " )
print ( f " Daytona Server: { os . getenv ( ' DAYTONA_SERVER_URL ' , ' Not set ' ) } " )
try :
# Initialize global DB connection
global db_connection
db_connection = DBConnection ( )
# Get all projects older than the threshold
old_projects = await get_old_projects ( args . days )
if not old_projects :
logger . info ( f " No projects older than { args . days } day(s) with sandboxes to process " )
print ( f " No projects older than { args . days } day(s) with sandboxes to archive. " )
return
# Print summary of what will be processed
print ( " \n ===== SANDBOX CLEANUP SUMMARY ===== " )
print ( f " Projects older than { args . days } day(s): { len ( old_projects ) } " )
print ( f " Sandboxes that will be archived: { len ( old_projects ) } " )
print ( " =================================== " )
logger . info ( f " Found { len ( old_projects ) } projects older than { args . days } day(s) " )
# Ask for confirmation before proceeding
if not args . dry_run :
print ( " \n ⚠️ WARNING: You are about to archive sandboxes for old projects ⚠️ " )
print ( " This action cannot be undone! " )
confirmation = input ( " \n Are you sure you want to proceed with archiving? (TRUE/FALSE): " ) . strip ( ) . upper ( )
if confirmation != " TRUE " :
print ( " Archiving cancelled. Exiting script. " )
logger . info ( " Archiving cancelled by user " )
return
print ( " \n Proceeding with sandbox archiving... \n " )
logger . info ( " User confirmed sandbox archiving " )
# List a sample of projects to be processed
for i , project in enumerate ( old_projects [ : 5 ] ) : # Just show first 5 for brevity
created_at = project . get ( ' created_at ' , ' Unknown ' )
project_name = project . get ( ' name ' , ' Unknown ' )
project_id = project . get ( ' project_id ' , ' Unknown ' )
sandbox_id = project [ ' sandbox ' ] . get ( ' id ' )
print ( f " { i + 1 } . Project: { project_name } " )
print ( f " Project ID: { project_id } " )
print ( f " Created At: { created_at } " )
print ( f " Sandbox ID: { sandbox_id } " )
if len ( old_projects ) > 5 :
print ( f " ... and { len ( old_projects ) - 5 } more projects " )
# Process all sandboxes
processed_count , failed_count = await process_sandboxes ( old_projects , args . dry_run )
# Print final summary
print ( " \n Sandbox Cleanup Summary: " )
print ( f " Total projects older than { args . days } day(s): { len ( old_projects ) } " )
print ( f " Total sandboxes processed: { len ( old_projects ) } " )
if args . dry_run :
print ( f " DRY RUN: No sandboxes were actually archived " )
else :
print ( f " Successfully processed: { processed_count } " )
print ( f " Failed to process: { failed_count } " )
logger . info ( " Sandbox cleanup completed " )
except Exception as e :
logger . error ( f " Error during sandbox cleanup: { str ( e ) } " )
sys . exit ( 1 )
finally :
# Clean up database connection
if db_connection :
await DBConnection . disconnect ( )
if __name__ == " __main__ " :
asyncio . run ( main ( ) )