mirror of https://github.com/kortix-ai/suna.git
504 lines
21 KiB
Python
504 lines
21 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
SUNA AGENT INSTALLER
|
||
|
||
Usage:
|
||
python suna_manager.py install # Install with default batch size (50)
|
||
python suna_manager.py install --batch-size 100 # Install with custom batch size
|
||
python suna_manager.py cleanup # Fix broken agents (agents without versions)
|
||
python suna_manager.py repair # Repair orphaned agents (agents with missing version records)
|
||
|
||
Recovery:
|
||
If interrupted, simply re-run the same command - it will skip completed users
|
||
and retry any failed or incomplete installations.
|
||
|
||
If you see discrepancies between agents and agent_versions tables, run:
|
||
python suna_manager.py cleanup
|
||
"""
|
||
|
||
import asyncio
|
||
import argparse
|
||
import sys
|
||
import json
|
||
import time
|
||
import signal
|
||
from pathlib import Path
|
||
|
||
backend_dir = Path(__file__).parent.parent.parent
|
||
sys.path.insert(0, str(backend_dir))
|
||
|
||
from agent.suna import SunaSyncService
|
||
from utils.logger import logger
|
||
|
||
# Global flag for graceful shutdown
|
||
shutdown_requested = False
|
||
|
||
def signal_handler(signum, frame):
|
||
global shutdown_requested
|
||
print_warning(f"\n🛑 Shutdown signal received ({signal.Signals(signum).name})")
|
||
print_info("Finishing current batch before shutdown...")
|
||
print_info("Re-run the same command to resume where you left off")
|
||
shutdown_requested = True
|
||
|
||
def print_success(message: str):
|
||
print(f"✅ {message}")
|
||
|
||
def print_error(message: str):
|
||
print(f"❌ {message}")
|
||
|
||
def print_info(message: str):
|
||
print(f"ℹ️ {message}")
|
||
|
||
def print_warning(message: str):
|
||
print(f"⚠️ {message}")
|
||
|
||
|
||
class SunaManagerCLI:
|
||
def __init__(self):
|
||
self.sync_service = SunaSyncService()
|
||
|
||
async def cleanup_command(self):
|
||
"""Clean up broken agents (agents without versions) caused by termination"""
|
||
print("🧹 Cleaning up broken Suna agents (agents without versions)")
|
||
|
||
try:
|
||
# Find broken agents
|
||
broken_agents = await self._find_broken_agents()
|
||
|
||
if not broken_agents:
|
||
print_success("No broken agents found! All agents have proper versions.")
|
||
return
|
||
|
||
print_warning(f"Found {len(broken_agents)} broken agents (agents without versions)")
|
||
for agent in broken_agents[:5]: # Show first 5
|
||
print_info(f" - Agent {agent['agent_id']} for user {agent['account_id']}")
|
||
|
||
if len(broken_agents) > 5:
|
||
print_info(f" ... and {len(broken_agents) - 5} more")
|
||
|
||
# Confirm cleanup
|
||
print_info("These broken agents will be deleted and recreated properly")
|
||
|
||
# Clean up broken agents
|
||
cleaned_count = 0
|
||
failed_count = 0
|
||
|
||
for agent in broken_agents:
|
||
try:
|
||
await self.sync_service.repository.delete_agent(agent['agent_id'])
|
||
cleaned_count += 1
|
||
logger.info(f"Cleaned up broken agent {agent['agent_id']} for user {agent['account_id']}")
|
||
except Exception as e:
|
||
failed_count += 1
|
||
logger.error(f"Failed to clean up agent {agent['agent_id']}: {e}")
|
||
|
||
print_success(f"Cleaned up {cleaned_count} broken agents")
|
||
if failed_count > 0:
|
||
print_warning(f"Failed to clean up {failed_count} agents")
|
||
|
||
print_info("💡 Now run: python suna_manager.py install")
|
||
print_info(" The install will recreate these users' agents properly")
|
||
|
||
except Exception as e:
|
||
print_error(f"Cleanup failed: {e}")
|
||
logger.error(f"Cleanup error: {e}")
|
||
|
||
async def _find_broken_agents(self):
|
||
"""Find Suna agents that don't have corresponding versions"""
|
||
try:
|
||
client = await self.sync_service.repository.db.client
|
||
|
||
# Manual query to find broken agents
|
||
agents_result = await client.table('agents').select(
|
||
'agent_id, account_id, current_version_id'
|
||
).eq('metadata->>is_suna_default', 'true').execute()
|
||
|
||
broken_agents = []
|
||
for agent in agents_result.data:
|
||
if not agent.get('current_version_id'):
|
||
# Agent has no current_version_id - definitely broken
|
||
broken_agents.append(agent)
|
||
else:
|
||
# Check if the version actually exists
|
||
version_result = await client.table('agent_versions').select(
|
||
'version_id'
|
||
).eq('version_id', agent['current_version_id']).execute()
|
||
|
||
if not version_result.data:
|
||
# Agent points to non-existent version - broken
|
||
broken_agents.append(agent)
|
||
|
||
return broken_agents
|
||
|
||
except Exception as e:
|
||
logger.error(f"Failed to find broken agents: {e}")
|
||
raise
|
||
|
||
async def status_command(self):
|
||
"""Show status of Suna agent installation"""
|
||
print("📊 Suna Agent Installation Status")
|
||
|
||
try:
|
||
client = await self.sync_service.repository.db.client
|
||
|
||
# Count total personal accounts
|
||
accounts_result = await client.schema('basejump').table('accounts').select(
|
||
'id', count='exact'
|
||
).eq('personal_account', True).execute()
|
||
total_accounts = accounts_result.count or 0
|
||
|
||
# Count Suna agents
|
||
agents_result = await client.table('agents').select(
|
||
'agent_id', count='exact'
|
||
).eq('metadata->>is_suna_default', 'true').execute()
|
||
total_agents = agents_result.count or 0
|
||
|
||
# Count all agent versions (simpler approach)
|
||
all_versions_result = await client.table('agent_versions').select(
|
||
'version_id', count='exact'
|
||
).execute()
|
||
|
||
suna_agents_result = await client.table('agents').select(
|
||
'agent_id'
|
||
).eq('metadata->>is_suna_default', 'true').execute()
|
||
|
||
suna_agent_ids = [a['agent_id'] for a in (suna_agents_result.data or [])]
|
||
|
||
if suna_agent_ids:
|
||
suna_versions_result = await client.table('agent_versions').select(
|
||
'version_id', count='exact'
|
||
).in_('agent_id', suna_agent_ids).execute()
|
||
total_suna_versions = suna_versions_result.count or 0
|
||
else:
|
||
total_suna_versions = 0
|
||
|
||
# Find broken agents
|
||
broken_agents = await self._find_broken_agents()
|
||
broken_count = len(broken_agents)
|
||
|
||
print_info(f"Total personal accounts: {total_accounts}")
|
||
print_info(f"Suna agents created: {total_agents}")
|
||
print_info(f"Agent versions created: {total_suna_versions}")
|
||
|
||
if broken_count > 0:
|
||
print_warning(f"Broken agents (no version): {broken_count}")
|
||
print_info("💡 Run: python suna_manager.py cleanup")
|
||
else:
|
||
print_success("All agents have proper versions!")
|
||
|
||
remaining = total_accounts - (total_agents - broken_count)
|
||
if remaining > 0:
|
||
print_info(f"Users needing Suna: {remaining}")
|
||
print_info("💡 Run: python suna_manager.py install")
|
||
else:
|
||
print_success("All users have Suna agents!")
|
||
|
||
except Exception as e:
|
||
print_error(f"Status check failed: {e}")
|
||
logger.error(f"Status error: {e}")
|
||
|
||
async def install_command(self, batch_size: int = 100):
|
||
global shutdown_requested
|
||
|
||
print(f"🚀 Installing Suna for users who don't have it (batch size: {batch_size})")
|
||
print_info(f"Concurrent processing will dramatically improve performance for large user bases")
|
||
print_info("💡 Safe to interrupt: completed users won't be re-processed on restart")
|
||
|
||
start_time = time.time()
|
||
|
||
try:
|
||
all_accounts = await self.sync_service.repository.get_all_personal_accounts()
|
||
existing_agents = await self.sync_service.repository.find_all_suna_agents()
|
||
existing_account_ids = {agent.account_id for agent in existing_agents}
|
||
missing_accounts = [acc for acc in all_accounts if acc not in existing_account_ids]
|
||
|
||
if not missing_accounts:
|
||
print_success("All users already have Suna agents!")
|
||
return
|
||
|
||
total_needed = len(missing_accounts)
|
||
print_info(f"Found {total_needed} users needing Suna agents")
|
||
|
||
except Exception as e:
|
||
print_error(f"Failed to get user counts: {e}")
|
||
return
|
||
|
||
processed = 0
|
||
try:
|
||
result = await self._install_with_progress(batch_size, total_needed)
|
||
except KeyboardInterrupt:
|
||
print_warning("Installation interrupted by user")
|
||
return
|
||
except Exception as e:
|
||
print_error(f"Installation failed: {e}")
|
||
return
|
||
|
||
end_time = time.time()
|
||
duration = end_time - start_time
|
||
|
||
if result.success:
|
||
print_success(f"Successfully installed Suna for {result.synced_count} users")
|
||
if 'total_batches' in result.details[0]:
|
||
batches = result.details[0]['total_batches']
|
||
print_info(f"Processed in {batches} concurrent batches")
|
||
|
||
if result.synced_count > 0:
|
||
avg_time = duration / result.synced_count
|
||
print_info(f"Performance: {duration:.1f}s total, {avg_time:.2f}s per user")
|
||
|
||
estimated_sequential = result.synced_count * 0.5
|
||
time_saved = estimated_sequential - duration
|
||
if time_saved > 60:
|
||
print_info(f"⚡ Concurrent processing saved ~{time_saved/60:.1f} minutes vs sequential")
|
||
else:
|
||
print_error("Installation completed with errors!")
|
||
|
||
if result.failed_count > 0:
|
||
print_warning(f"Failed to install for {result.failed_count} users")
|
||
if result.failed_count <= 5:
|
||
for error in result.errors:
|
||
print(f" 💥 {error}")
|
||
print_info("💡 Re-run the same command to retry failed installations")
|
||
|
||
async def _install_with_progress(self, batch_size: int, total_needed: int):
|
||
global shutdown_requested
|
||
try:
|
||
current_config = self.sync_service.config_manager.get_current_config()
|
||
all_accounts = await self.sync_service.repository.get_all_personal_accounts()
|
||
existing_agents = await self.sync_service.repository.find_all_suna_agents()
|
||
existing_account_ids = {agent.account_id for agent in existing_agents}
|
||
|
||
missing_accounts = [acc for acc in all_accounts if acc not in existing_account_ids]
|
||
|
||
if not missing_accounts:
|
||
from agent.suna.sync_service import SyncResult
|
||
return SyncResult(
|
||
success=True,
|
||
details=[{"message": "All users already have Suna agents"}]
|
||
)
|
||
|
||
logger.info(f"📦 Installing Suna for {len(missing_accounts)} users in batches of {batch_size}")
|
||
|
||
total_success = 0
|
||
total_failed = 0
|
||
all_errors = []
|
||
|
||
for i in range(0, len(missing_accounts), batch_size):
|
||
if shutdown_requested:
|
||
print_warning("🛑 Graceful shutdown requested - stopping after current batch")
|
||
break
|
||
|
||
batch = missing_accounts[i:i + batch_size]
|
||
batch_num = (i // batch_size) + 1
|
||
total_batches = (len(missing_accounts) + batch_size - 1) // batch_size
|
||
|
||
print_info(f"🔄 Processing batch {batch_num}/{total_batches} ({len(batch)} users)")
|
||
|
||
try:
|
||
success_count, failed_count, errors = await self.sync_service._process_batch(batch)
|
||
|
||
total_success += success_count
|
||
total_failed += failed_count
|
||
all_errors.extend(errors)
|
||
|
||
progress_pct = ((total_success + total_failed) / len(missing_accounts)) * 100
|
||
print_info(f"✅ Batch {batch_num}/{total_batches} completed: {success_count} success, {failed_count} failed ({progress_pct:.1f}% total progress)")
|
||
|
||
except Exception as e:
|
||
batch_error = f"Batch {batch_num} failed: {str(e)}"
|
||
logger.error(batch_error)
|
||
all_errors.append(batch_error)
|
||
total_failed += len(batch)
|
||
|
||
if i + batch_size < len(missing_accounts) and not shutdown_requested:
|
||
await asyncio.sleep(0.1)
|
||
|
||
final_message = f"Installed for {total_success} users, {total_failed} failed"
|
||
if shutdown_requested:
|
||
final_message += " (interrupted - safe to resume)"
|
||
|
||
logger.info(f"🎉 Installation completed: {final_message}")
|
||
|
||
from agent.suna.sync_service import SyncResult
|
||
return SyncResult(
|
||
success=total_failed == 0 and not shutdown_requested,
|
||
synced_count=total_success,
|
||
failed_count=total_failed,
|
||
errors=all_errors,
|
||
details=[{
|
||
"message": final_message,
|
||
"batch_size": batch_size,
|
||
"total_batches": (len(missing_accounts) + batch_size - 1) // batch_size,
|
||
"interrupted": shutdown_requested
|
||
}]
|
||
)
|
||
|
||
except Exception as e:
|
||
error_msg = f"Installation operation failed: {str(e)}"
|
||
logger.error(error_msg)
|
||
from agent.suna.sync_service import SyncResult
|
||
return SyncResult(success=False, errors=[error_msg])
|
||
|
||
async def repair_command(self):
|
||
"""Repair orphaned Suna agents by creating missing versions and fixing broken pointers"""
|
||
print("🛠️ Repairing orphaned Suna agents and fixing broken version pointers")
|
||
try:
|
||
from datetime import datetime, timezone
|
||
from agent.suna.config import SunaConfig
|
||
from agent.versioning.version_service import get_version_service
|
||
|
||
repo = self.sync_service.repository
|
||
config_manager = self.sync_service.config_manager
|
||
|
||
current_config = config_manager.get_current_config()
|
||
version_tag = current_config.version_tag
|
||
|
||
# Unified config in the structure expected by repository repair helpers
|
||
unified_config = {
|
||
"system_prompt": SunaConfig.get_system_prompt(),
|
||
"model": SunaConfig.DEFAULT_MODEL,
|
||
"tools": {
|
||
"agentpress": SunaConfig.DEFAULT_TOOLS
|
||
}
|
||
}
|
||
|
||
# Build config_data metadata used by repository repair
|
||
config_data = {
|
||
"metadata": {
|
||
"is_suna_default": True,
|
||
"centrally_managed": True,
|
||
"config_version": version_tag,
|
||
"last_central_update": datetime.now(timezone.utc).isoformat()
|
||
}
|
||
}
|
||
|
||
# Step 1: Repair agents with no versions at all
|
||
orphaned = await repo.find_orphaned_suna_agents()
|
||
if not orphaned:
|
||
print_success("No orphaned Suna agents found (all have version records)")
|
||
else:
|
||
print_warning(f"Found {len(orphaned)} orphaned agents without versions")
|
||
repaired = 0
|
||
failed = 0
|
||
for agent in orphaned:
|
||
try:
|
||
await repo.create_version_record_for_existing_agent(
|
||
agent_id=agent.agent_id,
|
||
account_id=agent.account_id,
|
||
config_data=config_data,
|
||
unified_config=unified_config,
|
||
version_tag=version_tag
|
||
)
|
||
repaired += 1
|
||
except Exception as e:
|
||
failed += 1
|
||
logger.error(f"Failed to repair orphaned agent {agent.agent_id}: {e}")
|
||
print_success(f"Created missing versions for {repaired} agents")
|
||
if failed:
|
||
print_warning(f"Failed to repair {failed} agents")
|
||
|
||
# Step 2: Fix agents whose current_version_id is missing/invalid but versions exist
|
||
broken = await self._find_broken_agents()
|
||
if not broken:
|
||
print_success("No agents with broken version pointers found")
|
||
else:
|
||
# Filter to those that actually have versions now
|
||
client = await repo.db.client
|
||
fixed = 0
|
||
skipped = 0
|
||
failed = 0
|
||
version_service = await get_version_service()
|
||
|
||
for agent in broken:
|
||
try:
|
||
versions_result = await client.table('agent_versions').select('version_id, is_active, created_at').eq('agent_id', agent['agent_id']).order('created_at', desc=True).execute()
|
||
versions = versions_result.data or []
|
||
if not versions:
|
||
skipped += 1
|
||
continue
|
||
|
||
# Prefer active version, fallback to most recent
|
||
active = next((v for v in versions if v.get('is_active')), None)
|
||
target_id = active['version_id'] if active else versions[0]['version_id']
|
||
|
||
updated = await repo.update_agent_version_pointer(agent['agent_id'], target_id)
|
||
if updated:
|
||
fixed += 1
|
||
else:
|
||
failed += 1
|
||
except Exception as e:
|
||
failed += 1
|
||
logger.error(f"Failed to fix pointer for agent {agent['agent_id']}: {e}")
|
||
|
||
print_success(f"Fixed version pointers for {fixed} agents")
|
||
if skipped:
|
||
print_info(f"Skipped {skipped} agents that still have no versions (already handled above)")
|
||
if failed:
|
||
print_warning(f"Failed to fix {failed} agents")
|
||
|
||
print_success("Repair completed")
|
||
except Exception as e:
|
||
print_error(f"Repair failed: {e}")
|
||
logger.error(f"Repair error: {e}")
|
||
|
||
|
||
async def main():
|
||
signal.signal(signal.SIGINT, signal_handler)
|
||
signal.signal(signal.SIGTERM, signal_handler)
|
||
|
||
parser = argparse.ArgumentParser(
|
||
description="🌞 Suna Agent Manager - Concurrent Installation",
|
||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||
epilog=__doc__
|
||
)
|
||
|
||
subparsers = parser.add_subparsers(dest='command', help='Available commands')
|
||
|
||
# Install command
|
||
install_parser = subparsers.add_parser('install', help='📦 Install Suna for users who don\'t have it')
|
||
install_parser.add_argument(
|
||
'--batch-size',
|
||
type=int,
|
||
default=100,
|
||
help='Number of users to process concurrently in each batch (default: 100)'
|
||
)
|
||
|
||
# Cleanup command
|
||
subparsers.add_parser('cleanup', help='🧹 Clean up broken agents (agents without versions)')
|
||
|
||
# Status command
|
||
subparsers.add_parser('status', help='📊 Show installation status and statistics')
|
||
|
||
# Repair command
|
||
subparsers.add_parser('repair', help='🛠️ Repair orphaned Suna agents and fix broken version pointers')
|
||
|
||
args = parser.parse_args()
|
||
|
||
if not args.command:
|
||
parser.print_help()
|
||
return
|
||
|
||
cli = SunaManagerCLI()
|
||
|
||
try:
|
||
if args.command == 'install':
|
||
await cli.install_command(batch_size=args.batch_size)
|
||
elif args.command == 'cleanup':
|
||
await cli.cleanup_command()
|
||
elif args.command == 'status':
|
||
await cli.status_command()
|
||
elif args.command == 'repair':
|
||
await cli.repair_command()
|
||
else:
|
||
parser.print_help()
|
||
|
||
except KeyboardInterrupt:
|
||
print_warning("Operation cancelled by user")
|
||
print_info("💡 Safe to re-run - completed users won't be re-processed")
|
||
except Exception as e:
|
||
print_error(f"Unexpected error: {str(e)}")
|
||
logger.error(f"CLI error: {str(e)}")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
asyncio.run(main()) |