#!/usr/bin/env node

import * as fs from 'node:fs';
import * as path from 'node:path';
import { fileURLToPath } from 'node:url';
import { eq, sql } from 'drizzle-orm';
import { getTableName } from 'drizzle-orm';
import { db } from '../src/connection';
import * as schema from '../src/schema';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Collects every export of the schema module that carries the
 * `drizzle:IsDrizzleTable` symbol.
 *
 * @returns Map from schema export name to the table object.
 */
function getAllTables(): Record<string, any> {
  const tables: Record<string, any> = {};

  for (const [key, value] of Object.entries(schema)) {
    if (value && typeof value === 'object' && Symbol.for('drizzle:IsDrizzleTable') in value) {
      try {
        // Called only as a sanity check; anything that makes it throw is skipped.
        getTableName(value);
        tables[key] = value;
      } catch {
        // Not a table, skip
      }
    }
  }

  return tables;
}

/**
 * Reads and parses a JSON file.
 *
 * @param filePath Path of the file to read.
 * @returns The parsed JSON value, or `null` when the file does not exist.
 * @throws SyntaxError when the file exists but is not valid JSON.
 */
async function loadJsonFile(filePath: string): Promise<any> {
  if (!fs.existsSync(filePath)) {
    return null;
  }

  const content = fs.readFileSync(filePath, 'utf-8');
  return JSON.parse(content);
}

// Tables that always use ON CONFLICT DO NOTHING instead of updating.
const DO_NOTHING_TABLES: ReadonlySet<string> = new Set([
  'assetSearch',
  'textSearch',
  'permissionGroupsToUsers',
  'teamsToUsers',
  'datasetsToPermissionGroups',
  'datasetsToDatasetGroups',
  'collectionsToAssets',
  'messagesToFiles',
  'messagesToSlackMessages',
  'metricFilesToDashboardFiles',
  'metricFilesToDatasets',
  'metricFilesToReportFiles',
  'usersToOrganizations',
  'permissionGroupsToIdentities',
]);

// Number of records sent per batch when upserting.
const UPSERT_BATCH_SIZE = 100;

/**
 * Inserts `data` into `table`, resolving conflicts instead of failing on them.
 *
 * Tables listed in `DO_NOTHING_TABLES` are inserted one batch at a time with
 * ON CONFLICT DO NOTHING. All other tables are inserted record by record: when
 * both the record and the table have an `id`, a conflict on `id` updates every
 * field except `id` and `createdAt`; otherwise the conflict is ignored.
 *
 * @param tx Transaction (or database) handle used to run the inserts.
 * @param tableName Schema export name of the table; used for logging and to pick the conflict strategy.
 * @param table Drizzle table object.
 * @param data Records to insert. An empty or missing array is a no-op.
 * @throws Re-throws any insert error after logging it.
 */
async function upsertData(tx: any, tableName: string, table: any, data: any[]) {
  if (!data || data.length === 0) return;

  try {
    // Fix YAML content in datasets table by converting literal \n to actual newlines
    const records =
      tableName === 'datasets'
        ? data.map((record) => ({
            ...record,
            ymlFile:
              typeof record.ymlFile === 'string'
                ? record.ymlFile.replace(/\\n/g, '\n')
                : record.ymlFile,
          }))
        : data;

    const skipDuplicates = DO_NOTHING_TABLES.has(tableName);
    let upserted = 0;

    for (let i = 0; i < records.length; i += UPSERT_BATCH_SIZE) {
      const batch = records.slice(i, i + UPSERT_BATCH_SIZE);

      if (skipDuplicates) {
        // For junction/relation tables, just skip duplicates
        await tx.insert(table).values(batch).onConflictDoNothing();
        upserted += batch.length;
        continue;
      }

      for (const record of batch) {
        // `createdAt` is pulled out so an existing row keeps its original creation time.
        const { id, createdAt: _createdAt, ...updateFields } = record;
        const hasFieldsToUpdate = Object.keys(updateFields).length > 0;

        if (id && table.id && hasFieldsToUpdate) {
          await tx.insert(table).values(record).onConflictDoUpdate({
            target: table.id,
            set: updateFields,
          });
        } else {
          // No id column, or nothing to update on conflict: an empty SET clause
          // would be invalid, so just skip rows that already exist.
          await tx.insert(table).values(record).onConflictDoNothing();
        }
        upserted++;
      }
    }

    console.log(`Upserted ${upserted} records into ${tableName}`);
  } catch (error) {
    console.error(`Error upserting data into ${tableName}:`, error);
    throw error;
  }
}

// Standard dev password for all auth users (encrypted version of "password123")
const DEV_PASSWORD_ENCRYPTED = '$2a$06$BKy/23Yp58fItuTD0aKWluB2ayXyww8AeXNQ0KHgh9TeRxJ/tbmaC';

/**
 * Builds the `auth.users` row for a public user.
 *
 * The row reuses the public user's `id` and `email`; every other value is a
 * fixed development default. `confirmedAt` is the sentinel string `'DEFAULT'`,
 * which the insert statement turns into the SQL keyword DEFAULT.
 *
 * @param publicUser Record from the public `users` seed data (reads `id` and `email`).
 */
function generateAuthUser(publicUser: any) {
  return {
    instanceId: '00000000-0000-0000-0000-000000000000',
    id: publicUser.id,
    aud: 'authenticated',
    role: 'authenticated',
    email: publicUser.email,
    encryptedPassword: DEV_PASSWORD_ENCRYPTED,
    emailConfirmedAt: '2025-03-04 18:42:05.801697+00',
    invitedAt: null,
    confirmationToken: '',
    confirmationSentAt: null,
    recoveryToken: '',
    recoverySentAt: null,
    emailChangeTokenNew: '',
    emailChange: '',
    emailChangeSentAt: null,
    lastSignInAt: null,
    rawAppMetaData: '{"provider": "email", "providers": ["email"]}',
    rawUserMetaData: '{}',
    isSuperAdmin: null,
    createdAt: '2025-03-04 18:42:05.801697+00',
    updatedAt: '2025-03-04 18:42:05.801697+00',
    phone: null,
    phoneConfirmedAt: null,
    phoneChange: '',
    phoneChangeToken: '',
    phoneChangeSentAt: null,
    confirmedAt: 'DEFAULT',
    emailChangeTokenCurrent: '',
    emailChangeConfirmStatus: '0',
    bannedUntil: null,
    reauthenticationToken: '',
    reauthenticationSentAt: null,
    isSsoUser: false,
    deletedAt: null,
    isAnonymous: false,
  };
}

/**
 * Builds the `auth.identities` row for a public user.
 *
 * `providerId`, `userId` and `id` all reuse the public user's `id`. `email` is
 * the sentinel string `'DEFAULT'`, which the insert statement turns into the
 * SQL keyword DEFAULT.
 *
 * @param publicUser Record from the public `users` seed data (reads `id`).
 */
function generateAuthIdentity(publicUser: any) {
  return {
    providerId: publicUser.id,
    userId: publicUser.id,
    identityData: JSON.stringify({ sub: publicUser.id }),
    provider: 'email',
    lastSignInAt: '2025-03-04 18:42:05.801697+00',
    createdAt: '2025-03-04 18:42:05.801697+00',
    updatedAt: '2025-03-04 18:42:05.801697+00',
    email: 'DEFAULT',
    id: publicUser.id,
  };
}

// Hardcoded vault secret for data sources
// NOTE(review): these look like real connection credentials committed to source
// control. If they are live, rotate them and load the values from the
// environment instead — confirm with the owners of this database.
const HARDCODED_VAULT_SECRET = {
  type: 'postgres',
  host: 'aws-0-us-east-1.pooler.supabase.com',
  port: 5432,
  username: 'postgres.fjbidcbjvmpesoonimhl',
  password: 'S8Jrts05EqxsfA3q',
  database: 'postgres',
  schema: 'sem',
  jump_host: null,
  ssh_username: null,
  ssh_private_key: null,
};

/**
 * Upserts the generated rows into `auth.users`, one statement per user.
 * On an `id` conflict only `email`, `encrypted_password` and `updated_at` are refreshed.
 */
async function upsertAuthUsers(tx: any, authUsers: ReturnType<typeof generateAuthUser>[]) {
  if (authUsers.length === 0) return;

  console.log(`Upserting ${authUsers.length} auth users from public users...`);

  for (const user of authUsers) {
    await tx.execute(sql`
      INSERT INTO auth.users (
        instance_id, id, aud, role, email, encrypted_password,
        email_confirmed_at, invited_at, confirmation_token, confirmation_sent_at,
        recovery_token, recovery_sent_at, email_change_token_new, email_change,
        email_change_sent_at, last_sign_in_at, raw_app_meta_data, raw_user_meta_data,
        is_super_admin, created_at, updated_at, phone, phone_confirmed_at,
        phone_change, phone_change_token, phone_change_sent_at, confirmed_at,
        email_change_token_current, email_change_confirm_status, banned_until,
        reauthentication_token, reauthentication_sent_at, is_sso_user, deleted_at,
        is_anonymous
      ) VALUES (
        ${user.instanceId}, ${user.id}, ${user.aud}, ${user.role}, ${user.email},
        ${user.encryptedPassword}, ${user.emailConfirmedAt}, ${user.invitedAt},
        ${user.confirmationToken}, ${user.confirmationSentAt}, ${user.recoveryToken},
        ${user.recoverySentAt}, ${user.emailChangeTokenNew}, ${user.emailChange},
        ${user.emailChangeSentAt}, ${user.lastSignInAt}, ${user.rawAppMetaData}::jsonb,
        ${user.rawUserMetaData}::jsonb, ${user.isSuperAdmin}, ${user.createdAt},
        ${user.updatedAt}, ${user.phone}, ${user.phoneConfirmedAt}, ${user.phoneChange},
        ${user.phoneChangeToken}, ${user.phoneChangeSentAt},
        ${user.confirmedAt === 'DEFAULT' ? sql`DEFAULT` : user.confirmedAt},
        ${user.emailChangeTokenCurrent}, ${user.emailChangeConfirmStatus},
        ${user.bannedUntil}, ${user.reauthenticationToken},
        ${user.reauthenticationSentAt}, ${user.isSsoUser}, ${user.deletedAt},
        ${user.isAnonymous}
      )
      ON CONFLICT (id) DO UPDATE SET
        email = EXCLUDED.email,
        encrypted_password = EXCLUDED.encrypted_password,
        updated_at = EXCLUDED.updated_at
    `);
  }
}

/**
 * Upserts the generated rows into `auth.identities`, one statement per identity.
 * On an `id` conflict only `identity_data`, `last_sign_in_at` and `updated_at` are refreshed.
 */
async function upsertAuthIdentities(
  tx: any,
  authIdentities: ReturnType<typeof generateAuthIdentity>[]
) {
  if (authIdentities.length === 0) return;

  console.log(`Upserting ${authIdentities.length} auth identities from public users...`);

  for (const identity of authIdentities) {
    await tx.execute(sql`
      INSERT INTO auth.identities (
        provider_id, user_id, identity_data, provider, last_sign_in_at,
        created_at, updated_at, email, id
      ) VALUES (
        ${identity.providerId}, ${identity.userId}, ${identity.identityData}::jsonb,
        ${identity.provider}, ${identity.lastSignInAt}, ${identity.createdAt},
        ${identity.updatedAt},
        ${identity.email === 'DEFAULT' ? sql`DEFAULT` : identity.email},
        ${identity.id}
      )
      ON CONFLICT (id) DO UPDATE SET
        identity_data = EXCLUDED.identity_data,
        last_sign_in_at = EXCLUDED.last_sign_in_at,
        updated_at = EXCLUDED.updated_at
    `);
  }
}

/**
 * Recreates a vault secret for each data source and stores the new secret id
 * on the data source row. Failures are logged as warnings and do not stop the seed.
 *
 * Each best-effort step runs in a nested transaction (savepoint): on PostgreSQL
 * a failed statement aborts the transaction it runs in, so without a savepoint
 * a swallowed error would still make every later statement of the seed fail.
 *
 * @param tx Outer seed transaction.
 * @param dataSourcesTable Drizzle table object for data sources.
 * @param dataSources Data source seed records (reads `id` and `name`).
 */
async function createVaultSecrets(tx: any, dataSourcesTable: any, dataSources: any[]) {
  if (dataSources.length === 0) return;

  console.log('\n=== Creating vault secrets for data sources ===\n');

  for (const dataSource of dataSources) {
    try {
      console.log(`Creating new vault secret for data source: ${dataSource.name}`);

      // Always try to delete the existing secret first (if it exists)
      try {
        await tx.transaction(async (savepoint: any) => {
          await savepoint.execute(sql`
            DELETE FROM vault.secrets WHERE name = ${dataSource.id}
          `);
        });
        console.log(`Deleted existing vault secret for ${dataSource.name}`);
      } catch (_deleteError) {
        // It's fine if the delete fails, it might not exist
      }

      await tx.transaction(async (savepoint: any) => {
        // Create a new vault secret
        const result = await savepoint.execute(sql`
          SELECT vault.create_secret(
            ${JSON.stringify(HARDCODED_VAULT_SECRET)},
            ${dataSource.id}
          ) as secret_id
        `);

        // Accept both result shapes: an object with `rows` or a plain row array.
        const rows = Array.isArray(result) ? result : result?.rows;
        const secretId = rows?.[0]?.secret_id;

        if (!secretId) {
          console.warn(
            `No secret ID returned for data source ${dataSource.name}; leaving it unlinked`
          );
          return;
        }

        // Update the data source with the new secret ID
        console.log(`Updating data source ${dataSource.name} with new secret ID: ${secretId}`);
        await savepoint
          .update(dataSourcesTable)
          .set({ secretId: secretId })
          .where(eq(dataSourcesTable.id, dataSource.id));
        console.log(`Successfully linked vault secret to data source ${dataSource.name}`);
      });
    } catch (vaultError: unknown) {
      const message = vaultError instanceof Error ? vaultError.message : String(vaultError);
      console.warn(
        `Could not create vault secret for data source ${dataSource.name}:`,
        message
      );
    }
  }
}

/** Prints the end-of-run totals derived from the loaded seed data. */
function printSummary(seedData: Record<string, any[]>) {
  console.log('\nSummary:');

  const publicUserCount = (seedData.users || []).length;
  const dataSourceCount = (seedData.dataSources || []).length;
  const totalRecords =
    Object.values(seedData).reduce((sum, data) => sum + data.length, 0) + publicUserCount * 2; // x2 for auth users + identities

  console.log(`Total records upserted: ${totalRecords}`);
  console.log(`Tables seeded: ${Object.keys(seedData).length + 2}`); // +2 for auth tables
  console.log(`Auth users upserted: ${publicUserCount} (from public users)`);
  console.log(`Auth identities upserted: ${publicUserCount}`);
  console.log(`Vault secrets created/updated for: ${dataSourceCount} data sources`);
  console.log(`Dev password: password123 (for all users)`);
}

/**
 * Seeds the database from the JSON files in `../seed-data`.
 *
 * Reads `_metadata.json` for the table order, loads one `<table>.json` per
 * table, then inside a single transaction upserts the generated auth users and
 * identities, the public tables in metadata order, and finally the vault
 * secrets for data sources. Always terminates the process: exit code 1 on a
 * missing directory, invalid metadata or any error, 0 otherwise.
 */
async function seed() {
  console.log('Starting dynamic database seed from files...\n');

  const dataDir = path.join(__dirname, '../seed-data');

  if (!fs.existsSync(dataDir)) {
    console.error(`Seed data directory not found: ${dataDir}`);
    console.log('Please run extract-all-data-dynamic.ts first to generate seed data files');
    process.exit(1);
  }

  try {
    // Load metadata to get table order and dependencies
    const metadataPath = path.join(dataDir, '_metadata.json');
    const metadata = await loadJsonFile(metadataPath);

    if (!metadata || !metadata.tableOrder) {
      console.error(
        'Metadata file not found or invalid. Please run extract-all-data-dynamic.ts first'
      );
      process.exit(1);
    }

    const tableOrder: string[] = metadata.tableOrder;
    const tables = getAllTables();

    console.log(`Found ${Object.keys(tables).length} tables in schema`);
    console.log(`Loading data for ${tableOrder.length} tables from metadata\n`);

    // Load public schema data. Auth rows and vault secrets are generated
    // in code below, so their extracted JSON files are not read.
    const seedData: Record<string, any[]> = {};
    for (const tableName of tableOrder) {
      const data = await loadJsonFile(path.join(dataDir, `${tableName}.json`));
      if (data) {
        seedData[tableName] = data;
      }
    }

    // Start transaction
    await db.transaction(async (tx) => {
      console.log('=== Upserting seed data (no deletion needed) ===\n');

      // Generate auth users for all public users in the seed data
      const publicUsers = seedData.users || [];
      await upsertAuthUsers(tx, publicUsers.map(generateAuthUser));
      await upsertAuthIdentities(tx, publicUsers.map(generateAuthIdentity));

      // Upsert public schema data in dependency order
      for (const tableName of tableOrder) {
        const table = tables[tableName];
        const data = seedData[tableName];

        if (table && data) {
          await upsertData(tx, tableName, table, data);
        }
      }

      // After all data is inserted, create vault secrets for data sources
      await createVaultSecrets(tx, tables.dataSources, seedData.dataSources || []);

      console.log('\n=== Seed completed successfully! ===');

      // Show summary
      printSummary(seedData);
    });
  } catch (error) {
    console.error('Error seeding database:', error);
    process.exit(1);
  } finally {
    // Reached only on success: the error path above has already exited.
    process.exit(0);
  }
}

// Parse command line arguments
const args = process.argv.slice(2);
const isDryRun = args.includes('--dry-run');
const verbose = args.includes('--verbose');

if (isDryRun) {
  console.log('DRY RUN MODE - No changes will be made to the database\n');

  const dataDir = path.join(__dirname, '../seed-data');
  if (!fs.existsSync(dataDir)) {
    console.error(`Seed data directory not found: ${dataDir}`);
    process.exit(1);
  }

  const metadataPath = path.join(dataDir, '_metadata.json');
  if (!fs.existsSync(metadataPath)) {
    console.error(
      'Metadata file not found or invalid. Please run extract-all-data-dynamic.ts first'
    );
    process.exit(1);
  }

  const metadata = JSON.parse(fs.readFileSync(metadataPath, 'utf-8'));

  console.log('Seed operation plan:\n');
  console.log('Tables will be seeded in this order:');

  if (metadata?.tables) {
    metadata.tables.forEach((table: any, index: number) => {
      const deps =
        table.dependencies?.length > 0 ? ` (depends on: ${table.dependencies.join(', ')})` : '';
      console.log(`${index + 1}. ${table.name}: ${table.recordCount} records${deps}`);
    });

    console.log(`\nTotal tables: ${metadata.tables.length}`);
    console.log(`Extraction date: ${metadata.extractedAt}`);
  }

  if (verbose && metadata?.dependencies) {
    console.log('\nDependency graph:');
    Object.entries(metadata.dependencies).forEach(([table, deps]) => {
      if ((deps as string[]).length > 0) {
        console.log(`  ${table} → ${(deps as string[]).join(', ')}`);
      }
    });
  }

  process.exit(0);
}

// Set default DATABASE_URL if not provided
// NOTE(review): ES module imports are evaluated before this statement, so the
// default only takes effect if '../src/connection' reads DATABASE_URL lazily
// rather than at import time — confirm.
if (!process.env.DATABASE_URL) {
  const defaultUrl = 'postgresql://postgres:postgres@localhost:54322/postgres';
  console.log(`DATABASE_URL not set - using default: ${defaultUrl}`);
  process.env.DATABASE_URL = defaultUrl;
}

// Run the seed
void seed();