// mirror of https://github.com/buster-so/buster.git (425 lines, 15 KiB)
#!/usr/bin/env node
|
|
import * as fs from 'node:fs';
|
|
import * as path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { eq, sql } from 'drizzle-orm';
|
|
import { getTableName } from 'drizzle-orm';
|
|
import { db } from '../src/connection';
|
|
import * as schema from '../src/schema';
|
|
|
|
// ESM modules have no __filename/__dirname globals; reconstruct them from
// import.meta.url so relative paths (e.g. ../seed-data) resolve correctly.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
|
|
|
|
// Get all table objects from schema
|
|
function getAllTables(): Record<string, any> {
|
|
const tables: Record<string, any> = {};
|
|
|
|
for (const [key, value] of Object.entries(schema)) {
|
|
if (value && typeof value === 'object' && Symbol.for('drizzle:IsDrizzleTable') in value) {
|
|
try {
|
|
const _tableName = getTableName(value);
|
|
tables[key] = value;
|
|
} catch {
|
|
// Not a table, skip
|
|
}
|
|
}
|
|
}
|
|
|
|
return tables;
|
|
}
|
|
|
|
async function loadJsonFile(filePath: string): Promise<any> {
|
|
if (!fs.existsSync(filePath)) {
|
|
return null;
|
|
}
|
|
const content = fs.readFileSync(filePath, 'utf-8');
|
|
return JSON.parse(content);
|
|
}
|
|
|
|
async function upsertData(tx: any, tableName: string, table: any, data: any[]) {
|
|
if (!data || data.length === 0) return;
|
|
|
|
try {
|
|
// Fix YAML content in datasets table by converting literal \n to actual newlines
|
|
if (tableName === 'datasets') {
|
|
data = data.map(record => ({
|
|
...record,
|
|
ymlFile: record.ymlFile ? record.ymlFile.replace(/\\n/g, '\n') : record.ymlFile
|
|
}));
|
|
}
|
|
// For tables that should always use ON CONFLICT DO NOTHING instead of updating
|
|
const doNothingTables = [
|
|
'assetSearch',
|
|
'textSearch',
|
|
'permissionGroupsToUsers',
|
|
'teamsToUsers',
|
|
'datasetsToPermissionGroups',
|
|
'datasetsToDatasetGroups',
|
|
'collectionsToAssets',
|
|
'messagesToFiles',
|
|
'messagesToSlackMessages',
|
|
'metricFilesToDashboardFiles',
|
|
'metricFilesToDatasets',
|
|
'metricFilesToReportFiles',
|
|
'usersToOrganizations',
|
|
'permissionGroupsToIdentities',
|
|
];
|
|
|
|
// Batch upsert for large datasets
|
|
const batchSize = 100;
|
|
let upserted = 0;
|
|
|
|
for (let i = 0; i < data.length; i += batchSize) {
|
|
const batch = data.slice(i, i + batchSize);
|
|
|
|
// Use batch insert with ON CONFLICT for better performance
|
|
if (doNothingTables.includes(tableName)) {
|
|
// For junction/relation tables, just skip duplicates
|
|
await tx.insert(table).values(batch).onConflictDoNothing();
|
|
upserted += batch.length;
|
|
} else {
|
|
// For regular tables with id field, use onConflictDoUpdate
|
|
for (const record of batch) {
|
|
const { id, createdAt, ...updateFields } = record;
|
|
|
|
// Most tables have an 'id' column as primary key
|
|
if (id && table.id) {
|
|
await tx.insert(table).values(record).onConflictDoUpdate({
|
|
target: table.id,
|
|
set: updateFields,
|
|
});
|
|
} else {
|
|
// For tables without id column, use onConflictDoNothing
|
|
await tx.insert(table).values(record).onConflictDoNothing();
|
|
}
|
|
upserted++;
|
|
}
|
|
}
|
|
}
|
|
|
|
console.log(`Upserted ${upserted} records into ${tableName}`);
|
|
} catch (error) {
|
|
console.error(`Error upserting data into ${tableName}:`, error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
// Standard dev password for all auth users.
// This is a bcrypt hash (the "$2a$06$" prefix — cost factor 6) of the
// literal string "password123". Dev/seed use only.
const DEV_PASSWORD_ENCRYPTED = '$2a$06$BKy/23Yp58fItuTD0aKWluB2ayXyww8AeXNQ0KHgh9TeRxJ/tbmaC';
|
|
|
|
// Function to generate auth user from public user data
|
|
function generateAuthUser(publicUser: any) {
|
|
return {
|
|
instanceId: '00000000-0000-0000-0000-000000000000',
|
|
id: publicUser.id,
|
|
aud: 'authenticated',
|
|
role: 'authenticated',
|
|
email: publicUser.email,
|
|
encryptedPassword: DEV_PASSWORD_ENCRYPTED,
|
|
emailConfirmedAt: '2025-03-04 18:42:05.801697+00',
|
|
invitedAt: null,
|
|
confirmationToken: '',
|
|
confirmationSentAt: null,
|
|
recoveryToken: '',
|
|
recoverySentAt: null,
|
|
emailChangeTokenNew: '',
|
|
emailChange: '',
|
|
emailChangeSentAt: null,
|
|
lastSignInAt: null,
|
|
rawAppMetaData: '{"provider": "email", "providers": ["email"]}',
|
|
rawUserMetaData: '{}',
|
|
isSuperAdmin: null,
|
|
createdAt: '2025-03-04 18:42:05.801697+00',
|
|
updatedAt: '2025-03-04 18:42:05.801697+00',
|
|
phone: null,
|
|
phoneConfirmedAt: null,
|
|
phoneChange: '',
|
|
phoneChangeToken: '',
|
|
phoneChangeSentAt: null,
|
|
confirmedAt: 'DEFAULT',
|
|
emailChangeTokenCurrent: '',
|
|
emailChangeConfirmStatus: '0',
|
|
bannedUntil: null,
|
|
reauthenticationToken: '',
|
|
reauthenticationSentAt: null,
|
|
isSsoUser: false,
|
|
deletedAt: null,
|
|
isAnonymous: false,
|
|
};
|
|
}
|
|
|
|
// Function to generate auth identity from public user data
|
|
function generateAuthIdentity(publicUser: any) {
|
|
return {
|
|
providerId: publicUser.id,
|
|
userId: publicUser.id,
|
|
identityData: JSON.stringify({ sub: publicUser.id }),
|
|
provider: 'email',
|
|
lastSignInAt: '2025-03-04 18:42:05.801697+00',
|
|
createdAt: '2025-03-04 18:42:05.801697+00',
|
|
updatedAt: '2025-03-04 18:42:05.801697+00',
|
|
email: 'DEFAULT',
|
|
id: publicUser.id,
|
|
};
|
|
}
|
|
|
|
// Hardcoded vault secret for data sources.
// NOTE(security): this embeds what appear to be live Supabase pooler
// credentials (host/username/password) in source control. That is only
// acceptable for a disposable dev database — rotate these and load them
// from environment variables if this ever points at shared infrastructure.
// The object is JSON-stringified and stored via vault.create_secret below.
const HARDCODED_VAULT_SECRET = {
  type: 'postgres',
  host: 'aws-0-us-east-1.pooler.supabase.com',
  port: 5432,
  username: 'postgres.fjbidcbjvmpesoonimhl',
  password: 'S8Jrts05EqxsfA3q',
  database: 'postgres',
  schema: 'sem',
  // SSH-tunnel fields are unused for this seed source.
  jump_host: null,
  ssh_username: null,
  ssh_private_key: null,
};
|
|
|
|
async function seed() {
|
|
console.log('Starting dynamic database seed from files...\n');
|
|
|
|
const dataDir = path.join(__dirname, '../seed-data');
|
|
|
|
if (!fs.existsSync(dataDir)) {
|
|
console.error(`Seed data directory not found: ${dataDir}`);
|
|
console.log('Please run extract-all-data-dynamic.ts first to generate seed data files');
|
|
process.exit(1);
|
|
}
|
|
|
|
try {
|
|
// Load metadata to get table order and dependencies
|
|
const metadataPath = path.join(dataDir, '_metadata.json');
|
|
const metadata = await loadJsonFile(metadataPath);
|
|
|
|
if (!metadata || !metadata.tableOrder) {
|
|
console.error(
|
|
'Metadata file not found or invalid. Please run extract-all-data-dynamic.ts first'
|
|
);
|
|
process.exit(1);
|
|
}
|
|
|
|
const tableOrder = metadata.tableOrder;
|
|
const tables = getAllTables();
|
|
|
|
console.log(`Found ${Object.keys(tables).length} tables in schema`);
|
|
console.log(`Loading data for ${tableOrder.length} tables from metadata\n`);
|
|
|
|
// Load all data files
|
|
const seedData: Record<string, any[]> = {};
|
|
|
|
// Load auth data from files (but we'll use hardcoded instead)
|
|
const _authUsers = await loadJsonFile(path.join(dataDir, 'auth.users.json'));
|
|
const _authIdentities = await loadJsonFile(path.join(dataDir, 'auth.identities.json'));
|
|
const _vaultSecrets = await loadJsonFile(path.join(dataDir, 'vault.secrets.json'));
|
|
|
|
// Load public schema data
|
|
for (const tableName of tableOrder) {
|
|
const data = await loadJsonFile(path.join(dataDir, `${tableName}.json`));
|
|
if (data) {
|
|
seedData[tableName] = data;
|
|
}
|
|
}
|
|
|
|
// Start transaction
|
|
await db.transaction(async (tx) => {
|
|
console.log('=== Upserting seed data (no deletion needed) ===\n');
|
|
|
|
// Generate auth users for all public users in the seed data
|
|
const publicUsers = seedData.users || [];
|
|
const authUsers = publicUsers.map(generateAuthUser);
|
|
const authIdentities = publicUsers.map(generateAuthIdentity);
|
|
|
|
// Upsert auth.users
|
|
if (authUsers.length > 0) {
|
|
console.log(`Upserting ${authUsers.length} auth users from public users...`);
|
|
for (const user of authUsers) {
|
|
// Build the SQL dynamically with proper parameter handling
|
|
await tx.execute(sql`
|
|
INSERT INTO auth.users (
|
|
instance_id, id, aud, role, email, encrypted_password,
|
|
email_confirmed_at, invited_at, confirmation_token, confirmation_sent_at,
|
|
recovery_token, recovery_sent_at, email_change_token_new, email_change,
|
|
email_change_sent_at, last_sign_in_at, raw_app_meta_data, raw_user_meta_data,
|
|
is_super_admin, created_at, updated_at, phone, phone_confirmed_at,
|
|
phone_change, phone_change_token, phone_change_sent_at, confirmed_at,
|
|
email_change_token_current, email_change_confirm_status, banned_until,
|
|
reauthentication_token, reauthentication_sent_at, is_sso_user, deleted_at, is_anonymous
|
|
) VALUES (
|
|
${user.instanceId}, ${user.id}, ${user.aud}, ${user.role},
|
|
${user.email}, ${user.encryptedPassword}, ${user.emailConfirmedAt},
|
|
${user.invitedAt}, ${user.confirmationToken}, ${user.confirmationSentAt},
|
|
${user.recoveryToken}, ${user.recoverySentAt}, ${user.emailChangeTokenNew},
|
|
${user.emailChange}, ${user.emailChangeSentAt}, ${user.lastSignInAt},
|
|
${user.rawAppMetaData}::jsonb, ${user.rawUserMetaData}::jsonb, ${user.isSuperAdmin},
|
|
${user.createdAt}, ${user.updatedAt}, ${user.phone}, ${user.phoneConfirmedAt},
|
|
${user.phoneChange}, ${user.phoneChangeToken}, ${user.phoneChangeSentAt},
|
|
${user.confirmedAt === 'DEFAULT' ? sql`DEFAULT` : user.confirmedAt},
|
|
${user.emailChangeTokenCurrent}, ${user.emailChangeConfirmStatus}, ${user.bannedUntil},
|
|
${user.reauthenticationToken}, ${user.reauthenticationSentAt}, ${user.isSsoUser},
|
|
${user.deletedAt}, ${user.isAnonymous}
|
|
)
|
|
ON CONFLICT (id) DO UPDATE SET
|
|
email = EXCLUDED.email,
|
|
encrypted_password = EXCLUDED.encrypted_password,
|
|
updated_at = EXCLUDED.updated_at
|
|
`);
|
|
}
|
|
}
|
|
|
|
// Upsert auth.identities
|
|
if (authIdentities.length > 0) {
|
|
console.log(`Upserting ${authIdentities.length} auth identities from public users...`);
|
|
for (const identity of authIdentities) {
|
|
await tx.execute(sql`
|
|
INSERT INTO auth.identities (
|
|
provider_id, user_id, identity_data, provider, last_sign_in_at,
|
|
created_at, updated_at, email, id
|
|
) VALUES (
|
|
${identity.providerId}, ${identity.userId}, ${identity.identityData}::jsonb,
|
|
${identity.provider}, ${identity.lastSignInAt}, ${identity.createdAt},
|
|
${identity.updatedAt},
|
|
${identity.email === 'DEFAULT' ? sql`DEFAULT` : identity.email},
|
|
${identity.id}
|
|
)
|
|
ON CONFLICT (id) DO UPDATE SET
|
|
identity_data = EXCLUDED.identity_data,
|
|
last_sign_in_at = EXCLUDED.last_sign_in_at,
|
|
updated_at = EXCLUDED.updated_at
|
|
`);
|
|
}
|
|
}
|
|
|
|
// Upsert public schema data in dependency order
|
|
for (const tableName of tableOrder) {
|
|
const table = tables[tableName];
|
|
const data = seedData[tableName];
|
|
if (table && data) {
|
|
await upsertData(tx, tableName, table, data);
|
|
}
|
|
}
|
|
|
|
// After all data is inserted, create vault secrets for data sources
|
|
const dataSources = seedData.dataSources || [];
|
|
if (dataSources.length > 0) {
|
|
console.log('\n=== Creating vault secrets for data sources ===\n');
|
|
for (const dataSource of dataSources) {
|
|
try {
|
|
console.log(`Creating new vault secret for data source: ${dataSource.name}`);
|
|
|
|
// Always try to delete the existing secret first (if it exists)
|
|
try {
|
|
await tx.execute(sql`
|
|
DELETE FROM vault.secrets WHERE name = ${dataSource.id}
|
|
`);
|
|
console.log(`Deleted existing vault secret for ${dataSource.name}`);
|
|
} catch (_deleteError) {
|
|
// It's fine if the delete fails, it might not exist
|
|
}
|
|
|
|
// Create a new vault secret
|
|
const result = await tx.execute(sql`
|
|
SELECT vault.create_secret(
|
|
${JSON.stringify(HARDCODED_VAULT_SECRET)},
|
|
${dataSource.id}
|
|
) as secret_id
|
|
`);
|
|
|
|
const secretId = result?.rows?.[0]?.secret_id;
|
|
|
|
if (secretId) {
|
|
// Update the data source with the new secret ID
|
|
console.log(
|
|
`Updating data source ${dataSource.name} with new secret ID: ${secretId}`
|
|
);
|
|
await tx
|
|
.update(tables.dataSources)
|
|
.set({ secretId: secretId })
|
|
.where(eq(tables.dataSources.id, dataSource.id));
|
|
console.log(`Successfully linked vault secret to data source ${dataSource.name}`);
|
|
}
|
|
} catch (vaultError: any) {
|
|
console.warn(
|
|
`Could not create vault secret for data source ${dataSource.name}:`,
|
|
vaultError.message
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
console.log('\n=== Seed completed successfully! ===');
|
|
|
|
// Show summary
|
|
console.log('\nSummary:');
|
|
const publicUserCount = (seedData.users || []).length;
|
|
const dataSourceCount = (seedData.dataSources || []).length;
|
|
const totalRecords =
|
|
Object.values(seedData).reduce((sum, data) => sum + data.length, 0) + publicUserCount * 2; // x2 for auth users + identities
|
|
console.log(`Total records upserted: ${totalRecords}`);
|
|
console.log(`Tables seeded: ${Object.keys(seedData).length + 2}`); // +2 for auth tables
|
|
console.log(`Auth users upserted: ${publicUserCount} (from public users)`);
|
|
console.log(`Auth identities upserted: ${publicUserCount}`);
|
|
console.log(`Vault secrets created/updated for: ${dataSourceCount} data sources`);
|
|
console.log(`Dev password: password123 (for all users)`);
|
|
});
|
|
} catch (error) {
|
|
console.error('Error seeding database:', error);
|
|
process.exit(1);
|
|
} finally {
|
|
process.exit(0);
|
|
}
|
|
}
|
|
|
|
// Parse command line arguments
|
|
const args = process.argv.slice(2);
|
|
const isDryRun = args.includes('--dry-run');
|
|
const verbose = args.includes('--verbose');
|
|
|
|
if (isDryRun) {
|
|
console.log('DRY RUN MODE - No changes will be made to the database\n');
|
|
|
|
const dataDir = path.join(__dirname, '../seed-data');
|
|
if (!fs.existsSync(dataDir)) {
|
|
console.error(`Seed data directory not found: ${dataDir}`);
|
|
process.exit(1);
|
|
}
|
|
|
|
const metadata = JSON.parse(fs.readFileSync(path.join(dataDir, '_metadata.json'), 'utf-8'));
|
|
|
|
console.log('Seed operation plan:\n');
|
|
console.log('Tables will be seeded in this order:');
|
|
|
|
if (metadata?.tables) {
|
|
metadata.tables.forEach((table: any, index: number) => {
|
|
const deps =
|
|
table.dependencies?.length > 0 ? ` (depends on: ${table.dependencies.join(', ')})` : '';
|
|
console.log(`${index + 1}. ${table.name}: ${table.recordCount} records${deps}`);
|
|
});
|
|
|
|
console.log(`\nTotal tables: ${metadata.tables.length}`);
|
|
console.log(`Extraction date: ${metadata.extractedAt}`);
|
|
}
|
|
|
|
if (verbose && metadata.dependencies) {
|
|
console.log('\nDependency graph:');
|
|
Object.entries(metadata.dependencies).forEach(([table, deps]) => {
|
|
if ((deps as string[]).length > 0) {
|
|
console.log(` ${table} → ${(deps as string[]).join(', ')}`);
|
|
}
|
|
});
|
|
}
|
|
|
|
process.exit(0);
|
|
}
|
|
|
|
// Set default DATABASE_URL if not provided
|
|
if (!process.env.DATABASE_URL) {
|
|
const defaultUrl = 'postgresql://postgres:postgres@localhost:54322/postgres';
|
|
console.log(`DATABASE_URL not set - using default: ${defaultUrl}`);
|
|
process.env.DATABASE_URL = defaultUrl;
|
|
}
|
|
|
|
// Run the seed
|
|
seed();
|