Refactor DuckDB integration to lazy-load the module and update package dependencies. Adjust Docker workflow to skip optional dependencies during installation.

This commit is contained in:
dal 2025-09-11 09:40:43 -06:00
parent cd91cb4f08
commit 37efbaee23
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
6 changed files with 71 additions and 11 deletions

View File

@ -79,7 +79,7 @@ jobs:
cp -r packages /tmp/prod-deps/
cp apps/server/package.json /tmp/prod-deps/apps/server/
# Install production dependencies only
# Install production dependencies only, skip optional dependencies
cd /tmp/prod-deps
pnpm install --frozen-lockfile --prod --no-optional

View File

@ -42,17 +42,19 @@ const app = new Hono().get(
today.getFullYear() === updatedDate.getFullYear() &&
today.getMonth() === updatedDate.getMonth() &&
today.getDate() === updatedDate.getDate();
if (isToday) {
return c.json(currentSuggestedPrompts);
}
}
const timeoutMs = 10000; // 10 seconds timeout
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(() => {
reject(new Error('Request timeout after 10 seconds. Returning current suggested prompts.'));
reject(
new Error('Request timeout after 10 seconds. Returning current suggested prompts.')
);
}, timeoutMs);
});

View File

@ -33,8 +33,10 @@
"@buster/env-utils": "workspace:*",
"@buster/data-source": "workspace:*",
"@buster/database": "workspace:*",
"@duckdb/node-api": "1.3.2-alpha.26",
"@turbopuffer/turbopuffer": "^1.0.0",
"zod": "^3.22.4"
},
"optionalDependencies": {
"@duckdb/node-api": "1.3.2-alpha.26"
}
}

View File

@ -1,9 +1,9 @@
/**
* DuckDB-based deduplication for searchable values
* Uses functional composition and Zod validation
* DuckDB is lazy-loaded to avoid requiring it when not needed
*/
import { type DuckDBConnection, DuckDBInstance } from '@duckdb/node-api';
import { z } from 'zod';
import {
type DeduplicationResult,
@ -56,6 +56,45 @@ export const formatSqlInClause = (values: string[]): string => {
// DUCKDB CONNECTION MANAGEMENT
// ============================================================================
// Type definitions for lazy-loaded DuckDB module
// These are minimal structural stand-ins that avoid a static import of
// '@duckdb/node-api'. They are intended to mirror the subset of the DuckDB API
// used in this file — NOTE(review): confirm against the installed version.

/** Connection surface used here: run a SQL string and close synchronously. */
interface DuckDBConnection {
/** Takes a SQL string and resolves to a {@link DuckDBResult}. */
run(sql: string): Promise<DuckDBResult>;
/** Synchronous close; returns nothing. */
closeSync(): void;
}
/** Result surface used here: rows exposed as an array of untyped values. */
interface DuckDBResult {
/** Resolves to an array of `unknown` rows; callers must narrow or validate. */
getRowObjectsJson(): Promise<unknown[]>;
}
/** Instance surface used here: something that can hand out connections. */
interface DuckDBInstance {
/** Resolves to a {@link DuckDBConnection}. */
connect(): Promise<DuckDBConnection>;
}
/**
 * Static side of the DuckDB instance class: a `create` factory that takes a
 * database path and an optional string-to-string config map.
 */
type DuckDBInstanceClass = {
create(dbPath: string, config?: Record<string, string>): Promise<DuckDBInstance>;
};
/** Cached module namespace; stays `null` until the first successful load. */
let DuckDBModule: typeof import('@duckdb/node-api') | null = null;

/**
 * Lazy load DuckDB module only when needed.
 *
 * The module is imported dynamically on first use and cached in
 * `DuckDBModule`, so later calls resolve without importing again.
 *
 * @returns The `@duckdb/node-api` module namespace.
 * @throws Error if the dynamic import fails (DuckDB is an optional
 *   dependency). The original import failure is attached as `cause`, so
 *   failures other than "not installed" (for example a native binding that
 *   cannot be loaded) remain diagnosable.
 */
async function loadDuckDB(): Promise<typeof import('@duckdb/node-api')> {
  // Fast path: already loaded by an earlier call.
  if (DuckDBModule) {
    return DuckDBModule;
  }

  try {
    const loaded = await import('@duckdb/node-api');
    DuckDBModule = loaded;
    return loaded;
  } catch (error) {
    const loadError = new Error(
      'DuckDB is required for deduplication functionality but is not installed. ' +
        'Please install @duckdb/node-api to use deduplication features.'
    );
    // Preserve the underlying failure instead of discarding it. Defined as a
    // property rather than passed via the ES2022 `Error` options argument so
    // this compiles on older `lib` targets; the descriptor mirrors the native
    // `cause` property (non-enumerable, writable, configurable).
    Object.defineProperty(loadError, 'cause', {
      value: error,
      writable: true,
      enumerable: false,
      configurable: true,
    });
    throw loadError;
  }
}
export interface DuckDBContext {
conn: DuckDBConnection;
dbPath?: string; // Store path for cleanup only if using disk
@ -67,6 +106,11 @@ export interface DuckDBContext {
*/
export const createConnection = async (useDisk = true): Promise<DuckDBContext> => {
try {
// Lazy load DuckDB when first connection is created
const { DuckDBInstance: DuckDBInstanceClass } = (await loadDuckDB()) as {
DuckDBInstance: DuckDBInstanceClass;
};
// Use disk storage for large datasets to avoid memory issues
// The database file will be automatically cleaned up
const dbPath = useDisk ? `/tmp/duckdb-dedupe-${Date.now()}.db` : ':memory:';
@ -79,7 +123,7 @@ export const createConnection = async (useDisk = true): Promise<DuckDBContext> =
// Create instance and get connection
// Instance will be garbage collected after connection is created
const instance = await DuckDBInstance.create(dbPath, config);
const instance = await DuckDBInstanceClass.create(dbPath, config);
const conn = await instance.connect();
// Configure DuckDB for optimal performance with large datasets

View File

@ -1,8 +1,19 @@
/**
* Type-safe helper functions for DuckDB operations
* Note: DuckDB types are redeclared locally as minimal structural interfaces, since the module is lazy-loaded
*/
import type { DuckDBConnection } from '@duckdb/node-api';
// Type definitions for lazy-loaded DuckDB module
// These are minimal structural stand-ins that avoid a static import of
// '@duckdb/node-api'. They are intended to mirror the subset of the DuckDB API
// used by these helpers — NOTE(review): confirm against the installed version,
// and keep in sync with any equivalent declarations elsewhere in the package.

/** Connection surface used here: run a SQL string and close synchronously. */
interface DuckDBConnection {
/** Takes a SQL string and resolves to a {@link DuckDBResult}. */
run(sql: string): Promise<DuckDBResult>;
/** Synchronous close; returns nothing. */
closeSync(): void;
}
/** Result surface used here: rows exposed as an array of untyped values. */
interface DuckDBResult {
/** Resolves to an array of `unknown` rows; callers must narrow or validate. */
getRowObjectsJson(): Promise<unknown[]>;
}
import type { DuckDBContext } from './deduplicate';
/**

View File

@ -1205,15 +1205,16 @@ importers:
'@buster/vitest-config':
specifier: workspace:*
version: link:../vitest-config
'@duckdb/node-api':
specifier: 1.3.2-alpha.26
version: 1.3.2-alpha.26
'@turbopuffer/turbopuffer':
specifier: ^1.0.0
version: 1.0.0
zod:
specifier: ^3.22.4
version: 3.25.76
optionalDependencies:
'@duckdb/node-api':
specifier: 1.3.2-alpha.26
version: 1.3.2-alpha.26
packages/server-shared:
dependencies: