mirror of https://github.com/buster-so/buster.git
Refactor DuckDB integration to lazy-load the module and update package dependencies. Adjust Docker workflow to skip optional dependencies during installation.
This commit is contained in:
parent
cd91cb4f08
commit
37efbaee23
|
@ -79,7 +79,7 @@ jobs:
|
|||
cp -r packages /tmp/prod-deps/
|
||||
cp apps/server/package.json /tmp/prod-deps/apps/server/
|
||||
|
||||
# Install production dependencies only
|
||||
# Install production dependencies only, skip optional dependencies
|
||||
cd /tmp/prod-deps
|
||||
pnpm install --frozen-lockfile --prod --no-optional
|
||||
|
||||
|
|
|
@ -42,17 +42,19 @@ const app = new Hono().get(
|
|||
today.getFullYear() === updatedDate.getFullYear() &&
|
||||
today.getMonth() === updatedDate.getMonth() &&
|
||||
today.getDate() === updatedDate.getDate();
|
||||
|
||||
|
||||
if (isToday) {
|
||||
return c.json(currentSuggestedPrompts);
|
||||
}
|
||||
}
|
||||
|
||||
const timeoutMs = 10000; // 10 seconds timeout
|
||||
|
||||
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => {
|
||||
reject(new Error('Request timeout after 10 seconds. Returning current suggested prompts.'));
|
||||
reject(
|
||||
new Error('Request timeout after 10 seconds. Returning current suggested prompts.')
|
||||
);
|
||||
}, timeoutMs);
|
||||
});
|
||||
|
||||
|
|
|
@ -33,8 +33,10 @@
|
|||
"@buster/env-utils": "workspace:*",
|
||||
"@buster/data-source": "workspace:*",
|
||||
"@buster/database": "workspace:*",
|
||||
"@duckdb/node-api": "1.3.2-alpha.26",
|
||||
"@turbopuffer/turbopuffer": "^1.0.0",
|
||||
"zod": "^3.22.4"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@duckdb/node-api": "1.3.2-alpha.26"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
/**
|
||||
* DuckDB-based deduplication for searchable values
|
||||
* Uses functional composition and Zod validation
|
||||
* DuckDB is lazy-loaded to avoid requiring it when not needed
|
||||
*/
|
||||
|
||||
import { type DuckDBConnection, DuckDBInstance } from '@duckdb/node-api';
|
||||
import { z } from 'zod';
|
||||
import {
|
||||
type DeduplicationResult,
|
||||
|
@ -56,6 +56,45 @@ export const formatSqlInClause = (values: string[]): string => {
|
|||
// DUCKDB CONNECTION MANAGEMENT
|
||||
// ============================================================================
|
||||
|
||||
// Type definitions for lazy-loaded DuckDB module
|
||||
// These match the actual DuckDB API but avoid direct import
|
||||
interface DuckDBConnection {
|
||||
run(sql: string): Promise<DuckDBResult>;
|
||||
closeSync(): void;
|
||||
}
|
||||
|
||||
interface DuckDBResult {
|
||||
getRowObjectsJson(): Promise<unknown[]>;
|
||||
}
|
||||
|
||||
interface DuckDBInstance {
|
||||
connect(): Promise<DuckDBConnection>;
|
||||
}
|
||||
|
||||
type DuckDBInstanceClass = {
|
||||
create(dbPath: string, config?: Record<string, string>): Promise<DuckDBInstance>;
|
||||
};
|
||||
|
||||
// Cache for the lazily imported DuckDB module; stays null until the first successful load.
let DuckDBModule: typeof import('@duckdb/node-api') | null = null;

/**
 * Lazy load DuckDB module only when needed.
 *
 * The module is imported on the first call and cached in `DuckDBModule`;
 * subsequent calls return the cached module without importing again.
 *
 * @returns The loaded `@duckdb/node-api` module.
 * @throws Error when the dynamic import fails (DuckDB is an optional dependency).
 *   The message keeps the installation guidance and appends the underlying
 *   import error so that failures other than "package missing" can be
 *   told apart from a genuinely absent package.
 */
async function loadDuckDB(): Promise<typeof import('@duckdb/node-api')> {
  if (DuckDBModule) {
    return DuckDBModule;
  }

  try {
    const loaded = await import('@duckdb/node-api');
    DuckDBModule = loaded;
    return loaded;
  } catch (error: unknown) {
    // Surface the real cause instead of discarding it; the import can fail
    // for reasons other than the package being absent.
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      'DuckDB is required for deduplication functionality but is not installed. ' +
        'Please install @duckdb/node-api to use deduplication features.' +
        ` Underlying error: ${detail}`
    );
  }
}
|
||||
|
||||
export interface DuckDBContext {
|
||||
conn: DuckDBConnection;
|
||||
dbPath?: string; // Store path for cleanup only if using disk
|
||||
|
@ -67,6 +106,11 @@ export interface DuckDBContext {
|
|||
*/
|
||||
export const createConnection = async (useDisk = true): Promise<DuckDBContext> => {
|
||||
try {
|
||||
// Lazy load DuckDB when first connection is created
|
||||
const { DuckDBInstance: DuckDBInstanceClass } = (await loadDuckDB()) as {
|
||||
DuckDBInstance: DuckDBInstanceClass;
|
||||
};
|
||||
|
||||
// Use disk storage for large datasets to avoid memory issues
|
||||
// The database file will be automatically cleaned up
|
||||
const dbPath = useDisk ? `/tmp/duckdb-dedupe-${Date.now()}.db` : ':memory:';
|
||||
|
@ -79,7 +123,7 @@ export const createConnection = async (useDisk = true): Promise<DuckDBContext> =
|
|||
|
||||
// Create instance and get connection
|
||||
// Instance will be garbage collected after connection is created
|
||||
const instance = await DuckDBInstance.create(dbPath, config);
|
||||
const instance = await DuckDBInstanceClass.create(dbPath, config);
|
||||
const conn = await instance.connect();
|
||||
|
||||
// Configure DuckDB for optimal performance with large datasets
|
||||
|
|
|
@ -1,8 +1,19 @@
|
|||
/**
|
||||
* Type-safe helper functions for DuckDB operations
|
||||
* Note: DuckDB types are aliased since the module is lazy-loaded
|
||||
*/
|
||||
|
||||
import type { DuckDBConnection } from '@duckdb/node-api';
|
||||
// Type definitions for lazy-loaded DuckDB module
|
||||
// These match the actual DuckDB API but avoid direct import
|
||||
interface DuckDBConnection {
|
||||
run(sql: string): Promise<DuckDBResult>;
|
||||
closeSync(): void;
|
||||
}
|
||||
|
||||
interface DuckDBResult {
|
||||
getRowObjectsJson(): Promise<unknown[]>;
|
||||
}
|
||||
|
||||
import type { DuckDBContext } from './deduplicate';
|
||||
|
||||
/**
|
||||
|
|
|
@ -1205,15 +1205,16 @@ importers:
|
|||
'@buster/vitest-config':
|
||||
specifier: workspace:*
|
||||
version: link:../vitest-config
|
||||
'@duckdb/node-api':
|
||||
specifier: 1.3.2-alpha.26
|
||||
version: 1.3.2-alpha.26
|
||||
'@turbopuffer/turbopuffer':
|
||||
specifier: ^1.0.0
|
||||
version: 1.0.0
|
||||
zod:
|
||||
specifier: ^3.22.4
|
||||
version: 3.25.76
|
||||
optionalDependencies:
|
||||
'@duckdb/node-api':
|
||||
specifier: 1.3.2-alpha.26
|
||||
version: 1.3.2-alpha.26
|
||||
|
||||
packages/server-shared:
|
||||
dependencies:
|
||||
|
|
Loading…
Reference in New Issue