mirror of https://github.com/buster-so/buster.git
speed up by making the ancestor search more efficient and adding indexes
This commit is contained in:
parent
92c526a3a5
commit
677e749a57
|
@ -0,0 +1,28 @@
|
|||
ALTER TABLE "users" ALTER COLUMN "suggested_prompts" SET DEFAULT '{
|
||||
"suggestedPrompts": {
|
||||
"report": [
|
||||
"provide a trend analysis of quarterly profits",
|
||||
"evaluate product performance across regions"
|
||||
],
|
||||
"dashboard": [
|
||||
"create a sales performance dashboard",
|
||||
"design a revenue forecast dashboard"
|
||||
],
|
||||
"visualization": [
|
||||
"create a metric for monthly sales",
|
||||
"show top vendors by purchase volume"
|
||||
],
|
||||
"help": [
|
||||
"what types of analyses can you perform?",
|
||||
"what questions can I as buster?",
|
||||
"what data models are available for queries?",
|
||||
"can you explain your forecasting capabilities?"
|
||||
]
|
||||
},
|
||||
"updatedAt": "2024-01-01T00:00:00.000Z"
|
||||
}'::jsonb;--> statement-breakpoint
|
||||
CREATE INDEX "idx_perm_active_asset_identity" ON "asset_permissions" USING btree ("asset_id","asset_type","identity_id","identity_type") WHERE "asset_permissions"."deleted_at" is null;--> statement-breakpoint
|
||||
CREATE INDEX "idx_as2_active_by_asset" ON "asset_search_v2" USING btree ("asset_id","asset_type") WHERE "asset_search_v2"."deleted_at" is null;--> statement-breakpoint
|
||||
CREATE INDEX "idx_cta_active_by_asset" ON "collections_to_assets" USING btree ("asset_id","asset_type","collection_id") WHERE "collections_to_assets"."deleted_at" is null;--> statement-breakpoint
|
||||
CREATE INDEX "idx_mtf_active_by_file" ON "messages_to_files" USING btree ("message_id") WHERE "messages_to_files"."deleted_at" is null;--> statement-breakpoint
|
||||
CREATE INDEX "idx_uto_active_by_user" ON "users_to_organizations" USING btree ("user_id","organization_id") WHERE "users_to_organizations"."deleted_at" is null;
|
File diff suppressed because it is too large
Load Diff
|
@ -792,6 +792,13 @@
|
|||
"when": 1759242725045,
|
||||
"tag": "0113_write-back-logs-config",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 114,
|
||||
"version": "7",
|
||||
"when": 1759256209020,
|
||||
"tag": "0114_lovely_risque",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
import { and, eq, isNull } from 'drizzle-orm';
|
||||
import { and, eq, isNull, sql } from 'drizzle-orm';
|
||||
import { db } from '../../connection';
|
||||
import {
|
||||
chats,
|
||||
|
@ -16,6 +16,155 @@ import type { Ancestor, AssetAncestors } from '../../schema-types';
|
|||
// Type for database transaction
|
||||
type DatabaseTransaction = Parameters<Parameters<typeof db.transaction>[0]>[0];
|
||||
|
||||
/**
|
||||
* Get chat ancestors as a subquery
|
||||
*/
|
||||
export function getChatAncestorsSubquery(assetId: string) {
|
||||
return db
|
||||
.select({
|
||||
id: chats.id,
|
||||
title: chats.title,
|
||||
type: sql<string>`'chat'`.as('type'),
|
||||
})
|
||||
.from(messagesToFiles)
|
||||
.innerJoin(messages, eq(messages.id, messagesToFiles.messageId))
|
||||
.innerJoin(chats, eq(chats.id, messages.chatId))
|
||||
.where(
|
||||
and(
|
||||
eq(messagesToFiles.fileId, assetId),
|
||||
isNull(messagesToFiles.deletedAt),
|
||||
isNull(messages.deletedAt),
|
||||
isNull(chats.deletedAt)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get collection ancestors as a subquery
|
||||
*/
|
||||
export function getCollectionAncestorsSubquery(assetId: string) {
|
||||
return db
|
||||
.select({
|
||||
id: collections.id,
|
||||
title: collections.name,
|
||||
type: sql<string>`'collection'`.as('type'),
|
||||
})
|
||||
.from(collectionsToAssets)
|
||||
.innerJoin(collections, eq(collections.id, collectionsToAssets.collectionId))
|
||||
.where(
|
||||
and(
|
||||
eq(collectionsToAssets.assetId, assetId),
|
||||
isNull(collectionsToAssets.deletedAt),
|
||||
isNull(collections.deletedAt)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get dashboard ancestors as a subquery (for metric files only)
|
||||
*/
|
||||
export function getDashboardAncestorsSubquery(metricId: string) {
|
||||
return db
|
||||
.select({
|
||||
id: dashboardFiles.id,
|
||||
title: dashboardFiles.name,
|
||||
type: sql<string>`'dashboard_file'`.as('type'),
|
||||
})
|
||||
.from(metricFilesToDashboardFiles)
|
||||
.innerJoin(dashboardFiles, eq(dashboardFiles.id, metricFilesToDashboardFiles.dashboardFileId))
|
||||
.where(
|
||||
and(
|
||||
eq(metricFilesToDashboardFiles.metricFileId, metricId),
|
||||
isNull(metricFilesToDashboardFiles.deletedAt),
|
||||
isNull(dashboardFiles.deletedAt)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get report ancestors as a subquery (for metric files only)
|
||||
*/
|
||||
export function getReportAncestorsSubquery(metricId: string) {
|
||||
return db
|
||||
.select({
|
||||
id: reportFiles.id,
|
||||
title: reportFiles.name,
|
||||
type: sql<string>`'report_file'`.as('type'),
|
||||
})
|
||||
.from(metricFilesToReportFiles)
|
||||
.innerJoin(reportFiles, eq(reportFiles.id, metricFilesToReportFiles.reportFileId))
|
||||
.where(
|
||||
and(
|
||||
eq(metricFilesToReportFiles.metricFileId, metricId),
|
||||
isNull(metricFilesToReportFiles.deletedAt),
|
||||
isNull(reportFiles.deletedAt)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all ancestors for an asset using a single query with UNION
|
||||
*/
|
||||
export function getAllAncestorsUnified(assetId: string, assetType: string) {
|
||||
const chatAncestors = getChatAncestorsSubquery(assetId);
|
||||
const collectionAncestors = getCollectionAncestorsSubquery(assetId);
|
||||
|
||||
let query = chatAncestors.union(collectionAncestors);
|
||||
|
||||
// Only include dashboard and report ancestors for metric files
|
||||
if (assetType === 'metric_file') {
|
||||
const dashboardAncestors = getDashboardAncestorsSubquery(assetId);
|
||||
const reportAncestors = getReportAncestorsSubquery(assetId);
|
||||
query = query.union(dashboardAncestors).union(reportAncestors);
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimized function to get all asset ancestors in a single query
|
||||
*/
|
||||
export async function getAssetAncestors(
|
||||
assetId: string,
|
||||
assetType: string,
|
||||
_userId: string,
|
||||
_organizationId: string
|
||||
): Promise<AssetAncestors> {
|
||||
const results = await getAllAncestorsUnified(assetId, assetType);
|
||||
|
||||
// Group results by ancestor type
|
||||
const ancestors: AssetAncestors = {
|
||||
chats: [],
|
||||
collections: [],
|
||||
dashboards: [],
|
||||
reports: [],
|
||||
};
|
||||
|
||||
for (const result of results) {
|
||||
const ancestor: Ancestor = {
|
||||
id: result.id,
|
||||
title: result.title,
|
||||
};
|
||||
|
||||
switch (result.type) {
|
||||
case 'chat':
|
||||
ancestors.chats.push(ancestor);
|
||||
break;
|
||||
case 'collection':
|
||||
ancestors.collections.push(ancestor);
|
||||
break;
|
||||
case 'dashboard_file':
|
||||
ancestors.dashboards.push(ancestor);
|
||||
break;
|
||||
case 'report_file':
|
||||
ancestors.reports.push(ancestor);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ancestors;
|
||||
}
|
||||
|
||||
export async function getAssetChatAncestors(
|
||||
assetId: string,
|
||||
tx?: DatabaseTransaction
|
||||
|
@ -154,37 +303,3 @@ export async function getAssetAncestorsWithTransaction(
|
|||
|
||||
return results;
|
||||
}
|
||||
export async function getAssetAncestors(
|
||||
assetId: string,
|
||||
assetType: string,
|
||||
_userId: string,
|
||||
_organizationId: string
|
||||
): Promise<AssetAncestors> {
|
||||
// Get chats
|
||||
const chatsPromise = getAssetChatAncestors(assetId);
|
||||
|
||||
// Get collections
|
||||
const collectionsPromise = getAssetCollectionAncestors(assetId);
|
||||
|
||||
// Get dashboards
|
||||
const dashboardsPromise =
|
||||
assetType === 'metric_file' ? getMetricDashboardAncestors(assetId) : Promise.resolve([]);
|
||||
|
||||
// Get Reports
|
||||
const reportsPromise =
|
||||
assetType === 'metric_file' ? getMetricReportAncestors(assetId) : Promise.resolve([]);
|
||||
|
||||
const [chats, collections, dashboards, reports] = await Promise.all([
|
||||
chatsPromise,
|
||||
collectionsPromise,
|
||||
dashboardsPromise,
|
||||
reportsPromise,
|
||||
]);
|
||||
|
||||
return {
|
||||
chats,
|
||||
collections,
|
||||
dashboards,
|
||||
reports,
|
||||
};
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { sql } from 'drizzle-orm';
|
||||
import { isNull, sql } from 'drizzle-orm';
|
||||
import {
|
||||
bigint,
|
||||
boolean,
|
||||
|
@ -47,7 +47,6 @@ import {
|
|||
VerificationSchema,
|
||||
WorkspaceSharingSchema,
|
||||
} from './schema-types';
|
||||
import { DEFAULT_USER_SUGGESTED_PROMPTS } from './schema-types/user';
|
||||
|
||||
export const assetPermissionRoleEnum = pgEnum(
|
||||
'asset_permission_role_enum',
|
||||
|
@ -555,7 +554,29 @@ export const users = pgTable(
|
|||
avatarUrl: text('avatar_url'),
|
||||
suggestedPrompts: jsonb('suggested_prompts')
|
||||
.$type<UserSuggestedPromptsType>()
|
||||
.default(DEFAULT_USER_SUGGESTED_PROMPTS)
|
||||
.default(sql`'{
|
||||
"suggestedPrompts": {
|
||||
"report": [
|
||||
"provide a trend analysis of quarterly profits",
|
||||
"evaluate product performance across regions"
|
||||
],
|
||||
"dashboard": [
|
||||
"create a sales performance dashboard",
|
||||
"design a revenue forecast dashboard"
|
||||
],
|
||||
"visualization": [
|
||||
"create a metric for monthly sales",
|
||||
"show top vendors by purchase volume"
|
||||
],
|
||||
"help": [
|
||||
"what types of analyses can you perform?",
|
||||
"what questions can I as buster?",
|
||||
"what data models are available for queries?",
|
||||
"can you explain your forecasting capabilities?"
|
||||
]
|
||||
},
|
||||
"updatedAt": "2024-01-01T00:00:00.000Z"
|
||||
}'::jsonb`)
|
||||
.notNull(),
|
||||
personalizationEnabled: boolean('personalization_enabled').default(false).notNull(),
|
||||
personalizationConfig: jsonb('personalization_config')
|
||||
|
@ -664,6 +685,10 @@ export const messagesToFiles = pgTable(
|
|||
'btree',
|
||||
table.messageId.asc().nullsLast().op('uuid_ops')
|
||||
),
|
||||
// Performance indexes for active messages to files
|
||||
index('idx_mtf_active_by_file')
|
||||
.on(table.messageId)
|
||||
.where(isNull(table.deletedAt)),
|
||||
foreignKey({
|
||||
columns: [table.messageId],
|
||||
foreignColumns: [messages.id],
|
||||
|
@ -1380,6 +1405,10 @@ export const collectionsToAssets = pgTable(
|
|||
columns: [table.collectionId, table.assetId, table.assetType],
|
||||
name: 'collections_to_assets_pkey',
|
||||
}),
|
||||
// Performance index for active collections lookup by asset
|
||||
index('idx_cta_active_by_asset')
|
||||
.on(table.assetId, table.assetType, table.collectionId)
|
||||
.where(isNull(table.deletedAt)),
|
||||
]
|
||||
);
|
||||
|
||||
|
@ -1450,6 +1479,10 @@ export const assetPermissions = pgTable(
|
|||
columns: [table.identityId, table.identityType, table.assetId, table.assetType],
|
||||
name: 'asset_permissions_pkey',
|
||||
}),
|
||||
// Performance index for active permissions lookup by asset and identity
|
||||
index('idx_perm_active_asset_identity')
|
||||
.on(table.assetId, table.assetType, table.identityId, table.identityType)
|
||||
.where(isNull(table.deletedAt)),
|
||||
]
|
||||
);
|
||||
|
||||
|
@ -1506,6 +1539,10 @@ export const usersToOrganizations = pgTable(
|
|||
columns: [table.userId, table.organizationId],
|
||||
name: 'users_to_organizations_pkey',
|
||||
}),
|
||||
// Performance index for active user organization lookup
|
||||
index('idx_uto_active_by_user')
|
||||
.on(table.userId, table.organizationId)
|
||||
.where(isNull(table.deletedAt)),
|
||||
]
|
||||
);
|
||||
|
||||
|
@ -1873,6 +1910,9 @@ export const assetSearchV2 = pgTable(
|
|||
table.additionalText.asc().nullsLast().op('pgroonga_text_full_text_search_ops_v2')
|
||||
),
|
||||
unique('asset_search_v2_asset_type_asset_id_unique').on(table.assetId, table.assetType),
|
||||
index('idx_as2_active_by_asset')
|
||||
.on(table.assetId, table.assetType)
|
||||
.where(isNull(table.deletedAt)),
|
||||
]
|
||||
);
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ import { performTextSearch } from './search';
|
|||
vi.mock('@buster/database/queries', () => ({
|
||||
getUserOrganizationId: vi.fn(),
|
||||
searchText: vi.fn(),
|
||||
getAssetAncestorsWithTransaction: vi.fn(),
|
||||
getAssetAncestors: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('./text-processing-helpers', () => ({
|
||||
|
@ -14,11 +14,7 @@ vi.mock('./text-processing-helpers', () => ({
|
|||
}));
|
||||
|
||||
// Import the mocked functions
|
||||
import {
|
||||
getAssetAncestorsWithTransaction,
|
||||
getUserOrganizationId,
|
||||
searchText,
|
||||
} from '@buster/database/queries';
|
||||
import { getAssetAncestors, getUserOrganizationId, searchText } from '@buster/database/queries';
|
||||
import { processSearchResultText } from './text-processing-helpers';
|
||||
|
||||
describe('search.ts - Unit Tests', () => {
|
||||
|
@ -36,12 +32,14 @@ describe('search.ts - Unit Tests', () => {
|
|||
assetType: 'chat',
|
||||
title: 'Test Result 1',
|
||||
additionalText: 'This is additional text for result 1',
|
||||
updatedAt: '2024-01-01T00:00:00.000Z',
|
||||
},
|
||||
{
|
||||
assetId: 'asset-2',
|
||||
assetType: 'metric_file',
|
||||
title: 'Test Result 2',
|
||||
additionalText: 'This is additional text for result 2',
|
||||
updatedAt: '2024-01-01T00:00:00.000Z',
|
||||
},
|
||||
];
|
||||
|
||||
|
@ -50,8 +48,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
pagination: {
|
||||
page: 1,
|
||||
page_size: 10,
|
||||
total: 2,
|
||||
total_pages: 1,
|
||||
has_more: false,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -70,7 +67,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
processedTitle: `<b>${title}</b>`,
|
||||
processedAdditionalText: `<b>${additionalText}</b>`,
|
||||
}));
|
||||
(getAssetAncestorsWithTransaction as Mock).mockResolvedValue(mockAncestors);
|
||||
(getAssetAncestors as Mock).mockResolvedValue(mockAncestors);
|
||||
});
|
||||
|
||||
describe('performTextSearch', () => {
|
||||
|
@ -110,8 +107,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
pagination: {
|
||||
page: 1,
|
||||
page_size: 10,
|
||||
total: 2,
|
||||
total_pages: 1,
|
||||
has_more: false,
|
||||
},
|
||||
});
|
||||
|
||||
|
@ -226,14 +222,14 @@ describe('search.ts - Unit Tests', () => {
|
|||
|
||||
const result = await performTextSearch(mockUserId, searchRequestWithAncestors);
|
||||
|
||||
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledTimes(2);
|
||||
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledWith(
|
||||
expect(getAssetAncestors).toHaveBeenCalledTimes(2);
|
||||
expect(getAssetAncestors).toHaveBeenCalledWith(
|
||||
'asset-1',
|
||||
'chat',
|
||||
mockUserId,
|
||||
mockOrganizationId
|
||||
);
|
||||
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledWith(
|
||||
expect(getAssetAncestors).toHaveBeenCalledWith(
|
||||
'asset-2',
|
||||
'metric_file',
|
||||
mockUserId,
|
||||
|
@ -247,7 +243,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
it('should not include ancestors when not requested', async () => {
|
||||
const result = await performTextSearch(mockUserId, basicSearchRequest);
|
||||
|
||||
expect(getAssetAncestorsWithTransaction).not.toHaveBeenCalled();
|
||||
expect(getAssetAncestors).not.toHaveBeenCalled();
|
||||
expect(result.data[0]).not.toHaveProperty('ancestors');
|
||||
expect(result.data[1]).not.toHaveProperty('ancestors');
|
||||
});
|
||||
|
@ -258,8 +254,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
pagination: {
|
||||
page: 1,
|
||||
page_size: 10,
|
||||
total: 0,
|
||||
total_pages: 0,
|
||||
has_more: false,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -269,7 +264,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
|
||||
expect(result).toEqual(emptySearchResponse);
|
||||
expect(processSearchResultText).not.toHaveBeenCalled();
|
||||
expect(getAssetAncestorsWithTransaction).not.toHaveBeenCalled();
|
||||
expect(getAssetAncestors).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle null/undefined additional text', async () => {
|
||||
|
@ -279,6 +274,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
assetType: 'chat',
|
||||
title: 'Test Result 1',
|
||||
additionalText: null,
|
||||
updatedAt: '2024-01-01T00:00:00.000Z',
|
||||
},
|
||||
];
|
||||
|
||||
|
@ -333,6 +329,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
assetType: 'chat',
|
||||
title: `Test Result ${i + 1}`,
|
||||
additionalText: `Additional text ${i + 1}`,
|
||||
updatedAt: '2024-01-01T00:00:00.000Z',
|
||||
}));
|
||||
|
||||
(searchText as Mock).mockResolvedValue({
|
||||
|
@ -341,8 +338,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
pagination: {
|
||||
page: 1,
|
||||
page_size: 10,
|
||||
total: 12,
|
||||
total_pages: 2,
|
||||
has_more: true,
|
||||
},
|
||||
});
|
||||
|
||||
|
@ -353,8 +349,8 @@ describe('search.ts - Unit Tests', () => {
|
|||
|
||||
const result = await performTextSearch(mockUserId, searchRequestWithAncestors);
|
||||
|
||||
// Should call getAssetAncestorsWithTransaction for each result
|
||||
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledTimes(12);
|
||||
// Should call getAssetAncestors for each result
|
||||
expect(getAssetAncestors).toHaveBeenCalledTimes(12);
|
||||
|
||||
// Results should have ancestors added
|
||||
expect(result.data).toHaveLength(12);
|
||||
|
@ -369,9 +365,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
includeAssetAncestors: true,
|
||||
};
|
||||
|
||||
(getAssetAncestorsWithTransaction as Mock).mockRejectedValue(
|
||||
new Error('Ancestor lookup failed')
|
||||
);
|
||||
(getAssetAncestors as Mock).mockRejectedValue(new Error('Ancestor lookup failed'));
|
||||
|
||||
await expect(performTextSearch(mockUserId, searchRequestWithAncestors)).rejects.toThrow(
|
||||
'Ancestor lookup failed'
|
||||
|
@ -384,8 +378,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
pagination: {
|
||||
page: 2,
|
||||
page_size: 25,
|
||||
total: 100,
|
||||
total_pages: 4,
|
||||
has_more: true,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -393,8 +386,7 @@ describe('search.ts - Unit Tests', () => {
|
|||
|
||||
const result = await performTextSearch(mockUserId, basicSearchRequest);
|
||||
|
||||
expect(result.pagination.total).toBe(100);
|
||||
expect(result.pagination.total_pages).toBe(4);
|
||||
expect(result.pagination.has_more).toBe(true);
|
||||
expect(result.pagination.page).toBe(2);
|
||||
expect(result.pagination.page_size).toBe(25);
|
||||
});
|
||||
|
|
|
@ -136,7 +136,7 @@ async function addAncestorsToSearchResults(
|
|||
const chunk = searchResults.slice(i, i + chunkSize);
|
||||
const chunkResults = await Promise.all(
|
||||
chunk.map(async (searchResult) => {
|
||||
const ancestors = await getAssetAncestorsWithTransaction(
|
||||
const ancestors = await getAssetAncestors(
|
||||
searchResult.assetId,
|
||||
searchResult.assetType as AssetType,
|
||||
userId,
|
||||
|
|
Loading…
Reference in New Issue