Merge pull request #1219 from buster-so/wells-bus-2002-speed-up-search-endpoint

speed up by making the ancestor search more efficient and adding indexes
This commit is contained in:
wellsbunk5 2025-09-30 12:35:57 -06:00 committed by GitHub
commit 5326938722
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 6318 additions and 68 deletions

View File

@ -0,0 +1,28 @@
ALTER TABLE "users" ALTER COLUMN "suggested_prompts" SET DEFAULT '{
"suggestedPrompts": {
"report": [
"provide a trend analysis of quarterly profits",
"evaluate product performance across regions"
],
"dashboard": [
"create a sales performance dashboard",
"design a revenue forecast dashboard"
],
"visualization": [
"create a metric for monthly sales",
"show top vendors by purchase volume"
],
"help": [
"what types of analyses can you perform?",
"what questions can I ask buster?",
"what data models are available for queries?",
"can you explain your forecasting capabilities?"
]
},
"updatedAt": "2024-01-01T00:00:00.000Z"
}'::jsonb;--> statement-breakpoint
CREATE INDEX "idx_perm_active_asset_identity" ON "asset_permissions" USING btree ("asset_id","asset_type","identity_id","identity_type") WHERE "asset_permissions"."deleted_at" is null;--> statement-breakpoint
CREATE INDEX "idx_as2_active_by_asset" ON "asset_search_v2" USING btree ("asset_id","asset_type") WHERE "asset_search_v2"."deleted_at" is null;--> statement-breakpoint
CREATE INDEX "idx_cta_active_by_asset" ON "collections_to_assets" USING btree ("asset_id","asset_type","collection_id") WHERE "collections_to_assets"."deleted_at" is null;--> statement-breakpoint
CREATE INDEX "idx_mtf_active_by_file" ON "messages_to_files" USING btree ("message_id") WHERE "messages_to_files"."deleted_at" is null;--> statement-breakpoint
CREATE INDEX "idx_uto_active_by_user" ON "users_to_organizations" USING btree ("user_id","organization_id") WHERE "users_to_organizations"."deleted_at" is null;

File diff suppressed because it is too large Load Diff

View File

@ -792,6 +792,13 @@
"when": 1759242725045,
"tag": "0113_write-back-logs-config",
"breakpoints": true
},
{
"idx": 114,
"version": "7",
"when": 1759256209020,
"tag": "0114_lovely_risque",
"breakpoints": true
}
]
}

View File

@ -1,4 +1,4 @@
import { and, eq, isNull } from 'drizzle-orm';
import { and, eq, isNull, sql } from 'drizzle-orm';
import { db } from '../../connection';
import {
chats,
@ -16,6 +16,155 @@ import type { Ancestor, AssetAncestors } from '../../schema-types';
// Type for database transaction
type DatabaseTransaction = Parameters<Parameters<typeof db.transaction>[0]>[0];
/**
* Get chat ancestors as a subquery
*/
export function getChatAncestorsSubquery(assetId: string) {
return db
.select({
id: chats.id,
title: chats.title,
type: sql<string>`'chat'`.as('type'),
})
.from(messagesToFiles)
.innerJoin(messages, eq(messages.id, messagesToFiles.messageId))
.innerJoin(chats, eq(chats.id, messages.chatId))
.where(
and(
eq(messagesToFiles.fileId, assetId),
isNull(messagesToFiles.deletedAt),
isNull(messages.deletedAt),
isNull(chats.deletedAt)
)
);
}
/**
* Get collection ancestors as a subquery
*/
export function getCollectionAncestorsSubquery(assetId: string) {
return db
.select({
id: collections.id,
title: collections.name,
type: sql<string>`'collection'`.as('type'),
})
.from(collectionsToAssets)
.innerJoin(collections, eq(collections.id, collectionsToAssets.collectionId))
.where(
and(
eq(collectionsToAssets.assetId, assetId),
isNull(collectionsToAssets.deletedAt),
isNull(collections.deletedAt)
)
);
}
/**
* Get dashboard ancestors as a subquery (for metric files only)
*/
export function getDashboardAncestorsSubquery(metricId: string) {
return db
.select({
id: dashboardFiles.id,
title: dashboardFiles.name,
type: sql<string>`'dashboard_file'`.as('type'),
})
.from(metricFilesToDashboardFiles)
.innerJoin(dashboardFiles, eq(dashboardFiles.id, metricFilesToDashboardFiles.dashboardFileId))
.where(
and(
eq(metricFilesToDashboardFiles.metricFileId, metricId),
isNull(metricFilesToDashboardFiles.deletedAt),
isNull(dashboardFiles.deletedAt)
)
);
}
/**
* Get report ancestors as a subquery (for metric files only)
*/
export function getReportAncestorsSubquery(metricId: string) {
return db
.select({
id: reportFiles.id,
title: reportFiles.name,
type: sql<string>`'report_file'`.as('type'),
})
.from(metricFilesToReportFiles)
.innerJoin(reportFiles, eq(reportFiles.id, metricFilesToReportFiles.reportFileId))
.where(
and(
eq(metricFilesToReportFiles.metricFileId, metricId),
isNull(metricFilesToReportFiles.deletedAt),
isNull(reportFiles.deletedAt)
)
);
}
/**
* Get all ancestors for an asset using a single query with UNION
*/
export function getAllAncestorsUnified(assetId: string, assetType: string) {
const chatAncestors = getChatAncestorsSubquery(assetId);
const collectionAncestors = getCollectionAncestorsSubquery(assetId);
let query = chatAncestors.union(collectionAncestors);
// Only include dashboard and report ancestors for metric files
if (assetType === 'metric_file') {
const dashboardAncestors = getDashboardAncestorsSubquery(assetId);
const reportAncestors = getReportAncestorsSubquery(assetId);
query = query.union(dashboardAncestors).union(reportAncestors);
}
return query;
}
/**
* Optimized function to get all asset ancestors in a single query
*/
export async function getAssetAncestors(
assetId: string,
assetType: string,
_userId: string,
_organizationId: string
): Promise<AssetAncestors> {
const results = await getAllAncestorsUnified(assetId, assetType);
// Group results by ancestor type
const ancestors: AssetAncestors = {
chats: [],
collections: [],
dashboards: [],
reports: [],
};
for (const result of results) {
const ancestor: Ancestor = {
id: result.id,
title: result.title,
};
switch (result.type) {
case 'chat':
ancestors.chats.push(ancestor);
break;
case 'collection':
ancestors.collections.push(ancestor);
break;
case 'dashboard_file':
ancestors.dashboards.push(ancestor);
break;
case 'report_file':
ancestors.reports.push(ancestor);
break;
}
}
return ancestors;
}
export async function getAssetChatAncestors(
assetId: string,
tx?: DatabaseTransaction
@ -154,37 +303,3 @@ export async function getAssetAncestorsWithTransaction(
return results;
}
export async function getAssetAncestors(
assetId: string,
assetType: string,
_userId: string,
_organizationId: string
): Promise<AssetAncestors> {
// Get chats
const chatsPromise = getAssetChatAncestors(assetId);
// Get collections
const collectionsPromise = getAssetCollectionAncestors(assetId);
// Get dashboards
const dashboardsPromise =
assetType === 'metric_file' ? getMetricDashboardAncestors(assetId) : Promise.resolve([]);
// Get Reports
const reportsPromise =
assetType === 'metric_file' ? getMetricReportAncestors(assetId) : Promise.resolve([]);
const [chats, collections, dashboards, reports] = await Promise.all([
chatsPromise,
collectionsPromise,
dashboardsPromise,
reportsPromise,
]);
return {
chats,
collections,
dashboards,
reports,
};
}

View File

@ -1,4 +1,4 @@
import { sql } from 'drizzle-orm';
import { isNull, sql } from 'drizzle-orm';
import {
bigint,
boolean,
@ -47,7 +47,6 @@ import {
VerificationSchema,
WorkspaceSharingSchema,
} from './schema-types';
import { DEFAULT_USER_SUGGESTED_PROMPTS } from './schema-types/user';
export const assetPermissionRoleEnum = pgEnum(
'asset_permission_role_enum',
@ -555,7 +554,29 @@ export const users = pgTable(
avatarUrl: text('avatar_url'),
suggestedPrompts: jsonb('suggested_prompts')
.$type<UserSuggestedPromptsType>()
.default(DEFAULT_USER_SUGGESTED_PROMPTS)
.default(sql`'{
"suggestedPrompts": {
"report": [
"provide a trend analysis of quarterly profits",
"evaluate product performance across regions"
],
"dashboard": [
"create a sales performance dashboard",
"design a revenue forecast dashboard"
],
"visualization": [
"create a metric for monthly sales",
"show top vendors by purchase volume"
],
"help": [
"what types of analyses can you perform?",
"what questions can I ask buster?",
"what data models are available for queries?",
"can you explain your forecasting capabilities?"
]
},
"updatedAt": "2024-01-01T00:00:00.000Z"
}'::jsonb`)
.notNull(),
personalizationEnabled: boolean('personalization_enabled').default(false).notNull(),
personalizationConfig: jsonb('personalization_config')
@ -664,6 +685,10 @@ export const messagesToFiles = pgTable(
'btree',
table.messageId.asc().nullsLast().op('uuid_ops')
),
// Performance indexes for active messages to files
index('idx_mtf_active_by_file')
.on(table.messageId)
.where(isNull(table.deletedAt)),
foreignKey({
columns: [table.messageId],
foreignColumns: [messages.id],
@ -1380,6 +1405,10 @@ export const collectionsToAssets = pgTable(
columns: [table.collectionId, table.assetId, table.assetType],
name: 'collections_to_assets_pkey',
}),
// Performance index for active collections lookup by asset
index('idx_cta_active_by_asset')
.on(table.assetId, table.assetType, table.collectionId)
.where(isNull(table.deletedAt)),
]
);
@ -1450,6 +1479,10 @@ export const assetPermissions = pgTable(
columns: [table.identityId, table.identityType, table.assetId, table.assetType],
name: 'asset_permissions_pkey',
}),
// Performance index for active permissions lookup by asset and identity
index('idx_perm_active_asset_identity')
.on(table.assetId, table.assetType, table.identityId, table.identityType)
.where(isNull(table.deletedAt)),
]
);
@ -1506,6 +1539,10 @@ export const usersToOrganizations = pgTable(
columns: [table.userId, table.organizationId],
name: 'users_to_organizations_pkey',
}),
// Performance index for active user organization lookup
index('idx_uto_active_by_user')
.on(table.userId, table.organizationId)
.where(isNull(table.deletedAt)),
]
);
@ -1873,6 +1910,9 @@ export const assetSearchV2 = pgTable(
table.additionalText.asc().nullsLast().op('pgroonga_text_full_text_search_ops_v2')
),
unique('asset_search_v2_asset_type_asset_id_unique').on(table.assetId, table.assetType),
index('idx_as2_active_by_asset')
.on(table.assetId, table.assetType)
.where(isNull(table.deletedAt)),
]
);

View File

@ -6,7 +6,7 @@ import { performTextSearch } from './search';
vi.mock('@buster/database/queries', () => ({
getUserOrganizationId: vi.fn(),
searchText: vi.fn(),
getAssetAncestorsWithTransaction: vi.fn(),
getAssetAncestors: vi.fn(),
}));
vi.mock('./text-processing-helpers', () => ({
@ -14,11 +14,7 @@ vi.mock('./text-processing-helpers', () => ({
}));
// Import the mocked functions
import {
getAssetAncestorsWithTransaction,
getUserOrganizationId,
searchText,
} from '@buster/database/queries';
import { getAssetAncestors, getUserOrganizationId, searchText } from '@buster/database/queries';
import { processSearchResultText } from './text-processing-helpers';
describe('search.ts - Unit Tests', () => {
@ -36,12 +32,14 @@ describe('search.ts - Unit Tests', () => {
assetType: 'chat',
title: 'Test Result 1',
additionalText: 'This is additional text for result 1',
updatedAt: '2024-01-01T00:00:00.000Z',
},
{
assetId: 'asset-2',
assetType: 'metric_file',
title: 'Test Result 2',
additionalText: 'This is additional text for result 2',
updatedAt: '2024-01-01T00:00:00.000Z',
},
];
@ -50,8 +48,7 @@ describe('search.ts - Unit Tests', () => {
pagination: {
page: 1,
page_size: 10,
total: 2,
total_pages: 1,
has_more: false,
},
};
@ -70,7 +67,7 @@ describe('search.ts - Unit Tests', () => {
processedTitle: `<b>${title}</b>`,
processedAdditionalText: `<b>${additionalText}</b>`,
}));
(getAssetAncestorsWithTransaction as Mock).mockResolvedValue(mockAncestors);
(getAssetAncestors as Mock).mockResolvedValue(mockAncestors);
});
describe('performTextSearch', () => {
@ -110,8 +107,7 @@ describe('search.ts - Unit Tests', () => {
pagination: {
page: 1,
page_size: 10,
total: 2,
total_pages: 1,
has_more: false,
},
});
@ -226,14 +222,14 @@ describe('search.ts - Unit Tests', () => {
const result = await performTextSearch(mockUserId, searchRequestWithAncestors);
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledTimes(2);
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledWith(
expect(getAssetAncestors).toHaveBeenCalledTimes(2);
expect(getAssetAncestors).toHaveBeenCalledWith(
'asset-1',
'chat',
mockUserId,
mockOrganizationId
);
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledWith(
expect(getAssetAncestors).toHaveBeenCalledWith(
'asset-2',
'metric_file',
mockUserId,
@ -247,7 +243,7 @@ describe('search.ts - Unit Tests', () => {
it('should not include ancestors when not requested', async () => {
const result = await performTextSearch(mockUserId, basicSearchRequest);
expect(getAssetAncestorsWithTransaction).not.toHaveBeenCalled();
expect(getAssetAncestors).not.toHaveBeenCalled();
expect(result.data[0]).not.toHaveProperty('ancestors');
expect(result.data[1]).not.toHaveProperty('ancestors');
});
@ -258,8 +254,7 @@ describe('search.ts - Unit Tests', () => {
pagination: {
page: 1,
page_size: 10,
total: 0,
total_pages: 0,
has_more: false,
},
};
@ -269,7 +264,7 @@ describe('search.ts - Unit Tests', () => {
expect(result).toEqual(emptySearchResponse);
expect(processSearchResultText).not.toHaveBeenCalled();
expect(getAssetAncestorsWithTransaction).not.toHaveBeenCalled();
expect(getAssetAncestors).not.toHaveBeenCalled();
});
it('should handle null/undefined additional text', async () => {
@ -279,6 +274,7 @@ describe('search.ts - Unit Tests', () => {
assetType: 'chat',
title: 'Test Result 1',
additionalText: null,
updatedAt: '2024-01-01T00:00:00.000Z',
},
];
@ -333,6 +329,7 @@ describe('search.ts - Unit Tests', () => {
assetType: 'chat',
title: `Test Result ${i + 1}`,
additionalText: `Additional text ${i + 1}`,
updatedAt: '2024-01-01T00:00:00.000Z',
}));
(searchText as Mock).mockResolvedValue({
@ -341,8 +338,7 @@ describe('search.ts - Unit Tests', () => {
pagination: {
page: 1,
page_size: 10,
total: 12,
total_pages: 2,
has_more: true,
},
});
@ -353,8 +349,8 @@ describe('search.ts - Unit Tests', () => {
const result = await performTextSearch(mockUserId, searchRequestWithAncestors);
// Should call getAssetAncestorsWithTransaction for each result
expect(getAssetAncestorsWithTransaction).toHaveBeenCalledTimes(12);
// Should call getAssetAncestors for each result
expect(getAssetAncestors).toHaveBeenCalledTimes(12);
// Results should have ancestors added
expect(result.data).toHaveLength(12);
@ -369,9 +365,7 @@ describe('search.ts - Unit Tests', () => {
includeAssetAncestors: true,
};
(getAssetAncestorsWithTransaction as Mock).mockRejectedValue(
new Error('Ancestor lookup failed')
);
(getAssetAncestors as Mock).mockRejectedValue(new Error('Ancestor lookup failed'));
await expect(performTextSearch(mockUserId, searchRequestWithAncestors)).rejects.toThrow(
'Ancestor lookup failed'
@ -384,8 +378,7 @@ describe('search.ts - Unit Tests', () => {
pagination: {
page: 2,
page_size: 25,
total: 100,
total_pages: 4,
has_more: true,
},
};
@ -393,8 +386,7 @@ describe('search.ts - Unit Tests', () => {
const result = await performTextSearch(mockUserId, basicSearchRequest);
expect(result.pagination.total).toBe(100);
expect(result.pagination.total_pages).toBe(4);
expect(result.pagination.has_more).toBe(true);
expect(result.pagination.page).toBe(2);
expect(result.pagination.page_size).toBe(25);
});

View File

@ -136,7 +136,7 @@ async function addAncestorsToSearchResults(
const chunk = searchResults.slice(i, i + chunkSize);
const chunkResults = await Promise.all(
chunk.map(async (searchResult) => {
const ancestors = await getAssetAncestorsWithTransaction(
const ancestors = await getAssetAncestors(
searchResult.assetId,
searchResult.assetType as AssetType,
userId,