Merge pull request #1262 from buster-so/update-search

updates to search functionality
This commit is contained in:
wellsbunk5 2025-10-07 16:39:46 -06:00 committed by GitHub
commit 668074d40e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 12837 additions and 2 deletions

View File

@ -0,0 +1,167 @@
-- Step 1: Truncate the asset_search_v2 table to start fresh
-- NOTE: emptying the table is what makes Step 2 safe — a NOT NULL column can
-- then be added without a default value or a backfill.
TRUNCATE TABLE "asset_search_v2";--> statement-breakpoint
-- Step 2: Add the created_by column as NOT NULL (since table is empty)
ALTER TABLE "asset_search_v2" ADD COLUMN if not exists "created_by" uuid NOT NULL;--> statement-breakpoint
-- Step 3: Add the foreign key constraint for created_by
-- ON DELETE cascade: deleting a user removes that user's search rows too.
ALTER TABLE "asset_search_v2" ADD CONSTRAINT "asset_search_v2_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
-- Step 4: Repopulate chats with enhanced message content
-- One search row per chat. additional_text flattens the chat's non-deleted
-- messages, in created_at order, into plain text: each message contributes its
-- request_message (if non-empty) followed by the 'message' field of every
-- 'text'-type item in its response_messages jsonb array.
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
SELECT
c.id,
'chat'::asset_type_enum,
COALESCE(c.title, ''),
(
-- Get interleaved request and response messages in chronological order
SELECT string_agg(
-- First half: the user's request text (empty string when absent).
CASE
WHEN request_message IS NOT NULL AND request_message != '' THEN
request_message
ELSE ''
END ||
-- Second half: the concatenated 'text' response items. When request text
-- was emitted above, prefix the responses with a newline separator.
CASE
WHEN response_messages IS NOT NULL AND
(request_message IS NOT NULL AND request_message != '') THEN
E'\n' || COALESCE((
SELECT string_agg(
jsonb_extract_path_text(response_item, 'message'),
E'\n'
)
FROM jsonb_array_elements(response_messages) AS response_item
WHERE jsonb_extract_path_text(response_item, 'type') = 'text'
AND jsonb_extract_path_text(response_item, 'message') IS NOT NULL
AND jsonb_extract_path_text(response_item, 'message') != ''
), '')
-- No request text: emit the responses without the leading newline.
WHEN response_messages IS NOT NULL THEN
COALESCE((
SELECT string_agg(
jsonb_extract_path_text(response_item, 'message'),
E'\n'
)
FROM jsonb_array_elements(response_messages) AS response_item
WHERE jsonb_extract_path_text(response_item, 'type') = 'text'
AND jsonb_extract_path_text(response_item, 'message') IS NOT NULL
AND jsonb_extract_path_text(response_item, 'message') != ''
), '')
ELSE ''
END,
E'\n'
ORDER BY created_at
)
FROM public.messages
WHERE chat_id = c.id
AND deleted_at IS NULL
-- Skip messages that could not contribute any search text.
AND (
(request_message IS NOT NULL AND request_message != '')
OR
(response_messages IS NOT NULL AND jsonb_array_length(response_messages) > 0)
)
),
c.organization_id,
c.created_by,
c.created_at,
c.updated_at,
c.deleted_at,
c.screenshot_bucket_key
FROM public.chats c
-- created_by is NOT NULL in asset_search_v2 (Step 2), so chats without a
-- creator cannot be indexed and are skipped.
WHERE c.created_by IS NOT NULL;--> statement-breakpoint
-- Step 5: Repopulate metric_files with enhanced content
-- One search row per metric file: title = file name, additional_text = the
-- 'description' field of the metric's content JSON.
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
SELECT
mf.id,
'metric_file'::asset_type_enum,
COALESCE(mf.name, ''),
COALESCE(mf.content ->> 'description', ''),
mf.organization_id,
mf.created_by,
mf.created_at,
mf.updated_at,
mf.deleted_at,
mf.screenshot_bucket_key
FROM public.metric_files mf
-- created_by is NOT NULL in asset_search_v2; skip rows without a creator.
WHERE mf.created_by IS NOT NULL;--> statement-breakpoint
-- Step 6: Repopulate dashboard_files with enhanced content including metric information
-- additional_text = dashboard description plus the name/description of every
-- non-deleted metric referenced by id inside the dashboard's content JSON
-- (content -> 'rows' -> [] -> 'items' -> [] -> 'id').
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
SELECT
df.id,
'dashboard_file'::asset_type_enum,
COALESCE(df.name, ''),
TRIM(COALESCE(df.content ->> 'description', '') || ' ' || COALESCE((
-- Get metric titles and descriptions from IDs in dashboard content JSON
SELECT STRING_AGG(
COALESCE(mf.content ->> 'name', mf.name) || E'\n' ||
COALESCE(mf.content ->> 'description', ''),
E'\n\n'
)
FROM public.metric_files mf
WHERE mf.id::text IN (
-- Extract metric IDs from dashboard content JSON
SELECT DISTINCT jsonb_extract_path_text(item_value, 'id')
FROM jsonb_array_elements(df.content -> 'rows') AS row_value,
jsonb_array_elements(row_value -> 'items') AS item_value
-- Loose UUID shape check (36 chars of hex digits/dashes) filters out
-- item ids that cannot be metric UUIDs.
WHERE jsonb_extract_path_text(item_value, 'id') ~ '^[a-f0-9-]{36}$'
)
AND mf.deleted_at IS NULL
), '')),
df.organization_id,
df.created_by,
df.created_at,
df.updated_at,
df.deleted_at,
df.screenshot_bucket_key
FROM public.dashboard_files df
-- created_by is NOT NULL in asset_search_v2; skip rows without a creator.
WHERE df.created_by IS NOT NULL;--> statement-breakpoint
-- Step 7: Repopulate report_files with enhanced content including metric information
-- additional_text = the report content with <metric> tags and newlines
-- stripped, plus the name/description of every non-deleted metric whose id
-- appears in a <metric metricId="..."> tag.
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
SELECT
rf.id,
'report_file'::asset_type_enum,
COALESCE(rf.name, ''),
TRIM(
-- Remove <metric> tags and newlines from content
-- (both self-closing <metric .../> and paired <metric ...>...</metric>).
regexp_replace(
regexp_replace(COALESCE(rf.content, ''), '<metric[^>]*(?:/>|>.*?</metric>)', '', 'g'),
'\n', '', 'g'
) || ' ' ||
COALESCE((
-- Get metric names and descriptions for extracted IDs
SELECT STRING_AGG(
COALESCE(mf.content ->> 'name', mf.name) || E'\n' ||
COALESCE(mf.content ->> 'description', ''),
E'\n\n'
)
FROM public.metric_files mf
WHERE mf.id::text IN (
-- Extract metric IDs from <metric metricId="uuid"> tags (both self-closing and full tags)
-- matches[1] is the first capture group: the 36-char UUID-shaped id.
SELECT DISTINCT matches[1]
FROM regexp_matches(COALESCE(rf.content, ''), '<metric[^>]*metricId="([a-f0-9-]{36})"[^>]*(?:/>|>.*?</metric>)', 'g') AS matches
WHERE matches[1] IS NOT NULL
)
AND mf.deleted_at IS NULL
), '')
),
rf.organization_id,
rf.created_by,
rf.created_at,
rf.updated_at,
rf.deleted_at,
rf.screenshot_bucket_key
FROM public.report_files rf
-- created_by is NOT NULL in asset_search_v2; skip rows without a creator.
WHERE rf.created_by IS NOT NULL;

View File

@ -0,0 +1,239 @@
-- Custom SQL migration file, put your code below! --
-- Drop existing triggers to recreate them with screenshot_bucket_key and created_by support
-- Triggers are dropped before the functions they execute so the DROP FUNCTION
-- calls below do not fail on dependencies.
DROP TRIGGER IF EXISTS sync_chats_text_search ON chats;--> statement-breakpoint
DROP TRIGGER IF EXISTS sync_metric_files_text_search ON metric_files;--> statement-breakpoint
DROP TRIGGER IF EXISTS sync_dashboard_files_text_search ON dashboard_files;--> statement-breakpoint
DROP TRIGGER IF EXISTS sync_report_files_text_search ON report_files;--> statement-breakpoint
-- Drop existing trigger functions to recreate them with screenshot_bucket_key and created_by support
DROP FUNCTION IF EXISTS sync_chats_to_text_search();--> statement-breakpoint
DROP FUNCTION IF EXISTS sync_metric_files_to_text_search();--> statement-breakpoint
DROP FUNCTION IF EXISTS sync_dashboard_files_to_text_search();--> statement-breakpoint
DROP FUNCTION IF EXISTS sync_report_files_to_text_search();--> statement-breakpoint
-- Function for chats table with screenshot_bucket_key and created_by support
-- Mirrors a chat into asset_search_v2:
--   INSERT/UPDATE -> rebuild the chat's searchable text from its non-deleted
--                    messages and upsert the search row;
--   DELETE        -> soft-delete the search row by stamping deleted_at.
-- Returns NULL (result ignored; intended for the AFTER row trigger below).
-- FIX: removed the unused request_messages_text / response_messages_text
-- declarations that were never referenced in the body.
CREATE OR REPLACE FUNCTION sync_chats_to_text_search()
RETURNS TRIGGER AS $$
DECLARE
-- Flattened request/response text for the whole chat; stays NULL when the
-- chat has no contributing messages.
messages_text text;
BEGIN
IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN
-- Get interleaved request and response messages in chronological order
SELECT string_agg(
-- First half: the user's request text (empty string when absent).
CASE
WHEN request_message IS NOT NULL AND request_message != '' THEN
request_message
ELSE ''
END ||
-- Second half: the 'text'-type response items, newline-prefixed only when
-- request text was emitted above.
CASE
WHEN response_messages IS NOT NULL AND
(request_message IS NOT NULL AND request_message != '') THEN
E'\n' || COALESCE((
SELECT string_agg(
jsonb_extract_path_text(response_item, 'message'),
E'\n'
)
FROM jsonb_array_elements(response_messages) AS response_item
WHERE jsonb_extract_path_text(response_item, 'type') = 'text'
AND jsonb_extract_path_text(response_item, 'message') IS NOT NULL
AND jsonb_extract_path_text(response_item, 'message') != ''
), '')
WHEN response_messages IS NOT NULL THEN
COALESCE((
SELECT string_agg(
jsonb_extract_path_text(response_item, 'message'),
E'\n'
)
FROM jsonb_array_elements(response_messages) AS response_item
WHERE jsonb_extract_path_text(response_item, 'type') = 'text'
AND jsonb_extract_path_text(response_item, 'message') IS NOT NULL
AND jsonb_extract_path_text(response_item, 'message') != ''
), '')
ELSE ''
END,
E'\n'
ORDER BY created_at
)
INTO messages_text
FROM public.messages
WHERE chat_id = NEW.id
AND deleted_at IS NULL
-- Skip messages that could not contribute any search text.
AND (
(request_message IS NOT NULL AND request_message != '')
OR
(response_messages IS NOT NULL AND jsonb_array_length(response_messages) > 0)
);
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
VALUES (
NEW.id, 'chat', COALESCE(NEW.title, ''), messages_text,
NEW.organization_id, NEW.created_by,
NEW.created_at, NEW.updated_at, NEW.deleted_at, NEW.screenshot_bucket_key
)
-- NOTE(review): created_by and organization_id are not refreshed on
-- conflict — assumes they are immutable on chats; confirm.
ON CONFLICT (asset_id, asset_type) DO UPDATE SET
title = EXCLUDED.title,
additional_text = EXCLUDED.additional_text,
updated_at = EXCLUDED.updated_at,
deleted_at = EXCLUDED.deleted_at,
screenshot_bucket_key = EXCLUDED.screenshot_bucket_key;
ELSIF TG_OP = 'DELETE' THEN
-- Hard delete of the chat => soft delete of its search row.
UPDATE public.asset_search_v2
SET deleted_at = NOW()
WHERE asset_id = OLD.id AND asset_type = 'chat';
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Function for metric_files table with screenshot_bucket_key and created_by support
-- INSERT/UPDATE upserts the metric's search row (title = file name,
-- additional_text = content->>'description'); DELETE soft-deletes it.
-- Returns NULL (result ignored; intended for the AFTER row trigger below).
CREATE OR REPLACE FUNCTION sync_metric_files_to_text_search()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
VALUES (
NEW.id, 'metric_file', COALESCE(NEW.name, ''),
COALESCE(NEW.content ->> 'description', ''),
NEW.organization_id, NEW.created_by,
NEW.created_at, NEW.updated_at, NEW.deleted_at, NEW.screenshot_bucket_key
)
-- created_by / organization_id are not refreshed on conflict (presumably
-- immutable on metric_files — confirm).
ON CONFLICT (asset_id, asset_type) DO UPDATE SET
title = EXCLUDED.title,
additional_text = EXCLUDED.additional_text,
updated_at = EXCLUDED.updated_at,
deleted_at = EXCLUDED.deleted_at,
screenshot_bucket_key = EXCLUDED.screenshot_bucket_key;
ELSIF TG_OP = 'DELETE' THEN
-- Hard delete of the metric file => soft delete of its search row.
UPDATE public.asset_search_v2
SET deleted_at = NOW()
WHERE asset_id = OLD.id AND asset_type = 'metric_file';
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Function for dashboard_files table with screenshot_bucket_key and created_by support
-- INSERT/UPDATE upserts the dashboard's search row; additional_text combines
-- the dashboard description with the name/description of every non-deleted
-- metric referenced by id in the dashboard's content JSON. DELETE soft-deletes
-- the search row. Returns NULL (intended for the AFTER row trigger below).
CREATE OR REPLACE FUNCTION sync_dashboard_files_to_text_search()
RETURNS TRIGGER AS $$
DECLARE
-- Aggregated metric names/descriptions; the SELECT INTO below overwrites
-- this with NULL when no metrics match (hence the COALESCE at use site).
metric_text TEXT := '';
BEGIN
IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN
-- Get metric titles and descriptions from IDs in dashboard content JSON
SELECT STRING_AGG(
COALESCE(mf.content ->> 'name', mf.name) || E'\n' ||
COALESCE(mf.content ->> 'description', ''),
E'\n\n'
) INTO metric_text
FROM public.metric_files mf
WHERE mf.id::text IN (
-- Extract metric IDs from dashboard content JSON
-- (content -> 'rows' -> [] -> 'items' -> [] -> 'id')
SELECT DISTINCT jsonb_extract_path_text(item_value, 'id')
FROM jsonb_array_elements(NEW.content -> 'rows') AS row_value,
jsonb_array_elements(row_value -> 'items') AS item_value
-- Loose UUID shape check (36 chars of hex digits/dashes).
WHERE jsonb_extract_path_text(item_value, 'id') ~ '^[a-f0-9-]{36}$'
)
AND mf.deleted_at IS NULL;
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
VALUES (
NEW.id, 'dashboard_file', COALESCE(NEW.name, ''),
TRIM(COALESCE(NEW.content ->> 'description', '') || ' ' || COALESCE(metric_text, '')),
NEW.organization_id, NEW.created_by,
NEW.created_at, NEW.updated_at, NEW.deleted_at, NEW.screenshot_bucket_key
)
ON CONFLICT (asset_id, asset_type) DO UPDATE SET
title = EXCLUDED.title,
additional_text = EXCLUDED.additional_text,
updated_at = EXCLUDED.updated_at,
deleted_at = EXCLUDED.deleted_at,
screenshot_bucket_key = EXCLUDED.screenshot_bucket_key;
ELSIF TG_OP = 'DELETE' THEN
-- Hard delete of the dashboard file => soft delete of its search row.
UPDATE public.asset_search_v2
SET deleted_at = NOW()
WHERE asset_id = OLD.id AND asset_type = 'dashboard_file';
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Function for report_files table with screenshot_bucket_key and created_by support
-- INSERT/UPDATE upserts the report's search row; additional_text is the report
-- content with <metric> tags and newlines stripped, plus the name/description
-- of every non-deleted metric referenced via <metric metricId="...">. DELETE
-- soft-deletes the search row. Returns NULL (AFTER row trigger below).
CREATE OR REPLACE FUNCTION sync_report_files_to_text_search()
RETURNS TRIGGER AS $$
DECLARE
-- Report content after stripping <metric> tags and newlines.
cleaned_content text;
-- Aggregated metric names/descriptions; the SELECT INTO below overwrites
-- this with NULL when no metrics match (hence the COALESCE at use site).
metric_text text := '';
BEGIN
IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN
-- Get metric names and descriptions for extracted IDs
SELECT STRING_AGG(
COALESCE(mf.content ->> 'name', mf.name) || E'\n' ||
COALESCE(mf.content ->> 'description', ''),
E'\n\n'
) INTO metric_text
FROM public.metric_files mf
WHERE mf.id::text IN (
-- Extract metric IDs from <metric metricId="uuid"> tags (both self-closing and full tags)
-- matches[1] is the first capture group: the 36-char UUID-shaped id.
SELECT DISTINCT matches[1]
FROM regexp_matches(COALESCE(NEW.content, ''), '<metric[^>]*metricId="([a-f0-9-]{36})"[^>]*(?:/>|>.*?</metric>)', 'g') AS matches
WHERE matches[1] IS NOT NULL
)
AND mf.deleted_at IS NULL;
-- Remove <metric> tags and newlines from content
cleaned_content := regexp_replace(COALESCE(NEW.content, ''), '<metric[^>]*(?:/>|>.*?</metric>)', '', 'g');
cleaned_content := regexp_replace(cleaned_content, '\n', '', 'g');
INSERT INTO public.asset_search_v2 (
asset_id, asset_type, title, additional_text, organization_id,
created_by, created_at, updated_at, deleted_at, screenshot_bucket_key
)
VALUES (
NEW.id, 'report_file', COALESCE(NEW.name, ''),
TRIM(cleaned_content || ' ' || COALESCE(metric_text, '')),
NEW.organization_id, NEW.created_by,
NEW.created_at, NEW.updated_at, NEW.deleted_at, NEW.screenshot_bucket_key
)
ON CONFLICT (asset_id, asset_type) DO UPDATE SET
title = EXCLUDED.title,
additional_text = EXCLUDED.additional_text,
updated_at = EXCLUDED.updated_at,
deleted_at = EXCLUDED.deleted_at,
screenshot_bucket_key = EXCLUDED.screenshot_bucket_key;
ELSIF TG_OP = 'DELETE' THEN
-- Hard delete of the report file => soft delete of its search row.
UPDATE public.asset_search_v2
SET deleted_at = NOW()
WHERE asset_id = OLD.id AND asset_type = 'report_file';
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Recreate triggers for each table
-- Row-level AFTER triggers: each sync function runs once per affected row,
-- after the change is applied, so NEW/OLD carry the final row values.
CREATE TRIGGER sync_chats_text_search
AFTER INSERT OR UPDATE OR DELETE ON chats
FOR EACH ROW EXECUTE FUNCTION sync_chats_to_text_search();
CREATE TRIGGER sync_metric_files_text_search
AFTER INSERT OR UPDATE OR DELETE ON metric_files
FOR EACH ROW EXECUTE FUNCTION sync_metric_files_to_text_search();
CREATE TRIGGER sync_dashboard_files_text_search
AFTER INSERT OR UPDATE OR DELETE ON dashboard_files
FOR EACH ROW EXECUTE FUNCTION sync_dashboard_files_to_text_search();
CREATE TRIGGER sync_report_files_text_search
AFTER INSERT OR UPDATE OR DELETE ON report_files
FOR EACH ROW EXECUTE FUNCTION sync_report_files_to_text_search();

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -827,6 +827,20 @@
"when": 1759428123492,
"tag": "0118_violet_vin_gonzales",
"breakpoints": true
},
{
"idx": 119,
"version": "7",
"when": 1759868105747,
"tag": "0119_careful_shriek",
"breakpoints": true
},
{
"idx": 120,
"version": "7",
"when": 1759868882572,
"tag": "0120_aberrant_xavin",
"breakpoints": true
}
]
}

View File

@ -1,7 +1,7 @@
import { and, eq, gte, inArray, isNull, lte, sql } from 'drizzle-orm';
import { z } from 'zod';
import { db } from '../../connection';
import { assetSearchV2 } from '../../schema';
import { assetSearchV2, users } from '../../schema';
import { PaginationInputSchema, type SearchPaginatedResponse } from '../../schema-types';
import { createPermissionedAssetsSubquery } from './access-control-helpers';
@ -112,14 +112,19 @@ export async function searchText(input: SearchTextInput): Promise<SearchTextResp
additionalText: additionalSnippetSql,
updatedAt: assetSearchV2.updatedAt,
screenshotBucketKey: assetSearchV2.screenshotBucketKey,
createdBy: assetSearchV2.createdBy,
createdByName: sql<string>`COALESCE(${users.name}, ${users.email})`,
createdByAvatarUrl: users.avatarUrl,
})
.from(assetSearchV2)
.innerJoin(users, eq(assetSearchV2.createdBy, users.id))
.innerJoin(
permissionedAssetsSubquery,
eq(assetSearchV2.assetId, permissionedAssetsSubquery.assetId)
)
.where(and(...allConditions))
.orderBy(
sql`CASE WHEN ${assetSearchV2.assetType} = 'metric_file' THEN 1 ELSE 0 END`,
sql`pgroonga_score("asset_search_v2".tableoid, "asset_search_v2".ctid) DESC`,
assetSearchV2.updatedAt
)

View File

@ -8,4 +8,7 @@ export const TextSearchResultSchema = z.object({
additionalText: z.string().nullable(),
updatedAt: z.string().datetime(),
screenshotBucketKey: z.string().nullable(),
createdBy: z.string().uuid(),
createdByName: z.string(),
createdByAvatarUrl: z.string().nullable(),
});

View File

@ -1902,6 +1902,7 @@ export const assetSearchV2 = pgTable(
organizationId: uuid('organization_id').notNull(),
title: text('title').notNull(),
additionalText: text('additional_text'),
createdBy: uuid('created_by').notNull(),
createdAt: timestamp('created_at', { withTimezone: true, mode: 'string' })
.defaultNow()
.notNull(),
@ -1917,6 +1918,11 @@ export const assetSearchV2 = pgTable(
foreignColumns: [organizations.id],
name: 'asset_search_v2_organization_id_fkey',
}).onDelete('cascade'),
foreignKey({
columns: [table.createdBy],
foreignColumns: [users.id],
name: 'asset_search_v2_created_by_fkey',
}).onDelete('cascade'),
index('pgroonga_search_title_description_index').using(
'pgroonga',
table.title.asc().nullsLast().op('pgroonga_text_full_text_search_ops_v2'),

View File

@ -51,7 +51,6 @@ export const SearchTextRequestSchema = z
export const SearchTextDataSchema = TextSearchResultSchema.extend({
ancestors: AssetAncestorsSchema.optional(),
screenshotUrl: z.string().optional(),
createdBy: UserInfoResponseSchema.optional(),
});
export type { AssetAncestors } from '@buster/database/schema-types';