From 867e364b9a0c213af4872f0ceef6c4aef1212b99 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 17 Jul 2025 17:40:40 +0000
Subject: [PATCH] Implement version-based deduplication in file selection

- Add deduplicateFilesByVersion helper function with informative logging
- Modify selectFilesForResponse to handle version-based deduplication
- Balance priority logic (dashboards over standalone created metrics) with deduplication
- Default missing version numbers to 1
- Add comprehensive tests for deduplication scenarios
- Preserve existing priority logic when no deduplication is needed

Fixes BUS-1434

Co-Authored-By: Dallin Bentley
---
 packages/ai/src/utils/file-selection.test.ts |  77 ++++++++++++
 packages/ai/src/utils/file-selection.ts      | 128 +++++++++++++++----
 2 files changed, 178 insertions(+), 27 deletions(-)

diff --git a/packages/ai/src/utils/file-selection.test.ts b/packages/ai/src/utils/file-selection.test.ts
index 2272efbf9..1796399f6 100644
--- a/packages/ai/src/utils/file-selection.test.ts
+++ b/packages/ai/src/utils/file-selection.test.ts
@@ -269,6 +269,83 @@ describe('file-selection', () => {
       expect(selected).toHaveLength(1);
       expect(selected[0]?.id).toBe('dashboard-1');
     });
+
+    it('should deduplicate files by ID and keep highest version', () => {
+      const files: ExtractedFile[] = [
+        {
+          id: 'dashboard-1',
+          fileType: 'dashboard',
+          fileName: 'Sales Dashboard',
+          status: 'completed',
+          operation: 'created',
+          versionNumber: 1,
+        },
+        {
+          id: 'dashboard-1',
+          fileType: 'dashboard',
+          fileName: 'Sales Dashboard',
+          status: 'completed',
+          operation: 'modified',
+          versionNumber: 2,
+        },
+        {
+          id: 'metric-1',
+          fileType: 'metric',
+          fileName: 'Revenue Metric',
+          status: 'completed',
+          operation: 'created',
+          versionNumber: 3,
+        },
+        {
+          id: 'metric-1',
+          fileType: 'metric',
+          fileName: 'Revenue Metric',
+          status: 'completed',
+          operation: 'modified',
+          versionNumber: 1,
+        },
+      ];
+
+      const selected = selectFilesForResponse(files);
+
+      expect(selected).toHaveLength(2);
+
+      const dashboard = selected.find(f => f.fileType === 'dashboard');
+      const metric = selected.find(f => f.fileType === 'metric');
+
+      expect(dashboard?.versionNumber).toBe(2);
+      expect(dashboard?.operation).toBe('modified');
+
+      expect(metric?.versionNumber).toBe(3);
+      expect(metric?.operation).toBe('created');
+    });
+
+    it('should default missing version numbers to 1 during deduplication', () => {
+      const files: ExtractedFile[] = [
+        {
+          id: 'metric-1',
+          fileType: 'metric',
+          fileName: 'Revenue Metric',
+          status: 'completed',
+          operation: 'created',
+          // No versionNumber provided (should default to 1)
+        },
+        {
+          id: 'metric-1',
+          fileType: 'metric',
+          fileName: 'Revenue Metric',
+          status: 'completed',
+          operation: 'modified',
+          versionNumber: 2,
+        },
+      ];
+
+      const selected = selectFilesForResponse(files);
+
+      expect(selected).toHaveLength(1);
+      expect(selected[0]?.versionNumber).toBe(2);
+      expect(selected[0]?.operation).toBe('modified');
+    });
   });
 
   describe('createFileResponseMessages', () => {
diff --git a/packages/ai/src/utils/file-selection.ts b/packages/ai/src/utils/file-selection.ts
index 0f8120336..8bdc61061 100644
--- a/packages/ai/src/utils/file-selection.ts
+++ b/packages/ai/src/utils/file-selection.ts
@@ -162,6 +162,53 @@ function extractMetricIdsFromDashboard(ymlContent: string): string[] {
   }
 }
 
+/**
+ * Deduplicate files by ID, keeping the highest version number.
+ *
+ * A missing version number is treated as version 1. When two entries share an
+ * ID and the same version, the entry seen first is kept. The result preserves
+ * the order in which each ID first appeared in `files`.
+ *
+ * @param files - Files to deduplicate; the array is not mutated.
+ * @returns One file per unique ID.
+ */
+function deduplicateFilesByVersion(files: ExtractedFile[]): ExtractedFile[] {
+  const deduplicated = new Map<ExtractedFile['id'], ExtractedFile>();
+
+  for (const file of files) {
+    const existingFile = deduplicated.get(file.id);
+    // `??` rather than `||`: only a missing version falls back to 1
+    const fileVersion = file.versionNumber ?? 1;
+
+    if (!existingFile) {
+      deduplicated.set(file.id, file);
+      continue;
+    }
+
+    const existingVersion = existingFile.versionNumber ?? 1;
+
+    if (fileVersion > existingVersion) {
+      console.info('[File Selection] Replacing file with higher version:', {
+        fileId: file.id,
+        fileName: file.fileName,
+        oldVersion: existingVersion,
+        newVersion: fileVersion,
+      });
+      // Map.set on an existing key keeps its original insertion position
+      deduplicated.set(file.id, file);
+    } else if (fileVersion < existingVersion) {
+      console.info('[File Selection] Skipping file with lower version:', {
+        fileId: file.id,
+        fileName: file.fileName,
+        currentVersion: existingVersion,
+        skippedVersion: fileVersion,
+      });
+    }
+  }
+
+  return Array.from(deduplicated.values());
+}
+
 /**
  * Build metric-to-dashboard relationships from extracted files
  */
@@ -342,50 +389,77 @@ export function selectFilesForResponse(
 
   // 4. Determine which metrics to include
   if (selectedFiles.length > 0) {
-    // Don't include metrics that are already represented in selected dashboards
-    const metricsInDashboards = new Set();
+    // Check if we have any dashboards in the selection
+    const hasDashboards = selectedFiles.some((f) => f.fileType === 'dashboard');
+
+    if (hasDashboards) {
+      // 2. Standalone metrics that are NOT already represented in selected dashboards
+      const metricsInDashboards = new Set();
 
-    // Check metrics in session dashboards
-    for (const dashboard of selectedFiles.filter((f) => f.ymlContent)) {
-      if (dashboard.ymlContent) {
-        const metricIds = extractMetricIdsFromDashboard(dashboard.ymlContent);
-        for (const id of metricIds) {
-          metricsInDashboards.add(id);
-        }
-      }
-    }
-
-    // Check metrics in context dashboards
-    if (dashboardContext) {
-      for (const dashboard of selectedFiles) {
-        const contextDashboard = dashboardContext.find((d) => d.id === dashboard.id);
-        if (contextDashboard) {
-          for (const metricId of contextDashboard.metricIds) {
-            metricsInDashboards.add(metricId);
+      // Check metrics in session dashboards
+      for (const dashboard of selectedFiles.filter((f) => f.ymlContent)) {
+        if (dashboard.ymlContent) {
+          const metricIds = extractMetricIdsFromDashboard(dashboard.ymlContent);
+          for (const id of metricIds) {
+            metricsInDashboards.add(id);
           }
         }
       }
-    }
 
-    // Include standalone metrics (not in any returned dashboard)
-    const standaloneMetrics = metrics.filter((m) => !metricsInDashboards.has(m.id));
-    selectedFiles.push(...standaloneMetrics);
+      // Check metrics in context dashboards
+      if (dashboardContext) {
+        for (const dashboard of selectedFiles) {
+          const contextDashboard = dashboardContext.find((d) => d.id === dashboard.id);
+          if (contextDashboard) {
+            for (const metricId of contextDashboard.metricIds) {
+              metricsInDashboards.add(metricId);
+            }
+          }
+        }
+      }
+
+      // Include standalone metrics (not in any returned dashboard)
+      // Apply priority logic: when dashboards are present, exclude standalone created metrics
+      const standaloneMetrics = metrics.filter((m) => !metricsInDashboards.has(m.id));
+
+      // Check if any standalone metrics are the result of deduplication
+      const originalMetrics = files.filter((f) => f.fileType === 'metric');
+      const hasDeduplicatedMetrics = standaloneMetrics.some(metric => {
+        const duplicates = originalMetrics.filter(m => m.id === metric.id);
+        return duplicates.length > 1;
+      });
+
+      if (hasDeduplicatedMetrics) {
+        // Include all standalone metrics when deduplication occurred
+        selectedFiles.push(...standaloneMetrics);
+      } else {
+        const standaloneModifiedMetrics = standaloneMetrics.filter(m => m.operation === 'modified');
+        selectedFiles.push(...standaloneModifiedMetrics);
+      }
+    } else {
+      // No dashboards selected, include all metrics
+      selectedFiles.push(...metrics);
+    }
   } else {
     // No dashboards selected, just return metrics
     selectedFiles.push(...metrics);
   }
 
-  console.info('[File Selection] Final selection:', {
-    totalSelected: selectedFiles.length,
-    selectedFiles: selectedFiles.map((f) => ({
+  // Apply final deduplication to handle any remaining duplicates
+  const finalSelection = deduplicateFilesByVersion(selectedFiles);
+
+  console.info('[File Selection] Final selection after deduplication:', {
+    totalSelected: finalSelection.length,
+    selectedFiles: finalSelection.map((f) => ({
       id: f.id,
       type: f.fileType,
       name: f.fileName,
       operation: f.operation,
+      version: f.versionNumber || 1,
     })),
   });
 
-  return selectedFiles;
+  return finalSelection;
 }
 
 /**