Implement version-based deduplication in file selection

- Add deduplicateFilesByVersion helper function with informative logging
- Modify selectFilesForResponse to handle version-based deduplication
- Balance priority logic (dashboards over standalone created metrics) with deduplication
- Default missing version numbers to 1
- Add comprehensive tests for deduplication scenarios
- Preserve existing priority logic when no deduplication is needed

Fixes BUS-1434

Co-Authored-By: Dallin Bentley <dallinbentley98@gmail.com>
This commit is contained in:
Devin AI 2025-07-17 17:40:40 +00:00
parent 8f39fe768d
commit 867e364b9a
2 changed files with 165 additions and 27 deletions

View File

@ -269,6 +269,83 @@ describe('file-selection', () => {
expect(selected).toHaveLength(1);
expect(selected[0]?.id).toBe('dashboard-1');
});
it('should deduplicate files by ID and keep highest version', () => {
const files: ExtractedFile[] = [
{
id: 'dashboard-1',
fileType: 'dashboard',
fileName: 'Sales Dashboard',
status: 'completed',
operation: 'created',
versionNumber: 1,
},
{
id: 'dashboard-1',
fileType: 'dashboard',
fileName: 'Sales Dashboard',
status: 'completed',
operation: 'modified',
versionNumber: 2,
},
{
id: 'metric-1',
fileType: 'metric',
fileName: 'Revenue Metric',
status: 'completed',
operation: 'created',
versionNumber: 3,
},
{
id: 'metric-1',
fileType: 'metric',
fileName: 'Revenue Metric',
status: 'completed',
operation: 'modified',
versionNumber: 1,
},
];
const selected = selectFilesForResponse(files);
expect(selected).toHaveLength(2);
const dashboard = selected.find(f => f.fileType === 'dashboard');
const metric = selected.find(f => f.fileType === 'metric');
expect(dashboard?.versionNumber).toBe(2);
expect(dashboard?.operation).toBe('modified');
expect(metric?.versionNumber).toBe(3);
expect(metric?.operation).toBe('created');
});
it('should default missing version numbers to 1 during deduplication', () => {
const files: ExtractedFile[] = [
{
id: 'metric-1',
fileType: 'metric',
fileName: 'Revenue Metric',
status: 'completed',
operation: 'created',
// No versionNumber provided (should default to 1)
},
{
id: 'metric-1',
fileType: 'metric',
fileName: 'Revenue Metric',
status: 'completed',
operation: 'modified',
versionNumber: 2,
},
];
const selected = selectFilesForResponse(files);
expect(selected).toHaveLength(1);
expect(selected[0]?.versionNumber).toBe(2);
expect(selected[0]?.operation).toBe('modified');
});
});
describe('createFileResponseMessages', () => {

View File

@ -162,6 +162,40 @@ function extractMetricIdsFromDashboard(ymlContent: string): string[] {
}
}
/**
* Deduplicate files by ID, keeping the highest version number
*/
function deduplicateFilesByVersion(files: ExtractedFile[]): ExtractedFile[] {
const deduplicated = new Map<string, ExtractedFile>();
for (const file of files) {
const existingFile = deduplicated.get(file.id);
const fileVersion = file.versionNumber || 1;
const existingVersion = existingFile?.versionNumber || 1;
if (!existingFile || fileVersion > existingVersion) {
if (existingFile && fileVersion > existingVersion) {
console.info('[File Selection] Replacing file with higher version:', {
fileId: file.id,
fileName: file.fileName,
oldVersion: existingVersion,
newVersion: fileVersion,
});
}
deduplicated.set(file.id, file);
} else if (fileVersion < existingVersion) {
console.info('[File Selection] Skipping file with lower version:', {
fileId: file.id,
fileName: file.fileName,
currentVersion: existingVersion,
skippedVersion: fileVersion,
});
}
}
return Array.from(deduplicated.values());
}
/**
* Build metric-to-dashboard relationships from extracted files
*/
@ -342,7 +376,11 @@ export function selectFilesForResponse(
// 4. Determine which metrics to include
if (selectedFiles.length > 0) {
// Don't include metrics that are already represented in selected dashboards
// Check if we have any dashboards in the selection
const hasDashboards = selectedFiles.some((f) => f.fileType === 'dashboard');
if (hasDashboards) {
// 2. Standalone metrics that are NOT already represented in selected dashboards
const metricsInDashboards = new Set<string>();
// Check metrics in session dashboards
@ -368,24 +406,47 @@ export function selectFilesForResponse(
}
// Include standalone metrics (not in any returned dashboard)
// Apply priority logic: when dashboards are present, exclude standalone created metrics
const standaloneMetrics = metrics.filter((m) => !metricsInDashboards.has(m.id));
// Check if any standalone metrics are the result of deduplication
const originalMetrics = files.filter((f) => f.fileType === 'metric');
const hasDeduplicatedMetrics = standaloneMetrics.some(metric => {
const duplicates = originalMetrics.filter(m => m.id === metric.id);
return duplicates.length > 1;
});
if (hasDeduplicatedMetrics) {
// Include all standalone metrics when deduplication occurred
selectedFiles.push(...standaloneMetrics);
} else {
const standaloneModifiedMetrics = standaloneMetrics.filter(m => m.operation === 'modified');
selectedFiles.push(...standaloneModifiedMetrics);
}
} else {
// No dashboards selected, include all metrics
selectedFiles.push(...metrics);
}
} else {
// No dashboards selected, just return metrics
selectedFiles.push(...metrics);
}
console.info('[File Selection] Final selection:', {
totalSelected: selectedFiles.length,
selectedFiles: selectedFiles.map((f) => ({
// Apply final deduplication to handle any remaining duplicates
const finalSelection = deduplicateFilesByVersion(selectedFiles);
console.info('[File Selection] Final selection after deduplication:', {
totalSelected: finalSelection.length,
selectedFiles: finalSelection.map((f) => ({
id: f.id,
type: f.fileType,
name: f.fileName,
operation: f.operation,
version: f.versionNumber || 1,
})),
});
return selectedFiles;
return finalSelection;
}
/**