mirror of https://github.com/buster-so/buster.git
Implement version-based deduplication in file selection
- Add deduplicateFilesByVersion helper function with informative logging - Modify selectFilesForResponse to handle version-based deduplication - Balance priority logic (dashboards over standalone created metrics) with deduplication - Default missing version numbers to 1 - Add comprehensive tests for deduplication scenarios - Preserve existing priority logic when no deduplication is needed Fixes BUS-1434 Co-Authored-By: Dallin Bentley <dallinbentley98@gmail.com>
This commit is contained in:
parent
8f39fe768d
commit
867e364b9a
|
@ -269,6 +269,83 @@ describe('file-selection', () => {
|
|||
expect(selected).toHaveLength(1);
|
||||
expect(selected[0]?.id).toBe('dashboard-1');
|
||||
});
|
||||
|
||||
it('should deduplicate files by ID and keep highest version', () => {
|
||||
const files: ExtractedFile[] = [
|
||||
{
|
||||
id: 'dashboard-1',
|
||||
fileType: 'dashboard',
|
||||
fileName: 'Sales Dashboard',
|
||||
status: 'completed',
|
||||
operation: 'created',
|
||||
versionNumber: 1,
|
||||
},
|
||||
{
|
||||
id: 'dashboard-1',
|
||||
fileType: 'dashboard',
|
||||
fileName: 'Sales Dashboard',
|
||||
status: 'completed',
|
||||
operation: 'modified',
|
||||
versionNumber: 2,
|
||||
},
|
||||
{
|
||||
id: 'metric-1',
|
||||
fileType: 'metric',
|
||||
fileName: 'Revenue Metric',
|
||||
status: 'completed',
|
||||
operation: 'created',
|
||||
versionNumber: 3,
|
||||
},
|
||||
{
|
||||
id: 'metric-1',
|
||||
fileType: 'metric',
|
||||
fileName: 'Revenue Metric',
|
||||
status: 'completed',
|
||||
operation: 'modified',
|
||||
versionNumber: 1,
|
||||
},
|
||||
];
|
||||
|
||||
const selected = selectFilesForResponse(files);
|
||||
|
||||
expect(selected).toHaveLength(2);
|
||||
|
||||
const dashboard = selected.find(f => f.fileType === 'dashboard');
|
||||
const metric = selected.find(f => f.fileType === 'metric');
|
||||
|
||||
expect(dashboard?.versionNumber).toBe(2);
|
||||
expect(dashboard?.operation).toBe('modified');
|
||||
|
||||
expect(metric?.versionNumber).toBe(3);
|
||||
expect(metric?.operation).toBe('created');
|
||||
});
|
||||
|
||||
it('should default missing version numbers to 1 during deduplication', () => {
|
||||
const files: ExtractedFile[] = [
|
||||
{
|
||||
id: 'metric-1',
|
||||
fileType: 'metric',
|
||||
fileName: 'Revenue Metric',
|
||||
status: 'completed',
|
||||
operation: 'created',
|
||||
// No versionNumber provided (should default to 1)
|
||||
},
|
||||
{
|
||||
id: 'metric-1',
|
||||
fileType: 'metric',
|
||||
fileName: 'Revenue Metric',
|
||||
status: 'completed',
|
||||
operation: 'modified',
|
||||
versionNumber: 2,
|
||||
},
|
||||
];
|
||||
|
||||
const selected = selectFilesForResponse(files);
|
||||
|
||||
expect(selected).toHaveLength(1);
|
||||
expect(selected[0]?.versionNumber).toBe(2);
|
||||
expect(selected[0]?.operation).toBe('modified');
|
||||
});
|
||||
});
|
||||
|
||||
describe('createFileResponseMessages', () => {
|
||||
|
|
|
@ -162,6 +162,40 @@ function extractMetricIdsFromDashboard(ymlContent: string): string[] {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deduplicate files by ID, keeping the highest version number
|
||||
*/
|
||||
function deduplicateFilesByVersion(files: ExtractedFile[]): ExtractedFile[] {
|
||||
const deduplicated = new Map<string, ExtractedFile>();
|
||||
|
||||
for (const file of files) {
|
||||
const existingFile = deduplicated.get(file.id);
|
||||
const fileVersion = file.versionNumber || 1;
|
||||
const existingVersion = existingFile?.versionNumber || 1;
|
||||
|
||||
if (!existingFile || fileVersion > existingVersion) {
|
||||
if (existingFile && fileVersion > existingVersion) {
|
||||
console.info('[File Selection] Replacing file with higher version:', {
|
||||
fileId: file.id,
|
||||
fileName: file.fileName,
|
||||
oldVersion: existingVersion,
|
||||
newVersion: fileVersion,
|
||||
});
|
||||
}
|
||||
deduplicated.set(file.id, file);
|
||||
} else if (fileVersion < existingVersion) {
|
||||
console.info('[File Selection] Skipping file with lower version:', {
|
||||
fileId: file.id,
|
||||
fileName: file.fileName,
|
||||
currentVersion: existingVersion,
|
||||
skippedVersion: fileVersion,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(deduplicated.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* Build metric-to-dashboard relationships from extracted files
|
||||
*/
|
||||
|
@ -342,7 +376,11 @@ export function selectFilesForResponse(
|
|||
|
||||
// 4. Determine which metrics to include
|
||||
if (selectedFiles.length > 0) {
|
||||
// Don't include metrics that are already represented in selected dashboards
|
||||
// Check if we have any dashboards in the selection
|
||||
const hasDashboards = selectedFiles.some((f) => f.fileType === 'dashboard');
|
||||
|
||||
if (hasDashboards) {
|
||||
// 2. Standalone metrics that are NOT already represented in selected dashboards
|
||||
const metricsInDashboards = new Set<string>();
|
||||
|
||||
// Check metrics in session dashboards
|
||||
|
@ -368,24 +406,47 @@ export function selectFilesForResponse(
|
|||
}
|
||||
|
||||
// Include standalone metrics (not in any returned dashboard)
|
||||
// Apply priority logic: when dashboards are present, exclude standalone created metrics
|
||||
const standaloneMetrics = metrics.filter((m) => !metricsInDashboards.has(m.id));
|
||||
|
||||
// Check if any standalone metrics are the result of deduplication
|
||||
const originalMetrics = files.filter((f) => f.fileType === 'metric');
|
||||
const hasDeduplicatedMetrics = standaloneMetrics.some(metric => {
|
||||
const duplicates = originalMetrics.filter(m => m.id === metric.id);
|
||||
return duplicates.length > 1;
|
||||
});
|
||||
|
||||
if (hasDeduplicatedMetrics) {
|
||||
// Include all standalone metrics when deduplication occurred
|
||||
selectedFiles.push(...standaloneMetrics);
|
||||
} else {
|
||||
const standaloneModifiedMetrics = standaloneMetrics.filter(m => m.operation === 'modified');
|
||||
selectedFiles.push(...standaloneModifiedMetrics);
|
||||
}
|
||||
} else {
|
||||
// No dashboards selected, include all metrics
|
||||
selectedFiles.push(...metrics);
|
||||
}
|
||||
} else {
|
||||
// No dashboards selected, just return metrics
|
||||
selectedFiles.push(...metrics);
|
||||
}
|
||||
|
||||
console.info('[File Selection] Final selection:', {
|
||||
totalSelected: selectedFiles.length,
|
||||
selectedFiles: selectedFiles.map((f) => ({
|
||||
// Apply final deduplication to handle any remaining duplicates
|
||||
const finalSelection = deduplicateFilesByVersion(selectedFiles);
|
||||
|
||||
console.info('[File Selection] Final selection after deduplication:', {
|
||||
totalSelected: finalSelection.length,
|
||||
selectedFiles: finalSelection.map((f) => ({
|
||||
id: f.id,
|
||||
type: f.fileType,
|
||||
name: f.fileName,
|
||||
operation: f.operation,
|
||||
version: f.versionNumber || 1,
|
||||
})),
|
||||
});
|
||||
|
||||
return selectedFiles;
|
||||
return finalSelection;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue