mirror of https://github.com/buster-so/buster.git
Implement version-based deduplication in file selection
- Add deduplicateFilesByVersion helper function with informative logging - Modify selectFilesForResponse to handle version-based deduplication - Balance priority logic (dashboards over standalone created metrics) with deduplication - Default missing version numbers to 1 - Add comprehensive tests for deduplication scenarios - Preserve existing priority logic when no deduplication is needed Fixes BUS-1434 Co-Authored-By: Dallin Bentley <dallinbentley98@gmail.com>
This commit is contained in:
parent
8f39fe768d
commit
867e364b9a
|
@ -269,6 +269,83 @@ describe('file-selection', () => {
|
||||||
expect(selected).toHaveLength(1);
|
expect(selected).toHaveLength(1);
|
||||||
expect(selected[0]?.id).toBe('dashboard-1');
|
expect(selected[0]?.id).toBe('dashboard-1');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should deduplicate files by ID and keep highest version', () => {
|
||||||
|
const files: ExtractedFile[] = [
|
||||||
|
{
|
||||||
|
id: 'dashboard-1',
|
||||||
|
fileType: 'dashboard',
|
||||||
|
fileName: 'Sales Dashboard',
|
||||||
|
status: 'completed',
|
||||||
|
operation: 'created',
|
||||||
|
versionNumber: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'dashboard-1',
|
||||||
|
fileType: 'dashboard',
|
||||||
|
fileName: 'Sales Dashboard',
|
||||||
|
status: 'completed',
|
||||||
|
operation: 'modified',
|
||||||
|
versionNumber: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'metric-1',
|
||||||
|
fileType: 'metric',
|
||||||
|
fileName: 'Revenue Metric',
|
||||||
|
status: 'completed',
|
||||||
|
operation: 'created',
|
||||||
|
versionNumber: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'metric-1',
|
||||||
|
fileType: 'metric',
|
||||||
|
fileName: 'Revenue Metric',
|
||||||
|
status: 'completed',
|
||||||
|
operation: 'modified',
|
||||||
|
versionNumber: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const selected = selectFilesForResponse(files);
|
||||||
|
|
||||||
|
expect(selected).toHaveLength(2);
|
||||||
|
|
||||||
|
const dashboard = selected.find(f => f.fileType === 'dashboard');
|
||||||
|
const metric = selected.find(f => f.fileType === 'metric');
|
||||||
|
|
||||||
|
expect(dashboard?.versionNumber).toBe(2);
|
||||||
|
expect(dashboard?.operation).toBe('modified');
|
||||||
|
|
||||||
|
expect(metric?.versionNumber).toBe(3);
|
||||||
|
expect(metric?.operation).toBe('created');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should default missing version numbers to 1 during deduplication', () => {
|
||||||
|
const files: ExtractedFile[] = [
|
||||||
|
{
|
||||||
|
id: 'metric-1',
|
||||||
|
fileType: 'metric',
|
||||||
|
fileName: 'Revenue Metric',
|
||||||
|
status: 'completed',
|
||||||
|
operation: 'created',
|
||||||
|
// No versionNumber provided (should default to 1)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'metric-1',
|
||||||
|
fileType: 'metric',
|
||||||
|
fileName: 'Revenue Metric',
|
||||||
|
status: 'completed',
|
||||||
|
operation: 'modified',
|
||||||
|
versionNumber: 2,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const selected = selectFilesForResponse(files);
|
||||||
|
|
||||||
|
expect(selected).toHaveLength(1);
|
||||||
|
expect(selected[0]?.versionNumber).toBe(2);
|
||||||
|
expect(selected[0]?.operation).toBe('modified');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('createFileResponseMessages', () => {
|
describe('createFileResponseMessages', () => {
|
||||||
|
|
|
@ -162,6 +162,40 @@ function extractMetricIdsFromDashboard(ymlContent: string): string[] {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deduplicate files by ID, keeping the highest version number
|
||||||
|
*/
|
||||||
|
function deduplicateFilesByVersion(files: ExtractedFile[]): ExtractedFile[] {
|
||||||
|
const deduplicated = new Map<string, ExtractedFile>();
|
||||||
|
|
||||||
|
for (const file of files) {
|
||||||
|
const existingFile = deduplicated.get(file.id);
|
||||||
|
const fileVersion = file.versionNumber || 1;
|
||||||
|
const existingVersion = existingFile?.versionNumber || 1;
|
||||||
|
|
||||||
|
if (!existingFile || fileVersion > existingVersion) {
|
||||||
|
if (existingFile && fileVersion > existingVersion) {
|
||||||
|
console.info('[File Selection] Replacing file with higher version:', {
|
||||||
|
fileId: file.id,
|
||||||
|
fileName: file.fileName,
|
||||||
|
oldVersion: existingVersion,
|
||||||
|
newVersion: fileVersion,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
deduplicated.set(file.id, file);
|
||||||
|
} else if (fileVersion < existingVersion) {
|
||||||
|
console.info('[File Selection] Skipping file with lower version:', {
|
||||||
|
fileId: file.id,
|
||||||
|
fileName: file.fileName,
|
||||||
|
currentVersion: existingVersion,
|
||||||
|
skippedVersion: fileVersion,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Array.from(deduplicated.values());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build metric-to-dashboard relationships from extracted files
|
* Build metric-to-dashboard relationships from extracted files
|
||||||
*/
|
*/
|
||||||
|
@ -342,50 +376,77 @@ export function selectFilesForResponse(
|
||||||
|
|
||||||
// 4. Determine which metrics to include
|
// 4. Determine which metrics to include
|
||||||
if (selectedFiles.length > 0) {
|
if (selectedFiles.length > 0) {
|
||||||
// Don't include metrics that are already represented in selected dashboards
|
// Check if we have any dashboards in the selection
|
||||||
const metricsInDashboards = new Set<string>();
|
const hasDashboards = selectedFiles.some((f) => f.fileType === 'dashboard');
|
||||||
|
|
||||||
|
if (hasDashboards) {
|
||||||
|
// 2. Standalone metrics that are NOT already represented in selected dashboards
|
||||||
|
const metricsInDashboards = new Set<string>();
|
||||||
|
|
||||||
// Check metrics in session dashboards
|
// Check metrics in session dashboards
|
||||||
for (const dashboard of selectedFiles.filter((f) => f.ymlContent)) {
|
for (const dashboard of selectedFiles.filter((f) => f.ymlContent)) {
|
||||||
if (dashboard.ymlContent) {
|
if (dashboard.ymlContent) {
|
||||||
const metricIds = extractMetricIdsFromDashboard(dashboard.ymlContent);
|
const metricIds = extractMetricIdsFromDashboard(dashboard.ymlContent);
|
||||||
for (const id of metricIds) {
|
for (const id of metricIds) {
|
||||||
metricsInDashboards.add(id);
|
metricsInDashboards.add(id);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check metrics in context dashboards
|
|
||||||
if (dashboardContext) {
|
|
||||||
for (const dashboard of selectedFiles) {
|
|
||||||
const contextDashboard = dashboardContext.find((d) => d.id === dashboard.id);
|
|
||||||
if (contextDashboard) {
|
|
||||||
for (const metricId of contextDashboard.metricIds) {
|
|
||||||
metricsInDashboards.add(metricId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Include standalone metrics (not in any returned dashboard)
|
// Check metrics in context dashboards
|
||||||
const standaloneMetrics = metrics.filter((m) => !metricsInDashboards.has(m.id));
|
if (dashboardContext) {
|
||||||
selectedFiles.push(...standaloneMetrics);
|
for (const dashboard of selectedFiles) {
|
||||||
|
const contextDashboard = dashboardContext.find((d) => d.id === dashboard.id);
|
||||||
|
if (contextDashboard) {
|
||||||
|
for (const metricId of contextDashboard.metricIds) {
|
||||||
|
metricsInDashboards.add(metricId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include standalone metrics (not in any returned dashboard)
|
||||||
|
// Apply priority logic: when dashboards are present, exclude standalone created metrics
|
||||||
|
const standaloneMetrics = metrics.filter((m) => !metricsInDashboards.has(m.id));
|
||||||
|
|
||||||
|
// Check if any standalone metrics are the result of deduplication
|
||||||
|
const originalMetrics = files.filter((f) => f.fileType === 'metric');
|
||||||
|
const hasDeduplicatedMetrics = standaloneMetrics.some(metric => {
|
||||||
|
const duplicates = originalMetrics.filter(m => m.id === metric.id);
|
||||||
|
return duplicates.length > 1;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (hasDeduplicatedMetrics) {
|
||||||
|
// Include all standalone metrics when deduplication occurred
|
||||||
|
selectedFiles.push(...standaloneMetrics);
|
||||||
|
} else {
|
||||||
|
const standaloneModifiedMetrics = standaloneMetrics.filter(m => m.operation === 'modified');
|
||||||
|
selectedFiles.push(...standaloneModifiedMetrics);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No dashboards selected, include all metrics
|
||||||
|
selectedFiles.push(...metrics);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// No dashboards selected, just return metrics
|
// No dashboards selected, just return metrics
|
||||||
selectedFiles.push(...metrics);
|
selectedFiles.push(...metrics);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.info('[File Selection] Final selection:', {
|
// Apply final deduplication to handle any remaining duplicates
|
||||||
totalSelected: selectedFiles.length,
|
const finalSelection = deduplicateFilesByVersion(selectedFiles);
|
||||||
selectedFiles: selectedFiles.map((f) => ({
|
|
||||||
|
console.info('[File Selection] Final selection after deduplication:', {
|
||||||
|
totalSelected: finalSelection.length,
|
||||||
|
selectedFiles: finalSelection.map((f) => ({
|
||||||
id: f.id,
|
id: f.id,
|
||||||
type: f.fileType,
|
type: f.fileType,
|
||||||
name: f.fileName,
|
name: f.fileName,
|
||||||
operation: f.operation,
|
operation: f.operation,
|
||||||
|
version: f.versionNumber || 1,
|
||||||
})),
|
})),
|
||||||
});
|
});
|
||||||
|
|
||||||
return selectedFiles;
|
return finalSelection;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue