buster/apps/api/libs/sql_analyzer/tests/semantic_substitution_tests.rs

694 lines
23 KiB
Rust

use sql_analyzer::{
substitute_semantic_query, validate_and_substitute_semantic_query, Filter, Metric, Parameter,
ParameterType, SemanticLayer, SqlAnalyzerError, ValidationMode
};
use anyhow::Result;
fn create_test_semantic_layer() -> SemanticLayer {
let mut semantic_layer = SemanticLayer::new();
// Add tables
semantic_layer.add_table("users", vec!["id", "name", "email", "created_at", "status"]);
semantic_layer.add_table("orders", vec!["id", "user_id", "amount", "created_at", "status", "product_id"]);
semantic_layer.add_table("products", vec!["id", "name", "price"]);
semantic_layer.add_table("order_items", vec!["id", "order_id", "product_id", "quantity"]);
// Add relationships
semantic_layer.add_relationship(sql_analyzer::Relationship {
from_table: "users".to_string(),
from_column: "id".to_string(),
to_table: "orders".to_string(),
to_column: "user_id".to_string(),
});
semantic_layer.add_relationship(sql_analyzer::Relationship {
from_table: "orders".to_string(),
from_column: "id".to_string(),
to_table: "order_items".to_string(),
to_column: "order_id".to_string(),
});
semantic_layer.add_relationship(sql_analyzer::Relationship {
from_table: "products".to_string(),
from_column: "id".to_string(),
to_table: "order_items".to_string(),
to_column: "product_id".to_string(),
});
// Add metrics
semantic_layer.add_metric(Metric {
name: "metric_TotalOrders".to_string(),
table: "orders".to_string(),
expression: "COUNT(orders.id)".to_string(),
parameters: vec![],
description: Some("Total number of orders".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_TotalSpending".to_string(),
table: "orders".to_string(),
expression: "SUM(orders.amount)".to_string(),
parameters: vec![],
description: Some("Total spending across all orders".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_OrdersLastNDays".to_string(),
table: "orders".to_string(),
expression: "COUNT(CASE WHEN orders.created_at >= CURRENT_DATE - INTERVAL '{{n}}' DAY THEN orders.id END)".to_string(),
parameters: vec![
Parameter {
name: "n".to_string(),
param_type: ParameterType::Number,
default: Some("30".to_string()),
},
],
description: Some("Orders in the last N days".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_AverageOrderValue".to_string(),
table: "orders".to_string(),
expression: "SUM(orders.amount) / NULLIF(COUNT(orders.id), 0)".to_string(),
parameters: vec![],
description: Some("Average order value".to_string()),
});
// Add recursive metric that uses another metric
semantic_layer.add_metric(Metric {
name: "metric_RecursiveMetric".to_string(),
table: "orders".to_string(),
expression: "metric_TotalOrders / 2".to_string(),
parameters: vec![],
description: Some("Half of total orders".to_string()),
});
// Add filters
semantic_layer.add_filter(Filter {
name: "filter_IsRecentOrder".to_string(),
table: "orders".to_string(),
expression: "orders.created_at >= CURRENT_DATE - INTERVAL '30' DAY".to_string(),
parameters: vec![],
description: Some("Orders from the last 30 days".to_string()),
});
semantic_layer.add_filter(Filter {
name: "filter_OrderAmountGt".to_string(),
table: "orders".to_string(),
expression: "orders.amount > {{amount}}".to_string(),
parameters: vec![
Parameter {
name: "amount".to_string(),
param_type: ParameterType::Number,
default: Some("100".to_string()),
},
],
description: Some("Orders with amount greater than a threshold".to_string()),
});
semantic_layer.add_filter(Filter {
name: "filter_IsActiveUser".to_string(),
table: "users".to_string(),
expression: "users.status = 'active'".to_string(),
parameters: vec![],
description: Some("Active users".to_string()),
});
semantic_layer
}
#[tokio::test]
async fn test_simple_metric_substitution() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT user_id, metric_TotalOrders FROM orders GROUP BY user_id";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("(COUNT(orders.id))"),
"Simple metric should be substituted correctly");
Ok(())
}
#[tokio::test]
async fn test_parameterized_metric_substitution() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT user_id, metric_OrdersLastNDays(60) FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("INTERVAL '60' DAY"),
"Parameterized metric should substitute parameters correctly");
Ok(())
}
#[tokio::test]
async fn test_default_parameter_values() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT user_id, metric_OrdersLastNDays() FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("INTERVAL '30' DAY"),
"Should use default parameter value");
Ok(())
}
#[tokio::test]
async fn test_recursive_metric_substitution() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT user_id, metric_RecursiveMetric FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("((COUNT(orders.id))) / 2"),
"Recursive metric should be fully substituted");
Ok(())
}
#[tokio::test]
async fn test_filter_substitution() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT * FROM users WHERE filter_IsActiveUser";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("WHERE (users.status = 'active')"),
"Filter should be substituted correctly");
Ok(())
}
#[tokio::test]
async fn test_parameterized_filter_substitution() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT * FROM orders WHERE filter_OrderAmountGt(200)";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("WHERE (orders.amount > 200)"),
"Parameterized filter should substitute parameter correctly");
Ok(())
}
#[tokio::test]
async fn test_compound_expressions_with_metrics() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT user_id, metric_TotalOrders + metric_RecursiveMetric AS combined FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("(COUNT(orders.id)) + (((COUNT(orders.id))) / 2)"),
"Compound expressions with metrics should be substituted correctly");
Ok(())
}
#[tokio::test]
async fn test_missing_required_parameter() -> Result<()> {
let mut semantic_layer = create_test_semantic_layer();
// Add a metric with a required parameter (no default)
semantic_layer.add_metric(Metric {
name: "metric_RequiredParam".to_string(),
table: "orders".to_string(),
expression: "COUNT(CASE WHEN orders.created_at > '{{cutoff_date}}' THEN 1 END)".to_string(),
parameters: vec![
Parameter {
name: "cutoff_date".to_string(),
param_type: ParameterType::Date,
default: None, // No default - required parameter
},
],
description: Some("Metric with required parameter".to_string()),
});
let sql = "SELECT metric_RequiredParam() FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await;
assert!(result.is_err(), "Should return error for missing required parameter");
if let Err(SqlAnalyzerError::MissingParameter(msg)) = result {
assert!(msg.contains("Missing required parameter"),
"Error should mention missing parameter");
} else {
panic!("Expected MissingParameter error but got: {:?}", result);
}
Ok(())
}
#[tokio::test]
async fn test_circular_reference_detection() -> Result<()> {
let mut semantic_layer = create_test_semantic_layer();
// Create circular reference: A -> B -> A
semantic_layer.add_metric(Metric {
name: "metric_CircularA".to_string(),
table: "orders".to_string(),
expression: "metric_CircularB + 10".to_string(),
parameters: vec![],
description: Some("Part of circular reference".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_CircularB".to_string(),
table: "orders".to_string(),
expression: "metric_CircularA * 2".to_string(),
parameters: vec![],
description: Some("Part of circular reference".to_string()),
});
let sql = "SELECT metric_CircularA FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await;
assert!(result.is_err(), "Should detect circular reference");
if let Err(SqlAnalyzerError::SubstitutionError(msg)) = result {
assert!(msg.contains("Circular reference"),
"Error should mention circular reference");
} else {
panic!("Expected SubstitutionError for circular reference but got: {:?}", result);
}
Ok(())
}
#[tokio::test]
async fn test_validate_and_substitute() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT u.id, u.name, metric_TotalOrders FROM users u JOIN orders o ON u.id = o.user_id";
let result = validate_and_substitute_semantic_query(
sql.to_string(),
semantic_layer,
ValidationMode::Flexible,
).await?;
assert!(result.contains("(COUNT(orders.id))"), "Metric should be substituted after validation");
Ok(())
}
#[tokio::test]
async fn test_complex_query_with_metrics_and_filters() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = r#"
WITH revenue_data AS (
SELECT
user_id,
metric_TotalSpending AS total_revenue,
metric_OrdersLastNDays(90) AS q_orders
FROM orders
WHERE filter_OrderAmountGt(50)
GROUP BY user_id
)
SELECT
u.name,
rd.total_revenue,
rd.q_orders,
rd.total_revenue / NULLIF(rd.q_orders, 0) AS avg_order
FROM users u
JOIN revenue_data rd ON u.id = rd.user_id
WHERE filter_IsActiveUser
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Check various substitutions in different parts of the query
assert!(result.contains("(SUM(orders.amount)) AS total_revenue"),
"Should substitute metric in CTE correctly");
assert!(result.contains("INTERVAL '90' DAY"),
"Should substitute parameterized metric with parameter");
assert!(result.contains("WHERE (orders.amount > 50)"),
"Should substitute parameterized filter with parameter");
assert!(result.contains("WHERE (users.status = 'active')"),
"Should substitute filter in main query");
Ok(())
}
#[tokio::test]
async fn test_metrics_in_subquery() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = r#"
SELECT
u.name,
(SELECT metric_TotalOrders FROM orders o WHERE o.user_id = u.id) AS order_count
FROM users u
WHERE filter_IsActiveUser
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Check substitution in subquery
assert!(result.contains("(SELECT (COUNT(orders.id)) FROM orders"),
"Should substitute metric in subquery correctly");
// Check filter substitution
assert!(result.contains("WHERE (users.status = 'active')"),
"Should substitute filter correctly");
Ok(())
}
#[tokio::test]
async fn test_metrics_in_complex_expressions() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = r#"
SELECT
u.id,
u.name,
CASE
WHEN metric_TotalOrders > 10 THEN 'High Volume'
WHEN metric_TotalOrders > 5 THEN 'Medium Volume'
ELSE 'Low Volume'
END as volume_category,
metric_TotalSpending / NULLIF(metric_TotalOrders, 0) as avg_order_value
FROM
users u
JOIN
orders o ON u.id = o.user_id
GROUP BY
u.id, u.name
HAVING
metric_TotalOrders > 0
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Check CASE expression substitution
assert!(result.contains("WHEN (COUNT(orders.id)) > 10 THEN"),
"Should substitute metric in CASE condition correctly");
// Check complex expression with division
assert!(result.contains("(SUM(orders.amount)) / NULLIF((COUNT(orders.id)), 0)"),
"Should substitute metrics in division expression correctly");
// Check HAVING clause
assert!(result.contains("HAVING (COUNT(orders.id)) > 0"),
"Should substitute metric in HAVING clause correctly");
Ok(())
}
#[tokio::test]
async fn test_metrics_in_join_conditions() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
// This is an unusual case but should be handled correctly
let sql = r#"
SELECT u.id, p.name
FROM users u
JOIN orders o ON u.id = o.user_id AND o.amount > metric_AverageOrderValue
JOIN products p ON o.product_id = p.id
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Check metric substitution in JOIN condition
assert!(result.contains("ON u.id = o.user_id AND o.amount > (SUM(orders.amount) / NULLIF(COUNT(orders.id), 0))"),
"Should substitute metric in JOIN condition correctly");
Ok(())
}
#[tokio::test]
async fn test_multiple_parameter_metric() -> Result<()> {
let mut semantic_layer = create_test_semantic_layer();
semantic_layer.add_metric(Metric {
name: "metric_DateRangeRevenue".to_string(),
table: "orders".to_string(),
expression: "SUM(CASE WHEN orders.created_at BETWEEN '{{start_date}}' AND '{{end_date}}' THEN orders.amount ELSE 0 END)".to_string(),
parameters: vec![
Parameter {
name: "start_date".to_string(),
param_type: ParameterType::Date,
default: Some("'2023-01-01'".to_string()),
},
Parameter {
name: "end_date".to_string(),
param_type: ParameterType::Date,
default: Some("'2023-12-31'".to_string()),
},
],
description: Some("Revenue within date range".to_string()),
});
let sql = "SELECT user_id, metric_DateRangeRevenue('2023-06-01', '2023-06-30') FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("BETWEEN '2023-06-01' AND '2023-06-30'"),
"Should substitute multiple parameters correctly");
Ok(())
}
#[tokio::test]
async fn test_nested_metrics() -> Result<()> {
let mut semantic_layer = create_test_semantic_layer();
// Add deeply nested metrics: A -> B -> C -> D (non-circular)
semantic_layer.add_metric(Metric {
name: "metric_D".to_string(),
table: "orders".to_string(),
expression: "COUNT(orders.id)".to_string(),
parameters: vec![],
description: Some("Base metric".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_C".to_string(),
table: "orders".to_string(),
expression: "metric_D * 2".to_string(),
parameters: vec![],
description: Some("Uses metric D".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_B".to_string(),
table: "orders".to_string(),
expression: "metric_C + 10".to_string(),
parameters: vec![],
description: Some("Uses metric C".to_string()),
});
semantic_layer.add_metric(Metric {
name: "metric_A".to_string(),
table: "orders".to_string(),
expression: "metric_B / 2".to_string(),
parameters: vec![],
description: Some("Uses metric B".to_string()),
});
let sql = "SELECT metric_A FROM orders";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Check for fully recursive substitution
assert!(result.contains("((((COUNT(orders.id)) * 2) + 10) / 2)"),
"Should recursively substitute all nested metrics");
Ok(())
}
#[tokio::test]
async fn test_union_query_with_metrics() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = r#"
SELECT
user_id,
'Current' as period,
metric_TotalOrders
FROM
orders
WHERE
filter_IsRecentOrder
UNION ALL
SELECT
user_id,
'Previous' as period,
metric_TotalOrders
FROM
orders
WHERE
NOT filter_IsRecentOrder
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Check metric and filter substitution on both sides of the UNION
assert!(result.matches("(COUNT(orders.id))").count() == 2,
"Should substitute metrics on both sides of UNION");
assert!(result.matches("orders.created_at >= CURRENT_DATE - INTERVAL '30' DAY").count() == 2,
"Should substitute filter on both sides of UNION");
Ok(())
}
#[tokio::test]
async fn test_metrics_in_having_and_order_by() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = r#"
SELECT
user_id,
metric_TotalOrders
FROM
orders
GROUP BY
user_id
HAVING
metric_TotalOrders > 5
ORDER BY
metric_TotalSpending DESC
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Debug print the result
println!("DEBUG - result: {}", result);
assert!(result.contains("HAVING (COUNT(orders.id)) > 5"),
"Should substitute metric in HAVING clause");
assert!(result.contains("ORDER BY (SUM(orders.amount)) DESC"),
"Should substitute metric in ORDER BY clause");
Ok(())
}
#[tokio::test]
async fn test_parameter_type_validation() -> Result<()> {
let mut semantic_layer = create_test_semantic_layer();
semantic_layer.add_metric(Metric {
name: "metric_TypedParameter".to_string(),
table: "orders".to_string(),
expression: "SUM(CASE WHEN orders.created_at >= '{{date_param}}' AND orders.amount > {{amount_param}} THEN orders.amount ELSE 0 END)".to_string(),
parameters: vec![
Parameter {
name: "date_param".to_string(),
param_type: ParameterType::Date,
default: Some("'2023-01-01'".to_string()),
},
Parameter {
name: "amount_param".to_string(),
param_type: ParameterType::Number,
default: Some("100".to_string()),
},
],
description: Some("Sum with typed parameters".to_string()),
});
// Test with valid parameters
let sql_valid = "SELECT metric_TypedParameter('2023-06-01', 200) FROM orders";
let result_valid = substitute_semantic_query(sql_valid.to_string(), semantic_layer.clone()).await?;
assert!(result_valid.contains("'2023-06-01'"), "Should substitute date parameter correctly");
assert!(result_valid.contains("200"), "Should substitute number parameter correctly");
// Test with invalid number parameter
let sql_invalid = "SELECT metric_TypedParameter('2023-06-01', 'not-a-number') FROM orders";
let result_invalid = substitute_semantic_query(sql_invalid.to_string(), semantic_layer).await;
assert!(result_invalid.is_err(), "Should reject invalid number parameter");
if let Err(SqlAnalyzerError::InvalidParameter(msg)) = result_invalid {
assert!(msg.contains("Expected number"), "Error should mention invalid number parameter");
} else {
panic!("Expected InvalidParameter error but got: {:?}", result_invalid);
}
Ok(())
}
#[tokio::test]
async fn test_parameter_escaping() -> Result<()> {
let mut semantic_layer = create_test_semantic_layer();
semantic_layer.add_filter(Filter {
name: "filter_LikePattern".to_string(),
table: "users".to_string(),
expression: "users.email LIKE '{{pattern}}'".to_string(),
parameters: vec![
Parameter {
name: "pattern".to_string(),
param_type: ParameterType::String,
default: Some("'%example.com%'".to_string()),
},
],
description: Some("Filter emails with pattern".to_string()),
});
let sql = "SELECT * FROM users WHERE filter_LikePattern('%special\\_chars%')";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("LIKE '%special\\_chars%'"),
"Should preserve special characters in parameter substitution");
Ok(())
}
#[tokio::test]
async fn test_metrics_with_explicit_aliases() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = "SELECT user_id, metric_TotalOrders AS order_count FROM orders GROUP BY user_id";
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
assert!(result.contains("(COUNT(orders.id)) AS order_count"),
"Should preserve alias when substituting metric");
Ok(())
}
#[tokio::test]
async fn test_deeply_nested_metrics_and_filters() -> Result<()> {
let semantic_layer = create_test_semantic_layer();
let sql = r#"
WITH filtered_orders AS (
SELECT * FROM orders WHERE filter_OrderAmountGt(200)
)
SELECT
u.id,
metric_TotalOrders,
(SELECT metric_TotalSpending FROM filtered_orders WHERE user_id = u.id) AS user_spending
FROM users u
WHERE u.id IN (SELECT user_id FROM filtered_orders GROUP BY user_id HAVING metric_TotalOrders > 5)
"#;
let result = substitute_semantic_query(sql.to_string(), semantic_layer).await?;
// Test for various substitutions
assert!(result.contains("WHERE (orders.amount > 200)"),
"Should substitute filter in CTE");
assert!(result.contains("(COUNT(orders.id))"),
"Should substitute metric in main query");
assert!(result.contains("(SELECT (SUM(orders.amount)) FROM"),
"Should substitute metric in scalar subquery");
assert!(result.contains("HAVING (COUNT(orders.id)) > 5"),
"Should substitute metric in HAVING clause of IN subquery");
Ok(())
}