// File: buster/api/libs/sql_analyzer/tests/analysis_tests.rs

use sql_analyzer::{analyze_query, SqlAnalyzerError, JoinInfo};
use tokio;
/// A single schema-qualified, aliased table should produce exactly one table
/// entry (no joins, no CTEs) that carries both referenced columns.
#[tokio::test]
async fn test_simple_query() {
    let query = String::from("SELECT u.id, u.name FROM schema.users u");
    let summary = analyze_query(query).await.unwrap();

    // Top-level shape of the summary.
    assert_eq!(summary.tables.len(), 1);
    assert_eq!(summary.joins.len(), 0);
    assert_eq!(summary.ctes.len(), 0);

    // Identity of the only table.
    let users = &summary.tables[0];
    assert_eq!(users.database_identifier, None);
    assert_eq!(users.schema_identifier.as_deref(), Some("schema"));
    assert_eq!(users.table_identifier, "users");
    assert_eq!(users.alias.as_deref(), Some("u"));

    // Both selected columns must be attributed to that table, and nothing else.
    let column_count = users.columns.iter().count();
    assert!(
        column_count == 2,
        "Expected 2 columns, got {}",
        column_count
    );
    for (column, failure) in [
        ("id", "Missing 'id' column"),
        ("name", "Missing 'name' column"),
    ] {
        assert!(users.columns.contains(column), "{}", failure);
    }
}
/// A two-table `JOIN` should surface both tables and at least one join that
/// links `users` and `orders`.
#[tokio::test]
async fn test_joins() {
    let sql =
        "SELECT u.id, o.order_id FROM schema.users u JOIN schema.orders o ON u.id = o.user_id";
    let result = analyze_query(sql.to_string()).await.unwrap();

    assert_eq!(result.tables.len(), 2);
    assert!(!result.joins.is_empty(), "Should detect at least one join");

    // Borrow the identifiers instead of cloning them; they are only inspected.
    let table_names: Vec<&str> = result
        .tables
        .iter()
        .map(|t| t.table_identifier.as_str())
        .collect();
    assert!(table_names.contains(&"users"));
    assert!(table_names.contains(&"orders"));

    // Either orientation of the (users, orders) pair is accepted.
    let join_exists = result.joins.iter().any(|join| {
        (join.left_table == "users" && join.right_table == "orders")
            || (join.left_table == "orders" && join.right_table == "users")
    });
    assert!(
        join_exists,
        "Expected to find a join between tables users and orders"
    );
}
#[tokio::test]
async fn test_cte_query() {
let sql = "WITH user_orders AS (
SELECT u.id, o.order_id
FROM schema.users u
JOIN schema.orders o ON u.id = o.user_id
)
SELECT uo.id, uo.order_id FROM user_orders uo";
2025-04-30 04:27:38 +08:00
let result = analyze_query(sql.to_string()).await.unwrap();
2025-04-30 04:27:38 +08:00
println!("Result: {:?}", result);
2025-04-30 04:27:38 +08:00
assert_eq!(result.ctes.len(), 1);
let cte = &result.ctes[0];
assert_eq!(cte.name, "user_orders");
assert_eq!(cte.summary.tables.len(), 2);
assert_eq!(cte.summary.joins.len(), 1);
}
#[tokio::test]
async fn test_vague_references() {
2025-04-30 04:27:38 +08:00
let sql = "SELECT u.id FROM users u";
let result = analyze_query(sql.to_string()).await;
assert!(result.is_err());
if let Err(SqlAnalyzerError::VagueReferences(msg)) = result {
2025-04-29 23:21:03 +08:00
assert!(msg.contains("Vague tables") || msg.contains("Vague/Unknown"), "Expected VagueTables error for 'users u', got: {}", msg);
} else {
panic!("Expected VagueReferences error, got: {:?}", result);
}
2025-04-30 04:27:38 +08:00
let sql = "SELECT id FROM schema.users";
let result = analyze_query(sql.to_string()).await;
assert!(result.is_err());
if let Err(SqlAnalyzerError::VagueReferences(msg)) = result {
2025-04-29 23:21:03 +08:00
assert!(msg.contains("Vague columns") || msg.contains("Vague/Unknown"), "Expected VagueColumns error for 'id', got: {}", msg);
} else {
panic!("Expected VagueReferences error, got: {:?}", result);
}
}
/// A `database.schema.table` reference should be split into its database,
/// schema and table identifiers.
#[tokio::test]
async fn test_fully_qualified_query() {
    let query = String::from("SELECT u.id, u.name FROM database.schema.users u");
    let summary = analyze_query(query).await.unwrap();

    assert_eq!(summary.tables.len(), 1);

    // Each part of the three-part name lands in its own field.
    let users = &summary.tables[0];
    assert_eq!(users.database_identifier.as_deref(), Some("database"));
    assert_eq!(users.schema_identifier.as_deref(), Some("schema"));
    assert_eq!(users.table_identifier, "users");
}
#[tokio::test]
async fn test_complex_cte_lineage() {
let sql = "WITH
users_cte AS (
SELECT u.id, u.name FROM schema.users u
)
SELECT uc.id, uc.name FROM users_cte uc";
2025-04-30 04:27:38 +08:00
let result = analyze_query(sql.to_string()).await.unwrap();
2025-04-30 04:27:38 +08:00
assert_eq!(result.ctes.len(), 1);
let cte = &result.ctes[0];
assert_eq!(cte.name, "users_cte");
assert_eq!(cte.summary.tables.len(), 1);
}
#[tokio::test]
async fn test_invalid_sql() {
2025-04-30 04:27:38 +08:00
let sql = "SELECT * FRM users";
let result = analyze_query(sql.to_string()).await;
2025-04-30 04:27:38 +08:00
assert!(result.is_err());
if let Err(SqlAnalyzerError::ParseError(msg)) = result {
assert!(msg.contains("Expected") || msg.contains("syntax error"));
} else {
panic!("Expected ParseError, got: {:?}", result);
}
}
#[tokio::test]
2025-04-29 23:21:03 +08:00
async fn test_analysis_nested_subqueries_as_join() {
let sql = r#"
2025-04-29 23:21:03 +08:00
WITH main_data AS (
SELECT
t1.col1,
t2.col2,
t1.id as t1_id,
2025-04-30 04:27:38 +08:00
c.id as c_id
2025-04-29 23:21:03 +08:00
FROM db1.schema1.tableA t1
JOIN db1.schema1.tableB t2 ON t1.id = t2.a_id
2025-04-30 04:27:38 +08:00
LEFT JOIN db1.schema2.tableC c ON c.id = t1.id
2025-04-29 23:21:03 +08:00
WHERE t1.status = 'active'
)
SELECT
2025-04-29 23:21:03 +08:00
md.col1,
2025-04-30 04:27:38 +08:00
COUNT(md.c_id) as sub_count
FROM
2025-04-30 04:27:38 +08:00
main_data md
2025-04-29 23:21:03 +08:00
WHERE md.col1 > 100
2025-04-30 04:27:38 +08:00
GROUP BY md.col1;
2025-04-29 23:21:03 +08:00
"#;
let result = analyze_query(sql.to_string())
.await
2025-04-30 04:27:38 +08:00
.expect("Analysis failed for nested query rewritten as JOIN in CTE");
println!("Result: {:?}", result);
2025-04-29 23:21:03 +08:00
2025-04-30 04:27:38 +08:00
assert_eq!(result.ctes.len(), 1, "Should detect 1 CTE");
2025-04-29 23:21:03 +08:00
let main_cte = &result.ctes[0];
assert_eq!(main_cte.name, "main_data");
assert_eq!(main_cte.summary.joins.len(), 2, "Should detect 2 joins inside the CTE summary");
let join1_exists = main_cte.summary.joins.iter().any(|j|
2025-04-30 04:27:38 +08:00
(j.left_table == "tableA" && j.right_table == "tableB") || (j.left_table == "tableB" && j.right_table == "tableA")
);
2025-04-29 23:21:03 +08:00
let join2_exists = main_cte.summary.joins.iter().any(|j|
2025-04-30 04:27:38 +08:00
(j.left_table == "tableB" && j.right_table == "tableC") || (j.left_table == "tableC" && j.right_table == "tableB")
2025-04-29 23:21:03 +08:00
);
2025-04-30 04:27:38 +08:00
assert!(join1_exists, "Join between tableA and tableB not found in CTE summary");
assert!(join2_exists, "Join between tableB and tableC not found in CTE summary");
2025-04-29 23:21:03 +08:00
assert_eq!(result.joins.len(), 0, "Overall query should have no direct joins");
2025-04-30 04:27:38 +08:00
assert_eq!(result.tables.len(), 4, "Should detect all 3 base tables (A, B, C) and the CTE");
2025-04-29 23:21:03 +08:00
let table_names: std::collections::HashSet<String> = result
.tables
.iter()
2025-04-29 23:21:03 +08:00
.map(|t| format!("{}.{}.{}", t.database_identifier.as_deref().unwrap_or(""), t.schema_identifier.as_deref().unwrap_or(""), t.table_identifier))
.collect();
2025-04-29 23:21:03 +08:00
assert!(table_names.contains(&"db1.schema1.tableA".to_string()), "Missing tableA");
assert!(table_names.contains(&"db1.schema1.tableB".to_string()), "Missing tableB");
assert!(table_names.contains(&"db1.schema2.tableC".to_string()), "Missing tableC");
}
/// Three `UNION ALL` branches, each reading one fully qualified table, should
/// yield three tables and neither CTEs nor joins.
#[tokio::test]
async fn test_analysis_union_all() {
    let union_sql = r#"
SELECT u.id, u.name FROM db1.schema1.users u WHERE u.status = 'active'
UNION ALL
SELECT e.user_id, e.username FROM db2.schema1.employees e WHERE e.role = 'manager'
UNION ALL
SELECT c.pk, c.full_name FROM db1.schema2.contractors c WHERE c.end_date IS NULL;
"#;

    let summary = analyze_query(union_sql.to_string())
        .await
        .expect("Analysis failed for UNION ALL test");

    assert_eq!(summary.ctes.len(), 0, "Should be no CTEs");
    assert_eq!(summary.joins.len(), 0, "Should be no joins");
    assert_eq!(
        summary.tables.len(),
        3,
        "Should detect all 3 tables across UNIONs"
    );

    // Collect `database.schema.table` keys; absent parts become empty strings.
    let mut qualified_names = std::collections::HashSet::<String>::new();
    for table in summary.tables.iter() {
        let database = table.database_identifier.as_deref().unwrap_or("");
        let schema = table.schema_identifier.as_deref().unwrap_or("");
        qualified_names.insert(format!(
            "{}.{}.{}",
            database, schema, table.table_identifier
        ));
    }

    // Every branch of the UNION must have contributed its table.
    for (qualified, failure) in [
        ("db1.schema1.users", "Missing users table"),
        ("db2.schema1.employees", "Missing employees table"),
        ("db1.schema2.contractors", "Missing contractors table"),
    ] {
        assert!(qualified_names.contains(qualified), "{}", failure);
    }
}
#[tokio::test]
async fn test_analysis_combined_complexity() {
let sql = r#"
WITH active_users AS (
2025-04-30 04:27:38 +08:00
SELECT u.id, u.name FROM db1.schema1.users u WHERE u.status = 'active'
),
recent_orders AS (
2025-04-30 04:27:38 +08:00
SELECT ro.user_id, MAX(ro.order_date) as last_order_date
FROM db1.schema1.orders ro
GROUP BY ro.user_id
)
SELECT au.name, ro.last_order_date
2025-04-30 04:27:38 +08:00
FROM active_users au
JOIN recent_orders ro ON au.id = ro.user_id
2025-04-30 04:27:38 +08:00
JOIN (
SELECT p_sub.item_id, p_sub.category FROM db2.schema1.products p_sub WHERE p_sub.is_available = true
) p ON p.item_id = ro.user_id
WHERE au.id IN (SELECT sl.user_id FROM db1.schema2.special_list sl)
UNION ALL
2025-04-30 04:27:38 +08:00
SELECT e.name, e.hire_date
FROM db2.schema1.employees e
WHERE e.department = 'Sales';
"#;
2025-04-30 04:27:38 +08:00
let result = analyze_query(sql.to_string()).await.unwrap();
2025-04-30 04:27:38 +08:00
println!("Result: {:?}", result);
2025-04-30 04:27:38 +08:00
assert_eq!(result.ctes.len(), 2, "Should detect 2 CTEs");
assert_eq!(result.joins.len(), 2, "Should detect 2 joins in the main query");
}