2025-04-29 22:31:51 +08:00
|
|
|
use sql_analyzer::{analyze_query, SqlAnalyzerError, JoinInfo};
|
|
|
|
use tokio;
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_simple_query() {
|
|
|
|
let sql = "SELECT u.id, u.name FROM schema.users u";
|
|
|
|
let result = analyze_query(sql.to_string()).await.unwrap();
|
|
|
|
|
|
|
|
assert_eq!(result.tables.len(), 1);
|
|
|
|
assert_eq!(result.joins.len(), 0);
|
|
|
|
assert_eq!(result.ctes.len(), 0);
|
|
|
|
|
|
|
|
let table = &result.tables[0];
|
|
|
|
assert_eq!(table.database_identifier, None);
|
|
|
|
assert_eq!(table.schema_identifier, Some("schema".to_string()));
|
|
|
|
assert_eq!(table.table_identifier, "users");
|
|
|
|
assert_eq!(table.alias, Some("u".to_string()));
|
|
|
|
|
|
|
|
let columns_vec: Vec<_> = table.columns.iter().collect();
|
|
|
|
assert!(
|
|
|
|
columns_vec.len() == 2,
|
|
|
|
"Expected 2 columns, got {}",
|
|
|
|
columns_vec.len()
|
|
|
|
);
|
|
|
|
assert!(table.columns.contains("id"), "Missing 'id' column");
|
|
|
|
assert!(table.columns.contains("name"), "Missing 'name' column");
|
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_joins() {
|
|
|
|
let sql =
|
|
|
|
"SELECT u.id, o.order_id FROM schema.users u JOIN schema.orders o ON u.id = o.user_id";
|
|
|
|
let result = analyze_query(sql.to_string()).await.unwrap();
|
|
|
|
|
|
|
|
assert_eq!(result.tables.len(), 2);
|
2025-04-29 23:21:03 +08:00
|
|
|
assert!(result.joins.len() > 0, "Should detect at least one join");
|
2025-04-29 22:31:51 +08:00
|
|
|
|
|
|
|
let table_names: Vec<String> = result
|
|
|
|
.tables
|
|
|
|
.iter()
|
|
|
|
.map(|t| t.table_identifier.clone())
|
|
|
|
.collect();
|
|
|
|
assert!(table_names.contains(&"users".to_string()));
|
|
|
|
assert!(table_names.contains(&"orders".to_string()));
|
|
|
|
|
2025-04-29 23:21:03 +08:00
|
|
|
let join_exists = result.joins.iter().any(|join| {
|
2025-04-30 04:27:38 +08:00
|
|
|
(join.left_table == "users" && join.right_table == "orders")
|
|
|
|
|| (join.left_table == "orders" && join.right_table == "users")
|
2025-04-29 22:31:51 +08:00
|
|
|
});
|
|
|
|
assert!(
|
2025-04-29 23:21:03 +08:00
|
|
|
join_exists,
|
2025-04-30 04:27:38 +08:00
|
|
|
"Expected to find a join between tables users and orders"
|
2025-04-29 22:31:51 +08:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_cte_query() {
|
|
|
|
let sql = "WITH user_orders AS (
|
|
|
|
SELECT u.id, o.order_id
|
|
|
|
FROM schema.users u
|
|
|
|
JOIN schema.orders o ON u.id = o.user_id
|
|
|
|
)
|
|
|
|
SELECT uo.id, uo.order_id FROM user_orders uo";
|
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
let result = analyze_query(sql.to_string()).await.unwrap();
|
2025-04-29 22:31:51 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
println!("Result: {:?}", result);
|
2025-04-29 22:31:51 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
assert_eq!(result.ctes.len(), 1);
|
|
|
|
let cte = &result.ctes[0];
|
|
|
|
assert_eq!(cte.name, "user_orders");
|
|
|
|
assert_eq!(cte.summary.tables.len(), 2);
|
|
|
|
assert_eq!(cte.summary.joins.len(), 1);
|
2025-04-29 22:31:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_vague_references() {
|
2025-04-30 04:27:38 +08:00
|
|
|
let sql = "SELECT u.id FROM users u";
|
2025-04-29 22:31:51 +08:00
|
|
|
let result = analyze_query(sql.to_string()).await;
|
|
|
|
|
|
|
|
assert!(result.is_err());
|
|
|
|
if let Err(SqlAnalyzerError::VagueReferences(msg)) = result {
|
2025-04-29 23:21:03 +08:00
|
|
|
assert!(msg.contains("Vague tables") || msg.contains("Vague/Unknown"), "Expected VagueTables error for 'users u', got: {}", msg);
|
2025-04-29 22:31:51 +08:00
|
|
|
} else {
|
|
|
|
panic!("Expected VagueReferences error, got: {:?}", result);
|
|
|
|
}
|
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
let sql = "SELECT id FROM schema.users";
|
2025-04-29 22:31:51 +08:00
|
|
|
let result = analyze_query(sql.to_string()).await;
|
|
|
|
|
|
|
|
assert!(result.is_err());
|
|
|
|
if let Err(SqlAnalyzerError::VagueReferences(msg)) = result {
|
2025-04-29 23:21:03 +08:00
|
|
|
assert!(msg.contains("Vague columns") || msg.contains("Vague/Unknown"), "Expected VagueColumns error for 'id', got: {}", msg);
|
2025-04-29 22:31:51 +08:00
|
|
|
} else {
|
|
|
|
panic!("Expected VagueReferences error, got: {:?}", result);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_fully_qualified_query() {
|
|
|
|
let sql = "SELECT u.id, u.name FROM database.schema.users u";
|
|
|
|
let result = analyze_query(sql.to_string()).await.unwrap();
|
|
|
|
|
|
|
|
assert_eq!(result.tables.len(), 1);
|
|
|
|
let table = &result.tables[0];
|
|
|
|
assert_eq!(table.database_identifier, Some("database".to_string()));
|
|
|
|
assert_eq!(table.schema_identifier, Some("schema".to_string()));
|
|
|
|
assert_eq!(table.table_identifier, "users");
|
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_complex_cte_lineage() {
|
|
|
|
let sql = "WITH
|
|
|
|
users_cte AS (
|
|
|
|
SELECT u.id, u.name FROM schema.users u
|
|
|
|
)
|
|
|
|
SELECT uc.id, uc.name FROM users_cte uc";
|
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
let result = analyze_query(sql.to_string()).await.unwrap();
|
2025-04-29 22:31:51 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
assert_eq!(result.ctes.len(), 1);
|
|
|
|
let cte = &result.ctes[0];
|
|
|
|
assert_eq!(cte.name, "users_cte");
|
|
|
|
assert_eq!(cte.summary.tables.len(), 1);
|
2025-04-29 22:31:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_invalid_sql() {
|
2025-04-30 04:27:38 +08:00
|
|
|
let sql = "SELECT * FRM users";
|
2025-04-29 22:31:51 +08:00
|
|
|
let result = analyze_query(sql.to_string()).await;
|
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
|
2025-04-29 22:31:51 +08:00
|
|
|
assert!(result.is_err());
|
|
|
|
if let Err(SqlAnalyzerError::ParseError(msg)) = result {
|
|
|
|
assert!(msg.contains("Expected") || msg.contains("syntax error"));
|
|
|
|
} else {
|
|
|
|
panic!("Expected ParseError, got: {:?}", result);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
2025-04-29 23:21:03 +08:00
|
|
|
async fn test_analysis_nested_subqueries_as_join() {
|
2025-04-29 22:31:51 +08:00
|
|
|
let sql = r#"
|
2025-04-29 23:21:03 +08:00
|
|
|
WITH main_data AS (
|
|
|
|
SELECT
|
|
|
|
t1.col1,
|
|
|
|
t2.col2,
|
|
|
|
t1.id as t1_id,
|
2025-04-30 04:27:38 +08:00
|
|
|
c.id as c_id
|
2025-04-29 23:21:03 +08:00
|
|
|
FROM db1.schema1.tableA t1
|
|
|
|
JOIN db1.schema1.tableB t2 ON t1.id = t2.a_id
|
2025-04-30 04:27:38 +08:00
|
|
|
LEFT JOIN db1.schema2.tableC c ON c.id = t1.id
|
2025-04-29 23:21:03 +08:00
|
|
|
WHERE t1.status = 'active'
|
|
|
|
)
|
2025-04-29 22:31:51 +08:00
|
|
|
SELECT
|
2025-04-29 23:21:03 +08:00
|
|
|
md.col1,
|
2025-04-30 04:27:38 +08:00
|
|
|
COUNT(md.c_id) as sub_count
|
2025-04-29 22:31:51 +08:00
|
|
|
FROM
|
2025-04-30 04:27:38 +08:00
|
|
|
main_data md
|
2025-04-29 23:21:03 +08:00
|
|
|
WHERE md.col1 > 100
|
2025-04-30 04:27:38 +08:00
|
|
|
GROUP BY md.col1;
|
2025-04-29 23:21:03 +08:00
|
|
|
"#;
|
2025-04-29 22:31:51 +08:00
|
|
|
|
|
|
|
let result = analyze_query(sql.to_string())
|
|
|
|
.await
|
2025-04-30 04:27:38 +08:00
|
|
|
.expect("Analysis failed for nested query rewritten as JOIN in CTE");
|
|
|
|
|
|
|
|
println!("Result: {:?}", result);
|
2025-04-29 23:21:03 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
assert_eq!(result.ctes.len(), 1, "Should detect 1 CTE");
|
2025-04-29 23:21:03 +08:00
|
|
|
let main_cte = &result.ctes[0];
|
|
|
|
assert_eq!(main_cte.name, "main_data");
|
|
|
|
|
|
|
|
assert_eq!(main_cte.summary.joins.len(), 2, "Should detect 2 joins inside the CTE summary");
|
|
|
|
|
|
|
|
let join1_exists = main_cte.summary.joins.iter().any(|j|
|
2025-04-30 04:27:38 +08:00
|
|
|
(j.left_table == "tableA" && j.right_table == "tableB") || (j.left_table == "tableB" && j.right_table == "tableA")
|
2025-04-29 22:31:51 +08:00
|
|
|
);
|
2025-04-29 23:21:03 +08:00
|
|
|
let join2_exists = main_cte.summary.joins.iter().any(|j|
|
2025-04-30 04:27:38 +08:00
|
|
|
(j.left_table == "tableB" && j.right_table == "tableC") || (j.left_table == "tableC" && j.right_table == "tableB")
|
2025-04-29 23:21:03 +08:00
|
|
|
);
|
2025-04-30 04:27:38 +08:00
|
|
|
assert!(join1_exists, "Join between tableA and tableB not found in CTE summary");
|
|
|
|
assert!(join2_exists, "Join between tableB and tableC not found in CTE summary");
|
2025-04-29 23:21:03 +08:00
|
|
|
|
|
|
|
assert_eq!(result.joins.len(), 0, "Overall query should have no direct joins");
|
2025-04-29 22:31:51 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
assert_eq!(result.tables.len(), 4, "Should detect all 3 base tables (A, B, C) and the CTE");
|
2025-04-29 23:21:03 +08:00
|
|
|
|
2025-04-29 22:31:51 +08:00
|
|
|
let table_names: std::collections::HashSet<String> = result
|
|
|
|
.tables
|
|
|
|
.iter()
|
2025-04-29 23:21:03 +08:00
|
|
|
.map(|t| format!("{}.{}.{}", t.database_identifier.as_deref().unwrap_or(""), t.schema_identifier.as_deref().unwrap_or(""), t.table_identifier))
|
2025-04-29 22:31:51 +08:00
|
|
|
.collect();
|
|
|
|
|
2025-04-29 23:21:03 +08:00
|
|
|
assert!(table_names.contains(&"db1.schema1.tableA".to_string()), "Missing tableA");
|
|
|
|
assert!(table_names.contains(&"db1.schema1.tableB".to_string()), "Missing tableB");
|
|
|
|
assert!(table_names.contains(&"db1.schema2.tableC".to_string()), "Missing tableC");
|
2025-04-29 22:31:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_analysis_union_all() {
|
|
|
|
let sql = r#"
|
|
|
|
SELECT u.id, u.name FROM db1.schema1.users u WHERE u.status = 'active'
|
|
|
|
UNION ALL
|
|
|
|
SELECT e.user_id, e.username FROM db2.schema1.employees e WHERE e.role = 'manager'
|
|
|
|
UNION ALL
|
|
|
|
SELECT c.pk, c.full_name FROM db1.schema2.contractors c WHERE c.end_date IS NULL;
|
|
|
|
"#;
|
|
|
|
|
|
|
|
let result = analyze_query(sql.to_string())
|
|
|
|
.await
|
|
|
|
.expect("Analysis failed for UNION ALL test");
|
|
|
|
|
|
|
|
assert_eq!(result.ctes.len(), 0, "Should be no CTEs");
|
|
|
|
assert_eq!(result.joins.len(), 0, "Should be no joins");
|
|
|
|
assert_eq!(result.tables.len(), 3, "Should detect all 3 tables across UNIONs");
|
|
|
|
|
|
|
|
let table_names: std::collections::HashSet<String> = result
|
|
|
|
.tables
|
|
|
|
.iter()
|
|
|
|
.map(|t| {
|
|
|
|
format!(
|
|
|
|
"{}.{}.{}",
|
|
|
|
t.database_identifier.as_deref().unwrap_or(""),
|
|
|
|
t.schema_identifier.as_deref().unwrap_or(""),
|
|
|
|
t.table_identifier
|
|
|
|
)
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
assert!(
|
|
|
|
table_names.contains(&"db1.schema1.users".to_string()),
|
|
|
|
"Missing users table"
|
|
|
|
);
|
|
|
|
assert!(
|
|
|
|
table_names.contains(&"db2.schema1.employees".to_string()),
|
|
|
|
"Missing employees table"
|
|
|
|
);
|
|
|
|
assert!(
|
|
|
|
table_names.contains(&"db1.schema2.contractors".to_string()),
|
|
|
|
"Missing contractors table"
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
async fn test_analysis_combined_complexity() {
|
|
|
|
let sql = r#"
|
|
|
|
WITH active_users AS (
|
2025-04-30 04:27:38 +08:00
|
|
|
SELECT u.id, u.name FROM db1.schema1.users u WHERE u.status = 'active'
|
2025-04-29 22:31:51 +08:00
|
|
|
),
|
|
|
|
recent_orders AS (
|
2025-04-30 04:27:38 +08:00
|
|
|
SELECT ro.user_id, MAX(ro.order_date) as last_order_date
|
2025-04-29 22:31:51 +08:00
|
|
|
FROM db1.schema1.orders ro
|
|
|
|
GROUP BY ro.user_id
|
|
|
|
)
|
|
|
|
SELECT au.name, ro.last_order_date
|
2025-04-30 04:27:38 +08:00
|
|
|
FROM active_users au
|
2025-04-29 22:31:51 +08:00
|
|
|
JOIN recent_orders ro ON au.id = ro.user_id
|
2025-04-30 04:27:38 +08:00
|
|
|
JOIN (
|
|
|
|
SELECT p_sub.item_id, p_sub.category FROM db2.schema1.products p_sub WHERE p_sub.is_available = true
|
|
|
|
) p ON p.item_id = ro.user_id
|
|
|
|
WHERE au.id IN (SELECT sl.user_id FROM db1.schema2.special_list sl)
|
2025-04-29 22:31:51 +08:00
|
|
|
UNION ALL
|
2025-04-30 04:27:38 +08:00
|
|
|
SELECT e.name, e.hire_date
|
2025-04-29 22:31:51 +08:00
|
|
|
FROM db2.schema1.employees e
|
|
|
|
WHERE e.department = 'Sales';
|
|
|
|
"#;
|
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
let result = analyze_query(sql.to_string()).await.unwrap();
|
2025-04-29 22:31:51 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
println!("Result: {:?}", result);
|
2025-04-29 22:31:51 +08:00
|
|
|
|
2025-04-30 04:27:38 +08:00
|
|
|
assert_eq!(result.ctes.len(), 2, "Should detect 2 CTEs");
|
|
|
|
assert_eq!(result.joins.len(), 2, "Should detect 2 joins in the main query");
|
|
|
|
}
|