mirror of https://github.com/buster-so/buster.git
feat: enhance column metadata retrieval across database sources
- Add support for capturing source type (table, view, materialized view)
- Improve column metadata queries for Postgres, MySQL, BigQuery, and Snowflake
- Include more comprehensive column information during dataset import
- Extend DatasetColumnRecord to include source_type field
This commit is contained in:
parent
6e5c299389
commit
fa480f6797
|
@ -26,6 +26,7 @@ pub struct DatasetColumnRecord {
|
||||||
pub type_: String,
|
pub type_: String,
|
||||||
pub nullable: bool,
|
pub nullable: bool,
|
||||||
pub comment: Option<String>,
|
pub comment: Option<String>,
|
||||||
|
pub source_type: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn import_dataset_columns(
|
pub async fn import_dataset_columns(
|
||||||
|
@ -143,39 +144,76 @@ async fn get_postgres_columns(
|
||||||
schema_name: &String,
|
schema_name: &String,
|
||||||
credentials: &PostgresCredentials,
|
credentials: &PostgresCredentials,
|
||||||
) -> Result<Vec<DatasetColumnRecord>> {
|
) -> Result<Vec<DatasetColumnRecord>> {
|
||||||
let (postgres_conn, child_process, tempfile) = match get_postgres_connection(credentials).await
|
let (postgres_conn, child_process, tempfile) = match get_postgres_connection(credentials).await {
|
||||||
{
|
|
||||||
Ok(conn) => conn,
|
Ok(conn) => conn,
|
||||||
Err(e) => return Err(e),
|
Err(e) => return Err(e),
|
||||||
};
|
};
|
||||||
|
|
||||||
let sql = format!("SELECT
|
// Query for tables and views
|
||||||
c.column_name as name,
|
let regular_sql = format!(
|
||||||
c.data_type as type_,
|
"SELECT
|
||||||
CASE WHEN c.is_nullable = 'YES' THEN true ELSE false END as nullable,
|
c.column_name as name,
|
||||||
pgd.description AS comment
|
c.data_type as type_,
|
||||||
FROM
|
CASE WHEN c.is_nullable = 'YES' THEN true ELSE false END as nullable,
|
||||||
information_schema.columns c
|
pgd.description AS comment,
|
||||||
LEFT JOIN
|
t.table_type as source_type
|
||||||
pg_catalog.pg_statio_all_tables as st on c.table_schema = st.schemaname and c.table_name = st.relname
|
FROM
|
||||||
LEFT JOIN
|
information_schema.columns c
|
||||||
pg_catalog.pg_description pgd on pgd.objoid = st.relid and pgd.objsubid = c.ordinal_position
|
JOIN
|
||||||
WHERE
|
information_schema.tables t ON c.table_name = t.table_name AND c.table_schema = t.table_schema
|
||||||
c.table_name = '{dataset_name}'
|
LEFT JOIN
|
||||||
AND c.table_schema = '{schema_name}'
|
pg_catalog.pg_statio_all_tables as st on c.table_schema = st.schemaname and c.table_name = st.relname
|
||||||
ORDER BY
|
LEFT JOIN
|
||||||
c.table_schema,
|
pg_catalog.pg_description pgd on pgd.objoid = st.relid and pgd.objsubid = c.ordinal_position
|
||||||
c.table_name,
|
WHERE
|
||||||
c.ordinal_position;"
|
c.table_name = '{dataset_name}'
|
||||||
|
AND c.table_schema = '{schema_name}'
|
||||||
|
AND t.table_type IN ('BASE TABLE', 'VIEW')
|
||||||
|
ORDER BY
|
||||||
|
c.ordinal_position;"
|
||||||
);
|
);
|
||||||
|
|
||||||
let cols = match sqlx::query_as::<_, DatasetColumnRecord>(&sql)
|
// Query for materialized views
|
||||||
|
let mv_sql = format!(
|
||||||
|
"SELECT
|
||||||
|
a.attname as name,
|
||||||
|
format_type(a.atttypid, a.atttypmod) as type_,
|
||||||
|
NOT a.attnotnull as nullable,
|
||||||
|
d.description as comment,
|
||||||
|
'MATERIALIZED_VIEW' as source_type
|
||||||
|
FROM pg_class c
|
||||||
|
JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||||
|
JOIN pg_attribute a ON a.attrelid = c.oid
|
||||||
|
LEFT JOIN pg_description d ON d.objoid = c.oid AND d.objsubid = a.attnum
|
||||||
|
WHERE c.relkind = 'm'
|
||||||
|
AND n.nspname = '{schema_name}'
|
||||||
|
AND c.relname = '{dataset_name}'
|
||||||
|
AND a.attnum > 0
|
||||||
|
AND NOT a.attisdropped
|
||||||
|
ORDER BY a.attnum;"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut cols = Vec::new();
|
||||||
|
|
||||||
|
// Get regular tables and views
|
||||||
|
let regular_cols = match sqlx::query_as::<_, DatasetColumnRecord>(&regular_sql)
|
||||||
.fetch_all(&postgres_conn)
|
.fetch_all(&postgres_conn)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(cols) => cols,
|
Ok(c) => c,
|
||||||
Err(e) => return Err(anyhow!("Error fetching columns: {:?}", e)),
|
Err(e) => return Err(anyhow!("Error fetching regular columns: {:?}", e)),
|
||||||
};
|
};
|
||||||
|
cols.extend(regular_cols);
|
||||||
|
|
||||||
|
// Get materialized view columns
|
||||||
|
let mv_cols = match sqlx::query_as::<_, DatasetColumnRecord>(&mv_sql)
|
||||||
|
.fetch_all(&postgres_conn)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(c) => c,
|
||||||
|
Err(e) => return Err(anyhow!("Error fetching materialized view columns: {:?}", e)),
|
||||||
|
};
|
||||||
|
cols.extend(mv_cols);
|
||||||
|
|
||||||
if let (Some(mut child_process), Some(tempfile)) = (child_process, tempfile) {
|
if let (Some(mut child_process), Some(tempfile)) = (child_process, tempfile) {
|
||||||
child_process.kill()?;
|
child_process.kill()?;
|
||||||
|
@ -198,16 +236,19 @@ async fn get_mysql_columns(
|
||||||
|
|
||||||
let sql = format!(
|
let sql = format!(
|
||||||
"SELECT
|
"SELECT
|
||||||
CAST(COLUMN_NAME AS CHAR) as name,
|
CAST(c.COLUMN_NAME AS CHAR) as name,
|
||||||
CAST(DATA_TYPE AS CHAR) as type_,
|
CAST(c.DATA_TYPE AS CHAR) as type_,
|
||||||
CASE WHEN IS_NULLABLE = 'YES' THEN true ELSE false END as nullable,
|
CASE WHEN c.IS_NULLABLE = 'YES' THEN true ELSE false END as nullable,
|
||||||
CAST(COLUMN_COMMENT AS CHAR) as comment
|
CAST(c.COLUMN_COMMENT AS CHAR) as comment,
|
||||||
|
CAST(t.TABLE_TYPE AS CHAR) as source_type
|
||||||
FROM
|
FROM
|
||||||
INFORMATION_SCHEMA.COLUMNS
|
INFORMATION_SCHEMA.COLUMNS c
|
||||||
|
JOIN
|
||||||
|
INFORMATION_SCHEMA.TABLES t ON c.TABLE_NAME = t.TABLE_NAME AND c.TABLE_SCHEMA = t.TABLE_SCHEMA
|
||||||
WHERE
|
WHERE
|
||||||
TABLE_NAME = '{}'
|
c.TABLE_NAME = '{}'
|
||||||
ORDER BY
|
ORDER BY
|
||||||
ORDINAL_POSITION;",
|
c.ORDINAL_POSITION;",
|
||||||
dataset_name
|
dataset_name
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -234,14 +275,35 @@ async fn get_bigquery_columns(
|
||||||
|
|
||||||
let sql = format!(
|
let sql = format!(
|
||||||
r#"
|
r#"
|
||||||
SELECT
|
WITH all_columns AS (
|
||||||
column_name AS name,
|
-- Regular tables and views
|
||||||
data_type AS type_,
|
SELECT
|
||||||
is_nullable = 'YES' AS nullable
|
column_name AS name,
|
||||||
FROM `region-us`.INFORMATION_SCHEMA.COLUMNS
|
data_type AS type_,
|
||||||
WHERE table_name = '{dataset_name}'
|
is_nullable = 'YES' AS nullable,
|
||||||
|
NULL as comment,
|
||||||
|
table_type as source_type
|
||||||
|
FROM `region-us`.INFORMATION_SCHEMA.COLUMNS c
|
||||||
|
JOIN `region-us`.INFORMATION_SCHEMA.TABLES t
|
||||||
|
USING(table_name, table_schema)
|
||||||
|
WHERE table_name = '{dataset_name}'
|
||||||
|
|
||||||
|
UNION ALL
|
||||||
|
|
||||||
|
-- Materialized views specific metadata if needed
|
||||||
|
SELECT
|
||||||
|
column_name AS name,
|
||||||
|
data_type AS type_,
|
||||||
|
is_nullable = 'YES' AS nullable,
|
||||||
|
NULL as comment,
|
||||||
|
'MATERIALIZED_VIEW' as source_type
|
||||||
|
FROM `region-us`.INFORMATION_SCHEMA.MATERIALIZED_VIEWS mv
|
||||||
|
JOIN `region-us`.INFORMATION_SCHEMA.COLUMNS c
|
||||||
|
USING(table_name, table_schema)
|
||||||
|
WHERE mv.table_name = '{dataset_name}'
|
||||||
|
)
|
||||||
|
SELECT * FROM all_columns
|
||||||
"#,
|
"#,
|
||||||
dataset_name = dataset_name
|
|
||||||
);
|
);
|
||||||
|
|
||||||
let query_request = QueryRequest {
|
let query_request = QueryRequest {
|
||||||
|
@ -256,7 +318,7 @@ async fn get_bigquery_columns(
|
||||||
.job()
|
.job()
|
||||||
.query(&project_id, query_request)
|
.query(&project_id, query_request)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| anyhow!("Error fetching table and views records: {:?}", e))?;
|
.map_err(|e| anyhow!("Error fetching columns: {:?}", e))?;
|
||||||
|
|
||||||
let mut columns = Vec::new();
|
let mut columns = Vec::new();
|
||||||
|
|
||||||
|
@ -284,11 +346,25 @@ async fn get_bigquery_columns(
|
||||||
.ok_or_else(|| anyhow!("Missing nullable value"))?
|
.ok_or_else(|| anyhow!("Missing nullable value"))?
|
||||||
.parse::<bool>()?;
|
.parse::<bool>()?;
|
||||||
|
|
||||||
|
let comment = cols[3]
|
||||||
|
.value
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(|s| s.to_string());
|
||||||
|
|
||||||
|
let source_type = cols[4]
|
||||||
|
.value
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.ok_or_else(|| anyhow!("Missing source type"))?
|
||||||
|
.to_string();
|
||||||
|
|
||||||
columns.push(DatasetColumnRecord {
|
columns.push(DatasetColumnRecord {
|
||||||
name,
|
name,
|
||||||
type_,
|
type_,
|
||||||
nullable,
|
nullable,
|
||||||
comment: None,
|
comment,
|
||||||
|
source_type,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -306,18 +382,44 @@ async fn get_snowflake_columns(
|
||||||
let uppercase_dataset_name = dataset_name.to_uppercase();
|
let uppercase_dataset_name = dataset_name.to_uppercase();
|
||||||
|
|
||||||
let sql = format!(
|
let sql = format!(
|
||||||
"SELECT
|
"WITH all_objects AS (
|
||||||
COLUMN_NAME AS name,
|
-- Regular tables and views
|
||||||
DATA_TYPE AS type_,
|
SELECT
|
||||||
CASE WHEN IS_NULLABLE = 'YES' THEN true ELSE false END AS nullable,
|
c.COLUMN_NAME AS name,
|
||||||
COMMENT AS comment
|
c.DATA_TYPE AS type_,
|
||||||
FROM
|
CASE WHEN c.IS_NULLABLE = 'YES' THEN true ELSE false END AS nullable,
|
||||||
INFORMATION_SCHEMA.COLUMNS
|
c.COMMENT AS comment,
|
||||||
WHERE
|
t.TABLE_TYPE as source_type
|
||||||
TABLE_NAME = '{}'
|
FROM
|
||||||
ORDER BY
|
INFORMATION_SCHEMA.COLUMNS c
|
||||||
ORDINAL_POSITION;",
|
JOIN
|
||||||
uppercase_dataset_name
|
INFORMATION_SCHEMA.TABLES t
|
||||||
|
ON c.TABLE_NAME = t.TABLE_NAME
|
||||||
|
AND c.TABLE_SCHEMA = t.TABLE_SCHEMA
|
||||||
|
WHERE
|
||||||
|
c.TABLE_NAME = '{uppercase_dataset_name}'
|
||||||
|
|
||||||
|
UNION ALL
|
||||||
|
|
||||||
|
-- Materialized views
|
||||||
|
SELECT
|
||||||
|
c.COLUMN_NAME AS name,
|
||||||
|
c.DATA_TYPE AS type_,
|
||||||
|
CASE WHEN c.IS_NULLABLE = 'YES' THEN true ELSE false END AS nullable,
|
||||||
|
c.COMMENT AS comment,
|
||||||
|
'MATERIALIZED_VIEW' as source_type
|
||||||
|
FROM
|
||||||
|
INFORMATION_SCHEMA.COLUMNS c
|
||||||
|
JOIN
|
||||||
|
INFORMATION_SCHEMA.VIEWS v
|
||||||
|
ON c.TABLE_NAME = v.TABLE_NAME
|
||||||
|
AND c.TABLE_SCHEMA = v.TABLE_SCHEMA
|
||||||
|
WHERE
|
||||||
|
c.TABLE_NAME = '{uppercase_dataset_name}'
|
||||||
|
AND v.IS_MATERIALIZED = 'YES'
|
||||||
|
)
|
||||||
|
SELECT * FROM all_objects
|
||||||
|
ORDER BY name;",
|
||||||
);
|
);
|
||||||
|
|
||||||
// Execute the query using the Snowflake client
|
// Execute the query using the Snowflake client
|
||||||
|
@ -385,6 +487,7 @@ async fn get_snowflake_columns(
|
||||||
type_,
|
type_,
|
||||||
nullable,
|
nullable,
|
||||||
comment,
|
comment,
|
||||||
|
source_type: "TABLE".to_string(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue