Merge branch 'dallin/bus-920-feature-finish-rest-of-permissions' of https://github.com/buster-so/buster into dallin/bus-920-feature-finish-rest-of-permissions

This commit is contained in:
Nate Kelley 2025-01-22 10:32:38 -07:00
commit 2171e3edad
No known key found for this signature in database
GPG Key ID: FD90372AB8D98B4F
3 changed files with 332 additions and 95 deletions

View File

@ -11,7 +11,7 @@ anyhow = "1.0.86"
arrow = { version = "54.0.0", features = ["json"] }
async-compression = { version = "0.4.11", features = ["tokio"] }
axum = { version = "0.7.5", features = ["ws"] }
base64 = "0.22.1"
base64 = "0.21"
bb8-redis = "0.18.0"
chrono = { version = "0.4.38", features = ["serde"] }
cohere-rust = "0.6.0"

View File

@ -8,7 +8,8 @@ use crate::{
clients::supabase_vault::read_secret,
query_engine::{
credentials::{
BigqueryCredentials, DatabricksCredentials, MySqlCredentials, PostgresCredentials, SnowflakeCredentials, SqlServerCredentials,
BigqueryCredentials, DatabricksCredentials, MySqlCredentials, PostgresCredentials,
SnowflakeCredentials, SqlServerCredentials,
},
data_source_connections::{
get_bigquery_client::get_bigquery_client,
@ -228,7 +229,7 @@ async fn route_to_query(
DataSourceType::Snowflake => {
let credentials: SnowflakeCredentials = serde_json::from_str(&credentials_string)?;
let snowflake_client = match get_snowflake_client(&credentials).await {
let mut snowflake_client = match get_snowflake_client(&credentials).await {
Ok(snowflake_client) => snowflake_client,
Err(e) => {
tracing::error!("There was an issue while establishing a connection to the parent data source: {}", e);

View File

@ -1,8 +1,17 @@
use arrow::array::Array;
use arrow::array::{Array, AsArray};
use indexmap::IndexMap;
use arrow::array::{
BooleanArray, Int8Array, Int16Array, Int32Array, Int64Array,
UInt8Array, UInt16Array, UInt32Array, UInt64Array,
Float32Array, Float64Array, StringArray, LargeStringArray,
BinaryArray, LargeBinaryArray, Date32Array, Date64Array,
TimestampNanosecondArray, Decimal128Array, Decimal256Array,
FixedSizeBinaryArray,
};
use arrow::datatypes::TimeUnit;
use anyhow::{anyhow, Error};
use chrono::{LocalResult, TimeZone, Utc};
use chrono::{LocalResult, TimeZone, Utc, NaiveTime};
use snowflake_api::SnowflakeApi;
use serde_json::Value;
@ -10,7 +19,7 @@ use serde_json::Value;
use crate::utils::query_engine::data_types::DataType;
pub async fn snowflake_query(
snowflake_client: SnowflakeApi,
mut snowflake_client: SnowflakeApi,
query: String,
) -> Result<Vec<IndexMap<std::string::String, DataType>>, Error> {
let rows = match snowflake_client.exec(&query).await {
@ -27,51 +36,324 @@ pub async fn snowflake_query(
.enumerate()
.map(|(col_idx, field)| {
let column = batch.column(col_idx);
let value = match column.data_type() {
arrow::datatypes::DataType::Int64 => {
let array = column
.as_any()
.downcast_ref::<arrow::array::Int64Array>()
.unwrap();
if array.is_null(row_idx) {
Value::Null
} else {
Value::Number(array.value(row_idx).into())
let data_type = match column.data_type() {
arrow::datatypes::DataType::Boolean => {
let array = column.as_any().downcast_ref::<BooleanArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Bool(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::Int8 => {
let array = column.as_any().downcast_ref::<Int8Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int2(Some(array.value(row_idx) as i16)) }
}
arrow::datatypes::DataType::Int16 => {
let array = column.as_any().downcast_ref::<Int16Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int2(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::Int32 => {
let array = column.as_any().downcast_ref::<Int32Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int4(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::Int64 => {
let array = column.as_any().downcast_ref::<Int64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int8(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::UInt8 => {
let array = column.as_any().downcast_ref::<UInt8Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int2(Some(array.value(row_idx) as i16)) }
}
arrow::datatypes::DataType::UInt16 => {
let array = column.as_any().downcast_ref::<UInt16Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int4(Some(array.value(row_idx) as i32)) }
}
arrow::datatypes::DataType::UInt32 => {
let array = column.as_any().downcast_ref::<UInt32Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int8(Some(array.value(row_idx) as i64)) }
}
arrow::datatypes::DataType::UInt64 => {
let array = column.as_any().downcast_ref::<UInt64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Int8(Some(array.value(row_idx) as i64)) }
}
arrow::datatypes::DataType::Float32 => {
let array = column.as_any().downcast_ref::<Float32Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Float4(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::Float64 => {
let array = column
.as_any()
.downcast_ref::<arrow::array::Float64Array>()
.unwrap();
if array.is_null(row_idx) {
Value::Null
} else {
Value::Number(
serde_json::Number::from_f64(
array.value(row_idx),
)
.unwrap_or(serde_json::Number::from(0)),
)
}
let array = column.as_any().downcast_ref::<Float64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Float8(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::Utf8 => {
let array = column
.as_any()
.downcast_ref::<arrow::array::StringArray>()
.unwrap();
if array.is_null(row_idx) {
Value::Null
let array = column.as_any().downcast_ref::<StringArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Text(Some(array.value(row_idx).to_string())) }
}
arrow::datatypes::DataType::LargeUtf8 => {
let array = column.as_any().downcast_ref::<LargeStringArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Text(Some(array.value(row_idx).to_string())) }
}
arrow::datatypes::DataType::Binary => {
let array = column.as_any().downcast_ref::<BinaryArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Bytea(Some(array.value(row_idx).to_vec())) }
}
arrow::datatypes::DataType::LargeBinary => {
let array = column.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Bytea(Some(array.value(row_idx).to_vec())) }
}
arrow::datatypes::DataType::Date32 => {
let array = column.as_any().downcast_ref::<Date32Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let days = array.value(row_idx);
let timestamp = days as i64 * 24 * 60 * 60;
match Utc.timestamp_opt(timestamp, 0) {
LocalResult::Single(dt) => DataType::Date(Some(dt.date_naive())),
_ => DataType::Null,
}
}
}
arrow::datatypes::DataType::Date64 => {
let array = column.as_any().downcast_ref::<Date64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let millis = array.value(row_idx);
let secs = millis / 1000;
let nanos = ((millis % 1000) * 1_000_000) as u32;
match Utc.timestamp_opt(secs, nanos) {
LocalResult::Single(dt) => DataType::Date(Some(dt.date_naive())),
_ => DataType::Null,
}
}
}
arrow::datatypes::DataType::Timestamp(unit, tz) => {
let array = column.as_any().downcast_ref::<TimestampNanosecondArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let nanos = array.value(row_idx);
let (secs, subsec_nanos) = match unit {
TimeUnit::Second => (nanos, 0),
TimeUnit::Millisecond => (nanos / 1000, (nanos % 1000) * 1_000_000),
TimeUnit::Microsecond => (nanos / 1_000_000, (nanos % 1_000_000) * 1000),
TimeUnit::Nanosecond => (nanos / 1_000_000_000, nanos % 1_000_000_000),
};
match Utc.timestamp_opt(secs as i64, subsec_nanos as u32) {
LocalResult::Single(dt) => match tz {
Some(_) => DataType::Timestamptz(Some(dt)),
None => DataType::Timestamp(Some(dt.naive_utc())),
},
_ => DataType::Null,
}
}
}
arrow::datatypes::DataType::Decimal128(precision, scale) => {
let array = column.as_any().downcast_ref::<Decimal128Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let val = array.value(row_idx);
let scale_factor = 10_f64.powi(-(*scale as i32));
let float_val = val as f64 * scale_factor;
DataType::Float8(Some(float_val))
}
}
arrow::datatypes::DataType::Decimal256(precision, scale) => {
let array = column.as_any().downcast_ref::<Decimal256Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let val = array.value(row_idx);
// Convert the i256 to string first to handle large numbers
let val_str = val.to_string();
if let Ok(float_val) = val_str.parse::<f64>() {
let scale_factor = 10_f64.powi(-(*scale as i32));
DataType::Float8(Some(float_val * scale_factor))
} else {
Value::String(array.value(row_idx).to_string())
DataType::Null
}
}
// Add other data types as needed
}
arrow::datatypes::DataType::Null => DataType::Null,
arrow::datatypes::DataType::Float16 => {
let array = column.as_any().downcast_ref::<Float32Array>().unwrap(); // Float16 gets converted to Float32 in Arrow
if array.is_null(row_idx) { DataType::Null }
else { DataType::Float4(Some(array.value(row_idx))) }
}
arrow::datatypes::DataType::Time32(time_unit) => {
let array = column.as_any().downcast_ref::<Int32Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let val = array.value(row_idx);
let nanos = match time_unit {
TimeUnit::Second => val as i64 * 1_000_000_000,
TimeUnit::Millisecond => val as i64 * 1_000_000,
_ => val as i64,
};
let time = NaiveTime::from_num_seconds_from_midnight_opt(
(nanos / 1_000_000_000) as u32,
(nanos % 1_000_000_000) as u32,
);
match time {
Some(t) => DataType::Time(Some(t)),
None => DataType::Null,
}
}
}
arrow::datatypes::DataType::Time64(time_unit) => {
let array = column.as_any().downcast_ref::<Int64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else {
let val = array.value(row_idx);
let nanos = match time_unit {
TimeUnit::Microsecond => val * 1000,
TimeUnit::Nanosecond => val,
_ => val * 1_000_000_000,
};
let time = NaiveTime::from_num_seconds_from_midnight_opt(
(nanos / 1_000_000_000) as u32,
(nanos % 1_000_000_000) as u32,
);
match time {
Some(t) => DataType::Time(Some(t)),
None => DataType::Null,
}
}
}
arrow::datatypes::DataType::Duration(_) => {
// Convert duration to milliseconds as float for consistency
let array = column.as_any().downcast_ref::<Int64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Float8(Some(array.value(row_idx) as f64)) }
}
arrow::datatypes::DataType::Interval(_) => {
// Convert interval to a string representation
let array = column.as_any().downcast_ref::<Int64Array>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Text(Some(array.value(row_idx).to_string())) }
}
arrow::datatypes::DataType::FixedSizeBinary(_) => {
let array = column.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Bytea(Some(array.value(row_idx).to_vec())) }
}
arrow::datatypes::DataType::BinaryView => {
// BinaryView is similar to Binary
let array = column.as_any().downcast_ref::<BinaryArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Bytea(Some(array.value(row_idx).to_vec())) }
}
arrow::datatypes::DataType::Utf8View => {
// Utf8View is similar to Utf8
let array = column.as_any().downcast_ref::<StringArray>().unwrap();
if array.is_null(row_idx) { DataType::Null }
else { DataType::Text(Some(array.value(row_idx).to_string())) }
}
arrow::datatypes::DataType::List(_) |
arrow::datatypes::DataType::ListView(_) |
arrow::datatypes::DataType::FixedSizeList(_, _) |
arrow::datatypes::DataType::LargeList(_) |
arrow::datatypes::DataType::LargeListView(_) => {
let list_array = column.as_any().downcast_ref::<arrow::array::ListArray>().unwrap();
if list_array.is_null(row_idx) { DataType::Null }
else {
let values = list_array.value(row_idx);
let json_array = Value::Array(
(0..values.len())
.filter_map(|i| {
if values.is_null(i) {
None
} else if let Some(num) = values.as_any().downcast_ref::<Int32Array>() {
Some(Value::Number(num.value(i).into()))
} else if let Some(num) = values.as_any().downcast_ref::<Int64Array>() {
Some(Value::Number(num.value(i).into()))
} else if let Some(str) = values.as_any().downcast_ref::<StringArray>() {
Some(Value::String(str.value(i).to_string()))
} else {
None
}
})
.collect()
);
DataType::Json(Some(json_array))
}
}
arrow::datatypes::DataType::Struct(fields) => {
let struct_array = column.as_any().downcast_ref::<arrow::array::StructArray>().unwrap();
if struct_array.is_null(row_idx) { DataType::Null }
else {
let mut map = serde_json::Map::new();
for (field, col) in fields.iter().zip(struct_array.columns().iter()) {
let field_name = field.name();
let value = match col.data_type() {
arrow::datatypes::DataType::Int32 => {
let array = col.as_any().downcast_ref::<Int32Array>().unwrap();
if array.is_null(row_idx) { Value::Null }
else { Value::Number(array.value(row_idx).into()) }
}
// Add more field types as needed
_ => Value::Null,
};
(field.name().clone(), value)
map.insert(field_name.to_string(), value);
}
DataType::Json(Some(Value::Object(map)))
}
}
arrow::datatypes::DataType::Union(_, _) => {
// Unions are complex - convert to string representation
DataType::Text(Some("Union type not fully supported".to_string()))
}
arrow::datatypes::DataType::Dictionary(_, _) => {
let dict_array = column.as_any().downcast_ref::<arrow::array::DictionaryArray<arrow::datatypes::Int32Type>>().unwrap();
if dict_array.is_null(row_idx) { DataType::Null }
else {
let values = dict_array.values();
match values.data_type() {
arrow::datatypes::DataType::Utf8 => {
let string_values = values.as_any().downcast_ref::<StringArray>().unwrap();
let key = dict_array.keys().value(row_idx);
DataType::Text(Some(string_values.value(key as usize).to_string()))
}
_ => DataType::Text(Some("Unsupported dictionary type".to_string())),
}
}
}
arrow::datatypes::DataType::Map(_, _) => {
// Convert map to JSON object
let map_array = column.as_map();
if map_array.is_null(row_idx) { DataType::Null }
else {
let entries = map_array.value(row_idx);
let mut json_map = serde_json::Map::new();
// Assuming string keys and numeric values for simplicity
for i in 0..entries.len() {
if let (Some(key), Some(value)) = (
entries.column(0).as_any().downcast_ref::<StringArray>().map(|arr| arr.value(i)),
entries.column(1).as_any().downcast_ref::<Int64Array>().map(|arr| arr.value(i))
) {
json_map.insert(key.to_string(), Value::Number(value.into()));
}
}
DataType::Json(Some(Value::Object(json_map)))
}
}
arrow::datatypes::DataType::RunEndEncoded(_, _) => {
// Convert run-length encoded data to its base type
// This is a simplified handling
DataType::Text(Some("Run-length encoded type not fully supported".to_string()))
}
};
(field.name().clone(), data_type)
})
.collect::<IndexMap<String, Value>>()
.collect::<IndexMap<String, DataType>>()
})
})
.collect()
@ -84,58 +366,12 @@ pub async fn snowflake_query(
}
};
let result = rows
.iter()
.map(|row| {
row.iter()
.map(|(key, value)| {
(
key.clone(),
match value {
Value::Null => DataType::Null,
Value::Bool(val) => DataType::Bool(Some(val.clone())),
Value::Number(val) => match val.as_i64() {
Some(int_val) => DataType::Int8(Some(int_val)),
None => match val.as_f64() {
Some(float_val) => DataType::Float8(Some(float_val)),
None => DataType::Null,
},
},
Value::String(val) => DataType::Text(Some(val.clone())),
Value::Array(_) => DataType::Null,
Value::Object(val) => {
tracing::debug!("OBJECT: {:#?}", val);
if let (
Some(&Value::Number(ref epoch)),
Some(&Value::Number(ref fraction)),
Some(&Value::Number(ref timezone)),
) = (val.get("epoch"), val.get("fraction"), val.get("timezone"))
{
if let (Some(epoch), Some(fraction), Some(_)) =
(epoch.as_i64(), fraction.as_i64(), timezone.as_i64())
{
match Utc.timestamp_opt(epoch, 0) {
LocalResult::Single(dt) => {
let nanos = fraction as u32;
let dt = dt
+ chrono::Duration::nanoseconds(nanos as i64);
DataType::Timestamptz(Some(dt))
}
_ => DataType::Null,
}
} else {
DataType::Null
}
} else {
DataType::Null
match snowflake_client.close_session().await {
Ok(_) => (),
Err(e) => {
tracing::error!("There was an issue while closing the snowflake client: {}", e);
}
}
},
)
})
.collect::<IndexMap<String, DataType>>()
})
.collect::<Vec<IndexMap<String, DataType>>>();
Ok(result)
Ok(rows)
}