apply optional limit to all query routes in query engine

This commit is contained in:
dal 2025-04-01 12:10:15 -06:00
parent 8899fb8549
commit bf008d70b1
No known key found for this signature in database
GPG Key ID: 16F4B0E1E9F61122
9 changed files with 83 additions and 1476 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,10 @@
The Query Engine library provides connectivity and query execution functionality for various data sources in Buster. It abstracts away the details of connecting to different database systems, enables secure credential management, and provides a unified interface for executing queries across multiple database technologies.
## Result Limitations
All database queries are capped at a maximum of 5000 rows by default to ensure performance and prevent excessive resource usage. This limit can be overridden by passing a specific limit parameter when calling the query functions.
## Key Functionality
- Data source connection management for multiple database types
@@ -58,13 +62,18 @@ src/
## Usage Patterns
```rust
use query_engine::data_source_query_routes::query_engine::{execute_query, QueryResult};
use query_engine::data_types::{DataSource, DataSourceType};
use query_engine::data_source_query_routes::query_engine::query_engine;
use query_engine::data_types::DataType;
use uuid::Uuid;
use indexmap::IndexMap;
async fn example_query(data_source: DataSource) -> Result<QueryResult, anyhow::Error> {
// Execute a query against a data source
let query = "SELECT * FROM users LIMIT 10";
let result = execute_query(&data_source, query).await?;
async fn example_query(data_source_id: &Uuid, sql: &str) -> Result<Vec<IndexMap<String, DataType>>, anyhow::Error> {
// Execute a query against a data source using the default 5000-row limit
let result = query_engine(data_source_id, sql, None).await?;
// Or specify a custom limit
let custom_limit = Some(1000);
let limited_result = query_engine(data_source_id, sql, custom_limit).await?;
Ok(result)
}

View File

@@ -11,6 +11,7 @@ pub async fn bigquery_query(
client: Client,
project_id: String,
query: String,
limit: Option<i64>,
) -> Result<Vec<IndexMap<String, DataType>>> {
let query_request = QueryRequest {
connection_properties: None,
@@ -19,7 +20,7 @@ pub async fn bigquery_query(
kind: None,
labels: None,
location: None,
max_results: Some(500),
max_results: Some(limit.unwrap_or(5000).min(i32::MAX as i64) as i32),
maximum_bytes_billed: None,
parameter_mode: None,
preserve_nulls: None,

View File

@@ -10,6 +10,7 @@ use crate::{
pub async fn databricks_query(
databricks_client: Databricks,
query: String,
limit: Option<i64>,
) -> Result<Vec<IndexMap<std::string::String, DataType>>, Error> {
let results = match databricks_client.query(query).await {
Ok(results) => results,
@@ -20,9 +21,10 @@ pub async fn databricks_query(
};
let mut result: Vec<IndexMap<String, DataType>> = Vec::new();
let max_rows = limit.unwrap_or(5000) as usize;
let rows = match results.result.data_array {
Some(rows) => rows,
Some(rows) => rows.into_iter().take(max_rows).collect::<Vec<_>>(),
None => return Ok(Vec::new()),
};

View File

@@ -11,6 +11,7 @@ use crate::data_types::DataType;
pub async fn mysql_query(
pg_pool: Pool<MySql>,
query: String,
limit: Option<i64>,
) -> Result<Vec<IndexMap<std::string::String, DataType>>, Error> {
let mut stream = sqlx::query(&query).fetch(&pg_pool);
@@ -61,7 +62,11 @@ pub async fn mysql_query(
}
count += 1;
if count >= 5000 {
if let Some(row_limit) = limit {
if count >= row_limit {
break;
}
} else if count >= 5000 {
break;
}
}

View File

@@ -95,7 +95,7 @@ async fn route_to_query(
match redshift_query(redshift_client, sql.to_owned()).await {
match redshift_query(redshift_client, sql.to_owned(), limit).await {
Ok(results) => results,
Err(e) => {
tracing::error!("There was an issue while fetching the tables: {}", e);
@@ -114,7 +114,7 @@ async fn route_to_query(
}
};
let results = match mysql_query(mysql_pool, sql.to_owned()).await {
let results = match mysql_query(mysql_pool, sql.to_owned(), limit).await {
Ok(results) => results,
Err(e) => {
tracing::error!("There was an issue while fetching the tables: {}", e);
@@ -139,7 +139,7 @@ async fn route_to_query(
match bigquery_query(bq_client, project_id, sql.to_owned()).await {
match bigquery_query(bq_client, project_id, sql.to_owned(), limit).await {
Ok(results) => results,
Err(e) => {
tracing::error!("There was an issue while fetching the tables: {}", e);
@@ -160,7 +160,7 @@ async fn route_to_query(
}
};
let results = match sql_server_query(sql_server_pool, sql.to_owned()).await {
let results = match sql_server_query(sql_server_pool, sql.to_owned(), limit).await {
Ok(results) => results,
Err(e) => {
tracing::error!("There was an issue while fetching the tables: {}", e);
@@ -185,7 +185,7 @@ async fn route_to_query(
match databricks_query(databricks_client, sql.to_owned()).await {
match databricks_query(databricks_client, sql.to_owned(), limit).await {
Ok(results) => results,
Err(e) => {
tracing::error!("There was an issue while fetching the tables: {}", e);
@@ -204,7 +204,7 @@ async fn route_to_query(
match snowflake_query(snowflake_client, sql.to_owned()).await {
match snowflake_query(snowflake_client, sql.to_owned(), limit).await {
Ok(results) => results,
Err(e) => {
tracing::error!("There was an issue while fetching the tables: {}", e);

View File

@@ -11,6 +11,7 @@ use crate::data_types::DataType;
pub async fn redshift_query(
pg_pool: Pool<Postgres>,
query: String,
limit: Option<i64>,
) -> Result<Vec<IndexMap<std::string::String, DataType>>, Error> {
let mut stream = sqlx::query(&query).fetch(&pg_pool);
@@ -56,7 +57,11 @@ pub async fn redshift_query(
result.push(row_map);
count += 1;
if count >= 1000 {
if let Some(row_limit) = limit {
if count >= row_limit {
break;
}
} else if count >= 5000 {
break;
}
}

View File

@@ -115,12 +115,14 @@ fn handle_snowflake_timestamp_struct(
pub async fn snowflake_query(
mut snowflake_client: SnowflakeApi,
query: String,
limit: Option<i64>,
) -> Result<Vec<IndexMap<std::string::String, DataType>>, Error> {
const MAX_ROWS: usize = 1_000;
const DEFAULT_MAX_ROWS: usize = 5000;
let query_no_semicolon = query.trim_end_matches(';');
let max_rows = limit.map(|l| l as usize).unwrap_or(DEFAULT_MAX_ROWS);
let limited_query = if !query_no_semicolon.to_lowercase().contains("limit") {
format!("{} FETCH FIRST {} ROWS ONLY", query_no_semicolon, MAX_ROWS)
format!("{} FETCH FIRST {} ROWS ONLY", query_no_semicolon, max_rows)
} else {
query_no_semicolon.to_string()
};

View File

@@ -10,6 +10,7 @@ use tokio_util::compat::Compat;
pub async fn sql_server_query(
mut client: Client<Compat<TcpStream>>,
query: String,
limit: Option<i64>,
) -> Result<Vec<IndexMap<std::string::String, DataType>>, Error> {
let rows = match client.query(query, &[]).await {
Ok(rows) => rows,
@@ -22,7 +23,7 @@ pub async fn sql_server_query(
let mut result: Vec<IndexMap<String, DataType>> = Vec::new();
let query_result = match rows.into_first_result().await {
Ok(query_result) => query_result.into_iter().take(1000),
Ok(query_result) => query_result.into_iter().take(limit.unwrap_or(5000) as usize),
Err(e) => {
tracing::error!("Unable to fetch query result: {:?}", e);
let err = anyhow!("Unable to fetch query result: {}", e);