mirror of https://github.com/buster-so/buster.git
Merge pull request #292 from buster-so/dal/hotfixes
strict mode and ignore metadata func
This commit is contained in:
commit
792558a8f6
|
@ -652,7 +652,7 @@ impl Agent {
|
|||
session_id: thread_ref.id.to_string(),
|
||||
trace_id: Uuid::new_v4().to_string(),
|
||||
}),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
reasoning_effort: Some("medium".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ impl BusterMultiAgent {
|
|||
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-pro-exp-03-25".to_string()
|
||||
"o4-mini".to_string()
|
||||
};
|
||||
|
||||
// Create agent, passing the provider
|
||||
|
|
|
@ -49,7 +49,7 @@ pub fn get_configuration(agent_data: &ModeAgentData, data_source_syntax: Option<
|
|||
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-pro-exp-03-25".to_string()
|
||||
"o4-mini".to_string()
|
||||
};
|
||||
|
||||
// 3. Define the tool loader closure
|
||||
|
@ -349,6 +349,12 @@ To conclude your worklow, you use the `finish_and_respond` tool to send a final
|
|||
- **SQL Requirements**:
|
||||
- Use database-qualified schema-qualified table names (`<DATABASE_NAME>.<SCHEMA_NAME>.<TABLE_NAME>`).
|
||||
- Use fully qualified column names with table aliases (e.g., `<table_alias>.<column>`).
|
||||
- **MANDATORY SQL NAMING CONVENTIONS**:
|
||||
- **All Table References**: MUST be fully qualified: `DATABASE_NAME.SCHEMA_NAME.TABLE_NAME`.
|
||||
- **All Column References**: MUST be qualified with their table alias (e.g., `alias.column_name`) or CTE name (e.g., `cte_alias.column_name_from_cte`).
|
||||
- **Inside CTE Definitions**: When defining a CTE (e.g., `WITH my_cte AS (SELECT t.column1 FROM DATABASE.SCHEMA.TABLE1 t ...)`), all columns selected from underlying database tables MUST use their table alias (e.g., `t.column1`, not just `column1`). This applies even if the CTE is simple and selects from only one table.
|
||||
- **Selecting From CTEs**: When selecting from a defined CTE, use the CTE's alias for its columns (e.g., `SELECT mc.column1 FROM my_cte mc ...`).
|
||||
- **Universal Application**: These naming conventions are strict requirements and apply universally to all parts of the SQL query, including every CTE definition and every subsequent SELECT statement. Non-compliance will lead to errors.
|
||||
- **Context Adherence**: Strictly use only columns that are present in the data context provided by search results. Never invent or assume columns.
|
||||
- Select specific columns (avoid `SELECT *` or `COUNT(*)`).
|
||||
- Use CTEs instead of subqueries, and use snake_case for naming them.
|
||||
|
|
|
@ -36,7 +36,7 @@ pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option
|
|||
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-pro-exp-03-25".to_string()
|
||||
"o4-mini".to_string()
|
||||
};
|
||||
|
||||
// 3. Define the tool loader closure
|
||||
|
|
|
@ -41,7 +41,7 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
|
|||
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-pro-exp-03-25".to_string()
|
||||
"o4-mini".to_string()
|
||||
};
|
||||
|
||||
// 3. Define the tool loader closure
|
||||
|
|
|
@ -39,7 +39,7 @@ pub fn get_configuration(
|
|||
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-pro-exp-03-25".to_string()
|
||||
"o4-mini".to_string()
|
||||
};
|
||||
|
||||
// 3. Define the tool loader closure
|
||||
|
|
|
@ -39,7 +39,7 @@ pub fn get_configuration(
|
|||
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-pro-exp-03-25".to_string()
|
||||
"o4-mini".to_string()
|
||||
};
|
||||
|
||||
// 3. Define the tool loader closure
|
||||
|
|
|
@ -31,7 +31,7 @@ pub fn get_configuration(
|
|||
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
|
||||
"o4-mini".to_string()
|
||||
} else {
|
||||
"gemini-2.5-flash-preview-04-17".to_string()
|
||||
"gemini-2.0-flash-001".to_string()
|
||||
};
|
||||
|
||||
// 3. Define the tool loader closure
|
||||
|
|
|
@ -163,6 +163,7 @@ pub const METRIC_YML_SCHEMA: &str = r##"
|
|||
# Note: Respond only with the time period, without explanation or additional copy.
|
||||
# `sql`: The SQL query for the metric.
|
||||
# - RULE: MUST use the pipe `|` block scalar style to preserve formatting and newlines.
|
||||
# - NOTE: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This applies to all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs.
|
||||
# - Example:
|
||||
# sql: |
|
||||
# SELECT ...
|
||||
|
@ -231,6 +232,7 @@ properties:
|
|||
SQL query using YAML pipe syntax (|).
|
||||
The SQL query should be formatted with proper indentation using the YAML pipe (|) syntax.
|
||||
This ensures the multi-line SQL is properly parsed while preserving whitespace and newlines.
|
||||
IMPORTANT: Remember to use fully qualified names: DATABASE_NAME.SCHEMA_NAME.TABLE_NAME for tables and table_alias.column for columns. This rule is critical for all table and column references, including those within Common Table Expressions (CTEs) and when selecting from CTEs.
|
||||
Example:
|
||||
sql: |
|
||||
SELECT column1, column2
|
||||
|
|
|
@ -367,7 +367,7 @@ impl ToolExecutor for CreateDashboardFilesTool {
|
|||
async fn get_schema(&self) -> Value {
|
||||
serde_json::json!({
|
||||
"name": self.get_name(),
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["files"],
|
||||
|
@ -377,7 +377,7 @@ impl ToolExecutor for CreateDashboardFilesTool {
|
|||
"items": {
|
||||
"type": "object",
|
||||
"required": ["name", "yml_content"],
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
|
|
|
@ -308,7 +308,7 @@ impl ToolExecutor for CreateMetricFilesTool {
|
|||
serde_json::json!({
|
||||
"name": self.get_name(),
|
||||
"description": get_create_metrics_description().await,
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["files"],
|
||||
|
@ -318,7 +318,7 @@ impl ToolExecutor for CreateMetricFilesTool {
|
|||
"items": {
|
||||
"type": "object",
|
||||
"required": ["name", "yml_content"],
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
|
|
|
@ -382,7 +382,7 @@ impl ToolExecutor for ModifyDashboardFilesTool {
|
|||
serde_json::json!({
|
||||
"name": self.get_name(),
|
||||
"description": get_modify_dashboards_description().await,
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["files"],
|
||||
|
@ -393,7 +393,7 @@ impl ToolExecutor for ModifyDashboardFilesTool {
|
|||
"items": {
|
||||
"type": "object",
|
||||
"required": ["id", "yml_content"],
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
|
|
|
@ -486,7 +486,7 @@ impl ToolExecutor for ModifyMetricFilesTool {
|
|||
serde_json::json!({
|
||||
"name": self.get_name(),
|
||||
"description": get_modify_metrics_description().await,
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["files"],
|
||||
|
|
|
@ -87,7 +87,7 @@ impl ToolExecutor for CreatePlanInvestigative {
|
|||
serde_json::json!({
|
||||
"name": self.get_name(),
|
||||
"description": get_create_plan_investigative_description().await,
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
|
|
@ -84,7 +84,7 @@ impl ToolExecutor for CreatePlanStraightforward {
|
|||
serde_json::json!({
|
||||
"name": self.get_name(),
|
||||
"description": get_create_plan_straightforward_description().await,
|
||||
"strict": false,
|
||||
"strict": true,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ChatCompletionRequest {
|
||||
|
@ -50,7 +51,7 @@ pub struct ChatCompletionRequest {
|
|||
pub parallel_tool_calls: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub user: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(skip_serializing_if = "should_skip_metadata")]
|
||||
pub metadata: Option<Metadata>,
|
||||
}
|
||||
|
||||
|
@ -1065,3 +1066,17 @@ mod tests {
|
|||
assert_eq!(deserialized_resp.usage.total_tokens, 99);
|
||||
}
|
||||
}
|
||||
|
||||
fn should_skip_metadata(metadata: &Option<Metadata>) -> bool {
|
||||
let env_var_condition_met = match env::var("LLM_BASE_URL") {
|
||||
Ok(val) => val == "https://api.openai.com/v1",
|
||||
Err(_) => false, // If env var is not set or any error, condition is not met
|
||||
};
|
||||
|
||||
if env_var_condition_met {
|
||||
return true; // If env var condition is met, always skip
|
||||
}
|
||||
|
||||
// If env var condition is not met, then skip only if metadata is None
|
||||
metadata.is_none()
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use anyhow::Result;
|
|||
use rand;
|
||||
use sqlparser::ast::{
|
||||
Cte, Expr, Join, JoinConstraint, JoinOperator, ObjectName, Query, SelectItem, SetExpr,
|
||||
Statement, TableFactor, Visit, Visitor, WindowSpec,
|
||||
Statement, TableFactor, Visit, Visitor, WindowSpec, TableAlias,
|
||||
};
|
||||
use sqlparser::dialect::GenericDialect;
|
||||
use sqlparser::parser::Parser;
|
||||
|
@ -49,6 +49,7 @@ struct QueryAnalyzer {
|
|||
vague_columns: Vec<String>,
|
||||
vague_tables: Vec<String>,
|
||||
ctes: Vec<CteSummary>,
|
||||
current_select_list_aliases: HashSet<String>,
|
||||
}
|
||||
|
||||
impl QueryAnalyzer {
|
||||
|
@ -65,6 +66,7 @@ impl QueryAnalyzer {
|
|||
current_from_relation_identifier: None,
|
||||
current_scope_aliases: HashMap::new(),
|
||||
parent_scope_aliases: HashMap::new(),
|
||||
current_select_list_aliases: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -207,6 +209,7 @@ impl QueryAnalyzer {
|
|||
// Process a SELECT query
|
||||
fn process_select_query(&mut self, select: &sqlparser::ast::Select) {
|
||||
self.current_scope_aliases.clear();
|
||||
self.current_select_list_aliases.clear();
|
||||
self.current_from_relation_identifier = None;
|
||||
|
||||
let mut join_conditions_to_visit: Vec<&Expr> = Vec::new();
|
||||
|
@ -223,6 +226,14 @@ impl QueryAnalyzer {
|
|||
}
|
||||
}
|
||||
|
||||
// Populate select list aliases *before* processing expressions in WHERE, GROUP BY, HAVING, or SELECT list itself
|
||||
// This makes them available for resolution in those clauses.
|
||||
for item in &select.projection {
|
||||
if let SelectItem::ExprWithAlias { alias, .. } = item {
|
||||
self.current_select_list_aliases.insert(alias.value.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Process all parts of the query with collected context
|
||||
let combined_aliases_for_visit = self
|
||||
.current_scope_aliases
|
||||
|
@ -1031,6 +1042,13 @@ impl QueryAnalyzer {
|
|||
}
|
||||
}
|
||||
None => {
|
||||
// Check if it's a known select list alias first
|
||||
if self.current_select_list_aliases.contains(column) {
|
||||
// It's a select list alias, consider it resolved for this scope.
|
||||
// No need to add to vague_columns or assign to a table.
|
||||
return;
|
||||
}
|
||||
|
||||
// Special handling for nested fields without qualifier
|
||||
// For example: "SELECT user.device.type" in BigQuery becomes "SELECT user__device__type"
|
||||
if dialect_nested {
|
||||
|
@ -1142,6 +1160,7 @@ impl QueryAnalyzer {
|
|||
scope_stack: self.scope_stack.clone(),
|
||||
parent_scope_aliases: HashMap::new(),
|
||||
current_scope_aliases: HashMap::new(),
|
||||
current_select_list_aliases: HashSet::new(),
|
||||
..QueryAnalyzer::new()
|
||||
}
|
||||
}
|
||||
|
@ -1172,12 +1191,11 @@ impl Visitor for QueryAnalyzer {
|
|||
type Break = ();
|
||||
|
||||
fn pre_visit_expr(&mut self, expr: &Expr) -> ControlFlow<Self::Break> {
|
||||
let available_aliases = self
|
||||
.current_scope_aliases
|
||||
.iter()
|
||||
.chain(self.parent_scope_aliases.iter())
|
||||
.map(|(k, v)| (k.clone(), v.clone()))
|
||||
.collect();
|
||||
let mut available_aliases = self.parent_scope_aliases.clone();
|
||||
available_aliases.extend(self.current_scope_aliases.clone());
|
||||
for alias in &self.current_select_list_aliases {
|
||||
available_aliases.insert(alias.clone(), alias.clone()); // Select list aliases map to themselves
|
||||
}
|
||||
|
||||
match expr {
|
||||
Expr::Identifier(ident) => {
|
||||
|
@ -1222,14 +1240,29 @@ impl QueryAnalyzer {
|
|||
sqlparser::ast::FunctionArg::Unnamed(arg_expr) => {
|
||||
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = arg_expr {
|
||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
||||
} else if let sqlparser::ast::FunctionArgExpr::QualifiedWildcard(name) = arg_expr {
|
||||
// Handle cases like COUNT(table.*)
|
||||
let qualifier = name.0.first().map(|i| i.value.clone()).unwrap_or_default();
|
||||
if !qualifier.is_empty() {
|
||||
if !available_aliases.contains_key(&qualifier) && // Check against combined available_aliases
|
||||
!self.tables.contains_key(&qualifier) &&
|
||||
!self.is_known_cte_definition(&qualifier) {
|
||||
self.vague_tables.push(qualifier);
|
||||
}
|
||||
}
|
||||
} else if let sqlparser::ast::FunctionArgExpr::Wildcard = arg_expr {
|
||||
// Handle COUNT(*) - no specific column to track here
|
||||
}
|
||||
}
|
||||
sqlparser::ast::FunctionArg::Named { arg: named_arg, .. } => {
|
||||
sqlparser::ast::FunctionArg::Named { name, arg: named_arg, operator: _ } => {
|
||||
// Argument name itself might be an identifier (though less common in SQL for this context)
|
||||
// self.add_column_reference(None, &name.value, &available_aliases);
|
||||
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = named_arg {
|
||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
||||
}
|
||||
}
|
||||
sqlparser::ast::FunctionArg::ExprNamed { arg: expr_named_arg, .. } => {
|
||||
sqlparser::ast::FunctionArg::ExprNamed { name, arg: expr_named_arg, operator: _ } => {
|
||||
// self.add_column_reference(None, &name.value, &available_aliases);
|
||||
if let sqlparser::ast::FunctionArgExpr::Expr(expr) = expr_named_arg {
|
||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
||||
}
|
||||
|
@ -1246,16 +1279,41 @@ impl QueryAnalyzer {
|
|||
..
|
||||
})) = &function.over
|
||||
{
|
||||
for expr in partition_by {
|
||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
||||
for expr_item in partition_by { // expr_item is &Expr
|
||||
self.visit_expr_with_parent_scope(expr_item, available_aliases);
|
||||
}
|
||||
for order_expr in order_by {
|
||||
self.visit_expr_with_parent_scope(&order_expr.expr, available_aliases);
|
||||
for order_expr_item in order_by { // order_expr_item is &OrderByExpr
|
||||
self.visit_expr_with_parent_scope(&order_expr_item.expr, available_aliases);
|
||||
}
|
||||
if let Some(frame) = window_frame {
|
||||
frame.start_bound.visit(self);
|
||||
// frame.start_bound and frame.end_bound are WindowFrameBound
|
||||
// which can contain Expr that needs visiting.
|
||||
// The default Visitor implementation should handle these if they are Expr.
|
||||
// However, sqlparser::ast::WindowFrameBound is not directly visitable.
|
||||
// We need to manually extract expressions from it.
|
||||
|
||||
// Example for start_bound:
|
||||
match &frame.start_bound {
|
||||
sqlparser::ast::WindowFrameBound::CurrentRow => {}
|
||||
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
|
||||
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
|
||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
||||
}
|
||||
sqlparser::ast::WindowFrameBound::Preceding(None) |
|
||||
sqlparser::ast::WindowFrameBound::Following(None) => {}
|
||||
}
|
||||
|
||||
// Example for end_bound:
|
||||
if let Some(end_bound) = &frame.end_bound {
|
||||
end_bound.visit(self);
|
||||
match end_bound {
|
||||
sqlparser::ast::WindowFrameBound::CurrentRow => {}
|
||||
sqlparser::ast::WindowFrameBound::Preceding(Some(expr)) |
|
||||
sqlparser::ast::WindowFrameBound::Following(Some(expr)) => {
|
||||
self.visit_expr_with_parent_scope(expr, available_aliases);
|
||||
}
|
||||
sqlparser::ast::WindowFrameBound::Preceding(None) |
|
||||
sqlparser::ast::WindowFrameBound::Following(None) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,52 @@ async fn test_simple_query() {
|
|||
assert!(table.columns.contains("name"), "Missing 'name' column");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_complex_cte_with_date_function() {
|
||||
let sql = "WITH top5 AS (
|
||||
SELECT ptr.product_name, SUM(ptr.metric_producttotalrevenue) AS total_revenue
|
||||
FROM ont_ont.product_total_revenue AS ptr
|
||||
GROUP BY ptr.product_name
|
||||
ORDER BY total_revenue DESC
|
||||
LIMIT 5
|
||||
)
|
||||
SELECT
|
||||
MAKE_DATE(pqs.year::int, ((pqs.quarter - 1) * 3 + 1)::int, 1) AS quarter_start,
|
||||
pqs.product_name,
|
||||
SUM(pqs.metric_productquarterlysales) AS quarterly_revenue
|
||||
FROM ont_ont.product_quarterly_sales AS pqs
|
||||
JOIN top5 ON pqs.product_name = top5.product_name
|
||||
GROUP BY quarter_start, pqs.product_name
|
||||
ORDER BY quarter_start ASC, pqs.product_name;";
|
||||
|
||||
let result = analyze_query(sql.to_string()).await.unwrap();
|
||||
|
||||
// Check CTE
|
||||
assert_eq!(result.ctes.len(), 1);
|
||||
let cte = &result.ctes[0];
|
||||
assert_eq!(cte.name, "top5");
|
||||
assert_eq!(cte.summary.tables.len(), 1);
|
||||
assert_eq!(cte.summary.joins.len(), 0);
|
||||
|
||||
// Check main query tables
|
||||
assert_eq!(result.tables.len(), 2);
|
||||
let table_names: Vec<String> = result.tables.iter().map(|t| t.table_identifier.clone()).collect();
|
||||
assert!(table_names.contains(&"product_quarterly_sales".to_string()));
|
||||
assert!(table_names.contains(&"product_total_revenue".to_string()));
|
||||
|
||||
// Check joins
|
||||
assert_eq!(result.joins.len(), 1);
|
||||
let join = result.joins.iter().next().unwrap();
|
||||
assert_eq!(join.left_table, "product_quarterly_sales");
|
||||
assert_eq!(join.right_table, "product_total_revenue");
|
||||
|
||||
// Check schema identifiers
|
||||
for table in result.tables {
|
||||
assert_eq!(table.schema_identifier, Some("ont_ont".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_joins() {
|
||||
let sql =
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
dev:
|
||||
docker stop buster-redis-make || true && \
|
||||
docker rm buster-redis-make || true && \
|
||||
cd .. && docker run -d --name buster-redis-make -p 6379:6379 -v buster_redis_data:/data redis && cd api && \
|
||||
supabase start
|
||||
supabase db reset
|
||||
|
@ -26,6 +28,8 @@ stop:
|
|||
supabase stop
|
||||
|
||||
fast:
|
||||
docker stop buster-redis-make || true && \
|
||||
docker rm buster-redis-make || true && \
|
||||
cd .. && docker run -d --name buster-redis-make -p 6379:6379 -v buster_redis_data:/data redis && cd api && \
|
||||
export RUST_LOG=debug && \
|
||||
export CARGO_INCREMENTAL=1 && \
|
||||
|
|
Loading…
Reference in New Issue