From 8b765814547156c87daafe2e93095c17b57807ba Mon Sep 17 00:00:00 2001 From: dal Date: Mon, 5 May 2025 16:58:59 -0600 Subject: [PATCH 1/2] timeframe and extract type --- .../src/tools/categories/file_tools/common.rs | 37 ++++++------ .../file_tools/search_data_catalog.rs | 58 ++++++++++++++++++- 2 files changed, 74 insertions(+), 21 deletions(-) diff --git a/api/libs/agents/src/tools/categories/file_tools/common.rs b/api/libs/agents/src/tools/categories/file_tools/common.rs index 02636c9cb..aace3dc92 100644 --- a/api/libs/agents/src/tools/categories/file_tools/common.rs +++ b/api/libs/agents/src/tools/categories/file_tools/common.rs @@ -150,19 +150,17 @@ pub const METRIC_YML_SCHEMA: &str = r##" # # --- FIELD DETAILS & RULES --- # `name`: Human-readable title (e.g., Total Sales). -# - RULE: Should NOT contain underscores (`_`). Use spaces instead. +# - RULE: CANNOT contain underscores (`_`). Use spaces instead. # `description`: Detailed explanation of the metric. # `timeFrame`: Human-readable time period covered by the query, similar to a filter in a BI tool. -# RULE: Must accurately reflect the date/time filter used in the `sql` field. Do not misrepresent the time range. -# Examples: -# - Relative Dates: "Last 7 days", "Last 30 days", "Last Quarter", "Last Year", "Year to Date" -# - Fixed Dates: "June 1, 2025 - June 3, 2025", "2024", "Q2 2024" -# - Comparisons: Use the format "Comparison - [Period 1] vs [Period 2]". Examples: -# - "Comparison - Last 30 days vs Previous 30 days" -# - "Comparison - This Quarter vs Last Quarter" -# - "Comparison - 2024 vs 2023" -# - "Comparison - Q2 2024 vs Q2 2023" -# RULE: Follow general quoting rules. Should not contain ':'. +# - For queries with fixed date filters, use specific date ranges, e.g., "January 1, 2020 - December 31, 2020", "2024", "Q2 2024", "June 1, 2025". 
+# - For queries with relative date filters or no date filter, use relative terms, e.g., "Today", "Yesterday", "Last 7 days", "Last 30 days", "Last Quarter", "Last 12 Months", "Year to Date", "All time", etc. +# - For comparisons, use "Comparison - [Period 1] vs [Period 2]", with each period formatted according to whether it is fixed or relative, e.g., "Comparison - Last 30 days vs Previous 30 days" or "Comparison - June 1, 2025 - June 30, 2025 vs July 1, 2025 - July 31, 2025". +# Rules: +# - Must accurately reflect the date/time filter used in the `sql` field. Do not misrepresent the time range. +# - Use full month names for dates, e.g., "January", not "Jan". +# - Follow general quoting rules. CANNOT contain ':'. +# Note: Respond only with the time period, without explanation or additional copy. # `sql`: The SQL query for the metric. # - RULE: MUST use the pipe `|` block scalar style to preserve formatting and newlines. # - Example: @@ -182,7 +180,7 @@ pub const METRIC_YML_SCHEMA: &str = r##" # --- GENERAL YAML RULES --- # 1. Use standard YAML syntax (indentation, colons for key-value, `-` for arrays). # 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (`"...") ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, `) or needs to preserve leading/trailing whitespace. -# 3. Metric name or description should not contain `:` +# 3. Metric name, timeframe, or description CANNOT contain `:` # ------------------------------------- # --- FORMAL SCHEMA --- (Used for validation, reflects rules above) @@ -216,14 +214,13 @@ properties: Human-readable time period covered by the SQL query, similar to a filter in a BI tool. RULE: Must accurately reflect the date/time filter used in the `sql` field. Do not misrepresent the time range. 
Examples: - - Relative Dates: "Last 7 days", "Last 30 days", "Last Quarter", "Last Year", "Year to Date" - - Fixed Dates: "June 1, 2025 - June 3, 2025", "2024", "Q2 2024" - - Comparisons: Use the format "Comparison - [Period 1] vs [Period 2]". Examples: - - "Comparison - Last 30 days vs Previous 30 days" - - "Comparison - This Quarter vs Last Quarter" - - "Comparison - 2024 vs 2023" - - "Comparison - Q2 2024 vs Q2 2023" - RULE: Follow general quoting rules. Should not contain ':'. + - Fixed Dates: "January 1, 2020 - December 31, 2020", "2024", "Q2 2024", "June 1, 2025" + - Relative Dates: "Today", "Yesterday", "Last 7 days", "Last 30 days", "Last Quarter", "Last 12 Months", "Year to Date", "All time" + - Comparisons: Use the format "Comparison - [Period 1] vs [Period 2]". Examples: + - "Comparison - Last 30 days vs Previous 30 days" + - "Comparison - June 1, 2025 - June 30, 2025 vs July 1, 2025 - July 31, 2025" + RULE: Use full month names for dates, e.g., "January", not "Jan". + RULE: Follow general quoting rules. CANNOT contain ':'. 
# SQL QUERY ### SQL Best Practices and Constraints** (when creating new metrics) diff --git a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs index df3ed0ed1..f4808b4fa 100644 --- a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs +++ b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs @@ -10,7 +10,12 @@ use cohere_rust::{ api::rerank::{ReRankModel, ReRankRequest}, Cohere, }; -use database::{pool::get_pg_pool, schema::datasets}; +use database::{ + enums::DataSourceType, + pool::get_pg_pool, + schema::datasets, + schema::data_sources, +}; use diesel::prelude::*; use diesel_async::RunQueryDsl; use futures::stream::{self, StreamExt}; @@ -392,6 +397,45 @@ impl ToolExecutor for SearchDataCatalogTool { ).await; debug!(data_source_id = %target_data_source_id, "Cached data source ID in agent state"); + // --- BEGIN: Spawn concurrent task to fetch data source syntax --- + let agent_clone = self.agent.clone(); // Clone Arc for the async block + let syntax_future = tokio::spawn(async move { + let result: Result<String, anyhow::Error> = async { + let mut conn = get_pg_pool().get().await + .context("Failed to get DB connection for data source type lookup")?; + + let source_type = data_sources::table + .filter(data_sources::id.eq(target_data_source_id)) + .select(data_sources::type_) // Only the data source type column is needed + .first::<DataSourceType>(&mut conn) // Load the single matching row as a DataSourceType + .await + .context(format!("Failed to find data source type for ID: {}", target_data_source_id))?; + + // Use the enum's to_string() method directly + let syntax_string = source_type.to_string(); + Ok(syntax_string) + }.await; + + // Set state inside the spawned task + match result { + Ok(syntax) => { + debug!(data_source_id = %target_data_source_id, syntax = %syntax, "Determined data source syntax concurrently"); + agent_clone.set_state_value( + "data_source_syntax".to_string(), 
Value::String(syntax) + ).await; + }, + Err(e) => { + warn!(data_source_id = %target_data_source_id, error = %e, "Failed to determine data source syntax concurrently, setting state to null"); + agent_clone.set_state_value( + "data_source_syntax".to_string(), + Value::Null + ).await; + } + } + }); + // --- END: Spawn concurrent task to fetch data source syntax --- + // --- BEGIN REORDERED VALUE SEARCH --- // Extract value search terms @@ -716,6 +760,18 @@ impl ToolExecutor for SearchDataCatalogTool { updated_results.push(updated_result); } + // --- BEGIN: Wait for syntax future --- + // Ensure the syntax task completes before finishing. + if let Err(e) = syntax_future.await { + // Handle potential join errors (e.g., if the spawned task panicked) + warn!(error = %e, "Syntax fetching task failed to join"); + // Depending on requirements, you might want to return an error here + // or ensure the state is explicitly null if it didn't get set. + // For now, we'll just log the warning, as the task itself handles + // setting state to null on internal errors. 
+ } + // --- END: Wait for syntax future --- + // Return the updated results let mut message = if updated_results.is_empty() { "No relevant datasets found after filtering.".to_string() From 8fd0ce820da6cea1d90ca9c110b15cce5a71b4d9 Mon Sep 17 00:00:00 2001 From: dal Date: Mon, 5 May 2025 17:35:36 -0600 Subject: [PATCH 2/2] sql dialect guidance and passing --- .../agents/src/agents/buster_multi_agent.rs | 33 ++++-- api/libs/agents/src/agents/modes/analysis.rs | 103 +++++++++++++++--- .../src/agents/modes/data_catalog_search.rs | 2 +- .../agents/src/agents/modes/initialization.rs | 2 +- api/libs/agents/src/agents/modes/planning.rs | 2 +- api/libs/agents/src/agents/modes/review.rs | 6 +- .../src/tools/categories/file_tools/common.rs | 20 ++-- 7 files changed, 128 insertions(+), 40 deletions(-) diff --git a/api/libs/agents/src/agents/buster_multi_agent.rs b/api/libs/agents/src/agents/buster_multi_agent.rs index ffa2eae43..7aae09edb 100644 --- a/api/libs/agents/src/agents/buster_multi_agent.rs +++ b/api/libs/agents/src/agents/buster_multi_agent.rs @@ -57,15 +57,31 @@ impl ModeProvider for BusterModeProvider { ) -> Result { let current_mode = determine_agent_state(state); + // Extract syntax (it might be None if not set yet, which is fine) + let data_source_syntax = state + .get("data_source_syntax") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + // Call the appropriate get_configuration function based on the mode + // Pass the extracted syntax (or None) to all modes let mode_config = match current_mode { - AgentState::Initializing => modes::initialization::get_configuration(&self.agent_data), - AgentState::DataCatalogSearch => { - modes::data_catalog_search::get_configuration(&self.agent_data) + AgentState::Initializing => { + modes::initialization::get_configuration(&self.agent_data, data_source_syntax) + } + AgentState::DataCatalogSearch => { + modes::data_catalog_search::get_configuration(&self.agent_data, data_source_syntax) + } + AgentState::Planning => { 
+ modes::planning::get_configuration(&self.agent_data, data_source_syntax) + } + AgentState::AnalysisExecution => { + // Syntax is guaranteed to be extracted here or passed as None + modes::analysis::get_configuration(&self.agent_data, data_source_syntax) + } + AgentState::Review => { + modes::review::get_configuration(&self.agent_data, data_source_syntax) } - AgentState::Planning => modes::planning::get_configuration(&self.agent_data), - AgentState::AnalysisExecution => modes::analysis::get_configuration(&self.agent_data), - AgentState::Review => modes::review::get_configuration(&self.agent_data), }; Ok(mode_config) @@ -120,7 +136,8 @@ impl BusterMultiAgent { .into_iter() .filter_map(|ds| ds.yml_content) // Get Some(String), filter out None .map(|content| serde_yaml::from_str::(&content)) // Parse String -> Result - .filter_map(|result| { // Handle Result + .filter_map(|result| { + // Handle Result match result { Ok(parsed_root) => { // Extract info from the first model if available @@ -130,7 +147,7 @@ impl BusterMultiAgent { tracing::warn!("Parsed YAML has no models"); None } - }, + } Err(e) => { tracing::warn!("Failed to parse dataset YAML: {}", e); None // Filter out errors diff --git a/api/libs/agents/src/agents/modes/analysis.rs b/api/libs/agents/src/agents/modes/analysis.rs index b865bb494..5911613d6 100644 --- a/api/libs/agents/src/agents/modes/analysis.rs +++ b/api/libs/agents/src/agents/modes/analysis.rs @@ -24,12 +24,27 @@ use crate::tools::{ }; // Function to get the configuration for the AnalysisExecution mode -pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { - // 1. 
Get the prompt, formatted with current data - let prompt = PROMPT.replace("{TODAYS_DATE}", &agent_data.todays_date); - // Note: This prompt doesn't use {DATASETS} +pub fn get_configuration(agent_data: &ModeAgentData, data_source_syntax: Option) -> ModeConfiguration { + // Determine SQL dialect guidance based on syntax + let syntax = data_source_syntax.as_deref().unwrap_or("postgres"); // Default to postgres + let sql_dialect_guidance = match syntax { + "snowflake" => SNOWFLAKE_DIALECT_GUIDANCE.to_string(), + "bigquery" => BIGQUERY_DIALECT_GUIDANCE.to_string(), + "redshift" => REDSHIFT_DIALECT_GUIDANCE.to_string(), + "mysql" | "mariadb" => MYSQL_MARIADB_DIALECT_GUIDANCE.to_string(), + "sqlserver" => SQLSERVER_DIALECT_GUIDANCE.to_string(), + "databricks" => DATABRICKS_DIALECT_GUIDANCE.to_string(), + "supabase" => POSTGRES_DIALECT_GUIDANCE.to_string(), // Supabase uses Postgres + "postgres" => POSTGRES_DIALECT_GUIDANCE.to_string(), // Explicit postgres case + _ => POSTGRES_DIALECT_GUIDANCE.to_string(), // Default to Postgres for any others + }; - // 2. Define the model for this mode (Using default based on original MODEL = None) + // 1. Get the prompt, formatted with current data and SQL guidance + let prompt = PROMPT + .replace("{TODAYS_DATE}", &agent_data.todays_date) + .replace("{SQL_DIALECT_GUIDANCE}", &sql_dialect_guidance); + + // 2. Define the model for this mode let model = "gemini-2.5-pro-exp-03-25".to_string(); // 3. Define the tool loader closure @@ -139,7 +154,7 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { }) }); - // 4. Define terminating tools for this mode (From original load_tools) + // 4. Define terminating tools for this mode let terminating_tools = vec![Done::get_name()]; // 5. 
Construct and return the ModeConfiguration @@ -151,7 +166,71 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { } } -// Keep the prompt constant, but it's no longer pub +// Per-dialect SQL guidance snippets; one is chosen per data source and inserted into the prompt +const POSTGRES_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (PostgreSQL/Supabase)**: + - **`DATE_TRUNC`**: Prefer `DATE_TRUNC('day', column)`, `DATE_TRUNC('week', column)`, `DATE_TRUNC('month', column)`, etc., for grouping time series data. Note that `'week'` starts on Monday. + - **`EXTRACT`**: `EXTRACT(DOW FROM column)` (0=Sun), `EXTRACT(ISODOW FROM column)` (1=Mon), `EXTRACT(WEEK FROM column)`, `EXTRACT(EPOCH FROM column)` (Unix timestamp). + - **Intervals**: Use `INTERVAL '1 day'`, `INTERVAL '1 month'`, etc. + - **Current Date/Time**: `CURRENT_DATE`, `CURRENT_TIMESTAMP`, `NOW()`. +"##; + +const SNOWFLAKE_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (Snowflake)**: + - **`DATE_TRUNC`**: Similar usage: `DATE_TRUNC('DAY', column)`, `DATE_TRUNC('WEEK', column)`, `DATE_TRUNC('MONTH', column)`. Week start depends on `WEEK_START` parameter (default 0 = ISO-like, weeks start Monday). + - **`EXTRACT`**: `EXTRACT(dayofweek FROM column)` (0=Sun), `EXTRACT(dayofweekiso FROM column)` (1=Mon), `EXTRACT(weekiso FROM column)`. Use `DATE_PART` for more options (e.g., `DATE_PART('epoch_second', column)`). + - **DateAdd/DateDiff**: Use `DATEADD(day, 1, column)`, `DATEDIFF(day, start_date, end_date)`. + - **Intervals**: Use `INTERVAL '1 DAY'`, `INTERVAL '1 MONTH'`. + - **Current Date/Time**: `CURRENT_DATE()`, `CURRENT_TIMESTAMP()`, `SYSDATE()`. +"##; + +const BIGQUERY_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (BigQuery)**: + - **`DATE_TRUNC`**: `DATE_TRUNC(column, DAY)`, `DATE_TRUNC(column, WEEK)`, `DATE_TRUNC(column, MONTH)`, etc. Week starts Sunday by default, use `WEEK(MONDAY)` for Monday start. + - **`EXTRACT`**: `EXTRACT(DAYOFWEEK FROM column)` (1=Sun, 7=Sat), `EXTRACT(ISOWEEK FROM column)`. 
+ - **DateAdd/DateDiff**: Use `DATE_ADD(column, INTERVAL 1 DAY)`, `DATE_SUB(column, INTERVAL 1 MONTH)`, `DATE_DIFF(end_date, start_date, DAY)`. + - **Intervals**: Use `INTERVAL 1 DAY`, `INTERVAL 1 MONTH`. + - **Current Date/Time**: `CURRENT_DATE()`, `CURRENT_TIMESTAMP()`, `CURRENT_DATETIME()`. +"##; + +// Add constants for other dialects +const REDSHIFT_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (Redshift)**: + - **`DATE_TRUNC`**: Similar to PostgreSQL: `DATE_TRUNC('day', column)`, `DATE_TRUNC('week', column)`, `DATE_TRUNC('month', column)`. Week starts Monday. + - **`EXTRACT`**: `EXTRACT(DOW FROM column)` (0=Sun), `EXTRACT(EPOCH FROM column)`. Also supports `DATE_PART` (e.g., `DATE_PART(w, column)` for week). + - **DateAdd/DateDiff**: Use `DATEADD(day, 1, column)`, `DATEDIFF(day, start_date, end_date)`. + - **Intervals**: Use `INTERVAL '1 day'`, `INTERVAL '1 month'`. + - **Current Date/Time**: `GETDATE()`, `CURRENT_DATE`, `SYSDATE`. +"##; + +const MYSQL_MARIADB_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (MySQL/MariaDB)**: + - **`DATE_FORMAT`**: Use `DATE_FORMAT(column, '%Y-%m-01')` for month truncation. For week, use `DATE_SUB(DATE(column), INTERVAL WEEKDAY(column) DAY)` (`WEEKDAY` is 0 for Monday, so this returns the Monday starting the week). + - **`EXTRACT`**: `DAYOFWEEK(column)` (1=Sun, 7=Sat; `EXTRACT` has no day-of-week unit), `EXTRACT(WEEK FROM column)`. `UNIX_TIMESTAMP(column)` for epoch seconds. + - **DateAdd/DateDiff**: Use `DATE_ADD(column, INTERVAL 1 DAY)`, `DATE_SUB(column, INTERVAL 1 MONTH)`, `DATEDIFF(end_date, start_date)`. + - **Intervals**: Use `INTERVAL 1 DAY`, `INTERVAL 1 MONTH`. + - **Current Date/Time**: `CURDATE()`, `NOW()`, `CURRENT_TIMESTAMP`. +"##; + +const SQLSERVER_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (SQL Server)**: + - **`DATETRUNC`**: Available in SQL Server 2022 and later: `DATETRUNC(day, column)`, `DATETRUNC(week, column)`, `DATETRUNC(month, column)`. Week start depends on `DATEFIRST` setting. 
+ - **`DATEPART`**: `DATEPART(weekday, column)`, `DATEPART(iso_week, column)`. There is no `epoch` datepart; use `DATEDIFF(second, '1970-01-01', column)` for epoch seconds. + - **DateAdd/DateDiff**: Use `DATEADD(day, 1, column)`, `DATEDIFF(day, start_date, end_date)`. + - **Intervals**: Generally handled by `DATEADD`/`DATEDIFF`. + - **Current Date/Time**: `GETDATE()`, `SYSDATETIME()`, `CURRENT_TIMESTAMP`. +"##; + +const DATABRICKS_DIALECT_GUIDANCE: &str = r##" +- **Date/Time Functions (Databricks SQL)**: + - **`DATE_TRUNC`**: `DATE_TRUNC('DAY', column)`, `DATE_TRUNC('WEEK', column)`, `DATE_TRUNC('MONTH', column)`. Week starts Monday. + - **`EXTRACT`**: `EXTRACT(DAYOFWEEK FROM column)` (1=Sun, 7=Sat), `EXTRACT(WEEK FROM column)`. `unix_timestamp(column)` for epoch seconds. + - **DateAdd/DateDiff**: Use `date_add(column, 1)`, `date_sub(column, 30)`, `datediff(end_date, start_date)`. + - **Intervals**: Use `INTERVAL 1 DAY`, `INTERVAL 1 MONTH`. + - **Current Date/Time**: `current_date()`, `current_timestamp()`. +"##; + +// Keep the prompt template constant, but add the guidance placeholder const PROMPT: &str = r##"### Role & Task You are Buster, an expert analytics and data engineer. Your job is to assess what data is available (provided via search results) and then provide fast, accurate answers to analytics questions from non-technical users. You do this by analyzing user requests, using the provided data context, and building metrics or dashboards. @@ -239,7 +318,10 @@ To conclude your worklow, you use the `finish_and_respond` tool to send a final --- ## SQL Best Practices and Constraints** (when creating new metrics) -- USE POSTGRESQL SYNTAX + +**Current SQL Dialect Guidance:** +{SQL_DIALECT_GUIDANCE} + - **Keep Queries Simple**: Strive for simplicity and clarity in your SQL. Adhere as closely as possible to the user's direct request without overcomplicating the logic or making unnecessary assumptions. 
- **Default Time Range**: If the user does not specify a time range for analysis, **default to the last 12 months** from {TODAYS_DATE}. Clearly state this assumption if making it. - **Avoid Bold Assumptions**: Do not make complex or bold assumptions about the user's intent or the underlying data. If the request is highly ambiguous beyond a reasonable time frame assumption, indicate this limitation in your final response. @@ -285,8 +367,3 @@ You MUST plan extensively before each function call, and reflect extensively on // No specific model override for analysis/execution mode pub const MODEL: Option<&str> = None; - -// Function to get the formatted prompt for this mode -pub fn get_prompt(todays_date: &str) -> String { - PROMPT.replace("{TODAYS_DATE}", todays_date) -} diff --git a/api/libs/agents/src/agents/modes/data_catalog_search.rs b/api/libs/agents/src/agents/modes/data_catalog_search.rs index 8db9865da..6067732be 100644 --- a/api/libs/agents/src/agents/modes/data_catalog_search.rs +++ b/api/libs/agents/src/agents/modes/data_catalog_search.rs @@ -21,7 +21,7 @@ use crate::tools::{ }; // Function to get the configuration for the DataCatalogSearch mode -pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { +pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option) -> ModeConfiguration { // 1. 
Get the prompt, formatted with current data let prompt = DATA_CATALOG_SEARCH_PROMPT .replace("{DATASETS}", &agent_data.dataset_with_descriptions.join("\n\n")) // Deref Arc and Vec to get slice for join diff --git a/api/libs/agents/src/agents/modes/initialization.rs b/api/libs/agents/src/agents/modes/initialization.rs index 0c4ead2be..30f6a9fa2 100644 --- a/api/libs/agents/src/agents/modes/initialization.rs +++ b/api/libs/agents/src/agents/modes/initialization.rs @@ -18,7 +18,7 @@ use crate::tools::{ }; // Function to get the configuration for the Initialization mode -pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { +pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option) -> ModeConfiguration { // 1. Get the prompt, formatted with current data let prompt = INTIALIZATION_PROMPT .replace("{DATASETS}", &agent_data.dataset_with_descriptions.join("\n\n")) diff --git a/api/libs/agents/src/agents/modes/planning.rs b/api/libs/agents/src/agents/modes/planning.rs index db739198e..c91c50296 100644 --- a/api/libs/agents/src/agents/modes/planning.rs +++ b/api/libs/agents/src/agents/modes/planning.rs @@ -21,7 +21,7 @@ use crate::tools::{ }; // Function to get the configuration for the Planning mode -pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration { +pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option) -> ModeConfiguration { // 1. 
Get the prompt, formatted with current data let prompt = PLANNING_PROMPT .replace("{TODAYS_DATE}", &agent_data.todays_date) diff --git a/api/libs/agents/src/agents/modes/review.rs b/api/libs/agents/src/agents/modes/review.rs index a332277a0..c819c3f58 100644 --- a/api/libs/agents/src/agents/modes/review.rs +++ b/api/libs/agents/src/agents/modes/review.rs @@ -19,12 +19,12 @@ use crate::tools::{ }; // Function to get the configuration for the Review mode -pub fn get_configuration(_agent_data: &ModeAgentData) -> ModeConfiguration { +pub fn get_configuration(_agent_data: &ModeAgentData, _data_source_syntax: Option) -> ModeConfiguration { // 1. Get the prompt (doesn't need formatting for this mode) - let prompt = REVIEW_PROMPT.to_string(); + let prompt = REVIEW_PROMPT.to_string(); // Use the correct constant // 2. Define the model for this mode (From original MODEL const) - let model = "gemini-2.0-flash-001".to_string(); + let model = "gemini-2.5-pro-exp-03-25".to_string(); // 3. Define the tool loader closure let tool_loader: Box< diff --git a/api/libs/agents/src/tools/categories/file_tools/common.rs b/api/libs/agents/src/tools/categories/file_tools/common.rs index aace3dc92..3d2770f2b 100644 --- a/api/libs/agents/src/tools/categories/file_tools/common.rs +++ b/api/libs/agents/src/tools/categories/file_tools/common.rs @@ -223,25 +223,19 @@ properties: RULE: Follow general quoting rules. CANNOT contain ':'. # SQL QUERY - ### SQL Best Practices and Constraints** (when creating new metrics) - # - **Constraints**: Only join tables with explicit entity relationships. - # - **SQL Requirements**: - # - Use schema-qualified table names (`..`). - # - Use fully qualified column names with table aliases (e.g., `.`). - # - Select specific columns (avoid `SELECT *` or `COUNT(*)`). - # - Use CTEs instead of subqueries, and use snake_case for naming them. - # - Use `DISTINCT` (not `DISTINCT ON`) with matching `GROUP BY`/`SORT BY` clauses. - # - Show entity names rather than just IDs. 
- # - Handle date conversions appropriately. - # - Order dates in ascending order. - # - Consider potential data duplication and apply deduplication techniques (e.g., `DISTINCT`, `GROUP BY`) where necessary. + # Describes how the SQL should be formatted within the YAML sql: required: true type: string description: | - SQL query using YAML pipe syntax (|) + SQL query using YAML pipe syntax (|). The SQL query should be formatted with proper indentation using the YAML pipe (|) syntax. This ensures the multi-line SQL is properly parsed while preserving whitespace and newlines. + Example: + sql: | + SELECT column1, column2 + FROM my_table + WHERE condition; # CHART CONFIGURATION chartConfig: