Merge branch 'evals' of https://github.com/buster-so/buster into evals

2025-04-16 14:40:45 -06:00 · 2025-04-16 14:40:45 -06:00 · 54ae8adc81
parent 4d2af2e29f 139c434c79
commit 54ae8adc81
4 changed files with 14 additions and 11 deletions
--- a/api/libs/agents/src/agents/modes/analysis.rs
+++ b/api/libs/agents/src/agents/modes/analysis.rs
@ -247,7 +247,7 @@ To conclude your workflow, you use the `finish_and_respond` tool to send a final
  - Format output for the specified visualization type.  
  - Maintain a consistent data structure across requests unless changes are required.  
  - Use explicit ordering for custom buckets or categories.
-
+  - Avoid division by zero errors by using NULLIF() or CASE statements (e.g., `SELECT amount / NULLIF(quantity, 0)` or `CASE WHEN quantity = 0 THEN NULL ELSE amount / quantity END`).
 ---

 You are an agent - please keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.
--- a/api/libs/agents/src/tools/categories/file_tools/common.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/common.rs
@ -110,9 +110,7 @@ pub const METRIC_YML_SCHEMA: &str = r##"
 # --- FIELD DETAILS & RULES --- 
 # `name`: Human-readable title (e.g., Total Sales). 
 #   - RULE: Should NOT contain underscores (`_`). Use spaces instead.
-#   - RULE: If using colons (`:`) or other special YAML chars, enclose the *entire* string in double quotes (`\"...\"`). Avoid if possible.
 # `description`: Detailed explanation of the metric. 
-#   - RULE: If using colons (`:`) or other special YAML chars, enclose the *entire* string in double quotes (`\"...\"`).
 # `datasetIds`: Array of Dataset UUIDs this metric uses. 
 #   - RULE: Use standard YAML array syntax (`- uuid`). 
 #   - RULE: UUIDs should NEVER be quoted.
@ -120,7 +118,6 @@ pub const METRIC_YML_SCHEMA: &str = r##"
 #     datasetIds:
 #       - 123e4567-e89b-12d3-a456-426614174000
 # `timeFrame`: Human-readable time period covered by the query (e.g., Last 30 days). 
-#   - RULE: If using colons (`:`) or other special YAML chars, enclose the *entire* string in double quotes (`\"...\"`).
 # `sql`: The SQL query for the metric.
 #   - RULE: MUST use the pipe `|` block scalar style to preserve formatting and newlines.
 #   - Example:
@ -140,6 +137,7 @@ pub const METRIC_YML_SCHEMA: &str = r##"
 # --- GENERAL YAML RULES ---
 # 1. Use standard YAML syntax (indentation, colons for key-value, `-` for arrays).
 # 2. Quoting: Generally avoid quotes for simple strings. Use double quotes (`"..."`) ONLY if a string contains special characters (like :, {, }, [, ], ,, &, *, #, ?, |, -, <, >, =, !, %, @, `) or needs to preserve leading/trailing whitespace. 
+# 3. The `name`, `description`, and `timeFrame` values should not contain colons (`:`).
 # -------------------------------------

 # --- FORMAL SCHEMA --- (Used for validation, reflects rules above)
@ -150,16 +148,19 @@ description: Metric definition with SQL query and visualization settings
 properties:
  # NAME
  name:
+    required: true
    type: string
-    description: Human-readable title (e.g., Total Sales). NO underscores. Follow quoting rules.
+    description: Human-readable title (e.g., Total Sales). NO underscores. Follow quoting rules. Should not contain `:`

  # DESCRIPTION
  description:
+    required: true
    type: string
-    description: Detailed description. Follow quoting rules.
+    description: Detailed description. Follow quoting rules. Should not contain `:`

  # DATASET IDS
  datasetIds:
+    required: true
    type: array
    description: UUIDs of datasets this metric belongs to (NEVER quoted).
    items:
@ -169,8 +170,9 @@ properties:
    
  # TIME FRAME
  timeFrame:
+    required: true
    type: string
-    description: Human-readable time period covered by the query. Follow quoting rules.
+    description: Human-readable time period covered by the query. Follow quoting rules. Should not contain `:`

  # SQL QUERY
  ### SQL Best Practices and Constraints (when creating new metrics)  
@ -188,8 +190,10 @@ properties:
  #    - Maintain a consistent data structure across requests unless changes are required.  
  #    - Use explicit ordering for custom buckets or categories.
  #    - When grouping metrics by dates, default to monthly granularity for spans over 2 months, yearly for over 3 years, weekly for under 2 months, and daily for under a week, unless the user specifies a different granularity.
+  #    - Avoid division by zero errors by using NULLIF() or CASE statements (e.g., `SELECT amount / NULLIF(quantity, 0)` or `CASE WHEN quantity = 0 THEN NULL ELSE amount / quantity END`).
  ###
  sql:
+    required: true
    type: string
    description: |
      SQL query using YAML pipe syntax (|)
@ -198,6 +202,7 @@ properties:

  # CHART CONFIGURATION
  chartConfig:
+    required: true
    description: Visualization settings (must include selectedChartType, columnLabelFormats, and ONE chart-specific block)
    allOf: # Base requirements for ALL chart types
      - $ref: '#/definitions/base_chart_config'
--- a/api/libs/handlers/src/chats/post_chat_handler.rs
+++ b/api/libs/handlers/src/chats/post_chat_handler.rs
@ -2096,8 +2096,6 @@ fn transform_assistant_tool_message(

                // If parser returns a reasoning message (File type expected)
                if let Ok(Some(BusterReasoningMessage::File(mut file_reasoning))) = parse_result {
-                    // Set the secondary title using elapsed_duration when creating the initial message
-                    file_reasoning.secondary_title = format!("{} seconds", last_reasoning_completion_time.elapsed().as_secs()); // Use Delta
                    // Added missing variable initializations
                    let mut has_updates = false;
                    let mut updated_files_map = std::collections::HashMap::new();
--- a/api/libs/litellm/src/types.rs
+++ b/api/libs/litellm/src/types.rs
@ -15,7 +15,7 @@ pub struct ChatCompletionRequest {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<HashMap<String, i32>>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub log_probs: Option<bool>,
+    pub logprobs: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
@ -71,7 +71,7 @@ impl Default for ChatCompletionRequest {
            reasoning_effort: None,
            frequency_penalty: None,
            logit_bias: None,
-            log_probs: None,
+            logprobs: None,
            top_logprobs: None,
            max_completion_tokens: None,
            n: None,