prompt improvements

2025-04-18 07:42:33 -06:00 · 2025-04-18 07:42:33 -06:00 · 670b56986d
parent 3bd13501f7
commit 670b56986d
5 changed files with 57 additions and 37 deletions
--- a/api/libs/agents/src/tools/categories/file_tools/common.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/common.rs
@@ -156,7 +156,12 @@ properties:
  description:
    required: true
    type: string
-    description: Detailed description. Follow quoting rules. Should not contain `:`
+    description: |
+      A natural language description of the metric, essentially rephrasing the 'name' field as a question or statement. 
+      Example: If name is "Total Sales", description could be "What are the total sales?".
+      RULE: Should NOT describe the chart type, axes, or any visualization aspects.
+      RULE: Follow general quoting rules. 
+      RULE: Should not contain ':'.

  # DATASET IDS
  datasetIds:
@@ -172,7 +177,16 @@ properties:
  timeFrame:
    required: true
    type: string
-    description: Human-readable time period covered by the query. Follow quoting rules. Should not contain `:`
+    description: |
+      Human-readable time period covered by the SQL query. 
+      RULE: Must accurately reflect the date/time filter used in the `sql` field. Do not misrepresent the time range.
+      - If the SQL uses fixed dates (e.g., `BETWEEN '2025-06-01' AND '2025-06-03'`), use specific dates: "June 1, 2025 - June 3, 2025".
+      - If the SQL uses dynamic relative dates (e.g., `created_at >= NOW() - INTERVAL '3 days'`), use relative terms: "Last 3 days".
+      - For comparisons between two periods, use the format "Comparison - [Period 1] vs [Period 2]". Examples:
+        - "Comparison - This Week vs Last Week"
+        - "Comparison - Q3 2024 vs Q3 2023"
+        - "Comparison - June 1, 2025 vs August 1, 2025"
+      RULE: Follow general quoting rules. Should not contain ':'.

  # SQL QUERY
  ### SQL Best Practices and Constraints (when creating new metrics)  
@@ -279,7 +293,7 @@ definitions:
          -
            currency # Note: The "$" sign is automatically prepended.
          -
-            percent # Note: The value is automatically multiplied by 100 and the "%" sign is appended.
+            percent # Note: The "%" sign is appended automatically. Use the `multiplier` field to multiply or divide the value by 100 as needed.
          - number
          - date
          - string
@@ -297,13 +311,13 @@ definitions:
        description: Maximum number of fraction digits to display
      multiplier:
        type: number
-        description: Value to multiply the number by before display
+        description: Value to multiply the number by before display. Useful for percentages (e.g., use 100 to display 0.25 as 25%).
      prefix:
        type: string
      suffix:
        type: string
      replaceMissingDataWith:
-        description: Value to display when data is missing, this should be set to null as default.
+        description: Value to display when data is missing. Must be set to 0.
      compactNumbers:
        type: boolean
        description: Whether to display numbers in compact form (e.g., 1K, 1M)
@@ -321,7 +335,12 @@ definitions:
        description: Whether to interpret dates as UTC
      convertNumberTo:
        type: string
-        description:  this is useful for converting numberic 1-12 into month names
+        description: Optional. Converts numeric values to time units or date parts. Required for time series data when numbers (e.g., 1-12 for months) are passed instead of dates.
+        enum:
+          - day_of_week
+          - month_of_year
+          - quarter
+
    required:
      - columnType
      - style
--- a/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/create_dashboards.rs
@@ -396,7 +396,7 @@ async fn get_dashboard_yml_description() -> String {

 async fn get_dashboard_name_description() -> String {
    if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return "The name of the dashboard file to be created. Do not include the file extension.".to_string();
+        return "The natural language name/title for the dashboard, exactly matching the 'name' field within the YML content. This name will identify the dashboard in the UI. Do not include file extensions or use file path characters.".to_string();
    }

    let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -404,7 +404,7 @@ async fn get_dashboard_name_description() -> String {
        Ok(message) => message,
        Err(e) => {
            eprintln!("Failed to get prompt system message: {}", e);
-            "The name of the dashboard file to be created. Do not include the file extension.".to_string()
+            "The natural language name/title for the dashboard, exactly matching the 'name' field within the YML content. This name will identify the dashboard in the UI. Do not include file extensions or use file path characters.".to_string()
        }
    }
 }
--- a/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
+++ b/api/libs/agents/src/tools/categories/file_tools/create_metrics.rs
@@ -312,7 +312,7 @@ async fn get_create_metrics_description() -> String {

 async fn get_metric_name_description() -> String {
    if env::var("USE_BRAINTRUST_PROMPTS").is_err() {
-        return "This is a natural language name/title for the metric. It will be used to identify the metric in the UI.".to_string();
+        return "The natural language name/title for the metric, exactly matching the 'name' field within the YML content. This name will identify the metric in the UI. Do not include file extensions or use file path characters.".to_string();
    }

    let client = BraintrustClient::new(None, "96af8b2b-cf3c-494f-9092-44eb3d5b96ff").unwrap();
@@ -320,7 +320,7 @@ async fn get_metric_name_description() -> String {
        Ok(message) => message,
        Err(e) => {
            eprintln!("Failed to get prompt system message: {}", e);
-            "This is a natural language name/title for the metric. It will be used to identify the metric in the UI.".to_string()
+            "The natural language name/title for the metric, exactly matching the 'name' field within the YML content. This name will identify the metric in the UI. Do not include file extensions or use file path characters.".to_string()
        }
    }
 }
--- a/api/libs/database/src/types/metric_yml.rs
+++ b/api/libs/database/src/types/metric_yml.rs
@@ -7,12 +7,12 @@ use diesel::{
    sql_types::Jsonb,
 };
 use indexmap::IndexMap;
+use lazy_static::lazy_static;
+use regex::Regex;
 use serde::{Deserialize, Serialize};
-use serde_json::Value;
+use serde_json::json;
 use std::io::Write;
 use uuid::Uuid;
-use regex::Regex;
-use lazy_static::lazy_static;

 // Helper function to sanitize string values for YAML
 fn sanitize_yaml_string(value: &str) -> String {
@@ -76,7 +76,6 @@ pub enum ChartConfig {
    Table(TableChartConfig),
 }

-
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(rename_all = "camelCase")]
 #[serde(untagged)]
@@ -201,7 +200,7 @@ impl ColumnLabelFormat {
            multiplier: None,
            prefix: None,
            suffix: None,
-            replace_missing_data_with: None,
+            replace_missing_data_with: Some(json!(0)),
            compact_numbers: None,
            currency: None,
            date_format: None,
@@ -528,22 +527,23 @@ impl MetricYml {
        for (index, line) in yml_content.lines().enumerate() {
            // Store SQL line info for potential timeFrame insertion
            if sql_line_index.is_none() {
-                 if let Some(caps) = SQL_KEY_RE.captures(line) {
+                if let Some(caps) = SQL_KEY_RE.captures(line) {
                    sql_line_index = Some(index);
                    sql_line_indent = Some(caps.get(1).map_or("", |m| m.as_str()).to_string());
                }
            }

-            let current_indent = INDENT_RE.captures(line).map_or(0, |caps| {
-                caps.get(1).map_or(0, |m| m.as_str().len())
-            });
+            let current_indent = INDENT_RE
+                .captures(line)
+                .map_or(0, |caps| caps.get(1).map_or(0, |m| m.as_str().len()));

            // --- Colors Block Logic ---
            if in_colors_block && colors_indent.map_or(false, |indent| current_indent <= indent) {
                in_colors_block = false;
                colors_indent = None;
            }
-            if !in_colors_block { // Only check for start if not already in block
+            if !in_colors_block {
+                // Only check for start if not already in block
                if let Some(caps) = COLORS_START_RE.captures(line) {
                    in_colors_block = true;
                    colors_indent = Some(caps.get(1).map_or(0, |m| m.as_str().len()));
@@ -558,14 +558,14 @@ impl MetricYml {
                    processed_lines.push(format!("{}'{}'", marker_part, color_part));
                    continue;
                } else {
-                     processed_lines.push(line.to_string()); // Add line within color block as is if not a hex item
-                     continue;
+                    processed_lines.push(line.to_string()); // Add line within color block as is if not a hex item
+                    continue;
                }
            }
            // --- End Colors Block Logic ---

            // --- String Sanitization & timeFrame Check ---
-             if let Some(caps) = SANITIZE_KEYS_RE.captures(line) {
+            if let Some(caps) = SANITIZE_KEYS_RE.captures(line) {
                let indent = caps.name("indent").map_or("", |m| m.as_str());
                let key = caps.name("key").map_or("", |m| m.as_str());
                let value = caps.name("value").map_or("", |m| m.as_str());
@@ -589,15 +589,15 @@ impl MetricYml {
        // Insert default timeFrame if not found
        if !time_frame_found {
            if let Some(index) = sql_line_index {
-                 // Use the indent captured from the sql line
+                // Use the indent captured from the sql line
                let indent = sql_line_indent.unwrap_or_else(|| "  ".to_string()); // Default indent if sql indent capture failed
                processed_lines.insert(index, format!("{}timeFrame: 'all_time'", indent));
            } else {
                // Fallback: append if sql key wasn't found (shouldn't happen for valid metric)
                // Or maybe error out?
-                 eprintln!("Warning: sql key not found in metric YAML, cannot insert default timeFrame correctly.");
-                 // Append at end with default indent - might break YAML structure
-                 processed_lines.push("  timeFrame: 'all_time'".to_string()); 
+                eprintln!("Warning: sql key not found in metric YAML, cannot insert default timeFrame correctly.");
+                // Append at end with default indent - might break YAML structure
+                processed_lines.push("  timeFrame: 'all_time'".to_string());
            }
        }

--- a/api/libs/handlers/src/chats/post_chat_handler.rs
+++ b/api/libs/handlers/src/chats/post_chat_handler.rs
@@ -2280,21 +2280,22 @@ pub fn normalize_asset_fields(request: &ChatCreateNewChat) -> (Option<Uuid>, Opt

 // Constants for title generation
 const TITLE_GENERATION_PROMPT: &str = r#"
-You are a conversation title generator. Your task is to generate a clear, concise, and descriptive title for a conversation based on the user messages and assistant responses provided.
+You are a conversation title generator. Your task is to generate a clear, concise title (3-10 words) that summarizes the **core subject matter** of the conversation, primarily based on the **most recent user message**.

 Guidelines:
-1. The title should be 3-10 words and should capture the core topic or intent of the conversation
-2. Focus on key topics, questions, or themes from the conversation
-3. Be specific rather than generic when possible
-4. Avoid phrases like "Conversation about..." or "Discussion on..."
-5. Don't include mentions of yourself in the title
-6. The title should make sense out of context
-7. Pay attention to the most recent messages to guide topic changes, etc.
+1.  **Focus on the Topic:** Identify the key nouns, concepts, or goals mentioned by the user, especially in their latest message. What is the conversation *about*?
+2.  **Prioritize Recent Request:** The title should strongly reflect the subject of the most recent user input.
+3.  **Be Specific & Concise:** Capture the essence in 3-10 words.
+4.  **AVOID Action Verbs/File Types:** Do NOT use words like "creating", "modifying", "updating", "dashboard", "metric", "file", "chart", "visualization" unless the user's core request was *specifically* about the process of creation/modification itself (rare). Instead, focus on *what* is being created or modified (e.g., "Monthly Sales Goals", not "Creating Metric for Monthly Sales Goals").
+5.  **Natural Language:** Phrase the title naturally.
+6.  **No Self-Reference:** Do not mention yourself (the assistant).
+7.  **Context Independent:** The title should make sense on its own.
+8.  **Example:** If the last user message is "Show me total revenue for Q3 as a line chart", a good title would be "Q3 Total Revenue" or "Quarter 3 Revenue Analysis". A bad title would be "Creating Q3 Revenue Line Chart".

-Conversation:
+Conversation History (Most recent messages are most important):
 {conversation_messages}

-Return only the title text with no additional formatting, explanation, quotes, new lines, special characters, etc.
+Return ONLY the generated title text. Do not include quotes, explanations, or any other text.
 "#;

 /// Generates a title for a conversation by processing user and assistant messages.