From fc5736a4b315969bd2910a1662d1098cd2a9aacd Mon Sep 17 00:00:00 2001 From: dal Date: Wed, 9 Apr 2025 17:17:05 -0600 Subject: [PATCH 1/2] ok I think I fixed the ghost message --- api/libs/handlers/src/chats/post_chat_handler.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/libs/handlers/src/chats/post_chat_handler.rs b/api/libs/handlers/src/chats/post_chat_handler.rs index c6bcc5d5f..308c667da 100644 --- a/api/libs/handlers/src/chats/post_chat_handler.rs +++ b/api/libs/handlers/src/chats/post_chat_handler.rs @@ -210,11 +210,18 @@ pub async fn post_chat_handler( } if request.prompt.is_none() && asset_id.is_some() && asset_type.is_some() { + // Remove the initial empty message added by initialize_chat in this specific case + let message_id_str = message_id.to_string(); + chat_with_messages.messages.remove(&message_id_str); + chat_with_messages.message_ids.retain(|id| id != &message_id_str); + let asset_id_value = asset_id.unwrap(); let asset_type_value = asset_type.unwrap(); let messages = generate_asset_messages(asset_id_value, asset_type_value, &user).await?; + println!("messages: {:?}", messages); + // Add messages to chat and associate with chat_id let mut updated_messages = Vec::new(); for mut message in messages { From a9649b38a9981158393ec458ba3b0ab915b791c8 Mon Sep 17 00:00:00 2001 From: dal Date: Wed, 9 Apr 2025 17:58:20 -0600 Subject: [PATCH 2/2] nailed down model file --- cli/cli/src/commands/mod.rs | 6 +- cli/libs/semantic_layer/README.md | 82 ++++++++++++++++ .../semantic_layer/examples/model_file.yml | 95 +++++++++++++++++++ cli/libs/semantic_layer/spec.yml | 37 ++++++++ 4 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 cli/libs/semantic_layer/README.md create mode 100644 cli/libs/semantic_layer/examples/model_file.yml create mode 100644 cli/libs/semantic_layer/spec.yml diff --git a/cli/cli/src/commands/mod.rs b/cli/cli/src/commands/mod.rs index f84a18f21..1704c27b0 100644 --- a/cli/cli/src/commands/mod.rs +++ 
b/cli/cli/src/commands/mod.rs @@ -2,11 +2,11 @@ pub mod auth; mod deploy; mod generate; mod init; -pub mod version; pub mod update; +pub mod version; -pub use auth::{auth, auth_with_args, AuthArgs}; +pub use auth::{auth_with_args, AuthArgs}; pub use deploy::deploy; -pub use generate::{GenerateCommand, generate}; +pub use generate::{generate, GenerateCommand}; pub use init::init; pub use update::UpdateCommand; diff --git a/cli/libs/semantic_layer/README.md b/cli/libs/semantic_layer/README.md new file mode 100644 index 000000000..e521c715c --- /dev/null +++ b/cli/libs/semantic_layer/README.md @@ -0,0 +1,82 @@ +# Model Schema Documentation + +This document describes a YAML-based schema for defining data models, designed for an AI data analyst (LLM) to interpret and generate SQL queries. The schema supports complex multi-table relationships, filters, and metrics, aiming for simplicity, clarity, and LLM usability while mirroring business entities (an approach inspired by ontology-style modeling, e.g., Palantir's ontology). + +## Overview +- **Purpose**: Digitally clone a business by modeling entities (tables/models), their attributes, relationships, and analytical logic. +- **Key Features**: + - Modular models with dimensions, measures, metrics, filters, and entities. + - Multi-table `filters` and `metrics` using columns from related models via `entities`. + - Parameterized `filters` and `metrics` for dynamic queries. + - Structured `entities` for reliable join parsing. + +## Top-Level Structure +- **`models`**: Array of model objects, each representing a table or view. + +### Model Fields +- **`name`** (required, string): Unique model identifier (e.g., `culture`). +- **`description`** (optional, string): Human-readable description. + +### Dimensions +Filterable fields or identifiers (e.g., categorical attributes). + +- **`name`** (required, string): Column name in the data source. +- **`description`** (optional, string): Field explanation. 
+- **`type`** (optional, string): Data type (e.g., `character`); inferred if omitted. +- **`searchable`** (optional, boolean, default: `false`): Index for search. +- **`options`** (optional, array of strings, default: `null`): Valid values (e.g., `["active", "inactive"]`). + +### Measures +Raw quantitative fields for analysis. + +- **`name`** (required, string): Column name in the data source. +- **`description`** (optional, string): Field explanation. +- **`type`** (optional, string): Data type (e.g., `integer`); inferred if omitted. + +### Metrics +Aggregated or derived values, optionally parameterized. + +- **`name`** (required, string): Metric name (e.g., `total_revenue`). +- **`expr`** (required, string): Expression (e.g., `SUM(revenue)`). Can use `model.column` syntax (e.g., `logins.login_count`) for entity columns. +- **`description`** (optional, string): Metric explanation. +- **`args`** (optional, array of objects, default: `null`): + - **`name`** (required, string): Argument name (e.g., `days`). + - **`type`** (required, string): Data type (e.g., `integer`). + - **`description`** (optional, string): Argument purpose. +- **Notes**: For `many-to-many` relationships, pre-aggregate entity data to avoid duplication (e.g., use a subquery). + +### Filters +Reusable boolean conditions, optionally parameterized. + +- **`name`** (required, string): Filter name (e.g., `active_customer`). +- **`expr`** (required, string): Boolean expression (e.g., `login_count > 1`). Can use `model.column` for entity columns. +- **`description`** (optional, string): Filter explanation. +- **`args`** (optional, array of objects, default: `null`): + - **`name`** (required, string): Argument name. + - **`type`** (required, string): Data type. + - **`description`** (optional, string): Argument purpose. +- **Notes**: Use `EXISTS` or subqueries for `many-to-many` to preserve intent without duplication. + +### Entities +Relationships to other models, enabling multi-table joins. 
+ +- **`name`** (required, string): Related model name (e.g., `logins`). +- **`primary_key`** (required, string): Current model’s join column (e.g., `cultureid`). +- **`foreign_key`** (required, string): Entity model’s join column (e.g., `cultureid`). +- **`type`** (optional, string): Join type (`LEFT`, `INNER`, `RIGHT`, `FULL`); LLM decides if omitted based on `expr` context. +- **`cardinality`** (optional, string, default: `null`): Relationship type (e.g., `one-to-many`, `many-to-many`). +- **`description`** (optional, string): Relationship explanation. +- **Notes**: + - Join is `current_model.primary_key = entity_model.foreign_key` (e.g., `culture.cultureid = logins.cultureid`) with LLM-chosen `type`. + - `cardinality` hints at duplication risk (e.g., `many-to-many` may need subqueries). + +## SQL Compilation +- **Joins**: Use `entities` to join models, with LLM selecting `type` if unspecified. +- **Many-to-Many**: + - Pre-aggregate entity data (e.g., `GROUP BY cultureid`) or use `EXISTS` to avoid duplicating base rows. + - Example: `SELECT SUM(revenue) FROM culture WHERE EXISTS (SELECT 1 FROM culture_products WHERE ...)`. + +## Design Choices +- **Option 3**: `filters` and `metrics` can reference entity columns, reducing model sprawl. +- **Key Pairs**: `primary_key`/`foreign_key` over `join_on` for structured parsing and LLM ease. +- **Dynamic Joins**: Optional `type` lets the LLM adapt to query context, balancing flexibility and simplicity. 
\ No newline at end of file diff --git a/cli/libs/semantic_layer/examples/model_file.yml b/cli/libs/semantic_layer/examples/model_file.yml new file mode 100644 index 000000000..78e8d3914 --- /dev/null +++ b/cli/libs/semantic_layer/examples/model_file.yml @@ -0,0 +1,95 @@ +models: + # Base model representing cultural entities + - name: culture + description: Core model for cultural groups + dimensions: + - name: cultureid + description: Unique identifier for the culture + - name: name + description: Culture name + options: ["Western", "Eastern"] + measures: + - name: revenue + description: Revenue generated by the culture + filters: + # Complex filter using columns from logins and subscriptions + - name: active_subscribed_customer + expr: logins.login_count > {threshold} AND subscriptions.subscription_status = 'active' + args: + - name: threshold + type: integer + description: Minimum number of logins + description: Customers with logins above threshold and active subscription + metrics: + # Metric using entity columns, requires deduplication for many-to-many + - name: popular_product_revenue + expr: SUM(revenue) WHERE culture_products.product_count > 5 + description: Revenue from cultures with popular products + entities: + - name: logins + primary_key: cultureid + foreign_key: cultureid + type: LEFT # Explicitly set, but LLM could override + cardinality: one-to-many + description: Links to login activity + - name: subscriptions + primary_key: cultureid + foreign_key: cultureid + cardinality: one-to-one + description: Links to subscription data (no type, LLM decides) + - name: culture_products + primary_key: cultureid + foreign_key: cultureid + cardinality: many-to-many + description: Links to product associations (many-to-many via junction) + + # Model for login activity + - name: logins + description: Tracks user logins by culture + dimensions: + - name: cultureid + description: Foreign key to culture + measures: + - name: login_count + description: Number of 
logins + entities: + - name: culture + primary_key: cultureid + foreign_key: cultureid + cardinality: many-to-one + + # Model for subscriptions + - name: subscriptions + description: Subscription status for cultures + dimensions: + - name: cultureid + description: Foreign key to culture + - name: subscription_status + description: Current subscription status + options: ["active", "inactive"] + entities: + - name: culture + primary_key: cultureid + foreign_key: cultureid + cardinality: one-to-one + + # Junction model for many-to-many between culture and products + - name: culture_products + description: Junction table linking cultures to products + dimensions: + - name: cultureid + description: Foreign key to culture + - name: productid + description: Foreign key to products + measures: + - name: product_count + description: Number of products in this association + entities: + - name: culture + primary_key: cultureid + foreign_key: cultureid + cardinality: many-to-many + - name: products + primary_key: productid + foreign_key: productid + cardinality: many-to-many \ No newline at end of file diff --git a/cli/libs/semantic_layer/spec.yml b/cli/libs/semantic_layer/spec.yml new file mode 100644 index 000000000..e730eafcf --- /dev/null +++ b/cli/libs/semantic_layer/spec.yml @@ -0,0 +1,37 @@ +# Schema specification for the model structure +models: + - name: string # Required + description: string # Optional + dimensions: + - name: string # Required + description: string # Optional + type: string # Optional, inferred if omitted + searchable: boolean # Optional, default: false + options: [string] # Optional, default: null + measures: + - name: string # Required + description: string # Optional + type: string # Optional, inferred if omitted + metrics: + - name: string # Required + expr: string # Required, can use model.column from entities + description: string # Optional + args: # Optional, required only if expr contains arguments, default: null + - name: string # Required 
+ type: string # Required + description: string # Optional + filters: + - name: string # Required + expr: string # Required, can use model.column from entities + description: string # Optional + args: # Optional, required only if expr contains arguments, default: null + - name: string # Required + type: string # Required + description: string # Optional + entities: + - name: string # Required + primary_key: string # Required + foreign_key: string # Required + type: string # Optional, e.g., "LEFT", "INNER"; LLM decides if omitted + cardinality: string # Optional, e.g., "one-to-many", "many-to-many" + description: string # Optional \ No newline at end of file