Merge pull request #285 from buster-so/evals

Evals
dal 2025-05-08 00:57:10 -07:00 committed by GitHub
commit 54c88cfdda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
41 changed files with 1838 additions and 657 deletions

View File

@ -1,128 +1,25 @@
# General Application Settings
ENVIRONMENT="development"
BUSTER_URL="http://web:3000"
# API VARS
ENVIRONMENT="local"
DATABASE_URL="postgresql://postgres:postgres@127.0.0.1:54322/postgres"
POOLER_URL="postgresql://postgres:postgres@127.0.0.1:54322/postgres"
JWT_SECRET="super-secret-jwt-token-with-at-least-32-characters-long"
REDIS_URL="redis://localhost:6379"
BUSTER_URL="http://localhost:3000"
BUSTER_WH_TOKEN="buster-wh-token"
LOG_LEVEL="debug"
SUPABASE_URL="http://localhost:54321"
SUPABASE_SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU"
# --- API Service Specific ---
# Direct Database Connection (for API service and potentially others)
DATABASE_URL="postgresql://postgres:your-super-secret-and-long-postgres-password@db:5432/postgres"
# Pooled Database Connection (for API service, uses Supavisor)
POOLER_URL="postgresql://postgres:your-super-secret-and-long-postgres-password@db:5432/postgres"
# Redis Connection
REDIS_URL="redis://buster-redis:6379"
# Supabase Connection for API service
SUPABASE_URL="http://kong:8000"
SUPABASE_SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
# AI VARS
RERANK_API_KEY="your_rerank_api_key"
RERANK_MODEL="rerank-v3.5"
RERANK_BASE_URL="https://api.cohere.com/v2/rerank"
LLM_API_KEY="your_llm_api_key"
LLM_BASE_URL="https://api.openai.com/v1"
# --- LLM / AI Services ---
EMBEDDING_PROVIDER="ollama"
EMBEDDING_MODEL="mxbai-embed-large"
COHERE_API_KEY=""
OPENAI_API_KEY="" # For OpenAI models or Supabase Studio assistant
LLM_API_KEY="test-key"
LLM_BASE_URL="http://litellm:4001"
# --- Web Client (Next.js) Specific ---
NEXT_PUBLIC_API_URL="http://localhost:3001" # External URL for the API service (buster-api)
NEXT_PUBLIC_URL="http://localhost:3000" # External URL for the Web service (buster-web)
NEXT_PUBLIC_SUPABASE_URL="http://localhost:8000" # External URL for Supabase (Kong proxy)
NEXT_PUBLIC_SUPABASE_ANON_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJhbm9uIiwKICAgICJpc3MiOiAic3VwYWJhc2UtZGVtbyIsCiAgICAiaWF0IjogMTY0MTc2OTIwMCwKICAgICJleHAiOiAxNzk5NTM1NjAwCn0.dc_X5iR_VP_qT0zsiyj_I_OZ2T9FtRU2BBNWN8Bu4GE"
NEXT_PRIVATE_SUPABASE_SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
#################################################
# Supabase Stack Configuration Variables
# These are primarily used by the Supabase services themselves
# (defined in supabase/docker-compose.yml)
# and are sourced from this .env file when `docker compose up` is run.
#################################################
############
# Secrets
############
POSTGRES_PASSWORD="your-super-secret-and-long-postgres-password"
JWT_SECRET="your-super-secret-jwt-token-with-at-least-32-characters-long"
ANON_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJhbm9uIiwKICAgICJpc3MiOiAic3VwYWJhc2UtZGVtbyIsCiAgICAiaWF0IjogMTY0MTc2OTIwMCwKICAgICJleHAiOiAxNzk5NTM1NjAwCn0.dc_X5iR_VP_qT0zsiyj_I_OZ2T9FtRU2BBNWN8Bu4GE"
SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
DASHBOARD_USERNAME="supabase"
DASHBOARD_PASSWORD="this_password_is_insecure_and_should_be_updated"
############
# Database
############
POSTGRES_HOST="db"
POSTGRES_DB="postgres"
POSTGRES_PORT="5432"
############
# Supavisor -- Database pooler
############
POOLER_PROXY_PORT_TRANSACTION="6543"
POOLER_DEFAULT_POOL_SIZE="20"
POOLER_MAX_CLIENT_CONN="100"
POOLER_TENANT_ID="your-tenant-id"
############
# API Proxy - Kong
############
KONG_HTTP_PORT="8000"
KONG_HTTPS_PORT="8443"
############
# API - PostgREST
############
PGRST_DB_SCHEMAS="public,storage,graphql_public"
############
# Auth - GoTrue
############
SITE_URL="http://localhost:3000" # Default base URL for the site (used in emails, etc.)
ADDITIONAL_REDIRECT_URLS=""
JWT_EXPIRY="3600"
DISABLE_SIGNUP="false"
API_EXTERNAL_URL="http://localhost:8000" # Publicly accessible URL for the Supabase API (via Kong)
## Mailer Config
MAILER_URLPATHS_CONFIRMATION="/auth/v1/verify"
MAILER_URLPATHS_INVITE="/auth/v1/verify"
MAILER_URLPATHS_RECOVERY="/auth/v1/verify"
MAILER_URLPATHS_EMAIL_CHANGE="/auth/v1/verify"
## Email auth
ENABLE_EMAIL_SIGNUP="true"
ENABLE_EMAIL_AUTOCONFIRM="false"
SMTP_ADMIN_EMAIL="admin@buster.so"
SMTP_HOST="supabase-mail"
SMTP_PORT="2500"
SMTP_USER=""
SMTP_PASS=""
SMTP_SENDER_NAME="Buster"
ENABLE_ANONYMOUS_USERS="true"
## Phone auth
ENABLE_PHONE_SIGNUP="true"
ENABLE_PHONE_AUTOCONFIRM="true"
############
# Studio - Supabase Dashboard
############
STUDIO_DEFAULT_ORGANIZATION="Default Organization"
STUDIO_DEFAULT_PROJECT="Default Project"
STUDIO_PORT="3003"
SUPABASE_PUBLIC_URL="http://localhost:8000" # Public URL for Supabase (Kong), used by Studio
# Image Proxy
IMGPROXY_ENABLE_WEBP_DETECTION="true"
############
# Functions - Supabase Edge Functions
############
FUNCTIONS_VERIFY_JWT="false"
############
# Logs - Logflare
############
LOGFLARE_LOGGER_BACKEND_API_KEY="your-super-secret-and-long-logflare-key"
LOGFLARE_API_KEY="your-super-secret-and-long-logflare-key"
DOCKER_SOCKET_LOCATION="/var/run/docker.sock"
GOOGLE_PROJECT_ID="GOOGLE_PROJECT_ID"
GOOGLE_PROJECT_NUMBER="GOOGLE_PROJECT_NUMBER"
# WEB VARS
NEXT_PUBLIC_API_URL="http://127.0.0.1:3001"
NEXT_PUBLIC_URL="http://localhost:3000"
NEXT_PUBLIC_SUPABASE_URL="http://127.0.0.1:54321"
NEXT_PUBLIC_SUPABASE_ANON_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0"
NEXT_PUBLIC_WEB_SOCKET_URL="ws://127.0.0.1:3001"
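The RERANK_* and LLM_* variables above are new with this change and are read from the process environment by the Rust services. As a minimal sketch only (which variables each component actually consumes is inferred from the rest of this diff, where the rerank crate reads RERANK_API_KEY, RERANK_MODEL, and RERANK_BASE_URL), loading the rerank configuration looks roughly like this:

use std::env;

// Minimal sketch: fetch the rerank settings added to the .env file above.
// The exact consumers and any defaults are assumptions, not part of this PR.
fn load_rerank_config() -> Result<(String, String, String), env::VarError> {
    let api_key = env::var("RERANK_API_KEY")?;   // e.g. "your_rerank_api_key"
    let model = env::var("RERANK_MODEL")?;       // e.g. "rerank-v3.5"
    let base_url = env::var("RERANK_BASE_URL")?; // e.g. "https://api.cohere.com/v2/rerank"
    Ok((api_key, model, base_url))
}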

View File

@ -31,10 +31,10 @@ jobs:
target: aarch64-apple-darwin
artifact_name: buster-cli-darwin-arm64.tar.gz
use_tar: true
- os: windows-latest
target: x86_64-pc-windows-msvc
artifact_name: buster-cli-windows-x86_64.zip
use_tar: false
# - os: windows-latest
# target: x86_64-pc-windows-msvc
# artifact_name: buster-cli-windows-x86_64.zip
# use_tar: false
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
@ -95,14 +95,14 @@ jobs:
else
sha256sum ${{ matrix.artifact_name }} > ${{ matrix.artifact_name }}.sha256
fi
- name: Compress binary (Windows)
if: matrix.use_tar == false
working-directory: ./cli
shell: pwsh
run: |
cd target/${{ matrix.target }}/release
Compress-Archive -Path buster-cli.exe -DestinationPath ${{ matrix.artifact_name }}
Get-FileHash -Algorithm SHA256 ${{ matrix.artifact_name }} | Select-Object -ExpandProperty Hash > ${{ matrix.artifact_name }}.sha256
# - name: Compress binary (Windows)
# if: matrix.use_tar == false
# working-directory: ./cli
# shell: pwsh
# run: |
# cd target/${{ matrix.target }}/release
# Compress-Archive -Path buster-cli.exe -DestinationPath ${{ matrix.artifact_name }}
# Get-FileHash -Algorithm SHA256 ${{ matrix.artifact_name }} | Select-Object -ExpandProperty Hash > ${{ matrix.artifact_name }}.sha256
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
@ -115,6 +115,9 @@ jobs:
release:
needs: build
runs-on: ubuntu-latest
outputs:
cli_version: ${{ steps.get_version.outputs.version }}
cli_tag_name: ${{ steps.create_the_release.outputs.tag_name }}
steps:
- name: Checkout code
uses: actions/checkout@v4
@ -131,6 +134,7 @@ jobs:
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Extracted version: $VERSION"
- name: Create Release
id: create_the_release
uses: softprops/action-gh-release@v1
with:
tag_name: v${{ steps.get_version.outputs.version }}
@ -142,8 +146,8 @@ jobs:
**/buster-cli-darwin-x86_64.tar.gz.sha256
**/buster-cli-darwin-arm64.tar.gz
**/buster-cli-darwin-arm64.tar.gz.sha256
**/buster-cli-windows-x86_64.zip
**/buster-cli-windows-x86_64.zip.sha256
# **/buster-cli-windows-x86_64.zip
# **/buster-cli-windows-x86_64.zip.sha256
draft: false
prerelease: false
generate_release_notes: true

View File

@ -94,6 +94,16 @@ jobs:
with:
ref: ${{ github.sha }}
- name: Docker meta for API
id: meta_api
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}
tags: |
type=semver,pattern={{version}},value=${{ env.API_VERSION }}
type=sha,format=short
type=raw,value=latest
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@ -105,16 +115,32 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push API image
id: build_api_image_platform
uses: useblacksmith/build-push-action@v1
with:
context: ./api
file: ./api/Dockerfile
push: true
platforms: ${{ matrix.docker_platform }}
tags: |
${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}:${{ env.API_VERSION }}-${{ matrix.platform }}
${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}:${{ github.sha }}-${{ matrix.platform }}
${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}:latest-${{ matrix.platform }}
tags: ${{ steps.meta_api.outputs.tags }}
labels: ${{ steps.meta_api.outputs.labels }}
outputs: type=image,name=${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }},push-by-digest=true,name-canonical=true
- name: Export API digest
run: |
mkdir -p ${{ runner.temp }}/digests
digest_full="${{ steps.build_api_image_platform.outputs.digest }}"
digest_sha="${digest_full#sha256:}"
echo "Digest SHA for API ${{ matrix.platform }}: ${digest_sha}"
echo "${digest_sha}" > "${{ runner.temp }}/digests/api-${{ matrix.platform }}.sha"
- name: Upload API digest file
uses: actions/upload-artifact@v4
with:
name: api-digest-${{ matrix.platform }}
path: ${{ runner.temp }}/digests/api-${{ matrix.platform }}.sha
if-no-files-found: error
retention-days: 1
- name: Set API Package Visibility to Public
env:
@ -139,6 +165,7 @@ jobs:
name: Build and Push Web Image
needs: prepare_docker_release_info
if: needs.prepare_docker_release_info.outputs.web_version_found == 'true'
environment: main
strategy:
fail-fast: false
matrix:
@ -159,6 +186,16 @@ jobs:
with:
ref: ${{ github.sha }}
- name: Docker meta for Web
id: meta_web
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}
tags: |
type=semver,pattern={{version}},value=${{ env.WEB_VERSION }}
type=sha,format=short
type=raw,value=latest
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@ -170,21 +207,38 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Web image
id: build_web_image_platform
uses: useblacksmith/build-push-action@v1
with:
context: ./web
file: ./web/Dockerfile
push: true
platforms: ${{ matrix.docker_platform }}
tags: |
${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}:${{ env.WEB_VERSION }}-${{ matrix.platform }}
${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}:${{ github.sha }}-${{ matrix.platform }}
${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}:latest-${{ matrix.platform }}
tags: ${{ steps.meta_web.outputs.tags }}
labels: ${{ steps.meta_web.outputs.labels }}
outputs: type=image,name=${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }},push-by-digest=true,name-canonical=true
build-args: |
NEXT_PUBLIC_API_URL=${{ secrets.NEXT_PUBLIC_API_URL }}
NEXT_PUBLIC_URL=${{ secrets.NEXT_PUBLIC_URL }}
NEXT_PUBLIC_SUPABASE_URL=${{ secrets.NEXT_PUBLIC_SUPABASE_URL }}
NEXT_PUBLIC_SUPABASE_ANON_KEY=${{ secrets.NEXT_PUBLIC_SUPABASE_ANON_KEY }}
NEXT_PUBLIC_WEB_SOCKET_URL=${{ secrets.NEXT_PUBLIC_WEB_SOCKET_URL }}
- name: Export Web digest
run: |
mkdir -p ${{ runner.temp }}/digests
digest_full="${{ steps.build_web_image_platform.outputs.digest }}"
digest_sha="${digest_full#sha256:}"
echo "Digest SHA for Web ${{ matrix.platform }}: ${digest_sha}"
echo "${digest_sha}" > "${{ runner.temp }}/digests/web-${{ matrix.platform }}.sha"
- name: Upload Web digest file
uses: actions/upload-artifact@v4
with:
name: web-digest-${{ matrix.platform }}
path: ${{ runner.temp }}/digests/web-${{ matrix.platform }}.sha
if-no-files-found: error
retention-days: 1
- name: Set Web Package Visibility to Public
env:
@ -204,3 +258,134 @@ jobs:
echo "Failed to set package $ORG_NAME/${{ env.WEB_IMAGE_NAME }} visibility to public. HTTP Status: $RESPONSE_CODE"
# Optionally, fail the step: exit 1
fi
merge_api_manifests:
name: Merge API Manifests
runs-on: blacksmith-4vcpu-ubuntu-2204
needs: [prepare_docker_release_info, build_and_push_api]
if: needs.prepare_docker_release_info.outputs.api_version_found == 'true'
steps:
- name: Download API digests
uses: actions/download-artifact@v4
with:
path: ${{ runner.temp }}/all_api_digests
pattern: api-digest-*
merge-multiple: true
- name: Log in to Docker Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta for API Manifest
id: meta_api_manifest
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}
tags: |
type=semver,pattern={{version}},value=${{ needs.prepare_docker_release_info.outputs.api_version }}
type=sha,format=short
type=raw,value=latest
# Ensure DOCKER_METADATA_OUTPUT_JSON is populated for the next step
# outputs: |
# json
- name: Create and push API manifest list
env:
API_IMAGE_FULL_NAME: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}
working-directory: ${{ runner.temp }}/all_api_digests
run: |
echo "Listing downloaded API digests in $(pwd):"
ls -lR .
TAG_ARGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
echo "Generated tag arguments for API manifest: $TAG_ARGS"
DIGEST_FILES_FOUND=$(find . -type f -name '*.sha' -print)
if [ -z "$DIGEST_FILES_FOUND" ]; then
echo "Error: No API digest files (*.sha) found."
exit 1
fi
IMAGE_PLUS_DIGEST_ARGS=""
for digest_file_path in $DIGEST_FILES_FOUND; do
sha_value=$(cat "$digest_file_path")
IMAGE_PLUS_DIGEST_ARGS="$IMAGE_PLUS_DIGEST_ARGS ${API_IMAGE_FULL_NAME}@sha256:${sha_value}"
done
echo "API Manifest images with digests: $IMAGE_PLUS_DIGEST_ARGS"
if [ -z "$IMAGE_PLUS_DIGEST_ARGS" ]; then
echo "Error: No API digests were processed to create the manifest."
exit 1
fi
docker buildx imagetools create $TAG_ARGS $IMAGE_PLUS_DIGEST_ARGS
merge_web_manifests:
name: Merge Web Manifests
runs-on: blacksmith-4vcpu-ubuntu-2204
needs: [prepare_docker_release_info, build_and_push_web]
if: needs.prepare_docker_release_info.outputs.web_version_found == 'true'
steps:
- name: Download Web digests
uses: actions/download-artifact@v4
with:
path: ${{ runner.temp }}/all_web_digests
pattern: web-digest-*
merge-multiple: true
- name: Log in to Docker Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta for Web Manifest
id: meta_web_manifest
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}
tags: |
type=semver,pattern={{version}},value=${{ needs.prepare_docker_release_info.outputs.web_version }}
type=sha,format=short
type=raw,value=latest
# outputs: |
# json
- name: Create and push Web manifest list
env:
WEB_IMAGE_FULL_NAME: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}
working-directory: ${{ runner.temp }}/all_web_digests
run: |
echo "Listing downloaded Web digests in $(pwd):"
ls -lR .
TAG_ARGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
echo "Generated tag arguments for Web manifest: $TAG_ARGS"
DIGEST_FILES_FOUND=$(find . -type f -name '*.sha' -print)
if [ -z "$DIGEST_FILES_FOUND" ]; then
echo "Error: No Web digest files (*.sha) found."
exit 1
fi
IMAGE_PLUS_DIGEST_ARGS=""
for digest_file_path in $DIGEST_FILES_FOUND; do
sha_value=$(cat "$digest_file_path")
IMAGE_PLUS_DIGEST_ARGS="$IMAGE_PLUS_DIGEST_ARGS ${WEB_IMAGE_FULL_NAME}@sha256:${sha_value}"
done
echo "Web Manifest images with digests: $IMAGE_PLUS_DIGEST_ARGS"
if [ -z "$IMAGE_PLUS_DIGEST_ARGS" ]; then
echo "Error: No Web digests were processed to create the manifest."
exit 1
fi
docker buildx imagetools create $TAG_ARGS $IMAGE_PLUS_DIGEST_ARGS

View File

@ -1,61 +0,0 @@
# Build stage for the API
FROM lukemathwalker/cargo-chef AS api-builder
WORKDIR /app/api
COPY api/ .
RUN cargo install diesel_cli --no-default-features --features postgres
RUN cargo build --release --bin bi_api
# Build stage for the web app
FROM node:18 AS web-builder
WORKDIR /app/web
COPY web/ .
RUN npm ci
RUN npm run build
RUN npm prune --production
# Final stage
FROM debian:bookworm-slim
WORKDIR /app
# Install runtime dependencies
RUN apt-get update && apt-get install -y \
ca-certificates \
curl \
postgresql-client \
libpq-dev \
nodejs \
npm \
&& rm -rf /var/lib/apt/lists/*
# Copy built artifacts
COPY --from=api-builder /app/api/target/release/bi_api ./api/
COPY --from=api-builder /usr/local/cargo/bin/diesel /usr/local/bin/diesel
COPY --from=web-builder /app/web/.next ./web/.next
COPY --from=web-builder /app/web/public ./web/public
COPY --from=web-builder /app/web/package.json ./web/
COPY --from=web-builder /app/web/node_modules ./web/node_modules
COPY docker-compose.yml .
COPY api/migrations ./migrations/
COPY api/diesel.toml .
# Copy entrypoint script
COPY <<EOF /app/entrypoint.sh
#!/bin/bash
set -e
until pg_isready -h db -p 5432; do
echo "Waiting for database to be ready..."
sleep 2
done
export DATABASE_URL="postgresql://\${POSTGRES_USER:-postgres}:\${POSTGRES_PASSWORD:-your-super-secret-password}@db:5432/\${POSTGRES_DB:-buster}"
echo "Running diesel migrations..."
diesel migration run
echo "Starting services..."
cd web && npm start & cd .. && docker-compose up
EOF
RUN chmod +x /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]

Makefile (new file, 35 lines)
View File

@ -0,0 +1,35 @@
# Makefile (root)
.PHONY: dev api-dev-fg web-dev-fg stop
# Main development target: runs both API and Web dev servers in parallel.
# They will run until Ctrl+C is pressed.
dev:
@echo "Starting API and Web development servers..."
@echo "Press Ctrl+C to stop all."
# Start API dev server in the background
# The subshell ( ... ) ensures that 'cd' doesn't affect subsequent commands at this level.
(cd api && $(MAKE) dev) & \
# Start Web dev server in the background
(cd web && $(MAKE) dev) & \
# Wait for all background jobs of this shell to complete.
# Since dev servers run indefinitely, this 'wait' will also run indefinitely until interrupted (Ctrl+C).
wait
@echo "Development servers stopped or shell command finished."
# Target to stop API-specific services (like Docker containers, Supabase).
# The web dev server (npm run dev) is expected to be stopped when 'make dev' is interrupted (Ctrl+C).
stop:
@echo "Stopping API services (Redis, Supabase)..."
$(MAKE) -C api stop
@echo "API services stopped. If 'make dev' was running, web server should also be stopped."
# Individual targets if you want to run them separately (e.g., in different terminal tabs)
# These are foreground targets.
api-dev-fg:
@echo "Starting API development server (foreground)..."
cd api && $(MAKE) dev
web-dev-fg:
@echo "Starting Web development server (foreground)..."
cd web && $(MAKE) dev

View File

@ -36,6 +36,8 @@ tokio-retry = { workspace = true }
thiserror = { workspace = true }
raindrop = { path = "../raindrop" }
sql_analyzer = { path = "../sql_analyzer" }
rerank = { path = "../rerank" }
semantic_layer = { path = "../semantic_layer" }
# Development dependencies
[dev-dependencies]

View File

@ -4,6 +4,7 @@ use dataset_security::get_permissioned_datasets;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::env;
use std::sync::Arc;
use tokio::sync::broadcast;
use uuid::Uuid;
@ -23,6 +24,9 @@ use crate::{agent::ModeProvider, Agent, AgentError, AgentExt, AgentThread}; // A
use litellm::AgentMessage;
// Import the semantic layer models
use semantic_layer::models::SemanticLayerSpec; // Assuming models.rs is accessible like this
// Import AgentState and determine_agent_state (assuming they are pub in modes/mod.rs or similar)
// If not, they might need to be moved or re-exported.
// For now, let's assume they are accessible via crate::agents::modes::{AgentState, determine_agent_state}
@ -34,6 +38,7 @@ pub struct BusterSuperAgentOutput {
pub duration: i64,
pub thread_id: Uuid,
pub messages: Vec<AgentMessage>,
pub message_id: Option<Uuid>,
}
#[derive(Debug, Deserialize, Serialize)]
@ -114,16 +119,16 @@ impl DatasetWithDescriptions {
}
// Define structs for YAML parsing
#[derive(Debug, Deserialize)]
struct YamlRoot {
models: Vec<ModelInfo>,
}
// #[derive(Debug, Deserialize)]
// struct YamlRoot {
// models: Vec<ModelInfo>,
// }
#[derive(Debug, Deserialize)]
struct ModelInfo {
name: String,
description: String,
}
// #[derive(Debug, Deserialize)]
// struct ModelInfo {
// name: String,
// description: String,
// }
impl BusterMultiAgent {
pub async fn new(user_id: Uuid, session_id: Uuid, is_follow_up: bool) -> Result<Self> {
@ -135,14 +140,19 @@ impl BusterMultiAgent {
let dataset_descriptions: Vec<String> = permissioned_datasets
.into_iter()
.filter_map(|ds| ds.yml_content) // Get Some(String), filter out None
.map(|content| serde_yaml::from_str::<YamlRoot>(&content)) // Parse String -> Result<YamlRoot, Error>
.map(|content| serde_yaml::from_str::<SemanticLayerSpec>(&content)) // Parse String -> Result<SemanticLayerSpec, Error>
.filter_map(|result| {
// Handle Result
match result {
Ok(parsed_root) => {
Ok(parsed_spec) => {
// Extract info from the first model if available
if let Some(model) = parsed_root.models.first() {
Some(format!("{}: {}", model.name, model.description))
if let Some(model) = parsed_spec.models.first() {
// model.description is Option<String>, handle it
let description = model
.description
.as_deref()
.unwrap_or("No description available");
Some(format!("{}: {}", model.name, description))
} else {
tracing::warn!("Parsed YAML has no models");
None
@ -165,9 +175,15 @@ impl BusterMultiAgent {
// Create the mode provider
let mode_provider = Arc::new(BusterModeProvider { agent_data });
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-pro-exp-03-25".to_string()
};
// Create agent, passing the provider
let agent = Arc::new(Agent::new(
"gemini-2.5-pro-exp-03-25".to_string(), // Initial model (can be overridden by first mode)
model, // Initial model (can be overridden by first mode)
user_id,
session_id,
"buster_multi_agent".to_string(),

View File

@ -1,6 +1,6 @@
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::{collections::HashMap, env};
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
@ -45,7 +45,12 @@ pub fn get_configuration(agent_data: &ModeAgentData, data_source_syntax: Option<
.replace("{SQL_DIALECT_GUIDANCE}", &sql_dialect_guidance);
// 2. Define the model for this mode
let model = "gemini-2.5-pro-exp-03-25".to_string();
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-pro-exp-03-25".to_string()
};
// 3. Define the tool loader closure
let tool_loader: Box<

View File

@ -1,6 +1,6 @@
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::{collections::HashMap, env};
use std::sync::Arc;
use std::pin::Pin;
use std::future::Future;
@ -32,7 +32,12 @@ pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option
// Note: This prompt doesn't use {TODAYS_DATE}
// 2. Define the model for this mode
let model = "gemini-2.5-pro-exp-03-25".to_string(); // Use gemini-2.5-pro-exp-03-25 as requested
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-pro-exp-03-25".to_string()
};
// 3. Define the tool loader closure
let tool_loader: Box<dyn Fn(&Arc<Agent>) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> + Send + Sync> =

View File

@ -1,9 +1,10 @@
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::sync::Arc;
use std::pin::Pin;
use std::env;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
// Import necessary types from the parent module (modes/mod.rs)
use super::{ModeAgentData, ModeConfiguration};
@ -13,102 +14,178 @@ use crate::{Agent, ToolExecutor};
use crate::tools::{
categories::{
file_tools::{
CreateDashboardFilesTool,
CreateMetricFilesTool,
ModifyDashboardFilesTool,
ModifyMetricFilesTool,
SearchDataCatalogTool,
},
planning_tools::{
CreatePlanInvestigative,
CreatePlanStraightforward,
},
response_tools::{
Done,
MessageUserClarifyingQuestion,
CreateDashboardFilesTool, CreateMetricFilesTool, ModifyDashboardFilesTool,
ModifyMetricFilesTool, SearchDataCatalogTool,
},
planning_tools::{CreatePlanInvestigative, CreatePlanStraightforward},
response_tools::{Done, MessageUserClarifyingQuestion},
utility_tools::no_search_needed::NoSearchNeededTool,
},
planning_tools::ReviewPlan,
IntoToolCallExecutor,
};
// Function to get the configuration for the FollowUpInitialization mode
pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
// 1. Get the prompt, formatted with current data
let prompt = FOLLOW_UP_INTIALIZATION_PROMPT
.replace("{DATASETS}", &agent_data.dataset_with_descriptions.join("\n\n"))
.replace(
"{DATASETS}",
&agent_data.dataset_with_descriptions.join("\n\n"),
)
.replace("{TODAYS_DATE}", &agent_data.todays_date);
// 2. Define the model for this mode (Using a default, adjust if needed)
let model = "gemini-2.5-pro-exp-03-25".to_string(); // Assuming default based on original MODEL = None
let model =
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-pro-exp-03-25".to_string()
};
// 3. Define the tool loader closure
let tool_loader: Box<dyn Fn(&Arc<Agent>) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> + Send + Sync> =
Box::new(|agent_arc: &Arc<Agent>| {
let agent_clone = Arc::clone(agent_arc); // Clone Arc for the async block
Box::pin(async move {
// Clear existing tools before loading mode-specific ones
agent_clone.clear_tools().await;
let tool_loader: Box<
dyn Fn(&Arc<Agent>) -> Pin<Box<dyn Future<Output = Result<()>> + Send>> + Send + Sync,
> = Box::new(|agent_arc: &Arc<Agent>| {
let agent_clone = Arc::clone(agent_arc); // Clone Arc for the async block
Box::pin(async move {
// Clear existing tools before loading mode-specific ones
agent_clone.clear_tools().await;
// Instantiate all potentially relevant tools for follow-up state
let search_data_catalog_tool = SearchDataCatalogTool::new(agent_clone.clone());
let no_search_needed_tool = NoSearchNeededTool::new(agent_clone.clone());
let create_plan_straightforward_tool = CreatePlanStraightforward::new(agent_clone.clone());
let create_plan_investigative_tool = CreatePlanInvestigative::new(agent_clone.clone());
let create_metric_files_tool = CreateMetricFilesTool::new(agent_clone.clone());
let modify_metric_files_tool = ModifyMetricFilesTool::new(agent_clone.clone());
let create_dashboard_files_tool = CreateDashboardFilesTool::new(agent_clone.clone());
let modify_dashboard_files_tool = ModifyDashboardFilesTool::new(agent_clone.clone());
let message_user_clarifying_question_tool = MessageUserClarifyingQuestion::new();
let done_tool = Done::new(agent_clone.clone());
let review_tool = ReviewPlan::new(agent_clone.clone());
// Instantiate all potentially relevant tools for follow-up state
let search_data_catalog_tool = SearchDataCatalogTool::new(agent_clone.clone());
let no_search_needed_tool = NoSearchNeededTool::new(agent_clone.clone());
let create_plan_straightforward_tool =
CreatePlanStraightforward::new(agent_clone.clone());
let create_plan_investigative_tool = CreatePlanInvestigative::new(agent_clone.clone());
let create_metric_files_tool = CreateMetricFilesTool::new(agent_clone.clone());
let modify_metric_files_tool = ModifyMetricFilesTool::new(agent_clone.clone());
let create_dashboard_files_tool = CreateDashboardFilesTool::new(agent_clone.clone());
let modify_dashboard_files_tool = ModifyDashboardFilesTool::new(agent_clone.clone());
let message_user_clarifying_question_tool = MessageUserClarifyingQuestion::new();
let done_tool = Done::new(agent_clone.clone());
let review_tool = ReviewPlan::new(agent_clone.clone());
// --- Define Conditions based on Agent State (as per original load_tools) ---
let search_condition = Some(|state: &HashMap<String, Value>| -> bool {
!state.get("searched_data_catalog").and_then(Value::as_bool).unwrap_or(false)
});
let planning_condition = Some(|state: &HashMap<String, Value>| -> bool {
let searched = state.get("searched_data_catalog").and_then(Value::as_bool).unwrap_or(false);
let has_context = state.contains_key("data_context"); // Assuming context presence implies adequacy
let has_plan = state.contains_key("plan_available");
searched && has_context && !has_plan
});
let analysis_condition = Some(|state: &HashMap<String, Value>| -> bool {
state.contains_key("data_context") && state.contains_key("plan_available")
});
let modify_condition = Some(|state: &HashMap<String, Value>| -> bool {
state.contains_key("metrics_available")
});
let review_condition = Some(|state: &HashMap<String, Value>| -> bool {
state.get("review_needed").and_then(Value::as_bool).unwrap_or(false)
});
let always_available = Some(|_state: &HashMap<String, Value>| -> bool { true }); // For done/clarify
// --- Define Conditions based on Agent State (as per original load_tools) ---
let search_condition = Some(|state: &HashMap<String, Value>| -> bool {
!state
.get("searched_data_catalog")
.and_then(Value::as_bool)
.unwrap_or(false)
});
let planning_condition = Some(|state: &HashMap<String, Value>| -> bool {
let searched = state
.get("searched_data_catalog")
.and_then(Value::as_bool)
.unwrap_or(false);
let has_context = state.contains_key("data_context"); // Assuming context presence implies adequacy
let has_plan = state.contains_key("plan_available");
searched && has_context && !has_plan
});
let analysis_condition = Some(|state: &HashMap<String, Value>| -> bool {
state.contains_key("data_context") && state.contains_key("plan_available")
});
let modify_condition = Some(|state: &HashMap<String, Value>| -> bool {
state.contains_key("metrics_available")
});
let review_condition = Some(|state: &HashMap<String, Value>| -> bool {
state
.get("review_needed")
.and_then(Value::as_bool)
.unwrap_or(false)
});
let always_available = Some(|_state: &HashMap<String, Value>| -> bool { true }); // For done/clarify
// Add tools with their respective conditions
agent_clone.add_tool(search_data_catalog_tool.get_name(), search_data_catalog_tool.into_tool_call_executor(), search_condition.clone()).await;
agent_clone.add_tool(no_search_needed_tool.get_name(), no_search_needed_tool.into_tool_call_executor(), search_condition).await;
agent_clone.add_tool(create_plan_straightforward_tool.get_name(), create_plan_straightforward_tool.into_tool_call_executor(), planning_condition.clone()).await;
agent_clone.add_tool(create_plan_investigative_tool.get_name(), create_plan_investigative_tool.into_tool_call_executor(), planning_condition).await;
agent_clone.add_tool(create_metric_files_tool.get_name(), create_metric_files_tool.into_tool_call_executor(), analysis_condition.clone()).await;
agent_clone.add_tool(modify_metric_files_tool.get_name(), modify_metric_files_tool.into_tool_call_executor(), modify_condition.clone()).await;
agent_clone.add_tool(create_dashboard_files_tool.get_name(), create_dashboard_files_tool.into_tool_call_executor(), analysis_condition.clone()).await;
agent_clone.add_tool(modify_dashboard_files_tool.get_name(), modify_dashboard_files_tool.into_tool_call_executor(), modify_condition.clone()).await;
agent_clone.add_tool(review_tool.get_name(), review_tool.into_tool_call_executor(), review_condition).await;
agent_clone.add_tool(message_user_clarifying_question_tool.get_name(), message_user_clarifying_question_tool.into_tool_call_executor(), always_available.clone()).await;
agent_clone.add_tool(done_tool.get_name(), done_tool.into_tool_call_executor(), always_available).await;
// Add tools with their respective conditions
agent_clone
.add_tool(
search_data_catalog_tool.get_name(),
search_data_catalog_tool.into_tool_call_executor(),
search_condition.clone(),
)
.await;
agent_clone
.add_tool(
no_search_needed_tool.get_name(),
no_search_needed_tool.into_tool_call_executor(),
search_condition,
)
.await;
agent_clone
.add_tool(
create_plan_straightforward_tool.get_name(),
create_plan_straightforward_tool.into_tool_call_executor(),
planning_condition.clone(),
)
.await;
agent_clone
.add_tool(
create_plan_investigative_tool.get_name(),
create_plan_investigative_tool.into_tool_call_executor(),
planning_condition,
)
.await;
agent_clone
.add_tool(
create_metric_files_tool.get_name(),
create_metric_files_tool.into_tool_call_executor(),
analysis_condition.clone(),
)
.await;
agent_clone
.add_tool(
modify_metric_files_tool.get_name(),
modify_metric_files_tool.into_tool_call_executor(),
modify_condition.clone(),
)
.await;
agent_clone
.add_tool(
create_dashboard_files_tool.get_name(),
create_dashboard_files_tool.into_tool_call_executor(),
analysis_condition.clone(),
)
.await;
agent_clone
.add_tool(
modify_dashboard_files_tool.get_name(),
modify_dashboard_files_tool.into_tool_call_executor(),
modify_condition.clone(),
)
.await;
agent_clone
.add_tool(
review_tool.get_name(),
review_tool.into_tool_call_executor(),
review_condition,
)
.await;
agent_clone
.add_tool(
message_user_clarifying_question_tool.get_name(),
message_user_clarifying_question_tool.into_tool_call_executor(),
always_available.clone(),
)
.await;
agent_clone
.add_tool(
done_tool.get_name(),
done_tool.into_tool_call_executor(),
always_available,
)
.await;
Ok(())
})
});
Ok(())
})
});
// 4. Define terminating tools for this mode
let terminating_tools = vec![
// From original load_tools
// Use hardcoded names if static access isn't available
"message_user_clarifying_question".to_string(), // Assuming this is the name
"finish_and_respond".to_string(), // Assuming this is the name for Done tool
"finish_and_respond".to_string(), // Assuming this is the name for Done tool
];
// 5. Construct and return the ModeConfiguration
@ -120,7 +197,6 @@ pub fn get_configuration(agent_data: &ModeAgentData) -> ModeConfiguration {
}
}
// Keep the prompt constant, but it's no longer pub
const FOLLOW_UP_INTIALIZATION_PROMPT: &str = r##"## Overview
You are Buster, an AI assistant and expert in **data analytics, data science, and data engineering**. You operate within the **Buster platform**, the world's best BI tool, assisting non-technical users with their analytics tasks. Your capabilities include:

View File

@ -1,6 +1,7 @@
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::env;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
@ -18,16 +19,28 @@ use crate::tools::{
};
// Function to get the configuration for the Initialization mode
pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option<String>) -> ModeConfiguration {
pub fn get_configuration(
agent_data: &ModeAgentData,
_data_source_syntax: Option<String>,
) -> ModeConfiguration {
// 1. Get the prompt, formatted with current data
let prompt = INTIALIZATION_PROMPT
.replace("{DATASETS}", &agent_data.dataset_with_descriptions.join("\n\n"))
.replace(
"{DATASETS}",
&agent_data.dataset_with_descriptions.join("\n\n"),
)
.replace("{TODAYS_DATE}", &agent_data.todays_date);
// 2. Define the model for this mode (Using a default, adjust if needed)
// Since the original MODEL was None, we might use the agent's default
// or specify a standard one like "gemini-2.5-pro-exp-03-25". Let's use "gemini-2.5-pro-exp-03-25".
let model = "gemini-2.5-pro-exp-03-25".to_string();
let model =
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-pro-exp-03-25".to_string()
};
// 3. Define the tool loader closure
let tool_loader: Box<

View File

@ -1,6 +1,6 @@
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::{collections::HashMap, env};
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
@ -21,14 +21,26 @@ use crate::tools::{
};
// Function to get the configuration for the Planning mode
pub fn get_configuration(agent_data: &ModeAgentData, _data_source_syntax: Option<String>) -> ModeConfiguration {
pub fn get_configuration(
agent_data: &ModeAgentData,
_data_source_syntax: Option<String>,
) -> ModeConfiguration {
// 1. Get the prompt, formatted with current data
let prompt = PLANNING_PROMPT
.replace("{TODAYS_DATE}", &agent_data.todays_date)
.replace("{DATASETS}", &agent_data.dataset_with_descriptions.join("\n\n"));
.replace(
"{DATASETS}",
&agent_data.dataset_with_descriptions.join("\n\n"),
);
// 2. Define the model for this mode (Using default based on original MODEL = None)
let model = "gemini-2.5-pro-exp-03-25".to_string();
let model =
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-pro-exp-03-25".to_string()
};
// 3. Define the tool loader closure
let tool_loader: Box<

View File

@ -1,6 +1,6 @@
use anyhow::Result;
use serde_json::Value;
use std::collections::HashMap;
use std::{collections::HashMap, env};
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
@ -19,12 +19,20 @@ use crate::tools::{
};
// Function to get the configuration for the Review mode
pub fn get_configuration(_agent_data: &ModeAgentData, _data_source_syntax: Option<String>) -> ModeConfiguration {
pub fn get_configuration(
_agent_data: &ModeAgentData,
_data_source_syntax: Option<String>,
) -> ModeConfiguration {
// 1. Get the prompt (doesn't need formatting for this mode)
let prompt = REVIEW_PROMPT.to_string(); // Use the correct constant
// 2. Define the model for this mode (From original MODEL const)
let model = "gemini-2.5-pro-exp-03-25".to_string();
let model =
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"o4-mini".to_string()
} else {
"gemini-2.5-flash-preview-04-17".to_string()
};
// 3. Define the tool loader closure
let tool_loader: Box<

View File

@ -27,6 +27,10 @@ use uuid::Uuid;
use dataset_security::{get_permissioned_datasets, PermissionedDataset};
use sqlx::PgPool;
use stored_values;
use rerank::Reranker;
// Import SemanticLayerSpec
use semantic_layer::models::SemanticLayerSpec;
use crate::{agent::Agent, tools::ToolExecutor};
@ -883,25 +887,29 @@ async fn rerank_datasets(
if documents.is_empty() || all_datasets.is_empty() {
return Ok(vec![]);
}
let co = Cohere::default();
let request = ReRankRequest {
query,
documents,
model: ReRankModel::EnglishV3,
top_n: Some(35),
..Default::default()
};
// Initialize your custom reranker
let reranker = Reranker::new()
.map_err(|e| anyhow::anyhow!("Failed to initialize custom Reranker: {}", e))?;
let rerank_results = match co.rerank(&request).await {
// Convert documents from Vec<String> to Vec<&str> for the rerank library
let doc_slices: Vec<&str> = documents.iter().map(AsRef::as_ref).collect();
// Define top_n, e.g., 35 as used with Cohere
let top_n = 35;
// Call your custom reranker's rerank method
let rerank_results = match reranker.rerank(query, &doc_slices, top_n).await {
Ok(results) => results,
Err(e) => {
error!(error = %e, query = query, "Cohere rerank API call failed");
return Err(anyhow::anyhow!("Cohere rerank failed: {}", e));
error!(error = %e, query = query, "Custom reranker API call failed");
return Err(anyhow::anyhow!("Custom reranker failed: {}", e));
}
};
let mut ranked_datasets = Vec::new();
// The structure of RerankResult from your library (index, relevance_score)
// is compatible with the existing loop logic.
for result in rerank_results {
if let Some(dataset) = all_datasets.get(result.index as usize) {
ranked_datasets.push(RankedDataset {
@ -909,17 +917,19 @@ async fn rerank_datasets(
});
} else {
error!(
"Invalid dataset index {} from Cohere for query '{}'. Max index: {}",
"Invalid dataset index {} from custom reranker for query '{}'. Max index: {}",
result.index,
query,
all_datasets.len() - 1
all_datasets.len().saturating_sub(1) // Avoid panic on empty all_datasets (though guarded above)
);
}
}
let relevant_datasets = ranked_datasets.into_iter().collect::<Vec<_>>();
Ok(relevant_datasets)
// The original code collected into Vec<_> then returned. This is fine.
// let relevant_datasets = ranked_datasets.into_iter().collect::<Vec<_>>();
// Ok(relevant_datasets)
// Simpler:
Ok(ranked_datasets)
}
async fn llm_filter_helper(
@ -977,8 +987,14 @@ async fn llm_filter_helper(
let llm_client = LiteLLMClient::new(None, None);
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"gpt-4.1-mini".to_string()
} else {
"gemini-2.0-flash-001".to_string()
};
let request = ChatCompletionRequest {
model: "gemini-2.0-flash-001".to_string(),
model,
messages: vec![AgentMessage::User {
id: None,
content: prompt,
@ -1160,115 +1176,153 @@ async fn generate_embeddings_batch(texts: Vec<String>) -> Result<Vec<(String, Ve
/// Parse YAML content to find models with searchable dimensions
fn extract_searchable_dimensions(yml_content: &str) -> Result<Vec<SearchableDimension>> {
let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content)
.context("Failed to parse dataset YAML content")?;
let mut searchable_dimensions = Vec::new();
// Check if models field exists
if let Some(models) = yaml["models"].as_sequence() {
for model in models {
let model_name = model["name"].as_str().unwrap_or("unknown_model").to_string();
// Check if dimensions field exists
if let Some(dimensions) = model["dimensions"].as_sequence() {
for dimension in dimensions {
// Check if dimension has searchable: true
if let Some(true) = dimension["searchable"].as_bool() {
let dimension_name = dimension["name"].as_str().unwrap_or("unknown_dimension").to_string();
// Store this dimension as searchable
// Try parsing with SemanticLayerSpec first
match serde_yaml::from_str::<SemanticLayerSpec>(yml_content) {
Ok(spec) => {
debug!("Successfully parsed yml_content with SemanticLayerSpec for extract_searchable_dimensions");
for model in spec.models {
for dimension in model.dimensions {
if dimension.searchable {
searchable_dimensions.push(SearchableDimension {
model_name: model_name.clone(), // Clone here to avoid move
dimension_name: dimension_name.clone(),
dimension_path: vec!["models".to_string(), model_name.clone(), "dimensions".to_string(), dimension_name],
model_name: model.name.clone(),
dimension_name: dimension.name.clone(),
// The dimension_path might need adjustment if its usage relies on the old dynamic structure.
// For now, creating a simplified path. This might need review based on how dimension_path is consumed.
dimension_path: vec!["models".to_string(), model.name.clone(), "dimensions".to_string(), dimension.name],
});
}
}
}
}
Err(e_spec) => {
warn!(
"Failed to parse yml_content with SemanticLayerSpec (error: {}), falling back to generic serde_yaml::Value for extract_searchable_dimensions. Consider updating YAML to new spec.",
e_spec
);
// Fallback to original dynamic parsing logic
let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content)
.context("Failed to parse dataset YAML content (fallback)")?;
if let Some(models) = yaml["models"].as_sequence() {
for model_val in models {
let model_name = model_val["name"].as_str().unwrap_or("unknown_model").to_string();
if let Some(dimensions) = model_val["dimensions"].as_sequence() {
for dimension_val in dimensions {
if let Some(true) = dimension_val["searchable"].as_bool() {
let dimension_name = dimension_val["name"].as_str().unwrap_or("unknown_dimension").to_string();
searchable_dimensions.push(SearchableDimension {
model_name: model_name.clone(),
dimension_name: dimension_name.clone(),
dimension_path: vec!["models".to_string(), model_name.clone(), "dimensions".to_string(), dimension_name],
});
}
}
}
}
}
}
}
Ok(searchable_dimensions)
}
/// Extract database structure from YAML content based on actual model structure
fn extract_database_info_from_yaml(yml_content: &str) -> Result<HashMap<String, HashMap<String, HashMap<String, Vec<String>>>>> {
let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content)
.context("Failed to parse dataset YAML content")?;
// Structure: database -> schema -> table -> columns
let mut database_info = HashMap::new();
// Process models
if let Some(models) = yaml["models"].as_sequence() {
for model in models {
// Extract database, schema, and model name (which acts as table name)
let database_name = model["database"].as_str().unwrap_or("unknown").to_string();
let schema_name = model["schema"].as_str().unwrap_or("public").to_string();
let table_name = model["name"].as_str().unwrap_or("unknown_model").to_string();
// Initialize the nested structure if needed
database_info
.entry(database_name.clone())
.or_insert_with(HashMap::new)
.entry(schema_name.clone())
.or_insert_with(HashMap::new);
// Collect column names from dimensions, measures, and metrics
let mut columns = Vec::new();
// Add dimensions
if let Some(dimensions) = model["dimensions"].as_sequence() {
for dim in dimensions {
if let Some(dim_name) = dim["name"].as_str() {
columns.push(dim_name.to_string());
// Also add the expression as a potential column to search
if let Some(expr) = dim["expr"].as_str() {
if expr != dim_name {
columns.push(expr.to_string());
let mut database_info: HashMap<String, HashMap<String, HashMap<String, Vec<String>>>> = HashMap::new();
match serde_yaml::from_str::<SemanticLayerSpec>(yml_content) {
Ok(spec) => {
debug!("Successfully parsed yml_content with SemanticLayerSpec for extract_database_info_from_yaml");
for model in spec.models {
let db_name = model.database.as_deref().unwrap_or("unknown_db").to_string();
let sch_name = model.schema.as_deref().unwrap_or("unknown_schema").to_string();
let tbl_name = model.name.clone(); // model.name is table name
let mut columns = Vec::new();
for dim in model.dimensions {
columns.push(dim.name);
// Assuming 'expr' is not directly a column name in SemanticLayerSpec's Dimension for this purpose.
// If dimensions can have expressions that resolve to column names, adjust here.
}
for measure in model.measures {
columns.push(measure.name);
// Assuming 'expr' is not directly a column name here either.
}
for metric in model.metrics {
columns.push(metric.name); // Metrics usually have names, expressions might be too complex for simple column list
}
database_info
.entry(db_name)
.or_default()
.entry(sch_name)
.or_default()
.insert(tbl_name, columns);
}
}
Err(e_spec) => {
warn!(
"Failed to parse yml_content with SemanticLayerSpec (error: {}), falling back to generic serde_yaml::Value for extract_database_info_from_yaml. Consider updating YAML to new spec.",
e_spec
);
let yaml: serde_yaml::Value = serde_yaml::from_str(yml_content)
.context("Failed to parse dataset YAML content (fallback)")?;
if let Some(models) = yaml["models"].as_sequence() {
for model_val in models {
let database_name = model_val["database"].as_str().unwrap_or("unknown").to_string();
let schema_name = model_val["schema"].as_str().unwrap_or("public").to_string();
let table_name = model_val["name"].as_str().unwrap_or("unknown_model").to_string();
database_info
.entry(database_name.clone())
.or_insert_with(HashMap::new)
.entry(schema_name.clone())
.or_insert_with(HashMap::new);
let mut columns = Vec::new();
if let Some(dimensions) = model_val["dimensions"].as_sequence() {
for dim in dimensions {
if let Some(dim_name) = dim["name"].as_str() {
columns.push(dim_name.to_string());
if let Some(expr) = dim["expr"].as_str() {
if expr != dim_name {
columns.push(expr.to_string());
}
}
}
}
}
}
}
// Add measures
if let Some(measures) = model["measures"].as_sequence() {
for measure in measures {
if let Some(measure_name) = measure["name"].as_str() {
columns.push(measure_name.to_string());
// Also add the expression as a potential column to search
if let Some(expr) = measure["expr"].as_str() {
if expr != measure_name {
columns.push(expr.to_string());
if let Some(measures) = model_val["measures"].as_sequence() {
for measure in measures {
if let Some(measure_name) = measure["name"].as_str() {
columns.push(measure_name.to_string());
if let Some(expr) = measure["expr"].as_str() {
if expr != measure_name {
columns.push(expr.to_string());
}
}
}
}
}
}
}
// Add metrics
if let Some(metrics) = model["metrics"].as_sequence() {
for metric in metrics {
if let Some(metric_name) = metric["name"].as_str() {
columns.push(metric_name.to_string());
if let Some(metrics) = model_val["metrics"].as_sequence() {
for metric in metrics {
if let Some(metric_name) = metric["name"].as_str() {
columns.push(metric_name.to_string());
}
}
}
database_info
.get_mut(&database_name)
.unwrap()
.get_mut(&schema_name)
.unwrap()
.insert(table_name, columns);
}
}
// Store columns for this model
database_info
.get_mut(&database_name)
.unwrap()
.get_mut(&schema_name)
.unwrap()
.insert(table_name, columns);
}
}
Ok(database_info)
}
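The two fallback parsers above deserialize dataset YAML into semantic_layer::models::SemanticLayerSpec before retrying with untyped serde_yaml::Value. The spec's definition is not shown in this PR; the following is a rough sketch of its shape inferred only from the fields accessed in this file and in buster_multi_agent.rs (field types, serde defaults, and any omitted fields are assumptions):

use serde::Deserialize;

// Approximate shape only, inferred from usage in this diff.
#[derive(Debug, Deserialize)]
struct SemanticLayerSpec {
    models: Vec<Model>,
}

#[derive(Debug, Deserialize)]
struct Model {
    name: String,
    description: Option<String>,
    database: Option<String>,
    schema: Option<String>,
    #[serde(default)]
    dimensions: Vec<Dimension>,
    #[serde(default)]
    measures: Vec<Measure>,
    #[serde(default)]
    metrics: Vec<Metric>,
}

#[derive(Debug, Deserialize)]
struct Dimension {
    name: String,
    #[serde(default)]
    searchable: bool,
}

#[derive(Debug, Deserialize)]
struct Measure {
    name: String,
}

#[derive(Debug, Deserialize)]
struct Metric {
    name: String,
}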

View File

@ -1,3 +1,5 @@
use std::env;
use anyhow::Result;
use litellm::{AgentMessage, ChatCompletionRequest, LiteLLMClient, Metadata, ResponseFormat};
use serde_json::Value;
@ -61,8 +63,14 @@ Example Output for the above plan: `["Create line chart visualization 'Daily Tra
plan
);
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"gpt-4.1-mini".to_string()
} else {
"gemini-2.0-flash-001".to_string()
};
let request = ChatCompletionRequest {
model: "gemini-2.0-flash-001".to_string(),
model,
messages: vec![AgentMessage::User { id: None, content: prompt, name: None }],
stream: Some(false),
response_format: Some(ResponseFormat { type_: "json_object".to_string(), json_schema: None }),

View File

@ -20,6 +20,7 @@ regex = { workspace = true }
indexmap = { workspace = true }
async-trait = { workspace = true }
# Local dependencies
database = { path = "../database" }
agents = { path = "../agents" }

View File

@ -69,9 +69,15 @@ pub async fn generate_conversation_title(
// Set up LiteLLM client
let llm_client = LiteLLMClient::new(None, None);
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"gpt-4.1-mini".to_string()
} else {
"gemini-2.0-flash-001".to_string()
};
// Create the request
let request = ChatCompletionRequest {
model: "gpt-4o-mini".to_string(),
model,
messages: vec![LiteLLMAgentMessage::User {
id: None,
content: prompt,

View File

@ -2,6 +2,7 @@ use agents::tools::file_tools::common::{generate_deterministic_uuid, ModifyFiles
use dashmap::DashMap;
use middleware::AuthenticatedUser;
use std::collections::HashSet;
use std::env;
use std::{collections::HashMap, time::{Instant, Duration}};
use std::sync::Arc;
@ -2712,9 +2713,15 @@ pub async fn generate_conversation_title(
// Set up LiteLLM client
let llm_client = LiteLLMClient::new(None, None);
let model = if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"gpt-4.1-mini".to_string()
} else {
"gemini-2.0-flash-001".to_string()
};
// Create the request
let request = ChatCompletionRequest {
model: "gemini-2.0-flash-001".to_string(),
model,
messages: vec![LiteLLMAgentMessage::User {
id: None,
content: prompt,

View File

@ -94,48 +94,50 @@ pub async fn auth(mut req: Request, next: Next) -> Result<Response, StatusCode>
};
// --- Payment Required Check START ---
if let Some(org_membership) = user.organizations.get(0) {
let org_id = org_membership.id;
let pg_pool = get_pg_pool();
let mut conn = match pg_pool.get().await {
Ok(conn) => conn,
Err(e) => {
tracing::error!("Failed to get DB connection for payment check: {}", e);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
};
if env::var("ENVIRONMENT").unwrap_or_default() == "production" {
if let Some(org_membership) = user.organizations.get(0) {
let org_id = org_membership.id;
let pg_pool = get_pg_pool();
let mut conn = match pg_pool.get().await {
Ok(conn) => conn,
Err(e) => {
tracing::error!("Failed to get DB connection for payment check: {}", e);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
};
match database::schema::organizations::table
.filter(database::schema::organizations::id.eq(org_id))
.select(database::schema::organizations::payment_required)
.first::<bool>(&mut conn)
.await
{
Ok(payment_required) => {
if payment_required {
tracing::warn!(
match database::schema::organizations::table
.filter(database::schema::organizations::id.eq(org_id))
.select(database::schema::organizations::payment_required)
.first::<bool>(&mut conn)
.await
{
Ok(payment_required) => {
if payment_required {
tracing::warn!(
user_id = %user.id,
org_id = %org_id,
"Access denied due to payment requirement for organization."
);
return Err(StatusCode::PAYMENT_REQUIRED);
}
}
Err(diesel::NotFound) => {
tracing::error!(
user_id = %user.id,
org_id = %org_id,
"Access denied due to payment requirement for organization."
"Organization not found during payment check."
);
return Err(StatusCode::PAYMENT_REQUIRED);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
Err(e) => {
tracing::error!(
user_id = %user.id,
org_id = %org_id,
"Database error during payment check: {}", e
);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
}
Err(diesel::NotFound) => {
tracing::error!(
user_id = %user.id,
org_id = %org_id,
"Organization not found during payment check."
);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
Err(e) => {
tracing::error!(
user_id = %user.id,
org_id = %org_id,
"Database error during payment check: {}", e
);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
}
}
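The net effect of this hunk is that the organization payment_required lookup now only runs when ENVIRONMENT is "production"; in local or development environments the middleware skips the database check entirely. A one-line sketch of the gate (helper name hypothetical, not part of this PR):

use std::env;

// Hypothetical helper: the middleware only enforces payment_required in production.
fn payment_check_enabled() -> bool {
    env::var("ENVIRONMENT").unwrap_or_default() == "production"
}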

View File

@ -7,3 +7,7 @@ edition = "2021"
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
[dev-dependencies]
dotenv = { workspace = true }
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }

View File

@ -2,15 +2,7 @@ use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::error::Error;
#[derive(Debug)]
pub enum RerankerType {
Cohere,
Mxbai,
Jina,
}
pub struct Reranker {
reranker_type: RerankerType,
api_key: String,
base_url: String,
model: String,
@ -19,23 +11,11 @@ pub struct Reranker {
impl Reranker {
pub fn new() -> Result<Self, Box<dyn Error>> {
let provider = std::env::var("RERANK_PROVIDER")?;
let reranker_type = match provider.to_lowercase().as_str() {
"cohere" => RerankerType::Cohere,
"mxbai" => RerankerType::Mxbai,
"jina" => RerankerType::Jina,
_ => return Err("Invalid provider specified".into()),
};
let api_key = std::env::var("RERANK_API_KEY")?;
let model = std::env::var("RERANK_MODEL")?;
let base_url = match reranker_type {
RerankerType::Cohere => "https://api.cohere.com/v2/rerank",
RerankerType::Mxbai => "https://api.mixedbread.ai/v1/rerank",
RerankerType::Jina => "https://api.jina.ai/v1/rerank",
}.to_string();
let base_url = std::env::var("RERANK_BASE_URL")?;
let client = Client::new();
Ok(Self {
reranker_type,
api_key,
base_url,
model,

View File

@ -0,0 +1,56 @@
use rerank::{Reranker, RerankResult};
use std::error::Error;
#[tokio::test]
async fn test_reranker_integration() -> Result<(), Box<dyn Error>> {
// Load environment variables from .env file
dotenv::dotenv().ok();
// Initialize the reranker
let reranker = Reranker::new()?;
// Define a sample query and documents
let query = "What is the capital of France?";
let documents = vec![
"Paris is a major European city and a global center for art, fashion, gastronomy and culture.",
"London is the capital and largest city of England and the United Kingdom.",
"The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.",
"Berlin is the capital and largest city of Germany by both area and population.",
];
let top_n = 2;
// Perform reranking
let results: Vec<RerankResult> = reranker.rerank(query, &documents, top_n).await?;
// Assertions
assert_eq!(results.len(), top_n, "Should return top_n results");
// Check that indices are within the bounds of the original documents
for result in &results {
assert!(result.index < documents.len(), "Result index should be valid");
}
// Optional: Print results for manual verification (can be removed later)
println!("Query: {}", query);
for result in &results {
println!(
"Document Index: {}, Score: {:.4}, Document: {}",
result.index,
result.relevance_score,
documents[result.index]
);
}
// Example assertion: if we expect Paris-related documents to be ranked higher.
// This is a very basic check and might need adjustment based on actual model behavior.
if !results.is_empty() {
let first_result_doc = documents[results[0].index];
assert!(
first_result_doc.to_lowercase().contains("paris"),
"The top result for 'capital of France' should ideally mention Paris. Model output: {}",
first_result_doc
);
}
Ok(())
}

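A hedged usage sketch outside of tests, relying only on the API surface the integration test above exercises (Reranker::new, rerank, and the index / relevance_score fields of RerankResult); the best_match function itself is hypothetical.
use rerank::{Reranker, RerankResult};
use std::error::Error;
// Returns the single best-matching document for a query, or None when the
// reranker returns no results. Mirrors the calling convention of the test above.
async fn best_match(query: &str, documents: Vec<&str>) -> Result<Option<String>, Box<dyn Error>> {
    let reranker = Reranker::new()?; // configured from the RERANK_* environment variables
    let results: Vec<RerankResult> = reranker.rerank(query, &documents, 1).await?;
    Ok(results.first().map(|r| documents[r.index].to_string()))
}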
View File

@ -1,5 +1,5 @@
dev:
cd .. && docker compose up -d redis && cd api && \
cd .. && docker run -d --name buster-redis-make -p 6379:6379 -v buster_redis_data:/data redis && cd api && \
supabase start
supabase db reset
export DATABASE_URL=postgres://postgres:postgres@127.0.0.1:54322/postgres && \
@ -21,12 +21,12 @@ update-seed:
> libs/database/seed.sql
stop:
docker compose down && \
supabase stop && \
pkill ollama
docker stop buster-redis-make || true && \
docker rm buster-redis-make || true && \
supabase stop
fast:
cd .. && docker compose up -d redis && cd api && \
cd .. && docker run -d --name buster-redis-make -p 6379:6379 -v buster_redis_data:/data redis && cd api && \
export RUST_LOG=debug && \
export CARGO_INCREMENTAL=1 && \
nice cargo watch -C server -x run

View File

@ -1,6 +1,6 @@
[package]
name = "buster_server"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
default-run = "buster_server"

View File

@ -10,7 +10,10 @@ use futures::stream::{self, StreamExt};
use litellm::{AgentMessage, ChatCompletionRequest, LiteLLMClient, Metadata, ResponseFormat};
use middleware::types::AuthenticatedUser;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::{
collections::{HashMap, HashSet},
env,
};
use tracing::{debug, error, info, warn};
use uuid::Uuid;
@ -318,9 +321,16 @@ async fn filter_datasets_with_llm(
// Initialize LiteLLM client
let llm_client = LiteLLMClient::new(None, None);
let model =
if env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
"gpt-4.1-mini".to_string()
} else {
"gemini-2.0-flash-001".to_string()
};
// Create the request
let request = ChatCompletionRequest {
model: "gemini-2.0-flash-001".to_string(), // Using a small model for cost efficiency
model, // Using a small model for cost efficiency
messages: vec![AgentMessage::User {
id: None,
content: prompt,

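The model choice above keys off the ENVIRONMENT variable that docker-compose now passes through to the API container (see the `- ENVIRONMENT=${ENVIRONMENT}` addition later in this diff). A minimal sketch of the same check factored into a helper; the name select_filter_model is illustrative and does not exist in this commit.
// Hedged sketch: identical logic to the inline expression above.
fn select_filter_model() -> String {
    if std::env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()) == "local" {
        "gpt-4.1-mini".to_string()
    } else {
        "gemini-2.0-flash-001".to_string()
    }
}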
View File

@ -42,6 +42,7 @@ crossterm = "0.29" # Add crossterm explicitly
rustyline = "15.0.0"
once_cell = "1.19.0"
pathdiff = "0.2.1"
rust-embed = { version = "8.7.1", features = ["include-exclude"] }
# Keep dev-dependencies separate if they aren't shared
# tempfile = "3.16.0"

View File

@ -1,6 +1,6 @@
[package]
name = "buster-cli"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
build = "build.rs"
@ -46,6 +46,7 @@ once_cell = { workspace = true }
pathdiff = { workspace = true }
# Add the shared query engine library
query_engine = { workspace = true }
rust-embed = { workspace = true, features = ["include-exclude"] }
litellm = { path = "../../api/libs/litellm" }

View File

@ -0,0 +1,163 @@
use crate::error::BusterError;
use dirs;
use std::fs;
use std::path::PathBuf;
use crate::commands::config_utils;
async fn get_app_base_dir() -> Result<PathBuf, BusterError> {
let home_dir = dirs::home_dir().ok_or_else(|| {
BusterError::CommandError(
"Failed to get home directory. Cannot determine app directory for config reset.".to_string(),
)
})?;
Ok(home_dir.join(".buster"))
}
pub async fn reset_llm_settings() -> Result<(), BusterError> {
let app_base_dir = get_app_base_dir().await?;
println!("Resetting LLM and Reranker configurations...");
let files_to_delete = [
".openai_api_key",
".reranker_provider",
".reranker_api_key",
".reranker_model",
".reranker_base_url",
];
let mut all_successful = true;
let mut any_deleted = false;
for file_name in files_to_delete.iter() {
let file_path = app_base_dir.join(file_name);
if file_path.exists() {
match fs::remove_file(&file_path) {
Ok(_) => {
println!("Successfully deleted {}", file_path.display());
any_deleted = true;
}
Err(e) => {
eprintln!("Failed to delete {}: {}. Please remove it manually.", file_path.display(), e);
all_successful = false;
}
}
}
}
if !any_deleted && all_successful {
println!("No cached LLM or Reranker configurations found to reset.");
} else if all_successful {
println!("LLM and Reranker configurations have been reset successfully.");
println!("You will be prompted to enter them again on the next relevant command (e.g., buster start).");
} else {
println!("Some configurations could not be automatically reset. Please check messages above.");
}
Ok(())
}
// Function to get current LLM API key (from cache or .env as a fallback display)
fn get_current_llm_api_key_display(app_base_dir: &PathBuf) -> Result<String, BusterError> {
match config_utils::get_cached_value(app_base_dir, ".openai_api_key")? {
Some(key) => Ok(if key.len() > 4 { format!("...{}", &key[key.len()-4..]) } else { "****".to_string() }),
None => Ok("Not set".to_string()), // Or try to read from .env if complex display needed
}
}
// Function to get current Reranker config display (from cache or .env)
fn get_current_reranker_config_display(app_base_dir: &PathBuf) -> Result<String, BusterError> {
let provider = config_utils::get_cached_value(app_base_dir, ".reranker_provider")?;
let model = config_utils::get_cached_value(app_base_dir, ".reranker_model")?;
if let (Some(p), Some(m)) = (provider, model) {
Ok(format!("Provider: {}, Model: {}", p, m))
} else {
Ok("Not fully set".to_string())
}
}
pub async fn manage_settings_interactive() -> Result<(), BusterError> {
let app_base_dir = config_utils::get_app_base_dir().map_err(|e| {
BusterError::CommandError(format!("Failed to get app base directory: {}", e))
})?;
let target_dotenv_path = app_base_dir.join(".env");
println!("--- Buster Interactive Configuration ---");
// Manage OpenAI API Key
let current_llm_key_display = get_current_llm_api_key_display(&app_base_dir)?;
let update_llm = config_utils::prompt_for_input(
&format!("Current OpenAI API Key: {}. Update? (y/n)", current_llm_key_display),
Some("n"),
false
)?.to_lowercase();
let mut llm_api_key_to_set: Option<String> = None;
if update_llm == "y" {
// Call with force_prompt = true, but the function itself will ask for confirmation if a key exists
// For a cleaner flow here, we handle the top-level decision to update.
let new_key = config_utils::prompt_for_input("Enter new OpenAI API Key:", None, true)?;
config_utils::cache_value(&app_base_dir, ".openai_api_key", &new_key)?;
llm_api_key_to_set = Some(new_key);
println!("OpenAI API Key updated and cached.");
} else {
// If not updating, we still need the current key for .env update
llm_api_key_to_set = config_utils::get_cached_value(&app_base_dir, ".openai_api_key")?;
}
// Manage Reranker Settings
let current_reranker_display = get_current_reranker_config_display(&app_base_dir)?;
let update_reranker = config_utils::prompt_for_input(
&format!("Current Reranker settings: {}. Update? (y/n)", current_reranker_display),
Some("n"),
false
)?.to_lowercase();
let mut reranker_config_to_set: Option<config_utils::RerankerConfig> = None;
if update_reranker == "y" {
// This function internally handles its own detailed prompting flow
let new_reranker_config = config_utils::prompt_and_manage_reranker_settings(&app_base_dir, true)?;
reranker_config_to_set = Some(new_reranker_config);
println!("Reranker settings updated and cached.");
} else {
// If not updating, get current cached values for .env update
let p = config_utils::get_cached_value(&app_base_dir, ".reranker_provider")?;
let k = config_utils::get_cached_value(&app_base_dir, ".reranker_api_key")?;
let m = config_utils::get_cached_value(&app_base_dir, ".reranker_model")?;
let u = config_utils::get_cached_value(&app_base_dir, ".reranker_base_url")?;
if let (Some(provider), Some(api_key), Some(model), Some(base_url)) = (p,k,m,u) {
reranker_config_to_set = Some(config_utils::RerankerConfig { provider, api_key, model, base_url });
}
}
// Update .env file with the (potentially new) settings
// We need to ensure we have values for all fields update_env_file expects,
// even if only some were updated in this session.
let final_llm_api_key = llm_api_key_to_set.clone();
let final_rerank_api_key = reranker_config_to_set.as_ref().map(|c| c.api_key.clone());
let final_rerank_model = reranker_config_to_set.as_ref().map(|c| c.model.clone());
let final_rerank_base_url = reranker_config_to_set.as_ref().map(|c| c.base_url.clone());
// Default LLM_BASE_URL if not set (important if .env is created from scratch)
// The update_env_file function also has a fallback for this.
let llm_base_url_default = "https://api.openai.com/v1".to_string();
let current_llm_base_url = if target_dotenv_path.exists() {
let env_content = std::fs::read_to_string(&target_dotenv_path).map_err(|e| {
BusterError::CommandError(format!("Failed to read .env file: {}", e))
})?;
env_content.lines().find(|line| line.starts_with("LLM_BASE_URL=")).map_or(None, |line| line.split_once('=').map(|(_,v)| v.trim_matches('"').to_string()))
} else { None };
config_utils::update_env_file(
&target_dotenv_path,
final_llm_api_key.as_deref(),
final_rerank_api_key.as_deref(),
final_rerank_model.as_deref(),
final_rerank_base_url.as_deref(),
current_llm_base_url.as_deref().or(Some(&llm_base_url_default)) // Ensure LLM_BASE_URL is present
)?;
println!("Configuration saved to {}.", target_dotenv_path.display());
Ok(())
}

View File

@ -0,0 +1,230 @@
use crate::error::BusterError;
use dirs;
use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
// Moved from run.rs
pub fn prompt_for_input(prompt_message: &str, default_value: Option<&str>, is_sensitive: bool) -> Result<String, BusterError> {
if let Some(def_val) = default_value {
print!("{} (default: {}): ", prompt_message, def_val);
} else {
print!("{}: ", prompt_message);
}
io::stdout().flush().map_err(|e| BusterError::CommandError(format!("Failed to flush stdout: {}", e)))?;
let mut input = String::new();
// Simple masking for sensitive input is complex in raw terminal io without extra crates.
// For a real CLI, rpassword or similar would be used.
// Here, we just read the line.
io::stdin().read_line(&mut input).map_err(|e| BusterError::CommandError(format!("Failed to read line: {}", e)))?;
let trimmed_input = input.trim().to_string();
if trimmed_input.is_empty() {
if let Some(def_val) = default_value {
Ok(def_val.to_string())
} else {
println!("Input cannot be empty. Please try again.");
prompt_for_input(prompt_message, default_value, is_sensitive) // Recurse
}
} else {
Ok(trimmed_input)
}
}
pub fn get_app_base_dir() -> Result<PathBuf, BusterError> {
dirs::home_dir()
.map(|home| home.join(".buster"))
.ok_or_else(|| BusterError::CommandError("Failed to get home directory.".to_string()))
}
pub fn get_cached_value(app_base_dir: &Path, cache_file_name: &str) -> Result<Option<String>, BusterError> {
let cache_file_path = app_base_dir.join(cache_file_name);
if cache_file_path.exists() {
fs::read_to_string(cache_file_path)
.map(|val| Some(val.trim().to_string()))
.map_err(|e| BusterError::CommandError(format!("Failed to read cached file {}: {}", cache_file_name, e)))
} else {
Ok(None)
}
}
pub fn cache_value(app_base_dir: &Path, cache_file_name: &str, value: &str) -> Result<(), BusterError> {
let cache_file_path = app_base_dir.join(cache_file_name);
fs::create_dir_all(app_base_dir).map_err(|e| BusterError::CommandError(format!("Failed to create app base dir {}: {}", app_base_dir.display(), e)))?;
fs::write(cache_file_path, value)
.map_err(|e| BusterError::CommandError(format!("Failed to cache value to {}: {}", cache_file_name, e)))
}
pub fn update_env_file(
target_dotenv_path: &Path,
llm_api_key: Option<&str>,
rerank_api_key: Option<&str>,
rerank_model: Option<&str>,
rerank_base_url: Option<&str>,
llm_base_url: Option<&str> // Added for completeness; the user is not prompted for this yet
) -> Result<(), BusterError> {
let mut new_env_lines: Vec<String> = Vec::new();
let mut llm_key_updated = false;
let mut rerank_key_updated = false;
let mut rerank_model_updated = false;
let mut rerank_base_updated = false;
let mut llm_base_updated = false;
if target_dotenv_path.exists() {
let env_content = fs::read_to_string(target_dotenv_path).map_err(|e| {
BusterError::CommandError(format!("Failed to read .env file at {}: {}", target_dotenv_path.display(), e))
})?;
for line in env_content.lines() {
if line.starts_with("LLM_API_KEY=") && llm_api_key.is_some() {
new_env_lines.push(format!("LLM_API_KEY=\"{}\"", llm_api_key.unwrap()));
llm_key_updated = true;
} else if line.starts_with("RERANK_API_KEY=") && rerank_api_key.is_some() {
new_env_lines.push(format!("RERANK_API_KEY=\"{}\"", rerank_api_key.unwrap()));
rerank_key_updated = true;
} else if line.starts_with("RERANK_MODEL=") && rerank_model.is_some() {
new_env_lines.push(format!("RERANK_MODEL=\"{}\"", rerank_model.unwrap()));
rerank_model_updated = true;
} else if line.starts_with("RERANK_BASE_URL=") && rerank_base_url.is_some() {
new_env_lines.push(format!("RERANK_BASE_URL=\"{}\"", rerank_base_url.unwrap()));
rerank_base_updated = true;
} else if line.starts_with("LLM_BASE_URL=") && llm_base_url.is_some() {
new_env_lines.push(format!("LLM_BASE_URL=\"{}\"", llm_base_url.unwrap()));
llm_base_updated = true;
} else {
new_env_lines.push(line.to_string());
}
}
}
// Append any keys that were not already present in the file, when new values are provided
if !llm_key_updated && llm_api_key.is_some() {
new_env_lines.push(format!("LLM_API_KEY=\"{}\"", llm_api_key.unwrap()));
}
if !rerank_key_updated && rerank_api_key.is_some() {
new_env_lines.push(format!("RERANK_API_KEY=\"{}\"", rerank_api_key.unwrap()));
}
if !rerank_model_updated && rerank_model.is_some() {
new_env_lines.push(format!("RERANK_MODEL=\"{}\"", rerank_model.unwrap()));
}
if !rerank_base_updated && rerank_base_url.is_some() {
new_env_lines.push(format!("RERANK_BASE_URL=\"{}\"", rerank_base_url.unwrap()));
}
if !llm_base_updated && llm_base_url.is_some() {
new_env_lines.push(format!("LLM_BASE_URL=\"{}\"", llm_base_url.unwrap()));
} else if !llm_base_updated && llm_base_url.is_none() && !target_dotenv_path.exists() {
// Ensure default LLM_BASE_URL if .env is being created from scratch and no override provided
new_env_lines.push("LLM_BASE_URL=\"https://api.openai.com/v1\"".to_string());
}
fs::write(target_dotenv_path, new_env_lines.join("\n")).map_err(|e| {
BusterError::CommandError(format!("Failed to write updated .env file to {}: {}", target_dotenv_path.display(), e))
})
}
pub fn prompt_and_manage_openai_api_key(app_base_dir: &Path, force_prompt: bool) -> Result<String, BusterError> {
let cache_file = ".openai_api_key";
let mut current_key = get_cached_value(app_base_dir, cache_file)?;
if force_prompt || current_key.is_none() {
if current_key.is_some() {
let key_display = current_key.as_ref().map_or("", |k| if k.len() > 4 { &k[k.len()-4..] } else { "****" });
let update_choice = prompt_for_input(&format!("Current OpenAI API key ends with ...{}. Update? (y/n)", key_display), Some("n"), false)?.to_lowercase();
if update_choice != "y" {
return Ok(current_key.unwrap());
}
}
let new_key = prompt_for_input("Enter your OpenAI API Key:", None, true)?;
cache_value(app_base_dir, cache_file, &new_key)?;
current_key = Some(new_key);
}
current_key.ok_or_else(|| BusterError::CommandError("OpenAI API Key setup failed.".to_string()))
}
pub struct RerankerConfig {
pub provider: String,
pub api_key: String,
pub model: String,
pub base_url: String,
}
pub fn prompt_and_manage_reranker_settings(app_base_dir: &Path, force_prompt: bool) -> Result<RerankerConfig, BusterError> {
let provider_cache = ".reranker_provider";
let key_cache = ".reranker_api_key";
let model_cache = ".reranker_model";
let url_cache = ".reranker_base_url";
let mut current_provider = get_cached_value(app_base_dir, provider_cache)?;
let mut current_key = get_cached_value(app_base_dir, key_cache)?;
let mut current_model = get_cached_value(app_base_dir, model_cache)?;
let mut current_url = get_cached_value(app_base_dir, url_cache)?;
let mut needs_update = force_prompt;
if !needs_update && (current_provider.is_none() || current_key.is_none() || current_model.is_none() || current_url.is_none()) {
needs_update = true; // If any part is missing, force update flow for initial setup
}
if needs_update {
if !force_prompt && current_provider.is_some() && current_model.is_some() { // Already prompted if force_prompt is true
let update_choice = prompt_for_input(&format!("Current Reranker: {} (Model: {}). Update settings? (y/n)", current_provider.as_ref().unwrap_or(&"N/A".to_string()), current_model.as_ref().unwrap_or(&"N/A".to_string())), Some("n"), false)?.to_lowercase();
if update_choice != "y" && current_provider.is_some() && current_key.is_some() && current_model.is_some() && current_url.is_some(){
return Ok(RerankerConfig {
provider: current_provider.unwrap(),
api_key: current_key.unwrap(),
model: current_model.unwrap(),
base_url: current_url.unwrap(),
});
}
} else if force_prompt && current_provider.is_some() && current_model.is_some() {
let update_choice = prompt_for_input(&format!("Current Reranker: {} (Model: {}). Update settings? (y/n)", current_provider.as_ref().unwrap_or(&"N/A".to_string()), current_model.as_ref().unwrap_or(&"N/A".to_string())), Some("n"), false)?.to_lowercase();
if update_choice != "y" && current_provider.is_some() && current_key.is_some() && current_model.is_some() && current_url.is_some(){
return Ok(RerankerConfig {
provider: current_provider.unwrap(),
api_key: current_key.unwrap(),
model: current_model.unwrap(),
base_url: current_url.unwrap(),
});
}
}
println!("--- Reranker Setup ---");
println!("Choose your reranker provider:");
println!("1: Cohere");
println!("2: Mixedbread");
println!("3: Jina");
let provider_choice = loop {
match prompt_for_input("Enter choice (1-3):", Some("1"), false)?.parse::<u32>() {
Ok(choice @ 1..=3) => break choice,
_ => println!("Invalid choice. Please enter a number between 1 and 3."),
}
};
let (new_provider, default_model, default_url) = match provider_choice {
1 => ("Cohere", "rerank-english-v3.0", "https://api.cohere.com/v1/rerank"), // user asked for v3.5 but official docs say v3.0 for rerank model
2 => ("Mixedbread", "mixedbread-ai/mxbai-rerank-xsmall-v1", "https://api.mixedbread.ai/v1/reranking"),
3 => ("Jina", "jina-reranker-v1-base-en", "https://api.jina.ai/v1/rerank"),
_ => unreachable!(),
};
let new_key_val = prompt_for_input(&format!("Enter your {} API Key:", new_provider), None, true)?;
let new_model_val = prompt_for_input(&format!("Enter {} model name:", new_provider), Some(default_model), false)?;
let new_url_val = prompt_for_input(&format!("Enter {} rerank base URL:", new_provider), Some(default_url), false)?;
cache_value(app_base_dir, provider_cache, new_provider)?;
cache_value(app_base_dir, key_cache, &new_key_val)?;
cache_value(app_base_dir, model_cache, &new_model_val)?;
cache_value(app_base_dir, url_cache, &new_url_val)?;
current_provider = Some(new_provider.to_string());
current_key = Some(new_key_val);
current_model = Some(new_model_val);
current_url = Some(new_url_val);
}
if let (Some(prov), Some(key), Some(model), Some(url)) = (current_provider, current_key, current_model, current_url) {
Ok(RerankerConfig { provider: prov, api_key: key, model, base_url: url })
} else {
Err(BusterError::CommandError("Reranker configuration setup failed. Some values are missing.".to_string()))
}
}

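A hedged sketch of how these helpers compose, using only the functions defined above (get_app_base_dir, cache_value, get_cached_value, update_env_file); the wrapper function and the placeholder key value are illustrative, not part of the commit.
use crate::commands::config_utils;
use crate::error::BusterError;
// Cache an OpenAI key under ~/.buster and write it into the generated .env,
// leaving the reranker entries untouched.
fn example_save_key() -> Result<(), BusterError> {
    let app_dir = config_utils::get_app_base_dir()?;                        // resolves to ~/.buster
    config_utils::cache_value(&app_dir, ".openai_api_key", "sk-example")?;  // placeholder value
    let key = config_utils::get_cached_value(&app_dir, ".openai_api_key")?; // Some("sk-example")
    config_utils::update_env_file(
        &app_dir.join(".env"),
        key.as_deref(), // LLM_API_KEY
        None,           // RERANK_API_KEY unchanged
        None,           // RERANK_MODEL unchanged
        None,           // RERANK_BASE_URL unchanged
        None,           // LLM_BASE_URL falls back to the OpenAI default if the file is new
    )
}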
View File

@ -411,7 +411,7 @@ fn to_deploy_request(model: &Model, sql_content: String) -> DeployDatasetsReques
env: "dev".to_string(), // Assuming "dev" environment for now, might need configuration
type_: "view".to_string(), // Assuming models are deployed as views, might need configuration
name: model.name.clone(),
model: None, // This seems to be for a different kind of model (e.g. Python model), not semantic layer model name itself
model: Some(model.name.clone()), // Use the model's name for the 'model' field
schema,
database: model.database.clone(),
description: model.description.clone().unwrap_or_default(),
@ -639,7 +639,7 @@ pub async fn deploy(path: Option<&str>, dry_run: bool, recursive: bool) -> Resul
match client.deploy_datasets(deploy_requests_final).await {
Ok(response) => handle_deploy_response(&response, &mut result, &model_mappings_final, &progress),
Err(e) => {
eprintln!("❌ Critical error during deployment API call: {}", e.to_string().red());
eprintln!("❌ Critical error during deployment API call: {}\nDetailed error: {:?}", e.to_string().red(), e);
// Populate failures for all models that were attempted if a general API error occurs
for mapping in model_mappings_final {
result.failures.push((

View File

@ -1,4 +1,6 @@
pub mod auth;
pub mod config;
pub mod config_utils;
pub mod deploy;
pub mod generate;
pub mod init;

View File

@ -1,55 +1,412 @@
use std::process::{Command, Stdio};
use crate::commands::config_utils;
use crate::error::BusterError;
use dirs;
use indicatif::{ProgressBar, ProgressStyle};
use rust_embed::RustEmbed;
use std::fs;
use std::io::{self, Write};
use std::path::PathBuf;
use std::process::Command;
use std::time::Duration;
const DOCKER_COMPOSE_FILE: &str = "docker-compose.yml"; // Or the path you decide for it
#[derive(RustEmbed)]
#[folder = "../../"]
#[include = "docker-compose.yml"]
#[include = "litellm_vertex_config.yaml"]
#[include = "supabase/.env.example"]
#[include = "supabase/**/*"]
#[exclude = "supabase/volumes/db/data/**/*"]
#[exclude = "supabase/volumes/storage/**/*"]
#[exclude = "supabase/.env"]
#[exclude = "supabase/test.http"]
#[exclude = "supabase/docker-compose.override.yml"]
struct StaticAssets;
pub async fn start() -> Result<(), BusterError> {
println!("Attempting to start services with docker-compose...");
let mut cmd = Command::new("docker-compose");
cmd.arg("-f")
.arg(DOCKER_COMPOSE_FILE)
.arg("up")
.arg("-d");
cmd.stdout(Stdio::inherit());
cmd.stderr(Stdio::inherit());
let status = cmd.status().map_err(|e| {
BusterError::CommandError(format!("Failed to execute docker-compose up: {}", e))
async fn setup_persistent_app_environment() -> Result<PathBuf, BusterError> {
let app_base_dir = config_utils::get_app_base_dir().map_err(|e| {
BusterError::CommandError(format!("Failed to get app base directory: {}", e))
})?;
if status.success() {
println!("Services started successfully in detached mode.");
fs::create_dir_all(&app_base_dir).map_err(|e| {
BusterError::CommandError(format!(
"Failed to create persistent app directory at {}: {}",
app_base_dir.display(),
e
))
})?;
for filename_cow in StaticAssets::iter() {
let filename = filename_cow.as_ref();
let asset = StaticAssets::get(filename).ok_or_else(|| {
BusterError::CommandError(format!("Failed to get embedded asset: {}", filename))
})?;
let target_file_path = app_base_dir.join(filename);
if let Some(parent) = target_file_path.parent() {
fs::create_dir_all(parent).map_err(|e| {
BusterError::CommandError(format!(
"Failed to create directory {}: {}",
parent.display(),
e
))
})?;
}
fs::write(&target_file_path, asset.data).map_err(|e| {
BusterError::CommandError(format!(
"Failed to write embedded file {} to {}: {}",
filename,
target_file_path.display(),
e
))
})?;
}
let supabase_volumes_functions_path = app_base_dir.join("supabase/volumes/functions");
fs::create_dir_all(supabase_volumes_functions_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to create supabase/volumes/functions in persistent app dir: {}",
e
))
})?;
let target_dotenv_path = app_base_dir.join(".env");
// --- BEGIN API Key and Reranker Setup using config_utils ---
println!("--- Buster Configuration Setup ---");
let llm_api_key = config_utils::prompt_and_manage_openai_api_key(&app_base_dir, false)?;
let reranker_config = config_utils::prompt_and_manage_reranker_settings(&app_base_dir, false)?;
// Update .env file
config_utils::update_env_file(
&target_dotenv_path,
Some(&llm_api_key),
Some(&reranker_config.api_key),
Some(&reranker_config.model),
Some(&reranker_config.base_url),
None, // LLM_BASE_URL is not prompted for in this flow yet; the example .env supplies it.
)
.map_err(|e| {
BusterError::CommandError(format!(
"Failed to ensure .env file configurations in {}: {}",
target_dotenv_path.display(),
e
))
})?;
println!("--- Configuration Setup Complete ---");
// --- END API Key and Reranker Setup using config_utils ---
// Additionally copy the .env to the supabase subdirectory
let supabase_dotenv_path = app_base_dir.join("supabase/.env");
fs::copy(&target_dotenv_path, &supabase_dotenv_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to copy .env from {} to {}: {}",
target_dotenv_path.display(),
supabase_dotenv_path.display(),
e
))
})?;
Ok(app_base_dir)
}
async fn run_docker_compose_command(
args: &[&str],
operation_name: &str,
) -> Result<(), BusterError> {
let persistent_app_dir = setup_persistent_app_environment().await?;
let data_db_path = persistent_app_dir.join("supabase/volumes/db/data");
fs::create_dir_all(&data_db_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to create persistent data directory at {}: {}",
data_db_path.display(),
e
))
})?;
let data_storage_path = persistent_app_dir.join("supabase/volumes/storage");
fs::create_dir_all(&data_storage_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to create persistent data directory at {}: {}",
data_storage_path.display(),
e
))
})?;
let pb = ProgressBar::new_spinner();
pb.enable_steady_tick(Duration::from_millis(120));
pb.set_style(
ProgressStyle::default_spinner()
.tick_strings(&["▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸", ""])
.template("{spinner:.blue} {msg}")
.expect("Failed to set progress bar style"),
);
if operation_name == "Starting" {
pb.set_message(format!(
"{} Buster services... (this may take a few minutes)",
operation_name
));
} else {
pb.set_message(format!("{} Buster services...", operation_name));
}
let mut cmd = Command::new("docker");
cmd.current_dir(&persistent_app_dir);
cmd.arg("compose")
.arg("-p")
.arg("buster")
.arg("-f")
.arg("docker-compose.yml")
.args(args);
let output = cmd.output().map_err(|e| {
BusterError::CommandError(format!(
"Failed to execute docker compose {}: {}",
args.join(" "),
e
))
})?;
if output.status.success() {
pb.finish_with_message(format!(
"Buster services {} successfully.",
operation_name.to_lowercase()
));
Ok(())
} else {
Err(BusterError::CommandError(
format!("docker-compose up -d failed with status: {}", status)
))
let err_msg = format!(
"docker compose {} failed (status: {}). Logs:\nWorking directory: {}\nStdout:\n{}\nStderr:\n{}",
args.join(" "),
output.status,
persistent_app_dir.display(),
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);
pb.abandon_with_message(format!(
"Error: docker compose {} failed. See console for details.",
args.join(" ")
));
println!("\nDocker Compose Error Details:\n{}", err_msg);
Err(BusterError::CommandError(err_msg))
}
}
pub async fn stop() -> Result<(), BusterError> {
println!("Attempting to stop services with docker-compose...");
pub async fn start() -> Result<(), BusterError> {
run_docker_compose_command(&["up", "-d"], "Starting").await
}
let mut cmd = Command::new("docker-compose");
cmd.arg("-f")
.arg(DOCKER_COMPOSE_FILE)
pub async fn stop() -> Result<(), BusterError> {
run_docker_compose_command(&["down"], "Stopping").await
}
pub async fn reset() -> Result<(), BusterError> {
println!("WARNING: This command will stop all Buster services, attempt to remove their current images, and then restart them from scratch.");
println!(
"This can lead to a complete wipe of the Buster database and any other local service data."
);
println!("This action is irreversible.");
print!("Are you sure you want to proceed with resetting? (y/n): ");
io::stdout()
.flush()
.map_err(|e| BusterError::CommandError(format!("Failed to flush stdout: {}", e)))?;
let mut confirmation = String::new();
io::stdin()
.read_line(&mut confirmation)
.map_err(|e| BusterError::CommandError(format!("Failed to read user input: {}", e)))?;
if confirmation.trim().to_lowercase() != "y" {
println!("Reset cancelled by user.");
return Ok(());
}
let persistent_app_dir = setup_persistent_app_environment().await?;
let pb = ProgressBar::new_spinner();
pb.enable_steady_tick(Duration::from_millis(120));
pb.set_style(
ProgressStyle::default_spinner()
.tick_strings(&["▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸", ""])
.template("{spinner:.blue} {msg}")
.expect("Failed to set progress bar style"),
);
// Step 1: Stop services
pb.set_message("Resetting Buster services (step 1/4): Stopping services...");
let mut down_cmd = Command::new("docker");
down_cmd
.current_dir(&persistent_app_dir)
.arg("compose")
.arg("-p")
.arg("buster")
.arg("-f")
.arg("docker-compose.yml")
.arg("down");
cmd.stdout(Stdio::inherit());
cmd.stderr(Stdio::inherit());
let status = cmd.status().map_err(|e| {
BusterError::CommandError(format!("Failed to execute docker-compose down: {}", e))
let down_output = down_cmd.output().map_err(|e| {
BusterError::CommandError(format!("Failed to execute docker compose down: {}", e))
})?;
if status.success() {
println!("Services stopped successfully.");
Ok(())
} else {
Err(BusterError::CommandError(
format!("docker-compose down failed with status: {}", status)
))
if !down_output.status.success() {
let err_msg = format!(
"docker compose down failed (status: {}). Logs:
Working directory: {}
Stdout:
{}
Stderr:
{}",
down_output.status,
persistent_app_dir.display(),
String::from_utf8_lossy(&down_output.stdout),
String::from_utf8_lossy(&down_output.stderr)
);
pb.abandon_with_message("Error: docker compose down failed. See console for details.");
println!("\nDocker Compose Down Error Details:\n{}", err_msg);
return Err(BusterError::CommandError(err_msg));
}
}
// Step 2: Clear persistent data volumes
pb.set_message("Resetting Buster services (step 2/4): Clearing persistent data volumes...");
let db_volume_path = persistent_app_dir.join("supabase/volumes/db/data");
let storage_volume_path = persistent_app_dir.join("supabase/volumes/storage");
if db_volume_path.exists() {
fs::remove_dir_all(&db_volume_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to remove db volume at {}: {}",
db_volume_path.display(),
e
))
})?;
}
fs::create_dir_all(&db_volume_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to recreate db volume at {}: {}",
db_volume_path.display(),
e
))
})?;
pb.println(format!(
"Successfully cleared and recreated database volume: {}",
db_volume_path.display()
));
if storage_volume_path.exists() {
fs::remove_dir_all(&storage_volume_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to remove storage volume at {}: {}",
storage_volume_path.display(),
e
))
})?;
}
fs::create_dir_all(&storage_volume_path).map_err(|e| {
BusterError::CommandError(format!(
"Failed to recreate storage volume at {}: {}",
storage_volume_path.display(),
e
))
})?;
pb.println(format!(
"Successfully cleared and recreated storage volume: {}",
storage_volume_path.display()
));
// Step 3: Identify service images
pb.set_message("Resetting Buster services (step 3/4): Identifying service images...");
let mut config_images_cmd = Command::new("docker");
config_images_cmd
.current_dir(&persistent_app_dir)
.arg("compose")
.arg("-p")
.arg("buster")
.arg("-f")
.arg("docker-compose.yml")
.arg("config")
.arg("--images");
let config_images_output = config_images_cmd.output().map_err(|e| {
BusterError::CommandError(format!(
"Failed to execute docker compose config --images: {}",
e
))
})?;
if !config_images_output.status.success() {
let err_msg = format!(
"docker compose config --images failed (status: {}). Logs:
Working directory: {}
Stdout:
{}
Stderr:
{}",
config_images_output.status,
persistent_app_dir.display(),
String::from_utf8_lossy(&config_images_output.stdout),
String::from_utf8_lossy(&config_images_output.stderr)
);
pb.abandon_with_message(
"Error: Failed to identify service images. See console for details.",
);
println!(
"\nDocker Compose Config --images Error Details:\n{}",
err_msg
);
return Err(BusterError::CommandError(err_msg));
}
let image_list_str = String::from_utf8_lossy(&config_images_output.stdout);
let image_names: Vec<&str> = image_list_str
.lines()
.filter(|line| !line.trim().is_empty())
.collect();
// Step 4: Remove service images
if image_names.is_empty() {
pb.println(
"No images identified by docker-compose config --images. Skipping image removal.",
);
} else {
pb.set_message(format!(
"Resetting Buster services (step 4/4): Removing {} service image(s)...",
image_names.len()
));
for (index, image_name) in image_names.iter().enumerate() {
let current_image_name = image_name.trim();
if current_image_name.is_empty() {
continue;
}
pb.set_message(format!(
"Resetting Buster services (step 4/4): Removing image {}/{} ('{}')...",
index + 1,
image_names.len(),
current_image_name
));
let mut rmi_cmd = Command::new("docker");
rmi_cmd.arg("image").arg("rm").arg(current_image_name);
let rmi_output = rmi_cmd.output().map_err(|e| {
BusterError::CommandError(format!(
"Failed to execute docker image rm {}: {}",
current_image_name, e
))
})?;
// Log warning on failure but continue, as image might not exist or be in use by other non-project containers
if !rmi_output.status.success() {
let rmi_stderr = String::from_utf8_lossy(&rmi_output.stderr);
if !rmi_stderr.trim().is_empty() && !rmi_stderr.contains("No such image") {
// Don't warn if image was already gone
pb.println(format!("Warning: Could not remove image '{}'. It might be in use or already removed. Stderr: {}", current_image_name, rmi_stderr.trim()));
}
}
}
}
pb.finish_with_message(
"Buster services stopped, volumes cleared, and images removed successfully.",
);
Ok(())
}

View File

@ -80,8 +80,14 @@ pub enum Commands {
#[arg(long)]
path: Option<String>,
},
/// Interactively manage LLM and Reranker configurations
Config,
/// Start the Buster services
Start,
/// Stop the Buster services
Stop,
/// Reset the Buster services (stop them, clear local data, and remove their images)
Reset,
}
#[derive(Parser)]
@ -138,8 +144,10 @@ async fn main() {
target_semantic_file,
} => commands::generate::generate_semantic_models_command(path, target_semantic_file).await,
Commands::Parse { path } => commands::parse::parse_models_command(path).await,
Commands::Config => commands::config::manage_settings_interactive().await.map_err(anyhow::Error::from),
Commands::Start => run::start().await.map_err(anyhow::Error::from),
Commands::Stop => run::stop().await.map_err(anyhow::Error::from),
Commands::Reset => run::reset().await.map_err(anyhow::Error::from),
};
if let Err(e) = result {

View File

@ -32,14 +32,10 @@ services:
- EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER}
- EMBEDDING_MODEL=${EMBEDDING_MODEL}
- COHERE_API_KEY=${COHERE_API_KEY}
- ENVIRONMENT=${ENVIRONMENT}
ports:
- "3001:3001"
deploy:
resources:
limits:
memory: 4G
reservations:
memory: 2G
- "3000:3000"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3001/health"]
interval: 30s
@ -52,29 +48,34 @@ services:
condition: service_healthy
kong:
condition: service_healthy
web:
image: ghcr.io/buster-so/buster/web:latest
container_name: buster-web
ports:
- "3000:3000"
env_file:
- .env
depends_on:
api:
condition: service_healthy
network_mode: "service:api"
litellm:
image: ghcr.io/berriai/litellm:main-stable
container_name: buster-litellm
restart: always
ports:
- "4001:4001"
env_file:
- .env
environment:
- LITELLM_ENV=local
depends_on:
supavisor:
condition: service_healthy
# Pausing this for local deployments until we can build out better multi-model support.
# litellm:
# image: ghcr.io/berriai/litellm:main-latest
# container_name: buster-litellm
# volumes:
# - ./litellm_vertex_config.yaml:/litellm_vertex_config.yaml
# command: ["--config", "/litellm_vertex_config.yaml", "--port", "4001"]
# ports:
# - "4001:4001"
# healthcheck:
# test: ["CMD", "curl", "-f", "http://localhost:4001/health/readiness"]
# interval: 30s
# timeout: 10s
# retries: 3
# depends_on:
# api:
# condition: service_healthy
volumes:
buster_redis_data:

15
start
View File

@ -1,15 +0,0 @@
#!/bin/bash
echo "Starting Supabase..."
cd supabase
docker compose up -d
echo "Waiting for Supabase to be healthy..."
until curl -s http://localhost:54321/rest/v1/ > /dev/null; do
echo "Waiting for Supabase..."
sleep 5
done
echo "Supabase is ready! Starting main services..."
cd ..
docker compose up

View File

@ -1,15 +0,0 @@
#!/bin/bash
echo "Starting Supabase..."
cd supabase
docker compose up -d
echo "Waiting for Supabase to be healthy..."
until curl -s http://localhost:54321/rest/v1/ > /dev/null; do
echo "Waiting for Supabase..."
sleep 5
done
echo "Supabase is ready! Starting main services..."
cd ..
docker compose up

View File

@ -1,58 +1,86 @@
# General Application Settings
ENVIRONMENT="development"
BUSTER_URL="http://localhost:3000"
BUSTER_WH_TOKEN="buster-wh-token"
# --- API Service Specific ---
# Direct Database Connection (for API service and potentially others)
DATABASE_URL="postgresql://postgres.your-tenant-id:your-super-secret-and-long-postgres-password@supavisor:5432/postgres"
# Pooled Database Connection (for API service, uses Supavisor)
POOLER_URL="postgresql://postgres.your-tenant-id:your-super-secret-and-long-postgres-password@supavisor:5432/postgres"
# Redis Connection
REDIS_URL="redis://buster-redis:6379"
# Supabase Connection for API service
SUPABASE_URL="http://kong:8000"
SUPABASE_SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
# --- LLM / AI Services ---
EMBEDDING_PROVIDER="ollama"
EMBEDDING_MODEL="mxbai-embed-large"
COHERE_API_KEY=""
OPENAI_API_KEY="" # For OpenAI models or Supabase Studio assistant
LLM_API_KEY="test-key"
LLM_BASE_URL="http://buster-litellm:4001"
# --- Web Client (Next.js) Specific ---
NEXT_PUBLIC_API_URL="http://localhost:3001" # External URL for the API service (buster-api)
NEXT_PUBLIC_URL="http://localhost:3000" # External URL for the Web service (buster-web)
NEXT_PUBLIC_SUPABASE_URL="http://kong:8000" # External URL for Supabase (Kong proxy)
NEXT_PUBLIC_WS_URL="ws://localhost:3001"
NEXT_PUBLIC_SUPABASE_ANON_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJhbm9uIiwKICAgICJpc3MiOiAic3VwYWJhc2UtZGVtbyIsCiAgICAiaWF0IjogMTY0MTc2OTIwMCwKICAgICJleHAiOiAxNzk5NTM1NjAwCn0.dc_X5iR_VP_qT0zsiyj_I_OZ2T9FtRU2BBNWN8Bu4GE"
NEXT_PRIVATE_SUPABASE_SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
#################################################
# Supabase Stack Configuration Variables
# These are primarily used by the Supabase services themselves
# (defined in supabase/docker-compose.yml)
# and are sourced from this .env file when `docker compose up` is run.
#################################################
############
# Secrets
# YOU MUST CHANGE THESE BEFORE GOING INTO PRODUCTION
############
POSTGRES_PASSWORD=your-super-secret-and-long-postgres-password
JWT_SECRET=your-super-secret-jwt-token-with-at-least-32-characters-long
ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJhbm9uIiwKICAgICJpc3MiOiAic3VwYWJhc2UtZGVtbyIsCiAgICAiaWF0IjogMTY0MTc2OTIwMCwKICAgICJleHAiOiAxNzk5NTM1NjAwCn0.dc_X5iR_VP_qT0zsiyj_I_OZ2T9FtRU2BBNWN8Bu4GE
SERVICE_ROLE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q
DASHBOARD_USERNAME=supabase
DASHBOARD_PASSWORD=this_password_is_insecure_and_should_be_updated
POSTGRES_PASSWORD="your-super-secret-and-long-postgres-password"
JWT_SECRET="your-super-secret-jwt-token-with-at-least-32-characters-long"
ANON_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJhbm9uIiwKICAgICJpc3MiOiAic3VwYWJhc2UtZGVtbyIsCiAgICAiaWF0IjogMTY0MTc2OTIwMCwKICAgICJleHAiOiAxNzk5NTM1NjAwCn0.dc_X5iR_VP_qT0zsiyj_I_OZ2T9FtRU2BBNWN8Bu4GE"
SERVICE_ROLE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ey AgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
DASHBOARD_USERNAME="supabase"
DASHBOARD_PASSWORD="this_password_is_insecure_and_should_be_updated"
############
# Database - You can change these to any PostgreSQL database that has logical replication enabled.
# Database
############
POSTGRES_HOST=db
POSTGRES_DB=postgres
POSTGRES_PORT=5432
# default user is postgres
POSTGRES_HOST="db"
POSTGRES_DB="postgres"
POSTGRES_PORT="5432"
############
# Supavisor -- Database pooler
############
POOLER_PROXY_PORT_TRANSACTION=6543
POOLER_DEFAULT_POOL_SIZE=20
POOLER_MAX_CLIENT_CONN=100
POOLER_TENANT_ID=your-tenant-id
POOLER_PROXY_PORT_TRANSACTION="6543"
POOLER_DEFAULT_POOL_SIZE="20"
POOLER_MAX_CLIENT_CONN="100"
POOLER_TENANT_ID="your-tenant-id"
############
# API Proxy - Configuration for the Kong Reverse proxy.
# API Proxy - Kong
############
KONG_HTTP_PORT=8000
KONG_HTTPS_PORT=8443
KONG_HTTP_PORT="8000"
KONG_HTTPS_PORT="8443"
############
# API - Configuration for PostgREST.
# API - PostgREST
############
PGRST_DB_SCHEMAS=public,storage,graphql_public
PGRST_DB_SCHEMAS="public,storage,graphql_public"
############
# Auth - Configuration for the GoTrue authentication server.
# Auth - GoTrue
############
## General
SITE_URL=http://localhost:3003
ADDITIONAL_REDIRECT_URLS=
JWT_EXPIRY=3600
DISABLE_SIGNUP=false
API_EXTERNAL_URL=http://localhost:8000
SITE_URL="http://localhost:3000" # Default base URL for the site (used in emails, etc.)
ADDITIONAL_REDIRECT_URLS=""
JWT_EXPIRY="3600"
DISABLE_SIGNUP="false"
API_EXTERNAL_URL="http://localhost:8000" # Publicly accessible URL for the Supabase API (via Kong)
## Mailer Config
MAILER_URLPATHS_CONFIRMATION="/auth/v1/verify"
@ -61,57 +89,41 @@ MAILER_URLPATHS_RECOVERY="/auth/v1/verify"
MAILER_URLPATHS_EMAIL_CHANGE="/auth/v1/verify"
## Email auth
ENABLE_EMAIL_SIGNUP=true
ENABLE_EMAIL_AUTOCONFIRM=false
SMTP_ADMIN_EMAIL=admin@buster.so
SMTP_HOST=supabase-mail
SMTP_PORT=2500
SMTP_USER=
SMTP_PASS=
SMTP_SENDER_NAME=Buster
ENABLE_ANONYMOUS_USERS=true
ENABLE_EMAIL_SIGNUP="true"
ENABLE_EMAIL_AUTOCONFIRM="true"
SMTP_ADMIN_EMAIL="admin@buster.so"
SMTP_HOST="supabase-mail"
SMTP_PORT="2500"
SMTP_USER=""
SMTP_PASS=""
SMTP_SENDER_NAME="Buster"
ENABLE_ANONYMOUS_USERS="true"
## Phone auth
ENABLE_PHONE_SIGNUP=true
ENABLE_PHONE_AUTOCONFIRM=true
ENABLE_PHONE_SIGNUP="true"
ENABLE_PHONE_AUTOCONFIRM="true"
############
# Studio - Configuration for the Dashboard
# Studio - Supabase Dashboard
############
STUDIO_DEFAULT_ORGANIZATION="Default Organization"
STUDIO_DEFAULT_PROJECT="Default Project"
STUDIO_PORT="3003"
SUPABASE_PUBLIC_URL="http://localhost:8000" # Public URL for Supabase (Kong), used by Studio
STUDIO_DEFAULT_ORGANIZATION=Default Organization
STUDIO_DEFAULT_PROJECT=Default Project
STUDIO_PORT=3003
# replace if you intend to use Studio outside of localhost
SUPABASE_PUBLIC_URL=http://localhost:8000
# Enable webp support
IMGPROXY_ENABLE_WEBP_DETECTION=true
# Add your OpenAI API key to enable SQL Editor Assistant
OPENAI_API_KEY=
# Image Proxy
IMGPROXY_ENABLE_WEBP_DETECTION="true"
############
# Functions - Configuration for Functions
# Functions - Supabase Edge Functions
############
# NOTE: VERIFY_JWT applies to all functions. Per-function VERIFY_JWT is not supported yet.
FUNCTIONS_VERIFY_JWT=false
FUNCTIONS_VERIFY_JWT="false"
############
# Logs - Configuration for Logflare
# Please refer to https://supabase.com/docs/reference/self-hosting-analytics/introduction
# Logs - Logflare
############
LOGFLARE_LOGGER_BACKEND_API_KEY=your-super-secret-and-long-logflare-key
# Change vector.toml sinks to reflect this change
LOGFLARE_API_KEY=your-super-secret-and-long-logflare-key
# Docker socket location - this value will differ depending on your OS
DOCKER_SOCKET_LOCATION=/var/run/docker.sock
# Google Cloud Project details
GOOGLE_PROJECT_ID=GOOGLE_PROJECT_ID
GOOGLE_PROJECT_NUMBER=GOOGLE_PROJECT_NUMBER
LOGFLARE_LOGGER_BACKEND_API_KEY="your-super-secret-and-long-logflare-key"
LOGFLARE_API_KEY="your-super-secret-and-long-logflare-key"
DOCKER_SOCKET_LOCATION="/var/run/docker.sock"
GOOGLE_PROJECT_ID="GOOGLE_PROJECT_ID"
GOOGLE_PROJECT_NUMBER="GOOGLE_PROJECT_NUMBER"

View File

@ -16,6 +16,46 @@ services:
- '9000:9000' # web interface
- '1100:1100' # POP3
studio:
container_name: supabase-studio
image: supabase/studio:20241202-71e5240
restart: unless-stopped
healthcheck:
test:
[
"CMD",
"node",
"-e",
"fetch('http://studio:3000/api/profile').then((r) => {if (r.status !== 200) throw new Error(r.status)})"
]
timeout: 10s
interval: 5s
retries: 3
depends_on:
analytics:
condition: service_healthy
environment:
STUDIO_PG_META_URL: http://meta:8080
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
DEFAULT_ORGANIZATION_NAME: ${STUDIO_DEFAULT_ORGANIZATION}
DEFAULT_PROJECT_NAME: ${STUDIO_DEFAULT_PROJECT}
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
SUPABASE_URL: http://kong:8000
SUPABASE_PUBLIC_URL: ${SUPABASE_PUBLIC_URL}
SUPABASE_ANON_KEY: ${ANON_KEY}
SUPABASE_SERVICE_KEY: ${SERVICE_ROLE_KEY}
AUTH_JWT_SECRET: ${JWT_SECRET}
LOGFLARE_API_KEY: ${LOGFLARE_API_KEY}
LOGFLARE_URL: http://analytics:4000
NEXT_PUBLIC_ENABLE_LOGS: true
# Comment to use Big Query backend for analytics
NEXT_ANALYTICS_BACKEND_PROVIDER: postgres
# Uncomment to use Big Query backend for analytics
# NEXT_ANALYTICS_BACKEND_PROVIDER: bigquery
kong:
container_name: supabase-kong
image: kong:2.8.1
@ -147,6 +187,52 @@ services:
PGRST_APP_SETTINGS_JWT_EXP: ${JWT_EXPIRY}
command: "postgrest"
realtime:
# This container name looks inconsistent but is correct because realtime constructs tenant id by parsing the subdomain
container_name: realtime-dev.supabase-realtime
image: supabase/realtime:v2.33.70
depends_on:
db:
# Disable this if you are using an external Postgres database
condition: service_healthy
analytics:
condition: service_healthy
healthcheck:
test:
[
"CMD",
"curl",
"-sSfL",
"--head",
"-o",
"/dev/null",
"-H",
"Authorization: Bearer ${ANON_KEY}",
"http://localhost:4000/api/tenants/realtime-dev/health"
]
timeout: 5s
interval: 5s
retries: 3
restart: unless-stopped
environment:
PORT: 4000
DB_HOST: ${POSTGRES_HOST}
DB_PORT: ${POSTGRES_PORT}
DB_USER: supabase_admin
DB_PASSWORD: ${POSTGRES_PASSWORD}
DB_NAME: ${POSTGRES_DB}
DB_AFTER_CONNECT_QUERY: 'SET search_path TO _realtime'
DB_ENC_KEY: supabaserealtime
API_JWT_SECRET: ${JWT_SECRET}
SECRET_KEY_BASE: UpNVntn3cDxHJpq99YMc1T1AQgQpc8kfYTuRgBiYa15BLrx8etQoXz3gZv1/u2oq
ERL_AFLAGS: -proto_dist inet_tcp
DNS_NODES: "''"
RLIMIT_NOFILE: "10000"
APP_NAME: realtime
SEED_SELF_HOST: true
RUN_JANITOR: true
# To use S3 backed storage: docker compose -f docker-compose.yml -f docker-compose.s3.yml up
storage:
container_name: supabase-storage
image: supabase/storage-api:v1.11.13
@ -206,6 +292,24 @@ services:
volumes:
- ./volumes/storage:/var/lib/storage:z
meta:
container_name: supabase-meta
image: supabase/postgres-meta:v0.84.2
depends_on:
db:
# Disable this if you are using an external Postgres database
condition: service_healthy
analytics:
condition: service_healthy
restart: unless-stopped
environment:
PG_META_PORT: 8080
PG_META_DB_HOST: ${POSTGRES_HOST}
PG_META_DB_PORT: ${POSTGRES_PORT}
PG_META_DB_NAME: ${POSTGRES_DB}
PG_META_DB_USER: supabase_admin
PG_META_DB_PASSWORD: ${POSTGRES_PASSWORD}
functions:
container_name: supabase-edge-functions
image: supabase/edge-runtime:v1.65.3
@ -379,7 +483,7 @@ services:
- POOLER_TENANT_ID=${POOLER_TENANT_ID}
- POOLER_DEFAULT_POOL_SIZE=${POOLER_DEFAULT_POOL_SIZE}
- POOLER_MAX_CLIENT_CONN=${POOLER_MAX_CLIENT_CONN}
- POOLER_POOL_MODE=transaction
- POOLER_POOL_MODE=session
volumes:
- ./volumes/pooler/pooler.exs:/etc/pooler/pooler.exs:ro

7
tag_info.json Normal file
View File

@ -0,0 +1,7 @@
{
"api_tag": "api/v0.1.1", "api_version": "0.1.1"
,
"web_tag": "web/v0.1.1", "web_version": "0.1.1"
,
"cli_tag": "cli/v0.1.1", "cli_version": "0.1.1"
}

4
web/package-lock.json generated
View File

@ -1,12 +1,12 @@
{
"name": "web",
"version": "0.1.0",
"version": "0.1.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "web",
"version": "0.1.0",
"version": "0.1.1",
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/modifiers": "^9.0.0",

View File

@ -1,6 +1,6 @@
{
"name": "web",
"version": "0.1.0",
"version": "0.1.1",
"private": true,
"scripts": {
"dev": "next dev --turbo",