From fac8e8b673b8f11596faef88fb9d4dbb1da12bf8 Mon Sep 17 00:00:00 2001 From: dal Date: Wed, 21 May 2025 23:58:48 -0700 Subject: [PATCH 1/4] Staging (#330) * Create a better handler for clicking favorites * chore(versions): bump api to v0.1.9; bump web to v0.1.9; bump cli to v0.1.9 [skip ci] * chore: update tag_info.json with potential release versions [skip ci] * Create a better handler for clicking favorites * update chat favorites * chore(versions): bump api to v0.1.10; bump web to v0.1.10; bump cli to v0.1.10 [skip ci] * chore: update tag_info.json with potential release versions [skip ci] * Update tests to be ran with multiple workers * create chat records update * Create createChatRecord.test.ts * chore(versions): bump api to v0.1.11; bump web to v0.1.11; bump cli to v0.1.11 [skip ci] * chore: update tag_info.json with potential release versions [skip ci] * fix yesterday bucket * add fast embed rerank for local deployment (#329) * add fast embed rerank for local * chore(versions): bump api to v0.1.12; bump web to v0.1.12; bump cli to v0.1.12 [skip ci] * chore: update tag_info.json with potential release versions [skip ci] --------- Co-authored-by: github-actions[bot] --------- Co-authored-by: Nate Kelley Co-authored-by: github-actions[bot] Co-authored-by: Nate Kelley <133379588+nate-kelley-buster@users.noreply.github.com> --- .gitignore | 2 ++ api/Cargo.toml | 1 + api/libs/rerank/Cargo.toml | 2 ++ api/libs/rerank/src/lib.rs | 60 +++++++++++++++++++++++++++++++++++--- api/server/Cargo.toml | 3 +- api/server/src/main.rs | 21 +++++++++---- cli/cli/Cargo.toml | 2 +- tag_info.json | 6 ++-- web/package-lock.json | 4 +-- web/package.json | 2 +- 10 files changed, 86 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index b77fb323c..d86e05386 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ crash.log crash.*.log +.fastembed_cache/ + # Exclude all .tfvars files, which are likely to contain sensitive data, such as # password, private keys, and other secrets. These should not be part of version # control as they are data points which are potentially sensitive and subject diff --git a/api/Cargo.toml b/api/Cargo.toml index 07532fa64..354ab1e98 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -110,6 +110,7 @@ diesel_migrations = "2.0.0" html-escape = "0.2.13" tokio-cron-scheduler = "0.13.0" tokio-retry = "0.3.0" +fastembed = "4.8.0" [profile.release] debug = false diff --git a/api/libs/rerank/Cargo.toml b/api/libs/rerank/Cargo.toml index 44adfc306..52ad7369c 100644 --- a/api/libs/rerank/Cargo.toml +++ b/api/libs/rerank/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +dotenv = { workspace = true } +fastembed = "4.8.0" [dev-dependencies] dotenv = { workspace = true } diff --git a/api/libs/rerank/src/lib.rs b/api/libs/rerank/src/lib.rs index 67177db37..95fb716b9 100644 --- a/api/libs/rerank/src/lib.rs +++ b/api/libs/rerank/src/lib.rs @@ -1,25 +1,40 @@ use reqwest::Client; use serde::{Deserialize, Serialize}; use std::error::Error; +use dotenv::dotenv; +use std::env; pub struct Reranker { api_key: String, base_url: String, model: String, client: Client, + environment: String, } impl Reranker { pub fn new() -> Result> { - let api_key = std::env::var("RERANK_API_KEY")?; - let model = std::env::var("RERANK_MODEL")?; - let base_url = std::env::var("RERANK_BASE_URL")?; + dotenv().ok(); + let environment = env::var("ENVIRONMENT").unwrap_or_else(|_| "production".to_string()); + + // If local environment, we don't need these values + let (api_key, model, base_url) = if environment == "local" { + (String::new(), String::new(), String::new()) + } else { + ( + env::var("RERANK_API_KEY")?, + env::var("RERANK_MODEL")?, + env::var("RERANK_BASE_URL")?, + ) + }; + let client = Client::new(); Ok(Self { api_key, base_url, model, client, + environment, }) } @@ -29,6 +44,12 @@ impl Reranker { documents: &[&str], top_n: usize, ) -> Result, Box> { + // Use local fastembed reranking if ENVIRONMENT is set to local + if self.environment == "local" { + return self.local_rerank(query, documents, top_n).await; + } + + // Otherwise use the remote API let request_body = RerankRequest { query: query.to_string(), documents: documents.iter().map(|s| s.to_string()).collect(), @@ -45,6 +66,37 @@ impl Reranker { let response_body: RerankResponse = response.json().await?; Ok(response_body.results) } + + async fn local_rerank( + &self, + query: &str, + documents: &[&str], + top_n: usize, + ) -> Result, Box> { + use fastembed::{TextRerank, RerankInitOptions, RerankerModel}; + + // Initialize the reranker model + let model = TextRerank::try_new( + RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn).with_show_download_progress(true), + )?; + + // Limit top_n to the number of documents + let actual_top_n = std::cmp::min(top_n, documents.len()); + + // Perform reranking + let fastembed_results = model.rerank(query, documents.to_vec(),false, Some(actual_top_n))?; + + // Convert fastembed results to our RerankResult format + let results = fastembed_results + .iter() + .map(|result| RerankResult { + index: result.index, + relevance_score: result.score, + }) + .collect(); + + Ok(results) + } } #[derive(Serialize)] @@ -60,7 +112,7 @@ struct RerankResponse { results: Vec, } -#[derive(Deserialize)] +#[derive(Deserialize, Clone, Debug)] pub struct RerankResult { pub index: usize, pub relevance_score: f32, diff --git a/api/server/Cargo.toml b/api/server/Cargo.toml index eb29bced4..a3e8305fb 100644 --- a/api/server/Cargo.toml +++ b/api/server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "buster_server" -version = "0.1.11" +version = "0.1.12" edition = "2021" default-run = "buster_server" @@ -39,6 +39,7 @@ tower-http = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } uuid = { workspace = true } +fastembed = { workspace = true } # Local dependencies handlers = { path = "../libs/handlers" } diff --git a/api/server/src/main.rs b/api/server/src/main.rs index 550fae76c..7b799fe1d 100644 --- a/api/server/src/main.rs +++ b/api/server/src/main.rs @@ -5,12 +5,15 @@ use std::env; use std::sync::Arc; use std::time::Duration; -use axum::{Extension, Router, extract::Request}; -use middleware::{cors::cors, error::{init_sentry, sentry_layer, init_tracing_subscriber}}; +use axum::{extract::Request, Extension, Router}; use database::{self, pool::init_pools}; use diesel::{Connection, PgConnection}; use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness}; use dotenv::dotenv; +use middleware::{ + cors::cors, + error::{init_sentry, init_tracing_subscriber, sentry_layer}, +}; use rustls::crypto::ring; use stored_values::jobs::trigger_stale_sync_jobs; use tokio::sync::broadcast; @@ -19,6 +22,7 @@ use tower::ServiceBuilder; use tower_http::{compression::CompressionLayer, trace::TraceLayer}; use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; +use fastembed::{InitOptions, RerankInitOptions, RerankerModel, TextRerank}; pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!(); @@ -30,6 +34,13 @@ async fn main() -> Result<(), anyhow::Error> { let environment = env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()); let is_development = environment == "development"; + if environment == "local" { + let options = + RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn).with_show_download_progress(true); + let model = TextRerank::try_new(options)?; + println!("Model loaded and ready!"); + } + ring::default_provider() .install_default() .expect("Failed to install default crypto provider"); @@ -43,9 +54,9 @@ async fn main() -> Result<(), anyhow::Error> { let log_level = env::var("LOG_LEVEL") .unwrap_or_else(|_| "warn".to_string()) .to_uppercase(); - - let env_filter = EnvFilter::try_from_default_env() - .unwrap_or_else(|_| EnvFilter::new(log_level)); + + let env_filter = + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(log_level)); // Initialize the tracing subscriber with Sentry integration using our middleware helper init_tracing_subscriber(env_filter); diff --git a/cli/cli/Cargo.toml b/cli/cli/Cargo.toml index 0a4a090a3..7b49afda0 100644 --- a/cli/cli/Cargo.toml +++ b/cli/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "buster-cli" -version = "0.1.11" +version = "0.1.12" edition = "2021" build = "build.rs" diff --git a/tag_info.json b/tag_info.json index 42c6a92ba..41185f5f4 100644 --- a/tag_info.json +++ b/tag_info.json @@ -1,7 +1,7 @@ { - "api_tag": "api/v0.1.11", "api_version": "0.1.11" + "api_tag": "api/v0.1.12", "api_version": "0.1.12" , - "web_tag": "web/v0.1.11", "web_version": "0.1.11" + "web_tag": "web/v0.1.12", "web_version": "0.1.12" , - "cli_tag": "cli/v0.1.11", "cli_version": "0.1.11" + "cli_tag": "cli/v0.1.12", "cli_version": "0.1.12" } diff --git a/web/package-lock.json b/web/package-lock.json index ed39a8010..d9b21b3da 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "web", - "version": "0.1.11", + "version": "0.1.12", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "web", - "version": "0.1.11", + "version": "0.1.12", "dependencies": { "@dnd-kit/core": "^6.3.1", "@dnd-kit/modifiers": "^9.0.0", diff --git a/web/package.json b/web/package.json index b7da90768..bf10b1d22 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "web", - "version": "0.1.11", + "version": "0.1.12", "private": true, "scripts": { "dev": "next dev --turbo", From 5e7c8fc6b1b18cf830c0cbd7c0a79200e5ac2823 Mon Sep 17 00:00:00 2001 From: dal Date: Tue, 3 Jun 2025 17:51:51 -0600 Subject: [PATCH 3/4] upgrade fastembed --- api/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/Cargo.toml b/api/Cargo.toml index 354ab1e98..289d7ba32 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -110,7 +110,7 @@ diesel_migrations = "2.0.0" html-escape = "0.2.13" tokio-cron-scheduler = "0.13.0" tokio-retry = "0.3.0" -fastembed = "4.8.0" +fastembed = "4.9.0" [profile.release] debug = false From 993929720e83f10b857e831beae241e81e8aa602 Mon Sep 17 00:00:00 2001 From: dal Date: Tue, 3 Jun 2025 18:01:53 -0600 Subject: [PATCH 4/4] rip out fastembed due to build errors --- api/Cargo.toml | 1 - api/libs/rerank/Cargo.toml | 1 - api/libs/rerank/src/lib.rs | 55 ++++---------------------------------- api/server/Cargo.toml | 1 - api/server/src/main.rs | 8 ------ 5 files changed, 5 insertions(+), 61 deletions(-) diff --git a/api/Cargo.toml b/api/Cargo.toml index 289d7ba32..07532fa64 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -110,7 +110,6 @@ diesel_migrations = "2.0.0" html-escape = "0.2.13" tokio-cron-scheduler = "0.13.0" tokio-retry = "0.3.0" -fastembed = "4.9.0" [profile.release] debug = false diff --git a/api/libs/rerank/Cargo.toml b/api/libs/rerank/Cargo.toml index 52ad7369c..2c0b57863 100644 --- a/api/libs/rerank/Cargo.toml +++ b/api/libs/rerank/Cargo.toml @@ -8,7 +8,6 @@ reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } dotenv = { workspace = true } -fastembed = "4.8.0" [dev-dependencies] dotenv = { workspace = true } diff --git a/api/libs/rerank/src/lib.rs b/api/libs/rerank/src/lib.rs index 95fb716b9..5411bb394 100644 --- a/api/libs/rerank/src/lib.rs +++ b/api/libs/rerank/src/lib.rs @@ -9,24 +9,15 @@ pub struct Reranker { base_url: String, model: String, client: Client, - environment: String, } impl Reranker { pub fn new() -> Result> { dotenv().ok(); - let environment = env::var("ENVIRONMENT").unwrap_or_else(|_| "production".to_string()); - // If local environment, we don't need these values - let (api_key, model, base_url) = if environment == "local" { - (String::new(), String::new(), String::new()) - } else { - ( - env::var("RERANK_API_KEY")?, - env::var("RERANK_MODEL")?, - env::var("RERANK_BASE_URL")?, - ) - }; + let api_key = env::var("RERANK_API_KEY")?; + let model = env::var("RERANK_MODEL")?; + let base_url = env::var("RERANK_BASE_URL")?; let client = Client::new(); Ok(Self { @@ -34,7 +25,6 @@ impl Reranker { base_url, model, client, - environment, }) } @@ -44,18 +34,13 @@ impl Reranker { documents: &[&str], top_n: usize, ) -> Result, Box> { - // Use local fastembed reranking if ENVIRONMENT is set to local - if self.environment == "local" { - return self.local_rerank(query, documents, top_n).await; - } - - // Otherwise use the remote API let request_body = RerankRequest { query: query.to_string(), documents: documents.iter().map(|s| s.to_string()).collect(), top_n, model: self.model.clone(), }; + let response = self .client .post(&self.base_url) @@ -63,40 +48,10 @@ impl Reranker { .json(&request_body) .send() .await?; + let response_body: RerankResponse = response.json().await?; Ok(response_body.results) } - - async fn local_rerank( - &self, - query: &str, - documents: &[&str], - top_n: usize, - ) -> Result, Box> { - use fastembed::{TextRerank, RerankInitOptions, RerankerModel}; - - // Initialize the reranker model - let model = TextRerank::try_new( - RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn).with_show_download_progress(true), - )?; - - // Limit top_n to the number of documents - let actual_top_n = std::cmp::min(top_n, documents.len()); - - // Perform reranking - let fastembed_results = model.rerank(query, documents.to_vec(),false, Some(actual_top_n))?; - - // Convert fastembed results to our RerankResult format - let results = fastembed_results - .iter() - .map(|result| RerankResult { - index: result.index, - relevance_score: result.score, - }) - .collect(); - - Ok(results) - } } #[derive(Serialize)] diff --git a/api/server/Cargo.toml b/api/server/Cargo.toml index 9eadbf44b..8d54a687d 100644 --- a/api/server/Cargo.toml +++ b/api/server/Cargo.toml @@ -39,7 +39,6 @@ tower-http = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } uuid = { workspace = true } -fastembed = { workspace = true } # Local dependencies handlers = { path = "../libs/handlers" } diff --git a/api/server/src/main.rs b/api/server/src/main.rs index 7b799fe1d..674d82524 100644 --- a/api/server/src/main.rs +++ b/api/server/src/main.rs @@ -22,7 +22,6 @@ use tower::ServiceBuilder; use tower_http::{compression::CompressionLayer, trace::TraceLayer}; use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; -use fastembed::{InitOptions, RerankInitOptions, RerankerModel, TextRerank}; pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!(); @@ -34,13 +33,6 @@ async fn main() -> Result<(), anyhow::Error> { let environment = env::var("ENVIRONMENT").unwrap_or_else(|_| "development".to_string()); let is_development = environment == "development"; - if environment == "local" { - let options = - RerankInitOptions::new(RerankerModel::JINARerankerV1TurboEn).with_show_download_progress(true); - let model = TextRerank::try_new(options)?; - println!("Model loaded and ready!"); - } - ring::default_provider() .install_default() .expect("Failed to install default crypto provider");