diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml index bce380d86..675f8b8f2 100644 --- a/.github/workflows/docker-release.yml +++ b/.github/workflows/docker-release.yml @@ -94,6 +94,16 @@ jobs: with: ref: ${{ github.sha }} + - name: Docker meta for API + id: meta_api + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ env.API_VERSION }} + type=sha,format=short + type=raw,value=latest + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -105,16 +115,32 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push API image + id: build_api_image_platform uses: useblacksmith/build-push-action@v1 with: context: ./api file: ./api/Dockerfile push: true platforms: ${{ matrix.docker_platform }} - tags: | - ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}:${{ env.API_VERSION }}-${{ matrix.platform }} - ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}:${{ github.sha }}-${{ matrix.platform }} - ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }}:latest-${{ matrix.platform }} + tags: ${{ steps.meta_api.outputs.tags }} + labels: ${{ steps.meta_api.outputs.labels }} + outputs: type=image,name=${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }},push-by-digest=true,name-canonical=true + + - name: Export API digest + run: | + mkdir -p ${{ runner.temp }}/digests + digest_full="${{ steps.build_api_image_platform.outputs.digest }}" + digest_sha="${digest_full#sha256:}" + echo "Digest SHA for API ${{ matrix.platform }}: ${digest_sha}" + echo "${digest_sha}" > "${{ runner.temp }}/digests/api-${{ matrix.platform }}.sha" + + - name: Upload API digest file + uses: actions/upload-artifact@v4 + with: + name: api-digest-${{ matrix.platform }} + path: ${{ runner.temp }}/digests/api-${{ matrix.platform }}.sha + if-no-files-found: error + retention-days: 1 - name: Set API Package 
Visibility to Public env: @@ -160,6 +186,16 @@ jobs: with: ref: ${{ github.sha }} + - name: Docker meta for Web + id: meta_web + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ env.WEB_VERSION }} + type=sha,format=short + type=raw,value=latest + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -171,16 +207,16 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push Web image + id: build_web_image_platform uses: useblacksmith/build-push-action@v1 with: context: ./web file: ./web/Dockerfile push: true platforms: ${{ matrix.docker_platform }} - tags: | - ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}:${{ env.WEB_VERSION }}-${{ matrix.platform }} - ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}:${{ github.sha }}-${{ matrix.platform }} - ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }}:latest-${{ matrix.platform }} + tags: ${{ steps.meta_web.outputs.tags }} + labels: ${{ steps.meta_web.outputs.labels }} + outputs: type=image,name=${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }},push-by-digest=true,name-canonical=true build-args: | NEXT_PUBLIC_API_URL=${{ secrets.NEXT_PUBLIC_API_URL }} NEXT_PUBLIC_URL=${{ secrets.NEXT_PUBLIC_URL }} @@ -188,6 +224,22 @@ jobs: NEXT_PUBLIC_SUPABASE_ANON_KEY=${{ secrets.NEXT_PUBLIC_SUPABASE_ANON_KEY }} NEXT_PUBLIC_WEB_SOCKET_URL=${{ secrets.NEXT_PUBLIC_WEB_SOCKET_URL }} + - name: Export Web digest + run: | + mkdir -p ${{ runner.temp }}/digests + digest_full="${{ steps.build_web_image_platform.outputs.digest }}" + digest_sha="${digest_full#sha256:}" + echo "Digest SHA for Web ${{ matrix.platform }}: ${digest_sha}" + echo "${digest_sha}" > "${{ runner.temp }}/digests/web-${{ matrix.platform }}.sha" + + - name: Upload Web digest file + uses: actions/upload-artifact@v4 + with: + name: web-digest-${{ matrix.platform }} + path: ${{ runner.temp 
}}/digests/web-${{ matrix.platform }}.sha + if-no-files-found: error + retention-days: 1 + - name: Set Web Package Visibility to Public env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -206,3 +258,134 @@ jobs: echo "Failed to set package $ORG_NAME/${{ env.WEB_IMAGE_NAME }} visibility to public. HTTP Status: $RESPONSE_CODE" # Optionally, fail the step: exit 1 fi + + merge_api_manifests: + name: Merge API Manifests + runs-on: blacksmith-4vcpu-ubuntu-2204 + needs: [prepare_docker_release_info, build_and_push_api] + if: needs.prepare_docker_release_info.outputs.api_version_found == 'true' + steps: + - name: Download API digests + uses: actions/download-artifact@v4 + with: + path: ${{ runner.temp }}/all_api_digests + pattern: api-digest-* + merge-multiple: true + + - name: Log in to Docker Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Docker meta for API Manifest + id: meta_api_manifest + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ needs.prepare_docker_release_info.outputs.api_version }} + type=sha,format=short + type=raw,value=latest + # Ensure DOCKER_METADATA_OUTPUT_JSON is populated for the next step + # outputs: | + # json + + - name: Create and push API manifest list + env: + API_IMAGE_FULL_NAME: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.API_IMAGE_NAME }} + working-directory: ${{ runner.temp }}/all_api_digests + run: | + echo "Listing downloaded API digests in $(pwd):" + ls -lR . + + TAG_ARGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") + echo "Generated tag arguments for API manifest: $TAG_ARGS" + + DIGEST_FILES_FOUND=$(find . 
-type f -name '*.sha' -print) + if [ -z "$DIGEST_FILES_FOUND" ]; then + echo "Error: No API digest files (*.sha) found." + exit 1 + fi + + IMAGE_PLUS_DIGEST_ARGS="" + for digest_file_path in $DIGEST_FILES_FOUND; do + sha_value=$(cat "$digest_file_path") + IMAGE_PLUS_DIGEST_ARGS="$IMAGE_PLUS_DIGEST_ARGS ${API_IMAGE_FULL_NAME}@sha256:${sha_value}" + done + echo "API Manifest images with digests: $IMAGE_PLUS_DIGEST_ARGS" + + if [ -z "$IMAGE_PLUS_DIGEST_ARGS" ]; then + echo "Error: No API digests were processed to create the manifest." + exit 1 + fi + docker buildx imagetools create $TAG_ARGS $IMAGE_PLUS_DIGEST_ARGS + + merge_web_manifests: + name: Merge Web Manifests + runs-on: blacksmith-4vcpu-ubuntu-2204 + needs: [prepare_docker_release_info, build_and_push_web] + if: needs.prepare_docker_release_info.outputs.web_version_found == 'true' + steps: + - name: Download Web digests + uses: actions/download-artifact@v4 + with: + path: ${{ runner.temp }}/all_web_digests + pattern: web-digest-* + merge-multiple: true + + - name: Log in to Docker Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Docker meta for Web Manifest + id: meta_web_manifest + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ needs.prepare_docker_release_info.outputs.web_version }} + type=sha,format=short + type=raw,value=latest + # outputs: | + # json + + - name: Create and push Web manifest list + env: + WEB_IMAGE_FULL_NAME: ${{ env.DOCKER_REGISTRY_OWNER }}/${{ env.WEB_IMAGE_NAME }} + working-directory: ${{ runner.temp }}/all_web_digests + run: | + echo "Listing downloaded Web digests in $(pwd):" + ls -lR . + + TAG_ARGS=$(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") + echo "Generated tag arguments for Web manifest: $TAG_ARGS" + + DIGEST_FILES_FOUND=$(find . -type f -name '*.sha' -print) + if [ -z "$DIGEST_FILES_FOUND" ]; then + echo "Error: No Web digest files (*.sha) found." + exit 1 + fi + + IMAGE_PLUS_DIGEST_ARGS="" + for digest_file_path in $DIGEST_FILES_FOUND; do + sha_value=$(cat "$digest_file_path") + IMAGE_PLUS_DIGEST_ARGS="$IMAGE_PLUS_DIGEST_ARGS ${WEB_IMAGE_FULL_NAME}@sha256:${sha_value}" + done + echo "Web Manifest images with digests: $IMAGE_PLUS_DIGEST_ARGS" + + if [ -z "$IMAGE_PLUS_DIGEST_ARGS" ]; then + echo "Error: No Web digests were processed to create the manifest." + exit 1 + fi + docker buildx imagetools create $TAG_ARGS $IMAGE_PLUS_DIGEST_ARGS diff --git a/api/libs/agents/Cargo.toml b/api/libs/agents/Cargo.toml index af9b3de18..5b029498d 100644 --- a/api/libs/agents/Cargo.toml +++ b/api/libs/agents/Cargo.toml @@ -36,6 +36,7 @@ tokio-retry = { workspace = true } thiserror = { workspace = true } raindrop = { path = "../raindrop" } sql_analyzer = { path = "../sql_analyzer" } +rerank = { path = "../rerank" } # Development dependencies [dev-dependencies] diff --git a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs index fb681e729..84978eb26 100644 --- a/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs +++ b/api/libs/agents/src/tools/categories/file_tools/search_data_catalog.rs @@ -27,6 +27,7 @@ use uuid::Uuid; use dataset_security::{get_permissioned_datasets, PermissionedDataset}; use sqlx::PgPool; use stored_values; +use rerank::Reranker; use crate::{agent::Agent, tools::ToolExecutor}; @@ -883,25 +884,29 @@ async fn rerank_datasets( if documents.is_empty() || all_datasets.is_empty() { return Ok(vec![]); } - let co = Cohere::default(); - let request = ReRankRequest { - query, - documents, - model: ReRankModel::EnglishV3, - top_n: Some(35), - 
..Default::default() - }; + // Initialize your custom reranker + let reranker = Reranker::new() + .map_err(|e| anyhow::anyhow!("Failed to initialize custom Reranker: {}", e))?; - let rerank_results = match co.rerank(&request).await { + // Convert documents from Vec<String> to Vec<&str> for the rerank library + let doc_slices: Vec<&str> = documents.iter().map(AsRef::as_ref).collect(); + + // Define top_n, e.g., 35 as used with Cohere + let top_n = 35; + + // Call your custom reranker's rerank method + let rerank_results = match reranker.rerank(query, &doc_slices, top_n).await { Ok(results) => results, Err(e) => { - error!(error = %e, query = query, "Cohere rerank API call failed"); - return Err(anyhow::anyhow!("Cohere rerank failed: {}", e)); + error!(error = %e, query = query, "Custom reranker API call failed"); + return Err(anyhow::anyhow!("Custom reranker failed: {}", e)); } }; let mut ranked_datasets = Vec::new(); + // The structure of RerankResult from your library (index, relevance_score) + // is compatible with the existing loop logic. for result in rerank_results { if let Some(dataset) = all_datasets.get(result.index as usize) { ranked_datasets.push(RankedDataset { @@ -909,17 +914,19 @@ async fn rerank_datasets( }); } else { error!( - "Invalid dataset index {} from Cohere for query '{}'. Max index: {}", + "Invalid dataset index {} from custom reranker for query '{}'. Max index: {}", result.index, query, - all_datasets.len() - 1 + all_datasets.len().saturating_sub(1) // Avoid panic on empty all_datasets (though guarded above) ); } } - let relevant_datasets = ranked_datasets.into_iter().collect::<Vec<_>>(); - - Ok(relevant_datasets) + // The original code collected into Vec<_> then returned. This is fine.
+ // let relevant_datasets = ranked_datasets.into_iter().collect::<Vec<_>>(); + // Ok(relevant_datasets) + // Simpler: + Ok(ranked_datasets) } async fn llm_filter_helper( diff --git a/cli/cli/src/commands/run.rs b/cli/cli/src/commands/run.rs index bce1471f1..3b39a89a1 100644 --- a/cli/cli/src/commands/run.rs +++ b/cli/cli/src/commands/run.rs @@ -288,8 +288,10 @@ Stderr: .arg("docker-compose.yml") .arg("up") .arg("-d") - .arg("--pull") // Ensure latest images are pulled - .arg("--force-recreate"); // Recreate containers even if config hasn't changed + .arg("--pull") + .arg("always") + .arg("--force-recreate") + .arg("--remove-orphans"); let up_output = up_cmd.output().map_err(|e| BusterError::CommandError(format!("Failed to execute docker compose up: {}", e)))?; diff --git a/docker-compose.yml b/docker-compose.yml index 537b46609..5ce16d86e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ services: retries: 30 api: - image: ghcr.io/buster-so/buster/api:latest-arm64 + image: ghcr.io/buster-so/buster/api:latest container_name: buster-api env_file: - .env @@ -50,7 +50,7 @@ services: condition: service_healthy web: - image: ghcr.io/buster-so/buster/web:latest-arm64 + image: ghcr.io/buster-so/buster/web:latest container_name: buster-web env_file: - .env