Merge pull request #152 from buster-so/dal/cli-tool-bugs

bigquery fine tuning.
dal 2025-02-26 06:51:56 -08:00 committed by GitHub
commit e554c74802
8 changed files with 302 additions and 22 deletions


@@ -101,14 +101,20 @@ jobs:
     needs: build
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
         with:
           fetch-depth: 0
       - name: Download all artifacts
         uses: actions/download-artifact@v4
-      - name: Get version
+      - name: Extract version from Cargo.toml
         id: get_version
         run: |
-          VERSION=0.0.6
+          VERSION=$(grep '^version =' cli/Cargo.toml | sed 's/version = "\(.*\)"/\1/')
           echo "version=$VERSION" >> $GITHUB_OUTPUT
+          echo "Extracted version: $VERSION"
       - name: Create Release
         uses: softprops/action-gh-release@v1

.github/workflows/version-bump.yml (new file)

@@ -0,0 +1,63 @@
name: Version Bump

on:
  pull_request:
    types: [closed]
    branches:
      - main
    paths:
      - 'cli/**'

jobs:
  bump-version:
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Git
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "actions@github.com"

      - name: Determine version bump type
        id: bump_type
        run: |
          PR_TITLE="${{ github.event.pull_request.title }}"
          PR_BODY="${{ github.event.pull_request.body }}"
          PR_LABELS="${{ toJson(github.event.pull_request.labels.*.name) }}"
          if [[ "$PR_TITLE" == *"BREAKING CHANGE"* || "$PR_TITLE" == *"major"* || "$PR_BODY" == *"BREAKING CHANGE"* || "$PR_LABELS" == *"major"* ]]; then
            echo "type=major" >> $GITHUB_OUTPUT
            echo "Detected major version bump"
          elif [[ "$PR_TITLE" == *"feat"* || "$PR_TITLE" == *"feature"* || "$PR_TITLE" == *"minor"* || "$PR_LABELS" == *"minor"* || "$PR_LABELS" == *"feature"* ]]; then
            echo "type=minor" >> $GITHUB_OUTPUT
            echo "Detected minor version bump"
          else
            echo "type=patch" >> $GITHUB_OUTPUT
            echo "Detected patch version bump"
          fi

      - name: Install cargo-bump
        run: cargo install cargo-bump

      - name: Bump version
        working-directory: ./cli
        run: |
          BUMP_TYPE="${{ steps.bump_type.outputs.type }}"
          cargo bump $BUMP_TYPE
          NEW_VERSION=$(grep '^version =' Cargo.toml | sed 's/version = "\(.*\)"/\1/')
          echo "New version: $NEW_VERSION"
          echo "new_version=$NEW_VERSION" >> $GITHUB_ENV

      - name: Commit and push version bump
        run: |
          git add cli/Cargo.toml
          git commit -m "Bump version to ${{ env.new_version }} [skip ci]"
          git push


@@ -31,7 +31,6 @@ pub struct AthenaCredentials {
pub struct BigqueryCredentials {
    pub credentials_json: Value,
    pub project_id: String,
    pub dataset_ids: Option<Vec<String>>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]


@@ -233,18 +233,7 @@ async fn get_bigquery_tables_and_views(
         Err(e) => return Err(e),
     };
-    let schema_string = if let Some(datasets) = &credentials.dataset_ids {
-        format!(
-            "IN ({})",
-            datasets
-                .iter()
-                .map(|s| format!("'{}'", s))
-                .collect::<Vec<String>>()
-                .join(", ")
-        )
-    } else {
-        "NOT IN ('INFORMATION_SCHEMA')".to_string()
-    };
+    let schema_string = "NOT IN ('INFORMATION_SCHEMA')".to_string();
     let tables_and_views_query = format!(
         "

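The hunk above cuts off just as `tables_and_views_query` begins, so the full query is not shown here. For orientation only, below is a hedged sketch of how a `schema_string` filter like this is typically spliced into a BigQuery INFORMATION_SCHEMA lookup; the table, columns, and region qualifier are assumptions, not the query this crate actually builds:

```rust
// Hypothetical sketch -- the real query body is not visible in this diff.
fn tables_and_views_query(project_id: &str) -> String {
    // After this commit the filter is always the hardcoded exclusion below;
    // the removed branch used to build "IN ('ds1', 'ds2', ...)" from
    // credentials.dataset_ids instead.
    let schema_string = "NOT IN ('INFORMATION_SCHEMA')".to_string();
    format!(
        "SELECT table_schema, table_name, table_type \
         FROM `{project_id}`.`region-us`.INFORMATION_SCHEMA.TABLES \
         WHERE table_schema {schema_string}"
    )
}
```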

@@ -1,6 +1,6 @@
 [package]
 name = "buster-cli"
-version = "0.0.6"
+version = "0.0.7"
 edition = "2021"
 build = "build.rs"

cli/VERSIONING.md (new file)

@@ -0,0 +1,52 @@
# Semantic Versioning for Buster CLI

This project uses automated semantic versioning based on pull request metadata. The version is automatically bumped when a pull request is merged into the main branch.

## How It Works

1. When a pull request is merged into the main branch, a GitHub Action workflow automatically determines the type of version bump needed.
2. The version in `Cargo.toml` is updated accordingly (the bump arithmetic is sketched after this list).
3. The changes are committed back to the repository.
4. When a release is created, the version is extracted directly from `Cargo.toml`.
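
Step 2 is handled by `cargo-bump`. The arithmetic behind each bump type is the standard semver increment; the Rust sketch below is illustrative only and is not cargo-bump's actual code:

```rust
// Illustrative semver arithmetic only; cargo-bump's real implementation differs.
fn bump(version: &str, kind: &str) -> Option<String> {
    let mut p: Vec<u64> = version
        .split('.')
        .map(|s| s.parse().ok())
        .collect::<Option<_>>()?;
    if p.len() != 3 {
        return None;
    }
    match kind {
        "major" => { p[0] += 1; p[1] = 0; p[2] = 0; } // X.y.z -> X+1.0.0
        "minor" => { p[1] += 1; p[2] = 0; }           // x.Y.z -> x.Y+1.0
        "patch" => { p[2] += 1; }                     // x.y.Z -> x.y.Z+1
        _ => return None,
    }
    Some(format!("{}.{}.{}", p[0], p[1], p[2]))
}

fn main() {
    // The exact bump this PR applies to cli/Cargo.toml:
    assert_eq!(bump("0.0.6", "patch").as_deref(), Some("0.0.7"));
}
```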
## Version Bump Rules

The type of version bump is determined by the following rules:

### Major Version Bump (X.y.z → X+1.0.0)

A major version bump occurs when:

- The PR title contains "BREAKING CHANGE" or "major"
- The PR body contains "BREAKING CHANGE"
- The PR has a "major" label

### Minor Version Bump (x.Y.z → x.Y+1.0)

A minor version bump occurs when:

- The PR title contains "feat", "feature", or "minor"
- The PR has a "minor" or "feature" label

### Patch Version Bump (x.y.Z → x.y.Z+1)

A patch version bump occurs by default when:

- The PR doesn't match any of the above criteria
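
In this PR the rules above are implemented as a bash conditional in the `bump_type` step of `.github/workflows/version-bump.yml`. For clarity, here is the same decision logic restated as a Rust sketch (illustrative only; nothing like this ships in the CLI):

```rust
// Mirrors the workflow's substring checks; illustrative, not shipped code.
fn bump_type(title: &str, body: &str, labels: &[&str]) -> &'static str {
    if title.contains("BREAKING CHANGE")
        || title.contains("major")
        || body.contains("BREAKING CHANGE")
        || labels.contains(&"major")
    {
        "major"
    } else if ["feat", "feature", "minor"].iter().any(|k| title.contains(*k))
        || labels.contains(&"minor")
        || labels.contains(&"feature")
    {
        "minor"
    } else {
        "patch"
    }
}

fn main() {
    assert_eq!(bump_type("feat: add user management", "", &[]), "minor");
    assert_eq!(bump_type("fix: resolve upload bug", "", &[]), "patch");
    assert_eq!(bump_type("chore: deps", "", &["major"]), "major");
}
```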
## Manual Version Control

If you need to manually control the version:

1. You can add specific labels to your PR:
   - `major` for a major version bump
   - `minor` or `feature` for a minor version bump
   - Any other label will result in a patch version bump
2. You can include specific keywords in your PR title:
   - "BREAKING CHANGE" or "major" for a major version bump
   - "feat", "feature", or "minor" for a minor version bump

## Example PR Titles

- `feat: add new command for user management` → Minor version bump
- `fix: resolve issue with file uploads` → Patch version bump
- `BREAKING CHANGE: change API response format` → Major version bump
- `chore: update dependencies` → Patch version bump


@@ -12,7 +12,7 @@ use std::time::Duration;
 use crate::utils::{
     buster_credentials::get_and_validate_buster_credentials,
-    profiles::{Credential, PostgresCredentials},
+    profiles::{BigqueryCredentials, Credential, PostgresCredentials},
     BusterClient, BusterConfig, PostDataSourcesRequest,
 };
@@ -120,12 +120,15 @@ pub async fn init(destination_path: Option<&str>) -> Result<()> {
         DatabaseType::Postgres => {
             setup_postgres(buster_creds.url, buster_creds.api_key, &config_path).await
         }
+        DatabaseType::BigQuery => {
+            setup_bigquery(buster_creds.url, buster_creds.api_key, &config_path).await
+        }
         _ => {
             println!(
                 "{}",
                 format!("{} support is coming soon!", db_type).yellow()
             );
-            println!("Currently, only Redshift and Postgres are supported.");
+            println!("Currently, only Redshift, Postgres, and BigQuery are supported.");
             Err(anyhow::anyhow!("Database type not yet implemented"))
         }
     }
@@ -374,7 +377,7 @@ async fn setup_postgres(
     // Collect port (with validation)
     let port_str = Text::new("Enter the PostgreSQL port:")
-        .with_default("5432") // Default Postgres port is 5432
+        .with_default("5432") // Default Postgres port is 5432
         .with_help_message("Default PostgreSQL port is 5432")
         .with_validator(|input: &str| match input.parse::<u16>() {
             Ok(_) => Ok(Validation::Valid),
@@ -419,7 +422,7 @@ async fn setup_postgres(
     // Collect schema (optional)
     let schema = Text::new("Enter the PostgreSQL schema (optional):")
         .with_help_message("Leave blank to access all available schemas")
-        .with_default("public") // Default Postgres schema is usually 'public'
+        .with_default("public") // Default Postgres schema is usually 'public'
         .prompt()?;
     let schema = if schema.trim().is_empty() {
         None
@@ -524,6 +527,172 @@ async fn setup_postgres(
    }
}

async fn setup_bigquery(
    buster_url: String,
    buster_api_key: String,
    config_path: &Path,
) -> Result<()> {
    println!("{}", "Setting up BigQuery connection...".bold().green());

    // Collect name (with validation)
    let name_regex = Regex::new(r"^[a-zA-Z0-9_-]+$")?;
    let name = Text::new("Enter a unique name for this data source:")
        .with_help_message("Only alphanumeric characters, dash (-) and underscore (_) allowed")
        .with_validator(move |input: &str| {
            if input.trim().is_empty() {
                return Ok(Validation::Invalid("Name cannot be empty".into()));
            }
            if name_regex.is_match(input) {
                Ok(Validation::Valid)
            } else {
                Ok(Validation::Invalid(
                    "Name must contain only alphanumeric characters, dash (-) or underscore (_)"
                        .into(),
                ))
            }
        })
        .prompt()?;

    // Collect project ID
    let project_id = Text::new("Enter the Google Cloud project ID:")
        .with_help_message("Example: my-project-123456")
        .with_validator(|input: &str| {
            if input.trim().is_empty() {
                return Ok(Validation::Invalid("Project ID cannot be empty".into()));
            }
            Ok(Validation::Valid)
        })
        .prompt()?;

    // Collect dataset ID (optional)
    let dataset_id = Text::new("Enter the BigQuery dataset ID (optional):")
        .with_help_message("Leave blank to access all available datasets")
        .prompt()?;
    let dataset_id = if dataset_id.trim().is_empty() {
        None
    } else {
        Some(dataset_id)
    };

    // Collect credentials JSON
    println!(
        "\n{}",
        "BigQuery requires a service account credentials JSON file.".bold()
    );
    println!(
        "You can create one in the Google Cloud Console under IAM & Admin > Service Accounts."
    );
    let credentials_path = Text::new("Enter the path to your credentials JSON file:")
        .with_help_message("Example: /path/to/credentials.json")
        .with_validator(|input: &str| {
            let path = Path::new(input);
            if !path.exists() {
                return Ok(Validation::Invalid("File does not exist".into()));
            }
            if !path.is_file() {
                return Ok(Validation::Invalid("Path is not a file".into()));
            }
            Ok(Validation::Valid)
        })
        .prompt()?;

    // Read credentials file
    let credentials_content = match fs::read_to_string(&credentials_path) {
        Ok(content) => content,
        Err(e) => {
            return Err(anyhow::anyhow!("Failed to read credentials file: {}", e));
        }
    };

    // Parse to confirm the file contains valid JSON (serde_yaml accepts JSON,
    // since YAML is a superset of JSON)
    let credentials_json: serde_yaml::Value = match serde_yaml::from_str(&credentials_content) {
        Ok(json) => json,
        Err(e) => {
            return Err(anyhow::anyhow!("Invalid JSON in credentials file: {}", e));
        }
    };

    // Show summary and confirm
    println!("\n{}", "Connection Summary:".bold());
    println!("Name: {}", name.cyan());
    println!("Project ID: {}", project_id.cyan());
    // Display dataset ID with clear indication if it's empty
    if let Some(ds) = &dataset_id {
        println!("Dataset ID: {}", ds.cyan());
    } else {
        println!("Dataset ID: {}", "All datasets (null)".cyan());
    }
    println!("Credentials: {}", credentials_path.cyan());

    let confirm = Confirm::new("Do you want to create this data source?")
        .with_default(true)
        .prompt()?;
    if !confirm {
        println!("{}", "Data source creation cancelled.".yellow());
        return Ok(());
    }

    // Create API request
    let request = PostDataSourcesRequest {
        name: name.clone(),
        env: "dev".to_string(), // Default to dev environment
        credential: Credential::Bigquery(BigqueryCredentials {
            credentials_json,
            project_id: project_id.clone(),
            dataset_ids: dataset_id.as_ref().map(|id| vec![id.clone()]),
        }),
    };

    // Send to API with progress indicator
    let spinner = ProgressBar::new_spinner();
    spinner.set_style(
        ProgressStyle::default_spinner()
            .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ")
            .template("{spinner:.green} {msg}")
            .unwrap(),
    );
    spinner.set_message("Sending credentials to Buster API...");
    spinner.enable_steady_tick(Duration::from_millis(100));

    let client = BusterClient::new(buster_url, buster_api_key)?;
    match client.post_data_sources(vec![request]).await {
        Ok(_) => {
            spinner.finish_with_message(
                "✓ Data source created successfully!"
                    .green()
                    .bold()
                    .to_string(),
            );
            println!(
                "\nData source '{}' is now available for use with Buster.",
                name.cyan()
            );
            // Create buster.yml file
            create_buster_config_file(
                config_path,
                &name,
                Some(&project_id),     // Project ID maps to database
                dataset_id.as_deref(), // Dataset ID maps to schema
            )?;
            println!("You can now use this data source with other Buster commands.");
            Ok(())
        }
        Err(e) => {
            spinner.finish_with_message("✗ Failed to create data source".red().bold().to_string());
            println!("\nError: {}", e);
            println!("Please check your credentials and try again.");
            Err(anyhow::anyhow!("Failed to create data source: {}", e))
        }
    }
}

// Helper function to create buster.yml file
fn create_buster_config_file(
    path: &Path,

@@ -10,8 +10,10 @@ use walkdir::WalkDir;
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct BusterConfig {
     pub data_source_name: Option<String>,
-    pub schema: Option<String>,
-    pub database: Option<String>,
+    #[serde(alias = "dataset_id")] // BigQuery alias for schema
+    pub schema: Option<String>, // For SQL DBs: schema, For BigQuery: dataset ID
+    #[serde(alias = "project_id")] // BigQuery alias for database
+    pub database: Option<String>, // For SQL DBs: database, For BigQuery: project ID
     pub exclude_files: Option<Vec<String>>,
     pub exclude_tags: Option<Vec<String>>,
 }
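
To make the aliasing concrete: a `buster.yml` written with BigQuery terms (`project_id`/`dataset_id`) now deserializes into the same fields as one written with SQL terms (`database`/`schema`). A minimal sketch, assuming `serde` and `serde_yaml` as dependencies and trimming the struct to the aliased fields:

```rust
use serde::Deserialize;

// Trimmed copy of the struct above, keeping only the aliased fields.
#[derive(Debug, Deserialize)]
struct BusterConfig {
    data_source_name: Option<String>,
    #[serde(alias = "dataset_id")]
    schema: Option<String>,
    #[serde(alias = "project_id")]
    database: Option<String>,
}

fn main() -> Result<(), serde_yaml::Error> {
    // BigQuery-flavored keys...
    let bq: BusterConfig =
        serde_yaml::from_str("data_source_name: bq\nproject_id: my-project\ndataset_id: analytics")?;
    // ...and SQL-flavored keys land in the same fields.
    let pg: BusterConfig =
        serde_yaml::from_str("data_source_name: pg\ndatabase: mydb\nschema: public")?;
    assert_eq!(bq.database.as_deref(), Some("my-project"));
    assert_eq!(bq.schema.as_deref(), Some("analytics"));
    assert_eq!(pg.schema.as_deref(), Some("public"));
    Ok(())
}
```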