From a70389b4e8f85f85cb2c6f6539ba07394d9fad32 Mon Sep 17 00:00:00 2001 From: dal Date: Wed, 26 Feb 2025 07:45:22 -0700 Subject: [PATCH 1/2] bigquery support --- .github/workflows/cli-release.yml | 2 +- api/src/utils/query_engine/credentials.rs | 1 - api/src/utils/query_engine/import_datasets.rs | 13 +- cli/Cargo.toml | 2 +- cli/src/commands/init.rs | 177 +++++++++++++++++- cli/src/utils/exclusion.rs | 6 +- 6 files changed, 180 insertions(+), 21 deletions(-) diff --git a/.github/workflows/cli-release.yml b/.github/workflows/cli-release.yml index e8090d6d1..a73d8427d 100644 --- a/.github/workflows/cli-release.yml +++ b/.github/workflows/cli-release.yml @@ -107,7 +107,7 @@ jobs: - name: Get version id: get_version run: | - VERSION=0.0.6 + VERSION=0.0.7 echo "version=$VERSION" >> $GITHUB_OUTPUT - name: Create Release diff --git a/api/src/utils/query_engine/credentials.rs b/api/src/utils/query_engine/credentials.rs index fd821d78c..9f54c1f40 100644 --- a/api/src/utils/query_engine/credentials.rs +++ b/api/src/utils/query_engine/credentials.rs @@ -31,7 +31,6 @@ pub struct AthenaCredentials { pub struct BigqueryCredentials { pub credentials_json: Value, pub project_id: String, - pub dataset_ids: Option>, } #[derive(Serialize, Deserialize, Debug, Clone)] diff --git a/api/src/utils/query_engine/import_datasets.rs b/api/src/utils/query_engine/import_datasets.rs index 8809ac769..dacd80cb9 100644 --- a/api/src/utils/query_engine/import_datasets.rs +++ b/api/src/utils/query_engine/import_datasets.rs @@ -233,18 +233,7 @@ async fn get_bigquery_tables_and_views( Err(e) => return Err(e), }; - let schema_string = if let Some(datasets) = &credentials.dataset_ids { - format!( - "IN ({})", - datasets - .iter() - .map(|s| format!("'{}'", s)) - .collect::>() - .join(", ") - ) - } else { - "NOT IN ('INFORMATION_SCHEMA')".to_string() - }; + let schema_string = "NOT IN ('INFORMATION_SCHEMA')".to_string(); let tables_and_views_query = format!( " diff --git a/cli/Cargo.toml b/cli/Cargo.toml 
index 81fcb7547..16a0ebf16 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "buster-cli" -version = "0.0.6" +version = "0.0.7" edition = "2021" build = "build.rs" diff --git a/cli/src/commands/init.rs b/cli/src/commands/init.rs index 47f749a68..85ecc4201 100644 --- a/cli/src/commands/init.rs +++ b/cli/src/commands/init.rs @@ -12,7 +12,7 @@ use std::time::Duration; use crate::utils::{ buster_credentials::get_and_validate_buster_credentials, - profiles::{Credential, PostgresCredentials}, + profiles::{BigqueryCredentials, Credential, PostgresCredentials}, BusterClient, BusterConfig, PostDataSourcesRequest, }; @@ -120,12 +120,15 @@ pub async fn init(destination_path: Option<&str>) -> Result<()> { DatabaseType::Postgres => { setup_postgres(buster_creds.url, buster_creds.api_key, &config_path).await } + DatabaseType::BigQuery => { + setup_bigquery(buster_creds.url, buster_creds.api_key, &config_path).await + } _ => { println!( "{}", format!("{} support is coming soon!", db_type).yellow() ); - println!("Currently, only Redshift and Postgres are supported."); + println!("Currently, only Redshift, Postgres, and BigQuery are supported."); Err(anyhow::anyhow!("Database type not yet implemented")) } } @@ -374,7 +377,7 @@ async fn setup_postgres( // Collect port (with validation) let port_str = Text::new("Enter the PostgreSQL port:") - .with_default("5432") // Default Postgres port is 5432 + .with_default("5432") // Default Postgres port is 5432 .with_help_message("Default PostgreSQL port is 5432") .with_validator(|input: &str| match input.parse::() { Ok(_) => Ok(Validation::Valid), @@ -419,7 +422,7 @@ async fn setup_postgres( // Collect schema (optional) let schema = Text::new("Enter the PostgreSQL schema (optional):") .with_help_message("Leave blank to access all available schemas") - .with_default("public") // Default Postgres schema is usually 'public' + .with_default("public") // Default Postgres schema is usually 'public' .prompt()?; let 
schema = if schema.trim().is_empty() { None @@ -524,6 +527,172 @@ async fn setup_postgres( } } +async fn setup_bigquery( + buster_url: String, + buster_api_key: String, + config_path: &Path, +) -> Result<()> { + println!("{}", "Setting up BigQuery connection...".bold().green()); + + // Collect name (with validation) + let name_regex = Regex::new(r"^[a-zA-Z0-9_-]+$")?; + let name = Text::new("Enter a unique name for this data source:") + .with_help_message("Only alphanumeric characters, dash (-) and underscore (_) allowed") + .with_validator(move |input: &str| { + if input.trim().is_empty() { + return Ok(Validation::Invalid("Name cannot be empty".into())); + } + if name_regex.is_match(input) { + Ok(Validation::Valid) + } else { + Ok(Validation::Invalid( + "Name must contain only alphanumeric characters, dash (-) or underscore (_)" + .into(), + )) + } + }) + .prompt()?; + + // Collect project ID + let project_id = Text::new("Enter the Google Cloud project ID:") + .with_help_message("Example: my-project-123456") + .with_validator(|input: &str| { + if input.trim().is_empty() { + return Ok(Validation::Invalid("Project ID cannot be empty".into())); + } + Ok(Validation::Valid) + }) + .prompt()?; + + // Collect dataset ID (optional) + let dataset_id = Text::new("Enter the BigQuery dataset ID (optional):") + .with_help_message("Leave blank to access all available datasets") + .prompt()?; + let dataset_id = if dataset_id.trim().is_empty() { + None + } else { + Some(dataset_id) + }; + + // Collect credentials JSON + println!( + "\n{}", + "BigQuery requires a service account credentials JSON file.".bold() + ); + println!( + "You can create one in the Google Cloud Console under IAM & Admin > Service Accounts." 
+ ); + + let credentials_path = Text::new("Enter the path to your credentials JSON file:") + .with_help_message("Example: /path/to/credentials.json") + .with_validator(|input: &str| { + let path = Path::new(input); + if !path.exists() { + return Ok(Validation::Invalid("File does not exist".into())); + } + if !path.is_file() { + return Ok(Validation::Invalid("Path is not a file".into())); + } + Ok(Validation::Valid) + }) + .prompt()?; + + // Read credentials file + let credentials_content = match fs::read_to_string(&credentials_path) { + Ok(content) => content, + Err(e) => { + return Err(anyhow::anyhow!("Failed to read credentials file: {}", e)); + } + }; + + // Parse JSON to ensure it's valid + let credentials_json: serde_yaml::Value = match serde_yaml::from_str(&credentials_content) { + Ok(json) => json, + Err(e) => { + return Err(anyhow::anyhow!("Invalid JSON in credentials file: {}", e)); + } + }; + + // Show summary and confirm + println!("\n{}", "Connection Summary:".bold()); + println!("Name: {}", name.cyan()); + println!("Project ID: {}", project_id.cyan()); + + // Display dataset ID with clear indication if it's empty + if let Some(ds) = &dataset_id { + println!("Dataset ID: {}", ds.cyan()); + } else { + println!("Dataset ID: {}", "All datasets (null)".cyan()); + } + + println!("Credentials: {}", credentials_path.cyan()); + + let confirm = Confirm::new("Do you want to create this data source?") + .with_default(true) + .prompt()?; + + if !confirm { + println!("{}", "Data source creation cancelled.".yellow()); + return Ok(()); + } + + // Create API request + let request = PostDataSourcesRequest { + name: name.clone(), + env: "dev".to_string(), // Default to dev environment + credential: Credential::Bigquery(BigqueryCredentials { + credentials_json, + project_id: project_id.clone(), + dataset_ids: dataset_id.as_ref().map(|id| vec![id.clone()]), + }), + }; + + // Send to API with progress indicator + let spinner = ProgressBar::new_spinner(); + 
spinner.set_style( + ProgressStyle::default_spinner() + .tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ") + .template("{spinner:.green} {msg}") + .unwrap(), + ); + spinner.set_message("Sending credentials to Buster API..."); + spinner.enable_steady_tick(Duration::from_millis(100)); + + let client = BusterClient::new(buster_url, buster_api_key)?; + + match client.post_data_sources(vec![request]).await { + Ok(_) => { + spinner.finish_with_message( + "✓ Data source created successfully!" + .green() + .bold() + .to_string(), + ); + println!( + "\nData source '{}' is now available for use with Buster.", + name.cyan() + ); + + // Create buster.yml file + create_buster_config_file( + config_path, + &name, + Some(&project_id), // Project ID maps to database + dataset_id.as_deref(), // Dataset ID maps to schema + )?; + + println!("You can now use this data source with other Buster commands."); + Ok(()) + } + Err(e) => { + spinner.finish_with_message("✗ Failed to create data source".red().bold().to_string()); + println!("\nError: {}", e); + println!("Please check your credentials and try again."); + Err(anyhow::anyhow!("Failed to create data source: {}", e)) + } + } +} + // Helper function to create buster.yml file fn create_buster_config_file( path: &Path, diff --git a/cli/src/utils/exclusion.rs b/cli/src/utils/exclusion.rs index c75bbd4a4..e6bd4d507 100644 --- a/cli/src/utils/exclusion.rs +++ b/cli/src/utils/exclusion.rs @@ -10,8 +10,10 @@ use walkdir::WalkDir; #[derive(Debug, Deserialize, Serialize, Clone)] pub struct BusterConfig { pub data_source_name: Option, - pub schema: Option, - pub database: Option, + #[serde(alias = "dataset_id")] // BigQuery alias for schema + pub schema: Option, // For SQL DBs: schema, For BigQuery: dataset ID + #[serde(alias = "project_id")] // BigQuery alias for database + pub database: Option, // For SQL DBs: database, For BigQuery: project ID pub exclude_files: Option>, pub exclude_tags: Option>, } From 89e92cf81f73dfae053df8b60b751026b5bcc31b Mon Sep 17 00:00:00 
# .github/workflows/version-bump.yml
#
# Bumps the version in cli/Cargo.toml after a pull request touching cli/** is
# merged into main, then commits and pushes the bump.
name: Version Bump

on:
  pull_request:
    types: [closed]
    branches:
      - main
    paths:
      - 'cli/**'

jobs:
  bump-version:
    # "closed" also fires for PRs closed without merging; only act on merges.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Git
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "actions@github.com"

      - name: Determine version bump type
        id: bump_type
        # PR title, body and labels are attacker-controlled text. They are
        # passed through environment variables rather than expanded with
        # ${{ }} inside the script, so their content is never parsed as shell
        # code and embedded quotes cannot break the assignments.
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_BODY: ${{ github.event.pull_request.body }}
          PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
        run: |
          # Rules are checked in order; matching is a case-sensitive substring test.
          if [[ "$PR_TITLE" == *"BREAKING CHANGE"* || "$PR_TITLE" == *"major"* || "$PR_BODY" == *"BREAKING CHANGE"* || "$PR_LABELS" == *"major"* ]]; then
            echo "type=major" >> "$GITHUB_OUTPUT"
            echo "Detected major version bump"
          elif [[ "$PR_TITLE" == *"feat"* || "$PR_TITLE" == *"feature"* || "$PR_TITLE" == *"minor"* || "$PR_LABELS" == *"minor"* || "$PR_LABELS" == *"feature"* ]]; then
            echo "type=minor" >> "$GITHUB_OUTPUT"
            echo "Detected minor version bump"
          else
            echo "type=patch" >> "$GITHUB_OUTPUT"
            echo "Detected patch version bump"
          fi

      - name: Install cargo-bump
        run: cargo install cargo-bump

      - name: Bump version
        working-directory: ./cli
        env:
          BUMP_TYPE: ${{ steps.bump_type.outputs.type }}
        run: |
          cargo bump "$BUMP_TYPE"
          NEW_VERSION=$(grep '^version =' Cargo.toml | sed 's/version = "\(.*\)"/\1/')
          echo "New version: $NEW_VERSION"
          echo "new_version=$NEW_VERSION" >> "$GITHUB_ENV"

      - name: Commit and push version bump
        run: |
          git add cli/Cargo.toml
          # new_version was exported to the job environment by the previous step.
          git commit -m "Bump version to ${new_version} [skip ci]"
          git push
+ +## Version Bump Rules + +The type of version bump is determined by the following rules, checked in order (major, then minor, then patch). Keywords and labels are matched as case-sensitive substrings: + +### Major Version Bump (X.y.z → X+1.0.0) + +A major version bump occurs when: +- The PR title contains "BREAKING CHANGE" or "major" +- The PR body contains "BREAKING CHANGE" +- The PR has a "major" label + +### Minor Version Bump (x.Y.z → x.Y+1.0) + +A minor version bump occurs when: +- The PR title contains "feat", "feature", or "minor" +- The PR has a "minor" or "feature" label + +### Patch Version Bump (x.y.Z → x.y.Z+1) + +A patch version bump occurs by default when: +- The PR doesn't match any of the above criteria + +## Manual Version Control + +If you need to manually control the version: + +1. You can add specific labels to your PR: + - `major` for a major version bump + - `minor` or `feature` for a minor version bump + - Any other label has no effect on its own; unless the PR title or body matches one of the rules above, the result is a patch version bump + +2. You can include specific keywords in your PR title: + - "BREAKING CHANGE" or "major" for a major version bump + - "feat", "feature", or "minor" for a minor version bump + +## Example PR Titles + +- `feat: add new command for user management` → Minor version bump +- `fix: resolve issue with file uploads` → Patch version bump +- `BREAKING CHANGE: change API response format` → Major version bump +- `chore: update dependencies` → Patch version bump \ No newline at end of file