diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 19c7a99e7..ddf2156da 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -36,6 +36,7 @@ colored = "3.0" rustls = { version = "0.23", features = ["tls12"] } url = "2.5.0" zip = "2.2.2" +glob = "0.3.1" [dev-dependencies] tempfile = "3.16.0" diff --git a/cli/README.md b/cli/README.md index 6f2a0b2ef..ebe1774b3 100644 --- a/cli/README.md +++ b/cli/README.md @@ -120,8 +120,26 @@ your-project/ # buster.yml data_source_name: "my_warehouse" # Your default data source schema: "analytics" # Default schema for models +database: "prod" # Optional database name +exclude_files: # Optional list of files to exclude from generation + - "temp_*.sql" # Exclude all SQL files starting with temp_ + - "test/**/*.sql" # Exclude all SQL files in test directories + - "customers.sql" # Exclude a specific file ``` +The configuration supports the following fields: +- `data_source_name`: (Required) Default data source for your models +- `schema`: (Required) Default schema for your models +- `database`: (Optional) Default database name +- `exclude_files`: (Optional) List of glob patterns for files to exclude from generation + - Supports standard glob patterns (*, **, ?, etc.) + - Matches against relative paths from source directory + - Common use cases: + - Excluding temporary files: `temp_*.sql` + - Excluding test files: `test/**/*.sql` + - Excluding specific files: `customers.sql` + - Excluding files in directories: `archive/**/*.sql` + ### Model Definition Example ```yaml diff --git a/cli/src/commands/generate.rs b/cli/src/commands/generate.rs index adba69861..f250c2ac5 100644 --- a/cli/src/commands/generate.rs +++ b/cli/src/commands/generate.rs @@ -13,6 +13,7 @@ use crate::utils::{ BusterClient, GenerateApiRequest, GenerateApiResponse, yaml_diff_merger::YamlDiffMerger, }; +use glob; #[derive(Debug)] pub struct GenerateCommand { @@ -21,6 +22,7 @@ pub struct GenerateCommand { data_source_name: Option, schema: Option, database: Option, + config: BusterConfig, } #[derive(Debug)] @@ -74,11 +76,27 @@ pub struct BusterConfig { pub data_source_name: Option, pub schema: Option, pub database: Option, + pub exclude_files: Option>, +} + +impl BusterConfig { + fn validate_exclude_patterns(&self) -> Result<()> { + if let Some(patterns) = &self.exclude_files { + for pattern in patterns { + match glob::Pattern::new(pattern) { + Ok(_) => continue, + Err(e) => return Err(anyhow::anyhow!("Invalid glob pattern '{}': {}", pattern, e)), + } + } + } + Ok(()) + } } struct GenerateProgress { total_files: usize, processed: usize, + excluded: usize, current_file: String, status: String, } @@ -88,6 +106,7 @@ impl GenerateProgress { Self { total_files, processed: 0, + excluded: 0, current_file: String::new(), status: String::new(), } @@ -116,6 +135,11 @@ impl GenerateProgress { fn log_info(&self, info: &str) { println!("ℹ️ {}: {}", self.current_file, info); } + + fn log_excluded(&mut self, file: &str, pattern: &str) { + self.excluded += 1; + println!("⚠️ Skipping {} (matched exclude pattern: {})", file, pattern); + } } impl GenerateCommand { @@ -126,12 +150,20 @@ impl GenerateCommand { schema: Option, database: Option, ) -> Self { + let config = BusterConfig { + data_source_name: data_source_name.clone(), + schema: schema.clone(), + database: database.clone(), + exclude_files: None, + }; + Self { source_path, destination_path, data_source_name, schema, database, + config, } } @@ -147,7 +179,17 @@ impl GenerateCommand { progress.status = "Scanning source directory...".to_string(); progress.log_progress(); - let model_names = self.process_sql_files(&mut progress).await?; + // Create a new command with the loaded config + let cmd = GenerateCommand { + source_path: self.source_path.clone(), + destination_path: self.destination_path.clone(), + data_source_name: self.data_source_name.clone(), + schema: self.schema.clone(), + database: self.database.clone(), + config, // Use the loaded config + }; + + let model_names = cmd.process_sql_files(&mut progress).await?; // Print results println!("\n✅ Successfully processed all files"); @@ -166,9 +208,9 @@ impl GenerateCommand { // Prepare API request let request = GenerateApiRequest { - data_source_name: config.data_source_name.expect("data_source_name is required"), - schema: config.schema.expect("schema is required"), - database: config.database, + data_source_name: cmd.config.data_source_name.expect("data_source_name is required"), + schema: cmd.config.schema.expect("schema is required"), + database: cmd.config.database, model_names: model_names.iter().map(|m| m.name.clone()).collect(), }; @@ -244,7 +286,7 @@ impl GenerateCommand { if buster_yml_path.exists() { println!("✅ Found existing buster.yml"); let content = fs::read_to_string(&buster_yml_path)?; - let config: BusterConfig = serde_yaml::from_str(&content)?; + let mut config: BusterConfig = serde_yaml::from_str(&content)?; // Validate required fields let mut missing_fields = Vec::new(); @@ -262,6 +304,19 @@ impl GenerateCommand { )); } + // Validate exclude patterns if present + if let Err(e) = config.validate_exclude_patterns() { + return Err(anyhow::anyhow!("Invalid exclude_files configuration: {}", e)); + } + + // Log exclude patterns if present + if let Some(patterns) = &config.exclude_files { + println!("ℹ️ Found {} exclude pattern(s):", patterns.len()); + for pattern in patterns { + println!(" - {}", pattern); + } + } + Ok(config) } else { println!("ℹ️ No buster.yml found, creating new configuration"); @@ -292,6 +347,7 @@ impl GenerateCommand { data_source_name: Some(data_source_name), schema: Some(schema), database, + exclude_files: None, }; // Write the config to file @@ -308,6 +364,26 @@ impl GenerateCommand { let mut seen_names: HashMap = HashMap::new(); let mut errors = Vec::new(); + // Compile glob patterns once + let exclude_patterns: Vec = if let Some(patterns) = &self.config.exclude_files { + println!("🔍 Found exclude patterns: {:?}", patterns); + patterns.iter() + .filter_map(|p| match glob::Pattern::new(p) { + Ok(pattern) => { + println!("✅ Compiled pattern: {}", p); + Some(pattern) + } + Err(e) => { + progress.log_warning(&format!("Invalid exclude pattern '{}': {}", p, e)); + None + } + }) + .collect() + } else { + println!("ℹ️ No exclude patterns found"); + Vec::new() + }; + // Get list of SQL files first to set total let sql_files: Vec<_> = fs::read_dir(&self.source_path)? .filter_map(|entry| entry.ok()) @@ -325,17 +401,37 @@ impl GenerateCommand { for entry in sql_files { progress.processed += 1; - progress.current_file = entry - .file_name() - .to_str() - .unwrap_or("unknown") - .to_string(); + let file_path = entry.path(); + + // Get the relative path from the source directory + let relative_path = file_path.strip_prefix(&self.source_path) + .unwrap_or(&file_path) + .to_string_lossy() + .into_owned(); + + progress.current_file = relative_path.clone(); + progress.status = "Checking exclusions...".to_string(); + progress.log_progress(); + + println!("🔍 Checking file: {}", relative_path); + // Check if file matches any exclude pattern + if let Some(matching_pattern) = exclude_patterns.iter() + .find(|p| { + let matches = p.matches(&relative_path); + println!(" - Testing pattern '{}' against '{}': {}", p.as_str(), relative_path, matches); + matches + }) { + println!("⛔ Excluding file: {} (matched pattern: {})", relative_path, matching_pattern.as_str()); + progress.log_excluded(&relative_path, matching_pattern.as_str()); + continue; + } + progress.status = "Processing file...".to_string(); progress.log_progress(); - match self.process_single_sql_file(&entry.path()).await { + match self.process_single_sql_file(&file_path).await { Ok(model_name) => { - // Check for duplicates + println!("📝 Processing model: {} from file: {}", model_name.name, relative_path); if let Some(existing) = seen_names.get(&model_name.name) { errors.push(GenerateError::DuplicateModelName { name: model_name.name, @@ -359,6 +455,17 @@ impl GenerateCommand { } } + // Print final model list for debugging + println!("\n📋 Final model list:"); + for model in &names { + println!(" - {} (from {})", model.name, model.source_file.display()); + } + + // Update final summary with exclusion information + if progress.excluded > 0 { + println!("\nℹ️ Excluded {} files based on patterns", progress.excluded); + } + if !errors.is_empty() { // Log all errors println!("\n❌ Encountered errors during processing:");