diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index c25017e6c..e43ad7e13 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -504,7 +504,7 @@ pub async fn start_development_mode( let changed = externally_managed.iter().any(|t| { if let Some(remote_table) = tables.get(&t.name) { - !compute_table_columns_diff(t, remote_table).is_empty() + !compute_table_columns_diff(t, remote_table, &[]).is_empty() || !remote_table.order_by_equals(t) || t.engine != remote_table.engine } else { diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 6d6cae619..2f68cc9e3 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1790,7 +1790,8 @@ impl InfrastructureMap { ); } else { // Compute the basic diff components - let mut column_changes = compute_table_columns_diff(table, target_table); + let mut column_changes = + compute_table_columns_diff(table, target_table, ignore_ops); // For DeletionProtected tables, filter out destructive column changes if target_table.life_cycle == LifeCycle::DeletionProtected @@ -2009,7 +2010,7 @@ impl InfrastructureMap { return None; } - let column_changes = compute_table_columns_diff(table, target_table); + let column_changes = compute_table_columns_diff(table, target_table, &[]); let order_by_changed = !table.order_by_equals(target_table); let order_by_change = if order_by_changed { @@ -2078,7 +2079,7 @@ impl InfrastructureMap { return None; } - let mut column_changes = compute_table_columns_diff(table, target_table); + let mut column_changes = compute_table_columns_diff(table, target_table, &[]); // For DeletionProtected tables, filter out destructive column changes if target_table.life_cycle == LifeCycle::DeletionProtected { @@ -2769,21 +2770,40 @@ fn ttl_expressions_are_equivalent(before: &Option, after: &Option bool { - // Check all non-data_type and non-ttl and non-codec fields first - if before.name != after.name - || before.required != after.required - || before.unique != after.unique +fn columns_are_equivalent( + before: &Column, + after: &Column, + ignore_ops: &[crate::infrastructure::olap::clickhouse::IgnorableOperation], +) -> bool { + use crate::infrastructure::olap::clickhouse::{ + diff_strategy::{column_types_are_equivalent, normalize_column_for_low_cardinality_ignore}, + IgnorableOperation, + }; + + // Check if we should ignore LowCardinality differences + let ignore_low_cardinality = + ignore_ops.contains(&IgnorableOperation::IgnoreStringLowCardinalityDifferences); + + // Normalize columns if ignore flag is set + let normalized_before = + normalize_column_for_low_cardinality_ignore(before, ignore_low_cardinality); + let normalized_after = + normalize_column_for_low_cardinality_ignore(after, ignore_low_cardinality); + + // Check all non-data_type and non-ttl fields first + if normalized_before.name != normalized_after.name + || normalized_before.required != normalized_after.required + || normalized_before.unique != normalized_after.unique // primary_key change is handled at the table level - || before.default != after.default - || before.annotations != after.annotations - || before.comment != after.comment + || normalized_before.default != normalized_after.default + || normalized_before.annotations != normalized_after.annotations + || normalized_before.comment != normalized_after.comment { return false; } // Special handling for TTL comparison: normalize both expressions before comparing - if !ttl_expressions_are_equivalent(&before.ttl, &after.ttl) { + if !ttl_expressions_are_equivalent(&normalized_before.ttl, &normalized_after.ttl) { return false; } @@ -2795,8 +2815,11 @@ fn columns_are_equivalent(before: &Column, after: &Column) -> bool { // Use ClickHouse-specific semantic comparison for data types // This handles special cases like enums and JSON types with order-independent typed_paths - use crate::infrastructure::olap::clickhouse::diff_strategy::column_types_are_equivalent; - column_types_are_equivalent(&before.data_type, &after.data_type) + column_types_are_equivalent( + &normalized_before.data_type, + &normalized_after.data_type, + ignore_low_cardinality, + ) } /// Check if two topics are equal, ignoring metadata @@ -2865,7 +2888,11 @@ fn api_endpoints_equal_ignore_metadata(a: &ApiEndpoint, b: &ApiEndpoint) -> bool /// /// # Returns /// A vector of `ColumnChange` objects describing the differences -pub fn compute_table_columns_diff(before: &Table, after: &Table) -> Vec { +pub fn compute_table_columns_diff( + before: &Table, + after: &Table, + ignore_ops: &[crate::infrastructure::olap::clickhouse::IgnorableOperation], +) -> Vec { let mut diff = Vec::new(); // Create a HashMap of the 'before' columns: O(n) @@ -2879,7 +2906,7 @@ pub fn compute_table_columns_diff(before: &Table, after: &Table) -> Vec { @@ -3421,7 +3448,7 @@ mod diff_tests { codec: None, }); - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!(diff.len(), 1, "Expected one change"); match &diff[0] { ColumnChange::Updated { @@ -3521,7 +3548,7 @@ mod diff_tests { }, ]); - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!(diff.len(), 3, "Expected three changes"); // Count each type of change @@ -3678,7 +3705,7 @@ mod diff_tests { codec: None, }); - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!(diff.len(), 1, "Expected one change"); match &diff[0] { ColumnChange::Updated { @@ -3753,7 +3780,7 @@ mod diff_tests { }, ]); - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert!( diff.is_empty(), "Expected no changes despite reordered columns" @@ -3788,7 +3815,7 @@ mod diff_tests { col.data_type = ColumnType::BigInt; } - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!(diff.len(), 1, "Expected one change in large table"); } @@ -3857,7 +3884,7 @@ mod diff_tests { }); } - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!( diff.len(), @@ -3903,7 +3930,7 @@ mod diff_tests { codec: None, }); - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!( diff.len(), 1, @@ -3984,7 +4011,7 @@ mod diff_tests { codec: None, }); - let diff = compute_table_columns_diff(&before, &after); + let diff = compute_table_columns_diff(&before, &after, &[]); assert_eq!(diff.len(), 2, "Expected changes for edge case columns"); } @@ -4009,17 +4036,17 @@ mod diff_tests { codec: None, }; let col2 = col1.clone(); - assert!(columns_are_equivalent(&col1, &col2)); + assert!(columns_are_equivalent(&col1, &col2, &[])); // Test 2: Different names should not be equivalent let mut col3 = col1.clone(); col3.name = "different".to_string(); - assert!(!columns_are_equivalent(&col1, &col3)); + assert!(!columns_are_equivalent(&col1, &col3, &[])); // Test 2b: Different comments should not be equivalent let mut col_with_comment = col1.clone(); col_with_comment.comment = Some("User documentation".to_string()); - assert!(!columns_are_equivalent(&col1, &col_with_comment)); + assert!(!columns_are_equivalent(&col1, &col_with_comment, &[])); // Test 3: String enum from TypeScript vs integer enum from ClickHouse let typescript_enum_col = Column { @@ -4075,7 +4102,8 @@ mod diff_tests { // These should be equivalent due to the enum semantic comparison assert!(columns_are_equivalent( &clickhouse_enum_col, - &typescript_enum_col + &typescript_enum_col, + &[] )); // Test 4: Different enum values should not be equivalent @@ -4100,7 +4128,8 @@ mod diff_tests { assert!(!columns_are_equivalent( &typescript_enum_col, - &different_enum_col + &different_enum_col, + &[] )); // Test 5: Non-enum types should use standard equality @@ -4130,7 +4159,7 @@ mod diff_tests { codec: None, }; - assert!(!columns_are_equivalent(&int_col1, &int_col2)); + assert!(!columns_are_equivalent(&int_col1, &int_col2, &[])); } #[test] @@ -4186,7 +4215,7 @@ mod diff_tests { }; // These should be equivalent - order of typed_paths doesn't matter - assert!(columns_are_equivalent(&json_col1, &json_col2)); + assert!(columns_are_equivalent(&json_col1, &json_col2, &[])); // Test: Different typed_paths should not be equivalent let json_col3 = Column { @@ -4211,7 +4240,7 @@ mod diff_tests { codec: None, }; - assert!(!columns_are_equivalent(&json_col1, &json_col3)); + assert!(!columns_are_equivalent(&json_col1, &json_col3, &[])); // Test: Different max_dynamic_paths should not be equivalent let json_col4 = Column { @@ -4237,7 +4266,7 @@ mod diff_tests { codec: None, }; - assert!(!columns_are_equivalent(&json_col1, &json_col4)); + assert!(!columns_are_equivalent(&json_col1, &json_col4, &[])); } #[test] @@ -4315,7 +4344,11 @@ mod diff_tests { }; // These should be equivalent - order doesn't matter at any level - assert!(columns_are_equivalent(&nested_json_col1, &nested_json_col2)); + assert!(columns_are_equivalent( + &nested_json_col1, + &nested_json_col2, + &[] + )); } #[test] @@ -4418,7 +4451,8 @@ mod diff_tests { // These should be equivalent - name difference doesn't matter if structure matches assert!(columns_are_equivalent( &col_with_generated_name, - &col_with_user_name + &col_with_user_name, + &[] )); // Test: Different column structures should not be equivalent @@ -4455,7 +4489,8 @@ mod diff_tests { assert!(!columns_are_equivalent( &col_with_user_name, - &col_different_structure + &col_different_structure, + &[] )); } @@ -4609,7 +4644,7 @@ mod diff_tests { }; // These should be equivalent - name differences at all levels don't matter - assert!(columns_are_equivalent(&col_generated, &col_user)); + assert!(columns_are_equivalent(&col_generated, &col_user, &[])); } #[test] @@ -4638,7 +4673,11 @@ mod diff_tests { codec: Some("ZSTD(3)".to_string()), ..base_col.clone() }; - assert!(columns_are_equivalent(&col_with_codec1, &col_with_codec2)); + assert!(columns_are_equivalent( + &col_with_codec1, + &col_with_codec2, + &[] + )); // Test 2: Columns with different codecs should not be equivalent let col_with_different_codec = Column { @@ -4647,11 +4686,12 @@ mod diff_tests { }; assert!(!columns_are_equivalent( &col_with_codec1, - &col_with_different_codec + &col_with_different_codec, + &[] )); // Test 3: Column with codec vs column without codec should not be equivalent - assert!(!columns_are_equivalent(&col_with_codec1, &base_col)); + assert!(!columns_are_equivalent(&col_with_codec1, &base_col, &[])); // Test 4: Columns with codec chains should be detected as different let col_with_chain1 = Column { @@ -4662,7 +4702,11 @@ mod diff_tests { codec: Some("Delta, ZSTD".to_string()), ..base_col.clone() }; - assert!(!columns_are_equivalent(&col_with_chain1, &col_with_chain2)); + assert!(!columns_are_equivalent( + &col_with_chain1, + &col_with_chain2, + &[] + )); // Test 5: Codec with different compression levels should be detected as different let col_zstd3 = Column { @@ -4673,7 +4717,7 @@ mod diff_tests { codec: Some("ZSTD(9)".to_string()), ..base_col.clone() }; - assert!(!columns_are_equivalent(&col_zstd3, &col_zstd9)); + assert!(!columns_are_equivalent(&col_zstd3, &col_zstd9, &[])); // Test 6: Normalized codec comparison - user "Delta" vs ClickHouse "Delta(4)" let col_user_delta = Column { @@ -4684,7 +4728,7 @@ mod diff_tests { codec: Some("Delta(4)".to_string()), ..base_col.clone() }; - assert!(columns_are_equivalent(&col_user_delta, &col_ch_delta)); + assert!(columns_are_equivalent(&col_user_delta, &col_ch_delta, &[])); // Test 7: Normalized codec comparison - user "Gorilla" vs ClickHouse "Gorilla(8)" let col_user_gorilla = Column { @@ -4695,7 +4739,11 @@ mod diff_tests { codec: Some("Gorilla(8)".to_string()), ..base_col.clone() }; - assert!(columns_are_equivalent(&col_user_gorilla, &col_ch_gorilla)); + assert!(columns_are_equivalent( + &col_user_gorilla, + &col_ch_gorilla, + &[] + )); // Test 8: Normalized chain comparison - "Delta, LZ4" vs "Delta(4), LZ4" let col_user_chain = Column { @@ -4706,7 +4754,7 @@ mod diff_tests { codec: Some("Delta(4), LZ4".to_string()), ..base_col.clone() }; - assert!(columns_are_equivalent(&col_user_chain, &col_ch_chain)); + assert!(columns_are_equivalent(&col_user_chain, &col_ch_chain, &[])); } } @@ -6004,4 +6052,164 @@ mod diff_orchestration_worker_tests { assert!(removed_found, "Python worker removal not detected"); assert!(added_found, "Typescript worker addition not detected"); } + + #[test] + fn test_ignore_string_low_cardinality_differences_integration() { + use crate::framework::core::infrastructure::table::{ + Column, ColumnType, IntType, OrderBy, Table, + }; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::framework::versions::Version; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + use crate::infrastructure::olap::clickhouse::IgnorableOperation; + + // Create two tables that differ only in LowCardinality annotations + let table_with_low_cardinality = Table { + name: "test_table".to_string(), + database: None, + cluster_name: None, + columns: vec![ + Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![("LowCardinality".to_string(), serde_json::json!(true))], + comment: None, + ttl: None, + codec: None, + }, + Column { + name: "name".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }, + ], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + primary_key_expression: None, + }; + + let table_without_low_cardinality = Table { + name: "test_table".to_string(), + database: None, + cluster_name: None, + columns: vec![ + Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], // No LowCardinality annotation + comment: None, + ttl: None, + codec: None, + }, + Column { + name: "name".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }, + ], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + primary_key_expression: None, + }; + + // Test 1: Without ignore flag, should detect difference + let diff_without_ignore = compute_table_columns_diff( + &table_with_low_cardinality, + &table_without_low_cardinality, + &[], + ); + assert_eq!( + diff_without_ignore.len(), + 1, + "Expected one column change without ignore flag" + ); + + // Test 2: With ignore flag, should detect no difference + let ignore_ops = vec![IgnorableOperation::IgnoreStringLowCardinalityDifferences]; + let diff_with_ignore = compute_table_columns_diff( + &table_with_low_cardinality, + &table_without_low_cardinality, + &ignore_ops, + ); + assert_eq!( + diff_with_ignore.len(), + 0, + "Expected no column changes with ignore flag" + ); + + // Test 3: Test with columns_are_equivalent directly + assert!(!columns_are_equivalent( + &table_with_low_cardinality.columns[0], + &table_without_low_cardinality.columns[0], + &[] + )); + assert!(columns_are_equivalent( + &table_with_low_cardinality.columns[0], + &table_without_low_cardinality.columns[0], + &ignore_ops + )); + + // Test 4: Ensure other differences are still detected + let mut table_with_different_type = table_without_low_cardinality.clone(); + table_with_different_type.columns[0].data_type = ColumnType::Int(IntType::Int64); + + let diff_different_type = compute_table_columns_diff( + &table_with_low_cardinality, + &table_with_different_type, + &ignore_ops, + ); + assert_eq!( + diff_different_type.len(), + 1, + "Should still detect actual type differences even with ignore flag" + ); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index a308ce62c..f17fd33f9 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -170,7 +170,11 @@ pub fn enums_are_equivalent(actual: &DataEnum, target: &DataEnum) -> bool { /// /// # Returns /// `true` if the Nested types are semantically equivalent, `false` otherwise -pub fn nested_are_equivalent(actual: &Nested, target: &Nested) -> bool { +pub fn nested_are_equivalent( + actual: &Nested, + target: &Nested, + ignore_low_cardinality: bool, +) -> bool { // First check direct equality (fast path) if actual == target { return true; @@ -190,22 +194,32 @@ pub fn nested_are_equivalent(actual: &Nested, target: &Nested) -> bool { // Note: We assume columns are in the same order. If ClickHouse reorders nested columns, // we may need to add order-independent comparison here as well. for (actual_col, target_col) in actual.columns.iter().zip(target.columns.iter()) { + // Normalize columns to handle LowCardinality comparison when requested + let normalized_actual = + normalize_column_for_low_cardinality_ignore(actual_col, ignore_low_cardinality); + let normalized_target = + normalize_column_for_low_cardinality_ignore(target_col, ignore_low_cardinality); + // Use columns_are_equivalent for full semantic comparison // We need to be careful here to avoid infinite recursion // So we'll do a simpler comparison for now - if actual_col.name != target_col.name - || actual_col.required != target_col.required - || actual_col.unique != target_col.unique - || actual_col.default != target_col.default - || actual_col.annotations != target_col.annotations - || actual_col.comment != target_col.comment - || actual_col.ttl != target_col.ttl + if normalized_actual.name != normalized_target.name + || normalized_actual.required != normalized_target.required + || normalized_actual.unique != normalized_target.unique + || normalized_actual.default != normalized_target.default + || normalized_actual.annotations != normalized_target.annotations + || normalized_actual.comment != normalized_target.comment + || normalized_actual.ttl != normalized_target.ttl { return false; } // Recursively compare data types - if !column_types_are_equivalent(&actual_col.data_type, &target_col.data_type) { + if !column_types_are_equivalent( + &actual_col.data_type, + &target_col.data_type, + ignore_low_cardinality, + ) { return false; } } @@ -218,23 +232,29 @@ pub fn nested_are_equivalent(actual: &Nested, target: &Nested) -> bool { /// This is used for comparing nested types within JsonOptions, handling special cases /// like enums, nested JSON types, and Nested column types. Also recursively handles /// container types (Array, Nullable, Map, NamedTuple) to ensure nested comparisons work. +/// When `ignore_low_cardinality` is true, treats String and LowCardinality(String) as equivalent. /// /// # Arguments /// * `a` - The first ColumnType to compare /// * `b` - The second ColumnType to compare +/// * `ignore_low_cardinality` - Whether to consider String and LowCardinality(String) equivalent /// /// # Returns /// `true` if the ColumnTypes are semantically equivalent, `false` otherwise -pub fn column_types_are_equivalent(a: &ColumnType, b: &ColumnType) -> bool { +pub fn column_types_are_equivalent( + a: &ColumnType, + b: &ColumnType, + ignore_low_cardinality: bool, +) -> bool { match (a, b) { (ColumnType::Enum(a_enum), ColumnType::Enum(b_enum)) => { enums_are_equivalent(a_enum, b_enum) } (ColumnType::Json(a_opts), ColumnType::Json(b_opts)) => { - json_options_are_equivalent(a_opts, b_opts) + json_options_are_equivalent(a_opts, b_opts, ignore_low_cardinality) } (ColumnType::Nested(a_nested), ColumnType::Nested(b_nested)) => { - nested_are_equivalent(a_nested, b_nested) + nested_are_equivalent(a_nested, b_nested, ignore_low_cardinality) } // Recursively handle Array types ( @@ -246,10 +266,13 @@ pub fn column_types_are_equivalent(a: &ColumnType, b: &ColumnType) -> bool { element_type: b_elem, element_nullable: b_nullable, }, - ) => a_nullable == b_nullable && column_types_are_equivalent(a_elem, b_elem), + ) => { + a_nullable == b_nullable + && column_types_are_equivalent(a_elem, b_elem, ignore_low_cardinality) + } // Recursively handle Nullable types (ColumnType::Nullable(a_inner), ColumnType::Nullable(b_inner)) => { - column_types_are_equivalent(a_inner, b_inner) + column_types_are_equivalent(a_inner, b_inner, ignore_low_cardinality) } // Recursively handle Map types ( @@ -261,7 +284,10 @@ pub fn column_types_are_equivalent(a: &ColumnType, b: &ColumnType) -> bool { key_type: b_key, value_type: b_val, }, - ) => column_types_are_equivalent(a_key, b_key) && column_types_are_equivalent(a_val, b_val), + ) => { + column_types_are_equivalent(a_key, b_key, ignore_low_cardinality) + && column_types_are_equivalent(a_val, b_val, ignore_low_cardinality) + } // Recursively handle NamedTuple types (ColumnType::NamedTuple(a_fields), ColumnType::NamedTuple(b_fields)) => { if a_fields.len() != b_fields.len() { @@ -271,7 +297,8 @@ pub fn column_types_are_equivalent(a: &ColumnType, b: &ColumnType) -> bool { .iter() .zip(b_fields.iter()) .all(|((a_name, a_type), (b_name, b_type))| { - a_name == b_name && column_types_are_equivalent(a_type, b_type) + a_name == b_name + && column_types_are_equivalent(a_type, b_type, ignore_low_cardinality) }) } // For all other types, use standard equality @@ -279,6 +306,33 @@ pub fn column_types_are_equivalent(a: &ColumnType, b: &ColumnType) -> bool { } } +/// Normalizes a column for LowCardinality ignore comparisons. +/// +/// When `ignore_low_cardinality` is true, this strips LowCardinality annotations +/// from the column to allow String and LowCardinality(String) to be considered equivalent. +/// +/// # Arguments +/// * `column` - The column to normalize +/// * `ignore_low_cardinality` - Whether to strip LowCardinality annotations +/// +/// # Returns +/// A normalized copy of the column with LowCardinality annotations stripped if requested +pub fn normalize_column_for_low_cardinality_ignore( + column: &Column, + ignore_low_cardinality: bool, +) -> Column { + if !ignore_low_cardinality { + return column.clone(); + } + + let mut normalized = column.clone(); + // Strip LowCardinality annotations + normalized + .annotations + .retain(|(key, _)| key != "LowCardinality"); + normalized +} + /// Checks if two JsonOptions are semantically equivalent. /// /// This is important because the order of `typed_paths` shouldn't matter for equivalence. @@ -288,10 +342,15 @@ pub fn column_types_are_equivalent(a: &ColumnType, b: &ColumnType) -> bool { /// # Arguments /// * `a` - The first JsonOptions to compare /// * `b` - The second JsonOptions to compare +/// * `ignore_low_cardinality` - Whether to consider String and LowCardinality(String) equivalent /// /// # Returns /// `true` if the JsonOptions are semantically equivalent, `false` otherwise -pub fn json_options_are_equivalent(a: &JsonOptions, b: &JsonOptions) -> bool { +pub fn json_options_are_equivalent( + a: &JsonOptions, + b: &JsonOptions, + ignore_low_cardinality: bool, +) -> bool { // First check direct equality (fast path) if a == b { return true; @@ -315,7 +374,7 @@ pub fn json_options_are_equivalent(a: &JsonOptions, b: &JsonOptions) -> bool { // We need semantic comparison for the types to handle nested JSON for (a_path, a_type) in &a.typed_paths { let found = b.typed_paths.iter().any(|(b_path, b_type)| { - a_path == b_path && column_types_are_equivalent(a_type, b_type) + a_path == b_path && column_types_are_equivalent(a_type, b_type, ignore_low_cardinality) }); if !found { return false; @@ -2220,4 +2279,304 @@ mod tests { // Should NOT trigger drop+create - both are the same function, just with/without outer parens assert_eq!(changes.len(), 0); } + + #[test] + fn test_normalize_column_for_low_cardinality_ignore_when_disabled() { + use crate::framework::core::infrastructure::table::{Column, ColumnType}; + + // Create a column with LowCardinality annotation + let column_with_low_cardinality = Column { + name: "test_col".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![("LowCardinality".to_string(), serde_json::json!(true))], + comment: None, + ttl: None, + codec: None, + }; + + // When ignore_low_cardinality is false, column should be unchanged + let normalized = + normalize_column_for_low_cardinality_ignore(&column_with_low_cardinality, false); + assert_eq!(normalized.annotations.len(), 1); + assert_eq!(normalized.annotations[0].0, "LowCardinality"); + assert_eq!(normalized.annotations[0].1, serde_json::json!(true)); + } + + #[test] + fn test_normalize_column_for_low_cardinality_ignore_when_enabled() { + use crate::framework::core::infrastructure::table::{Column, ColumnType}; + + // Create a column with LowCardinality and other annotations + let column_with_annotations = Column { + name: "test_col".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![ + ("LowCardinality".to_string(), serde_json::json!(true)), + ("other_annotation".to_string(), serde_json::json!("value")), + ], + comment: None, + ttl: None, + codec: None, + }; + + // When ignore_low_cardinality is true, only LowCardinality annotation should be stripped + let normalized = + normalize_column_for_low_cardinality_ignore(&column_with_annotations, true); + assert_eq!(normalized.annotations.len(), 1); + assert_eq!(normalized.annotations[0].0, "other_annotation"); + assert_eq!(normalized.annotations[0].1, serde_json::json!("value")); + } + + #[test] + fn test_normalize_column_for_low_cardinality_ignore_no_annotations() { + use crate::framework::core::infrastructure::table::{Column, ColumnType}; + + // Create a column without any annotations + let column_without_annotations = Column { + name: "test_col".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }; + + // Should work fine with no annotations + let normalized = + normalize_column_for_low_cardinality_ignore(&column_without_annotations, true); + assert_eq!(normalized.annotations.len(), 0); + } + + #[test] + fn test_column_types_are_equivalent_with_ignore_low_cardinality_disabled() { + use crate::framework::core::infrastructure::table::{ColumnType, IntType}; + + let string_type = ColumnType::String; + let int_type = ColumnType::Int(IntType::Int32); + + // When ignore_low_cardinality is false, basic types should work as before + assert!(column_types_are_equivalent( + &string_type, + &string_type, + false + )); + assert!(!column_types_are_equivalent(&string_type, &int_type, false)); + } + + #[test] + fn test_column_types_are_equivalent_with_ignore_low_cardinality_enabled() { + use crate::framework::core::infrastructure::table::{ColumnType, IntType}; + + let string_type = ColumnType::String; + let int_type = ColumnType::Int(IntType::Int32); + + // Basic functionality should still work when ignore_low_cardinality is true + assert!(column_types_are_equivalent( + &string_type, + &string_type, + true + )); + assert!(!column_types_are_equivalent(&string_type, &int_type, true)); + } + + #[test] + fn test_column_types_are_equivalent_with_nested_array_types() { + use crate::framework::core::infrastructure::table::{ColumnType, IntType}; + + let array_string = ColumnType::Array { + element_type: Box::new(ColumnType::String), + element_nullable: false, + }; + let array_int = ColumnType::Array { + element_type: Box::new(ColumnType::Int(IntType::Int32)), + element_nullable: false, + }; + + // Test that nested comparison works correctly with ignore flag + assert!(column_types_are_equivalent( + &array_string, + &array_string, + true + )); + assert!(!column_types_are_equivalent( + &array_string, + &array_int, + true + )); + } + + #[test] + fn test_column_types_are_equivalent_with_nullable_types() { + use crate::framework::core::infrastructure::table::{ColumnType, IntType}; + + let nullable_string = ColumnType::Nullable(Box::new(ColumnType::String)); + let nullable_int = ColumnType::Nullable(Box::new(ColumnType::Int(IntType::Int32))); + + // Test that nested comparison works correctly with ignore flag + assert!(column_types_are_equivalent( + &nullable_string, + &nullable_string, + true + )); + assert!(!column_types_are_equivalent( + &nullable_string, + &nullable_int, + true + )); + } + + #[test] + fn test_column_types_are_equivalent_with_map_types() { + use crate::framework::core::infrastructure::table::{ColumnType, IntType}; + + let map_string_string = ColumnType::Map { + key_type: Box::new(ColumnType::String), + value_type: Box::new(ColumnType::String), + }; + let map_string_int = ColumnType::Map { + key_type: Box::new(ColumnType::String), + value_type: Box::new(ColumnType::Int(IntType::Int32)), + }; + + // Test that nested comparison works correctly with ignore flag + assert!(column_types_are_equivalent( + &map_string_string, + &map_string_string, + true + )); + assert!(!column_types_are_equivalent( + &map_string_string, + &map_string_int, + true + )); + } + + #[test] + fn test_json_options_are_equivalent_with_ignore_low_cardinality() { + use crate::framework::core::infrastructure::table::{ColumnType, IntType, JsonOptions}; + + let json_opts_1 = JsonOptions { + max_dynamic_paths: Some(100), + max_dynamic_types: Some(50), + skip_paths: vec![], + skip_regexps: vec![], + typed_paths: vec![ + ("path1".to_string(), ColumnType::String), + ("path2".to_string(), ColumnType::Int(IntType::Int32)), + ], + }; + + let json_opts_2 = JsonOptions { + max_dynamic_paths: Some(100), + max_dynamic_types: Some(50), + skip_paths: vec![], + skip_regexps: vec![], + typed_paths: vec![ + ("path2".to_string(), ColumnType::Int(IntType::Int32)), + ("path1".to_string(), ColumnType::String), // Different order + ], + }; + + // Should be equivalent regardless of order and ignore flag value + assert!(json_options_are_equivalent( + &json_opts_1, + &json_opts_2, + false + )); + assert!(json_options_are_equivalent( + &json_opts_1, + &json_opts_2, + true + )); + } + + #[test] + fn test_nested_are_equivalent_with_ignore_low_cardinality() { + use crate::framework::core::infrastructure::table::{Column, ColumnType, Nested}; + + // Test 1: Identical nested types (existing test case) + let nested_1 = Nested { + name: "test_nested".to_string(), + jwt: false, + columns: vec![Column { + name: "field1".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }], + }; + + let nested_2 = nested_1.clone(); + + // Should be equivalent with any ignore flag value + assert!(nested_are_equivalent(&nested_1, &nested_2, false)); + assert!(nested_are_equivalent(&nested_1, &nested_2, true)); + + // Test 2: Edge case - one column has LowCardinality annotation, other doesn't + let nested_with_low_card = Nested { + name: "test_nested".to_string(), + jwt: false, + columns: vec![Column { + name: "field1".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![("LowCardinality".to_string(), serde_json::json!(true))], + comment: None, + ttl: None, + codec: None, + }], + }; + + let nested_without_low_card = Nested { + name: "test_nested".to_string(), + jwt: false, + columns: vec![Column { + name: "field1".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }], + }; + + // When ignore_low_cardinality=false, they should be different + assert!(!nested_are_equivalent( + &nested_with_low_card, + &nested_without_low_card, + false + )); + + // When ignore_low_cardinality=true, they should be equivalent (this was the bug) + assert!(nested_are_equivalent( + &nested_with_low_card, + &nested_without_low_card, + true + )); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index de95ab678..b6ad5512d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -240,6 +240,7 @@ pub enum IgnorableOperation { ModifyTableTtl, ModifyColumnTtl, ModifyPartitionBy, + IgnoreStringLowCardinalityDifferences, } impl IgnorableOperation { @@ -289,6 +290,15 @@ pub fn normalize_table_for_diff(table: &Table, ignore_ops: &[IgnorableOperation] } } + // Strip LowCardinality annotations if ignored + if ignore_ops.contains(&IgnorableOperation::IgnoreStringLowCardinalityDifferences) { + for column in &mut normalized.columns { + column + .annotations + .retain(|(key, _)| key != "LowCardinality"); + } + } + normalized } @@ -3651,6 +3661,113 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ); } + #[test] + fn test_normalize_table_for_diff_strips_low_cardinality_annotations() { + use crate::framework::core::infrastructure::table::{Column, ColumnType, OrderBy, Table}; + use crate::framework::core::infrastructure_map::{PrimitiveSignature, PrimitiveTypes}; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + + let table = Table { + name: "test_table".to_string(), + columns: vec![ + Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![("LowCardinality".to_string(), serde_json::json!(true))], + comment: None, + ttl: None, + codec: None, + }, + Column { + name: "name".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![ + ("LowCardinality".to_string(), serde_json::json!(true)), + ("other".to_string(), serde_json::json!("value")), + ], + comment: None, + ttl: None, + codec: None, + }, + Column { + name: "regular_column".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![("other".to_string(), serde_json::json!("value"))], + comment: None, + ttl: None, + codec: None, + }, + ], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "Test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::default_for_deserialization(), + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + cluster_name: None, + table_ttl_setting: None, + primary_key_expression: None, + }; + + let ignore_ops = vec![IgnorableOperation::IgnoreStringLowCardinalityDifferences]; + let normalized = super::normalize_table_for_diff(&table, &ignore_ops); + + // Check that LowCardinality annotations were stripped + assert_eq!( + normalized.columns[0].annotations.len(), + 0, + "Column 'id' should have no annotations after LowCardinality stripping" + ); + + assert_eq!( + normalized.columns[1].annotations.len(), + 1, + "Column 'name' should have only non-LowCardinality annotations" + ); + assert_eq!( + normalized.columns[1].annotations[0].0, "other", + "Only the 'other' annotation should remain for 'name' column" + ); + + assert_eq!( + normalized.columns[2].annotations.len(), + 1, + "Regular column should keep its non-LowCardinality annotations" + ); + assert_eq!( + normalized.columns[2].annotations[0].0, "other", + "Regular column should still have its 'other' annotation" + ); + + // Check that other fields remain unchanged + assert_eq!(normalized.name, table.name); + assert_eq!(normalized.columns[0].name, "id"); + assert_eq!(normalized.columns[1].name, "name"); + assert_eq!(normalized.columns[2].name, "regular_column"); + assert_eq!(normalized.order_by, table.order_by); + } + #[test] fn test_reconstruct_sql_resource_from_mv_with_standard_sql() { let create_query =