From 5655fc64352c9925a726b5e1322b3f3acff75dab Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 1 Dec 2025 10:56:30 -0700 Subject: [PATCH 1/7] first pass --- .../test/utils/database-utils.ts | 27 +++++++ .../test/utils/schema-definitions.ts | 50 +++++++++++++ apps/framework-cli/src/cli/local_webserver.rs | 1 + .../framework-cli/src/cli/routines/migrate.rs | 5 ++ .../framework/core/infra_reality_checker.rs | 3 + .../framework/core/infrastructure/table.rs | 9 +++ .../src/framework/core/infrastructure_map.rs | 69 +++++++++++++++++ apps/framework-cli/src/framework/core/plan.rs | 3 + .../src/framework/core/plan_validator.rs | 2 + .../src/framework/python/generate.rs | 58 ++++++++++++-- .../src/framework/python/utils.rs | 1 + .../src/framework/streaming/generate.rs | 1 + .../src/framework/typescript/generate.rs | 64 ++++++++++++---- .../olap/clickhouse/diff_strategy.rs | 4 + .../infrastructure/olap/clickhouse/mapper.rs | 27 +++++++ .../src/infrastructure/olap/clickhouse/mod.rs | 75 ++++++++++++++++--- .../infrastructure/olap/clickhouse/model.rs | 1 + .../infrastructure/olap/clickhouse/queries.rs | 45 ++++++++++- .../olap/clickhouse/type_parser.rs | 1 + .../src/infrastructure/olap/ddl_ordering.rs | 9 +++ .../processes/kafka_clickhouse_sync.rs | 10 +++ .../src/utilities/validate_passthrough.rs | 35 +++++++++ packages/protobuf/infrastructure_map.proto | 2 + packages/py-moose-lib/moose_lib/__init__.py | 1 + .../py-moose-lib/moose_lib/data_models.py | 49 ++++++++++++ .../ts-moose-lib/src/browserCompatible.ts | 1 + .../src/dataModels/dataModelTypes.ts | 1 + .../src/dataModels/typeConvert.ts | 34 ++++++++- packages/ts-moose-lib/src/dataModels/types.ts | 26 +++++++ packages/ts-moose-lib/src/dmv2/internal.ts | 5 ++ .../tests/cluster-validation.test.ts | 1 + .../tests/olap-table-versioning.test.ts | 1 + templates/python-tests/src/ingest/models.py | 30 ++++++++ .../typescript-tests/src/ingest/models.ts | 25 +++++++ 34 files changed, 642 insertions(+), 34 deletions(-) diff 
--git a/apps/framework-cli-e2e/test/utils/database-utils.ts b/apps/framework-cli-e2e/test/utils/database-utils.ts index 60ae5a38c..1d7b8d6c5 100644 --- a/apps/framework-cli-e2e/test/utils/database-utils.ts +++ b/apps/framework-cli-e2e/test/utils/database-utils.ts @@ -256,6 +256,7 @@ export interface ExpectedColumn { nullable?: boolean; comment?: string; codec?: string | RegExp; + materialized?: string | RegExp; } /** @@ -454,6 +455,32 @@ export const validateTableSchema = async ( ); } } + + // Materialized validation (if specified) + if (expectedCol.materialized !== undefined) { + const actualMaterialized = actualCol.default_expression; + const actualDefaultType = actualCol.default_type; + let materializedMatches = false; + + // Check that it's actually a MATERIALIZED column + if (actualDefaultType === "MATERIALIZED") { + if (typeof expectedCol.materialized === "string") { + // Exact string match + materializedMatches = + actualMaterialized === expectedCol.materialized; + } else if (expectedCol.materialized instanceof RegExp) { + // Regex match for complex expressions + materializedMatches = + expectedCol.materialized.test(actualMaterialized); + } + } + + if (!materializedMatches) { + errors.push( + `Column '${expectedCol.name}' materialized mismatch: expected '${expectedCol.materialized}', got '${actualDefaultType === "MATERIALIZED" ? 
actualMaterialized : "(not materialized)"}'`, + ); + } + } } // Check for unexpected columns (optional - could be made configurable) diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index 3e3ba864c..bc90f049a 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -455,6 +455,29 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "status_code", type: "Float64" }, ], }, + // Materialized column test table + { + tableName: "MaterializedTest", + columns: [ + { name: "id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "userId", type: "String" }, + { + name: "eventDate", + type: /Date(32)?/, + materialized: "toDate(timestamp)", + }, + { name: "userHash", type: "UInt64", materialized: "cityHash64(userId)" }, + { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, + { + name: "combinationHash", + type: "Array(UInt64)", + materialized: + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))", + codec: "ZSTD(1)", + }, + ], + }, ]; // ============ PYTHON TEMPLATE SCHEMA DEFINITIONS ============ @@ -873,6 +896,33 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "status_code", type: "Float64" }, ], }, + // Materialized column test table + { + tableName: "MaterializedTest", + columns: [ + { name: "id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "user_id", type: "String" }, + { + name: "event_date", + type: /Date(32)?/, + materialized: "toDate(timestamp)", + }, + { + name: "user_hash", + type: "UInt64", + materialized: "cityHash64(user_id)", + }, + { name: "log_blob", type: "JSON", codec: "ZSTD(3)" }, + { + name: "combination_hash", + type: "Array(UInt64)", + materialized: + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))", + codec: 
"ZSTD(1)", + }, + ], + }, ]; // ============ HELPER FUNCTIONS ============ diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 558878c39..0f769bd48 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3547,6 +3547,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index e9583c626..8b819cae2 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -764,6 +764,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -800,6 +801,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); table } @@ -1144,6 +1146,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, after_column: None, database: Some("bad_db".to_string()), @@ -1162,6 +1165,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, after_column: Column { name: "col".to_string(), @@ -1174,6 +1178,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, database: Some("another_bad_db".to_string()), cluster_name: None, diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index 75e6f6c6e..5a172857c 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -516,6 +516,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -613,6 +614,7 @@ mod tests { comment: 
None, ttl: None, codec: None, + materialized: None, }); let mock_client = MockOlapClient { @@ -683,6 +685,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; actual_table.columns.push(timestamp_col.clone()); infra_table.columns.push(timestamp_col); diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index fe022faec..05cc9b345 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -677,6 +677,8 @@ pub struct Column { pub ttl: Option, #[serde(skip_serializing_if = "Option::is_none", default)] pub codec: Option, // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") + #[serde(skip_serializing_if = "Option::is_none", default)] + pub materialized: Option, // MATERIALIZED column expression } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -1192,6 +1194,7 @@ impl Column { comment: self.comment.clone(), ttl: self.ttl.clone(), codec: self.codec.clone(), + materialized: self.materialized.clone(), special_fields: Default::default(), } } @@ -1215,6 +1218,7 @@ impl Column { comment: proto.comment, ttl: proto.ttl, codec: proto.codec, + materialized: proto.materialized, } } } @@ -1595,6 +1599,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; let json = serde_json::to_string(&nested_column).unwrap(); @@ -1616,6 +1621,7 @@ mod tests { comment: Some("[MOOSE_METADATA:DO_NOT_MODIFY] {\"version\":1,\"enum\":{\"name\":\"TestEnum\",\"members\":[]}}".to_string()), ttl: None, codec: None, + materialized: None, }; // Convert to proto and back @@ -1640,6 +1646,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; let proto = column_without_comment.to_proto(); @@ -1825,6 +1832,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -1837,6 +1845,7 @@ mod tests { 
comment: None, ttl: None, codec: None, + materialized: None, }, ]; diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 6d6cae619..c922d7620 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -3033,6 +3033,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -3045,6 +3046,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_be_removed".to_string(), @@ -3057,6 +3059,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3093,6 +3096,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -3105,6 +3109,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "age".to_string(), // New column @@ -3117,6 +3122,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "name".to_string()]), // Changed order_by @@ -3167,6 +3173,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_remove".to_string(), @@ -3179,6 +3186,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -3196,6 +3204,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "new_column".to_string(), @@ -3208,6 +3217,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -3345,6 +3355,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3377,6 +3388,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: 
None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3406,6 +3418,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3419,6 +3432,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3454,6 +3468,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_remove".to_string(), @@ -3466,6 +3481,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_modify".to_string(), @@ -3478,6 +3494,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3494,6 +3511,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "to_modify".to_string(), // modified @@ -3506,6 +3524,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "new_column".to_string(), // added @@ -3518,6 +3537,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3663,6 +3683,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3676,6 +3697,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3710,6 +3732,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "name".to_string(), @@ -3722,6 +3745,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3738,6 +3762,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "id".to_string(), @@ -3750,6 +3775,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3778,6 +3804,7 @@ mod diff_tests { comment: None, 
ttl: None, codec: None, + materialized: None, }; before.columns.push(col.clone()); after.columns.push(col); @@ -3820,6 +3847,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Change every other column type in the after table @@ -3854,6 +3882,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); } @@ -3885,6 +3914,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3901,6 +3931,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -3942,6 +3973,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3955,6 +3987,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Test special characters in column name @@ -3969,6 +4002,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); after.columns.push(Column { @@ -3982,6 +4016,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }); let diff = compute_table_columns_diff(&before, &after); @@ -4007,6 +4042,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let col2 = col1.clone(); assert!(columns_are_equivalent(&col1, &col2)); @@ -4045,6 +4081,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let clickhouse_enum_col = Column { @@ -4070,6 +4107,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent due to the enum semantic comparison @@ -4096,6 +4134,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent( @@ -4115,6 +4154,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let int_col2 = Column { @@ -4128,6 +4168,7 @@ 
mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent(&int_col1, &int_col2)); @@ -4160,6 +4201,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let json_col2 = Column { @@ -4183,6 +4225,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - order of typed_paths doesn't matter @@ -4209,6 +4252,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent(&json_col1, &json_col3)); @@ -4235,6 +4279,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent(&json_col1, &json_col4)); @@ -4278,6 +4323,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let nested_json_col2 = Column { @@ -4312,6 +4358,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - order doesn't matter at any level @@ -4344,6 +4391,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "priority".to_string(), @@ -4356,6 +4404,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4368,6 +4417,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let col_with_user_name = Column { @@ -4389,6 +4439,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "priority".to_string(), @@ -4401,6 +4452,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4413,6 +4465,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - name difference doesn't matter if structure matches @@ -4440,6 +4493,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: 
None, }], // Missing priority column jwt: false, }), @@ -4451,6 +4505,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; assert!(!columns_are_equivalent( @@ -4488,6 +4543,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "notifications".to_string(), @@ -4500,6 +4556,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4512,6 +4569,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4523,6 +4581,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4534,6 +4593,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; let col_user = Column { @@ -4560,6 +4620,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "notifications".to_string(), @@ -4572,6 +4633,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -4584,6 +4646,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4595,6 +4658,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }], jwt: false, }), @@ -4606,6 +4670,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // These should be equivalent - name differences at all levels don't matter @@ -4627,6 +4692,7 @@ mod diff_tests { comment: None, ttl: None, codec: None, + materialized: None, }; // Test 1: Columns with same codec should be equivalent @@ -5063,6 +5129,7 @@ mod diff_topic_tests { comment: None, ttl: None, codec: None, + materialized: None, }], metadata: None, life_cycle: LifeCycle::FullyManaged, @@ -5354,6 +5421,7 @@ mod diff_topic_to_table_sync_process_tests { comment: None, ttl: None, codec: None, + materialized: None, }], version: Some(version.clone()), 
source_primitive: PrimitiveSignature { @@ -5478,6 +5546,7 @@ mod diff_topic_to_table_sync_process_tests { comment: None, ttl: None, codec: None, + materialized: None, }]; assert_eq!( diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index 04224f90e..d21e1e422 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -510,6 +510,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -716,6 +717,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Create test project first to get the database name @@ -1094,6 +1096,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }); // Create mock OLAP client with the reality table diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index a8a7ba777..8cf1bd3c9 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -150,6 +150,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -327,6 +328,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index e4362b697..8a35ea64b 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -557,7 +557,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri .unwrap(); writeln!( output, - "from moose_lib import clickhouse_default, ClickHouseCodec, LifeCycle, ClickHouseTTL" + 
"from moose_lib import clickhouse_default, ClickHouseCodec, ClickHouseMaterialized, LifeCycle, ClickHouseTTL" ) .unwrap(); writeln!( @@ -669,11 +669,27 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri if let Some(ref codec_expr) = column.codec { type_str = format!("Annotated[{}, ClickHouseCodec({:?})]", type_str, codec_expr); } - if let Some(ref default_expr) = column.default { - type_str = format!( - "Annotated[{}, clickhouse_default({:?})]", - type_str, default_expr - ); + // Handle DEFAULT and MATERIALIZED (mutually exclusive) + match (&column.default, &column.materialized) { + (Some(default_expr), None) => { + type_str = format!( + "Annotated[{}, clickhouse_default({:?})]", + type_str, default_expr + ); + } + (None, Some(materialized_expr)) => { + type_str = format!( + "Annotated[{}, ClickHouseMaterialized({:?})]", + type_str, materialized_expr + ); + } + (None, None) => { + // No default or materialized, do nothing + } + (Some(_), Some(_)) => { + // This should never happen due to validation + panic!("Column '{}' has both DEFAULT and MATERIALIZED - this should be caught by validation", column.name) + } } let type_str = if can_use_key_wrapping && column.primary_key { @@ -1043,6 +1059,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -1055,6 +1072,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "optional_text".to_string(), @@ -1067,6 +1085,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["primary_key".to_string()]), @@ -1101,7 +1120,7 @@ from enum import IntEnum, Enum from moose_lib import Key, IngestPipeline, IngestPipelineConfig, OlapTable, OlapConfig, clickhouse_datetime64, clickhouse_decimal, ClickhouseSize, StringToEnumMixin from moose_lib.data_models import ClickHouseJson from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, 
MultiPolygon, FixedString -from moose_lib import clickhouse_default, ClickHouseCodec, LifeCycle, ClickHouseTTL +from moose_lib import clickhouse_default, ClickHouseCodec, ClickHouseMaterialized, LifeCycle, ClickHouseTTL from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, SummingMergeTreeEngine, S3QueueEngine, ReplicatedMergeTreeEngine, ReplicatedReplacingMergeTreeEngine, ReplicatedAggregatingMergeTreeEngine, ReplicatedSummingMergeTreeEngine class Foo(BaseModel): @@ -1132,6 +1151,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "numbers".to_string(), @@ -1147,6 +1167,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "nested_numbers".to_string(), @@ -1165,6 +1186,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1221,6 +1243,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "city".to_string(), @@ -1233,6 +1256,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "zipCode".to_string(), @@ -1245,6 +1269,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -1264,6 +1289,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "address".to_string(), @@ -1276,6 +1302,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "addresses".to_string(), 
@@ -1291,6 +1318,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1350,6 +1378,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "data".to_string(), @@ -1362,6 +1391,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1421,6 +1451,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1479,6 +1510,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "version".to_string(), @@ -1491,6 +1523,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "is_deleted".to_string(), @@ -1503,6 +1536,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1552,6 +1586,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "coordinates".to_string(), @@ -1567,6 +1602,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "metadata".to_string(), @@ -1582,6 +1618,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1645,6 +1682,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { 
name: "timestamp".to_string(), @@ -1657,6 +1695,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "email".to_string(), @@ -1669,6 +1708,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: Some("timestamp + INTERVAL 30 DAY".to_string()), codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), @@ -1717,6 +1757,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1784,6 +1825,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "payload".to_string(), @@ -1805,6 +1847,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1861,6 +1904,7 @@ user_table = OlapTable[User]("User", OlapConfig( comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/framework/python/utils.rs b/apps/framework-cli/src/framework/python/utils.rs index cf9a87364..c8a3be936 100644 --- a/apps/framework-cli/src/framework/python/utils.rs +++ b/apps/framework-cli/src/framework/python/utils.rs @@ -54,6 +54,7 @@ impl ColumnBuilder { comment: None, ttl: None, codec: None, + materialized: None, }) } } diff --git a/apps/framework-cli/src/framework/streaming/generate.rs b/apps/framework-cli/src/framework/streaming/generate.rs index f96692dc5..4da15a4e0 100644 --- a/apps/framework-cli/src/framework/streaming/generate.rs +++ b/apps/framework-cli/src/framework/streaming/generate.rs @@ -515,6 +515,7 @@ my_function = StreamingFunction( comment: None, ttl: None, codec: None, + materialized: None, }) 
.collect() } diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index b5628cd29..e6d16aa78 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -341,6 +341,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> "LifeCycle", "ClickHouseTTL", "ClickHouseCodec", + "ClickHouseMaterialized", ]; if uses_simple_aggregate { @@ -578,24 +579,36 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> } } - // Append ClickHouseTTL type tag if present on the column - if let Some(expr) = &column.ttl { - type_str = format!("{type_str} & ClickHouseTTL<\"{}\">", expr); - } - // Wrap with Codec if present - let type_str = match column.codec.as_ref() { - None => type_str, - Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), - }; - let type_str = match column.default { - None => type_str, - Some(ref default) if type_str == "Date" => { + // Handle DEFAULT and MATERIALIZED (mutually exclusive) + let type_str = match (&column.default, &column.materialized) { + (Some(default), None) if type_str == "Date" => { // https://github.com/samchon/typia/issues/1658 format!("WithDefault<{type_str}, {:?}>", default) } - Some(ref default) => { + (Some(default), None) => { format!("{type_str} & ClickHouseDefault<{:?}>", default) } + (None, Some(materialized)) => { + format!("{type_str} & ClickHouseMaterialized<{:?}>", materialized) + } + (None, None) => type_str, + (Some(_), Some(_)) => { + // This should never happen due to validation, but handle it gracefully + panic!("Column '{}' has both DEFAULT and MATERIALIZED - this should be caught by validation", column.name) + } + }; + + // Append ClickHouseTTL type tag if present on the column + let type_str = if let Some(expr) = &column.ttl { + format!("{type_str} & ClickHouseTTL<\"{}\">", expr) + } else { + type_str + }; + + // Wrap with Codec if 
present + let type_str = match column.codec.as_ref() { + None => type_str, + Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), }; let type_str = if can_use_key_wrapping && column.primary_key { format!("Key<{type_str}>") @@ -941,6 +954,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "city".to_string(), @@ -953,6 +967,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "zip_code".to_string(), @@ -965,6 +980,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], jwt: false, @@ -984,6 +1000,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "address".to_string(), @@ -996,6 +1013,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "addresses".to_string(), @@ -1011,6 +1029,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1073,6 +1092,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "data".to_string(), @@ -1085,6 +1105,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1143,6 +1164,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -1196,6 +1218,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "version".to_string(), @@ -1208,6 +1231,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "is_deleted".to_string(), @@ -1220,6 +1244,7 @@ export const 
UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1268,6 +1293,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }], sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1322,6 +1348,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "version".to_string(), @@ -1334,6 +1361,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "is_deleted".to_string(), @@ -1346,6 +1374,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], sample_by: None, @@ -1402,6 +1431,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["u64".to_string()]), partition_by: None, @@ -1478,6 +1508,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "status".to_string(), @@ -1490,6 +1521,7 @@ export const UserTable = new OlapTable("User", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1548,6 +1580,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -1560,6 +1593,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "email".to_string(), @@ -1572,6 +1606,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: Some("timestamp + INTERVAL 30 DAY".to_string()), codec: None, + materialized: None, }, ], order_by: 
OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), @@ -1622,6 +1657,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "payload".to_string(), @@ -1643,6 +1679,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -1693,6 +1730,7 @@ export const TaskTable = new OlapTable("Task", { comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index a308ce62c..2479997a1 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -668,6 +668,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "timestamp".to_string(), @@ -680,6 +681,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(order_by), @@ -807,6 +809,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, position_after: Some("timestamp".to_string()), }]; @@ -865,6 +868,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, position_after: Some("timestamp".to_string()), }]; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index f52e4763b..a588a2fef 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -54,6 +54,27 @@ fn generate_column_comment(column: &Column) -> Result, Clickhouse pub fn std_column_to_clickhouse_column( column: 
Column, ) -> Result { + // Validate mutual exclusivity of DEFAULT and MATERIALIZED + if column.default.is_some() && column.materialized.is_some() { + return Err(ClickhouseError::InvalidParameters { + message: format!( + "Column '{}' cannot have both DEFAULT and MATERIALIZED. Use one or the other.", + column.name + ), + }); + } + + // Validate that MATERIALIZED columns are not primary keys + if column.materialized.is_some() && column.primary_key { + return Err(ClickhouseError::InvalidParameters { + message: format!( + "Column '{}' cannot be both MATERIALIZED and a primary key. \ + MATERIALIZED columns are computed and cannot be used as primary keys.", + column.name + ), + }); + } + let comment = generate_column_comment(&column)?; let mut column_type = @@ -84,6 +105,7 @@ pub fn std_column_to_clickhouse_column( comment, ttl: column.ttl.clone(), codec: column.codec.clone(), + materialized: column.materialized.clone(), }; Ok(clickhouse_column) @@ -429,6 +451,7 @@ mod tests { comment: Some("This is a user comment about the record type".to_string()), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_with_user_comment).unwrap(); @@ -454,6 +477,7 @@ mod tests { comment: Some(format!("Old user comment {}", old_metadata)), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_with_both).unwrap(); @@ -481,6 +505,7 @@ mod tests { comment: Some(old_metadata), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column_metadata_only).unwrap(); @@ -524,6 +549,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "status".to_string(), @@ -536,6 +562,7 @@ mod tests { comment: Some("User status field".to_string()), // User comment ttl: None, codec: None, + materialized: None, }, ], jwt: false, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs 
b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index de95ab678..4c49b0843 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -823,6 +823,13 @@ async fn execute_add_table_column( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + // Include MATERIALIZED clause if column has a materialized expression + let materialized_clause = clickhouse_column + .materialized + .as_ref() + .map(|m| format!(" MATERIALIZED {}", m)) + .unwrap_or_default(); + let codec_clause = clickhouse_column .codec .as_ref() @@ -841,13 +848,14 @@ async fn execute_add_table_column( }; let add_column_query = format!( - "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{}{}{}{} {}", db_name, table_name, cluster_clause, clickhouse_column.name, column_type_string, default_clause, + materialized_clause, codec_clause, ttl_clause, position_clause @@ -908,6 +916,7 @@ async fn execute_modify_table_column( // Check if only the comment has changed let data_type_changed = before_column.data_type != after_column.data_type; let default_changed = before_column.default != after_column.default; + let materialized_changed = before_column.materialized != after_column.materialized; let required_changed = before_column.required != after_column.required; let comment_changed = before_column.comment != after_column.comment; let ttl_changed = before_column.ttl != after_column.ttl; @@ -918,6 +927,7 @@ async fn execute_modify_table_column( if !data_type_changed && !required_changed && !default_changed + && !materialized_changed && !ttl_changed && !codec_changed && comment_changed @@ -958,7 +968,7 @@ async fn execute_modify_table_column( tracing::info!( "Executing ModifyTableColumn for table: {}, column: {} ({}→{})\ -data_type_changed: {data_type_changed}, default_changed: {default_changed}, required_changed: {required_changed}, comment_changed: 
{comment_changed}, ttl_changed: {ttl_changed}, codec_changed: {codec_changed}", +data_type_changed: {data_type_changed}, default_changed: {default_changed}, materialized_changed: {materialized_changed}, required_changed: {required_changed}, comment_changed: {comment_changed}, ttl_changed: {ttl_changed}, codec_changed: {codec_changed}", table_name, after_column.name, before_column.data_type, @@ -970,6 +980,8 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ // Build all the SQL statements needed (main modify + optional removes) let removing_default = before_column.default.is_some() && after_column.default.is_none(); + let removing_materialized = + before_column.materialized.is_some() && after_column.materialized.is_none(); let removing_ttl = before_column.ttl.is_some() && after_column.ttl.is_none(); let removing_codec = before_column.codec.is_some() && after_column.codec.is_none(); let queries = build_modify_column_sql( @@ -977,6 +989,7 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ table_name, &clickhouse_column, removing_default, + removing_materialized, removing_ttl, removing_codec, cluster_name, @@ -1027,11 +1040,13 @@ async fn execute_modify_column_comment( Ok(()) } +#[allow(clippy::too_many_arguments)] fn build_modify_column_sql( db_name: &str, table_name: &str, ch_col: &ClickHouseColumn, removing_default: bool, + removing_materialized: bool, removing_ttl: bool, removing_codec: bool, cluster_name: Option<&str>, @@ -1053,6 +1068,14 @@ fn build_modify_column_sql( )); } + // Add REMOVE MATERIALIZED statement if needed + if removing_materialized { + statements.push(format!( + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE MATERIALIZED", + db_name, table_name, cluster_clause, ch_col.name + )); + } + // Add REMOVE TTL statement if needed if removing_ttl { statements.push(format!( @@ -1082,6 +1105,13 @@ fn build_modify_column_sql( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + // 
MATERIALIZED clause: If omitted, ClickHouse KEEPS any existing MATERIALIZED + let materialized_clause = ch_col + .materialized + .as_ref() + .map(|m| format!(" MATERIALIZED {}", m)) + .unwrap_or_default(); + // TTL clause: If omitted, ClickHouse KEEPS any existing TTL // Therefore, TTL removal requires a separate REMOVE TTL statement let ttl_clause = ch_col @@ -1102,26 +1132,28 @@ fn build_modify_column_sql( let main_sql = if let Some(ref comment) = ch_col.comment { let escaped_comment = comment.replace('\'', "''"); format!( - "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{} COMMENT '{}'", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}{} COMMENT '{}'", db_name, table_name, cluster_clause, ch_col.name, column_type_string, default_clause, + materialized_clause, codec_clause, ttl_clause, escaped_comment ) } else { format!( - "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}{}{}", db_name, table_name, cluster_clause, ch_col.name, column_type_string, default_clause, + materialized_clause, codec_clause, ttl_clause ) @@ -1809,16 +1841,17 @@ impl OlapOperations for ConfiguredDBClient { None }; - let default = match default_kind.deref() { - "" => None, - "DEFAULT" => Some(default_expression), - "MATERIALIZED" | "ALIAS" => { - debug!("MATERIALIZED and ALIAS not yet handled."); - None + let (default, materialized) = match default_kind.deref() { + "" => (None, None), + "DEFAULT" => (Some(default_expression.clone()), None), + "MATERIALIZED" => (None, Some(default_expression.clone())), + "ALIAS" => { + debug!("ALIAS columns not yet handled."); + (None, None) } _ => { debug!("Unknown default kind: {default_kind} for column {col_name}"); - None + (None, None) } }; @@ -1875,6 +1908,7 @@ impl OlapOperations for ConfiguredDBClient { comment: column_comment, ttl: normalized_ttl, codec, + materialized, }; columns.push(column); @@ -2823,6 +2857,7 @@ SETTINGS 
enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Old user comment".to_string()), ttl: None, codec: None, + materialized: None, }; let after_column = Column { @@ -2842,6 +2877,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("New user comment".to_string()), ttl: None, codec: None, + materialized: None, }; // The execute_modify_table_column function should detect this as comment-only change @@ -2868,6 +2904,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Number of things".to_string()), ttl: None, codec: None, + materialized: None, }; let after_column = Column { default: Some("42".to_string()), @@ -2876,7 +2913,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra let ch_after = std_column_to_clickhouse_column(after_column).unwrap(); let sqls = - build_modify_column_sql("db", "table", &ch_after, false, false, false, None).unwrap(); + build_modify_column_sql("db", "table", &ch_after, false, false, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2901,6 +2939,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("old".to_string()), ttl: None, codec: None, + materialized: None, }; let after_column = Column { @@ -2934,6 +2973,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Updated description field".to_string()), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -2945,6 +2985,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -2972,6 +3013,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Hash of the ID".to_string()), ttl: None, codec: None, + materialized: None, }; 
let sqls = build_modify_column_sql( @@ -2981,6 +3023,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -3003,6 +3046,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: None, codec: None, + materialized: None, }; let sqls = build_modify_column_sql( @@ -3012,6 +3056,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -3034,6 +3079,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: None, codec: None, + materialized: None, }; let sqls = build_modify_column_sql( @@ -3043,6 +3089,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra false, false, false, + false, None, ) .unwrap(); @@ -3417,6 +3464,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: Some("Number of items".to_string()), ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -3479,6 +3527,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: None, codec: None, + materialized: None, }; let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); @@ -3544,6 +3593,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: Some("created_at + INTERVAL 7 DAY".to_string()), codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), @@ -3612,6 +3662,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra comment: None, ttl: Some("created_at + INTERVAL 7 DAY".to_string()), codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), 
partition_by: Some("toYYYYMM(created_at)".to_string()), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index 7f8a8c295..5646c599e 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -433,6 +433,7 @@ pub struct ClickHouseColumn { pub comment: Option, // Column comment for metadata storage pub ttl: Option, pub codec: Option, // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") + pub materialized: Option, // MATERIALIZED column expression } impl ClickHouseColumn { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 6de287a77..048b5791d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -124,7 +124,7 @@ static CREATE_TABLE_TEMPLATE: &str = r#" CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} ( -{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#unless @last}}, +{{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_materialized}} MATERIALIZED {{{field_materialized}}}{{/if}}{{#if field_codec}} CODEC({{{field_codec}}}){{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} ) ENGINE = {{engine}}{{#if primary_key_string}} @@ 
-3084,6 +3084,7 @@ fn builds_field_context(columns: &[ClickHouseColumn]) -> Result, Clic let field_ttl = column.ttl.as_ref(); let field_codec = column.codec.as_ref(); + let field_materialized = column.materialized.as_ref(); // Default values from ClickHouse/Python are already properly formatted // - String literals come with quotes: 'active' @@ -3097,6 +3098,7 @@ fn builds_field_context(columns: &[ClickHouseColumn]) -> Result, Clic "field_type": field_type, "field_ttl": field_ttl, "field_codec": field_codec, + "field_materialized": field_materialized, "field_default": formatted_default, "field_nullable": if let ClickHouseColumnType::Nullable(_) = column.column_type { // if type is Nullable, do not add extra specifier @@ -3135,6 +3137,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_2".to_string(), @@ -3146,6 +3149,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_3".to_string(), @@ -3157,6 +3161,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_4".to_string(), @@ -3168,6 +3173,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_5".to_string(), @@ -3179,6 +3185,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_6".to_string(), @@ -3202,6 +3209,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_field_7".to_string(), @@ -3213,6 +3221,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]); @@ -3298,6 +3307,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "name".to_string(), @@ -3309,6 +3319,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], 
order_by: OrderBy::Fields(vec![]), @@ -3350,6 +3361,7 @@ PRIMARY KEY (`id`) comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3389,6 +3401,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3430,6 +3443,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "sample_hash".to_string(), @@ -3441,6 +3455,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "created_at".to_string(), @@ -3452,6 +3467,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -3493,6 +3509,7 @@ ENGINE = MergeTree comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, @@ -3535,6 +3552,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }], engine: ClickhouseEngine::ReplacingMergeTree { ver: None, @@ -3573,6 +3591,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "version".to_string(), @@ -3584,6 +3603,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3629,6 +3649,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "version".to_string(), @@ -3640,6 +3661,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "is_deleted".to_string(), @@ -3651,6 +3673,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3696,6 +3719,7 @@ ORDER BY (`id`) "#; comment: None, 
ttl: None, codec: None, + materialized: None, }], sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3803,6 +3827,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "nested_data".to_string(), @@ -3817,6 +3842,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "field2".to_string(), @@ -3828,6 +3854,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ]), required: true, @@ -3837,6 +3864,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "status".to_string(), @@ -3860,6 +3888,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], sample_by: None, @@ -3903,6 +3932,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "event_id".to_string(), @@ -3914,6 +3944,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "timestamp".to_string(), @@ -3925,6 +3956,7 @@ ORDER BY (`id`) "#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::SingleExpr("(user_id, cityHash64(event_id), timestamp)".to_string()), @@ -3968,6 +4000,7 @@ ORDER BY (user_id, cityHash64(event_id), timestamp)"#; comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec!["product_id".to_string()]), partition_by: None, @@ -4010,6 +4043,7 @@ ORDER BY (user_id, cityHash64(event_id), timestamp)"#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "data".to_string(), @@ -4021,6 +4055,7 @@ ORDER BY (user_id, cityHash64(event_id), timestamp)"#; comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec![]), @@ -4497,6 +4532,7 @@ SETTINGS keeper_path = 
'/clickhouse/s3queue/test_table', mode = 'unordered', s3q comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -5046,6 +5082,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -5095,6 +5132,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -5194,6 +5232,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }; let cluster_clause = Some("test_cluster") @@ -6071,6 +6110,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: None, + materialized: None, }, ClickHouseColumn { name: "log_blob".to_string(), @@ -6082,6 +6122,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: Some("ZSTD(3)".to_string()), + materialized: None, }, ClickHouseColumn { name: "timestamp".to_string(), @@ -6093,6 +6134,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: Some("Delta, LZ4".to_string()), + materialized: None, }, ClickHouseColumn { name: "tags".to_string(), @@ -6104,6 +6146,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; comment: None, ttl: None, codec: Some("ZSTD(1)".to_string()), + materialized: None, }, ]; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs index 14a606179..cdb26dd80 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/type_parser.rs @@ -1679,6 +1679,7 @@ pub fn convert_ast_to_column_type( 
comment: None, ttl: None, codec: None, + materialized: None, }); } TupleElement::Unnamed(_) => { diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index 235992aa7..48fbbf72f 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -1353,6 +1353,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, after_column: None, dependency_info: DependencyInfo { @@ -1681,6 +1682,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; // Create operations with correct dependencies @@ -2751,6 +2753,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }; // Create operations with signatures that work with the current implementation @@ -2940,6 +2943,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "old_column".to_string(), @@ -2952,6 +2956,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -2988,6 +2993,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "new_column".to_string(), @@ -3000,6 +3006,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], order_by: OrderBy::Fields(vec!["id".to_string()]), @@ -3035,6 +3042,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }), ColumnChange::Added { column: Column { @@ -3048,6 +3056,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, position_after: Some("id".to_string()), }, diff --git a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs index 0c4e4e652..6ecfd8122 100644 --- 
a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs +++ b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs @@ -1215,6 +1215,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "B".to_string(), @@ -1227,6 +1228,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "C".to_string(), @@ -1245,6 +1247,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "b".to_string(), @@ -1263,6 +1266,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "e".to_string(), @@ -1275,6 +1279,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "f".to_string(), @@ -1287,6 +1292,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], }), @@ -1298,6 +1304,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "c".to_string(), @@ -1310,6 +1317,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], }), @@ -1321,6 +1329,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "D".to_string(), @@ -1333,6 +1342,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ], }; diff --git a/apps/framework-cli/src/utilities/validate_passthrough.rs b/apps/framework-cli/src/utilities/validate_passthrough.rs index 087b8b404..9bdcf2b49 100644 --- a/apps/framework-cli/src/utilities/validate_passthrough.rs +++ b/apps/framework-cli/src/utilities/validate_passthrough.rs @@ -649,6 +649,7 @@ impl<'de, S: SerializeValue> Visitor<'de> for &mut ValueVisitor<'_, S> { comment: None, ttl: None, codec: None, + materialized: None, } }) .collect(); @@ -1318,6 +1319,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "int_col".to_string(), @@ -1330,6 +1332,7 @@ mod 
tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "float_col".to_string(), @@ -1342,6 +1345,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "bool_col".to_string(), @@ -1354,6 +1358,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "date_col".to_string(), @@ -1366,6 +1371,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1401,6 +1407,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let json = r#" @@ -1436,6 +1443,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let json = r#" @@ -1478,6 +1486,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test valid enum value @@ -1528,6 +1537,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "nested_int".to_string(), @@ -1540,6 +1550,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1555,6 +1566,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "nested_object".to_string(), @@ -1571,6 +1583,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1630,6 +1643,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "optional_field".to_string(), @@ -1642,6 +1656,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1674,6 +1689,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "aud".to_string(), @@ -1686,6 +1702,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, Column { name: "exp".to_string(), @@ -1698,6 +1715,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1713,6 +1731,7 @@ mod tests { comment: None, ttl: None, codec: 
None, + materialized: None, }, Column { name: "jwt_object".to_string(), @@ -1729,6 +1748,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }, ]; @@ -1775,6 +1795,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test valid map @@ -1833,6 +1854,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test valid map with numeric keys (as strings in JSON) @@ -1888,6 +1910,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Min boundary 0 @@ -1932,6 +1955,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Min boundary -32768 @@ -1976,6 +2000,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 127usize; @@ -2022,6 +2047,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 255usize; @@ -2068,6 +2094,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let limit: BigUint = BigUint::from(1u8) << 256usize; @@ -2115,6 +2142,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Valid keys @@ -2156,6 +2184,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let positive_limit: BigInt = BigInt::from(1u8) << 255usize; @@ -2197,6 +2226,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let limit: BigUint = BigUint::from(1u8) << 256usize; @@ -2242,6 +2272,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; let json = r#" @@ -2274,6 +2305,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // missing nested path @@ -2307,6 +2339,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // null at the nested path counts as missing for non-nullable types @@ -2355,6 
+2388,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test 1: Two's complement value (what -1 becomes with naive cast) should be rejected @@ -2425,6 +2459,7 @@ mod tests { comment: None, ttl: None, codec: None, + materialized: None, }]; // Test negative values work with i64 diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index ee606a91d..e53f3d990 100644 --- a/packages/protobuf/infrastructure_map.proto +++ b/packages/protobuf/infrastructure_map.proto @@ -247,6 +247,8 @@ message Column { optional string ttl = 10; // Compression codec expression (e.g., "ZSTD(3)", "Delta, LZ4") optional string codec = 11; + // Materialized expression + optional string materialized = 12; } enum SimpleColumnType { diff --git a/packages/py-moose-lib/moose_lib/__init__.py b/packages/py-moose-lib/moose_lib/__init__.py index d50c9c72a..66c9e5486 100644 --- a/packages/py-moose-lib/moose_lib/__init__.py +++ b/packages/py-moose-lib/moose_lib/__init__.py @@ -40,6 +40,7 @@ ClickhouseDefault, clickhouse_default, ClickHouseTTL, + ClickHouseMaterialized, ClickHouseCodec, # Integer types Int8, diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index dc0a3591b..5f5f6a3cd 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -77,6 +77,41 @@ class ClickHouseCodec: expression: str +@dataclasses.dataclass(frozen=True) +class ClickHouseMaterialized: + """ + ClickHouse MATERIALIZED column annotation. + The column value is computed at INSERT time and physically stored. + Cannot be explicitly inserted by users. 
+ + Args: + expression: ClickHouse SQL expression using column names (snake_case) + + Examples: + # Extract date component + event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")] + + # Precompute hash + user_hash: Annotated[int, ClickHouseMaterialized("cityHash64(user_id)")] + + # Complex expression with JSON + combination_hash: Annotated[ + list[int], + ClickHouseMaterialized( + "arrayMap(kv -> cityHash64(kv.1, kv.2), " + "JSONExtractKeysAndValuesRaw(toString(log_blob)))" + ) + ] + + Notes: + - Expression uses ClickHouse column names, not Python field names + - MATERIALIZED and DEFAULT are mutually exclusive + - Can be combined with ClickHouseCodec for compression + - Changing the expression modifies the column in-place (existing values preserved) + """ + expression: str + + @dataclasses.dataclass(frozen=True) class ClickHouseJson: max_dynamic_paths: int | None = None @@ -619,6 +654,19 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: None, ) + # Extract MATERIALIZED expression from metadata, if provided + materialized_expr = next( + (md.expression for md in mds if isinstance(md, ClickHouseMaterialized)), + None, + ) + + # Validate mutual exclusivity of DEFAULT and MATERIALIZED + if default_expr and materialized_expr: + raise ValueError( + f"Column '{column_name}' cannot have both DEFAULT and MATERIALIZED. " + f"Use one or the other." 
+ ) + # Extract TTL expression from metadata, if provided ttl_expr = next( (md.expression for md in mds if isinstance(md, ClickHouseTTL)), @@ -639,6 +687,7 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: unique=False, primary_key=primary_key, default=default_expr, + materialized=materialized_expr, annotations=annotations, ttl=ttl_expr, codec=codec_expr, diff --git a/packages/ts-moose-lib/src/browserCompatible.ts b/packages/ts-moose-lib/src/browserCompatible.ts index a0d74719a..e3a19f8a6 100644 --- a/packages/ts-moose-lib/src/browserCompatible.ts +++ b/packages/ts-moose-lib/src/browserCompatible.ts @@ -61,6 +61,7 @@ export { ClickHouseNamedTuple, ClickHouseDefault, ClickHouseTTL, + ClickHouseMaterialized, WithDefault, ClickHouseCodec, // Added friendly aliases and numeric helpers diff --git a/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts b/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts index ba932bc77..30162c850 100644 --- a/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts +++ b/packages/ts-moose-lib/src/dataModels/dataModelTypes.ts @@ -32,6 +32,7 @@ export interface Column { unique: false; // what is this for? 
primary_key: boolean; default: string | null; + materialized: string | null; ttl: string | null; codec: string | null; annotations: [string, any][]; diff --git a/packages/ts-moose-lib/src/dataModels/typeConvert.ts b/packages/ts-moose-lib/src/dataModels/typeConvert.ts index 93b0c778d..5a6650c30 100644 --- a/packages/ts-moose-lib/src/dataModels/typeConvert.ts +++ b/packages/ts-moose-lib/src/dataModels/typeConvert.ts @@ -306,6 +306,27 @@ const handleDefault = (t: ts.Type, checker: TypeChecker): string | null => { return defaultType.value; }; +/** Detect ClickHouse materialized annotation on a type and return raw sql */ +const handleMaterialized = ( + t: ts.Type, + checker: TypeChecker, +): string | null => { + const materializedType = getTaggedType( + t, + checker, + "_clickhouse_materialized", + ); + if (materializedType === null) { + return null; + } + if (!materializedType.isStringLiteral()) { + throw new UnsupportedFeature( + 'ClickHouseMaterialized must use a string literal, e.g. ClickHouseMaterialized<"toDate(timestamp)">', + ); + } + return materializedType.value; +}; + /** Detect ClickHouse TTL annotation on a type and return raw sql */ const handleTtl = (t: ts.Type, checker: TypeChecker): string | null => { const ttlType = getTaggedType(t, checker, "_clickhouse_ttl"); @@ -970,13 +991,24 @@ export const toColumns = (t: ts.Type, checker: TypeChecker): Column[] => { node?.type, ); + const defaultValue = defaultExpression ?? handleDefault(type, checker); + const materializedValue = handleMaterialized(type, checker); + + // Validate mutual exclusivity of DEFAULT and MATERIALIZED + if (defaultValue && materializedValue) { + throw new UnsupportedFeature( + `Column '${prop.name}' cannot have both ClickHouseDefault and ClickHouseMaterialized. Use one or the other.`, + ); + } + return { name: prop.name, data_type: dataType, primary_key: isKey, required: !nullable, unique: false, - default: defaultExpression ?? 
handleDefault(type, checker), + default: defaultValue, + materialized: materializedValue, ttl: handleTtl(type, checker), codec: handleCodec(type, checker), annotations, diff --git a/packages/ts-moose-lib/src/dataModels/types.ts b/packages/ts-moose-lib/src/dataModels/types.ts index cca3b934b..1099dadc8 100644 --- a/packages/ts-moose-lib/src/dataModels/types.ts +++ b/packages/ts-moose-lib/src/dataModels/types.ts @@ -203,6 +203,32 @@ export type ClickHouseTTL = { _clickhouse_ttl?: SqlExpression; }; +/** + * ClickHouse MATERIALIZED column annotation. + * The column value is computed at INSERT time and physically stored. + * Cannot be explicitly inserted by users. + * + * @example + * interface Events { + * eventTime: Date; + * // Extract date component - computed and stored at insert time + * eventDate: Date & ClickHouseMaterialized<"toDate(event_time)">; + * + * userId: string; + * // Precompute hash for fast lookups + * userHash: number & ClickHouseInt<"uint64"> & ClickHouseMaterialized<"cityHash64(user_id)">; + * } + * + * @remarks + * - Expression uses ClickHouse column names (snake_case), not TypeScript field names + * - MATERIALIZED and DEFAULT are mutually exclusive + * - Can be combined with ClickHouseCodec for compression + * - Changing the expression modifies the column in-place (existing values preserved) + */ +export type ClickHouseMaterialized = { + _clickhouse_materialized?: SqlExpression; +}; + /** * See also {@link ClickHouseDefault} * diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index a795f1c54..818b7503c 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -1169,6 +1169,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "errorMessage", @@ -1180,6 +1181,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: 
"errorType", @@ -1191,6 +1193,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "failedAt", @@ -1202,6 +1205,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, { name: "source", @@ -1213,6 +1217,7 @@ export const dlqColumns: Column[] = [ annotations: [], ttl: null, codec: null, + materialized: null, }, ]; diff --git a/packages/ts-moose-lib/tests/cluster-validation.test.ts b/packages/ts-moose-lib/tests/cluster-validation.test.ts index c7939c3ce..664d2178c 100644 --- a/packages/ts-moose-lib/tests/cluster-validation.test.ts +++ b/packages/ts-moose-lib/tests/cluster-validation.test.ts @@ -25,6 +25,7 @@ const createMockColumns = (fields: string[]): Column[] => default: null, ttl: null, codec: null, + materialized: null, annotations: [], })); diff --git a/packages/ts-moose-lib/tests/olap-table-versioning.test.ts b/packages/ts-moose-lib/tests/olap-table-versioning.test.ts index 9496bc212..7a9c6f350 100644 --- a/packages/ts-moose-lib/tests/olap-table-versioning.test.ts +++ b/packages/ts-moose-lib/tests/olap-table-versioning.test.ts @@ -28,6 +28,7 @@ const createMockColumns = (fields: string[]): Column[] => default: null, ttl: null, codec: null, + materialized: null, annotations: [], })); diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index 77290de52..3eb29680a 100644 --- a/templates/python-tests/src/ingest/models.py +++ b/templates/python-tests/src/ingest/models.py @@ -748,3 +748,33 @@ class CodecTest(BaseModel): table=True, dead_letter_queue=True )) + +# =======Materialized Columns Test======= +from moose_lib import ClickHouseMaterialized + +class MaterializedTest(BaseModel): + """Test model for materialized column support.""" + id: Key[str] + timestamp: datetime + user_id: str + event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")] + user_hash: Annotated[UInt64, 
ClickHouseMaterialized("cityHash64(user_id)")] + log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + combination_hash: Annotated[ + list[UInt64], + ClickHouseMaterialized( + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))" + ), + ClickHouseCodec("ZSTD(1)"), + ] + + +materialized_test_model = IngestPipeline[MaterializedTest]( + "MaterializedTest", + IngestPipelineConfig( + ingest_api=True, + stream=True, + table=True, + dead_letter_queue=True + ) +) diff --git a/templates/typescript-tests/src/ingest/models.ts b/templates/typescript-tests/src/ingest/models.ts index 762a1cb73..9f910a811 100644 --- a/templates/typescript-tests/src/ingest/models.ts +++ b/templates/typescript-tests/src/ingest/models.ts @@ -12,6 +12,7 @@ import { ClickHousePoint, ClickHouseRing, ClickHouseLineString, + ClickHouseMaterialized, ClickHouseMultiLineString, ClickHousePolygon, ClickHouseMultiPolygon, @@ -748,3 +749,27 @@ export const CodecTestPipeline = new IngestPipeline("CodecTest", { stream: true, ingestApi: true, }); + +// =======Materialized Columns Test======= +export interface MaterializedTest { + id: Key; + timestamp: DateTime; + userId: string; + eventDate: string & + typia.tags.Format<"date"> & + ClickHouseMaterialized<"toDate(timestamp)">; + userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; + log_blob: Record & ClickHouseCodec<"ZSTD(3)">; + combinationHash: UInt64[] & + ClickHouseMaterialized<"arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"> & + ClickHouseCodec<"ZSTD(1)">; +} + +export const MaterializedTestPipeline = new IngestPipeline( + "MaterializedTest", + { + table: true, + stream: true, + ingestApi: true, + }, +); From 4111d24a35c12248a4acfe3dcd167455a5f22109 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 1 Dec 2025 11:14:32 -0700 Subject: [PATCH 2/7] add tests --- .../src/infrastructure/olap/clickhouse/mod.rs | 73 +++++++++ 
.../infrastructure/olap/clickhouse/queries.rs | 147 ++++++++++++++++++ .../py-moose-lib/moose_lib/data_models.py | 1 + .../py-moose-lib/tests/test_materialized.py | 70 +++++++++ .../ts-moose-lib/tests/typeConvert.test.ts | 28 ++++ 5 files changed, 319 insertions(+) create mode 100644 packages/py-moose-lib/tests/test_materialized.py diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 4c49b0843..c14f5aaa6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -3843,4 +3843,77 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra } } } + + #[test] + fn test_modify_column_with_materialized() { + use crate::infrastructure::olap::clickhouse::model::ClickHouseColumn; + + // Test changing a MATERIALIZED expression + let ch_col = ClickHouseColumn { + name: "event_date".to_string(), + column_type: ClickHouseColumnType::Date, + required: true, + primary_key: false, + unique: false, + default: None, + materialized: Some("toStartOfMonth(event_time)".to_string()), + comment: None, + ttl: None, + codec: None, + }; + + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &ch_col, + false, // removing_default + false, // removing_materialized + false, // removing_ttl + false, // removing_codec + None, + ) + .unwrap(); + + assert_eq!(sqls.len(), 1); + assert_eq!( + sqls[0], + "ALTER TABLE `test_db`.`test_table` MODIFY COLUMN IF EXISTS `event_date` Date MATERIALIZED toStartOfMonth(event_time)" + ); + } + + #[test] + fn test_remove_materialized_sql_generation() { + use crate::infrastructure::olap::clickhouse::model::ClickHouseColumn; + + let ch_col = ClickHouseColumn { + name: "user_hash".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::UInt64), + required: true, + primary_key: false, + unique: false, + default: None, + materialized: 
None, + comment: None, + ttl: None, + codec: None, + }; + + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &ch_col, + false, + true, // removing_materialized + false, + false, + None, + ) + .unwrap(); + + assert!(!sqls.is_empty()); + assert_eq!( + sqls[0], + "ALTER TABLE `test_db`.`test_table` MODIFY COLUMN `user_hash` REMOVE MATERIALIZED" + ); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 048b5791d..3403f0ab6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -6180,4 +6180,151 @@ ORDER BY (`id`) "#; assert_eq!(query.trim(), expected.trim()); } + + #[test] + fn test_create_table_with_materialized_column() { + use crate::framework::versions::Version; + + let columns = vec![ + ClickHouseColumn { + name: "event_time".to_string(), + column_type: ClickHouseColumnType::DateTime64 { precision: 3 }, + required: true, + primary_key: false, + unique: false, + default: None, + materialized: None, + comment: None, + ttl: None, + codec: None, + }, + ClickHouseColumn { + name: "event_date".to_string(), + column_type: ClickHouseColumnType::Date, + required: true, + primary_key: false, + unique: false, + default: None, + materialized: Some("toDate(event_time)".to_string()), + comment: None, + ttl: None, + codec: None, + }, + ]; + + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns, + order_by: OrderBy::Fields(vec!["event_time".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + let expected = r#" +CREATE TABLE IF NOT EXISTS 
`test_db`.`test_table` +( + `event_time` DateTime64(3) NOT NULL, + `event_date` Date NOT NULL MATERIALIZED toDate(event_time) +) +ENGINE = MergeTree +ORDER BY (`event_time`) +"#; + assert_eq!(query.trim(), expected.trim()); + } + + #[test] + fn test_materialized_column_with_codec() { + use crate::framework::core::infrastructure::table::JsonOptions; + use crate::framework::versions::Version; + + // Test customer's use case: MATERIALIZED column with CODEC + let columns = vec![ + ClickHouseColumn { + name: "log_blob".to_string(), + column_type: ClickHouseColumnType::Json(JsonOptions::default()), + required: true, + primary_key: false, + unique: false, + default: None, + materialized: None, + comment: None, + ttl: None, + codec: Some("ZSTD(3)".to_string()), + }, + ClickHouseColumn { + name: "combination_hash".to_string(), + column_type: ClickHouseColumnType::Array(Box::new( + ClickHouseColumnType::ClickhouseInt(ClickHouseInt::UInt64), + )), + required: true, + primary_key: false, + unique: false, + default: None, + materialized: Some( + "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))".to_string(), + ), + comment: None, + ttl: None, + codec: Some("ZSTD(1)".to_string()), + }, + ]; + + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "logs".to_string(), + columns, + order_by: OrderBy::SingleExpr("tuple()".to_string()), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + + // Verify the query contains the MATERIALIZED clause and CODEC + assert!(query.contains("MATERIALIZED arrayMap")); + assert!(query.contains("CODEC(ZSTD(1))")); + assert!(query.contains("CODEC(ZSTD(3))")); + } + + #[test] + fn test_validation_default_and_materialized_mutually_exclusive() { + use 
crate::framework::core::infrastructure::table::{Column, ColumnType, IntType}; + use crate::infrastructure::olap::clickhouse::mapper::std_column_to_clickhouse_column; + + let column = Column { + name: "bad_column".to_string(), + data_type: ColumnType::Int(IntType::Int32), + required: true, + unique: false, + primary_key: false, + default: Some("42".to_string()), + materialized: Some("id + 1".to_string()), // Invalid: both default and materialized + annotations: vec![], + comment: None, + ttl: None, + codec: None, + }; + + let result = std_column_to_clickhouse_column(column); + assert!(result.is_err()); + let error_msg = result.unwrap_err().to_string(); + assert!( + error_msg.contains("both DEFAULT and MATERIALIZED") + || error_msg.contains("mutually exclusive") + ); + } } diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index 5f5f6a3cd..0c46275a5 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -312,6 +312,7 @@ class Column(BaseModel): annotations: list[Tuple[str, Any]] = [] ttl: str | None = None codec: str | None = None + materialized: str | None = None def to_expr(self): # Lazy import to avoid circular dependency at import time diff --git a/packages/py-moose-lib/tests/test_materialized.py b/packages/py-moose-lib/tests/test_materialized.py new file mode 100644 index 000000000..adedb5228 --- /dev/null +++ b/packages/py-moose-lib/tests/test_materialized.py @@ -0,0 +1,70 @@ +from datetime import datetime, date +from typing import Annotated, Any +from pydantic import BaseModel +from moose_lib import Key, ClickHouseMaterialized, ClickHouseCodec, UInt64 +from moose_lib.data_models import _to_columns +import pytest + + +def test_materialized_basic(): + """Test basic MATERIALIZED annotation converts to correct expression.""" + + class MaterializedTest(BaseModel): + timestamp: datetime + event_date: Annotated[date, 
ClickHouseMaterialized("toDate(timestamp)")] + + columns = _to_columns(MaterializedTest) + by_name = {col.name: col for col in columns} + + assert by_name["timestamp"].materialized is None + assert by_name["event_date"].materialized == "toDate(timestamp)" + + +def test_materialized_hash(): + """Test MATERIALIZED with hash function.""" + + class HashTest(BaseModel): + user_id: str + user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")] + + columns = _to_columns(HashTest) + by_name = {col.name: col for col in columns} + + assert by_name["user_id"].materialized is None + assert by_name["user_hash"].materialized == "cityHash64(user_id)" + + +def test_materialized_with_codec(): + """Test MATERIALIZED combined with CODEC.""" + + class MaterializedCodecTest(BaseModel): + log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + combination_hash: Annotated[ + list[UInt64], + ClickHouseMaterialized("arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"), + ClickHouseCodec("ZSTD(1)") + ] + + columns = _to_columns(MaterializedCodecTest) + by_name = {col.name: col for col in columns} + + assert by_name["log_blob"].materialized is None + assert by_name["log_blob"].codec == "ZSTD(3)" + assert by_name["combination_hash"].materialized == "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))" + assert by_name["combination_hash"].codec == "ZSTD(1)" + + +def test_materialized_mutually_exclusive_with_default(): + """Test that MATERIALIZED and DEFAULT are mutually exclusive.""" + from moose_lib import clickhouse_default + + class BadModel(BaseModel): + bad_field: Annotated[ + str, + clickhouse_default("'default_value'"), + ClickHouseMaterialized("'materialized_value'") + ] + + with pytest.raises(ValueError, match="cannot have both DEFAULT and MATERIALIZED"): + _to_columns(BadModel) + diff --git a/packages/ts-moose-lib/tests/typeConvert.test.ts b/packages/ts-moose-lib/tests/typeConvert.test.ts index 
b4658b865..090e99bbe 100644 --- a/packages/ts-moose-lib/tests/typeConvert.test.ts +++ b/packages/ts-moose-lib/tests/typeConvert.test.ts @@ -289,4 +289,32 @@ describe("typeConvert mappings for helper types", function () { fs.rmSync(tempDir, { recursive: true, force: true }); } }); + + it("maps Materialized annotations for computed columns", function () { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "moose-typeconv-")); + try { + const source = ` + import { ClickHouseMaterialized, UInt64 } from "@514labs/moose-lib"; + + export interface TestModel { + timestamp: Date; + userId: string; + eventDate: Date & ClickHouseMaterialized<"toDate(timestamp)">; + userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; + no_materialized: string; + } + `; + const { checker, type } = createProgramWithSource(tempDir, source); + const columns = toColumns(type, checker); + const byName = Object.fromEntries(columns.map((c) => [c.name, c])); + + expect(byName.timestamp.materialized).to.equal(null); + expect(byName.userId.materialized).to.equal(null); + expect(byName.eventDate.materialized).to.equal("toDate(timestamp)"); + expect(byName.userHash.materialized).to.equal("cityHash64(userId)"); + expect(byName.no_materialized.materialized).to.equal(null); + } finally { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); }); From b75d99f7a6f441cc2eaf659b4a731a12cdb38b4d Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 1 Dec 2025 11:37:06 -0700 Subject: [PATCH 3/7] docs --- .../llm-docs/python/table-setup.md | 29 ++++ .../llm-docs/typescript/table-setup.md | 28 ++++ .../src/pages/moose/olap/_meta.tsx | 3 + .../pages/moose/olap/materialized-columns.mdx | 132 ++++++++++++++++++ packages/ts-moose-lib/src/dataModels/types.ts | 5 +- 5 files changed, 194 insertions(+), 3 deletions(-) create mode 100644 apps/framework-docs/src/pages/moose/olap/materialized-columns.mdx diff --git a/apps/framework-docs/llm-docs/python/table-setup.md 
b/apps/framework-docs/llm-docs/python/table-setup.md index 65da3efc7..a85d634ad 100644 --- a/apps/framework-docs/llm-docs/python/table-setup.md +++ b/apps/framework-docs/llm-docs/python/table-setup.md @@ -976,3 +976,32 @@ class Events(BaseModel): # Codec + Numeric type event_count: Annotated[UInt64, ClickHouseCodec("DoubleDelta, LZ4")] ``` + +## Materialized Columns + +Pre-compute and store values at INSERT time for faster queries: + +```python +from typing import Annotated, Any +from datetime import datetime, date +from moose_lib import ClickHouseMaterialized, ClickHouseCodec, UInt64 +from pydantic import BaseModel + +class UserEvents(BaseModel): + timestamp: datetime + user_id: str + + # Extract date (use exact field names in expressions) + event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")] + + # Precompute hash + user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")] + + # Combine with CODEC + log_data: Annotated[Any, ClickHouseMaterialized("..."), ClickHouseCodec("ZSTD(1)")] +``` + +**Notes:** +- Cannot combine with DEFAULT (mutually exclusive) +- Cannot be primary keys +- Use exact field names in expressions diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index 98cc5ce06..da469d95e 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -825,3 +825,31 @@ interface Events { event_count: UInt64 & ClickHouseCodec<"DoubleDelta, LZ4">; } ``` + +## Materialized Columns + +Pre-compute and store values at INSERT time for faster queries: + +```typescript +import { ClickHouseCodec, ClickHouseMaterialized, DateTime, UInt64 } from '@514labs/moose-lib'; +import typia from 'typia'; + +interface UserEvents { + timestamp: DateTime; + userId: string; + + // Extract date (use exact field names in expressions) + eventDate: Date & ClickHouseMaterialized<"toDate(timestamp)">; + + // Precompute hash + userHash: UInt64 &
ClickHouseMaterialized<"cityHash64(userId)">; + + // Combine with CODEC + logData: Record & ClickHouseMaterialized<"..."> & ClickHouseCodec<"ZSTD(1)">; +} +``` + +**Notes:** +- Cannot combine with DEFAULT (mutually exclusive) +- Cannot be primary keys +- Use exact field names in expressions diff --git a/apps/framework-docs/src/pages/moose/olap/_meta.tsx b/apps/framework-docs/src/pages/moose/olap/_meta.tsx index 1d3db08d4..2ab36c1e9 100644 --- a/apps/framework-docs/src/pages/moose/olap/_meta.tsx +++ b/apps/framework-docs/src/pages/moose/olap/_meta.tsx @@ -23,6 +23,9 @@ const rawMeta = { compression: { title: "Compression Codecs", }, + "materialized-columns": { + title: "Materialized Columns", + }, "schema-optimization": { title: "Schema Optimization", }, diff --git a/apps/framework-docs/src/pages/moose/olap/materialized-columns.mdx b/apps/framework-docs/src/pages/moose/olap/materialized-columns.mdx new file mode 100644 index 000000000..d345ea9a9 --- /dev/null +++ b/apps/framework-docs/src/pages/moose/olap/materialized-columns.mdx @@ -0,0 +1,132 @@ +import { TypeScript, Python, LanguageSwitcher, Callout } from "@/components"; + + + +# Materialized Columns + +MATERIALIZED columns compute and store values at INSERT time, making queries faster at the cost of disk space and insert overhead. + +## When to use + +- Pre-compute expensive operations: hash functions, JSON parsing, complex calculations +- Index computed values for faster lookups +- Partition or order by derived values (dates from timestamps, hashes from IDs) + + +MATERIALIZED columns are automatically calculated at insert time and cannot be set by users. 
+ + +## Basic Usage + + +```typescript +import { OlapTable, Key, DateTime, ClickHouseMaterialized, ClickHouseCodec, UInt64 } from "@514labs/moose-lib"; +import typia from "typia"; + +interface UserEvents { + id: Key; + timestamp: DateTime; + userId: string; + logBlob: Record & ClickHouseCodec<"ZSTD(3)">; + + // Extract date for partitioning + eventDate: Date & ClickHouseMaterialized<"toDate(timestamp)">; + + // Precompute hash for fast lookups + userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; + + // Parse JSON once at insert (expensive) + combinationHash: UInt64[] & + ClickHouseMaterialized<"arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(logBlob)))"> & + ClickHouseCodec<"ZSTD(1)">; +} + +export const UserEventsTable = new OlapTable("UserEvents", { + orderByFields: ["userHash", "eventDate"], + partitionBy: "toYYYYMM(eventDate)" +}); +``` + + + +```python +from typing import Annotated, Any +from datetime import datetime, date +from moose_lib import OlapTable, OlapConfig, Key, ClickHouseMaterialized, ClickHouseCodec, UInt64 +from pydantic import BaseModel + +class UserEvents(BaseModel): + id: Key[str] + timestamp: datetime + user_id: str + log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + + # Extract date for partitioning + event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")] + + # Precompute hash for fast lookups + user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")] + + # Parse JSON once at insert (expensive) + combination_hash: Annotated[ + list[UInt64], + ClickHouseMaterialized("arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"), + ClickHouseCodec("ZSTD(1)") + ] + +user_events_table = OlapTable[UserEvents]( + "UserEvents", + OlapConfig( + order_by_fields=["user_hash", "event_date"], + partition_by="toYYYYMM(event_date)" + ) +) +``` + + +## Common Patterns + +**Date/Time Extraction:** +- `toDate(timestamp)` - Extract date for partitioning +-
`toHour(timestamp)` - Extract hour for time-series analysis +- `toStartOfMonth(timestamp)` - Monthly aggregation key + +**Hash Functions:** +- `cityHash64(user_id)` - Fast user lookups +- `cityHash64(user_id, session_id)` - Combined hash for deduplication + +**JSON Processing:** +- `JSONExtractString(log_blob, 'level')` - Extract specific field +- `arrayMap(kv -> cityHash64(...), JSONExtractKeysAndValuesRaw(...))` - Hash all key-value pairs + +## Important Notes + + +**Column Names in Expressions:** Use the exact field names from your data model. Moose preserves your naming convention (camelCase in TypeScript, snake_case in Python) in ClickHouse columns. + + +**Restrictions:** +- Cannot combine MATERIALIZED with DEFAULT (mutually exclusive) +- Cannot be primary keys +- Can combine with ClickHouseCodec and ClickHouseTTL + +**Schema Changes:** +- **Add:** `ALTER TABLE ADD COLUMN ... MATERIALIZED expr` +- **Modify:** `ALTER TABLE MODIFY COLUMN ... MATERIALIZED new_expr` (preserves existing values) +- **Remove:** `ALTER TABLE MODIFY COLUMN ... 
REMOVE MATERIALIZED` + +## Syncing from Remote + +When using `moose init --from-remote`, MATERIALIZED column definitions are automatically preserved: + +```bash +moose init my-app --from-remote --language typescript +# Generated models include ClickHouseMaterialized annotations +``` + +## Related + +- [Supported Types](/moose/olap/supported-types) - All column types +- [Compression Codecs](/moose/olap/compression) - Optimize storage +- [TTL (Time-to-Live)](/moose/olap/ttl) - Auto-expire data +- [ClickHouse Docs](https://clickhouse.com/docs/en/sql-reference/statements/create/table#materialized) - Detailed reference diff --git a/packages/ts-moose-lib/src/dataModels/types.ts b/packages/ts-moose-lib/src/dataModels/types.ts index 1099dadc8..bcce68d25 100644 --- a/packages/ts-moose-lib/src/dataModels/types.ts +++ b/packages/ts-moose-lib/src/dataModels/types.ts @@ -210,17 +210,16 @@ export type ClickHouseTTL = { * * @example * interface Events { - * eventTime: Date; + * eventTime: DateTime; * // Extract date component - computed and stored at insert time * eventDate: Date & ClickHouseMaterialized<"toDate(event_time)">; * * userId: string; * // Precompute hash for fast lookups - * userHash: number & ClickHouseInt<"uint64"> & ClickHouseMaterialized<"cityHash64(user_id)">; + * userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; * } * * @remarks - * - Expression uses ClickHouse column names (snake_case), not TypeScript field names * - MATERIALIZED and DEFAULT are mutually exclusive * - Can be combined with ClickHouseCodec for compression * - Changing the expression modifies the column in-place (existing values preserved) From a2fdbac0bee8dc85907550cb3d0f60b70261d594 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 1 Dec 2025 12:19:46 -0700 Subject: [PATCH 4/7] fix --- .../src/framework/core/infrastructure_map.rs | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs 
b/apps/framework-cli/src/framework/core/infrastructure_map.rs index c922d7620..3a6a8a893 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -2776,6 +2776,7 @@ fn columns_are_equivalent(before: &Column, after: &Column) -> bool { || before.unique != after.unique // primary_key change is handled at the table level || before.default != after.default + || before.materialized != after.materialized || before.annotations != after.annotations || before.comment != after.comment { @@ -4774,6 +4775,75 @@ mod diff_tests { }; assert!(columns_are_equivalent(&col_user_chain, &col_ch_chain)); } + + #[test] + fn test_columns_are_equivalent_with_materialized() { + use crate::framework::core::infrastructure::table::{Column, ColumnType}; + + let base_col = Column { + name: "event_date".to_string(), + data_type: ColumnType::Date, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + codec: None, + materialized: None, + }; + + // Test 1: Columns with same materialized expression should be equivalent + let col_with_mat1 = Column { + materialized: Some("toDate(timestamp)".to_string()), + ..base_col.clone() + }; + let col_with_mat2 = Column { + materialized: Some("toDate(timestamp)".to_string()), + ..base_col.clone() + }; + assert!(columns_are_equivalent(&col_with_mat1, &col_with_mat2)); + + // Test 2: Columns with different materialized expressions should not be equivalent + let col_with_different_mat = Column { + materialized: Some("toStartOfMonth(timestamp)".to_string()), + ..base_col.clone() + }; + assert!(!columns_are_equivalent( + &col_with_mat1, + &col_with_different_mat + )); + + // Test 3: Column with materialized vs column without materialized should not be equivalent + assert!(!columns_are_equivalent(&col_with_mat1, &base_col)); + + // Test 4: Two columns without materialized (None) should be equivalent + let 
base_col2 = base_col.clone(); + assert!(columns_are_equivalent(&base_col, &base_col2)); + + // Test 5: Adding materialized to a column should be detected as a change + let col_before = Column { + materialized: None, + ..base_col.clone() + }; + let col_after = Column { + materialized: Some("cityHash64(user_id)".to_string()), + ..base_col.clone() + }; + assert!(!columns_are_equivalent(&col_before, &col_after)); + + // Test 6: Removing materialized from a column should be detected as a change + let col_with_mat = Column { + materialized: Some("cityHash64(user_id)".to_string()), + ..base_col.clone() + }; + let col_without_mat = Column { + materialized: None, + ..base_col.clone() + }; + assert!(!columns_are_equivalent(&col_with_mat, &col_without_mat)); + } } #[cfg(test)] From 7f7049d15d5d79241cebef36c53162400cfbf131 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 1 Dec 2025 12:39:39 -0700 Subject: [PATCH 5/7] fix --- .../src/framework/typescript/generate.rs | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index e6d16aa78..bbd0c468f 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -579,26 +579,9 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> } } - // Handle DEFAULT and MATERIALIZED (mutually exclusive) - let type_str = match (&column.default, &column.materialized) { - (Some(default), None) if type_str == "Date" => { - // https://github.com/samchon/typia/issues/1658 - format!("WithDefault<{type_str}, {:?}>", default) - } - (Some(default), None) => { - format!("{type_str} & ClickHouseDefault<{:?}>", default) - } - (None, Some(materialized)) => { - format!("{type_str} & ClickHouseMaterialized<{:?}>", materialized) - } - (None, None) => type_str, - (Some(_), Some(_)) => { - // This should never happen due 
to validation, but handle it gracefully - panic!("Column '{}' has both DEFAULT and MATERIALIZED - this should be caught by validation", column.name) - } - }; - - // Append ClickHouseTTL type tag if present on the column + // Apply TTL, Codec, and Materialized BEFORE default handling + // This prevents WithDefault from being used when Date has other annotations + // (e.g., "Date & ClickHouseTTL<...>" != "Date", so we use ClickHouseDefault instead) let type_str = if let Some(expr) = &column.ttl { format!("{type_str} & ClickHouseTTL<\"{}\">", expr) } else { @@ -610,6 +593,28 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> None => type_str, Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), }; + + // Wrap with Materialized if present + // Note: Mutual exclusivity with DEFAULT is validated earlier in std_column_to_clickhouse_column + let type_str = match column.materialized.as_ref() { + None => type_str, + Some(materialized) => { + format!("{type_str} & ClickHouseMaterialized<{:?}>", materialized) + } + }; + + // Handle DEFAULT after TTL/Codec/Materialized + // WithDefault only applies to plain Date (not "Date & ClickHouse...") + let type_str = match &column.default { + Some(default) if type_str == "Date" => { + // https://github.com/samchon/typia/issues/1658 + format!("WithDefault<{type_str}, {:?}>", default) + } + Some(default) => { + format!("{type_str} & ClickHouseDefault<{:?}>", default) + } + None => type_str, + }; let type_str = if can_use_key_wrapping && column.primary_key { format!("Key<{type_str}>") } else { From 1a6f273f5cb6fb6a5c4a5e2193cf6de0cff7fc20 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 1 Dec 2025 12:48:50 -0700 Subject: [PATCH 6/7] fix --- .../src/framework/typescript/generate.rs | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index 
bbd0c468f..e66a9ffd7 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -579,41 +579,42 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> } } - // Apply TTL, Codec, and Materialized BEFORE default handling - // This prevents WithDefault from being used when Date has other annotations - // (e.g., "Date & ClickHouseTTL<...>" != "Date", so we use ClickHouseDefault instead) + // Apply TTL and Codec first (these can coexist with DEFAULT/MATERIALIZED) let type_str = if let Some(expr) = &column.ttl { format!("{type_str} & ClickHouseTTL<\"{}\">", expr) } else { type_str }; - // Wrap with Codec if present let type_str = match column.codec.as_ref() { None => type_str, Some(ref codec) => format!("{type_str} & ClickHouseCodec<{codec:?}>"), }; - // Wrap with Materialized if present - // Note: Mutual exclusivity with DEFAULT is validated earlier in std_column_to_clickhouse_column - let type_str = match column.materialized.as_ref() { - None => type_str, - Some(materialized) => { - format!("{type_str} & ClickHouseMaterialized<{:?}>", materialized) - } - }; - - // Handle DEFAULT after TTL/Codec/Materialized - // WithDefault only applies to plain Date (not "Date & ClickHouse...") - let type_str = match &column.default { - Some(default) if type_str == "Date" => { + // Handle DEFAULT and MATERIALIZED (mutually exclusive) + // Apply these AFTER TTL/Codec to prevent WithDefault when Date has other annotations + let type_str = match (&column.default, &column.materialized) { + (Some(default), None) if type_str == "Date" => { // https://github.com/samchon/typia/issues/1658 + // WithDefault only for plain Date (not "Date & ClickHouse...") format!("WithDefault<{type_str}, {:?}>", default) } - Some(default) => { + (Some(default), None) => { format!("{type_str} & ClickHouseDefault<{:?}>", default) } - None => type_str, + (None, Some(materialized)) => { + format!("{type_str} & 
ClickHouseMaterialized<{:?}>", materialized) + } + (None, None) => type_str, + (Some(_), Some(_)) => { + // Both DEFAULT and MATERIALIZED are set - this should never happen, + // so fail loudly here rather than silently generating invalid code + panic!( + "Column '{}' has both DEFAULT and MATERIALIZED set. \ + These are mutually exclusive in ClickHouse.", + column.name + ) + } }; let type_str = if can_use_key_wrapping && column.primary_key { format!("Key<{type_str}>") From 6d3927514abeba6512c228688ca3e4ac334fea6e Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 2 Dec 2025 13:13:43 -0700 Subject: [PATCH 7/7] docs v2 --- .../moosestack/olap/materialized-columns.mdx | 137 ++++++++++++++++++ .../src/config/navigation.ts | 6 + 2 files changed, 143 insertions(+) create mode 100644 apps/framework-docs-v2/content/moosestack/olap/materialized-columns.mdx diff --git a/apps/framework-docs-v2/content/moosestack/olap/materialized-columns.mdx b/apps/framework-docs-v2/content/moosestack/olap/materialized-columns.mdx new file mode 100644 index 000000000..bf8ce6f32 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/olap/materialized-columns.mdx @@ -0,0 +1,137 @@ +--- +title: Materialized Columns +description: Pre-compute and store column values at insert time for faster queries +order: 16 +category: olap +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Materialized Columns + +MATERIALIZED columns compute and store values at INSERT time, making queries faster at the cost of disk space and insert overhead. + +## When to use + +- Pre-compute expensive operations: hash functions, JSON parsing, complex calculations +- Index computed values for faster lookups +- Partition or order by derived values (dates from timestamps, hashes from IDs) + + +MATERIALIZED columns are automatically calculated at insert time and cannot be set by users. 
+ + +## Basic Usage + + + +```ts filename="MaterializedExample.ts" copy +import { OlapTable, Key, DateTime, ClickHouseMaterialized, ClickHouseCodec, UInt64 } from "@514labs/moose-lib"; + +interface UserEvents { + id: Key<string>; + timestamp: DateTime; + userId: string; + logBlob: Record<string, any> & ClickHouseCodec<"ZSTD(3)">; + + // Extract date for partitioning + eventDate: Date & ClickHouseMaterialized<"toDate(timestamp)">; + + // Precompute hash for fast lookups + userHash: UInt64 & ClickHouseMaterialized<"cityHash64(userId)">; + + // Parse JSON once at insert (expensive) + combinationHash: UInt64[] & + ClickHouseMaterialized<"arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(logBlob)))"> & + ClickHouseCodec<"ZSTD(1)">; +} + +export const UserEventsTable = new OlapTable<UserEvents>("UserEvents", { + orderByFields: ["userHash", "eventDate"], + partitionBy: "toYYYYMM(eventDate)" +}); +``` + + +```python filename="materialized_example.py" copy +from typing import Annotated, Any +from datetime import datetime, date +from moose_lib import OlapTable, OlapConfig, Key, ClickHouseMaterialized, ClickHouseCodec, UInt64 +from pydantic import BaseModel + +class UserEvents(BaseModel): + id: Key[str] + timestamp: datetime + user_id: str + log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")] + + # Extract date for partitioning + event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")] + + # Precompute hash for fast lookups + user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")] + + # Parse JSON once at insert (expensive) + combination_hash: Annotated[ + list[UInt64], + ClickHouseMaterialized("arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"), + ClickHouseCodec("ZSTD(1)") + ] + +user_events_table = OlapTable[UserEvents]( + "UserEvents", + OlapConfig( + order_by_fields=["user_hash", "event_date"], + partition_by="toYYYYMM(event_date)" + ) +) +``` + + + +## Common Patterns + +**Date/Time Extraction:** +- 
`toDate(timestamp)` - Extract date for partitioning +- `toHour(timestamp)` - Extract hour for time-series analysis +- `toStartOfMonth(timestamp)` - Monthly aggregation key + +**Hash Functions:** +- `cityHash64(user_id)` - Fast user lookups +- `cityHash64(user_id, session_id)` - Combined hash for deduplication + +**JSON Processing:** +- `JSONExtractString(log_blob, 'level')` - Extract specific field +- `arrayMap(kv -> cityHash64(...), JSONExtractKeysAndValuesRaw(...))` - Hash all key-value pairs + +## Important Notes + + +Use the exact field names from your data model. Moose preserves your naming convention (camelCase in TypeScript, snake_case in Python) in ClickHouse columns. + + +**Restrictions:** +- Cannot combine MATERIALIZED with DEFAULT (mutually exclusive) +- Cannot be primary keys +- Can combine with ClickHouseCodec and ClickHouseTTL + +**Schema Changes:** +- **Add:** `ALTER TABLE ADD COLUMN ... MATERIALIZED expr` +- **Modify:** `ALTER TABLE MODIFY COLUMN ... MATERIALIZED new_expr` (preserves existing values) +- **Remove:** `ALTER TABLE MODIFY COLUMN ... 
REMOVE MATERIALIZED` + +## Syncing from Remote + +When using `moose init --from-remote`, MATERIALIZED column definitions are automatically preserved: + +```bash filename="Terminal" copy +moose init my-app --from-remote --language typescript +# Generated models include ClickHouseMaterialized annotations +``` + +## Related + +- [Supported Types](/moosestack/olap/supported-types) - All column types +- [Schema Optimization](/moosestack/olap/schema-optimization) - Optimize storage +- [TTL (Time-to-Live)](/moosestack/olap/ttl) - Auto-expire data +- [ClickHouse Docs](https://clickhouse.com/docs/en/sql-reference/statements/create/table#materialized) - Detailed reference diff --git a/apps/framework-docs-v2/src/config/navigation.ts b/apps/framework-docs-v2/src/config/navigation.ts index 30ac60a44..381da9ae3 100644 --- a/apps/framework-docs-v2/src/config/navigation.ts +++ b/apps/framework-docs-v2/src/config/navigation.ts @@ -286,6 +286,12 @@ const moosestackNavigationConfig: NavigationConfig = [ title: "TTL (Time-to-Live)", languages: ["typescript", "python"], }, + { + type: "page", + slug: "moosestack/olap/materialized-columns", + title: "Materialized Columns", + languages: ["typescript", "python"], + }, { type: "page", slug: "moosestack/olap/schema-optimization",