Skip to content

Commit 2c856f3

Browse files
authored
primary key expression (#3031)
<!-- CURSOR_SUMMARY --> > [!NOTE] > Adds first-class primary key expression support across OLAP pipelines, generating/reading PRIMARY KEY in DDL, normalizing for diffs, and exposing config in TS/Python SDKs with tests and docs. > > - **OLAP/ClickHouse core**: > - Add `primary_key_expression` to `Table`/ClickHouse models and protobuf; include in (de)serialization and introspection. > - Generate PRIMARY KEY in `create_table_query` (uses expression or column flags) and expose in list_tables via SQL parsing (`extract_primary_key_from_create_table`). > - Normalize PKs (strip spaces/backticks/outer parens) and compare in diffs; PK changes trigger drop+create. > - **Code generation**: > - TS/Python generators output `primaryKeyExpression`/`primary_key_expression` in table configs and switch key-wrapping logic to use it. > - **SDK/Internal config**: > - TS/Python SDKs add `primaryKeyExpression`/`primary_key_expression` to `OlapTable` config and internal representations. > - **Tests**: > - Extensive unit tests for PK parsing/normalization/diff behavior and DDL generation; e2e template tests validating PRIMARY KEY and ORDER BY in DDL. > - **Docs**: > - New/updated docs detailing primary key expressions, constraints, and examples in TS/Python, including ORDER BY prefix requirement. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 3bdd55e. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY -->
1 parent d619ad3 commit 2c856f3

File tree

32 files changed

+1430
-56
lines changed

32 files changed

+1430
-56
lines changed

apps/framework-cli-e2e/test/templates.test.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,54 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => {
292292
}
293293
});
294294

295+
it("should include PRIMARY KEY expression in DDL when configured", async function () {
296+
if (config.isTestsVariant) {
297+
// Test 1: Primary key with hash function
298+
const ddl1 = await getTableDDL("PrimaryKeyExpressionTest", "local");
299+
const primaryKeyPattern =
300+
config.language === "typescript" ?
301+
"PRIMARY KEY (userId, cityHash64(eventId))"
302+
: "PRIMARY KEY (user_id, cityHash64(event_id))";
303+
const orderByPattern =
304+
config.language === "typescript" ?
305+
"ORDER BY (userId, cityHash64(eventId), timestamp)"
306+
: "ORDER BY (user_id, cityHash64(event_id), timestamp)";
307+
308+
if (!ddl1.includes(primaryKeyPattern)) {
309+
throw new Error(
310+
`PRIMARY KEY expression not found in PrimaryKeyExpressionTest DDL. Expected: ${primaryKeyPattern}. DDL: ${ddl1}`,
311+
);
312+
}
313+
if (!ddl1.includes(orderByPattern)) {
314+
throw new Error(
315+
`ORDER BY expression not found in PrimaryKeyExpressionTest DDL. Expected: ${orderByPattern}. DDL: ${ddl1}`,
316+
);
317+
}
318+
319+
// Test 2: Primary key with different ordering
320+
const ddl2 = await getTableDDL("PrimaryKeyOrderingTest", "local");
321+
const primaryKeyPattern2 =
322+
config.language === "typescript" ?
323+
"PRIMARY KEY productId"
324+
: "PRIMARY KEY product_id";
325+
const orderByPattern2 =
326+
config.language === "typescript" ?
327+
"ORDER BY (productId, category, brand)"
328+
: "ORDER BY (product_id, category, brand)";
329+
330+
if (!ddl2.includes(primaryKeyPattern2)) {
331+
throw new Error(
332+
`PRIMARY KEY expression not found in PrimaryKeyOrderingTest DDL. Expected: ${primaryKeyPattern2}. DDL: ${ddl2}`,
333+
);
334+
}
335+
if (!ddl2.includes(orderByPattern2)) {
336+
throw new Error(
337+
`ORDER BY expression not found in PrimaryKeyOrderingTest DDL. Expected: ${orderByPattern2}. DDL: ${ddl2}`,
338+
);
339+
}
340+
}
341+
});
342+
295343
it("should generate FixedString types in DDL including type aliases", async function () {
296344
if (config.isTestsVariant && config.language === "python") {
297345
const ddl = await getTableDDL("FixedStringTest", "local");

apps/framework-cli-e2e/test/utils/schema-definitions.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,25 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [
421421
{ name: "payloadBasic", type: "JSON(count Int64, name String)" },
422422
],
423423
},
424+
// Primary Key Expression Tests
425+
{
426+
tableName: "PrimaryKeyExpressionTest",
427+
columns: [
428+
{ name: "userId", type: "String" },
429+
{ name: "eventId", type: "String" },
430+
{ name: "timestamp", type: /DateTime\('UTC'\)/ },
431+
{ name: "category", type: "String" },
432+
],
433+
},
434+
{
435+
tableName: "PrimaryKeyOrderingTest",
436+
columns: [
437+
{ name: "productId", type: "String" },
438+
{ name: "category", type: "String" },
439+
{ name: "brand", type: "String" },
440+
{ name: "timestamp", type: /DateTime\('UTC'\)/ },
441+
],
442+
},
424443
];
425444

426445
// ============ PYTHON TEMPLATE SCHEMA DEFINITIONS ============
@@ -805,6 +824,25 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [
805824
{ name: "payload_basic", type: "JSON(count Int64, name String)" },
806825
],
807826
},
827+
// Primary Key Expression Tests
828+
{
829+
tableName: "PrimaryKeyExpressionTest",
830+
columns: [
831+
{ name: "user_id", type: "String" },
832+
{ name: "event_id", type: "String" },
833+
{ name: "timestamp", type: /DateTime\('UTC'\)/ },
834+
{ name: "category", type: "String" },
835+
],
836+
},
837+
{
838+
tableName: "PrimaryKeyOrderingTest",
839+
columns: [
840+
{ name: "product_id", type: "String" },
841+
{ name: "category", type: "String" },
842+
{ name: "brand", type: "String" },
843+
{ name: "timestamp", type: /DateTime\('UTC'\)/ },
844+
],
845+
},
808846
];
809847

810848
// ============ HELPER FUNCTIONS ============

apps/framework-cli/src/cli/local_webserver.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3564,6 +3564,7 @@ mod tests {
35643564
database: None,
35653565
table_ttl_setting: None,
35663566
cluster_name: None,
3567+
primary_key_expression: None,
35673568
}
35683569
}
35693570

apps/framework-cli/src/cli/routines/migrate.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,7 @@ mod tests {
781781
table_settings: None,
782782
table_ttl_setting: None,
783783
cluster_name: None,
784+
primary_key_expression: None,
784785
}
785786
}
786787

apps/framework-cli/src/cli/routines/seed_data.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ mod tests {
614614
table_settings: None,
615615
table_ttl_setting: None,
616616
cluster_name: None,
617+
primary_key_expression: None,
617618
}
618619
}
619620

apps/framework-cli/src/framework/core/infra_reality_checker.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ mod tests {
533533
database: None,
534534
table_ttl_setting: None,
535535
cluster_name: None,
536+
primary_key_expression: None,
536537
}
537538
}
538539

apps/framework-cli/src/framework/core/infrastructure/table.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,10 @@ pub struct Table {
302302
/// Optional cluster name for ON CLUSTER support in ClickHouse
303303
#[serde(skip_serializing_if = "Option::is_none", default)]
304304
pub cluster_name: Option<String>,
305+
/// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified)
306+
/// Allows for complex primary keys using functions or different column ordering
307+
#[serde(skip_serializing_if = "Option::is_none", default)]
308+
pub primary_key_expression: Option<String>,
305309
}
306310

307311
impl Table {
@@ -402,6 +406,75 @@ impl Table {
402406
.collect()
403407
}
404408

409+
/// Returns a normalized representation of the primary key for comparison purposes.
410+
///
411+
/// This handles both:
412+
/// - `primary_key_expression`: Uses the expression directly
413+
/// - Column-level `primary_key` flags: Builds an expression from column names
414+
///
415+
/// The result is normalized (trimmed, spaces removed, backticks removed, and outer
416+
/// parentheses stripped for single-element tuples) to enable semantic comparison.
417+
/// For example:
418+
/// - `primary_key_expression: Some("(foo, bar)")` returns "(foo,bar)"
419+
/// - Columns foo, bar with `primary_key: true` returns "(foo,bar)"
420+
/// - `primary_key_expression: Some("foo")` returns "foo"
421+
/// - `primary_key_expression: Some("(foo)")` returns "foo" (outer parens stripped)
422+
/// - Single column foo with `primary_key: true` returns "foo"
423+
pub fn normalized_primary_key_expr(&self) -> String {
424+
let expr = if let Some(ref pk_expr) = self.primary_key_expression {
425+
// Use the explicit primary_key_expression
426+
pk_expr.clone()
427+
} else {
428+
// Build from column-level primary_key flags
429+
let pk_cols = self.primary_key_columns();
430+
if pk_cols.is_empty() {
431+
String::new()
432+
} else if pk_cols.len() == 1 {
433+
pk_cols[0].to_string()
434+
} else {
435+
format!("({})", pk_cols.join(", "))
436+
}
437+
};
438+
439+
// Normalize: trim, remove backticks, remove spaces
440+
let mut normalized = expr
441+
.trim()
442+
.trim_matches('`')
443+
.replace('`', "")
444+
.replace(" ", "");
445+
446+
// Strip outer parentheses if this is a single-element tuple
447+
// E.g., "(col)" -> "col", "(cityHash64(col))" -> "cityHash64(col)"
448+
// But keep "(col1,col2)" as-is
449+
if normalized.starts_with('(') && normalized.ends_with(')') {
450+
// Check if there are any top-level commas (not inside nested parentheses)
451+
let inner = &normalized[1..normalized.len() - 1];
452+
let has_top_level_comma = {
453+
let mut depth = 0;
454+
let mut found_comma = false;
455+
for ch in inner.chars() {
456+
match ch {
457+
'(' => depth += 1,
458+
')' => depth -= 1,
459+
',' if depth == 0 => {
460+
found_comma = true;
461+
break;
462+
}
463+
_ => {}
464+
}
465+
}
466+
found_comma
467+
};
468+
469+
// If no top-level comma, it's a single-element tuple - strip outer parens
470+
if !has_top_level_comma {
471+
normalized = inner.to_string();
472+
}
473+
}
474+
475+
normalized
476+
}
477+
405478
pub fn order_by_with_fallback(&self) -> OrderBy {
406479
// table (in infra map created by older version of moose) may leave order_by unspecified,
407480
// but the implicit order_by from primary keys can be the same
@@ -473,6 +546,7 @@ impl Table {
473546
table_settings: self.table_settings.clone().unwrap_or_default(),
474547
table_ttl_setting: self.table_ttl_setting.clone(),
475548
cluster_name: self.cluster_name.clone(),
549+
primary_key_expression: self.primary_key_expression.clone(),
476550
metadata: MessageField::from_option(self.metadata.as_ref().map(|m| {
477551
infrastructure_map::Metadata {
478552
description: m.description.clone().unwrap_or_default(),
@@ -581,6 +655,7 @@ impl Table {
581655
database: proto.database,
582656
table_ttl_setting: proto.table_ttl_setting,
583657
cluster_name: proto.cluster_name,
658+
primary_key_expression: proto.primary_key_expression,
584659
}
585660
}
586661
}
@@ -1647,6 +1722,7 @@ mod tests {
16471722
database: None,
16481723
table_ttl_setting: None,
16491724
cluster_name: None,
1725+
primary_key_expression: None,
16501726
};
16511727
assert_eq!(table1.id(DEFAULT_DATABASE_NAME), "local_users");
16521728

@@ -1776,6 +1852,7 @@ mod tests {
17761852
database: None,
17771853
table_ttl_setting: None,
17781854
cluster_name: None,
1855+
primary_key_expression: None,
17791856
};
17801857

17811858
// Target table from code: explicit order_by that matches primary key
@@ -1799,6 +1876,7 @@ mod tests {
17991876
database: None,
18001877
table_ttl_setting: None,
18011878
cluster_name: None,
1879+
primary_key_expression: None,
18021880
};
18031881

18041882
// These should be equal because:

apps/framework-cli/src/framework/core/infrastructure_map.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3065,6 +3065,7 @@ mod tests {
30653065
database: None,
30663066
table_ttl_setting: None,
30673067
cluster_name: None,
3068+
primary_key_expression: None,
30683069
};
30693070

30703071
let after = Table {
@@ -3121,6 +3122,7 @@ mod tests {
31213122
database: None,
31223123
table_ttl_setting: None,
31233124
cluster_name: None,
3125+
primary_key_expression: None,
31243126
};
31253127

31263128
let diff = compute_table_columns_diff(&before, &after);
@@ -3297,6 +3299,7 @@ mod diff_tests {
32973299
database: None,
32983300
table_ttl_setting: None,
32993301
cluster_name: None,
3302+
primary_key_expression: None,
33003303
}
33013304
}
33023305

apps/framework-cli/src/framework/core/partial_infrastructure_map.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,9 @@ struct PartialTable {
273273
/// Optional cluster name for ON CLUSTER support
274274
#[serde(default)]
275275
pub cluster: Option<String>,
276+
/// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified)
277+
#[serde(default, alias = "primary_key_expression")]
278+
pub primary_key_expression: Option<String>,
276279
}
277280

278281
/// Represents a topic definition from user code before it's converted into a complete [`Topic`].
@@ -743,6 +746,7 @@ impl PartialInfrastructureMap {
743746
table_ttl_setting,
744747
database: partial_table.database.clone(),
745748
cluster_name: partial_table.cluster.clone(),
749+
primary_key_expression: partial_table.primary_key_expression.clone(),
746750
};
747751
Ok((table.id(default_database), table))
748752
})

apps/framework-cli/src/framework/core/plan.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ mod tests {
527527
database: None,
528528
table_ttl_setting: None,
529529
cluster_name: None,
530+
primary_key_expression: None,
530531
}
531532
}
532533

0 commit comments

Comments
 (0)