Skip to content

Commit 3d1c6ba

Browse files
authored
fix ORDER BY extraction when projections exist (#3052)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> Restricts ORDER BY parsing to the table clause after ENGINE to avoid capturing projection ORDER BY; adds tests for this case.
>
> - **ClickHouse parsing**:
>   - Refines `extract_order_by_from_create_query` to search for `ORDER BY` only after the `ENGINE` clause and ignore occurrences tied to `PRIMARY KEY` or within projections.
> - **Tests**:
>   - Adds a projection-heavy CREATE TABLE case to ensure only the main table `ORDER BY` is extracted.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit f6957fd. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
1 parent 9bb2167 commit 3d1c6ba

File tree

1 file changed

+29
-3
lines changed
  • apps/framework-cli/src/infrastructure/olap/clickhouse

1 file changed

+29
-3
lines changed

apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2363,13 +2363,25 @@ static ORDER_BY_TERMINATOR_PATTERN: LazyLock<regex::Regex> = LazyLock::new(|| {
23632363
pub fn extract_order_by_from_create_query(create_query: &str) -> Vec<String> {
23642364
debug!("Extracting ORDER BY from query: {}", create_query);
23652365

2366+
// Find the main ORDER BY clause (not ones inside projections)
2367+
// We need to search for ORDER BY that comes after the ENGINE clause
2368+
let upper = create_query.to_uppercase();
2369+
let engine_pos = upper.find("ENGINE").unwrap_or_else(|| {
2370+
debug!("No ENGINE clause found");
2371+
0
2372+
});
2373+
2374+
// Search for ORDER BY only in the part after ENGINE
2375+
let after_engine = &create_query[engine_pos..];
2376+
let upper_after_engine = &upper[engine_pos..];
2377+
23662378
// Find the ORDER BY clause, being careful not to match PRIMARY KEY
23672379
let mut after_order_by = None;
2368-
for (idx, _) in create_query.to_uppercase().match_indices("ORDER BY") {
2380+
for (idx, _) in upper_after_engine.match_indices("ORDER BY") {
23692381
// Check if this is not part of "PRIMARY KEY" by looking at the preceding text
2370-
let preceding_text = &create_query[..idx].trim_end().to_uppercase();
2382+
let preceding_text = &upper_after_engine[..idx].trim_end();
23712383
if !preceding_text.ends_with("PRIMARY KEY") {
2372-
after_order_by = Some(&create_query[idx..]);
2384+
after_order_by = Some(&after_engine[idx..]);
23732385
break;
23742386
}
23752387
}
@@ -2796,6 +2808,20 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra
27962808
let query = "CREATE TABLE test (id Int64) ENGINE = MergeTree()";
27972809
let order_by = extract_order_by_from_create_query(query);
27982810
assert_eq!(order_by, Vec::<String>::new());
2811+
2812+
// Test with projections that have their own ORDER BY clauses
2813+
// Should extract the main table ORDER BY, not the projection ORDER BY
2814+
let query = r#"CREATE TABLE local.ParsedLogsV2_0_0 (`orgId` String, `projectId` String, `branchId` String, `date` DateTime('UTC'), `message` String, `severityNumber` Float64, `severityLevel` String, `source` String, `sessionId` String, `serviceName` String, `machineId` String, PROJECTION severity_level_projection (SELECT severityLevel, date, orgId, projectId, branchId, machineId, source, message ORDER BY severityLevel, date), PROJECTION machine_source_projection (SELECT machineId, source, date, orgId, projectId, branchId, severityLevel, message ORDER BY machineId, source, date)) ENGINE = MergeTree PRIMARY KEY (orgId, projectId, branchId) ORDER BY (orgId, projectId, branchId, date) TTL date + toIntervalDay(90) SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_granularity_bytes = 10485760"#;
2815+
let order_by = extract_order_by_from_create_query(query);
2816+
assert_eq!(
2817+
order_by,
2818+
vec![
2819+
"orgId".to_string(),
2820+
"projectId".to_string(),
2821+
"branchId".to_string(),
2822+
"date".to_string()
2823+
]
2824+
);
27992825
}
28002826

28012827
#[test]

0 commit comments

Comments
 (0)