Skip to content

Commit 491bc61

Browse files
authored
fix order by change detection (#3017)
old code fails when `target_table.order_by` == `target_table` primary key == actual table primary key, actual table `order_by` == `[]` <!-- CURSOR_SUMMARY --> --- > [!NOTE] > Adds `order_by_with_fallback` and uses it to compare/normalize ORDER BY (fallback to primary key for MergeTree), fixing false diffs; updates diff logic and tests. > > - **Core Infrastructure (`apps/framework-cli/src/framework/core/infrastructure/table.rs`)**: > - Add `Table::order_by_with_fallback()` to derive ORDER BY from primary keys when empty for MergeTree engines. > - Update `Table::order_by_equals()` to compare using fallback. > - **Infra Map (`apps/framework-cli/src/framework/core/infrastructure_map.rs`)**: > - Replace ad-hoc ORDER BY diff logic with `table.order_by_equals(target_table)`. > - `InfrastructureMap::normalize()` now sets `table.order_by` via `order_by_with_fallback()` when empty. > - **Tests**: > - Add test for ORDER BY equality with implicit primary key and for non-MergeTree (S3) behavior. > - Adjust existing diff paths to rely on new equality/normalization. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit eaa16da. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY -->
1 parent ef7eafc commit 491bc61

File tree

2 files changed

+164
-78
lines changed

2 files changed

+164
-78
lines changed

apps/framework-cli/src/framework/core/infrastructure/table.rs

Lines changed: 159 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -402,18 +402,27 @@ impl Table {
402402
.collect()
403403
}
404404

405+
pub fn order_by_with_fallback(&self) -> OrderBy {
406+
// table (in infra map created by older version of moose) may leave order_by unspecified,
407+
// but the implicit order_by from primary keys can be the same
408+
// ONLY for the MergeTree family
409+
// S3 supports ORDER BY but does not auto set ORDER BY from PRIMARY KEY
410+
// Buffer, S3Queue, and Distributed don't support ORDER BY
411+
if self.order_by.is_empty() && self.engine.is_merge_tree_family() {
412+
OrderBy::Fields(
413+
self.primary_key_columns()
414+
.iter()
415+
.map(|c| c.to_string())
416+
.collect(),
417+
)
418+
} else {
419+
self.order_by.clone()
420+
}
421+
}
422+
405423
pub fn order_by_equals(&self, target: &Table) -> bool {
406424
self.order_by == target.order_by
407-
// target may leave order_by unspecified,
408-
// but the implicit order_by from primary keys can be the same
409-
// ONLY for engines that support ORDER BY (MergeTree family and S3)
410-
// Buffer, S3Queue, and Distributed don't support ORDER BY
411-
|| (target.order_by.is_empty()
412-
&& target.engine.supports_order_by()
413-
&& matches!(
414-
&self.order_by,
415-
OrderBy::Fields(v) if v.iter().map(String::as_str).collect::<Vec<_>>() == target.primary_key_columns()
416-
))
425+
|| self.order_by_with_fallback() == target.order_by_with_fallback()
417426
}
418427

419428
pub fn to_proto(&self) -> ProtoTable {
@@ -1712,4 +1721,144 @@ mod tests {
17121721
};
17131722
assert_eq!(table7.id(DEFAULT_DATABASE_NAME), "local_users_1_0");
17141723
}
1724+
1725+
#[test]
1726+
fn test_order_by_equals_with_implicit_primary_key() {
1727+
use crate::framework::core::infrastructure_map::PrimitiveTypes;
1728+
1729+
// Test case: actual table has empty order_by (implicit primary key),
1730+
// target table has explicit order_by that matches the primary key.
1731+
// This should be considered equal for MergeTree engines.
1732+
1733+
let columns = vec![
1734+
Column {
1735+
name: "id".to_string(),
1736+
data_type: ColumnType::String,
1737+
required: true,
1738+
unique: false,
1739+
primary_key: true,
1740+
default: None,
1741+
annotations: vec![],
1742+
comment: None,
1743+
ttl: None,
1744+
},
1745+
Column {
1746+
name: "name".to_string(),
1747+
data_type: ColumnType::String,
1748+
required: true,
1749+
unique: false,
1750+
primary_key: false,
1751+
default: None,
1752+
annotations: vec![],
1753+
comment: None,
1754+
ttl: None,
1755+
},
1756+
];
1757+
1758+
// Actual table from database: empty order_by (implicitly uses primary key)
1759+
let actual_table = Table {
1760+
name: "test_table".to_string(),
1761+
columns: columns.clone(),
1762+
order_by: OrderBy::Fields(vec![]), // Empty - will fall back to primary key
1763+
partition_by: None,
1764+
sample_by: None,
1765+
engine: ClickhouseEngine::MergeTree,
1766+
version: None,
1767+
source_primitive: PrimitiveSignature {
1768+
name: "test".to_string(),
1769+
primitive_type: PrimitiveTypes::DataModel,
1770+
},
1771+
metadata: None,
1772+
life_cycle: LifeCycle::FullyManaged,
1773+
engine_params_hash: None,
1774+
table_settings: None,
1775+
indexes: vec![],
1776+
database: None,
1777+
table_ttl_setting: None,
1778+
cluster_name: None,
1779+
};
1780+
1781+
// Target table from code: explicit order_by that matches primary key
1782+
let target_table = Table {
1783+
name: "test_table".to_string(),
1784+
columns: columns.clone(),
1785+
order_by: OrderBy::Fields(vec!["id".to_string()]), // Explicit order_by
1786+
partition_by: None,
1787+
sample_by: None,
1788+
engine: ClickhouseEngine::MergeTree,
1789+
version: None,
1790+
source_primitive: PrimitiveSignature {
1791+
name: "test".to_string(),
1792+
primitive_type: PrimitiveTypes::DataModel,
1793+
},
1794+
metadata: None,
1795+
life_cycle: LifeCycle::FullyManaged,
1796+
engine_params_hash: None,
1797+
table_settings: None,
1798+
indexes: vec![],
1799+
database: None,
1800+
table_ttl_setting: None,
1801+
cluster_name: None,
1802+
};
1803+
1804+
// These should be equal because:
1805+
// - actual_table has empty order_by but MergeTree engine
1806+
// - actual_table.order_by_with_fallback() returns ["id"] (from primary key)
1807+
// - target_table.order_by is ["id"]
1808+
// - target_table.order_by_with_fallback() returns ["id"]
1809+
// - ["id"] == ["id"]
1810+
assert!(
1811+
actual_table.order_by_equals(&target_table),
1812+
"actual table with empty order_by should equal target with explicit primary key order_by"
1813+
);
1814+
1815+
// Reverse direction should also work
1816+
assert!(
1817+
target_table.order_by_equals(&actual_table),
1818+
"target table with explicit primary key order_by should equal actual with empty order_by"
1819+
);
1820+
1821+
// Test with different order_by - should NOT be equal
1822+
let different_target = Table {
1823+
order_by: OrderBy::Fields(vec!["name".to_string()]),
1824+
..target_table.clone()
1825+
};
1826+
assert!(
1827+
!actual_table.order_by_equals(&different_target),
1828+
"tables with different order_by should not be equal"
1829+
);
1830+
1831+
// Test with non-MergeTree engine (S3) - empty order_by should stay empty
1832+
let actual_s3 = Table {
1833+
engine: ClickhouseEngine::S3 {
1834+
path: "s3://bucket/path".to_string(),
1835+
format: "Parquet".to_string(),
1836+
aws_access_key_id: None,
1837+
aws_secret_access_key: None,
1838+
compression: None,
1839+
partition_strategy: None,
1840+
partition_columns_in_data_file: None,
1841+
},
1842+
..actual_table.clone()
1843+
};
1844+
1845+
let target_s3 = Table {
1846+
engine: ClickhouseEngine::S3 {
1847+
path: "s3://bucket/path".to_string(),
1848+
format: "Parquet".to_string(),
1849+
aws_access_key_id: None,
1850+
aws_secret_access_key: None,
1851+
compression: None,
1852+
partition_strategy: None,
1853+
partition_columns_in_data_file: None,
1854+
},
1855+
..target_table.clone()
1856+
};
1857+
1858+
// For S3 engine, empty order_by doesn't fall back to primary key
1859+
assert!(
1860+
!actual_s3.order_by_equals(&target_s3),
1861+
"S3 engine should not infer order_by from primary key"
1862+
);
1863+
}
17151864
}

apps/framework-cli/src/framework/core/infrastructure_map.rs

Lines changed: 5 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,34 +1826,7 @@ impl InfrastructureMap {
18261826
before: normalized_table.partition_by.clone(),
18271827
after: normalized_target.partition_by.clone(),
18281828
};
1829-
1830-
// Compute ORDER BY changes
1831-
fn order_by_from_primary_key(target_table: &Table) -> Vec<String> {
1832-
target_table
1833-
.columns
1834-
.iter()
1835-
.filter_map(|c| {
1836-
if c.primary_key {
1837-
Some(c.name.clone())
1838-
} else {
1839-
None
1840-
}
1841-
})
1842-
.collect()
1843-
}
1844-
1845-
let order_by_changed = table.order_by != target_table.order_by
1846-
// target may leave order_by unspecified,
1847-
// but the implicit order_by from primary keys can be the same
1848-
// ONLY for engines that support ORDER BY (MergeTree family and S3)
1849-
// Buffer, S3Queue, and Distributed don't support ORDER BY
1850-
&& !(target_table.order_by.is_empty()
1851-
&& target_table.engine.supports_order_by()
1852-
&& matches!(
1853-
&table.order_by,
1854-
OrderBy::Fields(v)
1855-
if *v == order_by_from_primary_key(target_table)
1856-
));
1829+
let order_by_changed = !table.order_by_equals(target_table);
18571830

18581831
// Detect engine change (e.g., MergeTree -> ReplacingMergeTree)
18591832
let engine_changed = table.engine != target_table.engine;
@@ -2130,32 +2103,7 @@ impl InfrastructureMap {
21302103
}
21312104
}
21322105

2133-
fn order_by_from_primary_key(target_table: &Table) -> Vec<String> {
2134-
target_table
2135-
.columns
2136-
.iter()
2137-
.filter_map(|c| {
2138-
if c.primary_key {
2139-
Some(c.name.clone())
2140-
} else {
2141-
None
2142-
}
2143-
})
2144-
.collect()
2145-
}
2146-
2147-
let order_by_changed = table.order_by != target_table.order_by
2148-
// target may leave order_by unspecified,
2149-
// but the implicit order_by from primary keys can be the same
2150-
// ONLY for engines that support ORDER BY (MergeTree family and S3)
2151-
// Buffer, S3Queue, and Distributed don't support ORDER BY
2152-
&& !(target_table.order_by.is_empty()
2153-
&& target_table.engine.supports_order_by()
2154-
&& matches!(
2155-
&table.order_by,
2156-
crate::framework::core::infrastructure::table::OrderBy::Fields(v)
2157-
if *v == order_by_from_primary_key(target_table)
2158-
));
2106+
let order_by_changed = !table.order_by_equals(target_table);
21592107

21602108
let order_by_change = if order_by_changed {
21612109
OrderByChange {
@@ -2578,7 +2526,7 @@ impl InfrastructureMap {
25782526
/// This is needed because older CLI versions didn't persist order_by when it was
25792527
/// derived from primary key columns.
25802528
pub fn normalize(mut self) -> Self {
2581-
use crate::framework::core::infrastructure::table::{ColumnType, OrderBy};
2529+
use crate::framework::core::infrastructure::table::ColumnType;
25822530

25832531
self.tables = self
25842532
.tables
@@ -2587,19 +2535,8 @@ impl InfrastructureMap {
25872535
// Fall back to primary key columns if order_by is empty for MergeTree engines
25882536
// This ensures backward compatibility when order_by isn't explicitly set
25892537
// We only do this for MergeTree family to avoid breaking S3 tables
2590-
if table.order_by.is_empty() && table.engine.is_merge_tree_family() {
2591-
let primary_key_columns: Vec<String> = table
2592-
.columns
2593-
.iter()
2594-
.filter_map(|c| {
2595-
if c.primary_key {
2596-
Some(c.name.clone())
2597-
} else {
2598-
None
2599-
}
2600-
})
2601-
.collect();
2602-
table.order_by = OrderBy::Fields(primary_key_columns);
2538+
if table.order_by.is_empty() {
2539+
table.order_by = table.order_by_with_fallback();
26032540
}
26042541

26052542
// Normalize columns: ClickHouse doesn't support Nullable(Array(...))

0 commit comments

Comments
 (0)