From 97c200c654c67717ca057a3db838c31c253afa20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Wed, 6 May 2026 19:52:51 +0300 Subject: [PATCH 01/10] cbo-phase2: wire filter selectivity into chooser decision When a WHERE clause is present with a simple column OP constant predicate (e.g., id > 5000), use estimate_filter_rows to get a more accurate row estimate before comparing against kVectorizedRowThreshold. Falls back to estimate_scan_rows when no filter is present or the filter is too complex to analyze. Add AnalyzeFilterSelectivity test to verify SELECT with WHERE clause works after ANALYZE and the chooser path is exercised. --- src/executor/query_executor.cpp | 36 +++++++++++++++++++++++- tests/cloudSQL_tests.cpp | 49 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index f0ac97e..b932f8a 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -415,7 +415,41 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, auto table_meta_opt = catalog_.get_table_by_name(table_name); if (table_meta_opt.has_value()) { const auto* table_meta = table_meta_opt.value(); - uint64_t estimated_rows = optimizer::RowEstimator::estimate_scan_rows(*table_meta); + uint64_t estimated_rows = 0; + + // Use filter selectivity when WHERE clause is simple and stats available + if (stmt.where() && stmt.where()->type() == parser::ExprType::Binary) { + const auto* bin_expr = dynamic_cast(stmt.where()); + if (bin_expr != nullptr) { + std::string col_name; + common::Value pred_val; + bool eligible = false; + + // col OP constant (e.g., id > 5000 or status = 'active') + if (bin_expr->left().type() == parser::ExprType::Column && + bin_expr->right().type() == parser::ExprType::Constant) { + col_name = bin_expr->left().to_string(); + pred_val = 
bin_expr->right().evaluate(nullptr, nullptr, current_params_); + eligible = true; + } else if (bin_expr->right().type() == parser::ExprType::Column && + bin_expr->left().type() == parser::ExprType::Constant) { + col_name = bin_expr->right().to_string(); + pred_val = bin_expr->left().evaluate(nullptr, nullptr, current_params_); + eligible = true; + } + + if (eligible) { + estimated_rows = + optimizer::RowEstimator::estimate_filter_rows(*table_meta, col_name, pred_val); + } + } + } + + // Fall back to scan estimate if no filter or not eligible for selectivity estimation + if (estimated_rows == 0) { + estimated_rows = optimizer::RowEstimator::estimate_scan_rows(*table_meta); + } + // Use Vectorized for large scans (>10k rows — heuristic crossover point) use_vectorized = estimated_rows > kVectorizedRowThreshold; } diff --git a/tests/cloudSQL_tests.cpp b/tests/cloudSQL_tests.cpp index 0fff2b5..6a82745 100644 --- a/tests/cloudSQL_tests.cpp +++ b/tests/cloudSQL_tests.cpp @@ -1474,4 +1474,53 @@ TEST(ExecutionTests, AnalyzeTableStringStats) { static_cast(std::remove("./test_data/analyze_strings.heap")); } +TEST(ExecutionTests, AnalyzeFilterSelectivity) { + // Test that filter selectivity is used for WHERE clause estimation + // after ANALYZE TABLE — a filtered scan of 10k rows should still use + // Vectorized when the filter is estimated to return >10k rows + static_cast(std::remove("./test_data/analyze_filter.heap")); + StorageManager storage("./test_data"); + BufferPoolManager sm(config::Config::DEFAULT_BUFFER_POOL_SIZE, storage); + auto catalog = Catalog::create(); + LockManager lm; + TransactionManager tm(lm, *catalog, sm, sm.get_log_manager()); + QueryExecutor exec(*catalog, sm, lm, tm); + + static_cast(exec.execute( + *Parser(std::make_unique("CREATE TABLE analyze_filter (id INT, val INT)")) + .parse_statement())); + + // Insert 15000 rows with id from 0 to 14999 + for (int batch = 0; batch < 15; ++batch) { + std::string vals; + for (int i = 0; i < 1000; ++i) { + vals 
+= "(" + std::to_string(batch * 1000 + i) + ", " + + std::to_string(batch * 1000 + i) + ")"; + if (i < 999) vals += ", "; + } + std::string sql = "INSERT INTO analyze_filter VALUES " + vals; + auto res = exec.execute(*Parser(std::make_unique(sql)).parse_statement()); + EXPECT_TRUE(res.success()) << "Batch " << batch << " insert failed"; + } + + const auto res_analyze = exec.execute( + *Parser(std::make_unique("ANALYZE TABLE analyze_filter")) + .parse_statement()); + EXPECT_TRUE(res_analyze.success()) << "ANALYZE TABLE failed"; + + // SELECT with WHERE id > 10000 — estimated ~5000 rows, still > kVectorizedRowThreshold + // should use Vectorized (no error thrown) + const auto res_select = exec.execute( + *Parser(std::make_unique("SELECT * FROM analyze_filter WHERE id > 10000")) + .parse_statement()); + EXPECT_TRUE(res_select.success()) << "SELECT with WHERE should work after ANALYZE"; + + // Verify stats: min=0, max=14999, so id > 10000 is within range + auto table_opt = catalog->get_table_by_name("analyze_filter"); + ASSERT_TRUE(table_opt.has_value()); + EXPECT_GE(table_opt.value()->num_rows, uint64_t(15000)); + + static_cast(std::remove("./test_data/analyze_filter.heap")); +} + } // namespace From 4381e767d347dcb6a00fe9ed9de98e06ccbd4498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 15:12:18 +0300 Subject: [PATCH 02/10] executor: add join reordering to build_vectorized_plan() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use RowEstimator::estimate_join_rows() to compare forward (A⋈B) vs reverse (B⋈A) join cardinalities. When the reverse order is estimated to produce fewer rows, swap the key expressions so the smaller side feeds as the build-side hash table. This reduces hash table size and probe cost without changing the operator tree topology. 
Track current_est_rows across the join chain so subsequent joins in a multi-join query also benefit from the heuristic. --- src/executor/query_executor.cpp | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index b932f8a..11f9450 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -1509,6 +1509,9 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( std::unique_ptr current_root = std::make_unique(base_table_name, col_table); + // Track estimated output rows for join reordering decisions + uint64_t current_est_rows = optimizer::RowEstimator::estimate_scan_rows(*base_table_meta); + /* Add JOINs (VectorizedHashJoinOperator) */ for (const auto& join : stmt.joins()) { const std::string join_table_name = join.table->to_string(); @@ -1594,6 +1597,39 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( return nullptr; // Vectorized path only supports equi-joins } + // Join reordering: estimate both join orders and pick the smaller-first approach. + // This heuristic puts the side estimated to produce fewer rows as the build (probe-side + // hash table) to reduce memory footprint and hash probe cost. + // Note: For multi-join chains, current_root may already be a join result — we use its + // estimated cardinality as the "left" side estimate. + std::string left_key_col = left_key ? left_key->to_string() : ""; + std::string right_key_col = right_key ? 
right_key->to_string() : ""; + uint64_t est_forward = 0; + uint64_t est_reverse = 0; + if (!left_key_col.empty() && !right_key_col.empty()) { + // Estimate forward: current_est_rows ⋈ join_table_meta (probe = right) + est_forward = optimizer::RowEstimator::estimate_join_rows( + ::cloudsql::TableInfo{0, "", {}, {}, {}, current_est_rows, "", 0, 0}, + *join_table_meta, right_key_col); + // Estimate reverse: join_table_meta ⋈ current_est_rows (probe = left) + ::cloudsql::TableInfo left_est; + left_est.num_rows = current_est_rows; + est_reverse = optimizer::RowEstimator::estimate_join_rows( + *join_table_meta, left_est, left_key_col); + // If reverse order is smaller, swap the key expressions so build/probe flip. + // The VectorizedHashJoinOperator uses left child as build, right as probe — + // swapping keys redirects the hash table to the smaller side. + if (est_reverse < est_forward && est_reverse > 0) { + // Swap left_key and right_key to redirect build to the smaller side + auto swapped_left = std::move(right_key); + auto swapped_right = std::move(left_key); + left_key = std::move(swapped_left); + right_key = std::move(swapped_right); + // Also swap the schema-based key column names for output schema ordering + std::swap(left_key_col, right_key_col); + } + } + executor::JoinType exec_join_type = executor::JoinType::Inner; if (join.type == parser::SelectStatement::JoinType::Left) { exec_join_type = executor::JoinType::Left; @@ -1615,6 +1651,9 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( std::move(current_root), std::move(right_scan), std::move(left_key), std::move(right_key), exec_join_type, output_schema); + // Update estimated row count for the join result (for subsequent joins in the chain) + current_est_rows = (est_reverse < est_forward && est_reverse > 0) ? 
est_reverse : est_forward; + current_root = std::move(join_op); } From 75ea76a4cf6856a30e859ed07b9174f9c9644aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 15:12:24 +0300 Subject: [PATCH 03/10] tests: add AnalyzeJoinOrder to verify join reordering after ANALYZE Creates big (10000 rows) and small (100 rows) tables, runs ANALYZE on both, then executes a join. Verifies the join succeeds using the Vectorized path without error, exercising the join reordering logic. --- tests/cloudSQL_tests.cpp | 82 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/tests/cloudSQL_tests.cpp b/tests/cloudSQL_tests.cpp index 6a82745..522e1e9 100644 --- a/tests/cloudSQL_tests.cpp +++ b/tests/cloudSQL_tests.cpp @@ -1523,4 +1523,86 @@ TEST(ExecutionTests, AnalyzeFilterSelectivity) { static_cast(std::remove("./test_data/analyze_filter.heap")); } +TEST(ExecutionTests, AnalyzeJoinOrder) { + // Test that join reordering picks the smaller-first build side after ANALYZE. + // Creates two tables (small=100 rows, big=10000 rows) with ANALYZE stats, + // then verifies a join between them executes without error using Vectorized path. 
+ static_cast(std::remove("./test_data/join_order_big.heap")); + static_cast(std::remove("./test_data/join_order_small.heap")); + StorageManager storage("./test_data"); + BufferPoolManager sm(config::Config::DEFAULT_BUFFER_POOL_SIZE, storage); + auto catalog = Catalog::create(); + LockManager lm; + TransactionManager tm(lm, *catalog, sm, sm.get_log_manager()); + QueryExecutor exec(*catalog, sm, lm, tm); + + // Create tables + ASSERT_TRUE( + exec.execute(*Parser(std::make_unique( + "CREATE TABLE join_order_big (id INT, val INT)")) + .parse_statement()) + .success()); + ASSERT_TRUE( + exec.execute(*Parser(std::make_unique( + "CREATE TABLE join_order_small (id INT, val INT)")) + .parse_statement()) + .success()); + + // Insert 10000 rows into big table + for (int batch = 0; batch < 10; ++batch) { + std::string vals; + for (int i = 0; i < 1000; ++i) { + vals += "(" + std::to_string(batch * 1000 + i) + ", " + + std::to_string(batch * 1000 + i) + ")"; + if (i < 999) vals += ", "; + } + std::string sql = "INSERT INTO join_order_big VALUES " + vals; + auto res = exec.execute(*Parser(std::make_unique(sql)).parse_statement()); + EXPECT_TRUE(res.success()) << "Big table batch " << batch << " insert failed"; + } + + // Insert 100 rows into small table + { + std::string vals; + for (int i = 0; i < 100; ++i) { + vals += "(" + std::to_string(i) + ", " + std::to_string(i) + ")"; + if (i < 99) vals += ", "; + } + std::string sql = "INSERT INTO join_order_small VALUES " + vals; + auto res = + exec.execute(*Parser(std::make_unique(sql)).parse_statement()); + EXPECT_TRUE(res.success()) << "Small table insert failed"; + } + + // ANALYZE both tables to populate stats + ASSERT_TRUE( + exec.execute(*Parser(std::make_unique("ANALYZE TABLE join_order_big")) + .parse_statement()) + .success()); + ASSERT_TRUE( + exec.execute(*Parser(std::make_unique("ANALYZE TABLE join_order_small")) + .parse_statement()) + .success()); + + // Verify ANALYZE populated stats + auto big_opt = 
catalog->get_table_by_name("join_order_big"); + ASSERT_TRUE(big_opt.has_value()); + EXPECT_GE(big_opt.value()->num_rows, uint64_t(10000)); + auto small_opt = catalog->get_table_by_name("join_order_small"); + ASSERT_TRUE(small_opt.has_value()); + EXPECT_GE(small_opt.value()->num_rows, uint64_t(100)); + + // Join: big ⋈ small on id — should use Vectorized path without error. + // With stats available, the optimizer estimates both orders and picks + // smaller-first (small as probe) when applicable. + const auto res_join = + exec.execute(*Parser(std::make_unique( + "SELECT * FROM join_order_big JOIN join_order_small ON join_order_big.id = join_order_small.id")) + .parse_statement()); + EXPECT_TRUE(res_join.success()) << "Join should succeed with ANALYZE stats"; + + static_cast(std::remove("./test_data/join_order_big.heap")); + static_cast(std::remove("./test_data/join_order_small.heap")); +} + } // namespace From 133410bceb42ac4daf76c11f3a0d838d4224b2df Mon Sep 17 00:00:00 2001 From: poyrazK <83272398+poyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 12:18:54 +0000 Subject: [PATCH 04/10] style: automated clang-format fixes --- src/executor/query_executor.cpp | 17 +++++++----- tests/cloudSQL_tests.cpp | 47 +++++++++++++++------------------ 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index 11f9450..ed1395d 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -429,7 +429,8 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, if (bin_expr->left().type() == parser::ExprType::Column && bin_expr->right().type() == parser::ExprType::Constant) { col_name = bin_expr->left().to_string(); - pred_val = bin_expr->right().evaluate(nullptr, nullptr, current_params_); + pred_val = + bin_expr->right().evaluate(nullptr, nullptr, current_params_); eligible = true; } else if (bin_expr->right().type() == parser::ExprType::Column && 
bin_expr->left().type() == parser::ExprType::Constant) { @@ -439,13 +440,14 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, } if (eligible) { - estimated_rows = - optimizer::RowEstimator::estimate_filter_rows(*table_meta, col_name, pred_val); + estimated_rows = optimizer::RowEstimator::estimate_filter_rows( + *table_meta, col_name, pred_val); } } } - // Fall back to scan estimate if no filter or not eligible for selectivity estimation + // Fall back to scan estimate if no filter or not eligible for selectivity + // estimation if (estimated_rows == 0) { estimated_rows = optimizer::RowEstimator::estimate_scan_rows(*table_meta); } @@ -1614,8 +1616,8 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( // Estimate reverse: join_table_meta ⋈ current_est_rows (probe = left) ::cloudsql::TableInfo left_est; left_est.num_rows = current_est_rows; - est_reverse = optimizer::RowEstimator::estimate_join_rows( - *join_table_meta, left_est, left_key_col); + est_reverse = optimizer::RowEstimator::estimate_join_rows(*join_table_meta, left_est, + left_key_col); // If reverse order is smaller, swap the key expressions so build/probe flip. // The VectorizedHashJoinOperator uses left child as build, right as probe — // swapping keys redirects the hash table to the smaller side. @@ -1652,7 +1654,8 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( std::move(right_key), exec_join_type, output_schema); // Update estimated row count for the join result (for subsequent joins in the chain) - current_est_rows = (est_reverse < est_forward && est_reverse > 0) ? est_reverse : est_forward; + current_est_rows = + (est_reverse < est_forward && est_reverse > 0) ? 
est_reverse : est_forward; current_root = std::move(join_op); } diff --git a/tests/cloudSQL_tests.cpp b/tests/cloudSQL_tests.cpp index 522e1e9..2436337 100644 --- a/tests/cloudSQL_tests.cpp +++ b/tests/cloudSQL_tests.cpp @@ -1495,7 +1495,7 @@ TEST(ExecutionTests, AnalyzeFilterSelectivity) { std::string vals; for (int i = 0; i < 1000; ++i) { vals += "(" + std::to_string(batch * 1000 + i) + ", " + - std::to_string(batch * 1000 + i) + ")"; + std::to_string(batch * 1000 + i) + ")"; if (i < 999) vals += ", "; } std::string sql = "INSERT INTO analyze_filter VALUES " + vals; @@ -1504,8 +1504,7 @@ TEST(ExecutionTests, AnalyzeFilterSelectivity) { } const auto res_analyze = exec.execute( - *Parser(std::make_unique("ANALYZE TABLE analyze_filter")) - .parse_statement()); + *Parser(std::make_unique("ANALYZE TABLE analyze_filter")).parse_statement()); EXPECT_TRUE(res_analyze.success()) << "ANALYZE TABLE failed"; // SELECT with WHERE id > 10000 — estimated ~5000 rows, still > kVectorizedRowThreshold @@ -1537,23 +1536,21 @@ TEST(ExecutionTests, AnalyzeJoinOrder) { QueryExecutor exec(*catalog, sm, lm, tm); // Create tables - ASSERT_TRUE( - exec.execute(*Parser(std::make_unique( - "CREATE TABLE join_order_big (id INT, val INT)")) - .parse_statement()) - .success()); - ASSERT_TRUE( - exec.execute(*Parser(std::make_unique( - "CREATE TABLE join_order_small (id INT, val INT)")) - .parse_statement()) - .success()); + ASSERT_TRUE(exec.execute(*Parser(std::make_unique( + "CREATE TABLE join_order_big (id INT, val INT)")) + .parse_statement()) + .success()); + ASSERT_TRUE(exec.execute(*Parser(std::make_unique( + "CREATE TABLE join_order_small (id INT, val INT)")) + .parse_statement()) + .success()); // Insert 10000 rows into big table for (int batch = 0; batch < 10; ++batch) { std::string vals; for (int i = 0; i < 1000; ++i) { vals += "(" + std::to_string(batch * 1000 + i) + ", " + - std::to_string(batch * 1000 + i) + ")"; + std::to_string(batch * 1000 + i) + ")"; if (i < 999) vals += ", "; 
} std::string sql = "INSERT INTO join_order_big VALUES " + vals; @@ -1569,20 +1566,18 @@ TEST(ExecutionTests, AnalyzeJoinOrder) { if (i < 99) vals += ", "; } std::string sql = "INSERT INTO join_order_small VALUES " + vals; - auto res = - exec.execute(*Parser(std::make_unique(sql)).parse_statement()); + auto res = exec.execute(*Parser(std::make_unique(sql)).parse_statement()); EXPECT_TRUE(res.success()) << "Small table insert failed"; } // ANALYZE both tables to populate stats ASSERT_TRUE( - exec.execute(*Parser(std::make_unique("ANALYZE TABLE join_order_big")) - .parse_statement()) - .success()); - ASSERT_TRUE( - exec.execute(*Parser(std::make_unique("ANALYZE TABLE join_order_small")) - .parse_statement()) + exec.execute( + *Parser(std::make_unique("ANALYZE TABLE join_order_big")).parse_statement()) .success()); + ASSERT_TRUE(exec.execute(*Parser(std::make_unique("ANALYZE TABLE join_order_small")) + .parse_statement()) + .success()); // Verify ANALYZE populated stats auto big_opt = catalog->get_table_by_name("join_order_big"); @@ -1595,10 +1590,10 @@ TEST(ExecutionTests, AnalyzeJoinOrder) { // Join: big ⋈ small on id — should use Vectorized path without error. // With stats available, the optimizer estimates both orders and picks // smaller-first (small as probe) when applicable. 
- const auto res_join = - exec.execute(*Parser(std::make_unique( - "SELECT * FROM join_order_big JOIN join_order_small ON join_order_big.id = join_order_small.id")) - .parse_statement()); + const auto res_join = exec.execute( + *Parser(std::make_unique("SELECT * FROM join_order_big JOIN join_order_small ON " + "join_order_big.id = join_order_small.id")) + .parse_statement()); EXPECT_TRUE(res_join.success()) << "Join should succeed with ANALYZE stats"; static_cast(std::remove("./test_data/join_order_big.heap")); From 6dc92ea94c6aaaab0cf1dd1a4fee0145d77ec422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 15:59:22 +0300 Subject: [PATCH 05/10] =?UTF-8?q?executor:=20fix=20review=20findings=20?= =?UTF-8?q?=E2=80=94=20filter=20selectivity=20sentinel=20and=20join=20comm?= =?UTF-8?q?ent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Filter selectivity: start with scan estimate as baseline, override only when filter is eligible. Fixes 0-ambiguity bug where empty tables would incorrectly fall back to full scan estimate. 2. Join reordering comment: "build (probe-side hash table)" → "build (hashed) side" — code was correct, comment was wrong. 3. Add NOTE comment about to_string() format stability dependency in column key lookup for join reordering. 
--- src/executor/query_executor.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index ed1395d..70cc791 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -415,7 +415,8 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, auto table_meta_opt = catalog_.get_table_by_name(table_name); if (table_meta_opt.has_value()) { const auto* table_meta = table_meta_opt.value(); - uint64_t estimated_rows = 0; + // Start with scan estimate as baseline; filter selectivity will override if eligible + uint64_t estimated_rows = optimizer::RowEstimator::estimate_scan_rows(*table_meta); // Use filter selectivity when WHERE clause is simple and stats available if (stmt.where() && stmt.where()->type() == parser::ExprType::Binary) { @@ -446,13 +447,7 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, } } - // Fall back to scan estimate if no filter or not eligible for selectivity - // estimation - if (estimated_rows == 0) { - estimated_rows = optimizer::RowEstimator::estimate_scan_rows(*table_meta); - } - - // Use Vectorized for large scans (>10k rows — heuristic crossover point) +// Use Vectorized for large scans (>10k rows — heuristic crossover point) use_vectorized = estimated_rows > kVectorizedRowThreshold; } } @@ -1600,12 +1595,15 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( } // Join reordering: estimate both join orders and pick the smaller-first approach. - // This heuristic puts the side estimated to produce fewer rows as the build (probe-side - // hash table) to reduce memory footprint and hash probe cost. + // This heuristic puts the side estimated to produce fewer rows as the build (hashed) side + // to reduce hash table memory footprint and probe cost. 
// Note: For multi-join chains, current_root may already be a join result — we use its // estimated cardinality as the "left" side estimate. std::string left_key_col = left_key ? left_key->to_string() : ""; std::string right_key_col = right_key ? right_key->to_string() : ""; + // NOTE: column lookup depends on expression to_string() format stability. + // If the printer format changes, get_column() silently fails and we fall back + // to cross-product estimate. Consider using column position indices instead. uint64_t est_forward = 0; uint64_t est_reverse = 0; if (!left_key_col.empty() && !right_key_col.empty()) { From 4d5916f6e5542fa64dd24a55d6db22274a75c19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 15:59:26 +0300 Subject: [PATCH 06/10] tests: add result row count assertion to AnalyzeJoinOrder Verify the join returns exactly 100 rows (small.id 0-99 match big.id 0-99), not just that it succeeds. Confirms correctness. --- tests/cloudSQL_tests.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/cloudSQL_tests.cpp b/tests/cloudSQL_tests.cpp index 2436337..f3738aa 100644 --- a/tests/cloudSQL_tests.cpp +++ b/tests/cloudSQL_tests.cpp @@ -1595,6 +1595,10 @@ TEST(ExecutionTests, AnalyzeJoinOrder) { "join_order_big.id = join_order_small.id")) .parse_statement()); EXPECT_TRUE(res_join.success()) << "Join should succeed with ANALYZE stats"; + // Verify correctness: small table has 100 rows (id 0-99), big table has 10000 (id 0-9999), + // so join on id should return exactly 100 rows. 
+ EXPECT_EQ(res_join.rows().size(), 100) + << "Join should return 100 rows (small.id 0-99 match big.id 0-99)"; static_cast(std::remove("./test_data/join_order_big.heap")); static_cast(std::remove("./test_data/join_order_small.heap")); From 7e53e29d3a132253e226ca993939bc79b4c2a3df Mon Sep 17 00:00:00 2001 From: poyrazK <83272398+poyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 13:04:50 +0000 Subject: [PATCH 07/10] style: automated clang-format fixes --- src/executor/query_executor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index 70cc791..beae09e 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -415,7 +415,8 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, auto table_meta_opt = catalog_.get_table_by_name(table_name); if (table_meta_opt.has_value()) { const auto* table_meta = table_meta_opt.value(); - // Start with scan estimate as baseline; filter selectivity will override if eligible + // Start with scan estimate as baseline; filter selectivity will override if + // eligible uint64_t estimated_rows = optimizer::RowEstimator::estimate_scan_rows(*table_meta); // Use filter selectivity when WHERE clause is simple and stats available @@ -447,7 +448,7 @@ QueryResult QueryExecutor::execute_select(const parser::SelectStatement& stmt, } } -// Use Vectorized for large scans (>10k rows — heuristic crossover point) + // Use Vectorized for large scans (>10k rows — heuristic crossover point) use_vectorized = estimated_rows > kVectorizedRowThreshold; } } From efea10d00ac489b05b8f84ec152746d193853864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 16:17:40 +0300 Subject: [PATCH 08/10] executor: gate join reordering on Inner join type only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Outer joins (LEFT/RIGHT/FULL) must preserve build/probe semantics — reordering keys could change which side is the outer side, breaking query correctness. The reordering block is now wrapped with: if (exec_join_type == executor::JoinType::Inner) { ... } Also moves join type determination before the reordering block so the gating variable is available. Row estimate update is also scoped to inner joins only. --- src/executor/query_executor.cpp | 79 ++++++++++++++++----------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index beae09e..dfa1d2e 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -1595,42 +1595,7 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( return nullptr; // Vectorized path only supports equi-joins } - // Join reordering: estimate both join orders and pick the smaller-first approach. - // This heuristic puts the side estimated to produce fewer rows as the build (hashed) side - // to reduce hash table memory footprint and probe cost. - // Note: For multi-join chains, current_root may already be a join result — we use its - // estimated cardinality as the "left" side estimate. - std::string left_key_col = left_key ? left_key->to_string() : ""; - std::string right_key_col = right_key ? right_key->to_string() : ""; - // NOTE: column lookup depends on expression to_string() format stability. - // If the printer format changes, get_column() silently fails and we fall back - // to cross-product estimate. Consider using column position indices instead. 
- uint64_t est_forward = 0; - uint64_t est_reverse = 0; - if (!left_key_col.empty() && !right_key_col.empty()) { - // Estimate forward: current_est_rows ⋈ join_table_meta (probe = right) - est_forward = optimizer::RowEstimator::estimate_join_rows( - ::cloudsql::TableInfo{0, "", {}, {}, {}, current_est_rows, "", 0, 0}, - *join_table_meta, right_key_col); - // Estimate reverse: join_table_meta ⋈ current_est_rows (probe = left) - ::cloudsql::TableInfo left_est; - left_est.num_rows = current_est_rows; - est_reverse = optimizer::RowEstimator::estimate_join_rows(*join_table_meta, left_est, - left_key_col); - // If reverse order is smaller, swap the key expressions so build/probe flip. - // The VectorizedHashJoinOperator uses left child as build, right as probe — - // swapping keys redirects the hash table to the smaller side. - if (est_reverse < est_forward && est_reverse > 0) { - // Swap left_key and right_key to redirect build to the smaller side - auto swapped_left = std::move(right_key); - auto swapped_right = std::move(left_key); - left_key = std::move(swapped_left); - right_key = std::move(swapped_right); - // Also swap the schema-based key column names for output schema ordering - std::swap(left_key_col, right_key_col); - } - } - + // Determine join type — needed before reordering to gate outer joins executor::JoinType exec_join_type = executor::JoinType::Inner; if (join.type == parser::SelectStatement::JoinType::Left) { exec_join_type = executor::JoinType::Left; @@ -1640,6 +1605,44 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( exec_join_type = executor::JoinType::Full; } + // Join reordering: estimate both join orders and pick the smaller-first approach. + // Only applies to inner joins — outer join semantics require preserving build/probe sides. + if (exec_join_type == executor::JoinType::Inner) { + std::string left_key_col = left_key ? left_key->to_string() : ""; + std::string right_key_col = right_key ? 
right_key->to_string() : ""; + // NOTE: column lookup depends on expression to_string() format stability. + // If the printer format changes, get_column() silently fails and we fall back + // to cross-product estimate. Consider using column position indices instead. + uint64_t est_forward = 0; + uint64_t est_reverse = 0; + if (!left_key_col.empty() && !right_key_col.empty()) { + // Estimate forward: current_est_rows ⋈ join_table_meta (probe = right) + est_forward = optimizer::RowEstimator::estimate_join_rows( + ::cloudsql::TableInfo{0, "", {}, {}, {}, current_est_rows, "", 0, 0}, + *join_table_meta, right_key_col); + // Estimate reverse: join_table_meta ⋈ current_est_rows (probe = left) + ::cloudsql::TableInfo left_est; + left_est.num_rows = current_est_rows; + est_reverse = optimizer::RowEstimator::estimate_join_rows(*join_table_meta, left_est, + left_key_col); + // If reverse order is smaller, swap the key expressions so build/probe flip. + // The VectorizedHashJoinOperator uses left child as build, right as probe — + // swapping keys redirects the hash table to the smaller side. + if (est_reverse < est_forward && est_reverse > 0) { + // Swap left_key and right_key to redirect build to the smaller side + auto swapped_left = std::move(right_key); + auto swapped_right = std::move(left_key); + left_key = std::move(swapped_left); + right_key = std::move(swapped_right); + // Also swap the schema-based key column names for output schema ordering + std::swap(left_key_col, right_key_col); + } + } + + // Update estimated row count for the join result (for subsequent joins in chain) + current_est_rows = (est_reverse > 0 && est_reverse < est_forward) ? 
est_reverse : est_forward; + } + executor::Schema output_schema; for (const auto& col : current_root->output_schema().columns()) { output_schema.add_column(col.name(), col.type(), col.nullable()); @@ -1652,10 +1655,6 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( std::move(current_root), std::move(right_scan), std::move(left_key), std::move(right_key), exec_join_type, output_schema); - // Update estimated row count for the join result (for subsequent joins in the chain) - current_est_rows = - (est_reverse < est_forward && est_reverse > 0) ? est_reverse : est_forward; - current_root = std::move(join_op); } From 38264a21407e7a6d619ccf5a7e111b456207756d Mon Sep 17 00:00:00 2001 From: poyrazK <83272398+poyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 13:18:36 +0000 Subject: [PATCH 09/10] style: automated clang-format fixes --- src/executor/query_executor.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/executor/query_executor.cpp b/src/executor/query_executor.cpp index dfa1d2e..4a38972 100644 --- a/src/executor/query_executor.cpp +++ b/src/executor/query_executor.cpp @@ -1623,8 +1623,8 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( // Estimate reverse: join_table_meta ⋈ current_est_rows (probe = left) ::cloudsql::TableInfo left_est; left_est.num_rows = current_est_rows; - est_reverse = optimizer::RowEstimator::estimate_join_rows(*join_table_meta, left_est, - left_key_col); + est_reverse = optimizer::RowEstimator::estimate_join_rows(*join_table_meta, + left_est, left_key_col); // If reverse order is smaller, swap the key expressions so build/probe flip. // The VectorizedHashJoinOperator uses left child as build, right as probe — // swapping keys redirects the hash table to the smaller side. 
@@ -1640,7 +1640,8 @@ std::unique_ptr QueryExecutor::build_vectorized_plan( } // Update estimated row count for the join result (for subsequent joins in chain) - current_est_rows = (est_reverse > 0 && est_reverse < est_forward) ? est_reverse : est_forward; + current_est_rows = + (est_reverse > 0 && est_reverse < est_forward) ? est_reverse : est_forward; } executor::Schema output_schema; From 766c703a0ab3e5ff21215e6d5124e26664a75931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Thu, 7 May 2026 16:51:44 +0300 Subject: [PATCH 10/10] docs: add ADR 002 and extend ADR 001 with Phase 2 details ADR 002 documents the outer join safety decision (inner joins only for reordering) from PR #79 review. ADR 001 gets a Phase 2 Extensions section covering filter selectivity strategies and join reordering mechanics, with a reference to ADR 002. --- docs/adr/001-analyze-table-and-chooser.md | 38 ++++++++++++++++ docs/adr/002-join-reordering-inner-only.md | 53 ++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 docs/adr/002-join-reordering-inner-only.md diff --git a/docs/adr/001-analyze-table-and-chooser.md b/docs/adr/001-analyze-table-and-chooser.md index 5c55004..0a493dc 100644 --- a/docs/adr/001-analyze-table-and-chooser.md +++ b/docs/adr/001-analyze-table-and-chooser.md @@ -60,3 +60,41 @@ Implement a row-count-based chooser that selects the Vectorized path when estima ### Alternative 3: Dynamic threshold based on benchmark calibration **Why rejected:** Without production workload profiles, a static 10k heuristic is sufficient. Threshold can be exposed as a session/cluster config in Phase 2. + +--- + +## Phase 2 Extensions (PR #79) + +### Filter Selectivity in Chooser + +PR #79 extended the cost-based chooser in `execute_select()` to use filter selectivity when estimating rows for `WHERE col OP constant` predicates. 
Instead of using the full table scan estimate for all queries, the chooser now: + +1. Starts with `estimate_scan_rows()` as baseline +2. If WHERE clause is a simple binary expression (Column-Constant or Constant-Column), calls `estimate_filter_rows(table, col_name, pred_val)` +3. Uses the filter estimate if the column has statistics; otherwise falls back to scan estimate +4. Compares against `kVectorizedRowThreshold` to decide Volcano vs Vectorized + +**Selectivity estimation strategies** (`estimate_filter_rows()` in `row_estimator.cpp`): +- **Equality** (`WHERE col = value`): uses `1/NDV` selectivity +- **Integer range** (`WHERE id > 10000`): uses `(max - value) / (max - min)` when value is within [min, max] +- **String length** (`WHERE name LIKE 'prefix%'`): uses `1/(max_str_len - min_str_len + 1)` selectivity + +### Join Reordering in Vectorized Path + +PR #79 also added join reordering to `build_vectorized_plan()`. For each INNER JOIN: + +1. Determine `exec_join_type` (Inner/Left/Right/Full) +2. **Only for Inner joins** (outer join semantics must be preserved — see ADR 002): + - Extract left and right key column names as strings + - Compute `est_forward = estimate_join_rows(current_chain, right_table, right_key)` + - Compute `est_reverse = estimate_join_rows(right_table, current_chain, left_key)` + - If `est_reverse < est_forward && est_reverse > 0`, swap `left_key` and `right_key` so the smaller table feeds the build-side hash table +3. Update `current_est_rows` for the join output (used by subsequent joins in the chain) + +**Key constraint:** Swapping key expressions does not change the operator tree topology (left child remains build, right child remains probe). It only redirects which table's key is hashed on the build side. Physical reordering (swapping build/probe children) is a Phase 3 item. 
+ +**Row estimate tracking:** `current_est_rows` is initialized from `estimate_scan_rows(base_table)` and updated after each join in the chain, allowing subsequent joins to benefit from accurate intermediate sizes. + +### ADR 002: Outer Join Safety + +The inner-join-only gating for join reordering is documented in [ADR 002](./002-join-reordering-inner-only.md). Outer joins (LEFT/RIGHT/FULL) skip reordering entirely to preserve which side is the outer (null-producing) side. diff --git a/docs/adr/002-join-reordering-inner-only.md b/docs/adr/002-join-reordering-inner-only.md new file mode 100644 index 0000000..de891bc --- /dev/null +++ b/docs/adr/002-join-reordering-inner-only.md @@ -0,0 +1,53 @@ +# ADR 002: Join Reordering — Inner Join Only Constraint + +## Status +Accepted + +## Date +2026-05-07 + +## Context + +PR #79 extended the Cost-Based Optimizer (Phase 2) with join reordering in `build_vectorized_plan()`. For each INNER JOIN, the optimizer estimates both orderings (A⋈B and B⋈A) using `RowEstimator::estimate_join_rows()` and swaps key expressions when the reverse order produces fewer rows — reducing the build-side hash table size. + +However, the same reordering logic was originally written to apply to all join types including LEFT, RIGHT, and FULL outer joins. For outer joins, swapping key expressions can change which side is the outer (built) side, breaking query semantics — the side that must be preserved for null-outer tuples. + +## Decision + +Wrap the join reordering block with a type check: + +```cpp +if (exec_join_type == executor::JoinType::Inner) { + // ... estimate both orderings, swap keys if reverse is smaller ... +} +``` + +Outer joins (LEFT/RIGHT/FULL) skip reordering entirely. The `exec_join_type` variable is determined before the reordering block so the gating is available. 
+ +Additionally, the `current_est_rows` update (which tracks join output cardinality for subsequent joins in a chain) is also scoped inside the inner-join block, since outer join output size doesn't follow the same selectivity model. + +## Consequences + +### Positive +- LEFT/RIGHT/FULL joins preserve outer side semantics — null-outer tuples are produced from the correct side +- INNER joins benefit from smaller build-side hash tables when the right table is smaller +- `current_est_rows` for outer join chains is not polluted with potentially misleading inner-join estimates + +### Negative +- A LEFT JOIN where the right (inner) table is much smaller won't get key-swapped to reduce build size — the larger left side remains the build side +- Future work could include physical operator tree reordering for outer joins (not just key swapping), but that requires more significant changes to `build_vectorized_plan()` + +### Neutral +- Volcano path (`build_plan()`) does not have join reordering — this constraint applies only to the Vectorized path +- The inner join gating is a one-line change but is well-documented + +## Alternatives Considered + +### Alternative 1: Reorder operator tree for outer joins (swap build/probe children) +**Why rejected:** `VectorizedHashJoinOperator` always uses `current_root` as build and `right_scan` as probe. Swapping children would require reconstructing the scan operators, which is architecturally complex for Phase 2. The simpler key-swap approach is sufficient for inner joins. + +### Alternative 2: Apply reordering to LEFT/RIGHT but not FULL +**Why rejected:** LEFT and RIGHT outer joins also have semantics that depend on preserving which side is the outer side. FULL outer joins are the clearest case (both sides can produce null-outer tuples), but LEFT/RIGHT have the same issue. Gate on all outer join types consistently. 
+ +### Alternative 3: Key swap for outer joins with outer-side detection +**Why rejected:** The join type itself determines which side is the outer (null-producing) side, but proving that a key swap still produces null-outer tuples from the correct side — i.e., that the swapped plan matches rows the same way when one side has no match — requires semantic analysis of the query. This is non-trivial and better left to a future Phase 3 physical optimizer. \ No newline at end of file