diff --git a/.github/workflows/cpp-cpu.yml b/.github/workflows/pipeline.yml similarity index 97% rename from .github/workflows/cpp-cpu.yml rename to .github/workflows/pipeline.yml index 3db8102..8b1d668 100644 --- a/.github/workflows/cpp-cpu.yml +++ b/.github/workflows/pipeline.yml @@ -1,8 +1,8 @@ -name: C++ CPU Tests +name: CI Pipeline on: push: - branches: [master] + branches: [master, tpch_bench] pull_request: jobs: @@ -14,7 +14,7 @@ jobs: image: rapidsai/base:25.10a-cuda12-py3.12 - cudf: "25.02" image: rapidsai/base:25.02-cuda12.0-py3.12 - name: C++ CPU Tests (cudf ${{ matrix.cudf }}) + name: CI Pipeline (cudf ${{ matrix.cudf }}) runs-on: ubuntu-latest container: image: ${{ matrix.image }} diff --git a/Cargo.lock b/Cargo.lock index be8a70b..7adcd35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,19 +150,40 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-csv 53.4.1", + "arrow-data 53.4.1", + "arrow-ipc 53.4.1", + "arrow-json 53.4.1", + "arrow-ord 53.4.1", + "arrow-row 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "arrow-string 53.4.1", +] + +[[package]] +name = "arrow" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1" +dependencies = [ + "arrow-arith 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-csv 54.2.1", + "arrow-data 54.3.1", + "arrow-ipc 54.2.1", + "arrow-json 54.2.1", + "arrow-ord 54.2.1", + "arrow-row 54.2.1", + "arrow-schema 54.3.1", + 
"arrow-select 54.2.1", + "arrow-string 54.2.1", ] [[package]] @@ -171,15 +192,29 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", "chrono", "half", "num", ] +[[package]] +name = "arrow-arith" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "chrono", + "num", +] + [[package]] name = "arrow-array" version = "53.4.1" @@ -187,9 +222,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-array" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5" +dependencies = [ + "ahash", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "chrono", "chrono-tz", "half", @@ -208,17 +260,49 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-cast" version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", "atoi", "base64", "chrono", @@ -235,11 +319,11 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", "chrono", "csv", "csv-core", @@ -248,14 +332,42 @@ dependencies = [ "regex", ] +[[package]] +name = "arrow-csv" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65" +dependencies = [ + "arrow-array 54.2.1", + "arrow-cast 54.2.1", + "arrow-schema 54.3.1", + "chrono", + "csv", + "csv-core", + "lazy_static", + "regex", +] + [[package]] name = "arrow-data" version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 53.4.1", + "arrow-schema 53.4.1", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "54.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429" +dependencies = [ + "arrow-buffer 54.3.1", + "arrow-schema 54.3.1", "half", "num", ] @@ -266,11 +378,25 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "flatbuffers 24.12.23", + "lz4_flex", +] + +[[package]] +name = "arrow-ipc" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "flatbuffers 24.12.23", "lz4_flex", ] @@ -281,11 +407,31 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-json" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "chrono", "half", "indexmap", @@ -301,15 +447,28 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", "half", "num", ] +[[package]] +name = "arrow-ord" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", +] + [[package]] name = "arrow-row" version = "53.4.1" @@ -317,10 +476,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "half", +] + +[[package]] +name = "arrow-row" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "half", ] @@ -330,6 +502,12 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" +[[package]] +name = "arrow-schema" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9" + [[package]] name = "arrow-select" version = "53.4.1" @@ -337,10 +515,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "num", +] + +[[package]] +name = "arrow-select" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c" +dependencies = [ + "ahash", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "num", ] @@ -350,11 +542,28 @@ version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "arrow-string" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", "memchr", "num", "regex", @@ -446,16 +655,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.3" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.3.0", ] [[package]] @@ -527,9 +736,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.56" +version = 
"1.2.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283" dependencies = [ "find-msvc-tools", "jobserver", @@ -607,9 +816,9 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] @@ -671,6 +880,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -743,40 +961,91 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "014fc8c384ecacedaabb3bc8359c2a6c6e9d8f7bea65be3434eccacfc37f52d9" dependencies = [ - "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow 53.4.1", + "arrow-array 53.4.1", + "arrow-ipc 53.4.1", + "arrow-schema 53.4.1", "async-compression", "async-trait", "bytes", "bzip2", "chrono", "dashmap", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-sql", + "datafusion-catalog 44.0.0", + "datafusion-common 44.0.0", + 
"datafusion-common-runtime 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-functions 44.0.0", + "datafusion-functions-aggregate 44.0.0", + "datafusion-functions-nested 44.0.0", + "datafusion-functions-table 44.0.0", + "datafusion-functions-window 44.0.0", + "datafusion-optimizer 44.0.0", + "datafusion-physical-expr 44.0.0", + "datafusion-physical-expr-common 44.0.0", + "datafusion-physical-optimizer 44.0.0", + "datafusion-physical-plan 44.0.0", + "datafusion-sql 44.0.0", "flate2", "futures", "glob", - "itertools", + "itertools 0.13.0", "log", "object_store", "parking_lot", - "parquet", + "parquet 53.4.1", + "rand", + "regex", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +dependencies = [ + "arrow 54.2.1", + "arrow-array 54.2.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.3.1", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog 45.0.0", + "datafusion-common 45.0.0", + "datafusion-common-runtime 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-functions 45.0.0", + "datafusion-functions-aggregate 45.0.0", + "datafusion-functions-nested 45.0.0", + "datafusion-functions-table 45.0.0", + "datafusion-functions-window 45.0.0", + "datafusion-optimizer 45.0.0", + "datafusion-physical-expr 45.0.0", + "datafusion-physical-expr-common 45.0.0", + "datafusion-physical-optimizer 45.0.0", + "datafusion-physical-plan 45.0.0", + "datafusion-sql 45.0.0", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "parquet 54.2.1", "rand", "regex", "sqlparser", @@ -795,13 +1064,34 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ee60d33e210ef96070377ae667ece7caa0e959c8387496773d4a1a72f1a5012e" dependencies = [ - "arrow-schema", + "arrow-schema 53.4.1", + "async-trait", + "datafusion-common 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-physical-plan 44.0.0", + "parking_lot", +] + +[[package]] +name = "datafusion-catalog" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +dependencies = [ + "arrow 54.2.1", "async-trait", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", + "dashmap", + "datafusion-common 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-physical-plan 45.0.0", + "datafusion-sql 45.0.0", + "futures", + "itertools 0.14.0", + "log", "parking_lot", + "sqlparser", ] [[package]] @@ -811,17 +1101,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b42b7d720fe21ed9cca2ebb635f3f13a12cfab786b41e0fba184fb2e620525b" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-schema 53.4.1", + "half", + "hashbrown 0.14.5", + "indexmap", + "libc", + "log", + "object_store", + "parquet 53.4.1", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +dependencies = [ + "ahash", + "arrow 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.3.1", + "base64", "half", "hashbrown 0.14.5", "indexmap", "libc", "log", "object_store", - "parquet", + "parquet 54.2.1", "paste", "recursive", "sqlparser", @@ -839,22 +1156,57 @@ dependencies = [ "tokio", ] +[[package]] +name 
= "datafusion-common-runtime" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +dependencies = [ + "log", + "tokio", +] + [[package]] name = "datafusion-doc" version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c278dbd64860ed0bb5240fc1f4cb6aeea437153910aea69bcf7d5a8d6d0454f3" +[[package]] +name = "datafusion-doc" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" + [[package]] name = "datafusion-execution" version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e22cb02af47e756468b3cbfee7a83e3d4f2278d452deb4b033ba933c75169486" dependencies = [ - "arrow", + "arrow 53.4.1", "dashmap", - "datafusion-common", - "datafusion-expr", + "datafusion-common 44.0.0", + "datafusion-expr 44.0.0", + "futures", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-execution" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +dependencies = [ + "arrow 54.2.1", + "dashmap", + "datafusion-common 45.0.0", + "datafusion-expr 45.0.0", "futures", "log", "object_store", @@ -870,14 +1222,35 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62298eadb1d15b525df1315e61a71519ffc563d41d5c3b2a30fda2d70f77b93c" dependencies = [ - "arrow", + "arrow 53.4.1", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", + "datafusion-common 44.0.0", + "datafusion-doc 44.0.0", + "datafusion-expr-common 44.0.0", + 
"datafusion-functions-aggregate-common 44.0.0", + "datafusion-functions-window-common 44.0.0", + "datafusion-physical-expr-common 44.0.0", + "indexmap", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +dependencies = [ + "arrow 54.2.1", + "chrono", + "datafusion-common 45.0.0", + "datafusion-doc 45.0.0", + "datafusion-expr-common 45.0.0", + "datafusion-functions-aggregate-common 45.0.0", + "datafusion-functions-window-common 45.0.0", + "datafusion-physical-expr-common 45.0.0", "indexmap", "paste", "recursive", @@ -891,9 +1264,21 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dda7f73c5fc349251cd3dcb05773c5bf55d2505a698ef9d38dfc712161ea2f55" dependencies = [ - "arrow", - "datafusion-common", - "itertools", + "arrow 53.4.1", + "datafusion-common 44.0.0", + "itertools 0.13.0", +] + +[[package]] +name = "datafusion-expr-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +dependencies = [ + "arrow 54.2.1", + "datafusion-common 45.0.0", + "itertools 0.14.0", + "paste", ] [[package]] @@ -902,21 +1287,51 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd197f3b2975424d3a4898ea46651be855a46721a56727515dbd5c9e2fb597da" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 53.4.1", + "arrow-buffer 53.4.1", "base64", "blake2", "blake3", "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", + "datafusion-common 44.0.0", + "datafusion-doc 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-expr-common 44.0.0", + 
"datafusion-macros 44.0.0", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.13.0", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +dependencies = [ + "arrow 54.2.1", + "arrow-buffer 54.3.1", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common 45.0.0", + "datafusion-doc 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-expr-common 45.0.0", + "datafusion-macros 45.0.0", + "hashbrown 0.14.5", + "hex", + "itertools 0.14.0", "log", "md-5", "rand", @@ -933,16 +1348,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aabbe48fba18f9981b134124381bee9e46f93518b8ad2f9721ee296cef5affb9" dependencies = [ "ahash", - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "arrow 53.4.1", + "arrow-schema 53.4.1", + "datafusion-common 44.0.0", + "datafusion-doc 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-functions-aggregate-common 44.0.0", + "datafusion-macros 44.0.0", + "datafusion-physical-expr 44.0.0", + "datafusion-physical-expr-common 44.0.0", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +dependencies = [ + "ahash", + "arrow 54.2.1", + "arrow-buffer 54.3.1", + "arrow-schema 54.3.1", + "datafusion-common 45.0.0", + "datafusion-doc 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + 
"datafusion-functions-aggregate-common 45.0.0", + "datafusion-macros 45.0.0", + "datafusion-physical-expr 45.0.0", + "datafusion-physical-expr-common 45.0.0", "half", "log", "paste", @@ -955,10 +1393,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7a3fefed9c8c11268d446d924baca8cabf52fe32f73fdaa20854bac6473590c" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "arrow 53.4.1", + "datafusion-common 44.0.0", + "datafusion-expr-common 44.0.0", + "datafusion-physical-expr-common 44.0.0", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +dependencies = [ + "ahash", + "arrow 54.2.1", + "datafusion-common 45.0.0", + "datafusion-expr-common 45.0.0", + "datafusion-physical-expr-common 45.0.0", ] [[package]] @@ -967,18 +1418,42 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6360f27464fab857bec698af39b2ae331dc07c8bf008fb4de387a19cdc6815a5" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-physical-expr-common", - "itertools", + "arrow 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-ord 53.4.1", + "arrow-schema 53.4.1", + "datafusion-common 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-functions 44.0.0", + "datafusion-functions-aggregate 44.0.0", + "datafusion-physical-expr-common 44.0.0", + "itertools 0.13.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-nested" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +dependencies = [ + "arrow 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-ord 54.2.1", + "arrow-schema 54.3.1", + "datafusion-common 45.0.0", + "datafusion-doc 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-functions 45.0.0", + "datafusion-functions-aggregate 45.0.0", + "datafusion-macros 45.0.0", + "datafusion-physical-expr-common 45.0.0", + "itertools 0.14.0", "log", "paste", ] @@ -989,12 +1464,28 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c35c070eb705c12795dab399c3809f4dfbc290678c624d3989490ca9b8449c1" dependencies = [ - "arrow", + "arrow 53.4.1", + "async-trait", + "datafusion-catalog 44.0.0", + "datafusion-common 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-physical-plan 44.0.0", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +dependencies = [ + "arrow 54.2.1", "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", + "datafusion-catalog 45.0.0", + "datafusion-common 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-physical-plan 45.0.0", "parking_lot", "paste", ] @@ -1005,13 +1496,30 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52229bca26b590b140900752226c829f15fc1a99840e1ca3ce1a9534690b82a8" dependencies = [ - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 44.0.0", + "datafusion-doc 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-functions-window-common 44.0.0", + "datafusion-macros 44.0.0", + 
"datafusion-physical-expr 44.0.0", + "datafusion-physical-expr-common 44.0.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +dependencies = [ + "datafusion-common 45.0.0", + "datafusion-doc 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-functions-window-common 45.0.0", + "datafusion-macros 45.0.0", + "datafusion-physical-expr 45.0.0", + "datafusion-physical-expr-common 45.0.0", "log", "paste", ] @@ -1022,8 +1530,18 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "367befc303b64a668a10ae6988a064a9289e1999e71a7f8e526b6e14d6bdd9d6" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "datafusion-common 44.0.0", + "datafusion-physical-expr-common 44.0.0", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +dependencies = [ + "datafusion-common 45.0.0", + "datafusion-physical-expr-common 45.0.0", ] [[package]] @@ -1036,19 +1554,49 @@ dependencies = [ "syn", ] +[[package]] +name = "datafusion-macros" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +dependencies = [ + "datafusion-expr 45.0.0", + "quote", + "syn", +] + [[package]] name = "datafusion-optimizer" version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53b520413906f755910422b016fb73884ae6e9e1b376de4f9584b6c0e031da75" dependencies = [ - "arrow", + "arrow 53.4.1", "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", + "datafusion-common 44.0.0", + "datafusion-expr 44.0.0", + 
"datafusion-physical-expr 44.0.0", "indexmap", - "itertools", + "itertools 0.13.0", + "log", + "recursive", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-optimizer" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +dependencies = [ + "arrow 54.2.1", + "chrono", + "datafusion-common 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-physical-expr 45.0.0", + "indexmap", + "itertools 0.14.0", "log", "recursive", "regex", @@ -1062,22 +1610,47 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acd6ddc378f6ad19af95ccd6790dec8f8e1264bc4c70e99ddc1830c1a1c78ccd" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", + "arrow 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-schema 53.4.1", + "datafusion-common 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-expr-common 44.0.0", + "datafusion-functions-aggregate-common 44.0.0", + "datafusion-physical-expr-common 44.0.0", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "petgraph 0.6.5", +] + +[[package]] +name = "datafusion-physical-expr" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +dependencies = [ + "ahash", + "arrow 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-schema 54.3.1", + "datafusion-common 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-expr-common 45.0.0", + "datafusion-functions-aggregate-common 45.0.0", + "datafusion-physical-expr-common 45.0.0", "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 
0.7.1", ] [[package]] @@ -1087,11 +1660,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06e6c05458eccd74b4c77ed6a1fe63d52434240711de7f6960034794dad1caf5" dependencies = [ "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", + "arrow 53.4.1", + "datafusion-common 44.0.0", + "datafusion-expr-common 44.0.0", + "hashbrown 0.14.5", + "itertools 0.13.0", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +dependencies = [ + "ahash", + "arrow 54.2.1", + "arrow-buffer 54.3.1", + "datafusion-common 45.0.0", + "datafusion-expr-common 45.0.0", "hashbrown 0.14.5", - "itertools", + "itertools 0.14.0", ] [[package]] @@ -1100,15 +1688,37 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dc3a82190f49c37d377f31317e07ab5d7588b837adadba8ac367baad5dc2351" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-plan", - "itertools", + "arrow 53.4.1", + "datafusion-common 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr-common 44.0.0", + "datafusion-physical-expr 44.0.0", + "datafusion-physical-plan 44.0.0", + "itertools 0.13.0", + "log", + "recursive", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +dependencies = [ + "arrow 54.2.1", + "arrow-schema 54.3.1", + "datafusion-common 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-expr-common 45.0.0", + "datafusion-physical-expr 45.0.0", + "datafusion-physical-expr-common 45.0.0", + "datafusion-physical-plan 45.0.0", + "futures", + "itertools 
0.14.0", "log", "recursive", + "url", ] [[package]] @@ -1118,25 +1728,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a6608bc9844b4ddb5ed4e687d173e6c88700b1d0482f43894617d18a1fe75da" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-ord 53.4.1", + "arrow-schema 53.4.1", "async-trait", "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", + "datafusion-common 44.0.0", + "datafusion-common-runtime 44.0.0", + "datafusion-execution 44.0.0", + "datafusion-expr 44.0.0", + "datafusion-functions-window-common 44.0.0", + "datafusion-physical-expr 44.0.0", + "datafusion-physical-expr-common 44.0.0", "futures", "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.13.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-physical-plan" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +dependencies = [ + "ahash", + "arrow 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-ord 54.2.1", + "arrow-schema 54.3.1", + "async-trait", + "chrono", + "datafusion-common 45.0.0", + "datafusion-common-runtime 45.0.0", + "datafusion-execution 45.0.0", + "datafusion-expr 45.0.0", + "datafusion-functions-window-common 45.0.0", + "datafusion-physical-expr 45.0.0", + "datafusion-physical-expr-common 45.0.0", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", @@ -1149,12 +1791,31 @@ version = "44.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6a884061c79b33d0c8e84a6f4f4be8bdc12c0f53f5af28ddf5d6d95ac0b15fdc" dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", + "arrow 53.4.1", + "arrow-array 53.4.1", + "arrow-schema 53.4.1", "bigdecimal", - "datafusion-common", - "datafusion-expr", + "datafusion-common 44.0.0", + "datafusion-expr 44.0.0", + "indexmap", + "log", + "recursive", + "regex", + "sqlparser", +] + +[[package]] +name = "datafusion-sql" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +dependencies = [ + "arrow 54.2.1", + "arrow-array 54.2.1", + "arrow-schema 54.3.1", + "bigdecimal", + "datafusion-common 45.0.0", + "datafusion-expr 45.0.0", "indexmap", "log", "recursive", @@ -1208,9 +1869,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" [[package]] name = "find-msvc-tools" @@ -1224,6 +1885,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flatbuffers" version = "24.12.23" @@ -1505,12 +2172,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", 
"zerofrom", "zerovec", @@ -1518,9 +2186,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -1531,9 +2199,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -1545,15 +2213,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -1565,15 +2233,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -1613,9 +2281,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -1644,11 +2312,20 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jobserver" @@ -1662,9 +2339,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9" dependencies = [ "once_cell", "wasm-bindgen", @@ -1741,9 +2418,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.183" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libm" @@ -1759,9 +2436,9 @@ checksum = 
"32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -1780,9 +2457,9 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" dependencies = [ "twox-hash 2.1.2", ] @@ -1928,7 +2605,7 @@ dependencies = [ "chrono", "futures", "humantime", - "itertools", + "itertools 0.13.0", "parking_lot", "percent-encoding", "snafu", @@ -1989,13 +2666,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-ipc 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", "base64", "brotli", "bytes", @@ -2018,6 +2695,43 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "parquet" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd" +dependencies = [ + "ahash", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-data 54.3.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + 
"half", + "hashbrown 0.15.5", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash 1.6.3", + "zstd", + "zstd-sys", +] + [[package]] name = "paste" version = "1.0.15" @@ -2030,7 +2744,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "datafusion", + "datafusion 44.0.0", "num_cpus", "peacockdb-core", "peacockdb-ffi", @@ -2041,7 +2755,8 @@ dependencies = [ name = "peacockdb-core" version = "0.1.0" dependencies = [ - "datafusion", + "arrow 54.2.1", + "datafusion 45.0.0", "flatbuffers 25.12.19", "flatc-fork", "tokio", @@ -2066,7 +2781,17 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset", + "fixedbitset 0.4.2", + "indexmap", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset 0.5.7", "indexmap", ] @@ -2102,9 +2827,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -2307,9 +3032,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "seq-macro" @@ -2366,7 +3091,7 @@ 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -2378,9 +3103,15 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "siphasher" @@ -2542,9 +3273,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -2552,9 +3283,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.50.0" +version = "1.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" dependencies = [ "bytes", "pin-project-lite", @@ -2563,9 +3294,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.1" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -2646,9 +3377,9 @@ checksum 
= "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -2688,9 +3419,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -2739,9 +3470,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0" dependencies = [ "cfg-if", "once_cell", @@ -2752,9 +3483,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2762,9 +3493,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2" dependencies = [ 
"bumpalo", "proc-macro2", @@ -2775,9 +3506,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b" dependencies = [ "unicode-ident", ] @@ -3066,9 +3797,9 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "xz2" @@ -3081,9 +3812,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -3092,9 +3823,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -3104,18 +3835,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.42" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.42" +version = "0.8.48" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", @@ -3124,18 +3855,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -3145,9 +3876,9 @@ dependencies = [ [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -3156,9 +3887,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -3167,9 +3898,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = 
"625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", diff --git a/build.md b/build.md index d89a2af..c4d7996 100644 --- a/build.md +++ b/build.md @@ -87,3 +87,12 @@ Run it on the GPU machine with the cudf library path set: export LD_LIBRARY_PATH=$HOME/miniforge3/envs/rapids-26.02/lib ~/peacockdb/peacockdb ``` + +## RUN Tests + +cargo test -p peacockdb-core --test test_queries +cargo test -p peacockdb-core --test test_cpu_executor + +## RUN All rust non-gpu tests + +cargo test --features rust-only diff --git a/peacockdb-core/Cargo.toml b/peacockdb-core/Cargo.toml index 59f2301..4cc3c5e 100644 --- a/peacockdb-core/Cargo.toml +++ b/peacockdb-core/Cargo.toml @@ -7,6 +7,13 @@ edition = "2024" flatc-fork = "0.6" [dependencies] -datafusion = "44" +arrow = "54.2.0" +datafusion = "45" flatbuffers = "25" -tokio = { version = "1", features = ["rt-multi-thread"] } +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } + +[dev-dependencies] +datafusion = "45" +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } + +# No [patch] section needed \ No newline at end of file diff --git a/peacockdb-core/src/cpu_executor.rs b/peacockdb-core/src/cpu_executor.rs index af54dd0..2e07d5e 100644 --- a/peacockdb-core/src/cpu_executor.rs +++ b/peacockdb-core/src/cpu_executor.rs @@ -14,19 +14,7 @@ use crate::gpu_rule::{ GpuSortPreservingMergeExec, }; -// --------------------------------------------------------------------------- -// GPU → CPU unwrapping -// --------------------------------------------------------------------------- - -/// Strip a GPU wrapper from a node, returning the inner CPU `ExecutionPlan`. -/// -/// `GpuScanExec` also carries a `batch_size` that must be forwarded to the -/// `TaskContext` so the Parquet reader produces correctly-sized batches. -/// Every other GPU node is a transparent shell — its inner CPU node already -/// carries the right configuration (e.g. 
`CoalesceBatchesExec.target_batch_size` -/// was patched by `GpuMemoryBudgetRule`). -/// -/// Non-GPU nodes are returned unchanged with `None` for the batch size. +// fn strip_gpu(node: Arc) -> (Arc, Option) { macro_rules! try_strip { ($ty:ty) => { @@ -103,6 +91,18 @@ impl NodeMemoryStats { } } +/// Recursively strip all GPU wrapper nodes from a plan tree, returning a +/// structurally identical tree composed of plain DataFusion CPU nodes. +pub fn strip_gpu_tree(plan: Arc) -> Result> { + let (cpu_node, _) = strip_gpu(plan); + let stripped_children = cpu_node + .children() + .into_iter() + .map(|c| strip_gpu_tree(c.clone())) + .collect::>>()?; + cpu_node.with_new_children(stripped_children) +} + /// Execute a physical plan one node at a time, bottom-up, on CPU. /// /// GPU wrapper nodes (`GpuFilterExec`, `GpuScanExec`, …) are stripped to their @@ -111,10 +111,6 @@ impl NodeMemoryStats { /// overridden to that value so the Parquet reader produces the same batch sizes /// the GPU planner computed. /// -/// `on_node` is called after each node completes, in post-order (children before -/// parent), with the CPU node name and its output batches. Pass `&mut |_, _| {}` -/// when no instrumentation is needed. -/// /// For each node the function: /// 1. Strips the GPU wrapper (if any) → CPU node + optional batch_size. /// 2. Applies the batch_size override to `TaskContext` if present. 
@@ -277,248 +273,4 @@ pub fn batch_logical_size(batch: &RecordBatch) -> usize { .sum() } -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - use crate::create_context_with_tables; - use datafusion::arrow::array::{Int64Array, StringViewArray}; - use std::path::PathBuf; - - fn testdata_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch.sf1") - } - - fn has_gpu_node(plan: &Arc) -> bool { - plan.name().starts_with("Gpu") || plan.children().iter().any(|c| has_gpu_node(c)) - } - - fn all_node_names(plan: &Arc) -> Vec { - let mut names = vec![plan.name().to_string()]; - for child in plan.children() { - names.extend(all_node_names(child)); - } - names - } - - fn scan_batch_sizes(plan: &Arc) -> Vec { - use crate::gpu_rule::GpuScanExec; - let mut sizes = vec![]; - if let Some(scan) = plan.as_any().downcast_ref::() { - sizes.push(scan.gpu_batch_size); - } - for child in plan.children() { - sizes.extend(scan_batch_sizes(child)); - } - sizes - } - - fn fmt_plan(plan: &Arc) -> String { - use datafusion::physical_plan::display::DisplayableExecutionPlan; - DisplayableExecutionPlan::new(plan.as_ref()) - .indent(true) - .to_string() - } - - async fn make_ctx(budget: usize) -> datafusion::execution::context::SessionContext { - create_context_with_tables(&testdata_dir(), 1, budget) - .await - .unwrap() - } - - const FULL_BUDGET: usize = 2 * 1024 * 1024 * 1024; - const TIGHT_BUDGET: usize = 10 * 1024; - - #[tokio::test] - async fn test_execution_strips_gpu_nodes() { - let ctx = make_ctx(FULL_BUDGET).await; - let plan = ctx - .sql("SELECT count(*) FROM nation WHERE n_regionkey >= 0") - .await - .unwrap() - .create_physical_plan() - .await - .unwrap(); - - assert!( - has_gpu_node(&plan), - "expected GPU nodes in plan, got: {:?}", - all_node_names(&plan) - ); - - let mut stats: Vec = vec![]; 
- execute_node_by_node_instrumented(plan, ctx.task_ctx(), &mut stats) - .await - .unwrap(); - - assert!(!stats.is_empty(), "no nodes were executed"); - let gpu_names: Vec<&str> = stats - .iter() - .filter(|s| s.node_name.starts_with("Gpu")) - .map(|s| s.node_name.as_str()) - .collect(); - assert!(gpu_names.is_empty(), "GPU nodes not stripped: {gpu_names:?}"); - } - - #[tokio::test] - async fn test_cpu_results_match_direct_execution() { - let ctx = make_ctx(FULL_BUDGET).await; - let query = "SELECT n_name FROM nation WHERE n_regionkey >= 0 ORDER BY n_name"; - - let reference: Vec = - ctx.sql(query).await.unwrap().collect().await.unwrap(); - let ref_names: Vec = reference - .iter() - .flat_map(|b| { - b.column(0) - .as_any() - .downcast_ref::() - .unwrap() - .iter() - .map(|v| v.unwrap().to_string()) - }) - .collect(); - - let plan = ctx - .sql(query) - .await - .unwrap() - .create_physical_plan() - .await - .unwrap(); - let task_ctx = ctx.task_ctx(); - let cpu_batches = execute_node_by_node(plan, task_ctx, &mut |_, _| {}).await.unwrap(); - let cpu_names: Vec = cpu_batches - .iter() - .flat_map(|b| { - b.column(0) - .as_any() - .downcast_ref::() - .unwrap() - .iter() - .map(|v| v.unwrap().to_string()) - }) - .collect(); - - assert_eq!( - cpu_names, ref_names, - "CPU executor result differs from direct execution" - ); - assert_eq!(cpu_names.len(), 25, "nation table must have 25 rows"); - } - - #[tokio::test] - async fn test_memory_boundary_preserved_tight_budget() { - let query = "SELECT count(*) FROM customer WHERE c_custkey > 0"; - - let ctx_full = make_ctx(FULL_BUDGET).await; - let plan_full = ctx_full.sql(query).await.unwrap() - .create_physical_plan().await.unwrap(); - - let ctx_tight = make_ctx(TIGHT_BUDGET).await; - let plan_tight = ctx_tight.sql(query).await.unwrap() - .create_physical_plan().await.unwrap(); - - eprintln!("\n=== FULL BUDGET ({} GiB) plan ===\n{}", FULL_BUDGET / (1024*1024*1024), fmt_plan(&plan_full)); - eprintln!("=== TIGHT BUDGET ({} KiB) 
plan ===\n{}", TIGHT_BUDGET / 1024, fmt_plan(&plan_tight)); - - // ── Extract the batch_size ceiling from GpuScanExec ───────────────── - let tight_scan_sizes = scan_batch_sizes(&plan_tight); - assert!( - !tight_scan_sizes.is_empty(), - "expected GpuScanExec in tight plan; node names: {:?}", - all_node_names(&plan_tight) - ); - let gpu_batch_size = *tight_scan_sizes.iter().max().unwrap(); - - let full_scan_sizes = scan_batch_sizes(&plan_full); - let full_batch_size = *full_scan_sizes.iter().max().unwrap(); - - eprintln!( - "GpuScanExec batch_size — full budget: {full_batch_size}, tight budget: {gpu_batch_size}" - ); - assert!( - gpu_batch_size < full_batch_size, - "tight budget batch_size ({gpu_batch_size}) should be smaller than full budget ({full_batch_size})" - ); - - let mut stats: Vec = vec![]; - let batches = - execute_node_by_node_instrumented(plan_tight, ctx_tight.task_ctx(), &mut stats) - .await - .unwrap(); - - let count = batches[0].column(0).as_any().downcast_ref::().unwrap().value(0); - assert_eq!(count, 150_000, "customer table must have 150 000 rows"); - - let scan_stats: Vec<&NodeMemoryStats> = stats - .iter() - .filter(|s| s.node_name == "ParquetExec") - .collect(); - assert!(!scan_stats.is_empty(), "expected ParquetExec in stats"); - - eprintln!("Per-node stats (post-order):"); - for s in &stats { - eprintln!( - " {}: rows={}, max_batch={}, alloc={}B, logical={}B", - s.node_name, s.row_count, s.max_batch_rows, s.allocated_bytes, s.logical_bytes - ); - } - - for s in &scan_stats { - assert!( - s.max_batch_rows <= gpu_batch_size, - "ParquetExec batch {} rows exceeds gpu_batch_size={}", - s.max_batch_rows, gpu_batch_size - ); - } - - let gpu_names: Vec<&str> = stats - .iter() - .filter(|s| s.node_name.starts_with("Gpu")) - .map(|s| s.node_name.as_str()) - .collect(); - assert!(gpu_names.is_empty(), "GPU nodes in stats: {gpu_names:?}"); - } - - #[tokio::test] - async fn test_instrumented_stats_are_populated() { - let ctx = make_ctx(FULL_BUDGET).await; 
- let plan = ctx - .sql("SELECT n_name, n_regionkey FROM nation WHERE n_regionkey = 1") - .await - .unwrap() - .create_physical_plan() - .await - .unwrap(); - - let mut stats: Vec = vec![]; - let batches = - execute_node_by_node_instrumented(plan, ctx.task_ctx(), &mut stats).await.unwrap(); - - let final_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); - let root_stat = stats.last().unwrap(); - assert_eq!( - root_stat.row_count, final_rows, - "root node row_count in stats does not match actual output" - ); - assert!( - root_stat.allocated_bytes > 0, - "root node allocated_bytes should be > 0" - ); - assert!( - root_stat.logical_bytes > 0, - "root node logical_bytes should be > 0" - ); - assert!( - root_stat.allocated_bytes >= root_stat.logical_bytes, - "allocated_bytes ({}) must be >= logical_bytes ({})", - root_stat.allocated_bytes, - root_stat.logical_bytes - ); - } - -} +// Tests live in tests/test_cpu_executor.rs diff --git a/peacockdb-core/src/gpu_rule.rs b/peacockdb-core/src/gpu_rule.rs index 32f0384..7135a56 100644 --- a/peacockdb-core/src/gpu_rule.rs +++ b/peacockdb-core/src/gpu_rule.rs @@ -279,7 +279,7 @@ impl PhysicalOptimizerRule for GpuExecutionRule { /// Estimated byte width of a single row for the given schema. /// Uses `DataType::primitive_width()` for fixed-size types, /// falls back to 32 bytes for variable-length types (Utf8, Binary, etc.). -pub(crate) fn row_width(schema: &SchemaRef) -> usize { +pub fn row_width(schema: &SchemaRef) -> usize { schema .fields() .iter() @@ -339,22 +339,22 @@ impl CardinalityEstimator for TrivialCardinalityEstimator { /// Memory is modeled as a linear function of the scan batch size N. /// `output_row_ratio` tracks the cumulative row multiplier: if a filter has /// 50% selectivity, downstream operators see 0.5 × N rows instead of N. -pub(crate) struct SubtreeMemory { +pub struct SubtreeMemory { /// Peak GPU memory as bytes per scan-batch-row N: /// `peak_bytes = subtree_max_row_bytes * N`. 
- pub(crate) subtree_max_row_bytes: usize, + pub subtree_max_row_bytes: usize, /// Output row width in bytes (per output row). - pub(crate) output_width: usize, + pub output_width: usize, /// Ratio of output rows to original batch size N. /// 1.0 means row count is preserved; <1.0 after filters; >1.0 after fan-out joins. - pub(crate) output_row_ratio: f64, + pub output_row_ratio: f64, } /// Walk the plan tree and compute peak memory as a linear function of batch size N. /// /// Per-operator memory = input batch + output batch, where the row counts are /// adjusted by selectivity (filters) and cardinality (joins) estimators. -pub(crate) fn analyze_memory(plan: &Arc) -> SubtreeMemory { +pub fn analyze_memory(plan: &Arc) -> SubtreeMemory { analyze_memory_with( plan, &TrivialSelectivityEstimator, diff --git a/peacockdb-core/src/lib.rs b/peacockdb-core/src/lib.rs index fd228cc..184d460 100644 --- a/peacockdb-core/src/lib.rs +++ b/peacockdb-core/src/lib.rs @@ -1,14 +1,14 @@ -mod gpu_rule; +pub mod gpu_rule; pub mod cpu_executor; #[allow(unused_imports, dead_code, clippy::all)] -mod generated { +pub mod generated { pub mod gpu_plan_generated { include!(concat!(env!("OUT_DIR"), "/gpu_plan_generated.rs")); } } pub mod plan_serializer; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use datafusion::arrow::record_batch::RecordBatch; @@ -21,13 +21,10 @@ use datafusion::error::Result; use cpu_executor::{execute_node_by_node, NodeMemoryStats}; use gpu_rule::{GpuExecutionRule, GpuMemoryBudgetRule}; -/// Scans `data_dir` for `.parquet` files and registers each as a table in a new -/// `SessionContext`. The table name is the file stem (e.g. `orders.parquet` → `orders`). 
-pub async fn create_context_with_tables( - data_dir: &Path, +pub fn build_session_state_with_gpu_rules( target_partitions: usize, - gpu_memory_budget: usize, -) -> Result { + gpu_memory_budget: usize +) -> SessionContext { let base = SessionContext::new(); let mut config = base.state().config().clone(); config.options_mut().execution.target_partitions = target_partitions; @@ -36,45 +33,73 @@ pub async fn create_context_with_tables( .with_physical_optimizer_rule(Arc::new(GpuExecutionRule)) .with_physical_optimizer_rule(Arc::new(GpuMemoryBudgetRule::new(gpu_memory_budget))) .build(); - let ctx = SessionContext::new_with_state(state); + + SessionContext::new_with_state(state) +} - let entries = std::fs::read_dir(data_dir).map_err(|e| { - datafusion::error::DataFusionError::IoError(e) - })?; +pub fn build_session_state( + target_partitions: usize +) -> SessionContext { + let base = SessionContext::new(); + let mut config = base.state().config().clone(); + config.options_mut().execution.target_partitions = target_partitions; + let state = SessionStateBuilder::new_from_existing(base.state()) + .with_config(config) + .build(); + + SessionContext::new_with_state(state) +} - for entry in entries { - let entry = entry.map_err(|e| datafusion::error::DataFusionError::IoError(e))?; - let path = entry.path(); +async fn read_table(path: PathBuf, ctx: &SessionContext) -> Result<(String, Arc), ()> { + if path.extension().and_then(|e| e.to_str()) != Some("parquet") { + () + } - if path.extension().and_then(|e| e.to_str()) != Some("parquet") { - continue; - } + let table_name = path + .file_stem() + .and_then(|s| s.to_str()) + .ok_or_else(|| datafusion::error::DataFusionError::Plan( + format!("could not derive table name from path: {}", path.display()), + )).unwrap() + .to_string(); - let table_name = path - .file_stem() - .and_then(|s| s.to_str()) - .ok_or_else(|| datafusion::error::DataFusionError::Plan( - format!("could not derive table name from path: {}", path.display()), - ))? 
- .to_string(); + let table_url = ListingTableUrl::parse(path.to_str().unwrap()).unwrap(); + let format = Arc::new(ParquetFormat::default().with_enable_pruning(true)); + let listing_options = ListingOptions::new(format).with_file_extension(".parquet"); - let table_url = ListingTableUrl::parse(path.to_str().unwrap())?; - let format = Arc::new(ParquetFormat::default().with_enable_pruning(true)); - let listing_options = ListingOptions::new(format).with_file_extension(".parquet"); + let resolved_schema = listing_options.infer_schema(&ctx.state(), &table_url).await.unwrap(); - let resolved_schema = listing_options.infer_schema(&ctx.state(), &table_url).await?; + let config = ListingTableConfig::new(table_url) + .with_listing_options(listing_options) + .with_schema(resolved_schema); - let config = ListingTableConfig::new(table_url) - .with_listing_options(listing_options) - .with_schema(resolved_schema); + let table = Arc::new(ListingTable::try_new(config).unwrap()); - let table = Arc::new(ListingTable::try_new(config)?); + Ok((table_name, table)) +} + +pub async fn register_tables_for( + ctx: SessionContext, + data_dir: &Path +) -> Result { + for entry in std::fs::read_dir(data_dir)? 
{ + let path = entry?.path(); + let Ok((table_name, table)) = read_table(path, &ctx).await else { continue; }; ctx.register_table(&table_name, table)?; } Ok(ctx) } +pub async fn create_context_with_tables( + data_dir: &Path, + target_partitions: usize, + gpu_memory_budget: usize, +) -> Result { + let ctx = build_session_state_with_gpu_rules(target_partitions, gpu_memory_budget); + register_tables_for(ctx, data_dir).await +} + // --------------------------------------------------------------------------- // CpuExecutor // --------------------------------------------------------------------------- @@ -139,293 +164,3 @@ impl CpuExecutor { Ok((batches, stats)) } } - -#[cfg(test)] -mod tests { - use super::*; - use datafusion::arrow::array::Int64Array; - use datafusion::physical_plan::ExecutionPlan; - use gpu_rule::{analyze_memory, row_width, GpuScanExec}; - use std::path::PathBuf; - use std::sync::Arc; - - fn testdata_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../testdata/tpch.minimal") - } - - // ── CpuExecutor integration tests ──────────────────────────────────────── - - /// Full end-to-end example showing the idiomatic usage: - /// 1. CpuExecutor::new — builds a SessionContext with GPU rules - /// 2. exec.execute(sql) — SQL → GPU plan → CPU execution → RecordBatches - #[tokio::test] - async fn test_cpu_executor_simple_query() { - let exec = CpuExecutor::new(&testdata_dir(), 1, 2 * 1024 * 1024 * 1024) - .await - .unwrap(); - - let batches = exec - .execute("SELECT count(*) FROM nation WHERE n_regionkey >= 0") - .await - .unwrap(); - - let count = batches[0] - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0); - - assert_eq!(count, 25); - } - - /// execute_instrumented returns both results and per-node stats in one call. 
- #[tokio::test] - async fn test_cpu_executor_instrumented() { - let exec = CpuExecutor::new(&testdata_dir(), 1, 2 * 1024 * 1024 * 1024) - .await - .unwrap(); - - let (batches, stats) = exec - .execute_instrumented("SELECT count(*) FROM nation WHERE n_regionkey >= 0") - .await - .unwrap(); - - let count = batches[0] - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0); - assert_eq!(count, 25); - - // Every stat entry must name a CPU node, never a GPU wrapper. - for s in &stats { - assert!( - !s.node_name.starts_with("Gpu"), - "GPU node '{}' leaked into stats", - s.node_name - ); - } - assert!(!stats.is_empty()); - } - - async fn count(ctx: &SessionContext, query: &str) -> i64 { - let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); - batches[0] - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .value(0) - } - - use datafusion::physical_plan::display::DisplayableExecutionPlan; - - const TEST_TARGET_PARTITIONS: usize = 8; - const TEST_GPU_MEMORY_BUDGET: usize = 2 * 1024 * 1024 * 1024; // 2 GiB - - fn test_ctx(data_dir: &Path) -> impl std::future::Future> { - create_context_with_tables(data_dir, TEST_TARGET_PARTITIONS, TEST_GPU_MEMORY_BUDGET) - } - - fn plans_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/plans") - } - - /// Render the plan to a string, normalizing `ParquetExec:` lines to - /// `ParquetExec: table=` so canonical files are path-independent. 
- fn plan_str(plan: &Arc) -> String { - let raw = DisplayableExecutionPlan::new(plan.as_ref()) - .indent(false) - .to_string(); - raw.lines() - .filter(|l| !l.is_empty()) - .map(|line| { - if line.trim_start().starts_with("ParquetExec:") { - let indent = line.len() - line.trim_start().len(); - let table = line.find(".parquet") - .and_then(|end| line[..end].rfind('/').map(|sep| &line[sep + 1..end])) - .unwrap_or("unknown"); - format!("{}ParquetExec: table={table}", &line[..indent]) - } else { - line.to_string() - } - }) - .collect::>() - .join("\n") - } - - /// Render per-node memory analysis: row_width and subtree_max_row_bytes. - fn memory_str(plan: &Arc) -> String { - fn walk(plan: &Arc, indent: usize, lines: &mut Vec) { - let mem = analyze_memory(plan); - let rw = row_width(&plan.schema()); - let prefix = " ".repeat(indent); - lines.push(format!( - "{}{}: row_width={}, subtree_max_row_bytes={}", - prefix, - plan.name(), - rw, - mem.subtree_max_row_bytes - )); - for child in plan.children() { - walk(child, indent + 2, lines); - } - } - let mut lines = Vec::new(); - walk(plan, 0, &mut lines); - lines.join("\n") - } - - fn assert_plan_matches_canonical(plan: &Arc, name: &str) { - let canonical_path = plans_dir().join(format!("{name}.txt")); - let canonical = std::fs::read_to_string(&canonical_path) - .unwrap_or_else(|_| panic!("canonical file not found: {}", canonical_path.display())); - let actual = format!("{}\n--- memory ---\n{}", plan_str(plan), memory_str(plan)); - assert_eq!( - actual, - canonical.trim_end(), - "plan for '{name}' does not match {}", - canonical_path.display() - ); - - // Flatbuffer roundtrip: serialize → deserialize → re-serialize, - // verify the plan survives the round trip. 
- assert_flatbuffer_roundtrip(plan, name); - } - - fn assert_flatbuffer_roundtrip(plan: &Arc, name: &str) { - let bytes = plan_serializer::serialize_plan(plan) - .unwrap_or_else(|e| panic!("flatbuffer serialization failed for '{name}': {e}")); - - let reconstructed = plan_serializer::deserialize_plan(&bytes) - .unwrap_or_else(|e| panic!("flatbuffer deserialization failed for '{name}': {e}")); - - let original = plan_str(plan); - let roundtripped = plan_str(&reconstructed); - assert_eq!( - roundtripped, original, - "flatbuffer roundtrip mismatch for '{name}'" - ); - } - - // ── Basic correctness ──────────────────────────────────────────────────── - - #[tokio::test] - async fn test_nation_row_count() { - let ctx = test_ctx(&testdata_dir()).await.unwrap(); - assert_eq!(count(&ctx, "SELECT count(*) FROM nation").await, 25); - } - - #[tokio::test] - async fn test_region_nation_join() { - let ctx = test_ctx(&testdata_dir()).await.unwrap(); - let n = count( - &ctx, - "SELECT count(*) FROM nation JOIN region ON nation.n_regionkey = region.r_regionkey", - ) - .await; - assert_eq!(n, 25); - } - - // ── GPU plan node tests ────────────────────────────────────────────────── - - /// Filter + aggregate: SELECT count(*) FROM customer WHERE c_acctbal > 0 - /// Expected GPU nodes: GpuAggregateExec (partial + final), GpuFilterExec - #[tokio::test] - async fn test_gpu_nodes_filter_agg() { - let ctx = test_ctx(&testdata_dir()).await.unwrap(); - let query = "SELECT count(*) FROM customer WHERE c_acctbal > 0"; - - let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); - assert_plan_matches_canonical(&plan, "filter_agg"); - - let n = count(&ctx, query).await; - assert!(n > 0 && n <= 150_000, "unexpected count {n}"); - } - - /// Hash join + sort: nations joined with their region, sorted by name. 
- /// Expected GPU nodes: GpuSortExec, GpuHashJoinExec - #[tokio::test] - async fn test_gpu_nodes_join_sort() { - let ctx = test_ctx(&testdata_dir()).await.unwrap(); - let query = " - SELECT n.n_name, r.r_name - FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey - ORDER BY n.n_name"; - - let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); - assert_plan_matches_canonical(&plan, "join_sort"); - - // Result: 25 rows (every nation has exactly one region) - let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); - let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 25); - } - - /// Group by + join + sort: nations per region, sorted descending by count. - /// Expected GPU nodes: GpuSortExec, GpuAggregateExec, GpuHashJoinExec - #[tokio::test] - async fn test_gpu_nodes_group_join_sort() { - let ctx = test_ctx(&testdata_dir()).await.unwrap(); - let query = " - SELECT r.r_name, count(*) AS nation_count - FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey - GROUP BY r.r_name - ORDER BY nation_count DESC, r.r_name"; - - let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); - assert_plan_matches_canonical(&plan, "group_join_sort"); - - // Result: 5 regions, each with exactly 5 nations. - let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); - let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 5); - let counts = batches[0].column(1).as_any().downcast_ref::().unwrap(); - for i in 0..counts.len() { - assert_eq!(counts.value(i), 5, "region {} has {} nations, expected 5", i, counts.value(i)); - } - } - - // ── Memory budget tests ────────────────────────────────────────────────── - - /// Find all GpuScanExec nodes and return their batch sizes. 
- fn scan_batch_sizes(plan: &Arc) -> Vec { - let mut sizes = Vec::new(); - if let Some(scan) = plan.as_any().downcast_ref::() { - sizes.push(scan.gpu_batch_size); - } - for child in plan.children() { - sizes.extend(scan_batch_sizes(child)); - } - sizes - } - - /// With a tight GPU memory budget, the batch size should be reduced below - /// the default 8192. Results must still be correct. - #[tokio::test] - async fn test_memory_budget_reduces_batch_size() { - // 10 KiB budget → should force a very small batch size. - let ctx = - create_context_with_tables(&testdata_dir(), TEST_TARGET_PARTITIONS, 10 * 1024).await.unwrap(); - let query = " - SELECT n.n_name, r.r_name - FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey - ORDER BY n.n_name"; - - let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); - let sizes = scan_batch_sizes(&plan); - assert!(!sizes.is_empty(), "expected GpuScanExec nodes in plan"); - for &bs in &sizes { - assert!(bs < 8192, "expected batch_size < 8192 with 10KiB budget, got {bs}"); - assert!(bs >= 1, "batch_size must be at least 1"); - } - - // Results must still be correct despite smaller batches. - let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); - let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 25); - } -} diff --git a/peacockdb-core/src/plan_serializer.rs b/peacockdb-core/src/plan_serializer.rs index 62ac7a7..8302403 100644 --- a/peacockdb-core/src/plan_serializer.rs +++ b/peacockdb-core/src/plan_serializer.rs @@ -32,7 +32,7 @@ use crate::gpu_rule::{ /// Serialize an entire GPU execution plan tree into a FlatBuffer byte vector. /// /// Returns `Err` if the plan contains nodes that cannot be serialized (e.g. -/// unsupported expression types or plan nodes). 
+/// unsupported expression types or plan nodes) pub fn serialize_plan(plan: &Arc) -> Result, String> { let mut builder = FlatBufferBuilder::with_capacity(4096); let root = serialize_plan_node(&mut builder, plan)?; @@ -1385,70 +1385,3 @@ fn deserialize_gpu_sort_preserving_merge( Ok(Arc::new(GpuSortPreservingMergeExec::new(Arc::new(merge_exec)))) } - -#[cfg(test)] -mod tests { - use super::*; - - /// Verify that a serialized plan round-trips through FlatBuffer validation. - fn assert_valid_flatbuffer(bytes: &[u8]) { - let plan = flatbuffers::root::(bytes) - .expect("invalid FlatBuffer"); - assert!(plan.root().is_some(), "root PlanNode should be present"); - } - - /// Helper: build a plan from SQL and serialize it. - async fn serialize_query(sql: &str) -> Vec { - let data_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../testdata/tpch.minimal"); - let ctx = crate::create_context_with_tables(&data_dir, 1, 2 * 1024 * 1024 * 1024) - .await - .unwrap(); - let plan = ctx - .sql(sql) - .await - .unwrap() - .create_physical_plan() - .await - .unwrap(); - serialize_plan(&plan).expect("serialization failed") - } - - #[tokio::test] - async fn test_serialize_filter_agg() { - let bytes = serialize_query( - "SELECT count(*) FROM customer WHERE c_acctbal > 0", - ).await; - assert_valid_flatbuffer(&bytes); - - let plan = flatbuffers::root::(&bytes).unwrap(); - let root = plan.root().unwrap(); - // Root should be an aggregate. 
- assert_eq!(root.node_type(), fb::PlanNodeKind::GpuAggregate); - } - - #[tokio::test] - async fn test_serialize_join_sort() { - let bytes = serialize_query( - "SELECT n.n_name, r.r_name \ - FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey \ - ORDER BY n.n_name", - ).await; - assert_valid_flatbuffer(&bytes); - - let plan = flatbuffers::root::(&bytes).unwrap(); - let root = plan.root().unwrap(); - assert_eq!(root.node_type(), fb::PlanNodeKind::GpuSort); - } - - #[tokio::test] - async fn test_serialize_group_join_sort() { - let bytes = serialize_query( - "SELECT r.r_name, count(*) AS nation_count \ - FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey \ - GROUP BY r.r_name \ - ORDER BY nation_count DESC, r.r_name", - ).await; - assert_valid_flatbuffer(&bytes); - } -} diff --git a/peacockdb-core/tests/test_cpu_executor.rs b/peacockdb-core/tests/test_cpu_executor.rs new file mode 100644 index 0000000..3325036 --- /dev/null +++ b/peacockdb-core/tests/test_cpu_executor.rs @@ -0,0 +1,290 @@ +use std::path::PathBuf; +use std::sync::Arc; + +use datafusion::arrow::array::Int64Array; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::arrow::util::pretty::pretty_format_batches; +use datafusion::physical_plan::ExecutionPlan; + +use peacockdb_core::cpu_executor::{ + execute_node_by_node, execute_node_by_node_instrumented, NodeMemoryStats, +}; +use peacockdb_core::{create_context_with_tables, build_session_state, register_tables_for}; + +fn testdata_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch.sf1") +} + +fn queries_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch-queries") +} + +fn has_gpu_node(plan: &Arc) -> bool { + plan.name().starts_with("Gpu") || plan.children().iter().any(|c| has_gpu_node(c)) +} + +fn all_node_names(plan: &Arc) -> Vec { + let mut names = vec![plan.name().to_string()]; + for child in plan.children() { + names.extend(all_node_names(child)); + } 
+ names +} + +fn scan_batch_sizes(plan: &Arc) -> Vec { + use peacockdb_core::gpu_rule::GpuScanExec; + let mut sizes = vec![]; + if let Some(scan) = plan.as_any().downcast_ref::() { + sizes.push(scan.gpu_batch_size); + } + for child in plan.children() { + sizes.extend(scan_batch_sizes(child)); + } + sizes +} + +fn fmt_plan(plan: &Arc) -> String { + use datafusion::physical_plan::display::DisplayableExecutionPlan; + DisplayableExecutionPlan::new(plan.as_ref()) + .indent(true) + .to_string() +} + +async fn make_ctx(budget: usize) -> datafusion::execution::context::SessionContext { + create_context_with_tables(&testdata_dir(), 1, budget) + .await + .unwrap() +} + +const FULL_BUDGET: usize = 2 * 1024 * 1024 * 1024; +const TIGHT_BUDGET: usize = 10 * 1024; + +// +#[tokio::test] +async fn test_execution_strips_gpu_nodes() { + let ctx = make_ctx(FULL_BUDGET).await; + let plan = ctx + .sql("SELECT count(*) FROM nation WHERE n_regionkey >= 0") + .await + .unwrap() + .create_physical_plan() + .await + .unwrap(); + + assert!( + has_gpu_node(&plan), + "expected GPU nodes in plan, got: {:?}", + all_node_names(&plan) + ); + + let mut stats: Vec = vec![]; + execute_node_by_node_instrumented(plan, ctx.task_ctx(), &mut stats) + .await + .unwrap(); + + assert!(!stats.is_empty(), "no nodes were executed"); + let gpu_names: Vec<&str> = stats + .iter() + .filter(|s| s.node_name.starts_with("Gpu")) + .map(|s| s.node_name.as_str()) + .collect(); + assert!(gpu_names.is_empty(), "GPU nodes not stripped: {gpu_names:?}"); +} + +/// Render RecordBatches as a pretty table and sort the data rows so that +/// result comparison is order-independent (queries without ORDER BY may +/// return rows in any order depending on the executor path). +fn batches_to_sorted_str(batches: &[RecordBatch]) -> String { + let formatted = pretty_format_batches(batches).unwrap().to_string(); + let lines: Vec<&str> = formatted.lines().collect(); + // Layout: border / header / border / ...data rows... 
/ border + if lines.len() > 4 { + let mut data = lines[3..lines.len() - 1].to_vec(); + data.sort_unstable(); + let mut out = lines[..3].to_vec(); + out.extend(data); + out.push(lines[lines.len() - 1]); + out.join("\n") + } else { + formatted + } +} + +/// Run `name.sql` through both plain DataFusion and the CPU executor, then +/// assert that the result sets are equal (order-independent). +async fn assert_cpu_results_match_datafusion(name: &str) { + let data_dir = testdata_dir(); + + let sql_path = queries_dir().join(format!("{name}.sql")); + let sql = std::fs::read_to_string(&sql_path) + .unwrap_or_else(|_| panic!("query file not found: {}", sql_path.display())); + let mut df_ctx = build_session_state(1); + df_ctx = register_tables_for(df_ctx, &data_dir).await.unwrap(); + // Ground truth: plain DataFusion without GPU rules. + let expected = df_ctx.sql(&sql).await.unwrap().collect().await.unwrap(); + + // CPU executor: GPU-annotated plan executed node-by-node on CPU. + let cpu_ctx = make_ctx(FULL_BUDGET).await; + let plan = cpu_ctx.sql(&sql).await.unwrap().create_physical_plan().await.unwrap(); + let actual = execute_node_by_node(plan, cpu_ctx.task_ctx(), &mut |_, _| {}) + .await + .unwrap(); + + assert_eq!( + batches_to_sorted_str(&actual), + batches_to_sorted_str(&expected), + "CPU executor result for '{name}' differs from plain DataFusion" + ); +} + +macro_rules! 
cpu_result_test { + ($func_name:ident, $query_name:literal) => { + #[tokio::test] + async fn $func_name() { + assert_cpu_results_match_datafusion($query_name).await; + } + }; +} + +cpu_result_test!(test_cpu_scan_limit, "scan-limit"); +cpu_result_test!(test_cpu_filter_project, "filter-project"); +cpu_result_test!(test_cpu_aggregate_groupby, "aggregate-groupby"); +cpu_result_test!(test_cpu_hash_join, "hash-join"); +cpu_result_test!(test_cpu_left_join, "left-join"); +cpu_result_test!(test_cpu_semi_join, "semi-join"); +cpu_result_test!(test_cpu_anti_join, "anti-join"); +cpu_result_test!(test_cpu_nested_loop_join, "nested-loop-join"); +cpu_result_test!(test_cpu_mixed_join, "mixed-join"); +cpu_result_test!(test_cpu_cross_join, "cross-join"); +cpu_result_test!(test_cpu_q1, "q1"); +cpu_result_test!(test_cpu_q2, "q2"); +cpu_result_test!(test_cpu_q3, "q3"); +cpu_result_test!(test_cpu_q4, "q4"); +cpu_result_test!(test_cpu_q5, "q5"); +cpu_result_test!(test_cpu_q6, "q6"); +cpu_result_test!(test_cpu_q7, "q7"); +cpu_result_test!(test_cpu_q8, "q8"); +cpu_result_test!(test_cpu_q9, "q9"); +cpu_result_test!(test_cpu_q10, "q10"); +cpu_result_test!(test_cpu_q11, "q11"); +cpu_result_test!(test_cpu_q12, "q12"); +cpu_result_test!(test_cpu_q13, "q13"); +cpu_result_test!(test_cpu_q14, "q14"); +// cpu_result_test!(test_cpu_q15, "q15"); // q15 uses a view; skip like test_queries.rs +cpu_result_test!(test_cpu_q16, "q16"); +cpu_result_test!(test_cpu_q17, "q17"); +cpu_result_test!(test_cpu_q18, "q18"); +cpu_result_test!(test_cpu_q19, "q19"); +cpu_result_test!(test_cpu_q20, "q20"); +cpu_result_test!(test_cpu_q21, "q21"); +cpu_result_test!(test_cpu_q22, "q22"); + +#[tokio::test] +async fn test_memory_boundary_preserved_tight_budget() { + let query = "SELECT count(*) FROM customer WHERE c_custkey > 0"; + + let ctx_full = make_ctx(FULL_BUDGET).await; + let plan_full = ctx_full.sql(query).await.unwrap() + .create_physical_plan().await.unwrap(); + + let ctx_tight = make_ctx(TIGHT_BUDGET).await; + let 
plan_tight = ctx_tight.sql(query).await.unwrap() + .create_physical_plan().await.unwrap(); + + eprintln!("\n=== FULL BUDGET ({} GiB) plan ===\n{}", FULL_BUDGET / (1024*1024*1024), fmt_plan(&plan_full)); + eprintln!("=== TIGHT BUDGET ({} KiB) plan ===\n{}", TIGHT_BUDGET / 1024, fmt_plan(&plan_tight)); + + let tight_scan_sizes = scan_batch_sizes(&plan_tight); + assert!( + !tight_scan_sizes.is_empty(), + "expected GpuScanExec in tight plan; node names: {:?}", + all_node_names(&plan_tight) + ); + let gpu_batch_size = *tight_scan_sizes.iter().max().unwrap(); + + let full_scan_sizes = scan_batch_sizes(&plan_full); + let full_batch_size = *full_scan_sizes.iter().max().unwrap(); + + eprintln!( + "GpuScanExec batch_size — full budget: {full_batch_size}, tight budget: {gpu_batch_size}" + ); + assert!( + gpu_batch_size < full_batch_size, + "tight budget batch_size ({gpu_batch_size}) should be smaller than full budget ({full_batch_size})" + ); + + let mut stats: Vec = vec![]; + let batches = + execute_node_by_node_instrumented(plan_tight, ctx_tight.task_ctx(), &mut stats) + .await + .unwrap(); + + let count = batches[0].column(0).as_any().downcast_ref::().unwrap().value(0); + assert_eq!(count, 150_000, "customer table must have 150 000 rows"); + + let scan_stats: Vec<&NodeMemoryStats> = stats + .iter() + .filter(|s| s.node_name == "ParquetExec") + .collect(); + assert!(!scan_stats.is_empty(), "expected ParquetExec in stats"); + + eprintln!("Per-node stats (post-order):"); + for s in &stats { + eprintln!( + " {}: rows={}, max_batch={}, alloc={}B, logical={}B", + s.node_name, s.row_count, s.max_batch_rows, s.allocated_bytes, s.logical_bytes + ); + } + + for s in &scan_stats { + assert!( + s.max_batch_rows <= gpu_batch_size, + "ParquetExec batch {} rows exceeds gpu_batch_size={}", + s.max_batch_rows, gpu_batch_size + ); + } + + let gpu_names: Vec<&str> = stats + .iter() + .filter(|s| s.node_name.starts_with("Gpu")) + .map(|s| s.node_name.as_str()) + .collect(); + 
assert!(gpu_names.is_empty(), "GPU nodes in stats: {gpu_names:?}"); +} + +#[tokio::test] +async fn test_instrumented_stats_are_populated() { + let ctx = make_ctx(FULL_BUDGET).await; + let plan = ctx + .sql("SELECT n_name, n_regionkey FROM nation WHERE n_regionkey = 1") + .await + .unwrap() + .create_physical_plan() + .await + .unwrap(); + + let mut stats: Vec = vec![]; + let batches = + execute_node_by_node_instrumented(plan, ctx.task_ctx(), &mut stats).await.unwrap(); + + let final_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + let root_stat = stats.last().unwrap(); + assert_eq!( + root_stat.row_count, final_rows, + "root node row_count in stats does not match actual output" + ); + assert!( + root_stat.allocated_bytes > 0, + "root node allocated_bytes should be > 0" + ); + assert!( + root_stat.logical_bytes > 0, + "root node logical_bytes should be > 0" + ); + assert!( + root_stat.allocated_bytes >= root_stat.logical_bytes, + "allocated_bytes ({}) must be >= logical_bytes ({})", + root_stat.allocated_bytes, + root_stat.logical_bytes + ); +} diff --git a/peacockdb-core/tests/test_plan_serialiser.rs b/peacockdb-core/tests/test_plan_serialiser.rs new file mode 100644 index 0000000..af03155 --- /dev/null +++ b/peacockdb-core/tests/test_plan_serialiser.rs @@ -0,0 +1,61 @@ +use std::path::PathBuf; + +use peacockdb_core::generated::gpu_plan_generated::peacock::plan as fb; +use peacockdb_core::plan_serializer::serialize_plan; + +fn assert_valid_flatbuffer(bytes: &[u8]) { + let plan = flatbuffers::root::(bytes).expect("invalid FlatBuffer"); + assert!(plan.root().is_some(), "root PlanNode should be present"); +} + +async fn serialize_query(sql: &str) -> Vec { + let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch.minimal"); + let ctx = peacockdb_core::create_context_with_tables(&data_dir, 1, 2 * 1024 * 1024 * 1024) + .await + .unwrap(); + let plan = ctx + .sql(sql) + .await + .unwrap() + .create_physical_plan() + .await + .unwrap(); + 
serialize_plan(&plan).expect("serialization failed") +} + +#[tokio::test] +async fn test_serialize_filter_agg() { + let bytes = serialize_query("SELECT count(*) FROM customer WHERE c_acctbal > 0").await; + assert_valid_flatbuffer(&bytes); + + let plan = flatbuffers::root::(&bytes).unwrap(); + let root = plan.root().unwrap(); + assert_eq!(root.node_type(), fb::PlanNodeKind::GpuAggregate); +} + +#[tokio::test] +async fn test_serialize_join_sort() { + let bytes = serialize_query( + "SELECT n.n_name, r.r_name \ + FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey \ + ORDER BY n.n_name", + ) + .await; + assert_valid_flatbuffer(&bytes); + + let plan = flatbuffers::root::(&bytes).unwrap(); + let root = plan.root().unwrap(); + assert_eq!(root.node_type(), fb::PlanNodeKind::GpuSort); +} + +#[tokio::test] +async fn test_serialize_group_join_sort() { + let bytes = serialize_query( + "SELECT r.r_name, count(*) AS nation_count \ + FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey \ + GROUP BY r.r_name \ + ORDER BY nation_count DESC, r.r_name", + ) + .await; + assert_valid_flatbuffer(&bytes); +} diff --git a/peacockdb-core/tests/test_queries.rs b/peacockdb-core/tests/test_queries.rs index 5e50d20..1454a7c 100644 --- a/peacockdb-core/tests/test_queries.rs +++ b/peacockdb-core/tests/test_queries.rs @@ -1,36 +1,80 @@ //! Parameterized tests that canonize GPU execution plans for TPC-H queries. //! //! Each test reads a SQL file from `testdata/tpch-queries/.sql`, plans it -//! against the SF-1 dataset, and asserts the plan matches the canonical file in -//! `testdata/plans.sf1/.txt`. +//! against the SF-1 dataset, strips GPU wrappers, and compares the result against +//! the canonical plan stored in `tests/canondata/.txt`. //! -//! To generate or update canonical plans, run with: -//! UPDATE_CANONICAL=1 cargo test -p peacockdb-core --test test_queries +//! # Canonizing +//! +//! To write (or overwrite) canonical files from the current actual output, run with +//! 
`UPDATE_CANONICAL=1`: +//! +//!     UPDATE_CANONICAL=1 cargo test --test test_queries +//! +//! Each canonical file contains the normalized, GPU-stripped physical plan for one +//! TPC-H query. ParquetExec lines are normalized to `ParquetExec: table=` so +//! the files are path-independent. use std::path::PathBuf; use std::sync::Arc; +use datafusion::arrow::array::Int64Array; +use datafusion::execution::context::SessionContext; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::ExecutionPlan; +use peacockdb_core::build_session_state_with_gpu_rules; +use peacockdb_core::register_tables_for; +use peacockdb_core::cpu_executor::strip_gpu_tree; use peacockdb_core::create_context_with_tables; +use peacockdb_core::gpu_rule::GpuScanExec; use peacockdb_core::plan_serializer; +use peacockdb_core::CpuExecutor; const TARGET_PARTITIONS: usize = 8; -const GPU_MEMORY_BUDGET: usize = 2 * 1024 * 1024 * 1024; // 2 GiB +const TEST_TARGET_PARTITIONS: usize = 8; +const TEST_GPU_MEMORY_BUDGET: usize = 2 * 1024 * 1024 * 1024; // 2 GiB fn testdata_dir() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch.sf1") } +fn testdata_minimal_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch.minimal") +} + fn queries_dir() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/tpch-queries") } -fn plans_dir() -> PathBuf { +fn canondata_dir() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/plans.sf1") } +fn plans_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../testdata/plans") +} + +fn test_ctx(data_dir: &std::path::Path) -> impl std::future::Future> + '_ { + create_context_with_tables(data_dir, TEST_TARGET_PARTITIONS, TEST_GPU_MEMORY_BUDGET) +} + +async fn count(ctx: &SessionContext, query: &str) -> i64 { + let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); + batches[0] + .column(0) + .as_any() + 
.downcast_ref::() + .unwrap() + .value(0) +} + +/// Activated by: +/// - setting the `UPDATE_CANONICAL` environment variable (any value), e.g. `UPDATE_CANONICAL=1 cargo test` +fn is_canonize_mode() -> bool { + std::env::var("UPDATE_CANONICAL").is_ok() +} + /// Render the plan to a string, normalizing ParquetExec lines to be path-independent. fn plan_str(plan: &Arc) -> String { let raw = DisplayableExecutionPlan::new(plan.as_ref()) @@ -102,6 +146,34 @@ fn assert_plan_matches_canonical(plan: &Arc, name: &str) { } } +async fn compare_plans_with_query(name: &str, sql: &str) { + let data_dir = testdata_dir(); + let gpu_ctx = register_tables_for(build_session_state_with_gpu_rules(TARGET_PARTITIONS, TEST_GPU_MEMORY_BUDGET), &data_dir) + .await + .unwrap(); + + let gpu_plan = gpu_ctx.sql(sql).await.unwrap().create_physical_plan().await.unwrap(); + let actual = plan_str(&strip_gpu_tree(gpu_plan).unwrap()); + + let canon_path = canondata_dir().join(format!("{name}.txt")); + + if is_canonize_mode() { + std::fs::create_dir_all(canondata_dir()).unwrap(); + std::fs::write(&canon_path, &actual).unwrap(); + println!("canonized: {}", canon_path.display()); + return; + } + + let expected = std::fs::read_to_string(&canon_path) + .unwrap_or_else(|_| panic!( + "canonical file not found: {}. 
Run with UPDATE_CANONICAL=1 to create it.", + canon_path.display() + )); + let expected = expected.trim_end().to_string(); + + assert_eq!(actual, expected, "GPU-stripped plan does not match canonical for '{name}'"); +} + async fn run_query_test(name: &str) { let data_dir = testdata_dir(); if !data_dir.exists() { @@ -115,19 +187,7 @@ async fn run_query_test(name: &str) { let sql = std::fs::read_to_string(&sql_path) .unwrap_or_else(|_| panic!("query file not found: {}", sql_path.display())); - let ctx = create_context_with_tables(&data_dir, TARGET_PARTITIONS, GPU_MEMORY_BUDGET) - .await - .unwrap(); - - let plan = ctx - .sql(sql.trim()) - .await - .unwrap() - .create_physical_plan() - .await - .unwrap(); - - assert_plan_matches_canonical(&plan, name); + compare_plans_with_query(name, &sql).await; } macro_rules! query_test { @@ -149,3 +209,202 @@ query_test!(test_anti_join, "anti-join"); query_test!(test_nested_loop_join, "nested-loop-join"); query_test!(test_mixed_join, "mixed-join"); query_test!(test_cross_join, "cross-join"); + + +query_test!(tpch_q1, "q1"); +query_test!(tpch_q2, "q2"); +query_test!(tpch_q3, "q3"); +query_test!(tpch_q4, "q4"); +query_test!(tpch_q5, "q5"); +query_test!(tpch_q6, "q6"); +query_test!(tpch_q7, "q7"); +query_test!(tpch_q8, "q8"); +query_test!(tpch_q9, "q9"); +query_test!(tpch_q10, "q10"); +query_test!(tpch_q11, "q11"); +query_test!(tpch_q12, "q12"); +query_test!(tpch_q13, "q13"); +query_test!(tpch_q14, "q14"); +// query_test!(tpch_q15, "q15"); +query_test!(tpch_q16, "q16"); +query_test!(tpch_q17, "q17"); +query_test!(tpch_q18, "q18"); +query_test!(tpch_q19, "q19"); +query_test!(tpch_q20, "q20"); +query_test!(tpch_q21, "q21"); +query_test!(tpch_q22, "q22"); + +// ── CpuExecutor integration tests ──────────────────────────────────────── + +/// Full end-to-end example showing the idiomatic usage: +/// 1. CpuExecutor::new — builds a SessionContext with GPU rules +/// 2. 
exec.execute(sql) — SQL → GPU plan → CPU execution → RecordBatches +#[tokio::test] +async fn test_cpu_executor_simple_query() { + let exec = CpuExecutor::new(&testdata_minimal_dir(), 1, 2 * 1024 * 1024 * 1024) + .await + .unwrap(); + + let batches = exec + .execute("SELECT count(*) FROM nation WHERE n_regionkey >= 0") + .await + .unwrap(); + + let count = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .value(0); + + assert_eq!(count, 25); +} + +/// execute_instrumented returns both results and per-node stats in one call. +#[tokio::test] +async fn test_cpu_executor_instrumented() { + let exec = CpuExecutor::new(&testdata_minimal_dir(), 1, 2 * 1024 * 1024 * 1024) + .await + .unwrap(); + + let (batches, stats) = exec + .execute_instrumented("SELECT count(*) FROM nation WHERE n_regionkey >= 0") + .await + .unwrap(); + + let count = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .value(0); + assert_eq!(count, 25); + + // Every stat entry must name a CPU node, never a GPU wrapper. 
+ for s in &stats { + assert!( + !s.node_name.starts_with("Gpu"), + "GPU node '{}' leaked into stats", + s.node_name + ); + } + assert!(!stats.is_empty()); +} + +// ── Basic correctness ──────────────────────────────────────────────────── + +#[tokio::test] +async fn test_nation_row_count() { + let ctx = test_ctx(&testdata_minimal_dir()).await.unwrap(); + assert_eq!(count(&ctx, "SELECT count(*) FROM nation").await, 25); +} + +#[tokio::test] +async fn test_region_nation_join() { + let ctx = test_ctx(&testdata_minimal_dir()).await.unwrap(); + let n = count( + &ctx, + "SELECT count(*) FROM nation JOIN region ON nation.n_regionkey = region.r_regionkey", + ) + .await; + assert_eq!(n, 25); +} + +// ── GPU plan node tests ────────────────────────────────────────────────── + +/// Filter + aggregate: SELECT count(*) FROM customer WHERE c_acctbal > 0 +/// Expected GPU nodes: GpuAggregateExec (partial + final), GpuFilterExec +#[tokio::test] +async fn test_gpu_nodes_filter_agg() { + let ctx = test_ctx(&testdata_minimal_dir()).await.unwrap(); + let query = "SELECT count(*) FROM customer WHERE c_acctbal > 0"; + + let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); + assert_plan_matches_canonical(&plan, "filter_agg"); + + let n = count(&ctx, query).await; + assert!(n > 0 && n <= 150_000, "unexpected count {n}"); +} + +/// Hash join + sort: nations joined with their region, sorted by name. 
+/// Expected GPU nodes: GpuSortExec, GpuHashJoinExec +#[tokio::test] +async fn test_gpu_nodes_join_sort() { + let ctx = test_ctx(&testdata_minimal_dir()).await.unwrap(); + let query = " + SELECT n.n_name, r.r_name + FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey + ORDER BY n.n_name"; + + let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); + assert_plan_matches_canonical(&plan, "join_sort"); + + // Result: 25 rows (every nation has exactly one region) + let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 25); +} + +/// Group by + join + sort: nations per region, sorted descending by count. +/// Expected GPU nodes: GpuSortExec, GpuAggregateExec, GpuHashJoinExec +#[tokio::test] +async fn test_gpu_nodes_group_join_sort() { + let ctx = test_ctx(&testdata_minimal_dir()).await.unwrap(); + let query = " + SELECT r.r_name, count(*) AS nation_count + FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey + GROUP BY r.r_name + ORDER BY nation_count DESC, r.r_name"; + + let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); + assert_plan_matches_canonical(&plan, "group_join_sort"); + + // Result: 5 regions, each with exactly 5 nations. + let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 5); + let counts = batches[0].column(1).as_any().downcast_ref::().unwrap(); + for i in 0..counts.len() { + assert_eq!(counts.value(i), 5, "region {} has {} nations, expected 5", i, counts.value(i)); + } +} + +// ── Memory budget tests ────────────────────────────────────────────────── + +/// Find all GpuScanExec nodes and return their batch sizes. 
+fn scan_batch_sizes(plan: &Arc) -> Vec { + let mut sizes = Vec::new(); + if let Some(scan) = plan.as_any().downcast_ref::() { + sizes.push(scan.gpu_batch_size); + } + for child in plan.children() { + sizes.extend(scan_batch_sizes(child)); + } + sizes +} + +/// With a tight GPU memory budget, the batch size should be reduced below +/// the default 8192. Results must still be correct. +#[tokio::test] +async fn test_memory_budget_reduces_batch_size() { + // 10 KiB budget → should force a very small batch size. + let ctx = + create_context_with_tables(&testdata_minimal_dir(), TEST_TARGET_PARTITIONS, 10 * 1024).await.unwrap(); + let query = " + SELECT n.n_name, r.r_name + FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey + ORDER BY n.n_name"; + + let plan = ctx.sql(query).await.unwrap().create_physical_plan().await.unwrap(); + let sizes = scan_batch_sizes(&plan); + assert!(!sizes.is_empty(), "expected GpuScanExec nodes in plan"); + for &bs in &sizes { + assert!(bs < 8192, "expected batch_size < 8192 with 10KiB budget, got {bs}"); + assert!(bs >= 1, "batch_size must be at least 1"); + } + + // Results must still be correct despite smaller batches. 
+ let batches = ctx.sql(query).await.unwrap().collect().await.unwrap(); + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 25); +} diff --git a/peacockdb/src/main.rs b/peacockdb/src/main.rs index 444795c..34f09c2 100644 --- a/peacockdb/src/main.rs +++ b/peacockdb/src/main.rs @@ -32,7 +32,7 @@ async fn main() -> anyhow::Result<()> { let ctx = create_context_with_tables(&cli.data_dir, target_partitions, cli.gpu_memory_budget).await?; let df = ctx.sql(&cli.query).await?; let batches = df.collect().await?; - print_batches(&batches)?; + //print_batches(&batches)?; Ok(()) } diff --git a/testdata/plans.sf1/aggregate-groupby.txt b/testdata/plans.sf1/aggregate-groupby.txt index 3db3962..849a1a4 100644 --- a/testdata/plans.sf1/aggregate-groupby.txt +++ b/testdata/plans.sf1/aggregate-groupby.txt @@ -1,5 +1,5 @@ -GpuAggregateExec: group_by=[l_returnflag], aggr=[sum(lineitem.l_quantity)] - GpuCoalesceBatchesExec: target_batch_size=22369621 - GpuRepartitionExec: partitioning=Hash([l_returnflag@0], 8), input_partitions=8 - GpuAggregateExec: group_by=[l_returnflag], aggr=[sum(lineitem.l_quantity)] - GpuScanExec: batch_size=22369621 \ No newline at end of file +AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=22369621 + RepartitionExec: partitioning=Hash([l_returnflag@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[l_returnflag@1 as l_returnflag], aggr=[sum(lineitem.l_quantity)] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/anti-join.txt b/testdata/plans.sf1/anti-join.txt index 11f6228..0ff0477 100644 --- a/testdata/plans.sf1/anti-join.txt +++ b/testdata/plans.sf1/anti-join.txt @@ -1,10 +1,10 @@ -GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuHashJoinExec - GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuRepartitionExec: partitioning=Hash([c_custkey@0], 8), 
input_partitions=8 - GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuFilterExec - GpuScanExec: batch_size=6242685 - GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuRepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 - GpuScanExec: batch_size=6242685 \ No newline at end of file +CoalesceBatchesExec: target_batch_size=6242685 + HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(c_custkey@0, o_custkey@1)] + CoalesceBatchesExec: target_batch_size=6242685 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=6242685 + FilterExec: c_mktsegment@1 = BUILDING, projection=[c_custkey@0] + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=6242685 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + ParquetExec: table=orders \ No newline at end of file diff --git a/testdata/plans.sf1/cross-join.txt b/testdata/plans.sf1/cross-join.txt index 3dcb46f..8056ea6 100644 --- a/testdata/plans.sf1/cross-join.txt +++ b/testdata/plans.sf1/cross-join.txt @@ -1,3 +1,3 @@ CrossJoinExec - GpuScanExec: batch_size=29826161 - GpuScanExec: batch_size=29826161 \ No newline at end of file + ParquetExec: table=region + ParquetExec: table=nation \ No newline at end of file diff --git a/testdata/plans.sf1/filter-project.txt b/testdata/plans.sf1/filter-project.txt index ad0242a..a522b0c 100644 --- a/testdata/plans.sf1/filter-project.txt +++ b/testdata/plans.sf1/filter-project.txt @@ -1,3 +1,3 @@ -GpuCoalesceBatchesExec: target_batch_size=44739242 - GpuFilterExec - GpuScanExec: batch_size=44739242 \ No newline at end of file +CoalesceBatchesExec: target_batch_size=44739242 + FilterExec: l_quantity@1 > Some(3000),15,2 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/hash-join.txt b/testdata/plans.sf1/hash-join.txt index 8d07f98..6b088f5 100644 --- a/testdata/plans.sf1/hash-join.txt +++ b/testdata/plans.sf1/hash-join.txt 
@@ -1,8 +1,8 @@ -GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuHashJoinExec - GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuRepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 - GpuScanExec: batch_size=2462710 - GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuRepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 - GpuScanExec: batch_size=2462710 \ No newline at end of file +CoalesceBatchesExec: target_batch_size=2462710 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)] + CoalesceBatchesExec: target_batch_size=2462710 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=2462710 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/left-join.txt b/testdata/plans.sf1/left-join.txt index 8d07f98..1cc664f 100644 --- a/testdata/plans.sf1/left-join.txt +++ b/testdata/plans.sf1/left-join.txt @@ -1,8 +1,8 @@ -GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuHashJoinExec - GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuRepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 - GpuScanExec: batch_size=2462710 - GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuRepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 - GpuScanExec: batch_size=2462710 \ No newline at end of file +CoalesceBatchesExec: target_batch_size=2462710 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(o_orderkey@0, l_orderkey@0)] + CoalesceBatchesExec: target_batch_size=2462710 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=2462710 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at 
end of file diff --git a/testdata/plans.sf1/mixed-join.txt b/testdata/plans.sf1/mixed-join.txt index 8d07f98..ad172e6 100644 --- a/testdata/plans.sf1/mixed-join.txt +++ b/testdata/plans.sf1/mixed-join.txt @@ -1,8 +1,8 @@ -GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuHashJoinExec - GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuRepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 - GpuScanExec: batch_size=2462710 - GpuCoalesceBatchesExec: target_batch_size=2462710 - GpuRepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 - GpuScanExec: batch_size=2462710 \ No newline at end of file +CoalesceBatchesExec: target_batch_size=2462710 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], filter=l_shipdate@1 >= o_orderdate@0 AND l_shipdate@1 <= o_orderdate@0 + IntervalMonthDayNano { months: 0, days: 90, nanoseconds: 0 } + CoalesceBatchesExec: target_batch_size=2462710 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=2462710 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/nested-loop-join.txt b/testdata/plans.sf1/nested-loop-join.txt index 2816568..0267b2f 100644 --- a/testdata/plans.sf1/nested-loop-join.txt +++ b/testdata/plans.sf1/nested-loop-join.txt @@ -1,3 +1,3 @@ NestedLoopJoinExec: join_type=Inner, filter=n_regionkey@1 > r_regionkey@0 - GpuScanExec: batch_size=29826161 - GpuScanExec: batch_size=29826161 \ No newline at end of file + ParquetExec: table=region + ParquetExec: table=nation \ No newline at end of file diff --git a/testdata/plans.sf1/q1.txt b/testdata/plans.sf1/q1.txt new file mode 100644 index 0000000..6745256 --- /dev/null +++ b/testdata/plans.sf1/q1.txt @@ -0,0 +1,11 @@ +SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS 
LAST] + SortExec: expr=[l_returnflag@0 ASC NULLS LAST, l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + CoalesceBatchesExec: target_batch_size=5478274 + RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_1, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=5478274 + FilterExec: l_shipdate@6 <= 1998-09-02, 
projection=[l_quantity@0, l_extendedprice@1, l_discount@2, l_tax@3, l_returnflag@4, l_linestatus@5] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q10.txt b/testdata/plans.sf1/q10.txt new file mode 100644 index 0000000..6decc90 --- /dev/null +++ b/testdata/plans.sf1/q10.txt @@ -0,0 +1,34 @@ +SortPreservingMergeExec: [revenue@2 DESC], fetch=20 + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=4880644 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, 
l_discount@10] + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([c_nationkey@3], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4880644 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([o_orderkey@7], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4880644 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4880644 + FilterExec: o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01, projection=[o_orderkey@0, o_custkey@1] + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=4880644 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4880644 + FilterExec: l_returnflag@3 = R, projection=[l_orderkey@0, l_extendedprice@1, l_discount@2] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q11.txt b/testdata/plans.sf1/q11.txt new file mode 100644 index 0000000..0438af3 --- /dev/null +++ b/testdata/plans.sf1/q11.txt @@ -0,0 +1,52 @@ +SortPreservingMergeExec: [value@1 DESC] + SortExec: expr=[value@1 DESC], 
preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.000002)@1, projection=[ps_partkey@1, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2] + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.000002 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.000002)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=24403223 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=24403223 + FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([s_nationkey@2], 8), input_partitions=8 + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=24403223 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), 
input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 8), input_partitions=8 + ParquetExec: table=partsupp + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([ps_partkey@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=24403223 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=24403223 + FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([s_nationkey@3], 8), input_partitions=8 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=24403223 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 8), input_partitions=8 + ParquetExec: table=partsupp \ No newline at 
end of file diff --git a/testdata/plans.sf1/q12.txt b/testdata/plans.sf1/q12.txt new file mode 100644 index 0000000..bdce9c3 --- /dev/null +++ b/testdata/plans.sf1/q12.txt @@ -0,0 +1,18 @@ +SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] + SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([l_shipmode@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] + CoalesceBatchesExec: target_batch_size=14913080 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14913080 + FilterExec: (l_shipmode@4 = MAIL OR l_shipmode@4 = 
SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1994-01-01 AND l_receiptdate@3 < 1995-01-01, projection=[l_orderkey@0, l_shipmode@4] + ParquetExec: table=lineitem + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + ParquetExec: table=orders \ No newline at end of file diff --git a/testdata/plans.sf1/q13.txt b/testdata/plans.sf1/q13.txt new file mode 100644 index 0000000..44163b2 --- /dev/null +++ b/testdata/plans.sf1/q13.txt @@ -0,0 +1,19 @@ +SortPreservingMergeExec: [custdist@1 DESC, c_count@0 DESC] + SortExec: expr=[custdist@1 DESC, c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=33554432 + RepartitionExec: partitioning=Hash([c_count@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=33554432 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=33554432 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=33554432 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=33554432 + FilterExec: o_comment@2 NOT LIKE %special%requests%, projection=[o_orderkey@0, o_custkey@1] + ParquetExec: table=orders \ No newline at end of file diff --git a/testdata/plans.sf1/q14.txt b/testdata/plans.sf1/q14.txt new file mode 100644 index 0000000..a12159a 
--- /dev/null +++ b/testdata/plans.sf1/q14.txt @@ -0,0 +1,16 @@ +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_1, p_type@0 as p_type] + CoalesceBatchesExec: target_batch_size=14913080 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([l_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14913080 + FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3 < 1995-10-01, projection=[l_partkey@0, l_extendedprice@1, l_discount@2] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q16.txt b/testdata/plans.sf1/q16.txt new file mode 100644 index 0000000..6de953f --- /dev/null +++ b/testdata/plans.sf1/q16.txt @@ -0,0 +1,33 @@ +SortPreservingMergeExec: [supplier_cnt@3 DESC, p_brand@0 ASC NULLS LAST, p_type@1 ASC NULLS 
LAST, p_size@2 ASC NULLS LAST] + SortExec: expr=[supplier_cnt@3 DESC, p_brand@0 ASC NULLS LAST, p_type@1 ASC NULLS LAST, p_size@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + CoalesceBatchesExec: target_batch_size=12782640 + RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=12782640 + RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=12782640 + HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] + CoalesceBatchesExec: target_batch_size=12782640 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=12782640 + FilterExec: s_comment@1 LIKE %Customer%Complaints%, projection=[s_suppkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=12782640 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 8), input_partitions=8 + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=12782640 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, 
p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=12782640 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=12782640 + FilterExec: p_brand@1 != Brand#45 AND p_type@2 NOT LIKE MEDIUM POLISHED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(49) }, Literal { value: Int32(14) }, Literal { value: Int32(23) }, Literal { value: Int32(45) }, Literal { value: Int32(19) }, Literal { value: Int32(3) }, Literal { value: Int32(36) }, Literal { value: Int32(9) }]) + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=12782640 + RepartitionExec: partitioning=Hash([ps_partkey@0], 8), input_partitions=8 + ParquetExec: table=partsupp \ No newline at end of file diff --git a/testdata/plans.sf1/q17.txt b/testdata/plans.sf1/q17.txt new file mode 100644 index 0000000..17b0084 --- /dev/null +++ b/testdata/plans.sf1/q17.txt @@ -0,0 +1,24 @@ +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: target_batch_size=24403223 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=24403223 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: 
target_batch_size=24403223 + FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX, projection=[p_partkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([l_partkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=24403223 + RepartitionExec: partitioning=Hash([l_partkey@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q18.txt b/testdata/plans.sf1/q18.txt new file mode 100644 index 0000000..aac20a9 --- /dev/null +++ b/testdata/plans.sf1/q18.txt @@ -0,0 +1,30 @@ +SortPreservingMergeExec: [o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC, o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=12201611 + HashJoinExec: mode=Partitioned, 
join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + CoalesceBatchesExec: target_batch_size=12201611 + FilterExec: sum(lineitem.l_quantity)@1 > Some(30000),25,2, projection=[l_orderkey@0] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: table=lineitem + CoalesceBatchesExec: target_batch_size=12201611 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([o_orderkey@2], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=12201611 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q19.txt b/testdata/plans.sf1/q19.txt new file mode 100644 index 0000000..c837c5c --- /dev/null +++ b/testdata/plans.sf1/q19.txt @@ -0,0 +1,17 @@ +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + 
CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=12201611 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#12 AND p_container@3 IN ([Literal { value: Utf8View("SM CASE") }, Literal { value: Utf8View("SM BOX") }, Literal { value: Utf8View("SM PACK") }, Literal { value: Utf8View("SM PKG") }]) AND l_quantity@0 >= Some(100),15,2 AND l_quantity@0 <= Some(1100),15,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#23 AND p_container@3 IN ([Literal { value: Utf8View("MED BAG") }, Literal { value: Utf8View("MED BOX") }, Literal { value: Utf8View("MED PKG") }, Literal { value: Utf8View("MED PACK") }]) AND l_quantity@0 >= Some(1000),15,2 AND l_quantity@0 <= Some(2000),15,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#34 AND p_container@3 IN ([Literal { value: Utf8View("LG CASE") }, Literal { value: Utf8View("LG BOX") }, Literal { value: Utf8View("LG PACK") }, Literal { value: Utf8View("LG PKG") }]) AND l_quantity@0 >= Some(2000),15,2 AND l_quantity@0 <= Some(3000),15,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=12201611 + FilterExec: (p_brand@1 = Brand#12 AND p_container@3 IN ([Literal { value: Utf8View("SM CASE") }, Literal { value: Utf8View("SM BOX") }, Literal { value: Utf8View("SM PACK") }, Literal { value: Utf8View("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#23 AND p_container@3 IN ([Literal { value: Utf8View("MED BAG") }, Literal { value: Utf8View("MED BOX") }, Literal { value: Utf8View("MED PKG") }, Literal { value: Utf8View("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#34 AND p_container@3 IN ([Literal { value: Utf8View("LG CASE") }, Literal { value: Utf8View("LG BOX") }, Literal { value: 
Utf8View("LG PACK") }, Literal { value: Utf8View("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=12201611 + RepartitionExec: partitioning=Hash([l_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=12201611 + FilterExec: (l_quantity@1 >= Some(100),15,2 AND l_quantity@1 <= Some(1100),15,2 OR l_quantity@1 >= Some(1000),15,2 AND l_quantity@1 <= Some(2000),15,2 OR l_quantity@1 >= Some(2000),15,2 AND l_quantity@1 <= Some(3000),15,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON, projection=[l_partkey@0, l_quantity@1, l_extendedprice@2, l_discount@3] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q2.txt b/testdata/plans.sf1/q2.txt new file mode 100644 index 0000000..8a63b3d --- /dev/null +++ b/testdata/plans.sf1/q2.txt @@ -0,0 +1,80 @@ +SortPreservingMergeExec: [s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([r_regionkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4473924 + FilterExec: r_name@1 = EUROPE, projection=[r_regionkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=region + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([n_regionkey@9], 8), input_partitions=8 + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([s_nationkey@4], 8), input_partitions=8 + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] 
+ CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([ps_suppkey@2], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=4473924 + FilterExec: p_size@3 = 15 AND p_type@2 LIKE %BRASS, projection=[p_partkey@0, p_mfgr@1] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([ps_partkey@0], 8), input_partitions=8 + ParquetExec: table=partsupp + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 8), input_partitions=8 + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([ps_partkey@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([r_regionkey@0], 8), input_partitions=8 + CoalesceBatchesExec: 
target_batch_size=4473924 + FilterExec: r_name@1 = EUROPE, projection=[r_regionkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=region + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([n_regionkey@2], 8), input_partitions=8 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([s_nationkey@2], 8), input_partitions=8 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=4473924 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=4473924 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 8), input_partitions=8 + ParquetExec: table=partsupp \ No newline at end of file diff --git a/testdata/plans.sf1/q20.txt b/testdata/plans.sf1/q20.txt new file mode 100644 index 0000000..34f778e --- /dev/null +++ b/testdata/plans.sf1/q20.txt @@ -0,0 +1,43 @@ +SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: 
mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + FilterExec: n_name@1 = CANADA, projection=[n_nationkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([s_nationkey@3], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + FilterExec: p_name@1 LIKE forest%, projection=[p_partkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + 
CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([ps_partkey@0], 8), input_partitions=8 + ParquetExec: table=partsupp + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=14128181 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01, projection=[l_partkey@0, l_suppkey@1, l_quantity@2] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q21.txt b/testdata/plans.sf1/q21.txt new file mode 100644 index 0000000..d429026 --- /dev/null +++ b/testdata/plans.sf1/q21.txt @@ -0,0 +1,51 @@ +SortPreservingMergeExec: [numwait@1 DESC, s_name@0 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[numwait@1 DESC, s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([s_name@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=19173961 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=19173961 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], 
filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([l_orderkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + FilterExec: n_name@1 = SAUDI ARABIA, projection=[n_nationkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([s_nationkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + FilterExec: o_orderstatus@1 = F, projection=[o_orderkey@0] + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([l_orderkey@2], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([l_suppkey@1], 8), input_partitions=8 + CoalesceBatchesExec: 
target_batch_size=19173961 + FilterExec: l_receiptdate@3 > l_commitdate@2, projection=[l_orderkey@0, l_suppkey@1] + ParquetExec: table=lineitem + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + FilterExec: l_receiptdate@3 > l_commitdate@2, projection=[l_orderkey@0, l_suppkey@1] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q22.txt b/testdata/plans.sf1/q22.txt new file mode 100644 index 0000000..4e7afec --- /dev/null +++ b/testdata/plans.sf1/q22.txt @@ -0,0 +1,25 @@ +SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([cntrycode@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(19, 6)) > avg(customer.c_acctbal)@1, projection=[c_phone@1, c_acctbal@2] + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + CoalesceBatchesExec: target_batch_size=19173961 + FilterExec: c_acctbal@1 > Some(0),15,2 AND substr(c_phone@0, 1, 2) IN ([Literal { value: Utf8View("13") }, Literal { value: Utf8View("31") }, Literal { value: 
Utf8View("23") }, Literal { value: Utf8View("29") }, Literal { value: Utf8View("30") }, Literal { value: Utf8View("18") }, Literal { value: Utf8View("17") }]), projection=[c_acctbal@1] + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=19173961 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=19173961 + FilterExec: substr(c_phone@1, 1, 2) IN ([Literal { value: Utf8View("13") }, Literal { value: Utf8View("31") }, Literal { value: Utf8View("23") }, Literal { value: Utf8View("29") }, Literal { value: Utf8View("30") }, Literal { value: Utf8View("18") }, Literal { value: Utf8View("17") }]) + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=19173961 + RepartitionExec: partitioning=Hash([o_custkey@0], 8), input_partitions=8 + ParquetExec: table=orders \ No newline at end of file diff --git a/testdata/plans.sf1/q3.txt b/testdata/plans.sf1/q3.txt new file mode 100644 index 0000000..cca835f --- /dev/null +++ b/testdata/plans.sf1/q3.txt @@ -0,0 +1,28 @@ +SortPreservingMergeExec: [revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], fetch=10 + SortExec: TopK(fetch=10), expr=[revenue@1 DESC, o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=20648881 + RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 8), input_partitions=8 + 
AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=20648881 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=20648881 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=20648881 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + CoalesceBatchesExec: target_batch_size=20648881 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=20648881 + FilterExec: c_mktsegment@1 = BUILDING, projection=[c_custkey@0] + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=20648881 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=20648881 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=20648881 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=20648881 + FilterExec: l_shipdate@3 > 1995-03-15, projection=[l_orderkey@0, l_extendedprice@1, l_discount@2] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q4.txt b/testdata/plans.sf1/q4.txt new file mode 100644 index 0000000..f5efefe --- /dev/null +++ b/testdata/plans.sf1/q4.txt @@ -0,0 +1,19 @@ +SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as 
order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=25565281 + RepartitionExec: partitioning=Hash([o_orderpriority@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=25565281 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=25565281 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=25565281 + FilterExec: o_orderdate@1 >= 1993-07-01 AND o_orderdate@1 < 1993-10-01, projection=[o_orderkey@0, o_orderpriority@2] + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=25565281 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=25565281 + FilterExec: l_receiptdate@2 > l_commitdate@1, projection=[l_orderkey@0] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q5.txt b/testdata/plans.sf1/q5.txt new file mode 100644 index 0000000..c1719be --- /dev/null +++ b/testdata/plans.sf1/q5.txt @@ -0,0 +1,51 @@ +SortPreservingMergeExec: [revenue@1 DESC] + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([n_name@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=14913080 + HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([r_regionkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14913080 + FilterExec: r_name@1 = ASIA, projection=[r_regionkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=region + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([n_regionkey@3], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=14913080 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([s_nationkey@2], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=14913080 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14913080 + 
HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([o_orderkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14913080 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14913080 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01, projection=[o_orderkey@0, o_custkey@1] + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=14913080 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q6.txt b/testdata/plans.sf1/q6.txt new file mode 100644 index 0000000..246972d --- /dev/null +++ b/testdata/plans.sf1/q6.txt @@ -0,0 +1,7 @@ +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=25565281 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(5),15,2 AND l_discount@2 <= Some(7),15,2 AND l_quantity@0 < Some(2400),15,2, projection=[l_extendedprice@1, l_discount@2] + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q7.txt b/testdata/plans.sf1/q7.txt new file 
mode 100644 index 0000000..05cc1f1 --- /dev/null +++ b/testdata/plans.sf1/q7.txt @@ -0,0 +1,55 @@ +SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST] + SortExec: expr=[supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = FRANCE AND n_name@1 = GERMANY OR n_name@0 = GERMANY AND n_name@1 = FRANCE, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + FilterExec: n_name@1 = GERMANY OR n_name@1 = FRANCE + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([c_nationkey@3], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, 
c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + FilterExec: n_name@1 = FRANCE OR n_name@1 = GERMANY + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([s_nationkey@0], 8), input_partitions=8 + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([o_custkey@4], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([l_orderkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + 
CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([l_suppkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: table=lineitem + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + ParquetExec: table=orders \ No newline at end of file diff --git a/testdata/plans.sf1/q8.txt b/testdata/plans.sf1/q8.txt new file mode 100644 index 0000000..7fa12ab --- /dev/null +++ b/testdata/plans.sf1/q8.txt @@ -0,0 +1,72 @@ +SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([o_year@0], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, 
o_orderdate@3, n_name@5] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([r_regionkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + FilterExec: r_name@1 = AMERICA, projection=[r_regionkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=region + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([n_regionkey@3], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([s_nationkey@2], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([c_nationkey@4], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as 
o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([o_custkey@3], 8), input_partitions=8 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=14128181 + 
RepartitionExec: partitioning=Hash([l_suppkey@1], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=14128181 + FilterExec: p_type@1 = ECONOMY ANODIZED STEEL, projection=[p_partkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=14128181 + RepartitionExec: partitioning=Hash([l_partkey@1], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/q9.txt b/testdata/plans.sf1/q9.txt new file mode 100644 index 0000000..15218ad --- /dev/null +++ b/testdata/plans.sf1/q9.txt @@ -0,0 +1,51 @@ +SortPreservingMergeExec: [nation@0 ASC NULLS LAST, o_year@1 DESC] + SortExec: expr=[nation@0 ASC NULLS LAST, o_year@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([nation@0, o_year@1], 8), input_partitions=8 + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, 
o_orderdate@7] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([n_nationkey@0], 8), input_partitions=1 + ParquetExec: table=nation + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([s_nationkey@3], 8), input_partitions=8 + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([o_orderkey@0], 8), input_partitions=8 + ParquetExec: table=orders + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([l_orderkey@0], 8), input_partitions=8 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 8), input_partitions=8 + ParquetExec: table=partsupp + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 8), input_partitions=8 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, 
l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([s_suppkey@0], 8), input_partitions=8 + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=supplier + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([l_suppkey@2], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([p_partkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=10324440 + FilterExec: p_name@1 LIKE %green%, projection=[p_partkey@0] + RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 + ParquetExec: table=part + CoalesceBatchesExec: target_batch_size=10324440 + RepartitionExec: partitioning=Hash([l_partkey@1], 8), input_partitions=8 + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/scan-limit.txt b/testdata/plans.sf1/scan-limit.txt index 991c094..1c8ed92 100644 --- a/testdata/plans.sf1/scan-limit.txt +++ b/testdata/plans.sf1/scan-limit.txt @@ -1,3 +1,3 @@ GlobalLimitExec: skip=0, fetch=10 - GpuCoalescePartitionsExec - GpuScanExec: batch_size=8012998 \ No newline at end of file + CoalescePartitionsExec + ParquetExec: table=lineitem \ No newline at end of file diff --git a/testdata/plans.sf1/semi-join.txt b/testdata/plans.sf1/semi-join.txt index 11f6228..3841131 100644 --- a/testdata/plans.sf1/semi-join.txt +++ 
b/testdata/plans.sf1/semi-join.txt @@ -1,10 +1,10 @@ -GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuHashJoinExec - GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuRepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 - GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuFilterExec - GpuScanExec: batch_size=6242685 - GpuCoalesceBatchesExec: target_batch_size=6242685 - GpuRepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 - GpuScanExec: batch_size=6242685 \ No newline at end of file +CoalesceBatchesExec: target_batch_size=6242685 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(c_custkey@0, o_custkey@1)] + CoalesceBatchesExec: target_batch_size=6242685 + RepartitionExec: partitioning=Hash([c_custkey@0], 8), input_partitions=8 + CoalesceBatchesExec: target_batch_size=6242685 + FilterExec: c_mktsegment@1 = BUILDING, projection=[c_custkey@0] + ParquetExec: table=customer + CoalesceBatchesExec: target_batch_size=6242685 + RepartitionExec: partitioning=Hash([o_custkey@1], 8), input_partitions=8 + ParquetExec: table=orders \ No newline at end of file diff --git a/testdata/plans/filter_agg.txt b/testdata/plans/filter_agg.txt index 74a9f3f..e0f1e85 100644 --- a/testdata/plans/filter_agg.txt +++ b/testdata/plans/filter_agg.txt @@ -4,12 +4,4 @@ GpuAggregateExec: group_by=[], aggr=[count(*)] GpuProjectExec GpuCoalesceBatchesExec: target_batch_size=67108864 GpuFilterExec - GpuScanExec: batch_size=67108864 ---- memory --- -GpuAggregateExec: row_width=8, subtree_max_row_bytes=32 - GpuCoalescePartitionsExec: row_width=8, subtree_max_row_bytes=32 - GpuAggregateExec: row_width=8, subtree_max_row_bytes=32 - GpuProjectExec: row_width=1, subtree_max_row_bytes=32 - GpuCoalesceBatchesExec: row_width=16, subtree_max_row_bytes=32 - GpuFilterExec: row_width=16, subtree_max_row_bytes=32 - GpuScanExec: row_width=16, subtree_max_row_bytes=16 + GpuScanExec: batch_size=67108864 \ No newline at end of file diff --git 
a/testdata/plans/group_join_sort.txt b/testdata/plans/group_join_sort.txt index 0e5423e..e56abc5 100644 --- a/testdata/plans/group_join_sort.txt +++ b/testdata/plans/group_join_sort.txt @@ -9,17 +9,4 @@ GpuSortPreservingMergeExec: [nation_count@1 DESC, r_name@0 ASC NULLS LAST] GpuCoalesceBatchesExec: target_batch_size=26843545 GpuHashJoinExec GpuScanExec: batch_size=26843545 - GpuScanExec: batch_size=26843545 ---- memory --- -GpuSortPreservingMergeExec: row_width=40, subtree_max_row_bytes=80 - GpuSortExec: row_width=40, subtree_max_row_bytes=80 - GpuProjectExec: row_width=40, subtree_max_row_bytes=80 - GpuAggregateExec: row_width=40, subtree_max_row_bytes=80 - GpuCoalesceBatchesExec: row_width=40, subtree_max_row_bytes=72 - GpuRepartitionExec: row_width=40, subtree_max_row_bytes=72 - GpuAggregateExec: row_width=40, subtree_max_row_bytes=72 - GpuRepartitionExec: row_width=32, subtree_max_row_bytes=72 - GpuCoalesceBatchesExec: row_width=32, subtree_max_row_bytes=72 - GpuHashJoinExec: row_width=32, subtree_max_row_bytes=72 - GpuScanExec: row_width=36, subtree_max_row_bytes=36 - GpuScanExec: row_width=4, subtree_max_row_bytes=4 + GpuScanExec: batch_size=26843545 \ No newline at end of file diff --git a/testdata/plans/join_sort.txt b/testdata/plans/join_sort.txt index 4b246f6..a833a3f 100644 --- a/testdata/plans/join_sort.txt +++ b/testdata/plans/join_sort.txt @@ -3,11 +3,4 @@ GpuSortExec GpuCoalesceBatchesExec: target_batch_size=15790320 GpuHashJoinExec GpuScanExec: batch_size=15790320 - GpuScanExec: batch_size=15790320 ---- memory --- -GpuSortExec: row_width=64, subtree_max_row_bytes=136 - GpuProjectExec: row_width=64, subtree_max_row_bytes=136 - GpuCoalesceBatchesExec: row_width=64, subtree_max_row_bytes=136 - GpuHashJoinExec: row_width=64, subtree_max_row_bytes=136 - GpuScanExec: row_width=36, subtree_max_row_bytes=36 - GpuScanExec: row_width=36, subtree_max_row_bytes=36 + GpuScanExec: batch_size=15790320 \ No newline at end of file diff --git 
a/testdata/tpch-queries/NOTICE b/testdata/tpch-queries/NOTICE new file mode 100644 index 0000000..4533d6d --- /dev/null +++ b/testdata/tpch-queries/NOTICE @@ -0,0 +1,10 @@ +The SQL queries in this directory are derived from the TPC-H benchmark specification. + +TPC-H is a decision support benchmark developed and maintained by the Transaction +Processing Performance Council (TPC). The benchmark specification and queries are +Copyright (C) 1993-2022 Transaction Processing Performance Council. All rights reserved. + +These queries are used here solely for non-commercial benchmarking and research purposes +in accordance with the TPC Fair Use Policy (https://www.tpc.org/tpc_documents_current_versions/pdf/tpc_fair_use_policy.pdf). + +The source of the benchmark specification is: https://www.tpc.org/tpch/ diff --git a/testdata/tpch-queries/q1.sql b/testdata/tpch-queries/q1.sql new file mode 100644 index 0000000..18ee0bd --- /dev/null +++ b/testdata/tpch-queries/q1.sql @@ -0,0 +1,15 @@ +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from lineitem +where l_shipdate <= date '1998-12-01' - interval '90' day +group by l_returnflag, l_linestatus +order by l_returnflag, l_linestatus; diff --git a/testdata/tpch-queries/q10.sql b/testdata/tpch-queries/q10.sql new file mode 100644 index 0000000..d8e0f4b --- /dev/null +++ b/testdata/tpch-queries/q10.sql @@ -0,0 +1,19 @@ +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from customer, orders, lineitem, nation +where c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < 
date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment +order by revenue desc +limit 20; diff --git a/testdata/tpch-queries/q11.sql b/testdata/tpch-queries/q11.sql new file mode 100644 index 0000000..6b40861 --- /dev/null +++ b/testdata/tpch-queries/q11.sql @@ -0,0 +1,16 @@ +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from partsupp, supplier, nation +where ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by ps_partkey +having sum(ps_supplycost * ps_availqty) > ( + select sum(ps_supplycost * ps_availqty) * 0.000002 + from partsupp, supplier, nation + where ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +) +order by value desc; diff --git a/testdata/tpch-queries/q12.sql b/testdata/tpch-queries/q12.sql new file mode 100644 index 0000000..5f5553a --- /dev/null +++ b/testdata/tpch-queries/q12.sql @@ -0,0 +1,19 @@ +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 + else 0 + end) as low_line_count +from orders, lineitem +where o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by l_shipmode +order by l_shipmode; diff --git a/testdata/tpch-queries/q13.sql b/testdata/tpch-queries/q13.sql new file mode 100644 index 0000000..1e961fc --- /dev/null +++ b/testdata/tpch-queries/q13.sql @@ -0,0 +1,14 @@ +select + c_count, + count(*) as custdist +from ( + select + c_custkey, + count(o_orderkey) as c_count + from customer + left outer join orders on c_custkey = o_custkey + and o_comment not like '%special%requests%' + 
group by c_custkey +) as c_orders +group by c_count +order by custdist desc, c_count desc; diff --git a/testdata/tpch-queries/q14.sql b/testdata/tpch-queries/q14.sql new file mode 100644 index 0000000..cc73712 --- /dev/null +++ b/testdata/tpch-queries/q14.sql @@ -0,0 +1,9 @@ +select + 100.00 * sum(case + when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from lineitem, part +where l_partkey = p_partkey + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month; diff --git a/testdata/tpch-queries/q15.sql b/testdata/tpch-queries/q15.sql new file mode 100644 index 0000000..480b0b7 --- /dev/null +++ b/testdata/tpch-queries/q15.sql @@ -0,0 +1,18 @@ +-- TODO: remove view from this query +-- Q15 requires a view: revenue0 +-- create view revenue0 as +-- select l_suppkey as supplier_no, sum(l_extendedprice * (1 - l_discount)) as total_revenue +-- from lineitem +-- where l_shipdate >= date '1996-01-01' and l_shipdate < date '1996-01-01' + interval '3' month +-- group by l_suppkey; + +-- select +-- s_suppkey, +-- s_name, +-- s_address, +-- s_phone, +-- total_revenue +-- from supplier, revenue0 +-- where s_suppkey = supplier_no +-- and total_revenue = (select max(total_revenue) from revenue0) +-- order by s_suppkey; diff --git a/testdata/tpch-queries/q16.sql b/testdata/tpch-queries/q16.sql new file mode 100644 index 0000000..9e8c2d0 --- /dev/null +++ b/testdata/tpch-queries/q16.sql @@ -0,0 +1,17 @@ +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from partsupp, part +where p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select s_suppkey + from supplier + where s_comment like '%Customer%Complaints%' + ) +group by p_brand, p_type, p_size +order by supplier_cnt desc, p_brand, p_type, p_size; diff --git 
a/testdata/tpch-queries/q17.sql b/testdata/tpch-queries/q17.sql new file mode 100644 index 0000000..a679b55 --- /dev/null +++ b/testdata/tpch-queries/q17.sql @@ -0,0 +1,11 @@ +select + sum(l_extendedprice) / 7.0 as avg_yearly +from lineitem, part +where p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + and l_quantity < ( + select 0.2 * avg(l_quantity) + from lineitem + where l_partkey = p_partkey + ); diff --git a/testdata/tpch-queries/q18.sql b/testdata/tpch-queries/q18.sql new file mode 100644 index 0000000..bd05ae5 --- /dev/null +++ b/testdata/tpch-queries/q18.sql @@ -0,0 +1,19 @@ +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from customer, orders, lineitem +where o_orderkey in ( + select l_orderkey + from lineitem + group by l_orderkey + having sum(l_quantity) > 300 +) +and c_custkey = o_custkey +and o_orderkey = l_orderkey +group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice +order by o_totalprice desc, o_orderdate +limit 100; diff --git a/testdata/tpch-queries/q19.sql b/testdata/tpch-queries/q19.sql new file mode 100644 index 0000000..e7a1aba --- /dev/null +++ b/testdata/tpch-queries/q19.sql @@ -0,0 +1,28 @@ +select + sum(l_extendedprice * (1 - l_discount)) as revenue +from lineitem, part +where ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' +) or ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' +) or ( + p_partkey = l_partkey + and p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 
+ and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' +); diff --git a/testdata/tpch-queries/q2.sql b/testdata/tpch-queries/q2.sql new file mode 100644 index 0000000..54ebf5c --- /dev/null +++ b/testdata/tpch-queries/q2.sql @@ -0,0 +1,28 @@ +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from part, supplier, partsupp, nation, region +where p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select min(ps_supplycost) + from partsupp, supplier, nation, region + where p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + ) +order by s_acctbal desc, n_name, s_name, p_partkey +limit 100; diff --git a/testdata/tpch-queries/q20.sql b/testdata/tpch-queries/q20.sql new file mode 100644 index 0000000..664644f --- /dev/null +++ b/testdata/tpch-queries/q20.sql @@ -0,0 +1,24 @@ +select + s_name, + s_address +from supplier, nation +where s_suppkey in ( + select ps_suppkey + from partsupp + where ps_partkey in ( + select p_partkey + from part + where p_name like 'forest%' + ) + and ps_availqty > ( + select 0.5 * sum(l_quantity) + from lineitem + where l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + ) +) +and s_nationkey = n_nationkey +and n_name = 'CANADA' +order by s_name; diff --git a/testdata/tpch-queries/q21.sql b/testdata/tpch-queries/q21.sql new file mode 100644 index 0000000..9a8af94 --- /dev/null +++ b/testdata/tpch-queries/q21.sql @@ -0,0 +1,26 @@ +select + s_name, + count(*) as numwait +from supplier, lineitem l1, orders, nation +where s_suppkey = l1.l_suppkey + and 
o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select * + from lineitem l2 + where l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select * + from lineitem l3 + where l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by s_name +order by numwait desc, s_name +limit 100; diff --git a/testdata/tpch-queries/q22.sql b/testdata/tpch-queries/q22.sql new file mode 100644 index 0000000..bbe553c --- /dev/null +++ b/testdata/tpch-queries/q22.sql @@ -0,0 +1,24 @@ +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from ( + select + substring(c_phone, 1, 2) as cntrycode, + c_acctbal + from customer + where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select avg(c_acctbal) + from customer + where c_acctbal > 0.00 + and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select * + from orders + where o_custkey = c_custkey + ) +) as custsale +group by cntrycode +order by cntrycode; diff --git a/testdata/tpch-queries/q3.sql b/testdata/tpch-queries/q3.sql new file mode 100644 index 0000000..0cb53fb --- /dev/null +++ b/testdata/tpch-queries/q3.sql @@ -0,0 +1,14 @@ +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from customer, orders, lineitem +where c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by l_orderkey, o_orderdate, o_shippriority +order by revenue desc, o_orderdate +limit 10; diff --git a/testdata/tpch-queries/q4.sql b/testdata/tpch-queries/q4.sql new file mode 100644 index 0000000..e8cd676 --- /dev/null +++ b/testdata/tpch-queries/q4.sql @@ -0,0 +1,14 
@@ +select + o_orderpriority, + count(*) as order_count +from orders +where o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select * + from lineitem + where l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by o_orderpriority +order by o_orderpriority; diff --git a/testdata/tpch-queries/q5.sql b/testdata/tpch-queries/q5.sql new file mode 100644 index 0000000..5194f18 --- /dev/null +++ b/testdata/tpch-queries/q5.sql @@ -0,0 +1,15 @@ +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from customer, orders, lineitem, supplier, nation, region +where c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by n_name +order by revenue desc; diff --git a/testdata/tpch-queries/q6.sql b/testdata/tpch-queries/q6.sql new file mode 100644 index 0000000..82af111 --- /dev/null +++ b/testdata/tpch-queries/q6.sql @@ -0,0 +1,7 @@ +select + sum(l_extendedprice * l_discount) as revenue +from lineitem +where l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between .06 - 0.01 and .06 + 0.01 + and l_quantity < 24; diff --git a/testdata/tpch-queries/q7.sql b/testdata/tpch-queries/q7.sql new file mode 100644 index 0000000..1a1e68f --- /dev/null +++ b/testdata/tpch-queries/q7.sql @@ -0,0 +1,25 @@ +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from supplier, lineitem, orders, customer, nation n1, nation n2 + where s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and 
s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') + or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' +) as shipping +group by supp_nation, cust_nation, l_year +order by supp_nation, cust_nation, l_year; diff --git a/testdata/tpch-queries/q8.sql b/testdata/tpch-queries/q8.sql new file mode 100644 index 0000000..5de4c69 --- /dev/null +++ b/testdata/tpch-queries/q8.sql @@ -0,0 +1,22 @@ +select + o_year, + sum(case when nation = 'BRAZIL' then volume else 0 end) / sum(volume) as mkt_share +from ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from part, supplier, lineitem, orders, customer, nation n1, nation n2, region + where p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' +) as all_nations +group by o_year +order by o_year; diff --git a/testdata/tpch-queries/q9.sql b/testdata/tpch-queries/q9.sql new file mode 100644 index 0000000..fc288f0 --- /dev/null +++ b/testdata/tpch-queries/q9.sql @@ -0,0 +1,20 @@ +select + nation, + o_year, + sum(amount) as sum_profit +from ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from part, supplier, lineitem, partsupp, orders, nation + where s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' +) as profit +group by nation, o_year +order by nation, o_year desc; diff --git 
a/testdata/tpch.minimal/customer.parquet b/testdata/tpch.minimal/customer.parquet index 36e628d..f6bff1c 100644 Binary files a/testdata/tpch.minimal/customer.parquet and b/testdata/tpch.minimal/customer.parquet differ diff --git a/testdata/tpch.minimal/nation.parquet b/testdata/tpch.minimal/nation.parquet index dfb99ed..72f1d79 100644 Binary files a/testdata/tpch.minimal/nation.parquet and b/testdata/tpch.minimal/nation.parquet differ diff --git a/testdata/tpch.minimal/part.parquet b/testdata/tpch.minimal/part.parquet index 4dc80aa..f066be0 100644 Binary files a/testdata/tpch.minimal/part.parquet and b/testdata/tpch.minimal/part.parquet differ diff --git a/testdata/tpch.minimal/region.parquet b/testdata/tpch.minimal/region.parquet index d561a77..f071f7b 100644 Binary files a/testdata/tpch.minimal/region.parquet and b/testdata/tpch.minimal/region.parquet differ diff --git a/testdata/tpch.minimal/supplier.parquet b/testdata/tpch.minimal/supplier.parquet index 0a7c7c5..7fc2e1f 100644 Binary files a/testdata/tpch.minimal/supplier.parquet and b/testdata/tpch.minimal/supplier.parquet differ