From 1941bfa3226e4d4685f5ba68c3c8b94baca53e9c Mon Sep 17 00:00:00 2001
From: 1686a
Date: Fri, 5 Sep 2025 22:41:00 +0200
Subject: [PATCH 1/9] don't ignore indefinitely failing seeds

---
 quickwit/quickwit-config/src/node_config/mod.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs
index 590f16f90ac..ee2666f944e 100644
--- a/quickwit/quickwit-config/src/node_config/mod.rs
+++ b/quickwit/quickwit-config/src/node_config/mod.rs
@@ -618,15 +618,17 @@ impl NodeConfig {
         // validation purposes. Additionally, we need to append a default port if necessary and
         // finally return the addresses as strings, which is tricky for IPv6. We let the logic baked
         // in `HostAddr` handle this complexity.
+        let mut found_something = false;
         for peer_seed in &self.peer_seeds {
             let peer_seed_addr = HostAddr::parse_with_default_port(peer_seed, default_gossip_port)?;
             if let Err(error) = peer_seed_addr.resolve().await {
                 warn!(peer_seed = %peer_seed_addr, error = ?error, "failed to resolve peer seed address");
-                continue;
+            } else {
+                found_something = true;
             }
             peer_seed_addrs.push(peer_seed_addr.to_string())
         }
-        if !self.peer_seeds.is_empty() && peer_seed_addrs.is_empty() {
+        if !self.peer_seeds.is_empty() && !found_something {
             warn!("failed to resolve all the peer seed addresses")
         }
         Ok(peer_seed_addrs)

From 174e5e7fafeeae9465d680c04a949dac423ae36b Mon Sep 17 00:00:00 2001
From: trinity Pointard
Date: Mon, 8 Sep 2025 09:40:21 +0200
Subject: [PATCH 2/9] fix tests

---
 quickwit/quickwit-config/src/node_config/serialize.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs
index 9ab90e53ee3..19f5e13865c 100644
--- a/quickwit/quickwit-config/src/node_config/serialize.rs
+++ b/quickwit/quickwit-config/src/node_config/serialize.rs
@@ -948,6 +948,7 @@ mod tests {
         assert_eq!(
             node_config.peer_seed_addrs().await.unwrap(),
             vec![
+                "unresolvable.example.com:1789".to_string(),
                 "localhost:1789".to_string(),
                 "localhost:1337".to_string(),
                 "127.0.0.1:1789".to_string(),

From 340a365e7279538df655fbcaa63ba8e097ca6477 Mon Sep 17 00:00:00 2001
From: Adrien Guillo
Date: Tue, 9 Sep 2025 10:43:14 -0400
Subject: [PATCH 3/9] Remove `quickwit-lambda` package (#5884)

* Remove `quickwit-lambda` package

* Fix warning
---
 CHANGELOG.md | 6 +-
 LICENSE-3rdparty.csv | 47 +-
 distribution/lambda/.gitignore | 18 -
 distribution/lambda/Makefile | 149 --
 distribution/lambda/Pipfile | 23 -
 distribution/lambda/Pipfile.lock | 441 ------
 distribution/lambda/README.md | 174 ---
 distribution/lambda/cdk/__init__.py | 0
 distribution/lambda/cdk/app.py | 56 -
 distribution/lambda/cdk/cdk.json | 57 -
 distribution/lambda/cdk/cli.py | 465 ------
 distribution/lambda/cdk/setup.py | 7 -
 distribution/lambda/cdk/stacks/__init__.py | 0
 .../lambda/cdk/stacks/examples/__init__.py | 0
 .../lambda/cdk/stacks/examples/hdfs_stack.py | 74 -
 .../cdk/stacks/examples/mock_data_stack.py | 207 ---
 .../lambda/cdk/stacks/services/__init__.py | 0
 .../cdk/stacks/services/indexer_service.py | 48 -
 .../cdk/stacks/services/quickwit_service.py | 72 -
 .../cdk/stacks/services/searcher_service.py | 37 -
 .../resources/data-generator/handler.py | 37 -
 .../resources/data-generator/wordlist.json.gz | Bin 5822 -> 0 bytes
 distribution/lambda/resources/hdfs-logs.yaml | 38 -
 distribution/lambda/resources/mock-sales.yaml | 38 -
 .../tutorials/tutorial-aws-lambda-simple.md | 143 --
 quickwit/Cargo.lock | 1377 ++++++++---------
 quickwit/Cargo.toml | 3 +-
 quickwit/Makefile | 8 -
 quickwit/quickwit-aws/src/lib.rs | 2 +-
 quickwit/quickwit-lambda/Cargo.toml | 72 -
 quickwit/quickwit-lambda/README.md | 4 +
 quickwit/quickwit-lambda/src/bin/indexer.rs | 26 -
 quickwit/quickwit-lambda/src/bin/searcher.rs | 26 -
 quickwit/quickwit-lambda/src/environment.rs | 34 -
 .../src/indexer/environment.rs | 97 --
 quickwit/quickwit-lambda/src/indexer/handler.rs | 76 -
 .../src/indexer/ingest/helpers.rs | 347 -----
 .../quickwit-lambda/src/indexer/ingest/mod.rs | 263 ----
 quickwit/quickwit-lambda/src/indexer/mod.rs | 20 -
 quickwit/quickwit-lambda/src/indexer/model.rs | 109 --
 quickwit/quickwit-lambda/src/lib.rs | 19 -
 quickwit/quickwit-lambda/src/logger.rs | 187 ---
 quickwit/quickwit-lambda/src/searcher/api.rs | 150 --
 .../src/searcher/environment.rs | 80 -
 quickwit/quickwit-lambda/src/searcher/mod.rs | 19 -
 .../src/searcher/warp_lambda.rs | 233 ---
 quickwit/quickwit-lambda/src/utils.rs | 62 -
 47 files changed, 653 insertions(+), 4698 deletions(-)
 delete mode 100644 distribution/lambda/.gitignore
 delete mode 100644 distribution/lambda/Makefile
 delete mode 100644 distribution/lambda/Pipfile
 delete mode 100644 distribution/lambda/Pipfile.lock
 delete mode 100644 distribution/lambda/README.md
 delete mode 100644 distribution/lambda/cdk/__init__.py
 delete mode 100755 distribution/lambda/cdk/app.py
 delete mode 100644 distribution/lambda/cdk/cdk.json
 delete mode 100644 distribution/lambda/cdk/cli.py
 delete mode 100644 distribution/lambda/cdk/setup.py
 delete mode 100644 distribution/lambda/cdk/stacks/__init__.py
 delete mode 100644 distribution/lambda/cdk/stacks/examples/__init__.py
 delete mode 100644 distribution/lambda/cdk/stacks/examples/hdfs_stack.py
 delete mode 100644 distribution/lambda/cdk/stacks/examples/mock_data_stack.py
 delete mode 100644 distribution/lambda/cdk/stacks/services/__init__.py
 delete mode 100644 distribution/lambda/cdk/stacks/services/indexer_service.py
 delete mode 100644 distribution/lambda/cdk/stacks/services/quickwit_service.py
 delete mode 100644 distribution/lambda/cdk/stacks/services/searcher_service.py
 delete mode 100644 distribution/lambda/resources/data-generator/handler.py
 delete mode 100644 distribution/lambda/resources/data-generator/wordlist.json.gz
 delete mode 100644 distribution/lambda/resources/hdfs-logs.yaml
 delete mode 100644 distribution/lambda/resources/mock-sales.yaml
 delete mode 100644 docs/get-started/tutorials/tutorial-aws-lambda-simple.md
 delete mode 100644 quickwit/quickwit-lambda/Cargo.toml
 create mode 100644 quickwit/quickwit-lambda/README.md
 delete mode 100644 quickwit/quickwit-lambda/src/bin/indexer.rs
 delete mode 100644 quickwit/quickwit-lambda/src/bin/searcher.rs
 delete mode 100644 quickwit/quickwit-lambda/src/environment.rs
 delete mode 100644 quickwit/quickwit-lambda/src/indexer/environment.rs
 delete mode 100644 quickwit/quickwit-lambda/src/indexer/handler.rs
 delete mode 100644 quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs
 delete mode 100644 quickwit/quickwit-lambda/src/indexer/ingest/mod.rs
 delete mode 100644 quickwit/quickwit-lambda/src/indexer/mod.rs
 delete mode 100644 quickwit/quickwit-lambda/src/indexer/model.rs
 delete mode 100644 quickwit/quickwit-lambda/src/lib.rs
 delete mode 100644 quickwit/quickwit-lambda/src/logger.rs
 delete mode 100644 quickwit/quickwit-lambda/src/searcher/api.rs
 delete mode 100644 quickwit/quickwit-lambda/src/searcher/environment.rs
 delete mode 100644 quickwit/quickwit-lambda/src/searcher/mod.rs
 delete mode 100644 quickwit/quickwit-lambda/src/searcher/warp_lambda.rs
 delete mode 100644 quickwit/quickwit-lambda/src/utils.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe09833f35a..e149eea60dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Retry on S3 internal error (#5504)
 - Allow specifying OTEL index ID in header (#5503)
 - Add a metric to count storage errors and their error code (#5497)
-- Add support for concatenated fields (#4773, #5369, #5331) 
+- Add support for concatenated fields (#4773, #5369, #5331)
 - Add number of splits per root/leaf search histograms (#5472)
 - Introduce a searcher config option to timeout get requests (#5467)
 - Add fingerprint to task in cluster state (#5464)
@@ -52,7 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add more env var config for Postgres (#5365)
 - Enable str fast field range queries (#5324)
 - Allow querying non-existing fields (#5308)
-- Support updating doc mapper through api (#5253) 
+- Support updating doc mapper through api (#5253)
 - Add optional special handling for hex in code tokenizer (#5200)
 - Added a circuit breaker layer (#5134)
 - Various performance optimizations in Tantivy (https://github.com/quickwit-oss/tantivy/blob/main/CHANGELOG.md)
@@ -83,7 +83,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fix grafana searchers dashboard (#5455)
 - Fix jaeger http endpoint (#5378)
 - Fix file re-ingestion after EOF (#5330)
-- Fix source path in Lambda distrib (#5327)
 - Fix configuration interpolation (#5403)
 - Fix jaeger duration parse error (#5518)
 - Fix unit conversion in jaeger http search endpoint (#5519)
@@ -91,6 +90,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Removed
 - Remove support for 2-digit years in java datetime parser (#5596)
 - Remove DocMapper trait (#5508)
+- Remove support for AWS Lambda (#5884)
 
 # [0.8.1]

diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv
index b19672aa0bc..95fb38524ac 100644
--- a/LICENSE-3rdparty.csv
+++ b/LICENSE-3rdparty.csv
@@ -7,7 +7,6 @@ aho-corasick,https://github.com/BurntSushi/aho-corasick,Unlicense OR MIT,Andrew
 aliasable,https://github.com/avitex/rust-aliasable,MIT,avitex
 alloca,https://github.com/playXE/alloca-rs,MIT,"Adel Prokurov , StackOverflowExcept1on"
 allocator-api2,https://github.com/zakarumych/allocator-api2,MIT OR Apache-2.0,Zakarum
-android-tzdata,https://github.com/RumovZ/android-tzdata,MIT OR Apache-2.0,RumovZ
 android_system_properties,https://github.com/nical/android_system_properties,MIT OR Apache-2.0,Nicolas Silva
 anes,https://github.com/zrzka/anes-rs,MIT OR Apache-2.0,Robert Vojta
 ansi-str,https://github.com/zhiburt/ansi-str,MIT,Maxim Zhiburt
@@ -51,7 +50,6 @@ aws-smithy-runtime-api,https://github.com/smithy-lang/smithy-rs,Apache-2.0,"AWS
 aws-smithy-types,https://github.com/smithy-lang/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen "
 aws-smithy-xml,https://github.com/smithy-lang/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen "
 aws-types,https://github.com/smithy-lang/smithy-rs,Apache-2.0,"AWS Rust SDK Team , Russell Cohen "
-aws_lambda_events,https://github.com/awslabs/aws-lambda-rust-runtime,MIT,"Christian Legnitto , Sam Rijs , David Calavera "
 axum,https://github.com/tokio-rs/axum,MIT,The axum Authors
 axum-core,https://github.com/tokio-rs/axum,MIT,The axum-core Authors
 backtrace,https://github.com/rust-lang/backtrace-rs,MIT OR Apache-2.0,The
Rust Project Developers @@ -99,6 +97,8 @@ coarsetime,https://github.com/jedisct1/rust-coarsetime,ISC,Frank Denis , James Munns " colorchoice,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The colorchoice Authors colored,https://github.com/mackwic/colored,MPL-2.0,Thomas Wickham +compression-codecs,https://github.com/Nullus157/async-compression,MIT OR Apache-2.0,"Wim Looman , Allen Bui " +compression-core,https://github.com/Nullus157/async-compression,MIT OR Apache-2.0,"Wim Looman , Allen Bui " console,https://github.com/console-rs/console,MIT,Armin Ronacher const-oid,https://github.com/RustCrypto/formats/tree/master/const-oid,Apache-2.0 OR MIT,RustCrypto Developers constant_time_eq,https://github.com/cesarb/constant_time_eq,CC0-1.0,Cesar Eduardo Barros @@ -161,6 +161,7 @@ fastdivide,https://github.com/fulmicoton/fastdivide,zlib-acknowledgement OR MIT, fastrand,https://github.com/smol-rs/fastrand,Apache-2.0 OR MIT,Stjepan Glavina ff,https://github.com/zkcrypto/ff,MIT OR Apache-2.0,"Sean Bowe , Jack Grigg " filetime,https://github.com/alexcrichton/filetime,MIT OR Apache-2.0,Alex Crichton +find-msvc-tools,https://github.com/rust-lang/cc-rs,MIT OR Apache-2.0,The find-msvc-tools Authors fixedbitset,https://github.com/petgraph/fixedbitset,MIT OR Apache-2.0,bluss flate2,https://github.com/rust-lang/flate2-rs,MIT OR Apache-2.0,"Alex Crichton , Josh Triplett " float-cmp,https://github.com/mikedilger/float-cmp,MIT,Mike Dilger @@ -238,11 +239,8 @@ is_terminal_polyfill,https://github.com/polyfill-rs/is_terminal_polyfill,MIT OR itertools,https://github.com/rust-itertools/itertools,MIT OR Apache-2.0,bluss itoa,https://github.com/dtolnay/itoa,MIT OR Apache-2.0,David Tolnay jobserver,https://github.com/rust-lang/jobserver-rs,MIT OR Apache-2.0,Alex Crichton -js-sys,https://github.com/rustwasm/wasm-bindgen/tree/master/crates/js-sys,MIT OR Apache-2.0,The wasm-bindgen Developers +js-sys,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/js-sys,MIT OR Apache-2.0,The wasm-bindgen Developers json_comments,https://github.com/tmccombs/json-comments-rs,Apache-2.0,Thayne McCombs -lambda_http,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera , Harold Sun " -lambda_runtime,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera , Harold Sun " -lambda_runtime_api_client,https://github.com/awslabs/aws-lambda-rust-runtime,Apache-2.0,"David Calavera , Harold Sun " lazy_static,https://github.com/rust-lang-nursery/lazy-static.rs,MIT OR Apache-2.0,Marvin Löbel lazycell,https://github.com/indiv0/lazycell,MIT OR Apache-2.0,"Alex Crichton , Nikita Pekin " levenshtein_automata,https://github.com/tantivy-search/levenshtein-automata,MIT,Paul Masurel @@ -306,7 +304,7 @@ num-rational,https://github.com/rust-num/num-rational,MIT OR Apache-2.0,The Rust num-traits,https://github.com/rust-num/num-traits,MIT OR Apache-2.0,The Rust Project Developers num_cpus,https://github.com/seanmonstar/num_cpus,MIT OR Apache-2.0,Sean McArthur number_prefix,https://github.com/ogham/rust-number-prefix,MIT,Benjamin Sago -numfmt,https://github.com/kdr-aus/numfmt,MIT,Kurt Lawrence +numfmt,https://github.com/kurtlawrence/numfmt,MIT,Kurt Lawrence object,https://github.com/gimli-rs/object,Apache-2.0 OR MIT,The object Authors once_cell,https://github.com/matklad/once_cell,MIT OR Apache-2.0,Aleksey Kladov once_cell_polyfill,https://github.com/polyfill-rs/once_cell_polyfill,MIT OR Apache-2.0,The once_cell_polyfill Authors @@ -314,14 +312,12 @@ oneshot,https://github.com/faern/oneshot,MIT OR 
Apache-2.0,Linus Färnstrand openssl-probe,https://github.com/alexcrichton/openssl-probe,MIT OR Apache-2.0,Alex Crichton opentelemetry,https://github.com/open-telemetry/opentelemetry-rust,Apache-2.0,The opentelemetry Authors -opentelemetry-http,https://github.com/open-telemetry/opentelemetry-rust,Apache-2.0,The opentelemetry-http Authors opentelemetry-otlp,https://github.com/open-telemetry/opentelemetry-rust/tree/main/opentelemetry-otlp,Apache-2.0,The opentelemetry-otlp Authors opentelemetry-proto,https://github.com/open-telemetry/opentelemetry-rust/tree/main/opentelemetry-proto,Apache-2.0,The opentelemetry-proto Authors opentelemetry_sdk,https://github.com/open-telemetry/opentelemetry-rust,Apache-2.0,The opentelemetry_sdk Authors ouroboros,https://github.com/someguynamedjosh/ouroboros,MIT OR Apache-2.0,Josh ouroboros_macro,https://github.com/someguynamedjosh/ouroboros,MIT OR Apache-2.0,Josh outref,https://github.com/Nugine/outref,MIT,The outref Authors -overload,https://github.com/danaugrs/overload,MIT,Daniel Salvadori ownedbytes,https://github.com/quickwit-oss/tantivy,MIT,"Paul Masurel , Pascal Seitz " p256,https://github.com/RustCrypto/elliptic-curves/tree/master/p256,Apache-2.0 OR MIT,RustCrypto Developers papergrid,https://github.com/zhiburt/tabled,MIT,Maxim Zhiburt @@ -374,7 +370,6 @@ prost-build,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco , Casper Meijn , Tokio Contributors " prost-types,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco , Casper Meijn , Tokio Contributors " protobuf,https://github.com/stepancheg/rust-protobuf,MIT,Stepan Koltsov -query_map,https://github.com/calavera/query-map-rs,MIT,The query_map Authors quick-error,http://github.com/tailhook/quick-error,MIT OR Apache-2.0,"Paul Colomiets , Colin Kiegel " quinn,https://github.com/quinn-rs/quinn,MIT OR Apache-2.0,The quinn Authors quinn-proto,https://github.com/quinn-rs/quinn,MIT OR Apache-2.0,The quinn-proto Authors @@ -386,17 +381,15 @@ rand_chacha,https://github.com/rust-random/rand,MIT OR Apache-2.0,"The Rand Proj rand_core,https://github.com/rust-random/rand,MIT OR Apache-2.0,"The Rand Project Developers, The Rust Project Developers" rand_distr,https://github.com/rust-random/rand,MIT OR Apache-2.0,The Rand Project Developers rand_xorshift,https://github.com/rust-random/rngs,MIT OR Apache-2.0,"The Rand Project Developers, The Rust Project Developers" -rayon,https://github.com/rayon-rs/rayon,MIT OR Apache-2.0,"Niko Matsakis , Josh Stone " -rayon-core,https://github.com/rayon-rs/rayon,MIT OR Apache-2.0,"Niko Matsakis , Josh Stone " +rayon,https://github.com/rayon-rs/rayon,MIT OR Apache-2.0,The rayon Authors +rayon-core,https://github.com/rayon-rs/rayon,MIT OR Apache-2.0,The rayon-core Authors redox_syscall,https://gitlab.redox-os.org/redox-os/syscall,MIT,Jeremy Soller ref-cast,https://github.com/dtolnay/ref-cast,MIT OR Apache-2.0,David Tolnay ref-cast-impl,https://github.com/dtolnay/ref-cast,MIT OR Apache-2.0,David Tolnay regex,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " -regex-automata,https://github.com/BurntSushi/regex-automata,Unlicense OR MIT,Andrew Gallant -regex-automata,https://github.com/rust-lang/regex/tree/master/regex-automata,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " -regex-lite,https://github.com/rust-lang/regex/tree/master/regex-lite,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " -regex-syntax,https://github.com/rust-lang/regex,MIT OR Apache-2.0,The Rust 
Project Developers -regex-syntax,https://github.com/rust-lang/regex/tree/master/regex-syntax,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " +regex-automata,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " +regex-lite,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " +regex-syntax,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " reqwest,https://github.com/seanmonstar/reqwest,MIT OR Apache-2.0,Sean McArthur reqwest-middleware,https://github.com/TrueLayer/reqwest-middleware,MIT OR Apache-2.0,Rodrigo Gryzinski reqwest-retry,https://github.com/TrueLayer/reqwest-middleware,MIT OR Apache-2.0,Rodrigo Gryzinski @@ -437,10 +430,8 @@ semver,https://github.com/dtolnay/semver,MIT OR Apache-2.0,David Tolnay serde,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " serde_derive,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " -serde_dynamo,https://github.com/zenlist/serde_dynamo,MIT,Bryan Burgers serde_json,https://github.com/serde-rs/json,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " serde_json_borrow,https://github.com/PSeitz/serde_json_borrow,MIT,Pascal Seitz -serde_path_to_error,https://github.com/dtolnay/path-to-error,MIT OR Apache-2.0,David Tolnay serde_qs,https://github.com/samscott89/serde_qs,MIT OR Apache-2.0,Sam Scott serde_spanned,https://github.com/toml-rs/toml,MIT OR Apache-2.0,The serde_spanned Authors serde_urlencoded,https://github.com/nox/serde_urlencoded,MIT OR Apache-2.0,Anthony Ramine @@ -555,14 +546,14 @@ warp,https://github.com/seanmonstar/warp,MIT,Sean McArthur -web-sys,https://github.com/rustwasm/wasm-bindgen/tree/master/crates/web-sys,MIT OR Apache-2.0,The wasm-bindgen Developers +web-sys,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/web-sys,MIT OR Apache-2.0,The wasm-bindgen Developers web-time,https://github.com/daxpedda/web-time,MIT OR Apache-2.0,The web-time Authors webpki-roots,https://github.com/rustls/webpki-roots,CDLA-Permissive-2.0,The webpki-roots Authors which,https://github.com/harryfei/which-rs,MIT,Harry Fei @@ -577,10 +568,12 @@ windows-core,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows-implement,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows-interface,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows-link,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft +windows-link,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,The windows-link Authors windows-registry,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows-result,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows-strings,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows-sys,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft +windows-sys,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,The windows-sys Authors windows-targets,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows_aarch64_gnullvm,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft windows_aarch64_msvc,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft @@ -591,7 +584,7 @@ windows_x86_64_gnu,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Mic windows_x86_64_gnullvm,https://github.com/microsoft/windows-rs,MIT OR 
Apache-2.0,Microsoft windows_x86_64_msvc,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Microsoft winnow,https://github.com/winnow-rs/winnow,MIT,The winnow Authors -wit-bindgen-rt,https://github.com/bytecodealliance/wit-bindgen,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,The wit-bindgen-rt Authors +wit-bindgen,https://github.com/bytecodealliance/wit-bindgen,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,Alex Crichton writeable,https://github.com/unicode-org/icu4x,Unicode-3.0,The ICU4X Project Developers xattr,https://github.com/Stebalien/xattr,MIT OR Apache-2.0,Steven Allen xmlparser,https://github.com/RazrFalcon/xmlparser,MIT OR Apache-2.0,Yevhenii Reizner diff --git a/distribution/lambda/.gitignore b/distribution/lambda/.gitignore deleted file mode 100644 index 85cfc7bfdef..00000000000 --- a/distribution/lambda/.gitignore +++ /dev/null @@ -1,18 +0,0 @@ -*.swp -package-lock.json -__pycache__ -.pytest_cache -.venv -*.egg-info -build/ -.mypy_cache - -# CDK asset staging directory -.cdk.staging -cdk.out - -# AWS SAM build directory -.aws-sam - -# Benchmark output files -*.log diff --git a/distribution/lambda/Makefile b/distribution/lambda/Makefile deleted file mode 100644 index 0c4decfce57..00000000000 --- a/distribution/lambda/Makefile +++ /dev/null @@ -1,149 +0,0 @@ -.SILENT: -.ONESHELL: -SHELL := bash -.SHELLFLAGS := -eu -o pipefail -c - -QW_LAMBDA_VERSION?=$(shell git tag --sort=committerdate | grep -E "aws-lambda-beta-" | tail -1 | cut -b 12-) -$(if $(QW_LAMBDA_VERSION),,$(error "Version tag not found, try 'git fetch --tags' or specify QW_LAMBDA_VERSION=beta-")) -PACKAGE_BASE_URL=https://github.com/quickwit-oss/quickwit/releases/download/aws-lambda-$(QW_LAMBDA_VERSION)/ -SEARCHER_PACKAGE_FILE=quickwit-lambda-searcher-$(QW_LAMBDA_VERSION)-x86_64.zip -INDEXER_PACKAGE_FILE=quickwit-lambda-indexer-$(QW_LAMBDA_VERSION)-x86_64.zip -export SEARCHER_PACKAGE_PATH=cdk.out/$(SEARCHER_PACKAGE_FILE) -export INDEXER_PACKAGE_PATH=cdk.out/$(INDEXER_PACKAGE_FILE) - -check-env: -ifndef CDK_ACCOUNT - $(error CDK_ACCOUNT is undefined) -endif -ifndef CDK_REGION - $(error CDK_REGION is undefined) -endif - -# Build or download the packages from the release page -# - Download by default, the version can be set with QW_LAMBDA_VERSION -# - To build locally, set QW_LAMBDA_BUILD=1 -package: - mkdir -p cdk.out - if [ "$${QW_LAMBDA_BUILD:-0}" = "1" ] - then - pushd ../../quickwit/ - rustc --version - # TODO: remove --disable-optimizations when upgrading to a release containing - # https://github.com/cargo-lambda/cargo-lambda/issues/649 (> 1.2.1) - cargo lambda build \ - -p quickwit-lambda \ - --disable-optimizations \ - --release \ - --output-format zip \ - --target x86_64-unknown-linux-gnu - popd - cp -u ../../quickwit/target/lambda/searcher/bootstrap.zip $(SEARCHER_PACKAGE_PATH) - cp -u ../../quickwit/target/lambda/indexer/bootstrap.zip $(INDEXER_PACKAGE_PATH) - else - if ! [ -f $(SEARCHER_PACKAGE_PATH) ]; then - echo "Downloading package $(PACKAGE_BASE_URL)$(SEARCHER_PACKAGE_FILE)" - curl --fail -C - -Ls -o $(SEARCHER_PACKAGE_PATH) $(PACKAGE_BASE_URL)$(SEARCHER_PACKAGE_FILE) - else - echo "Using cached package $(SEARCHER_PACKAGE_PATH)" - fi - if ! 
[ -f $(INDEXER_PACKAGE_PATH) ]; then - echo "Downloading package $(PACKAGE_BASE_URL)$(INDEXER_PACKAGE_FILE)" - curl --fail -C - -Ls -o $(INDEXER_PACKAGE_PATH) $(PACKAGE_BASE_URL)$(INDEXER_PACKAGE_FILE) - else - echo "Using cached package $(INDEXER_PACKAGE_PATH)" - fi - fi - -indexer-package-path: - echo -n $(INDEXER_PACKAGE_PATH) - -searcher-package-path: - echo -n $(SEARCHER_PACKAGE_PATH) - -bootstrap: - cdk bootstrap aws://$$CDK_ACCOUNT/$$CDK_REGION - -deploy-hdfs: package check-env - cdk deploy --require-approval never -a cdk/app.py HdfsStack - -deploy-mock-data: package check-env - cdk deploy --require-approval never -a cdk/app.py MockDataStack - -print-mock-data-metastore: check-env - python -c 'from cdk import cli; cli.print_mock_data_metastore()' - -# address https://github.com/aws/aws-cdk/issues/20060 -before-destroy: - mkdir -p cdk.out - touch $(INDEXER_PACKAGE_PATH) - touch $(SEARCHER_PACKAGE_PATH) - -destroy-hdfs: before-destroy check-env - python -c 'from cdk import cli; cli.empty_hdfs_bucket()' - cdk destroy --force -a cdk/app.py HdfsStack - -destroy-mock-data: before-destroy check-env - python -c 'from cdk import cli; cli.empty_mock_data_buckets()' - cdk destroy --force -a cdk/app.py MockDataStack - -clean: - rm -rf cdk.out - -## Invocation examples - -invoke-mock-data-searcher: check-env - python -c 'from cdk import cli; cli.invoke_mock_data_searcher()' - -invoke-hdfs-indexer: check-env - python -c 'from cdk import cli; cli.upload_hdfs_src_file()' - python -c 'from cdk import cli; cli.invoke_hdfs_indexer()' - -invoke-hdfs-searcher-term: check-env - python -c 'from cdk import cli; cli.invoke_hdfs_searcher("""{"query": "severity_text:ERROR", "max_hits": 10}""")' - -invoke-hdfs-searcher-histogram: check-env - python -c 'from cdk import cli; cli.invoke_hdfs_searcher("""{ "query": "*", "max_hits": 0, "aggs": { "events": { "date_histogram": { "field": "timestamp", "fixed_interval": "1d" }, "aggs": { "log_level": { "terms": { "size": 10, "field": "severity_text", "order": { "_count": "desc" } } } } } } }""")' - -bench-index: - mem_sizes=( 10240 8192 6144 4096 3072 2048 ) - export QW_LAMBDA_DISABLE_MERGE=true - for mem_size in "$${mem_sizes[@]}" - do - export INDEXER_MEMORY_SIZE=$${mem_size} - $(MAKE) deploy-hdfs - python -c 'from cdk import cli; cli.benchmark_hdfs_indexing()' - done - -bench-search-term: - export QW_LAMBDA_ENABLE_VERBOSE_JSON_LOGS=true - mem_sizes=( 1024 2048 4096 8192 ) - for mem_size in "$${mem_sizes[@]}" - do - export SEARCHER_MEMORY_SIZE=$${mem_size} - $(MAKE) deploy-hdfs - python -c 'from cdk import cli; cli.benchmark_hdfs_search("""{"query": "severity_text:ERROR", "max_hits": 10}""")' - done - -bench-search-histogram: - export QW_LAMBDA_ENABLE_VERBOSE_JSON_LOGS=true - mem_sizes=( 1024 2048 4096 8192 ) - for mem_size in "$${mem_sizes[@]}" - do - export SEARCHER_MEMORY_SIZE=$${mem_size} - $(MAKE) deploy-hdfs - python -c 'from cdk import cli; cli.benchmark_hdfs_search("""{ "query": "*", "max_hits": 0, "aggs": { "events": { "date_histogram": { "field": "timestamp", "fixed_interval": "1d" }, "aggs": { "log_level": { "terms": { "size": 10, "field": "severity_text", "order": { "_count": "desc" } } } } } } }""")' - done - -bench-search: - for run in {1..30} - do - export QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY=0 - $(MAKE) bench-search-term - $(MAKE) bench-search-histogram - export QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY=64MB - $(MAKE) bench-search-term - $(MAKE) bench-search-histogram - done - -test-mock-data-endpoints: - python -c 'from cdk import cli; 
cli.test_mock_data_endpoints()' diff --git a/distribution/lambda/Pipfile b/distribution/lambda/Pipfile deleted file mode 100644 index a82f12135a2..00000000000 --- a/distribution/lambda/Pipfile +++ /dev/null @@ -1,23 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -cdk = {file = "cdk", editable = true} -aws-cdk-lib = "2.95.1" -cargo-lambda = "1.1.0" -constructs = "10.3.0" -pyyaml = "6.0.1" -black = "24.3.0" -boto3 = "1.28.59" -mypy = "1.7.0" -ziglang = "0.11.0" - -# types -boto3-stubs = "1.28.59" -types-requests = "2.31.0.2" -types-pyyaml = "6.0.12.11" - -[requires] -python_version = "3.10" diff --git a/distribution/lambda/Pipfile.lock b/distribution/lambda/Pipfile.lock deleted file mode 100644 index 1f7189cf900..00000000000 --- a/distribution/lambda/Pipfile.lock +++ /dev/null @@ -1,441 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "1ff8dd1cf56643a683dac32ad8cc42e8d2d24474dc2fd98b2c1b78c26203b829" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.10" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "attrs": { - "hashes": [ - "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", - "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" - ], - "markers": "python_version >= '3.7'", - "version": "==23.2.0" - }, - "aws-cdk-lib": { - "hashes": [ - "sha256:3d16664234db35cfe8731390f67f13fd155ba9482c1c0207e2c04c123a73e0a1", - "sha256:f99fc196fb233c09f73eb5699bd6ff9d6aac75496cc01dc1edb589e25a2d69c1" - ], - "index": "pypi", - "markers": "python_version ~= '3.7'", - "version": "==2.95.1" - }, - "aws-cdk.asset-awscli-v1": { - "hashes": [ - "sha256:3ef87d6530736b3a7b0f777fe3b4297994dd40c3ce9306d95f80f48fb18036e8", - "sha256:96205ea2e5e132ec52fabfff37ea25b9b859498f167d05b32564c949822cd331" - ], - "markers": "python_version ~= '3.8'", - "version": "==2.2.202" - }, - "aws-cdk.asset-kubectl-v20": { - "hashes": [ - "sha256:346283e43018a43e3b3ca571de3f44e85d49c038dc20851894cb8f9b2052b164", - "sha256:7f0617ab6cb942b066bd7174bf3e1f377e57878c3e1cddc21d6b2d13c92d0cc1" - ], - "markers": "python_version ~= '3.7'", - "version": "==2.1.2" - }, - "aws-cdk.asset-node-proxy-agent-v6": { - "hashes": [ - "sha256:b62cb10c69a42cab135e6bc670e3d2d3121fd4f53a0f61e53449da4b12738a6f", - "sha256:ef2ff0634ab037e2ebddbe69d7c92515a847c6c8bb2abdfc85b089f5e87761cb" - ], - "markers": "python_version ~= '3.8'", - "version": "==2.0.3" - }, - "black": { - "hashes": [ - "sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f", - "sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93", - "sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11", - "sha256:4be5bb28e090456adfc1255e03967fb67ca846a03be7aadf6249096100ee32d0", - "sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9", - "sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5", - "sha256:65b76c275e4c1c5ce6e9870911384bff5ca31ab63d19c76811cb1fb162678213", - "sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d", - "sha256:6905238a754ceb7788a73f02b45637d820b2f5478b20fec82ea865e4f5d4d9f7", - "sha256:79dcf34b33e38ed1b17434693763301d7ccbd1c5860674a8f871bd15139e7837", - "sha256:7bb041dca0d784697af4646d3b62ba4a6b028276ae878e53f6b4f74ddd6db99f", - "sha256:7d5e026f8da0322b5662fa7a8e752b3fa2dac1c1cbc213c3d7ff9bdd0ab12395", - "sha256:9f50ea1132e2189d8dff0115ab75b65590a3e97de1e143795adb4ce317934995", - 
"sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f", - "sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597", - "sha256:b5991d523eee14756f3c8d5df5231550ae8993e2286b8014e2fdea7156ed0959", - "sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5", - "sha256:c45f8dff244b3c431b36e3224b6be4a127c6aca780853574c00faf99258041eb", - "sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4", - "sha256:d7de8d330763c66663661a1ffd432274a2f92f07feeddd89ffd085b5744f85e7", - "sha256:e19cb1c6365fd6dc38a6eae2dcb691d7d83935c10215aef8e6c38edee3f77abd", - "sha256:e2af80566f43c85f5797365077fb64a393861a3730bd110971ab7a0c94e873e7" - ], - "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==24.3.0" - }, - "boto3": { - "hashes": [ - "sha256:7623b52c135becf145f762a9cc4203a1fb30055bb1cc7a254f82e5f7954d44a1", - "sha256:aa861e5568a564a5ce2fff5413d6ae2cda0eed7399b3a949bc861a20915e2046" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==1.28.59" - }, - "boto3-stubs": { - "hashes": [ - "sha256:4d8f4387e128561413a54944396a3d316b3113aa9c74ccf34ff850c2e0e4bb9a", - "sha256:921e5ef6acf22736191087d7fef08d095353310b752d4f341d975f0965fe176b" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==1.28.59" - }, - "botocore": { - "hashes": [ - "sha256:b8f35d65f2b45af50c36fc25cc1844d6bd61d38d2148b2ef133b8f10e198555d", - "sha256:ce58e688222df73ec5691f934be1a2122a52c9d11d3037b586b3fff16ed6d25f" - ], - "markers": "python_version >= '3.7'", - "version": "==1.31.85" - }, - "botocore-stubs": { - "hashes": [ - "sha256:64d80a3467e3b19939e9c2750af33328b3087f8f524998dbdf7ed168227f507d", - "sha256:b0345f55babd8b901c53804fc5c326a4a0bd2e23e3b71f9ea5d9f7663466e6ba" - ], - "markers": "python_version >= '3.8' and python_version < '4.0'", - "version": "==1.34.94" - }, - "cargo-lambda": { - "hashes": [ - "sha256:43d2d5b7d175bb4ad1fa92e520ca381213ab6725b41aff87f5bf9515c3912c43", - "sha256:4877adea6c25b5db94cf14dab286370de6fc69822d242527076ebf2c733f5dd6", - "sha256:504c549b07e0801b56d99c52224c8ec89df21e728c5290542512b61099190f12", - "sha256:50d375944be1051009c4ce24cd927302c3d8b260217ad41aaf643a014a02e696", - "sha256:51c29d9873a608f01f25192e0d0dd4c6e2eb364658cc9cc0d028d0b1186d1c44", - "sha256:55a30f4fe3c6c1d86ff7d9936605ea847d3b98f1f34c87577d58c646b69eed81", - "sha256:a6a5463821ed5e113feec15837ab9201fc75671344a442dd19e407648d085550", - "sha256:b3e402961550aee058f830dc41b53294fd00aeb3620c08f1af4f65468dbe6d13" - ], - "index": "pypi", - "version": "==1.1.0" - }, - "cattrs": { - "hashes": [ - "sha256:0341994d94971052e9ee70662542699a3162ea1e0c62f7ce1b4a57f563685108", - "sha256:a934090d95abaa9e911dac357e3a8699e0b4b14f8529bcc7d2b1ad9d51672b9f" - ], - "markers": "python_version >= '3.8'", - "version": "==23.2.3" - }, - "cdk": { - "editable": true, - "file": "cdk" - }, - "click": { - "hashes": [ - "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", - "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" - ], - "markers": "python_version >= '3.7'", - "version": "==8.1.7" - }, - "constructs": { - "hashes": [ - "sha256:2972f514837565ff5b09171cfba50c0159dfa75ee86a42921ea8c86f2941b3d2", - "sha256:518551135ec236f9cc6b86500f4fbbe83b803ccdc6c2cb7684e0b7c4d234e7b1" - ], - "index": "pypi", - "markers": "python_version ~= '3.7'", - "version": "==10.3.0" - }, - "exceptiongroup": { - "hashes": [ - "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad", - 
"sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16" - ], - "markers": "python_version < '3.11'", - "version": "==1.2.1" - }, - "importlib-resources": { - "hashes": [ - "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c", - "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145" - ], - "markers": "python_version >= '3.8'", - "version": "==6.4.0" - }, - "jmespath": { - "hashes": [ - "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", - "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" - ], - "markers": "python_version >= '3.7'", - "version": "==1.0.1" - }, - "jsii": { - "hashes": [ - "sha256:3067d523126ce8178374dd958c60350efc831fc2ef3eb94a0a755d64fa4cc22d", - "sha256:64bbaf9c494626bc0afd1b95834f0dba66a2f2ecbb0da97fa3000c4b01d67857" - ], - "markers": "python_version ~= '3.8'", - "version": "==1.98.0" - }, - "mypy": { - "hashes": [ - "sha256:0e81ffd120ee24959b449b647c4b2fbfcf8acf3465e082b8d58fd6c4c2b27e46", - "sha256:185cff9b9a7fec1f9f7d8352dff8a4c713b2e3eea9c6c4b5ff7f0edf46b91e41", - "sha256:1e280b5697202efa698372d2f39e9a6713a0395a756b1c6bd48995f8d72690dc", - "sha256:1fe46e96ae319df21359c8db77e1aecac8e5949da4773c0274c0ef3d8d1268a9", - "sha256:2b53655a295c1ed1af9e96b462a736bf083adba7b314ae775563e3fb4e6795f5", - "sha256:551d4a0cdcbd1d2cccdcc7cb516bb4ae888794929f5b040bb51aae1846062901", - "sha256:55d28d7963bef00c330cb6461db80b0b72afe2f3c4e2963c99517cf06454e665", - "sha256:5da84d7bf257fd8f66b4f759a904fd2c5a765f70d8b52dde62b521972a0a2357", - "sha256:6cb8d5f6d0fcd9e708bb190b224089e45902cacef6f6915481806b0c77f7786d", - "sha256:7a7b1e399c47b18feb6f8ad4a3eef3813e28c1e871ea7d4ea5d444b2ac03c418", - "sha256:870bd1ffc8a5862e593185a4c169804f2744112b4a7c55b93eb50f48e7a77010", - "sha256:87c076c174e2c7ef8ab416c4e252d94c08cd4980a10967754f91571070bf5fbe", - "sha256:96650d9a4c651bc2a4991cf46f100973f656d69edc7faf91844e87fe627f7e96", - "sha256:a3637c03f4025f6405737570d6cbfa4f1400eb3c649317634d273687a09ffc2f", - "sha256:a79cdc12a02eb526d808a32a934c6fe6df07b05f3573d210e41808020aed8b5d", - "sha256:b633f188fc5ae1b6edca39dae566974d7ef4e9aaaae00bc36efe1f855e5173ac", - "sha256:bf7a2f0a6907f231d5e41adba1a82d7d88cf1f61a70335889412dec99feeb0f8", - "sha256:c1b06b4b109e342f7dccc9efda965fc3970a604db70f8560ddfdee7ef19afb05", - "sha256:cddee95dea7990e2215576fae95f6b78a8c12f4c089d7e4367564704e99118d3", - "sha256:d01921dbd691c4061a3e2ecdbfbfad029410c5c2b1ee88946bf45c62c6c91210", - "sha256:d0fa29919d2e720c8dbaf07d5578f93d7b313c3e9954c8ec05b6d83da592e5d9", - "sha256:d6ed9a3997b90c6f891138e3f83fb8f475c74db4ccaa942a1c7bf99e83a989a1", - "sha256:d93e76c2256aa50d9c82a88e2f569232e9862c9982095f6d54e13509f01222fc", - "sha256:df67fbeb666ee8828f675fee724cc2cbd2e4828cc3df56703e02fe6a421b7401", - "sha256:f29386804c3577c83d76520abf18cfcd7d68264c7e431c5907d250ab502658ee", - "sha256:f65f385a6f43211effe8c682e8ec3f55d79391f70a201575def73d08db68ead1", - "sha256:fc9fe455ad58a20ec68599139ed1113b21f977b536a91b42bef3ffed5cce7391" - ], - "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==1.7.0" - }, - "mypy-extensions": { - "hashes": [ - "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", - "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" - ], - "markers": "python_version >= '3.5'", - "version": "==1.0.0" - }, - "packaging": { - "hashes": [ - "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", - 
"sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" - ], - "markers": "python_version >= '3.7'", - "version": "==24.0" - }, - "pathspec": { - "hashes": [ - "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", - "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" - ], - "markers": "python_version >= '3.8'", - "version": "==0.12.1" - }, - "platformdirs": { - "hashes": [ - "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee", - "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3" - ], - "markers": "python_version >= '3.8'", - "version": "==4.2.2" - }, - "publication": { - "hashes": [ - "sha256:0248885351febc11d8a1098d5c8e3ab2dabcf3e8c0c96db1e17ecd12b53afbe6", - "sha256:68416a0de76dddcdd2930d1c8ef853a743cc96c82416c4e4d3b5d901c6276dc4" - ], - "version": "==0.0.3" - }, - "python-dateutil": { - "hashes": [ - "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", - "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.9.0.post0" - }, - "pyyaml": { - "hashes": [ - "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", - "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", - "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", - "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", - "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", - "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", - "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", - "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", - "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", - "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", - "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", - "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", - "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", - "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", - "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", - "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", - "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", - "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", - "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", - "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", - "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", - "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", - "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", - "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", - "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", - "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", - "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", - "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", - "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", - "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", - 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", - "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", - "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", - "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", - "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", - "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", - "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", - "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", - "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", - "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", - "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", - "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", - "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", - "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", - "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", - "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", - "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", - "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", - "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", - "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", - "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" - ], - "index": "pypi", - "markers": "python_version >= '3.6'", - "version": "==6.0.1" - }, - "s3transfer": { - "hashes": [ - "sha256:10d6923c6359175f264811ef4bf6161a3156ce8e350e705396a7557d6293c33a", - "sha256:fd3889a66f5fe17299fe75b82eae6cf722554edca744ca5d5fe308b104883d2e" - ], - "markers": "python_version >= '3.7'", - "version": "==0.7.0" - }, - "six": { - "hashes": [ - "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", - "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.16.0" - }, - "tomli": { - "hashes": [ - "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", - "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" - ], - "markers": "python_version < '3.11'", - "version": "==2.0.1" - }, - "typeguard": { - "hashes": [ - "sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4", - "sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1" - ], - "markers": "python_full_version >= '3.5.3'", - "version": "==2.13.3" - }, - "types-awscrt": { - "hashes": [ - "sha256:3ae374b553e7228ba41a528cf42bd0b2ad7303d806c73eff4aaaac1515e3ea4e", - "sha256:64898a2f4a2468f66233cb8c29c5f66de907cf80ba1ef5bb1359aef2f81bb521" - ], - "markers": "python_version >= '3.7' and python_version < '4.0'", - "version": "==0.20.9" - }, - "types-pyyaml": { - "hashes": [ - "sha256:7d340b19ca28cddfdba438ee638cd4084bde213e501a3978738543e27094775b", - "sha256:a461508f3096d1d5810ec5ab95d7eeecb651f3a15b71959999988942063bf01d" - ], - "index": "pypi", - "version": "==6.0.12.11" - }, - "types-requests": { - "hashes": [ - "sha256:56d181c85b5925cbc59f4489a57e72a8b2166f18273fd8ba7b6fe0c0b986f12a", - "sha256:6aa3f7faf0ea52d728bb18c0a0d1522d9bfd8c72d26ff6f61bfc3d06a411cf40" - ], - "index": "pypi", - "version": "==2.31.0.2" - 
}, - "types-s3transfer": { - "hashes": [ - "sha256:02154cce46528287ad76ad1a0153840e0492239a0887e8833466eccf84b98da0", - "sha256:49a7c81fa609ac1532f8de3756e64b58afcecad8767933310228002ec7adff74" - ], - "markers": "python_version >= '3.8' and python_version < '4.0'", - "version": "==0.10.1" - }, - "types-urllib3": { - "hashes": [ - "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", - "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e" - ], - "version": "==1.26.25.14" - }, - "typing-extensions": { - "hashes": [ - "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0", - "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a" - ], - "markers": "python_version < '3.12'", - "version": "==4.11.0" - }, - "urllib3": { - "hashes": [ - "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84", - "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e" - ], - "markers": "python_version >= '3.10'", - "version": "==2.0.7" - }, - "ziglang": { - "hashes": [ - "sha256:038b95cac9adef0c6dce9b72bdad895a0e4e0654c77c4a8f84fe79d2909a366e", - "sha256:45e8116428267e20529b9ee43a7e7364791c1a092845d2143b248a1dbf6760b0", - "sha256:4f848c8cca520cb12357cfa3d303bf1149a30566f4c1e5999284dbdf921cc2b8", - "sha256:5fe81f91fd872fc32ed0f82807df6c680a82cbea56a9f24f818e9da299049022", - "sha256:97ac4312a358d2a4ba2c153fdb1827caf6bc158501a468ebd6a554b50edee42e", - "sha256:a7edc7020e7ffbbb3af3a40c17a9bda65d5a65132ff933e153ffa80d8f5ad731", - "sha256:bd046eeab97ad51048575768f6dae10468b3a4449f4467ed61dae621faf6ee55", - "sha256:d6372bada34714a5395539cc4d76e9cc6062739cee5ce9949a250f7c525ceb94" - ], - "index": "pypi", - "markers": "python_version ~= '3.5'", - "version": "==0.11.0" - } - }, - "develop": {} -} diff --git a/distribution/lambda/README.md b/distribution/lambda/README.md deleted file mode 100644 index 5aee77e6643..00000000000 --- a/distribution/lambda/README.md +++ /dev/null @@ -1,174 +0,0 @@ - -# Quickwit on AWS Lambda - -- Get the latest [Lambda binaries](https://github.com/quickwit-oss/quickwit/releases/tag/aws-lambda-beta-01) -- Read the [beta release announcement](https://quickwit.io/blog/quickwit-lambda-beta) - -## Quickstart - -- [Search on 20 million log dataset on S3 with lambda](https://quickwit.io/docs/get-started/tutorials/tutorial-aws-lambda-simple) -- [E2E use case with HTTP API](https://quickwit.io/docs/guides/e2e-serverless-aws-lambda) - -## Build and run yourself - -### Prerequisites - -- Install AWS CDK Toolkit (cdk command) - - `npm install -g aws-cdk` -- Ensure `curl` and `make` are installed -- To run the invocation example `make` commands, you will also need Python 3.10 - or later and `pip` installed (see [Python venv](#python-venv) below). - -### AWS Lambda service quotas - -For newly created AWS accounts, a conservative quota of 10 concurrent executions -is applied to Lambda in each individual region. If that's the case, CDK won't be -able to apply the reserved concurrency of the indexing Quickwit lambda. You can -increase the quota without charge using the [Service Quotas -console](https://console.aws.amazon.com/servicequotas/home/services/lambda/quotas). - -> **Note:** The request can take hours or even days to be processed. - -### Python venv - -The Python environment is configured using pipenv: - -``` -# Install pipenv if needed. 
-pip install --user pipenv -pipenv shell -pipenv install -``` - -### Example stacks - -Provided demonstration setups: -- HDFS example data: index the [HDFS - dataset](https://quickwit-datasets-public.s3.amazonaws.com/hdfs-logs-multitenants-10000.json) - by triggering the Quickwit lambda manually. -- Mock Data generator: start a mock data generator lambda that pushes mock JSON - data every X minutes to S3. Those files trigger the Quickwit indexer lambda - automatically. - -### Deploy and run - -The Makefile is a useful entrypoint to show how the Lambda deployment can used. - -Configure your shell and AWS account: -```bash -# replace with your AWS account ID and preferred region -export CDK_ACCOUNT=123456789 -export CDK_REGION=us-east-1 -make bootstrap -``` - -Deploy, index and query the HDFS dataset: -```bash -make deploy-hdfs -make invoke-hdfs-indexer -make invoke-hdfs-searcher -``` - -Deploy the mock data generator and query the indexed data: -```bash -make deploy-mock-data -# wait a few minutes... -make invoke-mock-data-searcher -``` - -### Configurations - -The following environment variables can be configured on the Lambda functions. -Note that only a small subset of all Quickwit configurations are exposed to -simplify the setup and avoid unstable deployments. - -| Variable | Description | Default | -|---|---|---| -| QW_LAMBDA_INDEX_ID | the index this Lambda interacts with (one and only one) | required | -| QW_LAMBDA_METASTORE_BUCKET | bucket name for metastore files | required | -| QW_LAMBDA_INDEX_BUCKET | bucket name for split files | required | -| QW_LAMBDA_OPENTELEMETRY_URL | HTTP OTEL tracing collector endpoint | none, OTEL disabled | -| QW_LAMBDA_OPENTELEMETRY_AUTHORIZATION | Authorization header value for HTTP OTEL calls | none, OTEL disabled | -| QW_LAMBDA_ENABLE_VERBOSE_JSON_LOGS | true to enable JSON logging of spans and logs in Cloudwatch | false | -| RUST_LOG | [Rust logging config][1] | info | - -[1]: https://rust-lang-nursery.github.io/rust-cookbook/development_tools/debugging/config_log.html - - -> [!TIP] -> The Indexer Lambda's logging is quite verbose. To reduce the associated -> CloudWatch costs, you can disable some lower level logs by setting the -> `RUST_LOG` environment variable to `info,quickwit_actors=warn`, or disable -> INFO logs altogether by setting `RUST_LOG=warn`. - -Indexer only: -| Variable | Description | Default | -|---|---|---| -| QW_LAMBDA_INDEX_CONFIG_URI | location of the index configuration file, e.g `s3://mybucket/index-config.yaml` | required | -| QW_LAMBDA_DISABLE_MERGE | true to disable compaction merges | false | -| QW_LAMBDA_DISABLE_JANITOR | true to disable retention enforcement and garbage collection | false | -| QW_LAMBDA_MAX_CHECKPOINTS | maximum number of ingested file names to keep in source history | 100 | - -Searcher only: -| Variable | Description | Default | -|---|---|---| -| QW_LAMBDA_SEARCHER_METASTORE_POLLING_INTERVAL_SECONDS | refresh interval of the metastore | 60 | -| QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY | `searcher.partial_request_cache_capacity` node config | 64M | - - -### Set up a search API - -You can configure an HTTP API endpoint around the Quickwit Searcher Lambda. The -mock data example stack shows such a configuration. The API Gateway is enabled -when the `SEARCHER_API_KEY` environment variable is set: - -```bash -SEARCHER_API_KEY=my-at-least-20-char-long-key make deploy-mock-data -``` - -> [!WARNING] -> The API key is stored in plain text in the CDK stack. 
For a real world -> deployment, the key should be fetched from something like [AWS Secrets -> Manager](https://docs.aws.amazon.com/cdk/v2/guide/get_secrets_manager_value.html). - -Note that the response is always gzipped compressed, regardless of the -`Accept-Encoding` request header: - -```bash -curl -d '{"query":"quantity:>5", "max_hits": 10}' -H "Content-Type: application/json" -H "x-api-key: my-at-least-20-char-long-key" -X POST https://{api_id}.execute-api.{region}.amazonaws.com/api/v1/mock-sales/search --compressed -``` - -### Useful CDK commands - - * `cdk ls` list all stacks in the app - * `cdk synth` emits the synthesized CloudFormation template - * `cdk deploy` deploy this stack to your default AWS account/region - * `cdk diff` compare deployed stack with current state - * `cdk docs` open CDK documentation - -### Grafana data source setup - -You can query and visualize the Quickwit Searcher Lambda from Grafana by using the [Quickwit data source for Grafana](https://grafana.com/grafana/plugins/quickwit-quickwit-datasource/). - -#### Prerequisites - -- [Set up HTTP API endpoint for Quickwit Searcher Lambda](#set-up-a-search-api) -- [Install Quickwit data source plugin on Grafana](https://github.com/quickwit-oss/quickwit-datasource#installation) - -#### Configure Grafana data source - -If you don't have a Grafana instance running yet, you can start one with the Quickwit plugin installed using Docker: - -```bash -docker run -e GF_INSTALL_PLUGINS="quickwit-quickwit-datasource" -p 3000:3000 grafana/grafana -``` - -In the `Connections > Data sources` page, add a new Quickwit data source and configure the following settings: - -|Variable|Description|Example| -|--|--|--| -|HTTP URL| HTTP search endpoint for Quickwit Searcher Lambda | https://*******.execute-api.us-east-1.amazonaws.com/api/v1 | -|Custom HTTP Headers| If you configure API Gateway to require an API key, set `x-api-key` HTTP Header | Header: `x-api-key`
Value: API key value| -|Index ID| Same as `QW_LAMBDA_INDEX_ID` | hdfs-logs | - -After entering these values, click "Save & test". You can now query your Quickwit Lambda from Grafana! diff --git a/distribution/lambda/cdk/__init__.py b/distribution/lambda/cdk/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/distribution/lambda/cdk/app.py b/distribution/lambda/cdk/app.py deleted file mode 100755 index d9a4fe9d52e..00000000000 --- a/distribution/lambda/cdk/app.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -import os -from typing import Literal - -import aws_cdk as cdk - -from stacks.services.quickwit_service import DEFAULT_LAMBDA_MEMORY_SIZE -from stacks.examples.hdfs_stack import HdfsStack -from stacks.examples.mock_data_stack import MockDataStack - -HDFS_STACK_NAME = "HdfsStack" -MOCK_DATA_STACK_NAME = "MockDataStack" - - -def package_location_from_env(type: Literal["searcher"] | Literal["indexer"]) -> str: - path_var = f"{type.upper()}_PACKAGE_PATH" - if path_var in os.environ: - return os.environ[path_var] - else: - print( - f"Could not infer the {type} package location. Configure it using the {path_var} environment variable" - ) - exit(1) - - -app = cdk.App() - -HdfsStack( - app, - HDFS_STACK_NAME, - env=cdk.Environment( - account=os.getenv("CDK_ACCOUNT"), region=os.getenv("CDK_REGION") - ), - indexer_memory_size=int( - os.environ.get("INDEXER_MEMORY_SIZE", DEFAULT_LAMBDA_MEMORY_SIZE) - ), - searcher_memory_size=int( - os.environ.get("SEARCHER_MEMORY_SIZE", DEFAULT_LAMBDA_MEMORY_SIZE) - ), - indexer_package_location=package_location_from_env("indexer"), - searcher_package_location=package_location_from_env("searcher"), -) - -MockDataStack( - app, - MOCK_DATA_STACK_NAME, - env=cdk.Environment( - account=os.getenv("CDK_ACCOUNT"), region=os.getenv("CDK_REGION") - ), - indexer_package_location=package_location_from_env("indexer"), - searcher_package_location=package_location_from_env("searcher"), - search_api_key=os.getenv("SEARCHER_API_KEY", None), - data_generation_interval_sec=int(os.getenv("DATA_GENERATION_INTERVAL_SEC", 300)), -) - -app.synth() diff --git a/distribution/lambda/cdk/cdk.json b/distribution/lambda/cdk/cdk.json deleted file mode 100644 index ed7ea2f776b..00000000000 --- a/distribution/lambda/cdk/cdk.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "app": "python3 app.py", - "watch": { - "include": [ - "**" - ], - "exclude": [ - "README.md", - "cdk*.json", - "requirements*.txt", - "**/__init__.py", - "python/__pycache__", - "tests" - ] - }, - "context": { - "@aws-cdk/aws-lambda:recognizeLayerVersion": true, - "@aws-cdk/core:checkSecretUsage": true, - "@aws-cdk/core:target-partitions": [ - "aws", - "aws-cn" - ], - "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, - "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, - "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, - "@aws-cdk/aws-iam:minimizePolicies": true, - "@aws-cdk/core:validateSnapshotRemovalPolicy": true, - "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, - "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, - "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, - "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, - "@aws-cdk/core:enablePartitionLiterals": true, - "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, - "@aws-cdk/aws-iam:standardizedServicePrincipals": true, - "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, - 
"@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, - "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, - "@aws-cdk/aws-route53-patters:useCertificate": true, - "@aws-cdk/customresources:installLatestAwsSdkDefault": false, - "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, - "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, - "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, - "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, - "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, - "@aws-cdk/aws-redshift:columnId": true, - "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, - "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, - "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, - "@aws-cdk/aws-kms:aliasNameRef": true, - "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, - "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, - "@aws-cdk/aws-efs:denyAnonymousAccess": true, - "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, - "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, - "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true - } -} diff --git a/distribution/lambda/cdk/cli.py b/distribution/lambda/cdk/cli.py deleted file mode 100644 index ecb3ffdb155..00000000000 --- a/distribution/lambda/cdk/cli.py +++ /dev/null @@ -1,465 +0,0 @@ -"""Helper scripts to test and explore the deployed infrastructure. - -These functions are wrapped by the Makefile for convenience.""" - -import base64 -import gzip -import http.client -import json -import os -import re -import subprocess -import tempfile -import time -from dataclasses import dataclass -from functools import cache -from io import BytesIO -from urllib.parse import urlparse - -import boto3 -import botocore.config -import botocore.exceptions -from . 
import app -from stacks.examples import hdfs_stack, mock_data_stack - -region = os.environ["CDK_REGION"] - -example_host = "quickwit-datasets-public.s3.amazonaws.com" -example_hdfs_file = "hdfs-logs-multitenants.json.gz" -INDEXING_BOTO_CONFIG = botocore.config.Config( - retries={"max_attempts": 0}, read_timeout=60 * 15 -) -session = boto3.Session(region_name=region) -mock_sales_index_id = "mock-sales" -hdfs_logs_index_id = "hdfs-logs" - - -@cache -def _get_cloudformation_output_value(stack_name: str, export_name: str) -> str: - client = session.client("cloudformation") - stacks = client.describe_stacks(StackName=stack_name)["Stacks"] - if len(stacks) != 1: - print(f"Stack {stack_name} not identified uniquely, found {stacks}") - outputs = stacks[0]["Outputs"] - for output in outputs: - if "ExportName" in output and output["ExportName"] == export_name: - return output["OutputValue"] - else: - print(f"Export name {export_name} not found in stack {stack_name}") - exit(1) - - -def _decompress_if_gzip(payload: bytes, headers: dict) -> str: - if headers.get("content-encoding", "") == "gzip": - return gzip.GzipFile(mode="rb", fileobj=BytesIO(payload)).read().decode() - else: - return payload.decode() - - -@dataclass -class LambdaResult: - function_error: str - log_tail: str - payload: str - raw_size_bytes: int - status_code: int - - @staticmethod - def from_lambda_response(lambda_resp: dict) -> "LambdaResult": - payload = lambda_resp["Payload"].read().decode() - return LambdaResult( - function_error=lambda_resp.get("FunctionError", ""), - log_tail=base64.b64decode(lambda_resp["LogResult"]).decode(), - payload=payload, - raw_size_bytes=len(payload), - status_code=0, - ) - - @staticmethod - def from_lambda_gateway_response(lambda_resp: dict) -> "LambdaResult": - gw_str = lambda_resp["Payload"].read().decode() - gw_obj = json.loads(gw_str) - if "body" in gw_obj: - payload = gw_obj["body"] - status_code = gw_obj["statusCode"] - else: - payload = gw_str - status_code = -1 - if gw_obj.get("isBase64Encoded", False): - dec_payload = base64.b64decode(payload) - payload = _decompress_if_gzip(dec_payload, gw_obj.get("headers", {})) - return LambdaResult( - function_error=lambda_resp.get("FunctionError", ""), - log_tail=base64.b64decode(lambda_resp["LogResult"]).decode(), - payload=payload, - raw_size_bytes=len(gw_str), - status_code=status_code, - ) - - def extract_report(self) -> str: - """Expect "REPORT RequestId: xxx Duration: yyy..." 
to be in log tail""" - for line in reversed(self.log_tail.strip().splitlines()): - if line.startswith("REPORT"): - return line - else: - raise ValueError(f"Could not find report in log tail") - - def request_id(self) -> str: - report = self.extract_report() - match = re.search(r"RequestId: ([0-9a-z\-]+)", report) - if match: - return match.group(1) - else: - raise ValueError(f"Could not find RequestId in report: {report}") - - -def _format_lambda_output( - lambda_result: LambdaResult, duration=None, max_resp_size=10 * 1000 -): - if lambda_result.function_error != "": - print("\n## FUNCTION ERROR:") - print(lambda_result.function_error) - print("\n## RAW RESPONSE SIZE (BYTES):") - if len(lambda_result.payload) == 0: - ratio = "empty payload" - else: - ratio = f"{(lambda_result.raw_size_bytes / len(lambda_result.payload)):.1f}x the final payload" - print(f"{lambda_result.raw_size_bytes} ({ratio})") - print(f"\n## RESPONSE [{lambda_result.status_code}]:") - payload_size = len(lambda_result.payload) - print(lambda_result.payload[:max_resp_size]) - if payload_size > max_resp_size: - print(f"Response too long ({payload_size}), truncated to {max_resp_size} bytes") - - if duration is not None: - print("\n## TOTAL INVOCATION DURATION:") - print(duration) - - -def upload_hdfs_src_file(): - bucket_name = _get_cloudformation_output_value( - app.HDFS_STACK_NAME, hdfs_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME - ) - uri = f"s3://{bucket_name}/{example_hdfs_file}" - try: - resp = session.client("s3").head_object( - Bucket=bucket_name, Key=example_hdfs_file - ) - print(f"{uri} already exists ({resp['ContentLength']} bytes), skipping upload") - return - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] != "404": - raise e - print(f"download dataset https://{example_host}/{example_hdfs_file}") - conn = http.client.HTTPSConnection(example_host) - conn.request("GET", f"/{example_hdfs_file}") - response = conn.getresponse() - if response.status != 200: - print(f"Failed to fetch dataset") - exit(1) - with tempfile.NamedTemporaryFile() as tmp: - while True: - chunk = response.read(1024 * 1024) - if len(chunk) == 0: - break - tmp.write(chunk) - tmp.flush() - print(f"downloaded {tmp.tell()} bytes") - print(f"upload dataset to {uri}") - session.client("s3").upload_file( - Bucket=bucket_name, Filename=tmp.name, Key=example_hdfs_file - ) - - -def invoke_hdfs_indexer() -> LambdaResult: - function_name = _get_cloudformation_output_value( - app.HDFS_STACK_NAME, hdfs_stack.INDEXER_FUNCTION_NAME_EXPORT_NAME - ) - print(f"indexer function name: {function_name}") - bucket_name = _get_cloudformation_output_value( - app.HDFS_STACK_NAME, hdfs_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME - ) - source_uri = f"s3://{bucket_name}/{example_hdfs_file}" - print(f"src_file: {source_uri}") - invoke_start = time.time() - resp = session.client("lambda", config=INDEXING_BOTO_CONFIG).invoke( - FunctionName=function_name, - InvocationType="RequestResponse", - LogType="Tail", - Payload=f"""{{ "source_uri": "{source_uri}" }}""", - ) - invoke_duration = time.time() - invoke_start - lambda_result = LambdaResult.from_lambda_response(resp) - _format_lambda_output(lambda_result, invoke_duration) - return lambda_result - - -def _invoke_searcher( - stack_name: str, - index_id: str, - function_export_name: str, - payload: str, - download_logs: bool, -) -> LambdaResult: - function_name = _get_cloudformation_output_value(stack_name, function_export_name) - client = session.client("lambda") - print(f"searcher function name: 
{function_name}") - invoke_start = time.time() - resp = client.invoke( - FunctionName=function_name, - InvocationType="RequestResponse", - LogType="Tail", - Payload=json.dumps( - { - "resource": f"/api/v1/{index_id}/search", - "path": f"/api/v1/{index_id}/search", - "httpMethod": "POST", - "headers": { - "Content-Type": "application/json", - }, - "requestContext": { - "httpMethod": "POST", - }, - "body": payload, - "isBase64Encoded": False, - } - ), - ) - invoke_duration = time.time() - invoke_start - lambda_result = LambdaResult.from_lambda_gateway_response(resp) - _format_lambda_output(lambda_result, invoke_duration) - if download_logs: - download_logs_to_file(lambda_result.request_id(), function_name, invoke_start) - return lambda_result - - -def invoke_hdfs_searcher(payload: str, download_logs: bool = True) -> LambdaResult: - return _invoke_searcher( - app.HDFS_STACK_NAME, - hdfs_logs_index_id, - hdfs_stack.SEARCHER_FUNCTION_NAME_EXPORT_NAME, - payload, - download_logs, - ) - - -def get_logs( - function_name: str, request_id: str, timestamp_unix_ms: int, timeout: float = 60 -): - print(f"Getting logs for requestId: {request_id}...") - client = session.client("logs") - log_group_name = f"/aws/lambda/{function_name}" - paginator = client.get_paginator("filter_log_events") - lower_time_bound = timestamp_unix_ms - 1000 * 3600 - upper_time_bound = timestamp_unix_ms + 1000 * 3600 - last_event_id = "" - last_event_found = True - start_time = time.time() - while time.time() - start_time < timeout: - describe_resp = client.describe_log_groups(logGroupNamePrefix=log_group_name) - group_names = [group["logGroupName"] for group in describe_resp["logGroups"]] - if log_group_name in group_names: - break - print(f"log group not found, retrying...") - time.sleep(3) - while time.time() - start_time < timeout: - for page in paginator.paginate( - logGroupName=log_group_name, - filterPattern=f"%{request_id}%", - startTime=lower_time_bound, - endTime=upper_time_bound, - ): - for event in page["events"]: - if last_event_found or event["eventId"] == last_event_id: - last_event_found = True - last_event_id = event["eventId"] - yield event["message"] - if event["message"].startswith("REPORT"): - last_event_id = "REPORT" - break - if last_event_id == "REPORT": - break - if last_event_id == "REPORT": - break - elif last_event_id == "": - print(f"no event found, retrying...") - else: - print(f"last event not found, retrying...") - last_event_found = False - time.sleep(3) - - else: - raise TimeoutError(f"Log collection timed out after {timeout}s") - - -def download_logs_to_file(request_id: str, function_name: str, invoke_start: float): - try: - with open(f"lambda.{request_id}.log", "w") as f: - for log in get_logs( - function_name, - request_id, - int(invoke_start * 1000), - ): - f.write(log) - print(f"Logs written to lambda.{request_id}.log") - except Exception as e: - print(f"Failed to download logs: {e}") - - -def invoke_mock_data_searcher(): - _invoke_searcher( - app.MOCK_DATA_STACK_NAME, - mock_sales_index_id, - mock_data_stack.SEARCHER_FUNCTION_NAME_EXPORT_NAME, - """{"query": "id:1", "sort_by": "ts", "max_hits": 10}""", - True, - ) - - -def _clean_s3_bucket(bucket_name: str, prefix: str = ""): - print(f"Cleaning up bucket {bucket_name}/{prefix}...") - s3 = session.resource("s3") - bucket = s3.Bucket(bucket_name) - try: - bucket.objects.filter(Prefix=prefix).delete() - except s3.meta.client.exceptions.NoSuchBucket: - print(f"Bucket {bucket_name} not found, skipping cleanup") - - -def empty_hdfs_bucket(): - 
bucket_name = _get_cloudformation_output_value( - app.HDFS_STACK_NAME, hdfs_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME - ) - _clean_s3_bucket(bucket_name) - - -def empty_mock_data_buckets(): - bucket_name = _get_cloudformation_output_value( - app.MOCK_DATA_STACK_NAME, mock_data_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME - ) - _clean_s3_bucket(bucket_name) - bucket_name = _get_cloudformation_output_value( - app.MOCK_DATA_STACK_NAME, mock_data_stack.SOURCE_BUCKET_NAME_EXPORT_NAME - ) - _clean_s3_bucket(bucket_name) - - -def print_mock_data_metastore(): - bucket_name = _get_cloudformation_output_value( - app.MOCK_DATA_STACK_NAME, mock_data_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME - ) - s3 = session.client("s3") - response = s3.get_object( - Bucket=bucket_name, Key=f"index/{mock_sales_index_id}/metastore.json" - ) - print(response["Body"].read().decode()) - - -@cache -def _git_commit(): - return subprocess.run( - ["git", "describe", "--dirty"], check=True, capture_output=True, text=True - ).stdout.strip() - - -def benchmark_hdfs_indexing(): - memory_size = os.environ["INDEXER_MEMORY_SIZE"] - bucket_name = _get_cloudformation_output_value( - app.HDFS_STACK_NAME, hdfs_stack.INDEX_STORE_BUCKET_NAME_EXPORT_NAME - ) - _clean_s3_bucket(bucket_name, "index/") - bench_result = { - "run": "benchmark_hdfs_indexing", - "ts": time.time(), - "commit": _git_commit(), - "memory_size": memory_size, - "env": { - k: os.environ[k] - for k in os.environ.keys() - if k.startswith("QW_LAMBDA_") - and k != "QW_LAMBDA_OPENTELEMETRY_AUTHORIZATION" - }, - } - try: - indexer_result = invoke_hdfs_indexer() - bench_result["lambda_report"] = indexer_result.extract_report() - except Exception as e: - bench_result["invocation_error"] = repr(e) - print(f"Failed to invoke indexer") - - with open(f"lambda-bench.log", "a+") as f: - f.write(json.dumps(bench_result)) - f.write("\n") - - -def benchmark_hdfs_search(payload: str): - memory_size = os.environ["SEARCHER_MEMORY_SIZE"] - for _ in range(2): - bench_result = { - "run": "benchmark_hdfs_search", - "ts": time.time(), - "commit": _git_commit(), - "memory_size": memory_size, - "payload": json.loads(payload), - "env": { - k: os.environ[k] - for k in os.environ.keys() - if k != "QW_LAMBDA_OPENTELEMETRY_AUTHORIZATION" - }, - } - try: - indexer_result = invoke_hdfs_searcher(payload, download_logs=False) - bench_result["lambda_report"] = indexer_result.extract_report() - except Exception as e: - bench_result["invocation_error"] = repr(e) - print(f"Failed to invoke searcher") - - with open(f"lambda-bench.log", "a+") as f: - f.write(json.dumps(bench_result)) - f.write("\n") - - -def test_mock_data_endpoints(): - apigw_url = _get_cloudformation_output_value( - app.MOCK_DATA_STACK_NAME, mock_data_stack.API_GATEWAY_EXPORT_NAME - ) - - def req(method, path, body=None, expected_status=200): - conn = http.client.HTTPSConnection(urlparse(apigw_url).netloc) - conn.request( - method, - path, - body, - headers={"x-api-key": os.getenv("SEARCHER_API_KEY")}, - ) - response = conn.getresponse() - print(f"{method} {path}") - headers = {k: v for (k, v) in response.getheaders()} - body = _decompress_if_gzip(response.read(), headers) - if response.status != expected_status: - print(f"[{response.status}] => {body}") - exit(1) - else: - print(f"[{response.status}] => {json.dumps(json.loads(body))[0:100]}") - - req("GET", f"/api/v1/{mock_sales_index_id}/search?query=animal") - req( - "POST", - f"/api/v1/{mock_sales_index_id}/search", - '{"query":"quantity:>5", "max_hits": 10}', - ) - req("GET", 
f"/api/v1/_elastic/{mock_sales_index_id}/_search?q=animal") - req( - "POST", - f"/api/v1/_elastic/{mock_sales_index_id}/_search", - '{"query":{"bool":{"must":[{"range":{"quantity":{"gt":5}}}]}},"size":10}', - ) - req("GET", f"/api/v1/_elastic/{mock_sales_index_id}/_field_caps?fields=quantity") - # expected errors - req( - "GET", - f"/api/v1/_elastic/{mock_sales_index_id}/_search?query=animal", - expected_status=400, - ) - req("GET", f"/api/v1/_elastic/_search?q=animal", expected_status=501) - req("GET", f"/api/v1/indexes/{mock_sales_index_id}") diff --git a/distribution/lambda/cdk/setup.py b/distribution/lambda/cdk/setup.py deleted file mode 100644 index 2f0f3ebd7e5..00000000000 --- a/distribution/lambda/cdk/setup.py +++ /dev/null @@ -1,7 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name="cdk", - version="0.1.0", - packages=find_packages(), -) diff --git a/distribution/lambda/cdk/stacks/__init__.py b/distribution/lambda/cdk/stacks/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/distribution/lambda/cdk/stacks/examples/__init__.py b/distribution/lambda/cdk/stacks/examples/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/distribution/lambda/cdk/stacks/examples/hdfs_stack.py b/distribution/lambda/cdk/stacks/examples/hdfs_stack.py deleted file mode 100644 index 5e1d7350a99..00000000000 --- a/distribution/lambda/cdk/stacks/examples/hdfs_stack.py +++ /dev/null @@ -1,74 +0,0 @@ -import aws_cdk -from aws_cdk import Stack, aws_s3_assets -from constructs import Construct -import yaml - -from ..services import quickwit_service - - -INDEX_STORE_BUCKET_NAME_EXPORT_NAME = "hdfs-index-store-bucket-name" -INDEXER_FUNCTION_NAME_EXPORT_NAME = "hdfs-indexer-function-name" -SEARCHER_FUNCTION_NAME_EXPORT_NAME = "hdfs-searcher-function-name" - - -class HdfsStack(Stack): - def __init__( - self, - scope: Construct, - construct_id: str, - indexer_memory_size: int, - searcher_memory_size: int, - indexer_package_location: str, - searcher_package_location: str, - **kwargs - ) -> None: - super().__init__(scope, construct_id, **kwargs) - - index_config_local_path = "./resources/hdfs-logs.yaml" - - with open(index_config_local_path) as f: - index_config_dict = yaml.safe_load(f) - index_id = index_config_dict["index_id"] - - index_config = aws_s3_assets.Asset( - self, - "mock-data-index-config", - path=index_config_local_path, - ) - lambda_env = { - **quickwit_service.extract_local_env(), - "RUST_LOG": "quickwit=debug", - } - qw_svc = quickwit_service.QuickwitService( - self, - "Quickwit", - index_id=index_id, - index_config_bucket=index_config.s3_bucket_name, - index_config_key=index_config.s3_object_key, - indexer_environment=lambda_env, - searcher_environment=lambda_env, - indexer_memory_size=indexer_memory_size, - searcher_memory_size=searcher_memory_size, - indexer_package_location=indexer_package_location, - searcher_package_location=searcher_package_location, - indexer_timeout=aws_cdk.Duration.minutes(10), - ) - - aws_cdk.CfnOutput( - self, - "index-store-bucket-name", - value=qw_svc.bucket.bucket_name, - export_name=INDEX_STORE_BUCKET_NAME_EXPORT_NAME, - ) - aws_cdk.CfnOutput( - self, - "indexer-function-name", - value=qw_svc.indexer.lambda_function.function_name, - export_name=INDEXER_FUNCTION_NAME_EXPORT_NAME, - ) - aws_cdk.CfnOutput( - self, - "searcher-function-name", - value=qw_svc.searcher.lambda_function.function_name, - export_name=SEARCHER_FUNCTION_NAME_EXPORT_NAME, - ) diff --git 
a/distribution/lambda/cdk/stacks/examples/mock_data_stack.py b/distribution/lambda/cdk/stacks/examples/mock_data_stack.py deleted file mode 100644 index 8a4a5c9290b..00000000000 --- a/distribution/lambda/cdk/stacks/examples/mock_data_stack.py +++ /dev/null @@ -1,207 +0,0 @@ -import aws_cdk -from aws_cdk import ( - Stack, - aws_apigateway, - aws_lambda, - aws_s3, - aws_s3_assets, - aws_s3_notifications, - aws_events, - aws_events_targets, -) -from constructs import Construct -import yaml - -from ..services import quickwit_service - -SEARCHER_FUNCTION_NAME_EXPORT_NAME = "mock-data-searcher-function-name" -INDEX_STORE_BUCKET_NAME_EXPORT_NAME = "mock-data-index-store-bucket-name" -SOURCE_BUCKET_NAME_EXPORT_NAME = "mock-data-source-bucket-name" -API_GATEWAY_EXPORT_NAME = "mock-data-api-gateway-url" - - -class Source(Construct): - """An synthetic data source that generates mock data and pushes it to the - indexer through a staging S3 bucket""" - - def __init__( - self, - scope: Construct, - construct_id: str, - index_id: str, - qw_svc: quickwit_service.QuickwitService, - data_generation_interval_sec: int, - **kwargs, - ): - super().__init__(scope, construct_id, **kwargs) - mock_data_bucket = aws_s3.Bucket( - self, - "mock-data", - removal_policy=aws_cdk.RemovalPolicy.DESTROY, - lifecycle_rules=[ - aws_s3.LifecycleRule(enabled=True, expiration=aws_cdk.Duration.days(1)) - ], - ) - - generator_lambda = aws_lambda.Function( - self, - id="MockDataGenerator", - code=aws_lambda.Code.from_asset("resources/data-generator/"), - runtime=aws_lambda.Runtime.PYTHON_3_10, - handler="handler.lambda_handler", - environment={ - "BUCKET_NAME": mock_data_bucket.bucket_name, - "PREFIX": index_id, - }, - timeout=aws_cdk.Duration.seconds(30), - memory_size=1024, - ) - mock_data_bucket.grant_read_write(generator_lambda) - rule = aws_events.Rule( - self, - "ScheduledRule", - schedule=aws_events.Schedule.rate( - aws_cdk.Duration.seconds(data_generation_interval_sec) - ), - ) - rule.add_target(aws_events_targets.LambdaFunction(generator_lambda)) - - mock_data_bucket.grant_read(qw_svc.indexer.lambda_function) - mock_data_bucket.add_object_created_notification( - aws_s3_notifications.LambdaDestination(qw_svc.indexer.lambda_function) - ) - aws_cdk.CfnOutput( - self, - "source-bucket-name", - value=mock_data_bucket.bucket_name, - export_name=SOURCE_BUCKET_NAME_EXPORT_NAME, - ) - - -class SearchAPI(Construct): - """An API Gateway example configuration to expose the Searcher Lambda - function as a Quickwit search endpoint.""" - - def __init__( - self, - scope: Construct, - construct_id: str, - index_id: str, - qw_svc: quickwit_service.QuickwitService, - api_key: str, - **kwargs, - ) -> None: - super().__init__(scope, construct_id, **kwargs) - - api = aws_apigateway.RestApi( - self, - "quickwit-search-api", - rest_api_name=f"Quickwit {index_id} search API", - deploy=False, - ) - searcher_integration = aws_apigateway.LambdaIntegration( - qw_svc.searcher.lambda_function - ) - search_resource = api.root.add_resource("v1").add_resource("{proxy+}") - search_resource.add_method("POST", searcher_integration, api_key_required=True) - search_resource.add_method("GET", searcher_integration, api_key_required=True) - # Change the deployment id (api-deployment-x) each time the API changes, - # otherwise changes are not deployed. 
- api_deployment = aws_apigateway.Deployment(self, "api-deployment-1", api=api) - api_stage = aws_apigateway.Stage( - self, "api", deployment=api_deployment, stage_name="api" - ) - plan = aws_apigateway.UsagePlan( - self, - "default-usage-plan", - api_stages=[aws_apigateway.UsagePlanPerApiStage(api=api, stage=api_stage)], - description="Usage plan for the Quickwit search API", - ) - key = aws_apigateway.ApiKey( - self, - "default-api-key", - value=api_key, - description="Default API key for the Quickwit search API", - ) - plan.add_api_key(key) - api.deployment_stage = api_stage - - aws_cdk.CfnOutput( - self, - "search-api-url", - value=api.url.rstrip("/") + search_resource.path, - export_name=API_GATEWAY_EXPORT_NAME, - ) - - -class MockDataStack(Stack): - def __init__( - self, - scope: Construct, - construct_id: str, - indexer_package_location: str, - searcher_package_location: str, - search_api_key: str | None = None, - data_generation_interval_sec: int = 300, - **kwargs, - ) -> None: - """If `search_api_key` is not set, the search API is not deployed.""" - super().__init__(scope, construct_id, **kwargs) - - index_config_local_path = "resources/mock-sales.yaml" - with open(index_config_local_path) as f: - index_config_dict = yaml.safe_load(f) - index_id = index_config_dict["index_id"] - - index_config = aws_s3_assets.Asset( - self, - "mock-data-index-config", - path=index_config_local_path, - ) - lambda_env = quickwit_service.extract_local_env() - qw_svc = quickwit_service.QuickwitService( - self, - "Quickwit", - index_id=index_id, - index_config_bucket=index_config.s3_bucket_name, - index_config_key=index_config.s3_object_key, - indexer_environment={ - # the actor system is very verbose when the source is shutting - # down (each Lambda invocation) - "RUST_LOG": "info,quickwit_actors=warn", - **lambda_env, - }, - searcher_environment=lambda_env, - indexer_package_location=indexer_package_location, - searcher_package_location=searcher_package_location, - ) - - Source( - self, - "Source", - index_id=index_id, - qw_svc=qw_svc, - data_generation_interval_sec=data_generation_interval_sec, - ) - - if search_api_key is not None: - SearchAPI( - self, - "SearchAPI", - index_id=index_id, - qw_svc=qw_svc, - api_key=search_api_key, - ) - - aws_cdk.CfnOutput( - self, - "index-store-bucket-name", - value=qw_svc.bucket.bucket_name, - export_name=INDEX_STORE_BUCKET_NAME_EXPORT_NAME, - ) - aws_cdk.CfnOutput( - self, - "searcher-function-name", - value=qw_svc.searcher.lambda_function.function_name, - export_name=SEARCHER_FUNCTION_NAME_EXPORT_NAME, - ) diff --git a/distribution/lambda/cdk/stacks/services/__init__.py b/distribution/lambda/cdk/stacks/services/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/distribution/lambda/cdk/stacks/services/indexer_service.py b/distribution/lambda/cdk/stacks/services/indexer_service.py deleted file mode 100644 index 56ce7be5def..00000000000 --- a/distribution/lambda/cdk/stacks/services/indexer_service.py +++ /dev/null @@ -1,48 +0,0 @@ -import aws_cdk -from aws_cdk import aws_lambda, aws_s3, aws_iam -from constructs import Construct - - -class IndexerService(Construct): - def __init__( - self, - scope: Construct, - construct_id: str, - store_bucket: aws_s3.Bucket, - index_id: str, - index_config_bucket: str, - index_config_key: str, - memory_size: int, - timeout: aws_cdk.Duration, - environment: dict[str, str], - asset_path: str, - **kwargs, - ) -> None: - super().__init__(scope, construct_id, **kwargs) - - self.lambda_function = 
aws_lambda.Function( - self, - id="Lambda", - code=aws_lambda.Code.from_asset(asset_path), - runtime=aws_lambda.Runtime.PROVIDED_AL2, - handler="N/A", - environment={ - "QW_LAMBDA_INDEX_BUCKET": store_bucket.bucket_name, - "QW_LAMBDA_METASTORE_BUCKET": store_bucket.bucket_name, - "QW_LAMBDA_INDEX_ID": index_id, - "QW_LAMBDA_INDEX_CONFIG_URI": f"s3://{index_config_bucket}/{index_config_key}", - **environment, - }, - timeout=timeout, - retry_attempts=0, - reserved_concurrent_executions=1, - memory_size=memory_size, - ephemeral_storage_size=aws_cdk.Size.gibibytes(10), - ) - self.lambda_function.add_to_role_policy( - aws_iam.PolicyStatement( - actions=["s3:GetObject"], - resources=[f"arn:aws:s3:::{index_config_bucket}/{index_config_key}"], - ) - ) - store_bucket.grant_read_write(self.lambda_function) diff --git a/distribution/lambda/cdk/stacks/services/quickwit_service.py b/distribution/lambda/cdk/stacks/services/quickwit_service.py deleted file mode 100644 index 66294ec239b..00000000000 --- a/distribution/lambda/cdk/stacks/services/quickwit_service.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - -import aws_cdk -from aws_cdk import aws_s3 -from constructs import Construct - -from . import indexer_service, searcher_service - -# Using 3008MB as default because higher memory configurations need to be -# enabled for each AWS account through the support. -DEFAULT_LAMBDA_MEMORY_SIZE = 3008 - - -def extract_local_env() -> dict[str, str]: - """Extracts local environment variables QW_LAMBDA_* and QW_DISABLE_TELEMETRY""" - return { - k: os.environ[k] - for k in os.environ.keys() - if (k.startswith("QW_LAMBDA_") or k == "QW_DISABLE_TELEMETRY") - } - - -class QuickwitService(Construct): - def __init__( - self, - scope: Construct, - construct_id: str, - index_config_bucket: str, - index_config_key: str, - index_id: str, - searcher_package_location: str, - indexer_package_location: str, - indexer_memory_size: int = DEFAULT_LAMBDA_MEMORY_SIZE, - indexer_environment: dict[str, str] = {}, - # small default timeout to avoid unexpected costs and hanging indexers - indexer_timeout: aws_cdk.Duration = aws_cdk.Duration.minutes(1), - searcher_memory_size: int = DEFAULT_LAMBDA_MEMORY_SIZE, - searcher_environment: dict[str, str] = {}, - **kwargs, - ) -> None: - """Create a new Quickwit Lambda service construct node. - - `{indexer|searcher}_package_location` is the path of the `zip` asset for - the Lambda function. 
- """ - super().__init__(scope, construct_id, **kwargs) - self.bucket = aws_s3.Bucket( - self, - "IndexStore", - removal_policy=aws_cdk.RemovalPolicy.DESTROY, - ) - self.indexer = indexer_service.IndexerService( - self, - "Indexer", - store_bucket=self.bucket, - index_id=index_id, - index_config_bucket=index_config_bucket, - index_config_key=index_config_key, - memory_size=indexer_memory_size, - timeout=indexer_timeout, - environment=indexer_environment, - asset_path=indexer_package_location, - ) - self.searcher = searcher_service.SearcherService( - self, - "Searcher", - store_bucket=self.bucket, - index_id=index_id, - memory_size=searcher_memory_size, - environment=searcher_environment, - asset_path=searcher_package_location, - ) diff --git a/distribution/lambda/cdk/stacks/services/searcher_service.py b/distribution/lambda/cdk/stacks/services/searcher_service.py deleted file mode 100644 index 5950a2de73e..00000000000 --- a/distribution/lambda/cdk/stacks/services/searcher_service.py +++ /dev/null @@ -1,37 +0,0 @@ -import aws_cdk -from aws_cdk import aws_lambda, aws_s3, PhysicalName -from constructs import Construct - - -class SearcherService(Construct): - def __init__( - self, - scope: Construct, - construct_id: str, - store_bucket: aws_s3.Bucket, - index_id: str, - memory_size: int, - environment: dict[str, str], - asset_path: str, - **kwargs - ) -> None: - super().__init__(scope, construct_id, **kwargs) - - self.lambda_function = aws_lambda.Function( - self, - id="Lambda", - code=aws_lambda.Code.from_asset(asset_path), - runtime=aws_lambda.Runtime.PROVIDED_AL2, - handler="N/A", - environment={ - "QW_LAMBDA_INDEX_BUCKET": store_bucket.bucket_name, - "QW_LAMBDA_METASTORE_BUCKET": store_bucket.bucket_name, - "QW_LAMBDA_INDEX_ID": index_id, - **environment, - }, - timeout=aws_cdk.Duration.seconds(30), - memory_size=memory_size, - ephemeral_storage_size=aws_cdk.Size.gibibytes(10), - ) - - store_bucket.grant_read_write(self.lambda_function) diff --git a/distribution/lambda/resources/data-generator/handler.py b/distribution/lambda/resources/data-generator/handler.py deleted file mode 100644 index 7382246ddb6..00000000000 --- a/distribution/lambda/resources/data-generator/handler.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import os -import random -import time -import gzip -import random - -import boto3 - -s3 = boto3.client("s3") - -with gzip.open("wordlist.json.gz", "r") as f: - words: list[str] = json.loads(f.read()) - - -def lambda_handler(event, context): - data = [] - base_time = time.time() - for i in range(100000): - item = { - "ts": int(base_time * 1000) + i, - "id": i, - "name": f"Item {i}", - "price": round(random.uniform(1, 100), 2), - "quantity": random.randint(1, 10), - "description": " ".join(random.choices(words, k=8)), - } - data.append(item) - - json_str = "\n".join(json.dumps(d) for d in data) - json_gz = gzip.compress(json_str.encode(), compresslevel=6) - - s3.put_object( - Bucket=os.environ["BUCKET_NAME"], - Key=f"{os.environ['PREFIX']}/{int(base_time)}.gz", - Body=json_gz, - ) diff --git a/distribution/lambda/resources/data-generator/wordlist.json.gz b/distribution/lambda/resources/data-generator/wordlist.json.gz deleted file mode 100644 index d24eadcd6a45b1542bd9b0c9833cfb4cd672e6f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5822 zcmV;v7D4GBiwFqMKf`4L0C#V4WNc}3bS`RhZ*Bl&+DVecG!TX1J*Oa8a|sp@K?FON ztc3K+PD)OhRN93(J;9JK1+41(HP|(D*VO&f7hit)Rr%`czb<~3F8)MxgvRCMji8Ov 
z!V_Atgwvy69y7J#bXpoTYgkKP%fvOY*vApIBV=RFuoE&geS;oG8kUV$>HoMjVp-7+ zF{I~|p_v{W=gMWxU9@5eFIy(mFI&=FW~S71j=x?-j2_m)R)lugLLIz5TalI1BS2_< z(wrXw14yY?Uv9w;{%pt*Fyib*V`-On@+LbmJ`0OhL>^;$cqoz^d9|RSv?pSYkniK# zcGlS7J1qKBo_O?+pCukl*2T%8+rsILOX>VU)+*q*w!j7>J4kzia+Ds{@hn{K79Gni z(D_OKCh#XCuf9IS&!c7R<3kSx?Yrp4`jjWV!(!c8%!8wk?TI*1Y<1BJb}v_3VYh_%jzE5E(B$g2l0f)r7dhlH!v`rmK*f=1f7uDk@1o$d>m(7?nF7z zBBUYs4s_rQ&rxN)Bc=}(9?Rp>OMTNgPS4P`IF7`;!L(9+4X$N#@zn%cS?VFbj@WZU zFVI#t#*ntKx0<-xQbrS;R(cWkbZ9>P2ft4{4)#1|xn( z8q`)q8HOI7prj@>ImtIX?0X8N!=8+ea10_Y%bPkVr7YFR>168iCtS-l!59bK2pfZ^ zdksT)BSeFHxv^M=u|!)~lzY42wfF|3Q$x4(hxCv(3V9#H>P z<4b*NN5mHBJ(Mdg<$KTxPYHpJ4-#nPoIz2Jj`}7?BR`=l4mFB3aym)1CnP=8 zxkV8g?~pYZZ9M1jo$`+KIKr2}`Xxxd?7W$s?Uid20 zGksr;2>E%XJ&gJ$+Xi~ueKnM&TX2-Nu$Jje-(if82Co^{?y7HiSb7U;-O1JwHl?PM zPiPW5B^p11EWR`3$WfF7Psj;}rH6)NnlrEp4SbEOVVYf+%Ow$@(T?$q9OTQ0g1Xz>Dl{;XX#@p?D&O zRM2D6Cex2HBQN7aC00AqS1%Uj@;j9Agl_5bdu-`N4sybjl{^=_K`f0b$WlZ&WXqM)ft~4q*>lasy-NleXaT zb4nD9)N7bCj-o9CR`IWr(zC@K$Q(+aMdg1vcRAOgX9>d^GS|&;tijL1IWR1{$L}x8 zmyBaY$xmY8SDx?)VMA?vJGSVsHz+zWI5D`cIN#?LHl|4&NkIdV;`u^w!*;`ws2 zj;yN`)z|QK^4ANthK)5boi%HG+G3AuW=eyxyohE%Jwx$Z(I z)kEhAqo0tyDNVs>L-jS*(@f5|mkk-Y!jbe2mEkCU2{{IsM7FLaXEg!jYMUGCyolMi+d42wPq zelGa443vy(GCE~YXH1Y@L>V`NbLW-dH^X$XXH8g?|BY6pKf$AHpwEz2kQ*#)px!BH z*b%z6-E-eV^nAbGP#X10LFrBz<$LgF57qLJ5MO+QA#L%Lp-Zjnda(>cnJhHwOhIW2 zy-+-@ZFSLgDv!Py=$kVo_)orWEvoVwA+$>1_6$Iuq>*U^hT z(FnX=Fy3u!&y;%g|0WN!1r(IR!e_uxJsmfdIgI6@SvCV${Kk3BqBU3WTjOjSH`rCq zE}G3ZY;1LCHeU;}p3G~`e}xQXqf9WiYdT}64p|Oh*ow6!=ZWg*Arsxg(?jt~f>QU;1`8(oLuv}s8NLT6 zC{MwZ@q}W&Z3tbpIM$G%OY$+q>xJs8JkQ9F zYAlN#QBrdH+>3JSL~8jv7LHRE_wlR-SRNA^A7>etWv*{4p)G)e+c#0HhOxTLJ>r+a z9plHUeG(26BoxbKFhM7InlE!E5P zb(K8}e;#ekR@NGdtoREhdwo3lI)~p%b@C8J#@QJaz4HdLJ@fR{GBk6})pCQm7u~YN z)7PY7RkCFI!u2YhsGin|HRN8>B=$ugP-pidnlT!dur=bDoOy$3m~)MFtwY=5m}1^o z(rGIThc(e+PY&UTZO=b=m74O@uqUD+Bt3N*80w@^z6axJ{JU>{`0?8qJU~g8VoNK?p@EfnrWF={%w91&<#55>P|gy$}2^iev(TFDz0 zjIDy+NUI2MyOWn8+&guIv{9fo7%g)T8>oYt9?XRt`t2X|vQY=fxs#r9DB~U$My?*k z$V41$LTA9r2D(ARGvJ_hSU66ExG53ink?^F zeH4xKe;_Z)dgzW_y)XZDWeE-K+u@=OfNe@Z`**U6zV=q682gO4D zMfK2efb-f?Z&=+0uN>(}&%L~t{Fu1t#sTU)q3I`RKM$i@3YzW@eNZ>1M@&aDcRB&e z(84!DaeTyOX{l0kJpL~=vZs8fPs&nGrf%r!{$!Ey+!oF)y$|vX8A4OM_`gd=Y?g-5 zwx>SjiRe9~4VJc|6_G|wAI80Lg-34`C~4(k^zorNdLUKDA-ZKBr6DU{diJ51zWQMd z3l6S65ilciqw)YB0ykp8brdCk?g;A!qdjC+_Vhz~X#QNZ zMfl(N{_hK^_y-Lo)q_2zU;kY8m^#YF_x((l8|wfge(o4DH1a@`;>gs=IEV53ux!tq zQ-1DX+qfe%F6&P9t6_;Lcc!6CRs$M6hSAq_d!km(n>f>0ZEz~`H=Yu*gs^^{L9-k@iydxO)FpE&o>DnZX9iW=jg|tCF zYA=colka!Rep@X0P#0l&7mpB}=9e;A4>_mgFxrK@i?EuSYFYXbUc3sVCiRfl6H(v2 zw#a!~gj0D>z2>#*Jej5D)Rw3nEI8Gq^95Ty_j;*^J#;UU*U=gF&$WBhBSMOe%$YVs zpl>jyYp-i54fM7<*@^_|kQ&vCgpj4CsMBGp+d)rQdbKqlc}kuRF#ZshhvO({K}(LM z_!^hJc>3yx!546JGEH>^kHx-|qla`L{wFOp5ux#cse0`jV>-n@TQt8h%WjXVjxG-) z@9wE~)r1)P{o&x9cd8K0YkcQ$9eY?-VDzkcVNIQ-oAgZAUENb{)CtOLd}*(vhd+>S zqmfKFHG9JM3V5VB}Yrp(|an z#2?&7&7ct6cFqr8z8b_)9=cADT^gM-!S{u*&WiP2Kei&nR-^8FPID)_hi|`LVFngA4aktctJnNi*(gP{f6wT>*&noJ4z$xG-E0wk#AlG z(97b92or0@>UE-~#&Ym#D#uz8e)(~_3>@MQv{}4i9O)U9&>qUi4_)YzC5J)>TjelT z?aS0P%(AcZ`FR?+wNq~ zAYEv+LosWKhl#fu_z$iov)gE%us0OMN7mT!wCYRA= zbe9jwLp3+7b+=^E9ZNjqoT23w@TCjWLkQYK`!4Y*u5yLd;WaBA)*1H%`6ak@qzus- zneSGUvAU3Jllw!CtARFw-x5;P(bfDAOObkp2KIDK#hE&~EA^~byF5n^->G1&buF&4 z4Xc$6nRs zrydMWvbkA)BCPVzH9A;M+`WiKePI4Dz6BIoEMd)f=`NJwyH5 zUFwoovWUqgS-ymQ5LFA-$+_@0s2shlF%jjD%nxK0b#!*k5K9o#ebdq(8nRg}~^wsgSGGx@tm-rx}yF9k8L$e0{MC5s*mq?QX z1zo-~7Oz{@AyG#}CEp?|*3}GiSKJfymaNDIm+zL0@!O~wjFx;cuY~o1s^YzPo!FL@ z&QurMA)`Fu^957YUg+-Bm8!`|PV=Xr>XtGh9&wn<`lGStdh4Kr>YD5I@W{gQw9fwf zEM&}lLxz1K>ej>4SH}xZ{-8oJ`mC=OvmQ#fr5=4Z*{eamtE;u@U 
zZ@btF`6Zsz7CLw6ehDvFxXztXKy5_rR0E}ZmwzBt6_1?)gN}x zSZEPjs6jp`Bc?-k;a(r;@q9aKYhstoN$yumA1pMD0~c8f#BF!^3SG;Z;{vJs=uEAV z2-XI29Kqo{DD-rI)W!xc(?#W)S{VK)4(jb8bh~byQ?RCV&2jscvHOdpU zk8i~2h~Rk|X5~g$GxEE`!0f_C#h5{|@eMzsQ+Jbk5iiLk-$`!qN*Q7anuv6Wxu&K- z-or&~P3#)2t%+T->hvm*4ZYN&ie*M^;^?BeC>c_bP;=a{V7!w15B3ttN%k7Mz&_`Pinth=Cl z#3R1n;-!cE{qMO`#eZ)mnK=Vxi8&R!bWo+B)9*Ko-coSG^fkPW&aO_KCydtgb-}{z z2@mrnCKwiaryOxvol&+N*6w*BdqmCLKc!wFe%BXfUnC2D55i78*t$pxPld>I8KW(?R|r)E`0;@-7bcjwtNDf=VYEDhvCBwq^SaJp*s^()Mx&m*p zhqmA$+H`qR!+LPs+pd}**I`2{i+we^Y9tS@tB)^vpD(c&n1cIO^8u#|q=y2r?Y7Qp#7^<@L!2)9PI7PV7pN}jIrFVr`QcySduz`Roi9Uc=|}b4Ko;0S{-4b3>8TfX9bNPg zf{*xt@w68cYt3chhB;}kT9J3$ooWq_e5Y4K zlxP+HDB{oT2enc5k|xB5niE{KW*?fU_)@K<4UECJ?J8%O-=Jspg~NWts2QY7Uo`U3 zRecxL>Ea?7eSm6*)TmxyvQs9UA>ZXr&0TVko_7&Oen9;}JU_r0q$7kbH{a!JblD`| z5%B?uA|0WUo(S~9sN25K>6XU{MWd#NV4cCyJuSiq#zJxtP3$48OB)uh6TU~(yf0iA zs}15M%?lRc4GLe`=^?D!9u49tO_2OouK7Tr3qoBS--R%R@8la)D^jDnP(tG?L)s*V z(HAI_W*MgpX{UTq?cs5|qU*r<9cSufFEDzP5W3t#F=|CRq*laJvPYZ0ZG7n|IIN_H=|51>xP@^GeG|Fcf5OJkKmGdCU#or{ IB*SR{0N>bsU;qFB diff --git a/distribution/lambda/resources/hdfs-logs.yaml b/distribution/lambda/resources/hdfs-logs.yaml deleted file mode 100644 index 1fe923d2233..00000000000 --- a/distribution/lambda/resources/hdfs-logs.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# -# Index config file for hdfs-logs dataset. -# - -version: 0.8 - -index_id: hdfs-logs - -doc_mapping: - field_mappings: - - name: timestamp - type: datetime - input_formats: - - unix_timestamp - output_format: unix_timestamp_secs - fast_precision: seconds - fast: true - - name: tenant_id - type: u64 - - name: severity_text - type: text - tokenizer: raw - fast: true - - name: body - type: text - tokenizer: default - record: position - - name: resource - type: json - tokenizer: raw - tag_fields: [tenant_id] - timestamp_field: timestamp - -search_settings: - default_search_fields: [severity_text, body] - -indexing_settings: - split_num_docs_target: 2000000 diff --git a/distribution/lambda/resources/mock-sales.yaml b/distribution/lambda/resources/mock-sales.yaml deleted file mode 100644 index c8a367ad540..00000000000 --- a/distribution/lambda/resources/mock-sales.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# -# Index config file for mock-sales data generator. -# - -version: 0.8 - -index_id: mock-sales - -doc_mapping: - field_mappings: - - name: ts - type: datetime - input_formats: - - unix_timestamp - output_format: unix_timestamp_millis - precision: milliseconds - fast: true - - name: id - type: u64 - - name: name - type: text - tokenizer: raw - - name: price - type: f64 - fast: true - - name: quantity - type: u64 - fast: true - - name: description - type: text - tokenizer: default - timestamp_field: ts - -search_settings: - default_search_fields: [name, description] - -indexing_settings: - split_num_docs_target: 2000000 diff --git a/docs/get-started/tutorials/tutorial-aws-lambda-simple.md b/docs/get-started/tutorials/tutorial-aws-lambda-simple.md deleted file mode 100644 index 58e59a7e4c4..00000000000 --- a/docs/get-started/tutorials/tutorial-aws-lambda-simple.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: Search with AWS Lambda -description: Index and search using AWS Lambda on 20 million log entries -tags: [aws, integration] -icon_url: /img/tutorials/aws-logo.png -sidebar_position: 4 ---- - -In this tutorial, we will index and search about 20 million log entries (7 GB decompressed) located on AWS S3 with Quickwit Lambda. 
- -Concretely, we will deploy an AWS CloudFormation stack with the Quickwit Lambdas and two buckets: one for staging the gzipped newline-delimited JSON files to be indexed and one for hosting the index data. The staging bucket is optional, as the Quickwit indexer can read data from any S3 file it has access to. - -![Tutorial stack overview](../../assets/images/quickwit-lambda-service.svg) - -## Install - -### Install AWS CDK - -We will use [AWS CDK](https://aws.amazon.com/cdk/) for our infrastructure automation script. Install it using [npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm): -```bash -npm install -g aws-cdk -``` - -You also need AWS credentials to be properly configured in your shell. One way is using the [credentials file](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). - -Finally, clone the tutorials repository: -```bash -git clone https://github.com/quickwit-oss/tutorials.git -cd tutorials/simple-lambda-stack -``` - -### Set up the Python environment - -We use Python 3.10 to define the AWS CloudFormation stack we need to deploy and a Python CLI to invoke the Lambdas. -Let's install the few packages we need (boto3, aws-cdk-lib, click, pyyaml). - -```bash -# Install pipenv if needed. -pip install --user pipenv -pipenv shell -pipenv install -``` - -### Download Quickwit Lambdas - -```bash -mkdir -p cdk.out -wget -P cdk.out https://github.com/quickwit-oss/quickwit/releases/download/aws-lambda-beta-01/quickwit-lambda-indexer-beta-01-x86_64.zip -wget -P cdk.out https://github.com/quickwit-oss/quickwit/releases/download/aws-lambda-beta-01/quickwit-lambda-searcher-beta-01-x86_64.zip -``` - -### Bootstrap and deploy - -Configure the AWS region and [account id](https://docs.aws.amazon.com/IAM/latest/UserGuide/console_account-alias.html) where you want to deploy the stack: - -```bash -export CDK_ACCOUNT=123456789 -# us-east-1 is where the Quickwit public dataset bucket is located -export CDK_REGION=us-east-1 -``` - -If this region/account pair has not been bootstrapped by CDK yet, run: -```bash -cdk bootstrap aws://$CDK_ACCOUNT/$CDK_REGION -``` - -This initializes some basic resources to host artifacts such as Lambda packages. We can now deploy the stack: -```bash -cdk deploy -a cdk/app.py -``` - -## Index the HDFS logs dataset - -Here is an example of a log entry of the dataset: -```json -{ - "timestamp": 1460530013, - "severity_text": "INFO", - "body": "PacketResponder: BP-108841162-10.10.34.11-1440074360971:blk_1074072698_331874, type=HAS_DOWNSTREAM_IN_PIPELINE terminating", - "resource": { - "service": "datanode/01" - }, - "attributes": { - "class": "org.apache.hadoop.hdfs.server.datanode.DataNode" - }, - "tenant_id": 58 -} -``` - -If you have 5 minutes ahead of you, you can index the whole dataset, which is available on our public S3 bucket. - -```bash -python cli.py index s3://quickwit-datasets-public/hdfs-logs-multitenants.json.gz -``` - -:::note - -Once the indexing Lambda is started, you cannot stop it manually. A keyboard interrupt will stop the client but not the function. If you try running it again, you might get a `TooManyRequestsException` error. That's because the concurrency of the indexer is set to 1 to avoid race conditions on the metastore.
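For illustration, here is a minimal sketch of how a client could detect and back off from that throttling when invoking the indexer directly with boto3. The function name below is a placeholder; the real name comes from the CloudFormation stack outputs, and the payload shape mirrors what `python cli.py index ...` sends.

```python
import json
import time

import boto3
import botocore.exceptions

# Placeholder: read the real name from the deployed stack's outputs.
INDEXER_FUNCTION_NAME = "my-quickwit-indexer"


def invoke_indexer_with_retry(source_uri: str, max_attempts: int = 5):
    client = boto3.client("lambda")
    for attempt in range(max_attempts):
        try:
            # Synchronous invocation with the same payload shape as the CLI helper.
            return client.invoke(
                FunctionName=INDEXER_FUNCTION_NAME,
                InvocationType="RequestResponse",
                Payload=json.dumps({"source_uri": source_uri}),
            )
        except botocore.exceptions.ClientError as error:
            # With reserved concurrency set to 1, invoking while a previous run is
            # still in flight is rejected with a TooManyRequestsException.
            if error.response["Error"]["Code"] != "TooManyRequestsException":
                raise
            time.sleep(30 * (attempt + 1))  # back off and try again later
    raise RuntimeError("indexer still busy after all retries")
```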
- -::: - -If you don't want to wait, just index the 10,000 documents dataset: - -```bash -python cli.py index s3://quickwit-datasets-public/hdfs-logs-multitenants-10000.json -``` - -## Execute search queries - -Let's start with a query on the field `severity_text` and look for errors: `severity_text:ERROR`: - -```bash -python cli.py search '{"query":"severity_text:ERROR"}' -``` - -It should respond in under 1 second and return 10 hits out of 345 if you indexed the whole dataset. If you index the first 10,000 documents, you won't have any hits, try to query `INFO` logs instead. - - -Let's now run a more advanced query: a date histogram with a term aggregation on the `severity_text`` field: - -```bash -python cli.py search '{ "query": "*", "max_hits": 0, "aggs": { "events": { "date_histogram": { "field": "timestamp", "fixed_interval": "30d" }, "aggs": { "log_level": { "terms": { "size": 10, "field": "severity_text", "order": { "_count": "desc" } } } } } } }' -``` - -It should respond under 2 seconds and return the top log levels per 30 days. - - -### Cleaning up - -First, you have to delete the files created on your S3 buckets. -Once done, you can delete the stack. - -```bash -cdk destroy -a cdk/app.py -rm -rf cdk.out -``` - -Congratz! You finished this tutorial! You can level up with the following tutorials to discover all Quickwit features. - -## Next steps - -- [Search log service on AWS](/blog/log-search-service-for-under-7-dollars) which covers an end-to-end use case. -- [Search REST API](../../reference/rest-api) -- [Query language](../../reference/query-language) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 58b1862e243..b732a7ae330 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -124,12 +124,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -166,9 +160,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.19" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" dependencies = [ "anstyle", "anstyle-parse", @@ -196,29 +190,29 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.9" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "arc-swap" @@ -282,17 +276,15 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.27" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddb939d66e4ae03cee6091612804ba446b12878410cfa17f785f4dd67d4014e8" +checksum = "977eb15ea9efd848bb8a4a1a2500347ed7f0bf794edf0dc3ddcf439f43d36b23" dependencies = [ - "flate2", + "compression-codecs", + "compression-core", "futures-core", - "memchr", "pin-project-lite", "tokio", - "zstd 0.13.3", - "zstd-safe 7.2.4", ] [[package]] @@ -308,18 +300,18 @@ dependencies = [ "futures-lite 2.6.1", "parking", "polling", - "rustix 1.0.7", + "rustix 1.1.1", "slab", "windows-sys 0.60.2", ] [[package]] name = "async-lock" -version = "3.4.0" +version = "3.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" dependencies = [ - "event-listener 5.4.0", + "event-listener 5.4.1", "event-listener-strategy", "pin-project-lite", ] @@ -337,9 +329,9 @@ dependencies = [ "async-task", "blocking", "cfg-if", - "event-listener 5.4.0", + "event-listener 5.4.1", "futures-lite 2.6.1", - "rustix 1.0.7", + "rustix 1.1.1", ] [[package]] @@ -354,7 +346,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 1.0.7", + "rustix 1.1.1", "signal-hook-registry", "slab", "windows-sys 0.60.2", @@ -391,7 +383,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -402,13 +394,13 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.88" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -434,9 +426,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.1" +version = "1.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c18d005c70d2b9c0c1ea8876c039db0ec7fb71164d25c73ccea21bf41fd02171" +checksum = "8bc1b40fb26027769f16960d2f4a6bc20c4bb755d403e552c8c1a73af433c246" dependencies = [ "aws-credential-types", "aws-runtime", @@ -444,8 +436,8 @@ dependencies = [ "aws-sdk-ssooidc", "aws-sdk-sts", "aws-smithy-async", - "aws-smithy-http 0.62.1", - "aws-smithy-json 0.61.4", + "aws-smithy-http 0.62.3", + "aws-smithy-json 0.61.5", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -464,9 +456,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.3" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "687bc16bc431a8533fe0097c7f0182874767f920989d7260950172ae8e3c4465" +checksum = "d025db5d9f52cbc413b167136afb3d8aeea708c0d8884783cf6253be5e22f6f2" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -476,9 +468,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.13.2" +version = "1.13.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be" +checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" dependencies = [ "aws-lc-sys", "zeroize", @@ -499,15 +491,15 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.8" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6c68419d8ba16d9a7463671593c54f81ba58cab466e9b759418da606dcc2e2" +checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", "aws-smithy-eventstream", - "aws-smithy-http 0.62.1", + "aws-smithy-http 0.62.3", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -524,16 +516,16 @@ dependencies = [ [[package]] name = "aws-sdk-kinesis" -version = "1.78.0" +version = "1.88.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40c5352ced77b5b0d7415614c4affdf7d0d3a39f3de389f1e65c3e3c840d93a" +checksum = "6d59431117b456eabf7e27ef9cc0137af9cf880937ce459e3ce9dfdf983328b5" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-eventstream", - "aws-smithy-http 0.62.1", - "aws-smithy-json 0.61.4", + "aws-smithy-http 0.62.3", + "aws-smithy-json 0.61.5", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -581,15 +573,15 @@ dependencies = [ [[package]] name = "aws-sdk-sqs" -version = "1.74.0" +version = "1.83.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256b8f7caffe3240a543f60409be9bba23038ceef5933da63e09d89197fc2333" +checksum = "9b3e9f9a923e6c20272c40de609faa8624b687d203bc77df99ccb86a5de5944b" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", - "aws-smithy-json 0.61.4", + "aws-smithy-http 0.62.3", + "aws-smithy-json 0.61.5", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -603,15 +595,15 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.74.0" +version = "1.83.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a69de9c1b9272da2872af60c7402683e7f45c06267735b4332deacb203239b" +checksum = "643cd43af212d2a1c4dedff6f044d7e1961e5d9e7cfe773d70f31d9842413886" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", - "aws-smithy-json 0.61.4", + "aws-smithy-http 0.62.3", + "aws-smithy-json 0.61.5", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -625,15 +617,15 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.75.0" +version = "1.84.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0b161d836fac72bdd5ac1a4cd1cdc38ab888c7af26cfd95f661be4409505e63" +checksum = "20ec4a95bd48e0db7a424356a161f8d87bd6a4f0af37204775f0da03d9e39fc3" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", - "aws-smithy-json 0.61.4", + "aws-smithy-http 0.62.3", + "aws-smithy-json 0.61.5", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -647,15 +639,15 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.76.0" +version = "1.85.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb1cd79a3412751a341a28e2cd0d6fa4345241976da427b075a0c0cd5409f886" +checksum = "410309ad0df4606bc721aff0d89c3407682845453247213a0ccc5ff8801ee107" 
dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", - "aws-smithy-json 0.61.4", + "aws-smithy-http 0.62.3", + "aws-smithy-json 0.61.5", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -670,13 +662,13 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.3" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddfb9021f581b71870a17eac25b52335b82211cdc092e02b6876b2bcefa61666" +checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", - "aws-smithy-http 0.62.1", + "aws-smithy-http 0.62.3", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", @@ -730,9 +722,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.9" +version = "0.60.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "338a3642c399c0a5d157648426110e199ca7fd1c689cc395676b81aa563700c4" +checksum = "182b03393e8c677347fb5705a04a9392695d47d20ef0a2f8cfe28c8e6b9b9778" dependencies = [ "aws-smithy-types", "bytes", @@ -762,9 +754,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.1" +version = "0.62.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99335bec6cdc50a346fda1437f9fefe33abf8c99060739a546a16457f2862ca9" +checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", @@ -783,9 +775,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.0.6" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f108f1ca850f3feef3009bdcc977be201bca9a91058864d9de0684e64514bee0" +checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" dependencies = [ "aws-smithy-async", "aws-smithy-protocol-test", @@ -793,25 +785,26 @@ dependencies = [ "aws-smithy-types", "bytes", "h2 0.3.27", - "h2 0.4.11", + "h2 0.4.12", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", "http-body 1.0.1", "hyper 0.14.32", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.24.2", "hyper-rustls 0.27.7", "hyper-util", - "indexmap 2.10.0", + "indexmap 2.11.1", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.29", + "rustls 0.23.31", "rustls-native-certs 0.8.1", "rustls-pki-types", "serde", "serde_json", "tokio", + "tokio-rustls 0.26.2", "tower 0.5.2", "tracing", ] @@ -827,9 +820,9 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.4" +version = "0.61.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a16e040799d29c17412943bdbf488fd75db04112d0c0d4b9290bacf5ae0014b9" +checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047" dependencies = [ "aws-smithy-types", ] @@ -859,7 +852,7 @@ dependencies = [ "regex-lite", "roxmltree 0.14.1", "serde_json", - "thiserror 2.0.12", + "thiserror 2.0.16", ] [[package]] @@ -874,12 +867,12 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.8.4" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3aaec682eb189e43c8a19c3dab2fe54590ad5f2cc2d26ab27608a20f2acf81c" +checksum = "d3946acbe1ead1301ba6862e712c7903ca9bb230bdf1fbd1b5ac54158ef2ab1f" dependencies = [ "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http 0.62.3", "aws-smithy-http-client", "aws-smithy-observability", "aws-smithy-runtime-api", @@ -899,9 
+892,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.8.3" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9852b9226cb60b78ce9369022c0df678af1cac231c882d5da97a0c4e03be6e67" +checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -951,9 +944,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.7" +version = "1.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a322fec39e4df22777ed3ad8ea868ac2f94cd15e1a55f6ee8d8d6305057689a" +checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -963,26 +956,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws_lambda_events" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "144ec7565561115498a288850cc6a42b279e09b6c4b88f623eecb9c8ca96c08c" -dependencies = [ - "base64 0.22.1", - "bytes", - "chrono", - "flate2", - "http 1.3.1", - "http-body 1.0.1", - "http-serde 2.1.1", - "query_map", - "serde", - "serde_dynamo", - "serde_json", - "serde_with", -] - [[package]] name = "axum" version = "0.6.20" @@ -1139,7 +1112,7 @@ dependencies = [ "pin-project", "quick-xml 0.31.0", "rand 0.8.5", - "reqwest 0.12.22", + "reqwest 0.12.23", "rustc_version", "serde", "serde_json", @@ -1229,9 +1202,9 @@ dependencies = [ [[package]] name = "backon" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302eaff5357a264a2c42f127ecb8bac761cf99749fc3dc95677e2743991f99e7" +checksum = "592277618714fbcecda9a02ba7a8781f319d26532a88553bbacc77ba5d2b3a8d" dependencies = [ "fastrand 2.3.0", "gloo-timers", @@ -1273,9 +1246,9 @@ checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" [[package]] name = "base62" -version = "2.2.1" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10e52a7bcb1d6beebee21fb5053af9e3cbb7a7ed1a4909e534040e676437ab1f" +checksum = "0104d4d8d15e458f21dcd027ea350bf38e4364954909402f4da075aca8d0f136" dependencies = [ "rustversion", ] @@ -1329,7 +1302,7 @@ version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "cexpr", "clang-sys", "itertools 0.12.1", @@ -1342,17 +1315,17 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.104", + "syn 2.0.106", "which", ] [[package]] name = "bindgen" -version = "0.71.1" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "cexpr", "clang-sys", "itertools 0.13.0", @@ -1361,7 +1334,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1404,9 +1377,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.1" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = 
"2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" dependencies = [ "serde", ] @@ -1465,9 +1438,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.6.4" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61138465baf186c63e8d9b6b613b508cd832cba4ce93cf37ce5f096f91ac1a6" +checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" dependencies = [ "bon-macros", "rustversion", @@ -1475,17 +1448,17 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.6.4" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d1dad34aa19bf02295382f08d9bc40651585bd497266831d40ee6296fb49ca" +checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" dependencies = [ - "darling", + "darling 0.21.3", "ident_case", "prettyplease", "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1508,7 +1481,7 @@ dependencies = [ "proc-macro-crate 3.3.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1565,9 +1538,9 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "bytemuck" -version = "1.23.1" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" +checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" [[package]] name = "byteorder" @@ -1668,10 +1641,11 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.29" +version = "1.2.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" +checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -1703,9 +1677,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "cfg_aliases" @@ -1757,7 +1731,7 @@ dependencies = [ "bytes", "itertools 0.14.0", "lru 0.13.0", - "rand 0.9.1", + "rand 0.9.2", "serde", "tokio", "tokio-stream", @@ -1767,17 +1741,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.0", ] [[package]] @@ -1858,18 +1831,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.41" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.41" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +checksum = 
"2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" dependencies = [ "anstream", "anstyle", @@ -1920,7 +1893,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" dependencies = [ - "thiserror 2.0.12", + "thiserror 2.0.16", ] [[package]] @@ -1963,6 +1936,25 @@ dependencies = [ "sha1", ] +[[package]] +name = "compression-codecs" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "485abf41ac0c8047c07c87c72c8fb3eb5197f6e9d7ded615dfd1a00ae00a0f64" +dependencies = [ + "compression-core", + "flate2", + "memchr", + "zstd 0.13.3", + "zstd-safe 7.2.4", +] + +[[package]] +name = "compression-core" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e47641d3deaf41fb1538ac1f54735925e275eaf3bf4d55c81b137fba797e5cbb" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -2311,9 +2303,9 @@ dependencies = [ [[package]] name = "curve25519-dalek" -version = "4.2.0" +version = "4.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373b7c5dbd637569a2cca66e8d66b8c446a1e7bf064ea321d265d7b3dfe7c97e" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" dependencies = [ "cfg-if", "cpufeatures", @@ -2333,7 +2325,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -2342,8 +2334,18 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", ] [[package]] @@ -2357,7 +2359,21 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.104", + "syn 2.0.106", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.106", ] [[package]] @@ -2366,9 +2382,20 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core", + "darling_core 0.20.11", "quote", - "syn 2.0.104", + "syn 2.0.106", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", + "quote", + "syn 2.0.106", ] [[package]] @@ -2379,9 +2406,9 @@ checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" [[package]] name = "data-url" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c297a1c74b71ae29df00c3e22dd9534821d60eb9af5a0192823fa2acea70c2a" +checksum = "be1e0bca6c3637f992fc1cc7cbc52a78c1ef6db076dbf1059c4323d6a2048376" [[package]] 
name = "dbl" @@ -2394,12 +2421,12 @@ dependencies = [ [[package]] name = "deadpool" -version = "0.10.0" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb84100978c1c7b37f09ed3ce3e5f843af02c2a2c431bae5b19230dad2c1b490" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" dependencies = [ - "async-trait", "deadpool-runtime", + "lazy_static", "num_cpus", "tokio", ] @@ -2442,9 +2469,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.4.0" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" dependencies = [ "powerfmt", "serde", @@ -2460,7 +2487,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -2507,19 +2534,19 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] name = "dns-lookup" -version = "2.0.4" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5766087c2235fec47fafa4cfecc81e494ee679d0fd4a59887ea0919bfb0e4fc" +checksum = "cf5597a4b7fe5275fc9dcf88ce26326bc8e4cb87d0130f33752d4c5f717793cf" dependencies = [ "cfg-if", "libc", - "socket2", - "windows-sys 0.48.0", + "socket2 0.6.0", + "windows-sys 0.60.2", ] [[package]] @@ -2560,9 +2587,9 @@ checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" [[package]] name = "downcast-rs" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" [[package]] name = "dtoa" @@ -2590,9 +2617,9 @@ checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] name = "dyn-clone" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "ecdsa" @@ -2826,13 +2853,13 @@ dependencies = [ [[package]] name = "enum-iterator-derive" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b" +checksum = "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -2856,12 +2883,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.0", ] [[package]] @@ -2883,9 +2910,9 @@ checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" [[package]] name = "event-listener" -version = "5.4.0" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" dependencies = [ "concurrent-queue", "parking", @@ -2898,7 +2925,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ - "event-listener 5.4.0", + "event-listener 5.4.1", "pin-project-lite", ] @@ -2920,8 +2947,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ "bit-set", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "regex-automata", + "regex-syntax", ] [[package]] @@ -2967,22 +2994,28 @@ dependencies = [ [[package]] name = "fiat-crypto" -version = "0.3.0" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cd1e32ddd350061ae6edb1b082d7c54915b5c672c389143b9a63403a109f24" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.25" +version = "0.2.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" dependencies = [ "cfg-if", "libc", "libredox", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" + [[package]] name = "findshlibs" version = "0.10.2" @@ -3070,9 +3103,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -3210,7 +3243,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3251,9 +3284,9 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.5" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" dependencies = [ "cc", "cfg-if", @@ -3308,7 +3341,7 @@ dependencies = [ "js-sys", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasi 0.14.4+wasi-0.2.4", "wasm-bindgen", ] @@ -3320,9 +3353,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "gloo-timers" @@ -3426,9 +3459,9 @@ dependencies = [ [[package]] name = "grok" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71644bbce1040587eead1dddb22ff2ffc45bbc5e9d13e32b75a28ad539fb24ca" +checksum = 
"2e2d7bd791814b06a609b74361ac35b448eb4718548937c6de718554a4348577" dependencies = [ "glob", "onig", @@ -3468,7 +3501,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.10.0", + "indexmap 2.11.1", "slab", "tokio", "tokio-util", @@ -3477,9 +3510,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ "atomic-waker", "bytes", @@ -3487,7 +3520,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.10.0", + "indexmap 2.11.1", "slab", "tokio", "tokio-util", @@ -3525,9 +3558,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.4" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", @@ -3653,7 +3686,7 @@ checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" dependencies = [ "cfg-if", "libc", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -3734,16 +3767,6 @@ dependencies = [ "serde", ] -[[package]] -name = "http-serde" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f056c8559e3757392c8d091e796416e4649d8e49e88b8d76df6c002f05027fd" -dependencies = [ - "http 1.3.1", - "serde", -] - [[package]] name = "http-types" version = "2.12.0" @@ -3799,7 +3822,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3808,20 +3831,22 @@ dependencies = [ [[package]] name = "hyper" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" dependencies = [ + "atomic-waker", "bytes", "futures-channel", - "futures-util", - "h2 0.4.11", + "futures-core", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "httparse", "httpdate", "itoa", "pin-project-lite", + "pin-utils", "smallvec", "tokio", "want", @@ -3850,16 +3875,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.3.1", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "log", - "rustls 0.23.29", + "rustls 0.23.31", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", "tokio-rustls 0.26.2", "tower-service", - "webpki-roots 1.0.1", + "webpki-roots 1.0.2", ] [[package]] @@ -3880,7 +3905,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "pin-project-lite", "tokio", @@ -3908,7 +3933,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "native-tls", "tokio", @@ -3918,9 +3943,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.15" +version = "0.1.16" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df" +checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" dependencies = [ "base64 0.22.1", "bytes", @@ -3929,12 +3954,12 @@ dependencies = [ "futures-util", "http 1.3.1", "http-body 1.0.1", - "hyper 1.6.0", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.0", "system-configuration 0.6.1", "tokio", "tower-service", @@ -4069,9 +4094,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -4101,12 +4126,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.10.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921" dependencies = [ "equivalent", - "hashbrown 0.15.4", + "hashbrown 0.15.5", "serde", ] @@ -4142,7 +4167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash 0.8.12", - "indexmap 2.10.0", + "indexmap 2.11.1", "is-terminal", "itoa", "log", @@ -4174,7 +4199,7 @@ checksum = "6c38228f24186d9cc68c729accb4d413be9eaed6ad07ff79e0270d9e56f3de13" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4201,11 +4226,11 @@ dependencies = [ [[package]] name = "io-uring" -version = "0.7.8" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "cfg-if", "libc", ] @@ -4296,9 +4321,9 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jobserver" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ "getrandom 0.3.3", "libc", @@ -4306,9 +4331,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" dependencies = [ "once_cell", "wasm-bindgen", @@ -4380,7 +4405,7 @@ dependencies = [ "lalrpop-util", "petgraph", "regex", - "regex-syntax 0.8.5", + "regex-syntax", "sha3", "string_cache", "term", @@ -4394,86 +4419,10 @@ version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5baa5e9ff84f1aefd264e6869907646538a52147a755d494517a8007fb48733" dependencies = [ - "regex-automata 0.4.9", + "regex-automata", "rustversion", ] -[[package]] -name = "lambda_http" -version = "0.14.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b470cea1ec37b96e9543870a33c1f4b9e243754fe5892668efa3125ea12784" -dependencies = [ - "aws_lambda_events", - "base64 0.22.1", - "bytes", - "encoding_rs", - "futures", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "hyper 1.6.0", - "lambda_runtime", - "mime", - "percent-encoding", - "pin-project-lite", - "serde", - "serde_json", - "serde_urlencoded", - "tokio-stream", - "url", -] - -[[package]] -name = "lambda_runtime" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed49669d6430292aead991e19bf13153135a884f916e68f32997c951af637ebe" -dependencies = [ - "async-stream", - "base64 0.22.1", - "bytes", - "futures", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "http-serde 2.1.1", - "hyper 1.6.0", - "hyper-util", - "lambda_runtime_api_client", - "pin-project", - "serde", - "serde_json", - "serde_path_to_error", - "tokio", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tracing", -] - -[[package]] -name = "lambda_runtime_api_client" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c90a10f094475a34a04da2be11686c4dcfe214d93413162db9ffdff3d3af293a" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "hyper 1.6.0", - "hyper-util", - "tokio", - "tower 0.4.13", - "tower-service", - "tracing", - "tracing-subscriber", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -4497,9 +4446,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libc" -version = "0.2.174" +version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "libloading" @@ -4508,7 +4457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.53.2", + "windows-targets 0.53.3", ] [[package]] @@ -4519,13 +4468,13 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" -version = "0.1.4" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638" +checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "libc", - "redox_syscall 0.5.13", + "redox_syscall 0.5.17", ] [[package]] @@ -4796,6 +4745,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "litemap" version = "0.8.0" @@ -4814,9 +4769,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] 
name = "loom" @@ -4837,7 +4792,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.4", + "hashbrown 0.15.5", ] [[package]] @@ -4846,7 +4801,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465" dependencies = [ - "hashbrown 0.15.4", + "hashbrown 0.15.5", ] [[package]] @@ -4888,11 +4843,11 @@ checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" [[package]] name = "matchers" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.1.10", + "regex-automata", ] [[package]] @@ -4946,9 +4901,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "memmap2" -version = "0.9.7" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" dependencies = [ "libc", ] @@ -4977,7 +4932,7 @@ checksum = "d6c74ab4f1a0c0ab045260ee4727b23c00cc17e5eff5095262d08eef8c3c8d49" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5054,7 +5009,7 @@ dependencies = [ "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", - "event-listener 5.4.0", + "event-listener 5.4.1", "futures-util", "loom", "parking_lot 0.12.4", @@ -5195,12 +5150,11 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.46.0" +version = "0.50.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" dependencies = [ - "overload", - "winapi 0.3.9", + "windows-sys 0.52.0", ] [[package]] @@ -5325,7 +5279,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5337,7 +5291,7 @@ dependencies = [ "proc-macro-crate 3.3.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5357,9 +5311,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "numfmt" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db7467e47de9fb6ea5b3f47dc34c1cf0b86359f072a46f6278119544cdbd0021" +checksum = "8ea1a14c0c3b00c5b3f3ab9625c35601c5cac06a94bb6b17c27327a8f1d520c6" dependencies = [ "dtoa", "itoa", @@ -5438,7 +5392,7 @@ version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "libc", "once_cell", "onig_sys", @@ -5487,7 +5441,7 @@ dependencies = [ "percent-encoding", "quick-xml 0.37.5", "reqsign", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde", "serde_json", "tokio", @@ -5532,7 +5486,7 @@ version = "0.10.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" 
dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "cfg-if", "foreign-types", "libc", @@ -5549,7 +5503,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5560,9 +5514,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-src" -version = "300.5.1+3.5.1" +version = "300.5.2+3.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "735230c832b28c000e3bc117119e6466a663ec73506bc0a9907ea4187508e42a" +checksum = "d270b79e2926f5150189d475bc7e9d2c69f9c4697b185fa917d5a32b792d21b4" dependencies = [ "cc", ] @@ -5594,19 +5548,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "opentelemetry-http" -version = "0.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a8a7f5f6ba7c1b286c2fbca0454eaba116f63bbe69ed250b642d36fbb04d80" -dependencies = [ - "async-trait", - "bytes", - "http 1.3.1", - "opentelemetry", - "reqwest 0.12.22", -] - [[package]] name = "opentelemetry-otlp" version = "0.27.0" @@ -5617,11 +5558,9 @@ dependencies = [ "futures-core", "http 1.3.1", "opentelemetry", - "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", "prost 0.13.5", - "reqwest 0.12.22", "thiserror 1.0.69", "tokio", "tonic 0.12.3", @@ -5710,7 +5649,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5719,12 +5658,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "ownedbytes" version = "0.7.0" @@ -5830,7 +5763,7 @@ checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.13", + "redox_syscall 0.5.17", "smallvec", "windows-targets 0.52.6", ] @@ -5922,9 +5855,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "perf-event" @@ -5952,7 +5885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" dependencies = [ "memchr", - "thiserror 2.0.12", + "thiserror 2.0.16", "ucd-trie", ] @@ -5976,7 +5909,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5996,7 +5929,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.10.0", + "indexmap 2.11.1", ] [[package]] @@ -6043,7 +5976,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6252,7 +6185,7 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix 1.0.7", + "rustix 1.1.1", "windows-sys 0.60.2", ] @@ -6275,9 +6208,9 @@ checksum = 
"f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "postcard" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c1de96e20f51df24ca73cafcc4690e044854d803259db27a00a461cb3b9d17a" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" dependencies = [ "cobs", "embedded-io 0.4.0", @@ -6287,9 +6220,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" dependencies = [ "zerovec", ] @@ -6392,12 +6325,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.35" +version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6454,9 +6387,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -6469,7 +6402,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "version_check", "yansi", ] @@ -6480,7 +6413,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "hex", "lazy_static", "procfs-core", @@ -6493,7 +6426,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "hex", ] @@ -6522,13 +6455,13 @@ checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.9.1", + "bitflags 2.9.4", "lazy_static", "num-traits", - "rand 0.9.1", + "rand 0.9.2", "rand_chacha 0.9.0", "rand_xorshift", - "regex-syntax 0.8.5", + "regex-syntax", "rusty-fork", "tempfile", "unarray", @@ -6570,7 +6503,7 @@ dependencies = [ "prost 0.13.5", "prost-types 0.13.5", "regex", - "syn 2.0.104", + "syn 2.0.106", "tempfile", ] @@ -6597,7 +6530,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6637,9 +6570,9 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "psl" -version = "2.1.124" +version = "2.1.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "281f96193296bf4916d7a112df4cc578aaf7a4d71f1570667779bb07364ed837" +checksum = "89a33878b44e45231ecbc8c619cc8059e4adab882b25812192676fe08dcf352f" dependencies = [ "psl-types", ] @@ -6717,17 +6650,6 @@ dependencies = [ "zstd 0.13.3", ] -[[package]] -name = "query_map" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5eab6b8b1074ef3359a863758dae650c7c0c6027927a085b7af911c8e0bf3a15" -dependencies = [ - "form_urlencoded", - "serde", - "serde_derive", -] - [[package]] name = "quick-error" version = "1.2.3" @@ -6778,7 +6700,7 @@ dependencies = [ "serde", "serde_json", "sync_wrapper 1.0.2", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tracing", ] @@ -6798,7 +6720,7 @@ dependencies = [ "aws-types", "futures", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.27.7", "quickwit-common", "tokio", @@ -6842,11 +6764,11 @@ dependencies = [ "quickwit-serve", "quickwit-storage", "quickwit-telemetry", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde_json", "tabled", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "thousands", "tikv-jemalloc-ctl", "tikv-jemallocator", @@ -6896,7 +6818,7 @@ dependencies = [ "prost-build", "quote", "serde", - "syn 2.0.104", + "syn 2.0.106", "tonic-build", ] @@ -6916,7 +6838,7 @@ dependencies = [ "quickwit-common", "quickwit-proto", "serde", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tokio-stream", "tonic 0.13.1", @@ -6941,7 +6863,7 @@ dependencies = [ "home", "hostname 0.3.1", "http 1.3.1", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "itertools 0.14.0", "once_cell", @@ -6958,7 +6880,7 @@ dependencies = [ "siphasher", "sysinfo", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "tikv-jemalloc-ctl", "tikv-jemallocator", "tokio", @@ -6980,7 +6902,7 @@ dependencies = [ "cron", "enum-iterator", "http 0.2.12", - "http-serde 1.1.3", + "http-serde", "humantime", "itertools 0.14.0", "json_comments", @@ -7074,7 +6996,7 @@ dependencies = [ "binggan", "fnv", "hex", - "indexmap 2.10.0", + "indexmap 2.11.1", "itertools 0.14.0", "matches", "nom", @@ -7091,7 +7013,7 @@ dependencies = [ "serde_yaml", "siphasher", "tantivy", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tracing", "utoipa", @@ -7111,7 +7033,7 @@ dependencies = [ "quickwit-metastore", "quickwit-proto", "quickwit-storage", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tokio", "tracing", @@ -7165,12 +7087,12 @@ dependencies = [ "rand 0.8.5", "rdkafka", "regex", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde", "serde_json", "tantivy", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tokio", "tracing", @@ -7209,7 +7131,7 @@ dependencies = [ "serde_json", "serde_json_borrow", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tonic 0.13.1", "tower 0.5.2", @@ -7225,7 +7147,7 @@ dependencies = [ "anyhow", "aws-sdk-sqs", "futures-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "itertools 0.14.0", "quickwit-actors", @@ -7240,7 +7162,7 @@ dependencies = [ "quickwit-serve", "quickwit-storage", "rand 0.8.5", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde_json", "tempfile", "tokio", @@ -7307,66 +7229,20 @@ dependencies = [ "serde_json", "tantivy", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tokio", "tracing", "utoipa", ] -[[package]] -name = "quickwit-lambda" -version = "0.8.0" -dependencies = [ - "anyhow", - "aws_lambda_events", - "bytesize", - "chitchat", - "chrono", - "flate2", - "http 1.3.1", - "lambda_http", - "lambda_runtime", - "mime_guess", - "once_cell", - "opentelemetry", - "opentelemetry-otlp", - "opentelemetry_sdk", - "quickwit-actors", - "quickwit-cli", - "quickwit-cluster", - "quickwit-common", - "quickwit-config", - "quickwit-index-management", - "quickwit-indexing", - "quickwit-ingest", - "quickwit-janitor", - "quickwit-metastore", - "quickwit-proto", - "quickwit-search", - "quickwit-serve", - 
"quickwit-storage", - "quickwit-telemetry", - "rand 0.8.5", - "reqwest 0.12.22", - "serde", - "serde_json", - "serial_test", - "time", - "tokio", - "tracing", - "tracing-opentelemetry", - "tracing-subscriber", - "warp", -] - [[package]] name = "quickwit-macros" version = "0.8.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -7394,7 +7270,7 @@ dependencies = [ "quickwit-storage", "rand 0.8.5", "regex", - "regex-syntax 0.8.5", + "regex-syntax", "sea-query", "sea-query-binder", "serde", @@ -7403,7 +7279,7 @@ dependencies = [ "serial_test", "sqlx", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tokio", "tokio-stream", @@ -7430,7 +7306,7 @@ dependencies = [ "quickwit-proto", "serde", "serde_json", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tokio", "tonic 0.13.1", @@ -7461,7 +7337,7 @@ dependencies = [ "serde", "serde_json", "sqlx", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tonic 0.13.1", "tonic-build", @@ -7495,7 +7371,7 @@ dependencies = [ "serde_with", "tantivy", "tantivy-fst", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "whichlang", ] @@ -7516,12 +7392,12 @@ dependencies = [ "quickwit-proto", "quickwit-search", "quickwit-serve", - "reqwest 0.12.22", + "reqwest 0.12.23", "reqwest-middleware", "reqwest-retry", "serde", "serde_json", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tracing", "wiremock", @@ -7564,7 +7440,7 @@ dependencies = [ "serde_json_borrow", "tantivy", "tantivy-fst", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tokio-stream", "tower 0.5.2", @@ -7591,9 +7467,9 @@ dependencies = [ "glob", "hex", "http 1.3.1", - "http-serde 1.1.3", + "http-serde", "humantime", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.27.7", "hyper-util", "itertools 0.14.0", @@ -7632,7 +7508,7 @@ dependencies = [ "serde_qs 0.12.0", "serde_with", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "tokio", "tokio-rustls 0.24.1", @@ -7670,7 +7546,7 @@ dependencies = [ "fnv", "futures", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "lru 0.13.0", "md5", "mockall", @@ -7685,12 +7561,12 @@ dependencies = [ "rand 0.8.5", "regex", "reqsign", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde", "serde_json", "tantivy", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tokio-stream", "tokio-util", @@ -7709,7 +7585,7 @@ dependencies = [ "md5", "once_cell", "quickwit-common", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde", "serde_json", "tokio", @@ -7720,9 +7596,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" dependencies = [ "bytes", "cfg_aliases", @@ -7730,9 +7606,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.1", - "rustls 0.23.29", - "socket2", - "thiserror 2.0.12", + "rustls 0.23.31", + "socket2 0.6.0", + "thiserror 2.0.16", "tokio", "tracing", "web-time", @@ -7740,20 +7616,20 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.12" +version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", "getrandom 0.3.3", "lru-slab", - "rand 0.9.1", + "rand 0.9.2", "ring 0.17.14", 
"rustc-hash 2.1.1", - "rustls 0.23.29", + "rustls 0.23.31", "rustls-pki-types", "slab", - "thiserror 2.0.12", + "thiserror 2.0.16", "tinyvec", "tracing", "web-time", @@ -7761,16 +7637,16 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.0", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -7826,9 +7702,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", @@ -7921,9 +7797,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" dependencies = [ "either", "rayon-core", @@ -7931,9 +7807,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -7984,11 +7860,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.13" +version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", ] [[package]] @@ -8008,58 +7884,43 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] name = "regex" -version = "1.11.1" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] name = "regex-lite" -version = "0.1.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" - -[[package]] -name = "regex-syntax" -version = "0.6.29" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "rend" @@ -8090,7 +7951,7 @@ dependencies = [ "log", "percent-encoding", "rand 0.8.5", - "reqwest 0.12.22", + "reqwest 0.12.23", "rsa", "serde", "serde_json", @@ -8144,19 +8005,18 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.22" +version = "0.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" +checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" dependencies = [ "base64 0.22.1", "bytes", - "futures-channel", "futures-core", "futures-util", "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-rustls 0.27.7", "hyper-tls 0.6.0", "hyper-util", @@ -8166,8 +8026,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.29", - "rustls-native-certs 0.8.1", + "rustls 0.23.31", "rustls-pki-types", "serde", "serde_json", @@ -8185,7 +8044,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.1", + "webpki-roots 1.0.2", ] [[package]] @@ -8197,7 +8056,7 @@ dependencies = [ "anyhow", "async-trait", "http 1.3.1", - "reqwest 0.12.22", + "reqwest 0.12.23", "serde", "thiserror 1.0.69", "tower-service", @@ -8214,9 +8073,9 @@ dependencies = [ "futures", "getrandom 0.2.16", "http 1.3.1", - "hyper 1.6.0", + "hyper 1.7.0", "parking_lot 0.11.2", - "reqwest 0.12.22", + "reqwest 0.12.23", "reqwest-middleware", "retry-policies", "thiserror 1.0.69", @@ -8378,7 +8237,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.104", + "syn 2.0.106", "walkdir", ] @@ -8420,9 +8279,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.25" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" [[package]] name = "rustc-hash" @@ -8451,7 +8310,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys 0.4.15", @@ -8460,15 +8319,16 @@ dependencies = [ [[package]] name = "rustix" -version = "1.0.7" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +checksum = "9621e389a110cae094269936383d69b869492f03e5c1ed2d575a53c029d4441d" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "errno", "libc", + "linux-raw-sys 0.11.0", "linux-raw-sys 0.9.4", - "windows-sys 0.59.0", + "windows-sys 0.61.0", ] [[package]] @@ -8485,9 
+8345,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.29" +version = "0.23.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1" +checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" dependencies = [ "aws-lc-rs", "log", @@ -8520,7 +8380,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.2.0", + "security-framework 3.4.0", ] [[package]] @@ -8582,9 +8442,9 @@ checksum = "0b5a6a926633a8ce739286680df905e1d1d01db609fc0e09d28e9b901ac7b22f" [[package]] name = "rustversion" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rusty-fork" @@ -8636,20 +8496,20 @@ dependencies = [ [[package]] name = "scc" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" dependencies = [ "sdd", ] [[package]] name = "schannel" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.0", ] [[package]] @@ -8711,9 +8571,9 @@ dependencies = [ [[package]] name = "sdd" -version = "3.0.9" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f5557d2bbddd5afd236ba7856b0e494f5acc7ce805bb0774cc5674b20a06b4" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" [[package]] name = "sea-query" @@ -8741,12 +8601,12 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bae0cbad6ab996955664982739354128c58d16e126114fe88c2a493642502aab" dependencies = [ - "darling", + "darling 0.20.11", "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.104", - "thiserror 2.0.12", + "syn 2.0.106", + "thiserror 2.0.16", ] [[package]] @@ -8789,7 +8649,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -8798,11 +8658,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.2.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" +checksum = "60b369d18893388b345804dc0007963c99b7d665ae71d275812d828c6f089640" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -8811,9 +8671,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.14.0" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ "core-foundation-sys", "libc", @@ -8858,26 
+8718,16 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", -] - -[[package]] -name = "serde_dynamo" -version = "4.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36c1b1792cfd9de29eb373ee6a4b74650369c096f55db7198ceb7b8921d1f7f" -dependencies = [ - "base64 0.21.7", - "serde", + "syn 2.0.106", ] [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.1", "itoa", "memchr", "ryu", @@ -8969,7 +8819,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.10.0", + "indexmap 2.11.1", "schemars 0.9.0", "schemars 1.0.4", "serde", @@ -8985,10 +8835,10 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8997,7 +8847,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.1", "itoa", "ryu", "serde", @@ -9027,7 +8877,7 @@ checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9128,9 +8978,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.5" +version = "1.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" dependencies = [ "libc", ] @@ -9169,7 +9019,7 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", ] @@ -9190,9 +9040,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" @@ -9212,11 +9062,11 @@ dependencies = [ [[package]] name = "snafu" -version = "0.8.6" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320b01e011bf8d5d7a4a4a4be966d9160968935849c83b918827f6a435e7f627" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" dependencies = [ - "snafu-derive 0.8.6", + "snafu-derive 0.8.9", ] [[package]] @@ -9233,14 +9083,14 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.8.6" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1961e2ef424c1424204d3a5d6975f934f56b6d50ff5732382d84ebf460e147f7" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9259,6 +9109,16 @@ dependencies = [ 
"windows-sys 0.52.0", ] +[[package]] +name = "socket2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "spin" version = "0.5.2" @@ -9338,7 +9198,7 @@ dependencies = [ "futures-util", "hashlink", "hex", - "indexmap 2.10.0", + "indexmap 2.11.1", "log", "memchr", "once_cell", @@ -9407,7 +9267,7 @@ checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418" dependencies = [ "atoi", "base64 0.21.7", - "bitflags 2.9.1", + "bitflags 2.9.4", "byteorder", "bytes", "crc", @@ -9450,7 +9310,7 @@ checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e" dependencies = [ "atoi", "base64 0.21.7", - "bitflags 2.9.1", + "bitflags 2.9.4", "byteorder", "crc", "dotenvy", @@ -9569,9 +9429,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = "12.15.5" +version = "12.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a1150bdda9314f6cfeeea801c23f5593c6e6a6c72e64f67e48d723a12b8efdb" +checksum = "9da12f8fecbbeaa1ee62c1d50dc656407e007c3ee7b2a41afce4b5089eaef15e" dependencies = [ "debugid", "memmap2", @@ -9581,9 +9441,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.15.5" +version = "12.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f66537def48fbc704a92e4fdaab7833bc7cb2255faca8182592fb5fa617eb82" +checksum = "6fd35afe0ef9d35d3dcd41c67ddf882fc832a387221338153b7cd685a105495c" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -9603,9 +9463,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.104" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -9635,7 +9495,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9679,7 +9539,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "core-foundation 0.9.4", "system-configuration-sys 0.6.0", ] @@ -9784,7 +9644,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.16", "time", "uuid", "winapi 0.3.9", @@ -9833,7 +9693,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" dependencies = [ "byteorder", - "regex-syntax 0.8.5", + "regex-syntax", "utf8-ranges", ] @@ -9895,24 +9755,24 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.20.0" +version = "3.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" dependencies = [ "fastrand 2.3.0", "getrandom 0.3.3", "once_cell", - "rustix 1.0.7", - "windows-sys 0.59.0", + "rustix 1.1.1", + "windows-sys 0.60.2", ] [[package]] name = "term" -version = "1.1.0" +version = 
"1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a43bddab41f8626c7bdaab872bbba75f8df5847b516d77c569c746e2ae5eb746" +checksum = "2111ef44dae28680ae9752bb89409e7310ca33a8c621ebe7b106cf5c928b3ac0" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.0", ] [[package]] @@ -9941,11 +9801,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.12" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" dependencies = [ - "thiserror-impl 2.0.12", + "thiserror-impl 2.0.16", ] [[package]] @@ -9956,18 +9816,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] name = "thiserror-impl" -version = "2.0.12" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10018,12 +9878,11 @@ dependencies = [ [[package]] name = "time" -version = "0.3.41" +version = "0.3.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031" dependencies = [ "deranged", - "itoa", "js-sys", "libc", "num-conv", @@ -10036,9 +9895,9 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] name = "time-fmt" @@ -10052,9 +9911,9 @@ dependencies = [ [[package]] name = "time-macros" -version = "0.2.22" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ "num-conv", "time-core", @@ -10082,9 +9941,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" dependencies = [ "tinyvec_macros", ] @@ -10097,9 +9956,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.46.1" +version = "1.47.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" dependencies = [ "backtrace", "bytes", @@ -10110,17 +9969,17 @@ dependencies = [ "pin-project-lite", "signal-hook-registry", "slab", - "socket2", + "socket2 0.6.0", "tokio-macros", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "tokio-io-timeout" -version = "1.2.0" +version = "1.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" +checksum = "0bd86198d9ee903fedd2f9a2e72014287c0d9167e4ae43b5853007205dda1b76" dependencies = [ "pin-project-lite", "tokio", @@ -10134,7 +9993,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10186,7 +10045,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls 0.23.29", + "rustls 0.23.31", "tokio", ] @@ -10216,16 +10075,16 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.15" +version = "0.7.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" dependencies = [ "bytes", "futures-core", "futures-io", "futures-sink", "futures-util", - "hashbrown 0.15.4", + "hashbrown 0.15.5", "pin-project-lite", "slab", "tokio", @@ -10258,7 +10117,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.1", "serde", "serde_spanned", "toml_datetime", @@ -10271,9 +10130,9 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.1", "toml_datetime", - "winnow 0.7.12", + "winnow 0.7.13", ] [[package]] @@ -10320,17 +10179,17 @@ dependencies = [ "axum 0.7.9", "base64 0.22.1", "bytes", - "h2 0.4.11", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-timeout 0.5.2", "hyper-util", "percent-encoding", "pin-project", "prost 0.13.5", - "socket2", + "socket2 0.5.10", "tokio", "tokio-stream", "tower 0.4.13", @@ -10350,18 +10209,18 @@ dependencies = [ "base64 0.22.1", "bytes", "flate2", - "h2 0.4.11", + "h2 0.4.12", "http 1.3.1", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-timeout 0.5.2", "hyper-util", "percent-encoding", "pin-project", "prost 0.13.5", "rustls-native-certs 0.8.1", - "socket2", + "socket2 0.5.10", "tokio", "tokio-rustls 0.26.2", "tokio-stream", @@ -10383,7 +10242,7 @@ dependencies = [ "prost-build", "prost-types 0.13.5", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10439,7 +10298,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "indexmap 2.10.0", + "indexmap 2.11.1", "pin-project-lite", "slab", "sync_wrapper 1.0.2", @@ -10457,7 +10316,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" dependencies = [ "async-compression", - "bitflags 2.9.1", + "bitflags 2.9.4", "bytes", "futures-core", "futures-util", @@ -10477,7 +10336,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.9.4", "bytes", "futures-util", "http 1.3.1", @@ -10521,7 +10380,7 @@ checksum = 
"81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10575,14 +10434,14 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.19" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex", + "regex-automata", "serde", "serde_json", "sharded-slab", @@ -10670,7 +10529,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "470dbf6591da1b39d43c14523b2b469c86879a53e8b758c8e090a470fe7b1fbe" dependencies = [ - "rand 0.9.1", + "rand 0.9.2", "serde", "web-time", ] @@ -10774,9 +10633,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", @@ -10836,7 +10695,7 @@ version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.1", "serde", "serde_json", "utoipa-gen", @@ -10851,19 +10710,19 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "ulid", ] [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ "getrandom 0.3.3", "js-sys", - "rand 0.9.1", + "rand 0.9.2", "serde", "wasm-bindgen", ] @@ -10928,7 +10787,7 @@ dependencies = [ "hostname 0.4.1", "iana-time-zone", "idna", - "indexmap 2.10.0", + "indexmap 2.11.1", "indoc", "influxdb-line-protocol", "itertools 0.14.0", @@ -10962,12 +10821,12 @@ dependencies = [ "sha2", "sha3", "simdutf8", - "snafu 0.8.6", + "snafu 0.8.9", "snap", "strip-ansi-escapes", "syslog_loose", "termcolor", - "thiserror 2.0.12", + "thiserror 2.0.16", "tokio", "tracing", "uaparser", @@ -11055,7 +10914,6 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" dependencies = [ - "async-compression", "bytes", "futures-channel", "futures-util", @@ -11093,11 +10951,11 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" -version = "0.14.2+wasi-0.2.4" +version = "0.14.4+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "88a5f4a424faf49c3c2c344f166f0662341d470ea185e939657aaff130f0ec4a" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] @@ -11117,35 +10975,36 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", + "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "0ca85039a9b469b38336411d6d6ced91f3fc87109a2a27b0c197663f5144dffe" dependencies = [ "cfg-if", "js-sys", @@ -11156,9 +11015,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -11166,22 +11025,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" dependencies = [ "unicode-ident", ] @@ -11216,9 +11075,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "77e4b637749ff0d92b8fad63aa1f7cff3cbe125fd49c175cd6345e7272638b12" dependencies = [ "js-sys", "wasm-bindgen", @@ -11251,9 +11110,9 @@ checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" [[package]] name = "webpki-roots" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502" +checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" dependencies = [ "rustls-pki-types", ] @@ -11278,11 +11137,11 @@ checksum = "0b9aa3ad29c3d08283ac6b769e3ec15ad1ddb88af7d2e9bc402c574973b937e7" [[package]] name = "whoami" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" dependencies = [ - "redox_syscall 0.5.13", + "libredox", "wasite", ] @@ -11316,11 +11175,11 @@ checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.0", ] [[package]] @@ -11348,7 +11207,7 @@ dependencies = [ "windows-collections", "windows-core 0.61.2", "windows-future", - "windows-link", + "windows-link 0.1.3", "windows-numerics", ] @@ -11381,7 +11240,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement 0.60.0", "windows-interface 0.59.1", - "windows-link", + "windows-link 0.1.3", "windows-result 0.3.4", "windows-strings", ] @@ -11393,7 +11252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ "windows-core 0.61.2", - "windows-link", + "windows-link 0.1.3", "windows-threading", ] @@ -11405,7 +11264,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11416,7 +11275,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11427,7 +11286,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11438,7 +11297,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11447,6 +11306,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + [[package]] name = "windows-numerics" version = "0.2.0" @@ -11454,7 +11319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ "windows-core 0.61.2", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -11463,7 +11328,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows-result 0.3.4", "windows-strings", ] @@ -11483,7 +11348,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -11492,7 +11357,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -11528,7 +11393,16 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.2", + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-sys" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +dependencies = [ + "windows-link 0.2.0", ] [[package]] @@ -11564,10 +11438,11 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.2" +version = "0.53.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" dependencies = [ + "windows-link 0.1.3", "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", "windows_i686_gnu 0.53.0", @@ -11584,7 +11459,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -11736,9 +11611,9 @@ dependencies = [ [[package]] name = "winnow" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" dependencies = [ "memchr", ] @@ -11755,18 +11630,17 @@ dependencies = [ [[package]] name = "wiremock" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2b8b99d4cdbf36b239a9532e31fe4fb8acc38d1897c1761e161550a7dc78e6a" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" dependencies = [ "assert-json-diff", - "async-trait", "base64 0.22.1", "deadpool", "futures", "http 1.3.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.7.0", "hyper-util", "log", "once_cell", @@ -11778,13 +11652,10 @@ dependencies = [ ] [[package]] -name = "wit-bindgen-rt" -version = "0.39.0" +name = "wit-bindgen" +version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags 2.9.1", -] +checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36" [[package]] name = "woothee" @@ -11818,7 +11689,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af3a19837351dc82ba89f8a125e22a3c475f05aba604acc023d62b2739ae2909" dependencies = [ "libc", - "rustix 1.0.7", + "rustix 1.1.1", ] [[package]] @@ -11862,28 +11733,28 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11903,7 +11774,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -11926,9 +11797,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.2" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" dependencies = [ "yoke", "zerofrom", @@ -11943,7 +11814,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12005,11 +11876,11 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ - "bindgen 0.71.1", + "bindgen 0.72.1", "cc", "pkg-config", ] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 86d17907be9..a68147f782a 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -19,7 +19,6 @@ members = [ "quickwit-integration-tests", "quickwit-jaeger", "quickwit-janitor", - "quickwit-lambda", "quickwit-macros", "quickwit-metastore", @@ -36,7 +35,7 @@ members = [ "quickwit-telemetry", ] -# The following list excludes `quickwit-metastore-utils` and `quickwit-lambda` +# The following list excludes `quickwit-metastore-utils` # from the default member to ease build/deps. default-members = [ "quickwit-actors", diff --git a/quickwit/Makefile b/quickwit/Makefile index aed3f16a46d..e3dac8de1d5 100644 --- a/quickwit/Makefile +++ b/quickwit/Makefile @@ -40,14 +40,6 @@ test-all: test-failpoints: cargo nextest run --test failpoints --features fail/failpoints -test-lambda: - AWS_ACCESS_KEY_ID=ignored \ - AWS_SECRET_ACCESS_KEY=ignored \ - AWS_REGION=us-east-1 \ - QW_S3_ENDPOINT=http://localhost:4566 \ - QW_S3_FORCE_PATH_STYLE_ACCESS=1 \ - cargo nextest run --all-features -p quickwit-lambda --retries 1 - # TODO: to be replaced by https://github.com/quickwit-oss/quickwit/issues/237 TARGET ?= x86_64-unknown-linux-gnu .PHONY: build diff --git a/quickwit/quickwit-aws/src/lib.rs b/quickwit/quickwit-aws/src/lib.rs index 3fad7aad979..31b63d121e2 100644 --- a/quickwit/quickwit-aws/src/lib.rs +++ b/quickwit/quickwit-aws/src/lib.rs @@ -42,5 +42,5 @@ pub async fn get_aws_config() -> &'static aws_config::SdkConfig { /// Returns the AWS behavior version. 
pub fn aws_behavior_version() -> BehaviorVersion { - BehaviorVersion::v2025_01_17() + BehaviorVersion::v2025_08_07() } diff --git a/quickwit/quickwit-lambda/Cargo.toml b/quickwit/quickwit-lambda/Cargo.toml deleted file mode 100644 index a43cce32913..00000000000 --- a/quickwit/quickwit-lambda/Cargo.toml +++ /dev/null @@ -1,72 +0,0 @@ -[package] -name = "quickwit-lambda" -description = "Serverless Quickwit on AWS Lambda" - -version.workspace = true -edition.workspace = true -homepage.workspace = true -documentation.workspace = true -repository.workspace = true -authors.workspace = true -license.workspace = true - -[[bin]] -name = "indexer" -path = "src/bin/indexer.rs" - -[[bin]] -name = "searcher" -path = "src/bin/searcher.rs" - -[features] -s3-localstack-tests = [] - -[dependencies] -anyhow = { workspace = true } -aws_lambda_events = "0.16" -bytesize = { workspace = true } -chitchat = { workspace = true } -chrono = { workspace = true } -flate2 = { workspace = true } -http = { workspace = true } -lambda_http = "0.14" -lambda_runtime = "0.13" -mime_guess = { workspace = true } -once_cell = { workspace = true } -opentelemetry = { workspace = true } -opentelemetry_sdk = { workspace = true } -opentelemetry-otlp = { workspace = true, features = [ - "reqwest-client", - "reqwest-rustls", - "http-proto", -] } -rand = { workspace = true } -reqwest = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -time = { workspace = true } -tokio = { workspace = true } -tracing = { workspace = true } -tracing-opentelemetry = { workspace = true } -tracing-subscriber = { workspace = true, features = ["json"] } -warp = { workspace = true, features = ["compression-gzip"] } - - -quickwit-actors = { workspace = true } -quickwit-cli = { workspace = true } -quickwit-cluster = { workspace = true } -quickwit-common = { workspace = true } -quickwit-config = { workspace = true } -quickwit-index-management = { workspace = true } -quickwit-indexing = { workspace = true } -quickwit-ingest = { workspace = true } -quickwit-janitor = { workspace = true } -quickwit-metastore = { workspace = true } -quickwit-proto = { workspace = true } -quickwit-search = { workspace = true } -quickwit-serve = { workspace = true } -quickwit-storage = { workspace = true } -quickwit-telemetry = { workspace = true } - -[dev-dependencies] -serial_test = { workspace = true } diff --git a/quickwit/quickwit-lambda/README.md b/quickwit/quickwit-lambda/README.md new file mode 100644 index 00000000000..26aacdb0a0c --- /dev/null +++ b/quickwit/quickwit-lambda/README.md @@ -0,0 +1,4 @@ +# Deprecation + +This package was removed in Q3 2025. The maintenance burden was high and the +feature was unused. diff --git a/quickwit/quickwit-lambda/src/bin/indexer.rs b/quickwit/quickwit-lambda/src/bin/indexer.rs deleted file mode 100644 index 7af8d250dc0..00000000000 --- a/quickwit/quickwit-lambda/src/bin/indexer.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use lambda_runtime::service_fn; -use quickwit_lambda::indexer::handler; -use quickwit_lambda::logger; - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - logger::setup_lambda_tracer(tracing::Level::INFO)?; - let func = service_fn(handler); - lambda_runtime::run(func) - .await - .map_err(|e| anyhow::anyhow!(e)) -} diff --git a/quickwit/quickwit-lambda/src/bin/searcher.rs b/quickwit/quickwit-lambda/src/bin/searcher.rs deleted file mode 100644 index b961271275b..00000000000 --- a/quickwit/quickwit-lambda/src/bin/searcher.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use quickwit_lambda::logger; -use quickwit_lambda::searcher::{setup_searcher_api, warp_lambda}; - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - logger::setup_lambda_tracer(tracing::Level::INFO)?; - let routes = setup_searcher_api().await?; - let warp_service = warp::service(routes); - warp_lambda::run(warp_service) - .await - .map_err(|e| anyhow::anyhow!(e)) -} diff --git a/quickwit/quickwit-lambda/src/environment.rs b/quickwit/quickwit-lambda/src/environment.rs deleted file mode 100644 index 7b26129cf54..00000000000 --- a/quickwit/quickwit-lambda/src/environment.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::env::var; - -use once_cell::sync::Lazy; -use quickwit_common::get_bool_from_env; - -pub static INDEX_ID: Lazy = Lazy::new(|| { - var("QW_LAMBDA_INDEX_ID").expect("environment variable `QW_LAMBDA_INDEX_ID` should be set") -}); - -/// Configures the fmt tracing subscriber to log as json and include span -/// boundaries. This is very verbose and is only used to generate advanced KPIs -/// from Lambda runs (e.g. 
for blog post benchmarks)
-pub static ENABLE_VERBOSE_JSON_LOGS: Lazy<bool> =
-    Lazy::new(|| get_bool_from_env("QW_LAMBDA_ENABLE_VERBOSE_JSON_LOGS", false));
-
-pub static OPENTELEMETRY_URL: Lazy<Option<String>> =
-    Lazy::new(|| var("QW_LAMBDA_OPENTELEMETRY_URL").ok());
-
-pub static OPENTELEMETRY_AUTHORIZATION: Lazy<Option<String>> =
-    Lazy::new(|| var("QW_LAMBDA_OPENTELEMETRY_AUTHORIZATION").ok());
diff --git a/quickwit/quickwit-lambda/src/indexer/environment.rs b/quickwit/quickwit-lambda/src/indexer/environment.rs
deleted file mode 100644
index f2f960f224b..00000000000
--- a/quickwit/quickwit-lambda/src/indexer/environment.rs
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2021-Present Datadog, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::env::var;
-
-use once_cell::sync::Lazy;
-use quickwit_common::get_bool_from_env;
-
-pub const CONFIGURATION_TEMPLATE: &str = r#"
-version: 0.8
-node_id: lambda-indexer
-cluster_id: lambda-ephemeral
-metastore_uri: s3://${QW_LAMBDA_METASTORE_BUCKET}/${QW_LAMBDA_METASTORE_PREFIX:-index}
-default_index_root_uri: s3://${QW_LAMBDA_INDEX_BUCKET}/${QW_LAMBDA_INDEX_PREFIX:-index}
-data_dir: /tmp
-"#;
-
-pub static INDEX_CONFIG_URI: Lazy<String> = Lazy::new(|| {
-    var("QW_LAMBDA_INDEX_CONFIG_URI")
-        .expect("environment variable `QW_LAMBDA_INDEX_CONFIG_URI` should be set")
-});
-
-pub static DISABLE_MERGE: Lazy<bool> =
-    Lazy::new(|| get_bool_from_env("QW_LAMBDA_DISABLE_MERGE", false));
-
-pub static DISABLE_JANITOR: Lazy<bool> =
-    Lazy::new(|| get_bool_from_env("QW_LAMBDA_DISABLE_JANITOR", false));
-
-pub static MAX_CHECKPOINTS: Lazy<usize> = Lazy::new(|| {
-    var("QW_LAMBDA_MAX_CHECKPOINTS").map_or(100, |v| {
-        v.parse()
-            .expect("QW_LAMBDA_MAX_CHECKPOINTS must be a positive integer")
-    })
-});
-
-#[cfg(test)]
-mod tests {
-
-    use quickwit_config::{ConfigFormat, NodeConfig};
-
-    use super::*;
-
-    #[tokio::test]
-    #[serial_test::file_serial(with_env)]
-    async fn test_load_config() {
-        // SAFETY: this test may not be entirely sound if not run with nextest or --test-threads=1
-        // as this is only a test, and it would be extremely inconvenient to run it in a different
-        // way, we are keeping it that way
-        // file_serial may not be enough, given other tests not ran serially could read env
-
-        let bucket = "mock-test-bucket";
-        unsafe {
-            std::env::set_var("QW_LAMBDA_METASTORE_BUCKET", bucket);
-            std::env::set_var("QW_LAMBDA_INDEX_BUCKET", bucket);
-            std::env::set_var(
-                "QW_LAMBDA_INDEX_CONFIG_URI",
-                "s3://mock-index-config-bucket",
-            );
-            std::env::set_var("QW_LAMBDA_INDEX_ID", "lambda-test");
-        };
-        let node_config = NodeConfig::load(ConfigFormat::Yaml, CONFIGURATION_TEMPLATE.as_bytes())
-            .await
-            .unwrap();
-        //
-        assert_eq!(
-            node_config.data_dir_path.to_string_lossy(),
-            "/tmp",
-            "only `/tmp` is writeable in AWS Lambda"
-        );
-        assert_eq!(
-            node_config.default_index_root_uri,
-            "s3://mock-test-bucket/index"
-        );
-        assert_eq!(
-            node_config.metastore_uri.to_string(),
-            "s3://mock-test-bucket/index"
-        );
-
-        unsafe {
-            std::env::remove_var("QW_LAMBDA_METASTORE_BUCKET");
-            std::env::remove_var("QW_LAMBDA_INDEX_BUCKET");
-            std::env::remove_var("QW_LAMBDA_INDEX_CONFIG_URI");
-            std::env::remove_var("QW_LAMBDA_INDEX_ID");
-        }
-    }
-}
diff --git a/quickwit/quickwit-lambda/src/indexer/handler.rs b/quickwit/quickwit-lambda/src/indexer/handler.rs
deleted file mode 100644
index 87468c476f1..00000000000
--- a/quickwit/quickwit-lambda/src/indexer/handler.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2021-Present Datadog, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use lambda_runtime::{Error, LambdaEvent};
-use serde_json::Value;
-use tracing::{Instrument, debug_span, error, info, info_span};
-
-use super::environment::{DISABLE_JANITOR, DISABLE_MERGE, INDEX_CONFIG_URI};
-use super::ingest::{IngestArgs, ingest};
-use super::model::IndexerEvent;
-use crate::environment::INDEX_ID;
-use crate::logger;
-use crate::utils::LambdaContainerContext;
-
-async fn indexer_handler(event: LambdaEvent<Value>) -> Result<Value, Error> {
-    let container_ctx = LambdaContainerContext::load();
-    let memory = event.context.env_config.memory;
-    let payload = serde_json::from_value::<IndexerEvent>(event.payload)?;
-
-    let ingest_res = ingest(IngestArgs {
-        input_path: payload.uri()?,
-        input_format: quickwit_config::SourceInputFormat::Json,
-        vrl_script: None,
-        // TODO: instead of clearing the cache, we use a cache and set its max
-        // size with indexer_config.split_store_max_num_bytes
-        clear_cache: true,
-    })
-    .instrument(debug_span!(
-        "ingest",
-        memory,
-        env.INDEX_CONFIG_URI = *INDEX_CONFIG_URI,
-        env.INDEX_ID = *INDEX_ID,
-        env.DISABLE_MERGE = *DISABLE_MERGE,
-        env.DISABLE_JANITOR = *DISABLE_JANITOR,
-        cold = container_ctx.cold,
-        container_id = container_ctx.container_id,
-    ))
-    .await;
-
-    match ingest_res {
-        Ok(stats) => {
-            info!(stats=?stats, "Indexing succeeded");
-            Ok(serde_json::to_value(stats)?)
-        }
-        Err(e) => {
-            error!(err=?e, "Indexing failed");
-            Err(anyhow::anyhow!("Indexing failed").into())
-        }
-    }
-}
-
-pub async fn handler(event: LambdaEvent<Value>) -> Result<Value, Error> {
-    let request_id = event.context.request_id.clone();
-    let mut response = indexer_handler(event)
-        .instrument(info_span!("indexer_handler", request_id))
-        .await;
-    if let Err(e) = &response {
-        error!(err=?e, "Handler failed");
-    }
-    if let Ok(Value::Object(ref mut map)) = response {
-        map.insert("request_id".to_string(), Value::String(request_id));
-    }
-    logger::flush_tracer();
-    response
-}
diff --git a/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs b/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs
deleted file mode 100644
index a44ea9e1f44..00000000000
--- a/quickwit/quickwit-lambda/src/indexer/ingest/helpers.rs
+++ /dev/null
@@ -1,347 +0,0 @@
-// Copyright 2021-Present Datadog, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashSet; -use std::num::NonZeroUsize; -use std::path::Path; - -use anyhow::{Context, bail}; -use chitchat::FailureDetectorConfig; -use chitchat::transport::ChannelTransport; -use quickwit_actors::{ActorHandle, Mailbox, Universe}; -use quickwit_cluster::{Cluster, ClusterMember}; -use quickwit_common::pubsub::EventBroker; -use quickwit_common::runtimes::RuntimesConfig; -use quickwit_common::uri::Uri; -use quickwit_config::merge_policy_config::MergePolicyConfig; -use quickwit_config::service::QuickwitService; -use quickwit_config::{ - ConfigFormat, IndexConfig, NodeConfig, SourceConfig, SourceInputFormat, SourceParams, - TransformConfig, load_index_config_from_user_config, -}; -use quickwit_indexing::IndexingPipeline; -use quickwit_indexing::actors::{IndexingService, MergePipeline, MergeSchedulerService}; -use quickwit_indexing::models::{DetachIndexingPipeline, DetachMergePipeline, SpawnPipeline}; -use quickwit_ingest::IngesterPool; -use quickwit_janitor::{JanitorService, start_janitor_service}; -use quickwit_metastore::{ - AddSourceRequestExt, CreateIndexRequestExt, CreateIndexResponseExt, IndexMetadata, - IndexMetadataResponseExt, -}; -use quickwit_proto::indexing::CpuCapacity; -use quickwit_proto::metastore::{ - AddSourceRequest, CreateIndexRequest, IndexMetadataRequest, MetastoreError, MetastoreService, - MetastoreServiceClient, ResetSourceCheckpointRequest, -}; -use quickwit_proto::types::PipelineUid; -use quickwit_search::SearchJobPlacer; -use quickwit_storage::StorageResolver; -use quickwit_telemetry::payload::{QuickwitFeature, QuickwitTelemetryInfo, TelemetryEvent}; -use tracing::{debug, info, instrument}; - -use crate::environment::INDEX_ID; -use crate::indexer::environment::{ - DISABLE_JANITOR, DISABLE_MERGE, INDEX_CONFIG_URI, MAX_CHECKPOINTS, -}; - -const LAMBDA_SOURCE_ID: &str = "ingest-lambda-source"; - -/// The indexing service needs to update its cluster chitchat state so that the control plane is -/// aware of the running tasks. We thus create a fake cluster to instantiate the indexing service -/// and avoid impacting potential control plane running on the cluster. 
-pub(super) async fn create_empty_cluster(
-    config: &NodeConfig,
-    services: &[QuickwitService],
-) -> anyhow::Result<Cluster> {
-    let self_node = ClusterMember {
-        node_id: config.node_id.clone(),
-        generation_id: quickwit_cluster::GenerationId::now(),
-        is_ready: false,
-        enabled_services: HashSet::from_iter(services.to_owned()),
-        gossip_advertise_addr: config.gossip_advertise_addr,
-        grpc_advertise_addr: config.grpc_advertise_addr,
-        indexing_tasks: Vec::new(),
-        indexing_cpu_capacity: CpuCapacity::zero(),
-    };
-    let cluster = Cluster::join(
-        config.cluster_id.clone(),
-        self_node,
-        config.gossip_advertise_addr,
-        Vec::new(),
-        config.gossip_interval,
-        FailureDetectorConfig::default(),
-        &ChannelTransport::default(),
-        Default::default(),
-    )
-    .await?;
-    Ok(cluster)
-}
-
-/// TODO refactor with `dir_and_filename` in file source
-fn dir_and_filename(filepath: &Path) -> anyhow::Result<(Uri, &Path)> {
-    let dir_uri: Uri = filepath
-        .parent()
-        .context("Parent directory could not be resolved")?
-        .to_str()
-        .context("Path cannot be turned to string")?
-        .parse()?;
-    let file_name = filepath
-        .file_name()
-        .context("Path does not appear to be a file")?;
-    Ok((dir_uri, file_name.as_ref()))
-}
-
-#[instrument(level = "debug", skip(resolver))]
-pub(super) async fn load_index_config(
-    resolver: &StorageResolver,
-    default_index_root_uri: &Uri,
-) -> anyhow::Result<IndexConfig> {
-    let (dir, file) = dir_and_filename(Path::new(&*INDEX_CONFIG_URI))?;
-    let index_config_storage = resolver.resolve(&dir).await?;
-    let bytes = index_config_storage.get_all(file).await?;
-    let mut index_config = load_index_config_from_user_config(
-        ConfigFormat::Yaml,
-        bytes.as_slice(),
-        default_index_root_uri,
-    )?;
-    if *DISABLE_MERGE {
-        debug!("force disable merges");
-        index_config.indexing_settings.merge_policy = MergePolicyConfig::Nop;
-    }
-    Ok(index_config)
-}
-
-pub(super) async fn send_telemetry() {
-    let services: HashSet<String> =
-        HashSet::from_iter([QuickwitService::Indexer.as_str().to_string()]);
-    let telemetry_info =
-        QuickwitTelemetryInfo::new(services, HashSet::from_iter([QuickwitFeature::AwsLambda]));
-    let _telemetry_handle_opt = quickwit_telemetry::start_telemetry_loop(telemetry_info);
-    quickwit_telemetry::send_telemetry_event(TelemetryEvent::RunCommand).await;
-}
-
-/// Convert the incoming file path to a source config
-pub(super) async fn configure_source(
-    input_uri: Uri,
-    input_format: SourceInputFormat,
-    vrl_script: Option<String>,
-) -> anyhow::Result<SourceConfig> {
-    let transform_config = vrl_script.map(|vrl_script| TransformConfig::new(vrl_script, None));
-    let source_params = SourceParams::file_from_uri(input_uri);
-    Ok(SourceConfig {
-        source_id: LAMBDA_SOURCE_ID.to_owned(),
-        num_pipelines: NonZeroUsize::MIN,
-        enabled: true,
-        source_params,
-        transform_config,
-        input_format,
-    })
-}
-
-/// Check if the index exists, creating it if necessary
-///
-/// If the index exists but without the Lambda source ([`LAMBDA_SOURCE_ID`]),
-/// the source is added.
-pub(super) async fn init_index_if_necessary( - metastore: &mut MetastoreServiceClient, - storage_resolver: &StorageResolver, - default_index_root_uri: &Uri, - source_config: &SourceConfig, -) -> anyhow::Result { - let metadata_result = metastore - .index_metadata(IndexMetadataRequest::for_index_id(INDEX_ID.clone())) - .await; - let metadata = match metadata_result { - Ok(metadata_resp) => { - let current_metadata = metadata_resp.deserialize_index_metadata()?; - if !current_metadata.sources.contains_key(LAMBDA_SOURCE_ID) { - let add_source_request = AddSourceRequest::try_from_source_config( - current_metadata.index_uid.clone(), - source_config, - )?; - metastore.add_source(add_source_request).await?; - metastore - .index_metadata(IndexMetadataRequest::for_index_id(INDEX_ID.clone())) - .await? - .deserialize_index_metadata()? - } else { - current_metadata - } - } - Err(MetastoreError::NotFound(_)) => { - info!( - index_id = *INDEX_ID, - index_config_uri = *INDEX_CONFIG_URI, - "Index not found, creating it" - ); - let index_config = load_index_config(storage_resolver, default_index_root_uri).await?; - if index_config.index_id != *INDEX_ID { - bail!( - "Expected index ID was {} but config file had {}", - *INDEX_ID, - index_config.index_id, - ); - } - let create_index_request = CreateIndexRequest::try_from_index_and_source_configs( - &index_config, - std::slice::from_ref(source_config), - )?; - let create_resp = metastore.create_index(create_index_request).await?; - - info!(index_uid = %create_resp.index_uid(), "index created"); - create_resp.deserialize_index_metadata()? - } - Err(e) => bail!(e), - }; - Ok(metadata) -} - -pub(super) async fn spawn_services( - universe: &Universe, - cluster: Cluster, - metastore: MetastoreServiceClient, - storage_resolver: StorageResolver, - node_config: &NodeConfig, - runtime_config: RuntimesConfig, -) -> anyhow::Result<( - ActorHandle, - Option>, -)> { - let event_broker = EventBroker::default(); - - // spawn merge scheduler service - let merge_scheduler_service = - MergeSchedulerService::new(node_config.indexer_config.merge_concurrency.get()); - let (merge_scheduler_service_mailbox, _) = - universe.spawn_builder().spawn(merge_scheduler_service); - - // spawn indexer service - let indexing_service = IndexingService::new( - node_config.node_id.clone(), - node_config.data_dir_path.clone(), - node_config.indexer_config.clone(), - runtime_config.num_threads_blocking, - cluster, - metastore.clone(), - None, - merge_scheduler_service_mailbox.clone(), - IngesterPool::default(), - storage_resolver.clone(), - event_broker.clone(), - ) - .await?; - let (_, indexing_service_handle) = universe.spawn_builder().spawn(indexing_service); - - // spawn janitor service - let janitor_service_opt = if *DISABLE_JANITOR { - None - } else { - Some( - start_janitor_service( - universe, - node_config, - metastore, - SearchJobPlacer::default(), - storage_resolver, - event_broker, - false, - ) - .await?, - ) - }; - Ok((indexing_service_handle, janitor_service_opt)) -} - -/// Spawn and split an indexing pipeline -pub(super) async fn spawn_pipelines( - indexing_server_mailbox: &Mailbox, - source_config: SourceConfig, -) -> anyhow::Result<(ActorHandle, ActorHandle)> { - let pipeline_id = indexing_server_mailbox - .ask_for_res(SpawnPipeline { - index_id: INDEX_ID.clone(), - source_config, - pipeline_uid: PipelineUid::default(), - }) - .await?; - let merge_pipeline_handle = indexing_server_mailbox - .ask_for_res(DetachMergePipeline { - pipeline_id: pipeline_id.merge_pipeline_id(), - }) - 
.await?; - let indexing_pipeline_handle = indexing_server_mailbox - .ask_for_res(DetachIndexingPipeline { pipeline_id }) - .await?; - Ok((indexing_pipeline_handle, merge_pipeline_handle)) -} - -/// Prune old Lambda file checkpoints if there are too many -/// -/// Without pruning checkpoints accumulate indefinitely. This is particularly -/// problematic when indexing a lot of small files, as the metastore will grow -/// large even for a small index. -/// -/// The current implementation just deletes all checkpoints if there are more -/// than QW_LAMBDA_MAX_CHECKPOINTS. When this purging is performed, the Lambda -/// indexer might ingest the same file again if it receives a duplicate -/// notification. -pub(super) async fn prune_lambda_source( - metastore: &mut MetastoreServiceClient, - index_metadata: IndexMetadata, -) -> anyhow::Result<()> { - let lambda_checkpoint_opt = index_metadata - .checkpoint - .source_checkpoint(LAMBDA_SOURCE_ID); - - if let Some(lambda_checkpoint) = lambda_checkpoint_opt { - if lambda_checkpoint.num_partitions() > *MAX_CHECKPOINTS { - info!( - partitions = lambda_checkpoint.num_partitions(), - "prune Lambda checkpoints" - ); - metastore - .reset_source_checkpoint(ResetSourceCheckpointRequest { - index_uid: Some(index_metadata.index_uid.clone()), - source_id: LAMBDA_SOURCE_ID.to_owned(), - }) - .await?; - } - } - - Ok(()) -} - -/// Observe the merge pipeline until there are no more ongoing merges -pub(super) async fn wait_for_merges( - merge_pipeline_handle: ActorHandle, -) -> anyhow::Result<()> { - // TODO: find a way to stop the MergePlanner actor in the MergePipeline, - // otherwise a new merge might be scheduled after this loop. That shouldn't - // have any concrete impact as the merge will be immediately cancelled, but - // it might generate errors during the universe shutdown (i.e "Failed to - // acquire permit") - loop { - let state = merge_pipeline_handle.state(); - let obs = merge_pipeline_handle.observe().await; - debug!(state=?state, ongoing=obs.num_ongoing_merges, "merge pipeline state"); - if obs.num_ongoing_merges == 0 { - break; - } - // We tolerate a relatively low refresh rate because the indexer - // typically runs for longer periods of times and merges happen only - // occasionally. - tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; - } - Ok(()) -} diff --git a/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs b/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs deleted file mode 100644 index 4f7295bc89a..00000000000 --- a/quickwit/quickwit-lambda/src/indexer/ingest/mod.rs +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -mod helpers; - -use std::collections::HashSet; - -use anyhow::bail; -use helpers::{ - configure_source, create_empty_cluster, init_index_if_necessary, send_telemetry, - spawn_pipelines, spawn_services, -}; -use quickwit_actors::Universe; -use quickwit_cli::start_actor_runtimes; -use quickwit_cli::tool::start_statistics_reporting_loop; -use quickwit_common::runtimes::RuntimesConfig; -use quickwit_common::uri::Uri; -use quickwit_config::SourceInputFormat; -use quickwit_config::service::QuickwitService; -use quickwit_index_management::clear_cache_directory; -use quickwit_indexing::models::IndexingStatistics; -use tracing::{debug, info}; - -use crate::indexer::environment::{CONFIGURATION_TEMPLATE, DISABLE_JANITOR}; -use crate::indexer::ingest::helpers::{prune_lambda_source, wait_for_merges}; -use crate::utils::load_node_config; - -#[derive(Debug, Eq, PartialEq)] -pub struct IngestArgs { - pub input_path: Uri, - pub input_format: SourceInputFormat, - pub vrl_script: Option, - pub clear_cache: bool, -} - -pub async fn ingest(args: IngestArgs) -> anyhow::Result { - debug!(args=?args, "lambda-ingest"); - - send_telemetry().await; - - let (config, storage_resolver, mut metastore) = - load_node_config(CONFIGURATION_TEMPLATE).await?; - - let source_config = - configure_source(args.input_path, args.input_format, args.vrl_script).await?; - - let index_metadata = init_index_if_necessary( - &mut metastore, - &storage_resolver, - &config.default_index_root_uri, - &source_config, - ) - .await?; - - let mut services = vec![QuickwitService::Indexer]; - if !*DISABLE_JANITOR { - services.push(QuickwitService::Janitor); - } - let cluster = create_empty_cluster(&config, &services[..]).await?; - let universe = Universe::new(); - let runtimes_config = RuntimesConfig::default(); - - start_actor_runtimes(runtimes_config, &HashSet::from_iter(services))?; - - let (indexing_service_handle, _janitor_service_guard) = spawn_services( - &universe, - cluster, - metastore.clone(), - storage_resolver.clone(), - &config, - runtimes_config, - ) - .await?; - - let (indexing_pipeline_handle, merge_pipeline_handle) = - spawn_pipelines(indexing_service_handle.mailbox(), source_config).await?; - - prune_lambda_source(&mut metastore, index_metadata).await?; - - debug!("wait for indexing to complete"); - let statistics = start_statistics_reporting_loop(indexing_pipeline_handle, false).await?; - - debug!("wait for merges to complete"); - wait_for_merges(merge_pipeline_handle).await?; - - debug!("indexing completed, tearing down actors"); - // TODO: is it really necessary to terminate the indexing service? - // Quitting the universe should be enough. 
- universe - .send_exit_with_success(indexing_service_handle.mailbox()) - .await?; - indexing_service_handle.join().await; - debug!("quitting universe"); - universe.quit().await; - debug!("universe.quit() awaited"); - - if args.clear_cache { - info!("clearing local cache directory"); - clear_cache_directory(&config.data_dir_path).await?; - info!("local cache directory cleared"); - } - - if statistics.num_invalid_docs > 0 { - bail!("Failed to ingest {} documents", statistics.num_invalid_docs) - } - Ok(statistics) -} - -#[cfg(all(test, feature = "s3-localstack-tests"))] -mod tests { - use std::path::PathBuf; - use std::str::FromStr; - - use quickwit_common::new_coolid; - use quickwit_storage::StorageResolver; - - use super::*; - - async fn put_object( - storage_resolver: StorageResolver, - bucket: &str, - prefix: &str, - filename: &str, - data: Vec, - ) -> Uri { - let src_location = format!("s3://{bucket}/{prefix}"); - let storage_uri = Uri::from_str(&src_location).unwrap(); - let storage = storage_resolver.resolve(&storage_uri).await.unwrap(); - storage - .put(&PathBuf::from(filename), Box::new(data)) - .await - .unwrap(); - storage_uri.join(filename).unwrap() - } - - #[tokio::test] - #[serial_test::file_serial(with_env)] - async fn test_ingest() -> anyhow::Result<()> { - // SAFETY: this test may not be entirely sound if not run with nextest or --test-threads=1 - // as this is only a test, and it would be extremly inconvenient to run it in a different - // way, we are keeping it that way - // file_serial may not be enough, given other tests not ran serially could read env - - quickwit_common::setup_logging_for_tests(); - let bucket = "quickwit-integration-tests"; - let prefix = new_coolid("lambda-ingest-test"); - let storage_resolver = StorageResolver::unconfigured(); - - let index_config = br#" - version: 0.8 - index_id: lambda-test - doc_mapping: - field_mappings: - - name: timestamp - type: datetime - input_formats: - - unix_timestamp - fast: true - timestamp_field: timestamp - "#; - let config_uri = put_object( - storage_resolver.clone(), - bucket, - &prefix, - "index-config.yaml", - index_config.to_vec(), - ) - .await; - - // TODO use dependency injection instead of lazy static for env configs - unsafe { - std::env::set_var("QW_LAMBDA_METASTORE_BUCKET", bucket); - std::env::set_var("QW_LAMBDA_INDEX_BUCKET", bucket); - std::env::set_var("QW_LAMBDA_METASTORE_PREFIX", &prefix); - std::env::set_var("QW_LAMBDA_INDEX_PREFIX", &prefix); - std::env::set_var("QW_LAMBDA_INDEX_CONFIG_URI", config_uri.as_str()); - std::env::set_var("QW_LAMBDA_INDEX_ID", "lambda-test"); - } - - // first ingestion creates the index metadata - let test_data_1 = br#"{"timestamp": 1724140899, "field1": "value1"}"#; - let test_data_1_uri = put_object( - storage_resolver.clone(), - bucket, - &prefix, - "data.json", - test_data_1.to_vec(), - ) - .await; - - { - let args = IngestArgs { - input_path: test_data_1_uri.clone(), - input_format: SourceInputFormat::Json, - vrl_script: None, - clear_cache: true, - }; - let stats = ingest(args).await?; - assert_eq!(stats.num_invalid_docs, 0); - assert_eq!(stats.num_docs, 1); - } - - tokio::time::sleep(std::time::Duration::from_secs(1)).await; - - { - // ingesting the same data again is a no-op - let args = IngestArgs { - input_path: test_data_1_uri, - input_format: SourceInputFormat::Json, - vrl_script: None, - clear_cache: true, - }; - let stats = ingest(args).await?; - assert_eq!(stats.num_invalid_docs, 0); - assert_eq!(stats.num_docs, 0); - } - - { - // second ingestion should 
not fail when metadata already exists - let test_data = br#"{"timestamp": 1724149900, "field1": "value2"}"#; - let test_data_uri = put_object( - storage_resolver.clone(), - bucket, - &prefix, - "data2.json", - test_data.to_vec(), - ) - .await; - - let args = IngestArgs { - input_path: test_data_uri, - input_format: SourceInputFormat::Json, - vrl_script: None, - clear_cache: true, - }; - let stats = ingest(args).await?; - assert_eq!(stats.num_invalid_docs, 0); - assert_eq!(stats.num_docs, 1); - } - - unsafe { - std::env::remove_var("QW_LAMBDA_METASTORE_BUCKET"); - std::env::remove_var("QW_LAMBDA_INDEX_BUCKET"); - std::env::remove_var("QW_LAMBDA_METASTORE_PREFIX"); - std::env::remove_var("QW_LAMBDA_INDEX_PREFIX"); - std::env::remove_var("QW_LAMBDA_INDEX_CONFIG_URI"); - std::env::remove_var("QW_LAMBDA_INDEX_ID"); - } - - Ok(()) - } -} diff --git a/quickwit/quickwit-lambda/src/indexer/mod.rs b/quickwit/quickwit-lambda/src/indexer/mod.rs deleted file mode 100644 index 308ec589c1a..00000000000 --- a/quickwit/quickwit-lambda/src/indexer/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod environment; -mod handler; -mod ingest; -mod model; - -pub use handler::handler; diff --git a/quickwit/quickwit-lambda/src/indexer/model.rs b/quickwit/quickwit-lambda/src/indexer/model.rs deleted file mode 100644 index c87958c9ea8..00000000000 --- a/quickwit/quickwit-lambda/src/indexer/model.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::str::FromStr; - -use aws_lambda_events::event::s3::S3Event; -use quickwit_common::uri::Uri; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(untagged)] -/// Event types that can be used to invoke the indexer Lambda. 
-pub enum IndexerEvent { - Custom { source_uri: String }, - S3(S3Event), -} - -impl IndexerEvent { - pub fn uri(&self) -> anyhow::Result { - let path: String = match self { - IndexerEvent::Custom { source_uri } => source_uri.clone(), - IndexerEvent::S3(event) => [ - "s3://", - event.records[0].s3.bucket.name.as_ref().unwrap(), - "/", - event.records[0].s3.object.key.as_ref().unwrap(), - ] - .join(""), - }; - Uri::from_str(&path) - } -} - -#[cfg(test)] -mod tests { - use serde_json::json; - - use super::*; - - #[test] - fn test_custom_event_uri() { - let cust_event = json!({ - "source_uri": "s3://quickwit-test/test.json" - }); - let parsed_cust_event: IndexerEvent = serde_json::from_value(cust_event).unwrap(); - assert_eq!( - parsed_cust_event.uri().unwrap(), - Uri::from_str("s3://quickwit-test/test.json").unwrap(), - ); - } - - #[test] - fn test_s3_event_uri() { - let s3_event = json!({ - "Records": [ - { - "eventVersion": "2.0", - "eventSource": "aws:s3", - "awsRegion": "us-east-1", - "eventTime": "1970-01-01T00:00:00.000Z", - "eventName": "ObjectCreated:Put", - "userIdentity": { - "principalId": "EXAMPLE" - }, - "requestParameters": { - "sourceIPAddress": "127.0.0.1" - }, - "responseElements": { - "x-amz-request-id": "EXAMPLE123456789", - "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH" - }, - "s3": { - "s3SchemaVersion": "1.0", - "configurationId": "testConfigRule", - "bucket": { - "name": "quickwit-test", - "ownerIdentity": { - "principalId": "EXAMPLE" - }, - "arn": "arn:aws:s3:::quickwit-test" - }, - "object": { - "key": "test.json", - "size": 1024, - "eTag": "0123456789abcdef0123456789abcdef", - "sequencer": "0A1B2C3D4E5F678901" - } - } - } - ] - }); - let s3_event: IndexerEvent = serde_json::from_value(s3_event).unwrap(); - assert_eq!( - s3_event.uri().unwrap(), - Uri::from_str("s3://quickwit-test/test.json").unwrap(), - ); - } -} diff --git a/quickwit/quickwit-lambda/src/lib.rs b/quickwit/quickwit-lambda/src/lib.rs deleted file mode 100644 index e7efc4287b4..00000000000 --- a/quickwit/quickwit-lambda/src/lib.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod environment; -pub mod indexer; -pub mod logger; -pub mod searcher; -mod utils; diff --git a/quickwit/quickwit-lambda/src/logger.rs b/quickwit/quickwit-lambda/src/logger.rs deleted file mode 100644 index c722f19f9a4..00000000000 --- a/quickwit/quickwit-lambda/src/logger.rs +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use anyhow::Context; -use once_cell::sync::OnceCell; -use opentelemetry::trace::TracerProvider as _; -use opentelemetry::{KeyValue, global}; -use opentelemetry_otlp::{WithExportConfig, WithHttpConfig}; -use opentelemetry_sdk::propagation::TraceContextPropagator; -use opentelemetry_sdk::trace::{BatchConfigBuilder, TracerProvider}; -use opentelemetry_sdk::{Resource, trace}; -use quickwit_serve::BuildInfo; -use tracing::{Level, debug}; -use tracing_subscriber::fmt::format::{FmtSpan, JsonFields}; -use tracing_subscriber::fmt::time::UtcTime; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::prelude::*; -use tracing_subscriber::registry::LookupSpan; -use tracing_subscriber::{EnvFilter, Layer}; - -use crate::environment::{ - ENABLE_VERBOSE_JSON_LOGS, OPENTELEMETRY_AUTHORIZATION, OPENTELEMETRY_URL, -}; - -static TRACER_PROVIDER: OnceCell = OnceCell::new(); -pub(crate) const RUNTIME_CONTEXT_SPAN: &str = "runtime_context"; - -fn fmt_env_filter(level: Level) -> EnvFilter { - let default_directive = format!("quickwit={level}") - .parse() - .expect("default directive should be valid"); - EnvFilter::builder() - .with_default_directive(default_directive) - .from_env_lossy() -} - -fn fmt_time_format() -> UtcTime>> { - // We do not rely on the Rfc3339 implementation, because it has a nanosecond precision. - // See discussion here: https://github.com/time-rs/time/discussions/418 - UtcTime::new( - time::format_description::parse( - "[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond digits:3]Z", - ) - .expect("Time format invalid."), - ) -} - -fn compact_fmt_layer(level: Level) -> impl Layer -where - S: for<'a> LookupSpan<'a>, - S: tracing::Subscriber, -{ - let event_format = tracing_subscriber::fmt::format() - .with_target(true) - .with_timer(fmt_time_format()) - .compact(); - - tracing_subscriber::fmt::layer::() - .event_format(event_format) - .with_ansi(false) - .with_filter(fmt_env_filter(level)) -} - -fn json_fmt_layer(level: Level) -> impl Layer -where - S: for<'a> LookupSpan<'a>, - S: tracing::Subscriber, -{ - let event_format = tracing_subscriber::fmt::format() - .with_target(true) - .with_timer(fmt_time_format()) - .json(); - tracing_subscriber::fmt::layer::() - .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) - .event_format(event_format) - .fmt_fields(JsonFields::default()) - .with_ansi(false) - .with_filter(fmt_env_filter(level)) -} - -fn fmt_layer(level: Level) -> Box + Send + Sync + 'static> -where - S: for<'a> LookupSpan<'a>, - S: tracing::Subscriber, -{ - if *ENABLE_VERBOSE_JSON_LOGS { - json_fmt_layer(level).boxed() - } else { - compact_fmt_layer(level).boxed() - } -} - -fn otlp_layer( - ot_url: String, - ot_auth: String, - level: Level, - build_info: &BuildInfo, -) -> anyhow::Result> -where - S: for<'a> LookupSpan<'a>, - S: tracing::Subscriber, -{ - let headers = std::collections::HashMap::from([("Authorization".to_string(), ot_auth)]); - let otlp_exporter = opentelemetry_otlp::SpanExporter::builder() - .with_http() - .with_endpoint(ot_url) - .with_headers(headers) - .build() - .context("failed to initialize OpenTelemetry OTLP exporter")?; - let batch_processor = - trace::BatchSpanProcessor::builder(otlp_exporter, opentelemetry_sdk::runtime::Tokio) - .with_batch_config( - BatchConfigBuilder::default() - // Quickwit can generate a lot of spans, especially in debug mode, and the - // default queue size of 2048 is too small. 
- .with_max_queue_size(32_768) - .build(), - ) - .build(); - let provider = opentelemetry_sdk::trace::TracerProvider::builder() - .with_span_processor(batch_processor) - .with_resource(Resource::new([ - KeyValue::new("service.name", "quickwit"), - KeyValue::new("service.version", build_info.version.clone()), - ])) - .build(); - TRACER_PROVIDER - .set(provider.clone()) - .expect("cell should be empty"); - let tracer = provider.tracer("quickwit"); - let env_filter = std::env::var(EnvFilter::DEFAULT_ENV) - .map(|_| EnvFilter::from_default_env()) - .or_else(|_| { - // record the runtime context span for trace querying - EnvFilter::try_new(format!( - "quickwit={level},quickwit[{RUNTIME_CONTEXT_SPAN}]=trace" - )) - }) - .expect("Failed to set up OTLP tracing filter."); - Ok(tracing_opentelemetry::layer() - .with_tracer(tracer) - .with_filter(env_filter)) -} - -pub fn setup_lambda_tracer(level: Level) -> anyhow::Result<()> { - global::set_text_map_propagator(TraceContextPropagator::new()); - let registry = tracing_subscriber::registry(); - let build_info = BuildInfo::get(); - if let (Some(ot_url), Some(ot_auth)) = ( - OPENTELEMETRY_URL.clone(), - OPENTELEMETRY_AUTHORIZATION.clone(), - ) { - registry - .with(fmt_layer(level)) - .with(otlp_layer(ot_url, ot_auth, level, build_info)?) - .try_init() - .context("failed to register tracing subscriber")?; - } else { - registry - .with(fmt_layer(level)) - .try_init() - .context("failed to register tracing subscriber")?; - } - Ok(()) -} - -pub fn flush_tracer() { - if let Some(tracer_provider) = TRACER_PROVIDER.get() { - debug!("flush tracers"); - for res in tracer_provider.force_flush() { - if let Err(err) = res { - debug!(?err, "failed to flush tracer"); - } - } - } -} diff --git a/quickwit/quickwit-lambda/src/searcher/api.rs b/quickwit/quickwit-lambda/src/searcher/api.rs deleted file mode 100644 index 7ba034de5a4..00000000000 --- a/quickwit/quickwit-lambda/src/searcher/api.rs +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::HashSet; -use std::net::{Ipv4Addr, SocketAddr}; -use std::sync::Arc; - -use quickwit_config::SearcherConfig; -use quickwit_config::service::QuickwitService; -use quickwit_proto::metastore::MetastoreServiceClient; -use quickwit_search::{ - ClusterClient, SearchJobPlacer, SearchService, SearchServiceClient, SearchServiceImpl, - SearcherContext, SearcherPool, -}; -use quickwit_serve::lambda_search_api::*; -use quickwit_storage::StorageResolver; -use quickwit_telemetry::payload::{QuickwitFeature, QuickwitTelemetryInfo, TelemetryEvent}; -use tracing::{error, info}; -use warp::Filter; -use warp::filters::path::FullPath; -use warp::reject::Rejection; - -use crate::searcher::environment::CONFIGURATION_TEMPLATE; -use crate::utils::load_node_config; - -async fn create_local_search_service( - searcher_config: SearcherConfig, - metastore: MetastoreServiceClient, - storage_resolver: StorageResolver, -) -> Arc { - let searcher_pool = SearcherPool::default(); - let search_job_placer = SearchJobPlacer::new(searcher_pool.clone()); - let cluster_client = ClusterClient::new(search_job_placer); - // TODO configure split cache - let searcher_context = Arc::new(SearcherContext::new(searcher_config, None)); - let search_service = Arc::new(SearchServiceImpl::new( - metastore, - storage_resolver, - cluster_client.clone(), - searcher_context.clone(), - )); - // Add search service to pool to avoid "no available searcher nodes in the pool" error - let socket_addr = SocketAddr::new(Ipv4Addr::LOCALHOST.into(), 7280u16); - let search_service_client = - SearchServiceClient::from_service(search_service.clone(), socket_addr); - searcher_pool.insert(socket_addr, search_service_client); - search_service -} - -fn native_api( - search_service: Arc, -) -> impl Filter + Clone { - search_get_handler(search_service.clone()).or(search_post_handler(search_service)) -} - -fn es_compat_api( - search_service: Arc, - metastore: MetastoreServiceClient, -) -> impl Filter + Clone { - es_compat_search_handler(search_service.clone()) - .or(es_compat_index_search_handler(search_service.clone())) - .or(es_compat_index_count_handler(search_service.clone())) - .or(es_compat_scroll_handler(search_service.clone())) - .or(es_compat_index_multi_search_handler(search_service.clone())) - .or(es_compat_index_field_capabilities_handler( - search_service.clone(), - )) - .or(es_compat_index_stats_handler(metastore.clone())) - .or(es_compat_stats_handler(metastore.clone())) - .or(es_compat_index_cat_indices_handler(metastore.clone())) - .or(es_compat_cat_indices_handler(metastore.clone())) - .or(es_compat_resolve_index_handler(metastore.clone())) -} - -fn index_api( - metastore: MetastoreServiceClient, -) -> impl Filter + Clone { - get_index_metadata_handler(metastore) -} - -fn v1_searcher_api( - search_service: Arc, - metastore: MetastoreServiceClient, -) -> impl Filter + Clone { - warp::path!("api" / "v1" / ..) 
- .and( - native_api(search_service.clone()) - .or(es_compat_api(search_service, metastore.clone())) - .or(index_api(metastore)), - ) - .with(warp::filters::compression::gzip()) - .recover(|rejection| { - error!(?rejection, "request rejected"); - recover_fn(rejection) - }) -} - -pub async fn setup_searcher_api() --> anyhow::Result + Clone> { - let (node_config, storage_resolver, metastore) = - load_node_config(CONFIGURATION_TEMPLATE).await?; - - let telemetry_info = QuickwitTelemetryInfo::new( - HashSet::from_iter([QuickwitService::Searcher.as_str().to_string()]), - HashSet::from_iter([QuickwitFeature::AwsLambda]), - ); - let _telemetry_handle_opt = quickwit_telemetry::start_telemetry_loop(telemetry_info); - - let search_service = create_local_search_service( - node_config.searcher_config, - metastore.clone(), - storage_resolver, - ) - .await; - - let before_hook = warp::path::full() - .and(warp::method()) - .and_then(|route: FullPath, method: warp::http::Method| async move { - info!( - method = method.as_str(), - route = route.as_str(), - "new request" - ); - quickwit_telemetry::send_telemetry_event(TelemetryEvent::RunCommand).await; - Ok::<_, std::convert::Infallible>(()) - }) - .untuple_one(); - - let after_hook = warp::log::custom(|info| { - info!(status = info.status().as_str(), "request completed"); - }); - - let api = warp::any() - .and(before_hook) - .and(v1_searcher_api(search_service, metastore)) - .with(after_hook); - - Ok(api) -} diff --git a/quickwit/quickwit-lambda/src/searcher/environment.rs b/quickwit/quickwit-lambda/src/searcher/environment.rs deleted file mode 100644 index 25c4793e771..00000000000 --- a/quickwit/quickwit-lambda/src/searcher/environment.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -pub(crate) const CONFIGURATION_TEMPLATE: &str = r#" -version: 0.8 -node_id: lambda-searcher -metastore_uri: s3://${QW_LAMBDA_METASTORE_BUCKET}/${QW_LAMBDA_METASTORE_PREFIX:-index}#polling_interval=${QW_LAMBDA_SEARCHER_METASTORE_POLLING_INTERVAL_SECONDS:-60}s -default_index_root_uri: s3://${QW_LAMBDA_INDEX_BUCKET}/${QW_LAMBDA_INDEX_PREFIX:-index} -data_dir: /tmp -searcher: - partial_request_cache_capacity: ${QW_LAMBDA_PARTIAL_REQUEST_CACHE_CAPACITY:-64M} -"#; - -#[cfg(test)] -mod tests { - - use bytesize::ByteSize; - use quickwit_config::{ConfigFormat, NodeConfig}; - - use super::*; - - #[tokio::test] - #[serial_test::file_serial(with_env)] - async fn test_load_config() { - // SAFETY: this test may not be entirely sound if not run with nextest or --test-threads=1 - // as this is only a test, and it would be extremly inconvenient to run it in a different - // way, we are keeping it that way - // file_serial may not be enough, given other tests not ran serially could read env - - let bucket = "mock-test-bucket"; - unsafe { - std::env::set_var("QW_LAMBDA_METASTORE_BUCKET", bucket); - std::env::set_var("QW_LAMBDA_INDEX_BUCKET", bucket); - std::env::set_var( - "QW_LAMBDA_INDEX_CONFIG_URI", - "s3://mock-index-config-bucket", - ); - std::env::set_var("QW_LAMBDA_INDEX_ID", "lambda-test"); - } - - let node_config = NodeConfig::load(ConfigFormat::Yaml, CONFIGURATION_TEMPLATE.as_bytes()) - .await - .unwrap(); - assert_eq!( - node_config.data_dir_path.to_string_lossy(), - "/tmp", - "only `/tmp` is writeable in AWS Lambda" - ); - assert_eq!( - node_config.default_index_root_uri, - "s3://mock-test-bucket/index" - ); - assert_eq!( - node_config.metastore_uri.to_string(), - "s3://mock-test-bucket/index#polling_interval=60s" - ); - assert_eq!( - node_config.searcher_config.partial_request_cache_capacity, - ByteSize::mb(64) - ); - - unsafe { - std::env::remove_var("QW_LAMBDA_METASTORE_BUCKET"); - std::env::remove_var("QW_LAMBDA_INDEX_BUCKET"); - std::env::remove_var("QW_LAMBDA_INDEX_CONFIG_URI"); - std::env::remove_var("QW_LAMBDA_INDEX_ID"); - } - } -} diff --git a/quickwit/quickwit-lambda/src/searcher/mod.rs b/quickwit/quickwit-lambda/src/searcher/mod.rs deleted file mode 100644 index e00d9418a6d..00000000000 --- a/quickwit/quickwit-lambda/src/searcher/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -mod api; -mod environment; -pub mod warp_lambda; - -pub use api::setup_searcher_api; diff --git a/quickwit/quickwit-lambda/src/searcher/warp_lambda.rs b/quickwit/quickwit-lambda/src/searcher/warp_lambda.rs deleted file mode 100644 index 03937c2ebba..00000000000 --- a/quickwit/quickwit-lambda/src/searcher/warp_lambda.rs +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Based on https://github.com/aslamplr/warp_lambda under MIT license - -use core::future::Future; -use std::collections::HashSet; -use std::convert::Infallible; -use std::marker::PhantomData; -use std::pin::Pin; -use std::str::FromStr; -use std::task::{Context, Poll}; - -use anyhow::anyhow; -use lambda_http::http::HeaderValue; -use lambda_http::{ - Adapter, Body as LambdaBody, Error as LambdaError, Request, RequestExt, Response, Service, - lambda_runtime, -}; -use mime_guess::{Mime, mime}; -use once_cell::sync::Lazy; -use tracing::{Instrument, info_span}; -use warp::hyper::Body as WarpBody; -pub use {lambda_http, warp}; - -pub type WarpRequest = warp::http::Request; -pub type WarpResponse = warp::http::Response; - -static PLAINTEXT_MIMES: Lazy> = Lazy::new(|| { - HashSet::from_iter([ - mime::APPLICATION_JAVASCRIPT, - mime::APPLICATION_JAVASCRIPT_UTF_8, - mime::APPLICATION_JSON, - ]) -}); - -pub async fn run<'a, S>(service: S) -> Result<(), LambdaError> -where - S: Service + Send + 'a, - S::Future: Send + 'a, -{ - lambda_runtime::run(Adapter::from(WarpAdapter::new(service))).await -} - -#[derive(Clone)] -pub struct WarpAdapter<'a, S> -where - S: Service, - S::Future: Send + 'a, -{ - warp_service: S, - _phantom_data: PhantomData<&'a WarpResponse>, -} - -impl<'a, S> WarpAdapter<'a, S> -where - S: Service, - S::Future: Send + 'a, -{ - pub fn new(warp_service: S) -> Self { - Self { - warp_service, - _phantom_data: PhantomData, - } - } -} - -impl<'a, S> Service for WarpAdapter<'a, S> -where - S: Service + 'a, - S::Future: Send + 'a, -{ - type Response = Response; - type Error = LambdaError; - type Future = Pin> + Send + 'a>>; - - fn poll_ready(&mut self, ctx: &mut Context<'_>) -> Poll> { - self.warp_service - .poll_ready(ctx) - .map_err(|error| match error {}) - } - - fn call(&mut self, request: Request) -> Self::Future { - let query_params = request.query_string_parameters(); - let request_id = request.lambda_context().request_id.clone(); - let (parts, body) = request.into_parts(); - let mut warp_parts = lambda_parts_to_warp_parts(&parts); - let (content_len, warp_body) = match body { - LambdaBody::Empty => (0, WarpBody::empty()), - LambdaBody::Text(text) => (text.len(), WarpBody::from(text.into_bytes())), - LambdaBody::Binary(bytes) => (bytes.len(), WarpBody::from(bytes)), - }; - let mut uri = format!("http://{}{}", "127.0.0.1", parts.uri.path()); - if !query_params.is_empty() { - let url_res = reqwest::Url::parse_with_params(&uri, query_params.iter()); - if let Ok(url) = url_res { - uri = url.into(); - } else { - return Box::pin(async move { Err(anyhow!("invalid url: {uri}").into()) }); - } - } - warp_parts.uri = warp::hyper::Uri::from_str(uri.as_str()).unwrap(); - // REST API Gateways swallow the content-length header which is required - // by many Quickwit routes (`warp::body::content_length_limit(xxx)`) - if let warp::http::header::Entry::Vacant(entry) = warp_parts.headers.entry("Content-Length") - { - entry.insert(content_len.into()); - } - let warp_request = WarpRequest::from_parts(warp_parts, warp_body); - - // Call warp service with warp request, save future - let 
warp_fut = self.warp_service.call(warp_request); - - // Create lambda future - let fut = async move { - let warp_response = warp_fut.await?; - let (warp_parts, warp_body): (_, _) = warp_response.into_parts(); - let parts = warp_parts_to_lambda_parts(&warp_parts); - let body = warp_body_to_lambda_body(&parts, warp_body).await?; - let lambda_response = Response::from_parts(parts, body); - Ok::(lambda_response) - } - .instrument(info_span!("searcher request", request_id)); - Box::pin(fut) - } -} - -fn lambda_parts_to_warp_parts( - parts: &lambda_http::http::request::Parts, -) -> warp::http::request::Parts { - let mut builder = warp::http::Request::builder() - .method(lambda_method_to_warp_method(&parts.method)) - .uri(lambda_uri_to_warp_uri(&parts.uri)) - .version(lambda_version_to_warp_version(parts.version)); - - for (name, value) in parts.headers.iter() { - builder = builder.header(name.as_str(), value.as_bytes()); - } - let request = builder.body(()).unwrap(); - let (parts, _) = request.into_parts(); - parts -} - -fn warp_parts_to_lambda_parts( - parts: &warp::http::response::Parts, -) -> lambda_http::http::response::Parts { - let mut builder = lambda_http::http::Response::builder() - .status(parts.status.as_u16()) - .version(warp_version_to_lambda_version(parts.version)); - - for (name, value) in parts.headers.iter() { - builder = builder.header(name.as_str(), value.as_bytes()); - } - let response = builder.body(()).unwrap(); - let (parts, _) = response.into_parts(); - parts -} - -async fn warp_body_to_lambda_body( - parts: &lambda_http::http::response::Parts, - warp_body: WarpBody, -) -> Result { - // Concatenate all bytes into a single buffer - let body_bytes = warp::hyper::body::to_bytes(warp_body).await?.to_vec(); - - // Attempt to determine the Content-Type - let content_type_opt: Option<&HeaderValue> = parts.headers.get("Content-Type"); - let content_encoding_opt: Option<&HeaderValue> = parts.headers.get("Content-Encoding"); - - // If Content-Encoding is present, assume compression - // If Content-Type is not present, don't assume is a string - if let (Some(content_type), None) = (content_type_opt, content_encoding_opt) { - let content_type_str = content_type.to_str()?; - let mime = content_type_str.parse::()?; - - if PLAINTEXT_MIMES.contains(&mime) || mime.type_() == mime::TEXT { - return Ok(LambdaBody::Text(String::from_utf8(body_bytes)?)); - } - } - // Not a text response, make binary - Ok(LambdaBody::Binary(body_bytes)) -} - -fn lambda_method_to_warp_method(method: &lambda_http::http::Method) -> warp::http::Method { - method.as_str().parse::().unwrap() -} - -fn lambda_uri_to_warp_uri(uri: &lambda_http::http::Uri) -> warp::http::Uri { - uri.to_string().parse::().unwrap() -} - -fn lambda_version_to_warp_version(version: lambda_http::http::Version) -> warp::http::Version { - if version == lambda_http::http::Version::HTTP_09 { - warp::http::Version::HTTP_09 - } else if version == lambda_http::http::Version::HTTP_10 { - warp::http::Version::HTTP_10 - } else if version == lambda_http::http::Version::HTTP_11 { - warp::http::Version::HTTP_11 - } else if version == lambda_http::http::Version::HTTP_2 { - warp::http::Version::HTTP_2 - } else if version == lambda_http::http::Version::HTTP_3 { - warp::http::Version::HTTP_3 - } else { - panic!("invalid HTTP version: {version:?}"); - } -} - -fn warp_version_to_lambda_version(version: warp::http::Version) -> lambda_http::http::Version { - if version == warp::http::Version::HTTP_09 { - lambda_http::http::Version::HTTP_09 - } else if version == 
warp::http::Version::HTTP_10 { - lambda_http::http::Version::HTTP_10 - } else if version == warp::http::Version::HTTP_11 { - lambda_http::http::Version::HTTP_11 - } else if version == warp::http::Version::HTTP_2 { - lambda_http::http::Version::HTTP_2 - } else if version == warp::http::Version::HTTP_3 { - lambda_http::http::Version::HTTP_3 - } else { - panic!("invalid HTTP version: {version:?}"); - } -} diff --git a/quickwit/quickwit-lambda/src/utils.rs b/quickwit/quickwit-lambda/src/utils.rs deleted file mode 100644 index 84ce69e3c27..00000000000 --- a/quickwit/quickwit-lambda/src/utils.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::atomic::AtomicU32; -use std::sync::atomic::Ordering::SeqCst; - -use anyhow::Context; -use quickwit_config::{ConfigFormat, NodeConfig}; -use quickwit_metastore::MetastoreResolver; -use quickwit_proto::metastore::MetastoreServiceClient; -use quickwit_storage::StorageResolver; -use tracing::info; - -pub(crate) async fn load_node_config( - config_template: &str, -) -> anyhow::Result<(NodeConfig, StorageResolver, MetastoreServiceClient)> { - let config = NodeConfig::load(ConfigFormat::Yaml, config_template.as_bytes()) - .await - .with_context(|| format!("Failed to parse node config `{config_template}`."))?; - info!(config=?config, "loaded node config"); - let storage_resolver = StorageResolver::configured(&config.storage_configs); - let metastore_resolver = - MetastoreResolver::configured(storage_resolver.clone(), &config.metastore_configs); - let metastore: MetastoreServiceClient = - metastore_resolver.resolve(&config.metastore_uri).await?; - Ok((config, storage_resolver, metastore)) -} - -static CONTAINER_ID: AtomicU32 = AtomicU32::new(0); - -pub struct LambdaContainerContext { - pub container_id: u32, - pub cold: bool, -} - -impl LambdaContainerContext { - /// Configure and return the Lambda container context. - /// - /// The `cold` field returned will be `true` only the first time this - /// function is called. - pub fn load() -> Self { - let mut container_id = CONTAINER_ID.load(SeqCst); - let mut cold = false; - if container_id == 0 { - container_id = rand::random(); - CONTAINER_ID.store(container_id, SeqCst); - cold = true; - } - Self { container_id, cold } - } -} From 9eeeea592b92ca7cd676b7056dc405f1f0b31381 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:19:14 -0400 Subject: [PATCH 4/9] Bump the github-actions group with 2 updates (#5882) Bumps the github-actions group with 2 updates: [actions/github-script](https://github.com/actions/github-script) and [actions/setup-node](https://github.com/actions/setup-node). 
Updates `actions/github-script` from 7 to 8 - [Release notes](https://github.com/actions/github-script/releases) - [Commits](https://github.com/actions/github-script/compare/v7...v8) Updates `actions/setup-node` from 4 to 5 - [Release notes](https://github.com/actions/setup-node/releases) - [Commits](https://github.com/actions/setup-node/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/github-script dependency-version: '8' dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: actions/setup-node dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/cbench.yml | 2 +- .github/workflows/ui-ci.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cbench.yml b/.github/workflows/cbench.yml index 79fbed7ff8b..6a966489274 100644 --- a/.github/workflows/cbench.yml +++ b/.github/workflows/cbench.yml @@ -103,7 +103,7 @@ jobs: run: | echo "::error title=User not allowed to run the benchmark::User must be in list ${{ steps.authorized-users.outputs.users }}" - name: Add a PR comment with comparison results - uses: actions/github-script@v7 + uses: actions/github-script@v8 if: contains(fromJSON(steps.authorized-users.outputs.users), github.actor) && github.event_name == 'pull_request_target' # Inspired from: https://github.com/actions/github-script/blob/60a0d83039c74a4aee543508d2ffcb1c3799cdea/.github/workflows/pull-request-test.yml with: diff --git a/.github/workflows/ui-ci.yml b/.github/workflows/ui-ci.yml index 5979517a96d..5c6e9d68e14 100644 --- a/.github/workflows/ui-ci.yml +++ b/.github/workflows/ui-ci.yml @@ -59,7 +59,7 @@ jobs: QW_TEST_DATABASE_URL: postgres://quickwit-dev:quickwit-dev@postgres:5432/quickwit-metastore-dev steps: - uses: actions/checkout@v5 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v5 with: node-version: 20 cache: "yarn" From 4a1d9cdc4d1dd242cc80d3fc11e8c18abd894e48 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Tue, 9 Sep 2025 14:27:31 -0400 Subject: [PATCH 5/9] Remove search stream endpoint (#5886) --- CHANGELOG.md | 2 +- docs/configuration/node-config.md | 3 +- .../add-full-text-search-to-your-olap-db.md | 254 ------ docs/overview/introduction.md | 1 - docs/reference/rest-api.md | 55 -- quickwit/quickwit-cli/tests/cli.rs | 12 - .../quickwit-config/src/node_config/mod.rs | 16 +- .../src/node_config/serialize.rs | 2 +- .../protos/quickwit/search.proto | 73 -- .../src/codegen/quickwit/quickwit.search.rs | 182 ---- quickwit/quickwit-proto/src/lib.rs | 15 - quickwit/quickwit-search/src/client.rs | 62 +- .../quickwit-search/src/cluster_client.rs | 170 +--- quickwit/quickwit-search/src/filters.rs | 173 ---- quickwit/quickwit-search/src/leaf.rs | 21 - quickwit/quickwit-search/src/lib.rs | 3 - quickwit/quickwit-search/src/retry/mod.rs | 1 - .../src/retry/search_stream.rs | 128 --- .../src/search_stream/collector.rs | 241 ------ .../quickwit-search/src/search_stream/leaf.rs | 792 ------------------ .../quickwit-search/src/search_stream/mod.rs | 183 ---- .../quickwit-search/src/search_stream/root.rs | 386 --------- quickwit/quickwit-search/src/service.rs | 64 +- quickwit/quickwit-serve/src/rest.rs | 2 - .../src/search_api/grpc_adapter.rs | 31 +- quickwit/quickwit-serve/src/search_api/mod.rs | 125 --- 
.../src/search_api/rest_handler.rs | 282 +------ 27 files changed, 23 insertions(+), 3256 deletions(-) delete mode 100644 docs/guides/add-full-text-search-to-your-olap-db.md delete mode 100644 quickwit/quickwit-search/src/filters.rs delete mode 100644 quickwit/quickwit-search/src/retry/search_stream.rs delete mode 100644 quickwit/quickwit-search/src/search_stream/collector.rs delete mode 100644 quickwit/quickwit-search/src/search_stream/leaf.rs delete mode 100644 quickwit/quickwit-search/src/search_stream/mod.rs delete mode 100644 quickwit/quickwit-search/src/search_stream/root.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e149eea60dc..6cf3abcc4f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,7 +91,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove support for 2-digit years in java datetime parser (#5596) - Remove DocMapper trait (#5508) - Remove support for AWS Lambda (#5884) - +- Remove search stream endpoint (#5886) # [0.8.1] diff --git a/docs/configuration/node-config.md b/docs/configuration/node-config.md index 03bdb9164d6..ed4bca51517 100644 --- a/docs/configuration/node-config.md +++ b/docs/configuration/node-config.md @@ -198,11 +198,10 @@ This section contains the configuration options for a Searcher. | --- | --- | --- | | `aggregation_memory_limit` | Controls the maximum amount of memory that can be used for aggregations before aborting. This limit is per searcher node. A node may run concurrent queries, which share the limit. The first query that will hit the limit will be aborted and frees its memory. It is used to prevent excessive memory usage during the aggregation phase, which can lead to performance degradation or crashes. | `500M`| | `aggregation_bucket_limit` | Determines the maximum number of buckets returned to the client. | `65000` | -| `fast_field_cache_capacity` | Fast field in memory cache capacity on a Searcher. If your filter by dates, run aggregations, range queries, or if you use the search stream API, or even for tracing, it might worth increasing this parameter. The [metrics](../reference/metrics.md) starting by `quickwit_cache_fastfields_cache` can help you make an informed choice when setting this value. | `1G` | +| `fast_field_cache_capacity` | Fast field in memory cache capacity on a Searcher. If your filter by dates, run aggregations, range queries, or even for tracing, it might worth increasing this parameter. The [metrics](../reference/metrics.md) starting by `quickwit_cache_fastfields_cache` can help you make an informed choice when setting this value. | `1G` | | `split_footer_cache_capacity` | Split footer in memory cache (it is essentially the hotcache) capacity on a Searcher.| `500M` | | `partial_request_cache_capacity` | Partial request in memory cache capacity on a Searcher. Cache intermediate state for a request, possibly making subsequent requests faster. It can be disabled by setting the size to `0`. | `64M` | | `max_num_concurrent_split_searches` | Maximum number of concurrent split search requests running on a Searcher. | `100` | -| `max_num_concurrent_split_streams` | Maximum number of concurrent split stream requests running on a Searcher. | `100` | | `split_cache` | Searcher split cache configuration options defined in the section below. Cache disabled if unspecified. | | | `request_timeout_secs` | The time before a search request is cancelled. This should match the timeout of the stack calling into quickwit if there is one set. 
| `30` | diff --git a/docs/guides/add-full-text-search-to-your-olap-db.md b/docs/guides/add-full-text-search-to-your-olap-db.md deleted file mode 100644 index 999a831cda5..00000000000 --- a/docs/guides/add-full-text-search-to-your-olap-db.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: Full-text search on ClickHouse -description: Add full-text search to ClickHouse, using the Quickwit search streaming feature. -tags: [clickhouse, integration] -icon_url: /img/tutorials/clickhouse.svg -sidebar_position: 10 ---- - - -This guide will help you add full-text search to a well-known OLAP database, ClickHouse, using the Quickwit search streaming feature. Indeed Quickwit exposes a REST endpoint that streams ids or whatever attributes matching a search query **extremely fast** (up to 50 million in 1 second), and ClickHouse can easily use them with joins queries. - -We will take the [GitHub archive dataset](https://www.gharchive.org/), which gathers more than 3 billion GitHub events: `PullRequestEvent`, `IssuesEvent`... You can dive into this [great analysis](https://ghe.clickhouse.tech/) made by ClickHouse to have a good understanding of the dataset. We also took strong inspiration from this work, and we are very grateful to them for sharing this. - -## Install - -```bash -curl -L https://install.quickwit.io | sh -cd quickwit-v*/ -``` - -## Start a Quickwit server - -```bash -./quickwit run -``` - -## Create a Quickwit index - -After [starting Quickwit], we need to create an index configured to receive these events. Let's first look at the data to ingest. Here is an event example: - -```JSON -{ - "id": 11410577343, - "event_type": "PullRequestEvent", - "actor_login": "renovate[bot]", - "repo_name": "dmtrKovalenko/reason-date-fns", - "created_at": 1580515200000, - "action": "closed", - "number": 44, - "title": "Update dependency rollup to ^1.31.0", - "labels": [], - "ref": null, - "additions": 5, - "deletions": 5, - "commit_id": null, - "body":"This PR contains the following updates..." -} -``` - -We don't need to index all fields described above as `title` and `body` are the fields of interest for our full-text search tutorial. -The `id` will be helpful for making the JOINs in ClickHouse, `created_at` and `event_type` may also be beneficial for timestamp pruning and filtering. - -```yaml title="gh-archive-index-config.yaml" -version: 0.7 -index_id: gh-archive -# By default, the index will be stored in your data directory, -# but you can store it on s3 or on a custom path as follows: -# index_uri: s3://my-bucket/gh-archive -# index_uri: file://my-big-ssd-harddrive/ -doc_mapping: - store_source: false - field_mappings: - - name: id - type: u64 - fast: true - - name: created_at - type: datetime - input_formats: - - unix_timestamp - output_format: unix_timestamp_secs - fast_precision: seconds - fast: true - - name: event_type - type: text - tokenizer: raw - - name: title - type: text - tokenizer: default - record: position - - name: body - type: text - tokenizer: default - record: position - timestamp_field: created_at - -search_settings: - default_search_fields: [title, body] -``` - -```bash -curl -o gh-archive-index-config.yaml https://raw.githubusercontent.com/quickwit-oss/quickwit/main/config/tutorials/gh-archive/index-config-for-clickhouse.yaml -./quickwit index create --index-config gh-archive-index-config.yaml -``` - -## Indexing events - -The dataset is a compressed [NDJSON file](https://quickwit-datasets-public.s3.amazonaws.com/gh-archive/gh-archive-2021-12.json.gz). -Let's index it. 
- -```bash -wget https://quickwit-datasets-public.s3.amazonaws.com/gh-archive/gh-archive-2021-12-text-only.json.gz -gunzip -c gh-archive-2021-12-text-only.json.gz | ./quickwit index ingest --index gh-archive -``` - -You can check it's working by using the `search` command and looking for `tantivy` word: -```bash -./quickwit index search --index gh-archive --query "tantivy" -``` - - -## Streaming IDs - -We are now ready to fetch some ids with the search stream endpoint. Let's start by streaming them on a simple -query and with a `csv` output format. - -```bash -curl "http://127.0.0.1:7280/api/v1/gh-archive/search/stream?query=tantivy&output_format=csv&fast_field=id" -``` - -We will use the `click_house` binary output format in the following sections to speed up queries. - - -## ClickHouse - -Let's leave Quickwit for now and [install ClickHouse](https://clickhouse.com/docs/en/install). Start a ClickHouse server. - -### Create database and table - -Once installed, just start a client and execute the following sql statements: -```SQL -CREATE DATABASE "gh-archive"; -USE "gh-archive"; - - -CREATE TABLE github_events -( - id UInt64, - event_type Enum('CommitCommentEvent' = 1, 'CreateEvent' = 2, 'DeleteEvent' = 3, 'ForkEvent' = 4, - 'GollumEvent' = 5, 'IssueCommentEvent' = 6, 'IssuesEvent' = 7, 'MemberEvent' = 8, - 'PublicEvent' = 9, 'PullRequestEvent' = 10, 'PullRequestReviewCommentEvent' = 11, - 'PushEvent' = 12, 'ReleaseEvent' = 13, 'SponsorshipEvent' = 14, 'WatchEvent' = 15, - 'GistEvent' = 16, 'FollowEvent' = 17, 'DownloadEvent' = 18, 'PullRequestReviewEvent' = 19, - 'ForkApplyEvent' = 20, 'Event' = 21, 'TeamAddEvent' = 22), - actor_login LowCardinality(String), - repo_name LowCardinality(String), - created_at Int64, - action Enum('none' = 0, 'created' = 1, 'added' = 2, 'edited' = 3, 'deleted' = 4, 'opened' = 5, 'closed' = 6, 'reopened' = 7, 'assigned' = 8, 'unassigned' = 9, - 'labeled' = 10, 'unlabeled' = 11, 'review_requested' = 12, 'review_request_removed' = 13, 'synchronize' = 14, 'started' = 15, 'published' = 16, 'update' = 17, 'create' = 18, 'fork' = 19, 'merged' = 20), - comment_id UInt64, - body String, - ref LowCardinality(String), - number UInt32, - title String, - labels Array(LowCardinality(String)), - additions UInt32, - deletions UInt32, - commit_id String -) ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at); -``` - -### Import events - -We have created a second dataset, `gh-archive-2021-12.json.gz`, which gathers all events, even ones with no -text. So it's better to insert it into ClickHouse, but if you don't have the time, you can use the dataset -`gh-archive-2021-12-text-only.json.gz` used for Quickwit. 
- -```bash -wget https://quickwit-datasets-public.s3.amazonaws.com/gh-archive/gh-archive-2021-12.json.gz -gunzip -c gh-archive-2021-12.json.gz | clickhouse-client -d gh-archive --query="INSERT INTO github_events FORMAT JSONEachRow" -``` - -Let's check it's working: -```SQL -# Top repositories by stars -SELECT repo_name, count() AS stars -FROM github_events -GROUP BY repo_name -ORDER BY stars DESC LIMIT 5 - -┌─repo_name─────────────────────────────────┬─stars─┐ -│ test-organization-kkjeer/app-test-2 │ 16697 │ -│ test-organization-kkjeer/bot-validation-2 │ 15326 │ -│ microsoft/winget-pkgs │ 14099 │ -│ conda-forge/releases │ 13332 │ -│ NixOS/nixpkgs │ 12860 │ -└───────────────────────────────────────────┴───────┘ -``` - -### Use Quickwit search inside ClickHouse - -ClickHouse has an exciting feature called [URL Table Engine](https://clickhouse.com/docs/en/engines/table-engines/special/url/) that queries data from a remote HTTP/HTTPS server. -This is precisely what we need: by creating a table pointing to Quickwit search stream endpoint, we will fetch ids that match a query from ClickHouse. - -```SQL -SELECT count(*) FROM url('http://127.0.0.1:7280/api/v1/gh-archive/search/stream?query=log4j+OR+log4shell&fast_field=id&output_format=click_house_row_binary', RowBinary, 'id UInt64') - -┌─count()─┐ -│ 217469 │ -└─────────┘ - -1 row in set. Elapsed: 0.068 sec. Processed 217.47 thousand rows, 1.74 MB (3.19 million rows/s., 25.55 MB/s.) -``` - -We are fetching 217 469 u64 ids in 0.068 seconds. That's 3.19 million rows per second, not bad. And it's possible to increase the throughput if fast field are already cached. - - -Let's do another example with a more exciting query that will match `log4j` or `log4shell` and count events per day: - -```SQL -SELECT - count(*), - toDate(fromUnixTimestamp64Milli(created_at)) AS date -FROM github_events -WHERE id IN ( - SELECT id - FROM url('http://127.0.0.1:7280/api/v1/gh-archive/search/stream?query=log4j+OR+log4shell&fast_field=id&output_format=click_house_row_binary', RowBinary, 'id UInt64') -) -GROUP BY date - -Query id: 10cb0d5a-7817-424e-8248-820fa2c425b8 - -┌─count()─┬───────date─┐ -│ 96 │ 2021-12-01 │ -│ 66 │ 2021-12-02 │ -│ 70 │ 2021-12-03 │ -│ 62 │ 2021-12-04 │ -│ 67 │ 2021-12-05 │ -│ 167 │ 2021-12-06 │ -│ 140 │ 2021-12-07 │ -│ 104 │ 2021-12-08 │ -│ 157 │ 2021-12-09 │ -│ 88110 │ 2021-12-10 │ -│ 2937 │ 2021-12-11 │ -│ 1533 │ 2021-12-12 │ -│ 5935 │ 2021-12-13 │ -│ 118025 │ 2021-12-14 │ -└─────────┴────────────┘ - -14 rows in set. Elapsed: 0.124 sec. Processed 8.35 million rows, 123.10 MB (67.42 million rows/s., 993.55 MB/s.) - -``` - -We can see two spikes on the 2021-12-10 and 2021-12-14. - -## Wrapping up - -We have just scratched the surface of full-text search from ClickHouse with this small subset of GitHub archive. -You can play with the complete dataset that you can download from our public S3 bucket. -We have made available monthly gzipped ndjson files from 2015 until 2021. Here are `2015-01` links: -- full JSON dataset https://quickwit-datasets-public.s3.amazonaws.com/gh-archive/gh-archive-2015-01.json.gz -- text-only JSON dataset https://quickwit-datasets-public.s3.amazonaws.com/gh-archive/gh-archive-2015-01-text-only.json.gz - -The search stream endpoint is powerful enough to stream 100 million ids to ClickHouse in less than 2 seconds on a multi TB dataset. -And you should be comfortable playing with search stream on even bigger datasets. 
diff --git a/docs/overview/introduction.md b/docs/overview/introduction.md index db84adfda6e..a219a8e82aa 100644 --- a/docs/overview/introduction.md +++ b/docs/overview/introduction.md @@ -27,7 +27,6 @@ Check out our guides to see how you can use Quickwit: - [Log management](../log-management/overview.md) - [Distributed Tracing](../distributed-tracing/overview.md) -- Adding full-text search capabilities to [OLAP databases such as ClickHouse](../guides/add-full-text-search-to-your-olap-db). ## Key features diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md index 5047afd6308..54ac05c040b 100644 --- a/docs/reference/rest-api.md +++ b/docs/reference/rest-api.md @@ -113,61 +113,6 @@ GET api/v1/stackoverflow*/search } ``` -### Search stream in an index - -``` -GET api/v1//search/stream?query=searchterm&fast_field=my_id -``` - -Streams field values from ALL documents matching a search query in the target index ``, in a specified output format among the following: - -- [CSV](https://datatracker.ietf.org/doc/html/rfc4180) -- [ClickHouse RowBinary](https://clickhouse.tech/docs/en/interfaces/formats/#rowbinary). If `partition_by_field` is set, Quickwit returns chunks of data for each partition field value. Each chunk starts with 16 bytes being partition value and content length and then the `fast_field` values in `RowBinary` format. - -`fast_field` and `partition_by_field` must be fast fields of type `i64` or `u64`. - -This endpoint is available as long as you have at least one node running a searcher service in the cluster. - - - -:::note - -The endpoint will return 10 million values if 10 million documents match the query. This is expected, this endpoint is made to support queries matching millions of documents and return field values in a reasonable response time. - -::: - -#### Path variable - -| Variable | Description | -| ------------- | ------------- | -| `index id` | The index id | - -#### Get parameters - -| Variable | Type | Description | Default value | -|---------------------|------------|----------------------------------------------------------------------------------------------------------|----------------------------------------------------| -| `query` | `String` | Query text. See the [query language doc](query-language.md) | _required_ | -| `fast_field` | `String` | Name of a field to retrieve from documents. This field must be a fast field of type `i64` or `u64`. | _required_ | -| `search_field` | `[String]` | Fields to search on. Comma-separated list, e.g. "field1,field2" | index_config.search_settings.default_search_fields | -| `start_timestamp` | `i64` | If set, restrict search to documents with a `timestamp >= start_timestamp`. The value must be in seconds. | | -| `end_timestamp` | `i64` | If set, restrict search to documents with a `timestamp < end_timestamp`. The value must be in seconds. | | -| `partition_by_field` | `String` | If set, the endpoint returns chunks of data for each partition field value. This field must be a fast field of type `i64` or `u64`. | | -| `output_format` | `String` | Response output format. `csv` or `clickHouseRowBinary` | `csv` | - -:::info -The `start_timestamp` and `end_timestamp` should be specified in seconds regardless of the timestamp field precision. -::: - -#### Response - -The response is an HTTP stream. Depending on the client's capability, it is an HTTP1.1 [chunked transfer encoded stream](https://en.wikipedia.org/wiki/Chunked_transfer_encoding) or an HTTP2 stream. 
- -It returns a list of all the field values from documents matching the query. The field must be marked as "fast" in the index config for this to work. -The formatting is based on the specified output format. - -On error, an "X-Stream-Error" header will be sent via the trailers channel with information about the error, and the stream will be closed via [`sender.abort()`](https://docs.rs/hyper/0.14.16/hyper/body/struct.Sender.html#method.abort). -Depending on the client, the trailer header with error details may not be shown. The error will also be logged in quickwit ("Error when streaming search results"). - ## Ingest API ### Ingest data into an index diff --git a/quickwit/quickwit-cli/tests/cli.rs b/quickwit/quickwit-cli/tests/cli.rs index 3ecc473a7a8..bbd416cfd7f 100644 --- a/quickwit/quickwit-cli/tests/cli.rs +++ b/quickwit/quickwit-cli/tests/cli.rs @@ -960,18 +960,6 @@ async fn test_all_local_index() { let result: Value = serde_json::from_str(&query_response).unwrap(); assert_eq!(result["num_hits"], Value::Number(Number::from(2i64))); - let search_stream_response = reqwest::get(format!( - "http://127.0.0.1:{}/api/v1/{}/search/stream?query=level:info&output_format=csv&fast_field=ts", - test_env.rest_listen_port, - test_env.index_id - )) - .await - .unwrap() - .text() - .await - .unwrap(); - assert_eq!(search_stream_response, "72057597000000\n72057608000000\n"); - let args = DeleteIndexArgs { client_args: test_env.default_client_args(), index_id, diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index ee2666f944e..5b5176fda7f 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -268,7 +268,9 @@ pub struct SearcherConfig { pub split_footer_cache_capacity: ByteSize, pub partial_request_cache_capacity: ByteSize, pub max_num_concurrent_split_searches: usize, - pub max_num_concurrent_split_streams: usize, + // Deprecated: stream search requests are no longer supported. + #[serde(alias = "max_num_concurrent_split_streams", default, skip_serializing)] + pub _max_num_concurrent_split_streams: Option, // Strangely, if None, this will also have the effect of not forwarding // to searcher. // TODO document and fix if necessary. 
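The hunk above keeps old node configs working after the removal: the deleted `max_num_concurrent_split_streams` setting is replaced by a placeholder field that accepts the legacy key and discards its value. Below is a minimal standalone sketch of that accept-and-ignore serde pattern; the struct name and the JSON input are illustrative assumptions, not the actual Quickwit config.

```rust
// Sketch of the deprecation pattern from the hunk above: the old key is still
// accepted via an alias, but its value is dropped into `IgnoredAny`.
use serde::Deserialize;
use serde::de::IgnoredAny;

#[derive(Debug, Deserialize)]
struct SearcherConfigSketch {
    max_num_concurrent_split_searches: usize,
    // Legacy configs that still set `max_num_concurrent_split_streams` keep
    // deserializing, but the value carries no data; in the real struct,
    // `skip_serializing` also keeps it out of any re-serialized config.
    #[serde(alias = "max_num_concurrent_split_streams", default)]
    _max_num_concurrent_split_streams: Option<IgnoredAny>,
}

fn main() {
    // A legacy config that still sets the removed option.
    let legacy = r#"{
        "max_num_concurrent_split_searches": 100,
        "max_num_concurrent_split_streams": 120
    }"#;
    let config: SearcherConfigSketch = serde_json::from_str(legacy).unwrap();
    // The deprecated key was accepted but its value was ignored.
    assert!(config._max_num_concurrent_split_streams.is_some());
    println!("{config:?}");
}
```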
@@ -322,8 +324,8 @@ impl Default for SearcherConfig { fast_field_cache_capacity: ByteSize::gb(1), split_footer_cache_capacity: ByteSize::mb(500), partial_request_cache_capacity: ByteSize::mb(64), - max_num_concurrent_split_streams: 100, max_num_concurrent_split_searches: 100, + _max_num_concurrent_split_streams: None, aggregation_memory_limit: ByteSize::mb(500), aggregation_bucket_limit: 65000, split_cache: None, @@ -355,16 +357,6 @@ impl SearcherConfig { split_cache_limits.max_file_descriptors ); } - if self.max_num_concurrent_split_streams - > split_cache_limits.max_file_descriptors.get() as usize - { - anyhow::bail!( - "max_num_concurrent_split_streams ({}) must be lower or equal to \ - split_cache.max_file_descriptors ({})", - self.max_num_concurrent_split_streams, - split_cache_limits.max_file_descriptors - ); - } if self.warmup_single_split_initial_allocation > self.warmup_memory_budget { anyhow::bail!( "warmup_single_split_initial_allocation ({}) must be lower or equal to \ diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index 19f5e13865c..ab8ae75243d 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -662,7 +662,7 @@ mod tests { split_footer_cache_capacity: ByteSize::gb(1), partial_request_cache_capacity: ByteSize::mb(64), max_num_concurrent_split_searches: 150, - max_num_concurrent_split_streams: 120, + _max_num_concurrent_split_streams: Some(serde::de::IgnoredAny), split_cache: None, request_timeout_secs: NonZeroU64::new(30).unwrap(), storage_timeout_policy: Some(crate::StorageTimeoutPolicy { diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index ca96ad13e60..be5fbf3ca9d 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -37,9 +37,6 @@ service SearchService { /// This methods takes `PartialHit`s and returns `Hit`s. rpc FetchDocs(FetchDocsRequest) returns (FetchDocsResponse); - // Perform a leaf stream on a given set of splits. - rpc LeafSearchStream(LeafSearchStreamRequest) returns (stream LeafSearchStreamResponse); - // Root list terms API. // This RPC identifies the set of splits on which the query should run on, // and dispatches the several calls to `LeafListTerms`. @@ -578,73 +575,3 @@ message LeafListTermsResponse { // Total number of single split search attempted. uint64 num_attempted_splits = 4; } - -// -- Stream ------------------- - -enum OutputFormat { - // Comma Separated Values format (https://datatracker.ietf.org/doc/html/rfc4180). - // The delimiter is `,`. - CSV = 0; //< This will be the default value - // Format data by row in ClickHouse binary format. - // https://clickhouse.tech/docs/en/interfaces/formats/#rowbinary - CLICK_HOUSE_ROW_BINARY = 1; -} - -message SearchStreamRequest { - // Index ID - string index_id = 1; - - // deprecated `query`. - reserved 2; - - // Quickwit Query AST encoded in Json - string query_ast = 11; - - // deprecated `search_fields` - reserved 3; - - // The time filter is interpreted as a semi-open interval. 
[start, end) - optional int64 start_timestamp = 4; - optional int64 end_timestamp = 5; - - // Name of the fast field to extract - string fast_field = 6; - - // The output format - OutputFormat output_format = 7; - - reserved 8; // deprecated field: tags - - // The field by which we want to partition - optional string partition_by_field = 9; - - // Fields to extract snippet on. - repeated string snippet_fields = 10; -} - -message LeafSearchStreamRequest { - // Stream request. This is a perfect copy of the original stream request, - // that was sent to root. - SearchStreamRequest request = 1; - - // Index split ids to apply the query on. - // This ids are resolved from the index_uri defined in the stream request. - repeated SplitIdAndFooterOffsets split_offsets = 2; - - // `DocMapper` as json serialized trait. - string doc_mapper = 5; - - // Index URI. The index URI defines the location of the storage that contains the - // split files. - string index_uri = 6; - -} - - -message LeafSearchStreamResponse { - // Row of data serialized in bytes. - bytes data = 1; - - // Split id. - string split_id = 2; -} diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 6d4b8ae137a..cd051fb9c33 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -556,62 +556,6 @@ pub struct LeafListTermsResponse { pub num_attempted_splits: u64, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct SearchStreamRequest { - /// Index ID - #[prost(string, tag = "1")] - pub index_id: ::prost::alloc::string::String, - /// Quickwit Query AST encoded in Json - #[prost(string, tag = "11")] - pub query_ast: ::prost::alloc::string::String, - /// The time filter is interpreted as a semi-open interval. [start, end) - #[prost(int64, optional, tag = "4")] - pub start_timestamp: ::core::option::Option, - #[prost(int64, optional, tag = "5")] - pub end_timestamp: ::core::option::Option, - /// Name of the fast field to extract - #[prost(string, tag = "6")] - pub fast_field: ::prost::alloc::string::String, - /// The output format - #[prost(enumeration = "OutputFormat", tag = "7")] - pub output_format: i32, - /// The field by which we want to partition - #[prost(string, optional, tag = "9")] - pub partition_by_field: ::core::option::Option<::prost::alloc::string::String>, - /// Fields to extract snippet on. - #[prost(string, repeated, tag = "10")] - pub snippet_fields: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, -} -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LeafSearchStreamRequest { - /// Stream request. This is a perfect copy of the original stream request, - /// that was sent to root. - #[prost(message, optional, tag = "1")] - pub request: ::core::option::Option, - /// Index split ids to apply the query on. - /// This ids are resolved from the index_uri defined in the stream request. - #[prost(message, repeated, tag = "2")] - pub split_offsets: ::prost::alloc::vec::Vec, - /// `DocMapper` as json serialized trait. - #[prost(string, tag = "5")] - pub doc_mapper: ::prost::alloc::string::String, - /// Index URI. The index URI defines the location of the storage that contains the - /// split files. 
- #[prost(string, tag = "6")] - pub index_uri: ::prost::alloc::string::String, -} -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct LeafSearchStreamResponse { - /// Row of data serialized in bytes. - #[prost(bytes = "vec", tag = "1")] - pub data: ::prost::alloc::vec::Vec, - /// Split id. - #[prost(string, tag = "2")] - pub split_id: ::prost::alloc::string::String, -} -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] @@ -757,40 +701,6 @@ impl SortDatetimeFormat { } } } -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[serde(rename_all = "snake_case")] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] -#[repr(i32)] -pub enum OutputFormat { - /// Comma Separated Values format (). - /// The delimiter is `,`. - /// - /// < This will be the default value - Csv = 0, - /// Format data by row in ClickHouse binary format. - /// - ClickHouseRowBinary = 1, -} -impl OutputFormat { - /// String value of the enum field names used in the ProtoBuf definition. - /// - /// The values are not transformed in any way and thus are considered stable - /// (if the ProtoBuf definition does not change) and safe for programmatic use. - pub fn as_str_name(&self) -> &'static str { - match self { - Self::Csv => "CSV", - Self::ClickHouseRowBinary => "CLICK_HOUSE_ROW_BINARY", - } - } - /// Creates an enum from field names used in the ProtoBuf definition. - pub fn from_str_name(value: &str) -> ::core::option::Option { - match value { - "CSV" => Some(Self::Csv), - "CLICK_HOUSE_ROW_BINARY" => Some(Self::ClickHouseRowBinary), - _ => None, - } - } -} /// Generated client implementations. pub mod search_service_client { #![allow( @@ -965,33 +875,6 @@ pub mod search_service_client { .insert(GrpcMethod::new("quickwit.search.SearchService", "FetchDocs")); self.inner.unary(req, path, codec).await } - /// Perform a leaf stream on a given set of splits. - pub async fn leaf_search_stream( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result< - tonic::Response>, - tonic::Status, - > { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic::codec::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/quickwit.search.SearchService/LeafSearchStream", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new("quickwit.search.SearchService", "LeafSearchStream"), - ); - self.inner.server_streaming(req, path, codec).await - } /// Root list terms API. /// This RPC identifies the set of splits on which the query should run on, /// and dispatches the several calls to `LeafListTerms`. @@ -1271,23 +1154,6 @@ pub mod search_service_server { tonic::Response, tonic::Status, >; - /// Server streaming response type for the LeafSearchStream method. - type LeafSearchStreamStream: tonic::codegen::tokio_stream::Stream< - Item = std::result::Result< - super::LeafSearchStreamResponse, - tonic::Status, - >, - > - + std::marker::Send - + 'static; - /// Perform a leaf stream on a given set of splits. - async fn leaf_search_stream( - &self, - request: tonic::Request, - ) -> std::result::Result< - tonic::Response, - tonic::Status, - >; /// Root list terms API. 
/// This RPC identifies the set of splits on which the query should run on, /// and dispatches the several calls to `LeafListTerms`. @@ -1571,54 +1437,6 @@ pub mod search_service_server { }; Box::pin(fut) } - "/quickwit.search.SearchService/LeafSearchStream" => { - #[allow(non_camel_case_types)] - struct LeafSearchStreamSvc(pub Arc); - impl< - T: SearchService, - > tonic::server::ServerStreamingService< - super::LeafSearchStreamRequest, - > for LeafSearchStreamSvc { - type Response = super::LeafSearchStreamResponse; - type ResponseStream = T::LeafSearchStreamStream; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::leaf_search_stream(&inner, request) - .await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = LeafSearchStreamSvc(inner); - let codec = tonic::codec::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.server_streaming(method, req).await; - Ok(res) - }; - Box::pin(fut) - } "/quickwit.search.SearchService/RootListTerms" => { #[allow(non_camel_case_types)] struct RootListTermsSvc(pub Arc); diff --git a/quickwit/quickwit-proto/src/lib.rs b/quickwit/quickwit-proto/src/lib.rs index c61fa799491..07f9831ef17 100644 --- a/quickwit/quickwit-proto/src/lib.rs +++ b/quickwit/quickwit-proto/src/lib.rs @@ -104,21 +104,6 @@ pub mod opentelemetry { } } -impl TryFrom for search::SearchRequest { - type Error = anyhow::Error; - - fn try_from(search_stream_req: search::SearchStreamRequest) -> Result { - Ok(Self { - index_id_patterns: vec![search_stream_req.index_id], - query_ast: search_stream_req.query_ast, - snippet_fields: search_stream_req.snippet_fields, - start_timestamp: search_stream_req.start_timestamp, - end_timestamp: search_stream_req.end_timestamp, - ..Default::default() - }) - } -} - impl TryFrom for search::SearchRequest { type Error = anyhow::Error; diff --git a/quickwit/quickwit-search/src/client.rs b/quickwit/quickwit-search/src/client.rs index 0715e74cf1f..194bf0b2bd0 100644 --- a/quickwit/quickwit-search/src/client.rs +++ b/quickwit/quickwit-search/src/client.rs @@ -18,18 +18,14 @@ use std::sync::Arc; use std::time::Duration; use bytesize::ByteSize; -use futures::{StreamExt, TryStreamExt}; use http::Uri; -use quickwit_proto::search::{ - GetKvRequest, LeafSearchStreamResponse, PutKvRequest, ReportSplitsRequest, -}; +use quickwit_proto::search::{GetKvRequest, PutKvRequest, ReportSplitsRequest}; use quickwit_proto::tonic::Request; use quickwit_proto::tonic::codegen::InterceptedService; use quickwit_proto::tonic::transport::{Channel, Endpoint}; use quickwit_proto::{SpanContextInterceptor, tonic}; -use tokio_stream::wrappers::UnboundedReceiverStream; use tower::timeout::Timeout; -use tracing::{Instrument, info_span, warn}; +use tracing::warn; use crate::SearchService; use crate::error::parse_grpc_error; @@ -148,60 +144,6 @@ impl SearchServiceClient { } } - /// Perform leaf stream. 
- pub async fn leaf_search_stream( - &mut self, - request: quickwit_proto::search::LeafSearchStreamRequest, - ) -> UnboundedReceiverStream> { - match &mut self.client_impl { - SearchServiceClientImpl::Grpc(grpc_client) => { - let mut grpc_client_clone = grpc_client.clone(); - let span = info_span!( - "client:leaf_search_stream", - grpc_addr=?self.grpc_addr() - ); - let tonic_request = Request::new(request); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - tokio::spawn( - async move { - let tonic_result = grpc_client_clone - .leaf_search_stream(tonic_request) - .await - .map_err(|tonic_error| parse_grpc_error(&tonic_error)); - // If the grpc client fails, send the error in the channel and stop. - if let Err(error) = tonic_result { - // It is ok to ignore error sending error. - let _ = result_sender.send(Err(error)); - return; - } - let mut results_stream = tonic_result - .unwrap() - .into_inner() - .map_err(|tonic_error| parse_grpc_error(&tonic_error)); - while let Some(search_result) = results_stream.next().await { - let send_result = result_sender.send(search_result); - // If we get a sending error, stop consuming the stream. - if send_result.is_err() { - break; - } - } - } - .instrument(span), - ); - UnboundedReceiverStream::new(result_receiver) - } - SearchServiceClientImpl::Local(service) => { - let stream_result = service.leaf_search_stream(request).await; - stream_result.unwrap_or_else(|error| { - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - // Receiver cannot be closed here, ignore error. - let _ = result_sender.send(Err(error)); - UnboundedReceiverStream::new(result_receiver) - }) - } - } - } - /// Perform fetch docs. pub async fn fetch_docs( &mut self, diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index c4e78179b73..2b09fd3c641 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -19,19 +19,14 @@ use futures::future::ready; use futures::{Future, StreamExt}; use quickwit_proto::search::{ FetchDocsRequest, FetchDocsResponse, GetKvRequest, LeafListFieldsRequest, LeafListTermsRequest, - LeafListTermsResponse, LeafSearchRequest, LeafSearchResponse, LeafSearchStreamRequest, - LeafSearchStreamResponse, ListFieldsResponse, PutKvRequest, + LeafListTermsResponse, LeafSearchRequest, LeafSearchResponse, ListFieldsResponse, PutKvRequest, }; use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults; -use tokio::sync::mpsc::error::SendError; -use tokio::sync::mpsc::{UnboundedSender, unbounded_channel}; -use tokio_stream::wrappers::UnboundedReceiverStream; use tracing::{debug, error, info, warn}; use crate::retry::search::LeafSearchRetryPolicy; -use crate::retry::search_stream::{LeafSearchStreamRetryPolicy, SuccessfulSplitIds}; use crate::retry::{DefaultRetryPolicy, RetryPolicy, retry_client}; -use crate::{SearchError, SearchJobPlacer, SearchServiceClient, merge_resource_stats_it}; +use crate::{SearchJobPlacer, SearchServiceClient, merge_resource_stats_it}; /// Maximum number of put requests emitted to perform a replicated given PUT KV. const MAX_PUT_KV_ATTEMPTS: usize = 6; @@ -131,56 +126,6 @@ impl ClusterClient { client.leaf_list_fields(request.clone()).await } - /// Leaf search stream with retry on another node client. 
- pub async fn leaf_search_stream( - &self, - request: LeafSearchStreamRequest, - mut client: SearchServiceClient, - ) -> UnboundedReceiverStream> { - // We need a dedicated channel to send results with retry. First we send only the successful - // responses and ignore errors. If there are some errors, we make one retry and - // in this case we send all results. - let (result_sender, result_receiver) = unbounded_channel(); - let client_pool = self.search_job_placer.clone(); - let retry_policy = LeafSearchStreamRetryPolicy {}; - tokio::spawn(async move { - let result_stream = client.leaf_search_stream(request.clone()).await; - // Forward only responses and not errors to the sender as we will make one retry on - // errors. - let forward_result = - forward_leaf_search_stream(result_stream, result_sender.clone(), false).await; - if let Some(retry_request) = retry_policy.retry_request(request, &forward_result) { - assert!(!retry_request.split_offsets.is_empty()); - let retry_client_res = retry_client( - &client_pool, - client.grpc_addr(), - &retry_request.split_offsets[0].split_id, - ) - .await; - let mut retry_client = match retry_client_res { - Ok(retry_client) => retry_client, - Err(error) => { - // Propagates the error if we cannot get a new client and stops the task. - let _ = result_sender.send(Err(SearchError::from(error))); - return; - } - }; - debug!( - "Leaf search stream response error. Retry once to execute {:?} with {:?}", - retry_request, client - ); - let retry_results_stream = retry_client.leaf_search_stream(retry_request).await; - // Forward all results to the result_sender as we won't do another retry. - // It is ok to ignore send errors, there is nothing else to do. - let _ = - forward_leaf_search_stream(retry_results_stream, result_sender.clone(), true) - .await; - } - }); - - UnboundedReceiverStream::new(result_receiver) - } - /// Leaf search with retry on another node client. pub async fn leaf_list_terms( &self, @@ -347,46 +292,20 @@ fn merge_original_with_retry_leaf_search_results( } } -// Forward leaf search stream results into a sender and -// returns the split ids of Ok(response). -// If `send_error` is false, errors are ignored and not forwarded. This is -// useful if you want to make a retry before propagating errors. 
-async fn forward_leaf_search_stream( - mut stream: UnboundedReceiverStream>, - sender: UnboundedSender>, - send_error: bool, -) -> Result>> { - let mut successful_split_ids: Vec = Vec::new(); - while let Some(result) = stream.next().await { - match result { - Ok(response) => { - successful_split_ids.push(response.split_id.clone()); - sender.send(Ok(response))?; - } - Err(error) => { - if send_error { - sender.send(Err(error))?; - } - } - } - } - Ok(SuccessfulSplitIds(successful_split_ids)) -} - #[cfg(test)] mod tests { use std::collections::HashSet; use std::net::SocketAddr; use quickwit_proto::search::{ - LeafRequestRef, PartialHit, SearchRequest, SearchStreamRequest, SortValue, - SplitIdAndFooterOffsets, SplitSearchError, + LeafRequestRef, PartialHit, SearchRequest, SortValue, SplitIdAndFooterOffsets, + SplitSearchError, }; use quickwit_query::query_ast::qast_json_helper; use super::*; use crate::root::SearchJob; - use crate::{MockSearchService, searcher_pool_for_test}; + use crate::{MockSearchService, SearchError, searcher_pool_for_test}; fn mock_partial_hit(split_id: &str, sort_value: u64, doc_id: u32) -> PartialHit { PartialHit { @@ -450,42 +369,6 @@ mod tests { } } - fn mock_leaf_search_stream_request() -> LeafSearchStreamRequest { - let search_request = SearchStreamRequest { - index_id: "test-idx".to_string(), - query_ast: qast_json_helper("text", &["body"]), - snippet_fields: Vec::new(), - start_timestamp: None, - end_timestamp: None, - fast_field: "fast".to_string(), - output_format: 0, - partition_by_field: None, - }; - LeafSearchStreamRequest { - request: Some(search_request), - doc_mapper: "doc_mapper".to_string(), - index_uri: "uri".to_string(), - split_offsets: vec![ - SplitIdAndFooterOffsets { - split_id: "split_1".to_string(), - split_footer_start: 0, - split_footer_end: 100, - timestamp_start: None, - timestamp_end: None, - num_docs: 0, - }, - SplitIdAndFooterOffsets { - split_id: "split_2".to_string(), - split_footer_start: 0, - split_footer_end: 100, - timestamp_start: None, - timestamp_end: None, - num_docs: 0, - }, - ], - } - } - #[tokio::test] async fn test_cluster_client_fetch_docs_no_retry() { let request = mock_doc_request("split_1"); @@ -703,49 +586,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_cluster_client_leaf_stream_retry_on_error() { - let request = mock_leaf_search_stream_request(); - - let mut mock_search_service_1 = MockSearchService::new(); - mock_search_service_1 - .expect_leaf_search_stream() - .return_once(|_| Err(SearchError::Internal("error".to_string()))); - - let mut mock_search_service_2 = MockSearchService::new(); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - mock_search_service_2 - .expect_leaf_search_stream() - .return_once(|_| Ok(UnboundedReceiverStream::new(result_receiver))); - - let searcher_pool = searcher_pool_for_test([ - ("127.0.0.1:1001", mock_search_service_1), - ("127.0.0.1:1002", mock_search_service_2), - ]); - let search_job_placer = SearchJobPlacer::new(searcher_pool.clone()); - let cluster_client = ClusterClient::new(search_job_placer); - - result_sender - .send(Ok(LeafSearchStreamResponse { - data: Vec::new(), - split_id: "split_1".to_string(), - })) - .unwrap(); - result_sender - .send(Err(SearchError::Internal("last split error".to_string()))) - .unwrap(); - drop(result_sender); - - let first_client_addr: SocketAddr = "127.0.0.1:1001".parse().unwrap(); - let first_client = searcher_pool.get(&first_client_addr).unwrap(); - let result = cluster_client - 
.leaf_search_stream(request, first_client) - .await; - let results: Vec<_> = result.collect().await; - assert_eq!(results.len(), 2); - assert!(results[0].is_ok()); - } - #[tokio::test] async fn test_put_kv_happy_path() { // 3 servers 1, 2, 3 diff --git a/quickwit/quickwit-search/src/filters.rs b/quickwit/quickwit-search/src/filters.rs deleted file mode 100644 index 72acfab61e4..00000000000 --- a/quickwit/quickwit-search/src/filters.rs +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::ops::{Bound, RangeBounds, RangeInclusive}; - -use tantivy::columnar::Cardinality; -use tantivy::fastfield::Column; -use tantivy::{DateTime, DocId, SegmentReader}; - -/// A filter that only retains docs within a time range. -#[derive(Clone)] -pub struct TimestampFilter { - /// The time range represented as (lower_bound, upper_bound). - // TODO replace this with a RangeInclusive if it improves perf? - time_range: (Bound, Bound), - timestamp_column: Column, -} - -impl TimestampFilter { - #[inline] - pub fn contains_timestamp(&self, ts: &DateTime) -> bool { - self.time_range.contains(ts) - } - - #[inline] - /// Fetches the timestamp of a given doc from the column storage and checks if it is within the - /// time range. - pub fn contains_doc_timestamp(&self, doc_id: DocId) -> bool { - if let Some(ts) = self.timestamp_column.first(doc_id) { - self.contains_timestamp(&ts) - } else { - false - } - } -} - -/// Creates a timestamp field depending on the user request. -/// -/// The start/end timestamp are in seconds and are interpreted as -/// a semi-open interval [start, end). -pub fn create_timestamp_filter_builder( - timestamp_field_opt: Option<&str>, - start_timestamp_secs: Option, - end_timestamp_secs: Option, -) -> Option { - let timestamp_field = timestamp_field_opt?; - if start_timestamp_secs.is_none() && end_timestamp_secs.is_none() { - return None; - } - let start_timestamp_bound: Bound = start_timestamp_secs - .map(|timestamp_secs| Bound::Included(DateTime::from_timestamp_secs(timestamp_secs))) - .unwrap_or(Bound::Unbounded); - let end_timestamp_bound: Bound = end_timestamp_secs - .map(|timestamp_secs| Bound::Excluded(DateTime::from_timestamp_secs(timestamp_secs))) - .unwrap_or(Bound::Unbounded); - Some(TimestampFilterBuilder::new( - timestamp_field.to_string(), - start_timestamp_bound, - end_timestamp_bound, - )) -} - -#[derive(Clone, Debug)] -pub struct TimestampFilterBuilder { - pub timestamp_field_name: String, - start_timestamp: Bound, - end_timestamp: Bound, -} - -impl TimestampFilterBuilder { - pub fn new( - timestamp_field_name: String, - start_timestamp: Bound, - end_timestamp: Bound, - ) -> TimestampFilterBuilder { - TimestampFilterBuilder { - timestamp_field_name, - start_timestamp, - end_timestamp, - } - } - - /// None means that all documents are matching the timestamp range. 
- pub fn build( - &self, - segment_reader: &SegmentReader, - ) -> tantivy::Result> { - let timestamp_column_opt: Option> = - segment_reader - .fast_fields() - .column_opt::(&self.timestamp_field_name)?; - let timestamp_column = timestamp_column_opt - .unwrap_or_else(|| Column::build_empty_column(segment_reader.max_doc())); - let time_range = (self.start_timestamp, self.end_timestamp); - if time_range == (Bound::Unbounded, Bound::Unbounded) { - return Ok(None); - } - if timestamp_column.index.get_cardinality() == Cardinality::Full { - let segment_range: RangeInclusive = - timestamp_column.min_value()..=timestamp_column.max_value(); - if is_segment_always_within_timestamp_range(segment_range, time_range) { - return Ok(None); - } - } - Ok(Some(TimestampFilter { - time_range, - timestamp_column, - })) - } -} - -/// Determine if all docs of a segment always satisfy the requested timestamp range. -/// -/// Note: -/// - segment_range: is an inclusive range on both ends `[min, max]`. -/// - timestamp_range: is a half open range `[min, max[`. -fn is_segment_always_within_timestamp_range( - segment_range: RangeInclusive, - timestamp_range: impl RangeBounds, -) -> bool { - timestamp_range.contains(segment_range.start()) && timestamp_range.contains(segment_range.end()) -} - -#[cfg(test)] -mod tests { - use tantivy::DateTime; - - use super::is_segment_always_within_timestamp_range; - - const TEST_START: DateTime = DateTime::from_timestamp_secs(1_662_529_435); - const TEST_MIDDLE: DateTime = DateTime::from_timestamp_secs(1_662_629_435); - const TEST_END: DateTime = DateTime::from_timestamp_secs(1_662_639_435); - - #[test] - fn test_is_segment_always_within_timestamp_range() { - assert_eq!( - is_segment_always_within_timestamp_range(TEST_START..=TEST_END, ..), - true - ); - - assert_eq!( - is_segment_always_within_timestamp_range( - TEST_START..=TEST_MIDDLE, - TEST_START..TEST_END - ), - true - ); - - assert_eq!( - is_segment_always_within_timestamp_range( - TEST_START..=TEST_END, - TEST_START..TEST_MIDDLE - ), - false - ); - - assert_eq!( - is_segment_always_within_timestamp_range(TEST_START..=TEST_END, TEST_START..TEST_END), - false - ); - } -} diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 14b84b45bbb..d470e4d3a15 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -909,27 +909,6 @@ impl QueryAstTransformer for RemoveTimestampRange<'_> { } } -pub(crate) fn rewrite_start_end_time_bounds( - start_timestamp_opt: &mut Option, - end_timestamp_opt: &mut Option, - split: &SplitIdAndFooterOffsets, -) { - if let (Some(split_start), Some(split_end)) = (split.timestamp_start, split.timestamp_end) { - if let Some(start_timestamp) = start_timestamp_opt { - // both starts are inclusive - if *start_timestamp <= split_start { - *start_timestamp_opt = None; - } - } - if let Some(end_timestamp) = end_timestamp_opt { - // search end is exclusive, split end is inclusive - if *end_timestamp > split_end { - *end_timestamp_opt = None; - } - } - } -} - /// Checks if request is a simple all query. 
/// Simple in this case would still including sorting fn is_simple_all_query(search_request: &SearchRequest) -> bool { diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index b56fd55be4b..0706cd7f81c 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -22,7 +22,6 @@ mod cluster_client; mod collector; mod error; mod fetch_docs; -mod filters; mod find_trace_ids_collector; mod leaf; mod leaf_cache; @@ -35,7 +34,6 @@ mod root; mod scroll_context; mod search_job_placer; mod search_response_rest; -mod search_stream; mod service; pub(crate) mod top_k_collector; @@ -90,7 +88,6 @@ pub use crate::search_job_placer::{Job, SearchJobPlacer}; pub use crate::search_response_rest::{ AggregationResults, SearchPlanResponseRest, SearchResponseRest, }; -pub use crate::search_stream::root_search_stream; pub use crate::service::{MockSearchService, SearchService, SearchServiceImpl}; /// A pool of searcher clients identified by their gRPC socket address. diff --git a/quickwit/quickwit-search/src/retry/mod.rs b/quickwit/quickwit-search/src/retry/mod.rs index 316569222ae..996665717cf 100644 --- a/quickwit/quickwit-search/src/retry/mod.rs +++ b/quickwit/quickwit-search/src/retry/mod.rs @@ -13,7 +13,6 @@ // limitations under the License. pub mod search; -pub mod search_stream; use std::collections::HashSet; use std::net::SocketAddr; diff --git a/quickwit/quickwit-search/src/retry/search_stream.rs b/quickwit/quickwit-search/src/retry/search_stream.rs deleted file mode 100644 index 0d94b896327..00000000000 --- a/quickwit/quickwit-search/src/retry/search_stream.rs +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use quickwit_proto::search::{LeafSearchStreamRequest, LeafSearchStreamResponse}; -use tokio::sync::mpsc::error::SendError; -use tracing::warn; - -use super::RetryPolicy; - -pub struct SuccessfulSplitIds(pub Vec); - -/// Retry policy for consuming the result stream of a LeafSearchStreamRequest. -/// A retry is only made if there are some missing splits. -/// As errors only come from a closed receiver, we ignore them. -pub struct LeafSearchStreamRetryPolicy {} - -impl - RetryPolicy< - LeafSearchStreamRequest, - SuccessfulSplitIds, - SendError>, - > for LeafSearchStreamRetryPolicy -{ - // Returns a retry request that is either: - // - a clone of the initial request on error - // - or a request on failing split ids only. - fn retry_request( - &self, - mut request: LeafSearchStreamRequest, - response_res: &Result< - SuccessfulSplitIds, - SendError>, - >, - ) -> Option { - match response_res { - Ok(SuccessfulSplitIds(successful_split_ids)) => { - if successful_split_ids.len() == request.split_offsets.len() { - // All splits were successful! - return None; - } - // We retry the failed splits. 
- request.split_offsets.retain(|split_metadata| { - !successful_split_ids.contains(&split_metadata.split_id) - }); - Some(request) - } - Err(SendError(_)) => { - // The receiver channel was dropped. - // There is no need to retry. - warn!( - "Receiver channel closed during stream search request. The client probably \ - closed the connection?" - ); - None - } - } - } -} - -#[cfg(test)] -mod tests { - use quickwit_proto::search::{ - LeafSearchStreamRequest, LeafSearchStreamResponse, SplitIdAndFooterOffsets, - }; - use tokio::sync::mpsc::error::SendError; - - use crate::retry::RetryPolicy; - use crate::retry::search_stream::{LeafSearchStreamRetryPolicy, SuccessfulSplitIds}; - - #[tokio::test] - async fn test_retry_policy_search_stream_should_not_retry_on_send_error() { - let retry_policy = LeafSearchStreamRetryPolicy {}; - let request = LeafSearchStreamRequest::default(); - let response = LeafSearchStreamResponse::default(); - let response_res = Err(SendError(Ok(response))); - let retry_req_opt = retry_policy.retry_request(request, &response_res); - assert!(retry_req_opt.is_none()); - } - - #[tokio::test] - async fn test_retry_policy_search_stream_should_not_retry_on_successful_response() { - let retry_policy = LeafSearchStreamRetryPolicy {}; - let request = LeafSearchStreamRequest::default(); - let response_res = Ok(SuccessfulSplitIds(Vec::new())); - let retry_req_opt = retry_policy.retry_request(request, &response_res); - assert!(retry_req_opt.is_none()); - } - - #[tokio::test] - async fn test_retry_policy_search_stream_should_retry_on_failed_splits() { - let split_1 = SplitIdAndFooterOffsets { - split_id: "split_1".to_string(), - split_footer_end: 100, - split_footer_start: 0, - timestamp_start: None, - timestamp_end: None, - num_docs: 0, - }; - let split_2 = SplitIdAndFooterOffsets { - split_id: "split_2".to_string(), - split_footer_end: 100, - split_footer_start: 0, - timestamp_start: None, - timestamp_end: None, - num_docs: 0, - }; - let retry_policy = LeafSearchStreamRetryPolicy {}; - let request = LeafSearchStreamRequest { - split_offsets: vec![split_1, split_2], - ..Default::default() - }; - let response_res = Ok(SuccessfulSplitIds(vec!["split_1".to_string()])); - let retry_req = retry_policy.retry_request(request, &response_res).unwrap(); - assert_eq!(retry_req.split_offsets.len(), 1); - assert_eq!(retry_req.split_offsets[0].split_id, "split_2"); - } -} diff --git a/quickwit/quickwit-search/src/search_stream/collector.rs b/quickwit/quickwit-search/src/search_stream/collector.rs deleted file mode 100644 index 33e3526de0f..00000000000 --- a/quickwit/quickwit-search/src/search_stream/collector.rs +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::HashMap; -use std::hash::Hash; -use std::marker::PhantomData; - -use tantivy::collector::{Collector, SegmentCollector}; -use tantivy::columnar::{DynamicColumn, HasAssociatedColumnType}; -use tantivy::fastfield::Column; -use tantivy::{DocId, Score, SegmentOrdinal, SegmentReader}; - -use crate::filters::{TimestampFilter, TimestampFilterBuilder}; - -#[derive(Clone)] -pub struct FastFieldSegmentCollector { - fast_field_values: Vec, - column_opt: Option>, - timestamp_filter_opt: Option, -} - -impl FastFieldSegmentCollector { - pub fn new( - column_opt: Option>, - timestamp_filter_opt: Option, - ) -> Self { - Self { - fast_field_values: Vec::new(), - column_opt, - timestamp_filter_opt, - } - } - - fn accept_document(&self, doc_id: DocId) -> bool { - if let Some(ref timestamp_filter) = self.timestamp_filter_opt { - return timestamp_filter.contains_doc_timestamp(doc_id); - } - true - } -} - -impl SegmentCollector for FastFieldSegmentCollector { - type Fruit = Vec; - - fn collect(&mut self, doc_id: DocId, _score: Score) { - let Some(column) = self.column_opt.as_ref() else { - return; - }; - if !self.accept_document(doc_id) { - return; - } - self.fast_field_values.extend(column.values_for_doc(doc_id)); - } - - fn harvest(self) -> Vec { - self.fast_field_values - } -} - -#[derive(Clone)] -pub struct FastFieldCollector { - pub fast_field_to_collect: String, - pub timestamp_filter_builder_opt: Option, - pub _marker: PhantomData, -} - -impl Collector for FastFieldCollector -where DynamicColumn: Into>> -{ - type Child = FastFieldSegmentCollector; - type Fruit = Vec; - - fn for_segment( - &self, - _segment_ord: SegmentOrdinal, - segment_reader: &SegmentReader, - ) -> tantivy::Result { - let timestamp_filter_opt = - if let Some(timestamp_filter_builder) = &self.timestamp_filter_builder_opt { - timestamp_filter_builder.build(segment_reader)? - } else { - None - }; - - let column_opt: Option> = segment_reader - .fast_fields() - .column_opt::(&self.fast_field_to_collect)?; - - Ok(FastFieldSegmentCollector::new( - column_opt, - timestamp_filter_opt, - )) - } - - fn requires_scoring(&self) -> bool { - // We do not need BM25 scoring in Quickwit. - false - } - - fn merge_fruits(&self, segment_fruits: Vec>) -> tantivy::Result { - Ok(segment_fruits.into_iter().flatten().collect::>()) - } -} - -#[derive(Clone)] -pub struct PartionnedFastFieldCollector { - pub fast_field_to_collect: String, - pub partition_by_fast_field: String, - pub timestamp_filter_builder_opt: Option, - pub _marker: PhantomData<(Item, PartitionItem)>, -} - -#[derive(Debug, Eq, PartialEq)] -pub struct PartitionValues { - pub partition_value: PartitionItem, - pub fast_field_values: Vec, -} - -impl Collector - for PartionnedFastFieldCollector -where - DynamicColumn: Into>>, - DynamicColumn: Into>>, -{ - type Child = PartitionedFastFieldSegmentCollector; - type Fruit = Vec>; - - fn for_segment( - &self, - _segment_ord: SegmentOrdinal, - segment_reader: &SegmentReader, - ) -> tantivy::Result { - let timestamp_filter_opt = - if let Some(timestamp_filter_builder) = &self.timestamp_filter_builder_opt { - timestamp_filter_builder.build(segment_reader)? 
- } else { - None - }; - let column_opt: Option> = segment_reader - .fast_fields() - .column_opt(&self.fast_field_to_collect)?; - - let partition_column_opt = segment_reader - .fast_fields() - .column_opt(self.partition_by_fast_field.as_str())?; - - Ok(PartitionedFastFieldSegmentCollector::new( - column_opt, - partition_column_opt, - timestamp_filter_opt, - )) - } - - fn requires_scoring(&self) -> bool { - // We do not need BM25 scoring in Quickwit. - false - } - - fn merge_fruits( - &self, - segment_fruits: Vec>>, - ) -> tantivy::Result { - Ok(segment_fruits - .into_iter() - .flat_map(|e| e.into_iter()) - .map(|(partition_value, values)| PartitionValues { - partition_value, - fast_field_values: values, - }) - .collect()) - } -} - -#[derive(Clone)] -pub struct PartitionedFastFieldSegmentCollector { - fast_field_values: HashMap>, - fast_field_reader: Option>, - partition_by_fast_field_reader: Option>, - timestamp_filter_opt: Option, -} - -impl PartitionedFastFieldSegmentCollector { - pub fn new( - fast_field_reader: Option>, - partition_by_fast_field_reader: Option>, - timestamp_filter_opt: Option, - ) -> Self { - Self { - fast_field_values: HashMap::default(), - fast_field_reader, - partition_by_fast_field_reader, - timestamp_filter_opt, - } - } - - fn accept_document(&self, doc_id: DocId) -> bool { - if let Some(ref timestamp_filter) = self.timestamp_filter_opt { - return timestamp_filter.contains_doc_timestamp(doc_id); - } - true - } -} - -impl - SegmentCollector for PartitionedFastFieldSegmentCollector -{ - type Fruit = HashMap>; - - fn collect(&mut self, doc_id: DocId, _score: Score) { - let Some(column) = self.fast_field_reader.as_ref() else { - return; - }; - let Some(partition_column) = self.partition_by_fast_field_reader.as_ref() else { - return; - }; - if !self.accept_document(doc_id) { - return; - } - if let Some(partition) = partition_column.first(doc_id) { - self.fast_field_values - .entry(partition) - .or_default() - .extend(column.values_for_doc(doc_id)); - } - } - - fn harvest(self) -> Self::Fruit { - self.fast_field_values - } -} diff --git a/quickwit/quickwit-search/src/search_stream/leaf.rs b/quickwit/quickwit-search/src/search_stream/leaf.rs deleted file mode 100644 index d5a8311f6bc..00000000000 --- a/quickwit/quickwit-search/src/search_stream/leaf.rs +++ /dev/null @@ -1,792 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::HashSet; -use std::hash::Hash; -use std::marker::PhantomData; -use std::sync::Arc; - -use futures::{FutureExt, StreamExt}; -use quickwit_common::pretty::PrettySample; -use quickwit_doc_mapper::{DocMapper, FastFieldWarmupInfo, WarmupInfo}; -use quickwit_proto::search::{ - LeafSearchStreamResponse, OutputFormat, SearchRequest, SearchStreamRequest, - SplitIdAndFooterOffsets, -}; -use quickwit_storage::{ByteRangeCache, Storage}; -use tantivy::columnar::{DynamicColumn, HasAssociatedColumnType}; -use tantivy::fastfield::Column; -use tantivy::query::Query; -use tantivy::schema::{Field, Schema, Type}; -use tantivy::{DateTime, ReloadPolicy, Searcher}; -use tokio_stream::wrappers::UnboundedReceiverStream; -use tracing::*; - -use super::FastFieldCollector; -use super::collector::{PartionnedFastFieldCollector, PartitionValues}; -use crate::filters::{TimestampFilterBuilder, create_timestamp_filter_builder}; -use crate::leaf::{open_index_with_caches, rewrite_start_end_time_bounds, warmup}; -use crate::service::SearcherContext; -use crate::{Result, SearchError}; - -/// `leaf` step of search stream. -// Note: we return a stream of a result with a tonic::Status error -// to be compatible with the stream coming from the grpc client. -// It would be better to have a SearchError but we need then -// to process stream in grpc_adapter.rs to change SearchError -// to tonic::Status as tonic::Status is required by the stream result -// signature defined by proto generated code. -#[instrument(skip_all, fields(index = request.index_id))] -pub async fn leaf_search_stream( - searcher_context: Arc, - request: SearchStreamRequest, - storage: Arc, - splits: Vec, - doc_mapper: Arc, -) -> UnboundedReceiverStream> { - info!(split_offsets = ?PrettySample::new(&splits, 5)); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - let span = info_span!("leaf_search_stream",); - tokio::spawn( - async move { - let mut stream = - leaf_search_results_stream(searcher_context, request, storage, splits, doc_mapper) - .await; - while let Some(item) = stream.next().await { - if let Err(error) = result_sender.send(item) { - error!( - "Failed to send leaf search stream result. Stop sending. Cause: {}", - error - ); - break; - } - } - } - .instrument(span), - ); - UnboundedReceiverStream::new(result_receiver) -} - -async fn leaf_search_results_stream( - searcher_context: Arc, - request: SearchStreamRequest, - storage: Arc, - splits: Vec, - doc_mapper: Arc, -) -> impl futures::Stream> + Sync + Send + 'static { - let max_num_concurrent_split_streams = searcher_context - .searcher_config - .max_num_concurrent_split_streams; - futures::stream::iter(splits) - .map(move |split| { - leaf_search_stream_single_split( - searcher_context.clone(), - split, - doc_mapper.clone(), - request.clone(), - storage.clone(), - ) - .shared() - }) - .buffer_unordered(max_num_concurrent_split_streams) -} - -/// Apply a leaf search on a single split. -#[instrument(skip_all, fields(split_id = %split.split_id))] -async fn leaf_search_stream_single_split( - searcher_context: Arc, - split: SplitIdAndFooterOffsets, - doc_mapper: Arc, - mut stream_request: SearchStreamRequest, - storage: Arc, -) -> crate::Result { - // TODO: Should we track the memory here using the SearchPermitProvider? - let _leaf_split_stream_permit = searcher_context - .split_stream_semaphore - .acquire() - .await - .expect("Failed to acquire permit. This should never happen! 
Please, report on https://github.com/quickwit-oss/quickwit/issues."); - rewrite_start_end_time_bounds( - &mut stream_request.start_timestamp, - &mut stream_request.end_timestamp, - &split, - ); - - let cache = - ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache); - let (index, _) = open_index_with_caches( - &searcher_context, - storage, - &split, - Some(doc_mapper.tokenizer_manager()), - Some(cache), - ) - .await?; - let split_schema = index.schema(); - - let request_fields = Arc::new(SearchStreamRequestFields::from_request( - &stream_request, - &split_schema, - doc_mapper.as_ref(), - )?); - - let output_format = OutputFormat::try_from(stream_request.output_format) - .map_err(|_| SearchError::Internal("invalid output format specified".to_string()))?; - - if request_fields.partition_by_fast_field.is_some() - && output_format != OutputFormat::ClickHouseRowBinary - { - return Err(SearchError::Internal( - "invalid output format specified, only ClickHouseRowBinary is allowed when providing \ - a partitioned-by field" - .to_string(), - )); - } - - let search_request = Arc::new(SearchRequest::try_from(stream_request.clone())?); - let query_ast = serde_json::from_str(&search_request.query_ast) - .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; - let (query, mut warmup_info) = doc_mapper.query(split_schema.clone(), &query_ast, false)?; - let reader = index - .reader_builder() - .reload_policy(ReloadPolicy::Manual) - .try_into()?; - let searcher = reader.searcher(); - - let timestamp_filter_builder_opt: Option = - create_timestamp_filter_builder( - request_fields.timestamp_field_name(), - search_request.start_timestamp, - search_request.end_timestamp, - ); - - let requires_scoring = search_request - .sort_fields - .iter() - .any(|sort| sort.field_name == "_score"); - - let fast_fields = request_fields - .fast_fields_for_request(timestamp_filter_builder_opt.as_ref()) - .into_iter() - .map(|name| FastFieldWarmupInfo { - name, - with_subfields: false, - }) - .collect(); - let stream_warmup_info = WarmupInfo { - fast_fields, - // TODO no test fail if this line get removed - field_norms: requires_scoring, - ..Default::default() - }; - warmup_info.merge(stream_warmup_info); - warmup_info.simplify(); - - warmup(&searcher, &warmup_info).await?; - - let span = info_span!( - "collect_fast_field", - split_id = %split.split_id, - request_fields=%request_fields, - ); - - let _ = span.enter(); - let m_request_fields = request_fields.clone(); - let collect_handle = crate::search_thread_pool().run_cpu_intensive(move || { - let mut buffer = Vec::new(); - match m_request_fields.fast_field_types() { - (Type::I64, None) => { - let collected_values = collect_values::( - &m_request_fields, - timestamp_filter_builder_opt, - &searcher, - &query, - )?; - super::serialize::(&collected_values, &mut buffer, output_format).map_err( - |_| { - SearchError::Internal("error when serializing i64 during export".to_owned()) - }, - )?; - } - (Type::U64, None) => { - let collected_values = collect_values::( - &m_request_fields, - timestamp_filter_builder_opt, - &searcher, - &query, - )?; - super::serialize::(&collected_values, &mut buffer, output_format).map_err( - |_| { - SearchError::Internal("error when serializing u64 during export".to_owned()) - }, - )?; - } - (Type::Date, None) => { - let collected_values = collect_values::( - &m_request_fields, - timestamp_filter_builder_opt, - &searcher, - &query, - )?; - // It may seem overkill and expensive considering DateTime is just a 
wrapper - // over the i64, but the compiler is smarter than it looks and the code - // below actually is zero-cost: No allocation and no copy happens. - let collected_values_as_micros = collected_values - .into_iter() - .map(|date_time| date_time.into_timestamp_micros()) - .collect::>(); - // We serialize Date as i64 microseconds. - super::serialize::(&collected_values_as_micros, &mut buffer, output_format) - .map_err(|_| { - SearchError::Internal("error when serializing i64 during export".to_owned()) - })?; - } - (Type::I64, Some(Type::I64)) => { - let collected_values = collect_partitioned_values::( - &m_request_fields, - timestamp_filter_builder_opt, - &searcher, - &query, - )?; - super::serialize_partitions::(collected_values.as_slice(), &mut buffer) - .map_err(|_| { - SearchError::Internal("error when serializing i64 during export".to_owned()) - })?; - } - (Type::U64, Some(Type::U64)) => { - let collected_values = collect_partitioned_values::( - &m_request_fields, - timestamp_filter_builder_opt, - &searcher, - &query, - )?; - super::serialize_partitions::(collected_values.as_slice(), &mut buffer) - .map_err(|_| { - SearchError::Internal("error when serializing i64 during export".to_owned()) - })?; - } - (fast_field_type, None) => { - return Err(SearchError::Internal(format!( - "search stream does not support fast field of type `{fast_field_type:?}`" - ))); - } - (fast_field_type, Some(partition_fast_field_type)) => { - return Err(SearchError::Internal(format!( - "search stream does not support the combination of fast field type \ - `{fast_field_type:?}` and partition fast field type \ - `{partition_fast_field_type:?}`" - ))); - } - }; - Result::>::Ok(buffer) - }); - let buffer = collect_handle.await.map_err(|_| { - error!(split_id = %split.split_id, request_fields=%request_fields, "failed to collect fast field"); - SearchError::Internal(format!("error when collecting fast field values for split {}", split.split_id)) - })??; - Ok(LeafSearchStreamResponse { - data: buffer, - split_id: split.split_id, - }) -} - -fn collect_values( - request_fields: &SearchStreamRequestFields, - timestamp_filter_builder_opt: Option, - searcher: &Searcher, - query: &dyn Query, -) -> crate::Result> -where - DynamicColumn: Into>>, -{ - let collector = FastFieldCollector:: { - fast_field_to_collect: request_fields.fast_field_name().to_string(), - timestamp_filter_builder_opt, - _marker: PhantomData, - }; - let result = searcher.search(query, &collector)?; - Ok(result) -} - -fn collect_partitioned_values< - Item: HasAssociatedColumnType, - TPartitionValue: HasAssociatedColumnType + Eq + Hash, ->( - request_fields: &SearchStreamRequestFields, - timestamp_filter_builder_opt: Option, - searcher: &Searcher, - query: &dyn Query, -) -> crate::Result>> -where - DynamicColumn: Into>> + Into>>, -{ - let collector = PartionnedFastFieldCollector:: { - fast_field_to_collect: request_fields.fast_field_name().to_string(), - partition_by_fast_field: request_fields - .partition_by_fast_field_name() - .expect("`partition_by_fast_field` is not defined. This should never happen! 
Please, report on https://github.com/quickwit-oss/quickwit/issues.") - .to_string(), - timestamp_filter_builder_opt, - _marker: PhantomData, - }; - let result = searcher.search(query, &collector)?; - Ok(result) -} - -#[derive(Debug)] -// TODO move to owned values, implement Send + Sync -struct SearchStreamRequestFields { - fast_field: Field, - partition_by_fast_field: Option, - timestamp_field_name: Option, - schema: Schema, -} - -impl std::fmt::Display for SearchStreamRequestFields { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "fast_field: {},", self.fast_field_name())?; - write!( - f, - "timestamp_field: {},", - self.timestamp_field_name().unwrap_or("None") - )?; - write!( - f, - "partition_by_fast_field: {}", - self.partition_by_fast_field_name().unwrap_or("None") - ) - } -} - -impl SearchStreamRequestFields { - pub fn from_request( - stream_request: &SearchStreamRequest, - schema: &Schema, - doc_mapper: &DocMapper, - ) -> crate::Result { - let fast_field = schema.get_field(&stream_request.fast_field)?; - - if !Self::is_fast_field(schema, &fast_field) { - return Err(SearchError::InvalidQuery(format!( - "field `{}` is not a fast field", - &stream_request.fast_field - ))); - } - - let timestamp_field_name = doc_mapper.timestamp_field_name().map(ToString::to_string); - let partition_by_fast_field = stream_request - .partition_by_field - .as_deref() - .and_then(|field_name| schema.get_field(field_name).ok()); - - if partition_by_fast_field.is_some() - && !Self::is_fast_field(schema, &partition_by_fast_field.unwrap()) - { - return Err(SearchError::InvalidQuery(format!( - "field `{}` is not a fast field", - &stream_request.partition_by_field.as_deref().unwrap() - ))); - } - - Ok(SearchStreamRequestFields { - schema: schema.to_owned(), - fast_field, - partition_by_fast_field, - timestamp_field_name, - }) - } - - pub fn fast_field_types(&self) -> (Type, Option) { - ( - self.schema - .get_field_entry(self.fast_field) - .field_type() - .value_type(), - self.partition_by_fast_field - .map(|field| self.schema.get_field_entry(field).field_type().value_type()), - ) - } - - fn fast_fields_for_request( - &self, - timestamp_filter_builder_opt: Option<&TimestampFilterBuilder>, - ) -> HashSet { - let mut set = HashSet::new(); - set.insert(self.fast_field_name().to_string()); - if let Some(timestamp_filter_builder) = timestamp_filter_builder_opt { - set.insert(timestamp_filter_builder.timestamp_field_name.clone()); - } - if let Some(partition_by_fast_field) = self.partition_by_fast_field_name() { - set.insert(partition_by_fast_field.to_string()); - } - set - } - - pub fn timestamp_field_name(&self) -> Option<&str> { - self.timestamp_field_name.as_deref() - } - - pub fn fast_field_name(&self) -> &str { - self.schema.get_field_name(self.fast_field) - } - - pub fn partition_by_fast_field_name(&self) -> Option<&str> { - self.partition_by_fast_field - .map(|field| self.schema.get_field_name(field)) - } - - fn is_fast_field(schema: &Schema, field: &Field) -> bool { - schema.get_field_entry(*field).is_fast() - } -} - -#[cfg(test)] -mod tests { - use std::collections::HashMap; - use std::convert::TryInto; - use std::str::from_utf8; - - use itertools::Itertools; - use quickwit_indexing::TestSandbox; - use quickwit_metastore::{ListSplitsRequestExt, MetastoreServiceStreamSplitsExt}; - use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService}; - use quickwit_query::query_ast::qast_json_helper; - use serde_json::json; - use tantivy::time::{Duration, OffsetDateTime}; - - use 
super::*; - use crate::extract_split_and_footer_offsets; - - #[tokio::test] - async fn test_leaf_search_stream_to_csv_output_with_filtering() -> anyhow::Result<()> { - let index_id = "single-node-simple"; - let doc_mapping_yaml = r#" - field_mappings: - - name: body - type: text - - name: ts - type: datetime - fast: true - timestamp_field: ts - "#; - let test_sandbox = TestSandbox::create(index_id, doc_mapping_yaml, "", &["body"]).await?; - - let mut docs = Vec::new(); - let mut filtered_timestamp_values = Vec::new(); - let start_timestamp = 72057595; - let end_timestamp = start_timestamp + 20; - for i in 0..30 { - let timestamp = start_timestamp + (i + 1) as i64; - let body = format!("info @ t:{timestamp}"); - docs.push(json!({"body": body, "ts": timestamp})); - if timestamp < end_timestamp { - filtered_timestamp_values.push(timestamp); - } - } - test_sandbox.add_documents(docs).await?; - - let request = SearchStreamRequest { - index_id: index_id.to_string(), - query_ast: qast_json_helper("info", &["body"]), - snippet_fields: Vec::new(), - start_timestamp: None, - end_timestamp: Some(end_timestamp), - fast_field: "ts".to_string(), - output_format: 0, - partition_by_field: None, - }; - let splits = test_sandbox - .metastore() - .list_splits(ListSplitsRequest::try_from_index_uid(test_sandbox.index_uid()).unwrap()) - .await? - .collect_splits() - .await - .unwrap(); - let splits_offsets = splits - .into_iter() - .map(|split| extract_split_and_footer_offsets(&split.split_metadata)) - .collect(); - let searcher_context = Arc::new(SearcherContext::for_test()); - let mut single_node_stream = leaf_search_stream( - searcher_context, - request, - test_sandbox.storage(), - splits_offsets, - test_sandbox.doc_mapper(), - ) - .await; - let res = single_node_stream.next().await.expect("no leaf result")?; - assert_eq!( - from_utf8(&res.data)?, - format!( - "{}\n", - filtered_timestamp_values - .iter() - .map(|timestamp_secs| (timestamp_secs * 1_000_000).to_string()) - .join("\n") - ) - ); - test_sandbox.assert_quit().await; - Ok(()) - } - - #[tokio::test] - async fn test_leaf_search_stream_filtering_with_datetime() -> anyhow::Result<()> { - let index_id = "single-node-simple-datetime"; - let doc_mapping_yaml = r#" - field_mappings: - - name: body - type: text - - name: ts - type: datetime - input_formats: - - "unix_timestamp" - fast: true - timestamp_field: ts - "#; - let test_sandbox = TestSandbox::create(index_id, doc_mapping_yaml, "", &["body"]).await?; - let mut docs = Vec::new(); - let mut filtered_timestamp_values = Vec::new(); - let start_date = OffsetDateTime::now_utc(); - let num_days = 20; - for i in 0..30 { - let dt = start_date.checked_add(Duration::days(i + 1)).unwrap(); - let body = format!("info @ t:{}", i + 1); - docs.push(json!({"body": body, "ts": dt.unix_timestamp()})); - if i + 1 < num_days { - let ts_secs = dt.unix_timestamp() * 1_000_000; - filtered_timestamp_values.push(ts_secs.to_string()); - } - } - test_sandbox.add_documents(docs).await?; - - let end_timestamp = start_date - .checked_add(Duration::days(num_days)) - .unwrap() - .unix_timestamp(); - let request = SearchStreamRequest { - index_id: index_id.to_string(), - query_ast: qast_json_helper("info", &["body"]), - snippet_fields: Vec::new(), - start_timestamp: None, - end_timestamp: Some(end_timestamp), - fast_field: "ts".to_string(), - output_format: 0, - partition_by_field: None, - }; - let splits = test_sandbox - .metastore() - .list_splits(ListSplitsRequest::try_from_index_uid(test_sandbox.index_uid()).unwrap()) - .await? 
- .collect_splits() - .await?; - let splits_offsets = splits - .into_iter() - .map(|split| extract_split_and_footer_offsets(&split.split_metadata)) - .collect(); - let searcher_context = Arc::new(SearcherContext::for_test()); - let mut single_node_stream = leaf_search_stream( - searcher_context, - request, - test_sandbox.storage(), - splits_offsets, - test_sandbox.doc_mapper(), - ) - .await; - let res = single_node_stream.next().await.expect("no leaf result")?; - assert_eq!( - from_utf8(&res.data)?, - format!("{}\n", filtered_timestamp_values.join("\n")) - ); - test_sandbox.assert_quit().await; - Ok(()) - } - - #[tokio::test] - async fn test_leaf_search_stream_with_string_fast_field_should_return_proper_error() - -> anyhow::Result<()> { - let index_id = "single-node-simple-string-fast-field"; - let doc_mapping_yaml = r#" - field_mappings: - - name: body - type: text - - name: app - type: text - tokenizer: raw - fast: true - "#; - let test_sandbox = TestSandbox::create(index_id, doc_mapping_yaml, "{}", &["body"]).await?; - - test_sandbox - .add_documents(vec![json!({"body": "body", "app": "my-app"})]) - .await?; - - let request = SearchStreamRequest { - index_id: index_id.to_string(), - query_ast: qast_json_helper("info", &["body"]), - snippet_fields: Vec::new(), - start_timestamp: None, - end_timestamp: None, - fast_field: "app".to_string(), - output_format: 0, - partition_by_field: None, - }; - let splits = test_sandbox - .metastore() - .list_splits(ListSplitsRequest::try_from_index_uid(test_sandbox.index_uid()).unwrap()) - .await? - .collect_splits() - .await?; - let splits_offsets = splits - .into_iter() - .map(|split| extract_split_and_footer_offsets(&split.split_metadata)) - .collect(); - let searcher_context = Arc::new(SearcherContext::for_test()); - let mut single_node_stream = leaf_search_stream( - searcher_context, - request, - test_sandbox.storage(), - splits_offsets, - test_sandbox.doc_mapper(), - ) - .await; - let res = single_node_stream.next().await.expect("no leaf result"); - let error_message = res.unwrap_err().to_string(); - assert!(error_message.contains("search stream does not support fast field of type `Str`"),); - test_sandbox.assert_quit().await; - Ok(()) - } - - #[tokio::test] - async fn test_leaf_search_stream_to_partitioned_clickhouse_binary_output_with_filtering() - -> anyhow::Result<()> { - let index_id = "single-node-simple-2"; - let doc_mapping_yaml = r#" - field_mappings: - - name: body - type: text - - name: ts - type: datetime - fast: true - - name: partition_by_fast_field - type: u64 - fast: true - - name: fast_field - type: u64 - fast: true - timestamp_field: ts - "#; - let test_sandbox = TestSandbox::create(index_id, doc_mapping_yaml, "", &["body"]).await?; - - let mut docs = Vec::new(); - let partition_by_fast_field_values = [1, 2, 3, 4, 5]; - let mut expected_output_tmp: HashMap> = HashMap::new(); - let start_timestamp = 72057595; - let end_timestamp: i64 = start_timestamp + 20; - for i in 0..30 { - let timestamp = start_timestamp + (i + 1) as i64; - let body = format!("info @ t:{timestamp}"); - let partition_number = partition_by_fast_field_values[i % 5]; - let fast_field: u64 = (i * 2).try_into().unwrap(); - docs.push(json!({ - "body": body, - "ts": timestamp, - "partition_by_fast_field": partition_number, - "fast_field": fast_field, - })); - if timestamp < end_timestamp { - if let Some(values_for_partition) = expected_output_tmp.get_mut(&partition_number) { - values_for_partition.push(fast_field) - } else { - 
expected_output_tmp.insert(partition_number, vec![fast_field]); - } - } - } - test_sandbox.add_documents(docs).await?; - let mut expected_output: Vec> = expected_output_tmp - .iter() - .map(|(key, value)| PartitionValues { - partition_value: *key, - fast_field_values: value.to_vec(), - }) - .collect(); - - let request = SearchStreamRequest { - index_id: index_id.to_string(), - query_ast: qast_json_helper("info", &["body"]), - snippet_fields: Vec::new(), - start_timestamp: None, - end_timestamp: Some(end_timestamp), - fast_field: "fast_field".to_string(), - output_format: 1, - partition_by_field: Some(String::from("partition_by_fast_field")), - }; - let splits = test_sandbox - .metastore() - .list_splits(ListSplitsRequest::try_from_index_uid(test_sandbox.index_uid()).unwrap()) - .await? - .collect_splits() - .await?; - let splits_offsets = splits - .into_iter() - .map(|split| extract_split_and_footer_offsets(&split.split_metadata)) - .collect(); - let searcher_context = Arc::new(SearcherContext::for_test()); - let mut single_node_stream = leaf_search_stream( - searcher_context, - request, - test_sandbox.storage(), - splits_offsets, - test_sandbox.doc_mapper(), - ) - .await; - let res = single_node_stream.next().await.expect("no leaf result")?; - let mut deserialized_output = deserialize_partitions(res.data); - expected_output.sort_by(|l, r| l.partition_value.cmp(&r.partition_value)); - deserialized_output.sort_by(|l, r| l.partition_value.cmp(&r.partition_value)); - assert_eq!(expected_output, deserialized_output); - test_sandbox.assert_quit().await; - Ok(()) - } - - fn deserialize_partitions(buffer: Vec) -> Vec> { - // Note: this function is only meant to be used with valid payloads for testing purposes - let mut cursor = 0; - let mut partitions_values = Vec::new(); - while cursor < buffer.len() { - let partition_slice: [u8; 8] = buffer[cursor..cursor + 8].try_into().unwrap(); - let partition = u64::from_le_bytes(partition_slice); - cursor += 8; - - let payload_size_slice: [u8; 8] = buffer[cursor..cursor + 8].try_into().unwrap(); - let payload_size = u64::from_le_bytes(payload_size_slice); - let nb_values: usize = (payload_size / 8).try_into().unwrap(); - cursor += 8; - - let mut partition_value = PartitionValues { - partition_value: partition, - fast_field_values: Vec::with_capacity(nb_values), - }; - - for _ in 0..nb_values { - let value_slice: [u8; 8] = buffer[cursor..cursor + 8].try_into().unwrap(); - let value = u64::from_le_bytes(value_slice); - cursor += 8; - partition_value.fast_field_values.push(value); - } - partitions_values.push(partition_value); - } - partitions_values - } -} diff --git a/quickwit/quickwit-search/src/search_stream/mod.rs b/quickwit/quickwit-search/src/search_stream/mod.rs deleted file mode 100644 index dba519d822e..00000000000 --- a/quickwit/quickwit-search/src/search_stream/mod.rs +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-
-mod collector;
-mod leaf;
-mod root;
-
-use std::fmt::Display;
-use std::io;
-use std::io::Write;
-
-pub use collector::FastFieldCollector;
-pub use leaf::leaf_search_stream;
-use quickwit_proto::search::OutputFormat;
-pub use root::root_search_stream;
-use tantivy::columnar::MonotonicallyMappableToU64;
-
-use self::collector::PartitionValues;
-
-pub trait ToLittleEndian {
-    fn to_le_bytes(&self) -> [u8; 8];
-}
-
-impl ToLittleEndian for u64 {
-    fn to_le_bytes(&self) -> [u8; 8] {
-        u64::to_le_bytes(*self)
-    }
-}
-
-impl ToLittleEndian for i64 {
-    fn to_le_bytes(&self) -> [u8; 8] {
-        i64::to_le_bytes(*self)
-    }
-}
-
-impl ToLittleEndian for f64 {
-    fn to_le_bytes(&self) -> [u8; 8] {
-        f64::to_le_bytes(*self)
-    }
-}
-
-/// Serialize the values into the `buffer` as bytes.
-///
-/// Please note that the `buffer` is always cleared.
-pub fn serialize<T: ToLittleEndian + Display>(
-    values: &[T],
-    buffer: &mut Vec<u8>,
-    format: OutputFormat,
-) -> io::Result<()> {
-    match format {
-        OutputFormat::Csv => serialize_csv(values, buffer),
-        OutputFormat::ClickHouseRowBinary => serialize_click_house_row_binary(values, buffer),
-    }
-}
-
-pub fn serialize_partitions<
-    TFastValue: MonotonicallyMappableToU64,
-    TPartitionFastValue: MonotonicallyMappableToU64,
->(
-    p_values: &[PartitionValues<TFastValue, TPartitionFastValue>],
-    buffer: &mut Vec<u8>,
-) -> io::Result<()> {
-    let buf_size = helpers::partitions_size_in_bytes(p_values);
-    buffer.clear();
-    buffer.reserve_exact(buf_size);
-    for partition in p_values {
-        let values_byte_size = std::mem::size_of::<TFastValue>() * partition.fast_field_values.len();
-
-        buffer.extend(partition.partition_value.to_u64().to_le_bytes());
-        buffer.extend(values_byte_size.to_le_bytes());
-
-        for value in &partition.fast_field_values {
-            buffer.extend(value.to_u64().to_le_bytes());
-        }
-    }
-    Ok(())
-}
-
-fn serialize_csv<T: Display>(values: &[T], buffer: &mut Vec<u8>) -> io::Result<()> {
-    buffer.clear();
-    for value in values {
-        writeln!(buffer, "{value}")?;
-    }
-    Ok(())
-}
-
-fn serialize_click_house_row_binary<T: ToLittleEndian>(
-    values: &[T],
-    buffer: &mut Vec<u8>,
-) -> io::Result<()> {
-    buffer.clear();
-    buffer.reserve_exact(8 * values.len());
-    for value in values {
-        buffer.extend(value.to_le_bytes());
-    }
-    Ok(())
-}
-
-mod helpers {
-    use super::collector::PartitionValues;
-
-    #[inline(always)]
-    pub fn partitions_size_in_bytes<TFastValue, TPartitionFastValue>(
-        partitions: &[PartitionValues<TFastValue, TPartitionFastValue>],
-    ) -> usize {
-        let mut size = 0;
-        for partition in partitions {
-            size += partition_size_in_bytes(partition);
-        }
-        size
-    }
-
-    #[inline(always)]
-    fn partition_size_in_bytes<TFastValue, TPartitionFastValue>(
-        partition: &PartitionValues<TFastValue, TPartitionFastValue>,
-    ) -> usize {
-        std::mem::size_of::<TFastValue>() * partition.fast_field_values.len()
-            + std::mem::size_of::<TPartitionFastValue>()
-            + std::mem::size_of::<u64>()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::search_stream::collector::PartitionValues;
-    use crate::search_stream::{serialize_click_house_row_binary, serialize_csv};
-
-    #[test]
-    fn test_serialize_row_binary() {
-        let mut buffer = Vec::new();
-        serialize_click_house_row_binary::<i64>(&[-10i64], &mut buffer).unwrap();
-        assert_eq!(buffer, (-10i64).to_le_bytes());
-
-        let mut buffer = Vec::new();
-        serialize_click_house_row_binary::<f64>(&[-10f64], &mut buffer).unwrap();
-        assert_eq!(buffer, (-10f64).to_le_bytes());
-    }
-
-    #[test]
-    fn test_serialize_csv() {
-        let mut buffer = Vec::new();
-        serialize_csv::<i64>(&[-10i64], &mut buffer).unwrap();
-        assert_eq!(buffer, "-10\n".as_bytes());
-    }
-
-    #[test]
-    fn test_serialize_partitions() {
-        let mut buffer = Vec::new();
-        let partition_1 = PartitionValues {
-            partition_value: 1u64,
-            fast_field_values: vec![3u64, 4u64],
-        };
-        let partition_2
= PartitionValues { - partition_value: 2u64, - fast_field_values: vec![5u64], - }; - super::serialize_partitions::(&[partition_1, partition_2], &mut buffer).unwrap(); - let expected_buffer: Vec = vec![ - 1u64.to_le_bytes(), - 16usize.to_le_bytes(), - 3u64.to_le_bytes(), - 4u64.to_le_bytes(), - 2u64.to_le_bytes(), - 8usize.to_le_bytes(), - 5u64.to_le_bytes(), - ] - .into_iter() - .flatten() - .collect(); - assert_eq!(buffer, expected_buffer); - } -} diff --git a/quickwit/quickwit-search/src/search_stream/root.rs b/quickwit/quickwit-search/src/search_stream/root.rs deleted file mode 100644 index 5f76a9d29cd..00000000000 --- a/quickwit/quickwit-search/src/search_stream/root.rs +++ /dev/null @@ -1,386 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashSet; - -use bytes::Bytes; -use futures::{StreamExt, TryStreamExt}; -use quickwit_common::uri::Uri; -use quickwit_config::build_doc_mapper; -use quickwit_doc_mapper::tag_pruning::extract_tags_from_query; -use quickwit_metastore::IndexMetadataResponseExt; -use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient}; -use quickwit_proto::search::{LeafSearchStreamRequest, SearchRequest, SearchStreamRequest}; -use quickwit_query::query_ast::QueryAst; -use tokio_stream::StreamMap; -use tracing::*; - -use crate::cluster_client::ClusterClient; -use crate::root::{SearchJob, refine_start_end_timestamp_from_ast}; -use crate::{SearchError, list_relevant_splits}; - -/// Perform a distributed search stream. -#[instrument(skip(metastore, cluster_client))] -pub async fn root_search_stream( - mut search_stream_request: SearchStreamRequest, - mut metastore: MetastoreServiceClient, - cluster_client: ClusterClient, -) -> crate::Result>> { - // TODO: building a search request should not be necessary for listing splits. - // This needs some refactoring: relevant splits, metadata_map, jobs... - let index_metadata_request = - IndexMetadataRequest::for_index_id(search_stream_request.index_id.clone()); - let index_metadata = metastore - .index_metadata(index_metadata_request) - .await? - .deserialize_index_metadata()?; - let index_uid = index_metadata.index_uid.clone(); - let index_config = index_metadata.into_index_config(); - - let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) - .map_err(|err| { - SearchError::Internal(format!("failed to build doc mapper. 
cause: {err}")) - })?; - - let query_ast: QueryAst = serde_json::from_str(&search_stream_request.query_ast) - .map_err(|err| SearchError::InvalidQuery(err.to_string()))?; - let query_ast_resolved = query_ast.parse_user_query(doc_mapper.default_search_fields())?; - let tags_filter_ast = extract_tags_from_query(query_ast_resolved.clone()); - - if let Some(timestamp_field) = doc_mapper.timestamp_field_name() { - refine_start_end_timestamp_from_ast( - &query_ast_resolved, - timestamp_field, - &mut search_stream_request.start_timestamp, - &mut search_stream_request.end_timestamp, - ); - } - - // Validates the query by effectively building it against the current schema. - doc_mapper.query(doc_mapper.schema(), &query_ast_resolved, true)?; - search_stream_request.query_ast = serde_json::to_string(&query_ast_resolved)?; - - let search_request = SearchRequest::try_from(search_stream_request.clone())?; - let split_metadatas = list_relevant_splits( - vec![index_uid], - search_request.start_timestamp, - search_request.end_timestamp, - tags_filter_ast, - &mut metastore, - ) - .await?; - - let doc_mapper_str = serde_json::to_string(&doc_mapper).map_err(|err| { - SearchError::Internal(format!("failed to serialize doc mapper: cause {err}")) - })?; - - let index_uri: &Uri = &index_config.index_uri; - let leaf_search_jobs: Vec = split_metadatas.iter().map(SearchJob::from).collect(); - let assigned_leaf_search_jobs = cluster_client - .search_job_placer - .assign_jobs(leaf_search_jobs, &HashSet::default()) - .await?; - - let mut stream_map: StreamMap = StreamMap::new(); - for (leaf_ord, (client, client_jobs)) in assigned_leaf_search_jobs.enumerate() { - let leaf_request: LeafSearchStreamRequest = jobs_to_leaf_request( - &search_stream_request, - &doc_mapper_str, - index_uri.as_ref(), - client_jobs, - ); - let leaf_stream = cluster_client - .leaf_search_stream(leaf_request, client) - .await; - stream_map.insert(leaf_ord, leaf_stream); - } - Ok(stream_map - .map(|(_leaf_ord, result)| result) - .map_ok(|leaf_response| Bytes::from(leaf_response.data))) -} - -fn jobs_to_leaf_request( - request: &SearchStreamRequest, - doc_mapper_str: &str, - index_uri: &str, // TODO make Uri - jobs: Vec, -) -> LeafSearchStreamRequest { - LeafSearchStreamRequest { - request: Some(request.clone()), - split_offsets: jobs.into_iter().map(Into::into).collect(), - doc_mapper: doc_mapper_str.to_string(), - index_uri: index_uri.to_string(), - } -} - -#[cfg(test)] -mod tests { - - use quickwit_common::ServiceStream; - use quickwit_indexing::MockSplitBuilder; - use quickwit_metastore::{IndexMetadata, ListSplitsResponseExt}; - use quickwit_proto::metastore::{ - IndexMetadataResponse, ListSplitsResponse, MockMetastoreService, - }; - use quickwit_proto::search::OutputFormat; - use quickwit_query::query_ast::qast_json_helper; - use tokio_stream::wrappers::UnboundedReceiverStream; - - use super::*; - use crate::{MockSearchService, SearchJobPlacer, searcher_pool_for_test}; - - #[tokio::test] - async fn test_root_search_stream_single_split() -> anyhow::Result<()> { - let request = quickwit_proto::search::SearchStreamRequest { - index_id: "test-index".to_string(), - query_ast: qast_json_helper("test", &["body"]), - fast_field: "timestamp".to_string(), - output_format: OutputFormat::Csv as i32, - ..Default::default() - }; - let mut mock_metastore = MockMetastoreService::new(); - let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); - let index_uid = index_metadata.index_uid.clone(); - 
mock_metastore.expect_index_metadata().returning(move |_| { - Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) - }); - mock_metastore.expect_list_splits().returning(move |_| { - let splits = vec![ - MockSplitBuilder::new("split1") - .with_index_uid(&index_uid) - .build(), - ]; - let splits = ListSplitsResponse::try_from_splits(splits).unwrap(); - Ok(ServiceStream::from(vec![Ok(splits)])) - }); - let mut mock_search_service = MockSearchService::new(); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - result_sender.send(Ok(quickwit_proto::search::LeafSearchStreamResponse { - data: b"123".to_vec(), - split_id: "split_1".to_string(), - }))?; - result_sender.send(Ok(quickwit_proto::search::LeafSearchStreamResponse { - data: b"456".to_vec(), - split_id: "split_1".to_string(), - }))?; - mock_search_service.expect_leaf_search_stream().return_once( - |_leaf_search_req: quickwit_proto::search::LeafSearchStreamRequest| { - Ok(UnboundedReceiverStream::new(result_receiver)) - }, - ); - // The test will hang on indefinitely if we don't drop the receiver. - drop(result_sender); - - let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", mock_search_service)]); - let search_job_placer = SearchJobPlacer::new(searcher_pool); - let cluster_client = ClusterClient::new(search_job_placer.clone()); - let result: Vec = root_search_stream( - request, - MetastoreServiceClient::from_mock(mock_metastore), - cluster_client, - ) - .await? - .try_collect() - .await?; - assert_eq!(result.len(), 2); - assert_eq!(&result[0], &b"123"[..]); - assert_eq!(&result[1], &b"456"[..]); - Ok(()) - } - - #[tokio::test] - async fn test_root_search_stream_single_split_partitioned() -> anyhow::Result<()> { - let request = quickwit_proto::search::SearchStreamRequest { - index_id: "test-index".to_string(), - query_ast: qast_json_helper("test", &["body"]), - fast_field: "timestamp".to_string(), - output_format: OutputFormat::Csv as i32, - partition_by_field: Some("timestamp".to_string()), - ..Default::default() - }; - let mut mock_metastore = MockMetastoreService::new(); - let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); - let index_uid = index_metadata.index_uid.clone(); - mock_metastore.expect_index_metadata().returning(move |_| { - Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) - }); - mock_metastore.expect_list_splits().returning(move |_| { - let splits = vec![ - MockSplitBuilder::new("split1") - .with_index_uid(&index_uid) - .build(), - ]; - let splits = ListSplitsResponse::try_from_splits(splits).unwrap(); - Ok(ServiceStream::from(vec![Ok(splits)])) - }); - let mut mock_search_service = MockSearchService::new(); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - result_sender.send(Ok(quickwit_proto::search::LeafSearchStreamResponse { - data: b"123".to_vec(), - split_id: "1".to_string(), - }))?; - result_sender.send(Ok(quickwit_proto::search::LeafSearchStreamResponse { - data: b"456".to_vec(), - split_id: "2".to_string(), - }))?; - mock_search_service.expect_leaf_search_stream().return_once( - |_leaf_search_req: quickwit_proto::search::LeafSearchStreamRequest| { - Ok(UnboundedReceiverStream::new(result_receiver)) - }, - ); - // The test will hang on indefinitely if we don't drop the sender. 
- drop(result_sender); - - let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", mock_search_service)]); - let search_job_placer = SearchJobPlacer::new(searcher_pool); - let cluster_client = ClusterClient::new(search_job_placer.clone()); - let stream = root_search_stream( - request, - MetastoreServiceClient::from_mock(mock_metastore), - cluster_client, - ) - .await?; - let result: Vec<_> = stream.try_collect().await?; - assert_eq!(result.len(), 2); - assert_eq!(&result[0], &b"123"[..]); - assert_eq!(&result[1], &b"456"[..]); - Ok(()) - } - - #[tokio::test] - async fn test_root_search_stream_single_split_with_error() -> anyhow::Result<()> { - let request = quickwit_proto::search::SearchStreamRequest { - index_id: "test-index".to_string(), - query_ast: qast_json_helper("test", &["body"]), - fast_field: "timestamp".to_string(), - output_format: OutputFormat::Csv as i32, - ..Default::default() - }; - let mut mock_metastore = MockMetastoreService::new(); - let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); - let index_uid = index_metadata.index_uid.clone(); - mock_metastore.expect_index_metadata().returning(move |_| { - Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) - }); - mock_metastore.expect_list_splits().returning(move |_| { - let splits = vec![ - MockSplitBuilder::new("split1") - .with_index_uid(&index_uid) - .build(), - MockSplitBuilder::new("split2") - .with_index_uid(&index_uid) - .build(), - ]; - let splits = ListSplitsResponse::try_from_splits(splits).unwrap(); - Ok(ServiceStream::from(vec![Ok(splits)])) - }); - let mut mock_search_service = MockSearchService::new(); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - result_sender.send(Ok(quickwit_proto::search::LeafSearchStreamResponse { - data: b"123".to_vec(), - split_id: "split1".to_string(), - }))?; - result_sender.send(Err(SearchError::Internal("error".to_string())))?; - mock_search_service - .expect_leaf_search_stream() - .withf(|request| request.split_offsets.len() == 2) // First request. - .return_once( - |_leaf_search_req: quickwit_proto::search::LeafSearchStreamRequest| { - Ok(UnboundedReceiverStream::new(result_receiver)) - }, - ); - mock_search_service - .expect_leaf_search_stream() - .withf(|request| request.split_offsets.len() == 1) // Retry request on the failed split. - .return_once( - |_leaf_search_req: quickwit_proto::search::LeafSearchStreamRequest| { - Err(SearchError::Internal("error".to_string())) - }, - ); - // The test will hang on indefinitely if we don't drop the sender. 
- drop(result_sender); - - let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", mock_search_service)]); - let search_job_placer = SearchJobPlacer::new(searcher_pool); - let cluster_client = ClusterClient::new(search_job_placer.clone()); - let stream = root_search_stream( - request, - MetastoreServiceClient::from_mock(mock_metastore), - cluster_client, - ) - .await?; - let result: Result, SearchError> = stream.try_collect().await; - assert_eq!(result.is_err(), true); - assert_eq!(result.unwrap_err().to_string(), "internal error: `error`"); - Ok(()) - } - - #[tokio::test] - async fn test_root_search_stream_with_invalid_query() -> anyhow::Result<()> { - let mut mock_metastore = MockMetastoreService::new(); - let index_metadata = IndexMetadata::for_test("test-index", "ram:///test-index"); - let index_uid = index_metadata.index_uid.clone(); - mock_metastore.expect_index_metadata().returning(move |_| { - Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) - }); - mock_metastore.expect_list_splits().returning(move |_| { - let splits = vec![ - MockSplitBuilder::new("split") - .with_index_uid(&index_uid) - .build(), - ]; - let splits = ListSplitsResponse::try_from_splits(splits).unwrap(); - Ok(ServiceStream::from(vec![Ok(splits)])) - }); - - let searcher_pool = searcher_pool_for_test([("127.0.0.1:1001", MockSearchService::new())]); - let search_job_placer = SearchJobPlacer::new(searcher_pool); - let metastore = MetastoreServiceClient::from_mock(mock_metastore); - assert!( - root_search_stream( - quickwit_proto::search::SearchStreamRequest { - index_id: "test-index".to_string(), - query_ast: qast_json_helper(r#"invalid_field:"test""#, &[]), - fast_field: "timestamp".to_string(), - output_format: OutputFormat::Csv as i32, - partition_by_field: Some("timestamp".to_string()), - ..Default::default() - }, - metastore.clone(), - ClusterClient::new(search_job_placer.clone()), - ) - .await - .is_err() - ); - - assert!( - root_search_stream( - quickwit_proto::search::SearchStreamRequest { - index_id: "test-index".to_string(), - query_ast: qast_json_helper("test", &["invalid_field"]), - fast_field: "timestamp".to_string(), - output_format: OutputFormat::Csv as i32, - partition_by_field: Some("timestamp".to_string()), - ..Default::default() - }, - metastore, - ClusterClient::new(search_job_placer.clone()), - ) - .await - .is_err() - ); - - Ok(()) - } -} diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 70f2875bbbd..9e1921a5c3a 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -12,13 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; use async_trait::async_trait; -use bytes::Bytes; use quickwit_common::uri::Uri; use quickwit_config::SearcherConfig; use quickwit_doc_mapper::DocMapper; @@ -26,17 +24,14 @@ use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::search::{ FetchDocsRequest, FetchDocsResponse, GetKvRequest, Hit, LeafListFieldsRequest, LeafListTermsRequest, LeafListTermsResponse, LeafSearchRequest, LeafSearchResponse, - LeafSearchStreamRequest, LeafSearchStreamResponse, ListFieldsRequest, ListFieldsResponse, - ListTermsRequest, ListTermsResponse, PutKvRequest, ReportSplitsRequest, ReportSplitsResponse, - ScrollRequest, SearchPlanResponse, SearchRequest, SearchResponse, SearchStreamRequest, - SnippetRequest, + ListFieldsRequest, ListFieldsResponse, ListTermsRequest, ListTermsResponse, PutKvRequest, + ReportSplitsRequest, ReportSplitsResponse, ScrollRequest, SearchPlanResponse, SearchRequest, + SearchResponse, SnippetRequest, }; use quickwit_storage::{ MemorySizedCache, QuickwitCache, SplitCache, StorageCache, StorageResolver, }; use tantivy::aggregation::AggregationLimitsGuard; -use tokio::sync::Semaphore; -use tokio_stream::wrappers::UnboundedReceiverStream; use crate::leaf::multi_index_leaf_search; use crate::leaf_cache::LeafSearchCache; @@ -47,7 +42,6 @@ use crate::metrics_trackers::LeafSearchMetricsFuture; use crate::root::fetch_docs_phase; use crate::scroll_context::{MiniKV, ScrollContext, ScrollKeyAndStartOffset}; use crate::search_permit_provider::SearchPermitProvider; -use crate::search_stream::{leaf_search_stream, root_search_stream}; use crate::{ClusterClient, SearchError, fetch_docs, root_search, search_plan}; #[derive(Clone)] @@ -89,18 +83,6 @@ pub trait SearchService: 'static + Send + Sync { /// This methods takes `PartialHit`s and returns `Hit`s. async fn fetch_docs(&self, request: FetchDocsRequest) -> crate::Result; - /// Performs a root search returning a receiver for streaming - async fn root_search_stream( - &self, - request: SearchStreamRequest, - ) -> crate::Result> + Send>>>; - - /// Performs a leaf search on a given set of splits and returns a stream. - async fn leaf_search_stream( - &self, - request: LeafSearchStreamRequest, - ) -> crate::Result>>; - /// Root search API. /// This RPC identifies the set of splits on which the query should run on, /// and dispatches the multiple calls to `LeafSearch`. 
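Spelled out in full, the return types of the two streaming methods removed from the `SearchService` trait above were approximately `crate::Result<Pin<Box<dyn Stream<Item = crate::Result<Bytes>> + Send>>>` for `root_search_stream` and `crate::Result<UnboundedReceiverStream<crate::Result<LeafSearchStreamResponse>>>` for `leaf_search_stream` (reconstructed from the `SearchServiceImpl` bodies deleted in the next hunk, so treat the exact generics as a best guess). The self-contained sketch below shows those two return shapes in isolation; `DemoResult`, `root_stream_shape`, and `leaf_stream_shape` are invented names, and it assumes the `futures`, `bytes`, `tokio` (with `rt` and `macros` features), and `tokio-stream` crates:

    use std::pin::Pin;

    use bytes::Bytes;
    use futures::{Stream, StreamExt};
    use tokio_stream::wrappers::UnboundedReceiverStream;

    type DemoResult<T> = Result<T, String>;

    // Shape of the removed `root_search_stream`: a pinned, boxed stream of byte chunks.
    fn root_stream_shape() -> Pin<Box<dyn Stream<Item = DemoResult<Bytes>> + Send>> {
        let chunks: Vec<DemoResult<Bytes>> = vec![
            Ok(Bytes::from_static(b"first row\n")),
            Ok(Bytes::from_static(b"second row")),
        ];
        Box::pin(futures::stream::iter(chunks))
    }

    // Shape of the removed `leaf_search_stream`: an unbounded mpsc receiver wrapped as a stream.
    fn leaf_stream_shape() -> UnboundedReceiverStream<DemoResult<Vec<u8>>> {
        let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
        sender.send(Ok(b"123".to_vec())).unwrap();
        // Dropping the sender closes the channel so the stream terminates.
        drop(sender);
        UnboundedReceiverStream::new(receiver)
    }

    #[tokio::main]
    async fn main() {
        let root_chunks: Vec<DemoResult<Bytes>> = root_stream_shape().collect().await;
        assert_eq!(root_chunks.len(), 2);
        let leaf_chunks: Vec<DemoResult<Vec<u8>>> = leaf_stream_shape().collect().await;
        assert_eq!(leaf_chunks.len(), 1);
    }

Dropping these two methods is also what lets this file shed its `Pin`, `Bytes`, `Semaphore`, and `UnboundedReceiverStream` imports and, further down, the `split_stream_semaphore` field.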
@@ -239,40 +221,6 @@ impl SearchService for SearchServiceImpl { Ok(fetch_docs_response) } - async fn root_search_stream( - &self, - stream_request: SearchStreamRequest, - ) -> crate::Result> + Send>>> { - let data = root_search_stream( - stream_request, - self.metastore.clone(), - self.cluster_client.clone(), - ) - .await?; - Ok(Box::pin(data)) - } - - async fn leaf_search_stream( - &self, - leaf_stream_request: LeafSearchStreamRequest, - ) -> crate::Result>> { - let stream_request = leaf_stream_request - .request - .ok_or_else(|| SearchError::Internal("no search request".to_string()))?; - let index_uri = Uri::from_str(&leaf_stream_request.index_uri)?; - let storage = self.storage_resolver.resolve(&index_uri).await?; - let doc_mapper = deserialize_doc_mapper(&leaf_stream_request.doc_mapper)?; - let leaf_receiver = leaf_search_stream( - self.searcher_context.clone(), - stream_request, - storage, - leaf_stream_request.split_offsets, - doc_mapper, - ) - .await; - Ok(leaf_receiver) - } - async fn root_list_terms( &self, list_terms_request: ListTermsRequest, @@ -457,8 +405,6 @@ pub struct SearcherContext { pub search_permit_provider: SearchPermitProvider, /// Split footer cache. pub split_footer_cache: MemorySizedCache, - /// Counting semaphore to limit concurrent split stream requests. - pub split_stream_semaphore: Semaphore, /// Recent sub-query cache. pub leaf_search_cache: LeafSearchCache, /// Search split cache. `None` if no split cache is configured. @@ -473,7 +419,6 @@ impl std::fmt::Debug for SearcherContext { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("SearcherContext") .field("searcher_config", &self.searcher_config) - .field("split_stream_semaphore", &self.split_stream_semaphore) .finish() } } @@ -497,8 +442,6 @@ impl SearcherContext { searcher_config.max_num_concurrent_split_searches, searcher_config.warmup_memory_budget, ); - let split_stream_semaphore = - Semaphore::new(searcher_config.max_num_concurrent_split_streams); let fast_field_cache_capacity = searcher_config.fast_field_cache_capacity.as_u64() as usize; let storage_long_term_cache = Arc::new(QuickwitCache::new(fast_field_cache_capacity)); let leaf_search_cache = @@ -515,7 +458,6 @@ impl SearcherContext { fast_fields_cache: storage_long_term_cache, search_permit_provider: leaf_search_split_semaphore, split_footer_cache: global_split_footer_cache, - split_stream_semaphore, leaf_search_cache, list_fields_cache, split_cache_opt, diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 1f3a33bb456..01f5207e544 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -49,7 +49,6 @@ use crate::otlp_api::otlp_ingest_api_handlers; use crate::rest_api_response::{RestApiError, RestApiResponse}; use crate::search_api::{ search_get_handler, search_plan_get_handler, search_plan_post_handler, search_post_handler, - search_stream_handler, }; use crate::template_api::index_template_api_handlers; use crate::ui_handler::ui_handler; @@ -256,7 +255,6 @@ fn search_routes( .or(search_post_handler(search_service.clone())) .or(search_plan_get_handler(search_service.clone())) .or(search_plan_post_handler(search_service.clone())) - .or(search_stream_handler(search_service)) .recover(recover_fn) .boxed() } diff --git a/quickwit/quickwit-serve/src/search_api/grpc_adapter.rs b/quickwit/quickwit-serve/src/search_api/grpc_adapter.rs index 65bb4eb329b..c5250ee2465 100644 --- a/quickwit/quickwit-serve/src/search_api/grpc_adapter.rs +++ 
b/quickwit/quickwit-serve/src/search_api/grpc_adapter.rs @@ -15,14 +15,12 @@ use std::sync::Arc; use async_trait::async_trait; -use futures::TryStreamExt; use quickwit_proto::error::convert_to_grpc_result; use quickwit_proto::search::{ - GetKvRequest, GetKvResponse, LeafListFieldsRequest, LeafSearchStreamRequest, - LeafSearchStreamResponse, ListFieldsRequest, ListFieldsResponse, ReportSplitsRequest, - ReportSplitsResponse, search_service_server as grpc, + GetKvRequest, GetKvResponse, LeafListFieldsRequest, ListFieldsRequest, ListFieldsResponse, + ReportSplitsRequest, ReportSplitsResponse, search_service_server as grpc, }; -use quickwit_proto::{GrpcServiceError, set_parent_span_from_request_metadata, tonic}; +use quickwit_proto::{set_parent_span_from_request_metadata, tonic}; use quickwit_search::SearchService; use tracing::instrument; @@ -70,29 +68,6 @@ impl grpc::SearchService for GrpcSearchAdapter { convert_to_grpc_result(fetch_docs_result) } - type LeafSearchStreamStream = std::pin::Pin< - Box< - dyn futures::Stream> - + Send - + Sync, - >, - >; - #[instrument(name = "search_adapter:leaf_search_stream", skip(self, request))] - async fn leaf_search_stream( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - set_parent_span_from_request_metadata(request.metadata()); - let leaf_search_request = request.into_inner(); - let leaf_search_result = self - .0 - .leaf_search_stream(leaf_search_request) - .await - .map_err(|error| error.into_grpc_status())? - .map_err(|error| error.into_grpc_status()); - Ok(tonic::Response::new(Box::pin(leaf_search_result))) - } - #[instrument(skip(self, request))] async fn root_list_terms( &self, diff --git a/quickwit/quickwit-serve/src/search_api/mod.rs b/quickwit/quickwit-serve/src/search_api/mod.rs index df4a15b0a66..ef1f643257c 100644 --- a/quickwit/quickwit-serve/src/search_api/mod.rs +++ b/quickwit/quickwit-serve/src/search_api/mod.rs @@ -19,130 +19,5 @@ pub use self::grpc_adapter::GrpcSearchAdapter; pub use self::rest_handler::{ SearchApi, SearchRequestQueryString, SortBy, search_get_handler, search_plan_get_handler, search_plan_post_handler, search_post_handler, search_request_from_api_request, - search_stream_handler, }; pub(crate) use self::rest_handler::{extract_index_id_patterns, extract_index_id_patterns_default}; - -#[cfg(test)] -mod tests { - use std::net::SocketAddr; - use std::sync::Arc; - - use bytesize::ByteSize; - use futures::TryStreamExt; - use quickwit_common::ServiceStream; - use quickwit_indexing::MockSplitBuilder; - use quickwit_metastore::{IndexMetadata, IndexMetadataResponseExt, ListSplitsResponseExt}; - use quickwit_proto::metastore::{ - IndexMetadataResponse, ListSplitsResponse, MetastoreServiceClient, MockMetastoreService, - }; - use quickwit_proto::search::OutputFormat; - use quickwit_proto::search::search_service_server::SearchServiceServer; - use quickwit_proto::tonic; - use quickwit_query::query_ast::qast_json_helper; - use quickwit_search::{ - ClusterClient, MockSearchService, SearchError, SearchJobPlacer, SearchService, - SearcherPool, create_search_client_from_grpc_addr, root_search_stream, - }; - use tokio_stream::wrappers::UnboundedReceiverStream; - use tonic::transport::Server; - - use crate::search_api::GrpcSearchAdapter; - - async fn start_test_server( - address: SocketAddr, - search_service: Arc, - ) -> anyhow::Result<()> { - let search_grpc_adapter = GrpcSearchAdapter::from(search_service); - tokio::spawn(async move { - Server::builder() - .add_service(SearchServiceServer::new(search_grpc_adapter)) - 
.serve(address) - .await?; - Result::<_, anyhow::Error>::Ok(()) - }); - Ok(()) - } - - #[tokio::test] - async fn test_serve_search_stream_with_a_leaf_error_on_leaf_node() -> anyhow::Result<()> { - // This test aims at checking the client gRPC implementation. - let request = quickwit_proto::search::SearchStreamRequest { - index_id: "test-index".to_string(), - query_ast: qast_json_helper("test", &["body"]), - snippet_fields: Vec::new(), - start_timestamp: None, - end_timestamp: None, - fast_field: "timestamp".to_string(), - output_format: OutputFormat::Csv as i32, - partition_by_field: None, - }; - let mut mock_metastore = MockMetastoreService::new(); - let index_metadata = IndexMetadata::for_test("test-index", "ram:///indexes/test-index"); - let index_uid = index_metadata.index_uid.clone(); - mock_metastore.expect_index_metadata().returning(move |_| { - Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) - }); - mock_metastore.expect_list_splits().returning(move |_| { - let splits = vec![ - MockSplitBuilder::new("split_1") - .with_index_uid(&index_uid) - .build(), - MockSplitBuilder::new("split_2") - .with_index_uid(&index_uid) - .build(), - ]; - let splits = ListSplitsResponse::try_from_splits(splits).unwrap(); - Ok(ServiceStream::from(vec![Ok(splits)])) - }); - let mut mock_search_service = MockSearchService::new(); - let (result_sender, result_receiver) = tokio::sync::mpsc::unbounded_channel(); - result_sender.send(Ok(quickwit_proto::search::LeafSearchStreamResponse { - data: b"123".to_vec(), - split_id: "split_1".to_string(), - }))?; - result_sender.send(Err(SearchError::Internal("Error on `split2`".to_string())))?; - mock_search_service - .expect_leaf_search_stream() - .withf(|request| request.split_offsets.len() == 2) // First request. - .return_once( - |_leaf_search_req: quickwit_proto::search::LeafSearchStreamRequest| { - Ok(UnboundedReceiverStream::new(result_receiver)) - }, - ); - mock_search_service - .expect_leaf_search_stream() - .withf(|request| request.split_offsets.len() == 1) // Retry request on the failing split. - .return_once( - |_leaf_search_req: quickwit_proto::search::LeafSearchStreamRequest| { - Err(SearchError::Internal("error again on `split2`".to_string())) - }, - ); - // The test will hang on indefinitely if we don't drop the sender. 
- drop(result_sender); - - let grpc_addr: SocketAddr = "127.0.0.1:10001".parse()?; - start_test_server(grpc_addr, Arc::new(mock_search_service)).await?; - - let searcher_pool = SearcherPool::default(); - searcher_pool.insert( - grpc_addr, - create_search_client_from_grpc_addr(grpc_addr, ByteSize::mib(1)), - ); - let search_job_placer = SearchJobPlacer::new(searcher_pool); - let cluster_client = ClusterClient::new(search_job_placer.clone()); - let stream = root_search_stream( - request, - MetastoreServiceClient::from_mock(mock_metastore), - cluster_client, - ) - .await?; - let search_stream_result: Result, SearchError> = stream.try_collect().await; - let search_error = search_stream_result.unwrap_err(); - assert_eq!( - search_error.to_string(), - "internal error: `internal error: `error again on `split2```" - ); - Ok(()) - } -} diff --git a/quickwit/quickwit-serve/src/search_api/rest_handler.rs b/quickwit/quickwit-serve/src/search_api/rest_handler.rs index f8a120cc083..557942ff668 100644 --- a/quickwit/quickwit-serve/src/search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/search_api/rest_handler.rs @@ -15,20 +15,15 @@ use std::convert::TryFrom; use std::sync::Arc; -use futures::stream::StreamExt; use percent_encoding::percent_decode_str; use quickwit_config::validate_index_id_pattern; -use quickwit_proto::ServiceError; -use quickwit_proto::search::{CountHits, OutputFormat, SortField, SortOrder}; -use quickwit_proto::types::IndexId; +use quickwit_proto::search::{CountHits, SortField, SortOrder}; use quickwit_query::query_ast::query_ast_from_user_text; use quickwit_search::{SearchError, SearchPlanResponseRest, SearchResponseRest, SearchService}; -use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::Value as JsonValue; use tracing::info; -use warp::hyper::header::{CONTENT_TYPE, HeaderValue}; -use warp::hyper::{HeaderMap, StatusCode}; -use warp::{Filter, Rejection, Reply, reply}; +use warp::{Filter, Rejection}; use crate::rest_api_response::into_rest_api_response; use crate::simple_list::{from_simple_list, to_simple_list}; @@ -39,13 +34,11 @@ use crate::{BodyFormat, with_arg}; paths( search_get_handler, search_post_handler, - search_stream_handler, search_plan_get_handler, search_plan_post_handler, ), components(schemas( BodyFormat, - OutputFormat, SearchRequestQueryString, SearchResponseRest, SearchPlanResponseRest, @@ -150,25 +143,6 @@ fn default_max_hits() -> u64 { 20 } -// Deserialize a string field and return and error if it's empty. -// We have 2 issues with this implementation: -// - this is not generic and thus nos sustainable and we may need to -// use an external crate for validation in the future like -// this one https://github.com/Keats/validator. -// - the error does not mention the field name and this is not user friendly. There -// is an external crate that can help https://github.com/dtolnay/path-to-error but -// I did not find a way to plug it to serde_qs. -// Conclusion: the best way I found to reject a user query that contains an empty -// string on an mandatory field is this serializer. -fn deserialize_non_empty_string<'de, D>(deserializer: D) -> Result -where D: Deserializer<'de> { - let value = String::deserialize(deserializer)?; - if value.is_empty() { - return Err(de::Error::custom("expected a non-empty string field")); - } - Ok(value) -} - /// This struct represents the QueryString passed to /// the rest API. 
#[derive( @@ -428,27 +402,6 @@ pub fn search_post_handler( .then(search) } -#[utoipa::path( - get, - tag = "Search", - path = "/{index_id}/search/stream", - responses( - (status = 200, description = "Successfully executed search.") - ), - params( - SearchStreamRequestQueryString, - ("index_id" = String, Path, description = "The index ID to search."), - ) -)] -/// Stream Search Index -pub fn search_stream_handler( - search_service: Arc, -) -> impl Filter + Clone { - search_stream_filter() - .and(with_arg(search_service)) - .then(search_stream) -} - #[utoipa::path( get, tag = "Search", @@ -495,133 +448,9 @@ pub fn search_plan_post_handler( .then(search_plan) } -/// This struct represents the search stream query passed to -/// the REST API. -#[derive(Deserialize, Debug, Eq, PartialEq, utoipa::IntoParams)] -#[into_params(parameter_in = Query)] -#[serde(deny_unknown_fields)] -struct SearchStreamRequestQueryString { - /// Query text. The query language is that of tantivy. - pub query: String, - // Fields to search on. - #[param(rename = "search_field")] - #[serde(default)] - #[serde(rename(deserialize = "search_field"))] - #[serde(deserialize_with = "from_simple_list")] - pub search_fields: Option>, - /// Fields to extract snippet on - #[serde(default)] - #[serde(rename(deserialize = "snippet_fields"))] // TODO: Was this supposed to be `snippet_field`? - CF - #[serde(deserialize_with = "from_simple_list")] - pub snippet_fields: Option>, - /// If set, restricts search to documents with a `timestamp >= start_timestamp`. - pub start_timestamp: Option, - /// If set, restricts search to documents with a `timestamp < end_timestamp``. - pub end_timestamp: Option, - /// The fast field to extract. - #[serde(deserialize_with = "deserialize_non_empty_string")] - pub fast_field: String, - /// The requested output format. - #[serde(default)] - pub output_format: OutputFormat, - #[serde(default)] - pub partition_by_field: Option, -} - -async fn search_stream_endpoint( - index_id: IndexId, - search_request: SearchStreamRequestQueryString, - search_service: &dyn SearchService, -) -> Result { - let query_ast = query_ast_from_user_text(&search_request.query, search_request.search_fields); - let query_ast_json = serde_json::to_string(&query_ast)?; - let request = quickwit_proto::search::SearchStreamRequest { - index_id, - query_ast: query_ast_json, - snippet_fields: search_request.snippet_fields.unwrap_or_default(), - start_timestamp: search_request.start_timestamp, - end_timestamp: search_request.end_timestamp, - fast_field: search_request.fast_field, - output_format: search_request.output_format as i32, - partition_by_field: search_request.partition_by_field, - }; - let mut data = search_service.root_search_stream(request).await?; - let (mut sender, body) = warp::hyper::Body::channel(); - tokio::spawn(async move { - while let Some(result) = data.next().await { - match result { - Ok(bytes) => { - if sender.send_data(bytes).await.is_err() { - sender.abort(); - break; - } - } - Err(error) => { - // Add trailer to signal to the client that there is an error. Only works - // if the request is made with an http2 client that can read it... and - // actually this seems pretty rare, for example `curl` will not show this - // trailer. Thus we also call `sender.abort()` so that the - // client will see something wrong happened. But he will - // need to look at the logs to understand that. 
- tracing::error!(error=?error, "error when streaming search results"); - let header_value_str = - format!("Error when streaming search results: {error:?}."); - let header_value = HeaderValue::from_str(header_value_str.as_str()) - .unwrap_or_else(|_| HeaderValue::from_static("Search stream error")); - let mut trailers = HeaderMap::new(); - trailers.insert("X-Stream-Error", header_value); - let _ = sender.send_trailers(trailers).await; - sender.abort(); - break; - } - }; - } - }); - Ok(body) -} - -fn make_streaming_reply(result: Result) -> impl Reply { - let status_code: StatusCode; - let body = match result { - Ok(body) => { - status_code = StatusCode::OK; - warp::reply::Response::new(body) - } - Err(error) => { - status_code = - crate::convert_status_code_to_legacy_http(error.error_code().http_status_code()); - warp::reply::Response::new(warp::hyper::Body::from(error.to_string())) - } - }; - reply::with_status(body, status_code) -} - -async fn search_stream( - index_id: IndexId, - request: SearchStreamRequestQueryString, - search_service: Arc, -) -> impl warp::Reply { - info!(index_id=%index_id,request=?request, "search_stream"); - let content_type = match request.output_format { - OutputFormat::ClickHouseRowBinary => "application/octet-stream", - OutputFormat::Csv => "text/csv", - }; - let reply = - make_streaming_reply(search_stream_endpoint(index_id, request, &*search_service).await); - reply::with_header(reply, CONTENT_TYPE, content_type) -} - -fn search_stream_filter() --> impl Filter + Clone { - warp::path!(String / "search" / "stream") - .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) -} - #[cfg(test)] mod tests { use assert_json_diff::{assert_json_eq, assert_json_include}; - use bytes::Bytes; use mockall::predicate; use quickwit_search::{MockSearchService, SearchError}; use serde_json::{Value as JsonValue, json}; @@ -635,7 +464,6 @@ mod tests { let mock_search_service_in_arc = Arc::new(mock_search_service); search_get_handler(mock_search_service_in_arc.clone()) .or(search_post_handler(mock_search_service_in_arc.clone())) - .or(search_stream_handler(mock_search_service_in_arc.clone())) .or(search_plan_get_handler(mock_search_service_in_arc.clone())) .or(search_plan_post_handler(mock_search_service_in_arc.clone())) .recover(recover_fn) @@ -1163,110 +991,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_rest_search_stream_api() { - let mut mock_search_service = MockSearchService::new(); - mock_search_service - .expect_root_search_stream() - .return_once(|_| { - Ok(Box::pin(futures::stream::iter(vec![ - Ok(Bytes::from("first row\n")), - Ok(Bytes::from("second row")), - ]))) - }); - let rest_search_stream_api_handler = search_handler(mock_search_service); - let response = warp::test::request() - .path( - "/my-index/search/stream?query=obama&search_field=body&fast_field=external_id&\ - output_format=csv", - ) - .reply(&rest_search_stream_api_handler) - .await; - assert_eq!(response.status(), 200); - let body = String::from_utf8_lossy(response.body()); - assert_eq!(body, "first row\nsecond row"); - } - - #[tokio::test] - async fn test_rest_search_stream_api_csv() { - let (index, req) = warp::test::request() - .path("/my-index/search/stream?query=obama&fast_field=external_id&output_format=csv") - .filter(&super::search_stream_filter()) - .await - .unwrap(); - assert_eq!(&index, "my-index"); - assert_eq!( - &req, - &super::SearchStreamRequestQueryString { - query: "obama".to_string(), - search_fields: None, - snippet_fields: None, - start_timestamp: 
None, - end_timestamp: None, - fast_field: "external_id".to_string(), - output_format: OutputFormat::Csv, - partition_by_field: None, - } - ); - } - - #[tokio::test] - async fn test_rest_search_stream_api_click_house_row_binary() { - let (index, req) = warp::test::request() - .path( - "/my-index/search/stream?query=obama&fast_field=external_id&\ - output_format=click_house_row_binary", - ) - .filter(&super::search_stream_filter()) - .await - .unwrap(); - assert_eq!(&index, "my-index"); - assert_eq!( - &req, - &super::SearchStreamRequestQueryString { - query: "obama".to_string(), - search_fields: None, - snippet_fields: None, - start_timestamp: None, - end_timestamp: None, - fast_field: "external_id".to_string(), - output_format: OutputFormat::ClickHouseRowBinary, - partition_by_field: None, - } - ); - } - - #[tokio::test] - async fn test_rest_search_stream_api_error() { - let rejection = warp::test::request() - .path( - "/my-index/search/stream?query=obama&fast_field=external_id&\ - output_format=ClickHouseRowBinary", - ) - .filter(&super::search_stream_filter()) - .await - .unwrap_err(); - let parse_error = rejection.find::().unwrap(); - assert_eq!( - parse_error.to_string(), - "unknown variant `ClickHouseRowBinary`, expected `csv` or `click_house_row_binary`" - ); - } - - #[tokio::test] - async fn test_rest_search_stream_api_error_empty_fastfield() { - let rejection = warp::test::request() - .path( - "/my-index/search/stream?query=obama&fast_field=&\ - output_format=click_house_row_binary", - ) - .filter(&super::search_stream_filter()) - .await - .unwrap_err(); - let parse_error = rejection.find::().unwrap(); - assert_eq!(parse_error.to_string(), "expected a non-empty string field"); - } - #[tokio::test] async fn test_rest_search_api_route_serialize_results_with_snippet() -> anyhow::Result<()> { let mut mock_search_service = MockSearchService::new(); From 344a19c28f11f48e157743a6d223de18d70a5528 Mon Sep 17 00:00:00 2001 From: Abdul Andha <86802346+Abdul-Andha@users.noreply.github.com> Date: Tue, 9 Sep 2025 16:57:34 -0400 Subject: [PATCH 6/9] Upgrade Warp (#5870) --- LICENSE-3rdparty.csv | 8 - quickwit/Cargo.lock | 266 +++++------------- quickwit/Cargo.toml | 30 +- quickwit/quickwit-cli/Cargo.toml | 1 + quickwit/quickwit-cli/src/main.rs | 4 + quickwit/quickwit-config/Cargo.toml | 2 +- .../quickwit-config/src/node_config/mod.rs | 2 +- .../src/node_config/serialize.rs | 2 +- .../src/source/queue_sources/sqs_queue.rs | 16 +- .../quickwit-integration-tests/Cargo.toml | 1 + .../src/test_utils/cluster_sandbox.rs | 4 + quickwit/quickwit-lambda/README.md | 2 +- quickwit/quickwit-serve/Cargo.toml | 4 +- .../src/developer_api/heap_prof.rs | 5 +- .../src/elasticsearch_api/filter.rs | 26 +- .../src/elasticsearch_api/model/error.rs | 12 +- quickwit/quickwit-serve/src/format.rs | 3 +- .../src/index_api/index_resource.rs | 8 +- .../src/index_api/rest_handler.rs | 2 +- .../src/index_api/source_resource.rs | 2 +- .../src/index_api/split_resource.rs | 2 +- .../src/ingest_api/rest_handler.rs | 4 +- .../src/jaeger_api/rest_handler.rs | 2 +- quickwit/quickwit-serve/src/lib.rs | 5 - quickwit/quickwit-serve/src/rest.rs | 266 +++++------------- .../quickwit-serve/src/rest_api_response.rs | 4 +- .../src/search_api/rest_handler.rs | 6 +- quickwit/quickwit-serve/src/ui_handler.rs | 2 +- 28 files changed, 207 insertions(+), 484 deletions(-) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 95fb38524ac..9e1bab93930 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -203,7 +203,6 @@ 
htmlescape,https://github.com/veddan/rust-htmlescape,Apache-2.0 OR MIT OR MP http,https://github.com/hyperium/http,MIT OR Apache-2.0,"Alex Crichton , Carl Lerche , Sean McArthur " http-body,https://github.com/hyperium/http-body,MIT,"Carl Lerche , Lucio Franco , Sean McArthur " http-body-util,https://github.com/hyperium/http-body,MIT,"Carl Lerche , Lucio Franco , Sean McArthur " -http-range-header,https://github.com/MarcusGrass/parse-range-headers,MIT,The http-range-header Authors http-serde,https://gitlab.com/kornelski/http-serde,Apache-2.0 OR MIT,Kornel httparse,https://github.com/seanmonstar/httparse,MIT OR Apache-2.0,Sean McArthur httpdate,https://github.com/pyfisch/httpdate,MIT OR Apache-2.0,Pyfisch @@ -242,7 +241,6 @@ jobserver,https://github.com/rust-lang/jobserver-rs,MIT OR Apache-2.0,Alex Crich js-sys,https://github.com/wasm-bindgen/wasm-bindgen/tree/master/crates/js-sys,MIT OR Apache-2.0,The wasm-bindgen Developers json_comments,https://github.com/tmccombs/json-comments-rs,Apache-2.0,Thayne McCombs lazy_static,https://github.com/rust-lang-nursery/lazy-static.rs,MIT OR Apache-2.0,Marvin Löbel -lazycell,https://github.com/indiv0/lazycell,MIT OR Apache-2.0,"Alex Crichton , Nikita Pekin " levenshtein_automata,https://github.com/tantivy-search/levenshtein-automata,MIT,Paul Masurel libc,https://github.com/rust-lang/libc,MIT OR Apache-2.0,The Rust Project Developers libloading,https://github.com/nagisa/rust_libloading,ISC,Simonas Kazlauskas @@ -287,7 +285,6 @@ mio,https://github.com/tokio-rs/mio,MIT,"Carl Lerche , Thomas mockall,https://github.com/asomers/mockall,MIT OR Apache-2.0,Alan Somers mockall_derive,https://github.com/asomers/mockall,MIT OR Apache-2.0,Alan Somers mrecordlog,https://github.com/quickwit-oss/mrecordlog,MIT,The mrecordlog Authors -multer,https://github.com/rousan/multer-rs,MIT,Rousan Ali multimap,https://github.com/havarnov/multimap,MIT OR Apache-2.0,Håvar Nøvik murmurhash32,https://github.com/quickwit-inc/murmurhash32,MIT,Paul Masurel nanorand,https://github.com/Absolucy/nanorand-rs,Zlib,Lucy @@ -402,7 +399,6 @@ rust-embed-impl,https://github.com/pyros2097/rust-embed,MIT,pyros2097 rust-stemmers,https://github.com/CurrySoftware/rust-stemmers,MIT OR BSD-3-Clause,"Jakob Demler , CurrySoftware " rustc-demangle,https://github.com/rust-lang/rustc-demangle,MIT OR Apache-2.0,Alex Crichton -rustc-hash,https://github.com/rust-lang-nursery/rustc-hash,Apache-2.0 OR MIT,The Rust Project Developers rustc-hash,https://github.com/rust-lang/rustc-hash,Apache-2.0 OR MIT,The Rust Project Developers rustix,https://github.com/bytecodealliance/rustix,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,"Dan Gohman , Jakub Konka " rustls,https://github.com/rustls/rustls,Apache-2.0 OR ISC OR MIT,The rustls Authors @@ -494,7 +490,6 @@ tokio-macros,https://github.com/tokio-rs/tokio,MIT,Tokio Contributors tokio-rustls,https://github.com/rustls/tokio-rustls,MIT OR Apache-2.0,The tokio-rustls Authors tokio-stream,https://github.com/tokio-rs/tokio,MIT,Tokio Contributors -tokio-tungstenite,https://github.com/snapview/tokio-tungstenite,MIT,"Daniel Abramov , Alexey Galakhov " tokio-util,https://github.com/tokio-rs/tokio,MIT,Tokio Contributors toml,https://github.com/toml-rs/toml,MIT OR Apache-2.0,Alex Crichton toml_datetime,https://github.com/toml-rs/toml,MIT OR Apache-2.0,The toml_datetime Authors @@ -516,7 +511,6 @@ tracing-serde,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors , David Barsky , Tokio Contributors " try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur 
ttl_cache,https://github.com/stusmall/ttl_cache,MIT OR Apache-2.0,Stu Small -tungstenite,https://github.com/snapview/tungstenite-rs,MIT OR Apache-2.0,"Alexey Galakhov, Daniel Abramov" typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg , Andre Bogus " ulid,https://github.com/dylanhart/ulid-rs,MIT,dylanhart unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors @@ -528,7 +522,6 @@ untrusted,https://github.com/briansmith/untrusted,ISC,Brian Smith , Bertram Truong " username,https://pijul.org/darcs/user,MIT OR Apache-2.0,Pierre-Étienne Meunier -utf-8,https://github.com/SimonSapin/rust-utf8,MIT OR Apache-2.0,Simon Sapin utf8-ranges,https://github.com/BurntSushi/utf8-ranges,Unlicense OR MIT,Andrew Gallant utf8_iter,https://github.com/hsivonen/utf8_iter,Apache-2.0 OR MIT,Henri Sivonen utf8parse,https://github.com/alacritty/vte,Apache-2.0 OR MIT,"Joe Wilm , Christian Duerr " @@ -556,7 +549,6 @@ wasm-timer,https://github.com/tomaka/wasm-timer,MIT,Pierre Krieger whichlang,https://github.com/quickwit-oss/whichlang,MIT,"Quickwit, Inc. " winapi,https://github.com/retep998/winapi-rs,MIT,Peter Atashian winapi,https://github.com/retep998/winapi-rs,MIT OR Apache-2.0,Peter Atashian diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index b732a7ae330..a5460b06d59 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -300,7 +300,7 @@ dependencies = [ "futures-lite 2.6.1", "parking", "polling", - "rustix 1.1.1", + "rustix 1.1.2", "slab", "windows-sys 0.60.2", ] @@ -331,7 +331,7 @@ dependencies = [ "cfg-if", "event-listener 5.4.1", "futures-lite 2.6.1", - "rustix 1.1.1", + "rustix 1.1.2", ] [[package]] @@ -346,7 +346,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 1.1.1", + "rustix 1.1.2", "signal-hook-registry", "slab", "windows-sys 0.60.2", @@ -468,9 +468,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.13.3" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" +checksum = "94b8ff6c09cd57b16da53641caa860168b88c172a5ee163b0288d3d6eea12786" dependencies = [ "aws-lc-sys", "zeroize", @@ -478,11 +478,11 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff" +checksum = "0e44d16778acaf6a9ec9899b92cebd65580b83f685446bf2e1f5d3d732f99dcd" dependencies = [ - "bindgen 0.69.5", + "bindgen", "cc", "cmake", "dunce", @@ -1296,29 +1296,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bindgen" -version = "0.69.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" -dependencies = [ - "bitflags 2.9.4", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.106", - "which", -] - [[package]] name = "bindgen" version = "0.72.1" @@ -1329,10 +1306,12 @@ dependencies = [ "cexpr", "clang-sys", "itertools 0.13.0", + "log", + "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash", "shlex", "syn 2.0.106", ] @@ -1348,7 +1327,7 @@ dependencies = [ "miniserde", "peakmem-alloc", "perf-event", - "rustc-hash 2.1.1", + "rustc-hash", "rustop", "unicode-width 0.1.14", "yansi", @@ -3591,14 
+3570,14 @@ dependencies = [ [[package]] name = "headers" -version = "0.3.9" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" +checksum = "b3314d5adb5d94bcdf56771f2e50dbbc80bb4bdf88967526706205ac9eff24eb" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "bytes", "headers-core", - "http 0.2.12", + "http 1.3.1", "httpdate", "mime", "sha1", @@ -3606,11 +3585,11 @@ dependencies = [ [[package]] name = "headers-core" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" +checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http 0.2.12", + "http 1.3.1", ] [[package]] @@ -3751,19 +3730,13 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "add0ab9360ddbd88cfeb3bd9574a1d85cfdfa14db10b3e21d3700dbc4328758f" - [[package]] name = "http-serde" -version = "1.1.3" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f560b665ad9f1572cfcaf034f7fb84338a7ce945216d64a90fd81f046a3caee" +checksum = "0f056c8559e3757392c8d091e796416e4649d8e49e88b8d76df6c002f05027fd" dependencies = [ - "http 0.2.12", + "http 1.3.1", "serde", ] @@ -4286,15 +4259,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -4432,12 +4396,6 @@ dependencies = [ "spin 0.9.8", ] -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -4739,12 +4697,6 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" -[[package]] -name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" - [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -5034,24 +4986,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "multer" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" -dependencies = [ - "bytes", - "encoding_rs", - "futures-util", - "http 0.2.12", - "httparse", - "log", - "memchr", - "mime", - "spin 0.9.8", - "version_check", -] - [[package]] name = "multimap" version = "0.10.1" @@ -6185,7 +6119,7 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix 1.1.1", + "rustix 1.1.2", "windows-sys 0.60.2", ] @@ -6765,6 +6699,7 @@ dependencies = [ "quickwit-storage", "quickwit-telemetry", "reqwest 0.12.23", + "rustls 0.23.31", "serde_json", "tabled", "tempfile", @@ -6901,7 +6836,7 @@ dependencies = [ "chrono", "cron", "enum-iterator", - "http 0.2.12", + "http 1.3.1", "http-serde", "humantime", "itertools 0.14.0", @@ -7163,6 +7098,7 @@ dependencies = [ 
"quickwit-storage", "rand 0.8.5", "reqwest 0.12.23", + "rustls 0.23.31", "serde_json", "tempfile", "tokio", @@ -7365,7 +7301,7 @@ dependencies = [ "quickwit-common", "quickwit-datetime", "regex", - "rustc-hash 2.1.1", + "rustc-hash", "serde", "serde_json", "serde_with", @@ -7467,6 +7403,8 @@ dependencies = [ "glob", "hex", "http 1.3.1", + "http-body 1.0.1", + "http-body-util", "http-serde", "humantime", "hyper 1.7.0", @@ -7501,8 +7439,8 @@ dependencies = [ "quickwit-telemetry", "regex", "rust-embed", - "rustls 0.21.12", - "rustls-pemfile", + "rustls 0.23.31", + "rustls-pemfile 2.2.0", "serde", "serde_json", "serde_qs 0.12.0", @@ -7511,14 +7449,14 @@ dependencies = [ "thiserror 2.0.16", "time", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls 0.26.2", "tokio-stream", "tokio-util", "tonic 0.13.1", "tonic-health", "tonic-reflection", "tower 0.5.2", - "tower-http 0.4.4", + "tower-http", "tracing", "utoipa", "warp", @@ -7605,7 +7543,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash", "rustls 0.23.31", "socket2 0.6.0", "thiserror 2.0.16", @@ -7625,7 +7563,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring 0.17.14", - "rustc-hash 2.1.1", + "rustc-hash", "rustls 0.23.31", "rustls-pki-types", "slab", @@ -7985,7 +7923,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls 0.21.12", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", @@ -8037,7 +7975,7 @@ dependencies = [ "tokio-rustls 0.26.2", "tokio-util", "tower 0.5.2", - "tower-http 0.6.6", + "tower-http", "tower-service", "url", "wasm-bindgen", @@ -8283,12 +8221,6 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -8319,15 +8251,14 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9621e389a110cae094269936383d69b869492f03e5c1ed2d575a53c029d4441d" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags 2.9.4", "errno", "libc", "linux-raw-sys 0.11.0", - "linux-raw-sys 0.9.4", "windows-sys 0.61.0", ] @@ -8366,7 +8297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "schannel", "security-framework 2.11.1", ] @@ -8392,6 +8323,15 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "rustls-pki-types" version = "1.12.0" @@ -8780,12 +8720,9 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0431a35568651e363364210c91983c1da5eb29404d9f0928b67d4ebcfa7d330c" dependencies = [ - "futures", "percent-encoding", "serde", "thiserror 1.0.69", - "tracing", - "warp", ] [[package]] @@ -9205,7 +9142,7 @@ dependencies = [ "paste", "percent-encoding", "rustls 0.21.12", 
- "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "sha2", @@ -9630,7 +9567,7 @@ dependencies = [ "rayon", "regex", "rust-stemmers", - "rustc-hash 2.1.1", + "rustc-hash", "serde", "serde_json", "sketches-ddsketch", @@ -9755,15 +9692,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.21.0" +version = "3.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" +checksum = "84fa4d11fadde498443cca10fd3ac23c951f0dc59e080e9f4b93d4df4e4eea53" dependencies = [ "fastrand 2.3.0", "getrandom 0.3.3", "once_cell", - "rustix 1.1.1", - "windows-sys 0.60.2", + "rustix 1.1.2", + "windows-sys 0.61.0", ] [[package]] @@ -10061,18 +9998,6 @@ dependencies = [ "tokio-util", ] -[[package]] -name = "tokio-tungstenite" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite", -] - [[package]] name = "tokio-util" version = "0.7.16" @@ -10157,7 +10082,7 @@ dependencies = [ "percent-encoding", "pin-project", "prost 0.11.9", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "tokio", "tokio-rustls 0.24.1", "tokio-stream", @@ -10309,40 +10234,23 @@ dependencies = [ "tracing", ] -[[package]] -name = "tower-http" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" -dependencies = [ - "async-compression", - "bitflags 2.9.4", - "bytes", - "futures-core", - "futures-util", - "http 0.2.12", - "http-body 0.4.6", - "http-range-header", - "pin-project-lite", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", -] - [[package]] name = "tower-http" version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ + "async-compression", "bitflags 2.9.4", "bytes", + "futures-core", "futures-util", "http 1.3.1", "http-body 1.0.1", "iri-string", "pin-project-lite", + "tokio", + "tokio-util", "tower 0.5.2", "tower-layer", "tower-service", @@ -10469,25 +10377,6 @@ dependencies = [ "linked-hash-map", ] -[[package]] -name = "tungstenite" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" -dependencies = [ - "byteorder", - "bytes", - "data-encoding", - "http 1.3.1", - "httparse", - "log", - "rand 0.8.5", - "sha1", - "thiserror 1.0.69", - "url", - "utf-8", -] - [[package]] name = "typenum" version = "1.18.0" @@ -10659,12 +10548,6 @@ dependencies = [ "winapi 0.2.8", ] -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf8-ranges" version = "1.0.5" @@ -10910,20 +10793,22 @@ dependencies = [ [[package]] name = "warp" -version = "0.3.7" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" +checksum = "51d06d9202adc1f15d709c4f4a2069be5428aa912cc025d6f268ac441ab066b0" dependencies = [ "bytes", "futures-channel", "futures-util", "headers", - "http 0.2.12", - "hyper 0.14.32", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 
1.7.0", + "hyper-util", "log", "mime", "mime_guess", - "multer", "percent-encoding", "pin-project", "scoped-tls", @@ -10931,7 +10816,6 @@ dependencies = [ "serde_json", "serde_urlencoded", "tokio", - "tokio-tungstenite", "tokio-util", "tower-service", "tracing", @@ -11117,18 +11001,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.44", -] - [[package]] name = "whichlang" version = "0.1.1" @@ -11689,7 +11561,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af3a19837351dc82ba89f8a125e22a3c475f05aba604acc023d62b2739ae2909" dependencies = [ "libc", - "rustix 1.1.1", + "rustix 1.1.2", ] [[package]] @@ -11880,7 +11752,7 @@ version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ - "bindgen 0.72.1", + "bindgen", "cc", "pkg-config", ] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index a68147f782a..cc1f181d196 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -122,17 +122,17 @@ hex = "0.4" home = "0.5" hostname = "0.3" http = "1.3" -http-body-util = "0.1.3" -http-serde = "1.1" +http-body = "1.0" +http-body-util = "0.1" +http-serde = "2.1" humantime = "2.2" -hyper = { version = "1.6", features = ["client", "http1", "http2", "server"] } +hyper = { version = "1.7", features = ["client", "http1", "http2", "server"] } hyper-rustls = "0.27" hyper-util = { version = "0.1", features = ["full"] } indexmap = { version = "2.9", features = ["serde"] } indicatif = "0.17" itertools = "0.14" json_comments = "0.2" -legacy-http = { version = "0.2", package = "http" } libz-sys = "1.1" lindera-core = "0.27" lindera-dictionary = "0.27" @@ -204,8 +204,8 @@ reqwest-middleware = "0.4" reqwest-retry = "0.7" rust-embed = "6.8" rustc-hash = "2.1" -rustls = "0.21" -rustls-pemfile = "1.0" +rustls = "0.23" +rustls-pemfile = "2.2" sea-query = { version = "0.30" } sea-query-binder = { version = "0.5", features = [ "runtime-tokio-rustls", @@ -215,7 +215,7 @@ sea-query-binder = { version = "0.5", features = [ serde = { version = "1.0.219", features = ["derive", "rc"] } serde_json = "1.0" serde_json_borrow = "0.5" -serde_qs = { version = "0.12", features = ["warp"] } +serde_qs = { version = "0.12" } serde_with = "3.12" serde_yaml = "0.9" serial_test = { version = "3.2", features = ["file_locks"] } @@ -239,7 +239,7 @@ tikv-jemallocator = "0.5" time = { version = "0.3", features = ["std", "formatting", "macros"] } tokio = { version = "1.45", features = ["full"] } tokio-metrics = { version = "0.3", features = ["rt"] } -tokio-rustls = { version = "0.24", default-features = false } +tokio-rustls = { version = "0.26", default-features = false } tokio-stream = { version = "0.1", features = ["sync"] } tokio-util = { version = "0.7", features = ["full"] } toml = "0.7" @@ -260,7 +260,7 @@ tower = { version = "0.5", features = [ "util", ] } # legacy version because of warp -tower-http = { version = "0.4", features = [ +tower-http = { version = "0.6", features = [ "compression-gzip", "compression-zstd", "cors", @@ -285,20 +285,20 @@ vrl = { version = "0.22", default-features = false, features = [ "stdlib", "value", ] } -warp = "0.3" +warp = { version = "0.4", features = ["server", "test"] } whichlang = 
"0.1" wiremock = "0.6" zstd = "0.13" -aws-config = "1.6" +aws-config = "1.8" aws-credential-types = { version = "1.2", features = ["hardcoded-credentials"] } aws-runtime = "1.5" -aws-sdk-kinesis = "1.74" +aws-sdk-kinesis = "1.86" aws-sdk-s3 = "=1.62" -aws-sdk-sqs = "1.70" +aws-sdk-sqs = "1.82" aws-smithy-async = "1.2" -aws-smithy-http-client = { version = "1.0" } -aws-smithy-runtime = "1.8" +aws-smithy-http-client = { version = "1.1", features = ["default-client"] } +aws-smithy-runtime = "1.9" aws-smithy-types = { version = "1.3", features = [ "byte-stream-poll-next", "http-body-1-x", diff --git a/quickwit/quickwit-cli/Cargo.toml b/quickwit/quickwit-cli/Cargo.toml index 2ddbd21baae..c595cb7e90a 100644 --- a/quickwit/quickwit-cli/Cargo.toml +++ b/quickwit/quickwit-cli/Cargo.toml @@ -40,6 +40,7 @@ opentelemetry = { workspace = true } opentelemetry_sdk = { workspace = true } opentelemetry-otlp = { workspace = true } reqwest = { workspace = true } +rustls = { workspace = true } serde_json = { workspace = true } tabled = { workspace = true } tempfile = { workspace = true } diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index 989752dfd05..691725eab4e 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -89,6 +89,10 @@ async fn main_impl() -> anyhow::Result<()> { } }; + rustls::crypto::ring::default_provider() + .install_default() + .expect("rustls crypto ring default provider installation should not fail"); + #[cfg(feature = "jemalloc")] start_jemalloc_metrics_loop(); diff --git a/quickwit/quickwit-config/Cargo.toml b/quickwit/quickwit-config/Cargo.toml index df52a40ced7..7cf75818444 100644 --- a/quickwit/quickwit-config/Cargo.toml +++ b/quickwit/quickwit-config/Cargo.toml @@ -17,11 +17,11 @@ bytesize = { workspace = true } chrono = { workspace = true } cron = { workspace = true } enum-iterator = { workspace = true } +http = { workspace = true } http-serde = { workspace = true } humantime = { workspace = true } itertools = { workspace = true } json_comments = { workspace = true } -legacy-http = { workspace = true } new_string_template = { workspace = true } once_cell = { workspace = true } regex = { workspace = true } diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 5b5176fda7f..bb8a17daaeb 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -23,7 +23,7 @@ use std::time::Duration; use anyhow::{bail, ensure}; use bytesize::ByteSize; -use legacy_http::HeaderMap; +use http::HeaderMap; use quickwit_common::net::HostAddr; use quickwit_common::shared_consts::{ DEFAULT_SHARD_BURST_LIMIT, DEFAULT_SHARD_SCALE_UP_FACTOR, DEFAULT_SHARD_THROUGHPUT_LIMIT, diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index ab8ae75243d..b5f39ceb0ac 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -19,7 +19,7 @@ use std::time::Duration; use anyhow::{Context, bail}; use bytesize::ByteSize; -use legacy_http::HeaderMap; +use http::HeaderMap; use quickwit_common::fs::get_disk_size; use quickwit_common::net::{Host, find_private_ip, get_short_hostname}; use quickwit_common::new_coolid; diff --git a/quickwit/quickwit-indexing/src/source/queue_sources/sqs_queue.rs b/quickwit/quickwit-indexing/src/source/queue_sources/sqs_queue.rs index 1e4c55142ac..d9159de1a35 100644 --- 
a/quickwit/quickwit-indexing/src/source/queue_sources/sqs_queue.rs +++ b/quickwit/quickwit-indexing/src/source/queue_sources/sqs_queue.rs @@ -318,21 +318,21 @@ pub mod test_helpers { /// /// Returns the queue URL to use for the source and a guard for the /// temporary mock server - pub fn start_mock_sqs_get_queue_attributes_endpoint() -> (String, oneshot::Sender<()>) { + pub async fn start_mock_sqs_get_queue_attributes_endpoint() -> (String, oneshot::Sender<()>) { let hello = warp::path!().map(|| "{}"); let (tx, rx) = oneshot::channel(); - let (addr, server) = - warp::serve(hello).bind_with_graceful_shutdown(([127, 0, 0, 1], 0), async { - rx.await.ok(); - }); - tokio::spawn(server); - let queue_url = format!("http://{}:{}/", addr.ip(), addr.port()); + let server = warp::serve(hello).bind(([127, 0, 0, 1], 0)).await; + let signal_future = async { + rx.await.ok(); + }; + server.graceful(signal_future); + let queue_url = "http://127.0.0.1:0/".to_string(); (queue_url, tx) } #[tokio::test] async fn test_mock_sqs_get_queue_attributes_endpoint() { - let (queue_url, _shutdown) = start_mock_sqs_get_queue_attributes_endpoint(); + let (queue_url, _shutdown) = start_mock_sqs_get_queue_attributes_endpoint().await; check_connectivity(&queue_url).await.unwrap(); drop(_shutdown); check_connectivity(&queue_url).await.unwrap_err(); diff --git a/quickwit/quickwit-integration-tests/Cargo.toml b/quickwit/quickwit-integration-tests/Cargo.toml index 23f2d884718..55308cff556 100644 --- a/quickwit/quickwit-integration-tests/Cargo.toml +++ b/quickwit/quickwit-integration-tests/Cargo.toml @@ -27,6 +27,7 @@ hyper-util = { workspace = true } itertools = { workspace = true } rand = { workspace = true } reqwest = { workspace = true } +rustls = { workspace = true } serde_json = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } diff --git a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs index 81889a9311a..36fbadbbcea 100644 --- a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs +++ b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs @@ -176,6 +176,10 @@ pub struct ResolvedClusterConfig { impl ResolvedClusterConfig { /// Start a cluster using this config and waits for the nodes to be ready pub async fn start(self) -> ClusterSandbox { + rustls::crypto::ring::default_provider() + .install_default() + .expect("rustls crypto ring default provider installation should not fail"); + let mut node_shutdown_handles = Vec::new(); let runtimes_config = RuntimesConfig::light_for_tests(); let storage_resolver = StorageResolver::unconfigured(); diff --git a/quickwit/quickwit-lambda/README.md b/quickwit/quickwit-lambda/README.md index 26aacdb0a0c..88fa9c8748a 100644 --- a/quickwit/quickwit-lambda/README.md +++ b/quickwit/quickwit-lambda/README.md @@ -1,4 +1,4 @@ # Deprecation This package was removed in Q3 2025. The maintenance burden was high and the -feature was unused. +feature was unused. 
\ No newline at end of file diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index 0d31bfd3db1..99c8fe3922d 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -23,6 +23,8 @@ futures-util = { workspace = true } glob = { workspace = true } hex = { workspace = true } http = { workspace = true } +http-body = { workspace = true } +http-body-util = { workspace = true } http-serde = { workspace = true } humantime = { workspace = true } hyper = { workspace = true } @@ -55,7 +57,7 @@ tower = { workspace = true, features = ["limit"] } tower-http = { workspace = true } tracing = { workspace = true } utoipa = { workspace = true } -warp = { workspace = true } +warp = { workspace = true, features = ["server"] } zstd = { workspace = true } quickwit-actors = { workspace = true } diff --git a/quickwit/quickwit-serve/src/developer_api/heap_prof.rs b/quickwit/quickwit-serve/src/developer_api/heap_prof.rs index a1d3cd3d224..a6b833bc4ac 100644 --- a/quickwit/quickwit-serve/src/developer_api/heap_prof.rs +++ b/quickwit/quickwit-serve/src/developer_api/heap_prof.rs @@ -35,7 +35,7 @@ pub fn heap_prof_handlers() async fn start_profiler_handler( params: ProfilerQueryParams, - ) -> Result, warp::Rejection> { + ) -> Result { start_profiling(params.min_alloc_size, params.backtrace_every); let response = warp::reply::with_status("Heap profiling started", warp::http::StatusCode::OK) @@ -43,8 +43,7 @@ pub fn heap_prof_handlers() Ok(response) } - async fn stop_profiler_handler() - -> Result, warp::Rejection> { + async fn stop_profiler_handler() -> Result { stop_profiling(); let response = warp::reply::with_status("Heap profiling stopped", warp::http::StatusCode::OK) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs b/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs index 3104ded1b68..b8d2343f666 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs @@ -50,7 +50,7 @@ pub(crate) fn elasticsearch_filter() -> impl Filter + Clone { warp::path!("_elastic" / "_search") .and(warp::get().or(warp::post()).unify()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } #[utoipa::path( @@ -74,7 +74,7 @@ pub(crate) fn elastic_bulk_filter( content_length_limit.as_u64(), )) .and(get_body_bytes()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } #[utoipa::path( @@ -98,9 +98,7 @@ pub(crate) fn elastic_index_bulk_filter( content_length_limit.as_u64(), )) .and(get_body_bytes()) - .and(serde_qs::warp::query::( - serde_qs::Config::default(), - )) + .and(warp::query::()) } /// Like the warp json filter, but accepts an empty body and interprets it as `T::default`. 
@@ -138,7 +136,7 @@ pub(crate) fn elastic_index_field_capabilities_filter() -> impl Filter< warp::path!("_elastic" / String / "_field_caps") .and_then(extract_index_id_patterns) .and(warp::get().or(warp::post()).unify()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(json_or_empty()) } @@ -154,7 +152,7 @@ pub(crate) fn elastic_field_capabilities_filter() -> impl Filter< warp::path!("_elastic" / "_field_caps") .and_then(extract_index_id_patterns_default) .and(warp::get().or(warp::post()).unify()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(json_or_empty()) } @@ -173,7 +171,7 @@ pub(crate) fn elastic_index_count_filter() warp::path!("_elastic" / String / "_count") .and_then(extract_index_id_patterns) .and(warp::get().or(warp::post()).unify()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(json_or_empty()) } @@ -183,7 +181,7 @@ pub(crate) fn elastic_delete_index_filter() warp::path!("_elastic" / String) .and(warp::delete()) .and_then(extract_index_id_patterns) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } // No support for any query parameters for now. @@ -212,7 +210,7 @@ pub(crate) fn elastic_index_cat_indices_filter() warp::path!("_elastic" / "_cat" / "indices" / String) .and_then(extract_index_id_patterns) .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } #[utoipa::path(get, tag = "Search", path = "/_cat/indices")] @@ -220,7 +218,7 @@ pub(crate) fn elastic_cat_indices_filter() -> impl Filter + Clone { warp::path!("_elastic" / "_cat" / "indices") .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } #[utoipa::path(get, tag = "Search", path = "/{index}/_search")] @@ -229,7 +227,7 @@ pub(crate) fn elastic_index_search_filter() warp::path!("_elastic" / String / "_search") .and_then(extract_index_id_patterns) .and(warp::get().or(warp::post()).unify()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(json_or_empty()) } @@ -240,7 +238,7 @@ pub(crate) fn elastic_multi_search_filter() .and(warp::body::content_length_limit(BODY_LENGTH_LIMIT.as_u64())) .and(warp::body::bytes()) .and(warp::post()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } fn merge_scroll_body_params( @@ -259,7 +257,7 @@ pub(crate) fn elastic_scroll_filter() warp::path!("_elastic" / "_search" / "scroll") .and(warp::body::content_length_limit(BODY_LENGTH_LIMIT.as_u64())) .and(warp::get().or(warp::post()).unify()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(json_or_empty()) .map( |scroll_query_params: ScrollQueryParams, scroll_body: ScrollQueryParams| { diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/error.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/error.rs index 713a3d8e5ce..c11807fb4fb 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/error.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/model/error.rs @@ -22,8 +22,6 @@ use quickwit_search::SearchError; use serde::{Deserialize, Serialize}; use warp::hyper::StatusCode; -use crate::convert_status_code_to_legacy_http; - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ElasticsearchError { #[serde(with = "http_serde::status_code")] @@ -71,7 +69,7 @@ impl From for ElasticsearchError { additional_details: Default::default(), }; ElasticsearchError { - status: 
crate::convert_status_code_to_legacy_http(status), + status, error: reason, } } @@ -79,9 +77,7 @@ impl From for ElasticsearchError { impl From for ElasticsearchError { fn from(ingest_service_error: IngestServiceError) -> Self { - let status = crate::convert_status_code_to_legacy_http( - ingest_service_error.error_code().http_status_code(), - ); + let status = ingest_service_error.error_code().http_status_code(); let reason = ErrorCause { reason: Some(ingest_service_error.to_string()), @@ -113,7 +109,7 @@ impl From for ElasticsearchError { additional_details: Default::default(), }; ElasticsearchError { - status: crate::convert_status_code_to_legacy_http(status), + status, error: reason, } } @@ -133,7 +129,7 @@ impl From for ElasticsearchError { additional_details: Default::default(), }; ElasticsearchError { - status: convert_status_code_to_legacy_http(status), + status, error: reason, } } diff --git a/quickwit/quickwit-serve/src/format.rs b/quickwit/quickwit-serve/src/format.rs index 7b53f92f9c8..1f0d8fb19ec 100644 --- a/quickwit/quickwit-serve/src/format.rs +++ b/quickwit/quickwit-serve/src/format.rs @@ -80,8 +80,7 @@ struct FormatQueryString { pub(crate) fn extract_format_from_qs() -> impl Filter + Clone { - serde_qs::warp::query::(serde_qs::Config::default()) - .map(|format_qs: FormatQueryString| format_qs.format) + warp::query::().map(|format_qs: FormatQueryString| format_qs.format) } #[derive(Debug, Error)] diff --git a/quickwit/quickwit-serve/src/index_api/index_resource.rs b/quickwit/quickwit-serve/src/index_api/index_resource.rs index 29634af7881..cd0a5c0e4f6 100644 --- a/quickwit/quickwit-serve/src/index_api/index_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/index_resource.rs @@ -81,7 +81,7 @@ pub fn list_indexes_metadata_handler( ) -> impl Filter + Clone { warp::path!("indexes") .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(with_arg(metastore)) .then(list_indexes_metadata) .and(extract_format_from_qs()) @@ -238,7 +238,7 @@ pub fn create_index_handler( ) -> impl Filter + Clone { warp::path!("indexes") .and(warp::post()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(extract_config_format()) .and(warp::body::content_length_limit(1024 * 1024)) .and(warp::filters::body::bytes()) @@ -294,7 +294,7 @@ pub struct UpdateQueryParams { } fn update_index_qp() -> impl Filter + Clone { - serde_qs::warp::query::(serde_qs::Config::default()) + warp::query::() } pub fn update_index_handler( @@ -449,7 +449,7 @@ pub fn delete_index_handler( ) -> impl Filter + Clone { warp::path!("indexes" / String) .and(warp::delete()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(with_arg(index_service)) .then(delete_index) .and(extract_format_from_qs()) diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index 1b6e3a7a649..bc4c8d9b105 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -1140,7 +1140,7 @@ mod tests { use quickwit_indexing::source::sqs_queue::test_helpers::start_mock_sqs_get_queue_attributes_endpoint; let metastore = metastore_for_test(); - let (queue_url, _guard) = start_mock_sqs_get_queue_attributes_endpoint(); + let (queue_url, _guard) = start_mock_sqs_get_queue_attributes_endpoint().await; let index_service = IndexService::new(metastore.clone(), StorageResolver::unconfigured()); let mut node_config = 
NodeConfig::for_test(); node_config.default_index_root_uri = Uri::for_test("file:///default-index-root-uri"); diff --git a/quickwit/quickwit-serve/src/index_api/source_resource.rs b/quickwit/quickwit-serve/src/index_api/source_resource.rs index 39eb09b8894..66f3069b228 100644 --- a/quickwit/quickwit-serve/src/index_api/source_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/source_resource.rs @@ -111,7 +111,7 @@ pub struct UpdateQueryParams { } fn update_source_qp() -> impl Filter + Clone { - serde_qs::warp::query::(serde_qs::Config::default()) + warp::query::() } pub fn update_source_handler( diff --git a/quickwit/quickwit-serve/src/index_api/split_resource.rs b/quickwit/quickwit-serve/src/index_api/split_resource.rs index a439328ffa1..a062186e551 100644 --- a/quickwit/quickwit-serve/src/index_api/split_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/split_resource.rs @@ -141,7 +141,7 @@ pub fn list_splits_handler( ) -> impl Filter + Clone { warp::path!("indexes" / String / "splits") .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(with_arg(metastore)) .then(list_splits) .and(extract_format_from_qs()) diff --git a/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs b/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs index a12d154c63c..9040c8fd700 100644 --- a/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/ingest_api/rest_handler.rs @@ -98,9 +98,7 @@ fn ingest_filter( config.content_length_limit.as_u64(), )) .and(get_body_bytes()) - .and(serde_qs::warp::query::( - serde_qs::Config::default(), - )) + .and(warp::query::()) } fn ingest_handler( diff --git a/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs b/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs index 45d3d5d42f6..def8a4c6ca7 100644 --- a/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs @@ -139,7 +139,7 @@ pub fn jaeger_traces_search_handler( ) -> impl Filter + Clone { jaeger_api_path_filter() .and(warp::path!("traces")) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) .and(require(jaeger_service_opt)) .then(jaeger_traces_search) .map(|result| make_jaeger_api_response(result, BodyFormat::default())) diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 91afc4a4dd7..44c0f648b5a 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -257,11 +257,6 @@ async fn balance_channel_for_service( BalanceChannel::from_stream(service_change_stream) } -fn convert_status_code_to_legacy_http(status_code: http::StatusCode) -> warp::http::StatusCode { - warp::http::StatusCode::from_u16(status_code.as_u16()) - .unwrap_or(warp::http::StatusCode::INTERNAL_SERVER_ERROR) -} - async fn start_ingest_client_if_needed( node_config: &NodeConfig, universe: &Universe, diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 01f5207e544..e0e8e637f6b 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -13,23 +13,24 @@ // limitations under the License. 
use std::fmt::Formatter; -use std::pin::Pin; use std::sync::Arc; +use hyper_util::rt::{TokioExecutor, TokioIo}; +use hyper_util::server::conn::auto::Builder; +use hyper_util::service::TowerToHyperService; use quickwit_common::tower::BoxFutureInfaillible; use quickwit_config::{disable_ingest_v1, enable_ingest_v2}; use quickwit_search::SearchService; use tokio::net::TcpListener; +use tokio_rustls::TlsAcceptor; +use tokio_util::either::Either; use tower::ServiceBuilder; -use tower::make::Shared; use tower_http::compression::CompressionLayer; use tower_http::compression::predicate::{NotForContentType, Predicate, SizeAbove}; use tower_http::cors::CorsLayer; use tracing::{error, info}; use warp::filters::log::Info; use warp::hyper::http::HeaderValue; -use warp::hyper::server::accept::Accept; -use warp::hyper::server::conn::AddrIncoming; use warp::hyper::{Method, StatusCode, http}; use warp::{Filter, Rejection, Reply, redirect}; @@ -110,7 +111,7 @@ impl CompressionPredicate { impl Predicate for CompressionPredicate { fn should_compress(&self, response: &http::Response) -> bool - where B: warp::hyper::body::HttpBody { + where B: http_body::Body { if let Some(size_above) = self.size_above_opt { size_above.should_compress(response) } else { @@ -219,32 +220,61 @@ pub(crate) async fn start_rest_server( "starting REST server listening on {rest_listen_addr}" ); - let incoming = AddrIncoming::from_listener(tcp_listener)?; + let service = TowerToHyperService::new(service); - let maybe_tls_incoming = - if let Some(tls_config) = &quickwit_services.node_config.rest_config.tls { - let rustls_config = tls::make_rustls_config(tls_config)?; - EitherIncoming::Left(tls::TlsAcceptor::new(rustls_config, incoming)) - } else { - EitherIncoming::Right(incoming) - }; + let server = Builder::new(TokioExecutor::new()); + let graceful = hyper_util::server::graceful::GracefulShutdown::new(); + let mut shutdown_signal = std::pin::pin!(shutdown_signal); + readiness_trigger.await; - // `graceful_shutdown()` seems to be blocking in presence of existing connections. - // The following approach of dropping the serve supposedly is not bullet proof, but it seems to - // work in our unit test. - // - // See more of the discussion here: - // https://github.com/hyperium/hyper/issues/2386 - - let serve_fut = async move { + loop { tokio::select! { - res = warp::hyper::Server::builder(maybe_tls_incoming).serve(Shared::new(service)) => { res } - _ = shutdown_signal => { Ok(()) } + conn = tcp_listener.accept() => { + let (stream, _remote_addr) = match conn { + Ok(conn) => conn, + Err(err) => { + error!("failed to accept connection: {err:#}"); + continue; + } + }; + + let either_stream = + if let Some(tls_config) = &quickwit_services.node_config.rest_config.tls { + let rustls_config = tls::make_rustls_config(tls_config)?; + let acceptor = TlsAcceptor::from(rustls_config); + let tls_stream = match acceptor.accept(stream).await { + Ok(tls_stream) => tls_stream, + Err(err) => { + error!("failed to perform tls handshake: {err:#}"); + continue; + } + }; + Either::Left(tls_stream) + } else { + Either::Right(stream) + }; + + let conn = server.serve_connection_with_upgrades(TokioIo::new(either_stream), service.clone()); + let conn = graceful.watch(conn.into_owned()); + tokio::spawn(async move { + if let Err(err) = conn.await { + error!("failed to serve connection: {err:#}"); + } + }); + }, + _ = &mut shutdown_signal => { + info!("REST server shutdown signal received"); + break; + } } - }; + } + + tokio::select! 
{ + _ = graceful.shutdown() => { + info!("gracefully shutdown"); + } + } - let (serve_res, _trigger_res) = tokio::join!(serve_fut, readiness_trigger); - serve_res?; Ok(()) } @@ -480,158 +510,37 @@ fn build_cors(cors_origins: &[String]) -> CorsLayer { mod tls { // most of this module is copied from hyper-tls examples, licensed under Apache 2.0, MIT or ISC - use std::future::Future; - use std::pin::Pin; use std::sync::Arc; - use std::task::{Context, Poll, ready}; use std::vec::Vec; use std::{fs, io}; use quickwit_config::TlsConfig; - use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; + use rustls::pki_types::{CertificateDer, PrivateKeyDer}; use tokio_rustls::rustls::ServerConfig; - use warp::hyper::server::accept::Accept; - use warp::hyper::server::conn::{AddrIncoming, AddrStream}; fn io_error(error: String) -> io::Error { io::Error::other(error) } // Load public certificate from file. - fn load_certs(filename: &str) -> io::Result> { + fn load_certs(filename: &str) -> io::Result>> { // Open certificate file. - let certfile = fs::read(filename) + let certfile = fs::File::open(filename) .map_err(|error| io_error(format!("failed to open {filename}: {error}")))?; - + let mut reader = io::BufReader::new(certfile); // Load and return certificate. - let certs = rustls_pemfile::certs(&mut certfile.as_ref()) - .map_err(|_| io_error("failed to load certificate".to_string()))?; - Ok(certs.into_iter().map(rustls::Certificate).collect()) + rustls_pemfile::certs(&mut reader).collect() } // Load private key from file. - fn load_private_key(filename: &str) -> io::Result { + fn load_private_key(filename: &str) -> io::Result> { // Open keyfile. - let keyfile = fs::read(filename) + let keyfile = fs::File::open(filename) .map_err(|error| io_error(format!("failed to open {filename}: {error}")))?; + let mut reader = io::BufReader::new(keyfile); // Load and return a single private key. 
- let keys = rustls_pemfile::pkcs8_private_keys(&mut keyfile.as_ref()) - .map_err(|_| io_error("failed to load private key".to_string()))?; - - if keys.len() != 1 { - return Err(io_error(format!( - "expected a single private key, got {}", - keys.len() - ))); - } - - Ok(rustls::PrivateKey(keys[0].clone())) - } - - pub struct TlsAcceptor { - config: Arc, - incoming: AddrIncoming, - } - - impl TlsAcceptor { - pub fn new(config: Arc, incoming: AddrIncoming) -> TlsAcceptor { - TlsAcceptor { config, incoming } - } - } - - impl Accept for TlsAcceptor { - type Conn = TlsStream; - type Error = io::Error; - - fn poll_accept( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll>> { - let pin = self.get_mut(); - match ready!(Pin::new(&mut pin.incoming).poll_accept(cx)) { - Some(Ok(sock)) => Poll::Ready(Some(Ok(TlsStream::new(sock, pin.config.clone())))), - Some(Err(e)) => Poll::Ready(Some(Err(e))), - None => Poll::Ready(None), - } - } - } - - enum State { - Handshaking(tokio_rustls::Accept), - Streaming(tokio_rustls::server::TlsStream), - } - - // tokio_rustls::server::TlsStream doesn't expose constructor methods, - // so we have to TlsAcceptor::accept and handshake to have access to it - // TlsStream implements AsyncRead/AsyncWrite handshaking tokio_rustls::Accept first - pub struct TlsStream { - state: State, - } - - impl TlsStream { - fn new(stream: AddrStream, config: Arc) -> TlsStream { - let accept = tokio_rustls::TlsAcceptor::from(config).accept(stream); - TlsStream { - state: State::Handshaking(accept), - } - } - } - - impl AsyncRead for TlsStream { - fn poll_read( - self: Pin<&mut Self>, - cx: &mut Context, - buf: &mut ReadBuf, - ) -> Poll> { - let pin = self.get_mut(); - match pin.state { - State::Handshaking(ref mut accept) => match ready!(Pin::new(accept).poll(cx)) { - Ok(mut stream) => { - let result = Pin::new(&mut stream).poll_read(cx, buf); - pin.state = State::Streaming(stream); - result - } - Err(err) => Poll::Ready(Err(err)), - }, - State::Streaming(ref mut stream) => Pin::new(stream).poll_read(cx, buf), - } - } - } - - impl AsyncWrite for TlsStream { - fn poll_write( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - let pin = self.get_mut(); - match pin.state { - State::Handshaking(ref mut accept) => match ready!(Pin::new(accept).poll(cx)) { - Ok(mut stream) => { - let result = Pin::new(&mut stream).poll_write(cx, buf); - pin.state = State::Streaming(stream); - result - } - Err(err) => Poll::Ready(Err(err)), - }, - State::Streaming(ref mut stream) => Pin::new(stream).poll_write(cx, buf), - } - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - match self.state { - State::Handshaking(_) => Poll::Ready(Ok(())), - State::Streaming(ref mut stream) => Pin::new(stream).poll_flush(cx), - } - } - - fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - match self.state { - State::Handshaking(_) => Poll::Ready(Ok(())), - State::Streaming(ref mut stream) => Pin::new(stream).poll_shutdown(cx), - } - } + rustls_pemfile::private_key(&mut reader).map(|key| key.unwrap()) } pub fn make_rustls_config(config: &TlsConfig) -> anyhow::Result> { @@ -645,7 +554,6 @@ mod tls { } let mut cfg = rustls::ServerConfig::builder() - .with_safe_defaults() .with_no_client_auth() .with_single_cert(certs, key) .map_err(|error| io_error(error.to_string()))?; @@ -655,50 +563,6 @@ mod tls { } } -enum EitherIncoming { - Left(L), - Right(R), -} - -impl EitherIncoming { - pub fn as_pin_mut(self: Pin<&mut Self>) -> EitherIncoming, 
Pin<&mut R>> { - // SAFETY: `get_unchecked_mut` is fine because we don't move anything. - // We can use `new_unchecked` because the `inner` parts are guaranteed - // to be pinned, as they come from `self` which is pinned, and we never - // offer an unpinned `&mut A` or `&mut B` through `Pin<&mut Self>`. We - // also don't have an implementation of `Drop`, nor manual `Unpin`. - unsafe { - match self.get_unchecked_mut() { - EitherIncoming::Left(inner) => EitherIncoming::Left(Pin::new_unchecked(inner)), - EitherIncoming::Right(inner) => EitherIncoming::Right(Pin::new_unchecked(inner)), - } - } - } -} - -impl Accept for EitherIncoming -where - L: Accept, - R: Accept, -{ - type Conn = tokio_util::either::Either; - type Error = E; - - fn poll_accept( - self: Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> std::task::Poll>> { - match self.as_pin_mut() { - EitherIncoming::Left(l) => l - .poll_accept(cx) - .map(|opt| opt.map(|res| res.map(tokio_util::either::Either::Left))), - EitherIncoming::Right(r) => r - .poll_accept(cx) - .map(|opt| opt.map(|res| res.map(tokio_util::either::Either::Right))), - } - } -} - #[cfg(test)] mod tests { use std::future::Future; diff --git a/quickwit/quickwit-serve/src/rest_api_response.rs b/quickwit/quickwit-serve/src/rest_api_response.rs index 3efd8158971..0bf56f831f0 100644 --- a/quickwit/quickwit-serve/src/rest_api_response.rs +++ b/quickwit/quickwit-serve/src/rest_api_response.rs @@ -40,9 +40,7 @@ pub(crate) fn into_rest_api_response( body_format: BodyFormat, ) -> RestApiResponse { let rest_api_result = result.map_err(|error| RestApiError { - status_code: crate::convert_status_code_to_legacy_http( - error.error_code().http_status_code(), - ), + status_code: error.error_code().http_status_code(), message: error.to_string(), }); let status_code = match &rest_api_result { diff --git a/quickwit/quickwit-serve/src/search_api/rest_handler.rs b/quickwit/quickwit-serve/src/search_api/rest_handler.rs index 557942ff668..6bc33186f89 100644 --- a/quickwit/quickwit-serve/src/search_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/search_api/rest_handler.rs @@ -298,7 +298,7 @@ fn search_get_filter() warp::path!(String / "search") .and_then(extract_index_id_patterns) .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } fn search_post_filter() @@ -315,7 +315,7 @@ fn search_plan_get_filter() warp::path!(String / "search-plan") .and_then(extract_index_id_patterns) .and(warp::get()) - .and(serde_qs::warp::query(serde_qs::Config::default())) + .and(warp::query()) } fn search_plan_post_filter() @@ -866,7 +866,7 @@ mod tests { .unwrap() .as_str() .unwrap() - .contains("unknown field `end_unix_timestamp`") + .contains("Invalid query string") ); } diff --git a/quickwit/quickwit-serve/src/ui_handler.rs b/quickwit/quickwit-serve/src/ui_handler.rs index 76743a73047..c6f748d9da7 100644 --- a/quickwit/quickwit-serve/src/ui_handler.rs +++ b/quickwit/quickwit-serve/src/ui_handler.rs @@ -60,7 +60,7 @@ async fn serve_impl(path: &str) -> Result, Rejection> { let asset = Asset::get(path_to_file).ok_or_else(warp::reject::not_found)?; let mime = mime_guess::from_path(path_to_file).first_or_octet_stream(); - let mut res = Response::new(asset.data.into()); + let mut res = Response::new(asset.data.into_owned().into()); res.headers_mut().insert( "content-type", HeaderValue::from_str(mime.as_ref()).unwrap(), From 1d96c6b22c63ee26e7a0d79738bcf9783e249c29 Mon Sep 17 00:00:00 2001 From: fulmicoton Date: Wed, 10 Sep 2025 14:17:53 +0200 Subject: 
[PATCH 7/9] Fixing the counter for indexing pipelines It was stuck to 0 because I forgot to increment the guard upon creation. --- quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index b41eba19c79..0ba8fd4952e 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -159,7 +159,9 @@ impl IndexingPipeline { let indexing_pipelines_gauge = crate::metrics::INDEXER_METRICS .indexing_pipelines .with_label_values([¶ms.pipeline_id.index_uid.index_id]); - let indexing_pipelines_gauge_guard = OwnedGaugeGuard::from_gauge(indexing_pipelines_gauge); + let mut indexing_pipelines_gauge_guard = + OwnedGaugeGuard::from_gauge(indexing_pipelines_gauge); + indexing_pipelines_gauge_guard.add(1); let params_fingerprint = params.params_fingerprint; IndexingPipeline { params, From ec1add6e8401b7a77f4cf70f13a596636454e6c4 Mon Sep 17 00:00:00 2001 From: fulmicoton Date: Fri, 12 Sep 2025 12:03:26 +0200 Subject: [PATCH 8/9] Fixing GaugeGuard's API --- quickwit/quickwit-actors/src/mailbox.rs | 4 +--- quickwit/quickwit-common/src/metrics.rs | 15 +++++++++++---- quickwit/quickwit-common/src/stream_utils.rs | 5 ++--- quickwit/quickwit-common/src/thread_pool.rs | 9 ++++----- quickwit/quickwit-indexing/src/actors/indexer.rs | 10 ++++++---- .../src/actors/indexing_pipeline.rs | 5 ++--- .../quickwit-indexing/src/models/processed_doc.rs | 6 ++++-- .../quickwit-indexing/src/models/raw_doc_batch.rs | 12 ++++++++---- quickwit/quickwit-indexing/src/source/mod.rs | 2 +- .../quickwit-ingest/src/ingest_v2/ingester.rs | 6 ++++-- .../quickwit-ingest/src/ingest_v2/replication.rs | 6 ++++-- quickwit/quickwit-ingest/src/ingest_v2/router.rs | 6 ++++-- quickwit/quickwit-search/src/scroll_context.rs | 7 ++++--- .../quickwit-search/src/search_permit_provider.rs | 4 ++-- quickwit/quickwit-serve/src/decompression.rs | 6 ++++-- quickwit/quickwit-serve/src/load_shield.rs | 8 ++++---- quickwit/quickwit-storage/src/metrics.rs | 11 ++++++----- 17 files changed, 71 insertions(+), 51 deletions(-) diff --git a/quickwit/quickwit-actors/src/mailbox.rs b/quickwit/quickwit-actors/src/mailbox.rs index 899e289182a..8883af2f134 100644 --- a/quickwit/quickwit-actors/src/mailbox.rs +++ b/quickwit/quickwit-actors/src/mailbox.rs @@ -395,9 +395,7 @@ fn get_actor_inboxes_count_gauge_guard() -> GaugeGuard<'static> { &[], ) }); - let mut gauge_guard = GaugeGuard::from_gauge(gauge); - gauge_guard.add(1); - gauge_guard + GaugeGuard::from_gauge_with_initial_value(gauge, 1) } pub(crate) fn create_mailbox( diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index c59bf953937..352624f093c 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -219,8 +219,13 @@ impl std::fmt::Debug for GaugeGuard<'_> { } impl<'a> GaugeGuard<'a> { - pub fn from_gauge(gauge: &'a IntGauge) -> Self { - Self { gauge, delta: 0i64 } + pub fn from_gauge_with_initial_value(gauge: &'a IntGauge, initial_value: i64) -> Self { + let mut gauge = Self { + gauge, + delta: initial_value, + }; + gauge.add(initial_value); + gauge } pub fn get(&self) -> i64 { @@ -256,8 +261,10 @@ impl std::fmt::Debug for OwnedGaugeGuard { } impl OwnedGaugeGuard { - pub fn from_gauge(gauge: IntGauge) -> Self { - Self { gauge, delta: 0i64 } + pub fn 
from_gauge_with_initial_value(gauge: IntGauge, initial_value: i64) -> Self { + let mut gauge = Self { gauge, delta: 0i64 }; + gauge.add(initial_value); + gauge } pub fn get(&self) -> i64 { diff --git a/quickwit/quickwit-common/src/stream_utils.rs b/quickwit/quickwit-common/src/stream_utils.rs index e0fc126b465..c9769558916 100644 --- a/quickwit/quickwit-common/src/stream_utils.rs +++ b/quickwit/quickwit-common/src/stream_utils.rs @@ -240,9 +240,8 @@ where T: fmt::Debug impl InFlightValue { pub fn new(value: T, value_size: ByteSize, gauge: &'static IntGauge) -> Self { - let mut gauge_guard = GaugeGuard::from_gauge(gauge); - gauge_guard.add(value_size.as_u64() as i64); - + let gauge_guard = + GaugeGuard::from_gauge_with_initial_value(gauge, value_size.as_u64() as i64); Self(value, gauge_guard) } diff --git a/quickwit/quickwit-common/src/thread_pool.rs b/quickwit/quickwit-common/src/thread_pool.rs index 18201196cf9..00aaa52bcdc 100644 --- a/quickwit/quickwit-common/src/thread_pool.rs +++ b/quickwit/quickwit-common/src/thread_pool.rs @@ -85,9 +85,8 @@ impl ThreadPool { { let span = tracing::Span::current(); let ongoing_tasks = self.ongoing_tasks.clone(); - let mut pending_tasks_guard: OwnedGaugeGuard = - OwnedGaugeGuard::from_gauge(self.pending_tasks.clone()); - pending_tasks_guard.add(1i64); + let pending_tasks_guard: OwnedGaugeGuard = + OwnedGaugeGuard::from_gauge_with_initial_value(self.pending_tasks.clone(), 1i64); let (tx, rx) = oneshot::channel(); self.thread_pool.spawn(move || { drop(pending_tasks_guard); @@ -95,8 +94,8 @@ impl ThreadPool { return; } let _guard = span.enter(); - let mut ongoing_task_guard = GaugeGuard::from_gauge(&ongoing_tasks); - ongoing_task_guard.add(1i64); + let _ongoing_task_guard = + GaugeGuard::from_gauge_with_initial_value(&ongoing_tasks, 1i64); let result = cpu_intensive_fn(); let _ = tx.send(result); }); diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 84ba3987f4a..142501381e3 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -219,9 +219,10 @@ impl IndexerState { let publish_lock = self.publish_lock.clone(); let publish_token_opt = self.publish_token_opt.clone(); - let mut split_builders_guard = - GaugeGuard::from_gauge(&crate::metrics::INDEXER_METRICS.split_builders); - split_builders_guard.add(1); + let split_builders_guard = GaugeGuard::from_gauge_with_initial_value( + &crate::metrics::INDEXER_METRICS.split_builders, + 1, + ); let workbench = IndexingWorkbench { workbench_id, @@ -233,10 +234,11 @@ impl IndexerState { publish_lock, publish_token_opt, last_delete_opstamp, - memory_usage: GaugeGuard::from_gauge( + memory_usage: GaugeGuard::from_gauge_with_initial_value( &quickwit_common::metrics::MEMORY_METRICS .in_flight .index_writer, + 0i64, ), cooperative_indexing_period, split_builders_guard, diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 0ba8fd4952e..ab7390e2ad7 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -159,9 +159,8 @@ impl IndexingPipeline { let indexing_pipelines_gauge = crate::metrics::INDEXER_METRICS .indexing_pipelines .with_label_values([¶ms.pipeline_id.index_uid.index_id]); - let mut indexing_pipelines_gauge_guard = - OwnedGaugeGuard::from_gauge(indexing_pipelines_gauge); - indexing_pipelines_gauge_guard.add(1); + let 
indexing_pipelines_gauge_guard = + OwnedGaugeGuard::from_gauge_with_initial_value(indexing_pipelines_gauge, 1); let params_fingerprint = params.params_fingerprint; IndexingPipeline { params, diff --git a/quickwit/quickwit-indexing/src/models/processed_doc.rs b/quickwit/quickwit-indexing/src/models/processed_doc.rs index bed695aa1d4..6aa8ab82a97 100644 --- a/quickwit/quickwit-indexing/src/models/processed_doc.rs +++ b/quickwit/quickwit-indexing/src/models/processed_doc.rs @@ -51,8 +51,10 @@ impl ProcessedDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.num_bytes as i64).sum::(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.indexer_mailbox); - gauge_guard.add(delta); + let gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.indexer_mailbox, + delta, + ); Self { docs, checkpoint_delta, diff --git a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs index f88d9fcac2b..7f255a0eaa3 100644 --- a/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs +++ b/quickwit/quickwit-indexing/src/models/raw_doc_batch.rs @@ -34,9 +34,10 @@ impl RawDocBatch { force_commit: bool, ) -> Self { let delta = docs.iter().map(|doc| doc.len() as i64).sum::(); - let mut gauge_guard = - GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.doc_processor_mailbox); - gauge_guard.add(delta); + let gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.doc_processor_mailbox, + delta, + ); Self { docs, @@ -67,7 +68,10 @@ impl fmt::Debug for RawDocBatch { impl Default for RawDocBatch { fn default() -> Self { - let _gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.doc_processor_mailbox); + let _gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.doc_processor_mailbox, + 0i64, + ); Self { docs: Vec::new(), checkpoint_delta: SourceCheckpointDelta::default(), diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 5601e31618d..19f9f916461 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -532,7 +532,7 @@ impl BatchBuilder { SourceType::Pulsar => MEMORY_METRICS.in_flight.pulsar(), _ => MEMORY_METRICS.in_flight.other(), }; - let gauge_guard = GaugeGuard::from_gauge(gauge); + let gauge_guard = GaugeGuard::from_gauge_with_initial_value(gauge, 0i64); Self { docs: Vec::with_capacity(capacity), diff --git a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs index c2683af12d1..aaae7ca4b11 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/ingester.rs @@ -1115,8 +1115,10 @@ impl IngesterService for Ingester { _ => None, }) .sum::(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_persist); - gauge_guard.add(request_size_bytes as i64); + let _gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.ingester_persist, + request_size_bytes as i64, + ); self.persist_inner(persist_request).await } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs index 5e286ec5b84..f727d012868 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/replication.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/replication.rs @@ -504,8 +504,10 @@ impl ReplicationTask { ))); } let 
request_size_bytes = replicate_request.num_bytes(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingester_replicate); - gauge_guard.add(request_size_bytes as i64); + let _gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.ingester_replicate, + request_size_bytes as i64, + ); self.current_replication_seqno += 1; diff --git a/quickwit/quickwit-ingest/src/ingest_v2/router.rs b/quickwit/quickwit-ingest/src/ingest_v2/router.rs index e48cd647c04..b022d2ffd4b 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/router.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/router.rs @@ -602,8 +602,10 @@ impl IngestRouterService for IngestRouter { async fn ingest(&self, ingest_request: IngestRequestV2) -> IngestV2Result { let request_size_bytes = ingest_request.num_bytes(); - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.ingest_router); - gauge_guard.add(request_size_bytes as i64); + let _gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.ingest_router, + request_size_bytes as i64, + ); let num_subrequests = ingest_request.subrequests.len(); let _permit = self diff --git a/quickwit/quickwit-search/src/scroll_context.rs b/quickwit/quickwit-search/src/scroll_context.rs index a4a31a856b5..9f10135fd54 100644 --- a/quickwit/quickwit-search/src/scroll_context.rs +++ b/quickwit/quickwit-search/src/scroll_context.rs @@ -148,9 +148,10 @@ impl Default for MiniKV { impl MiniKV { pub async fn put(&self, key: Vec, payload: Vec, ttl: Duration) { - let mut metric_guard = - GaugeGuard::from_gauge(&crate::SEARCH_METRICS.searcher_local_kv_store_size_bytes); - metric_guard.add(payload.len() as i64); + let metric_guard = GaugeGuard::from_gauge_with_initial_value( + &crate::SEARCH_METRICS.searcher_local_kv_store_size_bytes, + payload.len() as i64, + ); let mut cache_lock = self.ttl_with_cache.write().await; cache_lock.insert( key, diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index c320f5cc79b..68221a3aade 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -212,10 +212,10 @@ impl SearchPermitActor { while let Some((permit_requester_tx, next_permit_size)) = self.pop_next_request_if_serviceable() { - let mut ongoing_gauge_guard = GaugeGuard::from_gauge( + let ongoing_gauge_guard = GaugeGuard::from_gauge_with_initial_value( &crate::SEARCH_METRICS.leaf_search_single_split_tasks_ongoing, + 1, ); - ongoing_gauge_guard.add(1); self.total_memory_allocated += next_permit_size; self.num_warmup_slots_available -= 1; permit_requester_tx diff --git a/quickwit/quickwit-serve/src/decompression.rs b/quickwit/quickwit-serve/src/decompression.rs index d65df7d3bea..f6ac63cddfe 100644 --- a/quickwit/quickwit-serve/src/decompression.rs +++ b/quickwit/quickwit-serve/src/decompression.rs @@ -114,8 +114,10 @@ pub(crate) struct Body { impl Body { pub fn new(content: Bytes, load_shield_permit: LoadShieldPermit) -> Body { - let mut gauge_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.rest_server); - gauge_guard.add(content.len() as i64); + let gauge_guard = GaugeGuard::from_gauge_with_initial_value( + &MEMORY_METRICS.in_flight.rest_server, + content.len() as i64, + ); Body { content, _gauge_guard: gauge_guard, diff --git a/quickwit/quickwit-serve/src/load_shield.rs b/quickwit/quickwit-serve/src/load_shield.rs index 477c6e73d79..2f607aad746 100644 --- 
a/quickwit/quickwit-serve/src/load_shield.rs +++ b/quickwit/quickwit-serve/src/load_shield.rs @@ -78,13 +78,13 @@ impl LoadShield { } pub async fn acquire_permit(&'static self) -> Result { - let mut pending_gauge_guard = GaugeGuard::from_gauge(&self.pending_gauge); - pending_gauge_guard.add(1); + let pending_gauge_guard = + GaugeGuard::from_gauge_with_initial_value(&self.pending_gauge, 1i64); let in_flight_permit_opt = self.acquire_in_flight_permit().await?; let concurrency_permit_opt = self.acquire_concurrency_permit().await; drop(pending_gauge_guard); - let mut ongoing_gauge_guard = GaugeGuard::from_gauge(&self.ongoing_gauge); - ongoing_gauge_guard.add(1); + let ongoing_gauge_guard = + GaugeGuard::from_gauge_with_initial_value(&self.ongoing_gauge, 1i64); Ok(LoadShieldPermit { _in_flight_permit_opt: in_flight_permit_opt, _concurrency_permit_opt: concurrency_permit_opt, diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 43ef588e192..21d540a40dd 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -232,12 +232,13 @@ pub static CACHE_METRICS_FOR_TESTS: Lazy = pub fn object_storage_get_slice_in_flight_guards( get_request_size: usize, ) -> (GaugeGuard<'static>, GaugeGuard<'static>) { - let mut bytes_guard = GaugeGuard::from_gauge( + let bytes_guard = GaugeGuard::from_gauge_with_initial_value( &crate::STORAGE_METRICS.object_storage_get_slice_in_flight_num_bytes, + get_request_size as i64, + ); + let count_guard = GaugeGuard::from_gauge_with_initial_value( + &crate::STORAGE_METRICS.object_storage_get_slice_in_flight_count, + 1, ); - bytes_guard.add(get_request_size as i64); - let mut count_guard = - GaugeGuard::from_gauge(&crate::STORAGE_METRICS.object_storage_get_slice_in_flight_count); - count_guard.add(1); (bytes_guard, count_guard) } From 570f538c8edf5403e6d67eb8fa5ca2b0652bf6bc Mon Sep 17 00:00:00 2001 From: fulmicoton Date: Fri, 12 Sep 2025 12:10:25 +0200 Subject: [PATCH 9/9] using index_label method on pipeline counter --- quickwit/quickwit-common/src/metrics.rs | 5 +---- quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs | 4 +++- quickwit/quickwit-metastore/src/metastore/postgres/pool.rs | 5 ++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 352624f093c..ae13a351cd0 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -220,10 +220,7 @@ impl std::fmt::Debug for GaugeGuard<'_> { impl<'a> GaugeGuard<'a> { pub fn from_gauge_with_initial_value(gauge: &'a IntGauge, initial_value: i64) -> Self { - let mut gauge = Self { - gauge, - delta: initial_value, - }; + let mut gauge = Self { gauge, delta: 0i64 }; gauge.add(initial_value); gauge } diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index ab7390e2ad7..4ac454c2cd3 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -158,7 +158,9 @@ impl IndexingPipeline { pub fn new(params: IndexingPipelineParams) -> Self { let indexing_pipelines_gauge = crate::metrics::INDEXER_METRICS .indexing_pipelines - .with_label_values([¶ms.pipeline_id.index_uid.index_id]); + .with_label_values([&quickwit_common::metrics::index_label( + ¶ms.pipeline_id.index_uid.index_id, + )]); let indexing_pipelines_gauge_guard = 
OwnedGaugeGuard::from_gauge_with_initial_value(indexing_pipelines_gauge, 1); let params_fingerprint = params.params_fingerprint; diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs index f0437300095..879dad2a66c 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/pool.rs @@ -59,9 +59,8 @@ impl<'a, DB: Database> Acquire<'a> for &TrackedPool { .set(self.inner_pool.num_idle() as i64); Box::pin(async move { - let mut gauge_guard = GaugeGuard::from_gauge(&POSTGRES_METRICS.acquire_connections); - gauge_guard.add(1); - + let _gauge_guard = + GaugeGuard::from_gauge_with_initial_value(&POSTGRES_METRICS.acquire_connections, 1); let conn = acquire_conn_fut.await?; Ok(conn) })
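
Note for readers following the `GaugeGuard` changes in patches 7/9 through 9/9: the sketch below illustrates the usage pattern those patches converge on. It is only a sketch — it assumes the `from_gauge_with_initial_value` signature shown in the `quickwit-common/src/metrics.rs` diff, an `IntGauge` from the `prometheus` crate, and the decrement-on-drop behavior that the delta bookkeeping implies; the `track_in_flight_bytes` helper and its gauge are hypothetical, not part of the patches.

    use prometheus::IntGauge;
    use quickwit_common::metrics::GaugeGuard;

    // Hypothetical helper: keeps `gauge` bumped by `num_bytes` for as long as the
    // returned guard is alive, mirroring the call sites rewritten in patch 8/9.
    fn track_in_flight_bytes(gauge: &'static IntGauge, num_bytes: usize) -> GaugeGuard<'static> {
        // Old two-step pattern (the `add` call was easy to forget, which is the bug
        // patch 7/9 fixed for the indexing-pipelines counter):
        //     let mut guard = GaugeGuard::from_gauge(gauge);
        //     guard.add(num_bytes as i64);
        //
        // New single-step constructor: the delta is recorded at construction time
        // and subtracted again when the guard is dropped.
        GaugeGuard::from_gauge_with_initial_value(gauge, num_bytes as i64)
    }

A caller would simply hold the guard for the duration of the tracked work, e.g. `let _guard = track_in_flight_bytes(&SOME_GAUGE, payload.len());`, which is the shape most of the rewritten call sites above take.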