diff --git a/README.md b/README.md index 715b342cc..bf7ec95c8 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,11 @@ Nighthawk currently offers: See [navigating the codebase](docs/root/navigating_the_codebase.md) for a description of the directory structure. +## Additional Documentation + +See the [howto](docs/root/howto) directory for documentation aimed at specific +use-cases. + ## Building Nighthawk ### Prerequisites diff --git a/docs/root/howto/LLM_LOAD_GENERATION.md b/docs/root/howto/LLM_LOAD_GENERATION.md new file mode 100644 index 000000000..77fb247c7 --- /dev/null +++ b/docs/root/howto/LLM_LOAD_GENERATION.md @@ -0,0 +1,53 @@ +# Load Testing with LLM-formatted Requests + +## Overview + +If you would like to perform a load test against an LLM backend, using +the [Completions API spec](https://developers.openai.com/api/docs/guides/completions/), +there is an LLM Request Source plugin that can emulate that workload. +These request bodies look like the following: + +``` +{ + "model": "Qwen/Qwen2.5-1.5B-Instruct", + "max_tokens": 10, + "messages": [ + { + "role": "user", + "content": "L Q 5 i x D q v p X" + } + ] +} +``` + +This is generated based on input you provide. The 4 inputs are: + +1. Model Name (required) + - Name of the LLM model the requests are being sent to +2. Request Token Count (default 0) + - Number of "tokens" generated for the request +3. Response Max Token Count (default 0) + - Maximum number of tokens for the model to respond with +4. [Request Options List](https://github.com/envoyproxy/nighthawk/blob/09d64d769972513989a95766a98e28f5d6bb05c2/api/client/options.proto#L32) (optional) + - This allows you to add headers to the requests; the request method is always POST and the path is always /v1/completions + +A few additional details about the request options list: + +1. Header 'Content-Type: application/json' added by default +2. Ignore the "request_body_size" and "json_body" in this field +3. 
If a host name is required, use ":authority" header instead of ":host" + +The config for running with this request source is passed into the "--request-source-plugin-config" flag. +Here is an example of how that flag might look for running a load test with this LLM Request Source plugin: +``` +--request-source-plugin-config "{name:\"nighthawk.request_source.llm\",typed_config:{\"@type\":\"type.googleapis.com/nighthawk.LlmRequestSourcePluginConfig\", model_name: \"Qwen/Qwen2.5-1.5B-Instruct\", req_token_count: 10, resp_max_tokens: 10, options_list:{options:[{request_headers:[{header:{key:\":authority\",value:\"team1.example.com\"}}]}]}}}" +``` + +Please take care to escape the quote characters in your string correctly. + +## Tokenizer + +We do not use a real tokenizer for generating tokens in the requests. Instead, +we do a naive "tokenizer" where each "token" is just a random character in the +range of [A-Za-z0-9] with a space between each. This means that the length of +the generated message content is always 2*req_token_count-1 (or 0 if req_token_count is 0). diff --git a/docs/root/overview.md b/docs/root/overview.md index 700281176..efdba94d8 100644 --- a/docs/root/overview.md +++ b/docs/root/overview.md @@ -147,6 +147,10 @@ to fire off should look like. A couple of implementations exist: - a request source [plugin](https://github.com/envoyproxy/nighthawk/blob/9f97c2d9cb86b84a158ccba33832d135e1b96c7a/source/request_source/request_options_list_plugin_impl.h#L94) which replays requests from memory. +- a request source + [plugin](https://github.com/envoyproxy/nighthawk/blob/9f97c2d9cb86b84a158ccba33832d135e1b96c7a/source/request_source/llm_request_source_plugin_impl.h) + which creates requests based on the Completions API spec. See + [howto](howto/LLM_LOAD_GENERATION.md) for more details. 
### StreamDecoder diff --git a/source/BUILD b/source/BUILD new file mode 100644 index 000000000..e69de29bb diff --git a/source/client/BUILD b/source/client/BUILD index e7745d1a4..1520ef409 100644 --- a/source/client/BUILD +++ b/source/client/BUILD @@ -103,6 +103,8 @@ envoy_cc_library( "//source/common:nighthawk_common_lib", "//source/common:nighthawk_service_client_impl", "//source/common:request_source_impl_lib", + "//source/request_source:llm_request_source_plugin_cc_proto", + "//source/request_source:llm_request_source_plugin_impl", "//source/request_source:request_options_list_plugin_impl", "//source/user_defined_output:user_defined_output_plugin_creator", "@envoy//envoy/config:xds_manager_interface", diff --git a/source/request_source/BUILD b/source/request_source/BUILD index 9fdbf5151..0b7761ea0 100644 --- a/source/request_source/BUILD +++ b/source/request_source/BUILD @@ -3,6 +3,7 @@ load( "envoy_cc_library", "envoy_package", ) +load("@envoy_api//bazel:api_build_system.bzl", "api_cc_py_proto_library") licenses(["notice"]) # Apache 2 @@ -31,3 +32,47 @@ envoy_cc_library( "@envoy//source/exe:platform_impl_lib", ], ) + +api_cc_py_proto_library( + name = "llm_request_source_plugin", + srcs = [ + "llm_request_source_plugin.proto", + ], + visibility = ["//visibility:public"], + deps = [ + "//api/client:base", + ], +) + +envoy_cc_library( + name = "llm_request_source_plugin_impl", + srcs = [ + "llm_request_source_plugin_impl.cc", + ], + hdrs = [ + "llm_request_source_plugin_impl.h", + ], + repository = "@envoy", + visibility = ["//visibility:public"], + deps = [ + ":llm_request_source_plugin_cc_proto", + "//include/nighthawk/request_source:request_source_plugin_config_factory_lib", + "//source/common:nighthawk_common_lib", + "//source/common:request_impl_lib", + "//source/common:request_source_impl_lib", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/random", + "@com_google_absl//absl/status", + 
"@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/strings:string_view", + "@envoy//source/common/common:thread_lib_with_external_headers", + "@envoy//source/common/protobuf:message_validator_lib_with_external_headers", + "@envoy//source/common/protobuf:protobuf_with_external_headers", + "@envoy//source/common/protobuf:utility_lib_with_external_headers", + "@envoy//source/exe:platform_header_lib_with_external_headers", + "@envoy//source/exe:platform_impl_lib", + ], +) diff --git a/source/request_source/llm_request_source_plugin.proto b/source/request_source/llm_request_source_plugin.proto new file mode 100644 index 000000000..b5f8c1c5c --- /dev/null +++ b/source/request_source/llm_request_source_plugin.proto @@ -0,0 +1,20 @@ +syntax = "proto3"; + +package nighthawk; + +import "api/client/options.proto"; + +// Config for `LlmRequestSourcePlugin`. +message LlmRequestSourcePluginConfig { + // Model to use for the request. This field is required. + string model_name = 1; + + // Number of tokens to generate in the request. Defaults to 0. + int32 req_token_count = 2; + + // Maximum number of tokens to return in the response. Defaults to 0. + int32 resp_max_tokens = 3; + + // The options_list will be used to apply headers to the request. 
+ nighthawk.client.RequestOptionsList options_list = 4; +} diff --git a/source/request_source/llm_request_source_plugin_impl.cc b/source/request_source/llm_request_source_plugin_impl.cc new file mode 100644 index 000000000..45b88f300 --- /dev/null +++ b/source/request_source/llm_request_source_plugin_impl.cc @@ -0,0 +1,125 @@ +#include "source/request_source/llm_request_source_plugin_impl.h" + +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/random/random.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" + +#include "source/request_source/llm_request_source_plugin.pb.h" + +#include "envoy/api/api.h" +#include "envoy/config/core/v3/base.pb.h" +#include "envoy/config/core/v3/extension.pb.h" +#include "envoy/http/header_map.h" +#include "envoy/registry/registry.h" +#include "external/envoy/source/common/http/header_map_impl.h" +#include "external/envoy/source/common/protobuf/protobuf.h" +#include "external/envoy/source/common/protobuf/utility.h" + +#include "api/client/options.pb.h" +#include "api/request_source/request_source_plugin.pb.h" +#include "nighthawk/common/request.h" +#include "nighthawk/common/request_source.h" +#include "nighthawk/request_source/request_source_plugin_config_factory.h" +#include "source/common/request_impl.h" + +namespace Nighthawk { +namespace { + +// Checks the user-supplied plugin config. Returns InvalidArgument when +// `model_name` is empty or when either token count is negative. +absl::Status ValidateConfig(const nighthawk::LlmRequestSourcePluginConfig& config) { + if (config.model_name().empty()) { + return absl::InvalidArgumentError("Model name is required."); + } + if (config.req_token_count() < 0) { + return absl::InvalidArgumentError("Request token count must not be negative."); + } + if (config.resp_max_tokens() < 0) { + return absl::InvalidArgumentError("Response max token count must not be negative."); + } + + return absl::OkStatus(); +} + +constexpr absl::string_view kCharset = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"; + +// Returns `num_tokens` random characters drawn from kCharset, separated by +// single spaces. Returns an empty string when `num_tokens` is not positive. +std::string GenerateRandomPrompt(int num_tokens) { + std::string result_string; + if (num_tokens <= 0) { + return result_string; + } + // One character per token plus one separator between adjacent tokens. + result_string.reserve(2 * static_cast<size_t>(num_tokens) - 1); + absl::BitGen bitgen; + + for (int i = 0; i < num_tokens; ++i) { + // Append a random character from the charset. 
+ result_string.push_back(kCharset[absl::Uniform(bitgen, 0, kCharset.length())]); + + // Add a space between tokens. This is a naive way to calculate the number + // of tokens in the string as generally spaces delineate tokens. + if (i < num_tokens - 1) { + result_string.push_back(' '); + } + } + + return result_string; +} + +} // namespace + +// Returns a generator that builds a new POST /v1/completions request on every +// call, starting from a copy of the headers held by this plugin. +Nighthawk::RequestGenerator LlmRequestSourcePlugin::get() { + return [this]() -> std::unique_ptr { + Envoy::Http::RequestHeaderMapPtr headers = Envoy::Http::RequestHeaderMapImpl::create(); + Envoy::Http::HeaderMapImpl::copyFrom(*headers, *header_); + + std::string body = + absl::StrFormat(R"json( + { + "model": "%s", + "max_tokens": %d, + "messages": [ + { + "role": "user", + "content": "%s" + } + ] + } + )json", + model_name_, resp_max_tokens_, GenerateRandomPrompt(req_token_count_)); + + headers->setMethod( + envoy::config::core::v3::RequestMethod_Name(envoy::config::core::v3::RequestMethod::POST)); + headers->setContentType("application/json"); + headers->setContentLength(body.size()); + + auto path_key = Envoy::Http::LowerCaseString(":path"); + headers->setCopy(path_key, "/v1/completions"); + + // The body is not used after this point, so hand it over without copying. + return std::make_unique(std::move(headers), std::move(body)); + }; +} + +// Builds an LlmRequestSourcePlugin from `message`, which is expected to be an +// Any wrapping an LlmRequestSourcePluginConfig. Headers listed in the config's +// options_list are set on `header` before it is handed to the plugin. Fails via +// THROW_IF_NOT_OK if the config cannot be unpacked or does not pass validation. +Nighthawk::RequestSourcePtr +LlmRequestSourcePluginFactory::createRequestSourcePlugin(const Envoy::Protobuf::Message& message, + Envoy::Api::Api&, + Envoy::Http::RequestHeaderMapPtr header) { + const auto* any = Envoy::Protobuf::DynamicCastToGenerated(&message); + // The cast yields nullptr for any other message type; do not dereference it. + if (any == nullptr) { + THROW_IF_NOT_OK(absl::InvalidArgumentError( + "LLM request source plugin config must be wrapped in a google.protobuf.Any.")); + } + nighthawk::LlmRequestSourcePluginConfig llm_config; + THROW_IF_NOT_OK(Envoy::MessageUtil::unpackTo(*any, llm_config)); + THROW_IF_NOT_OK(ValidateConfig(llm_config)); + + for (const nighthawk::client::RequestOptions& request_option : + llm_config.options_list().options()) { + for (const envoy::config::core::v3::HeaderValueOption& option_header : + request_option.request_headers()) { + auto lower_case_key = Envoy::Http::LowerCaseString(option_header.header().key()); + 
header->setCopy(lower_case_key, option_header.header().value()); + } + } + + return std::make_unique(std::string(llm_config.model_name()), + llm_config.req_token_count(), + llm_config.resp_max_tokens(), std::move(header)); +} + +REGISTER_FACTORY(LlmRequestSourcePluginFactory, Nighthawk::RequestSourcePluginConfigFactory); + +} // namespace Nighthawk diff --git a/source/request_source/llm_request_source_plugin_impl.h b/source/request_source/llm_request_source_plugin_impl.h new file mode 100644 index 000000000..5057fdd08 --- /dev/null +++ b/source/request_source/llm_request_source_plugin_impl.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include + +#include "source/request_source/llm_request_source_plugin.pb.h" + +#include "absl/log/log.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" + +#include "envoy/api/api.h" +#include "envoy/config/core/v3/extension.pb.h" +#include "envoy/http/header_map.h" +#include "external/envoy/source/common/common/logger.h" +#include "external/envoy/source/common/protobuf/protobuf.h" + +#include "api/client/options.pb.h" +#include "nighthawk/common/request_source.h" +#include "nighthawk/request_source/request_source_plugin_config_factory.h" + +namespace Nighthawk { + +constexpr inline absl::string_view kLlmRequestSourcePluginName = "nighthawk.request_source.llm"; + +// A Nighthawk RequestSource that generates completions API requests. +// +// The request source generates requests with the following characteristics: +// - The request body is a JSON object with the following fields: +// - model: The name of the model to use for inference. +// - max_tokens: The maximum number of tokens to return in the response. +// - messages: A list with a single JSON object containing the following +// fields: +// - role: "user" +// - content: A string containing `req_token_count` randomly generated +// tokens. 
+// - The request headers are copied from the provided header map with the +// following modifications: +// - Method: POST +// - Content-Type: application/json +// - Content-Length: The length of the request body. +// - :path: /v1/completions +class LlmRequestSourcePlugin : public Nighthawk::RequestSource, + public Envoy::Logger::Loggable { +public: + // Stores the model name and token counts, and takes ownership of `header`, + // which is kept as the base header set for the requests this source produces. + explicit LlmRequestSourcePlugin(std::string model_name, int req_token_count, int resp_max_tokens, + Envoy::Http::RequestHeaderMapPtr header) + : model_name_(std::move(model_name)), req_token_count_(req_token_count), + resp_max_tokens_(resp_max_tokens), header_(std::move(header)) {} + + // Returns the generator used to produce requests; defined in the .cc file. + Nighthawk::RequestGenerator get() override; + // No per-thread setup or teardown is performed by this request source. + void initOnThread() override {} + void destroyOnThread() override {} + +private: + // Model to use for the request. + std::string model_name_; + // Number of tokens to generate in the request. + int req_token_count_; + // Maximum number of tokens from the model to return in the response. + int resp_max_tokens_; + // Headers for the request. + Envoy::Http::RequestHeaderMapPtr header_; +}; + +// Factory class for creating LlmRequestSourcePlugin objects. 
+class LlmRequestSourcePluginFactory : public virtual Nighthawk::RequestSourcePluginConfigFactory { +public: + // Returns the plugin name, kLlmRequestSourcePluginName, as a std::string. + std::string name() const override { return std::string(kLlmRequestSourcePluginName); } + + // Returns a newly allocated, default-constructed config message. + Envoy::ProtobufTypes::MessagePtr createEmptyConfigProto() override { + return std::make_unique(); + } + + // Creates the request source from the given config message and base headers; + // defined in the .cc file. + Nighthawk::RequestSourcePtr + createRequestSourcePlugin(const Envoy::Protobuf::Message&, Envoy::Api::Api&, + Envoy::Http::RequestHeaderMapPtr header) override; +}; + +} // namespace Nighthawk diff --git a/test/request_source/BUILD b/test/request_source/BUILD index 28d234ccc..2968fb1c5 100644 --- a/test/request_source/BUILD +++ b/test/request_source/BUILD @@ -58,3 +58,21 @@ envoy_cc_test( "@envoy//test/mocks/api:api_mocks", ], ) + +envoy_cc_test( + name = "llm_request_source_plugin_test", + srcs = ["llm_request_source_plugin_test.cc"], + repository = "@envoy", + deps = [ + "//source/common:request_impl_lib", + "//source/common:request_source_impl_lib", + "//source/request_source:llm_request_source_plugin_cc_proto", + "//source/request_source:llm_request_source_plugin_impl", + "@com_google_absl//absl/strings", + "@envoy//source/common/common:assert_lib_with_external_headers", + "@envoy//source/common/http:header_map_lib_with_external_headers", + "@envoy//source/common/json:json_loader_lib", + "@envoy//source/common/protobuf:protobuf_with_external_headers", + "@envoy//source/common/protobuf:utility_lib_with_external_headers", + "@envoy//test/mocks/api:api_mocks", + ], +) diff --git a/test/request_source/llm_request_source_plugin_test.cc b/test/request_source/llm_request_source_plugin_test.cc new file mode 100644 index 000000000..12d3b6310 --- /dev/null +++ b/test/request_source/llm_request_source_plugin_test.cc @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +#include "source/request_source/llm_request_source_plugin.pb.h" +#include "source/request_source/llm_request_source_plugin_impl.h" + +#include "nighthawk/common/request.h" +#include "nighthawk/common/request_source.h" + +#include 
"external/envoy/source/common/common/assert.h" +#include "external/envoy/source/common/http/header_map_impl.h" +#include "external/envoy/source/common/json/json_loader.h" +#include "external/envoy/source/common/protobuf/protobuf.h" +#include "external/envoy/source/common/protobuf/utility.h" +#include "external/envoy/test/mocks/api/mocks.h" + +#include "absl/strings/str_split.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace Nighthawk { +namespace { + +using ::Envoy::Protobuf::TextFormat; +using ::testing::NiceMock; + +// Verifies that a generated request carries the configured model name, +// max_tokens value, prompt token count and the extra header from options_list. +TEST(LlmRequestSourcePluginTest, TestLlmRequestSourcePlugin) { + nighthawk::LlmRequestSourcePluginConfig config; + // Fail fast if the text proto does not parse, rather than testing an empty config. + ASSERT_TRUE(TextFormat::ParseFromString(R"pb( + model_name: "test_model" + req_token_count: 100 + resp_max_tokens: 100 + options_list { + options { + request_headers { header { key: "test_header" value: "test_value" } } + } + } + )pb", + &config)); + Envoy::Http::RequestHeaderMapPtr headers = Envoy::Http::RequestHeaderMapImpl::create(); + LlmRequestSourcePluginFactory factory; + NiceMock mock_api; + Envoy::Protobuf::Any config_wrapper; + config_wrapper.PackFrom(config); + Nighthawk::RequestSourcePtr llm_request_source = + factory.createRequestSourcePlugin(config_wrapper, mock_api, std::move(headers)); + ASSERT_NE(llm_request_source, nullptr); + + Nighthawk::RequestGenerator request_generator = llm_request_source->get(); + std::unique_ptr request = request_generator(); + ASSERT_NE(request, nullptr); + + Envoy::Json::ObjectSharedPtr json_object = + Envoy::Json::Factory::loadFromString(request->body()).value(); + + EXPECT_EQ(json_object->getString("model").value(), "test_model"); + EXPECT_EQ(json_object->getInteger("max_tokens").value(), 100); + + std::vector messages = + json_object->getObjectArray("messages").value(); + // Guard the index below so an empty array fails the test instead of crashing it. + ASSERT_FALSE(messages.empty()); + Envoy::Json::ObjectSharedPtr first_message_obj = messages[0]; + std::string content = first_message_obj->getString("content").value(); + std::vector tokens = absl::StrSplit(content, ' '); + EXPECT_EQ(tokens.size(), 100); + 
EXPECT_EQ(request->header()->get(Envoy::Http::LowerCaseString("test_header")).size(), 1); +} + +// Verifies that the factory returns a non-null request source for a valid config. +TEST(LlmRequestSourcePluginTest, TestLlmRequestSourcePluginFactory) { + nighthawk::LlmRequestSourcePluginConfig config; + ASSERT_TRUE(TextFormat::ParseFromString(R"pb( + model_name: "test_model" + req_token_count: 100 + resp_max_tokens: 100 + options_list { + options { + request_headers { header { key: "test_header" value: "test_value" } } + } + } + )pb", + &config)); + Envoy::Http::RequestHeaderMapPtr headers = Envoy::Http::RequestHeaderMapImpl::create(); + LlmRequestSourcePluginFactory factory; + NiceMock mock_api; + Envoy::Protobuf::Any config_wrapper; + config_wrapper.PackFrom(config); + Nighthawk::RequestSourcePtr llm_request_source = + factory.createRequestSourcePlugin(config_wrapper, mock_api, std::move(headers)); + ASSERT_NE(llm_request_source, nullptr); +} + +} // namespace +} // namespace Nighthawk