-
Notifications
You must be signed in to change notification settings - Fork 89
Expand file tree
/
Copy pathllm_request_source_plugin_impl.h
More file actions
83 lines (69 loc) · 3.18 KB
/
llm_request_source_plugin_impl.h
File metadata and controls
83 lines (69 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#pragma once
#include <memory>
#include <string>
#include <utility>
#include "source/request_source/llm_request_source_plugin.pb.h"
#include "absl/log/log.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "envoy/api/api.h"
#include "envoy/config/core/v3/extension.pb.h"
#include "envoy/http/header_map.h"
#include "external/envoy/source/common/common/logger.h"
#include "external/envoy/source/common/protobuf/protobuf.h"
#include "api/client/options.pb.h"
#include "nighthawk/common/request_source.h"
#include "nighthawk/request_source/request_source_plugin_config_factory.h"
namespace Nighthawk {
constexpr inline absl::string_view kLlmRequestSourcePluginName = "nighthawk.request_source.llm";
// A Nighthawk RequestSource that generates completions API requests.
//
// The request source generates requests with the following characteristics:
// - The request body is a JSON object with the following fields:
// - model: The name of the model to use for inference.
// - max_tokens: The maximum number of tokens to return in the response.
// - messages: A list with a single JSON object containing the following
// fields:
// - role: "user"
// - content: A string containing `req_token_count` randomly generated
// tokens.
// - The request headers are copied from the provided header map with the
// following modifications:
// - Method: POST
// - Content-Type: application/json
// - Content-Length: The length of the request body.
// - :path: /v1/completions
class LlmRequestSourcePlugin : public Nighthawk::RequestSource,
public Envoy::Logger::Loggable<Envoy::Logger::Id::http> {
public:
explicit LlmRequestSourcePlugin(std::string model_name, int req_token_count, int resp_max_tokens,
Envoy::Http::RequestHeaderMapPtr header)
: model_name_(model_name), req_token_count_(req_token_count),
resp_max_tokens_(resp_max_tokens), header_(std::move(header)) {};
Nighthawk::RequestGenerator get() override;
void initOnThread() override {};
void destroyOnThread() override {};
private:
// Model to use for the request.
std::string model_name_;
// Number of tokens to generate in the request.
int req_token_count_;
// Maximum number of tokens from the model to return in the response.
int resp_max_tokens_;
// The options_list will be used to apply headers to the request.
std::unique_ptr<const nighthawk::client::RequestOptionsList> options_list_;
// Headers for the request.
Envoy::Http::RequestHeaderMapPtr header_;
};
// Factory class for creating LlmRequestSourcePlugin objects.
class LlmRequestSourcePluginFactory : public virtual Nighthawk::RequestSourcePluginConfigFactory {
public:
std::string name() const override { return std::string(kLlmRequestSourcePluginName); }
Envoy::ProtobufTypes::MessagePtr createEmptyConfigProto() override {
return std::make_unique<nighthawk::LlmRequestSourcePluginConfig>();
}
Nighthawk::RequestSourcePtr
createRequestSourcePlugin(const Envoy::Protobuf::Message&, Envoy::Api::Api&,
Envoy::Http::RequestHeaderMapPtr header) override;
};
} // namespace Nighthawk