plugin.cpp
#include "plugin.hpp"
#include "illixr/switchboard.hpp"
#include "illixr/record_logger.hpp"
#include "illixr/relative_clock.hpp"
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <string>
#include <fstream>
// #include <chrono>
// #include <thread>
#include "json.hpp"
#include "httplib.h"
#include "illixr/data_format/string_data.hpp"
using namespace ILLIXR;
using namespace ILLIXR::data_format;
using json = nlohmann::json;
// Record header for optionally logging llama responses through ILLIXR's record_logger
// (used only by the commented-out logging block in start()).
static const ILLIXR::record_header rh_llama_response{
    "llama_cli.response",
    std::vector<std::pair<std::string, const std::type_info&>>{
        {"timestamp_ns", typeid(std::uint64_t)},
        {"text", typeid(std::string)}
    }
};
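// The constructor looks up the switchboard and record_logger services from the phonebook
// and wires a reader and a buffered reader on "transcript_topic" plus a writer on "response_topic".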
[[maybe_unused]] llama_cli::llama_cli(const std::string& name, phonebook* pb)
    : plugin{name, pb}
    , switchboard_{phonebook_->lookup_impl<switchboard>()}
    , text_reader_{switchboard_->get_reader<string_data>("transcript_topic")}
    , response_publisher_{switchboard_->get_writer<string_data>("response_topic")}
    , reader_{switchboard_->get_buffered_reader<string_data>("transcript_topic")}
    , record_logger_{phonebook_->lookup_impl<ILLIXR::record_logger>()} {}
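// start() runs once: it checks that the transcript topic exists, loads the scene-description
// JSON, sends the first transcript (prefixed with the scene prompt) to the llama server,
// publishes the response on "response_topic", then drains any remaining queued transcripts.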
void llama_cli::start() {
    if (!switchboard_->topic_exists("transcript_topic")) {
        std::cerr << "Error: transcript_topic does not exist" << std::endl;
        return;
    }
    // switchboard::ptr<const string_data> event = text_reader_.get_rw();
    // if (event) {
    //     std::cout << "Sending text: " << event->value << std::endl;
    // } else {
    //     std::cerr << "Error: No transcript available from current topic" << std::endl;
    //     return;
    // }
    // httplib::Client cli("http://172.22.70.20:8888");
    // httplib::Client cli("172.22.70.20", 8888);
    // Assumes a llama.cpp-compatible completion server listening on localhost:8080.
    httplib::Client cli("http://localhost:8080");
    cli.set_connection_timeout(5, 0);
    std::ifstream f("/srv/scratch/yuanyi2/ILLIXR/plugins/llama_cli/scene_desc_merge.json");
    if (!f) {
        std::cerr << "Error: couldn't open scene_desc_merge.json" << std::endl;
        return;
    }
    json scene_desc_data = json::parse(f);
    // for (auto& element : scene_desc_data) {
    //     std::string obj = element.dump();
    // }
    std::string scene_desc_str = scene_desc_data.dump();
    prompt_scene_desc =
        R"PROMPT(
You are an agent that reads a JSON file containing descriptions of objects as tags along with their ids.
You should match the object to the provided tags in the JSON and return the id of the closest matching object.
The user will provide a sentence containing an action, mentioning one or more objects.
You should also identify the action, which can be locate, insert, remove, turn on, turn off, count, etc.
You should finally return the identified action and the ids.
Example user question 1: Turn on the *light fixture*?
Your answer: id:2, action: turn on.
Example user question 2: Where is the cat?
Your answer: id:67, action: locate.
Now here is the JSON file of objects.
)PROMPT";
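    // The first request concatenates the scene-description prompt, the scene JSON,
    // and the first transcript pulled from the buffered reader.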
    json body;
    switchboard::ptr<const string_data> event = reader_.dequeue();
    if (!event) {
        // Nothing has been queued on transcript_topic yet; bail out rather than dereference null.
        std::cerr << "Error: no transcript available on transcript_topic" << std::endl;
        return;
    }
    body = {
        {"prompt", prompt_scene_desc + scene_desc_str + event->value},
        {"model", "Mistral-7B-Instruct-v0.3.Q2_K.gguf"},
        {"stream", false}
    };
    std::string body_str = body.dump();
    // // comm latency: start: absolute time
    // auto start = std::chrono::high_resolution_clock::now();
    std::shared_ptr<relative_clock> rel_clock = phonebook_->lookup_impl<ILLIXR::relative_clock>();
    if (!rel_clock) {
        std::cerr << "Error: relative_clock not available\n";
        return;
    }
    auto start_tp = rel_clock->now();
    // int64_t start = ILLIXR::relative_clock::absolute_ns();
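    // Blocking POST to the server's /completion endpoint; the elapsed time around the call
    // is reported as the end-to-end communication (plus inference) latency.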
    auto res = cli.Post("/completion", body_str, "application/json");
    if (!res) {
        // Connection failure or timeout: return before dereferencing a null result.
        std::cerr << "Error: http response empty." << std::endl;
        return;
    }
    if (res->status != 200) {
        std::cerr << "Error: " << res->status << " - " << res->body << std::endl;
        return;
    }
    // auto end = std::chrono::high_resolution_clock::now();
    // std::chrono::duration<double, std::nano> duration_ns = end - start;
    // std::cout << "Communication time: " << duration_ns.count() << " nanoseconds\n";
    // int64_t end = ILLIXR::relative_clock::absolute_ns();
    // int64_t duration_ns = end - start;
    auto end_tp = rel_clock->now();
    auto dur = end_tp - start_tp; // clock duration (nanoseconds)
    int64_t duration_ns = static_cast<int64_t>(dur.count());
    std::cerr << "llama communication latency " << duration_ns << " ns" << std::endl;
    json response_json = json::parse(res->body);
    std::cout << "Response: " << response_json["content"] << std::endl;
    string_data data{response_json["content"].get<std::string>()};
    response_publisher_.put(std::make_shared<string_data>(data));
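    // The disabled block below would log each response and its latency through ILLIXR's
    // record_logger (using rh_llama_response) and append a quoted row to a CSV file.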
    // if (record_logger_) {
    //     std::vector<std::any> vals;
    //     vals.push_back(duration_ns);
    //     vals.push_back(data); // std::string
    //     ILLIXR::record rec{rh_llama_response, std::move(vals)};
    //     record_logger_->log(rec);
    //     {
    //         std::ofstream csv("/scratch/yuanyi2/llama_cli_log/llama_cli_log.csv", std::ios::app);
    //         if (csv) {
    //             // escape double quotes in text
    //             std::string text = data.value;
    //             size_t p = 0;
    //             while ((p = text.find('"', p)) != std::string::npos) {
    //                 text.insert(p, 1, '"');
    //                 p += 2;
    //             }
    //             csv << std::to_string(ILLIXR::relative_clock::absolute_ns()) << ','  // timestamp_ns
    //                 << (double)duration_ns / 1e6 << ','                              // latency_ms
    //                 << '"' << text << '"' << '\n';                                   // quoted text
    //         }
    //     }
    // }
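    // Drain any transcripts that queued up while the first request was in flight;
    // these follow-up requests send the transcript text alone, without the scene prompt.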
    while (reader_.size() != 0) {
        switchboard::ptr<const string_data> event = reader_.dequeue();
        body = {
            {"prompt", event->value},
            {"model", "Mistral-7B-Instruct-v0.3.Q2_K.gguf"},
            {"stream", false}
        };
        std::string body_str = body.dump();
        std::shared_ptr<relative_clock> rel_clock = phonebook_->lookup_impl<ILLIXR::relative_clock>();
        if (!rel_clock) {
            std::cerr << "Error: relative_clock not available\n";
            return;
        }
        auto start_tp = rel_clock->now();
        auto res = cli.Post("/completion", body_str, "application/json");
        if (!res) {
            // Connection failure or timeout: return before dereferencing a null result.
            std::cerr << "Error: http response empty." << std::endl;
            return;
        }
        if (res->status != 200) {
            std::cerr << "Error: " << res->status << " - " << res->body << std::endl;
            return;
        }
        auto end_tp = rel_clock->now();
        auto dur = end_tp - start_tp; // clock duration (nanoseconds)
        int64_t duration_ns = static_cast<int64_t>(dur.count());
        std::cerr << "Communication latency " << duration_ns << " ns" << std::endl;
        json response_json = json::parse(res->body);
        std::cout << "Response: " << response_json["content"] << std::endl;
        string_data data{response_json["content"].get<std::string>()};
        response_publisher_.put(std::make_shared<string_data>(data));
    }
    // json body = {
    //     {"prompt", prompt_scene_desc + scene_desc_str + event->value},
    //     {"model", "Mistral-7B-Instruct-v0.3.Q2_K.gguf"},
    //     {"stream", false}
    // };
    // std::string body_str = body.dump();
    // auto res = cli.Post("/completion", body_str, "application/json");
    // if (!res) {
    //     std::cerr << "Error: http response empty." << std::endl;
    // }
    // if (res->status != 200) {
    //     std::cerr << "Error: " << res->status << " - " << res->body << std::endl;
    //     return;
    // }
    // json response_json = json::parse(res->body);
    // std::cout << "Response: " << response_json["content"] << std::endl;
    // string_data data{response_json["content"]};
    // response_publisher_.put(std::make_shared<string_data>(data));
    return;
}
PLUGIN_MAIN(llama_cli)