-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv.hpp
More file actions
314 lines (282 loc) · 8.46 KB
/
csv.hpp
File metadata and controls
314 lines (282 loc) · 8.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#pragma once
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <cerrno>
#include <cstddef>
#include <cstring>
#include <exception>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeinfo>
#include <utility>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <csv.h>
namespace csv {
namespace detail {
// Outcome of an operation: a numeric code plus a human-readable message.
// A code of zero means success; any other value is an error. Converts
// implicitly to bool so it can be returned from bool-returning functions.
struct Result {
  int number = 0;                   // 0 on success, otherwise an error code
  std::string message = "success";  // description matching `number`

  // True exactly when no error has been recorded.
  operator bool() const { return 0 == number; }
};
// Read-only, private memory mapping of an entire file.
//
// The constructor never throws: on failure `status` holds the errno value and
// its text, and the object converts to false. begin()/end() delimit the mapped
// bytes; for an empty file (or after a failure) both are null and the range is
// empty. The mapping and the descriptor are released by the destructor.
class MappedFile {
 private:
  int fd_;       // descriptor of the open file, or -1 when none is open
  size_t size_;  // number of mapped bytes
  char* data_;   // start of the mapping, or nullptr when nothing is mapped

 public:
  // Opens `filename` read-only and maps its full contents.
  MappedFile(const std::string& filename)
      : fd_(-1), size_(0), data_(nullptr) {
    fd_ = ::open(filename.c_str(), O_RDONLY);
    if (fd_ < 0) {
      set_error();
      return;
    }
    struct stat statbuf;
    if (::fstat(fd_, &statbuf) < 0) {
      set_error();
      return;
    }
    size_ = static_cast<size_t>(statbuf.st_size);
    if (size_ == 0) {
      // mmap() rejects a zero length; an empty file is simply an empty range.
      return;
    }
    void* mapping = ::mmap(nullptr, size_, PROT_READ, MAP_PRIVATE, fd_, 0);
    if (mapping == MAP_FAILED) {  // mmap signals failure with MAP_FAILED, not NULL
      set_error();
      size_ = 0;
      return;
    }
    data_ = static_cast<char*>(mapping);
  }

  // Releases only what was actually acquired.
  ~MappedFile() {
    if (data_ != nullptr) {
      ::munmap(data_, size_);
    }
    if (fd_ >= 0) {
      ::close(fd_);
    }
  }

  // A copy would unmap and close the same resources twice.
  MappedFile(const MappedFile&) = delete;
  MappedFile& operator=(const MappedFile&) = delete;

  const char* begin() const { return data_; }
  const char* end() const { return data_ + size_; }

  // True when the file was opened (and, if non-empty, mapped) without error.
  operator bool() const { return status; }

  detail::Result status;

 private:
  // Records the current errno in `status`. errno is read once, up front, so
  // that later library calls cannot change the value being reported.
  void set_error() {
    const int err = errno;
    status.number = err;
    status.message = std::strerror(err);
  }
};
// Compile-time integer sequences used to expand tuples into argument lists.
// generate<N> derives from index<0, 1, ..., N - 1>, so `generate<N>::index`
// names that index type.
namespace sequence {
// Carrier for a pack of integer indices.
template <int... Indices>
struct index {};
// Recursive step: prepend Remaining - 1 to the pack and count down.
template <int Remaining, int... Indices>
struct generate : generate<Remaining - 1, Remaining - 1, Indices...> {};
// Terminal step: the counter reached zero, the pack is complete.
template <int... Indices>
struct generate<0, Indices...> : index<Indices...> {};
}  // namespace sequence
namespace meta {
// fields<T> reduces a callable type T to the plain signature R(Args...) and
// inherits from the implementation specialised on that signature.
//
// Primary template: any type not matched below (lambda, functor) is described
// by the type of its operator().
template <class Callable>
class fields : public fields<decltype(&Callable::operator())> {};
// lvalue reference to a callable
template <class Callable>
class fields<Callable&> : public fields<Callable> {};
// rvalue reference to a callable
template <class Callable>
class fields<Callable&&> : public fields<Callable> {};
// pointer to a free function
template <class Ret, class... Params>
class fields<Ret (*)(Params...)> : public fields<Ret(Params...)> {};
// pointer to a non-const member function
template <class Owner, class Ret, class... Params>
class fields<Ret (Owner::*)(Params...)> : public fields<Ret(Params...)> {};
// pointer to a const member function
template <class Owner, class Ret, class... Params>
class fields<Ret (Owner::*)(Params...) const>
    : public fields<Ret(Params...)> {};
// pointer to a data member, treated as a call taking the owning object
template <class Owner, class Ret>
class fields<Ret(Owner::*)> : public fields<Ret(Owner&)> {};
// impl
//
// Holds one decayed value per parameter of the signature R(Args...), fills
// those values from text fields, and calls a sink with them.
//
// Each column has a stateless converter that is handed the object to write
// into. Nothing stored here points back into this object, so copies and moves
// of a `fields` are independent of the object they were made from.
template <class R, class... Args>
class fields<R(Args...)> {
  using values_t = std::tuple<typename std::decay<Args>::type...>;
  // Converts the text [buf, buf + len) and stores it into one slot of `self`.
  using mutator_t = void (*)(fields& self, const char* buf, size_t len);

 public:
  fields() {
    setupFieldHandlers(
        typename detail::sequence::generate<sizeof...(Args)>::index());
  }

  // Converts the text of column `field_pos` into the matching value slot.
  // Columns beyond the number of sink parameters are ignored. Whatever the
  // conversion throws propagates to the caller.
  void accept_field(size_t field_pos, const char* buf, size_t len) {
    if (field_pos < mutators.size()) {
      mutators[field_pos](*this, buf, len);
    }
  }

  // Calls `sink` with the current values, one argument per parameter. A slot
  // that received no field since the last call still holds its previous
  // value (initially a value-initialized one).
  template <typename F>
  void accept_row(F& sink) {
    call_func(sink,
              typename detail::sequence::generate<sizeof...(Args)>::index());
  }

  // Expands the value tuple into the argument list of `sink`.
  template <typename F, int... S>
  void call_func(F& sink, detail::sequence::index<S...>) {
    sink(std::get<S>(values)...);
  }

 private:
  values_t values;
  std::vector<mutator_t> mutators;

 private:
  // Converter for slot I: an empty field resets the slot to a
  // value-initialized value, anything else goes through boost::lexical_cast.
  template <int I>
  static void assign_field(fields& self, const char* buf, size_t len) {
    using value_type = typename std::tuple_element<I, values_t>::type;
    value_type& slot = std::get<I>(self.values);
    if (len > 0) {
      slot = boost::lexical_cast<value_type>(buf, len);
    } else {
      slot = value_type();
    }
  }

  // Builds the converter table, one entry per slot, in column order.
  template <int... S>
  void setupFieldHandlers(detail::sequence::index<S...>) {
    mutators = std::vector<mutator_t>{&assign_field<S>...};
  }
};
} // namespace meta
} // detail
/* A C++ wrapper around libcsv, see `make_parser` below */
// Verdict returned by row filters and error callbacks: whether the row being
// read should be dropped. Implicitly constructible from, and convertible to,
// bool.
struct filter_result {
  bool drop;  // true when the row must not reach the sink

  constexpr filter_result(bool should_drop) : drop{should_drop} {}

  operator bool() const { return drop; }
};
// Drop the current row.
static constexpr filter_result ROW_DROP(true);
// Keep the current row.
static constexpr filter_result ROW_OK(false);
template <typename F>
class CsvParser {
using this_type = CsvParser<F>;
public:
// return true if field should cause row to be ignored
using filter_function_type = std::function<
filter_result(size_t field_num, const char* buf, size_t len)>;
using error_callback_type = std::function<
filter_result(size_t line_number, size_t field_number,
const std::string& error_message, std::exception_ptr ex)>;
CsvParser(const F& sink) : sink{sink} { csv_init(&parser, 0); }
~CsvParser() { csv_free(&parser); }
//
void set_delim_char(unsigned char delim) { parser.delim_char = delim; }
void set_quote_char(unsigned char quote) { parser.quote_char = quote; }
void set_skip_header() { skip_row = true; }
void set_error_func(const error_callback_type func) { error_func = func; }
void set_comment_mark(const std::string& prefix) {
auto is_comment = [prefix](size_t field_num, const char* buf, size_t len) {
return field_num == 0 && //
len >= prefix.length() && //
std::equal(prefix.begin(), prefix.end(), buf);
};
return add_row_filter(is_comment);
}
/* Limitation: Fields are coerced to their types as they are
* encountered, so these filters can't prevent conversion by
* looking at data later in the same row. */
void add_row_filter(const filter_function_type filter) {
auto orig = filter_func;
filter_func = [orig, filter](size_t field_num, const char* buf,
size_t len) {
return orig(field_num, buf, len) || filter(field_num, buf, len);
};
}
//
bool ParseFile(const std::string& filename) {
detail::MappedFile data(filename);
if (!data) {
return status = data.status;
}
return Parse(data.begin(), data.end()) && Finish();
}
template <typename T>
bool Parse(const T& str) {
return Parse(str.data(), str.data() + str.length());
}
template <typename It>
bool Parse(const It& begin, const It& end) {
csv_parse(&parser, begin, end - begin, on_field, on_record, this);
return update_status();
}
template <typename IoStream>
bool ParseStream(IoStream& input) {
char buf[4096];
do {
input.read(buf, sizeof(buf));
csv_parse(&parser, buf, input.gcount(), on_field, on_record, this);
} while (input && update_status());
return Finish();
}
bool Finish() {
csv_fini(&parser, on_field, on_record, this);
return update_status();
}
const std::string& ErrorString() { return status.message; }
operator bool() { return status; }
private:
static void on_field(void* data, size_t len, void* this_ptr) {
this_type* t = reinterpret_cast<this_type*>(this_ptr);
t->accept_field((char*)data, len);
}
static void on_record(int, void* this_ptr) {
this_type* t = reinterpret_cast<this_type*>(this_ptr);
t->accept_row();
};
private:
detail::meta::fields<F> fields;
csv_parser parser;
const F& sink;
detail::Result status;
filter_function_type filter_func = [](size_t, const char*,
size_t) { return ROW_OK; };
error_callback_type error_func = [](size_t row, size_t column,
const std::string& err,
const std::exception_ptr) {
std::cerr << "[csv.hpp] Exception at row " << row << ", column " << column
<< ": " << err << "\n";
return ROW_DROP;
};
bool skip_row{false};
size_t current_line = 0;
size_t current_field = 0;
private:
void accept_field(const char* buf, size_t len) {
skip_row = skip_row || filter_func(current_field, buf, len);
if (!skip_row) {
try {
fields.accept_field(current_field, buf, len);
}
catch (std::exception& e) {
skip_row = error_func(current_line + 1, current_field + 1, e.what(),
std::current_exception());
}
}
++current_field;
}
void accept_row() {
if (!skip_row) {
fields.accept_row(sink);
} else {
skip_row = false;
}
current_field = 0;
++current_line;
}
const detail::Result& update_status() {
if (status.number == 0 && parser.status != 0) {
status.number = parser.status;
status.message = csv_error(&parser);
}
return status;
}
};
template <typename F>
CsvParser<F> make_parser(F&& f) {
return CsvParser<F>(f);
}
// used to ignore input fields
// Empty tag type. This file specialises boost::lexical_cast for it so that
// converting a field to `ignore` returns an `ignore` without reading the
// field's text.
struct ignore {};
} // namespace csv
namespace boost {
// Conversion of a character range to csv::ignore: the text is not inspected
// and an empty tag value is returned.
template <>
inline csv::ignore lexical_cast(const char*, size_t) {
  return csv::ignore{};
}
}  // namespace boost