diff --git a/src/Column.hxx b/src/Column.hxx index 56d8db67..9669e954 100644 --- a/src/Column.hxx +++ b/src/Column.hxx @@ -7,27 +7,41 @@ namespace tablator { class Column { public: + // These constexprs are used by HDF5-support code. static constexpr char const *COL_ARRAY_SIZE = "array_size"; static constexpr char const *COL_FIELD_PROPERTIES = "field_properties"; static constexpr char const *COL_NAME = "name"; static constexpr char const *COL_TYPE = "type"; + static constexpr char const *COL_DYNAMIC_ARRAY_FLAG = "dynamic_array_flag"; - Column(const std::string &Name, const Data_Type &Type, const size_t &Array_size) - : Column(Name, Type, Array_size, Field_Properties()) {} + Column(const std::string &name, const Data_Type &type, const size_t &array_size, + const Field_Properties &field_properties, bool dynamic_array_flag) + : name_(name), + type_(type), + array_size_(array_size), + field_properties_(field_properties), + dynamic_array_flag_(dynamic_array_flag) {} - Column(const std::string &Name, const Data_Type &Type, const size_t &Array_size, - const Field_Properties &Field_properties) - : name_(Name), - type_(Type), - array_size_(Array_size), - field_properties_(Field_properties) {} + Column(const std::string &name, const Data_Type &type, const size_t &array_size, + const Field_Properties &field_properties) + : Column(name, type, array_size, field_properties, + ((type == Data_Type::CHAR) || + (array_size == std::numeric_limits::max()))) {} - inline size_t get_data_size() const { - return tablator::data_size(type_) * array_size_; - } + Column(const std::string &name, const Data_Type &type, const size_t &array_size, + bool dynamic_array_flag) + : Column(name, type, array_size, Field_Properties(), dynamic_array_flag) {} + + + Column(const std::string &name, const Data_Type &type, const size_t &array_size) + : Column(name, type, array_size, Field_Properties()) {} - // deprecated - inline size_t data_size() const { return get_data_size(); } + + Column(const std::string &name, const Data_Type &type, + const Field_Properties &field_properties) + : Column(name, type, 1, field_properties) {} + + Column(const std::string &name, const Data_Type &type) : Column(name, type, 1) {} // accessors inline const std::string &get_name() const { return name_; } @@ -63,7 +77,12 @@ public: inline ATTRIBUTES &get_field_property_attributes() { return get_field_properties().get_attributes(); } + inline bool get_dynamic_array_flag() const { return dynamic_array_flag_; } + inline void set_dynamic_array_flag(bool b) { dynamic_array_flag_ = b; } + inline size_t get_data_size() const { + return tablator::data_size(type_) * array_size_; + } private: std::string name_; @@ -72,6 +91,7 @@ private: // Actual array_size for fixed-length arrays; maximum array_size otherwise. size_t array_size_; Field_Properties field_properties_; + bool dynamic_array_flag_; }; diff --git a/src/Common.hxx b/src/Common.hxx index 5c239920..02f63af7 100644 --- a/src/Common.hxx +++ b/src/Common.hxx @@ -64,6 +64,7 @@ static const std::string PARAMREF("PARAMref"); static const std::string QUERY_STATUS("QUERY_STATUS"); static const std::string REF("ref"); static const std::string RESOURCE("RESOURCE"); +static const std::string STREAM("STREAM"); static const std::string TABLE("TABLE"); static const std::string TABLEDATA("TABLEDATA"); static const std::string TIMESYS("TIMESYS"); diff --git a/src/Data_Details.hxx b/src/Data_Details.hxx new file mode 100644 index 00000000..0d61812f --- /dev/null +++ b/src/Data_Details.hxx @@ -0,0 +1,72 @@ +#pragma once + +#include "Common.hxx" +#include "Field_Framework.hxx" +#include "Row.hxx" + +namespace tablator { + +class Data_Details { +public: + Data_Details(size_t row_size, size_t num_rows = 0) : row_size_(row_size) { + init(num_rows); + } + + Data_Details(const Field_Framework &field_framework, size_t num_rows = 0) + : Data_Details(field_framework.get_row_size(), num_rows) {} + + + void append_row(const Row &row) { + assert(row.get_data().size() == get_row_size()); + data_.reserve(data_.size() + row.get_data().size()); + data_.insert(data_.end(), row.get_data().begin(), row.get_data().end()); + } + + + void append_rows(const Data_Details &other) { + assert(other.get_row_size() == get_row_size()); + + data_.reserve(data_.size() + other.get_data().size()); + data_.insert(data_.end(), other.get_data().begin(), other.get_data().end()); + } + + void adjust_num_rows(const size_t new_num_rows) { + data_.resize(new_num_rows * get_row_size()); + } + + void reserve_rows(const size_t &new_num_rows) { + data_.reserve(get_row_size() * new_num_rows); + } + + // accessors + + size_t get_data_size() const { return data_.size(); } + + size_t get_num_rows() const { + if (get_row_size() == 0) { + return 0; + } + return get_data_size() / get_row_size(); + }; + + inline size_t get_row_size() const { return row_size_; } + + inline const std::vector &get_data() const { return data_; } + + // Non-const to support append_rows(). + inline std::vector &get_data() { return data_; } + + inline void set_data(const std::vector &d) { data_ = d; } + + +private: + void init(const size_t &new_num_rows) { + reserve_rows(new_num_rows); + } + + // Can't be const because of append_rows(). + std::vector data_; + size_t row_size_; +}; + +} // namespace tablator diff --git a/src/Data_Element.hxx b/src/Data_Element.hxx index 21c85a46..79b96a5f 100644 --- a/src/Data_Element.hxx +++ b/src/Data_Element.hxx @@ -1,37 +1,67 @@ #pragma once -#include -#include - #include "Column.hxx" - -// JTODO: Create struct to hold commonly used (columns, offsets) pair? +#include "Data_Details.hxx" +#include "Field_Framework.hxx" namespace tablator { class Data_Element { public: - Data_Element(const std::vector &columns, const std::vector &offsets, - const std::vector &data) - : columns_(columns), offsets_(offsets), data_(data) {} + Data_Element(const Field_Framework &field_framework, + const Data_Details &data_details) + : field_framework_(field_framework), data_details_(data_details) {} + + Data_Element(const Field_Framework &field_framework, size_t num_rows = 0) + : Data_Element(field_framework, Data_Details(field_framework, num_rows)) {} // accessors - inline const std::vector &get_columns() const { return columns_; } - inline std::vector &get_columns() { return columns_; } + const Field_Framework &get_field_framework() const { return field_framework_; } + + Field_Framework &get_field_framework() { return field_framework_; } + + + const Data_Details &get_data_details() const { return data_details_; } + + Data_Details &get_data_details() { return data_details_; } - inline const std::vector &get_offsets() const { return offsets_; } - inline std::vector &get_offsets() { return offsets_; } + inline const std::vector &get_columns() const { + return field_framework_.get_columns(); + } + // Non-const to allow query_server to modify field_properties. + inline std::vector &get_columns() { return field_framework_.get_columns(); } - inline const std::vector &get_data() const { return data_; } - inline std::vector &get_data() { return data_; } + inline const std::vector &get_offsets() const { + return field_framework_.get_offsets(); + } - inline void set_data(const std::vector &d) { data_ = d; } + size_t get_row_size() const { return field_framework_.get_row_size(); } + size_t get_num_rows() const { return data_details_.get_num_rows(); } + + void adjust_num_rows(const size_t &new_num_rows) { + data_details_.adjust_num_rows(new_num_rows); + } + + void reserve_rows(const size_t &new_num_rows) { + get_data_details().reserve_rows(new_num_rows); + } + + inline const std::vector &get_data() const { + return data_details_.get_data(); + } + + // Non-const to support append_rows(). + inline std::vector &get_data() { return data_details_.get_data(); } + + inline void set_data(const std::vector &d) { data_details_.set_data(d); } private: - std::vector columns_; - std::vector offsets_ = {0}; - std::vector data_; + // Non-const to allow query_server to update column's field_properties. + Field_Framework field_framework_; + + // Non-const because of append_rows(). + Data_Details data_details_; }; } // namespace tablator diff --git a/src/Field_Framework.hxx b/src/Field_Framework.hxx new file mode 100644 index 00000000..0afebdd9 --- /dev/null +++ b/src/Field_Framework.hxx @@ -0,0 +1,95 @@ +#pragma once + +#include "Column.hxx" +#include "Utils/Null_Utils.hxx" + +namespace tablator { + +class Field_Framework { +public: + Field_Framework(const std::vector &incoming_columns, + bool got_null_bitfields_column = false) + : offsets_({0}) { + if ((got_null_bitfields_column && incoming_columns.size() == 1) || + incoming_columns.empty()) { + throw std::runtime_error( + "Field_Framework constructor: must be " + "non-empty."); + } + // JTODO avoid copies + if (!got_null_bitfields_column) { + const size_t null_flags_size = bits_to_bytes(incoming_columns.size() + 1); + append_column(null_bitfield_flags_name, Data_Type::UINT8_LE, + null_flags_size, + Field_Properties::Builder() + .add_description(null_bitfield_flags_description) + .build()); + } + + for (auto &col : incoming_columns) { + append_column(col); + } + } + + // accessors + + inline size_t get_row_size() const { + if (offsets_.empty()) { + throw std::runtime_error(" is empty"); + } + return offsets_.back(); + } + + inline const std::vector &get_columns() const { return columns_; } + inline std::vector &get_columns() { return columns_; } + + inline const std::vector &get_offsets() const { return offsets_; } + inline std::vector &get_offsets() { return offsets_; } + +private: + void append_column(const Column &column) { + columns_.emplace_back(column); + size_t old_row_size = *offsets_.rbegin(); + size_t new_row_size = old_row_size + columns_.rbegin()->get_data_size(); + offsets_.push_back(new_row_size); + } + + inline void append_column(const std::string &name, const Data_Type &type, + const size_t &array_size, + const Field_Properties &field_properties, + bool dynamic_array_flag) { + append_column( + Column(name, type, array_size, field_properties, dynamic_array_flag)); + } + + inline void append_column(const std::string &name, const Data_Type &type, + const size_t &array_size, + const Field_Properties &field_properties) { + append_column(Column(name, type, array_size, field_properties)); + } + + inline void append_column(const std::string &name, const Data_Type &type, + const size_t &array_size, bool dynamic_array_flag) { + append_column(Column(name, type, array_size, dynamic_array_flag)); + } + + inline void append_column(const std::string &name, const Data_Type &type, + const size_t &size) { + append_column(Column(name, type, size)); + } + + inline void append_column(const std::string &name, const Data_Type &type, + const Field_Properties &field_properties) { + append_column(Column(name, type, field_properties)); + } + + + inline void append_column(const std::string &name, const Data_Type &type) { + append_column(Column(name, type)); + } + + std::vector columns_; + std::vector offsets_; +}; + +} // namespace tablator diff --git a/src/Property.hxx b/src/Property.hxx index 8c375541..e4d98745 100644 --- a/src/Property.hxx +++ b/src/Property.hxx @@ -21,16 +21,6 @@ public: Property(const std::string &Value, const ATTRIBUTES &Attributes) : attributes_(Attributes), value_(Value) {} - // Called internally, directly or otherwise, only by flatten_properties(). - std::vector flatten(const std::string &key) const { - std::vector result; - result.push_back(std::make_pair(key, value_)); - for (auto &a : attributes_) - result.push_back( - std::make_pair(key + "." + XMLATTR_DOT + a.first, a.second)); - return result; - } - const ATTRIBUTES &get_attributes() const { return attributes_; } ATTRIBUTES &get_attributes() { return attributes_; } diff --git a/src/Resource_Element.hxx b/src/Resource_Element.hxx index 159c1b99..1389c99f 100644 --- a/src/Resource_Element.hxx +++ b/src/Resource_Element.hxx @@ -373,9 +373,28 @@ public: return get_table_elements().at(MAIN_TABLE_ELEMENT_IDX); } + const Field_Framework &get_field_framework() const { + return get_main_table_element().get_field_framework(); + } + + Field_Framework &get_field_framework() { + return get_main_table_element().get_field_framework(); + } + + + const Data_Details &get_data_details() const { + return get_main_table_element().get_data_details(); + } + + Data_Details &get_data_details() { + return get_main_table_element().get_data_details(); + } + + const std::vector &get_columns() const { return get_main_table_element().get_columns(); } + std::vector &get_columns() { return get_main_table_element().get_columns(); } @@ -383,13 +402,23 @@ public: const std::vector &get_offsets() const { return get_main_table_element().get_offsets(); } - std::vector &get_offsets() { - return get_main_table_element().get_offsets(); + + size_t get_row_size() const { return get_main_table_element().get_row_size(); } + size_t get_num_rows() const { return get_main_table_element().get_num_rows(); } + + // called by query_server to trim result set + void adjust_num_rows(const size_t &new_num_rows) { + get_main_table_element().adjust_num_rows(new_num_rows); + } + + void reserve_rows(const size_t &new_num_rows) { + get_main_table_element().reserve_rows(new_num_rows); } std::vector &get_table_element_params() { return get_main_table_element().get_params(); } + const std::vector &get_table_element_params() const { return get_main_table_element().get_params(); } diff --git a/src/Row.hxx b/src/Row.hxx index 6c1fffdc..6d450ee1 100644 --- a/src/Row.hxx +++ b/src/Row.hxx @@ -3,60 +3,62 @@ #include #include +#include "Common.hxx" #include "Data_Type.hxx" -#include "unsafe_copy_to_row.hxx" namespace tablator { class Row { public: - // JTODO Update tablator clients to call get_data() and then make data a private - // class member. - std::vector data; + Row(const size_t &data_size) : data_(data_size) {} - Row(const size_t &size) : data(size) {} + void fill_with_zeros() { std::fill(data_.begin(), data_.end(), 0); } - void fill_with_zeros() { std::fill(data.begin(), data.end(), 0); } - - // backward compatibility - void set_zero() { fill_with_zeros(); } - - void set_null(const Data_Type &type, const size_t &array_size, - const size_t &col_idx, const size_t &offset, - const size_t &offset_end); + void insert_null(Data_Type type, const size_t &array_size, const size_t &col_idx, + const size_t &offset, const size_t &offset_end); template void insert(const T &element, const size_t &offset) { - assert(offset + sizeof(T) <= data.size()); - unsafe_copy_to_row(element, offset, data.data()); + assert(offset + sizeof(T) <= data_.size()); + std::copy(reinterpret_cast(&element), + reinterpret_cast(&element) + sizeof(T), + data_.data() + offset); } template void insert(const T &begin, const T &end, const size_t &offset) { - assert(offset + std::distance(begin, end) <= data.size()); - std::copy(begin, end, data.data() + offset); + assert(offset + std::distance(begin, end) <= data_.size()); + std::copy(begin, end, data_.data() + offset); } + void insert(const std::string &element, const size_t &offset_begin, const size_t &offset_end) { std::string element_copy(element); element_copy.resize(offset_end - offset_begin, '\0'); - std::copy(element_copy.begin(), element_copy.end(), data.data() + offset_begin); + std::copy(element_copy.begin(), element_copy.end(), + data_.data() + offset_begin); } + + void insert_from_ascii(const std::string &element, const Data_Type &data_type, + const size_t &array_size, const size_t &column, + const size_t &offset, const size_t &offset_end); + + + size_t get_size() const { return data_.size(); } + + const std::vector &get_data() const { return data_; } + std::vector &get_data() { return data_; } + +private: template - void insert_null(const size_t &offset) { + void insert_null_internal(const size_t &offset) { insert(tablator::get_null(), offset); } - size_t get_size() const { return data.size(); } - // Deprecated - size_t size() const { return get_size(); } + void insert_null_by_type(Data_Type data_type, const size_t &offset); - const std::vector &get_data() const { return data; } - std::vector &get_data() { return data; } - -private: - void set_null_internal(const Data_Type &data_type, const size_t &offset); + std::vector data_; }; diff --git a/src/Utils/Table_Utils/insert_ascii_in_row.cxx b/src/Row/insert_from_ascii.cxx similarity index 70% rename from src/Utils/Table_Utils/insert_ascii_in_row.cxx rename to src/Row/insert_from_ascii.cxx index ff5033c6..f732964f 100644 --- a/src/Utils/Table_Utils/insert_ascii_in_row.cxx +++ b/src/Row/insert_from_ascii.cxx @@ -3,13 +3,13 @@ #include #include -#include "../../data_size.hxx" -#include "../Table_Utils.hxx" +#include "../Row.hxx" +#include "../data_size.hxx" namespace tablator { -void insert_ascii_in_row(Row &row, const Data_Type &data_type, const size_t &array_size, - const size_t &column, const std::string &element, - const size_t &offset, const size_t &offset_end) { +void Row::insert_from_ascii(const std::string &element, const Data_Type &data_type, + const size_t &array_size, const size_t &col_idx, + const size_t &offset, const size_t &offset_end) { if (array_size != 1 && data_type != Data_Type::CHAR) { std::vector elements; boost::split(elements, element, boost::is_any_of(" ")); @@ -22,15 +22,15 @@ void insert_ascii_in_row(Row &row, const Data_Type &data_type, const size_t &arr auto element_offset = offset; auto element_size = data_size(data_type); for (auto &e : elements) { - insert_ascii_in_row(row, data_type, 1, column, e, element_offset, - element_offset + element_size); + insert_from_ascii(e, data_type, 1, col_idx, element_offset, + element_offset + element_size); element_offset += element_size; } } else { switch (data_type) { case Data_Type::INT8_LE: if (element == "?" || element == " " || element[0] == '\0') { - row.set_null(data_type, array_size, column, offset, offset_end); + insert_null(data_type, array_size, col_idx, offset, offset_end); } else { bool result = (boost::iequals(element, "true") || boost::iequals(element, "t") || element == "1"); @@ -38,7 +38,7 @@ void insert_ascii_in_row(Row &row, const Data_Type &data_type, const size_t &arr boost::iequals(element, "f") || element == "0")) { throw std::exception(); } - row.insert(static_cast(result), offset); + insert(static_cast(result), offset); } break; case Data_Type::UINT8_LE: { @@ -47,55 +47,54 @@ void insert_ascii_in_row(Row &row, const Data_Type &data_type, const size_t &arr if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest()) throw std::exception(); - row.insert(static_cast(result), offset); + insert(static_cast(result), offset); } break; case Data_Type::INT16_LE: { int result = boost::lexical_cast(element); if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest()) throw std::exception(); - row.insert(static_cast(result), offset); + insert(static_cast(result), offset); } break; case Data_Type::UINT16_LE: { int result = boost::lexical_cast(element); if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest()) throw std::exception(); - row.insert(static_cast(result), offset); + insert(static_cast(result), offset); } break; case Data_Type::INT32_LE: { long result = boost::lexical_cast(element); if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest()) throw std::exception(); - row.insert(static_cast(result), offset); + insert(static_cast(result), offset); } break; case Data_Type::UINT32_LE: { long result = boost::lexical_cast(element); if (result > std::numeric_limits::max() || result < std::numeric_limits::lowest()) throw std::exception(); - row.insert(static_cast(result), offset); + insert(static_cast(result), offset); } break; case Data_Type::INT64_LE: - row.insert(boost::lexical_cast(element), offset); + insert(boost::lexical_cast(element), offset); break; case Data_Type::UINT64_LE: - row.insert(boost::lexical_cast(element), offset); + insert(boost::lexical_cast(element), offset); break; case Data_Type::FLOAT32_LE: - row.insert(boost::lexical_cast(element), offset); + insert(boost::lexical_cast(element), offset); break; case Data_Type::FLOAT64_LE: - row.insert(boost::lexical_cast(element), offset); + insert(boost::lexical_cast(element), offset); break; case Data_Type::CHAR: - row.insert(element, offset, offset_end); + insert(element, offset, offset_end); break; default: - throw std::runtime_error( - "Unknown data type in insert_ascii_in_row(): " + - to_string(data_type)); + throw std::runtime_error("Unknown data type in insert_from_ascii(): " + + to_string(data_type)); } } } diff --git a/src/Row/set_null.cxx b/src/Row/insert_null.cxx similarity index 55% rename from src/Row/set_null.cxx rename to src/Row/insert_null.cxx index 76696bd1..ec45a8e4 100644 --- a/src/Row/set_null.cxx +++ b/src/Row/insert_null.cxx @@ -3,21 +3,21 @@ #include "../Row.hxx" #include "../data_size.hxx" -void tablator::Row::set_null(const Data_Type &data_type, const size_t &array_size, - const size_t &col_idx, const size_t &offset, - const size_t &offset_end) { +void tablator::Row::insert_null(Data_Type data_type, const size_t& array_size, + const size_t& col_idx, const size_t& offset, + const size_t& offset_end) { const int byte = (col_idx - 1) / 8; const char mask = (128 >> ((col_idx - 1) % 8)); // Update the null_bitfield_flag's bit for this column. - data[byte] = data[byte] | mask; + data_[byte] = data_[byte] | mask; size_t curr_offset = offset; size_t data_type_size = data_size(data_type); // Mark the indicated array elements as null. for (size_t i = 0; i < array_size; ++i) { - set_null_internal(data_type, curr_offset); + insert_null_by_type(data_type, curr_offset); curr_offset += data_type_size; if (curr_offset >= offset_end) { // Shouldn't happen. @@ -26,44 +26,42 @@ void tablator::Row::set_null(const Data_Type &data_type, const size_t &array_siz } } -void tablator::Row::set_null_internal(const Data_Type &data_type, - const size_t &offset) { +void tablator::Row::insert_null_by_type(Data_Type data_type, const size_t& offset) { switch (data_type) { case Data_Type::INT8_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::UINT8_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::INT16_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::UINT16_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::INT32_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::UINT32_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::INT64_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::UINT64_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::FLOAT32_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::FLOAT64_LE: - insert_null(offset); + insert_null_internal(offset); break; case Data_Type::CHAR: insert('\0', offset); break; default: - throw std::runtime_error( - "Unexpected data type in tablator::Row::set_null()"); + throw std::runtime_error("Unexpected data type in insert_null_by_type()"); } } diff --git a/src/Table.hxx b/src/Table.hxx index 9dae9f57..0936e034 100644 --- a/src/Table.hxx +++ b/src/Table.hxx @@ -195,12 +195,16 @@ public: // constructors Table(const std::vector &Columns, - const std::map &property_map); + const std::map &property_map, + bool got_null_bitfields_column = false, size_t num_rows = 0); Table(const std::vector &Columns, - const tablator::Labeled_Properties &property_pair_vec); - Table(const std::vector &Columns) - : Table(Columns, std::map()) {} + const tablator::Labeled_Properties &property_pair_vec, + bool got_null_bitfields_column = false, size_t num_rows = 0); + Table(const std::vector &Columns, bool got_null_bitfields_column = false, + size_t num_rows = 0) + : Table(Columns, std::map(), + got_null_bitfields_column, num_rows) {} Table(const boost::filesystem::path &input_path, const Format &format); Table(const boost::filesystem::path &input_path) { read_unknown(input_path); } @@ -241,20 +245,15 @@ public: } size_t get_column_offset(size_t col_idx) const { - const auto &columns = get_columns(); validate_column_index(col_idx); return get_offsets().at(col_idx); } size_t get_column_offset(const std::string &name) const { - auto col_idx = column_index(name); + auto col_idx = get_column_index(name); return get_offsets().at(col_idx); } - // JTODO deprecated - size_t column_index(const std::string &name) const { - return get_column_index(name); - } size_t column_offset(size_t col_idx) const { return get_column_offset(col_idx); } size_t column_offset(const std::string &name) const { return get_column_offset(name); @@ -265,7 +264,7 @@ public: std::vector col_ids; for (const std::string &col_name : col_names) { - size_t col_id = column_index(col_name); + size_t col_id = get_column_index(col_name); col_ids.emplace_back(col_id); } return col_ids; @@ -297,15 +296,8 @@ public: // table modifiers - void append_row(const Row &row) { - assert(row.data.size() == get_row_size()); - tablator::append_row(get_data(), row); - } - - void unsafe_append_row(const char *row) { - tablator::unsafe_append_row(get_data(), row, get_row_size()); - } - + // query_server and ZTF-mtc-utils call this function. + void append_row(const Row &row) { get_data_details().append_row(row); } void append_rows(const Table &table2); @@ -319,9 +311,6 @@ public: const Command_Line_Options &options = default_options) const { write(path, Format(path), options); } - void write_hdf5(std::ostream &os) const; - void write_hdf5(const boost::filesystem::path &p) const; - void write_ipac_table(std::ostream &os, const Command_Line_Options &options = default_options) const { Ipac_Table_Writer::write(*this, os, options); @@ -365,7 +354,7 @@ public: std::ostream &os, const std::vector &column_ids, const Command_Line_Options options = default_options) const { Ipac_Table_Writer::write_subtable_by_column_and_row(*this, os, column_ids, 0, - num_rows(), options); + get_num_rows(), options); } void write_single_ipac_record(std::ostream &os, size_t row_idx, @@ -411,10 +400,6 @@ public: requested_row_ids, options); } - - void write_dsv(std::ostream &os, const char &separator, - const Command_Line_Options &options = default_options) const; - void write_sql_create_table(std::ostream &os, const std::string &table_name, const Format::Enums &sql_type) const { using namespace std::string_literals; @@ -462,63 +447,6 @@ public: void write_sqlite_db(const boost::filesystem::path &path, const Command_Line_Options &options) const; - void write_fits(std::ostream &os) const; - - void write_fits(const boost::filesystem::path &filename) const; - - void write_fits(fitsfile *fits_file) const; - - void write_tabledata(std::ostream &os, const Format::Enums &output_format, - const Command_Line_Options &options) const; - - void write_html(std::ostream &os, const Command_Line_Options &options) const; - - boost::property_tree::ptree generate_property_tree() const; - - void read_unknown(const boost::filesystem::path &path); - void read_unknown(std::istream &input_stream); - void read_ipac_table(std::istream &input_stream); - void read_ipac_table(const boost::filesystem::path &path) { - boost::filesystem::ifstream input_stream(path); - read_ipac_table(input_stream); - } - void read_fits(const boost::filesystem::path &path); - void read_hdf5(const boost::filesystem::path &path); - void read_json5(std::istream &input_stream); - void read_json5(const boost::filesystem::path &path) { - boost::filesystem::ifstream input_stream(path); - read_json5(input_stream); - } - void read_json(std::istream &input_stream); - void read_json(const boost::filesystem::path &path) { - boost::filesystem::ifstream input_stream(path); - read_json(input_stream); - } - - void read_votable(std::istream &input_stream); - void read_votable(const boost::filesystem::path &path) { - boost::filesystem::ifstream input_stream(path); - read_votable(input_stream); - } - - void read_dsv(std::istream &input_stream, const Format &format); - void read_dsv(const boost::filesystem::path &path, const Format &format) { - if (path == "-") { - read_dsv(std::cin, format); - } else { - boost::filesystem::ifstream input_stream(path); - read_dsv(input_stream, format); - } - } - - void read_dsv_rows(const std::list> &dsv) { - set_data(read_dsv_rows(get_columns(), get_offsets(), dsv)); - } - - void set_column_info(std::list> &dsv) { - set_column_info(get_columns(), get_offsets(), dsv); - }; - // Following the VOTable convention, we use the most significant // bit for the first column. @@ -562,7 +490,7 @@ public: template std::vector extract_value(const std::string &col_name, size_t row_idx) const { - auto col_idx = column_index(col_name); + auto col_idx = get_column_index(col_name); return extract_value(col_idx, row_idx); } @@ -608,7 +536,7 @@ public: template std::vector extract_column(const std::string &col_name) const { - auto col_idx = column_index(col_name); + auto col_idx = get_column_index(col_name); return extract_column(col_idx); } @@ -621,7 +549,7 @@ public: const auto &columns = get_columns(); auto &column = columns[col_idx]; - size_t row_count = num_rows(); + size_t row_count = get_num_rows(); std::vector col_vec; col_vec.reserve(row_count * column.get_array_size()); for (size_t curr_row_idx = 0; curr_row_idx < row_count; ++curr_row_idx) { @@ -651,8 +579,6 @@ public: get_columns().at(col_idx).get_array_size()); } - -public: void insert_string_column_value_into_row(Row &row, size_t col_idx, const uint8_t *data_ptr, uint32_t curr_array_size) const; @@ -677,23 +603,14 @@ public: get_columns().at(col_idx).get_array_size()); } - // accessors - - size_t get_row_size() const { return tablator::get_row_size(get_offsets()); } - size_t get_num_rows() const { - return tablator::get_num_rows(get_offsets(), get_data()); + void winnow_rows(const std::set &selected_row_idx_list) { + tablator::winnow_rows(get_data(), selected_row_idx_list, + get_num_rows(), get_row_size()); } - size_t get_num_columns() const { return get_columns().size(); } - // called by query_server to trim result set - void resize_data(const size_t &new_num_rows) { - tablator::resize_data(get_data(), new_num_rows, get_row_size()); - } + // accessors - // deprecated - inline void resize_rows(const size_t &new_num_rows) { resize_data(new_num_rows); } - size_t row_size() const { return get_row_size(); } - size_t num_rows() const { return get_num_rows(); } + size_t get_num_columns() const { return get_columns().size(); } //=========================================================== @@ -757,21 +674,56 @@ public: return get_resource_elements().at(get_results_resource_idx()); } + + Field_Framework &get_field_framework() { + return get_results_resource_element().get_field_framework(); + } + + const Field_Framework &get_field_framework() const { + return get_results_resource_element().get_field_framework(); + } + + Data_Details &get_data_details() { + return get_results_resource_element().get_data_details(); + } + + const Data_Details &get_data_details() const { + return get_results_resource_element().get_data_details(); + } + + void adjust_num_rows(const size_t &new_num_rows) { + get_results_resource_element().adjust_num_rows(new_num_rows); + } + + + // deprecated + // called by query_server to trim result set + void resize_data(const size_t &new_num_rows) { + get_results_resource_element().adjust_num_rows(new_num_rows); + } + + // Non-const to allow query_server to modify Field_Properties. std::vector &get_columns() { return get_results_resource_element().get_columns(); } + const std::vector &get_columns() const { return get_results_resource_element().get_columns(); } - std::vector &get_offsets() { + const std::vector &get_offsets() const { return get_results_resource_element().get_offsets(); } - const std::vector &get_offsets() const { - return get_results_resource_element().get_offsets(); + size_t get_row_size() const { + return get_results_resource_element().get_row_size(); + } + + size_t get_num_rows() const { + return get_results_resource_element().get_num_rows(); } + Labeled_Properties &get_resource_element_labeled_properties() { return get_results_resource_element().get_labeled_properties(); } @@ -787,6 +739,7 @@ public: std::vector &get_table_element_params() { return get_results_resource_element().get_table_element_params(); } + const std::vector &get_table_element_params() const { return get_results_resource_element().get_table_element_params(); } @@ -794,10 +747,12 @@ public: std::vector &get_table_element_fields() { return get_results_resource_element().get_table_element_fields(); } + const std::vector &get_table_element_fields() const { return get_results_resource_element().get_table_element_fields(); } + // Non-const to support append_row(). std::vector &get_data() { return get_results_resource_element().get_data(); } @@ -806,12 +761,10 @@ public: return get_results_resource_element().get_data(); } - Table_Element &get_main_table_element() { return get_results_resource_element().get_main_table_element(); } - const Table_Element &get_main_table_element() const { return get_results_resource_element().get_main_table_element(); } @@ -932,6 +885,62 @@ public: private: + void write_hdf5(std::ostream &os) const; + void write_hdf5(const boost::filesystem::path &p) const; + + void write_dsv(std::ostream &os, const char &separator, + const Command_Line_Options &options = default_options) const; + + void write_fits(std::ostream &os) const; + + void write_fits(const boost::filesystem::path &filename) const; + + void write_fits(fitsfile *fits_file) const; + + void write_tabledata(std::ostream &os, const Format::Enums &output_format, + const Command_Line_Options &options) const; + + void write_html(std::ostream &os, const Command_Line_Options &options) const; + + boost::property_tree::ptree generate_property_tree() const; + + void read_unknown(const boost::filesystem::path &path); + void read_unknown(std::istream &input_stream); + void read_ipac_table(std::istream &input_stream); + void read_ipac_table(const boost::filesystem::path &path) { + boost::filesystem::ifstream input_stream(path); + read_ipac_table(input_stream); + } + void read_fits(const boost::filesystem::path &path); + void read_hdf5(const boost::filesystem::path &path); + void read_json5(std::istream &input_stream); + void read_json5(const boost::filesystem::path &path) { + boost::filesystem::ifstream input_stream(path); + read_json5(input_stream); + } + void read_json(std::istream &input_stream); + void read_json(const boost::filesystem::path &path) { + boost::filesystem::ifstream input_stream(path); + read_json(input_stream); + } + + void read_votable(std::istream &input_stream); + void read_votable(const boost::filesystem::path &path) { + boost::filesystem::ifstream input_stream(path); + read_votable(input_stream); + } + + void read_dsv(std::istream &input_stream, const Format &format); + void read_dsv(const boost::filesystem::path &path, const Format &format) { + if (path == "-") { + read_dsv(std::cin, format); + } else { + boost::filesystem::ifstream input_stream(path); + read_dsv(input_stream, format); + } + } + + std::vector get_column_widths(const Command_Line_Options &options) const { return Ipac_Table_Writer::get_column_widths(*this, options); } @@ -976,90 +985,31 @@ private: options); } - - void reserve_data(const size_t &new_num_rows) { - tablator::reserve_data(get_data(), new_num_rows, get_row_size()); - } - // helpers for reading - - // WARNING: The private append_column() routines do not increase - // the size of the null column. The expectation is that the - // number of columns is known before adding columns. - void append_column(const std::string &name, const Data_Type &type) { - append_column(name, type, 1); - } - void append_column(const std::string &name, const Data_Type &type, - const size_t &size) { - append_column(name, type, size, Field_Properties()); - } - - void append_column(const std::string &name, const Data_Type &type, - const size_t &size, const Field_Properties &field_properties) { - append_column(Column(name, type, size, field_properties)); - } - - void append_column(const Column &column) { - tablator::append_column(get_columns(), get_offsets(), column); - } - - size_t read_ipac_header(std::istream &ipac_file, std::array, 4> &Columns, std::vector &ipac_table_offsets, Labeled_Properties &labeled_resource_properties); - void create_types_from_ipac_headers( - std::array, 4> &Columns, - const std::vector &ipac_column_widths) { - create_types_from_ipac_headers(get_columns(), get_offsets(), Columns, - ipac_column_widths); - } - - void append_ipac_data_member(const std::string &name, const std::string &data_type, - const size_t &size) { - append_ipac_data_member(get_columns(), get_offsets(), name, data_type, size); - } - - void shrink_ipac_string_columns_to_fit(const std::vector &column_widths) { - shrink_ipac_string_columns_to_fit(get_columns(), get_offsets(), get_data(), - column_widths); - }; - - static void append_ipac_data_member(std::vector &columns, - std::vector &offsets, const std::string &name, const std::string &data_type, const size_t &size); - static void create_types_from_ipac_headers( - std::vector &columns, std::vector &offsets, + static Field_Framework create_types_from_ipac_headers( const std::array, 4> &ipac_columns, const std::vector &ipac_column_widths); static void shrink_ipac_string_columns_to_fit( - std::vector &columns, std::vector &offsets, - std::vector &data, const std::vector &column_widths); - - - // miscellaneous helpers for reading - - - static std::vector read_dsv_rows( - std::vector &columns, std::vector &offsets, - const std::list> &dsv); - + Field_Framework &field_framework, Data_Details &data_details, + const std::vector &column_widths); - // used only for read_dsv()? - static void set_column_info(std::vector &columns, - std::vector &offsets, - std::list> &dsv); + static Data_Details read_dsv_rows(Field_Framework &field_framework, + const std::list> &dsv); - // This function is not used internally. - static std::vector flatten_properties( - const Labeled_Properties &properties); + // used only for read_dsv() + static Field_Framework set_column_info(std::list> &dsv); Table(const std::vector &resource_elements, const Options &options) diff --git a/src/Table/Column_Row_Accessors.cxx b/src/Table/Column_Row_Accessors.cxx index 36eae894..c80a8088 100644 --- a/src/Table/Column_Row_Accessors.cxx +++ b/src/Table/Column_Row_Accessors.cxx @@ -14,7 +14,7 @@ const uint8_t *tablator::Table::extract_value_ptr(size_t col_idx, if (col_idx >= columns.size()) { throw std::runtime_error("Invalid column index: " + std::to_string(col_idx)); } - if (row_idx >= num_rows()) { + if (row_idx >= get_num_rows()) { throw std::runtime_error("Invalid row index: " + std::to_string(row_idx)); } @@ -24,7 +24,7 @@ const uint8_t *tablator::Table::extract_value_ptr(size_t col_idx, std::string tablator::Table::extract_value_as_string( const std::string &col_name, size_t row_idx, const Command_Line_Options &options) const { - size_t col_idx = column_index(col_name); // throws if col_name is invalid + size_t col_idx = get_column_index(col_name); // throws if col_name is invalid return extract_value_as_string(col_idx, row_idx, options); } @@ -41,7 +41,7 @@ std::string tablator::Table::extract_value_as_string( throw std::runtime_error("Invalid row index: " + std::to_string(row_idx)); } - size_t curr_row_offset = row_idx * row_size(); + size_t curr_row_offset = row_idx * get_row_size(); auto &column = columns[col_idx]; if (is_null_value(row_idx, col_idx)) { auto &null_value = column.get_field_properties().get_values().null; @@ -61,7 +61,7 @@ std::string tablator::Table::extract_value_as_string( std::vector tablator::Table::extract_column_values_as_strings( const std::string &col_name, const Command_Line_Options &options) const { - size_t col_idx = column_index(col_name); // throws if col_name is invalid + size_t col_idx = get_column_index(col_name); // throws if col_name is invalid std::vector col_vals; @@ -127,8 +127,8 @@ void tablator::Table::insert_null_into_row(tablator::Row &row, size_t col_idx, uint32_t array_size) const { validate_parameters(row, *this, col_idx, 0 /* elt_idx */, array_size); const auto &column = get_columns().at(col_idx); - row.set_null(column.get_type(), sizeof(uint32_t), col_idx, - get_offsets().at(col_idx), get_offsets().at(col_idx + 1)); + row.insert_null(column.get_type(), sizeof(uint32_t), col_idx, + get_offsets().at(col_idx), get_offsets().at(col_idx + 1)); } @@ -138,7 +138,6 @@ void tablator::Table::insert_array_element_into_row(tablator::Row &row, size_t c size_t elt_idx, const uint8_t *data_ptr) const { validate_parameters(row, *this, col_idx, elt_idx, 1 /* num_elements_to_insert */); - const auto &column = get_columns().at(col_idx); insert_blob_to_row_internal(row, *this, col_idx, elt_idx, data_ptr, 1); } //=============================================================== diff --git a/src/Table/Table.cxx b/src/Table/Table.cxx index 6cbfe2f5..b2a057f2 100644 --- a/src/Table/Table.cxx +++ b/src/Table/Table.cxx @@ -53,76 +53,17 @@ void append_column_attributes_with_label( namespace tablator { -void Table::append_rows(const Table &table2) { - assert(table2.get_row_size() == get_row_size()); - - size_t num_columns = get_num_columns(); - assert(table2.get_num_columns() == num_columns); - - const auto &columns = get_columns(); - const auto &table2_columns = table2.get_columns(); - - const auto &offsets = get_offsets(); - const auto &table2_offsets = table2.get_offsets(); - - - for (size_t col_idx = 0; col_idx < num_columns; ++col_idx) { - const auto &column = columns.at(col_idx); - const auto &table2_column = table2_columns.at(col_idx); - if (column.get_name() != table2_column.get_name()) { - throw std::runtime_error("Column names differ at index " + - std::to_string(col_idx)); - } - if (column.get_type() != table2_column.get_type()) { - throw std::runtime_error("Column types differ at index " + - std::to_string(col_idx)); - } - - if (offsets.at(col_idx) != table2_offsets.at(col_idx)) { - throw std::runtime_error("Offsets differ at index " + - std::to_string(col_idx)); - } - if (col_idx == num_columns - 1) { - // Check final offset value - if (offsets.at(col_idx + 1) != table2_offsets.at(col_idx + 1)) { - throw std::runtime_error("Final offset values differ."); - } - } - } - - tablator::append_rows(get_data(), table2.get_data()); -} - - -Table_Element load_columns_and_offsets(const std::vector &columns) { - if (columns.empty()) { - throw std::runtime_error("This table has no columns"); - } - - std::vector tabledata_columns; - std::vector tabledata_offsets = {0}; - - const size_t null_flags_size = bits_to_bytes(columns.size()); - tablator::append_column(tabledata_columns, tabledata_offsets, - null_bitfield_flags_name, Data_Type::UINT8_LE, - null_flags_size, - Field_Properties::Builder() - .add_description(null_bitfield_flags_description) - .build()); - - for (auto &c : columns) { - tablator::append_column(tabledata_columns, tabledata_offsets, c); - } - - return Table_Element::Builder(tabledata_columns, tabledata_offsets, - std::vector() /* data */) - .build(); -} - +// ========================================================= +// Implementation of Table member functions +// ========================================================= Table::Table(const std::vector &columns, - const std::map &property_map) { - add_resource_element(load_columns_and_offsets(columns)); + const std::map &property_map, + bool got_null_bitfields_column, size_t num_rows) { + add_resource_element( + Table_Element::Builder(Field_Framework(columns, got_null_bitfields_column), + num_rows) + .build()); for (auto &p : property_map) { add_labeled_property(p.first, Property(p.second)); @@ -131,9 +72,13 @@ Table::Table(const std::vector &columns, Table::Table(const std::vector &columns, - const Labeled_Properties &property_pair_vec) + const Labeled_Properties &property_pair_vec, + bool got_null_bitfields_column, size_t num_rows) : results_resource_idx_(0) { - add_resource_element(load_columns_and_offsets(columns)); + add_resource_element( + Table_Element::Builder(Field_Framework(columns, got_null_bitfields_column), + num_rows) + .build()); for (const auto &label_and_prop : property_pair_vec) { if (boost::starts_with(label_and_prop.first, VOTABLE_RESOURCE_DOT)) { @@ -435,5 +380,50 @@ void Table::stash_resource_element_labeled_property( } } +//=========================================================== + +// Helper function + +bool close_enough_to_append_rows(const Field_Framework &ff1, + const Field_Framework &ff2) { + if (ff1.get_row_size() != ff2.get_row_size()) { + return false; + } + const auto &ff1_columns = ff1.get_columns(); + const auto &ff2_columns = ff2.get_columns(); + + if (ff1_columns.size() != ff2_columns.size()) { + return false; + } + const auto &ff1_offsets = ff1.get_offsets(); + const auto &ff2_offsets = ff2.get_offsets(); + + + for (size_t col_idx = 0; col_idx < ff1_columns.size(); ++col_idx) { + const auto &ff1_column = ff1_columns.at(col_idx); + const auto &ff2_column = ff2_columns.at(col_idx); + + if ((ff1_column.get_name() != ff2_column.get_name()) || + (ff1_column.get_type() != ff2_column.get_type()) || + (ff1_column.get_array_size() != ff2_column.get_array_size()) || + (ff1_column.get_dynamic_array_flag() != + ff2_column.get_dynamic_array_flag()) || + (ff1_offsets.at(col_idx) != ff2_offsets.at(col_idx))) { + // Never mind comparing field_properties for now. 01Jul25 + return false; + } + } + // Check final offset value. + return (ff1_offsets.back() == ff2_offsets.back()); +} + +void Table::append_rows(const Table &table2) { + if (!close_enough_to_append_rows(get_field_framework(), + table2.get_field_framework())) { + throw std::runtime_error("The tables are not similar enough to append rows."); + } + get_data_details().append_rows(table2.get_data_details()); +} + }; // namespace tablator diff --git a/src/Table/flatten_properties.cxx b/src/Table/flatten_properties.cxx deleted file mode 100644 index f1788434..00000000 --- a/src/Table/flatten_properties.cxx +++ /dev/null @@ -1,12 +0,0 @@ -#include "../Table.hxx" - -std::vector tablator::Table::flatten_properties( - const std::vector &label_and_property_list) { - std::vector result; - for (auto &label_and_prop : label_and_property_list) { - std::vector flattened_prop( - label_and_prop.second.flatten(label_and_prop.first)); - result.insert(result.end(), flattened_prop.begin(), flattened_prop.end()); - } - return result; -} diff --git a/src/Table/generate_property_tree/add_to_property_tree.cxx b/src/Table/generate_property_tree/add_to_property_tree.cxx index 50f725b7..a5736c2c 100644 --- a/src/Table/generate_property_tree/add_to_property_tree.cxx +++ b/src/Table/generate_property_tree/add_to_property_tree.cxx @@ -20,6 +20,7 @@ void Min_Max_to_xml(boost::property_tree::ptree &tree, const std::string &min_ma // If json_prep is true, find (or, if none exists, create) a tree with // label