From 3635ec569d474d4256c9ade192cc697f8d40a5f6 Mon Sep 17 00:00:00 2001 From: Judith Silverman Date: Mon, 9 Jun 2025 19:14:48 -0700 Subject: [PATCH 1/8] IRSA-6936: tablator: provide high-level accessors II Add Table::retain_only_selected_rows() to support query_server's mcen option --- src/Table.hxx | 14 ++++-- src/Table/Column_Row_Accessors.cxx | 1 - src/Utils/Table_Utils.hxx | 10 +++- .../Table_Utils/retain_only_selected_rows.cxx | 46 +++++++++++++++++++ src/tablator/main.cxx | 31 ++++++++++++- test/convert.sh | 15 ++++++ test/multi_formatted.tbl | 11 +++++ wscript | 4 +- 8 files changed, 123 insertions(+), 9 deletions(-) create mode 100644 src/Utils/Table_Utils/retain_only_selected_rows.cxx create mode 100644 test/multi_formatted.tbl diff --git a/src/Table.hxx b/src/Table.hxx index 9dae9f57..2f8a79ca 100644 --- a/src/Table.hxx +++ b/src/Table.hxx @@ -241,7 +241,6 @@ public: } size_t get_column_offset(size_t col_idx) const { - const auto &columns = get_columns(); validate_column_index(col_idx); return get_offsets().at(col_idx); } @@ -677,6 +676,11 @@ public: get_columns().at(col_idx).get_array_size()); } + void retain_only_selected_rows(const std::set &selected_row_idx_list) { + tablator::retain_only_selected_rows(get_data(), selected_row_idx_list, + get_num_rows(), get_row_size()); + } + // accessors size_t get_row_size() const { return tablator::get_row_size(get_offsets()); } @@ -686,12 +690,14 @@ public: size_t get_num_columns() const { return get_columns().size(); } // called by query_server to trim result set - void resize_data(const size_t &new_num_rows) { - tablator::resize_data(get_data(), new_num_rows, get_row_size()); + void adjust_num_rows(const size_t &new_num_rows) { + tablator::adjust_num_rows(get_data(), new_num_rows, get_row_size()); } // deprecated - inline void resize_rows(const size_t &new_num_rows) { resize_data(new_num_rows); } + inline void resize_rows(const size_t &new_num_rows) { + adjust_num_rows(new_num_rows); + } size_t row_size() const { return get_row_size(); } size_t num_rows() const { return get_num_rows(); } diff --git a/src/Table/Column_Row_Accessors.cxx b/src/Table/Column_Row_Accessors.cxx index 36eae894..ee88a1c3 100644 --- a/src/Table/Column_Row_Accessors.cxx +++ b/src/Table/Column_Row_Accessors.cxx @@ -138,7 +138,6 @@ void tablator::Table::insert_array_element_into_row(tablator::Row &row, size_t c size_t elt_idx, const uint8_t *data_ptr) const { validate_parameters(row, *this, col_idx, elt_idx, 1 /* num_elements_to_insert */); - const auto &column = get_columns().at(col_idx); insert_blob_to_row_internal(row, *this, col_idx, elt_idx, data_ptr, 1); } //=============================================================== diff --git a/src/Utils/Table_Utils.hxx b/src/Utils/Table_Utils.hxx index 736d88e4..95fa1bcf 100644 --- a/src/Utils/Table_Utils.hxx +++ b/src/Utils/Table_Utils.hxx @@ -1,5 +1,7 @@ #pragma once +#include + #include "../Column.hxx" #include "../Row.hxx" @@ -58,7 +60,7 @@ inline void append_rows(std::vector &data, const std::vector & } -inline void resize_data(std::vector &data, const size_t &new_num_rows, +inline void adjust_num_rows(std::vector &data, const size_t &new_num_rows, uint row_size) { data.resize(new_num_rows * row_size); } @@ -73,4 +75,10 @@ void insert_ascii_in_row(Row &row, const Data_Type &data_type, const size_t &arr const size_t &column, const std::string &element, const size_t &offset, const size_t &offset_end); + +void retain_only_selected_rows(std::vector &data, + const std::set &selected_row_idx_list, + size_t num_rows, size_t row_size); + + } // namespace tablator diff --git a/src/Utils/Table_Utils/retain_only_selected_rows.cxx b/src/Utils/Table_Utils/retain_only_selected_rows.cxx new file mode 100644 index 00000000..239dddf0 --- /dev/null +++ b/src/Utils/Table_Utils/retain_only_selected_rows.cxx @@ -0,0 +1,46 @@ +#include "../Table_Utils.hxx" + +void tablator::retain_only_selected_rows(std::vector &data, + const std::set &selected_row_idx_list, + size_t num_rows, size_t row_size) { + if (data.size() != num_rows * row_size) { + // JTODO relax this condition? + throw std::runtime_error( + "Mismatch between data.size(), num_rows, and row_size."); + } + size_t num_selected_rows = selected_row_idx_list.size(); + if (num_selected_rows > num_rows) { + throw std::runtime_error("Number of selected rows must not exceed " + + std::to_string(num_rows)); + } + if (*selected_row_idx_list.rbegin() >= num_rows) { + throw std::runtime_error("invalid row index: " + + std::to_string(*selected_row_idx_list.rbegin())); + } + if (num_selected_rows == num_rows) { + return; + } + + const auto data_start_ptr = data.data(); + auto read_ptr = data_start_ptr; + auto write_ptr = data_start_ptr; + size_t write_idx = 0; + size_t prev_row_idx = 0; + + for (auto row_idx : selected_row_idx_list) { + if (row_idx >= write_idx) { + if (row_idx > write_idx) { + // Copy row_idx-th row to begin immediately after the end of the + // previous copied row. + read_ptr += ((row_idx - prev_row_idx) * row_size); + std::copy(read_ptr, read_ptr + row_size, write_ptr); + } + ++write_idx; + write_ptr += row_size; + prev_row_idx = row_idx; + } + } + + // Delete all data past the last row copied. + data.resize(std::distance(data_start_ptr, write_ptr)); +} diff --git a/src/tablator/main.cxx b/src/tablator/main.cxx index f04f2428..55785c77 100644 --- a/src/tablator/main.cxx +++ b/src/tablator/main.cxx @@ -244,8 +244,10 @@ int main(int argc, char *argv[]) { size_t row_id = SIZE_T_MAX; size_t start_row = SIZE_T_MAX; size_t row_count = SIZE_T_MAX; - std::vector row_list; + std::vector row_list; // for writing ipac_table + std::set retain_row_list; // for modifying table in place std::string row_string; + std::string retain_row_string; bool call_static_f = false; bool exclude_cols_f = false; bool skip_comments_f = false; @@ -260,7 +262,8 @@ int main(int argc, char *argv[]) { std::string trim_decimal_runs = "1"; std::string counter_column_name = ""; bool combine_tables_f = false; - bool append_rows_f = true; + bool append_rows_f = false; + // Declare the supported options. boost::program_options::options_description visible_options("Options"); @@ -298,6 +301,9 @@ int main(int argc, char *argv[]) { "number of consecutive rows to write (output-format ipac_table only)")( "row-list", boost::program_options::value(&row_string), "list of rows to write (output-format ipac_table only)")( + "retain-row-list", + boost::program_options::value(&retain_row_string), + "list of rows to retain, dropping others")( "static", boost::program_options::value(&call_static_f), "call static function, not Table class member")( "column-to-extract", @@ -401,6 +407,19 @@ int main(int argc, char *argv[]) { std::istream_iterator(), std::back_inserter(row_list)); } + if (option_variables.count("retain-row-list")) { + if (option_variables.count("row-list")) { + std::cerr << "The parameters 'row-list' and 'retain-row-list' are " + "mutually " + "incompatible.\n"; + return 1; + } + std::stringstream retain_row_stream(retain_row_string); + std::copy(std::istream_iterator(retain_row_stream), + std::istream_iterator(), + std::inserter(retain_row_list, retain_row_list.begin())); + } + if (!option_variables.count("column-to-extract") && option_variables.count("as-string")) { std::cerr << "The parameter 'as-string' is valid only if " @@ -649,6 +668,14 @@ int main(int argc, char *argv[]) { boost::filesystem::ofstream output_stream(output_path); in_table1.write(output_stream, output_path.stem().native(), output_format, options); + } else if (!retain_row_list.empty()) { + // JTODO make this option incompatible with other options + boost::filesystem::ifstream input_stream(input_path); + tablator::Table in_table(input_stream, input_format); + in_table.retain_only_selected_rows(retain_row_list); + boost::filesystem::ofstream output_stream(output_path); + in_table.write(output_stream, output_path.stem().native(), output_format, + options); } else { tablator::Table table(input_path, input_format); table.write(output_path, output_format, options); diff --git a/test/convert.sh b/test/convert.sh index b685e2e0..e7f19616 100644 --- a/test/convert.sh +++ b/test/convert.sh @@ -182,6 +182,14 @@ else rm -f temp.json5 fi +${tablator_bin} --retain-row-list "1 4" test/multi temp.vot 2> /dev/null +if [ $? -eq 0 ]; then + echo "FAIL: invalid argument to retain-row-list" +else + echo "PASS: invalid argument to retain-row-list" + rm -f temp.tbl +fi + ########################################################### @@ -1418,3 +1426,10 @@ else echo "FAIL: append json5 table" fi +${tablator_bin} --retain-row-list "1 2 3" test/multi_formatted.tbl temp.tbl && diff test/back_and_forth_tables/multi_row_123.tbl temp.tbl +if [ $? -eq 0 ]; then + echo "PASS: retain rows" + rm -f temp.json5 +else + echo "FAIL: retain rows" +fi diff --git a/test/multi_formatted.tbl b/test/multi_formatted.tbl new file mode 100644 index 00000000..8a2cb9d4 --- /dev/null +++ b/test/multi_formatted.tbl @@ -0,0 +1,11 @@ +\fixlen = T +\RowsRetrieved = 4 +\type = 'results' +| object| ra| dec| htm20| htm7| htm3| shtm20| shtm7| shtm3|flags| SSO| +| char| double| double| char| long| int| long| int| int| int| int| +| | | | | | | | | | | | +| null| null| null| null| null| null| null| null| null| null|null| + 118289arstratraetratratsrastratsrastrats 359.88703 50.83257 16446744073709551616 3294967296 12000 8223372036854775808 1147483648 12000 122 0 + 113368 344.41273 -29.62225 8446744073709551616 294967296 43002 -7223372036854775808 -2047483648 13002 242 1 + 113368 344.41273 -29.62225 8446744073709551616 294967296 43002 -7223372036854775808 -2047483648 -23002 211 0 + 113368 344.41273 -29.62225 8446744073709551616 294967296 43002 -7223372036854775808 -2047483648 -31002 211 1 diff --git a/wscript b/wscript index ed14e27e..f716e2ad 100644 --- a/wscript +++ b/wscript @@ -6,7 +6,7 @@ def options(opt): def configure(conf): conf.load('compiler_cxx gnu_dirs cxx14 hdf5_cxx cfitsio CCfits boost json5_parser sqlite3 vsqlitepp') - conf.check_boost(lib='filesystem system program_options regex') + conf.check_boost(lib='filesystem system program_options regex thread') def build(bld): @@ -14,6 +14,7 @@ def build(bld): default_flags=['-Wall', '-Wextra', '-g'] else: default_flags=['-Wall', '-Wextra', '-g', '-Ofast', '-fno-finite-math-only', '-DNDEBUG'] + default_flags.append("-DBOOST_SPIRIT_THREADSAFE") use_packages=['cxx14', 'hdf5', 'hdf5_cxx', 'cfitsio', 'CCfits', 'BOOST', 'json5_parser', 'sqlite3', 'vsqlitepp'] @@ -71,6 +72,7 @@ def build(bld): 'src/Ipac_Table_Writer/Ipac_Table_Writer.cxx', 'src/Utils/Table_Utils/insert_ascii_in_row.cxx', 'src/Utils/Table_Utils/append_column.cxx', + 'src/Utils/Table_Utils/retain_only_selected_rows.cxx', 'src/ptree_readers/ptree_readers.cxx', 'src/ptree_readers/Utils.cxx', 'src/ptree_readers/extract_attributes.cxx', From cfc400082d3e6224789bae1340912215d9b26c36 Mon Sep 17 00:00:00 2001 From: Judith Silverman Date: Thu, 19 Jun 2025 10:23:13 -0700 Subject: [PATCH 2/8] IRSA-6967: tablator: store row-level size of variable-length arrays I Column: Add dynamic_array_flag class member ptree_readers: Retire Field_And_Flag helper class --- src/Column.hxx | 45 ++++++-- src/Table.hxx | 21 +++- .../add_to_property_tree.cxx | 1 + src/Table/write_tabledata/write_tabledata.cxx | 2 +- src/Table_Ops.cxx | 16 +-- src/Utils/Table_Utils.hxx | 30 ++++- .../Table_Utils/retain_only_selected_rows.cxx | 1 - src/ptree_readers.hxx | 23 +--- src/ptree_readers/Utils.cxx | 2 +- src/ptree_readers/ptree_readers.cxx | 2 +- .../read_field/read_field.cxx | 9 +- .../append_data_from_stream.cxx | 105 ++++++++++-------- .../insert_swapped.cxx | 61 +++++----- .../compute_column_array_sizes.cxx | 29 +++-- .../read_binary2/read_binary2.cxx | 42 +++---- .../read_data_element/read_data_element.cxx | 6 +- .../read_tabledata/read_tabledata.cxx | 20 ++-- .../read_table_element/read_table_element.cxx | 48 ++------ 18 files changed, 244 insertions(+), 219 deletions(-) diff --git a/src/Column.hxx b/src/Column.hxx index 56d8db67..c193be8d 100644 --- a/src/Column.hxx +++ b/src/Column.hxx @@ -7,28 +7,47 @@ namespace tablator { class Column { public: + // These constexprs are used by HDF5-support code. static constexpr char const *COL_ARRAY_SIZE = "array_size"; static constexpr char const *COL_FIELD_PROPERTIES = "field_properties"; static constexpr char const *COL_NAME = "name"; static constexpr char const *COL_TYPE = "type"; + static constexpr char const *COL_DYNAMIC_ARRAY_FLAG = "dynamic_array_flag"; - Column(const std::string &Name, const Data_Type &Type, const size_t &Array_size) - : Column(Name, Type, Array_size, Field_Properties()) {} + Column(const std::string &name, const Data_Type &type, const size_t &array_size, + const Field_Properties &field_properties, bool dynamic_array_flag) + : name_(name), + type_(type), + array_size_(array_size), + field_properties_(field_properties), + dynamic_array_flag_(dynamic_array_flag) {} + + Column(const std::string &name, const Data_Type &type, const size_t &array_size, + const Field_Properties &field_properties) + : Column(name, type, array_size, field_properties, + ((type == Data_Type::CHAR) || + (array_size == std::numeric_limits::max()))) {} + + Column(const std::string &name, const Data_Type &type, const size_t &array_size, + bool dynamic_array_flag) + : Column(name, type, array_size, Field_Properties(), dynamic_array_flag) {} + + + Column(const std::string &name, const Data_Type &type, const size_t &array_size) + : Column(name, type, array_size, Field_Properties()) {} + + + Column(const std::string &name, const Data_Type &type, + const Field_Properties &field_properties) + : Column(name, type, 1, field_properties) {} + + Column(const std::string &name, const Data_Type &type) : Column(name, type, 1) {} - Column(const std::string &Name, const Data_Type &Type, const size_t &Array_size, - const Field_Properties &Field_properties) - : name_(Name), - type_(Type), - array_size_(Array_size), - field_properties_(Field_properties) {} inline size_t get_data_size() const { return tablator::data_size(type_) * array_size_; } - // deprecated - inline size_t data_size() const { return get_data_size(); } - // accessors inline const std::string &get_name() const { return name_; } inline const Data_Type &get_type() const { return type_; } @@ -63,7 +82,8 @@ public: inline ATTRIBUTES &get_field_property_attributes() { return get_field_properties().get_attributes(); } - + inline bool get_dynamic_array_flag() const { return dynamic_array_flag_; } + inline void set_dynamic_array_flag(bool b) { dynamic_array_flag_ = b; } private: std::string name_; @@ -72,6 +92,7 @@ private: // Actual array_size for fixed-length arrays; maximum array_size otherwise. size_t array_size_; Field_Properties field_properties_; + bool dynamic_array_flag_; }; diff --git a/src/Table.hxx b/src/Table.hxx index 2f8a79ca..c8260934 100644 --- a/src/Table.hxx +++ b/src/Table.hxx @@ -992,12 +992,15 @@ private: // WARNING: The private append_column() routines do not increase // the size of the null column. The expectation is that the // number of columns is known before adding columns. - void append_column(const std::string &name, const Data_Type &type) { - append_column(name, type, 1); + void append_column(const std::string &name, const Data_Type &type, + const size_t &size, const Field_Properties &field_properties, + bool dynamic_array_flag) { + append_column(Column(name, type, size, field_properties, dynamic_array_flag)); } + void append_column(const std::string &name, const Data_Type &type, - const size_t &size) { - append_column(name, type, size, Field_Properties()); + const size_t &size, bool dynamic_array_flag) { + append_column(Column(name, type, size, dynamic_array_flag)); } void append_column(const std::string &name, const Data_Type &type, @@ -1005,11 +1008,19 @@ private: append_column(Column(name, type, size, field_properties)); } + void append_column(const std::string &name, const Data_Type &type, + const size_t &size) { + append_column(name, type, size); + } + + void append_column(const std::string &name, const Data_Type &type) { + append_column(name, type); + } + void append_column(const Column &column) { tablator::append_column(get_columns(), get_offsets(), column); } - size_t read_ipac_header(std::istream &ipac_file, std::array, 4> &Columns, std::vector &ipac_table_offsets, diff --git a/src/Table/generate_property_tree/add_to_property_tree.cxx b/src/Table/generate_property_tree/add_to_property_tree.cxx index 50f725b7..a5736c2c 100644 --- a/src/Table/generate_property_tree/add_to_property_tree.cxx +++ b/src/Table/generate_property_tree/add_to_property_tree.cxx @@ -20,6 +20,7 @@ void Min_Max_to_xml(boost::property_tree::ptree &tree, const std::string &min_ma // If json_prep is true, find (or, if none exists, create) a tree with // label