-
Notifications
You must be signed in to change notification settings - Fork 3
fix: Fix incorrect edge table state when transforming between bundled and unbundled #28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -151,12 +151,52 @@ void batch_put_edges_with_default_edata(const std::vector<vid_t>& src_lid, | |
| case DataTypeId::kEmpty: | ||
| batch_put_edges_with_default_edata_impl<EmptyType>(src_lid, dst_lid, | ||
| EmptyType(), out_csr); | ||
| break; | ||
| default: | ||
| THROW_NOT_SUPPORTED_EXCEPTION("not support edge data type: " + | ||
| std::to_string(property_type)); | ||
| } | ||
| } | ||
|
|
||
| void batch_put_edges_with_edata(const std::vector<vid_t>& src_lid, | ||
| const std::vector<vid_t>& dst_lid, | ||
| DataTypeId property_type, | ||
| const std::vector<Property>& edge_data, | ||
| CsrBase* out_csr) { | ||
| switch (property_type) { | ||
| #define TYPE_DISPATCHER(enum_val, type) \ | ||
| case DataTypeId::enum_val: { \ | ||
| std::vector<type> typed_data; \ | ||
| typed_data.reserve(edge_data.size()); \ | ||
| for (const auto& prop : edge_data) { \ | ||
| typed_data.emplace_back(PropUtils<type>::to_typed(prop)); \ | ||
| } \ | ||
| dynamic_cast<TypedCsrBase<type>*>(out_csr)->batch_put_edges( \ | ||
| src_lid, dst_lid, typed_data); \ | ||
| break; \ | ||
| } | ||
| TYPE_DISPATCHER(kBoolean, bool); | ||
| TYPE_DISPATCHER(kInt32, int32_t); | ||
| TYPE_DISPATCHER(kUInt32, uint32_t); | ||
| TYPE_DISPATCHER(kInt64, int64_t); | ||
| TYPE_DISPATCHER(kUInt64, uint64_t); | ||
| TYPE_DISPATCHER(kFloat, float); | ||
| TYPE_DISPATCHER(kDouble, double); | ||
| TYPE_DISPATCHER(kDate, Date); | ||
| TYPE_DISPATCHER(kTimestampMs, DateTime); | ||
| TYPE_DISPATCHER(kInterval, Interval); | ||
| #undef TYPE_DISPATCHER | ||
| case DataTypeId::kEmpty: { | ||
| dynamic_cast<TypedCsrBase<EmptyType>*>(out_csr)->batch_put_edges( | ||
| src_lid, dst_lid, {}); | ||
| break; | ||
| } | ||
| default: | ||
| LOG(FATAL) << "Unsupported edge property type " | ||
| << static_cast<int>(property_type); | ||
| } | ||
| } | ||
|
|
||
| template <typename T> | ||
| std::unique_ptr<CsrBase> create_csr_impl(bool is_mutable, | ||
| EdgeStrategy strategy) { | ||
|
|
@@ -729,7 +769,7 @@ void EdgeTable::AddProperties(const std::vector<std::string>& prop_names, | |
| // NOTE: Rather than check meta_->is_bundled(),we check whether the table | ||
| // is empty. | ||
| if (meta_->properties.size() == 1) { | ||
| dropAndCreateNewBundledCSR(); | ||
| dropAndCreateNewBundledCSR(nullptr); | ||
| } else { | ||
| dropAndCreateNewUnbundledCSR(false); | ||
| } | ||
|
|
@@ -770,6 +810,12 @@ void EdgeTable::DeleteProperties(const std::vector<std::string>& col_names) { | |
| table_->delete_column(col); | ||
| VLOG(1) << "delete column " << col; | ||
| } | ||
| if (table_->col_num() == 0) { | ||
| dropAndCreateNewUnbundledCSR(true); | ||
| } else if (table_->col_num() == 1) { | ||
| auto remaining_col = table_->get_column_by_id(0); | ||
| dropAndCreateNewBundledCSR(remaining_col); | ||
| } | ||
|
Comment on lines
+813
to
+818
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Incorrect transition for
The guard should use the remaining column's actual type — or equivalently check whether the edge should truly become bundled — rather than unconditionally calling if (table_->col_num() == 0) {
dropAndCreateNewUnbundledCSR(true);
} else if (table_->col_num() == 1) {
auto remaining_col = table_->get_column_by_id(0);
// Only transition to bundled for non-varchar types
if (remaining_col->type() != DataTypeId::kVarchar) {
dropAndCreateNewBundledCSR(remaining_col);
}
// For kVarchar the table stays unbundled; no CSR rebuild needed
} |
||
| } | ||
| } | ||
|
|
||
|
|
@@ -782,9 +828,10 @@ int32_t EdgeTable::AddEdge(vid_t src_lid, vid_t dst_lid, | |
| (edge_data.size() == 0 && | ||
| (meta_->properties.empty() || | ||
| meta_->properties[0] == DataTypeId::kEmpty))); | ||
| in_csr_->put_generic_edge(dst_lid, src_lid, edge_data[0], ts, alloc); | ||
| Property bundled_data = edge_data.empty() ? Property() : edge_data[0]; | ||
| in_csr_->put_generic_edge(dst_lid, src_lid, bundled_data, ts, alloc); | ||
| oe_offset = | ||
| out_csr_->put_generic_edge(src_lid, dst_lid, edge_data[0], ts, alloc); | ||
| out_csr_->put_generic_edge(src_lid, dst_lid, bundled_data, ts, alloc); | ||
| } else { | ||
| if (meta_->properties.size() != edge_data.size()) { | ||
| THROW_INVALID_ARGUMENT_EXCEPTION( | ||
|
|
@@ -885,7 +932,9 @@ void EdgeTable::Compact(bool compact_csr, bool sort_on_compaction, | |
| in_csr_->reset_timestamp(); | ||
| } | ||
|
|
||
| void EdgeTable::dropAndCreateNewBundledCSR() { | ||
| void EdgeTable::dropAndCreateNewBundledCSR( | ||
| std::shared_ptr<ColumnBase> remaining_col) { | ||
| CHECK(meta_->properties.size() == 1); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CHECK fails when called from
Concretely, if an unbundled edge table has 2 properties and one is deleted (leaving 1), A minimal fix is to derive the remaining property type from // instead of:
CHECK(meta_->properties.size() == 1);
new_out_csr = create_csr(meta_->oe_mutable, meta_->oe_strategy,
meta_->properties[0].id());
// ...
// consider:
// DataTypeId remaining_type = (remaining_col != nullptr)
// ? remaining_col->type()
// : meta_->properties[0].id();
// new_out_csr = create_csr(meta_->oe_mutable, meta_->oe_strategy, remaining_type); |
||
| auto suffix = get_next_csr_path_suffix(); | ||
| std::string next_oe_csr_path = | ||
| tmp_dir(work_dir_) + "/" + | ||
|
|
@@ -898,9 +947,7 @@ void EdgeTable::dropAndCreateNewBundledCSR() { | |
| meta_->edge_label_name) + | ||
| suffix; | ||
|
|
||
| auto edges = out_csr_->batch_export(nullptr); | ||
| std::unique_ptr<CsrBase> new_out_csr, new_in_csr; | ||
| assert(meta_->properties.size() == 1); | ||
| new_out_csr = create_csr(meta_->oe_mutable, meta_->oe_strategy, | ||
| meta_->properties[0].id()); | ||
| new_in_csr = create_csr(meta_->ie_mutable, meta_->ie_strategy, | ||
|
|
@@ -911,13 +958,36 @@ void EdgeTable::dropAndCreateNewBundledCSR() { | |
| new_out_csr->resize(out_csr_->size()); | ||
| new_in_csr->resize(in_csr_->size()); | ||
|
|
||
| batch_put_edges_with_default_edata( | ||
| std::get<0>(edges), std::get<1>(edges), meta_->properties[0].id(), | ||
| meta_->default_property_values[0], new_out_csr.get()); | ||
| batch_put_edges_with_default_edata( | ||
| std::get<1>(edges), std::get<0>(edges), meta_->properties[0].id(), | ||
| meta_->default_property_values[0], new_in_csr.get()); | ||
| if (remaining_col == nullptr) { | ||
| auto edges = out_csr_->batch_export(nullptr); | ||
| batch_put_edges_with_default_edata( | ||
| std::get<0>(edges), std::get<1>(edges), meta_->properties[0].id(), | ||
| meta_->default_property_values[0], new_out_csr.get()); | ||
| batch_put_edges_with_default_edata( | ||
| std::get<1>(edges), std::get<0>(edges), meta_->properties[0].id(), | ||
| meta_->default_property_values[0], new_in_csr.get()); | ||
| } else { | ||
| auto row_id_col = std::make_shared<ULongColumn>(0, StorageStrategy::kMem); | ||
| auto edges = out_csr_->batch_export(row_id_col); | ||
| std::vector<Property> remaining_data; | ||
| remaining_data.reserve(row_id_col->size()); | ||
| for (size_t i = 0; i < row_id_col->size(); ++i) { | ||
| auto row_id = row_id_col->get_view(i); | ||
| CHECK_LT(row_id, remaining_col->size()); | ||
| remaining_data.emplace_back(remaining_col->get_prop(row_id)); | ||
| } | ||
| batch_put_edges_with_edata(std::get<0>(edges), std::get<1>(edges), | ||
| meta_->properties[0].id(), remaining_data, | ||
| new_out_csr.get()); | ||
| batch_put_edges_with_edata(std::get<1>(edges), std::get<0>(edges), | ||
| meta_->properties[0].id(), remaining_data, | ||
| new_in_csr.get()); | ||
| } | ||
|
|
||
| table_->drop(); | ||
| table_ = std::make_unique<Table>(); | ||
| table_idx_.store(0); | ||
| capacity_.store(0); | ||
| out_csr_->close(); | ||
| in_csr_->close(); | ||
| out_csr_ = std::move(new_out_csr); | ||
|
|
@@ -955,9 +1025,20 @@ void EdgeTable::dropAndCreateNewUnbundledCSR(bool delete_property) { | |
| if (table_->col_num() >= 1) { | ||
| prev_data_col = table_->get_column_by_id(0); | ||
| } | ||
| } else { | ||
| // delete_property == true, which means the EdgeTable will become use csr of | ||
| // empty type. we need to reset capacity and table_idx to 0 | ||
| table_idx_.store(0); | ||
| capacity_.store(0); | ||
| } | ||
|
|
||
| auto edges = out_csr_->batch_export(prev_data_col); | ||
| if (!delete_property) { | ||
| size_t edge_num = std::get<0>(edges).size(); | ||
| table_idx_.store(edge_num); | ||
| capacity_.store(calculate_new_capacity(edge_num, false)); | ||
| table_->resize(capacity_.load()); | ||
| } | ||
| // Set default value for other columns | ||
| for (size_t col_id = 1; col_id < table_->col_num(); ++col_id) { | ||
| auto col = table_->get_column_by_id(col_id); | ||
|
|
@@ -970,7 +1051,7 @@ void EdgeTable::dropAndCreateNewUnbundledCSR(bool delete_property) { | |
| VLOG(10) << "Set default value for column " << col_id << ": " | ||
| << default_value.to_string() | ||
| << ", type: " << std::to_string(default_value.type()); | ||
| for (size_t row = 0; row < col->size(); ++row) { | ||
| for (size_t row = 0; row < std::get<0>(edges).size(); ++row) { | ||
| col->set_any(row, default_value); | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
kVarchar/ string type not handledbatch_put_edges_with_edatauses a hand-rolledTYPE_DISPATCHERthat mirrorsFOR_EACH_DATA_TYPE_NO_STRINGbut does not includekVarchar.batch_put_edges_with_default_edata(the existing counterpart, line 137) handles the same set and documents this gap with a// TODO(zhanglei)comment.While
kVarcharis currently never stored in a bundled CSR (anddropAndCreateNewBundledCSRis the only caller), thedefaultbranch silently callsLOG(FATAL), making the failure mode hard to trace. At a minimum, the comment frombatch_put_edges_with_default_edatashould be replicated here, and ideally aTHROW_NOT_SUPPORTED_EXCEPTION(consistent with the existing helper) could be used in place ofLOG(FATAL)to give callers a chance to recover.