Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/neug/storages/graph/edge_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class EdgeTable {
void Compact(bool compact_csr, bool sort_on_compaction, timestamp_t ts);

private:
void dropAndCreateNewBundledCSR();
void dropAndCreateNewBundledCSR(std::shared_ptr<ColumnBase> prev_data_col);
void dropAndCreateNewUnbundledCSR(bool delete_property);
std::string get_next_csr_path_suffix();

Expand Down
107 changes: 94 additions & 13 deletions src/storages/graph/edge_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,52 @@ void batch_put_edges_with_default_edata(const std::vector<vid_t>& src_lid,
case DataTypeId::kEmpty:
batch_put_edges_with_default_edata_impl<EmptyType>(src_lid, dst_lid,
EmptyType(), out_csr);
break;
default:
THROW_NOT_SUPPORTED_EXCEPTION("not support edge data type: " +
std::to_string(property_type));
}
}

void batch_put_edges_with_edata(const std::vector<vid_t>& src_lid,
const std::vector<vid_t>& dst_lid,
DataTypeId property_type,
const std::vector<Property>& edge_data,
CsrBase* out_csr) {
switch (property_type) {
#define TYPE_DISPATCHER(enum_val, type) \
case DataTypeId::enum_val: { \
std::vector<type> typed_data; \
typed_data.reserve(edge_data.size()); \
for (const auto& prop : edge_data) { \
typed_data.emplace_back(PropUtils<type>::to_typed(prop)); \
} \
dynamic_cast<TypedCsrBase<type>*>(out_csr)->batch_put_edges( \
src_lid, dst_lid, typed_data); \
break; \
}
TYPE_DISPATCHER(kBoolean, bool);
TYPE_DISPATCHER(kInt32, int32_t);
TYPE_DISPATCHER(kUInt32, uint32_t);
TYPE_DISPATCHER(kInt64, int64_t);
TYPE_DISPATCHER(kUInt64, uint64_t);
TYPE_DISPATCHER(kFloat, float);
TYPE_DISPATCHER(kDouble, double);
TYPE_DISPATCHER(kDate, Date);
TYPE_DISPATCHER(kTimestampMs, DateTime);
TYPE_DISPATCHER(kInterval, Interval);
#undef TYPE_DISPATCHER
case DataTypeId::kEmpty: {
dynamic_cast<TypedCsrBase<EmptyType>*>(out_csr)->batch_put_edges(
src_lid, dst_lid, {});
break;
}
default:
LOG(FATAL) << "Unsupported edge property type "
<< static_cast<int>(property_type);
}
Comment on lines +161 to +197
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kVarchar / string type not handled

batch_put_edges_with_edata uses a hand-rolled TYPE_DISPATCHER that mirrors FOR_EACH_DATA_TYPE_NO_STRING but does not include kVarchar. batch_put_edges_with_default_edata (the existing counterpart, line 137) handles the same set and documents this gap with a // TODO(zhanglei) comment.

While kVarchar is currently never stored in a bundled CSR (and dropAndCreateNewBundledCSR is the only caller), the default branch silently calls LOG(FATAL), making the failure mode hard to trace. At a minimum, the comment from batch_put_edges_with_default_edata should be replicated here, and ideally a THROW_NOT_SUPPORTED_EXCEPTION (consistent with the existing helper) could be used in place of LOG(FATAL) to give callers a chance to recover.

}

template <typename T>
std::unique_ptr<CsrBase> create_csr_impl(bool is_mutable,
EdgeStrategy strategy) {
Expand Down Expand Up @@ -729,7 +769,7 @@ void EdgeTable::AddProperties(const std::vector<std::string>& prop_names,
// NOTE: Rather than check meta_->is_bundled(),we check whether the table
// is empty.
if (meta_->properties.size() == 1) {
dropAndCreateNewBundledCSR();
dropAndCreateNewBundledCSR(nullptr);
} else {
dropAndCreateNewUnbundledCSR(false);
}
Expand Down Expand Up @@ -770,6 +810,12 @@ void EdgeTable::DeleteProperties(const std::vector<std::string>& col_names) {
table_->delete_column(col);
VLOG(1) << "delete column " << col;
}
if (table_->col_num() == 0) {
dropAndCreateNewUnbundledCSR(true);
} else if (table_->col_num() == 1) {
auto remaining_col = table_->get_column_by_id(0);
dropAndCreateNewBundledCSR(remaining_col);
}
Comment on lines +813 to +818
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incorrect transition for kVarchar remaining property

dropAndCreateNewBundledCSR is called whenever exactly one column remains after deletion, but EdgeSchema::is_bundled() (in schema.cc:241–252) explicitly returns false when the single property is kVarchar. A kVarchar edge table is always unbundled, so attempting to bundle it will hit LOG(FATAL) inside create_csr (which dispatches via FOR_EACH_DATA_TYPE_NO_STRING, deliberately excluding strings).

The guard should use the remaining column's actual type — or equivalently check whether the edge should truly become bundled — rather than unconditionally calling dropAndCreateNewBundledCSR for any single-column result:

if (table_->col_num() == 0) {
  dropAndCreateNewUnbundledCSR(true);
} else if (table_->col_num() == 1) {
  auto remaining_col = table_->get_column_by_id(0);
  // Only transition to bundled for non-varchar types
  if (remaining_col->type() != DataTypeId::kVarchar) {
    dropAndCreateNewBundledCSR(remaining_col);
  }
  // For kVarchar the table stays unbundled; no CSR rebuild needed
}

}
}

Expand All @@ -782,9 +828,10 @@ int32_t EdgeTable::AddEdge(vid_t src_lid, vid_t dst_lid,
(edge_data.size() == 0 &&
(meta_->properties.empty() ||
meta_->properties[0] == DataTypeId::kEmpty)));
in_csr_->put_generic_edge(dst_lid, src_lid, edge_data[0], ts, alloc);
Property bundled_data = edge_data.empty() ? Property() : edge_data[0];
in_csr_->put_generic_edge(dst_lid, src_lid, bundled_data, ts, alloc);
oe_offset =
out_csr_->put_generic_edge(src_lid, dst_lid, edge_data[0], ts, alloc);
out_csr_->put_generic_edge(src_lid, dst_lid, bundled_data, ts, alloc);
} else {
if (meta_->properties.size() != edge_data.size()) {
THROW_INVALID_ARGUMENT_EXCEPTION(
Expand Down Expand Up @@ -885,7 +932,9 @@ void EdgeTable::Compact(bool compact_csr, bool sort_on_compaction,
in_csr_->reset_timestamp();
}

void EdgeTable::dropAndCreateNewBundledCSR() {
void EdgeTable::dropAndCreateNewBundledCSR(
std::shared_ptr<ColumnBase> remaining_col) {
CHECK(meta_->properties.size() == 1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CHECK fails when called from DeleteProperties

dropAndCreateNewBundledCSR is now called from DeleteProperties (line 817), but at that call site meta_ has not yet been updated — it still reflects the pre-deletion property list. The surrounding production code in property_graph.cc (line 601–602) intentionally calls edge_table.DeleteProperties before schema_.DeleteEdgeProperties, because the edge table's delete logic depends on the old schema state.

Concretely, if an unbundled edge table has 2 properties and one is deleted (leaving 1), meta_->properties.size() is still 2 when this function is entered from DeleteProperties, so the CHECK always fires. Even if CHECK is a no-op in release mode, create_csr at line 952 would then receive the first original type (which may not match the remaining column's type), causing wrong-type CSR construction or a LOG(FATAL) if that type is kVarchar.

A minimal fix is to derive the remaining property type from remaining_col->type() instead of relying on meta_->properties[0], and to replace the strict equality CHECK with an assertion against the table column count:

// instead of:
CHECK(meta_->properties.size() == 1);
new_out_csr = create_csr(meta_->oe_mutable, meta_->oe_strategy,
                         meta_->properties[0].id());
// ...
// consider:
//   DataTypeId remaining_type = (remaining_col != nullptr)
//       ? remaining_col->type()
//       : meta_->properties[0].id();
//   new_out_csr = create_csr(meta_->oe_mutable, meta_->oe_strategy, remaining_type);

auto suffix = get_next_csr_path_suffix();
std::string next_oe_csr_path =
tmp_dir(work_dir_) + "/" +
Expand All @@ -898,9 +947,7 @@ void EdgeTable::dropAndCreateNewBundledCSR() {
meta_->edge_label_name) +
suffix;

auto edges = out_csr_->batch_export(nullptr);
std::unique_ptr<CsrBase> new_out_csr, new_in_csr;
assert(meta_->properties.size() == 1);
new_out_csr = create_csr(meta_->oe_mutable, meta_->oe_strategy,
meta_->properties[0].id());
new_in_csr = create_csr(meta_->ie_mutable, meta_->ie_strategy,
Expand All @@ -911,13 +958,36 @@ void EdgeTable::dropAndCreateNewBundledCSR() {
new_out_csr->resize(out_csr_->size());
new_in_csr->resize(in_csr_->size());

batch_put_edges_with_default_edata(
std::get<0>(edges), std::get<1>(edges), meta_->properties[0].id(),
meta_->default_property_values[0], new_out_csr.get());
batch_put_edges_with_default_edata(
std::get<1>(edges), std::get<0>(edges), meta_->properties[0].id(),
meta_->default_property_values[0], new_in_csr.get());
if (remaining_col == nullptr) {
auto edges = out_csr_->batch_export(nullptr);
batch_put_edges_with_default_edata(
std::get<0>(edges), std::get<1>(edges), meta_->properties[0].id(),
meta_->default_property_values[0], new_out_csr.get());
batch_put_edges_with_default_edata(
std::get<1>(edges), std::get<0>(edges), meta_->properties[0].id(),
meta_->default_property_values[0], new_in_csr.get());
} else {
auto row_id_col = std::make_shared<ULongColumn>(0, StorageStrategy::kMem);
auto edges = out_csr_->batch_export(row_id_col);
std::vector<Property> remaining_data;
remaining_data.reserve(row_id_col->size());
for (size_t i = 0; i < row_id_col->size(); ++i) {
auto row_id = row_id_col->get_view(i);
CHECK_LT(row_id, remaining_col->size());
remaining_data.emplace_back(remaining_col->get_prop(row_id));
}
batch_put_edges_with_edata(std::get<0>(edges), std::get<1>(edges),
meta_->properties[0].id(), remaining_data,
new_out_csr.get());
batch_put_edges_with_edata(std::get<1>(edges), std::get<0>(edges),
meta_->properties[0].id(), remaining_data,
new_in_csr.get());
}

table_->drop();
table_ = std::make_unique<Table>();
table_idx_.store(0);
capacity_.store(0);
out_csr_->close();
in_csr_->close();
out_csr_ = std::move(new_out_csr);
Expand Down Expand Up @@ -955,9 +1025,20 @@ void EdgeTable::dropAndCreateNewUnbundledCSR(bool delete_property) {
if (table_->col_num() >= 1) {
prev_data_col = table_->get_column_by_id(0);
}
} else {
// delete_property == true, which means the EdgeTable will become use csr of
// empty type. we need to reset capacity and table_idx to 0
table_idx_.store(0);
capacity_.store(0);
}

auto edges = out_csr_->batch_export(prev_data_col);
if (!delete_property) {
size_t edge_num = std::get<0>(edges).size();
table_idx_.store(edge_num);
capacity_.store(calculate_new_capacity(edge_num, false));
table_->resize(capacity_.load());
}
// Set default value for other columns
for (size_t col_id = 1; col_id < table_->col_num(); ++col_id) {
auto col = table_->get_column_by_id(col_id);
Expand All @@ -970,7 +1051,7 @@ void EdgeTable::dropAndCreateNewUnbundledCSR(bool delete_property) {
VLOG(10) << "Set default value for column " << col_id << ": "
<< default_value.to_string()
<< ", type: " << std::to_string(default_value.type());
for (size_t row = 0; row < col->size(); ++row) {
for (size_t row = 0; row < std::get<0>(edges).size(); ++row) {
col->set_any(row, default_value);
}
}
Expand Down
Loading
Loading