From 2db43027b6cd2b90b18fc2e95e6934c7c61fd5d2 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:07:14 -0700 Subject: [PATCH 01/12] add validator --- .gitignore | 4 +- CMakeLists.txt | 24 +- cmake/run_dataset_validator.cmake | 31 +++ generate_test_data.py | 144 ----------- scripts/generate_test_data.py | 164 ++++++++++++ tools/dbscan_dataset_validator.cpp | 398 +++++++++++++++++++++++++++++ 6 files changed, 619 insertions(+), 146 deletions(-) create mode 100644 cmake/run_dataset_validator.cmake delete mode 100644 generate_test_data.py create mode 100755 scripts/generate_test_data.py create mode 100644 tools/dbscan_dataset_validator.cpp diff --git a/.gitignore b/.gitignore index 42167f6..5adba1c 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,8 @@ Thumbs.db # Test data files test_data*.bin *.bin +!tests/data/dbscan_static_data.bin +!tests/data/dbscan_static_truth.bin # Executables dbscan_tests @@ -43,4 +45,4 @@ dbscan_tests *.tmp *.temp .cache/ -AGENTS.md \ No newline at end of file +AGENTS.md diff --git a/CMakeLists.txt b/CMakeLists.txt index c88060e..df35fb7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,20 @@ target_include_directories(dbscan_tests include ) +add_executable(dbscan_dataset_validator + tools/dbscan_dataset_validator.cpp +) + +target_link_libraries(dbscan_dataset_validator + PRIVATE + dbscan +) + +target_include_directories(dbscan_dataset_validator + PRIVATE + include +) + # Benchmark executable add_executable(dbscan_benchmark benchmark/benchmark_dbscan.cpp @@ -101,4 +115,12 @@ target_include_directories(dbscan_benchmark # Enable testing enable_testing() -add_test(NAME dbscan_tests COMMAND dbscan_tests) \ No newline at end of file +add_test(NAME dbscan_tests COMMAND dbscan_tests) + +add_test( + NAME dbscan_dataset_validation + COMMAND ${CMAKE_COMMAND} + -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DPROJECT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR} + -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/run_dataset_validator.cmake +) diff --git a/cmake/run_dataset_validator.cmake b/cmake/run_dataset_validator.cmake new file mode 100644 index 0000000..2790383 --- /dev/null +++ b/cmake/run_dataset_validator.cmake @@ -0,0 +1,31 @@ +set(DATA_FILE "${PROJECT_SOURCE_DIR}/tests/data/dbscan_static_data.bin") +set(TRUTH_FILE "${PROJECT_SOURCE_DIR}/tests/data/dbscan_static_truth.bin") +set(VALIDATOR "${PROJECT_BINARY_DIR}/dbscan_dataset_validator") + +if(NOT EXISTS "${DATA_FILE}") + message(FATAL_ERROR "Static dataset not found: ${DATA_FILE}") +endif() + +if(NOT EXISTS "${TRUTH_FILE}") + message(FATAL_ERROR "Truth labels file not found: ${TRUTH_FILE}") +endif() + +if(NOT EXISTS "${VALIDATOR}") + message(FATAL_ERROR "dbscan_dataset_validator executable not found: ${VALIDATOR}") +endif() + +execute_process( + COMMAND "${VALIDATOR}" + "--data" "${DATA_FILE}" + "--truth" "${TRUTH_FILE}" + "--eps" "10" + "--min-samples" "3" + "--impl" "both" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}" + RESULT_VARIABLE VALIDATOR_RESULT + COMMAND_ECHO STDOUT +) + +if(NOT VALIDATOR_RESULT EQUAL 0) + message(FATAL_ERROR "dbscan_dataset_validator reported a mismatch (exit code ${VALIDATOR_RESULT})") +endif() diff --git a/generate_test_data.py b/generate_test_data.py deleted file mode 100644 index a299dfa..0000000 --- a/generate_test_data.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env python3 - -import numpy as np -import struct -import sys - -def generate_test_data(n_samples=300, n_centers=4, output_file='test_data.bin'): - """ - Generate test data with known clusters and save to binary file. - Creates simple clusters manually without sklearn. - """ - return generate_test_data_with_params(n_samples, n_centers, 0.8, 5, output_file) - -def load_test_data(input_file='test_data.bin'): - """ - Load test data from binary file. - Returns: points (N, 2), labels - """ - with open(input_file, 'rb') as f: - # Read number of points - n_points = struct.unpack('I', f.read(4))[0] - - points = [] - labels = [] - - # Read points - for _ in range(n_points): - x, y = struct.unpack('dd', f.read(16)) - points.append([x, y]) - - # Read labels - for _ in range(n_points): - label = struct.unpack('i', f.read(4))[0] - labels.append(label) - - return np.array(points), np.array(labels) - -def generate_multiple_test_cases(): - """Generate test cases with different data sizes and parameters""" - test_cases = [ - # (n_samples, n_centers, eps, min_samples, output_file) - (500, 3, 0.8, 5, 'test_data_500.bin'), - (10000, 5, 0.8, 5, 'test_data_10k.bin'), - (100000, 8, 0.8, 5, 'test_data_100k.bin'), # Using 100k instead of 1M for practicality - # Different eps values - (1000, 4, 0.5, 5, 'test_data_eps_0_5.bin'), - (1000, 4, 1.2, 5, 'test_data_eps_1_2.bin'), - # Different min_samples values - (1000, 4, 0.8, 3, 'test_data_minpts_3.bin'), - (1000, 4, 0.8, 10, 'test_data_minpts_10.bin'), - ] - - for n_samples, n_centers, eps, min_samples, output_file in test_cases: - print(f"Generating {n_samples} points with {n_centers} centers (eps={eps}, min_samples={min_samples})...") - generate_test_data_with_params(n_samples, n_centers, eps, min_samples, output_file) - print(f"Saved to {output_file}\n") - -def generate_test_data_with_params(n_samples=300, n_centers=4, eps=0.8, min_samples=5, output_file='test_data.bin'): - """ - Generate test data with specific DBSCAN parameters - """ - np.random.seed(42) - - # Generate clustered data manually - X = [] - true_labels = [] - - for center_idx in range(n_centers): - # Create cluster centers - center_x = np.random.uniform(-5, 5) - center_y = np.random.uniform(-5, 5) - - # Generate points around each center - cluster_size = n_samples // n_centers - for _ in range(cluster_size): - x = center_x + np.random.normal(0, 0.6) - y = center_y + np.random.normal(0, 0.6) - X.append([x, y]) - true_labels.append(center_idx) - - # Add some noise points - noise_points = n_samples // 10 - for _ in range(noise_points): - x = np.random.uniform(-10, 10) - y = np.random.uniform(-10, 10) - X.append([x, y]) - true_labels.append(-1) # Noise - - X = np.array(X) - true_labels = np.array(true_labels) - - # Simple DBSCAN-like clustering for ground truth - labels = np.full(len(X), -1) - cluster_id = 0 - - for i in range(len(X)): - if labels[i] != -1: - continue - - # Find neighbors within eps - neighbors = [] - for j in range(len(X)): - if i == j: - continue - dist = np.sqrt((X[i][0] - X[j][0])**2 + (X[i][1] - X[j][1])**2) - if dist <= eps: - neighbors.append(j) - - if len(neighbors) >= min_samples: # min_samples - labels[i] = cluster_id - # Expand cluster (simplified) - for neighbor in neighbors: - if labels[neighbor] == -1: - labels[neighbor] = cluster_id - cluster_id += 1 - else: - labels[i] = -1 - - # Save data to binary file - with open(output_file, 'wb') as f: - # Write number of points - f.write(struct.pack('I', len(X))) - - # Write points as doubles (x, y) - for point in X: - f.write(struct.pack('dd', point[0], point[1])) - - # Write computed labels for validation - for label in labels: - f.write(struct.pack('i', label)) - - print(f"Generated {len(X)} points with {n_centers} clusters") - print(f"Simple clustering found {len(set(labels)) - (1 if -1 in labels else 0)} clusters") - - return X, labels - -if __name__ == '__main__': - if len(sys.argv) > 1 and sys.argv[1] == '--multiple': - generate_multiple_test_cases() - elif len(sys.argv) > 1: - output_file = sys.argv[1] - generate_test_data(output_file=output_file) - else: - generate_test_data() \ No newline at end of file diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py new file mode 100755 index 0000000..16a893b --- /dev/null +++ b/scripts/generate_test_data.py @@ -0,0 +1,164 @@ +#!/usr/bin/env -S uv run +# /// script +# dependencies = ["numpy", "scikit-learn"] +# /// + +import argparse +from pathlib import Path + +import numpy as np +from sklearn.cluster import DBSCAN + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Generate synthetic DBSCAN test data with embedded Gaussian clusters." + ) + parser.add_argument( + "--uniform-count", + type=int, + default=200_000, + help="Number of background points drawn from a uniform distribution (default: 200_000).", + ) + parser.add_argument( + "--cluster-count", + type=int, + default=100, + help="Number of Gaussian clusters to sprinkle into the dataset (default: 100).", + ) + parser.add_argument( + "--points-per-cluster", + type=int, + default=256, + help="Number of points sampled for each Gaussian cluster (default: 256).", + ) + parser.add_argument( + "--area-width", + type=int, + default=1_000_000, + help="Width/height of the square area measured in pixels (default: 1_000_000).", + ) + parser.add_argument( + "--cluster-sigma", + type=float, + default=50.0 / 3.0, + help="Standard deviation for the Gaussian clusters in pixels (default: 50/3).", + ) + parser.add_argument( + "--eps", + type=float, + default=60.0, + help="DBSCAN epsilon radius in pixels for scikit-learn clustering (default: 60).", + ) + parser.add_argument( + "--min-samples", + type=int, + default=16, + help="DBSCAN min_samples parameter for scikit-learn clustering (default: 16).", + ) + parser.add_argument( + "--seed", + type=int, + default=42, + help="Seed for the random number generator (default: 42).", + ) + parser.add_argument( + "--data-file", + type=Path, + default=Path("data.bin"), + help="Path to the output coordinate binary file (default: data.bin).", + ) + parser.add_argument( + "--truth-file", + type=Path, + default=Path("truth.bin"), + help="Path to the output truth label binary file (default: truth.bin).", + ) + return parser.parse_args() + + +def generate_uniform_points(rng: np.random.Generator, count: int, width: int) -> np.ndarray: + if count <= 0: + return np.empty((0, 2), dtype=np.uint32) + + coords = rng.uniform(0, width, size=(count, 2)) + coords = np.rint(coords, casting="unsafe") # round to nearest integer pixel + coords = np.clip(coords, 0, width - 1) + return coords.astype(np.uint32) + + +def generate_gaussian_clusters( + rng: np.random.Generator, + cluster_count: int, + points_per_cluster: int, + width: int, + sigma: float, +) -> np.ndarray: + if cluster_count <= 0 or points_per_cluster <= 0: + return np.empty((0, 2), dtype=np.uint32) + + centers = rng.uniform(0, width, size=(cluster_count, 2)) + clusters = [] + + for center in centers: + samples = rng.normal(loc=center, scale=sigma, size=(points_per_cluster, 2)) + samples = np.rint(samples, casting="unsafe") + samples = np.clip(samples, 0, width - 1) + clusters.append(samples.astype(np.uint32)) + + if not clusters: + return np.empty((0, 2), dtype=np.uint32) + + return np.vstack(clusters) + + +def write_data_file(path: Path, coords_yx: np.ndarray) -> None: + coords_yx.astype(np.uint32).tofile(path) + + +def write_truth_file(path: Path, labels: np.ndarray) -> None: + labels.astype(np.int32).tofile(path) + + +def main() -> None: + args = parse_args() + rng = np.random.default_rng(args.seed) + + uniform_coords = generate_uniform_points(rng, args.uniform_count, args.area_width) + cluster_coords = generate_gaussian_clusters( + rng, + cluster_count=args.cluster_count, + points_per_cluster=args.points_per_cluster, + width=args.area_width, + sigma=args.cluster_sigma, + ) + + if uniform_coords.size == 0 and cluster_coords.size == 0: + raise ValueError("No points generated; adjust the generator parameters.") + + # Concatenate and shuffle to avoid ordering artifacts. + all_coords_yx = np.vstack([uniform_coords, cluster_coords]) + rng.shuffle(all_coords_yx, axis=0) + + # Prepare float coordinates for clustering (x, y order for scikit-learn). + coords_xy_float = all_coords_yx[:, [1, 0]].astype(np.float64) + + dbscan = DBSCAN(eps=args.eps, min_samples=args.min_samples, n_jobs=-1) + labels = dbscan.fit_predict(coords_xy_float) + + # Persist data in requested layout: (y, x) pairs as uint32. + write_data_file(args.data_file, all_coords_yx) + write_truth_file(args.truth_file, labels) + + unique_labels, counts = np.unique(labels, return_counts=True) + cluster_labels = unique_labels[unique_labels != -1] + noise_count = counts[unique_labels == -1].sum() if -1 in unique_labels else 0 + + print(f"Generated {all_coords_yx.shape[0]} total points.") + print(f"Uniform points: {uniform_coords.shape[0]}, clustered points: {cluster_coords.shape[0]}.") + print(f"DBSCAN discovered {cluster_labels.size} clusters and {noise_count} noise points.") + print(f"Data written to {args.data_file.resolve()} and labels to {args.truth_file.resolve()}.") + + +if __name__ == "__main__": + main() diff --git a/tools/dbscan_dataset_validator.cpp b/tools/dbscan_dataset_validator.cpp new file mode 100644 index 0000000..fe246d8 --- /dev/null +++ b/tools/dbscan_dataset_validator.cpp @@ -0,0 +1,398 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dbscan.h" +#include "dbscan_optimized.h" + +namespace { + +struct Options { + std::filesystem::path data_path{"data.bin"}; + std::filesystem::path truth_path{"truth.bin"}; + double eps{60.0}; + int32_t min_samples{16}; + + enum class Implementation { Baseline, Optimized, Both } implementation{Implementation::Both}; + std::optional mismatch_output_dir; +}; + +void print_usage(const char *program_name) { + std::cout << "Usage: " << program_name + << " [--data ] [--truth ] [--eps ] [--min-samples ]" + << " [--impl baseline|optimized|both] [--dump-mismatches ]\n"; +} + +Options parse_arguments(int argc, char **argv) { + Options options; + + for (int i = 1; i < argc; ++i) { + const std::string arg{argv[i]}; + + if (arg == "--help" || arg == "-h") { + print_usage(argv[0]); + std::exit(0); + } else if (arg == "--data") { + if (i + 1 >= argc) + throw std::invalid_argument("--data expects a path argument"); + options.data_path = argv[++i]; + } else if (arg == "--truth") { + if (i + 1 >= argc) + throw std::invalid_argument("--truth expects a path argument"); + options.truth_path = argv[++i]; + } else if (arg == "--eps") { + if (i + 1 >= argc) + throw std::invalid_argument("--eps expects a numeric argument"); + options.eps = std::stod(argv[++i]); + } else if (arg == "--min-samples") { + if (i + 1 >= argc) + throw std::invalid_argument("--min-samples expects an integer argument"); + options.min_samples = static_cast(std::stoi(argv[++i])); + } else if (arg == "--impl") { + if (i + 1 >= argc) + throw std::invalid_argument("--impl expects one of: baseline, optimized, both"); + const std::string value{argv[++i]}; + if (value == "baseline") { + options.implementation = Options::Implementation::Baseline; + } else if (value == "optimized") { + options.implementation = Options::Implementation::Optimized; + } else if (value == "both") { + options.implementation = Options::Implementation::Both; + } else { + throw std::invalid_argument("--impl expects one of: baseline, optimized, both"); + } + } else if (arg == "--dump-mismatches") { + if (i + 1 >= argc) + throw std::invalid_argument("--dump-mismatches expects a directory path"); + options.mismatch_output_dir = std::filesystem::path{argv[++i]}; + } else { + throw std::invalid_argument("Unknown argument: " + arg); + } + } + + if (options.eps <= 0.0) + throw std::invalid_argument("--eps must be positive"); + if (options.min_samples <= 0) + throw std::invalid_argument("--min-samples must be positive"); + + return options; +} + +std::vector> load_points(const std::filesystem::path &path) { + std::ifstream stream(path, std::ios::binary); + if (!stream) + throw std::runtime_error("Failed to open data file: " + path.string()); + + stream.seekg(0, std::ios::end); + const auto file_size = stream.tellg(); + stream.seekg(0, std::ios::beg); + + if (file_size % (sizeof(uint32_t) * 2) != 0) + throw std::runtime_error("Data file does not contain a whole number of (y, x) uint32 pairs: " + path.string()); + + const std::size_t num_values = static_cast(file_size) / sizeof(uint32_t); + const std::size_t num_points = num_values / 2; + + std::vector raw(num_values); + stream.read(reinterpret_cast(raw.data()), static_cast(raw.size() * sizeof(uint32_t))); + if (!stream) + throw std::runtime_error("Failed to read data file: " + path.string()); + + std::vector> points(num_points); + for (std::size_t i = 0; i < num_points; ++i) { + const uint32_t y = raw[2 * i]; + const uint32_t x = raw[2 * i + 1]; + points[i] = dbscan::Point{static_cast(x), static_cast(y)}; + } + + return points; +} + +std::vector load_labels(const std::filesystem::path &path) { + std::ifstream stream(path, std::ios::binary); + if (!stream) + throw std::runtime_error("Failed to open truth file: " + path.string()); + + stream.seekg(0, std::ios::end); + const auto file_size = stream.tellg(); + stream.seekg(0, std::ios::beg); + + if (file_size % sizeof(int32_t) != 0) + throw std::runtime_error("Truth file does not contain a whole number of int32 labels: " + path.string()); + + const std::size_t num_labels = static_cast(file_size) / sizeof(int32_t); + std::vector labels(num_labels); + stream.read(reinterpret_cast(labels.data()), static_cast(labels.size() * sizeof(int32_t))); + if (!stream) + throw std::runtime_error("Failed to read truth file: " + path.string()); + + return labels; +} + +std::size_t count_clusters(const std::vector &labels) { + std::unordered_set clusters; + clusters.reserve(labels.size()); + for (int32_t label : labels) { + if (label != -1) + clusters.insert(label); + } + return clusters.size(); +} + +std::size_t count_noise(const std::vector &labels) { + return static_cast(std::count(labels.begin(), labels.end(), -1)); +} + +struct LabelIndex { + std::unordered_map to_index; + std::vector values; +}; + +LabelIndex make_index(const std::vector &labels) { + LabelIndex index; + index.values.reserve(labels.size()); + index.to_index.reserve(labels.size()); + + for (int32_t label : labels) { + if (!index.to_index.contains(label)) { + const std::size_t idx = index.values.size(); + index.values.push_back(label); + index.to_index.emplace(label, idx); + } + } + + return index; +} + +double combination2(std::int64_t n) { + if (n <= 1) + return 0.0; + return static_cast(n) * static_cast(n - 1) / 2.0; +} + +struct EvaluationMetrics { + double adjusted_rand{0.0}; + double remapped_accuracy{0.0}; + std::size_t mismatched_points{0}; + std::size_t predicted_clusters{0}; + std::size_t truth_clusters{0}; + std::size_t predicted_noise{0}; + std::size_t truth_noise{0}; + bool passed{false}; +}; + +EvaluationMetrics evaluate(const std::vector &predicted, const std::vector &truth, + std::vector *mismatch_indices = nullptr) { + if (predicted.size() != truth.size()) + throw std::runtime_error("Predicted labels and truth labels must have the same length"); + + const std::size_t total_points = truth.size(); + const auto predicted_index = make_index(predicted); + const auto truth_index = make_index(truth); + + const std::size_t predicted_size = predicted_index.values.size(); + const std::size_t truth_size = truth_index.values.size(); + + std::vector contingency(predicted_size * truth_size, 0); + std::vector predicted_counts(predicted_size, 0); + std::vector truth_counts(truth_size, 0); + + for (std::size_t i = 0; i < total_points; ++i) { + const auto predicted_it = predicted_index.to_index.find(predicted[i]); + const auto truth_it = truth_index.to_index.find(truth[i]); + const std::size_t predicted_row = predicted_it->second; + const std::size_t truth_col = truth_it->second; + + const std::size_t cell_index = predicted_row * truth_size + truth_col; + ++contingency[cell_index]; + ++predicted_counts[predicted_row]; + ++truth_counts[truth_col]; + } + + double sum_combination = 0.0; + for (std::int64_t count : contingency) + sum_combination += combination2(count); + + double predicted_combination = 0.0; + for (std::int64_t count : predicted_counts) + predicted_combination += combination2(count); + + double truth_combination = 0.0; + for (std::int64_t count : truth_counts) + truth_combination += combination2(count); + + const double total_pairs = combination2(static_cast(total_points)); + double expected_index = 0.0; + if (total_pairs > 0.0) + expected_index = (predicted_combination * truth_combination) / total_pairs; + + const double max_index = 0.5 * (predicted_combination + truth_combination); + const double denominator = max_index - expected_index; + + EvaluationMetrics metrics; + if (denominator == 0.0) { + metrics.adjusted_rand = 1.0; + } else { + metrics.adjusted_rand = (sum_combination - expected_index) / denominator; + } + + std::unordered_map remap; + remap.reserve(predicted_size); + for (std::size_t row = 0; row < predicted_size; ++row) { + const int32_t predicted_label = predicted_index.values[row]; + if (predicted_label == -1) { + remap.emplace(predicted_label, -1); + continue; + } + + const std::int64_t *row_ptr = contingency.data() + row * truth_size; + std::size_t best_col = 0; + std::int64_t best_count = -1; + for (std::size_t col = 0; col < truth_size; ++col) { + if (row_ptr[col] > best_count) { + best_count = row_ptr[col]; + best_col = col; + } + } + remap.emplace(predicted_label, truth_index.values[best_col]); + } + + if (mismatch_indices) + mismatch_indices->clear(); + + std::size_t matches = 0; + for (std::size_t i = 0; i < total_points; ++i) { + const int32_t predicted_label = predicted[i]; + const auto mapping_it = remap.find(predicted_label); + const int32_t mapped_label = mapping_it != remap.end() ? mapping_it->second : predicted_label; + if (mapped_label == truth[i]) + ++matches; + else if (mismatch_indices) + mismatch_indices->push_back(i); + } + + metrics.remapped_accuracy = + total_points == 0 ? 1.0 : static_cast(matches) / static_cast(total_points); + metrics.mismatched_points = mismatch_indices ? mismatch_indices->size() : (total_points - matches); + + metrics.predicted_clusters = count_clusters(predicted); + metrics.truth_clusters = count_clusters(truth); + metrics.predicted_noise = count_noise(predicted); + metrics.truth_noise = count_noise(truth); + metrics.passed = metrics.mismatched_points == 0 && metrics.predicted_clusters == metrics.truth_clusters; + + return metrics; +} + +struct RunResult { + std::string name; + EvaluationMetrics metrics; +}; + +} // namespace + +int main(int argc, char **argv) { + try { + const Options options = parse_arguments(argc, argv); + + const auto points = load_points(options.data_path); + const auto truth_labels = load_labels(options.truth_path); + + if (points.size() != truth_labels.size()) + throw std::runtime_error("Point count and truth label count differ"); + + std::cout << "Loaded " << points.size() << " points from " << options.data_path << "\n"; + std::cout << "Using eps=" << options.eps << ", min_samples=" << options.min_samples << "\n"; + + const auto truth_cluster_count = count_clusters(truth_labels); + const auto truth_noise_count = count_noise(truth_labels); + std::cout << "Ground truth clusters: " << truth_cluster_count << "; noise points: " << truth_noise_count << "\n"; + + std::vector results; + results.reserve(2); + + if (options.implementation == Options::Implementation::Baseline || + options.implementation == Options::Implementation::Both) { + std::cout << "\n[baseline] Running clustering..." << std::flush; + const auto start = std::chrono::steady_clock::now(); + dbscan::DBSCAN baseline(options.eps, options.min_samples); + const auto clustering = baseline.cluster(points); + std::vector mismatches; + const auto metrics = + evaluate(clustering.labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + const auto end = std::chrono::steady_clock::now(); + const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); + std::cout << " done in " << elapsed_ms << " ms" << std::endl; + results.push_back({"baseline", metrics}); + + if (options.mismatch_output_dir && !mismatches.empty()) { + std::filesystem::create_directories(*options.mismatch_output_dir); + auto file_path = *options.mismatch_output_dir / "baseline_mismatches.txt"; + std::ofstream out(file_path); + if (!out) + throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); + for (std::size_t index : mismatches) + out << index << '\n'; + std::cout << "[baseline] Wrote " << mismatches.size() << " mismatches to " << file_path << "\n"; + } + } + + if (options.implementation == Options::Implementation::Optimized || + options.implementation == Options::Implementation::Both) { + std::cout << "\n[optimized] Running clustering..." << std::flush; + const auto start = std::chrono::steady_clock::now(); + dbscan::DBSCANOptimized optimized(options.eps, options.min_samples); + const auto clustering = optimized.cluster(points); + std::vector mismatches; + const auto metrics = + evaluate(clustering.labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + const auto end = std::chrono::steady_clock::now(); + const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); + std::cout << " done in " << elapsed_ms << " ms" << std::endl; + results.push_back({"optimized", metrics}); + + if (options.mismatch_output_dir && !mismatches.empty()) { + std::filesystem::create_directories(*options.mismatch_output_dir); + auto file_path = *options.mismatch_output_dir / "optimized_mismatches.txt"; + std::ofstream out(file_path); + if (!out) + throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); + for (std::size_t index : mismatches) + out << index << '\n'; + std::cout << "[optimized] Wrote " << mismatches.size() << " mismatches to " << file_path << "\n"; + } + } + + std::cout << std::fixed << std::setprecision(6); + + bool all_passed = true; + for (const auto &result : results) { + const auto &m = result.metrics; + std::cout << "\nImplementation: " << result.name << "\n"; + std::cout << " clusters: " << m.predicted_clusters << " (truth " << m.truth_clusters << ")\n"; + std::cout << " noise points: " << m.predicted_noise << " (truth " << m.truth_noise << ")\n"; + std::cout << " adjusted rand index: " << m.adjusted_rand << "\n"; + std::cout << " remapped accuracy: " << m.remapped_accuracy * 100.0 << "%\n"; + std::cout << " mismatched points: " << m.mismatched_points << "\n"; + std::cout << " status: " << (m.passed ? "PASS" : "FAIL") << "\n"; + all_passed = all_passed && m.passed; + } + + return all_passed ? 0 : 1; + } catch (const std::exception &ex) { + std::cerr << "Error: " << ex.what() << "\n"; + print_usage(argv[0]); + return 1; + } +} From 13fd4c6a3a79d6307f40763303e5f7b65284d3bf Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:20:30 -0700 Subject: [PATCH 02/12] add l1 impl --- CMakeLists.txt | 2 + include/dbscan_grid2d_l1.h | 18 +++ src/dbscan_grid2d_l1.cpp | 189 +++++++++++++++++++++++++++++ tests/test_dbscan_grid2d_l1.cpp | 96 +++++++++++++++ tools/dbscan_dataset_validator.cpp | 87 ++++++++++--- 5 files changed, 378 insertions(+), 14 deletions(-) create mode 100644 include/dbscan_grid2d_l1.h create mode 100644 src/dbscan_grid2d_l1.cpp create mode 100644 tests/test_dbscan_grid2d_l1.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index df35fb7..2bcc52b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ endif() add_library(dbscan STATIC src/dbscan.cpp src/dbscan_optimized.cpp + src/dbscan_grid2d_l1.cpp ) target_include_directories(dbscan @@ -64,6 +65,7 @@ FetchContent_MakeAvailable(nanobench) add_executable(dbscan_tests tests/test_dbscan.cpp tests/test_dbscan_optimized.cpp + tests/test_dbscan_grid2d_l1.cpp tests/test_parallel_for.cpp tests/test_union_find.cpp ) diff --git a/include/dbscan_grid2d_l1.h b/include/dbscan_grid2d_l1.h new file mode 100644 index 0000000..7527c04 --- /dev/null +++ b/include/dbscan_grid2d_l1.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include + +namespace dbscan { + +struct DBSCANGrid2D_L1 { + uint32_t eps; + uint32_t min_samples; + + DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value); + + [[nodiscard]] std::vector fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) const; +}; + +} // namespace dbscan diff --git a/src/dbscan_grid2d_l1.cpp b/src/dbscan_grid2d_l1.cpp new file mode 100644 index 0000000..a0d8575 --- /dev/null +++ b/src/dbscan_grid2d_l1.cpp @@ -0,0 +1,189 @@ +#include "dbscan_grid2d_l1.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dbscan { + +namespace { + +constexpr uint64_t pack_cell(uint32_t ix, uint32_t iy) noexcept { + return (static_cast(ix) << 32U) | static_cast(iy); +} + +constexpr uint32_t cell_of(uint32_t value, uint32_t cell_size) noexcept { + return cell_size == 0 ? value : static_cast(value / cell_size); +} + +template +void for_each_neighbor(uint32_t point_index, uint32_t eps, const uint32_t *x, const uint32_t *y, + const std::vector &cell_x, const std::vector &cell_y, + const std::vector &ordered_indices, const std::vector &cell_offsets, + const std::vector &unique_keys, Fn &&fn) { + + const uint32_t base_cx = cell_x[point_index]; + const uint32_t base_cy = cell_y[point_index]; + + for (int dx = -1; dx <= 1; ++dx) { + const int64_t nx = static_cast(base_cx) + dx; + if (nx < 0) + continue; + + for (int dy = -1; dy <= 1; ++dy) { + const int64_t ny = static_cast(base_cy) + dy; + if (ny < 0) + continue; + + const uint64_t key = pack_cell(static_cast(nx), static_cast(ny)); + const auto it = std::lower_bound(unique_keys.begin(), unique_keys.end(), key); + if (it == unique_keys.end() || *it != key) + continue; + + const std::size_t cell_idx = static_cast(std::distance(unique_keys.begin(), it)); + const std::size_t begin = cell_offsets[cell_idx]; + const std::size_t end = cell_offsets[cell_idx + 1]; + + for (std::size_t pos = begin; pos < end; ++pos) { + const uint32_t neighbor_idx = ordered_indices[pos]; + + const uint32_t x_a = x[point_index]; + const uint32_t y_a = y[point_index]; + const uint32_t x_b = x[neighbor_idx]; + const uint32_t y_b = y[neighbor_idx]; + + const uint32_t dx_abs = x_a > x_b ? x_a - x_b : x_b - x_a; + const uint32_t dy_abs = y_a > y_b ? y_a - y_b : y_b - y_a; + const uint64_t manhattan = static_cast(dx_abs) + static_cast(dy_abs); + + if (manhattan <= static_cast(eps)) { + if (!fn(neighbor_idx)) + return; + } + } + } + } +} + +} // namespace + +DBSCANGrid2D_L1::DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value) + : eps(eps_value), min_samples(min_samples_value) { + if (eps == 0) + throw std::invalid_argument("eps must be greater than zero for DBSCANGrid2D_L1"); + if (min_samples == 0) + throw std::invalid_argument("min_samples must be greater than zero for DBSCANGrid2D_L1"); +} + +std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) const { + if (x == nullptr || y == nullptr) + throw std::invalid_argument("Input coordinate arrays must be non-null"); + + if (count == 0) + return {}; + + const uint32_t cell_size = eps; + + std::vector cell_x(count); + std::vector cell_y(count); + std::vector keys(count); + std::vector ordered_indices(count); + std::iota(ordered_indices.begin(), ordered_indices.end(), 0U); + + for (std::size_t i = 0; i < count; ++i) { + const uint32_t cx = cell_of(x[i], cell_size); + const uint32_t cy = cell_of(y[i], cell_size); + cell_x[i] = cx; + cell_y[i] = cy; + keys[i] = pack_cell(cx, cy); + } + + std::sort(ordered_indices.begin(), ordered_indices.end(), [&](uint32_t lhs, uint32_t rhs) { + const uint64_t key_lhs = keys[lhs]; + const uint64_t key_rhs = keys[rhs]; + if (key_lhs == key_rhs) + return lhs < rhs; + return key_lhs < key_rhs; + }); + + std::vector cell_offsets; + cell_offsets.reserve(count + 1); + std::vector unique_keys; + unique_keys.reserve(count); + + std::size_t pos = 0; + while (pos < count) { + const uint64_t key = keys[ordered_indices[pos]]; + unique_keys.push_back(key); + cell_offsets.push_back(pos); + + do { + ++pos; + } while (pos < count && keys[ordered_indices[pos]] == key); + } + cell_offsets.push_back(count); + + std::vector labels(count, -1); + std::vector is_core(count, 0U); + + const uint32_t eps_value = eps; + const uint32_t min_samples_value = min_samples; + + for (std::size_t i = 0; i < count; ++i) { + uint32_t neighbor_count = 0; + for_each_neighbor(static_cast(i), eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, + unique_keys, [&](uint32_t) { + ++neighbor_count; + return neighbor_count < min_samples_value; + }); + + if (neighbor_count >= min_samples_value) + is_core[i] = 1U; + } + + std::vector stack; + stack.reserve(count); + std::vector neighbor_buffer; + neighbor_buffer.reserve(64); + + int32_t next_label = 0; + for (std::size_t i = 0; i < count; ++i) { + if (!is_core[i] || labels[i] != -1) + continue; + + labels[i] = next_label; + stack.clear(); + stack.push_back(static_cast(i)); + + while (!stack.empty()) { + const uint32_t current = stack.back(); + stack.pop_back(); + + neighbor_buffer.clear(); + for_each_neighbor(current, eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, unique_keys, + [&](uint32_t neighbor) { + neighbor_buffer.push_back(neighbor); + return true; + }); + + for (uint32_t neighbor : neighbor_buffer) { + if (labels[neighbor] == -1) { + labels[neighbor] = next_label; + if (is_core[neighbor]) + stack.push_back(neighbor); + } + } + } + + ++next_label; + } + + return labels; +} + +} // namespace dbscan diff --git a/tests/test_dbscan_grid2d_l1.cpp b/tests/test_dbscan_grid2d_l1.cpp new file mode 100644 index 0000000..bf6100a --- /dev/null +++ b/tests/test_dbscan_grid2d_l1.cpp @@ -0,0 +1,96 @@ +#include "dbscan_grid2d_l1.h" + +#include + +#include +#include +#include +#include + +namespace { + +void load_fixture(const std::filesystem::path &data_path, const std::filesystem::path &truth_path, + std::vector &x_out, std::vector &y_out, std::vector &truth_out) { + std::ifstream data_stream(data_path, std::ios::binary); + REQUIRE(data_stream.good()); + + data_stream.seekg(0, std::ios::end); + const auto data_size = data_stream.tellg(); + data_stream.seekg(0, std::ios::beg); + + REQUIRE(data_size % (sizeof(uint32_t) * 2) == 0); + const std::size_t point_count = static_cast(data_size) / (sizeof(uint32_t) * 2); + + x_out.assign(point_count, 0U); + y_out.assign(point_count, 0U); + + for (std::size_t i = 0; i < point_count; ++i) { + uint32_t y = 0; + uint32_t x = 0; + data_stream.read(reinterpret_cast(&y), sizeof(uint32_t)); + data_stream.read(reinterpret_cast(&x), sizeof(uint32_t)); + REQUIRE(data_stream.good()); + y_out[i] = y; + x_out[i] = x; + } + + std::ifstream truth_stream(truth_path, std::ios::binary); + REQUIRE(truth_stream.good()); + + truth_stream.seekg(0, std::ios::end); + const auto truth_size = truth_stream.tellg(); + truth_stream.seekg(0, std::ios::beg); + + REQUIRE(truth_size % sizeof(int32_t) == 0); + const std::size_t truth_count = static_cast(truth_size) / sizeof(int32_t); + REQUIRE(truth_count == point_count); + + truth_out.assign(truth_count, 0); + truth_stream.read(reinterpret_cast(truth_out.data()), + static_cast(truth_count * sizeof(int32_t))); + REQUIRE(truth_stream.good()); +} + +} // namespace + +TEST_CASE("DBSCANGrid2D_L1 clusters dense neighbors", "[dbscan][grid_l1]") { + const std::vector x = {0, 1, 2, 100}; + const std::vector y = {0, 0, 1, 200}; + + dbscan::DBSCANGrid2D_L1 algo(4, 3); + auto labels = algo.fit_predict(x.data(), y.data(), x.size()); + + REQUIRE(labels.size() == x.size()); + REQUIRE(labels[0] == labels[1]); + REQUIRE(labels[1] == labels[2]); + REQUIRE(labels[0] != -1); + REQUIRE(labels[3] == -1); +} + +TEST_CASE("DBSCANGrid2D_L1 respects min_samples threshold", "[dbscan][grid_l1]") { + const std::vector coords = {0, 2, 4}; + dbscan::DBSCANGrid2D_L1 algo(3, 4); + auto labels = algo.fit_predict(coords.data(), coords.data(), coords.size()); + + REQUIRE(labels.size() == coords.size()); + for (int32_t label : labels) { + REQUIRE(label == -1); + } +} + +TEST_CASE("DBSCANGrid2D_L1 matches fixture truth", "[dbscan][grid_l1]") { + const std::filesystem::path root = std::filesystem::path{"tests"} / "data"; + const auto data_path = root / "dbscan_static_data.bin"; + const auto truth_path = root / "dbscan_static_truth.bin"; + + std::vector x; + std::vector y; + std::vector truth; + load_fixture(data_path, truth_path, x, y, truth); + + dbscan::DBSCANGrid2D_L1 algo(10, 3); + auto labels = algo.fit_predict(x.data(), y.data(), x.size()); + + REQUIRE(labels.size() == truth.size()); + REQUIRE(labels == truth); +} diff --git a/tools/dbscan_dataset_validator.cpp b/tools/dbscan_dataset_validator.cpp index fe246d8..2c5a26e 100644 --- a/tools/dbscan_dataset_validator.cpp +++ b/tools/dbscan_dataset_validator.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include "dbscan.h" +#include "dbscan_grid2d_l1.h" #include "dbscan_optimized.h" namespace { @@ -24,14 +26,16 @@ struct Options { double eps{60.0}; int32_t min_samples{16}; - enum class Implementation { Baseline, Optimized, Both } implementation{Implementation::Both}; + bool run_baseline{true}; + bool run_optimized{true}; + bool run_grid_l1{false}; std::optional mismatch_output_dir; }; void print_usage(const char *program_name) { std::cout << "Usage: " << program_name << " [--data ] [--truth ] [--eps ] [--min-samples ]" - << " [--impl baseline|optimized|both] [--dump-mismatches ]\n"; + << " [--impl baseline|optimized|grid|both|all] [--dump-mismatches ]\n"; } Options parse_arguments(int argc, char **argv) { @@ -61,16 +65,30 @@ Options parse_arguments(int argc, char **argv) { options.min_samples = static_cast(std::stoi(argv[++i])); } else if (arg == "--impl") { if (i + 1 >= argc) - throw std::invalid_argument("--impl expects one of: baseline, optimized, both"); + throw std::invalid_argument("--impl expects one of: baseline, optimized, grid, both, all"); const std::string value{argv[++i]}; if (value == "baseline") { - options.implementation = Options::Implementation::Baseline; + options.run_baseline = true; + options.run_optimized = false; + options.run_grid_l1 = false; } else if (value == "optimized") { - options.implementation = Options::Implementation::Optimized; + options.run_baseline = false; + options.run_optimized = true; + options.run_grid_l1 = false; + } else if (value == "grid" || value == "grid_l1") { + options.run_baseline = false; + options.run_optimized = false; + options.run_grid_l1 = true; } else if (value == "both") { - options.implementation = Options::Implementation::Both; + options.run_baseline = true; + options.run_optimized = true; + options.run_grid_l1 = false; + } else if (value == "all") { + options.run_baseline = true; + options.run_optimized = true; + options.run_grid_l1 = true; } else { - throw std::invalid_argument("--impl expects one of: baseline, optimized, both"); + throw std::invalid_argument("--impl expects one of: baseline, optimized, grid, both, all"); } } else if (arg == "--dump-mismatches") { if (i + 1 >= argc) @@ -89,7 +107,8 @@ Options parse_arguments(int argc, char **argv) { return options; } -std::vector> load_points(const std::filesystem::path &path) { +std::vector> load_points(const std::filesystem::path &path, std::vector *x_out, + std::vector *y_out) { std::ifstream stream(path, std::ios::binary); if (!stream) throw std::runtime_error("Failed to open data file: " + path.string()); @@ -110,10 +129,19 @@ std::vector> load_points(const std::filesystem::path &path throw std::runtime_error("Failed to read data file: " + path.string()); std::vector> points(num_points); + if (x_out) + x_out->assign(num_points, 0U); + if (y_out) + y_out->assign(num_points, 0U); + for (std::size_t i = 0; i < num_points; ++i) { const uint32_t y = raw[2 * i]; const uint32_t x = raw[2 * i + 1]; points[i] = dbscan::Point{static_cast(x), static_cast(y)}; + if (x_out) + (*x_out)[i] = x; + if (y_out) + (*y_out)[i] = y; } return points; @@ -306,7 +334,9 @@ int main(int argc, char **argv) { try { const Options options = parse_arguments(argc, argv); - const auto points = load_points(options.data_path); + std::vector x_coords; + std::vector y_coords; + const auto points = load_points(options.data_path, &x_coords, &y_coords); const auto truth_labels = load_labels(options.truth_path); if (points.size() != truth_labels.size()) @@ -320,10 +350,9 @@ int main(int argc, char **argv) { std::cout << "Ground truth clusters: " << truth_cluster_count << "; noise points: " << truth_noise_count << "\n"; std::vector results; - results.reserve(2); + results.reserve(3); - if (options.implementation == Options::Implementation::Baseline || - options.implementation == Options::Implementation::Both) { + if (options.run_baseline) { std::cout << "\n[baseline] Running clustering..." << std::flush; const auto start = std::chrono::steady_clock::now(); dbscan::DBSCAN baseline(options.eps, options.min_samples); @@ -348,8 +377,7 @@ int main(int argc, char **argv) { } } - if (options.implementation == Options::Implementation::Optimized || - options.implementation == Options::Implementation::Both) { + if (options.run_optimized) { std::cout << "\n[optimized] Running clustering..." << std::flush; const auto start = std::chrono::steady_clock::now(); dbscan::DBSCANOptimized optimized(options.eps, options.min_samples); @@ -374,6 +402,37 @@ int main(int argc, char **argv) { } } + if (options.run_grid_l1) { + const auto eps_int = static_cast(std::llround(options.eps)); + if (std::fabs(options.eps - static_cast(eps_int)) > 1e-6) { + throw std::invalid_argument("grid_l1 implementation requires integer eps value"); + } + if (x_coords.size() != y_coords.size()) + throw std::runtime_error("Mismatch between x and y coordinate counts"); + + std::cout << "\n[grid_l1] Running clustering..." << std::flush; + const auto start = std::chrono::steady_clock::now(); + dbscan::DBSCANGrid2D_L1 grid_algo(eps_int, static_cast(options.min_samples)); + const auto labels = grid_algo.fit_predict(x_coords.data(), y_coords.data(), x_coords.size()); + std::vector mismatches; + const auto metrics = evaluate(labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + const auto end = std::chrono::steady_clock::now(); + const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); + std::cout << " done in " << elapsed_ms << " ms" << std::endl; + results.push_back({"grid_l1", metrics}); + + if (options.mismatch_output_dir && !mismatches.empty()) { + std::filesystem::create_directories(*options.mismatch_output_dir); + auto file_path = *options.mismatch_output_dir / "grid_l1_mismatches.txt"; + std::ofstream out(file_path); + if (!out) + throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); + for (std::size_t index : mismatches) + out << index << '\n'; + std::cout << "[grid_l1] Wrote " << mismatches.size() << " mismatches to " << file_path << "\n"; + } + } + std::cout << std::fixed << std::setprecision(6); bool all_passed = true; From b2f46dc70828dc62dcf1384dca799d82f5f75480 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:23:32 -0700 Subject: [PATCH 03/12] commit some test files --- tests/data/dbscan_static_data.bin | Bin 0 -> 80 bytes tests/data/dbscan_static_truth.bin | Bin 0 -> 40 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/data/dbscan_static_data.bin create mode 100644 tests/data/dbscan_static_truth.bin diff --git a/tests/data/dbscan_static_data.bin b/tests/data/dbscan_static_data.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fe411357874f5466ac30d7899c6a3450886634f GIT binary patch literal 80 zcmYdcU|>iA;#45c1mZLxP6FZtDEkDE2H{gcd<=+>1MyiXJ`JQNFhcMlAZ-NX0|2}% B3y1&! literal 0 HcmV?d00001 diff --git a/tests/data/dbscan_static_truth.bin b/tests/data/dbscan_static_truth.bin new file mode 100644 index 0000000000000000000000000000000000000000..64aa8ea48cbbe80e618c97cf8308a34cdb51e386 GIT binary patch literal 40 RcmZQzKn09IE;9ZP1pp*W2mJs5 literal 0 HcmV?d00001 From 619a6764cc894bb7caf76c5213ad03c1c82402d7 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:29:03 -0700 Subject: [PATCH 04/12] support l1/l2 metric --- scripts/generate_test_data.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index 16a893b..fbb362c 100755 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -50,6 +50,12 @@ def parse_args() -> argparse.Namespace: default=60.0, help="DBSCAN epsilon radius in pixels for scikit-learn clustering (default: 60).", ) + parser.add_argument( + "--metric", + type=str, + default="l2", + help="Distance metric: 'l2' (Euclidean) or 'l1' (Manhattan).", + ) parser.add_argument( "--min-samples", type=int, @@ -143,7 +149,15 @@ def main() -> None: # Prepare float coordinates for clustering (x, y order for scikit-learn). coords_xy_float = all_coords_yx[:, [1, 0]].astype(np.float64) - dbscan = DBSCAN(eps=args.eps, min_samples=args.min_samples, n_jobs=-1) + metric = args.metric.lower() + if metric == "l1": + sklearn_metric = "cityblock" + elif metric == "l2": + sklearn_metric = "euclidean" + else: + raise ValueError("Unsupported metric. Choose 'l1' or 'l2'.") + + dbscan = DBSCAN(eps=args.eps, min_samples=args.min_samples, metric=sklearn_metric, n_jobs=-1) labels = dbscan.fit_predict(coords_xy_float) # Persist data in requested layout: (y, x) pairs as uint32. @@ -156,7 +170,9 @@ def main() -> None: print(f"Generated {all_coords_yx.shape[0]} total points.") print(f"Uniform points: {uniform_coords.shape[0]}, clustered points: {cluster_coords.shape[0]}.") - print(f"DBSCAN discovered {cluster_labels.size} clusters and {noise_count} noise points.") + print( + f"DBSCAN ({metric.upper()}) discovered {cluster_labels.size} clusters and {noise_count} noise points." + ) print(f"Data written to {args.data_file.resolve()} and labels to {args.truth_file.resolve()}.") From 9fc31ffe0540cdee89bbfa35b63ac1a0cdd10574 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:33:35 -0700 Subject: [PATCH 05/12] update benchmark to focus on l1 gri dimplementation --- benchmark/benchmark_dbscan.cpp | 203 +++++++++++++-------------------- 1 file changed, 81 insertions(+), 122 deletions(-) diff --git a/benchmark/benchmark_dbscan.cpp b/benchmark/benchmark_dbscan.cpp index 7fb065c..b7cec7c 100644 --- a/benchmark/benchmark_dbscan.cpp +++ b/benchmark/benchmark_dbscan.cpp @@ -1,145 +1,104 @@ -#include "dbscan.h" -#include "dbscan_optimized.h" -#include -#include -#include +#include "dbscan_grid2d_l1.h" + +#include +#include +#include #include -#include +#include #include +#include #include #include -// Generate clustered 2D data for benchmarking -std::vector> generate_benchmark_data(size_t n_points, int n_clusters = 8) { - std::vector> points; - points.reserve(n_points); - - // Create clusters - for (int c = 0; c < n_clusters; ++c) { - double center_x = c * 5.0; - double center_y = c * 5.0; - size_t points_per_cluster = n_points / n_clusters; - - for (size_t i = 0; i < points_per_cluster; ++i) { - double x = center_x + (static_cast(rand()) / RAND_MAX - 0.5) * 2.0; - double y = center_y + (static_cast(rand()) / RAND_MAX - 0.5) * 2.0; - points.push_back({x, y}); - } - } +namespace { - // Add some noise points - size_t noise_points = n_points / 10; - for (size_t i = 0; i < noise_points; ++i) { - double x = 50.0 + (static_cast(rand()) / RAND_MAX - 0.5) * 20.0; - double y = 50.0 + (static_cast(rand()) / RAND_MAX - 0.5) * 20.0; - points.push_back({x, y}); - } +struct Uint32Dataset { + std::vector x; + std::vector y; +}; - return points; -} +Uint32Dataset generate_uint32_dataset(std::size_t cluster_count, std::size_t points_per_cluster, + std::size_t noise_points, uint32_t area_width, uint32_t cluster_sigma, + std::mt19937 &rng) { + std::uniform_real_distribution uniform_dist(0.0, static_cast(area_width)); + std::normal_distribution normal_dist(0.0, static_cast(cluster_sigma)); -int main() { - // Seed random number generator - srand(static_cast(time(nullptr))); + Uint32Dataset dataset; + dataset.x.reserve(cluster_count * points_per_cluster + noise_points); + dataset.y.reserve(cluster_count * points_per_cluster + noise_points); - ankerl::nanobench::Bench bench; + for (std::size_t c = 0; c < cluster_count; ++c) { + const double center_x = uniform_dist(rng); + const double center_y = uniform_dist(rng); - // Benchmark different data sizes - std::vector data_sizes = {1000, 10000, 50000, 100000}; - - for (size_t n_points : data_sizes) { - std::cout << "\n=== Benchmarking with " << n_points << " points ===" << std::endl; - - // Generate test data - auto points = generate_benchmark_data(n_points); - - // Benchmark original DBSCAN - bench.title("Original DBSCAN").run("Original DBSCAN " + std::to_string(n_points) + " points", [&]() { - dbscan::DBSCAN dbscan(0.8, 5); - auto result = dbscan.cluster(points); - ankerl::nanobench::doNotOptimizeAway(result); - }); - - // Benchmark optimized DBSCAN - bench.title("Optimized DBSCAN").run("Optimized DBSCAN " + std::to_string(n_points) + " points", [&]() { - dbscan::DBSCANOptimized dbscan(0.8, 5); - auto result = dbscan.cluster(points); - ankerl::nanobench::doNotOptimizeAway(result); - }); + for (std::size_t i = 0; i < points_per_cluster; ++i) { + const double sample_x = center_x + normal_dist(rng); + const double sample_y = center_y + normal_dist(rng); - // Memory usage comparison - { - dbscan::DBSCAN original_dbscan(0.8, 5); - auto original_result = original_dbscan.cluster(points); + const uint32_t clamped_x = + static_cast(std::min(static_cast(area_width - 1), std::max(0.0, std::round(sample_x)))); + const uint32_t clamped_y = + static_cast(std::min(static_cast(area_width - 1), std::max(0.0, std::round(sample_y)))); - dbscan::DBSCANOptimized optimized_dbscan(0.8, 5); - auto optimized_result = optimized_dbscan.cluster(points); - - std::cout << "Original DBSCAN found " << original_result.num_clusters << " clusters" << std::endl; - std::cout << "Optimized DBSCAN found " << optimized_result.num_clusters << " clusters" << std::endl; + dataset.x.push_back(clamped_x); + dataset.y.push_back(clamped_y); } } - // Performance comparison with different parameters - std::cout << "\n=== Parameter Sensitivity Benchmark ===" << std::endl; - - auto test_points = generate_benchmark_data(10000); + std::uniform_int_distribution uniform_int(0, area_width - 1); + for (std::size_t i = 0; i < noise_points; ++i) { + dataset.x.push_back(uniform_int(rng)); + dataset.y.push_back(uniform_int(rng)); + } - // Different eps values - std::vector eps_values = {0.3, 0.5, 0.8, 1.2}; + return dataset; +} - for (double eps : eps_values) { - bench.title("EPS Parameter").run("Optimized DBSCAN eps=" + std::to_string(eps), [&]() { - dbscan::DBSCANOptimized dbscan(eps, 5); - auto result = dbscan.cluster(test_points); - ankerl::nanobench::doNotOptimizeAway(result); - }); - } +} // namespace - // Different min_pts values - std::vector min_pts_values = {3, 5, 10, 15}; +int main() { + constexpr uint32_t area_width = 1'000'000; + constexpr uint32_t cluster_sigma = 50; // Approximately 3 sigma ~ 150 px footprint + constexpr uint32_t eps = 60; + constexpr uint32_t min_samples = 16; - for (int min_pts : min_pts_values) { - bench.title("MinPts Parameter").run("Optimized DBSCAN min_pts=" + std::to_string(min_pts), [&]() { - dbscan::DBSCANOptimized dbscan(0.8, min_pts); - auto result = dbscan.cluster(test_points); - ankerl::nanobench::doNotOptimizeAway(result); + std::mt19937 rng(1337u); + ankerl::nanobench::Bench bench; + bench.title("DBSCANGrid2D_L1"); + bench.relative(true); + + struct Scenario { + std::size_t clusters; + std::size_t points_per_cluster; + }; + + const std::vector scenarios = { + {32, 256}, + {64, 256}, + {100, 256}, + {128, 256}, + }; + + std::cout << "Benchmarking DBSCANGrid2D_L1 with Manhattan distance" << std::endl; + std::cout << "eps=" << eps << ", min_samples=" << min_samples << std::endl; + + for (const auto &scenario : scenarios) { + const std::size_t cluster_points = scenario.clusters * scenario.points_per_cluster; + const std::size_t noise_points = cluster_points * 2; // 2x noise compared to clustered points + + auto dataset = generate_uint32_dataset(scenario.clusters, scenario.points_per_cluster, noise_points, area_width, + cluster_sigma, rng); + + const std::size_t total_points = dataset.x.size(); + std::cout << "\nScenario: " << scenario.clusters << " clusters, " << scenario.points_per_cluster + << " points/cluster, total points=" << total_points << std::endl; + + bench.run("grid-l1 " + std::to_string(total_points) + " pts", [&]() { + dbscan::DBSCANGrid2D_L1 algo(eps, min_samples); + auto labels = algo.fit_predict(dataset.x.data(), dataset.y.data(), total_points); + ankerl::nanobench::doNotOptimizeAway(labels); }); } - // Detailed performance analysis - std::cout << "\n=== Detailed Performance Analysis ===" << std::endl; - - auto large_dataset = generate_benchmark_data(50000); - - // Time both implementations on larger dataset - { - std::cout << "Running performance comparison on 50k points..." << std::endl; - - // Original DBSCAN timing - auto start_time = std::chrono::high_resolution_clock::now(); - dbscan::DBSCAN original_dbscan(0.8, 5); - auto original_result = original_dbscan.cluster(large_dataset); - auto end_time = std::chrono::high_resolution_clock::now(); - auto original_duration = std::chrono::duration_cast(end_time - start_time); - - // Optimized DBSCAN timing - start_time = std::chrono::high_resolution_clock::now(); - dbscan::DBSCANOptimized optimized_dbscan(0.8, 5); - auto optimized_result = optimized_dbscan.cluster(large_dataset); - end_time = std::chrono::high_resolution_clock::now(); - auto optimized_duration = std::chrono::duration_cast(end_time - start_time); - - std::cout << "Original DBSCAN: " << original_duration.count() << "ms, " << original_result.num_clusters - << " clusters" << std::endl; - std::cout << "Optimized DBSCAN: " << optimized_duration.count() << "ms, " << optimized_result.num_clusters - << " clusters" << std::endl; - - if (original_duration.count() > 0) { - double speedup = static_cast(original_duration.count()) / optimized_duration.count(); - std::cout << "Speedup: " << speedup << "x" << std::endl; - } - } - return 0; -} \ No newline at end of file +} From 0507e24a966f85369ca234c69835fcb51b8aa767 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:39:40 -0700 Subject: [PATCH 06/12] add a test for parallelization --- CMakeLists.txt | 1 + include/parallel.hpp | 36 +++++++++++++++++++++++- tests/test_parallelize.cpp | 56 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tests/test_parallelize.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bcc52b..bcc6c50 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,7 @@ add_executable(dbscan_tests tests/test_dbscan_optimized.cpp tests/test_dbscan_grid2d_l1.cpp tests/test_parallel_for.cpp + tests/test_parallelize.cpp tests/test_union_find.cpp ) diff --git a/include/parallel.hpp b/include/parallel.hpp index dc07709..5487241 100644 --- a/include/parallel.hpp +++ b/include/parallel.hpp @@ -33,4 +33,38 @@ inline void parallel_for(size_t begin, size_t end, size_t n_threads, const std:: th.join(); } -} // namespace utils \ No newline at end of file +inline void parallelize(size_t begin, size_t end, size_t num_threads, size_t chunk_size, + std::function &&chunk_processor) { + if (begin >= end) + return; + + if (num_threads == 0) + num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 1; + + if (chunk_size == 0) + chunk_size = std::max(1, (end - begin + num_threads - 1) / num_threads); + + std::atomic next_begin{begin}; + std::vector workers; + workers.reserve(num_threads); + + for (size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { + workers.emplace_back([&, chunk_size]() { + while (true) { + size_t start = next_begin.fetch_add(chunk_size, std::memory_order_relaxed); + if (start >= end) + break; + + size_t stop = std::min(end, start + chunk_size); + chunk_processor(start, stop); + } + }); + } + + for (auto &worker : workers) + worker.join(); +} + +} // namespace utils diff --git a/tests/test_parallelize.cpp b/tests/test_parallelize.cpp new file mode 100644 index 0000000..13c2650 --- /dev/null +++ b/tests/test_parallelize.cpp @@ -0,0 +1,56 @@ +#include "parallel.hpp" + +#include +#include +#include +#include +#include + +TEST_CASE("parallelize processes full range", "[parallel]") { + constexpr std::size_t N = 10'000; + std::vector> visited(N); + for (auto &flag : visited) + flag.store(false, std::memory_order_relaxed); + + utils::parallelize(0, N, 4, 128, [&](std::size_t begin, std::size_t end) { + for (std::size_t i = begin; i < end; ++i) { + bool expected = false; + REQUIRE(visited[i].compare_exchange_strong(expected, true, std::memory_order_relaxed)); + } + }); + + for (const auto &flag : visited) { + REQUIRE(flag.load(std::memory_order_relaxed)); + } +} + +TEST_CASE("parallelize handles uneven chunks", "[parallel]") { + constexpr std::size_t N = 1'023; + std::vector> counts(N); + for (auto &value : counts) + value.store(0, std::memory_order_relaxed); + + utils::parallelize(0, N, 3, 100, [&](std::size_t begin, std::size_t end) { + for (std::size_t i = begin; i < end; ++i) { + counts[i].fetch_add(1, std::memory_order_relaxed); + } + }); + + for (const auto &value : counts) { + REQUIRE(value.load(std::memory_order_relaxed) == 1); + } +} + +TEST_CASE("parallelize default chunk size", "[parallel]") { + constexpr std::size_t N = 5'000; + std::vector out(N, 0); + + utils::parallelize(0, N, 8, 0, [&](std::size_t begin, std::size_t end) { + for (std::size_t i = begin; i < end; ++i) { + out[i] = static_cast(i); + } + }); + + for (std::size_t i = 0; i < N; ++i) + REQUIRE(out[i] == static_cast(i)); +} From 8884c1fcf31240043a0e382e21c2e0d73dd09b5e Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 30 Sep 2025 22:48:41 -0700 Subject: [PATCH 07/12] update bench --- benchmark/benchmark_dbscan.cpp | 31 ++++++++++++++------ include/dbscan_grid2d_l1.h | 5 +++- src/dbscan_grid2d_l1.cpp | 52 ++++++++++++++++++++-------------- 3 files changed, 57 insertions(+), 31 deletions(-) diff --git a/benchmark/benchmark_dbscan.cpp b/benchmark/benchmark_dbscan.cpp index b7cec7c..95fc230 100644 --- a/benchmark/benchmark_dbscan.cpp +++ b/benchmark/benchmark_dbscan.cpp @@ -66,6 +66,9 @@ int main() { ankerl::nanobench::Bench bench; bench.title("DBSCANGrid2D_L1"); bench.relative(true); + bench.warmup(2); + bench.minEpochIterations(10); + bench.unit("pt"); struct Scenario { std::size_t clusters; @@ -73,14 +76,16 @@ int main() { }; const std::vector scenarios = { - {32, 256}, - {64, 256}, - {100, 256}, - {128, 256}, + {64, 256}, // ~16K cluster points + 32K noise => ~48K total + {128, 256}, // ~32K cluster points + 64K noise => ~96K total + {256, 256}, // ~65K cluster points + 131K noise => ~196K total + {512, 256}, // ~131K cluster points + 262K noise => ~393K total + {640, 256}, // ~163K cluster points + 327K noise => ~490K total }; std::cout << "Benchmarking DBSCANGrid2D_L1 with Manhattan distance" << std::endl; std::cout << "eps=" << eps << ", min_samples=" << min_samples << std::endl; + std::cout << "Thread sweep: 0 (auto), 1, 2, 4, 8" << std::endl; for (const auto &scenario : scenarios) { const std::size_t cluster_points = scenario.clusters * scenario.points_per_cluster; @@ -93,11 +98,19 @@ int main() { std::cout << "\nScenario: " << scenario.clusters << " clusters, " << scenario.points_per_cluster << " points/cluster, total points=" << total_points << std::endl; - bench.run("grid-l1 " + std::to_string(total_points) + " pts", [&]() { - dbscan::DBSCANGrid2D_L1 algo(eps, min_samples); - auto labels = algo.fit_predict(dataset.x.data(), dataset.y.data(), total_points); - ankerl::nanobench::doNotOptimizeAway(labels); - }); + bench.batch(static_cast(total_points)); + bench.context("points", std::to_string(total_points)); + + const std::vector thread_counts = {0, 1, 2, 4, 8}; + for (std::size_t thread_count : thread_counts) { + const std::string label = "grid-l1 " + std::to_string(total_points) + " pts threads=" + + (thread_count == 0 ? std::string("auto") : std::to_string(thread_count)); + bench.run(label, [&]() { + dbscan::DBSCANGrid2D_L1 algo(eps, min_samples, thread_count); + auto labels = algo.fit_predict(dataset.x.data(), dataset.y.data(), total_points); + ankerl::nanobench::doNotOptimizeAway(labels); + }); + } } return 0; diff --git a/include/dbscan_grid2d_l1.h b/include/dbscan_grid2d_l1.h index 7527c04..8f18b7d 100644 --- a/include/dbscan_grid2d_l1.h +++ b/include/dbscan_grid2d_l1.h @@ -9,8 +9,11 @@ namespace dbscan { struct DBSCANGrid2D_L1 { uint32_t eps; uint32_t min_samples; + std::size_t num_threads; + std::size_t chunk_size; - DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value); + DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value = 0, + std::size_t chunk_size_value = 0); [[nodiscard]] std::vector fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) const; }; diff --git a/src/dbscan_grid2d_l1.cpp b/src/dbscan_grid2d_l1.cpp index a0d8575..0489aae 100644 --- a/src/dbscan_grid2d_l1.cpp +++ b/src/dbscan_grid2d_l1.cpp @@ -3,12 +3,13 @@ #include #include #include -#include #include #include #include #include +#include "parallel.hpp" + namespace dbscan { namespace { @@ -72,8 +73,9 @@ void for_each_neighbor(uint32_t point_index, uint32_t eps, const uint32_t *x, co } // namespace -DBSCANGrid2D_L1::DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value) - : eps(eps_value), min_samples(min_samples_value) { +DBSCANGrid2D_L1::DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value, + std::size_t chunk_size_value) + : eps(eps_value), min_samples(min_samples_value), num_threads(num_threads_value), chunk_size(chunk_size_value) { if (eps == 0) throw std::invalid_argument("eps must be greater than zero for DBSCANGrid2D_L1"); if (min_samples == 0) @@ -95,13 +97,17 @@ std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint3 std::vector ordered_indices(count); std::iota(ordered_indices.begin(), ordered_indices.end(), 0U); - for (std::size_t i = 0; i < count; ++i) { - const uint32_t cx = cell_of(x[i], cell_size); - const uint32_t cy = cell_of(y[i], cell_size); - cell_x[i] = cx; - cell_y[i] = cy; - keys[i] = pack_cell(cx, cy); - } + const std::size_t index_chunk = chunk_size == 0 ? 1024 : chunk_size; + utils::parallelize(0, count, num_threads, index_chunk, + [&](std::size_t begin, std::size_t end) { + for (std::size_t i = begin; i < end; ++i) { + const uint32_t cx = cell_of(x[i], cell_size); + const uint32_t cy = cell_of(y[i], cell_size); + cell_x[i] = cx; + cell_y[i] = cy; + keys[i] = pack_cell(cx, cy); + } + }); std::sort(ordered_indices.begin(), ordered_indices.end(), [&](uint32_t lhs, uint32_t rhs) { const uint64_t key_lhs = keys[lhs]; @@ -134,17 +140,21 @@ std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint3 const uint32_t eps_value = eps; const uint32_t min_samples_value = min_samples; - for (std::size_t i = 0; i < count; ++i) { - uint32_t neighbor_count = 0; - for_each_neighbor(static_cast(i), eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, - unique_keys, [&](uint32_t) { - ++neighbor_count; - return neighbor_count < min_samples_value; - }); - - if (neighbor_count >= min_samples_value) - is_core[i] = 1U; - } + const std::size_t core_chunk = chunk_size == 0 ? 512 : chunk_size; + utils::parallelize(0, count, num_threads, core_chunk, + [&](std::size_t begin, std::size_t end) { + for (std::size_t idx = begin; idx < end; ++idx) { + uint32_t neighbor_count = 0; + for_each_neighbor(static_cast(idx), eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, + unique_keys, [&](uint32_t) { + ++neighbor_count; + return neighbor_count < min_samples_value; + }); + + if (neighbor_count >= min_samples_value) + is_core[idx] = 1U; + } + }); std::vector stack; stack.reserve(count); From 3959b5e17915b238db82747925c1fbfe5a174164 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Wed, 1 Oct 2025 07:10:53 -0700 Subject: [PATCH 08/12] add performance timing instrumentation --- include/dbscan_grid2d_l1.h | 6 +- include/perf_timer.h | 51 ++++++++++ src/dbscan_grid2d_l1.cpp | 164 +++++++++++++++++++------------- tests/test_dbscan_grid2d_l1.cpp | 6 ++ 4 files changed, 159 insertions(+), 68 deletions(-) create mode 100644 include/perf_timer.h diff --git a/include/dbscan_grid2d_l1.h b/include/dbscan_grid2d_l1.h index 8f18b7d..8c51190 100644 --- a/include/dbscan_grid2d_l1.h +++ b/include/dbscan_grid2d_l1.h @@ -4,6 +4,8 @@ #include #include +#include "perf_timer.h" + namespace dbscan { struct DBSCANGrid2D_L1 { @@ -15,7 +17,9 @@ struct DBSCANGrid2D_L1 { DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value = 0, std::size_t chunk_size_value = 0); - [[nodiscard]] std::vector fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) const; + [[nodiscard]] std::vector fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count); + + PerfTiming perf_timing_; }; } // namespace dbscan diff --git a/include/perf_timer.h b/include/perf_timer.h new file mode 100644 index 0000000..61e9881 --- /dev/null +++ b/include/perf_timer.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include + +namespace dbscan { + +struct PerfTimingEntry { + std::string label; + double duration_ms; +}; + +struct PerfTiming { + void clear() { entries_.clear(); } + + void add(std::string label, double duration_ms) { + entries_.push_back({std::move(label), duration_ms}); + } + + [[nodiscard]] const std::vector &entries() const { return entries_; } + +private: + std::vector entries_; + + friend class ScopedTimer; +}; + +class ScopedTimer { +public: + ScopedTimer(std::string label, PerfTiming &sink) + : sink_(sink), label_(std::move(label)), start_(std::chrono::steady_clock::now()) {} + + ScopedTimer(const ScopedTimer &) = delete; + ScopedTimer &operator=(const ScopedTimer &) = delete; + + ~ScopedTimer() { + const auto end = std::chrono::steady_clock::now(); + const auto elapsed = std::chrono::duration_cast>(end - start_); + sink_.add(std::move(label_), elapsed.count()); + } + +private: + PerfTiming &sink_; + std::string label_; + std::chrono::steady_clock::time_point start_; +}; + +} // namespace dbscan + diff --git a/src/dbscan_grid2d_l1.cpp b/src/dbscan_grid2d_l1.cpp index 0489aae..1b324f2 100644 --- a/src/dbscan_grid2d_l1.cpp +++ b/src/dbscan_grid2d_l1.cpp @@ -9,11 +9,13 @@ #include #include "parallel.hpp" +#include "perf_timer.h" namespace dbscan { namespace { +// Compact 2D cell coordinates into a sortable key so we can reuse std::sort rather than bespoke grid maps. constexpr uint64_t pack_cell(uint32_t ix, uint32_t iy) noexcept { return (static_cast(ix) << 32U) | static_cast(iy); } @@ -22,6 +24,8 @@ constexpr uint32_t cell_of(uint32_t value, uint32_t cell_size) noexcept { return cell_size == 0 ? value : static_cast(value / cell_size); } +// Neighbors are explored by scanning adjacent grid cells so that the expensive L1 radius test only touches +// candidates that share the precomputed bucket, keeping the branch predictor in our favor. template void for_each_neighbor(uint32_t point_index, uint32_t eps, const uint32_t *x, const uint32_t *y, const std::vector &cell_x, const std::vector &cell_y, @@ -82,13 +86,16 @@ DBSCANGrid2D_L1::DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, throw std::invalid_argument("min_samples must be greater than zero for DBSCANGrid2D_L1"); } -std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) const { +std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) { if (x == nullptr || y == nullptr) throw std::invalid_argument("Input coordinate arrays must be non-null"); if (count == 0) return {}; + perf_timing_.clear(); + ScopedTimer total_timer("total", perf_timing_); + const uint32_t cell_size = eps; std::vector cell_x(count); @@ -98,41 +105,52 @@ std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint3 std::iota(ordered_indices.begin(), ordered_indices.end(), 0U); const std::size_t index_chunk = chunk_size == 0 ? 1024 : chunk_size; - utils::parallelize(0, count, num_threads, index_chunk, - [&](std::size_t begin, std::size_t end) { - for (std::size_t i = begin; i < end; ++i) { - const uint32_t cx = cell_of(x[i], cell_size); - const uint32_t cy = cell_of(y[i], cell_size); - cell_x[i] = cx; - cell_y[i] = cy; - keys[i] = pack_cell(cx, cy); - } - }); + { + // Precompute grid placements in parallel so later stages can stay read-only and avoid rehashing coordinates. + ScopedTimer timer("precompute_cells", perf_timing_); + utils::parallelize(0, count, num_threads, index_chunk, [&](std::size_t begin, std::size_t end) { + for (std::size_t i = begin; i < end; ++i) { + const uint32_t cx = cell_of(x[i], cell_size); + const uint32_t cy = cell_of(y[i], cell_size); + cell_x[i] = cx; + cell_y[i] = cy; + keys[i] = pack_cell(cx, cy); + } + }); + } - std::sort(ordered_indices.begin(), ordered_indices.end(), [&](uint32_t lhs, uint32_t rhs) { - const uint64_t key_lhs = keys[lhs]; - const uint64_t key_rhs = keys[rhs]; - if (key_lhs == key_rhs) - return lhs < rhs; - return key_lhs < key_rhs; - }); + { + // Sorting indices by packed cell ensures neighbors form contiguous spans which we can scan without lookups. + ScopedTimer timer("sort_indices", perf_timing_); + std::sort(ordered_indices.begin(), ordered_indices.end(), [&](uint32_t lhs, uint32_t rhs) { + const uint64_t key_lhs = keys[lhs]; + const uint64_t key_rhs = keys[rhs]; + if (key_lhs == key_rhs) + return lhs < rhs; + return key_lhs < key_rhs; + }); + } std::vector cell_offsets; cell_offsets.reserve(count + 1); std::vector unique_keys; unique_keys.reserve(count); - std::size_t pos = 0; - while (pos < count) { - const uint64_t key = keys[ordered_indices[pos]]; - unique_keys.push_back(key); - cell_offsets.push_back(pos); - - do { - ++pos; - } while (pos < count && keys[ordered_indices[pos]] == key); + { + // Build a CSR-style view of the sorted cells so we can jump directly to the occupants of any neighboring bucket. + ScopedTimer timer("build_cell_offsets", perf_timing_); + std::size_t pos = 0; + while (pos < count) { + const uint64_t key = keys[ordered_indices[pos]]; + unique_keys.push_back(key); + cell_offsets.push_back(pos); + + do { + ++pos; + } while (pos < count && keys[ordered_indices[pos]] == key); + } + cell_offsets.push_back(count); } - cell_offsets.push_back(count); std::vector labels(count, -1); std::vector is_core(count, 0U); @@ -141,56 +159,68 @@ std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint3 const uint32_t min_samples_value = min_samples; const std::size_t core_chunk = chunk_size == 0 ? 512 : chunk_size; - utils::parallelize(0, count, num_threads, core_chunk, - [&](std::size_t begin, std::size_t end) { - for (std::size_t idx = begin; idx < end; ++idx) { - uint32_t neighbor_count = 0; - for_each_neighbor(static_cast(idx), eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, - unique_keys, [&](uint32_t) { - ++neighbor_count; - return neighbor_count < min_samples_value; - }); - - if (neighbor_count >= min_samples_value) - is_core[idx] = 1U; - } - }); + { + // Core detection runs as an isolated pass so expansion can treat label writes as the only mutation, simplifying + // synchronization even when the search function is invoked concurrently. + ScopedTimer timer("core_detection", perf_timing_); + utils::parallelize(0, count, num_threads, core_chunk, [&](std::size_t begin, std::size_t end) { + for (std::size_t idx = begin; idx < end; ++idx) { + uint32_t neighbor_count = 0; + for_each_neighbor(static_cast(idx), eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, + unique_keys, [&](uint32_t) { + ++neighbor_count; + return neighbor_count < min_samples_value; + }); + + if (neighbor_count >= min_samples_value) + is_core[idx] = 1U; + } + }); + } + // Stack-based expansion keeps the cluster growth iterative, avoiding recursion while allowing work reuse. std::vector stack; stack.reserve(count); + + // Recycle a neighbor buffer per cluster to amortize allocations across large components. std::vector neighbor_buffer; neighbor_buffer.reserve(64); int32_t next_label = 0; - for (std::size_t i = 0; i < count; ++i) { - if (!is_core[i] || labels[i] != -1) - continue; + { + ScopedTimer timer("cluster_expansion", perf_timing_); + for (std::size_t i = 0; i < count; ++i) { + if (!is_core[i] || labels[i] != -1) + continue; - labels[i] = next_label; - stack.clear(); - stack.push_back(static_cast(i)); - - while (!stack.empty()) { - const uint32_t current = stack.back(); - stack.pop_back(); - - neighbor_buffer.clear(); - for_each_neighbor(current, eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, unique_keys, - [&](uint32_t neighbor) { - neighbor_buffer.push_back(neighbor); - return true; - }); - - for (uint32_t neighbor : neighbor_buffer) { - if (labels[neighbor] == -1) { - labels[neighbor] = next_label; - if (is_core[neighbor]) - stack.push_back(neighbor); + labels[i] = next_label; + stack.clear(); + stack.push_back(static_cast(i)); + + while (!stack.empty()) { + const uint32_t current = stack.back(); + stack.pop_back(); + + neighbor_buffer.clear(); + // Capture neighbors into a buffer first so every touch of labels happens after the search, keeping the + // expansion phase deterministic regardless of how for_each_neighbor yields matches. + for_each_neighbor(current, eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, unique_keys, + [&](uint32_t neighbor) { + neighbor_buffer.push_back(neighbor); + return true; + }); + + for (uint32_t neighbor : neighbor_buffer) { + if (labels[neighbor] == -1) { + labels[neighbor] = next_label; + if (is_core[neighbor]) + stack.push_back(neighbor); + } } } - } - ++next_label; + ++next_label; + } } return labels; diff --git a/tests/test_dbscan_grid2d_l1.cpp b/tests/test_dbscan_grid2d_l1.cpp index bf6100a..7f977fa 100644 --- a/tests/test_dbscan_grid2d_l1.cpp +++ b/tests/test_dbscan_grid2d_l1.cpp @@ -9,6 +9,8 @@ namespace { +// The binary fixtures store Y first then X to mirror the grid compaction used in production; this loader +// preserves that ordering so the test exercises identical memory access patterns as the runtime path. void load_fixture(const std::filesystem::path &data_path, const std::filesystem::path &truth_path, std::vector &x_out, std::vector &y_out, std::vector &truth_out) { std::ifstream data_stream(data_path, std::ios::binary); @@ -54,6 +56,8 @@ void load_fixture(const std::filesystem::path &data_path, const std::filesystem: } // namespace TEST_CASE("DBSCANGrid2D_L1 clusters dense neighbors", "[dbscan][grid_l1]") { + // Points are arranged so the Manhattan frontier just connects the first three but leaves the outlier isolated, + // validating that the L1 grid expansion covers diagonals without absorbing distant noise. const std::vector x = {0, 1, 2, 100}; const std::vector y = {0, 0, 1, 200}; @@ -68,6 +72,7 @@ TEST_CASE("DBSCANGrid2D_L1 clusters dense neighbors", "[dbscan][grid_l1]") { } TEST_CASE("DBSCANGrid2D_L1 respects min_samples threshold", "[dbscan][grid_l1]") { + // Every point is deliberately spaced just beyond eps so we confirm the min_samples guard suppresses tiny clusters. const std::vector coords = {0, 2, 4}; dbscan::DBSCANGrid2D_L1 algo(3, 4); auto labels = algo.fit_predict(coords.data(), coords.data(), coords.size()); @@ -79,6 +84,7 @@ TEST_CASE("DBSCANGrid2D_L1 respects min_samples threshold", "[dbscan][grid_l1]") } TEST_CASE("DBSCANGrid2D_L1 matches fixture truth", "[dbscan][grid_l1]") { + // Fixture run mirrors the end-to-end validator to ensure the optimized grid path stays aligned with reference data. const std::filesystem::path root = std::filesystem::path{"tests"} / "data"; const auto data_path = root / "dbscan_static_data.bin"; const auto truth_path = root / "dbscan_static_truth.bin"; From 1773a490c4cb2808ef523a888e76654c64648971 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Wed, 1 Oct 2025 07:51:47 -0700 Subject: [PATCH 09/12] log perf timing --- include/perf_timer.h | 7 ++++++- tools/dbscan_dataset_validator.cpp | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/perf_timer.h b/include/perf_timer.h index 61e9881..7bc5843 100644 --- a/include/perf_timer.h +++ b/include/perf_timer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -47,5 +48,9 @@ class ScopedTimer { std::chrono::steady_clock::time_point start_; }; -} // namespace dbscan +inline std::ostream &operator<<(std::ostream &out, const PerfTimingEntry &entry) { + out << entry.label << ": " << entry.duration_ms << " ms"; + return out; +} +} // namespace dbscan diff --git a/tools/dbscan_dataset_validator.cpp b/tools/dbscan_dataset_validator.cpp index 2c5a26e..684a48c 100644 --- a/tools/dbscan_dataset_validator.cpp +++ b/tools/dbscan_dataset_validator.cpp @@ -419,6 +419,12 @@ int main(int argc, char **argv) { const auto end = std::chrono::steady_clock::now(); const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); std::cout << " done in " << elapsed_ms << " ms" << std::endl; + if (!grid_algo.perf_timing_.entries().empty()) { + // Emit step-level timings so dataset runs surface bottlenecks without separate profiling passes. + std::cout << "[grid_l1] component timings" << std::endl; + for (const auto &entry : grid_algo.perf_timing_.entries()) + std::cout << " " << entry << std::endl; + } results.push_back({"grid_l1", metrics}); if (options.mismatch_output_dir && !mismatches.empty()) { From a3a99b42b3ff87a126e94a2601ff51fc59d6b3d5 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Wed, 1 Oct 2025 08:25:54 -0700 Subject: [PATCH 10/12] multithreaded expansion --- include/dbscan_grid2d_l1.h | 20 +- src/dbscan_grid2d_l1.cpp | 327 +++++++++++++++++++++++++---- tests/test_dbscan_grid2d_l1.cpp | 37 +++- tools/dbscan_dataset_validator.cpp | 107 +++++++--- 4 files changed, 418 insertions(+), 73 deletions(-) diff --git a/include/dbscan_grid2d_l1.h b/include/dbscan_grid2d_l1.h index 8c51190..0a402f6 100644 --- a/include/dbscan_grid2d_l1.h +++ b/include/dbscan_grid2d_l1.h @@ -8,18 +8,36 @@ namespace dbscan { +enum class GridExpansionMode { + Sequential, + FrontierParallel, + UnionFind +}; + struct DBSCANGrid2D_L1 { uint32_t eps; uint32_t min_samples; std::size_t num_threads; std::size_t chunk_size; + GridExpansionMode expansion_mode; DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value = 0, - std::size_t chunk_size_value = 0); + std::size_t chunk_size_value = 0, GridExpansionMode mode_value = GridExpansionMode::Sequential); [[nodiscard]] std::vector fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count); PerfTiming perf_timing_; }; +template +struct DBSCANGrid2D_L1T : DBSCANGrid2D_L1 { + DBSCANGrid2D_L1T(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value = 0, + std::size_t chunk_size_value = 0) + : DBSCANGrid2D_L1(eps_value, min_samples_value, num_threads_value, chunk_size_value, Mode) {} +}; + +using DBSCANGrid2D_L1Frontier = DBSCANGrid2D_L1T; +using DBSCANGrid2D_L1UnionFind = DBSCANGrid2D_L1T; + } // namespace dbscan + diff --git a/src/dbscan_grid2d_l1.cpp b/src/dbscan_grid2d_l1.cpp index 1b324f2..f412207 100644 --- a/src/dbscan_grid2d_l1.cpp +++ b/src/dbscan_grid2d_l1.cpp @@ -1,10 +1,13 @@ #include "dbscan_grid2d_l1.h" #include +#include #include #include +#include #include #include +#include #include #include @@ -75,11 +78,269 @@ void for_each_neighbor(uint32_t point_index, uint32_t eps, const uint32_t *x, co } } +struct ExpansionContext { + const uint32_t *x; + const uint32_t *y; + std::size_t count; + uint32_t eps; + uint32_t min_samples; + const std::vector &cell_x; + const std::vector &cell_y; + const std::vector &ordered_indices; + const std::vector &cell_offsets; + const std::vector &unique_keys; + const std::vector &is_core; + std::size_t num_threads; + std::size_t chunk_size; +}; + +void sequential_expand(const ExpansionContext &ctx, std::vector &labels) { + std::vector stack; + stack.reserve(ctx.count); + std::vector neighbor_buffer; + neighbor_buffer.reserve(64); + + int32_t next_label = 0; + for (std::size_t i = 0; i < ctx.count; ++i) { + if (!ctx.is_core[i] || labels[i] != -1) + continue; + + labels[i] = next_label; + stack.clear(); + stack.push_back(static_cast(i)); + + while (!stack.empty()) { + const uint32_t current = stack.back(); + stack.pop_back(); + + neighbor_buffer.clear(); + // Capture neighbors into a buffer first so every touch of labels happens after the search, keeping the + // expansion phase deterministic regardless of how for_each_neighbor yields matches. + for_each_neighbor(current, ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, ctx.cell_offsets, + ctx.unique_keys, [&](uint32_t neighbor) { + neighbor_buffer.push_back(neighbor); + return true; + }); + + for (uint32_t neighbor : neighbor_buffer) { + if (labels[neighbor] == -1) { + labels[neighbor] = next_label; + if (ctx.is_core[neighbor]) + stack.push_back(neighbor); + } + } + } + + ++next_label; + } +} + +void frontier_expand(const ExpansionContext &ctx, std::vector &labels); + +void union_find_expand(const ExpansionContext &ctx, std::vector &labels); + +void frontier_expand(const ExpansionContext &ctx, std::vector &labels) { + std::vector> shared_labels(ctx.count); + for (std::size_t i = 0; i < ctx.count; ++i) + shared_labels[i].store(labels[i], std::memory_order_relaxed); + + int32_t next_label = 0; + std::vector frontier; + frontier.reserve(256); + + const std::size_t frontier_chunk = ctx.chunk_size == 0 ? 64 : ctx.chunk_size; + + for (std::size_t seed = 0; seed < ctx.count; ++seed) { + if (!ctx.is_core[seed] || shared_labels[seed].load(std::memory_order_acquire) != -1) + continue; + + const int32_t label = next_label++; + shared_labels[seed].store(label, std::memory_order_release); + frontier.clear(); + frontier.push_back(static_cast(seed)); + + while (!frontier.empty()) { + std::vector next_frontier; + std::mutex next_mutex; + + utils::parallelize(0, frontier.size(), ctx.num_threads, frontier_chunk, [&](std::size_t begin, std::size_t end) { + std::vector local_next; + local_next.reserve(32); + std::vector neighbor_buffer; + neighbor_buffer.reserve(64); + + for (std::size_t idx = begin; idx < end; ++idx) { + const uint32_t current = frontier[idx]; + + neighbor_buffer.clear(); + for_each_neighbor(current, ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, + ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + neighbor_buffer.push_back(neighbor); + return true; + }); + + for (uint32_t neighbor : neighbor_buffer) { + if (ctx.is_core[neighbor]) { + int32_t expected = -1; + if (shared_labels[neighbor].compare_exchange_strong(expected, label, std::memory_order_acq_rel)) + local_next.push_back(neighbor); + } else { + int32_t expected = -1; + shared_labels[neighbor].compare_exchange_strong(expected, label, std::memory_order_acq_rel); + } + } + } + + if (!local_next.empty()) { + std::sort(local_next.begin(), local_next.end()); + local_next.erase(std::unique(local_next.begin(), local_next.end()), local_next.end()); + std::lock_guard lock(next_mutex); + next_frontier.insert(next_frontier.end(), local_next.begin(), local_next.end()); + } + }); + + if (next_frontier.empty()) + break; + + std::sort(next_frontier.begin(), next_frontier.end()); + next_frontier.erase(std::unique(next_frontier.begin(), next_frontier.end()), next_frontier.end()); + frontier.swap(next_frontier); + } + } + + for (std::size_t i = 0; i < ctx.count; ++i) + labels[i] = shared_labels[i].load(std::memory_order_relaxed); +} + +void union_find_expand(const ExpansionContext &ctx, std::vector &labels) { + constexpr uint32_t invalid = std::numeric_limits::max(); + std::vector> parents(ctx.count); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (ctx.is_core[i]) + parents[i].store(static_cast(i), std::memory_order_relaxed); + else + parents[i].store(invalid, std::memory_order_relaxed); + } + + auto find_root = [&](uint32_t node) { + uint32_t parent = parents[node].load(std::memory_order_acquire); + if (parent == invalid) + return invalid; + + while (true) { + uint32_t grandparent = parents[parent].load(std::memory_order_acquire); + if (grandparent == parent) { + if (parent != node) + parents[node].store(parent, std::memory_order_release); + return parent; + } + parents[node].compare_exchange_strong(parent, grandparent, std::memory_order_acq_rel); + node = parent; + parent = parents[node].load(std::memory_order_acquire); + if (parent == invalid) + return invalid; + } + }; + + auto unite = [&](uint32_t a, uint32_t b) { + while (true) { + a = find_root(a); + b = find_root(b); + if (a == invalid || b == invalid || a == b) + return; + + if (a < b) { + uint32_t expected = b; + if (parents[b].compare_exchange_strong(expected, a, std::memory_order_acq_rel)) + return; + } else { + uint32_t expected = a; + if (parents[a].compare_exchange_strong(expected, b, std::memory_order_acq_rel)) + return; + } + } + }; + + const std::size_t union_chunk = ctx.chunk_size == 0 ? 512 : ctx.chunk_size; + utils::parallelize(0, ctx.count, ctx.num_threads, union_chunk, [&](std::size_t begin, std::size_t end) { + for (std::size_t idx = begin; idx < end; ++idx) { + if (!ctx.is_core[idx]) + continue; + + for_each_neighbor(static_cast(idx), ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, + ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + if (ctx.is_core[neighbor]) + unite(static_cast(idx), neighbor); + return true; + }); + } + }); + + std::vector root_for_point(ctx.count, invalid); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (ctx.is_core[i]) + root_for_point[i] = find_root(static_cast(i)); + } + + std::vector component_min(ctx.count, invalid); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (!ctx.is_core[i]) + continue; + const uint32_t root = root_for_point[i]; + if (root == invalid) + continue; + if (component_min[root] > i) + component_min[root] = static_cast(i); + } + + std::vector> components; + components.reserve(ctx.count); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (component_min[i] != invalid) + components.emplace_back(component_min[i], static_cast(i)); + } + + std::sort(components.begin(), components.end()); + + std::vector root_label(ctx.count, -1); + int32_t next_label = 0; + for (const auto &[min_index, root] : components) { + (void)min_index; + root_label[root] = next_label++; + } + + for (std::size_t i = 0; i < ctx.count; ++i) { + if (!ctx.is_core[i]) + continue; + const uint32_t root = root_for_point[i]; + if (root == invalid) + continue; + labels[i] = root_label[root]; + } + + for (std::size_t i = 0; i < ctx.count; ++i) { + if (ctx.is_core[i]) + continue; + + int32_t best_label = -1; + for_each_neighbor(static_cast(i), ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, + ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + if (!ctx.is_core[neighbor]) + return true; + const int32_t candidate = labels[neighbor]; + if (candidate != -1 && (best_label == -1 || candidate < best_label)) + best_label = candidate; + return true; + }); + labels[i] = best_label; + } +} } // namespace DBSCANGrid2D_L1::DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value, - std::size_t chunk_size_value) - : eps(eps_value), min_samples(min_samples_value), num_threads(num_threads_value), chunk_size(chunk_size_value) { + std::size_t chunk_size_value, GridExpansionMode mode_value) + : eps(eps_value), min_samples(min_samples_value), num_threads(num_threads_value), chunk_size(chunk_size_value), + expansion_mode(mode_value) { if (eps == 0) throw std::invalid_argument("eps must be greater than zero for DBSCANGrid2D_L1"); if (min_samples == 0) @@ -178,48 +439,32 @@ std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint3 }); } - // Stack-based expansion keeps the cluster growth iterative, avoiding recursion while allowing work reuse. - std::vector stack; - stack.reserve(count); - - // Recycle a neighbor buffer per cluster to amortize allocations across large components. - std::vector neighbor_buffer; - neighbor_buffer.reserve(64); + const ExpansionContext context{x, + y, + count, + eps_value, + min_samples_value, + cell_x, + cell_y, + ordered_indices, + cell_offsets, + unique_keys, + is_core, + num_threads, + chunk_size}; - int32_t next_label = 0; { ScopedTimer timer("cluster_expansion", perf_timing_); - for (std::size_t i = 0; i < count; ++i) { - if (!is_core[i] || labels[i] != -1) - continue; - - labels[i] = next_label; - stack.clear(); - stack.push_back(static_cast(i)); - - while (!stack.empty()) { - const uint32_t current = stack.back(); - stack.pop_back(); - - neighbor_buffer.clear(); - // Capture neighbors into a buffer first so every touch of labels happens after the search, keeping the - // expansion phase deterministic regardless of how for_each_neighbor yields matches. - for_each_neighbor(current, eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, unique_keys, - [&](uint32_t neighbor) { - neighbor_buffer.push_back(neighbor); - return true; - }); - - for (uint32_t neighbor : neighbor_buffer) { - if (labels[neighbor] == -1) { - labels[neighbor] = next_label; - if (is_core[neighbor]) - stack.push_back(neighbor); - } - } - } - - ++next_label; + switch (expansion_mode) { + case GridExpansionMode::Sequential: + sequential_expand(context, labels); + break; + case GridExpansionMode::FrontierParallel: + frontier_expand(context, labels); + break; + case GridExpansionMode::UnionFind: + union_find_expand(context, labels); + break; } } diff --git a/tests/test_dbscan_grid2d_l1.cpp b/tests/test_dbscan_grid2d_l1.cpp index 7f977fa..b08b0d0 100644 --- a/tests/test_dbscan_grid2d_l1.cpp +++ b/tests/test_dbscan_grid2d_l1.cpp @@ -9,6 +9,17 @@ namespace { +std::filesystem::path fixture_root() { + const std::filesystem::path candidates[] = {std::filesystem::path{"tests"} / "data", + std::filesystem::path{".."} / "tests" / "data", + std::filesystem::path{".."} / ".." / "tests" / "data"}; + for (const auto &candidate : candidates) { + if (std::filesystem::exists(candidate)) + return candidate; + } + return candidates[0]; +} + // The binary fixtures store Y first then X to mirror the grid compaction used in production; this loader // preserves that ordering so the test exercises identical memory access patterns as the runtime path. void load_fixture(const std::filesystem::path &data_path, const std::filesystem::path &truth_path, @@ -85,7 +96,7 @@ TEST_CASE("DBSCANGrid2D_L1 respects min_samples threshold", "[dbscan][grid_l1]") TEST_CASE("DBSCANGrid2D_L1 matches fixture truth", "[dbscan][grid_l1]") { // Fixture run mirrors the end-to-end validator to ensure the optimized grid path stays aligned with reference data. - const std::filesystem::path root = std::filesystem::path{"tests"} / "data"; + const std::filesystem::path root = fixture_root(); const auto data_path = root / "dbscan_static_data.bin"; const auto truth_path = root / "dbscan_static_truth.bin"; @@ -99,4 +110,28 @@ TEST_CASE("DBSCANGrid2D_L1 matches fixture truth", "[dbscan][grid_l1]") { REQUIRE(labels.size() == truth.size()); REQUIRE(labels == truth); + + dbscan::DBSCANGrid2D_L1Frontier frontier_algo(10, 3, 4); + auto frontier_labels = frontier_algo.fit_predict(x.data(), y.data(), x.size()); + REQUIRE(frontier_labels == truth); + + dbscan::DBSCANGrid2D_L1UnionFind union_algo(10, 3, 4); + auto union_labels = union_algo.fit_predict(x.data(), y.data(), x.size()); + REQUIRE(union_labels == truth); +} + +TEST_CASE("DBSCANGrid2D_L1 parallel variants align with sequential", "[dbscan][grid_l1][parallel]") { + const std::vector x = {0, 1, 2, 5, 40}; + const std::vector y = {0, 0, 1, 4, 80}; + + dbscan::DBSCANGrid2D_L1 sequential(6, 3); + const auto expected = sequential.fit_predict(x.data(), y.data(), x.size()); + + dbscan::DBSCANGrid2D_L1Frontier frontier(6, 3, 4); + auto frontier_labels = frontier.fit_predict(x.data(), y.data(), x.size()); + REQUIRE(frontier_labels == expected); + + dbscan::DBSCANGrid2D_L1UnionFind union_algo(6, 3, 4); + auto union_labels = union_algo.fit_predict(x.data(), y.data(), x.size()); + REQUIRE(union_labels == expected); } diff --git a/tools/dbscan_dataset_validator.cpp b/tools/dbscan_dataset_validator.cpp index 684a48c..88ad292 100644 --- a/tools/dbscan_dataset_validator.cpp +++ b/tools/dbscan_dataset_validator.cpp @@ -29,13 +29,15 @@ struct Options { bool run_baseline{true}; bool run_optimized{true}; bool run_grid_l1{false}; + std::vector grid_modes; std::optional mismatch_output_dir; }; void print_usage(const char *program_name) { std::cout << "Usage: " << program_name << " [--data ] [--truth ] [--eps ] [--min-samples ]" - << " [--impl baseline|optimized|grid|both|all] [--dump-mismatches ]\n"; + << " [--impl baseline|optimized|grid|grid_frontier|grid_union|grid_all|both|all]" + << " [--dump-mismatches ]\n"; } Options parse_arguments(int argc, char **argv) { @@ -65,30 +67,54 @@ Options parse_arguments(int argc, char **argv) { options.min_samples = static_cast(std::stoi(argv[++i])); } else if (arg == "--impl") { if (i + 1 >= argc) - throw std::invalid_argument("--impl expects one of: baseline, optimized, grid, both, all"); + throw std::invalid_argument( + "--impl expects one of: baseline, optimized, grid, grid_frontier, grid_union, grid_all, both, all"); const std::string value{argv[++i]}; if (value == "baseline") { options.run_baseline = true; options.run_optimized = false; options.run_grid_l1 = false; + options.grid_modes.clear(); } else if (value == "optimized") { options.run_baseline = false; options.run_optimized = true; options.run_grid_l1 = false; + options.grid_modes.clear(); } else if (value == "grid" || value == "grid_l1") { options.run_baseline = false; options.run_optimized = false; options.run_grid_l1 = true; + options.grid_modes = {dbscan::GridExpansionMode::Sequential}; + } else if (value == "grid_frontier") { + options.run_baseline = false; + options.run_optimized = false; + options.run_grid_l1 = true; + options.grid_modes = {dbscan::GridExpansionMode::FrontierParallel}; + } else if (value == "grid_union" || value == "grid_union_find") { + options.run_baseline = false; + options.run_optimized = false; + options.run_grid_l1 = true; + options.grid_modes = {dbscan::GridExpansionMode::UnionFind}; + } else if (value == "grid_all") { + options.run_baseline = false; + options.run_optimized = false; + options.run_grid_l1 = true; + options.grid_modes = {dbscan::GridExpansionMode::Sequential, dbscan::GridExpansionMode::FrontierParallel, + dbscan::GridExpansionMode::UnionFind}; } else if (value == "both") { options.run_baseline = true; options.run_optimized = true; options.run_grid_l1 = false; + options.grid_modes.clear(); } else if (value == "all") { options.run_baseline = true; options.run_optimized = true; options.run_grid_l1 = true; + if (options.grid_modes.empty()) + options.grid_modes = {dbscan::GridExpansionMode::Sequential}; } else { - throw std::invalid_argument("--impl expects one of: baseline, optimized, grid, both, all"); + throw std::invalid_argument( + "--impl expects one of: baseline, optimized, grid, grid_frontier, grid_union, grid_all, both, all"); } } else if (arg == "--dump-mismatches") { if (i + 1 >= argc) @@ -103,6 +129,8 @@ Options parse_arguments(int argc, char **argv) { throw std::invalid_argument("--eps must be positive"); if (options.min_samples <= 0) throw std::invalid_argument("--min-samples must be positive"); + if (options.run_grid_l1 && options.grid_modes.empty()) + options.grid_modes = {dbscan::GridExpansionMode::Sequential}; return options; } @@ -402,6 +430,22 @@ int main(int argc, char **argv) { } } + auto grid_variant_info = [](dbscan::GridExpansionMode mode) { + struct VariantInfo { + std::string label; + std::string slug; + }; + switch (mode) { + case dbscan::GridExpansionMode::Sequential: + return VariantInfo{"grid_l1", "grid_l1"}; + case dbscan::GridExpansionMode::FrontierParallel: + return VariantInfo{"grid_l1_frontier", "grid_l1_frontier"}; + case dbscan::GridExpansionMode::UnionFind: + return VariantInfo{"grid_l1_union", "grid_l1_union"}; + } + return VariantInfo{"grid_l1", "grid_l1"}; + }; + if (options.run_grid_l1) { const auto eps_int = static_cast(std::llround(options.eps)); if (std::fabs(options.eps - static_cast(eps_int)) > 1e-6) { @@ -409,33 +453,36 @@ int main(int argc, char **argv) { } if (x_coords.size() != y_coords.size()) throw std::runtime_error("Mismatch between x and y coordinate counts"); - - std::cout << "\n[grid_l1] Running clustering..." << std::flush; - const auto start = std::chrono::steady_clock::now(); - dbscan::DBSCANGrid2D_L1 grid_algo(eps_int, static_cast(options.min_samples)); - const auto labels = grid_algo.fit_predict(x_coords.data(), y_coords.data(), x_coords.size()); - std::vector mismatches; - const auto metrics = evaluate(labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); - const auto end = std::chrono::steady_clock::now(); - const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); - std::cout << " done in " << elapsed_ms << " ms" << std::endl; - if (!grid_algo.perf_timing_.entries().empty()) { - // Emit step-level timings so dataset runs surface bottlenecks without separate profiling passes. - std::cout << "[grid_l1] component timings" << std::endl; - for (const auto &entry : grid_algo.perf_timing_.entries()) - std::cout << " " << entry << std::endl; - } - results.push_back({"grid_l1", metrics}); - - if (options.mismatch_output_dir && !mismatches.empty()) { - std::filesystem::create_directories(*options.mismatch_output_dir); - auto file_path = *options.mismatch_output_dir / "grid_l1_mismatches.txt"; - std::ofstream out(file_path); - if (!out) - throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); - for (std::size_t index : mismatches) - out << index << '\n'; - std::cout << "[grid_l1] Wrote " << mismatches.size() << " mismatches to " << file_path << "\n"; + for (auto mode : options.grid_modes) { + const auto info = grid_variant_info(mode); + std::cout << "\n[" << info.label << "] Running clustering..." << std::flush; + const auto start = std::chrono::steady_clock::now(); + dbscan::DBSCANGrid2D_L1 grid_algo(eps_int, static_cast(options.min_samples), 0, 0, mode); + const auto labels = grid_algo.fit_predict(x_coords.data(), y_coords.data(), x_coords.size()); + std::vector mismatches; + const auto metrics = evaluate(labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + const auto end = std::chrono::steady_clock::now(); + const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); + std::cout << " done in " << elapsed_ms << " ms" << std::endl; + if (!grid_algo.perf_timing_.entries().empty()) { + // Emit step-level timings so dataset runs surface bottlenecks without separate profiling passes. + std::cout << "[" << info.label << "] component timings" << std::endl; + for (const auto &entry : grid_algo.perf_timing_.entries()) + std::cout << " " << entry << std::endl; + } + results.push_back({info.label, metrics}); + + if (options.mismatch_output_dir && !mismatches.empty()) { + std::filesystem::create_directories(*options.mismatch_output_dir); + auto file_path = *options.mismatch_output_dir / (info.slug + "_mismatches.txt"); + std::ofstream out(file_path); + if (!out) + throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); + for (std::size_t index : mismatches) + out << index << '\n'; + std::cout << "[" << info.label << "] Wrote " << mismatches.size() << " mismatches to " << file_path + << "\n"; + } } } From 82d7fef1954d009455be15e3f57f9ce24a7ba93a Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Thu, 2 Oct 2025 08:15:05 -0700 Subject: [PATCH 11/12] commit --- CMakeLists.txt | 1 + benchmark/benchmark_dbscan.cpp | 8 +- include/dbscan_grid2d_l1.h | 36 +-- src/dbscan_grid2d_l1.cpp | 477 +---------------------------- src/dbscan_grid2d_l1_aos.cpp | 25 ++ src/dbscan_grid2d_l1_impl.hpp | 459 +++++++++++++++++++++++++++ tests/test_dbscan_grid2d_l1.cpp | 67 ++-- tools/dbscan_dataset_validator.cpp | 16 +- 8 files changed, 568 insertions(+), 521 deletions(-) create mode 100644 src/dbscan_grid2d_l1_aos.cpp create mode 100644 src/dbscan_grid2d_l1_impl.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bcc6c50..0ed6314 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(dbscan STATIC src/dbscan.cpp src/dbscan_optimized.cpp src/dbscan_grid2d_l1.cpp + src/dbscan_grid2d_l1_aos.cpp ) target_include_directories(dbscan diff --git a/benchmark/benchmark_dbscan.cpp b/benchmark/benchmark_dbscan.cpp index 95fc230..55b0584 100644 --- a/benchmark/benchmark_dbscan.cpp +++ b/benchmark/benchmark_dbscan.cpp @@ -106,9 +106,11 @@ int main() { const std::string label = "grid-l1 " + std::to_string(total_points) + " pts threads=" + (thread_count == 0 ? std::string("auto") : std::to_string(thread_count)); bench.run(label, [&]() { - dbscan::DBSCANGrid2D_L1 algo(eps, min_samples, thread_count); - auto labels = algo.fit_predict(dataset.x.data(), dataset.y.data(), total_points); - ankerl::nanobench::doNotOptimizeAway(labels); + dbscan::DBSCANGrid2DL1Params params{eps, min_samples}; + params.num_threads = thread_count; + auto result = + dbscan::dbscan_grid2d_l1(dataset.x.data(), 1, dataset.y.data(), 1, total_points, params); + ankerl::nanobench::doNotOptimizeAway(result.labels); }); } } diff --git a/include/dbscan_grid2d_l1.h b/include/dbscan_grid2d_l1.h index 0a402f6..c3e91c1 100644 --- a/include/dbscan_grid2d_l1.h +++ b/include/dbscan_grid2d_l1.h @@ -14,30 +14,30 @@ enum class GridExpansionMode { UnionFind }; -struct DBSCANGrid2D_L1 { +struct DBSCANGrid2DL1Params { uint32_t eps; uint32_t min_samples; - std::size_t num_threads; - std::size_t chunk_size; - GridExpansionMode expansion_mode; - - DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value = 0, - std::size_t chunk_size_value = 0, GridExpansionMode mode_value = GridExpansionMode::Sequential); - - [[nodiscard]] std::vector fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count); + std::size_t num_threads = 0; + std::size_t chunk_size = 0; +}; - PerfTiming perf_timing_; +struct Grid2DPoint { + uint32_t x; + uint32_t y; }; -template -struct DBSCANGrid2D_L1T : DBSCANGrid2D_L1 { - DBSCANGrid2D_L1T(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value = 0, - std::size_t chunk_size_value = 0) - : DBSCANGrid2D_L1(eps_value, min_samples_value, num_threads_value, chunk_size_value, Mode) {} +struct DBSCANGrid2DL1Result { + std::vector labels; + PerfTiming perf_timing; }; -using DBSCANGrid2D_L1Frontier = DBSCANGrid2D_L1T; -using DBSCANGrid2D_L1UnionFind = DBSCANGrid2D_L1T; +[[nodiscard]] DBSCANGrid2DL1Result dbscan_grid2d_l1(const uint32_t *x, std::size_t x_stride, const uint32_t *y, + std::size_t y_stride, std::size_t count, + const DBSCANGrid2DL1Params ¶ms, + GridExpansionMode mode = GridExpansionMode::Sequential); -} // namespace dbscan +[[nodiscard]] DBSCANGrid2DL1Result dbscan_grid2d_l1_aos(const Grid2DPoint *points, std::size_t count, + const DBSCANGrid2DL1Params ¶ms, + GridExpansionMode mode = GridExpansionMode::Sequential); +} // namespace dbscan diff --git a/src/dbscan_grid2d_l1.cpp b/src/dbscan_grid2d_l1.cpp index f412207..8e3cdc6 100644 --- a/src/dbscan_grid2d_l1.cpp +++ b/src/dbscan_grid2d_l1.cpp @@ -1,474 +1,23 @@ #include "dbscan_grid2d_l1.h" -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include "parallel.hpp" -#include "perf_timer.h" +#include "dbscan_grid2d_l1_impl.hpp" namespace dbscan { -namespace { - -// Compact 2D cell coordinates into a sortable key so we can reuse std::sort rather than bespoke grid maps. -constexpr uint64_t pack_cell(uint32_t ix, uint32_t iy) noexcept { - return (static_cast(ix) << 32U) | static_cast(iy); -} - -constexpr uint32_t cell_of(uint32_t value, uint32_t cell_size) noexcept { - return cell_size == 0 ? value : static_cast(value / cell_size); -} - -// Neighbors are explored by scanning adjacent grid cells so that the expensive L1 radius test only touches -// candidates that share the precomputed bucket, keeping the branch predictor in our favor. -template -void for_each_neighbor(uint32_t point_index, uint32_t eps, const uint32_t *x, const uint32_t *y, - const std::vector &cell_x, const std::vector &cell_y, - const std::vector &ordered_indices, const std::vector &cell_offsets, - const std::vector &unique_keys, Fn &&fn) { - - const uint32_t base_cx = cell_x[point_index]; - const uint32_t base_cy = cell_y[point_index]; - - for (int dx = -1; dx <= 1; ++dx) { - const int64_t nx = static_cast(base_cx) + dx; - if (nx < 0) - continue; - - for (int dy = -1; dy <= 1; ++dy) { - const int64_t ny = static_cast(base_cy) + dy; - if (ny < 0) - continue; - - const uint64_t key = pack_cell(static_cast(nx), static_cast(ny)); - const auto it = std::lower_bound(unique_keys.begin(), unique_keys.end(), key); - if (it == unique_keys.end() || *it != key) - continue; - - const std::size_t cell_idx = static_cast(std::distance(unique_keys.begin(), it)); - const std::size_t begin = cell_offsets[cell_idx]; - const std::size_t end = cell_offsets[cell_idx + 1]; - - for (std::size_t pos = begin; pos < end; ++pos) { - const uint32_t neighbor_idx = ordered_indices[pos]; - - const uint32_t x_a = x[point_index]; - const uint32_t y_a = y[point_index]; - const uint32_t x_b = x[neighbor_idx]; - const uint32_t y_b = y[neighbor_idx]; - - const uint32_t dx_abs = x_a > x_b ? x_a - x_b : x_b - x_a; - const uint32_t dy_abs = y_a > y_b ? y_a - y_b : y_b - y_a; - const uint64_t manhattan = static_cast(dx_abs) + static_cast(dy_abs); - - if (manhattan <= static_cast(eps)) { - if (!fn(neighbor_idx)) - return; - } - } - } - } -} - -struct ExpansionContext { - const uint32_t *x; - const uint32_t *y; - std::size_t count; - uint32_t eps; - uint32_t min_samples; - const std::vector &cell_x; - const std::vector &cell_y; - const std::vector &ordered_indices; - const std::vector &cell_offsets; - const std::vector &unique_keys; - const std::vector &is_core; - std::size_t num_threads; - std::size_t chunk_size; -}; - -void sequential_expand(const ExpansionContext &ctx, std::vector &labels) { - std::vector stack; - stack.reserve(ctx.count); - std::vector neighbor_buffer; - neighbor_buffer.reserve(64); - - int32_t next_label = 0; - for (std::size_t i = 0; i < ctx.count; ++i) { - if (!ctx.is_core[i] || labels[i] != -1) - continue; - - labels[i] = next_label; - stack.clear(); - stack.push_back(static_cast(i)); - - while (!stack.empty()) { - const uint32_t current = stack.back(); - stack.pop_back(); - - neighbor_buffer.clear(); - // Capture neighbors into a buffer first so every touch of labels happens after the search, keeping the - // expansion phase deterministic regardless of how for_each_neighbor yields matches. - for_each_neighbor(current, ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, ctx.cell_offsets, - ctx.unique_keys, [&](uint32_t neighbor) { - neighbor_buffer.push_back(neighbor); - return true; - }); - - for (uint32_t neighbor : neighbor_buffer) { - if (labels[neighbor] == -1) { - labels[neighbor] = next_label; - if (ctx.is_core[neighbor]) - stack.push_back(neighbor); - } - } - } - - ++next_label; - } -} - -void frontier_expand(const ExpansionContext &ctx, std::vector &labels); - -void union_find_expand(const ExpansionContext &ctx, std::vector &labels); - -void frontier_expand(const ExpansionContext &ctx, std::vector &labels) { - std::vector> shared_labels(ctx.count); - for (std::size_t i = 0; i < ctx.count; ++i) - shared_labels[i].store(labels[i], std::memory_order_relaxed); - - int32_t next_label = 0; - std::vector frontier; - frontier.reserve(256); - - const std::size_t frontier_chunk = ctx.chunk_size == 0 ? 64 : ctx.chunk_size; - - for (std::size_t seed = 0; seed < ctx.count; ++seed) { - if (!ctx.is_core[seed] || shared_labels[seed].load(std::memory_order_acquire) != -1) - continue; - - const int32_t label = next_label++; - shared_labels[seed].store(label, std::memory_order_release); - frontier.clear(); - frontier.push_back(static_cast(seed)); - - while (!frontier.empty()) { - std::vector next_frontier; - std::mutex next_mutex; - - utils::parallelize(0, frontier.size(), ctx.num_threads, frontier_chunk, [&](std::size_t begin, std::size_t end) { - std::vector local_next; - local_next.reserve(32); - std::vector neighbor_buffer; - neighbor_buffer.reserve(64); - - for (std::size_t idx = begin; idx < end; ++idx) { - const uint32_t current = frontier[idx]; - - neighbor_buffer.clear(); - for_each_neighbor(current, ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, - ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { - neighbor_buffer.push_back(neighbor); - return true; - }); - - for (uint32_t neighbor : neighbor_buffer) { - if (ctx.is_core[neighbor]) { - int32_t expected = -1; - if (shared_labels[neighbor].compare_exchange_strong(expected, label, std::memory_order_acq_rel)) - local_next.push_back(neighbor); - } else { - int32_t expected = -1; - shared_labels[neighbor].compare_exchange_strong(expected, label, std::memory_order_acq_rel); - } - } - } - - if (!local_next.empty()) { - std::sort(local_next.begin(), local_next.end()); - local_next.erase(std::unique(local_next.begin(), local_next.end()), local_next.end()); - std::lock_guard lock(next_mutex); - next_frontier.insert(next_frontier.end(), local_next.begin(), local_next.end()); - } - }); - - if (next_frontier.empty()) - break; - - std::sort(next_frontier.begin(), next_frontier.end()); - next_frontier.erase(std::unique(next_frontier.begin(), next_frontier.end()), next_frontier.end()); - frontier.swap(next_frontier); - } - } - - for (std::size_t i = 0; i < ctx.count; ++i) - labels[i] = shared_labels[i].load(std::memory_order_relaxed); -} - -void union_find_expand(const ExpansionContext &ctx, std::vector &labels) { - constexpr uint32_t invalid = std::numeric_limits::max(); - std::vector> parents(ctx.count); - for (std::size_t i = 0; i < ctx.count; ++i) { - if (ctx.is_core[i]) - parents[i].store(static_cast(i), std::memory_order_relaxed); - else - parents[i].store(invalid, std::memory_order_relaxed); - } - - auto find_root = [&](uint32_t node) { - uint32_t parent = parents[node].load(std::memory_order_acquire); - if (parent == invalid) - return invalid; - - while (true) { - uint32_t grandparent = parents[parent].load(std::memory_order_acquire); - if (grandparent == parent) { - if (parent != node) - parents[node].store(parent, std::memory_order_release); - return parent; - } - parents[node].compare_exchange_strong(parent, grandparent, std::memory_order_acq_rel); - node = parent; - parent = parents[node].load(std::memory_order_acquire); - if (parent == invalid) - return invalid; - } - }; - - auto unite = [&](uint32_t a, uint32_t b) { - while (true) { - a = find_root(a); - b = find_root(b); - if (a == invalid || b == invalid || a == b) - return; - - if (a < b) { - uint32_t expected = b; - if (parents[b].compare_exchange_strong(expected, a, std::memory_order_acq_rel)) - return; - } else { - uint32_t expected = a; - if (parents[a].compare_exchange_strong(expected, b, std::memory_order_acq_rel)) - return; - } - } - }; - - const std::size_t union_chunk = ctx.chunk_size == 0 ? 512 : ctx.chunk_size; - utils::parallelize(0, ctx.count, ctx.num_threads, union_chunk, [&](std::size_t begin, std::size_t end) { - for (std::size_t idx = begin; idx < end; ++idx) { - if (!ctx.is_core[idx]) - continue; - - for_each_neighbor(static_cast(idx), ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, - ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { - if (ctx.is_core[neighbor]) - unite(static_cast(idx), neighbor); - return true; - }); - } - }); - - std::vector root_for_point(ctx.count, invalid); - for (std::size_t i = 0; i < ctx.count; ++i) { - if (ctx.is_core[i]) - root_for_point[i] = find_root(static_cast(i)); - } - - std::vector component_min(ctx.count, invalid); - for (std::size_t i = 0; i < ctx.count; ++i) { - if (!ctx.is_core[i]) - continue; - const uint32_t root = root_for_point[i]; - if (root == invalid) - continue; - if (component_min[root] > i) - component_min[root] = static_cast(i); - } - - std::vector> components; - components.reserve(ctx.count); - for (std::size_t i = 0; i < ctx.count; ++i) { - if (component_min[i] != invalid) - components.emplace_back(component_min[i], static_cast(i)); - } - - std::sort(components.begin(), components.end()); - - std::vector root_label(ctx.count, -1); - int32_t next_label = 0; - for (const auto &[min_index, root] : components) { - (void)min_index; - root_label[root] = next_label++; - } - - for (std::size_t i = 0; i < ctx.count; ++i) { - if (!ctx.is_core[i]) - continue; - const uint32_t root = root_for_point[i]; - if (root == invalid) - continue; - labels[i] = root_label[root]; - } - - for (std::size_t i = 0; i < ctx.count; ++i) { - if (ctx.is_core[i]) - continue; - - int32_t best_label = -1; - for_each_neighbor(static_cast(i), ctx.eps, ctx.x, ctx.y, ctx.cell_x, ctx.cell_y, ctx.ordered_indices, - ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { - if (!ctx.is_core[neighbor]) - return true; - const int32_t candidate = labels[neighbor]; - if (candidate != -1 && (best_label == -1 || candidate < best_label)) - best_label = candidate; - return true; - }); - labels[i] = best_label; - } -} -} // namespace - -DBSCANGrid2D_L1::DBSCANGrid2D_L1(uint32_t eps_value, uint32_t min_samples_value, std::size_t num_threads_value, - std::size_t chunk_size_value, GridExpansionMode mode_value) - : eps(eps_value), min_samples(min_samples_value), num_threads(num_threads_value), chunk_size(chunk_size_value), - expansion_mode(mode_value) { - if (eps == 0) - throw std::invalid_argument("eps must be greater than zero for DBSCANGrid2D_L1"); - if (min_samples == 0) - throw std::invalid_argument("min_samples must be greater than zero for DBSCANGrid2D_L1"); -} - -std::vector DBSCANGrid2D_L1::fit_predict(const uint32_t *x, const uint32_t *y, std::size_t count) { - if (x == nullptr || y == nullptr) - throw std::invalid_argument("Input coordinate arrays must be non-null"); - - if (count == 0) - return {}; - - perf_timing_.clear(); - ScopedTimer total_timer("total", perf_timing_); - - const uint32_t cell_size = eps; - - std::vector cell_x(count); - std::vector cell_y(count); - std::vector keys(count); - std::vector ordered_indices(count); - std::iota(ordered_indices.begin(), ordered_indices.end(), 0U); - - const std::size_t index_chunk = chunk_size == 0 ? 1024 : chunk_size; - { - // Precompute grid placements in parallel so later stages can stay read-only and avoid rehashing coordinates. - ScopedTimer timer("precompute_cells", perf_timing_); - utils::parallelize(0, count, num_threads, index_chunk, [&](std::size_t begin, std::size_t end) { - for (std::size_t i = begin; i < end; ++i) { - const uint32_t cx = cell_of(x[i], cell_size); - const uint32_t cy = cell_of(y[i], cell_size); - cell_x[i] = cx; - cell_y[i] = cy; - keys[i] = pack_cell(cx, cy); - } - }); - } - - { - // Sorting indices by packed cell ensures neighbors form contiguous spans which we can scan without lookups. - ScopedTimer timer("sort_indices", perf_timing_); - std::sort(ordered_indices.begin(), ordered_indices.end(), [&](uint32_t lhs, uint32_t rhs) { - const uint64_t key_lhs = keys[lhs]; - const uint64_t key_rhs = keys[rhs]; - if (key_lhs == key_rhs) - return lhs < rhs; - return key_lhs < key_rhs; - }); - } - - std::vector cell_offsets; - cell_offsets.reserve(count + 1); - std::vector unique_keys; - unique_keys.reserve(count); - - { - // Build a CSR-style view of the sorted cells so we can jump directly to the occupants of any neighboring bucket. - ScopedTimer timer("build_cell_offsets", perf_timing_); - std::size_t pos = 0; - while (pos < count) { - const uint64_t key = keys[ordered_indices[pos]]; - unique_keys.push_back(key); - cell_offsets.push_back(pos); - - do { - ++pos; - } while (pos < count && keys[ordered_indices[pos]] == key); - } - cell_offsets.push_back(count); - } - - std::vector labels(count, -1); - std::vector is_core(count, 0U); - - const uint32_t eps_value = eps; - const uint32_t min_samples_value = min_samples; - - const std::size_t core_chunk = chunk_size == 0 ? 512 : chunk_size; - { - // Core detection runs as an isolated pass so expansion can treat label writes as the only mutation, simplifying - // synchronization even when the search function is invoked concurrently. - ScopedTimer timer("core_detection", perf_timing_); - utils::parallelize(0, count, num_threads, core_chunk, [&](std::size_t begin, std::size_t end) { - for (std::size_t idx = begin; idx < end; ++idx) { - uint32_t neighbor_count = 0; - for_each_neighbor(static_cast(idx), eps_value, x, y, cell_x, cell_y, ordered_indices, cell_offsets, - unique_keys, [&](uint32_t) { - ++neighbor_count; - return neighbor_count < min_samples_value; - }); - - if (neighbor_count >= min_samples_value) - is_core[idx] = 1U; - } - }); - } - - const ExpansionContext context{x, - y, - count, - eps_value, - min_samples_value, - cell_x, - cell_y, - ordered_indices, - cell_offsets, - unique_keys, - is_core, - num_threads, - chunk_size}; - - { - ScopedTimer timer("cluster_expansion", perf_timing_); - switch (expansion_mode) { - case GridExpansionMode::Sequential: - sequential_expand(context, labels); - break; - case GridExpansionMode::FrontierParallel: - frontier_expand(context, labels); - break; - case GridExpansionMode::UnionFind: - union_find_expand(context, labels); - break; - } - } - - return labels; +DBSCANGrid2DL1Result dbscan_grid2d_l1(const uint32_t *x, std::size_t x_stride, const uint32_t *y, std::size_t y_stride, + std::size_t count, const DBSCANGrid2DL1Params ¶ms, GridExpansionMode mode) { + if (params.eps == 0) + throw std::invalid_argument("eps must be greater than zero for dbscan_grid2d_l1"); + if (params.min_samples == 0) + throw std::invalid_argument("min_samples must be greater than zero for dbscan_grid2d_l1"); + if (count != 0 && (x == nullptr || y == nullptr)) + throw std::invalid_argument("Input coordinate arrays must be non-null when count > 0"); + if (count != 0 && (x_stride == 0 || y_stride == 0)) + throw std::invalid_argument("Stride must be positive when count > 0"); + + return detail::dbscan_grid2d_l1_impl(x, x_stride, y, y_stride, count, params, mode); } } // namespace dbscan diff --git a/src/dbscan_grid2d_l1_aos.cpp b/src/dbscan_grid2d_l1_aos.cpp new file mode 100644 index 0000000..15cc900 --- /dev/null +++ b/src/dbscan_grid2d_l1_aos.cpp @@ -0,0 +1,25 @@ +#include "dbscan_grid2d_l1.h" + +#include +#include + +namespace dbscan { + +DBSCANGrid2DL1Result dbscan_grid2d_l1_aos(const Grid2DPoint *points, std::size_t count, + const DBSCANGrid2DL1Params ¶ms, GridExpansionMode mode) { + if (params.eps == 0) + throw std::invalid_argument("eps must be greater than zero for dbscan_grid2d_l1_aos"); + if (params.min_samples == 0) + throw std::invalid_argument("min_samples must be greater than zero for dbscan_grid2d_l1_aos"); + if (count != 0 && points == nullptr) + throw std::invalid_argument("Input point array must be non-null when count > 0"); + + static_assert(sizeof(Grid2DPoint) == sizeof(uint32_t) * 2, "Grid2DPoint is expected to be tightly packed"); + const std::size_t stride = sizeof(Grid2DPoint) / sizeof(uint32_t); + + const uint32_t *x = count == 0 ? nullptr : &points[0].x; + const uint32_t *y = count == 0 ? nullptr : &points[0].y; + return dbscan_grid2d_l1(x, stride, y, stride, count, params, mode); +} + +} // namespace dbscan diff --git a/src/dbscan_grid2d_l1_impl.hpp b/src/dbscan_grid2d_l1_impl.hpp new file mode 100644 index 0000000..afa8722 --- /dev/null +++ b/src/dbscan_grid2d_l1_impl.hpp @@ -0,0 +1,459 @@ +#pragma once + +#include "dbscan_grid2d_l1.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "parallel.hpp" +#include "perf_timer.h" + +namespace dbscan::detail { + +constexpr uint64_t pack_cell(uint32_t ix, uint32_t iy) noexcept { + return (static_cast(ix) << 32U) | static_cast(iy); +} + +constexpr uint32_t cell_of(uint32_t value, uint32_t cell_size) noexcept { + return cell_size == 0 ? value : static_cast(value / cell_size); +} + +inline uint32_t load_coord(const uint32_t *ptr, std::size_t stride, std::size_t index) noexcept { + return *(ptr + index * stride); +} + +template +void for_each_neighbor(uint32_t point_index, uint32_t eps, const uint32_t *x, std::size_t x_stride, + const uint32_t *y, std::size_t y_stride, const std::vector &cell_x, + const std::vector &cell_y, const std::vector &ordered_indices, + const std::vector &cell_offsets, const std::vector &unique_keys, + Fn &&fn) { + + const uint32_t base_cx = cell_x[point_index]; + const uint32_t base_cy = cell_y[point_index]; + const uint32_t x_a = load_coord(x, x_stride, point_index); + const uint32_t y_a = load_coord(y, y_stride, point_index); + + for (int dx = -1; dx <= 1; ++dx) { + const int64_t nx = static_cast(base_cx) + dx; + if (nx < 0) + continue; + + for (int dy = -1; dy <= 1; ++dy) { + const int64_t ny = static_cast(base_cy) + dy; + if (ny < 0) + continue; + + const uint64_t key = pack_cell(static_cast(nx), static_cast(ny)); + const auto it = std::lower_bound(unique_keys.begin(), unique_keys.end(), key); + if (it == unique_keys.end() || *it != key) + continue; + + const std::size_t cell_idx = static_cast(std::distance(unique_keys.begin(), it)); + const std::size_t begin = cell_offsets[cell_idx]; + const std::size_t end = cell_offsets[cell_idx + 1]; + + for (std::size_t pos = begin; pos < end; ++pos) { + const uint32_t neighbor_idx = ordered_indices[pos]; + + const uint32_t x_b = load_coord(x, x_stride, neighbor_idx); + const uint32_t y_b = load_coord(y, y_stride, neighbor_idx); + + const uint32_t dx_abs = x_a > x_b ? x_a - x_b : x_b - x_a; + const uint32_t dy_abs = y_a > y_b ? y_a - y_b : y_b - y_a; + const uint64_t manhattan = static_cast(dx_abs) + static_cast(dy_abs); + + if (manhattan <= static_cast(eps)) { + if (!fn(neighbor_idx)) + return; + } + } + } + } +} + +struct ExpansionContext { + const uint32_t *x; + std::size_t x_stride; + const uint32_t *y; + std::size_t y_stride; + std::size_t count; + uint32_t eps; + uint32_t min_samples; + const std::vector &cell_x; + const std::vector &cell_y; + const std::vector &ordered_indices; + const std::vector &cell_offsets; + const std::vector &unique_keys; + const std::vector &is_core; + std::size_t num_threads; + std::size_t chunk_size; +}; + +void sequential_expand(const ExpansionContext &ctx, std::vector &labels) { + std::vector stack; + stack.reserve(ctx.count); + std::vector neighbor_buffer; + neighbor_buffer.reserve(64); + + int32_t next_label = 0; + for (std::size_t i = 0; i < ctx.count; ++i) { + if (!ctx.is_core[i] || labels[i] != -1) + continue; + + labels[i] = next_label; + stack.clear(); + stack.push_back(static_cast(i)); + + while (!stack.empty()) { + const uint32_t current = stack.back(); + stack.pop_back(); + + neighbor_buffer.clear(); + for_each_neighbor(current, ctx.eps, ctx.x, ctx.x_stride, ctx.y, ctx.y_stride, ctx.cell_x, ctx.cell_y, + ctx.ordered_indices, ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + neighbor_buffer.push_back(neighbor); + return true; + }); + + for (uint32_t neighbor : neighbor_buffer) { + if (labels[neighbor] == -1) { + labels[neighbor] = next_label; + if (ctx.is_core[neighbor]) + stack.push_back(neighbor); + } + } + } + + ++next_label; + } +} + +void frontier_expand(const ExpansionContext &ctx, std::vector &labels) { + std::vector> shared_labels(ctx.count); + for (std::size_t i = 0; i < ctx.count; ++i) + shared_labels[i].store(labels[i], std::memory_order_relaxed); + + int32_t next_label = 0; + std::vector frontier; + frontier.reserve(256); + + const std::size_t frontier_chunk = ctx.chunk_size == 0 ? 64 : ctx.chunk_size; + + for (std::size_t seed = 0; seed < ctx.count; ++seed) { + if (!ctx.is_core[seed] || shared_labels[seed].load(std::memory_order_acquire) != -1) + continue; + + const int32_t label = next_label++; + shared_labels[seed].store(label, std::memory_order_release); + frontier.clear(); + frontier.push_back(static_cast(seed)); + + while (!frontier.empty()) { + std::vector next_frontier; + std::mutex next_mutex; + + utils::parallelize(0, frontier.size(), ctx.num_threads, frontier_chunk, [&](std::size_t begin, std::size_t end) { + std::vector local_next; + local_next.reserve(32); + std::vector neighbor_buffer; + neighbor_buffer.reserve(64); + + for (std::size_t idx = begin; idx < end; ++idx) { + const uint32_t current = frontier[idx]; + + neighbor_buffer.clear(); + for_each_neighbor(current, ctx.eps, ctx.x, ctx.x_stride, ctx.y, ctx.y_stride, ctx.cell_x, ctx.cell_y, + ctx.ordered_indices, ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + neighbor_buffer.push_back(neighbor); + return true; + }); + + for (uint32_t neighbor : neighbor_buffer) { + if (ctx.is_core[neighbor]) { + int32_t expected = -1; + if (shared_labels[neighbor].compare_exchange_strong(expected, label, std::memory_order_acq_rel)) + local_next.push_back(neighbor); + } else { + int32_t expected = -1; + shared_labels[neighbor].compare_exchange_strong(expected, label, std::memory_order_acq_rel); + } + } + } + + if (!local_next.empty()) { + std::sort(local_next.begin(), local_next.end()); + local_next.erase(std::unique(local_next.begin(), local_next.end()), local_next.end()); + std::lock_guard lock(next_mutex); + next_frontier.insert(next_frontier.end(), local_next.begin(), local_next.end()); + } + }); + + if (next_frontier.empty()) + break; + + std::sort(next_frontier.begin(), next_frontier.end()); + next_frontier.erase(std::unique(next_frontier.begin(), next_frontier.end()), next_frontier.end()); + frontier.swap(next_frontier); + } + } + + for (std::size_t i = 0; i < ctx.count; ++i) + labels[i] = shared_labels[i].load(std::memory_order_relaxed); +} + +void union_find_expand(const ExpansionContext &ctx, std::vector &labels) { + constexpr uint32_t invalid = std::numeric_limits::max(); + std::vector> parents(ctx.count); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (ctx.is_core[i]) + parents[i].store(static_cast(i), std::memory_order_relaxed); + else + parents[i].store(invalid, std::memory_order_relaxed); + } + + auto find_root = [&](uint32_t node) { + uint32_t parent = parents[node].load(std::memory_order_acquire); + if (parent == invalid) + return invalid; + + while (true) { + uint32_t grandparent = parents[parent].load(std::memory_order_acquire); + if (grandparent == parent) { + if (parent != node) + parents[node].store(parent, std::memory_order_release); + return parent; + } + parents[node].compare_exchange_strong(parent, grandparent, std::memory_order_acq_rel); + node = parent; + parent = parents[node].load(std::memory_order_acquire); + if (parent == invalid) + return invalid; + } + }; + + auto unite = [&](uint32_t a, uint32_t b) { + while (true) { + a = find_root(a); + b = find_root(b); + if (a == invalid || b == invalid || a == b) + return; + + if (a < b) { + uint32_t expected = b; + if (parents[b].compare_exchange_strong(expected, a, std::memory_order_acq_rel)) + return; + } else { + uint32_t expected = a; + if (parents[a].compare_exchange_strong(expected, b, std::memory_order_acq_rel)) + return; + } + } + }; + + const std::size_t union_chunk = ctx.chunk_size == 0 ? 512 : ctx.chunk_size; + utils::parallelize(0, ctx.count, ctx.num_threads, union_chunk, [&](std::size_t begin, std::size_t end) { + for (std::size_t idx = begin; idx < end; ++idx) { + if (!ctx.is_core[idx]) + continue; + + for_each_neighbor(static_cast(idx), ctx.eps, ctx.x, ctx.x_stride, ctx.y, ctx.y_stride, ctx.cell_x, + ctx.cell_y, ctx.ordered_indices, ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + if (ctx.is_core[neighbor]) + unite(static_cast(idx), neighbor); + return true; + }); + } + }); + + std::vector root_for_point(ctx.count, invalid); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (ctx.is_core[i]) + root_for_point[i] = find_root(static_cast(i)); + } + + std::vector component_min(ctx.count, invalid); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (!ctx.is_core[i]) + continue; + const uint32_t root = root_for_point[i]; + if (root == invalid) + continue; + if (component_min[root] > i) + component_min[root] = static_cast(i); + } + + std::vector> components; + components.reserve(ctx.count); + for (std::size_t i = 0; i < ctx.count; ++i) { + if (component_min[i] != invalid) + components.emplace_back(component_min[i], static_cast(i)); + } + + std::sort(components.begin(), components.end()); + + std::vector root_label(ctx.count, -1); + int32_t next_label = 0; + for (const auto &[min_index, root] : components) { + (void)min_index; + root_label[root] = next_label++; + } + + for (std::size_t i = 0; i < ctx.count; ++i) { + if (!ctx.is_core[i]) + continue; + const uint32_t root = root_for_point[i]; + if (root == invalid) + continue; + labels[i] = root_label[root]; + } + + for (std::size_t i = 0; i < ctx.count; ++i) { + if (ctx.is_core[i]) + continue; + + int32_t best_label = -1; + for_each_neighbor(static_cast(i), ctx.eps, ctx.x, ctx.x_stride, ctx.y, ctx.y_stride, ctx.cell_x, + ctx.cell_y, ctx.ordered_indices, ctx.cell_offsets, ctx.unique_keys, [&](uint32_t neighbor) { + if (!ctx.is_core[neighbor]) + return true; + const int32_t candidate = labels[neighbor]; + if (candidate != -1 && (best_label == -1 || candidate < best_label)) + best_label = candidate; + return true; + }); + labels[i] = best_label; + } +} + +inline DBSCANGrid2DL1Result dbscan_grid2d_l1_impl(const uint32_t *x, std::size_t x_stride, const uint32_t *y, + std::size_t y_stride, std::size_t count, + const DBSCANGrid2DL1Params ¶ms, + GridExpansionMode expansion_mode) { + DBSCANGrid2DL1Result result; + result.perf_timing.clear(); + if (count == 0) + return result; + + ScopedTimer total_timer("total", result.perf_timing); + + const uint32_t cell_size = params.eps; + + std::vector cell_x(count); + std::vector cell_y(count); + std::vector keys(count); + std::vector ordered_indices(count); + std::iota(ordered_indices.begin(), ordered_indices.end(), 0U); + + const std::size_t index_chunk = params.chunk_size == 0 ? 1024 : params.chunk_size; + { + ScopedTimer timer("precompute_cells", result.perf_timing); + utils::parallelize(0, count, params.num_threads, index_chunk, [&](std::size_t begin, std::size_t end) { + for (std::size_t i = begin; i < end; ++i) { + const uint32_t cx = cell_of(load_coord(x, x_stride, i), cell_size); + const uint32_t cy = cell_of(load_coord(y, y_stride, i), cell_size); + cell_x[i] = cx; + cell_y[i] = cy; + keys[i] = pack_cell(cx, cy); + } + }); + } + + { + ScopedTimer timer("sort_indices", result.perf_timing); + std::sort(ordered_indices.begin(), ordered_indices.end(), [&](uint32_t lhs, uint32_t rhs) { + const uint64_t key_lhs = keys[lhs]; + const uint64_t key_rhs = keys[rhs]; + if (key_lhs == key_rhs) + return lhs < rhs; + return key_lhs < key_rhs; + }); + } + + std::vector cell_offsets; + cell_offsets.reserve(count + 1); + std::vector unique_keys; + unique_keys.reserve(count); + + { + ScopedTimer timer("build_cell_offsets", result.perf_timing); + std::size_t pos = 0; + while (pos < count) { + const uint64_t key = keys[ordered_indices[pos]]; + unique_keys.push_back(key); + cell_offsets.push_back(pos); + + do { + ++pos; + } while (pos < count && keys[ordered_indices[pos]] == key); + } + cell_offsets.push_back(count); + } + + result.labels.assign(count, -1); + std::vector is_core(count, 0U); + + const uint32_t eps_value = params.eps; + const uint32_t min_samples_value = params.min_samples; + + const std::size_t core_chunk = params.chunk_size == 0 ? 512 : params.chunk_size; + { + ScopedTimer timer("core_detection", result.perf_timing); + utils::parallelize(0, count, params.num_threads, core_chunk, [&](std::size_t begin, std::size_t end) { + for (std::size_t idx = begin; idx < end; ++idx) { + uint32_t neighbor_count = 0; + for_each_neighbor(static_cast(idx), eps_value, x, x_stride, y, y_stride, cell_x, cell_y, + ordered_indices, cell_offsets, unique_keys, [&](uint32_t) { + ++neighbor_count; + return neighbor_count < min_samples_value; + }); + + if (neighbor_count >= min_samples_value) + is_core[idx] = 1U; + } + }); + } + + const ExpansionContext context{x, + x_stride, + y, + y_stride, + count, + eps_value, + min_samples_value, + cell_x, + cell_y, + ordered_indices, + cell_offsets, + unique_keys, + is_core, + params.num_threads, + params.chunk_size}; + + { + ScopedTimer timer("cluster_expansion", result.perf_timing); + switch (expansion_mode) { + case GridExpansionMode::Sequential: + sequential_expand(context, result.labels); + break; + case GridExpansionMode::FrontierParallel: + frontier_expand(context, result.labels); + break; + case GridExpansionMode::UnionFind: + union_find_expand(context, result.labels); + break; + } + } + + return result; +} + +} // namespace dbscan::detail + diff --git a/tests/test_dbscan_grid2d_l1.cpp b/tests/test_dbscan_grid2d_l1.cpp index b08b0d0..034240f 100644 --- a/tests/test_dbscan_grid2d_l1.cpp +++ b/tests/test_dbscan_grid2d_l1.cpp @@ -64,38 +64,45 @@ void load_fixture(const std::filesystem::path &data_path, const std::filesystem: REQUIRE(truth_stream.good()); } +std::vector make_aos(const std::vector &x, const std::vector &y) { + std::vector points(x.size()); + for (std::size_t i = 0; i < points.size(); ++i) + points[i] = dbscan::Grid2DPoint{x[i], y[i]}; + return points; +} + } // namespace TEST_CASE("DBSCANGrid2D_L1 clusters dense neighbors", "[dbscan][grid_l1]") { - // Points are arranged so the Manhattan frontier just connects the first three but leaves the outlier isolated, - // validating that the L1 grid expansion covers diagonals without absorbing distant noise. const std::vector x = {0, 1, 2, 100}; const std::vector y = {0, 0, 1, 200}; - dbscan::DBSCANGrid2D_L1 algo(4, 3); - auto labels = algo.fit_predict(x.data(), y.data(), x.size()); + const dbscan::DBSCANGrid2DL1Params params{4, 3}; + const auto soa_result = dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params); + const auto &labels = soa_result.labels; REQUIRE(labels.size() == x.size()); REQUIRE(labels[0] == labels[1]); REQUIRE(labels[1] == labels[2]); REQUIRE(labels[0] != -1); REQUIRE(labels[3] == -1); + + const auto aos_points = make_aos(x, y); + const auto aos_result = dbscan::dbscan_grid2d_l1_aos(aos_points.data(), aos_points.size(), params); + REQUIRE(aos_result.labels == labels); } TEST_CASE("DBSCANGrid2D_L1 respects min_samples threshold", "[dbscan][grid_l1]") { - // Every point is deliberately spaced just beyond eps so we confirm the min_samples guard suppresses tiny clusters. const std::vector coords = {0, 2, 4}; - dbscan::DBSCANGrid2D_L1 algo(3, 4); - auto labels = algo.fit_predict(coords.data(), coords.data(), coords.size()); - - REQUIRE(labels.size() == coords.size()); - for (int32_t label : labels) { + const dbscan::DBSCANGrid2DL1Params params{3, 4}; + const auto result = dbscan::dbscan_grid2d_l1(coords.data(), 1, coords.data(), 1, coords.size(), params); + REQUIRE(result.labels.size() == coords.size()); + for (int32_t label : result.labels) { REQUIRE(label == -1); } } TEST_CASE("DBSCANGrid2D_L1 matches fixture truth", "[dbscan][grid_l1]") { - // Fixture run mirrors the end-to-end validator to ensure the optimized grid path stays aligned with reference data. const std::filesystem::path root = fixture_root(); const auto data_path = root / "dbscan_static_data.bin"; const auto truth_path = root / "dbscan_static_truth.bin"; @@ -105,33 +112,33 @@ TEST_CASE("DBSCANGrid2D_L1 matches fixture truth", "[dbscan][grid_l1]") { std::vector truth; load_fixture(data_path, truth_path, x, y, truth); - dbscan::DBSCANGrid2D_L1 algo(10, 3); - auto labels = algo.fit_predict(x.data(), y.data(), x.size()); - - REQUIRE(labels.size() == truth.size()); - REQUIRE(labels == truth); + const dbscan::DBSCANGrid2DL1Params params{10, 3}; + const auto sequential = dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params); + REQUIRE(sequential.labels == truth); - dbscan::DBSCANGrid2D_L1Frontier frontier_algo(10, 3, 4); - auto frontier_labels = frontier_algo.fit_predict(x.data(), y.data(), x.size()); - REQUIRE(frontier_labels == truth); + const auto frontier = + dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params, dbscan::GridExpansionMode::FrontierParallel); + REQUIRE(frontier.labels == truth); - dbscan::DBSCANGrid2D_L1UnionFind union_algo(10, 3, 4); - auto union_labels = union_algo.fit_predict(x.data(), y.data(), x.size()); - REQUIRE(union_labels == truth); + const auto union_find = + dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params, dbscan::GridExpansionMode::UnionFind); + REQUIRE(union_find.labels == truth); } TEST_CASE("DBSCANGrid2D_L1 parallel variants align with sequential", "[dbscan][grid_l1][parallel]") { const std::vector x = {0, 1, 2, 5, 40}; const std::vector y = {0, 0, 1, 4, 80}; - dbscan::DBSCANGrid2D_L1 sequential(6, 3); - const auto expected = sequential.fit_predict(x.data(), y.data(), x.size()); + dbscan::DBSCANGrid2DL1Params params{6, 3}; + params.num_threads = 4; + + const auto sequential = dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params); - dbscan::DBSCANGrid2D_L1Frontier frontier(6, 3, 4); - auto frontier_labels = frontier.fit_predict(x.data(), y.data(), x.size()); - REQUIRE(frontier_labels == expected); + const auto frontier = + dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params, dbscan::GridExpansionMode::FrontierParallel); + REQUIRE(frontier.labels == sequential.labels); - dbscan::DBSCANGrid2D_L1UnionFind union_algo(6, 3, 4); - auto union_labels = union_algo.fit_predict(x.data(), y.data(), x.size()); - REQUIRE(union_labels == expected); + const auto union_find = + dbscan::dbscan_grid2d_l1(x.data(), 1, y.data(), 1, x.size(), params, dbscan::GridExpansionMode::UnionFind); + REQUIRE(union_find.labels == sequential.labels); } diff --git a/tools/dbscan_dataset_validator.cpp b/tools/dbscan_dataset_validator.cpp index 88ad292..6afc06b 100644 --- a/tools/dbscan_dataset_validator.cpp +++ b/tools/dbscan_dataset_validator.cpp @@ -453,21 +453,25 @@ int main(int argc, char **argv) { } if (x_coords.size() != y_coords.size()) throw std::runtime_error("Mismatch between x and y coordinate counts"); + dbscan::DBSCANGrid2DL1Params params{eps_int, static_cast(options.min_samples)}; for (auto mode : options.grid_modes) { const auto info = grid_variant_info(mode); std::cout << "\n[" << info.label << "] Running clustering..." << std::flush; const auto start = std::chrono::steady_clock::now(); - dbscan::DBSCANGrid2D_L1 grid_algo(eps_int, static_cast(options.min_samples), 0, 0, mode); - const auto labels = grid_algo.fit_predict(x_coords.data(), y_coords.data(), x_coords.size()); - std::vector mismatches; - const auto metrics = evaluate(labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + const auto result = dbscan::dbscan_grid2d_l1(x_coords.data(), 1, y_coords.data(), 1, x_coords.size(), params, + mode); const auto end = std::chrono::steady_clock::now(); const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); std::cout << " done in " << elapsed_ms << " ms" << std::endl; - if (!grid_algo.perf_timing_.entries().empty()) { + + std::vector mismatches; + const auto metrics = + evaluate(result.labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + + if (!result.perf_timing.entries().empty()) { // Emit step-level timings so dataset runs surface bottlenecks without separate profiling passes. std::cout << "[" << info.label << "] component timings" << std::endl; - for (const auto &entry : grid_algo.perf_timing_.entries()) + for (const auto &entry : result.perf_timing.entries()) std::cout << " " << entry << std::endl; } results.push_back({info.label, metrics}); From 671914aa9cf502addc3a008089e65f543faaadff Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Thu, 2 Oct 2025 08:17:59 -0700 Subject: [PATCH 12/12] refactor --- tools/dbscan_dataset_validator.cpp | 75 ++++++++++++++++++------------ 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/tools/dbscan_dataset_validator.cpp b/tools/dbscan_dataset_validator.cpp index 6afc06b..c132b4a 100644 --- a/tools/dbscan_dataset_validator.cpp +++ b/tools/dbscan_dataset_validator.cpp @@ -454,39 +454,52 @@ int main(int argc, char **argv) { if (x_coords.size() != y_coords.size()) throw std::runtime_error("Mismatch between x and y coordinate counts"); dbscan::DBSCANGrid2DL1Params params{eps_int, static_cast(options.min_samples)}; + std::vector aos_points; + aos_points.reserve(x_coords.size()); + for (std::size_t i = 0; i < x_coords.size(); ++i) + aos_points.push_back(dbscan::Grid2DPoint{x_coords[i], y_coords[i]}); for (auto mode : options.grid_modes) { const auto info = grid_variant_info(mode); - std::cout << "\n[" << info.label << "] Running clustering..." << std::flush; - const auto start = std::chrono::steady_clock::now(); - const auto result = dbscan::dbscan_grid2d_l1(x_coords.data(), 1, y_coords.data(), 1, x_coords.size(), params, - mode); - const auto end = std::chrono::steady_clock::now(); - const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); - std::cout << " done in " << elapsed_ms << " ms" << std::endl; - - std::vector mismatches; - const auto metrics = - evaluate(result.labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); - - if (!result.perf_timing.entries().empty()) { - // Emit step-level timings so dataset runs surface bottlenecks without separate profiling passes. - std::cout << "[" << info.label << "] component timings" << std::endl; - for (const auto &entry : result.perf_timing.entries()) - std::cout << " " << entry << std::endl; - } - results.push_back({info.label, metrics}); - - if (options.mismatch_output_dir && !mismatches.empty()) { - std::filesystem::create_directories(*options.mismatch_output_dir); - auto file_path = *options.mismatch_output_dir / (info.slug + "_mismatches.txt"); - std::ofstream out(file_path); - if (!out) - throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); - for (std::size_t index : mismatches) - out << index << '\n'; - std::cout << "[" << info.label << "] Wrote " << mismatches.size() << " mismatches to " << file_path - << "\n"; - } + const auto run_layout = [&](const std::string &layout, auto &&invoke) { + const std::string display_label = info.label + "/" + layout; + std::cout << "\n[" << display_label << "] Running clustering..." << std::flush; + const auto start = std::chrono::steady_clock::now(); + const auto result = invoke(); + const auto end = std::chrono::steady_clock::now(); + const auto elapsed_ms = std::chrono::duration_cast(end - start).count(); + std::cout << " done in " << elapsed_ms << " ms" << std::endl; + + std::vector mismatches; + const auto metrics = + evaluate(result.labels, truth_labels, options.mismatch_output_dir ? &mismatches : nullptr); + + if (!result.perf_timing.entries().empty()) { + std::cout << "[" << display_label << "] component timings" << std::endl; + for (const auto &entry : result.perf_timing.entries()) + std::cout << " " << entry << std::endl; + } + results.push_back({display_label, metrics}); + + if (options.mismatch_output_dir && !mismatches.empty()) { + std::filesystem::create_directories(*options.mismatch_output_dir); + auto file_path = *options.mismatch_output_dir / (info.slug + "_" + layout + "_mismatches.txt"); + std::ofstream out(file_path); + if (!out) + throw std::runtime_error("Failed to open mismatch output file: " + file_path.string()); + for (std::size_t index : mismatches) + out << index << '\n'; + std::cout << "[" << display_label << "] Wrote " << mismatches.size() << " mismatches to " << file_path + << "\n"; + } + }; + + run_layout("soa", [&]() { + return dbscan::dbscan_grid2d_l1(x_coords.data(), 1, y_coords.data(), 1, x_coords.size(), params, mode); + }); + + run_layout("aos", [&]() { + return dbscan::dbscan_grid2d_l1_aos(aos_points.data(), aos_points.size(), params, mode); + }); } }