Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ All significant changes to this project will be documented in this file.
### Breaking changes

* `CountMinSketch` now has a type parameter for the count type. Supported types are the unsigned integers `u8` through `u64` and the signed integers `i8` through `i64`.
* `HllUnion::get_result` is renamed to `HllUnion::to_sketch`.

### New features

* `CountMinSketch` with unsigned values now supports `halve` and `decay` operations.
* `CpcSketch` and `CpcUnion` are now available for cardinality estimation.

## v0.2.0 (2025-01-14)
## v0.2.0 (2026-01-14)

This is the initial release. It includes the following sketches:

Expand Down
27 changes: 24 additions & 3 deletions datasketches/src/cpc/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
//! because of the partially inverted logic in the Sliding flavor, where the presence of coupons
//! is sometimes indicated by the ABSENCE of row_col pairs in the surprises table.)
//!
//! How does [`CpcUnion::get_result`] work?
//! How does [`CpcUnion::to_sketch`] work?
//!
//! If the union has an Accumulator state, make a copy of that sketch.
//!
Expand Down Expand Up @@ -116,8 +116,29 @@ impl CpcUnion {
self.lg_k
}

/// Returns the result of union operations as a CPC sketch.
pub fn get_result(&self) -> CpcSketch {
/// Get the union result as a new sketch.
///
/// # Examples
///
/// ```
/// # use datasketches::cpc::CpcUnion;
/// # use datasketches::cpc::CpcSketch;
///
/// let mut s1 = CpcSketch::new(12);
/// s1.update(&"apple");
///
/// let mut s2 = CpcSketch::new(12);
/// s2.update(&"apple");
/// s2.update(&"banana");
///
/// let mut union = CpcUnion::new(12);
/// union.update(&s1);
/// union.update(&s2);
///
/// let result = union.to_sketch();
/// assert_eq!(result.estimate().trunc(), 2.0);
/// ```
pub fn to_sketch(&self) -> CpcSketch {
match &self.state {
UnionState::Accumulator(sketch) => {
if sketch.is_empty() {
Expand Down
2 changes: 1 addition & 1 deletion datasketches/src/hll/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
//! union.update(&left);
//! union.update(&right);
//!
//! let result = union.get_result(HllType::Hll8);
//! let result = union.to_sketch(HllType::Hll8);
//! assert!(result.estimate() >= 2.0);
//! ```

Expand Down
12 changes: 6 additions & 6 deletions datasketches/src/hll/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl HllUnion {
/// # use datasketches::hll::HllUnion;
/// let mut union = HllUnion::new(10);
/// union.update_value("apple");
/// let _result = union.get_result(HllType::Hll8);
/// let _result = union.to_sketch(HllType::Hll8);
/// ```
pub fn new(lg_max_k: u8) -> Self {
assert!(
Expand All @@ -101,7 +101,7 @@ impl HllUnion {
/// # use datasketches::hll::HllUnion;
/// let mut union = HllUnion::new(10);
/// union.update_value("apple");
/// let _result = union.get_result(HllType::Hll8);
/// let _result = union.to_sketch(HllType::Hll8);
/// ```
pub fn update_value<T: Hash>(&mut self, value: T) {
self.gadget.update(value);
Expand All @@ -128,7 +128,7 @@ impl HllUnion {
/// let mut union = HllUnion::new(10);
/// union.update(&left);
/// union.update(&right);
/// let result = union.get_result(HllType::Hll8);
/// let result = union.to_sketch(HllType::Hll8);
/// assert!(result.estimate() >= 2.0);
/// ```
pub fn update(&mut self, sketch: &HllSketch) {
Expand Down Expand Up @@ -237,7 +237,7 @@ impl HllUnion {
self.gadget = HllSketch::from_mode(final_lg_k, Mode::Array8(new_array));
}

/// Get the union result as a new sketch
/// Get the union result as a new sketch.
///
/// Returns a copy of the internal gadget sketch with the specified target HLL type.
/// If the requested type differs from the gadget's type, conversion is performed.
Expand All @@ -253,10 +253,10 @@ impl HllUnion {
/// # use datasketches::hll::HllUnion;
/// let mut union = HllUnion::new(10);
/// union.update_value("apple");
/// let result = union.get_result(HllType::Hll6);
/// let result = union.to_sketch(HllType::Hll6);
/// assert!(result.estimate() >= 1.0);
/// ```
pub fn get_result(&self, hll_type: HllType) -> HllSketch {
pub fn to_sketch(&self, hll_type: HllType) -> HllSketch {
let gadget_type = self.gadget.target_type();

if hll_type == gadget_type {
Expand Down
16 changes: 8 additions & 8 deletions datasketches/tests/cpc_union_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const RELATIVE_ERROR_FOR_LG_K_11: f64 = 0.02;
#[test]
fn test_empty() {
let union = CpcUnion::new(11);
let sketch = union.get_result();
let sketch = union.to_sketch();
assert!(sketch.is_empty());
assert_eq!(sketch.estimate(), 0.0);
}
Expand All @@ -37,13 +37,13 @@ fn test_two_values() {
let mut union = CpcUnion::new(11);
union.update(&sketch);

let result = union.get_result();
let result = union.to_sketch();
assert!(!result.is_empty());
assert_eq!(result.estimate(), 1.0);

sketch.update(2);
union.update(&sketch);
let result = union.get_result();
let result = union.to_sketch();
assert!(!result.is_empty());
assert_that!(
sketch.estimate(),
Expand All @@ -60,7 +60,7 @@ fn test_custom_seed() {

let mut union = CpcUnion::with_seed(11, 123);
union.update(&sketch);
let result = union.get_result();
let result = union.to_sketch();
assert!(!result.is_empty());
assert_that!(
result.estimate(),
Expand Down Expand Up @@ -94,7 +94,7 @@ fn test_large_values() {
}
union.update(&tmp);
}
let result = union.get_result();
let result = union.to_sketch();
assert!(!result.is_empty());
assert_eq!(result.num_coupons(), union.num_coupons());
let estimate = sketch.estimate();
Expand All @@ -112,7 +112,7 @@ fn test_reduce_k_empty() {
}
let mut union = CpcUnion::new(12);
union.update(&sketch);
let result = union.get_result();
let result = union.to_sketch();
assert_eq!(result.lg_k(), 11);
assert_that!(
result.estimate(),
Expand All @@ -136,7 +136,7 @@ fn test_reduce_k_sparse() {
}
union.update(&sketch11);

let result = union.get_result();
let result = union.to_sketch();
assert_eq!(result.lg_k(), 11);
assert_that!(
result.estimate(),
Expand All @@ -160,7 +160,7 @@ fn test_reduce_k_window() {
}
union.update(&sketch11);

let result = union.get_result();
let result = union.to_sketch();
assert_eq!(result.lg_k(), 11);
assert_that!(
result.estimate(),
Expand Down
16 changes: 8 additions & 8 deletions datasketches/tests/hll_union_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ fn test_union_mixed_modes() {
union.update(&sketch1);
union.update(&sketch2);

let result = union.get_result(HllType::Hll8);
let result = union.to_sketch(HllType::Hll8);
let estimate = result.estimate();

// Should estimate ~10,003 unique values
Expand Down Expand Up @@ -205,9 +205,9 @@ fn test_union_mixed_hll_types() {
union.update(&sketch3);

// Test getting result in different types
let result4 = union.get_result(HllType::Hll4);
let result6 = union.get_result(HllType::Hll6);
let result8 = union.get_result(HllType::Hll8);
let result4 = union.to_sketch(HllType::Hll4);
let result6 = union.to_sketch(HllType::Hll6);
let result8 = union.to_sketch(HllType::Hll8);

assert_eq!(result4.target_type(), HllType::Hll4);
assert_eq!(result6.target_type(), HllType::Hll6);
Expand Down Expand Up @@ -257,7 +257,7 @@ fn test_union_lg_k_handling() {
union.update(&sketch3);
assert_eq!(union.lg_config_k(), 8, "Gadget should downsize to lg_k=8");

let result = union.get_result(HllType::Hll8);
let result = union.to_sketch(HllType::Hll8);
let estimate = result.estimate();

// Should estimate ~10,000 unique values (0-9,999)
Expand All @@ -276,7 +276,7 @@ fn test_union_lg_k_handling() {
}

union2.update(&sketch_high_precision);
let result2 = union2.get_result(HllType::Hll8);
let result2 = union2.to_sketch(HllType::Hll8);
assert_eq!(result2.lg_config_k(), 10, "Result should be at lg_k=10");

let estimate2 = result2.estimate();
Expand Down Expand Up @@ -459,7 +459,7 @@ fn test_union_associativity() {
let mut union1 = HllUnion::new(12);
union1.update(&sketch_a);
union1.update(&sketch_b);
let ab_sketch = union1.get_result(HllType::Hll8);
let ab_sketch = union1.to_sketch(HllType::Hll8);

let mut union2 = HllUnion::new(12);
union2.update(&ab_sketch);
Expand All @@ -470,7 +470,7 @@ fn test_union_associativity() {
let mut union3 = HllUnion::new(12);
union3.update(&sketch_b);
union3.update(&sketch_c);
let bc_sketch = union3.get_result(HllType::Hll8);
let bc_sketch = union3.to_sketch(HllType::Hll8);

let mut union4 = HllUnion::new(12);
union4.update(&sketch_a);
Expand Down