count/include/count_min.hpp

-Original file line number
+Diff line change
@@ Expand Up / @@ -370,7 +370,7 @@ class count_min_sketch{ @@
       std::vector<W, Allocator> _sketch_array; // the array stored by the sketch
       uint64_t _seed;
       W _total_weight;
-      std::vector<uint64_t> hash_seeds;
+      std::vector<uint64_t> _hash_seeds;
       enum flags {IS_EMPTY};
       static const uint8_t PREAMBLE_LONGS_SHORT = 2; // Empty -> need second byte for sketch parameters
@@ Expand Down @@

count/include/count_min_impl.hpp

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -39,7 +39,9 @@ _num_buckets(num_buckets),
  
    _sketch_array((num_hashes*num_buckets < 1<<30) ? num_hashes*num_buckets : 0, 0, _allocator),

    _seed(seed),

    _total_weight(0) {

-     if (num_buckets < 3) throw std::invalid_argument("Using fewer than 3 buckets incurs relative error greater than 1.");

+     if (num_buckets < 3) {

+       throw std::invalid_argument("Using fewer than 3 buckets incurs relative error greater than 1.");

+     }

      // This check is to ensure later compatibility with a Java implementation whose maximum size can only

      // be 2^31-1.  We check only against 2^30 for simplicity.

    @@ -50,10 +52,10 @@ _total_weight(0) {
  
      std::default_random_engine rng(_seed);

      std::uniform_int_distribution<uint64_t> extra_hash_seeds(0, std::numeric_limits<uint64_t>::max());

-     hash_seeds.reserve(num_hashes);

+     _hash_seeds.reserve(num_hashes);

      for (uint64_t i=0; i < num_hashes; ++i) {

-       hash_seeds.push_back(extra_hash_seeds(rng) + _seed); // Adds the global seed to all hash functions.

+       _hash_seeds.push_back(extra_hash_seeds(rng) + _seed); // Adds the global seed to all hash functions.

      }

    }

    @@ -128,7 +130,7 @@ std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t
  
      sketch_update_locations.reserve(_num_hashes);

      uint64_t hash_seed_index = 0;

-     for (const auto &it: hash_seeds) {

+     for (const auto &it: _hash_seeds) {

        HashState hashes;

        MurmurHash3_x64_128(item, size, it, hashes); // ? BEWARE OVERFLOW.

        uint64_t hash = hashes.h1;

    @@ -147,7 +149,9 @@ W count_min_sketch<W,A>::get_estimate(int64_t item) const {return get_estimate(&
  
    template<typename W, typename A>

    W count_min_sketch<W,A>::get_estimate(const std::string& item) const {

-     if (item.empty()) return 0; // Empty strings are not inserted into the sketch.

+     if (item.empty()) {

+       return 0; // Empty strings are not inserted into the sketch.

+     }

      return get_estimate(item.c_str(), item.length());

    }

    @@ -176,7 +180,9 @@ void count_min_sketch<W,A>::update(int64_t item, W weight) {
  
    template<typename W, typename A>

    void count_min_sketch<W,A>::update(const std::string& item, W weight) {

-     if (item.empty()) return;

+     if (item.empty()) {

+       return;

+     }

      update(item.c_str(), item.length(), weight);

    }

    @@ -201,7 +207,9 @@ W count_min_sketch<W,A>::get_upper_bound(int64_t item) const {return get_upper_b
  
    template<typename W, typename A>

    W count_min_sketch<W,A>::get_upper_bound(const std::string& item) const {

-     if (item.empty()) return 0; // Empty strings are not inserted into the sketch.

+     if (item.empty()) {

+       return 0; // Empty strings are not inserted into the sketch.

+     }

      return get_upper_bound(item.c_str(), item.length());

    }

    @@ -218,7 +226,9 @@ W count_min_sketch<W,A>::get_lower_bound(int64_t item) const {return get_lower_b
  
    template<typename W, typename A>

    W count_min_sketch<W,A>::get_lower_bound(const std::string& item) const {

-     if (item.empty()) return 0; // Empty strings are not inserted into the sketch.

+     if (item.empty()) {

+       return 0; // Empty strings are not inserted into the sketch.

+     }

      return get_lower_bound(item.c_str(), item.length());

    }

    @@ -290,7 +300,9 @@ void count_min_sketch<W,A>::serialize(std::ostream& os) const {
  
      write(os, nhashes);

      write(os, seed_hash);

      write(os, unused8);

-     if (is_empty()) return; // sketch is empty, no need to write further bytes.

+     if (is_empty()) {

+       return; // sketch is empty, no need to write further bytes.

+     }

      // Long 2

      write(os, _total_weight);

    @@ -327,7 +339,9 @@ auto count_min_sketch<W,A>::deserialize(std::istream& is, uint64_t seed, const A
  
      }

      count_min_sketch c(nhashes, nbuckets, seed, allocator);

      const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;

-     if (is_empty == 1) return c; // sketch is empty, no need to read further.

+     if (is_empty == 1) {

+       return c; // sketch is empty, no need to read further.

+     }

      // Set the sketch weight and read in the sketch values

      const auto weight = read<W>(is);

    @@ -373,7 +387,9 @@ auto count_min_sketch<W,A>::serialize(unsigned header_size_bytes) const -> vecto
  
      ptr += copy_to_mem(nhashes, ptr);

      ptr += copy_to_mem(seed_hash, ptr);

      ptr += copy_to_mem(null_characters_8, ptr);

-     if (is_empty()) return bytes; // sketch is empty, no need to write further bytes.

+     if (is_empty()) {

+       return bytes; // sketch is empty, no need to write further bytes.

+     }

      // Long 2

      const W t_weight = _total_weight;

    @@ -423,7 +439,9 @@ auto count_min_sketch<W,A>::deserialize(const void* bytes, size_t size, uint64_t
  
      }

      count_min_sketch c(nhashes, nbuckets, seed, allocator);

      const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;

-     if (is_empty) return c; // sketch is empty, no need to read further.

+     if (is_empty) {

+       return c; // sketch is empty, no need to read further.

+     }

      ensure_minimum_memory(size, sizeof(W) * (1 + nbuckets * nhashes));

    @@ -449,8 +467,9 @@ string<A> count_min_sketch<W,A>::to_string() const {
  
      // count the number of used entries in the sketch

      uint64_t num_nonzero = 0;

      for (const auto entry: _sketch_array) {

-       if (entry != static_cast<W>(0.0))

+       if (entry != static_cast<W>(0.0)) {

          ++num_nonzero;

+       }

      }

      // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.

fix: Add the missing braces and keep class member naming consistent #479

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

SYaoJun wants to merge 1 commit into apache:master from SYaoJun:fix

+33 −14

-Original file line number
+Diff line change
@@ Expand Up / @@ -370,7 +370,7 @@ class count_min_sketch{ @@
       std::vector<W, Allocator> _sketch_array; // the array stored by the sketch
       uint64_t _seed;
       W _total_weight;
-      std::vector<uint64_t> hash_seeds;
+      std::vector<uint64_t> _hash_seeds;
       enum flags {IS_EMPTY};
       static const uint8_t PREAMBLE_LONGS_SHORT = 2; // Empty -> need second byte for sketch parameters
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: Add the missing braces and keep class member naming consistent #479

Diff view

Diff view

There are no files selected for viewing

fix: Add the missing braces and keep class member naming consistent #479

Are you sure you want to change the base?

fix: Add the missing braces and keep class member naming consistent #479

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing