diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b53f3a8..91029f0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,6 +18,6 @@ jobs: python-version: ${{ matrix.python }} - name: Install dependencies run: | - pip install nose2 numpy scipy scikit-learn + pip install nose2 numpy scipy scikit-learn gmpy2 - name: Run tests run: nose2 -v --pretty-assert diff --git a/setup.py b/setup.py index 4767be2..68c2aad 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ platforms = 'any', install_requires = [ 'numpy', + 'gmpy2', ], tests_require = [ 'nose2', diff --git a/simhash/__init__.py b/simhash/__init__.py index 4fd83be..789ee97 100644 --- a/simhash/__init__.py +++ b/simhash/__init__.py @@ -10,6 +10,7 @@ from itertools import groupby import numpy as np +from gmpy2 import popcount try: from collections.abc import Iterable @@ -160,12 +161,8 @@ def _bitarray_from_bytes(b): def distance(self, another): assert self.f == another.f - x = (self.value ^ another.value) & ((1 << self.f) - 1) - ans = 0 - while x: - ans += 1 - x &= x - 1 - return ans + # Keep the f-bit mask: gmpy2.popcount() returns -1 for a negative operand. + return popcount((self.value ^ another.value) & ((1 << self.f) - 1)) class SimhashIndex(object):