From 1ee85b567da759a0ab09905ea5b8768a13b8ba20 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 09:16:45 +0100 Subject: [PATCH 001/118] attempt to refactor nn graph building --- pygsp/graphs/nngraphs/nngraph.py | 117 ++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 34 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 397af4e7..95c2c5cc 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -10,6 +10,21 @@ _logger = utils.build_logger(__name__) +# conversion between the FLANN conventions and the various backend functions +_dist_translation = { + 'scipy-kdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf + }, + 'scipy-pdist' : { + 'euclidean': 'euclidean', + 'manhattan': 'cityblock', + 'max_dist': 'chebyshev', + 'minkowski': 'minkowski' + }, + + } def _import_pfl(): try: @@ -20,6 +35,46 @@ def _import_pfl(): 'pip (or conda) install pyflann (or pyflann3).') return pfl + + +def _knn_sp_kdtree(_X, _num_neighbors, _dist_type, _order=0): + kdt = spatial.KDTree(_X) + D, NN = kdt.query(_X, k=(_num_neighbors + 1), + p=_dist_translation['scipy-kdtree'][_dist_type]) + return NN, D + +def _knn_flann(_X, _num_neighbors, _dist_type, _order): + pfl = _import_pfl() + pfl.set_distance_type(_dist_type, order=_order) + flann = pfl.FLANN() + + # Default FLANN parameters (I tried changing the algorithm and + # testing performance on huge matrices, but the default one + # seems to work best). + NN, D = flann.nn(_X, _X, num_neighbors=(_num_neighbors + 1), + algorithm='kdtree') + return NN, D + +def _radius_sp_kdtree(_X, _epsilon, _dist_type, order=0): + kdt = spatial.KDTree(_X) + D, NN = kdt.query(_X, k=None, distance_upper_bound=_epsilon, + p=_dist_translation['scipy-kdtree'][_dist_type]) + return NN, D + +def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order): + pd = spatial.distance.squareform( + spatial.distance.pdist(_X, + _dist_translation['scipy-pdist'][_dist_type], + p=_order)) + pds = np.sort(pd)[:, 0:_num_neighbors+1] + pdi = pd.argsort()[:, 0:_num_neighbors+1] + return pdi, pds + +def _radius_sp_pdist(): + raise NotImplementedError() + +def _radius_flann(): + raise NotImplementedError() class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. @@ -33,9 +88,11 @@ class NNGraph(Graph): Type of nearest neighbor graph to create. The options are 'knn' for k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default is 'knn'). - use_flann : bool, optional - Use Fast Library for Approximate Nearest Neighbors (FLANN) or not. - (default is False) + backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} + Type of the backend for graph construction. + - 'scipy-kdtree'(default) will use scipy.spatial.KDTree + - 'scipy-pdist' will use scipy.spatial.distance.pdist + - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) center : bool, optional Center the data so that it has zero mean (default is True) rescale : bool, optional @@ -74,20 +131,34 @@ class NNGraph(Graph): """ - def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, + def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None, plotting={}, symmetrize_type='average', dist_type='euclidean', order=0, **kwargs): self.Xin = Xin self.NNtype = NNtype - self.use_flann = use_flann + self.backend = backend self.center = center self.rescale = rescale self.k = k self.sigma = sigma self.epsilon = epsilon - + _dist_translation['scipy-kdtree']['minkowski'] = order + + self._nn_functions = { + 'knn': { + 'scipy-kdtree':_knn_sp_kdtree, + 'scipy-pdist': _knn_sp_pdist, + 'flann': _knn_flann + }, + 'radius': { + 'scipy-kdtree':_radius_sp_kdtree, + 'scipy-pdist': _radius_sp_pdist, + 'flann': _radius_flann + }, + } + if gtype is None: gtype = 'nearest neighbors' else: @@ -108,33 +179,15 @@ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, scale = np.power(N, 1. / float(min(d, 3))) / 10. Xout *= scale / bounding_radius - # Translate distance type string to corresponding Minkowski order. - dist_translation = {"euclidean": 2, - "manhattan": 1, - "max_dist": np.inf, - "minkowski": order - } + if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) spv = np.zeros((N * k)) - if self.use_flann: - pfl = _import_pfl() - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - - # Default FLANN parameters (I tried changing the algorithm and - # testing performance on huge matrices, but the default one - # seems to work best). - NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1), - algorithm='kdtree') - - else: - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=(k + 1), - p=dist_translation[dist_type]) + NN, D = self._nn_functions[NNtype][backend](Xout, k, + dist_type, order) for i in range(N): spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) @@ -144,13 +197,9 @@ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, elif self.NNtype == 'radius': - kdt = spatial.KDTree(Xout) - D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon, - p=dist_translation[dist_type]) - count = 0 - for i in range(N): - count = count + len(NN[i]) - + NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) + count = sum(map(len, NN)) + spi = np.zeros((count)) spj = np.zeros((count)) spv = np.zeros((count)) From 4bacd5c70579c68647daf794bcb2078281a528f0 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 11:14:21 +0100 Subject: [PATCH 002/118] update tests --- pygsp/graphs/nngraphs/nngraph.py | 7 ++++--- pygsp/tests/test_graphs.py | 29 ++++++++++++++++------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 95c2c5cc..4b9f2f3d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -91,7 +91,7 @@ class NNGraph(Graph): backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} Type of the backend for graph construction. - 'scipy-kdtree'(default) will use scipy.spatial.KDTree - - 'scipy-pdist' will use scipy.spatial.distance.pdist + - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) center : bool, optional Center the data so that it has zero mean (default is True) @@ -187,7 +187,7 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, spv = np.zeros((N * k)) NN, D = self._nn_functions[NNtype][backend](Xout, k, - dist_type, order) + dist_type, order) for i in range(N): spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) @@ -197,7 +197,8 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, elif self.NNtype == 'radius': - NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) + NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, + dist_type, order) count = sum(map(len, NN)) spi = np.zeros((count)) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index dc51fec5..afde1947 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -182,19 +182,22 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - - for dist_type in dist_types: - - # Only p-norms with 1<=p<=infinity permitted. - if dist_type != 'minkowski': - graphs.NNGraph(Xin, NNtype='radius', dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', dist_type=dist_type) - - # Distance type unsupported in the C bindings, - # use the C++ bindings instead. - if dist_type != 'max_dist': - graphs.NNGraph(Xin, use_flann=True, NNtype='knn', - dist_type=dist_type) + backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + for cur_backend in backends: + for dist_type in dist_types: + + # Only p-norms with 1<=p<=infinity permitted. + if dist_type != 'minkowski': + graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, + dist_type=dist_type) + graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, + dist_type=dist_type) + + # Distance type unsupported in the C bindings, + # use the C++ bindings instead. + if dist_type != 'max_dist': + graphs.NNGraph(Xin, backend=cur_backend, NNtype='knn', + dist_type=dist_type) def test_bunny(self): graphs.Bunny() From 00bbcdd37579cae13a24e620df6e0bec303f1883 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 11:42:11 +0100 Subject: [PATCH 003/118] fix typo --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 4b9f2f3d..f3fc9908 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -197,7 +197,7 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, elif self.NNtype == 'radius': - NN, D = self.__nn_functions[NNtype][backend](Xout, epsilon, + NN, D = self._nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) count = sum(map(len, NN)) From b822333ac85adbbc69b85453732ce5073efbf1be Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 12:30:45 +0100 Subject: [PATCH 004/118] fix tests (avoiding not implemented combinations) --- pygsp/graphs/nngraphs/nngraph.py | 4 ++-- pygsp/tests/test_graphs.py | 33 ++++++++++++++++++++------------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index f3fc9908..d7b112fd 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -70,10 +70,10 @@ def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order): pdi = pd.argsort()[:, 0:_num_neighbors+1] return pdi, pds -def _radius_sp_pdist(): +def _radius_sp_pdist(_X, _epsilon, _dist_type, order=0): raise NotImplementedError() -def _radius_flann(): +def _radius_flann(_X, _epsilon, _dist_type, order=0): raise NotImplementedError() class NNGraph(Graph): diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index afde1947..cdbad6ce 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -183,21 +183,30 @@ def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + order=3 # for minkowski + for cur_backend in backends: - for dist_type in dist_types: - - # Only p-norms with 1<=p<=infinity permitted. + for dist_type in dist_types: if dist_type != 'minkowski': - graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, - dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, - dist_type=dist_type) - - # Distance type unsupported in the C bindings, - # use the C++ bindings instead. - if dist_type != 'max_dist': - graphs.NNGraph(Xin, backend=cur_backend, NNtype='knn', + # curently radius only implemented with scipy kdtree + if cur_backend == 'scipy-kdtree': + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, dist_type=dist_type) + else: + # Only p-norms with 1<=p<=infinity permitted. + # flann only accepts integer orders + if cur_backend == 'scipy-kdtree': + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order) + def test_bunny(self): graphs.Bunny() From 38aebd0f77af6a5cf1ed733fb80e31d95b111434 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 13:55:46 +0100 Subject: [PATCH 005/118] - fix missing space after colon in dictionary - do not use underscores in functions args --- pygsp/graphs/nngraphs/nngraph.py | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d7b112fd..2d0fce2d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -37,37 +37,37 @@ def _import_pfl(): -def _knn_sp_kdtree(_X, _num_neighbors, _dist_type, _order=0): - kdt = spatial.KDTree(_X) - D, NN = kdt.query(_X, k=(_num_neighbors + 1), - p=_dist_translation['scipy-kdtree'][_dist_type]) +def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): + kdt = spatial.KDTree(X) + D, NN = kdt.query(X, k=(num_neighbors + 1), + p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D -def _knn_flann(_X, _num_neighbors, _dist_type, _order): +def _knn_flann(X, num_neighbors, dist_type, order): pfl = _import_pfl() - pfl.set_distance_type(_dist_type, order=_order) + pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = flann.nn(_X, _X, num_neighbors=(_num_neighbors + 1), + NN, D = flann.nn(X, X, num_neighbors=(num_neighbors + 1), algorithm='kdtree') return NN, D -def _radius_sp_kdtree(_X, _epsilon, _dist_type, order=0): - kdt = spatial.KDTree(_X) - D, NN = kdt.query(_X, k=None, distance_upper_bound=_epsilon, - p=_dist_translation['scipy-kdtree'][_dist_type]) +def _radius_sp_kdtree(X, epsilon, dist_type, order=0): + kdt = spatial.KDTree(X) + D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, + p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D -def _knn_sp_pdist(_X, _num_neighbors, _dist_type, _order): +def _knn_sp_pdist(X, num_neighbors, dist_type, _order): pd = spatial.distance.squareform( - spatial.distance.pdist(_X, - _dist_translation['scipy-pdist'][_dist_type], + spatial.distance.pdist(X, + _dist_translation['scipy-pdist'][dist_type], p=_order)) - pds = np.sort(pd)[:, 0:_num_neighbors+1] - pdi = pd.argsort()[:, 0:_num_neighbors+1] + pds = np.sort(pd)[:, 0:num_neighbors+1] + pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds def _radius_sp_pdist(_X, _epsilon, _dist_type, order=0): @@ -148,12 +148,12 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, self._nn_functions = { 'knn': { - 'scipy-kdtree':_knn_sp_kdtree, + 'scipy-kdtree': _knn_sp_kdtree, 'scipy-pdist': _knn_sp_pdist, 'flann': _knn_flann }, 'radius': { - 'scipy-kdtree':_radius_sp_kdtree, + 'scipy-kdtree': _radius_sp_kdtree, 'scipy-pdist': _radius_sp_pdist, 'flann': _radius_flann }, From 524c60fcde3a68b034dcdeaa85cef30256639059 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 14:09:03 +0100 Subject: [PATCH 006/118] fix (matlab) GSP url --- README.rst | 2 +- pygsp/tests/test_graphs.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2d6c59d4..99d27364 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ The documentation is available on `Read the Docs `_ and development takes place on `GitHub `_. -(A `Matlab counterpart `_ exists.) +(A `Matlab counterpart `_ exists.) The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index cdbad6ce..fd66deda 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -186,7 +186,8 @@ def test_nngraph(self): order=3 # for minkowski for cur_backend in backends: - for dist_type in dist_types: + for dist_type in dist_types: + print("backend={} dist={}".format(cur_backend, dist_type)) if dist_type != 'minkowski': # curently radius only implemented with scipy kdtree if cur_backend == 'scipy-kdtree': From ae838148d584f8ae66baa0deb8fa764230d051de Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 15:05:45 +0100 Subject: [PATCH 007/118] throw exception when using FLANN + max_dist (produces incorrect results) --- pygsp/graphs/nngraphs/nngraph.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 2d0fce2d..827b2d1d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -45,6 +45,10 @@ def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): def _knn_flann(X, num_neighbors, dist_type, order): pfl = _import_pfl() + # the combination FLANN + max_dist produces incorrect results + # do not allow it + if dist_type == 'max_dist': + raise ValueError('FLANN and max_dist is not supported') pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() From 62fc0ce26ddc46405fe229253559eb05a1838ac4 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 19 Mar 2018 15:21:43 +0100 Subject: [PATCH 008/118] update test case to fit FLANN & max_dist exception --- pygsp/tests/test_graphs.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index fd66deda..484f32ae 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -182,24 +182,16 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] - order=3 # for minkowski + backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: for dist_type in dist_types: - print("backend={} dist={}".format(cur_backend, dist_type)) - if dist_type != 'minkowski': - # curently radius only implemented with scipy kdtree - if cur_backend == 'scipy-kdtree': - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type) + if cur_backend == 'flann' and dist_type == 'max_dist': + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='knn', backend=cur_backend, + dist_type=dist_type) else: - # Only p-norms with 1<=p<=infinity permitted. - # flann only accepts integer orders if cur_backend == 'scipy-kdtree': graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, @@ -208,7 +200,6 @@ def test_nngraph(self): backend=cur_backend, dist_type=dist_type, order=order) - def test_bunny(self): graphs.Bunny() From 6f473fa4175e68d5eba4d1640559ea2fb55c9b13 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 07:44:42 +0100 Subject: [PATCH 009/118] implement nn graph using pdist using radius --- pygsp/graphs/nngraphs/nngraph.py | 35 ++++++++++++++++++++++++++------ pygsp/tests/test_graphs.py | 2 +- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 827b2d1d..d32f2cc0 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -65,20 +65,43 @@ def _radius_sp_kdtree(X, epsilon, dist_type, order=0): p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D -def _knn_sp_pdist(X, num_neighbors, dist_type, _order): +def _knn_sp_pdist(X, num_neighbors, dist_type, order): pd = spatial.distance.squareform( spatial.distance.pdist(X, _dist_translation['scipy-pdist'][dist_type], - p=_order)) + p=order)) pds = np.sort(pd)[:, 0:num_neighbors+1] pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds -def _radius_sp_pdist(_X, _epsilon, _dist_type, order=0): - raise NotImplementedError() +def _radius_sp_pdist(X, epsilon, dist_type, order): + N, dim = np.shape(X) + pd = spatial.distance.squareform( + spatial.distance.pdist(X, + _dist_translation['scipy-pdist'][dist_type], + p=order)) + pdf = pd < epsilon + D = [] + NN = [] + for k in range(N): + v = pd[k, pdf[k, :]] + # use the same conventions as in scipy.distance.kdtree + NN.append(v.argsort()) + D.append(np.sort(v)) + + return NN, D -def _radius_flann(_X, _epsilon, _dist_type, order=0): - raise NotImplementedError() +def _radius_flann(X, epsilon, dist_type, order=0): + pfl = _import_pfl() + # the combination FLANN + max_dist produces incorrect results + # do not allow it + if dist_type == 'max_dist': + raise ValueError('FLANN and max_dist is not supported') + pfl.set_distance_type(dist_type, order=order) + flann = pfl.FLANN() + flann.build_index(X) + + flann.delete_index() class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 484f32ae..bdeaf9d1 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -192,7 +192,7 @@ def test_nngraph(self): NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - if cur_backend == 'scipy-kdtree': + if cur_backend != 'flann': graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, dist_type=dist_type, order=order) From 25ec6d23f20875c9f148d650a5ce4d37641d7aa8 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 09:09:02 +0100 Subject: [PATCH 010/118] implement radius nn graph with flann --- pygsp/graphs/nngraphs/nngraph.py | 36 ++++++++++++++++++++------------ pygsp/tests/test_graphs.py | 7 +++---- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d32f2cc0..d65a92b9 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -3,7 +3,8 @@ import traceback import numpy as np -from scipy import sparse, spatial +from scipy import sparse +import scipy.spatial as sps from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 @@ -38,17 +39,17 @@ def _import_pfl(): def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): - kdt = spatial.KDTree(X) + kdt = sps.KDTree(X) D, NN = kdt.query(X, k=(num_neighbors + 1), p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D def _knn_flann(X, num_neighbors, dist_type, order): - pfl = _import_pfl() # the combination FLANN + max_dist produces incorrect results # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') + pfl = _import_pfl() pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() @@ -60,26 +61,26 @@ def _knn_flann(X, num_neighbors, dist_type, order): return NN, D def _radius_sp_kdtree(X, epsilon, dist_type, order=0): - kdt = spatial.KDTree(X) + kdt = sps.KDTree(X) D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D def _knn_sp_pdist(X, num_neighbors, dist_type, order): - pd = spatial.distance.squareform( - spatial.distance.pdist(X, - _dist_translation['scipy-pdist'][dist_type], - p=order)) + pd = sps.distance.squareform( + sps.distance.pdist(X, + metric=_dist_translation['scipy-pdist'][dist_type], + p=order)) pds = np.sort(pd)[:, 0:num_neighbors+1] pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds def _radius_sp_pdist(X, epsilon, dist_type, order): N, dim = np.shape(X) - pd = spatial.distance.squareform( - spatial.distance.pdist(X, - _dist_translation['scipy-pdist'][dist_type], - p=order)) + pd = sps.distance.squareform( + sps.distance.pdist(X, + metric=_dist_translation['scipy-pdist'][dist_type], + p=order)) pdf = pd < epsilon D = [] NN = [] @@ -92,16 +93,25 @@ def _radius_sp_pdist(X, epsilon, dist_type, order): return NN, D def _radius_flann(X, epsilon, dist_type, order=0): - pfl = _import_pfl() + N, dim = np.shape(X) # the combination FLANN + max_dist produces incorrect results # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') + + pfl = _import_pfl() pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() flann.build_index(X) + D = [] + NN = [] + for k in range(N): + nn, d = flann.nn_radius(X[k, :], epsilon) + D.append(d) + NN.append(nn) flann.delete_index() + return NN, D class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index bdeaf9d1..6e70347c 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -192,10 +192,9 @@ def test_nngraph(self): NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - if cur_backend != 'flann': - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type, order=order) graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) From 96b628e2bf86ed31c2c753d73d80d3d5e10243aa Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 10:06:03 +0100 Subject: [PATCH 011/118] flann returns the squared distance when called with 'euclidean' distance -> fix fix radius nn graph for spdist --- pygsp/graphs/nngraphs/nngraph.py | 29 ++++++++++++++++++++--------- pygsp/tests/test_graphs.py | 15 +++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d65a92b9..175460f0 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -49,6 +49,7 @@ def _knn_flann(X, num_neighbors, dist_type, order): # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') + pfl = _import_pfl() pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() @@ -58,6 +59,8 @@ def _knn_flann(X, num_neighbors, dist_type, order): # seems to work best). NN, D = flann.nn(X, X, num_neighbors=(num_neighbors + 1), algorithm='kdtree') + if dist_type == 'euclidean': # flann returns squared distances + return NN, np.sqrt(D) return NN, D def _radius_sp_kdtree(X, epsilon, dist_type, order=0): @@ -86,8 +89,9 @@ def _radius_sp_pdist(X, epsilon, dist_type, order): NN = [] for k in range(N): v = pd[k, pdf[k, :]] + d = pd[k, :].argsort() # use the same conventions as in scipy.distance.kdtree - NN.append(v.argsort()) + NN.append(d[0:len(v)]) D.append(np.sort(v)) return NN, D @@ -98,8 +102,8 @@ def _radius_flann(X, epsilon, dist_type, order=0): # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') - pfl = _import_pfl() + pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() flann.build_index(X) @@ -107,12 +111,23 @@ def _radius_flann(X, epsilon, dist_type, order=0): D = [] NN = [] for k in range(N): - nn, d = flann.nn_radius(X[k, :], epsilon) + nn, d = flann.nn_radius(X[k, :], epsilon*epsilon) D.append(d) NN.append(nn) flann.delete_index() + if dist_type == 'euclidean': # flann returns squared distances + return NN, np.sqrt(D) return NN, D +def center_input(X, N): + return X - np.kron(np.ones((N, 1)), np.mean(X, axis=0)) + +def rescale_input(X, N, d): + bounding_radius = 0.5 * np.linalg.norm(np.amax(X, axis=0) - + np.amin(X, axis=0), 2) + scale = np.power(N, 1. / float(min(d, 3))) / 10. + return X * scale / bounding_radius + class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. @@ -207,14 +222,10 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, Xout = self.Xin if self.center: - Xout = self.Xin - np.kron(np.ones((N, 1)), - np.mean(self.Xin, axis=0)) + Xout = center_input(Xout, N) if self.rescale: - bounding_radius = 0.5 * np.linalg.norm(np.amax(Xout, axis=0) - - np.amin(Xout, axis=0), 2) - scale = np.power(N, 1. / float(min(d, 3))) / 10. - Xout *= scale / bounding_radius + Xout = rescale_input(Xout, N, d) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 6e70347c..c7853839 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -187,6 +187,7 @@ def test_nngraph(self): for cur_backend in backends: for dist_type in dist_types: + #print("backend={} dist={}".format(cur_backend, dist_type)) if cur_backend == 'flann' and dist_type == 'max_dist': self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='knn', backend=cur_backend, @@ -199,6 +200,20 @@ def test_nngraph(self): backend=cur_backend, dist_type=dist_type, order=order) + def test_nngraph_consistency(self): + #Xin = np.arange(180).reshape(60, 3) + Xin = np.random.uniform(-5, 5, (60, 3)) + dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'flann'] + num_neighbors=5 + + G = graphs.NNGraph(Xin, NNtype='knn', + backend='scipy-pdist', k=num_neighbors) + for cur_backend in backends: + for dist_type in dist_types: + Gt = graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, k=num_neighbors) + def test_bunny(self): graphs.Bunny() From 09bbff42c295829d14e05e7dce07db5df33d6d81 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:33:04 +0100 Subject: [PATCH 012/118] compute sqrt of list properly --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 175460f0..085b7efc 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -116,7 +116,7 @@ def _radius_flann(X, epsilon, dist_type, order=0): NN.append(nn) flann.delete_index() if dist_type == 'euclidean': # flann returns squared distances - return NN, np.sqrt(D) + return NN, list(map(np.sqrt, D)) return NN, D def center_input(X, N): From 27b9a038d16f129fb63a1e3344fe2fb81e8936bf Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:14:08 +0100 Subject: [PATCH 013/118] use cyflann instead of pyflann (radius search not working) --- pygsp/graphs/nngraphs/nngraph.py | 43 +++++++++++++++++--------------- setup.py | 4 +-- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 085b7efc..979319e4 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -27,14 +27,14 @@ } -def _import_pfl(): +def _import_cfl(): try: - import pyflann as pfl + import cyflann as cfl except Exception: - raise ImportError('Cannot import pyflann. Choose another nearest ' + raise ImportError('Cannot import cyflann. Choose another nearest ' 'neighbors method or try to install it with ' - 'pip (or conda) install pyflann (or pyflann3).') - return pfl + 'pip (or conda) install cyflann.') + return cfl @@ -50,15 +50,15 @@ def _knn_flann(X, num_neighbors, dist_type, order): if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') - pfl = _import_pfl() - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - + cfl = _import_cfl() + cfl.set_distance_type(dist_type, order=order) + c = cfl.FLANNIndex(algorithm='kdtree') + c.build_index(X) # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = flann.nn(X, X, num_neighbors=(num_neighbors + 1), - algorithm='kdtree') + NN, D = c.nn_index(X, num_neighbors + 1) + c.free_index() if dist_type == 'euclidean': # flann returns squared distances return NN, np.sqrt(D) return NN, D @@ -102,19 +102,18 @@ def _radius_flann(X, epsilon, dist_type, order=0): # do not allow it if dist_type == 'max_dist': raise ValueError('FLANN and max_dist is not supported') - pfl = _import_pfl() - - pfl.set_distance_type(dist_type, order=order) - flann = pfl.FLANN() - flann.build_index(X) + cfl = _import_cfl() + cfl.set_distance_type(dist_type, order=order) + c = cfl.FLANNIndex(algorithm='kdtree') + c.build_index(X) D = [] NN = [] for k in range(N): - nn, d = flann.nn_radius(X[k, :], epsilon*epsilon) + nn, d = c.nn_radius(X[k, :], epsilon*epsilon) D.append(d) NN.append(nn) - flann.delete_index() + c.free_index() if dist_type == 'euclidean': # flann returns squared distances return NN, list(map(np.sqrt, D)) return NN, D @@ -228,7 +227,13 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, Xout = rescale_input(Xout, N, d) + if self._nn_functions.get(NNtype) == None: + raise ValueError('Invalid NNtype {}'.format(self.NNtype)) + if self._nn_functions[NNtype].get(backend) == None: + raise ValueError('Invalid backend {} for type {}'.format(backend, + self.NNtype)) + if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) @@ -262,8 +267,6 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, float(self.sigma)) start = start + leng - else: - raise ValueError('Unknown NNtype {}'.format(self.NNtype)) W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) diff --git a/setup.py b/setup.py index bcf0ccd9..b247f3a2 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,8 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'pyflann; python_version == "2.*"', - 'pyflann3; python_version == "3.*"', + 'flann', + 'cyflann', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters. From 8a1f9b907b45a4cbad2b8ba48cddc535f7f82981 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:14:39 +0100 Subject: [PATCH 014/118] check nn graphs building against pdist reference --- pygsp/tests/test_graphs.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c7853839..48277e3f 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -9,6 +9,7 @@ import numpy as np import scipy.linalg +import scipy.sparse.linalg from skimage import data, img_as_float from pygsp import graphs @@ -199,20 +200,45 @@ def test_nngraph(self): graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='badtype', backend=cur_backend, + dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='knn', backend='badtype', + dist_type=dist_type) def test_nngraph_consistency(self): - #Xin = np.arange(180).reshape(60, 3) Xin = np.random.uniform(-5, 5, (60, 3)) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'flann'] - num_neighbors=5 + num_neighbors=4 + epsilon=0.1 + # use pdist as ground truth G = graphs.NNGraph(Xin, NNtype='knn', backend='scipy-pdist', k=num_neighbors) - for cur_backend in backends: + for cur_backend in backends: for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) + d = scipy.sparse.linalg.norm(G.W - Gt.W) + self.assertTrue(d < 0.01, 'Graphs (knn) are not identical error='.format(d)) + + G = graphs.NNGraph(Xin, NNtype='radius', + backend='scipy-pdist', epsilon=epsilon) + for cur_backend in backends: + for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) + Gt = graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, epsilon=epsilon) + d = scipy.sparse.linalg.norm(G.W - Gt.W, ord=1) + self.assertTrue(d < 0.01, + 'Graphs (radius) are not identical error='.format(d)) def test_bunny(self): graphs.Bunny() From 6e9e2ac856a2fcafa36c2fc2fd12b8b6f1055fa2 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:36:49 +0100 Subject: [PATCH 015/118] cyflann needs the flann library to be installed on the system try to install via before_install --- .travis.yml | 4 ++++ setup.py | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 835c6d85..d8f45358 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,10 @@ python: - 3.5 - 3.6 +before_install: + - sudo apt-get -qq update + - sudo apt-get install -y libflann-dev + addons: apt: packages: diff --git a/setup.py b/setup.py index b247f3a2..a9f92004 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,7 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'flann', - 'cyflann', + 'cyflann', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters. @@ -60,7 +59,7 @@ # Dependencies to build and upload packages. 'pkg': [ 'wheel', - 'twine', + 'twine' ], }, license="BSD", From 811de06e5adc694e8ba496bbc59e7d874b9b9695 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 15:14:39 +0100 Subject: [PATCH 016/118] check nn graphs building against pdist reference --- pygsp/tests/test_graphs.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c7853839..48277e3f 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -9,6 +9,7 @@ import numpy as np import scipy.linalg +import scipy.sparse.linalg from skimage import data, img_as_float from pygsp import graphs @@ -199,20 +200,45 @@ def test_nngraph(self): graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='badtype', backend=cur_backend, + dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='knn', backend='badtype', + dist_type=dist_type) def test_nngraph_consistency(self): - #Xin = np.arange(180).reshape(60, 3) Xin = np.random.uniform(-5, 5, (60, 3)) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'flann'] - num_neighbors=5 + num_neighbors=4 + epsilon=0.1 + # use pdist as ground truth G = graphs.NNGraph(Xin, NNtype='knn', backend='scipy-pdist', k=num_neighbors) - for cur_backend in backends: + for cur_backend in backends: for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) + d = scipy.sparse.linalg.norm(G.W - Gt.W) + self.assertTrue(d < 0.01, 'Graphs (knn) are not identical error='.format(d)) + + G = graphs.NNGraph(Xin, NNtype='radius', + backend='scipy-pdist', epsilon=epsilon) + for cur_backend in backends: + for dist_type in dist_types: + if cur_backend == 'flann' and dist_type == 'max_dist': + continue + #print("backend={} dist={}".format(cur_backend, dist_type)) + Gt = graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, epsilon=epsilon) + d = scipy.sparse.linalg.norm(G.W - Gt.W, ord=1) + self.assertTrue(d < 0.01, + 'Graphs (radius) are not identical error='.format(d)) def test_bunny(self): graphs.Bunny() From 813fe3988c89d693e685d61ce5d69fbd8cd6070a Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 16:18:33 +0100 Subject: [PATCH 017/118] backport stuff from cyflann branch --- pygsp/graphs/nngraphs/nngraph.py | 7 +++++-- pygsp/tests/test_graphs.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 085b7efc..747c8542 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -228,7 +228,12 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, Xout = rescale_input(Xout, N, d) + if self._nn_functions.get(NNtype) == None: + raise ValueError('Invalid NNtype {}'.format(self.NNtype)) + if self._nn_functions[NNtype].get(backend) == None: + raise ValueError('Invalid backend {} for type {}'.format(backend, + self.NNtype)) if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) @@ -262,8 +267,6 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, float(self.sigma)) start = start + leng - else: - raise ValueError('Unknown NNtype {}'.format(self.NNtype)) W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 48277e3f..b2ddf18b 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -219,26 +219,26 @@ def test_nngraph_consistency(self): backend='scipy-pdist', k=num_neighbors) for cur_backend in backends: for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + if cur_backend == 'flann': # skip flann for now continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) d = scipy.sparse.linalg.norm(G.W - Gt.W) - self.assertTrue(d < 0.01, 'Graphs (knn) are not identical error='.format(d)) + self.assertTrue(d < 0.01, "Graphs (knn) are not identical") G = graphs.NNGraph(Xin, NNtype='radius', backend='scipy-pdist', epsilon=epsilon) for cur_backend in backends: for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + if cur_backend == 'flann': # skip flann for now continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, epsilon=epsilon) d = scipy.sparse.linalg.norm(G.W - Gt.W, ord=1) self.assertTrue(d < 0.01, - 'Graphs (radius) are not identical error='.format(d)) + "Graphs (radius) are not identical") def test_bunny(self): graphs.Bunny() From 4a4d59796aa1c8c3f0a040122bf8659119690fa2 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Tue, 20 Mar 2018 16:24:33 +0100 Subject: [PATCH 018/118] flann should (mostly) work for knn graphs --- pygsp/tests/test_graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index b2ddf18b..8d1a008d 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -219,7 +219,7 @@ def test_nngraph_consistency(self): backend='scipy-pdist', k=num_neighbors) for cur_backend in backends: for dist_type in dist_types: - if cur_backend == 'flann': # skip flann for now + if cur_backend == 'flann' and dist_type == 'max_dist': continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', From 53dffc12670c6664fddca317c76a8a2199e1e992 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Wed, 21 Mar 2018 08:18:49 +0100 Subject: [PATCH 019/118] fix pdist warnings --- pygsp/graphs/nngraphs/nngraph.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 979319e4..c2357468 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -5,6 +5,7 @@ import numpy as np from scipy import sparse import scipy.spatial as sps +import scipy.spatial.distance as spsd from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 @@ -70,20 +71,24 @@ def _radius_sp_kdtree(X, epsilon, dist_type, order=0): return NN, D def _knn_sp_pdist(X, num_neighbors, dist_type, order): - pd = sps.distance.squareform( - sps.distance.pdist(X, - metric=_dist_translation['scipy-pdist'][dist_type], - p=order)) + if dist_type == 'minkowski': + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], + p=order) + else: + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) + pd = spsd.squareform(p) pds = np.sort(pd)[:, 0:num_neighbors+1] pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds def _radius_sp_pdist(X, epsilon, dist_type, order): N, dim = np.shape(X) - pd = sps.distance.squareform( - sps.distance.pdist(X, - metric=_dist_translation['scipy-pdist'][dist_type], - p=order)) + if dist_type == 'minkowski': + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], + p=order) + else: + p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) + pd = spsd.squareform(p) pdf = pd < epsilon D = [] NN = [] From 1309e92b963f5177822fb1e5406b2bc67f812272 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Wed, 21 Mar 2018 12:28:35 +0100 Subject: [PATCH 020/118] implement and use scipy-ckdtree as default (faster than kdtree) --- pygsp/graphs/nngraphs/nngraph.py | 40 +++++++++++++++++++++++++++++--- pygsp/tests/test_graphs.py | 13 ++++++----- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 747c8542..faa4d9f5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -18,6 +18,11 @@ 'manhattan': 1, 'max_dist': np.inf }, + 'scipy-ckdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf + }, 'scipy-pdist' : { 'euclidean': 'euclidean', 'manhattan': 'cityblock', @@ -44,6 +49,13 @@ def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D +def _knn_sp_ckdtree(X, num_neighbors, dist_type, order=0): + kdt = sps.cKDTree(X) + D, NN = kdt.query(X, k=(num_neighbors + 1), + p=_dist_translation['scipy-ckdtree'][dist_type]) + return NN, D + + def _knn_flann(X, num_neighbors, dist_type, order): # the combination FLANN + max_dist produces incorrect results # do not allow it @@ -66,9 +78,27 @@ def _knn_flann(X, num_neighbors, dist_type, order): def _radius_sp_kdtree(X, epsilon, dist_type, order=0): kdt = sps.KDTree(X) D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, - p=_dist_translation['scipy-kdtree'][dist_type]) + p=_dist_translation['scipy-kdtree'][dist_type]) return NN, D +def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): + N, dim = np.shape(X) + kdt = sps.cKDTree(X) + nn = kdt.query_ball_point(X, r=epsilon, + p=_dist_translation['scipy-ckdtree'][dist_type]) + D = [] + NN = [] + for k in range(N): + x = np.matlib.repmat(X[k, :], len(nn[k]), 1) + d = np.linalg.norm(x - X[nn[k], :], + ord=_dist_translation['scipy-ckdtree'][dist_type], + axis=1) + nidx = d.argsort() + NN.append(np.take(nn[k], nidx)) + D.append(np.sort(d)) + return NN, D + + def _knn_sp_pdist(X, num_neighbors, dist_type, order): pd = sps.distance.squareform( sps.distance.pdist(X, @@ -142,7 +172,8 @@ class NNGraph(Graph): is 'knn'). backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} Type of the backend for graph construction. - - 'scipy-kdtree'(default) will use scipy.spatial.KDTree + - 'scipy-kdtree' will use scipy.spatial.KDTree + - 'scipy-ckdtree'(default) will use scipy.spatial.cKDTree - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) center : bool, optional @@ -183,7 +214,7 @@ class NNGraph(Graph): """ - def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, + def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None, plotting={}, symmetrize_type='average', dist_type='euclidean', order=0, **kwargs): @@ -197,15 +228,18 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-kdtree', center=True, self.sigma = sigma self.epsilon = epsilon _dist_translation['scipy-kdtree']['minkowski'] = order + _dist_translation['scipy-ckdtree']['minkowski'] = order self._nn_functions = { 'knn': { 'scipy-kdtree': _knn_sp_kdtree, + 'scipy-ckdtree': _knn_sp_ckdtree, 'scipy-pdist': _knn_sp_pdist, 'flann': _knn_flann }, 'radius': { 'scipy-kdtree': _radius_sp_kdtree, + 'scipy-ckdtree': _radius_sp_ckdtree, 'scipy-pdist': _radius_sp_pdist, 'flann': _radius_flann }, diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 8d1a008d..e8ac8f99 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -183,7 +183,7 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-pdist', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'flann'] order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: @@ -194,9 +194,10 @@ def test_nngraph(self): NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type, order=order) + if cur_backend != 'flann': #pyflann fails on radius query + graphs.NNGraph(Xin, NNtype='radius', + backend=cur_backend, + dist_type=dist_type, order=order) graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) @@ -208,9 +209,9 @@ def test_nngraph(self): dist_type=dist_type) def test_nngraph_consistency(self): - Xin = np.random.uniform(-5, 5, (60, 3)) + Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann'] num_neighbors=4 epsilon=0.1 From 648fa9130007f8e005808be3d11a92370c0f4410 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 22 Mar 2018 11:20:40 +0100 Subject: [PATCH 021/118] backport README changes from master --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 99d27364..387daa92 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ The documentation is available on `Read the Docs `_ and development takes place on `GitHub `_. -(A `Matlab counterpart `_ exists.) +(A (mostly unmaintained) `Matlab version `_ exists.) The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, From 8e7c553249f4b762078ceb6ae593d998ccbf3b4f Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Fri, 23 Mar 2018 10:00:13 +0100 Subject: [PATCH 022/118] add nmslib --- pygsp/graphs/nngraphs/nngraph.py | 39 ++++++++++++++++++++++++++++---- pygsp/tests/test_graphs.py | 24 ++++++++++++++------ setup.py | 4 +++- 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d8d85b34..b58c8fc5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -30,7 +30,12 @@ 'max_dist': 'chebyshev', 'minkowski': 'minkowski' }, - + 'nmslib' : { + 'euclidean': 'l2', + 'manhattan': 'l1', + 'max_dist': 'linf', + 'minkowski': 'lp' + } } def _import_cfl(): @@ -42,7 +47,14 @@ def _import_cfl(): 'pip (or conda) install cyflann.') return cfl - +def _import_nmslib(): + try: + import nmslib as nms + except Exception: + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors method or try to install it with ' + 'pip (or conda) install nmslib.') + return nms def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): kdt = sps.KDTree(X) @@ -111,6 +123,20 @@ def _knn_sp_pdist(X, num_neighbors, dist_type, order): pdi = pd.argsort()[:, 0:num_neighbors+1] return pdi, pds +def _knn_nmslib(X, num_neighbors, dist_type, order): + N, dim = np.shape(X) + if dist_type == 'minkowski': + raise ValueError('unsupported distance type (lp) for nmslib') + nms = _import_nmslib() + nmsidx = nms.init(space=_dist_translation['nmslib'][dist_type]) + nmsidx.addDataPointBatch(X) + nmsidx.createIndex() + q = nmsidx.knnQueryBatch(X, k=num_neighbors+1) + nn, d = zip(*q) + D = np.concatenate(d).reshape(N, num_neighbors+1) + NN = np.concatenate(nn).reshape(N, num_neighbors+1) + return NN, D + def _radius_sp_pdist(X, epsilon, dist_type, order): N, dim = np.shape(X) if dist_type == 'minkowski': @@ -153,6 +179,9 @@ def _radius_flann(X, epsilon, dist_type, order=0): return NN, list(map(np.sqrt, D)) return NN, D +def _radius_nmslib(X, epsilon, dist_type, order=0): + raise ValueError('nmslib does not support (yet?) range queries') + def center_input(X, N): return X - np.kron(np.ones((N, 1)), np.mean(X, axis=0)) @@ -239,13 +268,15 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, 'scipy-kdtree': _knn_sp_kdtree, 'scipy-ckdtree': _knn_sp_ckdtree, 'scipy-pdist': _knn_sp_pdist, - 'flann': _knn_flann + 'flann': _knn_flann, + 'nmslib': _knn_nmslib }, 'radius': { 'scipy-kdtree': _radius_sp_kdtree, 'scipy-ckdtree': _radius_sp_ckdtree, 'scipy-pdist': _radius_sp_pdist, - 'flann': _radius_flann + 'flann': _radius_flann, + 'nmslib': _radius_nmslib }, } diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 8613d3b1..25ae17b4 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -183,24 +183,30 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: for dist_type in dist_types: #print("backend={} dist={}".format(cur_backend, dist_type)) - if cur_backend == 'flann' and dist_type == 'max_dist': + if (cur_backend == 'flann' and + dist_type == 'max_dist') or (cur_backend == 'nmslib' and + dist_type == 'minkowski'): self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type) else: - if cur_backend != 'flann': #pyflann fails on radius query + if cur_backend == 'nmslib': + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='radius', backend=cur_backend, + dist_type=dist_type, order=order) + else: graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, dist_type=dist_type, order=order) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order) self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='badtype', backend=cur_backend, dist_type=dist_type) @@ -211,7 +217,7 @@ def test_nngraph(self): def test_nngraph_consistency(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib']#, 'flann'] num_neighbors=4 epsilon=0.1 @@ -222,6 +228,8 @@ def test_nngraph_consistency(self): for dist_type in dist_types: if cur_backend == 'flann' and dist_type == 'max_dist': continue + if cur_backend == 'nmslib' and dist_type == 'minkowski': + continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, k=num_neighbors) @@ -234,6 +242,8 @@ def test_nngraph_consistency(self): for dist_type in dist_types: if cur_backend == 'flann' and dist_type == 'max_dist': continue + if cur_backend == 'nmslib': #unsupported + continue #print("backend={} dist={}".format(cur_backend, dist_type)) Gt = graphs.NNGraph(Xin, NNtype='radius', backend=cur_backend, epsilon=epsilon) diff --git a/setup.py b/setup.py index a9f92004..2979ac83 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,9 @@ # Construct patch graphs from images. 'scikit-image', # Approximate nearest neighbors for kNN graphs. - 'cyflann', + 'cyflann', + 'pybind11', + 'nmslib', # Convex optimization on graph. 'pyunlocbox', # Plot graphs, signals, and filters. From b83e4672e801bf88ca0de6468b4e9b33a1995e2b Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 26 Mar 2018 10:12:43 +0200 Subject: [PATCH 023/118] test flann when not on windows --- pygsp/tests/test_graphs.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 25ae17b4..14277ed4 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -6,7 +6,7 @@ """ import unittest - +import os import numpy as np import scipy.linalg import scipy.sparse.linalg @@ -183,7 +183,9 @@ def test_set_coordinates(self): def test_nngraph(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] + if os.name != 'nt': + backends.append('flann') order=3 # for minkowski, FLANN only accepts integer orders for cur_backend in backends: @@ -217,7 +219,9 @@ def test_nngraph(self): def test_nngraph_consistency(self): Xin = np.arange(90).reshape(30, 3) dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib']#, 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib'] + if os.name != 'nt': + backends.append('flann') num_neighbors=4 epsilon=0.1 From 28b78589c6dbd9d6149c2c8dfeba39a794282690 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 29 Mar 2018 10:56:41 +0200 Subject: [PATCH 024/118] use the same code to build sparse matrix for knn and radius --- pygsp/graphs/nngraphs/nngraph.py | 41 ++++++++++++-------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index d8d85b34..b35b6d38 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -273,37 +273,26 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, raise ValueError('Invalid backend {} for type {}'.format(backend, self.NNtype)) if self.NNtype == 'knn': - spi = np.zeros((N * k)) - spj = np.zeros((N * k)) - spv = np.zeros((N * k)) - NN, D = self._nn_functions[NNtype][backend](Xout, k, dist_type, order) - - for i in range(N): - spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) - spj[i * k:(i + 1) * k] = NN[i, 1:] - spv[i * k:(i + 1) * k] = np.exp(-np.power(D[i, 1:], 2) / - float(self.sigma)) - + elif self.NNtype == 'radius': - NN, D = self._nn_functions[NNtype][backend](Xout, epsilon, dist_type, order) - count = sum(map(len, NN)) - - spi = np.zeros((count)) - spj = np.zeros((count)) - spv = np.zeros((count)) - - start = 0 - for i in range(N): - leng = len(NN[i]) - 1 - spi[start:start + leng] = np.kron(np.ones((leng)), i) - spj[start:start + leng] = NN[i][1:] - spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / - float(self.sigma)) - start = start + leng + countV = list(map(len, NN)) + count = sum(countV) + spi = np.zeros((count)) + spj = np.zeros((count)) + spv = np.zeros((count)) + + start = 0 + for i in range(N): + leng = countV[i] - 1 + spi[start:start + leng] = np.kron(np.ones((leng)), i) + spj[start:start + leng] = NN[i][1:] + spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / + float(self.sigma)) + start = start + leng W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) From 188c4a61ffe93fa0647030758c31ebfe21a2f163 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 29 Mar 2018 13:44:10 +0200 Subject: [PATCH 025/118] building the graph with rescale/center=False should also work --- pygsp/tests/test_graphs.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 14277ed4..a48e6034 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -197,6 +197,9 @@ def test_nngraph(self): self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, Xin, + NNtype='radius', backend=cur_backend, + dist_type=dist_type) else: if cur_backend == 'nmslib': self.assertRaises(ValueError, graphs.NNGraph, Xin, @@ -209,6 +212,18 @@ def test_nngraph(self): graphs.NNGraph(Xin, NNtype='knn', backend=cur_backend, dist_type=dist_type, order=order) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order, + center=False) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order, + rescale=False) + graphs.NNGraph(Xin, NNtype='knn', + backend=cur_backend, + dist_type=dist_type, order=order, + rescale=False, center=False) self.assertRaises(ValueError, graphs.NNGraph, Xin, NNtype='badtype', backend=cur_backend, dist_type=dist_type) From 8e98b77b0f8d26f83ef10064e454a14811796539 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Thu, 29 Mar 2018 14:58:25 +0200 Subject: [PATCH 026/118] update doc for nmslib --- pygsp/graphs/nngraphs/nngraph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index a5f766e8..0ba86c92 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -209,6 +209,7 @@ class NNGraph(Graph): - 'scipy-ckdtree'(default) will use scipy.spatial.cKDTree - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) + - 'nmslib' use nmslib for approximate nearest neighbors (faster in high-dimensional spaces) center : bool, optional Center the data so that it has zero mean (default is True) rescale : bool, optional From 08ae29fae719af6e551b531c381d52cf747e8ea0 Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Mon, 9 Apr 2018 09:46:45 +0200 Subject: [PATCH 027/118] enable multithreading with ckdtree/nmslib --- pygsp/graphs/nngraphs/nngraph.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 0ba86c92..5d564089 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -6,6 +6,7 @@ from scipy import sparse import scipy.spatial as sps import scipy.spatial.distance as spsd +import multiprocessing from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 @@ -65,7 +66,8 @@ def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): def _knn_sp_ckdtree(X, num_neighbors, dist_type, order=0): kdt = sps.cKDTree(X) D, NN = kdt.query(X, k=(num_neighbors + 1), - p=_dist_translation['scipy-ckdtree'][dist_type]) + p=_dist_translation['scipy-ckdtree'][dist_type], + n_jobs=-1) return NN, D @@ -98,7 +100,8 @@ def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): N, dim = np.shape(X) kdt = sps.cKDTree(X) nn = kdt.query_ball_point(X, r=epsilon, - p=_dist_translation['scipy-ckdtree'][dist_type]) + p=_dist_translation['scipy-ckdtree'][dist_type], + n_jobs=-1) D = [] NN = [] for k in range(N): @@ -125,13 +128,14 @@ def _knn_sp_pdist(X, num_neighbors, dist_type, order): def _knn_nmslib(X, num_neighbors, dist_type, order): N, dim = np.shape(X) + ncpu = multiprocessing.cpu_count() if dist_type == 'minkowski': raise ValueError('unsupported distance type (lp) for nmslib') nms = _import_nmslib() nmsidx = nms.init(space=_dist_translation['nmslib'][dist_type]) nmsidx.addDataPointBatch(X) nmsidx.createIndex() - q = nmsidx.knnQueryBatch(X, k=num_neighbors+1) + q = nmsidx.knnQueryBatch(X, k=num_neighbors+1, num_threads=int(ncpu/2)) nn, d = zip(*q) D = np.concatenate(d).reshape(N, num_neighbors+1) NN = np.concatenate(nn).reshape(N, num_neighbors+1) From a562896516c75514c1522337e12188c83f9dd57c Mon Sep 17 00:00:00 2001 From: Nicolas Aspert Date: Wed, 20 Jun 2018 11:45:33 +0200 Subject: [PATCH 028/118] fix _get_extra_repr --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 73449c6c..ec91afe5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -345,7 +345,7 @@ def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, def _get_extra_repr(self): return {'NNtype': self.NNtype, - 'use_flann': self.use_flann, + 'backend': self.backend, 'center': self.center, 'rescale': self.rescale, 'k': self.k, From 8a51649f2435b50381d4b11fc4718a63861be0ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 14 Feb 2019 01:00:40 +0100 Subject: [PATCH 029/118] NNGraph: clean and doc (PR #21) --- README.rst | 2 +- pygsp/graphs/nngraphs/bunny.py | 4 +- pygsp/graphs/nngraphs/cube.py | 2 +- pygsp/graphs/nngraphs/imgpatches.py | 8 +- pygsp/graphs/nngraphs/nngraph.py | 507 ++++++++++++++-------------- pygsp/graphs/nngraphs/sensor.py | 2 +- pygsp/graphs/nngraphs/sphere.py | 2 +- pygsp/graphs/nngraphs/twomoons.py | 30 +- pygsp/tests/test_graphs.py | 146 ++++---- pygsp/tests/test_plotting.py | 4 +- setup.py | 3 +- 11 files changed, 359 insertions(+), 351 deletions(-) diff --git a/README.rst b/README.rst index a768428a..7f0b8776 100644 --- a/README.rst +++ b/README.rst @@ -37,7 +37,7 @@ The documentation is available on `Read the Docs `_ and development takes place on `GitHub `_. -(A (mostly unmaintained) `Matlab version `_ exists.) +A (mostly unmaintained) `Matlab version `_ exists. The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, diff --git a/pygsp/graphs/nngraphs/bunny.py b/pygsp/graphs/nngraphs/bunny.py index d9dac407..21729823 100644 --- a/pygsp/graphs/nngraphs/bunny.py +++ b/pygsp/graphs/nngraphs/bunny.py @@ -34,7 +34,7 @@ def __init__(self, **kwargs): 'distance': 8, } - super(Bunny, self).__init__(Xin=data['bunny'], - epsilon=0.02, NNtype='radius', + super(Bunny, self).__init__(data['bunny'], center=False, rescale=False, + kind='radius', radius=0.02, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/cube.py b/pygsp/graphs/nngraphs/cube.py index 820e401c..85f51347 100644 --- a/pygsp/graphs/nngraphs/cube.py +++ b/pygsp/graphs/nngraphs/cube.py @@ -89,7 +89,7 @@ def __init__(self, 'distance': 9, } - super(Cube, self).__init__(Xin=pts, k=10, + super(Cube, self).__init__(pts, k=10, center=False, rescale=False, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/imgpatches.py b/pygsp/graphs/nngraphs/imgpatches.py index ea11be06..95ce9149 100644 --- a/pygsp/graphs/nngraphs/imgpatches.py +++ b/pygsp/graphs/nngraphs/imgpatches.py @@ -10,7 +10,8 @@ class ImgPatches(NNGraph): Extract a feature vector in the form of a patch for every pixel of an image, then construct a nearest-neighbor graph between these feature - vectors. The feature matrix, i.e. the patches, can be found in :attr:`Xin`. + vectors. The feature matrix, i.e., the patches, can be found in + :attr:`features`. Parameters ---------- @@ -35,9 +36,10 @@ class ImgPatches(NNGraph): >>> from skimage import data, img_as_float >>> img = img_as_float(data.camera()[::64, ::64]) >>> G = graphs.ImgPatches(img, patch_shape=(3, 3)) - >>> print('{} nodes ({} x {} pixels)'.format(G.Xin.shape[0], *img.shape)) + >>> N, d = G.features.shape + >>> print('{} nodes ({} x {} pixels)'.format(N, *img.shape)) 64 nodes (8 x 8 pixels) - >>> print('{} features per node'.format(G.Xin.shape[1])) + >>> print('{} features per node'.format(d)) 9 features per node >>> G.set_coordinates(kind='spring', seed=42) >>> fig, axes = plt.subplots(1, 2) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index c099a546..85b7ab43 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -1,44 +1,51 @@ # -*- coding: utf-8 -*- +from __future__ import division + +import multiprocessing import traceback import numpy as np -import numpy.matlib -from scipy import sparse -import scipy.spatial as sps -import scipy.spatial.distance as spsd -import multiprocessing +from scipy import sparse, spatial from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 -_logger = utils.build_logger(__name__) # conversion between the FLANN conventions and the various backend functions -_dist_translation = { - 'scipy-kdtree': { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf - }, - 'scipy-ckdtree': { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf - }, - 'scipy-pdist' : { - 'euclidean': 'euclidean', - 'manhattan': 'cityblock', - 'max_dist': 'chebyshev', - 'minkowski': 'minkowski' - }, - 'nmslib' : { - 'euclidean': 'l2', - 'manhattan': 'l1', - 'max_dist': 'linf', - 'minkowski': 'lp' - } - } +_metrics = { + 'scipy-pdist': { + 'euclidean': 'euclidean', + 'manhattan': 'cityblock', + 'max_dist': 'chebyshev', + 'minkowski': 'minkowski', + }, + 'scipy-kdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': 0, + }, + 'scipy-ckdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': 0, + }, + 'flann': { + 'euclidean': 'euclidean', + 'manhattan': 'manhattan', +# 'max_dist': 'max_dist', # produces incorrect results + 'minkowski': 'minkowski', + }, + 'nmslib': { + 'euclidean': 'l2', + 'manhattan': 'l1', + 'max_dist': 'linf', +# 'minkowski': 'lp', # unsupported + } +} + def _import_cfl(): try: @@ -50,6 +57,7 @@ def _import_cfl(): 'Original exception: {}'.format(e)) return cfl + def _import_nmslib(): try: import nmslib as nms @@ -58,58 +66,55 @@ def _import_nmslib(): 'neighbors method or try to install it with ' 'pip (or conda) install nmslib.') return nms - -def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): - kdt = sps.KDTree(X) - D, NN = kdt.query(X, k=(num_neighbors + 1), - p=_dist_translation['scipy-kdtree'][dist_type]) + + +def _knn_sp_kdtree(features, num_neighbors, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] + kdt = spatial.KDTree(features) + D, NN = kdt.query(features, k=(num_neighbors + 1), p=p) return NN, D -def _knn_sp_ckdtree(X, num_neighbors, dist_type, order=0): - kdt = sps.cKDTree(X) - D, NN = kdt.query(X, k=(num_neighbors + 1), - p=_dist_translation['scipy-ckdtree'][dist_type], - n_jobs=-1) + +def _knn_sp_ckdtree(features, num_neighbors, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] + kdt = spatial.cKDTree(features) + D, NN = kdt.query(features, k=(num_neighbors + 1), p=p, n_jobs=-1) return NN, D -def _knn_flann(X, num_neighbors, dist_type, order): - # the combination FLANN + max_dist produces incorrect results - # do not allow it - if dist_type == 'max_dist': - raise ValueError('FLANN and max_dist is not supported') - +def _knn_flann(features, num_neighbors, metric, order): cfl = _import_cfl() - cfl.set_distance_type(dist_type, order=order) + cfl.set_distance_type(metric, order=order) c = cfl.FLANNIndex(algorithm='kdtree') - c.build_index(X) + c.build_index(features) # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = c.nn_index(X, num_neighbors + 1) + NN, D = c.nn_index(features, num_neighbors + 1) c.free_index() - if dist_type == 'euclidean': # flann returns squared distances + if metric == 'euclidean': # flann returns squared distances return NN, np.sqrt(D) return NN, D -def _radius_sp_kdtree(X, epsilon, dist_type, order=0): - kdt = sps.KDTree(X) - D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, - p=_dist_translation['scipy-kdtree'][dist_type]) + +def _radius_sp_kdtree(features, radius, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] + kdt = spatial.KDTree(features) + D, NN = kdt.query(features, k=None, distance_upper_bound=radius, p=p) return NN, D -def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): - N, dim = np.shape(X) - kdt = sps.cKDTree(X) - nn = kdt.query_ball_point(X, r=epsilon, - p=_dist_translation['scipy-ckdtree'][dist_type], - n_jobs=-1) + +def _radius_sp_ckdtree(features, radius, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] + n_vertices, _ = features.shape + kdt = spatial.cKDTree(features) + nn = kdt.query_ball_point(features, r=radius, p=p, n_jobs=-1) D = [] NN = [] - for k in range(N): - x = np.matlib.repmat(X[k, :], len(nn[k]), 1) - d = np.linalg.norm(x - X[nn[k], :], - ord=_dist_translation['scipy-ckdtree'][dist_type], + for k in range(n_vertices): + x = np.tile(features[k, :], (len(nn[k]), 1)) + d = np.linalg.norm(x - features[nn[k], :], + ord=_metrics['scipy-ckdtree'][metric], axis=1) nidx = d.argsort() NN.append(np.take(nn[k], nidx)) @@ -117,250 +122,260 @@ def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): return NN, D -def _knn_sp_pdist(X, num_neighbors, dist_type, order): - if dist_type == 'minkowski': - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], - p=order) - else: - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) - pd = spsd.squareform(p) - pds = np.sort(pd)[:, 0:num_neighbors+1] - pdi = pd.argsort()[:, 0:num_neighbors+1] +def _knn_sp_pdist(features, num_neighbors, metric, order): + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric], + p=order) + pd = spatial.distance.squareform(p) + pds = np.sort(pd)[:, :num_neighbors+1] + pdi = pd.argsort()[:, :num_neighbors+1] return pdi, pds - -def _knn_nmslib(X, num_neighbors, dist_type, order): - N, dim = np.shape(X) + + +def _knn_nmslib(features, num_neighbors, metric, _): + n_vertices, _ = features.shape ncpu = multiprocessing.cpu_count() - if dist_type == 'minkowski': - raise ValueError('unsupported distance type (lp) for nmslib') nms = _import_nmslib() - nmsidx = nms.init(space=_dist_translation['nmslib'][dist_type]) - nmsidx.addDataPointBatch(X) + nmsidx = nms.init(space=_metrics['nmslib'][metric]) + nmsidx.addDataPointBatch(features) nmsidx.createIndex() - q = nmsidx.knnQueryBatch(X, k=num_neighbors+1, num_threads=int(ncpu/2)) + q = nmsidx.knnQueryBatch(features, k=num_neighbors+1, + num_threads=int(ncpu/2)) nn, d = zip(*q) - D = np.concatenate(d).reshape(N, num_neighbors+1) - NN = np.concatenate(nn).reshape(N, num_neighbors+1) + D = np.concatenate(d).reshape(n_vertices, num_neighbors+1) + NN = np.concatenate(nn).reshape(n_vertices, num_neighbors+1) return NN, D - -def _radius_sp_pdist(X, epsilon, dist_type, order): - N, dim = np.shape(X) - if dist_type == 'minkowski': - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], - p=order) - else: - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) - pd = spsd.squareform(p) - pdf = pd < epsilon + + +def _radius_sp_pdist(features, radius, metric, order): + n_vertices, _ = np.shape(features) + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric], + p=order) + pd = spatial.distance.squareform(p) + pdf = pd < radius D = [] NN = [] - for k in range(N): + for k in range(n_vertices): v = pd[k, pdf[k, :]] d = pd[k, :].argsort() # use the same conventions as in scipy.distance.kdtree NN.append(d[0:len(v)]) D.append(np.sort(v)) - + return NN, D -def _radius_flann(X, epsilon, dist_type, order=0): - N, dim = np.shape(X) - # the combination FLANN + max_dist produces incorrect results - # do not allow it - if dist_type == 'max_dist': - raise ValueError('FLANN and max_dist is not supported') - + +def _radius_flann(features, radius, metric, order): + n_vertices, _ = features.shape cfl = _import_cfl() - cfl.set_distance_type(dist_type, order=order) + cfl.set_distance_type(metric, order=order) c = cfl.FLANNIndex(algorithm='kdtree') - c.build_index(X) + c.build_index(features) D = [] NN = [] - for k in range(N): - nn, d = c.nn_radius(X[k, :], epsilon*epsilon) + for k in range(n_vertices): + nn, d = c.nn_radius(features[k, :], radius**2) D.append(d) NN.append(nn) c.free_index() - if dist_type == 'euclidean': # flann returns squared distances - return NN, list(map(np.sqrt, D)) + if metric == 'euclidean': + # Flann returns squared distances. + D = list(map(np.sqrt, D)) return NN, D -def _radius_nmslib(X, epsilon, dist_type, order=0): - raise ValueError('nmslib does not support (yet?) range queries') -def center_input(X, N): - return X - np.kron(np.ones((N, 1)), np.mean(X, axis=0)) +_nn_functions = { + 'knn': { + 'scipy-pdist': _knn_sp_pdist, + 'scipy-kdtree': _knn_sp_kdtree, + 'scipy-ckdtree': _knn_sp_ckdtree, + 'flann': _knn_flann, + 'nmslib': _knn_nmslib, + }, + 'radius': { + 'scipy-pdist': _radius_sp_pdist, + 'scipy-kdtree': _radius_sp_kdtree, + 'scipy-ckdtree': _radius_sp_ckdtree, + 'flann': _radius_flann, + }, +} + + +def center_features(features): + n_vertices, _ = features.shape + return features - np.kron(np.ones((n_vertices, 1)), np.mean(features, 0)) + + +def rescale_features(features): + n_vertices, dimensionality = features.shape + bounding_radius = 0.5 * np.linalg.norm(np.amax(features, axis=0) - + np.amin(features, axis=0), 2) + scale = np.power(n_vertices, 1 / min(dimensionality, 3)) / 10 + return features * scale / bounding_radius -def rescale_input(X, N, d): - bounding_radius = 0.5 * np.linalg.norm(np.amax(X, axis=0) - - np.amin(X, axis=0), 2) - scale = np.power(N, 1. / float(min(d, 3))) / 10. - return X * scale / bounding_radius class NNGraph(Graph): - r"""Nearest-neighbor graph from given point cloud. + r"""Nearest-neighbor graph. + + The nearest-neighbor graph is built from a set of features, where the edge + weight between vertices :math:`v_i` and :math:`v_j` is given by + + .. math:: A(i,j) = \exp \left( -\frac{d^2(v_i, v_j)}{\sigma^2} \right), + + where :math:`d(v_i, v_j)` is a distance measure between some representation + (the features) of :math:`v_i` and :math:`v_j`. For example, the features + might be the 3D coordinates of points in a point cloud. Then, if + ``metric='euclidean'``, :math:`d(v_i, v_j) = \| x_i - x_j \|_2`, where + :math:`x_i` is the 3D position of vertex :math:`v_i`. + + The similarity matrix :math:`A` is sparsified by either keeping the ``k`` + closest vertices for each vertex (if ``type='knn'``), or by setting to zero + any distance greater than ``radius`` (if ``type='radius'``). Parameters ---------- - Xin : ndarray - Input points, Should be an `N`-by-`d` matrix, where `N` is the number - of nodes in the graph and `d` is the dimension of the feature space. - NNtype : string, optional - Type of nearest neighbor graph to create. The options are 'knn' for - k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default - is 'knn'). - backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} - Type of the backend for graph construction. - - 'scipy-kdtree' will use scipy.spatial.KDTree - - 'scipy-ckdtree'(default) will use scipy.spatial.cKDTree - - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) - - 'nmslib' use nmslib for approximate nearest neighbors (faster in high-dimensional spaces) + features : ndarray + An `N`-by-`d` matrix, where `N` is the number of nodes in the graph and + `d` is the number of features. center : bool, optional - Center the data so that it has zero mean (default is True) + Whether to center the features to have zero mean. rescale : bool, optional - Rescale the data so that it lies in a l2-sphere (default is True) - k : int, optional - Number of neighbors for knn (default is 10) - sigma : float, optional - Width of the similarity kernel. - By default, it is set to the average of the nearest neighbor distance. - epsilon : float, optional - Radius for the epsilon-neighborhood search (default is 0.01) - plotting : dict, optional - Dictionary of plotting parameters. See :obj:`pygsp.plotting`. - (default is {}) - symmetrize_type : string, optional - Type of symmetrization to use for the adjacency matrix. See - :func:`pygsp.utils.symmetrization` for the options. - (default is 'average') - dist_type : string, optional - Type of distance to compute. See - :func:`pyflann.index.set_distance_type` for possible options. - (default is 'euclidean') + Whether to scale the features so that they lie in an l2-ball. + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. order : float, optional - Only used if dist_type is 'minkowski'; represents the order of the - Minkowski distance. (default is 0) + The order of the Minkowski distance for ``metric='minkowski'``. + kind : {'knn', 'radius'}, optional + Kind of nearest neighbor graph to create. Either ``'knn'`` for + k-nearest neighbors or ``'radius'`` for epsilon-nearest neighbors. + k : int, optional + Number of neighbors considered when building a k-NN graph with + ``type='knn'``. + radius : float, optional + Radius of the ball when building a radius graph with ``type='radius'``. + kernel_width : float, optional + Width of the Gaussian kernel. By default, it is set to the average of + the distances of neighboring vertices. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. Examples -------- >>> import matplotlib.pyplot as plt - >>> X = np.random.RandomState(42).uniform(size=(30, 2)) - >>> G = graphs.NNGraph(X) + >>> features = np.random.RandomState(42).uniform(size=(30, 2)) + >>> G = graphs.NNGraph(features) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=5) >>> _ = G.plot(ax=axes[1]) """ - def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, - rescale=True, k=10, sigma=None, epsilon=0.01, - plotting={}, symmetrize_type='average', dist_type='euclidean', - order=0, **kwargs): + def __init__(self, features, *, center=True, rescale=True, + metric='euclidean', order=0, + kind='knn', k=10, radius=0.01, + kernel_width=None, + backend='scipy-ckdtree', + **kwargs): - self.Xin = Xin - self.NNtype = NNtype - self.backend = backend + self.features = features # stored in coords, but scaled and centered self.center = center self.rescale = rescale - self.k = k - self.sigma = sigma - self.epsilon = epsilon - - _dist_translation['scipy-kdtree']['minkowski'] = order - _dist_translation['scipy-ckdtree']['minkowski'] = order - - self._nn_functions = { - 'knn': { - 'scipy-kdtree': _knn_sp_kdtree, - 'scipy-ckdtree': _knn_sp_ckdtree, - 'scipy-pdist': _knn_sp_pdist, - 'flann': _knn_flann, - 'nmslib': _knn_nmslib - }, - 'radius': { - 'scipy-kdtree': _radius_sp_kdtree, - 'scipy-ckdtree': _radius_sp_ckdtree, - 'scipy-pdist': _radius_sp_pdist, - 'flann': _radius_flann, - 'nmslib': _radius_nmslib - }, - } - - - - self.symmetrize_type = symmetrize_type - self.dist_type = dist_type + self.metric = metric self.order = order + self.kind = kind + self.k = k + self.radius = radius + self.kernel_width = kernel_width + self.backend = backend + + N, d = np.shape(features) + + if _nn_functions.get(kind) is None: + raise ValueError('Invalid kind "{}".'.format(kind)) - N, d = np.shape(self.Xin) - Xout = self.Xin + if backend not in _metrics.keys(): + raise ValueError('Invalid backend "{}".'.format(backend)) - if k >= N: + if _metrics['scipy-pdist'].get(metric) is None: + raise ValueError('Invalid metric "{}".'.format(metric)) + + if _nn_functions[kind].get(backend) is None: + raise ValueError('{} does not support kind "{}".'.format( + backend, kind)) + + if _metrics[backend].get(metric) is None: + raise ValueError('{} does not support the {} metric.'.format( + backend, metric)) + + if kind == 'knn' and k >= N: raise ValueError('The number of neighbors (k={}) must be smaller ' - 'than the number of nodes ({}).'.format(k, N)) - - if self.center: - Xout = center_input(Xout, N) - - if self.rescale: - Xout = rescale_input(Xout, N, d) - - if self._nn_functions.get(NNtype) == None: - raise ValueError('Invalid NNtype {}'.format(self.NNtype)) - - if self._nn_functions[NNtype].get(backend) == None: - raise ValueError('Invalid backend {} for type {}'.format(backend, - self.NNtype)) - - if self.NNtype == 'knn': - NN, D = self._nn_functions[NNtype][backend](Xout, k, - dist_type, order) - if self.sigma is None: - self.sigma = np.mean(D[:, 1:]) # Discard distance to self. - - elif self.NNtype == 'radius': - NN, D = self._nn_functions[NNtype][backend](Xout, epsilon, - dist_type, order) - if self.sigma is None: + 'than the number of vertices ({}).'.format(k, N)) + + if center: + features = center_features(features) + + if rescale: + features = rescale_features(features) + + if kind == 'knn': + NN, D = _nn_functions[kind][backend](features, k, metric, order) + if self.kernel_width is None: + # Discard distance to self. + self.kernel_width = np.mean(D[:, 1:]) + + elif kind == 'radius': + NN, D = _nn_functions[kind][backend](features, radius, + metric, order) + if self.kernel_width is None: # Discard distance to self. - self.sigma = np.mean([np.mean(d[1:]) for d in D]) + self.kernel_width = np.mean([np.mean(d[1:]) for d in D]) + countV = list(map(len, NN)) - count = sum(countV) + count = sum(countV) spi = np.zeros((count)) spj = np.zeros((count)) spv = np.zeros((count)) start = 0 for i in range(N): - leng = countV[i] - 1 - spi[start:start + leng] = np.kron(np.ones((leng)), i) - spj[start:start + leng] = NN[i][1:] - spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / - float(self.sigma)) - start = start + leng + length = countV[i] - 1 + distance = np.power(D[i][1:], 2) + spi[start:start + length] = np.kron(np.ones((length)), i) + spj[start:start + length] = NN[i][1:] + spv[start:start + length] = np.exp(-distance / self.kernel_width) + start = start + length W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) - # Sanity check - if np.shape(W)[0] != np.shape(W)[1]: - raise ValueError('Weight matrix W is not square') - - # Enforce symmetry. Note that checking symmetry with - # np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method=symmetrize_type) + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. + W = utils.symmetrize(W, method='average') - super(NNGraph, self).__init__(W=W, plotting=plotting, - coords=Xout, **kwargs) + super(NNGraph, self).__init__(W=W, coords=features, **kwargs) def _get_extra_repr(self): - return {'NNtype': self.NNtype, - 'backend': self.backend, - 'center': self.center, - 'rescale': self.rescale, - 'k': self.k, - 'sigma': '{:.2f}'.format(self.sigma), - 'epsilon': '{:.2f}'.format(self.epsilon), - 'symmetrize_type': self.symmetrize_type, - 'dist_type': self.dist_type, - 'order': self.order} + return { + 'center': self.center, + 'rescale': self.rescale, + 'metric': self.metric, + 'order': self.order, + 'kind': self.kind, + 'k': self.k, + 'radius': '{:.2f}'.format(self.radius), + 'kernel_width': '{:.2f}'.format(self.kernel_width), + 'backend': self.backend, + } diff --git a/pygsp/graphs/nngraphs/sensor.py b/pygsp/graphs/nngraphs/sensor.py index ac623a50..5491e0c7 100644 --- a/pygsp/graphs/nngraphs/sensor.py +++ b/pygsp/graphs/nngraphs/sensor.py @@ -75,7 +75,7 @@ def __init__(self, N=64, k=6, distributed=False, seed=None, **kwargs): coords = rs.uniform(0, 1, (N, 2)) - super(Sensor, self).__init__(Xin=coords, k=k, + super(Sensor, self).__init__(coords, k=k, rescale=False, center=False, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/sphere.py b/pygsp/graphs/nngraphs/sphere.py index e375b5b4..2ad9c233 100644 --- a/pygsp/graphs/nngraphs/sphere.py +++ b/pygsp/graphs/nngraphs/sphere.py @@ -64,7 +64,7 @@ def __init__(self, 'vertex_size': 80, } - super(Sphere, self).__init__(Xin=pts, k=10, + super(Sphere, self).__init__(pts, k=10, center=False, rescale=False, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/twomoons.py b/pygsp/graphs/nngraphs/twomoons.py index af87d0d8..6da8ca95 100644 --- a/pygsp/graphs/nngraphs/twomoons.py +++ b/pygsp/graphs/nngraphs/twomoons.py @@ -16,7 +16,7 @@ class TwoMoons(NNGraph): two_moons graph or a synthesized one (default is 'standard'). 'standard' : Create a two_moons graph from a based graph. 'synthesized' : Create a synthesized two_moon - sigmag : float + kernel_width : float Variance of the distance kernel (default = 0.05) dim : int The dimensionality of the points (default = 2). @@ -24,7 +24,7 @@ class TwoMoons(NNGraph): N : int Number of vertices (default = 2000) Only valid for moontype == 'synthesized'. - sigmad : float + variance : float Variance of the data (do not set it too high or you won't see anything) (default = 0.05) Only valid for moontype == 'synthesized'. @@ -44,11 +44,11 @@ class TwoMoons(NNGraph): """ - def _create_arc_moon(self, N, sigmad, distance, number, seed): + def _create_arc_moon(self, N, variance, distance, number, seed): rs = np.random.RandomState(seed) phi = rs.rand(N, 1) * np.pi r = 1 - rb = sigmad * rs.normal(size=(N, 1)) + rb = variance * rs.normal(size=(N, 1)) ab = rs.rand(N, 1) * 2 * np.pi b = rb * np.exp(1j * ab) bx = np.real(b) @@ -63,29 +63,29 @@ def _create_arc_moon(self, N, sigmad, distance, number, seed): return np.concatenate((moonx, moony), axis=1) - def __init__(self, moontype='standard', dim=2, sigmag=0.05, - N=400, sigmad=0.07, distance=0.5, seed=None, **kwargs): + def __init__(self, moontype='standard', dim=2, kernel_width=0.05, + N=400, variance=0.07, distance=0.5, seed=None, **kwargs): self.moontype = moontype self.dim = dim - self.sigmag = sigmag - self.sigmad = sigmad + self.kernel_width = kernel_width + self.variance = variance self.distance = distance self.seed = seed if moontype == 'standard': N1, N2 = 1000, 1000 data = utils.loadmat('pointclouds/two_moons') - Xin = data['features'][:dim].T + coords = data['features'][:dim].T elif moontype == 'synthesized': N1 = N // 2 N2 = N - N1 - coords1 = self._create_arc_moon(N1, sigmad, distance, 1, seed) - coords2 = self._create_arc_moon(N2, sigmad, distance, 2, seed) + coords1 = self._create_arc_moon(N1, variance, distance, 1, seed) + coords2 = self._create_arc_moon(N2, variance, distance, 2, seed) - Xin = np.concatenate((coords1, coords2)) + coords = np.concatenate((coords1, coords2)) else: raise ValueError('Unknown moontype {}'.format(moontype)) @@ -96,15 +96,15 @@ def __init__(self, moontype='standard', dim=2, sigmag=0.05, 'vertex_size': 30, } - super(TwoMoons, self).__init__(Xin=Xin, sigma=sigmag, k=5, + super(TwoMoons, self).__init__(coords, kernel_width=kernel_width, k=5, center=False, rescale=False, plotting=plotting, **kwargs) def _get_extra_repr(self): attrs = {'moontype': self.moontype, 'dim': self.dim, - 'sigmag': '{:.2f}'.format(self.sigmag), - 'sigmad': '{:.2f}'.format(self.sigmad), + 'kernel_width': '{:.2f}'.format(self.kernel_width), + 'variance': '{:.2f}'.format(self.variance), 'distance': '{:.2f}'.format(self.distance), 'seed': self.seed} attrs.update(super(TwoMoons, self)._get_extra_repr()) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index df23a0db..c516a8a7 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -8,7 +8,6 @@ from __future__ import division import unittest -import os import numpy as np import scipy.linalg from scipy import sparse @@ -349,96 +348,89 @@ def test_set_coordinates(self): self.assertRaises(ValueError, G.set_coordinates, 'invalid') def test_nngraph(self, n_vertices=30): - rs = np.random.RandomState(42) - Xin = rs.normal(size=(n_vertices, 3)) - dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] - if os.name != 'nt': - backends.append('flann') - order=3 # for minkowski, FLANN only accepts integer orders - - for cur_backend in backends: - for dist_type in dist_types: - #print("backend={} dist={}".format(cur_backend, dist_type)) - if (cur_backend == 'flann' and - dist_type == 'max_dist') or (cur_backend == 'nmslib' and - dist_type == 'minkowski'): - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='knn', backend=cur_backend, - dist_type=dist_type) - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='radius', backend=cur_backend, - dist_type=dist_type) + features = np.random.RandomState(42).normal(size=(n_vertices, 3)) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib', + 'flann'] + order = 3 # for minkowski, FLANN only accepts integer orders + + for backend in backends: + for metric in metrics: + if ((backend == 'flann' and metric == 'max_dist') or + (backend == 'nmslib' and metric == 'minkowski')): + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', backend=backend, + metric=metric) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='radius', backend=backend, + metric=metric) else: - if cur_backend == 'nmslib': - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='radius', backend=cur_backend, - dist_type=dist_type, order=order) + if backend == 'nmslib': + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='radius', backend=backend, + metric=metric, order=order) else: - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type, order=order) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order, + graphs.NNGraph(features, kind='radius', + backend=backend, + metric=metric, order=order) + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order) + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order, center=False) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order, + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order, rescale=False) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order, + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order, rescale=False, center=False) - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='badtype', backend=cur_backend, - dist_type=dist_type) - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='knn', backend='badtype', - dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='invalid', backend=backend, + metric=metric) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', backend='invalid', + metric=metric) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', k=n_vertices+1) def test_nngraph_consistency(self): - Xin = np.arange(90).reshape(30, 3) - dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib'] - if os.name != 'nt': - backends.append('flann') - num_neighbors=4 - epsilon=0.1 - - # use pdist as ground truth - G = graphs.NNGraph(Xin, NNtype='knn', + features = np.arange(90).reshape(30, 3) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + num_neighbors = 4 + radius = 0.1 + + G = graphs.NNGraph(features, kind='knn', backend='scipy-pdist', k=num_neighbors) - for cur_backend in backends: - for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + for backend in backends: + for metric in metrics: + if backend == 'flann' and metric == 'max_dist': continue - if cur_backend == 'nmslib' and dist_type == 'minkowski': + if backend == 'nmslib' and metric == 'minkowski': continue - #print("backend={} dist={}".format(cur_backend, dist_type)) - Gt = graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, k=num_neighbors) + Gt = graphs.NNGraph(features, kind='knn', + backend=backend, k=num_neighbors) d = sparse.linalg.norm(G.W - Gt.W) - self.assertTrue(d < 0.01, 'Graphs (knn {}/{}) are not identical error={}'.format(cur_backend, dist_type, d)) - - G = graphs.NNGraph(Xin, NNtype='radius', - backend='scipy-pdist', epsilon=epsilon) - for cur_backend in backends: - for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + self.assertTrue(d < 0.01, 'Graphs (knn {}/{}) are not identical error={}'.format(backend, metric, d)) + + G = graphs.NNGraph(features, kind='radius', + backend='scipy-pdist', radius=radius) + for backend in backends: + for metric in metrics: + if backend == 'flann' and metric == 'max_dist': continue - if cur_backend == 'nmslib': #unsupported + if backend == 'nmslib': continue - #print("backend={} dist={}".format(cur_backend, dist_type)) - Gt = graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, epsilon=epsilon) + Gt = graphs.NNGraph(features, kind='radius', + backend=backend, radius=radius) d = sparse.linalg.norm(G.W - Gt.W, ord=1) - self.assertTrue(d < 0.01, - 'Graphs (radius {}/{}) are not identical error={}'.format(cur_backend, dist_type, d)) - + self.assertTrue(d < 0.01, + 'Graphs (radius {}/{}) are not identical error={}'.format(backend, metric, d)) + def test_bunny(self): graphs.Bunny() diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py index 0aa29d44..a28ae0c3 100644 --- a/pygsp/tests/test_plotting.py +++ b/pygsp/tests/test_plotting.py @@ -50,8 +50,8 @@ def test_plot_graphs(self): # Classes who require parameters. if classname == 'NNGraph': - Xin = np.arange(90).reshape(30, 3) - Gs.append(Graph(Xin)) + features = np.random.RandomState(42).normal(size=(30, 3)) + Gs.append(Graph(features)) elif classname in ['ImgPatches', 'Grid2dImgPatches']: Gs.append(Graph(img=self._img, patch_shape=(3, 3))) else: diff --git a/setup.py b/setup.py index b49ade95..6bdd0b57 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,6 @@ 'scikit-image', # Approximate nearest neighbors for kNN graphs. 'cyflann', - 'pybind11', 'nmslib', # Convex optimization on graph. 'pyunlocbox', @@ -69,7 +68,7 @@ # Dependencies to build and upload packages. 'pkg': [ 'wheel', - 'twine' + 'twine', ], }, license="BSD", From 9b5d8c0040d4285b71a804eded4498d8edd5c4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 14 Feb 2019 01:11:24 +0100 Subject: [PATCH 030/118] python 2.7 doesn't support keyword-only args --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 85b7ab43..eaa2ff1b 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -284,7 +284,7 @@ class NNGraph(Graph): """ - def __init__(self, features, *, center=True, rescale=True, + def __init__(self, features, center=True, rescale=True, metric='euclidean', order=0, kind='knn', k=10, radius=0.01, kernel_width=None, From 720646e798e399edb75a160796d17a8f20f86225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 10:28:04 +0100 Subject: [PATCH 031/118] simplify test_nngraph (PR #21) --- pygsp/tests/test_graphs.py | 57 ++++++++++++++------------------------ 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c516a8a7..c018ad02 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -348,6 +348,7 @@ def test_set_coordinates(self): self.assertRaises(ValueError, G.set_coordinates, 'invalid') def test_nngraph(self, n_vertices=30): + """Test all the combinations of metric, kind, backend.""" features = np.random.RandomState(42).normal(size=(n_vertices, 3)) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib', @@ -356,46 +357,30 @@ def test_nngraph(self, n_vertices=30): for backend in backends: for metric in metrics: - if ((backend == 'flann' and metric == 'max_dist') or - (backend == 'nmslib' and metric == 'minkowski')): - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='knn', backend=backend, - metric=metric) - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='radius', backend=backend, - metric=metric) - else: - if backend == 'nmslib': - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='radius', backend=backend, - metric=metric, order=order) + for kind in ['knn', 'radius']: + params = dict(features=features, metric=metric, + order=order, kind=kind, backend=backend) + # Unsupported combinations. + if backend == 'flann' and metric == 'max_dist': + self.assertRaises(ValueError, graphs.NNGraph, **params) + elif backend == 'nmslib' and metric == 'minkowski': + self.assertRaises(ValueError, graphs.NNGraph, **params) + elif backend == 'nmslib' and kind == 'radius': + self.assertRaises(ValueError, graphs.NNGraph, **params) else: - graphs.NNGraph(features, kind='radius', - backend=backend, - metric=metric, order=order) - graphs.NNGraph(features, kind='knn', - backend=backend, - metric=metric, order=order) - graphs.NNGraph(features, kind='knn', - backend=backend, - metric=metric, order=order, - center=False) - graphs.NNGraph(features, kind='knn', - backend=backend, - metric=metric, order=order, - rescale=False) - graphs.NNGraph(features, kind='knn', - backend=backend, - metric=metric, order=order, - rescale=False, center=False) + graphs.NNGraph(**params, center=False) + graphs.NNGraph(**params, rescale=False) + graphs.NNGraph(**params, center=False, rescale=False) + + # Invalid parameters. + self.assertRaises(ValueError, graphs.NNGraph, features, + metric='invalid') self.assertRaises(ValueError, graphs.NNGraph, features, - kind='invalid', backend=backend, - metric=metric) + kind='invalid') self.assertRaises(ValueError, graphs.NNGraph, features, - kind='knn', backend='invalid', - metric=metric) + backend='invalid') self.assertRaises(ValueError, graphs.NNGraph, features, - kind='knn', k=n_vertices+1) + kind='knn', k=n_vertices+1) def test_nngraph_consistency(self): features = np.arange(90).reshape(30, 3) From 172d83f4d39724b7666ffffe45e45f1e81fba34b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 11:13:22 +0100 Subject: [PATCH 032/118] python 2.7 doesn't support dict unpacking --- pygsp/tests/test_graphs.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c018ad02..ff8c5797 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -358,19 +358,26 @@ def test_nngraph(self, n_vertices=30): for backend in backends: for metric in metrics: for kind in ['knn', 'radius']: - params = dict(features=features, metric=metric, - order=order, kind=kind, backend=backend) # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': - self.assertRaises(ValueError, graphs.NNGraph, **params) + self.assertRaises(ValueError, graphs.NNGraph, features, + metric=metric, backend=backend) elif backend == 'nmslib' and metric == 'minkowski': - self.assertRaises(ValueError, graphs.NNGraph, **params) + self.assertRaises(ValueError, graphs.NNGraph, features, + metric=metric, backend=backend) elif backend == 'nmslib' and kind == 'radius': - self.assertRaises(ValueError, graphs.NNGraph, **params) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind=kind, backend=backend) else: - graphs.NNGraph(**params, center=False) - graphs.NNGraph(**params, rescale=False) - graphs.NNGraph(**params, center=False, rescale=False) + graphs.NNGraph(features, metric=metric, order=order, + kind=kind, backend=backend, + center=False) + graphs.NNGraph(features, metric=metric, order=order, + kind=kind, backend=backend, + rescale=False) + graphs.NNGraph(features, metric=metric, order=order, + kind=kind, backend=backend, + center=False, rescale=False) # Invalid parameters. self.assertRaises(ValueError, graphs.NNGraph, features, From be16da94b3dfe4c4293bf94470fec283f742a2b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 11:30:14 +0100 Subject: [PATCH 033/118] correct number of edges (PR #21) --- pygsp/graphs/nngraphs/nngraph.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index eaa2ff1b..18d9d679 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -344,22 +344,23 @@ def __init__(self, features, center=True, rescale=True, # Discard distance to self. self.kernel_width = np.mean([np.mean(d[1:]) for d in D]) - countV = list(map(len, NN)) - count = sum(countV) - spi = np.zeros((count)) - spj = np.zeros((count)) - spv = np.zeros((count)) + n_edges = [len(x) - 1 for x in NN] # remove distance to self + value = np.empty(sum(n_edges), dtype=np.float) + row = np.empty_like(value, dtype=np.int) + col = np.empty_like(value, dtype=np.int) start = 0 - for i in range(N): - length = countV[i] - 1 - distance = np.power(D[i][1:], 2) - spi[start:start + length] = np.kron(np.ones((length)), i) - spj[start:start + length] = NN[i][1:] - spv[start:start + length] = np.exp(-distance / self.kernel_width) - start = start + length - - W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) + for vertex in range(N): + if kind == 'knn': + assert n_edges[vertex] == k + end = start + n_edges[vertex] + distance = np.power(D[vertex][1:], 2) + value[start:end] = np.exp(-distance / self.kernel_width) + row[start:end] = np.full(n_edges[vertex], vertex) + col[start:end] = NN[vertex][1:] + start = end + + W = sparse.csc_matrix((value, (row, col)), shape=(N, N)) # Enforce symmetry. May have been broken by k-NN. Checking symmetry # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. From a8798180d7ebd6f16ab16de681d9ef08e44b85c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 11:32:50 +0100 Subject: [PATCH 034/118] order=3 by default (order=0 is not supported by all backends) --- pygsp/graphs/nngraphs/nngraph.py | 2 +- pygsp/tests/test_graphs.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 18d9d679..592d84e5 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -285,7 +285,7 @@ class NNGraph(Graph): """ def __init__(self, features, center=True, rescale=True, - metric='euclidean', order=0, + metric='euclidean', order=3, kind='knn', k=10, radius=0.01, kernel_width=None, backend='scipy-ckdtree', diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index ff8c5797..072bc8b4 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -353,7 +353,6 @@ def test_nngraph(self, n_vertices=30): metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib', 'flann'] - order = 3 # for minkowski, FLANN only accepts integer orders for backend in backends: for metric in metrics: @@ -369,13 +368,13 @@ def test_nngraph(self, n_vertices=30): self.assertRaises(ValueError, graphs.NNGraph, features, kind=kind, backend=backend) else: - graphs.NNGraph(features, metric=metric, order=order, - kind=kind, backend=backend, + graphs.NNGraph(features, metric=metric, kind=kind, + backend=backend, center=False) - graphs.NNGraph(features, metric=metric, order=order, + graphs.NNGraph(features, metric=metric, kind=kind, backend=backend, rescale=False) - graphs.NNGraph(features, metric=metric, order=order, + graphs.NNGraph(features, metric=metric, kind=kind, backend=backend, center=False, rescale=False) From 719d397034a5b92824e4a831a280b193b2708ad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 12:02:02 +0100 Subject: [PATCH 035/118] avoid deprecation warning --- pygsp/graphs/nngraphs/nngraph.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 592d84e5..e0bfc4d3 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -123,9 +123,13 @@ def _radius_sp_ckdtree(features, radius, metric, order): def _knn_sp_pdist(features, num_neighbors, metric, order): - p = spatial.distance.pdist(features, - metric=_metrics['scipy-pdist'][metric], - p=order) + if metric == 'minkowski': + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric], + p=order) + else: + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric]) pd = spatial.distance.squareform(p) pds = np.sort(pd)[:, :num_neighbors+1] pdi = pd.argsort()[:, :num_neighbors+1] @@ -148,10 +152,14 @@ def _knn_nmslib(features, num_neighbors, metric, _): def _radius_sp_pdist(features, radius, metric, order): - n_vertices, _ = np.shape(features) - p = spatial.distance.pdist(features, - metric=_metrics['scipy-pdist'][metric], - p=order) + n_vertices, _ = features.shape + if metric == 'minkowski': + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric], + p=order) + else: + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric]) pd = spatial.distance.squareform(p) pdf = pd < radius D = [] @@ -162,7 +170,6 @@ def _radius_sp_pdist(features, radius, metric, order): # use the same conventions as in scipy.distance.kdtree NN.append(d[0:len(v)]) D.append(np.sort(v)) - return NN, D From eb2ab0b94728ccfcf4e8f76307f21bdeef1fb82c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 12:46:10 +0100 Subject: [PATCH 036/118] deal with empty neighborhood --- pygsp/graphs/nngraphs/nngraph.py | 19 +++++++++++++++++-- pygsp/tests/test_graphs.py | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index e0bfc4d3..5550ff7b 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -332,6 +332,9 @@ def __init__(self, features, center=True, rescale=True, raise ValueError('The number of neighbors (k={}) must be smaller ' 'than the number of vertices ({}).'.format(k, N)) + if kind == 'radius' and radius <= 0: + raise ValueError('The radius must be greater than 0.') + if center: features = center_features(features) @@ -348,14 +351,26 @@ def __init__(self, features, center=True, rescale=True, NN, D = _nn_functions[kind][backend](features, radius, metric, order) if self.kernel_width is None: - # Discard distance to self. - self.kernel_width = np.mean([np.mean(d[1:]) for d in D]) + # Discard distance to self and deal with disconnected vertices. + means = [] + for distance in D: + if len(distance) > 1: + means.append(np.mean(distance[1:])) + self.kernel_width = np.mean(means) if len(means) > 0 else 0 n_edges = [len(x) - 1 for x in NN] # remove distance to self value = np.empty(sum(n_edges), dtype=np.float) row = np.empty_like(value, dtype=np.int) col = np.empty_like(value, dtype=np.int) + if kind == 'radius': + n_disconnected = np.sum(np.asarray(n_edges) == 0) + if n_disconnected > 0: + logger = utils.build_logger(__name__) + logger.warning('{} vertices (out of {}) are disconnected. ' + 'Consider increasing the radius or setting ' + 'kind=knn.'.format(n_disconnected, N)) + start = 0 for vertex in range(N): if kind == 'knn': diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 072bc8b4..0b55aa44 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -387,6 +387,8 @@ def test_nngraph(self, n_vertices=30): backend='invalid') self.assertRaises(ValueError, graphs.NNGraph, features, kind='knn', k=n_vertices+1) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='radius', radius=0) def test_nngraph_consistency(self): features = np.arange(90).reshape(30, 3) From b2bfb51c2840489b25aa323bd358a82d80dd3e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 15:28:59 +0100 Subject: [PATCH 037/118] nngraph: don't store features --- pygsp/graphs/nngraphs/imgpatches.py | 10 +++++----- pygsp/graphs/nngraphs/nngraph.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/imgpatches.py b/pygsp/graphs/nngraphs/imgpatches.py index 95ce9149..0e09f9de 100644 --- a/pygsp/graphs/nngraphs/imgpatches.py +++ b/pygsp/graphs/nngraphs/imgpatches.py @@ -36,7 +36,7 @@ class ImgPatches(NNGraph): >>> from skimage import data, img_as_float >>> img = img_as_float(data.camera()[::64, ::64]) >>> G = graphs.ImgPatches(img, patch_shape=(3, 3)) - >>> N, d = G.features.shape + >>> N, d = G.patches.shape >>> print('{} nodes ({} x {} pixels)'.format(N, *img.shape)) 64 nodes (8 x 8 pixels) >>> print('{} features per node'.format(d)) @@ -85,16 +85,16 @@ def __init__(self, img, patch_shape=(3, 3), **kwargs): # Alternative: sklearn.feature_extraction.image.extract_patches_2d. # sklearn has much less dependencies than skimage. try: - import skimage + from skimage.util import view_as_windows except Exception as e: raise ImportError('Cannot import skimage, which is needed to ' 'extract patches. Try to install it with ' 'pip (or conda) install scikit-image. ' 'Original exception: {}'.format(e)) - patches = skimage.util.view_as_windows(img, window_shape=window_shape) - patches = patches.reshape((h * w, r * c * d)) + self.patches = view_as_windows(img, window_shape) + self.patches = self.patches.reshape((h * w, r * c * d)) - super(ImgPatches, self).__init__(patches, **kwargs) + super(ImgPatches, self).__init__(self.patches, **kwargs) def _get_extra_repr(self): attrs = dict(patch_shape=self.patch_shape) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 5550ff7b..4b2f3c7a 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -298,7 +298,7 @@ def __init__(self, features, center=True, rescale=True, backend='scipy-ckdtree', **kwargs): - self.features = features # stored in coords, but scaled and centered + # features is stored in coords, potentially standardized self.center = center self.rescale = rescale self.metric = metric From e1879eec5dbefa707bd7fb0bb388d613e4abf986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 15:34:38 +0100 Subject: [PATCH 038/118] nngraph: further cleanup --- pygsp/graphs/nngraphs/nngraph.py | 91 +++++++++++++++----------------- 1 file changed, 43 insertions(+), 48 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 4b2f3c7a..8a89d220 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -210,8 +210,7 @@ def _radius_flann(features, radius, metric, order): def center_features(features): - n_vertices, _ = features.shape - return features - np.kron(np.ones((n_vertices, 1)), np.mean(features, 0)) + return features - np.mean(features, axis=0) def rescale_features(features): @@ -298,67 +297,52 @@ def __init__(self, features, center=True, rescale=True, backend='scipy-ckdtree', **kwargs): - # features is stored in coords, potentially standardized - self.center = center - self.rescale = rescale - self.metric = metric - self.order = order - self.kind = kind - self.k = k - self.radius = radius - self.kernel_width = kernel_width - self.backend = backend - - N, d = np.shape(features) + n_vertices, _ = features.shape + if _metrics['scipy-pdist'].get(metric) is None: + raise ValueError('Invalid metric "{}".'.format(metric)) if _nn_functions.get(kind) is None: raise ValueError('Invalid kind "{}".'.format(kind)) - if backend not in _metrics.keys(): raise ValueError('Invalid backend "{}".'.format(backend)) - - if _metrics['scipy-pdist'].get(metric) is None: - raise ValueError('Invalid metric "{}".'.format(metric)) - - if _nn_functions[kind].get(backend) is None: - raise ValueError('{} does not support kind "{}".'.format( - backend, kind)) - if _metrics[backend].get(metric) is None: - raise ValueError('{} does not support the {} metric.'.format( + raise ValueError('{} does not support metric="{}".'.format( backend, metric)) - - if kind == 'knn' and k >= N: + if _nn_functions[kind].get(backend) is None: + raise ValueError('{} does not support kind="{}".'.format( + backend, kind)) + if kind == 'knn' and k >= n_vertices: raise ValueError('The number of neighbors (k={}) must be smaller ' - 'than the number of vertices ({}).'.format(k, N)) - - if kind == 'radius' and radius <= 0: + 'than the number of vertices ({}).'.format( + k, n_vertices)) + if kind == 'radius' and radius is not None and radius <= 0: raise ValueError('The radius must be greater than 0.') if center: features = center_features(features) - if rescale: features = rescale_features(features) + nn_function = _nn_functions[kind][backend] if kind == 'knn': - NN, D = _nn_functions[kind][backend](features, k, metric, order) - if self.kernel_width is None: - # Discard distance to self. - self.kernel_width = np.mean(D[:, 1:]) - + neighbors, distances = nn_function(features, k, metric, order) elif kind == 'radius': - NN, D = _nn_functions[kind][backend](features, radius, - metric, order) - if self.kernel_width is None: - # Discard distance to self and deal with disconnected vertices. + neighbors, distances = nn_function(features, radius, metric, order) + + if kernel_width is None: + # Discard distance to self and deal with disconnected vertices. + if kind == 'knn': + kernel_width = np.mean(distances[:, 1:]) + elif kind == 'radius': means = [] - for distance in D: + for distance in distances: if len(distance) > 1: means.append(np.mean(distance[1:])) - self.kernel_width = np.mean(means) if len(means) > 0 else 0 + kernel_width = np.mean(means) if len(means) > 0 else 0 + # Alternative: kernel_width = radius / 2 + # Alternative: kernel_width = radius / np.log(2) - n_edges = [len(x) - 1 for x in NN] # remove distance to self + n_edges = [len(n) - 1 for n in neighbors] # remove distance to self value = np.empty(sum(n_edges), dtype=np.float) row = np.empty_like(value, dtype=np.int) col = np.empty_like(value, dtype=np.int) @@ -369,25 +353,36 @@ def __init__(self, features, center=True, rescale=True, logger = utils.build_logger(__name__) logger.warning('{} vertices (out of {}) are disconnected. ' 'Consider increasing the radius or setting ' - 'kind=knn.'.format(n_disconnected, N)) + 'kind=knn.'.format(n_disconnected, n_vertices)) start = 0 - for vertex in range(N): + for vertex in range(n_vertices): if kind == 'knn': assert n_edges[vertex] == k end = start + n_edges[vertex] - distance = np.power(D[vertex][1:], 2) - value[start:end] = np.exp(-distance / self.kernel_width) + distance = np.power(distances[vertex][1:], 2) + value[start:end] = np.exp(-distance / kernel_width) row[start:end] = np.full(n_edges[vertex], vertex) - col[start:end] = NN[vertex][1:] + col[start:end] = neighbors[vertex][1:] start = end - W = sparse.csc_matrix((value, (row, col)), shape=(N, N)) + W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) # Enforce symmetry. May have been broken by k-NN. Checking symmetry # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. W = utils.symmetrize(W, method='average') + # features is stored in coords, potentially standardized + self.center = center + self.rescale = rescale + self.metric = metric + self.order = order + self.kind = kind + self.radius = radius + self.kernel_width = kernel_width + self.k = k + self.backend = backend + super(NNGraph, self).__init__(W=W, coords=features, **kwargs) def _get_extra_repr(self): From bf7427fa96dd522f26274ebd9569c6ec631f7a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 15 Feb 2019 16:05:12 +0100 Subject: [PATCH 039/118] nngraph: standardize instead of center and rescale Rescaling was useful before the adaptive setting (to the mean of the distances) of the kernel width. It is now useless. Centering can be combined with normalizing to unit variance to standardize the data. --- pygsp/graphs/nngraphs/bunny.py | 4 +--- pygsp/graphs/nngraphs/cube.py | 4 +--- pygsp/graphs/nngraphs/nngraph.py | 35 +++++++++---------------------- pygsp/graphs/nngraphs/sensor.py | 4 +--- pygsp/graphs/nngraphs/sphere.py | 4 +--- pygsp/graphs/nngraphs/twomoons.py | 1 - pygsp/tests/test_graphs.py | 13 ++++-------- 7 files changed, 18 insertions(+), 47 deletions(-) diff --git a/pygsp/graphs/nngraphs/bunny.py b/pygsp/graphs/nngraphs/bunny.py index 21729823..cc269c10 100644 --- a/pygsp/graphs/nngraphs/bunny.py +++ b/pygsp/graphs/nngraphs/bunny.py @@ -34,7 +34,5 @@ def __init__(self, **kwargs): 'distance': 8, } - super(Bunny, self).__init__(data['bunny'], - center=False, rescale=False, - kind='radius', radius=0.02, + super(Bunny, self).__init__(data['bunny'], kind='radius', radius=0.02, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/cube.py b/pygsp/graphs/nngraphs/cube.py index 85f51347..2328c2cb 100644 --- a/pygsp/graphs/nngraphs/cube.py +++ b/pygsp/graphs/nngraphs/cube.py @@ -89,9 +89,7 @@ def __init__(self, 'distance': 9, } - super(Cube, self).__init__(pts, k=10, - center=False, rescale=False, - plotting=plotting, **kwargs) + super(Cube, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): attrs = {'radius': '{:.2f}'.format(self.radius), diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 8a89d220..0548d803 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -209,18 +209,6 @@ def _radius_flann(features, radius, metric, order): } -def center_features(features): - return features - np.mean(features, axis=0) - - -def rescale_features(features): - n_vertices, dimensionality = features.shape - bounding_radius = 0.5 * np.linalg.norm(np.amax(features, axis=0) - - np.amin(features, axis=0), 2) - scale = np.power(n_vertices, 1 / min(dimensionality, 3)) / 10 - return features * scale / bounding_radius - - class NNGraph(Graph): r"""Nearest-neighbor graph. @@ -244,10 +232,9 @@ class NNGraph(Graph): features : ndarray An `N`-by-`d` matrix, where `N` is the number of nodes in the graph and `d` is the number of features. - center : bool, optional - Whether to center the features to have zero mean. - rescale : bool, optional - Whether to scale the features so that they lie in an l2-ball. + standardize : bool, optional + Whether to rescale the features so that each feature has a mean of 0 + and standard deviation of 1 (unit variance). metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional Metric used to compute pairwise distances. order : float, optional @@ -290,7 +277,7 @@ class NNGraph(Graph): """ - def __init__(self, features, center=True, rescale=True, + def __init__(self, features, standardize=False, metric='euclidean', order=3, kind='knn', k=10, radius=0.01, kernel_width=None, @@ -318,10 +305,10 @@ def __init__(self, features, center=True, rescale=True, if kind == 'radius' and radius is not None and radius <= 0: raise ValueError('The radius must be greater than 0.') - if center: - features = center_features(features) - if rescale: - features = rescale_features(features) + if standardize: + # Don't alter the original data (users would be surprised). + features = features - np.mean(features, axis=0) + features /= np.std(features, axis=0) nn_function = _nn_functions[kind][backend] if kind == 'knn': @@ -373,8 +360,7 @@ def __init__(self, features, center=True, rescale=True, W = utils.symmetrize(W, method='average') # features is stored in coords, potentially standardized - self.center = center - self.rescale = rescale + self.standardize = standardize self.metric = metric self.order = order self.kind = kind @@ -387,8 +373,7 @@ def __init__(self, features, center=True, rescale=True, def _get_extra_repr(self): return { - 'center': self.center, - 'rescale': self.rescale, + 'standardize': self.standardize, 'metric': self.metric, 'order': self.order, 'kind': self.kind, diff --git a/pygsp/graphs/nngraphs/sensor.py b/pygsp/graphs/nngraphs/sensor.py index 5491e0c7..09764b02 100644 --- a/pygsp/graphs/nngraphs/sensor.py +++ b/pygsp/graphs/nngraphs/sensor.py @@ -75,9 +75,7 @@ def __init__(self, N=64, k=6, distributed=False, seed=None, **kwargs): coords = rs.uniform(0, 1, (N, 2)) - super(Sensor, self).__init__(coords, k=k, - rescale=False, center=False, - plotting=plotting, **kwargs) + super(Sensor, self).__init__(coords, k=k, plotting=plotting, **kwargs) def _get_extra_repr(self): return {'k': self.k, diff --git a/pygsp/graphs/nngraphs/sphere.py b/pygsp/graphs/nngraphs/sphere.py index 2ad9c233..f323f562 100644 --- a/pygsp/graphs/nngraphs/sphere.py +++ b/pygsp/graphs/nngraphs/sphere.py @@ -64,9 +64,7 @@ def __init__(self, 'vertex_size': 80, } - super(Sphere, self).__init__(pts, k=10, - center=False, rescale=False, - plotting=plotting, **kwargs) + super(Sphere, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): attrs = {'radius': '{:.2f}'.format(self.radius), diff --git a/pygsp/graphs/nngraphs/twomoons.py b/pygsp/graphs/nngraphs/twomoons.py index 6da8ca95..1d7cdde1 100644 --- a/pygsp/graphs/nngraphs/twomoons.py +++ b/pygsp/graphs/nngraphs/twomoons.py @@ -97,7 +97,6 @@ def __init__(self, moontype='standard', dim=2, kernel_width=0.05, } super(TwoMoons, self).__init__(coords, kernel_width=kernel_width, k=5, - center=False, rescale=False, plotting=plotting, **kwargs) def _get_extra_repr(self): diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 0b55aa44..25ef4077 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -368,15 +368,10 @@ def test_nngraph(self, n_vertices=30): self.assertRaises(ValueError, graphs.NNGraph, features, kind=kind, backend=backend) else: - graphs.NNGraph(features, metric=metric, kind=kind, - backend=backend, - center=False) - graphs.NNGraph(features, metric=metric, - kind=kind, backend=backend, - rescale=False) - graphs.NNGraph(features, metric=metric, - kind=kind, backend=backend, - center=False, rescale=False) + for standardize in [True, False]: + graphs.NNGraph(features, standardize=standardize, + metric=metric, kind=kind, + backend=backend) # Invalid parameters. self.assertRaises(ValueError, graphs.NNGraph, features, From ec74ed7e9e2efa8c2ae6140ed81f2bcbf2ce3f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sat, 16 Feb 2019 02:11:35 +0100 Subject: [PATCH 040/118] nngraph: simplify default kernel_width --- pygsp/graphs/nngraphs/nngraph.py | 33 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 0548d803..e0f775cd 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -316,23 +316,7 @@ def __init__(self, features, standardize=False, elif kind == 'radius': neighbors, distances = nn_function(features, radius, metric, order) - if kernel_width is None: - # Discard distance to self and deal with disconnected vertices. - if kind == 'knn': - kernel_width = np.mean(distances[:, 1:]) - elif kind == 'radius': - means = [] - for distance in distances: - if len(distance) > 1: - means.append(np.mean(distance[1:])) - kernel_width = np.mean(means) if len(means) > 0 else 0 - # Alternative: kernel_width = radius / 2 - # Alternative: kernel_width = radius / np.log(2) - n_edges = [len(n) - 1 for n in neighbors] # remove distance to self - value = np.empty(sum(n_edges), dtype=np.float) - row = np.empty_like(value, dtype=np.int) - col = np.empty_like(value, dtype=np.int) if kind == 'radius': n_disconnected = np.sum(np.asarray(n_edges) == 0) @@ -342,19 +326,30 @@ def __init__(self, features, standardize=False, 'Consider increasing the radius or setting ' 'kind=knn.'.format(n_disconnected, n_vertices)) + value = np.empty(sum(n_edges), dtype=np.float) + row = np.empty_like(value, dtype=np.int) + col = np.empty_like(value, dtype=np.int) start = 0 for vertex in range(n_vertices): if kind == 'knn': assert n_edges[vertex] == k end = start + n_edges[vertex] - distance = np.power(distances[vertex][1:], 2) - value[start:end] = np.exp(-distance / kernel_width) + value[start:end] = distances[vertex][1:] row[start:end] = np.full(n_edges[vertex], vertex) col[start:end] = neighbors[vertex][1:] start = end - W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + if kernel_width is None: + kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan + # Alternative: kernel_width = radius / 2 or radius / np.log(2). + # Users can easily do the above. + + def kernel(distance, width): + return np.exp(-distance**2 / width) + + W.data = kernel(W.data, kernel_width) + # Enforce symmetry. May have been broken by k-NN. Checking symmetry # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. W = utils.symmetrize(W, method='average') From c1e1148a8ee713f005a255ed649630aac4b95666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sat, 16 Feb 2019 02:51:31 +0100 Subject: [PATCH 041/118] nngraph: test empty graph --- pygsp/graphs/nngraphs/nngraph.py | 10 +++++----- pygsp/tests/test_graphs.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index e0f775cd..15ee63c9 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -298,11 +298,11 @@ def __init__(self, features, standardize=False, if _nn_functions[kind].get(backend) is None: raise ValueError('{} does not support kind="{}".'.format( backend, kind)) - if kind == 'knn' and k >= n_vertices: - raise ValueError('The number of neighbors (k={}) must be smaller ' - 'than the number of vertices ({}).'.format( - k, n_vertices)) - if kind == 'radius' and radius is not None and radius <= 0: + if not (1 <= k < n_vertices): + raise ValueError('The number of neighbors (k={}) must be greater ' + 'than 0 and smaller than the number of vertices ' + '({}).'.format(k, n_vertices)) + if (radius is not None) and (radius <= 0): raise ValueError('The radius must be greater than 0.') if standardize: diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 25ef4077..42e1235c 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -381,10 +381,21 @@ def test_nngraph(self, n_vertices=30): self.assertRaises(ValueError, graphs.NNGraph, features, backend='invalid') self.assertRaises(ValueError, graphs.NNGraph, features, - kind='knn', k=n_vertices+1) + kind='knn', k=0) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', k=n_vertices) self.assertRaises(ValueError, graphs.NNGraph, features, kind='radius', radius=0) + # Empty graph. + for backend in backends: + if backend == 'nmslib': + continue + with self.assertLogs(level='WARNING'): + graph = graphs.NNGraph(features, kind='radius', radius=1e-9, + backend=backend) + self.assertEqual(graph.n_edges, 0) + def test_nngraph_consistency(self): features = np.arange(90).reshape(30, 3) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] From 57ce98c52ef8519627d0192c209239c89a13237c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sat, 16 Feb 2019 03:10:09 +0100 Subject: [PATCH 042/118] no assertLogs in python 2.7 --- pygsp/tests/test_graphs.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 42e1235c..6f863bbb 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -7,7 +7,9 @@ from __future__ import division +import sys import unittest + import numpy as np import scipy.linalg from scipy import sparse @@ -388,13 +390,14 @@ def test_nngraph(self, n_vertices=30): kind='radius', radius=0) # Empty graph. - for backend in backends: - if backend == 'nmslib': - continue - with self.assertLogs(level='WARNING'): - graph = graphs.NNGraph(features, kind='radius', radius=1e-9, - backend=backend) - self.assertEqual(graph.n_edges, 0) + if sys.version_info > (3, 4): # no assertLogs in python 2.7 + for backend in backends: + if backend == 'nmslib': + continue + with self.assertLogs(level='WARNING'): + graph = graphs.NNGraph(features, kind='radius', + radius=1e-9, backend=backend) + self.assertEqual(graph.n_edges, 0) def test_nngraph_consistency(self): features = np.arange(90).reshape(30, 3) From 695272b1b73e2f55b0f31fc351121d82470af576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sat, 16 Feb 2019 03:03:14 +0100 Subject: [PATCH 043/118] nngraph: fix symmetrization Symmetrizing with the average was setting the distance between v1 and v2 as the average between 0 and the true distance if v1 was the k-NN of v2 but v2 was not in the k-NN of v1. Moreover, symmetrizing before taking the mean assures that every distance is counted twice (only some would be counted twice otherwise). --- doc/tutorials/optimization.rst | 4 ++-- pygsp/filters/filter.py | 4 ++-- pygsp/graphs/fourier.py | 2 +- pygsp/graphs/nngraphs/nngraph.py | 8 ++++---- pygsp/tests/test_filters.py | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/tutorials/optimization.rst b/doc/tutorials/optimization.rst index 7127a9c5..dd6d91ad 100644 --- a/doc/tutorials/optimization.rst +++ b/doc/tutorials/optimization.rst @@ -85,7 +85,7 @@ We start with the graph TV regularization. We will use the :class:`pyunlocbox.so >>> prob1 = pyunlocbox.solvers.solve([d, r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 2.250584e+02 + objective function f(sol) = 2.138668e+02 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob1['sol']) @@ -107,7 +107,7 @@ This figure shows the label signal recovered by graph total variation regulariza >>> prob2 = pyunlocbox.solvers.solve([r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 6.504290e+01 + objective function f(sol) = 7.413918e+01 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob2['sol']) diff --git a/pygsp/filters/filter.py b/pygsp/filters/filter.py index 8fbf44d7..56d5508d 100644 --- a/pygsp/filters/filter.py +++ b/pygsp/filters/filter.py @@ -254,7 +254,7 @@ def filter(self, s, method='chebyshev', order=30): >>> _ = G.plot(s1, ax=axes[0]) >>> _ = G.plot(s2, ax=axes[1]) >>> print('{:.5f}'.format(np.linalg.norm(s1 - s2))) - 0.26808 + 0.28049 Perfect reconstruction with Itersine, a tight frame: @@ -449,7 +449,7 @@ def estimate_frame_bounds(self, x=None): A=1.708, B=2.359 >>> A, B = g.estimate_frame_bounds(G.e) >>> print('A={:.3f}, B={:.3f}'.format(A, B)) - A=1.723, B=2.359 + A=1.712, B=2.348 The frame bounds can be seen in the plot of the filter bank as the minimum and maximum of their squared sum (the black curve): diff --git a/pygsp/graphs/fourier.py b/pygsp/graphs/fourier.py index 80f55769..286b3252 100644 --- a/pygsp/graphs/fourier.py +++ b/pygsp/graphs/fourier.py @@ -78,7 +78,7 @@ def coherence(self): >>> graph.compute_fourier_basis() >>> minimum = 1 / np.sqrt(graph.n_vertices) >>> print('{:.2f} in [{:.2f}, 1]'.format(graph.coherence, minimum)) - 0.88 in [0.12, 1] + 0.93 in [0.12, 1] >>> >>> # Plot the most localized eigenvector. >>> import matplotlib.pyplot as plt diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 15ee63c9..225f7e6c 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -340,6 +340,10 @@ def __init__(self, features, standardize=False, start = end W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. + W = utils.symmetrize(W, method='fill') + if kernel_width is None: kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan # Alternative: kernel_width = radius / 2 or radius / np.log(2). @@ -350,10 +354,6 @@ def kernel(distance, width): W.data = kernel(W.data, kernel_width) - # Enforce symmetry. May have been broken by k-NN. Checking symmetry - # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method='average') - # features is stored in coords, potentially standardized self.standardize = standardize self.metric = metric diff --git a/pygsp/tests/test_filters.py b/pygsp/tests/test_filters.py index 0bf066f5..fe093da0 100644 --- a/pygsp/tests/test_filters.py +++ b/pygsp/tests/test_filters.py @@ -228,7 +228,7 @@ def test_modulation_gabor(self): f2 = filters.Gabor(self._G, f) s1 = f1.filter(self._signal) s2 = f2.filter(self._signal) - np.testing.assert_allclose(s1, -s2, atol=1e-5) + np.testing.assert_allclose(s1, s2, atol=1e-5) def test_halfcosine(self): f = filters.HalfCosine(self._G, Nf=4) From 505e456121fe82d6774727413a764e9d3733366f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sat, 16 Feb 2019 03:15:15 +0100 Subject: [PATCH 044/118] nngraph: fix radius cKDTree (PR #21) --- pygsp/graphs/nngraphs/nngraph.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 225f7e6c..3a5a4d2a 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -107,19 +107,16 @@ def _radius_sp_kdtree(features, radius, metric, order): def _radius_sp_ckdtree(features, radius, metric, order): p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] n_vertices, _ = features.shape - kdt = spatial.cKDTree(features) - nn = kdt.query_ball_point(features, r=radius, p=p, n_jobs=-1) - D = [] - NN = [] - for k in range(n_vertices): - x = np.tile(features[k, :], (len(nn[k]), 1)) - d = np.linalg.norm(x - features[nn[k], :], - ord=_metrics['scipy-ckdtree'][metric], - axis=1) - nidx = d.argsort() - NN.append(np.take(nn[k], nidx)) - D.append(np.sort(d)) - return NN, D + tree = spatial.cKDTree(features) + D, NN = tree.query(features, k=n_vertices, distance_upper_bound=radius, + p=p, n_jobs=-1) + distances = [] + neighbors = [] + for d, n in zip(D, NN): + mask = (d != np.inf) + distances.append(d[mask]) + neighbors.append(n[mask]) + return neighbors, distances def _knn_sp_pdist(features, num_neighbors, metric, order): From 204ad1976ad532cdbe6e5aabb2c487840b98ebdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Tue, 19 Feb 2019 14:48:59 +0100 Subject: [PATCH 045/118] compact code --- pygsp/tests/test_graphs.py | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 6f863bbb..5c97a2f6 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -351,7 +351,8 @@ def test_set_coordinates(self): def test_nngraph(self, n_vertices=30): """Test all the combinations of metric, kind, backend.""" - features = np.random.RandomState(42).normal(size=(n_vertices, 3)) + Graph = graphs.NNGraph + data = np.random.RandomState(42).normal(size=(n_vertices, 3)) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib', 'flann'] @@ -361,42 +362,35 @@ def test_nngraph(self, n_vertices=30): for kind in ['knn', 'radius']: # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': - self.assertRaises(ValueError, graphs.NNGraph, features, + self.assertRaises(ValueError, Graph, data, metric=metric, backend=backend) elif backend == 'nmslib' and metric == 'minkowski': - self.assertRaises(ValueError, graphs.NNGraph, features, + self.assertRaises(ValueError, Graph, data, metric=metric, backend=backend) elif backend == 'nmslib' and kind == 'radius': - self.assertRaises(ValueError, graphs.NNGraph, features, + self.assertRaises(ValueError, Graph, data, kind=kind, backend=backend) else: for standardize in [True, False]: - graphs.NNGraph(features, standardize=standardize, - metric=metric, kind=kind, - backend=backend) + Graph(data, standardize=standardize, metric=metric, + kind=kind, backend=backend) # Invalid parameters. - self.assertRaises(ValueError, graphs.NNGraph, features, - metric='invalid') - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='invalid') - self.assertRaises(ValueError, graphs.NNGraph, features, - backend='invalid') - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='knn', k=0) - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='knn', k=n_vertices) - self.assertRaises(ValueError, graphs.NNGraph, features, - kind='radius', radius=0) + self.assertRaises(ValueError, Graph, data, metric='invalid') + self.assertRaises(ValueError, Graph, data, kind='invalid') + self.assertRaises(ValueError, Graph, data, backend='invalid') + self.assertRaises(ValueError, Graph, data, kind='knn', k=0) + self.assertRaises(ValueError, Graph, data, kind='knn', k=n_vertices) + self.assertRaises(ValueError, Graph, data, kind='radius', radius=0) # Empty graph. if sys.version_info > (3, 4): # no assertLogs in python 2.7 for backend in backends: if backend == 'nmslib': - continue + continue # nmslib doesn't support radius with self.assertLogs(level='WARNING'): - graph = graphs.NNGraph(features, kind='radius', - radius=1e-9, backend=backend) + graph = Graph(data, kind='radius', radius=1e-9, + backend=backend) self.assertEqual(graph.n_edges, 0) def test_nngraph_consistency(self): From 2b253375de2e37490624e17539da1cdc9c8eb8c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Tue, 19 Feb 2019 15:00:08 +0100 Subject: [PATCH 046/118] NNGraph: allow user to pass parameters to backends --- pygsp/graphs/nngraphs/nngraph.py | 96 +++++++++++++++++++------------- pygsp/tests/test_graphs.py | 13 +++++ 2 files changed, 71 insertions(+), 38 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 3a5a4d2a..0bbeea43 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -68,48 +68,53 @@ def _import_nmslib(): return nms -def _knn_sp_kdtree(features, num_neighbors, metric, order): +def _knn_sp_kdtree(features, k, metric, order, params): p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] - kdt = spatial.KDTree(features) - D, NN = kdt.query(features, k=(num_neighbors + 1), p=p) - return NN, D + eps = params.pop('eps', 0) + tree = spatial.KDTree(features, **params) + distances, neighbors = tree.query(features, k=k+1, p=p, eps=eps) + return neighbors, distances -def _knn_sp_ckdtree(features, num_neighbors, metric, order): +def _knn_sp_ckdtree(features, k, metric, order, params): p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] - kdt = spatial.cKDTree(features) - D, NN = kdt.query(features, k=(num_neighbors + 1), p=p, n_jobs=-1) - return NN, D + eps = params.pop('eps', 0) + kdt = spatial.cKDTree(features, **params) + distances, neighbors = kdt.query(features, k=k+1, p=p, eps=eps, n_jobs=-1) + return neighbors, distances -def _knn_flann(features, num_neighbors, metric, order): +def _knn_flann(features, k, metric, order, params): cfl = _import_cfl() cfl.set_distance_type(metric, order=order) - c = cfl.FLANNIndex(algorithm='kdtree') - c.build_index(features) + index = cfl.FLANNIndex() + index.build_index(features, **params) # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = c.nn_index(features, num_neighbors + 1) - c.free_index() + neighbors, distances = index.nn_index(features, k+1) + index.free_index() if metric == 'euclidean': # flann returns squared distances - return NN, np.sqrt(D) - return NN, D + np.sqrt(distances, out=distances) + return neighbors, distances -def _radius_sp_kdtree(features, radius, metric, order): +def _radius_sp_kdtree(features, radius, metric, order, params): p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] - kdt = spatial.KDTree(features) - D, NN = kdt.query(features, k=None, distance_upper_bound=radius, p=p) - return NN, D + eps = params.pop('eps', 0) + tree = spatial.KDTree(features, **params) + distances, neighbors = tree.query(features, p=p, eps=eps, k=None, + distance_upper_bound=radius) + return neighbors, distances -def _radius_sp_ckdtree(features, radius, metric, order): +def _radius_sp_ckdtree(features, radius, metric, order, params): p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] n_vertices, _ = features.shape - tree = spatial.cKDTree(features) + eps = params.pop('eps', 0) + tree = spatial.cKDTree(features, **params) D, NN = tree.query(features, k=n_vertices, distance_upper_bound=radius, - p=p, n_jobs=-1) + p=p, eps=eps, n_jobs=-1) distances = [] neighbors = [] for d, n in zip(D, NN): @@ -119,7 +124,7 @@ def _radius_sp_ckdtree(features, radius, metric, order): return neighbors, distances -def _knn_sp_pdist(features, num_neighbors, metric, order): +def _knn_sp_pdist(features, num_neighbors, metric, order, _): if metric == 'minkowski': p = spatial.distance.pdist(features, metric=_metrics['scipy-pdist'][metric], @@ -133,22 +138,26 @@ def _knn_sp_pdist(features, num_neighbors, metric, order): return pdi, pds -def _knn_nmslib(features, num_neighbors, metric, _): +def _knn_nmslib(features, num_neighbors, metric, _, params): n_vertices, _ = features.shape ncpu = multiprocessing.cpu_count() nms = _import_nmslib() - nmsidx = nms.init(space=_metrics['nmslib'][metric]) - nmsidx.addDataPointBatch(features) - nmsidx.createIndex() - q = nmsidx.knnQueryBatch(features, k=num_neighbors+1, - num_threads=int(ncpu/2)) + params_index = params.pop('index', None) + params_query = params.pop('query', None) + index = nms.init(space=_metrics['nmslib'][metric], **params) + index.addDataPointBatch(features) + index.createIndex(params_index) + if params_query is not None: + index.setQueryTimeParams(params_query) + q = index.knnQueryBatch(features, k=num_neighbors+1, + num_threads=int(ncpu/2)) nn, d = zip(*q) D = np.concatenate(d).reshape(n_vertices, num_neighbors+1) NN = np.concatenate(nn).reshape(n_vertices, num_neighbors+1) return NN, D -def _radius_sp_pdist(features, radius, metric, order): +def _radius_sp_pdist(features, radius, metric, order, _): n_vertices, _ = features.shape if metric == 'minkowski': p = spatial.distance.pdist(features, @@ -170,19 +179,19 @@ def _radius_sp_pdist(features, radius, metric, order): return NN, D -def _radius_flann(features, radius, metric, order): +def _radius_flann(features, radius, metric, order, params): n_vertices, _ = features.shape cfl = _import_cfl() cfl.set_distance_type(metric, order=order) - c = cfl.FLANNIndex(algorithm='kdtree') - c.build_index(features) + index = cfl.FLANNIndex() + index.build_index(features, **params) D = [] NN = [] for k in range(n_vertices): - nn, d = c.nn_radius(features[k, :], radius**2) + nn, d = index.nn_radius(features[k, :], radius**2) D.append(d) NN.append(nn) - c.free_index() + index.free_index() if metric == 'euclidean': # Flann returns squared distances. D = list(map(np.sqrt, D)) @@ -263,6 +272,10 @@ class NNGraph(Graph): `_. That method is an approximation. It should be the fastest in high-dimensional spaces. + kwargs : dict + Parameters to be passed to the :class:`Graph` constructor or the + backend library. + Examples -------- >>> import matplotlib.pyplot as plt @@ -283,6 +296,13 @@ def __init__(self, features, standardize=False, n_vertices, _ = features.shape + params_graph = dict() + for key in ['lap_type', 'plotting']: + try: + params_graph[key] = kwargs.pop(key) + except KeyError: + pass + if _metrics['scipy-pdist'].get(metric) is None: raise ValueError('Invalid metric "{}".'.format(metric)) if _nn_functions.get(kind) is None: @@ -309,9 +329,9 @@ def __init__(self, features, standardize=False, nn_function = _nn_functions[kind][backend] if kind == 'knn': - neighbors, distances = nn_function(features, k, metric, order) + neighbors, distances = nn_function(features, k, metric, order, kwargs) elif kind == 'radius': - neighbors, distances = nn_function(features, radius, metric, order) + neighbors, distances = nn_function(features, radius, metric, order, kwargs) n_edges = [len(n) - 1 for n in neighbors] # remove distance to self @@ -361,7 +381,7 @@ def kernel(distance, width): self.k = k self.backend = backend - super(NNGraph, self).__init__(W=W, coords=features, **kwargs) + super(NNGraph, self).__init__(W=W, coords=features, **params_graph) def _get_extra_repr(self): return { diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 5c97a2f6..c0142317 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -393,6 +393,19 @@ def test_nngraph(self, n_vertices=30): backend=backend) self.assertEqual(graph.n_edges, 0) + # Backend parameters. + Graph(data, lap_type='normalized') + Graph(data, plotting=dict(vertex_size=10)) + Graph(data, backend='flann', kind='knn', algorithm='kmeans') + Graph(data, backend='flann', kind='radius', random_seed=0) + Graph(data, backend='nmslib', method='vptree') + Graph(data, backend='nmslib', index=dict(post=2)) + Graph(data, backend='nmslib', query=dict(efSearch=10)) + for backend in ['scipy-kdtree', 'scipy-ckdtree']: + for kind in ['knn', 'radius']: + Graph(data, backend=backend, kind=kind, eps=1e-2) + Graph(data, backend=backend, kind=kind, leafsize=9) + def test_nngraph_consistency(self): features = np.arange(90).reshape(30, 3) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] From 8cc3539d920a7a97c9a4050e8ff6eca402321642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Tue, 19 Feb 2019 15:16:02 +0100 Subject: [PATCH 047/118] fix flann distances --- pygsp/graphs/nngraphs/nngraph.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 0bbeea43..bcaf7ffe 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -94,8 +94,10 @@ def _knn_flann(features, k, metric, order, params): # seems to work best). neighbors, distances = index.nn_index(features, k+1) index.free_index() - if metric == 'euclidean': # flann returns squared distances + if metric == 'euclidean': np.sqrt(distances, out=distances) + elif metric == 'minkowski': + np.power(distances, 1/order, out=distances) return neighbors, distances @@ -185,17 +187,22 @@ def _radius_flann(features, radius, metric, order, params): cfl.set_distance_type(metric, order=order) index = cfl.FLANNIndex() index.build_index(features, **params) - D = [] - NN = [] - for k in range(n_vertices): - nn, d = index.nn_radius(features[k, :], radius**2) - D.append(d) - NN.append(nn) + distances = [] + neighbors = [] + if metric == 'euclidean': + radius = radius**2 + elif metric == 'minkowski': + radius = radius**order + for vertex in range(n_vertices): + neighbor, distance = index.nn_radius(features[vertex, :], radius) + distances.append(distance) + neighbors.append(neighbor) index.free_index() if metric == 'euclidean': - # Flann returns squared distances. - D = list(map(np.sqrt, D)) - return NN, D + distances = list(map(np.sqrt, distances)) + elif metric == 'minkowski': + distances = list(map(lambda d: np.power(d, 1/order), distances)) + return neighbors, distances _nn_functions = { From ebc5c054ae36684a032374eaf3b83b1db582077a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Tue, 19 Feb 2019 15:35:07 +0100 Subject: [PATCH 048/118] NNGraph: test consistency across backends --- pygsp/tests/test_graphs.py | 65 ++++++++++++-------------------------- 1 file changed, 20 insertions(+), 45 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c0142317..14291294 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -349,17 +349,19 @@ def test_set_coordinates(self): G.set_coordinates('community2D') self.assertRaises(ValueError, G.set_coordinates, 'invalid') - def test_nngraph(self, n_vertices=30): + def test_nngraph(self, n_vertices=25): """Test all the combinations of metric, kind, backend.""" Graph = graphs.NNGraph - data = np.random.RandomState(42).normal(size=(n_vertices, 3)) + data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib', - 'flann'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] - for backend in backends: - for metric in metrics: - for kind in ['knn', 'radius']: + for metric in metrics: + for kind in ['knn', 'radius']: + params = dict(features=data, metric=metric, kind=kind, + radius=0.8) + ref = Graph(**params, backend='scipy-pdist') + for backend in backends: # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': self.assertRaises(ValueError, Graph, data, @@ -371,9 +373,16 @@ def test_nngraph(self, n_vertices=30): self.assertRaises(ValueError, Graph, data, kind=kind, backend=backend) else: - for standardize in [True, False]: - Graph(data, standardize=standardize, metric=metric, - kind=kind, backend=backend) + params['backend'] = backend + if backend == 'flann': + graph = Graph(**params, random_seed=42) + else: + graph = Graph(**params) + np.testing.assert_allclose(graph.W.toarray(), + ref.W.toarray(), rtol=1e-5) + + Graph(data, standardize=False) + Graph(data, standardize=True) # Invalid parameters. self.assertRaises(ValueError, Graph, data, metric='invalid') @@ -385,7 +394,7 @@ def test_nngraph(self, n_vertices=30): # Empty graph. if sys.version_info > (3, 4): # no assertLogs in python 2.7 - for backend in backends: + for backend in backends + ['scipy-pdist']: if backend == 'nmslib': continue # nmslib doesn't support radius with self.assertLogs(level='WARNING'): @@ -406,40 +415,6 @@ def test_nngraph(self, n_vertices=30): Graph(data, backend=backend, kind=kind, eps=1e-2) Graph(data, backend=backend, kind=kind, leafsize=9) - def test_nngraph_consistency(self): - features = np.arange(90).reshape(30, 3) - metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] - num_neighbors = 4 - radius = 0.1 - - G = graphs.NNGraph(features, kind='knn', - backend='scipy-pdist', k=num_neighbors) - for backend in backends: - for metric in metrics: - if backend == 'flann' and metric == 'max_dist': - continue - if backend == 'nmslib' and metric == 'minkowski': - continue - Gt = graphs.NNGraph(features, kind='knn', - backend=backend, k=num_neighbors) - d = sparse.linalg.norm(G.W - Gt.W) - self.assertTrue(d < 0.01, 'Graphs (knn {}/{}) are not identical error={}'.format(backend, metric, d)) - - G = graphs.NNGraph(features, kind='radius', - backend='scipy-pdist', radius=radius) - for backend in backends: - for metric in metrics: - if backend == 'flann' and metric == 'max_dist': - continue - if backend == 'nmslib': - continue - Gt = graphs.NNGraph(features, kind='radius', - backend=backend, radius=radius) - d = sparse.linalg.norm(G.W - Gt.W, ord=1) - self.assertTrue(d < 0.01, - 'Graphs (radius {}/{}) are not identical error={}'.format(backend, metric, d)) - def test_bunny(self): graphs.Bunny() From 1167f52cb5bf4e80a5042df437f5705c37b8a771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Tue, 19 Feb 2019 18:53:08 +0100 Subject: [PATCH 049/118] python 2.7 dict unpacking --- pygsp/tests/test_graphs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 14291294..295395a2 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -360,7 +360,7 @@ def test_nngraph(self, n_vertices=25): for kind in ['knn', 'radius']: params = dict(features=data, metric=metric, kind=kind, radius=0.8) - ref = Graph(**params, backend='scipy-pdist') + ref = Graph(backend='scipy-pdist', **params) for backend in backends: # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': @@ -375,7 +375,7 @@ def test_nngraph(self, n_vertices=25): else: params['backend'] = backend if backend == 'flann': - graph = Graph(**params, random_seed=42) + graph = Graph(random_seed=42, **params) else: graph = Graph(**params) np.testing.assert_allclose(graph.W.toarray(), From 3638cfdfaeaedbe4873152fb1958feef56e5a408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 00:57:02 +0100 Subject: [PATCH 050/118] pdist accepts no parameters --- pygsp/graphs/nngraphs/nngraph.py | 4 +++- pygsp/tests/test_graphs.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index bcaf7ffe..150fd0c9 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -126,7 +126,9 @@ def _radius_sp_ckdtree(features, radius, metric, order, params): return neighbors, distances -def _knn_sp_pdist(features, num_neighbors, metric, order, _): +def _knn_sp_pdist(features, num_neighbors, metric, order, params): + if params: + raise ValueError('unexpected parameters {}'.format(params)) if metric == 'minkowski': p = spatial.distance.pdist(features, metric=_metrics['scipy-pdist'][metric], diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 295395a2..c7568acd 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -414,6 +414,7 @@ def test_nngraph(self, n_vertices=25): for kind in ['knn', 'radius']: Graph(data, backend=backend, kind=kind, eps=1e-2) Graph(data, backend=backend, kind=kind, leafsize=9) + self.assertRaises(ValueError, Graph, data, backend='scipy-pdist', a=0) def test_bunny(self): graphs.Bunny() From 9b663aada4bfa293c4372d33cf82d401088910c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 01:37:48 +0100 Subject: [PATCH 051/118] NNGraph: test distance on a circle --- pygsp/tests/test_graphs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c7568acd..32621016 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -381,6 +381,24 @@ def test_nngraph(self, n_vertices=25): np.testing.assert_allclose(graph.W.toarray(), ref.W.toarray(), rtol=1e-5) + # Distances between points on a circle. + angles = [0, 2 * np.pi / n_vertices] + points = np.stack([np.cos(angles), np.sin(angles)], axis=1) + distance = np.linalg.norm(points[0] - points[1]) + weight = np.exp(-distance**2) + column = np.zeros(n_vertices) + column[1] = weight + column[-1] = weight + adjacency = scipy.linalg.circulant(column) + for kind in ['knn', 'radius']: + for backend in backends + ['scipy-pdist']: + if backend == 'nmslib' and kind == 'radius': + continue # unsupported + data = graphs.Ring(n_vertices).coords + graph = Graph(data, kind=kind, k=2, radius=1.01*distance, + kernel_width=1, backend=backend) + np.testing.assert_allclose(graph.W.toarray(), adjacency) + Graph(data, standardize=False) Graph(data, standardize=True) From 4af4118940ff7e01a5a2510caf72ca4f942c28e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 01:55:31 +0100 Subject: [PATCH 052/118] NNGraph pdist: don't sort twice --- pygsp/graphs/nngraphs/nngraph.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 150fd0c9..c84d2a59 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -126,20 +126,18 @@ def _radius_sp_ckdtree(features, radius, metric, order, params): return neighbors, distances -def _knn_sp_pdist(features, num_neighbors, metric, order, params): +def _knn_sp_pdist(features, k, metric, order, params): if params: raise ValueError('unexpected parameters {}'.format(params)) if metric == 'minkowski': - p = spatial.distance.pdist(features, - metric=_metrics['scipy-pdist'][metric], - p=order) + distances = spatial.distance.pdist(features, metric='minkowski', p=order) else: - p = spatial.distance.pdist(features, - metric=_metrics['scipy-pdist'][metric]) - pd = spatial.distance.squareform(p) - pds = np.sort(pd)[:, :num_neighbors+1] - pdi = pd.argsort()[:, :num_neighbors+1] - return pdi, pds + distances = spatial.distance.pdist( + features, metric=_metrics['scipy-pdist'][metric]) + distances = spatial.distance.squareform(distances) + neighbors = np.argsort(distances)[:, :k+1] + distances = np.take_along_axis(distances, neighbors, axis=-1) + return neighbors, distances def _knn_nmslib(features, num_neighbors, metric, _, params): From 624af23a4e32fd56f38c9121d6c3af7e5e951a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 04:26:45 +0100 Subject: [PATCH 053/118] NNGraph: fuse knn and radius implementations --- pygsp/graphs/nngraphs/nngraph.py | 315 ++++++++++++------------------- pygsp/tests/test_graphs.py | 8 +- 2 files changed, 124 insertions(+), 199 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index c84d2a59..c980283d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -12,216 +12,150 @@ from pygsp.graphs import Graph # prevent circular import in Python < 3.5 -# conversion between the FLANN conventions and the various backend functions -_metrics = { - 'scipy-pdist': { - 'euclidean': 'euclidean', - 'manhattan': 'cityblock', - 'max_dist': 'chebyshev', - 'minkowski': 'minkowski', - }, - 'scipy-kdtree': { +def _metric_kdtree(metric, order): + _metrics = { 'euclidean': 2, 'manhattan': 1, 'max_dist': np.inf, - 'minkowski': 0, - }, - 'scipy-ckdtree': { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf, - 'minkowski': 0, - }, - 'flann': { - 'euclidean': 'euclidean', - 'manhattan': 'manhattan', -# 'max_dist': 'max_dist', # produces incorrect results - 'minkowski': 'minkowski', - }, - 'nmslib': { - 'euclidean': 'l2', - 'manhattan': 'l1', - 'max_dist': 'linf', -# 'minkowski': 'lp', # unsupported + 'minkowski': order, } -} - - -def _import_cfl(): try: - import cyflann as cfl - except Exception as e: - raise ImportError('Cannot import cyflann. Choose another nearest ' - 'neighbors method or try to install it with ' - 'pip (or conda) install cyflann. ' - 'Original exception: {}'.format(e)) - return cfl + return _metrics[metric] + except KeyError: + raise ValueError('unknown metric {} for scipy-kdtree'.format(metric)) -def _import_nmslib(): - try: - import nmslib as nms - except Exception: - raise ImportError('Cannot import nmslib. Choose another nearest ' - 'neighbors method or try to install it with ' - 'pip (or conda) install nmslib.') - return nms +def _scipy_pdist(features, metric, order, kind, k, radius, params): + if params: + raise ValueError('unexpected parameters {}'.format(params)) + metric = 'cityblock' if metric == 'manhattan' else metric + metric = 'chebyshev' if metric == 'max_dist' else metric + params = dict(metric=metric) + if metric == 'minkowski': + params['p'] = order + dist = spatial.distance.pdist(features, **params) + dist = spatial.distance.squareform(dist) + if kind == 'knn': + neighbors = np.argsort(dist)[:, :k+1] + distances = np.take_along_axis(dist, neighbors, axis=-1) + elif kind == 'radius': + distances = [] + neighbors = [] + for distance in dist: + neighbor = np.flatnonzero(distance < radius) + neighbors.append(neighbor) + distances.append(distance[neighbor]) + return neighbors, distances -def _knn_sp_kdtree(features, k, metric, order, params): - p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] +def _scipy_kdtree(features, metric, order, kind, k, radius, params): + metric = _metric_kdtree(metric, order) eps = params.pop('eps', 0) tree = spatial.KDTree(features, **params) - distances, neighbors = tree.query(features, k=k+1, p=p, eps=eps) + params = dict(p=metric, eps=eps) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = None + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) return neighbors, distances -def _knn_sp_ckdtree(features, k, metric, order, params): - p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] +def _scipy_ckdtree(features, metric, order, kind, k, radius, params): + metric = _metric_kdtree(metric, order) eps = params.pop('eps', 0) - kdt = spatial.cKDTree(features, **params) - distances, neighbors = kdt.query(features, k=k+1, p=p, eps=eps, n_jobs=-1) - return neighbors, distances - - -def _knn_flann(features, k, metric, order, params): - cfl = _import_cfl() + tree = spatial.cKDTree(features, **params) + params = dict(p=metric, eps=eps, n_jobs=-1) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = features.shape[0] # number of vertices + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + if kind == 'knn': + return neighbors, distances + elif kind == 'radius': + dist = [] + neigh = [] + for distance, neighbor in zip(distances, neighbors): + mask = (distance != np.inf) + dist.append(distance[mask]) + neigh.append(neighbor[mask]) + return neigh, dist + + +def _flann(features, metric, order, kind, k, radius, params): + if metric == 'max_dist': + raise ValueError('flann gives wrong results for metric="max_dist".') + try: + import cyflann as cfl + except Exception as e: + raise ImportError('Cannot import cyflann. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install cyflann. ' + 'Original exception: {}'.format(e)) cfl.set_distance_type(metric, order=order) index = cfl.FLANNIndex() index.build_index(features, **params) - # Default FLANN parameters (I tried changing the algorithm and - # testing performance on huge matrices, but the default one - # seems to work best). - neighbors, distances = index.nn_index(features, k+1) + # I tried changing the algorithm and testing performance on huge matrices, + # but the default parameters seems to work best. + if kind == 'knn': + neighbors, distances = index.nn_index(features, k+1) + if metric == 'euclidean': + np.sqrt(distances, out=distances) + elif metric == 'minkowski': + np.power(distances, 1/order, out=distances) + elif kind == 'radius': + distances = [] + neighbors = [] + if metric == 'euclidean': + radius = radius**2 + elif metric == 'minkowski': + radius = radius**order + n_vertices, _ = features.shape + for vertex in range(n_vertices): + neighbor, distance = index.nn_radius(features[vertex, :], radius) + distances.append(distance) + neighbors.append(neighbor) + if metric == 'euclidean': + distances = list(map(np.sqrt, distances)) + elif metric == 'minkowski': + distances = list(map(lambda d: np.power(d, 1/order), distances)) index.free_index() - if metric == 'euclidean': - np.sqrt(distances, out=distances) - elif metric == 'minkowski': - np.power(distances, 1/order, out=distances) - return neighbors, distances - - -def _radius_sp_kdtree(features, radius, metric, order, params): - p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] - eps = params.pop('eps', 0) - tree = spatial.KDTree(features, **params) - distances, neighbors = tree.query(features, p=p, eps=eps, k=None, - distance_upper_bound=radius) - return neighbors, distances - - -def _radius_sp_ckdtree(features, radius, metric, order, params): - p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] - n_vertices, _ = features.shape - eps = params.pop('eps', 0) - tree = spatial.cKDTree(features, **params) - D, NN = tree.query(features, k=n_vertices, distance_upper_bound=radius, - p=p, eps=eps, n_jobs=-1) - distances = [] - neighbors = [] - for d, n in zip(D, NN): - mask = (d != np.inf) - distances.append(d[mask]) - neighbors.append(n[mask]) return neighbors, distances -def _knn_sp_pdist(features, k, metric, order, params): - if params: - raise ValueError('unexpected parameters {}'.format(params)) +def _nmslib(features, metric, order, kind, k, _, params): + if kind == 'radius': + raise ValueError('nmslib does not support kind="radius".') if metric == 'minkowski': - distances = spatial.distance.pdist(features, metric='minkowski', p=order) - else: - distances = spatial.distance.pdist( - features, metric=_metrics['scipy-pdist'][metric]) - distances = spatial.distance.squareform(distances) - neighbors = np.argsort(distances)[:, :k+1] - distances = np.take_along_axis(distances, neighbors, axis=-1) - return neighbors, distances - - -def _knn_nmslib(features, num_neighbors, metric, _, params): + raise ValueError('nmslib does not support metric="minkowski".') + try: + import nmslib as nms + except Exception: + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors method or try to install it with ' + 'pip (or conda) install nmslib.') n_vertices, _ = features.shape - ncpu = multiprocessing.cpu_count() - nms = _import_nmslib() params_index = params.pop('index', None) params_query = params.pop('query', None) - index = nms.init(space=_metrics['nmslib'][metric], **params) + metric = 'l2' if metric == 'euclidean' else metric + metric = 'l1' if metric == 'manhattan' else metric + metric = 'linf' if metric == 'max_dist' else metric + index = nms.init(space=metric, **params) index.addDataPointBatch(features) index.createIndex(params_index) if params_query is not None: index.setQueryTimeParams(params_query) - q = index.knnQueryBatch(features, k=num_neighbors+1, - num_threads=int(ncpu/2)) - nn, d = zip(*q) - D = np.concatenate(d).reshape(n_vertices, num_neighbors+1) - NN = np.concatenate(nn).reshape(n_vertices, num_neighbors+1) - return NN, D - - -def _radius_sp_pdist(features, radius, metric, order, _): - n_vertices, _ = features.shape - if metric == 'minkowski': - p = spatial.distance.pdist(features, - metric=_metrics['scipy-pdist'][metric], - p=order) - else: - p = spatial.distance.pdist(features, - metric=_metrics['scipy-pdist'][metric]) - pd = spatial.distance.squareform(p) - pdf = pd < radius - D = [] - NN = [] - for k in range(n_vertices): - v = pd[k, pdf[k, :]] - d = pd[k, :].argsort() - # use the same conventions as in scipy.distance.kdtree - NN.append(d[0:len(v)]) - D.append(np.sort(v)) - return NN, D - - -def _radius_flann(features, radius, metric, order, params): - n_vertices, _ = features.shape - cfl = _import_cfl() - cfl.set_distance_type(metric, order=order) - index = cfl.FLANNIndex() - index.build_index(features, **params) - distances = [] - neighbors = [] - if metric == 'euclidean': - radius = radius**2 - elif metric == 'minkowski': - radius = radius**order - for vertex in range(n_vertices): - neighbor, distance = index.nn_radius(features[vertex, :], radius) - distances.append(distance) - neighbors.append(neighbor) - index.free_index() - if metric == 'euclidean': - distances = list(map(np.sqrt, distances)) - elif metric == 'minkowski': - distances = list(map(lambda d: np.power(d, 1/order), distances)) + ncpu = multiprocessing.cpu_count() + q = index.knnQueryBatch(features, k=k+1, num_threads=int(ncpu/2)) + neighbors, distances = zip(*q) + distances = np.concatenate(distances).reshape(n_vertices, k+1) + neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) return neighbors, distances -_nn_functions = { - 'knn': { - 'scipy-pdist': _knn_sp_pdist, - 'scipy-kdtree': _knn_sp_kdtree, - 'scipy-ckdtree': _knn_sp_ckdtree, - 'flann': _knn_flann, - 'nmslib': _knn_nmslib, - }, - 'radius': { - 'scipy-pdist': _radius_sp_pdist, - 'scipy-kdtree': _radius_sp_kdtree, - 'scipy-ckdtree': _radius_sp_ckdtree, - 'flann': _radius_flann, - }, -} - - class NNGraph(Graph): r"""Nearest-neighbor graph. @@ -250,6 +184,8 @@ class NNGraph(Graph): and standard deviation of 1 (unit variance). metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional Metric used to compute pairwise distances. + More metrics may be supported for some backends. + Please refer to the documentation of the chosen backend. order : float, optional The order of the Minkowski distance for ``metric='minkowski'``. kind : {'knn', 'radius'}, optional @@ -310,19 +246,9 @@ def __init__(self, features, standardize=False, except KeyError: pass - if _metrics['scipy-pdist'].get(metric) is None: - raise ValueError('Invalid metric "{}".'.format(metric)) - if _nn_functions.get(kind) is None: + if kind not in ['knn', 'radius']: raise ValueError('Invalid kind "{}".'.format(kind)) - if backend not in _metrics.keys(): - raise ValueError('Invalid backend "{}".'.format(backend)) - if _metrics[backend].get(metric) is None: - raise ValueError('{} does not support metric="{}".'.format( - backend, metric)) - if _nn_functions[kind].get(backend) is None: - raise ValueError('{} does not support kind="{}".'.format( - backend, kind)) - if not (1 <= k < n_vertices): + if (kind == 'knn') and not (1 <= k < n_vertices): raise ValueError('The number of neighbors (k={}) must be greater ' 'than 0 and smaller than the number of vertices ' '({}).'.format(k, n_vertices)) @@ -334,11 +260,12 @@ def __init__(self, features, standardize=False, features = features - np.mean(features, axis=0) features /= np.std(features, axis=0) - nn_function = _nn_functions[kind][backend] - if kind == 'knn': - neighbors, distances = nn_function(features, k, metric, order, kwargs) - elif kind == 'radius': - neighbors, distances = nn_function(features, radius, metric, order, kwargs) + try: + function = globals()['_' + backend.replace('-', '_')] + except KeyError: + raise ValueError('Invalid backend "{}".'.format(backend)) + neighbors, distances = function(features, metric, order, + kind, k, radius, kwargs) n_edges = [len(n) - 1 for n in neighbors] # remove distance to self diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 32621016..474efb1e 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -423,15 +423,13 @@ def test_nngraph(self, n_vertices=25): # Backend parameters. Graph(data, lap_type='normalized') Graph(data, plotting=dict(vertex_size=10)) - Graph(data, backend='flann', kind='knn', algorithm='kmeans') - Graph(data, backend='flann', kind='radius', random_seed=0) + Graph(data, backend='flann', algorithm='kmeans') Graph(data, backend='nmslib', method='vptree') Graph(data, backend='nmslib', index=dict(post=2)) Graph(data, backend='nmslib', query=dict(efSearch=10)) for backend in ['scipy-kdtree', 'scipy-ckdtree']: - for kind in ['knn', 'radius']: - Graph(data, backend=backend, kind=kind, eps=1e-2) - Graph(data, backend=backend, kind=kind, leafsize=9) + Graph(data, backend=backend, eps=1e-2) + Graph(data, backend=backend, leafsize=9) self.assertRaises(ValueError, Graph, data, backend='scipy-pdist', a=0) def test_bunny(self): From 043579e34a6199b2d3551dd4091465b8f54f806f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 05:00:33 +0100 Subject: [PATCH 054/118] nmslib: number of thread is automatically set to max By default, num_threads = 0. https://github.com/nmslib/nmslib/blob/712be85c878512686e3c63e80a766b3d4213cfef/python_bindings/nmslib.cc#L557 Then, numThreads = std::thread::hardware_concurrency() sets it to the number of concurrent threads supported by the implementation. https://github.com/nmslib/nmslib/blob/314f48e0f86efafe9f6ad853236603400030a0ac/similarity_search/include/thread_pool.h#L64 --- pygsp/graphs/nngraphs/nngraph.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index c980283d..457ea44e 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -2,9 +2,6 @@ from __future__ import division -import multiprocessing -import traceback - import numpy as np from scipy import sparse, spatial @@ -148,9 +145,8 @@ def _nmslib(features, metric, order, kind, k, _, params): index.createIndex(params_index) if params_query is not None: index.setQueryTimeParams(params_query) - ncpu = multiprocessing.cpu_count() - q = index.knnQueryBatch(features, k=k+1, num_threads=int(ncpu/2)) - neighbors, distances = zip(*q) + results = index.knnQueryBatch(features, k=k+1) + neighbors, distances = zip(*results) distances = np.concatenate(distances).reshape(n_vertices, k+1) neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) return neighbors, distances From 1d22376784356066399c4d841e3b884ecaed0f57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 12:04:49 +0100 Subject: [PATCH 055/118] order consistent with metric --- pygsp/graphs/nngraphs/nngraph.py | 36 +++++++++++++++----------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 457ea44e..212db4bc 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -9,19 +9,6 @@ from pygsp.graphs import Graph # prevent circular import in Python < 3.5 -def _metric_kdtree(metric, order): - _metrics = { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf, - 'minkowski': order, - } - try: - return _metrics[metric] - except KeyError: - raise ValueError('unknown metric {} for scipy-kdtree'.format(metric)) - - def _scipy_pdist(features, metric, order, kind, k, radius, params): if params: raise ValueError('unexpected parameters {}'.format(params)) @@ -45,11 +32,12 @@ def _scipy_pdist(features, metric, order, kind, k, radius, params): return neighbors, distances -def _scipy_kdtree(features, metric, order, kind, k, radius, params): - metric = _metric_kdtree(metric, order) +def _scipy_kdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') eps = params.pop('eps', 0) tree = spatial.KDTree(features, **params) - params = dict(p=metric, eps=eps) + params = dict(p=order, eps=eps) if kind == 'knn': params['k'] = k + 1 elif kind == 'radius': @@ -59,11 +47,12 @@ def _scipy_kdtree(features, metric, order, kind, k, radius, params): return neighbors, distances -def _scipy_ckdtree(features, metric, order, kind, k, radius, params): - metric = _metric_kdtree(metric, order) +def _scipy_ckdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') eps = params.pop('eps', 0) tree = spatial.cKDTree(features, **params) - params = dict(p=metric, eps=eps, n_jobs=-1) + params = dict(p=order, eps=eps, n_jobs=-1) if kind == 'knn': params['k'] = k + 1 elif kind == 'radius': @@ -251,6 +240,15 @@ def __init__(self, features, standardize=False, if (radius is not None) and (radius <= 0): raise ValueError('The radius must be greater than 0.') + # Order consistent with metric (used by kdtree and ckdtree). + _orders = { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': order, + } + order = _orders.pop(metric, None) + if standardize: # Don't alter the original data (users would be surprised). features = features - np.mean(features, axis=0) From 076307634a30e48785405c36e9d04ec4eb453191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 12:08:42 +0100 Subject: [PATCH 056/118] cleaner error handling --- pygsp/graphs/nngraphs/nngraph.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 212db4bc..3dd22ab6 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -231,14 +231,16 @@ def __init__(self, features, standardize=False, except KeyError: pass - if kind not in ['knn', 'radius']: + if kind == 'knn': + if not 1 <= k < n_vertices: + raise ValueError('The number of neighbors (k={}) must be ' + 'greater than 0 and smaller than the number ' + 'of vertices ({}).'.format(k, n_vertices)) + elif kind == 'radius': + if (radius is not None) and (radius <= 0): + raise ValueError('The radius must be greater than 0.') + else: raise ValueError('Invalid kind "{}".'.format(kind)) - if (kind == 'knn') and not (1 <= k < n_vertices): - raise ValueError('The number of neighbors (k={}) must be greater ' - 'than 0 and smaller than the number of vertices ' - '({}).'.format(k, n_vertices)) - if (radius is not None) and (radius <= 0): - raise ValueError('The radius must be greater than 0.') # Order consistent with metric (used by kdtree and ckdtree). _orders = { From 3f0c2b55dc1d95c6490ef902bc8c8d1e1a0ccc50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Wed, 20 Feb 2019 16:54:03 +0100 Subject: [PATCH 057/118] nngraph: test standardization --- pygsp/tests/test_graphs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 474efb1e..52d4aeec 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -399,8 +399,9 @@ def test_nngraph(self, n_vertices=25): kernel_width=1, backend=backend) np.testing.assert_allclose(graph.W.toarray(), adjacency) - Graph(data, standardize=False) - Graph(data, standardize=True) + graph = Graph(data, standardize=True) + np.testing.assert_allclose(np.mean(graph.coords, axis=0), 0, atol=1e-7) + np.testing.assert_allclose(np.std(graph.coords, axis=0), 1) # Invalid parameters. self.assertRaises(ValueError, Graph, data, metric='invalid') From 26e12e37ed5be39ad9db36f7953d0b95c52263fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 21 Feb 2019 00:51:30 +0100 Subject: [PATCH 058/118] nngraph: radius estimation --- pygsp/graphs/nngraphs/nngraph.py | 70 ++++++++++++++++++++++---------- pygsp/tests/test_graphs.py | 14 ++++--- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 3dd22ab6..74248e1d 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -179,8 +179,17 @@ class NNGraph(Graph): k : int, optional Number of neighbors considered when building a k-NN graph with ``type='knn'``. - radius : float, optional + radius : float or {'estimate', 'estimate-knn'}, optional Radius of the ball when building a radius graph with ``type='radius'``. + It is hard to set an optimal radius. If too small, some vertices won't + be connected to any other vertex. If too high, vertices will be + connected to many other vertices and the graph won't be sparse (high + average degree). If no good radius is known a priori, we can estimate + one. ``'estimate'`` sets the radius as the expected average distance + between vertices for a uniform sampling of the ambient space. + ``'estimate-knn'`` first builds a knn graph and sets the radius to the + average distance. ``'estimate-knn'`` usually gives a better estimation + but is more costly. ``'estimate'`` can be better in low dimension. kernel_width : float, optional Width of the Gaussian kernel. By default, it is set to the average of the distances of neighboring vertices. @@ -217,12 +226,12 @@ class NNGraph(Graph): def __init__(self, features, standardize=False, metric='euclidean', order=3, - kind='knn', k=10, radius=0.01, + kind='knn', k=10, radius='estimate-knn', kernel_width=None, backend='scipy-ckdtree', **kwargs): - n_vertices, _ = features.shape + n_vertices, dimensionality = features.shape params_graph = dict() for key in ['lap_type', 'plotting']: @@ -231,16 +240,10 @@ def __init__(self, features, standardize=False, except KeyError: pass - if kind == 'knn': - if not 1 <= k < n_vertices: - raise ValueError('The number of neighbors (k={}) must be ' - 'greater than 0 and smaller than the number ' - 'of vertices ({}).'.format(k, n_vertices)) - elif kind == 'radius': - if (radius is not None) and (radius <= 0): - raise ValueError('The radius must be greater than 0.') - else: - raise ValueError('Invalid kind "{}".'.format(kind)) + if standardize: + # Don't alter the original data (users would be surprised). + features = features - np.mean(features, axis=0) + features /= np.std(features, axis=0) # Order consistent with metric (used by kdtree and ckdtree). _orders = { @@ -251,10 +254,28 @@ def __init__(self, features, standardize=False, } order = _orders.pop(metric, None) - if standardize: - # Don't alter the original data (users would be surprised). - features = features - np.mean(features, axis=0) - features /= np.std(features, axis=0) + if kind == 'knn': + if not 1 <= k < n_vertices: + raise ValueError('The number of neighbors (k={}) must be ' + 'greater than 0 and smaller than the number ' + 'of vertices ({}).'.format(k, n_vertices)) + radius = None + elif kind == 'radius': + if radius == 'estimate': + maximums = np.amax(features, axis=0) + minimums = np.amin(features, axis=0) + distance_max = np.linalg.norm(maximums - minimums, order) + radius = distance_max / np.power(n_vertices, 1/dimensionality) + elif radius == 'estimate-knn': + graph = NNGraph(features, standardize=standardize, + metric=metric, order=order, kind='knn', k=k, + kernel_width=None, backend=backend, **kwargs) + radius = graph.kernel_width + elif radius <= 0: + raise ValueError('The radius must be greater than 0.') + k = None + else: + raise ValueError('Invalid kind "{}".'.format(kind)) try: function = globals()['_' + backend.replace('-', '_')] @@ -314,13 +335,18 @@ def kernel(distance, width): super(NNGraph, self).__init__(W=W, coords=features, **params_graph) def _get_extra_repr(self): - return { + attrs = { 'standardize': self.standardize, 'metric': self.metric, 'order': self.order, 'kind': self.kind, - 'k': self.k, - 'radius': '{:.2f}'.format(self.radius), - 'kernel_width': '{:.2f}'.format(self.kernel_width), - 'backend': self.backend, } + if self.k is not None: + attrs['k'] = self.k + if self.radius is not None: + attrs['radius'] = '{:.2e}'.format(self.radius) + attrs.update({ + 'kernel_width': '{:.2e}'.format(self.kernel_width), + 'backend': self.backend, + }) + return attrs diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 52d4aeec..e4f4879f 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -349,7 +349,7 @@ def test_set_coordinates(self): G.set_coordinates('community2D') self.assertRaises(ValueError, G.set_coordinates, 'invalid') - def test_nngraph(self, n_vertices=25): + def test_nngraph(self, n_vertices=24): """Test all the combinations of metric, kind, backend.""" Graph = graphs.NNGraph data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) @@ -358,8 +358,7 @@ def test_nngraph(self, n_vertices=25): for metric in metrics: for kind in ['knn', 'radius']: - params = dict(features=data, metric=metric, kind=kind, - radius=0.8) + params = dict(features=data, metric=metric, kind=kind) ref = Graph(backend='scipy-pdist', **params) for backend in backends: # Unsupported combinations. @@ -375,7 +374,7 @@ def test_nngraph(self, n_vertices=25): else: params['backend'] = backend if backend == 'flann': - graph = Graph(random_seed=42, **params) + graph = Graph(random_seed=0, **params) else: graph = Graph(**params) np.testing.assert_allclose(graph.W.toarray(), @@ -390,15 +389,20 @@ def test_nngraph(self, n_vertices=25): column[1] = weight column[-1] = weight adjacency = scipy.linalg.circulant(column) + data = graphs.Ring(n_vertices).coords for kind in ['knn', 'radius']: for backend in backends + ['scipy-pdist']: if backend == 'nmslib' and kind == 'radius': continue # unsupported - data = graphs.Ring(n_vertices).coords graph = Graph(data, kind=kind, k=2, radius=1.01*distance, kernel_width=1, backend=backend) np.testing.assert_allclose(graph.W.toarray(), adjacency) + graph = Graph(data, kind='radius', radius='estimate') + np.testing.assert_allclose(graph.radius, np.sqrt(8 / n_vertices)) + graph = Graph(data, kind='radius', k=2, radius='estimate-knn') + np.testing.assert_allclose(graph.radius, distance) + graph = Graph(data, standardize=True) np.testing.assert_allclose(np.mean(graph.coords, axis=0), 0, atol=1e-7) np.testing.assert_allclose(np.std(graph.coords, axis=0), 1) From 080bb5c623d2e75d0fdbad9ffec4cba9605d4459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 21 Feb 2019 01:10:40 +0100 Subject: [PATCH 059/118] fix others uses of radius --- pygsp/graphs/nngraphs/cube.py | 13 ++++++++----- pygsp/graphs/nngraphs/sphere.py | 11 ++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pygsp/graphs/nngraphs/cube.py b/pygsp/graphs/nngraphs/cube.py index 2328c2cb..b9cb1d60 100644 --- a/pygsp/graphs/nngraphs/cube.py +++ b/pygsp/graphs/nngraphs/cube.py @@ -10,12 +10,12 @@ class Cube(NNGraph): Parameters ---------- - radius : float - Edge lenght (default = 1) nb_pts : int Number of vertices (default = 300) nb_dim : int Dimension (default = 3) + length : float + Edge length (default = 1) sampling : string Variance of the distance kernel (default = 'random') (Can now only be 'random') @@ -35,20 +35,23 @@ class Cube(NNGraph): """ def __init__(self, - radius=1, nb_pts=300, nb_dim=3, + length=1, sampling='random', seed=None, **kwargs): - self.radius = radius self.nb_pts = nb_pts self.nb_dim = nb_dim + self.length = length self.sampling = sampling self.seed = seed rs = np.random.RandomState(seed) + if length != 1: + raise NotImplementedError('Only length=1 is implemented.') + if self.nb_dim > 3: raise NotImplementedError("Dimension > 3 not supported yet!") @@ -92,7 +95,7 @@ def __init__(self, super(Cube, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): - attrs = {'radius': '{:.2f}'.format(self.radius), + attrs = {'length': '{:.2e}'.format(self.length), 'nb_pts': self.nb_pts, 'nb_dim': self.nb_dim, 'sampling': self.sampling, diff --git a/pygsp/graphs/nngraphs/sphere.py b/pygsp/graphs/nngraphs/sphere.py index f323f562..1262b922 100644 --- a/pygsp/graphs/nngraphs/sphere.py +++ b/pygsp/graphs/nngraphs/sphere.py @@ -10,12 +10,12 @@ class Sphere(NNGraph): Parameters ---------- - radius : float - Radius of the sphere (default = 1) nb_pts : int Number of vertices (default = 300) nb_dim : int Dimension (default = 3) + diameter : float + Radius of the sphere (default = 2) sampling : string Variance of the distance kernel (default = 'random') (Can now only be 'random') @@ -35,14 +35,14 @@ class Sphere(NNGraph): """ def __init__(self, - radius=1, nb_pts=300, nb_dim=3, + diameter=2, sampling='random', seed=None, **kwargs): - self.radius = radius + self.diameter = diameter self.nb_pts = nb_pts self.nb_dim = nb_dim self.sampling = sampling @@ -55,6 +55,7 @@ def __init__(self, for i in range(self.nb_pts): pts[i] /= np.linalg.norm(pts[i]) + pts[i] *= (diameter / 2) else: @@ -67,7 +68,7 @@ def __init__(self, super(Sphere, self).__init__(pts, k=10, plotting=plotting, **kwargs) def _get_extra_repr(self): - attrs = {'radius': '{:.2f}'.format(self.radius), + attrs = {'diameter': '{:.2e}'.format(self.diameter), 'nb_pts': self.nb_pts, 'nb_dim': self.nb_dim, 'sampling': self.sampling, From dad41058475c581d266b01f3104340afbc4b44b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sun, 24 Feb 2019 17:48:07 +0100 Subject: [PATCH 060/118] nngraph: check shape of features --- pygsp/graphs/nngraphs/nngraph.py | 3 +++ pygsp/tests/test_graphs.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 74248e1d..83d20530 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -231,6 +231,9 @@ def __init__(self, features, standardize=False, backend='scipy-ckdtree', **kwargs): + features = np.asanyarray(features) + if features.ndim != 2: + raise ValueError('features should be #vertices x dimensionality') n_vertices, dimensionality = features.shape params_graph = dict() diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index e4f4879f..dd203cb7 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -408,6 +408,8 @@ def test_nngraph(self, n_vertices=24): np.testing.assert_allclose(np.std(graph.coords, axis=0), 1) # Invalid parameters. + self.assertRaises(ValueError, Graph, np.ones(n_vertices)) + self.assertRaises(ValueError, Graph, np.ones((n_vertices, 3, 4))) self.assertRaises(ValueError, Graph, data, metric='invalid') self.assertRaises(ValueError, Graph, data, kind='invalid') self.assertRaises(ValueError, Graph, data, backend='invalid') From f544e1e8d82b3b9faf9f58e61807a1494dc5555b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sun, 24 Feb 2019 18:05:53 +0100 Subject: [PATCH 061/118] nngraph: fix definition of gaussian kernel --- doc/tutorials/optimization.rst | 4 ++-- pygsp/filters/filter.py | 4 ++-- pygsp/graphs/fourier.py | 2 +- pygsp/graphs/nngraphs/nngraph.py | 2 +- pygsp/tests/test_filters.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/tutorials/optimization.rst b/doc/tutorials/optimization.rst index dd6d91ad..0fd16457 100644 --- a/doc/tutorials/optimization.rst +++ b/doc/tutorials/optimization.rst @@ -85,7 +85,7 @@ We start with the graph TV regularization. We will use the :class:`pyunlocbox.so >>> prob1 = pyunlocbox.solvers.solve([d, r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 2.138668e+02 + objective function f(sol) = 2.367630e+02 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob1['sol']) @@ -107,7 +107,7 @@ This figure shows the label signal recovered by graph total variation regulariza >>> prob2 = pyunlocbox.solvers.solve([r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 7.413918e+01 + objective function f(sol) = 3.668690e+01 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob2['sol']) diff --git a/pygsp/filters/filter.py b/pygsp/filters/filter.py index 56d5508d..f267025d 100644 --- a/pygsp/filters/filter.py +++ b/pygsp/filters/filter.py @@ -254,7 +254,7 @@ def filter(self, s, method='chebyshev', order=30): >>> _ = G.plot(s1, ax=axes[0]) >>> _ = G.plot(s2, ax=axes[1]) >>> print('{:.5f}'.format(np.linalg.norm(s1 - s2))) - 0.28049 + 0.27175 Perfect reconstruction with Itersine, a tight frame: @@ -449,7 +449,7 @@ def estimate_frame_bounds(self, x=None): A=1.708, B=2.359 >>> A, B = g.estimate_frame_bounds(G.e) >>> print('A={:.3f}, B={:.3f}'.format(A, B)) - A=1.712, B=2.348 + A=1.720, B=2.355 The frame bounds can be seen in the plot of the filter bank as the minimum and maximum of their squared sum (the black curve): diff --git a/pygsp/graphs/fourier.py b/pygsp/graphs/fourier.py index 286b3252..43170fe6 100644 --- a/pygsp/graphs/fourier.py +++ b/pygsp/graphs/fourier.py @@ -78,7 +78,7 @@ def coherence(self): >>> graph.compute_fourier_basis() >>> minimum = 1 / np.sqrt(graph.n_vertices) >>> print('{:.2f} in [{:.2f}, 1]'.format(graph.coherence, minimum)) - 0.93 in [0.12, 1] + 0.91 in [0.12, 1] >>> >>> # Plot the most localized eigenvector. >>> import matplotlib.pyplot as plt diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 83d20530..b962ab50 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -321,7 +321,7 @@ def __init__(self, features, standardize=False, # Users can easily do the above. def kernel(distance, width): - return np.exp(-distance**2 / width) + return np.exp(-distance**2 / width**2) W.data = kernel(W.data, kernel_width) diff --git a/pygsp/tests/test_filters.py b/pygsp/tests/test_filters.py index fe093da0..cef1689d 100644 --- a/pygsp/tests/test_filters.py +++ b/pygsp/tests/test_filters.py @@ -149,7 +149,7 @@ def test_frame(self): def get_frame(freq_response): return self._G.U.dot(np.diag(freq_response).dot(self._G.U.T)) gL = np.concatenate([get_frame(gl) for gl in g.evaluate(self._G.e)]) - np.testing.assert_allclose(gL1, gL) + np.testing.assert_allclose(gL1, gL, atol=1e-10) np.testing.assert_allclose(gL2, gL, atol=1e-10) def test_complement(self, frame_bound=2.5): @@ -228,7 +228,7 @@ def test_modulation_gabor(self): f2 = filters.Gabor(self._G, f) s1 = f1.filter(self._signal) s2 = f2.filter(self._signal) - np.testing.assert_allclose(s1, s2, atol=1e-5) + np.testing.assert_allclose(s1, -s2, atol=1e-5) def test_halfcosine(self): f = filters.HalfCosine(self._G, Nf=4) From 9c8e86e3bb7b8131de7570b7a247f0b365818887 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sun, 24 Feb 2019 18:19:58 +0100 Subject: [PATCH 062/118] nngraph: allow users to choose the similarity kernel --- doc/tutorials/optimization.rst | 4 +- pygsp/filters/filter.py | 4 +- pygsp/graphs/nngraphs/nngraph.py | 133 ++++++++++++++++++++++++++----- pygsp/tests/test_graphs.py | 11 ++- 4 files changed, 128 insertions(+), 24 deletions(-) diff --git a/doc/tutorials/optimization.rst b/doc/tutorials/optimization.rst index 0fd16457..b3740d85 100644 --- a/doc/tutorials/optimization.rst +++ b/doc/tutorials/optimization.rst @@ -85,7 +85,7 @@ We start with the graph TV regularization. We will use the :class:`pyunlocbox.so >>> prob1 = pyunlocbox.solvers.solve([d, r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 2.367630e+02 + objective function f(sol) = 2.256055e+02 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob1['sol']) @@ -107,7 +107,7 @@ This figure shows the label signal recovered by graph total variation regulariza >>> prob2 = pyunlocbox.solvers.solve([r, f], solver=solver, ... x0=x0, rtol=0, maxit=1000) Solution found after 1000 iterations: - objective function f(sol) = 3.668690e+01 + objective function f(sol) = 4.376481e+01 stopping criterion: MAXIT >>> >>> fig, ax = G.plot(prob2['sol']) diff --git a/pygsp/filters/filter.py b/pygsp/filters/filter.py index f267025d..5abc1169 100644 --- a/pygsp/filters/filter.py +++ b/pygsp/filters/filter.py @@ -254,7 +254,7 @@ def filter(self, s, method='chebyshev', order=30): >>> _ = G.plot(s1, ax=axes[0]) >>> _ = G.plot(s2, ax=axes[1]) >>> print('{:.5f}'.format(np.linalg.norm(s1 - s2))) - 0.27175 + 0.26995 Perfect reconstruction with Itersine, a tight frame: @@ -449,7 +449,7 @@ def estimate_frame_bounds(self, x=None): A=1.708, B=2.359 >>> A, B = g.estimate_frame_bounds(G.e) >>> print('A={:.3f}, B={:.3f}'.format(A, B)) - A=1.720, B=2.355 + A=1.839, B=2.359 The frame bounds can be seen in the plot of the filter bank as the minimum and maximum of their squared sum (the black curve): diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index b962ab50..8b9ffa98 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -2,6 +2,8 @@ from __future__ import division +from functools import partial + import numpy as np from scipy import sparse, spatial @@ -147,13 +149,17 @@ class NNGraph(Graph): The nearest-neighbor graph is built from a set of features, where the edge weight between vertices :math:`v_i` and :math:`v_j` is given by - .. math:: A(i,j) = \exp \left( -\frac{d^2(v_i, v_j)}{\sigma^2} \right), + .. math:: A(i,j) = k \left( \frac{d(v_i, v_j)}{\sigma} \right), where :math:`d(v_i, v_j)` is a distance measure between some representation - (the features) of :math:`v_i` and :math:`v_j`. For example, the features - might be the 3D coordinates of points in a point cloud. Then, if - ``metric='euclidean'``, :math:`d(v_i, v_j) = \| x_i - x_j \|_2`, where - :math:`x_i` is the 3D position of vertex :math:`v_i`. + (the features) of :math:`v_i` and :math:`v_j`, :math:`k` is a kernel + function that transforms a distance in :math:`[0, \infty]` to a similarity + measure in :math:`[0, 1]`, and :math:`\sigma` is the kernel width. + + For example, the features might be the 3D coordinates of points in a point + cloud. Then, if ``metric='euclidean'`` and ``kernel='gaussian'`` (the + defaults), :math:`A(i,j) = \exp(-\log(2) \| x_i - x_j \|_2^2 / \sigma^2)`, + where :math:`x_i` is the 3D position of vertex :math:`v_i`. The similarity matrix :math:`A` is sparsified by either keeping the ``k`` closest vertices for each vertex (if ``type='knn'``), or by setting to zero @@ -190,9 +196,30 @@ class NNGraph(Graph): ``'estimate-knn'`` first builds a knn graph and sets the radius to the average distance. ``'estimate-knn'`` usually gives a better estimation but is more costly. ``'estimate'`` can be better in low dimension. + kernel : string or function + The function :math:`k` that transforms a distance to a similarity. + The following kernels are pre-defined. + + * ``'gaussian'`` defines the Gaussian, also known as the radial basis + function (RBF), kernel :math:`k(d) = \exp(-\log(2) d^2)`. + * ``'exponential'`` defines the kernel :math:`k(d) = \exp(-\log(2) d)`. + * ``'rectangular'`` returns 1 if :math:`d < 1` and 0 otherwise. + * ``'triangular'`` defines the kernel :math:`k(d) = \max(1 - d/2, 0)`. + * Other kernels are ``'tricube'``, ``'triweight'``, ``'quartic'``, + ``'epanechnikov'``, ``'logistic'``, and ``'sigmoid'``. + See `Wikipedia `_. + + Another option is to pass a function that takes a vector of pairwise + distances and returns the similarities. All the predefined kernels + return a similarity of 0.5 when the distance is one. + An example of custom kernel is ``kernel=lambda d: d.min() / d``. kernel_width : float, optional - Width of the Gaussian kernel. By default, it is set to the average of - the distances of neighboring vertices. + Control the width, also known as the bandwidth, :math:`\sigma` of the + kernel by scaling the distances as ``distances / kernel_width`` before + calling the kernel function. + By default, it is set to the average of all computed distances. + When building a radius graph, it's common to set it as a function of + the radius, such as ``radius / 2``. backend : string, optional * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to compute pairwise distances. The method is brute force and computes @@ -222,15 +249,34 @@ class NNGraph(Graph): >>> _ = axes[0].spy(G.W, markersize=5) >>> _ = G.plot(ax=axes[1]) + Plot all the pre-defined kernels. + + >>> width = 0.3 + >>> distances = np.linspace(0, 1, 200) + >>> fig, ax = plt.subplots() + >>> for name, kernel in graphs.NNGraph._kernels.items(): + ... _ = ax.plot(distances, kernel(distances / width), label=name) + >>> _ = ax.set_xlabel('distance [0, inf]') + >>> _ = ax.set_ylabel('similarity [0, 1]') + >>> _ = ax.legend(loc='upper right') + """ def __init__(self, features, standardize=False, metric='euclidean', order=3, kind='knn', k=10, radius='estimate-knn', - kernel_width=None, + kernel='gaussian', kernel_width=None, backend='scipy-ckdtree', **kwargs): + # features is stored in coords, potentially standardized + self.standardize = standardize + self.metric = metric + self.kind = kind + self.kernel = kernel + self.k = k + self.backend = backend + features = np.asanyarray(features) if features.ndim != 2: raise ValueError('features should be #vertices x dimensionality') @@ -317,23 +363,21 @@ def __init__(self, features, standardize=False, if kernel_width is None: kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan - # Alternative: kernel_width = radius / 2 or radius / np.log(2). - # Users can easily do the above. - def kernel(distance, width): - return np.exp(-distance**2 / width**2) + if not callable(kernel): + try: + kernel = self._kernels[kernel] + except KeyError: + raise ValueError('Unknown kernel {}.'.format(kernel)) - W.data = kernel(W.data, kernel_width) + assert np.all(W.data >= 0), 'Distance must be in [0, inf].' + W.data = kernel(W.data / kernel_width) + if not np.all((W.data >= 0) & (W.data <= 1)): + raise ValueError('Kernel returned similarity not in [0, 1].') - # features is stored in coords, potentially standardized - self.standardize = standardize - self.metric = metric self.order = order - self.kind = kind self.radius = radius self.kernel_width = kernel_width - self.k = k - self.backend = backend super(NNGraph, self).__init__(W=W, coords=features, **params_graph) @@ -349,7 +393,58 @@ def _get_extra_repr(self): if self.radius is not None: attrs['radius'] = '{:.2e}'.format(self.radius) attrs.update({ + 'kernel': '{}'.format(self.kernel), 'kernel_width': '{:.2e}'.format(self.kernel_width), 'backend': self.backend, }) return attrs + + @staticmethod + def _kernel_rectangular(distance): + return (distance < 1).astype(np.float) + + @staticmethod + def _kernel_triangular(distance, value_at_one=0.5): + distance = value_at_one * distance + return np.maximum(1 - distance, 0) + + @staticmethod + def _kernel_exponential(distance, power=1, value_at_one=0.5): + cst = np.log(value_at_one) + return np.exp(cst * distance**power) + + @staticmethod + def _kernel_powers(distance, pow1, pow2, value_at_one=0.5): + cst = (1 - value_at_one**(1/pow2))**(1/pow1) + distance = np.clip(cst * distance, 0, 1) + return (1 - distance**pow1)**pow2 + + @staticmethod + def _kernel_logistic(distance, value_at_one=0.5): + cst = 4 / value_at_one - 2 + cst = np.log(0.5 * (cst + np.sqrt(cst**2 - 4))) + distance = cst * distance + return 4 / (np.exp(distance) + 2 + np.exp(-distance)) + + @staticmethod + def _kernel_sigmoid(distance, value_at_one=0.5): + cst = 2 / value_at_one + cst = np.log(0.5 * (cst + np.sqrt(cst**2 - 4))) + distance = cst * distance + return 2 / (np.exp(distance) + np.exp(-distance)) + + _kernels = { + 'rectangular': _kernel_rectangular.__func__, + 'triangular': _kernel_triangular.__func__, + + 'exponential': _kernel_exponential.__func__, + 'gaussian': partial(_kernel_exponential.__func__, power=2), + + 'tricube': partial(_kernel_powers.__func__, pow1=3, pow2=3), + 'triweight': partial(_kernel_powers.__func__, pow1=2, pow2=3), + 'quartic': partial(_kernel_powers.__func__, pow1=2, pow2=2), + 'epanechnikov': partial(_kernel_powers.__func__, pow1=2, pow2=1), + + 'logistic': _kernel_logistic.__func__, + 'sigmoid': _kernel_sigmoid.__func__, + } diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index dd203cb7..db245a04 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -384,7 +384,7 @@ def test_nngraph(self, n_vertices=24): angles = [0, 2 * np.pi / n_vertices] points = np.stack([np.cos(angles), np.sin(angles)], axis=1) distance = np.linalg.norm(points[0] - points[1]) - weight = np.exp(-distance**2) + weight = np.exp(-np.log(2) * distance**2) column = np.zeros(n_vertices) column[1] = weight column[-1] = weight @@ -412,6 +412,7 @@ def test_nngraph(self, n_vertices=24): self.assertRaises(ValueError, Graph, np.ones((n_vertices, 3, 4))) self.assertRaises(ValueError, Graph, data, metric='invalid') self.assertRaises(ValueError, Graph, data, kind='invalid') + self.assertRaises(ValueError, Graph, data, kernel='invalid') self.assertRaises(ValueError, Graph, data, backend='invalid') self.assertRaises(ValueError, Graph, data, kind='knn', k=0) self.assertRaises(ValueError, Graph, data, kind='knn', k=n_vertices) @@ -439,6 +440,14 @@ def test_nngraph(self, n_vertices=24): Graph(data, backend=backend, leafsize=9) self.assertRaises(ValueError, Graph, data, backend='scipy-pdist', a=0) + # Kernels. + for name, kernel in graphs.NNGraph._kernels.items(): + similarity = 0 if name == 'rectangular' else 0.5 + np.testing.assert_allclose(kernel(np.ones(10)), similarity) + np.testing.assert_allclose(kernel(np.zeros(10)), 1) + Graph(data, kernel=lambda d: d.min()/d) + self.assertRaises(ValueError, Graph, data, kernel=lambda d: 1/d) + def test_bunny(self): graphs.Bunny() From bfef54852f67d840b773359475d75cd722d20c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sun, 24 Feb 2019 23:44:05 +0100 Subject: [PATCH 063/118] nngraph: fix attributes --- pygsp/graphs/nngraphs/nngraph.py | 2 +- pygsp/tests/test_graphs.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 8b9ffa98..21d0ccd3 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -322,7 +322,7 @@ def __init__(self, features, standardize=False, radius = graph.kernel_width elif radius <= 0: raise ValueError('The radius must be greater than 0.') - k = None + self.k = None else: raise ValueError('Invalid kind "{}".'.format(kind)) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index db245a04..a36bb778 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -448,6 +448,10 @@ def test_nngraph(self, n_vertices=24): Graph(data, kernel=lambda d: d.min()/d) self.assertRaises(ValueError, Graph, data, kernel=lambda d: 1/d) + # Attributes. + self.assertEqual(Graph(data, kind='knn').radius, None) + self.assertEqual(Graph(data, kind='radius').k, None) + def test_bunny(self): graphs.Bunny() From 5c2e856491bda27d5ede0416735355c8b25a892e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sun, 24 Feb 2019 23:53:07 +0100 Subject: [PATCH 064/118] nngraph: fix intermittent test failure of nmslib --- pygsp/tests/test_graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index a36bb778..2450fe90 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -434,7 +434,7 @@ def test_nngraph(self, n_vertices=24): Graph(data, backend='flann', algorithm='kmeans') Graph(data, backend='nmslib', method='vptree') Graph(data, backend='nmslib', index=dict(post=2)) - Graph(data, backend='nmslib', query=dict(efSearch=10)) + Graph(data, backend='nmslib', query=dict(efSearch=100)) for backend in ['scipy-kdtree', 'scipy-ckdtree']: Graph(data, backend=backend, eps=1e-2) Graph(data, backend=backend, leafsize=9) From af5aecaad83ae909ecf0e32d5cc468ab4e1918ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Sun, 24 Feb 2019 23:54:27 +0100 Subject: [PATCH 065/118] nngraph: width = radius / 2 --- pygsp/graphs/nngraphs/nngraph.py | 12 +++++++----- pygsp/tests/test_graphs.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 21d0ccd3..3a77c8aa 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -215,11 +215,10 @@ class NNGraph(Graph): An example of custom kernel is ``kernel=lambda d: d.min() / d``. kernel_width : float, optional Control the width, also known as the bandwidth, :math:`\sigma` of the - kernel by scaling the distances as ``distances / kernel_width`` before + kernel. It scales the distances as ``distances / kernel_width`` before calling the kernel function. - By default, it is set to the average of all computed distances. - When building a radius graph, it's common to set it as a function of - the radius, such as ``radius / 2``. + By default, it is set to the average of all computed distances for + ``kind='knn'`` and to half the radius for ``kind='radius'``. backend : string, optional * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to compute pairwise distances. The method is brute force and computes @@ -362,7 +361,10 @@ def __init__(self, features, standardize=False, W = utils.symmetrize(W, method='fill') if kernel_width is None: - kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan + if kind == 'knn': + kernel_width = np.mean(W.data) if W.nnz > 0 else np.nan + elif kind == 'radius': + kernel_width = radius / 2 if not callable(kernel): try: diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 2450fe90..94ebe2fa 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -374,7 +374,7 @@ def test_nngraph(self, n_vertices=24): else: params['backend'] = backend if backend == 'flann': - graph = Graph(random_seed=0, **params) + graph = Graph(random_seed=40, **params) else: graph = Graph(**params) np.testing.assert_allclose(graph.W.toarray(), From 0fc8fd1b6449fca5ccd39219bc48ade0f6054778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Mon, 25 Feb 2019 01:11:04 +0100 Subject: [PATCH 066/118] nngraph: doc and examples --- pygsp/graphs/nngraphs/nngraph.py | 124 +++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 8 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 3a77c8aa..3a68b27e 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -175,6 +175,18 @@ class NNGraph(Graph): and standard deviation of 1 (unit variance). metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional Metric used to compute pairwise distances. + + * ``'euclidean'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. + * ``'manhattan'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. + * ``'minkowski'`` generalizes the above and defines distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_p` + where :math:`p` is the ``order`` of the norm. + * ``'max_dist'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(x_i - x_j)`, where + the maximum is taken over the elements of the vector. + More metrics may be supported for some backends. Please refer to the documentation of the chosen backend. order : float, optional @@ -225,9 +237,10 @@ class NNGraph(Graph): all distances. That is the slowest method. * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method builds a k-d tree to prune the number of pairwise distances it has to - compute. + compute. That is an efficient strategy for low-dimensional spaces. * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as ``'scipy-kdtree'`` but with C bindings, which should be faster. + That is the default. * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors (FLANN) `_. That method is an approximation. @@ -235,29 +248,124 @@ class NNGraph(Graph): `_. That method is an approximation. It should be the fastest in high-dimensional spaces. + You can look at this `benchmark + `_ to get an idea of the + relative performance of those backends. It's nonetheless wise to run + some tests on your own data. kwargs : dict Parameters to be passed to the :class:`Graph` constructor or the backend library. Examples -------- + + Construction of a graph from a set of features. + >>> import matplotlib.pyplot as plt - >>> features = np.random.RandomState(42).uniform(size=(30, 2)) + >>> rs = np.random.RandomState(42) + >>> features = rs.uniform(size=(30, 2)) >>> G = graphs.NNGraph(features) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=5) >>> _ = G.plot(ax=axes[1]) - Plot all the pre-defined kernels. + Radius versus knn graph. + + >>> features = rs.uniform(size=(100, 3)) + >>> fig, ax = plt.subplots() + >>> G = graphs.NNGraph(features, kind='radius', radius=0.2964) + >>> label = 'radius graph ({} edges)'.format(G.n_edges) + >>> _ = ax.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> G = graphs.NNGraph(features, kind='knn', k=6) + >>> label = 'knn graph ({} edges)'.format(G.n_edges) + >>> _ = ax.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> _ = ax.legend() + >>> _ = ax.set_title('edge weights') + + Control of the sparsity of knn and radius graphs. + + >>> features = rs.uniform(size=(100, 3)) + >>> n_edges = dict(knn=[], radius=[]) + >>> n_neighbors = np.arange(1, 100, 5) + >>> radiuses = np.arange(0.05, 1.5, 0.05) + >>> for k in n_neighbors: + ... G = graphs.NNGraph(features, kind='knn', k=k) + ... n_edges['knn'].append(G.n_edges) + >>> for radius in radiuses: + ... G = graphs.NNGraph(features, kind='radius', radius=radius) + ... n_edges['radius'].append(G.n_edges) + >>> fig, axes = plt.subplots(1, 2, sharey=True) + >>> _ = axes[0].plot(n_neighbors, n_edges['knn']) + >>> _ = axes[1].plot(radiuses, n_edges['radius']) + >>> _ = axes[0].set_ylabel('number of edges') + >>> _ = axes[0].set_xlabel('number of neighbors (knn graph)') + >>> _ = axes[1].set_xlabel('radius (radius graph)') + >>> _ = fig.suptitle('Sparsity') + + Choice of metric and the curse of dimensionality. + >>> fig, axes = plt.subplots(1, 2) + >>> for dim, ax in zip([3, 30], axes): + ... features = rs.uniform(size=(100, dim)) + ... for metric in ['euclidean', 'manhattan', 'max_dist', 'cosine']: + ... G = graphs.NNGraph(features, metric=metric, + ... backend='scipy-pdist') + ... _ = ax.hist(G.W.data, bins=20, label=metric, alpha=0.5) + ... _ = ax.legend() + ... _ = ax.set_title('edge weights, {} dimensions'.format(dim)) + + Choice of kernel. + + >>> fig, axes = plt.subplots(1, 2) >>> width = 0.3 >>> distances = np.linspace(0, 1, 200) - >>> fig, ax = plt.subplots() >>> for name, kernel in graphs.NNGraph._kernels.items(): - ... _ = ax.plot(distances, kernel(distances / width), label=name) - >>> _ = ax.set_xlabel('distance [0, inf]') - >>> _ = ax.set_ylabel('similarity [0, 1]') - >>> _ = ax.legend(loc='upper right') + ... _ = axes[0].plot(distances, kernel(distances / width), label=name) + >>> _ = axes[0].set_xlabel('distance [0, inf]') + >>> _ = axes[0].set_ylabel('similarity [0, 1]') + >>> _ = axes[0].legend(loc='upper right') + >>> features = rs.uniform(size=(100, 3)) + >>> for kernel in ['gaussian', 'triangular', 'tricube', 'exponential']: + ... G = graphs.NNGraph(features, kernel=kernel) + ... _ = axes[1].hist(G.W.data, bins=20, label=kernel, alpha=0.5) + >>> _ = axes[1].legend() + >>> _ = axes[1].set_title('edge weights') + + Choice of kernel width. + + >>> fig, axes = plt.subplots() + >>> for width in [.2, .3, .4, .6, .8, None]: + ... G = graphs.NNGraph(features, kernel_width=width) + ... label = 'width = {:.2f}'.format(G.kernel_width) + ... _ = axes.hist(G.W.data, bins=20, label=label, alpha=0.5) + >>> _ = axes.legend(loc='upper left') + >>> _ = axes.set_title('edge weights') + + Choice of backend. Compare on your data! + + >>> import time + >>> sizes = [300, 1000, 3000] + >>> dims = [3, 100] + >>> backends = ['scipy-pdist', 'scipy-kdtree', 'scipy-ckdtree', 'flann', + ... 'nmslib'] + >>> times = np.full((len(sizes), len(dims), len(backends)), np.nan) + >>> for i, size in enumerate(sizes): + ... for j, dim in enumerate(dims): + ... for k, backend in enumerate(backends): + ... if (size * dim) > 1e4 and backend == 'scipy-kdtree': + ... continue # too slow + ... features = rs.uniform(size=(size, dim)) + ... start = time.time() + ... _ = graphs.NNGraph(features, backend=backend) + ... times[i][j][k] = time.time() - start + >>> fig, axes = plt.subplots(1, 2, sharey=True) + >>> for j, (dim, ax) in enumerate(zip(dims, axes)): + ... for k, backend in enumerate(backends): + ... _ = ax.loglog(sizes, times[:, j, k], '.-', label=backend) + ... _ = ax.set_title('{} dimensions'.format(dim)) + ... _ = ax.set_xlabel('number of vertices') + >>> _ = axes[0].set_ylabel('execution time [s]') + >>> _ = axes[1].legend(loc='upper left') """ From cbb25372ba773da069940ee1071c3eaec5462549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Mon, 25 Feb 2019 13:00:21 +0100 Subject: [PATCH 067/118] nngraph: update history --- doc/history.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/history.rst b/doc/history.rst index fa6cd4d6..1bc7840d 100644 --- a/doc/history.rst +++ b/doc/history.rst @@ -24,6 +24,9 @@ History * New implementation of the Sensor graph that is simpler and scales better. * A new learning module with three functions to solve standard semi-supervised classification and regression problems. +* A much improved, fixed, documented, and tested NNGraph. The user can now + select the backend and similarity kernel. The radius can be estimated and + features standardized. (PR #43) Experimental filter API (to be tested and validated): From 1da0e55094e9c8c8c894aa79ac193b053626cc95 Mon Sep 17 00:00:00 2001 From: nperraud <6399466+nperraud@users.noreply.github.com> Date: Mon, 25 Feb 2019 13:39:19 +0100 Subject: [PATCH 068/118] Update nngraph.py --- pygsp/graphs/nngraphs/nngraph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 3a68b27e..4434261e 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -154,7 +154,7 @@ class NNGraph(Graph): where :math:`d(v_i, v_j)` is a distance measure between some representation (the features) of :math:`v_i` and :math:`v_j`, :math:`k` is a kernel function that transforms a distance in :math:`[0, \infty]` to a similarity - measure in :math:`[0, 1]`, and :math:`\sigma` is the kernel width. + measure generally in :math:`[0, 1]`, and :math:`\sigma` is the kernel width. For example, the features might be the 3D coordinates of points in a point cloud. Then, if ``metric='euclidean'`` and ``kernel='gaussian'`` (the @@ -163,7 +163,7 @@ class NNGraph(Graph): The similarity matrix :math:`A` is sparsified by either keeping the ``k`` closest vertices for each vertex (if ``type='knn'``), or by setting to zero - any distance greater than ``radius`` (if ``type='radius'``). + the similarity when the distance is greater than ``radius`` (if ``type='radius'``). Parameters ---------- From 17dc1c66bf222fc28a300a3b7be67854948cc63d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Fri, 1 Mar 2019 04:15:24 +0100 Subject: [PATCH 069/118] nngraph: only warn for similarity > 1 --- pygsp/graphs/nngraphs/nngraph.py | 12 +++++++----- pygsp/tests/test_graphs.py | 4 +++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 4434261e..cb14319f 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -11,6 +11,9 @@ from pygsp.graphs import Graph # prevent circular import in Python < 3.5 +_logger = utils.build_logger(__name__) + + def _scipy_pdist(features, metric, order, kind, k, radius, params): if params: raise ValueError('unexpected parameters {}'.format(params)) @@ -445,10 +448,9 @@ def __init__(self, features, standardize=False, if kind == 'radius': n_disconnected = np.sum(np.asarray(n_edges) == 0) if n_disconnected > 0: - logger = utils.build_logger(__name__) - logger.warning('{} vertices (out of {}) are disconnected. ' - 'Consider increasing the radius or setting ' - 'kind=knn.'.format(n_disconnected, n_vertices)) + _logger.warning('{} vertices (out of {}) are disconnected. ' + 'Consider increasing the radius or setting ' + 'kind=knn.'.format(n_disconnected, n_vertices)) value = np.empty(sum(n_edges), dtype=np.float) row = np.empty_like(value, dtype=np.int) @@ -483,7 +485,7 @@ def __init__(self, features, standardize=False, assert np.all(W.data >= 0), 'Distance must be in [0, inf].' W.data = kernel(W.data / kernel_width) if not np.all((W.data >= 0) & (W.data <= 1)): - raise ValueError('Kernel returned similarity not in [0, 1].') + _logger.warning('Kernel returned similarity not in [0, 1].') self.order = order self.radius = radius diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 94ebe2fa..8f4249bd 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -446,7 +446,9 @@ def test_nngraph(self, n_vertices=24): np.testing.assert_allclose(kernel(np.ones(10)), similarity) np.testing.assert_allclose(kernel(np.zeros(10)), 1) Graph(data, kernel=lambda d: d.min()/d) - self.assertRaises(ValueError, Graph, data, kernel=lambda d: 1/d) + if sys.version_info > (3, 4): # no assertLogs in python 2.7 + with self.assertLogs(level='WARNING'): + Graph(data, kernel=lambda d: 1/d) # Attributes. self.assertEqual(Graph(data, kind='knn').radius, None) From ad5caeecdda588d19ac88d17cd22c88e1db8c639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 14 Mar 2019 00:23:31 +0100 Subject: [PATCH 070/118] show original exception if nmslib cannot be imported --- pygsp/graphs/nngraphs/nngraph.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index cb14319f..4ca8f9a2 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -124,10 +124,11 @@ def _nmslib(features, metric, order, kind, k, _, params): raise ValueError('nmslib does not support metric="minkowski".') try: import nmslib as nms - except Exception: + except Exception as e: raise ImportError('Cannot import nmslib. Choose another nearest ' - 'neighbors method or try to install it with ' - 'pip (or conda) install nmslib.') + 'neighbors backend or try to install it with ' + 'pip (or conda) install nmslib. ' + 'Original exception: {}'.format(e)) n_vertices, _ = features.shape params_index = params.pop('index', None) params_query = params.pop('query', None) From fc043fe8163052ad85d7d88a1668553c62ca2fbd Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Thu, 18 Jul 2019 15:38:31 +0200 Subject: [PATCH 071/118] backup commit --- learn-graph-test.py | 82 +++++++++ learn_graph.py | 401 ++++++++++++++++++++++++++++++++++++++++++++ nn-test.py | 46 +++++ nn.py | 245 +++++++++++++++++++++++++++ pygsp/utils.py | 20 +-- 5 files changed, 784 insertions(+), 10 deletions(-) create mode 100644 learn-graph-test.py create mode 100644 learn_graph.py create mode 100644 nn-test.py create mode 100644 nn.py diff --git a/learn-graph-test.py b/learn-graph-test.py new file mode 100644 index 00000000..8e922918 --- /dev/null +++ b/learn-graph-test.py @@ -0,0 +1,82 @@ +import unittest +import numpy as np +from nn import nn, sparse_distance_matrix +from learn_graph import prox_sum_log, isvector, issymetric, squareform_sp, sum_squareform, lin_map, norm_S +from scipy import sparse + +class TestCase(unittest.TestCase): + def test_prox_sum_log(self): + np.testing.assert_array_less(0,prox_sum_log(np.random.randn((10)),0.1)) + def test_isvector(self): + assert(isvector(4)==False) + assert(isvector([4,4])==False) + assert(isvector(np.array([4,4,4]))==True) + assert(isvector(np.zeros([3,4]))==False) + assert(isvector(np.zeros([3]))==True) + assert(isvector(np.zeros([3,0]))==False) + assert(isvector(np.zeros([3,1]))==False) + assert(isvector(sparse.csr_matrix((10,1)))==True) + + def test_issymetric(self): + data = np.random.RandomState(42).uniform(size=(100, 3)) + # neighbors, distances = nn(data, backend='flann') + neighbors, distances = nn(data) + W = sparse_distance_matrix(neighbors, distances) + assert(issymetric(W)) + W3 = W.copy() + W3[:,0] = -W3[:,0] + assert(issymetric(W3)==False) + + def test_squareform_sp(self): + data = np.random.RandomState(42).uniform(size=(100, 3)) + # neighbors, distances = nn(data, backend='flann') + neighbors, distances = nn(data) + W = sparse_distance_matrix(neighbors, distances) + w = squareform_sp(W) + W2 = squareform_sp(w) + np.testing.assert_array_almost_equal(W.todense(), W2.todense()) + + def test_sum_squareform(self): + nt =10 + # a) Create a random symetric matrix + A = np.random.rand(nt,nt) + A = A +A.transpose() + A = A - np.diag(np.diag(A)) + # b) Make the test + res1 = np.sum(A, axis=1) + a = squareform_sp(A) + S,St = sum_squareform(nt) + res2 = S @ a + np.testing.assert_allclose(res1,res2) + np.testing.assert_array_equal(S.transpose().todense(), St.todense()) + + def test_sum_squareform_sparse(self): + data = np.random.uniform(size=(100, 3)) + # neighbors, distances = nn(data, backend='flann') + neighbors, distances = nn(data) + W = sparse_distance_matrix(neighbors, distances) + nt = W.shape[0] + # b) Make the test + res1 = np.squeeze(np.asarray(np.sum(W.todense(), axis=1))) + w = squareform_sp(W) + S,St = sum_squareform(nt, w) + wfull = w.data + res2 = S @ wfull + np.testing.assert_allclose(res1,res2) + np.testing.assert_array_equal(S.transpose().todense(), St.todense()) + + def test_linmap(self): + np.testing.assert_array_almost_equal(lin_map(np.arange(10)/10,[0,10],[0,1]),np.arange(10)) + np.testing.assert_array_almost_equal(lin_map(np.arange(11)/10,[0,10]),np.arange(11)) + + def test_norm_S(self): + edge_mask = None + for n in range(15): + S, St = sum_squareform(10, mask=edge_mask) + res1 = np.linalg.norm(S.todense(),2) + res2 = norm_S(S) + np.testing.assert_allclose(res1,res2) + +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/learn_graph.py b/learn_graph.py new file mode 100644 index 00000000..c094911d --- /dev/null +++ b/learn_graph.py @@ -0,0 +1,401 @@ +import numpy as np +from scipy import sparse +from scipy.spatial.distance import squareform +from time import time + +def isvector(x): + '''Test if x is a vector''' + if sparse.issparse(x): + return x.shape[1]==1 + try: + return len(x.shape)==1 + except: + return False + + + +def prox_sum_log(x, gamma): + '''Solves the proximal problem: + sol = argmin_z 0.5*||x - z||_2^2 - gamma * sum(log(z)) + The solution is given by + sol = (x + sqrt(x.^2 + 4*gamma)) /2 + ''' + sol = (x + np.sqrt(x*x + 4*gamma)) /2 + return sol + + +def issymetric(W, tol=1e-7): + '''Test if a sparse matrix is symmetric''' + WT = sparse.triu(W) - sparse.triu(W.transpose()) + return np.sum(np.abs(WT)) MATRIX + l = w.shape[0] + n = np.int(np.round((1 + np.sqrt(1+8*l))/2)) + # check input + if not(l == n*(n-1)//2): + raise ValueError('Bad vector size!') + + ind_vec, _, data = sparse.find(w) + + num_nz = len(ind_vec) + + # indices inside the matrix + ind_i = np.zeros((num_nz)) + ind_j = np.zeros((num_nz)) + + curr_row = 0 + offset = 0 + # length of current row of matrix, counting from after the diagonal + sl = n - 1 + for ii in range(num_nz): + ind_vec_i = ind_vec[ii] + # if we change row, the vector index is bigger by at least the + # length of the line + the offset. + while(ind_vec_i >= (sl + offset)): + offset = offset + sl + sl = sl - 1 + curr_row = curr_row + 1 + + ind_i[ii] = curr_row + ind_j[ii] = ind_vec_i - offset + (n-sl) + indx = np.concatenate((ind_i, ind_j)) + indy = np.concatenate((ind_j, ind_i)) + data = np.concatenate((data, data)) + w = sparse.csr_matrix((data, (indx, indy)), (n, n)) + return w + else: + ## MATRIX -> VECTOR + # first checks + assert(len(w.shape)==2) + m, n = w.shape + if not(m == n) or np.sum(np.abs(w.diagonal()))>0: + raise ValueError('Matrix has to be square with zero diagonal!'); + if safe: + assert(issymetric(w)) + ind_i, ind_j, s = sparse.find(w); + # keep only upper triangular part + ind_upper = ind_i < ind_j + ind_i = ind_i[ind_upper]+1 + ind_j = ind_j[ind_upper]+1 + s = s[ind_upper] + # compute new (vector) index from [i,j] (matrix) indices + new_ind = ind_j + (ind_i-1)*n - ind_i*(ind_i+1)//2-1 + w = sparse.csr_matrix((s,(new_ind, np.zeros(new_ind.shape))), shape=(n*(n-1)//2, 1)) + return w + + +def sum_squareform(n, mask=None): + '''sparse matrix that sums the squareform of a vector + + Input parameters: + n: size of matrix W + mask: if given, S only contain the columns indicated by the mask + + Output parameters: + S: matrix so that S*w = sum(W) for vector w = squareform(W) + St: the adjoint of S + + Creates sparse matrices S, St = S' so that + S*w = sum(W), where w = squareform(W) + + The mask is used for large scale computations where only a few + non-zeros in W are to be summed. It needs to be the same size as w, + n(n-1)/2 elements. + + Properties of S: + * size(S) = [n, (n(n-1)/2)] % if no mask is given. + * size(S, 2) = nnz(w) % if mask is given + * norm(S)^2 = 2(n-1) + * sum(S) = 2*ones(1, n*(n-1)/2) + * sum(St) = sum(squareform(mask)) -- for full mask = (n-1)*ones(n,1) + ''' + if mask is not None: + if not(mask.shape[0] == n*(n-1)//2): + raise ValueError('mask size has to be n(n-1)/2'); + + ind_vec, _ = mask.nonzero() + + # final number of columns is the nnz(mask) + ncols = len(ind_vec) + + # indices inside the matrix + I = np.zeros((ncols)).astype(np.int) + J = np.zeros((ncols)).astype(np.int) + + curr_row = 0 + offset = 0 + # length of current row of matrix, counting from after the diagonal + sl = n - 1; + for ii in range(ncols): + ind_vec_i = ind_vec[ii] + # if we change row, the vector index is bigger by at least the + # length of the line + the offset. + while(ind_vec_i >= (sl + offset)): + offset = offset + sl + sl = sl - 1 + curr_row = curr_row + 1 + I[ii] = curr_row + J[ii] = ind_vec_i - offset + (n-sl) + else: + # number of columns is the length of w given size of W + ncols = ((n-1)*n)//2; + + I = np.zeros((ncols)).astype(np.int) + J = np.zeros((ncols)).astype(np.int) + + # offset + k = 0; + for i in range(1,n): + I[k: k + (n-i)] = np.arange(i, n).astype(np.int) + k = k + (n-i) + k = 0; + for i in range(1,n): + J[k: k + (n-i)] = i-1 + k = k + (n-i) + indx = np.concatenate((I,J)) + indy = np.concatenate((np.arange(ncols),np.arange(ncols))) + data = np.ones((2*ncols)) + S = sparse.csr_matrix((data, (indx, indy)), shape=(n,ncols), dtype=np.int) + St = sparse.csr_matrix((data, (indy, indx)), shape=(ncols, n), dtype=np.int) + return S, St + + +def lin_map(X, lims_out, lims_in=None): + '''Map linearly from a given range to another.''' + + if lims_in is None: + lims_in = [np.min(X), np.max(X)]; + + a = lims_in[0]; + b = lims_in[1]; + c = lims_out[0]; + d = lims_out[1]; + + Y = (X-a) * ((d-c)/(b-a)) + c; + return Y + + +def norm_S(S): + '''Norm of the matrix given by the function sum_squareform''' + return np.sqrt(2*np.max(np.sum(S, axis=1))) + + +def learn_graph_log_degree(Z): + a = 1 + b = 1 + c = 0 + verbosity = 2 + maxit = 1000 + tol = 1e-5 + step_size = .5 # from (0, 1) + fix_zeros = False + max_w = np.inf + edge_mask = None + w_0 = 0 + + if isvector(Z): + z = Z; + else: + z = squareform_sp(Z) + + l = len(z) # number of edges + # n(n-1)/2 = l => n = (1 + sqrt(1+8*l))/ 2 + n = int(np.round((1 + np.sqrt(1+8*l))/ 2)) # number of nodes + + if not(w_0==0): + if c==0: + raise ValueError('When w_0 is specified, c should not be 0'); + if not isvector(w_0): + w_0 = squareform_sp(w_0) + else: + w_0 = 0; + + # if sparsity pattern is fixed we optimize with respect to a smaller number + # of variables, all included in w + if edge_mask is not None: + if not(isvector(edge_mask)): + edge_mask = squareform_sp(edge_mask) + raise NotImplementedError() + # # use only the non-zero elements to optimize + # ind = find(params.edge_mask(:)); + # z = full(z(ind)); + # if not(isscalar(w_0)) + # w_0 = full(w_0(ind)); + + + w = np.zeros(z.shape); + + ## Needed operators + # S*w = sum(W) + + S, St = sum_squareform(n, mask=edge_mask) + + # S: edges -> nodes + K_op = lambda w : S@w + + # S': nodes -> edges + Kt_op = lambda z: St@z + + # if edge_mask is not None: + # norm_K = normest(S); + # # approximation: + # # sqrt(2*(n-1)) * sqrt(nnz(params.edge_mask) / (n*(n+1)/2)) /sqrt(2) + # else: + # # the next is an upper bound if we use a mask + # norm_K = sqrt(2*(n-1)); + + # This should be always the good value because S is made only with ones. To check... + norm_K = norm_S(S) + + + ## Learn the graph + # min_{W>=0} tr(X'*L*X) - gc * sum(log(sum(W))) + gp * norm(W-W0,'fro')^2, where L = diag(sum(W))-W + # min_W I{W>=0} + W(:)'*Dx(:) - gc * sum(log(sum(W))) + gp * norm(W-W0,'fro')^2 + # min_W f(W) + g(L_op(W)) + h(W) + + # put proximal of trace plus positivity together + feval = lambda w: 2*np.sum(w*z) # half should be counted + fprox = lambda w, gamma: np.minimum(max_w, np.maximum(0, w - 2*gamma*z)) # weighted soft thresholding op + + geval = lambda v: -a * np.sum(np.log(v+1e-15)) + gprox = lambda v, gamma: prox_sum_log(v, gamma*a) + # proximal of conjugate of g: v-gamma*gprox(v/gamma, 1/gamma) + g_star_prox = lambda v, gamma: v - gamma*a * prox_sum_log(v/(gamma*a), 1/(gamma*a)) + + if w_0 == 0: + # "if" not needed, for c = 0 both are the same but gains speed + heval = lambda w: b * np.sum(w*w) + hgrad = lambda w: 2 * b * w + hbeta = 2 * b; + else: + heval = lambda w: b * np.sum(w*w) + c * np.sum((w - w_0)**2) + hgrad = lambda w: 2 * ((b+c) * w - c * w_0) + hbeta = 2 * (b+c) + + + ## Custom FBF based primal dual (see [1] = [Komodakis, Pesquet]) + # parameters mu, epsilon for convergence (see [1]) + mu = hbeta + norm_K; #TODO: is it squared or not?? + epsilon = lin_map(0.0, [0, 1/(1+mu)], [0,1]); # in (0, 1/(1+mu) ) + + # INITIALIZATION + # primal variable ALREADY INITIALIZED + # dual variable + v_n = K_op(w) + + stat = dict() + stat['time'] = np.nan + if verbosity > 1: + stat['f_eval'] = np.empty((maxit)) + stat['g_eval'] = np.empty((maxit)) + stat['h_eval'] = np.empty((maxit)) + stat['fgh_eval'] = np.empty((maxit)) + stat['pos_violation'] = np.empty((maxit)) + + if verbosity > 1: + print('Relative change of primal, dual variables, and objective fun\n'); + + tstart = time() + gn = lin_map(step_size, [epsilon, (1-epsilon)/mu], [0,1]) # in [epsilon, (1-epsilon)/mu] + for i in range(maxit): + Y_n = w - gn * (hgrad(w) + Kt_op(v_n)) + y_n = v_n + gn * (K_op(w)) + P_n = fprox(Y_n, gn) + p_n = g_star_prox(y_n, gn) # = y_n - gn*g_prox(y_n/gn, 1/gn) + Q_n = P_n - gn * (hgrad(P_n) + Kt_op(p_n)) + q_n = p_n + gn * (K_op(P_n)) + + if verbosity > 1: + stat['f_eval'][i] = feval(w) + stat['g_eval'][i] = geval(K_op(w)) + stat['h_eval'][i] = heval(w) + stat['fgh_eval'][i] = stat['f_eval'][i] + stat['g_eval'][i] + stat['h_eval'][i] + stat['pos_violation'][i] = -np.sum(np.minimum(0,w)) + + rel_norm_primal = np.linalg.norm(- Y_n + Q_n)/(np.linalg.norm(w)+1e-15) + rel_norm_dual = np.linalg.norm(- y_n + q_n)/(np.linalg.norm(v_n)+1e-15) + + if verbosity > 3: + print('iter {:4d}: {:6.4e} {:6.4e} {:6.3e}'.format(i, rel_norm_primal, rel_norm_dual, stat['fgh_eval'][i])) + elif verbosity > 2: + print('iter {:4d}: {%6.4e} {:6.4e} {:6.3e}'.format(i, rel_norm_primal, rel_norm_dual, stat['fgh_eval'][i])) + elif verbosity > 1: + print('iter {:4d}: {:6.4e} {:6.4e}'.format(i, rel_norm_primal, rel_norm_dual)) + + w = w - Y_n + Q_n + v_n = v_n - y_n + q_n + + if (rel_norm_primal < tol) and (rel_norm_dual < tol): + break + + stat['time'] = time()-tstart + if verbosity > 0: + print('# iters: {:4d}. Rel primal: {:6.4e} Rel dual: {:6.4e} OBJ {:6.3e}'.format( + i, rel_norm_primal, rel_norm_dual, feval(w) + geval(K_op(w)) + heval(w))) + print('Time needed is {:f} seconds'.format(stat['time'])) + + if edge_mask is not None: + raise NotImplementedError() + # w = sparse(ind, ones(size(ind)), w, l, 1); + + w[w<0]=0 + if isvector(Z): + W = w; + else: + W = squareform_sp(w); + + return W + +def compute_theta_bounds(Z, geom_mean=False): + '''Compute the values of parameter theta (controlling sparsity) + that should be expected to give each sparsity level. Return upper + and lower bounds each sparsity level k=[1, ..., n-1] neighbors/node. + + Z : distance matrix + geom_mean: use geometric mean instead of arithmetic mean? default: False + ''' + # Z is the zero-diagonal pairwise distance matrix between nodes + + if isvector(Z): + ValueError('Z must be a matrix.') + assert(Z.shape[0]==Z.shape[1]) + assert(len(Z.shape)==2) + n = len(Z); + + # don't take into account the diagonal of Z + Z_sorted = np.zeros((n, n-1)); + for i in range(n): + Z_sorted[i, :] = np.sort(np.concatenate((Z[i,:i],Z[i,i+1:])), kind='mergesort') + + n, m = Z_sorted.shape + + B_k = np.cumsum(Z_sorted, axis=1) # cummulative sum for each row + K_mat = np.tile(np.arange(1,m+1), (n, 1)) + ## Theoretical intervals of theta for each desired sparsity level: + if geom_mean: + theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15), axis=0) + else: + # try geometric mean instead of arithmetic: + theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15)), axis=0)) + theta_l = np.zeros(theta_u.shape) + theta_l[:-1] = theta_u[1:] + return theta_l, theta_u, Z_sorted + +def gsp_compute_graph_learning_theta(Z, k, geom_mean=False): + + # Z is the zero-diagonal pairwise distance matrix between nodes + theta_min, theta_max, _ = compute_theta_bounds(Z, geom_mean) + theta_min = theta_min[k-1]; + theta_max = theta_max[k-1]; + + if k > 1: + theta = np.sqrt(theta_min * theta_max); + else: + theta = theta_min * 1.1; + return theta, theta_min, theta_max \ No newline at end of file diff --git a/nn-test.py b/nn-test.py new file mode 100644 index 00000000..77a6cde6 --- /dev/null +++ b/nn-test.py @@ -0,0 +1,46 @@ +import unittest +import numpy as np +from nn import nn + +class TestCase(unittest.TestCase): + def test_nngraph(self, n_vertices=24): + data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + + for metric in metrics: + for kind in ['knn', 'radius']: + for backend in backends: + params = dict(features=data, metric=metric, kind=kind, radius=0.25) + ref_nn, ref_d = nn(backend='scipy-pdist', **params) + # Unsupported combinations. + if backend == 'flann' and metric == 'max_dist': + self.assertRaises(ValueError, nn, data, + metric=metric, backend=backend) + elif backend == 'nmslib' and metric == 'minkowski': + self.assertRaises(ValueError, nn, data, + metric=metric, backend=backend) + elif backend == 'nmslib' and kind == 'radius': + self.assertRaises(ValueError, nn, data, + kind=kind, backend=backend) + else: + params['backend'] = backend + if backend == 'flann': +# params['target_precision'] = 1 + other_nn, other_d = nn(random_seed=44, **params) + else: + other_nn, other_d = nn(**params) + print(kind, backend) + for a,b in zip(ref_nn, other_nn): + np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) + + for a,b in zip(ref_d, other_d): + np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) + + def test_sparse_distance_matrix(self): + data = np.random.RandomState(42).uniform(size=(24, 3)) + + +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/nn.py b/nn.py new file mode 100644 index 00000000..3ae6a60d --- /dev/null +++ b/nn.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +import numpy as np +from scipy import sparse, spatial +from pygsp import utils + +def _scipy_pdist(features, metric, order, kind, k, radius, params): + if params: + raise ValueError('unexpected parameters {}'.format(params)) + metric = 'cityblock' if metric == 'manhattan' else metric + metric = 'chebyshev' if metric == 'max_dist' else metric + params = dict(metric=metric) + if metric == 'minkowski': + params['p'] = order + dist = spatial.distance.pdist(features, **params) + dist = spatial.distance.squareform(dist) + if kind == 'knn': + neighbors = np.argsort(dist)[:, :k+1] + distances = np.take_along_axis(dist, neighbors, axis=-1) + elif kind == 'radius': + distances = [] + neighbors = [] + for distance in dist: + neighbor = np.flatnonzero(distance < radius) + neighbors.append(neighbor) + distances.append(distance[neighbor]) + return neighbors, distances + + +def _scipy_kdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.KDTree(features, **params) + params = dict(p=order, eps=eps) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = None + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + return neighbors, distances + + +def _scipy_ckdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.cKDTree(features, **params) + params = dict(p=order, eps=eps, n_jobs=-1) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = features.shape[0] # number of vertices + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + if kind == 'knn': + return neighbors, distances + elif kind == 'radius': + dist = [] + neigh = [] + for distance, neighbor in zip(distances, neighbors): + mask = (distance != np.inf) + dist.append(distance[mask]) + neigh.append(neighbor[mask]) + return neigh, dist + + +def _flann(features, metric, order, kind, k, radius, params): + if metric == 'max_dist': + raise ValueError('flann gives wrong results for metric="max_dist".') + try: + import cyflann as cfl + except Exception as e: + raise ImportError('Cannot import cyflann. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install cyflann. ' + 'Original exception: {}'.format(e)) + cfl.set_distance_type(metric, order=order) + index = cfl.FLANNIndex() + index.build_index(features, **params) + # I tried changing the algorithm and testing performance on huge matrices, + # but the default parameters seems to work best. + if kind == 'knn': + neighbors, distances = index.nn_index(features, k+1) + if metric == 'euclidean': + np.sqrt(distances, out=distances) + elif metric == 'minkowski': + np.power(distances, 1/order, out=distances) + elif kind == 'radius': + distances = [] + neighbors = [] + if metric == 'euclidean': + radius = radius**2 + elif metric == 'minkowski': + radius = radius**order + n_vertices, _ = features.shape + for vertex in range(n_vertices): + neighbor, distance = index.nn_radius(features[vertex, :], radius) + distances.append(distance) + neighbors.append(neighbor) + if metric == 'euclidean': + distances = list(map(np.sqrt, distances)) + elif metric == 'minkowski': + distances = list(map(lambda d: np.power(d, 1/order), distances)) + index.free_index() + return neighbors, distances + + +def _nmslib(features, metric, order, kind, k, _, params): + if kind == 'radius': + raise ValueError('nmslib does not support kind="radius".') + if metric == 'minkowski': + raise ValueError('nmslib does not support metric="minkowski".') + try: + import nmslib as nms + except Exception as e: + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install nmslib. ' + 'Original exception: {}'.format(e)) + n_vertices, _ = features.shape + params_index = params.pop('index', None) + params_query = params.pop('query', None) + metric = 'l2' if metric == 'euclidean' else metric + metric = 'l1' if metric == 'manhattan' else metric + metric = 'linf' if metric == 'max_dist' else metric + index = nms.init(space=metric, **params) + index.addDataPointBatch(features) + index.createIndex(params_index) + if params_query is not None: + index.setQueryTimeParams(params_query) + results = index.knnQueryBatch(features, k=k+1) + neighbors, distances = zip(*results) + distances = np.concatenate(distances).reshape(n_vertices, k+1) + neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) + return neighbors, distances + +def nn(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): + '''Find nearest neighboors. + + Parameters + ---------- + features : data numpy array + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. + + * ``'euclidean'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. + * ``'manhattan'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. + * ``'minkowski'`` generalizes the above and defines distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_p` + where :math:`p` is the ``order`` of the norm. + * ``'max_dist'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(x_i - x_j)`, where + the maximum is taken over the elements of the vector. + + More metrics may be supported for some backends. + Please refer to the documentation of the chosen backend. + kind : 'knn' or 'radius' (default 'knn') + k : number of nearest neighboors if 'knn' is selected + radius : radius of the search if 'radius' is slected + + order : float, optional + The order of the Minkowski distance for ``metric='minkowski'``. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. That is an efficient strategy for low-dimensional spaces. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + That is the default. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. + + You can look at this `benchmark + `_ to get an idea of the + relative performance of those backends. It's nonetheless wise to run + some tests on your own data. + ''' + if kind=='knn': + radius = None + elif kind=='radius': + k = None + else: + raise ValueError('"kind" must be "knn" or "radius"') + + _orders = { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': order, + } + order = _orders.pop(metric, None) + try: + function = globals()['_' + backend.replace('-', '_')] + except KeyError: + raise ValueError('Invalid backend "{}".'.format(backend)) + neighbors, distances = function(features, metric, order, + kind, k, radius, kwargs) + return neighbors, distances + + +def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None): + '''Build a sparse distance matrix.''' + n_edges = [len(n) - 1 for n in neighbors] # remove distance to self + if safe and kind is None: + raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode') + + if safe and kind == 'radius': + n_disconnected = np.sum(np.asarray(n_edges) == 0) + if n_disconnected > 0: + _logger.warning('{} points (out of {}) have no neighboors. ' + 'Consider increasing the radius or setting ' + 'kind=knn.'.format(n_disconnected, n_vertices)) + + value = np.empty(sum(n_edges), dtype=np.float) + row = np.empty_like(value, dtype=np.int) + col = np.empty_like(value, dtype=np.int) + start = 0 + n_vertices = len(n_edges) + for vertex in range(n_vertices): + if safe and kind == 'knn': + assert n_edges[vertex] == k + end = start + n_edges[vertex] + value[start:end] = distances[vertex][1:] + row[start:end] = np.full(n_edges[vertex], vertex) + col[start:end] = neighbors[vertex][1:] + start = end + W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + if symmetrize: + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. + W = utils.symmetrize(W, method='fill') + return W \ No newline at end of file diff --git a/pygsp/utils.py b/pygsp/utils.py index 94474eb1..f9e20086 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -115,19 +115,16 @@ def distanz(x, y=None): [2., 1., 0.]]) """ - try: - x.shape[1] - except IndexError: - x = x.reshape(1, x.shape[0]) + if len(x.shape)<2: + x = np.expand_dims(x, axis=0) if y is None: y = x - + diag_zero = True else: - try: - y.shape[1] - except IndexError: - y = y.reshape(1, y.shape[0]) + diag_zero = False + if len(y.shape)<2: + y = np.expand_dims(xy, axis=0) rx, cx = x.shape ry, cy = y.shape @@ -142,7 +139,10 @@ def distanz(x, y=None): d = abs(np.kron(np.ones((cy, 1)), xx).T + np.kron(np.ones((cx, 1)), yy) - 2 * xy) - + + if diag_zero: + # Remove diagonal errors + d = d-np.diag(np.diag(d)) return np.sqrt(d) From bb8133cab1212efa0a4ab2eb89ed679ea0f1dc00 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Thu, 18 Jul 2019 17:35:07 +0200 Subject: [PATCH 072/118] working sparse solution --- learn-graph-test.py | 33 ++++++++- learn_graph.py | 173 ++++++++++++++++++++++---------------------- 2 files changed, 118 insertions(+), 88 deletions(-) diff --git a/learn-graph-test.py b/learn-graph-test.py index 8e922918..85160116 100644 --- a/learn-graph-test.py +++ b/learn-graph-test.py @@ -1,8 +1,10 @@ import unittest import numpy as np from nn import nn, sparse_distance_matrix -from learn_graph import prox_sum_log, isvector, issymetric, squareform_sp, sum_squareform, lin_map, norm_S +from learn_graph import prox_sum_log, isvector, issymetric, squareform_sp, sum_squareform, lin_map, norm_S, learn_graph_log_degree, gsp_compute_graph_learning_theta from scipy import sparse +import pygsp as pg +from pygsp.utils import distanz class TestCase(unittest.TestCase): def test_prox_sum_log(self): @@ -76,7 +78,34 @@ def test_norm_S(self): res1 = np.linalg.norm(S.todense(),2) res2 = norm_S(S) np.testing.assert_allclose(res1,res2) - + + def test_learn_graph_log(self): + # Create a bunch of signals + n=100 + d = 400 + G = pg.graphs.Sensor(N=n,k=6) + G.compute_fourier_basis() + # g = pg.filters.Heat(G, scale=5) + g = pg.filters.Filter(G,lambda x:1/(1+5*x)) + S = np.random.randn(n,d) + X = np.squeeze(g.filter(S)) + + Z = distanz(X.transpose()) + k=6 + theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Z, k) + learned_W, _ = learn_graph_log_degree(theta*Z, verbosity=0) + + neighbors, distances = nn(X, k=3*k, kind='knn') + np.testing.assert_equal(distances[:,0],0) + dmat = distances[:,1:] + theta2, theta_min2, theta_max2 = gsp_compute_graph_learning_theta(dmat, k) + np.testing.assert_allclose(theta, theta2) + np.testing.assert_allclose(theta_min, theta_min2) + np.testing.assert_allclose(theta_max, theta_max2) + W = sparse_distance_matrix(neighbors, distances) + learned_W2, _ = learn_graph_log_degree(W*theta2, edge_mask=W>0, verbosity=0) + assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-4) + suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/learn_graph.py b/learn_graph.py index c094911d..1e4878ee 100644 --- a/learn_graph.py +++ b/learn_graph.py @@ -186,25 +186,81 @@ def norm_S(S): return np.sqrt(2*np.max(np.sum(S, axis=1))) -def learn_graph_log_degree(Z): - a = 1 - b = 1 - c = 0 - verbosity = 2 - maxit = 1000 - tol = 1e-5 - step_size = .5 # from (0, 1) - fix_zeros = False - max_w = np.inf - edge_mask = None - w_0 = 0 +def compute_theta_bounds(Z, geom_mean=False, islist=None): + '''Compute the values of parameter theta (controlling sparsity) + that should be expected to give each sparsity level. Return upper + and lower bounds each sparsity level k=[1, ..., n-1] neighbors/node. + + Z : distance matrix + geom_mean: use geometric mean instead of arithmetic mean? default: False + ''' + # Z is the zero-diagonal pairwise distance matrix between nodes + + if isvector(Z): + ValueError('Z must be a matrix.') + assert(len(Z.shape)==2) + n = len(Z); + + if islist is None: + islist = not(Z.shape[0]==Z.shape[1]) + if islist: + Z_sorted = np.sort(Z, axis=1) + else: + assert(Z.shape[0]==Z.shape[1]) + # don't take into account the diagonal of Z + Z_sorted = np.zeros((n, n-1)); + for i in range(n): + Z_sorted[i, :] = np.sort(np.concatenate((Z[i,:i],Z[i,i+1:])), kind='mergesort') + + n, m = Z_sorted.shape + + B_k = np.cumsum(Z_sorted, axis=1) # cummulative sum for each row + K_mat = np.tile(np.arange(1,m+1), (n, 1)) + ## Theoretical intervals of theta for each desired sparsity level: + if geom_mean: + theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15), axis=0) + else: + # try geometric mean instead of arithmetic: + theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15)), axis=0)) + theta_l = np.zeros(theta_u.shape) + theta_l[:-1] = theta_u[1:] + return theta_l, theta_u, Z_sorted + +def gsp_compute_graph_learning_theta(Z, k, geom_mean=False, islist=None): + + + # Z is the zero-diagonal pairwise distance matrix between nodes + theta_min, theta_max, _ = compute_theta_bounds(Z, geom_mean, islist) + theta_min = theta_min[k-1]; + theta_max = theta_max[k-1]; + + if k > 1: + theta = np.sqrt(theta_min * theta_max); + else: + theta = theta_min * 1.1; + return theta, theta_min, theta_max + + +def learn_graph_log_degree(Z, + a = 1, + b = 1, + c = 0, + verbosity = 1, + maxit = 1000, + tol = 1e-5, + step_size = .5, + fix_zeros = False, + max_w = np.inf, + edge_mask = None, + w_0 = 0, + rel_edge=1e-5): if isvector(Z): z = Z; else: z = squareform_sp(Z) - l = len(z) # number of edges + l = z.shape[0] # number of edges # n(n-1)/2 = l => n = (1 + sqrt(1+8*l))/ 2 n = int(np.round((1 + np.sqrt(1+8*l))/ 2)) # number of nodes @@ -221,14 +277,13 @@ def learn_graph_log_degree(Z): if edge_mask is not None: if not(isvector(edge_mask)): edge_mask = squareform_sp(edge_mask) - raise NotImplementedError() - # # use only the non-zero elements to optimize - # ind = find(params.edge_mask(:)); - # z = full(z(ind)); - # if not(isscalar(w_0)) - # w_0 = full(w_0(ind)); - + # use only the non-zero elements to optimize + ind, _ = edge_mask.nonzero() + z = z[ind].data + if not(np.isscalar(w_0)): + w_0 = w_0[ind].data + w = np.zeros(z.shape); ## Needed operators @@ -237,20 +292,11 @@ def learn_graph_log_degree(Z): S, St = sum_squareform(n, mask=edge_mask) # S: edges -> nodes - K_op = lambda w : S@w + K_op = lambda w : S.dot(w) # S': nodes -> edges - Kt_op = lambda z: St@z - - # if edge_mask is not None: - # norm_K = normest(S); - # # approximation: - # # sqrt(2*(n-1)) * sqrt(nnz(params.edge_mask) / (n*(n+1)/2)) /sqrt(2) - # else: - # # the next is an upper bound if we use a mask - # norm_K = sqrt(2*(n-1)); + Kt_op = lambda z: St.dot(z) - # This should be always the good value because S is made only with ones. To check... norm_K = norm_S(S) @@ -288,7 +334,7 @@ def learn_graph_log_degree(Z): # primal variable ALREADY INITIALIZED # dual variable v_n = K_op(w) - + stat = dict() stat['time'] = np.nan if verbosity > 1: @@ -339,63 +385,18 @@ def learn_graph_log_degree(Z): print('# iters: {:4d}. Rel primal: {:6.4e} Rel dual: {:6.4e} OBJ {:6.3e}'.format( i, rel_norm_primal, rel_norm_dual, feval(w) + geval(K_op(w)) + heval(w))) print('Time needed is {:f} seconds'.format(stat['time'])) - - if edge_mask is not None: - raise NotImplementedError() - # w = sparse(ind, ones(size(ind)), w, l, 1); - w[w<0]=0 - if isvector(Z): - W = w; - else: - W = squareform_sp(w); - - return W - -def compute_theta_bounds(Z, geom_mean=False): - '''Compute the values of parameter theta (controlling sparsity) - that should be expected to give each sparsity level. Return upper - and lower bounds each sparsity level k=[1, ..., n-1] neighbors/node. + # Force positivity on the solution - Z : distance matrix - geom_mean: use geometric mean instead of arithmetic mean? default: False - ''' - # Z is the zero-diagonal pairwise distance matrix between nodes + w[w0 + w = sparse.csr_matrix((w[indw], (ind[indw], np.zeros((np.sum(indw))))), (l, 1)) - n, m = Z_sorted.shape - - B_k = np.cumsum(Z_sorted, axis=1) # cummulative sum for each row - K_mat = np.tile(np.arange(1,m+1), (n, 1)) - ## Theoretical intervals of theta for each desired sparsity level: - if geom_mean: - theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15), axis=0) + if isvector(Z): + W = w; else: - # try geometric mean instead of arithmetic: - theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15)), axis=0)) - theta_l = np.zeros(theta_u.shape) - theta_l[:-1] = theta_u[1:] - return theta_l, theta_u, Z_sorted - -def gsp_compute_graph_learning_theta(Z, k, geom_mean=False): - - # Z is the zero-diagonal pairwise distance matrix between nodes - theta_min, theta_max, _ = compute_theta_bounds(Z, geom_mean) - theta_min = theta_min[k-1]; - theta_max = theta_max[k-1]; + W = squareform_sp(w); - if k > 1: - theta = np.sqrt(theta_min * theta_max); - else: - theta = theta_min * 1.1; - return theta, theta_min, theta_max \ No newline at end of file + return W, stat From f9cc066dfb4bd01f5d9f87c684479760f25873e3 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 22 Jul 2019 10:22:01 +0200 Subject: [PATCH 073/118] add nn support --- pygsp/_nearest_neighbor.py | 245 +++++++++++++++++++++++++++ pygsp/tests/test_nearest_neighbor.py | 46 +++++ 2 files changed, 291 insertions(+) create mode 100644 pygsp/_nearest_neighbor.py create mode 100644 pygsp/tests/test_nearest_neighbor.py diff --git a/pygsp/_nearest_neighbor.py b/pygsp/_nearest_neighbor.py new file mode 100644 index 00000000..3ae6a60d --- /dev/null +++ b/pygsp/_nearest_neighbor.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- + +from __future__ import division + +import numpy as np +from scipy import sparse, spatial +from pygsp import utils + +def _scipy_pdist(features, metric, order, kind, k, radius, params): + if params: + raise ValueError('unexpected parameters {}'.format(params)) + metric = 'cityblock' if metric == 'manhattan' else metric + metric = 'chebyshev' if metric == 'max_dist' else metric + params = dict(metric=metric) + if metric == 'minkowski': + params['p'] = order + dist = spatial.distance.pdist(features, **params) + dist = spatial.distance.squareform(dist) + if kind == 'knn': + neighbors = np.argsort(dist)[:, :k+1] + distances = np.take_along_axis(dist, neighbors, axis=-1) + elif kind == 'radius': + distances = [] + neighbors = [] + for distance in dist: + neighbor = np.flatnonzero(distance < radius) + neighbors.append(neighbor) + distances.append(distance[neighbor]) + return neighbors, distances + + +def _scipy_kdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.KDTree(features, **params) + params = dict(p=order, eps=eps) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = None + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + return neighbors, distances + + +def _scipy_ckdtree(features, _, order, kind, k, radius, params): + if order is None: + raise ValueError('invalid metric for scipy-kdtree') + eps = params.pop('eps', 0) + tree = spatial.cKDTree(features, **params) + params = dict(p=order, eps=eps, n_jobs=-1) + if kind == 'knn': + params['k'] = k + 1 + elif kind == 'radius': + params['k'] = features.shape[0] # number of vertices + params['distance_upper_bound'] = radius + distances, neighbors = tree.query(features, **params) + if kind == 'knn': + return neighbors, distances + elif kind == 'radius': + dist = [] + neigh = [] + for distance, neighbor in zip(distances, neighbors): + mask = (distance != np.inf) + dist.append(distance[mask]) + neigh.append(neighbor[mask]) + return neigh, dist + + +def _flann(features, metric, order, kind, k, radius, params): + if metric == 'max_dist': + raise ValueError('flann gives wrong results for metric="max_dist".') + try: + import cyflann as cfl + except Exception as e: + raise ImportError('Cannot import cyflann. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install cyflann. ' + 'Original exception: {}'.format(e)) + cfl.set_distance_type(metric, order=order) + index = cfl.FLANNIndex() + index.build_index(features, **params) + # I tried changing the algorithm and testing performance on huge matrices, + # but the default parameters seems to work best. + if kind == 'knn': + neighbors, distances = index.nn_index(features, k+1) + if metric == 'euclidean': + np.sqrt(distances, out=distances) + elif metric == 'minkowski': + np.power(distances, 1/order, out=distances) + elif kind == 'radius': + distances = [] + neighbors = [] + if metric == 'euclidean': + radius = radius**2 + elif metric == 'minkowski': + radius = radius**order + n_vertices, _ = features.shape + for vertex in range(n_vertices): + neighbor, distance = index.nn_radius(features[vertex, :], radius) + distances.append(distance) + neighbors.append(neighbor) + if metric == 'euclidean': + distances = list(map(np.sqrt, distances)) + elif metric == 'minkowski': + distances = list(map(lambda d: np.power(d, 1/order), distances)) + index.free_index() + return neighbors, distances + + +def _nmslib(features, metric, order, kind, k, _, params): + if kind == 'radius': + raise ValueError('nmslib does not support kind="radius".') + if metric == 'minkowski': + raise ValueError('nmslib does not support metric="minkowski".') + try: + import nmslib as nms + except Exception as e: + raise ImportError('Cannot import nmslib. Choose another nearest ' + 'neighbors backend or try to install it with ' + 'pip (or conda) install nmslib. ' + 'Original exception: {}'.format(e)) + n_vertices, _ = features.shape + params_index = params.pop('index', None) + params_query = params.pop('query', None) + metric = 'l2' if metric == 'euclidean' else metric + metric = 'l1' if metric == 'manhattan' else metric + metric = 'linf' if metric == 'max_dist' else metric + index = nms.init(space=metric, **params) + index.addDataPointBatch(features) + index.createIndex(params_index) + if params_query is not None: + index.setQueryTimeParams(params_query) + results = index.knnQueryBatch(features, k=k+1) + neighbors, distances = zip(*results) + distances = np.concatenate(distances).reshape(n_vertices, k+1) + neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) + return neighbors, distances + +def nn(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): + '''Find nearest neighboors. + + Parameters + ---------- + features : data numpy array + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. + + * ``'euclidean'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. + * ``'manhattan'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. + * ``'minkowski'`` generalizes the above and defines distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_p` + where :math:`p` is the ``order`` of the norm. + * ``'max_dist'`` defines pairwise distances as + :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(x_i - x_j)`, where + the maximum is taken over the elements of the vector. + + More metrics may be supported for some backends. + Please refer to the documentation of the chosen backend. + kind : 'knn' or 'radius' (default 'knn') + k : number of nearest neighboors if 'knn' is selected + radius : radius of the search if 'radius' is slected + + order : float, optional + The order of the Minkowski distance for ``metric='minkowski'``. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. That is an efficient strategy for low-dimensional spaces. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + That is the default. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. + + You can look at this `benchmark + `_ to get an idea of the + relative performance of those backends. It's nonetheless wise to run + some tests on your own data. + ''' + if kind=='knn': + radius = None + elif kind=='radius': + k = None + else: + raise ValueError('"kind" must be "knn" or "radius"') + + _orders = { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': order, + } + order = _orders.pop(metric, None) + try: + function = globals()['_' + backend.replace('-', '_')] + except KeyError: + raise ValueError('Invalid backend "{}".'.format(backend)) + neighbors, distances = function(features, metric, order, + kind, k, radius, kwargs) + return neighbors, distances + + +def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None): + '''Build a sparse distance matrix.''' + n_edges = [len(n) - 1 for n in neighbors] # remove distance to self + if safe and kind is None: + raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode') + + if safe and kind == 'radius': + n_disconnected = np.sum(np.asarray(n_edges) == 0) + if n_disconnected > 0: + _logger.warning('{} points (out of {}) have no neighboors. ' + 'Consider increasing the radius or setting ' + 'kind=knn.'.format(n_disconnected, n_vertices)) + + value = np.empty(sum(n_edges), dtype=np.float) + row = np.empty_like(value, dtype=np.int) + col = np.empty_like(value, dtype=np.int) + start = 0 + n_vertices = len(n_edges) + for vertex in range(n_vertices): + if safe and kind == 'knn': + assert n_edges[vertex] == k + end = start + n_edges[vertex] + value[start:end] = distances[vertex][1:] + row[start:end] = np.full(n_edges[vertex], vertex) + col[start:end] = neighbors[vertex][1:] + start = end + W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + if symmetrize: + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. + W = utils.symmetrize(W, method='fill') + return W \ No newline at end of file diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py new file mode 100644 index 00000000..77a6cde6 --- /dev/null +++ b/pygsp/tests/test_nearest_neighbor.py @@ -0,0 +1,46 @@ +import unittest +import numpy as np +from nn import nn + +class TestCase(unittest.TestCase): + def test_nngraph(self, n_vertices=24): + data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + + for metric in metrics: + for kind in ['knn', 'radius']: + for backend in backends: + params = dict(features=data, metric=metric, kind=kind, radius=0.25) + ref_nn, ref_d = nn(backend='scipy-pdist', **params) + # Unsupported combinations. + if backend == 'flann' and metric == 'max_dist': + self.assertRaises(ValueError, nn, data, + metric=metric, backend=backend) + elif backend == 'nmslib' and metric == 'minkowski': + self.assertRaises(ValueError, nn, data, + metric=metric, backend=backend) + elif backend == 'nmslib' and kind == 'radius': + self.assertRaises(ValueError, nn, data, + kind=kind, backend=backend) + else: + params['backend'] = backend + if backend == 'flann': +# params['target_precision'] = 1 + other_nn, other_d = nn(random_seed=44, **params) + else: + other_nn, other_d = nn(**params) + print(kind, backend) + for a,b in zip(ref_nn, other_nn): + np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) + + for a,b in zip(ref_d, other_d): + np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) + + def test_sparse_distance_matrix(self): + data = np.random.RandomState(42).uniform(size=(24, 3)) + + +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 29d6fa6a52dbfe26cade3a6adca1df4a5486c04f Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 22 Jul 2019 10:44:00 +0200 Subject: [PATCH 074/118] make test work --- pygsp/_nearest_neighbor.py | 4 +-- pygsp/tests/test_all.py | 2 ++ pygsp/tests/test_nearest_neighbor.py | 53 +++++++++++++++++++++------- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/pygsp/_nearest_neighbor.py b/pygsp/_nearest_neighbor.py index 3ae6a60d..daa87c60 100644 --- a/pygsp/_nearest_neighbor.py +++ b/pygsp/_nearest_neighbor.py @@ -138,7 +138,7 @@ def _nmslib(features, metric, order, kind, k, _, params): neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) return neighbors, distances -def nn(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): +def nearest_neighbor(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): '''Find nearest neighboors. Parameters @@ -212,7 +212,7 @@ def nn(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, bac def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None): - '''Build a sparse distance matrix.''' + '''Build a sparse distance matrix from nearest neighbors''' n_edges = [len(n) - 1 for n in neighbors] # remove distance to self if safe and kind is None: raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode') diff --git a/pygsp/tests/test_all.py b/pygsp/tests/test_all.py index 2d52a185..1fc9a55d 100755 --- a/pygsp/tests/test_all.py +++ b/pygsp/tests/test_all.py @@ -14,9 +14,11 @@ from pygsp.tests import test_docstrings from pygsp.tests import test_plotting from pygsp.tests import test_learning +from pygsp.tests import test_nearest_neighbor suites = [] +suites.append(test_nearest_neighbor.suite) suites.append(test_graphs.suite) suites.append(test_filters.suite) suites.append(test_utils.suite) diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index 77a6cde6..fe3f636f 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -1,6 +1,6 @@ import unittest import numpy as np -from nn import nn +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix class TestCase(unittest.TestCase): def test_nngraph(self, n_vertices=24): @@ -12,35 +12,62 @@ def test_nngraph(self, n_vertices=24): for kind in ['knn', 'radius']: for backend in backends: params = dict(features=data, metric=metric, kind=kind, radius=0.25) - ref_nn, ref_d = nn(backend='scipy-pdist', **params) + ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params) # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': - self.assertRaises(ValueError, nn, data, + self.assertRaises(ValueError, nearest_neighbor, data, metric=metric, backend=backend) elif backend == 'nmslib' and metric == 'minkowski': - self.assertRaises(ValueError, nn, data, + self.assertRaises(ValueError, nearest_neighbor, data, metric=metric, backend=backend) elif backend == 'nmslib' and kind == 'radius': - self.assertRaises(ValueError, nn, data, + self.assertRaises(ValueError, nearest_neighbor, data, kind=kind, backend=backend) else: params['backend'] = backend if backend == 'flann': -# params['target_precision'] = 1 - other_nn, other_d = nn(random_seed=44, **params) + other_nn, other_d = nearest_neighbor(random_seed=44, **params) else: - other_nn, other_d = nn(**params) + other_nn, other_d = nearest_neighbor(**params) print(kind, backend) for a,b in zip(ref_nn, other_nn): np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) for a,b in zip(ref_d, other_d): np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) - + def test_sparse_distance_matrix(self): - data = np.random.RandomState(42).uniform(size=(24, 3)) - - + data = np.random.RandomState(42).uniform(size=(200, 3)) + neighbors, distances = nearest_neighbor(data) + W = sparse_distance_matrix(neighbors, distances, symmetrize=True) + # Assert symetry + np.testing.assert_allclose(W.todense(), W.T.todense()) + # positivity + np.testing.assert_array_equal(W.todense()>=0, True) + # 0 diag + np.testing.assert_array_equal(np.diag(W.todense())==0, True) + + # Assert that it is not symmetric anymore + W = sparse_distance_matrix(neighbors, distances, symmetrize=False) + assert(np.sum(np.abs(W.todense()-W.T.todense()))>0.1) + # positivity + np.testing.assert_array_equal(W.todense()>=0, True) + # 0 diag + np.testing.assert_array_equal(np.diag(W.todense())==0, True) + # everything is used once + np.testing.assert_allclose(np.sum(W.todense()), np.sum(distances)) + + # simple test with a kernel + W = sparse_distance_matrix(neighbors, 1/(1+distances), symmetrize=True) + # Assert symetry + np.testing.assert_allclose(W.todense(), W.T.todense()) + # positivity + np.testing.assert_array_equal(W.todense()>=0, True) + # 0 diag + np.testing.assert_array_equal(np.diag(W.todense())==0, True) + + suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From 5818eb01c3ab3e0cc8b69663a3c5377c858c0180 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 22 Jul 2019 11:05:02 +0200 Subject: [PATCH 075/118] backup commit --- learn-graph-test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learn-graph-test.py b/learn-graph-test.py index 85160116..1dc769be 100644 --- a/learn-graph-test.py +++ b/learn-graph-test.py @@ -1,7 +1,7 @@ import unittest import numpy as np from nn import nn, sparse_distance_matrix -from learn_graph import prox_sum_log, isvector, issymetric, squareform_sp, sum_squareform, lin_map, norm_S, learn_graph_log_degree, gsp_compute_graph_learning_theta +from learn_graph import * from scipy import sparse import pygsp as pg from pygsp.utils import distanz From 280ae3cd8b9712c4664dbb787d98f96274f9a132 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 22 Jul 2019 11:06:48 +0200 Subject: [PATCH 076/118] fix tests --- pygsp/tests/test_graphs.py | 2 +- pygsp/tests/test_nearest_neighbor.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 8f4249bd..ad7e27a8 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -358,7 +358,7 @@ def test_nngraph(self, n_vertices=24): for metric in metrics: for kind in ['knn', 'radius']: - params = dict(features=data, metric=metric, kind=kind) + params = dict(features=data, metric=metric, kind=kind, k=4) ref = Graph(backend='scipy-pdist', **params) for backend in backends: # Unsupported combinations. diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index fe3f636f..feaa7509 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -68,6 +68,3 @@ def test_sparse_distance_matrix(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) - -if __name__ == '__main__': - unittest.main() From 0b1242b5d666eb3949b9bf50cec33efccf74f81a Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 22 Jul 2019 11:37:37 +0200 Subject: [PATCH 077/118] make k=4 to pass tests --- pygsp/tests/test_nearest_neighbor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index feaa7509..5d734153 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -11,7 +11,7 @@ def test_nngraph(self, n_vertices=24): for metric in metrics: for kind in ['knn', 'radius']: for backend in backends: - params = dict(features=data, metric=metric, kind=kind, radius=0.25) + params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=4) ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params) # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': From ddc290e2b76ec5d1dbb468ed1e7ba61c9edcad8f Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Thu, 25 Jul 2019 09:45:14 +0200 Subject: [PATCH 078/118] test --- pygsp/tests/test_graphs.py | 4 ++-- pygsp/tests/test_nearest_neighbor.py | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index ad7e27a8..ad2c473b 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -374,7 +374,7 @@ def test_nngraph(self, n_vertices=24): else: params['backend'] = backend if backend == 'flann': - graph = Graph(random_seed=40, **params) + graph = Graph(random_seed=40, target_precision=1, **params) else: graph = Graph(**params) np.testing.assert_allclose(graph.W.toarray(), @@ -561,4 +561,4 @@ def test_grid2dimgpatches(self): graphs.Grid2dImgPatches(img=self._img, patch_shape=(3, 3)) -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index 5d734153..e90efc9f 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -3,7 +3,7 @@ from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix class TestCase(unittest.TestCase): - def test_nngraph(self, n_vertices=24): + def test_nngraph(self, n_vertices=100): data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] @@ -11,7 +11,8 @@ def test_nngraph(self, n_vertices=24): for metric in metrics: for kind in ['knn', 'radius']: for backend in backends: - params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=4) + params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=10) + ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params) # Unsupported combinations. if backend == 'flann' and metric == 'max_dist': @@ -26,15 +27,22 @@ def test_nngraph(self, n_vertices=24): else: params['backend'] = backend if backend == 'flann': - other_nn, other_d = nearest_neighbor(random_seed=44, **params) + other_nn, other_d = nearest_neighbor(random_seed=44, target_precision=1, **params) else: other_nn, other_d = nearest_neighbor(**params) print(kind, backend) - for a,b in zip(ref_nn, other_nn): - np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) + if backend == 'flann': + for a,b in zip(ref_nn, other_nn): + assert(len(set(a)-set(b))<=2) - for a,b in zip(ref_d, other_d): - np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) + for a,b in zip(ref_d, other_d): + np.testing.assert_allclose(np.mean(a).astype(np.float32),np.mean(b), atol=2e-2) + else: + for a,b,c,d in zip(ref_nn, other_nn, ref_d, other_d): + e = set(a)-set(b) + assert(len(e)<=1) + if len(e)==0: + np.testing.assert_allclose(np.sort(c),np.sort(d), rtol=1e-5) def test_sparse_distance_matrix(self): data = np.random.RandomState(42).uniform(size=(200, 3)) From 9ad6bff45c803c8fb70517807cde52aa0325b2aa Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Fri, 26 Jul 2019 13:27:39 +0200 Subject: [PATCH 079/118] making test pass -- not very clean nn function --- pygsp/_nearest_neighbor.py | 6 +- pygsp/graphs/nngraphs/nngraph.py | 170 +-------------------------- pygsp/tests/test_graphs.py | 7 +- pygsp/tests/test_nearest_neighbor.py | 3 +- 4 files changed, 14 insertions(+), 172 deletions(-) diff --git a/pygsp/_nearest_neighbor.py b/pygsp/_nearest_neighbor.py index daa87c60..15dd97ce 100644 --- a/pygsp/_nearest_neighbor.py +++ b/pygsp/_nearest_neighbor.py @@ -6,6 +6,8 @@ from scipy import sparse, spatial from pygsp import utils +_logger = utils.build_logger(__name__) + def _scipy_pdist(features, metric, order, kind, k, radius, params): if params: raise ValueError('unexpected parameters {}'.format(params)) @@ -211,12 +213,13 @@ def nearest_neighbor(features, metric='euclidean', order=2, kind='knn', k=10, ra return neighbors, distances -def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None): +def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None, k=None): '''Build a sparse distance matrix from nearest neighbors''' n_edges = [len(n) - 1 for n in neighbors] # remove distance to self if safe and kind is None: raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode') + n_vertices = len(n_edges) if safe and kind == 'radius': n_disconnected = np.sum(np.asarray(n_edges) == 0) if n_disconnected > 0: @@ -228,7 +231,6 @@ def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, ki row = np.empty_like(value, dtype=np.int) col = np.empty_like(value, dtype=np.int) start = 0 - n_vertices = len(n_edges) for vertex in range(n_vertices): if safe and kind == 'knn': assert n_edges[vertex] == k diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index 4ca8f9a2..dbb45eaa 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -9,144 +9,12 @@ from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix _logger = utils.build_logger(__name__) -def _scipy_pdist(features, metric, order, kind, k, radius, params): - if params: - raise ValueError('unexpected parameters {}'.format(params)) - metric = 'cityblock' if metric == 'manhattan' else metric - metric = 'chebyshev' if metric == 'max_dist' else metric - params = dict(metric=metric) - if metric == 'minkowski': - params['p'] = order - dist = spatial.distance.pdist(features, **params) - dist = spatial.distance.squareform(dist) - if kind == 'knn': - neighbors = np.argsort(dist)[:, :k+1] - distances = np.take_along_axis(dist, neighbors, axis=-1) - elif kind == 'radius': - distances = [] - neighbors = [] - for distance in dist: - neighbor = np.flatnonzero(distance < radius) - neighbors.append(neighbor) - distances.append(distance[neighbor]) - return neighbors, distances - - -def _scipy_kdtree(features, _, order, kind, k, radius, params): - if order is None: - raise ValueError('invalid metric for scipy-kdtree') - eps = params.pop('eps', 0) - tree = spatial.KDTree(features, **params) - params = dict(p=order, eps=eps) - if kind == 'knn': - params['k'] = k + 1 - elif kind == 'radius': - params['k'] = None - params['distance_upper_bound'] = radius - distances, neighbors = tree.query(features, **params) - return neighbors, distances - - -def _scipy_ckdtree(features, _, order, kind, k, radius, params): - if order is None: - raise ValueError('invalid metric for scipy-kdtree') - eps = params.pop('eps', 0) - tree = spatial.cKDTree(features, **params) - params = dict(p=order, eps=eps, n_jobs=-1) - if kind == 'knn': - params['k'] = k + 1 - elif kind == 'radius': - params['k'] = features.shape[0] # number of vertices - params['distance_upper_bound'] = radius - distances, neighbors = tree.query(features, **params) - if kind == 'knn': - return neighbors, distances - elif kind == 'radius': - dist = [] - neigh = [] - for distance, neighbor in zip(distances, neighbors): - mask = (distance != np.inf) - dist.append(distance[mask]) - neigh.append(neighbor[mask]) - return neigh, dist - - -def _flann(features, metric, order, kind, k, radius, params): - if metric == 'max_dist': - raise ValueError('flann gives wrong results for metric="max_dist".') - try: - import cyflann as cfl - except Exception as e: - raise ImportError('Cannot import cyflann. Choose another nearest ' - 'neighbors backend or try to install it with ' - 'pip (or conda) install cyflann. ' - 'Original exception: {}'.format(e)) - cfl.set_distance_type(metric, order=order) - index = cfl.FLANNIndex() - index.build_index(features, **params) - # I tried changing the algorithm and testing performance on huge matrices, - # but the default parameters seems to work best. - if kind == 'knn': - neighbors, distances = index.nn_index(features, k+1) - if metric == 'euclidean': - np.sqrt(distances, out=distances) - elif metric == 'minkowski': - np.power(distances, 1/order, out=distances) - elif kind == 'radius': - distances = [] - neighbors = [] - if metric == 'euclidean': - radius = radius**2 - elif metric == 'minkowski': - radius = radius**order - n_vertices, _ = features.shape - for vertex in range(n_vertices): - neighbor, distance = index.nn_radius(features[vertex, :], radius) - distances.append(distance) - neighbors.append(neighbor) - if metric == 'euclidean': - distances = list(map(np.sqrt, distances)) - elif metric == 'minkowski': - distances = list(map(lambda d: np.power(d, 1/order), distances)) - index.free_index() - return neighbors, distances - - -def _nmslib(features, metric, order, kind, k, _, params): - if kind == 'radius': - raise ValueError('nmslib does not support kind="radius".') - if metric == 'minkowski': - raise ValueError('nmslib does not support metric="minkowski".') - try: - import nmslib as nms - except Exception as e: - raise ImportError('Cannot import nmslib. Choose another nearest ' - 'neighbors backend or try to install it with ' - 'pip (or conda) install nmslib. ' - 'Original exception: {}'.format(e)) - n_vertices, _ = features.shape - params_index = params.pop('index', None) - params_query = params.pop('query', None) - metric = 'l2' if metric == 'euclidean' else metric - metric = 'l1' if metric == 'manhattan' else metric - metric = 'linf' if metric == 'max_dist' else metric - index = nms.init(space=metric, **params) - index.addDataPointBatch(features) - index.createIndex(params_index) - if params_query is not None: - index.setQueryTimeParams(params_query) - results = index.knnQueryBatch(features, k=k+1) - neighbors, distances = zip(*results) - distances = np.concatenate(distances).reshape(n_vertices, k+1) - neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) - return neighbors, distances - - class NNGraph(Graph): r"""Nearest-neighbor graph. @@ -437,39 +305,9 @@ def __init__(self, features, standardize=False, else: raise ValueError('Invalid kind "{}".'.format(kind)) - try: - function = globals()['_' + backend.replace('-', '_')] - except KeyError: - raise ValueError('Invalid backend "{}".'.format(backend)) - neighbors, distances = function(features, metric, order, - kind, k, radius, kwargs) - - n_edges = [len(n) - 1 for n in neighbors] # remove distance to self - - if kind == 'radius': - n_disconnected = np.sum(np.asarray(n_edges) == 0) - if n_disconnected > 0: - _logger.warning('{} vertices (out of {}) are disconnected. ' - 'Consider increasing the radius or setting ' - 'kind=knn.'.format(n_disconnected, n_vertices)) - - value = np.empty(sum(n_edges), dtype=np.float) - row = np.empty_like(value, dtype=np.int) - col = np.empty_like(value, dtype=np.int) - start = 0 - for vertex in range(n_vertices): - if kind == 'knn': - assert n_edges[vertex] == k - end = start + n_edges[vertex] - value[start:end] = distances[vertex][1:] - row[start:end] = np.full(n_edges[vertex], vertex) - col[start:end] = neighbors[vertex][1:] - start = end - W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) - - # Enforce symmetry. May have been broken by k-NN. Checking symmetry - # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method='fill') + neighbors, distances = nearest_neighbor(features, metric=metric, order=order, + kind=kind, k=k, radius=radius, backend=backend, **kwargs) + W = sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=True, kind = kind, k=k) if kernel_width is None: if kind == 'knn': diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index ad2c473b..b3fec0e1 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -354,11 +354,12 @@ def test_nngraph(self, n_vertices=24): Graph = graphs.NNGraph data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + # Not testing , 'flann', 'nmslib' as they are tested in test_nearest_neighbor + backends = ['scipy-kdtree', 'scipy-ckdtree'] for metric in metrics: for kind in ['knn', 'radius']: - params = dict(features=data, metric=metric, kind=kind, k=4) + params = dict(features=data, metric=metric, kind=kind, k=6) ref = Graph(backend='scipy-pdist', **params) for backend in backends: # Unsupported combinations. @@ -561,4 +562,4 @@ def test_grid2dimgpatches(self): graphs.Grid2dImgPatches(img=self._img, patch_shape=(3, 3)) -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index e90efc9f..c11d38b7 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -11,7 +11,7 @@ def test_nngraph(self, n_vertices=100): for metric in metrics: for kind in ['knn', 'radius']: for backend in backends: - params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=10) + params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=8) ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params) # Unsupported combinations. @@ -76,3 +76,4 @@ def test_sparse_distance_matrix(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + From 17e24a731054bfa4c06e17b6329252a04ffa9046 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Fri, 16 Aug 2019 16:09:12 +0200 Subject: [PATCH 080/118] update tests --- pygsp/tests/test_graphs.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index b3fec0e1..f628a84d 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -362,23 +362,8 @@ def test_nngraph(self, n_vertices=24): params = dict(features=data, metric=metric, kind=kind, k=6) ref = Graph(backend='scipy-pdist', **params) for backend in backends: - # Unsupported combinations. - if backend == 'flann' and metric == 'max_dist': - self.assertRaises(ValueError, Graph, data, - metric=metric, backend=backend) - elif backend == 'nmslib' and metric == 'minkowski': - self.assertRaises(ValueError, Graph, data, - metric=metric, backend=backend) - elif backend == 'nmslib' and kind == 'radius': - self.assertRaises(ValueError, Graph, data, - kind=kind, backend=backend) - else: - params['backend'] = backend - if backend == 'flann': - graph = Graph(random_seed=40, target_precision=1, **params) - else: - graph = Graph(**params) - np.testing.assert_allclose(graph.W.toarray(), + graph = Graph(**params) + np.testing.assert_allclose(graph.W.toarray(), ref.W.toarray(), rtol=1e-5) # Distances between points on a circle. @@ -393,8 +378,6 @@ def test_nngraph(self, n_vertices=24): data = graphs.Ring(n_vertices).coords for kind in ['knn', 'radius']: for backend in backends + ['scipy-pdist']: - if backend == 'nmslib' and kind == 'radius': - continue # unsupported graph = Graph(data, kind=kind, k=2, radius=1.01*distance, kernel_width=1, backend=backend) np.testing.assert_allclose(graph.W.toarray(), adjacency) From b308bfaf164264de9cfd0f25855bce3bf635c1b8 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sat, 17 Aug 2019 13:58:58 +0200 Subject: [PATCH 081/118] going further --- learn-graph-test.py | 4 +++- learn_graph.py => learngraph.py | 0 2 files changed, 3 insertions(+), 1 deletion(-) rename learn_graph.py => learngraph.py (100%) diff --git a/learn-graph-test.py b/learn-graph-test.py index 1dc769be..cd3c9779 100644 --- a/learn-graph-test.py +++ b/learn-graph-test.py @@ -1,10 +1,12 @@ import unittest import numpy as np from nn import nn, sparse_distance_matrix -from learn_graph import * +from pygsp._nearest_neighbor import nearest_neighbor as nn +from pygsp._nearest_neighbor import sparse_distance_matrix from scipy import sparse import pygsp as pg from pygsp.utils import distanz +from learngraph import * class TestCase(unittest.TestCase): def test_prox_sum_log(self): diff --git a/learn_graph.py b/learngraph.py similarity index 100% rename from learn_graph.py rename to learngraph.py From b25b3ba435a28fd08293ff6bf6e877ff43ff4820 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sat, 17 Aug 2019 14:07:06 +0200 Subject: [PATCH 082/118] small fix --- pygsp/graphs/nngraphs/nngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index dbb45eaa..3f56734e 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -330,7 +330,7 @@ def __init__(self, features, standardize=False, self.radius = radius self.kernel_width = kernel_width - super(NNGraph, self).__init__(W=W, coords=features, **params_graph) + super(NNGraph, self).__init__(W, coords=features, **params_graph) def _get_extra_repr(self): attrs = { From 8e2ff0ecea58b944e3c25060da50064da8f4c35b Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sat, 17 Aug 2019 14:23:00 +0200 Subject: [PATCH 083/118] fix doc --- pygsp/graphs/fourier.py | 2 +- pygsp/tests/test_graphs.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pygsp/graphs/fourier.py b/pygsp/graphs/fourier.py index 0525bf69..d699ceaa 100644 --- a/pygsp/graphs/fourier.py +++ b/pygsp/graphs/fourier.py @@ -85,7 +85,7 @@ def coherence(self): >>> graph.compute_fourier_basis() >>> minimum = 1 / np.sqrt(graph.n_vertices) >>> print('{:.2f} in [{:.2f}, 1]'.format(graph.coherence, minimum)) - 0.91 in [0.12, 1] + 0.87 in [0.12, 1] >>> >>> # Plot the most localized eigenvector. >>> import matplotlib.pyplot as plt diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index e2db0ffc..22708397 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -988,3 +988,6 @@ def test_import_errors(self): suite_import_export = unittest.TestLoader().loadTestsFromTestCase(TestImportExport) suite = unittest.TestSuite([suite_graphs, suite_import_export]) + +if __name__ == '__main__': + unittest.TextTestRunner(verbosity=2).run(suite) \ No newline at end of file From 74ea306e69b6359adde511c12be994d611822cb3 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sat, 17 Aug 2019 14:50:59 +0200 Subject: [PATCH 084/118] fix test --- pygsp/tests/test_graphs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 22708397..73cda6c7 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -592,13 +592,13 @@ def test_nngraph(self, n_vertices=30): # Only p-norms with 1<=p<=infinity permitted. if dist_type != 'minkowski': - graphs.NNGraph(Xin, NNtype='radius', dist_type=dist_type) - graphs.NNGraph(Xin, NNtype='knn', dist_type=dist_type) + graphs.NNGraph(Xin, kind='radius', dist_type=dist_type) + graphs.NNGraph(Xin, kind='knn', dist_type=dist_type) # Distance type unsupported in the C bindings, # use the C++ bindings instead. if dist_type != 'max_dist': - graphs.NNGraph(Xin, use_flann=True, NNtype='knn', + graphs.NNGraph(Xin, use_flann=True, kind='knn', dist_type=dist_type) def test_bunny(self): From cc78ff22acc7b8c0ced0c32c13c0813f150488b1 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sat, 17 Aug 2019 14:52:17 +0200 Subject: [PATCH 085/118] fix test --- pygsp/tests/test_graphs.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 73cda6c7..dbc4dd78 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -583,24 +583,6 @@ def test_subgraph(self, n_vertices=100): self.assertIs(graph.lap_type, self._G.lap_type) self.assertEqual(graph.plotting, self._G.plotting) - def test_nngraph(self, n_vertices=30): - rs = np.random.RandomState(42) - Xin = rs.normal(size=(n_vertices, 3)) - dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - - for dist_type in dist_types: - - # Only p-norms with 1<=p<=infinity permitted. - if dist_type != 'minkowski': - graphs.NNGraph(Xin, kind='radius', dist_type=dist_type) - graphs.NNGraph(Xin, kind='knn', dist_type=dist_type) - - # Distance type unsupported in the C bindings, - # use the C++ bindings instead. - if dist_type != 'max_dist': - graphs.NNGraph(Xin, use_flann=True, kind='knn', - dist_type=dist_type) - def test_bunny(self): graphs.Bunny() From 92b6fbb4398b475a80d465463203a69ffd579316 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sat, 17 Aug 2019 15:20:12 +0200 Subject: [PATCH 086/118] fix test_graphs --- pygsp/tests/test_graphs.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index dbc4dd78..2c09a539 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -17,12 +17,15 @@ import scipy.linalg from scipy import sparse import networkx as nx -import graph_tool as gt -import graph_tool.generation from skimage import data, img_as_float from pygsp import graphs +import graph_tool as gt +import graph_tool.generation + + + class TestCase(unittest.TestCase): @@ -970,6 +973,3 @@ def test_import_errors(self): suite_import_export = unittest.TestLoader().loadTestsFromTestCase(TestImportExport) suite = unittest.TestSuite([suite_graphs, suite_import_export]) - -if __name__ == '__main__': - unittest.TextTestRunner(verbosity=2).run(suite) \ No newline at end of file From 152bfae7f09e79ee20d80bb7d3df3643483e4cee Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 08:57:51 +0200 Subject: [PATCH 087/118] update reduction --- pygsp/reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/reduction.py b/pygsp/reduction.py index 50a8ae67..184d4b35 100644 --- a/pygsp/reduction.py +++ b/pygsp/reduction.py @@ -56,7 +56,7 @@ def graph_sparsify(M, epsilon, maxiter=10): Examples -------- >>> from pygsp import reduction - >>> G = graphs.Sensor(256, k=20, distributed=True) + >>> G = graphs.Sensor(256, k=20, distributed=True, seed=0) >>> epsilon = 0.4 >>> G2 = reduction.graph_sparsify(G, epsilon) From 8a0f86981c1c0bfba841c04d97884ac0f65699c2 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 11:11:31 +0200 Subject: [PATCH 088/118] move files --- pygsp/graphs/__init__.py | 8 +++ learngraph.py => pygsp/graphs/learngraph.py | 52 +++++++++++++++++++ pygsp/tests/test_all.py | 2 + .../tests/test_learn_graph.py | 2 +- 4 files changed, 63 insertions(+), 1 deletion(-) rename learngraph.py => pygsp/graphs/learngraph.py (90%) rename learn-graph-test.py => pygsp/tests/test_learn_graph.py (99%) diff --git a/pygsp/graphs/__init__.py b/pygsp/graphs/__init__.py index 23f9ccfa..22a48a33 100644 --- a/pygsp/graphs/__init__.py +++ b/pygsp/graphs/__init__.py @@ -182,6 +182,13 @@ Sensor Sphere TwoMoons + +Learning graph from data +------------------------ + +.. autosummary:: + + LearnedGraph """ @@ -197,6 +204,7 @@ 'ErdosRenyi', 'FullConnected', 'Grid2d', + 'LearnGraph', 'LineGraph', 'Logo', 'LowStretchTree', diff --git a/learngraph.py b/pygsp/graphs/learngraph.py similarity index 90% rename from learngraph.py rename to pygsp/graphs/learngraph.py index 1e4878ee..ddfce9d2 100644 --- a/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -2,6 +2,11 @@ from scipy import sparse from scipy.spatial.distance import squareform from time import time +from pygsp import utils + +from . import Graph # prevent circular import in Python < 3.5 + +logger = utils.build_logger(__name__) def isvector(x): '''Test if x is a vector''' @@ -400,3 +405,50 @@ def learn_graph_log_degree(Z, W = squareform_sp(w); return W, stat + + +class LearnGraph(Graph): + r"""Learned graph. + + Parameters + ---------- + X : int + Data + + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> fig, axes = plt.subplots(2, 2, figsize=(10, 8)) + >>> for i, directed in enumerate([False, True]): + ... G = graphs.Path(N=10, directed=directed) + ... _ = axes[i, 0].spy(G.W) + ... _ = G.plot(ax=axes[i, 1]) + + References + ---------- + See :cite:`strang1999discrete` for more informations. + + """ + + def __init__(self, N=16, directed=False, **kwargs): + + self.directed = directed + if directed: + sources = np.arange(0, N-1) + targets = np.arange(1, N) + n_edges = N - 1 + else: + sources = np.concatenate((np.arange(0, N-1), np.arange(1, N))) + targets = np.concatenate((np.arange(1, N), np.arange(0, N-1))) + n_edges = 2 * (N - 1) + weights = np.ones(n_edges) + W = sparse.csr_matrix((weights, (sources, targets)), shape=(N, N)) + plotting = {"limits": np.array([-1, N, -1, 1])} + + super(Path, self).__init__(W=W, plotting=plotting, **kwargs) + + self.set_coordinates('line2D') + + def _get_extra_repr(self): + return dict(directed=self.directed) \ No newline at end of file diff --git a/pygsp/tests/test_all.py b/pygsp/tests/test_all.py index 1fc9a55d..7b8aaef8 100755 --- a/pygsp/tests/test_all.py +++ b/pygsp/tests/test_all.py @@ -15,9 +15,11 @@ from pygsp.tests import test_plotting from pygsp.tests import test_learning from pygsp.tests import test_nearest_neighbor +from pygsp.tests import test_learn_graph suites = [] +suites.append(test_learn_graph.suite) suites.append(test_nearest_neighbor.suite) suites.append(test_graphs.suite) suites.append(test_filters.suite) diff --git a/learn-graph-test.py b/pygsp/tests/test_learn_graph.py similarity index 99% rename from learn-graph-test.py rename to pygsp/tests/test_learn_graph.py index cd3c9779..449dc424 100644 --- a/learn-graph-test.py +++ b/pygsp/tests/test_learn_graph.py @@ -6,7 +6,7 @@ from scipy import sparse import pygsp as pg from pygsp.utils import distanz -from learngraph import * +from pygsp.graphs.learngraph import * class TestCase(unittest.TestCase): def test_prox_sum_log(self): From 594e048c9e1068e64a546099043590c82f66828b Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 11:56:27 +0200 Subject: [PATCH 089/118] first version --- pygsp/graphs/learngraph.py | 44 +++++++++++++++++++++----------------- pygsp/utils.py | 2 +- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index ddfce9d2..c60282b9 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -3,7 +3,8 @@ from scipy.spatial.distance import squareform from time import time from pygsp import utils - +from pygsp.utils import distanz +from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix from . import Graph # prevent circular import in Python < 3.5 logger = utils.build_logger(__name__) @@ -407,13 +408,13 @@ def learn_graph_log_degree(Z, return W, stat + class LearnGraph(Graph): r"""Learned graph. Parameters ---------- - X : int - Data + X : Data [Nnodes x Nsignals] Examples @@ -431,24 +432,27 @@ class LearnGraph(Graph): """ - def __init__(self, N=16, directed=False, **kwargs): - - self.directed = directed - if directed: - sources = np.arange(0, N-1) - targets = np.arange(1, N) - n_edges = N - 1 + def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, plotting={}, lap_type='combinatorial', param_nn={}, param_opt={}): + + if sparse: + if kk is None: + kk = 3*k + neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) + Z = sparse_distance_matrix(neighbors, distances) + edge_mask = Z>0 + Zp = distances[:,1:] else: - sources = np.concatenate((np.arange(0, N-1), np.arange(1, N))) - targets = np.concatenate((np.arange(1, N), np.arange(0, N-1))) - n_edges = 2 * (N - 1) - weights = np.ones(n_edges) - W = sparse.csr_matrix((weights, (sources, targets)), shape=(N, N)) - plotting = {"limits": np.array([-1, N, -1, 1])} - - super(Path, self).__init__(W=W, plotting=plotting, **kwargs) - - self.set_coordinates('line2D') + Z = distanz(X.transpose()) + Zp = Z + edge_mask = None + if alpha is None and beta is None: + theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Zp, k) + W, stat = learn_graph_log_degree(Z*theta, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) + else: + W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) + super(Graph).__init__(W, lap_type=lap_type, plotting=plotting) + + self._stat = stat def _get_extra_repr(self): return dict(directed=self.directed) \ No newline at end of file diff --git a/pygsp/utils.py b/pygsp/utils.py index f9e20086..2931221d 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -124,7 +124,7 @@ def distanz(x, y=None): else: diag_zero = False if len(y.shape)<2: - y = np.expand_dims(xy, axis=0) + y = np.expand_dims(y, axis=0) rx, cx = x.shape ry, cy = y.shape From b6f9da63603c57963ecaa77713e3e7db8dc8ea52 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 12:45:04 +0200 Subject: [PATCH 090/118] update graph --- pygsp/graphs/learngraph.py | 43 ++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index c60282b9..8ee81b51 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -409,7 +409,7 @@ def learn_graph_log_degree(Z, -class LearnGraph(Graph): +class LearnedGraph(Graph): r"""Learned graph. Parameters @@ -419,12 +419,32 @@ class LearnGraph(Graph): Examples -------- - >>> import matplotlib.pyplot as plt - >>> fig, axes = plt.subplots(2, 2, figsize=(10, 8)) - >>> for i, directed in enumerate([False, True]): - ... G = graphs.Path(N=10, directed=directed) - ... _ = axes[i, 0].spy(G.W) - ... _ = G.plot(ax=axes[i, 1]) + >>> # A) Create a bunch of smooth signals + >>> n = 100 # Number of nodes + >>> d = 50 # Number of signals + >>> k = 6 # Average number of neighboors + >>> # A1) Create a graph + >>> coords = np.random.RandomState(0).uniform(size=(n, 2)) + >>> G = pg.graphs.NNGraph(coords,k=10, kernel='gaussian') + >>> + >>> # A2) Create a lowpass filter + >>> G.estimate_lmax() + >>> g = pg.filters.Filter(G,lambda x:1/(1+5*x)) + >>> + >>> # A3) Create signals by filtering random noise + >>> S = np.random.randn(n,d) + >>> X = np.squeeze(g.filter(S)) + >>> + >>> # B) Learn the graph + >>> Glearned = LearnedGraph(X,k=k) + >>> # plot the learned graph + >>> Glearned.coords = coords + >>> + >>> # C) Plot the graph + >>> fig, (ax1,ax2) = plt.subplots(1,2) + >>> G.plot_signal(X[:,1], ax=ax1,title='Signal generating graph') + >>> Glearned.plot_signal(X[:,1], ax=ax2,title='Learned graph') + >>> # The graph is expected to be less connected as explained in the paper References ---------- @@ -445,14 +465,11 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, Z = distanz(X.transpose()) Zp = Z edge_mask = None - if alpha is None and beta is None: + if a is None and b is None: theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Zp, k) W, stat = learn_graph_log_degree(Z*theta, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) else: W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) - super(Graph).__init__(W, lap_type=lap_type, plotting=plotting) + super().__init__(W, lap_type=lap_type, plotting=plotting) - self._stat = stat - - def _get_extra_repr(self): - return dict(directed=self.directed) \ No newline at end of file + self._stat = stat \ No newline at end of file From 859608141682af64001c385fb65293e47e0bb5ad Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 13:19:25 +0200 Subject: [PATCH 091/118] updates --- doc/references.bib | 17 +++++++++++++++++ pygsp/graphs/learngraph.py | 13 +++++++++++-- pygsp/tests/test_graphs.py | 30 ++++++++++++++++++++++++++++++ pygsp/tests/test_plotting.py | 1 + 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/doc/references.bib b/doc/references.bib index de0ade43..70603173 100644 --- a/doc/references.bib +++ b/doc/references.bib @@ -220,3 +220,20 @@ @article{grassi2018timevertex year={2018}, journal={IEEE Transactions on Signal Processing}, } + +@inproceedings{ + kalofolias2018large, + title={Large Scale Graph Learning From Smooth Signals}, + author={Vassilis Kalofolias and Nathanaël Perraudin}, + booktitle={International Conference on Learning Representations}, + year={2019}, + url={https://openreview.net/forum?id=ryGkSo0qYm}, + } + +@inproceedings{kalofolias2016learn, + title={How to learn a graph from smooth signals}, + author={Kalofolias, Vassilis}, + booktitle={Artificial Intelligence and Statistics}, + pages={920--929}, + year={2016} +} diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 8ee81b51..56259561 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -260,6 +260,15 @@ def learn_graph_log_degree(Z, edge_mask = None, w_0 = 0, rel_edge=1e-5): + r"""Learn a graph from distances + + This function solves the convex optimization problem: + + Parameters + ---------- + Z : Distance matrices [Nnodes x Nnodes]. It can be sparse + + """ if isvector(Z): z = Z; @@ -409,7 +418,7 @@ def learn_graph_log_degree(Z, -class LearnedGraph(Graph): +class LearnGraph(Graph): r"""Learned graph. Parameters @@ -448,7 +457,7 @@ class LearnedGraph(Graph): References ---------- - See :cite:`strang1999discrete` for more informations. + See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more informations. """ diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 2c09a539..84d62289 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -711,6 +711,36 @@ def test_grid2d_diagonals(self): self.assertEqual(G.W[9, 16], 1.0) self.assertEqual(G.W[20, 27], 1.0) + def test_learn_graph(self): + from pygsp.graphs import LearnGraph + from pygsp.utils import distanz + from pygsp.graphs.learngraph import gsp_compute_graph_learning_theta + + # Create a bunch of signals + n=100 + d = 400 + k = 6 + coords = np.random.RandomState(0).uniform(size=(n, 2)) + G = pg.graphs.NNGraph(coords,k=10, kernel='gaussian') + G.compute_fourier_basis() + # g = pg.filters.Heat(G, scale=5) + g = pg.filters.Filter(G,lambda x:1/(1+5*x)) + S = np.random.randn(n,d) + X = np.squeeze(g.filter(S)) + + # Test if large scale and small scale are close + G_learned1 = LearnGraph(X, k=k, sparse=False) + G_learned2 = LearnGraph(X,k=k, sparse=True) + assert(np.sum(np.abs(G_learned2.W.todense()-G_learned1.W.todense()))/np.sum(np.abs(G_learned1.W.todense()))<1e-4) + + # Test automatic parameters tuning + Z = distanz(X.transpose()) + theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Z, k) + a = 1/theta + b = a + G_learned3 = LearnGraph(X,a=a,b=b, sparse=False) + assert(np.sum(np.abs(G_learned3.W.todense()-G_learned1.W.todense()))/np.sum(np.abs(G_learned1.W.todense()))<1e-3) + suite_graphs = unittest.TestLoader().loadTestsFromTestCase(TestCase) diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py index 3694b4f7..1c5bd464 100644 --- a/pygsp/tests/test_plotting.py +++ b/pygsp/tests/test_plotting.py @@ -39,6 +39,7 @@ def test_plot_graphs(self): 'FullConnected', 'RandomRegular', 'StochasticBlockModel', + 'LearnGraph', } # Coordinates are not in 2D or 3D. From ec3d535123a45ee0b5e39bc030bce07b9da5462c Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 13:42:37 +0200 Subject: [PATCH 092/118] fix bugs --- pygsp/graphs/learngraph.py | 45 ++++++++++++++++++++++++++++++++++---- pygsp/tests/test_graphs.py | 6 ++--- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 56259561..7b401597 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -262,12 +262,44 @@ def learn_graph_log_degree(Z, rel_edge=1e-5): r"""Learn a graph from distances - This function solves the convex optimization problem: + This function computes a weighted + adjacency matrix $W$ from squared pairwise distances in $Z$, using the + smoothness assumption that $\text{trace}(X^TLX)$ is small, where $X$ is + the data (columns) changing smoothly from node to node on the graph and + $L = D-W$ is the combinatorial graph Laplacian. See :cite:`kalofolias2018large` + and :cite:`kalofolias2016learn` for the theory behind the algorithm. + + Alternatively, Z can contain other types of distances and use the + smoothness assumption that sum(sum(W * Z)) is small. + + The minimization problem solved is + + minimize_W sum(sum(W .* Z)) - a * sum(log(sum(W))) + b * ||W||_F^2/2 + c * ||W-W_0||_F^2/2 + The algorithm used is forward-backward-forward (FBF) based primal dual + optimization. + Parameters ---------- - Z : Distance matrices [Nnodes x Nnodes]. It can be sparse - + Z : Distance matrices [Nnodes x Nnodes]. It can be sparse + a : Weight of the connectivity term (prevents all nodes to be disconnected) + b : Weight of the sparsity term + c : Weight of the L2 prior (Default 0) + verbosity : How much should display the algorithm - 0: nothing, + 1: summary at convergence, 2: each steps (Default 1) + maxit : maximum number of iterations (Default: 1000) + tol : tolerance to stop iterating + step_size : Step size from the interval (0,1). Default: 0.5 + max_w : Maximum weight allowed for each edge (or inf) + edge_mask : Mask indicating the non zero edges if "fix_zeros" + fix_zeros : Fix a set of edges to zero (True/False) + w_0 : Vector for adding prior c/2*||w - w_0||^2 + rel_edge : Remove all edges bellow this tolerance after convergence of the algorithm + + References + ---------- + See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more informations. + """ if isvector(Z): @@ -420,6 +452,8 @@ def learn_graph_log_degree(Z, class LearnGraph(Graph): r"""Learned graph. + + Return a graph learned with with the function Parameters ---------- @@ -428,6 +462,9 @@ class LearnGraph(Graph): Examples -------- + >>> import numpy as np + >>> import pygsp as pg + >>> from matplotlib import pyplot as plt >>> # A) Create a bunch of smooth signals >>> n = 100 # Number of nodes >>> d = 50 # Number of signals @@ -445,7 +482,7 @@ class LearnGraph(Graph): >>> X = np.squeeze(g.filter(S)) >>> >>> # B) Learn the graph - >>> Glearned = LearnedGraph(X,k=k) + >>> Glearned = LearnGraph(X,k=k) >>> # plot the learned graph >>> Glearned.coords = coords >>> diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 84d62289..f89f421e 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -20,6 +20,7 @@ from skimage import data, img_as_float from pygsp import graphs +from pygsp import filters import graph_tool as gt import graph_tool.generation @@ -721,10 +722,9 @@ def test_learn_graph(self): d = 400 k = 6 coords = np.random.RandomState(0).uniform(size=(n, 2)) - G = pg.graphs.NNGraph(coords,k=10, kernel='gaussian') + G = graphs.NNGraph(coords,k=10, kernel='gaussian') G.compute_fourier_basis() - # g = pg.filters.Heat(G, scale=5) - g = pg.filters.Filter(G,lambda x:1/(1+5*x)) + g = filters.Filter(G,lambda x:1/(1+5*x)) S = np.random.randn(n,d) X = np.squeeze(g.filter(S)) From 33e5b8edbb1b39e601b8dfa1223cbaa2e226e103 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 14:01:15 +0200 Subject: [PATCH 093/118] update doc --- pygsp/graphs/learngraph.py | 44 ++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 7b401597..bd89a674 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -255,7 +255,6 @@ def learn_graph_log_degree(Z, maxit = 1000, tol = 1e-5, step_size = .5, - fix_zeros = False, max_w = np.inf, edge_mask = None, w_0 = 0, @@ -291,8 +290,7 @@ def learn_graph_log_degree(Z, tol : tolerance to stop iterating step_size : Step size from the interval (0,1). Default: 0.5 max_w : Maximum weight allowed for each edge (or inf) - edge_mask : Mask indicating the non zero edges if "fix_zeros" - fix_zeros : Fix a set of edges to zero (True/False) + edge_mask : Mask indicating the non zero edges (for the scaling version) w_0 : Vector for adding prior c/2*||w - w_0||^2 rel_edge : Remove all edges bellow this tolerance after convergence of the algorithm @@ -453,13 +451,41 @@ def learn_graph_log_degree(Z, class LearnGraph(Graph): r"""Learned graph. - Return a graph learned with with the function + Return a graph learned with with the function by :func:`learn_graph_log_degree`. + + This function computes the pairwise distances $Z$ between the vectos in $X$. Then + the adjacency matrix $W$ is estiamged from $Z$, using the + smoothness assumption that $\text{trace}(X^TLX)$ is small, where $X$ is + the data (columns) changing smoothly from node to node on the graph and + $L = D-W$ is the combinatorial graph Laplacian. See :cite:`kalofolias2018large` + and :cite:`kalofolias2016learn` for the theory behind the algorithm. + + Alternatively, Z can contain other types of distances and use the + smoothness assumption that sum(sum(W * Z)) is small. + + The minimization problem solved is + + minimize_W sum(sum(W .* Z)) - a * sum(log(sum(W))) + b * ||W||_F^2/2 + c * ||W-W_0||_F^2/2 + + In order to scale, this function can automatically: + 1. Compute the optimal value of a,b + 2. Use a resticted support to reduce the computational and the memory cost + By default, these option are enabled and only the average number of neighboors k should be set. + + Alternatively, the value of a and be can be set. Parameters ---------- X : Data [Nnodes x Nsignals] - - + a : Weight of the connectivity term (leave this to None for automatic setup) + b : Weight of the sparsity term (leave this to None for automatic setup) + k : desired average number of neigboors + kk: desired number of neiboors for the sparse support (Default 3k) + sparse: Use a sparse support. Set this to True to scale. (Default True) + rel_edge : Remove all edges bellow this tolerance after convergence of the algorithm + param_nn : Parameters for the nearest neighboor search (dictionary). See :func:`nearest_neighbor`. + param_opt: Parameters for the optimization algorithm. See :func:`learn_graph_log_degree`. + Examples -------- >>> import numpy as np @@ -482,7 +508,7 @@ class LearnGraph(Graph): >>> X = np.squeeze(g.filter(S)) >>> >>> # B) Learn the graph - >>> Glearned = LearnGraph(X,k=k) + >>> Glearned = pg.graphs.LearnGraph(X,k=k) >>> # plot the learned graph >>> Glearned.coords = coords >>> @@ -498,7 +524,7 @@ class LearnGraph(Graph): """ - def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, plotting={}, lap_type='combinatorial', param_nn={}, param_opt={}): + def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, param_nn={}, param_opt={} **kwargs): if sparse: if kk is None: @@ -516,6 +542,6 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, W, stat = learn_graph_log_degree(Z*theta, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) else: W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) - super().__init__(W, lap_type=lap_type, plotting=plotting) + super().__init__(W, **kwargs) self._stat = stat \ No newline at end of file From 50157312390cb3a8dc1a0a6c6ef7e75052a32087 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 14:10:18 +0200 Subject: [PATCH 094/118] update history --- doc/history.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/history.rst b/doc/history.rst index 77be17cc..f67ea4f0 100644 --- a/doc/history.rst +++ b/doc/history.rst @@ -33,7 +33,8 @@ History * We now have a gallery of examples! That is convenient for users to get a taste of what the library can do, and to start working from a code snippet. * Merged all the extra requirements in a single dev requirement. - +* A function to learn the graph from the signal has been added with doc and tests. + This function can be used simply using the graph ``LearnGraph`` Experimental filter API (to be tested and validated): From 0d0aff7fb2fc9df29dab48b30c70adaff27dd6db Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 14:12:52 +0200 Subject: [PATCH 095/118] small fix --- pygsp/graphs/learngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index bd89a674..2fb15d6c 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -524,7 +524,7 @@ class LearnGraph(Graph): """ - def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, param_nn={}, param_opt={} **kwargs): + def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, param_nn={}, param_opt={}, **kwargs): if sparse: if kk is None: From e37171ac4e4f7eb9dfae046abc7c27550e1fa09a Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 14:31:02 +0200 Subject: [PATCH 096/118] update doc --- pygsp/graphs/learngraph.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 2fb15d6c..87ea8aec 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -508,15 +508,16 @@ class LearnGraph(Graph): >>> X = np.squeeze(g.filter(S)) >>> >>> # B) Learn the graph - >>> Glearned = pg.graphs.LearnGraph(X,k=k) + >>> param_opt = {'verbosity':0} + >>> Glearned = LearnGraph(X,k=k, param_opt=param_opt) >>> # plot the learned graph >>> Glearned.coords = coords >>> - >>> # C) Plot the graph + >>> # C) Plot the graph and one signal >>> fig, (ax1,ax2) = plt.subplots(1,2) - >>> G.plot_signal(X[:,1], ax=ax1,title='Signal generating graph') - >>> Glearned.plot_signal(X[:,1], ax=ax2,title='Learned graph') - >>> # The graph is expected to be less connected as explained in the paper + >>> _ = G.plot_signal(X[:,1], ax=ax1,title='Signal generating graph') + >>> _ = Glearned.plot_signal(X[:,1], ax=ax2,title='Learned graph') + >>> # The two graphs are not expected to be the same! References ---------- From b3c8efed46eca8d777090d9531c81473728d61fe Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 15:11:17 +0200 Subject: [PATCH 097/118] add code to test each module separatly --- pygsp/tests/test_docstrings.py | 3 +++ pygsp/tests/test_filters.py | 3 +++ pygsp/tests/test_graphs.py | 3 +++ pygsp/tests/test_nearest_neighbor.py | 2 ++ pygsp/tests/test_plotting.py | 3 +++ pygsp/tests/test_utils.py | 3 +++ 6 files changed, 17 insertions(+) diff --git a/pygsp/tests/test_docstrings.py b/pygsp/tests/test_docstrings.py index d8f7ff45..b6d9068e 100644 --- a/pygsp/tests/test_docstrings.py +++ b/pygsp/tests/test_docstrings.py @@ -41,3 +41,6 @@ def setup(doctest): suite_tutorials = test_docstrings('.', '.rst') suite = unittest.TestSuite([suite_reference, suite_tutorials]) + +if __name__ == '__main__': + unittest.main() diff --git a/pygsp/tests/test_filters.py b/pygsp/tests/test_filters.py index cef1689d..8a63f2c8 100644 --- a/pygsp/tests/test_filters.py +++ b/pygsp/tests/test_filters.py @@ -350,3 +350,6 @@ def test_approximations(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + +if __name__ == '__main__': + unittest.main() diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index f89f421e..78576055 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -1003,3 +1003,6 @@ def test_import_errors(self): suite_import_export = unittest.TestLoader().loadTestsFromTestCase(TestImportExport) suite = unittest.TestSuite([suite_graphs, suite_import_export]) + +if __name__ == '__main__': + unittest.main() diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index c11d38b7..d15fdc29 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -77,3 +77,5 @@ def test_sparse_distance_matrix(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) +if __name__ == '__main__': + unittest.main() diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py index 1c5bd464..d3b580d4 100644 --- a/pygsp/tests/test_plotting.py +++ b/pygsp/tests/test_plotting.py @@ -171,3 +171,6 @@ def test_unknown_backend(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/pygsp/tests/test_utils.py b/pygsp/tests/test_utils.py index af23f4b8..e5247113 100644 --- a/pygsp/tests/test_utils.py +++ b/pygsp/tests/test_utils.py @@ -34,3 +34,6 @@ def test_symmetrize(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + +if __name__ == '__main__': + unittest.main() From 3ed33f96382d3c811eab07789834095160c04ecd Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 15:18:02 +0200 Subject: [PATCH 098/118] fix doc --- pygsp/graphs/learngraph.py | 2 +- pygsp/tests/test_learning.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 87ea8aec..246e020b 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -509,7 +509,7 @@ class LearnGraph(Graph): >>> >>> # B) Learn the graph >>> param_opt = {'verbosity':0} - >>> Glearned = LearnGraph(X,k=k, param_opt=param_opt) + >>> Glearned = pg.graphs.LearnGraph(X,k=k, param_opt=param_opt) >>> # plot the learned graph >>> Glearned.coords = coords >>> diff --git a/pygsp/tests/test_learning.py b/pygsp/tests/test_learning.py index b960d8a9..1d838e96 100644 --- a/pygsp/tests/test_learning.py +++ b/pygsp/tests/test_learning.py @@ -132,3 +132,6 @@ def test_classification_tikhonov(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From af9845eac5ad6cfd81e9b3068f4f6439c4bf643a Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 17:29:35 +0200 Subject: [PATCH 099/118] fix some doc --- pygsp/graphs/learngraph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 246e020b..778cfb4f 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -468,8 +468,8 @@ class LearnGraph(Graph): minimize_W sum(sum(W .* Z)) - a * sum(log(sum(W))) + b * ||W||_F^2/2 + c * ||W-W_0||_F^2/2 In order to scale, this function can automatically: - 1. Compute the optimal value of a,b - 2. Use a resticted support to reduce the computational and the memory cost + 1. compute the optimal value of a,b and, + 2. use a resticted support to reduce the computational and the memory cost. By default, these option are enabled and only the average number of neighboors k should be set. Alternatively, the value of a and be can be set. From 57269fcab1e8529084e7ff6049a95c85284ed2e4 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 19:39:30 +0200 Subject: [PATCH 100/118] fix tests --- pygsp/tests/test_learn_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 449dc424..fd1caa6b 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -106,7 +106,7 @@ def test_learn_graph_log(self): np.testing.assert_allclose(theta_max, theta_max2) W = sparse_distance_matrix(neighbors, distances) learned_W2, _ = learn_graph_log_degree(W*theta2, edge_mask=W>0, verbosity=0) - assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-4) + assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-3) suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) if __name__ == '__main__': From 9e2b38255ad8b2fbb9ba5eb1d3503346fa1f7155 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 19:42:51 +0200 Subject: [PATCH 101/118] fix compat python 2.7 --- pygsp/graphs/learngraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 778cfb4f..9401a15b 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -543,6 +543,6 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, W, stat = learn_graph_log_degree(Z*theta, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) else: W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) - super().__init__(W, **kwargs) + super(LearnGraph, self).__init__(W, **kwargs) self._stat = stat \ No newline at end of file From 5af2527a3b6d3b96de836e44324946c138d25cb4 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 22:00:36 +0200 Subject: [PATCH 102/118] fix compat python 2.7 --- pygsp/tests/test_learn_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index fd1caa6b..6c6243f0 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -50,7 +50,7 @@ def test_sum_squareform(self): res1 = np.sum(A, axis=1) a = squareform_sp(A) S,St = sum_squareform(nt) - res2 = S @ a + res2 = S.dot(a) np.testing.assert_allclose(res1,res2) np.testing.assert_array_equal(S.transpose().todense(), St.todense()) @@ -65,7 +65,7 @@ def test_sum_squareform_sparse(self): w = squareform_sp(W) S,St = sum_squareform(nt, w) wfull = w.data - res2 = S @ wfull + res2 = S.dot(wfull) np.testing.assert_allclose(res1,res2) np.testing.assert_array_equal(S.transpose().todense(), St.todense()) From 0b14b4a2f77910a8e3a6f74f33ee906346a96864 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 22:20:29 +0200 Subject: [PATCH 103/118] fix compat python 2.7 --- pygsp/tests/test_learn_graph.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 6c6243f0..81a4c056 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -1,3 +1,5 @@ +from __future__ import division + import unittest import numpy as np from nn import nn, sparse_distance_matrix From 3e7daa2591890d283b93191dfbb6818bcfd29dda Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Mon, 19 Aug 2019 22:41:40 +0200 Subject: [PATCH 104/118] fix random test --- pygsp/tests/test_learn_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 81a4c056..9c90886d 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -87,7 +87,7 @@ def test_learn_graph_log(self): # Create a bunch of signals n=100 d = 400 - G = pg.graphs.Sensor(N=n,k=6) + G = pg.graphs.Sensor(N=n,k=6, seed=0) G.compute_fourier_basis() # g = pg.filters.Heat(G, scale=5) g = pg.filters.Filter(G,lambda x:1/(1+5*x)) @@ -108,7 +108,7 @@ def test_learn_graph_log(self): np.testing.assert_allclose(theta_max, theta_max2) W = sparse_distance_matrix(neighbors, distances) learned_W2, _ = learn_graph_log_degree(W*theta2, edge_mask=W>0, verbosity=0) - assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-3) + assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-4) suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) if __name__ == '__main__': From c0ee54c43aff0c80253f3ca14663031bcb494146 Mon Sep 17 00:00:00 2001 From: nperraud <6399466+nperraud@users.noreply.github.com> Date: Thu, 26 Sep 2019 18:01:35 +0200 Subject: [PATCH 105/118] Update learngraph.py add epsilon for stability --- pygsp/graphs/learngraph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learngraph.py index 9401a15b..85ddfdd6 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learngraph.py @@ -224,10 +224,10 @@ def compute_theta_bounds(Z, geom_mean=False, islist=None): K_mat = np.tile(np.arange(1,m+1), (n, 1)) ## Theoretical intervals of theta for each desired sparsity level: if geom_mean: - theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15), axis=0) + theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15), axis=0) else: # try geometric mean instead of arithmetic: - theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted)+1e-15)), axis=0)) + theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15)), axis=0)) theta_l = np.zeros(theta_u.shape) theta_l[:-1] = theta_u[1:] return theta_l, theta_u, Z_sorted @@ -545,4 +545,4 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) super(LearnGraph, self).__init__(W, **kwargs) - self._stat = stat \ No newline at end of file + self._stat = stat From 28c85fb255e485006a69b414fbed33681398eefc Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sun, 10 Nov 2019 09:38:32 +0100 Subject: [PATCH 106/118] update test and remove useless files --- nn-test.py | 46 ----- nn.py | 245 --------------------------- pygsp/tests/test_docstrings.py | 2 - pygsp/tests/test_filters.py | 2 - pygsp/tests/test_graphs.py | 2 - pygsp/tests/test_learn_graph.py | 4 +- pygsp/tests/test_learning.py | 3 - pygsp/tests/test_nearest_neighbor.py | 2 - pygsp/tests/test_plotting.py | 5 +- pygsp/tests/test_utils.py | 3 - 10 files changed, 2 insertions(+), 312 deletions(-) delete mode 100644 nn-test.py delete mode 100644 nn.py diff --git a/nn-test.py b/nn-test.py deleted file mode 100644 index 77a6cde6..00000000 --- a/nn-test.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -import numpy as np -from nn import nn - -class TestCase(unittest.TestCase): - def test_nngraph(self, n_vertices=24): - data = np.random.RandomState(42).uniform(size=(n_vertices, 3)) - metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] - - for metric in metrics: - for kind in ['knn', 'radius']: - for backend in backends: - params = dict(features=data, metric=metric, kind=kind, radius=0.25) - ref_nn, ref_d = nn(backend='scipy-pdist', **params) - # Unsupported combinations. - if backend == 'flann' and metric == 'max_dist': - self.assertRaises(ValueError, nn, data, - metric=metric, backend=backend) - elif backend == 'nmslib' and metric == 'minkowski': - self.assertRaises(ValueError, nn, data, - metric=metric, backend=backend) - elif backend == 'nmslib' and kind == 'radius': - self.assertRaises(ValueError, nn, data, - kind=kind, backend=backend) - else: - params['backend'] = backend - if backend == 'flann': -# params['target_precision'] = 1 - other_nn, other_d = nn(random_seed=44, **params) - else: - other_nn, other_d = nn(**params) - print(kind, backend) - for a,b in zip(ref_nn, other_nn): - np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) - - for a,b in zip(ref_d, other_d): - np.testing.assert_allclose(np.sort(a),np.sort(b), rtol=1e-5) - - def test_sparse_distance_matrix(self): - data = np.random.RandomState(42).uniform(size=(24, 3)) - - -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/nn.py b/nn.py deleted file mode 100644 index 3ae6a60d..00000000 --- a/nn.py +++ /dev/null @@ -1,245 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import division - -import numpy as np -from scipy import sparse, spatial -from pygsp import utils - -def _scipy_pdist(features, metric, order, kind, k, radius, params): - if params: - raise ValueError('unexpected parameters {}'.format(params)) - metric = 'cityblock' if metric == 'manhattan' else metric - metric = 'chebyshev' if metric == 'max_dist' else metric - params = dict(metric=metric) - if metric == 'minkowski': - params['p'] = order - dist = spatial.distance.pdist(features, **params) - dist = spatial.distance.squareform(dist) - if kind == 'knn': - neighbors = np.argsort(dist)[:, :k+1] - distances = np.take_along_axis(dist, neighbors, axis=-1) - elif kind == 'radius': - distances = [] - neighbors = [] - for distance in dist: - neighbor = np.flatnonzero(distance < radius) - neighbors.append(neighbor) - distances.append(distance[neighbor]) - return neighbors, distances - - -def _scipy_kdtree(features, _, order, kind, k, radius, params): - if order is None: - raise ValueError('invalid metric for scipy-kdtree') - eps = params.pop('eps', 0) - tree = spatial.KDTree(features, **params) - params = dict(p=order, eps=eps) - if kind == 'knn': - params['k'] = k + 1 - elif kind == 'radius': - params['k'] = None - params['distance_upper_bound'] = radius - distances, neighbors = tree.query(features, **params) - return neighbors, distances - - -def _scipy_ckdtree(features, _, order, kind, k, radius, params): - if order is None: - raise ValueError('invalid metric for scipy-kdtree') - eps = params.pop('eps', 0) - tree = spatial.cKDTree(features, **params) - params = dict(p=order, eps=eps, n_jobs=-1) - if kind == 'knn': - params['k'] = k + 1 - elif kind == 'radius': - params['k'] = features.shape[0] # number of vertices - params['distance_upper_bound'] = radius - distances, neighbors = tree.query(features, **params) - if kind == 'knn': - return neighbors, distances - elif kind == 'radius': - dist = [] - neigh = [] - for distance, neighbor in zip(distances, neighbors): - mask = (distance != np.inf) - dist.append(distance[mask]) - neigh.append(neighbor[mask]) - return neigh, dist - - -def _flann(features, metric, order, kind, k, radius, params): - if metric == 'max_dist': - raise ValueError('flann gives wrong results for metric="max_dist".') - try: - import cyflann as cfl - except Exception as e: - raise ImportError('Cannot import cyflann. Choose another nearest ' - 'neighbors backend or try to install it with ' - 'pip (or conda) install cyflann. ' - 'Original exception: {}'.format(e)) - cfl.set_distance_type(metric, order=order) - index = cfl.FLANNIndex() - index.build_index(features, **params) - # I tried changing the algorithm and testing performance on huge matrices, - # but the default parameters seems to work best. - if kind == 'knn': - neighbors, distances = index.nn_index(features, k+1) - if metric == 'euclidean': - np.sqrt(distances, out=distances) - elif metric == 'minkowski': - np.power(distances, 1/order, out=distances) - elif kind == 'radius': - distances = [] - neighbors = [] - if metric == 'euclidean': - radius = radius**2 - elif metric == 'minkowski': - radius = radius**order - n_vertices, _ = features.shape - for vertex in range(n_vertices): - neighbor, distance = index.nn_radius(features[vertex, :], radius) - distances.append(distance) - neighbors.append(neighbor) - if metric == 'euclidean': - distances = list(map(np.sqrt, distances)) - elif metric == 'minkowski': - distances = list(map(lambda d: np.power(d, 1/order), distances)) - index.free_index() - return neighbors, distances - - -def _nmslib(features, metric, order, kind, k, _, params): - if kind == 'radius': - raise ValueError('nmslib does not support kind="radius".') - if metric == 'minkowski': - raise ValueError('nmslib does not support metric="minkowski".') - try: - import nmslib as nms - except Exception as e: - raise ImportError('Cannot import nmslib. Choose another nearest ' - 'neighbors backend or try to install it with ' - 'pip (or conda) install nmslib. ' - 'Original exception: {}'.format(e)) - n_vertices, _ = features.shape - params_index = params.pop('index', None) - params_query = params.pop('query', None) - metric = 'l2' if metric == 'euclidean' else metric - metric = 'l1' if metric == 'manhattan' else metric - metric = 'linf' if metric == 'max_dist' else metric - index = nms.init(space=metric, **params) - index.addDataPointBatch(features) - index.createIndex(params_index) - if params_query is not None: - index.setQueryTimeParams(params_query) - results = index.knnQueryBatch(features, k=k+1) - neighbors, distances = zip(*results) - distances = np.concatenate(distances).reshape(n_vertices, k+1) - neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1) - return neighbors, distances - -def nn(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): - '''Find nearest neighboors. - - Parameters - ---------- - features : data numpy array - metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional - Metric used to compute pairwise distances. - - * ``'euclidean'`` defines pairwise distances as - :math:`d(v_i, v_j) = \| x_i - x_j \|_2`. - * ``'manhattan'`` defines pairwise distances as - :math:`d(v_i, v_j) = \| x_i - x_j \|_1`. - * ``'minkowski'`` generalizes the above and defines distances as - :math:`d(v_i, v_j) = \| x_i - x_j \|_p` - where :math:`p` is the ``order`` of the norm. - * ``'max_dist'`` defines pairwise distances as - :math:`d(v_i, v_j) = \| x_i - x_j \|_\infty = \max(x_i - x_j)`, where - the maximum is taken over the elements of the vector. - - More metrics may be supported for some backends. - Please refer to the documentation of the chosen backend. - kind : 'knn' or 'radius' (default 'knn') - k : number of nearest neighboors if 'knn' is selected - radius : radius of the search if 'radius' is slected - - order : float, optional - The order of the Minkowski distance for ``metric='minkowski'``. - backend : string, optional - * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to - compute pairwise distances. The method is brute force and computes - all distances. That is the slowest method. - * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method - builds a k-d tree to prune the number of pairwise distances it has to - compute. That is an efficient strategy for low-dimensional spaces. - * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as - ``'scipy-kdtree'`` but with C bindings, which should be faster. - That is the default. - * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors - (FLANN) `_. That method is an - approximation. - * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) - `_. That method is an - approximation. It should be the fastest in high-dimensional spaces. - - You can look at this `benchmark - `_ to get an idea of the - relative performance of those backends. It's nonetheless wise to run - some tests on your own data. - ''' - if kind=='knn': - radius = None - elif kind=='radius': - k = None - else: - raise ValueError('"kind" must be "knn" or "radius"') - - _orders = { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf, - 'minkowski': order, - } - order = _orders.pop(metric, None) - try: - function = globals()['_' + backend.replace('-', '_')] - except KeyError: - raise ValueError('Invalid backend "{}".'.format(backend)) - neighbors, distances = function(features, metric, order, - kind, k, radius, kwargs) - return neighbors, distances - - -def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None): - '''Build a sparse distance matrix.''' - n_edges = [len(n) - 1 for n in neighbors] # remove distance to self - if safe and kind is None: - raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode') - - if safe and kind == 'radius': - n_disconnected = np.sum(np.asarray(n_edges) == 0) - if n_disconnected > 0: - _logger.warning('{} points (out of {}) have no neighboors. ' - 'Consider increasing the radius or setting ' - 'kind=knn.'.format(n_disconnected, n_vertices)) - - value = np.empty(sum(n_edges), dtype=np.float) - row = np.empty_like(value, dtype=np.int) - col = np.empty_like(value, dtype=np.int) - start = 0 - n_vertices = len(n_edges) - for vertex in range(n_vertices): - if safe and kind == 'knn': - assert n_edges[vertex] == k - end = start + n_edges[vertex] - value[start:end] = distances[vertex][1:] - row[start:end] = np.full(n_edges[vertex], vertex) - col[start:end] = neighbors[vertex][1:] - start = end - W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) - if symmetrize: - # Enforce symmetry. May have been broken by k-NN. Checking symmetry - # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method='fill') - return W \ No newline at end of file diff --git a/pygsp/tests/test_docstrings.py b/pygsp/tests/test_docstrings.py index b6d9068e..279b65f2 100644 --- a/pygsp/tests/test_docstrings.py +++ b/pygsp/tests/test_docstrings.py @@ -42,5 +42,3 @@ def setup(doctest): suite = unittest.TestSuite([suite_reference, suite_tutorials]) -if __name__ == '__main__': - unittest.main() diff --git a/pygsp/tests/test_filters.py b/pygsp/tests/test_filters.py index 8a63f2c8..d51243b5 100644 --- a/pygsp/tests/test_filters.py +++ b/pygsp/tests/test_filters.py @@ -351,5 +351,3 @@ def test_approximations(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) -if __name__ == '__main__': - unittest.main() diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 78576055..31e118f0 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -1004,5 +1004,3 @@ def test_import_errors(self): suite_import_export = unittest.TestLoader().loadTestsFromTestCase(TestImportExport) suite = unittest.TestSuite([suite_graphs, suite_import_export]) -if __name__ == '__main__': - unittest.main() diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 9c90886d..c619662b 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -110,6 +110,4 @@ def test_learn_graph_log(self): learned_W2, _ = learn_graph_log_degree(W*theta2, edge_mask=W>0, verbosity=0) assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-4) -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file diff --git a/pygsp/tests/test_learning.py b/pygsp/tests/test_learning.py index 1d838e96..b960d8a9 100644 --- a/pygsp/tests/test_learning.py +++ b/pygsp/tests/test_learning.py @@ -132,6 +132,3 @@ def test_classification_tikhonov(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/pygsp/tests/test_nearest_neighbor.py b/pygsp/tests/test_nearest_neighbor.py index d15fdc29..c11d38b7 100644 --- a/pygsp/tests/test_nearest_neighbor.py +++ b/pygsp/tests/test_nearest_neighbor.py @@ -77,5 +77,3 @@ def test_sparse_distance_matrix(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) -if __name__ == '__main__': - unittest.main() diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py index d3b580d4..70b56464 100644 --- a/pygsp/tests/test_plotting.py +++ b/pygsp/tests/test_plotting.py @@ -170,7 +170,4 @@ def test_unknown_backend(self): self.assertRaises(ValueError, G.plot, backend='abc') -suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file +suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file diff --git a/pygsp/tests/test_utils.py b/pygsp/tests/test_utils.py index e5247113..af23f4b8 100644 --- a/pygsp/tests/test_utils.py +++ b/pygsp/tests/test_utils.py @@ -34,6 +34,3 @@ def test_symmetrize(self): suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) - -if __name__ == '__main__': - unittest.main() From 97ab20c1bd0038bbc210dacf8e9f9416f37d739b Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sun, 10 Nov 2019 09:48:24 +0100 Subject: [PATCH 107/118] small fix in import --- pygsp/tests/test_learn_graph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index c619662b..5648d81e 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -2,7 +2,6 @@ import unittest import numpy as np -from nn import nn, sparse_distance_matrix from pygsp._nearest_neighbor import nearest_neighbor as nn from pygsp._nearest_neighbor import sparse_distance_matrix from scipy import sparse From 1f83d68d5a8c566513a760baddf52f0a4e48f980 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sun, 10 Nov 2019 10:11:57 +0100 Subject: [PATCH 108/118] update seed --- pygsp/tests/test_learn_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 5648d81e..7775dac2 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -86,7 +86,7 @@ def test_learn_graph_log(self): # Create a bunch of signals n=100 d = 400 - G = pg.graphs.Sensor(N=n,k=6, seed=0) + G = pg.graphs.Sensor(N=n,k=6, seed=1) G.compute_fourier_basis() # g = pg.filters.Heat(G, scale=5) g = pg.filters.Filter(G,lambda x:1/(1+5*x)) From 3c18c1042d21648596e2d30d7efe0df4ee761f8d Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Sun, 10 Nov 2019 10:20:39 +0100 Subject: [PATCH 109/118] update seed --- pygsp/tests/test_learn_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 7775dac2..707c2896 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -86,7 +86,7 @@ def test_learn_graph_log(self): # Create a bunch of signals n=100 d = 400 - G = pg.graphs.Sensor(N=n,k=6, seed=1) + G = pg.graphs.Sensor(N=n,k=6, seed=0) G.compute_fourier_basis() # g = pg.filters.Heat(G, scale=5) g = pg.filters.Filter(G,lambda x:1/(1+5*x)) @@ -107,6 +107,6 @@ def test_learn_graph_log(self): np.testing.assert_allclose(theta_max, theta_max2) W = sparse_distance_matrix(neighbors, distances) learned_W2, _ = learn_graph_log_degree(W*theta2, edge_mask=W>0, verbosity=0) - assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<1e-4) + assert(np.sum(np.abs(learned_W2.todense()-learned_W))/np.sum(np.abs(learned_W))<3e-3) suite = unittest.TestLoader().loadTestsFromTestCase(TestCase) \ No newline at end of file From 3bdbfc082e405c3452a4de8cec409b43a913fd37 Mon Sep 17 00:00:00 2001 From: Vassilis Kalofolias Date: Mon, 11 Nov 2019 22:54:42 +0100 Subject: [PATCH 110/118] minor fix in playground.ipynb --- playground.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playground.ipynb b/playground.ipynb index c22fd25f..6f0831d1 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -40,7 +40,7 @@ "source": [ "G = graphs.Logo()\n", "G.estimate_lmax()\n", - "g = filters.Heat(G, tau=100)" + "g = filters.Heat(G, scale=100)" ] }, { From 23d51d924c4161cb226365be66306a34a5819928 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Fri, 15 Nov 2019 10:41:22 +0100 Subject: [PATCH 111/118] rename --- pygsp/graphs/__init__.py | 2 +- pygsp/graphs/{learngraph.py => learned.py} | 11 +++++------ pygsp/tests/test_graphs.py | 8 ++++---- 3 files changed, 10 insertions(+), 11 deletions(-) rename pygsp/graphs/{learngraph.py => learned.py} (98%) diff --git a/pygsp/graphs/__init__.py b/pygsp/graphs/__init__.py index 22a48a33..be897b95 100644 --- a/pygsp/graphs/__init__.py +++ b/pygsp/graphs/__init__.py @@ -204,7 +204,7 @@ 'ErdosRenyi', 'FullConnected', 'Grid2d', - 'LearnGraph', + 'LearnedFromSmoothSignals', 'LineGraph', 'Logo', 'LowStretchTree', diff --git a/pygsp/graphs/learngraph.py b/pygsp/graphs/learned.py similarity index 98% rename from pygsp/graphs/learngraph.py rename to pygsp/graphs/learned.py index 85ddfdd6..2085ab6a 100644 --- a/pygsp/graphs/learngraph.py +++ b/pygsp/graphs/learned.py @@ -448,8 +448,8 @@ def learn_graph_log_degree(Z, -class LearnGraph(Graph): - r"""Learned graph. +class LearnedFromSmoothSignals(Graph): + r"""Learned graph from smooth signals. Return a graph learned with with the function by :func:`learn_graph_log_degree`. @@ -509,7 +509,7 @@ class LearnGraph(Graph): >>> >>> # B) Learn the graph >>> param_opt = {'verbosity':0} - >>> Glearned = pg.graphs.LearnGraph(X,k=k, param_opt=param_opt) + >>> Glearned = pg.graphs.LearnedFromSmoothSignals(X,k=k, param_opt=param_opt) >>> # plot the learned graph >>> Glearned.coords = coords >>> @@ -541,8 +541,7 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, if a is None and b is None: theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Zp, k) W, stat = learn_graph_log_degree(Z*theta, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) - else: + else: W, stat = learn_graph_log_degree(Z, a=a, b=b, edge_mask=edge_mask, rel_edge=rel_edge, **param_opt) - super(LearnGraph, self).__init__(W, **kwargs) - + super(LearnedFromSmoothSignals, self).__init__(W, **kwargs) self._stat = stat diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index c1ed59c5..54cb329e 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -713,7 +713,7 @@ def test_grid2d_diagonals(self): self.assertEqual(G.W[20, 27], 1.0) def test_learn_graph(self): - from pygsp.graphs import LearnGraph + from pygsp.graphs import LearnedFromSmoothSignals from pygsp.utils import distanz from pygsp.graphs.learngraph import gsp_compute_graph_learning_theta @@ -729,8 +729,8 @@ def test_learn_graph(self): X = np.squeeze(g.filter(S)) # Test if large scale and small scale are close - G_learned1 = LearnGraph(X, k=k, sparse=False) - G_learned2 = LearnGraph(X,k=k, sparse=True) + G_learned1 = LearnedFromSmoothSignals(X, k=k, sparse=False) + G_learned2 = LearnedFromSmoothSignals(X,k=k, sparse=True) assert(np.sum(np.abs(G_learned2.W.todense()-G_learned1.W.todense()))/np.sum(np.abs(G_learned1.W.todense()))<1e-4) # Test automatic parameters tuning @@ -738,7 +738,7 @@ def test_learn_graph(self): theta, theta_min, theta_max = gsp_compute_graph_learning_theta(Z, k) a = 1/theta b = a - G_learned3 = LearnGraph(X,a=a,b=b, sparse=False) + G_learned3 = LearnedFromSmoothSignals(X,a=a,b=b, sparse=False) assert(np.sum(np.abs(G_learned3.W.todense()-G_learned1.W.todense()))/np.sum(np.abs(G_learned1.W.todense()))<1e-3) suite_graphs = unittest.TestLoader().loadTestsFromTestCase(TestCase) From 84613efa018a50a178acdfa774927dcd407aa351 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Fri, 15 Nov 2019 11:00:55 +0100 Subject: [PATCH 112/118] hack my way through renaming --- pygsp/graphs/__init__.py | 2 +- pygsp/graphs/graph.py | 4 ++-- pygsp/tests/test_learn_graph.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pygsp/graphs/__init__.py b/pygsp/graphs/__init__.py index be897b95..39659696 100644 --- a/pygsp/graphs/__init__.py +++ b/pygsp/graphs/__init__.py @@ -204,7 +204,6 @@ 'ErdosRenyi', 'FullConnected', 'Grid2d', - 'LearnedFromSmoothSignals', 'LineGraph', 'Logo', 'LowStretchTree', @@ -231,4 +230,5 @@ __all__ = _GRAPHS + _NNGRAPHS _utils.import_classes(_GRAPHS, 'graphs', 'graphs') +from pygsp.graphs.learned import LearnedFromSmoothSignals _utils.import_classes(_NNGRAPHS, 'graphs.nngraphs', 'graphs') diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py index 1c17bcab..a0cea791 100644 --- a/pygsp/graphs/graph.py +++ b/pygsp/graphs/graph.py @@ -2,8 +2,8 @@ from __future__ import division -import os -from collections import Counter +# import os +# from collections import Counter import numpy as np from scipy import sparse diff --git a/pygsp/tests/test_learn_graph.py b/pygsp/tests/test_learn_graph.py index 707c2896..6558d410 100644 --- a/pygsp/tests/test_learn_graph.py +++ b/pygsp/tests/test_learn_graph.py @@ -7,7 +7,7 @@ from scipy import sparse import pygsp as pg from pygsp.utils import distanz -from pygsp.graphs.learngraph import * +from pygsp.graphs.learned import * class TestCase(unittest.TestCase): def test_prox_sum_log(self): From 27ed937a3eefe6138d20de93360ae2d41d8cd258 Mon Sep 17 00:00:00 2001 From: Nathanael Perraudin Date: Fri, 15 Nov 2019 11:18:37 +0100 Subject: [PATCH 113/118] fix import --- pygsp/tests/test_graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index 54cb329e..040b87c3 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -715,7 +715,7 @@ def test_grid2d_diagonals(self): def test_learn_graph(self): from pygsp.graphs import LearnedFromSmoothSignals from pygsp.utils import distanz - from pygsp.graphs.learngraph import gsp_compute_graph_learning_theta + from pygsp.graphs.learned import gsp_compute_graph_learning_theta # Create a bunch of signals n=100 From 7b732b6d0acfb479c108bbbd44d63ac7d11dfa15 Mon Sep 17 00:00:00 2001 From: Vassilis Kalofolias Date: Sun, 17 Nov 2019 18:26:53 +0100 Subject: [PATCH 114/118] Big bug in graph learning: Z needs **squared** Euclidean distances --- pygsp/graphs/learned.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pygsp/graphs/learned.py b/pygsp/graphs/learned.py index 2085ab6a..c9bf7298 100644 --- a/pygsp/graphs/learned.py +++ b/pygsp/graphs/learned.py @@ -12,9 +12,9 @@ def isvector(x): '''Test if x is a vector''' if sparse.issparse(x): - return x.shape[1]==1 + return (np.ndim(x) == 1) or (np.ndim(x) == 2 and x.shape[1] == 1) try: - return len(x.shape)==1 + return (np.ndim(x) == 2 and len(x.shape) == 1) except: return False @@ -451,10 +451,10 @@ def learn_graph_log_degree(Z, class LearnedFromSmoothSignals(Graph): r"""Learned graph from smooth signals. - Return a graph learned with with the function by :func:`learn_graph_log_degree`. + Return a graph learned with the function of :func:`learn_graph_log_degree`. - This function computes the pairwise distances $Z$ between the vectos in $X$. Then - the adjacency matrix $W$ is estiamged from $Z$, using the + This function computes the squared pairwise distances $Z$ between the vectors + in $X$. Then the adjacency matrix $W$ is estimated from $Z$, using the smoothness assumption that $\text{trace}(X^TLX)$ is small, where $X$ is the data (columns) changing smoothly from node to node on the graph and $L = D-W$ is the combinatorial graph Laplacian. See :cite:`kalofolias2018large` @@ -468,11 +468,11 @@ class LearnedFromSmoothSignals(Graph): minimize_W sum(sum(W .* Z)) - a * sum(log(sum(W))) + b * ||W||_F^2/2 + c * ||W-W_0||_F^2/2 In order to scale, this function can automatically: - 1. compute the optimal value of a,b and, + 1. compute the optimal values of (a, b) and 2. use a resticted support to reduce the computational and the memory cost. By default, these option are enabled and only the average number of neighboors k should be set. - Alternatively, the value of a and be can be set. + Alternatively, the values of a, b can be set. Parameters ---------- @@ -509,7 +509,7 @@ class LearnedFromSmoothSignals(Graph): >>> >>> # B) Learn the graph >>> param_opt = {'verbosity':0} - >>> Glearned = pg.graphs.LearnedFromSmoothSignals(X,k=k, param_opt=param_opt) + >>> Glearned = pg.graphs.LearnedFromSmoothSignals(X, k=k, param_opt=param_opt) >>> # plot the learned graph >>> Glearned.coords = coords >>> @@ -521,8 +521,7 @@ class LearnedFromSmoothSignals(Graph): References ---------- - See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more informations. - + See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more information. """ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, param_nn={}, param_opt={}, **kwargs): @@ -531,11 +530,13 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, if kk is None: kk = 3*k neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) - Z = sparse_distance_matrix(neighbors, distances) + # the original method needs squared distances!! + Z = sparse_distance_matrix(neighbors, distances**2) edge_mask = Z>0 - Zp = distances[:,1:] + Zp = distances[:,1:]**2 else: - Z = distanz(X.transpose()) + # the original method needs squared distances!! + Z = distanz(X.transpose())**2 Zp = Z edge_mask = None if a is None and b is None: From 60f8e5f7e304c91d63f19f9bb458926da6b3f8c1 Mon Sep 17 00:00:00 2001 From: Vassilis Kalofolias Date: Sun, 29 Mar 2020 21:31:29 +0200 Subject: [PATCH 115/118] Allow to set manually edge_mask in LearnedFromSmoothSignals Fixed bugs and improved compute_theta_bounds (geom_mean=True vs False, islist->is_sorted) Added some documentation (utils._nearest_neighbor) Allowed LearnedFromSmoothSignals to take manually an edge_mask --- doc/tutorials/graph_learning.py | 139 ++++++++++++++++++++++++++++++++ pygsp/_nearest_neighbor.py | 52 ++++++++++-- pygsp/graphs/__init__.py | 2 +- pygsp/graphs/learned.py | 132 +++++++++++++++++++++--------- pygsp/utils.py | 10 +-- 5 files changed, 288 insertions(+), 47 deletions(-) create mode 100755 doc/tutorials/graph_learning.py diff --git a/doc/tutorials/graph_learning.py b/doc/tutorials/graph_learning.py new file mode 100755 index 00000000..a0f28b97 --- /dev/null +++ b/doc/tutorials/graph_learning.py @@ -0,0 +1,139 @@ +import numpy as np +import pygsp +from matplotlib import pyplot as plt + +#%% +n_signals = 4 +def f1(x, y): return (-np.sin((2-x-y)**2)/2 + np.cos(y*3))/2 +def f2(x, y): return np.cos((x+y)**2); +def f3(x, y): return ((x-.5)**2 + (y-.5)**3 + x - y)*3; +def f4(x, y): return np.sin(3*((x-.5)**2+(y-.5)**2)); + + + +#%% +n_nodes = 300 +coords = np.random.uniform(size=(n_nodes, 2)) + +# the vectors of smooth signals will be stored as columns of X +X = np.zeros((n_nodes, n_signals)) +for i, f in enumerate((f1, f2, f3, f4)): + X[:, i] = f(coords[:, 0], coords[:, 1]) + + + +#%% +plt.figure(figsize=(16, 10)) +for i, f in enumerate((f1, f2, f3, f4)): + plt.subplot(2, 2, i+1); + plt.scatter(coords[:, 0], coords[:, 1], 500, X[:, i], '.'); + plt.title('smooth signal {}'.format(i+1)); + plt.axis('off'); + plt.colorbar(); + + +#%% + +k = 5 +param_opt = {'maxit':2000} + +G1 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 0], coords=coords, k=k, param_opt=param_opt) +G2 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 1], coords=coords, k=k, param_opt=param_opt) +G3 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 2], coords=coords, k=k, param_opt=param_opt) +G4 = pygsp.graphs.LearnedFromSmoothSignals(X[:, 3], coords=coords, k=k, param_opt=param_opt) + +#%% + + +# G1.set_coordinates() +plt.figure(figsize=(18, 16)) +for i, G in enumerate((G1, G2, G3, G4)): + _, _, weights = G.get_edge_list() + ax = plt.subplot(2, 2, i+1); + G.plot(vertex_size=50, edge_width=weights, ax=ax) + plt.xticks([]) + plt.yticks([]) + plt.title('n_vertices = {}, n_edges = {}, k = {:.1f}'.format(G.n_vertices, G.n_edges, G.n_edges * 2 / G.n_vertices)) + + + +#%% + +G_all = pygsp.graphs.LearnedFromSmoothSignals(X, coords=coords, k=k, param_opt=param_opt) +plt.figure(figsize=(8, 6)) +ax = plt.axes() +G_all.plot(vertex_size=0, ax=ax) +plt.xticks([]), plt.yticks([]) +plt.title('n_vertices = {}, n_edges = {}, k = {:.1f}'.format(G_all.n_vertices, G_all.n_edges, G_all.n_edges * 2 / G_all.n_vertices)) + + + +#%% + +G_coords = pygsp.graphs.LearnedFromSmoothSignals(coords, coords=coords, k=k, param_opt=param_opt) +plt.figure(figsize=(8, 6)) +ax = plt.axes() +G_coords.plot(vertex_size=0, ax=ax, edges=True) + + + + +#%% + + +import scipy +Z = scipy.spatial.distance_matrix(X, X)**2 +plt.imshow(Z) +print(np.count_nonzero(Z)) +edge_mask = (Z < 1 / np.sqrt(n_nodes)).astype(np.int) +np.fill_diagonal(edge_mask, 0) +print(np.count_nonzero(edge_mask)) +edge_mask[:3,:3] + + + +#%% + +G_all = pygsp.graphs.LearnedFromSmoothSignals(X, coords=coords, k=k, + param_opt=param_opt, + sparse=True, edge_mask=edge_mask) +plt.figure(figsize=(8, 6)) +ax = plt.axes() +G_all.plot(vertex_size=0, ax=ax) +plt.xticks([]), plt.yticks([]) +plt.title('n_vertices = {}, n_edges = {}, k = {:.1f}'.format(G_all.n_vertices, G_all.n_edges, G_all.n_edges * 2 / G_all.n_vertices)) + + + + +#%% + + + + + + +#%% + + + + + + +#%% + + + + + + +#%% + + + + + + +#%% + + diff --git a/pygsp/_nearest_neighbor.py b/pygsp/_nearest_neighbor.py index 15dd97ce..eafb6ab2 100644 --- a/pygsp/_nearest_neighbor.py +++ b/pygsp/_nearest_neighbor.py @@ -4,6 +4,7 @@ import numpy as np from scipy import sparse, spatial +from scipy.linalg import norm from pygsp import utils _logger = utils.build_logger(__name__) @@ -142,7 +143,18 @@ def _nmslib(features, metric, order, kind, k, _, params): def nearest_neighbor(features, metric='euclidean', order=2, kind='knn', k=10, radius=None, backend='scipy-ckdtree', **kwargs): '''Find nearest neighboors. - + Inputs: + features [n, m]: + rows correspond to nodes, columns correspond to features/dimensions + k [default 10]: number of neighbors per node + + Outputs: + neighbors [n, k]: + list of nodes neighboring to node of each row + distances [n, k]: + distance of each row's node to each of its k neighbors + + Parameters ---------- features : data numpy array @@ -239,9 +251,39 @@ def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, ki row[start:end] = np.full(n_edges[vertex], vertex) col[start:end] = neighbors[vertex][1:] start = end - W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) + Z = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices)) if symmetrize: # Enforce symmetry. May have been broken by k-NN. Checking symmetry - # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method='fill') - return W \ No newline at end of file + # with np.abs(Z - Z.T).sum() is as costly as the symmetrization itself. + Z = utils.symmetrize(Z, method='fill') + return Z + + +def distances_from_edge_mask(X, edge_mask, form='vector'): + ''' + Compute pairwise distances between rows of X, only for pairs corresponding + to non-zeros of edge_mask + + Inputs: + X [n, m]: rows are nodes, columns are features + edge_mask [n, n]: sparse mask, non-zeros correspond to allowed edges + form: 'vector' (default) or 'matrix' to return z or Z + + Output: + Z [n, n]: sparse matrix with same pattern as edge_mask, containing + corresponding pairwise distances (NOT SQUARED) + + Important function to reduce computation in case edge mask is given + + TODO: merge with sparse_distance_matrix() and nearest_neighbor() + TODO: clean up and expose to the user + ''' + r, c = np.where(edge_mask) + z = norm(X[r] - X[c], axis=1) + if form == 'vector': + return z + elif form == 'matrix': + return sparse.csr_matrix((z, (r, c))) + else: + _logger.error('form can either be "vector" or "matrix"') + \ No newline at end of file diff --git a/pygsp/graphs/__init__.py b/pygsp/graphs/__init__.py index 39659696..54bb92b5 100644 --- a/pygsp/graphs/__init__.py +++ b/pygsp/graphs/__init__.py @@ -183,7 +183,7 @@ Sphere TwoMoons -Learning graph from data +Learning graphs from data ------------------------ .. autosummary:: diff --git a/pygsp/graphs/learned.py b/pygsp/graphs/learned.py index c9bf7298..967d6433 100644 --- a/pygsp/graphs/learned.py +++ b/pygsp/graphs/learned.py @@ -5,9 +5,11 @@ from pygsp import utils from pygsp.utils import distanz from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix +from pygsp._nearest_neighbor import distances_from_edge_mask from . import Graph # prevent circular import in Python < 3.5 -logger = utils.build_logger(__name__) +_logger = utils.build_logger(__name__) + def isvector(x): '''Test if x is a vector''' @@ -19,7 +21,6 @@ def isvector(x): return False - def prox_sum_log(x, gamma): '''Solves the proximal problem: sol = argmin_z 0.5*||x - z||_2^2 - gamma * sum(log(z)) @@ -33,7 +34,8 @@ def prox_sum_log(x, gamma): def issymetric(W, tol=1e-7): '''Test if a sparse matrix is symmetric''' WT = sparse.triu(W) - sparse.triu(W.transpose()) - return np.sum(np.abs(WT)) so get rid of it''' + return np.sort(np.partition(Z, k, axis=axis)[:, :k], axis=1) + + +def compute_theta_bounds(Z, geom_mean=False, is_sorted=False, kmax=None): '''Compute the values of parameter theta (controlling sparsity) that should be expected to give each sparsity level. Return upper and lower bounds each sparsity level k=[1, ..., n-1] neighbors/node. - Z : distance matrix + You can reduce computation by giving a maximum k needed + + Z : squared or pairwise distance matrix (zero diagonal) + OR [n,m] sized distance matrix with m smallest distances (nonzero) per + node. Not necessarily sorted, but if sorted we save computation geom_mean: use geometric mean instead of arithmetic mean? default: False ''' - # Z is the zero-diagonal pairwise distance matrix between nodes if isvector(Z): ValueError('Z must be a matrix.') assert(len(Z.shape)==2) - n = len(Z); - if islist is None: - islist = not(Z.shape[0]==Z.shape[1]) - if islist: - Z_sorted = np.sort(Z, axis=1) - else: - assert(Z.shape[0]==Z.shape[1]) - # don't take into account the diagonal of Z - Z_sorted = np.zeros((n, n-1)); - for i in range(n): - Z_sorted[i, :] = np.sort(np.concatenate((Z[i,:i],Z[i,i+1:])), kind='mergesort') + n, m = Z.shape; - n, m = Z_sorted.shape + assert(n >= m) + + if kmax is None: + kmax = m + + if n == m: + # square distance matrix contains zero diagonals and is not expected to + # be sorted! + if kmax == m: + Z_sorted = np.sort(Z, axis=1)[:, 1:] + else: + Z_sorted = sort_k_smallest(Z, kmax+1, axis=1)[:, 1:] + m -= 1 + else: + # rectangular distance matrix without the zeros + if is_sorted: + assert(np.all(Z[:,0]==0)) + else: + if kmax == m: + Z_sorted = np.sort(Z, axis=1) + else: + # only sort the smallest k elements! + Z_sorted = sort_k_smallest(Z, kmax, axis=1) B_k = np.cumsum(Z_sorted, axis=1) # cummulative sum for each row K_mat = np.tile(np.arange(1,m+1), (n, 1)) ## Theoretical intervals of theta for each desired sparsity level: if geom_mean: - theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15), axis=0) - else: # try geometric mean instead of arithmetic: theta_u = np.exp(np.mean(np.log(1/(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15)), axis=0)) + else: + theta_u = np.mean(1./(np.sqrt(K_mat*Z_sorted*Z_sorted - B_k*Z_sorted+1e-7)+1e-15), axis=0) theta_l = np.zeros(theta_u.shape) theta_l[:-1] = theta_u[1:] return theta_l, theta_u, Z_sorted -def gsp_compute_graph_learning_theta(Z, k, geom_mean=False, islist=None): +def gsp_compute_graph_learning_theta(Z, k, geom_mean=False, is_sorted=False): + ''' + Z : squared or pairwise distance matrix (zero diagonal) + OR [n,m] sized distance matrix with m smallest distances (nonzero) + per node. Not necessarily sorted, but if sorted we save computation + ''' - # Z is the zero-diagonal pairwise distance matrix between nodes - theta_min, theta_max, _ = compute_theta_bounds(Z, geom_mean, islist) + theta_min, theta_max, _ = compute_theta_bounds(Z, geom_mean, is_sorted) theta_min = theta_min[k-1]; theta_max = theta_max[k-1]; @@ -296,7 +327,7 @@ def learn_graph_log_degree(Z, References ---------- - See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more informations. + See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more information. """ @@ -355,7 +386,7 @@ def learn_graph_log_degree(Z, fprox = lambda w, gamma: np.minimum(max_w, np.maximum(0, w - 2*gamma*z)) # weighted soft thresholding op geval = lambda v: -a * np.sum(np.log(v+1e-15)) - gprox = lambda v, gamma: prox_sum_log(v, gamma*a) + # gprox = lambda v, gamma: prox_sum_log(v, gamma*a) # proximal of conjugate of g: v-gamma*gprox(v/gamma, 1/gamma) g_star_prox = lambda v, gamma: v - gamma*a * prox_sum_log(v/(gamma*a), 1/(gamma*a)) @@ -428,7 +459,7 @@ def learn_graph_log_degree(Z, stat['time'] = time()-tstart if verbosity > 0: print('# iters: {:4d}. Rel primal: {:6.4e} Rel dual: {:6.4e} OBJ {:6.3e}'.format( - i, rel_norm_primal, rel_norm_dual, feval(w) + geval(K_op(w)) + heval(w))) + i+1, rel_norm_primal, rel_norm_dual, feval(w) + geval(K_op(w)) + heval(w))) print('Time needed is {:f} seconds'.format(stat['time'])) # Force positivity on the solution @@ -481,8 +512,9 @@ class LearnedFromSmoothSignals(Graph): b : Weight of the sparsity term (leave this to None for automatic setup) k : desired average number of neigboors kk: desired number of neiboors for the sparse support (Default 3k) - sparse: Use a sparse support. Set this to True to scale. (Default True) + sparse: Use a sparse support. Set this to True to scale. (Default False for n<1000, True otherwise) rel_edge : Remove all edges bellow this tolerance after convergence of the algorithm + edge_mask: A mask of the allowed edge pattern, size [Nnodes x Nnodes]. Only edges with >0 will be learned param_nn : Parameters for the nearest neighboor search (dictionary). See :func:`nearest_neighbor`. param_opt: Parameters for the optimization algorithm. See :func:`learn_graph_log_degree`. @@ -524,16 +556,44 @@ class LearnedFromSmoothSignals(Graph): See :cite:`kalofolias2018large` and :cite:`kalofolias2016learn` for more information. """ - def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, param_nn={}, param_opt={}, **kwargs): - + # TODO: allow Z to be given by user if pre-computed + def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=None, rel_edge=1e-5, edge_mask=None, param_nn={}, param_opt={}, **kwargs): + if sparse is None: + if X.shape[0] <= 1000: + sparse = False + else: + sparse = True + _logger.info( + '''For large graphs (1000 nodes+) graph learning might be considerably slow (O(n^2) + complexity). Consider using a sparse set of allowed edges to learn. Automatic + fallback to approximate NN. To override this decision specify sparse=False. To + suppress this info message, specify sparse=True.''') + if sparse: - if kk is None: - kk = 3*k - neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) - # the original method needs squared distances!! - Z = sparse_distance_matrix(neighbors, distances**2) - edge_mask = Z>0 - Zp = distances[:,1:]**2 + if edge_mask is None: + if kk is None: + kk = 3*k + neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) + # the original method needs squared distances!! + Z = sparse_distance_matrix(neighbors, distances**2) + edge_mask = Z>0 + Zp = distances[:, 1:]**2 + else: + '''If edge mask is given, the pairwise distance should be only + computed for the edges indicated by the mask''' + # TODO: form should not be matrix here if + # inside learn_graph_log_degree it becomes eventually vector + # TODO: allow different distances than euclidean!! + Z = distances_from_edge_mask(X, edge_mask, form='matrix')**2 + if a is None or b is None: + # TODO: implement automatic a and b selection here! + _logger.error( + '''If you set manually your own edge_mask, you also + have to set up a and b manually''' + ) + raise NotImplementedError( + '''If you set manually an edge_mask, you also have to + set a and b manually''') else: # the original method needs squared distances!! Z = distanz(X.transpose())**2 diff --git a/pygsp/utils.py b/pygsp/utils.py index 2931221d..4dd0f6ef 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -26,12 +26,12 @@ def build_logger(name): formatter = logging.Formatter( "%(asctime)s:[%(levelname)s](%(name)s.%(funcName)s): %(message)s") - steam_handler = logging.StreamHandler() - steam_handler.setLevel(logging.DEBUG) - steam_handler.setFormatter(formatter) + stream_handler = logging.StreamHandler() + stream_handler.setLevel(logging.DEBUG) + stream_handler.setFormatter(formatter) logger.setLevel(logging.DEBUG) - logger.addHandler(steam_handler) + logger.addHandler(stream_handler) return logger @@ -91,7 +91,7 @@ def loadmat(path): def distanz(x, y=None): r""" - Calculate the distance between two colon vectors. + Calculate the distance between two vectors. Parameters ---------- From 2e80742418346f62ce8549f22e21748d1e716605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathana=C3=ABl=20Perraudin?= <6399466+nperraud@users.noreply.github.com> Date: Thu, 14 May 2020 22:38:05 +0200 Subject: [PATCH 116/118] fix square bug --- pygsp/graphs/learned.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygsp/graphs/learned.py b/pygsp/graphs/learned.py index 2085ab6a..296121f4 100644 --- a/pygsp/graphs/learned.py +++ b/pygsp/graphs/learned.py @@ -531,11 +531,11 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, if kk is None: kk = 3*k neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) - Z = sparse_distance_matrix(neighbors, distances) + Z = sparse_distance_matrix(neighbors, distances**2) edge_mask = Z>0 Zp = distances[:,1:] else: - Z = distanz(X.transpose()) + Z = distanz(X.transpose())**2 Zp = Z edge_mask = None if a is None and b is None: From 6005c132fd3b8715dadffa87b524f267ca0291b7 Mon Sep 17 00:00:00 2001 From: nperraud <6399466+nperraud@users.noreply.github.com> Date: Fri, 15 May 2020 14:33:43 +0200 Subject: [PATCH 117/118] Update learned.py --- pygsp/graphs/learned.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/graphs/learned.py b/pygsp/graphs/learned.py index 296121f4..285a111a 100644 --- a/pygsp/graphs/learned.py +++ b/pygsp/graphs/learned.py @@ -533,7 +533,7 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=True, rel_edge=1e-5, neighbors, distances = nearest_neighbor(X, k=kk, **param_nn) Z = sparse_distance_matrix(neighbors, distances**2) edge_mask = Z>0 - Zp = distances[:,1:] + Zp = distances[:,1:]**2 else: Z = distanz(X.transpose())**2 Zp = Z From e69158cdc0121c13a86907d53a590a736e8f3a87 Mon Sep 17 00:00:00 2001 From: Vassilis Kalofolias Date: Sun, 21 Jun 2020 13:05:59 +0200 Subject: [PATCH 118/118] minor exception fix --- playground.ipynb | 22 ++++++++++++++++++++-- pygsp/graphs/learned.py | 3 ++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/playground.ipynb b/playground.ipynb index 6f0831d1..4327ef3a 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -117,7 +117,25 @@ ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/pygsp/graphs/learned.py b/pygsp/graphs/learned.py index 967d6433..c5f9ba4e 100644 --- a/pygsp/graphs/learned.py +++ b/pygsp/graphs/learned.py @@ -591,7 +591,8 @@ def __init__(self, X, a=None, b=None, k=10, kk=None, sparse=None, rel_edge=1e-5, '''If you set manually your own edge_mask, you also have to set up a and b manually''' ) - raise NotImplementedError( + # raise NotImplementedError( + raise TypeError( '''If you set manually an edge_mask, you also have to set a and b manually''') else: