From 911f69293dbabb275ff5fa812697270de301decb Mon Sep 17 00:00:00 2001 From: FRIGO Matteo Date: Mon, 29 Mar 2021 11:39:38 +0200 Subject: [PATCH 1/4] FEAT: implementation of non-contiguous group lasso penalty This commit adds a new class to the copt.penalty module which defines a non-contiguous version of the existing GroupL1 penalty. The main differences between the two are: - Parsing of the input. GroupL1nc requires just non-overlapping groups. - Definition of block matrix adapted to non-contiguous setting. - Evaluation of _prox_gl adapted to non-contiguous setting. - Result of simple test adapted to new block matrix. Minor style/PEP8 improvements have been applied to the modified files. --- copt/penalty.py | 93 ++++++++++++++++++++++++++++++++++++++++- tests/test_penalties.py | 46 ++++++++++++++++---- 2 files changed, 131 insertions(+), 8 deletions(-) diff --git a/copt/penalty.py b/copt/penalty.py index b9774f39..af9eb962 100644 --- a/copt/penalty.py +++ b/copt/penalty.py @@ -143,6 +143,97 @@ def _prox_gl(x, i, indices, indptr, d, step_size): return _prox_gl, B +class GroupL1nc: + """ + Group Lasso penalty with NON-CONTIGUOUS and NON-OVERLAPPING groups + + Args: + alpha: float + Constant multiplying this loss + + blocks: list of lists + + """ + + def __init__(self, alpha, groups): + self.alpha = alpha + + sum_groups = np.sum([len(g) for g in groups]) + all_indices = list(groups[0]) + for g in groups[1:]: + all_indices.extend(list(g)) + n_unique = np.unique(all_indices).size + if sum_groups != n_unique: + raise ValueError('Groups must not overlap.') + self.groups = groups + + def __call__(self, x): + return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups]) + + def prox(self, x, step_size): + out = x.copy() + for g in self.groups: + norm = np.linalg.norm(x[g]) + if norm > self.alpha * step_size: + out[g] -= step_size * self.alpha * out[g] / norm + else: + out[g] = 0 + return out + + def prox_factory(self, n_features): + B_data = np.zeros(n_features) + B_indices = np.zeros(n_features, dtype=np.int32) + B_indptr = np.zeros(n_features + 1, dtype=np.int32) + + feature_pointer = 0 + block_pointer = 0 + for g in self.groups: + for atom in g: + B_data[feature_pointer] = 1. + B_indices[feature_pointer] = atom + feature_pointer += 1 + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + len(g) + block_pointer += 1 + + excluded_indices = np.ones(n_features, dtype=np.int32) + excluded_indices[B_indices[: feature_pointer + 1]] = 0. + for i in np.where(excluded_indices)[0]: + B_data[feature_pointer] = -1. + B_indices[feature_pointer] = i + feature_pointer += 1 + + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 + block_pointer += 1 + + B_indptr = B_indptr[: block_pointer + 1] + B = sparse.csr_matrix((B_data, B_indices, B_indptr)) + + alpha = self.alpha + + @njit + def _prox_gl(x, i, indices, indptr, d, step_size): + for b in range(indptr[i], indptr[i + 1]): + h = indices[b] + if B_data[B_indices[B_indptr[h]]] <= 0: + continue + ss = step_size * d[h] + norm = 0.0 + for j in range(B_indptr[h], B_indptr[h + 1]): + j_idx = B_indices[j] + norm += x[j_idx] ** 2 + norm = np.sqrt(norm) + if norm > alpha * ss: + for j in range(B_indptr[h], B_indptr[h + 1]): + j_idx = B_indices[j] + x[j_idx] *= 1 - alpha * ss / norm + else: + for j in range(B_indptr[h], B_indptr[h + 1]): + j_idx = B_indices[j] + x[j_idx] = 0.0 + + return _prox_gl, B + + class FusedLasso: """ Fused Lasso penalty @@ -304,4 +395,4 @@ def prox(self, x, step_size): self.n_cols, max_iter=self.max_iter, tol=self.tol, - ) \ No newline at end of file + ) diff --git a/tests/test_penalties.py b/tests/test_penalties.py index a67de838..f13e39bb 100644 --- a/tests/test_penalties.py +++ b/tests/test_penalties.py @@ -1,14 +1,15 @@ import numpy as np -import copt as cp +import pytest +from numpy import testing + import copt.constraint import copt.penalty from copt import tv_prox -from numpy import testing -import pytest proximal_penalties = [ copt.penalty.L1Norm(1.0), copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)), + copt.penalty.GroupL1nc(1.0, np.array_split(np.arange(16), 5)), copt.penalty.TraceNorm(1.0, (4, 4)), copt.constraint.TraceBall(1.0, (4, 4)), copt.penalty.TotalVariation2D(1.0, (4, 4)), @@ -59,6 +60,36 @@ def test_GroupL1(): # assert np.unique(blocks[g]).size == 1 +def test_GroupL1nc(): + groups = [(0, 1), (2, 3)] + g1 = copt.penalty.GroupL1nc(1.0, groups) + _, B = g1.prox_factory(5) + assert np.all( + B.toarray() + == np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, -1.0], + ] + ) + ) + + groups = [(0, 1), (3, 4)] + g2 = copt.penalty.GroupL1nc(1.0, groups) + _, B = g2.prox_factory(5) + assert np.all( + B.toarray() + == np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, -1.0, 0.0, 0.0], + ] + ) + ) + + def test_tv1_prox(): """ Use the properties of strongly convex functions to test the implementation @@ -99,7 +130,8 @@ def tv_norm(x, n_rows, n_cols): for nrun in range(20): x = np.random.randn(n_features) - x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000) + x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, + max_iter=10000) diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols) testing.assert_array_less( ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj @@ -137,8 +169,8 @@ def test_three_inequality(pen): lhs = 2 * (pen(xi) - pen(u)) rhs = ( - np.linalg.norm(u - z) ** 2 - - np.linalg.norm(u - xi) ** 2 - - np.linalg.norm(xi - z) ** 2 + np.linalg.norm(u - z) ** 2 + - np.linalg.norm(u - xi) ** 2 + - np.linalg.norm(xi - z) ** 2 ) assert lhs <= rhs, pen From 8478f5ca504e1b701b5184bf313988dec7764555 Mon Sep 17 00:00:00 2001 From: FRIGO Matteo Date: Fri, 2 Apr 2021 14:39:48 +0200 Subject: [PATCH 2/4] merge GroupL1 and GroupL1nc classes and generalise group-wise penalty This commit merges the GroupL1 classes for contiguous and non-contiguous groups, adding the possibility to apply group-specific penalties. The commit also adds the necessary tests and improves the existing ones. Minor PEP8 issues are fixed. --- copt/penalty.py | 130 ++++++++++------------------------------ tests/test_penalties.py | 97 ++++++++++++------------------ 2 files changed, 71 insertions(+), 156 deletions(-) diff --git a/copt/penalty.py b/copt/penalty.py index af9eb962..34cd84c4 100644 --- a/copt/penalty.py +++ b/copt/penalty.py @@ -55,7 +55,7 @@ def _prox_L1(x, i, indices, indptr, d, step_size): class GroupL1: """ - Group Lasso penalty + Group Lasso penalty for non-overlapping groups Args: alpha: float @@ -63,99 +63,18 @@ class GroupL1: blocks: list of lists - """ - - def __init__(self, alpha, groups): - self.alpha = alpha - # groups need to be increasing - for i, g in enumerate(groups): - if not np.all(np.diff(g) == 1): - raise ValueError("Groups must be contiguous") - if i > 0 and groups[i - 1][-1] >= g[0]: - raise ValueError("Groups must be increasing") - self.groups = groups - - def __call__(self, x): - return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups]) - - def prox(self, x, step_size): - out = x.copy() - for g in self.groups: - - norm = np.linalg.norm(x[g]) - if norm > self.alpha * step_size: - out[g] -= step_size * self.alpha * out[g] / norm - else: - out[g] = 0 - return out - - def prox_factory(self, n_features): - B_data = np.zeros(n_features) - B_indices = np.arange(n_features, dtype=np.int32) - B_indptr = np.zeros(n_features + 1, dtype=np.int32) - - feature_pointer = 0 - block_pointer = 0 - for g in self.groups: - while feature_pointer < g[0]: - # non-penalized feature - B_data[feature_pointer] = -1.0 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 - feature_pointer += 1 - block_pointer += 1 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] - for _ in g: - B_data[feature_pointer] = 1.0 - B_indptr[block_pointer + 1] += 1 - feature_pointer += 1 - block_pointer += 1 - for _ in range(feature_pointer, n_features): - B_data[feature_pointer] = -1.0 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 - feature_pointer += 1 - block_pointer += 1 - - B_indptr = B_indptr[: block_pointer + 1] - B = sparse.csr_matrix((B_data, B_indices, B_indptr)) - alpha = self.alpha - - @njit - def _prox_gl(x, i, indices, indptr, d, step_size): - for b in range(indptr[i], indptr[i + 1]): - h = indices[b] - if B_data[B_indices[B_indptr[h]]] <= 0: - continue - ss = step_size * d[h] - norm = 0.0 - for j in range(B_indptr[h], B_indptr[h + 1]): - j_idx = B_indices[j] - norm += x[j_idx] ** 2 - norm = np.sqrt(norm) - if norm > alpha * ss: - for j in range(B_indptr[h], B_indptr[h + 1]): - j_idx = B_indices[j] - x[j_idx] *= 1 - alpha * ss / norm - else: - for j in range(B_indptr[h], B_indptr[h + 1]): - j_idx = B_indices[j] - x[j_idx] = 0.0 - - return _prox_gl, B - - -class GroupL1nc: - """ - Group Lasso penalty with NON-CONTIGUOUS and NON-OVERLAPPING groups - - Args: - alpha: float - Constant multiplying this loss - - blocks: list of lists + weights: (optional) + - If not passed, each group gets the same penalty (=1). (default) + - If 'nf', each groups gets a penalty equal to the square root of + the number of features indexed in it. + - If 'nfi', each group gets a penalty equal to the inverse of the + square root of the number of features indexed in it. + - If iterable, the i-th group gets a penalty equal to the i-th + entry of the passed iterable. """ - def __init__(self, alpha, groups): + def __init__(self, alpha, groups, weights=None): self.alpha = alpha sum_groups = np.sum([len(g) for g in groups]) @@ -165,17 +84,34 @@ def __init__(self, alpha, groups): n_unique = np.unique(all_indices).size if sum_groups != n_unique: raise ValueError('Groups must not overlap.') - self.groups = groups + self.groups = [list(g) for g in groups] + + if weights is None: + self.weights = np.ones(len(self.groups), dtype=np.float32) + elif isinstance(weights, str): + if weights == 'nf': + self.weights = np.asarray([np.sqrt(len(g)) for g in + self.groups]) + elif weights == 'nfi': + self.weights = np.asarray([1 / np.sqrt(len(g)) for g in + self.groups]) + else: + if len(weights) != len(self.groups): + raise ValueError('Length of weights must be equal to number ' + 'of groups.') + self.weights = np.asarray(weights) def __call__(self, x): - return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups]) + return self.alpha * np.sum([w * np.linalg.norm(x[g]) for w, g in + zip(self.weights, self.groups)]) def prox(self, x, step_size): out = x.copy() - for g in self.groups: + for w, g in zip(self.weights, self.groups): norm = np.linalg.norm(x[g]) - if norm > self.alpha * step_size: - out[g] -= step_size * self.alpha * out[g] / norm + r = w * self.alpha * step_size + if norm > r: + out[g] -= r * out[g] / norm else: out[g] = 0 return out @@ -222,7 +158,7 @@ def _prox_gl(x, i, indices, indptr, d, step_size): j_idx = B_indices[j] norm += x[j_idx] ** 2 norm = np.sqrt(norm) - if norm > alpha * ss: + if norm > alpha * ss * self.weights[h]: for j in range(B_indptr[h], B_indptr[h + 1]): j_idx = B_indices[j] x[j_idx] *= 1 - alpha * ss / norm diff --git a/tests/test_penalties.py b/tests/test_penalties.py index f13e39bb..92ea6dcb 100644 --- a/tests/test_penalties.py +++ b/tests/test_penalties.py @@ -9,7 +9,6 @@ proximal_penalties = [ copt.penalty.L1Norm(1.0), copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)), - copt.penalty.GroupL1nc(1.0, np.array_split(np.arange(16), 5)), copt.penalty.TraceNorm(1.0, (4, 4)), copt.constraint.TraceBall(1.0, (4, 4)), copt.penalty.TotalVariation2D(1.0, (4, 4)), @@ -18,76 +17,56 @@ def test_GroupL1(): + groups = [(0, 1), (1, 2)] + with np.testing.assert_raises(ValueError): + copt.penalty.GroupL1(1.0, groups) + groups = [(0, 1), (2, 3)] + weights = [1, 2, 3] + with np.testing.assert_raises(ValueError): + copt.penalty.GroupL1(1.0, groups, weights) + g1 = copt.penalty.GroupL1(1.0, groups) - _, B = g1.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 0.0, -1.0], - ] - ) - ) + x = np.array([1., 2, 3, 4]) + gt = np.linalg.norm(x[[0, 1]]) + np.linalg.norm(x[[2, 3]]) + np.testing.assert_almost_equal(g1(x), gt) - groups = [(0, 1), (3, 4)] - g2 = copt.penalty.GroupL1(1.0, groups) - _, B = g2.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, -1.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 1.0, 1.0], - ] - ) - ) + gt = np.array([1., 1]) + np.testing.assert_almost_equal(g1.weights, gt) + g2 = copt.penalty.GroupL1(1.0, groups, 'nf') + gt = np.array([np.sqrt(2), np.sqrt(2)]) + np.testing.assert_almost_equal(g2.weights, gt) -# -# for blocks in [[(0, 1), (2, 3)], ]: -# pen = cp.utils.GroupL1(1., blocks) -# counter = 0 -# for g in pen.groups: -# for j in g: -# counter += 1 -# assert counter == blocks.size -# assert pen.groups -# for g in pen.groups: -# assert np.unique(blocks[g]).size == 1 + g3 = copt.penalty.GroupL1(1.0, groups, 'nfi') + gt = 1. / gt + np.testing.assert_almost_equal(g3.weights, gt) + gt = np.random.rand(len(groups)) + g4 = copt.penalty.GroupL1(1.0, groups, gt) + np.testing.assert_almost_equal(g4.weights, gt) -def test_GroupL1nc(): - groups = [(0, 1), (2, 3)] - g1 = copt.penalty.GroupL1nc(1.0, groups) _, B = g1.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 0.0, -1.0], - ] - ) + gt = np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, -1.0], + ] ) + np.testing.assert_almost_equal(B.toarray(), gt) groups = [(0, 1), (3, 4)] - g2 = copt.penalty.GroupL1nc(1.0, groups) - _, B = g2.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 1.0, 1.0], - [0.0, 0.0, -1.0, 0.0, 0.0], - ] - ) + g5 = copt.penalty.GroupL1(1.0, groups) + _, B = g5.prox_factory(5) + gt = np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, -1.0, 0.0, 0.0], + ] ) + np.testing.assert_almost_equal(B.toarray(), gt) def test_tv1_prox(): From 25ba88161e1839d3325ba8db37ffd079d0e3ef8f Mon Sep 17 00:00:00 2001 From: FRIGO Matteo Date: Sat, 3 Apr 2021 16:36:27 +0200 Subject: [PATCH 3/4] separate penalty classes for overlapping and non-overlapping GroupL1 This commit adds the OGroupL1 class for the overlapping groups case. The class is independent w.r.t. GroupL1 as the computation of the proximal operator in the sparse case is quite delicate to deal with. The corresponding tests have been adapted and extended where possible. --- copt/penalty.py | 86 ++++++++++++++++++++++++++++++++++++++++ tests/test_penalties.py | 84 ++++++++++++++++++++++++++++++++++++--- tests/test_randomized.py | 4 +- 3 files changed, 167 insertions(+), 7 deletions(-) diff --git a/copt/penalty.py b/copt/penalty.py index 34cd84c4..edc36f19 100644 --- a/copt/penalty.py +++ b/copt/penalty.py @@ -170,6 +170,92 @@ def _prox_gl(x, i, indices, indptr, d, step_size): return _prox_gl, B +class OGroupL1: + """ + Overlapping Group Lasso penalty + Args: + alpha: float + Constant multiplying this loss + blocks: list of lists + """ + + def __init__(self, alpha, groups): + self.alpha = alpha + # groups need to be increasing + for i, g in enumerate(groups): + if not np.all(np.diff(g) == 1): + raise ValueError("Groups must be contiguous") + if i > 0 and groups[i - 1][-1] >= g[0]: + raise ValueError("Groups must be increasing") + self.groups = groups + + def __call__(self, x): + return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups]) + + def prox(self, x, step_size): + out = x.copy() + for g in self.groups: + norm = np.linalg.norm(x[g]) + if norm > self.alpha * step_size: + out[g] -= step_size * self.alpha * out[g] / norm + else: + out[g] = 0 + return out + + def prox_factory(self, n_features): + B_data = np.zeros(n_features) + B_indices = np.arange(n_features, dtype=np.int32) + B_indptr = np.zeros(n_features + 1, dtype=np.int32) + + feature_pointer = 0 + block_pointer = 0 + for g in self.groups: + while feature_pointer < g[0]: + # non-penalized feature + B_data[feature_pointer] = -1.0 + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 + feature_pointer += 1 + block_pointer += 1 + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + for _ in g: + B_data[feature_pointer] = 1.0 + B_indptr[block_pointer + 1] += 1 + feature_pointer += 1 + block_pointer += 1 + for _ in range(feature_pointer, n_features): + B_data[feature_pointer] = -1.0 + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 + feature_pointer += 1 + block_pointer += 1 + + B_indptr = B_indptr[: block_pointer + 1] + B = sparse.csr_matrix((B_data, B_indices, B_indptr)) + alpha = self.alpha + + @njit + def _prox_gl(x, i, indices, indptr, d, step_size): + for b in range(indptr[i], indptr[i + 1]): + h = indices[b] + if B_data[B_indices[B_indptr[h]]] <= 0: + continue + ss = step_size * d[h] + norm = 0.0 + for j in range(B_indptr[h], B_indptr[h + 1]): + j_idx = B_indices[j] + norm += x[j_idx] ** 2 + norm = np.sqrt(norm) + if norm > alpha * ss: + for j in range(B_indptr[h], B_indptr[h + 1]): + j_idx = B_indices[j] + x[j_idx] *= 1 - alpha * ss / norm + else: + for j in range(B_indptr[h], B_indptr[h + 1]): + j_idx = B_indices[j] + x[j_idx] = 0.0 + + return _prox_gl, B + + class FusedLasso: """ Fused Lasso penalty diff --git a/tests/test_penalties.py b/tests/test_penalties.py index 92ea6dcb..ff270c61 100644 --- a/tests/test_penalties.py +++ b/tests/test_penalties.py @@ -9,6 +9,7 @@ proximal_penalties = [ copt.penalty.L1Norm(1.0), copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)), + copt.penalty.OGroupL1(1.0, np.array_split(np.arange(16), 5)), copt.penalty.TraceNorm(1.0, (4, 4)), copt.constraint.TraceBall(1.0, (4, 4)), copt.penalty.TotalVariation2D(1.0, (4, 4)), @@ -17,35 +18,66 @@ def test_GroupL1(): + # non-overlapping groups = [(0, 1), (1, 2)] with np.testing.assert_raises(ValueError): copt.penalty.GroupL1(1.0, groups) + # converts group type from tuple to list groups = [(0, 1), (2, 3)] + pen = copt.penalty.GroupL1(1.0, groups) + for g in pen.groups: + np.testing.assert_(isinstance(g, list)) + + # same number of groups and weights + groups = [[0, 1], [2, 3]] weights = [1, 2, 3] with np.testing.assert_raises(ValueError): copt.penalty.GroupL1(1.0, groups, weights) - g1 = copt.penalty.GroupL1(1.0, groups) - x = np.array([1., 2, 3, 4]) - gt = np.linalg.norm(x[[0, 1]]) + np.linalg.norm(x[[2, 3]]) - np.testing.assert_almost_equal(g1(x), gt) - + # evaluation of penalty and prox + x = np.array([0.01, 0.5, 3, 4]) + weights = np.array([10, .2]) + g0 = copt.penalty.GroupL1(1, groups, weights) + # eval + result = g0(x) + gt = (weights[0] * np.linalg.norm(x[groups[0]], 2) + + weights[1] * np.linalg.norm(x[groups[1]], 2)) + np.testing.assert_almost_equal(result, gt) + # prox + gt = x.copy() + # the first group has norm lower than the corresponding weight + gt[groups[0]] = 0 + # the second group has norm higher than the corresponding weight + gt[groups[1]] -= (x[groups[1]] * weights[1] / + np.linalg.norm(x[groups[1]])) + prox = g0.prox(x, 1) + np.testing.assert_almost_equal(prox, gt) + + # default weights + g1 = copt.penalty.GroupL1(1, groups) gt = np.array([1., 1]) np.testing.assert_almost_equal(g1.weights, gt) + # weights: sqrt(|g|) g2 = copt.penalty.GroupL1(1.0, groups, 'nf') gt = np.array([np.sqrt(2), np.sqrt(2)]) np.testing.assert_almost_equal(g2.weights, gt) + # weights: sqrt(|g|) ** -1 g3 = copt.penalty.GroupL1(1.0, groups, 'nfi') gt = 1. / gt np.testing.assert_almost_equal(g3.weights, gt) + # custom weights gt = np.random.rand(len(groups)) g4 = copt.penalty.GroupL1(1.0, groups, gt) np.testing.assert_almost_equal(g4.weights, gt) + expected = (np.linalg.norm(x[[0, 1]]) * gt[0] + + np.linalg.norm(x[[2, 3]]) * gt[1]) + np.testing.assert_almost_equal(g4(x), expected) + # sparse proximal _, B = g1.prox_factory(5) gt = np.array( [ @@ -69,6 +101,48 @@ def test_GroupL1(): np.testing.assert_almost_equal(B.toarray(), gt) +def test_OverlappingGroupL1(): + groups = [(0, 1), (2, 3)] + g1 = copt.penalty.OGroupL1(1.0, groups) + _, B = g1.prox_factory(5) + assert np.all( + B.toarray() + == np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, -1.0], + ] + ) + ) + + groups = [(0, 1), (3, 4)] + g2 = copt.penalty.OGroupL1(1.0, groups) + _, B = g2.prox_factory(5) + assert np.all( + B.toarray() + == np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, -1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0], + ] + ) + ) + + +# for blocks in [[(0, 1), (2, 3)], ]: +# pen = cp.utils.GroupL1(1., blocks) +# counter = 0 +# for g in pen.groups: +# for j in g: +# counter += 1 +# assert counter == blocks.size +# assert pen.groups +# for g in pen.groups: +# assert np.unique(blocks[g]).size == 1 + + def test_tv1_prox(): """ Use the properties of strongly convex functions to test the implementation diff --git a/tests/test_randomized.py b/tests/test_randomized.py index 34d720e2..9c64684e 100644 --- a/tests/test_randomized.py +++ b/tests/test_randomized.py @@ -195,8 +195,8 @@ def test_vrtos_ogl(): groups_2 = [np.arange(5, 10)] f = copt.loss.LogLoss(A, b, alpha) for beta in np.logspace(-3, 3, 3): - p_1 = copt.penalty.GroupL1(beta, groups_1) - p_2 = copt.penalty.GroupL1(beta, groups_2) + p_1 = copt.penalty.OGroupL1(beta, groups_1) + p_2 = copt.penalty.OGroupL1(beta, groups_2) L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density opt_vrtos = cp.minimize_vrtos( From be2c4e921555e606df7ff1e9ae9ee0cf447d469d Mon Sep 17 00:00:00 2001 From: FRIGO Matteo Date: Tue, 6 Apr 2021 10:23:09 +0200 Subject: [PATCH 4/4] remove overlapping group lasso penalty The current implementation of the overlapping group lass is incorrect. This commit removes it from the codebase. The corresponding tests are also removed, with the exception of a test for the VRTOS solver involving overlapping group lasso penalties, which is now commented. Minor PEP8 issues are solved. --- copt/penalty.py | 86 ---------------------------------------- tests/test_penalties.py | 43 -------------------- tests/test_randomized.py | 66 +++++++++++++++--------------- 3 files changed, 33 insertions(+), 162 deletions(-) diff --git a/copt/penalty.py b/copt/penalty.py index edc36f19..34cd84c4 100644 --- a/copt/penalty.py +++ b/copt/penalty.py @@ -170,92 +170,6 @@ def _prox_gl(x, i, indices, indptr, d, step_size): return _prox_gl, B -class OGroupL1: - """ - Overlapping Group Lasso penalty - Args: - alpha: float - Constant multiplying this loss - blocks: list of lists - """ - - def __init__(self, alpha, groups): - self.alpha = alpha - # groups need to be increasing - for i, g in enumerate(groups): - if not np.all(np.diff(g) == 1): - raise ValueError("Groups must be contiguous") - if i > 0 and groups[i - 1][-1] >= g[0]: - raise ValueError("Groups must be increasing") - self.groups = groups - - def __call__(self, x): - return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups]) - - def prox(self, x, step_size): - out = x.copy() - for g in self.groups: - norm = np.linalg.norm(x[g]) - if norm > self.alpha * step_size: - out[g] -= step_size * self.alpha * out[g] / norm - else: - out[g] = 0 - return out - - def prox_factory(self, n_features): - B_data = np.zeros(n_features) - B_indices = np.arange(n_features, dtype=np.int32) - B_indptr = np.zeros(n_features + 1, dtype=np.int32) - - feature_pointer = 0 - block_pointer = 0 - for g in self.groups: - while feature_pointer < g[0]: - # non-penalized feature - B_data[feature_pointer] = -1.0 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 - feature_pointer += 1 - block_pointer += 1 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] - for _ in g: - B_data[feature_pointer] = 1.0 - B_indptr[block_pointer + 1] += 1 - feature_pointer += 1 - block_pointer += 1 - for _ in range(feature_pointer, n_features): - B_data[feature_pointer] = -1.0 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 - feature_pointer += 1 - block_pointer += 1 - - B_indptr = B_indptr[: block_pointer + 1] - B = sparse.csr_matrix((B_data, B_indices, B_indptr)) - alpha = self.alpha - - @njit - def _prox_gl(x, i, indices, indptr, d, step_size): - for b in range(indptr[i], indptr[i + 1]): - h = indices[b] - if B_data[B_indices[B_indptr[h]]] <= 0: - continue - ss = step_size * d[h] - norm = 0.0 - for j in range(B_indptr[h], B_indptr[h + 1]): - j_idx = B_indices[j] - norm += x[j_idx] ** 2 - norm = np.sqrt(norm) - if norm > alpha * ss: - for j in range(B_indptr[h], B_indptr[h + 1]): - j_idx = B_indices[j] - x[j_idx] *= 1 - alpha * ss / norm - else: - for j in range(B_indptr[h], B_indptr[h + 1]): - j_idx = B_indices[j] - x[j_idx] = 0.0 - - return _prox_gl, B - - class FusedLasso: """ Fused Lasso penalty diff --git a/tests/test_penalties.py b/tests/test_penalties.py index ff270c61..214f82a3 100644 --- a/tests/test_penalties.py +++ b/tests/test_penalties.py @@ -9,7 +9,6 @@ proximal_penalties = [ copt.penalty.L1Norm(1.0), copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)), - copt.penalty.OGroupL1(1.0, np.array_split(np.arange(16), 5)), copt.penalty.TraceNorm(1.0, (4, 4)), copt.constraint.TraceBall(1.0, (4, 4)), copt.penalty.TotalVariation2D(1.0, (4, 4)), @@ -101,48 +100,6 @@ def test_GroupL1(): np.testing.assert_almost_equal(B.toarray(), gt) -def test_OverlappingGroupL1(): - groups = [(0, 1), (2, 3)] - g1 = copt.penalty.OGroupL1(1.0, groups) - _, B = g1.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 0.0, -1.0], - ] - ) - ) - - groups = [(0, 1), (3, 4)] - g2 = copt.penalty.OGroupL1(1.0, groups) - _, B = g2.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, -1.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 1.0, 1.0], - ] - ) - ) - - -# for blocks in [[(0, 1), (2, 3)], ]: -# pen = cp.utils.GroupL1(1., blocks) -# counter = 0 -# for g in pen.groups: -# for j in g: -# counter += 1 -# assert counter == blocks.size -# assert pen.groups -# for g in pen.groups: -# assert np.unique(blocks[g]).size == 1 - - def test_tv1_prox(): """ Use the properties of strongly convex functions to test the implementation diff --git a/tests/test_randomized.py b/tests/test_randomized.py index 9c64684e..0e7828c5 100644 --- a/tests/test_randomized.py +++ b/tests/test_randomized.py @@ -1,10 +1,10 @@ import numpy as np +import pytest from scipy import sparse + import copt as cp import copt.loss import copt.penalty -from copt import randomized -import pytest np.random.seed(0) n_samples, n_features = 20, 10 @@ -188,37 +188,37 @@ def test_gl(groups): assert np.linalg.norm(grad_map) < 1e-6 -def test_vrtos_ogl(): - """Test on overlapping group lasso""" - alpha = 1.0 / n_samples - groups_1 = [np.arange(8)] - groups_2 = [np.arange(5, 10)] - f = copt.loss.LogLoss(A, b, alpha) - for beta in np.logspace(-3, 3, 3): - p_1 = copt.penalty.OGroupL1(beta, groups_1) - p_2 = copt.penalty.OGroupL1(beta, groups_2) - L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density - - opt_vrtos = cp.minimize_vrtos( - f.partial_deriv, - A, - b, - np.zeros(n_features), - 1 / (3 * L), - alpha=alpha, - max_iter=200, - prox_1=p_1.prox_factory(n_features), - prox_2=p_2.prox_factory(n_features), - ) - - opt_tos = cp.minimize_three_split( - f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox - ) - - norm = np.linalg.norm(opt_tos.x) - if norm < 1e-10: - norm = 1 - assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4 +# def test_vrtos_ogl(): +# """Test on overlapping group lasso""" +# alpha = 1.0 / n_samples +# groups_1 = [np.arange(8)] +# groups_2 = [np.arange(5, 10)] +# f = copt.loss.LogLoss(A, b, alpha) +# for beta in np.logspace(-3, 3, 3): +# p_1 = copt.penalty.OGroupL1(beta, groups_1) +# p_2 = copt.penalty.OGroupL1(beta, groups_2) +# L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density +# +# opt_vrtos = cp.minimize_vrtos( +# f.partial_deriv, +# A, +# b, +# np.zeros(n_features), +# 1 / (3 * L), +# alpha=alpha, +# max_iter=200, +# prox_1=p_1.prox_factory(n_features), +# prox_2=p_2.prox_factory(n_features), +# ) +# +# opt_tos = cp.minimize_three_split( +# f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox +# ) +# +# norm = np.linalg.norm(opt_tos.x) +# if norm < 1e-10: +# norm = 1 +# assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4 @pytest.mark.parametrize("A_data", [A, A2])