From 911f69293dbabb275ff5fa812697270de301decb Mon Sep 17 00:00:00 2001
From: FRIGO Matteo <frigo.mat@live.com>
Date: Mon, 29 Mar 2021 11:39:38 +0200
Subject: [PATCH 1/4] FEAT: implementation of non-contiguous group lasso
 penalty

This commit adds a new class to the copt.penalty module which defines
a non-contiguous version of the existing GroupL1 penalty. The main
differences between the two are:
- Parsing of the input. GroupL1nc requires just non-overlapping groups.
- Definition of block matrix adapted to non-contiguous setting.
- Evaluation of _prox_gl adapted to non-contiguous setting.
- Result of simple test adapted to new block matrix.

Minor style/PEP8 improvements have been applied to the modified files.
---
 copt/penalty.py         | 93 ++++++++++++++++++++++++++++++++++++++++-
 tests/test_penalties.py | 46 ++++++++++++++++----
 2 files changed, 131 insertions(+), 8 deletions(-)

diff --git a/copt/penalty.py b/copt/penalty.py
index b9774f39..af9eb962 100644
--- a/copt/penalty.py
+++ b/copt/penalty.py
@@ -143,6 +143,97 @@ def _prox_gl(x, i, indices, indptr, d, step_size):
         return _prox_gl, B
 
 
+class GroupL1nc:
+    """
+    Group Lasso penalty with NON-CONTIGUOUS and NON-OVERLAPPING groups
+
+    Args:
+        alpha: float
+            Constant multiplying this loss
+
+        blocks: list of lists
+
+    """
+
+    def __init__(self, alpha, groups):
+        self.alpha = alpha
+
+        sum_groups = np.sum([len(g) for g in groups])
+        all_indices = list(groups[0])
+        for g in groups[1:]:
+            all_indices.extend(list(g))
+        n_unique = np.unique(all_indices).size
+        if sum_groups != n_unique:
+            raise ValueError('Groups must not overlap.')
+        self.groups = groups
+
+    def __call__(self, x):
+        return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
+
+    def prox(self, x, step_size):
+        out = x.copy()
+        for g in self.groups:
+            norm = np.linalg.norm(x[g])
+            if norm > self.alpha * step_size:
+                out[g] -= step_size * self.alpha * out[g] / norm
+            else:
+                out[g] = 0
+        return out
+
+    def prox_factory(self, n_features):
+        B_data = np.zeros(n_features)
+        B_indices = np.zeros(n_features, dtype=np.int32)
+        B_indptr = np.zeros(n_features + 1, dtype=np.int32)
+
+        feature_pointer = 0
+        block_pointer = 0
+        for g in self.groups:
+            for atom in g:
+                B_data[feature_pointer] = 1.
+                B_indices[feature_pointer] = atom
+                feature_pointer += 1
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + len(g)
+            block_pointer += 1
+
+        excluded_indices = np.ones(n_features, dtype=np.int32)
+        excluded_indices[B_indices[: feature_pointer + 1]] = 0.
+        for i in np.where(excluded_indices)[0]:
+            B_data[feature_pointer] = -1.
+            B_indices[feature_pointer] = i
+            feature_pointer += 1
+
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
+            block_pointer += 1
+
+        B_indptr = B_indptr[: block_pointer + 1]
+        B = sparse.csr_matrix((B_data, B_indices, B_indptr))
+
+        alpha = self.alpha
+
+        @njit
+        def _prox_gl(x, i, indices, indptr, d, step_size):
+            for b in range(indptr[i], indptr[i + 1]):
+                h = indices[b]
+                if B_data[B_indices[B_indptr[h]]] <= 0:
+                    continue
+                ss = step_size * d[h]
+                norm = 0.0
+                for j in range(B_indptr[h], B_indptr[h + 1]):
+                    j_idx = B_indices[j]
+                    norm += x[j_idx] ** 2
+                norm = np.sqrt(norm)
+                if norm > alpha * ss:
+                    for j in range(B_indptr[h], B_indptr[h + 1]):
+                        j_idx = B_indices[j]
+                        x[j_idx] *= 1 - alpha * ss / norm
+                else:
+                    for j in range(B_indptr[h], B_indptr[h + 1]):
+                        j_idx = B_indices[j]
+                        x[j_idx] = 0.0
+
+        return _prox_gl, B
+
+
 class FusedLasso:
     """
     Fused Lasso penalty
@@ -304,4 +395,4 @@ def prox(self, x, step_size):
             self.n_cols,
             max_iter=self.max_iter,
             tol=self.tol,
-        )
\ No newline at end of file
+        )
diff --git a/tests/test_penalties.py b/tests/test_penalties.py
index a67de838..f13e39bb 100644
--- a/tests/test_penalties.py
+++ b/tests/test_penalties.py
@@ -1,14 +1,15 @@
 import numpy as np
-import copt as cp
+import pytest
+from numpy import testing
+
 import copt.constraint
 import copt.penalty
 from copt import tv_prox
-from numpy import testing
-import pytest
 
 proximal_penalties = [
     copt.penalty.L1Norm(1.0),
     copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)),
+    copt.penalty.GroupL1nc(1.0, np.array_split(np.arange(16), 5)),
     copt.penalty.TraceNorm(1.0, (4, 4)),
     copt.constraint.TraceBall(1.0, (4, 4)),
     copt.penalty.TotalVariation2D(1.0, (4, 4)),
@@ -59,6 +60,36 @@ def test_GroupL1():
 #             assert np.unique(blocks[g]).size == 1
 
 
+def test_GroupL1nc():
+    groups = [(0, 1), (2, 3)]
+    g1 = copt.penalty.GroupL1nc(1.0, groups)
+    _, B = g1.prox_factory(5)
+    assert np.all(
+        B.toarray()
+        == np.array(
+            [
+                [1.0, 1.0, 0.0, 0.0, 0.0],
+                [0.0, 0.0, 1.0, 1.0, 0.0],
+                [0.0, 0.0, 0.0, 0.0, -1.0],
+            ]
+        )
+    )
+
+    groups = [(0, 1), (3, 4)]
+    g2 = copt.penalty.GroupL1nc(1.0, groups)
+    _, B = g2.prox_factory(5)
+    assert np.all(
+        B.toarray()
+        == np.array(
+            [
+                [1.0, 1.0, 0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0, 1.0, 1.0],
+                [0.0, 0.0, -1.0, 0.0, 0.0],
+            ]
+        )
+    )
+
+
 def test_tv1_prox():
     """
     Use the properties of strongly convex functions to test the implementation
@@ -99,7 +130,8 @@ def tv_norm(x, n_rows, n_cols):
 
     for nrun in range(20):
         x = np.random.randn(n_features)
-        x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000)
+        x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10,
+                                   max_iter=10000)
         diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols)
         testing.assert_array_less(
             ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj
@@ -137,8 +169,8 @@ def test_three_inequality(pen):
 
         lhs = 2 * (pen(xi) - pen(u))
         rhs = (
-            np.linalg.norm(u - z) ** 2
-            - np.linalg.norm(u - xi) ** 2
-            - np.linalg.norm(xi - z) ** 2
+                np.linalg.norm(u - z) ** 2
+                - np.linalg.norm(u - xi) ** 2
+                - np.linalg.norm(xi - z) ** 2
         )
         assert lhs <= rhs, pen

From 8478f5ca504e1b701b5184bf313988dec7764555 Mon Sep 17 00:00:00 2001
From: FRIGO Matteo <frigo.mat@live.com>
Date: Fri, 2 Apr 2021 14:39:48 +0200
Subject: [PATCH 2/4] merge GroupL1 and GroupL1nc classes and generalise
 group-wise penalty

This commit merges the GroupL1 classes for contiguous and non-contiguous
groups, adding the possibility to apply group-specific penalties.

The commit also adds the necessary tests and improves the existing ones.

Minor PEP8 issues are fixed.
---
 copt/penalty.py         | 130 ++++++++++------------------------------
 tests/test_penalties.py |  97 ++++++++++++------------------
 2 files changed, 71 insertions(+), 156 deletions(-)

diff --git a/copt/penalty.py b/copt/penalty.py
index af9eb962..34cd84c4 100644
--- a/copt/penalty.py
+++ b/copt/penalty.py
@@ -55,7 +55,7 @@ def _prox_L1(x, i, indices, indptr, d, step_size):
 
 class GroupL1:
     """
-    Group Lasso penalty
+    Group Lasso penalty for non-overlapping groups
 
     Args:
         alpha: float
@@ -63,99 +63,18 @@ class GroupL1:
 
         blocks: list of lists
 
-    """
-
-    def __init__(self, alpha, groups):
-        self.alpha = alpha
-        # groups need to be increasing
-        for i, g in enumerate(groups):
-            if not np.all(np.diff(g) == 1):
-                raise ValueError("Groups must be contiguous")
-            if i > 0 and groups[i - 1][-1] >= g[0]:
-                raise ValueError("Groups must be increasing")
-        self.groups = groups
-
-    def __call__(self, x):
-        return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
-
-    def prox(self, x, step_size):
-        out = x.copy()
-        for g in self.groups:
-
-            norm = np.linalg.norm(x[g])
-            if norm > self.alpha * step_size:
-                out[g] -= step_size * self.alpha * out[g] / norm
-            else:
-                out[g] = 0
-        return out
-
-    def prox_factory(self, n_features):
-        B_data = np.zeros(n_features)
-        B_indices = np.arange(n_features, dtype=np.int32)
-        B_indptr = np.zeros(n_features + 1, dtype=np.int32)
-
-        feature_pointer = 0
-        block_pointer = 0
-        for g in self.groups:
-            while feature_pointer < g[0]:
-                # non-penalized feature
-                B_data[feature_pointer] = -1.0
-                B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
-                feature_pointer += 1
-                block_pointer += 1
-            B_indptr[block_pointer + 1] = B_indptr[block_pointer]
-            for _ in g:
-                B_data[feature_pointer] = 1.0
-                B_indptr[block_pointer + 1] += 1
-                feature_pointer += 1
-            block_pointer += 1
-        for _ in range(feature_pointer, n_features):
-            B_data[feature_pointer] = -1.0
-            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
-            feature_pointer += 1
-            block_pointer += 1
-
-        B_indptr = B_indptr[: block_pointer + 1]
-        B = sparse.csr_matrix((B_data, B_indices, B_indptr))
-        alpha = self.alpha
-
-        @njit
-        def _prox_gl(x, i, indices, indptr, d, step_size):
-            for b in range(indptr[i], indptr[i + 1]):
-                h = indices[b]
-                if B_data[B_indices[B_indptr[h]]] <= 0:
-                    continue
-                ss = step_size * d[h]
-                norm = 0.0
-                for j in range(B_indptr[h], B_indptr[h + 1]):
-                    j_idx = B_indices[j]
-                    norm += x[j_idx] ** 2
-                norm = np.sqrt(norm)
-                if norm > alpha * ss:
-                    for j in range(B_indptr[h], B_indptr[h + 1]):
-                        j_idx = B_indices[j]
-                        x[j_idx] *= 1 - alpha * ss / norm
-                else:
-                    for j in range(B_indptr[h], B_indptr[h + 1]):
-                        j_idx = B_indices[j]
-                        x[j_idx] = 0.0
-
-        return _prox_gl, B
-
-
-class GroupL1nc:
-    """
-    Group Lasso penalty with NON-CONTIGUOUS and NON-OVERLAPPING groups
-
-    Args:
-        alpha: float
-            Constant multiplying this loss
-
-        blocks: list of lists
+        weights: (optional)
+            - If not passed, each group gets the same penalty (=1). (default)
+            - If 'nf', each groups gets a penalty equal to the square root of
+              the number of features indexed in it.
+            - If 'nfi', each group gets a penalty equal to the inverse of the
+              square root of the number of features indexed in it.
+            - If iterable, the i-th group gets a penalty equal to the i-th
+              entry of the passed iterable.
 
     """
 
-    def __init__(self, alpha, groups):
+    def __init__(self, alpha, groups, weights=None):
         self.alpha = alpha
 
         sum_groups = np.sum([len(g) for g in groups])
@@ -165,17 +84,34 @@ def __init__(self, alpha, groups):
         n_unique = np.unique(all_indices).size
         if sum_groups != n_unique:
             raise ValueError('Groups must not overlap.')
-        self.groups = groups
+        self.groups = [list(g) for g in groups]
+
+        if weights is None:
+            self.weights = np.ones(len(self.groups), dtype=np.float32)
+        elif isinstance(weights, str):
+            if weights == 'nf':
+                self.weights = np.asarray([np.sqrt(len(g)) for g in
+                                           self.groups])
+            elif weights == 'nfi':
+                self.weights = np.asarray([1 / np.sqrt(len(g)) for g in
+                                           self.groups])
+        else:
+            if len(weights) != len(self.groups):
+                raise ValueError('Length of weights must be equal to number '
+                                 'of groups.')
+            self.weights = np.asarray(weights)
 
     def __call__(self, x):
-        return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
+        return self.alpha * np.sum([w * np.linalg.norm(x[g]) for w, g in
+                                    zip(self.weights, self.groups)])
 
     def prox(self, x, step_size):
         out = x.copy()
-        for g in self.groups:
+        for w, g in zip(self.weights, self.groups):
             norm = np.linalg.norm(x[g])
-            if norm > self.alpha * step_size:
-                out[g] -= step_size * self.alpha * out[g] / norm
+            r = w * self.alpha * step_size
+            if norm > r:
+                out[g] -= r * out[g] / norm
             else:
                 out[g] = 0
         return out
@@ -222,7 +158,7 @@ def _prox_gl(x, i, indices, indptr, d, step_size):
                     j_idx = B_indices[j]
                     norm += x[j_idx] ** 2
                 norm = np.sqrt(norm)
-                if norm > alpha * ss:
+                if norm > alpha * ss * self.weights[h]:
                     for j in range(B_indptr[h], B_indptr[h + 1]):
                         j_idx = B_indices[j]
                         x[j_idx] *= 1 - alpha * ss / norm
diff --git a/tests/test_penalties.py b/tests/test_penalties.py
index f13e39bb..92ea6dcb 100644
--- a/tests/test_penalties.py
+++ b/tests/test_penalties.py
@@ -9,7 +9,6 @@
 proximal_penalties = [
     copt.penalty.L1Norm(1.0),
     copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)),
-    copt.penalty.GroupL1nc(1.0, np.array_split(np.arange(16), 5)),
     copt.penalty.TraceNorm(1.0, (4, 4)),
     copt.constraint.TraceBall(1.0, (4, 4)),
     copt.penalty.TotalVariation2D(1.0, (4, 4)),
@@ -18,76 +17,56 @@
 
 
 def test_GroupL1():
+    groups = [(0, 1), (1, 2)]
+    with np.testing.assert_raises(ValueError):
+        copt.penalty.GroupL1(1.0, groups)
+
     groups = [(0, 1), (2, 3)]
+    weights = [1, 2, 3]
+    with np.testing.assert_raises(ValueError):
+        copt.penalty.GroupL1(1.0, groups, weights)
+
     g1 = copt.penalty.GroupL1(1.0, groups)
-    _, B = g1.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, 1.0, 1.0, 0.0],
-                [0.0, 0.0, 0.0, 0.0, -1.0],
-            ]
-        )
-    )
+    x = np.array([1., 2, 3, 4])
+    gt = np.linalg.norm(x[[0, 1]]) + np.linalg.norm(x[[2, 3]])
+    np.testing.assert_almost_equal(g1(x), gt)
 
-    groups = [(0, 1), (3, 4)]
-    g2 = copt.penalty.GroupL1(1.0, groups)
-    _, B = g2.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, -1.0, 0.0, 0.0],
-                [0.0, 0.0, 0.0, 1.0, 1.0],
-            ]
-        )
-    )
+    gt = np.array([1., 1])
+    np.testing.assert_almost_equal(g1.weights, gt)
 
+    g2 = copt.penalty.GroupL1(1.0, groups, 'nf')
+    gt = np.array([np.sqrt(2), np.sqrt(2)])
+    np.testing.assert_almost_equal(g2.weights, gt)
 
-#
-#     for blocks in [[(0, 1), (2, 3)], ]:
-#         pen = cp.utils.GroupL1(1., blocks)
-#         counter = 0
-#         for g in pen.groups:
-#             for j in g:
-#                 counter += 1
-#         assert counter == blocks.size
-#         assert pen.groups
-#         for g in pen.groups:
-#             assert np.unique(blocks[g]).size == 1
+    g3 = copt.penalty.GroupL1(1.0, groups, 'nfi')
+    gt = 1. / gt
+    np.testing.assert_almost_equal(g3.weights, gt)
 
+    gt = np.random.rand(len(groups))
+    g4 = copt.penalty.GroupL1(1.0, groups, gt)
+    np.testing.assert_almost_equal(g4.weights, gt)
 
-def test_GroupL1nc():
-    groups = [(0, 1), (2, 3)]
-    g1 = copt.penalty.GroupL1nc(1.0, groups)
     _, B = g1.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, 1.0, 1.0, 0.0],
-                [0.0, 0.0, 0.0, 0.0, -1.0],
-            ]
-        )
+    gt = np.array(
+        [
+            [1.0, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 0.0, -1.0],
+        ]
     )
+    np.testing.assert_almost_equal(B.toarray(), gt)
 
     groups = [(0, 1), (3, 4)]
-    g2 = copt.penalty.GroupL1nc(1.0, groups)
-    _, B = g2.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, 0.0, 1.0, 1.0],
-                [0.0, 0.0, -1.0, 0.0, 0.0],
-            ]
-        )
+    g5 = copt.penalty.GroupL1(1.0, groups)
+    _, B = g5.prox_factory(5)
+    gt = np.array(
+        [
+            [1.0, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 0.0, 1.0, 1.0],
+            [0.0, 0.0, -1.0, 0.0, 0.0],
+        ]
     )
+    np.testing.assert_almost_equal(B.toarray(), gt)
 
 
 def test_tv1_prox():

From 25ba88161e1839d3325ba8db37ffd079d0e3ef8f Mon Sep 17 00:00:00 2001
From: FRIGO Matteo <frigo.mat@live.com>
Date: Sat, 3 Apr 2021 16:36:27 +0200
Subject: [PATCH 3/4] separate penalty classes for overlapping and
 non-overlapping GroupL1

This commit adds the OGroupL1 class for the overlapping groups case.
The class is independent w.r.t. GroupL1 as the computation of the
proximal operator in the sparse case is quite delicate to deal with.

The corresponding tests have been adapted and extended where possible.
---
 copt/penalty.py          | 86 ++++++++++++++++++++++++++++++++++++++++
 tests/test_penalties.py  | 84 ++++++++++++++++++++++++++++++++++++---
 tests/test_randomized.py |  4 +-
 3 files changed, 167 insertions(+), 7 deletions(-)

diff --git a/copt/penalty.py b/copt/penalty.py
index 34cd84c4..edc36f19 100644
--- a/copt/penalty.py
+++ b/copt/penalty.py
@@ -170,6 +170,92 @@ def _prox_gl(x, i, indices, indptr, d, step_size):
         return _prox_gl, B
 
 
+class OGroupL1:
+    """
+    Overlapping Group Lasso penalty
+    Args:
+        alpha: float
+            Constant multiplying this loss
+        blocks: list of lists
+    """
+
+    def __init__(self, alpha, groups):
+        self.alpha = alpha
+        # groups need to be increasing
+        for i, g in enumerate(groups):
+            if not np.all(np.diff(g) == 1):
+                raise ValueError("Groups must be contiguous")
+            if i > 0 and groups[i - 1][-1] >= g[0]:
+                raise ValueError("Groups must be increasing")
+        self.groups = groups
+
+    def __call__(self, x):
+        return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
+
+    def prox(self, x, step_size):
+        out = x.copy()
+        for g in self.groups:
+            norm = np.linalg.norm(x[g])
+            if norm > self.alpha * step_size:
+                out[g] -= step_size * self.alpha * out[g] / norm
+            else:
+                out[g] = 0
+        return out
+
+    def prox_factory(self, n_features):
+        B_data = np.zeros(n_features)
+        B_indices = np.arange(n_features, dtype=np.int32)
+        B_indptr = np.zeros(n_features + 1, dtype=np.int32)
+
+        feature_pointer = 0
+        block_pointer = 0
+        for g in self.groups:
+            while feature_pointer < g[0]:
+                # non-penalized feature
+                B_data[feature_pointer] = -1.0
+                B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
+                feature_pointer += 1
+                block_pointer += 1
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer]
+            for _ in g:
+                B_data[feature_pointer] = 1.0
+                B_indptr[block_pointer + 1] += 1
+                feature_pointer += 1
+            block_pointer += 1
+        for _ in range(feature_pointer, n_features):
+            B_data[feature_pointer] = -1.0
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
+            feature_pointer += 1
+            block_pointer += 1
+
+        B_indptr = B_indptr[: block_pointer + 1]
+        B = sparse.csr_matrix((B_data, B_indices, B_indptr))
+        alpha = self.alpha
+
+        @njit
+        def _prox_gl(x, i, indices, indptr, d, step_size):
+            for b in range(indptr[i], indptr[i + 1]):
+                h = indices[b]
+                if B_data[B_indices[B_indptr[h]]] <= 0:
+                    continue
+                ss = step_size * d[h]
+                norm = 0.0
+                for j in range(B_indptr[h], B_indptr[h + 1]):
+                    j_idx = B_indices[j]
+                    norm += x[j_idx] ** 2
+                norm = np.sqrt(norm)
+                if norm > alpha * ss:
+                    for j in range(B_indptr[h], B_indptr[h + 1]):
+                        j_idx = B_indices[j]
+                        x[j_idx] *= 1 - alpha * ss / norm
+                else:
+                    for j in range(B_indptr[h], B_indptr[h + 1]):
+                        j_idx = B_indices[j]
+                        x[j_idx] = 0.0
+
+        return _prox_gl, B
+
+
 class FusedLasso:
     """
     Fused Lasso penalty
diff --git a/tests/test_penalties.py b/tests/test_penalties.py
index 92ea6dcb..ff270c61 100644
--- a/tests/test_penalties.py
+++ b/tests/test_penalties.py
@@ -9,6 +9,7 @@
 proximal_penalties = [
     copt.penalty.L1Norm(1.0),
     copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)),
+    copt.penalty.OGroupL1(1.0, np.array_split(np.arange(16), 5)),
     copt.penalty.TraceNorm(1.0, (4, 4)),
     copt.constraint.TraceBall(1.0, (4, 4)),
     copt.penalty.TotalVariation2D(1.0, (4, 4)),
@@ -17,35 +18,66 @@
 
 
 def test_GroupL1():
+    # non-overlapping
     groups = [(0, 1), (1, 2)]
     with np.testing.assert_raises(ValueError):
         copt.penalty.GroupL1(1.0, groups)
 
+    # converts group type from tuple to list
     groups = [(0, 1), (2, 3)]
+    pen = copt.penalty.GroupL1(1.0, groups)
+    for g in pen.groups:
+        np.testing.assert_(isinstance(g, list))
+
+    # same number of groups and weights
+    groups = [[0, 1], [2, 3]]
     weights = [1, 2, 3]
     with np.testing.assert_raises(ValueError):
         copt.penalty.GroupL1(1.0, groups, weights)
 
-    g1 = copt.penalty.GroupL1(1.0, groups)
-    x = np.array([1., 2, 3, 4])
-    gt = np.linalg.norm(x[[0, 1]]) + np.linalg.norm(x[[2, 3]])
-    np.testing.assert_almost_equal(g1(x), gt)
-
+    # evaluation of penalty and prox
+    x = np.array([0.01, 0.5, 3, 4])
+    weights = np.array([10, .2])
+    g0 = copt.penalty.GroupL1(1, groups, weights)
+    # eval
+    result = g0(x)
+    gt = (weights[0] * np.linalg.norm(x[groups[0]], 2) +
+          weights[1] * np.linalg.norm(x[groups[1]], 2))
+    np.testing.assert_almost_equal(result, gt)
+    # prox
+    gt = x.copy()
+    # the first group has norm lower than the corresponding weight
+    gt[groups[0]] = 0
+    # the second group has norm higher than the corresponding weight
+    gt[groups[1]] -= (x[groups[1]] * weights[1] /
+                      np.linalg.norm(x[groups[1]]))
+    prox = g0.prox(x, 1)
+    np.testing.assert_almost_equal(prox, gt)
+
+    # default weights
+    g1 = copt.penalty.GroupL1(1, groups)
     gt = np.array([1., 1])
     np.testing.assert_almost_equal(g1.weights, gt)
 
+    # weights: sqrt(|g|)
     g2 = copt.penalty.GroupL1(1.0, groups, 'nf')
     gt = np.array([np.sqrt(2), np.sqrt(2)])
     np.testing.assert_almost_equal(g2.weights, gt)
 
+    # weights: sqrt(|g|) ** -1
     g3 = copt.penalty.GroupL1(1.0, groups, 'nfi')
     gt = 1. / gt
     np.testing.assert_almost_equal(g3.weights, gt)
 
+    # custom weights
     gt = np.random.rand(len(groups))
     g4 = copt.penalty.GroupL1(1.0, groups, gt)
     np.testing.assert_almost_equal(g4.weights, gt)
+    expected = (np.linalg.norm(x[[0, 1]]) * gt[0] +
+                np.linalg.norm(x[[2, 3]]) * gt[1])
+    np.testing.assert_almost_equal(g4(x), expected)
 
+    # sparse proximal
     _, B = g1.prox_factory(5)
     gt = np.array(
         [
@@ -69,6 +101,48 @@ def test_GroupL1():
     np.testing.assert_almost_equal(B.toarray(), gt)
 
 
+def test_OverlappingGroupL1():
+    groups = [(0, 1), (2, 3)]
+    g1 = copt.penalty.OGroupL1(1.0, groups)
+    _, B = g1.prox_factory(5)
+    assert np.all(
+        B.toarray()
+        == np.array(
+            [
+                [1.0, 1.0, 0.0, 0.0, 0.0],
+                [0.0, 0.0, 1.0, 1.0, 0.0],
+                [0.0, 0.0, 0.0, 0.0, -1.0],
+            ]
+        )
+    )
+
+    groups = [(0, 1), (3, 4)]
+    g2 = copt.penalty.OGroupL1(1.0, groups)
+    _, B = g2.prox_factory(5)
+    assert np.all(
+        B.toarray()
+        == np.array(
+            [
+                [1.0, 1.0, 0.0, 0.0, 0.0],
+                [0.0, 0.0, -1.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0, 1.0, 1.0],
+            ]
+        )
+    )
+
+
+#     for blocks in [[(0, 1), (2, 3)], ]:
+#         pen = cp.utils.GroupL1(1., blocks)
+#         counter = 0
+#         for g in pen.groups:
+#             for j in g:
+#                 counter += 1
+#         assert counter == blocks.size
+#         assert pen.groups
+#         for g in pen.groups:
+#             assert np.unique(blocks[g]).size == 1
+
+
 def test_tv1_prox():
     """
     Use the properties of strongly convex functions to test the implementation
diff --git a/tests/test_randomized.py b/tests/test_randomized.py
index 34d720e2..9c64684e 100644
--- a/tests/test_randomized.py
+++ b/tests/test_randomized.py
@@ -195,8 +195,8 @@ def test_vrtos_ogl():
     groups_2 = [np.arange(5, 10)]
     f = copt.loss.LogLoss(A, b, alpha)
     for beta in np.logspace(-3, 3, 3):
-        p_1 = copt.penalty.GroupL1(beta, groups_1)
-        p_2 = copt.penalty.GroupL1(beta, groups_2)
+        p_1 = copt.penalty.OGroupL1(beta, groups_1)
+        p_2 = copt.penalty.OGroupL1(beta, groups_2)
         L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
 
         opt_vrtos = cp.minimize_vrtos(

From be2c4e921555e606df7ff1e9ae9ee0cf447d469d Mon Sep 17 00:00:00 2001
From: FRIGO Matteo <frigo.mat@live.com>
Date: Tue, 6 Apr 2021 10:23:09 +0200
Subject: [PATCH 4/4] remove overlapping group lasso penalty

The current implementation of the overlapping group lass is incorrect.
This commit removes it from the codebase.
The corresponding tests are also removed, with the exception of a test
for the VRTOS solver involving overlapping group lasso penalties, which
is now commented.

Minor PEP8 issues are solved.
---
 copt/penalty.py          | 86 ----------------------------------------
 tests/test_penalties.py  | 43 --------------------
 tests/test_randomized.py | 66 +++++++++++++++---------------
 3 files changed, 33 insertions(+), 162 deletions(-)

diff --git a/copt/penalty.py b/copt/penalty.py
index edc36f19..34cd84c4 100644
--- a/copt/penalty.py
+++ b/copt/penalty.py
@@ -170,92 +170,6 @@ def _prox_gl(x, i, indices, indptr, d, step_size):
         return _prox_gl, B
 
 
-class OGroupL1:
-    """
-    Overlapping Group Lasso penalty
-    Args:
-        alpha: float
-            Constant multiplying this loss
-        blocks: list of lists
-    """
-
-    def __init__(self, alpha, groups):
-        self.alpha = alpha
-        # groups need to be increasing
-        for i, g in enumerate(groups):
-            if not np.all(np.diff(g) == 1):
-                raise ValueError("Groups must be contiguous")
-            if i > 0 and groups[i - 1][-1] >= g[0]:
-                raise ValueError("Groups must be increasing")
-        self.groups = groups
-
-    def __call__(self, x):
-        return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
-
-    def prox(self, x, step_size):
-        out = x.copy()
-        for g in self.groups:
-            norm = np.linalg.norm(x[g])
-            if norm > self.alpha * step_size:
-                out[g] -= step_size * self.alpha * out[g] / norm
-            else:
-                out[g] = 0
-        return out
-
-    def prox_factory(self, n_features):
-        B_data = np.zeros(n_features)
-        B_indices = np.arange(n_features, dtype=np.int32)
-        B_indptr = np.zeros(n_features + 1, dtype=np.int32)
-
-        feature_pointer = 0
-        block_pointer = 0
-        for g in self.groups:
-            while feature_pointer < g[0]:
-                # non-penalized feature
-                B_data[feature_pointer] = -1.0
-                B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
-                feature_pointer += 1
-                block_pointer += 1
-            B_indptr[block_pointer + 1] = B_indptr[block_pointer]
-            for _ in g:
-                B_data[feature_pointer] = 1.0
-                B_indptr[block_pointer + 1] += 1
-                feature_pointer += 1
-            block_pointer += 1
-        for _ in range(feature_pointer, n_features):
-            B_data[feature_pointer] = -1.0
-            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
-            feature_pointer += 1
-            block_pointer += 1
-
-        B_indptr = B_indptr[: block_pointer + 1]
-        B = sparse.csr_matrix((B_data, B_indices, B_indptr))
-        alpha = self.alpha
-
-        @njit
-        def _prox_gl(x, i, indices, indptr, d, step_size):
-            for b in range(indptr[i], indptr[i + 1]):
-                h = indices[b]
-                if B_data[B_indices[B_indptr[h]]] <= 0:
-                    continue
-                ss = step_size * d[h]
-                norm = 0.0
-                for j in range(B_indptr[h], B_indptr[h + 1]):
-                    j_idx = B_indices[j]
-                    norm += x[j_idx] ** 2
-                norm = np.sqrt(norm)
-                if norm > alpha * ss:
-                    for j in range(B_indptr[h], B_indptr[h + 1]):
-                        j_idx = B_indices[j]
-                        x[j_idx] *= 1 - alpha * ss / norm
-                else:
-                    for j in range(B_indptr[h], B_indptr[h + 1]):
-                        j_idx = B_indices[j]
-                        x[j_idx] = 0.0
-
-        return _prox_gl, B
-
-
 class FusedLasso:
     """
     Fused Lasso penalty
diff --git a/tests/test_penalties.py b/tests/test_penalties.py
index ff270c61..214f82a3 100644
--- a/tests/test_penalties.py
+++ b/tests/test_penalties.py
@@ -9,7 +9,6 @@
 proximal_penalties = [
     copt.penalty.L1Norm(1.0),
     copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)),
-    copt.penalty.OGroupL1(1.0, np.array_split(np.arange(16), 5)),
     copt.penalty.TraceNorm(1.0, (4, 4)),
     copt.constraint.TraceBall(1.0, (4, 4)),
     copt.penalty.TotalVariation2D(1.0, (4, 4)),
@@ -101,48 +100,6 @@ def test_GroupL1():
     np.testing.assert_almost_equal(B.toarray(), gt)
 
 
-def test_OverlappingGroupL1():
-    groups = [(0, 1), (2, 3)]
-    g1 = copt.penalty.OGroupL1(1.0, groups)
-    _, B = g1.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, 1.0, 1.0, 0.0],
-                [0.0, 0.0, 0.0, 0.0, -1.0],
-            ]
-        )
-    )
-
-    groups = [(0, 1), (3, 4)]
-    g2 = copt.penalty.OGroupL1(1.0, groups)
-    _, B = g2.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, -1.0, 0.0, 0.0],
-                [0.0, 0.0, 0.0, 1.0, 1.0],
-            ]
-        )
-    )
-
-
-#     for blocks in [[(0, 1), (2, 3)], ]:
-#         pen = cp.utils.GroupL1(1., blocks)
-#         counter = 0
-#         for g in pen.groups:
-#             for j in g:
-#                 counter += 1
-#         assert counter == blocks.size
-#         assert pen.groups
-#         for g in pen.groups:
-#             assert np.unique(blocks[g]).size == 1
-
-
 def test_tv1_prox():
     """
     Use the properties of strongly convex functions to test the implementation
diff --git a/tests/test_randomized.py b/tests/test_randomized.py
index 9c64684e..0e7828c5 100644
--- a/tests/test_randomized.py
+++ b/tests/test_randomized.py
@@ -1,10 +1,10 @@
 import numpy as np
+import pytest
 from scipy import sparse
+
 import copt as cp
 import copt.loss
 import copt.penalty
-from copt import randomized
-import pytest
 
 np.random.seed(0)
 n_samples, n_features = 20, 10
@@ -188,37 +188,37 @@ def test_gl(groups):
             assert np.linalg.norm(grad_map) < 1e-6
 
 
-def test_vrtos_ogl():
-    """Test on overlapping group lasso"""
-    alpha = 1.0 / n_samples
-    groups_1 = [np.arange(8)]
-    groups_2 = [np.arange(5, 10)]
-    f = copt.loss.LogLoss(A, b, alpha)
-    for beta in np.logspace(-3, 3, 3):
-        p_1 = copt.penalty.OGroupL1(beta, groups_1)
-        p_2 = copt.penalty.OGroupL1(beta, groups_2)
-        L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
-
-        opt_vrtos = cp.minimize_vrtos(
-            f.partial_deriv,
-            A,
-            b,
-            np.zeros(n_features),
-            1 / (3 * L),
-            alpha=alpha,
-            max_iter=200,
-            prox_1=p_1.prox_factory(n_features),
-            prox_2=p_2.prox_factory(n_features),
-        )
-
-        opt_tos = cp.minimize_three_split(
-            f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
-        )
-
-        norm = np.linalg.norm(opt_tos.x)
-        if norm < 1e-10:
-            norm = 1
-        assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4
+# def test_vrtos_ogl():
+#     """Test on overlapping group lasso"""
+#     alpha = 1.0 / n_samples
+#     groups_1 = [np.arange(8)]
+#     groups_2 = [np.arange(5, 10)]
+#     f = copt.loss.LogLoss(A, b, alpha)
+#     for beta in np.logspace(-3, 3, 3):
+#         p_1 = copt.penalty.OGroupL1(beta, groups_1)
+#         p_2 = copt.penalty.OGroupL1(beta, groups_2)
+#         L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
+#
+#         opt_vrtos = cp.minimize_vrtos(
+#             f.partial_deriv,
+#             A,
+#             b,
+#             np.zeros(n_features),
+#             1 / (3 * L),
+#             alpha=alpha,
+#             max_iter=200,
+#             prox_1=p_1.prox_factory(n_features),
+#             prox_2=p_2.prox_factory(n_features),
+#         )
+#
+#         opt_tos = cp.minimize_three_split(
+#             f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
+#         )
+#
+#         norm = np.linalg.norm(opt_tos.x)
+#         if norm < 1e-10:
+#             norm = 1
+#         assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4
 
 
 @pytest.mark.parametrize("A_data", [A, A2])