4 changes: 4 additions & 0 deletions README.md
@@ -3,6 +3,10 @@ Tensorflow implementation of "Speaker-Independent Speech Separation with Deep At

[Link](https://arxiv.org/abs/1707.03634) to original paper

### Deep clustering
This codebase also contains an implementation of the Deep Clustering model.
See `README_DPCL.md` for details.

**STILL WORK IN PROGRESS, EXPECT BUGS**

## Requirements
36 changes: 36 additions & 0 deletions README_DPCL.md
@@ -0,0 +1,36 @@
# DPCL-Tensorflow
Tensorflow implementation of “Deep clustering: Discriminative embeddings for segmentation and separation”

[Link](https://arxiv.org/abs/1508.04306) to original paper

## Requirements

Same as the DaNet model; see the main `README.md` for details.

## Usage

### Set up the dataset

Same as the DaNet model; see the main `README.md` for details.

### Set up hyperparameters

There is a `[--DPCL SPECIFIC--]` section in `app/hparams.py`; it contains
hyperparameters specific to the Deep Clustering model.

Basic hyperparameters such as `BATCH_SIZE` and `LR` are shared between the models.
See the main `README.md` for details.
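
For reference, the DPCL-specific entries added by this change (from `app/hparams.py`) look like this; the comment on `MAX_KMEANS_ITERS` is an inference from its use in `app/ops.py`:

```python
# [--DPCL SPECIFIC--]
MAX_KMEANS_ITERS = 3     # cap on k-means EM iterations
TRAIN_ASSIGN_THRES = 4.  # log magnitude difference threshold for assignment
```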

### Perform experiments

Run `dpcl.py` for Deep Clustering experiments.
Its arguments are identical to those of `main.py`; see the main `README.md` for details.

### Use custom dataset

Same as the DaNet model; see the main `README.md` for details.

### Customize model

Deep Clustering shares the “encoder” module with DaNet; it does not use the other modules.
See the main `README.md` for more details, and the sketch below for a starting point.
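
A custom encoder can follow the interface of `ToyEncoder` in `app/modules.py`. The sketch below mirrors that interface; the `MyEncoder` name and the dense-layer body are illustrative assumptions, not part of this change:

```python
import tensorflow as tf
from app.modules import Encoder  # base class, as used by ToyEncoder


class MyEncoder(Encoder):
    '''Minimal sketch of a custom encoder, modeled on ToyEncoder.'''
    def __init__(self, model, name):
        self.name = name
        self.debug_fetches = {}  # optional tensors to expose for debugging

    def __call__(self, s_signals, s_dropout_keep=1.):
        with tf.variable_scope(self.name):
            # illustrative body: a single dense layer with dropout
            s_out = tf.layers.dense(s_signals, 128, activation=tf.nn.relu)
            s_out = tf.nn.dropout(s_out, keep_prob=s_dropout_keep)
        return s_out
```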
6 changes: 4 additions & 2 deletions app/datasets/wsj0.py
@@ -41,13 +41,15 @@ def epoch(self, subset, batch_size, shuffle=False):
            dict(train=0, valid=1, test=2)[subset]][3]
        indices = np.arange(
            ((dset_size + batch_size - 1) // batch_size) * batch_size)
        indices %= dset_size
        # keep the wrapped-around final batch in ascending order
        indices[-batch_size:] = np.sort(indices[-batch_size:])
        if shuffle:
            np.random.shuffle(indices)
        req_itor = SequentialScheme(
            examples=indices, batch_size=batch_size).get_request_iterator()
        for req in req_itor:
            data_pt = dataset.get_data(handle, req)
            if shuffle:
                # data may come back in index order; re-shuffle within the batch
                perm = np.random.permutation(batch_size)
                data_pt = tuple(d[perm] for d in data_pt)
            max_len = max(map(len, data_pt[0]))
            spectra_li = [utils.random_zeropad(
                x, max_len - len(x), axis=-2)
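The index construction above pads the dataset out to a whole number of batches by wrapping the extra indices back to the start, then keeps the wrapped final batch in ascending order (sequential HDF5-style readers generally require sorted indices). A small NumPy illustration of the same arithmetic:

```python
import numpy as np

dset_size, batch_size = 10, 4
# pad up to a multiple of batch_size; extra indices wrap to the start
indices = np.arange(((dset_size + batch_size - 1) // batch_size) * batch_size)
indices %= dset_size
# the wrapped final batch [8 9 0 1] becomes the ascending [0 1 8 9]
indices[-batch_size:] = np.sort(indices[-batch_size:])
print(indices)  # [0 1 2 3 4 5 6 7 0 1 8 9]
```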
4 changes: 4 additions & 0 deletions app/hparams.py
@@ -82,6 +82,10 @@

SUMMARY_DIR = './logs'

# [--DPCL SPECIFIC--]
MAX_KMEANS_ITERS = 3
TRAIN_ASSIGN_THRES = 4. # log magnitude difference threshold for assignment

# ==========================================================================
# normally you don't need to touch anything below if you just want to tweak
# some hyperparameters
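`TRAIN_ASSIGN_THRES` is documented as a log-magnitude difference threshold for assignment; deep clustering models commonly exclude near-silent time-frequency bins from the training targets this way. A hedged sketch of such a mask follows; the function name and the exact rule are assumptions, not taken from this change:

```python
import tensorflow as tf

def assignment_weights(s_log_mag, thres=4.):
    '''Hypothetical: keep TF bins within `thres` of the loudest bin (log scale).'''
    s_max = tf.reduce_max(s_log_mag, axis=[-2, -1], keep_dims=True)
    return tf.cast(s_log_mag > (s_max - thres), s_log_mag.dtype)
```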
1 change: 1 addition & 0 deletions app/modules.py
@@ -102,6 +102,7 @@ class ToyEncoder(Encoder):
    '''
    def __init__(self, model, name):
        self.name = name
        self.debug_fetches = {}

    def __call__(self, s_signals, s_dropout_keep=1.):
        with tf.variable_scope(self.name):
75 changes: 75 additions & 0 deletions app/ops.py
@@ -327,3 +327,78 @@ def pit_mse_loss(s_x, s_y, pit_axis=1, perm_size=None, name='pit_loss'):
return s_loss, v_perms, s_loss_sets_idx


def spherical_kmeans_step(s_points, s_centers):
    '''
    Performs one EM step of spherical k-means.

    Assumes points and centers are unit vectors in the embedding space;
    the similarity measure is cosine similarity.

    Args:
        s_points: tensor of shape [num_points, embedding_size];
            must be unit vectors in the embedding space
        s_centers: tensor of shape [num_centers, embedding_size];
            centers prior to the EM step

    Returns:
        s_new_centers: same shape as s_centers;
            centers after one EM step
    '''
    n_cluster = s_centers.get_shape().as_list()[0]
    assert isinstance(n_cluster, int)
    # [N, E], [C, E] -> [N, C]
    s_cosines = tf.matmul(
        s_points, tf.transpose(s_centers))
    # assign each point to its most similar center
    s_assigns = tf.argmax(s_cosines, axis=1)
    s_new_centers = tf.unsorted_segment_sum(
        s_points, s_assigns, n_cluster)
    # re-normalize the summed vectors back onto the unit sphere
    s_new_centers = s_new_centers * tf.rsqrt(
        tf.reduce_sum(
            tf.square(s_new_centers),
            axis=-1, keep_dims=True) + hparams.EPS)
    return s_new_centers


def kmeans(s_points, s_centers, fn_step, max_step=100, stop_threshold=1e-4):
    '''
    Performs batched k-means clustering.

    Args:
        s_points: tensor of shape [batch_size, num_points, embedding_size]
        s_centers: tensor of shape [batch_size, num_centers, embedding_size]
        fn_step: function that takes (s_points, s_centers) as input
            and returns updated s_centers
        max_step: int, maximum number of k-means steps
        stop_threshold: stop once no center coordinate changes
            by more than this within one step

    Returns:
        s_final_centers: same shape as s_centers
    '''
    batch_size = s_points.get_shape().as_list()[0]
    assert isinstance(batch_size, int)

    def fn_cond(s_step_, s_points_, s_centers_, s_max_diff_):
        return (s_step_ < max_step) & (s_max_diff_ > stop_threshold)

    def fn_body(s_step_, s_points_, s_centers_, s_max_diff_):
        s_centers_tp1 = fn_step(s_points_, s_centers_)
        s_max_diff_tp1 = tf.reduce_max(
            tf.abs(s_centers_tp1 - s_centers_))
        # carry the updated centers into the next iteration
        return (s_step_ + 1, s_points_, s_centers_tp1, s_max_diff_tp1)

    def fn_kmeans(s_input_li_):
        _, _, s_final_centers, _ = tf.while_loop(
            fn_cond, fn_body,
            s_input_li_, back_prop=False)
        return s_final_centers

    with tf.device('/cpu:0'):
        s_step = tf.zeros([batch_size], dtype=hparams.INTX)

    s_max_diff = tf.constant(
        stop_threshold + 1., dtype=hparams.FLOATX, shape=[batch_size])
    return tf.map_fn(
        fn_kmeans, [s_step, s_points, s_centers, s_max_diff],
        dtype=hparams.FLOATX, back_prop=False)
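
As a usage sketch (not part of this change): unit-normalized embeddings can be clustered by passing `spherical_kmeans_step` as the step function. The shapes, the center initialization, and the use of `MAX_KMEANS_ITERS` below are illustrative assumptions:

```python
import tensorflow as tf

# [batch, num_points, embed]: unit-norm embeddings, e.g. from the encoder
s_embed = tf.nn.l2_normalize(
    tf.random_normal(
        [hparams.BATCH_SIZE, 256, 40], dtype=hparams.FLOATX),
    dim=-1)
# illustrative initialization: use the first two points as centers
s_init_centers = s_embed[:, :2, :]
s_final_centers = kmeans(
    s_embed, s_init_centers, spherical_kmeans_step,
    max_step=hparams.MAX_KMEANS_ITERS)
```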