4 changes: 4 additions & 0 deletions README.md
@@ -3,6 +3,10 @@ Tensorflow implementation of "Speaker-Independent Speech Separation with Deep At

[Link](https://arxiv.org/abs/1707.03634) to original paper

### Deep clustering
This codebase also contains an implementation of the Deep Clustering model.
See `README_DPCL.md` for details.

**STILL WORK IN PROGRESS, EXPECT BUGS**

## Requirements
36 changes: 36 additions & 0 deletions README_DPCL.md
@@ -0,0 +1,36 @@
# DPCL-Tensorflow
Tensorflow implementation of “Deep clustering: Discriminative embeddings for segmentation and separation”

[Link](https://arxiv.org/abs/1508.04306) to original paper

## Requirements

Same as the DaNet model; see the main `README.md` for details.

## Usage

### Set up the dataset

Same as the DaNet model; see the main `README.md` for details.

### Set up hyperparameters

There is a `[--DPCL SPECIFIC--]` section in `app/hparams.py`; it contains
hyperparameters specific to the Deep Clustering model.

Basic hyperparameters such as `BATCH_SIZE` and `LR` are shared between the models.
See the main `README.md` for details.
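
For reference, the DPCL-specific entries added by this change (from `app/hparams.py`) look like this; the comment on `MAX_KMEANS_ITERS` is an inference from its use in `app/ops.py`:

```python
# [--DPCL SPECIFIC--]
MAX_KMEANS_ITERS = 3     # cap on k-means EM iterations
TRAIN_ASSIGN_THRES = 4.  # log magnitude difference threshold for assignment
```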

### Perform experiments

Run `dpcl.py` for Deep Clustering experiments.
Its arguments are identical to those of `main.py`; see the main `README.md` for details.

### Use custom dataset

Same as the DaNet model; see the main `README.md` for details.

### Customize model

Deep Clustering shares the “encoder” module with DaNet; it does not use the other modules.
See the main `README.md` for more details, and the sketch below for a starting point.
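
A custom encoder can follow the interface of `ToyEncoder` in `app/modules.py`. The sketch below mirrors that interface; the `MyEncoder` name and the dense-layer body are illustrative assumptions, not part of this change:

```python
import tensorflow as tf
from app.modules import Encoder  # base class, as used by ToyEncoder


class MyEncoder(Encoder):
    '''Minimal sketch of a custom encoder, modeled on ToyEncoder.'''
    def __init__(self, model, name):
        self.name = name
        self.debug_fetches = {}  # optional tensors to expose for debugging

    def __call__(self, s_signals, s_dropout_keep=1.):
        with tf.variable_scope(self.name):
            # illustrative body: a single dense layer with dropout
            s_out = tf.layers.dense(s_signals, 128, activation=tf.nn.relu)
            s_out = tf.nn.dropout(s_out, keep_prob=s_dropout_keep)
        return s_out
```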
6 changes: 4 additions & 2 deletions app/datasets/wsj0.py
@@ -41,13 +41,15 @@ def epoch(self, subset, batch_size, shuffle=False):
            dict(train=0, valid=1, test=2)[subset]][3]
        indices = np.arange(
            ((dset_size + batch_size - 1) // batch_size) * batch_size)
        indices %= dset_size
        # keep the wrapped-around final batch in ascending order
        indices[-batch_size:] = np.sort(indices[-batch_size:])
        if shuffle:
            np.random.shuffle(indices)
        req_itor = SequentialScheme(
            examples=indices, batch_size=batch_size).get_request_iterator()
        for req in req_itor:
            data_pt = dataset.get_data(handle, req)
            if shuffle:
                # data may come back in index order; re-shuffle within the batch
                perm = np.random.permutation(batch_size)
                data_pt = tuple(d[perm] for d in data_pt)
            max_len = max(map(len, data_pt[0]))
            spectra_li = [utils.random_zeropad(
                x, max_len - len(x), axis=-2)
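The index construction above pads the dataset out to a whole number of batches by wrapping the extra indices back to the start, then keeps the wrapped final batch in ascending order (sequential HDF5-style readers generally require sorted indices). A small NumPy illustration of the same arithmetic:

```python
import numpy as np

dset_size, batch_size = 10, 4
# pad up to a multiple of batch_size; extra indices wrap to the start
indices = np.arange(((dset_size + batch_size - 1) // batch_size) * batch_size)
indices %= dset_size
# the wrapped final batch [8 9 0 1] becomes the ascending [0 1 8 9]
indices[-batch_size:] = np.sort(indices[-batch_size:])
print(indices)  # [0 1 2 3 4 5 6 7 0 1 8 9]
```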
4 changes: 4 additions & 0 deletions app/hparams.py
@@ -82,6 +82,10 @@

SUMMARY_DIR = './logs'

# [--DPCL SPECIFIC--]
MAX_KMEANS_ITERS = 3
TRAIN_ASSIGN_THRES = 4. # log magnitude difference threshold for assignment

# ==========================================================================
# normally you don't need to touch anything below if you just want to tweak
# some hyperparameters
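`TRAIN_ASSIGN_THRES` is documented as a log-magnitude difference threshold for assignment; deep clustering models commonly exclude near-silent time-frequency bins from the training targets this way. A hedged sketch of such a mask follows; the function name and the exact rule are assumptions, not taken from this change:

```python
import tensorflow as tf

def assignment_weights(s_log_mag, thres=4.):
    '''Hypothetical: keep TF bins within `thres` of the loudest bin (log scale).'''
    s_max = tf.reduce_max(s_log_mag, axis=[-2, -1], keep_dims=True)
    return tf.cast(s_log_mag > (s_max - thres), s_log_mag.dtype)
```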
1 change: 1 addition & 0 deletions app/modules.py
@@ -102,6 +102,7 @@ class ToyEncoder(Encoder):
    '''
    def __init__(self, model, name):
        self.name = name
        self.debug_fetches = {}

    def __call__(self, s_signals, s_dropout_keep=1.):
        with tf.variable_scope(self.name):
75 changes: 75 additions & 0 deletions app/ops.py
@@ -327,3 +327,78 @@ def pit_mse_loss(s_x, s_y, pit_axis=1, perm_size=None, name='pit_loss'):
return s_loss, v_perms, s_loss_sets_idx


def spherical_kmeans_step(s_points, s_centers):
    '''
    Performs one EM step of spherical k-means.

    Assumes points and centers are unit vectors in the embedding space;
    the similarity measure is cosine similarity.

    Args:
        s_points: tensor of shape [num_points, embedding_size];
            must be unit vectors in the embedding space
        s_centers: tensor of shape [num_centers, embedding_size];
            centers prior to the EM step

    Returns:
        s_new_centers: same shape as s_centers;
            centers after one EM step
    '''
    n_cluster = s_centers.get_shape().as_list()[0]
    assert isinstance(n_cluster, int)
    # [N, E], [C, E] -> [N, C]
    s_cosines = tf.matmul(
        s_points, tf.transpose(s_centers))
    # assign each point to its most similar center
    s_assigns = tf.argmax(s_cosines, axis=1)
    s_new_centers = tf.unsorted_segment_sum(
        s_points, s_assigns, n_cluster)
    # re-normalize the summed vectors back onto the unit sphere
    s_new_centers = s_new_centers * tf.rsqrt(
        tf.reduce_sum(
            tf.square(s_new_centers),
            axis=-1, keep_dims=True) + hparams.EPS)
    return s_new_centers


def kmeans(s_points, s_centers, fn_step, max_step=100, stop_threshold=1e-4):
    '''
    Performs batched k-means clustering.

    Args:
        s_points: tensor of shape [batch_size, num_points, embedding_size]
        s_centers: tensor of shape [batch_size, num_centers, embedding_size]
        fn_step: function that takes (s_points, s_centers) as input
            and returns updated s_centers
        max_step: int, maximum number of k-means steps
        stop_threshold: stop once no center coordinate changes
            by more than this within one step

    Returns:
        s_final_centers: same shape as s_centers
    '''
    batch_size = s_points.get_shape().as_list()[0]
    assert isinstance(batch_size, int)

    def fn_cond(s_step_, s_points_, s_centers_, s_max_diff_):
        return (s_step_ < max_step) & (s_max_diff_ > stop_threshold)

    def fn_body(s_step_, s_points_, s_centers_, s_max_diff_):
        s_centers_tp1 = fn_step(s_points_, s_centers_)
        s_max_diff_tp1 = tf.reduce_max(
            tf.abs(s_centers_tp1 - s_centers_))
        # carry the updated centers into the next iteration
        return (s_step_ + 1, s_points_, s_centers_tp1, s_max_diff_tp1)

    def fn_kmeans(s_input_li_):
        _, _, s_final_centers, _ = tf.while_loop(
            fn_cond, fn_body,
            s_input_li_, back_prop=False)
        return s_final_centers

    with tf.device('/cpu:0'):
        s_step = tf.zeros([batch_size], dtype=hparams.INTX)

    s_max_diff = tf.constant(
        stop_threshold + 1., dtype=hparams.FLOATX, shape=[batch_size])
    return tf.map_fn(
        fn_kmeans, [s_step, s_points, s_centers, s_max_diff],
        dtype=hparams.FLOATX, back_prop=False)
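
As a usage sketch (not part of this change): unit-normalized embeddings can be clustered by passing `spherical_kmeans_step` as the step function. The shapes, the center initialization, and the use of `MAX_KMEANS_ITERS` below are illustrative assumptions:

```python
import tensorflow as tf

# [batch, num_points, embed]: unit-norm embeddings, e.g. from the encoder
s_embed = tf.nn.l2_normalize(
    tf.random_normal(
        [hparams.BATCH_SIZE, 256, 40], dtype=hparams.FLOATX),
    dim=-1)
# illustrative initialization: use the first two points as centers
s_init_centers = s_embed[:, :2, :]
s_final_centers = kmeans(
    s_embed, s_init_centers, spherical_kmeans_step,
    max_step=hparams.MAX_KMEANS_ITERS)
```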