From ae437a9c15d640f13347efe8508e8769e055bbf8 Mon Sep 17 00:00:00 2001 From: Frederik Rautenberg Date: Thu, 20 Jul 2023 15:02:45 +0200 Subject: [PATCH 1/2] Fix function cos_distance. If dimension of input is (1,N) or (N,1), cos_distance was not calculated. --- paderbox/math/vector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paderbox/math/vector.py b/paderbox/math/vector.py index a84cc055..bdc7578a 100644 --- a/paderbox/math/vector.py +++ b/paderbox/math/vector.py @@ -25,7 +25,10 @@ def cos_distance(a, b): :param b: vector b (1xN or Nx1 numpy array) :return: distance (scalar) """ - return 0.5 * (1 - sum(a * b) / np.sqrt(sum(a ** 2) * sum(b ** 2))) + assert a.shape == b.shape, 'Both vectors must have the same dimension' + assert a.squeeze().ndim == 1 and b.squeeze().ndim == 1, \ + f"Input must be vectors: {a.shape} {b.shape}" + return 0.5 * (1 - np.sum(a * b) / np.sqrt(np.sum(a ** 2) * np.sum(b ** 2))) def normalize_vector_to_unit_length(data): From a2ef5af4b09cfc4c7523bb489cd104bf4ec5f650 Mon Sep 17 00:00:00 2001 From: RautenbergFrederik <129868951+RautenbergFrederik@users.noreply.github.com> Date: Thu, 20 Jul 2023 16:08:43 +0200 Subject: [PATCH 2/2] Update vector.py Use cos similarity to calculate the cos distance --- paderbox/math/vector.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/paderbox/math/vector.py b/paderbox/math/vector.py index bdc7578a..73f1cebd 100644 --- a/paderbox/math/vector.py +++ b/paderbox/math/vector.py @@ -19,16 +19,15 @@ def cos_similarity(A, B): def cos_distance(a, b): """ - cosine distance between vector a and b: 1/2*(1-a*b/|a|*|b|) + cosine distance between array a and b + Args: + a: array with shape (...,d) + b: array with shape (...,d) - :param a: vector a (1xN or Nx1 numpy array) - :param b: vector b (1xN or Nx1 numpy array) - :return: distance (scalar) + Returns: + cosine distance with shape (...)
""" - assert a.shape == b.shape, 'Both vectors must have the same dimension' - assert a.squeeze().ndim == 1 and b.squeeze().ndim == 1, \ - f"Input must be vectors: {a.shape} {b.shape}" - return 0.5 * (1 - np.sum(a * b) / np.sqrt(np.sum(a ** 2) * np.sum(b ** 2))) + return 0.5 * (1 - cos_similarity(a, b)) def normalize_vector_to_unit_length(data):