Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 119 additions & 4 deletions koregraph/api/machine_learning/load_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,28 @@
All utilities functions to load the train dataset.
"""

from typing import Any, Tuple
from os import listdir
from random import sample

from numpy import ndarray, append

from koregraph.utils.controllers.pickles import load_pickle_object
from koregraph.config.params import GENERATED_PICKLE_DIRECTORY
from numpy import ndarray, append, isnan, any, nan_to_num, concatenate, split, delete

from koregraph.utils.controllers.pickles import load_pickle_object, save_object_pickle
from koregraph.api.preprocessing.audio_preprocessing import music_to_numpy
from koregraph.api.preprocessing.posture_preprocessing import (
fill_forward,
cut_percentage,
)
from koregraph.config.params import (
GENERATED_KEYPOINTS_DIRECTORY,
GENERATED_AUDIO_DIRECTORY,
GENERATED_PICKLE_DIRECTORY,
ALL_ADVANCED_MOVE_NAMES,
CHUNK_SIZE,
FRAME_FORMAT,
GENERATED_FEATURES_DIRECTORY,
PERCENTAGE_CUT,
)


def load_preprocess_dataset(
Expand Down Expand Up @@ -47,6 +63,105 @@ def load_preprocess_dataset(
return X, y


def load_chunk_preprocess_dataset(limit: int = 100) -> Tuple[ndarray, ndarray]:
    """Load the chunked (audio, keypoints) training dataset.

    Walks up to ``limit`` choreographies, converts each audio chunk to a
    numpy array, loads the matching 2D keypoints, forward-fills missing
    keypoints (zero-filling whatever NaNs remain) and stacks everything.

    Args:
        limit: Maximum number of choreographies to load. Defaults to 100,
            matching the previous hard-coded cap.

    Returns:
        Tuple[ndarray, ndarray]: X reshaped to (n, CHUNK_SIZE * 60, 128)
        and y reshaped to (n, CHUNK_SIZE * 60 * 34).
    """
    chore_names = ALL_ADVANCED_MOVE_NAMES[:limit]
    x_parts = []
    y_parts = []
    for chore_name in chore_names:
        chore_name = chore_name.replace(".pkl", "")
        # Chore names look like "<a>_<b>_<c>_<d>_<music>_<e>"; the fifth
        # field is the music track the chunks were generated from.
        _, _, _, _, music_name, _ = chore_name.split("_")

        chore_path = GENERATED_KEYPOINTS_DIRECTORY / chore_name / str(CHUNK_SIZE)
        music_path = GENERATED_AUDIO_DIRECTORY / music_name / str(CHUNK_SIZE)
        print(f"Parsing {chore_name} chunks")

        for file in listdir(chore_path):
            chunk_id = file.replace(".pkl", "").split("_")[-1]
            chore_filepath = chore_path / file
            music_filepath = music_path / f"{music_name}_{chunk_id}.mp3"

            X_tmp = music_to_numpy(music_filepath)
            y_tmp = load_pickle_object(chore_filepath)["keypoints2d"]

            y_tmp = fill_forward(y_tmp)
            if isnan(y_tmp).any():
                print(f"Fill forward failed for chunk {chunk_id}. Filling with 0")
                # nan=0.0 must be passed by keyword: the previous positional 0
                # was interpreted as copy=False, not the fill value.
                y_tmp = nan_to_num(y_tmp, nan=0.0)

            # Accumulate in lists and concatenate once at the end: calling
            # numpy.append per chunk copies the whole array each time (O(n^2)).
            x_parts.append(X_tmp)
            y_parts.append(y_tmp)

    X = concatenate(x_parts, axis=0)
    y = concatenate(y_parts, axis=0)
    return X.reshape(-1, CHUNK_SIZE * 60, 128), y.reshape(-1, CHUNK_SIZE * 60 * 34)


def load_next_chunks_preprocess_dataset(
    dataset_size: float = 1.0, perc_cut: float = PERCENTAGE_CUT
):
    """Build the "predict the next frames" dataset from chunked keypoints.

    For every choreography chunk: forward-fill missing keypoints
    (zero-filling leftover NaNs), normalise x/y pixel coordinates by the
    frame dimensions, then split each chunk so the leading (1 - perc_cut)
    share of frames becomes the input X and the trailing perc_cut share the
    target y.

    Args:
        dataset_size: Fraction (0-1] of the available choreographies to use.
        perc_cut: Fraction of each chunk reserved as the prediction target.

    Returns:
        Tuple of X with shape (n, int(CHUNK_SIZE * (1 - perc_cut)) * 60, 17, 2)
        and y with shape (n, int(CHUNK_SIZE * perc_cut * 60 * 34)).

    Side effects:
        Pickles X and y under GENERATED_FEATURES_DIRECTORY ("x"/"y") before
        the hard-coded outlier removal at the end.
    """
    chore_names = ALL_ADVANCED_MOVE_NAMES[
        : int(len(ALL_ADVANCED_MOVE_NAMES) * dataset_size)
    ]
    x_parts = []
    y_parts = []
    for chore in chore_names:
        # NOTE(review): items here are objects exposing .name, while
        # load_chunk_preprocess_dataset treats them as plain strings —
        # confirm which type ALL_ADVANCED_MOVE_NAMES actually holds.
        chore_name = chore.name

        chore_path = GENERATED_KEYPOINTS_DIRECTORY / chore_name / str(CHUNK_SIZE)
        print(f"Parsing {chore_name} chunks")

        for file in listdir(chore_path):
            chunk_id = file.replace(".pkl", "").split("_")[-1]
            chore_filepath = chore_path / file

            chore_tmp = load_pickle_object(chore_filepath)["keypoints2d"]

            chore_tmp = fill_forward(chore_tmp)
            if isnan(chore_tmp).any():
                print(f"Fill forward failed for chunk {chunk_id}. Filling with 0")
                # nan=0.0 must be passed by keyword: a positional 0 is read
                # as copy=False, not the fill value.
                chore_tmp = nan_to_num(chore_tmp, nan=0.0)
            # Normalise pixel coordinates into [0, 1] by frame width/height.
            chore_tmp[:, :, 0] = chore_tmp[:, :, 0] / FRAME_FORMAT[0]
            chore_tmp[:, :, 1] = chore_tmp[:, :, 1] / FRAME_FORMAT[1]

            # Leading share of the chunk is the input, trailing share the
            # target. TODO: audio features (music_to_numpy + concatenate with
            # the pose input) were prototyped here and are currently disabled.
            X_tmp, y_tmp = cut_percentage(chore_tmp.reshape(-1, 34), perc_cut)

            # Accumulate in lists and concatenate once: numpy.append per
            # chunk copies the whole array each time (O(n^2)).
            x_parts.append(X_tmp)
            y_parts.append(y_tmp)

    X = concatenate(x_parts, axis=0)
    y = concatenate(y_parts, axis=0)

    X = X.reshape(-1, int((CHUNK_SIZE * (1 - perc_cut)) * 60), 17, 2)
    y = y.reshape(-1, int(CHUNK_SIZE * perc_cut * 60 * 34))

    print("X final shape", X.shape)
    print("y final shape", y.shape)
    save_object_pickle(X, obj_path=GENERATED_FEATURES_DIRECTORY / "x")
    save_object_pickle(y, obj_path=GENERATED_FEATURES_DIRECTORY / "y")

    # NOTE(review): rows 60 and 63 are hard-coded outlier removals — TODO
    # confirm these indices are still the bad samples and replace with a
    # data-driven check.
    y = delete(y, [60, 63], axis=0)
    X = delete(X, [60, 63], axis=0)
    return X, y


def check_dataset_format(): ...


Expand Down
30 changes: 30 additions & 0 deletions koregraph/api/machine_learning/loss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from tensorflow import reduce_mean, square, expand_dims
from keras.saving import register_keras_serializable


def distance_frame_to_frame(frame1, frame2):
    """Per-row sum of squared joint distances between two pose batches.

    Each row holds 34 values read as 17 (x, y) joint pairs; the result is
    the accumulated squared euclidean distance over all joints, one value
    per row.
    """
    total = 0
    for x_col in range(0, 34, 2):
        y_col = x_col + 1
        dx = frame1[:, x_col] - frame2[:, x_col]
        dy = frame1[:, y_col] - frame2[:, y_col]
        total = total + (dx ** 2 + dy ** 2)

    return total


@register_keras_serializable()
def my_mse(y_true, y_pred):
    """Mean of squared frame-to-frame pose distances between target and prediction."""
    frame_distances = distance_frame_to_frame(y_true, y_pred)
    return reduce_mean(square(frame_distances))


@register_keras_serializable()
def my_mse_maximise_movement(y_true, y_pred):
    """Pose-distance loss that also rewards overall motion.

    Penalises the frame-wise distance to the target while subtracting the
    distance between the predicted first and last frames, so predictions
    that actually move score lower.
    """
    first_frame = expand_dims(y_pred[0, :], 0)
    last_frame = expand_dims(y_pred[-1, :], 0)
    prediction_error = distance_frame_to_frame(y_true, y_pred)
    movement_spread = distance_frame_to_frame(first_frame, last_frame)

    return reduce_mean(square(prediction_error - movement_spread))
135 changes: 133 additions & 2 deletions koregraph/api/machine_learning/neural_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,19 @@
Dropout,
Bidirectional,
Conv1D,
Conv2D,
Conv2DTranspose,
MaxPooling1D,
MaxPooling2D,
Flatten,
TimeDistributed,
Input,
)
from keras.initializers import glorot_uniform
from keras.models import Sequential, Model
from keras.optimizers import RMSprop
from keras.optimizers import Adam, RMSprop

from koregraph.api.machine_learning.loss import my_mse


def prepare_model(X, y) -> Model:
Expand Down Expand Up @@ -44,7 +53,7 @@ def prepare_model(X, y) -> Model:
Dropout(rate=0.2),
Dense(64, activation="relu"),
Dropout(rate=0.2),
Dense(y.shape[1], activation="sigmoid"),
Dense(y.shape[1], activation="relu"),
]
)

Expand Down Expand Up @@ -80,5 +89,127 @@ def initialize_model(X, y) -> Model:
return compiled_model


def initialize_model_chunks(X, y) -> Model:
    """Build and compile the chunk-based bidirectional-LSTM regression model.

    Returns:
        Model: The compiled model (MSE loss, Adam @ 1e-5, MAE metric).
    """
    print("x 0 shape", X[0].shape)

    model = Sequential()
    model.add(
        Bidirectional(
            LSTM(
                256,
                activation="tanh",
                kernel_initializer=glorot_uniform(),
                return_sequences=True,
            )
        )
    )
    model.add(LSTM(128, activation="tanh", recurrent_dropout=0.2))
    model.add(Dense(256, activation="relu"))
    # Intermediate dense stack with L2 activity regularisation.
    model.add(Dense(256, activation="relu", activity_regularizer="l2"))
    model.add(Dense(128, activation="relu", activity_regularizer="l2"))
    model.add(Dropout(rate=0.2))
    model.add(Dense(64, activation="relu", activity_regularizer="l2"))
    model.add(Dropout(rate=0.2))
    # Linear output head sized to the flattened keypoint target.
    model.add(Dense(y.shape[1], activation="linear"))

    optimizer = Adam(learning_rate=0.00001)
    model.compile(loss="mse", optimizer=optimizer, metrics=["mae"])
    return model


def initialize_model_next_chunks(X, y) -> Model:
    """Build and compile the conv-autoencoder + BiLSTM next-chunk model.

    Returns:
        Model: The compiled model (MSE loss, Adam optimizer, MAE metric).
    """
    model = Sequential()
    model.add(Input(X[0].shape))

    # Convolutional encoder: 3x3 same-padded convs, halving spatial dims
    # after each with max-pooling.
    for filters in (512, 256, 128, 64, 128):
        model.add(
            Conv2D(
                filters,
                kernel_size=(3, 3),
                activation="relu",
                padding="same",
            )
        )
        model.add(MaxPooling2D((2, 2), padding="same"))

    # Transposed-convolution decoder: upsample back, collapsing to a
    # single channel.
    for filters in (32, 16, 1):
        model.add(
            Conv2DTranspose(
                filters, (3, 3), strides=(2, 2), padding="same", activation="relu"
            )
        )

    # Flatten each timestep, then a two-level bidirectional LSTM stack.
    model.add(TimeDistributed(Flatten()))
    for units, keep_sequences in ((512, True), (256, False)):
        model.add(
            Bidirectional(
                LSTM(
                    units,
                    activation="tanh",
                    kernel_initializer=glorot_uniform(),
                    return_sequences=keep_sequences,
                )
            )
        )

    # Dense regression head sized to the flattened keypoint target.
    model.add(Dense(256, activation="relu"))
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(rate=0.2))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(rate=0.2))
    model.add(Dense(y.shape[1], activation="relu"))

    model.compile(loss="mse", optimizer="adam", metrics=["mae"])
    return model


if __name__ == "__main__":
initialize_model()
Loading