diff --git a/koregraph/api/machine_learning/load_dataset.py b/koregraph/api/machine_learning/load_dataset.py index e234a27..bf5eb7e 100644 --- a/koregraph/api/machine_learning/load_dataset.py +++ b/koregraph/api/machine_learning/load_dataset.py @@ -2,12 +2,28 @@ All utilities functions to load the train dataset. """ +from typing import Any, Tuple +from os import listdir from random import sample -from numpy import ndarray, append - -from koregraph.utils.controllers.pickles import load_pickle_object -from koregraph.config.params import GENERATED_PICKLE_DIRECTORY +from numpy import ndarray, append, isnan, any, nan_to_num, concatenate, split, delete + +from koregraph.utils.controllers.pickles import load_pickle_object, save_object_pickle +from koregraph.api.preprocessing.audio_preprocessing import music_to_numpy +from koregraph.api.preprocessing.posture_preprocessing import ( + fill_forward, + cut_percentage, +) +from koregraph.config.params import ( + GENERATED_KEYPOINTS_DIRECTORY, + GENERATED_AUDIO_DIRECTORY, + GENERATED_PICKLE_DIRECTORY, + ALL_ADVANCED_MOVE_NAMES, + CHUNK_SIZE, + FRAME_FORMAT, + GENERATED_FEATURES_DIRECTORY, + PERCENTAGE_CUT, +) def load_preprocess_dataset( @@ -47,6 +63,105 @@ def load_preprocess_dataset( return X, y +def load_chunk_preprocess_dataset() -> Tuple[ndarray, ndarray]: + chore_names = ALL_ADVANCED_MOVE_NAMES[:100] + X = None + y = None + for chore_name in chore_names: + chore_name = chore_name.replace(".pkl", "") + _, _, _, _, music_name, _ = chore_name.split("_") + + chore_path = GENERATED_KEYPOINTS_DIRECTORY / chore_name / str(CHUNK_SIZE) + music_path = GENERATED_AUDIO_DIRECTORY / music_name / str(CHUNK_SIZE) + print(f"Parsing {chore_name} chunks") + + for file in listdir(chore_path): + chunk_id = file.replace(".pkl", "").split("_")[-1] + chore_filepath = chore_path / file + music_filepath = music_path / f"{music_name}_{chunk_id}.mp3" + + X_tmp = music_to_numpy(music_filepath) + y_tmp = 
load_pickle_object(chore_filepath)["keypoints2d"] + + y_tmp = fill_forward(y_tmp) + if isnan(y_tmp).any(): + print(f"Fill forward failed for chunk {chunk_id}. Filling with 0") + y_tmp = nan_to_num(y_tmp, 0) + + if y is None: + y = y_tmp + else: + y = append(y, y_tmp, axis=0) + if X is None: + X = X_tmp + else: + X = append(X, X_tmp, axis=0) + + return X.reshape(-1, CHUNK_SIZE * 60, 128), y.reshape(-1, CHUNK_SIZE * 60 * 34) + + +def load_next_chunks_preprocess_dataset( + dataset_size: float = 1.0, perc_cut: float = PERCENTAGE_CUT +): + chore_names = ALL_ADVANCED_MOVE_NAMES[ + : int(len(ALL_ADVANCED_MOVE_NAMES) * dataset_size) + ] + X = None + y = None + for chore in chore_names: + chore_name = chore.name + # music_name = chore.music + + chore_path = GENERATED_KEYPOINTS_DIRECTORY / chore_name / str(CHUNK_SIZE) + # music_path = GENERATED_AUDIO_DIRECTORY / music_name / str(CHUNK_SIZE) + print(f"Parsing {chore_name} chunks") + + for file in listdir(chore_path): + chunk_id = file.replace(".pkl", "").split("_")[-1] + chore_filepath = chore_path / file + # music_filepath = music_path / f"{music_name}_{chunk_id}.mp3" + + # audio_tmp = music_to_numpy(music_filepath) + chore_tmp = load_pickle_object(chore_filepath)["keypoints2d"] + + chore_tmp = fill_forward(chore_tmp) + if isnan(chore_tmp).any(): + print(f"Fill forward failed for chunk {chunk_id}. 
Filling with 0") + chore_tmp = nan_to_num(chore_tmp, 0) + chore_tmp[:, :, 0] = chore_tmp[:, :, 0] / FRAME_FORMAT[0] + chore_tmp[:, :, 1] = chore_tmp[:, :, 1] / FRAME_FORMAT[1] + + chore_X, y_tmp = cut_percentage(chore_tmp.reshape(-1, 34), perc_cut) + # audio_X, _ = cut_percentage(audio_tmp, perc_cut) + # X_tmp = concatenate((chore_X, audio_X), axis=1) + X_tmp = chore_X + + # print('X_tmp shape', X_tmp.shape) + # print('y_tmp shape', y_tmp.shape) + + if y is None: + y = y_tmp + else: + y = append(y, y_tmp, axis=0) + + if X is None: + X = X_tmp + else: + X = append(X, X_tmp, axis=0) + + X = X.reshape(-1, int((CHUNK_SIZE * (1 - perc_cut)) * 60), 17, 2) + y = y.reshape(-1, int(CHUNK_SIZE * perc_cut * 60 * 34)) + + print("X final shape", X.shape) + print("y final shape", y.shape) + save_object_pickle(X, obj_path=GENERATED_FEATURES_DIRECTORY / "x") + save_object_pickle(y, obj_path=GENERATED_FEATURES_DIRECTORY / "y") + + y = delete(y, [60, 63], axis=0) + X = delete(X, [60, 63], axis=0) + return X, y + + def check_dataset_format(): ... 
diff --git a/koregraph/api/machine_learning/loss.py b/koregraph/api/machine_learning/loss.py new file mode 100644 index 0000000..8633a0d --- /dev/null +++ b/koregraph/api/machine_learning/loss.py @@ -0,0 +1,30 @@ +from tensorflow import reduce_mean, square, expand_dims +from keras.saving import register_keras_serializable + + +def distance_frame_to_frame(frame1, frame2): + distance = 0 + for i in range(0, 34, 2): + distance += (frame1[:, i] - frame2[:, i]) ** 2 + ( + frame1[:, (i + 1)] - frame2[:, (i + 1)] + ) ** 2 + + return distance + + +@register_keras_serializable() +def my_mse(y_true, y_pred): + distances = distance_frame_to_frame(y_true[::, :], y_pred[::, :]) + + return reduce_mean(square(distances)) + + +@register_keras_serializable() +def my_mse_maximise_movement(y_true, y_pred): + first_frame = expand_dims(y_pred[0, :], 0) + last_frame = expand_dims(y_pred[-1, :], 0) + distances = distance_frame_to_frame( + y_true[::, :], y_pred[::, :] + ) - distance_frame_to_frame(first_frame, last_frame) + + return reduce_mean(square(distances)) diff --git a/koregraph/api/machine_learning/neural_network.py b/koregraph/api/machine_learning/neural_network.py index 81b1464..fd7dd86 100644 --- a/koregraph/api/machine_learning/neural_network.py +++ b/koregraph/api/machine_learning/neural_network.py @@ -11,10 +11,19 @@ Dropout, Bidirectional, Conv1D, + Conv2D, + Conv2DTranspose, MaxPooling1D, + MaxPooling2D, + Flatten, + TimeDistributed, + Input, ) +from keras.initializers import glorot_uniform from keras.models import Sequential, Model -from keras.optimizers import RMSprop +from keras.optimizers import Adam, RMSprop + +from koregraph.api.machine_learning.loss import my_mse def prepare_model(X, y) -> Model: @@ -44,7 +53,7 @@ def prepare_model(X, y) -> Model: Dropout(rate=0.2), Dense(64, activation="relu"), Dropout(rate=0.2), - Dense(y.shape[1], activation="sigmoid"), + Dense(y.shape[1], activation="relu"), ] ) @@ -80,5 +89,127 @@ def initialize_model(X, y) -> Model: return 
compiled_model +def initialize_model_chunks(X, y) -> Model: + """Initialize a compiled model. + + Returns: + Model: The compiled model. + """ + + # normalization_layer = Normalization() + # normalization_layer.adapt(X) + print("x 0 shape", X[0].shape) + new_model = Sequential( + [ + # normalization_layer, + Bidirectional( + LSTM( + 256, + activation="tanh", + kernel_initializer=glorot_uniform(), + return_sequences=True, + ), + ), + # # BatchNormalization(), + LSTM(128, activation="tanh", recurrent_dropout=0.2), + Dense(256, activation="relu"), + Dense(256, activation="relu", activity_regularizer="l2"), + Dense(128, activation="relu", activity_regularizer="l2"), + Dropout(rate=0.2), + Dense(64, activation="relu", activity_regularizer="l2"), + Dropout(rate=0.2), + Dense(y.shape[1], activation="linear"), + ] + ) + + adam = Adam(learning_rate=0.00001) # , clipvalue=0.01) + new_model.compile(loss="mse", optimizer=adam, metrics=["mae"]) + return new_model + + +def initialize_model_next_chunks(X, y) -> Model: + """Initialize a compiled model. + + Returns: + Model: The compiled model. 
+ """ + + new_model = Sequential( + [ + Input(X[0].shape), + Conv2D( + 512, + kernel_size=(3, 3), + activation="relu", + padding="same", + ), + MaxPooling2D((2, 2), padding="same"), + Conv2D( + 256, + kernel_size=(3, 3), + activation="relu", + padding="same", + ), + MaxPooling2D((2, 2), padding="same"), + Conv2D( + 128, + kernel_size=(3, 3), + activation="relu", + padding="same", + ), + MaxPooling2D((2, 2), padding="same"), + Conv2D( + 64, + kernel_size=(3, 3), + activation="relu", + padding="same", + ), + MaxPooling2D((2, 2), padding="same"), + Conv2D( + 128, + kernel_size=(3, 3), + activation="relu", + padding="same", + ), + MaxPooling2D((2, 2), padding="same"), + Conv2DTranspose( + 32, (3, 3), strides=(2, 2), padding="same", activation="relu" + ), + Conv2DTranspose( + 16, (3, 3), strides=(2, 2), padding="same", activation="relu" + ), + Conv2DTranspose( + 1, (3, 3), strides=(2, 2), padding="same", activation="relu" + ), + TimeDistributed(Flatten()), + Bidirectional( + LSTM( + 512, + activation="tanh", + kernel_initializer=glorot_uniform(), + return_sequences=True, + ), + ), + Bidirectional( + LSTM( + 256, + activation="tanh", + kernel_initializer=glorot_uniform(), + return_sequences=False, + ), + ), + Dense(256, activation="relu"), + Dense(128, activation="relu"), + Dropout(rate=0.2), + Dense(64, activation="relu"), + Dropout(rate=0.2), + Dense(y.shape[1], activation="relu"), + ] + ) + + new_model.compile(loss="mse", optimizer="adam", metrics=["mae"]) + return new_model + + if __name__ == "__main__": initialize_model() diff --git a/koregraph/api/machine_learning/prediction_workflow.py b/koregraph/api/machine_learning/prediction_workflow.py index a6052a9..cfe2940 100644 --- a/koregraph/api/machine_learning/prediction_workflow.py +++ b/koregraph/api/machine_learning/prediction_workflow.py @@ -1,12 +1,14 @@ -from numpy import ones as np_ones +from numpy import ones as np_ones, concatenate, append from tensorflow.keras.models import load_model from 
koregraph.models.choregraphy import Choregraphy from koregraph.utils.controllers.choregraphies import save_choregaphy_chunk +from koregraph.utils.controllers.pickles import load_pickle_object from koregraph.api.preprocessing.audio_preprocessing import music_to_numpy from koregraph.api.preprocessing.posture_preprocessing import ( upscale_posture_pred, posture_array_to_keypoints, + downscale_posture, ) from koregraph.tools.video_builder import ( keypoints_video_audio_builder_from_choreography, @@ -15,11 +17,22 @@ AUDIO_DIRECTORY, MODEL_OUTPUT_DIRECTORY, PREDICTION_OUTPUT_DIRECTORY, + PERCENTAGE_CUT, + GENERATED_KEYPOINTS_DIRECTORY, + GENERATED_AUDIO_DIRECTORY, + CHUNK_SIZE, + FRAME_FORMAT, ) +from koregraph.api.preprocessing.posture_preprocessing import cut_percentage from koregraph.api.preprocessing.audio_preprocessing import scale_audio -def predict(audio_name: str = "mBR0", model_name: str = "model", backup: bool = False): +def predict( + audio_name: str = "mBR0", + model_name: str = "model", + backup: bool = False, + chunk: bool = False, +): # model_path = MODEL_OUTPUT_DIRECTORY / (model_name + ".pkl") # model = load_pickle_object(model_path) @@ -39,8 +52,14 @@ def predict(audio_name: str = "mBR0", model_name: str = "model", backup: bool = prediction = model.predict(input) prediction = upscale_posture_pred(posture_array_to_keypoints(prediction)) + print("Prediction shape:", prediction.shape) + print("prediction min", prediction.min()) + print("prediction max", prediction.max()) prediction_name = ( - (model_name + "_" + audio_name) if audio_name not in model_name else model_name + # model_name.replace("_", "-") + "_sBM_cAll_d00_" + audio_name + "_ch01" + (model_name + "_" + audio_name) + if audio_name not in model_name + else model_name ) chore = Choregraphy(prediction_name, prediction.reshape(-1, 17, 2)) @@ -53,5 +72,124 @@ def predict(audio_name: str = "mBR0", model_name: str = "model", backup: bool = print("Happy viewing!") +def predict_next_move( + audio_name: 
str = "mBR0", + model_name: str = "model", + chore_chunk_name: str = "gBR_sFM_cAll_d04_mBR0_ch01", + chunk_id: int = 0, + perc_cut: float = PERCENTAGE_CUT, + backup: bool = False, +): + def build_next_input(first_frames, prediction): + prediction_frame_size = int((CHUNK_SIZE * perc_cut) * 60) + first_frames = first_frames.reshape( + -1, int((CHUNK_SIZE * (1 - perc_cut)) * 60), 17, 2 + ) + prediction = prediction.reshape(-1, prediction_frame_size, 17, 2) + + print(first_frames.shape) + print(prediction.shape) + return concatenate( + (first_frames[0, prediction_frame_size:, :, :], prediction[0, :, :, :]), + axis=0, + ) + + # model_path = MODEL_OUTPUT_DIRECTORY / (model_name + ".pkl") + # model = load_pickle_object(model_path) + + model_path = ( + MODEL_OUTPUT_DIRECTORY + / model_name + / f"{model_name}{'_backup' if backup else ''}.keras" + ) + model = load_model(model_path) + + audio_filepath = ( + GENERATED_AUDIO_DIRECTORY + / audio_name + / str(CHUNK_SIZE) + / (f"{audio_name}_{chunk_id}.mp3") + ) + audio = music_to_numpy(audio_filepath) + + # cut input, take 8 sec + print("Before cut", audio.shape) + audio, _ = cut_percentage(audio, perc_cut) + print("After cut", audio.shape) + print("min audio: ", audio.min()) + print("max audio: ", audio.max()) + # add beginning of chore + chore = load_pickle_object( + GENERATED_KEYPOINTS_DIRECTORY + / chore_chunk_name + / str(CHUNK_SIZE) + / (f"{chore_chunk_name}_{chunk_id}.pkl") + ) + input = chore["keypoints2d"] + + input = downscale_posture(input) + input = input.reshape(-1, 34) + input, _ = cut_percentage(input.reshape(-1, 34), perc_cut) + + print("min input: ", input.min()) + print("max input: ", input.max()) + + input = input.reshape(-1, int((CHUNK_SIZE * (1 - perc_cut)) * 60), 17, 2) + + print(input.shape) + all_output = input + + # Predict first frame + print("Prediction 0") + prediction = model.predict(input) + print("prediction shape", prediction.shape) + print("prediction min", prediction.min()) + print("prediction 
max", prediction.max()) + all_output = concatenate( + (all_output[0].reshape(-1, 17, 2), prediction.reshape(-1, 17, 2)), axis=0 + ) + + # Predict next frame + for i in range(4): + print(f"Prediction {i+1}") + input = build_next_input(input, prediction) + input = input.reshape(-1, int((CHUNK_SIZE * (1 - perc_cut)) * 60), 17, 2) + print(f"Next input shape {input.shape}") + prediction = model.predict(input) + all_output = concatenate((all_output, prediction.reshape(-1, 17, 2)), axis=0) + print("prediction shape", prediction.shape) + print("prediction min", prediction.min()) + print("prediction max", prediction.max()) + + print(prediction.shape) + prediction = upscale_posture_pred(posture_array_to_keypoints(prediction)) + all_output = upscale_posture_pred(posture_array_to_keypoints(all_output)) + + print("Prediction shape:", prediction.shape) + print("All shape:", all_output.shape) + prediction_name = ( + model_name.replace("_", "-") + "_sBM_cAll_d00_" + audio_name + "_ch01" + ) + output_name = model_name.replace("_", "-") + "_sBM_cAll_d00_" + audio_name + "_ch02" + + output = append(upscale_posture_pred(posture_array_to_keypoints(input)), prediction) + + chore = Choregraphy( + prediction_name, output.reshape(-1, 17, 2), np_ones(output.shape[0]) + ) + chore_all = Choregraphy( + output_name, all_output.reshape(-1, 17, 2), np_ones(all_output.shape[0]) + ) + # Save prediction to pkl + save_choregaphy_chunk(chore, PREDICTION_OUTPUT_DIRECTORY) + save_choregaphy_chunk(chore_all, PREDICTION_OUTPUT_DIRECTORY) + + # Create video + keypoints_video_audio_builder_from_choreography(chore, audio_name) + # keypoints_video_audio_builder_from_choreography(chore_all) + + print("Happy viewing!") + + if __name__ == "__main__": predict(audio_name="mBR2", chore_id="02") diff --git a/koregraph/api/preprocessing/chunks.py b/koregraph/api/preprocessing/chunks_api.py similarity index 93% rename from koregraph/api/preprocessing/chunks.py rename to koregraph/api/preprocessing/chunks_api.py index 
f4517bf..1d55760 100644 --- a/koregraph/api/preprocessing/chunks.py +++ b/koregraph/api/preprocessing/chunks_api.py @@ -24,10 +24,10 @@ def generate_chunk( choregraphy_name: str, chunk_size: int = CHUNK_SIZE, reload_music: bool = False ): # Clean previous chunks out if needed - chore_path = GENERATED_KEYPOINTS_DIRECTORY / choregraphy_name / chunk_size + chore_path = GENERATED_KEYPOINTS_DIRECTORY / choregraphy_name / str(chunk_size) reset_chunks(chore_path) _, _, _, _, music_name, _ = choregraphy_name.split("_") - music_path = GENERATED_AUDIO_DIRECTORY / music_name / chunk_size + music_path = GENERATED_AUDIO_DIRECTORY / music_name / str(chunk_size) reset_chunks(music_path, reload_music) # Get and save chunks diff --git a/koregraph/api/preprocessing/dataset_preprocessing.py b/koregraph/api/preprocessing/dataset_preprocessing.py index b26ce95..57569ce 100644 --- a/koregraph/api/preprocessing/dataset_preprocessing.py +++ b/koregraph/api/preprocessing/dataset_preprocessing.py @@ -16,6 +16,7 @@ AUDIO_DIRECTORY, GENERATED_AUDIO_SILENCE_DIRECTORY, GENERATED_PICKLE_DIRECTORY, + CHUNK_SIZE, ) from koregraph.models.aist_file import AISTFile from koregraph.models.choregraphy import Choregraphy @@ -29,6 +30,7 @@ posture_array_to_keypoints, upscale_posture_pred, ) +from koregraph.api.preprocessing.chunks_api import generate_chunk from koregraph.utils.controllers.musics import load_music, save_audio_chunk @@ -88,7 +90,10 @@ def generate_training_pickles( # Ensure X and y have the same length if len(train_choregraphy) != len(train_audio): - train_audio = train_audio[: len(train_choregraphy)] + if len(train_choregraphy) > len(train_audio): + train_choregraphy = train_choregraphy[: len(train_audio)] + else: + train_audio = train_audio[: len(train_choregraphy)] assert len(train_choregraphy) == len( train_audio @@ -106,6 +111,15 @@ def generate_training_pickles( return generated_pickles_path +def generate_all_chunks(): + for chore in ALL_ADVANCED_MOVE_NAMES: + try: + 
generate_chunk(chore.name, CHUNK_SIZE, False) + except Exception as e: + print(f"Error with {chore.name}: {e}") + continue + + if __name__ == "__main__": test_file = ALL_ADVANCED_MOVE_NAMES[0] diff --git a/koregraph/api/preprocessing/posture_preprocessing.py b/koregraph/api/preprocessing/posture_preprocessing.py index 5e9164c..fa3efe1 100644 --- a/koregraph/api/preprocessing/posture_preprocessing.py +++ b/koregraph/api/preprocessing/posture_preprocessing.py @@ -1,7 +1,7 @@ from pickle import load as load_pickle from typing import Tuple -from numpy import ndarray, nan_to_num, ones, zeros +from numpy import ndarray, nan_to_num, ones, zeros, asarray, isnan from koregraph.api.preprocessing.interpolation import add_transition from koregraph.config.params import ( @@ -174,6 +174,30 @@ def generate_and_export_choreography(posture_file_2): return final_array +def fill_forward(arr): + """ + Fill NaN values in the array with the previous row's values for each column. + + Parameters: + arr (numpy.ndarray): Input array with possible NaN values. + + Returns: + numpy.ndarray: Array with NaN values filled forward. 
+ """ + arr = asarray(arr, dtype=float) + + for i in range(1, arr.shape[0]): + mask = isnan(arr[i, :]) + arr[i, mask] = arr[i - 1, mask] + + return arr + + +def cut_percentage(x: ndarray, perc: float) -> Tuple[ndarray, ndarray]: + idx = len(x) - int(len(x) * perc) + return x[:idx], x[idx:] + + if __name__ == "__main__": export_choregraphy_keypoints( Choregraphy( diff --git a/koregraph/api/training/chunks_workflow.py b/koregraph/api/training/chunks_workflow.py new file mode 100644 index 0000000..7d12aec --- /dev/null +++ b/koregraph/api/training/chunks_workflow.py @@ -0,0 +1,40 @@ +from numpy import expand_dims, float32, ndarray, isnan, any, isinf + +from koregraph.api.machine_learning.neural_network import initialize_model_chunks +from koregraph.api.machine_learning.load_dataset import ( + load_chunk_preprocess_dataset, +) +from koregraph.utils.controllers.pickles import save_object_pickle + + +def train_chunks_workflow(model_name: str = "model"): + + X, y = load_chunk_preprocess_dataset() + + y = y.astype(float32) + print("y has nan", isnan(y).any()) + print("X has nan", isnan(X).any()) + print("y has inf", isinf(y).any()) + print("X has inf", isinf(X).any()) + print("Y min", y.min()) + print("Y max", y.max()) + + print("Model X shape:", X.shape) + print("Model y shape:", y.shape) + + model = initialize_model_chunks(X, y) + + history = model.fit( + x=X, + y=y, + validation_split=0.2, + batch_size=16, + epochs=50, + ) + + save_object_pickle(model, model_name) + save_object_pickle(history, model_name + "_history") + + +if __name__ == "__main__": + train_chunks_workflow() diff --git a/koregraph/api/training/next_chunks_workflow.py b/koregraph/api/training/next_chunks_workflow.py new file mode 100644 index 0000000..dd7c853 --- /dev/null +++ b/koregraph/api/training/next_chunks_workflow.py @@ -0,0 +1,107 @@ +from numpy import expand_dims, float32, ndarray, isnan, any, isinf +from tensorflow.keras import Model +from tensorflow.keras.callbacks import ModelCheckpoint, 
EarlyStopping + +from koregraph.api.machine_learning.neural_network import initialize_model_next_chunks +from koregraph.api.machine_learning.load_dataset import ( + load_next_chunks_preprocess_dataset as load_preprocess_dataset, +) +from koregraph.utils.controllers.pickles import save_object_pickle, load_pickle_object +from koregraph.config.params import ( + GENERATED_FEATURES_DIRECTORY, + CHUNK_SIZE, + PERCENTAGE_CUT, + WEIGHTS_BACKUP_DIRECTORY, + MODEL_OUTPUT_DIRECTORY, +) +from koregraph.api.machine_learning.callbacks import GCSCallback, HistorySaver + + +def train_workflow( + model_name: str = "model", + epochs: int = 16, + batch_size: int = 16, + dataset_size: float = 1.0, + backup_model: Model = None, + initial_epoch: int = 0, + patience: int = 20, + with_cloud: bool = False, +): + + X, y = load_preprocess_dataset(dataset_size=dataset_size) + # X = load_pickle_object(GENERATED_FEATURES_DIRECTORY / "x.pkl") + # y = load_pickle_object(GENERATED_FEATURES_DIRECTORY / "y.pkl") + y = y.astype(float32) + + print("y has nan", isnan(y).any()) + print("X has nan", isnan(X).any()) + print("y has inf", isinf(y).any()) + print("X has inf", isinf(X).any()) + print("Y min", y.min()) + print("Y max", y.max()) + + X = X.reshape(-1, int((CHUNK_SIZE * (1 - PERCENTAGE_CUT)) * 60), 17, 2) + y = y.reshape(-1, int(CHUNK_SIZE * PERCENTAGE_CUT * 60 * 17 * 2)) + + print("Model X shape:", X.shape) + print("Model y shape:", y.shape) + + model = initialize_model_next_chunks(X, y) if backup_model is None else backup_model + + model_backup_path = MODEL_OUTPUT_DIRECTORY / model_name + model_backup_path.mkdir(parents=True, exist_ok=True) + + model_callbacks = ( + [ + ModelCheckpoint( + WEIGHTS_BACKUP_DIRECTORY / f"{model_name}_backup.keras", + monitor="val_loss", + verbose=0, + save_best_only=False, + save_weights_only=False, + mode="auto", + save_freq="epoch", + initial_value_threshold=None, + ), + EarlyStopping( + monitor="val_loss", + patience=patience, + verbose=0, + 
restore_best_weights=True, + ), + ], + ) + model.summary() + + if with_cloud: + model_callbacks.append(GCSCallback(model_backup_path, "koregraph")) + + history = model.fit( + x=X, + y=y, + validation_split=0.2, + batch_size=batch_size, + epochs=epochs, + initial_epoch=initial_epoch, + callbacks=model_callbacks, + ) + + print("Exporting model locally") + (MODEL_OUTPUT_DIRECTORY / model_name).mkdir(exist_ok=True, parents=True) + model.save(MODEL_OUTPUT_DIRECTORY / model_name / f"{model_name}.keras") + save_object_pickle( + history, + model_name + "_history", + MODEL_OUTPUT_DIRECTORY / model_name / f"{model_name}_history.pkl", + ) + + if with_cloud: + print("Exporting model to google cloud storage") + GCSCallback(model_backup_path, "koregraph").upload_file_to_gcs( + MODEL_OUTPUT_DIRECTORY / model_name / f"{model_name}.keras", + f"generated/models/{model_name}/", + ) + + +if __name__ == "__main__": + train_workflow() diff --git a/koregraph/cli/chunk_command.py b/koregraph/cli/chunk_command.py index db757b9..5421a1e 100644 --- a/koregraph/cli/chunk_command.py +++ b/koregraph/cli/chunk_command.py @@ -2,7 +2,8 @@ from pathlib import Path from koregraph.config.params import CHUNK_SIZE -from koregraph.api.preprocessing.chunks import generate_chunk +from koregraph.api.preprocessing.chunks_api import generate_chunk +from koregraph.api.preprocessing.dataset_preprocessing import generate_all_chunks parser = ArgumentParser( "Koregraph chunk", @@ -10,14 +11,15 @@ ) parser.add_argument( - "-c", "--choregraphy", dest="choregraphy", required=True, help="Choregraphy name" + "-c", "--choregraphy", dest="choregraphy", required=False, help="Choregraphy name" ) +parser.add_argument("-a", "--all", dest="all", action="store_true") parser.add_argument( "-s", "--size", dest="chunk_size", - required=True, + required=False, help="Size of chunks in seconds", default=CHUNK_SIZE, ) @@ -34,6 +36,11 @@ def main(): arguments = parser.parse_args() + if arguments.all: + generate_all_chunks() + return 
+ + assert arguments.choregraphy is not None choregraphy_name = Path(arguments.choregraphy).stem chunk_size = arguments.chunk_size reload_music = arguments.reload_music diff --git a/koregraph/cli/model.py b/koregraph/cli/model.py index 10432b3..8ec6879 100644 --- a/koregraph/cli/model.py +++ b/koregraph/cli/model.py @@ -8,8 +8,16 @@ from koregraph.utils.controllers.pickles import load_pickle_object from koregraph.api.training.train_workflow import train_workflow +from koregraph.api.training.chunks_workflow import train_chunks_workflow +from koregraph.api.training.next_chunks_workflow import ( + train_workflow as train_pred_next_workflow, +) from koregraph.api.environment.training_cloud import run_mlflow_pipeline -from koregraph.config.params import WEIGHTS_BACKUP_DIRECTORY +from koregraph.config.params import ( + WEIGHTS_BACKUP_DIRECTORY, + MODEL_OUTPUT_DIRECTORY, + AUDIO_DIRECTORY, +) from koregraph.utils.storage import init_file_storage @@ -27,6 +35,10 @@ default="model", ) +parser.add_argument("--chunks", dest="chunks", action="store_true") + +parser.add_argument("--next-chunks", dest="predict_next", action="store_true") + parser.add_argument( "-c", "--with-cloud", @@ -92,6 +104,8 @@ def main(): dataset_size = float(arguments.dataset_size) batch_size = int(arguments.batch_size) restore_backup = bool(arguments.restore_backup) + chunks = bool(arguments.chunks) + predict_next = bool(arguments.predict_next) epochs = int(arguments.epochs) patience = int(arguments.patience) @@ -124,16 +138,33 @@ def main(): if model is not None: print(f"Using backup for model {model_name} at epoch {initial_epoch}") - train_workflow( - model_name=model_name, - epochs=epochs, - batch_size=batch_size, - dataset_size=dataset_size, - backup_model=model, - initial_epoch=initial_epoch, - patience=patience, - with_cloud=with_cloud, - ) + if chunks: + print("Training with chunks") + train_chunks_workflow(model_name=model_name) + elif predict_next: + print("Training with chunks: predicting next 
X seconds") + train_pred_next_workflow( + model_name=model_name, + epochs=epochs, + batch_size=batch_size, + dataset_size=dataset_size, + backup_model=model, + initial_epoch=initial_epoch, + patience=patience, + with_cloud=with_cloud, + ) + else: + print("Running training locally") + train_workflow( + model_name=model_name, + epochs=epochs, + batch_size=batch_size, + dataset_size=dataset_size, + backup_model=model, + initial_epoch=initial_epoch, + patience=patience, + with_cloud=with_cloud, + ) if __name__ == "__main__": diff --git a/koregraph/cli/predict.py b/koregraph/cli/predict.py index 579de14..afdafa4 100644 --- a/koregraph/cli/predict.py +++ b/koregraph/cli/predict.py @@ -1,6 +1,9 @@ from argparse import ArgumentParser -from koregraph.api.machine_learning.prediction_workflow import predict as predict_api +from koregraph.api.machine_learning.prediction_workflow import ( + predict as predict_api, + predict_next_move, +) from koregraph.utils.cloud.cloud_bucket import download_model_history_from_bucket parser = ArgumentParser( @@ -20,6 +23,10 @@ default="model", ) +parser.add_argument( + "-c", "--choregraphy", dest="choregraphy", required=False, help="Choregraphy name" +) + parser.add_argument( "--from-cloud", dest="from_cloud", @@ -36,6 +43,14 @@ help="When passed, will use the backup model.", ) +parser.add_argument( + "-i", "--chunk-id", dest="chunk_id", required=False, help="Choregraphy name" +) + +parser.add_argument("--chunks", dest="is_chunks", action="store_true") + +parser.add_argument("--predict-next", dest="predict_next", action="store_true") + def main(): arguments = parser.parse_args() @@ -53,7 +68,26 @@ def main(): if backup: print("Using backup model") - predict_api(audio_name=audio_name, model_name=model_name, backup=backup) + is_chunks = arguments.is_chunks + choregraphy = arguments.choregraphy + chunk_id = arguments.chunk_id + + if is_chunks: + predict_api(audio_name=audio_name, model_name=model_name, chunk=is_chunks) + elif 
arguments.predict_next: + assert choregraphy is not None + assert chunk_id is not None + predict_next_move( + audio_name=audio_name, + model_name=model_name, + chore_chunk_name=choregraphy, + chunk_id=chunk_id, + backup=backup, + ) + else: + predict_api( + audio_name=audio_name, model_name=model_name, backup=backup, chunk=is_chunks + ) if __name__ == "__main__": diff --git a/koregraph/config/params.py b/koregraph/config/params.py index 29137a9..642abe1 100644 --- a/koregraph/config/params.py +++ b/koregraph/config/params.py @@ -66,6 +66,11 @@ "GENERATED_PICKLE_DIRECTORY", GENERATED_OUTPUT_PATH / "data", Path ) +GENERATED_FEATURES_DIRECTORY: Path = get_env_or_default( + "GENERATED_FEATURES_DIRECTORY", GENERATED_OUTPUT_PATH / "features", Path +) +GENERATED_FEATURES_DIRECTORY.mkdir(parents=True, exist_ok=True) + GENERATED_LOSS_DIRECTORY: Path = get_env_or_default( "GENERATED_LOSS_DIRECTORY", GENERATED_OUTPUT_PATH / "loss/", Path ) @@ -122,9 +127,11 @@ LAST_CHUNK_TYPE_STRATEGY = LAST_CHUNK_TYPE.ROLLING -CHUNK_SIZE: int = get_env_or_default("CHUNK_SIZE", 10, int) +CHUNK_SIZE: int = get_env_or_default("CHUNK_SIZE", 5, int) FRAME_FORMAT = get_env_or_default("FRAME_FORMAT", (1920, 1080), tuple) X_MIN = -80 X_MAX = 0 + +PERCENTAGE_CUT = 0.05