From 28486455eb5bea4abda7e8711d950330ba83d7ca Mon Sep 17 00:00:00 2001 From: Juan C Date: Wed, 10 Feb 2021 12:28:01 -0500 Subject: [PATCH 01/43] Enable export all single cells --- deepprofiler/__main__.py | 8 ++++++-- deepprofiler/dataset/sampling.py | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index f1959559..db4bda15 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -142,12 +142,16 @@ def prepare(context): # Second tool: Sample single cells for training @cli.command() +@click.option("--mode", default="sample") @click.pass_context -def sample_sc(context): +def sample_sc(context, mode): if context.parent.obj["config"]["prepare"]["compression"]["implement"]: context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"] dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"]) - deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset) + if mode == "sample": + deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset) + elif mode == "export_all": + deepprofiler.dataset.sampling.export_dataset(context.obj["config"], dset) print("Single-cell sampling complete.") diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index 010100c5..83a2b1ec 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -15,6 +15,7 @@ class SingleCellSampler(deepprofiler.imaging.cropping.CropGenerator): def start(self, session): + self.all_metadata = [] self.session = session # Define input data batches with tf.variable_scope("train_inputs"): @@ -48,6 +49,18 @@ def process_batch(self, batch): output = self.session.run(self.input_variables["labeled_crops"], feed_dict) return output[0], metadata.reset_index(drop=True) + def export_single_cells(self, key, image_array, meta): + outdir = self.config["paths"]["single_cell_sample"] + key = self.dset.keyGen(meta) + batch = {"keys": [key], "images": [image_array], "targets": [], "locations": []} + batch["locations"].append(deepprofiler.imaging.boxes.get_locations(key, self.config)) + batch["targets"].append([t.get_values(meta) for t in self.dset.targets]) + crops, metadata = self.process_batch(batch) + for j in range(crops.shape[0]): + image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:]) + skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image) + self.all_metadata.append(metadata) + print("{}: {} single cells".format(key, crops.shape[0])) def start_session(): configuration = tf.ConfigProto() @@ -112,3 +125,17 @@ def sample_dataset(config, dset): all_metadata = pd.concat(all_metadata).reset_index(drop=True) all_metadata.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False) +def export_dataset(config, dset): + outdir = config["paths"]["single_cell_sample"] + if not is_directory_empty(outdir): + return + + session = start_session() + cropper = SingleCellSampler(config, dset) + cropper.start(session) + dset.scan(cropper.export_single_cells, frame="all") + df = pd.concat(cropper.all_metadata).reset_index(drop=True) + df.to_csv(outdir, os.path.join("sc-metadata.csv"), index=False) + print("Exporting: done") + + From ee7bf7f04a04096e477653f5874515716b030839 Mon Sep 17 00:00:00 2001 From: Juan C Date: Thu, 11 Feb 2021 10:55:14 -0500 Subject: [PATCH 02/43] Fix csv output dir --- deepprofiler/dataset/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index 83a2b1ec..d710ae6d 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -135,7 +135,7 @@ def export_dataset(config, dset): cropper.start(session) dset.scan(cropper.export_single_cells, frame="all") df = pd.concat(cropper.all_metadata).reset_index(drop=True) - df.to_csv(outdir, os.path.join("sc-metadata.csv"), index=False) + df.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False) print("Exporting: done") From c01485a89037b5485e839b523530bb431fe04aa4 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 21 Jun 2021 19:05:28 +0200 Subject: [PATCH 03/43] Fixed the issue that targets could not be obtained for crop generator. --- deepprofiler/dataset/image_dataset.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py index 5f99a85c..f641dc8d 100644 --- a/deepprofiler/dataset/image_dataset.py +++ b/deepprofiler/dataset/image_dataset.py @@ -230,16 +230,17 @@ def read_dataset(config, mode = 'train'): ) # Add training targets - if mode == 'train': - for t in config["train"]["partition"]["targets"]: - new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique()) - dset.add_target(new_target) + + for t in config["train"]["partition"]["targets"]: + new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique()) + dset.add_target(new_target) # Activate outlines for masking if needed if config["dataset"]["locations"]["mask_objects"]: dset.outlines = outlines - dset.prepare_training_locations() + if mode == 'train': + dset.prepare_training_locations() return dset From f1361c33ab4ff5b0a47671660f4aee09f11284f1 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 27 Jul 2021 10:59:02 -0400 Subject: [PATCH 04/43] Update folder structure --- deepprofiler/dataset/sampling.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index 47227c80..9122da69 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -25,19 +25,20 @@ def process_batch(self, batch): batch["locations"][i]["Target"] = batch["targets"][i][0] batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]] metadata = pd.concat(batch["locations"]) - cols = ["Key","Target","Nuclei_Location_Center_X","Nuclei_Location_Center_Y"] - seps = ["+","@","x",".png"] - metadata["Image_Name"] = "" + cols = ["Key", "Target", "Nuclei_Location_Center_X", "Nuclei_Location_Center_Y"] + seps = ["/", "@", "x", ".png"] + metadata["Image_Name"] = '' for c in range(len(cols)): - metadata["Image_Name"] += metadata[cols[c]].astype(str).str.replace("/","-") + seps[c] - + metadata["Image_Name"] += metadata[cols[c]].astype(str) + seps[c] + print(metadata["Image_Name"]) + boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config) feed_dict = { - self.input_variables["image_ph"]:batch["images"], - self.input_variables["boxes_ph"]:boxes, - self.input_variables["box_ind_ph"]:box_ind, - self.input_variables["mask_ind_ph"]:masks + self.input_variables["image_ph"]: batch["images"], + self.input_variables["boxes_ph"]: boxes, + self.input_variables["box_ind_ph"]: box_ind, + self.input_variables["mask_ind_ph"]: masks } for i in range(len(targets)): tname = "target_" + str(i) @@ -99,7 +100,9 @@ def sample_dataset(config, dset): if len(batch["keys"]) > 0: crops, metadata = cropper.process_batch(batch) for j in range(crops.shape[0]): - image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:]) + image = deepprofiler.imaging.cropping.unfold_channels(crops[j, :, :, :]) + plate, well_site, rest = metadata.loc[j, "Image_Name"].split('/') + os.makedirs(os.path.join(outdir, plate, well_site), exist_ok=True) skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image) all_metadata.append(metadata) From 78e0018069a4b45163af3647f47a4d852004a9d1 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 27 Jul 2021 13:21:47 -0400 Subject: [PATCH 05/43] Add missing imports --- setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0a31a5d8..075bfb74 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,10 @@ "scikit-learn>=0.19.0", "scipy>=1.1", "comet-ml>=3.1.6", - "tensorflow_addons" + "efficientnet>=1.0.0", + "Keras==2.2.5", + "tensorflow_addons", + ], extras_require={ "test": [ @@ -37,4 +40,4 @@ "codecov>=2.0" ] } -) +) \ No newline at end of file From f65fe7d4ad7d5e1c823fa7848519a3f57dbbfcd2 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 27 Jul 2021 13:30:04 -0400 Subject: [PATCH 06/43] Add missing imports --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 075bfb74..050bcc3f 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,6 @@ "efficientnet>=1.0.0", "Keras==2.2.5", "tensorflow_addons", - ], extras_require={ "test": [ From 29db8a0922ad799e5a00e96c1048821d997dad10 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Fri, 6 Aug 2021 16:22:49 -0400 Subject: [PATCH 07/43] Change sampling to folder structure --- deepprofiler/dataset/sampling.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index 9122da69..a2a8d068 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -3,6 +3,7 @@ import threading import tqdm import os +import shutil import tensorflow as tf @@ -21,16 +22,15 @@ def start(self, session): def process_batch(self, batch): for i in range(len(batch["keys"])): - batch["locations"][i]["Key"] = batch["keys"][i] + batch["locations"][i]["Key"] = batch["keys"][i].replace('-', '/') batch["locations"][i]["Target"] = batch["targets"][i][0] batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]] metadata = pd.concat(batch["locations"]) cols = ["Key", "Target", "Nuclei_Location_Center_X", "Nuclei_Location_Center_Y"] seps = ["/", "@", "x", ".png"] - metadata["Image_Name"] = '' + metadata["Image_Name"] = "" for c in range(len(cols)): - metadata["Image_Name"] += metadata[cols[c]].astype(str) + seps[c] - print(metadata["Image_Name"]) + metadata["Image_Name"] += metadata[cols[c]].astype(str).str + seps[c] boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config) @@ -70,8 +70,7 @@ def is_directory_empty(outdir): return False elif erase == "y": print("Removing previous sampled files") - for f in tqdm.tqdm(files): - os.remove(os.path.join(outdir, f)) + shutil.rmtree(outdir) return True @@ -100,9 +99,9 @@ def sample_dataset(config, dset): if len(batch["keys"]) > 0: crops, metadata = cropper.process_batch(batch) for j in range(crops.shape[0]): + plate, well, site, name = metadata.loc[j, "Image_Name"].split('/') + os.makedirs(os.path.join(outdir, plate, well, site), exist_ok=True) image = deepprofiler.imaging.cropping.unfold_channels(crops[j, :, :, :]) - plate, well_site, rest = metadata.loc[j, "Image_Name"].split('/') - os.makedirs(os.path.join(outdir, plate, well_site), exist_ok=True) skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image) all_metadata.append(metadata) From 40aac088e5e2b81c7b835d15c37057ef07d696ad Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Fri, 6 Aug 2021 16:26:50 -0400 Subject: [PATCH 08/43] setup shouldnt change --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 050bcc3f..08d9c96c 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,6 @@ "scikit-learn>=0.19.0", "scipy>=1.1", "comet-ml>=3.1.6", - "efficientnet>=1.0.0", - "Keras==2.2.5", "tensorflow_addons", ], extras_require={ From 6ac33648f065ecd1c3cd0fab2fdba4a238eeee10 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Fri, 6 Aug 2021 16:28:00 -0400 Subject: [PATCH 09/43] setup shouldnt change --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 08d9c96c..6cebe025 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "scikit-learn>=0.19.0", "scipy>=1.1", "comet-ml>=3.1.6", - "tensorflow_addons", + "tensorflow_addons" ], extras_require={ "test": [ From 7504699c0fec77911bac54ef373e81b178ce20eb Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Fri, 6 Aug 2021 16:28:50 -0400 Subject: [PATCH 10/43] setup shouldnt change --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6cebe025..0a31a5d8 100644 --- a/setup.py +++ b/setup.py @@ -37,4 +37,4 @@ "codecov>=2.0" ] } -) \ No newline at end of file +) From b389654b60408d5dd6f05577e918847564ce2e85 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Mon, 9 Aug 2021 16:39:52 -0400 Subject: [PATCH 11/43] Adapted profilng and tests --- deepprofiler/learning/profiling.py | 6 +++--- tests/deepprofiler/learning/test_profiling.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py index a76ae2de..80a20dcc 100644 --- a/deepprofiler/learning/profiling.py +++ b/deepprofiler/learning/profiling.py @@ -52,7 +52,7 @@ def configure(self): print("Extracting output from layer:", self.config["profile"]["feature_layer"]) def check(self, meta): - output_file = self.config["paths"]["features"] + "/{}/{}_{}.npz" + output_file = self.config["paths"]["features"] + "/{}/{}/{}.npz" output_file = output_file.format( meta["Metadata_Plate"], meta["Metadata_Well"], meta["Metadata_Site"]) # Check if features were computed before @@ -65,9 +65,9 @@ def check(self, meta): # Function to process a single image def extract_features(self, key, image_array, meta): # key is a placeholder start = tic() - output_file = self.config["paths"]["features"] + "/{}/{}_{}.npz" + output_file = self.config["paths"]["features"] + "/{}/{}/{}.npz" output_file = output_file.format( meta["Metadata_Plate"], meta["Metadata_Well"], meta["Metadata_Site"]) - os.makedirs(self.config["paths"]["features"] + "/{}".format(meta["Metadata_Plate"]), exist_ok=True) + os.makedirs(self.config["paths"]["features"] + "/{}/{}".format(meta["Metadata_Plate"], meta["Metadata_Well"]), exist_ok=True) batch_size = self.config["profile"]["batch_size"] image_key, image_names, outlines = self.dset.get_image_paths(meta) diff --git a/tests/deepprofiler/learning/test_profiling.py b/tests/deepprofiler/learning/test_profiling.py index f92ced5c..ad6fefd4 100644 --- a/tests/deepprofiler/learning/test_profiling.py +++ b/tests/deepprofiler/learning/test_profiling.py @@ -64,7 +64,7 @@ def test_extract_features(profile, metadata, locations, checkpoint): image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8) profile.configure() profile.extract_features(None, image, meta) - output_file = profile.config["paths"]["features"] + "/{}/{}_{}.npz"\ + output_file = profile.config["paths"]["features"] + "/{}/{}/{}.npz"\ .format(meta["Metadata_Plate"], meta["Metadata_Well"], meta["Metadata_Site"]) assert os.path.isfile(output_file) @@ -73,6 +73,6 @@ def test_profile(config, dataset, data, locations, checkpoint): with tf.compat.v1.Session().as_default(): deepprofiler.learning.profiling.profile(config, dataset) for index, row in dataset.meta.data.iterrows(): - output_file = config["paths"]["features"] + "/{}/{}_{}.npz" \ + output_file = config["paths"]["features"] + "/{}/{}/{}.npz" \ .format(row["Metadata_Plate"], row["Metadata_Well"], row["Metadata_Site"]) assert os.path.isfile(output_file) From 1ac1d7d03e8b0aa360c1d88a9bebd330401bdaf2 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 11 Aug 2021 16:22:48 -0400 Subject: [PATCH 12/43] fix leftover string --- deepprofiler/dataset/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index a2a8d068..849c8207 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -30,7 +30,7 @@ def process_batch(self, batch): seps = ["/", "@", "x", ".png"] metadata["Image_Name"] = "" for c in range(len(cols)): - metadata["Image_Name"] += metadata[cols[c]].astype(str).str + seps[c] + metadata["Image_Name"] += metadata[cols[c]].astype(str) + seps[c] boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config) From e25758951656c75cd0e044cb0054e0aac909273a Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 23 Aug 2021 19:00:15 -0400 Subject: [PATCH 13/43] Expanding data augmentations --- deepprofiler/imaging/augmentations.py | 76 ++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py index 8678ab5f..6478e6ce 100644 --- a/deepprofiler/imaging/augmentations.py +++ b/deepprofiler/imaging/augmentations.py @@ -1,6 +1,6 @@ import numpy as np import tensorflow as tf -import tensorflow_addons +import tensorflow_addons as tfa tf.compat.v1.disable_v2_behavior() @@ -9,8 +9,69 @@ # CROPPING AND TRANSFORMATION OPERATIONS ################################################# +def random_crop(image): + w,h,c = image.shape + + size = tf.random.uniform([1], minval=int(w.value*0.6), maxval=w, dtype=tf.int32) + crop = tf.image.random_crop(image, [size[0],size[0],c]) + + result = tf.image.resize( + tf.expand_dims(crop, 0), [w,h], method="bicubic" + ) + + return result[0,...] + + +def random_illumination(image): + # Make channels independent images + numchn = image.shape[-1].value + source = tf.transpose(image, [2, 1, 0]) + source = tf.expand_dims(source, -1) + source = tf.image.grayscale_to_rgb(source) + + # Apply illumination augmentations + bright = tf.random.uniform([numchn], minval=-0.2, maxval=0.2, dtype=tf.float32) + channels = [tf.image.adjust_brightness(source[s,...], bright[s]) for s in range(numchn)] + contrast = tf.random.uniform([numchn], minval=0.5, maxval=1.5, dtype=tf.float32) + channels = [tf.image.adjust_contrast(channels[s], contrast[s]) for s in range(numchn)] + result = tf.concat([tf.expand_dims(t, 0) for t in channels], axis=0) + + # Recover multi-channel image + result = tf.image.rgb_to_grayscale(result) + result = tf.transpose(result[:,:,:,0], [2, 1, 0]) + return result + + +def random_flips(image): + # Horizontal flips + augmented = tf.image.random_flip_left_right(image) + + # 90 degree rotations + angle = tf.random.uniform([1], minval=0, maxval=4, dtype=tf.int32) + augmented = tf.image.rot90(augmented, angle[0]) + + return augmented + +def random_rotate(image): + w, h, c = image.shape + image = tfa.image.rotate(image, np.pi / tf.random.uniform(shape=[], minval=1, maxval=10, dtype=tf.float32)) + image = tf.image.central_crop(image, 0.7) + return tf.image.resize(image, (w, h)) + + +def augment(image): + if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)): + augm = random_crop(image) + else: + augm = random_rotate(image) + + augm = random_illumination(augm) + augm = random_flips(augm) -def augment(crop): + return augm + + +def old_augment(crop): with tf.compat.v1.variable_scope("augmentation"): # Horizontal flips augmented = tf.image.random_flip_left_right(crop) @@ -39,21 +100,24 @@ def augment(crop): def augment_multiple(crops, parallel=None): - with tf.compat.v1.variable_scope("augmentation"): - return tf.map_fn(augment, crops, parallel_iterations=parallel, dtype=tf.float32) + print("+") + return tf.map_fn(augment, crops, parallel_iterations=parallel, dtype=tf.float32) ## A layer for GPU accelerated augmentations +#AugmentationLayer = tf.keras.layers.Lambda(augment_multiple) + class AugmentationLayer(tf.compat.v1.keras.layers.Layer): def __init__(self, **kwargs): + self.is_training = True super(AugmentationLayer, self).__init__(**kwargs) def build(self, input_shape): return - def call(self, input_tensor, training=False): - if training: + def call(self, input_tensor): + if self.is_training: return augment_multiple(input_tensor) else: return input_tensor From 29200796a6d457e2e8e6e096a3463ca7ac38e4db Mon Sep 17 00:00:00 2001 From: Juan C Date: Tue, 24 Aug 2021 17:51:45 -0400 Subject: [PATCH 14/43] Enhanced augmentations --- deepprofiler/imaging/augmentations.py | 33 +++------------------------ deepprofiler/imaging/cropping.py | 10 ++++---- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py index 6478e6ce..6ab7f00d 100644 --- a/deepprofiler/imaging/augmentations.py +++ b/deepprofiler/imaging/augmentations.py @@ -1,6 +1,7 @@ import numpy as np import tensorflow as tf import tensorflow_addons as tfa +import sys tf.compat.v1.disable_v2_behavior() @@ -39,6 +40,7 @@ def random_illumination(image): # Recover multi-channel image result = tf.image.rgb_to_grayscale(result) result = tf.transpose(result[:,:,:,0], [2, 1, 0]) + result = result / tf.math.reduce_max(result) return result @@ -65,42 +67,13 @@ def augment(image): else: augm = random_rotate(image) - augm = random_illumination(augm) augm = random_flips(augm) + augm = random_illumination(augm) return augm -def old_augment(crop): - with tf.compat.v1.variable_scope("augmentation"): - # Horizontal flips - augmented = tf.image.random_flip_left_right(crop) - - # 90 degree rotations - angle = tf.compat.v1.random_uniform([1], minval=0, maxval=4, dtype=tf.int32) - augmented = tf.image.rot90(augmented, angle[0]) - - # 5 degree inclinations - angle = tf.compat.v1.random_normal([1], mean=0.0, stddev=0.03 * np.pi, dtype=tf.float32) - augmented = tensorflow_addons.image.rotate(augmented, angle[0], interpolation="BILINEAR") - - # Translations (3% movement in x and y) - offsets = tf.compat.v1.random_normal([2], - mean=0, - stddev=int(crop.shape[0].value * 0.03) - ) - augmented = tensorflow_addons.image.translate(augmented, translations=offsets) - - # Illumination changes (10% changes in intensity) - illum_s = tf.compat.v1.random_normal([1], mean=1.0, stddev=0.1, dtype=tf.float32) - illum_t = tf.compat.v1.random_normal([1], mean=0.0, stddev=0.1, dtype=tf.float32) - augmented = augmented * illum_s + illum_t - - return augmented - - def augment_multiple(crops, parallel=None): - print("+") return tf.map_fn(augment, crops, parallel_iterations=parallel, dtype=tf.float32) diff --git a/deepprofiler/imaging/cropping.py b/deepprofiler/imaging/cropping.py index c042914a..ddb32d64 100644 --- a/deepprofiler/imaging/cropping.py +++ b/deepprofiler/imaging/cropping.py @@ -43,11 +43,11 @@ def fold_channels(crop): # Expected input image shape: (h, w * c), with h = w # Output image shape: (h, w, c), with h = w output = np.reshape(crop, (crop.shape[0], crop.shape[0], -1), order="F").astype(np.float) - for i in range(output.shape[-1]): - mean = np.mean(output[:, :, i]) - std = np.std(output[:, :, i]) - output[:, :, i] = (output[:, :, i] - mean) / std - return output + #for i in range(output.shape[-1]): + # mean = np.mean(output[:, :, i]) + # std = np.std(output[:, :, i]) + # output[:, :, i] = (output[:, :, i] - mean) / std + return output / 255. # TODO: implement abstract crop generator From 38fbf43e5fbf0d2f66e4a10800b66afe9bd354c1 Mon Sep 17 00:00:00 2001 From: Juan C Date: Tue, 24 Aug 2021 17:52:22 -0400 Subject: [PATCH 15/43] Implemented average class precision metric --- deepprofiler/learning/training.py | 4 ++++ plugins/metrics/average_class_precision.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 plugins/metrics/average_class_precision.py diff --git a/deepprofiler/learning/training.py b/deepprofiler/learning/training.py index 290580c2..29f1df44 100644 --- a/deepprofiler/learning/training.py +++ b/deepprofiler/learning/training.py @@ -1,3 +1,4 @@ +import tensorflow as tf import importlib ################################################# @@ -8,6 +9,7 @@ def learn_model(config, dset, epoch=1, seed=None, verbose=1): model_module = importlib.import_module("plugins.models.{}".format(config["train"]["model"]["name"])) crop_module = importlib.import_module("plugins.crop_generators.{}".format(config["train"]["model"]["crop_generator"])) + config["num_classes"] = len(dset.training_images["Target"].unique()) if "metrics" in config["train"]["model"].keys(): if type(config["train"]["model"]["metrics"]) not in [list, dict]: raise ValueError("Metrics should be a list or dictionary.") @@ -29,6 +31,8 @@ def learn_model(config, dset, epoch=1, seed=None, verbose=1): for k, v in config["train"]["model"]["metrics"].items()} else: metrics = ["accuracy"] + + importlib.invalidate_caches() crop_generator = crop_module.GeneratorClass diff --git a/plugins/metrics/average_class_precision.py b/plugins/metrics/average_class_precision.py new file mode 100644 index 00000000..71fb675d --- /dev/null +++ b/plugins/metrics/average_class_precision.py @@ -0,0 +1,19 @@ +import tensorflow as tf +from deepprofiler.learning.metric import Metric + +tf.compat.v1.disable_v2_behavior() + +class MetricClass(Metric): + + def create_metric(self): + def metric_func(y_true, y_pred): + return self.metric(y_true, y_pred) + metric_func.__name__ = "average_class_precision" + self.f = metric_func + + def metric(self, y_true, y_pred): + result = 0 + self.single_class_prec = [tf.keras.metrics.Precision(class_id=cls) for cls in range(self.config["num_classes"])] + for cls_prec in self.single_class_prec: + result += cls_prec(y_true, y_pred) + return result / len(self.single_class_prec) From 306fc182d7d159143d4c558e022e2987d77b0bfe Mon Sep 17 00:00:00 2001 From: Juan C Date: Sat, 28 Aug 2021 13:11:13 -0400 Subject: [PATCH 16/43] Removing object state (not needed) --- plugins/metrics/average_class_precision.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/metrics/average_class_precision.py b/plugins/metrics/average_class_precision.py index 71fb675d..b4c40117 100644 --- a/plugins/metrics/average_class_precision.py +++ b/plugins/metrics/average_class_precision.py @@ -13,7 +13,7 @@ def metric_func(y_true, y_pred): def metric(self, y_true, y_pred): result = 0 - self.single_class_prec = [tf.keras.metrics.Precision(class_id=cls) for cls in range(self.config["num_classes"])] - for cls_prec in self.single_class_prec: + single_class_prec = [tf.keras.metrics.Precision(class_id=cls) for cls in range(self.config["num_classes"])] + for cls_prec in single_class_prec: result += cls_prec(y_true, y_pred) - return result / len(self.single_class_prec) + return result / len(single_class_prec) From 43f0223cf9b33a44fe7c82400e5e6c50b1ae6b09 Mon Sep 17 00:00:00 2001 From: Nikita Moshkov Date: Sat, 28 Aug 2021 16:12:08 -0400 Subject: [PATCH 17/43] Fall back to external implementation of EffNet + first layer replication --- plugins/models/efficientnet.py | 37 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index 44ad732c..aea98a5f 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -1,4 +1,6 @@ import tensorflow as tf +import numpy +import efficientnet.tfkeras as efn from deepprofiler.learning.model import DeepProfilerModel from deepprofiler.imaging.augmentations import AugmentationLayer @@ -14,14 +16,14 @@ def __init__(self, config, dset, generator, val_generator, is_training): ## Define supported models def get_supported_models(self): return { - 0: tf.compat.v1.keras.applications.EfficientNetB0, - 1: tf.compat.v1.keras.applications.EfficientNetB1, - 2: tf.compat.v1.keras.applications.EfficientNetB2, - 3: tf.compat.v1.keras.applications.EfficientNetB3, - 4: tf.compat.v1.keras.applications.EfficientNetB4, - 5: tf.compat.v1.keras.applications.EfficientNetB5, - 6: tf.compat.v1.keras.applications.EfficientNetB6, - 7: tf.compat.v1.keras.applications.EfficientNetB7, + 0: efn.EfficientNetB0, + 1: efn.EfficientNetB1, + 2: efn.EfficientNetB2, + 3: efn.EfficientNetB3, + 4: efn.EfficientNetB4, + 5: efn.EfficientNetB5, + 6: efn.EfficientNetB6, + 7: efn.EfficientNetB7, } def get_model(self, config, input_image=None, weights=None, include_top=False): @@ -100,9 +102,24 @@ def copy_pretrained_weights(self): # => Transfer all weights except conv1.1 total_layers = len(base_model.layers) - for i in range(5, total_layers): + for i in range(2, total_layers): if len(base_model.layers[i].weights) > 0: print("Setting pre-trained weights: {:.2f}%".format((i / total_layers) * 100), end="\r") self.feature_model.layers[i + lshift].set_weights(base_model.layers[i].get_weights()) - + + # => Replicate filters of first layer as needed + weights = base_model.layers[1].get_weights() + available_channels = weights[0].shape[2] + target_shape = self.feature_model.layers[1 + lshift].weights[0].shape + new_weights = numpy.zeros(target_shape) + + for i in range(new_weights.shape[2]): + j = i % available_channels + new_weights[:,:,i,:] = weights[0][:,:,j,:] + + weights_array = [new_weights] + if len(weights) > 1: + weights_array += weights[1:] + + self.feature_model.layers[1 + lshift].set_weights(weights_array) print("Network initialized with pretrained ImageNet weights") From 65154c4a92eff26e7965d9e8003f2784bb5ab3ae Mon Sep 17 00:00:00 2001 From: John Arevalo Date: Tue, 24 Aug 2021 18:59:32 -0400 Subject: [PATCH 18/43] Add tqdm and tensorflow as dependencies --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0a31a5d8..548a240c 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,9 @@ "scikit-learn>=0.19.0", "scipy>=1.1", "comet-ml>=3.1.6", - "tensorflow_addons" + "tensorflow==2.5.*", + "tensorflow_addons", + "tqdm>=4.62" ], extras_require={ "test": [ From c91b9d821a37d90583d19d209be2e53fe3f08d8d Mon Sep 17 00:00:00 2001 From: John Arevalo Date: Wed, 25 Aug 2021 18:16:39 -0400 Subject: [PATCH 19/43] Rename pretrain models with _name property --- deepprofiler/learning/profiling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py index ef1ddfad..33849334 100644 --- a/deepprofiler/learning/profiling.py +++ b/deepprofiler/learning/profiling.py @@ -41,7 +41,7 @@ def configure(self): self.dpmodel.feature_model.load_weights(checkpoint) except ValueError: print("Loading weights without classifier (different number of classes)") - self.dpmodel.feature_model.layers[-1].name = "classifier" + self.dpmodel.feature_model.layers[-1]._name = "classifier" self.dpmodel.feature_model.load_weights(checkpoint, by_name=True) self.dpmodel.feature_model.summary() From 66f20370ff121b0b5fb7c3b061aaa38237458df9 Mon Sep 17 00:00:00 2001 From: Juan C Date: Tue, 14 Sep 2021 15:45:00 -0400 Subject: [PATCH 20/43] Various training improvements --- deepprofiler/imaging/augmentations.py | 6 +-- deepprofiler/imaging/cropping.py | 9 ++-- deepprofiler/learning/model.py | 1 + .../crop_generators/sampled_crop_generator.py | 53 +++++++++---------- plugins/models/efficientnet.py | 4 +- 5 files changed, 39 insertions(+), 34 deletions(-) diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py index 6ab7f00d..55187492 100644 --- a/deepprofiler/imaging/augmentations.py +++ b/deepprofiler/imaging/augmentations.py @@ -31,16 +31,16 @@ def random_illumination(image): source = tf.image.grayscale_to_rgb(source) # Apply illumination augmentations - bright = tf.random.uniform([numchn], minval=-0.2, maxval=0.2, dtype=tf.float32) + bright = tf.random.uniform([numchn], minval=-0.1, maxval=0.1, dtype=tf.float32) channels = [tf.image.adjust_brightness(source[s,...], bright[s]) for s in range(numchn)] - contrast = tf.random.uniform([numchn], minval=0.5, maxval=1.5, dtype=tf.float32) + contrast = tf.random.uniform([numchn], minval=0.8, maxval=1.2, dtype=tf.float32) channels = [tf.image.adjust_contrast(channels[s], contrast[s]) for s in range(numchn)] result = tf.concat([tf.expand_dims(t, 0) for t in channels], axis=0) # Recover multi-channel image result = tf.image.rgb_to_grayscale(result) result = tf.transpose(result[:,:,:,0], [2, 1, 0]) - result = result / tf.math.reduce_max(result) + #result = result / tf.math.reduce_max(result) return result diff --git a/deepprofiler/imaging/cropping.py b/deepprofiler/imaging/cropping.py index ddb32d64..f106e694 100644 --- a/deepprofiler/imaging/cropping.py +++ b/deepprofiler/imaging/cropping.py @@ -22,9 +22,12 @@ def crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph, box_size, mask_boxes mask_values = tf.ones_like(crops[:, :, :, -1], dtype=tf.float32) * tf.cast(mask_ind, dtype=tf.float32) masks = tf.compat.v1.to_float(tf.equal(crops[:, :, :, -1], mask_values)) crops = crops[:, :, :, 0:-1] * tf.expand_dims(masks, -1) - mean = tf.math.reduce_mean(crops, axis=[1, 2], keepdims=True) - std = tf.math.reduce_std(crops, axis=[1, 2], keepdims=True) - crops = (crops - mean)/std + #mean = tf.math.reduce_mean(crops, axis=[1, 2], keepdims=True) + #std = tf.math.reduce_std(crops, axis=[1, 2], keepdims=True) + #crops = (crops - mean)/std + mini = tf.math.reduce_min(crops, axis=[1, 2], keepdims=True) + maxi = tf.math.reduce_max(crops, axis=[1, 2], keepdims=True) + crops = (crops - mini) / maxi return crops diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index 0890bee4..df2f4535 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -62,6 +62,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): # Get training parameters epochs, steps, schedule_epochs, schedule_lr, freq = setup_params(self, experiment) + steps = self.train_crop_generator.expected_steps # Load weights self.load_weights(epoch) diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py index 174b04d1..60f05e09 100644 --- a/plugins/crop_generators/sampled_crop_generator.py +++ b/plugins/crop_generators/sampled_crop_generator.py @@ -28,21 +28,32 @@ def __init__(self, config, dset): def start(self, session): - self.samples = pd.read_csv(os.path.join(self.directory, "sc-metadata.csv")) - self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) + self.all_cells = pd.read_csv(os.path.join(self.directory, "sc_metadata.csv")) + #self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) + self.balanced_sample() + self.expected_steps = self.samples.shape[0] / self.batch_size self.num_classes = len(self.samples["Target"].unique()) - ''' - self.generator = self.datagen.flow_from_dataframe( - dataframe=samples, - x_col="Image_Name", - y_col="Class_Name", - class_mode="categorical", - directory=self.directory, - color_mode="grayscale", - target_size=(self.box_size, self.box_size * self.num_channels), - batch_size=self.config["train"]["model"]["params"]["batch_size"] - ) - ''' + + + def balanced_sample(self): + # Obtain distribution of single cells per class + #df = self.all_cells[self.all_cells.Training_Status_Alpha == "Training"].sample(frac=1.0).reset_index(drop=True) + df = self.all_cells[self.all_cells.Next_Training_Status == "Training"].sample(frac=1.0).reset_index(drop=True) + + counts = df.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]] + sample_size = int(counts.Key.median()) + counts = {r.Class_Name: r.Key for k,r in counts.iterrows()} + + # Sample the same number of cells per class + class_samples = [] + for cls in df.Class_Name.unique(): + class_samples.append(df[df.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size)) + self.samples = pd.concat(class_samples) + + # Randomize order + self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) + print(" >> Shuffling training sample with",len(self.samples),"examples") + def generate(self, sess, global_step=0): pointer = 0 @@ -52,7 +63,7 @@ def generate(self, sess, global_step=0): y = [] for i in range(self.batch_size): if pointer >= len(self.samples): - self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) + self.balanced_sample() pointer = 0 filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"]) im = skimage.io.imread(filename).astype(np.float32) @@ -64,18 +75,6 @@ def generate(self, sess, global_step=0): # break - def generate_old(self, sess, global_step=0): - while True: - try: - x_, y = next(self.generator) - x = np.zeros([x_.shape[0], self.box_size, self.box_size, self.num_channels]) - for i in range(x_.shape[0]): - x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(x_[i]) - yield (x, y) #tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) - except: - break - - def stop(self, session): session.close() return diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index aea98a5f..aa4bb08a 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -53,7 +53,9 @@ def define_model(self, config, dset): optimizer = tf.compat.v1.keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9, nesterov=True) - loss_func = "categorical_crossentropy" + #loss_func = "categorical_crossentropy" + loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2) + if self.is_training is False and "use_pretrained_input_size" in config["profile"].keys(): input_tensor = tf.compat.v1.keras.layers.Input( (config["profile"]["use_pretrained_input_size"], config["profile"]["use_pretrained_input_size"], 3), From cdb628956b5d4231af91b57e85db4457faa750a8 Mon Sep 17 00:00:00 2001 From: Juan C Date: Sat, 18 Sep 2021 17:38:31 -0400 Subject: [PATCH 21/43] Train/val with pre-cropped cells --- deepprofiler/dataset/image_dataset.py | 4 +- deepprofiler/learning/model.py | 35 ++++--- deepprofiler/learning/training.py | 2 +- .../crop_generators/sampled_crop_generator.py | 98 +++++++++++++------ plugins/models/efficientnet.py | 7 +- 5 files changed, 88 insertions(+), 58 deletions(-) diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py index f6bc2710..761f6432 100644 --- a/deepprofiler/dataset/image_dataset.py +++ b/deepprofiler/dataset/image_dataset.py @@ -238,8 +238,8 @@ def read_dataset(config, mode = 'train'): if config["dataset"]["locations"]["mask_objects"]: dset.outlines = outlines - if mode == 'train': - dset.prepare_training_locations() + #if mode == 'train': + # dset.prepare_training_locations() return dset diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index df2f4535..6ad4b4ed 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -28,7 +28,7 @@ def __init__(self, config, dset, crop_generator, val_crop_generator, is_training self.config = config self.dset = dset self.train_crop_generator = crop_generator(config, dset) - self.val_crop_generator = val_crop_generator(config, dset) + self.val_crop_generator = crop_generator(config, dset, mode="Validation") #val_crop_generator(config, dset) self.random_seed = None self.is_training = is_training @@ -61,7 +61,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): self.train_crop_generator.start(main_session) # Get training parameters - epochs, steps, schedule_epochs, schedule_lr, freq = setup_params(self, experiment) + epochs, schedule_epochs, schedule_lr, freq = setup_params(self, experiment) steps = self.train_crop_generator.expected_steps # Load weights @@ -72,7 +72,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): # Train model self.feature_model.fit_generator( - generator=self.train_crop_generator.generate(main_session), + generator=self.train_crop_generator.generator(main_session), steps_per_epoch=steps, epochs=epochs, callbacks=callbacks, @@ -137,11 +137,17 @@ def start_main_session(): def load_validation_data(dpmodel, session): dpmodel.val_crop_generator.start(session) - x_validation, y_validation = deepprofiler.learning.validation.load_validation_data( - dpmodel.config, - dpmodel.dset, - dpmodel.val_crop_generator, - session) + x_validation = [] + y_validation = [] + + for batch in dpmodel.val_crop_generator.generate(): + x_validation.append(batch[0]) + y_validation.append(batch[1]) + + x_validation = np.concatenate(x_validation) + y_validation = np.concatenate(y_validation) + print("Validation data:", x_validation.shape, y_validation.shape) + return x_validation, y_validation @@ -166,12 +172,6 @@ def setup_callbacks(dpmodel, lr_schedule_epochs, lr_schedule_lr, dset, experimen csv_output = dpmodel.config["paths"]["logs"] + "/log.csv" callback_csv = tf.compat.v1.keras.callbacks.CSVLogger(filename=csv_output) - # Queue stats - qstats = tf.compat.v1.keras.callbacks.LambdaCallback( - on_train_begin=lambda logs: dset.show_setup(), - on_epoch_end=lambda epoch, logs: experiment.log_metrics(dset.show_stats()) if experiment else dset.show_stats() - ) - # Learning rate schedule def lr_schedule(epoch, lr): if epoch in lr_schedule_epochs: @@ -182,15 +182,14 @@ def lr_schedule(epoch, lr): # Collect all callbacks if lr_schedule_epochs: callback_lr_schedule = tf.compat.v1.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1) - callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule, qstats] + callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] else: - callbacks = [callback_model_checkpoint, callback_csv, qstats] + callbacks = [callback_model_checkpoint, callback_csv] return callbacks def setup_params(dpmodel, experiment): epochs = dpmodel.config["train"]["model"]["epochs"] - steps = dpmodel.dset.steps_per_epoch lr_schedule_epochs = [] lr_schedule_lr = [] if 'comet_ml' in dpmodel.config["train"].keys(): @@ -219,7 +218,7 @@ def setup_params(dpmodel, experiment): else: freq = 1 - return epochs, steps, lr_schedule_epochs, lr_schedule_lr, freq + return epochs, lr_schedule_epochs, lr_schedule_lr, freq def close(dpmodel, crop_session): diff --git a/deepprofiler/learning/training.py b/deepprofiler/learning/training.py index 29f1df44..db186664 100644 --- a/deepprofiler/learning/training.py +++ b/deepprofiler/learning/training.py @@ -9,7 +9,7 @@ def learn_model(config, dset, epoch=1, seed=None, verbose=1): model_module = importlib.import_module("plugins.models.{}".format(config["train"]["model"]["name"])) crop_module = importlib.import_module("plugins.crop_generators.{}".format(config["train"]["model"]["crop_generator"])) - config["num_classes"] = len(dset.training_images["Target"].unique()) + #config["num_classes"] = len(dset.training_images["Target"].unique()) if "metrics" in config["train"]["model"].keys(): if type(config["train"]["model"]["metrics"]) not in [list, dict]: raise ValueError("Metrics should be a list or dictionary.") diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py index 60f05e09..1792c5e9 100644 --- a/plugins/crop_generators/sampled_crop_generator.py +++ b/plugins/crop_generators/sampled_crop_generator.py @@ -3,6 +3,7 @@ import pandas as pd import skimage.io import tensorflow as tf +import tqdm import deepprofiler.imaging.cropping @@ -18,66 +19,99 @@ class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator): - def __init__(self, config, dset): + def __init__(self, config, dset, mode="Training"): super(GeneratorClass, self).__init__(config, dset) #self.datagen = tf.keras.preprocessing.image.ImageDataGenerator() self.directory = config["paths"]["single_cell_sample"] self.num_channels = len(config["dataset"]["images"]["channels"]) self.box_size = self.config["dataset"]["locations"]["box_size"] self.batch_size = self.config["train"]["model"]["params"]["batch_size"] + self.mode = mode + # Load metadata + self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv")) + self.target = config["train"]["partition"]["targets"][0] - def start(self, session): - self.all_cells = pd.read_csv(os.path.join(self.directory, "sc_metadata.csv")) - #self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) + # Index targets for one-hot encoded labels + self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True) + self.classes = list(self.split_data[self.target].unique()) + self.num_classes = len(self.classes) + self.classes.sort() + self.classes = {self.classes[i]:i for i in range(self.num_classes)} + + # Identify targets and samples self.balanced_sample() - self.expected_steps = self.samples.shape[0] / self.batch_size - self.num_classes = len(self.samples["Target"].unique()) + self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0) + # Report number of classes globally + self.config["num_classes"] = self.num_classes + print(" >> Number of classes:", self.num_classes) + + + def start(self, session): + #self.all_cells = pd.read_csv(os.path.join(self.directory, "sc_metadata.csv")) + #self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv")) + #self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) + pass def balanced_sample(self): # Obtain distribution of single cells per class - #df = self.all_cells[self.all_cells.Training_Status_Alpha == "Training"].sample(frac=1.0).reset_index(drop=True) - df = self.all_cells[self.all_cells.Next_Training_Status == "Training"].sample(frac=1.0).reset_index(drop=True) - - counts = df.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]] + counts = self.split_data.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]] sample_size = int(counts.Key.median()) counts = {r.Class_Name: r.Key for k,r in counts.iterrows()} # Sample the same number of cells per class class_samples = [] - for cls in df.Class_Name.unique(): - class_samples.append(df[df.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size)) + for cls in self.split_data.Class_Name.unique(): + class_samples.append(self.split_data[self.split_data.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size)) self.samples = pd.concat(class_samples) # Randomize order - self.samples = self.samples.sample(frac=1.0).reset_index(drop=True) - print(" >> Shuffling training sample with",len(self.samples),"examples") + if self.mode == "Training": + print(" >> Shuffling training sample with",len(self.samples),"examples") + self.samples = self.samples.sample(frac=1.0).reset_index() + else: + self.samples = self.samples.sample(frac=0.1).reset_index() + print(self.samples[self.target].value_counts()) - def generate(self, sess, global_step=0): + def generator(self, sess, global_step=0): pointer = 0 while True: - #try: - x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels]) - y = [] - for i in range(self.batch_size): - if pointer >= len(self.samples): - self.balanced_sample() - pointer = 0 - filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"]) - im = skimage.io.imread(filename).astype(np.float32) - x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im) - y.append(self.samples.loc[pointer, "Target"]) - pointer += 1 - yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) - #except: - # break + x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels]) + y = [] + for i in range(self.batch_size): + if pointer >= len(self.samples): + self.balanced_sample() + pointer = 0 + filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"]) + im = skimage.io.imread(filename).astype(np.float32) + x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im) + y.append(self.classes[self.samples.loc[pointer, self.target]]) + pointer += 1 + yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) + + + def generate(self): + pointer = 0 + for k in range(self.expected_steps): + x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels]) + y = [] + for i in range(self.batch_size): + if pointer >= len(self.samples): + break + filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"]) + im = skimage.io.imread(filename).astype(np.float32) + x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im) + y.append(self.classes[self.samples.loc[pointer, self.target]]) + pointer += 1 + if len(y) < x.shape[0]: + x = x[0:len(y),...] + yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) def stop(self, session): - session.close() - return + pass ## Reusing the Single Image Crop Generator. No changes needed diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index aa4bb08a..91ae2a22 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -74,11 +74,8 @@ def define_model(self, config, dset): # 2. Create an output embedding for each target class_outputs = [] - i = 0 - for t in dset.targets: - y = tf.compat.v1.keras.layers.Dense(t.shape[1], activation="softmax", name=t.field_name)(features) - class_outputs.append(y) - i += 1 + y = tf.compat.v1.keras.layers.Dense(config["num_classes"], activation="softmax", name="ClassProb")(features) + class_outputs.append(y) # 4. Create and compile model model = tf.compat.v1.keras.models.Model(inputs=input_image, outputs=class_outputs) From f188826db66da355ca2ee0faec7fd830eefc4c6d Mon Sep 17 00:00:00 2001 From: Juan C Date: Sat, 18 Sep 2021 20:00:05 -0400 Subject: [PATCH 22/43] Added online soft labels --- deepprofiler/learning/model.py | 7 + .../crop_generators/online_labels_cropgen.py | 160 ++++++++++++++++++ plugins/models/efficientnet.py | 2 +- 3 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 plugins/crop_generators/online_labels_cropgen.py diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index 6ad4b4ed..c72ee49a 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -185,6 +185,13 @@ def lr_schedule(epoch, lr): callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] else: callbacks = [callback_model_checkpoint, callback_csv] + + # Online labels callback + update_labels = tf.compat.v1.keras.callbacks.LambdaCallback( + on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch) + ) + callbacks.append(update_labels) + return callbacks diff --git a/plugins/crop_generators/online_labels_cropgen.py b/plugins/crop_generators/online_labels_cropgen.py new file mode 100644 index 00000000..c0f48fba --- /dev/null +++ b/plugins/crop_generators/online_labels_cropgen.py @@ -0,0 +1,160 @@ +import os +import numpy as np +import pandas as pd +import skimage.io +import tensorflow as tf +import tqdm + +import deepprofiler.imaging.cropping + +tf.compat.v1.disable_v2_behavior() + +## Wrapper for Keras ImageDataGenerator +## The Keras generator is not completely useful, because it makes assumptions about +## color (grayscale or RGB). We need flexibility for color channels, and augmentations +## tailored to multi-dimensional microscopy images. It's based on PIL rather than skimage. +## In addition, the samples loaded in this generator have unfolded channels, which +## requires us to fold them back to a tensor before feeding them to a CNN. + + +class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator): + + def __init__(self, config, dset, mode="Training"): + super(GeneratorClass, self).__init__(config, dset) + #self.datagen = tf.keras.preprocessing.image.ImageDataGenerator() + self.directory = config["paths"]["single_cell_sample"] + self.num_channels = len(config["dataset"]["images"]["channels"]) + self.box_size = self.config["dataset"]["locations"]["box_size"] + self.batch_size = self.config["train"]["model"]["params"]["batch_size"] + self.mode = mode + + # Load metadata + self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv")) + self.target = config["train"]["partition"]["targets"][0] + + # Index targets for one-hot encoded labels + self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True) + self.classes = list(self.split_data[self.target].unique()) + self.num_classes = len(self.classes) + self.classes.sort() + self.classes = {self.classes[i]:i for i in range(self.num_classes)} + + # Identify targets and samples + self.balanced_sample() + self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0) + + # Report number of classes globally + self.config["num_classes"] = self.num_classes + print(" >> Number of classes:", self.num_classes) + + # Online labels + if self.mode == "Training": + self.init_online_labels() + + + def start(self, session): + pass + + def balanced_sample(self): + # Obtain distribution of single cells per class + counts = self.split_data.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]] + sample_size = int(counts.Key.median()) + counts = {r.Class_Name: r.Key for k,r in counts.iterrows()} + + # Sample the same number of cells per class + class_samples = [] + for cls in self.split_data.Class_Name.unique(): + class_samples.append(self.split_data[self.split_data.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size)) + self.samples = pd.concat(class_samples) + + # Randomize order + if self.mode == "Training": + self.samples = self.samples.sample(frac=1.0).reset_index() + else: + self.samples = self.samples.sample(frac=0.1).reset_index() + print(self.samples[self.target].value_counts()) + + + def generator(self, sess, global_step=0): + pointer = 0 + while True: + x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels]) + y = [] + for i in range(self.batch_size): + if pointer >= len(self.samples): + self.balanced_sample() + pointer = 0 + filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"]) + im = skimage.io.imread(filename).astype(np.float32) + x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im) + y.append([self.soft_labels[self.samples.loc[pointer, "index"], :]]) + pointer += 1 + yield(x, np.concatenate(y, axis=0)) + + + def generate(self, source="samples"): + pointer = 0 + if source == "splits": + dataframe = self.split_data + steps = (len(self.split_data) // self.batch_size) + int(len(self.split_data) % self.batch_size > 0) + msg = "Predicting soft labels" + else: + dataframe = self.samples + steps = self.expected_steps + msg = "Loading validation data" + + for k in tqdm.tqdm(range(steps), desc=msg): + x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels]) + y = [] + for i in range(self.batch_size): + if pointer >= len(dataframe): + break + filename = os.path.join(self.directory, dataframe.loc[pointer, "Image_Name"]) + im = skimage.io.imread(filename).astype(np.float32) + x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im) + y.append(self.classes[dataframe.loc[pointer, self.target]]) + pointer += 1 + if len(y) < x.shape[0]: + x = x[0:len(y),...] + yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) + + + def init_online_labels(self): + LABEL_SMOOTHING = 0.2 + self.soft_labels = np.zeros((self.split_data.shape[0], self.num_classes)) + LABEL_SMOOTHING/self.num_classes + print("Soft labels:", self.soft_labels.shape) + for k,r in self.split_data.iterrows(): + label = self.classes[self.split_data.loc[k, self.target]] + self.soft_labels[k, label] += 1. - LABEL_SMOOTHING + print("Total labels:", np.sum(self.soft_labels)) + sl = pd.DataFrame(data=self.soft_labels) + sl.to_csv("soft_labels_0000.csv", index=False) + + + def update_online_labels(self, model, epoch): + # Prepare parameters and predictions + LAMBDA = 0.01 + predictions = [] + + # Get predictions with the model + model.get_layer("augmentation_layer").is_training = False + for batch in self.generate(source="splits"): + predictions.append( model.predict(batch[0]) ) + model.get_layer("augmentation_layer").is_training = True + + # Update soft labels + predictions = np.concatenate(predictions, axis=0) + self.soft_labels = (1 - LAMBDA)*self.soft_labels + LAMBDA*predictions + print(" >> Labels updated", predictions.shape) + + # Save labels for this epoch + sl = pd.DataFrame(data=self.soft_labels) + sl.to_csv("soft_labels_{:04d}.csv".format(epoch+1), index=False) + + + def stop(self, session): + pass + +## Reusing the Single Image Crop Generator. No changes needed + +SingleImageGeneratorClass = deepprofiler.imaging.cropping.SingleImageCropGenerator diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index 91ae2a22..00030621 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -33,7 +33,7 @@ def get_model(self, config, input_image=None, weights=None, include_top=False): error_msg = str(num_layers) + " conv_blocks not in " + SM assert num_layers in supported_models.keys(), error_msg - if self.is_training and weights is None: + if self.is_training: #and weights is None: input_image = AugmentationLayer()(input_image) model = supported_models[num_layers]( From 33d8bbc8adcf01a3c556b430e41bc309c4c4c07c Mon Sep 17 00:00:00 2001 From: Juan C Date: Sat, 18 Sep 2021 20:11:47 -0400 Subject: [PATCH 23/43] Minimal augmentations --- deepprofiler/imaging/augmentations.py | 8 ++++---- plugins/models/efficientnet.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py index 55187492..e239489d 100644 --- a/deepprofiler/imaging/augmentations.py +++ b/deepprofiler/imaging/augmentations.py @@ -62,10 +62,10 @@ def random_rotate(image): def augment(image): - if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)): - augm = random_crop(image) - else: - augm = random_rotate(image) + #if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)): + # augm = random_crop(image) + #else: + # augm = random_rotate(image) augm = random_flips(augm) augm = random_illumination(augm) diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index 00030621..91ae2a22 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -33,7 +33,7 @@ def get_model(self, config, input_image=None, weights=None, include_top=False): error_msg = str(num_layers) + " conv_blocks not in " + SM assert num_layers in supported_models.keys(), error_msg - if self.is_training: #and weights is None: + if self.is_training and weights is None: input_image = AugmentationLayer()(input_image) model = supported_models[num_layers]( From de261f52da4668dae7df24d1c91ee68bc2031051 Mon Sep 17 00:00:00 2001 From: Juan C Date: Sat, 18 Sep 2021 20:13:41 -0400 Subject: [PATCH 24/43] Fixed typo --- deepprofiler/imaging/augmentations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py index e239489d..b0430384 100644 --- a/deepprofiler/imaging/augmentations.py +++ b/deepprofiler/imaging/augmentations.py @@ -67,7 +67,7 @@ def augment(image): #else: # augm = random_rotate(image) - augm = random_flips(augm) + augm = random_flips(image) augm = random_illumination(augm) return augm From baee639a5fd89ccf9d1bef218400eb05afe74141 Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 20 Sep 2021 17:23:09 -0400 Subject: [PATCH 25/43] Save soft labels --- .../crop_generators/online_labels_cropgen.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/plugins/crop_generators/online_labels_cropgen.py b/plugins/crop_generators/online_labels_cropgen.py index c0f48fba..711da5f4 100644 --- a/plugins/crop_generators/online_labels_cropgen.py +++ b/plugins/crop_generators/online_labels_cropgen.py @@ -30,10 +30,21 @@ def __init__(self, config, dset, mode="Training"): # Load metadata self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv")) + + ## UNCOMMENT FOR ALPHA SET + #self.all_cells.loc[(self.all_cells.Training_Status == "Unused") & self.all_cells.Metadata_Plate.isin([41756,41757]), "Training_Status_Alpha"] = "Validation" + + ## UNCOMMENT FOR SINGLE CELL BALANCED SET + self.all_cells.loc[self.all_cells.Training_Status == "Training", "Training_Status"] = "XXX" + self.all_cells.loc[self.all_cells.Training_Status == "SingleCellTraining", "Training_Status"] = "Training" + self.all_cells.loc[self.all_cells.Training_Status == "Validation", "Training_Status"] = "YYY" + self.all_cells.loc[self.all_cells.Training_Status == "SingleCellValidation", "Training_Status"] = "Validation" + self.target = config["train"]["partition"]["targets"][0] # Index targets for one-hot encoded labels - self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True) + #self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True) + self.split_data = self.all_cells[self.all_cells.Training_Status == self.mode].reset_index(drop=True) self.classes = list(self.split_data[self.target].unique()) self.num_classes = len(self.classes) self.classes.sort() @@ -49,6 +60,8 @@ def __init__(self, config, dset, mode="Training"): # Online labels if self.mode == "Training": + self.out_dir = config["paths"]["results"] + "soft_labels/" + os.makedirs(self.out_dir, exist_ok=True) self.init_online_labels() @@ -128,7 +141,7 @@ def init_online_labels(self): self.soft_labels[k, label] += 1. - LABEL_SMOOTHING print("Total labels:", np.sum(self.soft_labels)) sl = pd.DataFrame(data=self.soft_labels) - sl.to_csv("soft_labels_0000.csv", index=False) + sl.to_csv(self.out_dir + "0000.csv", index=False) def update_online_labels(self, model, epoch): @@ -149,7 +162,7 @@ def update_online_labels(self, model, epoch): # Save labels for this epoch sl = pd.DataFrame(data=self.soft_labels) - sl.to_csv("soft_labels_{:04d}.csv".format(epoch+1), index=False) + sl.to_csv(self.out_dir + "{:04d}.csv".format(epoch+1), index=False) def stop(self, session): From 7d015f06038980925edc5d8428d95516552a2f13 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 27 Sep 2021 16:15:28 +0200 Subject: [PATCH 26/43] Training strategy with TF2 dataset and augmentations --- deepprofiler/__main__.py | 9 + deepprofiler/learning/tf2train.py | 301 ++++++++++++++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 deepprofiler/learning/tf2train.py diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 70ef5b62..7d751b2d 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -14,6 +14,7 @@ import deepprofiler.dataset.image_dataset import deepprofiler.dataset.sampling import deepprofiler.learning.training +import deepprofiler.learning.tf2train import deepprofiler.learning.profiling import deepprofiler.download.normalize_bbbc021_metadata @@ -163,6 +164,14 @@ def train(context, epoch, seed): deepprofiler.learning.training.learn_model(context.obj["config"], dset, epoch, seed) +# Third tool (b): Train a network with TF dataset +@cli.command() +@click.option("--epoch", default=1) +@click.pass_context +def traintf2(context, epoch): + deepprofiler.learning.tf2train.learn_model(context.obj["config"], epoch) + + # Fourth tool: Profile cells and extract features @cli.command() @click.pass_context diff --git a/deepprofiler/learning/tf2train.py b/deepprofiler/learning/tf2train.py new file mode 100644 index 00000000..2350c200 --- /dev/null +++ b/deepprofiler/learning/tf2train.py @@ -0,0 +1,301 @@ +import comet_ml +import os +import numpy as np +import pandas as pd +import tensorflow as tf +import tensorflow_addons as tfa +import efficientnet.tfkeras as efn + +tf.compat.v1.enable_v2_behavior() +tf.config.run_functions_eagerly(True) + +AUTOTUNE = tf.data.AUTOTUNE + + +def make_dataset(path, batch_size, single_cell_metadata, config, is_training): + @tf.function + def fold_channels(crop): + assert tf.executing_eagerly() + crop = crop.numpy() + output = np.reshape(crop, (crop.shape[0], crop.shape[0], -1), order="F").astype(np.float32) + output = output / 255. + for i in range(output.shape[-1]): + mean = np.mean(output[:, :, i]) + std = np.std(output[:, :, i]) + output[:, :, i] = (output[:, :, i] - mean) / std + return tf.convert_to_tensor(output, dtype=tf.float32) + + def parse_image(filename): + image = tf.io.read_file(filename) + image = tf.image.decode_png(image, channels=0) + image = tf.py_function(func=fold_channels, inp=[image], Tout=tf.float32) + return image + + def configure_for_performance(ds, is_training): + + ds = ds.shuffle(buffer_size=323000) + if is_training: + ds = augment(ds) + ds = ds.batch(batch_size) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + return ds + + def random_illumination(image): + # Make channels independent images + numchn = len(config["dataset"]["images"]["channels"]) + source = tf.transpose(image, [2, 1, 0]) + source = tf.expand_dims(source, -1) + source = tf.image.grayscale_to_rgb(source) + + # Apply illumination augmentations + bright = tf.random.uniform([numchn], minval=-0.4, maxval=0.4, dtype=tf.float32) + channels = [tf.image.adjust_brightness(source[s, ...], bright[s]) for s in range(numchn)] + contrast = tf.random.uniform([numchn], minval=0.6, maxval=1.4, dtype=tf.float32) + channels = [tf.image.adjust_contrast(channels[s], contrast[s]) for s in range(numchn)] + result = tf.concat([tf.expand_dims(t, 0) for t in channels], axis=0) + + # Recover multi-channel image + result = tf.image.rgb_to_grayscale(result) + result = tf.transpose(result[:, :, :, 0], [2, 1, 0]) + # result = result / tf.math.reduce_max(result) + + return result + + def random_flips(image): + augmented = tf.image.random_flip_left_right(image) + + # 90 degree rotations + angle = tf.random.uniform([1], minval=0, maxval=4, dtype=tf.int32) + augmented = tf.image.rot90(augmented, angle[0]) + return augmented + + def random_crop_or_rotate(image): + w, h, c = config["dataset"]["locations"]["box_size"], config["dataset"]["locations"]["box_size"], len( + config["dataset"]["images"]["channels"]) + if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)): + size = tf.random.uniform([1], minval=int(w * 0.8), maxval=w, dtype=tf.int32) + image = tf.image.random_crop(image, [size[0], size[0], c]) + return tf.image.resize(image, (w, h)) + else: + return image + + + def augment(ds): + ds = ds.map( + lambda image, label: (random_crop_or_rotate(image), label), num_parallel_calls=AUTOTUNE + ).map( + lambda image, label: (random_flips(image), label), num_parallel_calls=AUTOTUNE + ).map( + lambda image, label: (random_illumination(image), label), num_parallel_calls=AUTOTUNE + ) + return ds + + filenames = single_cell_metadata["Image_Name"].tolist() + for i in range(len(filenames)): + filenames[i] = os.path.join(path, filenames[i]) + + steps = np.math.ceil(len(filenames) / batch_size) + filenames_ds = tf.data.Dataset.from_tensor_slices(filenames) + images_ds = filenames_ds.map(parse_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) + labels = tf.keras.utils.to_categorical(single_cell_metadata["Categorical"]) + labels_ds = tf.data.Dataset.from_tensor_slices(labels) + ds = tf.data.Dataset.zip((images_ds, labels_ds)) + ds = configure_for_performance(ds, is_training) + return ds, steps + + +def setup_callbacks(config): + callbacks = [] + + # CSV Log + csv_output = config["paths"]["logs"] + "/log.csv" + callback_csv = tf.keras.callbacks.CSVLogger(filename=csv_output) + callbacks.append(callback_csv) + + # Checkpoints + output_file = config["paths"]["checkpoints"] + "/checkpoint_{epoch:04d}.hdf5" + period = 1 + save_best = False + if "checkpoint_policy" in config["train"]["model"] and isinstance( + config["train"]["model"]["checkpoint_policy"], int): + period = int(config["train"]["model"]["checkpoint_policy"]) + elif "checkpoint_policy" in config["train"]["model"] and config["train"]["model"]["checkpoint_policy"] == 'best': + save_best = True + + callback_model_checkpoint = tf.keras.callbacks.ModelCheckpoint( + filepath=output_file, + save_weights_only=True, + save_best_only=save_best, + period=period + ) + callbacks.append(callback_model_checkpoint) + epochs = config["train"]["model"]["epochs"] + + def lr_schedule(epoch, lr): + if lr_schedule_epochs and epoch in lr_schedule_epochs: + return lr_schedule_lr[lr_schedule_epochs.index(epoch)] + else: + return lr + + if "lr_schedule" in config["train"]["model"]: + if config["train"]["model"]["lr_schedule"] == "cosine": + lr_schedule_epochs = [x for x in range(epochs)] + init_lr = config["train"]["model"]["params"]["learning_rate"] + # Linear warm up + lr_schedule_lr = [init_lr / (5 - t) for t in range(5)] + # Cosine decay + lr_schedule_lr += [0.5 * (1 + np.cos((np.pi * t) / epochs)) * init_lr for t in range(5, epochs)] + callback_lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1) + elif config["train"]["model"]["lr_schedule"] == "plateau": + callback_lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.0001) + config["train"]["validation"]["frequency"] = 1 + else: + assert len(config["train"]["model"]["lr_schedule"]["epoch"]) == \ + len(config["train"]["model"]["lr_schedule"]["lr"]), "Make sure that the length of " \ + "lr_schedule->epoch equals the length of " \ + "lr_schedule->lr in the config file." + + lr_schedule_epochs = config["train"]["model"]["lr_schedule"]["epoch"] + lr_schedule_lr = config["train"]["model"]["lr_schedule"]["lr"] + callback_lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1) + + callbacks.append(callback_lr_schedule) + + return callbacks + + +def setup_comet_ml(config): + if 'comet_ml' in config["train"].keys(): + experiment = comet_ml.Experiment( + api_key=config["train"]["comet_ml"]["api_key"], + project_name=config["train"]["comet_ml"]["project_name"], + auto_param_logging=True, + auto_histogram_weight_logging=False, + auto_histogram_gradient_logging=False, + auto_histogram_activation_logging=False + ) + if config["experiment_name"] != "results": + experiment.set_name(config["experiment_name"]) + experiment.log_others(config) + else: + experiment = None + return experiment + + +def learn_model(config, epoch): + DENSE_KERNEL_INITIALIZER = { + 'class_name': 'VarianceScaling', + 'config': { + 'scale': 1. / 3., + 'mode': 'fan_out', + 'distribution': 'uniform' + } + } + + BATCH_SIZE = config["train"]["model"]["params"]["batch_size"] + strategy_lr = config["train"]["model"]["params"]["learning_rate"] + + experiment = setup_comet_ml(config) + + single_cell_metadata = pd.read_csv( + os.path.join(config["paths"]["single_cell_sample"], "expanded_sc_metadata_alpha.csv")) + single_cell_metadata = single_cell_metadata[ + ["Class_Name", "Image_Name", "Training_Status", "Training_Status_Alpha"]] + single_cell_metadata = single_cell_metadata[single_cell_metadata["Training_Status"] != "Unused"] + + num_classes = len(pd.unique(single_cell_metadata["Class_Name"])) + single_cell_metadata["Categorical"] = pd.Categorical(single_cell_metadata["Class_Name"]).codes + + path = config["paths"]["single_cell_sample"] + dataset, steps_per_epoch = make_dataset(path, BATCH_SIZE, + single_cell_metadata[ + single_cell_metadata["Training_Status_Alpha"] == "Training"], + config, + is_training=True) + validation_dataset, _ = make_dataset(path, BATCH_SIZE, + single_cell_metadata[ + single_cell_metadata["Training_Status_Alpha"] == "Validation"], + config, + is_training=False) + + input_shape = (config["dataset"]["locations"]["box_size"], config["dataset"]["locations"]["box_size"], + len(config["dataset"]["images"]["channels"])) + input_image = tf.keras.layers.Input(input_shape) + + model = efn.EfficientNetB0( + include_top=False, weights=None, input_tensor=input_image, + input_shape=input_shape + ) + features = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(model.output) + y = tf.keras.layers.Dense(num_classes, activation='softmax', name='predictions', + kernel_initializer=DENSE_KERNEL_INITIALIZER)(features) + model = tf.keras.models.Model(inputs=input_image, outputs=y) + + regularizer = tf.keras.regularizers.l2(0.00001) + for layer in model.layers: + if hasattr(layer, "kernel_regularizer"): + setattr(layer, "kernel_regularizer", regularizer) + + model = tf.keras.models.model_from_json(model.to_json()) + optimizer = tf.keras.optimizers.SGD(learning_rate=strategy_lr) + loss_func = tf.keras.losses.CategoricalCrossentropy(from_logits=False) # , label_smoothing = 0.6) + + model.compile(optimizer, loss_func, + metrics=["accuracy", tfa.metrics.F1Score(num_classes=num_classes, average='macro'), + tf.keras.metrics.TopKCategoricalAccuracy(k=5), tf.keras.metrics.Precision()]) + + callbacks = setup_callbacks(config, strategy_lr) + + if epoch == 1 and config["train"]["model"]["initialization"] == "ImageNet": + base_model = efn.EfficientNetB0(weights='imagenet', include_top=False) + total_layers = len(base_model.layers) + for i in range(2, total_layers): + if len(base_model.layers[i].weights) > 0: + model.layers[i].set_weights(base_model.layers[i].get_weights()) + + # => Replicate filters of first layer as needed + + weights = base_model.layers[1].get_weights() + available_channels = weights[0].shape[2] + target_shape = model.layers[1].weights[0].shape + new_weights = np.zeros(target_shape) + + for i in range(new_weights.shape[2]): + j = i % available_channels + new_weights[:, :, i, :] = weights[0][:, :, j, :] + + weights_array = [new_weights] + if len(weights) > 1: + weights_array += weights[1:] + + model.layers[1].set_weights(weights_array) + print(model.layers[1].name, + np.array_equal(np.array(model.layers[1].get_weights()), np.array(base_model.layers[1].get_weights()))) + + print("Network initialized with pretrained ImageNet weights") + + elif epoch > 1: + output_file = config["paths"]["checkpoints"] + "/checkpoint_{epoch:04d}.hdf5" + previous_model = output_file.format(epoch=epoch - 1) + model.load_weights(previous_model) + + print(model.summary()) + if experiment: + with experiment.train(): + model.fit(dataset, + epochs=config["train"]["model"]["epochs"], + callbacks=callbacks, + verbose=1, + validation_data=validation_dataset, + validation_freq=config["train"]["validation"]["frequency"], + initial_epoch=epoch - 1 + ) + else: + model.fit(dataset, + epochs=config["train"]["model"]["epochs"], + callbacks=callbacks, + verbose=1, + validation_data=validation_dataset, + validation_freq=config["train"]["validation"]["frequency"], + initial_epoch=epoch - 1 + ) \ No newline at end of file From 858bc4c18d216b8a873627e7b26b45b5a055d9a3 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 27 Sep 2021 16:31:17 +0200 Subject: [PATCH 27/43] Add export of all single cells from TF1 branch --- deepprofiler/__main__.py | 12 ++++++++---- deepprofiler/dataset/sampling.py | 27 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 7d751b2d..fccf2d0d 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -143,12 +143,16 @@ def prepare(context): # Second tool: Sample single cells for training @cli.command() +@click.option("--mode", default="sample") @click.pass_context -def sample_sc(context): +def sample_sc(context, mode): if context.parent.obj["config"]["prepare"]["compression"]["implement"]: context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"] - dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train') - deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset) + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"]) + if mode == "sample": + deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset) + elif mode == "export_all": + deepprofiler.dataset.sampling.export_dataset(context.obj["config"], dset) print("Single-cell sampling complete.") @@ -176,7 +180,7 @@ def traintf2(context, epoch): @cli.command() @click.pass_context @click.option("--part", - help="Part of index to process", + help="Part of index to process", default=-1, type=click.INT) def profile(context, part): diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index 47227c80..54f1e982 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -13,6 +13,7 @@ class SingleCellSampler(deepprofiler.imaging.cropping.CropGenerator): def start(self, session): + self.all_metadata = [] self.session = session # Define input data batches with tf.compat.v1.variable_scope("train_inputs"): @@ -46,6 +47,20 @@ def process_batch(self, batch): output = self.session.run(self.input_variables["labeled_crops"], feed_dict) return output[0], metadata.reset_index(drop=True) + def export_single_cells(self, key, image_array, meta): + outdir = self.config["paths"]["single_cell_sample"] + key = self.dset.keyGen(meta) + batch = {"keys": [key], "images": [image_array], "targets": [], "locations": []} + batch["locations"].append(deepprofiler.imaging.boxes.get_locations(key, self.config)) + batch["targets"].append([t.get_values(meta) for t in self.dset.targets]) + crops, metadata = self.process_batch(batch) + for j in range(crops.shape[0]): + image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:]) + skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image) + + self.all_metadata.append(metadata) + print("{}: {} single cells".format(key, crops.shape[0])) + def start_session(): configuration = tf.compat.v1.ConfigProto() @@ -112,3 +127,15 @@ def sample_dataset(config, dset): all_metadata = pd.concat(all_metadata).reset_index(drop=True) all_metadata.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False) +def export_dataset(config, dset): + outdir = config["paths"]["single_cell_sample"] + if not is_directory_empty(outdir): + return + + session = start_session() + cropper = SingleCellSampler(config, dset) + cropper.start(session) + dset.scan(cropper.export_single_cells, frame="all") + df = pd.concat(cropper.all_metadata).reset_index(drop=True) + df.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False) + print("Exporting: done") \ No newline at end of file From 619171cb6151626c488e542d528189d6b4a07906 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 27 Sep 2021 16:40:03 +0200 Subject: [PATCH 28/43] Explicitly switch off eager mode --- deepprofiler/learning/profiling.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py index 33849334..2f525d18 100644 --- a/deepprofiler/learning/profiling.py +++ b/deepprofiler/learning/profiling.py @@ -3,12 +3,11 @@ import numpy as np import tensorflow as tf -from tensorflow.compat.v1.keras import backend as K from deepprofiler.dataset.utils import tic, toc tf.compat.v1.disable_v2_behavior() - +tf.config.run_functions_eagerly(False) class Profile(object): @@ -32,7 +31,7 @@ def __init__(self, config, dset): def configure(self): # Main session configuration - self.profile_crop_generator.start(K.get_session()) + self.profile_crop_generator.start(tf.compat.v1.keras.backend.get_session()) # Create feature extractor if self.config["profile"]["checkpoint"] != "None": @@ -72,7 +71,7 @@ def extract_features(self, key, image_array, meta): # key is a placeholder batch_size = self.config["profile"]["batch_size"] image_key, image_names, outlines = self.dset.get_image_paths(meta) crop_locations = self.profile_crop_generator.prepare_image( - K.get_session(), + tf.compat.v1.keras.backend.get_session(), image_array, meta, False @@ -84,7 +83,7 @@ def extract_features(self, key, image_array, meta): # key is a placeholder repeats = self.config["train"]["model"]["crop_generator"] == "repeat_channel_crop_generator" # Extract features - crops = next(self.profile_crop_generator.generate(K.get_session()))[0] # single image crop generator yields one batch + crops = next(self.profile_crop_generator.generate(tf.compat.v1.keras.backend.get_session()))[0] # single image crop generator yields one batch feats = self.feat_extractor.predict(crops, batch_size=batch_size) if repeats: feats = np.reshape(feats, (self.num_channels, total_crops, -1)) From e2ff13928520a345bfc47f7df33dad24c288ca65 Mon Sep 17 00:00:00 2001 From: Juan C Date: Mon, 27 Sep 2021 11:57:34 -0400 Subject: [PATCH 29/43] Individual channel crop geenerator added --- deepprofiler/learning/model.py | 11 +- deepprofiler/learning/profiling.py | 2 +- .../individual_channel_cropgen.py | 171 ++++++++++++++++++ 3 files changed, 178 insertions(+), 6 deletions(-) create mode 100644 plugins/crop_generators/individual_channel_cropgen.py diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index c72ee49a..6e4a8ec9 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -187,11 +187,12 @@ def lr_schedule(epoch, lr): callbacks = [callback_model_checkpoint, callback_csv] # Online labels callback - update_labels = tf.compat.v1.keras.callbacks.LambdaCallback( - on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch) - ) - callbacks.append(update_labels) - + if dpmodel.config["train"]["model"]["crop_generator"] == "online_labels_cropgen": + update_labels = tf.compat.v1.keras.callbacks.LambdaCallback( + on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch) + ) + callbacks.append(update_labels) + return callbacks diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py index ef1ddfad..558dac9a 100644 --- a/deepprofiler/learning/profiling.py +++ b/deepprofiler/learning/profiling.py @@ -81,7 +81,7 @@ def extract_features(self, key, image_array, meta): # key is a placeholder if total_crops == 0: print("No cells to profile:", output_file) return - repeats = self.config["train"]["model"]["crop_generator"] == "repeat_channel_crop_generator" + repeats = self.config["train"]["model"]["crop_generator"] in ["repeat_channel_crop_generator", "individual_channel_cropgen"] # Extract features crops = next(self.profile_crop_generator.generate(K.get_session()))[0] # single image crop generator yields one batch diff --git a/plugins/crop_generators/individual_channel_cropgen.py b/plugins/crop_generators/individual_channel_cropgen.py new file mode 100644 index 00000000..4795a30b --- /dev/null +++ b/plugins/crop_generators/individual_channel_cropgen.py @@ -0,0 +1,171 @@ +import os +import numpy as np +import pandas as pd +import skimage.io +import tensorflow as tf +import tqdm + +import deepprofiler.imaging.cropping + +tf.compat.v1.disable_v2_behavior() + +## Wrapper for Keras ImageDataGenerator +## The Keras generator is not completely useful, because it makes assumptions about +## color (grayscale or RGB). We need flexibility for color channels, and augmentations +## tailored to multi-dimensional microscopy images. It's based on PIL rather than skimage. +## In addition, the samples loaded in this generator have unfolded channels, which +## requires us to fold them back to a tensor before feeding them to a CNN. + + +class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator): + + def __init__(self, config, dset, mode="Training"): + super(GeneratorClass, self).__init__(config, dset) + #self.datagen = tf.keras.preprocessing.image.ImageDataGenerator() + self.directory = config["paths"]["single_cell_sample"] + self.num_channels = len(config["dataset"]["images"]["channels"]) + self.box_size = self.config["dataset"]["locations"]["box_size"] + self.batch_size = self.config["train"]["model"]["params"]["batch_size"] + self.mode = mode + + # Load metadata + self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv")) + # ALPHA SET HACK: + self.all_cells.loc[(self.all_cells.Training_Status == "Unused") & self.all_cells.Metadata_Plate.isin([41756,41757]), "Training_Status_Alpha"] = "Validation" + self.target = config["train"]["partition"]["targets"][0] + + # Keep track of the real number of channels for internal object use + if mode == "Training": + self.config["real_channels"] = config["dataset"]["images"]["channels"] + else: + self.num_channels = len(self.config["real_channels"]) + + # Distribute channels in separate records in the reference index + self.split_data = self.all_cells[self.all_cells.Training_Status_Alpha == self.mode].reset_index(drop=True) + before = len(self.split_data) + channels_data = [self.split_data.copy() for k in range(self.num_channels)] + for k in range(self.num_channels): + channels_data[k]["Channel"] = k + self.split_data = pd.concat(channels_data, axis=0) + after = len(self.split_data) + print(" >> Records before separating channels:", before, ". After:", after) + + # Index targets for one-hot encoded labels + self.classes = list(self.split_data[self.target].unique()) + self.num_classes = len(self.classes) + self.classes.sort() + self.classes = {self.classes[i]:i for i in range(self.num_classes)} + + # Identify targets and samples + self.balanced_sample() + self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0) + + # Report number of classes and channels globally + self.config["num_classes"] = self.num_classes + self.config["dataset"]["images"]["channels"] = ["Individual"] # Alter the number of channels for the rest of the program! + print(" >> Number of classes:", self.num_classes, ". Number of channels:", len(self.config["dataset"]["images"]["channels"])) + + + def start(self, session): + pass + + def balanced_sample(self): + # Obtain distribution of single cells per class + counts = self.split_data.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]] + sample_size = int(counts.Key.median()) + counts = {r.Class_Name: r.Key for k,r in counts.iterrows()} + + # Sample the same number of cells per class + class_samples = [] + for cls in self.split_data.Class_Name.unique(): + class_samples.append(self.split_data[self.split_data.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size)) + self.samples = pd.concat(class_samples) + + # Randomize order + if self.mode == "Training": + print(" >> Shuffling training sample with",len(self.samples),"examples") + self.samples = self.samples.sample(frac=1.0).reset_index() + else: + self.samples = self.samples.sample(frac=0.005).reset_index() + print(self.samples[self.target].value_counts()) + + + def load_sample_image(self, pointer): + filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"]) + im = skimage.io.imread(filename).astype(np.float32) + channel = self.samples.loc[pointer, "Channel"] + folded = deepprofiler.imaging.cropping.fold_channels(im) + return folded[:,:,channel] + + + def generator(self, sess, global_step=0): + pointer = 0 + while True: + x = np.zeros([self.batch_size, self.box_size, self.box_size, 1]) + y = [] + for i in range(self.batch_size): + if pointer >= len(self.samples): + self.balanced_sample() + pointer = 0 + x[i,:,:,0] = self.load_sample_image(pointer) + y.append(self.classes[self.samples.loc[pointer, self.target]]) + pointer += 1 + yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) + + + def generate(self): + pointer = 0 + for k in range(self.expected_steps): + x = np.zeros([self.batch_size, self.box_size, self.box_size, 1]) + y = [] + for i in range(self.batch_size): + if pointer >= len(self.samples): + break + x[i,:,:,0] = self.load_sample_image(pointer) + y.append(self.classes[self.samples.loc[pointer, self.target]]) + pointer += 1 + if len(y) < x.shape[0]: + x = x[0:len(y),...] + yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) + + + def stop(self, session): + pass + +## Class for generating crops from single images with separated channels + +def separate_channels(crops, network_input_size): + #resized_crops = tf.compat.v1.image.resize_images(crops, size=(network_input_size, network_input_size)) + reordered_channels = tf.transpose(crops, [3, 0, 1, 2]) + reshaped_data = tf.reshape(reordered_channels, shape=[-1, network_input_size, network_input_size, 1]) + #rgb_data = tf.image.grayscale_to_rgb(reshaped_data) + # Transform pixels in the range [-1,1] required for InceptionResNetv2 + #crop_min = tf.reduce_min(rgb_data, axis=[1,2,3], keepdims=True) + #crop_max = tf.reduce_max(rgb_data, axis=[1,2,3], keepdims=True) + #norm_rgb = ((rgb_data - crop_min)/(crop_max - crop_min))*2.0 - 1.0 + #return norm_rgb + return reshaped_data + + +class SingleImageGeneratorClass(deepprofiler.imaging.cropping.SingleImageCropGenerator): + + def __init__(self, config, dset): + # Recover the real set of channels + config["dataset"]["images"]["channels"] = config["real_channels"] + + # Then initialize the crop generator + super().__init__(config, dset) + width = self.config["dataset"]["locations"]["box_size"] + height = width + channels = len(self.config["dataset"]["images"]["channels"]) + self.crop_ph = tf.compat.v1.placeholder(tf.float32, (None, width, height, channels)) + self.resized = separate_channels(self.crop_ph, width) + + def generate(self, session, global_step=0): + crops = session.run(self.resized, feed_dict={self.crop_ph:self.image_pool}) + labels = np.tile(self.label_pool, [3,1]) + + global_step += 1 + + yield crops, labels + From 1b6a17493e4c9070d4356aeb3bec0a036c891aeb Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Tue, 28 Sep 2021 11:29:17 +0200 Subject: [PATCH 30/43] Switches between crop-generators. --- deepprofiler/imaging/cropping.py | 1 + deepprofiler/learning/model.py | 39 ++++++++++++++----- .../crop_generators/sampled_crop_generator.py | 1 + 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/deepprofiler/imaging/cropping.py b/deepprofiler/imaging/cropping.py index f106e694..56ad81ee 100644 --- a/deepprofiler/imaging/cropping.py +++ b/deepprofiler/imaging/cropping.py @@ -11,6 +11,7 @@ import deepprofiler.imaging.boxes tf.compat.v1.disable_v2_behavior() +tf.config.run_functions_eagerly(False) def crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph, box_size, mask_boxes=False): diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index 6e4a8ec9..9c64fa6a 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -12,6 +12,7 @@ import deepprofiler.learning.validation tf.compat.v1.disable_v2_behavior() +tf.config.run_functions_eagerly(False) ################################################## # This class should be used as an abstract base @@ -28,7 +29,10 @@ def __init__(self, config, dset, crop_generator, val_crop_generator, is_training self.config = config self.dset = dset self.train_crop_generator = crop_generator(config, dset) - self.val_crop_generator = crop_generator(config, dset, mode="Validation") #val_crop_generator(config, dset) + if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen': + self.val_crop_generator = crop_generator(config, dset, mode="Validation") + else: + self.val_crop_generator = val_crop_generator(config, dset) self.random_seed = None self.is_training = is_training @@ -62,7 +66,13 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): # Get training parameters epochs, schedule_epochs, schedule_lr, freq = setup_params(self, experiment) - steps = self.train_crop_generator.expected_steps + if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen': + steps = self.train_crop_generator.expected_steps + elif self.config['train']['model']['crop_generator'] == 'sampled_crop_generator': + steps = int((len(os.listdir(self.config['paths']['single_cell_sample'])) - 1) + / self.config["train"]["model"]["params"]["batch_size"]) + else: + steps = self.dset.steps_per_epoch # Load weights self.load_weights(epoch) @@ -137,15 +147,26 @@ def start_main_session(): def load_validation_data(dpmodel, session): dpmodel.val_crop_generator.start(session) - x_validation = [] - y_validation = [] - for batch in dpmodel.val_crop_generator.generate(): - x_validation.append(batch[0]) - y_validation.append(batch[1]) + if dpmodel.config['train']['model']['crop_generator'] == 'online_labels_cropgen': + x_validation = [] + y_validation = [] + + for batch in dpmodel.val_crop_generator.generate(): + x_validation.append(batch[0]) + y_validation.append(batch[1]) + + x_validation = np.concatenate(x_validation) + y_validation = np.concatenate(y_validation) + + else: + x_validation, y_validation = deepprofiler.learning.validation.load_validation_data( + dpmodel.config, + dpmodel.dset, + dpmodel.val_crop_generator, + session + ) - x_validation = np.concatenate(x_validation) - y_validation = np.concatenate(y_validation) print("Validation data:", x_validation.shape, y_validation.shape) return x_validation, y_validation diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py index 1792c5e9..47350f0d 100644 --- a/plugins/crop_generators/sampled_crop_generator.py +++ b/plugins/crop_generators/sampled_crop_generator.py @@ -8,6 +8,7 @@ import deepprofiler.imaging.cropping tf.compat.v1.disable_v2_behavior() +tf.config.run_functions_eagerly(False) ## Wrapper for Keras ImageDataGenerator ## The Keras generator is not completely useful, because it makes assumptions about From 3c9d7eeb0b6e61092ff6b3269776cc2bbc2ea663 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Tue, 28 Sep 2021 18:05:27 +0200 Subject: [PATCH 31/43] Cleanup of sampling.py after merge --- deepprofiler/dataset/sampling.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py index e5eb664b..e3725b86 100644 --- a/deepprofiler/dataset/sampling.py +++ b/deepprofiler/dataset/sampling.py @@ -26,8 +26,8 @@ def process_batch(self, batch): batch["locations"][i]["Target"] = batch["targets"][i][0] batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]] metadata = pd.concat(batch["locations"]) - cols = ["Key","Target","Nuclei_Location_Center_X","Nuclei_Location_Center_Y"] - seps = ["+","@","x",".png"] + cols = ["Key", "Target", "Nuclei_Location_Center_X", "Nuclei_Location_Center_Y"] + seps = ["+", "@", "x", ".png"] metadata["Image_Name"] = "" for c in range(len(cols)): metadata["Image_Name"] += metadata[cols[c]].astype(str).str.replace("/","-") + seps[c] @@ -57,15 +57,10 @@ def export_single_cells(self, key, image_array, meta): for j in range(crops.shape[0]): image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:]) skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image) -<<<<<<< HEAD self.all_metadata.append(metadata) print("{}: {} single cells".format(key, crops.shape[0])) -======= - self.all_metadata.append(metadata) - print("{}: {} single cells".format(key, crops.shape[0])) ->>>>>>> master def start_session(): configuration = tf.compat.v1.ConfigProto() @@ -119,7 +114,7 @@ def sample_dataset(config, dset): if len(batch["keys"]) > 0: crops, metadata = cropper.process_batch(batch) for j in range(crops.shape[0]): - image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:]) + image = deepprofiler.imaging.cropping.unfold_channels(crops[j, :, :, :]) skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image) all_metadata.append(metadata) From 56aead75a12517b6f25b600b0e2561f41745ec02 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Wed, 29 Sep 2021 18:54:37 +0200 Subject: [PATCH 32/43] Online crop-generators naming. --- deepprofiler/learning/model.py | 2 +- .../crop_generators/online_labels_cropgen.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index 9c64fa6a..c2b7714a 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -82,7 +82,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): # Train model self.feature_model.fit_generator( - generator=self.train_crop_generator.generator(main_session), + generator=self.train_crop_generator.generate(main_session), steps_per_epoch=steps, epochs=epochs, callbacks=callbacks, diff --git a/plugins/crop_generators/online_labels_cropgen.py b/plugins/crop_generators/online_labels_cropgen.py index 711da5f4..4521136b 100644 --- a/plugins/crop_generators/online_labels_cropgen.py +++ b/plugins/crop_generators/online_labels_cropgen.py @@ -48,7 +48,7 @@ def __init__(self, config, dset, mode="Training"): self.classes = list(self.split_data[self.target].unique()) self.num_classes = len(self.classes) self.classes.sort() - self.classes = {self.classes[i]:i for i in range(self.num_classes)} + self.classes = {self.classes[i]: i for i in range(self.num_classes)} # Identify targets and samples self.balanced_sample() @@ -88,7 +88,7 @@ def balanced_sample(self): print(self.samples[self.target].value_counts()) - def generator(self, sess, global_step=0): + def generate(self, sess, global_step=0): pointer = 0 while True: x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels]) @@ -105,7 +105,7 @@ def generator(self, sess, global_step=0): yield(x, np.concatenate(y, axis=0)) - def generate(self, source="samples"): + def generator(self, source="samples"): pointer = 0 if source == "splits": dataframe = self.split_data @@ -124,19 +124,19 @@ def generate(self, source="samples"): break filename = os.path.join(self.directory, dataframe.loc[pointer, "Image_Name"]) im = skimage.io.imread(filename).astype(np.float32) - x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im) + x[i, :, :, :] = deepprofiler.imaging.cropping.fold_channels(im) y.append(self.classes[dataframe.loc[pointer, self.target]]) pointer += 1 if len(y) < x.shape[0]: - x = x[0:len(y),...] - yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)) + x = x[0:len(y), ...] + yield(x, tf.keras.utils.to_categorical(y, num_classes = self.num_classes)) def init_online_labels(self): LABEL_SMOOTHING = 0.2 self.soft_labels = np.zeros((self.split_data.shape[0], self.num_classes)) + LABEL_SMOOTHING/self.num_classes print("Soft labels:", self.soft_labels.shape) - for k,r in self.split_data.iterrows(): + for k, r in self.split_data.iterrows(): label = self.classes[self.split_data.loc[k, self.target]] self.soft_labels[k, label] += 1. - LABEL_SMOOTHING print("Total labels:", np.sum(self.soft_labels)) @@ -151,8 +151,8 @@ def update_online_labels(self, model, epoch): # Get predictions with the model model.get_layer("augmentation_layer").is_training = False - for batch in self.generate(source="splits"): - predictions.append( model.predict(batch[0]) ) + for batch in self.generator(source = "splits"): + predictions.append(model.predict(batch[0])) model.get_layer("augmentation_layer").is_training = True # Update soft labels From 2eb7ecfacec8ef6bc7418dcbefa1def43b5d80a6 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Fri, 1 Oct 2021 15:14:58 +0200 Subject: [PATCH 33/43] Augmentation parameter for ResNet and EfficientNet models. --- plugins/models/efficientnet.py | 29 +++++++++++++++-------------- plugins/models/resnet.py | 24 ++++++++++++++---------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index 91ae2a22..770b49bc 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -5,7 +5,8 @@ from deepprofiler.learning.model import DeepProfilerModel from deepprofiler.imaging.augmentations import AugmentationLayer -tf.compat.v1.disable_v2_behavior() +#tf.compat.v1.disable_v2_behavior() +#tf.config.run_functions_eagerly(False) class ModelClass(DeepProfilerModel): @@ -33,7 +34,7 @@ def get_model(self, config, input_image=None, weights=None, include_top=False): error_msg = str(num_layers) + " conv_blocks not in " + SM assert num_layers in supported_models.keys(), error_msg - if self.is_training and weights is None: + if self.is_training and weights is None and self.config["train"]['model'].get('augmentations') is True: input_image = AugmentationLayer()(input_image) model = supported_models[num_layers]( @@ -53,8 +54,7 @@ def define_model(self, config, dset): optimizer = tf.compat.v1.keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9, nesterov=True) - #loss_func = "categorical_crossentropy" - loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2) + loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2) # TODO:parameterize? if self.is_training is False and "use_pretrained_input_size" in config["profile"].keys(): input_tensor = tf.compat.v1.keras.layers.Input( @@ -65,8 +65,7 @@ def define_model(self, config, dset): input_shape = ( config["dataset"]["locations"]["box_size"], # height config["dataset"]["locations"]["box_size"], # width - len(config["dataset"]["images"][ - "channels"]) # channels + len(config["dataset"]["images"]["channels"]) # channels ) input_image = tf.compat.v1.keras.layers.Input(input_shape) model = self.get_model(config, input_image=input_image) @@ -74,13 +73,12 @@ def define_model(self, config, dset): # 2. Create an output embedding for each target class_outputs = [] - y = tf.compat.v1.keras.layers.Dense(config["num_classes"], activation="softmax", name="ClassProb")(features) + y = tf.compat.v1.keras.layers.Dense(len(dset.targets[0].values), activation="softmax", name="ClassProb")(features) class_outputs.append(y) # 4. Create and compile model model = tf.compat.v1.keras.models.Model(inputs=input_image, outputs=class_outputs) - ## Added weight decay following tricks reported in: ## https://github.com/keras-team/keras/issues/2717 regularizer = tf.compat.v1.keras.regularizers.l2(0.00001) @@ -88,16 +86,19 @@ def define_model(self, config, dset): if hasattr(layer, "kernel_regularizer"): setattr(layer, "kernel_regularizer", regularizer) - model = tf.compat.v1.keras.models.model_from_json( - model.to_json(), - {'AugmentationLayer': AugmentationLayer} - ) + if self.config["train"]["model"].get("augmentations") is True: + model = tf.compat.v1.keras.models.model_from_json( + model.to_json(), + {'AugmentationLayer': AugmentationLayer} + ) + else: + model = tf.compat.v1.keras.models.model_from_json(model.to_json()) return model, optimizer, loss_func def copy_pretrained_weights(self): base_model = self.get_model(self.config, weights="imagenet") - lshift = self.is_training # Shift one layer to accommodate the AugmentationLayer + lshift = self.feature_model.layers[1].name == 'augmentation_layer' # Shift one layer to accommodate the AugmentationLayer # => Transfer all weights except conv1.1 total_layers = len(base_model.layers) @@ -114,7 +115,7 @@ def copy_pretrained_weights(self): for i in range(new_weights.shape[2]): j = i % available_channels - new_weights[:,:,i,:] = weights[0][:,:,j,:] + new_weights[:, :, i, :] = weights[0][:, :, j, :] weights_array = [new_weights] if len(weights) > 1: diff --git a/plugins/models/resnet.py b/plugins/models/resnet.py index 8afc9c19..4a75c75f 100644 --- a/plugins/models/resnet.py +++ b/plugins/models/resnet.py @@ -34,7 +34,7 @@ def get_model(self, config, input_image=None, weights=None, pooling=None, includ num_layers = config["train"]["model"]["params"]["conv_blocks"] error_msg = str(num_layers) + " conv_blocks not in " + SM assert num_layers in supported_models.keys(), error_msg - if self.is_training and weights is None: + if self.is_training and weights is None and self.config["train"]['model'].get('augmentations') is True: input_image = AugmentationLayer()(input_image) if pooling is not None: model = supported_models[num_layers](input_tensor=input_image, pooling=pooling, include_top=include_top, @@ -92,10 +92,14 @@ def define_model(self, config, dset): if hasattr(layer, "kernel_regularizer"): setattr(layer, "kernel_regularizer", regularizer) - model = tf.compat.v1.keras.models.model_from_json( - model.to_json(), - {'AugmentationLayer': AugmentationLayer} - ) + if self.config["train"]["model"].get("augmentations") is True: + model = tf.compat.v1.keras.models.model_from_json( + model.to_json(), + {'AugmentationLayer': AugmentationLayer} + ) + else: + model = tf.compat.v1.keras.models.model_from_json(model.to_json()) + return model, optimizer, loss_func @@ -103,15 +107,15 @@ def define_model(self, config, dset): ## Support for ImageNet initialization def copy_pretrained_weights(self): base_model = self.get_model(self.config, weights="imagenet") - lshift = int(self.is_training) # Shift one layer to accommodate the AugmentationLayer + lshift = self.feature_model.layers[1].name == 'augmentation_layer' # Shift one layer to accommodate the AugmentationLayer # => Transfer all weights except conv1.1 total_layers = len(base_model.layers) - for i in range(3,total_layers): + for i in range(3, total_layers): if len(base_model.layers[i].weights) > 0: print("Setting pre-trained weights: {:.2f}%".format((i/total_layers)*100), end="\r") self.feature_model.layers[i + lshift].set_weights(base_model.layers[i].get_weights()) - + # => Replicate filters of first layer as needed weights = base_model.layers[2].get_weights() available_channels = weights[0].shape[2] @@ -120,10 +124,10 @@ def copy_pretrained_weights(self): for i in range(new_weights.shape[2]): j = i % available_channels - new_weights[:,:,i,:] = weights[0][:,:,j,:] + new_weights[:, :, i, :] = weights[0][:, :, j, :] weights_array = [new_weights] - if len(weights) > 1: + if len(weights) > 1: weights_array += weights[1:] self.feature_model.layers[2 + lshift].set_weights(weights_array) From 46848fad8400b7d18908a4db19a8aca805b1d88f Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Fri, 1 Oct 2021 15:15:41 +0200 Subject: [PATCH 34/43] Test config update. --- tests/files/config/test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/files/config/test.json b/tests/files/config/test.json index cf8230d5..8f147729 100644 --- a/tests/files/config/test.json +++ b/tests/files/config/test.json @@ -43,6 +43,7 @@ }, "model": { "name": "cnn", + "augmentations": false, "crop_generator": "crop_generator", "metrics": ["accuracy"], "epochs": 5, From c9299b941a7309e0f7a47f63b489f7c750468b07 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Fri, 1 Oct 2021 20:35:26 +0200 Subject: [PATCH 35/43] Update sampled_crop_generator.py and image_dataset.py --- deepprofiler/dataset/image_dataset.py | 2 +- .../crop_generators/sampled_crop_generator.py | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py index 4a7a63e7..5d369d25 100644 --- a/deepprofiler/dataset/image_dataset.py +++ b/deepprofiler/dataset/image_dataset.py @@ -211,7 +211,7 @@ def read_dataset(config, mode = 'train'): print(metadata.data.info()) # Split training data - if mode == 'train': + if mode == 'train' and config["train"]["model"]["crop_generator"] == 'crop_generator': split_field = config["train"]["partition"]["split_field"] trainingFilter = lambda df: df[split_field].isin(config["train"]["partition"]["training_values"]) validationFilter = lambda df: df[split_field].isin(config["train"]["partition"]["validation_values"]) diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py index 47350f0d..52f303dc 100644 --- a/plugins/crop_generators/sampled_crop_generator.py +++ b/plugins/crop_generators/sampled_crop_generator.py @@ -30,19 +30,24 @@ def __init__(self, config, dset, mode="Training"): self.mode = mode # Load metadata - self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv")) - self.target = config["train"]["partition"]["targets"][0] + self.all_cells = pd.read_csv(os.path.join(self.directory, "sc-metadata.csv")) + self.target = "Class_Name"#config["train"]["partition"]["targets"][0] # Index targets for one-hot encoded labels - self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True) - self.classes = list(self.split_data[self.target].unique()) + self.split_data = self.all_cells[self.all_cells[self.config["train"]["partition"]["split_field"]] == + self.mode].reset_index(drop=True) + + + + self.classes = list(self.all_cells[self.target].unique()) self.num_classes = len(self.classes) self.classes.sort() - self.classes = {self.classes[i]:i for i in range(self.num_classes)} + self.classes = {self.classes[i]: i for i in range(self.num_classes)} # Identify targets and samples self.balanced_sample() - self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0) + self.expected_steps = (self.samples.shape[0] // self.batch_size) + \ + int(self.samples.shape[0] % self.batch_size > 0) # Report number of classes globally self.config["num_classes"] = self.num_classes From 6f05e2e826c0d95961a2097dff8b09c8707b68f9 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Fri, 1 Oct 2021 20:54:41 +0200 Subject: [PATCH 36/43] Update conditions in model.py and image_dataset.py --- deepprofiler/dataset/image_dataset.py | 4 ++-- deepprofiler/learning/model.py | 18 ++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py index 5d369d25..7119c0cd 100644 --- a/deepprofiler/dataset/image_dataset.py +++ b/deepprofiler/dataset/image_dataset.py @@ -239,8 +239,8 @@ def read_dataset(config, mode = 'train'): dset.outlines = outlines # For training with sampled_crop_generator, no need to read locations again necessary. - #if mode == 'train': - # dset.prepare_training_locations() + if mode == 'train' and config["train"]["model"]["crop_generator"] == 'crop_generator': + dset.prepare_training_locations() return dset diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index c2b7714a..e1519ccc 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -28,11 +28,12 @@ def __init__(self, config, dset, crop_generator, val_crop_generator, is_training self.optimizer = None self.config = config self.dset = dset - self.train_crop_generator = crop_generator(config, dset) - if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen': - self.val_crop_generator = crop_generator(config, dset, mode="Validation") - else: - self.val_crop_generator = val_crop_generator(config, dset) + if is_training: + self.train_crop_generator = crop_generator(config, dset) + if self.config['train']['model']['crop_generator'] in ['online_labels_cropgen', 'sampled_crop_generator']: + self.val_crop_generator = crop_generator(config, dset, mode="Validation") + else: + self.val_crop_generator = val_crop_generator(config, dset) self.random_seed = None self.is_training = is_training @@ -66,11 +67,8 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): # Get training parameters epochs, schedule_epochs, schedule_lr, freq = setup_params(self, experiment) - if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen': + if self.config['train']['model']['crop_generator'] in ['online_labels_cropgen', 'sampled_crop_generator']: steps = self.train_crop_generator.expected_steps - elif self.config['train']['model']['crop_generator'] == 'sampled_crop_generator': - steps = int((len(os.listdir(self.config['paths']['single_cell_sample'])) - 1) - / self.config["train"]["model"]["params"]["batch_size"]) else: steps = self.dset.steps_per_epoch @@ -82,7 +80,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1): # Train model self.feature_model.fit_generator( - generator=self.train_crop_generator.generate(main_session), + generator=self.train_crop_generator.generator(main_session), steps_per_epoch=steps, epochs=epochs, callbacks=callbacks, From f1c235d2f67306851cacc6b140f513aa9e054a32 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Fri, 1 Oct 2021 21:01:46 +0200 Subject: [PATCH 37/43] Condition in model.py --- deepprofiler/learning/model.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py index e1519ccc..e6700f21 100644 --- a/deepprofiler/learning/model.py +++ b/deepprofiler/learning/model.py @@ -14,6 +14,7 @@ tf.compat.v1.disable_v2_behavior() tf.config.run_functions_eagerly(False) + ################################################## # This class should be used as an abstract base # class for plugin models. @@ -146,7 +147,7 @@ def start_main_session(): def load_validation_data(dpmodel, session): dpmodel.val_crop_generator.start(session) - if dpmodel.config['train']['model']['crop_generator'] == 'online_labels_cropgen': + if dpmodel.config['train']['model']['crop_generator'] in ['online_labels_cropgen', 'sampled_crop_generator']: x_validation = [] y_validation = [] @@ -186,7 +187,7 @@ def setup_callbacks(dpmodel, lr_schedule_epochs, lr_schedule_lr, dset, experimen save_best_only=save_best, period=period ) - + # CSV Log csv_output = dpmodel.config["paths"]["logs"] + "/log.csv" callback_csv = tf.compat.v1.keras.callbacks.CSVLogger(filename=csv_output) @@ -201,9 +202,9 @@ def lr_schedule(epoch, lr): # Collect all callbacks if lr_schedule_epochs: callback_lr_schedule = tf.compat.v1.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1) - callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] + callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] else: - callbacks = [callback_model_checkpoint, callback_csv] + callbacks = [callback_model_checkpoint, callback_csv] # Online labels callback if dpmodel.config["train"]["model"]["crop_generator"] == "online_labels_cropgen": @@ -211,7 +212,7 @@ def lr_schedule(epoch, lr): on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch) ) callbacks.append(update_labels) - + return callbacks From 13059ebd146fbd0e2f657585ed73ce8067e3a592 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Fri, 1 Oct 2021 21:27:35 +0200 Subject: [PATCH 38/43] Number of classes in EfficientNet --- plugins/models/efficientnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index 770b49bc..0e0e21c1 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -73,7 +73,7 @@ def define_model(self, config, dset): # 2. Create an output embedding for each target class_outputs = [] - y = tf.compat.v1.keras.layers.Dense(len(dset.targets[0].values), activation="softmax", name="ClassProb")(features) + y = tf.compat.v1.keras.layers.Dense(self.config["num_classes"], activation="softmax", name="ClassProb")(features) class_outputs.append(y) # 4. Create and compile model From 7860974b0f6203b758fcab6de1773203fad8f45f Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 4 Oct 2021 14:05:11 +0200 Subject: [PATCH 39/43] Number of classes in ResNet. https://github.com/cytomining/DeepProfiler/issues/285 --- plugins/models/resnet.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/plugins/models/resnet.py b/plugins/models/resnet.py index 4a75c75f..565da634 100644 --- a/plugins/models/resnet.py +++ b/plugins/models/resnet.py @@ -76,12 +76,9 @@ def define_model(self, config, dset): # 2. Create an output embedding for each target class_outputs = [] - i = 0 - for t in dset.targets: - y = tf.compat.v1.keras.layers.Dense(t.shape[1], activation="softmax", name=t.field_name)(features) - class_outputs.append(y) - i += 1 - + y = tf.compat.v1.keras.layers.Dense(self.config["num_classes"], activation="softmax", name="ClassProb")( + features) + class_outputs.append(y) # 4. Create and compile model model = tf.compat.v1.keras.models.Model(inputs=input_image, outputs=class_outputs) From c578dc16a8853da5df92bd86048c0f2979661942 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 4 Oct 2021 14:05:44 +0200 Subject: [PATCH 40/43] Number of classes for profiling. https://github.com/cytomining/DeepProfiler/issues/286 --- deepprofiler/learning/profiling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py index 9918e4fd..b2741cd8 100644 --- a/deepprofiler/learning/profiling.py +++ b/deepprofiler/learning/profiling.py @@ -23,9 +23,11 @@ def __init__(self, config, dset): "plugins.crop_generators.{}".format(config["train"]["model"]["crop_generator"]) ).SingleImageGeneratorClass + self.config["num_classes"] = self.dset.targets[0].shape[1] + self.dpmodel = importlib.import_module( "plugins.models.{}".format(config["train"]["model"]["name"]) - ).ModelClass(config, dset, self.crop_generator, self.profile_crop_generator, is_training=False) + ).ModelClass(self.config, dset, self.crop_generator, self.profile_crop_generator, is_training=False) self.profile_crop_generator = self.profile_crop_generator(config, dset) From 2f781174a710c22ff22bb41d259db2fcec3128a0 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 4 Oct 2021 17:55:58 +0200 Subject: [PATCH 41/43] Parameterize label smoothing. --- plugins/models/efficientnet.py | 3 ++- plugins/models/resnet.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py index 0e0e21c1..4b7b4626 100644 --- a/plugins/models/efficientnet.py +++ b/plugins/models/efficientnet.py @@ -54,7 +54,8 @@ def define_model(self, config, dset): optimizer = tf.compat.v1.keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9, nesterov=True) - loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2) # TODO:parameterize? + loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing= + self.config["train"]["model"]["params"]["label_smoothing"]) if self.is_training is False and "use_pretrained_input_size" in config["profile"].keys(): input_tensor = tf.compat.v1.keras.layers.Input( diff --git a/plugins/models/resnet.py b/plugins/models/resnet.py index 565da634..92dbe40c 100644 --- a/plugins/models/resnet.py +++ b/plugins/models/resnet.py @@ -46,7 +46,9 @@ def get_model(self, config, input_image=None, weights=None, pooling=None, includ ## Model definition def define_model(self, config, dset): # 1. Create ResNet architecture to extract features - loss_func = "categorical_crossentropy" + loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing= + self.config["train"]["model"]["params"][ + "label_smoothing"]) optimizer = tf.compat.v1.keras.optimizers.SGD(learning_rate=config["train"]["model"]["params"]["learning_rate"], momentum=0.9, nesterov=True) if "use_pretrained_input_size" in config["profile"].keys() and self.is_training is False: From 785bbff0d875d7029f40f9b15af372b99a08ffc6 Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Mon, 4 Oct 2021 17:58:21 +0200 Subject: [PATCH 42/43] Label smoothing parameter in the test.json config. --- tests/files/config/test.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/files/config/test.json b/tests/files/config/test.json index 8f147729..8449a97f 100644 --- a/tests/files/config/test.json +++ b/tests/files/config/test.json @@ -53,6 +53,7 @@ "learning_rate": 0.0001, "batch_size": 2, "conv_blocks": 1, + "label_smoothing": 0.0, "feature_dim": 100, "latent_dim": 100, "epsilon_std": 1.0 From 9b73e44e31c65561084a43fd57cd592f15abe2ed Mon Sep 17 00:00:00 2001 From: arkkienkeli Date: Tue, 5 Oct 2021 20:39:08 +0200 Subject: [PATCH 43/43] EfficientNet package in setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 548a240c..f1dee1f9 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ "beautifulsoup4>=4.6", "click>=6.7", "comet_ml>=1.0", + "efficientnet==1.1.1", "gpyopt>=1.2", "lxml>=4.2", "numpy>=1.13", @@ -30,7 +31,7 @@ "comet-ml>=3.1.6", "tensorflow==2.5.*", "tensorflow_addons", - "tqdm>=4.62" + "tqdm>=4.62", ], extras_require={ "test": [