From 28486455eb5bea4abda7e8711d950330ba83d7ca Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Wed, 10 Feb 2021 12:28:01 -0500
Subject: [PATCH 01/43] Enable export all single cells

---
 deepprofiler/__main__.py         |  8 ++++++--
 deepprofiler/dataset/sampling.py | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py
index f1959559..db4bda15 100644
--- a/deepprofiler/__main__.py
+++ b/deepprofiler/__main__.py
@@ -142,12 +142,16 @@ def prepare(context):
 
 # Second tool: Sample single cells for training
 @cli.command()
+@click.option("--mode", default="sample")
 @click.pass_context
-def sample_sc(context):
+def sample_sc(context, mode):
     if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
         context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
     dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"])
-    deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset)
+    if mode == "sample":
+        deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset)
+    elif mode == "export_all":
+        deepprofiler.dataset.sampling.export_dataset(context.obj["config"], dset)
     print("Single-cell sampling complete.")
 
 
diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index 010100c5..83a2b1ec 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -15,6 +15,7 @@
 class SingleCellSampler(deepprofiler.imaging.cropping.CropGenerator):
 
     def start(self, session):
+        self.all_metadata = []
         self.session = session
         # Define input data batches
         with tf.variable_scope("train_inputs"):
@@ -48,6 +49,18 @@ def process_batch(self, batch):
         output = self.session.run(self.input_variables["labeled_crops"], feed_dict)
         return output[0], metadata.reset_index(drop=True)
 
+    def export_single_cells(self, key, image_array, meta):
+        outdir = self.config["paths"]["single_cell_sample"]
+        key = self.dset.keyGen(meta)
+        batch = {"keys": [key], "images": [image_array], "targets": [], "locations": []}
+        batch["locations"].append(deepprofiler.imaging.boxes.get_locations(key, self.config))
+        batch["targets"].append([t.get_values(meta) for t in self.dset.targets])
+        crops, metadata = self.process_batch(batch)
+        for j in range(crops.shape[0]):
+            image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:])
+            skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image)
+        self.all_metadata.append(metadata)
+        print("{}: {} single cells".format(key, crops.shape[0]))
 
 def start_session():
     configuration = tf.ConfigProto()
@@ -112,3 +125,17 @@ def sample_dataset(config, dset):
     all_metadata = pd.concat(all_metadata).reset_index(drop=True)
     all_metadata.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False)
 
+def export_dataset(config, dset):
+    outdir = config["paths"]["single_cell_sample"]
+    if not is_directory_empty(outdir):
+        return
+
+    session = start_session()
+    cropper = SingleCellSampler(config, dset)
+    cropper.start(session)
+    dset.scan(cropper.export_single_cells, frame="all")
+    df = pd.concat(cropper.all_metadata).reset_index(drop=True)
+    df.to_csv(outdir, os.path.join("sc-metadata.csv"), index=False)
+    print("Exporting: done")    
+
+

From ee7bf7f04a04096e477653f5874515716b030839 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Thu, 11 Feb 2021 10:55:14 -0500
Subject: [PATCH 02/43] Fix csv output dir

---
 deepprofiler/dataset/sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index 83a2b1ec..d710ae6d 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -135,7 +135,7 @@ def export_dataset(config, dset):
     cropper.start(session)
     dset.scan(cropper.export_single_cells, frame="all")
     df = pd.concat(cropper.all_metadata).reset_index(drop=True)
-    df.to_csv(outdir, os.path.join("sc-metadata.csv"), index=False)
+    df.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False)
     print("Exporting: done")    
 
 

From c01485a89037b5485e839b523530bb431fe04aa4 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 21 Jun 2021 19:05:28 +0200
Subject: [PATCH 03/43] Fixed the issue that targets could not be obtained for
 crop generator.

---
 deepprofiler/dataset/image_dataset.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py
index 5f99a85c..f641dc8d 100644
--- a/deepprofiler/dataset/image_dataset.py
+++ b/deepprofiler/dataset/image_dataset.py
@@ -230,16 +230,17 @@ def read_dataset(config, mode = 'train'):
     )
 
     # Add training targets
-    if mode == 'train':
-        for t in config["train"]["partition"]["targets"]:
-            new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
-            dset.add_target(new_target)
+
+    for t in config["train"]["partition"]["targets"]:
+        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
+        dset.add_target(new_target)
 
     # Activate outlines for masking if needed
     if config["dataset"]["locations"]["mask_objects"]:
         dset.outlines = outlines
 
-    dset.prepare_training_locations()
+    if mode == 'train':
+        dset.prepare_training_locations()
 
     return dset
 

From f1361c33ab4ff5b0a47671660f4aee09f11284f1 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Tue, 27 Jul 2021 10:59:02 -0400
Subject: [PATCH 04/43] Update folder structure

---
 deepprofiler/dataset/sampling.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index 47227c80..9122da69 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -25,19 +25,20 @@ def process_batch(self, batch):
             batch["locations"][i]["Target"] = batch["targets"][i][0]
             batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]]
         metadata = pd.concat(batch["locations"])
-        cols = ["Key","Target","Nuclei_Location_Center_X","Nuclei_Location_Center_Y"]
-        seps = ["+","@","x",".png"]
-        metadata["Image_Name"] = ""
+        cols = ["Key", "Target", "Nuclei_Location_Center_X", "Nuclei_Location_Center_Y"]
+        seps = ["/", "@", "x", ".png"]
+        metadata["Image_Name"] = ''
         for c in range(len(cols)):
-            metadata["Image_Name"] += metadata[cols[c]].astype(str).str.replace("/","-") + seps[c]
-        
+            metadata["Image_Name"] += metadata[cols[c]].astype(str) + seps[c]
+        print(metadata["Image_Name"])
+
         boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config)
 
         feed_dict = {
-            self.input_variables["image_ph"]:batch["images"],
-            self.input_variables["boxes_ph"]:boxes,
-            self.input_variables["box_ind_ph"]:box_ind,
-            self.input_variables["mask_ind_ph"]:masks
+            self.input_variables["image_ph"]: batch["images"],
+            self.input_variables["boxes_ph"]: boxes,
+            self.input_variables["box_ind_ph"]: box_ind,
+            self.input_variables["mask_ind_ph"]: masks
         }
         for i in range(len(targets)):
             tname = "target_" + str(i)
@@ -99,7 +100,9 @@ def sample_dataset(config, dset):
         if len(batch["keys"]) > 0:
             crops, metadata = cropper.process_batch(batch)
             for j in range(crops.shape[0]):
-                image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:])
+                image = deepprofiler.imaging.cropping.unfold_channels(crops[j, :, :, :])
+                plate, well_site, rest = metadata.loc[j, "Image_Name"].split('/')
+                os.makedirs(os.path.join(outdir, plate, well_site), exist_ok=True)
                 skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image)
             all_metadata.append(metadata)
 

From 78e0018069a4b45163af3647f47a4d852004a9d1 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Tue, 27 Jul 2021 13:21:47 -0400
Subject: [PATCH 05/43] Add missing imports

---
 setup.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 0a31a5d8..075bfb74 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,10 @@
         "scikit-learn>=0.19.0",
         "scipy>=1.1",
         "comet-ml>=3.1.6",
-        "tensorflow_addons"
+        "efficientnet>=1.0.0",
+        "Keras==2.2.5",
+        "tensorflow_addons",
+
     ],
     extras_require={
         "test": [
@@ -37,4 +40,4 @@
             "codecov>=2.0"
         ]
     }
-)
+)
\ No newline at end of file

From f65fe7d4ad7d5e1c823fa7848519a3f57dbbfcd2 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Tue, 27 Jul 2021 13:30:04 -0400
Subject: [PATCH 06/43] Add missing imports

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 075bfb74..050bcc3f 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,6 @@
         "efficientnet>=1.0.0",
         "Keras==2.2.5",
         "tensorflow_addons",
-
     ],
     extras_require={
         "test": [

From 29db8a0922ad799e5a00e96c1048821d997dad10 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Fri, 6 Aug 2021 16:22:49 -0400
Subject: [PATCH 07/43] Change sampling to folder structure

---
 deepprofiler/dataset/sampling.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index 9122da69..a2a8d068 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -3,6 +3,7 @@
 import threading
 import tqdm
 import os
+import shutil
 
 import tensorflow as tf
 
@@ -21,16 +22,15 @@ def start(self, session):
 
     def process_batch(self, batch):
         for i in range(len(batch["keys"])):
-            batch["locations"][i]["Key"] = batch["keys"][i]
+            batch["locations"][i]["Key"] = batch["keys"][i].replace('-', '/')
             batch["locations"][i]["Target"] = batch["targets"][i][0]
             batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]]
         metadata = pd.concat(batch["locations"])
         cols = ["Key", "Target", "Nuclei_Location_Center_X", "Nuclei_Location_Center_Y"]
         seps = ["/", "@", "x", ".png"]
-        metadata["Image_Name"] = ''
+        metadata["Image_Name"] = ""
         for c in range(len(cols)):
-            metadata["Image_Name"] += metadata[cols[c]].astype(str) + seps[c]
-        print(metadata["Image_Name"])
+            metadata["Image_Name"] += metadata[cols[c]].astype(str).str + seps[c]
 
         boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config)
 
@@ -70,8 +70,7 @@ def is_directory_empty(outdir):
             return False
         elif erase == "y":
             print("Removing previous sampled files")
-            for f in tqdm.tqdm(files):
-                os.remove(os.path.join(outdir, f))
+            shutil.rmtree(outdir)
     return True
 
 
@@ -100,9 +99,9 @@ def sample_dataset(config, dset):
         if len(batch["keys"]) > 0:
             crops, metadata = cropper.process_batch(batch)
             for j in range(crops.shape[0]):
+                plate, well, site, name = metadata.loc[j, "Image_Name"].split('/')
+                os.makedirs(os.path.join(outdir, plate, well, site), exist_ok=True)
                 image = deepprofiler.imaging.cropping.unfold_channels(crops[j, :, :, :])
-                plate, well_site, rest = metadata.loc[j, "Image_Name"].split('/')
-                os.makedirs(os.path.join(outdir, plate, well_site), exist_ok=True)
                 skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image)
             all_metadata.append(metadata)
 

From 40aac088e5e2b81c7b835d15c37057ef07d696ad Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Fri, 6 Aug 2021 16:26:50 -0400
Subject: [PATCH 08/43] setup shouldn't change

---
 setup.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/setup.py b/setup.py
index 050bcc3f..08d9c96c 100644
--- a/setup.py
+++ b/setup.py
@@ -28,8 +28,6 @@
         "scikit-learn>=0.19.0",
         "scipy>=1.1",
         "comet-ml>=3.1.6",
-        "efficientnet>=1.0.0",
-        "Keras==2.2.5",
         "tensorflow_addons",
     ],
     extras_require={

From 6ac33648f065ecd1c3cd0fab2fdba4a238eeee10 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Fri, 6 Aug 2021 16:28:00 -0400
Subject: [PATCH 09/43] setup shouldn't change

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 08d9c96c..6cebe025 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
         "scikit-learn>=0.19.0",
         "scipy>=1.1",
         "comet-ml>=3.1.6",
-        "tensorflow_addons",
+        "tensorflow_addons"
     ],
     extras_require={
         "test": [

From 7504699c0fec77911bac54ef373e81b178ce20eb Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Fri, 6 Aug 2021 16:28:50 -0400
Subject: [PATCH 10/43] setup shouldn't change

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6cebe025..0a31a5d8 100644
--- a/setup.py
+++ b/setup.py
@@ -37,4 +37,4 @@
             "codecov>=2.0"
         ]
     }
-)
\ No newline at end of file
+)

From b389654b60408d5dd6f05577e918847564ce2e85 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Mon, 9 Aug 2021 16:39:52 -0400
Subject: [PATCH 11/43] Adapted profiling and tests

---
 deepprofiler/learning/profiling.py            | 6 +++---
 tests/deepprofiler/learning/test_profiling.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py
index a76ae2de..80a20dcc 100644
--- a/deepprofiler/learning/profiling.py
+++ b/deepprofiler/learning/profiling.py
@@ -52,7 +52,7 @@ def configure(self):
         print("Extracting output from layer:", self.config["profile"]["feature_layer"])
 
     def check(self, meta):
-        output_file = self.config["paths"]["features"] + "/{}/{}_{}.npz"
+        output_file = self.config["paths"]["features"] + "/{}/{}/{}.npz"
         output_file = output_file.format( meta["Metadata_Plate"], meta["Metadata_Well"], meta["Metadata_Site"])
 
         # Check if features were computed before
@@ -65,9 +65,9 @@ def check(self, meta):
     # Function to process a single image
     def extract_features(self, key, image_array, meta):  # key is a placeholder
         start = tic()
-        output_file = self.config["paths"]["features"] + "/{}/{}_{}.npz"
+        output_file = self.config["paths"]["features"] + "/{}/{}/{}.npz"
         output_file = output_file.format( meta["Metadata_Plate"], meta["Metadata_Well"], meta["Metadata_Site"])
-        os.makedirs(self.config["paths"]["features"] + "/{}".format(meta["Metadata_Plate"]), exist_ok=True)
+        os.makedirs(self.config["paths"]["features"] + "/{}/{}".format(meta["Metadata_Plate"], meta["Metadata_Well"]), exist_ok=True)
 
         batch_size = self.config["profile"]["batch_size"]
         image_key, image_names, outlines = self.dset.get_image_paths(meta)
diff --git a/tests/deepprofiler/learning/test_profiling.py b/tests/deepprofiler/learning/test_profiling.py
index f92ced5c..ad6fefd4 100644
--- a/tests/deepprofiler/learning/test_profiling.py
+++ b/tests/deepprofiler/learning/test_profiling.py
@@ -64,7 +64,7 @@ def test_extract_features(profile, metadata, locations, checkpoint):
         image = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
         profile.configure()
         profile.extract_features(None, image, meta)
-        output_file = profile.config["paths"]["features"] + "/{}/{}_{}.npz"\
+        output_file = profile.config["paths"]["features"] + "/{}/{}/{}.npz"\
             .format(meta["Metadata_Plate"], meta["Metadata_Well"], meta["Metadata_Site"])
         assert os.path.isfile(output_file)
 
@@ -73,6 +73,6 @@ def test_profile(config, dataset, data, locations, checkpoint):
     with tf.compat.v1.Session().as_default():
         deepprofiler.learning.profiling.profile(config, dataset)
         for index, row in dataset.meta.data.iterrows():
-            output_file = config["paths"]["features"] + "/{}/{}_{}.npz" \
+            output_file = config["paths"]["features"] + "/{}/{}/{}.npz" \
                 .format(row["Metadata_Plate"], row["Metadata_Well"], row["Metadata_Site"])
             assert os.path.isfile(output_file)

From 1ac1d7d03e8b0aa360c1d88a9bebd330401bdaf2 Mon Sep 17 00:00:00 2001
From: michaelbornholdt <michael.bornholdt@outlook.com>
Date: Wed, 11 Aug 2021 16:22:48 -0400
Subject: [PATCH 12/43] fix leftover string

---
 deepprofiler/dataset/sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index a2a8d068..849c8207 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -30,7 +30,7 @@ def process_batch(self, batch):
         seps = ["/", "@", "x", ".png"]
         metadata["Image_Name"] = ""
         for c in range(len(cols)):
-            metadata["Image_Name"] += metadata[cols[c]].astype(str).str + seps[c]
+            metadata["Image_Name"] += metadata[cols[c]].astype(str) + seps[c]
 
         boxes, box_ind, targets, masks = deepprofiler.imaging.boxes.prepare_boxes(batch, self.config)
 

From e25758951656c75cd0e044cb0054e0aac909273a Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Mon, 23 Aug 2021 19:00:15 -0400
Subject: [PATCH 13/43] Expanding data augmentations

---
 deepprofiler/imaging/augmentations.py | 76 ++++++++++++++++++++++++---
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py
index 8678ab5f..6478e6ce 100644
--- a/deepprofiler/imaging/augmentations.py
+++ b/deepprofiler/imaging/augmentations.py
@@ -1,6 +1,6 @@
 import numpy as np
 import tensorflow as tf
-import tensorflow_addons
+import tensorflow_addons as tfa
 
 tf.compat.v1.disable_v2_behavior()
 
@@ -9,8 +9,69 @@
 # CROPPING AND TRANSFORMATION OPERATIONS
 #################################################
 
+def random_crop(image):
+    w,h,c = image.shape
+    
+    size = tf.random.uniform([1], minval=int(w.value*0.6), maxval=w, dtype=tf.int32)
+    crop = tf.image.random_crop(image, [size[0],size[0],c])
+    
+    result = tf.image.resize(
+        tf.expand_dims(crop, 0), [w,h], method="bicubic"
+    )
+    
+    return result[0,...]
+
+
+def random_illumination(image):
+    # Make channels independent images
+    numchn = image.shape[-1].value
+    source = tf.transpose(image, [2, 1, 0])
+    source = tf.expand_dims(source, -1)
+    source = tf.image.grayscale_to_rgb(source)
+    
+    # Apply illumination augmentations
+    bright = tf.random.uniform([numchn], minval=-0.2, maxval=0.2, dtype=tf.float32)
+    channels = [tf.image.adjust_brightness(source[s,...], bright[s]) for s in range(numchn)]
+    contrast = tf.random.uniform([numchn], minval=0.5, maxval=1.5, dtype=tf.float32)
+    channels = [tf.image.adjust_contrast(channels[s], contrast[s]) for s in range(numchn)]
+    result = tf.concat([tf.expand_dims(t, 0) for t in channels], axis=0)
+    
+    # Recover multi-channel image
+    result = tf.image.rgb_to_grayscale(result)
+    result = tf.transpose(result[:,:,:,0], [2, 1, 0])
+    return result
+
+
+def random_flips(image):
+    # Horizontal flips
+    augmented = tf.image.random_flip_left_right(image)
+
+    # 90 degree rotations
+    angle = tf.random.uniform([1], minval=0, maxval=4, dtype=tf.int32)
+    augmented = tf.image.rot90(augmented, angle[0])
+    
+    return augmented
+
+def random_rotate(image):
+    w, h, c = image.shape
+    image = tfa.image.rotate(image, np.pi / tf.random.uniform(shape=[], minval=1, maxval=10, dtype=tf.float32))
+    image = tf.image.central_crop(image, 0.7)
+    return tf.image.resize(image, (w, h))
+
+
+def augment(image):
+    if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)):
+        augm = random_crop(image)
+    else:
+        augm = random_rotate(image)
+
+    augm = random_illumination(augm)
+    augm = random_flips(augm)
 
-def augment(crop):
+    return augm
+
+
+def old_augment(crop):
     with tf.compat.v1.variable_scope("augmentation"):
         # Horizontal flips
         augmented = tf.image.random_flip_left_right(crop)
@@ -39,21 +100,24 @@ def augment(crop):
 
 
 def augment_multiple(crops, parallel=None):
-    with tf.compat.v1.variable_scope("augmentation"):
-        return tf.map_fn(augment, crops, parallel_iterations=parallel, dtype=tf.float32)
+    print("+")
+    return tf.map_fn(augment, crops, parallel_iterations=parallel, dtype=tf.float32)
 
 
 ## A layer for GPU accelerated augmentations
 
+#AugmentationLayer = tf.keras.layers.Lambda(augment_multiple)
+
 class AugmentationLayer(tf.compat.v1.keras.layers.Layer):
     def __init__(self, **kwargs):
+        self.is_training = True
         super(AugmentationLayer, self).__init__(**kwargs)
 
     def build(self, input_shape):
         return
 
-    def call(self, input_tensor, training=False):
-        if training:
+    def call(self, input_tensor):
+        if self.is_training:
             return augment_multiple(input_tensor)
         else:
             return input_tensor

From 29200796a6d457e2e8e6e096a3463ca7ac38e4db Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Tue, 24 Aug 2021 17:51:45 -0400
Subject: [PATCH 14/43] Enhanced augmentations

---
 deepprofiler/imaging/augmentations.py | 33 +++------------------------
 deepprofiler/imaging/cropping.py      | 10 ++++----
 2 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py
index 6478e6ce..6ab7f00d 100644
--- a/deepprofiler/imaging/augmentations.py
+++ b/deepprofiler/imaging/augmentations.py
@@ -1,6 +1,7 @@
 import numpy as np
 import tensorflow as tf
 import tensorflow_addons as tfa
+import sys
 
 tf.compat.v1.disable_v2_behavior()
 
@@ -39,6 +40,7 @@ def random_illumination(image):
     # Recover multi-channel image
     result = tf.image.rgb_to_grayscale(result)
     result = tf.transpose(result[:,:,:,0], [2, 1, 0])
+    result = result / tf.math.reduce_max(result)
     return result
 
 
@@ -65,42 +67,13 @@ def augment(image):
     else:
         augm = random_rotate(image)
 
-    augm = random_illumination(augm)
     augm = random_flips(augm)
+    augm = random_illumination(augm)
 
     return augm
 
 
-def old_augment(crop):
-    with tf.compat.v1.variable_scope("augmentation"):
-        # Horizontal flips
-        augmented = tf.image.random_flip_left_right(crop)
-
-        # 90 degree rotations
-        angle = tf.compat.v1.random_uniform([1], minval=0, maxval=4, dtype=tf.int32)
-        augmented = tf.image.rot90(augmented, angle[0])
-
-        # 5 degree inclinations
-        angle = tf.compat.v1.random_normal([1], mean=0.0, stddev=0.03 * np.pi, dtype=tf.float32)
-        augmented = tensorflow_addons.image.rotate(augmented, angle[0], interpolation="BILINEAR")
-
-        # Translations (3% movement in x and y)
-        offsets = tf.compat.v1.random_normal([2],
-                                             mean=0,
-                                             stddev=int(crop.shape[0].value * 0.03)
-                                             )
-        augmented = tensorflow_addons.image.translate(augmented, translations=offsets)
-
-        # Illumination changes (10% changes in intensity)
-        illum_s = tf.compat.v1.random_normal([1], mean=1.0, stddev=0.1, dtype=tf.float32)
-        illum_t = tf.compat.v1.random_normal([1], mean=0.0, stddev=0.1, dtype=tf.float32)
-        augmented = augmented * illum_s + illum_t
-
-    return augmented
-
-
 def augment_multiple(crops, parallel=None):
-    print("+")
     return tf.map_fn(augment, crops, parallel_iterations=parallel, dtype=tf.float32)
 
 
diff --git a/deepprofiler/imaging/cropping.py b/deepprofiler/imaging/cropping.py
index c042914a..ddb32d64 100644
--- a/deepprofiler/imaging/cropping.py
+++ b/deepprofiler/imaging/cropping.py
@@ -43,11 +43,11 @@ def fold_channels(crop):
     # Expected input image shape: (h, w * c), with h = w
     # Output image shape: (h, w, c), with h = w
     output = np.reshape(crop, (crop.shape[0], crop.shape[0], -1), order="F").astype(np.float)
-    for i in range(output.shape[-1]):
-        mean = np.mean(output[:, :, i])
-        std = np.std(output[:, :, i])
-        output[:, :, i] = (output[:, :, i] - mean) / std
-    return output
+    #for i in range(output.shape[-1]):
+    #    mean = np.mean(output[:, :, i])
+    #    std = np.std(output[:, :, i])
+    #    output[:, :, i] = (output[:, :, i] - mean) / std
+    return output / 255.
 
 
 # TODO: implement abstract crop generator

From 38fbf43e5fbf0d2f66e4a10800b66afe9bd354c1 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Tue, 24 Aug 2021 17:52:22 -0400
Subject: [PATCH 15/43] Implemented average class precision metric

---
 deepprofiler/learning/training.py          |  4 ++++
 plugins/metrics/average_class_precision.py | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 plugins/metrics/average_class_precision.py

diff --git a/deepprofiler/learning/training.py b/deepprofiler/learning/training.py
index 290580c2..29f1df44 100644
--- a/deepprofiler/learning/training.py
+++ b/deepprofiler/learning/training.py
@@ -1,3 +1,4 @@
+import tensorflow as tf
 import importlib
 
 #################################################
@@ -8,6 +9,7 @@
 def learn_model(config, dset, epoch=1, seed=None, verbose=1):
     model_module = importlib.import_module("plugins.models.{}".format(config["train"]["model"]["name"]))
     crop_module = importlib.import_module("plugins.crop_generators.{}".format(config["train"]["model"]["crop_generator"]))
+    config["num_classes"] = len(dset.training_images["Target"].unique())
     if "metrics" in config["train"]["model"].keys():
         if type(config["train"]["model"]["metrics"]) not in [list, dict]:
             raise ValueError("Metrics should be a list or dictionary.")
@@ -29,6 +31,8 @@ def learn_model(config, dset, epoch=1, seed=None, verbose=1):
                        for k, v in config["train"]["model"]["metrics"].items()}
     else:
         metrics = ["accuracy"]
+
+
     importlib.invalidate_caches()
 
     crop_generator = crop_module.GeneratorClass
diff --git a/plugins/metrics/average_class_precision.py b/plugins/metrics/average_class_precision.py
new file mode 100644
index 00000000..71fb675d
--- /dev/null
+++ b/plugins/metrics/average_class_precision.py
@@ -0,0 +1,19 @@
+import tensorflow as tf
+from deepprofiler.learning.metric import Metric
+
+tf.compat.v1.disable_v2_behavior()
+
+class MetricClass(Metric):
+
+    def create_metric(self):
+        def metric_func(y_true, y_pred):
+            return self.metric(y_true, y_pred)
+        metric_func.__name__ = "average_class_precision"
+        self.f = metric_func
+        
+    def metric(self, y_true, y_pred):
+        result = 0
+        self.single_class_prec = [tf.keras.metrics.Precision(class_id=cls) for cls in range(self.config["num_classes"])]
+        for cls_prec in self.single_class_prec:
+            result += cls_prec(y_true, y_pred)
+        return result / len(self.single_class_prec) 

From 306fc182d7d159143d4c558e022e2987d77b0bfe Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Sat, 28 Aug 2021 13:11:13 -0400
Subject: [PATCH 16/43] Removing object state (not needed)

---
 plugins/metrics/average_class_precision.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/plugins/metrics/average_class_precision.py b/plugins/metrics/average_class_precision.py
index 71fb675d..b4c40117 100644
--- a/plugins/metrics/average_class_precision.py
+++ b/plugins/metrics/average_class_precision.py
@@ -13,7 +13,7 @@ def metric_func(y_true, y_pred):
         
     def metric(self, y_true, y_pred):
         result = 0
-        self.single_class_prec = [tf.keras.metrics.Precision(class_id=cls) for cls in range(self.config["num_classes"])]
-        for cls_prec in self.single_class_prec:
+        single_class_prec = [tf.keras.metrics.Precision(class_id=cls) for cls in range(self.config["num_classes"])]
+        for cls_prec in single_class_prec:
             result += cls_prec(y_true, y_pred)
-        return result / len(self.single_class_prec) 
+        return result / len(single_class_prec) 

From 43f0223cf9b33a44fe7c82400e5e6c50b1ae6b09 Mon Sep 17 00:00:00 2001
From: Nikita Moshkov <nmoshkov@broadinstitute.org>
Date: Sat, 28 Aug 2021 16:12:08 -0400
Subject: [PATCH 17/43] Fall back to external implementation of EffNet + first
 layer replication

---
 plugins/models/efficientnet.py | 37 +++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index 44ad732c..aea98a5f 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -1,4 +1,6 @@
 import tensorflow as tf
+import numpy
+import efficientnet.tfkeras as efn
 
 from deepprofiler.learning.model import DeepProfilerModel
 from deepprofiler.imaging.augmentations import AugmentationLayer
@@ -14,14 +16,14 @@ def __init__(self, config, dset, generator, val_generator, is_training):
     ## Define supported models
     def get_supported_models(self):
         return {
-            0: tf.compat.v1.keras.applications.EfficientNetB0,
-            1: tf.compat.v1.keras.applications.EfficientNetB1,
-            2: tf.compat.v1.keras.applications.EfficientNetB2,
-            3: tf.compat.v1.keras.applications.EfficientNetB3,
-            4: tf.compat.v1.keras.applications.EfficientNetB4,
-            5: tf.compat.v1.keras.applications.EfficientNetB5,
-            6: tf.compat.v1.keras.applications.EfficientNetB6,
-            7: tf.compat.v1.keras.applications.EfficientNetB7,
+            0: efn.EfficientNetB0,
+            1: efn.EfficientNetB1,
+            2: efn.EfficientNetB2,
+            3: efn.EfficientNetB3,
+            4: efn.EfficientNetB4,
+            5: efn.EfficientNetB5,
+            6: efn.EfficientNetB6,
+            7: efn.EfficientNetB7,
         }
 
     def get_model(self, config, input_image=None, weights=None, include_top=False):
@@ -100,9 +102,24 @@ def copy_pretrained_weights(self):
 
         # => Transfer all weights except conv1.1
         total_layers = len(base_model.layers)
-        for i in range(5, total_layers):
+        for i in range(2, total_layers):
             if len(base_model.layers[i].weights) > 0:
                 print("Setting pre-trained weights: {:.2f}%".format((i / total_layers) * 100), end="\r")
                 self.feature_model.layers[i + lshift].set_weights(base_model.layers[i].get_weights())
-
+        
+        # => Replicate filters of first layer as needed
+        weights = base_model.layers[1].get_weights()
+        available_channels = weights[0].shape[2]
+        target_shape = self.feature_model.layers[1 + lshift].weights[0].shape
+        new_weights = numpy.zeros(target_shape)
+
+        for i in range(new_weights.shape[2]):
+            j = i % available_channels
+            new_weights[:,:,i,:] = weights[0][:,:,j,:]
+
+        weights_array = [new_weights]
+        if len(weights) > 1: 
+            weights_array += weights[1:]
+
+        self.feature_model.layers[1 + lshift].set_weights(weights_array)
         print("Network initialized with pretrained ImageNet weights")

From 65154c4a92eff26e7965d9e8003f2784bb5ab3ae Mon Sep 17 00:00:00 2001
From: John Arevalo <johnarevalo@gmail.com>
Date: Tue, 24 Aug 2021 18:59:32 -0400
Subject: [PATCH 18/43] Add tqdm and tensorflow as dependencies

---
 setup.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0a31a5d8..548a240c 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,9 @@
         "scikit-learn>=0.19.0",
         "scipy>=1.1",
         "comet-ml>=3.1.6",
-        "tensorflow_addons"
+        "tensorflow==2.5.*",
+        "tensorflow_addons",
+        "tqdm>=4.62"
     ],
     extras_require={
         "test": [

From c91b9d821a37d90583d19d209be2e53fe3f08d8d Mon Sep 17 00:00:00 2001
From: John Arevalo <johnarevalo@gmail.com>
Date: Wed, 25 Aug 2021 18:16:39 -0400
Subject: [PATCH 19/43] Rename pretrain models with _name property

---
 deepprofiler/learning/profiling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py
index ef1ddfad..33849334 100644
--- a/deepprofiler/learning/profiling.py
+++ b/deepprofiler/learning/profiling.py
@@ -41,7 +41,7 @@ def configure(self):
                 self.dpmodel.feature_model.load_weights(checkpoint)
             except ValueError:
                 print("Loading weights without classifier (different number of classes)")
-                self.dpmodel.feature_model.layers[-1].name = "classifier"
+                self.dpmodel.feature_model.layers[-1]._name = "classifier"
                 self.dpmodel.feature_model.load_weights(checkpoint, by_name=True)
 
         self.dpmodel.feature_model.summary()

From 66f20370ff121b0b5fb7c3b061aaa38237458df9 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Tue, 14 Sep 2021 15:45:00 -0400
Subject: [PATCH 20/43] Various training improvements

---
 deepprofiler/imaging/augmentations.py         |  6 +--
 deepprofiler/imaging/cropping.py              |  9 ++--
 deepprofiler/learning/model.py                |  1 +
 .../crop_generators/sampled_crop_generator.py | 53 +++++++++----------
 plugins/models/efficientnet.py                |  4 +-
 5 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py
index 6ab7f00d..55187492 100644
--- a/deepprofiler/imaging/augmentations.py
+++ b/deepprofiler/imaging/augmentations.py
@@ -31,16 +31,16 @@ def random_illumination(image):
     source = tf.image.grayscale_to_rgb(source)
     
     # Apply illumination augmentations
-    bright = tf.random.uniform([numchn], minval=-0.2, maxval=0.2, dtype=tf.float32)
+    bright = tf.random.uniform([numchn], minval=-0.1, maxval=0.1, dtype=tf.float32)
     channels = [tf.image.adjust_brightness(source[s,...], bright[s]) for s in range(numchn)]
-    contrast = tf.random.uniform([numchn], minval=0.5, maxval=1.5, dtype=tf.float32)
+    contrast = tf.random.uniform([numchn], minval=0.8, maxval=1.2, dtype=tf.float32)
     channels = [tf.image.adjust_contrast(channels[s], contrast[s]) for s in range(numchn)]
     result = tf.concat([tf.expand_dims(t, 0) for t in channels], axis=0)
     
     # Recover multi-channel image
     result = tf.image.rgb_to_grayscale(result)
     result = tf.transpose(result[:,:,:,0], [2, 1, 0])
-    result = result / tf.math.reduce_max(result)
+    #result = result / tf.math.reduce_max(result)
     return result
 
 
diff --git a/deepprofiler/imaging/cropping.py b/deepprofiler/imaging/cropping.py
index ddb32d64..f106e694 100644
--- a/deepprofiler/imaging/cropping.py
+++ b/deepprofiler/imaging/cropping.py
@@ -22,9 +22,12 @@ def crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph, box_size, mask_boxes
             mask_values = tf.ones_like(crops[:, :, :, -1], dtype=tf.float32) * tf.cast(mask_ind, dtype=tf.float32)
             masks = tf.compat.v1.to_float(tf.equal(crops[:, :, :, -1], mask_values))
             crops = crops[:, :, :, 0:-1] * tf.expand_dims(masks, -1)
-        mean = tf.math.reduce_mean(crops, axis=[1, 2], keepdims=True)
-        std = tf.math.reduce_std(crops, axis=[1, 2], keepdims=True)
-        crops = (crops - mean)/std
+        #mean = tf.math.reduce_mean(crops, axis=[1, 2], keepdims=True)
+        #std = tf.math.reduce_std(crops, axis=[1, 2], keepdims=True)
+        #crops = (crops - mean)/std
+        mini = tf.math.reduce_min(crops, axis=[1, 2], keepdims=True)
+        maxi = tf.math.reduce_max(crops, axis=[1, 2], keepdims=True)
+        crops = (crops - mini) / maxi
     return crops
 
 
diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index 0890bee4..df2f4535 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -62,6 +62,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
 
         # Get training parameters
         epochs, steps, schedule_epochs, schedule_lr, freq = setup_params(self, experiment)
+        steps = self.train_crop_generator.expected_steps
 
         # Load weights
         self.load_weights(epoch)
diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py
index 174b04d1..60f05e09 100644
--- a/plugins/crop_generators/sampled_crop_generator.py
+++ b/plugins/crop_generators/sampled_crop_generator.py
@@ -28,21 +28,32 @@ def __init__(self, config, dset):
 
 
     def start(self, session):
-        self.samples = pd.read_csv(os.path.join(self.directory, "sc-metadata.csv"))
-        self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
+        self.all_cells = pd.read_csv(os.path.join(self.directory, "sc_metadata.csv"))
+        #self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
+        self.balanced_sample()
+        self.expected_steps = self.samples.shape[0] / self.batch_size
         self.num_classes = len(self.samples["Target"].unique())
-        '''
-        self.generator = self.datagen.flow_from_dataframe(
-                dataframe=samples, 
-                x_col="Image_Name",
-                y_col="Class_Name",
-                class_mode="categorical",
-                directory=self.directory,
-                color_mode="grayscale",
-                target_size=(self.box_size, self.box_size * self.num_channels),
-                batch_size=self.config["train"]["model"]["params"]["batch_size"]
-        )
-        '''
+
+
+    def balanced_sample(self):
+        # Obtain distribution of single cells per class
+        #df = self.all_cells[self.all_cells.Training_Status_Alpha == "Training"].sample(frac=1.0).reset_index(drop=True)
+        df = self.all_cells[self.all_cells.Next_Training_Status == "Training"].sample(frac=1.0).reset_index(drop=True)
+
+        counts = df.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]]
+        sample_size = int(counts.Key.median())
+        counts = {r.Class_Name: r.Key for k,r in counts.iterrows()}
+
+        # Sample the same number of cells per class
+        class_samples = []
+        for cls in df.Class_Name.unique():
+            class_samples.append(df[df.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size))
+        self.samples = pd.concat(class_samples)
+
+        # Randomize order
+        self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
+        print(" >> Shuffling training sample with",len(self.samples),"examples")
+
 
     def generate(self, sess, global_step=0):
         pointer = 0
@@ -52,7 +63,7 @@ def generate(self, sess, global_step=0):
                 y = []
                 for i in range(self.batch_size):
                     if pointer >= len(self.samples):
-                        self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
+                        self.balanced_sample()
                         pointer = 0
                     filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
                     im = skimage.io.imread(filename).astype(np.float32)
@@ -64,18 +75,6 @@ def generate(self, sess, global_step=0):
             #   break
 
 
-    def generate_old(self, sess, global_step=0):
-        while True:
-            try:
-                x_, y = next(self.generator)
-                x = np.zeros([x_.shape[0], self.box_size, self.box_size, self.num_channels])
-                for i in range(x_.shape[0]):
-                    x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(x_[i])
-                yield (x, y) #tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
-            except:
-                break
-
-
     def stop(self, session):
         session.close()
         return
diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index aea98a5f..aa4bb08a 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -53,7 +53,9 @@ def define_model(self, config, dset):
 
         optimizer = tf.compat.v1.keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9,
                                          nesterov=True)
-        loss_func = "categorical_crossentropy"
+        #loss_func = "categorical_crossentropy"
+        loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2)
+
         if self.is_training is False and "use_pretrained_input_size" in config["profile"].keys():
             input_tensor = tf.compat.v1.keras.layers.Input(
                 (config["profile"]["use_pretrained_input_size"], config["profile"]["use_pretrained_input_size"], 3),

From cdb628956b5d4231af91b57e85db4457faa750a8 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Sat, 18 Sep 2021 17:38:31 -0400
Subject: [PATCH 21/43] Train/val with pre-cropped cells

---
 deepprofiler/dataset/image_dataset.py         |  4 +-
 deepprofiler/learning/model.py                | 35 ++++---
 deepprofiler/learning/training.py             |  2 +-
 .../crop_generators/sampled_crop_generator.py | 98 +++++++++++++------
 plugins/models/efficientnet.py                |  7 +-
 5 files changed, 88 insertions(+), 58 deletions(-)

diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py
index f6bc2710..761f6432 100644
--- a/deepprofiler/dataset/image_dataset.py
+++ b/deepprofiler/dataset/image_dataset.py
@@ -238,8 +238,8 @@ def read_dataset(config, mode = 'train'):
     if config["dataset"]["locations"]["mask_objects"]:
         dset.outlines = outlines
 
-    if mode == 'train':
-        dset.prepare_training_locations()
+    #if mode == 'train':
+    #    dset.prepare_training_locations()
 
     return dset
 
diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index df2f4535..6ad4b4ed 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -28,7 +28,7 @@ def __init__(self, config, dset, crop_generator, val_crop_generator, is_training
         self.config = config
         self.dset = dset
         self.train_crop_generator = crop_generator(config, dset)
-        self.val_crop_generator = val_crop_generator(config, dset)
+        self.val_crop_generator = crop_generator(config, dset, mode="Validation") #val_crop_generator(config, dset)
         self.random_seed = None
         self.is_training = is_training
 
@@ -61,7 +61,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
         self.train_crop_generator.start(main_session)
 
         # Get training parameters
-        epochs, steps, schedule_epochs, schedule_lr, freq = setup_params(self, experiment)
+        epochs, schedule_epochs, schedule_lr, freq = setup_params(self, experiment)
         steps = self.train_crop_generator.expected_steps
 
         # Load weights
@@ -72,7 +72,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
 
         # Train model
         self.feature_model.fit_generator(
-            generator=self.train_crop_generator.generate(main_session),
+            generator=self.train_crop_generator.generator(main_session),
             steps_per_epoch=steps,
             epochs=epochs,
             callbacks=callbacks,
@@ -137,11 +137,17 @@ def start_main_session():
 
 def load_validation_data(dpmodel, session):
     dpmodel.val_crop_generator.start(session)
-    x_validation, y_validation = deepprofiler.learning.validation.load_validation_data(
-        dpmodel.config,
-        dpmodel.dset,
-        dpmodel.val_crop_generator,
-        session)
+    x_validation = []
+    y_validation = []
+
+    for batch in dpmodel.val_crop_generator.generate():
+        x_validation.append(batch[0])
+        y_validation.append(batch[1])
+
+    x_validation = np.concatenate(x_validation)
+    y_validation = np.concatenate(y_validation)
+    print("Validation data:", x_validation.shape, y_validation.shape)
+
     return x_validation, y_validation
 
 
@@ -166,12 +172,6 @@ def setup_callbacks(dpmodel, lr_schedule_epochs, lr_schedule_lr, dset, experimen
     csv_output = dpmodel.config["paths"]["logs"] + "/log.csv"
     callback_csv = tf.compat.v1.keras.callbacks.CSVLogger(filename=csv_output)
 
-    # Queue stats
-    qstats = tf.compat.v1.keras.callbacks.LambdaCallback(
-        on_train_begin=lambda logs: dset.show_setup(),
-        on_epoch_end=lambda epoch, logs: experiment.log_metrics(dset.show_stats()) if experiment else dset.show_stats()
-    )
-
     # Learning rate schedule
     def lr_schedule(epoch, lr):
         if epoch in lr_schedule_epochs:
@@ -182,15 +182,14 @@ def lr_schedule(epoch, lr):
     # Collect all callbacks
     if lr_schedule_epochs:
         callback_lr_schedule = tf.compat.v1.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1)
-        callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule, qstats]
+        callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] 
     else:
-        callbacks = [callback_model_checkpoint, callback_csv, qstats]
+        callbacks = [callback_model_checkpoint, callback_csv] 
     return callbacks
 
 
 def setup_params(dpmodel, experiment):
     epochs = dpmodel.config["train"]["model"]["epochs"]
-    steps = dpmodel.dset.steps_per_epoch
     lr_schedule_epochs = []
     lr_schedule_lr = []
     if 'comet_ml' in dpmodel.config["train"].keys():
@@ -219,7 +218,7 @@ def setup_params(dpmodel, experiment):
     else:
         freq = 1
 
-    return epochs, steps, lr_schedule_epochs, lr_schedule_lr, freq
+    return epochs, lr_schedule_epochs, lr_schedule_lr, freq
 
 
 def close(dpmodel, crop_session):
diff --git a/deepprofiler/learning/training.py b/deepprofiler/learning/training.py
index 29f1df44..db186664 100644
--- a/deepprofiler/learning/training.py
+++ b/deepprofiler/learning/training.py
@@ -9,7 +9,7 @@
 def learn_model(config, dset, epoch=1, seed=None, verbose=1):
     model_module = importlib.import_module("plugins.models.{}".format(config["train"]["model"]["name"]))
     crop_module = importlib.import_module("plugins.crop_generators.{}".format(config["train"]["model"]["crop_generator"]))
-    config["num_classes"] = len(dset.training_images["Target"].unique())
+    #config["num_classes"] = len(dset.training_images["Target"].unique())
     if "metrics" in config["train"]["model"].keys():
         if type(config["train"]["model"]["metrics"]) not in [list, dict]:
             raise ValueError("Metrics should be a list or dictionary.")
diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py
index 60f05e09..1792c5e9 100644
--- a/plugins/crop_generators/sampled_crop_generator.py
+++ b/plugins/crop_generators/sampled_crop_generator.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import skimage.io
 import tensorflow as tf
+import tqdm
 
 import deepprofiler.imaging.cropping
 
@@ -18,66 +19,99 @@
 
 class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator):
 
-    def __init__(self, config, dset):
+    def __init__(self, config, dset, mode="Training"):
         super(GeneratorClass, self).__init__(config, dset)
         #self.datagen = tf.keras.preprocessing.image.ImageDataGenerator()
         self.directory = config["paths"]["single_cell_sample"]
         self.num_channels = len(config["dataset"]["images"]["channels"])
         self.box_size = self.config["dataset"]["locations"]["box_size"]
         self.batch_size = self.config["train"]["model"]["params"]["batch_size"]
+        self.mode = mode
 
+        # Load metadata
+        self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv"))
+        self.target = config["train"]["partition"]["targets"][0]
 
-    def start(self, session):
-        self.all_cells = pd.read_csv(os.path.join(self.directory, "sc_metadata.csv"))
-        #self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
+        # Index targets for one-hot encoded labels
+        self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True)
+        self.classes = list(self.split_data[self.target].unique())
+        self.num_classes = len(self.classes)
+        self.classes.sort()
+        self.classes = {self.classes[i]:i for i in range(self.num_classes)}
+
+        # Identify targets and samples
         self.balanced_sample()
-        self.expected_steps = self.samples.shape[0] / self.batch_size
-        self.num_classes = len(self.samples["Target"].unique())
+        self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0)
 
+        # Report number of classes globally
+        self.config["num_classes"] = self.num_classes
+        print(" >> Number of classes:", self.num_classes)
+
+
+    def start(self, session):
+        #self.all_cells = pd.read_csv(os.path.join(self.directory, "sc_metadata.csv"))
+        #self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv"))
+        #self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
+        pass
 
     def balanced_sample(self):
         # Obtain distribution of single cells per class
-        #df = self.all_cells[self.all_cells.Training_Status_Alpha == "Training"].sample(frac=1.0).reset_index(drop=True)
-        df = self.all_cells[self.all_cells.Next_Training_Status == "Training"].sample(frac=1.0).reset_index(drop=True)
-
-        counts = df.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]]
+        counts = self.split_data.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]]
         sample_size = int(counts.Key.median())
         counts = {r.Class_Name: r.Key for k,r in counts.iterrows()}
 
         # Sample the same number of cells per class
         class_samples = []
-        for cls in df.Class_Name.unique():
-            class_samples.append(df[df.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size))
+        for cls in self.split_data.Class_Name.unique():
+            class_samples.append(self.split_data[self.split_data.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size))
         self.samples = pd.concat(class_samples)
 
         # Randomize order
-        self.samples = self.samples.sample(frac=1.0).reset_index(drop=True)
-        print(" >> Shuffling training sample with",len(self.samples),"examples")
+        if self.mode == "Training":
+            print(" >> Shuffling training sample with",len(self.samples),"examples")
+            self.samples = self.samples.sample(frac=1.0).reset_index()
+        else:
+            self.samples = self.samples.sample(frac=0.1).reset_index()
+            print(self.samples[self.target].value_counts())
 
 
-    def generate(self, sess, global_step=0):
+    def generator(self, sess, global_step=0):
         pointer = 0
         while True:
-            #try:
-                x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
-                y = []
-                for i in range(self.batch_size):
-                    if pointer >= len(self.samples):
-                        self.balanced_sample()
-                        pointer = 0
-                    filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
-                    im = skimage.io.imread(filename).astype(np.float32)
-                    x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
-                    y.append(self.samples.loc[pointer, "Target"])
-                    pointer += 1
-                yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
-            #except:
-            #   break
+            x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
+            y = []
+            for i in range(self.batch_size):
+                if pointer >= len(self.samples):
+                    self.balanced_sample()
+                    pointer = 0
+                filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
+                im = skimage.io.imread(filename).astype(np.float32)
+                x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
+                y.append(self.classes[self.samples.loc[pointer, self.target]])
+                pointer += 1
+            yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
+
+
+    def generate(self):
+        pointer = 0
+        for k in range(self.expected_steps):
+            x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
+            y = []
+            for i in range(self.batch_size):
+                if pointer >= len(self.samples):
+                    break
+                filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
+                im = skimage.io.imread(filename).astype(np.float32)
+                x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
+                y.append(self.classes[self.samples.loc[pointer, self.target]])
+                pointer += 1
+            if len(y) < x.shape[0]:
+                x = x[0:len(y),...]
+            yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
 
 
     def stop(self, session):
-        session.close()
-        return
+        pass
 
 ## Reusing the Single Image Crop Generator. No changes needed
 
diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index aa4bb08a..91ae2a22 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -74,11 +74,8 @@ def define_model(self, config, dset):
             # 2. Create an output embedding for each target
             class_outputs = []
 
-            i = 0
-            for t in dset.targets:
-                y = tf.compat.v1.keras.layers.Dense(t.shape[1], activation="softmax", name=t.field_name)(features)
-                class_outputs.append(y)
-                i += 1
+            y = tf.compat.v1.keras.layers.Dense(config["num_classes"], activation="softmax", name="ClassProb")(features)
+            class_outputs.append(y)
 
             # 4. Create and compile model
             model = tf.compat.v1.keras.models.Model(inputs=input_image, outputs=class_outputs)

From f188826db66da355ca2ee0faec7fd830eefc4c6d Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Sat, 18 Sep 2021 20:00:05 -0400
Subject: [PATCH 22/43] Added online soft labels

---
 deepprofiler/learning/model.py                |   7 +
 .../crop_generators/online_labels_cropgen.py  | 160 ++++++++++++++++++
 plugins/models/efficientnet.py                |   2 +-
 3 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 plugins/crop_generators/online_labels_cropgen.py

diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index 6ad4b4ed..c72ee49a 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -185,6 +185,13 @@ def lr_schedule(epoch, lr):
         callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] 
     else:
         callbacks = [callback_model_checkpoint, callback_csv] 
+
+    # Online labels callback
+    update_labels = tf.compat.v1.keras.callbacks.LambdaCallback(
+            on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch)
+    )
+    callbacks.append(update_labels)
+
     return callbacks
 
 
diff --git a/plugins/crop_generators/online_labels_cropgen.py b/plugins/crop_generators/online_labels_cropgen.py
new file mode 100644
index 00000000..c0f48fba
--- /dev/null
+++ b/plugins/crop_generators/online_labels_cropgen.py
@@ -0,0 +1,160 @@
+import os
+import numpy as np
+import pandas as pd
+import skimage.io
+import tensorflow as tf
+import tqdm
+
+import deepprofiler.imaging.cropping
+
+tf.compat.v1.disable_v2_behavior()
+
+## Wrapper for Keras ImageDataGenerator
+## The Keras generator is not completely useful, because it makes assumptions about
+## color (grayscale or RGB). We need flexibility for color channels, and augmentations
+## tailored to multi-dimensional microscopy images. It's based on PIL rather than skimage.
+## In addition, the samples loaded in this generator have unfolded channels, which
+## requires us to fold them back to a tensor before feeding them to a CNN.
+
+
+class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator):
+
+    def __init__(self, config, dset, mode="Training"):
+        super(GeneratorClass, self).__init__(config, dset)
+        #self.datagen = tf.keras.preprocessing.image.ImageDataGenerator()
+        self.directory = config["paths"]["single_cell_sample"]
+        self.num_channels = len(config["dataset"]["images"]["channels"])
+        self.box_size = self.config["dataset"]["locations"]["box_size"]
+        self.batch_size = self.config["train"]["model"]["params"]["batch_size"]
+        self.mode = mode
+
+        # Load metadata
+        self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv"))
+        self.target = config["train"]["partition"]["targets"][0]
+
+        # Index targets for one-hot encoded labels
+        self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True)
+        self.classes = list(self.split_data[self.target].unique())
+        self.num_classes = len(self.classes)
+        self.classes.sort()
+        self.classes = {self.classes[i]:i for i in range(self.num_classes)}
+
+        # Identify targets and samples
+        self.balanced_sample()
+        self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0)
+
+        # Report number of classes globally
+        self.config["num_classes"] = self.num_classes
+        print(" >> Number of classes:", self.num_classes)
+
+        # Online labels
+        if self.mode == "Training":
+            self.init_online_labels()
+
+
+    def start(self, session):
+        pass
+
+    def balanced_sample(self):
+        # Obtain distribution of single cells per class
+        counts = self.split_data.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]]
+        sample_size = int(counts.Key.median())
+        counts = {r.Class_Name: r.Key for k,r in counts.iterrows()}
+
+        # Sample the same number of cells per class
+        class_samples = []
+        for cls in self.split_data.Class_Name.unique():
+            class_samples.append(self.split_data[self.split_data.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size))
+        self.samples = pd.concat(class_samples)
+
+        # Randomize order
+        if self.mode == "Training":
+            self.samples = self.samples.sample(frac=1.0).reset_index()
+        else:
+            self.samples = self.samples.sample(frac=0.1).reset_index()
+            print(self.samples[self.target].value_counts())
+
+
+    def generator(self, sess, global_step=0):
+        pointer = 0
+        while True:
+            x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
+            y = []
+            for i in range(self.batch_size):
+                if pointer >= len(self.samples):
+                    self.balanced_sample()
+                    pointer = 0
+                filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
+                im = skimage.io.imread(filename).astype(np.float32)
+                x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
+                y.append([self.soft_labels[self.samples.loc[pointer, "index"], :]])
+                pointer += 1
+            yield(x, np.concatenate(y, axis=0))
+
+
+    def generate(self, source="samples"):
+        pointer = 0
+        if source == "splits":
+            dataframe = self.split_data
+            steps = (len(self.split_data) // self.batch_size) + int(len(self.split_data) % self.batch_size > 0)
+            msg = "Predicting soft labels"
+        else:
+            dataframe = self.samples
+            steps = self.expected_steps
+            msg = "Loading validation data"
+
+        for k in tqdm.tqdm(range(steps), desc=msg):
+            x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
+            y = []
+            for i in range(self.batch_size):
+                if pointer >= len(dataframe):
+                    break
+                filename = os.path.join(self.directory, dataframe.loc[pointer, "Image_Name"])
+                im = skimage.io.imread(filename).astype(np.float32)
+                x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
+                y.append(self.classes[dataframe.loc[pointer, self.target]])
+                pointer += 1
+            if len(y) < x.shape[0]:
+                x = x[0:len(y),...]
+            yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
+
+
+    def init_online_labels(self):
+        LABEL_SMOOTHING = 0.2
+        self.soft_labels = np.zeros((self.split_data.shape[0], self.num_classes)) + LABEL_SMOOTHING/self.num_classes
+        print("Soft labels:", self.soft_labels.shape)
+        for k,r in self.split_data.iterrows():
+            label = self.classes[self.split_data.loc[k, self.target]]
+            self.soft_labels[k, label] += 1. - LABEL_SMOOTHING
+        print("Total labels:", np.sum(self.soft_labels))
+        sl = pd.DataFrame(data=self.soft_labels)
+        sl.to_csv("soft_labels_0000.csv", index=False)
+
+
+    def update_online_labels(self, model, epoch):
+        # Prepare parameters and predictions
+        LAMBDA = 0.01
+        predictions = []
+
+        # Get predictions with the model
+        model.get_layer("augmentation_layer").is_training = False
+        for batch in self.generate(source="splits"):
+            predictions.append( model.predict(batch[0]) )
+        model.get_layer("augmentation_layer").is_training = True
+
+        # Update soft labels
+        predictions = np.concatenate(predictions, axis=0)
+        self.soft_labels = (1 - LAMBDA)*self.soft_labels + LAMBDA*predictions
+        print(" >> Labels updated", predictions.shape)
+
+        # Save labels for this epoch
+        sl = pd.DataFrame(data=self.soft_labels)
+        sl.to_csv("soft_labels_{:04d}.csv".format(epoch+1), index=False)
+
+
+    def stop(self, session):
+        pass
+
+## Reusing the Single Image Crop Generator. No changes needed
+
+SingleImageGeneratorClass = deepprofiler.imaging.cropping.SingleImageCropGenerator
diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index 91ae2a22..00030621 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -33,7 +33,7 @@ def get_model(self, config, input_image=None, weights=None, include_top=False):
         error_msg = str(num_layers) + " conv_blocks not in " + SM
         assert num_layers in supported_models.keys(), error_msg
 
-        if self.is_training and weights is None:
+        if self.is_training: #and weights is None:
             input_image = AugmentationLayer()(input_image)
 
         model = supported_models[num_layers](

From 33d8bbc8adcf01a3c556b430e41bc309c4c4c07c Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Sat, 18 Sep 2021 20:11:47 -0400
Subject: [PATCH 23/43] Minimal augmentations

---
 deepprofiler/imaging/augmentations.py | 8 ++++----
 plugins/models/efficientnet.py        | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py
index 55187492..e239489d 100644
--- a/deepprofiler/imaging/augmentations.py
+++ b/deepprofiler/imaging/augmentations.py
@@ -62,10 +62,10 @@ def random_rotate(image):
 
 
 def augment(image):
-    if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)):
-        augm = random_crop(image)
-    else:
-        augm = random_rotate(image)
+    #if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)):
+    #    augm = random_crop(image)
+    #else:
+    #    augm = random_rotate(image)
 
     augm = random_flips(augm)
     augm = random_illumination(augm)
diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index 00030621..91ae2a22 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -33,7 +33,7 @@ def get_model(self, config, input_image=None, weights=None, include_top=False):
         error_msg = str(num_layers) + " conv_blocks not in " + SM
         assert num_layers in supported_models.keys(), error_msg
 
-        if self.is_training: #and weights is None:
+        if self.is_training and weights is None:
             input_image = AugmentationLayer()(input_image)
 
         model = supported_models[num_layers](

From de261f52da4668dae7df24d1c91ee68bc2031051 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Sat, 18 Sep 2021 20:13:41 -0400
Subject: [PATCH 24/43] Fixed typo

---
 deepprofiler/imaging/augmentations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepprofiler/imaging/augmentations.py b/deepprofiler/imaging/augmentations.py
index e239489d..b0430384 100644
--- a/deepprofiler/imaging/augmentations.py
+++ b/deepprofiler/imaging/augmentations.py
@@ -67,7 +67,7 @@ def augment(image):
     #else:
     #    augm = random_rotate(image)
 
-    augm = random_flips(augm)
+    augm = random_flips(image)
     augm = random_illumination(augm)
 
     return augm

From baee639a5fd89ccf9d1bef218400eb05afe74141 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Mon, 20 Sep 2021 17:23:09 -0400
Subject: [PATCH 25/43] Save soft labels

---
 .../crop_generators/online_labels_cropgen.py  | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/plugins/crop_generators/online_labels_cropgen.py b/plugins/crop_generators/online_labels_cropgen.py
index c0f48fba..711da5f4 100644
--- a/plugins/crop_generators/online_labels_cropgen.py
+++ b/plugins/crop_generators/online_labels_cropgen.py
@@ -30,10 +30,21 @@ def __init__(self, config, dset, mode="Training"):
 
         # Load metadata
         self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv"))
+
+        ## UNCOMMENT FOR ALPHA SET
+        #self.all_cells.loc[(self.all_cells.Training_Status == "Unused") & self.all_cells.Metadata_Plate.isin([41756,41757]), "Training_Status_Alpha"] = "Validation"
+
+        ## UNCOMMENT FOR SINGLE CELL BALANCED SET
+        self.all_cells.loc[self.all_cells.Training_Status == "Training", "Training_Status"] = "XXX"
+        self.all_cells.loc[self.all_cells.Training_Status == "SingleCellTraining", "Training_Status"] = "Training"
+        self.all_cells.loc[self.all_cells.Training_Status == "Validation", "Training_Status"] = "YYY"
+        self.all_cells.loc[self.all_cells.Training_Status == "SingleCellValidation", "Training_Status"] = "Validation"
+
         self.target = config["train"]["partition"]["targets"][0]
 
         # Index targets for one-hot encoded labels
-        self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True)
+        #self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True)
+        self.split_data = self.all_cells[self.all_cells.Training_Status == self.mode].reset_index(drop=True)
         self.classes = list(self.split_data[self.target].unique())
         self.num_classes = len(self.classes)
         self.classes.sort()
@@ -49,6 +60,8 @@ def __init__(self, config, dset, mode="Training"):
 
         # Online labels
         if self.mode == "Training":
+            self.out_dir = config["paths"]["results"] + "soft_labels/"
+            os.makedirs(self.out_dir, exist_ok=True)
             self.init_online_labels()
 
 
@@ -128,7 +141,7 @@ def init_online_labels(self):
             self.soft_labels[k, label] += 1. - LABEL_SMOOTHING
         print("Total labels:", np.sum(self.soft_labels))
         sl = pd.DataFrame(data=self.soft_labels)
-        sl.to_csv("soft_labels_0000.csv", index=False)
+        sl.to_csv(self.out_dir + "0000.csv", index=False)
 
 
     def update_online_labels(self, model, epoch):
@@ -149,7 +162,7 @@ def update_online_labels(self, model, epoch):
 
         # Save labels for this epoch
         sl = pd.DataFrame(data=self.soft_labels)
-        sl.to_csv("soft_labels_{:04d}.csv".format(epoch+1), index=False)
+        sl.to_csv(self.out_dir + "{:04d}.csv".format(epoch+1), index=False)
 
 
     def stop(self, session):

From 7d015f06038980925edc5d8428d95516552a2f13 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 27 Sep 2021 16:15:28 +0200
Subject: [PATCH 26/43] Training strategy with TF2 dataset and augmentations

---
 deepprofiler/__main__.py          |   9 +
 deepprofiler/learning/tf2train.py | 301 ++++++++++++++++++++++++++++++
 2 files changed, 310 insertions(+)
 create mode 100644 deepprofiler/learning/tf2train.py

diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py
index 70ef5b62..7d751b2d 100644
--- a/deepprofiler/__main__.py
+++ b/deepprofiler/__main__.py
@@ -14,6 +14,7 @@
 import deepprofiler.dataset.image_dataset
 import deepprofiler.dataset.sampling
 import deepprofiler.learning.training
+import deepprofiler.learning.tf2train
 import deepprofiler.learning.profiling
 import deepprofiler.download.normalize_bbbc021_metadata
 
@@ -163,6 +164,14 @@ def train(context, epoch, seed):
     deepprofiler.learning.training.learn_model(context.obj["config"], dset, epoch, seed)
 
 
+# Third tool (b): Train a network with TF dataset
+@cli.command()
+@click.option("--epoch", default=1)
+@click.pass_context
+def traintf2(context, epoch):
+    deepprofiler.learning.tf2train.learn_model(context.obj["config"], epoch)
+
+
 # Fourth tool: Profile cells and extract features
 @cli.command()
 @click.pass_context
diff --git a/deepprofiler/learning/tf2train.py b/deepprofiler/learning/tf2train.py
new file mode 100644
index 00000000..2350c200
--- /dev/null
+++ b/deepprofiler/learning/tf2train.py
@@ -0,0 +1,301 @@
+import comet_ml
+import os
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+import tensorflow_addons as tfa
+import efficientnet.tfkeras as efn
+
+tf.compat.v1.enable_v2_behavior()
+tf.config.run_functions_eagerly(True)
+
+AUTOTUNE = tf.data.AUTOTUNE
+
+
+def make_dataset(path, batch_size, single_cell_metadata, config, is_training):
+    @tf.function
+    def fold_channels(crop):
+        assert tf.executing_eagerly()
+        crop = crop.numpy()
+        output = np.reshape(crop, (crop.shape[0], crop.shape[0], -1), order="F").astype(np.float32)
+        output = output / 255.
+        for i in range(output.shape[-1]):
+            mean = np.mean(output[:, :, i])
+            std = np.std(output[:, :, i])
+            output[:, :, i] = (output[:, :, i] - mean) / std
+        return tf.convert_to_tensor(output, dtype=tf.float32)
+
+    def parse_image(filename):
+        image = tf.io.read_file(filename)
+        image = tf.image.decode_png(image, channels=0)
+        image = tf.py_function(func=fold_channels, inp=[image], Tout=tf.float32)
+        return image
+
+    def configure_for_performance(ds, is_training):
+
+        ds = ds.shuffle(buffer_size=323000)
+        if is_training:
+            ds = augment(ds)
+        ds = ds.batch(batch_size)
+        ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+        return ds
+
+    def random_illumination(image):
+        # Make channels independent images
+        numchn = len(config["dataset"]["images"]["channels"])
+        source = tf.transpose(image, [2, 1, 0])
+        source = tf.expand_dims(source, -1)
+        source = tf.image.grayscale_to_rgb(source)
+
+        # Apply illumination augmentations
+        bright = tf.random.uniform([numchn], minval=-0.4, maxval=0.4, dtype=tf.float32)
+        channels = [tf.image.adjust_brightness(source[s, ...], bright[s]) for s in range(numchn)]
+        contrast = tf.random.uniform([numchn], minval=0.6, maxval=1.4, dtype=tf.float32)
+        channels = [tf.image.adjust_contrast(channels[s], contrast[s]) for s in range(numchn)]
+        result = tf.concat([tf.expand_dims(t, 0) for t in channels], axis=0)
+
+        # Recover multi-channel image
+        result = tf.image.rgb_to_grayscale(result)
+        result = tf.transpose(result[:, :, :, 0], [2, 1, 0])
+        # result = result / tf.math.reduce_max(result)
+
+        return result
+
+    def random_flips(image):
+        augmented = tf.image.random_flip_left_right(image)
+
+        # 90 degree rotations
+        angle = tf.random.uniform([1], minval=0, maxval=4, dtype=tf.int32)
+        augmented = tf.image.rot90(augmented, angle[0])
+        return augmented
+
+    def random_crop_or_rotate(image):
+        w, h, c = config["dataset"]["locations"]["box_size"], config["dataset"]["locations"]["box_size"], len(
+            config["dataset"]["images"]["channels"])
+        if tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), tf.cast(0.5, tf.float32)):
+            size = tf.random.uniform([1], minval=int(w * 0.8), maxval=w, dtype=tf.int32)
+            image = tf.image.random_crop(image, [size[0], size[0], c])
+            return tf.image.resize(image, (w, h))
+        else:
+            return image
+
+
+    def augment(ds):
+        ds = ds.map(
+            lambda image, label: (random_crop_or_rotate(image), label), num_parallel_calls=AUTOTUNE
+        ).map(
+            lambda image, label: (random_flips(image), label), num_parallel_calls=AUTOTUNE
+        ).map(
+            lambda image, label: (random_illumination(image), label), num_parallel_calls=AUTOTUNE
+        )
+        return ds
+
+    filenames = single_cell_metadata["Image_Name"].tolist()
+    for i in range(len(filenames)):
+        filenames[i] = os.path.join(path, filenames[i])
+
+    steps = np.math.ceil(len(filenames) / batch_size)
+    filenames_ds = tf.data.Dataset.from_tensor_slices(filenames)
+    images_ds = filenames_ds.map(parse_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+    labels = tf.keras.utils.to_categorical(single_cell_metadata["Categorical"])
+    labels_ds = tf.data.Dataset.from_tensor_slices(labels)
+    ds = tf.data.Dataset.zip((images_ds, labels_ds))
+    ds = configure_for_performance(ds, is_training)
+    return ds, steps
+
+
+def setup_callbacks(config):
+    callbacks = []
+
+    # CSV Log
+    csv_output = config["paths"]["logs"] + "/log.csv"
+    callback_csv = tf.keras.callbacks.CSVLogger(filename=csv_output)
+    callbacks.append(callback_csv)
+
+    # Checkpoints
+    output_file = config["paths"]["checkpoints"] + "/checkpoint_{epoch:04d}.hdf5"
+    period = 1
+    save_best = False
+    if "checkpoint_policy" in config["train"]["model"] and isinstance(
+            config["train"]["model"]["checkpoint_policy"], int):
+        period = int(config["train"]["model"]["checkpoint_policy"])
+    elif "checkpoint_policy" in config["train"]["model"] and config["train"]["model"]["checkpoint_policy"] == 'best':
+        save_best = True
+
+    callback_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
+        filepath=output_file,
+        save_weights_only=True,
+        save_best_only=save_best,
+        period=period
+    )
+    callbacks.append(callback_model_checkpoint)
+    epochs = config["train"]["model"]["epochs"]
+
+    def lr_schedule(epoch, lr):
+        if lr_schedule_epochs and epoch in lr_schedule_epochs:
+            return lr_schedule_lr[lr_schedule_epochs.index(epoch)]
+        else:
+            return lr
+
+    if "lr_schedule" in config["train"]["model"]:
+        if config["train"]["model"]["lr_schedule"] == "cosine":
+            lr_schedule_epochs = [x for x in range(epochs)]
+            init_lr = config["train"]["model"]["params"]["learning_rate"]
+            # Linear warm up
+            lr_schedule_lr = [init_lr / (5 - t) for t in range(5)]
+            # Cosine decay
+            lr_schedule_lr += [0.5 * (1 + np.cos((np.pi * t) / epochs)) * init_lr for t in range(5, epochs)]
+            callback_lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1)
+        elif config["train"]["model"]["lr_schedule"] == "plateau":
+            callback_lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.0001)
+            config["train"]["validation"]["frequency"] = 1
+        else:
+            assert len(config["train"]["model"]["lr_schedule"]["epoch"]) == \
+                   len(config["train"]["model"]["lr_schedule"]["lr"]), "Make sure that the length of " \
+                                                                       "lr_schedule->epoch equals the length of " \
+                                                                       "lr_schedule->lr in the config file."
+
+            lr_schedule_epochs = config["train"]["model"]["lr_schedule"]["epoch"]
+            lr_schedule_lr = config["train"]["model"]["lr_schedule"]["lr"]
+            callback_lr_schedule = tf.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1)
+
+        callbacks.append(callback_lr_schedule)
+
+    return callbacks
+
+
+def setup_comet_ml(config):
+    if 'comet_ml' in config["train"].keys():
+        experiment = comet_ml.Experiment(
+            api_key=config["train"]["comet_ml"]["api_key"],
+            project_name=config["train"]["comet_ml"]["project_name"],
+            auto_param_logging=True,
+            auto_histogram_weight_logging=False,
+            auto_histogram_gradient_logging=False,
+            auto_histogram_activation_logging=False
+        )
+        if config["experiment_name"] != "results":
+            experiment.set_name(config["experiment_name"])
+        experiment.log_others(config)
+    else:
+        experiment = None
+    return experiment
+
+
+def learn_model(config, epoch):
+    DENSE_KERNEL_INITIALIZER = {
+        'class_name': 'VarianceScaling',
+        'config': {
+            'scale': 1. / 3.,
+            'mode': 'fan_out',
+            'distribution': 'uniform'
+        }
+    }
+
+    BATCH_SIZE = config["train"]["model"]["params"]["batch_size"]
+    strategy_lr = config["train"]["model"]["params"]["learning_rate"]
+
+    experiment = setup_comet_ml(config)
+
+    single_cell_metadata = pd.read_csv(
+        os.path.join(config["paths"]["single_cell_sample"], "expanded_sc_metadata_alpha.csv"))
+    single_cell_metadata = single_cell_metadata[
+        ["Class_Name", "Image_Name", "Training_Status", "Training_Status_Alpha"]]
+    single_cell_metadata = single_cell_metadata[single_cell_metadata["Training_Status"] != "Unused"]
+
+    num_classes = len(pd.unique(single_cell_metadata["Class_Name"]))
+    single_cell_metadata["Categorical"] = pd.Categorical(single_cell_metadata["Class_Name"]).codes
+
+    path = config["paths"]["single_cell_sample"]
+    dataset, steps_per_epoch = make_dataset(path, BATCH_SIZE,
+                                            single_cell_metadata[
+                                                single_cell_metadata["Training_Status_Alpha"] == "Training"],
+                                            config,
+                                            is_training=True)
+    validation_dataset, _ = make_dataset(path, BATCH_SIZE,
+                                         single_cell_metadata[
+                                             single_cell_metadata["Training_Status_Alpha"] == "Validation"],
+                                         config,
+                                         is_training=False)
+
+    input_shape = (config["dataset"]["locations"]["box_size"], config["dataset"]["locations"]["box_size"],
+                   len(config["dataset"]["images"]["channels"]))
+    input_image = tf.keras.layers.Input(input_shape)
+
+    model = efn.EfficientNetB0(
+        include_top=False, weights=None, input_tensor=input_image,
+        input_shape=input_shape
+    )
+    features = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(model.output)
+    y = tf.keras.layers.Dense(num_classes, activation='softmax', name='predictions',
+                              kernel_initializer=DENSE_KERNEL_INITIALIZER)(features)
+    model = tf.keras.models.Model(inputs=input_image, outputs=y)
+
+    regularizer = tf.keras.regularizers.l2(0.00001)
+    for layer in model.layers:
+        if hasattr(layer, "kernel_regularizer"):
+            setattr(layer, "kernel_regularizer", regularizer)
+
+    model = tf.keras.models.model_from_json(model.to_json())
+    optimizer = tf.keras.optimizers.SGD(learning_rate=strategy_lr)
+    loss_func = tf.keras.losses.CategoricalCrossentropy(from_logits=False)  # , label_smoothing = 0.6)
+
+    model.compile(optimizer, loss_func,
+                  metrics=["accuracy", tfa.metrics.F1Score(num_classes=num_classes, average='macro'),
+                           tf.keras.metrics.TopKCategoricalAccuracy(k=5), tf.keras.metrics.Precision()])
+
+    callbacks = setup_callbacks(config, strategy_lr)
+
+    if epoch == 1 and config["train"]["model"]["initialization"] == "ImageNet":
+        base_model = efn.EfficientNetB0(weights='imagenet', include_top=False)
+        total_layers = len(base_model.layers)
+        for i in range(2, total_layers):
+            if len(base_model.layers[i].weights) > 0:
+                model.layers[i].set_weights(base_model.layers[i].get_weights())
+
+        # => Replicate filters of first layer as needed
+
+        weights = base_model.layers[1].get_weights()
+        available_channels = weights[0].shape[2]
+        target_shape = model.layers[1].weights[0].shape
+        new_weights = np.zeros(target_shape)
+
+        for i in range(new_weights.shape[2]):
+            j = i % available_channels
+            new_weights[:, :, i, :] = weights[0][:, :, j, :]
+
+        weights_array = [new_weights]
+        if len(weights) > 1:
+            weights_array += weights[1:]
+
+        model.layers[1].set_weights(weights_array)
+        print(model.layers[1].name,
+              np.array_equal(np.array(model.layers[1].get_weights()), np.array(base_model.layers[1].get_weights())))
+
+        print("Network initialized with pretrained ImageNet weights")
+
+    elif epoch > 1:
+        output_file = config["paths"]["checkpoints"] + "/checkpoint_{epoch:04d}.hdf5"
+        previous_model = output_file.format(epoch=epoch - 1)
+        model.load_weights(previous_model)
+
+    print(model.summary())
+    if experiment:
+        with experiment.train():
+            model.fit(dataset,
+                      epochs=config["train"]["model"]["epochs"],
+                      callbacks=callbacks,
+                      verbose=1,
+                      validation_data=validation_dataset,
+                      validation_freq=config["train"]["validation"]["frequency"],
+                      initial_epoch=epoch - 1
+                      )
+    else:
+        model.fit(dataset,
+                  epochs=config["train"]["model"]["epochs"],
+                  callbacks=callbacks,
+                  verbose=1,
+                  validation_data=validation_dataset,
+                  validation_freq=config["train"]["validation"]["frequency"],
+                  initial_epoch=epoch - 1
+                  )
\ No newline at end of file

From 858bc4c18d216b8a873627e7b26b45b5a055d9a3 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 27 Sep 2021 16:31:17 +0200
Subject: [PATCH 27/43] Add export of all single cells from TF1 branch

---
 deepprofiler/__main__.py         | 12 ++++++++----
 deepprofiler/dataset/sampling.py | 27 +++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py
index 7d751b2d..fccf2d0d 100644
--- a/deepprofiler/__main__.py
+++ b/deepprofiler/__main__.py
@@ -143,12 +143,16 @@ def prepare(context):
 
 # Second tool: Sample single cells for training
 @cli.command()
+@click.option("--mode", default="sample")
 @click.pass_context
-def sample_sc(context):
+def sample_sc(context, mode):
     if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
         context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
-    dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train')
-    deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset)
+    dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"])
+    if mode == "sample":
+        deepprofiler.dataset.sampling.sample_dataset(context.obj["config"], dset)
+    elif mode == "export_all":
+        deepprofiler.dataset.sampling.export_dataset(context.obj["config"], dset)
     print("Single-cell sampling complete.")
 
 
@@ -176,7 +180,7 @@ def traintf2(context, epoch):
 @cli.command()
 @click.pass_context
 @click.option("--part",
-              help="Part of index to process", 
+               help="Part of index to process",
               default=-1, 
               type=click.INT)
 def profile(context, part):
diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index 47227c80..54f1e982 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -13,6 +13,7 @@
 class SingleCellSampler(deepprofiler.imaging.cropping.CropGenerator):
 
     def start(self, session):
+        self.all_metadata = []
         self.session = session
         # Define input data batches
         with tf.compat.v1.variable_scope("train_inputs"):
@@ -46,6 +47,20 @@ def process_batch(self, batch):
         output = self.session.run(self.input_variables["labeled_crops"], feed_dict)
         return output[0], metadata.reset_index(drop=True)
 
+    def export_single_cells(self, key, image_array, meta):
+        outdir = self.config["paths"]["single_cell_sample"]
+        key = self.dset.keyGen(meta)
+        batch = {"keys": [key], "images": [image_array], "targets": [], "locations": []}
+        batch["locations"].append(deepprofiler.imaging.boxes.get_locations(key, self.config))
+        batch["targets"].append([t.get_values(meta) for t in self.dset.targets])
+        crops, metadata = self.process_batch(batch)
+        for j in range(crops.shape[0]):
+            image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:])
+            skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image)
+
+        self.all_metadata.append(metadata)
+        print("{}: {} single cells".format(key, crops.shape[0]))
+
 
 def start_session():
     configuration = tf.compat.v1.ConfigProto()
@@ -112,3 +127,15 @@ def sample_dataset(config, dset):
     all_metadata = pd.concat(all_metadata).reset_index(drop=True)
     all_metadata.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False)
 
+def export_dataset(config, dset):
+    outdir = config["paths"]["single_cell_sample"]
+    if not is_directory_empty(outdir):
+        return
+
+    session = start_session()
+    cropper = SingleCellSampler(config, dset)
+    cropper.start(session)
+    dset.scan(cropper.export_single_cells, frame="all")
+    df = pd.concat(cropper.all_metadata).reset_index(drop=True)
+    df.to_csv(os.path.join(outdir, "sc-metadata.csv"), index=False)
+    print("Exporting: done")
\ No newline at end of file

From 619171cb6151626c488e542d528189d6b4a07906 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 27 Sep 2021 16:40:03 +0200
Subject: [PATCH 28/43] Explicitly switch off eager mode

---
 deepprofiler/learning/profiling.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py
index 33849334..2f525d18 100644
--- a/deepprofiler/learning/profiling.py
+++ b/deepprofiler/learning/profiling.py
@@ -3,12 +3,11 @@
 
 import numpy as np
 import tensorflow as tf
-from tensorflow.compat.v1.keras import backend as K
 
 from deepprofiler.dataset.utils import tic, toc
 
 tf.compat.v1.disable_v2_behavior()
-
+tf.config.run_functions_eagerly(False)
 
 class Profile(object):
     
@@ -32,7 +31,7 @@ def __init__(self, config, dset):
 
     def configure(self):        
         # Main session configuration
-        self.profile_crop_generator.start(K.get_session())
+        self.profile_crop_generator.start(tf.compat.v1.keras.backend.get_session())
         
         # Create feature extractor
         if self.config["profile"]["checkpoint"] != "None":
@@ -72,7 +71,7 @@ def extract_features(self, key, image_array, meta):  # key is a placeholder
         batch_size = self.config["profile"]["batch_size"]
         image_key, image_names, outlines = self.dset.get_image_paths(meta)
         crop_locations = self.profile_crop_generator.prepare_image(
-                                   K.get_session(),
+                                   tf.compat.v1.keras.backend.get_session(),
                                    image_array,
                                    meta,
                                    False
@@ -84,7 +83,7 @@ def extract_features(self, key, image_array, meta):  # key is a placeholder
         repeats = self.config["train"]["model"]["crop_generator"] == "repeat_channel_crop_generator"
         
         # Extract features
-        crops = next(self.profile_crop_generator.generate(K.get_session()))[0]  # single image crop generator yields one batch
+        crops = next(self.profile_crop_generator.generate(tf.compat.v1.keras.backend.get_session()))[0]  # single image crop generator yields one batch
         feats = self.feat_extractor.predict(crops, batch_size=batch_size)
         if repeats:
             feats = np.reshape(feats, (self.num_channels, total_crops, -1))

From e2ff13928520a345bfc47f7df33dad24c288ca65 Mon Sep 17 00:00:00 2001
From: Juan C <jccaicedo@gmail.com>
Date: Mon, 27 Sep 2021 11:57:34 -0400
Subject: [PATCH 29/43] Individual channel crop generator added

---
 deepprofiler/learning/model.py                |  11 +-
 deepprofiler/learning/profiling.py            |   2 +-
 .../individual_channel_cropgen.py             | 171 ++++++++++++++++++
 3 files changed, 178 insertions(+), 6 deletions(-)
 create mode 100644 plugins/crop_generators/individual_channel_cropgen.py

diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index c72ee49a..6e4a8ec9 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -187,11 +187,12 @@ def lr_schedule(epoch, lr):
         callbacks = [callback_model_checkpoint, callback_csv] 
 
     # Online labels callback
-    update_labels = tf.compat.v1.keras.callbacks.LambdaCallback(
-            on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch)
-    )
-    callbacks.append(update_labels)
-
+    if dpmodel.config["train"]["model"]["crop_generator"] == "online_labels_cropgen":
+        update_labels = tf.compat.v1.keras.callbacks.LambdaCallback(
+                on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch)
+        )
+        callbacks.append(update_labels)
+        
     return callbacks
 
 
diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py
index ef1ddfad..558dac9a 100644
--- a/deepprofiler/learning/profiling.py
+++ b/deepprofiler/learning/profiling.py
@@ -81,7 +81,7 @@ def extract_features(self, key, image_array, meta):  # key is a placeholder
         if total_crops == 0:
             print("No cells to profile:", output_file)
             return
-        repeats = self.config["train"]["model"]["crop_generator"] == "repeat_channel_crop_generator"
+        repeats = self.config["train"]["model"]["crop_generator"] in ["repeat_channel_crop_generator", "individual_channel_cropgen"]
         
         # Extract features
         crops = next(self.profile_crop_generator.generate(K.get_session()))[0]  # single image crop generator yields one batch
diff --git a/plugins/crop_generators/individual_channel_cropgen.py b/plugins/crop_generators/individual_channel_cropgen.py
new file mode 100644
index 00000000..4795a30b
--- /dev/null
+++ b/plugins/crop_generators/individual_channel_cropgen.py
@@ -0,0 +1,171 @@
+import os
+import numpy as np
+import pandas as pd
+import skimage.io
+import tensorflow as tf
+import tqdm
+
+import deepprofiler.imaging.cropping
+
+tf.compat.v1.disable_v2_behavior()
+
+## Crop generator used in place of the Keras ImageDataGenerator.
+## The Keras generator is not completely useful, because it makes assumptions about
+## color (grayscale or RGB) and is based on PIL rather than skimage. We need flexibility
+## for color channels, and augmentations tailored to multi-dimensional microscopy images.
+## In addition, the samples loaded in this generator have unfolded channels, which
+## requires us to fold them back to a tensor before feeding them to a CNN.
+
+
+class GeneratorClass(deepprofiler.imaging.cropping.CropGenerator):
+    """Serves single-channel crops of sampled single cells: one record per (cell, channel) pair."""
+
+    def __init__(self, config, dset, mode="Training"):
+        super(GeneratorClass, self).__init__(config, dset)
+        # Images are read directly with skimage (see load_sample_image); no Keras ImageDataGenerator is used.
+        self.directory = config["paths"]["single_cell_sample"]
+        self.num_channels = len(config["dataset"]["images"]["channels"])
+        self.box_size = self.config["dataset"]["locations"]["box_size"]
+        self.batch_size = self.config["train"]["model"]["params"]["batch_size"]
+        self.mode = mode
+
+        # Load metadata (the file name is hard-coded)
+        self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv"))
+        # ALPHA SET HACK: "Unused" cells of plates 41756 and 41757 are assigned to the "Validation" split
+        self.all_cells.loc[(self.all_cells.Training_Status == "Unused") & self.all_cells.Metadata_Plate.isin([41756,41757]), "Training_Status_Alpha"] = "Validation"
+        self.target = config["train"]["partition"]["targets"][0]
+
+        # Keep track of the real channels: Training mode stores them in the config, other modes read them back
+        if mode == "Training": 
+            self.config["real_channels"] = config["dataset"]["images"]["channels"]
+        else:
+            self.num_channels = len(self.config["real_channels"])
+
+        # Distribute channels in separate records in the reference index (one copy of the split per channel)
+        self.split_data = self.all_cells[self.all_cells.Training_Status_Alpha == self.mode].reset_index(drop=True)
+        before = len(self.split_data)
+        channels_data = [self.split_data.copy() for k in range(self.num_channels)]
+        for k in range(self.num_channels):
+            channels_data[k]["Channel"] = k
+        self.split_data = pd.concat(channels_data, axis=0)
+        after = len(self.split_data)
+        print(" >> Records before separating channels:", before, ". After:", after)
+
+        # Index targets for one-hot encoded labels (sorted class value -> integer id)
+        self.classes = list(self.split_data[self.target].unique())
+        self.num_classes = len(self.classes)
+        self.classes.sort()
+        self.classes = {self.classes[i]:i for i in range(self.num_classes)}
+
+        # Identify targets and samples; expected_steps is the number of batches needed to cover the sample
+        self.balanced_sample()
+        self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0)
+
+        # Report number of classes and channels globally
+        self.config["num_classes"] = self.num_classes
+        self.config["dataset"]["images"]["channels"] = ["Individual"] # Alter the number of channels for the rest of the program!
+        print(" >> Number of classes:", self.num_classes, ". Number of channels:", len(self.config["dataset"]["images"]["channels"]))
+
+    def start(self, session):
+        pass  # No session-side setup is performed by this generator
+
+    def balanced_sample(self):
+        """Rebuild self.samples with the median class size per Class_Name (with replacement if a class is smaller)."""
+        # Obtain distribution of single cells per class (non-null "Key" entries per Class_Name)
+        counts = self.split_data.groupby("Class_Name").count().reset_index()[["Class_Name", "Key"]]
+        sample_size = int(counts.Key.median())
+        counts = {r.Class_Name: r.Key for k,r in counts.iterrows()}
+
+        # Sample the same number of cells per class
+        class_samples = []
+        for cls in self.split_data.Class_Name.unique():
+            class_samples.append(self.split_data[self.split_data.Class_Name == cls].sample(n=sample_size, replace=counts[cls] < sample_size))
+        self.samples = pd.concat(class_samples)
+
+        # Randomize order (modes other than Training keep only a random 0.5% of the balanced sample)
+        if self.mode == "Training":
+            print(" >> Shuffling training sample with",len(self.samples),"examples")
+            self.samples = self.samples.sample(frac=1.0).reset_index()
+        else:
+            self.samples = self.samples.sample(frac=0.005).reset_index()
+            print(self.samples[self.target].value_counts())
+
+    def load_sample_image(self, pointer):
+        """Read the image of row `pointer` in self.samples and return the plane of that record's channel."""
+        filename = os.path.join(self.directory, self.samples.loc[pointer, "Image_Name"])
+        im = skimage.io.imread(filename).astype(np.float32)
+        channel = self.samples.loc[pointer, "Channel"]
+        folded = deepprofiler.imaging.cropping.fold_channels(im)
+        return folded[:,:,channel]
+
+    def generator(self, sess, global_step=0):
+        """Endless batch stream of (x, one-hot y); re-draws the balanced sample when it is exhausted."""
+        pointer = 0
+        while True:
+            x = np.zeros([self.batch_size, self.box_size, self.box_size, 1])
+            y = []
+            for i in range(self.batch_size):
+                if pointer >= len(self.samples):
+                    self.balanced_sample()
+                    pointer = 0
+                x[i,:,:,0] = self.load_sample_image(pointer) 
+                y.append(self.classes[self.samples.loc[pointer, self.target]])
+                pointer += 1
+            yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
+
+    def generate(self):
+        """Single pass over self.samples in expected_steps batches; the last batch is trimmed if short."""
+        pointer = 0
+        for k in range(self.expected_steps):
+            x = np.zeros([self.batch_size, self.box_size, self.box_size, 1])
+            y = []
+            for i in range(self.batch_size):
+                if pointer >= len(self.samples):
+                    break
+                x[i,:,:,0] = self.load_sample_image(pointer) 
+                y.append(self.classes[self.samples.loc[pointer, self.target]])
+                pointer += 1
+            if len(y) < x.shape[0]:
+                x = x[0:len(y),...]
+            yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
+
+    def stop(self, session):
+        pass  # Nothing to release
+
+## Class for generating crops from single images with separated channels
+
+def separate_channels(crops, network_input_size):
+    """Turn a batch of multi-channel crops into a batch of single-channel crops.
+
+    The channel axis (3) is moved to the front and merged with the batch axis, giving a
+    tensor of shape [-1, network_input_size, network_input_size, 1]. Assuming the crops are
+    already network_input_size squares, all crops of the first channel come first, then all
+    crops of the second channel, and so on. No resizing, RGB conversion or rescaling is done.
+    """
+    side = network_input_size
+    channels_first = tf.transpose(crops, perm=[3, 0, 1, 2])
+    return tf.reshape(channels_first, shape=[-1, side, side, 1])
+
+
+class SingleImageGeneratorClass(deepprofiler.imaging.cropping.SingleImageCropGenerator):
+    """Single-image crop generator that emits every channel of each crop as its own sample."""
+
+    def __init__(self, config, dset):
+        # Recover the real set of channels
+        config["dataset"]["images"]["channels"] = config["real_channels"]
+
+        # Then initialize the crop generator
+        super().__init__(config, dset)
+        width = self.config["dataset"]["locations"]["box_size"]
+        height = width
+        channels = len(self.config["dataset"]["images"]["channels"])
+        self.crop_ph = tf.compat.v1.placeholder(tf.float32, (None, width, height, channels))
+        self.resized = separate_channels(self.crop_ph, width)
+
+    def generate(self, session, global_step=0):
+        """Yield one (crops, labels) batch with every channel as its own crop; global_step is unused."""
+        crops = session.run(self.resized, feed_dict={self.crop_ph:self.image_pool})
+        # separate_channels emits one block of crops per channel, so repeat the labels once per real channel
+        labels = np.tile(self.label_pool, [len(self.config["real_channels"]), 1])
+        yield crops, labels
+

From 1b6a17493e4c9070d4356aeb3bec0a036c891aeb Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Tue, 28 Sep 2021 11:29:17 +0200
Subject: [PATCH 30/43] Switches between crop-generators.

---
 deepprofiler/imaging/cropping.py              |  1 +
 deepprofiler/learning/model.py                | 39 ++++++++++++++-----
 .../crop_generators/sampled_crop_generator.py |  1 +
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/deepprofiler/imaging/cropping.py b/deepprofiler/imaging/cropping.py
index f106e694..56ad81ee 100644
--- a/deepprofiler/imaging/cropping.py
+++ b/deepprofiler/imaging/cropping.py
@@ -11,6 +11,7 @@
 import deepprofiler.imaging.boxes
 
 tf.compat.v1.disable_v2_behavior()
+tf.config.run_functions_eagerly(False)
 
 
 def crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph, box_size, mask_boxes=False):
diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index 6e4a8ec9..9c64fa6a 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -12,6 +12,7 @@
 import deepprofiler.learning.validation
 
 tf.compat.v1.disable_v2_behavior()
+tf.config.run_functions_eagerly(False)
 
 ##################################################
 # This class should be used as an abstract base
@@ -28,7 +29,10 @@ def __init__(self, config, dset, crop_generator, val_crop_generator, is_training
         self.config = config
         self.dset = dset
         self.train_crop_generator = crop_generator(config, dset)
-        self.val_crop_generator = crop_generator(config, dset, mode="Validation") #val_crop_generator(config, dset)
+        if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen':
+            self.val_crop_generator = crop_generator(config, dset, mode="Validation")
+        else:
+            self.val_crop_generator = val_crop_generator(config, dset)
         self.random_seed = None
         self.is_training = is_training
 
@@ -62,7 +66,13 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
 
         # Get training parameters
         epochs, schedule_epochs, schedule_lr, freq = setup_params(self, experiment)
-        steps = self.train_crop_generator.expected_steps
+        if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen':
+            steps = self.train_crop_generator.expected_steps
+        elif self.config['train']['model']['crop_generator'] == 'sampled_crop_generator':
+            steps = int((len(os.listdir(self.config['paths']['single_cell_sample'])) - 1)
+                         / self.config["train"]["model"]["params"]["batch_size"])
+        else:
+            steps = self.dset.steps_per_epoch
 
         # Load weights
         self.load_weights(epoch)
@@ -137,15 +147,26 @@ def start_main_session():
 
 def load_validation_data(dpmodel, session):
     dpmodel.val_crop_generator.start(session)
-    x_validation = []
-    y_validation = []
 
-    for batch in dpmodel.val_crop_generator.generate():
-        x_validation.append(batch[0])
-        y_validation.append(batch[1])
+    if dpmodel.config['train']['model']['crop_generator'] == 'online_labels_cropgen':
+        x_validation = []
+        y_validation = []
+
+        for batch in dpmodel.val_crop_generator.generate():
+            x_validation.append(batch[0])
+            y_validation.append(batch[1])
+
+        x_validation = np.concatenate(x_validation)
+        y_validation = np.concatenate(y_validation)
+
+    else:
+        x_validation, y_validation = deepprofiler.learning.validation.load_validation_data(
+            dpmodel.config,
+            dpmodel.dset,
+            dpmodel.val_crop_generator,
+            session
+        )
 
-    x_validation = np.concatenate(x_validation)
-    y_validation = np.concatenate(y_validation)
     print("Validation data:", x_validation.shape, y_validation.shape)
 
     return x_validation, y_validation
diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py
index 1792c5e9..47350f0d 100644
--- a/plugins/crop_generators/sampled_crop_generator.py
+++ b/plugins/crop_generators/sampled_crop_generator.py
@@ -8,6 +8,7 @@
 import deepprofiler.imaging.cropping
 
 tf.compat.v1.disable_v2_behavior()
+tf.config.run_functions_eagerly(False)
 
 ## Wrapper for Keras ImageDataGenerator
 ## The Keras generator is not completely useful, because it makes assumptions about

From 3c9d7eeb0b6e61092ff6b3269776cc2bbc2ea663 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Tue, 28 Sep 2021 18:05:27 +0200
Subject: [PATCH 31/43] Cleanup of sampling.py after merge

---
 deepprofiler/dataset/sampling.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/deepprofiler/dataset/sampling.py b/deepprofiler/dataset/sampling.py
index e5eb664b..e3725b86 100644
--- a/deepprofiler/dataset/sampling.py
+++ b/deepprofiler/dataset/sampling.py
@@ -26,8 +26,8 @@ def process_batch(self, batch):
             batch["locations"][i]["Target"] = batch["targets"][i][0]
             batch["locations"][i]["Class_Name"] = self.dset.targets[0].values[batch["targets"][i][0]]
         metadata = pd.concat(batch["locations"])
-        cols = ["Key","Target","Nuclei_Location_Center_X","Nuclei_Location_Center_Y"]
-        seps = ["+","@","x",".png"]
+        cols = ["Key", "Target", "Nuclei_Location_Center_X", "Nuclei_Location_Center_Y"]
+        seps = ["+", "@", "x", ".png"]
         metadata["Image_Name"] = ""
         for c in range(len(cols)):
             metadata["Image_Name"] += metadata[cols[c]].astype(str).str.replace("/","-") + seps[c]
@@ -57,15 +57,10 @@ def export_single_cells(self, key, image_array, meta):
         for j in range(crops.shape[0]):
             image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:])
             skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image)
-<<<<<<< HEAD
 
         self.all_metadata.append(metadata)
         print("{}: {} single cells".format(key, crops.shape[0]))
 
-=======
-        self.all_metadata.append(metadata)
-        print("{}: {} single cells".format(key, crops.shape[0]))
->>>>>>> master
 
 def start_session():
     configuration = tf.compat.v1.ConfigProto()
@@ -119,7 +114,7 @@ def sample_dataset(config, dset):
         if len(batch["keys"]) > 0:
             crops, metadata = cropper.process_batch(batch)
             for j in range(crops.shape[0]):
-                image = deepprofiler.imaging.cropping.unfold_channels(crops[j,:,:,:])
+                image = deepprofiler.imaging.cropping.unfold_channels(crops[j, :, :, :])
                 skimage.io.imsave(os.path.join(outdir, metadata.loc[j, "Image_Name"]), image)
             all_metadata.append(metadata)
 

From 56aead75a12517b6f25b600b0e2561f41745ec02 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Wed, 29 Sep 2021 18:54:37 +0200
Subject: [PATCH 32/43] Online crop-generators naming.

---
 deepprofiler/learning/model.py                 |  2 +-
 .../crop_generators/online_labels_cropgen.py   | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index 9c64fa6a..c2b7714a 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -82,7 +82,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
 
         # Train model
         self.feature_model.fit_generator(
-            generator=self.train_crop_generator.generator(main_session),
+            generator=self.train_crop_generator.generate(main_session),
             steps_per_epoch=steps,
             epochs=epochs,
             callbacks=callbacks,
diff --git a/plugins/crop_generators/online_labels_cropgen.py b/plugins/crop_generators/online_labels_cropgen.py
index 711da5f4..4521136b 100644
--- a/plugins/crop_generators/online_labels_cropgen.py
+++ b/plugins/crop_generators/online_labels_cropgen.py
@@ -48,7 +48,7 @@ def __init__(self, config, dset, mode="Training"):
         self.classes = list(self.split_data[self.target].unique())
         self.num_classes = len(self.classes)
         self.classes.sort()
-        self.classes = {self.classes[i]:i for i in range(self.num_classes)}
+        self.classes = {self.classes[i]: i for i in range(self.num_classes)}
 
         # Identify targets and samples
         self.balanced_sample()
@@ -88,7 +88,7 @@ def balanced_sample(self):
             print(self.samples[self.target].value_counts())
 
 
-    def generator(self, sess, global_step=0):
+    def generate(self, sess, global_step=0):
         pointer = 0
         while True:
             x = np.zeros([self.batch_size, self.box_size, self.box_size, self.num_channels])
@@ -105,7 +105,7 @@ def generator(self, sess, global_step=0):
             yield(x, np.concatenate(y, axis=0))
 
 
-    def generate(self, source="samples"):
+    def generator(self, source="samples"):
         pointer = 0
         if source == "splits":
             dataframe = self.split_data
@@ -124,19 +124,19 @@ def generate(self, source="samples"):
                     break
                 filename = os.path.join(self.directory, dataframe.loc[pointer, "Image_Name"])
                 im = skimage.io.imread(filename).astype(np.float32)
-                x[i,:,:,:] = deepprofiler.imaging.cropping.fold_channels(im)
+                x[i, :, :, :] = deepprofiler.imaging.cropping.fold_channels(im)
                 y.append(self.classes[dataframe.loc[pointer, self.target]])
                 pointer += 1
             if len(y) < x.shape[0]:
-                x = x[0:len(y),...]
-            yield(x, tf.keras.utils.to_categorical(y, num_classes=self.num_classes))
+                x = x[0:len(y), ...]
+            yield(x, tf.keras.utils.to_categorical(y, num_classes = self.num_classes))
 
 
     def init_online_labels(self):
         LABEL_SMOOTHING = 0.2
         self.soft_labels = np.zeros((self.split_data.shape[0], self.num_classes)) + LABEL_SMOOTHING/self.num_classes
         print("Soft labels:", self.soft_labels.shape)
-        for k,r in self.split_data.iterrows():
+        for k, r in self.split_data.iterrows():
             label = self.classes[self.split_data.loc[k, self.target]]
             self.soft_labels[k, label] += 1. - LABEL_SMOOTHING
         print("Total labels:", np.sum(self.soft_labels))
@@ -151,8 +151,8 @@ def update_online_labels(self, model, epoch):
 
         # Get predictions with the model
         model.get_layer("augmentation_layer").is_training = False
-        for batch in self.generate(source="splits"):
-            predictions.append( model.predict(batch[0]) )
+        for batch in self.generator(source = "splits"):
+            predictions.append(model.predict(batch[0]))
         model.get_layer("augmentation_layer").is_training = True
 
         # Update soft labels

From 2eb7ecfacec8ef6bc7418dcbefa1def43b5d80a6 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Fri, 1 Oct 2021 15:14:58 +0200
Subject: [PATCH 33/43] Augmentation parameter for ResNet and EfficientNet
 models.

---
 plugins/models/efficientnet.py | 29 +++++++++++++++--------------
 plugins/models/resnet.py       | 24 ++++++++++++++----------
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index 91ae2a22..770b49bc 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -5,7 +5,8 @@
 from deepprofiler.learning.model import DeepProfilerModel
 from deepprofiler.imaging.augmentations import AugmentationLayer
 
-tf.compat.v1.disable_v2_behavior()
+#tf.compat.v1.disable_v2_behavior()
+#tf.config.run_functions_eagerly(False)
 
 
 class ModelClass(DeepProfilerModel):
@@ -33,7 +34,7 @@ def get_model(self, config, input_image=None, weights=None, include_top=False):
         error_msg = str(num_layers) + " conv_blocks not in " + SM
         assert num_layers in supported_models.keys(), error_msg
 
-        if self.is_training and weights is None:
+        if self.is_training and weights is None and self.config["train"]['model'].get('augmentations') is True:
             input_image = AugmentationLayer()(input_image)
 
         model = supported_models[num_layers](
@@ -53,8 +54,7 @@ def define_model(self, config, dset):
 
         optimizer = tf.compat.v1.keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9,
                                          nesterov=True)
-        #loss_func = "categorical_crossentropy"
-        loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2)
+        loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2)  # TODO:parameterize?
 
         if self.is_training is False and "use_pretrained_input_size" in config["profile"].keys():
             input_tensor = tf.compat.v1.keras.layers.Input(
@@ -65,8 +65,7 @@ def define_model(self, config, dset):
             input_shape = (
                 config["dataset"]["locations"]["box_size"],  # height
                 config["dataset"]["locations"]["box_size"],  # width
-                len(config["dataset"]["images"][
-                        "channels"])  # channels
+                len(config["dataset"]["images"]["channels"])  # channels
             )
             input_image = tf.compat.v1.keras.layers.Input(input_shape)
             model = self.get_model(config, input_image=input_image)
@@ -74,13 +73,12 @@ def define_model(self, config, dset):
             # 2. Create an output embedding for each target
             class_outputs = []
 
-            y = tf.compat.v1.keras.layers.Dense(config["num_classes"], activation="softmax", name="ClassProb")(features)
+            y = tf.compat.v1.keras.layers.Dense(len(dset.targets[0].values), activation="softmax", name="ClassProb")(features)
             class_outputs.append(y)
 
             # 4. Create and compile model
             model = tf.compat.v1.keras.models.Model(inputs=input_image, outputs=class_outputs)
 
-
             ## Added weight decay following tricks reported in:
             ## https://github.com/keras-team/keras/issues/2717
             regularizer = tf.compat.v1.keras.regularizers.l2(0.00001)
@@ -88,16 +86,19 @@ def define_model(self, config, dset):
                 if hasattr(layer, "kernel_regularizer"):
                     setattr(layer, "kernel_regularizer", regularizer)
 
-            model = tf.compat.v1.keras.models.model_from_json(
-                model.to_json(),
-                {'AugmentationLayer': AugmentationLayer}
-            )
+            if self.config["train"]["model"].get("augmentations") is True:
+                model = tf.compat.v1.keras.models.model_from_json(
+                    model.to_json(),
+                    {'AugmentationLayer': AugmentationLayer}
+                )
+            else:
+                model = tf.compat.v1.keras.models.model_from_json(model.to_json())
 
         return model, optimizer, loss_func
 
     def copy_pretrained_weights(self):
         base_model = self.get_model(self.config, weights="imagenet")
-        lshift = self.is_training  # Shift one layer to accommodate the AugmentationLayer
+        lshift = self.feature_model.layers[1].name == 'augmentation_layer'  # Shift one layer to accommodate the AugmentationLayer
 
         # => Transfer all weights except conv1.1
         total_layers = len(base_model.layers)
@@ -114,7 +115,7 @@ def copy_pretrained_weights(self):
 
         for i in range(new_weights.shape[2]):
             j = i % available_channels
-            new_weights[:,:,i,:] = weights[0][:,:,j,:]
+            new_weights[:, :, i, :] = weights[0][:, :, j, :]
 
         weights_array = [new_weights]
         if len(weights) > 1: 
diff --git a/plugins/models/resnet.py b/plugins/models/resnet.py
index 8afc9c19..4a75c75f 100644
--- a/plugins/models/resnet.py
+++ b/plugins/models/resnet.py
@@ -34,7 +34,7 @@ def get_model(self, config, input_image=None, weights=None, pooling=None, includ
         num_layers = config["train"]["model"]["params"]["conv_blocks"]
         error_msg = str(num_layers) + " conv_blocks not in " + SM
         assert num_layers in supported_models.keys(), error_msg
-        if self.is_training and weights is None:
+        if self.is_training and weights is None and self.config["train"]['model'].get('augmentations') is True:
             input_image = AugmentationLayer()(input_image)
         if pooling is not None:
             model = supported_models[num_layers](input_tensor=input_image, pooling=pooling, include_top=include_top,
@@ -92,10 +92,14 @@ def define_model(self, config, dset):
                 if hasattr(layer, "kernel_regularizer"):
                     setattr(layer, "kernel_regularizer", regularizer)
 
-            model = tf.compat.v1.keras.models.model_from_json(
-                model.to_json(),
-                {'AugmentationLayer': AugmentationLayer}
-            )
+            if self.config["train"]["model"].get("augmentations") is True:
+                model = tf.compat.v1.keras.models.model_from_json(
+                    model.to_json(),
+                    {'AugmentationLayer': AugmentationLayer}
+                )
+            else:
+                model = tf.compat.v1.keras.models.model_from_json(model.to_json())
+
 
         return model, optimizer, loss_func
 
@@ -103,15 +107,15 @@ def define_model(self, config, dset):
     ## Support for ImageNet initialization
     def copy_pretrained_weights(self):
         base_model = self.get_model(self.config, weights="imagenet")
-        lshift = int(self.is_training) # Shift one layer to accommodate the AugmentationLayer
+        lshift = self.feature_model.layers[1].name == 'augmentation_layer'  # Shift one layer to accommodate the AugmentationLayer
 
         # => Transfer all weights except conv1.1
         total_layers = len(base_model.layers)
-        for i in range(3,total_layers):
+        for i in range(3, total_layers):
             if len(base_model.layers[i].weights) > 0:
                 print("Setting pre-trained weights: {:.2f}%".format((i/total_layers)*100), end="\r")
                 self.feature_model.layers[i + lshift].set_weights(base_model.layers[i].get_weights())
-        
+
         # => Replicate filters of first layer as needed
         weights = base_model.layers[2].get_weights()
         available_channels = weights[0].shape[2]
@@ -120,10 +124,10 @@ def copy_pretrained_weights(self):
 
         for i in range(new_weights.shape[2]):
             j = i % available_channels
-            new_weights[:,:,i,:] = weights[0][:,:,j,:]
+            new_weights[:, :, i, :] = weights[0][:, :, j, :]
 
         weights_array = [new_weights]
-        if len(weights) > 1: 
+        if len(weights) > 1:
             weights_array += weights[1:]
 
         self.feature_model.layers[2 + lshift].set_weights(weights_array)

From 46848fad8400b7d18908a4db19a8aca805b1d88f Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Fri, 1 Oct 2021 15:15:41 +0200
Subject: [PATCH 34/43] Test config update.

---
 tests/files/config/test.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/files/config/test.json b/tests/files/config/test.json
index cf8230d5..8f147729 100644
--- a/tests/files/config/test.json
+++ b/tests/files/config/test.json
@@ -43,6 +43,7 @@
         },
         "model": {
             "name": "cnn",
+            "augmentations": false,
             "crop_generator": "crop_generator",
             "metrics": ["accuracy"],
             "epochs": 5,

From c9299b941a7309e0f7a47f63b489f7c750468b07 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Fri, 1 Oct 2021 20:35:26 +0200
Subject: [PATCH 35/43] Update sampled_crop_generator.py and image_dataset.py

---
 deepprofiler/dataset/image_dataset.py           |  2 +-
 .../crop_generators/sampled_crop_generator.py   | 17 +++++++++++------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py
index 4a7a63e7..5d369d25 100644
--- a/deepprofiler/dataset/image_dataset.py
+++ b/deepprofiler/dataset/image_dataset.py
@@ -211,7 +211,7 @@ def read_dataset(config, mode = 'train'):
     print(metadata.data.info())
 
     # Split training data
-    if mode == 'train':
+    if mode == 'train' and config["train"]["model"]["crop_generator"] == 'crop_generator':
         split_field = config["train"]["partition"]["split_field"]
         trainingFilter = lambda df: df[split_field].isin(config["train"]["partition"]["training_values"])
         validationFilter = lambda df: df[split_field].isin(config["train"]["partition"]["validation_values"])
diff --git a/plugins/crop_generators/sampled_crop_generator.py b/plugins/crop_generators/sampled_crop_generator.py
index 47350f0d..52f303dc 100644
--- a/plugins/crop_generators/sampled_crop_generator.py
+++ b/plugins/crop_generators/sampled_crop_generator.py
@@ -30,19 +30,24 @@ def __init__(self, config, dset, mode="Training"):
         self.mode = mode
 
         # Load metadata
-        self.all_cells = pd.read_csv(os.path.join(self.directory, "expanded_sc_metadata_tengenes.csv"))
-        self.target = config["train"]["partition"]["targets"][0]
+        self.all_cells = pd.read_csv(os.path.join(self.directory, "sc-metadata.csv"))
+        self.target = "Class_Name"#config["train"]["partition"]["targets"][0]
 
         # Index targets for one-hot encoded labels
-        self.split_data = self.all_cells[self.all_cells.Training_Status_TenGenes == self.mode].reset_index(drop=True)
-        self.classes = list(self.split_data[self.target].unique())
+        self.split_data = self.all_cells[self.all_cells[self.config["train"]["partition"]["split_field"]] ==
+                                         self.mode].reset_index(drop=True)
+
+
+
+        self.classes = list(self.all_cells[self.target].unique())
         self.num_classes = len(self.classes)
         self.classes.sort()
-        self.classes = {self.classes[i]:i for i in range(self.num_classes)}
+        self.classes = {self.classes[i]: i for i in range(self.num_classes)}
 
         # Identify targets and samples
         self.balanced_sample()
-        self.expected_steps = (self.samples.shape[0] // self.batch_size) + int(self.samples.shape[0] % self.batch_size > 0)
+        self.expected_steps = (self.samples.shape[0] // self.batch_size) + \
+                                   int(self.samples.shape[0] % self.batch_size > 0)
 
         # Report number of classes globally
         self.config["num_classes"] = self.num_classes

From 6f05e2e826c0d95961a2097dff8b09c8707b68f9 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Fri, 1 Oct 2021 20:54:41 +0200
Subject: [PATCH 36/43] Update conditions in model.py and image_dataset.py

---
 deepprofiler/dataset/image_dataset.py |  4 ++--
 deepprofiler/learning/model.py        | 18 ++++++++----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/deepprofiler/dataset/image_dataset.py b/deepprofiler/dataset/image_dataset.py
index 5d369d25..7119c0cd 100644
--- a/deepprofiler/dataset/image_dataset.py
+++ b/deepprofiler/dataset/image_dataset.py
@@ -239,8 +239,8 @@ def read_dataset(config, mode = 'train'):
         dset.outlines = outlines
 
     # For training with sampled_crop_generator, no need to read locations again necessary.
-    #if mode == 'train':
-    #    dset.prepare_training_locations()
+    if mode == 'train' and config["train"]["model"]["crop_generator"] == 'crop_generator':
+        dset.prepare_training_locations()
 
     return dset
 
diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index c2b7714a..e1519ccc 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -28,11 +28,12 @@ def __init__(self, config, dset, crop_generator, val_crop_generator, is_training
         self.optimizer = None
         self.config = config
         self.dset = dset
-        self.train_crop_generator = crop_generator(config, dset)
-        if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen':
-            self.val_crop_generator = crop_generator(config, dset, mode="Validation")
-        else:
-            self.val_crop_generator = val_crop_generator(config, dset)
+        if is_training:
+            self.train_crop_generator = crop_generator(config, dset)
+            if self.config['train']['model']['crop_generator'] in ['online_labels_cropgen', 'sampled_crop_generator']:
+                self.val_crop_generator = crop_generator(config, dset, mode="Validation")
+            else:
+                self.val_crop_generator = val_crop_generator(config, dset)
         self.random_seed = None
         self.is_training = is_training
 
@@ -66,11 +67,8 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
 
         # Get training parameters
         epochs, schedule_epochs, schedule_lr, freq = setup_params(self, experiment)
-        if self.config['train']['model']['crop_generator'] == 'online_labels_cropgen':
+        if self.config['train']['model']['crop_generator'] in ['online_labels_cropgen', 'sampled_crop_generator']:
             steps = self.train_crop_generator.expected_steps
-        elif self.config['train']['model']['crop_generator'] == 'sampled_crop_generator':
-            steps = int((len(os.listdir(self.config['paths']['single_cell_sample'])) - 1)
-                         / self.config["train"]["model"]["params"]["batch_size"])
         else:
             steps = self.dset.steps_per_epoch
 
@@ -82,7 +80,7 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
 
         # Train model
         self.feature_model.fit_generator(
-            generator=self.train_crop_generator.generate(main_session),
+            generator=self.train_crop_generator.generator(main_session),
             steps_per_epoch=steps,
             epochs=epochs,
             callbacks=callbacks,

From f1c235d2f67306851cacc6b140f513aa9e054a32 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Fri, 1 Oct 2021 21:01:46 +0200
Subject: [PATCH 37/43] Condition in model.py

---
 deepprofiler/learning/model.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/deepprofiler/learning/model.py b/deepprofiler/learning/model.py
index e1519ccc..e6700f21 100644
--- a/deepprofiler/learning/model.py
+++ b/deepprofiler/learning/model.py
@@ -14,6 +14,7 @@
 tf.compat.v1.disable_v2_behavior()
 tf.config.run_functions_eagerly(False)
 
+
 ##################################################
 # This class should be used as an abstract base
 # class for plugin models.
@@ -146,7 +147,7 @@ def start_main_session():
 def load_validation_data(dpmodel, session):
     dpmodel.val_crop_generator.start(session)
 
-    if dpmodel.config['train']['model']['crop_generator'] == 'online_labels_cropgen':
+    if dpmodel.config['train']['model']['crop_generator'] in ['online_labels_cropgen', 'sampled_crop_generator']:
         x_validation = []
         y_validation = []
 
@@ -186,7 +187,7 @@ def setup_callbacks(dpmodel, lr_schedule_epochs, lr_schedule_lr, dset, experimen
         save_best_only=save_best,
         period=period
     )
-    
+
     # CSV Log
     csv_output = dpmodel.config["paths"]["logs"] + "/log.csv"
     callback_csv = tf.compat.v1.keras.callbacks.CSVLogger(filename=csv_output)
@@ -201,9 +202,9 @@ def lr_schedule(epoch, lr):
     # Collect all callbacks
     if lr_schedule_epochs:
         callback_lr_schedule = tf.compat.v1.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=1)
-        callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule] 
+        callbacks = [callback_model_checkpoint, callback_csv, callback_lr_schedule]
     else:
-        callbacks = [callback_model_checkpoint, callback_csv] 
+        callbacks = [callback_model_checkpoint, callback_csv]
 
     # Online labels callback
     if dpmodel.config["train"]["model"]["crop_generator"] == "online_labels_cropgen":
@@ -211,7 +212,7 @@ def lr_schedule(epoch, lr):
                 on_epoch_end=lambda epoch, logs: dpmodel.train_crop_generator.update_online_labels(dpmodel.feature_model, epoch)
         )
         callbacks.append(update_labels)
-        
+
     return callbacks
 
 

From 13059ebd146fbd0e2f657585ed73ce8067e3a592 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Fri, 1 Oct 2021 21:27:35 +0200
Subject: [PATCH 38/43] Number of classes in EfficientNet

---
 plugins/models/efficientnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index 770b49bc..0e0e21c1 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -73,7 +73,7 @@ def define_model(self, config, dset):
             # 2. Create an output embedding for each target
             class_outputs = []
 
-            y = tf.compat.v1.keras.layers.Dense(len(dset.targets[0].values), activation="softmax", name="ClassProb")(features)
+            y = tf.compat.v1.keras.layers.Dense(self.config["num_classes"], activation="softmax", name="ClassProb")(features)
             class_outputs.append(y)
 
             # 4. Create and compile model

From 7860974b0f6203b758fcab6de1773203fad8f45f Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 4 Oct 2021 14:05:11 +0200
Subject: [PATCH 39/43] Number of classes in ResNet.
 https://github.com/cytomining/DeepProfiler/issues/285

---
 plugins/models/resnet.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/plugins/models/resnet.py b/plugins/models/resnet.py
index 4a75c75f..565da634 100644
--- a/plugins/models/resnet.py
+++ b/plugins/models/resnet.py
@@ -76,12 +76,9 @@ def define_model(self, config, dset):
             # 2. Create an output embedding for each target
             class_outputs = []
 
-            i = 0
-            for t in dset.targets:
-                y = tf.compat.v1.keras.layers.Dense(t.shape[1], activation="softmax", name=t.field_name)(features)
-                class_outputs.append(y)
-                i += 1
-
+            y = tf.compat.v1.keras.layers.Dense(self.config["num_classes"], activation="softmax", name="ClassProb")(
+                features)
+            class_outputs.append(y)
             # 4. Create and compile model
             model = tf.compat.v1.keras.models.Model(inputs=input_image, outputs=class_outputs)
 

From c578dc16a8853da5df92bd86048c0f2979661942 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 4 Oct 2021 14:05:44 +0200
Subject: [PATCH 40/43] Number of classes for profiling.
 https://github.com/cytomining/DeepProfiler/issues/286

---
 deepprofiler/learning/profiling.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deepprofiler/learning/profiling.py b/deepprofiler/learning/profiling.py
index 9918e4fd..b2741cd8 100644
--- a/deepprofiler/learning/profiling.py
+++ b/deepprofiler/learning/profiling.py
@@ -23,9 +23,11 @@ def __init__(self, config, dset):
             "plugins.crop_generators.{}".format(config["train"]["model"]["crop_generator"])
         ).SingleImageGeneratorClass
 
+        self.config["num_classes"] = self.dset.targets[0].shape[1]
+
         self.dpmodel = importlib.import_module(
             "plugins.models.{}".format(config["train"]["model"]["name"])
-        ).ModelClass(config, dset, self.crop_generator, self.profile_crop_generator, is_training=False)
+        ).ModelClass(self.config, dset, self.crop_generator, self.profile_crop_generator, is_training=False)
 
         self.profile_crop_generator = self.profile_crop_generator(config, dset)
 

From 2f781174a710c22ff22bb41d259db2fcec3128a0 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 4 Oct 2021 17:55:58 +0200
Subject: [PATCH 41/43] Parameterize label smoothing.

---
 plugins/models/efficientnet.py | 3 ++-
 plugins/models/resnet.py       | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/plugins/models/efficientnet.py b/plugins/models/efficientnet.py
index 0e0e21c1..4b7b4626 100644
--- a/plugins/models/efficientnet.py
+++ b/plugins/models/efficientnet.py
@@ -54,7 +54,8 @@ def define_model(self, config, dset):
 
         optimizer = tf.compat.v1.keras.optimizers.SGD(lr=config["train"]["model"]["params"]["learning_rate"], momentum=0.9,
                                          nesterov=True)
-        loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=0.2)  # TODO:parameterize?
+        loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=
+                                                                      self.config["train"]["model"]["params"]["label_smoothing"])
 
         if self.is_training is False and "use_pretrained_input_size" in config["profile"].keys():
             input_tensor = tf.compat.v1.keras.layers.Input(
diff --git a/plugins/models/resnet.py b/plugins/models/resnet.py
index 565da634..92dbe40c 100644
--- a/plugins/models/resnet.py
+++ b/plugins/models/resnet.py
@@ -46,7 +46,9 @@ def get_model(self, config, input_image=None, weights=None, pooling=None, includ
     ## Model definition
     def define_model(self, config, dset):
         # 1. Create ResNet architecture to extract features
-        loss_func = "categorical_crossentropy"
+        loss_func = tf.compat.v1.keras.losses.CategoricalCrossentropy(label_smoothing=
+                                                                      self.config["train"]["model"]["params"][
+                                                                          "label_smoothing"])
         optimizer = tf.compat.v1.keras.optimizers.SGD(learning_rate=config["train"]["model"]["params"]["learning_rate"],
                                                       momentum=0.9, nesterov=True)
         if "use_pretrained_input_size" in config["profile"].keys() and self.is_training is False:

From 785bbff0d875d7029f40f9b15af372b99a08ffc6 Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Mon, 4 Oct 2021 17:58:21 +0200
Subject: [PATCH 42/43] Label smoothing parameter in the test.json config.

---
 tests/files/config/test.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/files/config/test.json b/tests/files/config/test.json
index 8f147729..8449a97f 100644
--- a/tests/files/config/test.json
+++ b/tests/files/config/test.json
@@ -53,6 +53,7 @@
                 "learning_rate": 0.0001,
                 "batch_size": 2,
                 "conv_blocks": 1,
+                "label_smoothing": 0.0,
                 "feature_dim": 100,
                 "latent_dim": 100,
                 "epsilon_std": 1.0

From 9b73e44e31c65561084a43fd57cd592f15abe2ed Mon Sep 17 00:00:00 2001
From: arkkienkeli <nikitam851@gmail.com>
Date: Tue, 5 Oct 2021 20:39:08 +0200
Subject: [PATCH 43/43] EfficientNet package in setup.py

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 548a240c..f1dee1f9 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@
         "beautifulsoup4>=4.6",
         "click>=6.7",
         "comet_ml>=1.0",
+        "efficientnet==1.1.1",
         "gpyopt>=1.2",
         "lxml>=4.2",
         "numpy>=1.13",
@@ -30,7 +31,7 @@
         "comet-ml>=3.1.6",
         "tensorflow==2.5.*",
         "tensorflow_addons",
-        "tqdm>=4.62"
+        "tqdm>=4.62",
     ],
     extras_require={
         "test": [