97 changes: 53 additions & 44 deletions Dataset.py
@@ -1,4 +1,4 @@
from S2parser import S2parser
from S2parser_africa import S2parser
import tensorflow as tf
import os
import configparser
@@ -8,7 +8,7 @@
class Dataset():
""" A wrapper class around Tensorflow Dataset api handling data normalization and augmentation """

def __init__(self, datadir, verbose=False, temporal_samples=None, section="dataset", augment=False):
def __init__(self, datadir, verbose=False, temporal_samples=None, section="dataset", augment=False, country=None):
self.verbose = verbose

self.augment = augment
@@ -53,6 +53,7 @@ def __init__(self, datadir, verbose=False, temporal_samples=None, section="dataset", augment=False, country=None):
id,cl = row.split('|')
self.ids.append(int(id))
self.classes.append(cl)
print('classes: ', self.classes)

## create a lookup table to map labelids to dimension ids
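A minimal TF 1.x sketch of the lookup this comment describes; the label ids below are toy assumptions, not values from this repo:

import tensorflow as tf
labelids = tf.constant([0, 3, 4], dtype=tf.int64)  # ids as stored on disk, possibly non-sequential
dimids = tf.constant([0, 1, 2], dtype=tf.int64)    # dense dimension ids expected by the network
id_lookup_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(labelids, dimids), default_value=-1)
# used later in transform_labels; requires sess.run(tf.tables_initializer()) before the first lookup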

@@ -76,74 +77,92 @@ def __init__(self, datadir, verbose=False, temporal_samples=None, section="dataset", augment=False, country=None):
datacfg.read(cfgpath)
cfg = datacfg[section]

self.tileidfolder = os.path.join(dataroot, "tileids")
self.country = country
if self.country is None:
self.tileidfolder = os.path.join(dataroot, "tileids")
else:
self.tileidfolder = os.path.join(dataroot, "tileids/" + self.country + "/tileids")
self.datadir = os.path.join(dataroot, cfg["datadir"])
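As a side note on the country branch above, an equivalent form built purely with os.path.join (a sketch producing the same path while avoiding the mixed string concatenation) would be:

self.tileidfolder = os.path.join(dataroot, "tileids", self.country, "tileids")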

assert 'pix10' in cfg.keys()
assert 'nobs' in cfg.keys()
assert 'nbands10' in cfg.keys()
assert 'nbands20' in cfg.keys()
assert 'nbands60' in cfg.keys()

self.tiletable=cfg["tiletable"]

self.nobs = int(cfg["nobs"])

self.expected_shapes = self.calc_expected_shapes(int(cfg["pix10"]),
int(cfg["nobs"]),
int(cfg["nbands10"]),
int(cfg["nbands20"]),
int(cfg["nbands60"])
int(cfg["nbands10"])
)


# expected datatypes as read from disk
self.expected_datatypes = (tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.int64)
self.expected_datatypes = (tf.float32, tf.float32, tf.float32, tf.int64)

def calc_expected_shapes(self, pix10, nobs, bands10, bands20, bands60):
pix20 = pix10 / 2;
pix60 = pix10 / 6;
def calc_expected_shapes(self, pix10, nobs, bands10):
x10shape = (nobs, pix10, pix10, bands10)
x20shape = (nobs, pix20, pix20, bands20)
x60shape = (nobs, pix60, pix60, bands60)
doyshape = (nobs,)
yearshape = (nobs,)
labelshape = (nobs, pix10, pix10)

return [x10shape, x20shape, x60shape, doyshape, yearshape, labelshape]
print('x10shape: ', x10shape)
print('doyshape: ', doyshape)
print('yearshape: ', yearshape)
print('labelshape: ', labelshape)

return [x10shape, doyshape, yearshape, labelshape]
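To make the reduced shape list concrete, with hypothetical config values pix10=24, nobs=46 and nbands10=10 the method returns:

# x10shape   = (46, 24, 24, 10)  (time, height, width, bands) -- only the 10m bands remain
# doyshape   = (46,)             one day-of-year per observation
# yearshape  = (46,)
# labelshape = (46, 24, 24)      one label map per time step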

def transform_labels(self,feature):
"""
1. take only the first label map, as labels are not supposed to change over the time series
2. perform a label lookup, as stored label ids may not be sequential: labelid [0,3,4] -> dimid [0,1,2]
"""

x10, x20, x60, doy, year, labels = feature
x10, doy, year, labels = feature

# take the first label map in time [46,24,24] -> [24,24]
# labels are not supposed to change over the time series
#labels = labels[0]
#print('unique_labels: ', tf.unique(tf.stack(tf.reshape(labels, -1))))
labels = self.id_lookup_table.lookup(labels)

return x10, x20, x60, doy, year, labels
return x10, doy, year, labels

def normalize(self, feature):

x10, x20, x60, doy, year, labels = feature
x10 = tf.scalar_mul(1e-4, tf.cast(x10, tf.float32))
x20 = tf.scalar_mul(1e-4, tf.cast(x20, tf.float32))
x60 = tf.scalar_mul(1e-4, tf.cast(x60, tf.float32))

"""
Normalizes between 0 and 1.
"""
x10, doy, year, labels = feature

doy = tf.cast(doy, tf.float32) / 365

# year = (2016 - tf.cast(year, tf.float32)) / 2017
year = tf.cast(year, tf.float32) - 2016

if self.country in ['ghana', 'southsudan', 'germany']:
S2_BAND_MEANS = { 'ghana': tf.constant([2620.00, 2519.89, 2630.31, 2739.81, 3225.22, 3562.64, 3356.57, 3788.05, 2915.40, 2102.65]),
'southsudan': tf.constant([2119.15, 2061.95, 2127.71, 2277.60, 2784.21, 3088.40, 2939.33, 3308.03, 2597.14, 1834.81]),
'germany': tf.constant([1991.37, 2026.92, 2136.22, 6844.82, 9951.98, 11638.58, 3664.66, 12375.27, 7351.99, 5027.96, 0., 0., 0.])}

S2_BAND_STDS = { 'ghana': tf.constant([2171.62, 2085.69, 2174.37, 2084.56, 2058.97, 2117.31, 1988.70, 2099.78, 1209.48, 918.19]),
'southsudan': tf.constant([2113.41, 2026.64, 2126.10, 2093.35, 2066.81, 2114.85, 2049.70, 2111.51, 1320.97, 1029.58]),
'germany': tf.constant([1943.62, 1755.82, 1841.09, 5703.38, 5104.90, 5136.54, 1663.27, 5125.05, 3682.57, 3273.71, 10000., 10000., 10000.])}

band_means = S2_BAND_MEANS[self.country]
band_stds = S2_BAND_STDS[self.country]

return x10, x20, x60, doy, year, labels
x10 = tf.cast(x10, tf.float32)
x10 = tf.divide( tf.subtract( x10, tf.reshape(band_means, shape=[1, 1, 1, -1]) ), tf.reshape(band_stds, shape=[1, 1, 1, -1]) )

else:
x10 = tf.scalar_mul(1e-4, tf.cast(x10, tf.float32))

return x10, doy, year, labels
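The per-band standardization above works by broadcasting a (C,)-vector over the (T, H, W, C) input; a self-contained sketch with toy shapes and values (assumptions, not the repo's data):

import tensorflow as tf
x10 = tf.random_uniform((46, 24, 24, 10), maxval=10000.0)  # toy (time, h, w, bands) reflectances
band_means = tf.constant([2620.0] * 10)                    # toy per-band means
band_stds = tf.constant([2171.0] * 10)                     # toy per-band stds
# reshaping to (1, 1, 1, C) lets subtraction and division broadcast over time and space
x10 = (x10 - tf.reshape(band_means, [1, 1, 1, -1])) / tf.reshape(band_stds, [1, 1, 1, -1])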

def augment(self, feature):

x10, x20, x60, doy, year, labels = feature
x10, doy, year, labels = feature

## Flip UD

@@ -152,8 +171,6 @@ def augment(self, feature):

# flip
x10 = tf.cond(condition, lambda: tf.reverse(x10, axis=[1]), lambda: x10)
x20 = tf.cond(condition, lambda: tf.reverse(x20, axis=[1]), lambda: x20)
x60 = tf.cond(condition, lambda: tf.reverse(x60, axis=[1]), lambda: x60)
labels = tf.cond(condition, lambda: tf.reverse(labels, axis=[1]), lambda: labels)


@@ -164,11 +181,9 @@

# flip
x10 = tf.cond(condition, lambda: tf.reverse(x10, axis=[2]), lambda: x10)
x20 = tf.cond(condition, lambda: tf.reverse(x20, axis=[2]), lambda: x20)
x60 = tf.cond(condition, lambda: tf.reverse(x60, axis=[2]), lambda: x60)
labels = tf.cond(condition, lambda: tf.reverse(labels, axis=[2]), lambda: labels)

return x10, x20, x60, doy, year, labels
return x10, doy, year, labels
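Note that axis=[1] flips height and axis=[2] flips width of the (T, H, W, C) tensor, and each flip reuses one sampled condition for both image and labels so they stay spatially aligned; a minimal sketch of one paired flip (names assumed):

condition = tf.less(tf.random_uniform([], maxval=1.0), 0.5)  # one shared coin flip
x10 = tf.cond(condition, lambda: tf.reverse(x10, axis=[1]), lambda: x10)
labels = tf.cond(condition, lambda: tf.reverse(labels, axis=[1]), lambda: labels)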


def temporal_sample(self, feature):
@@ -180,7 +195,7 @@
if n is None:
return feature

x10, x20, x60, doy, year, labels = feature
x10, doy, year, labels = feature

# data format: 1, 2, 1, 2, -1, -1, -1
# padded observations are marked with negative values
@@ -194,21 +209,19 @@
shuffled_range = tf.random_shuffle(tf.range(max_obs))[0:n]

idxs = -tf.nn.top_k(-shuffled_range, k=n).values

print('idxs: ', idxs)
x10 = tf.gather(x10, idxs)
x20 = tf.gather(x20, idxs)
x60 = tf.gather(x60, idxs)
doy = tf.gather(doy, idxs)
year = tf.gather(year, idxs)

return x10, x20, x60, doy, year, labels
return x10, doy, year, labels
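The double negation around tf.nn.top_k is what restores temporal order: top_k returns values in descending order, so negating before and after yields the n sampled indexes sorted ascending. A standalone sketch with assumed sizes:

max_obs, n = 46, 10                                   # assumed sizes
shuffled = tf.random_shuffle(tf.range(max_obs))[0:n]  # n random, unordered indexes
idxs = -tf.nn.top_k(-shuffled, k=n).values            # ascending again, preserving time order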

def get_ids(self, partition, fold=0):

def readids(path):
with open(path, 'r') as f:
lines = f.readlines()
return [int(l.replace("\n", "")) for l in lines]
return [l.replace("\n", "") for l in lines]

traintest = "{partition}_fold{fold}.tileids"
eval = "{partition}.tileids"
@@ -233,18 +246,17 @@ def create_tf_dataset(self, partition, fold, batchsize, shuffle, prefetch_batches

# set of ids as present in database of given partition (train/test/eval) and fold (0-9)
allids = self.get_ids(partition=partition, fold=fold)

# set of ids present in local folder (e.g. 1.tfrecord)
tiles = os.listdir(self.datadir)

if tiles[0].endswith(".gz"):
compression = "GZIP"
ext = ".tfrecord.gz"
else:
compression = ""
ext = ".tfrecord"

downloaded_ids = [int(t.replace(".gz", "").replace(".tfrecord", "")) for t in tiles]
downloaded_ids = [t.replace(".gz", "").replace(".tfrecord", "") for t in tiles]

# intersection of available ids and partition ids
if overwrite_ids is None:
@@ -253,7 +265,6 @@
print "overwriting data ids! due to manual input"
ids = overwrite_ids


filenames = [os.path.join(self.datadir, str(id) + ext) for id in ids]
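Since readids and downloaded_ids both dropped their int() casts in this diff, the partition/disk intersection now compares strings on both sides; a sketch of that intersection with the surrounding variable names (the exact expression in the collapsed code may differ):

ids = list(set(allids).intersection(set(downloaded_ids)))  # both sides are str now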

if self.verbose:
@@ -320,9 +331,7 @@ def main():

with tf.Session() as sess:
sess.run([iterator.initializer, tf.tables_initializer()])
x10, x20, x60, doy, year, labels = sess.run(iterator.get_next())
print x10.shape

x10, doy, year, labels = sess.run(iterator.get_next())
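With batching as configured in create_tf_dataset and the hypothetical config from above, the fetched values are plain NumPy arrays:

# x10.shape    == (batch, 46, 24, 24, 10)  assuming pix10=24, nobs=46, nbands10=10
# labels.shape == (batch, 46, 24, 24)      after the id lookup in transform_labels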

if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion Dockerfile
@@ -1,4 +1,5 @@
FROM tensorflow/tensorflow:1.4.0-gpu
FROM tensorflow/tensorflow:1.12.0-gpu
#FROM tensorflow/tensorflow:1.4.0-gpu

LABEL maintainer="Marc Rußwurm <marc.russwurm@tum.de>"

22 changes: 11 additions & 11 deletions S2parser.py
@@ -59,7 +59,7 @@ def write(self, filename, x10, x20, x60, doy, year, labels):
sys.stdout.flush()

def get_shapes(self, sample):
print "reading shape of data using the sample "+sample
print("reading shape of data using the sample "+sample)
data = self.read_and_return(sample)
return [tensor.shape for tensor in data]
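These parenthesized single-argument prints behave identically on Python 2 and 3; if the file must also keep running under the Python 2 of the TF 1.x docker images, the conventional guard (an optional addition, not part of this PR) is:

from __future__ import print_function  # makes print() a function on Python 2 as well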

@@ -92,12 +92,12 @@ def read(self,filenames):
elif isinstance(filenames,tf.FIFOQueue):
filename_queue = filenames
else:
print "please insert either list or tf.FIFOQueue"
print("please insert either list or tf.FIFOQueue")

reader = tf.TFRecordReader()
f, serialized_example = reader.read(filename_queue)

print f
print(f)

feature = tf.parse_single_example(serialized_example, features=self.feature_format)

@@ -153,10 +153,10 @@ def read_and_return(self,filename):
return sess.run(feature_op)

def test():
print "Running self test:"
print "temporary tfrecord file is written with random numbers"
print "tfrecord file is read back"
print "contents are compared"
print("Running self test:")
print("temporary tfrecord file is written with random numbers")
print("tfrecord file is read back")
print("contents are compared")

filename="tmptile.tfrecord"

@@ -176,11 +176,11 @@ def test():
x10_, x20_, x60_, doy_, year_, labels_ = read_and_return(filename)

# test if wrote and read data is the same
print "TEST"
print("TEST")
if np.all(x10_==x10) and np.all(x20_==x20) and np.all(x60_==x60) and np.all(labels_==labels) and np.all(doy_==doy) and np.all(year_==year):
print "PASSED"
print("PASSED")
else:
print "NOT PASSED"
print("NOT PASSED")


# remove file
@@ -195,4 +195,4 @@ def test():
#x10, x20, x60, doy, year, labels = read_and_return("data/bavaria/1.tfrecord")
parser = S2parser()

parser.tfrecord_to_pickle("1.tfrecord","1.pkl")
parser.tfrecord_to_pickle("1.tfrecord","1.pkl")