Changes from all commits (37 commits)
56e6e58
First commit of pipeline
Jul 1, 2019
968f7d3
multipagetif as a function and discovery python file included
Jul 2, 2019
854159f
EnTK runs multipage
Jul 2, 2019
792f41b
Fixing matlab input
Jul 3, 2019
5957443
Connecting predict in the pipeline
Jul 3, 2019
22db288
Debugging EnTK script
Jul 8, 2019
2e0986a
Finalizing EnTK script
Jul 8, 2019
8aa062f
Pylint and flake8
Jul 8, 2019
e532716
Merge branch 'devel_samira' into feature/entk_pipeline
Jul 8, 2019
a5ae6aa
Fixing conflicts between branches
Jul 8, 2019
5a349f3
Linting fixes
Jul 8, 2019
11b92ef
Fixing mosaic based on Samira's comments
Sep 18, 2019
3261fdd
Fixing output flag
Sep 18, 2019
caace85
Merge branch 'devel_samira' into feature/entk_pipeline
Sep 18, 2019
274358a
Updating files based on comments
Oct 16, 2019
85d4466
More updates
Oct 16, 2019
48265f8
Update src/classification/predict.py
Feb 14, 2020
109d2e1
Update src/utils/multipagetiff.m
Feb 14, 2020
1580a71
Update src/classification/mosaic.m
Feb 14, 2020
babd811
Update src/classification/mosaic.m
Feb 14, 2020
8456036
Update src/classification/mosaic.m
Feb 14, 2020
53afdcb
Update src/classification/mosaic.m
Feb 14, 2020
8d3401c
Update src/classification/mosaic.m
Feb 14, 2020
c81a713
Update src/classification/predict.py
Feb 14, 2020
85237da
Merge commit '3fbed067e2c6f5543a4e806996f8af7e0d96241a' into feature/…
Feb 14, 2020
040aaa6
Merge branch 'feature/entk_pipeline' of https://github.com/iceberg-pr…
Feb 14, 2020
942c324
Updating EnTK pipeline for testing
Feb 27, 2020
1e4b0b7
Finalizing EnTK pipeline
Mar 5, 2020
d09f461
Travis fixes
Mar 5, 2020
e28b6ec
Pylint and flake8 fixes
Mar 5, 2020
0227043
Merge branch 'feature/entk_pipeline' of https://github.com/iceberg-pr…
Mar 5, 2020
401089a
Trying to fix travis
Mar 6, 2020
ac1cdde
Merge branch 'devel_samira' into feature/entk_pipeline
Jun 19, 2020
acaecc1
Updated entk script version
Aug 7, 2020
31b7192
Updating entk script for rivers
Aug 7, 2020
ae7cd2c
Update README with installation and execution instructions
bspitzbart Aug 12, 2020
5d54f61
Working EnTK script
Oct 1, 2020
2 changes: 1 addition & 1 deletion .flake8rc
@@ -1,3 +1,3 @@
[flake8]
ignore = E124, W293, E261, W291, W292, F403, E303, F405
ignore = E124, W293, E261, W291, W292, F403, E303, F405, W504
max-line-length = 80
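
A quick local lint run against this configuration might look like the following (a sketch; note that flake8 must be pointed explicitly at this non-default config file name):

```bash
$ pip install flake8                 # as in .travis.yml
$ flake8 --config=.flake8rc src/     # lint the source tree with the repo's ignore list
```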
6 changes: 6 additions & 0 deletions .travis.yml
@@ -27,6 +27,12 @@ install:
# - pip install .
# - pip install coverage
- pip install keras
- pip install radical.utils==0.72
- pip install radical.saga==0.72
- pip install radical.pilot==0.72
- pip install radical.entk==0.72
- pip install tifffile
- pip install pandas
- pip install flake8
- pip install pylint
# - pip install codecov
96 changes: 92 additions & 4 deletions README.md
@@ -1,4 +1,92 @@
# Rivers (Arctic hydrology)

We provide a classification algorithm for ice surface features from high-resolution imagery. The algorithm was developed by training a convolutional neural network to detect regions of large and small rivers and to distinguish them from crevasses and non-channelized slush. We also provide a detection algorithm to extract polyline water features from the classified high-probability river areas.

## Prerequisites - all available on Bridges via the commands below
- Linux
- Python 3
- CPU and NVIDIA GPU + CUDA CuDNN

## Software Dependencies - these will be installed automatically with the installation below.
- numpy
- scipy
- tifffile
- keras >= 1.0
- tensorboardX==1.8
- opencv-python
- rasterio
- affine

## Installation
Preliminaries:
These instructions are specific to XSEDE Bridges, but other resources can be used if CUDA, Python 3, and an NVIDIA P100 GPU are available; in that case, skip the 'module load' commands, which are Bridges-specific.

For Unix or Mac users:
Log in to Bridges via ssh from a Unix or Mac command-line terminal. Login is available to Bridges directly or through the XSEDE portal. Please see the [Bridges User's Guide](https://portal.xsede.org/psc-bridges).

For Windows users:
Many tools are available for ssh access to Bridges. Please see [Ubuntu](https://ubuntu.com/tutorials/tutorial-ubuntu-on-windows#1-overview), [MobaXterm](https://mobaxterm.mobatek.net) or [PuTTY](https://www.chiark.greenend.org.uk/~sgtatham/putty/).

### PSC Bridges
Once you have logged into Bridges, you can follow one of two methods for installing iceberg-rivers.

#### Method 1 (Recommended):

Lines below beginning with '$' are commands to enter (or cut and paste) into your terminal (note that all commands are case-sensitive). Everything following '#' is a comment explaining the reason for the command and should not be included in what you enter. Lines that do not start with '$' or '[rivers_env] $' are output you should expect to see.

```bash
$ pwd
/home/username
$ cd $SCRATCH # switch to your working space.
$ mkdir Rivers # create a directory to work in.
$ cd Rivers # move into your working directory.
$ module load cuda # load parallel computing architecture.
$ module load python3 # load correct python version.
$ virtualenv rivers_env # create a virtual environment to isolate your work from the default system.
$ source rivers_env/bin/activate # activate your environment. Notice the command line prompt changes to show your environment on the next line.
[rivers_env] $ pwd
/pylon5/group/username/Rivers
[rivers_env] $ export PYTHONPATH=<path>/rivers_env/lib/python3.5/site-packages # set a system variable to point python to your specific code. (Replace <path> with the result of the pwd command above.)
[rivers_env] $ pip install iceberg_rivers.search # pip is a python tool to install the requested software (iceberg_rivers.search in this case) from a repository. (This may take several minutes.)
```

#### Method 2 (Installing from source; recommended for developers only):

```bash
$ git clone https://github.com/iceberg-project/Rivers.git
$ module load cuda
$ module load python3
$ virtualenv rivers_env
$ source rivers_env/bin/activate
[rivers_env] $ export PYTHONPATH=<path>/rivers_env/lib/python3.5/site-packages
[rivers_env] $ pip install . --upgrade
```

#### To test
```bash
[rivers_env] $ deactivate # exit your virtual environment.
$ interact -p GPU-small --gres=gpu:p100:1 # request a compute node. This package has been tested on P100 GPUs on Bridges, but any other resource offering the same GPUs should also work. (This may take a few minutes to receive an allocation.)
$ cd $SCRATCH/Rivers # make sure you are in the same directory where everything was set up before.
$ module load cuda # load parallel computing architecture, as before.
$ module load python3 # load correct python version, as before.
$ source rivers_env/bin/activate # activate your environment; no need to create a new one because the Rivers tools are already installed and isolated here.
[rivers_env] $ iceberg_rivers.detect --help # display a help screen of available usage and parameters.
```
## Prediction
- Download a pre-trained model at:

You can download to your local machine and use scp, ftp, rsync, or Globus to [transfer to Bridges](https://portal.xsede.org/psc-bridges).

Rivers prediction is executed in three steps:
First, follow the environment setup commands under 'To test' above. Then create tiles from an input GeoTiff image and write them to the output_folder. The scale_bands parameter (in pixels) depends on the trained model being used. The default scale_bands is 299 for the pre-trained model downloaded above; if you use your own model, the scale_bands may differ.
```bash
[rivers_env] $ iceberg_rivers.tiling --scale_bands=299 --input_image=<image_abspath> --output_folder=./test
```
Next, detect rivers on each tile, outputting counts and confidence values for each tile.
```bash
[rivers_env] $ iceberg_rivers.predicting --input_image=<image_filename> --model_architecture=UnetCntWRN --hyperparameter_set=A --training_set=test_vanilla --test_folder=./test --model_path=./ --output_folder=./test_image
```
Finally, mosaic all the tiles back into one image:
```bash
[rivers_env] $ iceberg_rivers.mosaic --input_folder=./test_image
```
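
Taken together, the three steps above could be scripted end to end. A minimal sketch (assuming the rivers_env environment is already activated as under 'To test', and that the input image and model files are in place; the IMAGE path is a placeholder):

```bash
#!/bin/bash
# Hypothetical wrapper around the three documented commands.
set -e
IMAGE=/path/to/WV_image.tif   # placeholder input GeoTiff

# 1. Tile the input image (299-pixel tiles for the pre-trained model).
iceberg_rivers.tiling --scale_bands=299 --input_image=$IMAGE --output_folder=./test

# 2. Run the CNN on each tile.
iceberg_rivers.predicting --input_image=$(basename $IMAGE) \
    --model_architecture=UnetCntWRN --hyperparameter_set=A \
    --training_set=test_vanilla --test_folder=./test \
    --model_path=./ --output_folder=./test_image

# 3. Mosaic the per-tile predictions back into one georeferenced image.
iceberg_rivers.mosaic --input_folder=./test_image
```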
143 changes: 143 additions & 0 deletions src/classification/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import numpy as np
import tensorflow as tf  # mean_iou below uses the TF1-style metrics API
from keras.models import Model
from keras.layers import (Input, Conv2D, MaxPooling2D, UpSampling2D,
                          concatenate, Conv2DTranspose, BatchNormalization,
                          Dropout)
from keras.optimizers import Adam
from keras import backend as K

# Custom IoU metric: mean IoU averaged over thresholds 0.5, 0.55, ..., 0.95
# (uses the TF1-style tf.metrics API, matching the pinned dependency versions).
def mean_iou(y_true, y_pred):
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tf.to_int32(y_pred > t)  # binarize predictions at threshold t
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)

def recall_m(y_true, y_pred):
    # recall = TP / (TP + FN), computed on rounded (binarized) predictions
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def precision_m(y_true, y_pred):
    # precision = TP / (TP + FP), computed on rounded (binarized) predictions
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision


def f1_m(y_true, y_pred):
    # harmonic mean of precision and recall
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

def unet(n_classes=1, im_sz=224, n_channels=3, n_filters_start=32,
         growth_factor=2, upconv=True):
    """U-Net with five downsampling levels and batch-normalized skip paths."""
    droprate = 0.25
    n_filters = n_filters_start
    inputs = Input((im_sz, im_sz, n_channels))
    # inputs = BatchNormalization()(inputs)

    # Contracting path
    conv1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    # pool1 = Dropout(droprate)(pool1)

    n_filters *= growth_factor
    pool1 = BatchNormalization()(pool1)
    conv2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    pool2 = Dropout(droprate)(pool2)

    n_filters *= growth_factor
    pool2 = BatchNormalization()(pool2)
    conv3 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    pool3 = Dropout(droprate)(pool3)

    n_filters *= growth_factor
    pool3 = BatchNormalization()(pool3)
    conv4_0 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool3)
    conv4_0 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv4_0)
    pool4_1 = MaxPooling2D(pool_size=(2, 2))(conv4_0)
    pool4_1 = Dropout(droprate)(pool4_1)

    n_filters *= growth_factor
    pool4_1 = BatchNormalization()(pool4_1)
    conv4_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool4_1)
    conv4_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv4_1)
    pool4_2 = MaxPooling2D(pool_size=(2, 2))(conv4_1)
    pool4_2 = Dropout(droprate)(pool4_2)

    # Bottleneck
    n_filters *= growth_factor
    conv5 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool4_2)
    conv5 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv5)

    # Expanding path: transposed convolutions or plain upsampling, each
    # concatenated with the matching contracting-path feature map.
    n_filters //= growth_factor
    if upconv:
        up6_1 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2),
                                             padding='same')(conv5), conv4_1])
    else:
        up6_1 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4_1])
    up6_1 = BatchNormalization()(up6_1)
    conv6_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up6_1)
    conv6_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv6_1)
    conv6_1 = Dropout(droprate)(conv6_1)

    n_filters //= growth_factor
    if upconv:
        up6_2 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2),
                                             padding='same')(conv6_1), conv4_0])
    else:
        up6_2 = concatenate([UpSampling2D(size=(2, 2))(conv6_1), conv4_0])
    up6_2 = BatchNormalization()(up6_2)
    conv6_2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up6_2)
    conv6_2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv6_2)
    conv6_2 = Dropout(droprate)(conv6_2)

    n_filters //= growth_factor
    if upconv:
        up7 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2),
                                           padding='same')(conv6_2), conv3])
    else:
        up7 = concatenate([UpSampling2D(size=(2, 2))(conv6_2), conv3])
    up7 = BatchNormalization()(up7)
    conv7 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv7)
    conv7 = Dropout(droprate)(conv7)

    n_filters //= growth_factor
    if upconv:
        up8 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2),
                                           padding='same')(conv7), conv2])
    else:
        up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2])
    up8 = BatchNormalization()(up8)
    conv8 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv8)
    conv8 = Dropout(droprate)(conv8)

    n_filters //= growth_factor
    if upconv:
        up9 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2),
                                           padding='same')(conv8), conv1])
    else:
        up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1])
    conv9 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv9)

    # Per-pixel sigmoid output: probability of river for each pixel.
    conv10 = Conv2D(n_classes, (1, 1), activation='sigmoid')(conv9)

    model = Model(inputs=inputs, outputs=conv10)
    model.compile(optimizer=Adam(lr=1e-5), loss='binary_crossentropy',
                  metrics=['accuracy', f1_m, precision_m, recall_m])
    model.summary()

    return model
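
A minimal sketch of how this model factory might be exercised (assuming the TF1-era Keras stack pinned in .travis.yml; the array shapes and random data below are hypothetical stand-ins for real tiles):

```python
import numpy as np
from model import unet  # src/classification/model.py

# Build the default single-class model for 224x224 RGB tiles.
model = unet(n_classes=1, im_sz=224, n_channels=3)

# Hypothetical training tiles X (N, 224, 224, 3) and binary masks y (N, 224, 224, 1).
X = np.random.rand(4, 224, 224, 3).astype('float32')
y = (np.random.rand(4, 224, 224, 1) > 0.5).astype('float32')
model.fit(X, y, batch_size=2, epochs=1)

# Per-pixel river probabilities for new tiles.
probs = model.predict(X)
```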

140 changes: 64 additions & 76 deletions src/classification/mosaic.m
@@ -1,76 +1,64 @@
% Author: Samira Daneshgar-Asl
% License: MIT
% Copyright: 2018-2019

clear all
clc

[FileName, PathName] = uigetfile('*.tif', 'Select the 8 bit image file with 3 bands:');
File = fullfile(PathName, FileName);
[I, R] = geotiffread(File);
img=I;
clear I;

patch_size=800;

desired_row_size=(patch_size/2)*ceil(size(img,1)/(patch_size/2));
desired_col_size=(patch_size/2)*ceil(size(img,2)/(patch_size/2));

image = zeros(desired_row_size,desired_col_size,size(img,3),'int16');
image(1:size(img,1),1:size(img,2),:) = img;

a=1:patch_size/2:size(image, 1);
b=1:patch_size/2:size(image, 2);
row = 1:patch_size/2:a(1,end-1);
col = 1:patch_size/2:b(1,end-1);
A=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
B=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');

if isunix
path = 'data/WV_predicted';
elseif ispc
path = 'data\WV_predicted';
else
path = ''
disp 'Something went wrong';
end

files = dir(fullfile(path, '*.tif'));
files_ordered = natsortfiles({files.name});
totalFiles = numel(files);

k=1;
for i=1:size(row,2)
for j=1:size(col,2)
I=imread(fullfile('data\WV_predicted',files_ordered{1,k}));
A(row(i):row(i)+patch_size-1,col(j):col(j)+patch_size-1)=I;
B=max(A,B);
if k~=totalFiles
k=k+1;
else
end
end
end

B(sum(image,3)==0)=0;
xi = [col(1,1)-.5, col(1,end)+patch_size-1+.5];
yi = [row(1,1)-.5, row(1,end)+patch_size-1+.5];
[xlimits, ylimits] = intrinsicToWorld(R, xi, yi);
subR = R;
subR.RasterSize = size(B);
subR.XLimWorld = sort(xlimits);
subR.YLimWorld = sort(ylimits);
info = geotiffinfo(File);
writeFileName=[strtok(FileName, '.'),'-mask-predicted.tif'];
geotiffwrite(writeFileName,B,subR,'GeoKeyDirectoryTag',info.GeoTIFFTags.GeoKeyDirectoryTag,'TiffTags',struct('Compression',Tiff.Compression.None))

% Author: Samira Daneshgar-Asl, Ioannis Paraskevakos
% License: MIT
% Copyright: 2018-2019

function mosaic(FileName, FilePath, PredictedPath, WriteDir)
    File = fullfile(FilePath, FileName);
    [img, R] = geotiffread(File);
    if isunix
        path = strcat(PredictedPath, '/data/predicted_tiles/', strtok(FileName, '.'));
    elseif ispc
        path = strcat(PredictedPath, '\data\predicted_tiles\', strtok(FileName, '.'));
    else
        path = '';
        disp 'Something went wrong';
    end

    if ~exist(WriteDir, 'dir')
        mkdir(WriteDir);
    end

    patch_size = 800;

    % Pad the image up to a multiple of half the patch size.
    desired_row_size = (patch_size/2) * ceil(size(img,1) / (patch_size/2));
    desired_col_size = (patch_size/2) * ceil(size(img,2) / (patch_size/2));

    image = zeros(desired_row_size, desired_col_size, size(img,3), 'int8');
    image(1:size(img,1), 1:size(img,2), :) = img;

    a = 1:patch_size/2:size(image, 1);
    b = 1:patch_size/2:size(image, 2);
    row = 1:patch_size/2:a(1,end-1);
    col = 1:patch_size/2:b(1,end-1);
    A = zeros(a(1,end)+(patch_size/2)-1, b(1,end)+(patch_size/2)-1, 'single');
    B = zeros(a(1,end)+(patch_size/2)-1, b(1,end)+(patch_size/2)-1, 'single');

    files = dir(fullfile(path, '*.tif'));
    files_ordered = natsortfiles({files.name});
    totalFiles = numel(files);

    % Place each predicted tile; overlapping regions keep the maximum value.
    k = 1;
    for i = 1:size(row,2)
        for j = 1:size(col,2)
            I = imread(fullfile(path, files_ordered{1,k}));
            A(row(i):row(i)+patch_size-1, col(j):col(j)+patch_size-1) = I;
            B = max(A, B);
            if k ~= totalFiles
                k = k + 1;
            end
        end
    end

    % Zero out pixels with no data in the source image, then write a
    % georeferenced mask carrying over the original projection metadata.
    B(sum(image,3) == 0) = 0;
    xi = [col(1,1)-.5, col(1,end)+patch_size-1+.5];
    yi = [row(1,1)-.5, row(1,end)+patch_size-1+.5];
    [xlimits, ylimits] = intrinsicToWorld(R, xi, yi);
    subR = R;
    subR.RasterSize = size(B);
    subR.XLimWorld = sort(xlimits);
    subR.YLimWorld = sort(ylimits);
    info = geotiffinfo(File);
    writeFileName = fullfile(WriteDir, strcat(strtok(FileName, '.'), '-mask-predicted.tif'));
    geotiffwrite(writeFileName, B, subR, 'GeoKeyDirectoryTag', info.GeoTIFFTags.GeoKeyDirectoryTag, 'TiffTags', struct('Compression', Tiff.Compression.None))
end
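
A hypothetical invocation of the rewritten function from the MATLAB prompt (all paths are placeholders; natsortfiles must be on the MATLAB path):

```matlab
% Mosaic the predicted tiles for one scene back into a georeferenced mask.
mosaic('WV_scene.tif', '/path/to/images', '/path/to/run_dir', '/path/to/mosaics')
```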