From 56e6e58a4ed8cc39a2384e755d94abc8682ec52e Mon Sep 17 00:00:00 2001
From: Ioannis
Date: Mon, 1 Jul 2019 11:47:05 -0900
Subject: [PATCH 01/31] First commit of pipeline

---
 src/entk_script/entk_script.py | 263 +++++++++++++++++++++++++++++++++
 1 file changed, 263 insertions(+)
 create mode 100644 src/entk_script/entk_script.py

diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
new file mode 100644
index 0000000..2f978d2
--- /dev/null
+++ b/src/entk_script/entk_script.py
@@ -0,0 +1,263 @@
+"""
+Seals Use Case EnTK Analysis Script
+==========================================================
+
+This script contains the EnTK Pipeline script for the Seals Use Case
+
+Author: Ioannis Paraskevakos
+License: MIT
+Copyright: 2018-2019
+"""
+
+from __future__ import print_function
+import argparse
+import os
+import pandas as pd
+
+from radical.entk import Pipeline, Stage, Task, AppManager
+
+
+def generate_discover_pipeline(path):
+    '''
+    This function takes as input a path on Bridges and returns a pipeline
+    that will provide a file for all the images that exist in that path.
+    '''
+    pipeline = Pipeline()
+    pipeline.name = 'Disc'
+    stage = Stage()
+    stage.name = 'Disc-S0'
+    # Create Task 1, training
+    task = Task()
+    task.name = 'Disc-T0'
+    task.pre_exec = ['module load psc_path/1.1',
+                     'module load slurm/default',
+                     'module load intel/17.4',
+                     'module load python3',
+                     'source $SCRATCH/pytorchCuda/bin/activate',
+                     'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
+                     'python3.5/site-packages:$PYTHONPATH']
+    task.executable = 'python3'  # Assign executable to the task
+    task.arguments = ['image_disc.py', '%s' % path, '--filename=images.csv',
+                      '--filesize']
+    task.download_output_data = ['images.csv']
+    task.upload_input_data = ['image_disc.py']
+    task.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
+                     'thread_type': 'OpenMP'}
+    stage.add_tasks(task)
+    # Add Stage to the Pipeline
+    pipeline.add_stages(stage)
+
+    return pipeline
+
+
+def generate_pipeline(name, image, model_name, device):
+
+    '''
+    This function creates a pipeline for an image that will be analyzed.
+
+    :Arguments:
+        :name: Pipeline name, str
+        :image: image path, str
+        :image_size: image size in MBs, int
+        :tile_size: The size of each tile, int
+        :model_path: Path to the model file, str
+        :model_arch: Prediction Model Architecture, str
+        :model_name: Prediction Model Name, str
+        :hyperparam_set: Which hyperparameter set to use, str
+        :device: Which GPU device will be used by this pipeline, int
+    '''
+    # Create a Pipeline object
+    entk_pipeline = Pipeline()
+    entk_pipeline.name = name
+    # Create a Stage object
+    stage0 = Stage()
+    stage0.name = '%s-S0' % (name)
+    # Create Task 1, training
+    task0 = Task()
+    task0.name = '%s-T0' % stage0.name
+    task0.pre_exec = ['module load psc_path/1.1',
+                      'module load slurm/default',
+                      'module load intel/17.4',
+                      'module load python3',
+                      'source $SCRATCH/pytorchCuda/bin/activate',
+                      'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
+                      'python3.5/site-packages:$PYTHONPATH']
+    task0.executable = 'python3'  # Assign executable to the task
+    # Assign arguments for the task executable
+    task0.arguments = ['tile_raster.py', '--scale_bands=%s' % scale_bands,
+                       '--input_image=%s' % image.split('/')[-1],
+                       # This line points to the local filesystem of the node
+                       # that the tiling of the image happened.
+                       '--output_folder=$NODE_LFS_PATH/%s' % task0.name]
+    task0.link_input_data = [image]
+    task0.upload_input_data = [os.path.abspath('../tiling/tile_raster.py')]
+    task0.cpu_reqs = {'processes': 1, 'threads_per_process': 10,
+                      'thread_type': 'OpenMP'}
+    task0.lfs_per_process = image_size
+
+    stage0.add_tasks(task0)
+    # Add Stage to the Pipeline
+    entk_pipeline.add_stages(stage0)
+
+    # Create a Stage object
+    stage1 = Stage()
+    stage1.name = '%s-S1' % (name)
+    # Create Task 1, training
+    task1 = Task()
+    task1.name = '%s-T1' % stage1.name
+    task1.pre_exec = ['module load psc_path/1.1',
+                      'module load slurm/default',
+                      'module load intel/17.4',
+                      'module load python3',
+                      'module load cuda',
+                      'source $SCRATCH/pytorchCuda/bin/activate',
+                      'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
+                      'python3.5/site-packages:$PYTHONPATH',
+                      'export CUDA_VISIBLE_DEVICES=%d' % device]
+    task1.executable = 'python3'  # Assign executable to the task
+
+    # Assign arguments for the task executable
+    task1.arguments = ['predict_raster.py',
+                       '--input_image', image.split('/')[-1],
+                       '--model_architecture', model_arch,
+                       '--hyperparameter_set', hyperparam_set,
+                       '--training_set', training_set,
+                       '--test_folder', '$NODE_LFS_PATH/%s' % task0.name,
+                       '--model_path', './',
+                       '--output_folder', './%s' % image.split('/')[-1].
+                       split('.')[0]]
+    task1.link_input_data = ['$SHARED/%s.tar' % model_name]
+    task1.upload_input_data = [os.path.abspath('../predicting/' +
+                                               'predict_raster.py'),
+                               os.path.abspath('../predicting/' +
+                                               'predict_sealnet.py'),
+                               os.path.abspath('../utils/')]
+    task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
+                      'thread_type': 'OpenMP'}
+    task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
+                      'thread_type': 'OpenMP'}
+    # Download resuting images
+    task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
+                                                split('.')[0],
+                                                image.split('/')[-1])]
+    task1.tag = task0.name
+
+    stage1.add_tasks(task1)
+    # Add Stage to the Pipeline
+    entk_pipeline.add_stages(stage1)
+
+    return entk_pipeline
+
+
+def create_aggregated_output(image_names, path):
+
+    '''
+    This function takes a list of images and aggregates the results into a
+    single CSV file
+    '''
+
+    aggr_results = pd.DataFrame(columns=['Image', 'Seals'])
+    for image in image_names:
+        image_pred = pd.read_csv(path + image.split('/')[-1] +
+                                 '_predictions.csv')
+        aggr_results.loc[len(aggr_results)] = [image.split('/')[-1],
+                                               image_pred['predictions'].sum()]
+
+    aggr_results.to_csv(path + '/seal_predictions.csv', index=False)
+
+
+def args_parser():
+
+    '''
+    Argument Parsing Function for the script.
+    '''
+    parser = argparse.ArgumentParser(description='Executes the Seals ' +
+                                     'pipeline for a set of images')
+
+    parser.add_argument('-c', '--cpus', type=int, default=1,
+                        help='The number of CPUs required for execution')
+    parser.add_argument('-g', '--gpus', type=int, default=1,
+                        help='The number of GPUs required for execution')
+    parser.add_argument('-ip', '--input_dir', type=str,
+                        help='Images input directory on the selected resource')
+    parser.add_argument('-m', '--model', type=str,
+                        help='Which model will be used')
+    parser.add_argument('-p', '--project', type=str,
+                        help='The project that will be charged')
+    parser.add_argument('-q', '--queue', type=str,
+                        help='The queue from which resources are requested.')
+    parser.add_argument('-r', '--resource', type=str,
+                        help='HPC resource on which the script will run.')
+    parser.add_argument('-w', '--walltime', type=int,
+                        help='The amount of time resources are requested in' +
+                        ' minutes')
+    parser.add_argument('--scale_bands', type=str,
+                        help='for multi-scale models, string with size of' +
+                        ' scale bands separated by spaces')
+    parser.add_argument('--name', type=str,
+                        help='name of the execution. It has to be a unique' +
+                        ' value')
+
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+
+    args = args_parser()
+
+    res_dict = {'resource': args.resource,
+                'walltime': args.walltime,
+                'cpus': args.cpus,
+                'gpus': args.gpus,
+                'schema': 'gsissh',
+                'project': args.project,
+                'queue': args.queue}
+
+    try:
+
+    # Create Application Manager
+        appman = AppManager(port=32773, hostname='localhost', name=args.name,
+                        autoterminate=False, write_workflow=True)
+
+    # Assign resource manager to the Application Manager
+        appman.resource_desc = res_dict
+        appman.shared_data = [os.path.abspath('../../models/Heatmap-Cnt/' +
+                                              'UnetCntWRN/' +
+                                              'UnetCntWRN_ts-vanilla.tar')]
+    # Create a task that discovers the dataset
+        #disc_pipeline = generate_discover_pipeline(args.input_dir)
+        #appman.workflow = set([disc_pipeline])
+
+    # Run
+        #appman.run()
+
+        images = pd.read_csv('Des3Images.csv')
+        #images.sort_values(by='Size',axis=0,inplace=True)
+        #images.reset_index(drop='index',inplace=True)
+        print('Images Found:', len(images))
+        # Create a single pipeline per image
+        pipelines = list()
+        dev = 0
+        for idx in range(0,3097):
+            p1 = generate_pipeline(name='P%s' % idx,
+                               image=images['Filename'][idx],
+                               image_size=images['Size'][idx],
+                               scale_bands=args.scale_bands,
+                               model_arch=args.model,
+                               training_set='test_vanilla',
+                               model_name='UnetCntWRN_ts-vanilla',
+                               hyperparam_set='A',
+                               device=dev)
+            dev = dev ^ 1
+            pipelines.append(p1)
+    # Assign the workflow as a set of Pipelines to the Application Manager
+        appman.workflow = set(pipelines)
+
+    # Run the Application Manager
+        appman.run()
+
+        print('Done')
+
+    finally:
+        # Now that all images have been analyzed, release the resources.
+        appman.resource_terminate()
From 968f7d3b789842fe3a814981a4f270915c921a65 Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Tue, 2 Jul 2019 10:51:11 -0400
Subject: [PATCH 02/31] multipagetiff as a function and discovery python file
 included

---
 src/classification/tmp        |  1 -
 src/entk_script/image_disc.py | 52 +++++++++++++++++++++++++++++++++++
 src/entk_script/tmp           |  1 -
 src/utils/multipagetiff.m     | 15 ++++++++++
 src/utils/tmp                 |  1 -
 5 files changed, 67 insertions(+), 3 deletions(-)
 delete mode 100644 src/classification/tmp
 create mode 100644 src/entk_script/image_disc.py
 delete mode 100644 src/entk_script/tmp
 create mode 100644 src/utils/multipagetiff.m
 delete mode 100644 src/utils/tmp

diff --git a/src/classification/tmp b/src/classification/tmp
deleted file mode 100644
index d00491f..0000000
--- a/src/classification/tmp
+++ /dev/null
@@ -1 +0,0 @@
-1
diff --git a/src/entk_script/image_disc.py b/src/entk_script/image_disc.py
new file mode 100644
index 0000000..9c0912d
--- /dev/null
+++ b/src/entk_script/image_disc.py
@@ -0,0 +1,52 @@
+"""
+Image Discovery Kernel
+==========================================================
+This script takes as input a path and returns a dataframe
+with all the images and their size.
+Author: Ioannis Paraskevakos
+License: MIT
+Copyright: 2018-2019
+"""
+from glob import glob
+import argparse
+import os
+import math
+import pandas as pd
+
+
+def image_discovery(path, filename='list.csv', filesize=False):
+    """
+    This function creates a dataframe with image names and size from a path.
+    :Arguments:
+        :path: Images path, str
+        :filename: The filename of the CSV file containing the dataframe.
+                   Default Value: list.csv
+        :filesize: Whether or not the image sizes should be included to the
+                   dataframe. Default value: False
+    """
+
+    filepaths = glob(path + '/*.tif')
+    if filesize:
+        dataset_df = pd.DataFrame(columns=['Filename', 'Size'])
+        for filepath in filepaths:
+            filesize = int(math.ceil(os.path.getsize(filepath)/1024/1024))
+            dataset_df.loc[len(dataset_df)] = [filepath, filesize]
+    else:
+        dataset_df = pd.DataFrame(columns=['Filename'])
+        for filepath in filepaths:
+            dataset_df.loc[len(dataset_df)] = [filepath]
+
+    dataset_df.to_csv(filename, index=False)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('path', help='Path to a remote resource where data \
+                        are')
+    parser.add_argument('--filename', type=str, default='list.csv',
+                        help='Name of the output CSV file')
+    parser.add_argument('--filesize', help='Include the filesize to the \
+                        output CSV', action='store_true')
+    args = parser.parse_args()
+
+    image_discovery(args.path, args.filename, args.filesize)
diff --git a/src/entk_script/tmp b/src/entk_script/tmp
deleted file mode 100644
index d00491f..0000000
--- a/src/entk_script/tmp
+++ /dev/null
@@ -1 +0,0 @@
-1
diff --git a/src/utils/multipagetiff.m b/src/utils/multipagetiff.m
new file mode 100644
index 0000000..77aad63
--- /dev/null
+++ b/src/utils/multipagetiff.m
@@ -0,0 +1,15 @@
+% Author: Samira Daneshgar-Asl
+% License: MIT
+% Copyright: 2018-2019
+
+% when we have an 8-bit image with 3 bands and we want to save it as a multi-page
+
+function multipagetiff(ReadImage, WriteDir)
+
+if ~exist(WriteDir, 'dir')
+    mkdir(WriteDir);
+end
+
+image = geotiffread(ReadImage);
+writeFileName = strcat(WriteDir,'/multipage-',num2str(ReadImage));
+saveastiff(image,writeFileName);
diff --git a/src/utils/tmp b/src/utils/tmp
deleted file mode 100644
index d00491f..0000000
--- a/src/utils/tmp
+++ /dev/null
@@ -1 +0,0 @@
-1
From 854159f29127400fffab5e3f02ab04fd02793a64 Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Tue, 2 Jul 2019 11:39:06 -0400
Subject: [PATCH 03/31] EnTK runs multipage

---
 src/entk_script/entk_script.py | 174 +++++++-------------
 src/utils/saveastiff.m         | 303 +++++++++++++++++++++++++++++++++
 2 files changed, 368 insertions(+), 109 deletions(-)
 create mode 100644 src/utils/saveastiff.m

diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index 2f978d2..e301ec8 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -29,13 +29,8 @@ def generate_discover_pipeline(path):
     # Create Task 1, training
     task = Task()
     task.name = 'Disc-T0'
-    task.pre_exec = ['module load psc_path/1.1',
-                     'module load slurm/default',
-                     'module load intel/17.4',
-                     'module load python3',
-                     'source $SCRATCH/pytorchCuda/bin/activate',
-                     'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
-                     'python3.5/site-packages:$PYTHONPATH']
+    task.pre_exec = ['module load anaconda3/2019.03',
+                     'source activate keras-gpu']
     task.executable = 'python3'  # Assign executable to the task
     task.arguments = ['image_disc.py', '%s' % path, '--filename=images.csv',
                       '--filesize']
@@ -50,7 +45,7 @@ def generate_discover_pipeline(path):
     return pipeline
 
 
-def generate_pipeline(name, image, model_name, device):
+def generate_pipeline(name, image, image_size, model_name, device):
 
     '''
     This function creates a pipeline for an image that will be analyzed.
@@ -59,11 +54,7 @@ def generate_pipeline(name, image, image_size, model_name, device):
         :name: Pipeline name, str
        :image: image path, str
         :image_size: image size in MBs, int
-        :tile_size: The size of each tile, int
-        :model_path: Path to the model file, str
-        :model_arch: Prediction Model Architecture, str
         :model_name: Prediction Model Name, str
-        :hyperparam_set: Which hyperparameter set to use, str
         :device: Which GPU device will be used by this pipeline, int
     '''
     # Create a Pipeline object
@@ -75,23 +66,16 @@ def generate_pipeline(name, image, image_size, model_name, device):
     # Create Task 1, training
     task0 = Task()
     task0.name = '%s-T0' % stage0.name
-    task0.pre_exec = ['module load psc_path/1.1',
-                      'module load slurm/default',
-                      'module load intel/17.4',
-                      'module load python3',
-                      'source $SCRATCH/pytorchCuda/bin/activate',
-                      'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
-                      'python3.5/site-packages:$PYTHONPATH']
-    task0.executable = 'python3'  # Assign executable to the task
+    task0.pre_exec = ['module load matlab']
+    task0.executable = 'matlab'  # Assign executable to the task
     # Assign arguments for the task executable
-    task0.arguments = ['tile_raster.py', '--scale_bands=%s' % scale_bands,
-                       '--input_image=%s' % image.split('/')[-1],
-                       # This line points to the local filesystem of the node
-                       # that the tiling of the image happened.
-                       '--output_folder=$NODE_LFS_PATH/%s' % task0.name]
+    task0.arguments = ['-nodisplay', '-nosplash', '-r',
+                       'multipagetiff("%s","$NODE_LFS_PATH/%s")' % (image,
+                                                                    task0.name)]
     task0.link_input_data = [image]
-    task0.upload_input_data = [os.path.abspath('../tiling/tile_raster.py')]
-    task0.cpu_reqs = {'processes': 1, 'threads_per_process': 10,
+    task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'),
+                               os.path.abspath('../utils/saveastiff.m')]
+    task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'thread_type': 'OpenMP'}
     task0.lfs_per_process = image_size
 
@@ -99,73 +83,56 @@ def generate_pipeline(name, image, image_size, model_name, device):
     # Add Stage to the Pipeline
     entk_pipeline.add_stages(stage0)
 
-    # Create a Stage object
-    stage1 = Stage()
-    stage1.name = '%s-S1' % (name)
-    # Create Task 1, training
-    task1 = Task()
-    task1.name = '%s-T1' % stage1.name
-    task1.pre_exec = ['module load psc_path/1.1',
-                      'module load slurm/default',
-                      'module load intel/17.4',
-                      'module load python3',
-                      'module load cuda',
-                      'source $SCRATCH/pytorchCuda/bin/activate',
-                      'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
-                      'python3.5/site-packages:$PYTHONPATH',
-                      'export CUDA_VISIBLE_DEVICES=%d' % device]
-    task1.executable = 'python3'  # Assign executable to the task
-
-    # Assign arguments for the task executable
-    task1.arguments = ['predict_raster.py',
-                       '--input_image', image.split('/')[-1],
-                       '--model_architecture', model_arch,
-                       '--hyperparameter_set', hyperparam_set,
-                       '--training_set', training_set,
-                       '--test_folder', '$NODE_LFS_PATH/%s' % task0.name,
-                       '--model_path', './',
-                       '--output_folder', './%s' % image.split('/')[-1].
-                       split('.')[0]]
-    task1.link_input_data = ['$SHARED/%s.tar' % model_name]
-    task1.upload_input_data = [os.path.abspath('../predicting/' +
-                                               'predict_raster.py'),
-                               os.path.abspath('../predicting/' +
-                                               'predict_sealnet.py'),
-                               os.path.abspath('../utils/')]
-    task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
-                      'thread_type': 'OpenMP'}
-    task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
-                      'thread_type': 'OpenMP'}
-    # Download resuting images
-    task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
-                                                split('.')[0],
-                                                image.split('/')[-1])]
-    task1.tag = task0.name
-
-    stage1.add_tasks(task1)
-    # Add Stage to the Pipeline
-    entk_pipeline.add_stages(stage1)
+    ## Create a Stage object
+    #stage1 = Stage()
+    #stage1.name = '%s-S1' % (name)
+    ## Create Task 1, training
+    #task1 = Task()
+    #task1.name = '%s-T1' % stage1.name
+    #task1.pre_exec = ['module load psc_path/1.1',
+    #                  'module load slurm/default',
+    #                  'module load intel/17.4',
+    #                  'module load python3',
+    #                  'module load cuda',
+    #                  'source $SCRATCH/pytorchCuda/bin/activate',
+    #                  'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
+    #                  'python3.5/site-packages:$PYTHONPATH',
+    #                  'export CUDA_VISIBLE_DEVICES=%d' % device]
+    #task1.executable = 'python3'  # Assign executable to the task
+#
+    ## Assign arguments for the task executable
+    #task1.arguments = ['predict_raster.py',
+    #                   '--input_image', image.split('/')[-1],
+    #                   '--model_architecture', model_arch,
+    #                   '--hyperparameter_set', hyperparam_set,
+    #                   '--training_set', training_set,
+    #                   '--test_folder', '$NODE_LFS_PATH/%s' % task0.name,
+    #                   '--model_path', './',
+    #                   '--output_folder', './%s' % image.split('/')[-1].
+    #                   split('.')[0]]
+    #task1.link_input_data = ['$SHARED/%s.tar' % model_name]
+    #task1.upload_input_data = [os.path.abspath('../predicting/' +
+    #                                           'predict_raster.py'),
+    #                           os.path.abspath('../predicting/' +
+    #                                           'predict_sealnet.py'),
+    #                           os.path.abspath('../utils/')]
+    #task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
+    #                  'thread_type': 'OpenMP'}
+    #task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
+    #                  'thread_type': 'OpenMP'}
+    ## Download resuting images
+    #task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
+    #                                            split('.')[0],
+    #                                            image.split('/')[-1])]
+    #task1.tag = task0.name
+#
+    #stage1.add_tasks(task1)
+    ## Add Stage to the Pipeline
+    #entk_pipeline.add_stages(stage1)
 
     return entk_pipeline
 
 
-def create_aggregated_output(image_names, path):
-
-    '''
-    This function takes a list of images and aggregates the results into a
-    single CSV file
-    '''
-
-    aggr_results = pd.DataFrame(columns=['Image', 'Seals'])
-    for image in image_names:
-        image_pred = pd.read_csv(path + image.split('/')[-1] +
-                                 '_predictions.csv')
-        aggr_results.loc[len(aggr_results)] = [image.split('/')[-1],
-                                               image_pred['predictions'].sum()]
-
-    aggr_results.to_csv(path + '/seal_predictions.csv', index=False)
-
-
 def args_parser():
 
     '''
@@ -191,9 +158,6 @@ def args_parser():
     parser.add_argument('-w', '--walltime', type=int,
                         help='The amount of time resources are requested in' +
                         ' minutes')
-    parser.add_argument('--scale_bands', type=str,
-                        help='for multi-scale models, string with size of' +
-                        ' scale bands separated by spaces')
     parser.add_argument('--name', type=str,
                         help='name of the execution. It has to be a unique' +
                         ' value')
@@ -218,32 +185,24 @@ def args_parser():
 
     # Assign resource manager to the Application Manager
         appman.resource_desc = res_dict
-        appman.shared_data = [os.path.abspath('../../models/Heatmap-Cnt/' +
-                                              'UnetCntWRN/' +
-                                              'UnetCntWRN_ts-vanilla.tar')]
+        appman.shared_data = [os.path.abspath('../../models/unet_weights.hdf5')]
     # Create a task that discovers the dataset
-        #disc_pipeline = generate_discover_pipeline(args.input_dir)
-        #appman.workflow = set([disc_pipeline])
+        disc_pipeline = generate_discover_pipeline(args.input_dir)
+        appman.workflow = set([disc_pipeline])
 
     # Run
-        #appman.run()
-
-        images = pd.read_csv('Des3Images.csv')
-        #images.sort_values(by='Size',axis=0,inplace=True)
-        #images.reset_index(drop='index',inplace=True)
+        appman.run()
+        images = pd.from_csv('images.csv')
+
         print('Images Found:', len(images))
         # Create a single pipeline per image
         pipelines = list()
         dev = 0
-        for idx in range(0,3097):
+        for idx in range(0,len(images)):
             p1 = generate_pipeline(name='P%s' % idx,
                                image=images['Filename'][idx],
                                image_size=images['Size'][idx],
-                               scale_bands=args.scale_bands,
-                               model_arch=args.model,
-                               training_set='test_vanilla',
-                               model_name='UnetCntWRN_ts-vanilla',
-                               hyperparam_set='A',
+                               model_name='test',
                                device=dev)
             dev = dev ^ 1
             pipelines.append(p1)
diff --git a/src/utils/saveastiff.m b/src/utils/saveastiff.m
new file mode 100644
index 0000000..76d4db3
--- /dev/null
+++ b/src/utils/saveastiff.m
@@ -0,0 +1,303 @@
+function res = saveastiff(data, path, options)
+% options.color
+%   : true or FALSE
+%   : If this is true, third dimension should be 3 and the data is saved as a color image.
+% options.compress
+%   : 'no', 'lzw', 'jpeg' or 'adobe'.
+%     Compression type.
+%       'no'    : Uncompressed(Default)
+%       'lzw'   : lossless LZW
+%       'jpeg'  : lossy JPEG (When using JPEG compression, ImageWidth,
+%                 ImageLength, and RowsPerStrip must be multiples of 16.)
+%       'adobe' : lossless Adobe-style
+% options.message
+%   : TRUE or false.
+%     If this is false, all messages are skipped.
+% options.append
+%   : true or FALSE
+%     If path is exist, the data is appended to an existing file.
+%     If path is not exist, this options is ignored.
+% options.overwrite
+%   : true or FALSE
+%     Overwrite to an existing file.
+% options.big
+%   : true or FALSE,
+%     Use 64 bit addressing and allows for files > 4GB
+%
+% Defalut value of 'options' is
+%     options.color = false;
+%     options.compress = 'no';
+%     options.message = true;
+%     options.append = false;
+%     options.overwrite = false;
+%     options.big = false;
+%
+% res : Return value. It is 0 when the function is finished with no error.
+%       If an error is occured in the function, it will have a positive
+%       number (error code).
+%
+% Copyright (c) 2012, YoonOh Tak
+% All rights reserved.
+%
+% Redistribution and use in source and binary forms, with or without
+% modification, are permitted provided that the following conditions are
+% met:
+%
+%     * Redistributions of source code must retain the above copyright
+%       notice, this list of conditions and the following disclaimer.
+%     * Redistributions in binary form must reproduce the above copyright
+%       notice, this list of conditions and the following disclaimer in
+%       the documentation and/or other materials provided with the distribution
+%     * Neither the name of the Gwangju Institute of Science and Technology (GIST), Republic of Korea nor the names
+%       of its contributors may be used to endorse or promote products derived
+%       from this software without specific prior written permission.
+%
+% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+% POSSIBILITY OF SUCH DAMAGE.
+
+tStart = tic;
+errcode = 0;
+try
+%% Init options parameter
+if nargin < 3 % Use default options
+    options.color = false;
+    options.compress = 'no';
+    options.message = true;
+    options.append = false;
+    options.overwrite = false;
+end
+if ~isfield(options, 'message'),   options.message = true; end
+if ~isfield(options, 'append'),    options.append = false; end
+if ~isfield(options, 'compress'),  options.compress = 'no'; end
+if ~isfield(options, 'color'),     options.color = false; end
+if ~isfield(options, 'overwrite'), options.overwrite = false; end
+if isfield(options, 'big') == 0,   options.big = false; end
+
+if isempty(data), errcode = 1; assert(false); end
+if (options.color == false && ndims(data) > 3) || ...
+    (options.color == true && ndims(data) > 4)
+    % Maximum dimension of a grayscale image is 3 of [height, width, frame]
+    % Maximum dimension of a color image is 4 of [height, width, color, frame]
+    errcode = 2; assert(false);
+end
+
+%% Get image informations
+% http://www.awaresystems.be/imaging/tiff/tifftags/photometricinterpretation.html
+if ~options.color
+    if ndims(data) >= 4, errcode = 2; assert(false); end;
+    [height, width, depth] = size(data);
+    tagstruct.Photometric = Tiff.Photometric.MinIsBlack;
+%     tagstruct.Photometric = Tiff.Photometric.MinIsWhite;
+%     tagstruct.Photometric = Tiff.Photometric.Mask;
+%     tagstruct.Photometric = Tiff.Photometric.Separated;
+else
+    if ndims(data) >= 5, errcode = 2; assert(false); end;
+    [height, width, cc, depth] = size(data); % cc: color channels. 3: rgb, 4: rgb with alpha channel
+    if cc ~= 3 && cc ~= 4, errcode = 3; assert(false); end;
+    tagstruct.Photometric = Tiff.Photometric.RGB;
+%     tagstruct.Photometric = Tiff.Photometric.CIELab;
+%     tagstruct.Photometric = Tiff.Photometric.ICCLab;
+%     tagstruct.Photometric = Tiff.Photometric.ITULab;
+%     (Unsupported)tagstruct.Photometric = Tiff.Photometric.Palette;
+%     (Unsupported)tagstruct.Photometric = Tiff.Photometric.YCbCr;
+end
+tagstruct.ImageLength = height;
+tagstruct.ImageWidth = width;
+tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky; % (RGB RGB,RGB RGB,RGB RGB), http://www.awaresystems.be/imaging/tiff/tifftags/planarconfiguration.html
+% (Unsupported)tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Separate; % (RRR RRR, GGG GGG, BBB BBB), % http://www.awaresystems.be/imaging/tiff/tifftags/planarconfiguration.html
+
+%% Complex number
+% http://www.awaresystems.be/imaging/tiff/tifftags/samplesperpixel.html
+if ~options.color && isreal(data) % Grayscale image with real numbers
+    tagstruct.SamplesPerPixel = 1;
+    data = reshape(data, height, width, 1, depth);
+elseif ~options.color && ~isreal(data) % Grayscale image with complex numbers
+    tagstruct.SamplesPerPixel = 2;
+    data = reshape([real(data) imag(data)], height, width, 2, depth);
+elseif options.color && isreal(data) % Color image with real numbers
+    tagstruct.SamplesPerPixel = cc;
+    if cc == 4
+        tagstruct.ExtraSamples = Tiff.ExtraSamples.AssociatedAlpha; % The forth channel is alpha channel
+    end
+    data = reshape(data, height, width, cc, depth);
+elseif options.color && ~isreal(data) % Color image with complex numbers
+    tagstruct.SamplesPerPixel = cc * 2;
+    if cc == 3
+        tagstruct.ExtraSamples = repmat(Tiff.ExtraSamples.Unspecified, 1, 3); % 3(real)+3(imag) = 6 = 3(rgb) + 3(Extra)
+    else
+        tagstruct.ExtraSamples = repmat(Tiff.ExtraSamples.Unspecified, 1, 5); % 4(real)+4(imag) = 8 = 3(rgb) + 5(Extra)
+    end
+    data = reshape([real(data) imag(data)], height, width, cc*2, depth);
+end
+
+%% Image compression
+% http://www.awaresystems.be/imaging/tiff/tifftags/compression.html
+switch lower(options.compress)
+    case 'no'
+        tagstruct.Compression = Tiff.Compression.None;
+    case 'lzw'
+        tagstruct.Compression = Tiff.Compression.LZW;
+    case 'jpeg'
+        tagstruct.Compression = Tiff.Compression.JPEG;
+    case 'adobe'
+        tagstruct.Compression = Tiff.Compression.AdobeDeflate;
+    otherwise
+        % Use tag nubmer in http://www.awaresystems.be/imaging/tiff/tifftags/compression.html
+        tagstruct.Compression = options.compress;
+end
+
+%% Sample format
+% http://www.awaresystems.be/imaging/tiff/tifftags/sampleformat.html
+switch class(data)
+    % Unsupported Matlab data type: char, logical, cell, struct, function_handle, class.
+    case {'uint8', 'uint16', 'uint32'}
+        tagstruct.SampleFormat = Tiff.SampleFormat.UInt;
+    case {'int8', 'int16', 'int32'}
+        tagstruct.SampleFormat = Tiff.SampleFormat.Int;
+        if options.color
+            errcode = 4; assert(false);
+        end
+    case {'single', 'double', 'uint64', 'int64'}
+        tagstruct.SampleFormat = Tiff.SampleFormat.IEEEFP;
+    otherwise
+        % (Unsupported)Void, ComplexInt, ComplexIEEEFP
+        errcode = 5; assert(false);
+end
+
+%% Bits per sample
+% http://www.awaresystems.be/imaging/tiff/tifftags/bitspersample.html
+switch class(data)
+    case {'uint8', 'int8'}
+        tagstruct.BitsPerSample = 8;
+    case {'uint16', 'int16'}
+        tagstruct.BitsPerSample = 16;
+    case {'uint32', 'int32'}
+        tagstruct.BitsPerSample = 32;
+    case {'single'}
+        tagstruct.BitsPerSample = 32;
+    case {'double', 'uint64', 'int64'}
+        tagstruct.BitsPerSample = 64;
+    otherwise
+        errcode = 5; assert(false);
+end
+
+%% Rows per strip
+maxstripsize = 8*1024;
+tagstruct.RowsPerStrip = ceil(maxstripsize/(width*(tagstruct.BitsPerSample/8)*size(data,3))); % http://www.awaresystems.be/imaging/tiff/tifftags/rowsperstrip.html
+if tagstruct.Compression == Tiff.Compression.JPEG
+    tagstruct.RowsPerStrip = max(16,round(tagstruct.RowsPerStrip/16)*16);
+end
+
+%% Overwrite check
+if exist(path, 'file') && ~options.append
+    if ~options.overwrite
+        errcode = 6; assert(false);
+    end
+end
+
+%% Save path configuration
+path_parent = pwd;
+[pathstr, fname, fext] = fileparts(path);
+if ~isempty(pathstr)
+    if ~exist(pathstr, 'dir')
+        mkdir(pathstr);
+    end
+    cd(pathstr);
+end
+
+%% Write image data to a file
+file_opening_error_count = 0;
+while ~exist('tfile', 'var')
+    try
+        if ~options.append % Make a new file
+            s=whos('data');
+            if s.bytes > 2^32-1 || options.big
+                tfile = Tiff([fname, fext], 'w8'); % Big Tiff file
+            else
+                tfile = Tiff([fname, fext], 'w');
+            end
+        else
+            if ~exist([fname, fext], 'file') % Make a new file
+                s=whos('data');
+                if s.bytes > 2^32-1 || options.big
+                    tfile = Tiff([fname, fext], 'w8'); % Big Tiff file
+                else
+                    tfile = Tiff([fname, fext], 'w');
+                end
+            else % Append to an existing file
+                tfile = Tiff([fname, fext], 'r+');
+                while ~tfile.lastDirectory(); % Append a new image to the last directory of an exiting file
+                    tfile.nextDirectory();
+                end
+                tfile.writeDirectory();
+            end
+        end
+    catch
+        file_opening_error_count = file_opening_error_count + 1;
+        pause(0.1);
+        if file_opening_error_count > 5 % automatically retry to open for 5 times.
+            reply = input('Failed to open the file. Do you wish to retry? Y/n: ', 's');
+            if isempty(reply) || any(upper(reply) == 'Y')
+                file_opening_error_count = 0;
+            else
+                errcode = 7;
+                assert(false);
+            end
+        end
+    end
+end
+
+for d = 1:depth
+    tfile.setTag(tagstruct);
+    tfile.write(data(:, :, :, d));
+    if d ~= depth
+        tfile.writeDirectory();
+    end
+end
+
+tfile.close();
+if exist('path_parent', 'var'), cd(path_parent); end
+
+tElapsed = toc(tStart);
+if options.message
+    display(sprintf('The file was saved successfully. Elapsed time : %.3f s.', tElapsed));
+end
+
+catch exception
+%% Exception management
+    if exist('tfile', 'var'), tfile.close(); end
+    switch errcode
+        case 1
+            if options.message, error '''data'' is empty.'; end;
+        case 2
+            if options.message, error 'Data dimension is too large.'; end;
+        case 3
+            if options.message, error 'Third dimesion (color depth) should be 3 or 4.'; end;
+        case 4
+            if options.message, error 'Color image cannot have int8, int16 or int32 format.'; end;
+        case 5
+            if options.message, error 'Unsupported Matlab data type. (char, logical, cell, struct, function_handle, class)'; end;
+        case 6
+            if options.message, error 'File already exists.'; end;
+        case 7
+            if options.message, error(['Failed to open the file ''' path '''.']); end;
+        otherwise
+            if exist('fname', 'var') && exist('fext', 'var')
+                delete([fname fext]);
+            end
+            if exist('path_parent', 'var'), cd(path_parent); end
+            rethrow(exception);
+    end
+    if exist('path_parent', 'var'), cd(path_parent); end
+end
+res = errcode;
+end
\ No newline at end of file
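
NOTE: saveastiff is driven by the options struct documented in its header; a minimal
standalone MATLAB sketch (the data here is synthetic, not from this pipeline):

    img = uint8(randi(255, 256, 256, 3));   % fake 8-bit, 3-band image
    opts.color = true;                       % treat dimension 3 as RGB
    opts.compress = 'lzw';                   % lossless LZW
    opts.overwrite = true;
    res = saveastiff(img, 'multipage-test.tif', opts);  % res == 0 on success

multipagetiff.m itself calls saveastiff with no options struct, so the defaults apply:
the three bands are written as three grayscale pages (hence "multi-page"), uncompressed.
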
From 792f41ba96e6b81bceb60c67e41ed5d45259ff9d Mon Sep 17 00:00:00 2001
From: Ioannis
Date: Wed, 3 Jul 2019 04:53:20 -0900
Subject: [PATCH 04/31] Fixing matlab input

---
 src/entk_script/entk_script.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index e301ec8..e37f632 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -69,12 +69,12 @@ def generate_pipeline(name, image, image_size, model_name, device):
     task0.pre_exec = ['module load matlab']
     task0.executable = 'matlab'  # Assign executable to the task
     # Assign arguments for the task executable
-    task0.arguments = ['-nodisplay', '-nosplash', '-r',
-                       'multipagetiff("%s","$NODE_LFS_PATH/%s")' % (image,
-                                                                    task0.name)]
-    task0.link_input_data = [image]
+    task0.arguments = ["-nodisplay", "-nosplash", "-r",
+                       "multipagetiff('%s','$NODE_LFS_PATH/%s');exit" % (image.split('/')[-1],
+                                                                         task0.name)]
     task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'),
                                os.path.abspath('../utils/saveastiff.m')]
+    task0.link_input_data = [image]
     task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'thread_type': 'OpenMP'}
     task0.lfs_per_process = image_size
@@ -192,7 +192,8 @@ def args_parser():
 
     # Run
         appman.run()
-        images = pd.from_csv('images.csv')
+        print('Run Discovery')
+        images = pd.read_csv('images.csv')
 
         print('Images Found:', len(images))
         # Create a single pipeline per image
@@ -216,4 +217,5 @@ def args_parser():
 
     finally:
         # Now that all images have been analyzed, release the resources.
+        print('Closing resources')
         appman.resource_terminate()
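
NOTE: with the quoting fixed above, the tiling task expands to a batch-mode MATLAB
command of roughly this shape (the image and task names are illustrative):

    matlab -nodisplay -nosplash -r "multipagetiff('image-1.tif','$NODE_LFS_PATH/P0-S0-T0');exit"

The trailing ;exit matters: without it MATLAB would stay at its interactive prompt after
the function returns, and the task would never be seen as finished.
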
From 5957443b2fef04f7f23b18bde45ea81d1f5debf4 Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Wed, 3 Jul 2019 10:53:07 -0400
Subject: [PATCH 05/31] Connecting predict in the pipeline

---
 src/classification/predict.py  | 74 +++++++++++++++++++---------------
 src/entk_script/entk_script.py | 74 ++++++++++++++--------------------
 2 files changed, 72 insertions(+), 76 deletions(-)

diff --git a/src/classification/predict.py b/src/classification/predict.py
index e4bccb4..c500d31 100644
--- a/src/classification/predict.py
+++ b/src/classification/predict.py
@@ -1,31 +1,17 @@
 """
-Author: Samira Daneshgar-Asl
+Authors: Samira Daneshgar-Asl, Ioannis Paraskevakos
 License: MIT
 Copyright: 2018-2019
 """
+import os
 import math
+import argparse
 import numpy as np
 import tifffile as tiff
-import os
-import sys
 
 from train_unet import weights_path, get_model, normalize, PATCH_SZ, N_CLASSES
 
-if not os.path.exists('data/WV_predicted'):
-    os.makedirs('data/WV_predicted')
-
-image = normalize(tiff.imread('8bit-3bands Multi-Page Images/name of the multi-page WV image.tif').transpose([1, 2, 0]))
-wind_row, wind_col = 800,800 # dimensions of the image
-windowSize = 800
-stepSize=400
-
-desired_row_size=stepSize*math.ceil(image.shape[0]/stepSize)
-desired_col_size=stepSize*math.ceil(image.shape[1]/stepSize)
-img = np.zeros((desired_row_size,desired_col_size,image.shape[2]), dtype=image.dtype)
-img[:image.shape[0],:image.shape[1]] = image
-
-
 def predict(x, model, patch_sz=160, n_classes=2):
     img_height = x.shape[0]
     img_width = x.shape[1]
@@ -35,7 +21,8 @@ def predict(x, model, patch_sz=160, n_classes=2):
     npatches_horizontal = math.ceil(img_width / patch_sz)
     extended_height = patch_sz * npatches_vertical
     extended_width = patch_sz * npatches_horizontal
-    ext_x = np.zeros(shape=(extended_height, extended_width, n_channels), dtype=np.float32)
+    ext_x = np.zeros(shape=(extended_height, extended_width, n_channels),
+                     dtype=np.float32)
     # fill extended image with mirrors:
     ext_x[:img_height, :img_width, :] = x
     for i in range(img_height, extended_height):
@@ -53,7 +40,8 @@ def predict(x, model, patch_sz=160, n_classes=2):
     patches_array = np.asarray(patches_list)
     # predictions:
     patches_predict = model.predict(patches_array, batch_size=4)
-    prediction = np.zeros(shape=(extended_height, extended_width, n_classes), dtype=np.float32)
+    prediction = np.zeros(shape=(extended_height, extended_width, n_classes),
+                          dtype=np.float32)
     for k in range(patches_predict.shape[0]):
         i = k // npatches_horizontal
         j = k % npatches_vertical
@@ -66,23 +54,45 @@ def predict(x, model, patch_sz=160, n_classes=2):
 def sliding_window(img, stepSize, windowSize):
     for y in range(0, img.shape[0], stepSize):
         for x in range(0, img.shape[1], stepSize):
-            yield (x, y, img[y:y + windowSize, x:x + windowSize,:])
+            yield (x, y, img[y:y + windowSize, x:x + windowSize, :])
 
 
 def main():
-    outPath = "data/WV_predicted"
-    i=0
-    for(x,y, window) in sliding_window(img, stepSize, windowSize):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--input', type=str,
+                        help='Input Multipage Image')
+    parser.add_argument('-o', '----output_folder', type=str,
+                        help='Path where output will be stored.')
+
+    args = parser.parse_args()
+    model = get_model()
+    model.load_weights(weights_path)
+    outPath = args.output_folder + "data/WV_predicted"
+    if not os.path.exists(outPath):
+        os.makedirs(outPath)
+
+    image = normalize(tiff.imread(args.input).transpose([1, 2, 0]))
+    wind_row, wind_col = 800, 800  # dimensions of the image
+    windowSize = 800
+    stepSize = 400
+
+    desired_row_size = stepSize * math.ceil(image.shape[0] / stepSize)
+    desired_col_size = stepSize * math.ceil(image.shape[1] / stepSize)
+    img = np.zeros((desired_row_size, desired_col_size, image.shape[2]),
+                   dtype=image.dtype)
+    img[:image.shape[0], :image.shape[1]] = image
+    i = 0
+    for(x, y, window) in sliding_window(img, stepSize, windowSize):
         if window.shape[0] != wind_row or window.shape[1] != wind_col:
             continue
-        t_img = img[y:y+wind_row,x:x+wind_col,:]# the image which has to be predicted
-        mask = predict(t_img, model, patch_sz=PATCH_SZ, n_classes=N_CLASSES).transpose([2,0,1])
-        cnt=str(i)
-        imagename="image"+cnt+".tif"
-        fullpath = os.path.join(outPath,imagename)
+        # the image which has to be predicted
+        t_img = img[y:y+wind_row, x:x+wind_col, :]
+        mask = predict(t_img, model, patch_sz=PATCH_SZ,
+                       n_classes=N_CLASSES).transpose([2, 0, 1])
+        cnt = str(i)
+        imagename = "image" + cnt + ".tif"
+        fullpath = os.path.join(outPath, imagename)
         tiff.imsave(fullpath, mask)
-        i=i+1
+        i += 1
 
-if __name__ == '__main__':
-    model = get_model()
-    model.load_weights(weights_path)
+
+if __name__ == '__main__':
     main()
\ No newline at end of file
diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index e37f632..db7374a 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -70,8 +70,8 @@ def generate_pipeline(name, image, image_size, model_name, device):
     task0.executable = 'matlab'  # Assign executable to the task
     # Assign arguments for the task executable
     task0.arguments = ["-nodisplay", "-nosplash", "-r",
-                       "multipagetiff('%s','$NODE_LFS_PATH/%s');exit" % (image.split('/')[-1],
-                                                                         task0.name)]
+                       "multipagetiff('%s','$NODE_LFS_PATH/%s');exit"
+                       % (image.split('/')[-1], task0.name)]
     task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'),
                                os.path.abspath('../utils/saveastiff.m')]
     task0.link_input_data = [image]
@@ -83,48 +83,34 @@ def generate_pipeline(name, image, image_size, model_name, device):
     # Add Stage to the Pipeline
     entk_pipeline.add_stages(stage0)
 
-    ## Create a Stage object
-    #stage1 = Stage()
-    #stage1.name = '%s-S1' % (name)
-    ## Create Task 1, training
-    #task1 = Task()
-    #task1.name = '%s-T1' % stage1.name
-    #task1.pre_exec = ['module load psc_path/1.1',
-    #                  'module load slurm/default',
-    #                  'module load intel/17.4',
-    #                  'module load python3',
-    #                  'module load cuda',
-    #                  'source $SCRATCH/pytorchCuda/bin/activate',
-    #                  'export PYTHONPATH=$SCRATCH/pytorchCuda/lib/' +\
-    #                  'python3.5/site-packages:$PYTHONPATH',
-    #                  'export CUDA_VISIBLE_DEVICES=%d' % device]
-    #task1.executable = 'python3'  # Assign executable to the task
-#
-    ## Assign arguments for the task executable
-    #task1.arguments = ['predict_raster.py',
-    #                   '--input_image', image.split('/')[-1],
-    #                   '--model_architecture', model_arch,
-    #                   '--hyperparameter_set', hyperparam_set,
-    #                   '--training_set', training_set,
-    #                   '--test_folder', '$NODE_LFS_PATH/%s' % task0.name,
-    #                   '--model_path', './',
-    #                   '--output_folder', './%s' % image.split('/')[-1].
-    #                   split('.')[0]]
-    #task1.link_input_data = ['$SHARED/%s.tar' % model_name]
-    #task1.upload_input_data = [os.path.abspath('../predicting/' +
-    #                                           'predict_raster.py'),
-    #                           os.path.abspath('../predicting/' +
-    #                                           'predict_sealnet.py'),
-    #                           os.path.abspath('../utils/')]
-    #task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
-    #                  'thread_type': 'OpenMP'}
-    #task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
-    #                  'thread_type': 'OpenMP'}
-    ## Download resuting images
-    #task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
-    #                                            split('.')[0],
-    #                                            image.split('/')[-1])]
-    #task1.tag = task0.name
+    # Create a Stage object
+    stage1 = Stage()
+    stage1.name = '%s-S1' % (name)
+    # Create Task 1, training
+    task1 = Task()
+    task1.name = '%s-T1' % stage1.name
+    task1.pre_exec = ['module load anaconda3/2019.03',
+                      'source activate keras-gpu'
+                      'export CUDA_VISIBLE_DEVICES=%d' % device]
+    task1.executable = 'python3'  # Assign executable to the task
+
+    # Assign arguments for the task executable
+    task1.arguments = ['predict.py',
+                       '--input', '$NODE_LFS_PATH/%s/multi-' % (task0.name, image.split('/')[-1]),
+                       '--output_folder', '$NODE_LFS_PATH/%s' % task1.name]
+    task1.link_input_data = ['$SHARED/unet_weights.hdf5']
+    task1.upload_input_data = [os.path.abspath('../classification/predict.py'),
+                               os.path.abspath('../classification/' +
+                                               'gen_patches.py'),
+                               os.path.abspath('../classification/' +
+                                               'train_unet.py'),
+                               os.path.abspath('../classification/' +
+                                               'unet_model.py')]
+    task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
+                      'thread_type': 'OpenMP'}
+    task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
+                      'thread_type': 'OpenMP'}
+    task1.tag = task0.name
 #
     #stage1.add_tasks(task1)
     ## Add Stage to the Pipeline
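
NOTE: a quick worked check of the padding arithmetic in main() above, for a hypothetical
1000 x 1700 pixel input:

    import math
    stepSize, windowSize = 400, 800
    rows, cols = 1000, 1700
    print(stepSize * math.ceil(rows / stepSize),
          stepSize * math.ceil(cols / stepSize))   # -> 1200 2000
    # 800x800 windows then slide in 400-pixel steps (50% overlap); windows
    # that would extend past the padded edge fail the shape check in main()
    # and are skipped.
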
From 22db288853952937b0f97cdc1304d0779f7f40f9 Mon Sep 17 00:00:00 2001
From: Ioannis
Date: Mon, 8 Jul 2019 06:55:01 -0900
Subject: [PATCH 06/31] Debugging EnTK script

---
 src/entk_script/entk_script.py | 60 ++++++++++++++++------------------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index db7374a..80a77ab 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -90,15 +90,15 @@ def generate_pipeline(name, image, image_size, model_name, device):
     task1 = Task()
     task1.name = '%s-T1' % stage1.name
     task1.pre_exec = ['module load anaconda3/2019.03',
-                      'source activate keras-gpu'
+                      'source activate keras-gpu',
                       'export CUDA_VISIBLE_DEVICES=%d' % device]
     task1.executable = 'python3'  # Assign executable to the task
 
     # Assign arguments for the task executable
     task1.arguments = ['predict.py',
-                       '--input', '$NODE_LFS_PATH/%s/multi-' % (task0.name, image.split('/')[-1]),
+                       '--input', '$NODE_LFS_PATH/%s/multipage-%s' % (task0.name, image.split('/')[-1]),
                        '--output_folder', '$NODE_LFS_PATH/%s' % task1.name]
-    task1.link_input_data = ['$SHARED/unet_weights.hdf5']
+    task1.link_input_data = ['$SHARED/unet_weights.hdf5 > weights/unet_weights.hdf5']
     task1.upload_input_data = [os.path.abspath('../classification/predict.py'),
                                os.path.abspath('../classification/' +
                                                'gen_patches.py'),
@@ -111,10 +111,10 @@ def generate_pipeline(name, image, image_size, model_name, device):
     task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'thread_type': 'OpenMP'}
     task1.tag = task0.name
-#
-    #stage1.add_tasks(task1)
-    ## Add Stage to the Pipeline
-    #entk_pipeline.add_stages(stage1)
+
+    stage1.add_tasks(task1)
+    # Add Stage to the Pipeline
+    entk_pipeline.add_stages(stage1)
 
     return entk_pipeline
 
@@ -163,45 +163,43 @@ def args_parser():
                 'project': args.project,
                 'queue': args.queue}
 
-    try:
-
     # Create Application Manager
-        appman = AppManager(port=32773, hostname='localhost', name=args.name,
+    appman = AppManager(port=32773, hostname='localhost', name=args.name,
                         autoterminate=False, write_workflow=True)
 
     # Assign resource manager to the Application Manager
-        appman.resource_desc = res_dict
-        appman.shared_data = [os.path.abspath('../../models/unet_weights.hdf5')]
+    appman.resource_desc = res_dict
+    appman.shared_data = [os.path.abspath('../../models/unet_weights.hdf5')]
     # Create a task that discovers the dataset
-        disc_pipeline = generate_discover_pipeline(args.input_dir)
-        appman.workflow = set([disc_pipeline])
+    disc_pipeline = generate_discover_pipeline(args.input_dir)
+    appman.workflow = set([disc_pipeline])
 
     # Run
-        appman.run()
-        print('Run Discovery')
-        images = pd.read_csv('images.csv')
+    appman.run()
+    print('Run Discovery')
+    images = pd.read_csv('images.csv')
 
-        print('Images Found:', len(images))
-        # Create a single pipeline per image
-        pipelines = list()
-        dev = 0
-        for idx in range(0,len(images)):
-            p1 = generate_pipeline(name='P%s' % idx,
+    print('Images Found:', len(images))
+    # Create a single pipeline per image
+    pipelines = list()
+    dev = 0
+    for idx in range(0,len(images)):
+        p1 = generate_pipeline(name='P%s' % idx,
                                image=images['Filename'][idx],
                                image_size=images['Size'][idx],
                                model_name='test',
                                device=dev)
-            dev = dev ^ 1
-            pipelines.append(p1)
+        dev = dev ^ 1
+        pipelines.append(p1)
     # Assign the workflow as a set of Pipelines to the Application Manager
-        appman.workflow = set(pipelines)
+    appman.workflow = set(pipelines)
 
     # Run the Application Manager
-        appman.run()
+    appman.run()
 
-        print('Done')
+    print('Done')
 
-    finally:
+    #finally:
     # Now that all images have been analyzed, release the resources.
-        print('Closing resources')
-        appman.resource_terminate()
+    print('Closing resources')
+    appman.resource_terminate()
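
NOTE: the driver alternates pipelines between the two GPUs of a node with an XOR flip;
the pattern in isolation:

    dev = 0
    for idx in range(4):
        print(idx, dev)   # 0 0, 1 1, 2 0, 3 1
        dev = dev ^ 1

Combined with export CUDA_VISIBLE_DEVICES=%d in task1.pre_exec, even-numbered pipelines
land on device 0 and odd-numbered ones on device 1.
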
From 2e0986add1d2f0f099fbe12b36344a3531d68eef Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Mon, 8 Jul 2019 12:53:08 -0400
Subject: [PATCH 07/31] Finalizing EnTK script

---
 src/classification/mosaic.m    | 107 ++++++++++++++------------------
 src/entk_script/entk_script.py |  26 ++++++++
 src/utils/multipagetiff.m      |  13 ++--
 3 files changed, 79 insertions(+), 67 deletions(-)

diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m
index 5990d03..bf30428 100644
--- a/src/classification/mosaic.m
+++ b/src/classification/mosaic.m
@@ -1,67 +1,52 @@
-% Author: Samira Daneshgar-Asl
+% Author: Samira Daneshgar-Asl, Ioannis Paraskevakos
 % License: MIT
 % Copyright: 2018-2019
 
-clear all
-clc
-
-[FileName, PathName] = uigetfile('*.tif', 'Select the 8 bit image file with 3 bands:');
-File = fullfile(PathName, FileName);
-[I, R] = geotiffread(File);
-img=I;
-clear I;
-
-patch_size=800;
-
-desired_row_size=(patch_size/2)*ceil(size(img,1)/(patch_size/2));
-desired_col_size=(patch_size/2)*ceil(size(img,2)/(patch_size/2));
-
-image = zeros(desired_row_size,desired_col_size,size(img,3),'int16');
-image(1:size(img,1),1:size(img,2),:) = img;
-
-a=1:patch_size/2:size(image, 1);
-b=1:patch_size/2:size(image, 2);
-row = 1:patch_size/2:a(1,end-1);
-col = 1:patch_size/2:b(1,end-1);
-A=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
-B=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
-
-files = dir(fullfile('data\WV_predicted', '*.tif'));
-files_ordered = natsortfiles({files.name});
-totalFiles = numel(files);
-
-k=1;
-for i=1:size(row,2)
-    for j=1:size(col,2)
-        I=imread(fullfile('data\WV_predicted',files_ordered{1,k}));
-        A(row(i):row(i)+patch_size-1,col(j):col(j)+patch_size-1)=I;
-        B=max(A,B);
-        if k~=totalFiles
-            k=k+1;
-        else
+function mosaic(FileName, FilePath, PredictedPath)
+    File = fullfile(FilePath,FileName);
+    [img, R] = geotiffread(File);
+
+    patch_size=800;
+
+    desired_row_size=(patch_size/2)*ceil(size(img,1)/(patch_size/2));
+    desired_col_size=(patch_size/2)*ceil(size(img,2)/(patch_size/2));
+
+    image = zeros(desired_row_size,desired_col_size,size(img,3),'int16');
+    image(1:size(img,1),1:size(img,2),:) = img;
+
+    a=1:patch_size/2:size(image, 1);
+    b=1:patch_size/2:size(image, 2);
+    row = 1:patch_size/2:a(1,end-1);
+    col = 1:patch_size/2:b(1,end-1);
+    A=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
+    B=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
+
+    files = dir(fullfile(PredictedPath,'data/WV_predicted', '*.tif'));
+    files_ordered = natsortfiles({files.name});
+    totalFiles = numel(files);
+
+    k=1;
+    for i=1:size(row,2)
+        for j=1:size(col,2)
+            I=imread(fullfile(PredictedPath, 'data/WV_predicted', files_ordered{1,k}));
+            A(row(i):row(i)+patch_size-1,col(j):col(j)+patch_size-1)=I;
+            B=max(A,B);
+            if k~=totalFiles
+                k=k+1;
+            else
+            end
         end
     end
-end
-
-B(sum(image,3)==0)=0;
-xi = [col(1,1)-.5, col(1,end)+patch_size-1+.5];
-yi = [row(1,1)-.5, row(1,end)+patch_size-1+.5];
-[xlimits, ylimits] = intrinsicToWorld(R, xi, yi);
-subR = R;
-subR.RasterSize = size(B);
-subR.XLimWorld = sort(xlimits);
-subR.YLimWorld = sort(ylimits);
-info = geotiffinfo(File);
-writeFileName=[strtok(FileName, '.'),'-mask-predicted.tif'];
-geotiffwrite(writeFileName,B,subR,'GeoKeyDirectoryTag',info.GeoTIFFTags.GeoKeyDirectoryTag,'TiffTags',struct('Compression',Tiff.Compression.None))
-
-
-
-
-
-
-
-
-
+    B(sum(image,3)==0)=0;
+    xi = [col(1,1)-.5, col(1,end)+patch_size-1+.5];
+    yi = [row(1,1)-.5, row(1,end)+patch_size-1+.5];
+    [xlimits, ylimits] = intrinsicToWorld(R, xi, yi);
+    subR = R;
+    subR.RasterSize = size(B);
+    subR.XLimWorld = sort(xlimits);
+    subR.YLimWorld = sort(ylimits);
+    info = geotiffinfo(File);
+    writeFileName=[strtok(FileName, '.'),'-mask-predicted.tif'];
+    geotiffwrite(writeFileName,B,subR,'GeoKeyDirectoryTag',info.GeoTIFFTags.GeoKeyDirectoryTag,'TiffTags',struct('Compression',Tiff.Compression.None))
+end
\ No newline at end of file
diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index 80a77ab..8647a00 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -116,6 +116,32 @@ def generate_pipeline(name, image, image_size, model_name, device):
     # Add Stage to the Pipeline
     entk_pipeline.add_stages(stage1)
 
+    # Create a Stage object
+    stage2 = Stage()
+    stage2.name = '%s-S2' % (name)
+    # Create Task 1, training
+    task2 = Task()
+    task2.name = '%s-T2' % stage2.name
+    task2.pre_exec = ['module load matlab']
+    task2.executable = 'matlab'  # Assign executable to the task
+    # Assign arguments for the task executable
+    task2.arguments = ["-nodisplay", "-nosplash", "-r",
+                       "mosaic('%s', './', $NODE_LFS_PATH/%s');exit"
+                       % (image.split('/')[-1], task1.name)]
+    task2.link_input_data = [image]
+    task2.upload_input_data = [os.path.abspath('../classification/mosaic.m'),
+                               os.path.abspath('../classification/' +
+                                               'natsortfiles.m'),
+                               os.path.abspath('../classification/' +
+                                               'natsort.m')]
+    task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
+                      'thread_type': 'OpenMP'}
+    task2.tag = task1.name
+
+    stage2.add_tasks(task2)
+    # Add Stage to the Pipeline
+    entk_pipeline.add_stages(stage2)
+
     return entk_pipeline
 
 
diff --git a/src/utils/multipagetiff.m b/src/utils/multipagetiff.m
index 77aad63..aded640 100644
--- a/src/utils/multipagetiff.m
+++ b/src/utils/multipagetiff.m
@@ -6,10 +6,11 @@
 
 function multipagetiff(ReadImage, WriteDir)
 
-if ~exist(WriteDir, 'dir')
-    mkdir(WriteDir);
-end
+    if ~exist(WriteDir, 'dir')
+        mkdir(WriteDir);
+    end
 
-image = geotiffread(ReadImage);
-writeFileName = strcat(WriteDir,'/multipage-',num2str(ReadImage));
-saveastiff(image,writeFileName);
+    image = geotiffread(ReadImage);
+    writeFileName = strcat(WriteDir,'/multipage-',num2str(ReadImage));
+    saveastiff(image,writeFileName);
+end
\ No newline at end of file
From 8aa062fd739582b8bacb91a4356109bdac9cca66 Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Mon, 8 Jul 2019 14:48:36 -0400
Subject: [PATCH 08/31] Pylint and flake8

---
 .flake8rc                      |  2 +-
 .travis.yml                    |  1 +
 src/entk_script/entk_script.py | 57 +++++++++++++++++-----------------
 src/entk_script/image_disc.py  |  2 +-
 4 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/.flake8rc b/.flake8rc
index 60c91a7..b820fa8 100644
--- a/.flake8rc
+++ b/.flake8rc
@@ -1,3 +1,3 @@
 [flake8]
-ignore = E124, W293, E261, W291, W292, F403, E303, F405
+ignore = E124, W293, E261, W291, W292, F403, E303, F405, W504
 max-line-length = 80
diff --git a/.travis.yml b/.travis.yml
index 2024dc6..2c89cc3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,6 +27,7 @@ install:
 #  - pip install .
 #  - pip install coverage
   - pip install keras
+  - pip intall radical.entk
   - pip install flake8
   - pip install pylint
 #  - pip install codecov
diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index 8647a00..fd328fb 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -45,7 +45,7 @@ def generate_discover_pipeline(path):
     return pipeline
 
 
-def generate_pipeline(name, image, image_size, model_name, device):
+def generate_pipeline(name, image, image_size, device):
 
     '''
     This function creates a pipeline for an image that will be analyzed.
@@ -54,7 +54,6 @@ def generate_pipeline(name, image, image_size, model_name, device):
         :name: Pipeline name, str
         :image: image path, str
         :image_size: image size in MBs, int
-        :model_name: Prediction Model Name, str
         :device: Which GPU device will be used by this pipeline, int
     '''
     # Create a Pipeline object
@@ -93,12 +92,13 @@ def generate_pipeline(name, image, image_size, model_name, device):
                       'source activate keras-gpu',
                       'export CUDA_VISIBLE_DEVICES=%d' % device]
     task1.executable = 'python3'  # Assign executable to the task
-
     # Assign arguments for the task executable
     task1.arguments = ['predict.py',
-                       '--input', '$NODE_LFS_PATH/%s/multipage-%s' % (task0.name, image.split('/')[-1]),
+                       '--input', '$NODE_LFS_PATH/%s/multipage-%s' %
+                       (task0.name, image.split('/')[-1]),
                        '--output_folder', '$NODE_LFS_PATH/%s' % task1.name]
-    task1.link_input_data = ['$SHARED/unet_weights.hdf5 > weights/unet_weights.hdf5']
+    task1.link_input_data = ['$SHARED/unet_weights.hdf5 >' +
+                             'weights/unet_weights.hdf5']
     task1.upload_input_data = [os.path.abspath('../classification/predict.py'),
                                os.path.abspath('../classification/' +
                                                'gen_patches.py'),
@@ -188,45 +188,46 @@ def args_parser():
                 'schema': 'gsissh',
                 'project': args.project,
                 'queue': args.queue}
-
+    try:
     # Create Application Manager
-    appman = AppManager(port=32773, hostname='localhost', name=args.name,
+        appman = AppManager(port=32773, hostname='localhost', name=args.name,
                         autoterminate=False, write_workflow=True)
 
     # Assign resource manager to the Application Manager
-    appman.resource_desc = res_dict
-    appman.shared_data = [os.path.abspath('../../models/unet_weights.hdf5')]
+        appman.resource_desc = res_dict
+        appman.shared_data = [os.path.abspath('../../models/unet_weights.hdf5')]
     # Create a task that discovers the dataset
-    disc_pipeline = generate_discover_pipeline(args.input_dir)
-    appman.workflow = set([disc_pipeline])
+        disc_pipeline = generate_discover_pipeline(args.input_dir)
+        appman.workflow = set([disc_pipeline])
 
     # Run
-    appman.run()
-    print('Run Discovery')
-    images = pd.read_csv('images.csv')
+        appman.run()
+        print('Run Discovery')
+        images = pd.read_csv('images.csv')
 
-    print('Images Found:', len(images))
-    # Create a single pipeline per image
-    pipelines = list()
-    dev = 0
-    for idx in range(0,len(images)):
-        p1 = generate_pipeline(name='P%s' % idx,
+        print('Images Found:', len(images))
+        # Create a single pipeline per image
+        pipelines = list()
+        dev = 0
+        for idx in range(0, len(images)):
+            p1 = generate_pipeline(name='P%s' % idx,
                                image=images['Filename'][idx],
                                image_size=images['Size'][idx],
-                               model_name='test',
                                device=dev)
-        dev = dev ^ 1
-        pipelines.append(p1)
+            dev = dev ^ 1
+            pipelines.append(p1)
     # Assign the workflow as a set of Pipelines to the Application Manager
-    appman.workflow = set(pipelines)
+        appman.workflow = set(pipelines)
 
     # Run the Application Manager
-    appman.run()
-
-    print('Done')
+        appman.run()
 
-    #finally:
+        print('Done')
+    except Exception as e:
+        # Something unexpected happened in the code above
+        print('Caught Error: %s' % e)
+    finally:
         # Now that all images have been analyzed, release the resources.
-    print('Closing resources')
-    appman.resource_terminate()
+        print('Closing resources')
+        appman.resource_terminate()
diff --git a/src/entk_script/image_disc.py b/src/entk_script/image_disc.py
index 9c0912d..bf8d20c 100644
--- a/src/entk_script/image_disc.py
+++ b/src/entk_script/image_disc.py
@@ -29,7 +29,7 @@ def image_discovery(path, filename='list.csv', filesize=False):
     if filesize:
         dataset_df = pd.DataFrame(columns=['Filename', 'Size'])
         for filepath in filepaths:
-            filesize = int(math.ceil(os.path.getsize(filepath)/1024/1024))
+            filesize = int(math.ceil(os.path.getsize(filepath) / 1024 / 1024))
             dataset_df.loc[len(dataset_df)] = [filepath, filesize]
     else:
         dataset_df = pd.DataFrame(columns=['Filename'])
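
NOTE: the lint settings added to CI live in .flake8rc; the CI script step itself is not
shown in this excerpt, but an equivalent local check against the repo's config would be
something like:

    flake8 --config=.flake8rc src/
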
From a5ae6aa07f5b572972222bf96a19ed37b4322cc0 Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Mon, 8 Jul 2019 15:46:55 -0400
Subject: [PATCH 09/31] Fixing conflicts between branches

---
 .travis.yml                        |   2 +-
 src/classification/mosaic.m        |  12 +-
 src/classification/multipagetiff.m |  29 ---
 src/classification/saveastiff.m    | 303 -----------------------------
 src/utils/multipagetiff.m          |   9 +-
 5 files changed, 19 insertions(+), 336 deletions(-)
 delete mode 100644 src/classification/multipagetiff.m
 delete mode 100644 src/classification/saveastiff.m

diff --git a/.travis.yml b/.travis.yml
index 2c89cc3..5f97bea 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,7 +27,7 @@ install:
 #  - pip install .
 #  - pip install coverage
   - pip install keras
-  - pip intall radical.entk
+  - pip install radical.entk
   - pip install flake8
   - pip install pylint
 #  - pip install codecov
diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m
index 1f5e67b..fd8b58f 100644
--- a/src/classification/mosaic.m
+++ b/src/classification/mosaic.m
@@ -5,6 +5,14 @@
 function mosaic(FileName, FilePath, PredictedPath)
     File = fullfile(FilePath,FileName);
     [img, R] = geotiffread(File);
+    if isunix
+        path = 'data/WV_predicted';
+    elseif ispc
+        path = 'data\WV_predicted';
+    else
+        path = ''
+        disp 'Something went wrong';
+    end
 
     patch_size=800;
 
@@ -21,14 +29,14 @@ function mosaic(FileName, FilePath, PredictedPath)
     A=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
     B=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single');
 
-    files = dir(fullfile(PredictedPath,'data/WV_predicted', '*.tif'));
+    files = dir(fullfile(PredictedPath, path, '*.tif'));
     files_ordered = natsortfiles({files.name});
     totalFiles = numel(files);
 
     k=1;
     for i=1:size(row,2)
         for j=1:size(col,2)
-            I=imread(fullfile(PredictedPath, 'data/WV_predicted', files_ordered{1,k}));
+            I=imread(fullfile(PredictedPath, path, files_ordered{1,k}));
             A(row(i):row(i)+patch_size-1,col(j):col(j)+patch_size-1)=I;
             B=max(A,B);
             if k~=totalFiles
diff --git a/src/classification/multipagetiff.m b/src/classification/multipagetiff.m
deleted file mode 100644
index ad2879b..0000000
--- a/src/classification/multipagetiff.m
+++ /dev/null
@@ -1,29 +0,0 @@
-% Author: Samira Daneshgar-Asl
-% License: MIT
-% Copyright: 2018-2019
-
-% when we have an 8-bit image with 3 bands and we want to save it as a multi-page
-clear all
-clc
-
-CurrentDir=pwd;
-WriteDir = fullfile(pwd, '8bit-3bands Multi-Page Images');
-if ~exist(WriteDir, 'dir')
-    mkdir(WriteDir);
-end
-
-ReadDir = fullfile(pwd, '8bit-3bands Images');
-files1 =dir(ReadDir);
-files1(1:2) = [];
-totalFiles = numel(files1);
-
-for i =1:totalFiles
-    Fileaddress{i,1}=strcat(ReadDir,'\',files1(i).name);
-    file{i} = geotiffread(Fileaddress{i,1});
-    cd(CurrentDir)
-    writeFileName = strcat(WriteDir,'\multipage-',num2str(files1(i).name));
-    saveastiff(file{i},writeFileName);
-    cd(ReadDir) % return to actualFile folder
-end
-
-
diff --git a/src/classification/saveastiff.m b/src/classification/saveastiff.m
deleted file mode 100644
index 76d4db3..0000000
--- a/src/classification/saveastiff.m
+++ /dev/null
@@ -1,303 +0,0 @@
-function res = saveastiff(data, path, options)
-% options.color
-%   : true or FALSE
-%   : If this is true, third dimension should be 3 and the data is saved as a color image.
-% options.compress
-%   : 'no', 'lzw', 'jpeg' or 'adobe'.
-%     Compression type.
-%       'no'    : Uncompressed(Default)
-%       'lzw'   : lossless LZW
-%       'jpeg'  : lossy JPEG (When using JPEG compression, ImageWidth,
-%                 ImageLength, and RowsPerStrip must be multiples of 16.)
-%       'adobe' : lossless Adobe-style
-% options.message
-%   : TRUE or false.
-%     If this is false, all messages are skipped.
-% options.append
-%   : true or FALSE
-%     If path is exist, the data is appended to an existing file.
-%     If path is not exist, this options is ignored.
-% options.overwrite
-%   : true or FALSE
-%     Overwrite to an existing file.
-% options.big
-%   : true or FALSE,
-%     Use 64 bit addressing and allows for files > 4GB
-%
-% Defalut value of 'options' is
-%     options.color = false;
-%     options.compress = 'no';
-%     options.message = true;
-%     options.append = false;
-%     options.overwrite = false;
-%     options.big = false;
-%
-% res : Return value. It is 0 when the function is finished with no error.
-%       If an error is occured in the function, it will have a positive
-%       number (error code).
-%
-% Copyright (c) 2012, YoonOh Tak
-% All rights reserved.
-%
-% Redistribution and use in source and binary forms, with or without
-% modification, are permitted provided that the following conditions are
-% met:
-%
-%     * Redistributions of source code must retain the above copyright
-%       notice, this list of conditions and the following disclaimer.
-%     * Redistributions in binary form must reproduce the above copyright
-%       notice, this list of conditions and the following disclaimer in
-%       the documentation and/or other materials provided with the distribution
-%     * Neither the name of the Gwangju Institute of Science and Technology (GIST), Republic of Korea nor the names
-%       of its contributors may be used to endorse or promote products derived
-%       from this software without specific prior written permission.
-%
-% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-% POSSIBILITY OF SUCH DAMAGE.
-
-tStart = tic;
-errcode = 0;
-try
-%% Init options parameter
-if nargin < 3 % Use default options
-    options.color = false;
-    options.compress = 'no';
-    options.message = true;
-    options.append = false;
-    options.overwrite = false;
-end
-if ~isfield(options, 'message'),   options.message = true; end
-if ~isfield(options, 'append'),    options.append = false; end
-if ~isfield(options, 'compress'),  options.compress = 'no'; end
-if ~isfield(options, 'color'),     options.color = false; end
-if ~isfield(options, 'overwrite'), options.overwrite = false; end
-if isfield(options, 'big') == 0,   options.big = false; end
-
-if isempty(data), errcode = 1; assert(false); end
-if (options.color == false && ndims(data) > 3) || ...
-    (options.color == true && ndims(data) > 4)
-    % Maximum dimension of a grayscale image is 3 of [height, width, frame]
-    % Maximum dimension of a color image is 4 of [height, width, color, frame]
-    errcode = 2; assert(false);
-end
-
-%% Get image informations
-% http://www.awaresystems.be/imaging/tiff/tifftags/photometricinterpretation.html
-if ~options.color
-    if ndims(data) >= 4, errcode = 2; assert(false); end;
-    [height, width, depth] = size(data);
-    tagstruct.Photometric = Tiff.Photometric.MinIsBlack;
-%     tagstruct.Photometric = Tiff.Photometric.MinIsWhite;
-%     tagstruct.Photometric = Tiff.Photometric.Mask;
-%     tagstruct.Photometric = Tiff.Photometric.Separated;
-else
-    if ndims(data) >= 5, errcode = 2; assert(false); end;
-    [height, width, cc, depth] = size(data); % cc: color channels. 3: rgb, 4: rgb with alpha channel
-    if cc ~= 3 && cc ~= 4, errcode = 3; assert(false); end;
-    tagstruct.Photometric = Tiff.Photometric.RGB;
-%     tagstruct.Photometric = Tiff.Photometric.CIELab;
-%     tagstruct.Photometric = Tiff.Photometric.ICCLab;
-%     tagstruct.Photometric = Tiff.Photometric.ITULab;
-%     (Unsupported)tagstruct.Photometric = Tiff.Photometric.Palette;
-%     (Unsupported)tagstruct.Photometric = Tiff.Photometric.YCbCr;
-end
-tagstruct.ImageLength = height;
-tagstruct.ImageWidth = width;
-tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Chunky; % (RGB RGB,RGB RGB,RGB RGB), http://www.awaresystems.be/imaging/tiff/tifftags/planarconfiguration.html
-% (Unsupported)tagstruct.PlanarConfiguration = Tiff.PlanarConfiguration.Separate; % (RRR RRR, GGG GGG, BBB BBB), % http://www.awaresystems.be/imaging/tiff/tifftags/planarconfiguration.html
-
-%% Complex number
-% http://www.awaresystems.be/imaging/tiff/tifftags/samplesperpixel.html
-if ~options.color && isreal(data) % Grayscale image with real numbers
-    tagstruct.SamplesPerPixel = 1;
-    data = reshape(data, height, width, 1, depth);
-elseif ~options.color && ~isreal(data) % Grayscale image with complex numbers
-    tagstruct.SamplesPerPixel = 2;
-    data = reshape([real(data) imag(data)], height, width, 2, depth);
-elseif options.color && isreal(data) % Color image with real numbers
-    tagstruct.SamplesPerPixel = cc;
-    if cc == 4
-        tagstruct.ExtraSamples = Tiff.ExtraSamples.AssociatedAlpha; % The forth channel is alpha channel
-    end
-    data = reshape(data, height, width, cc, depth);
-elseif options.color && ~isreal(data) % Color image with complex numbers
-    tagstruct.SamplesPerPixel = cc * 2;
-    if cc == 3
-        tagstruct.ExtraSamples = repmat(Tiff.ExtraSamples.Unspecified, 1, 3); % 3(real)+3(imag) = 6 = 3(rgb) + 3(Extra)
-    else
-        tagstruct.ExtraSamples = repmat(Tiff.ExtraSamples.Unspecified, 1, 5); % 4(real)+4(imag) = 8 = 3(rgb) + 5(Extra)
-    end
-    data = reshape([real(data) imag(data)], height, width, cc*2, depth);
-end
-
-%% Image compression
-% 
http://www.awaresystems.be/imaging/tiff/tifftags/compression.html -switch lower(options.compress) - case 'no' - tagstruct.Compression = Tiff.Compression.None; - case 'lzw' - tagstruct.Compression = Tiff.Compression.LZW; - case 'jpeg' - tagstruct.Compression = Tiff.Compression.JPEG; - case 'adobe' - tagstruct.Compression = Tiff.Compression.AdobeDeflate; - otherwise - % Use tag nubmer in http://www.awaresystems.be/imaging/tiff/tifftags/compression.html - tagstruct.Compression = options.compress; -end - -%% Sample format -% http://www.awaresystems.be/imaging/tiff/tifftags/sampleformat.html -switch class(data) - % Unsupported Matlab data type: char, logical, cell, struct, function_handle, class. - case {'uint8', 'uint16', 'uint32'} - tagstruct.SampleFormat = Tiff.SampleFormat.UInt; - case {'int8', 'int16', 'int32'} - tagstruct.SampleFormat = Tiff.SampleFormat.Int; - if options.color - errcode = 4; assert(false); - end - case {'single', 'double', 'uint64', 'int64'} - tagstruct.SampleFormat = Tiff.SampleFormat.IEEEFP; - otherwise - % (Unsupported)Void, ComplexInt, ComplexIEEEFP - errcode = 5; assert(false); -end - -%% Bits per sample -% http://www.awaresystems.be/imaging/tiff/tifftags/bitspersample.html -switch class(data) - case {'uint8', 'int8'} - tagstruct.BitsPerSample = 8; - case {'uint16', 'int16'} - tagstruct.BitsPerSample = 16; - case {'uint32', 'int32'} - tagstruct.BitsPerSample = 32; - case {'single'} - tagstruct.BitsPerSample = 32; - case {'double', 'uint64', 'int64'} - tagstruct.BitsPerSample = 64; - otherwise - errcode = 5; assert(false); -end - -%% Rows per strip -maxstripsize = 8*1024; -tagstruct.RowsPerStrip = ceil(maxstripsize/(width*(tagstruct.BitsPerSample/8)*size(data,3))); % http://www.awaresystems.be/imaging/tiff/tifftags/rowsperstrip.html -if tagstruct.Compression == Tiff.Compression.JPEG - tagstruct.RowsPerStrip = max(16,round(tagstruct.RowsPerStrip/16)*16); -end - -%% Overwrite check -if exist(path, 'file') && ~options.append - if ~options.overwrite - errcode = 6; assert(false); - end -end - -%% Save path configuration -path_parent = pwd; -[pathstr, fname, fext] = fileparts(path); -if ~isempty(pathstr) - if ~exist(pathstr, 'dir') - mkdir(pathstr); - end - cd(pathstr); -end - -%% Write image data to a file -file_opening_error_count = 0; -while ~exist('tfile', 'var') - try - if ~options.append % Make a new file - s=whos('data'); - if s.bytes > 2^32-1 || options.big - tfile = Tiff([fname, fext], 'w8'); % Big Tiff file - else - tfile = Tiff([fname, fext], 'w'); - end - else - if ~exist([fname, fext], 'file') % Make a new file - s=whos('data'); - if s.bytes > 2^32-1 || options.big - tfile = Tiff([fname, fext], 'w8'); % Big Tiff file - else - tfile = Tiff([fname, fext], 'w'); - end - else % Append to an existing file - tfile = Tiff([fname, fext], 'r+'); - while ~tfile.lastDirectory(); % Append a new image to the last directory of an exiting file - tfile.nextDirectory(); - end - tfile.writeDirectory(); - end - end - catch - file_opening_error_count = file_opening_error_count + 1; - pause(0.1); - if file_opening_error_count > 5 % automatically retry to open for 5 times. - reply = input('Failed to open the file. Do you wish to retry? 
Y/n: ', 's'); - if isempty(reply) || any(upper(reply) == 'Y') - file_opening_error_count = 0; - else - errcode = 7; - assert(false); - end - end - end -end - -for d = 1:depth - tfile.setTag(tagstruct); - tfile.write(data(:, :, :, d)); - if d ~= depth - tfile.writeDirectory(); - end -end - -tfile.close(); -if exist('path_parent', 'var'), cd(path_parent); end - -tElapsed = toc(tStart); -if options.message - display(sprintf('The file was saved successfully. Elapsed time : %.3f s.', tElapsed)); -end - -catch exception -%% Exception management - if exist('tfile', 'var'), tfile.close(); end - switch errcode - case 1 - if options.message, error '''data'' is empty.'; end; - case 2 - if options.message, error 'Data dimension is too large.'; end; - case 3 - if options.message, error 'Third dimesion (color depth) should be 3 or 4.'; end; - case 4 - if options.message, error 'Color image cannot have int8, int16 or int32 format.'; end; - case 5 - if options.message, error 'Unsupported Matlab data type. (char, logical, cell, struct, function_handle, class)'; end; - case 6 - if options.message, error 'File already exists.'; end; - case 7 - if options.message, error(['Failed to open the file ''' path '''.']); end; - otherwise - if exist('fname', 'var') && exist('fext', 'var') - delete([fname fext]); - end - if exist('path_parent', 'var'), cd(path_parent); end - rethrow(exception); - end - if exist('path_parent', 'var'), cd(path_parent); end -end -res = errcode; -end \ No newline at end of file diff --git a/src/utils/multipagetiff.m b/src/utils/multipagetiff.m index aded640..d4826de 100644 --- a/src/utils/multipagetiff.m +++ b/src/utils/multipagetiff.m @@ -11,6 +11,13 @@ function multipagetiff(ReadImage, WriteDir) end image = geotiffread(ReadImage); - writeFileName = strcat(WriteDir,'/multipage-',num2str(ReadImage)); + if isunix + writeFileName = strcat(WriteDir,'/multipage-',num2str(ReadImage)); + elseif ispc + writeFileName = strcat(WriteDir,'\multipage-',num2str(ReadImage)); + else + disp 'Something went wrong'; + end + saveastiff(image,writeFileName); end \ No newline at end of file From 5a349f3a89021f03b2cc7fabcf205ab9f758dadc Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Mon, 8 Jul 2019 16:05:34 -0400 Subject: [PATCH 10/31] Linting fixes --- src/classification/predict.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/classification/predict.py b/src/classification/predict.py index c500d31..285e0ff 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -12,6 +12,7 @@ from train_unet import weights_path, get_model, normalize, PATCH_SZ, N_CLASSES + def predict(x, model, patch_sz=160, n_classes=2): img_height = x.shape[0] img_width = x.shape[1] @@ -50,12 +51,14 @@ def predict(x, model, patch_sz=160, n_classes=2): prediction[x0:x1, y0:y1, :] = patches_predict[k, :, :, :] return prediction[:img_height, :img_width, :] + # generating sliding window def sliding_window(img, stepSize, windowSize): for y in range(0, img.shape[0], stepSize): for x in range(0, img.shape[1], stepSize): yield (x, y, img[y:y + windowSize, x:x + windowSize, :]) - + + def main(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=str, @@ -69,7 +72,7 @@ def main(): outPath = args.output_folder + "data/WV_predicted" if not os.path.exists(outPath): os.makedirs(outPath) - + image = normalize(tiff.imread(args.input).transpose([1, 2, 0])) wind_row, wind_col = 800, 800 # dimensions of the image windowSize = 800 @@ -85,7 +88,7 @@ def main(): if 
window.shape[0] != wind_row or window.shape[1] != wind_col: continue # the image which has to be predicted - t_img = img[y:y+wind_row, x:x+wind_col, :] + t_img = img[y:y + wind_row, x:x + wind_col, :] mask = predict(t_img, model, patch_sz=PATCH_SZ, n_classes=N_CLASSES).transpose([2, 0, 1]) cnt = str(i) @@ -93,6 +96,7 @@ def main(): fullpath = os.path.join(outPath, imagename) tiff.imsave(fullpath, mask) i += 1 - + + if __name__ == '__main__': main() \ No newline at end of file From 11b92ef636c15b75c455f7f22059e8402b9184d9 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Wed, 18 Sep 2019 14:02:21 -0400 Subject: [PATCH 11/31] Fixing mosaic based on Samira's comments --- src/classification/mosaic.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index fd8b58f..81c255f 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -2,7 +2,7 @@ % License: MIT % Copyright: 2018-2019 -function mosaic(FileName, FilePath, PredictedPath) +function mosaic(FileName, FilePath, WriteDir) File = fullfile(FilePath,FileName); [img, R] = geotiffread(File); if isunix @@ -10,7 +10,7 @@ function mosaic(FileName, FilePath, PredictedPath) elseif ispc path = 'data\WV_predicted'; else - path = '' + path = ''; disp 'Something went wrong'; end @@ -29,14 +29,14 @@ function mosaic(FileName, FilePath, PredictedPath) A=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single'); B=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single'); - files = dir(fullfile(PredictedPath, path, '*.tif')); + files = dir(fullfile(WriteDir, path, '*.tif')); files_ordered = natsortfiles({files.name}); totalFiles = numel(files); k=1; for i=1:size(row,2) for j=1:size(col,2) - I=imread(fullfile(PredictedPath, path, files_ordered{1,k})); + I=imread(fullfile(WriteDir, path, files_ordered{1,k})); A(row(i):row(i)+patch_size-1,col(j):col(j)+patch_size-1)=I; B=max(A,B); if k~=totalFiles From 3261fdd7f9724b3500dec4cb01aa1eda7a27bfeb Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Wed, 18 Sep 2019 14:07:46 -0400 Subject: [PATCH 12/31] Fixing output flag --- src/classification/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/predict.py b/src/classification/predict.py index 285e0ff..731b1a7 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -63,7 +63,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=str, help='Input Multipage Image') - parser.add_argument('-o', '----output_folder', type=str, + parser.add_argument('-o', '--output_folder', type=str, help='Path where output will be stored.') args = parser.parse_args() From 274358a996bd70a82601b6ba246d348ae6d5ea45 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Wed, 16 Oct 2019 15:18:20 -0400 Subject: [PATCH 13/31] Updating files based on comments --- src/classification/mosaic.m | 8 ++++++-- src/classification/predict.py | 4 ++-- src/entk_script/entk_script.py | 11 ++++------- src/utils/multipagetiff.m | 26 ++++++++++++++++---------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index 81c255f..e813a9b 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -6,14 +6,18 @@ function mosaic(FileName, FilePath, WriteDir) File = fullfile(FilePath,FileName); [img, R] = geotiffread(File); if isunix - path = 'data/WV_predicted'; + path = 
strcat('data/predicted_tiles',strtok(FileName, '.')); elseif ispc - path = 'data\WV_predicted'; + path = strcat('data\predicted_tiles',strtok(FileName, '.')); else path = ''; disp 'Something went wrong'; end + if ~exist(WriteDir, 'dir') + mkdir(WriteDir); + end + patch_size=800; desired_row_size=(patch_size/2)*ceil(size(img,1)/(patch_size/2)); diff --git a/src/classification/predict.py b/src/classification/predict.py index 731b1a7..cbdc47a 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -62,8 +62,8 @@ def sliding_window(img, stepSize, windowSize): def main(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=str, - help='Input Multipage Image') - parser.add_argument('-o', '--output_folder', type=str, + help='Input Multipage Image', required=True) + parser.add_argument('-o', '--output_folder', type=str, default='./' help='Path where output will be stored.') args = parser.parse_args() diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py index fd328fb..af3765d 100644 --- a/src/entk_script/entk_script.py +++ b/src/entk_script/entk_script.py @@ -45,7 +45,7 @@ def generate_discover_pipeline(path): return pipeline -def generate_pipeline(name, image, image_size, device): +def generate_pipeline(name, image, image_size): ''' This function creates a pipeline for an image that will be analyzed. @@ -89,8 +89,7 @@ def generate_pipeline(name, image, image_size, device): task1 = Task() task1.name = '%s-T1' % stage1.name task1.pre_exec = ['module load anaconda3/2019.03', - 'source activate keras-gpu', - 'export CUDA_VISIBLE_DEVICES=%d' % device] + 'source activate keras-gpu'] task1.executable = 'python3' # Assign executable to the task # Assign arguments for the task executable task1.arguments = ['predict.py', @@ -208,13 +207,11 @@ def args_parser(): print('Images Found:', len(images)) # Create a single pipeline per image pipelines = list() - dev = 0 + for idx in range(0, len(images)): p1 = generate_pipeline(name='P%s' % idx, image=images['Filename'][idx], - image_size=images['Size'][idx], - device=dev) - dev = dev ^ 1 + image_size=images['Size'][idx]) pipelines.append(p1) # Assign the workflow as a set of Pipelines to the Application Manager appman.workflow = set(pipelines) diff --git a/src/utils/multipagetiff.m b/src/utils/multipagetiff.m index d4826de..e5169b6 100644 --- a/src/utils/multipagetiff.m +++ b/src/utils/multipagetiff.m @@ -4,20 +4,26 @@ % when we have an 8-bit image with 3 bands and we want to save it as a multi-page -function multipagetiff(ReadImage, WriteDir) +function multipagetiff(ReadDir, WriteDir) if ~exist(WriteDir, 'dir') mkdir(WriteDir); end - image = geotiffread(ReadImage); - if isunix - writeFileName = strcat(WriteDir,'/multipage-',num2str(ReadImage)); - elseif ispc - writeFileName = strcat(WriteDir,'\multipage-',num2str(ReadImage)); - else - disp 'Something went wrong'; - end + image_files = dir(fullfile(ReadDir, '*.tif')); + totalFiles = numel(image_files); + + for i =1:totalFiles + ReadImage = image_files(i).name; + if isunix + image = geotiffread(strcat(ReadDir,'/',ReadImage)); + writeFileName = strcat(WriteDir,'/',strtok(ReadImage,'.'), '-multipage.tif'); + elseif ispc + image = geotiffread(strcat(ReadDir,'\',ReadImage)); + writeFileName = strcat(WriteDir,'\',strtok(ReadImage,'.'), '-multipage.tif'); + else + disp 'Something went wrong'; + end - saveastiff(image,writeFileName); + saveastiff(image,writeFileName); end \ No newline at end of file From 85d44665624d6c8e1ad71fa29cb40ca5e43bbeef Mon 
Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Wed, 16 Oct 2019 15:51:49 -0400 Subject: [PATCH 14/31] More updates --- src/classification/mosaic.m | 2 +- src/classification/predict.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index e813a9b..107a82c 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -59,6 +59,6 @@ function mosaic(FileName, FilePath, WriteDir) subR.XLimWorld = sort(xlimits); subR.YLimWorld = sort(ylimits); info = geotiffinfo(File); - writeFileName=[strtok(FileName, '.'),'-mask-predicted.tif']; + writeFileName=fullfile(WriteDir,strcat(strtok(FileName, '.'),'-mask-predicted.tif')); geotiffwrite(writeFileName,B,subR,'GeoKeyDirectoryTag',info.GeoTIFFTags.GeoKeyDirectoryTag,'TiffTags',struct('Compression',Tiff.Compression.None)) end \ No newline at end of file diff --git a/src/classification/predict.py b/src/classification/predict.py index cbdc47a..5a341ae 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -62,14 +62,18 @@ def sliding_window(img, stepSize, windowSize): def main(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=str, - help='Input Multipage Image', required=True) + help='Path and Filename of the 3-Band Multipage WV \ + Image', required=True) parser.add_argument('-o', '--output_folder', type=str, default='./' help='Path where output will be stored.') args = parser.parse_args() model = get_model() model.load_weights(weights_path) - outPath = args.output_folder + "data/WV_predicted" + head, tail = os.path.split(args.input) + getName = tail.split('-multipage.tif') + outPath = args.output_folder + "data/predicted_tiles/" + getName[0] + if not os.path.exists(outPath): os.makedirs(outPath) From 48265f8d7b4ff52569891eb1bc01637d3aaf5438 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:02:39 -0600 Subject: [PATCH 15/31] Update src/classification/predict.py Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/predict.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/classification/predict.py b/src/classification/predict.py index 5a341ae..424b441 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -64,7 +64,8 @@ def main(): parser.add_argument('-i', '--input', type=str, help='Path and Filename of the 3-Band Multipage WV \ Image', required=True) - parser.add_argument('-o', '--output_folder', type=str, default='./' + parser.add_argument('-o', '--output_folder', type=str, default='./', + help='Path where output will be stored.') args = parser.parse_args() @@ -103,4 +104,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() From 109d2e101e36f3017ebcd81e641836fc018b6087 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:03:05 -0600 Subject: [PATCH 16/31] Update src/utils/multipagetiff.m Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/utils/multipagetiff.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/multipagetiff.m b/src/utils/multipagetiff.m index e5169b6..9b2a35f 100644 --- a/src/utils/multipagetiff.m +++ b/src/utils/multipagetiff.m @@ -25,5 +25,5 @@ function multipagetiff(ReadDir, WriteDir) disp 'Something went wrong'; end - saveastiff(image,writeFileName); -end \ No newline at end of file + 
saveastiff(image,char(writeFileName)); +end From 1580a7150a50a1214540764faef90999c6ddbf31 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:12:12 -0600 Subject: [PATCH 17/31] Update src/classification/mosaic.m Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/mosaic.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index 107a82c..f73483c 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -23,7 +23,7 @@ function mosaic(FileName, FilePath, WriteDir) desired_row_size=(patch_size/2)*ceil(size(img,1)/(patch_size/2)); desired_col_size=(patch_size/2)*ceil(size(img,2)/(patch_size/2)); - image = zeros(desired_row_size,desired_col_size,size(img,3),'int16'); + image = zeros(desired_row_size,desired_col_size,size(img,3),'int8'); image(1:size(img,1),1:size(img,2),:) = img; a=1:patch_size/2:size(image, 1); @@ -61,4 +61,4 @@ function mosaic(FileName, FilePath, WriteDir) info = geotiffinfo(File); writeFileName=fullfile(WriteDir,strcat(strtok(FileName, '.'),'-mask-predicted.tif')); geotiffwrite(writeFileName,B,subR,'GeoKeyDirectoryTag',info.GeoTIFFTags.GeoKeyDirectoryTag,'TiffTags',struct('Compression',Tiff.Compression.None)) -end \ No newline at end of file +end From babd811fe74cfbe9fc5c03029bbac194a4996b6e Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:12:42 -0600 Subject: [PATCH 18/31] Update src/classification/mosaic.m Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/mosaic.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index f73483c..63f1f8e 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -8,7 +8,7 @@ function mosaic(FileName, FilePath, WriteDir) if isunix path = strcat('data/predicted_tiles',strtok(FileName, '.')); elseif ispc - path = strcat('data\predicted_tiles',strtok(FileName, '.')); + path = strcat('data\predicted_tiles\',strtok(FileName, '.')); else path = ''; disp 'Something went wrong'; From 8456036b92f59589afb01dc9e48014643421ad79 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:12:57 -0600 Subject: [PATCH 19/31] Update src/classification/mosaic.m Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/mosaic.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index 63f1f8e..16768b0 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -6,7 +6,7 @@ function mosaic(FileName, FilePath, WriteDir) File = fullfile(FilePath,FileName); [img, R] = geotiffread(File); if isunix - path = strcat('data/predicted_tiles',strtok(FileName, '.')); + path = strcat('data/predicted_tiles/',strtok(FileName, '.')); elseif ispc path = strcat('data\predicted_tiles\',strtok(FileName, '.')); else From 53afdcb942e317a4b99cf0d080cdc935db355f30 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:13:06 -0600 Subject: [PATCH 20/31] Update src/classification/mosaic.m Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/mosaic.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index 16768b0..8ea4277 100644 --- 
a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -40,7 +40,7 @@ function mosaic(FileName, FilePath, WriteDir) k=1; for i=1:size(row,2) for j=1:size(col,2) - I=imread(fullfile(WriteDir, path, files_ordered{1,k})); + I=imread(fullfile(path, files_ordered{1,k})); A(row(i):row(i)+patch_size-1,col(j):col(j)+patch_size-1)=I; B=max(A,B); if k~=totalFiles From 8d3401c3883cc9c6d8f957fb0c82ddc67a4fda90 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:13:12 -0600 Subject: [PATCH 21/31] Update src/classification/mosaic.m Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/mosaic.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index 8ea4277..70a0e36 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -33,7 +33,7 @@ function mosaic(FileName, FilePath, WriteDir) A=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single'); B=zeros(a(1,end)+(patch_size/2)-1,b(1,end)+(patch_size/2)-1,'single'); - files = dir(fullfile(WriteDir, path, '*.tif')); + files = dir(fullfile(path, '*.tif')); files_ordered = natsortfiles({files.name}); totalFiles = numel(files); From c81a713462e3b611f8fd24623db194256ad77719 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 14 Feb 2020 12:13:24 -0600 Subject: [PATCH 22/31] Update src/classification/predict.py Co-Authored-By: samiradaneshgar <43654320+samiradaneshgar@users.noreply.github.com> --- src/classification/predict.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/classification/predict.py b/src/classification/predict.py index 424b441..0f14b9e 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -97,7 +97,8 @@ def main(): mask = predict(t_img, model, patch_sz=PATCH_SZ, n_classes=N_CLASSES).transpose([2, 0, 1]) cnt = str(i) - imagename = "image" + cnt + ".tif" + imagename = cnt + ".tif" + fullpath = os.path.join(outPath, imagename) tiff.imsave(fullpath, mask) i += 1 From 942c324f6bb3425daa69cbd02da95a1c8ed7b1de Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Thu, 27 Feb 2020 13:47:15 -0500 Subject: [PATCH 23/31] Updating EnTK pipeline for testing --- src/classification/mosaic.m | 6 +++--- src/classification/predict.py | 27 +++++++++++---------------- src/entk_script/entk_script.py | 5 +++-- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/classification/mosaic.m b/src/classification/mosaic.m index 70a0e36..3d8ca55 100644 --- a/src/classification/mosaic.m +++ b/src/classification/mosaic.m @@ -2,13 +2,13 @@ % License: MIT % Copyright: 2018-2019 -function mosaic(FileName, FilePath, WriteDir) +function mosaic(FileName, FilePath, PredictedPath, WriteDir) File = fullfile(FilePath,FileName); [img, R] = geotiffread(File); if isunix - path = strcat('data/predicted_tiles/',strtok(FileName, '.')); + path = strcat(PredictedPath, '/data/predicted_tiles/', strtok(FileName, '.')); elseif ispc - path = strcat('data\predicted_tiles\',strtok(FileName, '.')); + path = strcat(PredictedPath, '\data\predicted_tiles\', strtok(FileName, '.')); else path = ''; disp 'Something went wrong'; diff --git a/src/classification/predict.py b/src/classification/predict.py index 0f14b9e..7ab6379 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -10,8 +10,7 @@ import numpy as np import tifffile as tiff -from train_unet import weights_path, get_model, normalize, PATCH_SZ, 
N_CLASSES - +from train_unet import get_model, normalize, PATCH_SZ, N_CLASSES def predict(x, model, patch_sz=160, n_classes=2): img_height = x.shape[0] @@ -51,36 +50,34 @@ def predict(x, model, patch_sz=160, n_classes=2): prediction[x0:x1, y0:y1, :] = patches_predict[k, :, :, :] return prediction[:img_height, :img_width, :] - # generating sliding window def sliding_window(img, stepSize, windowSize): for y in range(0, img.shape[0], stepSize): for x in range(0, img.shape[1], stepSize): yield (x, y, img[y:y + windowSize, x:x + windowSize, :]) - def main(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=str, - help='Path and Filename of the 3-Band Multipage WV \ - Image', required=True) + help='Path and Filename of the 3-Band Multipage WV \ Image', required=True) + parser.add_argument('-w', '--weights_path', type=str, + help='Path to the weights') parser.add_argument('-o', '--output_folder', type=str, default='./', + help='Path where output will be stored.') - help='Path where output will be stored.') args = parser.parse_args() model = get_model() - model.load_weights(weights_path) + model.load_weights(args.weights_path) head, tail = os.path.split(args.input) getName = tail.split('-multipage.tif') - outPath = args.output_folder + "data/predicted_tiles/" + getName[0] - + outPath = args.output_folder + "data/predicted_tiles/"+ getName[0] if not os.path.exists(outPath): os.makedirs(outPath) image = normalize(tiff.imread(args.input).transpose([1, 2, 0])) wind_row, wind_col = 800, 800 # dimensions of the image - windowSize = 800 + windowSize = 800 stepSize = 400 desired_row_size = stepSize * math.ceil(image.shape[0] / stepSize) @@ -95,14 +92,12 @@ def main(): # the image which has to be predicted t_img = img[y:y + wind_row, x:x + wind_col, :] mask = predict(t_img, model, patch_sz=PATCH_SZ, - n_classes=N_CLASSES).transpose([2, 0, 1]) + n_classes=N_CLASSES).transpose([2, 0, 1]) cnt = str(i) imagename = cnt + ".tif" - fullpath = os.path.join(outPath, imagename) tiff.imsave(fullpath, mask) i += 1 - -if __name__ == '__main__': - main() +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py index af3765d..33e23b1 100644 --- a/src/entk_script/entk_script.py +++ b/src/entk_script/entk_script.py @@ -69,7 +69,7 @@ def generate_pipeline(name, image, image_size): task0.executable = 'matlab' # Assign executable to the task # Assign arguments for the task executable task0.arguments = ["-nodisplay", "-nosplash", "-r", - "multipagetiff('%s','$NODE_LFS_PATH/%s');exit" + "multipagetiff('./','$NODE_LFS_PATH/%s');exit" % (image.split('/')[-1], task0.name)] task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'), os.path.abspath('../utils/saveastiff.m')] @@ -95,7 +95,8 @@ def generate_pipeline(name, image, image_size): task1.arguments = ['predict.py', '--input', '$NODE_LFS_PATH/%s/multipage-%s' % (task0.name, image.split('/')[-1]), - '--output_folder', '$NODE_LFS_PATH/%s' % task1.name] + '--output_folder', '$NODE_LFS_PATH/%s' % task1.name, + '--weights_path', 'weights/unet_weights.hdf5'] task1.link_input_data = ['$SHARED/unet_weights.hdf5 >' + 'weights/unet_weights.hdf5'] task1.upload_input_data = [os.path.abspath('../classification/predict.py'), From 1e4b0b76c6f37805ea283cb40c2dbf25839300a4 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Thu, 5 Mar 2020 12:10:06 -0500 Subject: [PATCH 24/31] Finalizing EnTK pipeline --- src/entk_script/entk_script.py | 36 
++++++++++++++++------------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py index 33e23b1..5dae0e3 100644 --- a/src/entk_script/entk_script.py +++ b/src/entk_script/entk_script.py @@ -13,6 +13,7 @@ import argparse import os import pandas as pd +import time from radical.entk import Pipeline, Stage, Task, AppManager @@ -36,7 +37,7 @@ def generate_discover_pipeline(path): '--filesize'] task.download_output_data = ['images.csv'] task.upload_input_data = ['image_disc.py'] - task.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + task.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, 'thread_type': 'OpenMP'} stage.add_tasks(task) # Add Stage to the Pipeline @@ -61,7 +62,7 @@ def generate_pipeline(name, image, image_size): entk_pipeline.name = name # Create a Stage object stage0 = Stage() - stage0.name = '%s-S0' % (name) + stage0.name = '%s-S0' % name # Create Task 1, training task0 = Task() task0.name = '%s-T0' % stage0.name @@ -70,21 +71,20 @@ def generate_pipeline(name, image, image_size): # Assign arguments for the task executable task0.arguments = ["-nodisplay", "-nosplash", "-r", "multipagetiff('./','$NODE_LFS_PATH/%s');exit" - % (image.split('/')[-1], task0.name)] + % (task0.name)] task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'), os.path.abspath('../utils/saveastiff.m')] task0.link_input_data = [image] - task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, 'thread_type': 'OpenMP'} - task0.lfs_per_process = image_size + task0.lfs_per_process = int(image_size) stage0.add_tasks(task0) # Add Stage to the Pipeline entk_pipeline.add_stages(stage0) - # Create a Stage object stage1 = Stage() - stage1.name = '%s-S1' % (name) + stage1.name = '%s-S1' % name # Create Task 1, training task1 = Task() task1.name = '%s-T1' % stage1.name @@ -93,9 +93,9 @@ def generate_pipeline(name, image, image_size): task1.executable = 'python3' # Assign executable to the task # Assign arguments for the task executable task1.arguments = ['predict.py', - '--input', '$NODE_LFS_PATH/%s/multipage-%s' % - (task0.name, image.split('/')[-1]), - '--output_folder', '$NODE_LFS_PATH/%s' % task1.name, + '--input', '$NODE_LFS_PATH/%s/%s-multipage.tif' % + (task0.name, image.split('/')[-1].split('.')[0]), + '--output_folder', '$NODE_LFS_PATH/%s/' % task1.name, '--weights_path', 'weights/unet_weights.hdf5'] task1.link_input_data = ['$SHARED/unet_weights.hdf5 >' + 'weights/unet_weights.hdf5'] @@ -106,16 +106,15 @@ def generate_pipeline(name, image, image_size): 'train_unet.py'), os.path.abspath('../classification/' + 'unet_model.py')] - task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, 'thread_type': 'OpenMP'} - task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1, + task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, 'thread_type': 'OpenMP'} task1.tag = task0.name stage1.add_tasks(task1) # Add Stage to the Pipeline entk_pipeline.add_stages(stage1) - # Create a Stage object stage2 = Stage() stage2.name = '%s-S2' % (name) @@ -126,7 +125,7 @@ def generate_pipeline(name, image, image_size): task2.executable = 'matlab' # Assign executable to the task # Assign arguments for the task executable task2.arguments = ["-nodisplay", "-nosplash", "-r", - "mosaic('%s', './', $NODE_LFS_PATH/%s');exit" 
+ "mosaic('%s', './', '$NODE_LFS_PATH/%s','./');exit" % (image.split('/')[-1], task1.name)] task2.link_input_data = [image] task2.upload_input_data = [os.path.abspath('../classification/mosaic.m'), @@ -134,14 +133,13 @@ def generate_pipeline(name, image, image_size): 'natsortfiles.m'), os.path.abspath('../classification/' + 'natsort.m')] - task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, 'thread_type': 'OpenMP'} task2.tag = task1.name stage2.add_tasks(task2) # Add Stage to the Pipeline entk_pipeline.add_stages(stage2) - return entk_pipeline @@ -190,7 +188,7 @@ def args_parser(): 'queue': args.queue} try: # Create Application Manager - appman = AppManager(port=32773, hostname='localhost', name=args.name, + appman = AppManager(port=33235, hostname='two.radical-project.org', name=args.name, autoterminate=False, write_workflow=True) # Assign resource manager to the Application Manager @@ -210,10 +208,10 @@ def args_parser(): pipelines = list() for idx in range(0, len(images)): - p1 = generate_pipeline(name='P%s' % idx, + p1 = generate_pipeline(name='P%03d' % idx, image=images['Filename'][idx], image_size=images['Size'][idx]) - pipelines.append(p1) + pipelines.append(p1) # Assign the workflow as a set of Pipelines to the Application Manager appman.workflow = set(pipelines) From d09f4616c1eb43f9c1776c76db144a0a13683e8a Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Thu, 5 Mar 2020 12:23:07 -0500 Subject: [PATCH 25/31] Travis fixes --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5f97bea..24b4087 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,10 @@ install: # - pip install . # - pip install coverage - pip install keras - - pip install radical.entk + - pip install radical.utils==0.72 + - pip install radical.saga==0.72 + - pip install radical.pilot==0.72 + - pip install radical.entk==0.72 - pip install flake8 - pip install pylint # - pip install codecov From e28b6ecd7a0011ed87157738a39cc9aa7252b267 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Thu, 5 Mar 2020 14:23:57 -0500 Subject: [PATCH 26/31] Pylint and flake8 fixes --- src/classification/predict.py | 17 +++++++++++------ src/entk_script/entk_script.py | 26 +++++++++++++------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/classification/predict.py b/src/classification/predict.py index 7ab6379..8cf2780 100644 --- a/src/classification/predict.py +++ b/src/classification/predict.py @@ -12,6 +12,7 @@ from train_unet import get_model, normalize, PATCH_SZ, N_CLASSES + def predict(x, model, patch_sz=160, n_classes=2): img_height = x.shape[0] img_width = x.shape[1] @@ -50,28 +51,31 @@ def predict(x, model, patch_sz=160, n_classes=2): prediction[x0:x1, y0:y1, :] = patches_predict[k, :, :, :] return prediction[:img_height, :img_width, :] + # generating sliding window def sliding_window(img, stepSize, windowSize): for y in range(0, img.shape[0], stepSize): for x in range(0, img.shape[1], stepSize): yield (x, y, img[y:y + windowSize, x:x + windowSize, :]) + def main(): parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', type=str, - help='Path and Filename of the 3-Band Multipage WV \ Image', required=True) + help='Path and Filename of the 3-Band Multipage WV \ + Image', required=True) parser.add_argument('-w', '--weights_path', type=str, - help='Path to the weights') + help='Path to the weights') 
parser.add_argument('-o', '--output_folder', type=str, default='./', - help='Path where output will be stored.') + help='Path where output will be stored.') args = parser.parse_args() model = get_model() model.load_weights(args.weights_path) - head, tail = os.path.split(args.input) + _, tail = os.path.split(args.input) getName = tail.split('-multipage.tif') - outPath = args.output_folder + "data/predicted_tiles/"+ getName[0] + outPath = args.output_folder + "data/predicted_tiles/" + getName[0] if not os.path.exists(outPath): os.makedirs(outPath) @@ -99,5 +103,6 @@ def main(): tiff.imsave(fullpath, mask) i += 1 + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py index 5dae0e3..e7ea63f 100644 --- a/src/entk_script/entk_script.py +++ b/src/entk_script/entk_script.py @@ -13,7 +13,6 @@ import argparse import os import pandas as pd -import time from radical.entk import Pipeline, Stage, Task, AppManager @@ -37,8 +36,8 @@ def generate_discover_pipeline(path): '--filesize'] task.download_output_data = ['images.csv'] task.upload_input_data = ['image_disc.py'] - task.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, - 'thread_type': 'OpenMP'} + task.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + 'process_type': None, 'thread_type': 'OpenMP'} stage.add_tasks(task) # Add Stage to the Pipeline pipeline.add_stages(stage) @@ -75,8 +74,8 @@ def generate_pipeline(name, image, image_size): task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'), os.path.abspath('../utils/saveastiff.m')] task0.link_input_data = [image] - task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, - 'thread_type': 'OpenMP'} + task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + 'process_type': None, 'thread_type': 'OpenMP'} task0.lfs_per_process = int(image_size) stage0.add_tasks(task0) @@ -106,10 +105,10 @@ def generate_pipeline(name, image, image_size): 'train_unet.py'), os.path.abspath('../classification/' + 'unet_model.py')] - task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, - 'thread_type': 'OpenMP'} - task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, - 'thread_type': 'OpenMP'} + task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + 'process_type': None, 'thread_type': 'OpenMP'} + task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1, + 'process_type': None, 'thread_type': 'OpenMP'} task1.tag = task0.name stage1.add_tasks(task1) @@ -133,8 +132,8 @@ def generate_pipeline(name, image, image_size): 'natsortfiles.m'), os.path.abspath('../classification/' + 'natsort.m')] - task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1,'process_type': None, - 'thread_type': 'OpenMP'} + task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1, + 'process_type': None, 'thread_type': 'OpenMP'} task2.tag = task1.name stage2.add_tasks(task2) @@ -188,8 +187,9 @@ def args_parser(): 'queue': args.queue} try: # Create Application Manager - appman = AppManager(port=33235, hostname='two.radical-project.org', name=args.name, - autoterminate=False, write_workflow=True) + appman = AppManager(port=33235, hostname='two.radical-project.org', + name=args.name, autoterminate=False, + write_workflow=True) # Assign resource manager to the Application Manager appman.resource_desc = res_dict From 401089ace134913d100f18d1b606109080a21c4e Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 6 
Mar 2020 13:14:27 -0500
Subject: [PATCH 27/31] Trying to fix travis

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 24b4087..9665bf8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,6 +31,8 @@ install:
   - pip install radical.saga==0.72
   - pip install radical.pilot==0.72
   - pip install radical.entk==0.72
+  - pip install tifffile
+  - pip install pandas
   - pip install flake8
   - pip install pylint
 # - pip install codecov

From acaecc12e2b8139b9321073bd0ecb17e71db88cd Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Thu, 6 Aug 2020 20:35:21 -0400
Subject: [PATCH 28/31] Updated entk script version

---
 src/entk_script/entk_script.py | 55 ++++++++++++++--------------------
 1 file changed, 22 insertions(+), 33 deletions(-)

diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index e7ea63f..b8359bf 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -17,7 +17,7 @@ from radical.entk import Pipeline, Stage, Task, AppManager
 
-def generate_discover_pipeline(path):
+def generate_discover_pipeline(path, env_path):
     '''
     This function takes as an input a path on Bridges and returns a pipeline
     that will provide a file for all the images that exist in that path.
@@ -30,7 +30,7 @@ def generate_discover_pipeline(path):
     task = Task()
     task.name = 'Disc-T0'
     task.pre_exec = ['module load anaconda3/2019.03',
-                     'source activate keras-gpu']
+                     'source activate %s' % env_path]
     task.executable = 'python3' # Assign executable to the task
     task.arguments = ['image_disc.py', '%s' % path, '--filename=images.csv',
                       '--filesize']
@@ -45,7 +45,7 @@ def generate_discover_pipeline(path):
     return pipeline
 
 
-def generate_pipeline(name, image, image_size):
+def generate_pipeline(name, image, image_size, env_path):
 
     '''
     This function creates a pipeline for an image that will be analyzed.
@@ -65,18 +65,14 @@ def generate_pipeline(name, image, image_size): # Create Task 1, training task0 = Task() task0.name = '%s-T0' % stage0.name - task0.pre_exec = ['module load matlab'] - task0.executable = 'matlab' # Assign executable to the task + task0.pre_exec = ['module load anaconda3/2019.03', + 'source activate %s' % env_path] + task0.executable = 'python' # Assign executable to the task # Assign arguments for the task executable - task0.arguments = ["-nodisplay", "-nosplash", "-r", - "multipagetiff('./','$NODE_LFS_PATH/%s');exit" - % (task0.name)] - task0.upload_input_data = [os.path.abspath('../utils/multipagetiff.m'), - os.path.abspath('../utils/saveastiff.m')] - task0.link_input_data = [image] + task0.arguments = ['tile_unet.py', '--input', image, '--output', './'] + task0.upload_input_data = [os.path.abspath('../tiling/tile_unet.py')] task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1, 'process_type': None, 'thread_type': 'OpenMP'} - task0.lfs_per_process = int(image_size) stage0.add_tasks(task0) # Add Stage to the Pipeline @@ -88,23 +84,21 @@ def generate_pipeline(name, image, image_size): task1 = Task() task1.name = '%s-T1' % stage1.name task1.pre_exec = ['module load anaconda3/2019.03', - 'source activate keras-gpu'] + 'source activate %s' % env_path] task1.executable = 'python3' # Assign executable to the task # Assign arguments for the task executable - task1.arguments = ['predict.py', - '--input', '$NODE_LFS_PATH/%s/%s-multipage.tif' % - (task0.name, image.split('/')[-1].split('.')[0]), - '--output_folder', '$NODE_LFS_PATH/%s/' % task1.name, - '--weights_path', 'weights/unet_weights.hdf5'] - task1.link_input_data = ['$SHARED/unet_weights.hdf5 >' + - 'weights/unet_weights.hdf5'] - task1.upload_input_data = [os.path.abspath('../classification/predict.py'), + task1.arguments = ['predict_unet.py', + '--input', './' % + '-o', './', '-w', model_path] + task1.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/image_tiles >' % (entk_pipeline.name, stage0.name, task0.name) + + 'image_tiles'] + task1.upload_input_data = [os.path.abspath('../classification/predict_unet.py'), os.path.abspath('../classification/' + 'gen_patches.py'), os.path.abspath('../classification/' + 'train_unet.py'), os.path.abspath('../classification/' + - 'unet_model.py')] + 'model.py')] task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1, 'process_type': None, 'thread_type': 'OpenMP'} task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1, @@ -120,18 +114,13 @@ def generate_pipeline(name, image, image_size): # Create Task 1, training task2 = Task() task2.name = '%s-T2' % stage2.name - task2.pre_exec = ['module load matlab'] - task2.executable = 'matlab' # Assign executable to the task + task2.pre_exec = ['module load anaconda3/2019.03'] + task2.executable = 'python' # Assign executable to the task # Assign arguments for the task executable - task2.arguments = ["-nodisplay", "-nosplash", "-r", - "mosaic('%s', './', '$NODE_LFS_PATH/%s','./');exit" - % (image.split('/')[-1], task1.name)] - task2.link_input_data = [image] - task2.upload_input_data = [os.path.abspath('../classification/mosaic.m'), - os.path.abspath('../classification/' + - 'natsortfiles.m'), - os.path.abspath('../classification/' + - 'natsort.m')] + task2.arguments = ["mosaic_unet.py", "-iw", image, '-i', './', '-o', './'] + task2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/predicted_tiles >' % (entk_pipeline.name, stage1.name, task1.name) + + 'predicted_tiles'] + task2.upload_input_data = 
[os.path.abspath('../mosaic/mosaic_unet.py')] task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1, 'process_type': None, 'thread_type': 'OpenMP'} task2.tag = task1.name From 31b7192e1aff7c47ad7fec3f43e8771edded96c3 Mon Sep 17 00:00:00 2001 From: Ioannis Paraskevakos Date: Fri, 7 Aug 2020 12:02:41 -0500 Subject: [PATCH 29/31] Updating entk script for rivers --- src/classification/model.py | 143 +++++++++++++++++++++++++++++++++ src/entk_script/entk_script.py | 25 ++++-- src/mosaic/mosaic_unet.py | 70 ++++++++++++++++ 3 files changed, 231 insertions(+), 7 deletions(-) create mode 100644 src/classification/model.py create mode 100644 src/mosaic/mosaic_unet.py diff --git a/src/classification/model.py b/src/classification/model.py new file mode 100644 index 0000000..4447511 --- /dev/null +++ b/src/classification/model.py @@ -0,0 +1,143 @@ +import numpy as np +import os +import skimage.io as io +import skimage.transform as trans +import numpy as np +from keras.models import * +from keras.layers import * +from keras.optimizers import * +from keras.callbacks import ModelCheckpoint, LearningRateScheduler +from keras import backend as k +from keras.models import Model +from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout +from keras.optimizers import Adam +from keras.utils import plot_model + +# Custom IoU metric +def mean_iou(y_true, y_pred): + prec = [] + for t in np.arange(0.5, 1.0, 0.05): + y_pred_ = tf.to_int32(y_pred > t) + score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2) + K.get_session().run(tf.local_variables_initializer()) + with tf.control_dependencies([up_opt]): + score = tf.identity(score) + prec.append(score) + return K.mean(K.stack(prec), axis=0) + +def recall_m(y_true, y_pred): + true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) + possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) + recall = true_positives / (possible_positives + K.epsilon()) + return recall + +def precision_m(y_true, y_pred): + true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) + predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) + precision = true_positives / (predicted_positives + K.epsilon()) + return precision + +def f1_m(y_true, y_pred): + precision = precision_m(y_true, y_pred) + recall = recall_m(y_true, y_pred) + return 2*((precision*recall)/(precision+recall+K.epsilon())) + +def unet(n_classes=1, im_sz=224, n_channels=3, n_filters_start=32, growth_factor=2, upconv=True): + droprate=0.25 + n_filters = n_filters_start + inputs = Input((im_sz, im_sz, n_channels)) + #inputs = BatchNormalization()(inputs) + conv1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(inputs) + conv1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv1) + pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) + #pool1 = Dropout(droprate)(pool1) + + n_filters *= growth_factor + pool1 = BatchNormalization()(pool1) + conv2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool1) + conv2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv2) + pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) + pool2 = Dropout(droprate)(pool2) + + n_filters *= growth_factor + pool2 = BatchNormalization()(pool2) + conv3 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool2) + conv3 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv3) + pool3 = MaxPooling2D(pool_size=(2, 2))(conv3) + pool3 = Dropout(droprate)(pool3) + + n_filters *= growth_factor + pool3 = 
BatchNormalization()(pool3) + conv4_0 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool3) + conv4_0 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv4_0) + pool4_1 = MaxPooling2D(pool_size=(2, 2))(conv4_0) + pool4_1 = Dropout(droprate)(pool4_1) + + n_filters *= growth_factor + pool4_1 = BatchNormalization()(pool4_1) + conv4_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool4_1) + conv4_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv4_1) + pool4_2 = MaxPooling2D(pool_size=(2, 2))(conv4_1) + pool4_2 = Dropout(droprate)(pool4_2) + + n_filters *= growth_factor + conv5 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(pool4_2) + conv5 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv5) + + n_filters //= growth_factor + if upconv: + up6_1 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv5), conv4_1]) + else: + up6_1 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4_1]) + up6_1 = BatchNormalization()(up6_1) + conv6_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up6_1) + conv6_1 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv6_1) + conv6_1 = Dropout(droprate)(conv6_1) + + n_filters //= growth_factor + if upconv: + up6_2 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv6_1), conv4_0]) + else: + up6_2 = concatenate([UpSampling2D(size=(2, 2))(conv6_1), conv4_0]) + up6_2 = BatchNormalization()(up6_2) + conv6_2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up6_2) + conv6_2 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv6_2) + conv6_2 = Dropout(droprate)(conv6_2) + + n_filters //= growth_factor + if upconv: + up7 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv6_2), conv3]) + else: + up7 = concatenate([UpSampling2D(size=(2, 2))(conv6_2), conv3]) + up7 = BatchNormalization()(up7) + conv7 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up7) + conv7 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv7) + conv7 = Dropout(droprate)(conv7) + + n_filters //= growth_factor + if upconv: + up8 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv7), conv2]) + else: + up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2]) + up8 = BatchNormalization()(up8) + conv8 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up8) + conv8 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv8) + conv8 = Dropout(droprate)(conv8) + + n_filters //= growth_factor + if upconv: + up9 = concatenate([Conv2DTranspose(n_filters, (2, 2), strides=(2, 2), padding='same')(conv8), conv1]) + else: + up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1]) + conv9 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(up9) + conv9 = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(conv9) + + conv10 = Conv2D(n_classes, (1, 1), activation='sigmoid')(conv9) + + model = Model(inputs=inputs, outputs=conv10) + + model.compile(optimizer=Adam(lr = 1e-5), loss = 'binary_crossentropy', metrics = ['accuracy',f1_m,precision_m, recall_m]) + model.summary() + + return model + diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py index b8359bf..59479b9 100644 --- a/src/entk_script/entk_script.py +++ b/src/entk_script/entk_script.py @@ -90,7 +90,8 @@ def generate_pipeline(name, image, image_size, env_path): task1.arguments = 
diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index b8359bf..59479b9 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -90,7 +90,8 @@ def generate_pipeline(name, image, image_size, env_path):
     task1.arguments = ['predict_unet.py',
                        '--input', './' %
                        '-o', './', '-w', model_path]
-    task1.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/image_tiles >' % (entk_pipeline.name, stage0.name, task0.name) +
+    task1.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/image_tiles >' %
+                             (entk_pipeline.name, stage0.name, task0.name) +
                              'image_tiles']
     task1.upload_input_data = [os.path.abspath('../classification/predict_unet.py'),
                                os.path.abspath('../classification/' +
@@ -114,11 +115,13 @@ def generate_pipeline(name, image, image_size, env_path):
     # Create Task 1, training
     task2 = Task()
     task2.name = '%s-T2' % stage2.name
-    task2.pre_exec = ['module load anaconda3/2019.03']
+    task2.pre_exec = ['module load anaconda3/2019.03'
+                      'source activate %s' % env_path]
     task2.executable = 'python' # Assign executable to the task
     # Assign arguments for the task executable
     task2.arguments = ["mosaic_unet.py", "-iw", image, '-i', './', '-o', './']
-    task2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/predicted_tiles >' % (entk_pipeline.name, stage1.name, task1.name) +
+    task2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/predicted_tiles >' %
+                             (entk_pipeline.name, stage1.name, task1.name) +
                              'predicted_tiles']
     task2.upload_input_data = [os.path.abspath('../mosaic/mosaic_unet.py')]
     task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
@@ -156,6 +159,12 @@ def args_parser():
     parser.add_argument('-w', '--walltime', type=int,
                         help='The amount of time resources are requested in' +
                         ' minutes')
+    parser.add_argument('--env', type=str, help='Path to conda environment' +
+                        ' on Bridges')
+    parser.add_argument('--uname', type=str, help='RabbitMQ user name')
+    parser.add_argument('--pwd', type=str, help='RabbitMQ password')
+    parser.add_argument('--host', type=str, help='RabbitMQ hostname')
+    parser.add_argument('--port', type=int,'RabbitMQ port')
     parser.add_argument('--name', type=str,
                         help='name of the execution. It has to be a unique' +
                              ' value')
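Editorial note on the `link_input_data` directives edited above: EnTK resolves a `'$Pipeline_<p>_Stage_<s>_Task_<t>/src > dst'` string by linking `src` out of the named task's sandbox into the current task's sandbox under the name `dst`. A minimal sketch of the same directive in isolation, using placeholder pipeline, stage, and task names rather than values from this repository:

```python
# Illustrative only: link the 'image_tiles' directory produced by an
# earlier task's sandbox into this task's sandbox under the same name.
from radical.entk import Task

task = Task()
task.name = 'example-T1'
task.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/image_tiles > image_tiles'
                        % ('P000', 'P000-S0', 'P000-S0-T0')]  # placeholder names
```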
@@ -176,15 +185,16 @@ def args_parser():
                 'queue': args.queue}
     try:
         # Create Application Manager
-        appman = AppManager(port=33235, hostname='two.radical-project.org',
+        appman = AppManager(port=args.port, hostname=args.host,
+                            username=args.uname, password=args.pwd,
                             name=args.name, autoterminate=False,
                             write_workflow=True)
         # Assign resource manager to the Application Manager
         appman.resource_desc = res_dict
-        appman.shared_data = [os.path.abspath('../../models/unet_weights.hdf5')]
+        appman.shared_data = [os.path.abspath('../../models/ctpn.01-0.07.hdf5')]
         # Create a task that discovers the dataset
-        disc_pipeline = generate_discover_pipeline(args.input_dir)
+        disc_pipeline = generate_discover_pipeline(args.input_dir, args.env)
         appman.workflow = set([disc_pipeline])
         # Run
@@ -199,7 +209,8 @@ def args_parser():
     for idx in range(0, len(images)):
         p1 = generate_pipeline(name='P%03d' % idx,
                                image=images['Filename'][idx],
-                               image_size=images['Size'][idx])
+                               image_size=images['Size'][idx],
+                               env_path=args.env)
         pipelines.append(p1)
     # Assign the workflow as a set of Pipelines to the Application Manager
     appman.workflow = set(pipelines)

diff --git a/src/mosaic/mosaic_unet.py b/src/mosaic/mosaic_unet.py
new file mode 100644
index 0000000..769dbf1
--- /dev/null
+++ b/src/mosaic/mosaic_unet.py
@@ -0,0 +1,70 @@
+"""
+Authors: Samira Daneshgar-Asl
+License: MIT
+Copyright: 2019-2020
+"""
+import os
+import numpy as np
+import argparse
+import math
+from osgeo import gdal
+from os import listdir
+
+# loading image
+def load_image(path):
+    ds = gdal.Open(path)
+    img_proj = ds.GetProjection()
+    img_geotrans = ds.GetGeoTransform()
+    img_data = ds.ReadAsArray(0, 0, ds.RasterXSize, ds.RasterYSize)
+    del ds
+    image = np.array(img_data, dtype=img_data.dtype)
+    return img_proj, img_geotrans, image
+
+# writing mosaic
+def write_mosaic(filename, img_proj, img_geotrans, img_data):
+    driver = gdal.GetDriverByName("GTiff")
+    bands, (ysize, xsize) = 1, img_data.shape
+    ds = driver.Create(filename, xsize, ysize, bands, gdal.GDT_Float32)
+    ds.SetProjection(img_proj)
+    ds.SetGeoTransform(img_geotrans)
+    ds.GetRasterBand(1).WriteArray(img_data)
+
+def args_parser():
+    parser = argparse.ArgumentParser(description="generates mosaic")
+    parser.add_argument('-iw', '--input_WV', type=str,
+                        help='Path and name of the WorldView image')
+    parser.add_argument('-i', '--input', type=str,
+                        help='Input predicted masks folder')
+    parser.add_argument('-t', '--tile_size', type=int, default=224,
+                        help='Tile size')
+    parser.add_argument('-s', '--step', type=int, default=112,
+                        help='Step size')
+    parser.add_argument('-o', '--output_folder', type=str, default='./',
+                        help='Folder where output mosaic will be stored')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    args = args_parser()
+
+    masks_path = 'predicted_tiles/' + args.input + '/'
+    list = sorted(os.listdir(masks_path), key=lambda x: int(os.path.splitext(x)[0]))
+
+    out_path = 'predicted_mosaic/' + args.output_folder
+    if not os.path.exists(out_path):
+        os.makedirs(out_path)
+
+    proj, geotrans, image = load_image(args.input_WV)
+    desired_row_size = args.step * (math.ceil(image.shape[1] / args.step) + 1)
+    desired_col_size = args.step * (math.ceil(image.shape[2] / args.step) + 1)
+    mask = np.zeros((desired_row_size, desired_col_size), dtype=np.float64)
+
+    k = 0
+    for j in range(0, mask.shape[1] - (args.tile_size - args.step), args.step):
+        for i in range(0, mask.shape[0] - (args.tile_size - args.step), args.step):
+            mask_name = list[k]
+            mask_proj, mask_geotrans, mask_tile = load_image(masks_path + mask_name)
+            mask[i:i + args.tile_size, j:j + args.tile_size] = np.maximum(mask_tile[:, :], mask[i:i + args.tile_size, j:j + args.tile_size])
+            k += 1
+    write_mosaic(out_path + "%s_predicted.tif" % args.input, proj, geotrans, mask[0:image.shape[1], 0:image.shape[2]])
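Editorial note: the core of `mosaic_unet.py` is its overlap-and-maximum stitching rule. With `tile_size=224` and `step=112`, every pixel is covered by up to four overlapping tiles, and the mosaic keeps the highest predicted value among them. A self-contained toy sketch of that rule (synthetic data, sizes scaled down; not repository code):

```python
# Toy illustration of the stitching rule in mosaic_unet.py: overlapping
# tiles are merged with an element-wise maximum, so each pixel keeps the
# highest prediction among the tiles covering it.
import numpy as np

tile_size, step = 4, 2          # stand-ins for 224 and 112
canvas = np.zeros((8, 8))       # stand-in for the full-image mask

rng = np.random.default_rng(0)
for j in range(0, canvas.shape[1] - (tile_size - step), step):
    for i in range(0, canvas.shape[0] - (tile_size - step), step):
        tile = rng.random((tile_size, tile_size))   # stand-in for one predicted mask tile
        region = canvas[i:i + tile_size, j:j + tile_size]
        canvas[i:i + tile_size, j:j + tile_size] = np.maximum(region, tile)
```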
From ae7cd2caee367b6a12ec42c25964eda8d4395f1f Mon Sep 17 00:00:00 2001
From: Bradley Spitzbart
Date: Wed, 12 Aug 2020 14:32:16 -0600
Subject: [PATCH 30/31] Update README with installation and execution
 instructions

---
 README.md | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 92 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ec3ed2c..732f456 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,92 @@
-# Rivers (Arctic hydrology)
-
-We provide a classification algorithm for ice surface features from high-resolution imagery. This algorithm was developed by convolutional neural network training to detect regions of large and small rivers and to distinguish them from crevasses and non-channelized slush. We also provide a detection algorithm to extract polyline water features from the classified high-probability river areas.
-
+# Rivers (Arctic hydrology)
+
+We provide a classification algorithm for ice surface features from high-resolution imagery. The algorithm was developed by training a convolutional neural network to detect regions of large and small rivers and to distinguish them from crevasses and non-channelized slush. We also provide a detection algorithm that extracts polyline water features from the classified high-probability river areas.
+
+## Prerequisites - all available on Bridges via the commands below
+- Linux
+- Python 3
+- CPU and NVIDIA GPU + CUDA CuDNN
+
+## Software Dependencies - these will be installed automatically with the installation below.
+- numpy
+- scipy
+- tifffile
+- keras >= 1.0
+- tensorboardX==1.8
+- opencv-python
+- rasterio
+- affine
+
+## Installation
+Preliminaries:
+These instructions are specific to XSEDE Bridges, but other resources can be used as long as CUDA, Python 3, and an NVIDIA P100 GPU are present. In that case, skip the 'module load' instructions, which are Bridges-specific.
+
+For Unix or Mac users:
+Log in to Bridges via ssh using a Unix or Mac command-line terminal. Login is available to Bridges directly or through the XSEDE portal. Please see the [Bridges User's Guide](https://portal.xsede.org/psc-bridges).
+
+For Windows users:
+Many tools are available for ssh access to Bridges. Please see [Ubuntu](https://ubuntu.com/tutorials/tutorial-ubuntu-on-windows#1-overview), [MobaXterm](https://mobaxterm.mobatek.net) or [PuTTY](https://www.chiark.greenend.org.uk/~sgtatham/putty/).
+
+### PSC Bridges
+Once you have logged into Bridges, you can follow one of two methods for installing iceberg-rivers.
+
+#### Method 1 (Recommended):
+
+The lines below following a '$' are commands to enter (or cut and paste) into your terminal (note that all commands are case-sensitive, meaning capital and lowercase letters are differentiated). Everything following '#' is a comment explaining the command and should not be included in what you enter. Lines that do not start with '$' or '[rivers_env] $' are output you should expect to see.
+
+```bash
+$ pwd
+/home/username
+$ cd $SCRATCH # switch to your working space.
+$ mkdir Rivers # create a directory to work in.
+$ cd Rivers # move into your working directory.
+$ module load cuda # load parallel computing architecture.
+$ module load python3 # load correct python version.
+$ virtualenv rivers_env # create a virtual environment to isolate your work from the default system.
+$ source rivers_env/bin/activate # activate your environment. Notice the command line prompt changes to show your environment on the next line.
+[rivers_env] $ pwd
+/pylon5/group/username/Rivers
+[rivers_env] $ export PYTHONPATH=/rivers_env/lib/python3.5/site-packages # set a system variable to point python to your specific code. (Replace the leading path with the output of the pwd command above.)
+[rivers_env] $ pip install iceberg_rivers.search # pip is a python tool to extract the requested software (iceberg_rivers.search in this case) from a repository. (This may take several minutes.)
+```
+
+#### Method 2 (Installing from source; recommended for developers only):
+
+```bash
+$ git clone https://github.com/iceberg-project/Rivers.git
+$ module load cuda
+$ module load python3
+$ virtualenv rivers_env
+$ source rivers_env/bin/activate
+[rivers_env] $ export PYTHONPATH=/rivers_env/lib/python3.5/site-packages
+[rivers_env] $ pip install . --upgrade
+```
+
+#### To test
+```bash
+[rivers_env] $ deactivate # exit your virtual environment.
+$ interact -p GPU-small --gres=gpu:p100:1 # request a compute node. This package has been tested on P100 GPUs on Bridges, but any other resource that offers the same GPUs should also work. (This may take a minute or two, or more, to receive an allocation.)
+$ cd $SCRATCH/Rivers # make sure you are in the same directory where everything was set up before.
+$ module load cuda # load parallel computing architecture, as before.
+$ module load python3 # load correct python version, as before.
+$ source rivers_env/bin/activate # activate your environment; no need to create a new one, because the Rivers tools are installed and isolated here.
+[rivers_env] $ iceberg_rivers.detect --help # display a help screen of available usage and parameters.
+```
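Editorial aside: before moving on to prediction, it can be worth confirming from Python that the activated environment actually sees the allocated GPU. This check is not part of the repository and assumes the era-appropriate TensorFlow 1.x backend behind the installed Keras:

```python
# Hypothetical sanity check (not repository code): verify the GPU is
# visible from the environment configured above before predicting.
import tensorflow as tf  # assumes TensorFlow backs the installed Keras

print(tf.test.is_gpu_available())  # expect True on a correctly allocated P100 node
```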
+## Prediction
+- Download a pre-trained model at:
+
+You can download to your local machine and use scp, ftp, rsync, or Globus to [transfer to Bridges](https://portal.xsede.org/psc-bridges).
+
+Rivers prediction is executed in three steps:
+First, follow the environment setup commands under 'To test' above. Then create tiles from an input GeoTiff image and write them to the output_folder. The scale_bands parameter (in pixels) depends on the trained model being used. The default scale_bands is 299 for the pre-trained model downloaded above. If you use your own model, the scale_bands may be different.
+```bash
+[rivers_env] $ iceberg_rivers.tiling --scale_bands=299 --input_image= --output_folder=./test
+```
+Then, detect rivers on each tile and output counts and confidence for each tile.
+```bash
+[rivers_env] $ iceberg_rivers.predicting --input_image= --model_architecture=UnetCntWRN --hyperparameter_set=A --training_set=test_vanilla --test_folder=./test --model_path=./ --output_folder=./test_image
+```
+Finally, mosaic all the tiles back into one image:
+```bash
+[rivers_env] $ iceberg_rivers.mosaic --input_folder=./test_image
+```
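Editorial note: [PATCH 31/31] below reworks `generate_pipeline()` around the packaged `iceberg_rivers.*` console entry points. Condensed, the three-stage pattern it converges on looks roughly like this (a sketch under stated assumptions, not the patch's verbatim code; argument values are abbreviated):

```python
# Condensed sketch of the tiling -> predict -> mosaic pipeline shape;
# the real script also sets pre_exec, cpu/gpu requirements, and
# link_input_data to move tiles between stage sandboxes.
from radical.entk import Pipeline, Stage, Task

pipeline = Pipeline()
steps = [('iceberg_rivers.tiling',  ['--input', 'image.tif', '--output', './tiles/']),
         ('iceberg_rivers.predict', ['--input', './tiles', '-o', './predicted/']),
         ('iceberg_rivers.mosaic',  ['-i', './predicted', '-o', './'])]

for idx, (executable, arguments) in enumerate(steps):
    stage = Stage()
    stage.name = 'S%d' % idx
    task = Task()
    task.name = '%s-T0' % stage.name
    task.executable = executable
    task.arguments = arguments
    stage.add_tasks(task)
    pipeline.add_stages(stage)
```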
From 5d54f6195eb627be4c668863230cc2a2f1057d5f Mon Sep 17 00:00:00 2001
From: Ioannis Paraskevakos
Date: Thu, 1 Oct 2020 10:55:02 -0500
Subject: [PATCH 31/31] Working EnTK script

---
 src/entk_script/entk_script.py | 66 +++++++++++++++-------------------
 1 file changed, 29 insertions(+), 37 deletions(-)

diff --git a/src/entk_script/entk_script.py b/src/entk_script/entk_script.py
index 59479b9..7a7d35f 100644
--- a/src/entk_script/entk_script.py
+++ b/src/entk_script/entk_script.py
@@ -9,7 +9,6 @@
 Copyright: 2018-2019
 """
 
-from __future__ import print_function
 import argparse
 import os
 import pandas as pd
@@ -29,9 +28,10 @@ def generate_discover_pipeline(path, env_path):
     # Create Task 1, training
     task = Task()
     task.name = 'Disc-T0'
-    task.pre_exec = ['module load anaconda3/2019.03',
-                     'source activate %' env_path]
-    task.executable = 'python3' # Assign executable to the task
+    task.pre_exec = ['module load python3 cuda/9.0 gdal/2.2.1',
+                     'source %s/bin/activate' % env_path,
+                     'export PYTHONPATH=%s/lib/python3.5/site-packages' % env_path]
+    task.executable = 'python' # Assign executable to the task
     task.arguments = ['image_disc.py', '%s' % path, '--filename=images.csv',
                       '--filesize']
     task.download_output_data = ['images.csv']
@@ -45,7 +45,7 @@ def generate_discover_pipeline(path, env_path):
     return pipeline
 
 
-def generate_pipeline(name, image, image_size, env_path):
+def generate_pipeline(name, image, image_size, env_path, model_path):
 
     '''
     This function creates a pipeline for an image that will be analyzed.
@@ -65,12 +65,12 @@ def generate_pipeline(name, image, image_size, env_path):
     # Create Task 1, training
     task0 = Task()
     task0.name = '%s-T0' % stage0.name
-    task0.pre_exec = ['module load anaconda3/2019.03',
-                      'source activate %s' % env_path]
-    task0.executable = 'python' # Assign executable to the task
+    task0.pre_exec = ['module load python3 cuda/9.0 gdal/2.2.1',
+                      'source %s/bin/activate' % env_path,
+                      'export PYTHONPATH=%s/lib/python3.5/site-packages' % env_path]
+    task0.executable = 'iceberg_rivers.tiling' # Assign executable to the task
     # Assign arguments for the task executable
-    task0.arguments = ['tile_unet.py', '--input', image, '--output', './']
-    task0.upload_input_data = [os.path.abspath('../tiling/tile_unet.py')]
+    task0.arguments = ['--input', image, '--output', './tiles/']
     task0.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'process_type': None, 'thread_type': 'OpenMP'}
@@ -83,28 +83,20 @@ def generate_pipeline(name, image, image_size, env_path):
     # Create Task 1, training
     task1 = Task()
     task1.name = '%s-T1' % stage1.name
-    task1.pre_exec = ['module load anaconda3/2019.03',
-                      'source activate %s' % env_path]
-    task1.executable = 'python3' # Assign executable to the task
+    task1.pre_exec = ['module load python3 cuda/9.0 gdal/2.2.1',
+                      'source %s/bin/activate' % env_path,
+                      'export PYTHONPATH=%s/lib/python3.5/site-packages' % env_path]
+    task1.executable = 'iceberg_rivers.predict' # Assign executable to the task
     # Assign arguments for the task executable
-    task1.arguments = ['predict_unet.py',
-                       '--input', './' %
-                       '-o', './', '-w', model_path]
-    task1.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/image_tiles >' %
+    task1.arguments = ['--input', './tiles', '-o', './predicted/', '-w', model_path]
+    task1.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/tiles >' %
                              (entk_pipeline.name, stage0.name, task0.name) +
-                             'image_tiles']
-    task1.upload_input_data = [os.path.abspath('../classification/predict_unet.py'),
-                               os.path.abspath('../classification/' +
-                                               'gen_patches.py'),
-                               os.path.abspath('../classification/' +
-                                               'train_unet.py'),
-                               os.path.abspath('../classification/' +
-                                               'model.py')]
+                             'tiles']
     task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'process_type': None, 'thread_type': 'OpenMP'}
     task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'process_type': None, 'thread_type': 'OpenMP'}
-    task1.tag = task0.name
+#    task1.tag = task0.name
 
     stage1.add_tasks(task1)
     # Add Stage to the Pipeline
@@ -115,18 +107,18 @@ def generate_pipeline(name, image, image_size, env_path):
     # Create Task 1, training
     task2 = Task()
     task2.name = '%s-T2' % stage2.name
-    task2.pre_exec = ['module load anaconda3/2019.03'
-                      'source activate %s' % env_path]
-    task2.executable = 'python' # Assign executable to the task
+    task2.pre_exec = ['module load python3 cuda/9.0 gdal/2.2.1',
+                      'source %s/bin/activate' % env_path,
+                      'export PYTHONPATH=%s/lib/python3.5/site-packages' % env_path]
+    task2.executable = 'iceberg_rivers.mosaic' # Assign executable to the task
     # Assign arguments for the task executable
-    task2.arguments = ["mosaic_unet.py", "-iw", image, '-i', './', '-o', './']
-    task2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/predicted_tiles >' %
+    task2.arguments = ["-iw", image, '-i', './predicted', '-o', './']
+    task2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/predicted >' %
                              (entk_pipeline.name, stage1.name, task1.name) +
-                             'predicted_tiles']
-    task2.upload_input_data = [os.path.abspath('../mosaic/mosaic_unet.py')]
+                             'predicted']
     task2.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                       'process_type': None, 'thread_type': 'OpenMP'}
-    task2.tag = task1.name
+#    task2.tag = task1.name
 
     stage2.add_tasks(task2)
     # Add Stage to the Pipeline
@@ -164,7 +156,7 @@ def args_parser():
     parser.add_argument('--uname', type=str, help='RabbitMQ user name')
     parser.add_argument('--pwd', type=str, help='RabbitMQ password')
     parser.add_argument('--host', type=str, help='RabbitMQ hostname')
-    parser.add_argument('--port', type=int,'RabbitMQ port')
+    parser.add_argument('--port', type=int, help='RabbitMQ port')
     parser.add_argument('--name', type=str,
                         help='name of the execution. It has to be a unique' +
                              ' value')
@@ -192,7 +184,6 @@ def args_parser():
         # Assign resource manager to the Application Manager
         appman.resource_desc = res_dict
-        appman.shared_data = [os.path.abspath('../../models/ctpn.01-0.07.hdf5')]
         # Create a task that discovers the dataset
         disc_pipeline = generate_discover_pipeline(args.input_dir, args.env)
         appman.workflow = set([disc_pipeline])
@@ -210,7 +201,8 @@ def args_parser():
         p1 = generate_pipeline(name='P%03d' % idx,
                                image=images['Filename'][idx],
                                image_size=images['Size'][idx],
-                               env_path=args.env)
+                               env_path=args.env,
+                               model_path=args.model)
         pipelines.append(p1)
     # Assign the workflow as a set of Pipelines to the Application Manager
     appman.workflow = set(pipelines)
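Closing editorial note: these hunks only ever show the tail of the `res_dict` handed to `appman.resource_desc` (the `'queue': args.queue` entry above). A plausible shape for a PSC Bridges description follows; every key/value not visible in the hunks is an assumption based on typical RADICAL-EnTK resource descriptions, not a repository value.

```python
# Illustrative only: the hunks above show just the tail of res_dict,
# so all values except 'queue' are assumptions.
res_dict = {
    'resource': 'xsede.bridges',   # assumed resource label for PSC Bridges
    'walltime': args.walltime,     # minutes, from the -w/--walltime flag
    'cpus': 32,                    # assumed per-run core request
    'gpus': 2,                     # assumed per-run GPU request
    'project': 'ab1234',           # placeholder allocation ID
    'queue': args.queue,
}
appman.resource_desc = res_dict
```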