From 514c176279a00d4296fa9c004b879ef2f25d8098 Mon Sep 17 00:00:00 2001 From: Sayat Mimar Date: Tue, 28 Nov 2023 13:22:32 -0500 Subject: [PATCH 01/15] adding hpg tra training code --- .../Codes/IterativeTraining_1X.py | 954 +++++++----------- .../Codes/wsi_loader_utils.py | 340 ++++--- .../segmentationschool/segmentation_school.py | 25 +- multic/segmentationschool/slurm_training.sh | 35 + 4 files changed, 593 insertions(+), 761 deletions(-) create mode 100644 multic/segmentationschool/slurm_training.sh diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py index 9c98890..d7fb5fb 100644 --- a/multic/segmentationschool/Codes/IterativeTraining_1X.py +++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py @@ -1,677 +1,405 @@ -import os, sys, cv2, time, random, warnings, multiprocessing#json,# detectron2 +import os,cv2, time, random, multiprocessing,copy +from skimage.color import rgb2hsv,hsv2rgb,rgb2lab,lab2rgb import numpy as np -import matplotlib.pyplot as plt -import lxml.etree as ET -from matplotlib import path -from skimage.transform import resize -from skimage.io import imread, imsave -import glob -from .getWsi import getWsi - -from .xml_to_mask2 import get_supervision_boxes, regions_in_mask_dots, get_vertex_points_dots, masks_from_points, restart_line -from joblib import Parallel, delayed -from shutil import move +from tiffslide import TiffSlide +from .xml_to_mask_minmax import xml_to_mask # from generateTrainSet import generateDatalists -#from subprocess import call -#from .get_choppable_regions import get_choppable_regions -from PIL import Image - +import logging from detectron2.utils.logger import setup_logger +from skimage import exposure + setup_logger() from detectron2 import model_zoo -from detectron2.engine import DefaultPredictor,DefaultTrainer +from detectron2.engine import DefaultTrainer from detectron2.config import get_cfg -from detectron2.utils.visualizer import Visualizer#,ColorMode -from detectron2.data import MetadataCatalog, DatasetCatalog -#from detectron2.structures import BoxMode -from .get_dataset_list import HAIL2Detectron, samples_from_json, samples_from_json_mini -#from detectron2.checkpoint import DetectionCheckpointer -#from detectron2.modeling import build_model - -""" - -Code for - cutting / augmenting / training CNN - -This uses WSI and XML files to train 2 neural networks for semantic segmentation - of histopath tissue via human in the loop training - -""" - +# from detectron2.data import MetadataCatalog, DatasetCatalog +from detectron2.data import detection_utils as utils +import detectron2.data.transforms as T +from detectron2.structures import BoxMode +from detectron2.data import (DatasetCatalog, + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from detectron2.config import configurable +from typing import List, Optional, Union +import torch +from detectron2.evaluation import COCOEvaluator +#from .engine.hooks import LossEvalHook +# sys.append("..") +from .wsi_loader_utils import train_samples_from_WSI, get_slide_data, get_random_chops +from imgaug import augmenters as iaa +from .engine.hooks import LossEvalHook + +global seq +seq = iaa.Sequential([ + iaa.Sometimes(0.5,iaa.OneOf([ + iaa.AddElementwise((-15,15),per_channel=0.5), + iaa.ImpulseNoise(0.05),iaa.CoarseDropout(0.02, size_percent=0.5)])), + iaa.Sometimes(0.5,iaa.OneOf([iaa.GaussianBlur(sigma=(0, 3.0)), + iaa.Sharpen(alpha=(0.0, 1.0), lightness=(0.75, 2.0))])) +]) #Record start time 
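+# Photometric augmentation pipeline: with probability 0.5 each, one of
+# {elementwise additive noise, impulse noise, coarse dropout} and one of
+# {Gaussian blur, sharpen} is applied per training chip. It is invoked in
+# CustomDatasetMapper.__call__ below via seq(images=[image])[0]; geometric
+# transforms are handled separately by detectron2's T.AugmentationList.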
totalStart=time.time() def IterateTraining(args): - ## calculate low resolution block params - downsampleLR = int(args.downsampleRateLR**.5) #down sample for each dimension - region_sizeLR = int(args.boxSizeLR*(downsampleLR)) #Region size before downsampling - stepLR = int(region_sizeLR*(1-args.overlap_percentLR)) #Step size before downsampling - ## calculate low resolution block params - downsampleHR = int(args.downsampleRateHR**.5) #down sample for each dimension - region_sizeHR = int(args.boxSizeHR*(downsampleHR)) #Region size before downsampling - stepHR = int(region_sizeHR*(1-args.overlap_percentHR)) #Step size before downsampling - - global classNum_HR,classEnumLR,classEnumHR + + region_size = int(args.boxSize) #Region size before downsampling + dirs = {'imExt': '.jpeg'} dirs['basedir'] = args.base_dir dirs['maskExt'] = '.png' - dirs['modeldir'] = '/MODELS/' - dirs['tempdirLR'] = '/TempLR/' - dirs['tempdirHR'] = '/TempHR/' - dirs['pretraindir'] = '/Deeplab_network/' - dirs['training_data_dir'] = '/TRAINING_data/' - dirs['model_init'] = 'deeplab_resnet.ckpt' - dirs['project']= '/' + args.project - dirs['data_dir_HR'] = args.base_dir +'/' + args.project + '/Permanent/HR/' - dirs['data_dir_LR'] = args.base_dir +'/' +args.project + '/Permanent/LR/' - - - ##All folders created, initiate WSI loading by human - #raw_input('Please place WSIs in ') - - ##Check iteration session - - currentmodels=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) - print('Handcoded iteration') - # currentAnnotationIteration=check_model_generation(dirs) - currentAnnotationIteration=2 - print('Current training session is: ' + str(currentAnnotationIteration)) - - ##Create objects for storing class distributions - annotatedXMLs=glob.glob(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/*.xml') - classes=[] + dirs['training_data_dir'] = args.training_data_dir + dirs['val_data_dir'] = args.training_data_dir - if args.classNum == 0: - for xml in annotatedXMLs: - classes.append(get_num_classes(xml)) - classNum_HR = max(classes) - else: - classNum_LR = args.classNum - if args.classNum_HR != 0: - classNum_HR = args.classNum_HR - else: - classNum_HR = classNum_LR - - classNum_HR=args.classNum - - ##for all WSIs in the initiating directory: - if args.chop_data == 'True': - print('Chopping') - - start=time.time() - size_data=[] - - for xmlID in annotatedXMLs: - - #Get unique name of WSI - fileID=xmlID.split('/')[-1].split('.xml')[0] - print('-----------------'+fileID+'----------------') - #create memory addresses for wsi files - for ext in [args.wsi_ext]: - wsiID=dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration) +'/'+ fileID + ext - - #Ensure annotations exist - if os.path.isfile(wsiID)==True: - break - #Load openslide information about WSI - if ext != '.tif': - slide=getWsi(wsiID) - #WSI level 0 dimensions (largest size) - dim_x,dim_y=slide.dimensions - else: - im = Image.open(wsiID) - dim_x, dim_y=im.size - location=[0,0] - size=[dim_x,dim_y] - tree = ET.parse(xmlID) - root = tree.getroot() - box_supervision_layers=['8'] - # calculate region bounds - global_bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0], 'y_max' : location[1] + size[1]} - local_bounds = get_supervision_boxes(root,box_supervision_layers) - num_cores = multiprocessing.cpu_count() - Parallel(n_jobs=num_cores)(delayed(chop_suey_bounds)(args=args,wsiID=wsiID, - 
dirs=dirs,lb=lb,xmlID=xmlID,box_supervision_layers=box_supervision_layers) for lb in tqdm(local_bounds)) - # for lb in tqdm(local_bounds): - - # size_data.extend(image_sizes) + print('Handcoded iteration') - ''' - wsi_mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y]) - #Enumerate cpu core count - num_cores = multiprocessing.cpu_count() + #os.environ["CUDA_VISIBLE_DEVICES"]=gpu + #os.system('export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk "{ print $NF }")') + os.environ["CUDA_VISIBLE_DEVICES"] ='2,3' + os.environ["CUDA_LAUNCH_BLOCKING"] ='1' - #Generate iterators for parallel chopping of WSIs in high resolution - #index_yHR=range(30240,dim_y-stepHR,stepHR) - #index_xHR=range(840,dim_x-stepHR,stepHR) - index_yHR=range(0,dim_y,stepHR) - index_xHR=range(0,dim_x,stepHR) - index_yHR[-1]=dim_y-stepHR - index_xHR[-1]=dim_x-stepHR - #Create memory address for chopped images high resolution - outdirHR=dirs['basedir'] + dirs['project'] + dirs['tempdirHR'] - #Perform high resolution chopping in parallel and return the number of - #images in each of the labeled classes - chop_regions=get_choppable_regions(wsi=wsiID, - index_x=index_xHR,index_y=index_yHR,boxSize=region_sizeHR,white_percent=args.white_percent) + organType='kidney' + print('Organ meta being set to... '+ organType) + if organType=='liver': + classnames=['Background','BD','A'] + isthing=[0,1,1] + xml_color = [[0,255,0], [0,255,255], [0,0,255]] + tc=['BD','AT'] + sc=['Ob','B'] + elif organType =='kidney': + classnames=['interstitium','medulla','glomerulus','sclerotic glomerulus','tubule','arterial tree'] + classes={} + isthing=[0,0,1,1,1,1] + xml_color = [[0,255,0], [0,255,255], [255,255,0],[0,0,255], [255,0,0], [0,128,255]] + tc=['G','SG','T','A'] + sc=['Ob','I','M','B'] + else: + print('Provided organType not in supported types: kidney, liver') - Parallel(n_jobs=num_cores)(delayed(return_region)(args=args, - wsi_mask=wsi_mask, wsiID=wsiID, - fileID=fileID, yStart=j, xStart=i, idxy=idxy, - idxx=idxx, downsampleRate=args.downsampleRateHR, - outdirT=outdirHR, region_size=region_sizeHR, - dirs=dirs, chop_regions=chop_regions,classNum_HR=classNum_HR) for idxx,i in enumerate(index_xHR) for idxy,j in enumerate(index_yHR)) - #for idxx,i in enumerate(index_xHR): - # for idxy,j in enumerate(index_yHR): - # if chop_regions[idxy,idxx] != 0: - # return_region(args=args,xmlID=xmlID, wsiID=wsiID, fileID=fileID, yStart=j, xStart=i,idxy=idxy, idxx=idxx, - # downsampleRate=args.downsampleRateHR,outdirT=outdirHR, region_size=region_sizeHR, dirs=dirs, - # chop_regions=chop_regions,classNum_HR=classNum_HR) - # else: - # print('pass') - # exit() - print('Time for WSI chopping: ' + str(time.time()-start)) + classNum=len(tc)+len(sc)-1 + print('Number classes: '+ str(classNum)) + classes={} - classEnumHR=np.ones([classNum_HR,1])*classNum_HR + for idx,c in enumerate(classnames): + classes[idx]={'isthing':isthing[idx],'color':xml_color[idx]} - ##High resolution augmentation - #Enumerate high resolution class distribution - classDistHR=np.zeros(len(classEnumHR)) - for idx,value in enumerate(classEnumHR): - classDistHR[idx]=value/sum(classEnumHR) - print(classDistHR) - #Define number of augmentations per class - moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/regions/', dirs['basedir']+dirs['project'] + '/Permanent/HR/regions/') - moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/masks/',dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/') + 
num_images=args.batch_size*args.train_steps + # slide_idxs=train_dset.get_random_slide_idx(num_images) + usable_slides=get_slide_data(args, wsi_directory = dirs['training_data_dir']) + print('Number of slides:', len(usable_slides)) + usable_idx=range(0,len(usable_slides)) + slide_idxs=random.choices(usable_idx,k=num_images) + image_coordinates=get_random_chops(slide_idxs,usable_slides,region_size) + DatasetCatalog.register("my_dataset", lambda:train_samples_from_WSI(args,image_coordinates)) + MetadataCatalog.get("my_dataset").set(thing_classes=tc) + MetadataCatalog.get("my_dataset").set(stuff_classes=sc) - #Total time - print('Time for high resolution augmenting: ' + str((time.time()-totalStart)/60) + ' minutes.') - ''' - - # with open('sizes.csv','w',newline='') as myfile: - # wr=csv.writer(myfile,quoting=csv.QUOTE_ALL) - # wr.writerow(size_data) - # pretrain_HR=get_pretrain(currentAnnotationIteration,'/HR/',dirs) - - modeldir_HR = dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration+1) + '/HR/' - - - ##### HIGH REZ ARGS ##### - dirs['outDirAIHR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/regions/' - dirs['outDirAMHR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/masks/' - - - numImagesHR=len(glob.glob(dirs['outDirAIHR'] + '*' + dirs['imExt'])) - - numStepsHR=(args.epoch_HR*numImagesHR)/ args.CNNbatch_sizeHR - - - #----------------------------------------------------------------------------------------- - # os.environ["CUDA_VISIBLE_DEVICES"]='0' - os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu) - # img_dir='/hdd/bg/Detectron2/chop_detectron/Permanent/HR' + usable_slides_val=get_slide_data(args, wsi_directory=dirs['val_data_dir']) - img_dir=dirs['outDirAIHR'] - classnames=['Background','BD','A'] - isthing=[0,1,1] - xml_color = [[0,255,0], [0,255,255], [0,0,255]] + usable_idx_val=range(0,len(usable_slides_val)) + slide_idxs_val=random.choices(usable_idx_val,k=int(args.batch_size*args.train_steps/100)) + image_coordinates_val=get_random_chops(slide_idxs_val,usable_slides_val,region_size) - rand_sample=True - json_file=img_dir+'/detectron_train.json' - HAIL2Detectron(img_dir,rand_sample,json_file,classnames,isthing,xml_color) - tc=['BD','AT'] - sc=['I','B'] - #### From json - DatasetCatalog.register("my_dataset", lambda:samples_from_json(json_file,rand_sample)) - MetadataCatalog.get("my_dataset").set(thing_classes=tc) - MetadataCatalog.get("my_dataset").set(stuff_classes=sc) + - seg_metadata=MetadataCatalog.get("my_dataset") - - - # new_list = DatasetCatalog.get("my_dataset") - # print(len(new_list)) - # for d in random.sample(new_list, 100): - # - # img = cv2.imread(d["file_name"]) - # visualizer = Visualizer(img[:, :, ::-1],metadata=seg_metadata, scale=0.5) - # out = visualizer.draw_dataset_dict(d) - # cv2.namedWindow("output", cv2.WINDOW_NORMAL) - # cv2.imshow("output",out.get_image()[:, :, ::-1]) - # cv2.waitKey(0) # waits until a key is pressed - # cv2.destroyAllWindows() - # exit() + _ = os.system("printf '\nTraining starts...\n'") cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")) cfg.DATASETS.TRAIN = ("my_dataset") - cfg.DATASETS.TEST = () + + cfg.TEST.EVAL_PERIOD=args.eval_period + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.50 num_cores = multiprocessing.cpu_count() - cfg.DATALOADER.NUM_WORKERS = num_cores-3 - # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") # Let training initialize from model zoo + 
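+# NOTE: the registered datasets are lightweight. Each record returned by
+# train_samples_from_WSI carries only locations and chip geometry, roughly
+# (hypothetical example values):
+#   {'slide_loc': '/data/slide.svs', 'xml_loc': '/data/slide.xml',
+#    'coordinates': [x, y], 'height': 1200, 'width': 1200,
+#    'image_id': 'slide_x_y', 'file_name': 'slide.xml'}
+# Pixels and label masks are materialized per batch by CustomDatasetMapper
+# (TiffSlide.read_region + xml_to_mask) rather than being chopped to disk.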
cfg.DATALOADER.NUM_WORKERS = args.num_workers - cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/liver/MODELS/0/HR', "model_final.pth") + if args.init_modelfile: + cfg.MODEL.WEIGHTS = args.init_modelfile + else: + cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") # Let training initialize from model zoo - cfg.SOLVER.IMS_PER_BATCH = 10 + cfg.SOLVER.IMS_PER_BATCH = args.batch_size - # cfg.SOLVER.BASE_LR = 0.02 # pick a good LR - # cfg.SOLVER.LR_policy='steps_with_lrs' - # cfg.SOLVER.MAX_ITER = 50000 - # cfg.SOLVER.STEPS = [30000,40000] - # # cfg.SOLVER.STEPS = [] - # cfg.SOLVER.LRS = [0.002,0.0002] - cfg.SOLVER.BASE_LR = 0.002 # pick a good LR cfg.SOLVER.LR_policy='steps_with_lrs' - cfg.SOLVER.MAX_ITER = 200000 - cfg.SOLVER.STEPS = [150000,180000] - # cfg.SOLVER.STEPS = [] - cfg.SOLVER.LRS = [0.0002,0.00002] - - # cfg.INPUT.CROP.ENABLED = True - # cfg.INPUT.CROP.TYPE='absolute' - # cfg.INPUT.CROP.SIZE=[100,100] - cfg.MODEL.BACKBONE.FREEZE_AT = 0 - # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[4],[8],[16], [32], [64], [64], [64]] - # cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p2', 'p2', 'p3','p4','p5','p6'] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2.0, 3.0]] + cfg.SOLVER.MAX_ITER = args.train_steps + cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR + cfg.SOLVER.LRS = [0.000025,0.0000025] + cfg.SOLVER.STEPS = [70000,90000] + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32],[64],[128], [256], [512], [1024]] + cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p3', 'p4', 'p5','p6','p6'] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[.1,.2,0.33, 0.5, 1.0, 2.0, 3.0,5,10]] cfg.MODEL.ANCHOR_GENERATOR.ANGLES=[-90,-60,-30,0,30,60,90] - - cfg.MODEL.RPN.POSITIVE_FRACTION = 0.75 - cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(tc) cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES =len(sc) - - - # cfg.INPUT.CROP.ENABLED = True - # cfg.INPUT.CROP.TYPE='absolute' - # cfg.INPUT.CROP.SIZE=[64,64] - - cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 # faster, and good enough for this toy dataset (default: 512) - # cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4 # only has one class (ballon). 
(see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets) - + cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64 cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS=False - cfg.INPUT.MIN_SIZE_TRAIN=0 - # cfg.INPUT.MAX_SIZE_TRAIN=500 - - # exit() - os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) - with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w") as f: - f.write(cfg.dump()) # save config to file - trainer = DefaultTrainer(cfg) - trainer.resume_or_load(resume=False) - trainer.train() - - cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") # path to the model we just trained - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 # set a custom testing threshold - cfg.TEST.DETECTIONS_PER_IMAGE = 500 - # - # cfg.INPUT.MIN_SIZE_TRAIN=64 - # cfg.INPUT.MAX_SIZE_TRAIN=4000 - cfg.INPUT.MIN_SIZE_TEST=64 - cfg.INPUT.MAX_SIZE_TEST=500 - - - predict_samples=100 - predictor = DefaultPredictor(cfg) - - dataset_dicts = samples_from_json_mini(json_file,predict_samples) - iter=0 - if not os.path.exists(os.getcwd()+'/network_predictions/'): - os.mkdir(os.getcwd()+'/network_predictions/') - for d in random.sample(dataset_dicts, predict_samples): - # print(d["file_name"]) - # imclass=d["file_name"].split('/')[-1].split('_')[-5].split(' ')[-1] - # if imclass in ["TRI","HE"]: - im = cv2.imread(d["file_name"]) - panoptic_seg, segments_info = predictor(im)["panoptic_seg"] # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format - # print(segments_info) - # plt.imshow(panoptic_seg.to("cpu")) - # plt.show() - v = Visualizer(im[:, :, ::-1], seg_metadata, scale=1.2) - v = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"), segments_info) - # panoptic segmentation result - # plt.ion() - plt.subplot(121) - plt.imshow(im[:, :, ::-1]) - plt.subplot(122) - plt.imshow(v.get_image()) - plt.savefig(f"./network_predictions/input_{iter}.jpg",dpi=300) - plt.show() - # plt.ioff() - - - # v = Visualizer(im[:, :, ::-1], - # metadata=seg_metadata, - # scale=0.5, - # ) - # out = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"),segments_info) - - # imsave('./network_predictions/pred'+str(iter)+'.png',np.hstack((im,v.get_image()))) - iter=iter+1 - # cv2.imshow('',out.get_image()[:, :, ::-1]) - # cv2.waitKey(0) # waits until a key is pressed - # cv2.destroyAllWindows() - #----------------------------------------------------------------------------------------- - - finish_model_generation(dirs,currentAnnotationIteration) - - print('\n\n\033[92;5mPlease place new wsi file(s) in: \n\t' + dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration+1)) - print('\nthen run [--option predict]\033[0m\n') - - - - -def moveimages(startfolder,endfolder): - filelist=glob.glob(startfolder + '*') - for file in filelist: - fileID=file.split('/')[-1] - move(file,endfolder + fileID) - - -def check_model_generation(dirs): - modelsCurrent=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) - gens=map(int,modelsCurrent) - modelOrder=np.sort(gens)[::-1] - - for idx in modelOrder: - #modelsChkptsLR=glob.glob(dirs['basedir'] + dirs['project'] + dirs['modeldir']+str(modelsCurrent[idx]) + '/LR/*.ckpt*') - modelsChkptsHR=glob.glob(dirs['basedir'] + dirs['project'] + dirs['modeldir']+ str(idx) +'/HR/*.ckpt*') - if modelsChkptsHR == []: - continue - else: - return idx - break - -def finish_model_generation(dirs,currentAnnotationIteration): - make_folder(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + 
str(currentAnnotationIteration + 1)) + cfg.INPUT.MIN_SIZE_TRAIN=args.boxSize + cfg.INPUT.MAX_SIZE_TRAIN=args.boxSize + + cfg.OUTPUT_DIR = args.base_dir+"/output" -def get_pretrain(currentAnnotationIteration,res,dirs): - - if currentAnnotationIteration==0: - pretrain_file = glob.glob(dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + '*') - pretrain_file=pretrain_file[0].split('.')[0] + '.' + pretrain_file[0].split('.')[1] - - else: - pretrains=glob.glob(dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + 'model*') - maxmodel=0 - for modelfiles in pretrains: - modelID=modelfiles.split('.')[-2].split('-')[1] - if int(modelID)>maxmodel: - maxmodel=int(modelID) - pretrain_file=dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + 'model.ckpt-' + str(maxmodel) - return pretrain_file -def restart_line(): # for printing chopped image labels in command line - sys.stdout.write('\r') - sys.stdout.flush() + def real_data(args,image_coordinates_val): -def file_len(fname): # get txt file length (number of lines) - with open(fname) as f: - for i, l in enumerate(f): - pass - return i + 1 -def make_folder(directory): - if not os.path.exists(directory): - os.makedirs(directory) # make directory if it does not exit already # make new directory # Check if folder exists, if not make it + all_list=[] + for one in train_samples_from_WSI(args,image_coordinates_val): + dataset_dict = one + c=dataset_dict['coordinates'] + h=dataset_dict['height'] + w=dataset_dict['width'] + maskData=xml_to_mask(dataset_dict['xml_loc'], c, [h,w]) + dataset_dict['annotations'] = mask2polygons(maskData) + all_list.append(dataset_dict) -def make_all_folders(dirs): + return all_list + DatasetCatalog.register("my_dataset_val", lambda:real_data(args,image_coordinates_val)) + MetadataCatalog.get("my_dataset_val").set(thing_classes=tc) + MetadataCatalog.get("my_dataset_val").set(stuff_classes=sc) + + cfg.DATASETS.TEST = ("my_dataset_val",) - make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/regions') - make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/masks') - - make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/Augment' +'/regions') - make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/Augment' +'/masks') - - make_folder(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/regions') - make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirHR'] + '/masks') - - make_folder(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' +'/regions') - make_folder(dirs['basedir']+dirs['project']+ dirs['tempdirHR'] + '/Augment' +'/masks') - - make_folder(dirs['basedir'] +dirs['project']+ dirs['modeldir']) - make_folder(dirs['basedir'] +dirs['project']+ dirs['training_data_dir']) - - - make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/LR/'+ 'regions/') - make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/LR/'+ 'masks/') - make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/HR/'+ 'regions/') - make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/HR/'+ 'masks/') - - make_folder(dirs['basedir'] +dirs['project']+ dirs['training_data_dir']) - - make_folder(dirs['basedir'] + '/Codes/Deeplab_network/datasetLR') - make_folder(dirs['basedir'] + '/Codes/Deeplab_network/datasetHR') + + + #os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) + with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w") as f: + f.write(cfg.dump()) # 
save config to file -def return_region(args, wsi_mask, wsiID, fileID, yStart, xStart, idxy, idxx, downsampleRate, outdirT, region_size, dirs, chop_regions,classNum_HR): # perform cutting in parallel - sys.stdout.write(' <'+str(xStart)+'/'+ str(yStart)+'/'+str(chop_regions[idxy,idxx] != 0)+ '> ') - sys.stdout.flush() - restart_line() - if chop_regions[idxy,idxx] != 0: + trainer = Trainer(cfg) + print('check and see') + trainer.resume_or_load(resume=False) + trainer.train() - uniqID=fileID + str(yStart) + str(xStart) - if wsiID.split('.')[-1] != 'tif': - slide=getWsi(wsiID) - Im=np.array(slide.read_region((xStart,yStart),0,(region_size,region_size))) - Im=Im[:,:,:3] - else: - yEnd = yStart + region_size - xEnd = xStart + region_size - Im = np.zeros([region_size,region_size,3], dtype=np.uint8) - Im_ = imread(wsiID)[yStart:yEnd, xStart:xEnd, :3] - Im[0:Im_.shape[0], 0:Im_.shape[1], :] = Im_ - - mask_annotation=wsi_mask[yStart:yStart+region_size,xStart:xStart+region_size] - - o1,o2=mask_annotation.shape - if o1 !=region_size: - mask_annotation=np.pad(mask_annotation,((0,region_size-o1),(0,0)),mode='constant') - if o2 !=region_size: - mask_annotation=np.pad(mask_annotation,((0,0),(0,region_size-o2)),mode='constant') - - ''' - if 4 in np.unique(mask_annotation): - plt.subplot(121) - plt.imshow(mask_annotation*20) - plt.subplot(122) - plt.imshow(Im) - pt=[xStart,yStart] - plt.title(pt) - plt.show() - ''' - if downsampleRate !=1: - c=(Im.shape) - s1=int(c[0]/(downsampleRate**.5)) - s2=int(c[1]/(downsampleRate**.5)) - Im=resize(Im,(s1,s2),mode='reflect') - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - imsave(outdirT + '/regions/' + uniqID + dirs['imExt'],Im) - imsave(outdirT + '/masks/' + uniqID +dirs['maskExt'],mask_annotation) - - -def regions_in_mask(root, bounds, verbose=1): - # find regions to save - IDs_reg = [] - IDs_points = [] - - for Annotation in root.findall("./Annotation"): # for all annotations - annotationID = Annotation.attrib['Id'] - annotationType = Annotation.attrib['Type'] - - # print(Annotation.findall(./)) - if annotationType =='9': - for element in Annotation.iter('InputAnnotationId'): - pointAnnotationID=element.text - - for Region in Annotation.findall("./*/Region"): # iterate on all region - - for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region - # get points - x_point = np.int32(np.float64(Vertex.attrib['X'])) - y_point = np.int32(np.float64(Vertex.attrib['Y'])) - # test if points are in bounds - if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds - # save region Id - IDs_points.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID,'pointAnnotationID':pointAnnotationID}) - break - elif annotationType=='4': - - for Region in Annotation.findall("./*/Region"): # iterate on all region - - for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region - # get points - x_point = np.int32(np.float64(Vertex.attrib['X'])) - y_point = np.int32(np.float64(Vertex.attrib['Y'])) - # test if points are in bounds - if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds - # save region Id - IDs_reg.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) - break - return IDs_reg,IDs_points - - -def get_vertex_points(root, IDs_reg,IDs_points, maskModes,excludedIDs,negativeIDs=None): - Regions = [] - Points = [] - - for ID in IDs_reg: - 
Vertices = [] - if ID['annotationID'] not in excludedIDs: - for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): - Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) - Regions.append({'Vertices':np.array(Vertices),'annotationID':ID['annotationID']}) - - for ID in IDs_points: - Vertices = [] - for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): - Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) - Points.append({'Vertices':np.array(Vertices),'pointAnnotationID':ID['pointAnnotationID']}) - if 'falsepositive' or 'negative' in maskModes: - assert negativeIDs is not None,'Negatively annotated classes must be provided for negative/falsepositive mask mode' - assert 'falsepositive' and 'negative' not in maskModes, 'Negative and false positive mask modes cannot both be true' - - useableRegions=[] - if 'positive' in maskModes: - for Region in Regions: - regionPath=path.Path(Region['Vertices']) - for Point in Points: - if Region['annotationID'] not in negativeIDs: - if regionPath.contains_point(Point['Vertices'][0]): - Region['pointAnnotationID']=Point['pointAnnotationID'] - useableRegions.append(Region) - - if 'negative' in maskModes: - - for Region in Regions: - regionPath=path.Path(Region['Vertices']) - if Region['annotationID'] in negativeIDs: - if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): - Region['pointAnnotationID']=Region['annotationID'] - useableRegions.append(Region) - if 'falsepositive' in maskModes: - - for Region in Regions: - regionPath=path.Path(Region['Vertices']) - if Region['annotationID'] in negativeIDs: - if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): - Region['pointAnnotationID']=0 - useableRegions.append(Region) - - return useableRegions -def chop_suey_bounds(lb,xmlID,box_supervision_layers,wsiID,dirs,args): - tree = ET.parse(xmlID) - root = tree.getroot() - lbVerts=np.array(lb['BoxVerts']) - xMin=min(lbVerts[:,0]) - xMax=max(lbVerts[:,0]) - yMin=min(lbVerts[:,1]) - yMax=max(lbVerts[:,1]) - - # test=np.array(slide.read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] - - local_bound = {'x_min' : xMin, 'y_min' : yMin, 'x_max' : xMax, 'y_max' : yMax} - IDs_reg,IDs_points = regions_in_mask_dots(root=root, bounds=local_bound,box_layers=box_supervision_layers) - - # find regions in bounds - negativeIDs=['4'] - excludedIDs=['1'] - falsepositiveIDs=['4'] - usableRegions= get_vertex_points_dots(root=root, IDs_reg=IDs_reg,IDs_points=IDs_points,excludedIDs=excludedIDs,maskModes=['falsepositive','positive'],negativeIDs=negativeIDs, - falsepositiveIDs=falsepositiveIDs) - - # image_sizes= - masks_from_points(usableRegions,wsiID,dirs,50,args,[xMin,xMax,yMin,yMax]) -''' -def masks_from_points(root,usableRegions,wsiID,dirs): - pas_img = getWsi(wsiID) - image_sizes=[] - basename=wsiID.split('/')[-1].split('.svs')[0] - - for usableRegion in tqdm(usableRegions): - vertices=usableRegion['Vertices'] - x1=min(vertices[:,0]) - x2=max(vertices[:,0]) - y1=min(vertices[:,1]) - y2=max(vertices[:,1]) - points = np.stack([np.asarray(vertices[:,0]), np.asarray(vertices[:,1])], axis=1) - if (x2-x1)>0 and (y2-y1)>0: - l1=x2-x1 - l2=y2-y1 - xMultiplier=np.ceil((l1)/64) - yMultiplier=np.ceil((l2)/64) - pad1=int(xMultiplier*64-l1) - pad2=int(yMultiplier*64-l2) - - points[:,1] = 
np.int32(np.round(points[:,1] - y1 )) - points[:,0] = np.int32(np.round(points[:,0] - x1 )) - mask = 2*np.ones([y2-y1,x2-x1], dtype=np.uint8) - if int(usableRegion['pointAnnotationID'])==0: - pass - else: - cv2.fillPoly(mask, [points], int(usableRegion['pointAnnotationID'])-4) - PAS = pas_img.read_region((x1,y1), 0, (x2-x1,y2-y1)) - # print(usableRegion['pointAnnotationID']) - PAS = np.array(PAS)[:,:,0:3] - mask=np.pad( mask,((0,pad2),(0,pad1)),'constant',constant_values=(2,2) ) - PAS=np.pad( PAS,((0,pad2),(0,pad1),(0,0)),'constant',constant_values=(0,0) ) - - image_identifier=basename+'_'.join(['',str(x1),str(y1),str(l1),str(l2)]) - mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+image_identifier+'.png' - image_out_name=mask_out_name.replace('/masks/','/regions/') - # basename + '_' + str(image_identifier) + args.imBoxExt - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - imsave(image_out_name,PAS) - imsave(mask_out_name,mask) - # exit() - # extract image region - # plt.subplot(121) - # plt.imshow(PAS) - # plt.subplot(122) - # plt.imshow(mask) - # plt.show() - # image_sizes.append([x2-x1,y2-y1]) + _ = os.system("printf '\nTraining completed!\n'") + +def mask2polygons(mask): + annotation=[] + presentclasses=np.unique(mask) + offset=-3 + presentclasses=presentclasses[presentclasses>2] + presentclasses=list(presentclasses[presentclasses<7]) + for p in presentclasses: + contours, hierarchy = cv2.findContours(np.array(mask==p, dtype='uint8'), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for contour in contours: + if contour.size>=6: + instance_dict={} + contour_flat=contour.flatten().astype('float').tolist() + xMin=min(contour_flat[::2]) + yMin=min(contour_flat[1::2]) + xMax=max(contour_flat[::2]) + yMax=max(contour_flat[1::2]) + instance_dict['bbox']=[xMin,yMin,xMax,yMax] + instance_dict['bbox_mode']=BoxMode.XYXY_ABS.value + instance_dict['category_id']=p+offset + instance_dict['segmentation']=[contour_flat] + annotation.append(instance_dict) + return annotation + + +class Trainer(DefaultTrainer): + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + return build_detection_test_loader(cfg, dataset_name, mapper=CustomDatasetMapper(cfg, True)) + + @classmethod + def build_train_loader(cls, cfg): + return build_detection_train_loader(cfg, mapper=CustomDatasetMapper(cfg, True)) + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + return COCOEvaluator(dataset_name, cfg, True, output_folder) + + def build_hooks(self): + hooks = super().build_hooks() + hooks.insert(-1,LossEvalHook( + self.cfg.TEST.EVAL_PERIOD, + self.model, + build_detection_test_loader( + self.cfg, + self.cfg.DATASETS.TEST[0], + CustomDatasetMapper(self.cfg, True) + ) + )) + return hooks + + +class CustomDatasetMapper: + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. 
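+        This mapper mirrors detectron2's default DatasetMapper interface, but
+        __call__ is overridden to read each chip directly from the WSI with
+        TiffSlide and to rasterize its labels on the fly with xml_to_mask.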
+ + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON else: - print('Broken region') - return image_sizes -''' + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def _transform_annotations(self, dataset_dict, transforms, image_shape): + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop('annotations') + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
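+                In this project the dict additionally carries the custom keys
+                'slide_loc', 'xml_loc', 'coordinates', 'height', and 'width'
+                produced by train_samples_from_WSI.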
+ + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + c=dataset_dict['coordinates'] + h=dataset_dict['height'] + w=dataset_dict['width'] + + slide= TiffSlide(dataset_dict['slide_loc']) + image=np.array(slide.read_region((c[0],c[1]),0,(h,w)))[:,:,:3] + slide.close() + maskData=xml_to_mask(dataset_dict['xml_loc'], c, [h,w]) + + if random.random()>0.5: + hShift=np.random.normal(0,0.05) + lShift=np.random.normal(1,0.025) + # imageblock[im]=randomHSVshift(imageblock[im],hShift,lShift) + image=rgb2hsv(image) + image[:,:,0]=(image[:,:,0]+hShift) + image=hsv2rgb(image) + image=rgb2lab(image) + image[:,:,0]=exposure.adjust_gamma(image[:,:,0],lShift) + image=(lab2rgb(image)*255).astype('uint8') + image = seq(images=[image])[0].squeeze() + + dataset_dict['annotations']=mask2polygons(maskData) + utils.check_image_size(dataset_dict, image) + + sem_seg_gt = maskData + sem_seg_gt[sem_seg_gt>2]=0 + sem_seg_gt[maskData==0] = 3 + sem_seg_gt=np.array(sem_seg_gt).astype('uint8') + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + if "annotations" in dataset_dict: + self._transform_annotations(dataset_dict, transforms, image_shape) + + + return dataset_dict \ No newline at end of file diff --git a/multic/segmentationschool/Codes/wsi_loader_utils.py b/multic/segmentationschool/Codes/wsi_loader_utils.py index ea87ae6..90a98f8 100644 --- a/multic/segmentationschool/Codes/wsi_loader_utils.py +++ b/multic/segmentationschool/Codes/wsi_loader_utils.py @@ -1,85 +1,103 @@ -import openslide,glob,os +import glob, os import numpy as np from scipy.ndimage.morphology import binary_fill_holes -import matplotlib.pyplot as plt from skimage.color import rgb2hsv from skimage.filters import gaussian -# from skimage.morphology import binary_dilation, diamond -# import cv2 from tqdm import tqdm from skimage.io import imread,imsave import multiprocessing from joblib import Parallel, delayed +from shapely.geometry import Polygon +from tiffslide import TiffSlide +import random +import glob +import warnings +from joblib import Parallel, delayed +import multiprocessing +from .xml_to_mask_minmax import write_minmax_to_xml +import lxml.etree as ET -def save_thumb(args,slide_loc): - print(slide_loc) - slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) - slide=openslide.OpenSlide(slide_loc) - if slideExt =='.scn': - dim_x=int(slide.properties['openslide.bounds-width'])## add to columns - dim_y=int(slide.properties['openslide.bounds-height'])## add to rows - offsetx=int(slide.properties['openslide.bounds-x'])##start column - offsety=int(slide.properties['openslide.bounds-y'])##start row - elif slideExt in ['.ndpi','.svs']: - dim_x, dim_y=slide.dimensions - offsetx=0 - offsety=0 +MPP = {'V42D20-364_XY01_2235505.svs':0.25, + 'V42D20-364_XY04_2240610.svs':0.25, + 'V42N07-339_XY04_F44.svs':0.25, + 'V42N07-395_XY01_235142.svs':0.25, + 'V42N07-395_XY04_235582.svs':0.25, + 
'V42N07-399_XY01_3723.svs':0.25, + 'XY01_IU-21-015F.svs':0.50, + 'XY02_IU-21-016F.svs':0.50, + 'XY03_IU-21-019F.svs':0.50, + 'XY04_IU-21-020F.svs':0.50} - # fullSize=slide.level_dimensions[0] - # resRatio= args.chop_thumbnail_resolution - # ds_1=fullSize[0]/resRatio - # ds_2=fullSize[1]/resRatio - # thumbIm=np.array(slide.get_thumbnail((ds_1,ds_2))) - # if slideExt =='.scn': - # xStt=int(offsetx/resRatio) - # xStp=int((offsetx+dim_x)/resRatio) - # yStt=int(offsety/resRatio) - # yStp=int((offsety+dim_y)/resRatio) - # thumbIm=thumbIm[yStt:yStp,xStt:xStp] - # imsave(slide_loc.replace(slideExt,'_thumb.jpeg'),thumbIm) - slide.associated_images['label'].save(slide_loc.replace(slideExt,'_label.png')) - # imsave(slide_loc.replace(slideExt,'_label.png'),slide.associated_images['label']) - - -def get_image_thumbnails(args): - assert args.target is not None, 'Location of images must be provided' - all_slides=[] - for ext in args.wsi_ext.split(','): - all_slides.extend(glob.glob(args.target+'/*'+ext)) - Parallel(n_jobs=multiprocessing.cpu_count())(delayed(save_thumb)(args,slide_loc) for slide_loc in tqdm(all_slides)) - # for slide_loc in tqdm(all_slides): - -class WSIPredictLoader(): - def __init__(self,args, wsi_directory=None, transform=None): - assert wsi_directory is not None, 'location of training svs and xml must be provided' - mask_out_loc=os.path.join(wsi_directory.replace('/TRAINING_data/0','Permanent/Tissue_masks/'),) - if not os.path.exists(mask_out_loc): - os.makedirs(mask_out_loc) - all_slides=[] - for ext in args.wsi_ext.split(','): - all_slides.extend(glob.glob(wsi_directory+'/*'+ext)) - print('Getting slide metadata and usable regions...') - usable_slides=[] - for slide_loc in all_slides: - slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) - print("working slide... 
"+ slideID,end='\r') - - slide=openslide.OpenSlide(slide_loc) - chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) - mag_x=np.round(float(slide.properties['openslide.mpp-x']),2) - mag_y=np.round(float(slide.properties['openslide.mpp-y']),2) - print(mag_x,mag_y) - usable_slides.append({'slide_loc':slide_loc,'slideID':slideID,'slideExt':slideExt,'slide':slide, - 'chop_array':chop_array,'mag':[mag_x,mag_y]}) - self.usable_slides= usable_slides - self.boxSize40X = args.boxSize - self.boxSize20X = int(args.boxSize)/2 - -class WSITrainingLoader(): - def __init__(self,args, wsi_directory=None, transform=None): +def get_image_meta(i,args): + image_annotation_info={} + image_annotation_info['slide_loc']=i[0] + slide=TiffSlide(image_annotation_info['slide_loc']) + magx=MPP[image_annotation_info['slide_loc'].split('/')[-1]]#np.round(float(slide.properties['tiffslide.mpp-x']),2) + magy=MPP[image_annotation_info['slide_loc'].split('/')[-1]]#np.round(float(slide.properties['tiffslide.mpp-y']),2) + + assert magx == magy + if magx ==0.25: + dx=args.boxSize + dy=args.boxSize + elif magx == 0.5: + dx=int(args.boxSize/2) + dy=int(args.boxSize/2) + else: + print('nonstandard image magnification') + print(slide) + print(magx,magy) + exit() + + image_annotation_info['coordinates']=[i[2][1],i[2][0]] + image_annotation_info['height']=dx + image_annotation_info['width']=dy + image_annotation_info['image_id']=i[1].split('/')[-1].replace('.xml','_'.join(['',str(i[2][1]),str(i[2][0])])) + image_annotation_info['xml_loc']=i[1] + image_annotation_info['file_name']=i[1].split('/')[-1] + slide.close() + + return image_annotation_info + +def train_samples_from_WSI(args,image_coordinates): + + + num_cores=multiprocessing.cpu_count() + + print('Generating detectron2 dictionary format...') + data_list=Parallel(n_jobs=num_cores//2,backend='threading')(delayed(get_image_meta)(i=i, + args=args) for i in tqdm(image_coordinates)) + # print(len(data_list),'this is') + # data_list=[] + # for i in tqdm(image_coordinates): + # data_list.append(get_image_meta(i=i,args=args)) + return data_list + +def WSIGridIterator(wsi_name,choppable_regions,index_x,index_y,region_size,dim_x,dim_y): + wsi_name=os.path.splitext(wsi_name.split('/')[-1])[0] + data_list=[] + for idxy, i in tqdm(enumerate(index_y)): + for idxx, j in enumerate(index_x): + if choppable_regions[idxy, idxx] != 0: + yEnd = min(dim_y,i+region_size) + xEnd = min(dim_x,j+region_size) + xLen=xEnd-j + yLen=yEnd-i + + image_annotation_info={} + image_annotation_info['file_name']='_'.join([wsi_name,str(j),str(i),str(xEnd),str(yEnd)]) + image_annotation_info['height']=yLen + image_annotation_info['width']=xLen + image_annotation_info['image_id']=image_annotation_info['file_name'] + image_annotation_info['xStart']=j + image_annotation_info['yStart']=i + data_list.append(image_annotation_info) + return data_list + +def get_slide_data(args, wsi_directory=None): assert wsi_directory is not None, 'location of training svs and xml must be provided' - mask_out_loc=os.path.join(wsi_directory.replace('/TRAINING_data/0','Permanent/Tissue_masks/'),) + + mask_out_loc=os.path.join(wsi_directory, 'Tissue_masks') if not os.path.exists(mask_out_loc): os.makedirs(mask_out_loc) all_slides=[] @@ -90,34 +108,104 @@ def __init__(self,args, wsi_directory=None, transform=None): usable_slides=[] for slide_loc in all_slides: slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) - print("working slide... 
"+ slideID,end='\r') - - slide=openslide.OpenSlide(slide_loc) - chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) - mag_x=np.round(float(slide.properties['openslide.mpp-x']),2) - mag_y=np.round(float(slide.properties['openslide.mpp-y']),2) - print(mag_x,mag_y) - usable_slides.append({'slide_loc':slide_loc,'slideID':slideID,'slideExt':slideExt,'slide':slide, - 'chop_array':chop_array,'mag':[mag_x,mag_y]}) - self.usable_slides= usable_slides - self.boxSize40X = args.boxSize - self.boxSize20X = int(args.boxSize)/2 + xmlpath=slide_loc.replace(slideExt,'.xml') + if os.path.isfile(xmlpath): + write_minmax_to_xml(xmlpath) + + print("Gathering slide data ... "+ slideID,end='\r') + slide =TiffSlide(slide_loc) + chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) + + mag_x=MPP[slideID+slideExt]#np.round(float(slide.properties['tiffslide.mpp-x']),2) + mag_y=MPP[slideID+slideExt]#np.round(float(slide.properties['tiffslide.mpp-y']),2) + slide.close() + tree = ET.parse(xmlpath) + root = tree.getroot() + balance_classes=args.balanceClasses.split(',') + classNums={} + for b in balance_classes: + classNums[b]=0 + # balance_annotations={} + for Annotation in root.findall("./Annotation"): + + annotationID = Annotation.attrib['Id'] + if annotationID=='7': + print(xmlpath) + exit() + if annotationID in classNums.keys(): + + classNums[annotationID]=len(Annotation.findall("./*/Region")) + else: + pass + + usable_slides.append({'slide_loc':slide_loc,'slideID':slideID, + 'chop_array':chop_array,'num_regions':len(chop_array),'mag':[mag_x,mag_y], + 'xml_loc':xmlpath,'annotations':classNums,'root':root + }) + else: + print('\n') + print('no annotation XML file found for:') + print(slideID) + exit() print('\n') + return usable_slides + +def get_random_chops(slide_idx,usable_slides,region_size): + # chops=[] + choplen=len(slide_idx) + chops=Parallel(n_jobs=multiprocessing.cpu_count(),backend='threading')(delayed(get_chop_data)(idx=idx, + usable_slides=usable_slides,region_size=region_size) for idx in tqdm(slide_idx)) + return chops + + +def get_chop_data(idx,usable_slides,region_size): + if random.random()>0.5: + randSelect=random.randrange(0,usable_slides[idx]['num_regions']) + chopData=[usable_slides[idx]['slide_loc'],usable_slides[idx]['xml_loc'], + usable_slides[idx]['chop_array'][randSelect]] + else: + # print(list(usable_slides[idx]['annotations'].values())) + if sum(usable_slides[idx]['annotations'].values())==0: + randSelect=random.randrange(0,usable_slides[idx]['num_regions']) + chopData=[usable_slides[idx]['slide_loc'],usable_slides[idx]['xml_loc'], + usable_slides[idx]['chop_array'][randSelect]] + else: + classIDs=list(usable_slides[idx]['annotations'].keys()) + classSamples=random.sample(classIDs,len(classIDs)) + for c in classSamples: + if usable_slides[idx]['annotations'][c]==0 or c == '5': + continue + else: + sampledRegionID=random.choice([r.attrib['Id'] for r in usable_slides[idx]['root'].find("./Annotation[@Id='{}']/Regions".format(c)).findall('Region')]) + #sampledRegionID=random.randrange(1,usable_slides[idx]['annotations'][c]+1) + break + + + Verts = usable_slides[idx]['root'].findall("./Annotation[@Id='{}']/Regions/Region[@Id='{}']/Vertices/Vertex".format(c,sampledRegionID)) + + centroid = (Polygon([(int(float(k.attrib['X'])),int(float(k.attrib['Y']))) for k in Verts]).centroid) + + randVertX=int(centroid.x)-region_size//2 + randVertY=int(centroid.y)-region_size//2 + + chopData=[usable_slides[idx]['slide_loc'],usable_slides[idx]['xml_loc'], + 
[randVertY,randVertX]] + + + return chopData def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): slide_regions=[] choppable_regions_list=[] - downsample = int(args.downsampleRate**.5) #down sample for each dimension region_size = int(args.boxSize*(downsample)) #Region size before downsampling - step = int(region_size*(1-args.overlap_percent)) #Step size before downsampling - + step = int(region_size*(1-args.overlap_rate)) #Step size before downsampling if slideExt =='.scn': - dim_x=int(slide.properties['openslide.bounds-width'])## add to columns - dim_y=int(slide.properties['openslide.bounds-height'])## add to rows - offsetx=int(slide.properties['openslide.bounds-x'])##start column - offsety=int(slide.properties['openslide.bounds-y'])##start row + dim_x=int(slide.properties['tiffslide.bounds-width'])## add to columns + dim_y=int(slide.properties['tiffslide.bounds-height'])## add to rows + offsetx=int(slide.properties['tiffslide.bounds-x'])##start column + offsety=int(slide.properties['tiffslide.bounds-y'])##start row index_y=np.array(range(offsety,offsety+dim_y,step)) index_x=np.array(range(offsetx,offsetx+dim_x,step)) index_y[-1]=(offsety+dim_y)-step @@ -135,7 +223,9 @@ def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): resRatio= args.chop_thumbnail_resolution ds_1=fullSize[0]/resRatio ds_2=fullSize[1]/resRatio - if args.get_new_tissue_masks: + out_mask_name=os.path.join(mask_out_loc,'_'.join([slideID,slideExt[1:]+'.png'])) + if not os.path.isfile(out_mask_name) or args.get_new_tissue_masks: + print(out_mask_name) thumbIm=np.array(slide.get_thumbnail((ds_1,ds_2))) if slideExt =='.scn': xStt=int(offsetx/resRatio) @@ -143,40 +233,18 @@ def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): yStt=int(offsety/resRatio) yStp=int((offsety+dim_y)/resRatio) thumbIm=thumbIm[yStt:yStp,xStt:xStp] - # plt.imshow(thumbIm) - # plt.show() - # input() - # plt.imshow(thumbIm) - # plt.show() - - out_mask_name=os.path.join(mask_out_loc,'_'.join([slideID,slideExt[1:]+'.png'])) - - - if not args.get_new_tissue_masks: - try: - binary=(imread(out_mask_name)/255).astype('bool') - except: - print('failed to load mask for '+ out_mask_name) - print('please set get_new_tissue masks to True') - exit() - # if slideExt =='.scn': - # choppable_regions=np.zeros((len(index_x),len(index_y))) - # elif slideExt in ['.ndpi','.svs']: choppable_regions=np.zeros((len(index_y),len(index_x))) - else: - print(out_mask_name) - # if slideExt =='.scn': - # choppable_regions=np.zeros((len(index_x),len(index_y))) - # elif slideExt in ['.ndpi','.svs']: - choppable_regions=np.zeros((len(index_y),len(index_x))) - hsv=rgb2hsv(thumbIm) g=gaussian(hsv[:,:,1],5) binary=(g>0.05).astype('bool') binary=binary_fill_holes(binary) imsave(out_mask_name.replace('.png','.jpeg'),thumbIm) - imsave(out_mask_name,binary.astype('uint8')*255) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(out_mask_name,binary.astype('uint8')*255) + binary=(imread(out_mask_name)/255).astype('bool') + choppable_regions=np.zeros((len(index_y),len(index_x))) chop_list=[] for idxy,yi in enumerate(index_y): for idxx,xj in enumerate(index_x): @@ -185,31 +253,13 @@ def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): xStart = int(np.round((xj-offsetx)/resRatio)) xStop = int(np.round(((xj-offsetx)+args.boxSize)/resRatio)) box_total=(xStop-xStart)*(yStop-yStart) - if slideExt =='.scn': - # print(xStart,xStop,yStart,yStop) - # 
print(np.sum(binary[xStart:xStop,yStart:yStop]),args.white_percent,box_total) - # plt.imshow(binary[xStart:xStop,yStart:yStop]) - # plt.show() - if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): - - choppable_regions[idxy,idxx]=1 - chop_list.append([index_y[idxy],index_x[idxx]]) - - elif slideExt in ['.ndpi','.svs']: - if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): - choppable_regions[idxy,idxx]=1 - chop_list.append([index_y[idxy],index_x[idxx]]) - - imsave(out_mask_name.replace('.png','_chopregions.png'),choppable_regions.astype('uint8')*255) - - # plt.imshow(choppable_regions) - # plt.show() - # choppable_regions_list.extend(chop_list) - # plt.subplot(131) - # plt.imshow(thumbIm) - # plt.subplot(132) - # plt.imshow(binary) - # plt.subplot(133) - # plt.imshow(choppable_regions) - # plt.show() - return chop_list + + if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): + choppable_regions[idxy,idxx]=1 + chop_list.append([index_y[idxy],index_x[idxx]]) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(out_mask_name.replace('.png','_chopregions.png'),choppable_regions.astype('uint8')*255) + + return chop_list \ No newline at end of file diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index 50e32c1..a0d5b42 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -174,7 +174,7 @@ def savetime(args, starttime): ##### Args for training / prediction #################################################### parser.add_argument('--gpu_num', dest='gpu_num', default=2 ,type=int, help='number of GPUs avalable') - parser.add_argument('--gpu', dest='gpu', default=0 ,type=int, + parser.add_argument('--gpu', dest='gpu', default="" ,type=str, help='GPU to use for prediction') parser.add_argument('--iteration', dest='iteration', default='none' ,type=str, help='Which iteration to use for prediction') @@ -188,6 +188,18 @@ def savetime(args, starttime): help='number of classes present in the High res training data [USE ONLY IF DIFFERENT FROM LOW RES]') parser.add_argument('--modelfile', dest='modelfile', default=None ,type=str, help='the desired model file to use for training or prediction') + parser.add_argument('--init_modelfile', dest='init_modelfile', default=None ,type=str, + help='the desired model file to use for training or prediction') + parser.add_argument('--eval_period', dest='eval_period', default=1000 ,type=int, + help='Validation Period') + parser.add_argument('--batch_size', dest='batch_size', default=4 ,type=int, + help='Size of batches for training high resolution CNN') + parser.add_argument('--train_steps', dest='train_steps', default=1000 ,type=int, + help='Size of batches for training high resolution CNN') + parser.add_argument('--training_data_dir', dest='training_data_dir', default=os.getcwd(),type=str, + help='Training Data Folder') + parser.add_argument('--overlap_rate', dest='overlap_rate', default=0.5 ,type=float, + help='overlap percentage of high resolution blocks [0-1]') ### Params for cutting wsi ### #White level cutoff @@ -202,10 +214,14 @@ def savetime(args, starttime): help='size of low resolution blocks') parser.add_argument('--downsampleRateLR', dest='downsampleRateLR', default=16 ,type=int, help='reduce image resolution to 1/downsample rate') + parser.add_argument('--get_new_tissue_masks', dest='get_new_tissue_masks', default=False,type=str2bool, 
+ help="Don't load usable tisse regions from disk, create new ones") + parser.add_argument('--downsampleRate', dest='downsampleRate', default=1 ,type=int, + help='reduce image resolution to 1/downsample rate') #High resolution parameters parser.add_argument('--overlap_percentHR', dest='overlap_percentHR', default=0 ,type=float, help='overlap percentage of high resolution blocks [0-1]') - parser.add_argument('--boxSize', dest='boxSize', default=2048 ,type=int, + parser.add_argument('--boxSize', dest='boxSize', default=1200 ,type=int, help='size of high resolution blocks') parser.add_argument('--downsampleRateHR', dest='downsampleRateHR', default=1 ,type=int, help='reduce image resolution to 1/downsample rate') @@ -226,7 +242,8 @@ def savetime(args, starttime): help='Gaussian variance defining bounds on Hue shift for HSV color augmentation') parser.add_argument('--lbound', dest='lbound', default=0.025 ,type=float, help='Gaussian variance defining bounds on L* gamma shift for color augmentation [alters brightness/darkness of image]') - + parser.add_argument('--balanceClasses', dest='balanceClasses', default='3,4,5,6',type=str, + help="which classes to balance during training") ### Params for training networks ### #Low resolution hyperparameters parser.add_argument('--CNNbatch_sizeLR', dest='CNNbatch_sizeLR', default=2 ,type=int, @@ -280,6 +297,8 @@ def savetime(args, starttime): help='padded region for low resolution region extraction') parser.add_argument('--show_interstitium', dest='show_interstitium', default=True ,type=str2bool, help='padded region for low resolution region extraction') + parser.add_argument('--num_workers', dest='num_workers', default=1 ,type=int, + help='Number of workers for data loader') diff --git a/multic/segmentationschool/slurm_training.sh b/multic/segmentationschool/slurm_training.sh new file mode 100644 index 0000000..e697617 --- /dev/null +++ b/multic/segmentationschool/slurm_training.sh @@ -0,0 +1,35 @@ +#!/bin/sh +#SBATCH --account=pinaki.sarder +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=10 +#SBATCH --mem-per-cpu=16gb +#SBATCH --partition=gpu +#SBATCH --gpus=a100:2 +#SBATCH --time=72:00:00 +#SBATCH --output=./slurm_log.out +#SBATCH --job-name="multic_training" +echo "SLURM_JOBID="$SLURM_JOBID +echo "SLURM_JOB_NODELIST="$SLURM_JOB_NODELIST +echo "SLURM_NNODES="$SLURM_NNODES +echo "SLURMTMPDIR="$SLURMTMPDIR + +echo "working directory = "$SLURM_SUBMIT_DIR +ulimit -s unlimited +module load singularity +ls +ml + +# Add your userid here: +USER=sayat.mimar +# Add the name of the folder containing WSIs here +PROJECT=multic_segment + +CODESDIR=/blue/pinaki.sarder/sayat.mimar/Multi-Compartment-Segmentation/multic/segmentationschool + +DATADIR=$CODESDIR/TRAINING_data +MODELDIR=$CODESDIR/pretrained_model + +CONTAINER=$CODESDIR/multic_segment.sif +CUDA_LAUNCH_BLOCKING=1 +singularity exec --nv -B $(pwd):/exec/,$DATADIR/:/data,$MODELDIR/:/model/ $CONTAINER python3 /exec/segmentation_school.py --option train --base_dir $CODESDIR --init_modelfile $MODELDIR/model_final.pth --training_data_dir $CODESDIR/TRAINING_data/first --train_steps 100000 --eval_period 25000 --num_workers 10 From 877d07fd2667601b506c198553f5f9bf10636502 Mon Sep 17 00:00:00 2001 From: Sayat Mimar Date: Tue, 28 Nov 2023 13:58:09 -0500 Subject: [PATCH 02/15] add hooks --- .../segmentationschool/Codes/engine/hooks.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 multic/segmentationschool/Codes/engine/hooks.py diff --git 
a/multic/segmentationschool/Codes/engine/hooks.py b/multic/segmentationschool/Codes/engine/hooks.py new file mode 100644 index 0000000..ec1eaf6 --- /dev/null +++ b/multic/segmentationschool/Codes/engine/hooks.py @@ -0,0 +1,69 @@ +import datetime +import logging +import time +import torch +import numpy as np +import detectron2.utils.comm as comm +from detectron2.utils.logger import log_every_n_seconds +from detectron2.engine.hooks import HookBase + +class LossEvalHook(HookBase): + def __init__(self, eval_period, model, data_loader): + self._model = model + self._period = eval_period + self._data_loader = data_loader + + def _do_loss_eval(self): + # Copying inference_on_dataset from evaluator.py + total = len(self._data_loader) + num_warmup = min(5, total - 1) + + start_time = time.perf_counter() + total_compute_time = 0 + losses = [] + for idx, inputs in enumerate(self._data_loader): + if idx == num_warmup: + start_time = time.perf_counter() + total_compute_time = 0 + start_compute_time = time.perf_counter() + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + seconds_per_img = total_compute_time / iters_after_start + if idx >= num_warmup * 2 or seconds_per_img > 5: + total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start + eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + "Loss on Validation done {}/{}. {:.4f} s / img. ETA={}".format( + idx + 1, total, seconds_per_img, str(eta) + ), + n=5, + ) + loss_batch = self._get_loss(inputs) + losses.append(loss_batch) + mean_loss = np.mean(losses) + self.trainer.storage.put_scalar('validation_loss', mean_loss) + comm.synchronize() + + return losses + + def _get_loss(self, data): + # How loss is calculated on train_loop + metrics_dict = self._model(data) + metrics_dict = { + k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) + for k, v in metrics_dict.items() + } + total_losses_reduced = sum(loss for loss in metrics_dict.values()) + return total_losses_reduced + + + def after_step(self): + next_iter = self.trainer.iter + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + self._do_loss_eval() + self.trainer.storage.put_scalars(timetest=12) + \ No newline at end of file From 705d7e3ece298b244c3ff10de50d289ced09f6c4 Mon Sep 17 00:00:00 2001 From: AnishTatke <53684776+AnishTatke@users.noreply.github.com> Date: Fri, 14 Mar 2025 11:06:15 -0400 Subject: [PATCH 03/15] Working Training Commit --- .gitignore | 4 +++ .../Codes/IterativeTraining_1X.py | 6 ++-- .../Codes/wsi_loader_utils.py | 20 +++-------- multic/segmentationschool/slurm_training.sh | 36 ++++++++++++------- setup.py | 3 +- 5 files changed, 36 insertions(+), 33 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..728e32a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +multic/segmentationschool/logs/ +multic/segmentationschool/output/ +multic/segmentationschool/*.ipynb +multic/segmentationschool/Codes/__pycache__/ \ No newline at end of file diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py index d7fb5fb..b411dc2 100644 --- a/multic/segmentationschool/Codes/IterativeTraining_1X.py +++ 
b/multic/segmentationschool/Codes/IterativeTraining_1X.py @@ -155,6 +155,7 @@ def IterateTraining(args): cfg.INPUT.MAX_SIZE_TRAIN=args.boxSize cfg.OUTPUT_DIR = args.base_dir+"/output" + os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) def real_data(args,image_coordinates_val): @@ -178,10 +179,7 @@ def real_data(args,image_coordinates_val): cfg.DATASETS.TEST = ("my_dataset_val",) - - - #os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) - with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w") as f: + with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w+") as f: f.write(cfg.dump()) # save config to file diff --git a/multic/segmentationschool/Codes/wsi_loader_utils.py b/multic/segmentationschool/Codes/wsi_loader_utils.py index 90a98f8..909ba83 100644 --- a/multic/segmentationschool/Codes/wsi_loader_utils.py +++ b/multic/segmentationschool/Codes/wsi_loader_utils.py @@ -17,24 +17,14 @@ from .xml_to_mask_minmax import write_minmax_to_xml import lxml.etree as ET -MPP = {'V42D20-364_XY01_2235505.svs':0.25, - 'V42D20-364_XY04_2240610.svs':0.25, - 'V42N07-339_XY04_F44.svs':0.25, - 'V42N07-395_XY01_235142.svs':0.25, - 'V42N07-395_XY04_235582.svs':0.25, - 'V42N07-399_XY01_3723.svs':0.25, - 'XY01_IU-21-015F.svs':0.50, - 'XY02_IU-21-016F.svs':0.50, - 'XY03_IU-21-019F.svs':0.50, - 'XY04_IU-21-020F.svs':0.50} def get_image_meta(i,args): image_annotation_info={} image_annotation_info['slide_loc']=i[0] slide=TiffSlide(image_annotation_info['slide_loc']) - magx=MPP[image_annotation_info['slide_loc'].split('/')[-1]]#np.round(float(slide.properties['tiffslide.mpp-x']),2) - magy=MPP[image_annotation_info['slide_loc'].split('/')[-1]]#np.round(float(slide.properties['tiffslide.mpp-y']),2) + magx=np.round(float(slide.properties['tiffslide.mpp-x']),2) + magy=np.round(float(slide.properties['tiffslide.mpp-y']),2) assert magx == magy if magx ==0.25: @@ -96,7 +86,6 @@ def WSIGridIterator(wsi_name,choppable_regions,index_x,index_y,region_size,dim_x def get_slide_data(args, wsi_directory=None): assert wsi_directory is not None, 'location of training svs and xml must be provided' - mask_out_loc=os.path.join(wsi_directory, 'Tissue_masks') if not os.path.exists(mask_out_loc): os.makedirs(mask_out_loc) @@ -116,8 +105,8 @@ def get_slide_data(args, wsi_directory=None): slide =TiffSlide(slide_loc) chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) - mag_x=MPP[slideID+slideExt]#np.round(float(slide.properties['tiffslide.mpp-x']),2) - mag_y=MPP[slideID+slideExt]#np.round(float(slide.properties['tiffslide.mpp-y']),2) + mag_x=np.round(float(slide.properties['tiffslide.mpp-x']),2) + mag_y=np.round(float(slide.properties['tiffslide.mpp-y']),2) slide.close() tree = ET.parse(xmlpath) root = tree.getroot() @@ -147,7 +136,6 @@ def get_slide_data(args, wsi_directory=None): print('no annotation XML file found for:') print(slideID) exit() - print('\n') return usable_slides diff --git a/multic/segmentationschool/slurm_training.sh b/multic/segmentationschool/slurm_training.sh index e697617..5b880db 100644 --- a/multic/segmentationschool/slurm_training.sh +++ b/multic/segmentationschool/slurm_training.sh @@ -2,13 +2,13 @@ #SBATCH --account=pinaki.sarder #SBATCH --nodes=1 #SBATCH --ntasks=1 -#SBATCH --cpus-per-task=10 -#SBATCH --mem-per-cpu=16gb +#SBATCH --cpus-per-task=8 +#SBATCH --mem=32gb #SBATCH --partition=gpu -#SBATCH --gpus=a100:2 +#SBATCH --gpus=a100:1 #SBATCH --time=72:00:00 -#SBATCH --output=./slurm_log.out -#SBATCH --job-name="multic_training" +#SBATCH --output=logs/slurm_log.out +#SBATCH --job-name="mcs_training" echo 
"SLURM_JOBID="$SLURM_JOBID echo "SLURM_JOB_NODELIST="$SLURM_JOB_NODELIST echo "SLURM_NNODES="$SLURM_NNODES @@ -21,15 +21,27 @@ ls ml # Add your userid here: -USER=sayat.mimar +USER=anish.tatke # Add the name of the folder containing WSIs here -PROJECT=multic_segment +PROJECT=mcs_training -CODESDIR=/blue/pinaki.sarder/sayat.mimar/Multi-Compartment-Segmentation/multic/segmentationschool +CODESDIR=/blue/pinaki.sarder/anish.tatke/Multi-Compartment-Segmentation/multic/segmentationschool +ORANGEDIR=/orange/pinaki.sarder/anish.tatke/MCS -DATADIR=$CODESDIR/TRAINING_data -MODELDIR=$CODESDIR/pretrained_model +DATADIR=$ORANGEDIR/TRAINING_data +MODELDIR=$ORANGEDIR/pretrained_model -CONTAINER=$CODESDIR/multic_segment.sif +CONTAINER=$ORANGEDIR/multic_segment.sif CUDA_LAUNCH_BLOCKING=1 -singularity exec --nv -B $(pwd):/exec/,$DATADIR/:/data,$MODELDIR/:/model/ $CONTAINER python3 /exec/segmentation_school.py --option train --base_dir $CODESDIR --init_modelfile $MODELDIR/model_final.pth --training_data_dir $CODESDIR/TRAINING_data/first --train_steps 100000 --eval_period 25000 --num_workers 10 + +singularity exec $CONTAINER python -m pip install --user imgaug +singularity exec $CONTAINER python -m pip install --user numpy==1.23 + +singularity exec --nv -B $(pwd):/exec/,$DATADIR/:/data,$MODELDIR/:/model/ $CONTAINER python3 /exec/segmentation_school.py \ + --option train \ + --base_dir $CODESDIR \ + --init_modelfile $MODELDIR/model_final.pth \ + --training_data_dir $DATADIR \ + --train_steps 100000 \ + --eval_period 25000 \ + --num_workers 8 diff --git a/setup.py b/setup.py index fff61e6..f7fdd5d 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ def prerelease_local_scheme(version): install_requires=[ # scientific packages 'nimfa>=1.3.2', - 'numpy>=1.21.1', + 'numpy>=1.23.1', 'scipy>=0.19.0', 'Pillow==9.5.0', 'pandas>=0.19.2', @@ -72,6 +72,7 @@ def prerelease_local_scheme(version): # dask packages 'dask[dataframe]>=1.1.0', 'distributed>=1.21.6', + 'imgaug', # large image sources # 'large-image[sources]', 'girder-slicer-cli-web', From eb9fe85cb43d2c9f2ebcafcc995f181eb8113997 Mon Sep 17 00:00:00 2001 From: AnishTatke <53684776+AnishTatke@users.noreply.github.com> Date: Tue, 18 Mar 2025 16:19:12 -0400 Subject: [PATCH 04/15] Plugin Creation --- .gitignore | 4 +- .../FeatureExtraction/FeatureExtraction.py | 91 -------- .../FeatureExtraction/FeatureExtraction.xml | 87 ------- .../MultiCompartmentSegment.py | 23 -- .../MultiCompartmentSegment.xml | 54 ----- .../MultiCompartmentTrain.py | 146 ++++++++++++ .../MultiCompartmentTrain.xml | 75 ++++++ multic/cli/slicer_cli_list.json | 5 +- .../InitializeFolderStructure.cpython-38.pyc | Bin 4127 -> 3626 bytes .../IterativeTraining_1X.cpython-38.pyc | Bin 14066 -> 12716 bytes .../engine/__pycache__/hooks.cpython-38.pyc | Bin 0 -> 2794 bytes multic/segmentationschool/slurm_training.sh | 11 +- multic/segmentationschool/utils/__init__.py | 0 .../utils/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 197 bytes .../__pycache__/mask_to_xml.cpython-38.pyc | Bin 0 -> 3487 bytes .../__pycache__/xml_to_mask.cpython-38.pyc | Bin 0 -> 4753 bytes .../segmentationschool/utils/mask_to_xml.py | 131 +++++++++++ .../segmentationschool/utils/xml_to_mask.py | 219 ++++++++++++++++++ 18 files changed, 579 insertions(+), 267 deletions(-) delete mode 100644 multic/cli/FeatureExtraction/FeatureExtraction.py delete mode 100644 multic/cli/FeatureExtraction/FeatureExtraction.xml delete mode 100644 multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.py delete mode 100644 
multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.xml create mode 100644 multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py create mode 100644 multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml create mode 100644 multic/segmentationschool/Codes/engine/__pycache__/hooks.cpython-38.pyc create mode 100644 multic/segmentationschool/utils/__init__.py create mode 100644 multic/segmentationschool/utils/__pycache__/__init__.cpython-38.pyc create mode 100644 multic/segmentationschool/utils/__pycache__/mask_to_xml.cpython-38.pyc create mode 100644 multic/segmentationschool/utils/__pycache__/xml_to_mask.cpython-38.pyc create mode 100644 multic/segmentationschool/utils/mask_to_xml.py create mode 100644 multic/segmentationschool/utils/xml_to_mask.py diff --git a/.gitignore b/.gitignore index 728e32a..b0528ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ multic/segmentationschool/logs/ multic/segmentationschool/output/ +multic/segmentationschool/test.py multic/segmentationschool/*.ipynb -multic/segmentationschool/Codes/__pycache__/ \ No newline at end of file +multic/segmentationschool/Codes/__pycache__/ +multic/segmentationschool/__pycache__/ diff --git a/multic/cli/FeatureExtraction/FeatureExtraction.py b/multic/cli/FeatureExtraction/FeatureExtraction.py deleted file mode 100644 index 91dfe8b..0000000 --- a/multic/cli/FeatureExtraction/FeatureExtraction.py +++ /dev/null @@ -1,91 +0,0 @@ -import sys -import os, girder_client -import numpy as np -import pandas as pd -from tiffslide import TiffSlide -from ctk_cli import CLIArgumentParser - -sys.path.append("..") - -from segmentationschool.extraction_utils.extract_ffpe_features import xml_to_mask -from segmentationschool.extraction_utils.layer_dict import NAMES_DICT -from segmentationschool.extraction_utils.process_mc_features import process_glom_features, process_tubules_features, process_arteriol_features - - -MODx=np.zeros((3,)) -MODy=np.zeros((3,)) -MODz=np.zeros((3,)) -MODx[0]= 0.644211 -MODy[0]= 0.716556 -MODz[0]= 0.266844 - -MODx[1]= 0.175411 -MODy[1]= 0.972178 -MODz[1]= 0.154589 - -MODx[2]= 0.0 -MODy[2]= 0.0 -MODz[2]= 0.0 -MOD=[MODx,MODy,MODz] -NAMES = ['non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] - - -def main(args): - - file = args.input_file - _ = os.system("printf 'Using data from girder_client file: {}\n'".format(file)) - file_name = file.split('/')[-1] - plain_name = file_name.split('.')[0] - folder = args.base_dir - base_dir_id = folder.split('/')[-2] - _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) - - - gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) - gc.setToken(args.girderToken) - - files = list(gc.listItem(base_dir_id)) - # dict to link filename to gc id - item_dict = dict() - for file in files: - d = {file['name']:file['_id']} - item_dict.update(d) - - file_id = item_dict[file_name] - - annotations = gc.get('/annotation/item/{}'.format(file_id)) - - annotations_filtered = [annot for annot in annotations if annot['annotation']['name'].strip() in NAMES] - - del annotations - - cwd = os.getcwd() - print(cwd) - - slide = TiffSlide(args.input_file) - x,y = slide.dimensions - - mpp = slide.properties['tiffslide.mpp-x'] - mask_xml = xml_to_mask(annotations_filtered,(0,0),(x,y),downsample_factor=args.downsample_factor) - - gloms = process_glom_features(mask_xml, NAMES_DICT['non_globally_sclerotic_glomeruli'], MOD, slide,mpp, h_threshold=args.h_threshold, saturation_threshold=args.saturation_threshold) - 
s_gloms = process_glom_features(mask_xml, NAMES_DICT['globally_sclerotic_glomeruli'], MOD, slide,mpp, h_threshold=args.h_threshold, saturation_threshold=args.saturation_threshold) - tubs = process_tubules_features(mask_xml, NAMES_DICT['tubules'], MOD, slide,mpp,whitespace_threshold=args.whitespace_threshold) - arts = process_arteriol_features(mask_xml, NAMES_DICT['arteries/arterioles'], mpp) - - - all_comparts = [gloms,s_gloms,tubs, arts] - all_columns = [['x1','x2','y1','y2','Area','Mesangial Area','Mesangial Fraction'], - ['x1','x2','y1','y2','Area','Mesangial Area','Mesangial Fraction'], - ['x1','x2','y1','y2','Average TBM Thickness','Average Cell Thickness','Luminal Fraction'], - ['x1','x2','y1','y2','Arterial Area']] - compart_names = ['gloms','s_gloms','tubs','arts'] - - _ = os.system("printf '\tWriting Excel file: [{}]\n'".format(args.output_filename)) - with pd.ExcelWriter(args.output_filename) as writer: - for idx,compart in enumerate(all_comparts): - df = pd.DataFrame(compart,columns=all_columns[idx]) - df.to_excel(writer, index=False, sheet_name=compart_names[idx]) - -if __name__ == "__main__": - main(CLIArgumentParser().parse_args()) diff --git a/multic/cli/FeatureExtraction/FeatureExtraction.xml b/multic/cli/FeatureExtraction/FeatureExtraction.xml deleted file mode 100644 index 7fdbf2e..0000000 --- a/multic/cli/FeatureExtraction/FeatureExtraction.xml +++ /dev/null @@ -1,87 +0,0 @@ - - - HistomicsTK - Extract Features - Extracts a variety of features an annotated whole-slide image - 0.1.0 - https://github.com/SarderLab/deeplab-WSI - Apache 2.0 - Sayat Mimar (UFL) - This work is part of efforts in digital pathology by the Sarder Lab: University Of Florida. - - - Input/output parameters - - base_dir - - Select the folder containing the annotated slide to be analyzed - input - 0 - - - input_file - - input file - input - 1 - - - downsample_factor - - downsample_factor - 1.0 - input - 2 - - - output_filename - - Select the name and location of the Excel file produced. By default this will be saved in your Private folder. 
- output - 3 - - - - - Deconvolution Thresholds - - h_threshold - - h-threhshold for glomeruli and sclerotic glomeruli - h_threshold - 160 - - - saturation_threshold - - Satruation Threshold for glomeruli and sclerotic glomeruli - saturation_threshold - 0.3 - - - whitespace_threshold - - Whitespace Threshold for tubules - whitespace_threshold - 0.88 - - - - - A Girder API URL and token for Girder client - - girderApiUrl - api-url - - A Girder API URL (e.g., https://girder.example.com:443/api/v1) - - - - girderToken - token - - A Girder token - - - - \ No newline at end of file diff --git a/multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.py b/multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.py deleted file mode 100644 index 1d1cf81..0000000 --- a/multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import sys -from ctk_cli import CLIArgumentParser - -def main(args): - - folder = args.base_dir - base_dir_id = folder.split('/')[-2] - _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) - print('new version') - _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(args.input_file)) - - cwd = os.getcwd() - print(cwd) - os.chdir(cwd) - - cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --modelfile {} --girderApiUrl {} --girderToken {} --files {}".format('predict', args.base_dir, args.modelfile, args.girderApiUrl, args.girderToken, args.input_file) - print(cmd) - sys.stdout.flush() - os.system(cmd) - -if __name__ == "__main__": - main(CLIArgumentParser().parse_args()) diff --git a/multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.xml b/multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.xml deleted file mode 100644 index 9c13982..0000000 --- a/multic/cli/MultiCompartmentSegment/MultiCompartmentSegment.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - HistomicsTK - Multi Compartment Segmentation - Segments multi-level structures from a whole-slide image - 0.1.0 - https://github.com/SarderLab/deeplab-WSI - Apache 2.0 - Sayat Mimar (UFL) - This work is part of efforts in digital pathology by the Sarder Lab: UFL. 
- - - Input/output parameters - - input_file - - input file - input - 0 - - - base_dir - - Base Directory for the model - input - 1 - - - modelfile - - Trained model file - input - 2 - - - - - A Girder API URL and token for Girder client - - girderApiUrl - api-url - - A Girder API URL (e.g., https://girder.example.com:443/api/v1) - - - - girderToken - token - - A Girder token - - - - diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py new file mode 100644 index 0000000..6ac02f9 --- /dev/null +++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py @@ -0,0 +1,146 @@ +import os +import sys +from glob import glob +import girder_client +from ctk_cli import CLIArgumentParser + +sys.path.append("..") +from segmentationschool.utils.mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save +from segmentationschool.utils.xml_to_mask import write_minmax_to_xml + +NAMES = ['cortical_interstitium','medullary_interstitium','non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] + +def process_xml(gc, files, xml_color, folder, tmp, slides_used) -> list: + for file in files: + slidename = file['name'] + _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) + skipSlide = 0 + + # get annotation + item = gc.getItem(file['_id']) + annot = gc.get('/annotation/item/{}'.format(item['_id']), parameters={'sort': 'updated'}) + annot.reverse() + annot = list(annot) + _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annot))) + + # create root for xml file + xmlAnnot = xml_create() + + # all compartments + for class_,compart in enumerate(NAMES): + + compart = compart.replace(' ','') + class_ +=1 + # add layer to xml + xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) + + # test all annotation layers in order created + for iter,a in enumerate(annot): + try: + # check for annotation layer by name + a_name = a['annotation']['name'].replace(' ','') + except: + a_name = None + + if a_name == compart: + # track all layers present + skipSlide +=1 + + pointsList = [] + + # load json data + _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart)) + + a_data = a['annotation']['elements'] + + for data in a_data: + pointList = [] + points = data['points'] + for point in points: + pt_dict = {'X': round(point[0]), 'Y': round(point[1])} + pointList.append(pt_dict) + pointsList.append(pointList) + + # write annotations to xml + for i in range(len(pointsList)): + pointList = pointsList[i] + xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_) + + # print(a['_version'], a['updated'], a['created']) + break + + if skipSlide != len(NAMES): + _ = os.system("printf '\tThis slide is missing annotation layers\n'") + _ = os.system("printf '\tSKIPPING SLIDE...\n'") + del xmlAnnot + continue + + # include slide and fetch annotations + _ = os.system("printf '\tFETCHING SLIDE...\n'") + os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename)) + slides_used.append(slidename) + + xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0]) + _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path)) + xml_save(Annotations=xmlAnnot, filename=xml_path) + write_minmax_to_xml(xml_path) # to avoid trying to write to the xml from multiple workers + del xmlAnnot + + return slides_used + +def main(args): + + folder = 
args.training_data_dir
+    base_dir_id = folder.split('/')[-2]
+    _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder))
+
+    _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(args.init_modelfile))
+
+    gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
+    gc.setToken(args.girderToken)
+    # get files in folder
+    files = gc.listItem(base_dir_id)
+    xml_color=[65280]*(len(NAMES)+1)
+
+    cwd = os.getcwd()
+    print(cwd)
+    os.chdir(cwd)
+
+    tmp = folder
+    slides_used = []
+    ignore_label = len(NAMES)+1
+
+    slides_used = process_xml(gc, files, xml_color, folder, tmp, slides_used)
+
+
+    os.system("ls -lh '{}'".format(tmp))
+
+    trainlogdir=os.path.join(tmp, 'output')
+    if not os.path.exists(trainlogdir):
+        os.makedirs(trainlogdir)
+
+    _ = os.system("printf '\ndone retrieving data...\nstarting training...\n\n'")
+
+
+    cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --training_data_dir {} --init_modelfile {} --gpu {} --train_steps {} --num_workers {} --girderApiUrl {} --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpu, args.training_steps, args.num_workers, args.girderApiUrl, args.girderToken)
+    print(cmd)
+    sys.stdout.flush()
+    os.system(cmd)
+
+    os.listdir(trainlogdir)
+    os.chdir(trainlogdir)
+    os.system('pwd')
+    os.system('ls -lh')
+
+    filelist = glob('*.pth')
+    latest_model = max(filelist, key=os.path.getmtime)
+
+    _ = os.system("printf '\n{}\n'".format(latest_model))
+    os.rename(latest_model, args.output_model)
+
+    _ = os.system("printf '\nDone!\n\n'")
+
+
+
+if __name__ == "__main__":
+    main(CLIArgumentParser().parse_args())
\ No newline at end of file
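For reference, process_xml above assumes the standard Girder annotation payload, where each record carries its contours under annotation.elements and each point is an [x, y, z] triple; the code keeps only x and y and rounds them. A sketch of the expected shape (all names and values illustrative):

    # illustrative payload; the layer name and coordinates are made up
    annot_record = {
        'annotation': {
            'name': 'tubules',
            'elements': [
                {'type': 'polyline',
                 'closed': True,
                 'points': [[1200.0, 845.0, 0], [1212.0, 851.0, 0], [1198.0, 862.0, 0]]},
            ],
        },
    }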
diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
new file mode 100644
index 0000000..79e90fd
--- /dev/null
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
@@ -0,0 +1,75 @@
+
+  HistomicsTK
+  Multi Compartment Training
+  Trains Multi compartment segmentation model
+  0.1.0
+  https://github.com/SarderLab/Multi-Compartment-Segmentation
+  Apache 2.0
+  Sayat Mimar (UFL)
+  This work is part of efforts in digital pathology by the Sarder Lab: UFL.
+
+
+    Input/output parameters
+
+      training_data_dir
+
+      Folder containing the annotated training slides
+      input
+      0
+
+
+      init_modelfile
+
+      Trained model file
+      input
+      1
+
+
+      gpu
+
+      A comma separated list of the GPU IDs that will be made available for training
+      0,1
+      2
+
+
+      training_steps
+
+      The number of steps used for network training. The network will see [steps * batch size] image patches during training
+      10000
+      3
+
+
+      num_workers
+
+      Number of workers for Dataloader
+      0
+      8
+
+
+      output_model
+
+      Select the name of the output model file produced. By default this will be saved in your Private folder.
+      output
+      5
+
+
+
+    A Girder API URL and token for Girder client
+
+      girderApiUrl
+      api-url
+
+      A Girder API URL (e.g., https://girder.example.com:443/api/v1)
+
+
+      girderToken
+      token
+
+      A Girder token
+
+
+
\ No newline at end of file
diff --git a/multic/cli/slicer_cli_list.json b/multic/cli/slicer_cli_list.json
index 1189b00..dcec582 100644
--- a/multic/cli/slicer_cli_list.json
+++ b/multic/cli/slicer_cli_list.json
@@ -1,8 +1,5 @@
 {
-    "MultiCompartmentSegment": {
-        "type" : "python"
-    },
-    "FeatureExtraction": {
+    "MultiCompartmentTrain": {
         "type" : "python"
     }
 }
diff --git a/multic/segmentationschool/Codes/__pycache__/InitializeFolderStructure.cpython-38.pyc b/multic/segmentationschool/Codes/__pycache__/InitializeFolderStructure.cpython-38.pyc
index 770f60b56729761a63e0043e6ad7d67c4efe466f..367a0f5cdc84932927afb53cdb06a02d026a7c52 100644
Binary files a/multic/segmentationschool/Codes/__pycache__/InitializeFolderStructure.cpython-38.pyc and b/multic/segmentationschool/Codes/__pycache__/InitializeFolderStructure.cpython-38.pyc differ
z@ufMPTIMq!x?+uNe&Lq+MV$Y(HHhZ=b?)3EM>c;O|IKn_Yuqe{zJu53O6&8Qxh0bS zsyV)lbCYKwAKW;V*G>F1e+@r8?mfBr-OGFNkFv4OOKUswmk3n4B$L0Q9u*No z+echROI0?jW`&`v1N|o{tH1s4yLazS*@L&5wy5zs89b1b$fZxs16s#Bs|bf~Rs#a_ zXwV6%V5<|=r$kk_NESoyyo7YKqH$OqSK!l0jxq`gZKRA_C^j3la#{@AhoY5biPVaf zOg&`5D1MIjz|SB=nhUzILMBM9uuatqBwW*YdRX+NBK%QSRXMsUk`&=&Y zgYUc#w%N0k-_i`2ox-5fwrOfQ)BPIquj7lqLu0V}?7mrGwpG=YSAo@8-L#SUR#*k~ z*ch7YY#sf&iO>4jtg@>4F}r|ksN2@Cg8rLp<}8!6%cTmAeCND^qasM5+B(78<_(LN zlyQQ04VAmvOW6+cI$25n98KzqBiJfNi3FQc@~$yFeY+CRww`=7eZJp;Wr#1<1wPIo z2~(TN%BuHnx7k>BoXDNOj)V2h%G~>`O08sh=U< eL)dK)h(FMA<9RexLzLKx>A7CnoAKto&;JMId!k|h diff --git a/multic/segmentationschool/Codes/__pycache__/IterativeTraining_1X.cpython-38.pyc b/multic/segmentationschool/Codes/__pycache__/IterativeTraining_1X.cpython-38.pyc index 0f28eccd166f3ab58c22074e7f28e473dcc2674f..ff5f3e69f3ed72659f6b138167bd805579ccd4d7 100644 GIT binary patch literal 12716 zcma)idu$v@dS7=>PtSY!mZ--{v$|brXIG-M(muI5X(@6f@s=F&IHO%{*|)vTspfFV zp6O9_j~=`x5|~==8nBQ(;Qq0_iHEk~coW2S9(HUeF&xARY}keY!%mPk5I^n@TQFec z5E#iBdr^L0^^hD&+Cyg0Usruq{nhKM?^SgpmrE=7y-@n&`uD!7DBqz>|1XEk+jzXc zRTYIRTy+!{y;Y{lyv8*7jr9vLIE&-0IYu+V63rw_Hd8FsOtZ8aeV<`jl*gQ0 zGtctC={)WXGzZyWuY8CNq1&^*Wv%5=&ZZysWY zWIF8}ZXRJrWIE#_ciN`o*6X%FK55@=yIx1w z^l{VWwqtI%E;9KUd&TNFepOhFmaLkn+rC*_siSa!+rC}%8*a<&_zlMcDsjcVJCE{-}y!ICx-@c3rS+B8So2XR-;i4vJ z#d^e&Z=;pC*QhtGqE;vJ^y?M=Pqfs-{q^M@PIlPiuUahSH0rB<%l5n)k=*$m zLL!AHhR3_BD~hjtqO7S~>ZZz7t~Jz64S6llc%&4-N^Ru5Q zG}ox}!UJtH56VTfmETggG_;-LBRjdx0YASs5DWx4{vv7)`h&q>kPpx}x#Z{LKC2Hhn$P9px`=t7~IHf=@i4vFu@z{H(-8?I(Ke5A|9cqV_k3WsAd- zXL>#6=Z;{+vIisR!ALM9d!YI8?g9Stwz@g$AKX$l$AYo$xPPd7n4gDCM9ci8U~KJ( zc@*P*2B~A_aee{s!rHUu3I8kI=eCv211NhQxRd57q+a+`6Td-ks$~{ic~E*~bJk4K`?=!$;WNL!HD&J@MYy9=PCe!Em8~n{)`ei;@SNU6bj^nwA=c{-w)z!Lz zx`nm#<^_J4zs;xk6+X?Ud)QaZSNRMt@vHm-zs9fgS^hQtPCZqZ{lPb{@i{)v%iypD zzR15`PxNTL9`TRNJ!`%Z@sZ5EX}-bV>ftB(5>hA4w=fE#c+vdoZt48`&KpIB?#^)RsveZd=IL!W53kR@e>;&4bx>_)3D z1+$47EDNc-o7)(<4l!MW+Tn!;WV1y&U?X<^^aVCLePd?Ie0R1odu6U<&XnGroi0^E z_2pi{+|-To^mX&f+`{xbv*l~dSa0x_eQ)E)f~Z@qLK7ORuxz8{0u))ncPAz$3M}q4 zZrg&TuUNJ9x^O!!&a^8tp*kf+w@5&+K70faYExCFEi8xX6_#yCrQtXHMyClY%;p`( zu~@q9xJ_GhoQ}6K>eU=uxPGHn*uyfz?<{v5dn0EFjI3cf1z*_q!+%Kh)oVizffsyWy~#o}(9;DumW{!3r@3s4 zLd~%}kBXs25>TkC7&?pBTnCy{Ww|?EgJvH(-xc>ZhI${lD+Q@Gy-8>*b)BV~_vF%G z%4+E5z8`6w5j-9lyCg+EfmEp|pR47fDo8bbuAph%5_K;eYBa67ZPr{@@J7pmbs%0x zjlci#v&&A$KHCOvy)ofg0`|gLtJUyUCw$9ax6jVQU}>D0cAITX8ma3u6~3=O9d zfCUk1?ek1)U%>MUo>%d_R*y`bU%%W-{O;xY@sIxI>%VsFPcDnEfP{FCz_UawPJsBK z@$;~o8oy28FUf+Uo<6)tfB{_HNSt*$e!JsW!Dp~o!JnqPUPCW-e`MGgcq(|j4*=@Q zrsgYw0+pme5r1tn7Qpxjbgmv!Hsg3}cpG@f@J{$DQaV(097;M-*S2DtNnay14?N7E zKc-lK8JK@Am=kizE9#QbMH1Z$ zb0p)=w_WF69YvvGwcB=!7gOR5Djj=b2bhq1ENM9)OZINeZP{U#%vEeQY|WYnfE?z^Md;KcL?qTY(P0DhSg#0U@)@8PvEayBymO~@ih2g%qQ z+MSm+a?_2nYur41YF#s(3AoVW&X_xpynD$HOG) zA`*6@hz7&B{5;fLFHG32+YRBi!kA+Nc)kczb_;Dl@7Q5_+45_vCfW31rsrwURuhLM z8Tb=*8=aDFMVNrtY1AOsj-XHsQjMgr>sVP^h#|_x;Dn$J0K>ZnbW>0qfDNEEh(_tV ztTdoDG7>9YGwBzZgbkB3Q`Mix9Uc-(Q}c5RQ!}L{mMPzuH{V-WdZ)Bh3GgnLm_$%^Zgzg^T8WKcot;B} z%+mZ~^?h@yTwbUW#WEIRw!CQG26+r&FCV5CZd6gvoS9vsjl51~kr(PN zqyvPDfRz*Kywhxl38@%hj^raoH(eo}wA4Z^ zs|i+og*Yiif=jA>ruj9dc{T8AWE+G@nSk!cj^TI364g)BvP)s*v0TR^pFDDauj?UL)zTp?%e)T+8oOjq-cdDIcn47(?WNrKmORd)o`u6_$_##fBl{0K%0dr+AaV zo;ZS_eQ7mqBoly;huJ8v9__scAs+|TSAfrJ$@nPJx;CgD zj%5M!@}JcXsKe^8HmK#*V`?^@Rt?ljYM@6n&_-6v$I`K=Ey!5)gn9rq$K}_(_O+}! z7)uU}OG-&?SRF@wdB~?7EJaXCD3+-~uGeAknhj2Y zvpq|)I2>xNc4%~3jawaT$VesEZl7n_l}3wCyDi`CVB5wmA*_2Ww^XV^B;15xC{N9? 
zq3P=o$>!AJ;?lxRv%F9)g}U6Hi5184eQb2Y@~DrY;X}@mwr@6|aGKU#L9r#4y!rl3 z2$w4mD%c$zTaLFz5>lGDLhdOtj$l8kfd<GTrffw@F49rguIni$Q2Pp7;@}uxqN3tbJ^v z!l5YS*%t-LGgRB@)#1l_)mN=t+gU8s$9(to>m)J=uk|$;3F0;oVjKXnze{O33^Hw5 zF7g-571x1}`1sHUqM?yVBxA$h!6zQAyaGu*sBRqGKepX^`y?>UcNPzEjdvBmSGFL3 zAql}RT}WU*7AR}_8bYLaClDe;PGVAPaq>R=H05|~3*N?7jDmDha_QxV5L}OBNTFKH zAPU?8qlKBM&+t~);Vocy+QJvlpxJ2DElRlnVO&~Qk}btLVWWkHh_RiOFfIvvj$pqT zIh>~1w`2bq>eGpXcK|?<$}5RItI2(v{=W8oBegNIe{v+fBE0xim^4k|Zqp3YCP`34 zFObff=BP5+S^@>T>ZjXhi%hfqzPXS`Tx*e${NaI=#M0x$;X?MLGtwasN9W zkCb3i)1b7YCmWA8HPHKhv`lZ~@dAK-vIFb}sXHP=M|ZsQg}Sj zeoFbr(oyFI<~w2KA%D_^-VfGzX|jO$i-*t{T&)$WW*3mbAs9R(3K&_=M0&US6oOHc zyE&q%c4XH+0{bR;vS1+$=lKN$()Jb5 z_jEcYp!ZRZz#u5MJX#D7!N>yii*s)u-Tu`A*;YX!3eocQ1p$}u6((0Yt=i;=d#7*m z2yX(eJ&!E;4=47Keu6#s29K`vyg>{%vKQce@7YP{$ZquGbfJM!-nJSzt#s@sTG-*{ zrxM>O?5FqT8>pSPV(@86%36rT99HK<_`{JovO|H}{e_2Nr+`6NwD9f9iB>B60c;}qyz z70YCw*^vtS=+J0(q=Z+XxI&>nsw4v{>$cs-IBl?AyRhD09dl?~wbh8*_7DFtS+j3l zU4+r4FI!lqC_3#J2&DeieWH(G+^GLl*{4|BzHjYb-*?ttu24V0z0Pf%daP>&Vc+T? zxZSt-a(!8HXFrRdgCN^D`9u5cyKfYxqBG%0V(too4Q;YeE)ItIDMZu-MgB@cxI+AU z&=4;JATH7O;T$%&_zzU}n*^k!l|ojETavUax3fS}j7yn|Lw_9M%NPtICv);RU;Il{ z4s{3~x0n_Gf~d(hA0z%{VmvgWTLEw*5chuEW`c5;u^aPypz9q4P>#cz*9=(gM*km9 zZ&uwn`V@nBVuu6_XxX!U);B2)QXtcKJTe8OHw*ilY(rRy+FEQ)7c+r|C)SO_zB0Or z9zNN7r?yd>?5MT$T4qykAkqb*M^{) z;#?=+5+%dp=h5y@%94?mA>X4Yr^s*|YQ`u_bM0y|@zfdLTTW7!d)l)&2CtOvPq3b` z-LC{h2X&b~y|l2%5~cDKj@%sZv`7Z+yBRjTpK50p8P1?dpYJjL@&xU7o{m8rQ3JHEJ7nqHVk ztg5t!HmeJZ@0e8zIkICvpaSBLMMHFHG}34%K4m(F??v1l6Sn_22WDi@6OJfuf8Zui z&#gq%B(D|_W6@y*YP*c6ea}cI5s^v8v`1;p_`VU}IQ}%2?4!BQYoU+nnPC;?_0>%SectFY!A~4nx-Q+f|3YaOxX3~KHK2$j_UC4veWLeCiFXG1mSZYVX z?7!wgehKX(dDwOii8@X}&};HaK!toQuHs6D?xW9v8inTJgJ}=+Z)vxr=f!XWSQG9Y zF1Z?;m81}ytW~_ke*s{}cI}Zp%e7C}M;~aRHgOIo70YX%$HGixp?IqnOf8a4VS1NF zbi5~x`!IcxEFu#1mlkM|DK>8CnmYIe{16;GX^Q@6ZXlI+;XMl$=l(tiZELm2*h zh9vxsDGfg}GG07{1xNsgv9{Ze>|HtB@jildnQFIwWDe7=X307N+hi6*vFshi7FpV| zRNCC^00Q5z>`c4a_7PdyZ8;H1?5~5C_yYpJPaV;zWfmvsi9aOt>jZv}fQ)S(c>1uf z(cBeKOTh+-WOQE3>&Y*SL^O~3BSR|bGd8i6J2t?=+sTh>v3>hB(Gtd6UWji zoAl=KhV+%EY&xQlaI|D>5IdlY9ngir&f_?DlhY(t*>a?xMar@Jn@mX3zHvV zFM%53kzGSxDuDFoDf*Vak!N=BvT675^tz6VLf5g&lHC-K^9*!IvX?V>7P%C>Z@6xt zhA>fIJ8^0wNFv7g1RjydO!N)nn(|ve*Udj!58p!%J&DKn1N0rwjVj$iPHv3h4+RD; zoD4irX%@P}h#naJ2wgJ5*aq>%Xm<<_lOeTQVl$2TNe5{@v<}V0ap?$`dE)CD>d|bb z5s8iHru_qJ2jNPk`7o{rWdfx;9%Rs7Hpr|U!gE+dJ8+_mwIdk)&_{F@n)8o#p8=g? 
zfzdsV>sOFVvjtjrjH0aqdW`t(cB~iC zII!`bcFnmbU8;T7<_i>UEKU}7ZVZyePKLV_9`GPqg&BGC5T|hG3q5PB=j8PHZA-Z0 zC|ZxXZbX?JuAWQeuUfDmaSzc4Q5iBJ^G|wRHD8jqYBjs<@Ah0=2eXS=0$&4QspyOX zhrD~YKgthDUx;@I&>6NU0QAnmf8w$@O{h-*D1a!3k296-* zAY>om7AqnRz_J!!L-f+DTg|2=Na=}h1JL2)Ep!P1ZhOO)LD@+556Y9)$W@fbN|8a5 zgi$-&Lf9cPX8(lfP}dQ_BAJh4DHkDTma-6Vuv?xh!Xe9}urls)x;X!#NO_o&G8`cb zTl^8eE#~`#;u2A$^LcQp`n`haBT3`iBGISE}Q5^5!k09Y`i9d(f^(e}+8pX9KvYn?0HstIv zyhmXP=VM=H2}ey8)oO86+U??Bp*``h35)`S`ZDgNh%w3^Bp{8xEaq57Dafl}sp^ix z6CY9)3ff7I2oo(X=Tp2-sQ*mh-w^m60yMGWF9=9gvIk^vjiQDB8;?iKir`v;tg7Ds z?{rPWxztWR3B{uQUH+`54nLDT7{wW{%P_6@6d#AWkC2MwKXD6r@!tS&*=1ca_k=uT ze4HVP`@%R*`M}`B+bredLUXjz#PDpm?>e)I=m?#ONH+g1Dm@qlHE8=zL{W{%r4gj4 zSfbZ-#P;dEy3rx}&>mvKZo#**MV3D6$4Ay@?5> zE|hCJ)>jvvV5bJzh0|C%=#du#d zSgL}1Y_!lGi!Ze7g%y^c;=JV8#HV)}p3Sne2u3=dU3Mv#n|3YX*)tTF?BHPOn$^Kw z0;_e!=?IpntXcvm-QrKF`LvIlGfsuBJ;?*mNCthA`bu2-H1@@%tSIn&342)a<^ay& zl$B0VvvdmEb6Dw}#pcbbiE!%N0Vf-ik&PBcnmb**zo|##{R<#Cot?kya*83*(3%MB$ab_lh}$8dmZl| z!NADm_q{5TO=*q*4uyW__rCYNSKs^IcRanBPA4S%{psTO7yt2^B>j*ooqsASH}G>l zktK;qOtvK>$Ynzob;VFbt{SSyHA545#E6JIYD7gIGh!mw4PE4MBQElUkq~*(NQpdM z?J~OMV2(_+$LJMh#qO(SjjSlE_P%Pr(J#uHJy0Dq2CMsx{o$Ac#sT1o*axczjf0{s zY9Cr4d@=iQb;ua19x;xHcHPcZj~YisIc^`T9ygAQa>71QJz<;><)nSGddfH@$|?J~ z>S^P&ELo=?N}?rgC#u87uqb!gXR6N|&sWbHXGOi+K35$vMnpMd=d0(9^VJK+1yS#@ zFIF!Zm#UYI%c9^M7te4L$Rr;tyu=h$iF zue0;)414}v*|^0nu(RwON|S7a<yKiuWz*D_ibcP6Z|T3;^`mA}`vuW&1H^*~LwZ5uR#Yz-lFQp}r4e%+hkb(I_t3 zmdUMhrCxK;5h+xQWx|?rEO)(O+VyhT;;6@~b)Ykw^*ZLrj9W{^b=#fg7ORxpI`_N6 zmIYp{)Wm?8m}036@Wh>pvtG2Z0Pe@f>UN#q1~PPX-?rQ$E4sxo{4f>Jpn#dfI_8bt ztZxWt5It_WR>=jWW6M@)wNbCsTrpL0alK+QQ&3pK(b@UCfKnVkI#|x-m?XK?V3DDEUhE|owGWWdrxO6YuN_vTQx0~^j59KZ3dn~Vg&F%3L59MNS zZHVAoDWts^rDD{#sEi)XOBH*mSEPOH2ym)A1?qBws%$&ECAHF+vy0|=EZ-adAo?iU z-p8_AGCR6oLg^SvL;IyxH%1-zx(6h7B3Kjf_Akkx?kg?Y<-NPIcWu~X8tG*Qqz@u) zd}W|L_$UIrC&d~elOHQ=W>TD`5lTGl<-K1AbM zSueYCIA~=8sD0#76fgtcKs$%k4YKFT@&~fFk5++H5h<1XsRdL8 zZT-}P)t&xGYwdTBdV`PThf3>!d#rsNSPr@;yn}4`L-~E_effRm{mA>#rHBwVx_i>Y zcOS=hrai?fzWdB>2p*v?%@o=@%yM=>Y8?vT1P6j2#q8$F#g6Xj%ZtAA;KNfP zKaK2{*agw5(&#Nv&^nCo@|<@#OI&^Wpwt?2i8F>D%Gmca?4rPoajhfl61&V^D66d; z_7LY2rK2dli2YPW{-VgyQdZG-jGg>I^Nz5W@C{Y>dGE+0?UCx`yrbT+hf#3-W%e@m z|2TVvy^8#Vd$xU!U3p)9Ut3bxRrcDu`j&!oFw)MmD_io)dG9#8_92~&-If#VdS^@P zqyzYF~6OwJ)<9TY~r574Ou_3+9V#ROByhe<6IIn}ZVCUN&D5JoYLZW8+&2 zTCSK^*#z>}%xmm*kzZ%GM1F%!ihPt6*jLH~`I zY_=RJ%cAvl^A`Imn`86XuLbrdyHk#Z?UU>+Q7V{UaZh>29|rJmm{X{o2y3^^DRx&t zyaE6(_kcJ7dK;xp%x%;DgiN>6N$G+w30gsdu`4cMF$&urC7B?#_H2^(!lHm~S%^rI$>D zeFOQd`Hra1nqOnZ*06g|U_Il09bbJIC*yfG)H*BhnUIU$*pk>Hap2>qGlY7vvkmhu zwoBgg-td+JdGSwM=UmpdymPFyA0u#@&v@B~GGqHCoavEP-d*ytP;{jCNF@tXcJw`U z30&@-WENZcP;H&}&bQ0nNPC%;w`4EBrL@kwmG%l-rrkhk_49Fr5G$;*1AD%02m9z& zR;p~pJNt_gTm2=K*{mvdztXPJ{&{EV1h>vUj;@_?>yH#jc=|R;RtwInIE&&WqrM{p zw@yW3vixY9oyrs@0;i zO5J+?_RRRi)O_C8#m!70dVayGHm2tCe%wU?O;dAW<77wUvySVo{TxPC$qV^(Sv z_Y*nq#~Qr80zSuUm#bMoJl0@#ItdVN229 z>^b-JUo-P`%K~8C%91a?{ITXoHq2_J=0`Sz47K8hs1-McWg}WMSL%z-9!-hJB~9rP ze$FW*o^)@BG$a?&`jPZV1sBPXr7%N2jNSo{ttmoPRfeO!R^(;Jk34WHg>hfQjyb+! 
zFM^1wjfVck`iAMOH!Oa?LQ6MEC*{ucu)=9VKSOFL0IW@OvA$ko4)j804+ z5gC7plFO73ef&ivA4{KH%P-ffR=#Ghmx|o7?Mj{r1(DZl7tP7h!c=|?AH~TRf{$|F z63QuY$c4KjjZHs(wQSX_jRwE=0T4P}NHkgbG$AXJ`1fh^STJ;F;K)_G4h`D5HnKDH z6OFi~*}s>2LWMeSBJkR*`6Io!4?l=p#ayb`mY?L{ zeKUX*-TVq*eEq6jtS++RwLe4cFYuF^M?ZfrLs-n2c9-e|kG2cajj%<(QTbJz{>pbL`2mtALx6j7l@bF<2=>RpWf%z_hHkCCh&9jNb!FOJ{c;$uatZm^=$wB;=tX$1{kb`W=5gn(U;asKm zYAz9P9!baS79Rh8CHK#3s`in=vq^AejGAs z9_A>DJur;9U2K>TP$e)Enx2B6oHMrJC!d8!lTH^P-4vVzr?1o(y;8_f-x@!oZ zK&PO_CRvW(U4@}v!7W*$s|y!=9ADWiK@hoI2VO&S*I3n8Z6MdJM!mF51b2^3PcIf- zRA@dlsO84GAG2!rD>!vV7oEC`JExBK6M;HOO&li9_fb^^Vc0lO*GtoMS1n$)0%0Up z*)3Xymm%PIYBlun(S_0ZiG_J1A`}EeTbP(%FcQ=}H8VOsF=r&F-@I+UH8c0d#N52E zt-y_7M1-nh#NL`H+?vGL=={vo9e}0_x982-i8=G;=)%~f5xWW0<`fuWYR+ub?Mi9W z&~K04H46&>j?6Di%+4F?)ZDzG-y9u#`R%;@ykIX5vsX^M}K9iLdhYH89A!qU6*(w9opehS)Y4A0OM z{m)!Qq}Wvas1SROaDkN`VCw2B9l9u8N_8BtV3@C#?qB2@5OKQfjKhofA z2n7b-)*TT9?nwvgfjAndM(F}U-T?%e(+PyC0HWqnpi+uE$?*?_)zMn59$bW>wu3{K zE(VPjyL)>IGAX=O4Hb%+u`6C6@gj~}tE*NbvF+HWs3ZTVM?#P0}Rnigu{e87ZMSZP(&L+SS20MsnLWfYdOOXcrDh z2bBaW^K`YM9II5J*H^fa5R0JO$U&nc)CD7s1wkUguV^T4$xxh<0rzFutcKo2Tue)4 zUtTg|f;BB}bcYv3sJS>sJZu1pG>KV=9n4p99bW^xb6;kBo_Hq>yrgqDFd&xdzI;E> ziDFOdspG_L6O>${gf!MbGwOay4SF6uPKqo5rAzLTdxKQd<%F#7`PU^6%7glltSg$F zQP0SlnnHiCl2D>DN{XgLsYTIcO-ZPOaz;tXXVqRgp#hVUQW7CX!W&gaMD2ha{Zx-? zH&1JaF51WGb3a&dU53upcG5)3*O2 zD9~>VKj&p6uGEH&!r-NXZLWA^vFBQ=yrsE{r+Tud!J7s9cdb`gv)Z&! zv-wFN;k5h1Dqo~>g%VQjh9gE&9Ehd5&A=>iT+%_&*XdZ$-R^7R4*FX(hV;u0-4_Pt zeM?9Q=O^%89l9hXEKkRe|02qJ3@^H?=_eNU2n8%mriH;b{u=$$4q* zhS;d+G(QY_gi^zgP>FV{nIwDXjIgE84D0;s)L|kat);==qLNZ;KrYs~8*FS5ZNpJv zzxnAvZiRL+R3fM-p-Dc&vDwhr*oM&+LM_8!$MJu9hx<$9|9?i<{tWFl=?aoO1cu5g zx^f1)PK?Zd8LfL5ikNGMq0XWLhyG{ox+{A!BNhss z;)fR*r37XIR>T`rF9uIyWL4we3d*g>+P6UMm(iXSYk_y2DDzbKp;FVrQ}Q%dlF{tT zBjO7*qk?nJJo!Jcnm&=wHG}I#aO=5YZI>T|(@xw99;R>OtGJECr!vyG3&cesOk8m_ zh^j*>(T12HWLc5{xL8F~Vf?Aj>!(Ov*v9qq^qon~mB5Z8@)>mma!`kiOo3I;%lsdp zr$pVIf5Z!8_&EoVxQJX~XN7lJlo6Z^J#@PHve%vE z4$1@z3L8Cz1=F=}+KAZwGLc6rk_3YIZy?_zrKpFzHG-HC6zH@(l)pI$iG=e(frWcI z+@eZLUF+R-^5Qe&^WoDe)8PT(RSfnaQDKQfMY2G2WQneG{&%QDuK03bum>au&IH=k?#~l0fnb4e_H%>}onYaQ{k(phYU%8191#MVTFGsvRkOXBJy5WUqN`>OEA5xqMTy!50zHNP17uZ?RifDWUs)rjit})!wjm|hd?{9 zqAw^XL|M$dq+XRs#SLZ?IK+IQ1A8Mv&|Q$r`&#|pJ})7{?oG_zjj$7~WZ$5KH6`&& zu{6R}S+75YYKKrs0oPA(9Z)(8aQzQ2gP{HZLjD6pAwtJpebPa&duvd%fzpn){df$J z@dmy9z|<|~%ecMmzAfpS;}C(p?JUcPFSyT(vmU4@ICx%?^^#!lG9rXUJ$*YpS<#cF zp4~e~9J77XO)ZzpT@AE+@TSz%uAzbd3W`FHHB>}P_#~?2zhv$v{C|NC7bqDt2EIC+ zEw@;60fQ$r&`*g~{TR2FY+Md-xRq^pGo0s#0MLvDCppi_Z#&%2zm1IlBTDE} z7J4E7bt=(C8>k{=1Mpu$;zx>{7dMS0J@W})=J*jjoq=01vJPkB%fgibqkOs8u;8gE zGNE;Il?Lk%io6Cdo#S`GGKc$Zr=D^u+ zTmsRJAlIOlZyJxj-b-J0juN2vn(*@LPrgjxPz!>$S=nl%>a1 zQ3_Q<+nj(CRNJ6bEmTc-`ap{rIxm3FQ0f=(yXea$UvVzrck!t^*@v^lUFWrMyS_)l zh$NOIUxjuRReE8Ak}V5S1noFzdH=B&ff@%{1Dz?+ z(mj1eZN=BFp%sd`X#J=KIe}VC)c$5GNwyw-Q7;LsxG1+UQ!BNn71xkxO~YEDQs*#AA$(|h{0cFaoz)I-L`S=UF9buuLB+QQl*e!W;n zH`dYHLGiEcq4-;W6^ijNh;7r1a+avB-PlVt+8=Gxy#sRy#(xLpEb|epW1<_6U84A9 zDF^kqN0t!e8t$2HrbhC+jx#VL*l+=Yz?WY!dI}V#*zGN_W@03NCO;=wJSg9>xNB|r z@?Bqkn~xB~oTnsD2|Yak+TlI?yAccNrnke@+;GX|GQ|1la2iSfuw*mxO5S4@VmM%UgnI(b@87kuxJoW^`=dwFI`tX(s z3G*qPOxh#~UMvmD08ShHMR1NNUHI28_bWG)LA37HC()Xb`G3Ti5(&xezwM()V$WHI z(tu+a#V_X3!TPC|j&dBA4>?k_)fi8SBZ{{W)B=r8#WkV_HgjMnW7G)PINlDNEQuA3 zlmU7#C7do)haOAyvS6&Wo3MFhH-cjs$J-8pm+1l1t)+xV$_=R?;qd=+=HK>WduGPr zXNhp;pJU0#vNOn1p1KOQhq{BO!K-n!opEC<{ZRfH2mtiO&~g7#IJc# z?8RLO7*c&wEJLE@1Ide_^;#>97TVJYVc_hM1l1@_F!YiHFKg)drb#OX9Ns*7Nw~98 zbP#d;7=F_sWeV5DJhRWtvyGpq|Bjn{Us{q`FY9|(Ckkl)LHTz=DxRI=J1i;If!BEg z|I`=YS9pJjKf4FN;`FngXXsbf`n@Cs$G&NCR+_z?(&L>Q1@_u=&4K6cg(AWU^Poc< 
z>}D@|>F87tOA`faga1DGm;V70c#rh$PBDQvbC3!Di6F%JOE2N^0b*=U#YL3b*K1Z8 z?)Upvb8b2;*_*lW1miVI=(|F*KG;Xy*U354fk8LCYkQU6LU6l3Up1s#55mRdfNp zbcY)uag`jmrFyk-?G4N?yd6=z^k|+WKMlkr1ZyZ4JKlNhjQOk3&wl{=_zx-hZA00%8RIj7{aLO0Wb z5+pvw95lKKLf3f~P!#Q;I>ka#NOx;!Rrrs9bI%=-MFV;C$MAD}B*M8)%9HR{;>9fd zlWiz(0kh#nbOdr-7*-Kt>&f<9AnelpA;=@?0&yB+zgz^IU76IM_ zFsI6zc-2ZSw$wC&&h-tFAWmSykU7K8Awh(LbOQcwkQfPij}^SZ;{Os2zI=iIXX^bv zlHphYxx*~{PpRvFQu1d={LCI{0Y-U354468yiOnuO4caZkLF zhA3}RIoT19+&vm%jlq)t7okb7JDp4}SbO)_QW)wkF>?+DDP&bLBJdBvtzI<>qaq4M zRz8htoS@-Np^q|*_f=mB{Epv7mH#Xy%apuJ$?qZYbv&vQ4>S&om#1cUw z5iJOu2jl@4o;u-H3(Qv1m4z@AOxK~4@9-`j=wZTj+qy0P?2XAtF$;&)iTkQh#!*9ZPDh7&(E diff --git a/multic/segmentationschool/Codes/engine/__pycache__/hooks.cpython-38.pyc b/multic/segmentationschool/Codes/engine/__pycache__/hooks.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..772b615e009f4d7fb6a17c442f16bb90279adc33 GIT binary patch literal 2794 zcmai0OK%*<5$^7J?Ck84lxT`%6a(1+F@hE1-M|oRz+o8MlAQ!dm;iKqSi~5NW}3?( z=dtSURU~TmL;*SJls~Y0v=9Cl`4fFI;FIn-C4iH!W=M!+1b7DhnCj~4>gun)p1-WE zg$&Q%|Mk!5*XVsmA6Fk2A0MIUBUC9%dCn$n%O||~78B95&ctb2cj7gzUvvcHtOzCn zrx_ye7U3jp`cB>}`jb9qzh$YLdM{Y&$^N-BSxfmL8~U%X7CRVn`&M2}Bl$$CXHgkx znN($}Ar5|5Ri~fC5Rl^4N8sZl6#WZSl97vWkEh}VgL_WuqQ!!#kJd{&X@J&GyI3@| z{XbM%fBGcO$$CN?zkH}qmLH(#ZB(_WS;T6Nn%53$gy4{U#Sey#^`oLnWo|psOscF( zZ9ht56GwR!r&0}F>uQ-F*`CBomnvR6rFTW~E2h@4(!XDfALVm7o@HfxnvHaSyhe;8cxRgicBZRRh5rNqc=$(z}F zR!p5Vz3vL&tKD|A4tCiGY~j{^Dt2&sCTuKu=X{IpvPHLc>+X-)qLHTV7CZIyH+7iu zQ&)X|VZqsDjZV+>PWqVN!|D6Gfb}lhV&^>dUh+lXteN$bjnrS>x>$+Vp}Af6&-uy) z3Og?y_{5f*b!WvFYqht0t6p1i&K|#Kww8Bl57Kq)dHZB%xtj*{`buzSeza`&;sK@I zgI69x`HJGJsCq@XL`rlnvg7Q1I*>tC4!(%<40r+t27|OJ<>2{Z{CqJQJpXWX|7bDL zgYh6MrlY~9pMU)8=L?wNgrt!}D@oXl`)QWOu-9pBt?HCmUxC-%r zC-GF;a5guQPU1XPaH}ec;fCtc8gSLbxeeia^tV_Q^O+4b957Mi(q=P^Jr*z^TbWNW zks98oPpzMo7}0jKopep{c80iM^G9f+W_+@7aX^&v5duHC(=KpBY#nLhNk|Jtk7AP? zPr9_RX1@&~wADK8U@c0kW&ji!=eHoz4^c4@@^uk#7w;wy@VY1=-dlVZy)CqFbJ2|Z z;5EVg{|ZO^$M1x^-)IHm>@LCRk_<=J7E0)dE+3*sDot{~j}G7sVg!&yOS`pL@)diC z6hVTEmVzO3S{bBH?Eu)`!8OisV2N1f5Ji*O42HiPfdycu=3o$E_=z|d>RxM%m}BX! zSm_&Z#5wQyk#U3}ei=xDDTp0?rkG^#YB{!Yx?oDMWpU%|&ugF$=KJaY#gPvW^8@h^i zWIYN8z^>D(@4)bV@~e-E zfQq<%@4MUlZ2QgrU5gT-f=UCnKcfS##7&a&9Y(QgzHmPPAwu8Bmy-%G_+B&OY=f(D zckBV6zzXp&=6WYSLGmRym~L>uP~C>zmRiZjA)f@q2S#c$?5H6`4NjB|iuh?nd9FG% zgOH%^(QxlI&GQ?vvEe0M$k}gTM>jua$jCmjHQ;AE*8=of z_W=0D^<2h s$R1%N{7g`kf)zV%ri19mAOaaM0yz#qT+9L_QW%06G#UL?G8BP?5yY=7{iK}IRQ-a? 
zyu|EGz2d~8l++^q#JtSn484-XlI&D{-_o3tOkL;v+=9fSlHAn165Zg`bP%0bl9`{U zp9_*s)-Og9D^AYH&(F~>Ey>I&){l?R%*!l^kJl@xyv1RYo1apelWGTY!)G97005dH BH$DIW literal 0 HcmV?d00001 diff --git a/multic/segmentationschool/utils/__pycache__/mask_to_xml.cpython-38.pyc b/multic/segmentationschool/utils/__pycache__/mask_to_xml.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50505ef81d3d8d8714cde57453ed885bc449c979 GIT binary patch literal 3487 zcmZWr&2JRR74NF=hMg5)AghEW>;|u46-(@ZjI$xVi$={<+irWN zd#J7f?5&noVsnYYEk_QXBmcoBr~CRRj%b~TzbwyLOzzsxO%zWp>p;!{LVh)_TI3a!{7SM~tdtwoD z4RK5y$A43>yH4liOK9YDUoz0lc?I&yl5ZX7B|6}N`S}ZO*geDk(6`qFo4z@;5N#E$ ztU-WwHUF_wxypI!RFPr_T(b$Q8afotZYV!cu5LVKRj530*fHlRql^8KFF7}y`)8`A zuE}-f3w}`3%?W$Tn4GPhu3riD<5HHQ+T_)+|*4S?s`%`+{E6$V6RVm zr{_#KNzhC;cX`!Pp>Amo4Lw~KwP$|SRvt90>-K~RZwb0U@`-9jxAn}DQ_ZRfC(q)X zz@GefoDj%=>*%hd8afgo&T$T!qCVXX8a?AMpMxiDbS1}=>@Td!G*MZ;b-F)D%JS;z zufD17cU;q0$@4;CrpQZEe=;2O`o*A-rkT9vuicPjko}XtFXS#sDDtTgl5x7&JWY~3 z6N%z+UM9oQAT=|?Ebo=se%e#pGA*~wT$y1a+uVcrToq$knp&~BS*EJv$^g<$eN<$* zx|Nkm*6Fm`H`N8GW7vfbpRNcuvq73C!?e>fk=2y0lp3CoOl_QJ567u-hshJ;%Ou}Q zjX#o*Xlk2<945-l)0RYt-kY15`K?qPY1Fd{l*TW&lTm8gN6Tfpg~u@=jmzW_dZ}9qeN{RZGBARce4v+B1I~flRPW8 zFTk$5>GBujfy&OW7Q;~@)iBM~`Mc>B{adRq4{5r;T)xFC``bk^SRSivP%hKedaCH* z{uf4jrt#Kz2IrUOKpY;rH8)}*3*F<)V{JC;hJ2QB?y&_obm!S3=H|J_Jy)K^`o2%Q z4!?Q)(-YX(U5BqJ=YT1G;A&^Wl(XZZ`4jhRUino`)wGYe^|zcVKt!TX@rt>ys$*ZM z5d{^+3*U9*MI9ij!U9$gs9@LZXjd@A@ZVlbzA%-n zfg=Y^L@S^01*d9Y)Y5g`z}O`O04{{roQHpcRxQRubGpa4=FQZ+B92mv?tsp=a}=(EHL75)~8ufd(NvlJ%hW>*?as)x6q%l z{U3A-I1qGg5N}9&cnb-#Mm-OtGi%)bqswt~Gt0&5VSGxmit)z?p(}A~{pQAc@8-RY zo9o?`Tk*o`Z&%j3y_Gw6)^FeIZLD7TxOeN4@do6b^)w$3Q;EQg8;7yJf;cs`WHd^1 zVFH;VBua$ps2IidMD}l``Ig#_n^+)=CwGd$-i>FMd&t zw5H_ebovs=uhGhjAdYv6h4AcIMjqcJucv<=f-?)>k1)50C^*GF!)%k;-F@14^bwxA zXx8H?n91V>PD&WYqXMV>F|h2;`B-W(MLun zOtRw|M6o^#TDN{+#wfE(>}5oG9<%#e%v8%#t6BL3WDdM%}1_{ zR9A!(UU^pgde=m6(u#F9!#*% zCb~O*aQL3aMY8Gm(?=z=wwXtML_IS0yAORFyacVZg@+43W0%Ri#DDn;U*B2)Bl;+d zQ8KV^{sTIAX}qh#TRug-L9Y#ad)fonMb8KJkWH>C4n)H1Q`I4S*u+)dj<0=`VJN~- zfYs;+IWFew&jVz87r5vh__`(^(F?!V8^Uy{E>rxV1D zZn}l^^C(@vy%I-j!(=P{yy%b1xPBu;dQJ9V-_}|_8mp-k41<+C8SL$+@(M)AkBM9b zG2xe~L;_38xJJG7$rR!U-B4RtxTC&du|>d6&3NPx@;4;m8j(+kkaNgSK|1xRLr_>+ zhls;z34l82jccsX>}NzS6M6qNv2bw`!yUA80mN|^El|v}GZrj9n{Q>HfjyJviEHsF1%_9<(7`FND6;ENo z9$HC(J>Z}W6*#PxXrl4L5P?}M# z1UBhnJSEfAc*J@~6$ASug6flRvG5HFb}{NeE|=HQ>vmf51N_H6^3!O~rnfj6P*sD2 zAlkQoQ$L^*#iF)_QXAmbV)lNL1iz(&wLO{Lgx7|J7!T4<=>Eu#)GtUBmO*o_YIiU(5+x zIA5}MAm)WDJWv&JM))EC6^gT>B0^AAaZc1k9aJRFi-u@|stI=6YArm*7p=9&3~kpV zdh(0#_VK)pC;uNBWeaPY%lX0@Gi4pIk*(}8Ke9%i;(|f)so=s&?U7$NinKAYhxUjo zf9yhoKV>QBY1mEr?s0#EkGSwgOfjRw1W8JIVs+M9AP(`lX&vGS|2tA*lcGRq3-66vjHy91GH|FcA{XL)iEgzx_DqqUW5 ziwD7?Og4MjAaB{SO1k#yN-k^kMOXVDmNH34DC;B+MfUk1kNd+kFj zL`_#D8?-FUGT;7-B^S~8p})MI?k3B_-XPxYE#%S#uz=Os3(T=!<@4xWua_2f zel+d-reB-(1Jkb~kShqh+BTE7K!>2K+peLjpz9NU1UfQ&cZ_f4#`(`Nm+7m7A~ms`w#XT7GG{^bn#yhU@gYNv@r{6g18~)83_!0gV8o z>?AAH(Dq5nur02G(lJ+8bQDj=UtMWcG|w=W3E2Ry*)MwiM4p3@_BVP15vQs4;-Zkf zbJDg3;8SJULmdcGePsqQ*@D9-g5#03 zM^v(Rdi$S-;d^*;I#r7~Jg^=9BEN(%mLz*3bkBe>4PA|dkIiJLJh};;@`$72m0X1X47UP3R6qor z4^7*Q8&I8_^u3*1W+d12Inn~(@|8E~RZ%5FfrCYrMc5y5yero&!)1nxus7T)xS^^} zbP@E1Tbpo8-TH*v0GDr7iwHBWVZ??R2}$Du&;cO@}bcEvFx`T#-JS6uiTge{nu8JSSViInCse$s|bY-YYVV*}}Rq+Eqrjp{{X+*FbB zH!)jP7jsx0JF=0CYr9K% z`y=~#%&)2Dzv8N)=Dy;zwox>=`NZy*cKG_q4o`Q#{3cHRL^iwoDndZduNf3`Pz&F` z@!sm%Zx`Rcar?b%kKy!`b=~|yJWRAJiv3~IV%mxKdO6U~4TudADjUjA?TSG`08hds z?smn%A=9VaW#oVxB&VEi}MOt(60?|LArVRe1KP>j;D`03}CgqHr*p!W! 
diff --git a/multic/segmentationschool/utils/mask_to_xml.py b/multic/segmentationschool/utils/mask_to_xml.py
new file mode 100644
index 0000000..af69724
--- /dev/null
+++ b/multic/segmentationschool/utils/mask_to_xml.py
@@ -0,0 +1,131 @@
+import cv2
+import numpy as np
+import lxml.etree as ET
+
+"""
+xml_path (string) - the filename of the saved xml
+mask (array) - the mask to convert to xml - uint8 array
+downsample (int) - amount of downsampling done to the mask
+    points are upsampled - this can be used to simplify the mask
+min_size_thresh (int) - the minimum object size allowed in the mask. This is referenced from downsample=1
+xml_color (list) - list of binary color values to be used for classes
+
+"""
+
+def mask_to_xml(xml_path, mask, downsample=1, min_size_thresh=0, simplify_contours=0, xml_color=[65280, 65535, 33023, 255, 16711680], verbose=0, return_root=False, maxClass=None, offset={'X': 0,'Y': 0}):
+
+    min_size_thresh /= downsample
+
+    # create xml tree
+    Annotations = xml_create()
+
+    # get all classes
+    classes = np.unique(mask)
+    if maxClass is None:
+        maxClass = max(classes)
+
+    # add annotation classes to tree
+    for class_ in range(maxClass+1)[1:]:
+        if verbose:
+            print('Creating class: [{}]'.format(class_))
+        Annotations = xml_add_annotation(Annotations=Annotations, xml_color=xml_color, annotationID=class_)
+
+    # add contour points to tree classwise
+    for class_ in classes: # iterate through all classes
+
+        if class_ == 0 or class_ > maxClass:
+            continue
+
+        if verbose:
+            print('Working on class [{} of {}]'.format(class_, max(classes)))
+
+        # binarize the mask w.r.t.
class_
+        binaryMask = mask==class_
+
+        # get contour points of the mask
+        pointsList = get_contour_points(binaryMask, downsample=downsample, min_size_thresh=min_size_thresh, simplify_contours=simplify_contours, offset=offset)
+        for i in range(np.shape(pointsList)[0]):
+            pointList = pointsList[i]
+            Annotations = xml_add_region(Annotations=Annotations, pointList=pointList, annotationID=class_)
+
+    if return_root:
+        # return root, do not save xml file
+        return Annotations
+
+    # save the final xml file
+    xml_save(Annotations=Annotations, filename='{}.xml'.format(xml_path.split('.')[0]))
+
+
+def get_contour_points(mask, downsample, min_size_thresh=0, simplify_contours=0, offset={'X': 0,'Y': 0}):
+    # returns a list of pointLists; each point is a dict with 'X' and 'Y' values
+    # input greyscale binary image
+    #_, maskPoints, contours = cv2.findContours(np.array(mask), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
+    maskPoints, contours = cv2.findContours(np.uint8(mask), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1)
+    maskPoints = list(maskPoints)
+    # remove small regions
+    too_small = []
+    for idx, cnt in enumerate(maskPoints):
+        area = cv2.contourArea(cnt)
+        if area < min_size_thresh:
+            too_small.append(idx)
+    if too_small != []:
+        too_small.reverse()
+        for idx in too_small:
+            maskPoints.pop(idx)
+
+    if simplify_contours > 0:
+        for idx, cnt in enumerate(maskPoints):
+            epsilon = simplify_contours*cv2.arcLength(cnt,True)
+            approx = cv2.approxPolyDP(cnt,epsilon,True)
+            maskPoints[idx] = approx
+
+    pointsList = []
+    for j in range(np.shape(maskPoints)[0]):
+        pointList = []
+        for i in range(0,np.shape(maskPoints[j])[0]):
+            point = {'X': (maskPoints[j][i][0][0] * downsample) + offset['X'], 'Y': (maskPoints[j][i][0][1] * downsample) + offset['Y']}
+            pointList.append(point)
+        pointsList.append(pointList)
+    return pointsList
+
+### functions for building an xml tree of annotations ###
+def xml_create(): # create new xml tree
+    # create new xml Tree - Annotations
+    Annotations = ET.Element('Annotations')
+    return Annotations
+
+def xml_add_annotation(Annotations, xml_color, annotationID=None): # add new annotation
+    # add new Annotation to Annotations
+    # defaults to new annotationID
+    if annotationID == None: # not specified
+        annotationID = len(Annotations.findall('Annotation')) + 1
+    Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Type': '4', 'Visible': '1', 'ReadOnly': '0', 'Incremental': '0', 'LineColorReadOnly': '0', 'LineColor': str(xml_color[annotationID-1]), 'Id': str(annotationID), 'NameReadOnly': '0'})
+    Regions = ET.SubElement(Annotation, 'Regions')
+    return Annotations
+
+def xml_add_region(Annotations, pointList, annotationID=-1, regionID=None): # add new region to annotation
+    # add new Region to Annotation
+    # defaults to last annotationID and new regionID
+    Annotation = Annotations.find("Annotation[@Id='" + str(annotationID) + "']")
+    Regions = Annotation.find('Regions')
+    if regionID == None: # not specified
+        regionID = len(Regions.findall('Region')) + 1
+    Region = ET.SubElement(Regions, 'Region', attrib={'NegativeROA': '0', 'ImageFocus': '-1', 'DisplayId': '1', 'InputRegionId': '0', 'Analyze': '0', 'Type': '0', 'Id': str(regionID)})
+    Vertices = ET.SubElement(Region, 'Vertices')
+    for point in pointList: # add new Vertex
+        ET.SubElement(Vertices, 'Vertex', attrib={'X': str(point['X']), 'Y': str(point['Y']), 'Z': '0'})
+    # add connecting point
+    ET.SubElement(Vertices, 'Vertex', attrib={'X': str(pointList[0]['X']), 'Y': str(pointList[0]['Y']), 'Z': '0'})
+    return Annotations
+
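+# A minimal sketch of how the helpers above compose (illustrative only, not
+# called anywhere in this module): build a tree, add one class layer, add one
+# triangular region, then save.
+#
+#   xml_annot = xml_create()
+#   xml_annot = xml_add_annotation(Annotations=xml_annot, xml_color=[65280], annotationID=1)
+#   xml_annot = xml_add_region(Annotations=xml_annot, annotationID=1,
+#                              pointList=[{'X': 0, 'Y': 0}, {'X': 100, 'Y': 0}, {'X': 100, 'Y': 100}])
+#   xml_save(Annotations=xml_annot, filename='example.xml')  # 'example.xml' is a placeholder
+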
+def xml_save(Annotations, filename):
+    xml_data = ET.tostring(Annotations, pretty_print=True)
+    #xml_data = Annotations.toprettyxml()
+    f = open(filename, 'w')
+    f.write(xml_data.decode())
+    f.close()
+
+def read_xml(filename):
+    # import xml file and return its root element
+    tree = ET.parse(filename)
+    root = tree.getroot()
+    return root
diff --git a/multic/segmentationschool/utils/xml_to_mask.py b/multic/segmentationschool/utils/xml_to_mask.py
new file mode 100644
index 0000000..ac48e66
--- /dev/null
+++ b/multic/segmentationschool/utils/xml_to_mask.py
@@ -0,0 +1,219 @@
+import numpy as np
+import lxml.etree as ET
+import cv2
+import time
+import os
+
+"""
+location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame
+size (tuple) - (width, height) tuple giving the region size | set to 'full' for entire mask
+downsample - int giving the amount of downsampling done to the output pixel mask
+
+NOTE: if you plan to loop through xmls in parallel, it is necessary to run write_minmax_to_xml()
+    on all the files prior - to avoid conflicting file writes
+
+"""
+
+def xml_to_mask(xml_path, location, size, tree=None, downsample=1, verbose=0):
+
+    # parse xml and get root
+    if tree == None: tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    if size == 'full':
+        import math
+        size = write_minmax_to_xml(xml_path=xml_path, tree=tree, get_absolute_max=True)
+        size = (math.ceil(size[0]/downsample), math.ceil(size[1]/downsample))
+        location = (0,0)
+
+    # calculate region bounds
+    bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0]*downsample, 'y_max' : location[1] + size[1]*downsample}
+
+    IDs = regions_in_mask(xml_path=xml_path, root=root, tree=tree, bounds=bounds, verbose=verbose)
+
+    if verbose != 0:
+        print('\nFOUND: ' + str(len(IDs)) + ' regions')
+
+    # find regions in bounds
+    Regions = get_vertex_points(root=root, IDs=IDs, verbose=verbose)
+
+    # fill regions and create mask
+    mask = Regions_to_mask(Regions=Regions, bounds=bounds, IDs=IDs, downsample=downsample, verbose=verbose)
+    if verbose != 0:
+        print('done...\n')
+
+    return mask
+
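+# Example call (illustrative): pull a 1200x1200 labeled patch at level-0
+# offset (5000, 5000), downsampled 2x, from an annotation file:
+#
+#   patch_mask = xml_to_mask('slide.xml', location=(5000, 5000), size=(1200, 1200), downsample=2)
+#
+# 'slide.xml' is a placeholder name; the returned uint8 array holds one integer
+# label per pixel, matching the annotation layer Ids.
+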
+def regions_in_mask(xml_path, root, tree, bounds, verbose=1):
+    # find regions to save
+    IDs = []
+    mtime = os.path.getmtime(xml_path)
+
+    write_minmax_to_xml(xml_path, tree)
+
+    for Annotation in root.findall("./Annotation"): # for all annotations
+        annotationID = Annotation.attrib['Id']
+
+        for Region in Annotation.findall("./*/Region"): # iterate on all region
+
+            for Vert in Region.findall("./Vertices"): # iterate on all vertex in region
+
+                # get minmax points
+                Xmin = np.int32(Vert.attrib['Xmin'])
+                Ymin = np.int32(Vert.attrib['Ymin'])
+                Xmax = np.int32(Vert.attrib['Xmax'])
+                Ymax = np.int32(Vert.attrib['Ymax'])
+
+                # test minmax points in region bounds
+                if bounds['x_min'] <= Xmax and bounds['x_max'] >= Xmin and bounds['y_min'] <= Ymax and bounds['y_max'] >= Ymin:
+                    # save region Id
+                    IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID})
+                    break
+    return IDs
+
+def get_vertex_points(root, IDs, verbose=1):
+    Regions = []
+
+    for ID in IDs: # for all IDs
+
+        # get all vertex attributes (points)
+        Vertices = []
+
+        for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"):
+            # make array of points
+            Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))])
+
+        Regions.append(np.array(Vertices))
+
+    return Regions
+
+def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1):
+    # downsample = int(np.round(downsample_factor**(.5)))
+
+    if verbose !=0:
+        print('\nMAKING MASK:')
+
+    if len(Regions) != 0: # regions present
+        # get min/max sizes
+        min_sizes = np.empty(shape=[2,0], dtype=np.int32)
+        max_sizes = np.empty(shape=[2,0], dtype=np.int32)
+        for Region in Regions: # fill all regions
+            min_bounds = np.reshape((np.amin(Region, axis=0)), (2,1))
+            max_bounds = np.reshape((np.amax(Region, axis=0)), (2,1))
+            min_sizes = np.append(min_sizes, min_bounds, axis=1)
+            max_sizes = np.append(max_sizes, max_bounds, axis=1)
+        min_size = np.amin(min_sizes, axis=1)
+        max_size = np.amax(max_sizes, axis=1)
+
+        # add to old bounds
+        bounds['x_min_pad'] = min(min_size[1], bounds['x_min'])
+        bounds['y_min_pad'] = min(min_size[0], bounds['y_min'])
+        bounds['x_max_pad'] = max(max_size[1], bounds['x_max'])
+        bounds['y_max_pad'] = max(max_size[0], bounds['y_max'])
+
+        # make blank mask
+        mask = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8)
+
+        # fill mask polygons
+        index = 0
+        for Region in Regions:
+            # reformat Regions
+            Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample))
+            Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample))
+            # get annotation ID for mask color
+            ID = IDs[index]
+            cv2.fillPoly(mask, [Region], int(ID['annotationID']))
+            index = index + 1
+
+        # reshape mask
+        x_start = np.int32(np.round((bounds['x_min'] - bounds['x_min_pad']) / downsample))
+        y_start = np.int32(np.round((bounds['y_min'] - bounds['y_min_pad']) / downsample))
+        x_stop = np.int32(np.round((bounds['x_max'] - bounds['x_min_pad']) / downsample))
+        y_stop = np.int32(np.round((bounds['y_max'] - bounds['y_min_pad']) / downsample))
+        # pull center mask region
+        mask = mask[ y_start:y_stop, x_start:x_stop ]
+
+    else: # no Regions
+        mask = np.zeros([ int(np.round((bounds['y_max'] - bounds['y_min']) / downsample)), int(np.round((bounds['x_max'] - bounds['x_min']) / downsample)) ], dtype=np.uint8)
+
+    return mask
+
+def write_minmax_to_xml(xml_path, tree=None, time_buffer=10, get_absolute_max=False):
+    # function to write min and max vertices to each region
+
+    # parse xml and get root
+    if tree == None: tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    try:
+        if get_absolute_max:
+            # break the try statement
+            X_max = 0
+            Y_max = 0
+            raise ValueError
+
+        # has the xml been modified to include minmax
+        modtime = np.float64(root.attrib['modtime'])
+        # has the minmax modified xml been changed?
+        assert os.path.getmtime(xml_path) < modtime + time_buffer
+
+    except:
+
+        for Annotation in root.findall("./Annotation"): # for all annotations
+            annotationID = Annotation.attrib['Id']
+
+            for Region in Annotation.findall("./*/Region"): # iterate over all regions
+
+                for Vert in Region.findall("./Vertices"): # iterate over all vertex lists in the region
+                    Xs = []
+                    Ys = []
+                    for Vertex in Vert.findall("./Vertex"): # iterate over all vertices in the region
+                        # get points
+                        Xs.append(np.int32(np.float64(Vertex.attrib['X'])))
+                        Ys.append(np.int32(np.float64(Vertex.attrib['Y'])))
+
+                    # find min and max points
+                    Xs = np.array(Xs)
+                    Ys = np.array(Ys)
+
+                    if get_absolute_max:
+                        # get the biggest point in annotation
+                        if Xs.size > 0 and Ys.size > 0:
+                            X_max = max(X_max, np.max(Xs))
+                            Y_max = max(Y_max, np.max(Ys))
+
+                    else:
+                        # modify the xml
+                        Vert.set("Xmin", "{}".format(np.min(Xs)))
+                        Vert.set("Xmax", "{}".format(np.max(Xs)))
+                        Vert.set("Ymin", "{}".format(np.min(Ys)))
+                        Vert.set("Ymax", "{}".format(np.max(Ys)))
+
+    if get_absolute_max:
+        # return annotation max point
+        return (X_max,Y_max)
+
+    else:
+        # modify the xml with minmax region info
+        root.set("modtime", "{}".format(time.time()))
+        xml_data = ET.tostring(tree, pretty_print=True)
+        #xml_data = Annotations.toprettyxml()
+        f = open(xml_path, 'w')
+        f.write(xml_data.decode())
+        f.close()
+
+
+def get_num_classes(xml_path,ignore_label=None):
+    # parse xml and get root
+    tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    annotation_num = 0
+    for Annotation in root.findall("./Annotation"): # for all annotations
+        if ignore_label is not None:
+            if not int(Annotation.attrib['Id']) == ignore_label:
+                annotation_num += 1
+        else: annotation_num += 1
+
+    return annotation_num + 1
\ No newline at end of file

From cdd7c6838aa32b16305da2ae00e0d6e9d55d1148 Mon Sep 17 00:00:00 2001
From: Anish Tatke <53684776+AnishTatke@users.noreply.github.com>
Date: Mon, 24 Mar 2025 12:36:40 -0400
Subject: [PATCH 05/15] EOS Changes

---
 .gitignore | 2 +-
 Dockerfile | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index b0528ab..9691592 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@ multic/segmentationschool/output/
 multic/segmentationschool/test.py
 multic/segmentationschool/*.ipynb
 multic/segmentationschool/Codes/__pycache__/
-multic/segmentationschool/__pycache__/
+multic/segmentationschool/__pycache__/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 4ca5afb..d010db5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -132,8 +132,7 @@ WORKDIR $mc_path/multic/cli
 
 # Test our entrypoint.
If we have incompatible versions of numpy and
 # openslide, one of these will fail
 RUN python -m slicer_cli_web.cli_list_entrypoint --list_cli
-RUN python -m slicer_cli_web.cli_list_entrypoint MultiCompartmentSegment --help
-RUN python -m slicer_cli_web.cli_list_entrypoint FeatureExtraction --help
+RUN python -m slicer_cli_web.cli_list_entrypoint MultiCompartmentTrain --help
 
 ENTRYPOINT ["/bin/bash", "docker-entrypoint.sh"]

From cfbb1bdb8025f9ba5930a3bdeb77e433d5e5108f Mon Sep 17 00:00:00 2001
From: AnishTatke <53684776+AnishTatke@users.noreply.github.com>
Date: Mon, 24 Mar 2025 15:35:06 -0400
Subject: [PATCH 06/15] Flag Changes: 1st deployment

---
 .../MultiCompartmentTrain.py                  | 11 ++++-
 .../MultiCompartmentTrain.xml                 | 40 ++++++++++++++-----
 .../Codes/IterativeTraining_1X.py             | 13 ++----
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
index 6ac02f9..1394191 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
@@ -122,7 +122,16 @@ def main(args):
 
     _ = os.system("printf '\ndone retriving data...\nstarting training...\n\n'")
 
-    cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --training_data_dir {} --init_modelfile {} --gpu {} --train_steps {} --num_workers {} --girderApiUrl {} --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpu, args.training_steps, args.num_workers, args.girderApiUrl, args.girderToken)
+    cmd = f"python3 ../segmentationschool/segmentation_school.py \
+        --option {'train'} \
+        --training_data_dir {tmp.replace(' ', '\ ')} \
+        --init_modelfile {args.init_modelfile} \
+        --gpu {args.gpu} \
+        --train_steps {args.training_steps} \
+        --num_workers {args.num_workers} \
+        --girderApiUrl {args.girderApiUrl} \
+        --girderToken {args.girderToken}"
+
     print(cmd)
     sys.stdout.flush()
     os.system(cmd)
diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
index 79e90fd..d514876 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
@@ -25,34 +25,52 @@
       input
       1
+
+    output_model
+
+      Select the name of the output model file produced. By default this will be saved in your Private folder.
+      output
+      2
+
+
+
+
+    Training parameters
     gpu
 
       A comma separated list of the GPU IDs that will be made avalable for training
      0,1
-     2
+     0
 
     training_steps
 
      The number of steps used for network training. The network will see [steps * batch size] image patches during training
-     10000
-     3
+     1000
+     1
 
     num_workers
 
      Number of workers for Dataloader
      0
-     8
+     2
 
+    eval_period
+
+      Validate after this many epochs
+      250
+      3
+
+    batch_size
+
+      Size of batches for training high resolution CNN
+      4
+      4
 
-
-    output_model
-
-      Select the name of the output model file produced. By default this will be saved in your Private folder.
- output - 5 - diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py index b411dc2..0d503af 100644 --- a/multic/segmentationschool/Codes/IterativeTraining_1X.py +++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py @@ -54,15 +54,8 @@ def IterateTraining(args): dirs['training_data_dir'] = args.training_data_dir dirs['val_data_dir'] = args.training_data_dir - - - - print('Handcoded iteration') - - - #os.environ["CUDA_VISIBLE_DEVICES"]=gpu #os.system('export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk "{ print $NF }")') os.environ["CUDA_VISIBLE_DEVICES"] ='2,3' @@ -98,7 +91,7 @@ def IterateTraining(args): classes[idx]={'isthing':isthing[idx],'color':xml_color[idx]} - num_images=args.batch_size*args.train_steps + num_images=args.batch_size * args.train_steps # slide_idxs=train_dset.get_random_slide_idx(num_images) usable_slides=get_slide_data(args, wsi_directory = dirs['training_data_dir']) print('Number of slides:', len(usable_slides)) @@ -113,7 +106,7 @@ def IterateTraining(args): usable_slides_val=get_slide_data(args, wsi_directory=dirs['val_data_dir']) usable_idx_val=range(0,len(usable_slides_val)) - slide_idxs_val=random.choices(usable_idx_val,k=int(args.batch_size*args.train_steps/100)) + slide_idxs_val=random.choices(usable_idx_val,k=int(args.batch_size * args.train_steps/100)) image_coordinates_val=get_random_chops(slide_idxs_val,usable_slides_val,region_size) @@ -154,7 +147,7 @@ def IterateTraining(args): cfg.INPUT.MIN_SIZE_TRAIN=args.boxSize cfg.INPUT.MAX_SIZE_TRAIN=args.boxSize - cfg.OUTPUT_DIR = args.base_dir+"/output" + cfg.OUTPUT_DIR = args.training_data_dir + "/output" os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) From 3e452c4b60c86de70f37e6afbbf1a57da84af71f Mon Sep 17 00:00:00 2001 From: AnishTatke <53684776+AnishTatke@users.noreply.github.com> Date: Tue, 25 Mar 2025 12:02:15 -0400 Subject: [PATCH 07/15] Parashurama compatibility --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index d010db5..1b58ea4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -128,6 +128,7 @@ RUN pip install --no-cache-dir --upgrade --ignore-installed pip setuptools && \ RUN python --version && pip --version && pip freeze WORKDIR $mc_path/multic/cli +LABEL entry_path=$mc_path/multic/cli # Test our entrypoint. 
If we have incompatible versions of numpy and # openslide, one of these will fail From 9b76bff7a08fc790f561dcea32b971f70a1cb5b3 Mon Sep 17 00:00:00 2001 From: Anish Tatke <53684776+AnishTatke@users.noreply.github.com> Date: Tue, 1 Apr 2025 11:47:05 -0400 Subject: [PATCH 08/15] Refactor command construction in MultiCompartmentTrain and update numpy version in setup.py --- Dockerfile | 1 - .../MultiCompartmentTrain.py | 18 +++++++++--------- setup.py | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1b58ea4..20e6b85 100644 --- a/Dockerfile +++ b/Dockerfile @@ -135,5 +135,4 @@ LABEL entry_path=$mc_path/multic/cli RUN python -m slicer_cli_web.cli_list_entrypoint --list_cli RUN python -m slicer_cli_web.cli_list_entrypoint MultiCompartmentTrain --help - ENTRYPOINT ["/bin/bash", "docker-entrypoint.sh"] diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py index 1394191..7a48f89 100644 --- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py +++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py @@ -122,15 +122,15 @@ def main(args): _ = os.system("printf '\ndone retriving data...\nstarting training...\n\n'") - cmd = f"python3 ../segmentationschool/segmentation_school.py \ - --option {'train'} \ - --training_data_dir {tmp.replace(' ', '\ ')} \ - --init_modelfile {args.init_modelfile} \ - --gpu {args.gpu} \ - --train_steps {args.training_steps} \ - --num_workers {args.num_workers} \ - --girderApiUrl {args.girderApiUrl} \ - --girderToken {args.girderToken}" + cmd = "python3 ../segmentationschool/segmentation_school.py \ + --option {} \ + --training_data_dir {} \ + --init_modelfile {} \ + --gpu {} \ + --train_steps {} \ + --num_workers {} \ + --girderApiUrl {} \ + --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpu, args.training_steps, args.num_workers, args.girderApiUrl, args.girderToken) print(cmd) sys.stdout.flush() diff --git a/setup.py b/setup.py index f7fdd5d..6cad146 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ def prerelease_local_scheme(version): install_requires=[ # scientific packages 'nimfa>=1.3.2', - 'numpy>=1.23.1', + 'numpy==1.23.5', 'scipy>=0.19.0', 'Pillow==9.5.0', 'pandas>=0.19.2', From 4a1f361894b5bb4e17b12225599c231e50c326e4 Mon Sep 17 00:00:00 2001 From: AnishTatke <53684776+AnishTatke@users.noreply.github.com> Date: Mon, 7 Apr 2025 15:27:07 -0400 Subject: [PATCH 09/15] Refine training parameters and error handling in training script --- .../MultiCompartmentTrain/MultiCompartmentTrain.xml | 8 ++++---- .../Codes/IterativeTraining_1X.py | 13 ++++++++++--- multic/segmentationschool/segmentation_school.py | 3 --- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml index d514876..9382c9b 100644 --- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml +++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml @@ -38,9 +38,9 @@ Training parameters gpu - - A comma separated list of the GPU IDs that will be made avalable for training - 0,1 + + Number of GPUs that you want to be made available for this training + 1 0 @@ -80,7 +80,7 @@ api-url A Girder API URL (e.g., https://girder.example.com:443/api/v1) - + https://dsa.rc.ufl.edu/api/v1 girderToken diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py 
b/multic/segmentationschool/Codes/IterativeTraining_1X.py
index 0d503af..686ddcf 100644
--- a/multic/segmentationschool/Codes/IterativeTraining_1X.py
+++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py
@@ -58,8 +58,6 @@ def IterateTraining(args):
 
     #os.environ["CUDA_VISIBLE_DEVICES"]=gpu
     #os.system('export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk "{ print $NF }")')
-    os.environ["CUDA_VISIBLE_DEVICES"] ='2,3'
-    os.environ["CUDA_LAUNCH_BLOCKING"] ='1'
 
     organType='kidney'
 
@@ -129,6 +127,7 @@ def IterateTraining(args):
 
     cfg.SOLVER.IMS_PER_BATCH = args.batch_size
+    cfg.SOLVER.AMP.ENABLED = True
 
     cfg.SOLVER.LR_policy='steps_with_lrs'
 
@@ -179,7 +178,15 @@ def real_data(args,image_coordinates_val):
     trainer = Trainer(cfg)
     print('check and see')
     trainer.resume_or_load(resume=False)
-    trainer.train()
+    try:
+        trainer.train()
+    except RuntimeError as e:
+        if 'out of memory' in str(e):
+            print(e)
+            torch.cuda.empty_cache()
+            print('Cleared cache')
+        else:
+            raise e
 
     _ = os.system("printf '\nTraining completed!\n'")
 
diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py
index a0d5b42..d6ba3ac 100644
--- a/multic/segmentationschool/segmentation_school.py
+++ b/multic/segmentationschool/segmentation_school.py
@@ -300,8 +300,5 @@ def savetime(args, starttime):
     parser.add_argument('--num_workers', dest='num_workers', default=1 ,type=int,
         help='Number of workers for data loader')
-
-
-
     args = parser.parse_args()
     main(args=args)

From 9c49e1843ee6ae653592c9da928fc8dcbb5b0853 Mon Sep 17 00:00:00 2001
From: Anish Tatke <53684776+AnishTatke@users.noreply.github.com>
Date: Mon, 7 Apr 2025 15:28:08 -0400
Subject: [PATCH 10/15] Enhance slide processing feedback in get_slide_data
 and remove redundant file rename in process_xml

---
 multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py | 1 -
 multic/segmentationschool/Codes/wsi_loader_utils.py       | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
index 7a48f89..9634ed9 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
@@ -77,7 +77,6 @@ def process_xml(gc, files, xml_color, folder, tmp, slides_used) -> list:
 
         # include slide and fetch annotations
         _ = os.system("printf '\tFETCHING SLIDE...\n'")
-        os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename))
         slides_used.append(slidename)
 
         xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0])
diff --git a/multic/segmentationschool/Codes/wsi_loader_utils.py b/multic/segmentationschool/Codes/wsi_loader_utils.py
index 909ba83..7c01380 100644
--- a/multic/segmentationschool/Codes/wsi_loader_utils.py
+++ b/multic/segmentationschool/Codes/wsi_loader_utils.py
@@ -92,6 +92,7 @@ def get_slide_data(args, wsi_directory=None):
     all_slides=[]
     for ext in args.wsi_ext.split(','):
         all_slides.extend(glob.glob(wsi_directory+'/*'+ext))
+    print('Found {} slides'.format(len(all_slides)))
 
     print('Getting slide metadata and usable regions...')
     usable_slides=[]
@@ -101,8 +102,8 @@ def get_slide_data(args, wsi_directory=None):
 
         if os.path.isfile(xmlpath):
             write_minmax_to_xml(xmlpath)
-            print("Gathering slide data ... "+ slideID,end='\r')
-            slide =TiffSlide(slide_loc)
+            print("Gathering slide data ... "+ slideID + " from " + slide_loc, end='\r')
+            slide = TiffSlide(slide_loc)
         chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc)
 
         mag_x=np.round(float(slide.properties['tiffslide.mpp-x']),2)
@@ -136,7 +137,7 @@ def get_slide_data(args, wsi_directory=None):
             print('no annotation XML file found for:')
             print(slideID)
             exit()
-    print('\n')
+    print("\n Found {} slides with usable regions".format(len(usable_slides)))
     return usable_slides
 
 def get_random_chops(slide_idx,usable_slides,region_size):

From 5f3a0801b17a30480e210f8331c54d7db676235a Mon Sep 17 00:00:00 2001
From: AnishTatke <53684776+AnishTatke@users.noreply.github.com>
Date: Mon, 14 Apr 2025 15:43:01 -0400
Subject: [PATCH 11/15] Update .gitignore, refine training parameters, and
 enhance command-line arguments for training script

---
 .gitignore                                            | 10 +++++++++-
 .../MultiCompartmentTrain/MultiCompartmentTrain.py    |  4 +++-
 .../MultiCompartmentTrain/MultiCompartmentTrain.xml   |  2 +-
 .../segmentationschool/Codes/IterativeTraining_1X.py  | 11 +----------
 multic/segmentationschool/segmentation_school.py      |  2 +-
 multic/segmentationschool/slurm_training.sh           | 12 +++++++++---
 6 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9691592..c8800bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,14 @@
+
 multic/segmentationschool/logs/
 multic/segmentationschool/output/
 multic/segmentationschool/test.py
+multic/segmentationschool/profile.html
+multic/segmentationschool/profile.json
+
 multic/segmentationschool/*.ipynb
+
 multic/segmentationschool/Codes/__pycache__/
-multic/segmentationschool/__pycache__/
\ No newline at end of file
+multic/segmentationschool/utils/__pycache__/
+multic/segmentationschool/__pycache__/
+
+
diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
index 9634ed9..8b7fc2d 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
@@ -127,9 +127,11 @@ def main(args):
         --init_modelfile {} \
         --gpu {} \
         --train_steps {} \
+        --eval_period {} \
         --num_workers {} \
+        --batch_size {} \
         --girderApiUrl {} \
-        --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpu, args.training_steps, args.num_workers, args.girderApiUrl, args.girderToken)
+        --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpu, args.training_steps, args.eval_period, args.num_workers, args.batch_size, args.girderApiUrl, args.girderToken)
 
     print(cmd)
     sys.stdout.flush()
diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
index 9382c9b..45d56d7 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
@@ -68,7 +68,7 @@
     batch_size
 
      Size of batches for training high resolution CNN
-     4
+     2
      4
diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py
index 686ddcf..f8b8bd7 100644
--- a/multic/segmentationschool/Codes/IterativeTraining_1X.py
+++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py
@@ -174,19 +174,10 @@ def real_data(args,image_coordinates_val):
     with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w+") as f:
         f.write(cfg.dump()) # save config to file
-
     trainer = Trainer(cfg)
     print('check and see')
     trainer.resume_or_load(resume=False)
-    try:
-        trainer.train()
-    except
RuntimeError as e: - if 'out of memory' in str(e): - print(e) - torch.cuda.empty_cache() - print('Cleared cache') - else: - raise e + trainer.train() _ = os.system("printf '\nTraining completed!\n'") diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index d6ba3ac..768a322 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -192,7 +192,7 @@ def savetime(args, starttime): help='the desired model file to use for training or prediction') parser.add_argument('--eval_period', dest='eval_period', default=1000 ,type=int, help='Validation Period') - parser.add_argument('--batch_size', dest='batch_size', default=4 ,type=int, + parser.add_argument('--batch_size', dest='batch_size', default=2 ,type=int, help='Size of batches for training high resolution CNN') parser.add_argument('--train_steps', dest='train_steps', default=1000 ,type=int, help='Size of batches for training high resolution CNN') diff --git a/multic/segmentationschool/slurm_training.sh b/multic/segmentationschool/slurm_training.sh index 108702c..710b298 100644 --- a/multic/segmentationschool/slurm_training.sh +++ b/multic/segmentationschool/slurm_training.sh @@ -17,6 +17,8 @@ echo "SLURMTMPDIR="$SLURMTMPDIR echo "working directory = "$SLURM_SUBMIT_DIR ulimit -s unlimited module load singularity +pwd +date ls ml @@ -34,11 +36,15 @@ MODELDIR=$ORANGEDIR/pretrained_model CONTAINER=/blue/pinaki.sarder/anish.tatke/sif_containers/mcs_training.sif CUDA_LAUNCH_BLOCKING=1 +# singularity exec --writable $CONTAINER pip install --user numpy==1.23.5 scalene +# singularity exec --writable $CONTAINER python3 -m scalene.set_nvidia_gpu_modes + singularity exec --nv -B $(pwd):/exec/,$DATADIR/:/data,$MODELDIR/:/model/ $CONTAINER python3 /exec/segmentation_school.py \ --option train \ --base_dir $CODESDIR \ --init_modelfile $MODELDIR/model_final.pth \ --training_data_dir $DATADIR \ - --train_steps 10000 \ - --eval_period 2500 \ - --num_workers 8 + --train_steps 20 \ + --eval_period 5 \ + --num_workers 2 \ + --batch_size 2 \ \ No newline at end of file From 771c06142ef74ee60b6bc9122e6b643ef6bc2d7c Mon Sep 17 00:00:00 2001 From: Anish Tatke <53684776+AnishTatke@users.noreply.github.com> Date: Mon, 14 Apr 2025 16:51:17 -0400 Subject: [PATCH 12/15] Rename GPU argument to gpus and update related documentation for consistency --- multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py | 4 ++-- multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml | 4 ++-- multic/segmentationschool/segmentation_school.py | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py index 8b7fc2d..b9e5942 100644 --- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py +++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py @@ -125,13 +125,13 @@ def main(args): --option {} \ --training_data_dir {} \ --init_modelfile {} \ - --gpu {} \ + --gpu_num {} \ --train_steps {} \ --eval_period {} \ --num_workers {} \ --batch_size {} \ --girderApiUrl {} \ - --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpu, args.training_steps, args.eval_period, args.num_workers, args.batch_size, args.girderApiUrl, args.girderToken) + --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpus, args.training_steps, args.eval_period, args.num_workers, args.batch_size, 
args.girderApiUrl, args.girderToken)
 
     print(cmd)
     sys.stdout.flush()
diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
index 45d56d7..c394bb1 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
@@ -37,7 +37,7 @@
     Training parameters
-    gpu
+    gpus
 
      Number of GPUs that you want to be made available for this training
      1
      0
@@ -59,7 +59,7 @@
     eval_period
-
+
      Validate after this many epochs
      250
      3
diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py
index 768a322..92815b8 100644
--- a/multic/segmentationschool/segmentation_school.py
+++ b/multic/segmentationschool/segmentation_school.py
@@ -174,8 +174,9 @@ def savetime(args, starttime):
     ##### Args for training / prediction ####################################################
     parser.add_argument('--gpu_num', dest='gpu_num', default=2 ,type=int,
         help='number of GPUs avalable')
-    parser.add_argument('--gpu', dest='gpu', default="" ,type=str,
-        help='GPU to use for prediction')
+    # parser.add_argument('--gpu', dest='gpu', default="" ,type=str,
+    #     help='GPU to use for prediction')
+
     parser.add_argument('--iteration', dest='iteration', default='none' ,type=str,
         help='Which iteration to use for prediction')

From 92b6075dc5ba42f9facef427de8913cdb04b2a05 Mon Sep 17 00:00:00 2001
From: AnishTatke <53684776+AnishTatke@users.noreply.github.com>
Date: Tue, 15 Apr 2025 12:19:25 -0400
Subject: [PATCH 13/15] Update training parameters and GPU configuration in
 training scripts

---
 .../MultiCompartmentTrain.xml                    |  8 ++++----
 .../Codes/IterativeTraining_1X.py                |  9 ++++-----
 multic/segmentationschool/segmentation_school.py | 15 +++++++++------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
index 45d56d7..5fd9d99 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml
@@ -47,7 +47,7 @@
     training_steps
 
      The number of steps used for network training. The network will see [steps * batch size] image patches during training
-     1000
+     10000
      1
@@ -61,14 +61,14 @@
     eval_period
 
      Validate after this many epochs
-     250
+     2500
      3
 
     batch_size
 
      Size of batches for training high resolution CNN
-     2
+     4
      4
@@ -80,7 +80,7 @@
     api-url
 
      A Girder API URL (e.g., https://girder.example.com:443/api/v1)
-     https://dsa.rc.ufl.edu/api/v1
+
 
     girderToken
diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py
index f8b8bd7..8747540 100644
--- a/multic/segmentationschool/Codes/IterativeTraining_1X.py
+++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py
@@ -56,9 +56,8 @@ def IterateTraining(args):
 
     print('Handcoded iteration')
 
-    #os.environ["CUDA_VISIBLE_DEVICES"]=gpu
-    #os.system('export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk "{ print $NF }")')
-
+    os.environ["CUDA_VISIBLE_DEVICES"]=args.gpu
+    os.environ["CUDA_LAUNCH_BLOCKING"] ='1'
 
     organType='kidney'
     print('Organ meta being set to... '+ organType)
@@ -169,13 +168,13 @@ def real_data(args,image_coordinates_val):
     MetadataCatalog.get("my_dataset_val").set(thing_classes=tc)
     MetadataCatalog.get("my_dataset_val").set(stuff_classes=sc)
-    cfg.DATASETS.TEST = ("my_dataset_val",)
+    cfg.DATASETS.TEST = ("my_dataset_val")
     with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w+") as f:
         f.write(cfg.dump()) # save config to file
 
     trainer = Trainer(cfg)
-    print('check and see')
+
     trainer.resume_or_load(resume=False)
     trainer.train()
diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py
index 768a322..72e06ad 100644
--- a/multic/segmentationschool/segmentation_school.py
+++ b/multic/segmentationschool/segmentation_school.py
@@ -172,10 +172,10 @@ def savetime(args, starttime):
 
     ##### Args for training / prediction ####################################################
-    # parser.add_argument('--gpu_num', dest='gpu_num', default=2 ,type=int,
-    #     help='number of GPUs avalable')
+    parser.add_argument('--gpu', dest='gpu', default=1 ,type=int,
+        help='Number of GPUs to use for prediction')
+
     parser.add_argument('--iteration', dest='iteration', default='none' ,type=str,
         help='Which iteration to use for prediction')
@@ -192,7 +192,7 @@ def savetime(args, starttime):
         help='the desired model file to use for training or prediction')
     parser.add_argument('--eval_period', dest='eval_period', default=1000 ,type=int,
         help='Validation Period')
-    parser.add_argument('--batch_size', dest='batch_size', default=2 ,type=int,
+    parser.add_argument('--batch_size', dest='batch_size', default=4 ,type=int,
         help='Size of batches for training high resolution CNN')
     parser.add_argument('--train_steps', dest='train_steps', default=1000 ,type=int,
         help='Size of batches for training high resolution CNN')
@@ -300,5 +300,8 @@ def savetime(args, starttime):
     parser.add_argument('--num_workers', dest='num_workers', default=1 ,type=int,
         help='Number of workers for data loader')
+
+
+
     args = parser.parse_args()
-    main(args=args)
+    main(args=args)
\ No newline at end of file

From 9107f4c0a5124663312d6ca6553623e248052c7b Mon Sep 17 00:00:00 2001
From: Anish Tatke <53684776+AnishTatke@users.noreply.github.com>
Date: Wed, 16 Apr 2025 13:02:32 -0400
Subject: [PATCH 14/15] Refactor training scripts and configuration: update
 GPU parameter naming, enhance validation checks, and clean up unused code

---
 .gitignore                                    |   1 +
 .../MultiCompartmentTrain.py                  |  61 ++--
 .../MultiCompartmentTrain.xml                 |  12 +-
 .../Codes/IterativeTraining_1X.py             |   5 +-
 .../segmentationschool/Codes/engine/hooks.py  |   2 +-
 .../Codes/wsi_loader_utils.py                 |   1 -
 .../segmentationschool/segmentation_school.py | 278 +-----------------
 setup.py                                      |   2 +-
 8 files changed, 61 insertions(+), 301 deletions(-)

diff --git a/.gitignore b/.gitignore
index c8800bf..aaa772b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+multic/segmentationschool/*.sh
 
 multic/segmentationschool/logs/
 multic/segmentationschool/output/
diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
index b9e5942..df02221 100644
--- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
+++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.py
@@ -7,9
+7,33 @@ sys.path.append("..") from segmentationschool.utils.mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save from segmentationschool.utils.xml_to_mask import write_minmax_to_xml +from segmentationschool.segmentation_school import run_it NAMES = ['cortical_interstitium','medullary_interstitium','non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] +DEFAULT_VALS = { + 'girderApiUrl':' ', + 'girderToken':' ', + 'option':'train', + 'training_data_dir':' ', + 'init_modelfile':' ', + 'output_model':' ', + 'base_dir': os.getcwd(), + 'gpu':1, + 'train_steps':1000, + 'eval_period':250, + 'num_workers':0, + 'batch_size':4, + 'boxSize':1200, + 'wsi_ext': '.svs,.scn,.ndpi', + 'downsampleRate': 1, + 'overlap_rate': 0.5, + 'chop_thumbnail_resolution': 16, + 'get_new_tissue_masks': False, + 'white_percent': 0.01, + 'balanceClasses': '3,4,5,6', +} + def process_xml(gc, files, xml_color, folder, tmp, slides_used) -> list: for file in files: slidename = file['name'] @@ -87,8 +111,16 @@ def process_xml(gc, files, xml_color, folder, tmp, slides_used) -> list: return slides_used -def main(args): - +def main(args): + if args.training_data_dir == ' ': + raise ValueError("Training data directory is required.") + + if args.init_modelfile == ' ': + raise ValueError("Initial model file is required.") + + if args.output_model == ' ': + raise ValueError("Output model file is required.") + folder = args.training_data_dir base_dir_id = folder.split('/')[-2] _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) @@ -107,35 +139,28 @@ def main(args): tmp = folder slides_used = [] - ignore_label = len(NAMES)+1 slides_used = process_xml(gc, files, xml_color, folder, tmp, slides_used) - os.system("ls -lh '{}'".format(tmp)) + for d in DEFAULT_VALS: + if d not in list(vars(args).keys()): + setattr(args,d,DEFAULT_VALS[d]) + trainlogdir=os.path.join(tmp, 'output') if not os.path.exists(trainlogdir): os.makedirs(trainlogdir) _ = os.system("printf '\ndone retriving data...\nstarting training...\n\n'") + print(vars(args)) + for d in vars(args): + print(f'argument: {d}, value: {getattr(args,d)}') - cmd = "python3 ../segmentationschool/segmentation_school.py \ - --option {} \ - --training_data_dir {} \ - --init_modelfile {} \ - --gpu_num {} \ - --train_steps {} \ - --eval_period {} \ - --num_workers {} \ - --batch_size {} \ - --girderApiUrl {} \ - --girderToken {}".format('train', tmp.replace(' ', '\ '), args.init_modelfile, args.gpus, args.training_steps, args.eval_period, args.num_workers, args.batch_size, args.girderApiUrl, args.girderToken) + run_it(args) - print(cmd) sys.stdout.flush() - os.system(cmd) os.listdir(trainlogdir) os.chdir(trainlogdir) @@ -150,7 +175,5 @@ def main(args): _ = os.system("printf '\nDone!\n\n'") - - if __name__ == "__main__": main(CLIArgumentParser().parse_args()) \ No newline at end of file diff --git a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml index db96f2b..a42c595 100644 --- a/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml +++ b/multic/cli/MultiCompartmentTrain/MultiCompartmentTrain.xml @@ -36,18 +36,18 @@ Training parameters - - gpus + + gpu Number of GPUs that you want to be made available for this training 1 0 - + - training_steps + train_steps The number of steps used for network training. 
The network will see [steps * batch size] image patches during training
-     10000
+     1000
      1
@@ -61,7 +61,7 @@
     eval_period
 
      Validate after this many epochs
-     2500
+     250
      3
diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py
index 8747540..e4ed8ad 100644
--- a/multic/segmentationschool/Codes/IterativeTraining_1X.py
+++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py
@@ -55,9 +55,8 @@ def IterateTraining(args):
     dirs['val_data_dir'] = args.training_data_dir
 
     print('Handcoded iteration')
-
-    os.environ["CUDA_VISIBLE_DEVICES"]=args.gpu
-    os.environ["CUDA_LAUNCH_BLOCKING"] ='1'
+    # os.environ["CUDA_VISIBLE_DEVICES"]=args.gpu
+    # os.environ["CUDA_LAUNCH_BLOCKING"] ='1'
 
     organType='kidney'
     print('Organ meta being set to... '+ organType)
diff --git a/multic/segmentationschool/Codes/engine/hooks.py b/multic/segmentationschool/Codes/engine/hooks.py
index ec1eaf6..22ce49c 100644
--- a/multic/segmentationschool/Codes/engine/hooks.py
+++ b/multic/segmentationschool/Codes/engine/hooks.py
@@ -15,7 +15,7 @@ def __init__(self, eval_period, model, data_loader):
 
     def _do_loss_eval(self):
         # Copying inference_on_dataset from evaluator.py
-        total = len(self._data_loader)
+        total = max(len(self._data_loader), 1)
         num_warmup = min(5, total - 1)
 
         start_time = time.perf_counter()
diff --git a/multic/segmentationschool/Codes/wsi_loader_utils.py b/multic/segmentationschool/Codes/wsi_loader_utils.py
index 7c01380..56b96e4 100644
--- a/multic/segmentationschool/Codes/wsi_loader_utils.py
+++ b/multic/segmentationschool/Codes/wsi_loader_utils.py
@@ -10,7 +10,6 @@ from shapely.geometry import Polygon
 from tiffslide import TiffSlide
 import random
-import glob
 import warnings
 from joblib import Parallel, delayed
 import multiprocessing
diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py
index 72e06ad..514c46b 100644
--- a/multic/segmentationschool/segmentation_school.py
+++ b/multic/segmentationschool/segmentation_school.py
@@ -1,7 +1,6 @@
 import os
 import argparse
 import sys
-import time
 
 sys.path.append('..')
 
@@ -26,282 +25,21 @@
 for transfer learning
 
 """
-
-# def get_girder_client(args):
-#     gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
-#     gc.setToken(args.girderToken)
-
-#     return gc
-
-def str2bool(v):
-    if isinstance(v, bool):
-        return v
-    if v.lower() in ('yes', 'true', 't', 'y', '1'):
-        return True
-    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
-        return False
-    else:
-        raise argparse.ArgumentTypeError('Boolean value expected.')
-
-
 def main(args):
-    from segmentationschool.Codes.InitializeFolderStructure import initFolder, purge_training_set, prune_training_set
-    # from extract_reference_features import getKidneyReferenceFeatures,summarizeKidneyReferenceFeatures
-    # from TransformXMLs import splice_cortex_XMLs,register_aperio_scn_xmls
-    # from randomCropGenerator import randomCropGenerator
-    if args.one_network == True:
-        from segmentationschool.Codes.IterativeTraining_1X import IterateTraining
-        from segmentationschool.Codes.IterativePredict_1X import predict
-    else:
-        from segmentationschool.Codes.evolve_predictions import evolve
-        from segmentationschool.Codes.IterativeTraining import IterateTraining
-        from segmentationschool.Codes.IterativePredict import predict
-
-    # for teaching young segmentations networks
-    starttime = time.time()
-    # if args.project == ' ':
-    #     print('Please specify the project name: \n\t--project [folder]')
+    from
segmentationschool.Codes.IterativeTraining_1X import IterateTraining + from segmentationschool.Codes.IterativePredict_1X import predict - if args.option in ['new', 'New']: - initFolder(args=args) - savetime(args=args, starttime=starttime) - elif args.option in ['train', 'Train']: + if args.option in ['train', 'Train']: IterateTraining(args=args) - savetime(args=args, starttime=starttime) + elif args.option in ['predict', 'Predict']: predict(args=args) - savetime(args=args, starttime=starttime) - - elif args.option in ['evolve', 'Evolve']: - evolve(args=args) - elif args.option in ['purge', 'Purge']: - purge_training_set(args=args) - elif args.option in ['prune', 'Prune']: - prune_training_set(args=args) - elif args.option in ['get_features', 'Get_features']: - getKidneyReferenceFeatures(args=args) - elif args.option in ['summarize_features', 'Summarize_features']: - summarizeKidneyReferenceFeatures(args=args) - elif args.option in ['splice_cortex', 'Splice_cortex']: - splice_cortex_XMLs(args=args) - elif args.option in ['register_aperio_scn_xmls', 'Register_aperio_scn_xmls']: - register_aperio_scn_xmls(args=args) - elif args.option in ['get_thumbnails', 'Get_thumbnails']: - from wsi_loader_utils import get_image_thumbnails - get_image_thumbnails(args) - elif args.option in ['random_patch_crop', 'random_patch_crop']: - randomCropGenerator(args=args) else: - print('please specify an option in: \n\t--option [new, train, predict, validate, evolve, purge, prune, get_features, splice_cortex, register_aperio_scn_xmls]') - - -def savetime(args, starttime): - if args.option in ['new', 'New']: - print('new') - # with open(args.runtime_file, 'w') as timefile: - # timefile.write('option' +'\t'+ 'time' +'\t'+ 'epochs_LR' +'\t'+ 'epochs_HR' +'\t'+ 'aug_LR' +'\t'+ 'aug_HR' +'\t'+ 'overlap_percentLR' +'\t'+ 'overlap_percentHR') - if args.option in ['train', 'Train']: - print('not much') - # with open(args.runtime_file, 'a') as timefile: - # timefile.write('\n' + args.option +'\t'+ str(time.time()-starttime) +'\t'+ str(args.epoch_LR) +'\t'+ str(args.epoch_HR) +'\t'+ str(args.aug_LR) +'\t'+ str(args.aug_HR) +'\t'+ str(args.overlap_percentLR) +'\t'+ str(args.overlap_percentHR)) - if args.option in ['predict', 'Predict']: - print('predict') - # with open(args.runtime_file, 'a') as timefile: - # timefile.write('\n' + args.option +'\t'+ str(time.time()-starttime)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - ##### Main params (MANDITORY) ############################################## - # School subject - parser.add_argument('--girderApiUrl', dest='girderApiUrl', default=' ' ,type=str, - help='girderApiUrl') - parser.add_argument('--girderToken', dest='girderToken', default=' ' ,type=str, - help='girderToken') - parser.add_argument('--files', dest='files', default=' ' ,type=str, - help='files') - # option - parser.add_argument('--option', dest='option', default=' ' ,type=str, - help='option for [new, train, predict, validate]') - parser.add_argument('--transfer', dest='transfer', default=' ' ,type=str, - help='name of project for transfer learning [pulls the newest model]') - parser.add_argument('--one_network', dest='one_network', default=True ,type=bool, - help='use only high resolution network for training/prediction/validation') - parser.add_argument('--target', dest='target', default=None,type=str, - help='directory with xml transformation targets') - parser.add_argument('--cortextarget', dest='cortextarget', default=None,type=str, - help='directory with cortex annotations for 
splicing')
-    parser.add_argument('--output', dest='output', default=None,type=str,
-        help='directory to save output transformed XMLs')
-    parser.add_argument('--wsis', dest='wsis', default=None,type=str,
-        help='directory of WSIs for reference feature extraction')
-    parser.add_argument('--groupBy', dest='groupBy', default=None,type=str,
-        help='Name for histomicsUI converted annotation group')
-    parser.add_argument('--patientData', dest='patientData', default=None,type=str,
-        help='Location of excel file containing clinical data on patients')
-    parser.add_argument('--labelColumns', dest='labelColumns', default=None,type=str,
-        help='Column in excel file to use as label')
-    parser.add_argument('--labelModality', dest='labelModality', default=None,type=str,
-        help='Column in excel file to use as label')
-    parser.add_argument('--IDColumn', dest='IDColumn', default='Label_slides',type=str,
-        help='Excel column with file name links')
-    parser.add_argument('--plotFill', dest='plotFill', default=True,type=str2bool,
-        help='Excel column with file name links')
-    parser.add_argument('--scatterFeatures', dest='scatterFeatures', default='5,6',type=str,
-        help='Excel column with file name links')
-    parser.add_argument('--anchor', dest='anchor', default='Age',type=str,
-        help='Biometric link data for scatterplot')
-    parser.add_argument('--exceloutfile', dest='exceloutfile', default=None,type=str,
-        help='Name of output excel file for feature aggregation')
-
-
-# args.huelabel,args.rowlabel,args.binRows
-    parser.add_argument('--SummaryOption', dest='SummaryOption', default=None,type=str,
-        help='What type of feature summary to generate, options:\n'+
-        'BLDensity,ULDensity,UDensity,BDensity,standardScatter,anchorScatter')
-
-    # automatically generated
-    parser.add_argument('--base_dir', dest='base_dir', default=os.getcwd(),type=str,
-        help='base directory of code folder')
-
-    parser.add_argument('--code_dir', dest='code_dir', default=os.getcwd(),type=str,
-        help='base directory of code folder')
-
-
-    ##### Args for training / prediction ####################################################
-    # parser.add_argument('--gpu_num', dest='gpu_num', default=2 ,type=int,
-    #     help='number of GPUs avalable')
-    parser.add_argument('--gpu', dest='gpu', default=1 ,type=int,
-        help='Number of GPUs to use for prediction')
-    parser.add_argument('--iteration', dest='iteration', default='none' ,type=str,
-        help='Which iteration to use for prediction')
-    parser.add_argument('--prune_HR', dest='prune_HR', default=0.0 ,type=float,
-        help='percent of high rez data to be randomly removed [0-1]-->[none-all]')
-    parser.add_argument('--prune_LR', dest='prune_LR', default=0.0 ,type=float,
-        help='percent of low rez data to be randomly removed [0-1]-->[none-all]')
-    parser.add_argument('--classNum', dest='classNum', default=0 ,type=int,
-        help='number of classes present in the training data plus one (one class is specified for background)')
-    parser.add_argument('--classNum_HR', dest='classNum_HR', default=0 ,type=int,
-        help='number of classes present in the High res training data [USE ONLY IF DIFFERENT FROM LOW RES]')
-    parser.add_argument('--modelfile', dest='modelfile', default=None ,type=str,
-        help='the desired model file to use for training or prediction')
-    parser.add_argument('--init_modelfile', dest='init_modelfile', default=None ,type=str,
-        help='the desired model file to use for training or prediction')
-    parser.add_argument('--eval_period', dest='eval_period', default=1000 ,type=int,
-        help='Validation Period')
-
parser.add_argument('--batch_size', dest='batch_size', default=4 ,type=int, - help='Size of batches for training high resolution CNN') - parser.add_argument('--train_steps', dest='train_steps', default=1000 ,type=int, - help='Size of batches for training high resolution CNN') - parser.add_argument('--training_data_dir', dest='training_data_dir', default=os.getcwd(),type=str, - help='Training Data Folder') - parser.add_argument('--overlap_rate', dest='overlap_rate', default=0.5 ,type=float, - help='overlap percentage of high resolution blocks [0-1]') - - ### Params for cutting wsi ### - #White level cutoff - parser.add_argument('--white_percent', dest='white_percent', default=0.01 ,type=float, - help='white level checkpoint for chopping') - parser.add_argument('--chop_thumbnail_resolution', dest='chop_thumbnail_resolution', default=16,type=int, - help='downsample mask to find usable regions') - #Low resolution parameters - parser.add_argument('--overlap_percentLR', dest='overlap_percentLR', default=0.5 ,type=float, - help='overlap percentage of low resolution blocks [0-1]') - parser.add_argument('--boxSizeLR', dest='boxSizeLR', default=450 ,type=int, - help='size of low resolution blocks') - parser.add_argument('--downsampleRateLR', dest='downsampleRateLR', default=16 ,type=int, - help='reduce image resolution to 1/downsample rate') - parser.add_argument('--get_new_tissue_masks', dest='get_new_tissue_masks', default=False,type=str2bool, - help="Don't load usable tisse regions from disk, create new ones") - parser.add_argument('--downsampleRate', dest='downsampleRate', default=1 ,type=int, - help='reduce image resolution to 1/downsample rate') - #High resolution parameters - parser.add_argument('--overlap_percentHR', dest='overlap_percentHR', default=0 ,type=float, - help='overlap percentage of high resolution blocks [0-1]') - parser.add_argument('--boxSize', dest='boxSize', default=1200 ,type=int, - help='size of high resolution blocks') - parser.add_argument('--downsampleRateHR', dest='downsampleRateHR', default=1 ,type=int, - help='reduce image resolution to 1/downsample rate') - parser.add_argument('--training_max_size', dest='training_max_size', default=512 ,type=int, - help='padded region for low resolution region extraction') - parser.add_argument('--Mag20X', dest='Mag20X', default=False,type=str2bool, - help='Perform prediction for 20X (true) slides rather than 40X (false)') - - ### Params for augmenting data ### - #High resolution - parser.add_argument('--aug_HR', dest='aug_HR', default=3 ,type=int, - help='augment high resolution set this many magnitudes') - #Low resolution - parser.add_argument('--aug_LR', dest='aug_LR', default=15 ,type=int, - help='augment low resolution set this many magnitudes') - #Color space transforms - parser.add_argument('--hbound', dest='hbound', default=0.01 ,type=float, - help='Gaussian variance defining bounds on Hue shift for HSV color augmentation') - parser.add_argument('--lbound', dest='lbound', default=0.025 ,type=float, - help='Gaussian variance defining bounds on L* gamma shift for color augmentation [alters brightness/darkness of image]') - parser.add_argument('--balanceClasses', dest='balanceClasses', default='3,4,5,6',type=str, - help="which classes to balance during training") - ### Params for training networks ### - #Low resolution hyperparameters - parser.add_argument('--CNNbatch_sizeLR', dest='CNNbatch_sizeLR', default=2 ,type=int, - help='Size of batches for training low resolution CNN') - #High resolution hyperparameters - 
parser.add_argument('--CNNbatch_sizeHR', dest='CNNbatch_sizeHR', default=2 ,type=int, - help='Size of batches for training high resolution CNN') - #Hyperparameters - parser.add_argument('--epoch_LR', dest='epoch_LR', default=1 ,type=int, - help='training epochs for low resolution network') - parser.add_argument('--epoch_HR', dest='epoch_HR', default=1 ,type=int, - help='training epochs for high resolution network') - parser.add_argument('--saveIntervals', dest='saveIntervals', default=10 ,type=int, - help='how many checkpoints get saved durring training') - parser.add_argument('--learning_rate_HR', dest='learning_rate_HR', default=2.5e-4, - type=float, help='High rez learning rate') - parser.add_argument('--learning_rate_LR', dest='learning_rate_LR', default=2.5e-4, - type=float, help='Low rez learning rate') - parser.add_argument('--chop_data', dest='chop_data', default='false', - type=str, help='chop and augment new data before training') - parser.add_argument('--crop_detectron_trainset', dest='crop_detectron_trainset', default=False,type=str2bool, - help='chop dot based images to this max size') - parser.add_argument('--predict_data', dest='predict_data', default=True,type=str2bool, - help='chop dot based images to this max size') - parser.add_argument('--roi_thresh', dest='roi_thresh', default=0.01,type=float, - help='chop dot based images to this max size') - - ### Params for saving results ### - parser.add_argument('--outDir', dest='outDir', default='Predictions' ,type=str, - help='output directory') - parser.add_argument('--save_outputs', dest='save_outputs', default=False ,type=bool, - help='save outputs from chopping etc. [final image masks]') - parser.add_argument('--imBoxExt', dest='imBoxExt', default='.jpeg' ,type=str, - help='ext of saved image blocks') - parser.add_argument('--finalImgExt', dest='finalImgExt', default='.jpeg' ,type=str, - help='ext of final saved images') - parser.add_argument('--wsi_ext', dest='wsi_ext', default='.svs,.scn,.ndpi' ,type=str, - help='file ext of wsi images') - parser.add_argument('--bg_intensity', dest='bg_intensity', default=.5 ,type=float, - help='if displaying output classifications [save_outputs = True] background color [0-1]') - parser.add_argument('--approximation_downsample', dest='approx_downsample', default=1 ,type=float, - help='Amount to downsample high resolution prediction boundaries for smoothing') - - - ### Params for optimizing wsi mask cleanup ### - parser.add_argument('--min_size', dest='min_size', default=[30,30,30,30,30,30] ,type=int, - help='min size region to be considered after prepass [in pixels]') - parser.add_argument('--bordercrop', dest='bordercrop', default=300 ,type=int, - help='min size region to be considered after prepass [in pixels]') - parser.add_argument('--LR_region_pad', dest='LR_region_pad', default=50 ,type=int, - help='padded region for low resolution region extraction') - parser.add_argument('--show_interstitium', dest='show_interstitium', default=True ,type=str2bool, - help='padded region for low resolution region extraction') - parser.add_argument('--num_workers', dest='num_workers', default=1 ,type=int, - help='Number of workers for data loader') - - + print('please specify an option in: \n\t--option [predict or train]') +# importable function +def run_it(args): - args = parser.parse_args() - main(args=args) \ No newline at end of file + main(args) diff --git a/setup.py b/setup.py index 6cad146..063df1e 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ def prerelease_local_scheme(version): 
'numpy==1.23.5', 'scipy>=0.19.0', 'Pillow==9.5.0', - 'pandas>=0.19.2', + # 'pandas>=0.19.2', 'imageio>=2.3.0', # 'shapely[vectorized]', #'opencv-python-headless<4.7', From e1fdf9ebdae5bc6c3016786ef62fe847554f11a2 Mon Sep 17 00:00:00 2001 From: AnishTatke <53684776+AnishTatke@users.noreply.github.com> Date: Fri, 18 Apr 2025 15:49:45 -0400 Subject: [PATCH 15/15] Refactor IterateTraining function: remove unnecessary line breaks, update dataset test configuration, and improve hook insertion formatting --- multic/segmentationschool/Codes/IterativeTraining_1X.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/multic/segmentationschool/Codes/IterativeTraining_1X.py b/multic/segmentationschool/Codes/IterativeTraining_1X.py index e4ed8ad..f09709d 100644 --- a/multic/segmentationschool/Codes/IterativeTraining_1X.py +++ b/multic/segmentationschool/Codes/IterativeTraining_1X.py @@ -149,8 +149,6 @@ def IterateTraining(args): def real_data(args,image_coordinates_val): - - all_list=[] for one in train_samples_from_WSI(args,image_coordinates_val): dataset_dict = one @@ -167,7 +165,7 @@ def real_data(args,image_coordinates_val): MetadataCatalog.get("my_dataset_val").set(thing_classes=tc) MetadataCatalog.get("my_dataset_val").set(stuff_classes=sc) - cfg.DATASETS.TEST = ("my_dataset_val") + cfg.DATASETS.TEST = ("my_dataset_val", ) with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w+") as f: f.write(cfg.dump()) # save config to file @@ -221,7 +219,7 @@ def build_evaluator(cls, cfg, dataset_name, output_folder=None): def build_hooks(self): hooks = super().build_hooks() - hooks.insert(-1,LossEvalHook( + hooks.insert(-1, LossEvalHook( self.cfg.TEST.EVAL_PERIOD, self.model, build_detection_test_loader(