From 0b3538fb148ec67f3de53d30b7a5ff632fe6e20b Mon Sep 17 00:00:00 2001
From: sayatmimar
Date: Thu, 29 Jun 2023 14:50:05 -0700
Subject: [PATCH] add training code and dependencies

---
 .DS_Store                                     | Bin 0 -> 8196 bytes
 histomicstk/.DS_Store                         | Bin 0 -> 8196 bytes
 histomicstk/cli/.DS_Store                     | Bin 8196 -> 10244 bytes
 histomicstk/segmentationschool/.DS_Store      | Bin 0 -> 8196 bytes
 .../Codes/InitializeFolderStructure.py        |  74 +-
 .../Codes/IterativePredict.py                 |  16 +-
 .../Codes/IterativePredict_1X.py              | 767 +++++++++--------
 .../Codes/IterativeTraining.py                |  28 +-
 .../Codes/IterativeTraining_1X.py             | 262 ++++--
 .../Codes/IterativeTraining_1X_chopless.py    | 772 ++++++++++++++++++
 .../IterativeTraining_1X_chopless_test.py     | 771 +++++++++++++++++
 .../segmentationschool/Codes/TransformXMLs.py | 170 +---
 .../Codes/dataset_mapper_custom.py            | 251 ++++++
 .../Codes/evolve_predictions.py               |  16 +-
 .../Codes/generateTrainSet.py                 |   4 +-
 .../segmentationschool/Codes/getWsi.py        |   0
 .../Codes/get_choppable_regions.py            |  44 +-
 .../Codes/get_dataset_list.py                 | 137 +++-
 .../Codes/get_network_performance.py          |  10 +-
 .../Codes/get_network_performance_folder.py   |  10 +-
 .../segmentationschool/Codes/predict_xml.py   |   2 +-
 .../Codes/randomHSVshift.py                   |   6 +-
 histomicstk/segmentationschool/Codes/utils.py |   4 +-
 .../Codes/wsi_loader_utils.py                 | 439 ++++++----
 .../Codes/wsi_loader_utils_backup.py          | 235 ++++++
 .../segmentationschool/Codes/xmlCheck.py      |   4 +-
 .../segmentationschool/Codes/xml_to_mask.py   |   6 +-
 .../segmentationschool/Codes/xml_to_mask2.py  | 143 ++--
 .../segmentationschool/Codes/xml_to_mask2o.py | 592 ++++++++++++++
 .../Codes/xml_to_mask_minmax.py               |  57 +-
 .../segmentationschool/segmentation_school.py | 162 ++--
 31 files changed, 3901 insertions(+), 1081 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 histomicstk/.DS_Store
 create mode 100644 histomicstk/segmentationschool/.DS_Store
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/InitializeFolderStructure.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/IterativePredict.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/IterativePredict_1X.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/IterativeTraining.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/IterativeTraining_1X.py
 create mode 100755 histomicstk/segmentationschool/Codes/IterativeTraining_1X_chopless.py
 create mode 100755 histomicstk/segmentationschool/Codes/IterativeTraining_1X_chopless_test.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/TransformXMLs.py
 create mode 100755 histomicstk/segmentationschool/Codes/dataset_mapper_custom.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/evolve_predictions.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/generateTrainSet.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/getWsi.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/get_choppable_regions.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/get_dataset_list.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/get_network_performance.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/get_network_performance_folder.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/predict_xml.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/randomHSVshift.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/utils.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/wsi_loader_utils.py
 create mode 100755 histomicstk/segmentationschool/Codes/wsi_loader_utils_backup.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/xmlCheck.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/xml_to_mask.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/xml_to_mask2.py
 create mode 100755 histomicstk/segmentationschool/Codes/xml_to_mask2o.py
 mode change 100644 => 100755 histomicstk/segmentationschool/Codes/xml_to_mask_minmax.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..73668fd950c6ca8ff6fe5f62ca4905b7c6f79333
GIT binary patch
[binary .DS_Store data omitted; the hunk for histomicstk/.DS_Store is garbled in the source]
diff --git a/histomicstk/cli/.DS_Store b/histomicstk/cli/.DS_Store
index 6b6b0e345b38ffe37f7d3c0a541ffc298255369a..13ee70622afffb739452b67478b092d97c6a964a 100644
GIT binary patch
[binary .DS_Store delta omitted]
diff --git a/histomicstk/segmentationschool/.DS_Store b/histomicstk/segmentationschool/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..f87ce8453a54a674df93168c5d8cf4210baeb764
GIT binary patch
[binary .DS_Store data omitted]
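The recoverable hunks that follow rewrite IterativePredict_1X.py so that decode_panoptic no longer branches on organType and instead walks detectron2's segments_info directly. A minimal sketch of that decoding idiom, assuming a DefaultPredictor built as in this patch (the helper name and the +1 background offset here are illustrative, not the patch's exact code):

    import numpy as np

    def decode_panoptic_sketch(seg, segments_info):
        # seg: (H, W) array of segment ids from the panoptic head
        # segments_info: list of dicts with 'id', 'isthing', 'category_id'
        out = np.zeros(seg.shape, dtype=np.uint8)
        for s in segments_info:
            if s['isthing']:
                # offset thing classes so 0 stays background
                out[seg == s['id']] = s['category_id'] + 1
            else:
                out[seg == s['id']] = 0
        return out

    # usage, mirroring the patched predict():
    # panoptic_seg, segments_info = predictor(im)["panoptic_seg"]
    # mask = decode_panoptic_sketch(panoptic_seg.to("cpu").numpy(), segments_info)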
[diff hunks for InitializeFolderStructure.py and IterativePredict.py are garbled in the source; the patch resumes midway through IterativePredict_1X.py]
-    out=np.zeros_like(image)
-    if organType=='liver':
-        for ids in segments_info:
-            if ids['isthing']:
-                out[image==ids['id']]=ids['category_id']+1
+    for ids in segments_info:
+        if ids['isthing']:
+            out[image==ids['id']]=ids['category_id']+1
-            else:
-                out[image==ids['id']]=0
-
-    elif organType=='kidney':
-        for ids in segments_info:
-            if ids['isthing']:
-                out[image==ids['id']]=ids['category_id']+3
+        else:
+            out[image==ids['id']]=0
+    # plt.imshow(out)
+    # plt.show()
+    # exit()
+    return out.astype('uint8')
+    # with warnings.catch_warnings():
+    #     warnings.simplefilter("ignore")
+    #     imsave(out_dir+'/'+file_name.split('/')[-1].replace('.jpeg','.png'),out.astype('uint8'))
+def validate(args):
+    # define folder structure dict
+    dirs = {'outDir': args.base_dir + '/' + args.project + args.outDir}
+    dirs['txt_save_dir'] = '/txt_files/'
+    dirs['img_save_dir'] = '/img_files/'
+    dirs['mask_dir'] = '/wsi_mask/'
+    dirs['chopped_dir'] = '/originals/'
+    dirs['save_outputs'] = args.save_outputs
+    dirs['modeldir'] = '/MODELS/'
+    dirs['training_data_dir'] = '/TRAINING_data/'
+    dirs['validation_data_dir'] = '/HOLDOUT_data/'
-    else:
-        if args.show_interstitium:
-            if ids['category_id'] in [1,2]:
-                out[image==ids['id']]=ids['category_id']
+    # find current iteration
+    if args.iteration == 'none':
+        iteration = get_iteration(args=args)
+    else:
+        iteration = int(args.iteration)
+    # get all WSIs
+    WSIs = []
+    for ext in [args.wsi_ext]:
+        WSIs.extend(glob.glob(args.base_dir + '/' + args.project + dirs['validation_data_dir'] + '/*' + ext))
+    if iteration == 'none':
+        print('ERROR: no trained models found \n\tplease use [--option train]')
     else:
-        print('unsupported organType ')
-        print(organType)
-        exit()
+        for iter in range(1,iteration+1):
+            dirs['xml_save_dir'] = args.base_dir + '/' + args.project + dirs['validation_data_dir'] + str(iter) + '_Predicted_XMLs/'
-    return out.astype('uint8')
+            # check main directory exists
+            make_folder(dirs['outDir'])
+
+            if not os.path.exists(dirs['xml_save_dir']):
+                make_folder(dirs['xml_save_dir'])
+
+            print('working on iteration: ' + str(iter))
+
+            with open(args.base_dir + '/' + args.project + dirs['validation_data_dir'] + 'validation_stats.txt', 'a') as f:
+                f.write('\niteration: \t'+str(iter)+'\n')
+                f.write('\twsi\t\t\tsensitivity\t\t\tspecificity\t\t\tprecision\t\t\taccuracy\t\t\tprediction time\n')
+
+            for wsi in WSIs:
+                # predict xmls
+                startTime =
time.time() + + filename=dirs['xml_save_dir']+'/'+ (wsi.split('/')[-1]).split('.')[0] +'.xml' + if not os.path.isfile(filename): + predict_xml(args=args, dirs=dirs, wsi=wsi, iteration=iter) + + predictTime = time.time() - startTime + # test performance + gt_xml = os.path.splitext(wsi)[0] + '.xml' + predicted_xml = gt_xml.split('/') + predicted_xml = dirs['xml_save_dir'] + predicted_xml[-1] + sensitivity,specificity,precision,accuracy = get_perf(wsi=wsi, xml1=gt_xml, xml2 = predicted_xml, args=args) + + with open(args.base_dir + '/' + args.project + dirs['validation_data_dir'] + 'validation_stats.txt', 'a') as f: + f.write('\t'+wsi.split('/')[-1]+'\t\t'+str(sensitivity)+'\t\t'+str(specificity)+'\t\t'+str(precision)+'\t\t'+str(accuracy)+'\t\t'+str(predictTime)+'\n') + print('\n\n\033[92;5mDone validating: \n\t\033[0m\n') def predict(args): # define folder structure dict - dirs = {'outDir': args.base_dir} + dirs = {'outDir': args.base_dir + '/' + args.project + args.outDir} dirs['txt_save_dir'] = '/txt_files/' dirs['img_save_dir'] = '/img_files/' dirs['mask_dir'] = '/wsi_mask/' @@ -109,262 +137,172 @@ def predict(args): # iteration = get_iteration(args=args) # else: # iteration = int(args.iteration) - downsample = int(args.downsampleRateHR**.5) - region_size = int(args.boxSize*(downsample)) - step = int((region_size-(args.bordercrop*2))*(1-args.overlap_percentHR)) - # gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) - # gc.setToken(args.girderToken) - # project_folder = args.project - # project_dir_id = project_folder.split('/')[-2] - #model_file = args.modelfile - #print(model_file,'here model') - #model_file_id = model_file .split('/')[-2] - print('Handcoded iteration') - iteration=1 print(iteration) - dirs['xml_save_dir'] = args.base_dir - #real_path = os.path.realpath(args.project) - #print(real_path) + dirs['xml_save_dir'] = args.base_dir + '/' + args.project + dirs['training_data_dir'] + str(iteration) + '/Predicted_XMLs/' + if iteration == 'none': print('ERROR: no trained models found \n\tplease use [--option train]') else: # check main directory exists - # make_folder(dirs['outDir']) - # outdir = gc.createFolder(project_directory_id,args.outDir) - # it = gc.createFolder(outdir['_id'],str(iteration)) + make_folder(dirs['outDir']) + make_folder(dirs['xml_save_dir']) # get all WSIs - #WSIs = [] - # usable_ext=args.wsi_ext.split(',') - # for ext in usable_ext: - # WSIs.extend(glob.glob(args.project + '/*' + ext)) - # print('another one') - - # for file in args.files: - # print(file) - # slidename = file['name'] - # _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) - # WSIs.append(slidename) - - - # print(len(WSIs), 'number of WSI' ) - print('Building network configuration ...\n') - #modeldir = args.project + dirs['modeldir'] + str(iteration) + '/HR' - - os.environ["CUDA_VISIBLE_DEVICES"]="0,1" - - cfg = get_cfg() - cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")) - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32],[64],[128], [256], [512], [1024]] - cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p3', 'p4', 'p5','p6','p6'] - # cfg.MODEL.PIXEL_MEAN=[189.409,160.487,193.422] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[.1,.2,0.33, 0.5, 1.0, 2.0, 3.0,5,10]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES=[-90,-60,-30,0,30,60,90] - cfg.DATALOADER.NUM_WORKERS = 10 - cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512) - cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS=False - if not args.Mag20X: - 
cfg.INPUT.MIN_SIZE_TEST=region_size - cfg.INPUT.MAX_SIZE_TEST=region_size - else: - cfg.INPUT.MIN_SIZE_TEST=int(region_size/2) - cfg.INPUT.MAX_SIZE_TEST=int(region_size/2) - - - cfg.MODEL.WEIGHTS = args.modelfile + WSIs = [] + for ext in [args.wsi_ext]: - tc=['G','SG','T','A'] - sc=['Ob','C','M','B'] - classNum=len(tc)+len(sc)-1 - cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(tc) - cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES =len(sc) - - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.roi_thresh - # cfg.MODEL.PANOPTIC_FPN.ENABLED=False - # cfg.MODEL.PANOPTIC_FPN.INSTANCES_CONFIDENCE_THRESH = args.roi_thresh - # cfg.MODEL.PANOPTIC_FPN.OVERLAP_THRESH = 1 - - predictor = DefaultPredictor(cfg) - broken_slides=[] - for wsi in [args.files]: + WSIs.extend(glob.glob(args.base_dir + '/' + args.project + dirs['training_data_dir'] + str(iteration) + '/*' + ext)) + for wsi in WSIs: # try: + predict_xml(args=args, dirs=dirs, wsi=wsi, iteration=iteration) # except Exception as e: # print('!!! Prediction on ' + wsi + ' failed\n') # print(e) - # reshape regions calc + print('\n\n\033[92;5mPlease correct the xml annotations found in: \n\t' + dirs['xml_save_dir']) + print('\nthen place them in: \n\t'+ args.base_dir + '/' + args.project + dirs['training_data_dir'] + str(iteration) + '/') + print('\nand run [--option train]\033[0m\n') - extsplit = os.path.splitext(wsi) - basename = extsplit[0] - extname = extsplit[-1] - print(basename) - # print(extname) - # try: - slide=openslide.TiffSlide(wsi) - print(wsi,'here/s the silde') - # slide = ti.imread(wsi) - - # except: - # broken_slides.append(wsi) - # continue - # continue + +def predict_xml(args, dirs, wsi, iteration): + # reshape regions calc + downsample = int(args.downsampleRateHR**.5) + region_size = int(args.boxSizeHR*(downsample)) + step = int(region_size)-(args.bordercrop*2) + + # figure out the number of classes + if args.classNum == 0: + annotatedXMLs=glob.glob(args.base_dir + '/' + args.project + dirs['training_data_dir'] + str(iteration-1) + '/*.xml') + classes = [] + for xml in annotatedXMLs: + classes.append(get_num_classes(xml)) + classNum = max(classes) + else: + classNum = args.classNum + + if args.chop_data == 'True': + # chop wsi + fileID, test_num_steps = chop_suey(wsi, dirs, downsample, region_size, step, args) + dirs['fileID'] = fileID + print('Chop SUEY!\n') + else: + basename = os.path.splitext(wsi)[0] + + if wsi.split('.')[-1] != 'tif': + slide=getWsi(wsi) # get image dimensions - if extname=='.scn': - dim_y=int(slide.properties['openslide.bounds-height']) - dim_x=int(slide.properties['openslide.bounds-width']) - offsetx=int(slide.properties['openslide.bounds-x']) - offsety=int(slide.properties['openslide.bounds-y']) - # print(dim_x,dim_y,offsetx,offsety) - else: - dim_x, dim_y=slide.dimensions - offsetx=0 - offsety=0 - - print(dim_x,dim_y) - fileID=basename.split('/') - dirs['fileID'] = fileID[-1] - dirs['extension'] = extname - dirs['file_name'] = wsi.split('/')[-1] - - - wsiMask = np.zeros([dim_y, dim_x], dtype='uint8') - - index_y=np.array(range(offsety,dim_y+offsety,step)) - index_x=np.array(range(offsetx,dim_x+offsetx,step)) - print('Getting thumbnail mask to identify predictable tissue...') - fullSize=slide.level_dimensions[0] - resRatio= args.chop_thumbnail_resolution - ds_1=fullSize[0]/resRatio - ds_2=fullSize[1]/resRatio - thumbIm=np.array(slide.get_thumbnail((ds_1,ds_2))) - if extname =='.scn': - xStt=int(offsetx/resRatio) - xStp=int((offsetx+dim_x)/resRatio) - yStt=int(offsety/resRatio) - yStp=int((offsety+dim_y)/resRatio) - 
thumbIm=thumbIm[yStt:yStp,xStt:xStp] - - hsv=rgb2hsv(thumbIm) - g=gaussian(hsv[:,:,1],5) - binary=(g>0.05).astype('bool') - binary=binary_fill_holes(binary) - - print('Segmenting tissue ...\n') - totalpatches=len(index_x)*len(index_y) - with tqdm(total=totalpatches,unit='image',colour='green',desc='Total WSI progress') as pbar: - for i,j in coordinate_pairs(index_y,index_x): - - yEnd = min(dim_y+offsety,i+region_size) - xEnd = min(dim_x+offsetx,j+region_size) - # yStart_small = int(np.round((i-offsety)/resRatio)) - # yStop_small = int(np.round(((i-offsety)+args.boxSize)/resRatio)) - # xStart_small = int(np.round((j-offsetx)/resRatio)) - # xStop_small = int(np.round(((j-offsetx)+args.boxSize)/resRatio)) - yStart_small = int(np.round((i-offsety)/resRatio)) - yStop_small = int(np.round(((yEnd-offsety))/resRatio)) - xStart_small = int(np.round((j-offsetx)/resRatio)) - xStop_small = int(np.round(((xEnd-offsetx))/resRatio)) - box_total=(xStop_small-xStart_small)*(yStop_small-yStart_small) - pbar.update(1) - if np.sum(binary[yStart_small:yStop_small,xStart_small:xStop_small])>(args.white_percent*box_total): - - xLen=xEnd-j - yLen=yEnd-i - - dxS=j - dyS=i - dxE=j+xLen - dyE=i+yLen - print(xLen,yLen) - print('here is the length') - im=np.array(slide.read_region((dxS,dyS),0,(xLen,yLen)))[:,:,:3] - #print(sys.getsizeof(im), 'first') - #UPSAMPLE - im = zoom(im,(4,4,1),order=1) - print(sys.getsizeof(im), 'second') - panoptic_seg, segments_info = predictor(im)["panoptic_seg"] - del im - torch.cuda.empty_cache() - print(sys.getsizeof(panoptic_seg), 'third') - print(sys.getsizeof(segments_info), 'forth') - maskpart=decode_panoptic(panoptic_seg.to("cpu").numpy(),segments_info,'kidney',args) - del panoptic_seg, segments_info - #outImageName=basename+'_'.join(['',str(dxS),str(dyS)]) - #print(sys.getsizeof(maskpart), 'fifth') - #DOWNSAMPLE - maskpart=zoom(maskpart,(0.25,0.25),order=0) - #print(sys.getsizeof(maskpart), 'sixth') - - # imsave(outImageName+'_p.png',maskpart) - if dxE != dim_x: - maskpart[:,-int(args.bordercrop/2):]=0 - if dyE != dim_y: - maskpart[-int(args.bordercrop/2):,:]=0 - - if dxS != offsetx: - maskpart[:,:int(args.bordercrop/2)]=0 - if dyS != offsety: - maskpart[:int(args.bordercrop/2),:]=0 - - # xmlbuilder.deconstruct(maskpart,dxS-offsetx,dyS-offsety,args) - # plt.subplot(121) - # plt.imshow(im) - # plt.subplot(122) - # plt.imshow(maskpart) - # plt.show() - - dyE-=offsety - dyS-=offsety - dxS-=offsetx - dxE-=offsetx - - wsiMask[dyS:dyE,dxS:dxE]=np.maximum(maskpart, - wsiMask[dyS:dyE,dxS:dxE]) - - del maskpart - torch.cuda.empty_cache() - # wsiMask[dyS:dyE,dxS:dxE]=maskpart - - # print('showing mask') - # plt.imshow(wsiMask) - # plt.show() - slide.close() - print('\n\nStarting XML construction: ') - - # wsiMask=np.swapaxes(wsiMask,0,1) - # print('swapped axes') - # xmlbuilder.sew(args) - # xmlbuilder.dump_to_xml(args,offsetx,offsety) - if extname=='.scn': - print('here writing 1') - xml_suey(wsiMask=wsiMask, dirs=dirs, args=args, classNum=classNum, downsample=downsample,glob_offset=[offsetx,offsety]) - else: - print('here writing 2') - xml_suey(wsiMask=wsiMask, dirs=dirs, args=args, classNum=classNum, downsample=downsample,glob_offset=[0,0]) + dim_x, dim_y=slide.dimensions + else: + im = Image.open(wsi) + dim_x, dim_y=im.size + fileID=basename.split('/') + dirs['fileID'] = fileID=fileID[len(fileID)-1] + # test_num_steps = file_len(dirs['outDir'] + fileID + dirs['txt_save_dir'] + fileID + '_images' + ".txt") + print('Segmenting tissue ...\n') + network_output_folder=dirs['outDir'] + fileID 
+ dirs['img_save_dir'] + 'prediction' + make_folder(network_output_folder) + test_data_list = fileID + '_images' + '.txt' + modeldir = args.base_dir + '/' + args.project + dirs['modeldir'] + str(iteration) + '/HR' - print('\n\n\033[92;5mPlease correct the xml annotations found in: \n\t' + dirs['xml_save_dir']) - print('\nthen place them in: \n\t'+ dirs['training_data_dir'] + str(iteration) + '/') - print('\nand run [--option train]\033[0m\n') - print('The following slides were not openable by openslide:') - print(broken_slides) + os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu) + t=time.time() + + + cfg = get_cfg() + cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")) + # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[4],[8],[16], [32], [64], [64], [64]] + # cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p2', 'p2', 'p3','p4','p5','p6'] + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2.0, 3.0]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES=[-90,-60,-30,0,30,60,90] + cfg.DATALOADER.NUM_WORKERS = 2 + + cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # faster, and good enough for this toy dataset (default: 512) + cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2 + cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES =2 + cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS=False + cfg.INPUT.MIN_SIZE_TEST=0 + # cfg.INPUT.MAX_SIZE_TEST=region_size + # cfg.INPUT.MIN_SIZE_TEST=64 + # cfg.INPUT.MAX_SIZE_TEST=500 + # cfg.INPUT.MIN_SIZE_TEST=64 + # cfg.INPUT.MAX_SIZE_TEST=3000 + cfg.MODEL.WEIGHTS = args.modelfile + + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.roi_thresh + if args.predict_data: + predictor = DefaultPredictor(cfg) + # make index for iters + wsiMask = np.zeros([dim_y, dim_x]).astype(np.uint8) + + index_y=np.array(range(0,dim_y,step)) + index_x=np.array(range(0,dim_x,step)) + print('Getting choppable regions') + # get non white regions + choppable_regions = get_choppable_regions(wsi=wsi, index_x=index_x, index_y=index_y, boxSize=region_size,white_percent=args.white_percent) + + print('Building detectron dataset') + DatasetCatalog.register("my_dataset", lambda:WSIGridIterator(wsi,choppable_regions,index_x,index_y,region_size,dim_x,dim_y)) + + + # images_for_prediction=dirs['outDir'] + dirs['fileID'] + dirs['img_save_dir'] + dirs['chopped_dir'] + # DatasetCatalog.register("my_dataset", lambda:HAIL2Detectron_predict(images_for_prediction,region_size)) + MetadataCatalog.get("my_dataset").set(thing_classes=['BD','AT']) + MetadataCatalog.get("my_dataset").set(stuff_classes=['I','BG']) + + seg_metadata=MetadataCatalog.get("my_dataset") + dataset_dicts=DatasetCatalog.get("my_dataset") + # print(len(dataset_dicts)) + # exit() + for d in tqdm(dataset_dicts): + dxS=d["xStart"] + dyS=d["yStart"] + dxE=d["xStart"]+d["width"] + dyE=d["yStart"]+d["height"] + # print(dxS,dyS,dxE,dyE,dim_x,dim_y,np.shape(wsiMask)) + # im = cv2.imread(d["file_name"]) + im=np.array(slide.read_region((dxS,dyS),0,(d["width"],d["height"])))[:,:,:3][:,:,::-1] + panoptic_seg, segments_info = predictor(im)["panoptic_seg"] + wsiMask[dyS:dyE,dxS:dxE]=decode_panoptic(panoptic_seg.to("cpu").numpy(), segments_info,network_output_folder,d["file_name"]) + + + # print('elapsed time '+str(time.time()-t)) + # + # # un chop + # print('\nreconstructing wsi map ...\n') + # wsiMask = un_suey(dirs=dirs, args=args,wsi_a=wsi) + + # save hotspots + # + # if dirs['save_outputs'] == True: + # make_folder(dirs['outDir'] + fileID + dirs['mask_dir']) + # print('saving to: ' + dirs['outDir'] + fileID + dirs['mask_dir'] + fileID + '.png') + # with 
warnings.catch_warnings(): + # warnings.simplefilter("ignore") + # imsave(dirs['outDir'] + fileID + dirs['mask_dir'] + fileID + '.png', wsiMask) + + print('\n\nStarting XML construction: ') + + xml_suey(wsiMask=wsiMask, dirs=dirs, args=args, classNum=classNum, downsample=downsample) + DatasetCatalog.remove("my_dataset") + + # # clean up + # if dirs['save_outputs'] == False: + # print('cleaning up') + # rmtree(dirs['outDir']+fileID) -def coordinate_pairs(v1,v2): - for i in v1: - for j in v2: - yield i,j def get_iteration(args): - currentmodels=os.listdir(args.base_dir) + currentmodels=os.listdir(args.base_dir + '/' + args.project + '/MODELS/') if not currentmodels: return 'none' else: @@ -389,12 +327,8 @@ def get_test_model(modeldir): return ''.join([modeldir,'/model_',maxmodel,'.pth']) def make_folder(directory): - print(directory,'predict dir') - #if not os.path.exists(directory): - try: + if not os.path.exists(directory): os.makedirs(directory) # make directory if it does not exit already # make new directory - except: - print('folder exists!') def restart_line(): # for printing chopped image labels in command line sys.stdout.write('\r') @@ -402,7 +336,7 @@ def restart_line(): # for printing chopped image labels in command line def getWsi(path): #imports a WSI import openslide - slide = openslide.TiffSlide(path) + slide = openslide.OpenSlide(path) return slide def file_len(fname): # get txt file length (number of lines) @@ -417,7 +351,187 @@ def file_len(fname): # get txt file length (number of lines) return 0 -def xml_suey(wsiMask, dirs, args, classNum, downsample,glob_offset): +def chop_suey(wsi, dirs, downsample, region_size, step, args): # chop wsi + print('\nopening: ' + wsi) + basename = os.path.splitext(wsi)[0] + + if wsi.split('.')[-1] != 'tif': + slide=getWsi(wsi) + # get image dimensions + dim_x, dim_y=slide.dimensions + else: + im = Image.open(wsi) + dim_x, dim_y=im.size + + fileID=basename.split('/') + dirs['fileID'] = fileID=fileID[len(fileID)-1] + print('\nchopping ...\n') + + # make txt file + make_folder(dirs['outDir'] + fileID + dirs['txt_save_dir']) + f_name = dirs['outDir'] + fileID + dirs['txt_save_dir'] + fileID + ".txt" + f2_name = dirs['outDir'] + fileID + dirs['txt_save_dir'] + fileID + '_images' + ".txt" + f = open(f_name, 'w') + f2 = open(f2_name, 'w') + f2.close() + + make_folder(dirs['outDir'] + fileID + dirs['img_save_dir'] + dirs['chopped_dir']) + + f.write('Image dimensions:\n') + + # make index for iters + index_y=np.array(range(0,dim_y,step)) + index_x=np.array(range(0,dim_x,step)) + + f.write('X dim: ' + str((index_x[-1]+region_size)/downsample) +'\n') + f.write('Y dim: ' + str((index_y[-1]+region_size)/downsample) +'\n\n') + f.write('Regions:\n') + f.write('image:xStart:xStop:yStart:yStop\n\n') + f.close() + + # get non white regions + choppable_regions = get_choppable_regions(wsi=wsi, index_x=index_x, index_y=index_y, boxSize=region_size,white_percent=args.white_percent) + + print('saving region:') + + num_cores = multiprocessing.cpu_count() + + Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(limits=[dim_y,dim_x],yStart=i, xStart=j, idxx=idxx, idxy=idxy, + f_name=f_name, f2_name=f2_name, dirs=dirs, downsample=downsample, region_size=region_size, args=args, + wsi=wsi, choppable_regions=choppable_regions) for idxy, i in enumerate(index_y) for idxx, j in enumerate(index_x)) + + test_num_steps = file_len(dirs['outDir'] + fileID + dirs['txt_save_dir'] + fileID + '_images' + ".txt") + print('\n\n' + str(test_num_steps) +' image regions 
chopped') + + return fileID, test_num_steps + +def chop_wsi(limits,yStart, xStart, idxx, idxy, f_name, f2_name, dirs, downsample, region_size, args, wsi, choppable_regions): # perform cutting in parallel + if choppable_regions[idxy, idxx] != 0: + yEnd = yStart+region_size + #print(yEnd) + xEnd = xStart+region_size + #print(xEnd) + xLen=xEnd-xStart + yLen=yEnd-yStart + + if wsi.split('.') != 'tif': + slide = getWsi(wsi) + subsect= np.array(slide.read_region((xStart,yStart),0,(xLen,yLen))) + subsect=subsect[:,:,:3] + + else: + subsect_ = imread(wsi)[yStart:yEnd, xStart:xEnd, :3] + subsect = np.zeros([region_size,region_size,3]) + subsect[0:subsect_.shape[0], 0:subsect_.shape[1], :] = subsect_ + + #print(whiteRatio) + imageIter = str(xStart)+str(yStart) + + f = open(f_name, 'a+') + f2 = open(f2_name, 'a+') + + # append txt file + f.write(imageIter + ':' + str(xStart/downsample) + ':' + str(xEnd/downsample) + + ':' + str(yStart/downsample) + ':' + str(yEnd/downsample) + '\n') + + # resize image + if downsample > 1: + c=(subsect.shape) + s1=int(c[0]/downsample) + s2=int(c[1]/downsample) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + subsect=resize(subsect,(s1,s2), mode='constant') + + # save image + directory = dirs['outDir'] + dirs['fileID'] + dirs['img_save_dir'] + dirs['chopped_dir'] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(directory + dirs['fileID'] + str(imageIter) + args.imBoxExt,subsect) + + f2.write(dirs['chopped_dir'] + dirs['fileID'] + str(imageIter) + args.imBoxExt + '\n') + f.close() + f2.close() + + sys.stdout.write(' <'+str(xStart)+':'+str(xEnd)+' '+str(yStart)+':'+str(yEnd)+'> ') + sys.stdout.flush() + restart_line() + +def un_suey(dirs,wsi_a, args): # reconstruct wsi from predicted masks + txtFile = dirs['fileID'] + '.txt' + + # read txt file + f = open(dirs['outDir'] + dirs['fileID'] + dirs['txt_save_dir'] + txtFile, 'r') + lines = f.readlines() + f.close() + lines = np.array(lines) + + # get wsi size + xDim = np.uint32(float((lines[1].split(': ')[1]).split('\n')[0])) + yDim = np.uint32(float((lines[2].split(': ')[1]).split('\n')[0])) + #print('xDim: ' + str(xDim)) + #print('yDim: ' + str(yDim)) + + # make wsi mask + wsiMask = np.zeros([yDim, xDim]).astype(np.uint8) + + # read image regions + for regionNum in range(7, np.size(lines)): + # print regionNum + sys.stdout.write(' <'+str(regionNum-7)+ ' of ' + str(np.size(lines)-8) +'> ') + sys.stdout.flush() + restart_line() + + # get region + region = lines[regionNum].split(':') + region[4] = region[4].split('\n')[0] + + # read mask + mask = imread(dirs['outDir'] + dirs['fileID'] + dirs['img_save_dir'] + 'prediction/' + dirs['fileID'] + region[0] + '.png') + + # get region bounds + xStart = np.uint32(float(region[1])) + #print('xStart: ' + str(xStart)) + xStop = np.uint32(float(region[2])) + #print('xStop: ' + str(xStop)) + yStart = np.uint32(float(region[3])) + #print('yStart: ' + str(yStart)) + yStop = np.uint32(float(region[4])) + #print('yStop: ' + str(yStop)) + + if yStop > yDim: + yStop=yDim + if xStop > xDim: + xStop=xDim + + + + mask_part = wsiMask[yStart:yStop, xStart:xStop] + + + + + ylen, xlen = np.shape(mask_part) + mask = mask[:ylen, :xlen] + + if yStart>0: + mask[-args.bordercrop:,:]=0 + if xStart>0: + mask[:,-args.bordercrop:]=0 + if yStop2: #m=np.squeeze(np.asarray(maskPoints2[j])) #xMax=np.max(m[:,1]) @@ -486,7 +580,7 @@ def get_contour_points(mask, args, downsample,value, offset={'X': 0,'Y': 0}): if cv2.contourArea(maskPoints[j]) > args.min_size[value-1]: 
                pointList = []
-                for i in np.array(range(0,len(maskPoints[j]),4)):
+                for i in range(0,np.shape(maskPoints[j])[0],4):
                     point = {'X': (maskPoints[j][i][0][0] * downsample) + offset['X'], 'Y': (maskPoints[j][i][0][1] * downsample) + offset['Y']}
                     pointList.append(point)
                 pointsList.append(pointList)
@@ -503,10 +597,7 @@ def xml_add_annotation(Annotations, annotationID=None): # add new annotation
     # defaults to new annotationID
     if annotationID == None: # not specified
         annotationID = len(Annotations.findall('Annotation')) + 1
-    if annotationID in [1,2]:
-        Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Type': '4', 'Visible': '0', 'ReadOnly': '0', 'Incremental': '0', 'LineColorReadOnly': '0', 'LineColor': str(xml_color[annotationID-1]), 'Id': str(annotationID), 'NameReadOnly': '0'})
-    else:
-        Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Type': '4', 'Visible': '1', 'ReadOnly': '0', 'Incremental': '0', 'LineColorReadOnly': '0', 'LineColor': str(xml_color[annotationID-1]), 'Id': str(annotationID), 'NameReadOnly': '0'})
+    Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Type': '4', 'Visible': '1', 'ReadOnly': '0', 'Incremental': '0', 'LineColorReadOnly': '0', 'LineColor': str(xml_color[annotationID-1]), 'Id': str(annotationID), 'NameReadOnly': '0'})
     Regions = ET.SubElement(Annotation, 'Regions')
     return Annotations
@@ -525,14 +616,14 @@ def xml_add_region(Annotations, pointList, annotationID=-1, regionID=None): # ad
         ET.SubElement(Vertices, 'Vertex', attrib={'X': str(pointList[0]['X']), 'Y': str(pointList[0]['Y']), 'Z': '0'})
     return Annotations
-# def xml_save(Annotations, filename):
-#     xml_data = ET.tostring(Annotations, pretty_print=True)
-#     #xml_data = Annotations.toprettyxml()
-#     f = open(filename, 'w')
-#     f.write(xml_data.decode())
-#     f.close()
-
-# def read_xml(filename):
-#     # import xml file
-#     tree = ET.parse(filename)
-#     root = tree.getroot()
+def xml_save(Annotations, filename):
+    xml_data = ET.tostring(Annotations, pretty_print=True)
+    #xml_data = Annotations.toprettyxml()
+    f = open(filename, 'wb')
+    f.write(xml_data)
+    f.close()
+
+def read_xml(filename):
+    # import xml file
+    tree = ET.parse(filename)
+    root = tree.getroot()
diff --git a/histomicstk/segmentationschool/Codes/IterativeTraining.py b/histomicstk/segmentationschool/Codes/IterativeTraining.py
old mode 100644
new mode 100755
index b6f887f..c70561e
--- a/histomicstk/segmentationschool/Codes/IterativeTraining.py
+++ b/histomicstk/segmentationschool/Codes/IterativeTraining.py
@@ -2,29 +2,29 @@
 import multiprocessing
 import os
 import sys
-# import cv2
-# import matplotlib.pyplot as plt
+import cv2
+import matplotlib.pyplot as plt
 import time
-# import random
+import random
 import warnings
-# import argparse
+import argparse
 from skimage.transform import resize
 from skimage.io import imread, imsave
-# from skimage.morphology import remove_small_objects
-# from skimage.color import rgb2lab
-# from scipy.ndimage.measurements import label
-# from scipy.ndimage.morphology import binary_fill_holes
+from skimage.morphology import remove_small_objects
+from skimage.color import rgb2lab
+from scipy.ndimage.measurements import label
+from scipy.ndimage.morphology import binary_fill_holes
 from glob import glob
-from .getWsi import getWsi
-from .xml_to_mask import xml_to_mask, get_num_classes
+from getWsi import getWsi
+from xml_to_mask import xml_to_mask,get_num_classes
 from joblib import Parallel, delayed
-from shutil import rmtree,move#,copyfile
+from shutil import rmtree,move,copyfile
 from imgaug
import augmenters as iaa -from .randomHSVshift import randomHSVshift -from .generateTrainSet import generateDatalists +from randomHSVshift import randomHSVshift +from generateTrainSet import generateDatalists from subprocess import call -from .get_choppable_regions import get_choppable_regions +from get_choppable_regions import get_choppable_regions """ Code for - cutting / augmenting / training CNN diff --git a/histomicstk/segmentationschool/Codes/IterativeTraining_1X.py b/histomicstk/segmentationschool/Codes/IterativeTraining_1X.py old mode 100644 new mode 100755 index 9c98890..b9e031c --- a/histomicstk/segmentationschool/Codes/IterativeTraining_1X.py +++ b/histomicstk/segmentationschool/Codes/IterativeTraining_1X.py @@ -1,4 +1,4 @@ -import os, sys, cv2, time, random, warnings, multiprocessing#json,# detectron2 +import os, sys, cv2, time, random, warnings, argparse, csv, multiprocessing,json, detectron2 import numpy as np import matplotlib.pyplot as plt import lxml.etree as ET @@ -6,14 +6,13 @@ from skimage.transform import resize from skimage.io import imread, imsave import glob -from .getWsi import getWsi - -from .xml_to_mask2 import get_supervision_boxes, regions_in_mask_dots, get_vertex_points_dots, masks_from_points, restart_line +from getWsi import getWsi +from xml_to_mask2o import * from joblib import Parallel, delayed from shutil import move # from generateTrainSet import generateDatalists -#from subprocess import call -#from .get_choppable_regions import get_choppable_regions +from subprocess import call +from get_choppable_regions import get_choppable_regions from PIL import Image from detectron2.utils.logger import setup_logger @@ -21,12 +20,12 @@ from detectron2 import model_zoo from detectron2.engine import DefaultPredictor,DefaultTrainer from detectron2.config import get_cfg -from detectron2.utils.visualizer import Visualizer#,ColorMode +from detectron2.utils.visualizer import Visualizer,ColorMode from detectron2.data import MetadataCatalog, DatasetCatalog -#from detectron2.structures import BoxMode -from .get_dataset_list import HAIL2Detectron, samples_from_json, samples_from_json_mini -#from detectron2.checkpoint import DetectionCheckpointer -#from detectron2.modeling import build_model +from detectron2.structures import BoxMode +from get_dataset_list import * +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.modeling import build_model """ @@ -52,7 +51,7 @@ def IterateTraining(args): stepHR = int(region_sizeHR*(1-args.overlap_percentHR)) #Step size before downsampling - global classNum_HR,classEnumLR,classEnumHR + global classNum_HR,classEnumLR,classEnumHR dirs = {'imExt': '.jpeg'} dirs['basedir'] = args.base_dir dirs['maskExt'] = '.png' @@ -75,7 +74,7 @@ def IterateTraining(args): currentmodels=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) print('Handcoded iteration') # currentAnnotationIteration=check_model_generation(dirs) - currentAnnotationIteration=2 + currentAnnotationIteration=0 print('Current training session is: ' + str(currentAnnotationIteration)) ##Create objects for storing class distributions @@ -126,20 +125,74 @@ def IterateTraining(args): else: im = Image.open(wsiID) dim_x, dim_y=im.size - location=[0,0] - size=[dim_x,dim_y] - tree = ET.parse(xmlID) - root = tree.getroot() - box_supervision_layers=['8'] - # calculate region bounds - global_bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0], 'y_max' : location[1] + size[1]} - local_bounds = 
get_supervision_boxes(root,box_supervision_layers) - num_cores = multiprocessing.cpu_count() - Parallel(n_jobs=num_cores)(delayed(chop_suey_bounds)(args=args,wsiID=wsiID, - dirs=dirs,lb=lb,xmlID=xmlID,box_supervision_layers=box_supervision_layers) for lb in tqdm(local_bounds)) - # for lb in tqdm(local_bounds): - # size_data.extend(image_sizes) + if args.box_supervision: + location=[0,0] + size=[dim_x,dim_y] + tree = ET.parse(xmlID) + root = tree.getroot() + + box_supervision_layers=['9'] + print('Box supervision layer') + print(box_supervision_layers) + # calculate region bounds + global_bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0], 'y_max' : location[1] + size[1]} + local_bounds = get_supervision_boxes(root,box_supervision_layers) + num_cores = multiprocessing.cpu_count() + + Parallel(n_jobs=num_cores)(delayed(chop_suey_bounds)(args=args,wsiID=wsiID, + dirs=dirs,lb=lb,xmlID=xmlID,box_supervision_layers=box_supervision_layers) for lb in tqdm(local_bounds)) + + else: + if fileID=='K1300466_6_PAS_05082017_001': + t=time.time() + + wsi_mask=xml_to_mask(xmlID, [0,0], [87519,44938]) + wsi_mask=wsi_mask[:,0:36000] + print('Time for mask generation ' + str(time.time()-t)) + print('Restricted mask for ' + fileID) + + + elif fileID=='K1300473_4_PAS_05082017_001_003': + t=time.time() + + wsi_mask=xml_to_mask(xmlID, [0,0], [128600,46112]) + wsi_mask=wsi_mask[:,0:60000] + print('Time for mask generation ' + str(time.time()-t)) + print('Restricted mask for ' + fileID) + + else: + t=time.time() + wsi_mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y]) + print('Time for mask generation ' + str(time.time()-t)) + + # wsi_mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y]) + + #Enumerate cpu core count + num_cores = multiprocessing.cpu_count() + + #Generate iterators for parallel chopping of WSIs in high resolution + + index_yHR=np.array(range(0,dim_y,stepHR)) + index_xHR=np.array(range(0,dim_x,stepHR)) + #Make sure python doesn't forget about our end blocks + index_yHR[-1]=dim_y-stepHR + index_xHR[-1]=dim_x-stepHR + #Create memory address for chopped images high resolution + outdirHR=dirs['basedir'] + dirs['project'] + dirs['tempdirHR'] + + #Perform high resolution chopping in parallel and return the number of + #images in each of the labeled classes + chop_regions=get_choppable_regions(wsi=wsiID, + index_x=index_xHR,index_y=index_yHR,boxSize=region_sizeHR,white_percent=args.white_percent) + + Parallel(n_jobs=num_cores)(delayed(return_region)(args=args, + wsi_mask=wsi_mask, wsiID=wsiID, + fileID=fileID, yStart=j, xStart=i, idxy=idxy, + idxx=idxx, downsampleRate=args.downsampleRateHR, + outdirT=outdirHR, region_size=region_sizeHR, + dirs=dirs, chop_regions=chop_regions,classNum_HR=classNum_HR) for idxx,i in enumerate(index_xHR) for idxy,j in enumerate(index_yHR)) + ''' wsi_mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y]) @@ -226,16 +279,29 @@ def IterateTraining(args): # img_dir='/hdd/bg/Detectron2/chop_detectron/Permanent/HR' img_dir=dirs['outDirAIHR'] - classnames=['Background','BD','A'] - isthing=[0,1,1] - xml_color = [[0,255,0], [0,255,255], [0,0,255]] - + organType='kidney' + print('Chopping with rules... 
'+ organType) + if organType=='liver': + classnames=['Background','BD','A'] + isthing=[0,1,1] + xml_color = [[0,255,0], [0,255,255], [0,0,255]] + tc=['BD','AT'] + sc=['Ob','B'] + elif organType =='kidney': + classnames=['interstitium','glomerulus','sclerotic glomerulus','tubule','arterial tree'] + classes={} + isthing=[0,1,1,1,1] + xml_color = [[0,255,0], [0,255,255], [0,0,255], [255,0,0], [0,128,255]] + tc=['G','SG','T','A'] + sc=['Ob','I','B'] + else: + print('Provided organType not in supported types: kidney, liver') rand_sample=True + json_dir=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/' + json_file=json_dir+'detectron_train' + if args.prepare_detectron_json: + HAIL2Detectron(img_dir,rand_sample,json_file,classnames,isthing,xml_color,organType,dirs) - json_file=img_dir+'/detectron_train.json' - HAIL2Detectron(img_dir,rand_sample,json_file,classnames,isthing,xml_color) - tc=['BD','AT'] - sc=['I','B'] #### From json DatasetCatalog.register("my_dataset", lambda:samples_from_json(json_file,rand_sample)) MetadataCatalog.get("my_dataset").set(thing_classes=tc) @@ -243,11 +309,12 @@ def IterateTraining(args): seg_metadata=MetadataCatalog.get("my_dataset") - + # # new_list = DatasetCatalog.get("my_dataset") # print(len(new_list)) - # for d in random.sample(new_list, 100): - # + # for d in random.sample(new_list, 10000): + # ident=d["file_name"].split('/')[-1] + # print(ident) # img = cv2.imread(d["file_name"]) # visualizer = Visualizer(img[:, :, ::-1],metadata=seg_metadata, scale=0.5) # out = visualizer.draw_dataset_dict(d) @@ -261,34 +328,36 @@ def IterateTraining(args): cfg.DATASETS.TRAIN = ("my_dataset") cfg.DATASETS.TEST = () num_cores = multiprocessing.cpu_count() - cfg.DATALOADER.NUM_WORKERS = num_cores-3 + cfg.DATALOADER.NUM_WORKERS = 5 # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") # Let training initialize from model zoo - cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/liver/MODELS/0/HR', "model_final.pth") - - cfg.SOLVER.IMS_PER_BATCH = 10 + # cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/liver/MODELS/0/HR', "model_final.pth") + cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/output_PASHE_finetune_1', "model_0064999.pth") + cfg.SOLVER.IMS_PER_BATCH = 4 - # cfg.SOLVER.BASE_LR = 0.02 # pick a good LR - # cfg.SOLVER.LR_policy='steps_with_lrs' - # cfg.SOLVER.MAX_ITER = 50000 - # cfg.SOLVER.STEPS = [30000,40000] - # # cfg.SOLVER.STEPS = [] - # cfg.SOLVER.LRS = [0.002,0.0002] - cfg.SOLVER.BASE_LR = 0.002 # pick a good LR + cfg.SOLVER.BASE_LR = 0.00002 # pick a good LR cfg.SOLVER.LR_policy='steps_with_lrs' - cfg.SOLVER.MAX_ITER = 200000 - cfg.SOLVER.STEPS = [150000,180000] + cfg.SOLVER.MAX_ITER = 80000 + cfg.SOLVER.STEPS = [] # cfg.SOLVER.STEPS = [] - cfg.SOLVER.LRS = [0.0002,0.00002] + # cfg.SOLVER.LRS = [0.00002] + + # cfg.SOLVER.BASE_LR = 0.002 # pick a good LR + # cfg.SOLVER.LR_policy='steps_with_lrs' + # cfg.SOLVER.MAX_ITER = 200000 + # cfg.SOLVER.STEPS = [150000,180000] + # # cfg.SOLVER.STEPS = [] + # cfg.SOLVER.LRS = [0.0002,0.00002] # cfg.INPUT.CROP.ENABLED = True # cfg.INPUT.CROP.TYPE='absolute' # cfg.INPUT.CROP.SIZE=[100,100] cfg.MODEL.BACKBONE.FREEZE_AT = 0 - # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[4],[8],[16], [32], [64], [64], [64]] - # cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p2', 'p2', 'p3','p4','p5','p6'] + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32],[64],[128], [256], [512], [1024]] + cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p3', 'p4', 'p5','p6','p6'] + 
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2.0, 3.0]] cfg.MODEL.ANCHOR_GENERATOR.ANGLES=[-90,-60,-30,0,30,60,90] @@ -323,8 +392,8 @@ def IterateTraining(args): # # cfg.INPUT.MIN_SIZE_TRAIN=64 # cfg.INPUT.MAX_SIZE_TRAIN=4000 - cfg.INPUT.MIN_SIZE_TEST=64 - cfg.INPUT.MAX_SIZE_TEST=500 + cfg.INPUT.MIN_SIZE_TEST=1200 + cfg.INPUT.MAX_SIZE_TEST=1200 predict_samples=100 @@ -502,11 +571,14 @@ def return_region(args, wsi_mask, wsiID, fileID, yStart, xStart, idxy, idxx, dow s1=int(c[0]/(downsampleRate**.5)) s2=int(c[1]/(downsampleRate**.5)) Im=resize(Im,(s1,s2),mode='reflect') + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+uniqID+dirs['maskExt'] + image_out_name=mask_out_name.replace('/masks/','/regions/').replace(dirs['maskExt'],dirs['imExt']) with warnings.catch_warnings(): warnings.simplefilter("ignore") - imsave(outdirT + '/regions/' + uniqID + dirs['imExt'],Im) - imsave(outdirT + '/masks/' + uniqID +dirs['maskExt'],mask_annotation) + + imsave(image_out_name,Im) + imsave(mask_out_name,mask_annotation) def regions_in_mask(root, bounds, verbose=1): @@ -599,8 +671,7 @@ def get_vertex_points(root, IDs_reg,IDs_points, maskModes,excludedIDs,negativeID return useableRegions def chop_suey_bounds(lb,xmlID,box_supervision_layers,wsiID,dirs,args): - tree = ET.parse(xmlID) - root = tree.getroot() + lbVerts=np.array(lb['BoxVerts']) xMin=min(lbVerts[:,0]) xMax=max(lbVerts[:,0]) @@ -610,17 +681,72 @@ def chop_suey_bounds(lb,xmlID,box_supervision_layers,wsiID,dirs,args): # test=np.array(slide.read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] local_bound = {'x_min' : xMin, 'y_min' : yMin, 'x_max' : xMax, 'y_max' : yMax} - IDs_reg,IDs_points = regions_in_mask_dots(root=root, bounds=local_bound,box_layers=box_supervision_layers) + if args.chop_with_replacement: + tree = ET.parse(xmlID) + root = tree.getroot() + IDs_reg,IDs_points = regions_in_mask_dots(root=root, bounds=local_bound,box_layers=box_supervision_layers) + + # find regions in bounds + negativeIDs=['4'] + excludedIDs=['1'] + falsepositiveIDs=['4'] + usableRegions= get_vertex_points_dots(root=root, IDs_reg=IDs_reg,IDs_points=IDs_points,excludedIDs=excludedIDs,maskModes=['falsepositive','positive'],negativeIDs=negativeIDs, + falsepositiveIDs=falsepositiveIDs) + + # image_sizes= + masks_from_points(usableRegions,wsiID,dirs,50,args,[xMin,xMax,yMin,yMax]) + if args.standard_chop: + l2=yMax-yMin #y + l1=xMax-xMin #x + pas_img = getWsi(wsiID) + dim_x,dim_y=pas_img.dimensions + mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y],ignore_id=box_supervision_layers, downsample_factor=1, verbose=0) + mask=mask[yMin:yMax,xMin:xMax] + + # print(xMin,yMin,l1,l2) + region=np.array(pas_img.read_region((xMin,yMin),0,(l1,l2)))[:,:,:3] + + basename=wsiID.split('/')[-1].split('.svs')[0] + max_mask_size=args.training_max_size + substepHR = int(max_mask_size*(1-args.overlap_percentHR)) #Step size before downsampling + + + # plt.subplot(121) + # plt.imshow(region) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() - # find regions in bounds - negativeIDs=['4'] - excludedIDs=['1'] - falsepositiveIDs=['4'] - usableRegions= get_vertex_points_dots(root=root, IDs_reg=IDs_reg,IDs_points=IDs_points,excludedIDs=excludedIDs,maskModes=['falsepositive','positive'],negativeIDs=negativeIDs, - falsepositiveIDs=falsepositiveIDs) + if l11] + presentclasses=list(presentclasses[presentclasses<6]) + + for p in presentclasses: + contours, hierarchy = cv2.findContours(np.array(mask==p).astype('uint8'), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for 
contour in contours: + if contour.size>=6: + instance_dict={} + contour_flat=contour.flatten().astype('float').tolist() + xMin=min(contour_flat[::2]) + yMin=min(contour_flat[1::2]) + xMax=max(contour_flat[::2]) + yMax=max(contour_flat[1::2]) + instance_dict['bbox']=[xMin,yMin,xMax,yMax] + instance_dict['bbox_mode']=BoxMode.XYXY_ABS + instance_dict['category_id']=p+offset + instance_dict['segmentation']=[contour_flat] + annotation.append(instance_dict) + return annotation + +def custom_mapper(dataset_dict): + # Implement a mapper, similar to the default DatasetMapper, but with your own customizations + + # transform_list = [T.Resize((200,300)), T.RandomFlip(())] + transform_list = [T.Resize(1200,1200), + T.RandomFlip(prob=0.5, horizontal=True, vertical=True), + T.RandomContrast(0.8, 3), + T.RandomBrightness(0.8, 1.6), + ] + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + c=dataset_dict['coordinates'] + h=dataset_dict['height'] + w=dataset_dict['width'] + image=np.array(openslide.OpenSlide(dataset_dict['slide_loc']).read_region((c[0],c[1]),0,(h,w))) + utils.check_image_size(dataset_dict, image) + image, transforms = T.apply_transform_gens(transform_list, image) + maskData=xml_to_mask(dataset_dict['xml_loc'], c, [h,w]) + dataset_dict['annotations']=mask2polygons(maskData) + + annos = [ + utils.transform_instance_annotations(obj, transforms, image.shape[:2]) + for obj in dataset_dict.pop("annotations") + ] + instances = utils.annotations_to_instances(annos, image.shape[:2]) + dataset_dict["instances"] = utils.filter_empty_instances(instances) + + + + + + + + sem_seg_gt=np.array(maskData==1).astype('uint8') + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + # USER: Remove if you don't use pre-computed proposals. + # Most users would not need this feature. + if self.proposal_topk is not None: + utils.transform_proposals( + dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk + ) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. 
+ dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + + if "annotations" in dataset_dict: + self._transform_annotations(dataset_dict, transforms, image_shape) + + return dataset_dict + +# use this dataloader instead of the default +class CustomTrainer(DefaultTrainer): + @classmethod + def build_test_loader(cls, cfg: CfgNode, dataset_name): + return build_detection_test_loader(cfg, dataset_name, mapper=custom_mapper) + + @classmethod + def build_train_loader(cls, cfg: CfgNode): + return build_detection_train_loader(cfg, mapper=custom_mapper) +trainer = CustomTrainer(cfg) +trainer.resume_or_load(resume=False) +trainer.train() +""" + +Code for - cutting / augmenting / training CNN + +This uses WSI and XML files to train 2 neural networks for semantic segmentation + of histopath tissue via human in the loop training + +""" + + +#Record start time +totalStart=time.time() + +def IterateTraining(args): + ## calculate low resolution block params + downsampleLR = int(args.downsampleRateLR**.5) #down sample for each dimension + region_sizeLR = int(args.boxSizeLR*(downsampleLR)) #Region size before downsampling + stepLR = int(region_sizeLR*(1-args.overlap_percentLR)) #Step size before downsampling + ## calculate low resolution block params + downsample = int(args.downsampleRate**.5) #down sample for each dimension + region_size = int(args.boxSize*(downsample)) #Region size before downsampling + step = int(region_size*(1-args.overlap_percent)) #Step size before downsampling + + + global classNum_HR,classEnumLR,classEnumHR + dirs = {'imExt': '.jpeg'} + dirs['basedir'] = args.base_dir + dirs['maskExt'] = '.png' + dirs['modeldir'] = '/MODELS/' + dirs['tempdirLR'] = '/TempLR/' + dirs['tempdirHR'] = '/TempHR/' + dirs['pretraindir'] = '/Deeplab_network/' + dirs['training_data_dir'] = '/TRAINING_data/' + dirs['model_init'] = 'deeplab_resnet.ckpt' + dirs['project']= '/' + args.project + dirs['data_dir_HR'] = args.base_dir +'/' + args.project + '/Permanent/HR/' + dirs['data_dir_LR'] = args.base_dir +'/' +args.project + '/Permanent/LR/' + + + ##All folders created, initiate WSI loading by human + #raw_input('Please place WSIs in ') + + ##Check iteration session + + currentmodels=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) + print('Handcoded iteration') + # currentAnnotationIteration=check_model_generation(dirs) + currentAnnotationIteration=0 + print('Current training session is: ' + str(currentAnnotationIteration)) + dirs['xml_dir']=dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/' + ##Create objects for storing class distributions + annotatedXMLs=glob.glob(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/*.xml') + classes=[] + + + if args.classNum == 0: + for xml in annotatedXMLs: + classes.append(get_num_classes(xml)) + + classNum_HR = max(classes) + else: + classNum_LR = args.classNum + if args.classNum_HR != 0: + classNum_HR = args.classNum_HR + else: + classNum_HR = classNum_LR + + classNum_HR=args.classNum + + train_dset = WSITrainingLoader(args,dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration)) + + + + modeldir_HR = dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration+1) + '/HR/' + + + ##### HIGH REZ ARGS ##### + dirs['outDirAIHR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/regions/' + dirs['outDirAMHR']=dirs['basedir']+'/'+dirs['project'] + 
'/Permanent/HR/masks/' + + + numImagesHR=len(glob.glob(dirs['outDirAIHR'] + '*' + dirs['imExt'])) + + numStepsHR=(args.epoch_HR*numImagesHR)/ args.CNNbatch_sizeHR + + + #----------------------------------------------------------------------------------------- + # os.environ["CUDA_VISIBLE_DEVICES"]='0' + os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu) + # img_dir='/hdd/bg/Detectron2/chop_detectron/Permanent/HR' + + img_dir=dirs['outDirAIHR'] + organType='kidney' + print('Chopping with rules... '+ organType) + if organType=='liver': + classnames=['Background','BD','A'] + isthing=[0,1,1] + xml_color = [[0,255,0], [0,255,255], [0,0,255]] + tc=['BD','AT'] + sc=['Ob','B'] + elif organType =='kidney': + classnames=['interstitium','glomerulus','sclerotic glomerulus','tubule','arterial tree'] + classes={} + isthing=[0,1,1,1,1] + xml_color = [[0,255,0], [0,255,255], [0,0,255], [255,0,0], [0,128,255]] + tc=['G','SG','T','A'] + sc=['Ob','I','B'] + else: + print('Provided organType not in supported types: kidney, liver') + rand_sample=True + json_dir=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/' + json_file=json_dir+'detectron_train' + classes={} + + for idx,c in enumerate(classnames): + classes[idx]={'isthing':isthing[idx],'color':xml_color[idx]} + IdGen=IdGenerator(classes) + + + # if args.prepare_detectron_json: + # HAIL2Detectron(img_dir,rand_sample,json_file,classnames,isthing,xml_color,organType,dirs) + + #### From json + # DatasetCatalog.register("my_dataset", lambda:samples_from_json(json_file,rand_sample)) + DatasetCatalog.register("my_dataset", lambda:train_samples_from_WSI(train_dset,1000,args,json_file,classnames,isthing,xml_color,organType,dirs)) + MetadataCatalog.get("my_dataset").set(thing_classes=tc) + MetadataCatalog.get("my_dataset").set(stuff_classes=sc) + # exit() + # seg_metadata=MetadataCatalog.get("my_dataset") + # + # + # new_list = DatasetCatalog.get("my_dataset") + # print(len(new_list)) + # for d in random.sample(new_list, 1000): + # # ident=d["file_name"].split('/')[-1] + # # print(ident) + # c=d['coordinates'] + # h=d['height'] + # w=d['width'] + # slide=openslide.OpenSlide(d['slide_loc']) + # x=dirs['xml_dir']+'_'.join(d['image_id'].split('_')[:-2])+'.xml' + # img=np.array(slide.read_region((c[0],c[1]),0,(h,w))) + # slide.close() + # # mask=xml_to_mask(x, c, [h,w]) + # # plt.subplot(121) + # # plt.imshow(im) + # # plt.subplot(122) + # # plt.imshow(mask) + # # plt.show() + # # img = cv2.imread(d["file_name"]) + # visualizer = Visualizer(img[:, :, ::-1],metadata=seg_metadata, scale=0.5,idgen=IdGen) + # out = visualizer.draw_dataset_dict(d,train_dset) + # cv2.namedWindow("output", cv2.WINDOW_NORMAL) + # cv2.imshow("output",out.get_image()[:, :, ::-1]) + # cv2.waitKey(0) # waits until a key is pressed + # cv2.destroyAllWindows() + # exit() + cfg = get_cfg() + cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")) + cfg.DATASETS.TRAIN = ("my_dataset") + cfg.DATASETS.TEST = () + num_cores = multiprocessing.cpu_count() + cfg.DATALOADER.NUM_WORKERS = 5 + # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") # Let training initialize from model zoo + + # cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/liver/MODELS/0/HR', "model_final.pth") + cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/output_PASHE_finetune_1', "model_0064999.pth") + + cfg.SOLVER.IMS_PER_BATCH = 4 + + + cfg.SOLVER.BASE_LR = 0.00002 # pick a good LR + 
cfg.SOLVER.LR_policy='steps_with_lrs' + cfg.SOLVER.MAX_ITER = 80000 + cfg.SOLVER.STEPS = [] + # cfg.SOLVER.STEPS = [] + # cfg.SOLVER.LRS = [0.00002] + + # cfg.SOLVER.BASE_LR = 0.002 # pick a good LR + # cfg.SOLVER.LR_policy='steps_with_lrs' + # cfg.SOLVER.MAX_ITER = 200000 + # cfg.SOLVER.STEPS = [150000,180000] + # # cfg.SOLVER.STEPS = [] + # cfg.SOLVER.LRS = [0.0002,0.00002] + + # cfg.INPUT.CROP.ENABLED = True + # cfg.INPUT.CROP.TYPE='absolute' + # cfg.INPUT.CROP.SIZE=[100,100] + cfg.MODEL.BACKBONE.FREEZE_AT = 0 + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32],[64],[128], [256], [512], [1024]] + cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p3', 'p4', 'p5','p6','p6'] + + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2.0, 3.0]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES=[-90,-60,-30,0,30,60,90] + + cfg.MODEL.RPN.POSITIVE_FRACTION = 0.75 + + cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(tc) + cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES =len(sc) + + + # cfg.INPUT.CROP.ENABLED = True + # cfg.INPUT.CROP.TYPE='absolute' + # cfg.INPUT.CROP.SIZE=[64,64] + + cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 # faster, and good enough for this toy dataset (default: 512) + # cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4 # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets) + + cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS=False + cfg.INPUT.MIN_SIZE_TRAIN=0 + # cfg.XML_DIR=dirs['xml_dir'] + # cfg.INPUT.MAX_SIZE_TRAIN=500 + # mapper=DatasetMapper(cfg, True,train_dset) + # exit() + os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) + with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w") as f: + f.write(cfg.dump()) # save config to file + trainer = DefaultTrainer(cfg) + trainer.resume_or_load(resume=False) + trainer.train() + + cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") # path to the model we just trained + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 # set a custom testing threshold + cfg.TEST.DETECTIONS_PER_IMAGE = 500 + # + # cfg.INPUT.MIN_SIZE_TRAIN=64 + # cfg.INPUT.MAX_SIZE_TRAIN=4000 + cfg.INPUT.MIN_SIZE_TEST=1200 + cfg.INPUT.MAX_SIZE_TEST=1200 + + + predict_samples=100 + predictor = DefaultPredictor(cfg) + + dataset_dicts = samples_from_json_mini(json_file,predict_samples) + iter=0 + if not os.path.exists(os.getcwd()+'/network_predictions/'): + os.mkdir(os.getcwd()+'/network_predictions/') + for d in random.sample(dataset_dicts, predict_samples): + # print(d["file_name"]) + # imclass=d["file_name"].split('/')[-1].split('_')[-5].split(' ')[-1] + # if imclass in ["TRI","HE"]: + im = cv2.imread(d["file_name"]) + panoptic_seg, segments_info = predictor(im)["panoptic_seg"] # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format + # print(segments_info) + # plt.imshow(panoptic_seg.to("cpu")) + # plt.show() + v = Visualizer(im[:, :, ::-1], seg_metadata, scale=1.2) + v = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"), segments_info) + # panoptic segmentation result + # plt.ion() + plt.subplot(121) + plt.imshow(im[:, :, ::-1]) + plt.subplot(122) + plt.imshow(v.get_image()) + plt.savefig(f"./network_predictions/input_{iter}.jpg",dpi=300) + plt.show() + # plt.ioff() + + + # v = Visualizer(im[:, :, ::-1], + # metadata=seg_metadata, + # scale=0.5, + # ) + # out = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"),segments_info) + + # imsave('./network_predictions/pred'+str(iter)+'.png',np.hstack((im,v.get_image()))) + iter=iter+1 + # cv2.imshow('',out.get_image()[:, :, ::-1]) + # cv2.waitKey(0) # waits 
until a key is pressed + # cv2.destroyAllWindows() + #----------------------------------------------------------------------------------------- + + finish_model_generation(dirs,currentAnnotationIteration) + + print('\n\n\033[92;5mPlease place new wsi file(s) in: \n\t' + dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration+1)) + print('\nthen run [--option predict]\033[0m\n') + + + + +def moveimages(startfolder,endfolder): + filelist=glob.glob(startfolder + '*') + for file in filelist: + fileID=file.split('/')[-1] + move(file,endfolder + fileID) + + +def check_model_generation(dirs): + modelsCurrent=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) + gens=map(int,modelsCurrent) + modelOrder=np.sort(gens)[::-1] + + for idx in modelOrder: + #modelsChkptsLR=glob.glob(dirs['basedir'] + dirs['project'] + dirs['modeldir']+str(modelsCurrent[idx]) + '/LR/*.ckpt*') + modelsChkptsHR=glob.glob(dirs['basedir'] + dirs['project'] + dirs['modeldir']+ str(idx) +'/HR/*.ckpt*') + if modelsChkptsHR == []: + continue + else: + return idx + break + +def finish_model_generation(dirs,currentAnnotationIteration): + make_folder(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration + 1)) + +def get_pretrain(currentAnnotationIteration,res,dirs): + + if currentAnnotationIteration==0: + pretrain_file = glob.glob(dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + '*') + pretrain_file=pretrain_file[0].split('.')[0] + '.' + pretrain_file[0].split('.')[1] + + else: + pretrains=glob.glob(dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + 'model*') + maxmodel=0 + for modelfiles in pretrains: + modelID=modelfiles.split('.')[-2].split('-')[1] + if int(modelID)>maxmodel: + maxmodel=int(modelID) + pretrain_file=dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + 'model.ckpt-' + str(maxmodel) + return pretrain_file + +def restart_line(): # for printing chopped image labels in command line + sys.stdout.write('\r') + sys.stdout.flush() + +def file_len(fname): # get txt file length (number of lines) + with open(fname) as f: + for i, l in enumerate(f): + pass + return i + 1 + +def make_folder(directory): + if not os.path.exists(directory): + os.makedirs(directory) # make directory if it does not exit already # make new directory # Check if folder exists, if not make it + +def make_all_folders(dirs): + + + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/regions') + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/masks') + + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/Augment' +'/regions') + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/Augment' +'/masks') + + make_folder(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/regions') + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirHR'] + '/masks') + + make_folder(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' +'/regions') + make_folder(dirs['basedir']+dirs['project']+ dirs['tempdirHR'] + '/Augment' +'/masks') + + make_folder(dirs['basedir'] +dirs['project']+ dirs['modeldir']) + make_folder(dirs['basedir'] +dirs['project']+ dirs['training_data_dir']) + + + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/LR/'+ 'regions/') + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/LR/'+ 'masks/') + make_folder(dirs['basedir'] 
+dirs['project']+ '/Permanent' +'/HR/'+ 'regions/') + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/HR/'+ 'masks/') + + make_folder(dirs['basedir'] +dirs['project']+ dirs['training_data_dir']) + + make_folder(dirs['basedir'] + '/Codes/Deeplab_network/datasetLR') + make_folder(dirs['basedir'] + '/Codes/Deeplab_network/datasetHR') + +def return_region(args, wsi_mask, wsiID, fileID, yStart, xStart, idxy, idxx, downsampleRate, outdirT, region_size, dirs, chop_regions,classNum_HR): # perform cutting in parallel + sys.stdout.write(' <'+str(xStart)+'/'+ str(yStart)+'/'+str(chop_regions[idxy,idxx] != 0)+ '> ') + sys.stdout.flush() + restart_line() + + if chop_regions[idxy,idxx] != 0: + + uniqID=fileID + str(yStart) + str(xStart) + if wsiID.split('.')[-1] != 'tif': + slide=getWsi(wsiID) + Im=np.array(slide.read_region((xStart,yStart),0,(region_size,region_size))) + Im=Im[:,:,:3] + else: + yEnd = yStart + region_size + xEnd = xStart + region_size + Im = np.zeros([region_size,region_size,3], dtype=np.uint8) + Im_ = imread(wsiID)[yStart:yEnd, xStart:xEnd, :3] + Im[0:Im_.shape[0], 0:Im_.shape[1], :] = Im_ + + mask_annotation=wsi_mask[yStart:yStart+region_size,xStart:xStart+region_size] + + o1,o2=mask_annotation.shape + if o1 !=region_size: + mask_annotation=np.pad(mask_annotation,((0,region_size-o1),(0,0)),mode='constant') + if o2 !=region_size: + mask_annotation=np.pad(mask_annotation,((0,0),(0,region_size-o2)),mode='constant') + + ''' + if 4 in np.unique(mask_annotation): + plt.subplot(121) + plt.imshow(mask_annotation*20) + plt.subplot(122) + plt.imshow(Im) + pt=[xStart,yStart] + plt.title(pt) + plt.show() + ''' + if downsampleRate !=1: + c=(Im.shape) + s1=int(c[0]/(downsampleRate**.5)) + s2=int(c[1]/(downsampleRate**.5)) + Im=resize(Im,(s1,s2),mode='reflect') + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+uniqID+dirs['maskExt'] + image_out_name=mask_out_name.replace('/masks/','/regions/').replace(dirs['maskExt'],dirs['imExt']) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + imsave(image_out_name,Im) + imsave(mask_out_name,mask_annotation) + + +def regions_in_mask(root, bounds, verbose=1): + # find regions to save + IDs_reg = [] + IDs_points = [] + + for Annotation in root.findall("./Annotation"): # for all annotations + annotationID = Annotation.attrib['Id'] + annotationType = Annotation.attrib['Type'] + + # print(Annotation.findall(./)) + if annotationType =='9': + for element in Annotation.iter('InputAnnotationId'): + pointAnnotationID=element.text + + for Region in Annotation.findall("./*/Region"): # iterate on all region + + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = np.int32(np.float64(Vertex.attrib['X'])) + y_point = np.int32(np.float64(Vertex.attrib['Y'])) + # test if points are in bounds + if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds + # save region Id + IDs_points.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID,'pointAnnotationID':pointAnnotationID}) + break + elif annotationType=='4': + + for Region in Annotation.findall("./*/Region"): # iterate on all region + + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = np.int32(np.float64(Vertex.attrib['X'])) + y_point = np.int32(np.float64(Vertex.attrib['Y'])) + # test if points are in bounds + if bounds['x_min'] <= x_point <= bounds['x_max'] and 
bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds + # save region Id + IDs_reg.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) + break + return IDs_reg,IDs_points + + +def get_vertex_points(root, IDs_reg,IDs_points, maskModes,excludedIDs,negativeIDs=None): + Regions = [] + Points = [] + + for ID in IDs_reg: + Vertices = [] + if ID['annotationID'] not in excludedIDs: + for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): + Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) + Regions.append({'Vertices':np.array(Vertices),'annotationID':ID['annotationID']}) + + for ID in IDs_points: + Vertices = [] + for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): + Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) + Points.append({'Vertices':np.array(Vertices),'pointAnnotationID':ID['pointAnnotationID']}) + if 'falsepositive' in maskModes or 'negative' in maskModes: + assert negativeIDs is not None,'Negatively annotated classes must be provided for negative/falsepositive mask mode' + assert not ('falsepositive' in maskModes and 'negative' in maskModes), 'Negative and false positive mask modes cannot both be true' + + useableRegions=[] + if 'positive' in maskModes: + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + for Point in Points: + if Region['annotationID'] not in negativeIDs: + if regionPath.contains_point(Point['Vertices'][0]): + Region['pointAnnotationID']=Point['pointAnnotationID'] + useableRegions.append(Region) + + if 'negative' in maskModes: + + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + if Region['annotationID'] in negativeIDs: + if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): + Region['pointAnnotationID']=Region['annotationID'] + useableRegions.append(Region) + if 'falsepositive' in maskModes: + + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + if Region['annotationID'] in negativeIDs: + if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): + Region['pointAnnotationID']=0 + useableRegions.append(Region) + + return useableRegions +def chop_suey_bounds(lb,xmlID,box_supervision_layers,wsiID,dirs,args): + + lbVerts=np.array(lb['BoxVerts']) + xMin=min(lbVerts[:,0]) + xMax=max(lbVerts[:,0]) + yMin=min(lbVerts[:,1]) + yMax=max(lbVerts[:,1]) + + # test=np.array(slide.read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] + + local_bound = {'x_min' : xMin, 'y_min' : yMin, 'x_max' : xMax, 'y_max' : yMax} + if args.chop_with_replacement: + tree = ET.parse(xmlID) + root = tree.getroot() + IDs_reg,IDs_points = regions_in_mask_dots(root=root, bounds=local_bound,box_layers=box_supervision_layers) + + # find regions in bounds + negativeIDs=['4'] + excludedIDs=['1'] + falsepositiveIDs=['4'] + usableRegions= get_vertex_points_dots(root=root, IDs_reg=IDs_reg,IDs_points=IDs_points,excludedIDs=excludedIDs,maskModes=['falsepositive','positive'],negativeIDs=negativeIDs, + falsepositiveIDs=falsepositiveIDs) + + # image_sizes= + masks_from_points(usableRegions,wsiID,dirs,50,args,[xMin,xMax,yMin,yMax]) + if args.standard_chop: + l2=yMax-yMin #y + l1=xMax-xMin #x + pas_img = getWsi(wsiID) + dim_x,dim_y=pas_img.dimensions + mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y],ignore_id=box_supervision_layers, downsample_factor=1, verbose=0) 
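For context, the xml_to_mask call above (from xml_to_mask_minmax) rasterizes every annotation layer of the slide into a single label image, which is then cropped to the annotated box. A condensed, standalone sketch of that rasterization idea; the repo's implementation additionally handles min/max caching, ignored layers, and downsampling:

import numpy as np
import cv2
import lxml.etree as ET

def rasterize_xml(xml_path, offset, shape):
    # paint each Annotation layer's polygons into a uint8 label mask;
    # the layer Id doubles as the class label, later layers overwrite earlier ones
    mask = np.zeros(shape, dtype=np.uint8)
    root = ET.parse(xml_path).getroot()
    for annotation in root.findall("./Annotation"):
        layer_id = int(annotation.attrib['Id'])
        for region in annotation.findall("./*/Region"):
            pts = np.array([[int(float(v.attrib['X'])) - offset[0],
                             int(float(v.attrib['Y'])) - offset[1]]
                            for v in region.findall("./*/Vertex")], dtype=np.int32)
            if len(pts) >= 3:
                cv2.fillPoly(mask, [pts], layer_id)
    return mask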
+ mask=mask[yMin:yMax,xMin:xMax] + + # print(xMin,yMin,l1,l2) + region=np.array(pas_img.read_region((xMin,yMin),0,(l1,l2)))[:,:,:3] + + basename=wsiID.split('/')[-1].split('.svs')[0] + max_mask_size=args.training_max_size + substepHR = int(max_mask_size*(1-args.overlap_percentHR)) #Step size before downsampling + + + # plt.subplot(121) + # plt.imshow(region) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() + + if l10 and (y2-y1)>0: + l1=x2-x1 + l2=y2-y1 + xMultiplier=np.ceil((l1)/64) + yMultiplier=np.ceil((l2)/64) + pad1=int(xMultiplier*64-l1) + pad2=int(yMultiplier*64-l2) + + points[:,1] = np.int32(np.round(points[:,1] - y1 )) + points[:,0] = np.int32(np.round(points[:,0] - x1 )) + mask = 2*np.ones([y2-y1,x2-x1], dtype=np.uint8) + if int(usableRegion['pointAnnotationID'])==0: + pass + else: + cv2.fillPoly(mask, [points], int(usableRegion['pointAnnotationID'])-4) + PAS = pas_img.read_region((x1,y1), 0, (x2-x1,y2-y1)) + # print(usableRegion['pointAnnotationID']) + PAS = np.array(PAS)[:,:,0:3] + mask=np.pad( mask,((0,pad2),(0,pad1)),'constant',constant_values=(2,2) ) + PAS=np.pad( PAS,((0,pad2),(0,pad1),(0,0)),'constant',constant_values=(0,0) ) + + image_identifier=basename+'_'.join(['',str(x1),str(y1),str(l1),str(l2)]) + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+image_identifier+'.png' + image_out_name=mask_out_name.replace('/masks/','/regions/') + # basename + '_' + str(image_identifier) + args.imBoxExt + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(image_out_name,PAS) + imsave(mask_out_name,mask) + # exit() + # extract image region + # plt.subplot(121) + # plt.imshow(PAS) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() + # image_sizes.append([x2-x1,y2-y1]) + else: + print('Broken region') + return image_sizes +''' diff --git a/histomicstk/segmentationschool/Codes/IterativeTraining_1X_chopless_test.py b/histomicstk/segmentationschool/Codes/IterativeTraining_1X_chopless_test.py new file mode 100755 index 0000000..4ffd90a --- /dev/null +++ b/histomicstk/segmentationschool/Codes/IterativeTraining_1X_chopless_test.py @@ -0,0 +1,771 @@ +import os, sys, cv2, time, random, warnings, argparse, csv, multiprocessing,json,copy +from skimage.color import rgb2hsv,hsv2rgb,rgb2lab,lab2rgb +import detectron2_custom2.detectron2 +from utils import IdGenerator, id2rgb +import numpy as np +import matplotlib.pyplot as plt +import lxml.etree as ET +from matplotlib import path +from skimage.transform import resize +from skimage.io import imread, imsave +import glob +from getWsi import getWsi +from xml_to_mask_minmax import xml_to_mask +from joblib import Parallel, delayed +from shutil import move +# from generateTrainSet import generateDatalists +from subprocess import call +from get_choppable_regions import get_choppable_regions +from PIL import Image +import logging +from detectron2.utils.logger import setup_logger +from skimage import exposure + +setup_logger() +from detectron2 import model_zoo +from detectron2.engine import DefaultPredictor,DefaultTrainer +from detectron2.config import get_cfg +from detectron2_custom2.detectron2.utils.visualizer import Visualizer,ColorMode +# from detectron2.data import MetadataCatalog, DatasetCatalog +from detectron2.data import detection_utils as utils +import detectron2.data.transforms as T +from detectron2.data.dataset_mapper import DatasetMapper +from detectron2.data import (DatasetCatalog, + MetadataCatalog, + build_detection_test_loader, + build_detection_train_loader, +) +from 
detectron2.config import configurable +from typing import List, Optional, Union +import torch + +from wsi_loader_utils import * +from imgaug import augmenters as iaa + +global seq +seq = iaa.Sequential([ + iaa.Sometimes(0.5,iaa.OneOf([ + iaa.AddElementwise((-15,15),per_channel=0.5), + iaa.ImpulseNoise(0.05),iaa.CoarseDropout(0.02, size_percent=0.5)])), + iaa.Sometimes(0.5,iaa.OneOf([iaa.GaussianBlur(sigma=(0, 3.0)), + iaa.Sharpen(alpha=(0.0, 1.0), lightness=(0.75, 2.0))])) +]) + +#Record start time +totalStart=time.time() + +def IterateTraining(args): + hsv_aug_rate=args.hsv_aug_prob + ## calculate low resolution block params + downsampleLR = int(args.downsampleRateLR**.5) #down sample for each dimension + region_sizeLR = int(args.boxSizeLR*(downsampleLR)) #Region size before downsampling + stepLR = int(region_sizeLR*(1-args.overlap_percentLR)) #Step size before downsampling + ## calculate low resolution block params + downsample = int(args.downsampleRate**.5) #down sample for each dimension + region_size = int(args.boxSize*(downsample)) #Region size before downsampling + step = int(region_size*(1-args.overlap_rate)) #Step size before downsampling + + # global classNum_HR + dirs = {'imExt': '.jpeg'} + dirs['basedir'] = args.base_dir + dirs['maskExt'] = '.png' + dirs['modeldir'] = '/MODELS/' + dirs['tempdirLR'] = '/TempLR/' + dirs['tempdirHR'] = '/TempHR/' + dirs['pretraindir'] = '/Deeplab_network/' + dirs['training_data_dir'] = '/TRAINING_data/' + dirs['model_init'] = 'deeplab_resnet.ckpt' + dirs['project']= '/' + args.project + dirs['data_dir_HR'] = args.base_dir +'/' + args.project + '/Permanent/HR/' + dirs['data_dir_LR'] = args.base_dir +'/' +args.project + '/Permanent/LR/' + + currentmodels=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) + print('Handcoded iteration') + # currentAnnotationIteration=check_model_generation(dirs) + currentAnnotationIteration=0 + print('Current training session is: ' + str(currentAnnotationIteration)) + dirs['xml_dir']=dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/' + ##Create objects for storing class distributions + # annotatedXMLs=glob.glob(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/*.xml') + + + # train_dset = WSITrainingLoader(args,dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration)) + + modeldir_HR = dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration+1) + '/HR/' + + os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu) + + organType='kidney' + print('Organ meta being set to... 
'+ organType) + + if organType=='liver': + classnames=['Background','BD','A'] + isthing=[0,1,1] + xml_color = [[0,255,0], [0,255,255], [0,0,255]] + tc=['BD','AT'] + sc=['Ob','B'] + elif organType =='kidney': + classnames=['interstitium','medulla','glomerulus','sclerotic glomerulus','tubule','arterial tree'] + classes={} + isthing=[0,0,1,1,1,1] + xml_color = [[0,255,0], [0,255,255], [255,255,0],[0,0,255], [255,0,0], [0,128,255]] + tc=['G','SG','T','A'] + sc=['Ob','I','M','B'] + else: + print('Provided organType not in supported types: kidney, liver') + rand_sample=True + json_dir=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/' + json_file=json_dir+'detectron_train' + + classNum=len(tc)+len(sc)-1 + print('Number classes: '+ str(classNum)) + classes={} + + for idx,c in enumerate(classnames): + classes[idx]={'isthing':isthing[idx],'color':xml_color[idx]} + IdGen=IdGenerator(classes) + + num_images=args.batch_size*args.train_steps + # slide_idxs=train_dset.get_random_slide_idx(num_images) + usable_slides=get_slide_data(args, wsi_directory=dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration)) + usable_idx=range(0,len(usable_slides)) + slide_idxs=random.choices(usable_idx,k=num_images) + image_coordinates=get_random_chops(slide_idxs,usable_slides,region_size) + # usable_slides=[] + # num_cores=multiprocessing.cpu_count() + # print('Generating detectron2 dictionary format...') + # data_list=Parallel(n_jobs=num_cores,backend='threading')(delayed(get_image_meta)(i=i, + # train_dset=train_dset,args=args) for i in tqdm(image_coordinates)) + DatasetCatalog.register("my_dataset", lambda:train_samples_from_WSI(args,image_coordinates)) + MetadataCatalog.get("my_dataset").set(thing_classes=tc) + MetadataCatalog.get("my_dataset").set(stuff_classes=sc) + + if args.check_training_data: + seg_metadata=MetadataCatalog.get("my_dataset") + new_list = DatasetCatalog.get("my_dataset") + total=len(new_list) + print(total) + print('Visualizing dataset... 
spacebar to continue, q to quit') + for d in tqdm(random.sample(new_list, total)): + + c=d['coordinates'] + h=d['height'] + w=d['width'] + slide=openslide.OpenSlide(d['slide_loc']) + print(d['slide_loc']) + x=dirs['xml_dir']+'_'.join(d['image_id'].split('_')[:-2])+'.xml' + img=np.array(slide.read_region((c[0],c[1]),0,(h,w)))[:,:,:3] + slide.close() + + if random.random()>hsv_aug_rate: + # plt.subplot(131) + # plt.imshow(img) + + hShift=np.random.normal(0,0.05) + lShift=np.random.normal(1,0.025) + # imageblock[im]=randomHSVshift(imageblock[im],hShift,lShift) + img=rgb2hsv(img) + img[:,:,0]=(img[:,:,0]+hShift) + img=hsv2rgb(img) + img=rgb2lab(img) + img[:,:,0]=exposure.adjust_gamma(img[:,:,0],lShift) + img=(lab2rgb(img)*255).astype('uint8') + # plt.subplot(132) + # plt.imshow(img) + + images_aug = seq(images=[img])[0].squeeze() + # plt.subplot(133) + # plt.imshow(images_aug) + # plt.show() + # print(images_aug.dtype) + # continue + visualizer = Visualizer(img[:, :, ::-1],metadata=seg_metadata, scale=0.5,idgen=IdGen) + out = visualizer.draw_dataset_dict(d,x) + + + + cv2.namedWindow("output", cv2.WINDOW_NORMAL) + cv2.imshow("output",out.get_image()[:, :, ::-1]) + wait_time = 1000 + while cv2.getWindowProperty('output', cv2.WND_PROP_VISIBLE) >= 1: + keyCode = cv2.waitKey(wait_time) + if (keyCode & 0xFF) == ord(" "): + cv2.destroyAllWindows() + break + if (keyCode & 0xFF) == ord("q"): + cv2.destroyAllWindows() + exit() + + exit() + + cfg = get_cfg() + cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")) + cfg.DATASETS.TRAIN = ("my_dataset") + cfg.DATASETS.TEST = () + num_cores = multiprocessing.cpu_count() + cfg.DATALOADER.NUM_WORKERS = 10 + + # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") # Let training initialize from model zoo + + cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/output_medulla_long1', "model_0214999.pth") + # cfg.MODEL.WEIGHTS = os.path.join('/hdd/bg/Detectron2/HAIL_Detectron2/outputAugRCC', "model_final.pth") + # if args.custom_image_means: + # x=np.array(train_dset.get_image_means()) + # total_means=[float(np.round(np.mean(x[:,0]),3)), + # float(np.round(np.mean(x[:,1]),3)), + # float(np.round(np.mean(x[:,2]),3))] + # print('Using custom pixel means: ') + # print(total_means) + # cfg.MODEL.PIXEL_MEAN=total_means + + + cfg.SOLVER.IMS_PER_BATCH = args.batch_size + + + # cfg.SOLVER.BASE_LR = 0.0025 # pick a good LR + # cfg.SOLVER.POLY = 0.9 # pick a good LR + # cfg.SOLVER.LR_SCHEDULER_NAME = "ExponentialParamScheduler" + + + cfg.SOLVER.LR_policy='steps_with_lrs' + cfg.SOLVER.MAX_ITER = args.train_steps + # cfg.SOLVER.STEPS = [int(.5*args.train_steps),int(.75*args.train_steps),int(.9*args.train_steps)] + cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR + cfg.SOLVER.LRS = [0.000025,0.0000025] + cfg.SOLVER.STEPS = [100000,150000] + # cfg.SOLVER.STEPS = [int(.3333*args.train_steps),int(.6666*args.train_steps),int(.85*args.train_steps)] + # cfg.SOLVER.LRS = [0.00025,0.000025,0.0000025] + # cfg.SOLVER.STEPS = [int(args.train_steps/2)] + # cfg.SOLVER.LRS = [0.00025] + + # cfg.MODEL.BACKBONE.FREEZE_AT = 0 + cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32],[64],[128], [256], [512], [1024]] + cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p3', 'p4', 'p5','p6','p6'] + + cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[.1,.2,0.33, 0.5, 1.0, 2.0, 3.0,5,10]] + cfg.MODEL.ANCHOR_GENERATOR.ANGLES=[-90,-60,-30,0,30,60,90] + + # cfg.MODEL.RPN.POSITIVE_FRACTION = 0.5 + + 
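The two NUM_CLASSES assignments that follow are easy to conflate: in a Panoptic FPN the ROI heads only predict the 'thing' (instance) classes while the semantic head only predicts the 'stuff' classes, so each head is sized by its own list rather than by the total class count. For the kidney setup above the intended split is roughly:

tc = ['G', 'SG', 'T', 'A']   # things: instances detected by the ROI heads
sc = ['Ob', 'I', 'M', 'B']   # stuff: regions labeled per-pixel by the semantic head
# classNum = len(tc) + len(sc) - 1 = 7 printed earlier presumably discounts the 'Ob' placeholder
print(len(tc), len(sc))      # 4 and 4, the values fed to the two NUM_CLASSES keys below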
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(tc) + cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES =len(sc) + + + # cfg.INPUT.CROP.ENABLED = True + # cfg.INPUT.CROP.TYPE='absolute' + # cfg.INPUT.CROP.SIZE=[64,64] + + cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64 + + cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS=False + cfg.INPUT.MIN_SIZE_TRAIN=args.boxSize + + cfg.INPUT.MAX_SIZE_TRAIN=args.boxSize + + os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) + with open(cfg.OUTPUT_DIR+"/config_record.yaml", "w") as f: + f.write(cfg.dump()) # save config to file + trainer = Trainer(cfg) + + trainer.resume_or_load(resume=False) + + trainer.train() + + + finish_model_generation(dirs,currentAnnotationIteration) + + print('\n\n\033[92;5mPlease place new wsi file(s) in: \n\t' + dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration+1)) + print('\nthen run [--option predict]\033[0m\n') + + + + + +def check_model_generation(dirs): + modelsCurrent=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir']) + gens=map(int,modelsCurrent) + modelOrder=np.sort(gens)[::-1] + + for idx in modelOrder: + #modelsChkptsLR=glob.glob(dirs['basedir'] + dirs['project'] + dirs['modeldir']+str(modelsCurrent[idx]) + '/LR/*.ckpt*') + modelsChkptsHR=glob.glob(dirs['basedir'] + dirs['project'] + dirs['modeldir']+ str(idx) +'/HR/*.ckpt*') + if modelsChkptsHR == []: + continue + else: + return idx + break + +def finish_model_generation(dirs,currentAnnotationIteration): + make_folder(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration + 1)) + +def get_pretrain(currentAnnotationIteration,res,dirs): + + if currentAnnotationIteration==0: + pretrain_file = glob.glob(dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + '*') + pretrain_file=pretrain_file[0].split('.')[0] + '.' 
+ pretrain_file[0].split('.')[1] + + else: + pretrains=glob.glob(dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + 'model*') + maxmodel=0 + for modelfiles in pretrains: + modelID=modelfiles.split('.')[-2].split('-')[1] + if int(modelID)>maxmodel: + maxmodel=int(modelID) + pretrain_file=dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration) + res + 'model.ckpt-' + str(maxmodel) + return pretrain_file + +def restart_line(): # for printing chopped image labels in command line + sys.stdout.write('\r') + sys.stdout.flush() + +def file_len(fname): # get txt file length (number of lines) + with open(fname) as f: + for i, l in enumerate(f): + pass + return i + 1 + +def make_folder(directory): + if not os.path.exists(directory): + os.makedirs(directory) # make directory if it does not exit already # make new directory # Check if folder exists, if not make it + +def make_all_folders(dirs): + + + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/regions') + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/masks') + + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/Augment' +'/regions') + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirLR'] + '/Augment' +'/masks') + + make_folder(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/regions') + make_folder(dirs['basedir'] +dirs['project']+ dirs['tempdirHR'] + '/masks') + + make_folder(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' +'/regions') + make_folder(dirs['basedir']+dirs['project']+ dirs['tempdirHR'] + '/Augment' +'/masks') + + make_folder(dirs['basedir'] +dirs['project']+ dirs['modeldir']) + make_folder(dirs['basedir'] +dirs['project']+ dirs['training_data_dir']) + + + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/LR/'+ 'regions/') + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/LR/'+ 'masks/') + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/HR/'+ 'regions/') + make_folder(dirs['basedir'] +dirs['project']+ '/Permanent' +'/HR/'+ 'masks/') + + make_folder(dirs['basedir'] +dirs['project']+ dirs['training_data_dir']) + + make_folder(dirs['basedir'] + '/Codes/Deeplab_network/datasetLR') + make_folder(dirs['basedir'] + '/Codes/Deeplab_network/datasetHR') + +def return_region(args, wsi_mask, wsiID, fileID, yStart, xStart, idxy, idxx, downsampleRate, outdirT, region_size, dirs, chop_regions,classNum_HR): # perform cutting in parallel + sys.stdout.write(' <'+str(xStart)+'/'+ str(yStart)+'/'+str(chop_regions[idxy,idxx] != 0)+ '> ') + sys.stdout.flush() + restart_line() + + if chop_regions[idxy,idxx] != 0: + + uniqID=fileID + str(yStart) + str(xStart) + if wsiID.split('.')[-1] != 'tif': + slide=getWsi(wsiID) + Im=np.array(slide.read_region((xStart,yStart),0,(region_size,region_size))) + Im=Im[:,:,:3] + else: + yEnd = yStart + region_size + xEnd = xStart + region_size + Im = np.zeros([region_size,region_size,3], dtype=np.uint8) + Im_ = imread(wsiID)[yStart:yEnd, xStart:xEnd, :3] + Im[0:Im_.shape[0], 0:Im_.shape[1], :] = Im_ + + mask_annotation=wsi_mask[yStart:yStart+region_size,xStart:xStart+region_size] + + o1,o2=mask_annotation.shape + if o1 !=region_size: + mask_annotation=np.pad(mask_annotation,((0,region_size-o1),(0,0)),mode='constant') + if o2 !=region_size: + mask_annotation=np.pad(mask_annotation,((0,0),(0,region_size-o2)),mode='constant') + + ''' + if 4 in np.unique(mask_annotation): + plt.subplot(121) + plt.imshow(mask_annotation*20) + 
plt.subplot(122) + plt.imshow(Im) + pt=[xStart,yStart] + plt.title(pt) + plt.show() + ''' + if downsampleRate !=1: + c=(Im.shape) + s1=int(c[0]/(downsampleRate**.5)) + s2=int(c[1]/(downsampleRate**.5)) + Im=resize(Im,(s1,s2),mode='reflect') + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+uniqID+dirs['maskExt'] + image_out_name=mask_out_name.replace('/masks/','/regions/').replace(dirs['maskExt'],dirs['imExt']) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + imsave(image_out_name,Im) + imsave(mask_out_name,mask_annotation) + + +def regions_in_mask(root, bounds, verbose=1): + # find regions to save + IDs_reg = [] + IDs_points = [] + + for Annotation in root.findall("./Annotation"): # for all annotations + annotationID = Annotation.attrib['Id'] + annotationType = Annotation.attrib['Type'] + + # print(Annotation.findall(./)) + if annotationType =='9': + for element in Annotation.iter('InputAnnotationId'): + pointAnnotationID=element.text + + for Region in Annotation.findall("./*/Region"): # iterate on all region + + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = np.int32(np.float64(Vertex.attrib['X'])) + y_point = np.int32(np.float64(Vertex.attrib['Y'])) + # test if points are in bounds + if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds + # save region Id + IDs_points.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID,'pointAnnotationID':pointAnnotationID}) + break + elif annotationType=='4': + + for Region in Annotation.findall("./*/Region"): # iterate on all region + + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = np.int32(np.float64(Vertex.attrib['X'])) + y_point = np.int32(np.float64(Vertex.attrib['Y'])) + # test if points are in bounds + if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds + # save region Id + IDs_reg.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) + break + return IDs_reg,IDs_points + + +def get_vertex_points(root, IDs_reg,IDs_points, maskModes,excludedIDs,negativeIDs=None): + Regions = [] + Points = [] + + for ID in IDs_reg: + Vertices = [] + if ID['annotationID'] not in excludedIDs: + for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): + Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) + Regions.append({'Vertices':np.array(Vertices),'annotationID':ID['annotationID']}) + + for ID in IDs_points: + Vertices = [] + for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): + Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) + Points.append({'Vertices':np.array(Vertices),'pointAnnotationID':ID['pointAnnotationID']}) + if 'falsepositive' in maskModes or 'negative' in maskModes: + assert negativeIDs is not None,'Negatively annotated classes must be provided for negative/falsepositive mask mode' + assert not ('falsepositive' in maskModes and 'negative' in maskModes), 'Negative and false positive mask modes cannot both be true' + + useableRegions=[] + if 'positive' in maskModes: + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + for Point in Points: + if Region['annotationID'] not in 
negativeIDs: + if regionPath.contains_point(Point['Vertices'][0]): + Region['pointAnnotationID']=Point['pointAnnotationID'] + useableRegions.append(Region) + + if 'negative' in maskModes: + + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + if Region['annotationID'] in negativeIDs: + if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): + Region['pointAnnotationID']=Region['annotationID'] + useableRegions.append(Region) + if 'falsepositive' in maskModes: + + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + if Region['annotationID'] in negativeIDs: + if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): + Region['pointAnnotationID']=0 + useableRegions.append(Region) + + return useableRegions +def chop_suey_bounds(lb,xmlID,box_supervision_layers,wsiID,dirs,args): + + lbVerts=np.array(lb['BoxVerts']) + xMin=min(lbVerts[:,0]) + xMax=max(lbVerts[:,0]) + yMin=min(lbVerts[:,1]) + yMax=max(lbVerts[:,1]) + + # test=np.array(slide.read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] + + local_bound = {'x_min' : xMin, 'y_min' : yMin, 'x_max' : xMax, 'y_max' : yMax} + if args.chop_with_replacement: + tree = ET.parse(xmlID) + root = tree.getroot() + IDs_reg,IDs_points = regions_in_mask_dots(root=root, bounds=local_bound,box_layers=box_supervision_layers) + + # find regions in bounds + negativeIDs=['4'] + excludedIDs=['1'] + falsepositiveIDs=['4'] + usableRegions= get_vertex_points_dots(root=root, IDs_reg=IDs_reg,IDs_points=IDs_points,excludedIDs=excludedIDs,maskModes=['falsepositive','positive'],negativeIDs=negativeIDs, + falsepositiveIDs=falsepositiveIDs) + + # image_sizes= + masks_from_points(usableRegions,wsiID,dirs,50,args,[xMin,xMax,yMin,yMax]) + if args.standard_chop: + l2=yMax-yMin #y + l1=xMax-xMin #x + pas_img = getWsi(wsiID) + dim_x,dim_y=pas_img.dimensions + mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y],ignore_id=box_supervision_layers, downsample_factor=1, verbose=0) + mask=mask[yMin:yMax,xMin:xMax] + + # print(xMin,yMin,l1,l2) + region=np.array(pas_img.read_region((xMin,yMin),0,(l1,l2)))[:,:,:3] + + basename=wsiID.split('/')[-1].split('.svs')[0] + max_mask_size=args.training_max_size + substepHR = int(max_mask_size*(1-args.overlap_rate)) #Step size before downsampling + + + # plt.subplot(121) + # plt.imshow(region) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() + + if l12] + presentclasses=list(presentclasses[presentclasses<7]) + for p in presentclasses: + contours, hierarchy = cv2.findContours(np.array(mask==p).astype('uint8'), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for contour in contours: + if contour.size>=6: + instance_dict={} + contour_flat=contour.flatten().astype('float').tolist() + xMin=min(contour_flat[::2]) + yMin=min(contour_flat[1::2]) + xMax=max(contour_flat[::2]) + yMax=max(contour_flat[1::2]) + instance_dict['bbox']=[xMin,yMin,xMax,yMax] + instance_dict['bbox_mode']=BoxMode.XYXY_ABS + instance_dict['category_id']=p+offset + instance_dict['segmentation']=[contour_flat] + annotation.append(instance_dict) + return annotation + + + + +class Trainer(DefaultTrainer): + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + return build_detection_test_loader(cfg, dataset_name, mapper=CustomDatasetMapper(cfg, True)) + + @classmethod + def build_train_loader(cls, cfg): + return build_detection_train_loader(cfg, mapper=CustomDatasetMapper(cfg, True)) + +class CustomDatasetMapper: + + @configurable + def __init__( + self, + is_train: bool, + *, + augmentations: 
List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. + """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def _transform_annotations(self, dataset_dict, transforms, image_shape): + # USER: Modify this if you want to keep them for some reason. 
+ for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + c=dataset_dict['coordinates'] + h=dataset_dict['height'] + w=dataset_dict['width'] + + slide=openslide.OpenSlide(dataset_dict['slide_loc']) + image=np.array(slide.read_region((c[0],c[1]),0,(h,w)))[:,:,:3] + slide.close() + maskData=xml_to_mask(dataset_dict['xml_loc'], c, [h,w]) + + if random.random()>0.5: + hShift=np.random.normal(0,0.05) + lShift=np.random.normal(1,0.025) + # imageblock[im]=randomHSVshift(imageblock[im],hShift,lShift) + image=rgb2hsv(image) + image[:,:,0]=(image[:,:,0]+hShift) + image=hsv2rgb(image) + image=rgb2lab(image) + image[:,:,0]=exposure.adjust_gamma(image[:,:,0],lShift) + image=(lab2rgb(image)*255).astype('uint8') + image = seq(images=[image])[0].squeeze() + + dataset_dict['annotations']=mask2polygons(maskData) + utils.check_image_size(dataset_dict, image) + + sem_seg_gt = maskData + sem_seg_gt[sem_seg_gt>2]=0 + sem_seg_gt[maskData==0] = 3 + sem_seg_gt=np.array(sem_seg_gt).astype('uint8') + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + transforms = self.augmentations(aug_input) + image, sem_seg_gt = aug_input.image, aug_input.sem_seg + + image_shape = image.shape[:2] # h, w + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. 
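The shared-memory comment above is the rationale for the line that follows: handing the dataloader a contiguous channels-first torch.Tensor lets worker processes exchange images through shared memory instead of pickling large numpy structures. In isolation, the conversion is just (a minimal sketch):

import numpy as np
import torch

hwc = np.zeros((512, 512, 3), dtype=np.uint8)       # region as read from the slide (H, W, C)
chw = np.ascontiguousarray(hwc.transpose(2, 0, 1))  # reorder to (C, H, W) and make contiguous
img = torch.as_tensor(chw)                          # wraps the array without copying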
+ dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) + + if "annotations" in dataset_dict: + self._transform_annotations(dataset_dict, transforms, image_shape) + + return dataset_dict diff --git a/histomicstk/segmentationschool/Codes/TransformXMLs.py b/histomicstk/segmentationschool/Codes/TransformXMLs.py old mode 100644 new mode 100755 index 6778434..dcd12ca --- a/histomicstk/segmentationschool/Codes/TransformXMLs.py +++ b/histomicstk/segmentationschool/Codes/TransformXMLs.py @@ -1,15 +1,11 @@ -import os,cv2 # sys, time +import os, sys, cv2, time import numpy as np -# import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import lxml.etree as ET -# from matplotlib import path +from matplotlib import path import glob -from .xml_to_mask_minmax import get_annotated_ROIs,xml_to_mask,write_minmax_to_xml -import openslide -from .XML_to_Json_cortex import convert_xml_json -import json -# import copy -# from tqdm import tqdm +from xml_to_mask_minmax import get_annotated_ROIs,xml_to_mask,write_minmax_to_xml + def transform_XMLs(args): xml_color = [65280, 65535, 255, 16711680, 33023] @@ -37,165 +33,9 @@ def transform_XMLs(args): # plt.show() xml_suey(xmlpart,xmloutname,args.classNum,downsample=args.downsampleRate,glob_offset=[0,0],xml_color=xml_color) -def splice_cortex_XMLs(args): - xml_color = [65280,65535,16776960,255, 16711680, 33023] - assert args.target is not None, 'You must provide the directory of XMLs to splice cortex into with --target /path/to/xmls' - assert args.cortextarget is not None, 'You must provide the directory with cortex XMLs with --cortextarget /path/to/xmls' - assert args.output is not None, 'You must provide the directory for output XMLS with --output /path/to/save/location' - baseXMLs=glob.glob(os.path.join(args.target, "*.xml")) - corteXMLs=glob.glob(os.path.join(args.cortextarget, "*.xml")) - output_path=args.output - if args.groupBy is None: - groupAnnotationsBy='Annotations' - else: - groupAnnotationsBy=args.groupBy - if not os.path.isdir(args.output): - print('Creating output folder: ' + args.output) - os.makedirs(args.output) - - for xml in baseXMLs: - print(xml,end='\n',flush=True) - cortexxml=os.path.join(args.cortextarget,xml.split('/')[-1]) - newxmlpath=os.path.join(args.output,xml.split('/')[-1]) - try: - write_minmax_to_xml(cortexxml) - except Exception as e: - print(e) - exit() - write_minmax_to_xml(xml) - basetree = ET.parse(xml) - baseroot = basetree.getroot() - cortextree = ET.parse(cortexxml) - cortexroot = cortextree.getroot() - # print('\n') - # print(len(cortexroot)) - # print(cortexroot[0].tag) - # exit() - Annotations_new=xml_create() - for i in range(1,7): - Annotations_new = xml_add_annotation(Annotations=Annotations_new,xml_color=xml_color,annotationID=i) - # - # if i in [1,2]: - # Annotation = cortexroot.find("Annotation[@Id='" + str(i) + "']") - # # Annotations_new=ET.SubElement(Annotations_new,ET.tostring(Annotation, pretty_print=True)) - # Annotations_new.append(Annotation) - # - # else: - # Annotation = baseroot.find("Annotation[@Id='" + str(i-1) + "']") - # - # Annotation.attrib['Id']=str(i) - # # Annotation.attrib['Id'] - # # Annotations_new.append(Annotation) - # Annotations_new.append(Annotation) - # # Annotations_new.append(ET.fromstring(ET.tostring(Annotation))) - for Annotation in cortexroot.findall("./Annotation"): - annotationID = Annotation.attrib['Id'] - # print(annotationID) - # 
Annotations_new.append(Annotation) - for Region in Annotation.findall("./*/Region"): # iterate on all region - # Annotation = Annotations.find("Annotation[@Id='" + str(annotationID) + "']") - # Regions = Annotation.find('Regions') - # Annotations_new = ET.SubElement(Annotations=Annotations_new, pointList=np.array(verts), annotationID=int(annotationID)) - verts=[] - for Vert in Region.findall("./Vertices/Vertex"): # iterate on all vertex in region - verts.append({'X':int(float(Vert.attrib['X'])),'Y':int(float(Vert.attrib['Y']))}) - Annotations_new = xml_add_region(Annotations=Annotations_new, pointList=np.array(verts), annotationID=int(annotationID)) - - - for Annotation in baseroot.findall("./Annotation"): - annotationID = Annotation.attrib['Id'] - if annotationID in ['1']: - continue - - Annotation.attrib['Id']=str(int(Annotation.attrib['Id'])+1) - print(annotationID) - # Annotations_new.append(Annotation) - for regionidx,Region in enumerate(Annotation.findall("./*/Region")): # iterate on all region - verts=[] - for Vert in Region.findall("./Vertices/Vertex"): # iterate on all vertex in region - verts.append({'X':int(float(Vert.attrib['X'])),'Y':int(float(Vert.attrib['Y']))}) - Annotations_new = xml_add_region(Annotations=Annotations_new, pointList=np.array(verts), annotationID=int(annotationID)+1,regionID=regionidx+1) - xml_data = ET.tostring(Annotations_new, pretty_print=True) - #xml_data = Annotations.toprettyxml() - f = open(newxmlpath, 'wb') - f.write(xml_data) - f.close() - - # xml_save(Annotations=Annotations_new, filename=newxmlpath) - - # Convert XML to histomicsUI json - tree = ET.parse(newxmlpath) - root = tree.getroot() - - # names=['Interstitium','glomerulus','sclerotic glomerulus','tubules','artery/arteriole'] - names=['Cortex','Medulla','glomerulus','sclerotic glomerulus','tubules','artery/arteriole'] - annotation = convert_xml_json(root, groupAnnotationsBy,names) - - print('Convert to HistomicsUI json...',end='\r',flush=True) - with open(newxmlpath.replace('.xml','.json'), 'w') as annotation_file: - json.dump(annotation, annotation_file, indent=2, sort_keys=False) - print('\nDone.') - - -def register_aperio_scn_xmls(args): - xml_color = [0,65280, 65535, 255, 16711680, 33023] - assert args.target is not None, 'You must provide the directory of XMLs and WSIs to register with --target /path/to/xmls' - assert args.output is not None, 'You must provide the directory for output XMLS with --output /path/to/save/location' - - if args.groupBy is None: - groupAnnotationsBy='Annotations' - else: - groupAnnotationsBy=args.groupBy - if not os.path.isdir(args.output): - print('Creating output folder: ' + args.output) - os.makedirs(args.output) - annotatedXMLs=glob.glob(os.path.join(args.target, "*.xml")) - for xml in annotatedXMLs: - print(xml,end='\r',flush=True) - newxmlpath=os.path.join(args.output,xml.split('/')[-1]) - try: - slide=openslide.OpenSlide(xml.replace('.xml','.scn')) - except Exception as e: - print(e) - exit() - - dim_x=int(slide.properties['openslide.bounds-width'])## add to columns - dim_y=int(slide.properties['openslide.bounds-height'])## add to rows - offsetx=int(slide.properties['openslide.bounds-x'])##start column - offsety=int(slide.properties['openslide.bounds-y'])##start row - - rotationoffset=dim_y-dim_x - - write_minmax_to_xml(xml) - tree = ET.parse(xml) - root = tree.getroot() - Annotations_new = xml_create() - for i in range(1,3): - Annotations_new = xml_add_annotation(Annotations=Annotations_new,xml_color=xml_color,annotationID=i) - for Annotation in 
root.findall("./Annotation"): # for all Annotations_new - annotationID = Annotation.attrib['Id'] - - IDs=[] - for Region in Annotation.findall("./*/Region"): # iterate on all region - verts=[] - # cnt=[] - for Vert in Region.findall("./Vertices/Vertex"): # iterate on all vertex in region - verts.append({'X':int(float(Vert.attrib['Y'])+offsetx),'Y':dim_x-int(float(Vert.attrib['X']))+offsety+rotationoffset}) - - Annotations_new = xml_add_region(Annotations=Annotations_new, pointList=np.array(verts), annotationID=int(annotationID)) - xml_save(Annotations=Annotations_new, filename=newxmlpath) - # Convert XML to histomicsUI json - tree = ET.parse(newxmlpath) - root = tree.getroot() - names=['Cortex','Medulla','other'] - annotation = convert_xml_json(root, groupAnnotationsBy,names) - print('Convert to HistomicsUI json...',end='\r',flush=True) - with open(newxmlpath.replace('.xml','.json'), 'w') as annotation_file: - json.dump(annotation, annotation_file, indent=2, sort_keys=False) - print('\nDone.') def xml_suey(wsiMask,xmloutname, classNum, downsample,glob_offset,xml_color): # make xml Annotations = xml_create() diff --git a/histomicstk/segmentationschool/Codes/dataset_mapper_custom.py b/histomicstk/segmentationschool/Codes/dataset_mapper_custom.py new file mode 100755 index 0000000..cf0631f --- /dev/null +++ b/histomicstk/segmentationschool/Codes/dataset_mapper_custom.py @@ -0,0 +1,251 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import copy +import logging +import numpy as np +from typing import List, Optional, Union +import torch + +from detectron2.config import configurable + +from . import detection_utils as utils +from . import transforms as T +from xml_to_mask_minmax import xml_to_mask +from skimage.measure import label +def mask2polygons(mask): + annotation=[] + presentclasses=np.unique(mask) + + offset=-2 + presentclasses=presentclasses[presentclasses>1] + presentclasses=list(presentclasses[presentclasses<6]) + if 6 in presentclasses: + print('Beware, this mask directory has level 6 (cortical) annotations in it!') + print('Triggered by '+ imID) + exit() + presentclasses=presentclasses[:-1] + + for p in presentclasses: + contours, hierarchy = cv2.findContours(np.array(mask==p).astype('uint8'), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for contour in contours: + if contour.size>=6: + instance_dict={} + contour_flat=contour.flatten().astype('float').tolist() + xMin=min(contour_flat[::2]) + yMin=min(contour_flat[1::2]) + xMax=max(contour_flat[::2]) + yMax=max(contour_flat[1::2]) + instance_dict['bbox']=[xMin,yMin,xMax,yMax] + instance_dict['bbox_mode']=BoxMode.XYXY_ABS + instance_dict['category_id']=p+offset + instance_dict['segmentation']=[contour_flat] + annotation.append(instance_dict) + return annotation + +def get_seg_info(mask,IdGen): + seg_info=[] + mask_encoded=np.zeros(np.shape(mask)) + presentclasses=np.unique(mask) + + offset=-2 + presentclasses=presentclasses[presentclasses>1] + presentclasses=list(presentclasses[presentclasses<6]) + + for p in presentclasses: + masklabel=label(mask==p) + for j in range(1,np.max(masklabel)+1): + segment_id=IdGen.get_id(p+offset) + mask_encoded[masklabel==j]=segment_id + seg_info.append({'id':segment_id,'category_id':p+offset,'iscrowd':0,'area':np.sum(masklabel==j),'isthing':1}) + + return seg_info,mask_encoded + +""" +This file contains the default mapping that's applied to "dataset dicts". 
+""" + +__all__ = ["DatasetMapper"] + + +class DatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by the model. + + This is the default callable to be used to map your dataset dict into training data. + You may need to follow it to implement your own one for customized logic, + such as a different way to read or transform images. + See :doc:`/tutorials/data_loading` for details. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies cropping/geometric transforms to the image and annotations + 3. Prepare data and annotations to Tensor and :class:`Instances` + """ + + @configurable + def __init__( + self, + WSI_data, + is_train: bool, + *, + augmentations: List[Union[T.Augmentation, T.Transform]], + image_format: str, + use_instance_mask: bool = False, + use_keypoint: bool = False, + instance_mask_format: str = "polygon", + keypoint_hflip_indices: Optional[np.ndarray] = None, + precomputed_proposal_topk: Optional[int] = None, + recompute_boxes: bool = False, + ): + """ + NOTE: this interface is experimental. + + Args: + is_train: whether it's used in training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + use_instance_mask: whether to process instance segmentation annotations, if available + use_keypoint: whether to process keypoint annotations if available + instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation + masks into this format. + keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices` + precomputed_proposal_topk: if given, will load pre-computed + proposals from dataset_dict and keep the top k proposals for each image. + recompute_boxes: whether to overwrite bounding box annotations + by computing tight bounding boxes from instance mask annotations. 
+ """ + if recompute_boxes: + assert use_instance_mask, "recompute_boxes requires instance masks" + # fmt: off + self.is_train = is_train + self.augmentations = T.AugmentationList(augmentations) + self.image_format = image_format + self.use_instance_mask = use_instance_mask + self.instance_mask_format = instance_mask_format + self.use_keypoint = use_keypoint + self.keypoint_hflip_indices = keypoint_hflip_indices + self.proposal_topk = precomputed_proposal_topk + self.recompute_boxes = recompute_boxes + # fmt: on + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train: bool = True): + augs = utils.build_augmentation(cfg, is_train) + if cfg.INPUT.CROP.ENABLED and is_train: + augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)) + recompute_boxes = cfg.MODEL.MASK_ON + else: + recompute_boxes = False + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "use_instance_mask": cfg.MODEL.MASK_ON, + "instance_mask_format": cfg.INPUT.MASK_FORMAT, + "use_keypoint": cfg.MODEL.KEYPOINT_ON, + "recompute_boxes": recompute_boxes, + } + + if cfg.MODEL.KEYPOINT_ON: + ret["keypoint_hflip_indices"] = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + + if cfg.MODEL.LOAD_PROPOSALS: + ret["precomputed_proposal_topk"] = ( + cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN + if is_train + else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST + ) + return ret + + def _transform_annotations(self, dataset_dict, transforms, image_shape): + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + if not self.use_instance_mask: + anno.pop("segmentation", None) + if not self.use_keypoint: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + instances = utils.annotations_to_instances( + annos, image_shape, mask_format=self.instance_mask_format + ) + + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. + if self.recompute_boxes: + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + dataset_dict["instances"] = utils.filter_empty_instances(instances) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
+
+        Returns:
+            dict: a format that builtin models in detectron2 accept
+        """
+        dataset_dict = copy.deepcopy(dataset_dict)  # deep copy first; the dict is filled in below with WSI tile data
+        c=dataset_dict['coordinates']
+        h=dataset_dict['height']
+        w=dataset_dict['width']
+
+        x=train_dset.get_xml_folder()+'_'.join(dataset_dict['image_id'].split('_')[:-2])+'.xml'
+
+        # read_region returns RGBA; keep only the RGB channels
+        image=np.array(train_dset.get_single_openslide(dataset_dict['slide_index']).read_region((c[0],c[1]),0,(h,w)))[:,:,:3]
+        maskData=xml_to_mask(x, c, [h,w])
+        dataset_dict['annotations']=mask2polygons(maskData)
+
+        utils.check_image_size(dataset_dict, image)
+
+        # USER: Remove if you don't do semantic/panoptic segmentation.
+        sem_seg_gt = np.array(maskData==1).astype('uint8')
+        # if "sem_seg_file_name" in dataset_dict:
+        #     sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
+        # else:
+        #     sem_seg_gt = None
+
+        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
+        transforms = self.augmentations(aug_input)
+        image, sem_seg_gt = aug_input.image, aug_input.sem_seg
+
+        image_shape = image.shape[:2]  # h, w
+        # PyTorch's dataloader is efficient on torch.Tensor due to shared memory,
+        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
+        # Therefore it's important to use torch.Tensor.
+        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
+        if sem_seg_gt is not None:
+            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))
+
+        # USER: Remove if you don't use pre-computed proposals.
+        # Most users would not need this feature.
+        if self.proposal_topk is not None:
+            utils.transform_proposals(
+                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
+            )
+
+        if not self.is_train:
+            # USER: Modify this if you want to keep them for some reason.
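+            # At inference time the ground-truth fields are dropped below, so the
+            # model receives only the image tensor (and any precomputed proposals).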
+ dataset_dict.pop("annotations", None) + dataset_dict.pop("sem_seg_file_name", None) + return dataset_dict + + if "annotations" in dataset_dict: + self._transform_annotations(dataset_dict, transforms, image_shape) + + return dataset_dict diff --git a/histomicstk/segmentationschool/Codes/evolve_predictions.py b/histomicstk/segmentationschool/Codes/evolve_predictions.py old mode 100644 new mode 100755 index 400950b..bbc3bb6 --- a/histomicstk/segmentationschool/Codes/evolve_predictions.py +++ b/histomicstk/segmentationschool/Codes/evolve_predictions.py @@ -2,13 +2,13 @@ import numpy as np import os import sys -# import argparse +import argparse import multiprocessing import lxml.etree as ET import warnings -# import time +import time -sys.path.append("..") +sys.path.append(os.getcwd()+'/Codes') from glob import glob from subprocess import call @@ -16,13 +16,13 @@ from skimage.io import imread, imsave from skimage.transform import resize from scipy.ndimage.measurements import label -# from skimage.segmentation import clear_border +from skimage.segmentation import clear_border from skimage.morphology import remove_small_objects -# from skimage import color +from skimage import color from shutil import rmtree -from .IterativeTraining import get_num_classes -from .get_choppable_regions import get_choppable_regions -#from get_network_performance import get_perf +from IterativeTraining import get_num_classes +from get_choppable_regions import get_choppable_regions +from get_network_performance import get_perf """ Code to test a WSI using all saved models in project diff --git a/histomicstk/segmentationschool/Codes/generateTrainSet.py b/histomicstk/segmentationschool/Codes/generateTrainSet.py old mode 100644 new mode 100755 index 7bbf476..7b95f02 --- a/histomicstk/segmentationschool/Codes/generateTrainSet.py +++ b/histomicstk/segmentationschool/Codes/generateTrainSet.py @@ -1,7 +1,7 @@ import glob -# import numpy as np +import numpy as np import os -# from cv2 import imread,imwrite +from cv2 import imread,imwrite def generateDatalists(images,masks,imfolder,maskfolder,imExt,maskExt,f_name1): diff --git a/histomicstk/segmentationschool/Codes/getWsi.py b/histomicstk/segmentationschool/Codes/getWsi.py old mode 100644 new mode 100755 diff --git a/histomicstk/segmentationschool/Codes/get_choppable_regions.py b/histomicstk/segmentationschool/Codes/get_choppable_regions.py old mode 100644 new mode 100755 index 749fa9a..3cbd584 --- a/histomicstk/segmentationschool/Codes/get_choppable_regions.py +++ b/histomicstk/segmentationschool/Codes/get_choppable_regions.py @@ -1,39 +1,23 @@ import numpy as np -from .getWsi import getWsi -# from skimage.filters import threshold_otsu -# from skimage.morphology import binary_closing, disk, remove_small_objects,label +from getWsi import getWsi +from skimage.filters import threshold_otsu +from skimage.morphology import binary_closing, disk, remove_small_objects,label from scipy.ndimage.morphology import binary_fill_holes -# import matplotlib.pyplot as plt +import matplotlib.pyplot as plt from skimage.color import rgb2hsv from skimage.filters import gaussian from skimage.morphology import binary_dilation, diamond +def get_choppable_regions(wsi,index_x, index_y, boxSize,white_percent): + if wsi.split('.')[-1] != 'tif': + slide=getWsi(wsi) + slide_level = slide.level_count-1 -def get_choppable_regions(wsi,index_x, index_y, boxSize,white_percent,dims,glob_offset,extname): - if extname=='.scn': - choppable_regions=np.ones((len(index_y),len(index_x))) - return choppable_regions - 
else:
-
-        if extname=='.scn':
-            slide=getWsi(wsi)
-
-            resRatio= 16
-            ds_1=int((dims[0]+glob_offset[0])/16)
-            ds_2=int((dims[1]+glob_offset[1])/16)
-            Im=np.array(slide.read_region((int(glob_offset[0]/16),int(glob_offset[1]/16)),2,(ds_1,ds_2)))[:,:,:3]
-            # plt.imshow(Im)
-            # plt.show()
-
-        else:
-            slide=getWsi(wsi)
-            slide_level = slide.level_count-1
-
-            fullSize=slide.level_dimensions[0]
-            resRatio= 16
-            ds_1=fullSize[0]/16
-            ds_2=fullSize[1]/16
-            Im=np.array(slide.get_thumbnail((ds_1,ds_2)))
+        fullSize=slide.level_dimensions[0]
+        resRatio= 16
+        ds_1=fullSize[0]/16
+        ds_2=fullSize[1]/16
+        Im=np.array(slide.get_thumbnail((ds_1,ds_2)))
 
         ID=wsi.split('.svs')[0]
@@ -68,5 +52,7 @@ def get_choppable_regions(wsi,index_x, index_y, boxSize,white_percent,dims,glob_
         if np.sum(binary2[yStart:yStop,xStart:xStop])>(white_percent*box_total):
             choppable_regions[idxy,idxx]=1
+    else:
+        choppable_regions=np.ones((len(index_y),len(index_x)))
 
     return choppable_regions
diff --git a/histomicstk/segmentationschool/Codes/get_dataset_list.py b/histomicstk/segmentationschool/Codes/get_dataset_list.py
old mode 100644
new mode 100755
index e499607..4735c5f
--- a/histomicstk/segmentationschool/Codes/get_dataset_list.py
+++ b/histomicstk/segmentationschool/Codes/get_dataset_list.py
@@ -1,17 +1,17 @@
-from .utils import IdGenerator, id2rgb
+from utils import IdGenerator, id2rgb
 from skimage.measure import label
-from skimage.io import imsave#,imread
+from skimage.io import imread,imsave
 import random
 import numpy as np
 import glob
 import warnings
 import matplotlib.pyplot as plt
-#from matplotlib import patches
+from matplotlib import patches
 import cv2
 from detectron2.structures import BoxMode
 from joblib import Parallel, delayed
 import multiprocessing
-import os, json
+import os,json
 from tqdm import tqdm
 class NpEncoder(json.JSONEncoder):
     def default(self, obj):
@@ -23,10 +23,23 @@ def default(self, obj):
             return obj.tolist()
         return super(NpEncoder, self).default(obj)
 
-def mask2polygons(mask):
+def mask2polygons(mask,organType):
     annotation=[]
     presentclasses=np.unique(mask)
-    presentclasses=presentclasses[presentclasses<2]
+    if organType=='kidney':
+        offset=-2
+        # check for stray level-6 (cortical) labels before they are filtered out
+        if 6 in presentclasses:
+            print('Beware, this mask directory has level 6 (cortical) annotations in it!')
+            exit()
+        presentclasses=presentclasses[presentclasses>1]
+        presentclasses=list(presentclasses[presentclasses<6])
+        presentclasses=presentclasses[:-1]
+
+    elif organType=='liver':
+        offset=0
+        presentclasses=presentclasses[presentclasses<2]
+
 
     for p in presentclasses:
@@ -41,35 +54,52 @@ def mask2polygons(mask):
         yMax=max(contour_flat[1::2])
         instance_dict['bbox']=[xMin,yMin,xMax,yMax]
         instance_dict['bbox_mode']=BoxMode.XYXY_ABS
-        instance_dict['category_id']=p
+        instance_dict['category_id']=p+offset
         instance_dict['segmentation']=[contour_flat]
         annotation.append(instance_dict)
     return annotation
 
-def get_seg_info(mask,IdGen,imID):
+def get_seg_info(mask,IdGen,imID,organType):
     seg_info=[]
     mask_encoded=np.zeros(np.shape(mask))
     presentclasses=np.unique(mask)
+    if organType=='kidney':
+        offset=-2
+        # check for stray level-6 (cortical) labels before they are filtered out
+        if 6 in presentclasses:
+            print('Beware, this mask directory has level 6 (cortical) annotations in it!')
+            print('Triggered by '+ imID)
+            exit()
+        presentclasses=presentclasses[presentclasses>1]
+        presentclasses=list(presentclasses[presentclasses<6])
+        presentclasses=presentclasses[:-1]
+
+    elif organType=='liver':
+        offset=0
+        presentclasses=presentclasses[presentclasses<2]
 
-    presentclasses=presentclasses[presentclasses<2]
     for p in
presentclasses: masklabel=label(mask==p) for j in range(1,np.max(masklabel)+1): - segment_id=IdGen.get_id(p) + segment_id=IdGen.get_id(p+offset) mask_encoded[masklabel==j]=segment_id - seg_info.append({'id':segment_id,'category_id':p,'iscrowd':0,'area':np.sum(masklabel==j),'isthing':1}) + seg_info.append({'id':segment_id,'category_id':p+offset,'iscrowd':0,'area':np.sum(masklabel==j),'isthing':1}) + return seg_info,mask_encoded -def get_list_parallel(im,total,mask_dir,IdGen,out_dir,rand_sample,i): - imID=im.split('/')[-1].split('.png')[0] - maskname=im.replace('/regions/','/masks/') - sname=maskname.replace('.png','_s.png') - pname=maskname.replace('.png','_p.png') +def get_list_parallel(im,total,mask_dir,IdGen,out_dir,rand_sample,i,organType,dirs): + imID=im.split('/')[-1].split(dirs['imExt'])[0] + maskname=im.replace('/regions/','/masks/').replace(dirs['imExt'],dirs['maskExt']) + sname=maskname.replace(dirs['maskExt'],'_s.png') + pname=maskname.replace(dirs['maskExt'],'_p.png') + maskData=cv2.imread(maskname,0) + if organType=='kidney': + stuff_mask=np.array(maskData==1).astype('uint8') + stuff_mask[maskData==0]=2 - stuff_mask=np.array(maskData==2).astype('uint8') + elif organType=='liver': + stuff_mask=np.array(maskData==2).astype('uint8') # plt.subplot(131) # plt.imshow(maskData) @@ -89,11 +119,11 @@ def get_list_parallel(im,total,mask_dir,IdGen,out_dir,rand_sample,i): image_annotation_info['sem_seg_file_name']=sname # image_annotation_info['sem_seg_file_name']=maskname - out=get_seg_info(maskData,IdGen,imID) + out=get_seg_info(maskData,IdGen,imID,organType) annotations={} image_annotation_info['segments_info']=out[0] - image_annotation_info['annotations']=mask2polygons(maskData) + image_annotation_info['annotations']=mask2polygons(maskData,organType) with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -104,15 +134,20 @@ def get_list_parallel(im,total,mask_dir,IdGen,out_dir,rand_sample,i): # plt.subplot(122) # plt.imshow(rgbim) # plt.show() - imsave(pname,rgbim) - imsave(sname,stuff_mask) + # imsave(pname,rgbim) + # + # imsave(sname,stuff_mask) + # plt.subplot(121) # plt.imshow() # plt.subplot(122) image_annotation_info['pan_seg_file_name']=pname return image_annotation_info -def HAIL2Detectron(img_dir,rand_sample,out_json,classnames,isthing,xml_color,num_images=None): +def HAIL2Detectron(img_dir,rand_sample,out_json,classnames,isthing,xml_color,organType,dirs,num_images=None): + out_json1=out_json+'_p1.json' + out_json2=out_json+'_p2.json' + out_json3=out_json+'_p3.json' classes={} @@ -126,7 +161,7 @@ def HAIL2Detectron(img_dir,rand_sample,out_json,classnames,isthing,xml_color,num out_dir=mask_dir images=[] - images.extend(glob.glob(img_dir+'*.png')) + images.extend(glob.glob(img_dir+'*'+dirs['imExt'])) num_cores = multiprocessing.cpu_count() # num_cores=5 @@ -141,22 +176,48 @@ def HAIL2Detectron(img_dir,rand_sample,out_json,classnames,isthing,xml_color,num if rand_sample: random.shuffle(images) - data_list=Parallel(n_jobs=num_cores)(delayed(get_list_parallel)(i=i,total=total, - im=im,mask_dir=mask_dir,IdGen=IdGen,out_dir=out_dir, - rand_sample=rand_sample) for i,im in enumerate(tqdm(images))) - else: - data_list=Parallel(n_jobs=num_cores)(delayed(get_list_parallel)(i=i,total=total, - im=im,mask_dir=mask_dir,IdGen=IdGen,out_dir=out_dir, - rand_sample=rand_sample) for i,im in enumerate(tqdm(images))) - with open(out_json,'w') as fout: + + + split=round(total/3) + + + + data_list=Parallel(n_jobs=num_cores)(delayed(get_list_parallel)(i=i,total=total, + 
im=im,mask_dir=mask_dir,IdGen=IdGen,out_dir=out_dir, + rand_sample=rand_sample,organType=organType,dirs=dirs) for i,im in enumerate(tqdm(images[:split]))) + with open(out_json1,'w') as fout: json.dump(data_list,fout,cls=NpEncoder) - return data_list + data_list=Parallel(n_jobs=num_cores)(delayed(get_list_parallel)(i=i,total=total, + im=im,mask_dir=mask_dir,IdGen=IdGen,out_dir=out_dir, + rand_sample=rand_sample,organType=organType,dirs=dirs) for i,im in enumerate(tqdm(images[split:split*2]))) + with open(out_json2,'w') as fout: + json.dump(data_list,fout,cls=NpEncoder) + + data_list=Parallel(n_jobs=num_cores)(delayed(get_list_parallel)(i=i,total=total, + im=im,mask_dir=mask_dir,IdGen=IdGen,out_dir=out_dir, + rand_sample=rand_sample,organType=organType,dirs=dirs) for i,im in enumerate(tqdm(images[2*split:total]))) + with open(out_json3,'w') as fout: + json.dump(data_list,fout,cls=NpEncoder) + + # with open(out_json,'w') as fout: + # json.dump(data_list,fout,cls=NpEncoder) + + # return data_list def samples_from_json(json_file,rand_sample,num_images=None): - with open(json_file) as f: - full_list=json.load(f) + # with open(json_file) as f: + # full_list=json.load(f) + out_json1=json_file+'_p1.json' + out_json2=json_file+'_p2.json' + out_json3=json_file+'_p3.json' + with open(out_json1) as f: + full_list=json.load(f) + with open(out_json2) as f: + full_list.extend(json.load(f)) + with open(out_json3) as f: + full_list.extend(json.load(f)) # json_length=len(full_list) # data_list=[] # if num_images is not None: @@ -235,16 +296,16 @@ def decode_panoptic(image,segments_info,out_dir,file_name): warnings.simplefilter("ignore") imsave(out_dir+'/'+file_name.split('/')[-1].replace('.jpeg','.png'),out.astype('uint8')) -def WSIGridIterator(wsi_name,choppable_regions,index_x,index_y,region_size,dim_x,dim_y,glob_offset): +def WSIGridIterator(wsi_name,choppable_regions,index_x,index_y,region_size,dim_x,dim_y): wsi_name=os.path.splitext(wsi_name.split('/')[-1])[0] data_list=[] for idxy, i in tqdm(enumerate(index_y)): for idxx, j in enumerate(index_x): if choppable_regions[idxy, idxx] != 0: - yEnd = min(dim_y+glob_offset[1],i+region_size) + yEnd = min(dim_y,i+region_size) #print(yEnd) - xEnd = min(dim_x+glob_offset[0],j+region_size) + xEnd = min(dim_x,j+region_size) #print(xEnd) xLen=xEnd-j diff --git a/histomicstk/segmentationschool/Codes/get_network_performance.py b/histomicstk/segmentationschool/Codes/get_network_performance.py old mode 100644 new mode 100755 index e0605c4..3e93c37 --- a/histomicstk/segmentationschool/Codes/get_network_performance.py +++ b/histomicstk/segmentationschool/Codes/get_network_performance.py @@ -1,13 +1,13 @@ import numpy as np -from .getWsi import getWsi -from .xml_to_mask import xml_to_mask -# from joblib import Parallel, delayed -# import multiprocessing +import getWsi +from xml_to_mask import xml_to_mask +from joblib import Parallel, delayed +import multiprocessing from PIL import Image def get_perf(wsi,xml1,xml2,args): if args.wsi_ext != '.tif': - WSIinfo=getWsi(wsi) + WSIinfo=getWsi.getWsi(wsi) dim_x, dim_y=WSIinfo.dimensions else: im = Image.open(wsi) diff --git a/histomicstk/segmentationschool/Codes/get_network_performance_folder.py b/histomicstk/segmentationschool/Codes/get_network_performance_folder.py old mode 100644 new mode 100755 index a3d3b1a..f8cc9d9 --- a/histomicstk/segmentationschool/Codes/get_network_performance_folder.py +++ b/histomicstk/segmentationschool/Codes/get_network_performance_folder.py @@ -1,10 +1,10 @@ import numpy as np -from .getWsi import 
getWsi -from .xml_to_mask import xml_to_mask +import getWsi +from xml_to_mask import xml_to_mask from joblib import Parallel, delayed import multiprocessing from glob import glob -# from matplotlib import pyplot as plt +from matplotlib import pyplot as plt from PIL import Image #def get_network_performance(WSI_location,xml_annotation,xml_prediction): block_size=2000 @@ -12,7 +12,7 @@ predDir='/home/bgbl/H-AI-L/IFTAKuang/TRAINING_data/1/Predicted_XMLs/' dataDir='/home/bgbl/H-AI-L/IFTAKuang/wsi/' txtDir='/home/bgbl/H-AI-L/IFTAKuang/' -savelist=[] +savelist=[]; f_name1=txtDir + 'performance.txt' f1=open(f_name1,'w') f1.close() @@ -37,7 +37,7 @@ def get_perf(wsi,xml1,xml2,args): #specs=inspect_mask(index_y[0],index_x[0],block_size,xml_annotation,xml_prediction) if args.wsi_ext != '.tif': - WSIinfo=getWsi(wsi) + WSIinfo=getWsi.getWsi(wsi) dim_x, dim_y=WSIinfo.dimensions else: im = Image.open(wsi) diff --git a/histomicstk/segmentationschool/Codes/predict_xml.py b/histomicstk/segmentationschool/Codes/predict_xml.py old mode 100644 new mode 100755 index 340fdcc..2470540 --- a/histomicstk/segmentationschool/Codes/predict_xml.py +++ b/histomicstk/segmentationschool/Codes/predict_xml.py @@ -9,7 +9,7 @@ from skimage.io import imread, imsave from skimage.transform import resize from scipy.ndimage.measurements import label -# from skimage.segmentation import clear_border +from skimage.segmentation import clear_border from skimage.morphology import remove_small_objects import lxml.etree as ET import warnings diff --git a/histomicstk/segmentationschool/Codes/randomHSVshift.py b/histomicstk/segmentationschool/Codes/randomHSVshift.py old mode 100644 new mode 100755 index 51ff6d7..f1d6a42 --- a/histomicstk/segmentationschool/Codes/randomHSVshift.py +++ b/histomicstk/segmentationschool/Codes/randomHSVshift.py @@ -1,8 +1,8 @@ -# import numpy as np +import numpy as np from skimage.color import rgb2hsv,hsv2rgb,rgb2lab,lab2rgb -# import matplotlib as plt -# import cv2 +import matplotlib as plt +import cv2 from skimage import exposure def randomHSVshift(x,hShift,lShift): diff --git a/histomicstk/segmentationschool/Codes/utils.py b/histomicstk/segmentationschool/Codes/utils.py old mode 100644 new mode 100755 index 95b34b4..43e3a4a --- a/histomicstk/segmentationschool/Codes/utils.py +++ b/histomicstk/segmentationschool/Codes/utils.py @@ -2,8 +2,8 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals -# import functools -# import traceback +import functools +import traceback import json import numpy as np diff --git a/histomicstk/segmentationschool/Codes/wsi_loader_utils.py b/histomicstk/segmentationschool/Codes/wsi_loader_utils.py old mode 100644 new mode 100755 index ea87ae6..6ee9aa0 --- a/histomicstk/segmentationschool/Codes/wsi_loader_utils.py +++ b/histomicstk/segmentationschool/Codes/wsi_loader_utils.py @@ -4,81 +4,182 @@ import matplotlib.pyplot as plt from skimage.color import rgb2hsv from skimage.filters import gaussian -# from skimage.morphology import binary_dilation, diamond -# import cv2 +from skimage.morphology import binary_dilation, diamond +from skimage.segmentation import clear_border +import cv2 from tqdm import tqdm from skimage.io import imread,imsave -import multiprocessing + +from utils import IdGenerator, id2rgb +from skimage.measure import label +from skimage.io import imread,imsave +import random +import numpy as np +import glob +import warnings +import matplotlib.pyplot as plt +from matplotlib import patches +import cv2 +from 
detectron2.structures import BoxMode from joblib import Parallel, delayed +import multiprocessing +import os,json +from tqdm import tqdm +from xml_to_mask_minmax import xml_to_mask,write_minmax_to_xml +import lxml.etree as ET +class NpEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.integer): + return int(obj) + if isinstance(obj, np.floating): + return float(obj) + if isinstance(obj, np.ndarray): + return obj.tolist() + return super(NpEncoder, self).default(obj) +# +# def mask2polygons(mask,organType): +# annotation=[] +# presentclasses=np.unique(mask) +# if organType=='kidney': +# offset=-2 +# presentclasses=presentclasses[presentclasses>1] +# presentclasses=list(presentclasses[presentclasses<6]) +# if 6 in presentclasses: +# print('Beware, this mask directory has level 6 (cortical) annotations in it!') +# print('Triggered by '+ imID) +# exit() +# presentclasses=presentclasses[:-1] +# +# elif organType=='liver': +# offset=0 +# presentclasses=presentclasses[presentclasses<2] +# +# for p in presentclasses: +# contours, hierarchy = cv2.findContours(np.array(mask==p).astype('uint8'), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) +# for contour in contours: +# if contour.size>=6: +# instance_dict={} +# contour_flat=contour.flatten().astype('float').tolist() +# xMin=min(contour_flat[::2]) +# yMin=min(contour_flat[1::2]) +# xMax=max(contour_flat[::2]) +# yMax=max(contour_flat[1::2]) +# instance_dict['bbox']=[xMin,yMin,xMax,yMax] +# instance_dict['bbox_mode']=BoxMode.XYXY_ABS +# instance_dict['category_id']=p+offset +# instance_dict['segmentation']=[contour_flat] +# annotation.append(instance_dict) +# return annotation -def save_thumb(args,slide_loc): - print(slide_loc) - slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) - slide=openslide.OpenSlide(slide_loc) - if slideExt =='.scn': - dim_x=int(slide.properties['openslide.bounds-width'])## add to columns - dim_y=int(slide.properties['openslide.bounds-height'])## add to rows - offsetx=int(slide.properties['openslide.bounds-x'])##start column - offsety=int(slide.properties['openslide.bounds-y'])##start row - elif slideExt in ['.ndpi','.svs']: - dim_x, dim_y=slide.dimensions - offsetx=0 - offsety=0 +# def get_seg_info(mask,IdGen,imID,organType): +# seg_info=[] +# mask_encoded=np.zeros(np.shape(mask)) +# presentclasses=np.unique(mask) +# if organType=='kidney': +# offset=-2 +# presentclasses=presentclasses[presentclasses>1] +# presentclasses=list(presentclasses[presentclasses<6]) +# if 6 in presentclasses: +# print('Beware, this mask directory has level 6 (cortical) annotations in it!') +# print('Triggered by '+ imID) +# exit() +# presentclasses=presentclasses[:-1] +# +# elif organType=='liver': +# offset=0 +# presentclasses=presentclasses[presentclasses<2] +# +# +# for p in presentclasses: +# masklabel=label(mask==p) +# for j in range(1,np.max(masklabel)+1): +# segment_id=IdGen.get_id(p+offset) +# mask_encoded[masklabel==j]=segment_id +# seg_info.append({'id':segment_id,'category_id':p+offset,'iscrowd':0,'area':np.sum(masklabel==j),'isthing':1}) +# +# return seg_info,mask_encoded - # fullSize=slide.level_dimensions[0] - # resRatio= args.chop_thumbnail_resolution - # ds_1=fullSize[0]/resRatio - # ds_2=fullSize[1]/resRatio - # thumbIm=np.array(slide.get_thumbnail((ds_1,ds_2))) - # if slideExt =='.scn': - # xStt=int(offsetx/resRatio) - # xStp=int((offsetx+dim_x)/resRatio) - # yStt=int(offsety/resRatio) - # yStp=int((offsety+dim_y)/resRatio) - # thumbIm=thumbIm[yStt:yStp,xStt:xStp] - # 
imsave(slide_loc.replace(slideExt,'_thumb.jpeg'),thumbIm) - slide.associated_images['label'].save(slide_loc.replace(slideExt,'_label.png')) - # imsave(slide_loc.replace(slideExt,'_label.png'),slide.associated_images['label']) - - -def get_image_thumbnails(args): - assert args.target is not None, 'Location of images must be provided' - all_slides=[] - for ext in args.wsi_ext.split(','): - all_slides.extend(glob.glob(args.target+'/*'+ext)) - Parallel(n_jobs=multiprocessing.cpu_count())(delayed(save_thumb)(args,slide_loc) for slide_loc in tqdm(all_slides)) - # for slide_loc in tqdm(all_slides): - -class WSIPredictLoader(): - def __init__(self,args, wsi_directory=None, transform=None): - assert wsi_directory is not None, 'location of training svs and xml must be provided' - mask_out_loc=os.path.join(wsi_directory.replace('/TRAINING_data/0','Permanent/Tissue_masks/'),) - if not os.path.exists(mask_out_loc): - os.makedirs(mask_out_loc) - all_slides=[] - for ext in args.wsi_ext.split(','): - all_slides.extend(glob.glob(wsi_directory+'/*'+ext)) - print('Getting slide metadata and usable regions...') +def get_image_meta(i,args): + image_annotation_info={} + # image_annotation_info['slide_loc']=train_dset.get_single_slide_data(i[0]) + image_annotation_info['slide_loc']=i[0] + slide=openslide.OpenSlide(image_annotation_info['slide_loc']) + magx=np.round(float(slide.properties['openslide.mpp-x']),2) + magy=np.round(float(slide.properties['openslide.mpp-y']),2) - usable_slides=[] - for slide_loc in all_slides: - slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) - print("working slide... "+ slideID,end='\r') - - slide=openslide.OpenSlide(slide_loc) - chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) - mag_x=np.round(float(slide.properties['openslide.mpp-x']),2) - mag_y=np.round(float(slide.properties['openslide.mpp-y']),2) - print(mag_x,mag_y) - usable_slides.append({'slide_loc':slide_loc,'slideID':slideID,'slideExt':slideExt,'slide':slide, - 'chop_array':chop_array,'mag':[mag_x,mag_y]}) - self.usable_slides= usable_slides - self.boxSize40X = args.boxSize - self.boxSize20X = int(args.boxSize)/2 - -class WSITrainingLoader(): - def __init__(self,args, wsi_directory=None, transform=None): + assert magx == magy + if magx ==0.25: + dx=args.boxSize + dy=args.boxSize + elif magx == 0.5: + dx=int(args.boxSize/2) + dy=int(args.boxSize/2) + else: + print('nonstandard image magnification') + print(slide) + print(magx,magy) + exit() + + image_annotation_info['coordinates']=[i[2][1],i[2][0]] + image_annotation_info['height']=dx + image_annotation_info['width']=dy + image_annotation_info['image_id']=i[1].split('/')[-1].replace('.xml','_'.join(['',str(i[2][1]),str(i[2][0])])) + image_annotation_info['xml_loc']=i[1] + slide.close() + return image_annotation_info + +def train_samples_from_WSI(args,image_coordinates): + + + num_cores=multiprocessing.cpu_count() + print('Generating detectron2 dictionary format...') + data_list=Parallel(n_jobs=num_cores,backend='threading')(delayed(get_image_meta)(i=i, + args=args) for i in tqdm(image_coordinates)) + # print('flushing') + # train_dset.flush_xmldata() + # print('flushed') + # print(vars(train_dset)) + # exit() + ##In serial for debugging + # for i in tqdm(image_coordinates): + # print(image_annotation_info['image_id']) + # image_annotation_info['sem_seg_file_name']=sname + # image_annotation_info['sem_seg_file_name']=maskname + # out=get_seg_info(maskData,IdGen,imID,organType) + # image_annotation_info['segments_info']=out[0] + # 
image_annotation_info['annotations']=mask2polygons(maskData,organType) + # rgbim=id2rgb(out[1]) + # image_annotation_info['pan_seg_file_name']=pname + # data_list.append(image_annotation_info) + return data_list + +def WSIGridIterator(wsi_name,choppable_regions,index_x,index_y,region_size,dim_x,dim_y): + wsi_name=os.path.splitext(wsi_name.split('/')[-1])[0] + data_list=[] + for idxy, i in tqdm(enumerate(index_y)): + for idxx, j in enumerate(index_x): + if choppable_regions[idxy, idxx] != 0: + yEnd = min(dim_y,i+region_size) + xEnd = min(dim_x,j+region_size) + xLen=xEnd-j + yLen=yEnd-i + + image_annotation_info={} + image_annotation_info['file_name']='_'.join([wsi_name,str(j),str(i),str(xEnd),str(yEnd)]) + image_annotation_info['height']=yLen + image_annotation_info['width']=xLen + image_annotation_info['image_id']=image_annotation_info['file_name'] + image_annotation_info['xStart']=j + image_annotation_info['yStart']=i + data_list.append(image_annotation_info) + return data_list + + +# class WSITrainingLoader(): +# def __init__(self,args, wsi_directory=None, transform=None): +def get_slide_data(args, wsi_directory=None): assert wsi_directory is not None, 'location of training svs and xml must be provided' + mask_out_loc=os.path.join(wsi_directory.replace('/TRAINING_data/0','Permanent/Tissue_masks/'),) if not os.path.exists(mask_out_loc): os.makedirs(mask_out_loc) @@ -90,29 +191,129 @@ def __init__(self,args, wsi_directory=None, transform=None): usable_slides=[] for slide_loc in all_slides: slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) - print("working slide... "+ slideID,end='\r') - - slide=openslide.OpenSlide(slide_loc) - chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) - mag_x=np.round(float(slide.properties['openslide.mpp-x']),2) - mag_y=np.round(float(slide.properties['openslide.mpp-y']),2) - print(mag_x,mag_y) - usable_slides.append({'slide_loc':slide_loc,'slideID':slideID,'slideExt':slideExt,'slide':slide, - 'chop_array':chop_array,'mag':[mag_x,mag_y]}) - self.usable_slides= usable_slides - self.boxSize40X = args.boxSize - self.boxSize20X = int(args.boxSize)/2 + xmlpath=slide_loc.replace(slideExt,'.xml') + if os.path.isfile(xmlpath): + write_minmax_to_xml(xmlpath) + + print("Gathering slide data ... 
"+ slideID,end='\r') + slide=openslide.OpenSlide(slide_loc) + chop_array=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) + + mag_x=np.round(float(slide.properties['openslide.mpp-x']),2) + mag_y=np.round(float(slide.properties['openslide.mpp-y']),2) + slide.close() + tree = ET.parse(xmlpath) + root = tree.getroot() + balance_classes=args.balanceClasses.split(',') + classNums={} + for b in balance_classes: + classNums[b]=0 + # balance_annotations={} + for Annotation in root.findall("./Annotation"): + + annotationID = Annotation.attrib['Id'] + if annotationID=='7': + print('annotation classes too high '+ xmlpath) + exit() + if annotationID in classNums.keys(): + + classNums[annotationID]=len(Annotation.findall("./*/Region")) + else: + pass + + usable_slides.append({'slide_loc':slide_loc,'slideID':slideID, + 'chop_array':chop_array,'num_regions':len(chop_array),'mag':[mag_x,mag_y], + 'xml_loc':xmlpath,'annotations':classNums,'root':root, + 'thumb_loc':os.path.join(mask_out_loc,'_'.join([slideID,slideExt[1:]+'.jpeg']))}) + else: + print('\n') + print('no annotation XML file found for:') + print(slideID) + exit() print('\n') + return usable_slides + +# def get_random_slide_idx(self,train_images): +# return random.choices(self.usable_idx,k=train_images) +# def get_single_slide_data(self,idx): +# return self.usable_slides[idx]['slide_loc'] +def get_random_chops(slide_idx,usable_slides,region_size): + # chops=[] + choplen=len(slide_idx) + chops=Parallel(n_jobs=multiprocessing.cpu_count(),backend='threading')(delayed(get_chop_data)(idx=idx, + usable_slides=usable_slides,region_size=region_size) for idx in tqdm(slide_idx)) + return chops +# def get_xml_folder(self): +# return self.xml_folder +# def flush_xmldata(self): +# import gc +# del self.usable_slides +# gc.collect() +# input('collcted') +# # for idx,slide in enumerate(self.usable_slides): +# # # slide['root']=None +# # # self.usable_slides[idx]=slide +# # del self.usable_slides[idx]['root'] +# # del self.usable_slides[idx]['chop_array'] +# def get_image_means(self): +# image_means=[] +# print('Getting custom image means...') +# for usable_slide in tqdm(self.usable_slides): +# +# thumbIm=imread(usable_slide['thumb_loc']) +# binary=(imread(usable_slide['thumb_loc'].replace('.jpeg','.png'))/255) +# maskedIm=(thumbIm*np.repeat(np.expand_dims(binary,axis=-1),axis=2,repeats=3)) +# rmean=np.sum(maskedIm[:,:,0])/np.count_nonzero(binary) +# gmean=np.sum(maskedIm[:,:,1])/np.count_nonzero(binary) +# bmean=np.sum(maskedIm[:,:,2])/np.count_nonzero(binary) +# image_means.append([rmean,gmean,bmean]) +# +# return image_means + +def get_chop_data(idx,usable_slides,region_size): + if random.random()>0.5: + randSelect=random.randrange(0,usable_slides[idx]['num_regions']) + chopData=[usable_slides[idx]['slide_loc'],usable_slides[idx]['xml_loc'], + usable_slides[idx]['chop_array'][randSelect]] + else: + # print(list(usable_slides[idx]['annotations'].values())) + if sum(usable_slides[idx]['annotations'].values())==0: + randSelect=random.randrange(0,usable_slides[idx]['num_regions']) + chopData=[usable_slides[idx]['slide_loc'],usable_slides[idx]['xml_loc'], + usable_slides[idx]['chop_array'][randSelect]] + else: + selectedClass=0 + classIDs=usable_slides[idx]['annotations'].keys() + classSamples=random.sample(classIDs,len(classIDs)) + for c in classSamples: + if usable_slides[idx]['annotations'][c]==0: + pass + else: + sampledRegionID=random.randrange(1,usable_slides[idx]['annotations'][c]+1) + break + # tree = ET.parse(usable_slides[idx]['xml_loc']) 
+ # root = tree.getroot() + # root= + Regions=usable_slides[idx]['root'].findall("./Annotation[@Id='{}']/Regions/Region".format(c)) + sampledRegionID=Regions[sampledRegionID-1].attrib['Id'] + Verts = usable_slides[idx]['root'].findall("./Annotation[@Id='{}']/Regions/Region[@Id='{}']/Vertices/Vertex".format(c,sampledRegionID)) + randVertIdx=random.randrange(0,len(Verts)) + randVertX=int(float(Verts[randVertIdx].attrib['X'])) + randVertY=int(float(Verts[randVertIdx].attrib['Y'])) + randVertX=max(0,randVertX-region_size) + randVertY=max(0,randVertY-region_size) + chopData=[usable_slides[idx]['slide_loc'],usable_slides[idx]['xml_loc'], + [randVertY,randVertX]] + + return chopData def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): slide_regions=[] choppable_regions_list=[] - downsample = int(args.downsampleRate**.5) #down sample for each dimension region_size = int(args.boxSize*(downsample)) #Region size before downsampling - step = int(region_size*(1-args.overlap_percent)) #Step size before downsampling - + step = int(region_size*(1-args.overlap_rate)) #Step size before downsampling if slideExt =='.scn': dim_x=int(slide.properties['openslide.bounds-width'])## add to columns dim_y=int(slide.properties['openslide.bounds-height'])## add to rows @@ -135,7 +336,9 @@ def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): resRatio= args.chop_thumbnail_resolution ds_1=fullSize[0]/resRatio ds_2=fullSize[1]/resRatio - if args.get_new_tissue_masks: + out_mask_name=os.path.join(mask_out_loc,'_'.join([slideID,slideExt[1:]+'.png'])) + if not os.path.isfile(out_mask_name) or args.get_new_tissue_masks: + print(out_mask_name) thumbIm=np.array(slide.get_thumbnail((ds_1,ds_2))) if slideExt =='.scn': xStt=int(offsetx/resRatio) @@ -143,40 +346,18 @@ def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): yStt=int(offsety/resRatio) yStp=int((offsety+dim_y)/resRatio) thumbIm=thumbIm[yStt:yStp,xStt:xStp] - # plt.imshow(thumbIm) - # plt.show() - # input() - # plt.imshow(thumbIm) - # plt.show() - - out_mask_name=os.path.join(mask_out_loc,'_'.join([slideID,slideExt[1:]+'.png'])) - - - if not args.get_new_tissue_masks: - try: - binary=(imread(out_mask_name)/255).astype('bool') - except: - print('failed to load mask for '+ out_mask_name) - print('please set get_new_tissue masks to True') - exit() - # if slideExt =='.scn': - # choppable_regions=np.zeros((len(index_x),len(index_y))) - # elif slideExt in ['.ndpi','.svs']: choppable_regions=np.zeros((len(index_y),len(index_x))) - else: - print(out_mask_name) - # if slideExt =='.scn': - # choppable_regions=np.zeros((len(index_x),len(index_y))) - # elif slideExt in ['.ndpi','.svs']: - choppable_regions=np.zeros((len(index_y),len(index_x))) - hsv=rgb2hsv(thumbIm) g=gaussian(hsv[:,:,1],5) binary=(g>0.05).astype('bool') binary=binary_fill_holes(binary) imsave(out_mask_name.replace('.png','.jpeg'),thumbIm) - imsave(out_mask_name,binary.astype('uint8')*255) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(out_mask_name,binary.astype('uint8')*255) + binary=(imread(out_mask_name)/255).astype('bool') + choppable_regions=np.zeros((len(index_y),len(index_x))) chop_list=[] for idxy,yi in enumerate(index_y): for idxx,xj in enumerate(index_x): @@ -185,31 +366,13 @@ def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): xStart = int(np.round((xj-offsetx)/resRatio)) xStop = int(np.round(((xj-offsetx)+args.boxSize)/resRatio)) box_total=(xStop-xStart)*(yStop-yStart) - if slideExt =='.scn': - # 
print(xStart,xStop,yStart,yStop) - # print(np.sum(binary[xStart:xStop,yStart:yStop]),args.white_percent,box_total) - # plt.imshow(binary[xStart:xStop,yStart:yStop]) - # plt.show() - if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): - - choppable_regions[idxy,idxx]=1 - chop_list.append([index_y[idxy],index_x[idxx]]) - - elif slideExt in ['.ndpi','.svs']: - if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): - choppable_regions[idxy,idxx]=1 - chop_list.append([index_y[idxy],index_x[idxx]]) - - imsave(out_mask_name.replace('.png','_chopregions.png'),choppable_regions.astype('uint8')*255) - - # plt.imshow(choppable_regions) - # plt.show() - # choppable_regions_list.extend(chop_list) - # plt.subplot(131) - # plt.imshow(thumbIm) - # plt.subplot(132) - # plt.imshow(binary) - # plt.subplot(133) - # plt.imshow(choppable_regions) - # plt.show() + + if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): + choppable_regions[idxy,idxx]=1 + chop_list.append([index_y[idxy],index_x[idxx]]) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(out_mask_name.replace('.png','_chopregions.png'),choppable_regions.astype('uint8')*255) + return chop_list diff --git a/histomicstk/segmentationschool/Codes/wsi_loader_utils_backup.py b/histomicstk/segmentationschool/Codes/wsi_loader_utils_backup.py new file mode 100755 index 0000000..b2c9980 --- /dev/null +++ b/histomicstk/segmentationschool/Codes/wsi_loader_utils_backup.py @@ -0,0 +1,235 @@ +import openslide,glob,os +import numpy as np +from scipy.ndimage.morphology import binary_fill_holes +import matplotlib.pyplot as plt +from skimage.color import rgb2hsv +from skimage.filters import gaussian +from skimage.morphology import binary_dilation, diamond +import cv2 +from tqdm import tqdm +from skimage.io import imread,imsave + + + +class WSITrainingLoader(): + def __init__(self,args, wsi_directory=None, transform=None): + assert wsi_directory is not None, 'location of training svs and xml must be provided' + mask_out_loc=os.path.join(wsi_directory.replace('/TRAINING_data/0','Permanent/Tissue_masks/'),) + if not os.path.exists(mask_out_loc): + os.makedirs(mask_out_loc) + all_slides=[] + for ext in args.wsi_ext.split(','): + all_slides.extend(glob.glob(wsi_directory+'/*'+ext)) + print('Getting slide metadata and usable regions...') + + usable_slides=[] + for slide_loc in all_slides: + slideID,slideExt=os.path.splitext(slide_loc.split('/')[-1]) + # print("working slide... 
"+ slideID,end='\r') + + slide=openslide.OpenSlide(slide_loc) + chop_array,num_slide_regions=get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc) + + + usable_slides.append({'slide_loc':slide_loc,'slideID':slideID,'slideExt':slideExt,'slide':slide, + 'chop_array':chop_array}) + for slide_meta in usable_slides: + slide=openslide.OpenSlide(slide_meta['slide_loc']) + print(slide_meta['slideID']) + for corner in tqdm(slide_meta['chop_array']): + if slide_meta['slideExt'] =='.scn': + print(corner) + cv2.imshow('test',np.array(slide.read_region((corner[0],corner[1]),0,(args.boxSize,args.boxSize)))[:,:,:3]) + cv2.waitKey(500) # waits until a key is pressed + # cv2.destroyAllWindows() + elif slide_meta['slideExt'] in ['.ndpi','.svs']: + continue + cv2.imshow('test',np.array(slide.read_region((corner[1],corner[0]),0,(args.boxSize,args.boxSize)))[:,:,:3]) + cv2.waitKey(500) # waits until a key is pressed + # cv2.destroyAllWindows() + # print(slide_meta['chop_array']) + # input() + print('\n') + # 'choppable_regions_x':}) + # for i,name in enumerate(lib['slides']): + # sys.stdout.write('Opening SVS headers: [{}/{}]\r'.format(i+1, len(lib['slides']))) + # sys.stdout.flush() + # slides.append(openslide.OpenSlide(name)) + # print('') + #Flatten grid + # grid = [] + # slideIDX = [] + # for i,g in enumerate(lib['grid']): + # grid.extend(g) + # slideIDX.extend([i]*len(g)) + # + # print('Number of tiles: {}'.format(len(grid))) + # self.slidenames = lib['slides'] + # self.slides = slides + # self.targets = lib['targets'] + # self.grid = grid + # self.slideIDX = slideIDX + # self.transform = transform + # self.mode = None + # self.mult = lib['mult'] + # self.size = int(np.round(224*lib['mult'])) + # self.level = lib['level'] + # def setmode(self,mode): + # self.mode = mode + # def maketraindata(self, idxs): + # self.t_data = [(self.slideIDX[x],self.grid[x],self.targets[self.slideIDX[x]]) for x in idxs] + # def shuffletraindata(self): + # self.t_data = random.sample(self.t_data, len(self.t_data)) + # def __getitem__(self,index): + # if self.mode == 1: + # slideIDX = self.slideIDX[index] + # coord = self.grid[index] + # img = self.slides[slideIDX].read_region(coord,self.level,(self.size,self.size)).convert('RGB') + # if self.mult != 1: + # img = img.resize((224,224),Image.BILINEAR) + # if self.transform is not None: + # img = self.transform(img) + # return img + # elif self.mode == 2: + # slideIDX, coord, target = self.t_data[index] + # img = self.slides[slideIDX].read_region(coord,self.level,(self.size,self.size)).convert('RGB') + # if self.mult != 1: + # img = img.resize((224,224),Image.BILINEAR) + # if self.transform is not None: + # img = self.transform(img) + # return img, target + # def __len__(self): + # if self.mode == 1: + # return len(self.grid) + # elif self.mode == 2: + # return len(self.t_data) + # + + +# img = self.slides[slideIDX].read_region(coord,self.level,(self.size,self.size)).convert('RGB') + +def get_choppable_regions(slide,args,slideID,slideExt,mask_out_loc): + slide_regions=[] + choppable_regions_list=[] + + downsample = int(args.downsampleRate**.5) #down sample for each dimension + region_size = int(args.boxSize*(downsample)) #Region size before downsampling + step = int(region_size*(1-args.overlap_percent)) #Step size before downsampling + + if slideExt =='.scn': + + for i in range(0,1000):#arbitrary number of max regions + try: + dim_x=int(slide.properties[''.join(['openslide.region[',str(i),'].height'])])## add to columns + 
dim_y=int(slide.properties[''.join(['openslide.region[',str(i),'].width'])])## add to rows + offsetx=int(slide.properties[''.join(['openslide.region[',str(i),'].x'])])##start column + offsety=int(slide.properties[''.join(['openslide.region[',str(i),'].y'])])##start row + slide_regions.append([offsetx,offsety,dim_x,dim_y]) + except KeyError: + break + + for p in slide.properties.keys(): + print(p,slide.properties[p]) + input() + # print(dim_x,dim_y,offsetx,offsety) + elif slideExt in ['.ndpi','.svs']: + # return + dim_x, dim_y=slide.dimensions + + offsetx=0 + offsety=0 + slide_regions.append([offsetx,offsety,dim_x,dim_y]) + + + + fullSize=slide.level_dimensions[0] + resRatio= args.chop_thumbnail_resolution + ds_1=fullSize[0]/resRatio + ds_2=fullSize[1]/resRatio + if args.get_new_tissue_masks: + thumbIm=np.array(slide.get_thumbnail((ds_1,ds_2))) + # plt.imshow(thumbIm) + # plt.show() + for idx,sr in enumerate(slide_regions): + print(sr) + out_mask_name=os.path.join(mask_out_loc,'_'.join([slideID,slideExt[1:],str(idx)+'.png'])) + index_y=np.array(range(sr[1],sr[1]+sr[3],step)) + index_x=np.array(range(sr[0],sr[0]+sr[2],step)) + index_y[-1]=(sr[1]+sr[3])-step + index_x[-1]=(sr[0]+sr[2])-step + + if not args.get_new_tissue_masks: + try: + binary=(imread(out_mask_name)/255).astype('bool') + except: + print('failed to load mask for '+ out_mask_name) + print('please set get_new_tissue masks to True') + exit() + if slideExt =='.scn': + choppable_regions=np.zeros((len(index_x),len(index_y))) + elif slideExt in ['.ndpi','.svs']: + choppable_regions=np.zeros((len(index_y),len(index_x))) + else: + # print('Getting new ') + print(out_mask_name) + + # im=thumbIm[xStart:xStop,yStart:yStop,:] + # choppable_regions=np.zeros((len(index_x),len(index_y))) + if slideExt =='.scn': + THxStart=int(sr[1]/resRatio) + THxStop=int((sr[1]+sr[2])/resRatio) + THyStart=int(sr[0]/resRatio) + THyStop=int((sr[0]+sr[3])/resRatio) + im=thumbIm[THxStart:THxStop,THyStart:THyStop,:] + choppable_regions=np.zeros((len(index_x),len(index_y))) + elif slideExt in ['.ndpi','.svs']: + THxStart=int(sr[0]/resRatio) + THxStop=int((sr[0]+sr[2])/resRatio) + THyStart=int(sr[1]/resRatio) + THyStop=int((sr[1]+sr[3])/resRatio) + im=thumbIm[THyStart:THyStop,THxStart:THxStop,:] + choppable_regions=np.zeros((len(index_y),len(index_x))) + hsv=rgb2hsv(im) + g=gaussian(hsv[:,:,1],5) + binary=(g>0.05).astype('bool') + binary=binary_fill_holes(binary) + imsave(out_mask_name.replace('.png','.jpeg'),im) + imsave(out_mask_name,binary.astype('uint8')*255) + + + chop_list=[] + for idxy,yi in enumerate(index_y): + for idxx,xj in enumerate(index_x): + yStart = int(np.round((yi-sr[1])/resRatio)) + yStop = int(np.round(((yi-sr[1])+args.boxSize)/resRatio)) + xStart = int(np.round((xj-sr[0])/resRatio)) + xStop = int(np.round(((xj-sr[0])+args.boxSize)/resRatio)) + box_total=(xStop-xStart)*(yStop-yStart) + if slideExt =='.scn': + # print(xStart,xStop,yStart,yStop) + # print(np.sum(binary[xStart:xStop,yStart:yStop]),args.white_percent,box_total) + # plt.imshow(binary[xStart:xStop,yStart:yStop]) + # plt.show() + if np.sum(binary[xStart:xStop,yStart:yStop])>(args.white_percent*box_total): + + choppable_regions[idxx,idxy]=1 + chop_list.append([index_x[idxx],index_y[idxy]]) + + elif slideExt in ['.ndpi','.svs']: + if np.sum(binary[yStart:yStop,xStart:xStop])>(args.white_percent*box_total): + choppable_regions[idxy,idxx]=1 + chop_list.append([index_y[idxy],index_x[idxx]]) + + imsave(out_mask_name.replace('.png','_chopregions.png'),choppable_regions.astype('uint8')*255) + 
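+        # The *_chopregions.png written above is a thumbnail-resolution QA mask:
+        # nonzero pixels mark grid tiles that passed the white_percent tissue test.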
+ # plt.imshow(choppable_regions) + # plt.show() + choppable_regions_list.extend(chop_list) + # plt.subplot(131) + # plt.imshow(im) + # plt.subplot(132) + # plt.imshow(binary) + # plt.subplot(133) + # plt.imshow(choppable_regions) + # plt.show() + return choppable_regions_list,idx+1 diff --git a/histomicstk/segmentationschool/Codes/xmlCheck.py b/histomicstk/segmentationschool/Codes/xmlCheck.py old mode 100644 new mode 100755 index 3859d6e..65008a0 --- a/histomicstk/segmentationschool/Codes/xmlCheck.py +++ b/histomicstk/segmentationschool/Codes/xmlCheck.py @@ -1,5 +1,5 @@ -from .xml_to_mask import xml_to_mask -from .getWsi import getWsi +from xml_to_mask import xml_to_mask +from getWsi import getWsi from matplotlib import pyplot as plt slide=getWsi('/hdd/bg/HAIL2/DeepZoomPrediction/TRAINING_data/0/52483.svs') diff --git a/histomicstk/segmentationschool/Codes/xml_to_mask.py b/histomicstk/segmentationschool/Codes/xml_to_mask.py old mode 100644 new mode 100755 index c97f3ec..d705d11 --- a/histomicstk/segmentationschool/Codes/xml_to_mask.py +++ b/histomicstk/segmentationschool/Codes/xml_to_mask.py @@ -2,10 +2,10 @@ import sys import lxml.etree as ET import cv2 -# import matplotlib.pyplot as plt -from skimage.morphology import binary_erosion#,binary_dilation, +import matplotlib.pyplot as plt +from skimage.morphology import binary_dilation,binary_erosion from skimage.morphology import disk -# import time +import time def get_num_classes(xml_path): # parse xml and get root diff --git a/histomicstk/segmentationschool/Codes/xml_to_mask2.py b/histomicstk/segmentationschool/Codes/xml_to_mask2.py old mode 100644 new mode 100755 index 878bdec..ca194d0 --- a/histomicstk/segmentationschool/Codes/xml_to_mask2.py +++ b/histomicstk/segmentationschool/Codes/xml_to_mask2.py @@ -2,15 +2,14 @@ import sys, warnings import lxml.etree as ET import cv2 -# import matplotlib.pyplot as plt -from skimage.morphology import binary_erosion #binary_dilation, -from skimage.morphology import disk +import matplotlib.pyplot as plt +from skimage.morphology import binary_dilation,binary_erosion +from skimage.morphology import disk,diamond from skimage.io import imsave -# import time +import time from matplotlib import path -from .getWsi import getWsi -# from tqdm import tqdm - +from getWsi import getWsi +from tqdm import tqdm def get_num_classes(xml_path): # parse xml and get root tree = ET.parse(xml_path) @@ -353,15 +352,17 @@ def masks_from_points(usableRegions,wsiID,dirs,dot_pad,args,dims): #------------------------------------------------------------------------------------------------------------------------------------------------------ -def xml_to_mask(xml_path, location, size, downsample_factor=1, verbose=0): +def xml_to_mask(xml_path, location, size,ignore_id=None, downsample_factor=1, verbose=0): # parse xml and get root tree = ET.parse(xml_path) root = tree.getroot() # calculate region bounds bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0], 'y_max' : location[1] + size[1]} - - IDs = regions_in_mask(root=root, bounds=bounds, verbose=verbose) + if ignore_id is not None: + IDs = regions_in_mask(root=root, bounds=bounds, verbose=verbose,ignore_id=ignore_id) + else: + IDs = regions_in_mask(root=root, bounds=bounds, verbose=verbose,ignore_id=['20000']) if verbose != 0: print('\nFOUND: ' + str(len(IDs)) + ' regions') @@ -380,29 +381,31 @@ def restart_line(): # for printing labels in command line sys.stdout.write('\r') sys.stdout.flush() -def regions_in_mask(root, bounds, verbose=1): 
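+# ignore_id lists Annotation layer Ids to skip when collecting regions; the
+# default ['20000'] passed by xml_to_mask above is presumably a sentinel that
+# matches no real layer, so nothing is skipped unless a caller asks for it.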
+def regions_in_mask(root, bounds,ignore_id, verbose=1): # find regions to save IDs = [] for Annotation in root.findall("./Annotation"): # for all annotations annotationID = Annotation.attrib['Id'] + if annotationID in ignore_id: + pass + else: + for Region in Annotation.findall("./*/Region"): # iterate on all region - for Region in Annotation.findall("./*/Region"): # iterate on all region - - if verbose != 0: - sys.stdout.write('TESTING: ' + 'Annotation: ' + annotationID + '\tRegion: ' + Region.attrib['Id']) - sys.stdout.flush() - restart_line() + if verbose != 0: + sys.stdout.write('TESTING: ' + 'Annotation: ' + annotationID + '\tRegion: ' + Region.attrib['Id']) + sys.stdout.flush() + restart_line() - for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region - # get points - x_point = np.int32(np.float64(Vertex.attrib['X'])) - y_point = np.int32(np.float64(Vertex.attrib['Y'])) - # test if points are in bounds - if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds - # save region Id - IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) - break + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = np.int32(np.float64(Vertex.attrib['X'])) + y_point = np.int32(np.float64(Vertex.attrib['Y'])) + # test if points are in bounds + if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds + # save region Id + IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) + break return IDs def get_vertex_points(root, IDs, verbose=1): @@ -461,10 +464,13 @@ def Regions_to_mask(Regions, bounds, IDs, downsample_factor, verbose=1): for idx,Region in enumerate(Regions): # reformat Regions - Region2=Region - Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample)) - Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample)) - + Region2=np.copy(Region) + Region2[:,1] = np.int32(np.round((Region2[:,1] - bounds['y_min_pad']) / downsample)) + Region2[:,0] = np.int32(np.round((Region2[:,0] - bounds['x_min_pad']) / downsample)) + regMinX=min(Region[:,0]) + regMinY=min(Region[:,1]) + regMaxX=max(Region[:,0]) + regMaxY=max(Region[:,1]) x_start = np.int32((np.round((bounds['x_min'] - bounds['x_min_pad'])) / downsample)) y_start = np.int32((np.round((bounds['y_min'] - bounds['y_min_pad'])) / downsample)) x_stop = np.int32((np.round((bounds['x_max'] - bounds['x_min_pad'])) / downsample)) @@ -472,47 +478,36 @@ def Regions_to_mask(Regions, bounds, IDs, downsample_factor, verbose=1): # get annotation ID for mask color ID = IDs[index] - ''' - if int(ID['annotationID'])==4: - xl=x_stop-x_start - yl=y_stop-y_start - Region2[:,0]=Region2[:,0]-x_start - Region2[:,1]=Region2[:,1]-y_start - for vert in Region2: - if vert[0]<0: - vert[0]=0 - if vert[1]<0: - vert[1]=0 - if vert[0]>xl: - vert[0]=xl - if vert[1]>yl: - vert[1]=yl - - - - mask_temp = np.zeros([int((xl) / downsample),int((yl) / downsample)], dtype=np.int8) - - cv2.fillPoly(mask_temp, [Region2], int(ID['annotationID'])) - - - s=disk(2) - e=binary_erosion(mask_temp,s).astype('uint8') - d=binary_dilation(mask_temp,s).astype('uint8') - tub_divider=np.where((d-e)==1) - - mask_temp=mask_temp.astype('uint8') - mask_temp[tub_divider]=5 - - temp_pull=mask[ y_start:y_stop, x_start:x_stop ] - temp_pull[np.where(mask_temp==4)]=4 - temp_pull[np.where(mask_temp==5)]=1 - 
mask[ y_start:y_stop, x_start:x_stop ]=temp_pull - else: - ''' if int(ID['annotationID'])==4: #print(np.float(idx)/np.float(len(Regions))) + # subregMinX=min(Region2[:,0]) + # subregMinY=min(Region2[:,1]) + # subregMaxX=max(Region2[:,0]) + # subregMaxY=max(Region2[:,1]) + # reg_id=int(ID['annotationID']) + # submask_temp=np.zeros((regMaxY-regMinY,regMaxX-regMinX)) + # cv2.fillPoly(submask_temp,[Region2], reg_id) + # + # tub_prev=mask[subregMinY:subregMaxY,subregMinX:subregMaxX] + # plt.subplot(221) + # plt.imshow(submask_temp) + # plt.subplot(222) + # plt.imshow(tub_prev) + # + # overlap=np.logical_and(tub_prev==reg_id,binary_dilation(submask_temp== reg_id,diamond(1))) + # plt.subplot(223) + # plt.imshow(overlap) + # tub_prev[submask_temp==reg_id]=reg_id + # plt.subplot(224) + # plt.imshow(tub_prev) + # plt.show() + # if np.sum(overlap)>0: + # tub_prev[overlap]=1 + # + # + # mask[subregMinY:subregMaxY,subregMinX:subregMaxX]=tub_prev #t=time.time() cv2.fillPoly(mask_temp, [Region], int(ID['annotationID'])) @@ -522,16 +517,16 @@ def Regions_to_mask(Regions, bounds, IDs, downsample_factor, verbose=1): y1=np.min(Region[:,0]) y2=np.max(Region[:,0]) #t=time.time() - sub_mask=mask_temp[x1:x2,y1:y2] + rough_submask=mask_temp[x1:x2,y1:y2] #print(time.time()-t) #t=time.time() - e=binary_erosion(sub_mask,strel).astype('uint8') + e=binary_erosion(rough_submask,strel).astype('uint8') #print(time.time()-t) #t=time.time() - #d=binary_dilation(sub_mask,strel).astype('uint8') + #d=binary_dilation(rough_submask,strel).astype('uint8') #print(time.time()-t) #t=time.time() @@ -539,17 +534,17 @@ def Regions_to_mask(Regions, bounds, IDs, downsample_factor, verbose=1): #print(time.time()-t) #t=time.time() - #sub_mask[tub_divider]=1 + #rough_submask[tub_divider]=1 #print(time.time()-t) #t=time.time() tub_prev=mask[x1:x2,y1:y2] tub_prev[e==1]=int(ID['annotationID']) - #sub_mask[tub_divider]=1 + #rough_submask[tub_divider]=1 - #overlap=tub_prev&sub_mask - #sub_mask[overlap]=1 + #overlap=tub_prev&rough_submask + #rough_submask[overlap]=1 mask[x1:x2,y1:y2]=tub_prev #print(time.time()-t) diff --git a/histomicstk/segmentationschool/Codes/xml_to_mask2o.py b/histomicstk/segmentationschool/Codes/xml_to_mask2o.py new file mode 100755 index 0000000..49b4ed0 --- /dev/null +++ b/histomicstk/segmentationschool/Codes/xml_to_mask2o.py @@ -0,0 +1,592 @@ +import numpy as np +import sys, warnings +import lxml.etree as ET +import cv2 +import matplotlib.pyplot as plt +from skimage.morphology import binary_dilation,binary_erosion +from skimage.morphology import disk +from skimage.io import imsave +import time +from matplotlib import path +from getWsi import getWsi +from tqdm import tqdm +def get_num_classes(xml_path): + # parse xml and get root + tree = ET.parse(xml_path) + root = tree.getroot() + annotation_num = 0 + for Annotation in root.findall("./Annotation"): # for all annotations + annotation_num = annotation_num + 1 + + return annotation_num + 1 + +""" +location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame +size (tuple) - (width, height) tuple giving the region size +""" + +def get_supervision_boxes(root,boxlayerIDs): + + boxes=[] + for Annotation in root.findall("./Annotation"): # for all annotations + annotationID = Annotation.attrib['Id'] + if annotationID in boxlayerIDs: + for Region in Annotation.findall("./*/Region"): # iterate on all region + box_bounds=[] + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = 
np.int32(np.float64(Vertex.attrib['X']))
+                    y_point = np.int32(np.float64(Vertex.attrib['Y']))
+                    box_bounds.append([x_point,y_point])
+                boxes.append({'BoxVerts':box_bounds,'annotationID':annotationID})
+    return boxes
+
+
+def regions_in_mask_dots(root, bounds,box_layers):
+    # find regions to save
+    IDs_reg = []
+    IDs_points = []
+
+    for Annotation in root.findall("./Annotation"): # for all annotations
+        annotationID = Annotation.attrib['Id']
+        if annotationID in box_layers:
+            continue
+        annotationType = Annotation.attrib['Type']
+
+        # print(Annotation.findall(./))
+        if annotationType =='9':
+            for element in Annotation.iter('InputAnnotationId'):
+                pointAnnotationID=element.text
+
+            for Region in Annotation.findall("./*/Region"): # iterate on all region
+
+                for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region
+                    # get points
+                    x_point = np.int32(np.float64(Vertex.attrib['X']))
+                    y_point = np.int32(np.float64(Vertex.attrib['Y']))
+                    # test if points are in bounds
+                    if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds
+                        # save region Id
+                        IDs_points.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID,'pointAnnotationID':pointAnnotationID})
+                        break
+        elif annotationType=='4':
+
+            for Region in Annotation.findall("./*/Region"): # iterate on all region
+
+                for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region
+                    # get points
+                    x_point = np.int32(np.float64(Vertex.attrib['X']))
+                    y_point = np.int32(np.float64(Vertex.attrib['Y']))
+                    # test if points are in bounds
+                    if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds
+                        # save region Id
+                        IDs_reg.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID})
+                        break
+    return IDs_reg,IDs_points
+
+def get_vertex_points_dots(root, IDs_reg,IDs_points, maskModes,excludedIDs,negativeIDs=None,falsepositiveIDs=None):
+    Regions = []
+    Points = []
+
+    for ID in IDs_reg:
+        Vertices = []
+        if ID['annotationID'] not in excludedIDs:
+            for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"):
+                Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))])
+            Regions.append({'Vertices':np.array(Vertices),'annotationID':ID['annotationID']})
+
+    for ID in IDs_points:
+        Vertices = []
+        for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"):
+            Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))])
+        Points.append({'Vertices':np.array(Vertices),'pointAnnotationID':ID['pointAnnotationID']})
+    if 'falsepositive' in maskModes:
+        assert falsepositiveIDs is not None,'False positive annotated classes must be provided for falsepositive mask mode'
+
+    if 'negative' in maskModes:
+        assert negativeIDs is not None,'Negatively annotated classes must be provided for negative mask mode'
+        assert not ('falsepositive' in maskModes and 'negative' in maskModes), 'Negative AND false positive mask modes is not yet supported'
+
+    useableRegions=[]
+    if 'positive' in maskModes:
+
+        for Region in Regions:
+
+            regionPath=path.Path(Region['Vertices'])
+
+            for Point in Points:
+                if 'negative' in maskModes:
+                    if Region['annotationID'] not in negativeIDs:
+                        if regionPath.contains_point(Point['Vertices'][0]):
+                            Region['pointAnnotationID']=Point['pointAnnotationID']
+
useableRegions.append(Region) + else: + if regionPath.contains_point(Point['Vertices'][0]): + Region['pointAnnotationID']=Point['pointAnnotationID'] + useableRegions.append(Region) + + if 'negative' in maskModes: + + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + if Region['annotationID'] in negativeIDs: + if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): + Region['pointAnnotationID']=Region['annotationID'] + useableRegions.append(Region) + if 'falsepositive' in maskModes: + + for Region in Regions: + regionPath=path.Path(Region['Vertices']) + if Region['annotationID'] in falsepositiveIDs: + if not any([regionPath.contains_point(Point['Vertices'][0]) for Point in Points]): + Region['pointAnnotationID']=0 + useableRegions.append(Region) + + return useableRegions + +def masks_from_points(usableRegions,wsiID,dirs,dot_pad,args,dims): + pas_img = getWsi(wsiID) + dim_x, dim_y=pas_img.dimensions + image_sizes=[] + basename=wsiID.split('/')[-1].split('.svs')[0] + max_mask_size=args.training_max_size + stepHR = int(max_mask_size*(1-args.overlap_percentHR)) #Step size before downsampling + + region=np.array(pas_img.read_region((dims[0],dims[2]),0,(dims[1]-dims[0],dims[3]-dims[2])))[:,:,:3] + mask = 2*np.ones([dims[3]-dims[2],dims[1]-dims[0]], dtype=np.uint8) + for usableRegion in usableRegions: + vertices=usableRegion['Vertices'] + # x1=min(vertices[:,0]) + # x2=max(vertices[:,0]) + # y1=min(vertices[:,1]) + # y2=max(vertices[:,1]) + + points = np.stack([np.asarray(vertices[:,0]), np.asarray(vertices[:,1])], axis=1) + + points[:,1] = np.int32(np.round(points[:,1] - dims[2] )) + points[:,0] = np.int32(np.round(points[:,0] - dims[0] )) + + if int(usableRegion['pointAnnotationID'])==0: + pass + else: + cv2.fillPoly(mask, [points], int(usableRegion['pointAnnotationID'])-4) + # plt.subplot(121) + # plt.imshow(region) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() + l2=dims[3]-dims[2] + l1=dims[1]-dims[0] + if l10 and (y2-y1)>0: + mask = 2*np.ones([y2-y1,x2-x1], dtype=np.uint8) + for subRegion in subIDs: + subvertices=subRegion['Vertices'] + points = np.stack([np.asarray(subvertices[:,0]), np.asarray(subvertices[:,1])], axis=1) + + + + # xMultiplier=np.ceil((l1)/64) + # yMultiplier=np.ceil((l2)/64) + # pad1=int(xMultiplier*64-l1) + # pad2=int(yMultiplier*64-l2) + + points[:,1] = np.int32(np.round(points[:,1] - y1 )) + points[:,0] = np.int32(np.round(points[:,0] - x1 )) + + if int(subRegion['pointAnnotationID'])==0: + pass + else: + cv2.fillPoly(mask, [points], int(subRegion['pointAnnotationID'])-4) + + PAS = pas_img.read_region((x1,y1), 0, (x2-x1,y2-y1)) + PAS = np.array(PAS)[:,:,0:3] + # plt.subplot(121) + # plt.imshow(PAS) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() + # continue + + if l1>max_mask_size and l2>max_mask_size: + + subIndex_yHR=np.array(range(0,l2,max_mask_size)) + subIndex_xHR=np.array(range(0,l1,max_mask_size)) + subIndex_yHR[-1]=l2-max_mask_size + subIndex_xHR[-1]=l1-max_mask_size + for i in subIndex_xHR: + for j in subIndex_yHR: + subRegion=PAS[j:j+max_mask_size,i:i+max_mask_size,:] + subMask=mask[j:j+max_mask_size,i:i+max_mask_size] + image_identifier=basename+'_'.join(['',str(x1),str(y1),str(l1),str(l2),str(i),str(j)]) + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+image_identifier+'.png' + image_out_name=mask_out_name.replace('/masks/','/regions/') + image_sizes.append([max_mask_size,max_mask_size]) + plt.subplot(121) + plt.imshow(subRegion) + plt.subplot(122) + plt.imshow(subMask) 
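+                    # debug view of the chopped tile and its label mask; note the 'continue' below bypasses the imsave calls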
+ plt.show() + continue + # basename + '_' + str(image_identifier) + args.imBoxExt + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(image_out_name,subRegion) + imsave(mask_out_name,subMask) + + elif l1>max_mask_size: + plt.subplot(121) + plt.imshow(subRegion) + plt.subplot(122) + plt.imshow(subMask) + plt.show() + print('small image') + break + subIndex_xHR=np.array(range(0,l1,max_mask_size)) + subIndex_xHR[-1]=l1-max_mask_size + for i in subIndex_xHR: + subRegion=PAS[:,i:i+max_mask_size,:] + subMask=mask[:,i:i+max_mask_size] + image_identifier=basename+'_'.join(['',str(x1),str(y1),str(l1),str(l2),str(l2),str(i)]) + image_sizes.append([max_mask_size,l2]) + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+image_identifier+'.png' + image_out_name=mask_out_name.replace('/masks/','/regions/') + # basename + '_' + str(image_identifier) + args.imBoxExt + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(image_out_name,subRegion) + imsave(mask_out_name,subMask) + + elif l2>max_mask_size: + print('small image') + break + subIndex_yHR=np.array(range(0,l2,max_mask_size)) + subIndex_yHR[-1]=l2-max_mask_size + for j in subIndex_yHR: + subRegion=PAS[j:j+max_mask_size,:,:] + subMask=mask[j:j+max_mask_size,:] + image_identifier=basename+'_'.join(['',str(x1),str(y1),str(l1),str(l2),str(j),str(l1)]) + image_sizes.append([max_mask_size,l1]) + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+image_identifier+'.png' + image_out_name=mask_out_name.replace('/masks/','/regions/') + # basename + '_' + str(image_identifier) + args.imBoxExt + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(image_out_name,subRegion) + imsave(mask_out_name,subMask) + + else: + print('small image') + break + # pass + image_identifier=basename+'_'.join(['',str(x1),str(y1),str(l1),str(l2)]) + mask_out_name=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'+image_identifier+'.png' + image_out_name=mask_out_name.replace('/masks/','/regions/') + image_sizes.append([l1,l2]) + # basename + '_' + str(image_identifier) + args.imBoxExt + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + imsave(image_out_name,PAS) + imsave(mask_out_name,mask) + + # exit() + # extract image region + # plt.subplot(121) + # plt.imshow(PAS) + # plt.subplot(122) + # plt.imshow(mask) + # plt.show() + image_sizes.append([l1,l2]) + + else: + print('Broken region') + ''' + # return image_sizes + + +#------------------------------------------------------------------------------------------------------------------------------------------------------ + + +def xml_to_mask(xml_path, location, size,ignore_id=None, downsample_factor=1, verbose=0): + # parse xml and get root + tree = ET.parse(xml_path) + root = tree.getroot() + + # calculate region bounds + bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0], 'y_max' : location[1] + size[1]} + if ignore_id is not None: + IDs = regions_in_mask(root=root, bounds=bounds, verbose=verbose,ignore_id=ignore_id) + else: + IDs = regions_in_mask(root=root, bounds=bounds, verbose=verbose,ignore_id=['20000']) + + if verbose != 0: + print('\nFOUND: ' + str(len(IDs)) + ' regions') + + # find regions in bounds + Regions = get_vertex_points(root=root, IDs=IDs, verbose=verbose) + + # fill regions and create mask + mask = Regions_to_mask(Regions=Regions, bounds=bounds, IDs=IDs, downsample_factor=downsample_factor, verbose=verbose) + if verbose != 0: + 
print('done...\n') + + return mask + +def restart_line(): # for printing labels in command line + sys.stdout.write('\r') + sys.stdout.flush() + +def regions_in_mask(root, bounds,ignore_id, verbose=1): + # find regions to save + IDs = [] + + for Annotation in root.findall("./Annotation"): # for all annotations + annotationID = Annotation.attrib['Id'] + if annotationID in ignore_id: + pass + else: + for Region in Annotation.findall("./*/Region"): # iterate on all region + + if verbose != 0: + sys.stdout.write('TESTING: ' + 'Annotation: ' + annotationID + '\tRegion: ' + Region.attrib['Id']) + sys.stdout.flush() + restart_line() + + for Vertex in Region.findall("./*/Vertex"): # iterate on all vertex in region + # get points + x_point = np.int32(np.float64(Vertex.attrib['X'])) + y_point = np.int32(np.float64(Vertex.attrib['Y'])) + # test if points are in bounds + if bounds['x_min'] <= x_point <= bounds['x_max'] and bounds['y_min'] <= y_point <= bounds['y_max']: # test points in region bounds + # save region Id + IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) + break + return IDs + +def get_vertex_points(root, IDs, verbose=1): + Regions = [] + + for ID in IDs: # for all IDs + if verbose != 0: + sys.stdout.write('PARSING: ' + 'Annotation: ' + ID['annotationID'] + '\tRegion: ' + ID['regionID']) + sys.stdout.flush() + restart_line() + + # get all vertex attributes (points) + Vertices = [] + + for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): + # make array of points + Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))]) + + + Regions.append(np.array(Vertices)) + + return Regions + +def Regions_to_mask(Regions, bounds, IDs, downsample_factor, verbose=1): + downsample = int(np.round(downsample_factor**(.5))) + strel=disk(3) + if verbose !=0: + print('\nMAKING MASK:') + + if len(Regions) != 0: # regions present + # get min/max sizes + min_sizes = np.empty(shape=[2,0], dtype=np.int32) + max_sizes = np.empty(shape=[2,0], dtype=np.int32) + for Region in Regions: # fill all regions + min_bounds = np.reshape((np.amin(Region, axis=0)), (2,1)) + max_bounds = np.reshape((np.amax(Region, axis=0)), (2,1)) + min_sizes = np.append(min_sizes, min_bounds, axis=1) + max_sizes = np.append(max_sizes, max_bounds, axis=1) + min_size = np.amin(min_sizes, axis=1) + max_size = np.amax(max_sizes, axis=1) + + + + # add to old bounds + bounds['x_min_pad'] = min(min_size[1], bounds['x_min']) + bounds['y_min_pad'] = min(min_size[0], bounds['y_min']) + bounds['x_max_pad'] = max(max_size[1], bounds['x_max']) + bounds['y_max_pad'] = max(max_size[0], bounds['y_max']) + + # make blank mask + mask = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8) + mask_temp = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8) + + # fill mask polygons + index = 0 + for idx,Region in enumerate(Regions): + + # reformat Regions + Region2=Region + Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample)) + Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample)) + + x_start = np.int32((np.round((bounds['x_min'] - bounds['x_min_pad'])) / downsample)) + y_start = np.int32((np.round((bounds['y_min'] - 
bounds['y_min_pad'])) / downsample)) + x_stop = np.int32((np.round((bounds['x_max'] - bounds['x_min_pad'])) / downsample)) + y_stop = np.int32((np.round((bounds['y_max'] - bounds['y_min_pad'])) / downsample)) + + # get annotation ID for mask color + ID = IDs[index] + ''' + if int(ID['annotationID'])==4: + xl=x_stop-x_start + yl=y_stop-y_start + Region2[:,0]=Region2[:,0]-x_start + Region2[:,1]=Region2[:,1]-y_start + for vert in Region2: + if vert[0]<0: + vert[0]=0 + if vert[1]<0: + vert[1]=0 + if vert[0]>xl: + vert[0]=xl + if vert[1]>yl: + vert[1]=yl + + + + mask_temp = np.zeros([int((xl) / downsample),int((yl) / downsample)], dtype=np.int8) + + cv2.fillPoly(mask_temp, [Region2], int(ID['annotationID'])) + + + s=disk(2) + e=binary_erosion(mask_temp,s).astype('uint8') + d=binary_dilation(mask_temp,s).astype('uint8') + tub_divider=np.where((d-e)==1) + + mask_temp=mask_temp.astype('uint8') + mask_temp[tub_divider]=5 + + temp_pull=mask[ y_start:y_stop, x_start:x_stop ] + temp_pull[np.where(mask_temp==4)]=4 + temp_pull[np.where(mask_temp==5)]=1 + mask[ y_start:y_stop, x_start:x_stop ]=temp_pull + else: + ''' + + + if int(ID['annotationID'])==4: + #print(np.float(idx)/np.float(len(Regions))) + + #t=time.time() + cv2.fillPoly(mask_temp, [Region], int(ID['annotationID'])) + #print(time.time()-t) + x1=np.min(Region[:,1]) + x2=np.max(Region[:,1]) + y1=np.min(Region[:,0]) + y2=np.max(Region[:,0]) + #t=time.time() + rough_mask=mask_temp[x1:x2,y1:y2] + #print(time.time()-t) + + + #t=time.time() + e=binary_erosion(rough_mask,strel).astype('uint8') + #print(time.time()-t) + + #t=time.time() + #d=binary_dilation(rough_mask,strel).astype('uint8') + #print(time.time()-t) + + #t=time.time() + #tub_divider=np.where((d-e)==1) + #print(time.time()-t) + + #t=time.time() + #rough_mask[tub_divider]=1 + + #print(time.time()-t) + + #t=time.time() + tub_prev=mask[x1:x2,y1:y2] + # tub_prev[e==1]=int(ID['annotationID']) + #rough_mask[tub_divider]=1 + # plt.subplot(221) + # plt.imshow(tub_prev) + + overlap=tub_prev&rough_mask + # plt.subplot(222) + # plt.imshow(overlap) + # plt.subplot(223) + # plt.imshow(e) + + tub_prev[e==1]=int(ID['annotationID']) + #rough_mask[overlap]=1 + tub_prev[overlap==4]=1 + # plt.subplot(224) + # plt.imshow(tub_prev) + # plt.show() + mask[x1:x2,y1:y2]=tub_prev + cv2.fillPoly(mask_temp, [Region], 0) + #print(time.time()-t) + else: + cv2.fillPoly(mask, [Region], int(ID['annotationID'])) + index = index + 1 + + # reshape mask + + # pull center mask region + mask = mask[ y_start:y_stop, x_start:x_stop ] + #plt.imshow(mask*50) + #plt.show() + ''' + msub=np.zeros((y_stop-y_start,x_stop-x_start)) + msub2=msub + msub[np.where(mask==4)]=1 + s=disk(3) + msub=binary_dilation(msub,selem=s)-msub2 + mask[np.where(msub==1)]=5 + ''' + else: # no Regions + mask = np.zeros([ int(np.round((bounds['y_max'] - bounds['y_min']) / downsample)), int(np.round((bounds['x_max'] - bounds['x_min']) / downsample)) ]) + + return mask diff --git a/histomicstk/segmentationschool/Codes/xml_to_mask_minmax.py b/histomicstk/segmentationschool/Codes/xml_to_mask_minmax.py old mode 100644 new mode 100755 index 87b3394..2e9653d --- a/histomicstk/segmentationschool/Codes/xml_to_mask_minmax.py +++ b/histomicstk/segmentationschool/Codes/xml_to_mask_minmax.py @@ -1,10 +1,10 @@ import numpy as np -# import sys +import sys import lxml.etree as ET import cv2 import time import os -from skimage.morphology import binary_erosion#,binary_dilation, +from skimage.morphology import binary_dilation,binary_erosion from skimage.morphology import disk 
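+# Illustrative usage (path and coordinates are hypothetical): rasterize the
+# annotations overlapping one 2048x2048 tile at full resolution,
+#     mask = xml_to_mask('slide.xml', (20000, 14000), (2048, 2048))
+# pixel values in 'mask' are the annotation layer IDs drawn at that location.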
""" location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame @@ -37,49 +37,6 @@ def get_annotated_ROIs(xml_path,location,size,ROI_layer,downsample=1,tree=None): verts.append([int(float(Vert.attrib['X'])),int(float(Vert.attrib['Y']))]) IDs.append({'regionVerts' : verts}) return IDs -def get_annotated_ROIs_coords_withdots(xml_path,location,size,ROI_layer,downsample=1,tree=None): - # parse xml and get root - IDs = [] - if tree == None: tree = ET.parse(xml_path) - root = tree.getroot() - - bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0]*downsample, 'y_max' : location[1] + size[1]*downsample} - annotationData={} - annotationTypes={} - linkIDs={} - for Annotation in root.findall("./Annotation"): # for all annotations - annotationID=Annotation.attrib['Id'] - annotationData[annotationID]=[] - annotationTypes[annotationID]=Annotation.attrib['Type'] - - if Annotation.attrib['Type']=='9': - for element in Annotation.iter('InputAnnotationId'): - linkIDs[annotationID]=element.text - else: - linkIDs[annotationID]=annotationID - - for Region in Annotation.findall("./*/Region"): # iterate on all region - verts=[] - - for Vert in Region.findall("./Vertices/Vertex"): # iterate on all vertex in region - vX=int(float(Vert.attrib['X'])) - vY=int(float(Vert.attrib['Y'])) - verts.append([vX,vY]) - verts=np.array(verts) - vXMax=np.max(verts[:,0]) - vXMin=np.min(verts[:,0]) - vYMax=np.max(verts[:,1]) - vYMin=np.min(verts[:,1]) - if Annotation.attrib['Type']=='9': - if bounds['x_min'] <= verts[0][0] and bounds['x_max'] >= verts[0][0] and bounds['y_min'] <= verts[0][1] and bounds['y_max'] >= verts[0][1]: - annotationData[annotationID].append(verts) - else: - if bounds['x_min'] <= vXMax and bounds['x_max'] >= vXMin and bounds['y_min'] <= vYMax and bounds['y_max'] >= vYMin: - annotationData[annotationID].append(verts) - - # IDs.append({'regionVerts' : verts,'type': Region.attrib['Type']}) - return annotationData,annotationTypes,linkIDs - def xml_to_mask(xml_path, location, size,ignore_id=None, tree=None, downsample=1, verbose=0): @@ -195,11 +152,17 @@ def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1): # reformat Regions Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample)) Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample)) + if np.min(Region[:,0])<0: + # print(Region[:,0]) + Region[:,0]=np.clip(Region[:,0],a_min=0,a_max=None) + if np.min(Region[:,1])<0: + # print(Region[:,1]) + Region[:,1]=np.clip(Region[:,1],a_min=0,a_max=None) # get annotation ID for mask color ID = IDs[index] - if int(ID['annotationID'])==4: + if int(ID['annotationID'])==['3','4','5','6']: cv2.fillPoly(mask_temp, [Region], int(ID['annotationID'])) x1=np.min(Region[:,1]) x2=np.max(Region[:,1]) @@ -210,7 +173,7 @@ def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1): tub_prev=mask[x1:x2,y1:y2] overlap=tub_prev&rough_mask tub_prev[e==1]=int(ID['annotationID']) - tub_prev[overlap==4]=1 + tub_prev[overlap==int(ID['annotationID'])]=1 mask[x1:x2,y1:y2]=tub_prev cv2.fillPoly(mask_temp, [Region], 0) else: diff --git a/histomicstk/segmentationschool/segmentation_school.py b/histomicstk/segmentationschool/segmentation_school.py index 2c2e29b..701ded1 100644 --- a/histomicstk/segmentationschool/segmentation_school.py +++ b/histomicstk/segmentationschool/segmentation_school.py @@ -4,7 +4,7 @@ import numpy as np import time -sys.path.append('..') +sys.path.append(os.getcwd()+'/Codes') """ @@ -28,12 +28,6 
@@ """ -# def get_girder_client(args): -# gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) -# gc.setToken(args.girderToken) - -# return gc - def str2bool(v): if isinstance(v, bool): return v @@ -44,27 +38,25 @@ def str2bool(v): else: raise argparse.ArgumentTypeError('Boolean value expected.') - def main(args): - from segmentationschool.Codes.InitializeFolderStructure import initFolder, purge_training_set, prune_training_set - # from extract_reference_features import getKidneyReferenceFeatures,summarizeKidneyReferenceFeatures - # from TransformXMLs import splice_cortex_XMLs,register_aperio_scn_xmls - # from randomCropGenerator import randomCropGenerator + from InitializeFolderStructure import initFolder, purge_training_set, prune_training_set + from TransformXMLs import transform_XMLs + # from extract_reference_features import extractKidneyReferenceFeatures if args.one_network == True: - from segmentationschool.Codes.IterativeTraining_1X import IterateTraining - from segmentationschool.Codes.IterativePredict_1X import predict + from IterativeTraining_1X_chopless_test import IterateTraining + from IterativePredict_1X import predict, validate else: - from segmentationschool.Codes.evolve_predictions import evolve - from segmentationschool.Codes.IterativeTraining import IterateTraining - from segmentationschool.Codes.IterativePredict import predict + from evolve_predictions import evolve + from IterativeTraining import IterateTraining + from IterativePredict import predict, validate # for teaching young segmentations networks starttime = time.time() - # if args.project == ' ': - # print('Please specify the project name: \n\t--project [folder]') + if args.project == ' ': + print('Please specify the project name: \n\t--project [folder]') - if args.option in ['new', 'New']: + elif args.option in ['new', 'New']: initFolder(args=args) savetime(args=args, starttime=starttime) elif args.option in ['train', 'Train']: @@ -73,44 +65,33 @@ def main(args): elif args.option in ['predict', 'Predict']: predict(args=args) savetime(args=args, starttime=starttime) - + elif args.option in ['validate', 'Validate']: + validate(args=args) elif args.option in ['evolve', 'Evolve']: evolve(args=args) elif args.option in ['purge', 'Purge']: purge_training_set(args=args) elif args.option in ['prune', 'Prune']: prune_training_set(args=args) - elif args.option in ['get_features', 'Get_features']: - getKidneyReferenceFeatures(args=args) - elif args.option in ['summarize_features', 'Summarize_features']: - summarizeKidneyReferenceFeatures(args=args) - elif args.option in ['splice_cortex', 'Splice_cortex']: - splice_cortex_XMLs(args=args) - elif args.option in ['register_aperio_scn_xmls', 'Register_aperio_scn_xmls']: - register_aperio_scn_xmls(args=args) - elif args.option in ['get_thumbnails', 'Get_thumbnails']: - from wsi_loader_utils import get_image_thumbnails - get_image_thumbnails(args) - elif args.option in ['random_patch_crop', 'random_patch_crop']: - randomCropGenerator(args=args) + elif args.option in ['transform_xmls', 'Transform_xmls']: + transform_XMLs(args=args) + elif args.option in ['extract_features', 'Extract_features']: + extractKidneyReferenceFeatures(args=args) else: - print('please specify an option in: \n\t--option [new, train, predict, validate, evolve, purge, prune, get_features, splice_cortex, register_aperio_scn_xmls]') + print('please specify an option in: \n\t--option [new, train, predict, validate]') def savetime(args, starttime): if args.option in ['new', 'New']: - print('new') - # with 
open(args.runtime_file, 'w') as timefile:
-        #     timefile.write('option' +'\t'+ 'time' +'\t'+ 'epochs_LR' +'\t'+ 'epochs_HR' +'\t'+ 'aug_LR' +'\t'+ 'aug_HR' +'\t'+ 'overlap_percentLR' +'\t'+ 'overlap_percentHR')
+        with open(args.base_dir + '/' + args.project + '/runtime.txt', 'w') as timefile:
+            timefile.write('option' +'\t'+ 'time' +'\t'+ 'epochs_LR' +'\t'+ 'epochs_HR' +'\t'+ 'aug_LR' +'\t'+ 'aug_HR' +'\t'+ 'overlap_percentLR' +'\t'+ 'overlap_percentHR')
 
     if args.option in ['train', 'Train']:
-        print('not much')
-        # with open(args.runtime_file, 'a') as timefile:
-        #     timefile.write('\n' + args.option +'\t'+ str(time.time()-starttime) +'\t'+ str(args.epoch_LR) +'\t'+ str(args.epoch_HR) +'\t'+ str(args.aug_LR) +'\t'+ str(args.aug_HR) +'\t'+ str(args.overlap_percentLR) +'\t'+ str(args.overlap_percentHR))
+        with open(args.base_dir + '/' + args.project + '/runtime.txt', 'a') as timefile:
+            timefile.write('\n' + args.option +'\t'+ str(time.time()-starttime) +'\t'+ str(args.epoch_LR) +'\t'+ str(args.epoch_HR) +'\t'+ str(args.aug_LR) +'\t'+ str(args.aug_HR) +'\t'+ str(args.overlap_percentLR) +'\t'+ str(args.overlap_percentHR))
 
     if args.option in ['predict', 'Predict']:
-        print('predict')
-        # with open(args.runtime_file, 'a') as timefile:
-        #     timefile.write('\n' + args.option +'\t'+ str(time.time()-starttime))
+        with open(args.base_dir + '/' + args.project + '/runtime.txt', 'a') as timefile:
+            timefile.write('\n' + args.option +'\t'+ str(time.time()-starttime))
 
 
 if __name__ == '__main__':
@@ -118,12 +99,8 @@ def savetime(args, starttime):
 
     ##### Main params (MANDATORY) ##############################################
 
     # School subject
-    parser.add_argument('--girderApiUrl', dest='girderApiUrl', default=' ' ,type=str,
-        help='girderApiUrl')
-    parser.add_argument('--girderToken', dest='girderToken', default=' ' ,type=str,
-        help='girderToken')
-    parser.add_argument('--files', dest='files', default=' ' ,type=str,
-        help='files')
+    parser.add_argument('--project', dest='project', default=' ' ,type=str,
+        help='Starting directory to contain training project')
     # option
     parser.add_argument('--option', dest='option', default=' ' ,type=str,
         help='option for [new, train, predict, validate]')
@@ -131,46 +108,12 @@ def savetime(args, starttime):
         help='name of project for transfer learning [pulls the newest model]')
     parser.add_argument('--one_network', dest='one_network', default=True ,type=bool,
         help='use only high resolution network for training/prediction/validation')
-    parser.add_argument('--target', dest='target', default=None,type=str,
-        help='directory with xml transformation targets')
-    parser.add_argument('--cortextarget', dest='cortextarget', default=None,type=str,
-        help='directory with cortex annotations for splicing')
-    parser.add_argument('--output', dest='output', default=None,type=str,
-        help='directory to save output transformed XMLs')
-    parser.add_argument('--wsis', dest='wsis', default=None,type=str,
-        help='directory of WSIs for reference feature extraction')
-    parser.add_argument('--groupBy', dest='groupBy', default=None,type=str,
-        help='Name for histomicsUI converted annotation group')
-    parser.add_argument('--patientData', dest='patientData', default=None,type=str,
-        help='Location of excel file containing clinical data on patients')
-    parser.add_argument('--labelColumns', dest='labelColumns', default=None,type=str,
-        help='Column in excel file to use as label')
-    parser.add_argument('--labelModality', dest='labelModality', default=None,type=str,
-        help='Column in excel file to use as label')
-
parser.add_argument('--IDColumn', dest='IDColumn', default='Label_slides',type=str, - help='Excel column with file name links') - parser.add_argument('--plotFill', dest='plotFill', default=True,type=str2bool, - help='Excel column with file name links') - parser.add_argument('--scatterFeatures', dest='scatterFeatures', default='5,6',type=str, - help='Excel column with file name links') - parser.add_argument('--anchor', dest='anchor', default='Age',type=str, - help='Biometric link data for scatterplot') - parser.add_argument('--exceloutfile', dest='exceloutfile', default=None,type=str, - help='Name of output excel file for feature aggregation') - - -# args.huelabel,args.rowlabel,args.binRows - parser.add_argument('--SummaryOption', dest='SummaryOption', default=None,type=str, - help='What type of feature summary to generate, options:\n'+ - 'BLDensity,ULDensity,UDensity,BDensity,standardScatter,anchorScatter') # automatically generated parser.add_argument('--base_dir', dest='base_dir', default=os.getcwd(),type=str, help='base directory of code folder') - - parser.add_argument('--code_dir', dest='code_dir', default=os.getcwd(),type=str, - help='base directory of code folder') - + parser.add_argument('--target', dest='target', default=None,type=str, + help='directory to transform xmls') ##### Args for training / prediction #################################################### parser.add_argument('--gpu_num', dest='gpu_num', default=2 ,type=int, @@ -195,7 +138,7 @@ def savetime(args, starttime): parser.add_argument('--white_percent', dest='white_percent', default=0.01 ,type=float, help='white level checkpoint for chopping') parser.add_argument('--chop_thumbnail_resolution', dest='chop_thumbnail_resolution', default=16,type=int, - help='downsample mask to find usable regions') + help='Amount of downsampling in each dimension to determine usable tissue regions') #Low resolution parameters parser.add_argument('--overlap_percentLR', dest='overlap_percentLR', default=0.5 ,type=float, help='overlap percentage of low resolution blocks [0-1]') @@ -204,16 +147,24 @@ def savetime(args, starttime): parser.add_argument('--downsampleRateLR', dest='downsampleRateLR', default=16 ,type=int, help='reduce image resolution to 1/downsample rate') #High resolution parameters - parser.add_argument('--overlap_percentHR', dest='overlap_percentHR', default=0 ,type=float, + parser.add_argument('--overlap_rate', dest='overlap_rate', default=0.5 ,type=float, help='overlap percentage of high resolution blocks [0-1]') - parser.add_argument('--boxSize', dest='boxSize', default=2048 ,type=int, + parser.add_argument('--boxSize', dest='boxSize', default=1200 ,type=int, help='size of high resolution blocks') - parser.add_argument('--downsampleRateHR', dest='downsampleRateHR', default=1 ,type=int, + parser.add_argument('--downsampleRate', dest='downsampleRate', default=1 ,type=int, help='reduce image resolution to 1/downsample rate') parser.add_argument('--training_max_size', dest='training_max_size', default=512 ,type=int, help='padded region for low resolution region extraction') - parser.add_argument('--Mag20X', dest='Mag20X', default=False,type=str2bool, - help='Perform prediction for 20X (true) slides rather than 40X (false)') + parser.add_argument('--box_supervision', dest='box_supervision', default=True,type=str2bool, + help='Use rectangle annotations to confine chopping') + parser.add_argument('--chop_with_replacement', dest='chop_with_replacement', default=False,type=str2bool, + help='make ultimate contour class ID equal 
to dot-based ID')
+    parser.add_argument('--standard_chop', dest='standard_chop', default=True,type=str2bool,
+        help='use contour class ID as defined by the region itself')
+    parser.add_argument('--get_new_tissue_masks', dest='get_new_tissue_masks', default=False,type=str2bool,
+        help="Don't load usable tissue regions from disk, create new ones")
+    parser.add_argument('--balanceClasses', dest='balanceClasses', default='3,4,6',type=str,
+        help="which classes to balance during training")
 
     ### Params for augmenting data ###
     #High resolution
@@ -233,8 +184,11 @@ def savetime(args, starttime):
     parser.add_argument('--CNNbatch_sizeLR', dest='CNNbatch_sizeLR', default=2 ,type=int,
         help='Size of batches for training low resolution CNN')
     #High resolution hyperparameters
-    parser.add_argument('--CNNbatch_sizeHR', dest='CNNbatch_sizeHR', default=2 ,type=int,
+    parser.add_argument('--batch_size', dest='batch_size', default=3 ,type=int,
         help='Size of batches for training high resolution CNN')
+    parser.add_argument('--train_steps', dest='train_steps', default=300000 ,type=int,
+        help='number of training iterations for the high resolution CNN')
+
     #Hyperparameters
     #Hyperparameters
     parser.add_argument('--epoch_LR', dest='epoch_LR', default=1 ,type=int,
         help='training epochs for low resolution network')
@@ -246,17 +200,24 @@ def savetime(args, starttime):
         type=float, help='High rez learning rate')
     parser.add_argument('--learning_rate_LR', dest='learning_rate_LR', default=2.5e-4,
         type=float, help='Low rez learning rate')
-    parser.add_argument('--chop_data', dest='chop_data', default='false',
-        type=str, help='chop and augment new data before training')
+    parser.add_argument('--crop_detectron_trainset', dest='crop_detectron_trainset', default=False,type=str2bool,
         help='chop dot based images to this max size')
     parser.add_argument('--predict_data', dest='predict_data', default=True,type=str2bool,
         help='chop dot based images to this max size')
-    parser.add_argument('--roi_thresh', dest='roi_thresh', default=0.01,type=float,
+    parser.add_argument('--roi_thresh', dest='roi_thresh', default=0.7,type=float,
+        help='minimum detection score for keeping a predicted region [0-1]')
+    parser.add_argument('--prepare_detectron_json', dest='prepare_detectron_json', default=True,type=str2bool,
         help='chop dot based images to this max size')
+    parser.add_argument('--custom_image_means', dest='custom_image_means', default=False,type=str2bool,
+        help='measure image mean for network training')
+    parser.add_argument('--check_training_data', dest='check_training_data', default=False,type=str2bool,
+        help='check images visually before training')
+    parser.add_argument('--hsv_aug_prob', dest='hsv_aug_prob', default=0.1,type=float,
+        help='apply HSV augmentation to an image when rand(0,1) > hsv_aug_prob')
 
     ### Params for saving results ###
-    parser.add_argument('--outDir', dest='outDir', default='Predictions' ,type=str,
+    parser.add_argument('--outDir', dest='outDir', default='/Predictions/' ,type=str,
         help='output directory')
     parser.add_argument('--save_outputs', dest='save_outputs', default=False ,type=bool,
        help='save outputs from chopping etc. [final image masks]')
@@ -273,15 +234,12 @@ def savetime(args, starttime):
 
     ### Params for optimizing wsi mask cleanup ###
-    parser.add_argument('--min_size', dest='min_size', default=[30,30,24000,24000,10,10] ,type=int,
+    parser.add_argument('--min_size', dest='min_size', default=[0,30,30,30,30] ,type=int,
         help='min size region to be considered after prepass [in pixels]')
-    parser.add_argument('--bordercrop', dest='bordercrop', default=300 ,type=int,
+    parser.add_argument('--bordercrop', dest='bordercrop', default=200 ,type=int,
         help='min size region to be considered after prepass [in pixels]')
     parser.add_argument('--LR_region_pad', dest='LR_region_pad', default=50 ,type=int,
         help='padded region for low resolution region extraction')
-    parser.add_argument('--show_interstitium', dest='show_interstitium', default=True ,type=str2bool,
-        help='padded region for low resolution region extraction')
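# A hypothetical variant (illustrative, not part of the patch): with type=int and
# no nargs, a command-line --min_size parses as a single int, not a list, so the
# list default only survives when the flag is omitted; nargs='+' would accept the
# full list, e.g. --min_size 0 30 30 30 30:
#     parser.add_argument('--min_size', dest='min_size', default=[0,30,30,30,30],
#                         nargs='+', type=int,
#                         help='min size region to be considered after prepass [in pixels]')
-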