From f8477bdac6dd432fc65d226e6061d5e1ea32fa68 Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Mon, 1 Apr 2024 17:44:23 -0400 Subject: [PATCH 1/8] adding reference_feature code and dependencies --- Dockerfile | 2 +- .../ReferenceFeatureExtraction.py | 119 +++ .../ReferenceFeatureExtraction.xml | 47 + multic/cli/slicer_cli_list.json | 3 + .../Codes/extract_reference_features.py | 908 +++++++++--------- .../segmentationschool/segmentation_school.py | 5 +- .../segmentationschool/utils/mask_to_xml.py | 135 +++ .../segmentationschool/utils/xml_to_mask.py | 219 +++++ setup.py | 1 + 9 files changed, 979 insertions(+), 460 deletions(-) create mode 100644 multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py create mode 100644 multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml create mode 100644 multic/segmentationschool/utils/mask_to_xml.py create mode 100644 multic/segmentationschool/utils/xml_to_mask.py diff --git a/Dockerfile b/Dockerfile index 4ca5afb..1fa3932 100644 --- a/Dockerfile +++ b/Dockerfile @@ -134,6 +134,6 @@ WORKDIR $mc_path/multic/cli RUN python -m slicer_cli_web.cli_list_entrypoint --list_cli RUN python -m slicer_cli_web.cli_list_entrypoint MultiCompartmentSegment --help RUN python -m slicer_cli_web.cli_list_entrypoint FeatureExtraction --help - +RUN python -m slicer_cli_web.cli_list_entrypoint ReferenceFeatureExtraction --help ENTRYPOINT ["/bin/bash", "docker-entrypoint.sh"] diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py new file mode 100644 index 0000000..ef243d4 --- /dev/null +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -0,0 +1,119 @@ +import os +import sys +from glob import glob +import girder_client +from ctk_cli import CLIArgumentParser + +sys.path.append("..") +from segmentationschool.utils.mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save +from segmentationschool.utils.xml_to_mask import write_minmax_to_xml + +NAMES = ['cortical_interstitium','medullary_interstitium','non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] + + +def main(args): + + folder = args.base_dir + base_dir_id = folder.split('/')[-2] + _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) + + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.setToken(args.girderToken) + # get files in folder + files = gc.listItem(base_dir_id) + xml_color=[65280]*(len(NAMES)+1) + cwd = os.getcwd() + print(cwd) + os.chdir(cwd) + + tmp = folder + + slides_used = [] + + for file in files: + slidename = file['name'] + _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) + skipSlide = 0 + + # get annotation + item = gc.getItem(file['_id']) + annot = gc.get('/annotation/item/{}'.format(item['_id']), parameters={'sort': 'updated'}) + annot.reverse() + annot = list(annot) + _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annot))) + + # create root for xml file + xmlAnnot = xml_create() + + # all compartments + for class_,compart in enumerate(NAMES): + + compart = compart.replace(' ','') + class_ +=1 + # add layer to xml + xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) + + # test all annotation layers in order created + for iter,a in enumerate(annot): + + + try: + # check for annotation layer by name + a_name = a['annotation']['name'].replace(' ','') + except: + 
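# (sketch of the annotation payload this loop assumes, per Girder's
#  /annotation/item/{id} response; field values here are illustrative:
#    a = {'annotation': {'name': 'tubules',
#                        'elements': [{'type': 'polyline',
#                                      'points': [[x, y, z], ...]}]}}
#  an entry without a 'name' field is what the bare except guards against)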
+                    a_name = None
+
+                if a_name == compart:
+                    # track all layers present
+                    skipSlide +=1
+
+                    pointsList = []
+
+                    # load json data
+                    _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart))
+
+                    a_data = a['annotation']['elements']
+
+                    for data in a_data:
+                        pointList = []
+                        points = data['points']
+                        for point in points:
+                            pt_dict = {'X': round(point[0]), 'Y': round(point[1])}
+                            pointList.append(pt_dict)
+                        pointsList.append(pointList)
+
+                    # write annotations to xml
+                    for i in range(len(pointsList)):
+                        pointList = pointsList[i]
+                        xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_)
+
+                    # print(a['_version'], a['updated'], a['created'])
+                    break
+
+        if skipSlide != len(NAMES):
+            _ = os.system("printf '\tThis slide is missing annotation layers\n'")
+            _ = os.system("printf '\tSKIPPING SLIDE...\n'")
+            del xmlAnnot
+            continue # correct layers not present
+
+        _ = os.system("printf '\tFETCHING SLIDE...\n'")
+        os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename))
+        slides_used.append(slidename)
+
+        xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0])
+        _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path))
+        xml_save(Annotations=xmlAnnot, filename=xml_path)
+        write_minmax_to_xml(xml_path) # to avoid trying to write to the xml from multiple workers
+        del xmlAnnot
+        os.system("ls -lh '{}'".format(tmp))
+
+    _ = os.system("printf '\ndone retrieving data...\n\n'")
+
+    cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, args.girderApiUrl, args.girderToken)
+    print(cmd)
+    sys.stdout.flush()
+    os.system(cmd)
+
+if __name__ == "__main__":
+    main(CLIArgumentParser().parse_args())
diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml
new file mode 100644
index 0000000..30e0924
--- /dev/null
+++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml
@@ -0,0 +1,47 @@
+
+
+  HistomicsTK
+  Multi Compartment Training
+  Trains Multi compartment segmentation model
+  0.1.0
+  https://github.com/SarderLab/Multi-Compartment-Segmentation
+  Apache 2.0
+  Sayat Mimar (UFL)
+  This work is part of efforts in digital pathology by the Sarder Lab: UFL.
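For orientation, a slicer_cli_web descriptor carrying the fields listed here is conventionally structured as below. This is an assumed-schema sketch, not the literal file contents; the element text is taken from this file, the tags are illustrative:

    <executable>
      <category>HistomicsTK</category>
      <title>Multi Compartment Training</title>
      <description>Trains Multi compartment segmentation model</description>
      <version>0.1.0</version>
      <license>Apache 2.0</license>
      <contributor>Sayat Mimar (UFL)</contributor>
      <parameters>
        <label>IO</label>
        <description>Input/output parameters</description>
        <image>
          <name>input_file</name>
          <label>Input Image</label>
          <description>input file</description>
          <channel>input</channel>
          <index>0</index>
        </image>
        <directory>
          <name>base_dir</name>
          <label>Base Directory</label>
          <description>Base Directory for the model</description>
          <channel>input</channel>
          <index>1</index>
        </directory>
      </parameters>
      <parameters advanced="true">
        <label>Girder API URL and Key</label>
        <description>A Girder API URL and token for Girder client</description>
        <string>
          <name>girderApiUrl</name>
          <longflag>api-url</longflag>
          <description>A Girder API URL (e.g., https://girder.example.com:443/api/v1)</description>
        </string>
        <string>
          <name>girderToken</name>
          <longflag>token</longflag>
          <description>A Girder token</description>
        </string>
      </parameters>
    </executable>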
+ + + Input/output parameters + + input_file + + input file + input + 0 + + + base_dir + + Base Directory for the model + input + 1 + + + + + A Girder API URL and token for Girder client + + girderApiUrl + api-url + + A Girder API URL (e.g., https://girder.example.com:443/api/v1) + + + + girderToken + token + + A Girder token + + + + \ No newline at end of file diff --git a/multic/cli/slicer_cli_list.json b/multic/cli/slicer_cli_list.json index 1189b00..24a3824 100644 --- a/multic/cli/slicer_cli_list.json +++ b/multic/cli/slicer_cli_list.json @@ -4,5 +4,8 @@ }, "FeatureExtraction": { "type" : "python" + }, + "ReferenceFeatureExtraction": { + "type" : "python" } } diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index fb9cdb5..2ca6eb5 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -1,57 +1,61 @@ -import os, sys, cv2, time +import os, cv2 import numpy as np -import matplotlib.pyplot as plt + import lxml.etree as ET +import girder_client from matplotlib import path -# import matplotlib.patches as patches -import glob -from .xml_to_mask_minmax import write_minmax_to_xml,get_annotated_ROIs,xml_to_mask +from skimage.color import rgb2lab,rgb2hsv + +from .xml_to_mask_minmax import write_minmax_to_xml import xlsxwriter import multiprocessing from scipy.ndimage.morphology import distance_transform_edt from scipy.ndimage import binary_fill_holes -# from skimage.transform import resize -# from skimage.util import img_as_ubyte + from tqdm import tqdm import time -import openslide +from tiffslide import TiffSlide from joblib import Parallel, delayed -from skimage.color import rgb2lab,rgb2hsv -# from skimage.io import imsave -# from skimage.morphology import remove_small_objects,binary_erosion,binary_dilation,disk,binary_opening -#from skimage.filters.rank import entropy -# from scipy.ndimage.filters import generic_filter +from skimage.color import rgb2hsv + from skimage.filters import * -#Glom density, Tubule density, -#Interstitial capillary density, percentage glomerulosclerosis, -# # luminal diameter over vessel diameter -# from wsi_loader_utils import get_choppable_regions -# from PIL import Image -from matplotlib import cm -# from matplotlib.colors import ListedColormap, LinearSegmentedColormap -# from scipy.stats import zscore -import pandas as pd -import seaborn as sns -# from PAS_deconvolution import deconvolution -# import warnings def getKidneyReferenceFeatures(args): - assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' - assert args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis' + # assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' + # assert args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis' + + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.setToken(args.girderToken) + + folder = args.base_dir + girder_folder_id = folder.split('/')[-2] + file_name = args.files.split('/')[-1] + files = list(gc.listItem(girder_folder_id)) + item_dict = dict() + + for file in files: + d = {file['name']: file['_id']} + item_dict.update(d) - annotatedXMLs=glob.glob(os.path.join(args.target, "*.xml")) - for xml in tqdm(annotatedXMLs): + slide_item_id = item_dict[file_name] + + #output_dir = args.base_dir + '/tmp' + 
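# (a gloss on the openslide -> tiffslide swap this diff makes below: tiffslide
#  mirrors the openslide API surface used here, but vendor property keys are
#  re-prefixed. A sketch, assuming tiffslide's published API; path is a placeholder:
#
#      from tiffslide import TiffSlide
#      slide = TiffSlide('slide.svs')
#      dim_x, dim_y = slide.dimensions                    # level-0 size, as in openslide
#      region = slide.read_region((0, 0), 0, (512, 512))  # RGBA PIL image, as in openslide
#      mpp = slide.properties.get('tiffslide.mpp-x')      # keys use the 'tiffslide.' prefix
#
#  note also: slideExt=file_name.split('.')[-1] below yields an extension with
#  no leading dot, e.g. 'svs', while the subsequent branches compare against
#  dotted forms such as '.scn' and '.svs', so neither branch appears able to
#  match as written)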
slide_name,slideExt=file_name.split('.') + xlsx_path = slide_name + '.xlsx' + + annotatedXMLs=[args.xml_path] + for xml in annotatedXMLs: # print(xml,end='\r',flush=True) - print(xml) + print(xml,'here') write_minmax_to_xml(xml) - for ext in args.wsi_ext.split(','): + # for ext in args.wsi_ext.split(','): - if os.path.isfile(os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext))): - wsi=os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext)) - break - slideID,slideExt=os.path.splitext(wsi.split('/')[-1]) + # if os.path.isfile(os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext))): + # wsi=os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext)) + # break + slideExt=file_name.split('.')[-1] all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]} # cortex medulla glomeruli scl_glomeruli tubules arteries(ioles) tree = ET.parse(xml) @@ -75,14 +79,14 @@ def getKidneyReferenceFeatures(args): cortexarea=0 medullaarea=0 - slide=openslide.OpenSlide(wsi) + slide=TiffSlide(args.files) if slideExt =='.scn': - dim_x=int(slide.properties['openslide.bounds-width'])## add to columns - dim_y=int(slide.properties['openslide.bounds-height'])## add to rows - offsetx=int(slide.properties['openslide.bounds-x'])##start column - offsety=int(slide.properties['openslide.bounds-y'])##start row + dim_x=int(slide.properties['tiffslide.bounds-width'])## add to columns + dim_y=int(slide.properties['tiffslide.bounds-height'])## add to rows + offsetx=int(slide.properties['tiffslide.bounds-x'])##start column + offsety=int(slide.properties['tiffslide.bounds-y'])##start row - elif slideExt in ['.ndpi','.svs']: + elif slideExt in ['.ndpi','.svs','.tiff']: dim_x, dim_y=slide.dimensions offsetx=0 offsety=0 @@ -112,15 +116,7 @@ def getKidneyReferenceFeatures(args): binary=binary_fill_holes(binary) total_tissue_area=np.sum(binary)*resRatio*resRatio - # imsave(wsi.replace(slideExt,'.jpeg'),thumbIm) - # with warnings.catch_warnings(): - # warnings.simplefilter("ignore") - # imsave(wsi.replace(slideExt,'.jpeg'),thumbIm) - # imsave(wsi.replace(slideExt,'_b.jpeg'),binary.astype('uint8')*255) - # - # continue - - + for contour in all_contours['1']: cortexcontour.extend(contour) cortexarea+=cv2.contourArea(contour) @@ -132,17 +128,6 @@ def getKidneyReferenceFeatures(args): cortex_path=path.Path(cortexcontour,codes=cortexcodes) else: cortex_path=None - # cortexcontour=np.array(cortexcontour) - # pathxmin=np.min(cortexcontour[:,0]) - # pathxmax=np.max(cortexcontour[:,0]) - # pathymin=np.min(cortexcontour[:,1]) - # pathymax=np.max(cortexcontour[:,1]) - # fig, ax = plt.subplots() - # patch = patches.PathPatch(cortex_path, facecolor='orange', lw=2) - # ax.add_patch(patch) - # ax.set_xlim(pathxmin, pathxmax) - # ax.set_ylim(pathymin, pathymax) - # plt.show() for contour in all_contours['2']: medullacontour.extend(contour) @@ -157,7 +142,7 @@ def getKidneyReferenceFeatures(args): medulla_path=None pseudocortexarea=total_tissue_area-medullaarea # - workbook=xlsxwriter.Workbook(xml.replace('.xml','.xlsx')) + workbook=xlsxwriter.Workbook(xlsx_path) worksheet1 = workbook.add_worksheet('Summary') worksheet2 = workbook.add_worksheet('Interstitium') worksheet3 = workbook.add_worksheet('Glomeruli') @@ -188,7 +173,7 @@ def getKidneyReferenceFeatures(args): args,args.min_size[4],cortex_path,medulla_path) for points in tqdm(all_contours['5'],colour='blue',unit='Tubule',leave=False)) art_features=Parallel(n_jobs=cores)(delayed(points_to_features_art)(points, - args,args.min_size[5],cortex_path,medulla_path,wsi,MOD) for 
points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False)) + args,args.min_size[5],cortex_path,medulla_path,args.files,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False)) print('Generating output file..') glom_features=[i for i in glom_features if i is not None] sglom_features=[i for i in sglom_features if i is not None] @@ -368,401 +353,408 @@ def getKidneyReferenceFeatures(args): worksheet6.write(idx+1,0,art[0]) worksheet6.write(idx+1,1,art[1]) worksheet6.write(idx+1,2,art[4]) - workbook.close() - print('Done.') - # exit() - # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) -def summarizeKidneyReferenceFeatures(args): - assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' - assert args.SummaryOption is not None, 'You must specify what type of summary is required with --SummaryOption' - assert args.patientData is not None, 'You must provide patient metadata xlsx file with --patientData' - if args.SummaryOption in ['ULDensity']: - ULDensity(args) - elif args.SummaryOption in ['BLDensity']: - BLDensity(args) - elif args.SummaryOption in ['standardScatter']: - standardScatter(args) - elif args.SummaryOption in ['anchorScatter']: - anchorScatter(args) - elif args.SummaryOption in ['aggregate']: - aggregate(args) - elif args.SummaryOption in ['JoyPlot']: - JoyPlot(args) - else: - print('Incorrect SummaryOption') - -def anchorScatter(args): - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn,args.anchor],index_col=None) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=[patientData[args.labelColumns][idx],patientData[args.anchor][idx]] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - clinical_anchor=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID][0])) - clinical_anchor.append(patientMetrics[patientID][1]) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - sortedPatientAnchor=[x for x,_ in sorted(zip(clinical_anchor,datafiles))] - f1,f2=[int(i) for i in args.scatterFeatures.split(',')] - # f1=5 - # f2=7 - - temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) - index = temp.index - xlabel=index[f1-1] - ylabel=args.anchor - - popIdx=[] - scatter_features=[] - for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): - features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) - # print(np.array(features)) - if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): - scatter_features.append([features[f1-1][0],features[f2-1][0]]) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - sortedPatientAnchor.pop(p) - scatter_features=np.array(scatter_features) - - sns.scatterplot(x=scatter_features[:,0],y=sortedPatientAnchor,hue=sortedPatientLegend,palette='viridis',legend='auto') - plt.xlabel(xlabel) - plt.ylabel(ylabel) - plt.legend(title=args.labelColumns) - plt.show() -def standardScatter(args): - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - 
patientMetrics={} - assert args.labelColumns in ['Age','Cr','Sex'], 'Label column must be Age, Cr, or Sex for standard scatter' - if args.labelColumns=='Age': - labelBins=[0,10,20,30,40,50,60,70,80,90,100] - elif args.labelColumns=='Cr': - labelBins=[0,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2] - elif args.labelColumns=='Sex': - labelBins=None - - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - print(sortedPatientLegend) - f1,f2=[int(i) for i in args.scatterFeatures.split(',')] - # f1=5 - # f2=7 - - temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) - index = temp.index - xlabel=index[f1-1] - ylabel=index[f2-1] - - popIdx=[] - scatter_features=[] - for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): - features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) - # print(np.array(features)) - if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): - scatter_features.append([features[f1-1][0],features[f2-1][0]]) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - sortedPatientLegend=np.array(sortedPatientLegend) - - scatter_features=np.array(scatter_features) - - sns.scatterplot(x=scatter_features[:,0],y=scatter_features[:,1],hue=sortedPatientLegend,palette='viridis') - plt.xlabel(xlabel) - plt.ylabel(ylabel) - plt.legend(sortedPatientLegend,title=args.labelColumns) - plt.show() - -def BLDensity(args): - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - - fig,ax=plt.subplots() - plt.xlabel('Cortical tubular diameter (µm)') - plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) - popIdx=[] - for idx,datafile in enumerate(tqdm(sortedPatientOrder)): - tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii','Cortical areas'],index_col=None).dropna() - if len(tubule_features['Cortical radii'])>0: - # xl1=np.percentile(tubule_features['Cortical radii']*0.25,0.01) - xl2=np.percentile(tubule_features['Cortical radii']*0.25,99) - # yl1=np.percentile(tubule_features['Cortical areas']*0.25*0.25,0.01) - yl2=np.percentile(tubule_features['Cortical areas']*0.25*0.25,99) - # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill) - sns.kdeplot(x=tubule_features['Cortical radii']*0.25,y=tubule_features['Cortical areas']*0.25*0.25, - color=plasma(idx),ax=ax,clip=[[None,xl2],[None,yl2]]) - else: - popIdx.append(idx) - for p in popIdx[::-1]: 
- sortedPatientLegend.pop(p) - plt.legend(sortedPatientLegend,title=args.labelColumns) - plt.show() - -def ULDensity(args): - - if args.labelModality=='Continuous': - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - patientMetrics={} - # print(patientData) - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - - fig,ax=plt.subplots() - plt.xlabel('Cortical tubular diameter (µm)') - plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) - popIdx=[] - all_tubules=[] - featurename='Cortical radii' - sheetname='Tubules' - for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): - tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() - if len(tubule_features[featurename])>0: - l1=np.percentile(tubule_features[featurename]*0.25,0.05) - l2=np.percentile(tubule_features[featurename]*0.25,99.5) - sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) - # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) - all_tubules.extend(np.array(tubule_features[featurename]*0.25)) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - - plt.legend(sortedPatientLegend,title=args.labelColumns) - plt.title('Cumulative distributions per patient') - # plt.colorbar() - plt.show() - # print(len(all_tubules)) - # fig,ax=plt.subplots() - # plt.xlabel('Cortical tubular diameter (µm)') - # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) - # plt.title('Cumulative distribution for dataset') - # plt.show() - elif args.labelModality=='Categorical': - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - - clinical_legend.append(str(patientMetrics[patientID])) - - fig,ax=plt.subplots() - plt.xlabel('Cortical tubular diameter (µm)') - plasma = cm.get_cmap('plasma',len(np.unique(clinical_legend))+1) - labelMapper={} - categories=np.unique(clinical_legend) - for idx,l in enumerate(categories): - labelMapper[l]=idx - popIdx=[] - all_tubules=[] - featurename='Radius' - sheetname='Glomeruli' - for idx,datafile in enumerate(tqdm(datafiles,colour='red')): - tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() - if len(tubule_features[featurename])>0: - l1=np.percentile(tubule_features[featurename]*0.25,0.01) - l2=np.percentile(tubule_features[featurename]*0.25,99.9) - - pcolor=plasma(labelMapper[clinical_legend[idx]]) - # 
sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) - sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) - - all_tubules.extend(np.array(tubule_features[featurename]*0.25)) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - clinical_legend.pop(p) - plt.legend(clinical_legend,title=args.labelColumns) - plt.title('Cumulative distributions per patient') - plt.show() - # fig,ax=plt.subplots() - # plt.xlabel('Cortical tubular diameter (µm)') - # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) - # plt.title('Cumulative distribution for dataset') - # plt.show() - - -def JoyPlot(args): + try: + workbook.close(save_to=xlsx_path) + except: + print("An exception occurred") - url = "https://gist.githubusercontent.com/borgar/31c1e476b8e92a11d7e9/raw/0fae97dab6830ecee185a63c1cee0008f6778ff6/pulsar.csv" - df = pd.read_csv(url, header=None) - df = df.stack().reset_index() - df.columns = ['idx', 'x', 'y'] - - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - - popIdx=[] - all_tubules=[] - # outDF=pd.DataFrame() - - for idx,datafile in enumerate(tqdm(sortedPatientOrder[:15],colour='red')): - - tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii'],index_col=None).dropna() - if len(tubule_features['Cortical radii'])>0: - #.values() - feature_array=np.array(tubule_features['Cortical radii']*0.25) - all_tubules.append(feature_array) + gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) + print('Done.') + # exit() + # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) - l1=np.percentile(feature_array,0.05) - l2=np.percentile(feature_array,99.5) - # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,cbar=True) - # all_tubules.extend(np.array(tubule_features['Cortical radii']*0.25)) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - outDF=pd.DataFrame(all_tubules) - - outDF=outDF.stack(dropna=False).reset_index().dropna() - - # outDFT=outDF.transpose() - # input(outDF) - # input(outDF.reset_index()) - # input(outDF.reset_index(drop=True)[::2].reset_index(drop=True)) - # input(outDF.reset_index(drop=True)) - # input(outDF.stack()) - # input(outDF.reset_index().stack()) - # input(outDF.reset_index(drop=True).stack()) - # outDF=outDF.reset_index(drop=True)[::2].reset_index(drop=True) - # outDF = outDF.stack().reset_index() - outDF.columns = ['idx', 'x', 'y'] - outDF.to_csv('test.csv') - # print(outDF.dropna()) - # exit() - - - - - sns.set_theme(rc={"axes.facecolor": (0, 0, 0,0), 'figure.facecolor':'#ffffff', 'axes.grid':False}) - g = sns.FacetGrid(outDF, row='idx',hue="idx", aspect=15) - 
g.map(sns.kdeplot, "x", - bw_adjust=.9, clip_on=False, - fill=True, alpha=1, linewidth=1.5) - g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.9) - g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False) - def label(x, color, label): - ax = plt.gca() - ax.text(0, .2, label, fontweight="bold", color=color, - ha="left", va="center", transform=ax.transAxes) - g.map(label, "x") - - # Draw the densities in a few steps - # g.map(sns.lineplot, 'x', 'y', clip_on=False, alpha=1, linewidth=1.5) - # g.map(plt.fill_between, 'x', 'y', color='#000000') - # g.map(sns.lineplot, 'x', 'y', clip_on=False, color='#ffffff', lw=2) - # Set the subplots to overlap - g.fig.subplots_adjust(hspace=-0.95) - g.set_titles("") - g.set(yticks=[], xticks=[], ylabel="", xlabel="") - g.despine(bottom=True, left=True) - plt.show() - # plt.savefig('joy.png', facecolor='#000000') - -def aggregate(args): - assert args.exceloutfile is not None, 'You must provide a name of xlsx output file for feature aggregation with --exceloutfile name.xlsx' - usecols=args.labelColumns.split(',') - # index_col=int(args.IDColumn) - patientData=pd.read_excel(args.patientData,usecols=usecols.extend(args.IDColumn),index_col=args.IDColumn) - patientData = patientData.loc[:, ~patientData.columns.str.contains('^Unnamed')] - patientMetrics={} - - pd.set_option('display.max_rows', 100) - patientData.index = patientData.index.map(str) - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - # numGloms=0 - # numSGloms=0 - # numTubules=0 - # numArts=0 - full_data={} - for idx,datafile in enumerate(tqdm(datafiles,colour='red')): - - xlsxid=datafile.split('/')[-1].split('.xlsx')[0] - tubule_features=pd.read_excel(datafile,sheet_name='Summary',header=None,index_col=None).transpose() - names=np.array(tubule_features.iloc[0]) - vals=np.array(tubule_features.iloc[1]) - # numGloms+=vals[0] - # if not np.isnan(vals[5]): - # numSGloms+=vals[5] - # numTubules+=vals[10] - # numArts+=vals[19] - - full_data[xlsxid]=np.append(vals,np.array(patientData.loc[xlsxid]),0) - # print(numGloms) - # print(numSGloms) - # print(numTubules) - # print(numArts) - # exit() - workbook=xlsxwriter.Workbook(args.exceloutfile,{'nan_inf_to_errors': True}) - worksheet1 = workbook.add_worksheet('Aggregation') - - outcolnames=np.append(names,patientData.columns,0) - outrownames=full_data.keys() - for idx,outcol in enumerate(outcolnames): - worksheet1.write(0,idx+1,outcol) - - rowcounter=1 - for key,vals in full_data.items(): - worksheet1.write(rowcounter,0,key) - for idx,val in enumerate(vals): - worksheet1.write(rowcounter,idx+1,val) - rowcounter+=1 - - workbook.close() +# def summarizeKidneyReferenceFeatures(args): +# #assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' +# assert args.SummaryOption is not None, 'You must specify what type of summary is required with --SummaryOption' +# assert args.patientData is not None, 'You must provide patient metadata xlsx file with --patientData' +# if args.SummaryOption in ['ULDensity']: +# ULDensity(args) +# elif args.SummaryOption in ['BLDensity']: +# BLDensity(args) +# elif args.SummaryOption in ['standardScatter']: +# standardScatter(args) +# elif args.SummaryOption in ['anchorScatter']: +# anchorScatter(args) +# elif args.SummaryOption in ['aggregate']: +# aggregate(args) +# elif args.SummaryOption in ['JoyPlot']: +# JoyPlot(args) +# else: +# print('Incorrect SummaryOption') + +# def anchorScatter(args): +# 
patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn,args.anchor],index_col=None) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=[patientData[args.labelColumns][idx],patientData[args.anchor][idx]] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# clinical_anchor=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID][0])) +# clinical_anchor.append(patientMetrics[patientID][1]) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] +# sortedPatientAnchor=[x for x,_ in sorted(zip(clinical_anchor,datafiles))] +# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] +# # f1=5 +# # f2=7 + +# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) +# index = temp.index +# xlabel=index[f1-1] +# ylabel=args.anchor + +# popIdx=[] +# scatter_features=[] +# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): +# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) +# # print(np.array(features)) +# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): +# scatter_features.append([features[f1-1][0],features[f2-1][0]]) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# sortedPatientAnchor.pop(p) +# scatter_features=np.array(scatter_features) + +# sns.scatterplot(x=scatter_features[:,0],y=sortedPatientAnchor,hue=sortedPatientLegend,palette='viridis',legend='auto') +# plt.xlabel(xlabel) +# plt.ylabel(ylabel) +# plt.legend(title=args.labelColumns) +# plt.show() +# def standardScatter(args): +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# assert args.labelColumns in ['Age','Cr','Sex'], 'Label column must be Age, Cr, or Sex for standard scatter' +# if args.labelColumns=='Age': +# labelBins=[0,10,20,30,40,50,60,70,80,90,100] +# elif args.labelColumns=='Cr': +# labelBins=[0,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2] +# elif args.labelColumns=='Sex': +# labelBins=None + +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] +# print(sortedPatientLegend) +# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] +# # f1=5 +# # f2=7 + +# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) +# index = temp.index +# xlabel=index[f1-1] +# ylabel=index[f2-1] + +# popIdx=[] +# scatter_features=[] +# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): +# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) +# # print(np.array(features)) +# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): +# scatter_features.append([features[f1-1][0],features[f2-1][0]]) 
+# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# sortedPatientLegend=np.array(sortedPatientLegend) + +# scatter_features=np.array(scatter_features) + +# sns.scatterplot(x=scatter_features[:,0],y=scatter_features[:,1],hue=sortedPatientLegend,palette='viridis') +# plt.xlabel(xlabel) +# plt.ylabel(ylabel) +# plt.legend(sortedPatientLegend,title=args.labelColumns) +# plt.show() + +# def BLDensity(args): +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] + +# fig,ax=plt.subplots() +# plt.xlabel('Cortical tubular diameter (µm)') +# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) +# popIdx=[] +# for idx,datafile in enumerate(tqdm(sortedPatientOrder)): +# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii','Cortical areas'],index_col=None).dropna() +# if len(tubule_features['Cortical radii'])>0: +# # xl1=np.percentile(tubule_features['Cortical radii']*0.25,0.01) +# xl2=np.percentile(tubule_features['Cortical radii']*0.25,99) +# # yl1=np.percentile(tubule_features['Cortical areas']*0.25*0.25,0.01) +# yl2=np.percentile(tubule_features['Cortical areas']*0.25*0.25,99) +# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill) +# sns.kdeplot(x=tubule_features['Cortical radii']*0.25,y=tubule_features['Cortical areas']*0.25*0.25, +# color=plasma(idx),ax=ax,clip=[[None,xl2],[None,yl2]]) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# plt.legend(sortedPatientLegend,title=args.labelColumns) +# plt.show() + +# def ULDensity(args): + +# if args.labelModality=='Continuous': +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# # print(patientData) +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] + +# fig,ax=plt.subplots() +# plt.xlabel('Cortical tubular diameter (µm)') +# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) +# popIdx=[] +# all_tubules=[] +# featurename='Cortical radii' +# sheetname='Tubules' +# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): +# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() +# if len(tubule_features[featurename])>0: +# l1=np.percentile(tubule_features[featurename]*0.25,0.05) +# l2=np.percentile(tubule_features[featurename]*0.25,99.5) +# 
sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) +# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) +# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) + +# plt.legend(sortedPatientLegend,title=args.labelColumns) +# plt.title('Cumulative distributions per patient') +# # plt.colorbar() +# plt.show() +# # print(len(all_tubules)) +# # fig,ax=plt.subplots() +# # plt.xlabel('Cortical tubular diameter (µm)') +# # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) +# # plt.title('Cumulative distribution for dataset') +# # plt.show() +# elif args.labelModality=='Categorical': +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] + +# clinical_legend.append(str(patientMetrics[patientID])) + +# fig,ax=plt.subplots() +# plt.xlabel('Cortical tubular diameter (µm)') +# plasma = cm.get_cmap('plasma',len(np.unique(clinical_legend))+1) +# labelMapper={} +# categories=np.unique(clinical_legend) +# for idx,l in enumerate(categories): +# labelMapper[l]=idx +# popIdx=[] +# all_tubules=[] +# featurename='Radius' +# sheetname='Glomeruli' +# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): +# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() +# if len(tubule_features[featurename])>0: +# l1=np.percentile(tubule_features[featurename]*0.25,0.01) +# l2=np.percentile(tubule_features[featurename]*0.25,99.9) + +# pcolor=plasma(labelMapper[clinical_legend[idx]]) +# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) +# sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) + +# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# clinical_legend.pop(p) +# plt.legend(clinical_legend,title=args.labelColumns) +# plt.title('Cumulative distributions per patient') +# plt.show() +# # fig,ax=plt.subplots() +# # plt.xlabel('Cortical tubular diameter (µm)') +# # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) +# # plt.title('Cumulative distribution for dataset') +# # plt.show() + + +# def JoyPlot(args): + +# url = "https://gist.githubusercontent.com/borgar/31c1e476b8e92a11d7e9/raw/0fae97dab6830ecee185a63c1cee0008f6778ff6/pulsar.csv" +# df = pd.read_csv(url, header=None) +# df = df.stack().reset_index() +# df.columns = ['idx', 'x', 'y'] + +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# 
clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] + +# popIdx=[] +# all_tubules=[] +# # outDF=pd.DataFrame() + +# for idx,datafile in enumerate(tqdm(sortedPatientOrder[:15],colour='red')): + +# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii'],index_col=None).dropna() +# if len(tubule_features['Cortical radii'])>0: +# #.values() +# feature_array=np.array(tubule_features['Cortical radii']*0.25) +# all_tubules.append(feature_array) + +# l1=np.percentile(feature_array,0.05) +# l2=np.percentile(feature_array,99.5) +# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,cbar=True) +# # all_tubules.extend(np.array(tubule_features['Cortical radii']*0.25)) + +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# outDF=pd.DataFrame(all_tubules) + +# outDF=outDF.stack(dropna=False).reset_index().dropna() + +# # outDFT=outDF.transpose() +# # input(outDF) +# # input(outDF.reset_index()) +# # input(outDF.reset_index(drop=True)[::2].reset_index(drop=True)) +# # input(outDF.reset_index(drop=True)) +# # input(outDF.stack()) +# # input(outDF.reset_index().stack()) +# # input(outDF.reset_index(drop=True).stack()) +# # outDF=outDF.reset_index(drop=True)[::2].reset_index(drop=True) +# # outDF = outDF.stack().reset_index() +# outDF.columns = ['idx', 'x', 'y'] +# outDF.to_csv('test.csv') +# # print(outDF.dropna()) +# # exit() + + + + +# sns.set_theme(rc={"axes.facecolor": (0, 0, 0,0), 'figure.facecolor':'#ffffff', 'axes.grid':False}) +# g = sns.FacetGrid(outDF, row='idx',hue="idx", aspect=15) +# g.map(sns.kdeplot, "x", +# bw_adjust=.9, clip_on=False, +# fill=True, alpha=1, linewidth=1.5) +# g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.9) +# g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False) +# def label(x, color, label): +# ax = plt.gca() +# ax.text(0, .2, label, fontweight="bold", color=color, +# ha="left", va="center", transform=ax.transAxes) +# g.map(label, "x") + +# # Draw the densities in a few steps +# # g.map(sns.lineplot, 'x', 'y', clip_on=False, alpha=1, linewidth=1.5) +# # g.map(plt.fill_between, 'x', 'y', color='#000000') +# # g.map(sns.lineplot, 'x', 'y', clip_on=False, color='#ffffff', lw=2) +# # Set the subplots to overlap +# g.fig.subplots_adjust(hspace=-0.95) +# g.set_titles("") +# g.set(yticks=[], xticks=[], ylabel="", xlabel="") +# g.despine(bottom=True, left=True) +# plt.show() +# # plt.savefig('joy.png', facecolor='#000000') + +# def aggregate(args): +# assert args.exceloutfile is not None, 'You must provide a name of xlsx output file for feature aggregation with --exceloutfile name.xlsx' +# usecols=args.labelColumns.split(',') +# # index_col=int(args.IDColumn) +# patientData=pd.read_excel(args.patientData,usecols=usecols.extend(args.IDColumn),index_col=args.IDColumn) +# patientData = patientData.loc[:, ~patientData.columns.str.contains('^Unnamed')] +# patientMetrics={} + +# pd.set_option('display.max_rows', 100) +# patientData.index = patientData.index.map(str) + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# # numGloms=0 +# # numSGloms=0 +# # numTubules=0 +# # numArts=0 +# full_data={} +# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): + +# xlsxid=datafile.split('/')[-1].split('.xlsx')[0] +# 
tubule_features=pd.read_excel(datafile,sheet_name='Summary',header=None,index_col=None).transpose() +# names=np.array(tubule_features.iloc[0]) +# vals=np.array(tubule_features.iloc[1]) +# # numGloms+=vals[0] +# # if not np.isnan(vals[5]): +# # numSGloms+=vals[5] +# # numTubules+=vals[10] +# # numArts+=vals[19] + +# full_data[xlsxid]=np.append(vals,np.array(patientData.loc[xlsxid]),0) +# # print(numGloms) +# # print(numSGloms) +# # print(numTubules) +# # print(numArts) +# # exit() +# workbook=xlsxwriter.Workbook(args.exceloutfile,{'nan_inf_to_errors': True}) +# worksheet1 = workbook.add_worksheet('Aggregation') + +# outcolnames=np.append(names,patientData.columns,0) +# outrownames=full_data.keys() +# for idx,outcol in enumerate(outcolnames): +# worksheet1.write(0,idx+1,outcol) + +# rowcounter=1 +# for key,vals in full_data.items(): +# worksheet1.write(rowcounter,0,key) +# for idx,val in enumerate(vals): +# worksheet1.write(rowcounter,idx+1,val) +# rowcounter+=1 + +# workbook.close() def points_to_features_glom(points,args,min_size,cortex,medulla): a=cv2.contourArea(points) if a>min_size: @@ -818,7 +810,7 @@ def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): else: containedmedulla=False xMin,xMax,yMin,yMax=[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])] - image=np.array(openslide.OpenSlide(wsi).read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] + image=np.array(TiffSlide(wsi).read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] if xMax-xMin>5000 or yMax-yMin>5000: return [a,None,None,containedmedulla,None] @@ -879,7 +871,7 @@ def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): -# full_list.append(tubule_features) -# full_list= pd.concat(full_list) -# print(full_list) -# exit() +# # full_list.append(tubule_features) +# # full_list= pd.concat(full_list) +# # print(full_list) +# # exit() diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index 50e32c1..721b628 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -47,7 +47,7 @@ def str2bool(v): def main(args): from segmentationschool.Codes.InitializeFolderStructure import initFolder, purge_training_set, prune_training_set - # from extract_reference_features import getKidneyReferenceFeatures,summarizeKidneyReferenceFeatures + from segmentationschool.Codes.extract_reference_features import getKidneyReferenceFeatures # from TransformXMLs import splice_cortex_XMLs,register_aperio_scn_xmls # from randomCropGenerator import randomCropGenerator if args.one_network == True: @@ -280,6 +280,9 @@ def savetime(args, starttime): help='padded region for low resolution region extraction') parser.add_argument('--show_interstitium', dest='show_interstitium', default=True ,type=str2bool, help='padded region for low resolution region extraction') + + parser.add_argument('--xml_path', dest='xml_path', default=' ' ,type=str, + help='path to xml file') diff --git a/multic/segmentationschool/utils/mask_to_xml.py b/multic/segmentationschool/utils/mask_to_xml.py new file mode 100644 index 0000000..34217e5 --- /dev/null +++ b/multic/segmentationschool/utils/mask_to_xml.py @@ -0,0 +1,135 @@ +import cv2 +import numpy as np +import lxml.etree as ET + +""" +xml_path (string) - the filename of the saved xml +mask (array) - the mask to convert to xml - uint8 array +downsample (int) - amount of downsampling done to the mask + points are upsampled - this can be 
used to simplify the mask
+min_size_thresh (int) - the minimum object size allowed in the mask. This is referenced from downsample=1
+xml_color (list) - list of binary color values to be used for classes
+
+"""
+
+def mask_to_xml(xml_path, mask, downsample=1, min_size_thresh=0, simplify_contours=0, xml_color=[65280, 65535, 33023, 255, 16711680], verbose=0, return_root=False, maxClass=None, offset={'X': 0,'Y': 0}):
+
+    min_size_thresh /= downsample
+
+    # create xml tree
+    Annotations = xml_create()
+
+    # get all classes
+    classes = np.unique(mask)
+    if maxClass is None:
+        maxClass = max(classes)
+
+    # add annotation classes to tree
+    for class_ in range(maxClass+1)[1:]:
+        if verbose:
+            print('Creating class: [{}]'.format(class_))
+        Annotations = xml_add_annotation(Annotations=Annotations, xml_color=xml_color, annotationID=class_)
+
+    # add contour points to tree classwise
+    for class_ in classes: # iterate through all classes
+
+        if class_ == 0 or class_ > maxClass:
+            continue
+
+        if verbose:
+            print('Working on class [{} of {}]'.format(class_, max(classes)))
+
+        # binarize the mask w.r.t. class_
+        binaryMask = mask==class_
+
+        # get contour points of the mask
+        pointsList = get_contour_points(binaryMask, downsample=downsample, min_size_thresh=min_size_thresh, simplify_contours=simplify_contours, offset=offset)
+        for i in range(np.shape(pointsList)[0]):
+            pointList = pointsList[i]
+            Annotations = xml_add_region(Annotations=Annotations, pointList=pointList, annotationID=class_)
+
+    if return_root:
+        # return root, do not save xml file
+        return Annotations
+
+    # save the final xml file
+    xml_save(Annotations=Annotations, filename='{}.xml'.format(xml_path.split('.')[0]))
+
+
+def get_contour_points(mask, downsample, min_size_thresh=0, simplify_contours=0, offset={'X': 0,'Y': 0}):
+    # returns a dict pointList with point 'X' and 'Y' values
+    # input greyscale binary image
+    #_, maskPoints, contours = cv2.findContours(np.array(mask), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
+    maskPoints, contours = cv2.findContours(np.uint8(mask), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1)
+    maskPoints = list(maskPoints)
+    # remove small regions
+    too_small = []
+    for idx, cnt in enumerate(maskPoints):
+        area = cv2.contourArea(cnt)
+        if area < min_size_thresh:
+            too_small.append(idx)
+    if too_small != []:
+        too_small.reverse()
+        for idx in too_small:
+            maskPoints.pop(idx)
+
+    if simplify_contours > 0:
+        for idx, cnt in enumerate(maskPoints):
+            epsilon = simplify_contours*cv2.arcLength(cnt,True)
+            approx = cv2.approxPolyDP(cnt,epsilon,True)
+            maskPoints[idx] = approx
+
+    pointsList = []
+    for j in range(np.shape(maskPoints)[0]):
+        pointList = []
+        for i in range(0,np.shape(maskPoints[j])[0]):
+            point = {'X': (maskPoints[j][i][0][0] * downsample) + offset['X'], 'Y': (maskPoints[j][i][0][1] * downsample) + offset['Y']}
+            pointList.append(point)
+        pointsList.append(pointList)
+    return pointsList
+
+### functions for building an xml tree of annotations ###
+def xml_create(): # create new xml tree
+    # create new xml Tree - Annotations
+    Annotations = ET.Element('Annotations')
+    return Annotations
+
+def xml_add_annotation(Annotations, xml_color, annotationID=None): # add new annotation
+    # add new Annotation to Annotations
+    # defaults to new annotationID
+    if annotationID == None: # not specified
+        annotationID = len(Annotations.findall('Annotation')) + 1
+    Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Type': '4', 'Visible': '1', 'ReadOnly': '0', 'Incremental': '0', 'LineColorReadOnly': '0', 'LineColor': str(xml_color[annotationID-1]), 'Id': str(annotationID), 'NameReadOnly': '0'})
+    Regions = ET.SubElement(Annotation, 'Regions')
+    return Annotations
+
+def xml_add_region(Annotations, pointList, annotationID=-1, regionID=None): # add new region to annotation
+    # add new Region to Annotation
+    # defaults to last annotationID and new regionID
+    Annotation = Annotations.find("Annotation[@Id='" + str(annotationID) + "']")
+    Regions = Annotation.find('Regions')
+    if regionID == None: # not specified
+        regionID = len(Regions.findall('Region')) + 1
+    Region = ET.SubElement(Regions, 'Region', attrib={'NegativeROA': '0', 'ImageFocus': '-1', 'DisplayId': '1', 'InputRegionId': '0', 'Analyze': '0', 'Type': '0', 'Id': str(regionID)})
+    Vertices = ET.SubElement(Region, 'Vertices')
+    for point in pointList: # add new Vertex
+        ET.SubElement(Vertices, 'Vertex', attrib={'X': str(point['X']), 'Y': str(point['Y']), 'Z': '0'})
+    # add connecting point
+    ET.SubElement(Vertices, 'Vertex', attrib={'X': str(pointList[0]['X']), 'Y': str(pointList[0]['Y']), 'Z': '0'})
+    return Annotations
+
+def xml_save(Annotations, filename):
+    xml_data = ET.tostring(Annotations, pretty_print=True)
+    #xml_data = Annotations.toprettyxml()
+    f = open(filename, 'w')
+    f.write(xml_data.decode())
+    f.close()
+
+def read_xml(filename):
+    # import xml file
+    tree = ET.parse(filename)
+    return tree.getroot()
+
+if __name__ == '__main__':
+    pass  # utility module; no standalone entry point
\ No newline at end of file
diff --git a/multic/segmentationschool/utils/xml_to_mask.py b/multic/segmentationschool/utils/xml_to_mask.py
new file mode 100644
index 0000000..aeecd6a
--- /dev/null
+++ b/multic/segmentationschool/utils/xml_to_mask.py
@@ -0,0 +1,219 @@
+import numpy as np
+import lxml.etree as ET
+import cv2
+import time
+import os
+
+"""
+location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame
+size (tuple) - (width, height) tuple giving the region size | set to 'full' for entire mask
+downsample - int giving the amount of downsampling done to the output pixel mask
+
+NOTE: if you plan to loop through xmls in parallel, it is necessary to run write_minmax_to_xml()
+      on all the files prior - to avoid conflicting file writes
+
+"""
+
+def xml_to_mask(xml_path, location, size, tree=None, downsample=1, verbose=0):
+
+    # parse xml and get root
+    if tree == None: tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    if size == 'full':
+        import math
+        size = write_minmax_to_xml(xml_path=xml_path, tree=tree, get_absolute_max=True)
+        size = (math.ceil(size[0]/downsample), math.ceil(size[1]/downsample))
+        location = (0,0)
+
+    # calculate region bounds
+    bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0]*downsample, 'y_max' : location[1] + size[1]*downsample}
+
+    IDs = regions_in_mask(xml_path=xml_path, root=root, tree=tree, bounds=bounds, verbose=verbose)
+
+    if verbose != 0:
+        print('\nFOUND: ' + str(len(IDs)) + ' regions')
+
+    # find regions in bounds
+    Regions = get_vertex_points(root=root, IDs=IDs, verbose=verbose)
+
+    # fill regions and create mask
+    mask = Regions_to_mask(Regions=Regions, bounds=bounds, IDs=IDs, downsample=downsample, verbose=verbose)
+    if verbose != 0:
+        print('done...\n')
+
+    return mask
+
+
+def regions_in_mask(xml_path, root, tree, bounds, verbose=1):
+    # find regions to save
+    IDs = []
+    mtime = os.path.getmtime(xml_path)
+
+    write_minmax_to_xml(xml_path, tree)
+
+    for Annotation in root.findall("./Annotation"): # for all annotations
+        annotationID = Annotation.attrib['Id']
+
+        for Region in Annotation.findall("./*/Region"): # iterate on all region
+
+            for Vert in Region.findall("./Vertices"): # iterate on all vertex in region
+
+                # get minmax points
+                Xmin = np.int32(Vert.attrib['Xmin'])
+                Ymin = np.int32(Vert.attrib['Ymin'])
+                Xmax = np.int32(Vert.attrib['Xmax'])
+                Ymax = np.int32(Vert.attrib['Ymax'])
+
+                # test minmax points in region bounds
+                if bounds['x_min'] <= Xmax and bounds['x_max'] >= Xmin and bounds['y_min'] <= Ymax and bounds['y_max'] >= Ymin:
+                    # save region Id
+                    IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID})
+                    break
+    return IDs
+
+def get_vertex_points(root, IDs, verbose=1):
+    Regions = []
+
+    for ID in IDs: # for all IDs
+
+        # get all vertex attributes (points)
+        Vertices = []
+
+        for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"):
+            # make array of points
+            Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))])
+
+        Regions.append(np.array(Vertices))
+
+    return Regions
+
+def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1):
+    # downsample = int(np.round(downsample_factor**(.5)))
+
+    if verbose !=0:
+        print('\nMAKING MASK:')
+
+    if len(Regions) != 0: # regions present
+        # get min/max sizes
+        min_sizes = np.empty(shape=[2,0], dtype=np.int32)
+        max_sizes = np.empty(shape=[2,0], dtype=np.int32)
+        for Region in Regions: # fill all regions
+            min_bounds = np.reshape((np.amin(Region, axis=0)), (2,1))
+            max_bounds = np.reshape((np.amax(Region, axis=0)), (2,1))
+            min_sizes = np.append(min_sizes, min_bounds, axis=1)
+            max_sizes = np.append(max_sizes, max_bounds, axis=1)
+        min_size = np.amin(min_sizes, axis=1)
+        max_size = np.amax(max_sizes, axis=1)
+
+        # add to old bounds
+        bounds['x_min_pad'] = min(min_size[1], bounds['x_min'])
+        bounds['y_min_pad'] = min(min_size[0], bounds['y_min'])
+        bounds['x_max_pad'] = max(max_size[1], bounds['x_max'])
+        bounds['y_max_pad'] = max(max_size[0], bounds['y_max'])
+
+        # make blank mask
+        mask = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8)
+
+        # fill mask polygons
+        index = 0
+        for Region in Regions:
+            # reformat Regions
+            Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample))
+            Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample))
+            # get annotation ID for mask color
+            ID = IDs[index]
+            cv2.fillPoly(mask, [Region], int(ID['annotationID']))
+            index = index + 1
+
+        # reshape mask
+        x_start = np.int32(np.round((bounds['x_min'] - bounds['x_min_pad']) / downsample))
+        y_start = np.int32(np.round((bounds['y_min'] - bounds['y_min_pad']) / downsample))
+        x_stop = np.int32(np.round((bounds['x_max'] - bounds['x_min_pad']) / downsample))
+        y_stop = np.int32(np.round((bounds['y_max'] - bounds['y_min_pad']) / downsample))
+        # pull center mask region
+        mask = mask[ y_start:y_stop, x_start:x_stop ]
+
+    else: # no Regions
+        mask = np.zeros([ int(np.round((bounds['y_max'] - bounds['y_min']) / downsample)), int(np.round((bounds['x_max'] - bounds['x_min']) / downsample)) ], dtype=np.uint8)
+
+    return mask
+
+def write_minmax_to_xml(xml_path, tree=None, time_buffer=10, get_absolute_max=False):
+    # function to write min and max vertices to each region
+
+    # parse xml and get root
+    if tree == None: tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    try:
+        if get_absolute_max:
+            # break the try statement
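# (usage sketch, from the definitions above:
#    write_minmax_to_xml('slide.xml')   # run once per xml, before any parallel
#                                       # workers read it, so the Xmin/Xmax/Ymin/Ymax
#                                       # attributes are cached in the file
#    mask = xml_to_mask('slide.xml', (0, 0), (512, 512), downsample=1)
#    # -> uint8 array; pixel value = annotation layer Id, 0 = background)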
+ X_max = 0 + Y_max = 0 + raise ValueError + + # has the xml been modified to include minmax + modtime = np.float64(root.attrib['modtime']) + # has the minmax modified xml been changed? + assert os.path.getmtime(xml_path) < modtime + time_buffer + + except: + + for Annotation in root.findall("./Annotation"): # for all annotations + annotationID = Annotation.attrib['Id'] + + for Region in Annotation.findall("./*/Region"): # iterate on all region + + for Vert in Region.findall("./Vertices"): # iterate on all vertex in region + Xs = [] + Ys = [] + for Vertex in Vert.findall("./Vertex"): # iterate on all vertex in region + # get points + Xs.append(np.int32(np.float64(Vertex.attrib['X']))) + Ys.append(np.int32(np.float64(Vertex.attrib['Y']))) + + # find min and max points + Xs = np.array(Xs) + Ys = np.array(Ys) + + if get_absolute_max: + # get the biggest point in annotation + if Xs != [] and Ys != []: + X_max = max(X_max, np.max(Xs)) + Y_max = max(Y_max, np.max(Ys)) + + else: + # modify the xml + Vert.set("Xmin", "{}".format(np.min(Xs))) + Vert.set("Xmax", "{}".format(np.max(Xs))) + Vert.set("Ymin", "{}".format(np.min(Ys))) + Vert.set("Ymax", "{}".format(np.max(Ys))) + + if get_absolute_max: + # return annotation max point + return (X_max,Y_max) + + else: + # modify the xml with minmax region info + root.set("modtime", "{}".format(time.time())) + xml_data = ET.tostring(tree, pretty_print=True) + #xml_data = Annotations.toprettyxml() + f = open(xml_path, 'w') + f.write(xml_data.decode()) + f.close() + + +def get_num_classes(xml_path,ignore_label=None): + # parse xml and get root + tree = ET.parse(xml_path) + root = tree.getroot() + + annotation_num = 0 + for Annotation in root.findall("./Annotation"): # for all annotations + if ignore_label != None: + if not int(Annotation.attrib['Id']) == ignore_label: + annotation_num += 1 + else: annotation_num += 1 + + return annotation_num + 1 diff --git a/setup.py b/setup.py index fff61e6..b89b3c7 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,7 @@ def prerelease_local_scheme(version): 'girder-client', # cli 'ctk-cli', + 'XlsxWriter' ], license='Apache Software License 2.0', keywords='multic', From 7501fdc0cdff00926c2b39ee14f1c86ed1505529 Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Thu, 4 Apr 2024 10:45:01 -0400 Subject: [PATCH 2/8] fix xlsx save path --- .../ReferenceFeatureExtraction.xml | 4 ++-- .../Codes/extract_reference_features.py | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml index 30e0924..19bc3ad 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml @@ -1,8 +1,8 @@ HistomicsTK - Multi Compartment Training - Trains Multi compartment segmentation model + Extract Extended Clinical Features + Extract reference features 0.1.0 https://github.com/SarderLab/Multi-Compartment-Segmentation Apache 2.0 diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index 2ca6eb5..549b4ed 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -40,9 +40,9 @@ def getKidneyReferenceFeatures(args): slide_item_id = item_dict[file_name] - #output_dir = args.base_dir + '/tmp' + output_dir = args.base_dir + '/' 
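#   [editorial gloss, not a patch line] the trailing '/' on output_dir above is
#   load-bearing: xlsx_path is built just below by bare concatenation,
#   output_dir + 'ExtendedClinical' + '.xlsx'. A separator-safe equivalent,
#   which patch 4 later adopts for its per-slide output path, would be:
#       xlsx_path = os.path.join(args.base_dir, 'ExtendedClinical.xlsx')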
slide_name,slideExt=file_name.split('.') - xlsx_path = slide_name + '.xlsx' + xlsx_path = output_dir + 'ExtendedClinical' + '.xlsx' annotatedXMLs=[args.xml_path] for xml in annotatedXMLs: @@ -354,12 +354,8 @@ def getKidneyReferenceFeatures(args): worksheet6.write(idx+1,1,art[1]) worksheet6.write(idx+1,2,art[4]) - - try: - workbook.close(save_to=xlsx_path) - except: - print("An exception occurred") - + workbook.close() + gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) print('Done.') # exit() From 84cd08fb430ec11611d5c9a21777701cc191153b Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Fri, 12 Apr 2024 10:42:04 -0400 Subject: [PATCH 3/8] process only target image --- .../ReferenceFeatureExtraction/ReferenceFeatureExtraction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py index ef243d4..152cb7c 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -14,6 +14,7 @@ def main(args): folder = args.base_dir + wsi = args.input_file base_dir_id = folder.split('/')[-2] _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) @@ -32,6 +33,8 @@ def main(args): for file in files: slidename = file['name'] + if wsi.split('/')[-1] != slidename: + continue _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) skipSlide = 0 From 081419486415bcea352af6cf108a954ef4ea2f81 Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Mon, 22 Apr 2024 17:47:25 -0400 Subject: [PATCH 4/8] add coordinates --- .../ReferenceFeatureExtraction.py | 2 +- .../Codes/extract_reference_features.py | 165 ++++++++++++------ .../segmentationschool/segmentation_school.py | 8 +- 3 files changed, 118 insertions(+), 57 deletions(-) diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py index 152cb7c..75cd230 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -113,7 +113,7 @@ def main(args): - cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, args.girderApiUrl, args.girderToken) + cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --platform {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, 'DSA', args.girderApiUrl, args.girderToken) print(cmd) sys.stdout.flush() os.system(cmd) diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index 549b4ed..decc38b 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -2,7 +2,7 @@ import numpy as np import lxml.etree as ET -import girder_client + from matplotlib import path from skimage.color import rgb2lab,rgb2hsv @@ -22,45 +22,53 @@ def getKidneyReferenceFeatures(args): + folder = args.base_dir + # assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' # assert 
args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis' + if args.platform == 'DSA': + import girder_client + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.setToken(args.girderToken) - gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) - gc.setToken(args.girderToken) + girder_folder_id = folder.split('/')[-2] + file_name = args.files.split('/')[-1] + files = list(gc.listItem(girder_folder_id)) + item_dict = dict() - folder = args.base_dir - girder_folder_id = folder.split('/')[-2] - file_name = args.files.split('/')[-1] - files = list(gc.listItem(girder_folder_id)) - item_dict = dict() + for file in files: + d = {file['name']: file['_id']} + item_dict.update(d) + + slide_item_id = item_dict[file_name] + output_dir = args.base_dir + slide_name,slideExt=file_name.split('.') + items=[(args.files, args.xml_path)] - for file in files: - d = {file['name']: file['_id']} - item_dict.update(d) + elif args.platform == 'HPG': + image_files = [image_name for _, _, files in os.walk(folder) for image_name in files if image_name.endswith('.xml')] + image_names = [os.path.join(folder, f.split('.')[0]) for f in image_files] + slideExt = args.ext + output_dir = args.output_dir + # each item in items is a tuple of (images, annotations) + items = [(f + slideExt, f + '.xml') for f in image_names] - slide_item_id = item_dict[file_name] + else: + raise Exception("Please Enter a valid Platform, DSA or HPG") - output_dir = args.base_dir + '/' - slide_name,slideExt=file_name.split('.') - xlsx_path = output_dir + 'ExtendedClinical' + '.xlsx' + for i in range(len(items)): - annotatedXMLs=[args.xml_path] - for xml in annotatedXMLs: - # print(xml,end='\r',flush=True) + svsfile, xmlfile = items[i] - print(xml,'here') - write_minmax_to_xml(xml) - # for ext in args.wsi_ext.split(','): + print(xmlfile,'here') + write_minmax_to_xml(xmlfile) - # if os.path.isfile(os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext))): - # wsi=os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext)) - # break slideExt=file_name.split('.')[-1] all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]} # cortex medulla glomeruli scl_glomeruli tubules arteries(ioles) - tree = ET.parse(xml) + tree = ET.parse(xmlfile) root = tree.getroot() - basename=os.path.splitext(xml)[0] + basename=os.path.splitext(xmlfile)[0] for Annotation in root.findall("./Annotation"): # for all annotations annotationID = Annotation.attrib['Id'] if annotationID not in ['1','2','3','4','5','6']: @@ -79,7 +87,7 @@ def getKidneyReferenceFeatures(args): cortexarea=0 medullaarea=0 - slide=TiffSlide(args.files) + slide=TiffSlide(svsfile) if slideExt =='.scn': dim_x=int(slide.properties['tiffslide.bounds-width'])## add to columns dim_y=int(slide.properties['tiffslide.bounds-height'])## add to rows @@ -142,6 +150,7 @@ def getKidneyReferenceFeatures(args): medulla_path=None pseudocortexarea=total_tissue_area-medullaarea # + xlsx_path = os.path.join(output_dir, os.path.basename(svsfile).split('.')[0] +'_extended_clinical'+'.xlsx') workbook=xlsxwriter.Workbook(xlsx_path) worksheet1 = workbook.add_worksheet('Summary') worksheet2 = workbook.add_worksheet('Interstitium') @@ -175,28 +184,28 @@ def getKidneyReferenceFeatures(args): art_features=Parallel(n_jobs=cores)(delayed(points_to_features_art)(points, args,args.min_size[5],cortex_path,medulla_path,args.files,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False)) print('Generating output file..') - glom_features=[i for i in 
glom_features if i is not None] - sglom_features=[i for i in sglom_features if i is not None] - tub_features=[i for i in tub_features if i is not None] - art_features=[i for i in art_features if i is not None] + glom_features=np.array([i for i in glom_features if i is not None]) + sglom_features=np.array([i for i in sglom_features if i is not None]) + tub_features=np.array([i for i in tub_features if i is not None]) + art_features=np.array([i for i in art_features if i is not None]) # gloms_features=[i for i in glom_features if i[0]>args.min_sizes[2]] # sglom_features=[i for i in sglom_features if i[0]>args.min_sizes[3]] # cortexgloms=[i for i in glom_features if not i[3]] - cortextubs=[i for i in tub_features if not i[3]] - cortexarts=[i for i in art_features if not i[3]] + cortextubs=np.array([i for i in tub_features if not i[3]]) + cortexarts=np.array([i for i in art_features if not i[3]]) - medullatubs=[i for i in tub_features if i[3]] - medullaarts=[i for i in art_features if i[3]] + medullatubs=np.array([i for i in tub_features if i[3]]) + medullaarts=np.array([i for i in art_features if i[3]]) if pseudocortexarea>0: cortex_glom_area=np.sum(np.array(glom_features)[:,0]) cortex_glom_density=float(cortex_glom_area)/float(pseudocortexarea) - cortex_tub_area=np.sum(np.array(cortextubs)[:,0]) + cortex_tub_area=np.sum(cortextubs[:,0]) cortex_tub_density=float(cortex_tub_area)/float(pseudocortexarea) - cortex_art_area=np.sum(np.array(cortexarts)[:,0]) + cortex_art_area=np.sum(cortexarts[:,0]) cortex_art_density=float(cortex_art_area)/float(pseudocortexarea) # downsample_cortex=get_downsample_cortex(args,all_contours['1']) # exit() @@ -208,9 +217,9 @@ def getKidneyReferenceFeatures(args): cortex_art_density=None if medullaarea>0 and len(medullatubs)>0: - medulla_tub_area=np.sum(np.array(medullatubs)[:,0]) + medulla_tub_area=np.sum(medullatubs[:,0]) if len(medullaarts)>0: - medulla_art_area=np.sum(np.array(medullaarts)[:,0]) + medulla_art_area=np.sum(medullaarts[:,0]) medulla_art_density=float(medulla_art_area)/float(medullaarea) else: medulla_art_density=None @@ -218,11 +227,7 @@ def getKidneyReferenceFeatures(args): else: medulla_tub_density=None medulla_art_density=None - glom_features=np.array(glom_features) - sglom_features=np.array(sglom_features) - tub_features=np.array(tub_features) - art_features=np.array(art_features) - cortexarts=np.array(cortexarts) + worksheet1.write(0,0,'Glomerular density - count:') worksheet1.write(0,1,len(glom_features)/pseudocortexarea) worksheet1.write(1,0,'Average glomerular area:') @@ -265,11 +270,16 @@ def getKidneyReferenceFeatures(args): worksheet1.write(13,1,np.mean(cortextubs[:,1])) worksheet1.write(14,1,np.std(cortextubs[:,1])) if medullaarea>0: - medullatubs=np.array(medullatubs) - worksheet1.write(15,1,np.mean(medullatubs[:,0])) - worksheet1.write(16,1,np.std(medullatubs[:,0])) - worksheet1.write(17,1,np.mean(medullatubs[:,1])) - worksheet1.write(18,1,np.std(medullatubs[:,1])) + if len(medullatubs)>0: + worksheet1.write(15,1,np.mean(medullatubs[:,0])) + worksheet1.write(16,1,np.std(medullatubs[:,0])) + worksheet1.write(17,1,np.mean(medullatubs[:,1])) + worksheet1.write(18,1,np.std(medullatubs[:,1])) + else: + worksheet1.write(15,1,0) + worksheet1.write(16,1,0) + worksheet1.write(17,1,0) + worksheet1.write(18,1,0) worksheet1.write(19,0,'Cortical arterial(olar) density') worksheet1.write(19,1,len(cortexarts)/pseudocortexarea) @@ -316,24 +326,57 @@ def getKidneyReferenceFeatures(args): worksheet1.write(33,1,len(glom_features)) 
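#   [editorial gloss, not a patch line] the per-object feature lists indexed in
#   the detail sheets below are positional; with the bounding boxes this patch
#   adds, the layouts are:
#       gloms/tubules: [area, max_inscribed_radius, None, in_medulla, yMin, yMax, xMin, xMax]
#       arteries:      [area, radius, None, in_medulla, luminal_ratio, yMin, yMax, xMin, xMax]
#   so glom[4:8] and art[5:9] feed the x1/x2/y1/y2 coordinate columns.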
worksheet1.write(34,0,'sGlomerulus count') worksheet1.write(34,1,len(sglom_features)) + worksheet3.write(0,0,'Area') worksheet3.write(0,1,'Radius') + worksheet3.write(0,2,'x1') + worksheet3.write(0,3,'x2') + worksheet3.write(0,4,'y1') + worksheet3.write(0,5,'y2') + for idx,glom in enumerate(glom_features): worksheet3.write(idx+1,0,glom[0]) worksheet3.write(idx+1,1,glom[1]) + worksheet3.write(idx+1,2,glom[4]) + worksheet3.write(idx+1,3,glom[5]) + worksheet3.write(idx+1,4,glom[6]) + worksheet3.write(idx+1,5,glom[7]) + + worksheet4.write(0,0,'Area') worksheet4.write(0,1,'Radius') + + worksheet4.write(0,2,'x1') + worksheet4.write(0,3,'x2') + worksheet4.write(0,4,'y1') + worksheet4.write(0,5,'y2') + for idx,sglom in enumerate(sglom_features): worksheet4.write(idx+1,0,sglom[0]) worksheet4.write(idx+1,1,sglom[1]) + worksheet4.write(idx+1,2,sglom[4]) + worksheet4.write(idx+1,3,sglom[5]) + worksheet4.write(idx+1,4,sglom[6]) + worksheet4.write(idx+1,5,sglom[7]) + worksheet5.write(0,0,'Area') worksheet5.write(0,1,'Radius') + + worksheet5.write(0,6,'x1') + worksheet5.write(0,7,'x2') + worksheet5.write(0,8,'y1') + worksheet5.write(0,9,'y2') for idx,tub in enumerate(tub_features): worksheet5.write(idx+1,0,tub[0]) worksheet5.write(idx+1,1,tub[1]) + worksheet5.write(idx+1,6,tub[4]) + worksheet5.write(idx+1,7,tub[5]) + worksheet5.write(idx+1,8,tub[6]) + worksheet5.write(idx+1,9,tub[7]) + worksheet5.write(0,2,'Cortical areas') worksheet5.write(0,3,'Cortical radii') for idx,tub in enumerate(cortextubs): @@ -349,14 +392,26 @@ def getKidneyReferenceFeatures(args): worksheet6.write(0,0,'Area') worksheet6.write(0,1,'Radius') worksheet6.write(0,2,'Luminal ratio') + + worksheet6.write(0,3,'x1') + worksheet6.write(0,4,'x2') + worksheet6.write(0,5,'y1') + worksheet6.write(0,6,'y2') for idx,art in enumerate(art_features): worksheet6.write(idx+1,0,art[0]) worksheet6.write(idx+1,1,art[1]) worksheet6.write(idx+1,2,art[4]) + worksheet6.write(idx+1,3,art[5]) + worksheet6.write(idx+1,4,art[6]) + worksheet6.write(idx+1,5,art[7]) + worksheet6.write(idx+1,6,art[8]) + workbook.close() - - gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) + if args.platform == 'DSA': + gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) + print('Girder file uploaded!') + print('Done.') # exit() # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) @@ -770,7 +825,7 @@ def points_to_features_glom(points,args,min_size,cortex,medulla): dist=distance_transform_edt(binary_mask) - return [a,np.max(dist),None,containedmedulla] + return [a,np.max(dist),None,containedmedulla,yMin,yMax,xMin,xMax] def points_to_features_tub(points,args,min_size,cortex,medulla): a=cv2.contourArea(points) @@ -790,7 +845,7 @@ def points_to_features_tub(points,args,min_size,cortex,medulla): binary_mask=cv2.fillPoly(binary_mask,[points],1) dist=distance_transform_edt(binary_mask) - return [a,np.max(dist),None,containedmedulla] + return [a,np.max(dist),None,containedmedulla,yMin,yMax,xMin,xMax] def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): @@ -809,7 +864,7 @@ def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): image=np.array(TiffSlide(wsi).read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] if xMax-xMin>5000 or yMax-yMin>5000: - return [a,None,None,containedmedulla,None] + return 
[a,None,None,containedmedulla,None, yMin,yMax,xMin,xMax]
     binary_mask=np.zeros((int(yMax-yMin),int(xMax-xMin)))
     points[:,0]-=xMin
     points[:,1]-=yMin


# #     return [a,distMax*2,containedcortex,containedmedulla,(np.max(WSdist)*2)/(distMax*2)]
# else:
-    return [a,distMax,None,containedmedulla,np.max(WSdist)/distMax]
+    return [a,distMax,None,containedmedulla,np.max(WSdist)/distMax,yMin,yMax,xMin,xMax]


diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py
index 721b628..ef78d67 100644
--- a/multic/segmentationschool/segmentation_school.py
+++ b/multic/segmentationschool/segmentation_school.py
@@ -134,7 +134,7 @@ def savetime(args, starttime):
 help='directory with xml transformation targets')
 parser.add_argument('--cortextarget', dest='cortextarget', default=None,type=str,
 help='directory with cortex annotations for splicing')
-    parser.add_argument('--output', dest='output', default=None,type=str,
+    parser.add_argument('--output_dir', dest='output_dir', default=None,type=str,
 help='directory to save output transformed XMLs')
 parser.add_argument('--wsis', dest='wsis', default=None,type=str,
 help='directory of WSIs for reference feature extraction')
 parser.add_argument('--groupBy', dest='groupBy', default=None,type=str,
@@ -284,6 +284,12 @@

 parser.add_argument('--xml_path', dest='xml_path', default=' ' ,type=str,
 help='path to xml file')

+    parser.add_argument('--ext', dest='ext', default='.svs' ,type=str,
+        help='file extension')
+
+    parser.add_argument('--platform', dest='platform', default='DSA' ,type=str,
+        help='Run Platform, HPG or DSA')
+


From 3d9ff20f4d906c58be4764ea5015fa8c67471677 Mon Sep 17 00:00:00 2001
From: sayatmimar 
Date: Tue, 7 May 2024 10:33:17 -0400
Subject: [PATCH 5/8] fix minor issues

---
 multic/segmentationschool/Codes/extract_reference_features.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py
index decc38b..388c76a 100644
--- a/multic/segmentationschool/Codes/extract_reference_features.py
+++ b/multic/segmentationschool/Codes/extract_reference_features.py
@@ -63,7 +63,7 @@ def getKidneyReferenceFeatures(args):
 print(xmlfile,'here')
 write_minmax_to_xml(xmlfile)

-        slideExt=file_name.split('.')[-1]
+
 all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]} # cortex medulla glomeruli scl_glomeruli tubules arteries(ioles)

 tree = ET.parse(xmlfile)
@@ -182,7 +182,7 @@ def getKidneyReferenceFeatures(args):
 args,args.min_size[4],cortex_path,medulla_path) for points in tqdm(all_contours['5'],colour='blue',unit='Tubule',leave=False))

 art_features=Parallel(n_jobs=cores)(delayed(points_to_features_art)(points,
-        args,args.min_size[5],cortex_path,medulla_path,args.files,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False))
+        args,args.min_size[5],cortex_path,medulla_path,svsfile,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False))
 print('Generating output file..')

From 95ac0f41d6c8e5d45e93a60e6ac09d564e85c85a Mon Sep 17 00:00:00 2001
From: sayatmimar 
Date: Thu, 9 May 2024 14:21:18 -0400
Subject: [PATCH 6/8] add in medulla column and reorganize

---
 .../ReferenceFeatureExtraction.py | 111 +----
.../Codes/extract_reference_features.py | 456 +----------------- .../segmentationschool/segmentation_school.py | 9 +- .../segmentationschool/utils/json_to_xml.py | 66 +++ .../segmentationschool/utils/xml_to_mask.py | 219 --------- 5 files changed, 118 insertions(+), 743 deletions(-) create mode 100644 multic/segmentationschool/utils/json_to_xml.py delete mode 100644 multic/segmentationschool/utils/xml_to_mask.py diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py index 75cd230..0fb06f8 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -5,8 +5,7 @@ from ctk_cli import CLIArgumentParser sys.path.append("..") -from segmentationschool.utils.mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save -from segmentationschool.utils.xml_to_mask import write_minmax_to_xml +from segmentationschool.utils.json_to_xml import get_xml_path NAMES = ['cortical_interstitium','medullary_interstitium','non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] @@ -15,105 +14,41 @@ def main(args): folder = args.base_dir wsi = args.input_file + file_name = wsi.split('/')[-1] base_dir_id = folder.split('/')[-2] _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) gc.setToken(args.girderToken) # get files in folder - files = gc.listItem(base_dir_id) - xml_color=[65280]*(len(NAMES)+1) + files = list(gc.listItem(base_dir_id)) + # dict to link filename to gc id + item_dict = dict() + for file in files: + d = {file['name']:file['_id']} + item_dict.update(d) + + file_id = item_dict[file_name] + cwd = os.getcwd() print(cwd) os.chdir(cwd) - tmp = folder + _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(file_name)) + # get annotation + annotations= gc.get('/annotation/item/{}'.format(file_id), parameters={'sort': 'updated'}) + annotations.reverse() + annotations = list(annotations) - slides_used = [] - - for file in files: - slidename = file['name'] - if wsi.split('/')[-1] != slidename: - continue - _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) - skipSlide = 0 - - # get annotation - item = gc.getItem(file['_id']) - annot = gc.get('/annotation/item/{}'.format(item['_id']), parameters={'sort': 'updated'}) - annot.reverse() - annot = list(annot) - _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annot))) - - # create root for xml file - xmlAnnot = xml_create() - - # all compartments - for class_,compart in enumerate(NAMES): - - compart = compart.replace(' ','') - class_ +=1 - # add layer to xml - xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) - - # test all annotation layers in order created - for iter,a in enumerate(annot): - - - try: - # check for annotation layer by name - a_name = a['annotation']['name'].replace(' ','') - except: - a_name = None - - if a_name == compart: - # track all layers present - skipSlide +=1 - - pointsList = [] - - # load json data - _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart)) - - a_data = a['annotation']['elements'] - - for data in a_data: - pointList = [] - points = data['points'] - for point in points: - pt_dict = {'X': round(point[0]), 'Y': round(point[1])} - pointList.append(pt_dict) - pointsList.append(pointList) - 
- # write annotations to xml - for i in range(len(pointsList)): - pointList = pointsList[i] - xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_) - - # print(a['_version'], a['updated'], a['created']) - break - - if skipSlide != len(NAMES): - _ = os.system("printf '\tThis slide is missing annotation layers\n'") - _ = os.system("printf '\tSKIPPING SLIDE...\n'") - del xmlAnnot - continue # correct layers not present - _ = os.system("printf '\tFETCHING SLIDE...\n'") - os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename)) - slides_used.append(slidename) - - xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0]) - _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path)) - xml_save(Annotations=xmlAnnot, filename=xml_path) - write_minmax_to_xml(xml_path) # to avoid trying to write to the xml from multiple workers - del xmlAnnot - os.system("ls -lh '{}'".format(tmp)) - + annotations_filtered = [annot for annot in annotations if annot['annotation']['name'].strip() in NAMES] + _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annotations_filtered))) + del annotations + # create root for xml file + + xml_path = get_xml_path(annotations_filtered, NAMES, tmp, file_name) _ = os.system("printf '\ndone retriving data...\n\n'") - - - cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --platform {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, 'DSA', args.girderApiUrl, args.girderToken) + cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --file {} --xml_path {} --platform {} --item_id {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, 'DSA',file_id, args.girderApiUrl, args.girderToken) print(cmd) sys.stdout.flush() os.system(cmd) diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index 388c76a..3f048d1 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -31,19 +31,11 @@ def getKidneyReferenceFeatures(args): gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) gc.setToken(args.girderToken) - girder_folder_id = folder.split('/')[-2] - file_name = args.files.split('/')[-1] - files = list(gc.listItem(girder_folder_id)) - item_dict = dict() - - for file in files: - d = {file['name']: file['_id']} - item_dict.update(d) - - slide_item_id = item_dict[file_name] + file_name = args.file.split('/')[-1] + slide_item_id = args.item_id output_dir = args.base_dir slide_name,slideExt=file_name.split('.') - items=[(args.files, args.xml_path)] + items=[(args.file, args.xml_path)] elif args.platform == 'HPG': image_files = [image_name for _, _, files in os.walk(folder) for image_name in files if image_name.endswith('.xml')] @@ -327,8 +319,8 @@ def getKidneyReferenceFeatures(args): worksheet1.write(34,0,'sGlomerulus count') worksheet1.write(34,1,len(sglom_features)) - worksheet3.write(0,0,'Area') - worksheet3.write(0,1,'Radius') + worksheet3.write(0,0,'Area (pixel^2)') + worksheet3.write(0,1,'Radius (pixel)') worksheet3.write(0,2,'x1') worksheet3.write(0,3,'x2') worksheet3.write(0,4,'y1') @@ -344,8 +336,8 @@ def getKidneyReferenceFeatures(args): worksheet3.write(idx+1,5,glom[7]) - worksheet4.write(0,0,'Area') - 
worksheet4.write(0,1,'Radius') + worksheet4.write(0,0,'Area (pixel^2)') + worksheet4.write(0,1,'Radius (pixel)') worksheet4.write(0,2,'x1') worksheet4.write(0,3,'x2') @@ -361,36 +353,26 @@ def getKidneyReferenceFeatures(args): worksheet4.write(idx+1,4,sglom[6]) worksheet4.write(idx+1,5,sglom[7]) - worksheet5.write(0,0,'Area') - worksheet5.write(0,1,'Radius') + worksheet5.write(0,0,'Area (pixel^2)') + worksheet5.write(0,1,'Radius (pixel)') + worksheet5.write(0,2,'In Medulla') - worksheet5.write(0,6,'x1') - worksheet5.write(0,7,'x2') - worksheet5.write(0,8,'y1') - worksheet5.write(0,9,'y2') + worksheet5.write(0,3,'x1') + worksheet5.write(0,4,'x2') + worksheet5.write(0,5,'y1') + worksheet5.write(0,6,'y2') for idx,tub in enumerate(tub_features): worksheet5.write(idx+1,0,tub[0]) worksheet5.write(idx+1,1,tub[1]) + worksheet5.write(idx+1,2,tub[3]) - worksheet5.write(idx+1,6,tub[4]) - worksheet5.write(idx+1,7,tub[5]) - worksheet5.write(idx+1,8,tub[6]) - worksheet5.write(idx+1,9,tub[7]) - - worksheet5.write(0,2,'Cortical areas') - worksheet5.write(0,3,'Cortical radii') - for idx,tub in enumerate(cortextubs): - worksheet5.write(idx+1,2,tub[0]) - worksheet5.write(idx+1,3,tub[1]) - - worksheet5.write(0,4,'Medullary areas') - worksheet5.write(0,5,'Medullary radii') - for idx,tub in enumerate(medullatubs): - worksheet5.write(idx+1,4,tub[0]) - worksheet5.write(idx+1,5,tub[1]) - - worksheet6.write(0,0,'Area') - worksheet6.write(0,1,'Radius') + worksheet5.write(idx+1,3,tub[4]) + worksheet5.write(idx+1,4,tub[5]) + worksheet5.write(idx+1,5,tub[6]) + worksheet5.write(idx+1,6,tub[7]) + + worksheet6.write(0,0,'Area (pixel^2)') + worksheet6.write(0,1,'Radius (pixel)') worksheet6.write(0,2,'Luminal ratio') worksheet6.write(0,3,'x1') @@ -413,399 +395,7 @@ def getKidneyReferenceFeatures(args): print('Girder file uploaded!') print('Done.') - # exit() - # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) - - -# def summarizeKidneyReferenceFeatures(args): -# #assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' -# assert args.SummaryOption is not None, 'You must specify what type of summary is required with --SummaryOption' -# assert args.patientData is not None, 'You must provide patient metadata xlsx file with --patientData' -# if args.SummaryOption in ['ULDensity']: -# ULDensity(args) -# elif args.SummaryOption in ['BLDensity']: -# BLDensity(args) -# elif args.SummaryOption in ['standardScatter']: -# standardScatter(args) -# elif args.SummaryOption in ['anchorScatter']: -# anchorScatter(args) -# elif args.SummaryOption in ['aggregate']: -# aggregate(args) -# elif args.SummaryOption in ['JoyPlot']: -# JoyPlot(args) -# else: -# print('Incorrect SummaryOption') - -# def anchorScatter(args): -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn,args.anchor],index_col=None) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=[patientData[args.labelColumns][idx],patientData[args.anchor][idx]] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# clinical_anchor=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID][0])) -# clinical_anchor.append(patientMetrics[patientID][1]) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# 
sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] -# sortedPatientAnchor=[x for x,_ in sorted(zip(clinical_anchor,datafiles))] -# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] -# # f1=5 -# # f2=7 - -# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) -# index = temp.index -# xlabel=index[f1-1] -# ylabel=args.anchor - -# popIdx=[] -# scatter_features=[] -# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): -# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) -# # print(np.array(features)) -# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): -# scatter_features.append([features[f1-1][0],features[f2-1][0]]) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# sortedPatientAnchor.pop(p) -# scatter_features=np.array(scatter_features) - -# sns.scatterplot(x=scatter_features[:,0],y=sortedPatientAnchor,hue=sortedPatientLegend,palette='viridis',legend='auto') -# plt.xlabel(xlabel) -# plt.ylabel(ylabel) -# plt.legend(title=args.labelColumns) -# plt.show() -# def standardScatter(args): -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# assert args.labelColumns in ['Age','Cr','Sex'], 'Label column must be Age, Cr, or Sex for standard scatter' -# if args.labelColumns=='Age': -# labelBins=[0,10,20,30,40,50,60,70,80,90,100] -# elif args.labelColumns=='Cr': -# labelBins=[0,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2] -# elif args.labelColumns=='Sex': -# labelBins=None - -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] -# print(sortedPatientLegend) -# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] -# # f1=5 -# # f2=7 - -# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) -# index = temp.index -# xlabel=index[f1-1] -# ylabel=index[f2-1] - -# popIdx=[] -# scatter_features=[] -# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): -# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) -# # print(np.array(features)) -# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): -# scatter_features.append([features[f1-1][0],features[f2-1][0]]) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# sortedPatientLegend=np.array(sortedPatientLegend) - -# scatter_features=np.array(scatter_features) - -# sns.scatterplot(x=scatter_features[:,0],y=scatter_features[:,1],hue=sortedPatientLegend,palette='viridis') -# plt.xlabel(xlabel) -# plt.ylabel(ylabel) -# plt.legend(sortedPatientLegend,title=args.labelColumns) -# plt.show() - -# def BLDensity(args): -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# 
patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - -# fig,ax=plt.subplots() -# plt.xlabel('Cortical tubular diameter (µm)') -# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) -# popIdx=[] -# for idx,datafile in enumerate(tqdm(sortedPatientOrder)): -# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii','Cortical areas'],index_col=None).dropna() -# if len(tubule_features['Cortical radii'])>0: -# # xl1=np.percentile(tubule_features['Cortical radii']*0.25,0.01) -# xl2=np.percentile(tubule_features['Cortical radii']*0.25,99) -# # yl1=np.percentile(tubule_features['Cortical areas']*0.25*0.25,0.01) -# yl2=np.percentile(tubule_features['Cortical areas']*0.25*0.25,99) -# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill) -# sns.kdeplot(x=tubule_features['Cortical radii']*0.25,y=tubule_features['Cortical areas']*0.25*0.25, -# color=plasma(idx),ax=ax,clip=[[None,xl2],[None,yl2]]) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# plt.legend(sortedPatientLegend,title=args.labelColumns) -# plt.show() - -# def ULDensity(args): - -# if args.labelModality=='Continuous': -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# # print(patientData) -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - -# fig,ax=plt.subplots() -# plt.xlabel('Cortical tubular diameter (µm)') -# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) -# popIdx=[] -# all_tubules=[] -# featurename='Cortical radii' -# sheetname='Tubules' -# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): -# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() -# if len(tubule_features[featurename])>0: -# l1=np.percentile(tubule_features[featurename]*0.25,0.05) -# l2=np.percentile(tubule_features[featurename]*0.25,99.5) -# sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) -# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) -# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) - -# plt.legend(sortedPatientLegend,title=args.labelColumns) -# plt.title('Cumulative distributions per patient') -# # plt.colorbar() -# plt.show() -# # print(len(all_tubules)) -# # fig,ax=plt.subplots() -# # plt.xlabel('Cortical tubular diameter (µm)') -# # 
sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) -# # plt.title('Cumulative distribution for dataset') -# # plt.show() -# elif args.labelModality=='Categorical': -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] - -# clinical_legend.append(str(patientMetrics[patientID])) - -# fig,ax=plt.subplots() -# plt.xlabel('Cortical tubular diameter (µm)') -# plasma = cm.get_cmap('plasma',len(np.unique(clinical_legend))+1) -# labelMapper={} -# categories=np.unique(clinical_legend) -# for idx,l in enumerate(categories): -# labelMapper[l]=idx -# popIdx=[] -# all_tubules=[] -# featurename='Radius' -# sheetname='Glomeruli' -# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): -# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() -# if len(tubule_features[featurename])>0: -# l1=np.percentile(tubule_features[featurename]*0.25,0.01) -# l2=np.percentile(tubule_features[featurename]*0.25,99.9) - -# pcolor=plasma(labelMapper[clinical_legend[idx]]) -# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) -# sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) - -# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# clinical_legend.pop(p) -# plt.legend(clinical_legend,title=args.labelColumns) -# plt.title('Cumulative distributions per patient') -# plt.show() -# # fig,ax=plt.subplots() -# # plt.xlabel('Cortical tubular diameter (µm)') -# # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) -# # plt.title('Cumulative distribution for dataset') -# # plt.show() - - -# def JoyPlot(args): - -# url = "https://gist.githubusercontent.com/borgar/31c1e476b8e92a11d7e9/raw/0fae97dab6830ecee185a63c1cee0008f6778ff6/pulsar.csv" -# df = pd.read_csv(url, header=None) -# df = df.stack().reset_index() -# df.columns = ['idx', 'x', 'y'] - -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - -# popIdx=[] -# all_tubules=[] -# # outDF=pd.DataFrame() - -# for idx,datafile in enumerate(tqdm(sortedPatientOrder[:15],colour='red')): - -# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii'],index_col=None).dropna() -# if len(tubule_features['Cortical radii'])>0: -# #.values() -# feature_array=np.array(tubule_features['Cortical radii']*0.25) -# all_tubules.append(feature_array) - -# 
l1=np.percentile(feature_array,0.05) -# l2=np.percentile(feature_array,99.5) -# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,cbar=True) -# # all_tubules.extend(np.array(tubule_features['Cortical radii']*0.25)) - -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# outDF=pd.DataFrame(all_tubules) - -# outDF=outDF.stack(dropna=False).reset_index().dropna() - -# # outDFT=outDF.transpose() -# # input(outDF) -# # input(outDF.reset_index()) -# # input(outDF.reset_index(drop=True)[::2].reset_index(drop=True)) -# # input(outDF.reset_index(drop=True)) -# # input(outDF.stack()) -# # input(outDF.reset_index().stack()) -# # input(outDF.reset_index(drop=True).stack()) -# # outDF=outDF.reset_index(drop=True)[::2].reset_index(drop=True) -# # outDF = outDF.stack().reset_index() -# outDF.columns = ['idx', 'x', 'y'] -# outDF.to_csv('test.csv') -# # print(outDF.dropna()) -# # exit() - - - - -# sns.set_theme(rc={"axes.facecolor": (0, 0, 0,0), 'figure.facecolor':'#ffffff', 'axes.grid':False}) -# g = sns.FacetGrid(outDF, row='idx',hue="idx", aspect=15) -# g.map(sns.kdeplot, "x", -# bw_adjust=.9, clip_on=False, -# fill=True, alpha=1, linewidth=1.5) -# g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.9) -# g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False) -# def label(x, color, label): -# ax = plt.gca() -# ax.text(0, .2, label, fontweight="bold", color=color, -# ha="left", va="center", transform=ax.transAxes) -# g.map(label, "x") - -# # Draw the densities in a few steps -# # g.map(sns.lineplot, 'x', 'y', clip_on=False, alpha=1, linewidth=1.5) -# # g.map(plt.fill_between, 'x', 'y', color='#000000') -# # g.map(sns.lineplot, 'x', 'y', clip_on=False, color='#ffffff', lw=2) -# # Set the subplots to overlap -# g.fig.subplots_adjust(hspace=-0.95) -# g.set_titles("") -# g.set(yticks=[], xticks=[], ylabel="", xlabel="") -# g.despine(bottom=True, left=True) -# plt.show() -# # plt.savefig('joy.png', facecolor='#000000') - -# def aggregate(args): -# assert args.exceloutfile is not None, 'You must provide a name of xlsx output file for feature aggregation with --exceloutfile name.xlsx' -# usecols=args.labelColumns.split(',') -# # index_col=int(args.IDColumn) -# patientData=pd.read_excel(args.patientData,usecols=usecols.extend(args.IDColumn),index_col=args.IDColumn) -# patientData = patientData.loc[:, ~patientData.columns.str.contains('^Unnamed')] -# patientMetrics={} - -# pd.set_option('display.max_rows', 100) -# patientData.index = patientData.index.map(str) - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# # numGloms=0 -# # numSGloms=0 -# # numTubules=0 -# # numArts=0 -# full_data={} -# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): - -# xlsxid=datafile.split('/')[-1].split('.xlsx')[0] -# tubule_features=pd.read_excel(datafile,sheet_name='Summary',header=None,index_col=None).transpose() -# names=np.array(tubule_features.iloc[0]) -# vals=np.array(tubule_features.iloc[1]) -# # numGloms+=vals[0] -# # if not np.isnan(vals[5]): -# # numSGloms+=vals[5] -# # numTubules+=vals[10] -# # numArts+=vals[19] - -# full_data[xlsxid]=np.append(vals,np.array(patientData.loc[xlsxid]),0) -# # print(numGloms) -# # print(numSGloms) -# # print(numTubules) -# # print(numArts) -# # exit() -# workbook=xlsxwriter.Workbook(args.exceloutfile,{'nan_inf_to_errors': True}) -# worksheet1 = workbook.add_worksheet('Aggregation') - -# 
outcolnames=np.append(names,patientData.columns,0) -# outrownames=full_data.keys() -# for idx,outcol in enumerate(outcolnames): -# worksheet1.write(0,idx+1,outcol) - -# rowcounter=1 -# for key,vals in full_data.items(): -# worksheet1.write(rowcounter,0,key) -# for idx,val in enumerate(vals): -# worksheet1.write(rowcounter,idx+1,val) -# rowcounter+=1 - -# workbook.close() + def points_to_features_glom(points,args,min_size,cortex,medulla): a=cv2.contourArea(points) if a>min_size: diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index ef78d67..a12d08f 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -121,8 +121,8 @@ def savetime(args, starttime): help='girderApiUrl') parser.add_argument('--girderToken', dest='girderToken', default=' ' ,type=str, help='girderToken') - parser.add_argument('--files', dest='files', default=' ' ,type=str, - help='files') + parser.add_argument('--file', dest='file', default=' ' ,type=str, + help='input WSI file name') # option parser.add_argument('--option', dest='option', default=' ' ,type=str, help='option for [new, train, predict, validate]') @@ -165,7 +165,7 @@ def savetime(args, starttime): # automatically generated parser.add_argument('--base_dir', dest='base_dir', default=os.getcwd(),type=str, - help='base directory of code folder') + help='base directory of Data folder') parser.add_argument('--code_dir', dest='code_dir', default=os.getcwd(),type=str, help='base directory of code folder') @@ -290,6 +290,9 @@ def savetime(args, starttime): parser.add_argument('--platform', dest='platform', default='DSA' ,type=str, help='Run Platform, HPG or DSA') + parser.add_argument('--item_id', dest='item_id', default=' ' , type=str, + help='item id of the WSI in DSA') + diff --git a/multic/segmentationschool/utils/json_to_xml.py b/multic/segmentationschool/utils/json_to_xml.py new file mode 100644 index 0000000..072c631 --- /dev/null +++ b/multic/segmentationschool/utils/json_to_xml.py @@ -0,0 +1,66 @@ +import os +from .mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save + +def get_xml_path(annot, NAMES, tmp, slidename): + + xmlAnnot = xml_create() + skipSlide = 0 + xml_color=[65280]*(len(NAMES)+1) + # all compartments + for class_,compart in enumerate(NAMES): + + compart = compart.replace(' ','') + class_ +=1 + # add layer to xml + xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) + + # test all annotation layers in order created + for iter,a in enumerate(annot): + try: + # check for annotation layer by name + a_name = a['annotation']['name'].replace(' ','') + except: + a_name = None + + if a_name == compart: + # track all layers present + skipSlide +=1 + + pointsList = [] + + # load json data + _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart)) + + a_data = a['annotation']['elements'] + + for data in a_data: + pointList = [] + points = data['points'] + for point in points: + pt_dict = {'X': round(point[0]), 'Y': round(point[1])} + pointList.append(pt_dict) + pointsList.append(pointList) + + # write annotations to xml + for i in range(len(pointsList)): + pointList = pointsList[i] + xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_) + + # print(a['_version'], a['updated'], a['created']) + break + + if skipSlide != len(NAMES): + _ = os.system("printf '\tThis slide is missing annotation layers\n'") + _ = 
os.system("printf '\tSKIPPING SLIDE...\n'") + + _ = os.system("printf '\tFETCHING SLIDE...\n'") + #os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename)) + + xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0]) + _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path)) + xml_save(Annotations=xmlAnnot, filename=xml_path) + + del xmlAnnot + + + return xml_path \ No newline at end of file diff --git a/multic/segmentationschool/utils/xml_to_mask.py b/multic/segmentationschool/utils/xml_to_mask.py deleted file mode 100644 index aeecd6a..0000000 --- a/multic/segmentationschool/utils/xml_to_mask.py +++ /dev/null @@ -1,219 +0,0 @@ -import numpy as np -import lxml.etree as ET -import cv2 -import time -import os - -""" -location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame -size (tuple) - (width, height) tuple giving the region size | set to 'full' for entire mask -downsample - int giving the amount of downsampling done to the output pixel mask - -NOTE: if you plan to loop through xmls parallely, it is nessesary to run write_minmax_to_xml() - on all the files prior - to avoid conflicting file writes - -""" - -def xml_to_mask(xml_path, location, size, tree=None, downsample=1, verbose=0): - - # parse xml and get root - if tree == None: tree = ET.parse(xml_path) - root = tree.getroot() - - if size == 'full': - import math - size = write_minmax_to_xml(xml_path=xml_path, tree=tree, get_absolute_max=True) - size = (math.ceil(size[0]/downsample), math.ceil(size[1]/downsample)) - location = (0,0) - - # calculate region bounds - bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0]*downsample, 'y_max' : location[1] + size[1]*downsample} - - IDs = regions_in_mask(xml_path=xml_path, root=root, tree=tree, bounds=bounds, verbose=verbose) - - if verbose != 0: - print('\nFOUND: ' + str(len(IDs)) + ' regions') - - # find regions in bounds - Regions = get_vertex_points(root=root, IDs=IDs, verbose=verbose) - - # fill regions and create mask - mask = Regions_to_mask(Regions=Regions, bounds=bounds, IDs=IDs, downsample=downsample, verbose=verbose) - if verbose != 0: - print('done...\n') - - return mask - - -def regions_in_mask(xml_path, root, tree, bounds, verbose=1): - # find regions to save - IDs = [] - mtime = os.path.getmtime(xml_path) - - write_minmax_to_xml(xml_path, tree) - - for Annotation in root.findall("./Annotation"): # for all annotations - annotationID = Annotation.attrib['Id'] - - for Region in Annotation.findall("./*/Region"): # iterate on all region - - for Vert in Region.findall("./Vertices"): # iterate on all vertex in region - - # get minmax points - Xmin = np.int32(Vert.attrib['Xmin']) - Ymin = np.int32(Vert.attrib['Ymin']) - Xmax = np.int32(Vert.attrib['Xmax']) - Ymax = np.int32(Vert.attrib['Ymax']) - - # test minmax points in region bounds - if bounds['x_min'] <= Xmax and bounds['x_max'] >= Xmin and bounds['y_min'] <= Ymax and bounds['y_max'] >= Ymin: - # save region Id - IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) - break - return IDs - -def get_vertex_points(root, IDs, verbose=1): - Regions = [] - - for ID in IDs: # for all IDs - - # get all vertex attributes (points) - Vertices = [] - - for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): - # make array of points - Vertices.append([int(float(Vertex.attrib['X'])), 
int(float(Vertex.attrib['Y']))]) - - Regions.append(np.array(Vertices)) - - return Regions - -def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1): - # downsample = int(np.round(downsample_factor**(.5))) - - if verbose !=0: - print('\nMAKING MASK:') - - if len(Regions) != 0: # regions present - # get min/max sizes - min_sizes = np.empty(shape=[2,0], dtype=np.int32) - max_sizes = np.empty(shape=[2,0], dtype=np.int32) - for Region in Regions: # fill all regions - min_bounds = np.reshape((np.amin(Region, axis=0)), (2,1)) - max_bounds = np.reshape((np.amax(Region, axis=0)), (2,1)) - min_sizes = np.append(min_sizes, min_bounds, axis=1) - max_sizes = np.append(max_sizes, max_bounds, axis=1) - min_size = np.amin(min_sizes, axis=1) - max_size = np.amax(max_sizes, axis=1) - - # add to old bounds - bounds['x_min_pad'] = min(min_size[1], bounds['x_min']) - bounds['y_min_pad'] = min(min_size[0], bounds['y_min']) - bounds['x_max_pad'] = max(max_size[1], bounds['x_max']) - bounds['y_max_pad'] = max(max_size[0], bounds['y_max']) - - # make blank mask - mask = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8) - - # fill mask polygons - index = 0 - for Region in Regions: - # reformat Regions - Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample)) - Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample)) - # get annotation ID for mask color - ID = IDs[index] - cv2.fillPoly(mask, [Region], int(ID['annotationID'])) - index = index + 1 - - # reshape mask - x_start = np.int32(np.round((bounds['x_min'] - bounds['x_min_pad']) / downsample)) - y_start = np.int32(np.round((bounds['y_min'] - bounds['y_min_pad']) / downsample)) - x_stop = np.int32(np.round((bounds['x_max'] - bounds['x_min_pad']) / downsample)) - y_stop = np.int32(np.round((bounds['y_max'] - bounds['y_min_pad']) / downsample)) - # pull center mask region - mask = mask[ y_start:y_stop, x_start:x_stop ] - - else: # no Regions - mask = np.zeros([ int(np.round((bounds['y_max'] - bounds['y_min']) / downsample)), int(np.round((bounds['x_max'] - bounds['x_min']) / downsample)) ], dtype=np.uint8) - - return mask - -def write_minmax_to_xml(xml_path, tree=None, time_buffer=10, get_absolute_max=False): - # function to write min and max verticies to each region - - # parse xml and get root - if tree == None: tree = ET.parse(xml_path) - root = tree.getroot() - - try: - if get_absolute_max: - # break the try statement - X_max = 0 - Y_max = 0 - raise ValueError - - # has the xml been modified to include minmax - modtime = np.float64(root.attrib['modtime']) - # has the minmax modified xml been changed? 
- assert os.path.getmtime(xml_path) < modtime + time_buffer - - except: - - for Annotation in root.findall("./Annotation"): # for all annotations - annotationID = Annotation.attrib['Id'] - - for Region in Annotation.findall("./*/Region"): # iterate on all region - - for Vert in Region.findall("./Vertices"): # iterate on all vertex in region - Xs = [] - Ys = [] - for Vertex in Vert.findall("./Vertex"): # iterate on all vertex in region - # get points - Xs.append(np.int32(np.float64(Vertex.attrib['X']))) - Ys.append(np.int32(np.float64(Vertex.attrib['Y']))) - - # find min and max points - Xs = np.array(Xs) - Ys = np.array(Ys) - - if get_absolute_max: - # get the biggest point in annotation - if Xs != [] and Ys != []: - X_max = max(X_max, np.max(Xs)) - Y_max = max(Y_max, np.max(Ys)) - - else: - # modify the xml - Vert.set("Xmin", "{}".format(np.min(Xs))) - Vert.set("Xmax", "{}".format(np.max(Xs))) - Vert.set("Ymin", "{}".format(np.min(Ys))) - Vert.set("Ymax", "{}".format(np.max(Ys))) - - if get_absolute_max: - # return annotation max point - return (X_max,Y_max) - - else: - # modify the xml with minmax region info - root.set("modtime", "{}".format(time.time())) - xml_data = ET.tostring(tree, pretty_print=True) - #xml_data = Annotations.toprettyxml() - f = open(xml_path, 'w') - f.write(xml_data.decode()) - f.close() - - -def get_num_classes(xml_path,ignore_label=None): - # parse xml and get root - tree = ET.parse(xml_path) - root = tree.getroot() - - annotation_num = 0 - for Annotation in root.findall("./Annotation"): # for all annotations - if ignore_label != None: - if not int(Annotation.attrib['Id']) == ignore_label: - annotation_num += 1 - else: annotation_num += 1 - - return annotation_num + 1 From a08093b83a68060740e722ad12931443353102cc Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Thu, 9 May 2024 14:27:46 -0400 Subject: [PATCH 7/8] minor updates --- multic/segmentationschool/segmentation_school.py | 2 +- multic/segmentationschool/utils/json_to_xml.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index a12d08f..d31b06a 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -135,7 +135,7 @@ def savetime(args, starttime): parser.add_argument('--cortextarget', dest='cortextarget', default=None,type=str, help='directory with cortex annotations for splicing') parser.add_argument('--output_dir', dest='output_dir', default=None,type=str, - help='directory to save output transformed XMLs') + help='directory to save output excel file') parser.add_argument('--wsis', dest='wsis', default=None,type=str, help='directory of WSIs for reference feature extraction') parser.add_argument('--groupBy', dest='groupBy', default=None,type=str, diff --git a/multic/segmentationschool/utils/json_to_xml.py b/multic/segmentationschool/utils/json_to_xml.py index 072c631..fd3ff91 100644 --- a/multic/segmentationschool/utils/json_to_xml.py +++ b/multic/segmentationschool/utils/json_to_xml.py @@ -63,4 +63,5 @@ def get_xml_path(annot, NAMES, tmp, slidename): del xmlAnnot - return xml_path \ No newline at end of file + return xml_path + \ No newline at end of file From b98828e8997352089d6a5dda0363a627ab6c5be1 Mon Sep 17 00:00:00 2001 From: Sayat Mimar Date: Wed, 22 May 2024 15:21:46 -0400 Subject: [PATCH 8/8] pathomic script changes --- .../extraction_utils/process_mc_features.py | 102 +++------- 
 .../extraction_utils/run_pathomic_fe.py       | 175 ++++++++++++++++++
 2 files changed, 202 insertions(+), 75 deletions(-)
 create mode 100644 multic/segmentationschool/extraction_utils/run_pathomic_fe.py

diff --git a/multic/segmentationschool/extraction_utils/process_mc_features.py b/multic/segmentationschool/extraction_utils/process_mc_features.py
index 2f1f759..134d618 100644
--- a/multic/segmentationschool/extraction_utils/process_mc_features.py
+++ b/multic/segmentationschool/extraction_utils/process_mc_features.py
@@ -5,31 +5,26 @@
 from skimage.morphology import remove_small_objects,binary_erosion,binary_dilation,disk,binary_opening,binary_closing
 from skimage.filters import threshold_otsu
 from skimage import measure
-
+import cv2
 from .extract_ffpe_features import imreconstruct
 from .PAS_deconvolution import deconvolution
 
-GLOM_DICT = {3:'Glomeruli',4:'Sclerotic Glomeruli'}
-
-def process_glom_features(mask_xml, glom_value, MOD, slide, mpp, h_threshold, saturation_threshold):
-    glomeruli = mask_xml == glom_value
-    glomeruli = glomeruli.astype(np.uint8)
-    glomeruli = measure.label(glomeruli)
-    glomeruli_unique_max = np.max(glomeruli)
-    gloms = np.zeros((glomeruli_unique_max,7))
-    props = measure.regionprops(glomeruli)
-
-    for i in tqdm(range(glomeruli_unique_max),desc=GLOM_DICT[glom_value]):
-        #Area, Cellularity, Mesangial Area to Cellularity Ratio
-        area = (props[i].area)*(mpp**2)
-        x1,y1,x2,y2 = props[i].bbox
+def process_glom_features(points, MOD, slide, h_threshold, saturation_threshold):
+
+    #Area, Cellularity, Mesangial Area to Cellularity Ratio
+    area = cv2.contourArea(points)
+    if area>0:
+        y1, y2, x1, x2 =[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])]
         crop = slide.read_region((y1,x1),0,(y2-y1,x2-x1))
         crop = np.array(crop)
         crop = crop[:,:,:3]
 
-        mask = (glomeruli[x1:x2,y1:y2] == i+1).astype(np.uint8)
+        mask = np.zeros((x2-x1,y2-y1),dtype=np.uint8)
+        points[:,0]-=y1
+        points[:,1]-=x1
+        mask=cv2.fillPoly(mask,[points],1)
 
         hsv = rgb2hsv(crop)
         hsv = hsv[:,:,1]
@@ -51,41 +46,24 @@
         mask_pixels = np.sum(mask)
         pas_pixels = np.sum(pas_seg)
 
-        mes_fraction = (pas_pixels*(mpp**2)) / area
-
-        gloms[i,0] = x1
-        gloms[i,1] = x2
-        gloms[i,2] = y1
-        gloms[i,3] = y2
-        gloms[i,4] = area
-        gloms[i,5] = pas_pixels*(mpp**2)
-        gloms[i,6] = mes_fraction
-
-    del glomeruli
-
-    return gloms
+        mes_fraction = (pas_pixels) / area
 
-def process_tubules_features(mask_xml, tub_value, MOD, slide, mpp, whitespace_threshold):
+        return [x1,x2,y1,y2, area, pas_pixels, mes_fraction]
 
-    tubules = mask_xml == tub_value
-    tubules = tubules.astype(np.uint8)
-    tubules = measure.label(tubules)
-    tubules_unique_max = np.max(tubules)
-    tubs = np.zeros((tubules_unique_max,7))
-    props = measure.regionprops(tubules)
-
-    for i in tqdm(range(tubules_unique_max),desc='Tubules'):
-        #Area, Cellularity, Mesangial Area to Cellularity Ratio
-        # i=2615
-        area = (props[i].area)*(mpp**2)
-        x1,y1,x2,y2 = props[i].bbox
+def process_tubules_features(points, MOD, slide, whitespace_threshold):
+    area = cv2.contourArea(points)
+    if area>0:
+        y1, y2, x1, x2 =[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])]
         crop = slide.read_region((y1,x1),0,(y2-y1,x2-x1))
         crop = np.array(crop)
         crop = crop[:,:,:3]
 
-        mask = (tubules[x1:x2,y1:y2] == i+1).astype(np.uint8)
+        mask = np.zeros((x2-x1,y2-y1),dtype=np.uint8)
+        points[:,0]-=y1
+        points[:,1]-=x1
+        mask=cv2.fillPoly(mask,[points],1)
 
         lab = rgb2lab(crop)
         lab = lab[:,:,0]
@@ -156,38 +134,12 @@ def process_tubules_features(mask_xml, tub_value, MOD, slide, mpp, whitespace_th
         area_tot = np.sum(np.array(cyto_areas))
         cyto_avg = np.sum(np.multiply(np.array(cyto_thicknesses),(np.array(cyto_areas)/area_tot)))
 
-        tubs[i,0] = x1
-        tubs[i,1] = x2
-        tubs[i,2] = y1
-        tubs[i,3] = y2
-        tubs[i,4] = tbm_avg*(mpp**2)
-        tubs[i,5] = cyto_avg*(mpp**2)
-        tubs[i,6] = np.sum(WS) / np.sum(mask)#
-
-    del tubules
-
-    return tubs
-
-
-def process_arteriol_features(mask_xml, art_value, mpp):
-
-    arteries = mask_xml == art_value
-    arteries = arteries.astype(np.uint8)
-    arteries = measure.label(arteries)
-    arts_unique_max = np.max(arteries)
-    arts = np.zeros((arts_unique_max,5))
-    props = measure.regionprops(arteries)
-
-    for i in tqdm(range(arts_unique_max),desc='Arteries'):
-        area = (props[i].area)*(mpp**2)
-        x1,y1,x2,y2 = props[i].bbox
+        return [x1,x2,y1,y2,tbm_avg,cyto_avg,np.sum(WS) / np.sum(mask)]
 
-        arts[i,0] = x1
-        arts[i,1] = x2
-        arts[i,2] = y1
-        arts[i,3] = y2
-        arts[i,4] = area
 
-    del arteries
+def process_arteriol_features(points):
 
-    return arts
+    area = cv2.contourArea(points)
+    if area>0:
+        y1, y2, x1, x2 =[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])]
+        return [x1,x2,y1,y2,area]
diff --git a/multic/segmentationschool/extraction_utils/run_pathomic_fe.py b/multic/segmentationschool/extraction_utils/run_pathomic_fe.py
new file mode 100644
index 0000000..f453c53
--- /dev/null
+++ b/multic/segmentationschool/extraction_utils/run_pathomic_fe.py
@@ -0,0 +1,175 @@
+import sys
+import os, girder_client
+import numpy as np
+from tqdm import tqdm
+import pandas as pd
+from tiffslide import TiffSlide
+import lxml.etree as ET
+import multiprocessing
+from joblib import Parallel, delayed
+
+MODx=np.zeros((3,))
+MODy=np.zeros((3,))
+MODz=np.zeros((3,))
+MODx[0]= 0.644211
+MODy[0]= 0.716556
+MODz[0]= 0.266844
+
+MODx[1]= 0.175411
+MODy[1]= 0.972178
+MODz[1]= 0.154589
+
+MODx[2]= 0.0
+MODy[2]= 0.0
+MODz[2]= 0.0
+MOD=[MODx,MODy,MODz]
+
+from .extract_ffpe_features import xml_to_mask
+
+from .process_mc_features import process_glom_features, process_tubules_features, process_arteriol_features
+from segmentationschool.Codes.xml_to_mask_minmax import write_minmax_to_xml
+from segmentationschool.utils.json_to_xml import get_xml_path
+
+def getPathomicFeatures(args):
+
+    cores=multiprocessing.cpu_count()
+    folder = args.base_dir
+
+    # assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml'
+    # assert args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis'
+    if args.platform == 'DSA':
+        import girder_client
+        gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
+        gc.setToken(args.girderToken)
+
+        file_name = args.file.split('/')[-1]
+        slide_item_id = args.item_id
+        output_dir = args.base_dir
+        slide_name,slideExt=file_name.split('.')
+        items=[(args.file, args.xml_path)]
+
+    elif args.platform == 'HPG':
+        image_files = [image_name for _, _, files in os.walk(folder) for image_name in files if image_name.endswith('.xml')]
+        image_names = [os.path.join(folder, f.split('.')[0]) for f in image_files]
+        slideExt = args.ext
+        output_dir = args.output_dir
+        # each item in items is a tuple of (images, annotations)
+        items = [(f + slideExt, f + '.xml') for f in image_names]
+    else:
+        raise Exception("Please enter a valid platform: DSA or HPG")
+
+    for i in range(len(items)):
+
+        svsfile, xmlfile = items[i]
+
+        print('processing annotation file: {}'.format(xmlfile))
+        write_minmax_to_xml(xmlfile)
+        slide = TiffSlide(svsfile)
+
+        all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]}
+        tree = ET.parse(xmlfile)
+        root = tree.getroot()
+
+        xlsx_path = os.path.join(output_dir, os.path.basename(svsfile).split('.')[0] +'_pathomic'+'.xlsx')
+        for Annotation in root.findall("./Annotation"): # for all annotations
+            annotationID = Annotation.attrib['Id']
+            if annotationID not in ['1','2','3','4','5','6']:
+                pass
+            else:
+                for Region in Annotation.findall("./*/Region"): # iterate on all region
+                    verts=[]
+                    for Vert in Region.findall("./Vertices/Vertex"): # iterate on all vertex in region
+                        verts.append([int(float(Vert.attrib['X'])),int(float(Vert.attrib['Y']))])
+                    all_contours[annotationID].append(np.array(verts))
+
+        glom_features=Parallel(n_jobs=cores,prefer="threads")(delayed(process_glom_features)(points,
+            MOD, slide, h_threshold=args.h_threshold, saturation_threshold=args.saturation_threshold) for points in tqdm(all_contours['3'],colour='yellow',unit='Glomerulus',leave=False))
+        s_glom_features=Parallel(n_jobs=cores,prefer="threads")(delayed(process_glom_features)(points,
+            MOD, slide, h_threshold=args.h_threshold, saturation_threshold=args.saturation_threshold) for points in tqdm(all_contours['4'],colour='yellow',unit='Scl. Glomerulus',leave=False))
+        tub_features = Parallel(n_jobs=cores,prefer="threads")(delayed(process_tubules_features)(points,
+            MOD, slide,whitespace_threshold=args.whitespace_threshold) for points in tqdm(all_contours['5'],colour='blue',unit='Tubule',leave=False))
+        art_features=Parallel(n_jobs=cores,prefer="threads")(delayed(process_arteriol_features)(points,
+            ) for points in tqdm(all_contours['6'], colour='magenta',unit='Artery(-iole)',leave=False))
+        glom_features=[i for i in glom_features if i is not None]
+        s_glom_features=[i for i in s_glom_features if i is not None]
+        tub_features=[i for i in tub_features if i is not None]
+        art_features=[i for i in art_features if i is not None]
+
+        def get_feature_matrix(features, columns):
+            m, n = len(features), len(columns)
+            feature_matrix = np.zeros((m, n))
+            for i, feat in enumerate(features):
+                for j in range(n):
+                    feature_matrix[i,j] = feat[j]
+
+            return feature_matrix
+        # gloms = np.zeros((len(glom_features),7))
+        # s_gloms = np.zeros((len(s_glom_features),7))
+        # tubs = np.zeros((len(tub_features),7))
+        # arts = np.zeros((len(art_features),5))
+
+        # for i, glom in enumerate(glom_features):
+        #     gloms[i,0] = glom[0]
+        #     gloms[i,1] = glom[1]
+        #     gloms[i,2] = glom[2]
+        #     gloms[i,3] = glom[3]
+        #     gloms[i,4] = glom[4]
+        #     gloms[i,5] = glom[5]
+        #     gloms[i,6] = glom[6]
+
+        # for i, glom in enumerate(s_glom_features):
+        #     s_gloms[i,0] = s_glom[0]
+        #     s_gloms[i,1] = s_glom[1]
+        #     s_gloms[i,2] = s_glom[2]
+        #     s_gloms[i,3] = s_glom[3]
+        #     s_gloms[i,4] = s_glom[4]
+        #     s_gloms[i,5] = s_glom[5]
+        #     s_gloms[i,6] = s_glom[6]
+
+        # for i, tub in enumerate(tub_features):
+        #     if not tub:
+        #         continue
+        #     tubs[i,0] = tub[0]
+        #     tubs[i,1] = tub[1]
+        #     tubs[i,2] = tub[2]
+        #     tubs[i,3] = tub[3]
+        #     tubs[i,4] = tub[4]
+        #     tubs[i,5] = tub[5]
+        #     tubs[i,6] = tub[6]
+
+
+        # for i, art in enumerate(art_features):
+        #     if not art:
+        #         continue
+        #     arts[i,0] = art[0]
+        #     arts[i,1] = art[1]
+        #     arts[i,2] = art[2]
+        #     arts[i,3] = art[3]
+        #     arts[i,4] = art[4]
+
+
+
+        all_columns = [['x1','x2','y1','y2','Area','Mesangial Area','Mesangial Fraction'],
+                       ['x1','x2','y1','y2','Area','Mesangial Area','Mesangial Fraction'],
+                       ['x1','x2','y1','y2','Average TBM Thickness','Average Cell Thickness','Luminal Fraction'],
+                       ['x1','x2','y1','y2','Arterial Area']]
+        compart_names = ['gloms','s_gloms','tubs','arts']
+
+        gloms = get_feature_matrix(glom_features, all_columns[0])
+        s_gloms = get_feature_matrix(s_glom_features,all_columns[1])
+        tubs = get_feature_matrix(tub_features, all_columns[2])
+        arts = get_feature_matrix(art_features,all_columns[3])
+
+        all_comparts = [gloms,s_gloms,tubs, arts]
+
+        _ = os.system("printf '\tWriting Excel file: [{}]\n'".format(xlsx_path))
+        with pd.ExcelWriter(xlsx_path) as writer:
+            for idx,compart in enumerate(all_comparts):
+                df = pd.DataFrame(compart,columns=all_columns[idx])
+                df.to_excel(writer, index=False, sheet_name=compart_names[idx])
+        if args.platform == 'DSA':
+            gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None)
+            print('Girder file uploaded!')
+
+
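
Editor's note (not part of the patches above): the new per-contour helpers in process_mc_features.py unpack the bounding box as y1, y2, x1, x2 from points[:,0] and points[:,1]. Because the vertices are stored as [X, Y], the variables named y* actually carry the X (column) range and the x* variables carry the Y (row) range; the subsequent read_region and fillPoly calls are consistent with that convention, just confusingly named. The sketch below restates the same geometry with explicit names; contour_to_crop_mask is a hypothetical helper written for illustration, not a function in this series, and it copies the vertex array instead of shifting it in place.

    import numpy as np
    import cv2

    def contour_to_crop_mask(points):
        # points: (N, 2) array of [X, Y] slide-coordinate vertices, as built
        # from the XML <Vertex> elements in run_pathomic_fe.py
        pts = np.asarray(points, dtype=np.int32).copy()  # int32 copy: avoids mutating the caller's array
        min_x, max_x = pts[:, 0].min(), pts[:, 0].max()
        min_y, max_y = pts[:, 1].min(), pts[:, 1].max()
        width, height = max_x - min_x, max_y - min_y

        # slide.read_region takes ((x, y), level, (width, height));
        # the crop-local mask is indexed (rows = height, cols = width)
        mask = np.zeros((height, width), dtype=np.uint8)
        pts[:, 0] -= min_x  # shift vertices into crop-local coordinates
        pts[:, 1] -= min_y
        cv2.fillPoly(mask, [pts], 1)  # fillPoly expects (x, y) vertex order
        return (min_x, min_y), (width, height), mask

The explicit int32 cast is defensive: some OpenCV builds reject int64 vertex arrays (the default dtype of np.array(verts) on most platforms), so casting before fillPoly avoids a hard-to-trace assertion failure.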
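A related behavioral note: the old process_* functions scaled their outputs by mpp**2, while the new per-contour versions return raw pixel quantities, so the Area, Mesangial Area, and thickness columns in the Excel workbook now appear to be in pixel units rather than micron-based units, and downstream consumers may need to rescale. A minimal sanity check of the per-slide output could look like the following (the workbook name is hypothetical; real files are named <slide basename>_pathomic.xlsx, with sheet names taken from compart_names in run_pathomic_fe.py):

    import pandas as pd

    # read the four compartment sheets written by pd.ExcelWriter
    sheets = pd.read_excel('slide_pathomic.xlsx',
                           sheet_name=['gloms', 's_gloms', 'tubs', 'arts'])
    print(sheets['gloms'][['Area', 'Mesangial Fraction']].describe())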