From f8477bdac6dd432fc65d226e6061d5e1ea32fa68 Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Mon, 1 Apr 2024 17:44:23 -0400 Subject: [PATCH 1/8] adding reference_feature code and dependencies --- Dockerfile | 2 +- .../ReferenceFeatureExtraction.py | 119 +++ .../ReferenceFeatureExtraction.xml | 47 + multic/cli/slicer_cli_list.json | 3 + .../Codes/extract_reference_features.py | 908 +++++++++--------- .../segmentationschool/segmentation_school.py | 5 +- .../segmentationschool/utils/mask_to_xml.py | 135 +++ .../segmentationschool/utils/xml_to_mask.py | 219 +++++ setup.py | 1 + 9 files changed, 979 insertions(+), 460 deletions(-) create mode 100644 multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py create mode 100644 multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml create mode 100644 multic/segmentationschool/utils/mask_to_xml.py create mode 100644 multic/segmentationschool/utils/xml_to_mask.py diff --git a/Dockerfile b/Dockerfile index 4ca5afb..1fa3932 100644 --- a/Dockerfile +++ b/Dockerfile @@ -134,6 +134,6 @@ WORKDIR $mc_path/multic/cli RUN python -m slicer_cli_web.cli_list_entrypoint --list_cli RUN python -m slicer_cli_web.cli_list_entrypoint MultiCompartmentSegment --help RUN python -m slicer_cli_web.cli_list_entrypoint FeatureExtraction --help - +RUN python -m slicer_cli_web.cli_list_entrypoint ReferenceFeatureExtraction --help ENTRYPOINT ["/bin/bash", "docker-entrypoint.sh"] diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py new file mode 100644 index 0000000..ef243d4 --- /dev/null +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -0,0 +1,119 @@ +import os +import sys +from glob import glob +import girder_client +from ctk_cli import CLIArgumentParser + +sys.path.append("..") +from segmentationschool.utils.mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save +from segmentationschool.utils.xml_to_mask import write_minmax_to_xml + +NAMES = ['cortical_interstitium','medullary_interstitium','non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] + + +def main(args): + + folder = args.base_dir + base_dir_id = folder.split('/')[-2] + _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) + + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.setToken(args.girderToken) + # get files in folder + files = gc.listItem(base_dir_id) + xml_color=[65280]*(len(NAMES)+1) + cwd = os.getcwd() + print(cwd) + os.chdir(cwd) + + tmp = folder + + slides_used = [] + + for file in files: + slidename = file['name'] + _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) + skipSlide = 0 + + # get annotation + item = gc.getItem(file['_id']) + annot = gc.get('/annotation/item/{}'.format(item['_id']), parameters={'sort': 'updated'}) + annot.reverse() + annot = list(annot) + _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annot))) + + # create root for xml file + xmlAnnot = xml_create() + + # all compartments + for class_,compart in enumerate(NAMES): + + compart = compart.replace(' ','') + class_ +=1 + # add layer to xml + xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) + + # test all annotation layers in order created + for iter,a in enumerate(annot): + + + try: + # check for annotation layer by name + a_name = a['annotation']['name'].replace(' ','') + except: + 
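# (sketch of the annotation payload this loop assumes, per Girder's
#  /annotation/item/{id} response; field values here are illustrative:
#    a = {'annotation': {'name': 'tubules',
#                        'elements': [{'type': 'polyline',
#                                      'points': [[x, y, z], ...]}]}}
#  an entry without a 'name' field is what the bare except guards against)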
+                    a_name = None
+
+                if a_name == compart:
+                    # track all layers present
+                    skipSlide +=1
+
+                    pointsList = []
+
+                    # load json data
+                    _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart))
+
+                    a_data = a['annotation']['elements']
+
+                    for data in a_data:
+                        pointList = []
+                        points = data['points']
+                        for point in points:
+                            pt_dict = {'X': round(point[0]), 'Y': round(point[1])}
+                            pointList.append(pt_dict)
+                        pointsList.append(pointList)
+
+                    # write annotations to xml
+                    for i in range(len(pointsList)):
+                        pointList = pointsList[i]
+                        xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_)
+
+                    # print(a['_version'], a['updated'], a['created'])
+                    break
+
+        if skipSlide != len(NAMES):
+            _ = os.system("printf '\tThis slide is missing annotation layers\n'")
+            _ = os.system("printf '\tSKIPPING SLIDE...\n'")
+            del xmlAnnot
+            continue # correct layers not present
+
+        _ = os.system("printf '\tFETCHING SLIDE...\n'")
+        os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename))
+        slides_used.append(slidename)
+
+        xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0])
+        _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path))
+        xml_save(Annotations=xmlAnnot, filename=xml_path)
+        write_minmax_to_xml(xml_path) # to avoid trying to write to the xml from multiple workers
+        del xmlAnnot
+        os.system("ls -lh '{}'".format(tmp))
+
+    _ = os.system("printf '\ndone retrieving data...\n\n'")
+
+    cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, args.girderApiUrl, args.girderToken)
+    print(cmd)
+    sys.stdout.flush()
+    os.system(cmd)
+
+if __name__ == "__main__":
+    main(CLIArgumentParser().parse_args())
diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml
new file mode 100644
index 0000000..30e0924
--- /dev/null
+++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml
@@ -0,0 +1,47 @@
+
+
+  HistomicsTK
+  Multi Compartment Training
+  Trains Multi compartment segmentation model
+  0.1.0
+  https://github.com/SarderLab/Multi-Compartment-Segmentation
+  Apache 2.0
+  Sayat Mimar (UFL)
+  This work is part of efforts in digital pathology by the Sarder Lab: UFL.
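For orientation, a slicer_cli_web descriptor carrying the fields listed here is conventionally structured as below. This is an assumed-schema sketch, not the literal file contents; the element text is taken from this file, the tags are illustrative:

    <executable>
      <category>HistomicsTK</category>
      <title>Multi Compartment Training</title>
      <description>Trains Multi compartment segmentation model</description>
      <version>0.1.0</version>
      <license>Apache 2.0</license>
      <contributor>Sayat Mimar (UFL)</contributor>
      <parameters>
        <label>IO</label>
        <description>Input/output parameters</description>
        <image>
          <name>input_file</name>
          <label>Input Image</label>
          <description>input file</description>
          <channel>input</channel>
          <index>0</index>
        </image>
        <directory>
          <name>base_dir</name>
          <label>Base Directory</label>
          <description>Base Directory for the model</description>
          <channel>input</channel>
          <index>1</index>
        </directory>
      </parameters>
      <parameters advanced="true">
        <label>Girder API URL and Key</label>
        <description>A Girder API URL and token for Girder client</description>
        <string>
          <name>girderApiUrl</name>
          <longflag>api-url</longflag>
          <description>A Girder API URL (e.g., https://girder.example.com:443/api/v1)</description>
        </string>
        <string>
          <name>girderToken</name>
          <longflag>token</longflag>
          <description>A Girder token</description>
        </string>
      </parameters>
    </executable>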
+ + + Input/output parameters + + input_file + + input file + input + 0 + + + base_dir + + Base Directory for the model + input + 1 + + + + + A Girder API URL and token for Girder client + + girderApiUrl + api-url + + A Girder API URL (e.g., https://girder.example.com:443/api/v1) + + + + girderToken + token + + A Girder token + + + + \ No newline at end of file diff --git a/multic/cli/slicer_cli_list.json b/multic/cli/slicer_cli_list.json index 1189b00..24a3824 100644 --- a/multic/cli/slicer_cli_list.json +++ b/multic/cli/slicer_cli_list.json @@ -4,5 +4,8 @@ }, "FeatureExtraction": { "type" : "python" + }, + "ReferenceFeatureExtraction": { + "type" : "python" } } diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index fb9cdb5..2ca6eb5 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -1,57 +1,61 @@ -import os, sys, cv2, time +import os, cv2 import numpy as np -import matplotlib.pyplot as plt + import lxml.etree as ET +import girder_client from matplotlib import path -# import matplotlib.patches as patches -import glob -from .xml_to_mask_minmax import write_minmax_to_xml,get_annotated_ROIs,xml_to_mask +from skimage.color import rgb2lab,rgb2hsv + +from .xml_to_mask_minmax import write_minmax_to_xml import xlsxwriter import multiprocessing from scipy.ndimage.morphology import distance_transform_edt from scipy.ndimage import binary_fill_holes -# from skimage.transform import resize -# from skimage.util import img_as_ubyte + from tqdm import tqdm import time -import openslide +from tiffslide import TiffSlide from joblib import Parallel, delayed -from skimage.color import rgb2lab,rgb2hsv -# from skimage.io import imsave -# from skimage.morphology import remove_small_objects,binary_erosion,binary_dilation,disk,binary_opening -#from skimage.filters.rank import entropy -# from scipy.ndimage.filters import generic_filter +from skimage.color import rgb2hsv + from skimage.filters import * -#Glom density, Tubule density, -#Interstitial capillary density, percentage glomerulosclerosis, -# # luminal diameter over vessel diameter -# from wsi_loader_utils import get_choppable_regions -# from PIL import Image -from matplotlib import cm -# from matplotlib.colors import ListedColormap, LinearSegmentedColormap -# from scipy.stats import zscore -import pandas as pd -import seaborn as sns -# from PAS_deconvolution import deconvolution -# import warnings def getKidneyReferenceFeatures(args): - assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' - assert args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis' + # assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' + # assert args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis' + + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.setToken(args.girderToken) + + folder = args.base_dir + girder_folder_id = folder.split('/')[-2] + file_name = args.files.split('/')[-1] + files = list(gc.listItem(girder_folder_id)) + item_dict = dict() + + for file in files: + d = {file['name']: file['_id']} + item_dict.update(d) - annotatedXMLs=glob.glob(os.path.join(args.target, "*.xml")) - for xml in tqdm(annotatedXMLs): + slide_item_id = item_dict[file_name] + + #output_dir = args.base_dir + '/tmp' + 
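# (a gloss on the openslide -> tiffslide swap this diff makes below: tiffslide
#  mirrors the openslide API surface used here, but vendor property keys are
#  re-prefixed. A sketch, assuming tiffslide's published API; path is a placeholder:
#
#      from tiffslide import TiffSlide
#      slide = TiffSlide('slide.svs')
#      dim_x, dim_y = slide.dimensions                    # level-0 size, as in openslide
#      region = slide.read_region((0, 0), 0, (512, 512))  # RGBA PIL image, as in openslide
#      mpp = slide.properties.get('tiffslide.mpp-x')      # keys use the 'tiffslide.' prefix
#
#  note also: slideExt=file_name.split('.')[-1] below yields an extension with
#  no leading dot, e.g. 'svs', while the subsequent branches compare against
#  dotted forms such as '.scn' and '.svs', so neither branch appears able to
#  match as written)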
slide_name,slideExt=file_name.split('.') + xlsx_path = slide_name + '.xlsx' + + annotatedXMLs=[args.xml_path] + for xml in annotatedXMLs: # print(xml,end='\r',flush=True) - print(xml) + print(xml,'here') write_minmax_to_xml(xml) - for ext in args.wsi_ext.split(','): + # for ext in args.wsi_ext.split(','): - if os.path.isfile(os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext))): - wsi=os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext)) - break - slideID,slideExt=os.path.splitext(wsi.split('/')[-1]) + # if os.path.isfile(os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext))): + # wsi=os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext)) + # break + slideExt=file_name.split('.')[-1] all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]} # cortex medulla glomeruli scl_glomeruli tubules arteries(ioles) tree = ET.parse(xml) @@ -75,14 +79,14 @@ def getKidneyReferenceFeatures(args): cortexarea=0 medullaarea=0 - slide=openslide.OpenSlide(wsi) + slide=TiffSlide(args.files) if slideExt =='.scn': - dim_x=int(slide.properties['openslide.bounds-width'])## add to columns - dim_y=int(slide.properties['openslide.bounds-height'])## add to rows - offsetx=int(slide.properties['openslide.bounds-x'])##start column - offsety=int(slide.properties['openslide.bounds-y'])##start row + dim_x=int(slide.properties['tiffslide.bounds-width'])## add to columns + dim_y=int(slide.properties['tiffslide.bounds-height'])## add to rows + offsetx=int(slide.properties['tiffslide.bounds-x'])##start column + offsety=int(slide.properties['tiffslide.bounds-y'])##start row - elif slideExt in ['.ndpi','.svs']: + elif slideExt in ['.ndpi','.svs','.tiff']: dim_x, dim_y=slide.dimensions offsetx=0 offsety=0 @@ -112,15 +116,7 @@ def getKidneyReferenceFeatures(args): binary=binary_fill_holes(binary) total_tissue_area=np.sum(binary)*resRatio*resRatio - # imsave(wsi.replace(slideExt,'.jpeg'),thumbIm) - # with warnings.catch_warnings(): - # warnings.simplefilter("ignore") - # imsave(wsi.replace(slideExt,'.jpeg'),thumbIm) - # imsave(wsi.replace(slideExt,'_b.jpeg'),binary.astype('uint8')*255) - # - # continue - - + for contour in all_contours['1']: cortexcontour.extend(contour) cortexarea+=cv2.contourArea(contour) @@ -132,17 +128,6 @@ def getKidneyReferenceFeatures(args): cortex_path=path.Path(cortexcontour,codes=cortexcodes) else: cortex_path=None - # cortexcontour=np.array(cortexcontour) - # pathxmin=np.min(cortexcontour[:,0]) - # pathxmax=np.max(cortexcontour[:,0]) - # pathymin=np.min(cortexcontour[:,1]) - # pathymax=np.max(cortexcontour[:,1]) - # fig, ax = plt.subplots() - # patch = patches.PathPatch(cortex_path, facecolor='orange', lw=2) - # ax.add_patch(patch) - # ax.set_xlim(pathxmin, pathxmax) - # ax.set_ylim(pathymin, pathymax) - # plt.show() for contour in all_contours['2']: medullacontour.extend(contour) @@ -157,7 +142,7 @@ def getKidneyReferenceFeatures(args): medulla_path=None pseudocortexarea=total_tissue_area-medullaarea # - workbook=xlsxwriter.Workbook(xml.replace('.xml','.xlsx')) + workbook=xlsxwriter.Workbook(xlsx_path) worksheet1 = workbook.add_worksheet('Summary') worksheet2 = workbook.add_worksheet('Interstitium') worksheet3 = workbook.add_worksheet('Glomeruli') @@ -188,7 +173,7 @@ def getKidneyReferenceFeatures(args): args,args.min_size[4],cortex_path,medulla_path) for points in tqdm(all_contours['5'],colour='blue',unit='Tubule',leave=False)) art_features=Parallel(n_jobs=cores)(delayed(points_to_features_art)(points, - args,args.min_size[5],cortex_path,medulla_path,wsi,MOD) for 
points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False)) + args,args.min_size[5],cortex_path,medulla_path,args.files,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False)) print('Generating output file..') glom_features=[i for i in glom_features if i is not None] sglom_features=[i for i in sglom_features if i is not None] @@ -368,401 +353,408 @@ def getKidneyReferenceFeatures(args): worksheet6.write(idx+1,0,art[0]) worksheet6.write(idx+1,1,art[1]) worksheet6.write(idx+1,2,art[4]) - workbook.close() - print('Done.') - # exit() - # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) -def summarizeKidneyReferenceFeatures(args): - assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' - assert args.SummaryOption is not None, 'You must specify what type of summary is required with --SummaryOption' - assert args.patientData is not None, 'You must provide patient metadata xlsx file with --patientData' - if args.SummaryOption in ['ULDensity']: - ULDensity(args) - elif args.SummaryOption in ['BLDensity']: - BLDensity(args) - elif args.SummaryOption in ['standardScatter']: - standardScatter(args) - elif args.SummaryOption in ['anchorScatter']: - anchorScatter(args) - elif args.SummaryOption in ['aggregate']: - aggregate(args) - elif args.SummaryOption in ['JoyPlot']: - JoyPlot(args) - else: - print('Incorrect SummaryOption') - -def anchorScatter(args): - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn,args.anchor],index_col=None) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=[patientData[args.labelColumns][idx],patientData[args.anchor][idx]] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - clinical_anchor=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID][0])) - clinical_anchor.append(patientMetrics[patientID][1]) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - sortedPatientAnchor=[x for x,_ in sorted(zip(clinical_anchor,datafiles))] - f1,f2=[int(i) for i in args.scatterFeatures.split(',')] - # f1=5 - # f2=7 - - temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) - index = temp.index - xlabel=index[f1-1] - ylabel=args.anchor - - popIdx=[] - scatter_features=[] - for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): - features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) - # print(np.array(features)) - if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): - scatter_features.append([features[f1-1][0],features[f2-1][0]]) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - sortedPatientAnchor.pop(p) - scatter_features=np.array(scatter_features) - - sns.scatterplot(x=scatter_features[:,0],y=sortedPatientAnchor,hue=sortedPatientLegend,palette='viridis',legend='auto') - plt.xlabel(xlabel) - plt.ylabel(ylabel) - plt.legend(title=args.labelColumns) - plt.show() -def standardScatter(args): - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - 
patientMetrics={} - assert args.labelColumns in ['Age','Cr','Sex'], 'Label column must be Age, Cr, or Sex for standard scatter' - if args.labelColumns=='Age': - labelBins=[0,10,20,30,40,50,60,70,80,90,100] - elif args.labelColumns=='Cr': - labelBins=[0,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2] - elif args.labelColumns=='Sex': - labelBins=None - - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - print(sortedPatientLegend) - f1,f2=[int(i) for i in args.scatterFeatures.split(',')] - # f1=5 - # f2=7 - - temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) - index = temp.index - xlabel=index[f1-1] - ylabel=index[f2-1] - - popIdx=[] - scatter_features=[] - for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): - features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) - # print(np.array(features)) - if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): - scatter_features.append([features[f1-1][0],features[f2-1][0]]) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - sortedPatientLegend=np.array(sortedPatientLegend) - - scatter_features=np.array(scatter_features) - - sns.scatterplot(x=scatter_features[:,0],y=scatter_features[:,1],hue=sortedPatientLegend,palette='viridis') - plt.xlabel(xlabel) - plt.ylabel(ylabel) - plt.legend(sortedPatientLegend,title=args.labelColumns) - plt.show() - -def BLDensity(args): - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - - fig,ax=plt.subplots() - plt.xlabel('Cortical tubular diameter (µm)') - plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) - popIdx=[] - for idx,datafile in enumerate(tqdm(sortedPatientOrder)): - tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii','Cortical areas'],index_col=None).dropna() - if len(tubule_features['Cortical radii'])>0: - # xl1=np.percentile(tubule_features['Cortical radii']*0.25,0.01) - xl2=np.percentile(tubule_features['Cortical radii']*0.25,99) - # yl1=np.percentile(tubule_features['Cortical areas']*0.25*0.25,0.01) - yl2=np.percentile(tubule_features['Cortical areas']*0.25*0.25,99) - # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill) - sns.kdeplot(x=tubule_features['Cortical radii']*0.25,y=tubule_features['Cortical areas']*0.25*0.25, - color=plasma(idx),ax=ax,clip=[[None,xl2],[None,yl2]]) - else: - popIdx.append(idx) - for p in popIdx[::-1]: 
- sortedPatientLegend.pop(p) - plt.legend(sortedPatientLegend,title=args.labelColumns) - plt.show() - -def ULDensity(args): - - if args.labelModality=='Continuous': - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - patientMetrics={} - # print(patientData) - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - - fig,ax=plt.subplots() - plt.xlabel('Cortical tubular diameter (µm)') - plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) - popIdx=[] - all_tubules=[] - featurename='Cortical radii' - sheetname='Tubules' - for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): - tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() - if len(tubule_features[featurename])>0: - l1=np.percentile(tubule_features[featurename]*0.25,0.05) - l2=np.percentile(tubule_features[featurename]*0.25,99.5) - sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) - # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) - all_tubules.extend(np.array(tubule_features[featurename]*0.25)) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - - plt.legend(sortedPatientLegend,title=args.labelColumns) - plt.title('Cumulative distributions per patient') - # plt.colorbar() - plt.show() - # print(len(all_tubules)) - # fig,ax=plt.subplots() - # plt.xlabel('Cortical tubular diameter (µm)') - # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) - # plt.title('Cumulative distribution for dataset') - # plt.show() - elif args.labelModality=='Categorical': - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - - clinical_legend.append(str(patientMetrics[patientID])) - - fig,ax=plt.subplots() - plt.xlabel('Cortical tubular diameter (µm)') - plasma = cm.get_cmap('plasma',len(np.unique(clinical_legend))+1) - labelMapper={} - categories=np.unique(clinical_legend) - for idx,l in enumerate(categories): - labelMapper[l]=idx - popIdx=[] - all_tubules=[] - featurename='Radius' - sheetname='Glomeruli' - for idx,datafile in enumerate(tqdm(datafiles,colour='red')): - tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() - if len(tubule_features[featurename])>0: - l1=np.percentile(tubule_features[featurename]*0.25,0.01) - l2=np.percentile(tubule_features[featurename]*0.25,99.9) - - pcolor=plasma(labelMapper[clinical_legend[idx]]) - # 
sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) - sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) - - all_tubules.extend(np.array(tubule_features[featurename]*0.25)) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - clinical_legend.pop(p) - plt.legend(clinical_legend,title=args.labelColumns) - plt.title('Cumulative distributions per patient') - plt.show() - # fig,ax=plt.subplots() - # plt.xlabel('Cortical tubular diameter (µm)') - # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) - # plt.title('Cumulative distribution for dataset') - # plt.show() - - -def JoyPlot(args): + try: + workbook.close(save_to=xlsx_path) + except: + print("An exception occurred") - url = "https://gist.githubusercontent.com/borgar/31c1e476b8e92a11d7e9/raw/0fae97dab6830ecee185a63c1cee0008f6778ff6/pulsar.csv" - df = pd.read_csv(url, header=None) - df = df.stack().reset_index() - df.columns = ['idx', 'x', 'y'] - - patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) - patientMetrics={} - for idx,patientID in enumerate(patientData[args.IDColumn]): - patientMetrics[patientID]=patientData[args.labelColumns][idx] - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - clinical_legend=[] - for datafile in datafiles: - patientID=os.path.splitext(datafile.split('/')[-1])[0] - clinical_legend.append(str(patientMetrics[patientID])) - - sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] - sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - - popIdx=[] - all_tubules=[] - # outDF=pd.DataFrame() - - for idx,datafile in enumerate(tqdm(sortedPatientOrder[:15],colour='red')): - - tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii'],index_col=None).dropna() - if len(tubule_features['Cortical radii'])>0: - #.values() - feature_array=np.array(tubule_features['Cortical radii']*0.25) - all_tubules.append(feature_array) + gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) + print('Done.') + # exit() + # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) - l1=np.percentile(feature_array,0.05) - l2=np.percentile(feature_array,99.5) - # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,cbar=True) - # all_tubules.extend(np.array(tubule_features['Cortical radii']*0.25)) - else: - popIdx.append(idx) - for p in popIdx[::-1]: - sortedPatientLegend.pop(p) - outDF=pd.DataFrame(all_tubules) - - outDF=outDF.stack(dropna=False).reset_index().dropna() - - # outDFT=outDF.transpose() - # input(outDF) - # input(outDF.reset_index()) - # input(outDF.reset_index(drop=True)[::2].reset_index(drop=True)) - # input(outDF.reset_index(drop=True)) - # input(outDF.stack()) - # input(outDF.reset_index().stack()) - # input(outDF.reset_index(drop=True).stack()) - # outDF=outDF.reset_index(drop=True)[::2].reset_index(drop=True) - # outDF = outDF.stack().reset_index() - outDF.columns = ['idx', 'x', 'y'] - outDF.to_csv('test.csv') - # print(outDF.dropna()) - # exit() - - - - - sns.set_theme(rc={"axes.facecolor": (0, 0, 0,0), 'figure.facecolor':'#ffffff', 'axes.grid':False}) - g = sns.FacetGrid(outDF, row='idx',hue="idx", aspect=15) - 
g.map(sns.kdeplot, "x", - bw_adjust=.9, clip_on=False, - fill=True, alpha=1, linewidth=1.5) - g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.9) - g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False) - def label(x, color, label): - ax = plt.gca() - ax.text(0, .2, label, fontweight="bold", color=color, - ha="left", va="center", transform=ax.transAxes) - g.map(label, "x") - - # Draw the densities in a few steps - # g.map(sns.lineplot, 'x', 'y', clip_on=False, alpha=1, linewidth=1.5) - # g.map(plt.fill_between, 'x', 'y', color='#000000') - # g.map(sns.lineplot, 'x', 'y', clip_on=False, color='#ffffff', lw=2) - # Set the subplots to overlap - g.fig.subplots_adjust(hspace=-0.95) - g.set_titles("") - g.set(yticks=[], xticks=[], ylabel="", xlabel="") - g.despine(bottom=True, left=True) - plt.show() - # plt.savefig('joy.png', facecolor='#000000') - -def aggregate(args): - assert args.exceloutfile is not None, 'You must provide a name of xlsx output file for feature aggregation with --exceloutfile name.xlsx' - usecols=args.labelColumns.split(',') - # index_col=int(args.IDColumn) - patientData=pd.read_excel(args.patientData,usecols=usecols.extend(args.IDColumn),index_col=args.IDColumn) - patientData = patientData.loc[:, ~patientData.columns.str.contains('^Unnamed')] - patientMetrics={} - - pd.set_option('display.max_rows', 100) - patientData.index = patientData.index.map(str) - - datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) - # numGloms=0 - # numSGloms=0 - # numTubules=0 - # numArts=0 - full_data={} - for idx,datafile in enumerate(tqdm(datafiles,colour='red')): - - xlsxid=datafile.split('/')[-1].split('.xlsx')[0] - tubule_features=pd.read_excel(datafile,sheet_name='Summary',header=None,index_col=None).transpose() - names=np.array(tubule_features.iloc[0]) - vals=np.array(tubule_features.iloc[1]) - # numGloms+=vals[0] - # if not np.isnan(vals[5]): - # numSGloms+=vals[5] - # numTubules+=vals[10] - # numArts+=vals[19] - - full_data[xlsxid]=np.append(vals,np.array(patientData.loc[xlsxid]),0) - # print(numGloms) - # print(numSGloms) - # print(numTubules) - # print(numArts) - # exit() - workbook=xlsxwriter.Workbook(args.exceloutfile,{'nan_inf_to_errors': True}) - worksheet1 = workbook.add_worksheet('Aggregation') - - outcolnames=np.append(names,patientData.columns,0) - outrownames=full_data.keys() - for idx,outcol in enumerate(outcolnames): - worksheet1.write(0,idx+1,outcol) - - rowcounter=1 - for key,vals in full_data.items(): - worksheet1.write(rowcounter,0,key) - for idx,val in enumerate(vals): - worksheet1.write(rowcounter,idx+1,val) - rowcounter+=1 - - workbook.close() +# def summarizeKidneyReferenceFeatures(args): +# #assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' +# assert args.SummaryOption is not None, 'You must specify what type of summary is required with --SummaryOption' +# assert args.patientData is not None, 'You must provide patient metadata xlsx file with --patientData' +# if args.SummaryOption in ['ULDensity']: +# ULDensity(args) +# elif args.SummaryOption in ['BLDensity']: +# BLDensity(args) +# elif args.SummaryOption in ['standardScatter']: +# standardScatter(args) +# elif args.SummaryOption in ['anchorScatter']: +# anchorScatter(args) +# elif args.SummaryOption in ['aggregate']: +# aggregate(args) +# elif args.SummaryOption in ['JoyPlot']: +# JoyPlot(args) +# else: +# print('Incorrect SummaryOption') + +# def anchorScatter(args): +# 
patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn,args.anchor],index_col=None) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=[patientData[args.labelColumns][idx],patientData[args.anchor][idx]] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# clinical_anchor=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID][0])) +# clinical_anchor.append(patientMetrics[patientID][1]) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] +# sortedPatientAnchor=[x for x,_ in sorted(zip(clinical_anchor,datafiles))] +# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] +# # f1=5 +# # f2=7 + +# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) +# index = temp.index +# xlabel=index[f1-1] +# ylabel=args.anchor + +# popIdx=[] +# scatter_features=[] +# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): +# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) +# # print(np.array(features)) +# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): +# scatter_features.append([features[f1-1][0],features[f2-1][0]]) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# sortedPatientAnchor.pop(p) +# scatter_features=np.array(scatter_features) + +# sns.scatterplot(x=scatter_features[:,0],y=sortedPatientAnchor,hue=sortedPatientLegend,palette='viridis',legend='auto') +# plt.xlabel(xlabel) +# plt.ylabel(ylabel) +# plt.legend(title=args.labelColumns) +# plt.show() +# def standardScatter(args): +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# assert args.labelColumns in ['Age','Cr','Sex'], 'Label column must be Age, Cr, or Sex for standard scatter' +# if args.labelColumns=='Age': +# labelBins=[0,10,20,30,40,50,60,70,80,90,100] +# elif args.labelColumns=='Cr': +# labelBins=[0,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2] +# elif args.labelColumns=='Sex': +# labelBins=None + +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] +# print(sortedPatientLegend) +# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] +# # f1=5 +# # f2=7 + +# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) +# index = temp.index +# xlabel=index[f1-1] +# ylabel=index[f2-1] + +# popIdx=[] +# scatter_features=[] +# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): +# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) +# # print(np.array(features)) +# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): +# scatter_features.append([features[f1-1][0],features[f2-1][0]]) 
+# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# sortedPatientLegend=np.array(sortedPatientLegend) + +# scatter_features=np.array(scatter_features) + +# sns.scatterplot(x=scatter_features[:,0],y=scatter_features[:,1],hue=sortedPatientLegend,palette='viridis') +# plt.xlabel(xlabel) +# plt.ylabel(ylabel) +# plt.legend(sortedPatientLegend,title=args.labelColumns) +# plt.show() + +# def BLDensity(args): +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] + +# fig,ax=plt.subplots() +# plt.xlabel('Cortical tubular diameter (µm)') +# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) +# popIdx=[] +# for idx,datafile in enumerate(tqdm(sortedPatientOrder)): +# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii','Cortical areas'],index_col=None).dropna() +# if len(tubule_features['Cortical radii'])>0: +# # xl1=np.percentile(tubule_features['Cortical radii']*0.25,0.01) +# xl2=np.percentile(tubule_features['Cortical radii']*0.25,99) +# # yl1=np.percentile(tubule_features['Cortical areas']*0.25*0.25,0.01) +# yl2=np.percentile(tubule_features['Cortical areas']*0.25*0.25,99) +# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill) +# sns.kdeplot(x=tubule_features['Cortical radii']*0.25,y=tubule_features['Cortical areas']*0.25*0.25, +# color=plasma(idx),ax=ax,clip=[[None,xl2],[None,yl2]]) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# plt.legend(sortedPatientLegend,title=args.labelColumns) +# plt.show() + +# def ULDensity(args): + +# if args.labelModality=='Continuous': +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# # print(patientData) +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] + +# fig,ax=plt.subplots() +# plt.xlabel('Cortical tubular diameter (µm)') +# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) +# popIdx=[] +# all_tubules=[] +# featurename='Cortical radii' +# sheetname='Tubules' +# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): +# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() +# if len(tubule_features[featurename])>0: +# l1=np.percentile(tubule_features[featurename]*0.25,0.05) +# l2=np.percentile(tubule_features[featurename]*0.25,99.5) +# 
sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) +# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) +# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) + +# plt.legend(sortedPatientLegend,title=args.labelColumns) +# plt.title('Cumulative distributions per patient') +# # plt.colorbar() +# plt.show() +# # print(len(all_tubules)) +# # fig,ax=plt.subplots() +# # plt.xlabel('Cortical tubular diameter (µm)') +# # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) +# # plt.title('Cumulative distribution for dataset') +# # plt.show() +# elif args.labelModality=='Categorical': +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] + +# clinical_legend.append(str(patientMetrics[patientID])) + +# fig,ax=plt.subplots() +# plt.xlabel('Cortical tubular diameter (µm)') +# plasma = cm.get_cmap('plasma',len(np.unique(clinical_legend))+1) +# labelMapper={} +# categories=np.unique(clinical_legend) +# for idx,l in enumerate(categories): +# labelMapper[l]=idx +# popIdx=[] +# all_tubules=[] +# featurename='Radius' +# sheetname='Glomeruli' +# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): +# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() +# if len(tubule_features[featurename])>0: +# l1=np.percentile(tubule_features[featurename]*0.25,0.01) +# l2=np.percentile(tubule_features[featurename]*0.25,99.9) + +# pcolor=plasma(labelMapper[clinical_legend[idx]]) +# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) +# sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) + +# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# clinical_legend.pop(p) +# plt.legend(clinical_legend,title=args.labelColumns) +# plt.title('Cumulative distributions per patient') +# plt.show() +# # fig,ax=plt.subplots() +# # plt.xlabel('Cortical tubular diameter (µm)') +# # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) +# # plt.title('Cumulative distribution for dataset') +# # plt.show() + + +# def JoyPlot(args): + +# url = "https://gist.githubusercontent.com/borgar/31c1e476b8e92a11d7e9/raw/0fae97dab6830ecee185a63c1cee0008f6778ff6/pulsar.csv" +# df = pd.read_csv(url, header=None) +# df = df.stack().reset_index() +# df.columns = ['idx', 'x', 'y'] + +# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) +# patientMetrics={} +# for idx,patientID in enumerate(patientData[args.IDColumn]): +# patientMetrics[patientID]=patientData[args.labelColumns][idx] +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# clinical_legend=[] +# for datafile in datafiles: +# patientID=os.path.splitext(datafile.split('/')[-1])[0] +# 
clinical_legend.append(str(patientMetrics[patientID])) + +# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] +# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] + +# popIdx=[] +# all_tubules=[] +# # outDF=pd.DataFrame() + +# for idx,datafile in enumerate(tqdm(sortedPatientOrder[:15],colour='red')): + +# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii'],index_col=None).dropna() +# if len(tubule_features['Cortical radii'])>0: +# #.values() +# feature_array=np.array(tubule_features['Cortical radii']*0.25) +# all_tubules.append(feature_array) + +# l1=np.percentile(feature_array,0.05) +# l2=np.percentile(feature_array,99.5) +# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,cbar=True) +# # all_tubules.extend(np.array(tubule_features['Cortical radii']*0.25)) + +# else: +# popIdx.append(idx) +# for p in popIdx[::-1]: +# sortedPatientLegend.pop(p) +# outDF=pd.DataFrame(all_tubules) + +# outDF=outDF.stack(dropna=False).reset_index().dropna() + +# # outDFT=outDF.transpose() +# # input(outDF) +# # input(outDF.reset_index()) +# # input(outDF.reset_index(drop=True)[::2].reset_index(drop=True)) +# # input(outDF.reset_index(drop=True)) +# # input(outDF.stack()) +# # input(outDF.reset_index().stack()) +# # input(outDF.reset_index(drop=True).stack()) +# # outDF=outDF.reset_index(drop=True)[::2].reset_index(drop=True) +# # outDF = outDF.stack().reset_index() +# outDF.columns = ['idx', 'x', 'y'] +# outDF.to_csv('test.csv') +# # print(outDF.dropna()) +# # exit() + + + + +# sns.set_theme(rc={"axes.facecolor": (0, 0, 0,0), 'figure.facecolor':'#ffffff', 'axes.grid':False}) +# g = sns.FacetGrid(outDF, row='idx',hue="idx", aspect=15) +# g.map(sns.kdeplot, "x", +# bw_adjust=.9, clip_on=False, +# fill=True, alpha=1, linewidth=1.5) +# g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.9) +# g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False) +# def label(x, color, label): +# ax = plt.gca() +# ax.text(0, .2, label, fontweight="bold", color=color, +# ha="left", va="center", transform=ax.transAxes) +# g.map(label, "x") + +# # Draw the densities in a few steps +# # g.map(sns.lineplot, 'x', 'y', clip_on=False, alpha=1, linewidth=1.5) +# # g.map(plt.fill_between, 'x', 'y', color='#000000') +# # g.map(sns.lineplot, 'x', 'y', clip_on=False, color='#ffffff', lw=2) +# # Set the subplots to overlap +# g.fig.subplots_adjust(hspace=-0.95) +# g.set_titles("") +# g.set(yticks=[], xticks=[], ylabel="", xlabel="") +# g.despine(bottom=True, left=True) +# plt.show() +# # plt.savefig('joy.png', facecolor='#000000') + +# def aggregate(args): +# assert args.exceloutfile is not None, 'You must provide a name of xlsx output file for feature aggregation with --exceloutfile name.xlsx' +# usecols=args.labelColumns.split(',') +# # index_col=int(args.IDColumn) +# patientData=pd.read_excel(args.patientData,usecols=usecols.extend(args.IDColumn),index_col=args.IDColumn) +# patientData = patientData.loc[:, ~patientData.columns.str.contains('^Unnamed')] +# patientMetrics={} + +# pd.set_option('display.max_rows', 100) +# patientData.index = patientData.index.map(str) + +# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) +# # numGloms=0 +# # numSGloms=0 +# # numTubules=0 +# # numArts=0 +# full_data={} +# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): + +# xlsxid=datafile.split('/')[-1].split('.xlsx')[0] +# 
tubule_features=pd.read_excel(datafile,sheet_name='Summary',header=None,index_col=None).transpose() +# names=np.array(tubule_features.iloc[0]) +# vals=np.array(tubule_features.iloc[1]) +# # numGloms+=vals[0] +# # if not np.isnan(vals[5]): +# # numSGloms+=vals[5] +# # numTubules+=vals[10] +# # numArts+=vals[19] + +# full_data[xlsxid]=np.append(vals,np.array(patientData.loc[xlsxid]),0) +# # print(numGloms) +# # print(numSGloms) +# # print(numTubules) +# # print(numArts) +# # exit() +# workbook=xlsxwriter.Workbook(args.exceloutfile,{'nan_inf_to_errors': True}) +# worksheet1 = workbook.add_worksheet('Aggregation') + +# outcolnames=np.append(names,patientData.columns,0) +# outrownames=full_data.keys() +# for idx,outcol in enumerate(outcolnames): +# worksheet1.write(0,idx+1,outcol) + +# rowcounter=1 +# for key,vals in full_data.items(): +# worksheet1.write(rowcounter,0,key) +# for idx,val in enumerate(vals): +# worksheet1.write(rowcounter,idx+1,val) +# rowcounter+=1 + +# workbook.close() def points_to_features_glom(points,args,min_size,cortex,medulla): a=cv2.contourArea(points) if a>min_size: @@ -818,7 +810,7 @@ def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): else: containedmedulla=False xMin,xMax,yMin,yMax=[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])] - image=np.array(openslide.OpenSlide(wsi).read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] + image=np.array(TiffSlide(wsi).read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] if xMax-xMin>5000 or yMax-yMin>5000: return [a,None,None,containedmedulla,None] @@ -879,7 +871,7 @@ def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): -# full_list.append(tubule_features) -# full_list= pd.concat(full_list) -# print(full_list) -# exit() +# # full_list.append(tubule_features) +# # full_list= pd.concat(full_list) +# # print(full_list) +# # exit() diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index 50e32c1..721b628 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -47,7 +47,7 @@ def str2bool(v): def main(args): from segmentationschool.Codes.InitializeFolderStructure import initFolder, purge_training_set, prune_training_set - # from extract_reference_features import getKidneyReferenceFeatures,summarizeKidneyReferenceFeatures + from segmentationschool.Codes.extract_reference_features import getKidneyReferenceFeatures # from TransformXMLs import splice_cortex_XMLs,register_aperio_scn_xmls # from randomCropGenerator import randomCropGenerator if args.one_network == True: @@ -280,6 +280,9 @@ def savetime(args, starttime): help='padded region for low resolution region extraction') parser.add_argument('--show_interstitium', dest='show_interstitium', default=True ,type=str2bool, help='padded region for low resolution region extraction') + + parser.add_argument('--xml_path', dest='xml_path', default=' ' ,type=str, + help='path to xml file') diff --git a/multic/segmentationschool/utils/mask_to_xml.py b/multic/segmentationschool/utils/mask_to_xml.py new file mode 100644 index 0000000..34217e5 --- /dev/null +++ b/multic/segmentationschool/utils/mask_to_xml.py @@ -0,0 +1,135 @@ +import cv2 +import numpy as np +import lxml.etree as ET + +""" +xml_path (string) - the filename of the saved xml +mask (array) - the mask to convert to xml - uint8 array +downsample (int) - amount of downsampling done to the mask + points are upsampled - this can be 
used to simplify the mask
+min_size_thresh (int) - the minimum object size allowed in the mask. This is referenced from downsample=1
+xml_color (list) - list of binary color values to be used for classes
+
+"""
+
+def mask_to_xml(xml_path, mask, downsample=1, min_size_thresh=0, simplify_contours=0, xml_color=[65280, 65535, 33023, 255, 16711680], verbose=0, return_root=False, maxClass=None, offset={'X': 0,'Y': 0}):
+
+    min_size_thresh /= downsample
+
+    # create xml tree
+    Annotations = xml_create()
+
+    # get all classes
+    classes = np.unique(mask)
+    if maxClass is None:
+        maxClass = max(classes)
+
+    # add annotation classes to tree
+    for class_ in range(maxClass+1)[1:]:
+        if verbose:
+            print('Creating class: [{}]'.format(class_))
+        Annotations = xml_add_annotation(Annotations=Annotations, xml_color=xml_color, annotationID=class_)
+
+    # add contour points to tree classwise
+    for class_ in classes: # iterate through all classes
+
+        if class_ == 0 or class_ > maxClass:
+            continue
+
+        if verbose:
+            print('Working on class [{} of {}]'.format(class_, max(classes)))
+
+        # binarize the mask w.r.t. class_
+        binaryMask = mask==class_
+
+        # get contour points of the mask
+        pointsList = get_contour_points(binaryMask, downsample=downsample, min_size_thresh=min_size_thresh, simplify_contours=simplify_contours, offset=offset)
+        for i in range(np.shape(pointsList)[0]):
+            pointList = pointsList[i]
+            Annotations = xml_add_region(Annotations=Annotations, pointList=pointList, annotationID=class_)
+
+    if return_root:
+        # return root, do not save xml file
+        return Annotations
+
+    # save the final xml file
+    xml_save(Annotations=Annotations, filename='{}.xml'.format(xml_path.split('.')[0]))
+
+
+def get_contour_points(mask, downsample, min_size_thresh=0, simplify_contours=0, offset={'X': 0,'Y': 0}):
+    # returns a dict pointList with point 'X' and 'Y' values
+    # input greyscale binary image
+    #_, maskPoints, contours = cv2.findContours(np.array(mask), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
+    maskPoints, contours = cv2.findContours(np.uint8(mask), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1)
+    maskPoints = list(maskPoints)
+    # remove small regions
+    too_small = []
+    for idx, cnt in enumerate(maskPoints):
+        area = cv2.contourArea(cnt)
+        if area < min_size_thresh:
+            too_small.append(idx)
+    if too_small != []:
+        too_small.reverse()
+        for idx in too_small:
+            maskPoints.pop(idx)
+
+    if simplify_contours > 0:
+        for idx, cnt in enumerate(maskPoints):
+            epsilon = simplify_contours*cv2.arcLength(cnt,True)
+            approx = cv2.approxPolyDP(cnt,epsilon,True)
+            maskPoints[idx] = approx
+
+    pointsList = []
+    for j in range(np.shape(maskPoints)[0]):
+        pointList = []
+        for i in range(0,np.shape(maskPoints[j])[0]):
+            point = {'X': (maskPoints[j][i][0][0] * downsample) + offset['X'], 'Y': (maskPoints[j][i][0][1] * downsample) + offset['Y']}
+            pointList.append(point)
+        pointsList.append(pointList)
+    return pointsList
+
+### functions for building an xml tree of annotations ###
+def xml_create(): # create new xml tree
+    # create new xml Tree - Annotations
+    Annotations = ET.Element('Annotations')
+    return Annotations
+
+def xml_add_annotation(Annotations, xml_color, annotationID=None): # add new annotation
+    # add new Annotation to Annotations
+    # defaults to new annotationID
+    if annotationID == None: # not specified
+        annotationID = len(Annotations.findall('Annotation')) + 1
+    Annotation = ET.SubElement(Annotations, 'Annotation', attrib={'Type': '4', 'Visible': '1', 'ReadOnly': '0', 'Incremental': '0', 'LineColorReadOnly': '0', 'LineColor': str(xml_color[annotationID-1]), 'Id': str(annotationID), 'NameReadOnly': '0'})
+    Regions = ET.SubElement(Annotation, 'Regions')
+    return Annotations
+
+def xml_add_region(Annotations, pointList, annotationID=-1, regionID=None): # add new region to annotation
+    # add new Region to Annotation
+    # defaults to last annotationID and new regionID
+    Annotation = Annotations.find("Annotation[@Id='" + str(annotationID) + "']")
+    Regions = Annotation.find('Regions')
+    if regionID == None: # not specified
+        regionID = len(Regions.findall('Region')) + 1
+    Region = ET.SubElement(Regions, 'Region', attrib={'NegativeROA': '0', 'ImageFocus': '-1', 'DisplayId': '1', 'InputRegionId': '0', 'Analyze': '0', 'Type': '0', 'Id': str(regionID)})
+    Vertices = ET.SubElement(Region, 'Vertices')
+    for point in pointList: # add new Vertex
+        ET.SubElement(Vertices, 'Vertex', attrib={'X': str(point['X']), 'Y': str(point['Y']), 'Z': '0'})
+    # add connecting point
+    ET.SubElement(Vertices, 'Vertex', attrib={'X': str(pointList[0]['X']), 'Y': str(pointList[0]['Y']), 'Z': '0'})
+    return Annotations
+
+def xml_save(Annotations, filename):
+    xml_data = ET.tostring(Annotations, pretty_print=True)
+    #xml_data = Annotations.toprettyxml()
+    f = open(filename, 'w')
+    f.write(xml_data.decode())
+    f.close()
+
+def read_xml(filename):
+    # import xml file
+    tree = ET.parse(filename)
+    return tree.getroot()
+
+if __name__ == '__main__':
+    pass  # utility module; no standalone entry point
\ No newline at end of file
diff --git a/multic/segmentationschool/utils/xml_to_mask.py b/multic/segmentationschool/utils/xml_to_mask.py
new file mode 100644
index 0000000..aeecd6a
--- /dev/null
+++ b/multic/segmentationschool/utils/xml_to_mask.py
@@ -0,0 +1,219 @@
+import numpy as np
+import lxml.etree as ET
+import cv2
+import time
+import os
+
+"""
+location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame
+size (tuple) - (width, height) tuple giving the region size | set to 'full' for entire mask
+downsample - int giving the amount of downsampling done to the output pixel mask
+
+NOTE: if you plan to loop through xmls in parallel, it is necessary to run write_minmax_to_xml()
+      on all the files prior - to avoid conflicting file writes
+
+"""
+
+def xml_to_mask(xml_path, location, size, tree=None, downsample=1, verbose=0):
+
+    # parse xml and get root
+    if tree == None: tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    if size == 'full':
+        import math
+        size = write_minmax_to_xml(xml_path=xml_path, tree=tree, get_absolute_max=True)
+        size = (math.ceil(size[0]/downsample), math.ceil(size[1]/downsample))
+        location = (0,0)
+
+    # calculate region bounds
+    bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0]*downsample, 'y_max' : location[1] + size[1]*downsample}
+
+    IDs = regions_in_mask(xml_path=xml_path, root=root, tree=tree, bounds=bounds, verbose=verbose)
+
+    if verbose != 0:
+        print('\nFOUND: ' + str(len(IDs)) + ' regions')
+
+    # find regions in bounds
+    Regions = get_vertex_points(root=root, IDs=IDs, verbose=verbose)
+
+    # fill regions and create mask
+    mask = Regions_to_mask(Regions=Regions, bounds=bounds, IDs=IDs, downsample=downsample, verbose=verbose)
+    if verbose != 0:
+        print('done...\n')
+
+    return mask
+
+
+def regions_in_mask(xml_path, root, tree, bounds, verbose=1):
+    # find regions to save
+    IDs = []
+    mtime = os.path.getmtime(xml_path)
+
+    write_minmax_to_xml(xml_path, tree)
+
+    for Annotation in root.findall("./Annotation"): # for all annotations
+        annotationID = Annotation.attrib['Id']
+
+        for Region in Annotation.findall("./*/Region"): # iterate on all region
+
+            for Vert in Region.findall("./Vertices"): # iterate on all vertex in region
+
+                # get minmax points
+                Xmin = np.int32(Vert.attrib['Xmin'])
+                Ymin = np.int32(Vert.attrib['Ymin'])
+                Xmax = np.int32(Vert.attrib['Xmax'])
+                Ymax = np.int32(Vert.attrib['Ymax'])
+
+                # test minmax points in region bounds
+                if bounds['x_min'] <= Xmax and bounds['x_max'] >= Xmin and bounds['y_min'] <= Ymax and bounds['y_max'] >= Ymin:
+                    # save region Id
+                    IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID})
+                    break
+    return IDs
+
+def get_vertex_points(root, IDs, verbose=1):
+    Regions = []
+
+    for ID in IDs: # for all IDs
+
+        # get all vertex attributes (points)
+        Vertices = []
+
+        for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"):
+            # make array of points
+            Vertices.append([int(float(Vertex.attrib['X'])), int(float(Vertex.attrib['Y']))])
+
+        Regions.append(np.array(Vertices))
+
+    return Regions
+
+def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1):
+    # downsample = int(np.round(downsample_factor**(.5)))
+
+    if verbose !=0:
+        print('\nMAKING MASK:')
+
+    if len(Regions) != 0: # regions present
+        # get min/max sizes
+        min_sizes = np.empty(shape=[2,0], dtype=np.int32)
+        max_sizes = np.empty(shape=[2,0], dtype=np.int32)
+        for Region in Regions: # fill all regions
+            min_bounds = np.reshape((np.amin(Region, axis=0)), (2,1))
+            max_bounds = np.reshape((np.amax(Region, axis=0)), (2,1))
+            min_sizes = np.append(min_sizes, min_bounds, axis=1)
+            max_sizes = np.append(max_sizes, max_bounds, axis=1)
+        min_size = np.amin(min_sizes, axis=1)
+        max_size = np.amax(max_sizes, axis=1)
+
+        # add to old bounds
+        bounds['x_min_pad'] = min(min_size[1], bounds['x_min'])
+        bounds['y_min_pad'] = min(min_size[0], bounds['y_min'])
+        bounds['x_max_pad'] = max(max_size[1], bounds['x_max'])
+        bounds['y_max_pad'] = max(max_size[0], bounds['y_max'])
+
+        # make blank mask
+        mask = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8)
+
+        # fill mask polygons
+        index = 0
+        for Region in Regions:
+            # reformat Regions
+            Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample))
+            Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample))
+            # get annotation ID for mask color
+            ID = IDs[index]
+            cv2.fillPoly(mask, [Region], int(ID['annotationID']))
+            index = index + 1
+
+        # reshape mask
+        x_start = np.int32(np.round((bounds['x_min'] - bounds['x_min_pad']) / downsample))
+        y_start = np.int32(np.round((bounds['y_min'] - bounds['y_min_pad']) / downsample))
+        x_stop = np.int32(np.round((bounds['x_max'] - bounds['x_min_pad']) / downsample))
+        y_stop = np.int32(np.round((bounds['y_max'] - bounds['y_min_pad']) / downsample))
+        # pull center mask region
+        mask = mask[ y_start:y_stop, x_start:x_stop ]
+
+    else: # no Regions
+        mask = np.zeros([ int(np.round((bounds['y_max'] - bounds['y_min']) / downsample)), int(np.round((bounds['x_max'] - bounds['x_min']) / downsample)) ], dtype=np.uint8)
+
+    return mask
+
+def write_minmax_to_xml(xml_path, tree=None, time_buffer=10, get_absolute_max=False):
+    # function to write min and max vertices to each region
+
+    # parse xml and get root
+    if tree == None: tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    try:
+        if get_absolute_max:
+            # break the try statement
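# (usage sketch, from the definitions above:
#    write_minmax_to_xml('slide.xml')   # run once per xml, before any parallel
#                                       # workers read it, so the Xmin/Xmax/Ymin/Ymax
#                                       # attributes are cached in the file
#    mask = xml_to_mask('slide.xml', (0, 0), (512, 512), downsample=1)
#    # -> uint8 array; pixel value = annotation layer Id, 0 = background)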
+ X_max = 0 + Y_max = 0 + raise ValueError + + # has the xml been modified to include minmax + modtime = np.float64(root.attrib['modtime']) + # has the minmax modified xml been changed? + assert os.path.getmtime(xml_path) < modtime + time_buffer + + except: + + for Annotation in root.findall("./Annotation"): # for all annotations + annotationID = Annotation.attrib['Id'] + + for Region in Annotation.findall("./*/Region"): # iterate on all region + + for Vert in Region.findall("./Vertices"): # iterate on all vertex in region + Xs = [] + Ys = [] + for Vertex in Vert.findall("./Vertex"): # iterate on all vertex in region + # get points + Xs.append(np.int32(np.float64(Vertex.attrib['X']))) + Ys.append(np.int32(np.float64(Vertex.attrib['Y']))) + + # find min and max points + Xs = np.array(Xs) + Ys = np.array(Ys) + + if get_absolute_max: + # get the biggest point in annotation + if Xs != [] and Ys != []: + X_max = max(X_max, np.max(Xs)) + Y_max = max(Y_max, np.max(Ys)) + + else: + # modify the xml + Vert.set("Xmin", "{}".format(np.min(Xs))) + Vert.set("Xmax", "{}".format(np.max(Xs))) + Vert.set("Ymin", "{}".format(np.min(Ys))) + Vert.set("Ymax", "{}".format(np.max(Ys))) + + if get_absolute_max: + # return annotation max point + return (X_max,Y_max) + + else: + # modify the xml with minmax region info + root.set("modtime", "{}".format(time.time())) + xml_data = ET.tostring(tree, pretty_print=True) + #xml_data = Annotations.toprettyxml() + f = open(xml_path, 'w') + f.write(xml_data.decode()) + f.close() + + +def get_num_classes(xml_path,ignore_label=None): + # parse xml and get root + tree = ET.parse(xml_path) + root = tree.getroot() + + annotation_num = 0 + for Annotation in root.findall("./Annotation"): # for all annotations + if ignore_label != None: + if not int(Annotation.attrib['Id']) == ignore_label: + annotation_num += 1 + else: annotation_num += 1 + + return annotation_num + 1 diff --git a/setup.py b/setup.py index fff61e6..b89b3c7 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,7 @@ def prerelease_local_scheme(version): 'girder-client', # cli 'ctk-cli', + 'XlsxWriter' ], license='Apache Software License 2.0', keywords='multic', From 7501fdc0cdff00926c2b39ee14f1c86ed1505529 Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Thu, 4 Apr 2024 10:45:01 -0400 Subject: [PATCH 2/8] fix xlsx save path --- .../ReferenceFeatureExtraction.xml | 4 ++-- .../Codes/extract_reference_features.py | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml index 30e0924..19bc3ad 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.xml @@ -1,8 +1,8 @@ HistomicsTK - Multi Compartment Training - Trains Multi compartment segmentation model + Extract Extended Clinical Features + Extract reference features 0.1.0 https://github.com/SarderLab/Multi-Compartment-Segmentation Apache 2.0 diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index 2ca6eb5..549b4ed 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -40,9 +40,9 @@ def getKidneyReferenceFeatures(args): slide_item_id = item_dict[file_name] - #output_dir = args.base_dir + '/tmp' + output_dir = args.base_dir + '/' 
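#   [editorial gloss, not a patch line] the trailing '/' on output_dir above is
#   load-bearing: xlsx_path is built just below by bare concatenation,
#   output_dir + 'ExtendedClinical' + '.xlsx'. A separator-safe equivalent,
#   which patch 4 later adopts for its per-slide output path, would be:
#       xlsx_path = os.path.join(args.base_dir, 'ExtendedClinical.xlsx')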
slide_name,slideExt=file_name.split('.') - xlsx_path = slide_name + '.xlsx' + xlsx_path = output_dir + 'ExtendedClinical' + '.xlsx' annotatedXMLs=[args.xml_path] for xml in annotatedXMLs: @@ -354,12 +354,8 @@ def getKidneyReferenceFeatures(args): worksheet6.write(idx+1,1,art[1]) worksheet6.write(idx+1,2,art[4]) - - try: - workbook.close(save_to=xlsx_path) - except: - print("An exception occurred") - + workbook.close() + gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) print('Done.') # exit() From 84cd08fb430ec11611d5c9a21777701cc191153b Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Fri, 12 Apr 2024 10:42:04 -0400 Subject: [PATCH 3/8] process only target image --- .../ReferenceFeatureExtraction/ReferenceFeatureExtraction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py index ef243d4..152cb7c 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -14,6 +14,7 @@ def main(args): folder = args.base_dir + wsi = args.input_file base_dir_id = folder.split('/')[-2] _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) @@ -32,6 +33,8 @@ def main(args): for file in files: slidename = file['name'] + if wsi.split('/')[-1] != slidename: + continue _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) skipSlide = 0 From 081419486415bcea352af6cf108a954ef4ea2f81 Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Mon, 22 Apr 2024 17:47:25 -0400 Subject: [PATCH 4/8] add coordinates --- .../ReferenceFeatureExtraction.py | 2 +- .../Codes/extract_reference_features.py | 165 ++++++++++++------ .../segmentationschool/segmentation_school.py | 8 +- 3 files changed, 118 insertions(+), 57 deletions(-) diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py index 152cb7c..75cd230 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -113,7 +113,7 @@ def main(args): - cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, args.girderApiUrl, args.girderToken) + cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --platform {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, 'DSA', args.girderApiUrl, args.girderToken) print(cmd) sys.stdout.flush() os.system(cmd) diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index 549b4ed..decc38b 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -2,7 +2,7 @@ import numpy as np import lxml.etree as ET -import girder_client + from matplotlib import path from skimage.color import rgb2lab,rgb2hsv @@ -22,45 +22,53 @@ def getKidneyReferenceFeatures(args): + folder = args.base_dir + # assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' # assert 
args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis' + if args.platform == 'DSA': + import girder_client + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.setToken(args.girderToken) - gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) - gc.setToken(args.girderToken) + girder_folder_id = folder.split('/')[-2] + file_name = args.files.split('/')[-1] + files = list(gc.listItem(girder_folder_id)) + item_dict = dict() - folder = args.base_dir - girder_folder_id = folder.split('/')[-2] - file_name = args.files.split('/')[-1] - files = list(gc.listItem(girder_folder_id)) - item_dict = dict() + for file in files: + d = {file['name']: file['_id']} + item_dict.update(d) + + slide_item_id = item_dict[file_name] + output_dir = args.base_dir + slide_name,slideExt=file_name.split('.') + items=[(args.files, args.xml_path)] - for file in files: - d = {file['name']: file['_id']} - item_dict.update(d) + elif args.platform == 'HPG': + image_files = [image_name for _, _, files in os.walk(folder) for image_name in files if image_name.endswith('.xml')] + image_names = [os.path.join(folder, f.split('.')[0]) for f in image_files] + slideExt = args.ext + output_dir = args.output_dir + # each item in items is a tuple of (images, annotations) + items = [(f + slideExt, f + '.xml') for f in image_names] - slide_item_id = item_dict[file_name] + else: + raise Exception("Please Enter a valid Platform, DSA or HPG") - output_dir = args.base_dir + '/' - slide_name,slideExt=file_name.split('.') - xlsx_path = output_dir + 'ExtendedClinical' + '.xlsx' + for i in range(len(items)): - annotatedXMLs=[args.xml_path] - for xml in annotatedXMLs: - # print(xml,end='\r',flush=True) + svsfile, xmlfile = items[i] - print(xml,'here') - write_minmax_to_xml(xml) - # for ext in args.wsi_ext.split(','): + print(xmlfile,'here') + write_minmax_to_xml(xmlfile) - # if os.path.isfile(os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext))): - # wsi=os.path.join(args.wsis,xml.split('/')[-1].replace('.xml',ext)) - # break slideExt=file_name.split('.')[-1] all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]} # cortex medulla glomeruli scl_glomeruli tubules arteries(ioles) - tree = ET.parse(xml) + tree = ET.parse(xmlfile) root = tree.getroot() - basename=os.path.splitext(xml)[0] + basename=os.path.splitext(xmlfile)[0] for Annotation in root.findall("./Annotation"): # for all annotations annotationID = Annotation.attrib['Id'] if annotationID not in ['1','2','3','4','5','6']: @@ -79,7 +87,7 @@ def getKidneyReferenceFeatures(args): cortexarea=0 medullaarea=0 - slide=TiffSlide(args.files) + slide=TiffSlide(svsfile) if slideExt =='.scn': dim_x=int(slide.properties['tiffslide.bounds-width'])## add to columns dim_y=int(slide.properties['tiffslide.bounds-height'])## add to rows @@ -142,6 +150,7 @@ def getKidneyReferenceFeatures(args): medulla_path=None pseudocortexarea=total_tissue_area-medullaarea # + xlsx_path = os.path.join(output_dir, os.path.basename(svsfile).split('.')[0] +'_extended_clinical'+'.xlsx') workbook=xlsxwriter.Workbook(xlsx_path) worksheet1 = workbook.add_worksheet('Summary') worksheet2 = workbook.add_worksheet('Interstitium') @@ -175,28 +184,28 @@ def getKidneyReferenceFeatures(args): art_features=Parallel(n_jobs=cores)(delayed(points_to_features_art)(points, args,args.min_size[5],cortex_path,medulla_path,args.files,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False)) print('Generating output file..') - glom_features=[i for i in 
glom_features if i is not None] - sglom_features=[i for i in sglom_features if i is not None] - tub_features=[i for i in tub_features if i is not None] - art_features=[i for i in art_features if i is not None] + glom_features=np.array([i for i in glom_features if i is not None]) + sglom_features=np.array([i for i in sglom_features if i is not None]) + tub_features=np.array([i for i in tub_features if i is not None]) + art_features=np.array([i for i in art_features if i is not None]) # gloms_features=[i for i in glom_features if i[0]>args.min_sizes[2]] # sglom_features=[i for i in sglom_features if i[0]>args.min_sizes[3]] # cortexgloms=[i for i in glom_features if not i[3]] - cortextubs=[i for i in tub_features if not i[3]] - cortexarts=[i for i in art_features if not i[3]] + cortextubs=np.array([i for i in tub_features if not i[3]]) + cortexarts=np.array([i for i in art_features if not i[3]]) - medullatubs=[i for i in tub_features if i[3]] - medullaarts=[i for i in art_features if i[3]] + medullatubs=np.array([i for i in tub_features if i[3]]) + medullaarts=np.array([i for i in art_features if i[3]]) if pseudocortexarea>0: cortex_glom_area=np.sum(np.array(glom_features)[:,0]) cortex_glom_density=float(cortex_glom_area)/float(pseudocortexarea) - cortex_tub_area=np.sum(np.array(cortextubs)[:,0]) + cortex_tub_area=np.sum(cortextubs[:,0]) cortex_tub_density=float(cortex_tub_area)/float(pseudocortexarea) - cortex_art_area=np.sum(np.array(cortexarts)[:,0]) + cortex_art_area=np.sum(cortexarts[:,0]) cortex_art_density=float(cortex_art_area)/float(pseudocortexarea) # downsample_cortex=get_downsample_cortex(args,all_contours['1']) # exit() @@ -208,9 +217,9 @@ def getKidneyReferenceFeatures(args): cortex_art_density=None if medullaarea>0 and len(medullatubs)>0: - medulla_tub_area=np.sum(np.array(medullatubs)[:,0]) + medulla_tub_area=np.sum(medullatubs[:,0]) if len(medullaarts)>0: - medulla_art_area=np.sum(np.array(medullaarts)[:,0]) + medulla_art_area=np.sum(medullaarts[:,0]) medulla_art_density=float(medulla_art_area)/float(medullaarea) else: medulla_art_density=None @@ -218,11 +227,7 @@ def getKidneyReferenceFeatures(args): else: medulla_tub_density=None medulla_art_density=None - glom_features=np.array(glom_features) - sglom_features=np.array(sglom_features) - tub_features=np.array(tub_features) - art_features=np.array(art_features) - cortexarts=np.array(cortexarts) + worksheet1.write(0,0,'Glomerular density - count:') worksheet1.write(0,1,len(glom_features)/pseudocortexarea) worksheet1.write(1,0,'Average glomerular area:') @@ -265,11 +270,16 @@ def getKidneyReferenceFeatures(args): worksheet1.write(13,1,np.mean(cortextubs[:,1])) worksheet1.write(14,1,np.std(cortextubs[:,1])) if medullaarea>0: - medullatubs=np.array(medullatubs) - worksheet1.write(15,1,np.mean(medullatubs[:,0])) - worksheet1.write(16,1,np.std(medullatubs[:,0])) - worksheet1.write(17,1,np.mean(medullatubs[:,1])) - worksheet1.write(18,1,np.std(medullatubs[:,1])) + if len(medullatubs)>0: + worksheet1.write(15,1,np.mean(medullatubs[:,0])) + worksheet1.write(16,1,np.std(medullatubs[:,0])) + worksheet1.write(17,1,np.mean(medullatubs[:,1])) + worksheet1.write(18,1,np.std(medullatubs[:,1])) + else: + worksheet1.write(15,1,0) + worksheet1.write(16,1,0) + worksheet1.write(17,1,0) + worksheet1.write(18,1,0) worksheet1.write(19,0,'Cortical arterial(olar) density') worksheet1.write(19,1,len(cortexarts)/pseudocortexarea) @@ -316,24 +326,57 @@ def getKidneyReferenceFeatures(args): worksheet1.write(33,1,len(glom_features)) 
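#   [editorial gloss, not a patch line] the per-object feature lists indexed in
#   the detail sheets below are positional; with the bounding boxes this patch
#   adds, the layouts are:
#       gloms/tubules: [area, max_inscribed_radius, None, in_medulla, yMin, yMax, xMin, xMax]
#       arteries:      [area, radius, None, in_medulla, luminal_ratio, yMin, yMax, xMin, xMax]
#   so glom[4:8] and art[5:9] feed the x1/x2/y1/y2 coordinate columns.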
worksheet1.write(34,0,'sGlomerulus count') worksheet1.write(34,1,len(sglom_features)) + worksheet3.write(0,0,'Area') worksheet3.write(0,1,'Radius') + worksheet3.write(0,2,'x1') + worksheet3.write(0,3,'x2') + worksheet3.write(0,4,'y1') + worksheet3.write(0,5,'y2') + for idx,glom in enumerate(glom_features): worksheet3.write(idx+1,0,glom[0]) worksheet3.write(idx+1,1,glom[1]) + worksheet3.write(idx+1,2,glom[4]) + worksheet3.write(idx+1,3,glom[5]) + worksheet3.write(idx+1,4,glom[6]) + worksheet3.write(idx+1,5,glom[7]) + + worksheet4.write(0,0,'Area') worksheet4.write(0,1,'Radius') + + worksheet4.write(0,2,'x1') + worksheet4.write(0,3,'x2') + worksheet4.write(0,4,'y1') + worksheet4.write(0,5,'y2') + for idx,sglom in enumerate(sglom_features): worksheet4.write(idx+1,0,sglom[0]) worksheet4.write(idx+1,1,sglom[1]) + worksheet4.write(idx+1,2,sglom[4]) + worksheet4.write(idx+1,3,sglom[5]) + worksheet4.write(idx+1,4,sglom[6]) + worksheet4.write(idx+1,5,sglom[7]) + worksheet5.write(0,0,'Area') worksheet5.write(0,1,'Radius') + + worksheet5.write(0,6,'x1') + worksheet5.write(0,7,'x2') + worksheet5.write(0,8,'y1') + worksheet5.write(0,9,'y2') for idx,tub in enumerate(tub_features): worksheet5.write(idx+1,0,tub[0]) worksheet5.write(idx+1,1,tub[1]) + worksheet5.write(idx+1,6,tub[4]) + worksheet5.write(idx+1,7,tub[5]) + worksheet5.write(idx+1,8,tub[6]) + worksheet5.write(idx+1,9,tub[7]) + worksheet5.write(0,2,'Cortical areas') worksheet5.write(0,3,'Cortical radii') for idx,tub in enumerate(cortextubs): @@ -349,14 +392,26 @@ def getKidneyReferenceFeatures(args): worksheet6.write(0,0,'Area') worksheet6.write(0,1,'Radius') worksheet6.write(0,2,'Luminal ratio') + + worksheet6.write(0,3,'x1') + worksheet6.write(0,4,'x2') + worksheet6.write(0,5,'y1') + worksheet6.write(0,6,'y2') for idx,art in enumerate(art_features): worksheet6.write(idx+1,0,art[0]) worksheet6.write(idx+1,1,art[1]) worksheet6.write(idx+1,2,art[4]) + worksheet6.write(idx+1,3,art[5]) + worksheet6.write(idx+1,4,art[6]) + worksheet6.write(idx+1,5,art[7]) + worksheet6.write(idx+1,6,art[8]) + workbook.close() - - gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) + if args.platform == 'DSA': + gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None) + print('Girder file uploaded!') + print('Done.') # exit() # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) @@ -770,7 +825,7 @@ def points_to_features_glom(points,args,min_size,cortex,medulla): dist=distance_transform_edt(binary_mask) - return [a,np.max(dist),None,containedmedulla] + return [a,np.max(dist),None,containedmedulla,yMin,yMax,xMin,xMax] def points_to_features_tub(points,args,min_size,cortex,medulla): a=cv2.contourArea(points) @@ -790,7 +845,7 @@ def points_to_features_tub(points,args,min_size,cortex,medulla): binary_mask=cv2.fillPoly(binary_mask,[points],1) dist=distance_transform_edt(binary_mask) - return [a,np.max(dist),None,containedmedulla] + return [a,np.max(dist),None,containedmedulla,yMin,yMax,xMin,xMax] def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): @@ -809,7 +864,7 @@ def points_to_features_art(points,args,min_size,cortex,medulla,wsi,MOD): image=np.array(TiffSlide(wsi).read_region((xMin,yMin),0,(xMax-xMin,yMax-yMin)))[:,:,:3] if xMax-xMin>5000 or yMax-yMin>5000: - return [a,None,None,containedmedulla,None] + return 
[a,None,None,containedmedulla,None, yMin,yMax,xMin,xMax]
     binary_mask=np.zeros((int(yMax-yMin),int(xMax-xMin)))
     points[:,0]-=xMin
     points[:,1]-=yMin


# #     return [a,distMax*2,containedcortex,containedmedulla,(np.max(WSdist)*2)/(distMax*2)]
# else:
-    return [a,distMax,None,containedmedulla,np.max(WSdist)/distMax]
+    return [a,distMax,None,containedmedulla,np.max(WSdist)/distMax,yMin,yMax,xMin,xMax]


diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py
index 721b628..ef78d67 100644
--- a/multic/segmentationschool/segmentation_school.py
+++ b/multic/segmentationschool/segmentation_school.py
@@ -134,7 +134,7 @@ def savetime(args, starttime):
 help='directory with xml transformation targets')
 parser.add_argument('--cortextarget', dest='cortextarget', default=None,type=str,
 help='directory with cortex annotations for splicing')
-    parser.add_argument('--output', dest='output', default=None,type=str,
+    parser.add_argument('--output_dir', dest='output_dir', default=None,type=str,
 help='directory to save output transformed XMLs')
 parser.add_argument('--wsis', dest='wsis', default=None,type=str,
 help='directory of WSIs for reference feature extraction')
 parser.add_argument('--groupBy', dest='groupBy', default=None,type=str,
@@ -284,6 +284,12 @@

 parser.add_argument('--xml_path', dest='xml_path', default=' ' ,type=str,
 help='path to xml file')

+    parser.add_argument('--ext', dest='ext', default='.svs' ,type=str,
+        help='file extension')
+
+    parser.add_argument('--platform', dest='platform', default='DSA' ,type=str,
+        help='Run Platform, HPG or DSA')
+


From 3d9ff20f4d906c58be4764ea5015fa8c67471677 Mon Sep 17 00:00:00 2001
From: sayatmimar 
Date: Tue, 7 May 2024 10:33:17 -0400
Subject: [PATCH 5/8] fix minor issues

---
 multic/segmentationschool/Codes/extract_reference_features.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py
index decc38b..388c76a 100644
--- a/multic/segmentationschool/Codes/extract_reference_features.py
+++ b/multic/segmentationschool/Codes/extract_reference_features.py
@@ -63,7 +63,7 @@ def getKidneyReferenceFeatures(args):
 print(xmlfile,'here')
 write_minmax_to_xml(xmlfile)

-        slideExt=file_name.split('.')[-1]
+
 all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]} # cortex medulla glomeruli scl_glomeruli tubules arteries(ioles)

 tree = ET.parse(xmlfile)
@@ -182,7 +182,7 @@ def getKidneyReferenceFeatures(args):
 args,args.min_size[4],cortex_path,medulla_path) for points in tqdm(all_contours['5'],colour='blue',unit='Tubule',leave=False))

 art_features=Parallel(n_jobs=cores)(delayed(points_to_features_art)(points,
-        args,args.min_size[5],cortex_path,medulla_path,args.files,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False))
+        args,args.min_size[5],cortex_path,medulla_path,svsfile,MOD) for points in tqdm(all_contours['6'],colour='magenta',unit='Artery(-iole)',leave=False))
 print('Generating output file..')

From 95ac0f41d6c8e5d45e93a60e6ac09d564e85c85a Mon Sep 17 00:00:00 2001
From: sayatmimar 
Date: Thu, 9 May 2024 14:21:18 -0400
Subject: [PATCH 6/8] add in medulla column and reorganize

---
 .../ReferenceFeatureExtraction.py | 111 +----
.../Codes/extract_reference_features.py | 456 +----------------- .../segmentationschool/segmentation_school.py | 9 +- .../segmentationschool/utils/json_to_xml.py | 66 +++ .../segmentationschool/utils/xml_to_mask.py | 219 --------- 5 files changed, 118 insertions(+), 743 deletions(-) create mode 100644 multic/segmentationschool/utils/json_to_xml.py delete mode 100644 multic/segmentationschool/utils/xml_to_mask.py diff --git a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py index 75cd230..0fb06f8 100644 --- a/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py +++ b/multic/cli/ReferenceFeatureExtraction/ReferenceFeatureExtraction.py @@ -5,8 +5,7 @@ from ctk_cli import CLIArgumentParser sys.path.append("..") -from segmentationschool.utils.mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save -from segmentationschool.utils.xml_to_mask import write_minmax_to_xml +from segmentationschool.utils.json_to_xml import get_xml_path NAMES = ['cortical_interstitium','medullary_interstitium','non_globally_sclerotic_glomeruli','globally_sclerotic_glomeruli','tubules','arteries/arterioles'] @@ -15,105 +14,41 @@ def main(args): folder = args.base_dir wsi = args.input_file + file_name = wsi.split('/')[-1] base_dir_id = folder.split('/')[-2] _ = os.system("printf '\nUsing data from girder_client Folder: {}\n'".format(folder)) gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) gc.setToken(args.girderToken) # get files in folder - files = gc.listItem(base_dir_id) - xml_color=[65280]*(len(NAMES)+1) + files = list(gc.listItem(base_dir_id)) + # dict to link filename to gc id + item_dict = dict() + for file in files: + d = {file['name']:file['_id']} + item_dict.update(d) + + file_id = item_dict[file_name] + cwd = os.getcwd() print(cwd) os.chdir(cwd) - tmp = folder + _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(file_name)) + # get annotation + annotations= gc.get('/annotation/item/{}'.format(file_id), parameters={'sort': 'updated'}) + annotations.reverse() + annotations = list(annotations) - slides_used = [] - - for file in files: - slidename = file['name'] - if wsi.split('/')[-1] != slidename: - continue - _ = os.system("printf '\n---\n\nFOUND: [{}]\n'".format(slidename)) - skipSlide = 0 - - # get annotation - item = gc.getItem(file['_id']) - annot = gc.get('/annotation/item/{}'.format(item['_id']), parameters={'sort': 'updated'}) - annot.reverse() - annot = list(annot) - _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annot))) - - # create root for xml file - xmlAnnot = xml_create() - - # all compartments - for class_,compart in enumerate(NAMES): - - compart = compart.replace(' ','') - class_ +=1 - # add layer to xml - xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) - - # test all annotation layers in order created - for iter,a in enumerate(annot): - - - try: - # check for annotation layer by name - a_name = a['annotation']['name'].replace(' ','') - except: - a_name = None - - if a_name == compart: - # track all layers present - skipSlide +=1 - - pointsList = [] - - # load json data - _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart)) - - a_data = a['annotation']['elements'] - - for data in a_data: - pointList = [] - points = data['points'] - for point in points: - pt_dict = {'X': round(point[0]), 'Y': round(point[1])} - pointList.append(pt_dict) - pointsList.append(pointList) - 
- # write annotations to xml - for i in range(len(pointsList)): - pointList = pointsList[i] - xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_) - - # print(a['_version'], a['updated'], a['created']) - break - - if skipSlide != len(NAMES): - _ = os.system("printf '\tThis slide is missing annotation layers\n'") - _ = os.system("printf '\tSKIPPING SLIDE...\n'") - del xmlAnnot - continue # correct layers not present - _ = os.system("printf '\tFETCHING SLIDE...\n'") - os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename)) - slides_used.append(slidename) - - xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0]) - _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path)) - xml_save(Annotations=xmlAnnot, filename=xml_path) - write_minmax_to_xml(xml_path) # to avoid trying to write to the xml from multiple workers - del xmlAnnot - os.system("ls -lh '{}'".format(tmp)) - + annotations_filtered = [annot for annot in annotations if annot['annotation']['name'].strip() in NAMES] + _ = os.system("printf '\tfound [{}] annotation layers...\n'".format(len(annotations_filtered))) + del annotations + # create root for xml file + + xml_path = get_xml_path(annotations_filtered, NAMES, tmp, file_name) _ = os.system("printf '\ndone retriving data...\n\n'") - - - cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --files {} --xml_path {} --platform {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, 'DSA', args.girderApiUrl, args.girderToken) + cmd = "python3 ../segmentationschool/segmentation_school.py --option {} --base_dir {} --file {} --xml_path {} --platform {} --item_id {} --girderApiUrl {} --girderToken {}".format('get_features', args.base_dir, args.input_file, xml_path, 'DSA',file_id, args.girderApiUrl, args.girderToken) print(cmd) sys.stdout.flush() os.system(cmd) diff --git a/multic/segmentationschool/Codes/extract_reference_features.py b/multic/segmentationschool/Codes/extract_reference_features.py index 388c76a..3f048d1 100644 --- a/multic/segmentationschool/Codes/extract_reference_features.py +++ b/multic/segmentationschool/Codes/extract_reference_features.py @@ -31,19 +31,11 @@ def getKidneyReferenceFeatures(args): gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) gc.setToken(args.girderToken) - girder_folder_id = folder.split('/')[-2] - file_name = args.files.split('/')[-1] - files = list(gc.listItem(girder_folder_id)) - item_dict = dict() - - for file in files: - d = {file['name']: file['_id']} - item_dict.update(d) - - slide_item_id = item_dict[file_name] + file_name = args.file.split('/')[-1] + slide_item_id = args.item_id output_dir = args.base_dir slide_name,slideExt=file_name.split('.') - items=[(args.files, args.xml_path)] + items=[(args.file, args.xml_path)] elif args.platform == 'HPG': image_files = [image_name for _, _, files in os.walk(folder) for image_name in files if image_name.endswith('.xml')] @@ -327,8 +319,8 @@ def getKidneyReferenceFeatures(args): worksheet1.write(34,0,'sGlomerulus count') worksheet1.write(34,1,len(sglom_features)) - worksheet3.write(0,0,'Area') - worksheet3.write(0,1,'Radius') + worksheet3.write(0,0,'Area (pixel^2)') + worksheet3.write(0,1,'Radius (pixel)') worksheet3.write(0,2,'x1') worksheet3.write(0,3,'x2') worksheet3.write(0,4,'y1') @@ -344,8 +336,8 @@ def getKidneyReferenceFeatures(args): worksheet3.write(idx+1,5,glom[7]) - worksheet4.write(0,0,'Area') - 
worksheet4.write(0,1,'Radius') + worksheet4.write(0,0,'Area (pixel^2)') + worksheet4.write(0,1,'Radius (pixel)') worksheet4.write(0,2,'x1') worksheet4.write(0,3,'x2') @@ -361,36 +353,26 @@ def getKidneyReferenceFeatures(args): worksheet4.write(idx+1,4,sglom[6]) worksheet4.write(idx+1,5,sglom[7]) - worksheet5.write(0,0,'Area') - worksheet5.write(0,1,'Radius') + worksheet5.write(0,0,'Area (pixel^2)') + worksheet5.write(0,1,'Radius (pixel)') + worksheet5.write(0,2,'In Medulla') - worksheet5.write(0,6,'x1') - worksheet5.write(0,7,'x2') - worksheet5.write(0,8,'y1') - worksheet5.write(0,9,'y2') + worksheet5.write(0,3,'x1') + worksheet5.write(0,4,'x2') + worksheet5.write(0,5,'y1') + worksheet5.write(0,6,'y2') for idx,tub in enumerate(tub_features): worksheet5.write(idx+1,0,tub[0]) worksheet5.write(idx+1,1,tub[1]) + worksheet5.write(idx+1,2,tub[3]) - worksheet5.write(idx+1,6,tub[4]) - worksheet5.write(idx+1,7,tub[5]) - worksheet5.write(idx+1,8,tub[6]) - worksheet5.write(idx+1,9,tub[7]) - - worksheet5.write(0,2,'Cortical areas') - worksheet5.write(0,3,'Cortical radii') - for idx,tub in enumerate(cortextubs): - worksheet5.write(idx+1,2,tub[0]) - worksheet5.write(idx+1,3,tub[1]) - - worksheet5.write(0,4,'Medullary areas') - worksheet5.write(0,5,'Medullary radii') - for idx,tub in enumerate(medullatubs): - worksheet5.write(idx+1,4,tub[0]) - worksheet5.write(idx+1,5,tub[1]) - - worksheet6.write(0,0,'Area') - worksheet6.write(0,1,'Radius') + worksheet5.write(idx+1,3,tub[4]) + worksheet5.write(idx+1,4,tub[5]) + worksheet5.write(idx+1,5,tub[6]) + worksheet5.write(idx+1,6,tub[7]) + + worksheet6.write(0,0,'Area (pixel^2)') + worksheet6.write(0,1,'Radius (pixel)') worksheet6.write(0,2,'Luminal ratio') worksheet6.write(0,3,'x1') @@ -413,399 +395,7 @@ def getKidneyReferenceFeatures(args): print('Girder file uploaded!') print('Done.') - # exit() - # Parallel(n_jobs=num_cores, backend='threading')(delayed(chop_wsi)(, choppable_regions=choppable_regions) for idxx, j in enumerate(index_x)) - - -# def summarizeKidneyReferenceFeatures(args): -# #assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml' -# assert args.SummaryOption is not None, 'You must specify what type of summary is required with --SummaryOption' -# assert args.patientData is not None, 'You must provide patient metadata xlsx file with --patientData' -# if args.SummaryOption in ['ULDensity']: -# ULDensity(args) -# elif args.SummaryOption in ['BLDensity']: -# BLDensity(args) -# elif args.SummaryOption in ['standardScatter']: -# standardScatter(args) -# elif args.SummaryOption in ['anchorScatter']: -# anchorScatter(args) -# elif args.SummaryOption in ['aggregate']: -# aggregate(args) -# elif args.SummaryOption in ['JoyPlot']: -# JoyPlot(args) -# else: -# print('Incorrect SummaryOption') - -# def anchorScatter(args): -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn,args.anchor],index_col=None) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=[patientData[args.labelColumns][idx],patientData[args.anchor][idx]] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# clinical_anchor=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID][0])) -# clinical_anchor.append(patientMetrics[patientID][1]) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# 
sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] -# sortedPatientAnchor=[x for x,_ in sorted(zip(clinical_anchor,datafiles))] -# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] -# # f1=5 -# # f2=7 - -# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) -# index = temp.index -# xlabel=index[f1-1] -# ylabel=args.anchor - -# popIdx=[] -# scatter_features=[] -# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): -# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) -# # print(np.array(features)) -# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): -# scatter_features.append([features[f1-1][0],features[f2-1][0]]) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# sortedPatientAnchor.pop(p) -# scatter_features=np.array(scatter_features) - -# sns.scatterplot(x=scatter_features[:,0],y=sortedPatientAnchor,hue=sortedPatientLegend,palette='viridis',legend='auto') -# plt.xlabel(xlabel) -# plt.ylabel(ylabel) -# plt.legend(title=args.labelColumns) -# plt.show() -# def standardScatter(args): -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# assert args.labelColumns in ['Age','Cr','Sex'], 'Label column must be Age, Cr, or Sex for standard scatter' -# if args.labelColumns=='Age': -# labelBins=[0,10,20,30,40,50,60,70,80,90,100] -# elif args.labelColumns=='Cr': -# labelBins=[0,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2] -# elif args.labelColumns=='Sex': -# labelBins=None - -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] -# print(sortedPatientLegend) -# f1,f2=[int(i) for i in args.scatterFeatures.split(',')] -# # f1=5 -# # f2=7 - -# temp=pd.read_excel(datafiles[0],sheet_name='Summary',header=None,index_row=None,index_col=0) -# index = temp.index -# xlabel=index[f1-1] -# ylabel=index[f2-1] - -# popIdx=[] -# scatter_features=[] -# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): -# features=np.array(pd.read_excel(datafile,sheet_name='Summary',header=None,index_row=None,index_col=0)) -# # print(np.array(features)) -# if not np.isnan(features[f1-1]) and not np.isnan(features[f2-1]): -# scatter_features.append([features[f1-1][0],features[f2-1][0]]) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# sortedPatientLegend=np.array(sortedPatientLegend) - -# scatter_features=np.array(scatter_features) - -# sns.scatterplot(x=scatter_features[:,0],y=scatter_features[:,1],hue=sortedPatientLegend,palette='viridis') -# plt.xlabel(xlabel) -# plt.ylabel(ylabel) -# plt.legend(sortedPatientLegend,title=args.labelColumns) -# plt.show() - -# def BLDensity(args): -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# 
patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - -# fig,ax=plt.subplots() -# plt.xlabel('Cortical tubular diameter (µm)') -# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) -# popIdx=[] -# for idx,datafile in enumerate(tqdm(sortedPatientOrder)): -# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii','Cortical areas'],index_col=None).dropna() -# if len(tubule_features['Cortical radii'])>0: -# # xl1=np.percentile(tubule_features['Cortical radii']*0.25,0.01) -# xl2=np.percentile(tubule_features['Cortical radii']*0.25,99) -# # yl1=np.percentile(tubule_features['Cortical areas']*0.25*0.25,0.01) -# yl2=np.percentile(tubule_features['Cortical areas']*0.25*0.25,99) -# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill) -# sns.kdeplot(x=tubule_features['Cortical radii']*0.25,y=tubule_features['Cortical areas']*0.25*0.25, -# color=plasma(idx),ax=ax,clip=[[None,xl2],[None,yl2]]) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# plt.legend(sortedPatientLegend,title=args.labelColumns) -# plt.show() - -# def ULDensity(args): - -# if args.labelModality=='Continuous': -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# # print(patientData) -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - -# fig,ax=plt.subplots() -# plt.xlabel('Cortical tubular diameter (µm)') -# plasma = cm.get_cmap('plasma',len(sortedPatientOrder)) -# popIdx=[] -# all_tubules=[] -# featurename='Cortical radii' -# sheetname='Tubules' -# for idx,datafile in enumerate(tqdm(sortedPatientOrder,colour='red')): -# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() -# if len(tubule_features[featurename])>0: -# l1=np.percentile(tubule_features[featurename]*0.25,0.05) -# l2=np.percentile(tubule_features[featurename]*0.25,99.5) -# sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) -# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,alpha=0.1) -# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) - -# plt.legend(sortedPatientLegend,title=args.labelColumns) -# plt.title('Cumulative distributions per patient') -# # plt.colorbar() -# plt.show() -# # print(len(all_tubules)) -# # fig,ax=plt.subplots() -# # plt.xlabel('Cortical tubular diameter (µm)') -# # 
sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) -# # plt.title('Cumulative distribution for dataset') -# # plt.show() -# elif args.labelModality=='Categorical': -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] - -# clinical_legend.append(str(patientMetrics[patientID])) - -# fig,ax=plt.subplots() -# plt.xlabel('Cortical tubular diameter (µm)') -# plasma = cm.get_cmap('plasma',len(np.unique(clinical_legend))+1) -# labelMapper={} -# categories=np.unique(clinical_legend) -# for idx,l in enumerate(categories): -# labelMapper[l]=idx -# popIdx=[] -# all_tubules=[] -# featurename='Radius' -# sheetname='Glomeruli' -# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): -# tubule_features=pd.read_excel(datafile,sheet_name=sheetname,usecols=[featurename],index_col=None).dropna() -# if len(tubule_features[featurename])>0: -# l1=np.percentile(tubule_features[featurename]*0.25,0.01) -# l2=np.percentile(tubule_features[featurename]*0.25,99.9) - -# pcolor=plasma(labelMapper[clinical_legend[idx]]) -# # sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) -# sns.kdeplot(tubule_features[featurename]*0.25,ax=ax,bw_adjust=0.75,color=pcolor,fill=args.plotFill,alpha=0.5) - -# all_tubules.extend(np.array(tubule_features[featurename]*0.25)) -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# clinical_legend.pop(p) -# plt.legend(clinical_legend,title=args.labelColumns) -# plt.title('Cumulative distributions per patient') -# plt.show() -# # fig,ax=plt.subplots() -# # plt.xlabel('Cortical tubular diameter (µm)') -# # sns.kdeplot(all_tubules,ax=ax,bw_adjust=0.75,fill=args.plotFill) -# # plt.title('Cumulative distribution for dataset') -# # plt.show() - - -# def JoyPlot(args): - -# url = "https://gist.githubusercontent.com/borgar/31c1e476b8e92a11d7e9/raw/0fae97dab6830ecee185a63c1cee0008f6778ff6/pulsar.csv" -# df = pd.read_csv(url, header=None) -# df = df.stack().reset_index() -# df.columns = ['idx', 'x', 'y'] - -# patientData=pd.read_excel(args.patientData,usecols=[args.labelColumns,args.IDColumn],index_col=None,converters={args.IDColumn:str}) -# patientMetrics={} -# for idx,patientID in enumerate(patientData[args.IDColumn]): -# patientMetrics[patientID]=patientData[args.labelColumns][idx] -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# clinical_legend=[] -# for datafile in datafiles: -# patientID=os.path.splitext(datafile.split('/')[-1])[0] -# clinical_legend.append(str(patientMetrics[patientID])) - -# sortedPatientOrder=[x for _, x in sorted(zip(clinical_legend,datafiles))] -# sortedPatientLegend=[x for x,_ in sorted(zip(clinical_legend,datafiles))] - -# popIdx=[] -# all_tubules=[] -# # outDF=pd.DataFrame() - -# for idx,datafile in enumerate(tqdm(sortedPatientOrder[:15],colour='red')): - -# tubule_features=pd.read_excel(datafile,sheet_name='Tubules',usecols=['Cortical radii'],index_col=None).dropna() -# if len(tubule_features['Cortical radii'])>0: -# #.values() -# feature_array=np.array(tubule_features['Cortical radii']*0.25) -# all_tubules.append(feature_array) - -# 
l1=np.percentile(feature_array,0.05) -# l2=np.percentile(feature_array,99.5) -# # sns.kdeplot(tubule_features['Cortical radii']*0.25,ax=ax,clip=[l1,l2],bw_adjust=0.75,color=plasma(idx),fill=args.plotFill,cbar=True) -# # all_tubules.extend(np.array(tubule_features['Cortical radii']*0.25)) - -# else: -# popIdx.append(idx) -# for p in popIdx[::-1]: -# sortedPatientLegend.pop(p) -# outDF=pd.DataFrame(all_tubules) - -# outDF=outDF.stack(dropna=False).reset_index().dropna() - -# # outDFT=outDF.transpose() -# # input(outDF) -# # input(outDF.reset_index()) -# # input(outDF.reset_index(drop=True)[::2].reset_index(drop=True)) -# # input(outDF.reset_index(drop=True)) -# # input(outDF.stack()) -# # input(outDF.reset_index().stack()) -# # input(outDF.reset_index(drop=True).stack()) -# # outDF=outDF.reset_index(drop=True)[::2].reset_index(drop=True) -# # outDF = outDF.stack().reset_index() -# outDF.columns = ['idx', 'x', 'y'] -# outDF.to_csv('test.csv') -# # print(outDF.dropna()) -# # exit() - - - - -# sns.set_theme(rc={"axes.facecolor": (0, 0, 0,0), 'figure.facecolor':'#ffffff', 'axes.grid':False}) -# g = sns.FacetGrid(outDF, row='idx',hue="idx", aspect=15) -# g.map(sns.kdeplot, "x", -# bw_adjust=.9, clip_on=False, -# fill=True, alpha=1, linewidth=1.5) -# g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.9) -# g.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False) -# def label(x, color, label): -# ax = plt.gca() -# ax.text(0, .2, label, fontweight="bold", color=color, -# ha="left", va="center", transform=ax.transAxes) -# g.map(label, "x") - -# # Draw the densities in a few steps -# # g.map(sns.lineplot, 'x', 'y', clip_on=False, alpha=1, linewidth=1.5) -# # g.map(plt.fill_between, 'x', 'y', color='#000000') -# # g.map(sns.lineplot, 'x', 'y', clip_on=False, color='#ffffff', lw=2) -# # Set the subplots to overlap -# g.fig.subplots_adjust(hspace=-0.95) -# g.set_titles("") -# g.set(yticks=[], xticks=[], ylabel="", xlabel="") -# g.despine(bottom=True, left=True) -# plt.show() -# # plt.savefig('joy.png', facecolor='#000000') - -# def aggregate(args): -# assert args.exceloutfile is not None, 'You must provide a name of xlsx output file for feature aggregation with --exceloutfile name.xlsx' -# usecols=args.labelColumns.split(',') -# # index_col=int(args.IDColumn) -# patientData=pd.read_excel(args.patientData,usecols=usecols.extend(args.IDColumn),index_col=args.IDColumn) -# patientData = patientData.loc[:, ~patientData.columns.str.contains('^Unnamed')] -# patientMetrics={} - -# pd.set_option('display.max_rows', 100) -# patientData.index = patientData.index.map(str) - -# datafiles=glob.glob(os.path.join(args.target, "*.xlsx")) -# # numGloms=0 -# # numSGloms=0 -# # numTubules=0 -# # numArts=0 -# full_data={} -# for idx,datafile in enumerate(tqdm(datafiles,colour='red')): - -# xlsxid=datafile.split('/')[-1].split('.xlsx')[0] -# tubule_features=pd.read_excel(datafile,sheet_name='Summary',header=None,index_col=None).transpose() -# names=np.array(tubule_features.iloc[0]) -# vals=np.array(tubule_features.iloc[1]) -# # numGloms+=vals[0] -# # if not np.isnan(vals[5]): -# # numSGloms+=vals[5] -# # numTubules+=vals[10] -# # numArts+=vals[19] - -# full_data[xlsxid]=np.append(vals,np.array(patientData.loc[xlsxid]),0) -# # print(numGloms) -# # print(numSGloms) -# # print(numTubules) -# # print(numArts) -# # exit() -# workbook=xlsxwriter.Workbook(args.exceloutfile,{'nan_inf_to_errors': True}) -# worksheet1 = workbook.add_worksheet('Aggregation') - -# 
outcolnames=np.append(names,patientData.columns,0) -# outrownames=full_data.keys() -# for idx,outcol in enumerate(outcolnames): -# worksheet1.write(0,idx+1,outcol) - -# rowcounter=1 -# for key,vals in full_data.items(): -# worksheet1.write(rowcounter,0,key) -# for idx,val in enumerate(vals): -# worksheet1.write(rowcounter,idx+1,val) -# rowcounter+=1 - -# workbook.close() + def points_to_features_glom(points,args,min_size,cortex,medulla): a=cv2.contourArea(points) if a>min_size: diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index ef78d67..a12d08f 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -121,8 +121,8 @@ def savetime(args, starttime): help='girderApiUrl') parser.add_argument('--girderToken', dest='girderToken', default=' ' ,type=str, help='girderToken') - parser.add_argument('--files', dest='files', default=' ' ,type=str, - help='files') + parser.add_argument('--file', dest='file', default=' ' ,type=str, + help='input WSI file name') # option parser.add_argument('--option', dest='option', default=' ' ,type=str, help='option for [new, train, predict, validate]') @@ -165,7 +165,7 @@ def savetime(args, starttime): # automatically generated parser.add_argument('--base_dir', dest='base_dir', default=os.getcwd(),type=str, - help='base directory of code folder') + help='base directory of Data folder') parser.add_argument('--code_dir', dest='code_dir', default=os.getcwd(),type=str, help='base directory of code folder') @@ -290,6 +290,9 @@ def savetime(args, starttime): parser.add_argument('--platform', dest='platform', default='DSA' ,type=str, help='Run Platform, HPG or DSA') + parser.add_argument('--item_id', dest='item_id', default=' ' , type=str, + help='item id of the WSI in DSA') + diff --git a/multic/segmentationschool/utils/json_to_xml.py b/multic/segmentationschool/utils/json_to_xml.py new file mode 100644 index 0000000..072c631 --- /dev/null +++ b/multic/segmentationschool/utils/json_to_xml.py @@ -0,0 +1,66 @@ +import os +from .mask_to_xml import xml_create, xml_add_annotation, xml_add_region, xml_save + +def get_xml_path(annot, NAMES, tmp, slidename): + + xmlAnnot = xml_create() + skipSlide = 0 + xml_color=[65280]*(len(NAMES)+1) + # all compartments + for class_,compart in enumerate(NAMES): + + compart = compart.replace(' ','') + class_ +=1 + # add layer to xml + xmlAnnot = xml_add_annotation(Annotations=xmlAnnot, xml_color=xml_color, annotationID=class_) + + # test all annotation layers in order created + for iter,a in enumerate(annot): + try: + # check for annotation layer by name + a_name = a['annotation']['name'].replace(' ','') + except: + a_name = None + + if a_name == compart: + # track all layers present + skipSlide +=1 + + pointsList = [] + + # load json data + _ = os.system("printf '\tloading annotation layer: [{}]\n'".format(compart)) + + a_data = a['annotation']['elements'] + + for data in a_data: + pointList = [] + points = data['points'] + for point in points: + pt_dict = {'X': round(point[0]), 'Y': round(point[1])} + pointList.append(pt_dict) + pointsList.append(pointList) + + # write annotations to xml + for i in range(len(pointsList)): + pointList = pointsList[i] + xmlAnnot = xml_add_region(Annotations=xmlAnnot, pointList=pointList, annotationID=class_) + + # print(a['_version'], a['updated'], a['created']) + break + + if skipSlide != len(NAMES): + _ = os.system("printf '\tThis slide is missing annotation layers\n'") + _ = 
os.system("printf '\tSKIPPING SLIDE...\n'") + + _ = os.system("printf '\tFETCHING SLIDE...\n'") + #os.rename('{}/{}'.format(folder, slidename), '{}/{}'.format(tmp, slidename)) + + xml_path = '{}/{}.xml'.format(tmp, os.path.splitext(slidename)[0]) + _ = os.system("printf '\tsaving a created xml annotation file: [{}]\n'".format(xml_path)) + xml_save(Annotations=xmlAnnot, filename=xml_path) + + del xmlAnnot + + + return xml_path \ No newline at end of file diff --git a/multic/segmentationschool/utils/xml_to_mask.py b/multic/segmentationschool/utils/xml_to_mask.py deleted file mode 100644 index aeecd6a..0000000 --- a/multic/segmentationschool/utils/xml_to_mask.py +++ /dev/null @@ -1,219 +0,0 @@ -import numpy as np -import lxml.etree as ET -import cv2 -import time -import os - -""" -location (tuple) - (x, y) tuple giving the top left pixel in the level 0 reference frame -size (tuple) - (width, height) tuple giving the region size | set to 'full' for entire mask -downsample - int giving the amount of downsampling done to the output pixel mask - -NOTE: if you plan to loop through xmls parallely, it is nessesary to run write_minmax_to_xml() - on all the files prior - to avoid conflicting file writes - -""" - -def xml_to_mask(xml_path, location, size, tree=None, downsample=1, verbose=0): - - # parse xml and get root - if tree == None: tree = ET.parse(xml_path) - root = tree.getroot() - - if size == 'full': - import math - size = write_minmax_to_xml(xml_path=xml_path, tree=tree, get_absolute_max=True) - size = (math.ceil(size[0]/downsample), math.ceil(size[1]/downsample)) - location = (0,0) - - # calculate region bounds - bounds = {'x_min' : location[0], 'y_min' : location[1], 'x_max' : location[0] + size[0]*downsample, 'y_max' : location[1] + size[1]*downsample} - - IDs = regions_in_mask(xml_path=xml_path, root=root, tree=tree, bounds=bounds, verbose=verbose) - - if verbose != 0: - print('\nFOUND: ' + str(len(IDs)) + ' regions') - - # find regions in bounds - Regions = get_vertex_points(root=root, IDs=IDs, verbose=verbose) - - # fill regions and create mask - mask = Regions_to_mask(Regions=Regions, bounds=bounds, IDs=IDs, downsample=downsample, verbose=verbose) - if verbose != 0: - print('done...\n') - - return mask - - -def regions_in_mask(xml_path, root, tree, bounds, verbose=1): - # find regions to save - IDs = [] - mtime = os.path.getmtime(xml_path) - - write_minmax_to_xml(xml_path, tree) - - for Annotation in root.findall("./Annotation"): # for all annotations - annotationID = Annotation.attrib['Id'] - - for Region in Annotation.findall("./*/Region"): # iterate on all region - - for Vert in Region.findall("./Vertices"): # iterate on all vertex in region - - # get minmax points - Xmin = np.int32(Vert.attrib['Xmin']) - Ymin = np.int32(Vert.attrib['Ymin']) - Xmax = np.int32(Vert.attrib['Xmax']) - Ymax = np.int32(Vert.attrib['Ymax']) - - # test minmax points in region bounds - if bounds['x_min'] <= Xmax and bounds['x_max'] >= Xmin and bounds['y_min'] <= Ymax and bounds['y_max'] >= Ymin: - # save region Id - IDs.append({'regionID' : Region.attrib['Id'], 'annotationID' : annotationID}) - break - return IDs - -def get_vertex_points(root, IDs, verbose=1): - Regions = [] - - for ID in IDs: # for all IDs - - # get all vertex attributes (points) - Vertices = [] - - for Vertex in root.findall("./Annotation[@Id='" + ID['annotationID'] + "']/Regions/Region[@Id='" + ID['regionID'] + "']/Vertices/Vertex"): - # make array of points - Vertices.append([int(float(Vertex.attrib['X'])), 
int(float(Vertex.attrib['Y']))]) - - Regions.append(np.array(Vertices)) - - return Regions - -def Regions_to_mask(Regions, bounds, IDs, downsample, verbose=1): - # downsample = int(np.round(downsample_factor**(.5))) - - if verbose !=0: - print('\nMAKING MASK:') - - if len(Regions) != 0: # regions present - # get min/max sizes - min_sizes = np.empty(shape=[2,0], dtype=np.int32) - max_sizes = np.empty(shape=[2,0], dtype=np.int32) - for Region in Regions: # fill all regions - min_bounds = np.reshape((np.amin(Region, axis=0)), (2,1)) - max_bounds = np.reshape((np.amax(Region, axis=0)), (2,1)) - min_sizes = np.append(min_sizes, min_bounds, axis=1) - max_sizes = np.append(max_sizes, max_bounds, axis=1) - min_size = np.amin(min_sizes, axis=1) - max_size = np.amax(max_sizes, axis=1) - - # add to old bounds - bounds['x_min_pad'] = min(min_size[1], bounds['x_min']) - bounds['y_min_pad'] = min(min_size[0], bounds['y_min']) - bounds['x_max_pad'] = max(max_size[1], bounds['x_max']) - bounds['y_max_pad'] = max(max_size[0], bounds['y_max']) - - # make blank mask - mask = np.zeros([ int(np.round((bounds['y_max_pad'] - bounds['y_min_pad']) / downsample)), int(np.round((bounds['x_max_pad'] - bounds['x_min_pad']) / downsample)) ], dtype=np.uint8) - - # fill mask polygons - index = 0 - for Region in Regions: - # reformat Regions - Region[:,1] = np.int32(np.round((Region[:,1] - bounds['y_min_pad']) / downsample)) - Region[:,0] = np.int32(np.round((Region[:,0] - bounds['x_min_pad']) / downsample)) - # get annotation ID for mask color - ID = IDs[index] - cv2.fillPoly(mask, [Region], int(ID['annotationID'])) - index = index + 1 - - # reshape mask - x_start = np.int32(np.round((bounds['x_min'] - bounds['x_min_pad']) / downsample)) - y_start = np.int32(np.round((bounds['y_min'] - bounds['y_min_pad']) / downsample)) - x_stop = np.int32(np.round((bounds['x_max'] - bounds['x_min_pad']) / downsample)) - y_stop = np.int32(np.round((bounds['y_max'] - bounds['y_min_pad']) / downsample)) - # pull center mask region - mask = mask[ y_start:y_stop, x_start:x_stop ] - - else: # no Regions - mask = np.zeros([ int(np.round((bounds['y_max'] - bounds['y_min']) / downsample)), int(np.round((bounds['x_max'] - bounds['x_min']) / downsample)) ], dtype=np.uint8) - - return mask - -def write_minmax_to_xml(xml_path, tree=None, time_buffer=10, get_absolute_max=False): - # function to write min and max verticies to each region - - # parse xml and get root - if tree == None: tree = ET.parse(xml_path) - root = tree.getroot() - - try: - if get_absolute_max: - # break the try statement - X_max = 0 - Y_max = 0 - raise ValueError - - # has the xml been modified to include minmax - modtime = np.float64(root.attrib['modtime']) - # has the minmax modified xml been changed? 
- assert os.path.getmtime(xml_path) < modtime + time_buffer - - except: - - for Annotation in root.findall("./Annotation"): # for all annotations - annotationID = Annotation.attrib['Id'] - - for Region in Annotation.findall("./*/Region"): # iterate on all region - - for Vert in Region.findall("./Vertices"): # iterate on all vertex in region - Xs = [] - Ys = [] - for Vertex in Vert.findall("./Vertex"): # iterate on all vertex in region - # get points - Xs.append(np.int32(np.float64(Vertex.attrib['X']))) - Ys.append(np.int32(np.float64(Vertex.attrib['Y']))) - - # find min and max points - Xs = np.array(Xs) - Ys = np.array(Ys) - - if get_absolute_max: - # get the biggest point in annotation - if Xs != [] and Ys != []: - X_max = max(X_max, np.max(Xs)) - Y_max = max(Y_max, np.max(Ys)) - - else: - # modify the xml - Vert.set("Xmin", "{}".format(np.min(Xs))) - Vert.set("Xmax", "{}".format(np.max(Xs))) - Vert.set("Ymin", "{}".format(np.min(Ys))) - Vert.set("Ymax", "{}".format(np.max(Ys))) - - if get_absolute_max: - # return annotation max point - return (X_max,Y_max) - - else: - # modify the xml with minmax region info - root.set("modtime", "{}".format(time.time())) - xml_data = ET.tostring(tree, pretty_print=True) - #xml_data = Annotations.toprettyxml() - f = open(xml_path, 'w') - f.write(xml_data.decode()) - f.close() - - -def get_num_classes(xml_path,ignore_label=None): - # parse xml and get root - tree = ET.parse(xml_path) - root = tree.getroot() - - annotation_num = 0 - for Annotation in root.findall("./Annotation"): # for all annotations - if ignore_label != None: - if not int(Annotation.attrib['Id']) == ignore_label: - annotation_num += 1 - else: annotation_num += 1 - - return annotation_num + 1 From a08093b83a68060740e722ad12931443353102cc Mon Sep 17 00:00:00 2001 From: sayatmimar Date: Thu, 9 May 2024 14:27:46 -0400 Subject: [PATCH 7/8] minor updates --- multic/segmentationschool/segmentation_school.py | 2 +- multic/segmentationschool/utils/json_to_xml.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/multic/segmentationschool/segmentation_school.py b/multic/segmentationschool/segmentation_school.py index a12d08f..d31b06a 100644 --- a/multic/segmentationschool/segmentation_school.py +++ b/multic/segmentationschool/segmentation_school.py @@ -135,7 +135,7 @@ def savetime(args, starttime): parser.add_argument('--cortextarget', dest='cortextarget', default=None,type=str, help='directory with cortex annotations for splicing') parser.add_argument('--output_dir', dest='output_dir', default=None,type=str, - help='directory to save output transformed XMLs') + help='directory to save output excel file') parser.add_argument('--wsis', dest='wsis', default=None,type=str, help='directory of WSIs for reference feature extraction') parser.add_argument('--groupBy', dest='groupBy', default=None,type=str, diff --git a/multic/segmentationschool/utils/json_to_xml.py b/multic/segmentationschool/utils/json_to_xml.py index 072c631..fd3ff91 100644 --- a/multic/segmentationschool/utils/json_to_xml.py +++ b/multic/segmentationschool/utils/json_to_xml.py @@ -63,4 +63,5 @@ def get_xml_path(annot, NAMES, tmp, slidename): del xmlAnnot - return xml_path \ No newline at end of file + return xml_path + \ No newline at end of file From b98828e8997352089d6a5dda0363a627ab6c5be1 Mon Sep 17 00:00:00 2001 From: Sayat Mimar Date: Wed, 22 May 2024 15:21:46 -0400 Subject: [PATCH 8/8] pathomic script changes --- .../extraction_utils/process_mc_features.py | 102 +++------- 
 .../extraction_utils/run_pathomic_fe.py       | 175 ++++++++++++++++++
 2 files changed, 202 insertions(+), 75 deletions(-)
 create mode 100644 multic/segmentationschool/extraction_utils/run_pathomic_fe.py

diff --git a/multic/segmentationschool/extraction_utils/process_mc_features.py b/multic/segmentationschool/extraction_utils/process_mc_features.py
index 2f1f759..134d618 100644
--- a/multic/segmentationschool/extraction_utils/process_mc_features.py
+++ b/multic/segmentationschool/extraction_utils/process_mc_features.py
@@ -5,31 +5,26 @@
 from skimage.morphology import remove_small_objects,binary_erosion,binary_dilation,disk,binary_opening,binary_closing
 from skimage.filters import threshold_otsu
 from skimage import measure
-
+import cv2
 from .extract_ffpe_features import imreconstruct
 from .PAS_deconvolution import deconvolution
 
-GLOM_DICT = {3:'Glomeruli',4:'Sclerotic Glomeruli'}
-
-def process_glom_features(mask_xml, glom_value, MOD, slide, mpp, h_threshold, saturation_threshold):
-    glomeruli = mask_xml == glom_value
-    glomeruli = glomeruli.astype(np.uint8)
-    glomeruli = measure.label(glomeruli)
-    glomeruli_unique_max = np.max(glomeruli)
-    gloms = np.zeros((glomeruli_unique_max,7))
-    props = measure.regionprops(glomeruli)
-
-    for i in tqdm(range(glomeruli_unique_max),desc=GLOM_DICT[glom_value]):
-        #Area, Cellularity, Mesangial Area to Cellularity Ratio
-        area = (props[i].area)*(mpp**2)
-        x1,y1,x2,y2 = props[i].bbox
+def process_glom_features(points, MOD, slide, h_threshold, saturation_threshold):
+
+    #Area, Cellularity, Mesangial Area to Cellularity Ratio
+    area = cv2.contourArea(points)
+    if area>0:
+        y1, y2, x1, x2 =[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])]
         crop = slide.read_region((y1,x1),0,(y2-y1,x2-x1))
         crop = np.array(crop)
         crop = crop[:,:,:3]
 
-        mask = (glomeruli[x1:x2,y1:y2] == i+1).astype(np.uint8)
+        mask = np.zeros((x2-x1,y2-y1),dtype=np.uint8)
+        points[:,0]-=y1
+        points[:,1]-=x1
+        mask=cv2.fillPoly(mask,[points],1)
 
         hsv = rgb2hsv(crop)
         hsv = hsv[:,:,1]
@@ -51,41 +46,24 @@
         mask_pixels = np.sum(mask)
         pas_pixels = np.sum(pas_seg)
 
-        mes_fraction = (pas_pixels*(mpp**2)) / area
-
-        gloms[i,0] = x1
-        gloms[i,1] = x2
-        gloms[i,2] = y1
-        gloms[i,3] = y2
-        gloms[i,4] = area
-        gloms[i,5] = pas_pixels*(mpp**2)
-        gloms[i,6] = mes_fraction
-
-    del glomeruli
-
-    return gloms
+        mes_fraction = (pas_pixels) / area
 
-def process_tubules_features(mask_xml, tub_value, MOD, slide, mpp, whitespace_threshold):
+        return [x1,x2,y1,y2, area, pas_pixels, mes_fraction]
 
-    tubules = mask_xml == tub_value
-    tubules = tubules.astype(np.uint8)
-    tubules = measure.label(tubules)
-    tubules_unique_max = np.max(tubules)
-    tubs = np.zeros((tubules_unique_max,7))
-    props = measure.regionprops(tubules)
-
-    for i in tqdm(range(tubules_unique_max),desc='Tubules'):
-        #Area, Cellularity, Mesangial Area to Cellularity Ratio
-        # i=2615
-        area = (props[i].area)*(mpp**2)
-        x1,y1,x2,y2 = props[i].bbox
+def process_tubules_features(points, MOD, slide, whitespace_threshold):
+    area = cv2.contourArea(points)
+    if area>0:
+        y1, y2, x1, x2 =[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])]
         crop = slide.read_region((y1,x1),0,(y2-y1,x2-x1))
         crop = np.array(crop)
         crop = crop[:,:,:3]
 
-        mask = (tubules[x1:x2,y1:y2] == i+1).astype(np.uint8)
+        mask = np.zeros((x2-x1,y2-y1),dtype=np.uint8)
+        points[:,0]-=y1
+        points[:,1]-=x1
+        mask=cv2.fillPoly(mask,[points],1)
 
         lab = rgb2lab(crop)
         lab = lab[:,:,0]
@@ -156,38 +134,12 @@ def process_tubules_features(mask_xml, tub_value, MOD, slide, mpp, whitespace_th
         area_tot = np.sum(np.array(cyto_areas))
         cyto_avg = np.sum(np.multiply(np.array(cyto_thicknesses),(np.array(cyto_areas)/area_tot)))
 
-        tubs[i,0] = x1
-        tubs[i,1] = x2
-        tubs[i,2] = y1
-        tubs[i,3] = y2
-        tubs[i,4] = tbm_avg*(mpp**2)
-        tubs[i,5] = cyto_avg*(mpp**2)
-        tubs[i,6] = np.sum(WS) / np.sum(mask)#
-
-    del tubules
-
-    return tubs
-
-
-def process_arteriol_features(mask_xml, art_value, mpp):
-
-    arteries = mask_xml == art_value
-    arteries = arteries.astype(np.uint8)
-    arteries = measure.label(arteries)
-    arts_unique_max = np.max(arteries)
-    arts = np.zeros((arts_unique_max,5))
-    props = measure.regionprops(arteries)
-
-    for i in tqdm(range(arts_unique_max),desc='Arteries'):
-        area = (props[i].area)*(mpp**2)
-        x1,y1,x2,y2 = props[i].bbox
+        return [x1,x2,y1,y2,tbm_avg,cyto_avg,np.sum(WS) / np.sum(mask)]
 
-        arts[i,0] = x1
-        arts[i,1] = x2
-        arts[i,2] = y1
-        arts[i,3] = y2
-        arts[i,4] = area
 
-    del arteries
+def process_arteriol_features(points):
 
-    return arts
+    area = cv2.contourArea(points)
+    if area>0:
+        y1, y2, x1, x2 =[np.min(points[:,0]),np.max(points[:,0]),np.min(points[:,1]),np.max(points[:,1])]
+        return [x1,x2,y1,y2,area]
diff --git a/multic/segmentationschool/extraction_utils/run_pathomic_fe.py b/multic/segmentationschool/extraction_utils/run_pathomic_fe.py
new file mode 100644
index 0000000..f453c53
--- /dev/null
+++ b/multic/segmentationschool/extraction_utils/run_pathomic_fe.py
@@ -0,0 +1,175 @@
+import sys
+import os, girder_client
+import numpy as np
+from tqdm import tqdm
+import pandas as pd
+from tiffslide import TiffSlide
+import lxml.etree as ET
+import multiprocessing
+from joblib import Parallel, delayed
+
+MODx=np.zeros((3,))
+MODy=np.zeros((3,))
+MODz=np.zeros((3,))
+MODx[0]= 0.644211
+MODy[0]= 0.716556
+MODz[0]= 0.266844
+
+MODx[1]= 0.175411
+MODy[1]= 0.972178
+MODz[1]= 0.154589
+
+MODx[2]= 0.0
+MODy[2]= 0.0
+MODz[2]= 0.0
+MOD=[MODx,MODy,MODz]
+
+from .extract_ffpe_features import xml_to_mask
+
+from .process_mc_features import process_glom_features, process_tubules_features, process_arteriol_features
+from segmentationschool.Codes.xml_to_mask_minmax import write_minmax_to_xml
+from segmentationschool.utils.json_to_xml import get_xml_path
+
+def getPathomicFeatures(args):
+
+    cores=multiprocessing.cpu_count()
+    folder = args.base_dir
+
+    # assert args.target is not None, 'Directory of xmls must be specified, use --target /path/to/files.xml'
+    # assert args.wsis is not None, 'Directory of WSIs must be specified, use --wsis /path/to/wsis'
+    if args.platform == 'DSA':
+        import girder_client
+        gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
+        gc.setToken(args.girderToken)
+
+        file_name = args.file.split('/')[-1]
+        slide_item_id = args.item_id
+        output_dir = args.base_dir
+        slide_name,slideExt=file_name.split('.')
+        items=[(args.file, args.xml_path)]
+
+    elif args.platform == 'HPG':
+        image_files = [image_name for _, _, files in os.walk(folder) for image_name in files if image_name.endswith('.xml')]
+        image_names = [os.path.join(folder, f.split('.')[0]) for f in image_files]
+        slideExt = args.ext
+        output_dir = args.output_dir
+        # each item in items is a tuple of (images, annotations)
+        items = [(f + slideExt, f + '.xml') for f in image_names]
+    else:
+        raise Exception("Please enter a valid platform: DSA or HPG")
+
+    for i in range(len(items)):
+
+        svsfile, xmlfile = items[i]
+
+        print('processing annotation file: {}'.format(xmlfile))
+        write_minmax_to_xml(xmlfile)
+        slide = TiffSlide(svsfile)
+
+        all_contours = {'1':[],'2':[],'3':[],'4':[],'5':[],'6':[]}
+        tree = ET.parse(xmlfile)
+        root = tree.getroot()
+
+        xlsx_path = os.path.join(output_dir, os.path.basename(svsfile).split('.')[0] +'_pathomic'+'.xlsx')
+        for Annotation in root.findall("./Annotation"): # for all annotations
+            annotationID = Annotation.attrib['Id']
+            if annotationID not in ['1','2','3','4','5','6']:
+                pass
+            else:
+                for Region in Annotation.findall("./*/Region"): # iterate on all region
+                    verts=[]
+                    for Vert in Region.findall("./Vertices/Vertex"): # iterate on all vertex in region
+                        verts.append([int(float(Vert.attrib['X'])),int(float(Vert.attrib['Y']))])
+                    all_contours[annotationID].append(np.array(verts))
+
+        glom_features=Parallel(n_jobs=cores,prefer="threads")(delayed(process_glom_features)(points,
+            MOD, slide, h_threshold=args.h_threshold, saturation_threshold=args.saturation_threshold) for points in tqdm(all_contours['3'],colour='yellow',unit='Glomerulus',leave=False))
+        s_glom_features=Parallel(n_jobs=cores,prefer="threads")(delayed(process_glom_features)(points,
+            MOD, slide, h_threshold=args.h_threshold, saturation_threshold=args.saturation_threshold) for points in tqdm(all_contours['4'],colour='yellow',unit='Scl. Glomerulus',leave=False))
+        tub_features = Parallel(n_jobs=cores,prefer="threads")(delayed(process_tubules_features)(points,
+            MOD, slide,whitespace_threshold=args.whitespace_threshold) for points in tqdm(all_contours['5'],colour='blue',unit='Tubule',leave=False))
+        art_features=Parallel(n_jobs=cores,prefer="threads")(delayed(process_arteriol_features)(points,
+            ) for points in tqdm(all_contours['6'], colour='magenta',unit='Artery(-iole)',leave=False))
+        glom_features=[i for i in glom_features if i is not None]
+        s_glom_features=[i for i in s_glom_features if i is not None]
+        tub_features=[i for i in tub_features if i is not None]
+        art_features=[i for i in art_features if i is not None]
+
+        def get_feature_matrix(features, columns):
+            m, n = len(features), len(columns)
+            feature_matrix = np.zeros((m, n))
+            for i, feat in enumerate(features):
+                for j in range(n):
+                    feature_matrix[i,j] = feat[j]
+
+            return feature_matrix
+        # gloms = np.zeros((len(glom_features),7))
+        # s_gloms = np.zeros((len(s_glom_features),7))
+        # tubs = np.zeros((len(tub_features),7))
+        # arts = np.zeros((len(art_features),5))
+
+        # for i, glom in enumerate(glom_features):
+        #     gloms[i,0] = glom[0]
+        #     gloms[i,1] = glom[1]
+        #     gloms[i,2] = glom[2]
+        #     gloms[i,3] = glom[3]
+        #     gloms[i,4] = glom[4]
+        #     gloms[i,5] = glom[5]
+        #     gloms[i,6] = glom[6]
+
+        # for i, glom in enumerate(s_glom_features):
+        #     s_gloms[i,0] = s_glom[0]
+        #     s_gloms[i,1] = s_glom[1]
+        #     s_gloms[i,2] = s_glom[2]
+        #     s_gloms[i,3] = s_glom[3]
+        #     s_gloms[i,4] = s_glom[4]
+        #     s_gloms[i,5] = s_glom[5]
+        #     s_gloms[i,6] = s_glom[6]
+
+        # for i, tub in enumerate(tub_features):
+        #     if not tub:
+        #         continue
+        #     tubs[i,0] = tub[0]
+        #     tubs[i,1] = tub[1]
+        #     tubs[i,2] = tub[2]
+        #     tubs[i,3] = tub[3]
+        #     tubs[i,4] = tub[4]
+        #     tubs[i,5] = tub[5]
+        #     tubs[i,6] = tub[6]
+
+
+        # for i, art in enumerate(art_features):
+        #     if not art:
+        #         continue
+        #     arts[i,0] = art[0]
+        #     arts[i,1] = art[1]
+        #     arts[i,2] = art[2]
+        #     arts[i,3] = art[3]
+        #     arts[i,4] = art[4]
+
+
+
+        all_columns = [['x1','x2','y1','y2','Area','Mesangial Area','Mesangial Fraction'],
+                       ['x1','x2','y1','y2','Area','Mesangial Area','Mesangial Fraction'],
+                       ['x1','x2','y1','y2','Average TBM Thickness','Average Cell Thickness','Luminal Fraction'],
+                       ['x1','x2','y1','y2','Arterial Area']]
+        compart_names = ['gloms','s_gloms','tubs','arts']
+
+        gloms = get_feature_matrix(glom_features, all_columns[0])
+        s_gloms = get_feature_matrix(s_glom_features,all_columns[1])
+        tubs = get_feature_matrix(tub_features, all_columns[2])
+        arts = get_feature_matrix(art_features,all_columns[3])
+
+        all_comparts = [gloms,s_gloms,tubs, arts]
+
+        _ = os.system("printf '\tWriting Excel file: [{}]\n'".format(xlsx_path))
+        with pd.ExcelWriter(xlsx_path) as writer:
+            for idx,compart in enumerate(all_comparts):
+                df = pd.DataFrame(compart,columns=all_columns[idx])
+                df.to_excel(writer, index=False, sheet_name=compart_names[idx])
+        if args.platform == 'DSA':
+            gc.uploadFileToItem(slide_item_id, xlsx_path, reference=None, mimeType=None, filename=None, progressCallback=None)
+            print('Girder file uploaded!')
+
+
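
Editor's note (not part of the patches above): the new per-contour helpers in process_mc_features.py unpack the bounding box as y1, y2, x1, x2 from points[:,0] and points[:,1]. Because the vertices are stored as [X, Y], the variables named y* actually carry the X (column) range and the x* variables carry the Y (row) range; the subsequent read_region and fillPoly calls are consistent with that convention, just confusingly named. The sketch below restates the same geometry with explicit names; contour_to_crop_mask is a hypothetical helper written for illustration, not a function in this series, and it copies the vertex array instead of shifting it in place.

    import numpy as np
    import cv2

    def contour_to_crop_mask(points):
        # points: (N, 2) array of [X, Y] slide-coordinate vertices, as built
        # from the XML <Vertex> elements in run_pathomic_fe.py
        pts = np.asarray(points, dtype=np.int32).copy()  # int32 copy: avoids mutating the caller's array
        min_x, max_x = pts[:, 0].min(), pts[:, 0].max()
        min_y, max_y = pts[:, 1].min(), pts[:, 1].max()
        width, height = max_x - min_x, max_y - min_y

        # slide.read_region takes ((x, y), level, (width, height));
        # the crop-local mask is indexed (rows = height, cols = width)
        mask = np.zeros((height, width), dtype=np.uint8)
        pts[:, 0] -= min_x  # shift vertices into crop-local coordinates
        pts[:, 1] -= min_y
        cv2.fillPoly(mask, [pts], 1)  # fillPoly expects (x, y) vertex order
        return (min_x, min_y), (width, height), mask

The explicit int32 cast is defensive: some OpenCV builds reject int64 vertex arrays (the default dtype of np.array(verts) on most platforms), so casting before fillPoly avoids a hard-to-trace assertion failure.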
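A related behavioral note: the old process_* functions scaled their outputs by mpp**2, while the new per-contour versions return raw pixel quantities, so the Area, Mesangial Area, and thickness columns in the Excel workbook now appear to be in pixel units rather than micron-based units, and downstream consumers may need to rescale. A minimal sanity check of the per-slide output could look like the following (the workbook name is hypothetical; real files are named <slide basename>_pathomic.xlsx, with sheet names taken from compart_names in run_pathomic_fe.py):

    import pandas as pd

    # read the four compartment sheets written by pd.ExcelWriter
    sheets = pd.read_excel('slide_pathomic.xlsx',
                           sheet_name=['gloms', 's_gloms', 'tubs', 'arts'])
    print(sheets['gloms'][['Area', 'Mesangial Fraction']].describe())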