From a354effb8fd30722b45370a65671f935ba350fbe Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Mon, 5 Aug 2024 14:25:40 +0000 Subject: [PATCH 01/14] Included glcm functions into the script and adjusted parser help information accordingly --- mcquant/ParseInput.py | 15 ++++++++--- mcquant/SingleCellDataExtraction.py | 39 ++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/mcquant/ParseInput.py b/mcquant/ParseInput.py index 1b98f5c..ed3c6b7 100644 --- a/mcquant/ParseInput.py +++ b/mcquant/ParseInput.py @@ -29,9 +29,18 @@ def ParseInputDataExtract(): By default only mean intensity is calculated. If the metric doesn't depend on signal intensity, use --mask-props instead. See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops - Additionally available is gini_index, which calculates a single number - between 0 and 1, representing how unequal the signal is distributed in each region. - See https://en.wikipedia.org/wiki/Gini_coefficient + + Additionally available is: gini_index, intensity_median, intensity_sum, intensity_std, + contrast, dissimilarity, homogeneity, energy, correlation, ASM. + Further information on the parameters: + gini_index: + which calculates a single number between 0 and 1, representing how unequal the signal is distributed in each region. + Will be calculated for every marker + See https://en.wikipedia.org/wiki/Gini_coefficient + contrast, dissimilarity, homogeneity, energy, correlation, ASM: + glcm/graycoprops features from scikit-image features. The distance is currently set to 1 pixel and angle to 0 rad. + will be calculated for every marker + See https://scikit-image.org/docs/stable/api/skimage.feature.html """ ) #parser.add_argument('--suffix') diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction.py index 2f97351..0c0040a 100644 --- a/mcquant/SingleCellDataExtraction.py +++ b/mcquant/SingleCellDataExtraction.py @@ -9,8 +9,8 @@ import os import skimage.measure import skimage.measure._regionprops +from skimage.feature import graycomatrix, graycoprops import tifffile - from pathlib import Path #### Additional functions that can be specified by the user via intensity_props @@ -23,6 +23,10 @@ def intensity_median(mask, intensity): def intensity_sum(mask, intensity): return np.sum(intensity[mask]) +## FUnction to calculate standard deviation of intensity values +def intensity_std(mask, intensity): + return np.std(intensity[mask]) + ## Function to calculate the gini index: https://en.wikipedia.org/wiki/Gini_coefficient def gini_index(mask, intensity): x = intensity[mask] @@ -31,6 +35,36 @@ def gini_index(mask, intensity): cumx = np.cumsum(sorted_x, dtype=float) return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n +## Function to Calculate the GLCM for gracoprops +def calculate_glcm(image, distances=[1], angles=[0]): + image_uint8 = ((image - np.min(image)) * (255 / (np.max(image) - np.min(image)))).astype(np.uint8) + glcm = graycomatrix(image_uint8, distances, angles) + return glcm + +def contrast(mask, intensity): + glcm = calculate_glcm(intensity[mask]) + return graycoprops(glcm, 'contrast')[0, 0] + +def dissimilarity(mask, intensity): + glcm = calculate_glcm(intensity[mask]) + return graycoprops(glcm, 'dissimilarity')[0, 0] + +def homogeneity(mask, intensity): + glcm = calculate_glcm(intensity[mask]) + return graycoprops(glcm, 'homogeneity')[0, 0] + +def energy(mask, intensity): + glcm = calculate_glcm(intensity[mask]) + return graycoprops(glcm, 'energy')[0, 0] + +def correlation(mask, intensity): + glcm = calculate_glcm(intensity[mask]) + return graycoprops(glcm, 'correlation')[0, 0] + +def ASM(mask, intensity): + glcm = calculate_glcm(intensity[mask]) + return graycoprops(glcm, 'ASM')[0, 0] + def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]): """Function for quantifying a single channel image @@ -41,6 +75,9 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) builtin_props = set(intensity_props).intersection(standard_props) # Otherwise look for them in this module extra_props = set(intensity_props).difference(standard_props) + # Calculate graycoprops using glcm + + dat = skimage.measure.regionprops_table( mask_loaded, image_loaded_z, properties = tuple(builtin_props), From 5e274c91335d453b42d96784a3a82801c95580db Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Mon, 5 Aug 2024 17:14:00 +0000 Subject: [PATCH 02/14] added a second script which does not calculates gclm multiple time per cell label --- mcquant/SingleCellDataExtraction.py | 2 +- mcquant/SingleCellDataExtraction_v2.py | 301 +++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 mcquant/SingleCellDataExtraction_v2.py diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction.py index 0c0040a..5f527ef 100644 --- a/mcquant/SingleCellDataExtraction.py +++ b/mcquant/SingleCellDataExtraction.py @@ -35,7 +35,7 @@ def gini_index(mask, intensity): cumx = np.cumsum(sorted_x, dtype=float) return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n -## Function to Calculate the GLCM for gracoprops +## Functions to Calculate the GLCM and gracoprops features def calculate_glcm(image, distances=[1], angles=[0]): image_uint8 = ((image - np.min(image)) * (255 / (np.max(image) - np.min(image)))).astype(np.uint8) glcm = graycomatrix(image_uint8, distances, angles) diff --git a/mcquant/SingleCellDataExtraction_v2.py b/mcquant/SingleCellDataExtraction_v2.py new file mode 100644 index 0000000..53e717f --- /dev/null +++ b/mcquant/SingleCellDataExtraction_v2.py @@ -0,0 +1,301 @@ +#Functions for reading in single cell imaging data +#Joshua Hess + +#Import necessary modules +import skimage.io +import h5py +import pandas as pd +import numpy as np +import os +import skimage.measure +import skimage.measure._regionprops +from skimage.feature import graycomatrix, graycoprops +import tifffile +from pathlib import Path + +#### Additional functions that can be specified by the user via intensity_props + +## Function to calculate median intensity values per mask +def intensity_median(mask, intensity): + return np.median(intensity[mask]) + +## Function to sum intensity values (or in this case transcript counts) +def intensity_sum(mask, intensity): + return np.sum(intensity[mask]) + +## FUnction to calculate standard deviation of intensity values +def intensity_std(mask, intensity): + return np.std(intensity[mask]) + +## Function to calculate the gini index: https://en.wikipedia.org/wiki/Gini_coefficient +def gini_index(mask, intensity): + x = intensity[mask] + sorted_x = np.sort(x) + n = len(x) + cumx = np.cumsum(sorted_x, dtype=float) + return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n + + +def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]): + """Function for quantifying a single channel image + + Returns a table with CellID according to the mask and the mean pixel intensity + for the given channel for each cell""" + standard_props = set(skimage.measure._regionprops.COL_DTYPES) + # Look for regionprops in skimage + builtin_props = set(intensity_props).intersection(standard_props) + # Otherwise look for them in this module + extra_props = set(intensity_props).difference(standard_props) + + dat = skimage.measure.regionprops_table( + mask_loaded, image_loaded_z, + properties = tuple(builtin_props), + extra_properties = [globals()[n] for n in extra_props if n not in glcm_features] + ) + + glcm_features_set = {"contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM"} + glcm_features = {} + + if glcm_features_set.intersection(intensity_props): + for region in skimage.measure.regionprops(mask_loaded, image_loaded_z): + label = region.label + glcm = graycomatrix(region.intensity_image, [1], [0], symmetric=True, normed=True) + glcm_props = {} + for prop in glcm_features_set.intersection(intensity_props): + glcm_props[prop] = graycoprops(glcm, prop)[0, 0] + glcm_features[label] = glcm_props + + if glcm_features: + for label, features in glcm_features.items(): + for prop, value in features.items(): + dat[prop] = dat.get(prop, []) + [value] + + return dat + + +def MaskIDs(mask, mask_props=None): + """This function will extract the CellIDs and the XY positions for each + cell based on that cells centroid + + Returns a dictionary object""" + + all_mask_props = set(["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) + if mask_props is not None: + all_mask_props = all_mask_props.union(mask_props) + + dat = skimage.measure.regionprops_table( + mask, + properties=all_mask_props + ) + + name_map = { + "CellID": "label", + "X_centroid": "centroid-1", + "Y_centroid": "centroid-0", + "Area": "area", + "MajorAxisLength": "major_axis_length", + "MinorAxisLength": "minor_axis_length", + "Eccentricity": "eccentricity", + "Solidity": "solidity", + "Extent": "extent", + "Orientation": "orientation", + } + for new_name, old_name in name_map.items(): + dat[new_name] = dat[old_name] + for old_name in set(name_map.values()): + del dat[old_name] + + return dat + +def n_channels(image): + """Returns the number of channel in the input image. Supports [OME]TIFF and HDF5.""" + + image_path = Path(image) + + if image_path.suffix in ['.tiff', '.tif', '.btf']: + s = tifffile.TiffFile(image).series[0] + ndim = len(s.shape) + if ndim == 2: return 1 + elif ndim == 3: return min(s.shape) + else: raise Exception('mcquant supports only 2D/3D images.') + + elif image_path.suffix in ['.h5', '.hdf5']: + f = h5py.File(image, 'r') + dat_name = list(f.keys())[0] + return f[dat_name].shape[3] + + else: + raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only') + +def PrepareData(image,z): + """Function for preparing input for maskzstack function. Connecting function + to use with mc micro ilastik pipeline""" + + image_path = Path(image) + + #Check to see if image tif(f) + if image_path.suffix in ['.tiff', '.tif', '.btf']: + image_loaded_z = tifffile.imread(image, key=z) + + #Check to see if image is hdf5 + elif image_path.suffix in ['.h5', '.hdf5']: + #Read the image + f = h5py.File(image,'r') + #Get the dataset name from the h5 file + dat_name = list(f.keys())[0] + #Retrieve the z^th channel + image_loaded_z = f[dat_name][0,:,:,z] + + else: + raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only') + + #Return the objects + return image_loaded_z + + +def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensity_props=["intensity_mean"]): + """This function will extract the stats for each cell mask through each channel + in the input image + + mask_loaded: dictionary containing Tiff masks that represents the cells in your image. + + z_stack: Multichannel z stack image""" + + #Get the names of the keys for the masks dictionary + mask_names = list(masks_loaded.keys()) + + #Create empty dictionary to store channel results per mask + dict_of_chan = {m_name: [] for m_name in mask_names} + #Get the z channel and the associated channel name from list of channel names + for z in range(len(channel_names_loaded)): + #Run the data Prep function + image_loaded_z = PrepareData(image,z) + + #Iterate through number of masks to extract single cell data + for nm in range(len(mask_names)): + #Use the above information to mask z stack + dict_of_chan[mask_names[nm]].append( + MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z, intensity_props=intensity_props) + ) + #Print progress + print("Finished "+str(z)) + + # Column order according to histoCAT convention (Move xy position to end with spatial information) + last_cols = ( + "X_centroid", + "Y_centroid", + "column_centroid", + "row_centroid", + "Area", + "MajorAxisLength", + "MinorAxisLength", + "Eccentricity", + "Solidity", + "Extent", + "Orientation", + ) + def col_sort(x): + if x == "CellID": + return -2 + try: + return last_cols.index(x) + except ValueError: + return -1 + + #Iterate through the masks and format quantifications for each mask and property + for nm in mask_names: + mask_dict = {} + # Mean intensity is default property, stored without suffix + mask_dict.update( + zip(channel_names_loaded, [x["intensity_mean"] for x in dict_of_chan[nm]]) + ) + # All other properties are suffixed with their names + for prop_n in set(dict_of_chan[nm][0].keys()).difference(["intensity_mean"]): + mask_dict.update( + zip([f"{n}_{prop_n}" for n in channel_names_loaded], [x[prop_n] for x in dict_of_chan[nm]]) + ) + # Get the cell IDs and mask properties + mask_properties = pd.DataFrame(MaskIDs(masks_loaded[nm], mask_props=mask_props)) + mask_dict.update(mask_properties) + dict_of_chan[nm] = pd.DataFrame(mask_dict).reindex(columns=sorted(mask_dict.keys(), key=col_sort)) + + # Return the dict of dataframes for each mask + return dict_of_chan + +def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): + """Function for extracting single cell information from input + path containing single-cell masks, z_stack path, and channel_names path.""" + + #Create pathlib object for output + output = Path(output) + + #Read csv channel names + channel_names_loaded = pd.read_csv(channel_names) + #Check for the presence of `marker_name` column + if 'marker_name' in channel_names_loaded: + #Get the marker_name column if more than one column (CyCIF structure) + channel_names_loaded_list = list(channel_names_loaded.marker_name) + #Consider the old one-marker-per-line plain text format + elif channel_names_loaded.shape[1] == 1: + #re-read the csv file and add column name + channel_names_loaded = pd.read_csv(channel_names, header = None) + channel_names_loaded_list = list(channel_names_loaded.iloc[:,0]) + else: + raise Exception('%s must contain the marker_name column'%channel_names) + + #Contrast against the number of markers in the image + if len(channel_names_loaded_list) != n_channels(image): + raise Exception("The number of channels in %s doesn't match the image"%channel_names) + + #Check for unique marker names -- create new list to store new names + channel_names_loaded_checked = [] + for idx,val in enumerate(channel_names_loaded_list): + #Check for unique value + if channel_names_loaded_list.count(val) > 1: + #If unique count greater than one, add suffix + channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1)) + else: + #Otherwise, leave channel name + channel_names_loaded_checked.append(val) + + #Read the masks + masks_loaded = {} + #iterate through mask paths and read images to add to dictionary object + for m in masks: + m_full_name = os.path.basename(m) + m_name = m_full_name.split('.')[0] + masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')}) + + scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props) + #Write the singe cell data to a csv file using the image name + + # Determine the image name by cutting off its extension + im_full_name = os.path.basename(image) + im_tokens = im_full_name.split(os.extsep) + if len(im_tokens) < 2: im_name = im_tokens[0] + elif im_tokens[-2] == "ome": im_name = os.extsep.join(im_tokens[0:-2]) + else: im_name = os.extsep.join(im_tokens[0:-1]) + + # iterate through each mask and export csv with mask name as suffix + for k,v in scdata_z.items(): + # export the csv for this mask name + scdata_z[k].to_csv( + str(Path(os.path.join(str(output), + str(im_name+"_{}"+".csv").format(k)))), + index=False + ) + + +def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): + """Function for iterating over a list of z_stacks and output locations to + export single-cell data from image masks""" + + print("Extracting single-cell data for "+str(image)+'...') + + #Run the ExtractSingleCells function for this image + ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props) + + #Print update + im_full_name = os.path.basename(image) + im_name = im_full_name.split('.')[0] + print("Finished "+str(im_name)) From d103d541dfe77543a1505a931ce0885586770785 Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Tue, 6 Aug 2024 07:58:17 +0000 Subject: [PATCH 03/14] adjust the image parameter in glcm function --- mcquant/SingleCellDataExtraction.py | 2 +- mcquant/SingleCellDataExtraction_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction.py index 5f527ef..9bfc3bb 100644 --- a/mcquant/SingleCellDataExtraction.py +++ b/mcquant/SingleCellDataExtraction.py @@ -36,7 +36,7 @@ def gini_index(mask, intensity): return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n ## Functions to Calculate the GLCM and gracoprops features -def calculate_glcm(image, distances=[1], angles=[0]): +def calculate_glcm(intensity, distances=[1], angles=[0]): image_uint8 = ((image - np.min(image)) * (255 / (np.max(image) - np.min(image)))).astype(np.uint8) glcm = graycomatrix(image_uint8, distances, angles) return glcm diff --git a/mcquant/SingleCellDataExtraction_v2.py b/mcquant/SingleCellDataExtraction_v2.py index 53e717f..b528b68 100644 --- a/mcquant/SingleCellDataExtraction_v2.py +++ b/mcquant/SingleCellDataExtraction_v2.py @@ -129,7 +129,7 @@ def n_channels(image): def PrepareData(image,z): """Function for preparing input for maskzstack function. Connecting function - to use with mc micro ilastik pipeline""" + to use with mcmicro ilastik pipeline""" image_path = Path(image) From 90b5f84d66568f1345ae1d0b1c62fd2b7a3d0685 Mon Sep 17 00:00:00 2001 From: Aroj Hada Date: Tue, 6 Aug 2024 10:03:23 +0200 Subject: [PATCH 04/14] Rename SingleCellDataExtraction.py to SingleCellDataExtraction_expanded.py Rename --- ...CellDataExtraction.py => SingleCellDataExtraction_expanded.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mcquant/{SingleCellDataExtraction.py => SingleCellDataExtraction_expanded.py} (100%) diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction_expanded.py similarity index 100% rename from mcquant/SingleCellDataExtraction.py rename to mcquant/SingleCellDataExtraction_expanded.py From 0167e6ef1bfafee9b34d7a6f4c5994a45c27515c Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Tue, 6 Aug 2024 08:09:03 +0000 Subject: [PATCH 05/14] adjust glcm function image names --- mcquant/SingleCellDataExtraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction.py index 9bfc3bb..30525b7 100644 --- a/mcquant/SingleCellDataExtraction.py +++ b/mcquant/SingleCellDataExtraction.py @@ -37,7 +37,7 @@ def gini_index(mask, intensity): ## Functions to Calculate the GLCM and gracoprops features def calculate_glcm(intensity, distances=[1], angles=[0]): - image_uint8 = ((image - np.min(image)) * (255 / (np.max(image) - np.min(image)))).astype(np.uint8) + image_uint8 = ((intensity - np.min(intensity)) * (255 / (np.max(intensity) - np.min(intensity)))).astype(np.uint8) glcm = graycomatrix(image_uint8, distances, angles) return glcm From d789c335fbf0ff54ccf32918efe2e4f9fcf869c3 Mon Sep 17 00:00:00 2001 From: Aroj Hada Date: Tue, 6 Aug 2024 10:24:16 +0200 Subject: [PATCH 06/14] Update SingleCellDataExtraction_v2.py --- mcquant/SingleCellDataExtraction_v2.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mcquant/SingleCellDataExtraction_v2.py b/mcquant/SingleCellDataExtraction_v2.py index b528b68..0cbb45b 100644 --- a/mcquant/SingleCellDataExtraction_v2.py +++ b/mcquant/SingleCellDataExtraction_v2.py @@ -47,12 +47,6 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) # Otherwise look for them in this module extra_props = set(intensity_props).difference(standard_props) - dat = skimage.measure.regionprops_table( - mask_loaded, image_loaded_z, - properties = tuple(builtin_props), - extra_properties = [globals()[n] for n in extra_props if n not in glcm_features] - ) - glcm_features_set = {"contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM"} glcm_features = {} @@ -65,11 +59,16 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) glcm_props[prop] = graycoprops(glcm, prop)[0, 0] glcm_features[label] = glcm_props + dat = skimage.measure.regionprops_table( + mask_loaded, image_loaded_z, + properties = tuple(builtin_props), + extra_properties = [globals()[n] for n in extra_props if n not in glcm_features] + ) + if glcm_features: for label, features in glcm_features.items(): for prop, value in features.items(): dat[prop] = dat.get(prop, []) + [value] - return dat From eb9a450769120c9c62f881b27e6aaa78e7e44145 Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Tue, 6 Aug 2024 09:37:42 +0000 Subject: [PATCH 07/14] Adjusted Readme, deleted second python file. Adjsuted the new expanded MCQUANT main file to handle additional glcm features, resizing of the loaded image to uint8 for glcm calculation, adjusted extra property handling of the regionprops_table function. Version importing has to be adjusted next --- README.md | 12 +- mcquant/ParseInput.py | 2 +- mcquant/SingleCellDataExtraction_expanded.py | 51 ++-- mcquant/SingleCellDataExtraction_v2.py | 300 ------------------- mcquant/__init__.py | 2 +- 5 files changed, 32 insertions(+), 335 deletions(-) delete mode 100644 mcquant/SingleCellDataExtraction_v2.py diff --git a/README.md b/README.md index 87ee2ed..719863a 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,19 @@ Module for single-cell data extraction given a segmentation mask and multi-chann See https://en.wikipedia.org/wiki/Gini_coefficient for more information. * `--intensity_props intensity_median` : Will calculate the median of intensity values per labeled object in the mask. * `--intensity_props intensity_sum` : Will calculate the sum of intensity values per labelled object in the mask. This can be useful if you want to count RNA molecules from FISH based images for example. + * `--intensity_props intensity_std` : Will calculate the standard deviation of intensity values per labeled object in the mask. + + Further available are GLCM-derived gracoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these are set to angle 0 and distane 1 pixel: + * `--intensity_props contrast` : Will calculate the glcm derived symmetric and normalized contrast of intensity values per labeled object in the mask. + * `--intensity_props dissimilarity` : Will calculate the glcm derived symmetric and normalized dissimilarity of intensity values per labeled object in the mask. + * `--intensity_props homogeneity` : Will calculate the glcm derived symmetric and normalized homogeneity of intensity values per labeled object in the mask. + * `--intensity_props energy` : Will calculate the glcm derived symmetric and normalized energy of intensity values per labeled object in the mask. + * `--intensity_props correlation` : Will calculate the glcm derived symmetric and normalized correlation of intensity values per labeled object in the mask. + * `--intensity_props ASM` : Will calculate the glcm derived symmetric and normalized ASM of intensity values per labeled object in the mask. + # Run script -`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5 --output ./feature_extraction --channel_names ./my_channels.csv` +`python CLI.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5 --output ./feature_extraction --channel_names ./my_channels.csv` # Main developer Denis Schapiro (https://github.com/DenisSch) diff --git a/mcquant/ParseInput.py b/mcquant/ParseInput.py index ed3c6b7..47dfa8b 100644 --- a/mcquant/ParseInput.py +++ b/mcquant/ParseInput.py @@ -1,6 +1,6 @@ #Functions for parsing command line arguments for ome ilastik prep import argparse -from . import __version__ +from . import __version__ # This still has to be adjusted with __init__.py function def ParseInputDataExtract(): diff --git a/mcquant/SingleCellDataExtraction_expanded.py b/mcquant/SingleCellDataExtraction_expanded.py index 30525b7..e642b54 100644 --- a/mcquant/SingleCellDataExtraction_expanded.py +++ b/mcquant/SingleCellDataExtraction_expanded.py @@ -35,35 +35,6 @@ def gini_index(mask, intensity): cumx = np.cumsum(sorted_x, dtype=float) return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n -## Functions to Calculate the GLCM and gracoprops features -def calculate_glcm(intensity, distances=[1], angles=[0]): - image_uint8 = ((intensity - np.min(intensity)) * (255 / (np.max(intensity) - np.min(intensity)))).astype(np.uint8) - glcm = graycomatrix(image_uint8, distances, angles) - return glcm - -def contrast(mask, intensity): - glcm = calculate_glcm(intensity[mask]) - return graycoprops(glcm, 'contrast')[0, 0] - -def dissimilarity(mask, intensity): - glcm = calculate_glcm(intensity[mask]) - return graycoprops(glcm, 'dissimilarity')[0, 0] - -def homogeneity(mask, intensity): - glcm = calculate_glcm(intensity[mask]) - return graycoprops(glcm, 'homogeneity')[0, 0] - -def energy(mask, intensity): - glcm = calculate_glcm(intensity[mask]) - return graycoprops(glcm, 'energy')[0, 0] - -def correlation(mask, intensity): - glcm = calculate_glcm(intensity[mask]) - return graycoprops(glcm, 'correlation')[0, 0] - -def ASM(mask, intensity): - glcm = calculate_glcm(intensity[mask]) - return graycoprops(glcm, 'ASM')[0, 0] def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]): """Function for quantifying a single channel image @@ -75,14 +46,30 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) builtin_props = set(intensity_props).intersection(standard_props) # Otherwise look for them in this module extra_props = set(intensity_props).difference(standard_props) - # Calculate graycoprops using glcm + + glcm_features_set = {"contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM"} + glcm_features = {} + if glcm_features_set.intersection(intensity_props): + for region in skimage.measure.regionprops(mask_loaded, image_loaded_z): + label = region.label + image_uint8 = ((region.intensity_image - np.min(region.intensity_image)) * (255 / (np.max(region.intensity_image) - np.min(region.intensity_image)))).astype(np.uint8) + glcm = graycomatrix(region.intensity_image, [1], [0], symmetric=True, normed=True) + glcm_props = {} + for prop in glcm_features_set.intersection(intensity_props): + glcm_props[prop] = graycoprops(glcm, prop)[0, 0] + glcm_features[label] = glcm_props dat = skimage.measure.regionprops_table( mask_loaded, image_loaded_z, properties = tuple(builtin_props), - extra_properties = [globals()[n] for n in extra_props] + extra_properties = [globals()[n] for n in extra_props if n not in glcm_features_set] ) + + if glcm_features: + for label, features in glcm_features.items(): + for prop, value in features.items(): + dat[prop] = dat.get(prop, []) + [value] return dat @@ -142,7 +129,7 @@ def n_channels(image): def PrepareData(image,z): """Function for preparing input for maskzstack function. Connecting function - to use with mc micro ilastik pipeline""" + to use with mcmicro ilastik pipeline""" image_path = Path(image) diff --git a/mcquant/SingleCellDataExtraction_v2.py b/mcquant/SingleCellDataExtraction_v2.py deleted file mode 100644 index 0cbb45b..0000000 --- a/mcquant/SingleCellDataExtraction_v2.py +++ /dev/null @@ -1,300 +0,0 @@ -#Functions for reading in single cell imaging data -#Joshua Hess - -#Import necessary modules -import skimage.io -import h5py -import pandas as pd -import numpy as np -import os -import skimage.measure -import skimage.measure._regionprops -from skimage.feature import graycomatrix, graycoprops -import tifffile -from pathlib import Path - -#### Additional functions that can be specified by the user via intensity_props - -## Function to calculate median intensity values per mask -def intensity_median(mask, intensity): - return np.median(intensity[mask]) - -## Function to sum intensity values (or in this case transcript counts) -def intensity_sum(mask, intensity): - return np.sum(intensity[mask]) - -## FUnction to calculate standard deviation of intensity values -def intensity_std(mask, intensity): - return np.std(intensity[mask]) - -## Function to calculate the gini index: https://en.wikipedia.org/wiki/Gini_coefficient -def gini_index(mask, intensity): - x = intensity[mask] - sorted_x = np.sort(x) - n = len(x) - cumx = np.cumsum(sorted_x, dtype=float) - return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n - - -def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]): - """Function for quantifying a single channel image - - Returns a table with CellID according to the mask and the mean pixel intensity - for the given channel for each cell""" - standard_props = set(skimage.measure._regionprops.COL_DTYPES) - # Look for regionprops in skimage - builtin_props = set(intensity_props).intersection(standard_props) - # Otherwise look for them in this module - extra_props = set(intensity_props).difference(standard_props) - - glcm_features_set = {"contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM"} - glcm_features = {} - - if glcm_features_set.intersection(intensity_props): - for region in skimage.measure.regionprops(mask_loaded, image_loaded_z): - label = region.label - glcm = graycomatrix(region.intensity_image, [1], [0], symmetric=True, normed=True) - glcm_props = {} - for prop in glcm_features_set.intersection(intensity_props): - glcm_props[prop] = graycoprops(glcm, prop)[0, 0] - glcm_features[label] = glcm_props - - dat = skimage.measure.regionprops_table( - mask_loaded, image_loaded_z, - properties = tuple(builtin_props), - extra_properties = [globals()[n] for n in extra_props if n not in glcm_features] - ) - - if glcm_features: - for label, features in glcm_features.items(): - for prop, value in features.items(): - dat[prop] = dat.get(prop, []) + [value] - return dat - - -def MaskIDs(mask, mask_props=None): - """This function will extract the CellIDs and the XY positions for each - cell based on that cells centroid - - Returns a dictionary object""" - - all_mask_props = set(["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) - if mask_props is not None: - all_mask_props = all_mask_props.union(mask_props) - - dat = skimage.measure.regionprops_table( - mask, - properties=all_mask_props - ) - - name_map = { - "CellID": "label", - "X_centroid": "centroid-1", - "Y_centroid": "centroid-0", - "Area": "area", - "MajorAxisLength": "major_axis_length", - "MinorAxisLength": "minor_axis_length", - "Eccentricity": "eccentricity", - "Solidity": "solidity", - "Extent": "extent", - "Orientation": "orientation", - } - for new_name, old_name in name_map.items(): - dat[new_name] = dat[old_name] - for old_name in set(name_map.values()): - del dat[old_name] - - return dat - -def n_channels(image): - """Returns the number of channel in the input image. Supports [OME]TIFF and HDF5.""" - - image_path = Path(image) - - if image_path.suffix in ['.tiff', '.tif', '.btf']: - s = tifffile.TiffFile(image).series[0] - ndim = len(s.shape) - if ndim == 2: return 1 - elif ndim == 3: return min(s.shape) - else: raise Exception('mcquant supports only 2D/3D images.') - - elif image_path.suffix in ['.h5', '.hdf5']: - f = h5py.File(image, 'r') - dat_name = list(f.keys())[0] - return f[dat_name].shape[3] - - else: - raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only') - -def PrepareData(image,z): - """Function for preparing input for maskzstack function. Connecting function - to use with mcmicro ilastik pipeline""" - - image_path = Path(image) - - #Check to see if image tif(f) - if image_path.suffix in ['.tiff', '.tif', '.btf']: - image_loaded_z = tifffile.imread(image, key=z) - - #Check to see if image is hdf5 - elif image_path.suffix in ['.h5', '.hdf5']: - #Read the image - f = h5py.File(image,'r') - #Get the dataset name from the h5 file - dat_name = list(f.keys())[0] - #Retrieve the z^th channel - image_loaded_z = f[dat_name][0,:,:,z] - - else: - raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only') - - #Return the objects - return image_loaded_z - - -def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensity_props=["intensity_mean"]): - """This function will extract the stats for each cell mask through each channel - in the input image - - mask_loaded: dictionary containing Tiff masks that represents the cells in your image. - - z_stack: Multichannel z stack image""" - - #Get the names of the keys for the masks dictionary - mask_names = list(masks_loaded.keys()) - - #Create empty dictionary to store channel results per mask - dict_of_chan = {m_name: [] for m_name in mask_names} - #Get the z channel and the associated channel name from list of channel names - for z in range(len(channel_names_loaded)): - #Run the data Prep function - image_loaded_z = PrepareData(image,z) - - #Iterate through number of masks to extract single cell data - for nm in range(len(mask_names)): - #Use the above information to mask z stack - dict_of_chan[mask_names[nm]].append( - MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z, intensity_props=intensity_props) - ) - #Print progress - print("Finished "+str(z)) - - # Column order according to histoCAT convention (Move xy position to end with spatial information) - last_cols = ( - "X_centroid", - "Y_centroid", - "column_centroid", - "row_centroid", - "Area", - "MajorAxisLength", - "MinorAxisLength", - "Eccentricity", - "Solidity", - "Extent", - "Orientation", - ) - def col_sort(x): - if x == "CellID": - return -2 - try: - return last_cols.index(x) - except ValueError: - return -1 - - #Iterate through the masks and format quantifications for each mask and property - for nm in mask_names: - mask_dict = {} - # Mean intensity is default property, stored without suffix - mask_dict.update( - zip(channel_names_loaded, [x["intensity_mean"] for x in dict_of_chan[nm]]) - ) - # All other properties are suffixed with their names - for prop_n in set(dict_of_chan[nm][0].keys()).difference(["intensity_mean"]): - mask_dict.update( - zip([f"{n}_{prop_n}" for n in channel_names_loaded], [x[prop_n] for x in dict_of_chan[nm]]) - ) - # Get the cell IDs and mask properties - mask_properties = pd.DataFrame(MaskIDs(masks_loaded[nm], mask_props=mask_props)) - mask_dict.update(mask_properties) - dict_of_chan[nm] = pd.DataFrame(mask_dict).reindex(columns=sorted(mask_dict.keys(), key=col_sort)) - - # Return the dict of dataframes for each mask - return dict_of_chan - -def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): - """Function for extracting single cell information from input - path containing single-cell masks, z_stack path, and channel_names path.""" - - #Create pathlib object for output - output = Path(output) - - #Read csv channel names - channel_names_loaded = pd.read_csv(channel_names) - #Check for the presence of `marker_name` column - if 'marker_name' in channel_names_loaded: - #Get the marker_name column if more than one column (CyCIF structure) - channel_names_loaded_list = list(channel_names_loaded.marker_name) - #Consider the old one-marker-per-line plain text format - elif channel_names_loaded.shape[1] == 1: - #re-read the csv file and add column name - channel_names_loaded = pd.read_csv(channel_names, header = None) - channel_names_loaded_list = list(channel_names_loaded.iloc[:,0]) - else: - raise Exception('%s must contain the marker_name column'%channel_names) - - #Contrast against the number of markers in the image - if len(channel_names_loaded_list) != n_channels(image): - raise Exception("The number of channels in %s doesn't match the image"%channel_names) - - #Check for unique marker names -- create new list to store new names - channel_names_loaded_checked = [] - for idx,val in enumerate(channel_names_loaded_list): - #Check for unique value - if channel_names_loaded_list.count(val) > 1: - #If unique count greater than one, add suffix - channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1)) - else: - #Otherwise, leave channel name - channel_names_loaded_checked.append(val) - - #Read the masks - masks_loaded = {} - #iterate through mask paths and read images to add to dictionary object - for m in masks: - m_full_name = os.path.basename(m) - m_name = m_full_name.split('.')[0] - masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')}) - - scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props) - #Write the singe cell data to a csv file using the image name - - # Determine the image name by cutting off its extension - im_full_name = os.path.basename(image) - im_tokens = im_full_name.split(os.extsep) - if len(im_tokens) < 2: im_name = im_tokens[0] - elif im_tokens[-2] == "ome": im_name = os.extsep.join(im_tokens[0:-2]) - else: im_name = os.extsep.join(im_tokens[0:-1]) - - # iterate through each mask and export csv with mask name as suffix - for k,v in scdata_z.items(): - # export the csv for this mask name - scdata_z[k].to_csv( - str(Path(os.path.join(str(output), - str(im_name+"_{}"+".csv").format(k)))), - index=False - ) - - -def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): - """Function for iterating over a list of z_stacks and output locations to - export single-cell data from image masks""" - - print("Extracting single-cell data for "+str(image)+'...') - - #Run the ExtractSingleCells function for this image - ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props) - - #Print update - im_full_name = os.path.basename(image) - im_name = im_full_name.split('.')[0] - print("Finished "+str(im_name)) diff --git a/mcquant/__init__.py b/mcquant/__init__.py index 7dd1069..23bdad8 100644 --- a/mcquant/__init__.py +++ b/mcquant/__init__.py @@ -1,4 +1,4 @@ try: - from ._version import version as __version__ + from ._version import version as __version__ # Version file? except ImportError: __version__ = "unknown" From 9b84eb14361ad98da2763573aca12bf3c3966128 Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Tue, 6 Aug 2024 09:40:57 +0000 Subject: [PATCH 08/14] corrected error in glcm function, forgot to use uint8 image as input --- README.md | 2 +- mcquant/SingleCellDataExtraction_expanded.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 719863a..078f2ca 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Module for single-cell data extraction given a segmentation mask and multi-chann * `--intensity_props intensity_sum` : Will calculate the sum of intensity values per labelled object in the mask. This can be useful if you want to count RNA molecules from FISH based images for example. * `--intensity_props intensity_std` : Will calculate the standard deviation of intensity values per labeled object in the mask. - Further available are GLCM-derived gracoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these are set to angle 0 and distane 1 pixel: + Further available are GLCM-derived gracoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these are set to angle 0 and distance 1 pixel: * `--intensity_props contrast` : Will calculate the glcm derived symmetric and normalized contrast of intensity values per labeled object in the mask. * `--intensity_props dissimilarity` : Will calculate the glcm derived symmetric and normalized dissimilarity of intensity values per labeled object in the mask. * `--intensity_props homogeneity` : Will calculate the glcm derived symmetric and normalized homogeneity of intensity values per labeled object in the mask. diff --git a/mcquant/SingleCellDataExtraction_expanded.py b/mcquant/SingleCellDataExtraction_expanded.py index e642b54..1b41573 100644 --- a/mcquant/SingleCellDataExtraction_expanded.py +++ b/mcquant/SingleCellDataExtraction_expanded.py @@ -54,7 +54,7 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) for region in skimage.measure.regionprops(mask_loaded, image_loaded_z): label = region.label image_uint8 = ((region.intensity_image - np.min(region.intensity_image)) * (255 / (np.max(region.intensity_image) - np.min(region.intensity_image)))).astype(np.uint8) - glcm = graycomatrix(region.intensity_image, [1], [0], symmetric=True, normed=True) + glcm = graycomatrix(image_uint8, [1], [0], symmetric=True, normed=True) glcm_props = {} for prop in glcm_features_set.intersection(intensity_props): glcm_props[prop] = graycoprops(glcm, prop)[0, 0] From 33f59c0c359c316d1718094b0387aa98ca393554 Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Wed, 7 Aug 2024 07:52:05 +0000 Subject: [PATCH 09/14] Added CLI option to set angle and distance for GLCM calculation, adjusted readme accordinglt --- README.md | 2 ++ mcquant/ParseInput.py | 11 +++++++++++ ..._expanded.py => SingleCellDataExtraction.py} | 17 ++++++++++++----- 3 files changed, 25 insertions(+), 5 deletions(-) rename mcquant/{SingleCellDataExtraction_expanded.py => SingleCellDataExtraction.py} (92%) diff --git a/README.md b/README.md index 078f2ca..7401b17 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,8 @@ Module for single-cell data extraction given a segmentation mask and multi-chann * `--intensity_props energy` : Will calculate the glcm derived symmetric and normalized energy of intensity values per labeled object in the mask. * `--intensity_props correlation` : Will calculate the glcm derived symmetric and normalized correlation of intensity values per labeled object in the mask. * `--intensity_props ASM` : Will calculate the glcm derived symmetric and normalized ASM of intensity values per labeled object in the mask. +* `--glcm_angle` Angle in radians used for calculating the GLCM per label. Default is 0 radians. +* `--glcm_distance` Distance in pixels used for calculating the GLCM per label. Default is 1 pixel. # Run script diff --git a/mcquant/ParseInput.py b/mcquant/ParseInput.py index 47dfa8b..87f63d5 100644 --- a/mcquant/ParseInput.py +++ b/mcquant/ParseInput.py @@ -43,6 +43,14 @@ def ParseInputDataExtract(): See https://scikit-image.org/docs/stable/api/skimage.feature.html """ ) + parser.add_argument( + '--glcm_angle', type=float, default=0, + help="Angle in radians for GLCM calculation. Default is 0 radians. Currently limited to 1 angle" + ) + parser.add_argument( + '--glcm_distance', type=int, default=1, + help="Distance in pixels for GLCM calculation. Default is 1 pixel." + ) #parser.add_argument('--suffix') parser.add_argument('--version', action='version', version=f'mcquant {__version__}') args = parser.parse_args() @@ -51,6 +59,9 @@ def ParseInputDataExtract(): 'channel_names': args.channel_names,'output':args.output, 'intensity_props': set(args.intensity_props if args.intensity_props is not None else []).union(["intensity_mean"]), 'mask_props': args.mask_props, + 'glcm_angle': args.glcm_angle, + 'glcm_distance': args.glcm_distance, + } #Print the dictionary object print(dict) diff --git a/mcquant/SingleCellDataExtraction_expanded.py b/mcquant/SingleCellDataExtraction.py similarity index 92% rename from mcquant/SingleCellDataExtraction_expanded.py rename to mcquant/SingleCellDataExtraction.py index 1b41573..7b1a2b9 100644 --- a/mcquant/SingleCellDataExtraction_expanded.py +++ b/mcquant/SingleCellDataExtraction.py @@ -23,7 +23,7 @@ def intensity_median(mask, intensity): def intensity_sum(mask, intensity): return np.sum(intensity[mask]) -## FUnction to calculate standard deviation of intensity values +## Function to calculate standard deviation of intensity values def intensity_std(mask, intensity): return np.std(intensity[mask]) @@ -36,7 +36,7 @@ def gini_index(mask, intensity): return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n -def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]): +def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"], glcm_angle=0, glcm_distance=1): """Function for quantifying a single channel image Returns a table with CellID according to the mask and the mean pixel intensity @@ -47,15 +47,21 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) # Otherwise look for them in this module extra_props = set(intensity_props).difference(standard_props) + # All possible scikit-image gracoprops features from glcm glcm_features_set = {"contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM"} glcm_features = {} + # Function to calculate the glcm and associated graycoprops per label only if user inputs a glcm metric into --intensity_props if glcm_features_set.intersection(intensity_props): for region in skimage.measure.regionprops(mask_loaded, image_loaded_z): + # Get the label/cell label = region.label + # Rescale the image to uint8, which is needed for glcm calculation if level attribute is not set. image_uint8 = ((region.intensity_image - np.min(region.intensity_image)) * (255 / (np.max(region.intensity_image) - np.min(region.intensity_image)))).astype(np.uint8) - glcm = graycomatrix(image_uint8, [1], [0], symmetric=True, normed=True) + # Calculate the glcm once per label/cell + glcm = graycomatrix(image_uint8, [1], [0], [glcm_distance], [glcm_angle],symmetric=True, normed=True) glcm_props = {} + # Calculate the user-specified feature(s) per label/cell for prop in glcm_features_set.intersection(intensity_props): glcm_props[prop] = graycoprops(glcm, prop)[0, 0] glcm_features[label] = glcm_props @@ -66,6 +72,7 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]) extra_properties = [globals()[n] for n in extra_props if n not in glcm_features_set] ) + # Add the glcm dict to the regionproperties if glcm_features: for label, features in glcm_features.items(): for prop, value in features.items(): @@ -112,7 +119,7 @@ def n_channels(image): image_path = Path(image) - if image_path.suffix in ['.tiff', '.tif', '.btf']: + if image_path.suffix in ['.tiff', '.tif', '.btf', 'qptiff']: s = tifffile.TiffFile(image).series[0] ndim = len(s.shape) if ndim == 2: return 1 @@ -134,7 +141,7 @@ def PrepareData(image,z): image_path = Path(image) #Check to see if image tif(f) - if image_path.suffix in ['.tiff', '.tif', '.btf']: + if image_path.suffix in ['.tiff', '.tif', '.btf', 'qptiff']: image_loaded_z = tifffile.imread(image, key=z) #Check to see if image is hdf5 From c457c59dd09319aa8b3840bd879198264d7a93c4 Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Wed, 7 Aug 2024 08:08:10 +0000 Subject: [PATCH 10/14] adjusted parameters in remaining functions for glcm angle and glcm_distance --- mcquant/SingleCellDataExtraction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction.py index 7b1a2b9..9c71093 100644 --- a/mcquant/SingleCellDataExtraction.py +++ b/mcquant/SingleCellDataExtraction.py @@ -59,7 +59,7 @@ def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"], # Rescale the image to uint8, which is needed for glcm calculation if level attribute is not set. image_uint8 = ((region.intensity_image - np.min(region.intensity_image)) * (255 / (np.max(region.intensity_image) - np.min(region.intensity_image)))).astype(np.uint8) # Calculate the glcm once per label/cell - glcm = graycomatrix(image_uint8, [1], [0], [glcm_distance], [glcm_angle],symmetric=True, normed=True) + glcm = graycomatrix(image_uint8, distances = [glcm_distance], angles = [glcm_angle], symmetric = True, normed = True) glcm_props = {} # Calculate the user-specified feature(s) per label/cell for prop in glcm_features_set.intersection(intensity_props): @@ -160,7 +160,7 @@ def PrepareData(image,z): return image_loaded_z -def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensity_props=["intensity_mean"]): +def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensity_props=["intensity_mean"], glcm_angle = 0, glcm_distance = 1): """This function will extract the stats for each cell mask through each channel in the input image @@ -229,7 +229,7 @@ def col_sort(x): # Return the dict of dataframes for each mask return dict_of_chan -def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): +def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"], glcm_angle = 0, glcm_distance = 1): """Function for extracting single cell information from input path containing single-cell masks, z_stack path, and channel_names path.""" @@ -293,7 +293,7 @@ def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intens ) -def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]): +def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"], glcm_angle = 0, glcm_distance = 1): """Function for iterating over a list of z_stacks and output locations to export single-cell data from image masks""" From 009b1210ef2bc6de38453ce30aa4e05fe507bd75 Mon Sep 17 00:00:00 2001 From: Aroj Hada Date: Wed, 7 Aug 2024 10:18:02 +0200 Subject: [PATCH 11/14] Update README.md --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7401b17..1a11350 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,10 @@ Module for single-cell data extraction given a segmentation mask and multi-chann * `--intensity_props energy` : Will calculate the glcm derived symmetric and normalized energy of intensity values per labeled object in the mask. * `--intensity_props correlation` : Will calculate the glcm derived symmetric and normalized correlation of intensity values per labeled object in the mask. * `--intensity_props ASM` : Will calculate the glcm derived symmetric and normalized ASM of intensity values per labeled object in the mask. -* `--glcm_angle` Angle in radians used for calculating the GLCM per label. Default is 0 radians. -* `--glcm_distance` Distance in pixels used for calculating the GLCM per label. Default is 1 pixel. + + Parameters for GLCM calculation + * `--glcm_angle` Angle in radians used for calculating the GLCM per label. Default is 0 radians. + * `--glcm_distance` Distance in pixels used for calculating the GLCM per label. Default is 1 pixel. # Run script @@ -49,4 +51,4 @@ Denis Schapiro (https://github.com/DenisSch) Joshua Hess (https://github.com/JoshuaHess12) -Jeremy Muhlich (https://github.com/jmuhlich) \ No newline at end of file +Jeremy Muhlich (https://github.com/jmuhlich) From 5093268da06ba62a5cbb3b8008608b26317ceae6 Mon Sep 17 00:00:00 2001 From: Aroj Hada Date: Wed, 7 Aug 2024 10:23:13 +0200 Subject: [PATCH 12/14] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1a11350..2a32ca6 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Module for single-cell data extraction given a segmentation mask and multi-chann * `--intensity_props intensity_sum` : Will calculate the sum of intensity values per labelled object in the mask. This can be useful if you want to count RNA molecules from FISH based images for example. * `--intensity_props intensity_std` : Will calculate the standard deviation of intensity values per labeled object in the mask. - Further available are GLCM-derived gracoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these are set to angle 0 and distance 1 pixel: + Further available are GLCM-derived graycoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these are set to angle 0 and distance 1 pixel: * `--intensity_props contrast` : Will calculate the glcm derived symmetric and normalized contrast of intensity values per labeled object in the mask. * `--intensity_props dissimilarity` : Will calculate the glcm derived symmetric and normalized dissimilarity of intensity values per labeled object in the mask. * `--intensity_props homogeneity` : Will calculate the glcm derived symmetric and normalized homogeneity of intensity values per labeled object in the mask. From 75a0c466bde4639bc486bc9fed1499dd9014c0f2 Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Wed, 7 Aug 2024 08:29:18 +0000 Subject: [PATCH 13/14] changed CLI inputs to be correctly passed down from funciton to function --- mcquant/SingleCellDataExtraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mcquant/SingleCellDataExtraction.py b/mcquant/SingleCellDataExtraction.py index 9c71093..1541831 100644 --- a/mcquant/SingleCellDataExtraction.py +++ b/mcquant/SingleCellDataExtraction.py @@ -182,7 +182,7 @@ def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensi for nm in range(len(mask_names)): #Use the above information to mask z stack dict_of_chan[mask_names[nm]].append( - MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z, intensity_props=intensity_props) + MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z, intensity_props=intensity_props, glcm_angle=glcm_angle, glcm_distance=glcm_distance) ) #Print progress print("Finished "+str(z)) @@ -273,7 +273,7 @@ def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intens m_name = m_full_name.split('.')[0] masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')}) - scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props) + scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props, glcm_angle=glcm_angle, glcm_distance=glcm_distance) #Write the singe cell data to a csv file using the image name # Determine the image name by cutting off its extension @@ -300,7 +300,7 @@ def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, i print("Extracting single-cell data for "+str(image)+'...') #Run the ExtractSingleCells function for this image - ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props) + ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props, glcm_angle=glcm_angle, glcm_distance=glcm_distance) #Print update im_full_name = os.path.basename(image) From 81e58e36e17046e691574b944d928277c8dc725c Mon Sep 17 00:00:00 2001 From: Lukas Hatscher Date: Wed, 7 Aug 2024 11:27:18 +0000 Subject: [PATCH 14/14] Adjusted readme and ParserInput to include information on angles and distance parameters --- README.md | 2 +- mcquant/ParseInput.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2a32ca6..70b4da6 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Module for single-cell data extraction given a segmentation mask and multi-chann * `--intensity_props intensity_sum` : Will calculate the sum of intensity values per labelled object in the mask. This can be useful if you want to count RNA molecules from FISH based images for example. * `--intensity_props intensity_std` : Will calculate the standard deviation of intensity values per labeled object in the mask. - Further available are GLCM-derived graycoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these are set to angle 0 and distance 1 pixel: + Further available are GLCM-derived graycoprops (see https://scikit-image.org/docs/stable/api/skimage.feature.html). Currently these metrices can only be calculated for one angle and distance at a time. * `--intensity_props contrast` : Will calculate the glcm derived symmetric and normalized contrast of intensity values per labeled object in the mask. * `--intensity_props dissimilarity` : Will calculate the glcm derived symmetric and normalized dissimilarity of intensity values per labeled object in the mask. * `--intensity_props homogeneity` : Will calculate the glcm derived symmetric and normalized homogeneity of intensity values per labeled object in the mask. diff --git a/mcquant/ParseInput.py b/mcquant/ParseInput.py index 87f63d5..9318bc5 100644 --- a/mcquant/ParseInput.py +++ b/mcquant/ParseInput.py @@ -38,8 +38,9 @@ def ParseInputDataExtract(): Will be calculated for every marker See https://en.wikipedia.org/wiki/Gini_coefficient contrast, dissimilarity, homogeneity, energy, correlation, ASM: - glcm/graycoprops features from scikit-image features. The distance is currently set to 1 pixel and angle to 0 rad. - will be calculated for every marker + glcm/graycoprops features from scikit-image features. The default distance is set to 1 pixel and the default angle is set to 0 rad. + Both parameters can be controlled via CLI inputs. However, both parameters are limited to 1 input each. + Will be calculated for every marker See https://scikit-image.org/docs/stable/api/skimage.feature.html """ )