Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions bin/depth_group_comparison.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/usr/bin/env python

"""
Plot depth of all genes and specific genes per sample groups.

This script plots the average depth at all consensus exons across all genes and for specific genes of interest, either in the panel or in a custom subset of genes, stratified by sample groups defined in the metadata.
The output is stored in a pdf file.
"""

import click
import pandas as pd
from utils_plot import plots_general_config
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
import ast
import re

mpl.rcParams.update({
'axes.titlesize': plots_general_config["title_fontsize"],
'axes.labelsize': plots_general_config["xylabel_fontsize"],
'xtick.labelsize': plots_general_config["xyticks_fontsize"],
'ytick.labelsize': plots_general_config["xyticks_fontsize"],
'figure.titlesize': plots_general_config["title_fontsize"],
})

separator2character = {
'tab' : '\t',
'comma' : ','
}


def plot_depth_per_group(df, group_col, data_type, pdf):
'''
Function to plot depth within a group of samples in all genes or a specific subset of genes
'''

col_name = group_col[0] if isinstance(group_col, list) else group_col

# Get the number of unique categories to plot
num_categories = df[col_name].nunique() + 2
plt.figure(figsize=(num_categories, 4))

if data_type == 'ALL_GENES':
plot_df = df[df['GENE'] == 'ALL_GENES']
title = f"Average Depth for ALL_GENES in {col_name} group"

else: # data_type is the Gene Name
plot_df = df[df['GENE'] == data_type]
title = f"Average Depth for {data_type} in {col_name} group"

if plot_df.empty:
print(f"No data available for {data_type} in {col_name} group. Skipping plot.")
return

sns.boxplot(data=plot_df, x=col_name, y="MEAN_GENE_DEPTH", hue=col_name, showfliers=False, showmeans=False, legend=False)
sns.stripplot(data=plot_df, x=col_name, y="MEAN_GENE_DEPTH", color='grey', alpha=0.5, size=4, legend=False)

plt.title(title, fontsize=plots_general_config["title_fontsize"])
plt.xlabel('', fontsize=plots_general_config["xylabel_fontsize"])
plt.ylabel(f"Average Cons Exons Depth", fontsize=plots_general_config["xylabel_fontsize"])
plt.yticks( fontsize=plots_general_config["yticks_fontsize"])
plt.xticks(fontsize=plots_general_config["xticks_fontsize"])
plt.tick_params(axis='x', rotation=90)
plt.tight_layout()
pdf.savefig()
plt.close()
plt.show()

return


@click.command()
@click.option('--table-filename', required=True, type=click.Path(exists=True), help='Input features table file')
@click.option('--depth-gene-sample', required=True, type=click.Path(exists=True), help='Input depth file per gene per sample')
@click.option('--depth-sample', required=True, type=click.Path(exists=True), help='Input depth file per sample')
@click.option('--separator', required=True, type=click.Choice(['tab', 'comma']), help='Separator used in features table: tab or comma')
@click.option('--unique-identifier', default=None, type=str, help='Unique identifier column name')
@click.option('--groups', required=True, type=str, help='List of columns with grouping information')
@click.option('--custom-genes', required=False, type=str, help='Comma separated list of custom genes')


def main(table_filename, depth_gene_sample, depth_sample, unique_identifier, separator, groups, custom_genes):

sep_char = separator2character[separator]
uniq_name = unique_identifier if unique_identifier else "sample"

# Read tables
features_table = pd.read_table(table_filename, header=0, sep=sep_char)
depth_genes_samples = pd.read_table(depth_gene_sample, header=0, sep="\t")
depth_per_sample = pd.read_table(depth_sample, header=0, sep="\t")

# Process panel genes
panel_genes = sorted(set(depth_genes_samples['GENE'].unique()))
if custom_genes:
print(f'Custom genes provided, plotting custom genes only: {custom_genes}')
custom_gene_list = [g.strip() for g in custom_genes.split(",")]
panel_genes = sorted(set(custom_gene_list) & set(panel_genes))

else:
print(f'No custom genes provided, plotting all genes in the panel: {panel_genes}')


# Process depth per sample to add the 'ALL_GENES' depth value per sample
print('Processing per sample depth table to add the ALL_GENES depth column: ')
depth_per_sample['GENE'] = 'ALL_GENES'
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@FerriolCalvet if user does not define the genes in the panel as custom genes, with the way we handle it now to calculate ALL_GENES depth, it will display all depths from both on target and off target genes in the panel right? I mean the default would not be the panel genes but including off targets...

depth_per_sample = depth_per_sample.rename(columns={'avg_depth_sample': 'MEAN_GENE_DEPTH'})

print('Depth per sample table after adding ALL_GENES value in GENES column:')
print(depth_per_sample.head())

depth_genes_samples = pd.concat([depth_genes_samples, depth_per_sample], ignore_index=True)
print('Added ALL_GENES depth values to depth genes samples table:')
print('Length of table:', len(depth_genes_samples))
print(depth_genes_samples.head())

# Merge data with metadata table to get the group information for each sample
merged_depth_df = pd.merge(features_table, depth_genes_samples, how='left', left_on=uniq_name, right_on='SAMPLE_ID')
print('Merged depth and metadata table:')
print(merged_depth_df.head())
print('Length of merged table:', len(merged_depth_df))

# Process groups
# First clean the string (adds quotes if the shell stripped them)
cleaned = re.sub(r'(?<!["\'])\b([a-zA-Z_][a-zA-Z0-9_]*)\b(?!["\'])', r'"\1"', groups)

# Then parse into a list of lists
raw_groups = ast.literal_eval(cleaned) if groups else []

# Finally flatten and get unique items
# This nested loop goes through each sub-list and each item within it
groups_of_interest = list(set(item for sublist in raw_groups for item in sublist))


print(f"Processing data for the groups of interest: {groups_of_interest}")

# Handle groups so each group has its own plot in all and individual genes and stored in the same pdf file per group
for group in groups_of_interest:
output_name = f"{group}.plot_depth_per_group.pdf"

with PdfPages(output_name) as pdf:
print(f"Processing {group} group, type: {type(group)}")
group_table = merged_depth_df[[uniq_name, group, 'GENE', 'MEAN_GENE_DEPTH']]
print(group_table.head())
print('Length of the processed table', len(group_table))

# Plot depth of all samples for each group
plot_depth_per_group(group_table, group, 'ALL_GENES', pdf)

# Plot depths per gene (defined cutom genes or genes in the panel) for each group
gene_table = group_table[group_table['GENE'].isin(panel_genes)]
for gene in panel_genes:
gene_table = group_table[group_table['GENE'] == gene]
print('Length of gene data for gene', gene, ':', len(gene_table))
plot_depth_per_group(gene_table, group, gene, pdf)

print(f"Plots saved as {output_name}")

if __name__ == "__main__":
main()

'''
Example usage:
python depth_group_comparison.py \
--table-filename metadata_table_all_with_bacterial_signatures.tsv \
--depth-table all_samples.exons_cons.depth_per_gene_per_sample.tsv \
--depth-sample all_samples.exons_cons.depth_per_sample.tsv \
--separator tab \
--unique-identifier Sample_Name \
--groups "[ ["Sample_Group"], ["cancer"], ["Age_onset"], ["Cancer_age_group"] , ["Bacterial_Signatures_identified"]]" \
--custom-genes APC,BRAF,FBXW7,KRAS,PIK3CA,SMAD4,TP53' \
'''
15 changes: 15 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,21 @@ process {
]
}

withName: "ANALYZEDEPTHSGROUPS" {
// these params.features are defined in nextflow.config
ext.unique_identifier = params.features_unique_identifier
ext.features_groups = params.features_groups_list
ext.separator = params.features_table_separator
ext.custom_genes = params.features_genes_list
// define the list of custom genes here
// you will have to add an extra parameters to the pipeline (nextflow.config) and then handle it here
publishDir = [
path: { "${params.outdir}/plots/depths_summary/plots_per_group" },
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now added as subdirectory specific for groups

mode: params.publish_dir_mode,
pattern: '**{pdf}',
]
}

withName: GROUPGENES {
ext.custom = params.custom_groups
ext.hotspots = params.create_subgenic_regions
Expand Down
54 changes: 54 additions & 0 deletions modules/local/analyzedepths/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
process ANALYZE_DEPTHS_GROUPS {

tag "groups"
label 'process_low'

label 'deepcsa_core'

input:
path(features_table)
// note that these input depth files are generated in another step and defined in /deepCSA/subworkflows/local/plotdepths/main.nf
tuple val(meta) , path(average_depth_gene_sample) // needs to be added as a tupple since in PLOT_DEPTHS module (/modules/plot/depths_summary/main.nf) the output is set up as a tupple to"track" to which metadata belongs to this file
tuple val(meta) , path(average_depth_sample)



output:
// the main outputs will be the PDFs
path("*.plot_depth_per_group.pdf") , emit: plots_per_gene_per_group

script:

def separator = task.ext.separator ?: "comma"
def custom_groups = task.ext.features_groups ? "--groups \"${task.ext.features_groups}\" " : ""
def custom_genes = task.ext.features_genes ? "--custom-genes \"${task.ext.features_genes}\" " : ""
def unique_identifier = task.ext.unique_identifier ? "--unique-identifier ${task.ext.unique_identifier}" : ""

// depth_group_comparison.py is in bin/ and has execution permissions add shebang
// ${average_depth_gene_sample} comes from subworkflows/local/plotdepths/main.nf
"""

depth_group_comparison.py \\
--table-filename ${features_table} \\
--depth-table ${average_depth_gene_sample} \\
--separator ${separator} \\
${unique_identifier} \\
${custom_groups} \\
${custom_genes} \\

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""

stub:
"""
touch depth_group_comparison.plot_depth_per_group.pdf

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ params {
features_table_separator = 'comma'
features_unique_identifier = null
features_groups_list = null
features_genes_list = null

custom_groups = false
custom_groups_file = null
Expand Down
7 changes: 7 additions & 0 deletions workflows/deepcsa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ include { TABLE_2_GROUP as TABLE2GROUP } from '../m
include { ANNOTATE_DEPTHS as ANNOTATEDEPTHS } from '../modules/local/annotatedepth/main'
include { DOWNSAMPLE_DEPTHS as DOWNSAMPLEDEPTHS } from '../modules/local/downsample/depths/main'

include { ANALYZE_DEPTHS_GROUPS as ANALYZEDEPTHSGROUPS } from '../modules/local/analyzedepths/main'

include { SELECT_MUTDENSITIES as SYNMUTDENSITY } from '../modules/local/select_mutdensity/main'
include { SELECT_MUTDENSITIES as SYNMUTREADSDENSITY } from '../modules/local/select_mutdensity/main'

Expand Down Expand Up @@ -227,6 +229,11 @@ workflow DEEPCSA{
}
PLOTDEPTHSEXONSCONS(ANNOTATEDEPTHS.out.all_samples_depths, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel)

// ANALYZEDEPTHSGROUPS should run only when user defines a group list
if (params.features_groups_list) {
ANALYZEDEPTHSGROUPS(features_table, PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample)
}

// Enrich regions in consensus panels
ENRICHPANELS(MUT_PREPROCESSING.out.mutations_all_samples,
ANNOTATEDEPTHS.out.all_samples_depths,
Expand Down