From ec55effa24dcbd17fc98c30abed6573b6f1ea89a Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 24 Feb 2026 15:18:28 +0100 Subject: [PATCH 01/13] add temporary changes - branch does NOT work --- conf/modules.config | 15 ++++++++++ modules/local/analyzedepths/main.nf | 46 +++++++++++++++++++++++++++++ workflows/deepcsa.nf | 6 ++++ 3 files changed, 67 insertions(+) create mode 100644 modules/local/analyzedepths/main.nf diff --git a/conf/modules.config b/conf/modules.config index 327648f0..f938a0ce 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -336,6 +336,21 @@ process { ] } + withName: "ANALYZEDEPTHSGROUPS" { + ext.unique_identifier = params.features_unique_identifier + ext.feature_groups = params.features_groups_list + ext.separator = params.features_table_separator + + // define the list of custom genes here + // you will have to add an extra parameters to the pipeline (nextflow.config) and then handle it here + publishDir = [ + path: { "${params.outdir}/" }, // TODO define the desired output path + mode: params.publish_dir_mode, + pattern: '**{json}', // TODO define the format of the file that will be outputted + ] + } + + withName: GROUPGENES { ext.custom = params.custom_groups ext.hotspots = params.create_subgenic_regions diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf new file mode 100644 index 00000000..64a8dee7 --- /dev/null +++ b/modules/local/analyzedepths/main.nf @@ -0,0 +1,46 @@ +process ANALYZE_DEPTHS_GROUPS { + + tag "groups" + label 'process_low' + + label 'deepcsa_core' + + input: + path(features_table) + // add another path with the depths per gene per sample + // optionally add another one with the depth per sample + + output: + // the main outputs will be the PDFs + // path("samples.json") , emit: json_samples + path "versions.yml" , topic: versions + + script: + def separator = task.ext.separator ?: "comma" + def custom_groups = task.ext.feature_groups ? "--groups \"${task.ext.feature_groups}\" " : "" + def unique_identifier = task.ext.unique_identifier ? "--unique-identifier ${task.ext.unique_identifier}" : "" + + // .py should be in bin/ and you should make sure it can be executed (check permissions of the file and add shebang if needed) + """ + .py \\ + --table-filename ${features_table} \\ + --separator ${separator} \\ + ${unique_identifier} \\ + ${custom_groups} + // TODO add the missing parameters + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch samples.json groups.json all_groups.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index 7ad9dfb7..ab326104 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -101,6 +101,8 @@ include { TABLE_2_GROUP as TABLE2GROUP } from '../m include { ANNOTATE_DEPTHS as ANNOTATEDEPTHS } from '../modules/local/annotatedepth/main' include { DOWNSAMPLE_DEPTHS as DOWNSAMPLEDEPTHS } from '../modules/local/downsample/depths/main' +include { ANALYZE_DEPTHS_GROUPS as ANALYZEDEPTHSGROUPS } from '../modules/local/analyzedepths/main' + include { SELECT_MUTDENSITIES as SYNMUTDENSITY } from '../modules/local/select_mutdensity/main' include { SELECT_MUTDENSITIES as SYNMUTREADSDENSITY } from '../modules/local/select_mutdensity/main' @@ -227,6 +229,10 @@ workflow DEEPCSA{ } PLOTDEPTHSEXONSCONS(ANNOTATEDEPTHS.out.all_samples_depths, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel) + // define it as a module very similar to the table2group one + ANALYZEDEPTHSGROUPS(features_table, PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample) + + // Enrich regions in consensus panels ENRICHPANELS(MUT_PREPROCESSING.out.mutations_all_samples, ANNOTATEDEPTHS.out.all_samples_depths, From 6a1a5e189ce5754bfb29ae0c131bfcbdb13c8e9f Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 24 Feb 2026 15:28:05 +0100 Subject: [PATCH 02/13] minor assist --- modules/local/analyzedepths/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf index 64a8dee7..448cdb0f 100644 --- a/modules/local/analyzedepths/main.nf +++ b/modules/local/analyzedepths/main.nf @@ -8,7 +8,8 @@ process ANALYZE_DEPTHS_GROUPS { input: path(features_table) // add another path with the depths per gene per sample - // optionally add another one with the depth per sample + // (take into account that maybe the format is not a single path but it comes with a name at the beginning (as a tuple) explore other modules for examples of this) + // optionally add another one with the depth per sample output: // the main outputs will be the PDFs From 9bc9e7c6e41856c86e515d574c97fc60e88c91bc Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 09:56:02 +0100 Subject: [PATCH 03/13] added script to plot depths per group and modified modules, config and main nextflow files to integrate in the pipeline --- bin/depth_group_comparison.py | 144 ++++++++++++++++++++++++++++ conf/modules.config | 14 +-- modules/local/analyzedepths/main.nf | 40 ++++---- nextflow.config | 1 + workflows/deepcsa.nf | 7 ++ 5 files changed, 184 insertions(+), 22 deletions(-) create mode 100755 bin/depth_group_comparison.py diff --git a/bin/depth_group_comparison.py b/bin/depth_group_comparison.py new file mode 100755 index 00000000..2964568e --- /dev/null +++ b/bin/depth_group_comparison.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python + +""" +Plot depth of all genes and specific genes per sample groups. + +This script plots the average depth at all consensus exons across all genes and for specific genes of interest, either in the panel or in a custom subset of genes, stratified by sample groups defined in the metadata. +The output is stored in a pdf file. +""" + +import click +import pandas as pd +from utils_plot import plots_general_config +import matplotlib.pyplot as plt +import matplotlib as mpl +from matplotlib.backends.backend_pdf import PdfPages +import seaborn as sns + +mpl.rcParams.update({ + 'axes.titlesize': plots_general_config["title_fontsize"], + 'axes.labelsize': plots_general_config["xylabel_fontsize"], + 'xtick.labelsize': plots_general_config["xyticks_fontsize"], + 'ytick.labelsize': plots_general_config["xyticks_fontsize"], + 'figure.titlesize': plots_general_config["title_fontsize"], +}) + +separator2character = { + 'tab' : '\t', + 'comma' : ',' +} + + +def plot_depth_per_group(df, group_col, data_type, pdf): + ''' + Function to plot depth within a group of samples in all genes or a specific subset of genes + ''' + + col_name = group_col[0] if isinstance(group_col, list) else group_col + + # Get the number of unique categories to plot + num_categories = df[col_name].nunique() + 1 + plt.figure(figsize=(num_categories, 4)) + + ax = sns.boxplot(data=df, x=col_name, y="MEAN_GENE_DEPTH", hue=col_name, showfliers=False, showmeans=False,legend=False) + ax = sns.stripplot(data=df, x=col_name, y="MEAN_GENE_DEPTH", color='grey', alpha=0.5, size=4, legend=False) + + if data_type == 'all_genes': + plt.title(f"Average Depth for {data_type} in {col_name} group", fontsize=plots_general_config["title_fontsize"]) + elif data_type == 'gene': + gene = df['GENE'].iloc[0] + plt.title(f"Average Depth for {gene} in {col_name} group", fontsize=plots_general_config["title_fontsize"]) + else: + print(f"Unknown data type: {data_type}. Title will not be set.") + + plt.xlabel('', fontsize=plots_general_config["xylabel_fontsize"]) + plt.ylabel(f"Average Cons Exons Depth", fontsize=plots_general_config["xylabel_fontsize"]) + plt.yticks( fontsize=plots_general_config["yticks_fontsize"]) + plt.xticks(fontsize=plots_general_config["xticks_fontsize"]) + plt.tick_params(axis='x', rotation=90) + plt.tight_layout() + pdf.savefig() + plt.close() + plt.show() + + return + + +@click.command() +@click.option('--table-filename', required=True, type=click.Path(exists=True), help='Input features table file') +@click.option('--depth-table', required=True, type=click.Path(exists=True), help='Input depth table file') +@click.option('--separator', required=True, type=click.Choice(['tab', 'comma']), help='Separator: tab or comma') +@click.option('--unique-identifier', default=None, type=str, help='Unique identifier column name') +@click.option('--groups', default=None, type=str, help='List of columns with grouping information') +@click.option('--custom-genes', required=False, type=str, help='Comma separated list of custom genes') +@click.option('--output_prefix', type=str, required=True, help='Prefix for output files') + + +def main(table_filename, depth_table, unique_identifier, separator, groups, custom_genes, output_prefix): + + sep_char = separator2character[separator] + + # Read tables + features_table = pd.read_table(table_filename, header=0, sep=sep_char) + depth_table = pd.read_table(depth_table, header=0, sep=sep_char) + + # Process panel genes + panel_genes = sorted(set(depth_table['GENE'].unique())) + if custom_genes: + print(f'Custom genes provided, plotting custom genes only: {custom_genes}') + custom_gene_list = [g.strip() for g in custom_genes.split(",")] + panel_genes = sorted(set(custom_gene_list) & set(panel_genes)) + + else: + print(f'No custom genes provided, plotting all genes in the panel: {panel_genes}') + + output_pdf_path = f"{output_prefix}.plot_depth_per_group.pdf" + + # groups may contain lists of lists, but all formatted into a string + groups_of_interest_init = [group.strip().strip(",").split(",") for group in groups.replace("[", ";;;").replace("]", "").split(";;;")] if groups else [] + + groups_of_interest = [] + for comparison in groups_of_interest_init: + comparison_group_clean = [item.strip() for item in comparison] + comparison_group = [item for item in comparison_group_clean if item != ''] + if len(comparison_group) > 0: + groups_of_interest.append(comparison_group) + + uniq_name = unique_identifier if unique_identifier else "sample" + + print(f"Processing data for the groups of interest: {groups_of_interest}") + + with PdfPages(output_pdf_path) as pdf: + for group in groups_of_interest: + print(f"Processing {group} group, type: {type(group)}") + metadata_group_df = features_table[[uniq_name, str(group[0])]] + merged_depth_df = pd.merge(metadata_group_df, depth_table, how='left', left_on=uniq_name, right_on='SAMPLE_ID') + print(merged_depth_df[[uniq_name, str(group[0]), 'MEAN_GENE_DEPTH']].head()) + print('Length of the processed table', len(merged_depth_df)) + + # Plot depth of all samples for each group + plot_depth_per_group(merged_depth_df, group, 'all_genes', pdf) + + # Plot depths per gene (defined cutom genes or genes in the panel) for each group + merged_depth_df = merged_depth_df[merged_depth_df['GENE'].isin(panel_genes)] + for gene in panel_genes: + gene_data = merged_depth_df[merged_depth_df['GENE'] == gene] + print('Length of gene data for gene', gene, ':', len(gene_data)) + plot_depth_per_group(gene_data, group, 'gene', pdf) + + print(f"Plots saved to {output_pdf_path}") + +if __name__ == "__main__": + main() + +''' +Example usage: +python depth_group_comparison.py \ + --table-filename metadata_table_all_with_bacterial_signatures.tsv \ + --depth-table all_samples.exons_cons.depth_per_gene_per_sample.tsv \ + --separator tab \ + --unique-identifier Sample_Name \ + --groups "[ ["Sample_Group"], ["cancer"], ["Age_onset"], ["Cancer_age_group"] , ["Bacterial_Signatures_identified"]]" \ + --custom-genes APC,BRAF,FBXW7,KRAS,PIK3CA,SMAD4,TP53' \ + --output_prefix /data/bbg/projects/prominent/dev/internal_development/depth_group_comparison +''' \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index f938a0ce..d948ea09 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -337,20 +337,22 @@ process { } withName: "ANALYZEDEPTHSGROUPS" { - ext.unique_identifier = params.features_unique_identifier - ext.feature_groups = params.features_groups_list - ext.separator = params.features_table_separator - + \\ these params.features are defined in nextflow.config + ext.features_unique_identifier = params.features_unique_identifier + ext.features_groups = params.features_groups_list + ext.separator = params.separator + ext.custom_genes = params.features_genes_list // define the list of custom genes here // you will have to add an extra parameters to the pipeline (nextflow.config) and then handle it here publishDir = [ - path: { "${params.outdir}/" }, // TODO define the desired output path + path: { "${params.outdir}/depths/summary" }, // ideally this should be in plots directory together with the other file /depths/summary/all_samples.exons_cons.depths.pdf mode: params.publish_dir_mode, - pattern: '**{json}', // TODO define the format of the file that will be outputted + pattern: '**{pdf}', ] } + withName: GROUPGENES { ext.custom = params.custom_groups ext.hotspots = params.create_subgenic_regions diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf index 448cdb0f..3b87b6bc 100644 --- a/modules/local/analyzedepths/main.nf +++ b/modules/local/analyzedepths/main.nf @@ -7,28 +7,36 @@ process ANALYZE_DEPTHS_GROUPS { input: path(features_table) - // add another path with the depths per gene per sample - // (take into account that maybe the format is not a single path but it comes with a name at the beginning (as a tuple) explore other modules for examples of this) - // optionally add another one with the depth per sample + //input depth file + // note that the variable for depth is defined in /deepCSA/subworkflows/local/plotdepths/main.nf + tuple val(meta) , path(average_depth_gene_sample) // needs to be added as a tupple since in PLOT_DEPTHS module (/modules/plot/depths_summary/main.nf) the output is set up as a tupple to"track" to which metadata belongs to this file + + output: // the main outputs will be the PDFs - // path("samples.json") , emit: json_samples - path "versions.yml" , topic: versions + path("*.plot_depth_per_group.pdf") , emit: plots script: - def separator = task.ext.separator ?: "comma" - def custom_groups = task.ext.feature_groups ? "--groups \"${task.ext.feature_groups}\" " : "" + // Use meta.id to ensure each sample gets a unique folder/file name + def output_path = task.workDir + def separator = task.ext.separator ? " --separator \"${task.ext.separator}\" " : "" + def custom_groups = task.ext.features_groups ? "--groups \"${task.ext.features_groups}\" " : "" + def custom_genes = task.ext.features_genes ? "--custom-genes \"${task.ext.features_genes}\" " : "" def unique_identifier = task.ext.unique_identifier ? "--unique-identifier ${task.ext.unique_identifier}" : "" - // .py should be in bin/ and you should make sure it can be executed (check permissions of the file and add shebang if needed) + // depth_group_comparison.py is in bin/ and has execution permissions add shebang """ - .py \\ - --table-filename ${features_table} \\ - --separator ${separator} \\ - ${unique_identifier} \\ - ${custom_groups} - // TODO add the missing parameters + + depth_group_comparison.py \\ + --table-filename $features_table \\ + --depth-table $average_depth_gene_sample \\ + $separator \\ + $unique_identifier \\ + $custom_groups \\ + $custom_genes \\ + --output_prefix ${output_path}/ + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') @@ -37,11 +45,11 @@ process ANALYZE_DEPTHS_GROUPS { stub: """ - touch samples.json groups.json all_groups.json + touch groups.json all_groups.json cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') END_VERSIONS """ -} +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 4faa435f..ffc7f647 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { features_table_separator = 'comma' features_unique_identifier = null features_groups_list = null + features_genes_list = null custom_groups = false custom_groups_file = null diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index ab326104..0699e73d 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -232,6 +232,13 @@ workflow DEEPCSA{ // define it as a module very similar to the table2group one ANALYZEDEPTHSGROUPS(features_table, PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample) + // Load group keys from JSON file in 'groups' channel + TABLE2GROUP.out.json_groups.map { json_path -> + def json = file(json_path).text + groovy.json.JsonSlurper.newInstance().parseText(json).keySet() + }.flatten().unique() + .set { group_keys_ch } // this is a channel that contains only the group names as elements of the channel + // Enrich regions in consensus panels ENRICHPANELS(MUT_PREPROCESSING.out.mutations_all_samples, From 7bab0a17a5ea76117ff5af7b7a2cb3ff82beb74f Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 10:08:47 +0100 Subject: [PATCH 04/13] corrected typo --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d948ea09..eddb1330 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -337,7 +337,7 @@ process { } withName: "ANALYZEDEPTHSGROUPS" { - \\ these params.features are defined in nextflow.config + // these params.features are defined in nextflow.config ext.features_unique_identifier = params.features_unique_identifier ext.features_groups = params.features_groups_list ext.separator = params.separator From 714fd20778be37038ec86efbee87176ad2c0ef2a Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 10:24:01 +0100 Subject: [PATCH 05/13] corrected separator variable name in modules.config so it is the same as in modules main.nf --- conf/modules.config | 2 +- modules/local/analyzedepths/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index eddb1330..6b7989e4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -340,7 +340,7 @@ process { // these params.features are defined in nextflow.config ext.features_unique_identifier = params.features_unique_identifier ext.features_groups = params.features_groups_list - ext.separator = params.separator + ext.separator = params.custom_groups_separator ext.custom_genes = params.features_genes_list // define the list of custom genes here // you will have to add an extra parameters to the pipeline (nextflow.config) and then handle it here diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf index 3b87b6bc..10d7093b 100644 --- a/modules/local/analyzedepths/main.nf +++ b/modules/local/analyzedepths/main.nf @@ -20,7 +20,7 @@ process ANALYZE_DEPTHS_GROUPS { script: // Use meta.id to ensure each sample gets a unique folder/file name def output_path = task.workDir - def separator = task.ext.separator ? " --separator \"${task.ext.separator}\" " : "" + def separator = task.ext.separator ? " --separator \"${task.ext.custom_groups_separator}\" " : "" def custom_groups = task.ext.features_groups ? "--groups \"${task.ext.features_groups}\" " : "" def custom_genes = task.ext.features_genes ? "--custom-genes \"${task.ext.features_genes}\" " : "" def unique_identifier = task.ext.unique_identifier ? "--unique-identifier ${task.ext.unique_identifier}" : "" From f9677d9978d6d0d97e76b9a1a0bdf4165bdf4d24 Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 11:21:19 +0100 Subject: [PATCH 06/13] clarified terms in script, modified variable names for module in config , corrected process --- bin/depth_group_comparison.py | 12 ++++++------ conf/modules.config | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/depth_group_comparison.py b/bin/depth_group_comparison.py index 2964568e..e56c74df 100755 --- a/bin/depth_group_comparison.py +++ b/bin/depth_group_comparison.py @@ -67,7 +67,7 @@ def plot_depth_per_group(df, group_col, data_type, pdf): @click.command() @click.option('--table-filename', required=True, type=click.Path(exists=True), help='Input features table file') @click.option('--depth-table', required=True, type=click.Path(exists=True), help='Input depth table file') -@click.option('--separator', required=True, type=click.Choice(['tab', 'comma']), help='Separator: tab or comma') +@click.option('--separator', required=True, type=click.Choice(['tab', 'comma']), help='Separator used in features table: tab or comma') @click.option('--unique-identifier', default=None, type=str, help='Unique identifier column name') @click.option('--groups', default=None, type=str, help='List of columns with grouping information') @click.option('--custom-genes', required=False, type=str, help='Comma separated list of custom genes') @@ -80,7 +80,7 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust # Read tables features_table = pd.read_table(table_filename, header=0, sep=sep_char) - depth_table = pd.read_table(depth_table, header=0, sep=sep_char) + depth_table = pd.read_table(depth_table, header=0, sep="\t") # Process panel genes panel_genes = sorted(set(depth_table['GENE'].unique())) @@ -92,7 +92,7 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust else: print(f'No custom genes provided, plotting all genes in the panel: {panel_genes}') - output_pdf_path = f"{output_prefix}.plot_depth_per_group.pdf" + output_name = f"{output_prefix}.plot_depth_per_group.pdf" # groups may contain lists of lists, but all formatted into a string groups_of_interest_init = [group.strip().strip(",").split(",") for group in groups.replace("[", ";;;").replace("]", "").split(";;;")] if groups else [] @@ -108,7 +108,7 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust print(f"Processing data for the groups of interest: {groups_of_interest}") - with PdfPages(output_pdf_path) as pdf: + with PdfPages(output_name) as pdf: for group in groups_of_interest: print(f"Processing {group} group, type: {type(group)}") metadata_group_df = features_table[[uniq_name, str(group[0])]] @@ -126,7 +126,7 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust print('Length of gene data for gene', gene, ':', len(gene_data)) plot_depth_per_group(gene_data, group, 'gene', pdf) - print(f"Plots saved to {output_pdf_path}") + print(f"Plots saved as {output_name}") if __name__ == "__main__": main() @@ -140,5 +140,5 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust --unique-identifier Sample_Name \ --groups "[ ["Sample_Group"], ["cancer"], ["Age_onset"], ["Cancer_age_group"] , ["Bacterial_Signatures_identified"]]" \ --custom-genes APC,BRAF,FBXW7,KRAS,PIK3CA,SMAD4,TP53' \ - --output_prefix /data/bbg/projects/prominent/dev/internal_development/depth_group_comparison + --output_prefix depth_group_comparison ''' \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 6b7989e4..ba50029c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -338,9 +338,9 @@ process { withName: "ANALYZEDEPTHSGROUPS" { // these params.features are defined in nextflow.config - ext.features_unique_identifier = params.features_unique_identifier + ext.unique_identifier = params.features_unique_identifier ext.features_groups = params.features_groups_list - ext.separator = params.custom_groups_separator + ext.separator = params.features_table_separator ext.custom_genes = params.features_genes_list // define the list of custom genes here // you will have to add an extra parameters to the pipeline (nextflow.config) and then handle it here From 3d4613dedfe9d26d7bb6ce8aeae71dbef6ad2593 Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 11:23:50 +0100 Subject: [PATCH 07/13] modfied main.nf according previous commit --- modules/local/analyzedepths/main.nf | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf index 10d7093b..8693d93b 100644 --- a/modules/local/analyzedepths/main.nf +++ b/modules/local/analyzedepths/main.nf @@ -15,27 +15,28 @@ process ANALYZE_DEPTHS_GROUPS { output: // the main outputs will be the PDFs - path("*.plot_depth_per_group.pdf") , emit: plots + path("*.plot_depth_per_group.pdf") , emit: plots_per_gene_per_group script: // Use meta.id to ensure each sample gets a unique folder/file name - def output_path = task.workDir - def separator = task.ext.separator ? " --separator \"${task.ext.custom_groups_separator}\" " : "" + def output_prefix = "depth_group_comparison" + def separator = task.ext.separator ?: "comma" def custom_groups = task.ext.features_groups ? "--groups \"${task.ext.features_groups}\" " : "" def custom_genes = task.ext.features_genes ? "--custom-genes \"${task.ext.features_genes}\" " : "" def unique_identifier = task.ext.unique_identifier ? "--unique-identifier ${task.ext.unique_identifier}" : "" // depth_group_comparison.py is in bin/ and has execution permissions add shebang + // ${average_depth_gene_sample} comes from subworkflows/local/plotdepths/main.nf """ depth_group_comparison.py \\ - --table-filename $features_table \\ - --depth-table $average_depth_gene_sample \\ - $separator \\ - $unique_identifier \\ - $custom_groups \\ - $custom_genes \\ - --output_prefix ${output_path}/ + --table-filename ${features_table} \\ + --depth-table ${average_depth_gene_sample} \\ + --separator ${separator} \\ + ${unique_identifier} \\ + ${custom_groups} \\ + ${custom_genes} \\ + --output_prefix ${output_prefix}/ cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -45,7 +46,7 @@ process ANALYZE_DEPTHS_GROUPS { stub: """ - touch groups.json all_groups.json + touch depth_group_comparison.plot_depth_per_group.pdf cat <<-END_VERSIONS > versions.yml "${task.process}": From 0e72b5e6e912735c3865a934e180ad2af1d5b067 Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 11:55:18 +0100 Subject: [PATCH 08/13] removed slash in output prefix params so it does not handle it as a directory --- modules/local/analyzedepths/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf index 8693d93b..5182df26 100644 --- a/modules/local/analyzedepths/main.nf +++ b/modules/local/analyzedepths/main.nf @@ -36,7 +36,7 @@ process ANALYZE_DEPTHS_GROUPS { ${unique_identifier} \\ ${custom_groups} \\ ${custom_genes} \\ - --output_prefix ${output_prefix}/ + --output_prefix ${output_prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": From 94153bdde86efb15b3821645adba85e1a51f81b6 Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 12:17:34 +0100 Subject: [PATCH 09/13] modified script to add extra width space in plots so title is not cut --- bin/depth_group_comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/depth_group_comparison.py b/bin/depth_group_comparison.py index e56c74df..71b10d20 100755 --- a/bin/depth_group_comparison.py +++ b/bin/depth_group_comparison.py @@ -37,7 +37,7 @@ def plot_depth_per_group(df, group_col, data_type, pdf): col_name = group_col[0] if isinstance(group_col, list) else group_col # Get the number of unique categories to plot - num_categories = df[col_name].nunique() + 1 + num_categories = df[col_name].nunique() + 2 plt.figure(figsize=(num_categories, 4)) ax = sns.boxplot(data=df, x=col_name, y="MEAN_GENE_DEPTH", hue=col_name, showfliers=False, showmeans=False,legend=False) From 735b525346c1c5d17185465ec2fb67cd02823b0b Mon Sep 17 00:00:00 2001 From: Bet Figuerola <154223352+efigb@users.noreply.github.com> Date: Wed, 25 Feb 2026 16:41:08 +0100 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Marta Huertas <97596516+m-huertasp@users.noreply.github.com> --- bin/depth_group_comparison.py | 11 ++++------- workflows/deepcsa.nf | 2 ++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/bin/depth_group_comparison.py b/bin/depth_group_comparison.py index 71b10d20..3347707e 100755 --- a/bin/depth_group_comparison.py +++ b/bin/depth_group_comparison.py @@ -69,7 +69,7 @@ def plot_depth_per_group(df, group_col, data_type, pdf): @click.option('--depth-table', required=True, type=click.Path(exists=True), help='Input depth table file') @click.option('--separator', required=True, type=click.Choice(['tab', 'comma']), help='Separator used in features table: tab or comma') @click.option('--unique-identifier', default=None, type=str, help='Unique identifier column name') -@click.option('--groups', default=None, type=str, help='List of columns with grouping information') +@click.option('--groups', required=True, type=str, help='List of columns with grouping information') @click.option('--custom-genes', required=False, type=str, help='Comma separated list of custom genes') @click.option('--output_prefix', type=str, required=True, help='Prefix for output files') @@ -95,14 +95,11 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust output_name = f"{output_prefix}.plot_depth_per_group.pdf" # groups may contain lists of lists, but all formatted into a string - groups_of_interest_init = [group.strip().strip(",").split(",") for group in groups.replace("[", ";;;").replace("]", "").split(";;;")] if groups else [] +import ast + groups_of_interest_init = ast.literal_eval(groups) if groups else [] groups_of_interest = [] - for comparison in groups_of_interest_init: - comparison_group_clean = [item.strip() for item in comparison] - comparison_group = [item for item in comparison_group_clean if item != ''] - if len(comparison_group) > 0: - groups_of_interest.append(comparison_group) +groups_of_interest = list(dict.fromkeys(item.strip() for sublist in groups_of_interest_init for item in sublist if item != '')) uniq_name = unique_identifier if unique_identifier else "sample" diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index 0699e73d..b1238f40 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -230,7 +230,9 @@ workflow DEEPCSA{ PLOTDEPTHSEXONSCONS(ANNOTATEDEPTHS.out.all_samples_depths, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel) // define it as a module very similar to the table2group one +if (params.features_groups_list) { ANALYZEDEPTHSGROUPS(features_table, PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample) +} // Load group keys from JSON file in 'groups' channel TABLE2GROUP.out.json_groups.map { json_path -> From bd9c5c9560eb60b78d5357c71125636ed452f7aa Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 17:07:47 +0100 Subject: [PATCH 11/13] modified script and associated files to omit output prefix variable as parameter, and the output of a pdf with all plots per group. Also added a condition so this module can only run when user defines a groups list --- bin/depth_group_comparison.py | 22 ++++++++++------------ conf/modules.config | 2 -- modules/local/analyzedepths/main.nf | 4 +--- workflows/deepcsa.nf | 16 ++++------------ 4 files changed, 15 insertions(+), 29 deletions(-) diff --git a/bin/depth_group_comparison.py b/bin/depth_group_comparison.py index 3347707e..44219271 100755 --- a/bin/depth_group_comparison.py +++ b/bin/depth_group_comparison.py @@ -14,6 +14,7 @@ import matplotlib as mpl from matplotlib.backends.backend_pdf import PdfPages import seaborn as sns +import ast mpl.rcParams.update({ 'axes.titlesize': plots_general_config["title_fontsize"], @@ -71,12 +72,12 @@ def plot_depth_per_group(df, group_col, data_type, pdf): @click.option('--unique-identifier', default=None, type=str, help='Unique identifier column name') @click.option('--groups', required=True, type=str, help='List of columns with grouping information') @click.option('--custom-genes', required=False, type=str, help='Comma separated list of custom genes') -@click.option('--output_prefix', type=str, required=True, help='Prefix for output files') -def main(table_filename, depth_table, unique_identifier, separator, groups, custom_genes, output_prefix): +def main(table_filename, depth_table, unique_identifier, separator, groups, custom_genes): sep_char = separator2character[separator] + uniq_name = unique_identifier if unique_identifier else "sample" # Read tables features_table = pd.read_table(table_filename, header=0, sep=sep_char) @@ -92,21 +93,19 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust else: print(f'No custom genes provided, plotting all genes in the panel: {panel_genes}') - output_name = f"{output_prefix}.plot_depth_per_group.pdf" - - # groups may contain lists of lists, but all formatted into a string -import ast - + # Process groups groups_of_interest_init = ast.literal_eval(groups) if groups else [] groups_of_interest = [] -groups_of_interest = list(dict.fromkeys(item.strip() for sublist in groups_of_interest_init for item in sublist if item != '')) + groups_of_interest = list(dict.fromkeys(item.strip() for sublist in groups_of_interest_init for item in sublist if item != '')) - uniq_name = unique_identifier if unique_identifier else "sample" print(f"Processing data for the groups of interest: {groups_of_interest}") - with PdfPages(output_name) as pdf: - for group in groups_of_interest: + # Handle groups so each group has its own plot in all and individual genes and stored in the same pdf file per group + for group in groups_of_interest: + output_name = f"{group}.plot_depth_per_group.pdf" + + with PdfPages(output_name) as pdf: print(f"Processing {group} group, type: {type(group)}") metadata_group_df = features_table[[uniq_name, str(group[0])]] merged_depth_df = pd.merge(metadata_group_df, depth_table, how='left', left_on=uniq_name, right_on='SAMPLE_ID') @@ -137,5 +136,4 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust --unique-identifier Sample_Name \ --groups "[ ["Sample_Group"], ["cancer"], ["Age_onset"], ["Cancer_age_group"] , ["Bacterial_Signatures_identified"]]" \ --custom-genes APC,BRAF,FBXW7,KRAS,PIK3CA,SMAD4,TP53' \ - --output_prefix depth_group_comparison ''' \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index ba50029c..58af4a23 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -351,8 +351,6 @@ process { ] } - - withName: GROUPGENES { ext.custom = params.custom_groups ext.hotspots = params.create_subgenic_regions diff --git a/modules/local/analyzedepths/main.nf b/modules/local/analyzedepths/main.nf index 5182df26..cf7268b3 100644 --- a/modules/local/analyzedepths/main.nf +++ b/modules/local/analyzedepths/main.nf @@ -18,8 +18,7 @@ process ANALYZE_DEPTHS_GROUPS { path("*.plot_depth_per_group.pdf") , emit: plots_per_gene_per_group script: - // Use meta.id to ensure each sample gets a unique folder/file name - def output_prefix = "depth_group_comparison" + def separator = task.ext.separator ?: "comma" def custom_groups = task.ext.features_groups ? "--groups \"${task.ext.features_groups}\" " : "" def custom_genes = task.ext.features_genes ? "--custom-genes \"${task.ext.features_genes}\" " : "" @@ -36,7 +35,6 @@ process ANALYZE_DEPTHS_GROUPS { ${unique_identifier} \\ ${custom_groups} \\ ${custom_genes} \\ - --output_prefix ${output_prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index b1238f40..30a57d08 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -229,18 +229,10 @@ workflow DEEPCSA{ } PLOTDEPTHSEXONSCONS(ANNOTATEDEPTHS.out.all_samples_depths, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel) - // define it as a module very similar to the table2group one -if (params.features_groups_list) { - ANALYZEDEPTHSGROUPS(features_table, PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample) -} - - // Load group keys from JSON file in 'groups' channel - TABLE2GROUP.out.json_groups.map { json_path -> - def json = file(json_path).text - groovy.json.JsonSlurper.newInstance().parseText(json).keySet() - }.flatten().unique() - .set { group_keys_ch } // this is a channel that contains only the group names as elements of the channel - + // ANALYZEDEPTHSGROUPS should run only when user defines a group list + if (params.features_groups_list) { + ANALYZEDEPTHSGROUPS(features_table, PLOTDEPTHSEXONSCONS.out.average_depth_gene_sample) + } // Enrich regions in consensus panels ENRICHPANELS(MUT_PREPROCESSING.out.mutations_all_samples, From 1b7af33a992e15f9583950b8cd7c11e2557c7c0d Mon Sep 17 00:00:00 2001 From: efiguerola Date: Wed, 25 Feb 2026 17:41:55 +0100 Subject: [PATCH 12/13] modified path so it is stored together with the other all samples depth plots file --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 58af4a23..af834fad 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -345,7 +345,7 @@ process { // define the list of custom genes here // you will have to add an extra parameters to the pipeline (nextflow.config) and then handle it here publishDir = [ - path: { "${params.outdir}/depths/summary" }, // ideally this should be in plots directory together with the other file /depths/summary/all_samples.exons_cons.depths.pdf + path: { "${params.outdir}/plots/depths_summary" }, mode: params.publish_dir_mode, pattern: '**{pdf}', ] From 1718780c81f79f6967d6bc3ea5dcfa5925bdb5ca Mon Sep 17 00:00:00 2001 From: efiguerola Date: Thu, 26 Feb 2026 23:22:03 +0100 Subject: [PATCH 13/13] modified script to parse average depth per sample and use it to plot ALL_GENES values per group. Added new input in config for it. Also fixed bug in processing groups and added in modules.config a subdirectory for output --- bin/depth_group_comparison.py | 84 ++++++++++++++++++++--------- conf/modules.config | 2 +- modules/local/analyzedepths/main.nf | 4 +- 3 files changed, 62 insertions(+), 28 deletions(-) diff --git a/bin/depth_group_comparison.py b/bin/depth_group_comparison.py index 44219271..7d62184e 100755 --- a/bin/depth_group_comparison.py +++ b/bin/depth_group_comparison.py @@ -15,6 +15,7 @@ from matplotlib.backends.backend_pdf import PdfPages import seaborn as sns import ast +import re mpl.rcParams.update({ 'axes.titlesize': plots_general_config["title_fontsize"], @@ -41,17 +42,22 @@ def plot_depth_per_group(df, group_col, data_type, pdf): num_categories = df[col_name].nunique() + 2 plt.figure(figsize=(num_categories, 4)) - ax = sns.boxplot(data=df, x=col_name, y="MEAN_GENE_DEPTH", hue=col_name, showfliers=False, showmeans=False,legend=False) - ax = sns.stripplot(data=df, x=col_name, y="MEAN_GENE_DEPTH", color='grey', alpha=0.5, size=4, legend=False) + if data_type == 'ALL_GENES': + plot_df = df[df['GENE'] == 'ALL_GENES'] + title = f"Average Depth for ALL_GENES in {col_name} group" - if data_type == 'all_genes': - plt.title(f"Average Depth for {data_type} in {col_name} group", fontsize=plots_general_config["title_fontsize"]) - elif data_type == 'gene': - gene = df['GENE'].iloc[0] - plt.title(f"Average Depth for {gene} in {col_name} group", fontsize=plots_general_config["title_fontsize"]) - else: - print(f"Unknown data type: {data_type}. Title will not be set.") + else: # data_type is the Gene Name + plot_df = df[df['GENE'] == data_type] + title = f"Average Depth for {data_type} in {col_name} group" + + if plot_df.empty: + print(f"No data available for {data_type} in {col_name} group. Skipping plot.") + return + sns.boxplot(data=plot_df, x=col_name, y="MEAN_GENE_DEPTH", hue=col_name, showfliers=False, showmeans=False, legend=False) + sns.stripplot(data=plot_df, x=col_name, y="MEAN_GENE_DEPTH", color='grey', alpha=0.5, size=4, legend=False) + + plt.title(title, fontsize=plots_general_config["title_fontsize"]) plt.xlabel('', fontsize=plots_general_config["xylabel_fontsize"]) plt.ylabel(f"Average Cons Exons Depth", fontsize=plots_general_config["xylabel_fontsize"]) plt.yticks( fontsize=plots_general_config["yticks_fontsize"]) @@ -67,24 +73,26 @@ def plot_depth_per_group(df, group_col, data_type, pdf): @click.command() @click.option('--table-filename', required=True, type=click.Path(exists=True), help='Input features table file') -@click.option('--depth-table', required=True, type=click.Path(exists=True), help='Input depth table file') +@click.option('--depth-gene-sample', required=True, type=click.Path(exists=True), help='Input depth file per gene per sample') +@click.option('--depth-sample', required=True, type=click.Path(exists=True), help='Input depth file per sample') @click.option('--separator', required=True, type=click.Choice(['tab', 'comma']), help='Separator used in features table: tab or comma') @click.option('--unique-identifier', default=None, type=str, help='Unique identifier column name') @click.option('--groups', required=True, type=str, help='List of columns with grouping information') @click.option('--custom-genes', required=False, type=str, help='Comma separated list of custom genes') -def main(table_filename, depth_table, unique_identifier, separator, groups, custom_genes): +def main(table_filename, depth_gene_sample, depth_sample, unique_identifier, separator, groups, custom_genes): sep_char = separator2character[separator] uniq_name = unique_identifier if unique_identifier else "sample" # Read tables features_table = pd.read_table(table_filename, header=0, sep=sep_char) - depth_table = pd.read_table(depth_table, header=0, sep="\t") + depth_genes_samples = pd.read_table(depth_gene_sample, header=0, sep="\t") + depth_per_sample = pd.read_table(depth_sample, header=0, sep="\t") # Process panel genes - panel_genes = sorted(set(depth_table['GENE'].unique())) + panel_genes = sorted(set(depth_genes_samples['GENE'].unique())) if custom_genes: print(f'Custom genes provided, plotting custom genes only: {custom_genes}') custom_gene_list = [g.strip() for g in custom_genes.split(",")] @@ -93,10 +101,36 @@ def main(table_filename, depth_table, unique_identifier, separator, groups, cust else: print(f'No custom genes provided, plotting all genes in the panel: {panel_genes}') + + # Process depth per sample to add the 'ALL_GENES' depth value per sample + print('Processing per sample depth table to add the ALL_GENES depth column: ') + depth_per_sample['GENE'] = 'ALL_GENES' + depth_per_sample = depth_per_sample.rename(columns={'avg_depth_sample': 'MEAN_GENE_DEPTH'}) + + print('Depth per sample table after adding ALL_GENES value in GENES column:') + print(depth_per_sample.head()) + + depth_genes_samples = pd.concat([depth_genes_samples, depth_per_sample], ignore_index=True) + print('Added ALL_GENES depth values to depth genes samples table:') + print('Length of table:', len(depth_genes_samples)) + print(depth_genes_samples.head()) + + # Merge data with metadata table to get the group information for each sample + merged_depth_df = pd.merge(features_table, depth_genes_samples, how='left', left_on=uniq_name, right_on='SAMPLE_ID') + print('Merged depth and metadata table:') + print(merged_depth_df.head()) + print('Length of merged table:', len(merged_depth_df)) + # Process groups - groups_of_interest_init = ast.literal_eval(groups) if groups else [] - groups_of_interest = [] - groups_of_interest = list(dict.fromkeys(item.strip() for sublist in groups_of_interest_init for item in sublist if item != '')) + # First clean the string (adds quotes if the shell stripped them) + cleaned = re.sub(r'(?