diff --git a/CITATIONS.md b/CITATIONS.md index 4a620f9..2e97bb2 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -69,6 +69,9 @@ - [deepTools](https://pubmed.ncbi.nlm.nih.gov/24799436/) > Ramírez F, Dündar F, Diehl S, Grüning BA, Manke T. deepTools: a flexible platform for exploring deep-sequencing data. Nucleic Acids Res. 2014 Jul;42(Web Server issue):W187-91. doi: 10.1093/nar/gku365. Epub 2014 May 5. PMID: 24799436; PMCID: PMC4086134. +- [CINmetrics](https://pubmed.ncbi.nlm.nih.gov/37123011/) + > Oza VH, Fisher JL, Darji R, Lasseigne BN. CINmetrics: an R package for analyzing copy number aberrations as a measure of chromosomal instability. PeerJ. 2023 Apr 25;11:e15244. doi: 10.7717/peerj.15244. PMID: 37123011; PMCID: PMC10143595. + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ffbed84..26a7b2a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -270,6 +270,36 @@ custom_data: section_name: "Copy Number Signatures Activities" description: "The image is a summary view of Copy number Signatures from segmentation files, computed by the R Package CINSignatureQuantification." + cinmetrics_summary: + id: "cinmetrics" + section_name: "CINmetrics Results" + description: "This table reports CINmetrics values: Total Aberration Index (tai), Copy Number Aberrations (cna), Altered Base Segment (base_segments), Break Point (break_points), Fraction of Altered Genome (fga)" + plot_type: "table" + headers: + sample_id: + title: "Sample ID" + placement: 1 + tai: + title: "TAI" + format: "{:,.4f}" + placement: 2 + scale: False + cna: + title: "CNA" + format: "{:,.4f}" + placement: 3 + base_segments: + title: "Base Segments" + format: "{:,.4f}" + placement: 4 + break_points: + title: "Break Points" + format: "{:,.4f}" + placement: 5 + fga: + title: "FGA" + format: "{:,.4f}" + placement: 6 hrdcna_summary: id: "hrdcna" name: "HRDCNA Summary" diff --git a/bin/compute_cinmetrics.R b/bin/compute_cinmetrics.R new file mode 100755 index 0000000..9930b6e --- /dev/null +++ b/bin/compute_cinmetrics.R @@ -0,0 +1,76 @@ +#!/usr/bin/env Rscript +suppressPackageStartupMessages({ + library(CINmetrics, quietly = TRUE) + library(argparser, quietly = TRUE) + library(readr, quietly = TRUE) + library(dplyr, quietly = TRUE) +}) + +parser <- arg_parser("Compute CINmetrics", hide.opts = TRUE) + +parser <- add_argument(parser, "--seg_file", + help = "Input segmentation file", + nargs = Inf) +parser <- add_argument(parser, "--output", + help = "Output TSV file", + default = "cinmetrics_output.tsv") +parser <- add_argument(parser, "--segmentMean_tai", + help = "Threshold for TAI", + type = "numeric", default = 0.2) +parser <- add_argument(parser, "--segmentMean_cna", + help = "Threshold for CNA", + type = "numeric", default = (log(1.7, 2) - 1)) +parser <- add_argument(parser, "--segmentMean_base_segments", + help = "Threshold for base segments", + type = "numeric", default = 0.2) +parser <- add_argument(parser, "--segmentMean_break_points", + help = "Threshold for break points", + type = "numeric", default = 0.2) +parser <- add_argument(parser, "--segmentMean_fga", + help = "Threshold for FGA", + type = "numeric", default = 0.2) +parser <- add_argument(parser, "--numProbes", + help = "Minimum number of probes", + type = "numeric", default = NA) +parser <- add_argument(parser, "--segmentDistance_cna", + help = "Segment distance for CNA", + type = "numeric", default = 0.2) +parser <- add_argument(parser, "--minSegSize_cna", + help = "Minimum segment size for CNA", + type = "numeric", default = 10) +parser <- add_argument(parser, "--genomeSize_fga", + help = "Genome size for FGA", + type = "numeric", default = 2873203431) + +args <- parse_args(parser) + +input_file <- args$seg_file +cat("Reading:", input_file, "\n") +df <- read_tsv(input_file, show_col_types = FALSE) + +colnames(df) <- c("Sample", "Chromosome", "Start", "End", "Num_Probes", "Segment_Mean") + +df <- df %>% + mutate( + Sample = as.character(Sample), + Chromosome = gsub("^chr", "", Chromosome) + ) + +cat("CINmetrics calculation...\n") +metrics <- CINmetrics( + cnvData = df, + segmentMean_tai = args$segmentMean_tai, + segmentMean_cna = args$segmentMean_cna, + segmentMean_base_segments = args$segmentMean_base_segments, + segmentMean_break_points = args$segmentMean_break_points, + segmentMean_fga = args$segmentMean_fga, + numProbes = args$numProbes, + segmentDistance_cna = args$segmentDistance_cna, + minSegSize_cna = args$minSegSize_cna, + genomeSize_fga = args$genomeSize_fga +) + +metrics$tai[is.na(metrics$tai)] <- 0 + +write_tsv(metrics, args$output) +cat("Output file:", args$output, "\n") \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index c6824c7..0245eb6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -501,6 +501,26 @@ process { ].join(' ') } + withName: COMPUTE_CINMETRICS { + publishDir = [ + path: { "${params.outdir}/cinmetrics/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + ext.args = [ + "--segmentMean_tai ${params.cin_segmentMean_tai}", + "--segmentMean_cna ${params.cin_segmentMean_cna}", + "--segmentMean_base_segments ${params.cin_segmentMean_base_segments}", + "--segmentMean_break_points ${params.cin_segmentMean_break_points}", + "--segmentMean_fga ${params.cin_segmentMean_fga}", + "--segmentDistance_cna ${params.cin_segmentDistance_cna}", + "--minSegSize_cna ${params.cin_minSegSize_cna}", + "--genomeSize_fga ${params.cin_genomeSize_fga}", + params.cin_numProbes ? "--numProbes ${params.cin_numProbes}" : "", + ].join(' ') + ext.when = params.compute_cinmetrics + } + withName: GISTIC2 { publishDir = [ path: { "${params.outdir}/gistic/" }, diff --git a/modules/local/cinmetrics/main.nf b/modules/local/cinmetrics/main.nf new file mode 100644 index 0000000..dafaec8 --- /dev/null +++ b/modules/local/cinmetrics/main.nf @@ -0,0 +1,31 @@ +process COMPUTE_CINMETRICS{ + tag "Compute CINmetrics" + label 'process_single' + + container 'ghcr.io/dincalcilab/cinmetrics:0.1.0' + + input: + file(seg_file) + + output: + path("*.tsv"), emit: cinmetrics_summary + path("versions.yml"), emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def VERSION = "0.1" + """ + compute_cinmetrics.R \\ + --seg_file $seg_file \\ + --output cinmetrics_summary_mqc.tsv \\ +$args + + cat << END_VERSIONS > versions.yml + "${task.process}": + CINmetrics: ${VERSION} +END_VERSIONS + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index d7a022d..2b109d3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -125,6 +125,18 @@ params { hrdcna_compute_score = false hrdcna_threshold = 0.2 + // CINmetrics + compute_cinmetrics = false + cin_segmentMean_tai = 0.2 + cin_segmentMean_cna = (Math.log(1.7)/Math.log(2) - 1) + cin_segmentMean_base_segments = 0.2 + cin_segmentMean_break_points = 0.2 + cin_segmentMean_fga = 0.2 + cin_numProbes = null + cin_segmentDistance_cna = 0.2 + cin_minSegSize_cna = 10 + cin_genomeSize_fga = 2873203431 + // Gistic2 run_gistic = false gistic_t_amp = 0.1 diff --git a/nextflow_schema.json b/nextflow_schema.json index b22787d..035dda0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -746,6 +746,62 @@ "description": "Threshold to classify samples into 'HRD' or 'HRP'." } } + }, + "cinmetrics_options": { + "title": "CINmetrics options", + "type": "object", + "description": "Options for computing chromosomal instability metrics using the CINmetrics R package.", + "default": "", + "properties": { + "compute_cinmetrics": { + "type": "boolean", + "description": "Compute chromosomal instability metrics using CINmetrics." + }, + "cin_segmentMean_tai": { + "type": "number", + "default": 0.2, + "description": "Segment mean threshold for TAI calculation." + }, + "cin_segmentMean_cna": { + "type": "number", + "default": 0.2, + "description": "Segment mean threshold for CNA count." + }, + "cin_segmentMean_base_segments": { + "type": "number", + "default": 0.2, + "description": "Segment mean threshold for number of base segments." + }, + "cin_segmentMean_break_points": { + "type": "number", + "default": 0.2, + "description": "Segment mean threshold for breakpoint count." + }, + "cin_segmentMean_fga": { + "type": "number", + "default": 0.2, + "description": "Segment mean threshold for FGA calculation." + }, + "cin_segmentDistance_cna": { + "type": "number", + "default": 10000000, + "description": "Minimum distance between segments (in bp) for CNA merging." + }, + "cin_minSegSize_cna": { + "type": "number", + "default": 1000000, + "description": "Minimum segment size (in bp) to consider for CNA." + }, + "cin_genomeSize_fga": { + "type": "number", + "default": 3000000000, + "description": "Genome size used for FGA normalization." + }, + "cin_numProbes": { + "type": "number", + "description": "Optional number of probes per segment (if applicable)." + } + } } }, "allOf": [ @@ -796,6 +852,9 @@ }, { "$ref": "#/$defs/hrdcna_score" + }, + { + "$ref": "#/$defs/cinmetrics_options" } ] } diff --git a/workflows/samurai.nf b/workflows/samurai.nf index 7aa11a1..194faca 100644 --- a/workflows/samurai.nf +++ b/workflows/samurai.nf @@ -19,6 +19,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_samur // include { CIN_SIGNATURE_QUANTIFICATION } from '../modules/local/cin_signature_quantification/main' +include { COMPUTE_CINMETRICS } from '../modules/local/cinmetrics/main' include { SOLID_BIOPSY } from '../subworkflows/local/solid_biopsy/main' include { SIZE_SELECTION } from '../subworkflows/local/size_selection/main' include { LIQUID_BIOPSY } from '../subworkflows/local/liquid_biopsy/main' @@ -186,29 +187,29 @@ workflow SAMURAI { } // QC metrics about alignment (coverage, etc.) - BAM_QC_PICARD( - ch_bam_bai.map { meta, bam, bai -> - [meta, bam, bai, [], []] - }, - ch_fasta, - ch_fai, - ch_dict, - [[], []] /* fasta_gzi */ - ) - - ch_versions = ch_versions.mix( - BAM_QC_PICARD.out.versions.first() - ) - ch_multiqc_files = ch_multiqc_files.mix( - BAM_QC_PICARD.out.coverage_metrics.collect { _meta, metrics -> - metrics - } - ) - ch_multiqc_files = ch_multiqc_files.mix( - BAM_QC_PICARD.out.multiple_metrics.collect { _meta, metrics -> - metrics - } - ) + //BAM_QC_PICARD( + // ch_bam_bai.map { meta, bam, bai -> + // [meta, bam, bai, [], []] + // }, + // ch_fasta, + // ch_fai, + // ch_dict, + // [[], []] /* fasta_gzi */ + //) +// + //ch_versions = ch_versions.mix( + // BAM_QC_PICARD.out.versions.first() + //) + //ch_multiqc_files = ch_multiqc_files.mix( + // BAM_QC_PICARD.out.coverage_metrics.collect { _meta, metrics -> + // metrics + // } + //) + //ch_multiqc_files = ch_multiqc_files.mix( + // BAM_QC_PICARD.out.multiple_metrics.collect { _meta, metrics -> + // metrics + // } + //) // CN Calling @@ -286,6 +287,13 @@ workflow SAMURAI { error("Unknown / unsupported analysis ${analysis_type}") } + // Run CINmetrics if specified + if (params.compute_cinmetrics) { + COMPUTE_CINMETRICS(gistic_file) + ch_versions = ch_versions.mix(COMPUTE_CINMETRICS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(COMPUTE_CINMETRICS.out.cinmetrics_summary) + } + // Run GISTIC if specified if (run_gistic) { RUN_GISTIC(gistic_file, genome)