Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@
- [deepTools](https://pubmed.ncbi.nlm.nih.gov/24799436/)
> Ramírez F, Dündar F, Diehl S, Grüning BA, Manke T. deepTools: a flexible platform for exploring deep-sequencing data. Nucleic Acids Res. 2014 Jul;42(Web Server issue):W187-91. doi: 10.1093/nar/gku365. Epub 2014 May 5. PMID: 24799436; PMCID: PMC4086134.

- [CINmetrics](https://pubmed.ncbi.nlm.nih.gov/37123011/)
> Oza VH, Fisher JL, Darji R, Lasseigne BN. CINmetrics: an R package for analyzing copy number aberrations as a measure of chromosomal instability. PeerJ. 2023 Apr 25;11:e15244. doi: 10.7717/peerj.15244. PMID: 37123011; PMCID: PMC10143595.

## Software packaging/containerisation tools

- [Anaconda](https://anaconda.com)
Expand Down
30 changes: 30 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,36 @@ custom_data:
section_name: "Copy Number Signatures Activities"
description: "The image is a summary view of Copy number Signatures from segmentation files, computed by the R Package CINSignatureQuantification."

cinmetrics_summary:
id: "cinmetrics"
section_name: "CINmetrics Results"
description: "This table reports CINmetrics values: Total Aberration Index (tai), Copy Number Aberrations (cna), Altered Base Segment (base_segments), Break Point (break_points), Fraction of Altered Genome (fga)"
plot_type: "table"
headers:
sample_id:
title: "Sample ID"
placement: 1
tai:
title: "TAI"
format: "{:,.4f}"
placement: 2
scale: False
cna:
title: "CNA"
format: "{:,.4f}"
placement: 3
base_segments:
title: "Base Segments"
format: "{:,.4f}"
placement: 4
break_points:
title: "Break Points"
format: "{:,.4f}"
placement: 5
fga:
title: "FGA"
format: "{:,.4f}"
placement: 6
hrdcna_summary:
id: "hrdcna"
name: "HRDCNA Summary"
Expand Down
76 changes: 76 additions & 0 deletions bin/compute_cinmetrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env Rscript
suppressPackageStartupMessages({
library(CINmetrics, quietly = TRUE)
library(argparser, quietly = TRUE)
library(readr, quietly = TRUE)
library(dplyr, quietly = TRUE)
})

parser <- arg_parser("Compute CINmetrics", hide.opts = TRUE)

parser <- add_argument(parser, "--seg_file",
help = "Input segmentation file",
nargs = Inf)
parser <- add_argument(parser, "--output",
help = "Output TSV file",
default = "cinmetrics_output.tsv")
parser <- add_argument(parser, "--segmentMean_tai",
help = "Threshold for TAI",
type = "numeric", default = 0.2)
parser <- add_argument(parser, "--segmentMean_cna",
help = "Threshold for CNA",
type = "numeric", default = (log(1.7, 2) - 1))
parser <- add_argument(parser, "--segmentMean_base_segments",
help = "Threshold for base segments",
type = "numeric", default = 0.2)
parser <- add_argument(parser, "--segmentMean_break_points",
help = "Threshold for break points",
type = "numeric", default = 0.2)
parser <- add_argument(parser, "--segmentMean_fga",
help = "Threshold for FGA",
type = "numeric", default = 0.2)
parser <- add_argument(parser, "--numProbes",
help = "Minimum number of probes",
type = "numeric", default = NA)
parser <- add_argument(parser, "--segmentDistance_cna",
help = "Segment distance for CNA",
type = "numeric", default = 0.2)
parser <- add_argument(parser, "--minSegSize_cna",
help = "Minimum segment size for CNA",
type = "numeric", default = 10)
parser <- add_argument(parser, "--genomeSize_fga",
help = "Genome size for FGA",
type = "numeric", default = 2873203431)

args <- parse_args(parser)

input_file <- args$seg_file
cat("Reading:", input_file, "\n")
df <- read_tsv(input_file, show_col_types = FALSE)

colnames(df) <- c("Sample", "Chromosome", "Start", "End", "Num_Probes", "Segment_Mean")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you pass the column names directly to read_tsv?


df <- df %>%
mutate(
Sample = as.character(Sample),
Chromosome = gsub("^chr", "", Chromosome)
)

cat("CINmetrics calculation...\n")
metrics <- CINmetrics(
cnvData = df,
segmentMean_tai = args$segmentMean_tai,
segmentMean_cna = args$segmentMean_cna,
segmentMean_base_segments = args$segmentMean_base_segments,
segmentMean_break_points = args$segmentMean_break_points,
segmentMean_fga = args$segmentMean_fga,
numProbes = args$numProbes,
segmentDistance_cna = args$segmentDistance_cna,
minSegSize_cna = args$minSegSize_cna,
genomeSize_fga = args$genomeSize_fga
)

metrics$tai[is.na(metrics$tai)] <- 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this? A data.frame? This is really nit-picking, but if it is so, let's use either tidyverse syntax or the other, not both.


write_tsv(metrics, args$output)
cat("Output file:", args$output, "\n")
20 changes: 20 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,26 @@ process {
].join(' ')
}

withName: COMPUTE_CINMETRICS {
publishDir = [
path: { "${params.outdir}/cinmetrics/" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
ext.args = [
"--segmentMean_tai ${params.cin_segmentMean_tai}",
"--segmentMean_cna ${params.cin_segmentMean_cna}",
"--segmentMean_base_segments ${params.cin_segmentMean_base_segments}",
"--segmentMean_break_points ${params.cin_segmentMean_break_points}",
"--segmentMean_fga ${params.cin_segmentMean_fga}",
"--segmentDistance_cna ${params.cin_segmentDistance_cna}",
"--minSegSize_cna ${params.cin_minSegSize_cna}",
"--genomeSize_fga ${params.cin_genomeSize_fga}",
params.cin_numProbes ? "--numProbes ${params.cin_numProbes}" : "",
].join(' ')
ext.when = params.compute_cinmetrics
}

withName: GISTIC2 {
publishDir = [
path: { "${params.outdir}/gistic/" },
Expand Down
31 changes: 31 additions & 0 deletions modules/local/cinmetrics/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
process COMPUTE_CINMETRICS{
tag "Compute CINmetrics"
label 'process_single'

container 'ghcr.io/dincalcilab/cinmetrics:0.1.0'

input:
file(seg_file)

output:
path("*.tsv"), emit: cinmetrics_summary
path("versions.yml"), emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def VERSION = "0.1"
"""
compute_cinmetrics.R \\
--seg_file $seg_file \\
--output cinmetrics_summary_mqc.tsv \\
$args

cat << END_VERSIONS > versions.yml
"${task.process}":
CINmetrics: ${VERSION}
END_VERSIONS
"""
}
12 changes: 12 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ params {
hrdcna_compute_score = false
hrdcna_threshold = 0.2

// CINmetrics
compute_cinmetrics = false
cin_segmentMean_tai = 0.2
cin_segmentMean_cna = (Math.log(1.7)/Math.log(2) - 1)
cin_segmentMean_base_segments = 0.2
cin_segmentMean_break_points = 0.2
cin_segmentMean_fga = 0.2
cin_numProbes = null
cin_segmentDistance_cna = 0.2
cin_minSegSize_cna = 10
cin_genomeSize_fga = 2873203431

// Gistic2
run_gistic = false
gistic_t_amp = 0.1
Expand Down
59 changes: 59 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,62 @@
"description": "Threshold to classify samples into 'HRD' or 'HRP'."
}
}
},
"cinmetrics_options": {
"title": "CINmetrics options",
"type": "object",
"description": "Options for computing chromosomal instability metrics using the CINmetrics R package.",
"default": "",
"properties": {
"compute_cinmetrics": {
"type": "boolean",
"description": "Compute chromosomal instability metrics using CINmetrics."
},
"cin_segmentMean_tai": {
"type": "number",
"default": 0.2,
"description": "Segment mean threshold for TAI calculation."
},
"cin_segmentMean_cna": {
"type": "number",
"default": 0.2,
"description": "Segment mean threshold for CNA count."
},
"cin_segmentMean_base_segments": {
"type": "number",
"default": 0.2,
"description": "Segment mean threshold for number of base segments."
},
"cin_segmentMean_break_points": {
"type": "number",
"default": 0.2,
"description": "Segment mean threshold for breakpoint count."
},
"cin_segmentMean_fga": {
"type": "number",
"default": 0.2,
"description": "Segment mean threshold for FGA calculation."
},
"cin_segmentDistance_cna": {
"type": "number",
"default": 10000000,
"description": "Minimum distance between segments (in bp) for CNA merging."
},
"cin_minSegSize_cna": {
"type": "number",
"default": 1000000,
"description": "Minimum segment size (in bp) to consider for CNA."
},
"cin_genomeSize_fga": {
"type": "number",
"default": 3000000000,
"description": "Genome size used for FGA normalization."
},
"cin_numProbes": {
"type": "number",
"description": "Optional number of probes per segment (if applicable)."
}
}
}
},
"allOf": [
Expand Down Expand Up @@ -796,6 +852,9 @@
},
{
"$ref": "#/$defs/hrdcna_score"
},
{
"$ref": "#/$defs/cinmetrics_options"
}
]
}
54 changes: 31 additions & 23 deletions workflows/samurai.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_samur
//

include { CIN_SIGNATURE_QUANTIFICATION } from '../modules/local/cin_signature_quantification/main'
include { COMPUTE_CINMETRICS } from '../modules/local/cinmetrics/main'
include { SOLID_BIOPSY } from '../subworkflows/local/solid_biopsy/main'
include { SIZE_SELECTION } from '../subworkflows/local/size_selection/main'
include { LIQUID_BIOPSY } from '../subworkflows/local/liquid_biopsy/main'
Expand Down Expand Up @@ -186,29 +187,29 @@ workflow SAMURAI {
}

// QC metrics about alignment (coverage, etc.)
BAM_QC_PICARD(
ch_bam_bai.map { meta, bam, bai ->
[meta, bam, bai, [], []]
},
ch_fasta,
ch_fai,
ch_dict,
[[], []] /* fasta_gzi */
)

ch_versions = ch_versions.mix(
BAM_QC_PICARD.out.versions.first()
)
ch_multiqc_files = ch_multiqc_files.mix(
BAM_QC_PICARD.out.coverage_metrics.collect { _meta, metrics ->
metrics
}
)
ch_multiqc_files = ch_multiqc_files.mix(
BAM_QC_PICARD.out.multiple_metrics.collect { _meta, metrics ->
metrics
}
)
//BAM_QC_PICARD(
// ch_bam_bai.map { meta, bam, bai ->
// [meta, bam, bai, [], []]
// },
// ch_fasta,
// ch_fai,
// ch_dict,
// [[], []] /* fasta_gzi */
//)
//
//ch_versions = ch_versions.mix(
// BAM_QC_PICARD.out.versions.first()
//)
//ch_multiqc_files = ch_multiqc_files.mix(
// BAM_QC_PICARD.out.coverage_metrics.collect { _meta, metrics ->
// metrics
// }
//)
//ch_multiqc_files = ch_multiqc_files.mix(
// BAM_QC_PICARD.out.multiple_metrics.collect { _meta, metrics ->
// metrics
// }
//)

// CN Calling

Expand Down Expand Up @@ -286,6 +287,13 @@ workflow SAMURAI {
error("Unknown / unsupported analysis ${analysis_type}")
}

// Run CINmetrics if specified
if (params.compute_cinmetrics) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You use ext.when, so this check is unnecessary.

COMPUTE_CINMETRICS(gistic_file)
ch_versions = ch_versions.mix(COMPUTE_CINMETRICS.out.versions)
ch_multiqc_files = ch_multiqc_files.mix(COMPUTE_CINMETRICS.out.cinmetrics_summary)
}

// Run GISTIC if specified
if (run_gistic) {
RUN_GISTIC(gistic_file, genome)
Expand Down
Loading