diff --git a/CHANGELOG.md b/CHANGELOG.md index 3492b936..5c2cf5c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - GT field is necessary for rtgtools vcfeval, strelka reports no GT field for somatic analysis. Fixing the GT field setup for strelka (bcftools plugingtset doesnt work as GT is not available) [#279](https://github.com/nf-core/variantbenchmarking/pull/279) - Using nf-metro to create better metromap [#281](https://github.com/nf-core/variantbenchmarking/pull/281) - Remove unneccesery reheadering of the vcf outputs of rtgtools and truvari [#286](https://github.com/nf-core/variantbenchmarking/pull/286) +- Fix two bugs: rtgtools bndeval output files were producing tags wrapped in `[]`, and caller VCF headers are now unified before merging in the ensemble analysis [#297](https://github.com/nf-core/variantbenchmarking/pull/297) ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index c7a28ce4..5d5144a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -287,6 +287,14 @@ process { ] } + withName: BCFTOOLS_UNIFY_HEADER { + ext.prefix = {"${meta.id}.unify"} + ext.args = {"-x FORMAT/AD,FORMAT/PL --output-type z --write-index=tbi"} + publishDir = [ + enabled: false + ] + } + withName: REFORMAT_TRUTH { ext.prefix = { input[0].baseName + '.reformatted' } ext.suffix = "vcf" diff --git a/subworkflows/local/concordance_analysis/main.nf b/subworkflows/local/concordance_analysis/main.nf index ca8ac0c9..2e5e698b 100644 --- a/subworkflows/local/concordance_analysis/main.nf +++ b/subworkflows/local/concordance_analysis/main.nf @@ -53,8 +53,9 @@ workflow CONCORDANCE_ANALYSIS { } ch_bed_input = bed_ch - .map { file -> tuple(["id": "intervals"], file) } - .ifEmpty([[:], []]) + .map { file -> [ [id: "intervals"], file ] } + .collect() + .ifEmpty( [ [id: "intervals"], [] ] ) // GATK4 concordance does not support structural variants now - GATK4 SVCONCORDANCE is in beta GATK4_CONCORDANCE( diff 
--git a/subworkflows/local/ensemble_test_vcfs/main.nf b/subworkflows/local/ensemble_test_vcfs/main.nf index 965d2f90..8f423b05 100644 --- a/subworkflows/local/ensemble_test_vcfs/main.nf +++ b/subworkflows/local/ensemble_test_vcfs/main.nf @@ -13,6 +13,7 @@ include { GAWK as REFORMAT_TRUTH_SV } from '../../../modules/nf-core/g include { GAWK as INJECT_MISSING_GT } from '../../../modules/nf-core/gawk' include { BCFTOOLS_SORT as BCFTOOLS_SORT_SV } from '../../../modules/nf-core/bcftools/sort' include { TABIX_BGZIP as TABIX_BGZIP_UNZIP } from '../../../modules/nf-core/tabix/bgzip' +include { BCFTOOLS_ANNOTATE as BCFTOOLS_UNIFY_HEADER } from '../../../modules/nf-core/bcftools/annotate' workflow ENSEMLE_TEST_VCFS { take: @@ -22,8 +23,14 @@ workflow ENSEMLE_TEST_VCFS { main: + // unify header for callers + BCFTOOLS_UNIFY_HEADER( + test_vcfs.map{meta, vcf, index -> [meta, vcf, index, [], []]}, + [],[],[] + ) + // if the benchmarking method is rtgtools, missing GT field is already filled in VCF preperation step, so no need to inject missing GT field - test_vcfs.branch { meta, vcf, index -> + BCFTOOLS_UNIFY_HEADER.out.vcf.join(BCFTOOLS_UNIFY_HEADER.out.tbi).branch { meta, vcf, index -> def is_rtg = params.method?.contains("rtgtools") def is_strelka_manta = ['strelka', 'manta'].contains(meta.caller.toLowerCase()) def is_somatic = params.analysis == "somatic" diff --git a/workflows/variantbenchmarking.nf b/workflows/variantbenchmarking.nf index ed633442..c6f27161 100644 --- a/workflows/variantbenchmarking.nf +++ b/workflows/variantbenchmarking.nf @@ -288,21 +288,13 @@ workflow VARIANTBENCHMARKING { ch_reports = ch_reports.mix(RTGTOOLS_BNDEVAL.out.summary .map { _meta, file -> tuple([vartype: params.variant_type] + [benchmark_tool: "rtgtools"], file) } .groupTuple()) - evals_ch = evals_ch.mix(RTGTOOLS_BNDEVAL.out.fn_vcf, - RTGTOOLS_BNDEVAL.out.fp_vcf, - RTGTOOLS_BNDEVAL.out.baseline_vcf, - RTGTOOLS_BNDEVAL.out.tp_vcf) - .map { meta, file -> - def mapping = [ - 'fn': 'FN', - 
'fp': 'FP', - 'tp-baseline': 'TP_base', - 'tp': 'TP_comp' - ] - def tag = file.getName().tokenize('.').find { token -> token in ['fn', 'fp', 'tp-baseline', 'tp'] } - def transformedTag = mapping[tag] ?: tag - tuple( [ meta + [vartype: params.variant_type, id: "rtgtools", tag: transformedTag]], file) - } + + evals_ch = evals_ch.mix( + RTGTOOLS_BNDEVAL.out.fn_vcf.map { _meta, file -> tuple([vartype: params.variant_type] + [tag: "FN"] + [id: "rtgtools"], file) }, + RTGTOOLS_BNDEVAL.out.fp_vcf.map { _meta, file -> tuple([vartype: params.variant_type] + [tag: "FP"] + [id: "rtgtools"], file) }, + RTGTOOLS_BNDEVAL.out.baseline_vcf.map { _meta, file -> tuple([vartype: params.variant_type] + [tag: "TP_base"] + [id: "rtgtools"], file) }, + RTGTOOLS_BNDEVAL.out.tp_vcf.map { _meta, file -> tuple([vartype: params.variant_type] + [tag: "TP_comp"] + [id: "rtgtools"], file) } + ) } }