diff --git a/subworkflows/local/create_input_channel/main.nf b/subworkflows/local/create_input_channel/main.nf
index 96d28f6..fd30d71 100644
--- a/subworkflows/local/create_input_channel/main.nf
+++ b/subworkflows/local/create_input_channel/main.nf
@@ -19,6 +19,11 @@ workflow CREATE_INPUT_CHANNEL {
         exit(1, "ERROR: Unsupported --local_input_type '${params.local_input_type}'. Supported values: ${allowedLocalInputTypes.join(', ')}")
     }
 
+    // Known raw-data extensions; order matters, so strip longest/compound ones first
+    // so 'sample.d.zip' -> 'sample', not 'sample.d'.
+    def knownRawExts = ['.d.tar.gz', '.d.tar', '.d.zip', '.mzML.gz', '.raw.gz',
+                        '.mzML', '.raw', '.dia', '.d']
+
     // Always parse as SDRF using DIA-NN converter
     SDRF_PARSING(ch_sdrf)
     ch_versions = ch_versions.mix(SDRF_PARSING.out.versions)
@@ -39,9 +44,30 @@ workflow CREATE_INPUT_CHANNEL {
             } else {
                 filestr = row.Filename.toString()
                 filestr = params.root_folder + File.separator + filestr
-                filestr = (params.local_input_type
-                    ? filestr.take(filestr.lastIndexOf('.')) + '.' + params.local_input_type
-                    : filestr)
+                if (params.local_input_type) {
+                    // Strip the longest matching known raw-data extension (covers
+                    // compound suffixes like .d.zip / .d.tar.gz from the SDRF),
+                    // then append the target extension.
+                    def stem = filestr
+                    def stemLower = stem.toLowerCase()
+
+                    def matched = knownRawExts.find { ext ->
+                        stemLower.endsWith(ext.toLowerCase())
+                    }
+
+                    // Only a dot inside the file name itself (after the last path
+                    // separator, and not its first character) marks an extension;
+                    // dots in params.root_folder must never be treated as one.
+                    def lastSep = Math.max(stem.lastIndexOf('/'), stem.lastIndexOf(File.separator))
+                    def lastDot = stem.lastIndexOf('.')
+
+                    if (matched) {
+                        stem = stem.substring(0, stem.length() - matched.length())
+                    } else if (lastDot > lastSep + 1) {
+                        stem = stem.take(lastDot)
+                    }
+                    filestr = stem + '.' + params.local_input_type
+                }
             }
             return [filestr, experiment_id, row]
         }