From 9b3a10d5e0f8d6c027c4a5f11d7c2029ace4ec96 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 13 Feb 2024 08:53:21 -0600 Subject: [PATCH 01/17] Add workflow output schema Signed-off-by: Ben Sherman --- output_schema.json | 125 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 output_schema.json diff --git a/output_schema.json b/output_schema.json new file mode 100644 index 00000000..cae8b4d7 --- /dev/null +++ b/output_schema.json @@ -0,0 +1,125 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/output_schema.json", + "title": "nf-core/fetchngs pipeline outputs", + "description": "", + "type": "object", + "properties": { + "fastq": { + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string" + }, + "fastq_1": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + "fastq_2": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + "run_accession": { + "type": "string" + }, + "experiment_accession": { + "type": "string" + }, + "sample_accession": { + "type": "string" + }, + "secondary_sample_accession": { + "type": "string" + }, + "study_accession": { + "type": "string" + }, + "secondary_study_accession": { + "type": "string" + }, + "submission_accession": { + "type": "string" + }, + "run_alias": { + "type": "string" + }, + "experiment_alias": { + "type": "string" + }, + "sample_alias": { + "type": "string" + }, + "study_alias": { + "type": "string" + }, + "library_layout": { + "type": "string" + }, + "library_selection": { + "type": "string" + }, + "library_source": { + "type": "string" + }, + "library_strategy": { + "type": "string" + }, + "library_name": { + "type": "string" + }, + "instrument_model": { + "type": "string" + }, + "instrument_platform": { + "type": "string" + }, + "base_count": { + "type": "integer" + }, + 
"read_count": { + "type": "integer" + }, + "tax_id": { + "type": "string" + }, + "scientific_name": { + "type": "string" + }, + "sample_title": { + "type": "string" + }, + "experiment_title": { + "type": "string" + }, + "study_title": { + "type": "string" + }, + "sample_description": { + "type": "string" + }, + "fastq_md5": { + "type": "string", + "pattern": "^[0-9a-f]{32}$" + }, + "fastq_bytes": { + "type": "integer" + }, + "fastq_ftp": { + "type": "string" + }, + "fastq_galaxy": { + "type": "string" + }, + "fastq_aspera": { + "type": "string" + } + }, + "required": ["sample", "fastq_1"] + } + } + } +} From b03ed3b2aeb409312e3ca1ce80427baa033b908e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 11:34:33 -0600 Subject: [PATCH 02/17] Convert output schema to YAML Signed-off-by: Ben Sherman --- output.yml | 110 +++++++++++++++++++++++++++++++++++++++ output_schema.json | 125 --------------------------------------------- 2 files changed, 110 insertions(+), 125 deletions(-) create mode 100644 output.yml delete mode 100644 output_schema.json diff --git a/output.yml b/output.yml new file mode 100644 index 00000000..f80f1ba5 --- /dev/null +++ b/output.yml @@ -0,0 +1,110 @@ +$schema: 'http://json-schema.org/draft-07/schema' +$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/output.yml' +title: 'nf-core/fetchngs pipeline outputs' +description: '' +type: object +properties: + id_mappings: + type: array + items: + type: object + properties: + sample: + type: string + experiment_accession: + type: string + run_accession: + type: string + sample_accession: + type: string + experiment_alias: + type: string + run_alias: + type: string + sample_alias: + type: string + experiment_title: + type: string + sample_title: + type: string + sample_description: + type: string + samplesheet: + type: array + items: + type: object + properties: + sample: + type: string + fastq_1: + type: string + format: file-path + pattern: '^\\S+\\.f(ast)?q\\.gz$' + fastq_2: 
+ type: string + format: file-path + pattern: '^\\S+\\.f(ast)?q\\.gz$' + run_accession: + type: string + experiment_accession: + type: string + sample_accession: + type: string + secondary_sample_accession: + type: string + study_accession: + type: string + secondary_study_accession: + type: string + submission_accession: + type: string + run_alias: + type: string + experiment_alias: + type: string + sample_alias: + type: string + study_alias: + type: string + library_layout: + type: string + library_selection: + type: string + library_source: + type: string + library_strategy: + type: string + library_name: + type: string + instrument_model: + type: string + instrument_platform: + type: string + base_count: + type: integer + read_count: + type: integer + tax_id: + type: string + scientific_name: + type: string + sample_title: + type: string + experiment_title: + type: string + study_title: + type: string + sample_description: + type: string + fastq_md5: + type: string + pattern: '^[0-9a-f]{32}$' + fastq_bytes: + type: integer + fastq_ftp: + type: string + fastq_galaxy: + type: string + fastq_aspera: + type: string + required: ['sample', 'fastq_1'] diff --git a/output_schema.json b/output_schema.json deleted file mode 100644 index cae8b4d7..00000000 --- a/output_schema.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/output_schema.json", - "title": "nf-core/fetchngs pipeline outputs", - "description": "", - "type": "object", - "properties": { - "fastq": { - "type": "array", - "items": { - "type": "object", - "properties": { - "sample": { - "type": "string" - }, - "fastq_1": { - "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - "fastq_2": { - "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - "run_accession": { - "type": "string" - }, - "experiment_accession": { - "type": "string" 
- }, - "sample_accession": { - "type": "string" - }, - "secondary_sample_accession": { - "type": "string" - }, - "study_accession": { - "type": "string" - }, - "secondary_study_accession": { - "type": "string" - }, - "submission_accession": { - "type": "string" - }, - "run_alias": { - "type": "string" - }, - "experiment_alias": { - "type": "string" - }, - "sample_alias": { - "type": "string" - }, - "study_alias": { - "type": "string" - }, - "library_layout": { - "type": "string" - }, - "library_selection": { - "type": "string" - }, - "library_source": { - "type": "string" - }, - "library_strategy": { - "type": "string" - }, - "library_name": { - "type": "string" - }, - "instrument_model": { - "type": "string" - }, - "instrument_platform": { - "type": "string" - }, - "base_count": { - "type": "integer" - }, - "read_count": { - "type": "integer" - }, - "tax_id": { - "type": "string" - }, - "scientific_name": { - "type": "string" - }, - "sample_title": { - "type": "string" - }, - "experiment_title": { - "type": "string" - }, - "study_title": { - "type": "string" - }, - "sample_description": { - "type": "string" - }, - "fastq_md5": { - "type": "string", - "pattern": "^[0-9a-f]{32}$" - }, - "fastq_bytes": { - "type": "integer" - }, - "fastq_ftp": { - "type": "string" - }, - "fastq_galaxy": { - "type": "string" - }, - "fastq_aspera": { - "type": "string" - } - }, - "required": ["sample", "fastq_1"] - } - } - } -} From b8dd9e285f3cdc2c4da0633e5e9a7da168877894 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 12:06:26 -0600 Subject: [PATCH 03/17] Add output definition Signed-off-by: Ben Sherman --- conf/base.config | 6 --- main.nf | 40 +++++++++++++++++++ modules/local/aspera_cli/nextflow.config | 12 ------ .../multiqc_mappings_config/nextflow.config | 9 ----- modules/local/sra_fastq_ftp/nextflow.config | 12 ------ .../local/sra_ids_to_runinfo/nextflow.config | 8 ---- .../local/sra_runinfo_to_ftp/nextflow.config | 9 ----- 
.../local/sra_to_samplesheet/nextflow.config | 8 ---- .../tests/nextflow.config | 2 - .../sratools/fasterqdump/nextflow.config | 5 --- .../nf-core/sratools/prefetch/nextflow.config | 8 ---- .../nextflow.config | 1 - workflows/sra/nextflow.config | 5 --- 13 files changed, 40 insertions(+), 85 deletions(-) delete mode 100644 modules/local/multiqc_mappings_config/nextflow.config delete mode 100644 modules/local/sra_ids_to_runinfo/nextflow.config delete mode 100644 modules/local/sra_runinfo_to_ftp/nextflow.config delete mode 100644 modules/local/sra_to_samplesheet/nextflow.config delete mode 100644 modules/nf-core/sratools/prefetch/nextflow.config diff --git a/conf/base.config b/conf/base.config index 6af79a7b..6af45542 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,12 +14,6 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/main.nf b/main.nf index b5b499d5..b461a4e3 100644 --- a/main.nf +++ b/main.nf @@ -83,6 +83,46 @@ workflow { ) } +output { + path params.outdir + + collect('fastq') { + select('ASPERA_CLI|SRA_FASTQ_FTP|SRATOOLS_FASTERQDUMP') { + path 'fastq' + pattern '*.fastq.gz' + } + + select('ASPERA_CLI|SRA_FASTQ_FTP') { + path 'fastq/md5' + pattern '*.md5' + } + } + + collect('metadata') { + path 'metadata' + select('SRA_RUNINFO_TO_FTP') { + pattern '*.tsv' + } + } + + collect('samplesheet') { + path 'samplesheet' + select('MULTIQC_MAPPINGS_CONFIG') { + pattern 'multiqc_config.yml' + } + + // index { + // format 'csv' + // path 'id_mappings.csv' + // } + + // index { + // format 'csv' + // path 'samplesheet.csv' + // } + } +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules/local/aspera_cli/nextflow.config b/modules/local/aspera_cli/nextflow.config index fa2dbd90..9a808242 100644 --- a/modules/local/aspera_cli/nextflow.config +++ b/modules/local/aspera_cli/nextflow.config @@ -1,17 +1,5 @@ process { withName: 'ASPERA_CLI' { ext.args = '-QT -l 300m -P33001' - publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] - ] } } diff --git a/modules/local/multiqc_mappings_config/nextflow.config b/modules/local/multiqc_mappings_config/nextflow.config deleted file mode 100644 index 11c58341..00000000 --- a/modules/local/multiqc_mappings_config/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: 'MULTIQC_MAPPINGS_CONFIG' { - publishDir = [ - path: { "${params.outdir}/samplesheet" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } -} diff --git a/modules/local/sra_fastq_ftp/nextflow.config b/modules/local/sra_fastq_ftp/nextflow.config index 56e43959..26261f26 100644 --- a/modules/local/sra_fastq_ftp/nextflow.config +++ b/modules/local/sra_fastq_ftp/nextflow.config @@ -1,17 +1,5 @@ process { withName: 'SRA_FASTQ_FTP' { ext.args = '-t 5 -nv -c -T 60' - publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] - ] } } diff --git a/modules/local/sra_ids_to_runinfo/nextflow.config b/modules/local/sra_ids_to_runinfo/nextflow.config deleted file mode 100644 index 9b9d0b16..00000000 --- a/modules/local/sra_ids_to_runinfo/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: 'SRA_IDS_TO_RUNINFO' { - publishDir = [ - path: { "${params.outdir}/metadata" }, - enabled: false - ] - } -} diff --git a/modules/local/sra_runinfo_to_ftp/nextflow.config b/modules/local/sra_runinfo_to_ftp/nextflow.config deleted file mode 100644 index 43263648..00000000 --- a/modules/local/sra_runinfo_to_ftp/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: 'SRA_RUNINFO_TO_FTP' { - publishDir = [ - path: { "${params.outdir}/metadata" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } -} diff --git a/modules/local/sra_to_samplesheet/nextflow.config b/modules/local/sra_to_samplesheet/nextflow.config deleted file mode 100644 index da241c1a..00000000 --- a/modules/local/sra_to_samplesheet/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: SRA_TO_SAMPLESHEET { - publishDir = [ - path: { "${params.outdir}/samplesheet" }, - enabled: false - ] - } -} diff --git a/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config b/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config index c4a96e94..df5def04 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config +++ b/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config @@ -4,8 +4,6 @@ params.settings_file = "${params.settings_path}/user-settings.mkfg" env.NCBI_SETTINGS = params.settings_file process { - - publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } withName: CUSTOM_SRATOOLSNCBISETTINGS { containerOptions = { (workflow.containerEngine == 'singularity') ? 
diff --git a/modules/nf-core/sratools/fasterqdump/nextflow.config b/modules/nf-core/sratools/fasterqdump/nextflow.config index f98b140d..7e1649d1 100644 --- a/modules/nf-core/sratools/fasterqdump/nextflow.config +++ b/modules/nf-core/sratools/fasterqdump/nextflow.config @@ -1,10 +1,5 @@ process { withName: SRATOOLS_FASTERQDUMP { ext.args = '--split-files --include-technical' - publishDir = [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ] } } \ No newline at end of file diff --git a/modules/nf-core/sratools/prefetch/nextflow.config b/modules/nf-core/sratools/prefetch/nextflow.config deleted file mode 100644 index a2ca8848..00000000 --- a/modules/nf-core/sratools/prefetch/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: SRATOOLS_PREFETCH { - publishDir = [ - path: { "${params.outdir}/sra" }, - enabled: false - ] - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config index de803a38..187faf6d 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config @@ -1,2 +1 @@ -includeConfig '../../../modules/nf-core/sratools/prefetch/nextflow.config' includeConfig '../../../modules/nf-core/sratools/fasterqdump/nextflow.config' diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config index d242c238..522b05b8 100644 --- a/workflows/sra/nextflow.config +++ b/workflows/sra/nextflow.config @@ -1,8 +1,3 @@ -includeConfig "../../modules/local/multiqc_mappings_config/nextflow.config" includeConfig "../../modules/local/aspera_cli/nextflow.config" includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config" -includeConfig "../../modules/local/sra_ids_to_runinfo/nextflow.config" -includeConfig 
"../../modules/local/sra_runinfo_to_ftp/nextflow.config" -includeConfig "../../modules/local/sra_to_samplesheet/nextflow.config" -includeConfig "../../modules/nf-core/sratools/prefetch/nextflow.config" includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config" From 98b65742349422847bd9288551dd3caaccc36763 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 15:01:42 -0600 Subject: [PATCH 04/17] Simplify output DSL, samplesheet generation Signed-off-by: Ben Sherman --- main.nf | 41 +++------- modules/local/sra_to_samplesheet/main.nf | 95 ++++++++++++------------ workflows/sra/main.nf | 23 +----- 3 files changed, 62 insertions(+), 97 deletions(-) diff --git a/main.nf b/main.nf index b461a4e3..b37f0644 100644 --- a/main.nf +++ b/main.nf @@ -84,42 +84,23 @@ workflow { } output { - path params.outdir + path(params.outdir) { + path('fastq') { + select 'ASPERA_CLI|SRA_FASTQ_FTP|SRATOOLS_FASTERQDUMP', pattern: '*.fastq.gz' - collect('fastq') { - select('ASPERA_CLI|SRA_FASTQ_FTP|SRATOOLS_FASTERQDUMP') { - path 'fastq' - pattern '*.fastq.gz' + path('md5') { + select 'ASPERA_CLI|SRA_FASTQ_FTP', pattern: '*.md5' + } } - select('ASPERA_CLI|SRA_FASTQ_FTP') { - path 'fastq/md5' - pattern '*.md5' + path('metadata') { + select 'SRA_RUNINFO_TO_FTP', pattern: '*.csv' } - } - - collect('metadata') { - path 'metadata' - select('SRA_RUNINFO_TO_FTP') { - pattern '*.tsv' - } - } - collect('samplesheet') { - path 'samplesheet' - select('MULTIQC_MAPPINGS_CONFIG') { - pattern 'multiqc_config.yml' + path('samplesheet') { + select 'SRA_TO_SAMPLESHEET' + select 'MULTIQC_MAPPINGS_CONFIG', pattern: 'multiqc_config.yml' } - - // index { - // format 'csv' - // path 'id_mappings.csv' - // } - - // index { - // format 'csv' - // path 'samplesheet.csv' - // } } } diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf index 92edf5df..5a491fbb 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ 
b/modules/local/sra_to_samplesheet/main.nf @@ -1,75 +1,76 @@ process SRA_TO_SAMPLESHEET { - tag "$meta.id" - executor 'local' memory 100.MB input: - val meta + val samples val pipeline val strandedness val mapping_fields output: - tuple val(meta), path("*samplesheet.csv"), emit: samplesheet - tuple val(meta), path("*mappings.csv") , emit: mappings + path("samplesheet.csv"), emit: samplesheet + path("id_mappings.csv"), emit: mappings exec: - // - // Create samplesheet containing metadata - // - // Remove custom keys needed to download the data - def meta_clone = meta.clone() - meta_clone.remove("id") - meta_clone.remove("fastq_1") - meta_clone.remove("fastq_2") - meta_clone.remove("md5_1") - meta_clone.remove("md5_2") - meta_clone.remove("single_end") + def records = samples.collect { meta -> + def meta_clone = meta.clone() + meta_clone.remove("id") + meta_clone.remove("fastq_1") + meta_clone.remove("fastq_2") + meta_clone.remove("md5_1") + meta_clone.remove("md5_2") + meta_clone.remove("single_end") - // Add relevant fields to the beginning of the map - pipeline_map = [ - sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : meta.fastq_1, - fastq_2 : meta.fastq_2 - ] + // Add relevant fields to the beginning of the map + def record = [ + sample : "${meta.id.split('_')[0..-2].join('_')}", + fastq_1 : meta.fastq_1, + fastq_2 : meta.fastq_2 + ] - // Add nf-core pipeline specific entries - if (pipeline) { - if (pipeline == 'rnaseq') { - pipeline_map << [ strandedness: strandedness ] - } else if (pipeline == 'atacseq') { - pipeline_map << [ replicate: 1 ] - } else if (pipeline == 'taxprofiler') { - pipeline_map << [ fasta: '' ] + // Add nf-core pipeline specific entries + if (pipeline) { + if (pipeline == 'rnaseq') { + record << [ strandedness: strandedness ] + } else if (pipeline == 'atacseq') { + record << [ replicate: 1 ] + } else if (pipeline == 'taxprofiler') { + record << [ fasta: '' ] + } } + record << meta_clone } - pipeline_map << meta_clone - // 
Create a samplesheet - samplesheet = pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' - samplesheet += pipeline_map.values().collect{ '"' + it + '"'}.join(",") + // + // Create samplesheet containing metadata + // + def samplesheet_lines = [] + samplesheet_lines << records.first().keySet().collect{ '"' + it + '"'}.join(",") + records.each { record -> + samplesheet_lines << record.values().collect{ '"' + it + '"'}.join(",") + } - // Write samplesheet to file - def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv") - samplesheet_file.text = samplesheet + def samplesheet_file = task.workDir.resolve("samplesheet.csv") + samplesheet_file.text = samplesheet_lines.join('\n') // // Create sample id mappings file // - mappings_map = pipeline_map.clone() def fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ it.trim().toLowerCase() } : [] - if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { - error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") - } - // Create mappings - mappings = fields.collect{ '"' + it + '"'}.join(",") + '\n' - mappings += mappings_map.subMap(fields).values().collect{ '"' + it + '"'}.join(",") + def mapping_lines = [] + mapping_lines << fields.collect{ '"' + it + '"'}.join(",") + records.each { record -> + def mappings_map = record.clone() + if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { + error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") + } + mapping_lines << mappings_map.subMap(fields).values().collect{ '"' + it + '"'}.join(",") + } - // Write mappings to file - def mappings_file = task.workDir.resolve("${meta.id}.mappings.csv") - mappings_file.text = mappings + def mappings_file = task.workDir.resolve("id_mappings.csv") + mappings_file.text = mapping_lines.join('\n') } 
diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 35ae18dc..ece96205 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -140,30 +140,13 @@ workflow SRA { // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet // SRA_TO_SAMPLESHEET ( - ch_sra_metadata, + ch_sra_metadata.collect(), params.nf_core_pipeline ?: '', params.nf_core_rnaseq_strandedness ?: 'auto', params.sample_mapping_fields ) - - // Merge samplesheets and mapping files across all samples - SRA_TO_SAMPLESHEET - .out - .samplesheet - .map { it[1] } - .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'samplesheet.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_samplesheet } - - SRA_TO_SAMPLESHEET - .out - .mappings - .map { it[1] } - .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'id_mappings.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_mappings } + ch_samplesheet = SRA_TO_SAMPLESHEET.out.samplesheet + ch_mappings = SRA_TO_SAMPLESHEET.out.mappings // // MODULE: Create a MutiQC config file with sample name mappings From 5647bd1d647f4b8af5950b2ec90b1b36aa15daaf Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 16:16:45 -0600 Subject: [PATCH 05/17] Flatten output definition Signed-off-by: Ben Sherman --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index b37f0644..fb3fdb82 100644 --- a/main.nf +++ b/main.nf @@ -87,10 +87,10 @@ output { path(params.outdir) { path('fastq') { select 'ASPERA_CLI|SRA_FASTQ_FTP|SRATOOLS_FASTERQDUMP', pattern: '*.fastq.gz' + } - path('md5') { - select 'ASPERA_CLI|SRA_FASTQ_FTP', pattern: '*.md5' - } + path('fastq/md5') { + select 'ASPERA_CLI|SRA_FASTQ_FTP', pattern: '*.md5' } path('metadata') { From 
4405674601797c8814031c7aca4f8be7369797ce Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 16:22:37 -0600 Subject: [PATCH 06/17] Fix bug Signed-off-by: Ben Sherman --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index fb3fdb82..ed7fe445 100644 --- a/main.nf +++ b/main.nf @@ -94,7 +94,7 @@ output { } path('metadata') { - select 'SRA_RUNINFO_TO_FTP', pattern: '*.csv' + select 'SRA_RUNINFO_TO_FTP', pattern: '*.tsv' } path('samplesheet') { From 47fa3f9b685c6e14f6d1f50d61bcb7946bd11ef1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 28 Feb 2024 19:25:27 -0600 Subject: [PATCH 07/17] Add default publish mode Signed-off-by: Ben Sherman --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index ed7fe445..43d2a7b5 100644 --- a/main.nf +++ b/main.nf @@ -84,7 +84,7 @@ workflow { } output { - path(params.outdir) { + path(params.outdir, mode: params.publish_dir_mode) { path('fastq') { select 'ASPERA_CLI|SRA_FASTQ_FTP|SRATOOLS_FASTERQDUMP', pattern: '*.fastq.gz' } From ec29439c791a622f7e8136196545f62873d336a0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 29 Feb 2024 12:15:54 -0600 Subject: [PATCH 08/17] Add `toSamplesheet()` method to generate samplesheet Signed-off-by: Ben Sherman --- modules/local/sra_to_samplesheet/main.nf | 39 ++++++++++++++---------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf index 5a491fbb..5159aee5 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ b/modules/local/sra_to_samplesheet/main.nf @@ -1,4 +1,20 @@ +/** + * Save a list of records to a samplesheet file. 
+ * + * @param records + * @param path + */ +def toSamplesheet(List records, Path path) { + def lines = [] + lines << records.first().keySet().collect{ '"' + it + '"'}.join(",") + records.each { record -> + lines << record.values().collect{ '"' + it + '"'}.join(",") + } + + path.text = lines.join('\n') +} + process SRA_TO_SAMPLESHEET { executor 'local' memory 100.MB @@ -42,35 +58,26 @@ process SRA_TO_SAMPLESHEET { } } record << meta_clone + record } // // Create samplesheet containing metadata // - def samplesheet_lines = [] - samplesheet_lines << records.first().keySet().collect{ '"' + it + '"'}.join(",") - records.each { record -> - samplesheet_lines << record.values().collect{ '"' + it + '"'}.join(",") - } - def samplesheet_file = task.workDir.resolve("samplesheet.csv") - samplesheet_file.text = samplesheet_lines.join('\n') + toSamplesheet(records, samplesheet_file) // // Create sample id mappings file // def fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ it.trim().toLowerCase() } : [] - - def mapping_lines = [] - mapping_lines << fields.collect{ '"' + it + '"'}.join(",") - records.each { record -> - def mappings_map = record.clone() - if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { - error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") + def mapping_records = records.collect { record -> + if ((record.keySet() + fields).unique().size() != record.keySet().size()) { + error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${record.keySet().join(', ')}") } - mapping_lines << mappings_map.subMap(fields).values().collect{ '"' + it + '"'}.join(",") + record.subMap(fields) } def mappings_file = task.workDir.resolve("id_mappings.csv") - mappings_file.text = mapping_lines.join('\n') + toSamplesheet(mapping_records, mappings_file) } From 48c6675fe3d88913355123545ad30dbebbfc9a00 Mon Sep 17 00:00:00 2001 
From: Ben Sherman Date: Thu, 29 Feb 2024 12:51:37 -0600 Subject: [PATCH 09/17] Move output file schemas to separate files Signed-off-by: Ben Sherman --- assets/schema_mappings.yml | 28 +++++++++ assets/schema_samplesheet.yml | 81 +++++++++++++++++++++++++ output.yml | 110 +++------------------------------- 3 files changed, 117 insertions(+), 102 deletions(-) create mode 100644 assets/schema_mappings.yml create mode 100644 assets/schema_samplesheet.yml diff --git a/assets/schema_mappings.yml b/assets/schema_mappings.yml new file mode 100644 index 00000000..7c9b8452 --- /dev/null +++ b/assets/schema_mappings.yml @@ -0,0 +1,28 @@ +$schema: 'http://json-schema.org/draft-07/schema' +$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_mappings.yml' +title: 'nf-core/fetchngs pipeline - id_mappings.csv schema' +description: 'Schema for the mappings file produced by fetchngs' +type: array +items: + type: object + properties: + sample: + type: string + experiment_accession: + type: string + run_accession: + type: string + sample_accession: + type: string + experiment_alias: + type: string + run_alias: + type: string + sample_alias: + type: string + experiment_title: + type: string + sample_title: + type: string + sample_description: + type: string diff --git a/assets/schema_samplesheet.yml b/assets/schema_samplesheet.yml new file mode 100644 index 00000000..bb9a7e78 --- /dev/null +++ b/assets/schema_samplesheet.yml @@ -0,0 +1,81 @@ +$schema: 'http://json-schema.org/draft-07/schema' +$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_samplesheet.yml' +title: 'nf-core/fetchngs pipeline - samplesheet.csv schema' +description: 'Schema for the samplesheet file produced by fetchngs' +type: array +items: + type: object + properties: + sample: + type: string + fastq_1: + type: string + format: file-path + pattern: '^\S+\.f(ast)?q\.gz$' + fastq_2: + type: string + format: file-path + pattern: '^\S+\.f(ast)?q\.gz$' + 
run_accession: + type: string + experiment_accession: + type: string + sample_accession: + type: string + secondary_sample_accession: + type: string + study_accession: + type: string + secondary_study_accession: + type: string + submission_accession: + type: string + run_alias: + type: string + experiment_alias: + type: string + sample_alias: + type: string + study_alias: + type: string + library_layout: + type: string + library_selection: + type: string + library_source: + type: string + library_strategy: + type: string + library_name: + type: string + instrument_model: + type: string + instrument_platform: + type: string + base_count: + type: integer + read_count: + type: integer + tax_id: + type: string + scientific_name: + type: string + sample_title: + type: string + experiment_title: + type: string + study_title: + type: string + sample_description: + type: string + fastq_md5: + type: string + pattern: '^[0-9a-f]{32}$' + fastq_bytes: + type: integer + fastq_ftp: + type: string + fastq_galaxy: + type: string + fastq_aspera: + type: string diff --git a/output.yml b/output.yml index f80f1ba5..84d49b17 100644 --- a/output.yml +++ b/output.yml @@ -5,106 +5,12 @@ description: '' type: object properties: id_mappings: - type: array - items: - type: object - properties: - sample: - type: string - experiment_accession: - type: string - run_accession: - type: string - sample_accession: - type: string - experiment_alias: - type: string - run_alias: - type: string - sample_alias: - type: string - experiment_title: - type: string - sample_title: - type: string - sample_description: - type: string + type: string + format: file-path + mimetype: text/csv + schema: assets/schema_mappings.yml samplesheet: - type: array - items: - type: object - properties: - sample: - type: string - fastq_1: - type: string - format: file-path - pattern: '^\\S+\\.f(ast)?q\\.gz$' - fastq_2: - type: string - format: file-path - pattern: '^\\S+\\.f(ast)?q\\.gz$' - run_accession: - type: string - 
experiment_accession: - type: string - sample_accession: - type: string - secondary_sample_accession: - type: string - study_accession: - type: string - secondary_study_accession: - type: string - submission_accession: - type: string - run_alias: - type: string - experiment_alias: - type: string - sample_alias: - type: string - study_alias: - type: string - library_layout: - type: string - library_selection: - type: string - library_source: - type: string - library_strategy: - type: string - library_name: - type: string - instrument_model: - type: string - instrument_platform: - type: string - base_count: - type: integer - read_count: - type: integer - tax_id: - type: string - scientific_name: - type: string - sample_title: - type: string - experiment_title: - type: string - study_title: - type: string - sample_description: - type: string - fastq_md5: - type: string - pattern: '^[0-9a-f]{32}$' - fastq_bytes: - type: integer - fastq_ftp: - type: string - fastq_galaxy: - type: string - fastq_aspera: - type: string - required: ['sample', 'fastq_1'] + type: string + format: file-path + mimetype: text/csv + schema: assets/schema_samplesheet.yml From 9beb5eae2c550847e33db2065f848aea12b36968 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 29 Feb 2024 12:56:12 -0600 Subject: [PATCH 10/17] Add `schema` option to validate published outputs against a schema Signed-off-by: Ben Sherman --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 43d2a7b5..cb56e57b 100644 --- a/main.nf +++ b/main.nf @@ -98,7 +98,8 @@ output { } path('samplesheet') { - select 'SRA_TO_SAMPLESHEET' + select 'SRA_TO_SAMPLESHEET', pattern: 'samplesheet.csv', schema: 'assets/schema_samplesheet.yml' + select 'SRA_TO_SAMPLESHEET', pattern: 'id_mappings.csv', schema: 'assets/schema_mappings.yml' select 'MULTIQC_MAPPINGS_CONFIG', pattern: 'multiqc_config.yml' } } From 5b3587bada27f7be8e4b492c7b54dd5f9f79d1a9 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: 
Tue, 19 Mar 2024 16:56:58 -0500 Subject: [PATCH 11/17] Replace process selectors with channel selectors Signed-off-by: Ben Sherman --- main.nf | 27 ++++-- modules/local/sra_to_samplesheet/main.nf | 104 +++++++++++------------ workflows/sra/main.nf | 38 ++++++++- 3 files changed, 103 insertions(+), 66 deletions(-) diff --git a/main.nf b/main.nf index cb56e57b..d21092a6 100644 --- a/main.nf +++ b/main.nf @@ -34,6 +34,17 @@ workflow NFCORE_FETCHNGS { // SRA ( ids ) + emit: + runinfo_tsv = SRA.out.runinfo_tsv + fastq = SRA.out.fastq + fastq_md5 = SRA.out.fastq_md5 + samplesheet = SRA.out.samplesheet + mappings = SRA.out.mappings + sample_mappings = SRA.out.sample_mappings + sra_metadata = SRA.out.sra_metadata + versions = SRA.out.versions + versions_yml = SRA.out.versions_yml + } /* @@ -86,21 +97,25 @@ workflow { output { path(params.outdir, mode: params.publish_dir_mode) { path('fastq') { - select 'ASPERA_CLI|SRA_FASTQ_FTP|SRATOOLS_FASTERQDUMP', pattern: '*.fastq.gz' + select NFCORE_FETCHNGS.out.fastq } path('fastq/md5') { - select 'ASPERA_CLI|SRA_FASTQ_FTP', pattern: '*.md5' + select NFCORE_FETCHNGS.out.fastq_md5 } path('metadata') { - select 'SRA_RUNINFO_TO_FTP', pattern: '*.tsv' + select NFCORE_FETCHNGS.out.runinfo_tsv + } + + path('pipeline_info') { + select NFCORE_FETCHNGS.out.versions_yml } path('samplesheet') { - select 'SRA_TO_SAMPLESHEET', pattern: 'samplesheet.csv', schema: 'assets/schema_samplesheet.yml' - select 'SRA_TO_SAMPLESHEET', pattern: 'id_mappings.csv', schema: 'assets/schema_mappings.yml' - select 'MULTIQC_MAPPINGS_CONFIG', pattern: 'multiqc_config.yml' + select NFCORE_FETCHNGS.out.samplesheet, schema: 'assets/schema_samplesheet.yml' + select NFCORE_FETCHNGS.out.mappings, schema: 'assets/schema_mappings.yml' + select NFCORE_FETCHNGS.out.sample_mappings } } } diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf index 5159aee5..92edf5df 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ 
b/modules/local/sra_to_samplesheet/main.nf @@ -1,83 +1,75 @@ -/** - * Save a list of records to a samplesheet file. - * - * @param records - * @param path - */ -def toSamplesheet(List records, Path path) { - def lines = [] - lines << records.first().keySet().collect{ '"' + it + '"'}.join(",") - records.each { record -> - lines << record.values().collect{ '"' + it + '"'}.join(",") - } - - path.text = lines.join('\n') -} - process SRA_TO_SAMPLESHEET { + tag "$meta.id" + executor 'local' memory 100.MB input: - val samples + val meta val pipeline val strandedness val mapping_fields output: - path("samplesheet.csv"), emit: samplesheet - path("id_mappings.csv"), emit: mappings + tuple val(meta), path("*samplesheet.csv"), emit: samplesheet + tuple val(meta), path("*mappings.csv") , emit: mappings exec: + // + // Create samplesheet containing metadata + // + // Remove custom keys needed to download the data - def records = samples.collect { meta -> - def meta_clone = meta.clone() - meta_clone.remove("id") - meta_clone.remove("fastq_1") - meta_clone.remove("fastq_2") - meta_clone.remove("md5_1") - meta_clone.remove("md5_2") - meta_clone.remove("single_end") + def meta_clone = meta.clone() + meta_clone.remove("id") + meta_clone.remove("fastq_1") + meta_clone.remove("fastq_2") + meta_clone.remove("md5_1") + meta_clone.remove("md5_2") + meta_clone.remove("single_end") - // Add relevant fields to the beginning of the map - def record = [ - sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : meta.fastq_1, - fastq_2 : meta.fastq_2 - ] + // Add relevant fields to the beginning of the map + pipeline_map = [ + sample : "${meta.id.split('_')[0..-2].join('_')}", + fastq_1 : meta.fastq_1, + fastq_2 : meta.fastq_2 + ] - // Add nf-core pipeline specific entries - if (pipeline) { - if (pipeline == 'rnaseq') { - record << [ strandedness: strandedness ] - } else if (pipeline == 'atacseq') { - record << [ replicate: 1 ] - } else if (pipeline == 'taxprofiler') { - record << [ 
fasta: '' ] - } + // Add nf-core pipeline specific entries + if (pipeline) { + if (pipeline == 'rnaseq') { + pipeline_map << [ strandedness: strandedness ] + } else if (pipeline == 'atacseq') { + pipeline_map << [ replicate: 1 ] + } else if (pipeline == 'taxprofiler') { + pipeline_map << [ fasta: '' ] } - record << meta_clone - record } + pipeline_map << meta_clone - // - // Create samplesheet containing metadata - // - def samplesheet_file = task.workDir.resolve("samplesheet.csv") - toSamplesheet(records, samplesheet_file) + // Create a samplesheet + samplesheet = pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' + samplesheet += pipeline_map.values().collect{ '"' + it + '"'}.join(",") + + // Write samplesheet to file + def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv") + samplesheet_file.text = samplesheet // // Create sample id mappings file // + mappings_map = pipeline_map.clone() def fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ it.trim().toLowerCase() } : [] - def mapping_records = records.collect { record -> - if ((record.keySet() + fields).unique().size() != record.keySet().size()) { - error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${record.keySet().join(', ')}") - } - record.subMap(fields) + if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { + error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") } - def mappings_file = task.workDir.resolve("id_mappings.csv") - toSamplesheet(mapping_records, mappings_file) + // Create mappings + mappings = fields.collect{ '"' + it + '"'}.join(",") + '\n' + mappings += mappings_map.subMap(fields).values().collect{ '"' + it + '"'}.join(",") + + // Write mappings to file + def mappings_file = task.workDir.resolve("${meta.id}.mappings.csv") + mappings_file.text = mappings } diff --git a/workflows/sra/main.nf 
b/workflows/sra/main.nf index ece96205..6a3b8fda 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -54,6 +54,7 @@ workflow SRA { SRA_RUNINFO_TO_FTP .out .tsv + .tap { ch_runinfo_tsv } .splitCsv(header:true, sep:'\t') .map { meta -> @@ -123,6 +124,7 @@ workflow SRA { .fastq .mix(SRA_FASTQ_FTP.out.fastq) .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) + .tap { ch_fastq } .map { meta, fastq -> def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] @@ -134,19 +136,42 @@ workflow SRA { return meta_clone } .set { ch_sra_metadata } + + ASPERA_CLI + .out + .md5 + .mix(SRA_FASTQ_FTP.out.md5) + .set { ch_fastq_md5 } } // // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet // SRA_TO_SAMPLESHEET ( - ch_sra_metadata.collect(), + ch_sra_metadata, params.nf_core_pipeline ?: '', params.nf_core_rnaseq_strandedness ?: 'auto', params.sample_mapping_fields ) - ch_samplesheet = SRA_TO_SAMPLESHEET.out.samplesheet - ch_mappings = SRA_TO_SAMPLESHEET.out.mappings + + // Merge samplesheets and mapping files across all samples + SRA_TO_SAMPLESHEET + .out + .samplesheet + .map { it[1] } + .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) + .map { it.text.tokenize('\n').join('\n') } + .collectFile(name:'samplesheet.csv') + .set { ch_samplesheet } + + SRA_TO_SAMPLESHEET + .out + .mappings + .map { it[1] } + .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) + .map { it.text.tokenize('\n').join('\n') } + .collectFile(name:'id_mappings.csv') + .set { ch_mappings } // // MODULE: Create a MutiQC config file with sample name mappings @@ -164,14 +189,19 @@ workflow SRA { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) + .collectFile(name: 
'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_versions_yml } emit: + runinfo_tsv = ch_runinfo_tsv + fastq = ch_fastq + fastq_md5 = ch_fastq_md5 samplesheet = ch_samplesheet mappings = ch_mappings sample_mappings = ch_sample_mappings_yml sra_metadata = ch_sra_metadata versions = ch_versions.unique() + versions_yml = ch_versions_yml } /* From 9e8ef57327e4990b597995dbabc4e568a7f9c272 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 12:51:28 -0500 Subject: [PATCH 12/17] Use channel topics to reduce channel propagation Signed-off-by: Ben Sherman --- main.nf | 49 ++++++++++++++++++++----------------------- workflows/sra/main.nf | 17 +++++---------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/main.nf b/main.nf index d21092a6..66f658c6 100644 --- a/main.nf +++ b/main.nf @@ -10,6 +10,7 @@ */ nextflow.enable.dsl = 2 +nextflow.preview.topic = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -35,15 +36,11 @@ workflow NFCORE_FETCHNGS { SRA ( ids ) emit: - runinfo_tsv = SRA.out.runinfo_tsv - fastq = SRA.out.fastq - fastq_md5 = SRA.out.fastq_md5 samplesheet = SRA.out.samplesheet mappings = SRA.out.mappings sample_mappings = SRA.out.sample_mappings sra_metadata = SRA.out.sra_metadata versions = SRA.out.versions - versions_yml = SRA.out.versions_yml } @@ -95,28 +92,28 @@ workflow { } output { - path(params.outdir, mode: params.publish_dir_mode) { - path('fastq') { - select NFCORE_FETCHNGS.out.fastq - } - - path('fastq/md5') { - select NFCORE_FETCHNGS.out.fastq_md5 - } - - path('metadata') { - select NFCORE_FETCHNGS.out.runinfo_tsv - } - - path('pipeline_info') { - select NFCORE_FETCHNGS.out.versions_yml - } - - path('samplesheet') { - select NFCORE_FETCHNGS.out.samplesheet, schema: 'assets/schema_samplesheet.yml' - select NFCORE_FETCHNGS.out.mappings, schema: 'assets/schema_mappings.yml' - select NFCORE_FETCHNGS.out.sample_mappings - } + directory 
params.outdir, mode: params.publish_dir_mode + + 'fastq' { + topic 'fastq' + } + + 'fastq/md5' { + topic 'md5' + } + + 'metadata' { + topic 'runinfo-tsv' + } + + 'pipeline_info' { + topic 'versions-yml' + } + + 'samplesheet' { + select NFCORE_FETCHNGS.out.samplesheet, schema: 'assets/schema_samplesheet.yml' + select NFCORE_FETCHNGS.out.mappings, schema: 'assets/schema_mappings.yml' + select NFCORE_FETCHNGS.out.sample_mappings } } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 6a3b8fda..2b119a6f 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -54,7 +54,7 @@ workflow SRA { SRA_RUNINFO_TO_FTP .out .tsv - .tap { ch_runinfo_tsv } + // .topic('runinfo-tsv') .splitCsv(header:true, sep:'\t') .map { meta -> @@ -124,7 +124,7 @@ workflow SRA { .fastq .mix(SRA_FASTQ_FTP.out.fastq) .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) - .tap { ch_fastq } + // .topic('fastq') .map { meta, fastq -> def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] @@ -137,11 +137,8 @@ workflow SRA { } .set { ch_sra_metadata } - ASPERA_CLI - .out - .md5 - .mix(SRA_FASTQ_FTP.out.md5) - .set { ch_fastq_md5 } + ASPERA_CLI.out.md5.topic('md5') + SRA_FASTQ_FTP.out.md5.topic('md5') } // @@ -190,18 +187,14 @@ workflow SRA { // softwareVersionsToYAML(ch_versions) .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_versions_yml } + .topic('versions-yml') emit: - runinfo_tsv = ch_runinfo_tsv - fastq = ch_fastq - fastq_md5 = ch_fastq_md5 samplesheet = ch_samplesheet mappings = ch_mappings sample_mappings = ch_sample_mappings_yml sra_metadata = ch_sra_metadata versions = ch_versions.unique() - versions_yml = ch_versions_yml } /* From 35aaddd7b951f6dc30f4d6220e1ec0f525cae832 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 27 Mar 2024 21:17:22 -0500 Subject: [PATCH 13/17] Temporary workaround for topic operator bug Signed-off-by: Ben Sherman --- main.nf | 4 ++-- workflows/sra/main.nf | 13 
+++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 66f658c6..f2388d58 100644 --- a/main.nf +++ b/main.nf @@ -111,8 +111,8 @@ output { } 'samplesheet' { - select NFCORE_FETCHNGS.out.samplesheet, schema: 'assets/schema_samplesheet.yml' - select NFCORE_FETCHNGS.out.mappings, schema: 'assets/schema_mappings.yml' + select NFCORE_FETCHNGS.out.samplesheet // , schema: 'assets/schema_samplesheet.yml' + select NFCORE_FETCHNGS.out.mappings // , schema: 'assets/schema_mappings.yml' select NFCORE_FETCHNGS.out.sample_mappings } } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 2b119a6f..249c56b3 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -54,7 +54,11 @@ workflow SRA { SRA_RUNINFO_TO_FTP .out .tsv - // .topic('runinfo-tsv') + .topic('runinfo-tsv') + + SRA_RUNINFO_TO_FTP + .out + .tsv .splitCsv(header:true, sep:'\t') .map { meta -> @@ -124,7 +128,12 @@ workflow SRA { .fastq .mix(SRA_FASTQ_FTP.out.fastq) .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) - // .topic('fastq') + .set { ch_fastq } + + ch_fastq + .topic('fastq') + + ch_fastq .map { meta, fastq -> def reads = fastq instanceof List ? 
fastq.flatten() : [ fastq ] From 77bdd190fc0f7b3d5ae726fc390e915ca0813b39 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 30 Mar 2024 09:38:56 -0500 Subject: [PATCH 14/17] Apply updates from upstream Signed-off-by: Ben Sherman --- main.nf | 14 +++++++------- workflows/sra/main.nf | 24 +++++++++--------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/main.nf b/main.nf index f2388d58..3c73d88b 100644 --- a/main.nf +++ b/main.nf @@ -95,25 +95,25 @@ output { directory params.outdir, mode: params.publish_dir_mode 'fastq' { - topic 'fastq' + from 'fastq' } 'fastq/md5' { - topic 'md5' + from 'md5' } 'metadata' { - topic 'runinfo-tsv' + from 'runinfo-tsv' } 'pipeline_info' { - topic 'versions-yml' + from 'versions-yml' } 'samplesheet' { - select NFCORE_FETCHNGS.out.samplesheet // , schema: 'assets/schema_samplesheet.yml' - select NFCORE_FETCHNGS.out.mappings // , schema: 'assets/schema_mappings.yml' - select NFCORE_FETCHNGS.out.sample_mappings + from NFCORE_FETCHNGS.out.samplesheet // , schema: 'assets/schema_samplesheet.yml' + from NFCORE_FETCHNGS.out.mappings // , schema: 'assets/schema_mappings.yml' + from NFCORE_FETCHNGS.out.sample_mappings } } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 249c56b3..087e2937 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -51,11 +51,6 @@ workflow SRA { ) ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first()) - SRA_RUNINFO_TO_FTP - .out - .tsv - .topic('runinfo-tsv') - SRA_RUNINFO_TO_FTP .out .tsv @@ -128,12 +123,7 @@ workflow SRA { .fastq .mix(SRA_FASTQ_FTP.out.fastq) .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) - .set { ch_fastq } - - ch_fastq - .topic('fastq') - - ch_fastq + .tap { ch_fastq } .map { meta, fastq -> def reads = fastq instanceof List ? 
fastq.flatten() : [ fastq ] @@ -145,9 +135,6 @@ workflow SRA { return meta_clone } .set { ch_sra_metadata } - - ASPERA_CLI.out.md5.topic('md5') - SRA_FASTQ_FTP.out.md5.topic('md5') } // @@ -196,7 +183,14 @@ workflow SRA { // softwareVersionsToYAML(ch_versions) .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) - .topic('versions-yml') + .set { ch_versions_yml } + + topic: + SRA_RUNINFO_TO_FTP.out.tsv >> 'runinfo-tsv' + ch_fastq >> 'fastq' + ASPERA_CLI.out.md5 >> 'md5' + SRA_FASTQ_FTP.out.md5 >> 'md5' + ch_versions_yml >> 'versions-yml' emit: samplesheet = ch_samplesheet From 3ca8a28fb02be2f0966d338adeb6f0534f35055e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 10 Apr 2024 09:06:15 -0500 Subject: [PATCH 15/17] Update using latest syntax Signed-off-by: Ben Sherman --- main.nf | 33 ++------------------------------- workflows/sra/main.nf | 17 ++++++++++------- 2 files changed, 12 insertions(+), 38 deletions(-) diff --git a/main.nf b/main.nf index 3c73d88b..ff0b5b98 100644 --- a/main.nf +++ b/main.nf @@ -10,7 +10,6 @@ */ nextflow.enable.dsl = 2 -nextflow.preview.topic = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -35,13 +34,6 @@ workflow NFCORE_FETCHNGS { // SRA ( ids ) - emit: - samplesheet = SRA.out.samplesheet - mappings = SRA.out.mappings - sample_mappings = SRA.out.sample_mappings - sra_metadata = SRA.out.sra_metadata - versions = SRA.out.versions - } /* @@ -92,29 +84,8 @@ workflow { } output { - directory params.outdir, mode: params.publish_dir_mode - - 'fastq' { - from 'fastq' - } - - 'fastq/md5' { - from 'md5' - } - - 'metadata' { - from 'runinfo-tsv' - } - - 'pipeline_info' { - from 'versions-yml' - } - - 'samplesheet' { - from NFCORE_FETCHNGS.out.samplesheet // , schema: 'assets/schema_samplesheet.yml' - from NFCORE_FETCHNGS.out.mappings // , schema: 'assets/schema_mappings.yml' - from NFCORE_FETCHNGS.out.sample_mappings - } + directory params.outdir + mode 
params.publish_dir_mode } /* diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 087e2937..854195a8 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -185,19 +185,22 @@ workflow SRA { .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) .set { ch_versions_yml } - topic: - SRA_RUNINFO_TO_FTP.out.tsv >> 'runinfo-tsv' - ch_fastq >> 'fastq' - ASPERA_CLI.out.md5 >> 'md5' - SRA_FASTQ_FTP.out.md5 >> 'md5' - ch_versions_yml >> 'versions-yml' - emit: samplesheet = ch_samplesheet mappings = ch_mappings sample_mappings = ch_sample_mappings_yml sra_metadata = ch_sra_metadata versions = ch_versions.unique() + + publish: + ch_fastq >> 'fastq/' + ASPERA_CLI.out.md5 >> 'fastq/md5/' + SRA_FASTQ_FTP.out.md5 >> 'fastq/md5/' + SRA_RUNINFO_TO_FTP.out.tsv >> 'metadata/' + ch_versions_yml >> 'pipeline_info/' + ch_samplesheet >> 'samplesheet/' + ch_mappings >> 'samplesheet/' + ch_sample_mappings_yml >> 'samplesheet/' } /* From bb7febb9e1a847dfa47e59c59c664a5080828d43 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 24 Apr 2024 01:33:33 -0500 Subject: [PATCH 16/17] Rename output -> publish Signed-off-by: Ben Sherman --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index ff0b5b98..cd0a1369 100644 --- a/main.nf +++ b/main.nf @@ -83,7 +83,7 @@ workflow { ) } -output { +publish { directory params.outdir mode params.publish_dir_mode } From c01bd0c215d1d1b0b55c099ecaa2e8c0c376664e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 16 May 2024 21:28:44 -0500 Subject: [PATCH 17/17] minor updates Signed-off-by: Ben Sherman --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index cd0a1369..b11373f5 100644 --- a/main.nf +++ b/main.nf @@ -10,6 +10,7 @@ */ nextflow.enable.dsl = 2 +nextflow.preview.output = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -83,7 +84,7 @@ workflow { ) } 
-publish { +output { directory params.outdir mode params.publish_dir_mode }