From f531c5d548e2bd213748abad274781a7930ce699 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 19:52:30 -0500 Subject: [PATCH 01/25] Replace ext/publishDir with params/publish definition Signed-off-by: Ben Sherman --- conf/base.config | 6 ----- main.nf | 5 ++++ modules/local/aspera_cli/main.nf | 2 +- modules/local/aspera_cli/nextflow.config | 17 ------------ .../multiqc_mappings_config/nextflow.config | 9 ------- modules/local/sra_fastq_ftp/main.nf | 2 +- modules/local/sra_fastq_ftp/nextflow.config | 17 ------------ .../local/sra_ids_to_runinfo/nextflow.config | 8 ------ .../local/sra_runinfo_to_ftp/nextflow.config | 9 ------- .../local/sra_to_samplesheet/nextflow.config | 8 ------ modules/nf-core/sratools/fasterqdump/main.nf | 8 +++--- .../sratools/fasterqdump/nextflow.config | 10 ------- .../fasterqdump/tests/nextflow.config | 5 ---- .../nf-core/sratools/prefetch/nextflow.config | 8 ------ nextflow.config | 12 ++++----- .../main.nf | 10 ++++++- .../nextflow.config | 2 -- workflows/sra/main.nf | 27 ++++++++++++++----- workflows/sra/nextflow.config | 8 ------ 19 files changed, 46 insertions(+), 127 deletions(-) delete mode 100644 modules/local/aspera_cli/nextflow.config delete mode 100644 modules/local/multiqc_mappings_config/nextflow.config delete mode 100644 modules/local/sra_fastq_ftp/nextflow.config delete mode 100644 modules/local/sra_ids_to_runinfo/nextflow.config delete mode 100644 modules/local/sra_runinfo_to_ftp/nextflow.config delete mode 100644 modules/local/sra_to_samplesheet/nextflow.config delete mode 100644 modules/nf-core/sratools/fasterqdump/nextflow.config delete mode 100644 modules/nf-core/sratools/fasterqdump/tests/nextflow.config delete mode 100644 modules/nf-core/sratools/prefetch/nextflow.config delete mode 100644 subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config delete mode 100644 workflows/sra/nextflow.config diff --git a/conf/base.config b/conf/base.config index 6af79a7b..6af45542 100644 --- 
a/conf/base.config +++ b/conf/base.config @@ -14,12 +14,6 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/main.nf b/main.nf index 52539e40..e178d8a5 100644 --- a/main.nf +++ b/main.nf @@ -86,6 +86,11 @@ workflow { ) } +publish { + directory params.outdir + mode params.publish_dir_mode +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index b38d17c0..36882cdb 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -10,6 +10,7 @@ process ASPERA_CLI { input: tuple val(meta), val(fastq) val user + val args output: tuple val(meta), path("*fastq.gz"), emit: fastq @@ -17,7 +18,6 @@ process ASPERA_CLI { path "versions.yml" , emit: versions script: - def args = task.ext.args ?: '' def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? 
"export CONDA_PREFIX=/usr/local" : "" if (meta.single_end) { """ diff --git a/modules/local/aspera_cli/nextflow.config b/modules/local/aspera_cli/nextflow.config deleted file mode 100644 index fa2dbd90..00000000 --- a/modules/local/aspera_cli/nextflow.config +++ /dev/null @@ -1,17 +0,0 @@ -process { - withName: 'ASPERA_CLI' { - ext.args = '-QT -l 300m -P33001' - publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] - ] - } -} diff --git a/modules/local/multiqc_mappings_config/nextflow.config b/modules/local/multiqc_mappings_config/nextflow.config deleted file mode 100644 index 11c58341..00000000 --- a/modules/local/multiqc_mappings_config/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: 'MULTIQC_MAPPINGS_CONFIG' { - publishDir = [ - path: { "${params.outdir}/samplesheet" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } -} diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index e2274d46..f7100055 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -11,6 +11,7 @@ process SRA_FASTQ_FTP { input: tuple val(meta), val(fastq) + val args output: tuple val(meta), path("*fastq.gz"), emit: fastq @@ -18,7 +19,6 @@ process SRA_FASTQ_FTP { path "versions.yml" , emit: versions script: - def args = task.ext.args ?: '' if (meta.single_end) { """ wget \\ diff --git a/modules/local/sra_fastq_ftp/nextflow.config b/modules/local/sra_fastq_ftp/nextflow.config deleted file mode 100644 index 56e43959..00000000 --- a/modules/local/sra_fastq_ftp/nextflow.config +++ /dev/null @@ -1,17 +0,0 @@ -process { - withName: 'SRA_FASTQ_FTP' { - ext.args = '-t 5 -nv -c -T 60' - publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] - ] - } -} diff --git a/modules/local/sra_ids_to_runinfo/nextflow.config b/modules/local/sra_ids_to_runinfo/nextflow.config deleted file mode 100644 index 9b9d0b16..00000000 --- a/modules/local/sra_ids_to_runinfo/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: 'SRA_IDS_TO_RUNINFO' { - publishDir = [ - path: { "${params.outdir}/metadata" }, - enabled: false - ] - } -} diff --git a/modules/local/sra_runinfo_to_ftp/nextflow.config b/modules/local/sra_runinfo_to_ftp/nextflow.config deleted file mode 100644 index 43263648..00000000 --- a/modules/local/sra_runinfo_to_ftp/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: 'SRA_RUNINFO_TO_FTP' { - publishDir = [ - path: { "${params.outdir}/metadata" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } -} diff --git a/modules/local/sra_to_samplesheet/nextflow.config b/modules/local/sra_to_samplesheet/nextflow.config deleted file mode 100644 index da241c1a..00000000 --- a/modules/local/sra_to_samplesheet/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: SRA_TO_SAMPLESHEET { - publishDir = [ - path: { "${params.outdir}/samplesheet" }, - enabled: false - ] - } -} diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index e7cf157a..ca94a925 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -11,6 +11,8 @@ process SRATOOLS_FASTERQDUMP { tuple val(meta), path(sra) path ncbi_settings path certificate + val fasterqdump_args // = '--split-files --include-technical' + val pigz_args // = '' output: tuple val(meta), path('*.fastq.gz'), emit: reads @@ -20,8 +22,6 @@ process SRATOOLS_FASTERQDUMP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def outfile = meta.single_end ? 
"${prefix}.fastq" : prefix def key_file = '' @@ -34,14 +34,14 @@ process SRATOOLS_FASTERQDUMP { export NCBI_SETTINGS="\$PWD/${ncbi_settings}" fasterq-dump \\ - $args \\ + $fasterqdump_args \\ --threads $task.cpus \\ --outfile $outfile \\ ${key_file} \\ ${sra} pigz \\ - $args2 \\ + $pigz_args \\ --no-name \\ --processes $task.cpus \\ *.fastq diff --git a/modules/nf-core/sratools/fasterqdump/nextflow.config b/modules/nf-core/sratools/fasterqdump/nextflow.config deleted file mode 100644 index f98b140d..00000000 --- a/modules/nf-core/sratools/fasterqdump/nextflow.config +++ /dev/null @@ -1,10 +0,0 @@ -process { - withName: SRATOOLS_FASTERQDUMP { - ext.args = '--split-files --include-technical' - publishDir = [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ] - } -} \ No newline at end of file diff --git a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config deleted file mode 100644 index 23e4100b..00000000 --- a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: SRATOOLS_FASTERQDUMP { - ext.args = '' - } -} \ No newline at end of file diff --git a/modules/nf-core/sratools/prefetch/nextflow.config b/modules/nf-core/sratools/prefetch/nextflow.config deleted file mode 100644 index a2ca8848..00000000 --- a/modules/nf-core/sratools/prefetch/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: SRATOOLS_PREFETCH { - publishDir = [ - path: { "${params.outdir}/sra" }, - enabled: false - ] - } -} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 7f4f8ebf..bad8464d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,6 +19,11 @@ params { skip_fastq_download = false dbgap_key = null + aspera_cli_args = '-QT -l 300m -P33001' + sra_fastq_ftp_args = '-t 5 -nv -c -T 60' + sratools_fasterqdump_args = '' + sratools_pigz_args = '' + // Boilerplate options outdir 
= null publish_dir_mode = 'copy' @@ -67,15 +72,11 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } -// Workflow specific configs -includeConfig './workflows/sra/nextflow.config' - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false - nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -193,9 +194,6 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -// Disable process selector warnings by default. Use debug profile to enable warnings. -nextflow.enable.configProcessNamesValidation = false - def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index fbeacf4a..256c8104 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -9,6 +9,8 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: ch_sra_ids // channel: [ val(meta), val(id) ] ch_dbgap_key // channel: [ path(dbgap_key) ] + sratools_fasterqdump_args // string + sratools_pigz_args // string main: @@ -30,7 +32,13 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Convert the SRA format into one or more compressed FASTQ files. 
// - SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings, ch_dbgap_key ) + SRATOOLS_FASTERQDUMP ( + SRATOOLS_PREFETCH.out.sra, + ch_ncbi_settings, + ch_dbgap_key, + sratools_fasterqdump_args, + sratools_pigz_args + ) ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first()) emit: diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config deleted file mode 100644 index de803a38..00000000 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config +++ /dev/null @@ -1,2 +0,0 @@ -includeConfig '../../../modules/nf-core/sratools/prefetch/nextflow.config' -includeConfig '../../../modules/nf-core/sratools/fasterqdump/nextflow.config' diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 0c8cac0c..8cdd6845 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -93,7 +93,8 @@ workflow SRA { // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // SRA_FASTQ_FTP ( - ch_sra_reads.ftp + ch_sra_reads.ftp, + params.sra_fastq_ftp_args ) ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first()) @@ -102,7 +103,9 @@ workflow SRA { // FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( ch_sra_reads.sratools, - params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : [] + params.dbgap_key ? 
file(params.dbgap_key, checkIfExists: true) : [], + params.sratools_fasterqdump_args, + params.sratools_pigz_args ) ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions.first()) @@ -111,7 +114,8 @@ workflow SRA { // ASPERA_CLI ( ch_sra_reads.aspera, - 'era-fasp' + 'era-fasp', + params.aspera_cli_args ) ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first()) @@ -121,6 +125,7 @@ workflow SRA { .fastq .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) .mix(ASPERA_CLI.out.fastq) + .tap { ch_fastq } .map { meta, fastq -> def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] @@ -151,7 +156,7 @@ workflow SRA { .map { it[1] } .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'samplesheet.csv', storeDir: "${params.outdir}/samplesheet") + .collectFile(name:'samplesheet.csv') .set { ch_samplesheet } SRA_TO_SAMPLESHEET @@ -160,7 +165,7 @@ workflow SRA { .map { it[1] } .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'id_mappings.csv', storeDir: "${params.outdir}/samplesheet") + .collectFile(name:'id_mappings.csv') .set { ch_mappings } // @@ -179,7 +184,7 @@ workflow SRA { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) + .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) emit: samplesheet = ch_samplesheet @@ -187,6 +192,16 @@ workflow SRA { sample_mappings = ch_sample_mappings_yml sra_metadata = ch_sra_metadata versions = ch_versions.unique() + + publish: + ch_fastq >> 'fastq/' + ASPERA_CLI.out.md5 >> 'fastq/md5/' + SRA_FASTQ_FTP.out.md5 >> 'fastq/md5/' + SRA_RUNINFO_TO_FTP.out.tsv >> 
'metadata/' + ch_versions_yml >> 'pipeline_info/' + ch_samplesheet >> 'samplesheet/' + ch_mappings >> 'samplesheet/' + ch_sample_mappings_yml >> 'samplesheet/' } /* diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config deleted file mode 100644 index d242c238..00000000 --- a/workflows/sra/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -includeConfig "../../modules/local/multiqc_mappings_config/nextflow.config" -includeConfig "../../modules/local/aspera_cli/nextflow.config" -includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config" -includeConfig "../../modules/local/sra_ids_to_runinfo/nextflow.config" -includeConfig "../../modules/local/sra_runinfo_to_ftp/nextflow.config" -includeConfig "../../modules/local/sra_to_samplesheet/nextflow.config" -includeConfig "../../modules/nf-core/sratools/prefetch/nextflow.config" -includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config" From 836ace2cb64068a17b973cc1ac248c276b1636e3 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 19:53:21 -0500 Subject: [PATCH 02/25] Update config to comply with strict parser Signed-off-by: Ben Sherman --- conf/base.config | 40 ++++++++++++++++++++++----------------- nextflow.config | 49 ++++++------------------------------------------ 2 files changed, 29 insertions(+), 60 deletions(-) diff --git a/conf/base.config b/conf/base.config index 6af45542..5d7c5389 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,9 +10,15 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + resourceLimits = [ + cpus: params.max_cpus, + memory: params.max_memory, + time: params.max_time + ] + + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } maxRetries = 1 @@ -25,30 +31,30 @@ process { // adding in your local modules too. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 20.h * task.attempt } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { 200.GB * task.attempt } } withLabel:error_ignore { errorStrategy = 'ignore' diff --git a/nextflow.config b/nextflow.config index bad8464d..da983b96 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,11 +66,7 @@ params { includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load 
nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} +includeConfig "${params.custom_config_base}/nfcore_custom.config" profiles { debug { @@ -194,22 +190,22 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +// NOTE: Nextflow config should provide some constant for the start timestamp timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" } manifest { @@ -222,36 +218,3 @@ manifest { version = '1.13.0dev' doi = 'https://doi.org/10.5281/zenodo.5070524' } - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} From 25a1fb52e438df02a59ccbf62ba83a43b8e2570a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 19:56:36 -0500 Subject: [PATCH 03/25] Use param schemas as source of truth, convert to YAML Signed-off-by: Ben Sherman --- assets/schema_input.json | 17 -- assets/schema_input.yml | 12 + nextflow.config | 56 ---- nextflow_schema.json | 287 ------------------ params.yml | 258 ++++++++++++++++ .../utils_nfcore_fetchngs_pipeline/main.nf | 4 +- 6 files changed, 272 insertions(+), 362 deletions(-) delete mode 100644 assets/schema_input.json create mode 100644 assets/schema_input.yml delete mode 100644 nextflow_schema.json create mode 100644 params.yml diff --git a/assets/schema_input.json b/assets/schema_input.json deleted file mode 100644 index db9ffc00..00000000 --- a/assets/schema_input.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.json", - "title": "nf-core/fetchngs pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "": { - "type": "string", - "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$", - "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier" - } - } - } -} diff --git 
a/assets/schema_input.yml b/assets/schema_input.yml new file mode 100644 index 00000000..29760b88 --- /dev/null +++ b/assets/schema_input.yml @@ -0,0 +1,12 @@ +$schema: http://json-schema.org/draft-07/schema +$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.yml +title: nf-core/fetchngs pipeline - params.input schema +description: Schema for the file provided with params.input +type: array +items: + type: object + properties: + '': + type: string + pattern: ^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$ + errorMessage: Please provide a valid SRA, ENA, DDBJ or GEO identifier diff --git a/nextflow.config b/nextflow.config index da983b96..74e1444a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,62 +6,6 @@ ---------------------------------------------------------------------------------------- */ -// Global default params, used in configs -params { - - // Input options - input = null - nf_core_pipeline = null - nf_core_rnaseq_strandedness = 'auto' - ena_metadata_fields = null - sample_mapping_fields = 'experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description' - download_method = 'ftp' - skip_fastq_download = false - dbgap_key = null - - aspera_cli_args = '-QT -l 300m -P33001' - sra_fastq_ftp_args = '-t 5 -nv -c -T 60' - sratools_fasterqdump_args = '' - sratools_pigz_args = '' - - // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - - // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null - - // Max resource options - // Defaults only, expecting 
to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' - - // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationShowHiddenParams = false - validationSchemaIgnoreParams = '' - validate_params = true - - // Deprecated options - // See: https://github.com/nf-core/fetchngs/pull/279/files#r1494459480 - force_sratools_download = false - -} - // Load base.config by default for all pipelines includeConfig 'conf/base.config' diff --git a/nextflow_schema.json b/nextflow_schema.json deleted file mode 100644 index 29f7b710..00000000 --- a/nextflow_schema.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/nextflow_schema.json", - "title": "nf-core/fetchngs pipeline parameters", - "description": "Pipeline to fetch metadata and raw FastQ files from public databases", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "exists": true, - "schema": "assets/schema_input.json", - "mimetype": "text/csv", - "pattern": "^\\S+\\.(csv|tsv|txt)$", - "fa_icon": "fas fa-file-excel", - "description": "File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files." 
- }, - "ena_metadata_fields": { - "type": "string", - "fa_icon": "fas fa-columns", - "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", - "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." - }, - "sample_mapping_fields": { - "type": "string", - "fa_icon": "fas fa-columns", - "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", - "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" - }, - "nf_core_pipeline": { - "type": "string", - "fa_icon": "fab fa-apple", - "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. 
A samplesheet for direct use with the pipeline will be created with the appropriate columns.", - "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] - }, - "nf_core_rnaseq_strandedness": { - "type": "string", - "fa_icon": "fas fa-dna", - "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", - "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", - "default": "auto" - }, - "download_method": { - "type": "string", - "default": "ftp", - "fa_icon": "fas fa-download", - "enum": ["aspera", "ftp", "sratools"], - "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.", - "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ." - }, - "skip_fastq_download": { - "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Only download metadata for public data database ids and don't download the FastQ files." - }, - "dbgap_key": { - "type": "string", - "fa_icon": "fas fa-address-card", - "help_text": "Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.", - "format": "file-path", - "description": "dbGaP repository key." - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. 
`--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format", - "hidden": true - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true - }, - "hook_url": { - "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", - "hidden": true - }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
- }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - } - } - }, - "deprecated_options": { - "title": "Deprecated options", - "type": "object", - "description": "List of parameters that have been deprecated.", - "default": "", - "fa_icon": "fas fa-calendar-times", - "properties": { - "force_sratools_download": { - "type": "boolean", - "fa_icon": "fas fa-times-circle", - "description": "This parameter has been deprecated. 
Please use '--download_method sratools' instead.", - "enum": [false], - "hidden": true - } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/deprecated_options" - } - ] -} diff --git a/params.yml b/params.yml new file mode 100644 index 00000000..afb19664 --- /dev/null +++ b/params.yml @@ -0,0 +1,258 @@ +$schema: http://json-schema.org/draft-07/schema +$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/params.yml +title: nf-core/fetchngs pipeline parameters +description: Pipeline to fetch metadata and raw FastQ files from public databases +type: object +definitions: + input_output_options: + title: Input/output options + type: object + fa_icon: fas fa-terminal + description: Define where the pipeline should find input data and save output data. + required: + - input + - outdir + properties: + input: + type: string + format: file-path + exists: true + schema: assets/schema_input.yml + mimetype: text/csv + pattern: ^\\S+\\.(csv|tsv|txt)$ + fa_icon: fas fa-file-excel + description: File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files. + ena_metadata_fields: + type: string + fa_icon: fas fa-columns + description: Comma-separated list of ENA metadata fields to fetch before downloading data. + help_text: The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. 
Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run). + sample_mapping_fields: + type: string + fa_icon: fas fa-columns + description: Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC. + default: experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description + nf_core_pipeline: + type: string + fa_icon: fab fa-apple + description: Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns. + enum: + - rnaseq + - atacseq + - viralrecon + - taxprofiler + nf_core_rnaseq_strandedness: + type: string + fa_icon: fas fa-dna + description: Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'. + help_text: The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution. + default: auto + download_method: + type: string + default: ftp + fa_icon: fas fa-download + enum: + - aspera + - ftp + - sratools + description: Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'. + help_text: FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ. + skip_fastq_download: + type: boolean + fa_icon: fas fa-fast-forward + description: Only download metadata for public data database ids and don't download the FastQ files. + dbgap_key: + type: string + fa_icon: fas fa-address-card + help_text: Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. 
Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation. + format: file-path + description: dbGaP repository key. + aspera_cli_args: + type: string + default: -QT -l 300m -P33001 + sra_fastq_ftp_args: + type: string + default: -t 5 -nv -c -T 60 + sratools_fasterqdump_args: + type: string + default: '' + sratools_pigz_args: + type: string + default: '' + outdir: + type: string + format: directory-path + description: The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. + fa_icon: fas fa-folder-open + email: + type: string + description: Email address for completion summary. + fa_icon: fas fa-envelope + help_text: Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. + institutional_config_options: + title: Institutional config options + type: object + fa_icon: fas fa-university + description: Parameters used to describe centralised config profiles. These should not be edited. + help_text: The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline. + properties: + custom_config_version: + type: string + description: Git commit id for Institutional configs. + default: master + hidden: true + fa_icon: fas fa-users-cog + custom_config_base: + type: string + description: Base directory for Institutional configs. 
+ default: https://raw.githubusercontent.com/nf-core/configs/master + hidden: true + help_text: If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter. + fa_icon: fas fa-users-cog + config_profile_name: + type: string + description: Institutional config name. + hidden: true + fa_icon: fas fa-users-cog + config_profile_description: + type: string + description: Institutional config description. + hidden: true + fa_icon: fas fa-users-cog + config_profile_contact: + type: string + description: Institutional config contact information. + hidden: true + fa_icon: fas fa-users-cog + config_profile_url: + type: string + description: Institutional config URL link. + hidden: true + fa_icon: fas fa-users-cog + max_job_request_options: + title: Max job request options + type: object + fa_icon: fab fa-acquisitions-incorporated + description: Set the top limit for requested resources for any single job. + help_text: If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details. + properties: + max_cpus: + type: integer + description: Maximum number of CPUs that can be requested for any single job. + default: 16 + fa_icon: fas fa-microchip + hidden: true + help_text: Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. 
`--max_cpus 1` + max_memory: + type: string + description: Maximum amount of memory that can be requested for any single job. + default: 128.GB + fa_icon: fas fa-memory + pattern: ^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$ + hidden: true + help_text: Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'` + max_time: + type: string + description: Maximum amount of time that can be requested for any single job. + default: 240.h + fa_icon: far fa-clock + pattern: ^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$ + hidden: true + help_text: Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` + generic_options: + title: Generic options + type: object + fa_icon: fas fa-file-import + description: Less common options for the pipeline, typically set in a config file. + help_text: These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`. + properties: + help: + type: boolean + description: Display help text. + fa_icon: fas fa-question-circle + hidden: true + version: + type: boolean + description: Display version and exit. + fa_icon: fas fa-question-circle + hidden: true + publish_dir_mode: + type: string + default: copy + description: Method used to save pipeline results to output directory. + help_text: The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. 
+ fa_icon: fas fa-copy + enum: + - symlink + - rellink + - link + - copy + - copyNoFollow + - move + hidden: true + email_on_fail: + type: string + description: Email address for completion summary, only when pipeline fails. + fa_icon: fas fa-exclamation-triangle + help_text: An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully. + hidden: true + plaintext_email: + type: boolean + description: Send plain-text email instead of HTML. + fa_icon: fas fa-remove-format + hidden: true + monochrome_logs: + type: boolean + description: Do not use coloured log outputs. + fa_icon: fas fa-palette + hidden: true + hook_url: + type: string + description: Incoming hook URL for messaging service + fa_icon: fas fa-people-group + help_text: Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported. + hidden: true + validate_params: + type: boolean + description: Boolean whether to validate parameters against the schema at runtime + default: true + fa_icon: fas fa-check-square + hidden: true + validationShowHiddenParams: + type: boolean + fa_icon: far fa-eye-slash + description: Show all params when using `--help` + hidden: true + help_text: By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters. + validationFailUnrecognisedParams: + type: boolean + fa_icon: far fa-check-circle + description: Validation of parameters fails when an unrecognised parameter is found. + hidden: true + help_text: By default, when an unrecognised parameter is found, it returns a warinig. + validationLenientMode: + type: boolean + fa_icon: far fa-check-circle + description: Validation of parameters in lenient more. + hidden: true + help_text: Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode). + deprecated_options: + title: Deprecated options + type: object + description: List of parameters that have been deprecated. + default: '' + fa_icon: fas fa-calendar-times + properties: + force_sratools_download: + type: boolean + fa_icon: fas fa-times-circle + description: This parameter has been deprecated. Please use '--download_method sratools' instead. + enum: + - false + hidden: true +allOf: + - $ref: "#/definitions/input_output_options" + - $ref: "#/definitions/institutional_config_options" + - $ref: "#/definitions/max_job_request_options" + - $ref: "#/definitions/generic_options" + - $ref: "#/definitions/deprecated_options" diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 0c4307b5..5e317f09 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -62,7 +62,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text, post_help_text, validate_params, - "nextflow_schema.json" + "params.yml" ) // @@ -112,7 +112,7 @@ workflow PIPELINE_COMPLETION { main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + summary_params = paramsSummaryMap(workflow, parameters_schema: "params.yml") // // Completion email and summary From 505806a9341b840fa5c47c5f5af55c2aa63c7b19 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 20:01:04 -0500 Subject: [PATCH 04/25] Use eval output, topic channels to collect tool versions Signed-off-by: Ben Sherman --- modules/local/aspera_cli/main.nf | 12 +-------- modules/local/multiqc_mappings_config/main.nf | 7 +---- modules/local/sra_fastq_ftp/main.nf | 12 +-------- modules/local/sra_ids_to_runinfo/main.nf | 7 +---- modules/local/sra_runinfo_to_ftp/main.nf | 7 +---- .../custom/sratoolsncbisettings/main.nf | 2 +- 
.../templates/detect_ncbi_settings.sh | 5 ---- modules/nf-core/sratools/fasterqdump/main.nf | 9 ++----- modules/nf-core/sratools/prefetch/main.nf | 2 +- .../prefetch/templates/retry_with_backoff.sh | 5 ---- modules/nf-core/untar/main.nf | 12 +-------- .../main.nf | 7 ----- .../nf-core/utils_nfcore_pipeline/main.nf | 27 ++++++++----------- workflows/sra/main.nf | 12 ++------- 14 files changed, 23 insertions(+), 103 deletions(-) diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 36882cdb..670d6c5a 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -15,7 +15,7 @@ process ASPERA_CLI { output: tuple val(meta), path("*fastq.gz"), emit: fastq tuple val(meta), path("*md5") , emit: md5 - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions script: def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : "" @@ -31,11 +31,6 @@ process ASPERA_CLI { echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 md5sum -c ${meta.id}.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aspera_cli: \$(ascli --version) - END_VERSIONS """ } else { """ @@ -58,11 +53,6 @@ process ASPERA_CLI { echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 md5sum -c ${meta.id}_2.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aspera_cli: \$(ascli --version) - END_VERSIONS """ } } diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index 8efe1caa..738069c7 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -11,17 +11,12 @@ process MULTIQC_MAPPINGS_CONFIG { output: path "*yml" , emit: yml - path "versions.yml", emit: versions + tuple val("${task.process}"), val('python'), eval("python --version | sed 
's/Python //g'"), topic: versions script: """ multiqc_mappings_config.py \\ $csv \\ multiqc_config.yml - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index f7100055..55cf70ef 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -16,7 +16,7 @@ process SRA_FASTQ_FTP { output: tuple val(meta), path("*fastq.gz"), emit: fastq tuple val(meta), path("*md5") , emit: md5 - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions script: if (meta.single_end) { @@ -28,11 +28,6 @@ process SRA_FASTQ_FTP { echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 md5sum -c ${meta.id}.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')) - END_VERSIONS """ } else { """ @@ -51,11 +46,6 @@ process SRA_FASTQ_FTP { echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 md5sum -c ${meta.id}_2.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')) - END_VERSIONS """ } } diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 7d47f5e3..4e7f9e87 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -14,7 +14,7 @@ process SRA_IDS_TO_RUNINFO { output: path "*.tsv" , emit: tsv - path "versions.yml", emit: versions + tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions script: def metadata_fields = fields ? 
"--ena_metadata_fields ${fields}" : '' @@ -24,10 +24,5 @@ process SRA_IDS_TO_RUNINFO { id.txt \\ ${id}.runinfo.tsv \\ $metadata_fields - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index 9c83cf53..40f6a03c 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -11,17 +11,12 @@ process SRA_RUNINFO_TO_FTP { output: path "*.tsv" , emit: tsv - path "versions.yml", emit: versions + tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions script: """ sra_runinfo_to_ftp.py \\ ${runinfo.join(',')} \\ ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 577117ed..7dcb66e9 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -12,7 +12,7 @@ process CUSTOM_SRATOOLSNCBISETTINGS { output: path('*.mkfg') , emit: ncbi_settings - path 'versions.yml', emit: versions + tuple val("${task.process}"), val('sratools'), eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh index cfe3a324..b553659b 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh +++ b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh @@ -38,8 +38,3 @@ else fi cp "${NCBI_SETTINGS}" ./ fi - -cat <<-END_VERSIONS > 
versions.yml -"!{task.process}": - sratools: $(vdb-config --version 2>&1 | grep -Eo '[0-9.]+') -END_VERSIONS diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index ca94a925..df45971f 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -16,7 +16,8 @@ process SRATOOLS_FASTERQDUMP { output: tuple val(meta), path('*.fastq.gz'), emit: reads - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('sratools'), eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions + tuple val("${task.process}"), val('pigz'), eval("pigz --version 2>&1 | sed 's/pigz //g'"), topic: versions when: task.ext.when == null || task.ext.when @@ -45,11 +46,5 @@ process SRATOOLS_FASTERQDUMP { --no-name \\ --processes $task.cpus \\ *.fastq - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sratools: \$(fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS """ } diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 3c30739a..6c3cde54 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -14,7 +14,7 @@ process SRATOOLS_PREFETCH { output: tuple val(meta), path(id), emit: sra - path 'versions.yml' , emit: versions + tuple val("${task.process}"), val('sratools'), eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh index a72a4bfb..72156740 100755 --- a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh +++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh @@ -48,8 +48,3 @@ retry_with_backoff !{args2} \ !{id} [ -f !{id}.sralite ] && 
vdb-validate !{id}.sralite || vdb-validate !{id} - -cat <<-END_VERSIONS > versions.yml -"!{task.process}": - sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+') -END_VERSIONS diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8a75bb95..de3a38c2 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -12,7 +12,7 @@ process UNTAR { output: tuple val(meta), path("$prefix"), emit: untar - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('untar'), eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'"), topic: versions when: task.ext.when == null || task.ext.when @@ -42,11 +42,6 @@ process UNTAR { $archive \\ $args2 fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS """ stub: @@ -54,10 +49,5 @@ process UNTAR { """ mkdir $prefix touch ${prefix}/file.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS """ } diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 256c8104..3a57d1b2 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -13,21 +13,16 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { sratools_pigz_args // string main: - - ch_versions = Channel.empty() - // // Detect existing NCBI user settings or create new ones. // CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() ) ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings - ch_versions = ch_versions.mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions) // // Prefetch sequencing reads in SRA format. 
// SRATOOLS_PREFETCH ( ch_sra_ids, ch_ncbi_settings, ch_dbgap_key ) - ch_versions = ch_versions.mix(SRATOOLS_PREFETCH.out.versions.first()) // // Convert the SRA format into one or more compressed FASTQ files. @@ -39,9 +34,7 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { sratools_fasterqdump_args, sratools_pigz_args ) - ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first()) emit: reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..a60c69da 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,7 +2,6 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import org.yaml.snakeyaml.Yaml import nextflow.extension.FilesEx /* @@ -92,15 +91,6 @@ def getWorkflowVersion() { return version_string } -// -// Get software versions for pipeline -// -def processVersionsFromYAML(yaml_file) { - Yaml yaml = new Yaml() - versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } - return yaml.dumpAsMap(versions).trim() -} - // // Get workflow version for pipeline // @@ -117,10 +107,15 @@ def workflowVersionToYAML() { // def softwareVersionsToYAML(ch_versions) { return ch_versions - .unique() - .map { processVersionsFromYAML(it) } - .unique() - .mix(Channel.of(workflowVersionToYAML())) + .unique() + .map { process, name, version -> + """ + ${process.tokenize(':').last()}: + ${name}: ${version} + """.stripIndent().trim() + } + .unique() + .mix(Channel.of(workflowVersionToYAML())) } // @@ -358,13 +353,13 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // Write summary e-mail HTML to a file def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") 
output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.html"); output_hf.delete() // Write summary e-mail TXT to a file def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.txt"); output_tf.delete() } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 8cdd6845..faa5ee08 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -32,8 +32,6 @@ workflow SRA { ids // channel: [ ids ] main: - ch_versions = Channel.empty() - // // MODULE: Get SRA run information for public database ids // @@ -41,7 +39,6 @@ workflow SRA { ids, params.ena_metadata_fields ?: '' ) - ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first()) // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] @@ -49,7 +46,6 @@ workflow SRA { SRA_RUNINFO_TO_FTP ( SRA_IDS_TO_RUNINFO.out.tsv ) - ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first()) SRA_RUNINFO_TO_FTP .out @@ -96,7 +92,6 @@ workflow SRA { ch_sra_reads.ftp, params.sra_fastq_ftp_args ) - ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first()) // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. 
@@ -107,7 +102,6 @@ workflow SRA { params.sratools_fasterqdump_args, params.sratools_pigz_args ) - ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions.first()) // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums @@ -117,7 +111,6 @@ workflow SRA { 'era-fasp', params.aspera_cli_args ) - ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first()) // Isolate FASTQ channel which will be added to emit block SRA_FASTQ_FTP @@ -176,22 +169,21 @@ workflow SRA { MULTIQC_MAPPINGS_CONFIG ( ch_mappings ) - ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions) ch_sample_mappings_yml = MULTIQC_MAPPINGS_CONFIG.out.yml } // // Collate and save software versions // - softwareVersionsToYAML(ch_versions) + softwareVersionsToYAML(Channel.topic('versions')) .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_versions_yml } emit: samplesheet = ch_samplesheet mappings = ch_mappings sample_mappings = ch_sample_mappings_yml sra_metadata = ch_sra_metadata - versions = ch_versions.unique() publish: ch_fastq >> 'fastq/' From 4401d29646d15a5d71f76327b9b155ca40416a7f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 19 May 2024 05:31:30 -0500 Subject: [PATCH 05/25] Refactor params as workflow inputs Signed-off-by: Ben Sherman --- main.nf | 16 ++++++++++++- workflows/sra/main.nf | 56 ++++++++++++++++++++++++++----------------- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/main.nf b/main.nf index e178d8a5..7158a85b 100644 --- a/main.nf +++ b/main.nf @@ -40,7 +40,21 @@ workflow NFCORE_FETCHNGS { // // WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids // - SRA ( ids ) + SRA ( + ids, + params.ena_metadata_fields ?: '', + params.sample_mapping_fields, + params.nf_core_pipeline ?: '', + params.nf_core_rnaseq_strandedness ?: 'auto', + params.download_method, + params.skip_fastq_download, + 
params.dbgap_key, + params.aspera_cli_args, + params.sra_fastq_ftp_args, + params.sratools_fasterqdump_args, + params.sratools_pigz_args, + params.outdir + ) } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index faa5ee08..4d89a645 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -29,7 +29,19 @@ include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../../subworkflow workflow SRA { take: - ids // channel: [ ids ] + ids // channel: [ ids ] + ena_metadata_fields // string + sample_mapping_fields // string + nf_core_pipeline // string + nf_core_rnaseq_strandedness // string + download_method // enum: 'aspera' | 'ftp' | 'sratools' + skip_fastq_download // boolean + dbgap_key // string + aspera_cli_args // string + sra_fastq_ftp_args // string + sratools_fasterqdump_args // string + sratools_pigz_args // string + outdir // string main: // @@ -37,7 +49,7 @@ workflow SRA { // SRA_IDS_TO_RUNINFO ( ids, - params.ena_metadata_fields ?: '' + ena_metadata_fields ) // @@ -60,27 +72,27 @@ workflow SRA { .unique() .set { ch_sra_metadata } - if (!params.skip_fastq_download) { + if (!skip_fastq_download) { ch_sra_metadata .branch { meta -> - def download_method = 'ftp' + def method = 'ftp' // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' - if (meta.fastq_aspera && params.download_method == 'aspera') { - download_method = 'aspera' + if (meta.fastq_aspera && download_method == 'aspera') { + method = 'aspera' } - if ((!meta.fastq_aspera && !meta.fastq_1) || params.download_method == 'sratools') { - download_method = 'sratools' + if ((!meta.fastq_aspera && !meta.fastq_1) || download_method == 'sratools') { + method = 'sratools' } - aspera: 
download_method == 'aspera' + aspera: method == 'aspera' return [ meta, meta.fastq_aspera.tokenize(';').take(2) ] - ftp: download_method == 'ftp' + ftp: method == 'ftp' return [ meta, [ meta.fastq_1, meta.fastq_2 ] ] - sratools: download_method == 'sratools' + sratools: method == 'sratools' return [ meta, meta.run_accession ] } .set { ch_sra_reads } @@ -90,7 +102,7 @@ workflow SRA { // SRA_FASTQ_FTP ( ch_sra_reads.ftp, - params.sra_fastq_ftp_args + sra_fastq_ftp_args ) // @@ -98,9 +110,9 @@ workflow SRA { // FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( ch_sra_reads.sratools, - params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : [], - params.sratools_fasterqdump_args, - params.sratools_pigz_args + dbgap_key ? file(dbgap_key, checkIfExists: true) : [], + sratools_fasterqdump_args, + sratools_pigz_args ) // @@ -109,7 +121,7 @@ workflow SRA { ASPERA_CLI ( ch_sra_reads.aspera, 'era-fasp', - params.aspera_cli_args + aspera_cli_args ) // Isolate FASTQ channel which will be added to emit block @@ -124,8 +136,8 @@ workflow SRA { def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] def meta_clone = meta.clone() - meta_clone.fastq_1 = reads[0] ? "${params.outdir}/fastq/${reads[0].getName()}" : '' - meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${params.outdir}/fastq/${reads[1].getName()}" : '' + meta_clone.fastq_1 = reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '' + meta_clone.fastq_2 = reads[1] && !meta.single_end ? 
"${outdir}/fastq/${reads[1].getName()}" : '' return meta_clone } @@ -137,9 +149,9 @@ workflow SRA { // SRA_TO_SAMPLESHEET ( ch_sra_metadata, - params.nf_core_pipeline ?: '', - params.nf_core_rnaseq_strandedness ?: 'auto', - params.sample_mapping_fields + nf_core_pipeline, + nf_core_rnaseq_strandedness, + sample_mapping_fields ) // Merge samplesheets and mapping files across all samples @@ -165,7 +177,7 @@ workflow SRA { // MODULE: Create a MutiQC config file with sample name mappings // ch_sample_mappings_yml = Channel.empty() - if (params.sample_mapping_fields) { + if (sample_mapping_fields) { MULTIQC_MAPPINGS_CONFIG ( ch_mappings ) From 1b2ad00d0aad2f063bad8d4050efdbfc7cdf49db Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 19 May 2024 05:34:00 -0500 Subject: [PATCH 06/25] Update workflow output definition Signed-off-by: Ben Sherman --- main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 7158a85b..cc929e94 100644 --- a/main.nf +++ b/main.nf @@ -10,6 +10,7 @@ */ nextflow.enable.dsl = 2 +nextflow.preview.output = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -100,7 +101,7 @@ workflow { ) } -publish { +output { directory params.outdir mode params.publish_dir_mode } From 4ab2ddceb19eecda106027ac1a51157a8f30bbbb Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 19 May 2024 05:34:10 -0500 Subject: [PATCH 07/25] Update workflow params definition Signed-off-by: Ben Sherman --- params.yml => schema_params.yml | 2 +- subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename params.yml => schema_params.yml (99%) diff --git a/params.yml b/schema_params.yml similarity index 99% rename from params.yml rename to schema_params.yml index afb19664..13d0b8d3 100644 --- a/params.yml +++ b/schema_params.yml @@ -1,5 +1,5 @@ $schema: http://json-schema.org/draft-07/schema -$id: 
https://raw.githubusercontent.com/nf-core/fetchngs/master/params.yml +$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/schema_params.yml title: nf-core/fetchngs pipeline parameters description: Pipeline to fetch metadata and raw FastQ files from public databases type: object diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 5e317f09..c7e00272 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -62,7 +62,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text, post_help_text, validate_params, - "params.yml" + "schema_params.yml" ) // @@ -112,7 +112,7 @@ workflow PIPELINE_COMPLETION { main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "params.yml") + summary_params = paramsSummaryMap(workflow, parameters_schema: "schema_params.yml") // // Completion email and summary From 39971b6d19a77ade6dc5e44596bdf28ce6df15ac Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 19 May 2024 05:43:48 -0500 Subject: [PATCH 08/25] Add workflow output schema Signed-off-by: Ben Sherman --- assets/schema_mappings.yml | 28 ++++++++++++ assets/schema_samplesheet.yml | 81 +++++++++++++++++++++++++++++++++++ schema_outputs.yml | 16 +++++++ 3 files changed, 125 insertions(+) create mode 100644 assets/schema_mappings.yml create mode 100644 assets/schema_samplesheet.yml create mode 100644 schema_outputs.yml diff --git a/assets/schema_mappings.yml b/assets/schema_mappings.yml new file mode 100644 index 00000000..6025492e --- /dev/null +++ b/assets/schema_mappings.yml @@ -0,0 +1,28 @@ +$schema: 'http://json-schema.org/draft-07/schema' +$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_mappings.yml' +title: 'nf-core/fetchngs pipeline - id_mappings.csv schema' +description: 'Schema for the mappings file produced by fetchngs' +type: array +items: + type: object + 
properties: + sample: + type: string + experiment_accession: + type: string + run_accession: + type: string + sample_accession: + type: string + experiment_alias: + type: string + run_alias: + type: string + sample_alias: + type: string + experiment_title: + type: string + sample_title: + type: string + sample_description: + type: string \ No newline at end of file diff --git a/assets/schema_samplesheet.yml b/assets/schema_samplesheet.yml new file mode 100644 index 00000000..eb93179e --- /dev/null +++ b/assets/schema_samplesheet.yml @@ -0,0 +1,81 @@ +$schema: 'http://json-schema.org/draft-07/schema' +$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_samplesheet.yml' +title: 'nf-core/fetchngs pipeline - samplesheet.csv schema' +description: 'Schema for the samplesheet file produced by fetchngs' +type: array +items: + type: object + properties: + sample: + type: string + fastq_1: + type: string + format: file-path + pattern: '^\\S+\\.f(ast)?q\\.gz$' + fastq_2: + type: string + format: file-path + pattern: '^\\S+\\.f(ast)?q\\.gz$' + run_accession: + type: string + experiment_accession: + type: string + sample_accession: + type: string + secondary_sample_accession: + type: string + study_accession: + type: string + secondary_study_accession: + type: string + submission_accession: + type: string + run_alias: + type: string + experiment_alias: + type: string + sample_alias: + type: string + study_alias: + type: string + library_layout: + type: string + library_selection: + type: string + library_source: + type: string + library_strategy: + type: string + library_name: + type: string + instrument_model: + type: string + instrument_platform: + type: string + base_count: + type: integer + read_count: + type: integer + tax_id: + type: string + scientific_name: + type: string + sample_title: + type: string + experiment_title: + type: string + study_title: + type: string + sample_description: + type: string + fastq_md5: + type: string + pattern: 
'^[0-9a-f]{32}$' + fastq_bytes: + type: integer + fastq_ftp: + type: string + fastq_galaxy: + type: string + fastq_aspera: + type: string \ No newline at end of file diff --git a/schema_outputs.yml b/schema_outputs.yml new file mode 100644 index 00000000..75f26e94 --- /dev/null +++ b/schema_outputs.yml @@ -0,0 +1,16 @@ +$schema: 'http://json-schema.org/draft-07/schema' +$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/schema_outputs.yml' +title: 'nf-core/fetchngs pipeline outputs' +description: '' +type: object +properties: + id_mappings: + type: string + format: file-path + mimetype: text/csv + schema: assets/schema_mappings.yml + samplesheet: + type: string + format: file-path + mimetype: text/csv + schema: assets/schema_samplesheet.yml \ No newline at end of file From a05928c90df169dd999317da7563d07644cff82a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 10 Jun 2024 14:33:23 -0500 Subject: [PATCH 09/25] Rename schema_params.yml to schema_inputs.yml Signed-off-by: Ben Sherman --- schema_params.yml => schema_inputs.yml | 2 +- subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename schema_params.yml => schema_inputs.yml (99%) diff --git a/schema_params.yml b/schema_inputs.yml similarity index 99% rename from schema_params.yml rename to schema_inputs.yml index 13d0b8d3..257cba7d 100644 --- a/schema_params.yml +++ b/schema_inputs.yml @@ -1,5 +1,5 @@ $schema: http://json-schema.org/draft-07/schema -$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/schema_params.yml +$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/schema_inputs.yml title: nf-core/fetchngs pipeline parameters description: Pipeline to fetch metadata and raw FastQ files from public databases type: object diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index c7e00272..770f1e08 100644 --- 
a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -62,7 +62,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text, post_help_text, validate_params, - "schema_params.yml" + "schema_inputs.yml" ) // @@ -112,7 +112,7 @@ workflow PIPELINE_COMPLETION { main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "schema_params.yml") + summary_params = paramsSummaryMap(workflow, parameters_schema: "schema_inputs.yml") // // Completion email and summary From 081dbc0e624fa73b3b5733abe2ec1ea1339562c8 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 10 Jun 2024 14:37:47 -0500 Subject: [PATCH 10/25] Remove trailing slashes from target names Signed-off-by: Ben Sherman --- workflows/sra/main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 4d89a645..03a2732f 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -198,14 +198,14 @@ workflow SRA { sra_metadata = ch_sra_metadata publish: - ch_fastq >> 'fastq/' - ASPERA_CLI.out.md5 >> 'fastq/md5/' - SRA_FASTQ_FTP.out.md5 >> 'fastq/md5/' - SRA_RUNINFO_TO_FTP.out.tsv >> 'metadata/' - ch_versions_yml >> 'pipeline_info/' - ch_samplesheet >> 'samplesheet/' - ch_mappings >> 'samplesheet/' - ch_sample_mappings_yml >> 'samplesheet/' + ch_fastq >> 'fastq' + ASPERA_CLI.out.md5 >> 'fastq/md5' + SRA_FASTQ_FTP.out.md5 >> 'fastq/md5' + SRA_RUNINFO_TO_FTP.out.tsv >> 'metadata' + ch_versions_yml >> 'pipeline_info' + ch_samplesheet >> 'samplesheet' + ch_mappings >> 'samplesheet' + ch_sample_mappings_yml >> 'samplesheet' } /* From ff54921baf4e3a98475bde6444509d18049835b0 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 10 Jun 2024 19:27:46 -0500 Subject: [PATCH 11/25] Add wrapper workflow for ASPERA_CLI Signed-off-by: Ben Sherman --- modules/local/aspera_cli/main.nf | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git 
a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 670d6c5a..841d5626 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -10,7 +10,7 @@ process ASPERA_CLI { input: tuple val(meta), val(fastq) val user - var args + val args output: tuple val(meta), path("*fastq.gz"), emit: fastq @@ -56,3 +56,21 @@ process ASPERA_CLI { """ } } + +workflow { + input = [ + [ id:'SRX9626017_SRR13191702', single_end:false, md5_1: '89c5be920021a035084d8aeb74f32df7', md5_2: '56271be38a80db78ef3bdfc5d9909b98' ], + [ + 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR131/002/SRR13191702/SRR13191702_1.fastq.gz', + 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR131/002/SRR13191702/SRR13191702_2.fastq.gz' + ] + ] + user = 'era-fasp' + args = '' + + ASPERA_CLI ( + input, + user, + args + ) +} From b69bb74308772219aed084bd0f5bcd1d928a65b1 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 21 Jun 2024 05:26:33 -0500 Subject: [PATCH 12/25] Initialize ch_fastq Signed-off-by: Ben Sherman --- workflows/sra/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 03a2732f..5e181c3a 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -72,6 +72,7 @@ workflow SRA { .unique() .set { ch_sra_metadata } + ch_fastq = Channel.empty() if (!skip_fastq_download) { ch_sra_metadata From 6996724ac1de29b0b85f700dfa1553e586057220 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 26 Jun 2024 11:07:17 -0500 Subject: [PATCH 13/25] Remove import statements Signed-off-by: Ben Sherman --- subworkflows/nf-core/utils_nextflow_pipeline/main.nf | 12 ++++-------- subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 4 +--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index ac31f28f..17755544 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ 
b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -2,10 +2,6 @@ // Subworkflow with functionality that may be useful for any Nextflow pipeline // -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - /* ======================================================================================== SUBWORKFLOW DEFINITION @@ -79,10 +75,10 @@ def dumpParametersToJSON(outdir) { def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') def filename = "params_${timestamp}.json" def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) - FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.toPath().copyTo("${outdir}/pipeline_info/params_${timestamp}.json") temp_pf.delete() } @@ -90,7 +86,7 @@ def dumpParametersToJSON(outdir) { // When running with -profile conda, warn if channels have not been set-up appropriately // def checkCondaChannels() { - Yaml parser = new Yaml() + def parser = new org.yaml.snakeyaml.Yaml() def channels = [] try { def config = parser.load("conda config --show channels".execute().text) diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a60c69da..ff028bd2 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,8 +2,6 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import nextflow.extension.FilesEx - /* ======================================================================================== SUBWORKFLOW DEFINITION @@ -336,7 +334,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi Map colors = logColours(monochrome_logs) if 
(email_address) { try { - if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w -> w << sendmail_html } From faf3af48cd9dbc6982f45766a200a4d72d02dcd8 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 22 Sep 2024 23:58:14 -0500 Subject: [PATCH 14/25] Fix warnings Signed-off-by: Ben Sherman --- modules/local/aspera_cli/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 841d5626..61eec74a 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -58,15 +58,15 @@ process ASPERA_CLI { } workflow { - input = [ + def input = [ [ id:'SRX9626017_SRR13191702', single_end:false, md5_1: '89c5be920021a035084d8aeb74f32df7', md5_2: '56271be38a80db78ef3bdfc5d9909b98' ], [ 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR131/002/SRR13191702/SRR13191702_1.fastq.gz', 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR131/002/SRR13191702/SRR13191702_2.fastq.gz' ] ] - user = 'era-fasp' - args = '' + def user = 'era-fasp' + def args = '' ASPERA_CLI ( input, From f9385f3e4a9c962c8874c3bed07f9d13b2add639 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 22 Sep 2024 23:59:04 -0500 Subject: [PATCH 15/25] Update workflow outputs (second preview) Signed-off-by: Ben Sherman --- main.nf | 8 +------- nextflow.config | 14 +++++++++----- workflows/sra/main.nf | 5 ++--- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/main.nf b/main.nf index cc929e94..6942c092 100644 --- a/main.nf +++ b/main.nf @@ -53,8 +53,7 @@ workflow NFCORE_FETCHNGS { params.aspera_cli_args, params.sra_fastq_ftp_args, params.sratools_fasterqdump_args, - params.sratools_pigz_args, - params.outdir + params.sratools_pigz_args ) 
} @@ -101,11 +100,6 @@ workflow { ) } -output { - directory params.outdir - mode params.publish_dir_mode -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/nextflow.config b/nextflow.config index 74e1444a..219e7e25 100644 --- a/nextflow.config +++ b/nextflow.config @@ -134,22 +134,26 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -// NOTE: Nextflow config should provide some constant for the start timestamp +// workflow outputs +outputDir = params.outdir +workflow.outputs.mode = params.publish_dir_mode + +params.trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 5e181c3a..60691879 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -41,7 +41,6 @@ workflow SRA { sra_fastq_ftp_args // string sratools_fasterqdump_args // string sratools_pigz_args // string - outdir // string main: // @@ -137,8 +136,8 @@ workflow SRA { def reads = fastq instanceof List ? 
fastq.flatten() : [ fastq ] def meta_clone = meta.clone() - meta_clone.fastq_1 = reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '' - meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${outdir}/fastq/${reads[1].getName()}" : '' + meta_clone.fastq_1 = reads[0] ? "${workflow.outputDir}/fastq/${reads[0].getName()}" : '' + meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${workflow.outputDir}/fastq/${reads[1].getName()}" : '' return meta_clone } From a65146e7d45d25c5cf5d1324f2a2dd798db0fde4 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 2 Nov 2024 03:31:44 +0100 Subject: [PATCH 16/25] Refactor output targets to samples and versions Signed-off-by: Ben Sherman --- main.nf | 33 ++++++++ .../nf-core/utils_nfcore_pipeline/main.nf | 25 +++--- workflows/sra/main.nf | 84 ++++--------------- 3 files changed, 60 insertions(+), 82 deletions(-) diff --git a/main.nf b/main.nf index 6942c092..ca5ea47f 100644 --- a/main.nf +++ b/main.nf @@ -21,6 +21,7 @@ nextflow.preview.output = true include { SRA } from './workflows/sra' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' +include { softwareVersionsToYAML } from './subworkflows/nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -56,6 +57,8 @@ workflow NFCORE_FETCHNGS { params.sratools_pigz_args ) + emit: + samples = SRA.out.samples } /* @@ -66,6 +69,7 @@ workflow NFCORE_FETCHNGS { workflow { + main: // // SUBWORKFLOW: Run initialisation tasks // @@ -98,6 +102,35 @@ workflow { params.monochrome_logs, params.hook_url ) + + publish: + NFCORE_FETCHNGS.out.samples >> 'samples' + softwareVersionsToYAML() >> 'versions' +} + + +output { + samples { + path { _meta, _fastq, _md5 -> + { file -> + def dir = [ + 'fastq': 'fastq', + 'md5': 'fastq/md5' + ][file.ext] + "${dir}/${file.baseName}" + } + } + index { + path 
'samplesheet/samplesheet.json' + } + } + + versions { + path '.' + index { + path 'nf_core_fetchngs_software_mqc_versions.yml' + } + } } /* diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index ff028bd2..50cf7d84 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -93,27 +93,26 @@ def getWorkflowVersion() { // Get workflow version for pipeline // def workflowVersionToYAML() { - return """ - Workflow: - $workflow.manifest.name: ${getWorkflowVersion()} - Nextflow: $workflow.nextflow.version - """.stripIndent().trim() + return Channel.of( + [ 'Workflow', workflow.manifest.name, getWorkflowVersion() ], + [ 'Workflow', 'Nextflow', workflow.nextflow.version ] + ) } // // Get channel of software versions used in pipeline in YAML format // -def softwareVersionsToYAML(ch_versions) { - return ch_versions +def softwareVersionsToYAML() { + return Channel.topic('versions') .unique() + .mix(workflowVersionToYAML()) .map { process, name, version -> - """ - ${process.tokenize(':').last()}: - ${name}: ${version} - """.stripIndent().trim() + [ + (process.tokenize(':').last()): [ + (name): version + ] + ] } - .unique() - .mix(Channel.of(workflowVersionToYAML())) } // diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 60691879..337e5f99 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -9,8 +9,6 @@ include { SRA_FASTQ_FTP } from '../../modules/local/sra_fastq_ftp' include { SRA_IDS_TO_RUNINFO } from '../../modules/local/sra_ids_to_runinfo' include { SRA_RUNINFO_TO_FTP } from '../../modules/local/sra_runinfo_to_ftp' include { ASPERA_CLI } from '../../modules/local/aspera_cli' -include { SRA_TO_SAMPLESHEET } from '../../modules/local/sra_to_samplesheet' -include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' /* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -71,7 +69,7 @@ workflow SRA { .unique() .set { ch_sra_metadata } - ch_fastq = Channel.empty() + ch_samples = Channel.empty() if (!skip_fastq_download) { ch_sra_metadata @@ -125,87 +123,35 @@ workflow SRA { ) // Isolate FASTQ channel which will be added to emit block - SRA_FASTQ_FTP - .out - .fastq + ch_fastq = SRA_FASTQ_FTP.out.fastq .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) .mix(ASPERA_CLI.out.fastq) - .tap { ch_fastq } + + ch_md5 = SRA_FASTQ_FTP.out.md5 + .mix(ASPERA_CLI.out.md5) + + ch_samples = ch_fastq + .join(ch_md5, remainder: true) .map { - meta, fastq -> + meta, fastq, md5 -> def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] def meta_clone = meta.clone() - meta_clone.fastq_1 = reads[0] ? "${workflow.outputDir}/fastq/${reads[0].getName()}" : '' - meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${workflow.outputDir}/fastq/${reads[1].getName()}" : '' + meta_clone.fastq_1 = reads[0] + meta_clone.fastq_2 = reads[1] && !meta.single_end ? reads[1] : null + + meta_clone.md5_1 = md5[0] + meta_clone.md5_2 = md5[1] && !meta.single_end ? 
md5[1] : null return meta_clone } - .set { ch_sra_metadata } - } - - // - // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet - // - SRA_TO_SAMPLESHEET ( - ch_sra_metadata, - nf_core_pipeline, - nf_core_rnaseq_strandedness, - sample_mapping_fields - ) - - // Merge samplesheets and mapping files across all samples - SRA_TO_SAMPLESHEET - .out - .samplesheet - .map { it[1] } - .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'samplesheet.csv') - .set { ch_samplesheet } - - SRA_TO_SAMPLESHEET - .out - .mappings - .map { it[1] } - .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'id_mappings.csv') - .set { ch_mappings } - - // - // MODULE: Create a MutiQC config file with sample name mappings - // - ch_sample_mappings_yml = Channel.empty() - if (sample_mapping_fields) { - MULTIQC_MAPPINGS_CONFIG ( - ch_mappings - ) - ch_sample_mappings_yml = MULTIQC_MAPPINGS_CONFIG.out.yml } - // - // Collate and save software versions - // - softwareVersionsToYAML(Channel.topic('versions')) - .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_versions_yml } - emit: - samplesheet = ch_samplesheet - mappings = ch_mappings - sample_mappings = ch_sample_mappings_yml - sra_metadata = ch_sra_metadata + samples = ch_samples publish: - ch_fastq >> 'fastq' - ASPERA_CLI.out.md5 >> 'fastq/md5' - SRA_FASTQ_FTP.out.md5 >> 'fastq/md5' SRA_RUNINFO_TO_FTP.out.tsv >> 'metadata' - ch_versions_yml >> 'pipeline_info' - ch_samplesheet >> 'samplesheet' - ch_mappings >> 'samplesheet' - ch_sample_mappings_yml >> 'samplesheet' } /* From e567043a2a35e4c8dac80d2300cac2aebc89fc76 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 17:55:22 -0600 Subject: [PATCH 17/25] Revert unrelated changes 
Signed-off-by: Ben Sherman --- assets/schema_input.json | 17 ++ assets/schema_input.yml | 12 - assets/schema_mappings.yml | 28 -- assets/schema_samplesheet.yml | 81 ----- modules/local/aspera_cli/main.nf | 6 +- modules/local/aspera_cli/nextflow.config | 5 + modules/local/sra_fastq_ftp/main.nf | 2 +- modules/local/sra_fastq_ftp/nextflow.config | 5 + modules/nf-core/sratools/fasterqdump/main.nf | 8 +- .../sratools/fasterqdump/nextflow.config | 5 + nextflow.config | 57 +++- nextflow_schema.json | 287 ++++++++++++++++++ schema_inputs.yml | 258 ---------------- schema_outputs.yml | 16 - .../utils_nfcore_fetchngs_pipeline/main.nf | 4 +- .../main.nf | 6 +- workflows/sra/main.nf | 11 +- 17 files changed, 387 insertions(+), 421 deletions(-) create mode 100644 assets/schema_input.json delete mode 100644 assets/schema_input.yml delete mode 100644 assets/schema_mappings.yml delete mode 100644 assets/schema_samplesheet.yml create mode 100644 modules/local/aspera_cli/nextflow.config create mode 100644 modules/local/sra_fastq_ftp/nextflow.config create mode 100644 modules/nf-core/sratools/fasterqdump/nextflow.config create mode 100644 nextflow_schema.json delete mode 100644 schema_inputs.yml delete mode 100644 schema_outputs.yml diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..db9ffc00 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.json", + "title": "nf-core/fetchngs pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "": { + "type": "string", + "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$", + "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier" + } + } + } +} diff --git a/assets/schema_input.yml 
b/assets/schema_input.yml deleted file mode 100644 index 29760b88..00000000 --- a/assets/schema_input.yml +++ /dev/null @@ -1,12 +0,0 @@ -$schema: http://json-schema.org/draft-07/schema -$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.yml -title: nf-core/fetchngs pipeline - params.input schema -description: Schema for the file provided with params.input -type: array -items: - type: object - properties: - '': - type: string - pattern: ^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$ - errorMessage: Please provide a valid SRA, ENA, DDBJ or GEO identifier diff --git a/assets/schema_mappings.yml b/assets/schema_mappings.yml deleted file mode 100644 index 6025492e..00000000 --- a/assets/schema_mappings.yml +++ /dev/null @@ -1,28 +0,0 @@ -$schema: 'http://json-schema.org/draft-07/schema' -$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_mappings.yml' -title: 'nf-core/fetchngs pipeline - id_mappings.csv schema' -description: 'Schema for the mappings file produced by fetchngs' -type: array -items: - type: object - properties: - sample: - type: string - experiment_accession: - type: string - run_accession: - type: string - sample_accession: - type: string - experiment_alias: - type: string - run_alias: - type: string - sample_alias: - type: string - experiment_title: - type: string - sample_title: - type: string - sample_description: - type: string \ No newline at end of file diff --git a/assets/schema_samplesheet.yml b/assets/schema_samplesheet.yml deleted file mode 100644 index eb93179e..00000000 --- a/assets/schema_samplesheet.yml +++ /dev/null @@ -1,81 +0,0 @@ -$schema: 'http://json-schema.org/draft-07/schema' -$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_samplesheet.yml' -title: 'nf-core/fetchngs pipeline - samplesheet.csv schema' -description: 'Schema for the samplesheet file produced by fetchngs' -type: array -items: - type: object - properties: - 
sample: - type: string - fastq_1: - type: string - format: file-path - pattern: '^\\S+\\.f(ast)?q\\.gz$' - fastq_2: - type: string - format: file-path - pattern: '^\\S+\\.f(ast)?q\\.gz$' - run_accession: - type: string - experiment_accession: - type: string - sample_accession: - type: string - secondary_sample_accession: - type: string - study_accession: - type: string - secondary_study_accession: - type: string - submission_accession: - type: string - run_alias: - type: string - experiment_alias: - type: string - sample_alias: - type: string - study_alias: - type: string - library_layout: - type: string - library_selection: - type: string - library_source: - type: string - library_strategy: - type: string - library_name: - type: string - instrument_model: - type: string - instrument_platform: - type: string - base_count: - type: integer - read_count: - type: integer - tax_id: - type: string - scientific_name: - type: string - sample_title: - type: string - experiment_title: - type: string - study_title: - type: string - sample_description: - type: string - fastq_md5: - type: string - pattern: '^[0-9a-f]{32}$' - fastq_bytes: - type: integer - fastq_ftp: - type: string - fastq_galaxy: - type: string - fastq_aspera: - type: string \ No newline at end of file diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 61eec74a..ceef844f 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -10,7 +10,6 @@ process ASPERA_CLI { input: tuple val(meta), val(fastq) val user - val args output: tuple val(meta), path("*fastq.gz"), emit: fastq @@ -18,6 +17,7 @@ process ASPERA_CLI { tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions script: + def args = task.ext.args ?: '' def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? 
"export CONDA_PREFIX=/usr/local" : "" if (meta.single_end) { """ @@ -66,11 +66,9 @@ workflow { ] ] def user = 'era-fasp' - def args = '' ASPERA_CLI ( input, - user, - args + user ) } diff --git a/modules/local/aspera_cli/nextflow.config b/modules/local/aspera_cli/nextflow.config new file mode 100644 index 00000000..9a808242 --- /dev/null +++ b/modules/local/aspera_cli/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'ASPERA_CLI' { + ext.args = '-QT -l 300m -P33001' + } +} diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index 55cf70ef..017013ce 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -11,7 +11,6 @@ process SRA_FASTQ_FTP { input: tuple val(meta), val(fastq) - val args output: tuple val(meta), path("*fastq.gz"), emit: fastq @@ -19,6 +18,7 @@ process SRA_FASTQ_FTP { tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions script: + def args = task.ext.args ?: '' if (meta.single_end) { """ wget \\ diff --git a/modules/local/sra_fastq_ftp/nextflow.config b/modules/local/sra_fastq_ftp/nextflow.config new file mode 100644 index 00000000..26261f26 --- /dev/null +++ b/modules/local/sra_fastq_ftp/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'SRA_FASTQ_FTP' { + ext.args = '-t 5 -nv -c -T 60' + } +} diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index df45971f..4fdd07fe 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -11,8 +11,6 @@ process SRATOOLS_FASTERQDUMP { tuple val(meta), path(sra) path ncbi_settings path certificate - val fasterqdump_args // = '--split-files --include-technical' - val pigz_args // = '' output: tuple val(meta), path('*.fastq.gz'), emit: reads @@ -23,6 +21,8 @@ process SRATOOLS_FASTERQDUMP { task.ext.when == null || task.ext.when script: + def args 
= task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def outfile = meta.single_end ? "${prefix}.fastq" : prefix def key_file = '' @@ -35,14 +35,14 @@ process SRATOOLS_FASTERQDUMP { export NCBI_SETTINGS="\$PWD/${ncbi_settings}" fasterq-dump \\ - $fasterqdump_args \\ + $args \\ --threads $task.cpus \\ --outfile $outfile \\ ${key_file} \\ ${sra} pigz \\ - $pigz_args \\ + $args2 \\ --no-name \\ --processes $task.cpus \\ *.fastq diff --git a/modules/nf-core/sratools/fasterqdump/nextflow.config b/modules/nf-core/sratools/fasterqdump/nextflow.config new file mode 100644 index 00000000..7e1649d1 --- /dev/null +++ b/modules/nf-core/sratools/fasterqdump/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SRATOOLS_FASTERQDUMP { + ext.args = '--split-files --include-technical' + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 219e7e25..9ab04d25 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,6 +6,57 @@ ---------------------------------------------------------------------------------------- */ +// Global default params, used in configs +params { + + // Input options + input = null + nf_core_pipeline = null + nf_core_rnaseq_strandedness = 'auto' + ena_metadata_fields = null + sample_mapping_fields = 'experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description' + download_method = 'ftp' + skip_fastq_download = false + dbgap_key = null + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + + // Config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + 
config_profile_url = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationShowHiddenParams = false + validationSchemaIgnoreParams = '' + validate_params = true + + // Deprecated options + // See: https://github.com/nf-core/fetchngs/pull/279/files#r1494459480 + force_sratools_download = false + +} + // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -17,6 +68,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -134,9 +186,12 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + // workflow outputs outputDir = params.outdir -workflow.outputs.mode = params.publish_dir_mode +workflow.output.mode = params.publish_dir_mode params.trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..29f7b710 --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,287 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/nextflow_schema.json", + "title": "nf-core/fetchngs pipeline parameters", + "description": "Pipeline to fetch metadata and raw FastQ files from public databases", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.(csv|tsv|txt)$", + "fa_icon": "fas fa-file-excel", + "description": "File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files." + }, + "ena_metadata_fields": { + "type": "string", + "fa_icon": "fas fa-columns", + "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", + "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. 
Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." + }, + "sample_mapping_fields": { + "type": "string", + "fa_icon": "fas fa-columns", + "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", + "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" + }, + "nf_core_pipeline": { + "type": "string", + "fa_icon": "fab fa-apple", + "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.", + "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] + }, + "nf_core_rnaseq_strandedness": { + "type": "string", + "fa_icon": "fas fa-dna", + "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", + "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", + "default": "auto" + }, + "download_method": { + "type": "string", + "default": "ftp", + "fa_icon": "fas fa-download", + "enum": ["aspera", "ftp", "sratools"], + "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.", + "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ." + }, + "skip_fastq_download": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Only download metadata for public data database ids and don't download the FastQ files." 
+ }, + "dbgap_key": { + "type": "string", + "fa_icon": "fas fa-address-card", + "help_text": "Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.", + "format": "file-path", + "description": "dbGaP repository key." + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. 
These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
+ }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + } + } + }, + "deprecated_options": { + "title": "Deprecated options", + "type": "object", + "description": "List of parameters that have been deprecated.", + "default": "", + "fa_icon": "fas fa-calendar-times", + "properties": { + "force_sratools_download": { + "type": "boolean", + "fa_icon": "fas fa-times-circle", + "description": "This parameter has been deprecated. 
Please use '--download_method sratools' instead.", + "enum": [false], + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/deprecated_options" + } + ] +} diff --git a/schema_inputs.yml b/schema_inputs.yml deleted file mode 100644 index 257cba7d..00000000 --- a/schema_inputs.yml +++ /dev/null @@ -1,258 +0,0 @@ -$schema: http://json-schema.org/draft-07/schema -$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/schema_inputs.yml -title: nf-core/fetchngs pipeline parameters -description: Pipeline to fetch metadata and raw FastQ files from public databases -type: object -definitions: - input_output_options: - title: Input/output options - type: object - fa_icon: fas fa-terminal - description: Define where the pipeline should find input data and save output data. - required: - - input - - outdir - properties: - input: - type: string - format: file-path - exists: true - schema: assets/schema_input.yml - mimetype: text/csv - pattern: ^\\S+\\.(csv|tsv|txt)$ - fa_icon: fas fa-file-excel - description: File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files. - ena_metadata_fields: - type: string - fa_icon: fas fa-columns - description: Comma-separated list of ENA metadata fields to fetch before downloading data. - help_text: The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. 
Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run). - sample_mapping_fields: - type: string - fa_icon: fas fa-columns - description: Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC. - default: experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description - nf_core_pipeline: - type: string - fa_icon: fab fa-apple - description: Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns. - enum: - - rnaseq - - atacseq - - viralrecon - - taxprofiler - nf_core_rnaseq_strandedness: - type: string - fa_icon: fas fa-dna - description: Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'. - help_text: The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution. - default: auto - download_method: - type: string - default: ftp - fa_icon: fas fa-download - enum: - - aspera - - ftp - - sratools - description: Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'. - help_text: FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ. - skip_fastq_download: - type: boolean - fa_icon: fas fa-fast-forward - description: Only download metadata for public data database ids and don't download the FastQ files. - dbgap_key: - type: string - fa_icon: fas fa-address-card - help_text: Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. 
Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation. - format: file-path - description: dbGaP repository key. - aspera_cli_args: - type: string - default: -QT -l 300m -P33001 - sra_fastq_ftp_args: - type: string - default: -t 5 -nv -c -T 60 - sratools_fasterqdump_args: - type: string - default: '' - sratools_pigz_args: - type: string - default: '' - outdir: - type: string - format: directory-path - description: The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. - fa_icon: fas fa-folder-open - email: - type: string - description: Email address for completion summary. - fa_icon: fas fa-envelope - help_text: Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. - institutional_config_options: - title: Institutional config options - type: object - fa_icon: fas fa-university - description: Parameters used to describe centralised config profiles. These should not be edited. - help_text: The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline. - properties: - custom_config_version: - type: string - description: Git commit id for Institutional configs. - default: master - hidden: true - fa_icon: fas fa-users-cog - custom_config_base: - type: string - description: Base directory for Institutional configs. 
- default: https://raw.githubusercontent.com/nf-core/configs/master - hidden: true - help_text: If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter. - fa_icon: fas fa-users-cog - config_profile_name: - type: string - description: Institutional config name. - hidden: true - fa_icon: fas fa-users-cog - config_profile_description: - type: string - description: Institutional config description. - hidden: true - fa_icon: fas fa-users-cog - config_profile_contact: - type: string - description: Institutional config contact information. - hidden: true - fa_icon: fas fa-users-cog - config_profile_url: - type: string - description: Institutional config URL link. - hidden: true - fa_icon: fas fa-users-cog - max_job_request_options: - title: Max job request options - type: object - fa_icon: fab fa-acquisitions-incorporated - description: Set the top limit for requested resources for any single job. - help_text: If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details. - properties: - max_cpus: - type: integer - description: Maximum number of CPUs that can be requested for any single job. - default: 16 - fa_icon: fas fa-microchip - hidden: true - help_text: Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. 
`--max_cpus 1` - max_memory: - type: string - description: Maximum amount of memory that can be requested for any single job. - default: 128.GB - fa_icon: fas fa-memory - pattern: ^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$ - hidden: true - help_text: Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'` - max_time: - type: string - description: Maximum amount of time that can be requested for any single job. - default: 240.h - fa_icon: far fa-clock - pattern: ^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$ - hidden: true - help_text: Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` - generic_options: - title: Generic options - type: object - fa_icon: fas fa-file-import - description: Less common options for the pipeline, typically set in a config file. - help_text: These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`. - properties: - help: - type: boolean - description: Display help text. - fa_icon: fas fa-question-circle - hidden: true - version: - type: boolean - description: Display version and exit. - fa_icon: fas fa-question-circle - hidden: true - publish_dir_mode: - type: string - default: copy - description: Method used to save pipeline results to output directory. - help_text: The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. 
- fa_icon: fas fa-copy - enum: - - symlink - - rellink - - link - - copy - - copyNoFollow - - move - hidden: true - email_on_fail: - type: string - description: Email address for completion summary, only when pipeline fails. - fa_icon: fas fa-exclamation-triangle - help_text: An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully. - hidden: true - plaintext_email: - type: boolean - description: Send plain-text email instead of HTML. - fa_icon: fas fa-remove-format - hidden: true - monochrome_logs: - type: boolean - description: Do not use coloured log outputs. - fa_icon: fas fa-palette - hidden: true - hook_url: - type: string - description: Incoming hook URL for messaging service - fa_icon: fas fa-people-group - help_text: Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported. - hidden: true - validate_params: - type: boolean - description: Boolean whether to validate parameters against the schema at runtime - default: true - fa_icon: fas fa-check-square - hidden: true - validationShowHiddenParams: - type: boolean - fa_icon: far fa-eye-slash - description: Show all params when using `--help` - hidden: true - help_text: By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters. - validationFailUnrecognisedParams: - type: boolean - fa_icon: far fa-check-circle - description: Validation of parameters fails when an unrecognised parameter is found. - hidden: true - help_text: By default, when an unrecognised parameter is found, it returns a warinig. - validationLenientMode: - type: boolean - fa_icon: far fa-check-circle - description: Validation of parameters in lenient more. - hidden: true - help_text: Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode). - deprecated_options: - title: Deprecated options - type: object - description: List of parameters that have been deprecated. - default: '' - fa_icon: fas fa-calendar-times - properties: - force_sratools_download: - type: boolean - fa_icon: fas fa-times-circle - description: This parameter has been deprecated. Please use '--download_method sratools' instead. - enum: - - false - hidden: true -allOf: - - $ref: "#/definitions/input_output_options" - - $ref: "#/definitions/institutional_config_options" - - $ref: "#/definitions/max_job_request_options" - - $ref: "#/definitions/generic_options" - - $ref: "#/definitions/deprecated_options" diff --git a/schema_outputs.yml b/schema_outputs.yml deleted file mode 100644 index 75f26e94..00000000 --- a/schema_outputs.yml +++ /dev/null @@ -1,16 +0,0 @@ -$schema: 'http://json-schema.org/draft-07/schema' -$id: 'https://raw.githubusercontent.com/nf-core/fetchngs/master/schema_outputs.yml' -title: 'nf-core/fetchngs pipeline outputs' -description: '' -type: object -properties: - id_mappings: - type: string - format: file-path - mimetype: text/csv - schema: assets/schema_mappings.yml - samplesheet: - type: string - format: file-path - mimetype: text/csv - schema: assets/schema_samplesheet.yml \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 770f1e08..0c4307b5 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -62,7 +62,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text, post_help_text, validate_params, - "schema_inputs.yml" + "nextflow_schema.json" ) // @@ -112,7 +112,7 @@ workflow PIPELINE_COMPLETION { main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "schema_inputs.yml") + summary_params = 
paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") // // Completion email and summary diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 3a57d1b2..c5330442 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -9,8 +9,6 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: ch_sra_ids // channel: [ val(meta), val(id) ] ch_dbgap_key // channel: [ path(dbgap_key) ] - sratools_fasterqdump_args // string - sratools_pigz_args // string main: // @@ -30,9 +28,7 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings, - ch_dbgap_key, - sratools_fasterqdump_args, - sratools_pigz_args + ch_dbgap_key ) emit: diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 337e5f99..52ab5057 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -35,10 +35,6 @@ workflow SRA { download_method // enum: 'aspera' | 'ftp' | 'sratools' skip_fastq_download // boolean dbgap_key // string - aspera_cli_args // string - sra_fastq_ftp_args // string - sratools_fasterqdump_args // string - sratools_pigz_args // string main: // @@ -108,9 +104,7 @@ workflow SRA { // FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( ch_sra_reads.sratools, - dbgap_key ? file(dbgap_key, checkIfExists: true) : [], - sratools_fasterqdump_args, - sratools_pigz_args + dbgap_key ? 
file(dbgap_key, checkIfExists: true) : [] ) // @@ -118,8 +112,7 @@ workflow SRA { // ASPERA_CLI ( ch_sra_reads.aspera, - 'era-fasp', - aspera_cli_args + 'era-fasp' ) // Isolate FASTQ channel which will be added to emit block From 885128a2b3704b9a2d2b18ea8c8d8ea5b639145c Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 18:05:15 -0600 Subject: [PATCH 18/25] Revert unrelated changes Signed-off-by: Ben Sherman --- main.nf | 10 ++-------- .../nf-core/sratools/fasterqdump/tests/nextflow.config | 5 +++++ nextflow.config | 3 +++ .../nextflow.config | 1 + workflows/sra/main.nf | 6 +----- workflows/sra/nextflow.config | 3 +++ 6 files changed, 15 insertions(+), 13 deletions(-) create mode 100644 modules/nf-core/sratools/fasterqdump/tests/nextflow.config create mode 100644 subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config create mode 100644 workflows/sra/nextflow.config diff --git a/main.nf b/main.nf index ca5ea47f..cbf129cc 100644 --- a/main.nf +++ b/main.nf @@ -11,6 +11,7 @@ nextflow.enable.dsl = 2 nextflow.preview.output = true +nextflow.preview.topic = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -45,16 +46,9 @@ workflow NFCORE_FETCHNGS { SRA ( ids, params.ena_metadata_fields ?: '', - params.sample_mapping_fields, - params.nf_core_pipeline ?: '', - params.nf_core_rnaseq_strandedness ?: 'auto', params.download_method, params.skip_fastq_download, - params.dbgap_key, - params.aspera_cli_args, - params.sra_fastq_ftp_args, - params.sratools_fasterqdump_args, - params.sratools_pigz_args + params.dbgap_key ) emit: diff --git a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config new file mode 100644 index 00000000..23e4100b --- /dev/null +++ b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SRATOOLS_FASTERQDUMP { + ext.args = '' + } +} \ No newline at end of file 
diff --git a/nextflow.config b/nextflow.config index 9ab04d25..8dd83bab 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,6 +63,9 @@ includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions includeConfig "${params.custom_config_base}/nfcore_custom.config" +// Workflow specific configs +includeConfig './workflows/sra/nextflow.config' + profiles { debug { dumpHashes = true diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config new file mode 100644 index 00000000..187faf6d --- /dev/null +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config @@ -0,0 +1 @@ +includeConfig '../../../modules/nf-core/sratools/fasterqdump/nextflow.config' diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 52ab5057..03aa2743 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -29,9 +29,6 @@ workflow SRA { take: ids // channel: [ ids ] ena_metadata_fields // string - sample_mapping_fields // string - nf_core_pipeline // string - nf_core_rnaseq_strandedness // string download_method // enum: 'aspera' | 'ftp' | 'sratools' skip_fastq_download // boolean dbgap_key // string @@ -95,8 +92,7 @@ workflow SRA { // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // SRA_FASTQ_FTP ( - ch_sra_reads.ftp, - sra_fastq_ftp_args + ch_sra_reads.ftp ) // diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config new file mode 100644 index 00000000..522b05b8 --- /dev/null +++ b/workflows/sra/nextflow.config @@ -0,0 +1,3 @@ +includeConfig "../../modules/local/aspera_cli/nextflow.config" +includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config" +includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config" From 0bbc6652bd3626d05029994ab38bf58fca606c8a Mon Sep 
17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 18:20:41 -0600 Subject: [PATCH 19/25] Use Nextflow 24.10 Signed-off-by: Ben Sherman --- .github/workflows/ci.yml | 2 +- nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index efd086d6..7af44ea3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: fail-fast: false matrix: NXF_VER: - - "24.04.2" + - "24.10.2" - "latest-everything" profile: - "conda" diff --git a/nextflow.config b/nextflow.config index cede24d6..8ef0df08 100644 --- a/nextflow.config +++ b/nextflow.config @@ -225,7 +225,7 @@ manifest { homePage = 'https://github.com/nf-core/fetchngs' description = """Pipeline to fetch metadata and raw FastQ files from public databases""" mainScript = 'main.nf' - nextflowVersion = '!>=24.04.2' + nextflowVersion = '!>=24.10.2' version = '1.13.0dev' doi = '10.5281/zenodo.5070524' } From 8e91e21feac6b7a7ad85aa87bc1b999a3a16c37f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 18:30:24 -0600 Subject: [PATCH 20/25] FIx failing test Signed-off-by: Ben Sherman --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4410c6e2..1f79805a 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/fetchngs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/fetchngs/actions/workflows/ci.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/fetchngs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/fetchngs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/fetchngs/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.5070524-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.5070524)[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.10.2-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) From 768702d098edd903edc7dde8e5f01e78685b1ad8 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 18:36:46 -0600 Subject: [PATCH 21/25] Fix failing tests Signed-off-by: Ben Sherman --- nextflow.config | 10 +++++----- nextflow_schema.json | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/nextflow.config b/nextflow.config index 8ef0df08..45283e3f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,6 +33,7 @@ params { version = false modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' pipelines_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/pipelines/fetchngs/1.15.0/' + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options config_profile_name = null @@ -201,22 +202,21 @@ nextflow.enable.configProcessNamesValidation = false outputDir = params.outdir workflow.output.mode = params.publish_dir_mode -params.trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + file = 
"${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html" } manifest { diff --git a/nextflow_schema.json b/nextflow_schema.json index ba7c196b..f58a2358 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -199,6 +199,12 @@ "description": "Base URL or local path to location of pipeline test dataset files", "default": "s3://ngi-igenomes/testdata/nf-core/pipelines/fetchngs/1.15.0/", "hidden": true + }, + "trace_report_suffix": { + "type": "string", + "fa_icon": "far calendar", + "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", + "hidden": true } } }, From 8f97ac60ace22f3c7bf60dc58f264747ef7dba45 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 18:53:35 -0600 Subject: [PATCH 22/25] minor edits Signed-off-by: Ben Sherman --- workflows/sra/main.nf | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 03aa2743..f0046c5a 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -62,7 +62,6 @@ workflow SRA { .unique() .set { ch_sra_metadata } - ch_samples = Channel.empty() if (!skip_fastq_download) { ch_sra_metadata @@ -124,17 +123,19 @@ workflow SRA { .map { meta, fastq, md5 -> def reads = fastq instanceof List ? 
fastq.flatten() : [ fastq ] - def meta_clone = meta.clone() - - meta_clone.fastq_1 = reads[0] - meta_clone.fastq_2 = reads[1] && !meta.single_end ? reads[1] : null - - meta_clone.md5_1 = md5[0] - meta_clone.md5_2 = md5[1] && !meta.single_end ? md5[1] : null - - return meta_clone + def meta_clone = meta + [ + fastq_1: reads[0], + fastq_2: reads[1] && !meta.single_end ? reads[1] : null, + md5_1: md5[0], + md5_2: md5[1] && !meta.single_end ? md5[1] : null, + ] + + return [ meta_clone, reads, md5 ] } } + else { + ch_samples = Channel.empty() + } emit: samples = ch_samples From 26c43af44534777ab7578731664340aec456331a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Wed, 4 Dec 2024 19:03:23 -0600 Subject: [PATCH 23/25] minor edits Signed-off-by: Ben Sherman --- .../main.nf | 6 +----- workflows/sra/main.nf | 10 +++++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index c5330442..f29e99d5 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -25,11 +25,7 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Convert the SRA format into one or more compressed FASTQ files. 
// - SRATOOLS_FASTERQDUMP ( - SRATOOLS_PREFETCH.out.sra, - ch_ncbi_settings, - ch_dbgap_key - ) + SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings, ch_dbgap_key ) emit: reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ] diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index f0046c5a..7d4151f6 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -27,11 +27,11 @@ include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../../subworkflow workflow SRA { take: - ids // channel: [ ids ] - ena_metadata_fields // string - download_method // enum: 'aspera' | 'ftp' | 'sratools' - skip_fastq_download // boolean - dbgap_key // string + ids // channel: [ ids ] + ena_metadata_fields // string + download_method // enum: 'aspera' | 'ftp' | 'sratools' + skip_fastq_download // boolean + dbgap_key // string main: // From 9e33149c32ddb5b3bd2cca83be5f67f110372077 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 5 Dec 2024 16:27:34 -0600 Subject: [PATCH 24/25] Fix workflow outputs Signed-off-by: Ben Sherman --- main.nf | 14 ++++++-------- workflows/sra/main.nf | 4 +--- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index b78df2ed..780cd8bd 100644 --- a/main.nf +++ b/main.nf @@ -102,14 +102,12 @@ workflow { output { samples { - path { _meta, _fastq, _md5 -> - { file -> - def dir = [ - 'fastq': 'fastq', - 'md5': 'fastq/md5' - ][file.ext] - "${dir}/${file.baseName}" - } + path { _sample -> + def dirs = [ + 'fastq': 'fastq', + 'md5': 'fastq/md5' + ] + return { file -> "${dirs[file.extension]}/${file.name}" } } index { path 'samplesheet/samplesheet.json' diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 7d4151f6..03273ab5 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -123,14 +123,12 @@ workflow SRA { .map { meta, fastq, md5 -> def reads = fastq instanceof List ? 
fastq.flatten() : [ fastq ] - def meta_clone = meta + [ + meta + [ fastq_1: reads[0], fastq_2: reads[1] && !meta.single_end ? reads[1] : null, md5_1: md5[0], md5_2: md5[1] && !meta.single_end ? md5[1] : null, ] - - return [ meta_clone, reads, md5 ] } } else { From 08a336188f95fffffc2502dc6ff421d1ed652d58 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 10 Dec 2024 10:58:07 -0600 Subject: [PATCH 25/25] Fix dynamic publish path Signed-off-by: Ben Sherman --- main.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 780cd8bd..ee98b2bf 100644 --- a/main.nf +++ b/main.nf @@ -107,7 +107,10 @@ output { 'fastq': 'fastq', 'md5': 'fastq/md5' ] - return { file -> "${dirs[file.extension]}/${file.name}" } + return { filename -> + def ext = filename.tokenize('.').last() + "${dirs[ext]}/${filename}" + } } index { path 'samplesheet/samplesheet.json'