From 58105574819a81c07e5fec6c0bbbf1a6c9ef8d0b Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 12:10:05 +0000 Subject: [PATCH 01/17] Add proposed samplesheet --- assets/samplesheet.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab..862bb89 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample_id,flowcell_id,lane_id,project_id,fastq_1,fastq_2,rundir +SAMPLE_PAIRED_END,F01,L01,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir +SAMPLE_SINGLE_END,F02,L02,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 89e698db4be4ccb24ed8546f10a198cbae1cf05a Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 12:11:07 +0000 Subject: [PATCH 02/17] Comment fasta input --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 2e9b1e3..1e9c2ed 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,7 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SEQINSPECTOR } from './workflows/seqinspector' +include { SEQINSPECTOR } from './workflows/seqinspector' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_seqinspector_pipeline' @@ -32,7 +32,7 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_seqi // TODO nf-core: Remove this line if you don't need a FASTA file // This is an example of how to use getGenomeAttribute() to fetch 
parameters // from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +// params.fasta = getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 999ca9ef6afc486447b856b52e59d119d2d49070 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 12:38:41 +0000 Subject: [PATCH 03/17] Remove flowcell from samplesheet --- assets/samplesheet.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 862bb89..dd4c094 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample_id,flowcell_id,lane_id,project_id,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,F01,L01,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,F02,L02,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir +sample_id,lane_id,project_id,fastq_1,fastq_2,rundir +SAMPLE_PAIRED_END,L01,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir +SAMPLE_SINGLE_END,L02,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 8ae4bd9630a5083f255f5523495e6a558eac5cc4 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 13:06:02 +0000 Subject: [PATCH 04/17] Disable genomeExistsError() --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 56b144f..af827ff 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -151,7 +151,7 @@ workflow PIPELINE_COMPLETION { // Check and validate 
pipeline parameters // def validateInputParameters() { - genomeExistsError() + // genomeExistsError() } // From 1cf6e9030620a7704d6ef4b4cb607f9c44e54da7 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 13:21:36 +0000 Subject: [PATCH 05/17] Preliminary samplesheet schema --- assets/schema_input.json | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 338d355..79f3644 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -11,7 +11,19 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] + "meta": ["sample"] + }, + "lane": { + "type": "integer", + "pattern": "^\\d+$", + "errorMessage": "Lane ID must be a number", + "meta": ["lane"] + }, + "project": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Project ID cannot contain spaces", + "meta": ["project"] }, "fastq_1": { "type": "string", @@ -26,8 +38,15 @@ "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "rundir": { + "type": "string", + "format": "file-path", + "exists": true, + "errorMessage": "Run directory must be a path", + "meta": ["rundir"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "lane", "fastq_1"] } } From 72d94ea4ca41714d657b3c7b6100d8cd5f37a481 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 13:44:07 +0000 Subject: [PATCH 06/17] Add fastq2 dependency --- assets/schema_input.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 79f3644..fc4fdb9 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,6 +47,9 @@ "meta": ["rundir"] } }, - "required": ["sample", "lane", "fastq_1"] + "required": ["sample", "lane", 
"fastq_1"], + "dependentRequired": { + "fastq_2": ["fastq_1"] + } } } From ef6b7b11ef358f770cff43e77892540f8eadb8f3 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:07:45 +0000 Subject: [PATCH 07/17] Update samplesheet reading --- .../main.nf | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index af827ff..1e09716 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -75,29 +75,32 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // - validateInputParameters() + validateInputParameters() // Validates workflow parameters against $projectDir/nextflow_schema.json // // Create channel from input file provided through params.input // Channel - .fromSamplesheet("input") + .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> + def id_string = "${meta.lane}_${meta.group ?: "ungrouped"}_${meta.sample}" + def updated_meta = meta + [ id: id_string ] if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + return [ updated_meta.id, updated_meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] } } .groupTuple() .map { - validateInputSamplesheet(it) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] + validateInputSamplesheet(it) // Applies additional group validation checks that schema_input.json cannot do. 
} + .transpose() // Replace the map below + // .map { + // meta, fastqs -> + // return [ meta, fastqs.flatten() ] + // } .set { ch_samplesheet } emit: @@ -152,6 +155,8 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { // genomeExistsError() + + // TODO: Add code to further validate pipeline parameters here } // From e0ecd40ec9d775db51cee5a1b91e42381add95e4 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:20:40 +0000 Subject: [PATCH 08/17] Fix headers --- assets/samplesheet.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index dd4c094..37396db 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample_id,lane_id,project_id,fastq_1,fastq_2,rundir +sample,lane,project,fastq_1,fastq_2,rundir SAMPLE_PAIRED_END,L01,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir SAMPLE_SINGLE_END,L02,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 91c719822dd681e0f24b3e541c643193a4930217 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:21:22 +0000 Subject: [PATCH 09/17] Add comments and update meta with id --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 1e09716..2fc5831 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -101,6 +101,7 @@ workflow PIPELINE_INITIALISATION { // meta, fastqs -> // return [ meta, fastqs.flatten() ] // } + .view() .set { ch_samplesheet } emit: From a0ef81b3a38a44a93095cdc2c2e0065c03bd0e5f Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:30:22 +0000 
Subject: [PATCH 10/17] Fix comment --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 2fc5831..d4b359a 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -75,7 +75,7 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // - validateInputParameters() // Validates workflow parameters against $projectDir/nextflow_schema.json + validateInputParameters() // Runs additional validation that is not done by $projectDir/nextflow_schema.json // // Create channel from input file provided through params.input From a8a5dd160f7ef13dc06c39d9dee53deb8320f197 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:35:19 +0000 Subject: [PATCH 11/17] Add additional comment where validation occurs --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index d4b359a..15e595b 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -57,7 +57,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text = nfCoreLogo(monochrome_logs) post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFVALIDATION_PLUGIN ( + UTILS_NFVALIDATION_PLUGIN ( // Validates parameters against $projectDir/nextflow_schema.json help, workflow_command, pre_help_text, From 261a3dd69a9e7e78bfded99b2f91aeac07b7ec70 Mon Sep 17 00:00:00 2001 
From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:41:38 +0000 Subject: [PATCH 12/17] Fix lane example --- assets/samplesheet.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 37396db..fbe5de2 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ sample,lane,project,fastq_1,fastq_2,rundir -SAMPLE_PAIRED_END,L01,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir -SAMPLE_SINGLE_END,L02,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir +SAMPLE_PAIRED_END,1,P001,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir +SAMPLE_SINGLE_END,2,P002,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir From 94a549e3eee0a4f5d84ddcb4399ed0b47297c74a Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 18 Mar 2024 14:55:13 +0000 Subject: [PATCH 13/17] Rearrange meta id string --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 15e595b..e1f8e4d 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. 
Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.lane}_${meta.group ?: "ungrouped"}_${meta.sample}" + def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 56e01a0357334b461662398db8c89fd6340a5b73 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 19 Mar 2024 10:09:55 +0100 Subject: [PATCH 14/17] Update assets/schema_input.json --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index fc4fdb9..332031c 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -41,7 +41,7 @@ }, "rundir": { "type": "string", - "format": "file-path", + "format": "directory-path", "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"] From 6c94332bf1ba3738e39c80031f6d83436a4832b4 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 09:16:32 +0100 Subject: [PATCH 15/17] group instead of project in a single place Co-authored-by: Adrien Coulier --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index e1f8e4d..cdbb640 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") // Validates samplesheet against $projectDir/assets/schema_input.json. 
Path to validation schema is defined by $projectDir/nextflow_schema.json .map { meta, fastq_1, fastq_2 -> - def id_string = "${meta.sample}_${meta.group ?: "ungrouped"}_${meta.lane}" + def id_string = "${meta.sample}_${meta.project ?: "ungrouped"}_${meta.lane}" def updated_meta = meta + [ id: id_string ] if (!fastq_2) { return [ updated_meta.id, updated_meta + [ single_end:true ], [ fastq_1 ] ] From 4779844462856d017d7ecdcba13cf018198caf3b Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 09:51:59 +0100 Subject: [PATCH 16/17] Updated test profile input --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index fbbffdd..38e9ee3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,7 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/KarNair/test-datasets/seqinspector/testdata/MiSeq/samplesheet.csv' // Genome references genome = 'R64-1-1' From 28e0137e65c001682ce5e3257ea6fe6248ccf85e Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Wed, 20 Mar 2024 14:51:35 +0100 Subject: [PATCH 17/17] Update assets/schema_input.json Co-authored-by: Karthik Nair <35717861+KarNair@users.noreply.github.com> --- assets/schema_input.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 332031c..9fb321b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -41,7 +41,7 @@ }, "rundir": { "type": "string", - "format": "directory-path", + "format": "path", "exists": true, "errorMessage": "Run directory must be a path", "meta": ["rundir"]