# .gitlab-ci.yml — forked from epi2me-labs/wf-basecalling
# Include shared CI: pulls the `wf-containers.yaml` template from the
# `epi2melabs/ci-templates` project. `file` must be nested under the same
# list entry as `project`, otherwise it is parsed as a top-level key.
include:
  - project: "epi2melabs/ci-templates"
    file: "wf-containers.yaml"
# Pipeline-wide defaults. Individual docker-run matrix rules override
# NF_BEFORE_SCRIPT when they need a different data set, and interpolate
# ${NF_PROCESS_OPTIONS} into their own NF_WORKFLOW_OPTS.
variables:
  CI_FLAVOUR: "new"  # set to "classic" for old-style CI
  SKIP_PYTHON_TESTS: "not applicable"
  # Download and unpack the demo data set, print its VERSION file, then
  # remove the tarball.
  NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-demo/VERSION && rm demo_data.tar.gz"
  # Workflow options appended to every matrix entry's NF_WORKFLOW_OPTS.
  NF_PROCESS_OPTIONS: "--basecaller_chunk_size 1 --ubam_map_threads 5 --ubam_sort_threads 2 --ubam_bam2fq_threads 1"
# Regenerates the model schema with util/update_models_schema.sh and fails
# the job when the result differs from the committed nextflow_schema.json.
check-models:
  extends: .preflight
  script:
    - !reference [.install, nextflow]  # requires nextflow to read config
    - bash util/update_models_schema.sh . docker
    # Literal block (`|`) keeps each shell line on its own line; a folded
    # scalar would join `then`/`echo`/`exit`/`fi` into one broken command
    # unless the inner lines happen to be more deeply indented.
    - |
      if ! diff nextflow_schema.json nextflow_schema.json.new; then
        echo "Model schema requires updating."
        exit 1
      fi
# Overrides the docker-run job from the included CI templates: one job is
# scheduled per MATRIX_NAME value, and the rules block below selects the
# workflow options for each of them.
docker-run:
  artifacts:
    when: always
    paths:
      - ${CI_PROJECT_NAME}
      - .nextflow.log
    # Keep FASTA references and the ref_cache directory out of the artifacts.
    exclude:
      - ${CI_PROJECT_NAME}/**/*.fa
      - ${CI_PROJECT_NAME}/**/*.fna
      - ${CI_PROJECT_NAME}/**/*.fasta
      - ${CI_PROJECT_NAME}/**/ref_cache/**
  tags:
    - grid
    - shell
  # Define a 1D job matrix to inject a variable named MATRIX_NAME into
  # the CI environment, we can use the value of MATRIX_NAME to determine
  # which options to apply as part of the rules block below
  # NOTE There is a slightly cleaner way to define this matrix to include
  #   the variables, but it is broken when using long strings! See CW-756
  parallel:
    matrix:
      - MATRIX_NAME:
          - "dorado"
          - "dorado-igv"
          - "dorado-igv-gz"
          - "dorado_mod"
          - "dorado_fast5"
          - "dorado-gzref"
          - "dorado-output-fastq"
          - "duplex"
          - "duplex_mod"
          - "duplex_fast5"
          - "duplex_watch"
          - "duplex_fqonly_fail"
          - "watch_path"
          - "no_reference"
          - "no_reference-output-fastq"
          - "output_bam"
          - "polya_tails"
          - "demux"
          - "duplex_demux"
          - "demux-align"
  # NOTE(review): the `[email protected]` tokens in the --basecaller_cfg and
  # --remora_cfg values below look like scrubbed model identifiers (the
  # `<model>@<version>` form was presumably mistaken for an e-mail address).
  # They are kept verbatim here; restore the real model names before use.
  rules:
    # NOTE As we're overriding the rules block for the included docker-run
    #   we must redefine this CI_COMMIT_BRANCH rule to prevent docker-run
    #   being incorrectly scheduled for "detached merge request pipelines" etc.
    - if: ($CI_COMMIT_BRANCH == null || $CI_COMMIT_BRANCH == "dev-template")
      when: never
    - if: $MATRIX_NAME == "dorado"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "dorado-igv"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --igv ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    # The three entries below additionally download the gzipped human
    # reference into the demo directory before the workflow runs.
    - if: $MATRIX_NAME == "dorado-igv-gz"
      variables:
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-demo/VERSION && rm demo_data.tar.gz && wget -q -O wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-reference/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz"
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz --basecaller_cfg [email protected] --igv ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "dorado-gzref"
      variables:
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-demo/VERSION && rm demo_data.tar.gz && wget -q -O wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-reference/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz"
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "dorado-output-fastq"
      variables:
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-demo/VERSION && rm demo_data.tar.gz && wget -q -O wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-reference/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz"
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz --basecaller_cfg [email protected] --output_fmt fastq ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "dorado_mod"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --remora_cfg [email protected]_5mCG_5hmCG@v2 ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "dorado_fast5"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/fast5 --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --dorado_ext fast5 --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "watch_path"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --watch_path --read_limit 3000 ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    # Entries without --ref also list cram_cache among the ignored processes.
    - if: $MATRIX_NAME == "no_reference"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "cram_cache,stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "no_reference-output-fastq"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --basecaller_cfg [email protected] --output_fmt fastq ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "cram_cache,stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
        # Exits non-zero when the expected FASTQ output file is missing.
        AFTER_NEXTFLOW_CMD: "[ -f wf-basecalling/SAMPLE.pass.fq.gz ] && echo 'Expected file wf-basecalling/SAMPLE.pass.fq.gz found' || exit 1"
    - if: $MATRIX_NAME == "output_bam"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --basecaller_cfg [email protected] --output_fmt bam ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "cram_cache,stopCondition,pair_stats,progressive_pairings,dorado_summary,split_xam,combine_dorado_summaries,output_pod5s"
    - if: $MATRIX_NAME == "duplex"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --duplex ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,output_pod5s"
    - if: $MATRIX_NAME == "duplex_mod"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --remora_cfg [email protected]_5mCG_5hmCG@v2 --duplex ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,output_pod5s"
    - if: $MATRIX_NAME == "duplex_fast5"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/fast5 --output_pod5 --dorado_ext fast5 --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --duplex ${NF_PROCESS_OPTIONS}"
        # The trailing backslash joins the two lines without inserting a space.
        NF_IGNORE_PROCESSES: "stopCondition,pair_stats,progressive_pairings,dorado,make_mmi,align_and_qsFilter,\
          merge_pass_calls,merge_fail_calls,getVersions,getParams,cram_cache,bamstats,progressive_stats,makeReport,output"
    - if: $MATRIX_NAME == "duplex_watch"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --watch_path --read_limit 3000 --duplex ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "output_pod5s"
    # This entry sets ASSERT_NEXTFLOW_FAILURE together with the message
    # pattern given in ASSERT_NEXTFLOW_FAILURE_REXP.
    - if: $MATRIX_NAME == "duplex_fqonly_fail"
      variables:
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demo/input --ref wf-basecalling-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta --basecaller_cfg [email protected] --watch_path --read_limit 3000 --output_fmt fastq --duplex ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "output_pod5s"
        ASSERT_NEXTFLOW_FAILURE: "yes"
        ASSERT_NEXTFLOW_FAILURE_REXP: "Duplex requires the outputs of Dorado to be in BAM format."
    - if: $MATRIX_NAME == "polya_tails"
      variables:
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-polya-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-polya-demo/VERSION && rm demo_data.tar.gz"
        NF_WORKFLOW_OPTS: "--poly_a_config wf-basecalling-polya-demo/polya_conf.toml --input wf-basecalling-polya-demo/input --ref wf-basecalling-polya-demo/RCS-100A.fasta --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "output_pod5s,pair_stats,progressive_pairings,split_xam,stopCondition"
    - if: $MATRIX_NAME == "demux"
      variables:
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demux-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-demux-demo/README && rm demo_data.tar.gz"
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demux-demo/input --barcode_kit SQK-RBK114-96 --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "output_pod5s,pair_stats,progressive_pairings,split_xam,stopCondition,cram_cache"
    - if: $MATRIX_NAME == "demux-align"
      variables:
        # Each trailing backslash joins the next line onto this one; the
        # space before the backslash is kept, so the command stays intact.
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demux-demo.tar.gz && \
          tar -xzvf demo_data.tar.gz && cat wf-basecalling-demux-demo/README && \
          rm demo_data.tar.gz && \
          wget -q -O wf-basecalling-demux-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-human-reference/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz"
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demux-demo/input --barcode_kit SQK-RBK114-96 --ref wf-basecalling-demux-demo/GCA_000001405.15_GRCh38_no_alt_analysis_set.fa.gz --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "output_pod5s,pair_stats,progressive_pairings,split_xam,stopCondition,cram_cache"
    # Combines --duplex with --barcode_kit and sets ASSERT_NEXTFLOW_FAILURE
    # with a parameter-validation message pattern.
    - if: $MATRIX_NAME == "duplex_demux"
      variables:
        NF_BEFORE_SCRIPT: "wget -qO demo_data.tar.gz https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-basecalling/wf-basecalling-demux-demo.tar.gz && tar -xzvf demo_data.tar.gz && cat wf-basecalling-demux-demo/README && rm demo_data.tar.gz"
        NF_WORKFLOW_OPTS: "--input wf-basecalling-demux-demo/input --duplex true --barcode_kit SQK-RBK114-96 --basecaller_cfg [email protected] ${NF_PROCESS_OPTIONS}"
        NF_IGNORE_PROCESSES: "output_pod5s,pair_stats,progressive_pairings,split_xam,stopCondition,cram_cache"
        ASSERT_NEXTFLOW_FAILURE: "yes"
        ASSERT_NEXTFLOW_FAILURE_REXP: "Validation of pipeline parameters failed"
# Never schedule the aws-run job: its only rule is an unconditional
# `when: never`.
aws-run:
  rules:
    - when: never