Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Variant definitions #101

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/scripts/test_PR_against_release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export PATH=/opt/conda/bin:$PATH
singularity --version
# write test log as github Action artifact
echo Nextflow run current PR in --illumina mode.. >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--directory $PWD/.github/data/fastqs/ \
--illumina \
Expand All @@ -25,7 +25,7 @@ git checkout tags/v1.1.1
sed -i s'/cpus = 4/cpus = 2/'g conf/resources.config
ln -s ../*.sif ./
echo Nextflow run previous release in --illumina mode.. >> ../artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--directory $PWD/../.github/data/fastqs/ \
--illumina \
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/test_bed_ref_input.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ echo bed file: $BED_FILE >> artifacts/test_artifact.log
# run current pull request code
singularity --version
echo Nextflow run --illumina mode with --ref, --bed .. >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--ref $REF_FILE \
--bed $BED_FILE \
Expand Down Expand Up @@ -58,7 +58,7 @@ rm $REF_FILE
ln -s $REAL_REF $REF_FILE

echo Nextflow run --illumina mode with symlinked --ref, --bed .. >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--ref $REF_FILE \
--bed $BED_FILE \
Expand Down
4 changes: 2 additions & 2 deletions .github/scripts/test_conda_cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export REPO=$PWD
echo REPO=$REPO >> artifacts/test_artifact.log
cd ..
echo PWD=$PWD >> $REPO/artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run $REPO \
nextflow run $REPO \
-profile conda \
--cache $REPO/conda_cache_dir \
--directory $REPO/.github/data/fastqs/ \
Expand All @@ -25,7 +25,7 @@ cat .nextflow.log | grep 'Conda create complete env=/home/runner/work/ncov2019-a
rm -rf results && rm -rf work && rm -rf .nextflow*
# second NF run will use the conda env created in the previous run
echo re-run pipeline with conda --cache.. >> $REPO/artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run $REPO \
nextflow run $REPO \
-profile conda \
--cache $REPO/conda_cache_dir \
--directory $REPO/.github/data/fastqs/ \
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/test_cram_input.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ echo bed file: $BED_FILE
sed -i s'/cpus = 4/cpus = 2/'g conf/coguk/sanger.config
singularity --version
echo Nextflow run --illumina mode with --ref, --bed and --cram.. >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile sanger,singularity \
--ref $REF_FILE \
--bed $BED_FILE \
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/test_cram_output.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ echo bed file: $BED_FILE
sed -i s'/cpus = 4/cpus = 2/'g conf/coguk/sanger.config
singularity --version
echo Nextflow run --illumina mode with --ref, --bed --cram and outCram... >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile sanger,singularity \
--ref $REF_FILE \
--bed $BED_FILE \
Expand Down
10 changes: 6 additions & 4 deletions .github/scripts/test_nanopore_pipelines.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export PATH=/opt/conda/bin:$PATH
singularity --version
# write test log as github Action artifact
echo "Nextflow run current PR in --nanopolish mode (no barcodes).." >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--nanopolish \
--sequencing_summary $PWD/.github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/sequencing_summary_FAK72834_298b7829.txt \
Expand All @@ -17,7 +17,7 @@ cp .nextflow.log artifacts/nanopolish.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --nanopolish mode (barcodes).." >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--nanopolish \
--sequencing_summary $PWD/.github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/sequencing_summary_FAK72834_298b7829.txt \
Expand All @@ -28,18 +28,20 @@ cp .nextflow.log artifacts/nanopolish_barcodes.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode (no barcodes).." >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--medaka \
--medaka_model r941_min_fast_g303 \
--basecalled_fastq $PWD/.github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/fastq_pass/ \
--prefix 20200311_1427_X4_FAK72834_a3787181
cp .nextflow.log artifacts/medaka.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode (barcodes).." >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--medaka \
--medaka_model r941_min_fast_g303 \
--basecalled_fastq $PWD/.github/data/nanopore/20200311_1427_X1_FAK72834_a3787181/fastq_pass/ \
--prefix 20200311_1427_X1_FAK72834_a3787181

Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/test_sanger_profile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export PATH=/opt/conda/bin:$PATH
# there are only 2 available cpus in the github runner execution
sed -i s'/cpus = 4/cpus = 2/'g conf/coguk/sanger.config
echo run pipeline in --illumina mode with --sanger profile.. >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile sanger,singularity \
--directory $PWD/.github/data/fastqs/ \
--illumina \
Expand Down
23 changes: 14 additions & 9 deletions .github/scripts/test_typing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ export PATH=/opt/conda/bin:$PATH

# run current pull request code
singularity --version

# Clone variant_definitions repo
git clone https://github.com/phe-genomics/variant_definitions.git

# write test log as github Action artifact
echo "Nextflow run current PR in --nanopolish mode with typing.." >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--gff $PWD/typing/MN908947.3.gff \
--yaml $PWD/typing/SARS-CoV-2.types.yaml \
--gb $PWD/typing/NC_045512.2.gb \
--variant_definitions $PWD/variant_definitions/variant_yaml \
--nanopolish \
--sequencing_summary $PWD/.github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/sequencing_summary_FAK72834_298b7829.txt \
--basecalled_fastq $PWD/.github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/fastq_pass/ \
Expand All @@ -19,21 +23,22 @@ cp .nextflow.log artifacts/nanopolish_typing.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo "Nextflow run current PR in --medaka mode with typing .." >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--medaka \
--gff $PWD/typing/MN908947.3.gff \
--yaml $PWD/typing/SARS-CoV-2.types.yaml \
--medaka_model r941_min_fast_g303 \
--gb $PWD/typing/NC_045512.2.gb \
--variant_definitions $PWD/variant_definitions/variant_yaml \
--basecalled_fastq $PWD/.github/data/nanopore/20200311_1427_X4_FAK72834_a3787181/fastq_pass/ \
--prefix 20200311_1427_X4_FAK72834_a3787181
cp .nextflow.log artifacts/medaka_typing.nextflow.log
rm -rf results && rm -rf work && rm -rf .nextflow*

echo Nextflow run current PR in --illumina mode with typing.. >> artifacts/test_artifact.log
NXF_VER=20.03.0-edge nextflow run ./main.nf \
nextflow run ./main.nf \
-profile singularity \
--gff $PWD/typing/MN908947.3.gff \
--yaml $PWD/typing/SARS-CoV-2.types.yaml \
--gb $PWD/typing/NC_045512.2.gb \
--variant_definitions $PWD/variant_definitions/variant_yaml \
--directory $PWD/.github/data/fastqs/ \
--illumina \
--prefix test
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ jobs:
runs-on: ubuntu-latest
env:
NXF_ANSI_LOG: false
NXF_VER: 20.10.0
steps:
- uses: actions/checkout@master
- name: create artifacts dir to save test logs
Expand All @@ -26,7 +27,7 @@ jobs:
run: |
export PATH=/opt/conda/bin:$PATH
conda install -c bioconda nextflow
NXF_VER=20.03.0-edge nextflow -version
nextflow -version
- name: test nanopore pipelines
run: bash .github/scripts/test_nanopore_pipelines.sh
- name: test typing functionality
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ nextflow
results
*.sif
work
variant_definitions
3 changes: 2 additions & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ params{
sequencing_summary = false
ref = false
bed = false
gff = false
gb = false
variant_definitions = false
profile = false

// Repo to download your primer scheme from
Expand Down
Empty file added conf/dummyfile
Empty file.
8 changes: 2 additions & 6 deletions conf/nanopore.config
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ params {
// After articGuppyPlex filter out samples with fewer than this number of reads
minReadsArticGuppyPlex = 10

// Typing frequency threshold to call aa consequences of variant.
csqAfThreshold = 0.75

// Minimum coverage depth to call aa consequences of variant.
csqDpThreshold = 20

// Medaka model to use with --medaka
medaka_model = false
}
6 changes: 4 additions & 2 deletions environments/illumina/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ dependencies:
- biopython=1.74
- libxcb
- matplotlib=3.3.3
- pip
- pandas=0.23.0=py36_1
- bwa=0.7.17=pl5.22.0_2
- samtools=1.10
- bcftools=1.10
- trim-galore=0.6.5
- ivar=1.3
- pyvcf=0.6.8
- pyyaml=5.3.1
- muscle=3.8.1551
- pip:
- git+https://github.com/connor-lab/aln2type
6 changes: 4 additions & 2 deletions environments/nanopore/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@ channels:
- bioconda
- defaults
dependencies:
- artic=1.1.3
- pyyaml=5.3.1
- artic=1.2.1
- pip
- pip:
- git+https://github.com/connor-lab/aln2type
4 changes: 4 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ if ( params.illumina ) {
if (! params.basecalled_fastq ) {
println("Please supply a directory containing basecalled fastqs with --basecalled_fastq. This is the output directory from guppy_barcoder or guppy_basecaller - usually fastq_pass. This can optionally contain barcodeXX directories, which are auto-detected.")
}
if (! params.medaka_model ) {
println("Please supply a medaka model with --medaka_model")
System.exit(1)
}
} else {
println("Please select a workflow with --nanopolish, --illumina or --medaka, or use --help to print help")
System.exit(1)
Expand Down
1 change: 1 addition & 0 deletions modules/artic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ process articMinIONMedaka {

"""
artic minion --medaka \
--medaka-model ${params.medaka_model} \
${minionFinalConfig} \
--threads ${task.cpus} \
--scheme-directory ${schemeRepo} \
Expand Down
18 changes: 11 additions & 7 deletions modules/help.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def printHelp() {
auto-detected and analysed in parallel.
--fast5_pass Directory containing fast5 files - usually fast5_pass. NOT REQUIRED FOR MEDAKA WORKFLOW.
--sequencing_summary Path to sequencing_summary.txt. NOT REQUIRED FOR MEDAKA WORKFLOW.
--medaka_model Medaka model e.g. r941_min_fast_g303 NOT REQUIRED FOR NANOPOLISH WORKFLOW.

Optional:
--outdir Output directory (Default: ./results)
Expand All @@ -43,10 +44,11 @@ def printHelp() {
--outCram Output cram instead of bam files (Default: false)
--minReadsPerBarcode Minimum number of reads accepted for a single barcode when supplying deplexed Fastq
files as input. Barcodes having fewer reads are ignored. (Default: 100)

--gff Path to annotation gff for variant consequence calling and typing. (Default: unset, don't run typing unless set)
--yaml Path to YAML file with typing schemes.
Format: { <typing_scheme_name> : { coverage: <float>, variants: <gene_name>: <[ D614G, IHV68I ]> }}

--variant_definitions Path to variant_definitions directory from https://github.com/phe-genomics/variant_definitions.git.
Must point to the directory that contains *.yml and not its parent.
--gb Path to GenBank file to generate AA consequences of mutations [NC_045512]



Illumina workflow options:
Expand All @@ -67,9 +69,11 @@ def printHelp() {
Overrides --scheme* options. (Default: unset, download scheme from git)
--ref Path to iVar-compatible reference fasta file, also requires --bed
Overrides --scheme* options. (Default: unset, download scheme from git)
--gff Path to annotation gff for variant consequence calling and typing. (Default: unset, typing not run unless set)
--yaml Path to YAML file with typing schemes.
Format: { <typing_scheme_name> : { coverage: <float>, variants: <gene_name>: <[ D614G, IHV68I ]> }}

--variant_definitions Path to variant_definitions directory from https://github.com/phe-genomics/variant_definitions.git.
Must point to the directory that contains *.yml and not its parent.
--gb Path to GenBank file to generate AA consequences of mutations [NC_045512]

--allowNoprimer Allow reads that don't have primer sequence?
Depends on your library prep method: ligation == false, tagmentation == true (Default: true)
--illuminaKeepLen Length (bp) of reads to keep after primer trimming (Default: 20)
Expand Down
62 changes: 31 additions & 31 deletions modules/typing.nf
Original file line number Diff line number Diff line change
@@ -1,47 +1,47 @@
process alignSeqs {
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}/msa", mode: 'copy', overwrite: false, pattern: "${sampleName}.muscle.aln"

tag { sampleName }

process typeVariants {
input:
tuple sampleName, path(sample), path(reference)

tag { sampleName }
output:
tuple sampleName, path("${sampleName}.muscle.aln")

publishDir "${params.outdir}/${task.process.replaceAll(":","_")}/variants", pattern: "${sampleName}.variants.csv", mode: 'copy'
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}/vcf", pattern: "${sampleName}.csq.vcf", mode: 'copy'
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}/typing", pattern: "${sampleName}.typing.csv", mode: 'copy'
script:
"""
sed "s/>.*/>${sampleName}/g" $sample > ${sampleName}.clean.fa
cat $reference ${sampleName}.clean.fa > pre.aln
muscle -in pre.aln -out ${sampleName}.muscle.aln
"""
}

process typeVariants {
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}/typing_json", mode: 'copy', overwrite: true, pattern: "${sampleName}.json.gz"
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}/variant_csv", mode: 'copy', overwrite: true, pattern: "${sampleName}.csv"

tag { sampleName }

input:
tuple sampleName, path(variants), path(gff), path(ref), path(yaml)
tuple sampleName, refName, path(msa), path(yaml_dir), path(gb)

output:
path "${sampleName}.variants.csv", optional: true, emit: variants_csv
path "${sampleName}.typing.csv", optional: true, emit: typing_csv
path "${sampleName}.csq.vcf", emit: csq_vcf
path("aln2type.${sampleName}.csv"), emit: typing_csv optional true
path("${sampleName}.csv"), emit: variants_csv optional true
path("${sampleName}.json.gz") optional true

script:
if( params.illumina )
if ( gb.getBaseName() != 'dummyfile' ){
"""
type_vcf.py \
-i ${sampleName} \
-y ${yaml} \
-ov ${sampleName}.csq.vcf \
-ot ${sampleName}.typing.csv \
-os ${sampleName}.variants.csv \
-dp ${params.csqDpThreshold} \
-af ${params.csqAfThreshold} \
-t ${variants} \
${gff} ${ref}
aln2type --gb ${gb} --output_unclassified . . aln2type.${sampleName}.csv ${refName} ${msa} ${yaml_dir}/*.yml
"""
else
} else {
"""
type_vcf.py \
-i ${sampleName} \
-y ${yaml} \
-ov ${sampleName}.csq.vcf \
-ot ${sampleName}.typing.csv \
-os ${sampleName}.variants.csv \
-dp ${params.csqDpThreshold} \
-af ${params.csqAfThreshold} \
-v ${variants} \
${gff} ${ref}
aln2type --output_unclassified . . aln2type.${sampleName}.csv ${refName} ${msa} ${yaml_dir}/*.yml
"""
}

}

process mergeTypingCSVs {
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ manifest {
author = 'Matt Bull'
description = 'Nextflow for running the Artic ncov2019 pipeline'
mainScript = 'main.nf'
nextflowVersion = '>=20.01.0'
nextflowVersion = '!>=20.10.0'
version = '0.1.0'
}

Loading