Merge pull request #3 from RIVM-bioinformatics/amr_parsing
Amr parsing
boasvdp authored Mar 27, 2024
2 parents 02e4489 + e884cc8 commit 3c5d2cc
Showing 9 changed files with 82 additions and 261 deletions.
5 changes: 3 additions & 2 deletions Snakefile
@@ -9,8 +9,6 @@ for param in ["threads", "mem_gb"]:
for k in config[param]:
config[param][k] = int(config[param][k])

# print(SAMPLES)


wildcard_constraints:
sample="[^/]{1,100}",
@@ -25,10 +23,13 @@ localrules:
no_typing,
aggregate_species,
copy_sample_bam,
copy_sample_vcf,
copy_ref,
prepare_ab_table,
generate_ab_table_header,
mtb_filter_res_table_positions,
postprocess_variant_table,
mtb_make_json,
audit_version_gatk,
audit_version_biopython,
audit_version_bcftools,
6 changes: 4 additions & 2 deletions workflow/rules/consensus.smk
@@ -39,7 +39,7 @@ awk '$4 > 1 {{print $0}}' 1>{output.bed} 2>>{log}
"""


rule subset_fixed_snps_mnps_from_vcf:
rule subset_fixed_snps_from_vcf:
input:
vcf="",
output:
@@ -62,7 +62,8 @@ rule subset_fixed_snps_mnps_from_vcf:
shell:
"""
bcftools filter --include "FORMAT/AF>={params.AF_threshold}" {input.vcf} 2>{log} |\
bcftools filter --include "TYPE='snp' | TYPE='mnp'" \
bcftools filter --include "TYPE='snp'" |\
bcftools filter --include "FILTER='PASS'" \
1>{output.vcf} \
2>>{log}
"""
@@ -148,6 +149,7 @@ rule introduce_mutations_to_reference:
bcftools consensus \
-f {input.reference} \
-o {output.fasta} \
-s - \
{input.vcf_gz} \
2>&1>{log}
"""
66 changes: 0 additions & 66 deletions workflow/rules/mtb_prepare_files.smk
@@ -217,69 +217,3 @@ DB_NAME=$(basename {input.db_dir})
cd $WORKDIR
$EXEC build -genbank -v $DB_NAME -config $CONFIG_NAME -dataDir . 2>&1>{log}
"""


rule prepare_ab_table:
input:
csv=lambda wildcards: SAMPLES[wildcards.sample]["resistance_variants_csv"],
output:
uncompressed=temp(
OUT + "/mtb_typing/prepared_reference_data/{sample}/ab_table.tab"
),
params:
POS="genomepos",
metadata=lambda wildcards: SAMPLES[wildcards.sample][
"resistance_variants_columns"
],
log:
OUT + "/log/prepare_ab_table/{sample}.log",
message:
"Coverting AMR table for {wildcards.sample}"
shell:
"""
python workflow/scripts/convert_ab_table.py \
--force-chrom NC_000962.3 \
--POS {params.POS} \
--other {params.metadata:q} \
{input} \
{output.uncompressed} 2>&1>{log}
"""


rule compress_index_ab_table:
input:
uncompressed=OUT + "/mtb_typing/prepared_reference_data/{sample}/ab_table.tab",
output:
compressed=OUT + "/mtb_typing/prepared_reference_data/{sample}/ab_table.tab.gz",
index=OUT + "/mtb_typing/prepared_reference_data/{sample}/ab_table.tab.gz.tbi",
conda:
"../envs/bcftools.yaml"
container:
"docker://staphb/htslib:1.17"
log:
OUT + "/log/compress_index_ab_table/{sample}.log",
message:
"Compressing and indexing AMR table for {wildcards.sample}"
threads: config["threads"]["bcftools"]
resources:
mem_gb=config["mem_gb"]["bcftools"],
shell:
"""
bgzip -c {input.uncompressed} 1> {output.compressed} 2>{log}
tabix -s 1 -b 2 -e 2 {output.compressed} 2>&1>>{log}
"""


rule generate_ab_table_header:
output:
OUT + "/mtb_typing/prepared_reference_data/{sample}/ab_table.header",
params:
columns=lambda wildcards: SAMPLES[wildcards.sample][
"resistance_variants_columns"
],
shell:
"""
python workflow/scripts/generate_ab_table_header.py \
{params.columns:q} \
{output}
"""
110 changes: 56 additions & 54 deletions workflow/rules/mtb_typing.smk
@@ -128,45 +128,9 @@ $EXEC ann -c {input.config} -dataDir $WORKDIR -stats {output.stats} -noLog -o ga
"""


rule mtb_annotate_ab_positions:
input:
vcf=OUT + "/mtb_typing/snpeff/{sample}.vcf",
compressed_table=OUT
+ "/mtb_typing/prepared_reference_data/{sample}/ab_table.tab.gz",
header=OUT + "/mtb_typing/prepared_reference_data/{sample}/ab_table.header",
output:
vcf=OUT + "/mtb_typing/annotated_vcf/{sample}.vcf",
conda:
"../envs/bcftools.yaml"
container:
"docker://staphb/bcftools:1.19"
params:
base_annotations="CHROM,POS",
extra_annotations=lambda wildcards: SAMPLES[wildcards.sample][
"resistance_variants_columns"
],
log:
OUT + "/log/mtb_annotate_ab_positions/{sample}.log",
message:
"Annotating variants with AMR for {wildcards.sample}"
threads: config["threads"]["bcftools"]
resources:
mem_gb=config["mem_gb"]["bcftools"],
shell:
"""
bcftools annotate \
-a {input.compressed_table} \
-h {input.header} \
-c {params.base_annotations},{params.extra_annotations:q} \
{input.vcf} \
1> {output.vcf} \
2> {log}
"""


rule mtb_annotated_vcf_to_table:
input:
vcf=OUT + "/mtb_typing/annotated_vcf/{sample}.vcf",
vcf=OUT + "/mtb_typing/snpeff/{sample}.vcf",
output:
tsv=temp(OUT + "/mtb_typing/annotated_variants/raw/{sample}.tsv"),
conda:
@@ -187,7 +151,6 @@ rule mtb_annotated_vcf_to_table:
mem_gb=config["mem_gb"]["gatk"],
shell:
"""
FIELDS=$(python workflow/scripts/print_fields_VariantsToTable.py {params.metadata:q},{params.effect_column:q})
gatk VariantsToTable \
-V {input.vcf} \
--show-filtered \
@@ -199,22 +162,36 @@ gatk VariantsToTable \
-F DP \
-F FILTER \
-GF AF \
$FIELDS \
-F {params.effect_column} \
-O {output.tsv} 2>&1>{log}
"""


rule postprocess_variant_table:
rule mtb_annotate_ab_positions:
input:
tsv=OUT + "/mtb_typing/annotated_variants/raw/{sample}.tsv",
reslist=lambda wildcards: SAMPLES[wildcards.sample]["resistance_variants_csv"],
output:
tsv=OUT + "/mtb_typing/annotated_variants/{sample}.tsv",
params:
merge_cols="CHROM,POS",
keep_cols=lambda wildcards: SAMPLES[wildcards.sample][
"resistance_variants_columns"
],
log:
OUT + "/log/postprocess_variant_table/{sample}.log",
OUT + "/log/mtb_annotate_ab_positions/{sample}.log",
message:
"Annotating variants with AMR for {wildcards.sample}"
threads: config["threads"]["other"]
resources:
mem_gb=config["mem_gb"]["other"],
shell:
"""
python workflow/scripts/postprocess_variant_table.py \
--input {input} \
--input {input.tsv} \
--reference_data {input.reslist} \
--merge_cols {params.merge_cols} \
--keep_cols {params.keep_cols} \
--output {output} \
2>&1>{log}
"""
@@ -267,9 +244,6 @@ module consensus_workflow:
"consensus.smk"


## Rename reference contig to sample


use rule mark_variants_by_proximity from consensus_workflow with:
input:
vcf=OUT + "/mtb_typing/prepared_files/{sample}.vcf",
@@ -279,13 +253,13 @@ use rule mark_variants_by_proximity from consensus_workflow with:
OUT + "/log/mark_variants_by_proximity/{sample}.log",


use rule subset_fixed_snps_mnps_from_vcf from consensus_workflow with:
use rule subset_fixed_snps_from_vcf from consensus_workflow with:
input:
vcf=OUT + "/mtb_typing/prepared_files/{sample}.vcf",
output:
vcf=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps_mnps.vcf",
vcf=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps.vcf",
log:
OUT + "/log/subset_fixed_snps_mnps_from_vcf/{sample}.log",
OUT + "/log/subset_fixed_snps_from_vcf/{sample}.log",


use rule subset_low_confidence_variants_from_vcf from consensus_workflow with:
@@ -299,18 +273,18 @@ use rule subset_low_confidence_variants_from_vcf from consensus_workflow with:

use rule zip_and_index_sample_vcf_bcftools from consensus_workflow with:
input:
vcf=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps_mnps.vcf",
vcf=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps.vcf",
output:
vcf_gz=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps_mnps.vcf.gz",
tbi=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps_mnps.vcf.gz.tbi",
vcf_gz=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps.vcf.gz",
tbi=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps.vcf.gz.tbi",
log:
OUT + "/log/zip_and_index_sample_vcf_bcftools/{sample}.log",


use rule introduce_mutations_to_reference from consensus_workflow with:
input:
vcf_gz=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps_mnps.vcf.gz",
tbi=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps_mnps.vcf.gz.tbi",
vcf_gz=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps.vcf.gz",
tbi=OUT + "/mtb_typing/prepared_files/{sample}.fixed_snps.vcf.gz.tbi",
reference=OUT + "/mtb_typing/prepared_files/{sample}_ref.fasta",
output:
fasta=OUT + "/mtb_typing/consensus/raw/{sample}.fasta",
@@ -350,10 +324,38 @@ use rule mask_fasta_based_on_bed_or_vcf from consensus_workflow as mask_fasta_on
OUT + "/log/mask_fasta_on_proximity_variants/{sample}.log",


if Path(INPUT + "/variants_raw/mask.bed").is_file():

use rule mask_fasta_based_on_bed_or_vcf from consensus_workflow as mask_fasta_by_custom_bed with:
input:
features=INPUT + "/variants_raw/mask.bed",
fasta=OUT
+ "/mtb_typing/consensus/depth_masked_low_conf_masked_proxmask/{sample}.fasta",
output:
fasta=OUT
+ "/mtb_typing/consensus/depth_masked_low_conf_masked_proxmask_bedmasked/{sample}.fasta",
log:
OUT + "/log/mask_fasta_on_custom_bed/{sample}.log",

else:

rule:
input:
fasta=OUT
+ "/mtb_typing/consensus/depth_masked_low_conf_masked_proxmask/{sample}.fasta",
output:
fasta=OUT
+ "/mtb_typing/consensus/depth_masked_low_conf_masked_proxmask_bedmasked/{sample}.fasta",
shell:
"""
cp {input} {output}
"""


use rule replace_fasta_header from consensus_workflow with:
input:
fasta=OUT
+ "/mtb_typing/consensus/depth_masked_low_conf_masked_proxmask/{sample}.fasta",
+ "/mtb_typing/consensus/depth_masked_low_conf_masked_proxmask_bedmasked/{sample}.fasta",
output:
fasta=OUT + "/mtb_typing/consensus/{sample}.fasta",
log:
47 changes: 0 additions & 47 deletions workflow/scripts/convert_ab_table.py

This file was deleted.

16 changes: 0 additions & 16 deletions workflow/scripts/generate_ab_table_header.py

This file was deleted.
