Skip to content

Commit

Permalink
Merge pull request #9 from RIVM-bioinformatics/add_clonal_snps
Browse files Browse the repository at this point in the history
Improve lineage ID and iRODS operations
  • Loading branch information
boasvdp authored Sep 12, 2024
2 parents a0621c5 + 513d8ab commit d0ebe2d
Show file tree
Hide file tree
Showing 11 changed files with 542 additions and 24 deletions.
1 change: 1 addition & 0 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ localrules:
copy_sample_vcf,
copy_deletion_vcf,
copy_ref,
combine_lineage_typing,
mtb_filter_res_table_positions,
mtb_make_json,
audit_version_gatk,
Expand Down
2 changes: 1 addition & 1 deletion envs/juno_variant_typing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ dependencies:
- pip=23.*
- python=3.11.*
- pip:
- "--editable=git+https://github.com/RIVM-bioinformatics/[email protected].0#egg=juno_library"
- "--editable=git+https://github.com/RIVM-bioinformatics/[email protected].1#egg=juno_library"
165 changes: 165 additions & 0 deletions files/mtb/lineage4_lipworth.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
##fileformat=VCFv4.2
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample
NC_000962.3 15117 . C C . . . GT 0/0
NC_000962.3 42281 . C C . . . GT 0/0
NC_000962.3 70267 . G G . . . GT 0/0
NC_000962.3 123520 . T T . . . GT 0/0
NC_000962.3 143207 . T T . . . GT 0/0
NC_000962.3 206481 . C C . . . GT 0/0
NC_000962.3 206484 . G G . . . GT 0/0
NC_000962.3 217201 . T T . . . GT 0/0
NC_000962.3 249522 . T T . . . GT 0/0
NC_000962.3 251575 . G G . . . GT 0/0
NC_000962.3 325505 . T T . . . GT 0/0
NC_000962.3 342146 . A A . . . GT 0/0
NC_000962.3 392261 . T T . . . GT 0/0
NC_000962.3 445780 . C C . . . GT 0/0
NC_000962.3 491742 . T T . . . GT 0/0
NC_000962.3 492150 . G G . . . GT 0/0
NC_000962.3 498531 . A A . . . GT 0/0
NC_000962.3 505974 . G G . . . GT 0/0
NC_000962.3 517358 . T T . . . GT 0/0
NC_000962.3 546357 . A A . . . GT 0/0
NC_000962.3 555991 . A A . . . GT 0/0
NC_000962.3 584171 . T T . . . GT 0/0
NC_000962.3 599868 . A A . . . GT 0/0
NC_000962.3 648856 . T T . . . GT 0/0
NC_000962.3 655986 . T T . . . GT 0/0
NC_000962.3 659341 . T T . . . GT 0/0
NC_000962.3 662911 . T T . . . GT 0/0
NC_000962.3 670545 . G G . . . GT 0/0
NC_000962.3 713310 . T T . . . GT 0/0
NC_000962.3 757182 . A A . . . GT 0/0
NC_000962.3 763031 . T T . . . GT 0/0
NC_000962.3 776100 . G G . . . GT 0/0
NC_000962.3 820752 . C C . . . GT 0/0
NC_000962.3 847995 . T T . . . GT 0/0
NC_000962.3 931123 . T T . . . GT 0/0
NC_000962.3 932280 . T T . . . GT 0/0
NC_000962.3 934230 . C C . . . GT 0/0
NC_000962.3 934611 . G G . . . GT 0/0
NC_000962.3 941845 . C C . . . GT 0/0
NC_000962.3 960367 . A A . . . GT 0/0
NC_000962.3 1024346 . A A . . . GT 0/0
NC_000962.3 1054784 . C C . . . GT 0/0
NC_000962.3 1080192 . G G . . . GT 0/0
NC_000962.3 1098523 . T T . . . GT 0/0
NC_000962.3 1104690 . T T . . . GT 0/0
NC_000962.3 1107940 . A A . . . GT 0/0
NC_000962.3 1144585 . A A . . . GT 0/0
NC_000962.3 1148259 . A A . . . GT 0/0
NC_000962.3 1211369 . A A . . . GT 0/0
NC_000962.3 1230778 . G G . . . GT 0/0
NC_000962.3 1248382 . A A . . . GT 0/0
NC_000962.3 1248936 . G G . . . GT 0/0
NC_000962.3 1250340 . A A . . . GT 0/0
NC_000962.3 1254562 . A A . . . GT 0/0
NC_000962.3 1281771 . T T . . . GT 0/0
NC_000962.3 1351172 . A A . . . GT 0/0
NC_000962.3 1367484 . T T . . . GT 0/0
NC_000962.3 1390763 . C C . . . GT 0/0
NC_000962.3 1479085 . T T . . . GT 0/0
NC_000962.3 1490905 . A A . . . GT 0/0
NC_000962.3 1540141 . T T . . . GT 0/0
NC_000962.3 1544255 . C C . . . GT 0/0
NC_000962.3 1546703 . C C . . . GT 0/0
NC_000962.3 1608276 . A A . . . GT 0/0
NC_000962.3 1618978 . T T . . . GT 0/0
NC_000962.3 1688300 . T T . . . GT 0/0
NC_000962.3 1716472 . A A . . . GT 0/0
NC_000962.3 1839759 . G G . . . GT 0/0
NC_000962.3 1849609 . T T . . . GT 0/0
NC_000962.3 1859559 . C C . . . GT 0/0
NC_000962.3 1931718 . G G . . . GT 0/0
NC_000962.3 1971725 . G G . . . GT 0/0
NC_000962.3 2010614 . G G . . . GT 0/0
NC_000962.3 2050822 . G G . . . GT 0/0
NC_000962.3 2094913 . A A . . . GT 0/0
NC_000962.3 2108890 . A A . . . GT 0/0
NC_000962.3 2122976 . C C . . . GT 0/0
NC_000962.3 2154724 . C C . . . GT 0/0
NC_000962.3 2158109 . T T . . . GT 0/0
NC_000962.3 2161346 . T T . . . GT 0/0
NC_000962.3 2167926 . A A . . . GT 0/0
NC_000962.3 2199052 . C C . . . GT 0/0
NC_000962.3 2209465 . G G . . . GT 0/0
NC_000962.3 2229801 . C C . . . GT 0/0
NC_000962.3 2260100 . C C . . . GT 0/0
NC_000962.3 2328543 . T T . . . GT 0/0
NC_000962.3 2331620 . A A . . . GT 0/0
NC_000962.3 2331789 . G G . . . GT 0/0
NC_000962.3 2339255 . A A . . . GT 0/0
NC_000962.3 2369186 . G G . . . GT 0/0
NC_000962.3 2388641 . G G . . . GT 0/0
NC_000962.3 2413246 . C C . . . GT 0/0
NC_000962.3 2421816 . A A . . . GT 0/0
NC_000962.3 2425471 . T T . . . GT 0/0
NC_000962.3 2448458 . C C . . . GT 0/0
NC_000962.3 2470591 . A A . . . GT 0/0
NC_000962.3 2619271 . T T . . . GT 0/0
NC_000962.3 2723506 . T T . . . GT 0/0
NC_000962.3 2740693 . T T . . . GT 0/0
NC_000962.3 2791098 . C C . . . GT 0/0
NC_000962.3 2807486 . C C . . . GT 0/0
NC_000962.3 2825466 . G G . . . GT 0/0
NC_000962.3 2841022 . A A . . . GT 0/0
NC_000962.3 2847281 . A A . . . GT 0/0
NC_000962.3 2886570 . A A . . . GT 0/0
NC_000962.3 2925962 . T T . . . GT 0/0
NC_000962.3 2988630 . C C . . . GT 0/0
NC_000962.3 2994187 . T T . . . GT 0/0
NC_000962.3 3010420 . A A . . . GT 0/0
NC_000962.3 3027798 . T T . . . GT 0/0
NC_000962.3 3031168 . A A . . . GT 0/0
NC_000962.3 3079877 . A A . . . GT 0/0
NC_000962.3 3104189 . A A . . . GT 0/0
NC_000962.3 3112877 . G G . . . GT 0/0
NC_000962.3 3174496 . A A . . . GT 0/0
NC_000962.3 3180988 . C C . . . GT 0/0
NC_000962.3 3189242 . A A . . . GT 0/0
NC_000962.3 3266030 . A A . . . GT 0/0
NC_000962.3 3314412 . A A . . . GT 0/0
NC_000962.3 3326554 . A A . . . GT 0/0
NC_000962.3 3420825 . A A . . . GT 0/0
NC_000962.3 3454263 . C C . . . GT 0/0
NC_000962.3 3466919 . C C . . . GT 0/0
NC_000962.3 3467465 . C C . . . GT 0/0
NC_000962.3 3480789 . T T . . . GT 0/0
NC_000962.3 3510120 . T T . . . GT 0/0
NC_000962.3 3530955 . C C . . . GT 0/0
NC_000962.3 3638093 . G G . . . GT 0/0
NC_000962.3 3670040 . C C . . . GT 0/0
NC_000962.3 3681548 . A A . . . GT 0/0
NC_000962.3 3690016 . A A . . . GT 0/0
NC_000962.3 3693681 . A A . . . GT 0/0
NC_000962.3 3830695 . A A . . . GT 0/0
NC_000962.3 3845695 . C C . . . GT 0/0
NC_000962.3 3851887 . A A . . . GT 0/0
NC_000962.3 3851888 . T T . . . GT 0/0
NC_000962.3 3871246 . T T . . . GT 0/0
NC_000962.3 3893480 . G G . . . GT 0/0
NC_000962.3 3895727 . C C . . . GT 0/0
NC_000962.3 3909235 . G G . . . GT 0/0
NC_000962.3 3984321 . G G . . . GT 0/0
NC_000962.3 4005114 . G G . . . GT 0/0
NC_000962.3 4008747 . A A . . . GT 0/0
NC_000962.3 4028752 . A A . . . GT 0/0
NC_000962.3 4056416 . C C . . . GT 0/0
NC_000962.3 4089058 . T T . . . GT 0/0
NC_000962.3 4095295 . T T . . . GT 0/0
NC_000962.3 4107074 . T T . . . GT 0/0
NC_000962.3 4112429 . T T . . . GT 0/0
NC_000962.3 4145737 . A A . . . GT 0/0
NC_000962.3 4156503 . C C . . . GT 0/0
NC_000962.3 4179089 . C C . . . GT 0/0
NC_000962.3 4215484 . G G . . . GT 0/0
NC_000962.3 4217557 . A A . . . GT 0/0
NC_000962.3 4251297 . G G . . . GT 0/0
NC_000962.3 4287164 . A A . . . GT 0/0
NC_000962.3 4313128 . C C . . . GT 0/0
NC_000962.3 4329782 . G G . . . GT 0/0
NC_000962.3 4372353 . G G . . . GT 0/0
NC_000962.3 4383655 . A A . . . GT 0/0
NC_000962.3 4384007 . C C . . . GT 0/0
NC_000962.3 4407588 . T T . . . GT 0/0
NC_000962.3 4408923 . C C . . . GT 0/0
31 changes: 31 additions & 0 deletions files/mtb/snpCL_scheme.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#lineage position allele_change tag
snpCL_1 1157317 C/T EUSeqMyTB_snpCL
snpCL_2 4327088 A/C EUSeqMyTB_snpCL
snpCL_3 130881 C/G EUSeqMyTB_snpCL
snpCL_4 1485300 C/G EUSeqMyTB_snpCL
snpCL_5 1208477 C/G EUSeqMyTB_snpCL
snpCL_6 1547828 G/C EUSeqMyTB_snpCL
snpCL_7 2971513 C/G EUSeqMyTB_snpCL
snpCL_8 1039921 A/G EUSeqMyTB_snpCL
snpCL_10 211015 G/A EUSeqMyTB_snpCL
snpCL_11 1895566 C/G EUSeqMyTB_snpCL
snpCL_12 1914071 G/A EUSeqMyTB_snpCL
snpCL_13 1810244 G/A EUSeqMyTB_snpCL
snpCL_14 4155977 G/A EUSeqMyTB_snpCL
snpCL_15 2399093 G/A EUSeqMyTB_snpCL
snpCL_16 1008074 G/A EUSeqMyTB_snpCL
snpCL_17 4284172 C/T EUSeqMyTB_snpCL
snpCL_18 766707 A/G EUSeqMyTB_snpCL
snpCL_19 3169491 C/A EUSeqMyTB_snpCL
snpCL_20 996197 A/C EUSeqMyTB_snpCL
snpCL_22 1231934 T/G EUSeqMyTB_snpCL
snpCL_23 1398622 G/A EUSeqMyTB_snpCL
snpCL_25 1068731 G/C EUSeqMyTB_snpCL
snpCL_27 3031515 T/C EUSeqMyTB_snpCL
snpCL_28 1028437 C/T EUSeqMyTB_snpCL
snpCL_29 3640351 C/T EUSeqMyTB_snpCL
snpCL_30 3223901 G/A EUSeqMyTB_snpCL
snpCL_31 1946519 C/T EUSeqMyTB_snpCL
snpCL_32 3088899 G/A EUSeqMyTB_snpCL
snpCL_33 1921877 G/A EUSeqMyTB_snpCL
snpCL_34 1760095 C/G EUSeqMyTB_snpCL
53 changes: 51 additions & 2 deletions juno_variant_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,27 @@ def setup(self) -> None:
self.set_presets()

if self.snakemake_args["use_singularity"]:

paths_from_presets = []

for key, value in self.species_presets.items():
try:
path = Path(value)
except TypeError:
continue
if path.exists() & path.is_absolute():
paths_from_presets.append(path.parent.resolve())

unique_bind_paths = self.get_unique_bind_paths(
paths_from_presets, iterations=3
)

self.snakemake_args["singularity_args"] = " ".join(
[
self.snakemake_args["singularity_args"],
f"--bind {self.db_dir}:{self.db_dir}",
] # paths that singularity should be able to read from can be bound by adding to the above list
]
+ [f"--bind {str(path)}:{str(path)}" for path in unique_bind_paths]
)

# # Extra class methods for this pipeline can be invoked here
Expand Down Expand Up @@ -188,18 +204,51 @@ def update_sample_dict_with_metadata(self) -> None:
)

def set_presets(self) -> None:
    """Load the species presets file and copy matching presets onto each sample.

    When ``self.presets_path`` is ``None`` it is pointed at
    ``config/presets.yaml`` next to this file. For every sample whose
    ``"species"`` value is a key in the presets file, that species' presets
    are stored in ``self.species_presets`` and merged into the sample's entry
    in ``self.sample_dict``.
    """
    # if no custom presets were provided, look in default location
    if self.presets_path is None:
        self.presets_path = Path(__file__).parent.joinpath("config/presets.yaml")

    # read all presets into dict; an empty YAML file loads as None
    with open(self.presets_path, encoding="utf-8") as f:
        presets_dict = yaml.safe_load(f) or {}

    # update sample dict with presets
    for sample in self.sample_dict:
        species_name = self.sample_dict[sample]["species"]
        if species_name in presets_dict:
            # store species-specific presets in self.species_presets for potential reuse
            # NOTE(review): this is overwritten per sample, so it ends up holding
            # the presets of the last matching sample, and it is never assigned
            # here when no sample matches - confirm that is intended.
            self.species_presets = presets_dict[species_name]
            self.sample_dict[sample].update(self.species_presets)

def remove_from_list(self, lst, elements):
    """Remove every occurrence of each item in ``elements`` from ``lst`` in place.

    Items that are not present in ``lst`` are ignored.

    Args:
        lst: The list to modify; it is mutated, not copied.
        elements: Iterable of values to remove.

    Returns:
        The same ``lst`` object, after removal.
    """
    for element in elements:
        # list.remove() only drops the first match, so repeat until none remain
        while element in lst:
            lst.remove(element)
    return lst

def simplify_bind_paths(self, paths):
    """Run one simplification pass over a collection of bind paths.

    The pass applies three rules:

    * duplicate paths are collapsed to one entry;
    * a path whose direct parent is also in ``paths`` is dropped, since the
      parent already covers it;
    * paths that share a direct parent with another path are replaced by
      that parent, which is listed once.

    Only direct parents are considered, so deeper nesting needs repeated
    passes.

    Args:
        paths: Iterable of hashable path objects exposing ``.parent``
            (e.g. ``pathlib.Path``). It is not modified.

    Returns:
        A new duplicate-free list: the surviving input paths in their
        original order, followed by any merged parent directories.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order
    candidates = list(dict.fromkeys(paths))
    present = set(candidates)

    # number of distinct candidates that live directly under each parent
    children_per_parent = {}
    for path in candidates:
        parent = path.parent
        if parent != path:
            children_per_parent[parent] = children_per_parent.get(parent, 0) + 1

    kept = []
    merged_parents = {}  # used as an insertion-ordered set
    for path in candidates:
        parent = path.parent
        if parent == path:
            # a path that is its own parent (e.g. the root) cannot be simplified
            kept.append(path)
        elif parent in present:
            # already covered by its parent, which is itself a candidate
            continue
        elif children_per_parent[parent] > 1:
            # NOTE(review): two top-level directories (e.g. /data and /mnt)
            # are merged into "/" by this rule - confirm that is acceptable.
            merged_parents[parent] = None
        else:
            kept.append(path)

    return kept + list(merged_parents)

def get_unique_bind_paths(self, paths, iterations=3):
    """Reduce ``paths`` to a duplicate-free list of bind paths.

    ``self.simplify_bind_paths`` is applied up to ``iterations`` times,
    because each pass only looks at direct parents. Duplicates are removed
    before every pass and from the final result, and the loop stops early
    once a pass no longer changes the list.

    Args:
        paths: Iterable of hashable path objects. It is not modified.
        iterations: Maximum number of simplification passes.

    Returns:
        A new list of bind paths without repeated entries.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order
    unique_bind_paths = list(dict.fromkeys(paths))
    for _ in range(iterations):
        simplified = list(
            dict.fromkeys(self.simplify_bind_paths(unique_bind_paths))
        )
        if simplified == unique_bind_paths:
            # fixed point reached; further passes would return the same list
            break
        unique_bind_paths = simplified
    return unique_bind_paths


if __name__ == "__main__":
main()
Loading

0 comments on commit d0ebe2d

Please sign in to comment.