-
Notifications
You must be signed in to change notification settings - Fork 0
/
transfer.py
123 lines (106 loc) · 4.06 KB
/
transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# DO NOT RUN THIS SCRIPT
# THIS IS A VERY HACKY SCRIPT DESIGNED FOR A VERY SPECIFIC TASK
import json
import os
import re
import subprocess
import sys
# Command-line arguments: the first is the old root directory that data is
# read from, the second is the new root directory that data is copied into.
oldRootPath, rootPath = sys.argv[1], sys.argv[2]
def findData(workDir):
    """Locate the data entries that sit directly inside *workDir*.

    Only the immediate entries of *workDir* are examined. Returns a 5-tuple
    ``(fasta, gff, gtf, splice, hisat)`` of joined paths: the entries ending
    in ``.fa``, ``.gff``, ``.gtf`` and ``.Splice_sites``, and the entry named
    exactly ``hisat-2.1.0``. A slot is ``""`` when nothing matched it; when
    several entries match the same slot, the one listed last by
    ``os.listdir`` wins.
    """
    bySuffix = {".fa": "", ".gff": "", ".gtf": "", ".Splice_sites": ""}
    hisatPath = ""
    for entry in os.listdir(workDir):
        entryPath = os.path.join(workDir, entry)
        if entry == "hisat-2.1.0":
            hisatPath = entryPath
            continue
        for suffix in bySuffix:
            if entry.endswith(suffix):
                bySuffix[suffix] = entryPath
    return (
        bySuffix[".fa"],
        bySuffix[".gff"],
        bySuffix[".gtf"],
        bySuffix[".Splice_sites"],
        hisatPath,
    )
def loadMeta(workDir):
    """Return the parsed JSON content of ``metadata.json`` inside *workDir*."""
    metaPath = os.path.join(workDir, "metadata.json")
    with open(metaPath, "r") as handle:
        return json.load(handle)
def saveMeta(workDir,meta):
    """Write *meta* as indented JSON to ``metadata.json`` inside *workDir*.

    The output is ``json.dumps(meta, indent=4)`` followed by two newlines.

    Returns:
        The number of characters written, as reported by ``file.write``.

    Raises:
        TypeError: if *meta* cannot be serialized to JSON. In that case the
            existing ``metadata.json`` is left untouched.
    """
    # Serialize before opening: mode "w" truncates the file immediately, so a
    # serialization failure after opening would wipe the existing metadata.
    text = json.dumps(meta, indent=4) + "\n\n"
    with open(os.path.join(workDir, "metadata.json"), "w") as ofile:
        return ofile.write(text)
def getRootName(meta):
    """Build the file-name stem for an assembly from its metadata.

    The stem is ``<genus>_<species>``, then ``_<intraspecific_name>`` when
    that value is truthy, then ``-<assembly_id>``. Every whitespace
    character, backslash and forward slash in the result is replaced by an
    underscore.

    Args:
        meta: mapping with the keys ``genus``, ``species``,
            ``intraspecific_name`` and ``assembly_id``.

    Returns:
        The sanitized stem as a string.

    Raises:
        KeyError: if one of the keys above is missing from *meta*.
    """
    ret = meta["genus"] + "_" + meta["species"]
    if meta["intraspecific_name"]:
        ret += "_" + meta["intraspecific_name"]
    ret += "-" + meta["assembly_id"]
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return re.sub(r"[\s\\/]", "_", ret)
def copyHiSat(fromDir,toDir,rootName):
    """Copy the ``.ht2`` files in *fromDir* into *toDir*, renamed after *rootName*.

    Each file is identified by its last five characters (for example
    ``1.ht2``) and is written to *toDir* as ``<rootName>.<those characters>``.
    *toDir* is created only when *fromDir* holds at least one ``.ht2`` file.
    Copies are made with ``cp --preserve=timestamps``.

    Raises:
        subprocess.CalledProcessError: if a ``cp`` invocation exits with a
            non-zero status.
    """
    # Keyed by the 5-character tail, so files sharing a tail collapse to a
    # single copy (the one listed last by os.listdir).
    tails = {
        name[-5:]: name
        for name in os.listdir(fromDir)
        if name.endswith(".ht2")
    }
    if not tails:
        return
    os.makedirs(toDir, exist_ok=True)
    for tail, name in tails.items():
        # check=True rather than wrapping the call in `assert`: assert
        # statements are stripped under `python -O`, which would silently
        # skip the copy itself.
        subprocess.run(
            [
                "cp",
                "--preserve=timestamps",
                os.path.join(fromDir, name),
                os.path.join(toDir, rootName + "." + tail),
            ],
            check=True,
        )
def _copyWithTimestamps(source, destination):
    """Copy *source* to *destination* using ``cp --preserve=timestamps``.

    Raises:
        subprocess.CalledProcessError: if ``cp`` exits with a non-zero status.
    """
    # check=True rather than `assert(subprocess.run(...).returncode==0)`:
    # assert statements are stripped under `python -O`, which would silently
    # skip the copy itself.
    subprocess.run(
        ["cp", "--preserve=timestamps", source, destination],
        check=True,
    )


# Walk <oldRootPath>/<taxId>/<assemblyName> and copy each assembly's data
# files into the matching, already existing directory under rootPath, renamed
# after the root name derived from that directory's metadata.json.
for taxId in os.listdir(oldRootPath):
    # Only all-decimal entry names are treated as taxonomy directories.
    if not taxId.isdecimal():
        continue
    path = os.path.join(oldRootPath, taxId)
    if not os.path.isdir(path):
        continue
    for assemblyName in os.listdir(path):
        oldWorkDir = os.path.join(path, assemblyName)
        # The new layout uses the assembly name with whitespace, backslashes
        # and slashes replaced by underscores.
        workDir = os.path.join(
            rootPath, taxId, re.sub(r"[\s\\/]", "_", assemblyName)
        )
        # Assemblies without a directory in the new root are skipped.
        if not os.path.isdir(workDir):
            continue
        meta = loadMeta(workDir)
        rootName = getRootName(meta)
        (fasta, gff, gtf, splice, hisat) = findData(oldWorkDir)
        if fasta:
            _copyWithTimestamps(fasta, os.path.join(workDir, rootName + ".fa"))
        if gff:
            _copyWithTimestamps(gff, os.path.join(workDir, rootName + ".gff"))
        # For the remaining artifacts the matching "processed" flag is set and
        # the metadata saved right after each copy, so completed steps stay
        # recorded even if a later copy fails.
        if gtf:
            _copyWithTimestamps(gtf, os.path.join(workDir, rootName + ".gtf"))
            meta["processed"]["write_gtf"] = True
            saveMeta(workDir, meta)
        if splice:
            _copyWithTimestamps(
                splice, os.path.join(workDir, rootName + ".Splice_sites")
            )
            meta["processed"]["write_splice_sites"] = True
            saveMeta(workDir, meta)
        if hisat:
            copyHiSat(hisat, os.path.join(workDir, "hisat-2.1.0"), rootName)
            meta["processed"]["index_hisat"] = True
            saveMeta(workDir, meta)