-
Notifications
You must be signed in to change notification settings - Fork 0
/
genecutter.py
104 lines (89 loc) · 2.71 KB
/
genecutter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
"""
Cut the gene in files with either EcoRI, HindIII, BamHI, NotI or all of them
"""
import argparse
def cut(seq, enzyme, cut_pos):
frags = []
start = 0
end = seq.find(enzyme)
while True:
if end == -1:
frags.append(seq[start:])
break
frags.append(seq[start:end + cut_pos])
start = end + cut_pos
end = seq.find(enzyme, start)
return frags
parser = argparse.ArgumentParser(description="Cut the gene in file")
parser.add_argument("-f", "--file", dest="fpath", help="Open specified file")
parser.add_argument("-c", "--cut", dest="mode", help="""
Please specify restriction enzyme to cut the sequence :
1.EcoRI
2.HindIII
3.BamHI
4.NotI
5.All of the above
""", type=int)
args = parser.parse_args()
filename = args.fpath
header = ''
seq = []
frags = []
seqlength = 0
ecor1 = 'GAATTC'
hind3 = 'AAGCTT'
bamh1 = 'GGATCC'
not1 = 'GCGGCCGC'
with open(filename) as fastafile:
for line in fastafile:
if line.startswith('>'):
header = line
continue
seq.append(line[:-1])
fullseq = ''.join(seq)
i = args.mode
if i == 1:
frags = cut(fullseq, ecor1, 1)
for x in range(len(frags)):
print("Length of fragment ", x+1, " is ", len(frags[x]))
seqlength += len(frags[x])
print('Total gene length = ', seqlength)
elif i == 2:
frags = cut(fullseq, hind3, 1)
for x in range(len(frags)):
print("Length of fragment ", x+1, " is ", len(frags[x]))
seqlength += len(frags[x])
print('Total gene length = ', seqlength)
elif i == 3:
frags = cut(fullseq, bamh1, 1)
for x in range(len(frags)):
print("Length of fragment ", x+1, " is ", len(frags[x]))
seqlength += len(frags[x])
print('Total gene length = ', seqlength)
elif i == 4:
frags = cut(fullseq, not1, 1)
for x in range(len(frags)):
print("Length of fragment ", x+1, " is ", len(frags[x]))
seqlength += len(frags[x])
print('Total gene length = ', seqlength)
elif i == 5:
frags = cut(fullseq, ecor1, 1)
n = len(frags)
for x in range(len(frags)):
frags += cut(frags[x], hind3, 1)
frags = frags[n:]
n = len(frags)
for x in range(len(frags)):
frags += cut(frags[x], bamh1, 1)
frags = frags[n:]
n = len(frags)
for x in range(len(frags)):
frags += cut(frags[x], not1, 2)
frags = frags[n:]
for x in range(len(frags)):
print("Length of fragment ", x+1, " is ", len(frags[x]))
seqlength += len(frags[x])
print('Total gene length = ', seqlength)
else:
print("Please enter an integer between 1-5.")