-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfind_target.py
More file actions
106 lines (79 loc) · 2.41 KB
/
find_target.py
File metadata and controls
106 lines (79 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import re
def _reverse_complement(seq):
    """Return the reverse complement of *seq*.

    Only upper-case ``A``, ``C``, ``G`` and ``T`` are accepted; any other
    character raises ``KeyError``.
    """
    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    return ''.join(complement[base] for base in reversed(seq))


def _targets_on_strand(strand, pam_regex, pam_len, pam_ori, len_tar):
    """Collect the target next to every PAM occurrence on a single strand."""
    targets = []
    # pam_regex is a zero-width lookahead, so finditer reports every start
    # position, including PAM occurrences that overlap each other.
    for hit in pam_regex.finditer(strand):
        pam_start = hit.start()
        if pam_ori == 'R':
            # Target sits immediately before the PAM; it only exists when
            # at least len_tar bases precede the PAM on this strand.
            if pam_start >= len_tar:
                targets.append(strand[pam_start - len_tar:pam_start])
        else:
            # Target sits immediately after the PAM.
            pam_end = pam_start + pam_len
            if len(strand) - pam_end >= len_tar:
                targets.append(strand[pam_end:pam_end + len_tar])
    return targets


def find_target(seq, pam, pam_ori, len_tar):
    """Find every target of length *len_tar* adjacent to a PAM site.

    Both the given strand and its reverse complement are scanned.  Hits on
    the given strand come first in the result, followed by hits on the
    reverse complement; within a strand, hits are ordered by PAM position.

    Args:
        seq: DNA sequence made of upper-case ``A``/``C``/``G``/``T``.
        pam: PAM motif written with upper-case IUPAC nucleotide codes
            (``A C G T R Y K M S W B D H V N``).
        pam_ori: ``'R'`` to take the ``len_tar`` bases immediately before
            each PAM occurrence; any other value takes the ``len_tar``
            bases immediately after it.
        len_tar: Length of each target.

    Returns:
        A list of target strings (without the PAM itself).  PAM occurrences
        that do not have ``len_tar`` bases available on the required side
        are skipped.

    Raises:
        ValueError: If *pam* is empty.
        KeyError: If *pam* contains a character that is not an IUPAC code,
            or *seq* contains a character other than ``A``/``C``/``G``/``T``.
    """
    if not pam:
        # An empty pattern matches everywhere and can never be exhausted.
        raise ValueError('pam must not be empty')

    # IUPAC nucleotide code -> regex fragment matching exactly one base.
    iupac = {
        'A': 'A',
        'C': 'C',
        'G': 'G',
        'T': 'T',
        'R': '[GA]',
        'Y': '[TC]',
        'K': '[GT]',
        'M': '[AC]',
        'S': '[GC]',
        'W': '[AT]',
        'B': '[GTC]',
        'D': '[GAT]',
        'H': '[ACT]',
        'V': '[GCA]',
        'N': '[AGCT]',
    }
    # Wrapping the motif in a lookahead keeps matches zero-width so that
    # overlapping PAM sites are all reported.
    pam_regex = re.compile('(?=' + ''.join(iupac[code] for code in pam) + ')')
    pam_len = len(pam)

    target_list = []
    # Scan each full strand with absolute coordinates; truncating the strand
    # after a hit would throw away the bases in front of later PAM sites.
    for strand in (seq, _reverse_complement(seq)):
        target_list.extend(
            _targets_on_strand(strand, pam_regex, pam_len, pam_ori, len_tar)
        )
    return target_list
# seq = "TAGCTACGATCGATCGTTTCTAGCTACGATGCAAGAAAGATCGATCGATCGACGTACG"
# pam = "YTTN"
# pam_ori = "L"
# target_length = 21
# print(find_target(seq, pam, pam_ori, target_length))