-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMedianString.py
More file actions
57 lines (52 loc) · 1.33 KB
/
MedianString.py
File metadata and controls
57 lines (52 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def hammingDistance(p, q):
    """Count the positions at which two sequences differ.

    Args:
        p: First sequence (for example a DNA string).
        q: Second sequence, compared element by element against ``p``.

    Returns:
        The number of aligned positions whose elements are unequal.

    Note:
        ``zip`` stops at the shorter input, so when the lengths differ only
        the first ``min(len(p), len(q))`` positions are compared and any
        surplus elements are ignored rather than counted as mismatches.
    """
    return sum(1 for x, y in zip(p, q) if x != y)
def distanceBetweenPatternAndString(pattern, dna):
    """Sum the best Hamming match of ``pattern`` over a collection of strings.

    For each string in ``dna`` the pattern is compared with every window of
    ``len(pattern)`` consecutive characters, and the smallest Hamming
    distance found is added to the total.

    Args:
        pattern: The k-mer to look for.
        dna: Iterable of strings to scan.

    Returns:
        The sum of the per-string minimum distances. A string shorter than
        the pattern has no window at all and contributes ``len(pattern) + 1``.
    """
    k = len(pattern)
    total = 0
    for text in dna:
        window_distances = (
            hammingDistance(pattern, text[start:start + k])
            for start in range(len(text) - k + 1)
        )
        # A real window can differ in at most k positions, so k + 1 is only
        # ever used when the string is too short to contain any window.
        total += min(window_distances, default=k + 1)
    return total
def numberToPattern(x, k):
    """Decode an integer into the k-letter DNA string it indexes.

    ``x`` is read as a base-4 number with ``k`` digits, most significant
    digit first, using the digit-to-letter mapping 0 -> A, 1 -> C, 2 -> G,
    3 -> T. Consecutive values of ``x`` therefore enumerate all k-mers in
    lexicographic order.

    Args:
        x: Index of the pattern, with ``0 <= x < 4 ** k``.
        k: Length of the pattern to produce; must be non-negative.

    Returns:
        The pattern as a string of length ``k`` (the empty string for
        ``k == 0``).

    Raises:
        ValueError: If ``k`` is negative or ``x`` is outside
            ``range(4 ** k)``.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k!r}")
    if not 0 <= x < 4 ** k:
        raise ValueError(f"x must satisfy 0 <= x < 4**{k}, got {x!r}")

    letters = []
    for _ in range(k):
        # Peel off the least significant base-4 digit each round.
        x, digit = divmod(x, 4)
        letters.append("ACGT"[digit])
    # Digits were collected least significant first; the pattern is written
    # most significant first.
    return "".join(reversed(letters))
def numberToSymbol(x):
    """Map a base-4 digit to its nucleotide letter.

    Args:
        x: One of 0, 1, 2 or 3.

    Returns:
        ``"A"`` for 0, ``"C"`` for 1, ``"G"`` for 2 and ``"T"`` for 3.

    Raises:
        ValueError: If ``x`` is not one of the four valid digits. Raising
            here keeps an invalid digit from silently becoming ``None``
            and failing later in an unrelated place.
    """
    symbols = {0: "A", 1: "C", 2: "G", 3: "T"}
    try:
        return symbols[x]
    except KeyError:
        raise ValueError(
            f"symbol index must be 0, 1, 2 or 3, got {x!r}"
        ) from None
def medianString(dna, k):
    """Search every k-mer for the one closest to a collection of strings.

    Each of the ``4 ** k`` patterns produced by ``numberToPattern`` is
    scored with ``distanceBetweenPatternAndString``; the pattern with the
    smallest score wins, and on ties the earliest pattern in enumeration
    order is kept.

    Args:
        dna: Sequence of strings to score the patterns against.
        k: Length of the patterns to try.

    Returns:
        The best-scoring pattern, or the empty string when no pattern
        scores strictly below the starting bound ``(k + 1) * len(dna)``.
    """
    best_pattern = ""
    # Starting bound that a candidate has to beat strictly.
    best_distance = (k + 1) * len(dna)
    candidates = (numberToPattern(index, k) for index in range(4 ** k))
    for candidate in candidates:
        candidate_distance = distanceBetweenPatternAndString(candidate, dna)
        if candidate_distance < best_distance:
            best_pattern, best_distance = candidate, candidate_distance
    return best_pattern
# Sample input: the DNA strings to search for a shared median k-mer.
dna = [
    "CCACACGGCTAAGACGGGCCGTCTGTCAATGTATACTAGAGC",
    "GAACACCAGAACTGGACACCAAAGTTGAGCGAAAGTTGCTTG",
    "AGTAGTATCCGAGTAGCCAGTCACTCGAGCACCAGCAAACGA",
    "AACCGCAGTGCTACTGTGACACGCGTAGGCCTTCACTAGAGC",
    "ATGGCGCTCCTCTCTGTGTCGAGCTCTCTCTACTGTCGCTGA",
    "GCTCAGTAGCGAGTGGTCGTGAAATTGCCCTCGAGCAGGGGA",
    "AACGAGGACGCGTAGCTCTTGAGCGACGGGAGACCGTATTGC",
    "CCGCATCCCTGGTGTTTCAGCGGATTGAGCCTGAGAGCATTG",
    "ATATTGAGATGGTCGAGCCACGCAGATGAAGCTACTAGCTTG",
    "TTGCTGCCCTTTTCGAGCGTGTTCGCTGCCAGACGCCCAGTG",
]

# Length of the pattern to search for.
k = 6

# Print the median string of length k for the sample input.
print(medianString(dna, k))