forked from gehrigrl/ProteinFunctionalAnnotationScripts
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDSVtypes.py
More file actions
55 lines (44 loc) · 1.69 KB
/
DSVtypes.py
File metadata and controls
55 lines (44 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# This script builds a dictionary called TypeCount that records how many
# features of each type there are across the records in FullGenome
# (DSVgenome and DSVpDV).
from Bio import SeqIO
# Read the first two records of the GenBank file and keep them, in file
# order, as the list of records to analyse.
filename = 'OctDSV.gbff'
record_iterator = SeqIO.parse(filename, 'genbank')
FullGenome = [next(record_iterator), next(record_iterator)]
DSVgenome, DSVpDV = FullGenome

# Feature types that were already known before the full check below.
TypesList = ['source', 'gene', 'CDS', 'tRNA', 'regulatory', 'rRNA', 'ncRNA']

# Full check: print the type of every feature whose type is not yet listed
# (one line per such feature, so a type can be printed more than once).
unlisted_types = (
    feature.type
    for record in FullGenome
    for feature in record.features
    if feature.type not in TypesList
)
for unlisted_type in unlisted_types:
    print(unlisted_type)

# The check reported 'tmRNA' and 'repeat_region' as missing, so add them.
TypesList += ['tmRNA', 'repeat_region']
print(TypesList)
# Start every known feature type at zero so that a type which never occurs
# still shows up in the final report.
TypeCount = dict.fromkeys(TypesList, 0)

# Now count up the number of features of each type across all records.
# The type is looked up in TypeCount directly instead of being compared
# against TypesList[0] ... TypesList[8] one index at a time, so the tally
# stays correct however many types TypesList holds.
for record in FullGenome:
    for each_feature in record.features:
        if each_feature.type in TypeCount:
            TypeCount[each_feature.type] += 1
        else:
            # A type that is missing from TypesList is reported, not counted.
            print('This type is not in the list')
print(TypeCount)