forked from gehrigrl/ProteinFunctionalAnnotationScripts
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMakeHypoTable.py
More file actions
33 lines (26 loc) · 1.85 KB
/
MakeHypoTable.py
File metadata and controls
33 lines (26 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#This file takes the ValuesList from the MakeValues.py file and uses it to create a dictionary
#where keys are hypothetical protein IDs and values are information about the protein
#from the GenBank file. This file also adds information about each hypothetical protein
#from the res directory, such as outputs from the pdb70, pfam, and hhblits methods. The
#resulting dictionary is then used to create a .csv file. There is one hypothetical protein ID
#which was in the GenBank file but not the res directory output, and this is
#accounted for in the code.
import pandas
from MakeValues import * #Bring in .gbff information
from ResParse import * #Bring in res directory information(SAdLSA, pfam, hhblits results)
# Per-method result dictionaries from the res directory, in output column order.
ResDicts = [pdbDict, pfamDict, hhblitsDict]

# The one protein ID that is in MarProtIDList but has no entry in the res
# directory dictionaries (originally found via an error message).
MissingResID = 'WP_014524421.1'

# Map each hypothetical protein ID to its row of values.
HypotheticalsDict = {}
for position, protID in enumerate(MarProtIDList):
    # Starts as the locus tag, old locus tag, and October product. This is the
    # same list object held in ValuesList, so it is extended in place below.
    row = ValuesList[position]
    HypotheticalsDict[protID] = row
    if protID == MissingResID:
        continue  # nothing to look up; padded with 'NULL' after the loop
    for resDict in ResDicts:
        # Each dict value is a list of result fields for this protein ID.
        row.extend(resDict[protID])

# Pad the protein ID missing from the res directory with one 'NULL' per
# res-derived column (11 in total) so its row matches the others in length.
NullColumns = ['NULL'] * 11
HypotheticalsDict[MissingResID] = HypotheticalsDict[MissingResID] + NullColumns

# Output column names, listed in positional order (index 0 through 13).
ColumnNames = [
    'locus_tag',
    'old_locus_tag',
    'OctoberProduct',
    'SAdLSApdb',
    'SAdLSApreTMS',
    'SAdLSApreTMS2',
    'SAdLSApdbDescription',
    'pfamID',
    'pfamPreTMS',
    'pfamPreTMS2',
    'pfamDescription',
    'hhblitsID',
    'hhblitsProb',
    'hhblitsDescription',
]

# The dict becomes one column per protein ID; transpose so each protein ID is
# a row, then replace the integer column labels with the names above.
HypoDataFrame = pandas.DataFrame(data=HypotheticalsDict).T
HypoDataFrame = HypoDataFrame.rename(columns=dict(enumerate(ColumnNames)))
HypoDataFrame.to_csv('Hypo.csv')