-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathmodel_evaluation.py
More file actions
127 lines (106 loc) · 4.05 KB
/
model_evaluation.py
File metadata and controls
127 lines (106 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import itertools
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.externals import joblib
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics.classification import confusion_matrix, jaccard_similarity_score
# Load the train/test splits and the class names from DATA_PATH.
DATA_PATH = "data/"
# The training split is loaded here but not read again in this section.
X = pd.read_csv("%sX_train.csv" % DATA_PATH)
Y_train = pd.read_csv("%sY_train.csv" % DATA_PATH).values
X_test = pd.read_csv("%sX_test.csv" % DATA_PATH).values
Y_test = pd.read_csv("%sY_test.csv" % DATA_PATH).values
# Transform the pandas data into plain arrays: drop the first column of each
# test array (presumably the index column written by to_csv -- confirm) and
# flatten the labels to 1-D.
X_test = np.delete(X_test, 0, axis=1)
Y_test = np.delete(Y_test, 0, axis=1).flatten()
# class_names.txt holds JSON despite its extension. The context manager
# guarantees the handle is closed even if parsing fails.
with open("%sclass_names.txt" % DATA_PATH) as class_names_file:
    class_names = json.load(class_names_file)
print("Dataset loaded.")
# Restore the four persisted classifiers, in the order SGD, SVM, random
# forest, neural net.
# NOTE: joblib.load unpickles its input, so only point it at trusted files.
sgd_clf, svm_clf, rf_clf, nn_clf = (
    joblib.load(model_path)
    for model_path in (
        'models/sgd_clf.pkl',
        'models/svm_clf.pkl',
        'models/rf_clf.pkl',
        'models/nn_clf.pkl',
    )
)
print("Models loaded")
# Rank the random forest's features by importance and print the top 30
# (or every feature, when there are fewer than 30).
# NOTE(review): assumes rf_clf.estimator is a fitted forest that exposes
# feature_importances_ and can be iterated tree by tree -- confirm.
importances = rf_clf.estimator.feature_importances_
# Per-feature standard deviation of the importances across the individual
# trees. Computed for reference; it is not read again in this section.
std = np.std([tree.feature_importances_ for tree in rf_clf.estimator],
             axis=0)
# Feature indices ordered from most to least important.
indices = np.argsort(importances)[::-1]
# Print the feature ranking
print("Feature ranking:")
# Clamp to the number of available features so a model with fewer than 30
# features does not raise IndexError.
for rank in range(min(30, len(indices))):
    print("%d. feature %d (%f)" % (rank + 1, indices[rank],
                                   importances[indices[rank]]))
# Run every classifier on the held-out test features, in the order SGD, SVM,
# random forest, neural net.
sgd_pred, svm_pred, rf_pred, nn_pred = (
    classifier.predict(X_test)
    for classifier in (sgd_clf, svm_clf, rf_clf, nn_clf)
)
# Score each model's predictions against Y_test with
# jaccard_similarity_score, then print one line per model.
print("Scores after parameter tuning: ")
sgd_score, svm_score, rf_score, nn_score = (
    jaccard_similarity_score(Y_test, predicted)
    for predicted in (sgd_pred, svm_pred, rf_pred, nn_pred)
)
# NOTE(review): "{:5f}" sets a minimum field width of 5 with the default six
# decimals; "{:.5f}" (five decimals) may have been intended -- confirm.
for report_line, model_score in (
    ("SGD Jaccard similarity score: {:5f}", sgd_score),
    ("SVM Jaccard similarity : {:5f}", svm_score),
    ("Random Forest Jaccard similarity score: {:5f}", rf_score),
    ("Neural Net Jaccard similarity score: {:5f}", nn_score),
):
    print(report_line.format(model_score))
# plot confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    The function neither creates, shows, nor saves a figure; the caller is
    expected to do that around this call.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are drawn on the "True label" axis and
        columns on the "Predicted label" axis.
    classes : sequence
        Tick labels for both axes, one per class.
    normalize : bool
        When True, every row is divided by its row sum before printing and
        plotting, and cells are annotated with two decimals. When False the
        raw values are annotated with the 'd' format, so they must be
        integers.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    """
    # Accept any array-like (e.g. nested lists), not only ndarrays.
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows that sum to zero stay all-zero instead of turning into NaN
        # (and emitting a divide-by-zero warning).
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half the maximum get white text so the annotation stays
    # readable on the darker end of the colormap.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def _save_confusion_plot(predictions, title, out_path):
    """
    Build the confusion matrix of `predictions` against Y_test, draw it on a
    new figure with plot_confusion_matrix, save the figure to `out_path`,
    and return the matrix.
    """
    matrix = confusion_matrix(Y_test, predictions)
    np.set_printoptions(precision=2)
    plt.figure()
    plot_confusion_matrix(matrix, classes=class_names, title=title)
    plt.savefig(out_path)
    return matrix


print("Confusion matrix plots")
# One plot per model, produced in the order SGD, SVM, RF, NN.
sgd_matrix = _save_confusion_plot(
    sgd_pred, 'SGD', 'images/sgd_confusion_matrix.png')
svm_matrix = _save_confusion_plot(
    svm_pred, 'SVM', 'images/svm_confusion_matrix.png')
rf_matrix = _save_confusion_plot(
    rf_pred, 'Random Forest', 'images/rf_confusion_matrix.png')
nn_matrix = _save_confusion_plot(
    nn_pred, 'Neural Network', 'images/nn_confusion_matrix.png')