-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstreamlit.py
More file actions
116 lines (89 loc) · 2.88 KB
/
streamlit.py
File metadata and controls
116 lines (89 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Page header: app title followed by a Markdown subtitle.
st.title(":red[ClassifyGenius]")
st.write("""
# Explore :blue[different] classifiers and :green[datasets]
""")

# Sidebar choice of the dataset to explore; the pick is echoed as a heading.
DATASET_OPTIONS = ('Iris', 'Breast Cancer', 'Wine')
dataset_name = st.sidebar.selectbox('Select Dataset', DATASET_OPTIONS)
st.write(f"## {dataset_name} Dataset")

# Three popular ML algorithms: k-nearest neighbors (KNN), support vector
# machine (SVM), and random forest.
CLASSIFIER_OPTIONS = ('KNN', 'SVM', 'Random Forest')
classifier_name = st.sidebar.selectbox('Select classifier', CLASSIFIER_OPTIONS)
def get_dataset(name):
    """Load one of the bundled scikit-learn datasets by display name.

    Args:
        name: 'Iris' or 'Wine'; any other value loads the breast cancer
            dataset.

    Returns:
        A ``(X, y)`` tuple taken from the loaded object's ``data`` and
        ``target`` attributes.
    """
    if name == 'Iris':
        load = datasets.load_iris
    elif name == 'Wine':
        load = datasets.load_wine
    else:
        # Fallback branch: covers 'Breast Cancer' and any unrecognised name.
        load = datasets.load_breast_cancer

    bunch = load()
    return bunch.data, bunch.target
# Load the selected dataset and report its basic dimensions on the page.
X, y = get_dataset(dataset_name)
st.write('Shape of dataset:', X.shape)
# Count the distinct target labels.
st.write('number of classes:', np.unique(y).size)
def add_parameter_ui(clf_name):
    """Render the sidebar sliders for a classifier and collect their values.

    Args:
        clf_name: 'SVM' or 'KNN'; any other value is treated as the random
            forest.

    Returns:
        A dict of slider values: ``{'C': ...}`` for SVM, ``{'K': ...}`` for
        KNN, otherwise ``{'max_depth': ..., 'n_estimators': ...}``.
    """
    sidebar = st.sidebar

    if clf_name == 'SVM':
        return {'C': sidebar.slider('C', 0.01, 10.0)}

    if clf_name == 'KNN':
        return {'K': sidebar.slider('K', 1, 15)}

    # Random forest: create the sliders in a fixed order so the sidebar
    # layout stays max_depth first, n_estimators second.
    depth = sidebar.slider('max_depth', 2, 15)
    tree_count = sidebar.slider('n_estimators', 1, 100)
    return {'max_depth': depth, 'n_estimators': tree_count}
# Collect the hyperparameters chosen in the sidebar for the selected classifier.
params = add_parameter_ui(classifier_name)
def get_classifier(clf_name, params):
    """Build an unfitted scikit-learn classifier for the selected algorithm.

    Args:
        clf_name: 'SVM' or 'KNN'; any other value selects the random forest.
        params: Mapping holding the hyperparameters for that algorithm:
            'C' for SVM, 'K' for KNN, or 'max_depth' and 'n_estimators' for
            the random forest.

    Returns:
        The configured estimator instance.

    Raises:
        KeyError: If ``params`` is a dict lacking a key required by the
            chosen algorithm.
    """
    if clf_name == 'SVM':
        return SVC(C=params['C'])

    if clf_name == 'KNN':
        return KNeighborsClassifier(n_neighbors=params['K'])

    # Fallback branch: covers 'Random Forest' and any unrecognised name.
    # random_state is pinned so repeated runs build the same forest.
    return RandomForestClassifier(
        n_estimators=params['n_estimators'],
        max_depth=params['max_depth'],
        random_state=1234,
    )
clf = get_classifier(classifier_name, params)

#### CLASSIFICATION ####
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical across Streamlit reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Accuracy of the predictions on the held-out samples.
acc = accuracy_score(y_test, y_pred)

st.write(f'Classifier = {classifier_name}')
# Plain string literal: the label has no placeholders, so no f-prefix.
st.write('Accuracy =', acc)
#### PLOT DATASET ####
# Project the data onto the first two principal components.
pca = PCA(2)
X_projected = pca.fit_transform(X)

x1 = X_projected[:, 0]
x2 = X_projected[:, 1]

# Draw on an explicit Figure/Axes pair rather than pyplot's implicit
# "current figure", so the plot does not depend on global pyplot state.
fig, ax = plt.subplots()
points = ax.scatter(x1, x2, c=y, alpha=0.8, cmap='viridis')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(points, ax=ax)

st.pyplot(fig)

# The script is re-executed on every widget interaction; close the figure
# once it has been rendered so pyplot does not keep one figure per rerun.
plt.close(fig)
# TODO: expose more of the hyperparameters that the scikit-learn estimators accept
# TODO: add other classifiers
# TODO: add feature scaling