-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtrain_model.py
More file actions
97 lines (72 loc) · 2.92 KB
/
train_model.py
File metadata and controls
97 lines (72 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import pandas as pd
import mediapipe
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score
import pickle
# Load the dataset and echo its first rows as a quick sanity check.
df = pd.read_csv('newdataset.csv')
print(df.head())

# The first column is used as the target; every remaining column is a feature.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

# Hold out 30% of the rows for testing. The split is stratified on the target
# so both parts keep the same class proportions, and seeded so it is
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    shuffle=True,
    stratify=y,
)
# Train the candidate classifiers.
# Each candidate is a pipeline that standardises the features before handing
# them to the estimator; the short key names the algorithm.
pipelines = {
    'lr': make_pipeline(StandardScaler(), LogisticRegression()),
    'sv': make_pipeline(StandardScaler(), svm.SVC()),
    'kn': make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=3)),
}

# Fit every pipeline on the training split and keep it under the same key.
fit_models = {
    algo: pipeline.fit(X_train, y_train)
    for algo, pipeline in pipelines.items()
}
# Evaluate and serialize the models
# for algo, model in fit_models.items():
# yhat = model.predict(X_test)
# print(algo, "Accuracy:", accuracy_score(y_test.values, yhat),
# "Precision:", precision_score(y_test.values, yhat, average='macro'),
# "Recall:", recall_score(y_test.values, yhat, average='macro'))
# model = svm.SVC()
# model = KNeighborsClassifier(n_neighbors=3)
# model = LogisticRegression()
# model.fit(X, y)
# y_pred = model.predict(X_test[0])
# print((y==y_pred).sum())
# print((y==y_pred).sum()/y.shape[0])
# print(model.score(X, y))
# Evaluate the k-nearest-neighbours pipeline on the held-out test split.
# It was already fitted on the training split by the training loop, so it is
# used as-is instead of being fitted a second time.
model = fit_models['kn']
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Kept under this name because the plotting step reads it.
df_cm = confusion_matrix(y_test, y_pred)

# Report mean accuracy on both splits. The training figure alone is
# optimistic because it scores the model on the samples it was fitted on,
# so it is labelled explicitly and shown next to the held-out figure.
print("Model Score (train):", model.score(X_train, y_train))
print("Model Score (test):", model.score(X_test, y_test))

# Print Model Cross Validation evaluation
# cv_scores = cross_val_score(model, X, y, cv=5)
# print(cv_scores)
# print('cv_scores mean:{};'.format(np.mean(cv_scores)))
# Plot the multi-class confusion matrix of the evaluated model.

# confusion_matrix orders its rows and columns by the sorted union of the
# labels present in y_test and y_pred; rebuild that same ordering here so the
# number of tick labels always matches the matrix size.
class_labels = np.union1d(y_test, y_pred)

# Human-readable names for the nine exercise classes.
# NOTE(review): assumes this order matches the sorted class labels in the
# dataset - confirm against how newdataset.csv was labelled.
exercise_names = ['S.U. Down', 'S.U. Up', 'P.U. Down', 'P.U. Up', 'Plank',
                  'Sq. Up', 'Sq. Down', 'J.J. Up', 'J.J. Down']
if len(exercise_names) == len(class_labels):
    tick_labels = exercise_names
else:
    # A mismatched label count would make set_ticklabels raise, so fall back
    # to the raw label values when the data does not have nine classes.
    tick_labels = [str(label) for label in class_labels]

ax = plt.subplot()
sn.set(font_scale=1.4)  # for label size
sn.heatmap(df_cm, annot=True, fmt='d', annot_kws={"size": 16}, ax=ax)  # font size

# Rows of the matrix (heatmap y-axis) are the true classes and columns
# (heatmap x-axis) are the predicted classes.
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title("Multi-Class Confusion Matrix")
ax.xaxis.set_ticklabels(tick_labels)
ax.yaxis.set_ticklabels(tick_labels)
plt.show()
#Save Trained model in pickle
# saved_model = pickle.dumps(model)
# with open('exercisev3.pkl', 'wb') as f:
# pickle.dump(model, f)
#Load trained model with the pkl file
# with open('exercise.pkl', 'rb') as f:
# model = pickle.load(f)