from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; fixed seed keeps the split reproducible.
train, test = train_test_split(df, test_size=0.2, random_state=7)

# First four columns are the features, the fifth is the class label.
train_arr = train.values
test_arr = test.values
X = train_arr[:, :4]
Y = train_arr[:, 4]
x_test = test_arr[:, :4]
y_test = test_arr[:, 4]
from sklearn import linear_model, neighbors
from sklearn.metrics import accuracy_score

# Fit each candidate model on the training split and report hold-out accuracy.
# The label strings are the exact print prefixes used for each model's score.
evaluations = (
    ("Accuracy Score of Logistic Regression: ",
     linear_model.LogisticRegression(solver='liblinear', multi_class='ovr')),
    ("Accuracy Score of KNN: ",
     neighbors.KNeighborsClassifier()),
)
for label, classifier in evaluations:
    classifier.fit(X, Y)
    predictions = classifier.predict(x_test)
    print(label, accuracy_score(y_test, predictions))
# Observed output (pasted from a previous run; commented out so the file parses):
# Accuracy Score of Logistic Regression:  0.8
# Accuracy Score of KNN:  0.9
# 10-fold cross-validation: randomly split the training set into 10 folds,
# train on 9 and score on the held-out fold each time, so the model
# comparison does not hinge on a single train/test split.
from sklearn import model_selection

models = {}
models['LR'] = linear_model.LogisticRegression(solver='liblinear', multi_class='ovr')
models['KNN'] = neighbors.KNeighborsClassifier()

results = []   # per-model arrays of 10 fold accuracies
names = []     # model labels, parallel to `results`
score = 'accuracy'

# BUG FIX: `random_state` is only honoured when shuffle=True — recent
# scikit-learn raises ValueError for KFold(random_state=...) without it,
# and shuffling is what the comment above promises. The splitter does not
# depend on the model, so build it once outside the loop.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)

for name, model in models.items():
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=score)
    results.append(cv_results)
    names.append(name)
    # Report mean accuracy and its spread across the 10 folds.
    print('{}: {} ({})'.format(name, cv_results.mean(), cv_results.std()))