1. K-Nearest Neighbors (KNN)

The labelled data serves as the training data.
Predict the label of a new data point by looking at the ‘K’ closest labelled data points and taking a majority vote among their labels.

Scikit-learn fit and predict

Training a model on the data = “Fitting” a model to the data
- .fit() method
To predict the labels of new data (unlabelled data point):
- .predict() method

Using Scikit-learn to fit a classifier & to predict on unlabelled data

from sklearn.neighbors import KNeighborsClassifier

# Target (response) variable: the label we want to predict.
y = df['party'].values

# Feature variables: every column except the target.
X = df.drop('party', axis=1).values

# Create a k-NN classifier with 6 neighbors.
knn = KNeighborsClassifier(n_neighbors=6)

# Fit the classifier to the data.
knn.fit(X, y)

# Predict the labels for the training data X.
y_pred = knn.predict(X)

# One new, unlabelled observation with 16 feature values.
# predict() expects a 2-D input of shape (n_samples, n_features), so the single
# observation is wrapped in an outer list; passing a flat list raises an error.
# NOTE(review): assumes X has 16 feature columns -- confirm against df.
X_new = [[
    0.696469, 0.286139, 0.226851, 0.551315,
    0.719469, 0.423106, 0.980764, 0.68483,
    0.480932, 0.392118, 0.343178, 0.72905,
    0.438572, 0.059678, 0.398044, 0.737995,
]]

# Predict the label for the new data point X_new.
new_prediction = knn.predict(X_new)

print("Prediction: {}".format(new_prediction))

KNN (categorical variable)

import pandas as pd

from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier

telco = pd.read_csv("C3249C Coursework Data 2.csv")

print(telco.head())

# Target variable: the label to predict.
y = telco['Churn'].values

# All remaining columns as a raw array. The model below is NOT trained on this;
# it is trained on the label-encoded `features` built further down.
X = telco.drop('Churn', axis=1).values

# Keep one fitted LabelEncoder per categorical column. Re-fitting a single
# shared encoder overwrites its classes_ on every call, so the mapping from
# category to integer would be lost for all but the last column.
categorical_columns = ['Gender', 'Contract', 'PaperlessBilling', 'PaymentMethod']
encoders = {
    column: preprocessing.LabelEncoder() for column in categorical_columns
}

gender = encoders['Gender'].fit_transform(telco['Gender'])
contract = encoders['Contract'].fit_transform(telco['Contract'])
paper = encoders['PaperlessBilling'].fit_transform(telco['PaperlessBilling'])
payment = encoders['PaymentMethod'].fit_transform(telco['PaymentMethod'])

# One tuple of encoded values per row, in the column order listed above.
features = list(zip(gender, contract, paper, payment))

print(features)

# Create a k-NN classifier with 5 neighbors.
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier to the encoded features.
knn.fit(features, y)

# Predict the labels for the training data.
y_pred = knn.predict(features)

# A new observation, already integer-encoded, in the same column order as
# `features` (gender, contract, paper, payment). To see which category each
# integer stands for, inspect encoders[column].classes_; to encode raw values,
# use encoders[column].transform([...]) -- never fit_transform on new data,
# which would build a different mapping.
Features_new = [(0, 0, 1, 2)]

new_prediction = knn.predict(Features_new)

print("Prediction: {}".format(new_prediction))

KNN (mixed variable)

import pandas as pd

from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier

telco = pd.read_csv("C3249C Coursework Data 3.csv")

print(telco.head())

# Target variable: the label to predict.
y = telco['Churn'].values

# All remaining columns as a raw array. The model below is NOT trained on this;
# it is trained on the `features` list built further down.
X = telco.drop('Churn', axis=1).values

# Keep one fitted LabelEncoder per categorical column. Re-fitting a single
# shared encoder overwrites its classes_ on every call, so the mapping from
# category to integer would be lost for all but the last column.
categorical_columns = ['Gender', 'Contract', 'PaperlessBilling', 'PaymentMethod']
encoders = {
    column: preprocessing.LabelEncoder() for column in categorical_columns
}

gender = encoders['Gender'].fit_transform(telco['Gender'])
contract = encoders['Contract'].fit_transform(telco['Contract'])
paper = encoders['PaperlessBilling'].fit_transform(telco['PaperlessBilling'])
payment = encoders['PaymentMethod'].fit_transform(telco['PaymentMethod'])

# Numeric columns are used as-is.
tenure = telco['Tenure'].values
monthlycharges = telco['MonthlyCharges'].values

# One tuple per row: four encoded categorical values followed by the two
# numeric values.
# NOTE(review): k-NN is distance-based and these columns are on different
# scales (small integer codes vs. tenure / monthly charges), so the numeric
# columns probably dominate the distance. Consider standardising the features
# (e.g. preprocessing.StandardScaler) -- confirm with the course material.
features = list(zip(gender, contract, paper, payment, tenure, monthlycharges))

print(features)

# Create a k-NN classifier with 5 neighbors.
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier to the features.
knn.fit(features, y)

# Predict the labels for the training data.
y_pred = knn.predict(features)

# A new observation in the same column order as `features`: the four
# categorical values are already integer-encoded, followed by tenure and
# monthly charges. To encode raw categorical values, use
# encoders[column].transform([...]) -- never fit_transform on new data, which
# would build a different mapping.
Features_new = [(0, 0, 1, 2, 24, 35.50)]

new_prediction = knn.predict(Features_new)

print("Prediction: {}".format(new_prediction))

Confusion matrix and classification report

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# train_test_split is used below, so it is imported here with the metrics.
from sklearn.model_selection import train_test_split

# Hold out 40% of the data as a test set; random_state fixes the shuffle so the
# split is reproducible.
# NOTE(review): X and y come from an earlier snippet and X must be entirely
# numeric. The telco `X` defined above appears to include un-encoded
# categorical columns; presumably this section reuses X, y from the first
# example -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Instantiate a k-NN classifier with 6 neighbors.
knn = KNeighborsClassifier(n_neighbors=6)

# Fit the classifier to the training data only.
knn.fit(X_train, y_train)

# Predict the labels of the held-out test data.
y_pred = knn.predict(X_test)

# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test, y_pred))

# Per-class precision, recall, F1-score and support.
print(classification_report(y_test, y_pred))

Google Sites

Report abuse