from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
digits = datasets.load_digits() # Load the digits dataset
print(digits.keys()) # Print the keys of the dataset
print(digits['DESCR']) # Print the DESCR of the dataset
print(digits.images.shape) # Print the shape of the images
print(digits.data.shape) # Print the shape of the flattened data array
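# digits.data is just digits.images with each 8x8 image flattened into 64 features.
# As an optional sanity check (not part of the original snippet), confirm the reshape
# relationship between the two arrays.
print(digits.images.reshape(len(digits.images), -1).shape) # Should match digits.data.shape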
plt.imshow(digits.images[1010], cmap=plt.cm.gray_r, interpolation='nearest') # Display the image at index 1010
plt.show()
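# A quick way to eyeball more of the data is to plot a small grid of samples with
# their labels. This is an illustrative sketch using the same matplotlib calls as
# above, not part of the original walkthrough.
fig, axes = plt.subplots(2, 5, figsize=(8, 4))
for ax, image, label in zip(axes.ravel(), digits.images, digits.target):
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') # 8x8 grayscale image
    ax.set_title(f'Label: {label}')
    ax.axis('off')
plt.tight_layout()
plt.show()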
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
X = digits.data # Create feature arrays
y = digits.target # Create target arrays
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) # Split into training and test sets, preserving class proportions
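# The stratify=y argument keeps the per-digit proportions similar in both splits.
# An optional sanity check (not in the original code) is to compare the class
# counts in each split with numpy's bincount.
print(np.bincount(y_train)) # Number of samples per digit in the training set
print(np.bincount(y_test)) # Number of samples per digit in the test set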
knn = KNeighborsClassifier(n_neighbors=7) # Create a k-NN classifier with 7 neighbors
knn.fit(X_train, y_train) # Fit the classifier to the training data
print(knn.score(X_test, y_test)) # Print the accuracy
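# Accuracy alone hides which digits get confused with which. As an optional check
# (not in the original snippet), sklearn.metrics can break the score down per class.
# This is a minimal sketch reusing the fitted classifier and test split from above.
from sklearn.metrics import classification_report, confusion_matrix
y_pred = knn.predict(X_test) # Predict labels for the held-out test set
print(confusion_matrix(y_test, y_pred)) # Rows: true digits, columns: predicted digits
print(classification_report(y_test, y_pred)) # Per-class precision, recall, and F1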
neighbors = np.arange(1, 9) # Values of k to try
train_accuracy = np.empty(len(neighbors)) # Array to store training accuracies
test_accuracy = np.empty(len(neighbors)) # Array to store test accuracies
for i, k in enumerate(neighbors): # Loop over different values of k
    knn = KNeighborsClassifier(n_neighbors=k) # Setup a k-NN classifier with k neighbors
    knn.fit(X_train, y_train) # Fit the classifier to the training data
    train_accuracy[i] = knn.score(X_train, y_train) # Compute accuracy on the training set
    test_accuracy[i] = knn.score(X_test, y_test) # Compute accuracy on the test set
# Generate plot
plt.title('k-NN: Varying Number of Neighbors')
plt.plot(neighbors, test_accuracy, label='Testing Accuracy')
plt.plot(neighbors, train_accuracy, label='Training Accuracy')
plt.legend()
plt.xlabel('Number of Neighbors')
plt.ylabel('Accuracy')
plt.show()
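# The plot above suggests where test accuracy peaks. An alternative sketch (not part
# of the original code) is to let cross-validation pick k directly via GridSearchCV,
# which avoids tuning against the single held-out test set.
from sklearn.model_selection import GridSearchCV
param_grid = {'n_neighbors': np.arange(1, 9)} # Same range of k values as the loop above
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5) # 5-fold cross-validation
grid.fit(X_train, y_train) # Fit one model per k per fold on the training data
print(grid.best_params_) # k with the best mean cross-validated accuracy
print(grid.best_score_) # Corresponding mean cross-validation score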