"""Introductory Keras example: build and train a simple neural network to classify handwritten digits from the MNIST dataset."""
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
# 1. Load and preprocess the data.
# MNIST ships with Keras: 60,000 training and 10,000 test grayscale images
# of handwritten digits, each 28x28 pixels with uint8 values in [0, 255].
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each 28x28 image into a 784-dimensional vector and scale the
# pixel values into the [0, 1] range, in one pass per split.
x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
x_test = x_test.reshape(-1, 784).astype("float32") / 255.0

# One-hot encode the integer labels so they match the 10-way softmax output
# (e.g., digit 5 becomes [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]).
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

print(f"x_train shape: {x_train.shape}")
print(f"{x_train.shape[0]} train samples")
print(f"{x_test.shape[0]} test samples")
# 2. Define the model architecture using the Sequential API.
# A Sequential model stacks layers linearly: 784 inputs -> 64 -> 64 -> 10.
model = Sequential([
    # Explicit Input layer declaring the 784-dim flattened-image input.
    # (Passing input_shape= to the first Dense layer is deprecated in
    # Keras 3 and emits a UserWarning; an Input layer is the supported form.)
    tf.keras.Input(shape=(784,)),
    # Two hidden layers, 64 neurons each, with ReLU activation.
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    # Output layer: one neuron per digit class; softmax yields a
    # probability distribution over the 10 classes.
    Dense(10, activation='softmax')
])
# 3. Compile the model — this configures the learning process:
#   optimizer: how the weights get updated (Adam with Keras defaults),
#   loss: categorical cross-entropy, matching the one-hot labels,
#   metrics: accuracy is tracked during training and evaluation.
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Optional: print a table of the layers and their parameter counts.
model.summary()
# 4. Train the model (fit to the training data).
print("\nStarting training...")
# 5 full passes over the data, mini-batches of 32 samples per gradient
# step, with the last 20% of the training set held out for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)
# 5. Evaluate the trained model on the held-out test set.
print("\nStarting evaluation...")
# evaluate() returns [loss, *metrics] in compile order; a larger batch
# size (128) is fine here since no gradients are computed.
test_loss, test_accuracy = model.evaluate(x_test, y_test, batch_size=128)
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")
# 6. Make predictions on unseen data (the first 5 test samples).
# predict() returns a (5, 10) array of class probabilities; argmax over
# the class axis picks the most likely digit for each sample.
predictions = model.predict(x_test[:5])
predicted_classes = np.argmax(predictions, axis=1)

# The labels were one-hot encoded above, so argmax recovers the digits.
print(f"\nActual labels for first 5 samples: {np.argmax(y_test[:5], axis=1)}")
print(f"Predicted labels for first 5 samples: {predicted_classes}")