"""Introductory Keras example: build and train a simple neural network to classify handwritten digits from the MNIST dataset."""
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
# 1. Load and preprocess the data.
# MNIST ships with Keras: 60,000 training and 10,000 test grayscale images
# of handwritten digits, each 28x28 pixels with uint8 values in [0, 255].
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each 28x28 image into a 784-dimensional vector and scale the
# pixel values into the [0, 1] range, in one pass per split.
x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
x_test = x_test.reshape(-1, 784).astype("float32") / 255.0

# One-hot encode the integer labels so they match the 10-way softmax output
# (e.g., digit 5 becomes [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]).
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

print(f"x_train shape: {x_train.shape}")
print(f"{x_train.shape[0]} train samples")
print(f"{x_test.shape[0]} test samples")
# 2. Define the model architecture using the Sequential API.
# A Sequential model stacks layers linearly: 784 inputs -> 64 -> 64 -> 10.
model = Sequential([
    # Explicit Input layer declaring the 784-dim flattened-image input.
    # (Passing input_shape= to the first Dense layer is deprecated in
    # Keras 3 and emits a UserWarning; an Input layer is the supported form.)
    tf.keras.Input(shape=(784,)),
    # Two hidden layers, 64 neurons each, with ReLU activation.
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    # Output layer: one neuron per digit class; softmax yields a
    # probability distribution over the 10 classes.
    Dense(10, activation='softmax')
])
# 3. Compile the model — this configures the learning process:
#   optimizer: how the weights get updated (Adam with Keras defaults),
#   loss: categorical cross-entropy, matching the one-hot labels,
#   metrics: accuracy is tracked during training and evaluation.
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Optional: print a table of the layers and their parameter counts.
model.summary()
# 4. Train the model (fit to the training data).
print("\nStarting training...")
# 5 full passes over the data, mini-batches of 32 samples per gradient
# step, with the last 20% of the training set held out for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)
# 5. Evaluate the trained model on the held-out test set.
print("\nStarting evaluation...")
# evaluate() returns [loss, *metrics] in compile order; a larger batch
# size (128) is fine here since no gradients are computed.
test_loss, test_accuracy = model.evaluate(x_test, y_test, batch_size=128)
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")
# 6. Make predictions on unseen data (the first 5 test samples).
# predict() returns a (5, 10) array of class probabilities; argmax over
# the class axis picks the most likely digit for each sample.
predictions = model.predict(x_test[:5])
predicted_classes = np.argmax(predictions, axis=1)

# The labels were one-hot encoded above, so argmax recovers the digits.
print(f"\nActual labels for first 5 samples: {np.argmax(y_test[:5], axis=1)}")
print(f"Predicted labels for first 5 samples: {predicted_classes}")