import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers
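        # Note: with 32x32 inputs, the three 2x2 max-pool layers halve the spatial size
        # to 16x16, then 8x8, then 4x4, so the flattened feature size is 64 * 4 * 4.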
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(128, 2)  # Output layer with 2 nodes for binary classification
    def forward(self, x):
        # x has shape (N, C, H, W), where N is the batch size (several samples can be
        # processed at once), C is the number of channels per sample, and
        # H and W are the height and width of each image, e.g. 32 x 32.
        # Convolutional layers with batch normalization, ReLU activation, and max pooling
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        # Flatten the output for the fully connected layers, keeping the batch dimension
        x = x.view(x.size(0), -1)
        # Fully connected layers with dropout
        x = self.dropout(self.relu4(self.fc1(x)))
        x = self.fc2(x)
        return x
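# Quick shape sanity check (an illustrative addition, assuming the 32x32 RGB inputs
# used below): one random image should produce a (1, 2) tensor of class logits.
_shape_check_output = CNN()(torch.randn(1, 3, 32, 32))
print(f"Sanity check output shape: {_shape_check_output.shape}")  # torch.Size([1, 2])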
# Create sample data using numpy random
def create_sample_data(num_samples=10):
    # Generate random data with shape (num_samples, channels, height, width),
    # e.g. 3 channels (RGB) and image size 32x32
    sample_data = np.random.randn(num_samples, 3, 32, 32)
    sample_labels = np.random.randint(0, 2, size=(num_samples,))  # Random binary labels (0 or 1)
    return torch.FloatTensor(sample_data), torch.LongTensor(sample_labels)  # Use LongTensor for labels
# Initialize the model
model = CNN()
# Define the optimizer and scheduler
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # Multiply the learning rate by 0.1 every 7 epochs
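# With these settings, epochs 0-6 train at lr=0.001, epochs 7-13 at lr=0.0001, and so on;
# scheduler.get_last_lr() can be printed inside the loop to confirm the current value.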
# Training parameters
num_epochs = 10
batch_size = 4 # Adjust batch size according to your memory constraints
# Create DataLoader for mini-batch training
train_data, train_labels = create_sample_data(num_samples=20)  # Small synthetic training set
train_dataset = TensorDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
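# With 20 synthetic samples and a batch size of 4, each epoch below iterates
# over 5 shuffled mini-batches.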
# Define the loss function
criterion = nn.CrossEntropyLoss()
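# CrossEntropyLoss applies log-softmax internally, so the model returns raw logits of
# shape (N, 2) and the targets are integer class indices of shape (N,), as created above.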
# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        # Forward pass
        outputs = model(inputs)
        # Calculate loss
        loss = criterion(outputs, labels)
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        # loss.item() is the mean loss over the mini-batch; multiply by the batch
        # size to accumulate the total loss for the epoch
        running_loss += loss.item() * inputs.size(0)
    # Adjust the learning rate once per epoch
    scheduler.step()
    # Print the average loss per sample for this epoch
    epoch_loss = running_loss / len(train_dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
print('Training finished.')
# Print example outputs
print("\nExample outputs:")
model.eval() # Switch model to evaluation mode
example_data, example_labels = create_sample_data(num_samples=1)
with torch.no_grad():
    example_output = model(example_data)
print("Example Data:")
print(example_data)
print("Example Labels:")
print(example_labels)
print("Example Model Output:")
print(example_output)
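# The values above are raw logits. A common follow-up (an illustrative addition, not part
# of the training script itself) is to convert them to probabilities and a predicted class:
probabilities = torch.softmax(example_output, dim=1)
predicted_class = torch.argmax(probabilities, dim=1)
print("Predicted Probabilities:")
print(probabilities)
print("Predicted Class:")
print(predicted_class)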