A simple example of using an LSTM to predict air passenger traffic. The data is attached at the end.
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class lstmodel(nn.Module):
def __init__(self, input_size, hidden_size, batch_size, output_size, num_layers):
super(lstmodel, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.batch_size = batch_size
self.output_size = output_size
self.num_layers = num_layers
        # input_size is the number of features per x at each time step; if there is only one value (e.g. a stock price), input_size = 1
        # hidden_size is the number of features per hidden state h
        # num_layers is the number of stacked LSTM layers; the h of one layer feeds into the next layer as its x
        # the default LSTM input shape is (seq_len, batch_size, input_size): sequence length, number of sequences per batch, and features per x
        # if that shape looks odd, use batch_first=True so the input shape becomes (batch_size, seq_len, input_size), which is more straightforward
        # it's said that NVIDIA cuDNN runs considerably faster with the batch dimension second (the default layout); see the shape check right after this class
self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        # output_size is the number of output neurons, e.g. 2 for true/false, 10 for the digits 0-9
        # the final hidden state is mapped to the output neurons
        self.linear = nn.Linear(self.hidden_size, output_size)  # no need to include the batch size here
def init_hidden(self):
# reset hidden state h0 and c0
# the shape of h and c is (num_layers, batch_size, hidden_size)
h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
return h0, c0
def forward(self, input):
# lstm input shape is (seq_len, batch_size, input_size)
        # or (batch_size, seq_len, input_size) if batch_first=True
#
# lstm_out shape is (seq_len, batch_size, hidden_size) which has all the hidden states for each t in the seq
# is (batch_size, seq_len, hidden_size) when batch_first = True
#
# hn, cn are the hidden states and the cell states for the last time step, t = seq_len
#lstm_out, (hn, cn) = self.lstm(input)
lstm_out, (self.hn, self.cn) = self.lstm(input)
        # Take the output from the final time step, lstm_out[:, -1, :], for each sequence in the batch if batch_first=True,
        # or lstm_out[-1] if batch_first=False
        # This is probably why the LSTM input & output default to shape (seq_len, batch, input_size): it makes selecting a time step easy
# Can also pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
# map lstm_out at final time step (batch_size, hidden_size) to output neurons
#print(lstm_out[:,-1,:].view(self.batch_size, self.hidden_size).shape)
y_pred = self.linear(lstm_out[:,-1,:].view(self.batch_size, self.hidden_size))
return y_pred.view(-1) #flatten the elements and return
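# A quick sanity check of the shapes described in the comments above (illustrative sizes only,
# not part of the original example): one forward pass through an untrained model.
_check_model = lstmodel(input_size=1, hidden_size=8, batch_size=4, output_size=1, num_layers=1)
_check_x = torch.randn(4, 6, 1)  # (batch_size, seq_len, input_size) because batch_first=True
print(_check_model(_check_x).shape)  # torch.Size([4]) -> one prediction per sequence in the batch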
#look back a number of time steps, forecast a number of steps ahead
'''def prepare_dataset(ts, look_back=1, forecast_horizon=1):
assert len(ts) >= look_back + forecast_horizon #ts is long enough
X = []
y = []
for i in range(0, len(ts) - look_back - forecast_horizon + 1):
data = ts[i : i + look_back, :] #the time steps used as predictors
pred = ts[i + look_back + forecast_horizon - 1, 0] #the result to be predicted
X.append(data)
y.append(pred)
return X, y
'''
def create_batches(ts, look_back=1, forecast_horizon=1, batch_size=1):
batches = []
batch_x, batch_y, batch_z = [], [], []
for i in range(0, len(ts) - look_back - forecast_horizon - batch_size + 1, batch_size): #num of batches
for n in range(batch_size): #each input within a batch
x = ts[i + n : (i + n + look_back), :]
offset = x[0, 0] #the first value in the input, used for differencing
y = ts[i + n + look_back : i + n + look_back + forecast_horizon, 0] #the # of future time steps to predict
batch_x.append(np.array(x).reshape(look_back, -1))
batch_y.append(np.array(y))
batch_z.append(np.array(offset))
batch_x = np.array(batch_x)
batch_y = np.array(batch_y)
batch_z = np.array(batch_z)
batch_x[:, :, 0] -= batch_z.reshape(-1, 1)
batch_y -= batch_z.reshape(-1, 1)
batches.append((batch_x, batch_y))
batch_x, batch_y, batch_z = [], [], []
return batches
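# A small sanity check of create_batches on a toy ramp series (illustrative values, not the passenger data):
# every window is differenced against its own first value, so the model learns relative changes.
_toy_ts = np.arange(30, dtype=np.float64).reshape(-1, 1)
_toy_batches = create_batches(_toy_ts, look_back=5, forecast_horizon=1, batch_size=2)
_toy_x, _toy_y = _toy_batches[0]
assert _toy_x.shape == (2, 5, 1)  # (batch_size, look_back, #features per time step)
assert _toy_y.shape == (2, 1)     # (batch_size, forecast_horizon), after subtracting each window's first value
assert _toy_y[0, 0] == 5.0        # ts[5] - ts[0] on the toy ramp 0, 1, 2, ...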
'''load data'''
parse_dates = ['Month']
index_col = ['Month']
date_parser = lambda dates: pd.to_datetime(dates, format='%Y-%m')  # pd.datetime was removed from newer pandas; pd.to_datetime does the same job here
ts = pd.read_csv('C:/Temp/AirPassengers.csv', parse_dates = parse_dates, index_col = index_col, date_parser=date_parser)
#plt.plot(ts)
ts = np.array(ts)
look_back = 20 #the seq_len, number of time steps per sequence
forecast_horizon = 1 #predicting only 1 time step ahead
batch_size = 10 #divide ts into batches
batches = create_batches(ts, look_back, forecast_horizon, batch_size)
train_data_ratio = 0.9
train_data_size = int(len(batches) * train_data_ratio)
train_data = batches[:train_data_size]
test_data = batches[train_data_size:]
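# Note: with the full 144-month series attached below, create_batches should yield 12 batches of 10 windows each,
# so about 10 batches end up in train_data and 2 in test_data.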
#construct a lstm network
input_size = 1 #The number of features at each time step, only 1 value in this case
hidden_size = 128 #The number of neurons/features used by hidden state
num_layers = 1 #Number of recurrent layers. E.g., setting num_layers=2 would mean two layers of LSTMs
seq_len = look_back #the number of time steps in each input
output_size = 1 #output prediction value 1 neuron
lstm = lstmodel(input_size, hidden_size, batch_size, output_size, num_layers)
#SGD
loss_fn = torch.nn.MSELoss()
eta = 0.01
momentum = 0.9
weight_decay = 1e-5
optimizer = optim.SGD(lstm.parameters(), lr=eta, momentum=momentum, weight_decay = weight_decay)
epochs = 20
print_test = True
for epoch in range(0, epochs):
print('------------start training epoch {0} ------------'.format(epoch))
for X, y in train_data:
        #lstm.hn, lstm.cn = lstm.init_hidden()  # not needed: PyTorch initializes the hidden states to zeros by default
        X = torch.tensor(X, dtype=torch.float32)  # shape (batch_size, seq_len, input_size); inputs do not need requires_grad
        y = torch.tensor(y, dtype=torch.float32).view(-1)  # flatten y so it's a 1-D vector of targets
# zero the parameter gradients
optimizer.zero_grad()
#feed in a batch of training data
y_pred = lstm(X)
#calculate the loss
loss = loss_fn(y_pred, y)
#calculate the gradient
loss.backward()
#adjust the weights
optimizer.step()
else:
print("Epoch {0} done".format(epoch))
if print_test:
actuals= []
preds = []
with torch.no_grad():
lstm.eval()
for X, y in test_data:
#lstm.hn, lstm.cn = lstm.init_hidden()
X = torch.tensor(X, dtype=torch.float32, requires_grad=False)
y_pred = lstm(X)
actuals.extend(y)
preds.extend(y_pred)
lstm.train()
actuals = torch.tensor(actuals).view(-1).numpy()
preds = torch.tensor(preds).view(-1).numpy()
plt.plot(actuals, color='green')
plt.plot(preds, color='orange')
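# Optional: quantify the fit on the last epoch's test predictions; note that both preds and actuals
# are on the differenced scale (relative to each window's first value), not raw passenger counts.
rmse = np.sqrt(np.mean((preds - actuals) ** 2))
print('Test RMSE on the differenced scale: {0:.3f}'.format(rmse))
plt.show()  # display the actual (green) vs predicted (orange) curves when running as a script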
The sample data in CSV:
Month,#Passengers
1949-01,112
1949-02,118
1949-03,132
1949-04,129
1949-05,121
1949-06,135
1949-07,148
1949-08,148
1949-09,136
1949-10,119
1949-11,104
1949-12,118
1950-01,115
1950-02,126
1950-03,141
1950-04,135
1950-05,125
1950-06,149
1950-07,170
1950-08,170
1950-09,158
1950-10,133
1950-11,114
1950-12,140
1951-01,145
1951-02,150
1951-03,178
1951-04,163
1951-05,172
1951-06,178
1951-07,199
1951-08,199
1951-09,184
1951-10,162
1951-11,146
1951-12,166
1952-01,171
1952-02,180
1952-03,193
1952-04,181
1952-05,183
1952-06,218
1952-07,230
1952-08,242
1952-09,209
1952-10,191
1952-11,172
1952-12,194
1953-01,196
1953-02,196
1953-03,236
1953-04,235
1953-05,229
1953-06,243
1953-07,264
1953-08,272
1953-09,237
1953-10,211
1953-11,180
1953-12,201
1954-01,204
1954-02,188
1954-03,235
1954-04,227
1954-05,234
1954-06,264
1954-07,302
1954-08,293
1954-09,259
1954-10,229
1954-11,203
1954-12,229
1955-01,242
1955-02,233
1955-03,267
1955-04,269
1955-05,270
1955-06,315
1955-07,364
1955-08,347
1955-09,312
1955-10,274
1955-11,237
1955-12,278
1956-01,284
1956-02,277
1956-03,317
1956-04,313
1956-05,318
1956-06,374
1956-07,413
1956-08,405
1956-09,355
1956-10,306
1956-11,271
1956-12,306
1957-01,315
1957-02,301
1957-03,356
1957-04,348
1957-05,355
1957-06,422
1957-07,465
1957-08,467
1957-09,404
1957-10,347
1957-11,305
1957-12,336
1958-01,340
1958-02,318
1958-03,362
1958-04,348
1958-05,363
1958-06,435
1958-07,491
1958-08,505
1958-09,404
1958-10,359
1958-11,310
1958-12,337
1959-01,360
1959-02,342
1959-03,406
1959-04,396
1959-05,420
1959-06,472
1959-07,548
1959-08,559
1959-09,463
1959-10,407
1959-11,362
1959-12,405
1960-01,417
1960-02,391
1960-03,419
1960-04,461
1960-05,472
1960-06,535
1960-07,622
1960-08,606
1960-09,508
1960-10,461
1960-11,390
1960-12,432
A bidirectional LSTM unit
class BidirectionalLSTM(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(BidirectionalLSTM, self).__init__()
self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True)
self.linear = nn.Linear(hidden_size * 2, output_size) #bidirectional lstm, so hidden size is doubled
def forward(self, input):
        lstm_out, (hn, cn) = self.lstm(input)  # input shape (seq_len, batch_size, input_size); output shape (seq_len, batch_size, 2 * hidden_size); (hn, cn) are the final hidden and cell states
        T, b, h = lstm_out.size()  # T is the sequence length (all intermediate hidden states), b is the batch size, h is the output feature size = 2 * hidden_size
        hidden_states = lstm_out.view(T * b, h)  # treat all intermediate hidden states as one batch of T * b rows, so each one is fed into the linear layer separately
        output = self.linear(hidden_states)  # output shape (T * b, output_size)
        output = output.view(T, b, -1)  # restore to (T, b, output_size); every intermediate hidden state's output is here
return output
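A minimal shape check for this unit (illustrative sizes, assuming the default batch_first=False layout):

bilstm = BidirectionalLSTM(input_size=1, hidden_size=32, output_size=1)
x = torch.randn(20, 4, 1)  # (seq_len, batch_size, input_size)
out = bilstm(x)
print(out.shape)  # torch.Size([20, 4, 1]) -> one output per time step per sequence, suitable for seq2seq use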