A simple example of using an LSTM to predict air passenger traffic. The data is attached at the end.
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class lstmodel(nn.Module):
def __init__(self, input_size, hidden_size, batch_size, output_size, num_layers):
super(lstmodel, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.batch_size = batch_size
self.output_size = output_size
self.num_layers = num_layers
        # input_size is the number of features per x at each time step; if there is only one value (e.g. a stock price), input_size = 1
        # hidden_size is the number of features per hidden state h
        # num_layers is the number of stacked LSTM layers; the h of one layer feeds into the next layer as its x
        # the default LSTM input shape is (seq_len, batch_size, input_size): sequence length, number of sequences per batch, and features per x
        # if that shape looks odd, use batch_first=True so the input shape becomes (batch_size, seq_len, input_size), which is more straightforward
        # it's said that NVIDIA cuDNN runs considerably faster with the batch dimension second (the default layout); see the shape check right after this class
self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
        # output_size is the number of output neurons, e.g. 2 for true/false, 10 for the digits 0-9
        # the final hidden state is mapped to the output neurons
        self.linear = nn.Linear(self.hidden_size, output_size)  # no need to include the batch size here
def init_hidden(self):
# reset hidden state h0 and c0
# the shape of h and c is (num_layers, batch_size, hidden_size)
h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
return h0, c0
def forward(self, input):
# lstm input shape is (seq_len, batch_size, input_size)
        # or (batch_size, seq_len, input_size) if batch_first=True
#
# lstm_out shape is (seq_len, batch_size, hidden_size) which has all the hidden states for each t in the seq
# is (batch_size, seq_len, hidden_size) when batch_first = True
#
# hn, cn are the hidden states and the cell states for the last time step, t = seq_len
#lstm_out, (hn, cn) = self.lstm(input)
lstm_out, (self.hn, self.cn) = self.lstm(input)
        # Take the output from the final time step, lstm_out[:, -1, :], for each sequence in the batch if batch_first=True,
        # or lstm_out[-1] if batch_first=False
        # This is probably why the LSTM input & output default to shape (seq_len, batch, input_size): it makes selecting a time step easy
# Can also pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
# map lstm_out at final time step (batch_size, hidden_size) to output neurons
#print(lstm_out[:,-1,:].view(self.batch_size, self.hidden_size).shape)
y_pred = self.linear(lstm_out[:,-1,:].view(self.batch_size, self.hidden_size))
return y_pred.view(-1) #flatten the elements and return
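# A quick sanity check of the shapes described in the comments above (illustrative sizes only,
# not part of the original example): one forward pass through an untrained model.
_check_model = lstmodel(input_size=1, hidden_size=8, batch_size=4, output_size=1, num_layers=1)
_check_x = torch.randn(4, 6, 1)  # (batch_size, seq_len, input_size) because batch_first=True
print(_check_model(_check_x).shape)  # torch.Size([4]) -> one prediction per sequence in the batch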
#look back a number of time steps, forecast a number of steps ahead
'''def prepare_dataset(ts, look_back=1, forecast_horizon=1):
assert len(ts) >= look_back + forecast_horizon #ts is long enough
X = []
y = []
for i in range(0, len(ts) - look_back - forecast_horizon + 1):
data = ts[i : i + look_back, :] #the time steps used as predictors
pred = ts[i + look_back + forecast_horizon - 1, 0] #the result to be predicted
X.append(data)
y.append(pred)
return X, y
'''
def create_batches(ts, look_back=1, forecast_horizon=1, batch_size=1):
batches = []
batch_x, batch_y, batch_z = [], [], []
for i in range(0, len(ts) - look_back - forecast_horizon - batch_size + 1, batch_size): #num of batches
for n in range(batch_size): #each input within a batch
x = ts[i + n : (i + n + look_back), :]
offset = x[0, 0] #the first value in the input, used for differencing
y = ts[i + n + look_back : i + n + look_back + forecast_horizon, 0] #the # of future time steps to predict
batch_x.append(np.array(x).reshape(look_back, -1))
batch_y.append(np.array(y))
batch_z.append(np.array(offset))
batch_x = np.array(batch_x)
batch_y = np.array(batch_y)
batch_z = np.array(batch_z)
batch_x[:, :, 0] -= batch_z.reshape(-1, 1)
batch_y -= batch_z.reshape(-1, 1)
batches.append((batch_x, batch_y))
batch_x, batch_y, batch_z = [], [], []
return batches
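# A small sanity check of create_batches on a toy ramp series (illustrative values, not the passenger data):
# every window is differenced against its own first value, so the model learns relative changes.
_toy_ts = np.arange(30, dtype=np.float64).reshape(-1, 1)
_toy_batches = create_batches(_toy_ts, look_back=5, forecast_horizon=1, batch_size=2)
_toy_x, _toy_y = _toy_batches[0]
assert _toy_x.shape == (2, 5, 1)  # (batch_size, look_back, #features per time step)
assert _toy_y.shape == (2, 1)     # (batch_size, forecast_horizon), after subtracting each window's first value
assert _toy_y[0, 0] == 5.0        # ts[5] - ts[0] on the toy ramp 0, 1, 2, ...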
'''load data'''
parse_dates = ['Month']
index_col = ['Month']
date_parser = lambda dates: pd.to_datetime(dates, format='%Y-%m')  # pd.datetime was removed from newer pandas; pd.to_datetime does the same job here
ts = pd.read_csv('C:/Temp/AirPassengers.csv', parse_dates = parse_dates, index_col = index_col, date_parser=date_parser)
#plt.plot(ts)
ts = np.array(ts)
look_back = 20 #the seq_len, number of time steps per sequence
forecast_horizon = 1 #predicting only 1 time step ahead
batch_size = 10 #divide ts into batches
batches = create_batches(ts, look_back, forecast_horizon, batch_size)
train_data_ratio = 0.9
train_data_size = int(len(batches) * train_data_ratio)
train_data = batches[:train_data_size]
test_data = batches[train_data_size:]
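# Note: with the full 144-month series attached below, create_batches should yield 12 batches of 10 windows each,
# so about 10 batches end up in train_data and 2 in test_data.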
#construct a lstm network
input_size = 1 #The number of features at each time step, only 1 value in this case
hidden_size = 128 #The number of neurons/features used by hidden state
num_layers = 1 #Number of recurrent layers. E.g., setting num_layers=2 would mean two layers of LSTMs
seq_len = look_back #the number of time steps in each input
output_size = 1 #output prediction value 1 neuron
lstm = lstmodel(input_size, hidden_size, batch_size, output_size, num_layers)
#SGD
loss_fn = torch.nn.MSELoss()
eta = 0.01
momentum = 0.9
weight_decay = 1e-5
optimizer = optim.SGD(lstm.parameters(), lr=eta, momentum=momentum, weight_decay = weight_decay)
epochs = 20
print_test = True
for epoch in range(0, epochs):
print('------------start training epoch {0} ------------'.format(epoch))
for X, y in train_data:
        #lstm.hn, lstm.cn = lstm.init_hidden()  # not needed: PyTorch initializes the hidden states to zeros by default
        X = torch.tensor(X, dtype=torch.float32)  # shape (batch_size, seq_len, input_size); inputs do not need requires_grad
        y = torch.tensor(y, dtype=torch.float32).view(-1)  # flatten y so it's a 1-D vector of targets
# zero the parameter gradients
optimizer.zero_grad()
#feed in a batch of training data
y_pred = lstm(X)
#calculate the loss
loss = loss_fn(y_pred, y)
#calculate the gradient
loss.backward()
#adjust the weights
optimizer.step()
else:
print("Epoch {0} done".format(epoch))
if print_test:
actuals= []
preds = []
with torch.no_grad():
lstm.eval()
for X, y in test_data:
#lstm.hn, lstm.cn = lstm.init_hidden()
X = torch.tensor(X, dtype=torch.float32, requires_grad=False)
y_pred = lstm(X)
actuals.extend(y)
preds.extend(y_pred)
lstm.train()
actuals = torch.tensor(actuals).view(-1).numpy()
preds = torch.tensor(preds).view(-1).numpy()
plt.plot(actuals, color='green')
plt.plot(preds, color='orange')
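# Optional: quantify the fit on the last epoch's test predictions; note that both preds and actuals
# are on the differenced scale (relative to each window's first value), not raw passenger counts.
rmse = np.sqrt(np.mean((preds - actuals) ** 2))
print('Test RMSE on the differenced scale: {0:.3f}'.format(rmse))
plt.show()  # display the actual (green) vs predicted (orange) curves when running as a script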
The sample data in CSV:
Month,#Passengers
1949-01,112
1949-02,118
1949-03,132
1949-04,129
1949-05,121
1949-06,135
1949-07,148
1949-08,148
1949-09,136
1949-10,119
1949-11,104
1949-12,118
1950-01,115
1950-02,126
1950-03,141
1950-04,135
1950-05,125
1950-06,149
1950-07,170
1950-08,170
1950-09,158
1950-10,133
1950-11,114
1950-12,140
1951-01,145
1951-02,150
1951-03,178
1951-04,163
1951-05,172
1951-06,178
1951-07,199
1951-08,199
1951-09,184
1951-10,162
1951-11,146
1951-12,166
1952-01,171
1952-02,180
1952-03,193
1952-04,181
1952-05,183
1952-06,218
1952-07,230
1952-08,242
1952-09,209
1952-10,191
1952-11,172
1952-12,194
1953-01,196
1953-02,196
1953-03,236
1953-04,235
1953-05,229
1953-06,243
1953-07,264
1953-08,272
1953-09,237
1953-10,211
1953-11,180
1953-12,201
1954-01,204
1954-02,188
1954-03,235
1954-04,227
1954-05,234
1954-06,264
1954-07,302
1954-08,293
1954-09,259
1954-10,229
1954-11,203
1954-12,229
1955-01,242
1955-02,233
1955-03,267
1955-04,269
1955-05,270
1955-06,315
1955-07,364
1955-08,347
1955-09,312
1955-10,274
1955-11,237
1955-12,278
1956-01,284
1956-02,277
1956-03,317
1956-04,313
1956-05,318
1956-06,374
1956-07,413
1956-08,405
1956-09,355
1956-10,306
1956-11,271
1956-12,306
1957-01,315
1957-02,301
1957-03,356
1957-04,348
1957-05,355
1957-06,422
1957-07,465
1957-08,467
1957-09,404
1957-10,347
1957-11,305
1957-12,336
1958-01,340
1958-02,318
1958-03,362
1958-04,348
1958-05,363
1958-06,435
1958-07,491
1958-08,505
1958-09,404
1958-10,359
1958-11,310
1958-12,337
1959-01,360
1959-02,342
1959-03,406
1959-04,396
1959-05,420
1959-06,472
1959-07,548
1959-08,559
1959-09,463
1959-10,407
1959-11,362
1959-12,405
1960-01,417
1960-02,391
1960-03,419
1960-04,461
1960-05,472
1960-06,535
1960-07,622
1960-08,606
1960-09,508
1960-10,461
1960-11,390
1960-12,432
A bidirectional LSTM unit
class BidirectionalLSTM(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(BidirectionalLSTM, self).__init__()
self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True)
self.linear = nn.Linear(hidden_size * 2, output_size) #bidirectional lstm, so hidden size is doubled
def forward(self, input):
        lstm_out, (hn, cn) = self.lstm(input)  # input shape (seq_len, batch_size, input_size); output shape (seq_len, batch_size, 2 * hidden_size); (hn, cn) are the final hidden and cell states
        T, b, h = lstm_out.size()  # T is the sequence length (all intermediate hidden states), b is the batch size, h is the output feature size = 2 * hidden_size
        hidden_states = lstm_out.view(T * b, h)  # treat all intermediate hidden states as one batch of T * b rows, so each one is fed into the linear layer separately
        output = self.linear(hidden_states)  # output shape (T * b, output_size)
        output = output.view(T, b, -1)  # restore to (T, b, output_size); every intermediate hidden state's output is here
return output
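A minimal shape check for this unit (illustrative sizes, assuming the default batch_first=False layout):

bilstm = BidirectionalLSTM(input_size=1, hidden_size=32, output_size=1)
x = torch.randn(20, 4, 1)  # (seq_len, batch_size, input_size)
out = bilstm(x)
print(out.shape)  # torch.Size([20, 4, 1]) -> one output per time step per sequence, suitable for seq2seq use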