
Code



Linear and non-linear (two-layer network) regression impl in numpy


import numpy as np
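# Optional addition (not in the original): fix the random seed so the sampled
# noise and the initial weights are reproducible from run to run.
np.random.seed(0)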

num_records = 700
x = np.linspace(0.0, 5.0, num=num_records)

# Typical examples of functions we want to learn:
# - Regression: time to reach home (Google Maps ETA), age prediction
# - Classification: predicted click-through rate (pCTR) for ads, emails, push notifications
# In our case this is the "ideal" function we want to learn
fx = (x**3 + 4)

# Real collected data is noisy, so we add Gaussian noise to our samples
y = fx + (np.random.normal(0, 10, num_records))


import matplotlib.pyplot as plt
def plotData():
    plt.plot(x,y,'x',color='y',label="training data")
    plt.plot(x,fx,'-',color='b', label="f(x)")
    
plotData()
plt.legend(loc=2)
plt.show()


x = np.array(x).reshape(len(x), 1)
y = np.array(y).reshape(len(y), 1)

# Add a column of ones at position 0 so the bias term can be folded into the weight vector
print("x before: ", x[:5])
agumented_x = np.insert(x, 0, 1, axis=1)
print("x after inserting column: ", agumented_x[:5])

##################################################################################
##################################################################################
# Linear approximation
lr = 1e-6 # learning rate
num_features = 1

# initialize weights
w = np.random.rand(num_features + 1, 1)  # add weight for bias

print("Initial values:\n w = %s \n" % (w))

for i in range(10000):
    # forward
    ypred = agumented_x.dot(w) # linear function
    loss = np.sum(0.5*(ypred - y)**2) # squared loss since this is a regression problem
    
    # backward
    # d_loss/d_w
    # = d{np.sum(0.5*(ypred - y)**2)}/d_w
    # = (ypred - y) * d_ypred/d_w
    # = agumented_x.T.dot(ypred - y)
    d_ypred = (ypred - y)
    d_w = agumented_x.T.dot(d_ypred)  # this is the only derivative the linear model needs

    # update weights    
    w -= (lr*d_w)
    
    if i % 1000 == 0 or i < 10: print("iteration=%s \n d_w = %s \n w = %s \n loss = %s\n" % (i, d_w, w, loss))    
    
print("last iteration=%s \n d_w = %s \n w = %s \n loss = %s\n" % (i, d_w, w, loss)) 

plotData()
plt.plot(agumented_x[:,1],agumented_x.dot(w),'-',color='r', label="linear fit for f(x)")
plt.legend(loc=2)
plt.show()
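# Sanity check (added sketch, not part of the original flow): plain linear least
# squares has a closed-form solution, so we can compare the weights found by
# gradient descent against np.linalg.lstsq. w_closed is just a local helper name.
w_closed = np.linalg.lstsq(agumented_x, y, rcond=None)[0]
print("gradient-descent w = %s" % w.ravel())
print("closed-form      w = %s" % w_closed.ravel())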

##################################################################################
##################################################################################
# Non-linear approximation
# x -> w1 -> relu -> w2 -> y
lr = 1e-6
num_features = 1
num_hidden_features = 10

w1_size = num_features + 1  # +1 for the bias column in agumented_x
w1 = np.random.random((w1_size, num_hidden_features))

w2_size = num_hidden_features + 1  # +1 for the bias column added to the hidden output
w2 = np.random.random((w2_size, 1))

def relu(x):
    # Keep values where x > 0; set all others to 0.
    # np.maximum returns a new array, so the caller's array is not modified in place.
    return np.maximum(x, 0)

for i in range(10000):

    # forward
    u1 = agumented_x.dot(w1) # linear function
    u2 = relu(u1) # non-linear activation
    agumented_u2 = np.insert(u2, 0, 1, axis=1) # add bias column like before
    ypred = agumented_u2.dot(w2) # linear function
    loss = np.mean(0.5*(ypred - y)**2) # reported as a mean; the gradients below correspond to the summed loss

    # backward
    d_ypred = ypred - y
    d_w2 = agumented_u2.T.dot(d_ypred) # gradient of weights of second layer
    d_agumented_u2 = d_ypred.dot(w2.T) # gradient of inputs to second layer
    d_u2 = np.delete(d_agumented_u2, 0, axis=1) # remove the bias column we added
    # gradient through the ReLU: only pass the gradient where the pre-activation u1 is positive
    d_u1 = d_u2 * (u1 > 0)
    d_w1 = agumented_x.T.dot(d_u1) # gradient of the first layer's weights

    w1 -= (lr*d_w1)
    w2 -= (lr*d_w2)

    if i % 1000 == 0: print("iteration=%s loss=%s" % (i, loss))
        
print("iteration=%s loss=%s" % (i, loss))   


# final forward pass with the trained weights, for plotting
u1 = agumented_x.dot(w1)
u2 = relu(u1)
agumented_u2 = np.insert(u2, 0, 1, axis=1)
ypred = agumented_u2.dot(w2)

plotData()
plt.plot(agumented_x[:,1],agumented_x.dot(w),'-',color='r', label="linear fit for f(x)") # w comes from the linear section above, shown for comparison
plt.plot(agumented_x[:,1],ypred,'-',color='c', label="non-linear fit for f(x)")
plt.legend(loc=2)
plt.show()
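##################################################################################
##################################################################################
# Gradient check (added sketch, not part of the original code): compare the
# analytical gradients of the two-layer model against a centered finite
# difference of the summed squared loss (the summed loss is what the analytical
# gradients above correspond to). nn_loss and the *_check names are local helpers
# added here; the checked entry [0,0] and eps are arbitrary choices.
def nn_loss(w1, w2):
    u1 = agumented_x.dot(w1)
    agumented_u2 = np.insert(relu(u1), 0, 1, axis=1)
    return np.sum(0.5*(agumented_u2.dot(w2) - y)**2)

# analytical gradients at the current weights
u1 = agumented_x.dot(w1)
agumented_u2 = np.insert(relu(u1), 0, 1, axis=1)
d_ypred = agumented_u2.dot(w2) - y
d_w2_check = agumented_u2.T.dot(d_ypred)
d_w1_check = agumented_x.T.dot(np.delete(d_ypred.dot(w2.T), 0, axis=1) * (u1 > 0))

eps = 1e-4
for mat, grad, name in [(w1, d_w1_check, "w1"), (w2, d_w2_check, "w2")]:
    orig = mat[0, 0]
    mat[0, 0] = orig + eps
    loss_plus = nn_loss(w1, w2)
    mat[0, 0] = orig - eps
    loss_minus = nn_loss(w1, w2)
    mat[0, 0] = orig  # restore the weight
    print("grad check %s[0,0]: analytical=%.3f numerical=%.3f"
          % (name, grad[0, 0], (loss_plus - loss_minus) / (2*eps)))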









