Neural nets : one hidden layer example

Exercise 4 :

1.

In [1]:
import numpy as np

# sigmoid function
def sigmoid(z):
    return 1/(1+np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

# evaluation functions
def loss(yHat, y):
    return 0.5 * (yHat - y)**2

def loss_prime(yHat, y):
    return yHat - y

# feed forward with one hidden layer

def feed_forward(x, Wh, Wo):
    # compute output of each hidden unit
    Zh_in = np.dot(x, Wh)
    Zh = np.concatenate(([1], sigmoid(Zh_in))) # 1 at the start is for biais

    # compute output of net
    Zo_in = np.dot(Zh, Wo)
    yHat = sigmoid(Zo_in)
    return yHat, Zh, Zo_in, Zh_in

# feed forward + backprop with one hidden layer
def backprop(x, y, Wh, Wo, alpha):
    """
    x, y : training data
    Wh : weights of hidden layer
    Wo : weights of output layer
    alpha : learning rate
    """
    yHat, Zh, Zo_in, Zh_in = feed_forward(x, Wh, Wo)

    # Layer Error
    
    Eo = (yHat - y) * sigmoid_prime(Zo_in)
    Eh = Eo * Wo * sigmoid_prime(Zh_in)

    # Cost derivative for weights
    dWo = Eo * Zh
    dWh = Eh * x.reshape(-1, 1)

    # Update weights
    Wh -= alpha * dWh
    Wo -= alpha * dWo.reshape(-1, 1)
    return Wh, Wo
  • For one epoch :
In [2]:
x = np.array([1, 0, 1])  # 1 at the start is for biais
y = 1
Wh = np.array([[0.4, 0.6], 
               [0.7, -0.4], 
               [-0.2, 0.3]])
Wo = np.array([[-0.3], 
               [0.5], 
               [0.1]])
alpha = 0.25
yHat, Zh, Zo_in, Zh_in = feed_forward(x, Wh, Wo)
print()
print("--- feedforward ---")
print("yHat : ", yHat)

Wh_new, Wo_new = backprop(x, y, Wh, Wo, alpha)
yHat, Zh, Zo_in, Zh_in = feed_forward(x, Wh_new, Wo_new)

print()
print("--- backpropagation ---")
print("Wh_new :\n", Wh_new )
print("Wo_new :\n", Wo_new )
print()
print("--- feedforward ---")
print("yHat : ", yHat)
--- feedforward ---
yHat :  [0.51150096]

--- backpropagation ---
Wh_new :
 [[ 0.39773411  0.59811875]
 [ 0.7        -0.4       ]
 [-0.1992447   0.30062708]]
Wo_new :
 [[-0.26948496]
 [ 0.5167782 ]
 [ 0.12169465]]

--- feedforward ---
yHat :  [0.52521644]
  • for 500 epochs :
In [3]:
# params
x = np.array([1, 0, 1])  # 1 at the start is for biais
y = 1
Wh = np.array([[0.4, 0.6], 
               [0.7, -0.4], 
               [-0.2, 0.3]])
Wo = np.array([[-0.3], 
               [0.5], 
               [0.1]])

# training
EPOCH = 500

Wh_new, Wo_new = Wh, Wo
y_preds = []
for i in range(EPOCH):
    Wh_new, Wo_new = backprop(x, y, Wh_new, Wo_new, alpha)
    y_preds.append( feed_forward(x, Wh_new, Wo_new)[0][0] )
    
print("prediction : ", y_preds[-1])
print("Wh_new :\n", Wh_new )
print("Wo_new :\n", Wo_new )
prediction :  0.9525284146110652
Wh_new :
 [[ 0.55899314  0.72588969]
 [ 0.7        -0.4       ]
 [ 0.02947009  0.48379863]]
Wo_new :
 [[1.1998775 ]
 [1.3673516 ]
 [1.19428031]]
  • Let us plot this result :
In [4]:
import matplotlib.pyplot as plt

times = np.arange(1, EPOCH+1)
plt.plot(times, y_preds)
plt.show()

2 and 3. change just "x", "y", "sigmoid" and "sigmoid_prime" accordingly