Perform an implementation of backpropagation for Multilayer Perceptron with sigmoid
ID: 3689736 • Letter: P
Question
Implement backpropagation for a Multilayer Perceptron with the sigmoid function using Python. I have attached my work in R, written in a very simple way without even using matrices. However, I couldn't implement it in Python. So, I only need a straightforward implementation that does not use a library for multilayer perceptrons, for example "sklearn.linear_model.Perceptron". However, importing NumPy (import numpy as np) is allowed.
----------------------------------------------------R code--------------------------------------------------------------------------------------
# Train a 2-2-2 fully connected sigmoid network on ONE training example with
# plain gradient descent (backpropagation), one scalar weight at a time.
#
# Wiring used by the forward pass below:
#   hidden h1 <- input1*w1 + input2*w4 + bh1*w9
#   hidden h2 <- input1*w3 + input2*w2 + bh2*w10
#   output o1 <- h1*w5 + h2*w7 + bo1*w11
#   output o2 <- h1*w6 + h2*w8 + bo2*w12
#inputs
input1 = 0.05
input2 = 0.1
#expected outputs
output1 = 0.99
output2 = 0.01
#initial weights
w1 = 0.14
w2 = 0.32
w3 = 0.2
w4 = 0.25
w5 = 0.42
w6 = 0.42
w7 = 0.38
w8 = 0.5
w9 = 0.3
w10 = 0.3
w11 = 0.55
w12 = 0.55
#bias
bh1 = 1
bh2 = 1
bo1 = 1
bo2 = 1
#learning rate
learn_rate = 0.08
#initial error, deliberately large so the loop body runs at least once
error = 1000
#counter to count the number of iterations of the algorithm
counter = 0
#run the iteration till the error drops to a certain value
while (error>0.000001)
{
  #forward pass
  input_h1 = input1*w1 + input2*w4 + bh1*w9
  input_h2 = input1*w3 + input2*w2 + bh2*w10
  output_h1 = 1/(1+exp(-input_h1))
  output_h2 = 1/(1+exp(-input_h2))
  input_o1 = output_h1*w5 + output_h2*w7 + bo1*w11
  input_o2 = output_h1*w6 + output_h2*w8 + bo2*w12
  output_o1 = 1/(1+exp(-input_o1))
  output_o2 = 1/(1+exp(-input_o2))
  error = 0.5*((output1-output_o1)^2+(output2-output_o2)^2)

  #backward pass: ALL gradients are computed from the weights used in the
  #forward pass above; no weight is changed until every gradient is known.
  #dE/d(input_o*): error signal at each output node
  derror_outh1 = (output_o1-output1)*output_o1*(1-output_o1)
  derror_outh2 = (output_o2-output2)*output_o2*(1-output_o2)
  #dE/d(input_h*): output error signals pushed back through w5..w8
  delta_h1 = (derror_outh1*w5 + derror_outh2*w6)*output_h1*(1-output_h1)
  delta_h2 = (derror_outh1*w7 + derror_outh2*w8)*output_h2*(1-output_h2)

  #gradients of the hidden->output weights and output bias weights
  derror_w5 = derror_outh1*output_h1
  derror_w7 = derror_outh1*output_h2
  derror_w11 = derror_outh1*bo1
  derror_w6 = derror_outh2*output_h1
  derror_w8 = derror_outh2*output_h2
  derror_w12 = derror_outh2*bo2
  #gradients of the input->hidden weights and hidden bias weights
  derror_w1 = delta_h1*input1
  derror_w4 = delta_h1*input2
  derror_w9 = delta_h1*bh1
  derror_w3 = delta_h2*input1
  derror_w2 = delta_h2*input2
  derror_w10 = delta_h2*bh2

  #gradient-descent step on every weight
  w1 = w1 - learn_rate*derror_w1
  w2 = w2 - learn_rate*derror_w2
  w3 = w3 - learn_rate*derror_w3
  w4 = w4 - learn_rate*derror_w4
  w5 = w5 - learn_rate*derror_w5
  w6 = w6 - learn_rate*derror_w6
  w7 = w7 - learn_rate*derror_w7
  w8 = w8 - learn_rate*derror_w8
  w9 = w9 - learn_rate*derror_w9
  w10 = w10 - learn_rate*derror_w10
  w11 = w11 - learn_rate*derror_w11
  w12 = w12 - learn_rate*derror_w12

  #forward pass to calculate updated error (this value drives the loop test)
  input_h1 = input1*w1 + input2*w4 + bh1*w9
  input_h2 = input1*w3 + input2*w2 + bh2*w10
  output_h1 = 1/(1+exp(-input_h1))
  output_h2 = 1/(1+exp(-input_h2))
  input_o1 = output_h1*w5 + output_h2*w7 + bo1*w11
  input_o2 = output_h1*w6 + output_h2*w8 + bo2*w12
  output_o1 = 1/(1+exp(-input_o1))
  output_o2 = 1/(1+exp(-input_o2))
  error = 0.5*((output1-output_o1)^2+(output2-output_o2)^2)
  counter = counter + 1

  #progress report for this iteration
  cat("Weight1:", w1," ")
  cat("Weight2:", w2," ")
  cat("Weight3:", w3," ")
  cat("Weight4:", w4," ")
  cat("Weight5:", w5," ")
  cat("Weight6:", w6," ")
  cat("Weight7:", w7," ")
  cat("Weight8:", w8," ")
  cat("Weight9:", w9," ")
  cat("Weight10:",w10," ")
  cat("Weight11:",w11," ")
  cat("Weight12:",w12," ")
  cat("Error:", error," ")
  cat("Counter:", counter," ")
}
-------------------------------------------------------------------------------Python-------------------------------------------------------------
import math
import random
import string
class NN:
    """Feed-forward network with one hidden layer, trained by backpropagation.

    The input layer carries one extra node whose activation stays at 1.0 and
    acts as the bias for the hidden layer; the output layer has no bias node.
    Activations come from the module-level ``sigmoid`` function and slopes
    from ``dsigmoid``. Weights are updated after every pattern, with a
    momentum term based on the previous change.
    """

    def __init__(self, NI, NH, NO):
        """Create a network with NI inputs, NH hidden nodes and NO outputs."""
        # number of nodes in layers
        self.ni = NI + 1  # +1 for bias
        self.nh = NH
        self.no = NO
        # node activations; the last entry of ai is never overwritten by
        # runNN, so it keeps the value 1.0 and serves as the bias input
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no
        # weight matrices: wi[i][j] links input i to hidden j,
        # wo[j][k] links hidden j to output k
        self.wi = makeMatrix(self.ni, self.nh)
        self.wo = makeMatrix(self.nh, self.no)
        # initialize node weights to random vals
        randomizeMatrix(self.wi, -0.2, 0.2)
        randomizeMatrix(self.wo, -2.0, 2.0)
        # last change in weights, kept for the momentum term
        self.ci = makeMatrix(self.ni, self.nh)
        self.co = makeMatrix(self.nh, self.no)

    def runNN(self, inputs):
        """Run a forward pass and return the list of output activations.

        Raises:
            ValueError: if ``inputs`` does not hold exactly ``self.ni - 1``
                values (the bias node is not supplied by the caller).
        """
        if len(inputs) != self.ni - 1:
            raise ValueError('incorrect number of inputs')
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]
        for j in range(self.nh):
            total = 0.0
            for i in range(self.ni):
                total += self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(total)
        for k in range(self.no):
            total = 0.0
            for j in range(self.nh):
                total += self.ah[j] * self.wo[j][k]
            self.ao[k] = sigmoid(total)
        return self.ao

    def backPropagate(self, targets, N, M):
        """Apply one weight update for the pattern last fed to ``runNN``.

        Args:
            targets: expected output values, one per output node.
            N: learning rate.
            M: momentum factor applied to the previous weight change.

        Returns:
            The summed squared error 0.5 * sum((target - output)**2) of the
            activations computed by the preceding forward pass.
        """
        # http://www.youtube.com/watch?v=aVId8KMsdUU&feature=BFa&list=LLldMCkmXl4j9_v0HeKdNcRA
        # Output deltas: (t[k] - ao[k]) * s'(net_k). Multiplying a delta by
        # the hidden activation ah[j] gives the descent direction
        # (-dE/dw[j][k]) for weight wo[j][k], which is why updates are added.
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            error = targets[k] - self.ao[k]
            output_deltas[k] = error * dsigmoid(self.ao[k])
        # Hidden deltas must be derived from the SAME output weights the
        # forward pass used, so compute them before wo is modified.
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error += output_deltas[k] * self.wo[j][k]
            hidden_deltas[j] = error * dsigmoid(self.ah[j])
        # update output weights
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] += N * change + M * self.co[j][k]
                self.co[j][k] = change
        # update input weights
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] += N * change + M * self.ci[i][j]
                self.ci[i][j] = change
        # combined error over ALL outputs
        # 1/2 for differential convenience & **2 for modulus
        error = 0.0
        for k in range(len(targets)):
            error += 0.5 * (targets[k] - self.ao[k]) ** 2
        return error

    def weights(self):
        """Print the input->hidden and hidden->output weight matrices."""
        print('Input weights:')
        for i in range(self.ni):
            print(self.wi[i])
        print('')
        print('Output weights:')
        for j in range(self.nh):
            print(self.wo[j])
        print('')

    def test(self, patterns):
        """Print the network output next to the target for every pattern."""
        for p in patterns:
            inputs = p[0]
            # single formatted argument keeps the output identical under
            # Python 2 and Python 3
            print('Inputs: %s --> %s  Target %s'
                  % (p[0], self.runNN(inputs), p[1]))

    def train(self, patterns, max_iterations=1000, N=0.5, M=0.1):
        """Train on ``patterns`` and then print the results via ``test``.

        Args:
            patterns: sequence of ``[inputs, targets]`` pairs.
            max_iterations: number of passes over all patterns.
            N: learning rate.
            M: momentum factor.
        """
        for i in range(max_iterations):
            # error summed over every pattern of this pass
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.runNN(inputs)
                error += self.backPropagate(targets, N, M)
            if i % 50 == 0:
                print('Combined error %s' % error)
        self.test(patterns)
def sigmoid(x):
    """Return the activation for net input x.

    Note: despite its name this is the hyperbolic tangent, not the logistic
    function 1 / (1 + e**-x).
    """
    activation = math.tanh(x)
    return activation
# Slope of the activation function written in terms of its OUTPUT y:
# for y = tanh(x), dy/dx = 1 - y**2.
# Background on hyperbolic functions:
# http://www.math10.com/en/algebra/hyperbolic-functions/hyperbolic-functions.html
def dsigmoid(y):
    """Return 1 - y**2, where y is an activation value already computed."""
    slope = 1 - y ** 2
    return slope
def makeMatrix(I, J, fill=0.0):
    """Return a list of I rows, each a separate list holding J copies of fill."""
    return [[fill] * J for _ in range(I)]
def randomizeMatrix(matrix, a, b):
    """Overwrite matrix in place with values drawn by random.uniform(a, b).

    The column count is taken from the first row, and cells are filled row by
    row, left to right.
    """
    for row in matrix:
        for col in range(len(matrix[0])):
            row[col] = random.uniform(a, b)
def main():
    """Train a 2-2-1 network on the two-input NAND truth table."""
    # every entry is [inputs, targets]
    truth_table = [
        [[0, 0], [1]],
        [[0, 1], [1]],
        [[1, 0], [1]],
        [[1, 1], [0]],
    ]
    network = NN(2, 2, 1)
    network.train(truth_table)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Explanation / Answer
import math
import random
import string


class NN:
    """Feed-forward network with one hidden layer, trained by backpropagation.

    The input layer carries one extra node whose activation stays at 1.0 and
    acts as the bias for the hidden layer; the output layer has no bias node.
    Weights are updated after every pattern, with a momentum term based on
    the previous change.
    """

    def __init__(self, NI, NH, NO):
        """Create a network with NI inputs, NH hidden nodes and NO outputs."""
        # number of nodes in layers
        self.ni = NI + 1  # +1 for bias
        self.nh = NH
        self.no = NO
        # node activations; the last entry of ai is never overwritten by
        # runNN, so it keeps the value 1.0 and serves as the bias input
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no
        # weight matrices: wi[i][j] links input i to hidden j,
        # wo[j][k] links hidden j to output k
        self.wi = makeMatrix(self.ni, self.nh)
        self.wo = makeMatrix(self.nh, self.no)
        # initialize node weights to random vals
        randomizeMatrix(self.wi, -0.2, 0.2)
        randomizeMatrix(self.wo, -2.0, 2.0)
        # last change in weights, kept for the momentum term
        self.ci = makeMatrix(self.ni, self.nh)
        self.co = makeMatrix(self.nh, self.no)

    def runNN(self, inputs):
        """Run a forward pass and return the list of output activations.

        Raises:
            ValueError: if ``inputs`` does not hold exactly ``self.ni - 1``
                values (the bias node is not supplied by the caller).
        """
        if len(inputs) != self.ni - 1:
            raise ValueError('incorrect number of inputs')
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]
        for j in range(self.nh):
            total = 0.0
            for i in range(self.ni):
                total += self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(total)
        for k in range(self.no):
            total = 0.0
            for j in range(self.nh):
                total += self.ah[j] * self.wo[j][k]
            self.ao[k] = sigmoid(total)
        return self.ao

    def backPropagate(self, targets, N, M):
        """Apply one weight update for the pattern last fed to ``runNN``.

        Args:
            targets: expected output values, one per output node.
            N: learning rate.
            M: momentum factor applied to the previous weight change.

        Returns:
            The summed squared error 0.5 * sum((target - output)**2) of the
            activations computed by the preceding forward pass.
        """
        # http://www.youtube.com/watch?v=aVId8KMsdUU&feature=BFa&list=LLldMCkmXl4j9_v0HeKdNcRA
        # Output deltas: (t[k] - ao[k]) * s'(net_k). Multiplying a delta by
        # the hidden activation ah[j] gives the descent direction
        # (-dE/dw[j][k]) for weight wo[j][k], which is why updates are added.
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            error = targets[k] - self.ao[k]
            output_deltas[k] = error * dsigmoid(self.ao[k])
        # Hidden deltas must be derived from the SAME output weights the
        # forward pass used, so compute them before wo is modified.
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error += output_deltas[k] * self.wo[j][k]
            hidden_deltas[j] = error * dsigmoid(self.ah[j])
        # update output weights
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] += N * change + M * self.co[j][k]
                self.co[j][k] = change
        # update input weights
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] += N * change + M * self.ci[i][j]
                self.ci[i][j] = change
        # combined error over ALL outputs
        # 1/2 for differential convenience & **2 for modulus
        error = 0.0
        for k in range(len(targets)):
            error += 0.5 * (targets[k] - self.ao[k]) ** 2
        return error

    def weights(self):
        """Print the input->hidden and hidden->output weight matrices."""
        print('Input weights:')
        for i in range(self.ni):
            print(self.wi[i])
        print('')
        print('Output weights:')
        for j in range(self.nh):
            print(self.wo[j])
        print('')

    def test(self, patterns):
        """Print the network output next to the target for every pattern."""
        for p in patterns:
            inputs = p[0]
            # single formatted argument keeps the output identical under
            # Python 2 and Python 3
            print('Inputs: %s --> %s  Target %s'
                  % (p[0], self.runNN(inputs), p[1]))

    def train(self, patterns, max_iterations=1000, N=0.5, M=0.1):
        """Train on ``patterns`` and then print the results via ``test``.

        Args:
            patterns: sequence of ``[inputs, targets]`` pairs.
            max_iterations: number of passes over all patterns.
            N: learning rate.
            M: momentum factor.
        """
        for i in range(max_iterations):
            # error summed over every pattern of this pass
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.runNN(inputs)
                error += self.backPropagate(targets, N, M)
            if i % 50 == 0:
                print('Combined error %s' % error)
        self.test(patterns)


def sigmoid(x):
    """Return the activation for net input x.

    Note: despite its name this is the hyperbolic tangent, not the logistic
    function 1 / (1 + e**-x).
    """
    return math.tanh(x)


# the derivative of the activation function in terms of its output:
# for y = tanh(x), dy/dx = 1 - y**2
# proof here:
# http://www.math10.com/en/algebra/hyperbolic-functions/hyperbolic-functions.html
def dsigmoid(y):
    """Return 1 - y**2, where y is an activation value already computed."""
    return 1 - y**2


def makeMatrix(I, J, fill=0.0):
    """Return a list of I rows, each a separate list holding J copies of fill."""
    m = []
    for i in range(I):
        m.append([fill] * J)
    return m


def randomizeMatrix(matrix, a, b):
    """Overwrite matrix in place with values drawn by random.uniform(a, b)."""
    for i in range(len(matrix)):
        for j in range(len(matrix[0])):
            matrix[i][j] = random.uniform(a, b)


def main():
    """Train a 2-2-1 network on the two-input NAND truth table."""
    # every entry is [inputs, targets]
    pat = [
        [[0, 0], [1]],
        [[0, 1], [1]],
        [[1, 0], [1]],
        [[1, 1], [0]],
    ]
    myNN = NN(2, 2, 1)
    myNN.train(pat)


if __name__ == "__main__":
    main()
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.