EDIT2:
New training set...
Inputs:
[
[0.0, 0.0],
[0.0, 1.0],
[0.0, 2.0],
[0.0, 3.0],
[0.0, 4.0],
[1.0, 0.0],
[1.0, 1.0],
[1.0, 2.0],
[1.0, 3.0],
[1.0, 4.0],
[2.0, 0.0],
[2.0, 1.0],
[2.0, 2.0],
[2.0, 3.0],
[2.0, 4.0],
[3.0, 0.0],
[3.0, 1.0],
[3.0, 2.0],
[3.0, 3.0],
[3.0, 4.0],
[4.0, 0.0],
[4.0, 1.0],
[4.0, 2.0],
[4.0, 3.0],
[4.0, 4.0]
]
Outputs:
[
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[0.0],
[1.0],
[1.0],
[0.0],
[0.0],
[0.0],
[1.0],
[1.0]
]
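(In this set the output is 1.0 exactly when both coordinates are at least 3. If that threshold rule is the intended target function, the whole set could be generated like this; the rule itself is my inference from the values above:)
trainingInputs = [[float(x), float(y)] for x in range(5) for y in range(5)]
# 1.0 when both coordinates are >= 3, as read off the outputs above
trainingOutputs = [[1.0] if x >= 3.0 and y >= 3.0 else [0.0] for x, y in trainingInputs]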
EDIT1:
I have updated the question with my latest code. I fixed a few minor issues, but I am still getting the same output for all input combinations after the network has learned.
Here is the backprop algorithm explained: Backprop algorithm
Yes, this is homework, to make that clear right at the beginning.
I am supposed to implement a simple backpropagation algorithm on a simple neural network.
I have chosen Python as the language for this task, and I have chosen a neural network like this:
3 layers: 1 input, 1 hidden, 1 output layer:
O   O
       O
O   O
There is an integer on each input neuron and a 1 or 0 on the output neuron.
Here is my entire implementation (a bit long). Below it I quote just the shorter snippets where I think the error could be located:
import math
import random

#------------------------------ class definitions

class Weight:
    def __init__(self, fromNeuron, toNeuron):
        self.value = random.uniform(-0.5, 0.5)
        self.fromNeuron = fromNeuron
        self.toNeuron = toNeuron
        fromNeuron.outputWeights.append(self)
        toNeuron.inputWeights.append(self)
        # delta value; this accumulates and after each training cycle
        # is used to adjust the weight value
        self.delta = 0.0

    def calculateDelta(self, network):
        self.delta += self.fromNeuron.value * self.toNeuron.error

class Neuron:
    def __init__(self):
        self.value = 0.0        # the output
        self.idealValue = 0.0   # the ideal output
        self.error = 0.0        # error between output and ideal output
        self.inputWeights = []
        self.outputWeights = []

    def activate(self, network):
        x = 0.0
        for weight in self.inputWeights:
            x += weight.value * weight.fromNeuron.value
        # sigmoid function, clamped so math.exp cannot overflow
        if x < -320:
            self.value = 0
        elif x > 320:
            self.value = 1
        else:
            self.value = 1 / (1 + math.exp(-x))

class Layer:
    def __init__(self, neurons):
        self.neurons = neurons

    def activate(self, network):
        for neuron in self.neurons:
            neuron.activate(network)

class Network:
    def __init__(self, layers, learningRate):
        self.layers = layers
        self.learningRate = learningRate # the rate at which the network learns
        self.weights = []
        for hiddenNeuron in self.layers[1].neurons:
            for inputNeuron in self.layers[0].neurons:
                self.weights.append(Weight(inputNeuron, hiddenNeuron))
            for outputNeuron in self.layers[2].neurons:
                self.weights.append(Weight(hiddenNeuron, outputNeuron))

    def setInputs(self, inputs):
        self.layers[0].neurons[0].value = float(inputs[0])
        self.layers[0].neurons[1].value = float(inputs[1])

    def setExpectedOutputs(self, expectedOutputs):
        self.layers[2].neurons[0].idealValue = expectedOutputs[0]

    def calculateOutputs(self, expectedOutputs):
        self.setExpectedOutputs(expectedOutputs)
        self.layers[1].activate(self) # activation function for hidden layer
        self.layers[2].activate(self) # activation function for output layer

    def calculateOutputErrors(self):
        for neuron in self.layers[2].neurons:
            neuron.error = (neuron.idealValue - neuron.value) * neuron.value * (1 - neuron.value)

    def calculateHiddenErrors(self):
        for neuron in self.layers[1].neurons:
            error = 0.0
            for weight in neuron.outputWeights:
                error += weight.toNeuron.error * weight.value
            neuron.error = error * neuron.value * (1 - neuron.value)

    def calculateDeltas(self):
        for weight in self.weights:
            weight.calculateDelta(self)

    def train(self, inputs, expectedOutputs):
        self.setInputs(inputs)
        self.calculateOutputs(expectedOutputs)
        self.calculateOutputErrors()
        self.calculateHiddenErrors()
        self.calculateDeltas()

    def learn(self):
        for weight in self.weights:
            weight.value += self.learningRate * weight.delta

    def calculateSingleOutput(self, inputs):
        self.setInputs(inputs)
        self.layers[1].activate(self)
        self.layers[2].activate(self)
        #return round(self.layers[2].neurons[0].value, 0)
        return self.layers[2].neurons[0].value
#------------------------------ initialize objects etc

inputLayer = Layer([Neuron() for n in range(2)])
hiddenLayer = Layer([Neuron() for n in range(100)])
outputLayer = Layer([Neuron() for n in range(1)])
learningRate = 0.5
network = Network([inputLayer, hiddenLayer, outputLayer], learningRate)

# just for debugging, the real training set is much larger
trainingInputs = [
    [0.0, 0.0],
    [1.0, 0.0],
    [2.0, 0.0],
    [0.0, 1.0],
    [1.0, 1.0],
    [2.0, 1.0],
    [0.0, 2.0],
    [1.0, 2.0],
    [2.0, 2.0]
]
trainingOutputs = [
    [0.0],
    [1.0],
    [1.0],
    [0.0],
    [1.0],
    [0.0],
    [0.0],
    [0.0],
    [1.0]
]

#------------------------------ let's train
for i in range(500):
    for j in range(len(trainingOutputs)):
        network.train(trainingInputs[j], trainingOutputs[j])
    network.learn()

#------------------------------ let's check
for pattern in trainingInputs:
    print network.calculateSingleOutput(pattern)
Now, the problem is that after learning, the network seems to return a float number very close to 0.0 for all input combinations, even those whose output should be close to 1.0.
I train the network for a number of cycles (500 in the code above). In each cycle, for every set of inputs in the training set, I:
- Set network inputs
- Calculate outputs by using a sigmoid function
- Calculate errors in the output layer
- Calculate errors in the hidden layer
- Calculate weights' deltas
Then I adjust the weights based on the learning rate and the accumulated deltas.
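Written out, the updates these steps implement (this is just a summary of the code above, not new math) are:
    output error:  error_o = (idealValue - value) * value * (1 - value)
    hidden error:  error_h = value * (1 - value) * sum(w * error_o) over the neuron's outgoing weights
    weight delta:  delta_w += value_of_source_neuron * error_of_destination_neuron   (accumulated)
    weight update: w += learningRate * delta_w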
Here is my activation function for neurons:
def activationFunction(self, network):
    """
    Calculate the activation of a neuron: the sum of weight * value
    over all input weights, passed through a sigmoid.
    """
    x = 0.0
    for weight in self.inputWeights:
        x += weight.value * weight.getFromNeuron(network).value
    # sigmoid function
    self.value = 1 / (1 + math.exp(-x))
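Note that math.exp itself only overflows once its argument exceeds roughly 709 in CPython:
import math
math.exp(709) # about 8.2e307, still fine
math.exp(710) # raises OverflowError: math range error
This is why the full listing above clamps x at +/-320 before applying the sigmoid; that guard is conservative but safe.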
This is how I calculate the deltas:
def calculateDelta(self, network):
    self.delta += self.getFromNeuron(network).value * self.getToNeuron(network).error
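In gradient-descent terms, this accumulates each training pattern's contribution to the error gradient of one weight, which is the value of the weight's source neuron times the error term of its destination neuron; the weights are then adjusted once per cycle, i.e. batch learning.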
This is the general flow of my algorithm:
for i in range(numberOfIterations):
    for k, expectedOutput in trainingSet.iteritems():
        coordinates = k.split(",")
        network.setInputs((float(coordinates[0]), float(coordinates[1])))
        network.calculateOutputs([float(expectedOutput)])
        network.calculateOutputErrors()
        network.calculateHiddenErrors()
        network.calculateDeltas()
    oldWeights = network.weights # note: this aliases the list, it does not copy the old values
    network.adjustWeights()
    network.resetDeltas()
    print "Iteration ", i
    j = 0
    for weight in network.weights:
        print "Weight W", weight.i, weight.j, ": ", oldWeights[j].value, " ............ Adjusted value : ", weight.value
        j += 1
The last two lines of the output are:
0.552785449458 # this should be close to 1
0.552785449458 # this should be close to 0
It actually returns the same output number for all input combinations.
Am I missing something?
Looks like what you get is nearly the initial state of the Neuron (nearly self.idealValue). Maybe you should not initialize this Neuron before having actual data to provide?
EDIT: OK, I looked a bit deeper into the code and simplified it a bit (I will post the simplified version below). Basically your code has two minor errors (they look like things you just overlooked), but each one leads to a network that definitely won't work.
- You forgot to set the expected output value on the output layer during the learning phase. Without that, the network can't learn anything and will always be stuck with the initial idealValue. (That is the behavior I spotted at first reading.) This one could even have been spotted from your description of the training steps (and probably would have been if you hadn't posted the code; this is one of the rare cases I know of where actually posting the code hid the error instead of making it obvious). You fixed this one in your EDIT1.
- When activating the network in calculateSingleOutput, you forgot to activate the hidden layer (the one-line fix is sketched just below).
Obviously, either of these two problems alone leads to a dysfunctional network.
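For reference, here is the corrected method; it matches what the updated question code now does:
def calculateSingleOutput(self, inputs):
    self.setInputs(inputs)
    self.layers[1].activate(self) # this activation was missing
    self.layers[2].activate(self)
    return self.layers[2].neurons[0].value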
Once corrected, it works (well, it does in my simplified version of your code).
The errors were not easy to spot because the initial code was much too complicated. You should think twice before introducing new classes or new methods. Creating too few methods or classes makes code hard to read and maintain, but creating too many may make it even harder. You have to find the right balance. My personal method for finding this balance is to follow code smells and refactoring techniques wherever they lead me, sometimes adding methods or creating classes, sometimes removing them. It's certainly not perfect, but that's what works for me.
Below is my version of the code after some refactoring. I spent about one hour changing your code while always keeping it functionally equivalent. I took it as a good refactoring exercise, as the initial code was really horrible to read. After refactoring, it took just 5 minutes to spot the problems.
import math

"""
A simple backprop neural network. It has 3 layers:
    Input layer: 2 neurons
    Hidden layer: 2 neurons
    Output layer: 1 neuron
"""

class Weight:
    """
    Class representing a weight between two neurons.
    """
    def __init__(self, value, from_neuron, to_neuron):
        self.value = value
        self.from_neuron = from_neuron
        from_neuron.outputWeights.append(self)
        self.to_neuron = to_neuron
        to_neuron.inputWeights.append(self)
        # delta value; this accumulates and after each training cycle
        # is used to adjust the weight value
        self.delta = 0.0

class Neuron:
    """
    Class representing a neuron.
    """
    def __init__(self):
        self.value = 0.0        # the output
        self.idealValue = 0.0   # the ideal output
        self.error = 0.0        # error between output and ideal output
        self.inputWeights = []  # weights that end in the neuron
        self.outputWeights = [] # weights that start in the neuron

    def activate(self):
        """
        Calculate the activation of the neuron: the sum of
        weight * source-neuron value over all input weights,
        passed through a sigmoid.
        """
        x = 0.0
        for weight in self.inputWeights:
            x += weight.value * weight.from_neuron.value
        # sigmoid function
        self.value = 1 / (1 + math.exp(-x))

class Network:
    """
    Class representing a whole neural network. Contains layers.
    """
    def __init__(self, layers, learningRate, weights):
        self.layers = layers
        self.learningRate = learningRate # the rate at which the network learns
        self.weights = weights

    def training(self, entries, expectedOutput):
        # set inputs and expected outputs
        for i in range(len(entries)):
            self.layers[0][i].value = entries[i]
        for i in range(len(expectedOutput)):
            self.layers[2][i].idealValue = expectedOutput[i]
        # forward pass: activate hidden and output layers
        for layer in self.layers[1:]:
            for n in layer:
                n.activate()
        # backward pass: output errors, hidden errors, weight deltas
        for n in self.layers[2]:
            error = (n.idealValue - n.value) * n.value * (1 - n.value)
            n.error = error
        for n in self.layers[1]:
            error = 0.0
            for w in n.outputWeights:
                error += w.to_neuron.error * w.value
            n.error = error * n.value * (1 - n.value) # sigmoid derivative, as in the original code
        for w in self.weights:
            w.delta += w.from_neuron.value * w.to_neuron.error

    def updateWeights(self):
        for w in self.weights:
            w.value += self.learningRate * w.delta

    def calculateSingleOutput(self, entries):
        """
        Calculate a single output for input values.
        This will be used to debug the already learned network after training.
        """
        for i in range(len(entries)):
            self.layers[0][i].value = entries[i]
        # activate hidden and output layers
        for layer in self.layers[1:]:
            for n in layer:
                n.activate()
        print self.layers[2][0].value
#------------------------------ initialize objects etc

neurons = [Neuron() for n in range(5)]

w1 = Weight(-0.79, neurons[0], neurons[2])
w2 = Weight( 0.51, neurons[0], neurons[3])
w3 = Weight( 0.27, neurons[1], neurons[2])
w4 = Weight(-0.48, neurons[1], neurons[3])
w5 = Weight(-0.33, neurons[2], neurons[4])
w6 = Weight( 0.09, neurons[3], neurons[4])
weights = [w1, w2, w3, w4, w5, w6]

inputLayer = [neurons[0], neurons[1]]
hiddenLayer = [neurons[2], neurons[3]]
outputLayer = [neurons[4]]
learningRate = 0.3
network = Network([inputLayer, hiddenLayer, outputLayer], learningRate, weights)

# just for debugging, the real training set is much larger
trainingSet = [([0.0, 0.0], [0.0]),
               ([1.0, 0.0], [1.0]),
               ([2.0, 0.0], [1.0]),
               ([0.0, 1.0], [0.0]),
               ([1.0, 1.0], [1.0]),
               ([2.0, 1.0], [0.0]),
               ([0.0, 2.0], [0.0]),
               ([1.0, 2.0], [0.0]),
               ([2.0, 2.0], [1.0])]

#------------------------------ let's train
for i in range(100): # training iterations
    for entries, expectedOutput in trainingSet:
        network.training(entries, expectedOutput)
    network.updateWeights()

#------------------------------ network has learned, let's check
network.calculateSingleOutput((1, 0)) # this should be close to 1
network.calculateSingleOutput((0, 0)) # this should be close to 0
By the way, there is still a third problem I didn't correct (though it is easy to fix): if x grows too large in magnitude, math.exp() will raise an OverflowError (in CPython this happens once its argument exceeds roughly 709). This will occur if you run many training iterations, say a few thousand. The simplest correction I see is to check the value of x, and if it is too big or too small, set the Neuron's value to 0 or 1 accordingly, which are the sigmoid's limit values; clamping at +/-320 as the question's latest code does is conservative but safe.
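A minimal sketch of that guard applied to the simplified activate() above; it mirrors the clamping the question's latest code already uses:
def activate(self):
    x = 0.0
    for weight in self.inputWeights:
        x += weight.value * weight.from_neuron.value
    # clamp to the sigmoid's limit values to avoid OverflowError in math.exp
    if x < -320:
        self.value = 0.0
    elif x > 320:
        self.value = 1.0
    else:
        self.value = 1 / (1 + math.exp(-x))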