Neural Network Backpropagation not working

Posted 2019-07-29 22:08

Question:

I have coded a neural network in JavaScript and implemented the backpropagation algorithm described here. Here is the code (TypeScript):

/**
 * Net
 */


export class Net {
    private layers: Layer[] = [];
    private inputLayer: Layer;
    private outputLayer: Layer;
    public error: number = Infinity;

    private eta: number = 0.15;
    private alpha: number = 0.5;

    constructor(...topology: number[]) {
        topology.forEach((topologyLayer, iTL) => {
            var nextLayerNeuronNumber = topology[iTL + 1] || 0;
            this.layers.push(new Layer(topologyLayer, nextLayerNeuronNumber));
        });

        this.inputLayer = this.layers[0];
        this.outputLayer = this.layers[this.layers.length - 1];

    }

    public loadWeights(weights) {
        /*
        [
            [Layer
                [Node weights, ..., ...]
            ]
        ]
        */

        for (var iL = 0; iL < weights.length; iL++) {
            var neuronWeights = weights[iL];
            var layer = this.layers[iL];
            for (var iN = 0; iN < neuronWeights.length; iN++) {

                // Neuron

                var connections = neuronWeights[iN];
                for (var iC = 0; iC < connections.length; iC++) {
                    var connection = connections[iC];
                    this.layer(iL).neuron(iN).setWeights(iC, connection);

                }

            }
        }

    }


    public train(data: number[][], iterations = 2000) {

        var inputs = this.inputLayer.neurons.length - 1;

        for (var ite = 0; ite < iterations; ite++) {

            data.forEach(node => {

                var inputData = [];
                var outputData = [];

                for (var i = 0; i < node.length; i++) {
                    if (i < inputs) {
                        inputData.push(node[i])
                    } else {
                        outputData.push(node[i])
                    }
                }

                this.feedForward(...inputData);
                this.backProp(...outputData);


            });


        }


        return this.calcDataError(data);

    }

    private calcDataError(data) {
        var overallDataErrorSum = 0;
        var inputs = this.inputLayer.neurons.length - 1;

        data.forEach(node => {
            var outputData = node.splice(inputs);
            var inputData = node;

            this.feedForward(...inputData);
            overallDataErrorSum += this.getNetError(outputData);
        });

        overallDataErrorSum /= data.length;

        return overallDataErrorSum;
    }

    public saveWeights() {
        // Ignore output layer
        var ret = []
        for (var iL = 0; iL < this.layers.length - 1; iL++) {
            var layer = this.layers[iL];
            var layer_ret = [];

            layer.neurons.forEach(neuron => {
                layer_ret.push(neuron.connections.map(c => c.weight));
            });

            ret.push(layer_ret);
        }
        return ret;
    }

    feedForward(...inputs: number[]) {
        if (inputs.length != this.inputLayer.neurons.length - 1) return false;

        this.inputLayer.neurons.forEach((neuron, i) => {
            if (!neuron.isBias) {
                neuron.output(inputs[i]);
            }
        });

        this.layers.forEach((layer, i) => {
            // Skip Input Layer
            if (i > 0) {
                var prevLayer = this.layers[i - 1]
                layer.neurons.forEach(neuron => {
                    neuron.calcOutput(prevLayer);
                });
            }
        });

    }

    public getNetError(targetVals) {
        // Calc delta error of outputs
        var deltas = [];

        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
                deltas.push(neuron.delta);
            }
        });

        // Mean squared error over the output deltas
        var sum = 0;
        deltas.forEach(d => sum += d * d);

        return sum / deltas.length;
    }

    backProp(...targetVals: number[]) {

        // Calc delta error of outputs
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
            }
        });

        // Backprop delta error through hidden layers

        for (var iL = this.layers.length - 2; iL > 0; iL--) {
            var layer = this.layers[iL];
            var nextLayer = this.layers[iL + 1]
            layer.neurons.forEach(neuron => {
                neuron.calcHiddenDelta(nextLayer);
            });

        }

        // Update weights 

        for (var iL = 1; iL < this.layers.length; iL++) {
            var layer = this.layers[iL];
            var prevLayer = this.layers[iL - 1];

            layer.neurons.forEach(neuron => {
                if (!neuron.isBias) {
                    neuron.updateWeights(prevLayer, this.eta);
                }
            });
        }

        this.error = this.getNetError(targetVals);

        return this.error;

    }

    getOutputs() {

        var ret = [];
        this.outputLayer.neurons.forEach(neuron => {
            if (!neuron.isBias) {
                ret.push(neuron.output())
            }
        });
        return ret;

    }

    getResults(...inputs: number[]) {
        this.feedForward(...inputs)
        return this.getOutputs();
    }

    layer(i) {
        return this.layers[i];
    }
}

/**
 * Layer
 */
class Layer {
    public neurons: Neuron[] = [];
    constructor(neuronNumber: number, nextLayerNeuronNumber: number) {
        for (var iN = 0; iN < neuronNumber + 1; iN++) {
            // +1 for bias neuron, which is last
            if (iN < neuronNumber) {
                // Create normal neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, false));
            } else {
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, true));
            }
        }
    }

    neuron(i) {
        return this.neurons[i];
    }

    bias() {
        return this.neurons[this.neurons.length - 1];
    }
}

/**
 * Neuron
 */
class Neuron {
    public connections: Connection[] = [];
    private outputVal: number;
    public delta: number;

    constructor(outputsTo: number, private index, public isBias = false) {

        // Creates connections
        for (var c = 0; c < outputsTo; c++) {
            this.connections.push(new Connection());
        }

        this.outputVal = isBias ? 1 : 0;

    }

    calcOutput(prevLayer: Layer) {

        // Only calcOutput when neuron is not a bias neuron

        if (!this.isBias) {
            var sum = 0;

            prevLayer.neurons.forEach(prevLayerNeuron => {
                sum += prevLayerNeuron.output() * prevLayerNeuron.getWeights(this.index).weight;
            });

            this.output(this.activationFunction(sum));
        }

    }

    private activationFunction(x) {

        //return Math.tanh(x);
        return 1 / (1 + Math.exp(-x))
        //return x;
    };

    private activationFunctionDerivative(x) {
        // Small approximation of tanh derivative
        //return 1 - x * x

        // Sigmoid
        var s = this.activationFunction(x);
        return s * (1 - s);

        // Numerical approximation of the derivative with step size h
        /*var h = 0.0001;
        var dx = (this.activationFunction(x + h) - this.activationFunction(x)) / h;
        return dx;*/

        //return 1
    };

    // Backprop (TODO: understand)


    public calcOutputDelta(targetVal) {

        // Bias output neurons do not have delta error
        if (!this.isBias) {
            this.delta = targetVal - this.output();
        }
    }

    public calcHiddenDelta(nextLayer: Layer) {
        var sum = 0;

        // Go through all neurons of next layer excluding bias
        nextLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                sum += neuron.delta * this.getWeights(iN).weight;
            }
        });

        this.delta = sum;
    }

    public updateWeights(prevLayer: Layer, eta: number) {

        prevLayer.neurons.forEach((neuron, iN) => {
            var weight = neuron.getWeights(this.index).weight;
            var newWeight =
                weight + // old weight
                eta * // learning rate
                this.delta * // delta error
                this.activationFunctionDerivative(neuron.output());
            neuron.getWeights(this.index).weight = newWeight;
        });


    }


    // Backprop end

    output(s?) {
        if (s && !this.isBias) {
            this.outputVal = s;
            return this.outputVal;
        } else {
            return this.outputVal;
        }
    }

    getWeights(i) {
        return this.connections[i];
    }

    setWeights(i, s) {
        return this.connections[i].weight = s;
    }
}

/**
 * Connection
 */
class Connection {
    public weight: number;
    public deltaWeight: number;

    constructor() {
        this.weight = Math.random();
        this.deltaWeight = 0;
    }
}

When training it on just one set of data, it works just fine (example from here):

import {Net} from './ml';

var myNet = new Net(2, 2, 2);


var weights = [
    [
        [0.15, 0.25],
        [0.20, 0.30],
        [0.35, 0.35]
    ],
    [
        [0.40, 0.50],
        [0.45, 0.55],
        [0.60, 0.60]
    ]
];

// Just loads the weights given in the example

myNet.loadWeights(weights)

var error = myNet.train([[0.05, 0.10, 0.01, 0.99]]);
console.log('Error: ', error);

console.log(myNet.getResults(0.05, 0.10));

Console prints:

Error:  0.0000020735174706210714
[ 0.011556397089327321, 0.9886867357304885 ]

Basically, that's pretty good, right?

Then, I wanted to teach the network the XOR problem:

import {Net} from './ml';

var myNet = new Net(2, 3, 1);


var trainingData = [
    [0, 0, 0],
    [1, 0, 1],
    [0, 1, 1],
    [1, 1, 0]
];

var error = myNet.train(trainingData);
console.log('Error: ', error);

console.log('Input: 0, 0: ', myNet.getResults(0, 0));
console.log('Input: 1, 0: ', myNet.getResults(1, 0));

Here the network fails:

Error:  0.2500007370167383
Input: 0, 0:  [ 0.5008584967899313 ]
Input: 1, 0:  [ 0.5008584967899313 ]

What am I doing wrong?

Answer 1:

First, perform a gradient check on the entire batch (meaning on the function that calculates the gradients for the batch), if you have not done so already. This will tell you where the problem is.
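For example, a numerical gradient check can be written directly against the Net class from the question. The following is a minimal sketch (the checkGradient helper and its tolerance are illustrative, not from either post): it nudges one weight up and down, recomputes the loss with getNetError, and compares the resulting central difference with whatever gradient your backward pass computed for that weight.

function checkGradient(net: Net, input: number[], target: number[],
                       iL: number, iN: number, iC: number,
                       analyticGrad: number, h = 1e-5): number {
    var weights = net.saveWeights();
    var original = weights[iL][iN][iC];

    // Loss with the weight nudged up
    weights[iL][iN][iC] = original + h;
    net.loadWeights(weights);
    net.feedForward(...input);
    var lossPlus = net.getNetError(target);

    // Loss with the weight nudged down
    weights[iL][iN][iC] = original - h;
    net.loadWeights(weights);
    net.feedForward(...input);
    var lossMinus = net.getNetError(target);

    // Restore the original weight
    weights[iL][iN][iC] = original;
    net.loadWeights(weights);

    // Relative error between the numeric and analytic gradient;
    // values much above ~1e-5 usually indicate a wrong gradient
    var numericGrad = (lossPlus - lossMinus) / (2 * h);
    return Math.abs(numericGrad - analyticGrad) /
           Math.max(Math.abs(numericGrad) + Math.abs(analyticGrad), 1e-12);
}

Running this for every weight (and averaging over the batch) tells you whether the fault is in the backward pass or in the update step.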

If the gradients are not computed correctly, then, given that your implementation works on a single data set, you are most likely mixing up some values in the backward pass.

If the gradients are computed correctly, there is an error in your update function.
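For comparison, here is a minimal sketch of the textbook deltas and the per-weight SGD update for a sigmoid network, written with the naming conventions from the question (delta, eta). It shows the standard rule, not a drop-in patch for the posted code: note that the derivative is taken at the neuron receiving the weight, and the update is scaled by the previous neuron's output.

// delta_j = (t_j - out_j) * out_j * (1 - out_j) for an output neuron
function outputDelta(target: number, out: number): number {
    return (target - out) * out * (1 - out);
}

// delta_j = (sum_k delta_k * w_jk) * out_j * (1 - out_j) for a hidden neuron
function hiddenDelta(downstreamDeltas: number[], outgoingWeights: number[], out: number): number {
    var sum = 0;
    downstreamDeltas.forEach((d, k) => sum += d * outgoingWeights[k]);
    return sum * out * (1 - out);
}

// w_ij += eta * delta_j * out_i, scaled by the previous neuron's output out_i
function updatedWeight(wIJ: number, eta: number, deltaJ: number, prevOut: number): number {
    return wIJ + eta * deltaJ * prevOut;
}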

A working implementation of backpropagation for neural networks in JavaScript can be found here.

Here is a snippet of the trainStepBatch function, which uses backpropagation:

function trainStepBatch(details) {
    // Compute the forward pass for each training sample
    // and store the per-layer results in the batch array
    var batch = [];
    var ks = [];
    for (var a = 0; a < details.data.in.length; a++) {
        var results = [];
        var k = 1;
        results[0] = {output: details.data.in[a]};
        for (var i = 1; i < this.layers.length; i++) {
            results[i] = layers[this.layers[i].type].evalForGrad(this.layers[i], results[i - 1].output);
            k++;
        }
        batch[a] = results;
        ks[a] = k;
    }
    // Compute the backward pass:
    // first the derivative of the cost function given the output
    var grad = [];
    for (i in batch) grad[i] = {grad: costs[details.cost].df(batch[i][ks[i] - 1].output, details.data.out[i])};
    // then, for each layer, the backward pass
    // on the results of all forward passes at that layer
    for (var i = this.layers.length - 1; i > 0; i--) {
        var grads = [];
        var test = true;
        for (a in batch) {
            grads[a] = layers[this.layers[i].type].grad(this.layers[i], batch[a][i], batch[a][i - 1], grad[a]);
            if (grads[a] == null) test = false;
            else grads[a].layer = i;
        }
        // Perform the update
        if (test) stepBatch(this.layers[i].par, grads, details.stepSize);
    }
}

And here is the stepBatch function:

function stepBatch(params, grads, stepSize) {
    for (i in params.w) {
        for (j in params.w[i]) {
            for (a in grads) {
                params.w[i][j] -= stepSize * grads[a].dw[i][j];
            }
        }
    }
    for (i in params.b) {
        for (a in grads) {
            params.b[i] -= stepSize * grads[a].db[i];
        }
    }
}