diff --git a/src/main/java/schule/ngb/zm/ml/MLMath.java b/src/main/java/schule/ngb/zm/ml/MLMath.java index d91e4da..90d9a81 100644 --- a/src/main/java/schule/ngb/zm/ml/MLMath.java +++ b/src/main/java/schule/ngb/zm/ml/MLMath.java @@ -145,6 +145,37 @@ public final class MLMath { return -sum / batch_size; } + public static double[][] biasAdd( double[][] A, double[] V ) { + if( A[0].length != V.length ) { + throw new IllegalArgumentException("Can't add bias vector to matrix with wrong column count"); + } + + double[][] result = new double[A.length][A[0].length]; + for( int i = 0; i < A.length; i++ ) { + for( int j = 0; j < A[0].length; j++ ) { + result[i][j] = A[i][j] + V[j]; + } + } + + return result; + } + + public static double[] biasAdjust( double[] biases, double[][] delta ) { + if( biases.length != delta[0].length ) { + throw new IllegalArgumentException("Can't adjust bias vector by delta with wrong column count"); + } + + double[] result = new double[biases.length]; + for( int j = 0; j < delta[0].length; j++ ) { + for( int i = 0; i < delta.length; i++ ) { + result[j] += biases[j] + delta[i][j]; // add the batch-mean of delta so the bias step matches the additive weight update in NeuronLayer.backprop + } + result[j] /= delta.length; + } + + return result; + } + private MLMath() { } diff --git a/src/main/java/schule/ngb/zm/ml/Matrix.java b/src/main/java/schule/ngb/zm/ml/Matrix.java index 0a20ae8..734fe7b 100644 --- a/src/main/java/schule/ngb/zm/ml/Matrix.java +++ b/src/main/java/schule/ngb/zm/ml/Matrix.java @@ -1,7 +1,11 @@ package schule.ngb.zm.ml; +import schule.ngb.zm.Constants; + import java.util.Arrays; +// TODO: Move Math into Matrix class +// TODO: Implement support for optional sci libs public class Matrix { private int columns, rows; @@ -37,11 +41,11 @@ public class Matrix { } public void initializeRandom() { - coefficients = MLMath.matrixApply(coefficients, (d) -> Math.random()); + coefficients = MLMath.matrixApply(coefficients, (d) -> Constants.randomGaussian()); } public void initializeRandom( double lower, double upper ) { - coefficients = 
MLMath.matrixApply(coefficients, (d) -> ((upper-lower) * Math.random()) + lower); + coefficients = MLMath.matrixApply(coefficients, (d) -> ((upper-lower) * (Constants.randomGaussian()+1) * .5) + lower); // NOTE(review): randomGaussian() is unbounded, so values can fall outside [lower, upper] — confirm intent } public void initializeIdentity() { diff --git a/src/main/java/schule/ngb/zm/ml/NeuronLayer.java b/src/main/java/schule/ngb/zm/ml/NeuronLayer.java index ad9aa8d..9767fad 100644 --- a/src/main/java/schule/ngb/zm/ml/NeuronLayer.java +++ b/src/main/java/schule/ngb/zm/ml/NeuronLayer.java @@ -1,12 +1,38 @@ package schule.ngb.zm.ml; +import java.util.Arrays; import java.util.function.DoubleUnaryOperator; import java.util.function.Function; public class NeuronLayer implements Function { + public static NeuronLayer fromArray( double[][] weights ) { + NeuronLayer layer = new NeuronLayer(weights[0].length, weights.length); + for( int i = 0; i < weights[0].length; i++ ) { + for( int j = 0; j < weights.length; j++ ) { + layer.weights.coefficients[i][j] = weights[j][i]; // transpose: incoming array is [neurons][inputs], coefficients are [inputs][neurons] + } + } + return layer; + } + + public static NeuronLayer fromArray( double[][] weights, double[] biases ) { + NeuronLayer layer = new NeuronLayer(weights[0].length, weights.length); + for( int i = 0; i < weights[0].length; i++ ) { + for( int j = 0; j < weights.length; j++ ) { + layer.weights.coefficients[i][j] = weights[j][i]; // transpose, see fromArray(double[][]) + } + } + for( int j = 0; j < biases.length; j++ ) { + layer.biases[j] = biases[j]; + } + return layer; + } + Matrix weights; + double[] biases; + NeuronLayer previous, next; DoubleUnaryOperator activationFunction, activationFunctionDerivative; @@ -17,6 +43,9 @@ public class NeuronLayer implements Function { weights = new Matrix(inputs, neurons); weights.initializeRandom(-1, 1); + biases = new double[neurons]; + Arrays.fill(biases, 0.0); // TODO: Random? 
+ activationFunction = MLMath::sigmoid; activationFunctionDerivative = MLMath::sigmoidDerivative; } @@ -82,13 +111,19 @@ public class NeuronLayer implements Function { @Override public String toString() { - return weights.toString(); + return weights.toString() + "\n" + Arrays.toString(biases); } @Override public double[][] apply( double[][] inputs ) { lastInput = inputs; - lastOutput = MLMath.matrixApply(MLMath.matrixMultiply(inputs, weights.coefficients), activationFunction); + lastOutput = MLMath.matrixApply( + MLMath.biasAdd( + MLMath.matrixMultiply(inputs, weights.coefficients), + biases + ), + activationFunction + ); if( next != null ) { return next.apply(lastOutput); } else { @@ -114,11 +149,16 @@ public class NeuronLayer implements Function { error = MLMath.matrixMultiply(expected, MLMath.matrixTranspose(next.weights.coefficients)); } - delta = MLMath.matrixScale(error, MLMath.matrixApply(this.lastOutput,this.activationFunctionDerivative)); + delta = MLMath.matrixScale(error, MLMath.matrixApply(this.lastOutput, this.activationFunctionDerivative)); + // Should the learningRate already be applied here? + // See https://towardsdatascience.com/understanding-and-implementing-neural-networks-in-java-from-scratch-61421bb6352c + //delta = MLMath.matrixApply(delta, ( x ) -> learningRate * x); if( previous != null ) { previous.backprop(delta, learningRate); } + biases = MLMath.biasAdjust(biases, MLMath.matrixApply(delta, ( x ) -> learningRate * x)); + adjustment = MLMath.matrixMultiply(MLMath.matrixTranspose(lastInput), delta); adjustment = MLMath.matrixApply(adjustment, ( x ) -> learningRate * x); this.adjustWeights(adjustment);