diff --git a/lbjava-examples/pom.xml b/lbjava-examples/pom.xml index cebfb148..804e89c8 100755 --- a/lbjava-examples/pom.xml +++ b/lbjava-examples/pom.xml @@ -3,7 +3,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.3.0 + 1.3.1 4.0.0 @@ -27,12 +27,12 @@ edu.illinois.cs.cogcomp LBJava - 1.3.0 + 1.3.1 edu.illinois.cs.cogcomp lbjava-maven-plugin - 1.3.0 + 1.3.1 @@ -63,7 +63,7 @@ edu.illinois.cs.cogcomp lbjava-maven-plugin - 1.3.0 + 1.3.1 ${project.basedir}/src/main/java ${project.basedir}/target/classes @@ -77,6 +77,7 @@ + ${project.basedir}/src/main/lbj/NNBrownClassifier.lbj ${project.basedir}/src/main/lbj/BadgesClassifier.lbj ${project.basedir}/src/main/lbj/SentimentClassifier.lbj ${project.basedir}/src/main/lbj/SetCover.lbj diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java new file mode 100644 index 00000000..e29ef775 --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java @@ -0,0 +1,238 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.Arrays; + +import edu.illinois.cs.cogcomp.lbjava.parse.Parser; + +/** + * @author redman + */ +public class BrownReader implements Parser{ + + /** the input data. */ + float [][] inputs; + + /** the labels. */ + float [][] outputs; + + /** indexes the current example. 
*/ + int index = 0; + + /** the maximum number of input features. */ + int inputCardinality = -1; + + /** the maximum integer classification. */ + int outputCardinality = 1; + + /** + * read input data from the input file, the output data from the out file. + * @param infile the input data. + * @param outfile the output data. + * @throws IOException + */ + public BrownReader (String infile) { + try { + inputs = getExampleInputs(infile); + this.inputCardinality = inputs[0].length; + outputs = getExampleOutputs(infile); + if (inputs.length != outputs.length) + throw new RuntimeException("Need the same number of inputs and outputs."); + } catch (IOException e) { + throw new RuntimeException("Could not read example data.",e); + } + } + + /** + * read input data from the input file, the output data from the out file. + * @param infile the input data. + * @param trainingInputs the previously read training inputs. + * @throws IOException + */ + public BrownReader (String infile, int numberInputFeatures, int numberExamples) { + try { + this.inputCardinality = numberInputFeatures; + inputs = getExampleInputs(infile, numberInputFeatures); + outputs = getExampleOutputs(infile, inputs.length, numberExamples); + if (inputs.length != outputs.length) + throw new RuntimeException("Need the same number of inputs and outputs."); + } catch (IOException e) { + throw new RuntimeException("Could not read example data.",e); + } + } + + @Override + public void close() { + index = 0; + } + + @Override + public Object next() { + NeuralNetExample nne = null; + if (index < inputs.length) { + nne = new NeuralNetExample(inputs[index], outputs[index]); + index++; + } + return nne; + } + + @Override + public void reset() { + index = 0; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. 
+ * @param filename + * @return the input examples. + * @throws IOException + */ + private float[][] getExampleInputs(String filename) throws IOException { + int count = 0; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + while ((line=br.readLine()) != null) { + count++; + String[] splits = line.split("[,:]"); + for (int i = 1; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + if (featureindex > this.inputCardinality) + this.inputCardinality = featureindex; + } + } + } + float[][] data = new float[count][++this.inputCardinality]; + for (float[] a : data) + Arrays.fill(a, 0.0f); + + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + count = 0; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + for (int i = 0; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + data[count][featureindex] = 1.0f; + } + count++; + } + } + return data; + } + + /** + * scale the range of input feature vector to the provided example set, of data to train on. + * @param string + * @param examples + * @return the testing input deck. 
+ * @throws IOException + * @throws FileNotFoundException + */ + private float[][] getExampleInputs(String filename, int cardinality) throws FileNotFoundException, IOException { + int count = 0; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + while ((line=br.readLine()) != null) { + count++; + String[] splits = line.split("[,:]"); + for (int i = 1; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + if (featureindex > this.inputCardinality) + this.inputCardinality = featureindex; + } + } + } + float[][] data = new float[count][cardinality]; + for (float[] a : data) + Arrays.fill(a, 0.0f); + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + count = 0; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + for (int i = 0; i < splits.length; i++) { + int featureindex = Integer.parseInt(splits[i]); + data[count][featureindex] = 1.0f; + } + count++; + } + } + return data; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the input examples. 
+ * @throws IOException + */ + private float[][] getExampleOutputs(String filename) throws IOException { + int count = 0; + this.outputCardinality = -1; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line = null; + while ((line=br.readLine()) != null) { + count++; + String[] splits = line.split("[,:]"); + int label = Integer.parseInt(splits[0]); + if (label > this.outputCardinality) + this.outputCardinality = label; + } + } + float[][] data = new float[count][1]; + for (float[] a : data) + Arrays.fill(a, 0.0f); + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + count = 0; + float range = this.outputCardinality; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + int featureindex = Integer.parseInt(splits[0]); + data[count][0] = featureindex/range; + count++; + } + } + return data; + } + + /** + * get the example outputs. + * @param filename file with the values. + * @param outputs the training examples. + * @return the testing examples. + * @throws FileNotFoundException + * @throws IOException + */ + private float[][] getExampleOutputs(String filename, int numouts, int card) throws FileNotFoundException, IOException { + float[][] data = new float[numouts][1]; + try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { + String line; + int count = 0; + float range = card; + while ((line=br.readLine()) != null) { + String[] splits = line.split("[,:]"); + int featureindex = Integer.parseInt(splits[0]); + // convert to a number 0 - 1, then to a number -1 to 1. 
+ data[count][0] = featureindex/range; + count++; + } + } + return data; + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/Debug.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/Debug.java new file mode 100644 index 00000000..13f51eaf --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/Debug.java @@ -0,0 +1,238 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.classify.Classifier; + +/** + * This was used for debugging during development, thought it might be useful in the future + * although it is completely useless right now. + * @author redman + */ +@SuppressWarnings("unused") +public class Debug { + /** running ANN by default. */ + static private final String NN = "NeuralNet"; + + /** running gradient descent. */ + static private final String SGD = "StoichasticGradientDescent"; + + /** the method we are running. */ + static private String method = NN; + + /** scales the weight deltas for each iteration. */ + static private float learningRate = .3f; + + /** this prevents local minimum capture. */ + static private float momentum = .6f; + + /** this prevents local minimum capture. */ + static private int hiddenLayerSize = 20; + + /** this prevents local minimum capture. */ + static private int epochs = 100; + + /** The number of threads to support. */ + @SuppressWarnings("unused") + static private int threads = 1; + + /** + * parse the arguments. + * @param args the command arguments. 
+ */ + static private void parseArgs(String[] args) { + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-l")) + learningRate = Float.parseFloat(args[++i]); + else if (args[i].equals("-m")) + momentum = Float.parseFloat(args[++i]); + else if (args[i].equals("-e")) + epochs = Integer.parseInt(args[++i]); + else if (args[i].equals("-t")) + threads = Integer.parseInt(args[++i]); + else if (args[i].equals("-h")) + hiddenLayerSize = Integer.parseInt(args[++i]); + else if (args[0].equals("-gd")) + method = SGD; + else if (args[i].equals("-help")) { + System.out.println("-t the number of threads to deploy.\n" + + "-l the learning rate.\n" + + "-m momentum.\n" + + "-e number of epochs.\n" + + "-h hidden layer size.\n" + + "-gd use gradient descent.\n" + + "-help this output."); + System.exit(0); + } else + System.out.println("Unexpected argument : "+args[i]); + } + } + + /** + * Print a set of any pair of floating point arrays, labels can be passed in, if + * null is passed for the ol parameter, no second array is printed. + * @param il the input label. + * @param input the input vector. + * @param ol the output label. + * @param output the output vector. + */ + static void printInOut(String il, float[] input, String ol, float[] output) { + System.out.print(il+" "); + for (float in : input) { + System.out.format(" %.18f",in); + } + if (ol!=null) { + System.out.print(" "+ol+" "); + for (float in : output) { + System.out.format(" %.18f",in); + } + } + System.out.println(); + } + + /** + * Print the input and outputs all on one line. + * @param il the input label. + * @param input the input vector. + * @param ol the output label. + * @param output the output vector. 
+ */ + static void printInOutC(String il, float[] input, String ol, float[] output) { + System.out.println(il+" "); + int c = 0; + for (float in : input) { + System.out.format(c+il+": %.18f\n",in); + c++; + } + if (ol!=null) { + System.out.println(" "+ol+" "); + c = 0; + for (float in : output) { + System.out.format(c+ol+": %.18f\n",in); + c++; + } + } + } + + /** + * Compute the value, compare to the label, and accumulate predicted error. + * @param br the brown data reader. + * @param classifier the learner. + */ + static double computeHits (BrownReader br, Classifier classifier) { + int i = 0; + int bads = 0; + while (true) { + NeuralNetExample nne = (NeuralNetExample)br.next(); + if (nne == null) { + // done; + return (1.0f - ((double)bads/(double)i)) * 100f; + } else { + double value = classifier.realValue(nne); + double tru = nne.getOutputLabels()[0]; + double abserr = Math.abs(value - tru); + if (abserr > .25) { + bads++; + } + i++; + } + + } + } + + /** + * @param args + + public static void main(String[] args) { + parseArgs(args); + if (method == NN) { + // read the data to know how many input features there are. + BrownReader br = new BrownReader("data/brown/their-brown80.feat"); + + // first create the classifier and train it up. + NNBrownDataClassifier nn = new NNBrownDataClassifier(); + nn.setInputCount(br.inputCardinality); + nn.setHiddenCount(hiddenLayerSize); + nn.setOutputCount(1); + nn.setEpochs(epochs); + nn.setMomentum(momentum); + nn.setLearningRate(learningRate); + nn.forget(); + + int epochs = nn.getEpochs(); + long time = System.currentTimeMillis(); + // read training data. + try { + // train. 
+ ArrayList trainingExamples = new ArrayList<>(); + while(true) { + Object o = br.next(); + trainingExamples.add(o); + if (o == null) + break; + nn.learn(o); + } + Random r = new Random(); + for(int i = 0 ; i < epochs-1; i++) { + for (int j = 0; j < trainingExamples.size(); j++) { + int oidx = r.nextInt(trainingExamples.size()); + Object o = trainingExamples.get(oidx); + if (o == null) + break; + nn.learn(o); + } + } + + } finally { + br.close(); + } + + // now we have a trained up model, let's test it. + br = new BrownReader("data/brown/their-brown20.feat",br.inputs[0].length, br.outputCardinality); + double accuracy = computeHits(br, nn); + double seconds = ((System.currentTimeMillis() - time)/1000.0); + + // epochs, rate, momentum, hiddens, accuracy, time + System.out.format("%d,%.2f,%.2f,%d,%.4f,%.4f\n",epochs,learningRate,momentum,hiddenLayerSize,accuracy,seconds); + } else { + + // first create the classifier and train it up. + SGDBrownDataClassifier sdg = new SGDBrownDataClassifier(); + sdg.forget(); + Learner.Parameters p = sdg.getParameters(); + p.rounds = epochs; + + System.out.println("Reading data SGD"); + BrownReader br = new BrownReader("data/brown/their-brown80.feat"); + ArrayList trainingExamples = new ArrayList<>(); + while(true) { + Object o = br.next(); + trainingExamples.add(o); + if (o == null) + break; + sdg.learn(o); + } + System.out.println("Training SGD"); + Random r = new Random(); + for(int i = 0 ; i < p.rounds-1; i++) { + for (int j = 0; j < trainingExamples.size(); j++) { + int oidx = r.nextInt(trainingExamples.size()); + Object o = trainingExamples.get(oidx); + if (o == null) + break; + sdg.learn(o); + } + } + System.out.println("Training up done."); + + // now we have a trained up model, let's test it. 
+ br = new BrownReader("data/brown/their-brown20.feat",br.inputs[0].length, br.inputs.length); + computeHits(br, sdg); + } + }*/ +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetExample.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetExample.java new file mode 100644 index 00000000..065e0659 --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetExample.java @@ -0,0 +1,48 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +/** + * Data container for LBJava. + * @author redman + */ +public class NeuralNetExample { + + /** the inputs. */ + public float[] inputs; + + /** the labeled data. */ + public float[] outputs; + + /** + * create with inputs and outputs. + * + * @param ins + * @param outs + */ + NeuralNetExample(float[] ins, float [] outs) { + this.inputs = ins; + this.outputs = outs; + } + + /** + * Get the input features. + * @return input features. + */ + public float[] getInputFeatures() { + return inputs; + } + + /** + * @return the output features(truth data). 
+ */ + public float[] getOutputLabels() { + return outputs; + } + +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetwork.java b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetwork.java new file mode 100644 index 00000000..dc5caa2d --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/NeuralNetwork.java @@ -0,0 +1,210 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import java.io.IOException; +import java.util.Random; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.Activator; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.NNTrainingInterface; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.SimpleNNTrainer; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer; + +/** + * This class will manage a neural network, it will train it up if necessary, create + * and manage all the layers and nodes internally, and respond to activations. + * @author redman + */ +public class NeuralNetwork implements Activator { + + /** debug flag. */ + static final boolean debug = false; + + /** the layers of the neural network. */ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + static private float learningRate = .3f; + + /** this prevents local minimum capture. */ + static private float momentum = .6f; + + /** this prevents local minimum capture. */ + static private int hiddenLayerSize = 20; + + /** this prevents local minimum capture. 
*/ + static private int epochs = 100; + + /** this prevents local minimum capture. */ + static private int threads = 1; + + /** + * parse the arguments. + * @param args the command arguments. + */ + static private void parseArgs(String[] args) { + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-l")) + learningRate = Float.parseFloat(args[++i]); + else if (args[i].equals("-m")) + momentum = Float.parseFloat(args[++i]); + else if (args[i].equals("-e")) + epochs = Integer.parseInt(args[++i]); + else if (args[i].equals("-t")) + threads = Integer.parseInt(args[++i]); + else if (args[i].equals("-h")) + hiddenLayerSize = Integer.parseInt(args[++i]); + else if (args[i].equals("-help")) { + System.out.println("-t the number of threads to deploy.\n-l the learning rate.\n-m momentum.\n-e number of epochs.\n-h hidden layer size."); + System.exit(0); + } else + System.out.println("Unexpected argument : "+args[i]); + } + } + /** + * Given the number of input layers and outputs, and the sizes of all layers, + * set up an untrained neural net. + * @param layerSizes the number of neurons in each layer, also corresponds to the number of outputs of that layer. + * @param learningRate the learning rage. + * @param momentum the momentum. + */ + NeuralNetwork(int[] layerSizes) { + layers = new Layer[layerSizes.length-1]; + + // each layer has a number of inputs defined by the outputs of the previous layer, or + // the number inputs passed in, outputs is the number of neurons in the layer since each + // neuron produces one output. + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + this.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.neuralnet.Activator#activateLayers(float[], edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer[]) + */ + @Override + public float[] prediction(float[] inputs) { + // set up our counts. 
+ int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations[layerCount-1]; + } + + /** + * Train up the NN model given training data, a learner algorith, + * and convergence criteria. + * @param inputs the input data. + * @param outputs the desired output. + * @param learner the learning algorithm. + * @param epochs number of iterations to run. + * @param converge the convergence criteria. + */ + public void train(float[][] inputs, float[][]outputs, NNTrainingInterface learner, int epochs) { + if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records to train."); + learner.train(inputs, outputs, epochs); + } + + /** + * Test will try learning an XOR model. 
+ * @param args + * @throws IOException + */ + public static void main(String[] args) throws IOException { + parseArgs(args); + float[][] examples = null; + float[][] outputs = null; + float[][] texamples = null; + float[][] toutputs = null; + int [] hls = null; + int outputrange = 0; + if (args.length != 0) { + int [] thls = {28*28, hiddenLayerSize, 1}; + hls = thls; + System.out.println("reading data from disk."); + /*examples = DatasetReader.getExampleInputs("./data/NIST/train-images-idx3-ubyte"); + outputs = DatasetReader.getExampleOutputs("./data/NIST/train-labels-idx1-ubyte"); + texamples = DatasetReader.getExampleInputs("./data/NIST/t10k-images-idx3-ubyte"); + toutputs = DatasetReader.getExampleOutputs("./data/NIST/t10k-labels-idx1-ubyte"); + */ + BrownReader br = new BrownReader("data/brown/their-brown80.feat"); + examples = br.inputs; + outputs = br.outputs; + outputrange = br.outputCardinality; + br = new BrownReader("data/brown/their-brown20.feat", examples[0].length, br.outputCardinality); + texamples = br.inputs; + toutputs = br.outputs; + thls[0] = examples[0].length; + thls[2] = outputs[0].length; + } else { + int [] thls = {2, 2, 1}; + hls = thls; + examples = new float[][] { new float[] { 0, 0 }, new float[] { 0, 1 }, new float[] { 1, 0 }, new float[] { 1, 1 } }; + outputs = new float[][] { new float[] { 0 }, new float[] { 1 }, new float[] { 1 }, new float[] { 0 } }; + texamples = new float[][] { new float[] { 0, 0 }, new float[] { 0, 1 }, new float[] { 1, 0 }, new float[] { 1, 1 } }; + toutputs = new float[][] { new float[] { 0 }, new float[] { 1 }, new float[] { 1 }, new float[] { 0 } }; + } + int good = 0; + { + System.out.println("Start run: epochs="+epochs+" lr="+learningRate+" mom="+momentum+" hidden="+hiddenLayerSize+" threads:"+threads); + NeuralNetwork nn = new NeuralNetwork(hls); + NNTrainingInterface learner = null; + if (threads <= 1) { + learner = new SimpleNNTrainer(nn.layers, learningRate, momentum); + } else { + learner = new 
ThreadedNNTrainer(nn.layers, learningRate, momentum); + } + long time = System.currentTimeMillis(); + learner.train(examples, outputs, epochs); + time = (System.currentTimeMillis() - time)/1000l; + System.out.format("Took %d to train up a simple model, on to testing.\n",time); + System.out.println("\nCompute accuracy against training"); + + // provide some output now. + for (int inputIdx = 0; inputIdx < examples.length; inputIdx++) { + float[] outs = nn.prediction(examples[inputIdx]); + float pred = outs[0]*outputrange; + float label = outputs[inputIdx][0]*outputrange; + if (Math.round(pred) == Math.round(label)) { + good++; + } + } + System.out.format("Of %d, %d were good, accuracy %.4f",examples.length, good, ((float)good/(float)examples.length)); + good = 0; + System.out.println("\nCompute accuracy against hold out set."); + + // provide some output now. + for (int inputIdx = 0; inputIdx < texamples.length; inputIdx++) { + float[] outs = nn.prediction(texamples[inputIdx]); + float pred = outs[0]*outputrange; + float label = toutputs[inputIdx][0]*outputrange; + if (Math.round(pred) == Math.round(label)) { + System.out.format("+ %d label %.10f pred %.10f\n", inputIdx,label,pred); + good++; + } else { + System.out.format("- %d label %.10f pred %.10f\n", inputIdx,label,pred); + } + } + System.out.format("Of %d, %d were good, accuracy %.4f",texamples.length, good, ((float)good/(float)texamples.length)); + } + } +} diff --git a/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/README.md b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/README.md new file mode 100755 index 00000000..5fc8dbe6 --- /dev/null +++ b/lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/README.md @@ -0,0 +1,13 @@ +--- +title: Badges +authors: Vivek Srikumar, Stephen Mayhew, Daniel Khashabi +lead: Classify a simple dataset of names +layout: page +--- + +This classifier does spelling correction, it uses 
data which can be found on the CogComp web site, +do a search for "Brown Corpus Data for Context Sensitive Spelling Correction" to find this data. The +data must be placed in the directory where you run the training process for this to work. + +Training can be done by simply running the NeuralNetwork class manually + diff --git a/lbjava-examples/src/main/lbj/NNBrownClassifier.lbj b/lbjava-examples/src/main/lbj/NNBrownClassifier.lbj new file mode 100644 index 00000000..49118805 --- /dev/null +++ b/lbjava-examples/src/main/lbj/NNBrownClassifier.lbj @@ -0,0 +1,39 @@ +package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.*; + +/** +The input features are simple the set of real values +that serve as input to the neural net. + */ +real[] NNInputVector(NeuralNetExample input) <- { + float[] datapoints = input.getInputFeatures(); + for (int i = 0; i < datapoints.length; i++) { + sense datapoints[i]; + } +} + +/** +The output vector contains the data outputs, for this class +just one floating point number. +*/ +real NNOutputVector(NeuralNetExample d) <- { + float[] datapoints = d.getOutputLabels(); + return datapoints[0]; +} + +/** +A learned text classifier; its definition comes from data. 
+*/ +real NNBrownDataClassifier(NeuralNetExample d) <- + learn NNOutputVector + using NNInputVector + from new BrownReader("data/brown/their-brown80.feat") + with NeuralNetLearner { + inputCount = 4000; + hiddenCount=100; + outputCount=1; + learningRate=.3f; + momentum=.7f; + } +end \ No newline at end of file diff --git a/lbjava-mvn-plugin/pom.xml b/lbjava-mvn-plugin/pom.xml index 5c63f289..71bfa199 100644 --- a/lbjava-mvn-plugin/pom.xml +++ b/lbjava-mvn-plugin/pom.xml @@ -5,7 +5,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.3.0 + 1.3.1 lbjava-maven-plugin @@ -76,7 +76,7 @@ edu.illinois.cs.cogcomp LBJava - 1.3.0 + 1.3.1 jar compile diff --git a/lbjava/pom.xml b/lbjava/pom.xml index a68e0e78..13a58757 100644 --- a/lbjava/pom.xml +++ b/lbjava/pom.xml @@ -3,7 +3,7 @@ lbjava-project edu.illinois.cs.cogcomp - 1.3.0 + 1.3.1 4.0.0 diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java new file mode 100644 index 00000000..e25c5b5d --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/NeuralNetLearner.java @@ -0,0 +1,447 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.learn; + +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Random; + +import edu.illinois.cs.cogcomp.lbjava.classify.Feature; +import edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector; +import edu.illinois.cs.cogcomp.lbjava.classify.RealPrimitiveStringFeature; +import edu.illinois.cs.cogcomp.lbjava.classify.ScoreSet; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.Layer; +import edu.illinois.cs.cogcomp.lbjava.neuralnet.SimpleNNTrainer; + +/** + * This class will leverage the Neural Net implementation {@see edu.illinois.cs.cogcomp.lbjava.neuralnet.NeuralNetwork} + * to allow creation and use of a backprop neural net implementation including momentum, bias, and back propogation + * for learning. There is a threaded learner that works quite well ONLY where there are a large number of weights + * between layers. + * + * This class is really just a wrapper for a SimpleNNTrainer which does all the work of training. + * @author redman + */ +public class NeuralNetLearner extends Learner { + + /** computed */ + private static final long serialVersionUID = -3369861028861092661L; + + /** the parameters for learning and stuff. */ + private Parameters parameters = new Parameters(); + + /** This is the object that will train the neural net up. It uses it own + * interal mechanism and data representation for efficiency. */ + private SimpleNNTrainer trainer = null; + + /** + * our props include not only number of rounds (epochs), also a learning rate and momentum. + * @author redman + */ + public static class Parameters extends Learner.Parameters { + /** default */ + private static final long serialVersionUID = 1L; + + /** the learning rate. */ + public float learningRate = 0.5f; + + /** the momentum value. 
*/ + public float momentum = 0.5f; + + /** the momentum value. */ + public int seed = -1; + + /** the number of inputs */ + public int inputCount = 0; + + /** the number of outputs */ + public int outputCount = 1; + + /** the number of outputs from the single hidden layer */ + public int hiddenCount = 100; + + /** the layers of the neural network. */ + private Layer[] layers; + + /** + * Copy properties from the provided properties. + * @param p the props to copy. + */ + public Parameters(Parameters p) { + this.learningRate = p.learningRate; + this.momentum = p.momentum; + this.seed = p.seed; + this.inputCount = p.inputCount; + this.outputCount = p.outputCount; + this.hiddenCount = p.hiddenCount; + } + /** + * Copy properties from the provided properties. + * @param p the props to copy. + */ + public Parameters() { + this.learningRate = 0.5f; + this.momentum = 0.5f; + this.seed = -1; + this.inputCount = 0; + this.hiddenCount = 100; + this.outputCount = 1; + } + + } + + /** used to store inputs so we don't realloc these arrays over and over. This is an optimization + * only possible because we know this guys is not multithreaded. */ + private float inputs[] = null; + + /** used to store inputs so we don't realloc these arrays over and over. This is an optimization + * only possible because we know this guys is not multithreaded. */ + private float outputs[] = null; + + /** number of neurons in each layer, including input and output layers.*/ + private int[] layerSizes = null; + + /** + * Init the neural network learner by providing array with number of neurons in each layer, including + * the input layer. The caller will need to determin the number of inputs, the number of outputs and the number + * of hidden layers, and the neurons in that layer. The first index in teh layerSizes indicates the number of inputs, + * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number + * of neurons. 
+ * @param layerSizes the number of neurons in each layer. + */ + public NeuralNetLearner () { + super("Howdy"); + this.layerSizes = new int[3]; + } + + /** + * given arguments for initialization parameters. + * @param p the parameters. + */ + public NeuralNetLearner(Parameters p) { + super("Howdy"); + this.parameters = p; + } + + /** + * The learning rate takes the default value. + * @param n The name of the classifier. + */ + public NeuralNetLearner(String n) { + super(n); + } + + /** + * Init the neural network learner by providing array with number of neurons in each layer, including + * the input layer. The caller will need to determin the number of inputs, the number of outputs and the number + * of hidden layers, and the neurons in that layer. The first index in teh layerSizes indicates the number of inputs, + * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number + * of neurons. + * @param layerSizes the number of neurons in each layer. + */ + public NeuralNetLearner (int[] layerSizes, Parameters p, boolean training) { + super("Howdy"); + parameters = p; + parameters.layers = new Layer[layerSizes.length-1]; + this.layerSizes = layerSizes; + this.forget(); + } + + /** + * Resets the weight vector to all zeros. 
+ */ + public void forget() { + super.forget(); + if (this.getInputCount() != -1) { + this.layerSizes = new int[3]; + this.layerSizes[0] = this.getInputCount(); + this.layerSizes[1] = this.getHiddenCount(); + this.layerSizes[2] = this.getOutputCount(); + parameters.layers = new Layer[layerSizes.length-1]; + Layer[] l = this.parameters.layers; + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + l[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + inputs = new float[l[0].getNumberInputs()]; + outputs = new float[l[l.length-1].getNumberOutputs()]; + trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum); + } + } + + /** + * Returns a string describing the output feature type of this classifier. + * @return "real" + **/ + public String getOutputType() { + return "real"; + } + /** + * Writes the learned function's internal representation in binary form. + * @param out The output stream. + + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeFloat(this.parameters.learningRate); + out.writeFloat(this.parameters.momentum); + out.writeInt(this.parameters.rounds); + if (this.layerSizes == null) + out.writeInt(0); + else { + out.writeInt(this.layerSizes.length); + for (int neurons : this.layerSizes) + out.writeInt(neurons); + for (Layer l : this.parameters.layers) { + l.write(out); + } + } + } + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * @param in The input stream. 
+ + public void read(ExceptionlessInputStream in) { + super.read(in); + this.parameters.learningRate = in.readFloat(); + this.parameters.momentum = in.readFloat(); + this.parameters.rounds = in.readInt(); + int layers = in.readInt(); + if (layers != 0) { + int[] szs = new int[layers]; + for (int i = 0 ; i < szs.length; i++) + szs[i] = in.readInt(); + this.layerSizes = szs; + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + this.parameters.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum); + for (Layer l : this.parameters.layers) { + l.read(in); + } + } + } + + + /** + * Populate the input and output vectors with the values for only those + * features that are represented. + */ + final private void populateNNVector(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + Arrays.fill(inputs,0.0f); + Arrays.fill(outputs,0.0f); + for (int i = 0; i < exampleFeatures.length; i++) + inputs[exampleFeatures[i]] = (float)exampleValues[i]; + if (exampleLabels != null) + for (int i = 0; i < exampleLabels.length; i++) + outputs[exampleLabels[i]] = (float)labelValues[i]; + + } + + /** + * Trains the learning algorithm given an object as an example. + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + this.populateNNVector(exampleFeatures, exampleValues, exampleLabels, labelValues); + this.trainer.train(inputs, outputs); + } + + /** + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. 
+ * @return null + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + return null; + } + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + this.populateNNVector(f, v, null, null); + + // this returns the activation energies for ALL layers, we only wan the output layer + float[][] results = this.trainer.activate(inputs); + + // the last vector contains the score, this is the output of the last layer. + return new RealPrimitiveStringFeature(containingPackage, name, "", results [results.length-1][0]); + } + + /** + * Simply computes the dot product of the weight vector and the example + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed real value. + **/ + public double realValue(int[] exampleFeatures, double[] exampleValues) { + this.populateNNVector(exampleFeatures, exampleValues, null, null); + return (double) this.trainer.activate(inputs)[0][0]; + } + + /** + * Simply computes the dot product of the weight vector and the feature vector extracted from + * the example object. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed feature (in a vector). + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate} and {@link #bias}. + * @param out The output stream. 
+ */ + public void write(PrintStream out) { + out.println(name + ": " + this.parameters.learningRate + ", " + this.parameters.momentum + ", " + this.parameters.rounds); + for (Layer l : this.parameters.layers) { + l.write(out); + } + } + + /** + * Returns a deep clone of this learning algorithm. + * TODO + */ + public Object clone() { + NeuralNetLearner clone = null; + try { + clone = (NeuralNetLearner) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning StochasticGradientDescent: " + e); + System.exit(1); + } + return clone; + } + + /** + * @return the seed to seed all random number gen. + */ + public int getSeed() { + return this.parameters.seed; + } + + /** + * @param seed the seed to set + */ + public void setSeed(int seed) { + this.parameters.seed = seed; + } + + /** + * @return the number of total inputs + */ + public int getInputCount() { + return this.parameters.inputCount; + } + + /** + * @param inputCount the inputCount to set + */ + public void setInputCount(int inputCount) { + this.parameters.inputCount = inputCount; + } + + /** + * @return the outputCount + */ + public int getOutputCount() { + return this.parameters.outputCount; + } + + /** + * @param outputCount the outputCount to set + */ + public void setOutputCount(int outputCount) { + this.parameters.outputCount = outputCount; + } + + /** + * @return the hiddenCount + */ + public int getHiddenCount() { + return this.parameters.hiddenCount; + } + + /** + * @param hiddenCount the hiddenCount to set + */ + public void setHiddenCount(int hiddenCount) { + this.parameters.hiddenCount = hiddenCount; + } + + /** + * @return the learning rate used to throttle the rate at wich the weight parameters change. + */ + public float getLearningRate() { + return parameters.learningRate; + } + + /** + * set the learning rate at which the weight parameters change. + * @param learningRate the learning rate at which the weight parameters change. 
+ */ + public void setLearningRate(float learningRate) { + this.parameters.learningRate = learningRate; + } + + public float getMomentum() { + return parameters.momentum; + } + + /** + * set the value used to prevent convergence against local minimum. + * @param momentum used to prevent convergence against local minimum. + */ + public void setMomentum(float momentum) { + this.parameters.momentum = momentum; + } + + /** + * Get the number of epochs. + * @return number of epochs to train. + */ + public int getEpochs() { + return parameters.rounds; + } + + /** + * set the number of training iterations. More should yield better results, until overfit. + * @param learningRate set the number of training iterations. + */ + public void setEpochs(int epochs) { + this.parameters.rounds = epochs; + } + + /** + * Retrieves the parameters that are set in this learner. + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + return parameters; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java new file mode 100644 index 00000000..20929909 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java @@ -0,0 +1,71 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * This thread will compute a single activtion value, for each layer + * setup must be called to provide the output array, the layer and the + * input values. + * @author redman + */ +class ActThread extends PushThread { + + /** the input data. */ + float[] currentInputs = null; + + /** the layer we are operating on. */ + Layer layer = null; + + /** the resulting outputs are stored here, this array is shared + * by all threads activating on this layer. */ + float [] layerActivations = null; + + /** used to make the name of the thread unique. */ + private static int inc = 0; + + /** + * init with a mux. + * @param m the multiplexer. + */ + ActThread() { + super("ActThread-"+(inc++)); + } + + /** + * before we start a layer, this is called to set up the thread. + * @param ci the input data. + * @param l the layer. + * @param la the layer actvation values. + * @param mux the multiplexer. + */ + void setup(float[] ci, Layer l, float[] la) { + this.currentInputs = ci; + this.layer = l; + this.layerActivations = la; + } + + /** + * Run forever never quite. + */ + public void run() { + synchronized (this) { + while(true) { + + // wait for the range object to be set. 
+ Range r = this.getRange(); + if (r == null) + return; + for (int indx = r.start; indx < r.end; indx++) { + layerActivations[indx] = layer.computeOneOutput(indx, currentInputs); + } + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java new file mode 100644 index 00000000..74b25f0a --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java @@ -0,0 +1,25 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Implementations will activate all the layers of the net and + * produce a set of outputs. The one required method will return + * all the output values. + * @author redman + */ +public interface Activator { + + /** + * Activate the provided layer, return the resulting outputs. + * @param inputs the input data. + * @param layer the layer to supply the inputs to. + * @return the output values. + */ + public float[] prediction(float[] inputs); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java new file mode 100644 index 00000000..8237a18b --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java @@ -0,0 +1,27 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Measure convergence, simplest implementation may simply run some number of epochs, + * more sophosticated will look some function of cumulative error going to zero at the + * end of an epoch. Conversion is always measured at the end of a training cycle. + * @author redman + */ +public interface ConvergenceMeasure { + + /** + * With the given inputs and outputs, evaluate the results of the last iteration, + * determine the error, probably store that, and if convergence (what whatever measure) + * is achieved, return true, else return false. + * + * @param learner the learner being used to train up the neural net, contains the cummulative error. + * @return true if converged. + */ + public boolean evaluate(NNTrainingInterface learner); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java new file mode 100644 index 00000000..acd54807 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/DatasetReader.java @@ -0,0 +1,98 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.InputStream; +import java.io.FileInputStream; +import java.io.IOException; + +/** + * @author redman + * + */ +public class DatasetReader { + + /** + * flip the byte order. + * @param is input stream. + * @return the integer. 
+ * @throws IOException + */ + private static int readInt(InputStream is) throws IOException { + int i0 = is.read(); + int i1 = is.read(); + int i2 = is.read(); + int i3 = is.read(); + return (i0<<24) + (i1<<16) + (i2<<8) + i3; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the input examples. + * @throws IOException + */ + public static float[][] getExampleInputs(String filename) throws IOException { + InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename))); + int m1 = readInt(dis); + if (m1 != 2051) + throw new IOException("That was not an example file! magic code = "+m1); + int numExamples = readInt(dis); + if (numExamples != 60000) + System.out.println("We expecting 60k examples "+m1); + int numRows = readInt(dis); + if (numRows != 28) + System.out.println("We expecting 28 rows "+numRows); + int numColumns = readInt(dis); + if (numColumns != 28) + System.out.println("We expecting 28 columns "+numColumns); + int totalpixels = numRows*numColumns; + float [][] examples = new float [numExamples][totalpixels]; + for (int i = 0 ; i < examples.length; i++) { + for (int j = 0; j < totalpixels; j++) { + examples[i][j] = (float)(dis.read()/128f) - 1f; + } + } + return examples; + } + + /** + * get the examples form an NIST dataset, return everything at once. There are + * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = + * 47 million floats. These are input examples, so they are image data. + * @param filename + * @return the output examples. 
+ * @throws IOException + */ + public static float[][] getExampleOutputs(String filename) throws IOException { + InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename))); + int m1 = readInt(dis); + if (m1 != 2049) + throw new IOException("That was not an example file! magic code = "+m1); + int numExamples = readInt(dis); + float [][] examples = new float [numExamples][1]; + for (int i = 0 ; i < numExamples; i++) { + examples[i][0] = (float)(dis.read()/5f) - 1f; + } + return examples; + } + + /** + * @param a + * @throws IOException + */ + @SuppressWarnings("unused") + public static void main(String[]a) throws IOException { + float[][] examples = getExampleInputs("/Users/redman/Desktop/NNTrainingData/train-images-idx3-ubyte"); + float[][] labels = getExampleOutputs("/Users/redman/Desktop/NNTrainingData/train-labels-idx1-ubyte"); + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java new file mode 100644 index 00000000..a794cf6a --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/EpochConvergenceMeasure.java @@ -0,0 +1,42 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Terminate agfter so many iterations. + * @author redman + */ +public class EpochConvergenceMeasure implements ConvergenceMeasure { + + /** the current epoch count. */ + private int epoch = 0; + + /** the current epoch count. */ + private int max; + + /** + * Takes the number of iterations. + * @param m the max iterations. 
+ */ + public EpochConvergenceMeasure(int m) { + this.max = m; + } + + /** + * @see edu.illinois.cs.cogcomp.lbjava.neuralnet.ConvergenceMeasure#evaluate(edu.illinois.cs.cogcomp.lbjava.neuralnet.NNTrainingInterface) + */ + @Override + public boolean evaluate(NNTrainingInterface learner) { + epoch++; + if (epoch > max) { + return true; + } else + return false; + } + +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java new file mode 100644 index 00000000..29adfeb0 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Layer.java @@ -0,0 +1,277 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.io.PrintStream; +import java.util.Random; + +/** + * This is a layer in a neural net. it is characterized by a number of inputs + * and a number of outputs. The neurons (perceptrons) are hidden within, I see + * no reason to expose them. this allows the layer class to do all computations + * across the entire layer in one pass, which is very efficient. Downside; Nothing + * in this implementation will allow you to assign per neuron attributes. Also, the + * weights are represented by a primitive array, so only 32 bit indices meaning no + * more than 2 ^ 32 weights are allowed. + * @author redman + */ +public class Layer { + + /** number of inputs to this layer. */ + private int numberInputs; + + /** the number of outputs from this layer. */ + private int numberOutputs; + + /** the neuron weights. */ + private float[] weights; + + /** the derived outputs. 
*/ + private float[] dweights; + + /** collects output values. */ + private float[] outputs; + + /** + * The layer constructed. + * @param numIn the number of inputs. + * @param numOut the number of outputs. + */ + public Layer(int numIn, int numOut) { + this(numIn, numOut, new Random()); + outputs = new float[numOut]; + } + + /** + * The layer constructed. + * @param numIn the number of inputs. + * @param numOut the number of outputs. + * @param r the random num generator. + */ + public Layer(int numIn, int numOut, Random r) { + this.numberInputs = numIn; + this.numberOutputs = numOut; + int wl = (numIn+1)*numOut; + weights = new float[wl]; + dweights = new float[wl]; + for (int i = 0; i < wl; i++) + weights [i] = (r.nextFloat() - 0.5f) * 4f; + outputs = new float[numOut]; + } + + /** + * Compute the sigmoid first derivative. + * @param x the input value + * @return the sigmoid + */ + final private float sigmoid(float x) { + return (float) (1.0 / (1.0 + Math.exp(-x))); + } + + /** + * @return the weights + */ + public float[] getWeights() { + return weights; + } + + /** + * @param weights the weights to set + */ + public void setWeights(float[] weights) { + this.weights = weights; + } + + /** + * @return the numberInputs + */ + public int getNumberInputs() { + return numberInputs; + } + + /** + * @param numberInputs the numberInputs to set + */ + public void setNumberInputs(int numberInputs) { + this.numberInputs = numberInputs; + } + + /** + * @return the numberOutputs + */ + public int getNumberOutputs() { + return numberOutputs; + } + + /** + * @param numberOutputs the numberOutputs to set + */ + public void setNumberOutputs(int numberOutputs) { + this.numberOutputs = numberOutputs; + } + + /** + * This granularity of method invocation is only necessary so parallelize + * the process. + * @param index the index of the input to compute the output for. + * @param inputs the inputs. + * @return the activation output. 
+ */ + final float computeOneOutput(int index, float[] inputs) { + float result = 0.0f; + int nI = this.numberInputs; + int start = index * (nI+1); + for (int k = 0 ; k < nI ; k++) { + result += weights[start+k] * inputs[k]; + } + result += weights[start+nI]; + return (float) sigmoid(result); + } + + /** + * Given a set of inputs, produce the set of activation + * values. + * @param inputs the inputs to produce the predictions for. + * @return the set of predictions. + */ + final public float[] activate(float[] inputs) { + int nO = this.numberOutputs; + float[] o = this.outputs; + for (int j = 0 ; j < nO ; j++) { + o[j] = this.computeOneOutput(j, inputs); + } + return outputs; + } + + /** + * train up weights for just one output. Thread safety must be noted here, since everybody will be + * updating the nextError array at the same time. To avoid doing repeated synchronizations which are + * expensive here, for multithreaded trainer, we pass in a dummy error array, update at will, then + * the caller is responsible for synchronizing on the real one and updating the shared sum error array. + * @param error the activation errors used to compute the backprop value. + * @param input the input date. + * @param output the computed output data. + * @param learningRate the learning rate. 
+ * @param momentum the momentum + * @param nextError the array where the error values will be updated + * @param outIndex the output index; + */ + final public void trainOne(float[] error, float[] input, float[] output, float learningRate, float momentum, float[] nextError, int outIndex) { + int woffset = (this.numberInputs+1) * outIndex; + float d = error[outIndex] * (output[outIndex] * (1 - output[outIndex])); + for (int j = 0; j < this.numberInputs; j++) { + int windx = woffset + j; + nextError[j] += weights[windx] * d; + float dw = input[j] * d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + + // compute the error for the bias, the fake bias input is always 1. + int windx = woffset + input.length; + nextError[input.length] += weights[windx] * d; + float dw = d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + + /** + * given a set of errors (errors from the next layer on), and adjust the weights + * to do a gradient descent. + * @param error the output errors. + * @param input the input data. + * @param output the desired output. + * @param learningRate the rate of learning. + * @param momentum helps to avoid local minima. + * @return the errors from this layer. 
+ */ + final public float[] train(float[] error, float[] input, float[] output, float learningRate, float momentum) { + int nI = this.numberInputs+1/*for the bias*/; + float[] nextError = new float[nI]; + for (int i = 0; i < this.numberOutputs; i++) { + //this.trainOne(error, input, output, learningRate, momentum, nextError, i); + + int woffset = nI * i; + float d = error[i] * (output[i] * (1 - output[i])); + for (int j = 0; j < this.numberInputs; j++) { + int windx = woffset + j; + nextError[j] += weights[windx] * d; + float dw = input[j] * d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + + // compute the error for the bias, the fake bias input is always 1. + int windx = woffset + input.length; + nextError[input.length] += weights[windx] * d; + float dw = d * learningRate; + weights[windx] += dweights[windx] * momentum + dw; + dweights[windx] = dw; + } + return nextError; + } + + /** + * print out the weights. + */ + public void print() { + System.out.print(this.numberInputs+":"+this.numberOutputs); + System.out.print(" "); + for (float w : weights) { + System.out.format(" %.8f",w); + } + System.out.print(" ("); + for (float w : dweights) { + System.out.format(" %.8f",w); + } + System.out.println(")"); + } + + /** + * @return the dweights + */ + public float[] getDweights() { + return dweights; + } + + /** + * @param dweights the dweights to set + */ + public void setDweights(float[] dweights) { + this.dweights = dweights; + } + + /** + * used for reporting mostely. + */ + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("in : "+this.numberInputs+" out : "+this.numberOutputs); + sb.append("\n"); + for (int i = 0; i < weights.length;) { + for (int j = 0; j < this.numberInputs;j++,i++) { + sb.append(" "+weights[i]); + sb.append(" "); + } + sb.append("\n"); + } + return sb.toString(); + } + + /** + * Write the representation to a digital output stream. 
@param out the output stream for serialization. NOTE(review): values are printed with no separators or line terminators, so this output cannot be parsed back — confirm intended.
*/ + private static int inc = 0; + + /** + * The learning rate and momentum will not change, so we will take them initially. + * @param lR the learning rate. + * @param m the momentum. + * @param mux the multiplexer. + */ + LearnerThread(float lR, float m) { + super("LearnerThread-"+(inc++)); + this.learnRate = lR; + this.momentum = m; + } + + /** + * before we start a layer, this is called to set up the thread. + * @param error the error from the next layer, used to calc this layers error. + * @param input the input data. + * @param output the result data. + * @param nextError put the next layers input error here. + * @param layer the layer we operate on. + */ + void setup(float [] error, float [] input, float [] output, float[] nextError, Layer layer) { + this.error = error; + this.input = input; + this.output = output; + this.nextError = nextError; + this.layer = layer; + this.errorWorkspace = new float[nextError.length]; + Arrays.fill(this.errorWorkspace, 0); + } + + /** + * Run till we complete the layer, then finish up. + */ + public void run() { + synchronized (this) { + while(true) { + + // wait for the range object to be set. + Range r = this.getRange(); + if (r == null) + return; + for (int indx = r.start; indx < r.end; indx++) { + layer.trainOne(error, input, output, learnRate, momentum, errorWorkspace, indx); + } + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java new file mode 100644 index 00000000..84e29922 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java @@ -0,0 +1,23 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * @author redman + */ +public interface NNTrainingInterface { + + /** + * Given a set of examples, and a set of desired outputs, train the network + * represented by the provided network layers the provided number of epochs. + * @param inputs the input data to train against. + * @param outputs the desired outputs. + * @param epochs the number of training iterations to run. + */ + public void train(float[][] inputs, float[][]outputs, int epochs); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java new file mode 100644 index 00000000..9dada667 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java @@ -0,0 +1,84 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * Threads will operate on a range, this superclass contains that + * range and handles atomic synchronized access. + * @author redman + */ +public class PushThread extends Thread { + + /** the range to operate on. */ + protected Range range = null; + + /** set when this thread is waiting for input. */ + private boolean idle = false; + /** + * the push thread takes the name ofthe thread, to pass to + * the super. + * @param name the name of the thread. 
+ */ + PushThread(String name) { + super(name); + } + + /** + * set the range of things to operate on. + * @param range + */ + synchronized void setRange(Range range) { + this.range = range; + this.notifyAll(); + } + + /** + * call this when we are done. + */ + synchronized void done() { + this.range = null; + this.interrupt(); + } + + /** + * wait for the thread to complete it's run, it will set + * poised and block till it gets data. + */ + final synchronized public void waitIdle() { + while(!idle || range != null) + try { + this.wait(); + } catch (InterruptedException e) { + } + } + + /** + * wait for the next range. + * @return the range. + */ + final synchronized protected Range getRange() { + while (range == null) + try { + this.idle = true; + this.notify(); // somebody waiting for completion? + this.wait(); + } catch (InterruptedException e) { + if (this.isInterrupted()) { + System.out.println("Interrupted error."); + return null; + } + } + Range r = range; + range = null; + this.idle = false; + return r; + } + +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java new file mode 100644 index 00000000..951c3144 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java @@ -0,0 +1,150 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Random; + +/** + * This class will simply learn up the NeuralNet layers, single threaded. + * @author redman + */ +public class SimpleNNTrainer implements NNTrainingInterface { + + /** the layers of the neural network. 
*/ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + private float learningRate = .3f; + + /** this prevents local minimum capture. */ + private float momentum = .6f; + + /** + * Need the layer data, learning rate and momentum. + * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + */ + public SimpleNNTrainer(Layer[] l, float rate, float mom) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [] classify(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations[layerCount-1]; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [][] activate(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. 
+ float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations; + } + + /** + * Train with one example. + * @param inputs input data. + * @param outputs the labeled data. + * @param epochs + */ + public void train(float[] inputs, float[]outputs) { + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = this.activate(inputs); + + // now we have all the activations. + float[] calcOut = activations[activations.length-1]; + int errlen = calcOut.length; + float [] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[i] - calcOut[i]; // negative error + } + for (int i = layers.length - 1; i > 0; i--) { + error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum); + } + error = layers[0].train(error,inputs, activations[0], this.learningRate, this.momentum); + } + /** + * Execute the given number of epochs, then exit whatever the error. + * @param inputs the input examples. + * @param outputs the labels. + * @param layers + */ + @Override + public void train(float[][] inputs, float[][]outputs, int epochs) { + + // error checking. + if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records."); + int totalInputs = inputs.length; + + // set up our counts. + int layerCount = layers.length; + Random r = new Random(34565); + for (int epoch = 0; epoch < epochs; epoch++) { + for (int inindx = 0; inindx < totalInputs; inindx++) { + int iI = r.nextInt(totalInputs); + // storage for each output of each layer, and the error computed for each activation. 
+ float [][] activations = this.activate(inputs[iI]); + + // now we have all the activations. + float[] calcOut = activations[layerCount-1]; + int errlen = calcOut.length; + float [] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[iI][i] - calcOut[i]; // negative error + } + for (int i = layers.length - 1; i > 0; i--) { + error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum); + } + error = layers[0].train(error,inputs[iI],activations[0], this.learningRate, this.momentum); + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java new file mode 100644 index 00000000..01980fb2 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java @@ -0,0 +1,338 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Arrays; +import java.util.Random; + +/** + * This class will simply learn up the NeuralNet layers, single threaded. + * @author redman + */ +public class ThreadedNNTrainer implements NNTrainingInterface { + + /** the layers of the neural network. */ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + private float learningRate = .3f; + + /** this prevents local minimum capture. */ + private float momentum = .6f; + + /** this is the number of threads we will use, by default, number of processors on the machine. */ + private int numThreads = Runtime.getRuntime().availableProcessors(); + + /** + * Need the layer data, learning rate and momentum. 
+ * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + */ + public ThreadedNNTrainer(Layer[] l, float rate, float mom) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + } + + /** + * Need the layer data, learning rate and momentum. + * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + * @param numThreads number of threads to deploy. + */ + public ThreadedNNTrainer(Layer[] l, float rate, float mom, int numThreads) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + this.numThreads = numThreads; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [][] activate(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations; + } + + /** + * Execute the given number of epochs, then exit whatever the error. + * @param inputs the input examples. + * @param outputs the labels. + * @param epochs the number of iterations to perform. + */ + @Override + final public void train(float[][] inputs, float[][] outputs, int epochs) { + // error checking. 
+ if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records."); + + // iterate this number of times. + int numExamples = inputs.length; + + // For each layer, compute the ranges of indices to operate on. This will allow us to + // continue computing on a thread without handshakes. + int ll = layers.length; + Range[][] ranges = new Range[ll][]; + for (int i = 0; i < ll ; i++) { + Layer l = layers[i]; + int no = l.getNumberOutputs(); + int increment = no / numThreads; + int onsies; + if (increment == 0) { + onsies = no; + ranges[i] = new Range[onsies]; + } else { + onsies = no % numThreads; + ranges[i] = new Range[numThreads]; + } + int start = 0; + for (int j = 0 ; j < ranges[i].length && start < no; j++) { + int end = start + increment; + if (onsies != 0) { + end++; + onsies--; + } + ranges[i][j] = new Range(start, end); + start = end; + } + } + + // create the threads to run against the activation mux. + ActThread[] actThreads = new ActThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + actThreads[i] = new ActThread(); + actThreads[i].start(); + } + + // create the threads to run against the activation mux. + LearnerThread[] learnerThreads = new LearnerThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + learnerThreads[i] = new LearnerThread(this.learningRate, this.momentum); + learnerThreads[i].start(); + } + + // set up our counts. + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float[][] activations = new float[layerCount][]; + for (int i = 0; i < layerCount; i++) { + activations[i] = new float[layers[i].getNumberOutputs()]; + } + + Thread.yield(); + Thread.yield(); + Thread.yield(); + Random r = new Random(34565); + + // do the specified number of epochs. 
+ for (int epoch = 0; epoch < epochs; epoch++) { + for (int inindx = 0; inindx < numExamples; inindx++) { + int iI = r.nextInt(numExamples); + + // zero activations + for (int i = 0; i < layerCount; i++) { + Arrays.fill(activations[i], 0.0f); + } + + // This array contains inputs from previous layer output + float[] currentinputs = inputs[iI]; + + // for each layer, do the activations. + for (int i = 0; i < layerCount; i++) { + Layer layer = layers[i]; + + // set up the threads + float[] acts = activations[i]; + int rl = ranges[i].length; + for (int j = 0; j < rl; j++) { + actThreads[j].setup(currentinputs, layer, acts); + actThreads[j].setRange(ranges[i][j]); + } + + // wait for them to finish. + for (int j = 0; j < rl; j++) { + actThreads[j].waitIdle(); + } + currentinputs = acts; + } + + ////////////////////////////////// + // compute output errors. + // now we have all the activations, lets do error propogation. + float[] calcOut = activations[layerCount - 1]; + int errlen = calcOut.length; + float[] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[iI][i] - calcOut[i]; // negative error + } + + ////////////////////////////////// + // propogate the errors back and adjust weights. + // now learn from out errors. + for (int i = layerCount - 1; i > 0; i--) { + Layer layer = layers[i]; + int nI = layer.getNumberInputs() + 1/*for the bias*/; + float[] nextError = new float[nI]; + + // set up the threads + int rl = ranges[i].length; + for (int j = 0; j < rl; j++) { + learnerThreads[j].setup(error, activations[i - 1], activations[i], nextError, layer); + learnerThreads[j].setRange(ranges[i][j]); + } + + // wait for complete, then set up next layer run. + // wait for them to finish. + for (int j = 0; j < rl; j++) { + learnerThreads[j].waitIdle(); + } + + // now we must sum all the errors for each of the threads. 
+ int esize = nextError.length; + for (int ei = 0; ei < esize; ei++) { + for (int j = 0; j < rl; j++) { + nextError[ei] += learnerThreads[j].errorWorkspace[ei]; + } + } + error = nextError; + } + + // The setup for the first layer is computed using the actual inputs, so we do this + // a bit differently. + Layer layer = layers[0]; + int rl = ranges[0].length; + int nI = layer.getNumberInputs() + 1/*for the bias*/; + float[] nextError = new float[nI]; + for (int j = 0; j < rl; j++) { + learnerThreads[j].setup(error, inputs[iI], activations[0], nextError, layer); + learnerThreads[j].setRange(ranges[0][j]); + } + + // wait for complete, then set up next layer run. + // wait for them to finish. + for (int j = 0; j < rl; j++) { + learnerThreads[j].waitIdle(); + } + } + + // check for convergence. + float sumerr = 0; + for (int inputIdx = 0; inputIdx < outputs.length; inputIdx++) { + + // storage for each output of each layer, and the error computed for each activation. + float [][] a = this.activate(inputs[inputIdx]); + float[] outs = a[layerCount-1]; + float pred = outs[0]; + float label = outputs[inputIdx][0]; + sumerr = pred > label ? pred - label : label - pred; + } + System.out.format("%d) error = %.18f\n",epoch,(sumerr/(float)outputs.length)); + } + } + + /** just holds range of datums to operate on. */ + static class Range { + int start; + int end; + Range(int s, int e) { + start = s; + end = e; + } + public String toString() { + return start+"-"+end; + } + } + + /** + * this class coordinates the activities of a set of threads by handing + * out indexes that need operated on in a threadsafe way. If a request is made + * for an index, and non are available, the thread will wait until notified. + * @author redman + */ + static class Multiplexer { + + /** these are the ranges for the layer we operate on, these inited once and reused each epoch. */ + private Range[] ranges = null; + + /** the number of elements we are counting down from. 
*/ + private int count = 0; + + /** number of threads operating. */ + private int waiting = 0; + + /** the number of threads sharing this multiplexer. */ + private int numThreads = 0; + + /** + * We need the number of elements in the layer to operate on. + * @param numThreads the total number of threads. + */ + Multiplexer(int numThreads) { + this.numThreads = numThreads; + } + + /** + * Start this process. This should be called by the main thread where + * coordination occures. This will be accessed by the done method. + * @param ranges the range of indices to operate on. + * @param compLock use this as a semaphor + */ + synchronized void startAndWait(Range[] ranges) { + this.count = 0; + this.ranges = ranges; + this.waiting = 0; + this.notifyAll(); + while (waiting != numThreads) { + try { + this.wait(); + } catch (InterruptedException e1) { + } + } + } + + /** + * get the next available index, or block till one is available. + * @return the index. + */ + synchronized Range getNextIndex() { + while (ranges == null || count == ranges.length) { + try { + this.waiting++; + if (waiting == numThreads) + this.notifyAll(); + this.wait(); + this.waiting--; + } catch (InterruptedException e) { + } + } + return ranges[count++]; + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java new file mode 100644 index 00000000..c8623369 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java @@ -0,0 +1,21 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +/** + * This package contains the Neural Network implemented employed by LBJava. 
This + * implementation supports bias, momentum and back prop, and is designed with + * efficiency in mind. The implementation contract includes an API for trainers + * {@see NNTrainingInterface} that defines the API for the any trainers. A single + * threaded trainer is provided. There is also a multithreaded trainer, which helps + * when there are a very large number of weights between layers. + * + * There is also a {@see Layer} class which implements functionality specific + * to neural net layers within the system. However, there is no representation of + * neuron within the API, this was decided upon to ensure good performance. + * @author redman + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; diff --git a/pom.xml b/pom.xml index acabfcea..db0a0881 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ edu.illinois.cs.cogcomp lbjava-project pom - 1.3.0 + 1.3.1 lbjava
+ * This class is really just a wrapper for a SimpleNNTrainer which does all the work of training. + * @author redman + */ +public class NeuralNetLearner extends Learner { + + /** computed */ + private static final long serialVersionUID = -3369861028861092661L; + + /** the parameters for learning and stuff. */ + private Parameters parameters = new Parameters(); + + /** This is the object that will train the neural net up. It uses it own + * interal mechanism and data representation for efficiency. */ + private SimpleNNTrainer trainer = null; + + /** + * our props include not only number of rounds (epochs), also a learning rate and momentum. + * @author redman + */ + public static class Parameters extends Learner.Parameters { + /** default */ + private static final long serialVersionUID = 1L; + + /** the learning rate. */ + public float learningRate = 0.5f; + + /** the momentum value. */ + public float momentum = 0.5f; + + /** the momentum value. */ + public int seed = -1; + + /** the number of inputs */ + public int inputCount = 0; + + /** the number of outputs */ + public int outputCount = 1; + + /** the number of outputs from the single hidden layer */ + public int hiddenCount = 100; + + /** the layers of the neural network. */ + private Layer[] layers; + + /** + * Copy properties from the provided properties. + * @param p the props to copy. + */ + public Parameters(Parameters p) { + this.learningRate = p.learningRate; + this.momentum = p.momentum; + this.seed = p.seed; + this.inputCount = p.inputCount; + this.outputCount = p.outputCount; + this.hiddenCount = p.hiddenCount; + } + /** + * Copy properties from the provided properties. + * @param p the props to copy. + */ + public Parameters() { + this.learningRate = 0.5f; + this.momentum = 0.5f; + this.seed = -1; + this.inputCount = 0; + this.hiddenCount = 100; + this.outputCount = 1; + } + + } + + /** used to store inputs so we don't realloc these arrays over and over. 
This is an optimization + * only possible because we know this guys is not multithreaded. */ + private float inputs[] = null; + + /** used to store inputs so we don't realloc these arrays over and over. This is an optimization + * only possible because we know this guys is not multithreaded. */ + private float outputs[] = null; + + /** number of neurons in each layer, including input and output layers.*/ + private int[] layerSizes = null; + + /** + * Init the neural network learner by providing array with number of neurons in each layer, including + * the input layer. The caller will need to determin the number of inputs, the number of outputs and the number + * of hidden layers, and the neurons in that layer. The first index in teh layerSizes indicates the number of inputs, + * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number + * of neurons. + * @param layerSizes the number of neurons in each layer. + */ + public NeuralNetLearner () { + super("Howdy"); + this.layerSizes = new int[3]; + } + + /** + * given arguments for initialization parameters. + * @param p the parameters. + */ + public NeuralNetLearner(Parameters p) { + super("Howdy"); + this.parameters = p; + } + + /** + * The learning rate takes the default value. + * @param n The name of the classifier. + */ + public NeuralNetLearner(String n) { + super(n); + } + + /** + * Init the neural network learner by providing array with number of neurons in each layer, including + * the input layer. The caller will need to determin the number of inputs, the number of outputs and the number + * of hidden layers, and the neurons in that layer. The first index in teh layerSizes indicates the number of inputs, + * the middle layers sizes are determined by the middle integer sizes, and the number of outputs is the last number + * of neurons. + * @param layerSizes the number of neurons in each layer. 
+ */ + public NeuralNetLearner (int[] layerSizes, Parameters p, boolean training) { + super("Howdy"); + parameters = p; + parameters.layers = new Layer[layerSizes.length-1]; + this.layerSizes = layerSizes; + this.forget(); + } + + /** + * Resets the weight vector to all zeros. + */ + public void forget() { + super.forget(); + if (this.getInputCount() != -1) { + this.layerSizes = new int[3]; + this.layerSizes[0] = this.getInputCount(); + this.layerSizes[1] = this.getHiddenCount(); + this.layerSizes[2] = this.getOutputCount(); + parameters.layers = new Layer[layerSizes.length-1]; + Layer[] l = this.parameters.layers; + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + l[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + inputs = new float[l[0].getNumberInputs()]; + outputs = new float[l[l.length-1].getNumberOutputs()]; + trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum); + } + } + + /** + * Returns a string describing the output feature type of this classifier. + * @return "real" + **/ + public String getOutputType() { + return "real"; + } + /** + * Writes the learned function's internal representation in binary form. + * @param out The output stream. + + public void write(ExceptionlessOutputStream out) { + super.write(out); + out.writeFloat(this.parameters.learningRate); + out.writeFloat(this.parameters.momentum); + out.writeInt(this.parameters.rounds); + if (this.layerSizes == null) + out.writeInt(0); + else { + out.writeInt(this.layerSizes.length); + for (int neurons : this.layerSizes) + out.writeInt(neurons); + for (Layer l : this.parameters.layers) { + l.write(out); + } + } + } + + /** + * Reads the binary representation of a learner with this object's run-time type, overwriting + * any and all learned or manually specified parameters as well as the label lexicon but without + * modifying the feature lexicon. + * @param in The input stream. 
+ + public void read(ExceptionlessInputStream in) { + super.read(in); + this.parameters.learningRate = in.readFloat(); + this.parameters.momentum = in.readFloat(); + this.parameters.rounds = in.readInt(); + int layers = in.readInt(); + if (layers != 0) { + int[] szs = new int[layers]; + for (int i = 0 ; i < szs.length; i++) + szs[i] = in.readInt(); + this.layerSizes = szs; + Random r = new Random (1234); + for (int i = 0; i < layerSizes.length-1; i++) { + this.parameters.layers[i] = new Layer(layerSizes[i], layerSizes[i+1], r); + } + trainer = new SimpleNNTrainer(parameters.layers, parameters.learningRate, parameters.momentum); + for (Layer l : this.parameters.layers) { + l.read(in); + } + } + } + + + /** + * Populate the input and output vectors with the values for only those + * features that are represented. + */ + final private void populateNNVector(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + Arrays.fill(inputs,0.0f); + Arrays.fill(outputs,0.0f); + for (int i = 0; i < exampleFeatures.length; i++) + inputs[exampleFeatures[i]] = (float)exampleValues[i]; + if (exampleLabels != null) + for (int i = 0; i < exampleLabels.length; i++) + outputs[exampleLabels[i]] = (float)labelValues[i]; + + } + + /** + * Trains the learning algorithm given an object as an example. + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @param exampleLabels The example's label(s). + * @param labelValues The labels' values. + **/ + public void learn(int[] exampleFeatures, double[] exampleValues, int[] exampleLabels, + double[] labelValues) { + this.populateNNVector(exampleFeatures, exampleValues, exampleLabels, labelValues); + this.trainer.train(inputs, outputs); + } + + /** + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. 
+ * @return null + **/ + public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { + return null; + } + + /** + * Returns the classification of the given example as a single feature instead of a + * {@link FeatureVector}. + * @param f The features array. + * @param v The values array. + * @return The classification of the example as a feature. + **/ + public Feature featureValue(int[] f, double[] v) { + this.populateNNVector(f, v, null, null); + + // this returns the activation energies for ALL layers, we only wan the output layer + float[][] results = this.trainer.activate(inputs); + + // the last vector contains the score, this is the output of the last layer. + return new RealPrimitiveStringFeature(containingPackage, name, "", results [results.length-1][0]); + } + + /** + * Simply computes the dot product of the weight vector and the example + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed real value. + **/ + public double realValue(int[] exampleFeatures, double[] exampleValues) { + this.populateNNVector(exampleFeatures, exampleValues, null, null); + return (double) this.trainer.activate(inputs)[0][0]; + } + + /** + * Simply computes the dot product of the weight vector and the feature vector extracted from + * the example object. + * + * @param exampleFeatures The example's array of feature indices. + * @param exampleValues The example's array of feature values. + * @return The computed feature (in a vector). + **/ + public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) { + return new FeatureVector(featureValue(exampleFeatures, exampleValues)); + } + + /** + * Writes the algorithm's internal representation as text. In the first line of output, the name + * of the classifier is printed, followed by {@link #learningRate} and {@link #bias}. + * @param out The output stream. 
+ */ + public void write(PrintStream out) { + out.println(name + ": " + this.parameters.learningRate + ", " + this.parameters.momentum + ", " + this.parameters.rounds); + for (Layer l : this.parameters.layers) { + l.write(out); + } + } + + /** + * Returns a deep clone of this learning algorithm. + * TODO + */ + public Object clone() { + NeuralNetLearner clone = null; + try { + clone = (NeuralNetLearner) super.clone(); + } catch (Exception e) { + System.err.println("Error cloning StochasticGradientDescent: " + e); + System.exit(1); + } + return clone; + } + + /** + * @return the seed to seed all random number gen. + */ + public int getSeed() { + return this.parameters.seed; + } + + /** + * @param seed the seed to set + */ + public void setSeed(int seed) { + this.parameters.seed = seed; + } + + /** + * @return the number of total inputs + */ + public int getInputCount() { + return this.parameters.inputCount; + } + + /** + * @param inputCount the inputCount to set + */ + public void setInputCount(int inputCount) { + this.parameters.inputCount = inputCount; + } + + /** + * @return the outputCount + */ + public int getOutputCount() { + return this.parameters.outputCount; + } + + /** + * @param outputCount the outputCount to set + */ + public void setOutputCount(int outputCount) { + this.parameters.outputCount = outputCount; + } + + /** + * @return the hiddenCount + */ + public int getHiddenCount() { + return this.parameters.hiddenCount; + } + + /** + * @param hiddenCount the hiddenCount to set + */ + public void setHiddenCount(int hiddenCount) { + this.parameters.hiddenCount = hiddenCount; + } + + /** + * @return the learning rate used to throttle the rate at wich the weight parameters change. + */ + public float getLearningRate() { + return parameters.learningRate; + } + + /** + * set the learning rate at which the weight parameters change. + * @param learningRate the learning rate at which the weight parameters change. 
+ */ + public void setLearningRate(float learningRate) { + this.parameters.learningRate = learningRate; + } + + public float getMomentum() { + return parameters.momentum; + } + + /** + * set the value used to prevent convergence against local minimum. + * @param momentum used to prevent convergence against local minimum. + */ + public void setMomentum(float momentum) { + this.parameters.momentum = momentum; + } + + /** + * Get the number of epochs. + * @return number of epochs to train. + */ + public int getEpochs() { + return parameters.rounds; + } + + /** + * set the number of training iterations. More should yield better results, until overfit. + * @param learningRate set the number of training iterations. + */ + public void setEpochs(int epochs) { + this.parameters.rounds = epochs; + } + + /** + * Retrieves the parameters that are set in this learner. + * @return An object containing all the values of the parameters that control the behavior of + * this learning algorithm. + **/ + public Learner.Parameters getParameters() { + return parameters; + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java new file mode 100644 index 00000000..20929909 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ActThread.java @@ -0,0 +1,71 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * This thread will compute a single activtion value, for each layer + * setup must be called to provide the output array, the layer and the + * input values. + * @author redman + */ +class ActThread extends PushThread { + + /** the input data. */ + float[] currentInputs = null; + + /** the layer we are operating on. */ + Layer layer = null; + + /** the resulting outputs are stored here, this array is shared + * by all threads activating on this layer. */ + float [] layerActivations = null; + + /** used to make the name of the thread unique. */ + private static int inc = 0; + + /** + * init with a mux. + * @param m the multiplexer. + */ + ActThread() { + super("ActThread-"+(inc++)); + } + + /** + * before we start a layer, this is called to set up the thread. + * @param ci the input data. + * @param l the layer. + * @param la the layer actvation values. + * @param mux the multiplexer. + */ + void setup(float[] ci, Layer l, float[] la) { + this.currentInputs = ci; + this.layer = l; + this.layerActivations = la; + } + + /** + * Run forever never quite. + */ + public void run() { + synchronized (this) { + while(true) { + + // wait for the range object to be set. 
+ Range r = this.getRange(); + if (r == null) + return; + for (int indx = r.start; indx < r.end; indx++) { + layerActivations[indx] = layer.computeOneOutput(indx, currentInputs); + } + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java new file mode 100644 index 00000000..74b25f0a --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/Activator.java @@ -0,0 +1,25 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * Implementations will activate all the layers of the net and + * produce a set of outputs. The one required method will return + * all the output values. + * @author redman + */ +public interface Activator { + + /** + * Activate the provided layer, return the resulting outputs. + * @param inputs the input data. + * @param layer the layer to supply the inputs to. + * @return the output values. + */ + public float[] prediction(float[] inputs); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java new file mode 100644 index 00000000..8237a18b --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ConvergenceMeasure.java @@ -0,0 +1,27 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
/**
 * This software is released under the University of Illinois/Research and Academic Use License. See
 * the LICENSE file in the root folder for details. Copyright (c) 2016
 *
 * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
 * http://cogcomp.cs.illinois.edu/
 */

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Reads NIST/MNIST-style binary datasets: a big-endian integer header followed
 * by unsigned-byte image or label data.
 * @author redman
 */
public class DatasetReader {

    /**
     * Read one big-endian 32-bit integer from the stream. InputStream.read
     * yields single unsigned bytes, so the byte order is assembled by hand.
     * @param is input stream.
     * @return the integer.
     * @throws IOException if the stream ends before four bytes are read.
     */
    private static int readInt(InputStream is) throws IOException {
        int i0 = is.read();
        int i1 = is.read();
        int i2 = is.read();
        int i3 = is.read();
        // read() returns -1 at EOF; the original silently produced garbage here.
        if ((i0 | i1 | i2 | i3) < 0)
            throw new EOFException("Unexpected end of file while reading an int.");
        return (i0<<24) + (i1<<16) + (i2<<8) + i3;
    }

    /**
     * Get the examples from an NIST dataset, returning everything at once. There are
     * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats =
     * 47 million floats. These are input examples, so they are image data. Pixel
     * bytes [0,255] are scaled to roughly [-1, 1).
     * @param filename the example (image) file to read.
     * @return the input examples.
     * @throws IOException if the file is missing, truncated, or has the wrong magic code.
     */
    public static float[][] getExampleInputs(String filename) throws IOException {
        // try-with-resources: the original leaked the stream.
        try (InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename)))) {
            int m1 = readInt(dis);
            if (m1 != 2051)
                throw new IOException("That was not an example file! magic code = "+m1);
            int numExamples = readInt(dis);
            if (numExamples != 60000)
                System.out.println("We expecting 60k examples "+numExamples); // was printing the magic number
            int numRows = readInt(dis);
            if (numRows != 28)
                System.out.println("We expecting 28 rows "+numRows);
            int numColumns = readInt(dis);
            if (numColumns != 28)
                System.out.println("We expecting 28 columns "+numColumns);
            int totalpixels = numRows*numColumns;
            float [][] examples = new float [numExamples][totalpixels];
            for (int i = 0 ; i < examples.length; i++) {
                for (int j = 0; j < totalpixels; j++) {
                    examples[i][j] = (float)(dis.read()/128f) - 1f;
                }
            }
            return examples;
        }
    }

    /**
     * Get the labels from an NIST dataset, returning everything at once. Each
     * label byte is scaled by 1/5 and shifted into [-1, 1].
     * @param filename the label file to read.
     * @return the output examples, one single-element vector per example.
     * @throws IOException if the file is missing, truncated, or has the wrong magic code.
     */
    public static float[][] getExampleOutputs(String filename) throws IOException {
        try (InputStream dis = new BufferedInputStream(new FileInputStream(new File(filename)))) {
            int m1 = readInt(dis);
            if (m1 != 2049)
                throw new IOException("That was not an example file! magic code = "+m1);
            int numExamples = readInt(dis);
            float [][] examples = new float [numExamples][1];
            for (int i = 0 ; i < numExamples; i++) {
                examples[i][0] = (float)(dis.read()/5f) - 1f;
            }
            return examples;
        }
    }

    /**
     * Smoke-test entry point; reads a hard-coded local copy of the MNIST data.
     * @param a unused.
     * @throws IOException on any read failure.
     */
    @SuppressWarnings("unused")
    public static void main(String[]a) throws IOException {
        float[][] examples = getExampleInputs("/Users/redman/Desktop/NNTrainingData/train-images-idx3-ubyte");
        float[][] labels = getExampleOutputs("/Users/redman/Desktop/NNTrainingData/train-labels-idx1-ubyte");
    }
}
/**
 * This software is released under the University of Illinois/Research and Academic Use License. See
 * the LICENSE file in the root folder for details. Copyright (c) 2016
 *
 * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
 * http://cogcomp.cs.illinois.edu/
 */

import java.io.PrintStream;
import java.util.Random;

/**
 * This is a layer in a neural net. It is characterized by a number of inputs
 * and a number of outputs. The neurons (perceptrons) are hidden within, which
 * allows the layer class to do all computations across the entire layer in one
 * pass. The weights are represented by a primitive array, so only 32 bit
 * indices are supported (no more than 2^31-1 weights).
 * @author redman
 */
public class Layer {

    /** number of inputs to this layer. */
    private int numberInputs;

    /** the number of outputs from this layer. */
    private int numberOutputs;

    /** the neuron weights, row-major: (numberInputs+1) weights per output,
     * the last entry in each row being the bias weight. */
    private float[] weights;

    /** the previous weight deltas, kept for the momentum term. (The original
     * comment called these "derived outputs", which they are not.) */
    private float[] dweights;

    /** collects output values. */
    private float[] outputs;

    /**
     * The layer constructed with randomly initialized weights.
     * @param numIn the number of inputs.
     * @param numOut the number of outputs.
     */
    public Layer(int numIn, int numOut) {
        // the delegated constructor already allocates outputs; the original
        // redundantly allocated the array a second time here.
        this(numIn, numOut, new Random());
    }

    /**
     * The layer constructed.
     * @param numIn the number of inputs.
     * @param numOut the number of outputs.
     * @param r the random num generator.
     */
    public Layer(int numIn, int numOut, Random r) {
        this.numberInputs = numIn;
        this.numberOutputs = numOut;
        int wl = (numIn+1)*numOut;
        weights = new float[wl];
        dweights = new float[wl];
        for (int i = 0; i < wl; i++)
            weights [i] = (r.nextFloat() - 0.5f) * 4f; // uniform in [-2, 2)
        outputs = new float[numOut];
    }

    /**
     * Compute the logistic sigmoid 1/(1+e^-x). (The original javadoc wrongly
     * called this the "first derivative" of the sigmoid.)
     * @param x the input value
     * @return the sigmoid
     */
    final private float sigmoid(float x) {
        return (float) (1.0 / (1.0 + Math.exp(-x)));
    }

    /**
     * @return the weights
     */
    public float[] getWeights() {
        return weights;
    }

    /**
     * @param weights the weights to set
     */
    public void setWeights(float[] weights) {
        this.weights = weights;
    }

    /**
     * @return the numberInputs
     */
    public int getNumberInputs() {
        return numberInputs;
    }

    /**
     * @param numberInputs the numberInputs to set
     */
    public void setNumberInputs(int numberInputs) {
        this.numberInputs = numberInputs;
    }

    /**
     * @return the numberOutputs
     */
    public int getNumberOutputs() {
        return numberOutputs;
    }

    /**
     * @param numberOutputs the numberOutputs to set
     */
    public void setNumberOutputs(int numberOutputs) {
        this.numberOutputs = numberOutputs;
    }

    /**
     * This granularity of method invocation is only necessary to parallelize
     * the process.
     * @param index the index of the output to compute.
     * @param inputs the inputs.
     * @return the activation output.
     */
    final float computeOneOutput(int index, float[] inputs) {
        float result = 0.0f;
        int nI = this.numberInputs;
        int start = index * (nI+1);
        for (int k = 0 ; k < nI ; k++) {
            result += weights[start+k] * inputs[k];
        }
        result += weights[start+nI]; // bias weight, implicit input of 1
        return sigmoid(result);      // dropped a redundant cast — sigmoid already returns float
    }

    /**
     * Given a set of inputs, produce the set of activation values.
     * @param inputs the inputs to produce the predictions for.
     * @return the set of predictions (the shared internal outputs array).
     */
    final public float[] activate(float[] inputs) {
        int nO = this.numberOutputs;
        float[] o = this.outputs;
        for (int j = 0 ; j < nO ; j++) {
            o[j] = this.computeOneOutput(j, inputs);
        }
        return outputs;
    }

    /**
     * Train up weights for just one output. Thread safety must be noted here, since everybody will be
     * updating the nextError array at the same time. To avoid doing repeated synchronizations which are
     * expensive here, for a multithreaded trainer, we pass in a per-thread error array, update at will, then
     * the caller is responsible for synchronizing on the real one and updating the shared sum error array.
     * @param error the activation errors used to compute the backprop value.
     * @param input the input data.
     * @param output the computed output data.
     * @param learningRate the learning rate.
     * @param momentum the momentum
     * @param nextError the array where the error values will be updated
     * @param outIndex the output index.
     */
    final public void trainOne(float[] error, float[] input, float[] output, float learningRate, float momentum, float[] nextError, int outIndex) {
        int woffset = (this.numberInputs+1) * outIndex;
        float d = error[outIndex] * (output[outIndex] * (1 - output[outIndex]));
        for (int j = 0; j < this.numberInputs; j++) {
            int windx = woffset + j;
            nextError[j] += weights[windx] * d; // uses the pre-update weight
            float dw = input[j] * d * learningRate;
            weights[windx] += dweights[windx] * momentum + dw;
            dweights[windx] = dw;
        }

        // compute the error for the bias, the fake bias input is always 1.
        // NOTE(review): this indexes by input.length while the loop above uses
        // numberInputs — assumes callers always pass exactly numberInputs inputs.
        int windx = woffset + input.length;
        nextError[input.length] += weights[windx] * d;
        float dw = d * learningRate;
        weights[windx] += dweights[windx] * momentum + dw;
        dweights[windx] = dw;
    }

    /**
     * Given a set of errors (errors from the next layer on), adjust the weights
     * to do a gradient descent step.
     * @param error the output errors.
     * @param input the input data.
     * @param output the computed output.
     * @param learningRate the rate of learning.
     * @param momentum helps to avoid local minima.
     * @return the errors from this layer.
     */
    final public float[] train(float[] error, float[] input, float[] output, float learningRate, float momentum) {
        int nI = this.numberInputs+1/*for the bias*/;
        float[] nextError = new float[nI];
        for (int i = 0; i < this.numberOutputs; i++) {
            // the original inlined a verbatim copy of trainOne here (with a
            // commented-out call noting as much); delegate instead.
            this.trainOne(error, input, output, learningRate, momentum, nextError, i);
        }
        return nextError;
    }

    /**
     * print out the weights.
     */
    public void print() {
        System.out.print(this.numberInputs+":"+this.numberOutputs);
        System.out.print(" ");
        for (float w : weights) {
            System.out.format(" %.8f",w);
        }
        System.out.print(" (");
        for (float w : dweights) {
            System.out.format(" %.8f",w);
        }
        System.out.println(")");
    }

    /**
     * @return the dweights
     */
    public float[] getDweights() {
        return dweights;
    }

    /**
     * @param dweights the dweights to set
     */
    public void setDweights(float[] dweights) {
        this.dweights = dweights;
    }

    /**
     * Used for reporting mostly. NOTE(review): rows are printed numberInputs
     * wide, but each neuron owns numberInputs+1 weights (bias included), so
     * rows drift relative to neurons; kept as-is to preserve the output.
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("in : "+this.numberInputs+" out : "+this.numberOutputs);
        sb.append("\n");
        for (int i = 0; i < weights.length;) {
            for (int j = 0; j < this.numberInputs;j++,i++) {
                sb.append(" "+weights[i]);
                sb.append(" ");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Write the representation to a digital output stream. The original used
     * print() with no separators, fusing every number into one unparseable
     * token; values are now newline-separated so they can be read back.
     * @param out the output stream for serialization.
     */
    public void write(PrintStream out) {
        out.println(numberInputs);
        out.println(numberOutputs);
        out.println(weights.length);
        for (int i = 0; i < weights.length; ++i)
            out.println(weights[i]);
    }
}
*/ + private static int inc = 0; + + /** + * The learning rate and momentum will not change, so we will take them initially. + * @param lR the learning rate. + * @param m the momentum. + * @param mux the multiplexer. + */ + LearnerThread(float lR, float m) { + super("LearnerThread-"+(inc++)); + this.learnRate = lR; + this.momentum = m; + } + + /** + * before we start a layer, this is called to set up the thread. + * @param error the error from the next layer, used to calc this layers error. + * @param input the input data. + * @param output the result data. + * @param nextError put the next layers input error here. + * @param layer the layer we operate on. + */ + void setup(float [] error, float [] input, float [] output, float[] nextError, Layer layer) { + this.error = error; + this.input = input; + this.output = output; + this.nextError = nextError; + this.layer = layer; + this.errorWorkspace = new float[nextError.length]; + Arrays.fill(this.errorWorkspace, 0); + } + + /** + * Run till we complete the layer, then finish up. + */ + public void run() { + synchronized (this) { + while(true) { + + // wait for the range object to be set. + Range r = this.getRange(); + if (r == null) + return; + for (int indx = r.start; indx < r.end; indx++) { + layer.trainOne(error, input, output, learnRate, momentum, errorWorkspace, indx); + } + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java new file mode 100644 index 00000000..84e29922 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/NNTrainingInterface.java @@ -0,0 +1,23 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. 
Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +/** + * @author redman + */ +public interface NNTrainingInterface { + + /** + * Given a set of examples, and a set of desired outputs, train the network + * represented by the provided network layers the provided number of epochs. + * @param inputs the input data to train against. + * @param outputs the desired outputs. + * @param epochs the number of training iterations to run. + */ + public void train(float[][] inputs, float[][]outputs, int epochs); +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java new file mode 100644 index 00000000..9dada667 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/PushThread.java @@ -0,0 +1,84 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import edu.illinois.cs.cogcomp.lbjava.neuralnet.ThreadedNNTrainer.Range; + +/** + * Threads will operate on a range, this superclass contains that + * range and handles atomic synchronized access. + * @author redman + */ +public class PushThread extends Thread { + + /** the range to operate on. */ + protected Range range = null; + + /** set when this thread is waiting for input. */ + private boolean idle = false; + /** + * the push thread takes the name ofthe thread, to pass to + * the super. + * @param name the name of the thread. 
+ */ + PushThread(String name) { + super(name); + } + + /** + * set the range of things to operate on. + * @param range + */ + synchronized void setRange(Range range) { + this.range = range; + this.notifyAll(); + } + + /** + * call this when we are done. + */ + synchronized void done() { + this.range = null; + this.interrupt(); + } + + /** + * wait for the thread to complete it's run, it will set + * poised and block till it gets data. + */ + final synchronized public void waitIdle() { + while(!idle || range != null) + try { + this.wait(); + } catch (InterruptedException e) { + } + } + + /** + * wait for the next range. + * @return the range. + */ + final synchronized protected Range getRange() { + while (range == null) + try { + this.idle = true; + this.notify(); // somebody waiting for completion? + this.wait(); + } catch (InterruptedException e) { + if (this.isInterrupted()) { + System.out.println("Interrupted error."); + return null; + } + } + Range r = range; + range = null; + this.idle = false; + return r; + } + +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java new file mode 100644 index 00000000..951c3144 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/SimpleNNTrainer.java @@ -0,0 +1,150 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Random; + +/** + * This class will simply learn up the NeuralNet layers, single threaded. + * @author redman + */ +public class SimpleNNTrainer implements NNTrainingInterface { + + /** the layers of the neural network. 
*/ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + private float learningRate = .3f; + + /** this prevents local minimum capture. */ + private float momentum = .6f; + + /** + * Need the layer data, learning rate and momentum. + * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + */ + public SimpleNNTrainer(Layer[] l, float rate, float mom) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [] classify(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations[layerCount-1]; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [][] activate(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. 
+ float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations; + } + + /** + * Train with one example. + * @param inputs input data. + * @param outputs the labeled data. + * @param epochs + */ + public void train(float[] inputs, float[]outputs) { + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = this.activate(inputs); + + // now we have all the activations. + float[] calcOut = activations[activations.length-1]; + int errlen = calcOut.length; + float [] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[i] - calcOut[i]; // negative error + } + for (int i = layers.length - 1; i > 0; i--) { + error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum); + } + error = layers[0].train(error,inputs, activations[0], this.learningRate, this.momentum); + } + /** + * Execute the given number of epochs, then exit whatever the error. + * @param inputs the input examples. + * @param outputs the labels. + * @param layers + */ + @Override + public void train(float[][] inputs, float[][]outputs, int epochs) { + + // error checking. + if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records."); + int totalInputs = inputs.length; + + // set up our counts. + int layerCount = layers.length; + Random r = new Random(34565); + for (int epoch = 0; epoch < epochs; epoch++) { + for (int inindx = 0; inindx < totalInputs; inindx++) { + int iI = r.nextInt(totalInputs); + // storage for each output of each layer, and the error computed for each activation. 
+ float [][] activations = this.activate(inputs[iI]); + + // now we have all the activations. + float[] calcOut = activations[layerCount-1]; + int errlen = calcOut.length; + float [] error = new float[errlen]; + for (int i = 0; i < errlen; i++) { + error[i] = outputs[iI][i] - calcOut[i]; // negative error + } + for (int i = layers.length - 1; i > 0; i--) { + error = layers[i].train(error,activations[i-1],activations[i], this.learningRate, this.momentum); + } + error = layers[0].train(error,inputs[iI],activations[0], this.learningRate, this.momentum); + } + } + } +} diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java new file mode 100644 index 00000000..01980fb2 --- /dev/null +++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/ThreadedNNTrainer.java @@ -0,0 +1,338 @@ +/** + * This software is released under the University of Illinois/Research and Academic Use License. See + * the LICENSE file in the root folder for details. Copyright (c) 2016 + * + * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign + * http://cogcomp.cs.illinois.edu/ + */ +package edu.illinois.cs.cogcomp.lbjava.neuralnet; + +import java.util.Arrays; +import java.util.Random; + +/** + * This class will simply learn up the NeuralNet layers, single threaded. + * @author redman + */ +public class ThreadedNNTrainer implements NNTrainingInterface { + + /** the layers of the neural network. */ + private Layer[] layers; + + /** scales the weight deltas for each iteration. */ + private float learningRate = .3f; + + /** this prevents local minimum capture. */ + private float momentum = .6f; + + /** this is the number of threads we will use, by default, number of processors on the machine. */ + private int numThreads = Runtime.getRuntime().availableProcessors(); + + /** + * Need the layer data, learning rate and momentum. 
+ * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + */ + public ThreadedNNTrainer(Layer[] l, float rate, float mom) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + } + + /** + * Need the layer data, learning rate and momentum. + * @param l the layers of the neural net. + * @param rate the learning rate. + * @param mom the momentum. + * @param numThreads number of threads to deploy. + */ + public ThreadedNNTrainer(Layer[] l, float rate, float mom, int numThreads) { + this.layers = l; + this.learningRate = rate; + this.momentum = mom; + this.numThreads = numThreads; + } + + /** + * given an input set of example, compute the output values, also return all the + * activation values in between, return them all. The results will be in the last + * vector in the returned array. + * @param inputs the inputs. + * @return the activation energies from all layers/ + */ + public final float [][] activate(float[] inputs) { + + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float [][] activations = new float[layerCount][]; + + // This array contains inputs from previous layer + float [] currentinputs = inputs; + for (int i = 0 ; i < layerCount ; i++) { + + // compute the activations for this layer. + Layer layer = layers[i]; + activations[i] = layer.activate(currentinputs); + currentinputs = activations[i]; + } + return activations; + } + + /** + * Execute the given number of epochs, then exit whatever the error. + * @param inputs the input examples. + * @param outputs the labels. + * @param epochs the number of iterations to perform. + */ + @Override + final public void train(float[][] inputs, float[][] outputs, int epochs) { + // error checking. 
+ if (inputs.length != outputs.length) + throw new RuntimeException("There must be the same number of input data records and output data records."); + + // iterate this number of times. + int numExamples = inputs.length; + + // For each layer, compute the ranges of indices to operate on. This will allow us to + // continue computing on a thread without handshakes. + int ll = layers.length; + Range[][] ranges = new Range[ll][]; + for (int i = 0; i < ll ; i++) { + Layer l = layers[i]; + int no = l.getNumberOutputs(); + int increment = no / numThreads; + int onsies; + if (increment == 0) { + onsies = no; + ranges[i] = new Range[onsies]; + } else { + onsies = no % numThreads; + ranges[i] = new Range[numThreads]; + } + int start = 0; + for (int j = 0 ; j < ranges[i].length && start < no; j++) { + int end = start + increment; + if (onsies != 0) { + end++; + onsies--; + } + ranges[i][j] = new Range(start, end); + start = end; + } + } + + // create the threads to run against the activation mux. + ActThread[] actThreads = new ActThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + actThreads[i] = new ActThread(); + actThreads[i].start(); + } + + // create the threads to run against the activation mux. + LearnerThread[] learnerThreads = new LearnerThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + learnerThreads[i] = new LearnerThread(this.learningRate, this.momentum); + learnerThreads[i].start(); + } + + // set up our counts. + int layerCount = layers.length; + + // storage for each output of each layer, and the error computed for each activation. + float[][] activations = new float[layerCount][]; + for (int i = 0; i < layerCount; i++) { + activations[i] = new float[layers[i].getNumberOutputs()]; + } + + Thread.yield(); + Thread.yield(); + Thread.yield(); + Random r = new Random(34565); + + // do the specified number of epochs. 
+        for (int epoch = 0; epoch < epochs; epoch++) {
+            for (int inindx = 0; inindx < numExamples; inindx++) {
+                int iI = r.nextInt(numExamples);
+
+                // zero activations
+                for (int i = 0; i < layerCount; i++) {
+                    Arrays.fill(activations[i], 0.0f);
+                }
+
+                // This array contains inputs from previous layer output
+                float[] currentinputs = inputs[iI];
+
+                // for each layer, do the activations.
+                for (int i = 0; i < layerCount; i++) {
+                    Layer layer = layers[i];
+
+                    // set up the threads
+                    float[] acts = activations[i];
+                    int rl = ranges[i].length;
+                    for (int j = 0; j < rl; j++) {
+                        actThreads[j].setup(currentinputs, layer, acts);
+                        actThreads[j].setRange(ranges[i][j]);
+                    }
+
+                    // wait for them to finish.
+                    for (int j = 0; j < rl; j++) {
+                        actThreads[j].waitIdle();
+                    }
+                    currentinputs = acts;
+                }
+
+                //////////////////////////////////
+                // compute output errors.
+                // now we have all the activations, lets do error propagation.
+                float[] calcOut = activations[layerCount - 1];
+                int errlen = calcOut.length;
+                float[] error = new float[errlen];
+                for (int i = 0; i < errlen; i++) {
+                    error[i] = outputs[iI][i] - calcOut[i]; // negative error
+                }
+
+                //////////////////////////////////
+                // propagate the errors back and adjust weights.
+                // now learn from our errors.
+                for (int i = layerCount - 1; i > 0; i--) {
+                    Layer layer = layers[i];
+                    int nI = layer.getNumberInputs() + 1/*for the bias*/;
+                    float[] nextError = new float[nI];
+
+                    // set up the threads
+                    int rl = ranges[i].length;
+                    for (int j = 0; j < rl; j++) {
+                        learnerThreads[j].setup(error, activations[i - 1], activations[i], nextError, layer);
+                        learnerThreads[j].setRange(ranges[i][j]);
+                    }
+
+                    // wait for complete, then set up next layer run.
+                    // wait for them to finish.
+                    for (int j = 0; j < rl; j++) {
+                        learnerThreads[j].waitIdle();
+                    }
+
+                    // now we must sum all the errors for each of the threads.
+ int esize = nextError.length; + for (int ei = 0; ei < esize; ei++) { + for (int j = 0; j < rl; j++) { + nextError[ei] += learnerThreads[j].errorWorkspace[ei]; + } + } + error = nextError; + } + + // The setup for the first layer is computed using the actual inputs, so we do this + // a bit differently. + Layer layer = layers[0]; + int rl = ranges[0].length; + int nI = layer.getNumberInputs() + 1/*for the bias*/; + float[] nextError = new float[nI]; + for (int j = 0; j < rl; j++) { + learnerThreads[j].setup(error, inputs[iI], activations[0], nextError, layer); + learnerThreads[j].setRange(ranges[0][j]); + } + + // wait for complete, then set up next layer run. + // wait for them to finish. + for (int j = 0; j < rl; j++) { + learnerThreads[j].waitIdle(); + } + } + + // check for convergence. + float sumerr = 0; + for (int inputIdx = 0; inputIdx < outputs.length; inputIdx++) { + + // storage for each output of each layer, and the error computed for each activation. + float [][] a = this.activate(inputs[inputIdx]); + float[] outs = a[layerCount-1]; + float pred = outs[0]; + float label = outputs[inputIdx][0]; + sumerr = pred > label ? pred - label : label - pred; + } + System.out.format("%d) error = %.18f\n",epoch,(sumerr/(float)outputs.length)); + } + } + + /** just holds range of datums to operate on. */ + static class Range { + int start; + int end; + Range(int s, int e) { + start = s; + end = e; + } + public String toString() { + return start+"-"+end; + } + } + + /** + * this class coordinates the activities of a set of threads by handing + * out indexes that need operated on in a threadsafe way. If a request is made + * for an index, and non are available, the thread will wait until notified. + * @author redman + */ + static class Multiplexer { + + /** these are the ranges for the layer we operate on, these inited once and reused each epoch. */ + private Range[] ranges = null; + + /** the number of elements we are counting down from. 
*/
+        private int count = 0;
+
+        /** number of threads operating. */
+        private int waiting = 0;
+
+        /** the number of threads sharing this multiplexer. */
+        private int numThreads = 0;
+
+        /**
+         * We need the number of elements in the layer to operate on.
+         * @param numThreads the total number of threads.
+         */
+        Multiplexer(int numThreads) {
+            this.numThreads = numThreads;
+        }
+
+        /**
+         * Start this process. This should be called by the main thread where
+         * coordination occurs. This will be accessed by the done method.
+         * Blocks until every worker thread is waiting on this multiplexer.
+         * @param ranges the range of indices to operate on.
+         */
+        synchronized void startAndWait(Range[] ranges) {
+            this.count = 0;
+            this.ranges = ranges;
+            this.waiting = 0;
+            this.notifyAll();
+            while (waiting != numThreads) {
+                try {
+                    this.wait();
+                } catch (InterruptedException e1) {
+                }
+            }
+        }
+
+        /**
+         * get the next available index, or block till one is available.
+         * @return the index.
+         */
+        synchronized Range getNextIndex() {
+            while (ranges == null || count == ranges.length) {
+                try {
+                    this.waiting++;
+                    if (waiting == numThreads)
+                        this.notifyAll();
+                    this.wait();
+                    this.waiting--;
+                } catch (InterruptedException e) {
+                }
+            }
+            return ranges[count++];
+        }
+    }
+}
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java
new file mode 100644
index 00000000..c8623369
--- /dev/null
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/neuralnet/package-info.java
@@ -0,0 +1,21 @@
+/**
+ * This software is released under the University of Illinois/Research and Academic Use License. See
+ * the LICENSE file in the root folder for details. Copyright (c) 2016
+ *
+ * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
+ * http://cogcomp.cs.illinois.edu/
+ */
+/**
+ * This package contains the Neural Network implementation employed by LBJava. 
This
+ * implementation supports bias, momentum and back prop, and is designed with
+ * efficiency in mind. The implementation contract includes an API for trainers
+ * {@link NNTrainingInterface} that defines the API for any trainer. A single
+ * threaded trainer is provided. There is also a multithreaded trainer, which helps
+ * when there are a very large number of weights between layers.
"real"
null
+ *
+ * There is also a {@link Layer} class which implements functionality specific
+ * to neural net layers within the system. However, there is no representation of
+ * a neuron within the API; this was decided upon to ensure good performance.
+ * @author redman
+ */
+package edu.illinois.cs.cogcomp.lbjava.neuralnet;
diff --git a/pom.xml b/pom.xml
index acabfcea..db0a0881 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
     edu.illinois.cs.cogcomp
     lbjava-project
     pom
-    1.3.0
+    1.3.1
     lbjava