-
Notifications
You must be signed in to change notification settings - Fork 17
Neural Net #107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Neural Net #107
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
cb10189
This is the neural network implementation as it stands now. This is n…
7a9eba7
Merge branch 'master' into neuralnet
cowchipkid f35f841
Merge branch 'master' into neuralnet
af2ed1c
Added the license headers and moved the NNBrownClassifier to examples…
c49df3b
Merge remote-tracking branch 'origin/master' into neuralnet
b41c0b9
Moved some files that were only needed by the examples into lbjava-exa…
b711973
Added a readme file to the neural net directory, cleaned up stuff.
7b23b0c
Didn't update the lbjava-mvn-plugin version number, till now.
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
238 changes: 238 additions & 0 deletions
238
...examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/BrownReader.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,238 @@ | ||
| /** | ||
| * This software is released under the University of Illinois/Research and Academic Use License. See | ||
| * the LICENSE file in the root folder for details. Copyright (c) 2016 | ||
| * | ||
| * Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign | ||
| * http://cogcomp.cs.illinois.edu/ | ||
| */ | ||
| package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet; | ||
|
|
||
| import java.io.BufferedReader; | ||
| import java.io.File; | ||
| import java.io.FileNotFoundException; | ||
| import java.io.FileReader; | ||
| import java.io.IOException; | ||
| import java.util.Arrays; | ||
|
|
||
| import edu.illinois.cs.cogcomp.lbjava.parse.Parser; | ||
|
|
||
| /** | ||
| * @author redman | ||
| */ | ||
| public class BrownReader implements Parser{ | ||
|
|
||
| /** the input data. */ | ||
| float [][] inputs; | ||
|
|
||
| /** the labels. */ | ||
| float [][] outputs; | ||
|
|
||
| /** indexes the current example. */ | ||
| int index = 0; | ||
|
|
||
| /** the maximum number of input features. */ | ||
| int inputCardinality = -1; | ||
|
|
||
| /** the maximum integer classification. */ | ||
| int outputCardinality = 1; | ||
|
|
||
| /** | ||
| * read input data from the input file, the output data from the out file. | ||
| * @param infile the input data. | ||
| * @param outfile the output data. | ||
| * @throws IOException | ||
| */ | ||
| public BrownReader (String infile) { | ||
| try { | ||
| inputs = getExampleInputs(infile); | ||
| this.inputCardinality = inputs[0].length; | ||
| outputs = getExampleOutputs(infile); | ||
| if (inputs.length != outputs.length) | ||
| throw new RuntimeException("Need the same number of inputs and outputs."); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException("Could not read example data.",e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * read input data from the input file, the output data from the out file. | ||
| * @param infile the input data. | ||
| * @param trainingInputs the previously read training inputs. | ||
| * @throws IOException | ||
| */ | ||
| public BrownReader (String infile, int numberInputFeatures, int numberExamples) { | ||
| try { | ||
| this.inputCardinality = numberInputFeatures; | ||
| inputs = getExampleInputs(infile, numberInputFeatures); | ||
| outputs = getExampleOutputs(infile, inputs.length, numberExamples); | ||
| if (inputs.length != outputs.length) | ||
| throw new RuntimeException("Need the same number of inputs and outputs."); | ||
| } catch (IOException e) { | ||
| throw new RuntimeException("Could not read example data.",e); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public void close() { | ||
| index = 0; | ||
| } | ||
|
|
||
| @Override | ||
| public Object next() { | ||
| NeuralNetExample nne = null; | ||
| if (index < inputs.length) { | ||
| nne = new NeuralNetExample(inputs[index], outputs[index]); | ||
| index++; | ||
| } | ||
| return nne; | ||
| } | ||
|
|
||
| @Override | ||
| public void reset() { | ||
| index = 0; | ||
| } | ||
|
|
||
| /** | ||
| * get the examples form an NIST dataset, return everything at once. There are | ||
| * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = | ||
| * 47 million floats. These are input examples, so they are image data. | ||
| * @param filename | ||
| * @return the input examples. | ||
| * @throws IOException | ||
| */ | ||
| private float[][] getExampleInputs(String filename) throws IOException { | ||
| int count = 0; | ||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line; | ||
| while ((line=br.readLine()) != null) { | ||
| count++; | ||
| String[] splits = line.split("[,:]"); | ||
| for (int i = 1; i < splits.length; i++) { | ||
| int featureindex = Integer.parseInt(splits[i]); | ||
| if (featureindex > this.inputCardinality) | ||
| this.inputCardinality = featureindex; | ||
| } | ||
| } | ||
| } | ||
| float[][] data = new float[count][++this.inputCardinality]; | ||
| for (float[] a : data) | ||
| Arrays.fill(a, 0.0f); | ||
|
|
||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line; | ||
| count = 0; | ||
| while ((line=br.readLine()) != null) { | ||
| String[] splits = line.split("[,:]"); | ||
| for (int i = 0; i < splits.length; i++) { | ||
| int featureindex = Integer.parseInt(splits[i]); | ||
| data[count][featureindex] = 1.0f; | ||
| } | ||
| count++; | ||
| } | ||
| } | ||
| return data; | ||
| } | ||
|
|
||
| /** | ||
| * scale the range of input feature vector to the provided example set, of data to train on. | ||
| * @param string | ||
| * @param examples | ||
| * @return the testing input deck. | ||
| * @throws IOException | ||
| * @throws FileNotFoundException | ||
| */ | ||
| private float[][] getExampleInputs(String filename, int cardinality) throws FileNotFoundException, IOException { | ||
| int count = 0; | ||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line; | ||
| while ((line=br.readLine()) != null) { | ||
| count++; | ||
| String[] splits = line.split("[,:]"); | ||
| for (int i = 1; i < splits.length; i++) { | ||
| int featureindex = Integer.parseInt(splits[i]); | ||
| if (featureindex > this.inputCardinality) | ||
| this.inputCardinality = featureindex; | ||
| } | ||
| } | ||
| } | ||
| float[][] data = new float[count][cardinality]; | ||
| for (float[] a : data) | ||
| Arrays.fill(a, 0.0f); | ||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line; | ||
| count = 0; | ||
| while ((line=br.readLine()) != null) { | ||
| String[] splits = line.split("[,:]"); | ||
| for (int i = 0; i < splits.length; i++) { | ||
| int featureindex = Integer.parseInt(splits[i]); | ||
| data[count][featureindex] = 1.0f; | ||
| } | ||
| count++; | ||
| } | ||
| } | ||
| return data; | ||
| } | ||
|
|
||
| /** | ||
| * get the examples form an NIST dataset, return everything at once. There are | ||
| * 60k examples, at 28x28 pixel values per example, so 60000 x 28 x 28 floats = | ||
| * 47 million floats. These are input examples, so they are image data. | ||
| * @param filename | ||
| * @return the input examples. | ||
| * @throws IOException | ||
| */ | ||
| private float[][] getExampleOutputs(String filename) throws IOException { | ||
| int count = 0; | ||
| this.outputCardinality = -1; | ||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line = null; | ||
| while ((line=br.readLine()) != null) { | ||
| count++; | ||
| String[] splits = line.split("[,:]"); | ||
| int label = Integer.parseInt(splits[0]); | ||
| if (label > this.outputCardinality) | ||
| this.outputCardinality = label; | ||
| } | ||
| } | ||
| float[][] data = new float[count][1]; | ||
| for (float[] a : data) | ||
| Arrays.fill(a, 0.0f); | ||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line; | ||
| count = 0; | ||
| float range = this.outputCardinality; | ||
| while ((line=br.readLine()) != null) { | ||
| String[] splits = line.split("[,:]"); | ||
| int featureindex = Integer.parseInt(splits[0]); | ||
| data[count][0] = featureindex/range; | ||
| count++; | ||
| } | ||
| } | ||
| return data; | ||
| } | ||
|
|
||
| /** | ||
| * get the example outputs. | ||
| * @param filename file with the values. | ||
| * @param outputs the training examples. | ||
| * @return the testing examples. | ||
| * @throws FileNotFoundException | ||
| * @throws IOException | ||
| */ | ||
| private float[][] getExampleOutputs(String filename, int numouts, int card) throws FileNotFoundException, IOException { | ||
| float[][] data = new float[numouts][1]; | ||
| try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) { | ||
| String line; | ||
| int count = 0; | ||
| float range = card; | ||
| while ((line=br.readLine()) != null) { | ||
| String[] splits = line.split("[,:]"); | ||
| int featureindex = Integer.parseInt(splits[0]); | ||
| // convert to a number 0 - 1, then to a number -1 to 1. | ||
| data[count][0] = featureindex/range; | ||
| count++; | ||
| } | ||
| } | ||
| return data; | ||
| } | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for being nitpicky. It would be great if you added a readme file under
lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/, explaining what is input/output, where to get the data, and what to expect after running the system.