Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions lbjava-examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for being nitpicky. It would be great if you could add a README file for lbjava-examples/src/main/java/edu/illinois/cs/cogcomp/lbjava/examples/neuralnet/ that explains what the input and output are, where to get the data, and what to expect after running the system.

<artifactId>lbjava-project</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>1.3.0</version>
<version>1.3.1</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand All @@ -27,12 +27,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>LBJava</artifactId>
<version>1.3.0</version>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>lbjava-maven-plugin</artifactId>
<version>1.3.0</version>
<version>1.3.1</version>
</dependency>
</dependencies>

Expand Down Expand Up @@ -63,7 +63,7 @@
<plugin>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>lbjava-maven-plugin</artifactId>
<version>1.3.0</version>
<version>1.3.1</version>
<configuration>
<gspFlag>${project.basedir}/src/main/java</gspFlag>
<dFlag>${project.basedir}/target/classes</dFlag>
Expand All @@ -77,6 +77,7 @@
</goals>
<configuration>
<lbjavaInputFileList>
<param>${project.basedir}/src/main/lbj/NNBrownClassifier.lbj</param>
<param>${project.basedir}/src/main/lbj/BadgesClassifier.lbj</param>
<param>${project.basedir}/src/main/lbj/SentimentClassifier.lbj</param>
<param>${project.basedir}/src/main/lbj/SetCover.lbj</param>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
/**
* This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.lbjava.examples.neuralnet;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import edu.illinois.cs.cogcomp.lbjava.parse.Parser;

/**
 * Reads a file of sparse examples fully into memory and serves them one at a time as
 * {@link NeuralNetExample}s through the {@link Parser} interface.
 * <p>
 * Every non-blank line holds integers separated by {@code ,} or {@code :}. The first integer is
 * the example's label; each remaining integer is the index of an input feature that is switched on
 * for that example. Inputs are encoded as dense vectors holding {@code 1.0f} at every listed index
 * and {@code 0.0f} elsewhere. The label is encoded as a one-element vector holding
 * {@code label / range}.
 *
 * @author redman
 */
public class BrownReader implements Parser {

    /** the input data, one dense feature vector per example. */
    float[][] inputs;

    /** the labels, one single-element vector per example. */
    float[][] outputs;

    /** indexes the current example. */
    int index = 0;

    /** the width of each input vector (largest feature index plus one, or the width supplied by the caller). */
    int inputCardinality = -1;

    /** the maximum integer classification found by {@link #BrownReader(String)}; left at its default otherwise. */
    int outputCardinality = 1;

    /**
     * Reads the examples and derives the encoding from the file itself: the input width is the
     * largest feature index plus one, and labels are divided by the largest label in the file.
     * An empty file yields a reader with no examples.
     *
     * @param infile the file holding both labels and feature indexes.
     * @throws RuntimeException if the file cannot be read (the {@link IOException} is the cause).
     * @throws NumberFormatException if a token is not an integer.
     */
    public BrownReader(String infile) {
        try {
            List<int[]> rows = readRows(infile);
            this.inputCardinality = largestFeatureIndex(rows) + 1;
            this.outputCardinality = largestLabel(rows);
            this.inputs = encodeInputs(rows, this.inputCardinality);
            this.outputs = encodeOutputs(rows, this.outputCardinality);
        } catch (IOException e) {
            throw new RuntimeException("Could not read example data.", e);
        }
    }

    /**
     * Reads the examples using an encoding fixed by the caller, typically the one derived from a
     * previously read training file, so that the vectors are compatible with a trained model.
     * Feature indexes at or beyond {@code numberInputFeatures} are ignored.
     *
     * @param infile the file holding both labels and feature indexes.
     * @param numberInputFeatures the width of every input vector.
     * @param numberExamples despite its name, this is the value each label is divided by
     *        (the label range), not a count of examples.
     * @throws RuntimeException if the file cannot be read (the {@link IOException} is the cause).
     * @throws NumberFormatException if a token is not an integer.
     */
    public BrownReader(String infile, int numberInputFeatures, int numberExamples) {
        try {
            this.inputCardinality = numberInputFeatures;
            List<int[]> rows = readRows(infile);
            this.inputs = encodeInputs(rows, numberInputFeatures);
            this.outputs = encodeOutputs(rows, numberExamples);
        } catch (IOException e) {
            throw new RuntimeException("Could not read example data.", e);
        }
    }

    /** Rewinds to the first example; the data is held in memory, so there is nothing to release. */
    @Override
    public void close() {
        index = 0;
    }

    /**
     * @return the next example as a {@link NeuralNetExample}, or {@code null} once every example
     *         has been returned.
     */
    @Override
    public Object next() {
        if (index >= inputs.length) {
            return null;
        }
        NeuralNetExample nne = new NeuralNetExample(inputs[index], outputs[index]);
        index++;
        return nne;
    }

    /** Rewinds to the first example. */
    @Override
    public void reset() {
        index = 0;
    }

    /**
     * Parses the file once. Each returned row is the integers of one non-blank line: element 0 is
     * the label, the remaining elements are feature indexes.
     *
     * @param filename the file to read.
     * @return one row per non-blank line, in file order.
     * @throws IOException if the file cannot be opened or read.
     */
    private static List<int[]> readRows(String filename) throws IOException {
        List<int[]> rows = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(new File(filename)))) {
            String line;
            int lineNumber = 0;
            while ((line = br.readLine()) != null) {
                lineNumber++;
                // a stray blank line (e.g. at the end of the file) is not an example.
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] splits = line.split("[,:]");
                // a line made only of separators splits to nothing, so there is no label to read.
                if (splits.length == 0) {
                    throw new IllegalArgumentException(
                            "No label on line " + lineNumber + " of " + filename);
                }
                int[] row = new int[splits.length];
                for (int i = 0; i < splits.length; i++) {
                    row[i] = Integer.parseInt(splits[i].trim());
                }
                rows.add(row);
            }
        }
        return rows;
    }

    /** @return the largest feature index in any row, or -1 when there are no features at all. */
    private static int largestFeatureIndex(List<int[]> rows) {
        int max = -1;
        for (int[] row : rows) {
            // element 0 is the label, not a feature.
            for (int i = 1; i < row.length; i++) {
                if (row[i] > max) {
                    max = row[i];
                }
            }
        }
        return max;
    }

    /** @return the largest label in any row, or -1 when there are no rows. */
    private static int largestLabel(List<int[]> rows) {
        int max = -1;
        for (int[] row : rows) {
            if (row[0] > max) {
                max = row[0];
            }
        }
        return max;
    }

    /**
     * Builds the dense input vectors: 1.0 at each listed feature index, 0.0 elsewhere.
     *
     * @param rows the parsed lines.
     * @param width the length of every vector; feature indexes at or beyond it are dropped.
     * @return one vector per row.
     * @throws IllegalArgumentException if a feature index is negative.
     */
    private static float[][] encodeInputs(List<int[]> rows, int width) {
        // freshly allocated float arrays are already zero filled.
        float[][] data = new float[rows.size()][width];
        for (int r = 0; r < data.length; r++) {
            int[] row = rows.get(r);
            // start at 1: writing the label (element 0) into the vector would leak the answer.
            for (int i = 1; i < row.length; i++) {
                int featureindex = row[i];
                if (featureindex < 0) {
                    throw new IllegalArgumentException(
                            "Negative feature index " + featureindex + " in example " + r);
                }
                if (featureindex < width) {
                    data[r][featureindex] = 1.0f;
                }
            }
        }
        return data;
    }

    /**
     * Builds the label vectors, each holding {@code label / range}.
     *
     * @param rows the parsed lines.
     * @param range the divisor; a range of 0 (every label is 0) is treated as 1 so the result is
     *        0.0 rather than NaN.
     * @return one single-element vector per row.
     */
    private static float[][] encodeOutputs(List<int[]> rows, int range) {
        float divisor = (range == 0) ? 1.0f : range;
        float[][] data = new float[rows.size()][1];
        for (int r = 0; r < data.length; r++) {
            data[r][0] = rows.get(r)[0] / divisor;
        }
        return data;
    }
}
Loading