import java.util.ArrayList;
/**
 * Table-driven parser that splits the first line of a CSV string into fields.
 *
 * <p>Each input character is mapped to a character class by {@link #TokenState(char)}
 * and fed through the transition table {@link #state}. Fields are trimmed and
 * collected in {@link #tokens}. Quoted fields may contain commas, and a doubled
 * quote inside a quoted field produces a literal quote character. Parsing stops at
 * the first newline; a newline inside an open quoted field is a parse error.
 */
public class CSVParser {

    /**
     * Named state numbers used by {@link #Parse(String)}.
     *
     * <p>The numeric values are row indices into the transition table (except
     * {@code ERROR} and {@code NEW_LINE}, which are terminal markers). The table also
     * uses rows 3 and 7, which have no constant here because {@code Parse} never needs
     * to test for them: they are the states in which the current quote character is
     * consumed without being copied to the output.
     */
    public enum OPstates {
        /** Nothing consumed yet. */
        INITIAL(0),
        /** Inside an unquoted field; the character is copied to the field. */
        NORMAL(1),
        /** A separating comma was consumed; the current field is emitted. */
        COMMA(2),
        /** Inside quoted text (row 9); the character is copied to the field. */
        OPEN_QUOTATION(9),
        /** A quote ended the quoted text, or is the first half of a doubled quote; not copied. */
        CLOSE_QUOTATION(4),
        /** A space outside of field content; not copied. */
        WHITE_SPACE(5),
        /** Text following a doubled quote inside a quoted field; the character is copied. */
        QUOTATION_IN_TOKEN(6),
        /** Second quote of a later doubled quote; the quote is copied. */
        CLOSE_QUOTATION_IN_TOKEN(8),
        /** The input is malformed. */
        ERROR(-1),
        /** End of the line was reached; the current field is emitted and parsing stops. */
        NEW_LINE(200);

        int i;

        private OPstates(int i) {
            this.i = i;
        }

        /** @return the numeric state value used in the transition table */
        public int getValue() {
            return i;
        }
    }

    /** Column of the transition table that corresponds to a newline character. */
    private static final int NEWLINE_CLASS = 3;

    /** Table entry that marks a rejected transition. */
    private static final int ERROR_STATE = OPstates.ERROR.getValue();

    /**
     * Transition table: {@code state[current][characterClass]} is the next state.
     * Columns are the classes returned by {@link #TokenState(char)}:
     * 0 = other character, 1 = comma, 2 = double quote, 3 = newline, 4 = space.
     * {@code -1} rejects the input and {@code 200} ends the line.
     */
    int[][] state = {
        {1, 2, 3, 200, 5},    // 0 initial
        {1, 2, -1, 200, 1},   // 1 unquoted field content (a quote here is an error)
        {1, 2, 3, 200, 5},    // 2 just after a comma
        {9, 9, 4, -1, 9},     // 3 opening quote consumed (the quote itself is not copied)
        {-1, 2, 6, 200, 5},   // 4 quote seen in quoted text: closing quote, or first half of ""
        {1, 2, 3, 200, 5},    // 5 space outside of field content
        {6, 6, 7, 200, 6},    // 6 text after a doubled quote
        {-1, -1, 8, -1, -1},  // 7 quote seen in state 6; must be followed by another quote
        {9, -1, 4, 200, 9},   // 8 second quote of that pair (copied)
        {9, 9, 4, -1, 9}      // 9 inside quoted text
    };

    /** Fields parsed so far, in input order, each trimmed of surrounding whitespace. */
    ArrayList<String> tokens = new ArrayList<String>();

    /**
     * Classifies a character into a column index of the transition table.
     *
     * @param token the character to classify
     * @return 1 for a comma, 2 for a double quote, 3 for a newline, 4 for a space,
     *         0 for anything else
     */
    int TokenState(char token) {
        switch (token) {
            case ',':
                return 1;
            case '"':
                return 2;
            case '\n':
                return NEWLINE_CLASS;
            case ' ':
                return 4;
            default:
                return 0;
        }
    }

    /**
     * Parses the first line of {@code input} and appends its fields to {@link #tokens}.
     *
     * <p>Parsing stops at the first newline. If the input is non-empty and ends without
     * a newline, the end of input is handled as if a newline followed, so the last field
     * is not lost and an unterminated quoted field is reported. Empty input adds no
     * tokens.
     *
     * @param input the CSV text to parse
     * @throws RuntimeException with message {@code "Parse Error"} if the transition
     *         table rejects the input; fields completed before the error remain in
     *         {@link #tokens}
     */
    void Parse(String input) {
        int curState = OPstates.INITIAL.getValue();
        boolean lineTerminated = false;
        StringBuilder field = new StringBuilder();

        for (int i = 0; i < input.length(); i++) {
            char tokenChar = input.charAt(i);
            curState = state[curState][TokenState(tokenChar)];

            if (curState == ERROR_STATE) {
                throw new RuntimeException("Parse Error");
            }
            if (copiesCharacter(curState)) {
                field.append(tokenChar);
            } else if (curState == OPstates.COMMA.getValue()) {
                emitField(field);
            } else if (curState == OPstates.NEW_LINE.getValue()) {
                emitField(field);
                lineTerminated = true;
                // NEW_LINE is not a table row, so stop before it is used as an index.
                break;
            }
        }

        // The loop only emits on a comma or newline; without this the last field of
        // input lacking a trailing newline would be dropped silently.
        if (!lineTerminated && input.length() > 0) {
            if (state[curState][NEWLINE_CLASS] == ERROR_STATE) {
                throw new RuntimeException("Parse Error");
            }
            emitField(field);
        }
    }

    /** Tells whether the character that led into {@code s} belongs to the field text. */
    private boolean copiesCharacter(int s) {
        return s == OPstates.NORMAL.getValue()
                || s == OPstates.OPEN_QUOTATION.getValue()
                || s == OPstates.QUOTATION_IN_TOKEN.getValue()
                || s == OPstates.CLOSE_QUOTATION_IN_TOKEN.getValue();
    }

    /** Stores the trimmed content of {@code field} as the next token and clears the buffer. */
    private void emitField(StringBuilder field) {
        tokens.add(field.toString().trim());
        field.setLength(0);
    }

    /**
     * Renders the parsed fields, one {@code "Token: <field>"} entry per line.
     * The joined text is trimmed and then terminated with a single newline.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokens.size(); i++) {
            sb.append("Token: ").append(tokens.get(i)).append("\n");
        }
        return sb.toString().trim() + "\n";
    }

    /**
     * Creates a parser and immediately parses {@code input}.
     *
     * @param input the CSV text to parse
     * @throws RuntimeException if the input is malformed
     */
    public CSVParser(String input) {
        Parse(input);
    }

    /** Demonstrates the parser on a few sample lines, printing each line and its tokens. */
    public static void main(String[] args) {
        String s = " \"a\" , \"b,cd\" , c ,\"vb, n\",\",\"\n";
        System.out.println(s + new CSVParser(s));
        s = " , a,b,\"c, l\",d,,5\n";
        System.out.println(s + new CSVParser(s));
        s = "h,\"john, jr\",a,b,\"c, l\",d,,5,\"e, jr\"\n";
        System.out.println(s + new CSVParser(s));
        s = "\"h, jk\",\"john, jr\",a,b,\"c, l\",d,,5,\"e's, jr\"\n";
        System.out.println(s + new CSVParser(s));
        s = "\" e \"\"abc\"\" n \"\"def\"\" \"\"buddy\"\"d\",\"b, jr\", c dj n\n";
        System.out.println(s + new CSVParser(s));
    }
}