CSVParser sample
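This sample parser reads text from a reader one line at a time and splits each line into tokens at every comma (`,`). Blank lines are skipped. An empty string token is returned when the parser finds two consecutive commas (`,`) or when the line starts or ends with a comma. A field enclosed in double quotes (`"`) may contain commas and line breaks, and a doubled quote (`""`) inside such a field stands for a single quote character.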
package com.ibm.ccd.api.samplecode.parser;
import java.io.BufferedReader;
import java.io.StringReader;
import java.util.ArrayList;
/**
 * Sample CSV parser that reads records from a {@link BufferedReader} and splits
 * each one into fields at commas.
 *
 * <p>Rules implemented by this class:
 * <ul>
 *   <li>Lines that are empty after trimming are skipped.</li>
 *   <li>Two consecutive commas, or a comma at the start or end of a line,
 *       produce an empty-string field.</li>
 *   <li>A field that starts with a double quote runs until the matching closing
 *       quote. Inside it, commas are ordinary characters, a doubled quote
 *       ({@code ""}) stands for one quote character, and the field may continue
 *       onto following lines (joined with {@code '\n'}).</li>
 * </ul>
 *
 * <p>The parser keeps per-record state in instance fields, so one instance
 * must not be used by several threads at the same time.
 */
public class CSVParser
{
    /** Zero-length array handed to {@code toArray} so it returns a {@code String[]}. */
    private static final String[] EMPTY_STRING_ARRAY = new String[0];

    /** Text of the field most recently extracted by {@link #findNextComma(int)}. */
    String oneRes;

    /** Record being parsed; it grows when a quoted field continues on the next line. */
    private String line = "";

    /**
     * Count of lines read for the current record: reset to 0 at the start of
     * {@link #splitLine()}, set to 1 once a record has been read, and
     * incremented for every continuation line appended to it.
     */
    private int nbLines = 0;

    /** Source of the CSV text. */
    private final BufferedReader reader;

    /**
     * Creates a parser that reads from the given reader.
     *
     * @param reader source of the CSV text; the parser does not close it
     */
    public CSVParser(BufferedReader reader)
    {
        this.reader = reader;
    }

    /**
     * Reads the next non-blank record and splits it into fields.
     *
     * @return the fields of the record in order, or {@code null} when the
     *         reader has no more non-blank lines
     * @throws Exception if a quoted field is not terminated before the end of
     *         the input, if a closing quote is followed by anything other than
     *         a comma, a quote or the end of the line, or if reading fails
     */
    public String[] splitLine() throws Exception
    {
        nbLines = 0;
        line = nextLine();
        if (line == null)
        {
            return null;
        }
        nbLines = 1;

        ArrayList<String> fields = new ArrayList<String>();
        int pos = 0;
        // line.length() is re-read on every pass because a quoted field may
        // have appended continuation lines to the record.
        while (pos < line.length())
        {
            pos = findNextComma(pos);
            fields.add(oneRes);
            pos++; // step over the comma that ended the field
        }

        // The loop stops right after a trailing comma without emitting the
        // empty field that follows it, so add that field here.
        if (line.endsWith(","))
        {
            fields.add("");
        }
        return fields.toArray(EMPTY_STRING_ARRAY);
    }

    /**
     * Extracts the field that starts at index {@code p} of the current record
     * and stores its text in {@link #oneRes}.
     *
     * @param p index of the first character of the field
     * @return index of the comma that terminates the field, or the length of
     *         the record when the field is the last one
     * @throws Exception if a quoted field is malformed or unterminated
     */
    private int findNextComma(int p) throws Exception
    {
        oneRes = "";
        char c = line.charAt(p);

        // Empty field: the comma at p is itself the terminator.
        if (c == ',')
        {
            return p;
        }

        // Unquoted field: everything up to the next comma or the end of the record.
        if (c != '"')
        {
            int end = line.indexOf(',', p);
            if (end == -1)
            {
                end = line.length();
            }
            oneRes = line.substring(p, end);
            return end;
        }

        // Quoted field: skip the opening quote and collect up to the closing one.
        p++;
        StringBuilder sb = new StringBuilder(200);
        while (true)
        {
            c = readNextChar(p);
            p++;

            // Ordinary character inside the quotes.
            if (c != '"')
            {
                sb.append(c);
                continue;
            }

            // A quote that is the last character of the record closes the field.
            if (p == line.length())
            {
                oneRes = sb.toString();
                return p;
            }

            c = readNextChar(p);
            p++;

            // Doubled quote: emit a single quote character and keep going.
            if (c == '"')
            {
                sb.append('"');
                continue;
            }

            // Quote followed by a comma closes the field; report the comma's index.
            if (c == ',')
            {
                oneRes = sb.toString();
                return p - 1;
            }

            throw new Exception("Unexpected token found");
        }
    }

    /**
     * Returns the character at index {@code p} of the current record. When
     * {@code p} is exactly one past the end of the record, the next line is
     * read from the reader and appended after a {@code '\n'} first.
     *
     * @param p index of the character to return
     * @return the character at {@code p}
     * @throws Exception if the reader is exhausted when a continuation line is
     *         needed, or if reading fails
     */
    private char readNextChar(int p) throws Exception
    {
        if (p == line.length())
        {
            String newLine = reader.readLine();
            if (newLine == null)
            {
                throw new Exception("Error occured while parsing");
            }
            line += "\n" + newLine;
            nbLines++;
        }
        return line.charAt(p);
    }

    /**
     * Reads lines from the reader until one is found that is not empty after
     * trimming, and makes it the current record.
     *
     * @return the line that was read, or {@code null} at the end of the input
     * @throws Exception if reading fails
     */
    public String nextLine()
        throws Exception
    {
        do
        {
            line = reader.readLine();
            if (line == null)
            {
                return null;
            }
        }
        while (line.trim().equals(""));
        return line;
    }

    /**
     * Demonstrates the parser on a small in-memory document and prints every
     * token that is found.
     *
     * @param args ignored
     * @throws Exception if closing the reader fails
     */
    public static void main (String args[]) throws Exception
    {
        BufferedReader reader = null;
        try
        {
            String doc = "a,a ab,c,d a\n" +
                         ",1 a\n" +
                         "1, \n" +
                         "a,\n" +
                         "1," +
                         "\"v \"\"a v\"";
            System.out.println("String to be parsed = " + doc);
            reader = new BufferedReader(new StringReader(doc));
            CSVParser parser = new CSVParser(reader);
            String[] res;
            while ((res = parser.splitLine()) != null)
            {
                for (int i = 0; i < res.length; i++)
                {
                    System.out.println("Token Found ["+res[i]+"] \n");
                }
            }
        }
        catch(Exception e )
        {
            e.printStackTrace();
        }
        finally
        {
            // The reader is still null if the failure happened before it was created.
            if (reader != null)
            {
                reader.close();
            }
        }
    }
}