//---------------------------------------------------------------------------||
// csvParser.java                                                            ||
//                                                                           ||
// This code takes a comma-separated value (CSV) file as input, and converts ||
// it to XML. The entire document is enclosed in a <document> tag, each row  ||
// is enclosed in a <row> tag, and the tag names for the individual data     ||
// items (<column1>, <column2>, ...) are based on the column number in the   ||
// file.                                                                     ||
//---------------------------------------------------------------------------||
// Created 21 March 2000 by Doug Tidwell, based on a question from           ||
// Tom Watson (!) of Schwab.                                                 ||
//---------------------------------------------------------------------------||

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;

/**
 * Converts a comma-separated value (CSV) file into a simple XML document.
 *
 * <p>The input is tokenized with {@link StreamTokenizer}, with the comma
 * configured as whitespace. Consequences of that choice, which callers should
 * be aware of:
 * <ul>
 *   <li>Consecutive commas collapse, so an empty field does not produce an
 *       element and the fields after it shift to lower column numbers.</li>
 *   <li>Numeric fields are parsed as {@code double}; leading zeros are lost
 *       and values with more significant digits than a {@code double} can
 *       hold lose precision.</li>
 *   <li>NOTE(review): the tokenizer's default syntax table is otherwise left
 *       untouched, so {@code '/'} still starts a comment and {@code '\''}
 *       still delimits a quoted string. Confirm this is acceptable for the
 *       CSV files this tool is fed.</li>
 * </ul>
 */
public class csvParser {

  /**
   * Reads {@code inputFile} as CSV and writes the XML equivalent to
   * {@code outputFile}.
   *
   * <p>Both files are closed before this method returns, whether it completes
   * normally or throws.
   *
   * @param inputFile  path of the CSV file to read
   * @param outputFile path of the XML file to create (overwritten if present)
   * @throws java.io.FileNotFoundException if the input file cannot be opened
   * @throws java.io.IOException if reading or writing fails
   */
  public void csvToXML(String inputFile, String outputFile)
      throws java.io.FileNotFoundException, java.io.IOException {
    // Open the input first so that a missing CSV file does not leave an
    // empty output file behind.
    BufferedReader br = new BufferedReader(new FileReader(inputFile));
    try {
      FileWriter fw = new FileWriter(outputFile);
      try {
        writeDocument(br, fw);
        fw.flush();
      } finally {
        fw.close();
      }
    } finally {
      br.close();
    }
  }

  /**
   * Tokenizes the CSV text from {@code in} and writes the XML document to
   * {@code out}. Neither stream is closed here.
   */
  private void writeDocument(Reader in, Writer out) throws IOException {
    StreamTokenizer st = new StreamTokenizer(in);

    // We want to read the file one line at a time, so end-of-line matters
    st.eolIsSignificant(true);

    // The delimiter between fields is a comma, not a space
    st.whitespaceChars(',', ',');

    // All strings are in double quotes
    st.quoteChar('"');

    // Write the XML declaration and the root element
    out.write("<?xml version=\"1.0\"?>\n");
    out.write("<document>\n");

    // Get the first token, then check its type
    st.nextToken();
    while (st.ttype != StreamTokenizer.TT_EOF) {
      // We're not at EOF, so start a row
      out.write("  <row>\n");

      int column = 1;
      while (st.ttype != StreamTokenizer.TT_EOL
          && st.ttype != StreamTokenizer.TT_EOF) {
        if (st.ttype == StreamTokenizer.TT_NUMBER) {
          out.write("    <column" + column + ">");
          out.write(formatNumber(st.nval));
          out.write("</column" + column + ">\n");
          column++;
        } else if (st.sval != null) {
          // Word and quoted-string tokens carry their text in sval.
          out.write("    <column" + column + ">");
          writeEscaped(out, st.sval.trim());
          out.write("</column" + column + ">\n");
          column++;
        }
        // Otherwise the token is a single "ordinary" character such as '+':
        // the tokenizer reports it through ttype only and leaves sval null,
        // so there is no text to write and no column is consumed.

        st.nextToken();
      }

      // We've hit either the end of the line or the end of the file,
      // so close the row.
      out.write("  </row>\n");
      st.nextToken();
    }

    // Now we're at the end of the file, so close the XML document.
    out.write("</document>\n");
  }

  /**
   * Renders a tokenizer number without scientific notation.
   *
   * <p>Whole values (for example a date written as 19991013) are printed as
   * plain integers; fractional values keep their fractional digits instead of
   * being truncated.
   */
  private static String formatNumber(double value) {
    BigDecimal exact = BigDecimal.valueOf(value);
    if (value == Math.rint(value)) {
      BigInteger whole = exact.toBigInteger();
      return whole.toString();
    }
    return exact.toPlainString();
  }

  /**
   * Writes {@code text} as XML character data, replacing the characters that
   * would otherwise be read as markup.
   */
  private static void writeEscaped(Writer out, String text) throws IOException {
    for (int k = 0; k < text.length(); k++) {
      char c = text.charAt(k);
      switch (c) {
        case '&':
          out.write("&amp;");
          break;
        case '<':
          out.write("&lt;");
          break;
        case '>':
          out.write("&gt;");
          break;
        default:
          out.write(c);
          break;
      }
    }
  }

  /**
   * Command-line entry point: {@code java csvParser csv-file xml-file}.
   * Prints a usage message when the argument count is not exactly two.
   *
   * @param argv the CSV input path followed by the XML output path
   * @throws java.io.IOException if the conversion fails
   */
  public static void main(String argv[]) throws java.io.IOException {
    if (argv.length == 2) {
      csvParser cp = new csvParser();
      cp.csvToXML(argv[0], argv[1]);
    } else {
      System.out.println("\nUsage: java csvParser csv-file xml-file");
      System.out.println(" where csv-file is the comma-separated file, and ");
      System.out.println(" xml-file is the XML file to be generated.");
    }
  }
}