Skip to main content

By clicking Submit, you agree to the developerWorks terms of use.

The first time you sign in to developerWorks, a profile is created for you. Select information in your developerWorks profile is displayed to the public, but you may edit the information at any time. Your first name, last name (unless you choose to hide them), and display name will accompany the content that you post.

All information submitted is secure.

  • Close [x]

The first time you sign in to developerWorks, a profile is created for you, so you need to choose a display name. Your display name accompanies the content you post on developerWorks.

Please choose a display name between 3 and 31 characters long. Your display name must be unique in the developerWorks community and should not be your email address for privacy reasons.

By clicking Submit, you agree to the developerWorks terms of use.

All information submitted is secure.

  • Close [x]

Parsing comma-separated values

Return to article.

//---------------------------------------------------------------------------||
// csvParser.java                                                            ||
//                                                                           ||
// This code takes a comma-separated value (CSV) file as input, and converts ||
// it to XML.  The entire document is enclosed in a <document> tag, each row ||
// is enclosed in a <row> tag, and the tag names for the individual data     ||
// items are based on the column number in the file.                         ||
//---------------------------------------------------------------------------||
// Created 21 March 2000 by Doug Tidwell, based on a question from           ||
// Tom Watson (!) of Schwab.                                                 ||
//---------------------------------------------------------------------------||

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.math.BigInteger;

/**
 * Converts a comma-separated value (CSV) file into a simple XML document.
 *
 * The whole output is wrapped in a &lt;document&gt; element, every input line
 * becomes a &lt;row&gt; element, and each field in a line becomes a
 * &lt;columnN&gt; element, where N is the 1-based position of the field among
 * the fields actually written for that row.
 *
 * Fields are split by java.io.StreamTokenizer, not by a dedicated CSV parser,
 * so its tokenizing rules apply: string fields are expected to be enclosed in
 * double quotes, and numeric fields are parsed into a double (very long
 * integers can therefore lose precision).
 */
public class csvParser
{
  /**
   * Reads the CSV file named by inputFile and writes the equivalent XML
   * document to the file named by outputFile.
   *
   * Both files are closed before this method returns, whether it completes
   * normally or throws.
   *
   * @param inputFile  path of the CSV file to read
   * @param outputFile path of the XML file to create (or overwrite)
   * @throws java.io.FileNotFoundException if inputFile cannot be opened
   * @throws java.io.IOException if reading or writing fails
   */
  public void csvToXML(String inputFile, String outputFile)
    throws java.io.FileNotFoundException, 
           java.io.IOException
  {
    // The reader is opened first so that a missing input file is reported
    // before the output file is created.
    BufferedReader br = new BufferedReader(new FileReader(inputFile));
    try
    {
      StreamTokenizer st = new StreamTokenizer(br);
      // We want to read the file one line at a time, so end-of-line matters
      st.eolIsSignificant(true);
      // The delimiter between fields is a comma, not a space
      st.whitespaceChars(',', ',');
      // All strings are in double quotes
      st.quoteChar('"');

      FileWriter fw = new FileWriter(outputFile);
      try
      {
        // Write the XML declaration and the root element
        fw.write("<?xml version=\"1.0\"?>\n");
        fw.write("<document>\n");

        // Get the first token, then check its type
        st.nextToken();
        while (st.ttype != StreamTokenizer.TT_EOF)
        {
          // We're not at EOF, so start a row
          fw.write("  <row>\n");
          int i = 1;
          while (st.ttype != StreamTokenizer.TT_EOL && 
                 st.ttype != StreamTokenizer.TT_EOF)
          {
            if (st.ttype == StreamTokenizer.TT_NUMBER)
            {
              writeColumn(fw, i++, formatNumber(st.nval));
            }
            else if (st.sval != null)
            {
              // sval is only filled in for word and quoted-string tokens.
              // A lone ordinary character such as "+" is returned as a
              // single-character token with sval == null, so it is skipped
              // and does not consume a column number.
              writeColumn(fw, i++, escapeXml(st.sval.trim()));
            }
            st.nextToken();
          }

          // We've hit either the end of the line or the end of the file,
          // so close the row.
          fw.write("  </row>\n");
          st.nextToken();
        }

        // Now we're at the end of the file, so close the XML document
        // and flush the buffer to disk.
        fw.write("</document>\n");
        fw.flush();
      }
      finally
      {
        fw.close();
      }
    }
    finally
    {
      br.close();
    }
  }

  /**
   * Writes one &lt;columnN&gt; element, indented four spaces, on its own line.
   *
   * @param fw    writer for the XML output
   * @param index 1-based column number used in the element name
   * @param text  element content, already safe to embed in XML
   * @throws IOException if the write fails
   */
  private static void writeColumn(FileWriter fw, int index, String text)
    throws IOException
  {
    fw.write("    <column" + index + ">");
    fw.write(text);
    fw.write("</column" + index + ">\n");
  }

  /**
   * Formats a numeric token in plain decimal notation.
   *
   * Long numbers such as dates written like 19991013 must not come out in
   * scientific notation, and fractional values must keep their fraction
   * rather than being truncated to a whole number.
   *
   * @param value the numeric value of the current token
   * @return the value as plain digits, without an exponent or trailing zeros
   */
  private static String formatNumber(double value)
  {
    // BigDecimal cannot represent these; fall back to the default text form.
    if (Double.isNaN(value) || Double.isInfinite(value))
    {
      return Double.toString(value);
    }
    // stripTrailingZeros turns 7.0 into 7; toPlainString avoids exponents.
    return java.math.BigDecimal.valueOf(value)
                               .stripTrailingZeros()
                               .toPlainString();
  }

  /**
   * Escapes the characters that may not appear literally in XML element
   * content, so the generated document stays well-formed.
   *
   * @param text raw field text
   * @return text with &amp;, &lt; and &gt; replaced by entity references
   */
  private static String escapeXml(String text)
  {
    StringBuilder sb = new StringBuilder(text.length() + 16);
    for (int k = 0; k < text.length(); k++)
    {
      char c = text.charAt(k);
      switch (c)
      {
        case '&': sb.append("&amp;"); break;
        case '<': sb.append("&lt;");  break;
        case '>': sb.append("&gt;");  break;
        default:  sb.append(c);
      }
    }
    return sb.toString();
  }

  /**
   * Command-line entry point: expects the CSV input path and the XML output
   * path, and prints a usage message when the argument count is wrong.
   *
   * @param argv command-line arguments: csv-file, xml-file
   * @throws java.io.IOException if the conversion fails
   */
  public static void main(String argv[])
    throws java.io.IOException
  {
    if (argv.length == 2)
    {
      csvParser cp = new csvParser();
      cp.csvToXML(argv[0], argv[1]);
    }
    else
    {
      System.out.println("\nUsage: java csvParser csv-file xml-file");
      System.out.println("       where csv-file is the comma-separated file, and ");
      System.out.println("       xml-file is the XML file to be generated.");
    }
  }
}

Return to article.