/*
 * (C) Copyright IBM Corp. 2004. All rights reserved.
 *
 * US Government Users Restricted Rights Use, duplication or
 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
 *
 * The program is provided "as is" without any warranty express or
 * implied, including the warranty of non-infringement and the implied
 * warranties of merchantibility and fitness for a particular purpose.
 * IBM will not be liable for any damages suffered by you as a result
 * of using the Program. In no event will IBM be liable for any
 * special, indirect or consequential damages or lost profits even if
 * IBM has been advised of the possibility of their occurrence. IBM
 * will not be liable for any third party claims against you.
 */

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.math.BigInteger;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Reads a comma-separated values file and reports its content as a stream
 * of SAX events: one <code>employees</code> root element, one
 * <code>employee</code> element per non-blank line, and one child element
 * per field, named after the entries of {@link #tagNames}.
 *
 * <p>The class is also a {@link DefaultHandler} that echoes the events it
 * receives to <code>System.out</code> as XML text, so an instance can be
 * passed to its own {@link #scanCsvFile(String, DefaultHandler)}.
 */
public class CsvToSax extends DefaultHandler {

    /** Element names for the fields of a row, indexed by column position. */
    static String[] tagNames = {"employeeNumber", "firstName",
                                "middleInitial", "lastName",
                                "deptNo", "extension", "dateOfBirth"};

    /** Whitespace reported after every element that ends a line. */
    private static final char[] LINE_BREAK = {'\n'};

    /** Indentation reported before the start and end of each row element. */
    private static final char[] SINGLE_INDENT = {' '};

    /** Indentation reported before each field element; twice the row indent. */
    private static final char[] DOUBLE_INDENT = {' ', ' '};

    /**
     * Empty attribute list handed to every startElement call. SAX requires a
     * non-null Attributes object even when an element has no attributes.
     */
    private static final Attributes NO_ATTRIBUTES = new AttributesImpl();

    /**
     * Tokenizes the CSV file and fires the matching SAX events on
     * <code>dh</code>.
     *
     * <p>Tokenizing is done by a {@link StreamTokenizer} in which commas are
     * whitespace and double quotes delimit strings. Consequences worth
     * knowing: consecutive commas collapse, so an empty field shifts the
     * remaining fields of that row one column to the left; unquoted numbers
     * are parsed as <code>double</code> and truncated to <code>long</code>,
     * so fractions and leading zeros are lost; and, under the tokenizer's
     * default syntax, an unquoted <code>/</code> starts a comment that runs
     * to the end of the line. Blank lines are skipped.
     *
     * @param uri name of the file to read; it is opened with
     *            {@link FileReader}, so it must be a file system path
     * @param dh  handler that receives the events
     * @throws java.io.FileNotFoundException if the file cannot be opened
     * @throws java.io.IOException if reading or closing the file fails
     * @throws org.xml.sax.SAXException if <code>dh</code> rejects an event,
     *         or a row has more fields than {@link #tagNames} has names
     */
    public void scanCsvFile(String uri, DefaultHandler dh)
        throws java.io.FileNotFoundException, java.io.IOException,
               org.xml.sax.SAXException {
        BufferedReader br = new BufferedReader(new FileReader(uri));
        try {
            StreamTokenizer st = new StreamTokenizer(br);

            // We read the file one line at a time, so end-of-line matters.
            st.eolIsSignificant(true);
            // The delimiter between fields is a comma, not a space.
            st.whitespaceChars(',', ',');
            // All strings are in double quotes.
            st.quoteChar('"');

            dh.startDocument();
            dh.startElement("", "", "employees", NO_ATTRIBUTES);
            dh.ignorableWhitespace(LINE_BREAK, 0, LINE_BREAK.length);

            // Get the first token, then loop on its type.
            st.nextToken();
            while (st.ttype != StreamTokenizer.TT_EOF) {
                if (st.ttype == StreamTokenizer.TT_EOL) {
                    // A line without any token is not a row; skip it rather
                    // than report an empty employee element.
                    st.nextToken();
                    continue;
                }

                // We have at least one token, so start a row.
                dh.ignorableWhitespace(SINGLE_INDENT, 0, SINGLE_INDENT.length);
                dh.startElement("", "", "employee", NO_ATTRIBUTES);
                dh.ignorableWhitespace(LINE_BREAK, 0, LINE_BREAK.length);

                int column = 0;
                while (st.ttype != StreamTokenizer.TT_EOL
                       && st.ttype != StreamTokenizer.TT_EOF) {
                    String value = null;
                    if (st.ttype == StreamTokenizer.TT_NUMBER) {
                        // Going through an integer type keeps long numbers
                        // such as dates (19991013) out of the scientific
                        // notation a double would be printed in.
                        value = BigInteger.valueOf((long) st.nval).toString();
                    } else if (st.sval != null) {
                        value = st.sval.trim();
                    }
                    // A token that is a single ordinary character, such as
                    // "+", has no string value; nothing is reported for it,
                    // but it still occupies a column.
                    if (value != null) {
                        emitField(dh, column, value, st.lineno());
                    }
                    st.nextToken();
                    column++;
                }

                // We've hit either the end of the line or the end of the
                // file, so close the row.
                dh.ignorableWhitespace(SINGLE_INDENT, 0, SINGLE_INDENT.length);
                dh.endElement("", "", "employee");
                st.nextToken();
                dh.ignorableWhitespace(LINE_BREAK, 0, LINE_BREAK.length);
            }

            // Now we're at the end of the file, so close the XML document.
            dh.endElement("", "", "employees");
            dh.ignorableWhitespace(LINE_BREAK, 0, LINE_BREAK.length);
            dh.endDocument();
        } finally {
            // Release the file handle whether or not scanning succeeded.
            br.close();
        }
    }

    /**
     * Reports one field as an indented element followed by a line break.
     *
     * @param dh         handler that receives the events
     * @param column     zero-based position of the field within its row
     * @param value      text content of the field
     * @param lineNumber input line being read, used in the error message
     * @throws SAXException if there is no tag name for <code>column</code>,
     *         or <code>dh</code> rejects an event
     */
    private static void emitField(DefaultHandler dh, int column,
                                  String value, int lineNumber)
        throws SAXException {
        if (column >= tagNames.length) {
            throw new SAXException("Line " + lineNumber + " has more than "
                                   + tagNames.length + " fields");
        }
        String tag = tagNames[column];
        char[] chars = value.toCharArray();
        dh.ignorableWhitespace(DOUBLE_INDENT, 0, DOUBLE_INDENT.length);
        dh.startElement("", "", tag, NO_ATTRIBUTES);
        dh.characters(chars, 0, chars.length);
        dh.endElement("", "", tag);
        dh.ignorableWhitespace(LINE_BREAK, 0, LINE_BREAK.length);
    }

    /**
     * Replaces the characters that are markup in XML with entity references.
     *
     * @param text        text to escape
     * @param inAttribute true to also escape double quotes, as needed inside
     *                    a double-quoted attribute value
     * @return the escaped text
     */
    private static String escape(String text, boolean inAttribute) {
        StringBuffer out = new StringBuffer(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    if (inAttribute) {
                        out.append("&quot;");
                    } else {
                        out.append(c);
                    }
                    break;
                default:
                    out.append(c);
                    break;
            }
        }
        return out.toString();
    }

    /** Start document: writes the XML declaration on a line of its own. */
    public void startDocument() {
        System.out.println("<?xml version=\"1.0\"?>");
    } // startDocument()

    /**
     * Start element: writes the start tag with its attributes, if any.
     *
     * @param namespaceURI not used
     * @param localName    not used
     * @param rawName      name written in the tag
     * @param attrs        attributes to write; may be null
     */
    public void startElement(String namespaceURI, String localName,
                             String rawName, Attributes attrs) {
        System.out.print("<");
        System.out.print(rawName);
        if (attrs != null) {
            int len = attrs.getLength();
            for (int i = 0; i < len; i++) {
                System.out.print(" ");
                System.out.print(attrs.getQName(i));
                System.out.print("=\"");
                System.out.print(escape(attrs.getValue(i), true));
                System.out.print("\"");
            }
        }
        System.out.print(">");
    } // startElement(String,String,String,Attributes)

    /** Characters: writes the text with XML markup characters escaped. */
    public void characters(char ch[], int start, int length) {
        System.out.print(escape(new String(ch, start, length), false));
    } // characters(char[],int,int);

    /** Ignorable whitespace: written exactly like character data. */
    public void ignorableWhitespace(char ch[], int start, int length) {
        characters(ch, start, length);
    } // ignorableWhitespace(char[],int,int);

    /**
     * End element: writes the end tag.
     *
     * @param namespaceURI not used
     * @param localName    not used
     * @param rawName      name written in the tag
     */
    public void endElement(String namespaceURI, String localName,
                           String rawName) {
        System.out.print("</");
        System.out.print(rawName);
        System.out.print(">");
    } // endElement(String,String,String)

    /** End document. */
    public void endDocument() {
        // No need to do anything.
    } // endDocument()

    /**
     * Processing instruction: writes the target, then the data when it is
     * present and not empty.
     */
    public void processingInstruction(String target, String data) {
        System.out.print("<?");
        System.out.print(target);
        if (data != null && data.length() > 0) {
            System.out.print(' ');
            System.out.print(data);
        }
        System.out.print("?>");
    } // processingInstruction(String,String)

    //
    // ErrorHandler methods
    //

    /** Warning: reported on System.err; processing continues. */
    public void warning(SAXParseException ex) {
        System.err.println("[Warning] "+
                           getLocationString(ex)+": "+
                           ex.getMessage());
    }

    /** Error: reported on System.err; processing continues. */
    public void error(SAXParseException ex) {
        System.err.println("[Error] "+
                           getLocationString(ex)+": "+
                           ex.getMessage());
    }

    /** Fatal error: reported on System.err, then rethrown. */
    public void fatalError(SAXParseException ex) throws SAXException {
        System.err.println("[Fatal Error] "+
                           getLocationString(ex)+": "+
                           ex.getMessage());
        throw ex;
    }

    /**
     * Returns a string of the location, in the form
     * <code>file:line:column</code>. The file part is the text after the
     * last '/' of the system identifier and is left out when the exception
     * carries no system identifier.
     */
    private String getLocationString(SAXParseException ex) {
        StringBuffer str = new StringBuffer();

        String systemId = ex.getSystemId();
        if (systemId != null) {
            int index = systemId.lastIndexOf('/');
            if (index != -1) {
                systemId = systemId.substring(index + 1);
            }
            str.append(systemId);
        }
        str.append(':');
        str.append(ex.getLineNumber());
        str.append(':');
        str.append(ex.getColumnNumber());

        return str.toString();
    } // getLocationString(SAXParseException):String

    /**
     * Main program entry point. Converts the file named by the first
     * argument and echoes the result to the console; prints a usage message
     * and exits with status 1 when called without arguments or with
     * <code>-help</code>.
     */
    public static void main(String argv[]) {
        if (argv.length == 0 ||
            (argv.length == 1 && argv[0].equals("-help"))) {
            System.out.println("\nUsage: java CsvToSax uri");
            System.out.println(" where uri is the URI of your ");
            System.out.println(" comma-separated values document.");
            System.out.println(" Sample: java CsvToSax sonnet.xml");
            System.out.println("\nEchoes SAX events back to the console.");
            System.exit(1);
        }

        CsvToSax c2s = new CsvToSax();
        try {
            c2s.scanCsvFile(argv[0], c2s);
        } catch (FileNotFoundException fnfe) {
            System.err.println("Error - File " + argv[0] + " not found!");
        } catch (SAXException se) {
            System.err.println("SAX Exception: " + se);
        } catch (IOException ioe) {
            System.err.println("IO Exception: " + ioe);
        }
    } // main(String[])
}