import java.util.*; import java.io.*; import java.sql.Timestamp; import org.w3c.dom.*; /** * The WeatherExtractor class extracts weather information from * a web page and saves it in XML format. If a weather XML file * all ready exists, the data is merged with that file, otherwise * a new XML data file is created */ public class WeatherExtractor { // The location of the various resources and files private static final String source = "http://weather.yahoo.com/forecast/Seattle_WA_US_f.html"; private static final String xslFile = "weather.xsl"; private static final String xmlFile = "result.xml"; public static void main(String args[]) { try { // Retrieve the source document as XML Document xhtml = XMLHelper.tidyHTML(source); // Parse the XSL document Document xsl = XMLHelper.parseXMLFromFile(xslFile); // Get the current data by applying the XSL transform Document data = XMLHelper.transformXML(xhtml, xsl); // For convenience, add a timestamp to what we have done Date now = new Date(); Timestamp time = new Timestamp(now.getTime()); Element root = data.getDocumentElement(); NodeList nl = root.getElementsByTagName("WEATHER"); Element weather = (Element)nl.item(0); weather.setAttribute("Retreived",time.toString()); // Write the output file, merging if necessary File dataFile = new File(xmlFile); if (dataFile.exists()) { // If we have extracted before, merge the data and write the file Document oldData = XMLHelper.parseXMLFromFile(dataFile); XMLHelper.mergeXML(oldData.getDocumentElement(), weather, false); XMLHelper.outputXMLToFile(oldData, xmlFile); } else { // If this is our first extraction, just write the file XMLHelper.outputXMLToFile(data, xmlFile); } } catch (XMLHelperException xmlhe) { System.err.println( "There was an error in the extraction process:\n" + xmlhe.getMessage()); } } }