// (C) Copyright IBM Corp. 2009, 2014 All Rights Reserved. // All rights reserved /** * * Notes: * * Create a new collection if necessary * Push all xml files in a directory into the collection * * ALL AUTHENTICATED OPERATIONS DONE WITH the user/password * apitestuser/testpw, which is hardwired into this app. You * should either create this user in the Watson Explorer Engine installation * that you are testing with, or update the code for this * sample application, substituting a valid user/password * combination for that Watson Explorer Engine installation. * */ import velocity.*; import velocity.objects.*; import velocity.soap.*; import velocity.types.*; import java.util.Enumeration; import java.io.File; import java.io.FileInputStream; import java.net.URL; import javax.xml.namespace.QName; import javax.xml.stream.*; public class XmlPush { static String endpoint = "http://HOSTNAME/vivisimo/cgi-bin/velocity?v.app=api-soap&"; static java.lang.Boolean verbose = false; public static void usage() { System.out.println(); System.out.println("usage: XmlPush [-v] [-e endpoint] collection filename(s)\n"); if ("http://HOSTNAME/vivisimo/cgi-bin/velocity?v.app=api-soap&".equals(endpoint)) { System.out.println("SOAP endpoint has not been changed in the source code.\nYou will need to specify an endpoint on the commandline."); } System.out.println(); } public static void main(String[] args) { String collection = ""; java.util.Vector<File> filesToPush = new java.util.Vector<File>(); /* parse commandline */ if (args.length == 0) { usage(); System.exit(1); } else { int i; for (i=0; i < args.length; i++) { if ("-v".equals(args[i])) { verbose = true; } else if ("-e".equals(args[i])) { if ( i+1 < args.length ) { i++; endpoint = args[i]; } else { usage(); System.exit(1); } } else { collection = args[i]; i++; if ( i >= args.length ) { usage(); System.exit(1); } for ( ; i < args.length; i++) { File f = new File(args[i]); if ( ! f.canRead() ) { try { System.out.println("WARNING: can't read file ["+f.getCanonicalPath()+"]. Skipping."); } catch (java.io.IOException ioe) { System.out.println("WARNING: exception caught processing filename ["+args[i]+"]. Skipping."); } } else if ( f.isDirectory() ) { try { System.out.println("WARNING: ["+f.getCanonicalPath()+"] is a directory, not a file. Skipping."); } catch (java.io.IOException ioe) { System.out.println("WARNING: exception caught processing directory ["+args[i]+"]. Skipping."); } } else { filesToPush.addElement(f); } } } } if ("".equals(collection)) { usage(); System.exit(1); } if (filesToPush.isEmpty()) { usage(); System.exit(1); } if (verbose) { System.out.println("Using files:"); for (Enumeration e = filesToPush.elements() ; e.hasMoreElements() ; ) { try { File f = (File) e.nextElement(); System.out.println("\t"+f.getCanonicalPath()); } catch (java.io.IOException ioe) { System.out.println("EXCEPTION: caught printing canonical path of input filename\n"+ioe); } } } } System.out.println("\nUsing collection=["+collection+"]"); System.out.println("Using endpoint=["+endpoint+"]"); if (verbose) System.out.print("Create service..."); VelocityService vs = null; try { vs = new VelocityService( new URL(endpoint+"wsdl=1&specialize-for=&use-types=true&"), new QName("urn:/velocity", "VelocityService")); } catch (java.net.MalformedURLException e) { System.out.println("ERROR:\n"+e); System.exit(1); } if (verbose) System.out.println(" done."); if (verbose) System.out.print("Get port from service..."); VelocityPort vp = vs.getVelocityPort(); if (verbose) System.out.println(" done."); if (verbose) System.out.print("Set port's endpoint..."); ((javax.xml.ws.BindingProvider) vp).getRequestContext().put(javax.xml.ws.BindingProvider.ENDPOINT_ADDRESS_PROPERTY, endpoint); if (verbose) System.out.println(" done."); if (verbose) System.out.print("Initialize authentication object..."); Authentication authentication = new Authentication(); authentication.setUsername("apitestuser"); authentication.setPassword("testpw"); if (verbose) System.out.println(" done."); /* The crawler should start automatically when we enqueue if it is not running, but let's get the status first to make sure the collection exists */ if (verbose) System.out.print("Initialize search collection status..."); SearchCollectionStatus scs = new SearchCollectionStatus(); scs.setAuthentication(authentication); scs.setCollection(collection); if (verbose) System.out.println(" done."); SearchCollectionStatusResponse scsr = null; VseStatus vses = null; VseIndexStatus vseis = null; CrawlerStatus cs = null; try { if (verbose) System.out.print("Request status of ["+collection+"]..."); scsr = vp.searchCollectionStatus(scs); if (verbose) System.out.println(" done."); if (scsr == null) { System.out.println("Collection ["+collection+"] exists, but has no status."); } else { vses = scsr.getVseStatus(); vseis = vses.getVseIndexStatus(); cs = vses.getCrawlerStatus(); System.out.println("This collection's crawler is ["+cs.getServiceStatus()+"]."); System.out.println("This collection's indexer is ["+vseis.getServiceStatus()+"]."); } } catch (javax.xml.ws.soap.SOAPFaultException e) { if (e.getFault().getFaultString().contains("search-collection-invalid-name") ) { System.out.println("\n\n\tWARNING: collection ["+collection+"] does not exist."); System.out.print("\tWARNING: create ["+collection+"] based on [default-push]..."); /* [default-push] is the default base collection when creating a new collection via the api */ SearchCollectionCreate scc = new SearchCollectionCreate(); scc.setAuthentication(authentication); scc.setCollection(collection); vp.searchCollectionCreate(scc); System.out.println(" done."); } else { System.out.println("\n\n\tWARNING: "+e); System.out.println("\nproceed anyway..."); } } catch (java.lang.Exception e) { System.out.println("\nERROR: \n"+e); System.exit(2); } if (verbose) System.out.print("Initialize search collection enqueue..."); SearchCollectionEnqueue sce = new SearchCollectionEnqueue(); SearchCollectionEnqueue.CrawlUrls urlsToEnqueue = new SearchCollectionEnqueue.CrawlUrls(); sce.setAuthentication(authentication); sce.setCollection(collection); sce.setCrawlUrls(urlsToEnqueue); if (verbose) System.out.println(" done."); java.util.List<CrawlUrl> crawlUrlList = urlsToEnqueue.getCrawlUrl(); XMLInputFactory xmlif = XMLInputFactory.newInstance(); for (Enumeration e = filesToPush.elements() ; e.hasMoreElements() ; ) { try { File f = (File) e.nextElement(); XMLStreamReader xmlsr = xmlif.createXMLStreamReader(new FileInputStream(f)); if (verbose) System.out.print("Build data to enqueue for ["+f.getCanonicalPath()+"]...\n"); /* all documents need a url, if not for crawling, then for later updates or deletes */ String urlString = "myproto://doc?id="+f.hashCode(); CrawlUrl crawlUrl = new CrawlUrl(); crawlUrl.setUrl(urlString); crawlUrl.setStatus("complete"); crawlUrl.setEnqueueType("reenqueued"); crawlUrlList.add(crawlUrl); java.util.List<CrawlData> crawlDataList = crawlUrl.getCrawlData(); CrawlData crawlData0 = new CrawlData(); CrawlData.Vxml vxml0 = new CrawlData.Vxml(); crawlData0.setContentType("application/vxml"); crawlData0.setVxml(vxml0); crawlDataList.add(crawlData0); java.util.List<Document> documentList = vxml0.getDocument(); Document d0 = new Document(); documentList.add(d0); java.util.List<Content> contentList = d0.getContent(); /* read in the xml file and insert data into the crawlDataList for this crawlUrl */ while(xmlsr.hasNext()) { String localname = null; if ( xmlsr.isStartElement()) { localname = xmlsr.getLocalName(); } if ( "document".equals(localname) && xmlsr.isStartElement() ) { /* we are at the start of a document */ if (verbose) System.out.println("\t"+localname); } if ( "content".equals(localname) && xmlsr.isStartElement() ) { /* we are at the start of a content */ Content c0 = new Content(); contentList.add(c0); if ( "snippet".equals(xmlsr.getAttributeValue(null, "name")) ) { String contentName = xmlsr.getAttributeValue(null, "name"); String contentAction = xmlsr.getAttributeValue(null, "action"); String contentWeight = xmlsr.getAttributeValue(null, "weight"); String text = xmlsr.getElementText(); /* create the searchable, not displayable, content */ c0.setName(contentName); if (verbose) System.out.println("\t\tcontent "+contentName); c0.setAction("index-only"); if (verbose) System.out.println("\t\t\taction index-only"); if ( contentWeight != null ) { c0.setWeight(Double.valueOf(contentWeight)); if (verbose) System.out.println("\t\t\tweight "+contentWeight); } c0.setValue(text); if (verbose) System.out.println("\t\t\t\t"+text); /* create the displayable, not searchable, content */ Content c1 = new Content(); contentList.add(c1); c1.setName(contentName); if (verbose) System.out.println("\t\tcontent "+contentName); if ( contentAction != null ) { c1.setAction(contentAction); if (verbose) System.out.println("\t\t\taction "+contentAction); } c1.setWeight( (double) -1 ); if (verbose) System.out.println("\t\t\tweight -1"); String shortText = text.substring(0, 20); c1.setValue(shortText); if (verbose) System.out.println("\t\t\t\t"+shortText); } else { /* This is not the main body of searchable text, so process normally */ c0.setName(xmlsr.getAttributeValue(null, "name")); if (verbose) System.out.println("\t\tcontent "+xmlsr.getAttributeValue(null, "name")); if ( xmlsr.getAttributeValue(null, "action") != null ) { c0.setAction(xmlsr.getAttributeValue(null, "action")); if (verbose) System.out.println("\t\t\taction "+xmlsr.getAttributeValue(null, "action")); } if ( xmlsr.getAttributeValue(null, "weight") != null ) { c0.setWeight(Double.valueOf(xmlsr.getAttributeValue(null, "weight"))); if (verbose) System.out.println("\t\t\tweight "+xmlsr.getAttributeValue(null, "weight")); } String text = xmlsr.getElementText(); c0.setValue(text); if (verbose) System.out.println("\t\t\t\t"+text); } } xmlsr.next(); } } catch (java.io.FileNotFoundException fnfe) { System.out.println("\nERROR The file disappeared?!?"); } catch (javax.xml.stream.XMLStreamException xmlse) { System.out.println("\nERROR xml stream exception"); } catch (java.io.IOException ioe) { System.out.println("\nERROR IO Exception"); } } if (verbose) System.out.print("Enqueue data..."); SearchCollectionEnqueueResponse enqresp = vp.searchCollectionEnqueue(sce); if (verbose) System.out.println(" done."); try { Thread.sleep(4000); } catch (java.lang.InterruptedException ie) {} if (verbose) System.out.print("Request status of ["+collection+"]..."); scsr = vp.searchCollectionStatus(scs); if (verbose) System.out.println(" done."); /* reuse objects already created for initial status check */ vses = scsr.getVseStatus(); vseis = vses.getVseIndexStatus(); cs = vses.getCrawlerStatus(); /* see schema documentation of vse-status, crawler-status, and vse-index-status for more detail */ System.out.println("|******** Begin status of ["+collection+"] ********|"); System.out.println("|* Crawler:"); System.out.println("|* \tversion\t\t\t\t"+cs.getVersion()); System.out.println("|* \tcrawler is\t\t\t"+cs.getServiceStatus()); System.out.println("|* \tunique URLs input\t\t"+cs.getNInput()); System.out.println("|* \tunique URLs output\t\t"+cs.getNOutput()); System.out.println("|* \tfetch or conversion errors\t"+cs.getNErrors()); System.out.println("|* \tunique URLs pending\t\t"+cs.getNPending()); System.out.println("|* \tbytes crawled\t\t\t"+cs.getNBytes()); System.out.println("|* \tbytes downloaded\t\t"+cs.getNDlBytes()); System.out.println("|* \tbytes of converted data\t\t"+cs.getConvertedSize()); System.out.println("|* Indexer:"); System.out.println("|* \tversion\t\t\t\t"+vseis.getServiceVersion()); System.out.println("|* \tindexer is\t\t\t"+vseis.getServiceStatus()); System.out.println("|* \tindexed URLs\t\t\t"+vseis.getIndexedUrls()); System.out.println("|* \tindexed documents\t\t"+vseis.getIndexedDocs()); System.out.println("|* \tvalid documents\t\t\t"+vseis.getNDocs()); System.out.println("|* \tindexed contents\t\t"+vseis.getIndexedContents()); System.out.println("|* \tindexed bytes\t\t\t"+vseis.getIndexedBytes()); System.out.println("|* \truntime (seconds)\t\t"+vseis.getRunningTime()); System.out.println("|* \terror items\t\t\t"+vseis.getErrorItems()); System.out.println("|******** End status of ["+collection+"] ********|"); System.out.println(); if (verbose) System.out.println("\nexiting"); } }