// (C) Copyright IBM Corp. 2009, 2014 All Rights Reserved.
// All rights reserved
/**
*
* Notes:
*
* Create a new collection if necessary
* Push each XML file named on the command line into the collection (directories are skipped)
*
* ALL AUTHENTICATED OPERATIONS DONE WITH the user/password
* apitestuser/testpw, which is hardwired into this app. You
* should either create this user in the Watson Explorer Engine installation
* that you are testing with, or update the code for this
* sample application, substituting a valid user/password
* combination for that Watson Explorer Engine installation.
*
*/
import velocity.*;
import velocity.objects.*;
import velocity.soap.*;
import velocity.types.*;
import java.util.Enumeration;
import java.io.File;
import java.io.FileInputStream;
import java.net.URL;
import javax.xml.namespace.QName;
import javax.xml.stream.*;
/**
 * Sample command-line client that pushes XML files into a search collection
 * through the Velocity SOAP API.
 *
 * <p>Usage: {@code XmlPush [-v] [-e endpoint] collection filename(s)}.
 * The program requests the status of the collection, creates the collection
 * when the server reports the name as invalid, parses every readable
 * non-directory file named on the command line into one enqueue request,
 * sends that request, and finally prints the crawler and indexer status.
 *
 * <p>Every authenticated call uses the user/password hard-wired below.
 */
public class XmlPush {

    /** Placeholder endpoint shipped with the sample; override it with {@code -e}. */
    private static final String DEFAULT_ENDPOINT =
        "http://HOSTNAME/vivisimo/cgi-bin/velocity?v.app=api-soap&";

    /** User name sent with every authenticated request (hard-wired for this sample). */
    private static final String API_USERNAME = "apitestuser";

    /** Password sent with every authenticated request (hard-wired for this sample). */
    private static final String API_PASSWORD = "testpw";

    /** Maximum number of characters of a "snippet" content kept in its displayable copy. */
    private static final int SNIPPET_DISPLAY_LENGTH = 20;

    /** Pause, in milliseconds, between the enqueue request and the final status request. */
    private static final long STATUS_DELAY_MILLIS = 4000;

    static String endpoint = DEFAULT_ENDPOINT;
    static java.lang.Boolean verbose = false;

    /**
     * Prints the command-line synopsis, plus a hint when the endpoint is still
     * the placeholder value.
     */
    public static void usage() {
        System.out.println();
        System.out.println("usage: XmlPush [-v] [-e endpoint] collection filename(s)\n");
        if (DEFAULT_ENDPOINT.equals(endpoint)) {
            System.out.println("SOAP endpoint has not been changed in the source code.\nYou will need to specify an endpoint on the commandline.");
        }
        System.out.println();
    }

    /**
     * Program entry point.
     *
     * @param args {@code [-v] [-e endpoint] collection filename(s)}; the process
     *             exits with status 1 on a usage error or malformed endpoint
     *             URL, and with status 2 when the initial status request fails
     *             with anything other than a SOAP fault
     */
    public static void main(String[] args) {
        java.util.List<File> filesToPush = new java.util.ArrayList<File>();
        String collection = parseCommandLine(args, filesToPush);

        System.out.println("\nUsing collection=["+collection+"]");
        System.out.println("Using endpoint=["+endpoint+"]");

        VelocityPort vp = connect();

        verbosePrint("Initialize authentication object...");
        Authentication authentication = new Authentication();
        authentication.setUsername(API_USERNAME);
        authentication.setPassword(API_PASSWORD);
        verbosePrintln(" done.");

        /* The crawler should start automatically when we enqueue if it is not running,
           but let's get the status first to make sure the collection exists
        */
        verbosePrint("Initialize search collection status...");
        SearchCollectionStatus scs = new SearchCollectionStatus();
        scs.setAuthentication(authentication);
        scs.setCollection(collection);
        verbosePrintln(" done.");

        checkOrCreateCollection(vp, authentication, scs, collection);

        verbosePrint("Initialize search collection enqueue...");
        SearchCollectionEnqueue sce = new SearchCollectionEnqueue();
        SearchCollectionEnqueue.CrawlUrls urlsToEnqueue =
            new SearchCollectionEnqueue.CrawlUrls();
        sce.setAuthentication(authentication);
        sce.setCollection(collection);
        sce.setCrawlUrls(urlsToEnqueue);
        verbosePrintln(" done.");

        java.util.List<CrawlUrl> crawlUrlList = urlsToEnqueue.getCrawlUrl();
        XMLInputFactory xmlif = XMLInputFactory.newInstance();
        for (File f : filesToPush) {
            try {
                addFileToQueue(f, xmlif, crawlUrlList);
            } catch (java.io.FileNotFoundException fnfe) {
                System.out.println("\nERROR The file disappeared?!? ["+f+"]: "+fnfe);
            } catch (javax.xml.stream.XMLStreamException xmlse) {
                /* Whatever was parsed before the error stays in the request (best effort). */
                System.out.println("\nERROR xml stream exception ["+f+"]: "+xmlse);
            } catch (java.io.IOException ioe) {
                System.out.println("\nERROR IO Exception ["+f+"]: "+ioe);
            }
        }

        verbosePrint("Enqueue data...");
        vp.searchCollectionEnqueue(sce);
        verbosePrintln(" done.");

        try {
            Thread.sleep(STATUS_DELAY_MILLIS);
        } catch (java.lang.InterruptedException ie) {
            /* Keep the interrupt visible to whoever is running this thread. */
            Thread.currentThread().interrupt();
        }

        verbosePrint("Request status of ["+collection+"]...");
        SearchCollectionStatusResponse scsr = vp.searchCollectionStatus(scs);
        verbosePrintln(" done.");

        printStatus(collection, scsr);
        verbosePrintln("\nexiting");
    }

    /** Prints {@code message} without a line break, but only in verbose mode. */
    private static void verbosePrint(String message) {
        if (verbose) {
            System.out.print(message);
        }
    }

    /** Prints {@code message} followed by a line break, but only in verbose mode. */
    private static void verbosePrintln(String message) {
        if (verbose) {
            System.out.println(message);
        }
    }

    /**
     * Parses the command line, updating {@link #verbose} and {@link #endpoint}
     * and collecting the files to push. Prints the usage text and exits with
     * status 1 when the arguments are incomplete or no usable file remains.
     *
     * @param args        raw command-line arguments
     * @param filesToPush receives every readable, non-directory file argument
     * @return the collection name
     */
    private static String parseCommandLine(String[] args, java.util.List<File> filesToPush) {
        if (args.length == 0) {
            usage();
            System.exit(1);
        }

        String collection = "";
        for (int i = 0; i < args.length; i++) {
            if ("-v".equals(args[i])) {
                verbose = true;
            } else if ("-e".equals(args[i])) {
                if (i + 1 < args.length) {
                    i++;
                    endpoint = args[i];
                } else {
                    usage();
                    System.exit(1);
                }
            } else {
                /* First non-option argument is the collection; everything after it is a file name. */
                collection = args[i];
                i++;
                if (i >= args.length) {
                    usage();
                    System.exit(1);
                }
                for ( ; i < args.length; i++) {
                    addIfPushable(args[i], filesToPush);
                }
            }
        }

        if ("".equals(collection)) {
            usage();
            System.exit(1);
        }
        if (filesToPush.isEmpty()) {
            usage();
            System.exit(1);
        }

        if (verbose) {
            System.out.println("Using files:");
            for (File f : filesToPush) {
                try {
                    System.out.println("\t"+f.getCanonicalPath());
                } catch (java.io.IOException ioe) {
                    System.out.println("EXCEPTION: caught printing canonical path of input filename\n"+ioe);
                }
            }
        }
        return collection;
    }

    /**
     * Adds the named file to {@code filesToPush} when it is readable and not a
     * directory; otherwise prints a warning and skips it.
     */
    private static void addIfPushable(String name, java.util.List<File> filesToPush) {
        File f = new File(name);
        if ( ! f.canRead() ) {
            try {
                System.out.println("WARNING: can't read file ["+f.getCanonicalPath()+"]. Skipping.");
            } catch (java.io.IOException ioe) {
                System.out.println("WARNING: exception caught processing filename ["+name+"]. Skipping.");
            }
        } else if ( f.isDirectory() ) {
            try {
                System.out.println("WARNING: ["+f.getCanonicalPath()+"] is a directory, not a file. Skipping.");
            } catch (java.io.IOException ioe) {
                System.out.println("WARNING: exception caught processing directory ["+name+"]. Skipping.");
            }
        } else {
            filesToPush.add(f);
        }
    }

    /**
     * Creates the SOAP service from the WSDL published at {@link #endpoint} and
     * returns its port with the endpoint address set. Exits with status 1 when
     * the endpoint does not form a valid URL.
     */
    private static VelocityPort connect() {
        verbosePrint("Create service...");
        VelocityService vs = null;
        try {
            vs = new VelocityService( new URL(endpoint+"wsdl=1&specialize-for=&use-types=true&"),
                                      new QName("urn:/velocity", "VelocityService"));
        } catch (java.net.MalformedURLException e) {
            System.out.println("ERROR:\n"+e);
            System.exit(1);
        }
        verbosePrintln(" done.");

        verbosePrint("Get port from service...");
        VelocityPort vp = vs.getVelocityPort();
        verbosePrintln(" done.");

        verbosePrint("Set port's endpoint...");
        ((javax.xml.ws.BindingProvider) vp).getRequestContext().put(
            javax.xml.ws.BindingProvider.ENDPOINT_ADDRESS_PROPERTY, endpoint);
        verbosePrintln(" done.");
        return vp;
    }

    /**
     * Requests the collection status and prints the crawler and indexer state.
     * When the server answers with a "search-collection-invalid-name" fault the
     * collection is created; any other SOAP fault is reported as a warning and
     * the program proceeds. Any other exception ends the program with status 2.
     */
    private static void checkOrCreateCollection(VelocityPort vp, Authentication authentication,
                                                SearchCollectionStatus scs, String collection) {
        try {
            verbosePrint("Request status of ["+collection+"]...");
            SearchCollectionStatusResponse scsr = vp.searchCollectionStatus(scs);
            verbosePrintln(" done.");

            VseStatus vses = (scsr == null) ? null : scsr.getVseStatus();
            if (vses == null) {
                System.out.println("Collection ["+collection+"] exists, but has no status.");
            } else {
                CrawlerStatus cs = vses.getCrawlerStatus();
                VseIndexStatus vseis = vses.getVseIndexStatus();
                if (cs != null) {
                    System.out.println("This collection's crawler is ["+cs.getServiceStatus()+"].");
                }
                if (vseis != null) {
                    System.out.println("This collection's indexer is ["+vseis.getServiceStatus()+"].");
                }
            }
        } catch (javax.xml.ws.soap.SOAPFaultException e) {
            String faultString = e.getFault().getFaultString();
            if (faultString != null && faultString.contains("search-collection-invalid-name")) {
                System.out.println("\n\n\tWARNING: collection ["+collection+"] does not exist.");
                System.out.print("\tWARNING: create ["+collection+"] based on [default-push]...");
                /* [default-push] is the default base collection when creating a new collection via the api */
                SearchCollectionCreate scc = new SearchCollectionCreate();
                scc.setAuthentication(authentication);
                scc.setCollection(collection);
                vp.searchCollectionCreate(scc);
                System.out.println(" done.");
            } else {
                System.out.println("\n\n\tWARNING: "+e);
                System.out.println("\nproceed anyway...");
            }
        } catch (java.lang.Exception e) {
            System.out.println("\nERROR: \n"+e);
            System.exit(2);
        }
    }

    /**
     * Parses one XML file and appends a corresponding crawl URL, holding one
     * document with the file's content elements, to {@code crawlUrlList}.
     * The crawl URL is appended before parsing starts, so a file that fails
     * part-way through still contributes the contents read so far.
     *
     * @throws java.io.IOException                 when the file cannot be opened or its path resolved
     * @throws javax.xml.stream.XMLStreamException when the file is not parseable XML
     */
    private static void addFileToQueue(File f, XMLInputFactory xmlif,
                                       java.util.List<CrawlUrl> crawlUrlList)
            throws java.io.IOException, javax.xml.stream.XMLStreamException {
        FileInputStream in = new FileInputStream(f);
        XMLStreamReader xmlsr = null;
        try {
            xmlsr = xmlif.createXMLStreamReader(in);
            if (verbose) {
                System.out.print("Build data to enqueue for ["+f.getCanonicalPath()+"]...\n");
            }

            /* all documents need a url, if not for crawling, then for later updates or deletes */
            /* NOTE(review): the id is File.hashCode(), so it depends on the path as typed and may collide. */
            String urlString = "myproto://doc?id="+f.hashCode();
            CrawlUrl crawlUrl = new CrawlUrl();
            crawlUrl.setUrl(urlString);
            crawlUrl.setStatus("complete");
            crawlUrl.setEnqueueType("reenqueued");
            crawlUrlList.add(crawlUrl);

            CrawlData crawlData0 = new CrawlData();
            CrawlData.Vxml vxml0 = new CrawlData.Vxml();
            crawlData0.setContentType("application/vxml");
            crawlData0.setVxml(vxml0);
            crawlUrl.getCrawlData().add(crawlData0);

            Document d0 = new Document();
            vxml0.getDocument().add(d0);
            java.util.List<Content> contentList = d0.getContent();

            /* read in the xml file and insert data into the document for this crawlUrl */
            while (xmlsr.hasNext()) {
                if (xmlsr.isStartElement()) {
                    String localname = xmlsr.getLocalName();
                    if ("document".equals(localname)) {
                        /* we are at the start of a document */
                        verbosePrintln("\t"+localname);
                    } else if ("content".equals(localname)) {
                        /* we are at the start of a content */
                        readContent(xmlsr, contentList);
                    }
                }
                xmlsr.next();
            }
        } finally {
            /* XMLStreamReader.close() does not close the underlying stream, so close both. */
            if (xmlsr != null) {
                try {
                    xmlsr.close();
                } catch (javax.xml.stream.XMLStreamException ignored) {
                    /* best-effort cleanup; the parse result is already decided */
                }
            }
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                /* best-effort cleanup of a stream that was only read */
            }
        }
    }

    /**
     * Converts the {@code content} element the reader is positioned on into
     * one or two entries of {@code contentList}. A content named "snippet"
     * yields an "index-only" entry with the full text plus a second entry with
     * weight -1 whose text is cut to {@link #SNIPPET_DISPLAY_LENGTH} characters;
     * any other content is copied as is. On return the reader is positioned on
     * the element's end tag.
     *
     * @throws javax.xml.stream.XMLStreamException when the element text cannot be read
     */
    private static void readContent(XMLStreamReader xmlsr, java.util.List<Content> contentList)
            throws javax.xml.stream.XMLStreamException {
        Content c0 = new Content();
        contentList.add(c0);

        /* Attributes must be read before getElementText() moves the reader off the start tag. */
        String contentName = xmlsr.getAttributeValue(null, "name");
        String contentAction = xmlsr.getAttributeValue(null, "action");
        String contentWeight = xmlsr.getAttributeValue(null, "weight");

        if ("snippet".equals(contentName)) {
            String text = xmlsr.getElementText();

            /* create the searchable, not displayable, content */
            c0.setName(contentName);
            verbosePrintln("\t\tcontent "+contentName);
            c0.setAction("index-only");
            verbosePrintln("\t\t\taction index-only");
            if (contentWeight != null) {
                c0.setWeight(Double.valueOf(contentWeight));
                verbosePrintln("\t\t\tweight "+contentWeight);
            }
            c0.setValue(text);
            verbosePrintln("\t\t\t\t"+text);

            /* create the displayable, not searchable, content */
            Content c1 = new Content();
            contentList.add(c1);
            c1.setName(contentName);
            verbosePrintln("\t\tcontent "+contentName);
            if (contentAction != null) {
                c1.setAction(contentAction);
                verbosePrintln("\t\t\taction "+contentAction);
            }
            c1.setWeight( (double) -1 );
            verbosePrintln("\t\t\tweight -1");
            /* Clamp the cut-off: text shorter than the limit is kept whole instead of throwing. */
            String shortText = text.substring(0, Math.min(text.length(), SNIPPET_DISPLAY_LENGTH));
            c1.setValue(shortText);
            verbosePrintln("\t\t\t\t"+shortText);
        } else {
            /* This is not the main body of searchable text, so process normally */
            c0.setName(contentName);
            verbosePrintln("\t\tcontent "+contentName);
            if (contentAction != null) {
                c0.setAction(contentAction);
                verbosePrintln("\t\t\taction "+contentAction);
            }
            if (contentWeight != null) {
                c0.setWeight(Double.valueOf(contentWeight));
                verbosePrintln("\t\t\tweight "+contentWeight);
            }
            String text = xmlsr.getElementText();
            c0.setValue(text);
            verbosePrintln("\t\t\t\t"+text);
        }
    }

    /**
     * Prints the crawler and indexer figures of a status response. A missing
     * response, or a missing crawler or indexer part, is reported as
     * unavailable instead of failing.
     */
    private static void printStatus(String collection, SearchCollectionStatusResponse scsr) {
        VseStatus vses = (scsr == null) ? null : scsr.getVseStatus();
        CrawlerStatus cs = (vses == null) ? null : vses.getCrawlerStatus();
        VseIndexStatus vseis = (vses == null) ? null : vses.getVseIndexStatus();

        /* see schema documentation of vse-status, crawler-status, and vse-index-status for more detail */
        System.out.println("|******** Begin status of ["+collection+"] ********|");
        System.out.println("|* Crawler:");
        if (cs == null) {
            System.out.println("|* \tno status available");
        } else {
            System.out.println("|* \tversion\t\t\t\t"+cs.getVersion());
            System.out.println("|* \tcrawler is\t\t\t"+cs.getServiceStatus());
            System.out.println("|* \tunique URLs input\t\t"+cs.getNInput());
            System.out.println("|* \tunique URLs output\t\t"+cs.getNOutput());
            System.out.println("|* \tfetch or conversion errors\t"+cs.getNErrors());
            System.out.println("|* \tunique URLs pending\t\t"+cs.getNPending());
            System.out.println("|* \tbytes crawled\t\t\t"+cs.getNBytes());
            System.out.println("|* \tbytes downloaded\t\t"+cs.getNDlBytes());
            System.out.println("|* \tbytes of converted data\t\t"+cs.getConvertedSize());
        }
        System.out.println("|* Indexer:");
        if (vseis == null) {
            System.out.println("|* \tno status available");
        } else {
            System.out.println("|* \tversion\t\t\t\t"+vseis.getServiceVersion());
            System.out.println("|* \tindexer is\t\t\t"+vseis.getServiceStatus());
            System.out.println("|* \tindexed URLs\t\t\t"+vseis.getIndexedUrls());
            System.out.println("|* \tindexed documents\t\t"+vseis.getIndexedDocs());
            System.out.println("|* \tvalid documents\t\t\t"+vseis.getNDocs());
            System.out.println("|* \tindexed contents\t\t"+vseis.getIndexedContents());
            System.out.println("|* \tindexed bytes\t\t\t"+vseis.getIndexedBytes());
            System.out.println("|* \truntime (seconds)\t\t"+vseis.getRunningTime());
            System.out.println("|* \terror items\t\t\t"+vseis.getErrorItems());
        }
        System.out.println("|******** End status of ["+collection+"] ********|");
        System.out.println();
    }
}