/**
	IBM grants you a nonexclusive copyright license to use all programming code 
	examples from which you can generate similar function tailored to your own 
	specific needs.

	All sample code is provided by IBM for illustrative purposes only.
	These examples have not been thoroughly tested under all conditions.  IBM, 
	therefore cannot guarantee or imply reliability, serviceability, or function of 
	these programs.

	All Programs or code component contained herein are provided to you AS IS  
	without any warranties of any kind.
	The implied warranties of non-infringement, merchantability and fitness for a 
	particular purpose are expressly disclaimed.

	 Copyright IBM Corporation 2008, ALL RIGHTS RESERVED.
 */

package sampdita;

import java.io.*;
import java.util.*;
import java.net.URL;
import javax.xml.parsers.*;
import java.net.MalformedURLException;
import javax.xml.xpath.*;
import org.w3c.dom.*;


/**
 * Parses a single DITA XML document with a non-validating, namespace-aware
 * DOM parser and derives two things from it:
 * <ul>
 *   <li>the P8 document class name that corresponds to the document's root
 *       element (see {@link #getComponentP8DitaClass()}), and</li>
 *   <li>the files the document refers to through <code>href</code> and
 *       <code>conref</code> attributes, each mapped to a P8 relationship
 *       class name (see {@link #getComponentRelationships()}).</li>
 * </ul>
 * Both results are computed on first request and then cached in plain
 * (unsynchronized) instance fields.
 */
public class DITAParser 
{
	/** The file this parser was created for. */
	private File ditacomponent = null;
	/** XPath evaluator used for all queries against {@link #domDoc}. */
	private XPath xPath = null;
	/** DOM tree of the parsed component. */
	private Document domDoc = null;
	/** Lazily computed result of {@link #getComponentP8DitaClass()}. */
	private String componentP8DitaClass = null;
	/** Lazily computed result of {@link #getComponentRelationships()}. */
	private Map<String, String> fileRelationshipMap = null;

	// From org.apache.xerces.impl.Constants
	public static final String XERCES_FEATURE_PREFIX = "http://apache.org/xml/features/";
	public static final String LOAD_EXTERNAL_DTD_FEATURE = "nonvalidating/load-external-dtd";  
	
	// P8 Document classes
	private static final String DITAMAP_CLASS = "DitaMap"; 
	private static final String DITABOOKMAP_CLASS = "DitaBookMap"; 
	private static final String DITACONCEPT_CLASS = "DitaConcept"; 
	private static final String DITAREFERENCE_CLASS = "DitaReference"; 
	private static final String DITATASK_CLASS = "DitaTask"; 
	private static final String DITATOPIC_CLASS = "DitaTopic"; 
	private static final String DITABASE_CLASS = "DitaBase"; 
	
	// P8 relationship classes
	private static final String DITABASEREF_CLASS = "DitaBaseref"; 
	private static final String DITATOPICREF_CLASS = "DitaTopicref"; 
	private static final String DITAMAPREF_CLASS = "DitaMapref"; 
	private static final String DITAIMAGEREF_CLASS = "DitaImageref"; 
	private static final String DITACONTENTREF_CLASS = "DitaContentref"; 
	private static final String DITAXREF_CLASS = "DitaXref"; 

	/** Root element name -> P8 document class name. */
	private static final Map<String, String> ditatypeMap;
	/** Name of the element carrying an href -> P8 relationship class name. */
	private static final Map<String, String> hrefMap;
	/** Values of the "type" attribute that are accepted on any href element. */
	private static final Set<String> basicLinkTypes;
	/** Additional "type" values that are accepted on xref elements only. */
	private static final Set<String> xrefLinkTypes;
	/** Parent elements whose format/scope attributes are consulted for a child link. */
	private static final Set<String> inheritAttributeParents;
	
	static 
	{
		// initialize document and property mappings
		
		Map<String, String> types = new HashMap<String, String>();
		types.put("map", DITAMAP_CLASS);
		types.put("bookmap", DITABOOKMAP_CLASS);
		types.put("concept", DITACONCEPT_CLASS);
		types.put("reference", DITAREFERENCE_CLASS);
		types.put("task", DITATASK_CLASS);
		types.put("topic", DITATOPIC_CLASS);
		types.put("dita", DITABASE_CLASS);
		ditatypeMap = Collections.unmodifiableMap(types);
		
		Map<String, String> hrefs = new HashMap<String, String>();
		hrefs.put("topic", DITATOPICREF_CLASS);
		hrefs.put("xref",  DITAXREF_CLASS);
		hrefs.put("image", DITAIMAGEREF_CLASS);
		hrefs.put("conref", DITACONTENTREF_CLASS);
		hrefMap = Collections.unmodifiableMap(hrefs);
			
		basicLinkTypes = Collections.unmodifiableSet(new HashSet<String>(
				Arrays.asList(new String[] { "concept", "task", "reference", "topic" })));
		
		xrefLinkTypes = Collections.unmodifiableSet(new HashSet<String>(
				Arrays.asList(new String[] { "fig", "table", "li", "fn", "section" })));

		inheritAttributeParents = Collections.unmodifiableSet(new HashSet<String>(
				Arrays.asList(new String[] { "linklist", "linkpool" })));
	}
	
	// disable this constructor: an instance created through it has no
	// document, so its query methods cannot be used
	protected DITAParser() {}
	

	/**
	 * Parses the given DITA component file into a DOM tree. Validation and
	 * loading of external DTDs are both switched off.
	 * 
	 * @param ditacomponent the DITA file to parse; must exist
	 * @throws NullPointerException if <code>ditacomponent</code> is null
	 * @throws RuntimeException if the file does not exist, or if the parser
	 *         cannot be configured or the file cannot be parsed (the original
	 *         failure is attached as the cause)
	 */
	public DITAParser(File ditacomponent)
	{
		if(ditacomponent == null)
		{
			throw new NullPointerException("ditacomponent must not be null");
		}
		if(!ditacomponent.exists())
		{
			throw new RuntimeException("DITA component does not exist: " + ditacomponent.getAbsolutePath());
		}
		
		this.ditacomponent = ditacomponent;
		
		try
		{
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			factory.setNamespaceAware(true);
			factory.setValidating(false);

			// disable feature where external DTD is loaded for non-validating parser
			String feature = XERCES_FEATURE_PREFIX + LOAD_EXTERNAL_DTD_FEATURE;
			factory.setAttribute(feature, Boolean.FALSE);
			
			// create the DOM tree and the XPath instance
			DocumentBuilder builder = factory.newDocumentBuilder();
			domDoc = builder.parse(ditacomponent);
			xPath = XPathFactory.newInstance().newXPath();
		}
		catch(Exception e)
		{
			// keep the message callers saw before, but do not lose the cause
			throw new RuntimeException(e.getLocalizedMessage(), e);
		}
	}
	
	/**
	 * Returns the P8 document class name for the parsed component, chosen
	 * from the name of the document's root element. The value is computed on
	 * the first call and cached.
	 * 
	 * @return the P8 document class name
	 * @throws RuntimeException if the root element is not one of the
	 *         supported DITA document types
	 */
	public String getComponentP8DitaClass()
	{
		if(componentP8DitaClass == null)
		{
			String rootElement = domDoc.getDocumentElement().getNodeName();
			String p8Class = ditatypeMap.get(rootElement);
			
			if(p8Class == null)
			{
				throw new RuntimeException("Error: unsupported DITA class: " + rootElement);
			}
			componentP8DitaClass = p8Class;
		}
		
		return componentP8DitaClass;
	}
	
	/**
	 * Returns the relationships to other files found in the parsed component.
	 * The Map has the following key:value pairs:
	 *
	 *      Key:   the referenced path exactly as written in the document,
	 *             with any "#fragment" suffix removed
	 *      Value: the P8 relationship class name to use for that reference
	 *
	 * References that point inside the current file (value starts with "#"),
	 * references classified as external, and empty references are left out.
	 * When the same path is referenced more than once the last reference
	 * processed wins; href attributes are processed before conref attributes.
	 * The map is computed on the first successful call and cached; the cached
	 * instance itself is returned.
	 * 
	 * @return map of referenced path to P8 relationship class name
	 * @throws RuntimeException if the root element is unsupported or the
	 *         document cannot be queried
	 */
	public Map<String, String> getComponentRelationships()
	{
		if(fileRelationshipMap == null)
		{
			// rejects documents whose root element is not a supported DITA type
			getComponentP8DitaClass();
			
			// build into a local so that a failure half way through does not
			// leave a partially filled map cached for later calls
			Map<String, String> relationships = new HashMap<String, String>();
			findRelationships(relationships, "//*[@href]", DITATOPICREF_CLASS);
			findRelationships(relationships, "//@conref", DITACONTENTREF_CLASS);
			fileRelationshipMap = relationships;
		}
		return fileRelationshipMap;
	}


	/**
	 * Returns the value of the named attribute.
	 * 
	 * @param attribMap attributes to look in
	 * @param name attribute name
	 * @param required whether a missing attribute is an error
	 * @return the attribute value, or null if the attribute is absent and not required
	 * @throws RuntimeException if the attribute is absent and required
	 */
	private String getNodeValue(NamedNodeMap attribMap, String name, boolean required)
	{
		Node n = attribMap.getNamedItem(name);
		if(n != null)
		{
			return n.getNodeValue();
		}
		if(required)
		{
			throw new RuntimeException("Required value not found: " + name);
		}
		return null;
	}
	
	/**
	 * Returns the node value of the single node selected by the XPath query.
	 * 
	 * @param queryStr XPath expression evaluated against the whole document
	 * @param required whether a missing value is an error
	 * @return the node value, or null if nothing was selected (or the
	 *         selected node has no value) and the value is not required
	 * @throws RuntimeException if more than one node is selected, if the
	 *         value is missing and required, or if the query cannot be evaluated
	 */
	private String getNodeValue(String queryStr, boolean required)
	{
		NodeList nodes = evaluateNodes(queryStr);
		int count = nodes.getLength();
		
		if(count > 1)
		{
			throw new RuntimeException("too many values found: " + queryStr + ", count = " + count);
		}
		
		String value = (count == 1) ? nodes.item(0).getNodeValue() : null;
		
		if(value == null && required)
		{
			throw new RuntimeException("required value not found: " + queryStr);
		}
		return value;
	}

	/**
	 * Returns the non-empty node values of all nodes selected by the XPath query.
	 * 
	 * @param queryStr XPath expression evaluated against the whole document
	 * @param required whether an empty result is an error
	 * @return the values in the order the nodes were returned; never null
	 * @throws RuntimeException if no value was found and one is required,
	 *         or if the query cannot be evaluated
	 */
	private List<String> getNodeMultiValue(String queryStr, boolean required)
	{
		List<String> list = new ArrayList<String>();
		NodeList nodes = evaluateNodes(queryStr);
		
		for(int ix = 0; ix < nodes.getLength(); ix++)
		{
			String val = nodes.item(ix).getNodeValue();
			
			if(val != null && val.length() > 0)
			{
				list.add(val);
			}
		}
		
		if(list.isEmpty() && required)
		{
			throw new RuntimeException("required value not found: " + queryStr);
		}

		return list;
	}

	/**
	 * Evaluates an XPath query against the whole document.
	 * 
	 * @param queryStr XPath expression
	 * @return the selected nodes
	 * @throws RuntimeException if the expression cannot be evaluated (the
	 *         XPath failure is attached as the cause)
	 */
	private NodeList evaluateNodes(String queryStr)
	{
		try
		{
			return (NodeList) xPath.evaluate(queryStr, domDoc, XPathConstants.NODESET);
		}
		catch(XPathExpressionException e)
		{
			throw new RuntimeException("Unable to evaluate XPath query '" + queryStr + "': "
					+ e.getLocalizedMessage(), e);
		}
	}


	/**
	 * Adds the name/value pair to the property map. Null values and empty
	 * strings are ignored.
	 * 
	 * @param map map to add to
	 * @param name property name
	 * @param value property value
	 */
	private void addProperty(Map<String, Object> map, String name, Object value)
	{
		if(value == null)
		{
			return;
		}
		if(value instanceof String && ((String) value).length() == 0)
		{
			return;
		}
		map.put(name, value);
	}
	
	/**
	 * Runs the XPath query and records one relationship per selected node
	 * that refers to another, non-external file.
	 * <p>
	 * When <code>defaultRelType</code> is the content-reference class the
	 * selected nodes are taken to be conref attributes and their value is the
	 * path. Otherwise the selected nodes are taken to be elements carrying an
	 * href attribute, and the relationship class is worked out from the
	 * element name and its format, type and scope attributes.
	 * 
	 * @param relationshipMap receives path -> relationship class name
	 * @param queryStr XPath expression selecting the referencing nodes
	 * @param defaultRelType relationship class that identifies the kind of query
	 */
	private void findRelationships(Map<String, String> relationshipMap, String queryStr, String defaultRelType)
	{
		NodeList nodes = evaluateNodes(queryStr);
		boolean conrefQuery = DITACONTENTREF_CLASS.equalsIgnoreCase(defaultRelType);
		
		for(int ix = 0; ix < nodes.getLength(); ix++)
		{
			Node n = nodes.item(ix);
			
			String filePath;
			String relType;
			boolean externalNode = false;
			
			if(conrefQuery)
			{
				// "//@conref": the node is the attribute itself
				filePath = n.getNodeValue();
				relType = defaultRelType;
			}
			else
			{
				// "//*[@href]": the node is the element that carries the href
				Map<String, String> values = extractNodeValues(n);
				String elementName = values.get("parentName");
				
				String mapped = hrefMap.get(elementName);
				relType = (mapped != null) ? mapped : DITATOPICREF_CLASS;
				
				filePath = values.get("href");
				
				// if "href" not present in topicref (legal!) ignore it
				if(filePath != null)
				{
					String format = values.get("format");
					
					if(format != null)
					{
						if(format.equalsIgnoreCase("ditamap"))
						{
							relType = DITAMAPREF_CLASS;
						}
						else if(!format.equalsIgnoreCase("dita"))
						{
							externalNode = !isFilesystemBased(filePath);
							relType = DITABASEREF_CLASS;
						}
					}
					else
					{
						String type = values.get("type");
						
						if(type != null)
						{
							boolean knownType = basicLinkTypes.contains(type)
									|| ("xref".equalsIgnoreCase(elementName) && xrefLinkTypes.contains(type));
							
							if(!knownType)
							{
								externalNode = !isFilesystemBased(filePath);
								// the "type" attribute references a non-standard type
								// modify relationship type to basic non-DITA
								relType = DITABASEREF_CLASS;
							}
						}
						else if("link".equalsIgnoreCase(elementName))
						{
							externalNode = isLinkExternal(n);
						}
					}
					
					String scope = values.get("scope");
					if(scope != null && scope.equalsIgnoreCase("external"))
					{
						// "scope=external" overrides format=dita
						externalNode = !isFilesystemBased(filePath);
						relType = DITABASEREF_CLASS;
					}
				}
			}
			
			// an empty value does not name a file, so there is nothing to relate to
			if(externalNode || filePath == null || filePath.length() == 0)
			{
				continue;
			}
			
			int index = filePath.indexOf('#');
			
			// index == 0, reference to within current file so ignore it
			if(index == 0)
			{
				continue;
			}
			if(index > 0)
			{
				// a fragment makes this a content reference to the file before '#'
				relType = DITACONTENTREF_CLASS;
				filePath = filePath.substring(0, index);
			}
			
			relationshipMap.put(filePath, relType);
		}
	}		

	/**
	 * Decides whether a link element is external by looking at the format and
	 * scope attributes of its parent, provided that parent is a linklist or
	 * linkpool element.
	 * 
	 * @param linkNode the link element (its parent is the node inspected)
	 * @return true if the parent is a linklist/linkpool whose format is
	 *         present and not "dita", or whose scope is "external"
	 */
	private boolean isLinkExternal(Node linkNode)
	{
		Node container = linkNode.getParentNode();
		
		if(container == null)
		{
			return false;
		}
		
		String name = container.getNodeName();
		
		if(name == null || !inheritAttributeParents.contains(name))
		{
			return false;
		}
		
		Map<String, String> values = extractNodeValues(container);
		String format = values.get("format");
		String scope = values.get("scope");
		
		boolean foreignFormat = format != null && !format.equalsIgnoreCase("dita");
		boolean externalScope = scope != null && scope.equalsIgnoreCase("external");
		
		return foreignFormat || externalScope;
	}
	
	/**
	 * Collects the values this parser cares about from a node.
	 * <p>
	 * The result always contains "parentName" (the node's own name). For a
	 * node that has an attribute map it also contains "href", "format",
	 * "scope" and "type", each mapped to the attribute value or to null when
	 * the attribute is absent. For a node without an attribute map the text
	 * content is split on spaces and '=' and consumed as alternating
	 * key / value tokens.
	 * 
	 * @param n node to read
	 * @return map of the extracted values; values may be null
	 * @throws RuntimeException if the text content of an attribute-less node
	 *         ends with a key that has no value
	 */
	private Map<String, String> extractNodeValues(Node n)
	{
		Map<String, String> values = new HashMap<String, String>();
		
		values.put("parentName", n.getNodeName());			

		NamedNodeMap nMap = n.getAttributes();

		if(nMap != null)
		{
			String[] attributeNames = { "href", "format", "scope", "type" };
			
			for(int ix = 0; ix < attributeNames.length; ix++)
			{
				Node attribute = nMap.getNamedItem(attributeNames[ix]);
				values.put(attributeNames[ix], attribute != null ? attribute.getNodeValue() : null);
			}
			return values;
		}
		
		String data = n.getTextContent();
		
		if(data != null)
		{
			StringTokenizer st = new StringTokenizer(data.trim(), " =");
			
			while(st.hasMoreTokens())
			{
				String key = st.nextToken().trim();
				String val = null;
				
				// a token can still be blank after trimming (e.g. a lone
				// tab or newline), so skip ahead to the first usable one
				while(val == null && st.hasMoreTokens())
				{
					String candidate = st.nextToken().trim();
					if(candidate.length() > 0)
					{
						val = candidate;
					}
				}
				
				if(val == null)
				{
					throw new RuntimeException("No value found for key '" + key + "' in node text");
				}
				values.put(key, val);
			}
		}
		
		return values;
	}


	/**
	 * Checks whether the given path looks like a file system path rather
	 * than a URL.
	 * <p>
	 * Crude but appears to work: anything {@link URL} can parse is treated as
	 * a URL. Of the rest, a path without ":" (or with ":" as its first
	 * character) is treated as file system based, while a path with ":"
	 * further in is assumed to be a URL with a protocol unknown to
	 * {@link URL}, such as news://...
	 * NOTE(review): a Windows path with a drive letter ("C:\...") therefore
	 * counts as not file system based - confirm this is intended.
	 * 
	 * @param path path or URL as written in the document
	 * @return true if the path is considered file system based; false for
	 *         null, empty, or URL-like values
	 */
	private boolean isFilesystemBased(String path)
	{
		if(path == null || path.length() == 0)
		{
			return false;
		}
		
		try
		{
			// only used as a syntax check; the URL object itself is not needed
			new URL(path);
			return false;
		}
		catch(MalformedURLException e)
		{
			return path.indexOf(':') <= 0;
		}
	}
	
}
