Skip to main content

By clicking Submit, you agree to the developerWorks terms of use.

The first time you sign into developerWorks, a profile is created for you. Select information in your profile (name, country/region, and company) is displayed to the public and will accompany any content you post. You may update your IBM account at any time.

All information submitted is secure.

  • Close [x]

The first time you sign in to developerWorks, a profile is created for you, so you need to choose a display name. Your display name accompanies the content you post on developerworks.

Please choose a display name between 3-31 characters. Your display name must be unique in the developerWorks community and should not be your email address for privacy reasons.

By clicking Submit, you agree to the developerWorks terms of use.

All information submitted is secure.

  • Close [x]

Compiling XPaths

HC kicks off with a first implementation of DFA construction

Return to article.


Listing 3: DFAFactory.java
package org.ananas.hc.compiler;

import java.util.Set;
import java.util.Vector;
import java.util.HashSet;
import java.util.Iterator;
import org.ananas.hc.QName;
import java.util.Enumeration;

public class DFAFactory
{
   public QName[] uniqueSymbols(XPathNode root)
   {
      Set qnames = new HashSet();
      uniqueSymbolsHelper(root,qnames);
      QName[] array = new QName[qnames.size()];
      return (QName[])qnames.toArray(array);
   }

   public void uniqueSymbolsHelper(XPathNode node,Set qnames)
   {
      if(node == null)
         return;
      QName qname = node.toQName();
      if(qname != null)
         qnames.add(qname);
      uniqueSymbolsHelper(node.getLeft(),qnames);
      uniqueSymbolsHelper(node.getRight(),qnames);
   }

   public DFATable createDFA(XPathNode root)
   {
      Vector dstates = new Vector(),
             dtran = new Vector(),
             astate = new Vector();
      QName[] symbols = uniqueSymbols(root);

      // initially the only unmarked state in Dstates is is firstpos(root)
      dstates.addElement(root.firstpos());

      Enumeration statesEnumeration = dstates.elements();

      // for every s as a state in dstates
      while(statesEnumeration.hasMoreElements())
      {
         Set t = (Set)statesEnumeration.nextElement();
         int dtranOfT[] = new int[symbols.length];
         dtran.addElement(dtranOfT);
         // for each input symbol a do
         for(int i = 0;i < symbols.length;i++)
         {
            // let U be the set of positions that are in followpos(p)
            // for some position p in T,
            // such that the symbol at position p is a
            Set u = e_closure(symbols[i],t);
            // if U is not empty and is not in Dstates then
            // add U as an new state to Dstates
            if (!u.isEmpty() && !dstates.contains(u))
               dstates.addElement(u);

            // Dtran[T,a] = U;
            dtranOfT[i] = dstates.indexOf(u);
         }
         astate.addElement(getAcceptingState(t));
      }

      int[][] idtran = new int[dtran.size()][symbols.length];
      dtran.copyInto(idtran);
      Object[] oastate = new Object[astate.size()];
      astate.copyInto(oastate);
      DFATable table = new DFATable(symbols,idtran,oastate);
      return table;
   }

   private Object getAcceptingState(Set t)
   {
      Iterator iterator = t.iterator();
      Object data = null;
      int priority = Integer.MIN_VALUE;
      while(iterator.hasNext())
      {
         XPathNode n = (XPathNode)iterator.next();
         if (n.getType() == XPathNode.END_MARK &&
             priority < n.getPriority())
         {
            priority = n.getPriority();
            data = n.getData();
         }
      }
      return data;
   }

   private Set e_closure(QName a,Set t)
   {
      Set result = new HashSet();
      Iterator iterator = t.iterator();
      while(iterator.hasNext())
      {
         XPathNode n = (XPathNode)iterator.next();
         if(a.matches(n))
            result.addAll(n.followpos());
      }
      return result;
   }
}

Return to article.