document

A single document. In the input, it is a document to be clustered; in the output, it is either a document that has been clustered (inside the tree element) or a duplicate (inside the documents element).

Attributes

Children

Examples

Input Example:

  <document url="url">
  <content name="title" type="html" action="cluster-bold" weight="3">
  <_cdata_>
  ... the document title would appear here ....
  </_cdata_>
  </content>
  <content name="snippet" type="html" action="cluster" output-action="bold">
  <_cdata_>
  ... the document summary would appear here ....
  </_cdata_>
  </content>
  </document>

Output Example:

  <tree>
  <node id="N0" level="0" sep="0.0000" cohesion="0.0000" ndocs="1" score="0.000000" instances="Ndoc0">
  <document url="url" id="Ndoc0">
  <content name="title" type="html" action="discard" weight="3.000000">
  ... the document title would appear here ....
  </content>
  <content name="title" type="html" action="cluster" weight="3.000000" output-action="bold">
  <_cdata_>
  ... the document title would appear here ....
  </_cdata_>
  </content>
  <content name="snippet" type="html" action="discard" weight="1.000000">
  ... the document summary would appear here ....
  </content>
  <content name="snippet" type="html" action="cluster" output-action="bold" weight="1.000000">
  <_cdata_>
  ... the document summary would appear here ....
  </_cdata_>
  </content>
  </document>
  </node>
  </tree>

Input Example:

  <vce>
  <meta query="companies"/>
  <document url="http://vivisimo.com/">
  <content name="title" type="html" action="cluster-bold" weight="3">
  <_cdata_>
  Vivisimo
  </_cdata_>
  </content>
  <content name="snippet" type="html" action="cluster-bold">
  <_cdata_>
  Groups the results by topic via document clustering technology.
  Options include Web or news search, selection of sources, language
  restriction, and filtering.
  </_cdata_>
  </content>
  </document>
  <document url="http://sportsillustrated.cnn.com/hockey/">
  <content name="title" type="html" action="cluster-bold" weight="3">
  <_cdata_>
  CNN/SI: Hockey
  </_cdata_>
  </content>
  <content name="snippet" type="html" action="cluster-bold">
  <_cdata_>
  Daily news, scores, feature stories, statistics, standings, player
  profiles, polls, and chat.
  </_cdata_>
  </content>
  </document>
  </vce>

Output Example:

  <meta query="companies"/>
  <tree>
  <node id="N2" level="0" sep="0.0000" cohesion="0.0000" ndocs="2" score="0.000000" instances="Ndoc0 Ndoc1">
  <node id="N0" level="1" sep="0.0000" cohesion="0.0000" ndocs="1" score="0.000000" instances="Ndoc0">
  <descriptor string="Vivisimo" sep="1.000000" ratio="1.000000"/>
  <descriptor string="Sources" sep="0.577350" ratio="1.000000"/>
  <document url="http://vivisimo.com/" id="Ndoc0">
  <content name="title" type="html" action="discard" weight="3.000000">
  <span class=b1>Vivisimo</span>
  </content>
  <content name="title" type="html" action="cluster" weight="3.000000" output-action="bold">
  <_cdata_>
  Vivisimo
  </_cdata_>
  </content>
  <content name="snippet" type="html" action="discard" weight="1.000000">
  Groups the results by topic via document clustering technology.
  Options include Web or news search, selection of <span class=b1>sources</span>, language
  restriction, and filtering.
  </content>
  <content name="snippet" type="html" action="cluster" output-action="bold" weight="1.000000">
  <_cdata_>
  Groups the results by topic via document clustering technology.
  Options include Web or news search, selection of sources, language
  restriction, and filtering.
  </_cdata_>
  </content>
  </document>
  </node>
  <node id="N1" level="1" sep="0.0000" cohesion="0.0000" ndocs="1" score="0.000000" instances="Ndoc1">
  <descriptor string="Hockey" sep="1.000000" ratio="1.000000"/>
  <descriptor string="Statistics" sep="0.577350" ratio="1.000000"/>
  <document url="http://sportsillustrated.cnn.com/hockey/" id="Ndoc1">
  <content name="title" type="html" action="discard" weight="3.000000">
  CNN/SI: <span class=b1>Hockey</span>
  </content>
  <content name="title" type="html" action="cluster" weight="3.000000" output-action="bold">
  <_cdata_>
  CNN/SI: Hockey
  </_cdata_>
  </content>
  <content name="snippet" type="html" action="discard" weight="1.000000">
  Daily news, scores, feature stories, <span class=b1>statistics</span>, standings, player
  profiles, polls, and chat.
  </content>
  <content name="snippet" type="html" action="cluster" output-action="bold" weight="1.000000">
  <_cdata_>
  Daily news, scores, feature stories, statistics, standings, player
  profiles, polls, and chat.
  </_cdata_>
  </content>
  </document>
  </node>
  </node>
  </tree>