Skip to main content
Jump to: navigation, search

SMILA/Documentation/LuceneIndexPipelet

< SMILA‎ | Documentation
Revision as of 07:15, 12 August 2008 by Juergen.schumacher.empolis.com (Talk | contribs) (New page: == Bundle: <tt>org.eclipse.eilf.lucene.LuceneIndexService</tt> == === Description === This ProcessingService is used to index SMILA records in a Lucene document index. It supports adding,...)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Bundle: org.eclipse.eilf.lucene.LuceneIndexService

Description

This ProcessingService indexes SMILA records in a Lucene document index. It supports adding, updating, and deleting records.

Configuration

Annotations

The LuceneIndexService reads the annotation org.eclipse.eilf.lucene.LuceneIndexService on each record to decide how to handle that record. The annotation must contain the following values.

Name Value Description
indexName a String the name of the index to work on
executionMode ADD or DELETE ADD - add or update the record, DELETE - delete the record from the index

Configuration files

  • configuration/com.brox.anyfinder.datadictionary/DataDictionary.xml

This file configures the Lucene index structure and the search template. More than one index can be defined here; the index to work on is selected by the annotation value "indexName". The "FieldNo" values defined here are referenced in LuceneAttributeMapping.xml and LuceneAttachmentMapping.xml. For more information about configuring DataDictionary.xml, see the Anyfinder documentation.


  • configuration/org.eclipse.eilf.lucene/LuceneAttributeMapping.xml

This property file maps record attribute names to the Lucene "FieldNo" values defined in DataDictionary.xml. The name of each property is the attribute name in the record, and its value is the corresponding "FieldNo".

Property Type Description
attribute name Integer the FieldNo
  • configuration/org.eclipse.eilf.lucene/LuceneAttachmentMapping.xml

This property file has the same structure as LuceneAttributeMapping.xml, except that it maps attachment names instead of attribute names.

Property Type Description
attachment name Integer the FieldNo


Example

The following example is used in the SMILA example application to index records delivered by the file system crawler and the web crawler.

addpipeline.bpel

...
<extensionActivity name="invokeLuceneService">
  <!-- Calls the LuceneIndexService, using "request" as both the input and the output variable. -->
  <proc:invokeService>
    <proc:service name="LuceneIndexService"/>
    <proc:variables input="request" output="request"/>
    <!--
      Annotation read by the LuceneIndexService:
        indexName     : name of the index to work on
        executionMode : ADD (add or update the record) or DELETE (delete the record from the index)
    -->
    <proc:setAnnotations>
      <rec:An n="org.eclipse.eilf.lucene.LuceneIndexService">
        <rec:V n="indexName">test_index</rec:V>
        <rec:V n="executionMode">ADD</rec:V>
      </rec:An>
    </proc:setAnnotations>
  </proc:invokeService>
</extensionActivity>
...

DataDictionary.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example DataDictionary.xml: configures the Lucene index structure and the search
  settings for the index "test_index".
-->
<AnyFinderDataDictionary xmlns="http://www.anyfinder.de/DataDictionary" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.anyfinder.de/DataDictionary ../xml/AnyFinderDataDictionary.xsd">
  <!-- One Index element per index; Name is the value selected by the "indexName" annotation. -->
  <Index Name="test_index">
    <Connection xmlns="http://www.anyfinder.de/DataDictionary/Connection" MaxConnections="5"/>
    <!--
      Index fields. Each FieldNo is the number referenced by LuceneAttributeMapping.xml
      and LuceneAttachmentMapping.xml.
    -->
    <IndexStructure xmlns="http://www.anyfinder.de/IndexStructure" Name="test_index">
      <Analyzer ClassName="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
      <IndexField FieldNo="5" IndexValue="true" Name="Title" StoreText="true" Tokenize="true" Type="Text"/>
      <!-- The Url field declares its own analyzer in addition to the one declared for the index. -->
      <IndexField FieldNo="4" IndexValue="true" Name="Url" StoreText="true" Tokenize="false" Type="Text">
        <Analyzer ClassName="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
      </IndexField>
      <IndexField FieldNo="3" IndexValue="true" Name="Date" StoreText="true" Tokenize="false" Type="Text"/>
      <IndexField FieldNo="2" IndexValue="true" Name="Path" StoreText="true" Tokenize="true" Type="Text"/>
      <IndexField FieldNo="1" IndexValue="true" Name="Filename" StoreText="true" Tokenize="true" Type="Text"/>
      <IndexField FieldNo="0" IndexValue="true" Name="Content" StoreText="true" Tokenize="true" Type="Text"/>
    </IndexStructure>
    <!-- NOTE(review): FieldNo 0 is named "ID" here but "Content" in the IndexStructure above; confirm this is intended. -->
    <Result>
      <Field FieldNo="0" Name="ID"/>
    </Result>
    <!-- NOTE(review): xmlns:xsi is already declared on the root element; the redeclaration below appears redundant. -->
    <Configuration xmlns="http://www.anyfinder.de/DataDictionary/Configuration" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.anyfinder.de/DataDictionary/Configuration ../xml/DataDictionaryConfiguration.xsd">
      <!--
        Per-field search settings: fields 1 to 5 are optional with operator OR,
        field 0 (Content) is required with operator AND.
        Presumably this is the "search template"; TODO confirm against the Anyfinder documentation.
      -->
      <DefaultConfig>
        <Field FieldNo="5">
          <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="4">
          <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="3">
          <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="2">
          <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="1">
          <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="0">
          <FieldConfig Constraint="required" Weight="1" xsi:type="FTText">
            <NodeTransformer xmlns="http://www.anyfinder.de/Search/ParameterObjects" Name="urn:ExtendedNodeTransformer">
              <ParameterSet xmlns="http://www.brox.de/ParameterSet"/>
            </NodeTransformer>
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="AND" Tolerance="exact"/>
          </FieldConfig>
        </Field>
      </DefaultConfig>
      <!-- Result fields 1 to 5; presumably the fields returned for each hit (verify against the Anyfinder documentation). -->
      <Result Name="">
        <ResultField FieldNo="5" Name="Title"/>
        <ResultField FieldNo="4" Name="Url"/>
        <ResultField FieldNo="3" Name="Date"/>
        <ResultField FieldNo="2" Name="Path"/>
        <ResultField FieldNo="1" Name="Filename"/>
      </Result>
      <!-- Highlighting configuration for field 0 (Content), using the urn:Sentence transformer with the parameters below. -->
      <HighlightingResult Name="">
        <HighlightingResultField FieldNo="0" Name="Content" xsi:type="HLTextField">
          <HighlightingTransformer Name="urn:Sentence">
            <ParameterSet xmlns="http://www.brox.de/ParameterSet">
              <Parameter Name="MaxLength" xsi:type="Integer">
                <Value>300</Value>
              </Parameter>
              <Parameter Name="MaxHLElements" xsi:type="Integer">
                <Value>999</Value>
              </Parameter>
              <Parameter Name="MaxSucceedingCharacters" xsi:type="Integer">
                <Value>30</Value>
              </Parameter>
              <Parameter Name="SucceedingCharacters" xsi:type="String">
                <Value>...</Value>
              </Parameter>
              <Parameter Name="SortAlgorithm" xsi:type="String">
                <Value>Occurrence</Value>
              </Parameter>
              <Parameter Name="TextHandling" xsi:type="String">
                <Value>ReturnSnipplet</Value>
              </Parameter>
            </ParameterSet>
          </HighlightingTransformer>
          <HighlightingParameter xmlns="http://www.anyfinder.de/DataDictionary/Configuration/TextHighlighting"/>
        </HighlightingResultField>
      </HighlightingResult>
    </Configuration>
  </Index>
</AnyFinderDataDictionary>

LuceneAttributeMapping.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maps record attribute names to Lucene index fields.
  Property name  : the attribute name in the record
  Property value : the "FieldNo" of the target field as defined in DataDictionary.xml
-->
<PipeletConfiguration xmlns="http://www.eclipse.org/eilf/processor">
    <Property name="Filename" type="java.lang.Integer">
        <Value>1</Value>
    </Property>
    <Property name="Path" type="java.lang.Integer">
        <Value>2</Value>
    </Property>
    <Property name="Date" type="java.lang.Integer">
        <Value>3</Value>
    </Property>
    <Property name="Url" type="java.lang.Integer">
        <Value>4</Value>
    </Property>
    <Property name="Title" type="java.lang.Integer">
        <Value>5</Value>
    </Property>
</PipeletConfiguration>

LuceneAttachmentMapping.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maps record attachment names to Lucene index fields.
  Property name  : the attachment name in the record
  Property value : the "FieldNo" of the target field as defined in DataDictionary.xml
-->
<PipeletConfiguration xmlns="http://www.eclipse.org/eilf/processor">
    <Property name="Content" type="java.lang.Integer">
        <Value>0</Value>
    </Property>
</PipeletConfiguration>

Back to the top