Skip to main content
Jump to: navigation, search

Difference between revisions of "SMILA/Documentation/LuceneIndexPipelet"

Line 60: Line 60:
 
     <IndexStructure xmlns="http://www.anyfinder.de/IndexStructure" Name="test_index">
 
     <IndexStructure xmlns="http://www.anyfinder.de/IndexStructure" Name="test_index">
 
       <Analyzer ClassName="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
 
       <Analyzer ClassName="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
      <IndexField FieldNo="9" IndexValue="false" Name="XMLID" StoreText="true" Tokenize="false" Type="Text"/>
 
 
       <IndexField FieldNo="8" IndexValue="true" Name="MimeType" StoreText="true" Tokenize="true" Type="Text"/>
 
       <IndexField FieldNo="8" IndexValue="true" Name="MimeType" StoreText="true" Tokenize="true" Type="Text"/>
 
       <IndexField FieldNo="7" IndexValue="true" Name="Size" StoreText="true" Tokenize="true" Type="Text"/>
 
       <IndexField FieldNo="7" IndexValue="true" Name="Size" StoreText="true" Tokenize="true" Type="Text"/>
Line 78: Line 77:
 
     <Configuration xmlns="http://www.anyfinder.de/DataDictionary/Configuration" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.anyfinder.de/DataDictionary/Configuration ../xml/DataDictionaryConfiguration.xsd">
 
     <Configuration xmlns="http://www.anyfinder.de/DataDictionary/Configuration" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.anyfinder.de/DataDictionary/Configuration ../xml/DataDictionaryConfiguration.xsd">
 
       <DefaultConfig>
 
       <DefaultConfig>
        <Field FieldNo="9">
 
          <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
 
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
 
          </FieldConfig>
 
        </Field>
 
 
         <Field FieldNo="8">
 
         <Field FieldNo="8">
 
           <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
 
           <FieldConfig Constraint="optional" Weight="1" xsi:type="FTText">
Line 132: Line 126:
 
         </Field>
 
         </Field>
 
       </DefaultConfig>
 
       </DefaultConfig>
      <Result Name="">
 
        <ResultField FieldNo="9" Name="XMLID"/>
 
        <ResultField FieldNo="8" Name="MimeType"/>
 
        <ResultField FieldNo="7" Name="Size"/>
 
        <ResultField FieldNo="6" Name="Extension"/>
 
        <ResultField FieldNo="5" Name="Title"/>
 
        <ResultField FieldNo="4" Name="Url"/>
 
        <ResultField FieldNo="3" Name="LastModifiedDate"/>
 
<ResultField FieldNo="2" Name="Path"/>
 
        <ResultField FieldNo="1" Name="Filename"/>
 
      </Result>
 
      <HighlightingResult Name="">
 
        <HighlightingResultField FieldNo="0" Name="Content" xsi:type="HLTextField">
 
          <HighlightingTransformer Name="urn:Sentence">
 
            <ParameterSet xmlns="http://www.brox.de/ParameterSet">
 
              <Parameter Name="MaxLength" xsi:type="Integer">
 
                <Value>300</Value>
 
              </Parameter>
 
              <Parameter Name="MaxHLElements" xsi:type="Integer">
 
                <Value>999</Value>
 
              </Parameter>
 
              <Parameter Name="MaxSucceedingCharacters" xsi:type="Integer">
 
                <Value>30</Value>
 
              </Parameter>
 
              <Parameter Name="SucceedingCharacters" xsi:type="String">
 
                <Value>...</Value>
 
              </Parameter>
 
              <Parameter Name="SortAlgorithm" xsi:type="String">
 
                <Value>Occurrence</Value>
 
              </Parameter>
 
              <Parameter Name="TextHandling" xsi:type="String">
 
                <Value>ReturnSnipplet</Value>
 
              </Parameter>
 
            </ParameterSet>
 
          </HighlightingTransformer>
 
          <HighlightingParameter xmlns="http://www.anyfinder.de/DataDictionary/Configuration/TextHighlighting"/>
 
        </HighlightingResultField>
 
      </HighlightingResult>
 
 
     </Configuration>
 
     </Configuration>
 
   </Index>
 
   </Index>
Line 192: Line 148:
 
    <Attribute name="Size" fieldNo="7" />
 
    <Attribute name="Size" fieldNo="7" />
 
    <Attribute name="MimeType" fieldNo="8" />           
 
    <Attribute name="MimeType" fieldNo="8" />           
    <Attribute name="XMLID" fieldNo="9" />    
 
 
     </Attributes>
 
     </Attributes>
 
     <Attachments>
 
     <Attachments>

Revision as of 10:22, 31 March 2009

Bundle: org.eclipse.smila.lucene.LuceneIndexService

Description

This ProcessingService is used to index SMILA records in a Lucene document index. It supports adding, updating and deleting of records.

Configuration

Annotations

The LuceneIndexService uses the Annotation org.eclipse.smila.lucene.LuceneIndexService on records to decide how to handle a record. It supports the following required values.

Name Value Description
indexName a String the name of the index to work on
executionMode ADD or DELETE ADD - add or update the record, DELETE - delete the record from the index

Configuration files

  • configuration/org.eclipse.smila.search.datadictionary/DataDictionary.xml

Here the Lucene index structure and the search template are configured. It is possible to define more than one index here. The index to work on is set by the annotation "indexName". The defined "FieldNo" values are referenced in LuceneAttributeMapping.xml and LuceneAttachmentMapping.xml. For more information about the configuration of DataDictionary.xml, see the Anyfinder documentation. This file is used to prepare the settings for indices. When an index is needed, it is created automatically on demand, and the configuration is loaded from this file to create the index. (Note: the framework also creates a Datadictionary.xml file in the workspace; that file contains only the settings/configuration of the indices that have already been created.) The framework creates an index by itself when a record is configured to be stored in that index, but the user can also use the createIndex JMX command to create an index.


  • configuration/org.eclipse.smila.lucene/Mappings.xml

Here a mapping of attribute and attachment names to Lucene "FieldNo" (defined in DataDictionary.xml) is configured. It is possible to define mappings for multiple indexes in this file, using the same "indexName" as in file DataDictionary.xml.

The field XMLID is a required field. It is used to store the XML representation of Id objects so that Id objects can be restored from XML in search results. This is not possible using the Id string or hash value.

Example

The following example was used in the SMILA example application to index records delivered by the FilesystemCrawler and the WebCrawler.

addpipeline.bpel

...
<!-- Invokes the LuceneIndexService with the "request" variable as both input and output.
     The annotation set here tells the service which index to work on (indexName) and
     what to do with the record (executionMode: ADD adds or updates, DELETE removes). -->
<extensionActivity name="invokeLuceneService">
  <proc:invokeService>
    <proc:service name="LuceneIndexService"/>
    <proc:variables input="request" output="request"/>
    <proc:setAnnotations>
      <rec:An n="org.eclipse.smila.lucene.LuceneIndexService">
        <rec:V n="indexName">test_index</rec:V>
        <rec:V n="executionMode">ADD</rec:V>
      </rec:An>
    </proc:setAnnotations>
  </proc:invokeService>
</extensionActivity>
...

DataDictionary.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Data dictionary for the Lucene index "test_index": declares the index fields
     and the default search configuration for each of them. -->
<AnyFinderDataDictionary xmlns="http://www.anyfinder.de/DataDictionary"
                         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                         xsi:schemaLocation="http://www.anyfinder.de/DataDictionary ../xml/AnyFinderDataDictionary.xsd">

  <!-- The index name is the value used in the "indexName" annotation and in Mappings.xml. -->
  <Index Name="test_index">
    <Connection xmlns="http://www.anyfinder.de/DataDictionary/Connection" MaxConnections="5"/>

    <!-- Index structure. Each FieldNo declared here is referenced by the mapping configuration. -->
    <IndexStructure xmlns="http://www.anyfinder.de/IndexStructure" Name="test_index">
      <Analyzer ClassName="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
      <IndexField FieldNo="8" Name="MimeType" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
      <IndexField FieldNo="7" Name="Size" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
      <IndexField FieldNo="6" Name="Extension" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
      <IndexField FieldNo="5" Name="Title" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
      <!-- Url is not tokenized and overrides the index-wide analyzer with its own. -->
      <IndexField FieldNo="4" Name="Url" Type="Text" IndexValue="true" StoreText="true" Tokenize="false">
        <Analyzer ClassName="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
      </IndexField>
      <IndexField FieldNo="3" Name="LastModifiedDate" Type="Text" IndexValue="true" StoreText="true" Tokenize="false"/>
      <IndexField FieldNo="2" Name="Path" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
      <IndexField FieldNo="1" Name="Filename" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
      <IndexField FieldNo="0" Name="Content" Type="Text" IndexValue="true" StoreText="true" Tokenize="true"/>
    </IndexStructure>

    <!-- NOTE(review): FieldNo 0 is named "Content" in the IndexStructure above but "ID" here;
         confirm that this is intended. -->
    <Result>
      <Field FieldNo="0" Name="ID"/>
    </Result>

    <!-- Default search configuration: one Field entry per FieldNo of the index structure. -->
    <Configuration xmlns="http://www.anyfinder.de/DataDictionary/Configuration"
                   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                   xsi:schemaLocation="http://www.anyfinder.de/DataDictionary/Configuration ../xml/DataDictionaryConfiguration.xsd">
      <DefaultConfig>
        <Field FieldNo="8">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="7">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="6">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="5">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="4">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="3">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="2">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <Field FieldNo="1">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
        <!-- The Content field additionally configures a node transformer with an empty parameter set. -->
        <Field FieldNo="0">
          <FieldConfig xsi:type="FTText" Constraint="optional" Weight="1">
            <NodeTransformer xmlns="http://www.anyfinder.de/Search/ParameterObjects" Name="urn:ExtendedNodeTransformer">
              <ParameterSet xmlns="http://www.brox.de/ParameterSet"/>
            </NodeTransformer>
            <Parameter xmlns="http://www.anyfinder.de/Search/TextField" Operator="OR" Tolerance="exact"/>
          </FieldConfig>
        </Field>
      </DefaultConfig>
    </Configuration>
  </Index>
</AnyFinderDataDictionary>

Mappings.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maps record attribute and attachment names to the Lucene "FieldNo" values
     defined in DataDictionary.xml. -->
<!-- NOTE(review): the root element is namespace-qualified, yet the schema hint uses
     xsi:noNamespaceSchemaLocation; confirm against schemas/Mappings.xsd whether
     xsi:schemaLocation is the intended attribute. -->
<Mappings xmlns="http://www.eclipse.org/smila/lucene"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:noNamespaceSchemaLocation="schemas/Mappings.xsd">
  <!-- indexName is the same index name as used in DataDictionary.xml. -->
  <Mapping indexName="test_index">
    <!-- Record attributes, each stored in the index field with the given fieldNo. -->
    <Attributes>
      <Attribute name="Filename" fieldNo="1"/>
      <Attribute name="Path" fieldNo="2"/>
      <Attribute name="LastModifiedDate" fieldNo="3"/>
      <Attribute name="Url" fieldNo="4"/>
      <Attribute name="Title" fieldNo="5"/>
      <Attribute name="Extension" fieldNo="6"/>
      <Attribute name="Size" fieldNo="7"/>
      <Attribute name="MimeType" fieldNo="8"/>
    </Attributes>
    <!-- Record attachments, each stored in the index field with the given fieldNo. -->
    <Attachments>
      <Attachment name="Content" fieldNo="0"/>
    </Attachments>
  </Mapping>
</Mappings>

Back to the top