SMILA/Documentation/Filesystem Crawler

From Eclipsepedia

Jump to: navigation, search

Filesystem Index Order

The following is an example of a Filesystem Index Order configuration:

<!--
  Example index order for the filesystem crawler.
  The schema location hint on the root points at filesystemIndexOrder.xsd.
-->
<IndexOrderConfiguration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                         xsi:noNamespaceSchemaLocation="../org.eclipse.smila.connectivity.framework.crawler.filesystem/schemas/filesystemIndexOrder.xsd">

  <DataSourceID>file</DataSourceID>
  <SchemaID>org.eclipse.smila.connectivity.framework.crawler.filesystem</SchemaID>
  <DataConnectionID>
    <Crawler>FileSystemCrawlerDS</Crawler>
  </DataConnectionID>
  <CompoundHandling>Yes</CompoundHandling>

  <Attributes>
    <!--
      Every Attribute wraps one value source: FileAttributes, AccessTree or AccessList.
      HashAttribute, KeyAttribute and Attachment are not declared in the filesystem
      schema itself; presumably they come from the base Attribute type in
      RootIndexOrderConfiguration.xsd (verify there).
    -->

    <!-- Values taken from file properties. FileAttributes accepts
         Name, Path, Size, LastModifiedDate, Content and FileExtension. -->
    <Attribute Name="LastModifiedDate" Type="Date" HashAttribute="true">
      <FileAttributes>LastModifiedDate</FileAttributes>
    </Attribute>
    <Attribute Name="Filename" Type="String">
      <FileAttributes>Name</FileAttributes>
    </Attribute>
    <Attribute Name="Path" Type="String" KeyAttribute="true">
      <FileAttributes>Path</FileAttributes>
    </Attribute>
    <Attribute Name="Content" Type="String" Attachment="true">
      <FileAttributes>Content</FileAttributes>
    </Attribute>
    <Attribute Name="Extension" Type="String">
      <FileAttributes>FileExtension</FileAttributes>
    </Attribute>
    <Attribute Name="Size" Type="String">
      <FileAttributes>Size</FileAttributes>
    </Attribute>

    <!-- Access information. AccessTree and AccessList both require ExpandAccounts;
         AccessList additionally requires a three-character Mask (R, W, X in that
         order, with whitespace wherever a letter is left out). -->
    <Attribute Name="AccessTreeNotExpanded" Type="String">
      <AccessTree ExpandAccounts="false"/>
    </Attribute>
    <Attribute Name="AccessTreeExpanded" Type="String">
      <AccessTree ExpandAccounts="true"/>
    </Attribute>
    <Attribute Name="AccessListNotExpanded" Type="String">
      <AccessList Mask=" W " ExpandAccounts="false"/>
    </Attribute>
    <Attribute Name="AccessListExpanded" Type="String">
      <AccessList Mask=" W " ExpandAccounts="true"/>
    </Attribute>
  </Attributes>

  <Process>
    <BaseDir>c:\data</BaseDir>
    <!-- Recursive="true" and CaseSensitive="false" restate the schema defaults. -->
    <Filter CaseSensitive="false" Recursive="true">
      <Include Name="*.txt"/>
      <Include Name="*.htm"/>
      <Include Name="*.html"/>
      <Include Name="*.xml"/>
    </Filter>
  </Process>

</IndexOrderConfiguration>

XSD schema used for the Filesystem Crawler

<xs:schema elementFormDefault="qualified" attributeFormDefault="unqualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <!--
    Redefines two types of the root index order schema for the filesystem crawler:
    Process gains BaseDir/Filter pairs, Attribute gains a choice of value source.
  -->
  <xs:redefine schemaLocation="../../org.eclipse.smila.connectivity.framework.indexorder/schemas/RootIndexOrderConfiguration.xsd">

    <xs:complexType name="Process">
      <xs:annotation>
        <xs:documentation>Process Specification</xs:documentation>
      </xs:annotation>
      <xs:complexContent>
        <xs:extension base="Process">
          <!-- One or more BaseDir/Filter pairs; within a pair BaseDir comes first. -->
          <xs:sequence maxOccurs="unbounded">
            <xs:element name="BaseDir" type="xs:string"/>
            <xs:element name="Filter">
              <xs:complexType>
                <!-- All Include entries precede any Exclude entry; either list may be empty. -->
                <xs:sequence>
                  <xs:element name="Include"
                              minOccurs="0"
                              maxOccurs="unbounded">
                    <xs:complexType>
                      <xs:attribute name="Name" type="xs:string" use="required"/>
                      <!-- DateFrom and DateTo are optional xs:dateTime values. -->
                      <xs:attribute name="DateFrom" type="xs:dateTime"/>
                      <xs:attribute name="DateTo" type="xs:dateTime"/>
                    </xs:complexType>
                  </xs:element>
                  <xs:element name="Exclude"
                              minOccurs="0"
                              maxOccurs="unbounded">
                    <xs:complexType>
                      <xs:attribute name="Name" type="xs:string" use="required"/>
                    </xs:complexType>
                  </xs:element>
                </xs:sequence>
                <!-- Both flags are optional; omitted, they default to false and true respectively. -->
                <xs:attribute name="CaseSensitive" type="xs:boolean" default="false"/>
                <xs:attribute name="Recursive" type="xs:boolean" default="true"/>
              </xs:complexType>
            </xs:element>
          </xs:sequence>
        </xs:extension>
      </xs:complexContent>
    </xs:complexType>

    <xs:complexType name="Attribute">
      <xs:annotation>
        <xs:documentation>Extends the base Attribute with exactly one value source: FileAttributes, AccessTree or AccessList.</xs:documentation>
      </xs:annotation>
      <xs:complexContent>
        <xs:extension base="Attribute">
          <xs:choice>
            <xs:element name="FileAttributes" type="FileAttributesType"/>
            <xs:element name="AccessTree" type="AccessTreeType"/>
            <xs:element name="AccessList" type="AccessListType"/>
          </xs:choice>
        </xs:extension>
      </xs:complexContent>
    </xs:complexType>

  </xs:redefine>
 
 
  <!-- simple types -->
  <xs:simpleType name="FileAttributesType">
    <xs:annotation>
      <xs:documentation>Closed list of values allowed as the content of a FileAttributes element.</xs:documentation>
    </xs:annotation>
    <xs:restriction base="xs:string">
      <xs:enumeration value="Name"/>
      <xs:enumeration value="Path"/>
      <xs:enumeration value="Size"/>
      <xs:enumeration value="LastModifiedDate"/>
      <xs:enumeration value="Content"/>
      <xs:enumeration value="FileExtension"/>
    </xs:restriction>
  </xs:simpleType>
  <xs:simpleType name="AuthorityType">
    <xs:annotation>
      <xs:documentation>Values accepted by the AuthorityFilter attribute of AccessListType: USERS or GROUPS.</xs:documentation>
    </xs:annotation>
    <!-- NOTE(review): how the crawler interprets each value is not visible in this schema; confirm against the implementation. -->
    <xs:restriction base="xs:string">
      <xs:enumeration value="USERS"/>
      <xs:enumeration value="GROUPS"/>
    </xs:restriction>
  </xs:simpleType>
  <xs:simpleType name="MaskType">
    <xs:annotation>
      <xs:documentation>Three-character mask: position 1 is R, position 2 is W, position 3 is X; any position may hold a whitespace character instead of its letter.</xs:documentation>
    </xs:annotation>
    <xs:restriction base="xs:string">
      <!-- Example of a matching value: " W " (only the middle position carries a letter). -->
      <xs:pattern value="(R|\s)(W|\s)(X|\s)"/>
    </xs:restriction>
  </xs:simpleType>
 
 
  <!-- complex types -->
  <xs:complexType name="AccessTreeType">
    <xs:annotation>
      <xs:documentation>Empty element type with a mandatory boolean ExpandAccounts attribute; also the base type of AccessListType.</xs:documentation>
    </xs:annotation>
    <xs:attribute name="ExpandAccounts"
                  type="xs:boolean"
                  use="required"/>
  </xs:complexType>
 
  <xs:complexType name="AccessListType">
    <xs:annotation>
      <xs:documentation>AccessTreeType plus a mandatory Mask and an optional AuthorityFilter attribute.</xs:documentation>
    </xs:annotation>
    <xs:complexContent>
      <xs:extension base="AccessTreeType">
        <xs:attribute name="Mask" type="MaskType" use="required"/>
        <!-- No "use" given, so AuthorityFilter is optional (the schema default). -->
        <xs:attribute name="AuthorityFilter" type="AuthorityType"/>
      </xs:extension>
    </xs:complexContent>
  </xs:complexType>
 
</xs:schema>