Add DwC indexing to Solr

  • download DWC_to_solr.xslt (GIST)
<?xml version="1.0" encoding="UTF-8"?>
<!--
  DWC_to_solr.xslt

  Turns the Darwin Core (DwC) record held in a FOXML "DWC" datastream into
  Solr <field> elements. Intended to be pulled into foxmlToSolr.xslt with
  xsl:include.

  Field names are built as: prefix + local element name + suffix. With the
  default parameters dwc:scientificName becomes "dwc.scientificName_s" and
  dcterms:type becomes "dcterms.type_s".
-->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:foxml="info:fedora/fedora-system:def/foxml#"
  xmlns:dwr="http://rs.tdwg.org/dwc/xsd/simpledarwincore/"
  xmlns:dcterms="http://purl.org/dc/terms/"
  xmlns:dwc="http://rs.tdwg.org/dwc/terms/"
  exclude-result-prefixes="xlink foxml dwr dcterms dwc">
  <!-- xlink and foxml are excluded as well, otherwise their namespace
       declarations are copied onto every emitted <field> element. -->

  <!--
    Entry point: the latest version of the datastream whose ID is "DWC".

    Parameters:
      content   : node-set containing the datastream XML, normally supplied by
                  the calling stylesheet. Defaults to the matched node so that
                  the path expression below stays valid (and finds inline XML
                  content, if any) when the caller passes nothing; an empty
                  string default would make "$content//..." a type error.
      prefix    : field name prefix for dwc:* terms      (default "dwc.")
      prefixdct : field name prefix for dcterms:* terms  (default "dcterms.")
      suffix    : field name suffix for all terms        (default "_s")

    The [1] predicates restrict indexing to the first SimpleDarwinRecord of a
    record set.
  -->
  <xsl:template match="foxml:datastream[@ID='DWC']/foxml:datastreamVersion[last()]">
    <xsl:param name="content" select="."/>
    <xsl:param name="prefix">dwc.</xsl:param>
    <xsl:param name="prefixdct">dcterms.</xsl:param>
    <xsl:param name="suffix">_s</xsl:param>
    <xsl:apply-templates select="$content//dwr:SimpleDarwinRecordSet[1]/dwr:SimpleDarwinRecord[1]">
      <xsl:with-param name="prefix" select="$prefix"/>
      <xsl:with-param name="prefixdct" select="$prefixdct"/>
      <xsl:with-param name="suffix" select="$suffix"/>
    </xsl:apply-templates>
  </xsl:template>

  <!--
    Emits one Solr field per child term of a SimpleDarwinRecord:
    all dcterms:* children first, then all dwc:* children, each group in
    document order.
  -->
  <xsl:template match="dwr:SimpleDarwinRecord">
    <xsl:param name="prefix">dwc.</xsl:param>
    <xsl:param name="prefixdct">dcterms.</xsl:param>
    <xsl:param name="suffix">_s</xsl:param>
    <xsl:apply-templates select="dcterms:*" mode="dwc_to_solr_field">
      <xsl:with-param name="prefix" select="$prefixdct"/>
      <xsl:with-param name="suffix" select="$suffix"/>
    </xsl:apply-templates>
    <xsl:apply-templates select="dwc:*" mode="dwc_to_solr_field">
      <xsl:with-param name="prefix" select="$prefix"/>
      <xsl:with-param name="suffix" select="$suffix"/>
    </xsl:apply-templates>
  </xsl:template>

  <!--
    Helper: writes a single <field> for the current term element.

    The value is the whitespace-normalised string value of the element, so
    text split across several text nodes is kept whole. Elements whose value
    is blank produce no field at all, which keeps empty strings out of the
    index. The mode name is specific to this file to avoid clashing with
    templates of the including stylesheet.
  -->
  <xsl:template match="*" mode="dwc_to_solr_field">
    <xsl:param name="prefix"/>
    <xsl:param name="suffix"/>
    <xsl:variable name="value" select="normalize-space(.)"/>
    <xsl:if test="$value != ''">
      <field name="{concat($prefix, local-name(), $suffix)}">
        <xsl:value-of select="$value"/>
      </field>
    </xsl:if>
  </xsl:template>
</xsl:stylesheet>
  • copy DWC_to_solr.xslt into /var/lib/tomcat7/webapps/fedoragsearch/WEB-INF/classes/fgsconfigFinal/index/FgsIndex/islandora_transforms/
  • edit /var/lib/tomcat7/webapps/fedoragsearch/WEB-INF/classes/fgsconfigFinal/index/FgsIndex/foxmlToSolr.xslt
+   <xsl:include href="/var/lib/tomcat7/webapps/fedoragsearch/WEB-INF/classes/fgsconfigFinal/index/FgsIndex/islandora_transforms/DWC_to_solr.xslt"/>
  • edit /usr/local/solr/islandora/conf/schema.xml
+   <dynamicField name="dwc.*"  type="text_general" indexed="true" stored="true" multiValued="true"/>
+   <dynamicField name="dcterms.*"  type="text_general" indexed="true" stored="true" multiValued="true"/>
  • restart Tomcat7
  • DWC datastream example to test DwC indexing
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample content for a DWC datastream: one SimpleDarwinRecordSet holding a
  single SimpleDarwinRecord. Its children are Dublin Core terms (dcterms:*)
  followed by Darwin Core terms (dwc:*). dcterms:bibliographicCitation is
  empty in this sample. The dc and oai_dc prefixes are declared on the root
  but not used by any element below.
-->
<dwr:SimpleDarwinRecordSet  xmlns:dwr="http://rs.tdwg.org/dwc/xsd/simpledarwincore/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dwc="http://rs.tdwg.org/dwc/terms/">
	<dwr:SimpleDarwinRecord>
		<dcterms:type>StillImage</dcterms:type>
		<dcterms:modified>1980-09-11</dcterms:modified>
		<dcterms:language>zxx</dcterms:language>
		<dcterms:rights>Creative Commons license: Attribution-NonCommercial-NoDerivatives 4.0 International(CC BY-NC-ND 4.0)</dcterms:rights>
		<dcterms:rightsHolder>Institute for Sustainable Plant Protection IPSP-CNR</dcterms:rightsHolder>
		<dcterms:accessRights>not-for-profit use only</dcterms:accessRights>
		<dcterms:bibliographicCitation></dcterms:bibliographicCitation>
		<dwc:scientificName>Maize rough dwarf virus</dwc:scientificName>
		<dwc:acceptedNameUsage>Maize rough dwarf virus</dwc:acceptedNameUsage>
		<dwc:nameAccordingTo>Harpaz, I.</dwc:nameAccordingTo>
		<dwc:namePublishedIn>Nature, Lond. 184: 77</dwc:namePublishedIn>
		<dwc:namePublishedInYear>1959</dwc:namePublishedInYear>
		<dwc:higherClassification>Viruses;Reoviridae;Spinareovirinae;Fijivirus;Maize rough dwarf virus</dwc:higherClassification>
		<dwc:kingdom>Viruses</dwc:kingdom>
		<dwc:family>Reoviridae</dwc:family>
		<dwc:genus>Fijivirus</dwc:genus>
		<dwc:taxonRank>species</dwc:taxonRank>
		<dwc:vernacularName>virus del nanismo ruvido del mais</dwc:vernacularName>
	</dwr:SimpleDarwinRecord>
</dwr:SimpleDarwinRecordSet>
 
 
repo371/dwc.txt · Last modified: 2015/04/15 15:56 by giancarlo

Developers: CNR IRCrES IT Office and Library
Giancarlo Birello (giancarlo.birello _@_ ircres.cnr.it) and Anna Perin (anna.perin _@_ ircres.cnr.it)
V2P2@TO.CNR is licensed under: Creative Commons License
Recent changes RSS feed Creative Commons License Valid XHTML 1.0 Valid CSS Driven by DokuWiki
Drupal Garland Theme for Dokuwiki