How can we tune our CQ5 search functionality, exclude certain properties from being indexed, enable spell checking, stemming or other features?
The search module in CQ5 is built using CRX. CRX uses an embedded Apache Lucene index to implement search. CRX provides various parameters and configurations that allow you to fine-tune the index to meet your needs.
SearchIndex configuration:
First of all, a CRX repository can have one or many workspaces and each workspace has its own search index. In addition to this, each workspace's search index can be configured separately in the workspace's workspace.xml file.
In a default CQ5 installation you only have one CRX workspace called "crx.default". This workspace stores all your site's content (excluding versions). The search index configuration for this workspace can be found under crx-quickstart/workspaces/crx.default/workspace.xml. All configurations for this can be made within the <SearchIndex> element. A reference of all these configurations can be found here.
Here's an example workspace.xml SearchIndex section:
...
<!-- Minimal search index definition for a workspace, handled by the CRX Lucene handler. -->
<SearchIndex class="com.day.crx.query.lucene.LuceneHandler">
  <!-- Location of the index files; ${wsp.home} is the placeholder for the workspace home directory. -->
  <param name="path" value="${wsp.home}/index" />
</SearchIndex>
...
indexing_config.xml configuration:
The configurations in workspace.xml allow you to tune the behavior of the search index; however, they don't let you change which content is indexed. To configure that, we need to look at the indexing_config.xml file, which allows us to specify indexing rules.
indexing_config.xml can be found under crx-quickstart/server/runtime/0/_crx/WEB-INF/classes.
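In CQ 5.5, the default configuration file is embedded in the repository bundle deployed in the OSGi container. An indexing_config.xml file is referenced from workspace.xml through the indexingConfiguration parameter of the <SearchIndex> element, for example: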
<!-- Search index definition that also points at an indexing configuration file. -->
<SearchIndex class="com.day.crx.query.lucene.LuceneHandler">
  <!-- Location of the index files inside the workspace home directory. -->
  <param name="path" value="${wsp.home}/index"/>
  <!-- Result fetch size setting for queries (see the Jackrabbit search configuration reference). -->
  <param name="resultFetchSize" value="50"/>
  <!-- File holding the indexing rules, here stored in the workspace home directory. -->
  <param name="indexingConfiguration" value="${wsp.home}/indexing_config.xml"/>
</SearchIndex>
...
indexing_config.xml (CQ 5.2+):
<?xml version="1.0"?>
<!DOCTYPE configuration SYSTEM "http://jackrabbit.apache.org/dtd/indexing-configuration-1.2.dtd">
<!--
  Indexing configuration for the workspace search index.
  index-rule elements control how properties of matching nodes are indexed;
  aggregate elements list the relative paths that are included in the
  index of a node of the given primary type.
-->
<configuration
    xmlns:cq="http://www.day.com/jcr/cq/1.0"
    xmlns:dam="http://www.day.com/dam/1.0"
    xmlns:nt="http://www.jcp.org/jcr/nt/1.0"
    xmlns:jcr="http://www.jcp.org/jcr/1.0"
    xmlns:sling="http://sling.apache.org/jcr/sling/1.0">

  <!--
    Do not index content of subassets.
    The rule applies to nt:resource nodes that have an ancestor named
    "subassets" of type nt:unstructured, and it lists no properties.
  -->
  <index-rule nodeType="nt:resource"
              condition="ancestor::subassets/@jcr:primaryType='{http://www.jcp.org/jcr/nt/1.0}unstructured'">
  </index-rule>

  <!--
    Exclude some well known properties from the node scope
    fulltext index. Do not add rules below this one, since
    this rule matches any node and acts as a default/fallback.
  -->
  <index-rule nodeType="nt:base">
    <property nodeScopeIndex="false">analyticsProvider</property>
    <property nodeScopeIndex="false">analyticsSnippet</property>
    <property nodeScopeIndex="false">hideInNav</property>
    <property nodeScopeIndex="false">offTime</property>
    <property nodeScopeIndex="false">onTime</property>
    <property nodeScopeIndex="false">cq:allowedTemplates</property>
    <property nodeScopeIndex="false">cq:childrenOrder</property>
    <property nodeScopeIndex="false">cq:cugEnabled</property>
    <property nodeScopeIndex="false">cq:cugPrincipals</property>
    <property nodeScopeIndex="false">cq:cugRealm</property>
    <property nodeScopeIndex="false">cq:designPath</property>
    <property nodeScopeIndex="false">cq:isCancelledForChildren</property>
    <property nodeScopeIndex="false">cq:isDeep</property>
    <property nodeScopeIndex="false">cq:lastModified</property>
    <property nodeScopeIndex="false">cq:lastModifiedBy</property>
    <property nodeScopeIndex="false">cq:lastPublished</property>
    <property nodeScopeIndex="false">cq:lastPublishedBy</property>
    <property nodeScopeIndex="false">cq:lastReplicated</property>
    <property nodeScopeIndex="false">cq:lastReplicatedBy</property>
    <property nodeScopeIndex="false">cq:lastReplicationAction</property>
    <property nodeScopeIndex="false">cq:lastReplicationStatus</property>
    <property nodeScopeIndex="false">cq:lastRolledout</property>
    <property nodeScopeIndex="false">cq:lastRolledoutBy</property>
    <property nodeScopeIndex="false">cq:name</property>
    <property nodeScopeIndex="false">cq:parentPath</property>
    <property nodeScopeIndex="false">cq:segments</property>
    <property nodeScopeIndex="false">cq:siblingOrder</property>
    <property nodeScopeIndex="false">cq:template</property>
    <property nodeScopeIndex="false">cq:trigger</property>
    <property nodeScopeIndex="false">cq:versionComment</property>
    <property nodeScopeIndex="false">jcr:createdBy</property>
    <property nodeScopeIndex="false">jcr:lastModifiedBy</property>
    <property nodeScopeIndex="false">sling:alias</property>
    <property nodeScopeIndex="false">sling:resourceType</property>
    <property nodeScopeIndex="false">sling:vanityPath</property>
    <!--
      Regular expression entry. NOTE(review): presumably a catch-all for
      every remaining property name so those keep the default indexing
      behaviour; confirm against the Jackrabbit indexing configuration docs.
    -->
    <property isRegexp="true">.*:.*</property>
  </index-rule>

  <!--
    Cq Page for jcr:contains(jcr:content, "...") searches.
    Includes descendants of a cq:PageContent node down to four levels.
  -->
  <aggregate primaryType="cq:PageContent">
    <include>*</include>
    <include>*/*</include>
    <include>*/*/*</include>
    <include>*/*/*/*</include>
  </aggregate>

  <!-- dam:Asset: include the content node, its metadata and the original rendition. -->
  <aggregate primaryType="dam:Asset">
    <include>jcr:content</include>
    <include>jcr:content/metadata</include>
    <include>jcr:content/metadata/*</include>
    <include>jcr:content/renditions</include>
    <include>jcr:content/renditions/original</include>
    <include>jcr:content/renditions/original/jcr:content</include>
    <!-- child axis orderby index -->
    <include>jcr:content/renditions/original/jcr:content/jcr:lastModified</include>
  </aggregate>

  <!-- nt:file child axis orderby index -->
  <aggregate primaryType="nt:file">
    <include>jcr:content</include>
    <include>jcr:content/jcr:lastModified</include>
  </aggregate>

  <!-- cq:Page child axis orderby index -->
  <aggregate primaryType="cq:Page">
    <include>jcr:content</include>
    <include>jcr:content/cq:lastModified</include>
  </aggregate>
</configuration>
Sign in to your account