001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.search.solr.schema; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.ArrayList; 021import java.util.HashMap; 022import java.util.List; 023import java.util.Map; 024 025import org.apache.avalon.framework.component.Component; 026import org.apache.avalon.framework.service.ServiceException; 027import org.apache.avalon.framework.service.ServiceManager; 028import org.apache.avalon.framework.service.Serviceable; 029import org.apache.excalibur.source.Source; 030import org.apache.excalibur.source.SourceResolver; 031import org.apache.excalibur.xml.dom.DOMParser; 032import org.apache.excalibur.xml.xpath.XPathProcessor; 033import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition; 034import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition; 035import org.apache.solr.client.solrj.response.schema.SchemaRepresentation; 036import org.w3c.dom.Document; 037import org.w3c.dom.Element; 038import org.w3c.dom.NamedNodeMap; 039import org.w3c.dom.Node; 040import org.w3c.dom.NodeList; 041import org.xml.sax.InputSource; 042import org.xml.sax.SAXException; 043 044import org.ametys.cms.contenttype.MetadataType; 045import org.ametys.runtime.plugin.component.AbstractLogEnabled; 046 047/** 048 * Component providing helper methods to work with search schema and fields. 049 */ 050public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable 051{ 052 053 /** The component role. */ 054 public static final String ROLE = SchemaHelper.class.getName(); 055 056 /** The source resolver. */ 057 protected SourceResolver _sourceResolver; 058 059 /** A DOM parser. */ 060 protected DOMParser _domParser; 061 062 /** A XPath processor. */ 063 protected XPathProcessor _xPathProcessor; 064 065 @Override 066 public void service(ServiceManager manager) throws ServiceException 067 { 068 _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE); 069 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 070 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 071 } 072 073 /** 074 * Get the solr schema type from the metadata type. 075 * @param metaType The metadata type. 076 * @return The solr schema type. 077 */ 078 public static String getSchemaType(MetadataType metaType) 079 { 080 String type = null; 081 082 switch (metaType) 083 { 084 case STRING: 085 case USER: 086 case CONTENT: 087 case SUB_CONTENT: 088 type = "string"; 089 break; 090 case LONG: 091 type = "long"; 092 break; 093 case DOUBLE: 094 type = "double"; 095 break; 096 case BOOLEAN: 097 type = "boolean"; 098 break; 099 case DATE: 100 case DATETIME: 101 type = "date"; 102 break; 103 case GEOCODE: 104 type = "location_rpt"; 105 break; 106 case RICH_TEXT: 107 // TODO? 108 break; 109 case BINARY: 110 case FILE: 111 case COMPOSITE: 112 case REFERENCE: 113 default: 114 break; 115 } 116 117 return type; 118 } 119 120 /** 121 * Get the schema at the corresponding location (source URI). 122 * @param location The location, as a source URI. 123 * @return The schema representation. 124 */ 125 public SchemaRepresentation getSchema(String location) 126 { 127 Source source = null; 128 129 try 130 { 131 source = _sourceResolver.resolveURI(location); 132 133 if (source.exists()) 134 { 135 try (InputStream is = source.getInputStream()) 136 { 137 return readSchema(is); 138 } 139 } 140 } 141 catch (IOException | SAXException e) 142 { 143 getLogger().error("Error reading the schema from location '" + location + "'", e); 144 } 145 finally 146 { 147 if (source != null) 148 { 149 _sourceResolver.release(source); 150 } 151 } 152 153 return null; 154 } 155 156 /** 157 * Read the static schema. 158 * @param is An input stream on the schema XML. 159 * @return The representation of the schema. 160 * @throws IOException If an error occurs reading the stream. 161 * @throws SAXException If an error occurs parsing the XML. 162 */ 163 public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException 164 { 165 SchemaRepresentation schema = new SchemaRepresentation(); 166 167 InputSource source = new InputSource(is); 168 Document document = _domParser.parseDocument(source); 169 170 Element root = document.getDocumentElement(); 171 172 String name = _xPathProcessor.evaluateAsString(root, "/schema/@name"); 173 float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue(); 174 String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey"); 175 String defaultOp = _xPathProcessor.evaluateAsString(root, "/schema/solrQueryParser/@defaultOperator"); 176 String defaultSearchField = _xPathProcessor.evaluateAsString(root, "/schema/defaultSearchField"); 177 178 schema.setName(name); 179 schema.setVersion(version); 180 schema.setUniqueKey(uniqueKey); 181 schema.setDefaultOperator(defaultOp); 182 schema.setDefaultSearchField(defaultSearchField); 183 184 NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType"); 185 schema.setFieldTypes(getFieldTypes(fieldTypeNodes)); 186 187 NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field"); 188 schema.setFields(getList(fieldNodes)); 189 190 NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField"); 191 schema.setDynamicFields(getList(dynFieldNodes)); 192 193 NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField"); 194 schema.setCopyFields(getList(copyFieldNodes)); 195 196 Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity"); 197 if (similarityNode != null) 198 { 199 schema.setSimilarity(getAttributes(similarityNode)); 200 } 201 202 return schema; 203 } 204 205 /** 206 * Get the field type definitions from the corresponding DOM nodes. 207 * @param fieldTypeNodes The field type nodes. 208 * @return The list of field type definitions. 209 */ 210 protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes) 211 { 212 List<FieldTypeDefinition> definitions = new ArrayList<>(); 213 214 for (int i = 0; i < fieldTypeNodes.getLength(); i++) 215 { 216 Node fieldTypeNode = fieldTypeNodes.item(i); 217 definitions.add(getFieldType(fieldTypeNode)); 218 } 219 220 return definitions; 221 } 222 223 /** 224 * Get a field type definition from the corresponding DOM node. 225 * @param fieldTypeNode The field type DOM node. 226 * @return The field type definition. 227 */ 228 protected FieldTypeDefinition getFieldType(Node fieldTypeNode) 229 { 230 FieldTypeDefinition fieldType = new FieldTypeDefinition(); 231 232 fieldType.setAttributes(getAttributes(fieldTypeNode)); 233 234 NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer"); 235 236 for (int i = 0; i < analyzerNodes.getLength(); i++) 237 { 238 Node analyzerNode = analyzerNodes.item(i); 239 Node typeNode = analyzerNode.getAttributes().getNamedItem("type"); 240 String type = typeNode != null ? typeNode.getNodeValue() : ""; 241 242 AnalyzerDefinition analyzer = getAnalyzer(analyzerNode); 243 244 switch (type) 245 { 246 case "index": 247 fieldType.setIndexAnalyzer(analyzer); 248 break; 249 case "query": 250 fieldType.setQueryAnalyzer(analyzer); 251 break; 252 case "multiterm": 253 fieldType.setMultiTermAnalyzer(analyzer); 254 break; 255 default: 256 fieldType.setAnalyzer(analyzer); 257 break; 258 } 259 } 260 261 Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity"); 262 if (similarityNode != null) 263 { 264 fieldType.setSimilarity(getAttributes(similarityNode)); 265 } 266 267 return fieldType; 268 } 269 270 /** 271 * Get an analyzer definition from the corresponding DOM node. 272 * @param analyzerNode The analyzer node. 273 * @return The analyzer definition. 274 */ 275 protected AnalyzerDefinition getAnalyzer(Node analyzerNode) 276 { 277 AnalyzerDefinition analyzer = new AnalyzerDefinition(); 278 279 analyzer.setAttributes(getAttributes(analyzerNode)); 280 281 NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter"); 282 analyzer.setCharFilters(getList(charFilterNodes)); 283 284 Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer"); 285 if (tokenizerNode != null) 286 { 287 analyzer.setTokenizer(getAttributes(tokenizerNode)); 288 } 289 290 NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter"); 291 analyzer.setFilters(getList(filterNodes)); 292 293 return analyzer; 294 } 295 296 /** 297 * Extract a DOM node attributes as a Map. 298 * @param node The node. 299 * @return The attributes as a Map. 300 */ 301 protected Map<String, Object> getAttributes(Node node) 302 { 303 Map<String, Object> map = new HashMap<>(); 304 305 NamedNodeMap attributes = node.getAttributes(); 306 for (int i = 0; i < attributes.getLength(); i++) 307 { 308 Node attribute = attributes.item(i); 309 310 String name = attribute.getNodeName(); 311 String value = attribute.getNodeValue(); 312 313 // Filter out ametys-specific attributes. 314 if (!name.startsWith("ametys")) 315 { 316 map.put(name, value); 317 } 318 } 319 320 return map; 321 } 322 323 /** 324 * Extract the list of DOM node attributes. 325 * @param nodeList The node list. 326 * @return A List of the attribute values as Maps. 327 */ 328 protected List<Map<String, Object>> getList(NodeList nodeList) 329 { 330 List<Map<String, Object>> list = new ArrayList<>(); 331 332 for (int i = 0; i < nodeList.getLength(); i++) 333 { 334 Node node = nodeList.item(i); 335 list.add(getAttributes(node)); 336 } 337 338 return list; 339 } 340 341}