001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.search.solr.schema; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.ArrayList; 021import java.util.HashMap; 022import java.util.List; 023import java.util.Map; 024import java.util.stream.Collectors; 025 026import org.apache.avalon.framework.component.Component; 027import org.apache.avalon.framework.service.ServiceException; 028import org.apache.avalon.framework.service.ServiceManager; 029import org.apache.avalon.framework.service.Serviceable; 030import org.apache.excalibur.source.Source; 031import org.apache.excalibur.source.SourceResolver; 032import org.apache.excalibur.xml.dom.DOMParser; 033import org.apache.excalibur.xml.xpath.XPathProcessor; 034import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition; 035import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition; 036import org.apache.solr.client.solrj.response.schema.SchemaRepresentation; 037import org.w3c.dom.Document; 038import org.w3c.dom.Element; 039import org.w3c.dom.NamedNodeMap; 040import org.w3c.dom.Node; 041import org.w3c.dom.NodeList; 042import org.xml.sax.InputSource; 043import org.xml.sax.SAXException; 044 045import org.ametys.cms.contenttype.MetadataType; 046import org.ametys.runtime.plugin.component.AbstractLogEnabled; 047 048/** 049 * Component providing helper methods to work with search schema and fields. 050 */ 051public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable 052{ 053 054 /** The component role. */ 055 public static final String ROLE = SchemaHelper.class.getName(); 056 057 /** The source resolver. */ 058 protected SourceResolver _sourceResolver; 059 060 /** A DOM parser. */ 061 protected DOMParser _domParser; 062 063 /** A XPath processor. */ 064 protected XPathProcessor _xPathProcessor; 065 066 @Override 067 public void service(ServiceManager manager) throws ServiceException 068 { 069 _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE); 070 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 071 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 072 } 073 074 /** 075 * Get the solr schema type from the metadata type. 076 * @param metaType The metadata type. 077 * @return The solr schema type. 078 */ 079 public static String getSchemaType(MetadataType metaType) 080 { 081 String type = null; 082 083 switch (metaType) 084 { 085 case STRING: 086 case MULTILINGUAL_STRING: 087 case USER: 088 case CONTENT: 089 case SUB_CONTENT: 090 type = "string"; 091 break; 092 case LONG: 093 type = "long"; 094 break; 095 case DOUBLE: 096 type = "double"; 097 break; 098 case BOOLEAN: 099 type = "boolean"; 100 break; 101 case DATE: 102 case DATETIME: 103 type = "date"; 104 break; 105 case GEOCODE: 106 type = "location_rpt"; 107 break; 108 case RICH_TEXT: 109 // TODO? 110 break; 111 case BINARY: 112 case FILE: 113 case COMPOSITE: 114 case REFERENCE: 115 default: 116 break; 117 } 118 119 return type; 120 } 121 122 /** 123 * Chekcs that the passed Solr field name is valid 124 * @param fieldName the Solr field name 125 * @return true if name is valid, false otherwise 126 */ 127 public static boolean isNameValid(String fieldName) 128 { 129 return fieldName.matches("^[a-zA-Z_][a-zA-Z0-9_\\/-]*$"); 130 } 131 132 /** 133 * Get the schema at the corresponding location (source URI). 134 * @param location The location, as a source URI. 135 * @return The schema representation. 136 */ 137 public SchemaRepresentation getSchema(String location) 138 { 139 Source source = null; 140 141 try 142 { 143 source = _sourceResolver.resolveURI(location); 144 145 if (source.exists()) 146 { 147 try (InputStream is = source.getInputStream()) 148 { 149 return readSchema(is); 150 } 151 } 152 } 153 catch (IOException | SAXException e) 154 { 155 getLogger().error("Error reading the schema from location '" + location + "'", e); 156 } 157 finally 158 { 159 if (source != null) 160 { 161 _sourceResolver.release(source); 162 } 163 } 164 165 return null; 166 } 167 168 /** 169 * Read the static schema. 170 * @param is An input stream on the schema XML. 171 * @return The representation of the schema. 172 * @throws IOException If an error occurs reading the stream. 173 * @throws SAXException If an error occurs parsing the XML. 174 */ 175 public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException 176 { 177 SchemaRepresentation schema = new SchemaRepresentation(); 178 179 InputSource source = new InputSource(is); 180 Document document = _domParser.parseDocument(source); 181 182 Element root = document.getDocumentElement(); 183 184 String name = _xPathProcessor.evaluateAsString(root, "/schema/@name"); 185 float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue(); 186 String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey"); 187 String defaultOp = _xPathProcessor.evaluateAsString(root, "/schema/solrQueryParser/@defaultOperator"); 188 String defaultSearchField = _xPathProcessor.evaluateAsString(root, "/schema/defaultSearchField"); 189 190 schema.setName(name); 191 schema.setVersion(version); 192 schema.setUniqueKey(uniqueKey); 193 schema.setDefaultOperator(defaultOp); 194 schema.setDefaultSearchField(defaultSearchField); 195 196 NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType"); 197 schema.setFieldTypes(getFieldTypes(fieldTypeNodes)); 198 199 NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field"); 200 schema.setFields(filterListNode(getList(fieldNodes))); 201 202 NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField"); 203 schema.setDynamicFields(getList(dynFieldNodes)); 204 205 NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField"); 206 schema.setCopyFields(getList(copyFieldNodes)); 207 208 Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity"); 209 if (similarityNode != null) 210 { 211 schema.setSimilarity(getAttributes(similarityNode)); 212 } 213 214 return schema; 215 } 216 217 /** 218 * Get the field type definitions from the corresponding DOM nodes. 219 * @param fieldTypeNodes The field type nodes. 220 * @return The list of field type definitions. 221 */ 222 protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes) 223 { 224 List<FieldTypeDefinition> definitions = new ArrayList<>(); 225 226 for (int i = 0; i < fieldTypeNodes.getLength(); i++) 227 { 228 Node fieldTypeNode = fieldTypeNodes.item(i); 229 definitions.add(getFieldType(fieldTypeNode)); 230 } 231 232 return definitions; 233 } 234 235 /** 236 * Get a field type definition from the corresponding DOM node. 237 * @param fieldTypeNode The field type DOM node. 238 * @return The field type definition. 239 */ 240 protected FieldTypeDefinition getFieldType(Node fieldTypeNode) 241 { 242 FieldTypeDefinition fieldType = new FieldTypeDefinition(); 243 244 fieldType.setAttributes(getAttributes(fieldTypeNode)); 245 246 NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer"); 247 248 for (int i = 0; i < analyzerNodes.getLength(); i++) 249 { 250 Node analyzerNode = analyzerNodes.item(i); 251 Node typeNode = analyzerNode.getAttributes().getNamedItem("type"); 252 String type = typeNode != null ? typeNode.getNodeValue() : ""; 253 254 AnalyzerDefinition analyzer = getAnalyzer(analyzerNode); 255 256 switch (type) 257 { 258 case "index": 259 fieldType.setIndexAnalyzer(analyzer); 260 break; 261 case "query": 262 fieldType.setQueryAnalyzer(analyzer); 263 break; 264 case "multiterm": 265 fieldType.setMultiTermAnalyzer(analyzer); 266 break; 267 default: 268 fieldType.setAnalyzer(analyzer); 269 break; 270 } 271 } 272 273 Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity"); 274 if (similarityNode != null) 275 { 276 fieldType.setSimilarity(getAttributes(similarityNode)); 277 } 278 279 return fieldType; 280 } 281 282 /** 283 * Get an analyzer definition from the corresponding DOM node. 284 * @param analyzerNode The analyzer node. 285 * @return The analyzer definition. 286 */ 287 protected AnalyzerDefinition getAnalyzer(Node analyzerNode) 288 { 289 AnalyzerDefinition analyzer = new AnalyzerDefinition(); 290 291 analyzer.setAttributes(getAttributes(analyzerNode)); 292 293 NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter"); 294 analyzer.setCharFilters(getList(charFilterNodes)); 295 296 Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer"); 297 if (tokenizerNode != null) 298 { 299 analyzer.setTokenizer(getAttributes(tokenizerNode)); 300 } 301 302 NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter"); 303 analyzer.setFilters(getList(filterNodes)); 304 305 return analyzer; 306 } 307 308 /** 309 * Extract a DOM node attributes as a Map. 310 * @param node The node. 311 * @return The attributes as a Map. 312 */ 313 protected Map<String, Object> getAttributes(Node node) 314 { 315 Map<String, Object> map = new HashMap<>(); 316 317 NamedNodeMap attributes = node.getAttributes(); 318 for (int i = 0; i < attributes.getLength(); i++) 319 { 320 Node attribute = attributes.item(i); 321 322 String name = attribute.getNodeName(); 323 String value = attribute.getNodeValue(); 324 325 // Filter out ametys-specific attributes. 326 if (!name.startsWith("ametys")) 327 { 328 map.put(name, value); 329 } 330 } 331 332 return map; 333 } 334 335 /** 336 * Extract the list of DOM node attributes. 337 * @param nodeList The node list. 338 * @return A List of the attribute values as Maps. 339 */ 340 protected List<Map<String, Object>> getList(NodeList nodeList) 341 { 342 List<Map<String, Object>> list = new ArrayList<>(); 343 344 for (int i = 0; i < nodeList.getLength(); i++) 345 { 346 Node node = nodeList.item(i); 347 list.add(getAttributes(node)); 348 } 349 350 return list; 351 } 352 353 /** 354 * Filters the list of field declarations and only keep valid ones. 355 * @param fieldList The list of fields to filter 356 * @return The filtered list 357 */ 358 protected List<Map<String, Object>> filterListNode(List<Map<String, Object>> fieldList) 359 { 360 return fieldList.stream().filter(fieldMap -> 361 { 362 String fieldName = (String) fieldMap.get("name"); 363 if (fieldName == null) 364 { 365 getLogger().warn("'name' attribute for field node cannot be null. Field will be ignored."); 366 return false; 367 } 368 else if (!isNameValid(fieldName)) 369 { 370 // https://lucene.apache.org/solr/guide/6_6/defining-fields.html#DefiningFields-FieldProperties 371 getLogger().warn("Invalid field name: '{}'. Field names should consist of alphanumeric or underscore characters only and not start with a digit (Ametys also supports dash character). Field will be ignored.", fieldName); 372 return false; 373 } 374 return true; 375 }).collect(Collectors.toList()); 376 } 377 378}