001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.search.solr.schema; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.ArrayList; 021import java.util.HashMap; 022import java.util.List; 023import java.util.Map; 024import java.util.stream.Collectors; 025 026import org.apache.avalon.framework.component.Component; 027import org.apache.avalon.framework.service.ServiceException; 028import org.apache.avalon.framework.service.ServiceManager; 029import org.apache.avalon.framework.service.Serviceable; 030import org.apache.excalibur.source.Source; 031import org.apache.excalibur.source.SourceResolver; 032import org.apache.excalibur.xml.dom.DOMParser; 033import org.apache.excalibur.xml.xpath.XPathProcessor; 034import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition; 035import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition; 036import org.apache.solr.client.solrj.response.schema.SchemaRepresentation; 037import org.w3c.dom.Document; 038import org.w3c.dom.Element; 039import org.w3c.dom.NamedNodeMap; 040import org.w3c.dom.Node; 041import org.w3c.dom.NodeList; 042import org.xml.sax.InputSource; 043import org.xml.sax.SAXException; 044 045import org.ametys.cms.contenttype.MetadataType; 046import org.ametys.runtime.plugin.component.AbstractLogEnabled; 047 048/** 049 * Component providing helper methods to work with search schema and fields. 050 */ 051public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable 052{ 053 054 /** The component role. */ 055 public static final String ROLE = SchemaHelper.class.getName(); 056 057 /** The source resolver. */ 058 protected SourceResolver _sourceResolver; 059 060 /** A DOM parser. */ 061 protected DOMParser _domParser; 062 063 /** A XPath processor. */ 064 protected XPathProcessor _xPathProcessor; 065 066 @Override 067 public void service(ServiceManager manager) throws ServiceException 068 { 069 _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE); 070 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 071 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 072 } 073 074 /** 075 * Get the solr schema type from the metadata type. 076 * @param metaType The metadata type. 077 * @return The solr schema type. 078 */ 079 public static String getSchemaType(MetadataType metaType) 080 { 081 String type = null; 082 083 switch (metaType) 084 { 085 case STRING: 086 case MULTILINGUAL_STRING: 087 case USER: 088 case CONTENT: 089 case SUB_CONTENT: 090 type = "string"; 091 break; 092 case LONG: 093 type = "plong"; 094 break; 095 case DOUBLE: 096 type = "pdouble"; 097 break; 098 case BOOLEAN: 099 type = "boolean"; 100 break; 101 case DATE: 102 case DATETIME: 103 type = "pdate"; 104 break; 105 case GEOCODE: 106 type = "location_rpt"; 107 break; 108 case RICH_TEXT: 109 // TODO? 110 break; 111 case BINARY: 112 case FILE: 113 case COMPOSITE: 114 case REFERENCE: 115 default: 116 break; 117 } 118 119 return type; 120 } 121 122 /** 123 * Chekcs that the passed Solr field name is valid 124 * @param fieldName the Solr field name 125 * @return true if name is valid, false otherwise 126 */ 127 public static boolean isNameValid(String fieldName) 128 { 129 return fieldName.matches("^[a-zA-Z_][a-zA-Z0-9_\\/-]*$"); 130 } 131 132 /** 133 * Get the schema at the corresponding location (source URI). 134 * @param location The location, as a source URI. 135 * @return The schema representation. 136 */ 137 public SchemaRepresentation getSchema(String location) 138 { 139 Source source = null; 140 141 try 142 { 143 source = _sourceResolver.resolveURI(location); 144 145 if (source.exists()) 146 { 147 try (InputStream is = source.getInputStream()) 148 { 149 return readSchema(is); 150 } 151 } 152 } 153 catch (IOException | SAXException e) 154 { 155 getLogger().error("Error reading the schema from location '" + location + "'", e); 156 } 157 finally 158 { 159 if (source != null) 160 { 161 _sourceResolver.release(source); 162 } 163 } 164 165 return null; 166 } 167 168 /** 169 * Read the static schema. 170 * @param is An input stream on the schema XML. 171 * @return The representation of the schema. 172 * @throws IOException If an error occurs reading the stream. 173 * @throws SAXException If an error occurs parsing the XML. 174 */ 175 public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException 176 { 177 SchemaRepresentation schema = new SchemaRepresentation(); 178 179 InputSource source = new InputSource(is); 180 Document document = _domParser.parseDocument(source); 181 182 Element root = document.getDocumentElement(); 183 184 String name = _xPathProcessor.evaluateAsString(root, "/schema/@name"); 185 float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue(); 186 String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey"); 187 188 schema.setName(name); 189 schema.setVersion(version); 190 schema.setUniqueKey(uniqueKey); 191 192 NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType"); 193 schema.setFieldTypes(getFieldTypes(fieldTypeNodes)); 194 195 NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field"); 196 schema.setFields(filterListNode(getList(fieldNodes))); 197 198 NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField"); 199 schema.setDynamicFields(getList(dynFieldNodes)); 200 201 NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField"); 202 schema.setCopyFields(getList(copyFieldNodes)); 203 204 Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity"); 205 if (similarityNode != null) 206 { 207 schema.setSimilarity(getAttributes(similarityNode)); 208 } 209 210 return schema; 211 } 212 213 /** 214 * Get the field type definitions from the corresponding DOM nodes. 215 * @param fieldTypeNodes The field type nodes. 216 * @return The list of field type definitions. 217 */ 218 protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes) 219 { 220 List<FieldTypeDefinition> definitions = new ArrayList<>(); 221 222 for (int i = 0; i < fieldTypeNodes.getLength(); i++) 223 { 224 Node fieldTypeNode = fieldTypeNodes.item(i); 225 definitions.add(getFieldType(fieldTypeNode)); 226 } 227 228 return definitions; 229 } 230 231 /** 232 * Get a field type definition from the corresponding DOM node. 233 * @param fieldTypeNode The field type DOM node. 234 * @return The field type definition. 235 */ 236 protected FieldTypeDefinition getFieldType(Node fieldTypeNode) 237 { 238 FieldTypeDefinition fieldType = new FieldTypeDefinition(); 239 240 fieldType.setAttributes(getAttributes(fieldTypeNode)); 241 242 NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer"); 243 244 for (int i = 0; i < analyzerNodes.getLength(); i++) 245 { 246 Node analyzerNode = analyzerNodes.item(i); 247 Node typeNode = analyzerNode.getAttributes().getNamedItem("type"); 248 String type = typeNode != null ? typeNode.getNodeValue() : ""; 249 250 AnalyzerDefinition analyzer = getAnalyzer(analyzerNode); 251 252 switch (type) 253 { 254 case "index": 255 fieldType.setIndexAnalyzer(analyzer); 256 break; 257 case "query": 258 fieldType.setQueryAnalyzer(analyzer); 259 break; 260 case "multiterm": 261 fieldType.setMultiTermAnalyzer(analyzer); 262 break; 263 default: 264 fieldType.setAnalyzer(analyzer); 265 break; 266 } 267 } 268 269 Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity"); 270 if (similarityNode != null) 271 { 272 fieldType.setSimilarity(getAttributes(similarityNode)); 273 } 274 275 return fieldType; 276 } 277 278 /** 279 * Get an analyzer definition from the corresponding DOM node. 280 * @param analyzerNode The analyzer node. 281 * @return The analyzer definition. 282 */ 283 protected AnalyzerDefinition getAnalyzer(Node analyzerNode) 284 { 285 AnalyzerDefinition analyzer = new AnalyzerDefinition(); 286 287 analyzer.setAttributes(getAttributes(analyzerNode)); 288 289 NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter"); 290 analyzer.setCharFilters(getList(charFilterNodes)); 291 292 Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer"); 293 if (tokenizerNode != null) 294 { 295 analyzer.setTokenizer(getAttributes(tokenizerNode)); 296 } 297 298 NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter"); 299 analyzer.setFilters(getList(filterNodes)); 300 301 return analyzer; 302 } 303 304 /** 305 * Extract a DOM node attributes as a Map. 306 * @param node The node. 307 * @return The attributes as a Map. 308 */ 309 protected Map<String, Object> getAttributes(Node node) 310 { 311 Map<String, Object> map = new HashMap<>(); 312 313 NamedNodeMap attributes = node.getAttributes(); 314 for (int i = 0; i < attributes.getLength(); i++) 315 { 316 Node attribute = attributes.item(i); 317 318 String name = attribute.getNodeName(); 319 String value = attribute.getNodeValue(); 320 321 // Filter out ametys-specific attributes. 322 if (!name.startsWith("ametys")) 323 { 324 map.put(name, value); 325 } 326 } 327 328 return map; 329 } 330 331 /** 332 * Extract the list of DOM node attributes. 333 * @param nodeList The node list. 334 * @return A List of the attribute values as Maps. 335 */ 336 protected List<Map<String, Object>> getList(NodeList nodeList) 337 { 338 List<Map<String, Object>> list = new ArrayList<>(); 339 340 for (int i = 0; i < nodeList.getLength(); i++) 341 { 342 Node node = nodeList.item(i); 343 list.add(getAttributes(node)); 344 } 345 346 return list; 347 } 348 349 /** 350 * Filters the list of field declarations and only keep valid ones. 351 * @param fieldList The list of fields to filter 352 * @return The filtered list 353 */ 354 protected List<Map<String, Object>> filterListNode(List<Map<String, Object>> fieldList) 355 { 356 return fieldList.stream().filter(fieldMap -> 357 { 358 String fieldName = (String) fieldMap.get("name"); 359 if (fieldName == null) 360 { 361 getLogger().warn("'name' attribute for field node cannot be null. Field will be ignored."); 362 return false; 363 } 364 else if (!isNameValid(fieldName)) 365 { 366 // https://lucene.apache.org/solr/guide/6_6/defining-fields.html#DefiningFields-FieldProperties 367 getLogger().warn("Invalid field name: '{}'. Field names should consist of alphanumeric or underscore characters only and not start with a digit (Ametys also supports dash character). Field will be ignored.", fieldName); 368 return false; 369 } 370 return true; 371 }).collect(Collectors.toList()); 372 } 373 374}