001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.search.solr.schema; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.ArrayList; 021import java.util.HashMap; 022import java.util.List; 023import java.util.Map; 024import java.util.stream.Collectors; 025 026import org.apache.avalon.framework.component.Component; 027import org.apache.avalon.framework.service.ServiceException; 028import org.apache.avalon.framework.service.ServiceManager; 029import org.apache.avalon.framework.service.Serviceable; 030import org.apache.excalibur.source.Source; 031import org.apache.excalibur.source.SourceResolver; 032import org.apache.excalibur.xml.dom.DOMParser; 033import org.apache.excalibur.xml.xpath.XPathProcessor; 034import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition; 035import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition; 036import org.apache.solr.client.solrj.response.schema.SchemaRepresentation; 037import org.w3c.dom.Document; 038import org.w3c.dom.Element; 039import org.w3c.dom.NamedNodeMap; 040import org.w3c.dom.Node; 041import org.w3c.dom.NodeList; 042import org.xml.sax.InputSource; 043import org.xml.sax.SAXException; 044 045import org.ametys.runtime.plugin.component.AbstractLogEnabled; 046 047/** 048 * Component providing helper methods to work with search schema and fields. 049 */ 050public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable 051{ 052 053 /** The component role. */ 054 public static final String ROLE = SchemaHelper.class.getName(); 055 056 /** The source resolver. */ 057 protected SourceResolver _sourceResolver; 058 059 /** A DOM parser. */ 060 protected DOMParser _domParser; 061 062 /** A XPath processor. */ 063 protected XPathProcessor _xPathProcessor; 064 065 @Override 066 public void service(ServiceManager manager) throws ServiceException 067 { 068 _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE); 069 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 070 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 071 } 072 073 /** 074 * Chekcs that the passed Solr field name is valid 075 * @param fieldName the Solr field name 076 * @return true if name is valid, false otherwise 077 */ 078 public static boolean isNameValid(String fieldName) 079 { 080 return fieldName.matches("^[a-zA-Z_][a-zA-Z0-9_\\/-]*$"); 081 } 082 083 /** 084 * Get the schema at the corresponding location (source URI). 085 * @param location The location, as a source URI. 086 * @return The schema representation. 087 */ 088 public SchemaRepresentation getSchema(String location) 089 { 090 Source source = null; 091 092 try 093 { 094 source = _sourceResolver.resolveURI(location); 095 096 if (source.exists()) 097 { 098 try (InputStream is = source.getInputStream()) 099 { 100 return readSchema(is); 101 } 102 } 103 } 104 catch (IOException | SAXException e) 105 { 106 getLogger().error("Error reading the schema from location '" + location + "'", e); 107 } 108 finally 109 { 110 if (source != null) 111 { 112 _sourceResolver.release(source); 113 } 114 } 115 116 return null; 117 } 118 119 /** 120 * Read the static schema. 121 * @param is An input stream on the schema XML. 122 * @return The representation of the schema. 123 * @throws IOException If an error occurs reading the stream. 124 * @throws SAXException If an error occurs parsing the XML. 125 */ 126 public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException 127 { 128 SchemaRepresentation schema = new SchemaRepresentation(); 129 130 InputSource source = new InputSource(is); 131 Document document = _domParser.parseDocument(source); 132 133 Element root = document.getDocumentElement(); 134 135 String name = _xPathProcessor.evaluateAsString(root, "/schema/@name"); 136 float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue(); 137 String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey"); 138 139 schema.setName(name); 140 schema.setVersion(version); 141 schema.setUniqueKey(uniqueKey); 142 143 NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType"); 144 schema.setFieldTypes(getFieldTypes(fieldTypeNodes)); 145 146 NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field"); 147 schema.setFields(filterListNode(getList(fieldNodes))); 148 149 NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField"); 150 schema.setDynamicFields(getList(dynFieldNodes)); 151 152 NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField"); 153 schema.setCopyFields(getList(copyFieldNodes)); 154 155 Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity"); 156 if (similarityNode != null) 157 { 158 schema.setSimilarity(getAttributes(similarityNode)); 159 } 160 161 return schema; 162 } 163 164 /** 165 * Get the field type definitions from the corresponding DOM nodes. 166 * @param fieldTypeNodes The field type nodes. 167 * @return The list of field type definitions. 168 */ 169 protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes) 170 { 171 List<FieldTypeDefinition> definitions = new ArrayList<>(); 172 173 for (int i = 0; i < fieldTypeNodes.getLength(); i++) 174 { 175 Node fieldTypeNode = fieldTypeNodes.item(i); 176 definitions.add(getFieldType(fieldTypeNode)); 177 } 178 179 return definitions; 180 } 181 182 /** 183 * Get a field type definition from the corresponding DOM node. 184 * @param fieldTypeNode The field type DOM node. 185 * @return The field type definition. 186 */ 187 protected FieldTypeDefinition getFieldType(Node fieldTypeNode) 188 { 189 FieldTypeDefinition fieldType = new FieldTypeDefinition(); 190 191 fieldType.setAttributes(getAttributes(fieldTypeNode)); 192 193 NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer"); 194 195 for (int i = 0; i < analyzerNodes.getLength(); i++) 196 { 197 Node analyzerNode = analyzerNodes.item(i); 198 Node typeNode = analyzerNode.getAttributes().getNamedItem("type"); 199 String type = typeNode != null ? typeNode.getNodeValue() : ""; 200 201 AnalyzerDefinition analyzer = getAnalyzer(analyzerNode); 202 203 switch (type) 204 { 205 case "index": 206 fieldType.setIndexAnalyzer(analyzer); 207 break; 208 case "query": 209 fieldType.setQueryAnalyzer(analyzer); 210 break; 211 case "multiterm": 212 fieldType.setMultiTermAnalyzer(analyzer); 213 break; 214 default: 215 fieldType.setAnalyzer(analyzer); 216 break; 217 } 218 } 219 220 Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity"); 221 if (similarityNode != null) 222 { 223 fieldType.setSimilarity(getAttributes(similarityNode)); 224 } 225 226 return fieldType; 227 } 228 229 /** 230 * Get an analyzer definition from the corresponding DOM node. 231 * @param analyzerNode The analyzer node. 232 * @return The analyzer definition. 233 */ 234 protected AnalyzerDefinition getAnalyzer(Node analyzerNode) 235 { 236 AnalyzerDefinition analyzer = new AnalyzerDefinition(); 237 238 analyzer.setAttributes(getAttributes(analyzerNode)); 239 240 NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter"); 241 analyzer.setCharFilters(getList(charFilterNodes)); 242 243 Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer"); 244 if (tokenizerNode != null) 245 { 246 analyzer.setTokenizer(getAttributes(tokenizerNode)); 247 } 248 249 NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter"); 250 analyzer.setFilters(getList(filterNodes)); 251 252 return analyzer; 253 } 254 255 /** 256 * Extract a DOM node attributes as a Map. 257 * @param node The node. 258 * @return The attributes as a Map. 259 */ 260 protected Map<String, Object> getAttributes(Node node) 261 { 262 Map<String, Object> map = new HashMap<>(); 263 264 NamedNodeMap attributes = node.getAttributes(); 265 for (int i = 0; i < attributes.getLength(); i++) 266 { 267 Node attribute = attributes.item(i); 268 269 String name = attribute.getNodeName(); 270 String value = attribute.getNodeValue(); 271 272 // Filter out ametys-specific attributes. 273 if (!name.startsWith("ametys")) 274 { 275 map.put(name, value); 276 } 277 } 278 279 return map; 280 } 281 282 /** 283 * Extract the list of DOM node attributes. 284 * @param nodeList The node list. 285 * @return A List of the attribute values as Maps. 286 */ 287 protected List<Map<String, Object>> getList(NodeList nodeList) 288 { 289 List<Map<String, Object>> list = new ArrayList<>(); 290 291 for (int i = 0; i < nodeList.getLength(); i++) 292 { 293 Node node = nodeList.item(i); 294 list.add(getAttributes(node)); 295 } 296 297 return list; 298 } 299 300 /** 301 * Filters the list of field declarations and only keep valid ones. 302 * @param fieldList The list of fields to filter 303 * @return The filtered list 304 */ 305 protected List<Map<String, Object>> filterListNode(List<Map<String, Object>> fieldList) 306 { 307 return fieldList.stream().filter(fieldMap -> 308 { 309 String fieldName = (String) fieldMap.get("name"); 310 if (fieldName == null) 311 { 312 getLogger().warn("'name' attribute for field node cannot be null. Field will be ignored."); 313 return false; 314 } 315 else if (!isNameValid(fieldName)) 316 { 317 // https://lucene.apache.org/solr/guide/6_6/defining-fields.html#DefiningFields-FieldProperties 318 getLogger().warn("Invalid field name: '{}'. Field names should consist of alphanumeric or underscore characters only and not start with a digit (Ametys also supports dash character). Field will be ignored.", fieldName); 319 return false; 320 } 321 return true; 322 }).collect(Collectors.toList()); 323 } 324 325}