001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.solr.schema;
017
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition;
import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
import org.apache.solr.client.solrj.response.schema.SchemaRepresentation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import org.ametys.runtime.plugin.component.AbstractLogEnabled;
046
047/**
048 * Component providing helper methods to work with search schema and fields.
049 */
050public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable
051{
052    
053    /** The component role. */
054    public static final String ROLE = SchemaHelper.class.getName();
055    
056    /** The source resolver. */
057    protected SourceResolver _sourceResolver;
058    
059    /** A DOM parser. */
060    protected DOMParser _domParser;
061    
062    /** A XPath processor. */
063    protected XPathProcessor _xPathProcessor;
064    
065    @Override
066    public void service(ServiceManager manager) throws ServiceException
067    {
068        _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
069        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
070        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
071    }
072    
073    /**
074     * Chekcs that the passed Solr field name is valid
075     * @param fieldName the Solr field name
076     * @return true if name is valid, false otherwise
077     */
078    public static boolean isNameValid(String fieldName)
079    {
080        return fieldName.matches("^[a-zA-Z_][a-zA-Z0-9_\\/-]*$");
081    }
082    
083    /**
084     * Get the schema at the corresponding location (source URI).
085     * @param location The location, as a source URI.
086     * @return The schema representation.
087     */
088    public SchemaRepresentation getSchema(String location)
089    {
090        Source source = null;
091        
092        try
093        {
094            source = _sourceResolver.resolveURI(location);
095            
096            if (source.exists())
097            {
098                try (InputStream is = source.getInputStream())
099                {
100                    return readSchema(is);
101                }
102            }
103        }
104        catch (IOException | SAXException e)
105        {
106            getLogger().error("Error reading the schema from location '" + location + "'", e);
107        }
108        finally
109        {
110            if (source != null)
111            {
112                _sourceResolver.release(source);
113            }
114        }
115        
116        return null;
117    }
118    
119    /**
120     * Read the static schema.
121     * @param is An input stream on the schema XML.
122     * @return The representation of the schema.
123     * @throws IOException If an error occurs reading the stream.
124     * @throws SAXException If an error occurs parsing the XML.
125     */
126    public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException
127    {
128        SchemaRepresentation schema = new SchemaRepresentation();
129        
130        InputSource source = new InputSource(is);
131        Document document = _domParser.parseDocument(source);
132        
133        Element root = document.getDocumentElement();
134        
135        String name = _xPathProcessor.evaluateAsString(root, "/schema/@name");
136        float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue();
137        String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey");
138        
139        schema.setName(name);
140        schema.setVersion(version);
141        schema.setUniqueKey(uniqueKey);
142        
143        NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType");
144        schema.setFieldTypes(getFieldTypes(fieldTypeNodes));
145        
146        NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field");
147        schema.setFields(filterListNode(getList(fieldNodes)));
148        
149        NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField");
150        schema.setDynamicFields(getList(dynFieldNodes));
151        
152        NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField");
153        schema.setCopyFields(getList(copyFieldNodes));
154        
155        Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity");
156        if (similarityNode != null)
157        {
158            schema.setSimilarity(getAttributes(similarityNode));
159        }
160        
161        return schema;
162    }
163    
164    /**
165     * Get the field type definitions from the corresponding DOM nodes.
166     * @param fieldTypeNodes The field type nodes.
167     * @return The list of field type definitions.
168     */
169    protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes)
170    {
171        List<FieldTypeDefinition> definitions = new ArrayList<>();
172        
173        for (int i = 0; i < fieldTypeNodes.getLength(); i++)
174        {
175            Node fieldTypeNode = fieldTypeNodes.item(i);
176            definitions.add(getFieldType(fieldTypeNode));
177        }
178        
179        return definitions;
180    }
181    
182    /**
183     * Get a field type definition from the corresponding DOM node.
184     * @param fieldTypeNode The field type DOM node.
185     * @return The field type definition.
186     */
187    protected FieldTypeDefinition getFieldType(Node fieldTypeNode)
188    {
189        FieldTypeDefinition fieldType = new FieldTypeDefinition();
190        
191        fieldType.setAttributes(getAttributes(fieldTypeNode));
192        
193        NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer");
194        
195        for (int i = 0; i < analyzerNodes.getLength(); i++)
196        {
197            Node analyzerNode = analyzerNodes.item(i);
198            Node typeNode = analyzerNode.getAttributes().getNamedItem("type");
199            String type = typeNode != null ? typeNode.getNodeValue() : "";
200            
201            AnalyzerDefinition analyzer = getAnalyzer(analyzerNode);
202            
203            switch (type)
204            {
205                case "index":
206                    fieldType.setIndexAnalyzer(analyzer);
207                    break;
208                case "query":
209                    fieldType.setQueryAnalyzer(analyzer);
210                    break;
211                case "multiterm":
212                    fieldType.setMultiTermAnalyzer(analyzer);
213                    break;
214                default:
215                    fieldType.setAnalyzer(analyzer);
216                    break;
217            }
218        }
219        
220        Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity");
221        if (similarityNode != null)
222        {
223            fieldType.setSimilarity(getAttributes(similarityNode));
224        }
225        
226        return fieldType;
227    }
228    
229    /**
230     * Get an analyzer definition from the corresponding DOM node.
231     * @param analyzerNode The analyzer node.
232     * @return The analyzer definition.
233     */
234    protected AnalyzerDefinition getAnalyzer(Node analyzerNode)
235    {
236        AnalyzerDefinition analyzer = new AnalyzerDefinition();
237        
238        analyzer.setAttributes(getAttributes(analyzerNode));
239        
240        NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter");
241        analyzer.setCharFilters(getList(charFilterNodes));
242        
243        Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer");
244        if (tokenizerNode != null)
245        {
246            analyzer.setTokenizer(getAttributes(tokenizerNode));
247        }
248        
249        NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter");
250        analyzer.setFilters(getList(filterNodes));
251        
252        return analyzer;
253    }
254    
255    /**
256     * Extract a DOM node attributes as a Map.
257     * @param node The node.
258     * @return The attributes as a Map.
259     */
260    protected Map<String, Object> getAttributes(Node node)
261    {
262        Map<String, Object> map = new HashMap<>();
263        
264        NamedNodeMap attributes = node.getAttributes();
265        for (int i = 0; i < attributes.getLength(); i++)
266        {
267            Node attribute = attributes.item(i);
268            
269            String name = attribute.getNodeName();
270            String value = attribute.getNodeValue();
271            
272            // Filter out ametys-specific attributes.
273            if (!name.startsWith("ametys"))
274            {
275                map.put(name, value);
276            }
277        }
278        
279        return map;
280    }
281    
282    /**
283     * Extract the list of DOM node attributes.
284     * @param nodeList The node list.
285     * @return A List of the attribute values as Maps.
286     */
287    protected List<Map<String, Object>> getList(NodeList nodeList)
288    {
289        List<Map<String, Object>> list = new ArrayList<>();
290        
291        for (int i = 0; i < nodeList.getLength(); i++)
292        {
293            Node node = nodeList.item(i);
294            list.add(getAttributes(node));
295        }
296        
297        return list;
298    }
299    
300    /**
301     * Filters the list of field declarations and only keep valid ones.
302     * @param fieldList The list of fields to filter
303     * @return The filtered list
304     */
305    protected List<Map<String, Object>> filterListNode(List<Map<String, Object>> fieldList)
306    {
307        return fieldList.stream().filter(fieldMap -> 
308        {
309            String fieldName = (String) fieldMap.get("name");
310            if (fieldName == null)
311            {
312                getLogger().warn("'name' attribute for field node cannot be null. Field will be ignored.");
313                return false;
314            }
315            else if (!isNameValid(fieldName))
316            {
317                // https://lucene.apache.org/solr/guide/6_6/defining-fields.html#DefiningFields-FieldProperties
318                getLogger().warn("Invalid field name: '{}'. Field names should consist of alphanumeric or underscore characters only and not start with a digit (Ametys also supports dash character). Field will be ignored.", fieldName);
319                return false;
320            }
321            return true;
322        }).collect(Collectors.toList());
323    }
324    
325}