001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.solr.schema;
017
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition;
import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
import org.apache.solr.client.solrj.response.schema.SchemaRepresentation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import org.ametys.cms.contenttype.MetadataType;
import org.ametys.runtime.plugin.component.AbstractLogEnabled;
047
048/**
049 * Component providing helper methods to work with search schema and fields.
050 */
051public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable
052{
053    
054    /** The component role. */
055    public static final String ROLE = SchemaHelper.class.getName();
056    
057    /** The source resolver. */
058    protected SourceResolver _sourceResolver;
059    
060    /** A DOM parser. */
061    protected DOMParser _domParser;
062    
063    /** A XPath processor. */
064    protected XPathProcessor _xPathProcessor;
065    
066    @Override
067    public void service(ServiceManager manager) throws ServiceException
068    {
069        _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
070        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
071        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
072    }
073    
074    /**
075     * Get the solr schema type from the metadata type.
076     * @param metaType The metadata type.
077     * @return The solr schema type.
078     */
079    public static String getSchemaType(MetadataType metaType)
080    {
081        String type = null;
082        
083        switch (metaType)
084        {
085            case STRING:
086            case MULTILINGUAL_STRING:
087            case USER:
088            case CONTENT:
089            case SUB_CONTENT:
090                type = "string";
091                break;
092            case LONG:
093                type = "plong";
094                break;
095            case DOUBLE:
096                type = "pdouble";
097                break;
098            case BOOLEAN:
099                type = "boolean";
100                break;
101            case DATE:
102            case DATETIME:
103                type = "pdate";
104                break;
105            case GEOCODE:
106                type = "location_rpt";
107                break;
108            case RICH_TEXT:
109                // TODO?
110                break;
111            case BINARY:
112            case FILE:
113            case COMPOSITE:
114            case REFERENCE:
115            default:
116                break;
117        }
118        
119        return type;
120    }
121    
122    /**
123     * Chekcs that the passed Solr field name is valid
124     * @param fieldName the Solr field name
125     * @return true if name is valid, false otherwise
126     */
127    public static boolean isNameValid(String fieldName)
128    {
129        return fieldName.matches("^[a-zA-Z_][a-zA-Z0-9_\\/-]*$");
130    }
131    
132    /**
133     * Get the schema at the corresponding location (source URI).
134     * @param location The location, as a source URI.
135     * @return The schema representation.
136     */
137    public SchemaRepresentation getSchema(String location)
138    {
139        Source source = null;
140        
141        try
142        {
143            source = _sourceResolver.resolveURI(location);
144            
145            if (source.exists())
146            {
147                try (InputStream is = source.getInputStream())
148                {
149                    return readSchema(is);
150                }
151            }
152        }
153        catch (IOException | SAXException e)
154        {
155            getLogger().error("Error reading the schema from location '" + location + "'", e);
156        }
157        finally
158        {
159            if (source != null)
160            {
161                _sourceResolver.release(source);
162            }
163        }
164        
165        return null;
166    }
167    
168    /**
169     * Read the static schema.
170     * @param is An input stream on the schema XML.
171     * @return The representation of the schema.
172     * @throws IOException If an error occurs reading the stream.
173     * @throws SAXException If an error occurs parsing the XML.
174     */
175    public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException
176    {
177        SchemaRepresentation schema = new SchemaRepresentation();
178        
179        InputSource source = new InputSource(is);
180        Document document = _domParser.parseDocument(source);
181        
182        Element root = document.getDocumentElement();
183        
184        String name = _xPathProcessor.evaluateAsString(root, "/schema/@name");
185        float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue();
186        String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey");
187        
188        schema.setName(name);
189        schema.setVersion(version);
190        schema.setUniqueKey(uniqueKey);
191        
192        NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType");
193        schema.setFieldTypes(getFieldTypes(fieldTypeNodes));
194        
195        NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field");
196        schema.setFields(filterListNode(getList(fieldNodes)));
197        
198        NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField");
199        schema.setDynamicFields(getList(dynFieldNodes));
200        
201        NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField");
202        schema.setCopyFields(getList(copyFieldNodes));
203        
204        Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity");
205        if (similarityNode != null)
206        {
207            schema.setSimilarity(getAttributes(similarityNode));
208        }
209        
210        return schema;
211    }
212    
213    /**
214     * Get the field type definitions from the corresponding DOM nodes.
215     * @param fieldTypeNodes The field type nodes.
216     * @return The list of field type definitions.
217     */
218    protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes)
219    {
220        List<FieldTypeDefinition> definitions = new ArrayList<>();
221        
222        for (int i = 0; i < fieldTypeNodes.getLength(); i++)
223        {
224            Node fieldTypeNode = fieldTypeNodes.item(i);
225            definitions.add(getFieldType(fieldTypeNode));
226        }
227        
228        return definitions;
229    }
230    
231    /**
232     * Get a field type definition from the corresponding DOM node.
233     * @param fieldTypeNode The field type DOM node.
234     * @return The field type definition.
235     */
236    protected FieldTypeDefinition getFieldType(Node fieldTypeNode)
237    {
238        FieldTypeDefinition fieldType = new FieldTypeDefinition();
239        
240        fieldType.setAttributes(getAttributes(fieldTypeNode));
241        
242        NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer");
243        
244        for (int i = 0; i < analyzerNodes.getLength(); i++)
245        {
246            Node analyzerNode = analyzerNodes.item(i);
247            Node typeNode = analyzerNode.getAttributes().getNamedItem("type");
248            String type = typeNode != null ? typeNode.getNodeValue() : "";
249            
250            AnalyzerDefinition analyzer = getAnalyzer(analyzerNode);
251            
252            switch (type)
253            {
254                case "index":
255                    fieldType.setIndexAnalyzer(analyzer);
256                    break;
257                case "query":
258                    fieldType.setQueryAnalyzer(analyzer);
259                    break;
260                case "multiterm":
261                    fieldType.setMultiTermAnalyzer(analyzer);
262                    break;
263                default:
264                    fieldType.setAnalyzer(analyzer);
265                    break;
266            }
267        }
268        
269        Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity");
270        if (similarityNode != null)
271        {
272            fieldType.setSimilarity(getAttributes(similarityNode));
273        }
274        
275        return fieldType;
276    }
277    
278    /**
279     * Get an analyzer definition from the corresponding DOM node.
280     * @param analyzerNode The analyzer node.
281     * @return The analyzer definition.
282     */
283    protected AnalyzerDefinition getAnalyzer(Node analyzerNode)
284    {
285        AnalyzerDefinition analyzer = new AnalyzerDefinition();
286        
287        analyzer.setAttributes(getAttributes(analyzerNode));
288        
289        NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter");
290        analyzer.setCharFilters(getList(charFilterNodes));
291        
292        Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer");
293        if (tokenizerNode != null)
294        {
295            analyzer.setTokenizer(getAttributes(tokenizerNode));
296        }
297        
298        NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter");
299        analyzer.setFilters(getList(filterNodes));
300        
301        return analyzer;
302    }
303    
304    /**
305     * Extract a DOM node attributes as a Map.
306     * @param node The node.
307     * @return The attributes as a Map.
308     */
309    protected Map<String, Object> getAttributes(Node node)
310    {
311        Map<String, Object> map = new HashMap<>();
312        
313        NamedNodeMap attributes = node.getAttributes();
314        for (int i = 0; i < attributes.getLength(); i++)
315        {
316            Node attribute = attributes.item(i);
317            
318            String name = attribute.getNodeName();
319            String value = attribute.getNodeValue();
320            
321            // Filter out ametys-specific attributes.
322            if (!name.startsWith("ametys"))
323            {
324                map.put(name, value);
325            }
326        }
327        
328        return map;
329    }
330    
331    /**
332     * Extract the list of DOM node attributes.
333     * @param nodeList The node list.
334     * @return A List of the attribute values as Maps.
335     */
336    protected List<Map<String, Object>> getList(NodeList nodeList)
337    {
338        List<Map<String, Object>> list = new ArrayList<>();
339        
340        for (int i = 0; i < nodeList.getLength(); i++)
341        {
342            Node node = nodeList.item(i);
343            list.add(getAttributes(node));
344        }
345        
346        return list;
347    }
348    
349    /**
350     * Filters the list of field declarations and only keep valid ones.
351     * @param fieldList The list of fields to filter
352     * @return The filtered list
353     */
354    protected List<Map<String, Object>> filterListNode(List<Map<String, Object>> fieldList)
355    {
356        return fieldList.stream().filter(fieldMap -> 
357        {
358            String fieldName = (String) fieldMap.get("name");
359            if (fieldName == null)
360            {
361                getLogger().warn("'name' attribute for field node cannot be null. Field will be ignored.");
362                return false;
363            }
364            else if (!isNameValid(fieldName))
365            {
366                // https://lucene.apache.org/solr/guide/6_6/defining-fields.html#DefiningFields-FieldProperties
367                getLogger().warn("Invalid field name: '{}'. Field names should consist of alphanumeric or underscore characters only and not start with a digit (Ametys also supports dash character). Field will be ignored.", fieldName);
368                return false;
369            }
370            return true;
371        }).collect(Collectors.toList());
372    }
373    
374}