Source code

001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.solr.schema;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.util.ArrayList;
021import java.util.HashMap;
022import java.util.List;
023import java.util.Map;
024import java.util.stream.Collectors;
025
026import org.apache.avalon.framework.component.Component;
027import org.apache.avalon.framework.service.ServiceException;
028import org.apache.avalon.framework.service.ServiceManager;
029import org.apache.avalon.framework.service.Serviceable;
030import org.apache.excalibur.source.Source;
031import org.apache.excalibur.source.SourceResolver;
032import org.apache.excalibur.xml.dom.DOMParser;
033import org.apache.excalibur.xml.xpath.XPathProcessor;
034import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition;
035import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
036import org.apache.solr.client.solrj.response.schema.SchemaRepresentation;
037import org.w3c.dom.Document;
038import org.w3c.dom.Element;
039import org.w3c.dom.NamedNodeMap;
040import org.w3c.dom.Node;
041import org.w3c.dom.NodeList;
042import org.xml.sax.InputSource;
043import org.xml.sax.SAXException;
044
045import org.ametys.cms.contenttype.MetadataType;
046import org.ametys.runtime.plugin.component.AbstractLogEnabled;
047
048/**
049 * Component providing helper methods to work with search schema and fields.
050 */
051public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable
052{
053    
054    /** The component role. */
055    public static final String ROLE = SchemaHelper.class.getName();
056    
057    /** The source resolver. */
058    protected SourceResolver _sourceResolver;
059    
060    /** A DOM parser. */
061    protected DOMParser _domParser;
062    
063    /** A XPath processor. */
064    protected XPathProcessor _xPathProcessor;
065    
066    @Override
067    public void service(ServiceManager manager) throws ServiceException
068    {
069        _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
070        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
071        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
072    }
073    
074    /**
075     * Get the solr schema type from the metadata type.
076     * @param metaType The metadata type.
077     * @return The solr schema type.
078     */
079    public static String getSchemaType(MetadataType metaType)
080    {
081        String type = null;
082        
083        switch (metaType)
084        {
085            case STRING:
086            case MULTILINGUAL_STRING:
087            case USER:
088            case CONTENT:
089            case SUB_CONTENT:
090                type = "string";
091                break;
092            case LONG:
093                type = "long";
094                break;
095            case DOUBLE:
096                type = "double";
097                break;
098            case BOOLEAN:
099                type = "boolean";
100                break;
101            case DATE:
102            case DATETIME:
103                type = "date";
104                break;
105            case GEOCODE:
106                type = "location_rpt";
107                break;
108            case RICH_TEXT:
109                // TODO?
110                break;
111            case BINARY:
112            case FILE:
113            case COMPOSITE:
114            case REFERENCE:
115            default:
116                break;
117        }
118        
119        return type;
120    }
121    
122    /**
123     * Chekcs that the passed Solr field name is valid
124     * @param fieldName the Solr field name
125     * @return true if name is valid, false otherwise
126     */
127    public static boolean isNameValid(String fieldName)
128    {
129        return fieldName.matches("^[a-zA-Z_][a-zA-Z0-9_\\/-]*$");
130    }
131    
132    /**
133     * Get the schema at the corresponding location (source URI).
134     * @param location The location, as a source URI.
135     * @return The schema representation.
136     */
137    public SchemaRepresentation getSchema(String location)
138    {
139        Source source = null;
140        
141        try
142        {
143            source = _sourceResolver.resolveURI(location);
144            
145            if (source.exists())
146            {
147                try (InputStream is = source.getInputStream())
148                {
149                    return readSchema(is);
150                }
151            }
152        }
153        catch (IOException | SAXException e)
154        {
155            getLogger().error("Error reading the schema from location '" + location + "'", e);
156        }
157        finally
158        {
159            if (source != null)
160            {
161                _sourceResolver.release(source);
162            }
163        }
164        
165        return null;
166    }
167    
168    /**
169     * Read the static schema.
170     * @param is An input stream on the schema XML.
171     * @return The representation of the schema.
172     * @throws IOException If an error occurs reading the stream.
173     * @throws SAXException If an error occurs parsing the XML.
174     */
175    public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException
176    {
177        SchemaRepresentation schema = new SchemaRepresentation();
178        
179        InputSource source = new InputSource(is);
180        Document document = _domParser.parseDocument(source);
181        
182        Element root = document.getDocumentElement();
183        
184        String name = _xPathProcessor.evaluateAsString(root, "/schema/@name");
185        float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue();
186        String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey");
187        String defaultOp = _xPathProcessor.evaluateAsString(root, "/schema/solrQueryParser/@defaultOperator");
188        String defaultSearchField = _xPathProcessor.evaluateAsString(root, "/schema/defaultSearchField");
189        
190        schema.setName(name);
191        schema.setVersion(version);
192        schema.setUniqueKey(uniqueKey);
193        schema.setDefaultOperator(defaultOp);
194        schema.setDefaultSearchField(defaultSearchField);
195        
196        NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType");
197        schema.setFieldTypes(getFieldTypes(fieldTypeNodes));
198        
199        NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field");
200        schema.setFields(filterListNode(getList(fieldNodes)));
201        
202        NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField");
203        schema.setDynamicFields(getList(dynFieldNodes));
204        
205        NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField");
206        schema.setCopyFields(getList(copyFieldNodes));
207        
208        Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity");
209        if (similarityNode != null)
210        {
211            schema.setSimilarity(getAttributes(similarityNode));
212        }
213        
214        return schema;
215    }
216    
217    /**
218     * Get the field type definitions from the corresponding DOM nodes.
219     * @param fieldTypeNodes The field type nodes.
220     * @return The list of field type definitions.
221     */
222    protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes)
223    {
224        List<FieldTypeDefinition> definitions = new ArrayList<>();
225        
226        for (int i = 0; i < fieldTypeNodes.getLength(); i++)
227        {
228            Node fieldTypeNode = fieldTypeNodes.item(i);
229            definitions.add(getFieldType(fieldTypeNode));
230        }
231        
232        return definitions;
233    }
234    
235    /**
236     * Get a field type definition from the corresponding DOM node.
237     * @param fieldTypeNode The field type DOM node.
238     * @return The field type definition.
239     */
240    protected FieldTypeDefinition getFieldType(Node fieldTypeNode)
241    {
242        FieldTypeDefinition fieldType = new FieldTypeDefinition();
243        
244        fieldType.setAttributes(getAttributes(fieldTypeNode));
245        
246        NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer");
247        
248        for (int i = 0; i < analyzerNodes.getLength(); i++)
249        {
250            Node analyzerNode = analyzerNodes.item(i);
251            Node typeNode = analyzerNode.getAttributes().getNamedItem("type");
252            String type = typeNode != null ? typeNode.getNodeValue() : "";
253            
254            AnalyzerDefinition analyzer = getAnalyzer(analyzerNode);
255            
256            switch (type)
257            {
258                case "index":
259                    fieldType.setIndexAnalyzer(analyzer);
260                    break;
261                case "query":
262                    fieldType.setQueryAnalyzer(analyzer);
263                    break;
264                case "multiterm":
265                    fieldType.setMultiTermAnalyzer(analyzer);
266                    break;
267                default:
268                    fieldType.setAnalyzer(analyzer);
269                    break;
270            }
271        }
272        
273        Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity");
274        if (similarityNode != null)
275        {
276            fieldType.setSimilarity(getAttributes(similarityNode));
277        }
278        
279        return fieldType;
280    }
281    
282    /**
283     * Get an analyzer definition from the corresponding DOM node.
284     * @param analyzerNode The analyzer node.
285     * @return The analyzer definition.
286     */
287    protected AnalyzerDefinition getAnalyzer(Node analyzerNode)
288    {
289        AnalyzerDefinition analyzer = new AnalyzerDefinition();
290        
291        analyzer.setAttributes(getAttributes(analyzerNode));
292        
293        NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter");
294        analyzer.setCharFilters(getList(charFilterNodes));
295        
296        Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer");
297        if (tokenizerNode != null)
298        {
299            analyzer.setTokenizer(getAttributes(tokenizerNode));
300        }
301        
302        NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter");
303        analyzer.setFilters(getList(filterNodes));
304        
305        return analyzer;
306    }
307    
308    /**
309     * Extract a DOM node attributes as a Map.
310     * @param node The node.
311     * @return The attributes as a Map.
312     */
313    protected Map<String, Object> getAttributes(Node node)
314    {
315        Map<String, Object> map = new HashMap<>();
316        
317        NamedNodeMap attributes = node.getAttributes();
318        for (int i = 0; i < attributes.getLength(); i++)
319        {
320            Node attribute = attributes.item(i);
321            
322            String name = attribute.getNodeName();
323            String value = attribute.getNodeValue();
324            
325            // Filter out ametys-specific attributes.
326            if (!name.startsWith("ametys"))
327            {
328                map.put(name, value);
329            }
330        }
331        
332        return map;
333    }
334    
335    /**
336     * Extract the list of DOM node attributes.
337     * @param nodeList The node list.
338     * @return A List of the attribute values as Maps.
339     */
340    protected List<Map<String, Object>> getList(NodeList nodeList)
341    {
342        List<Map<String, Object>> list = new ArrayList<>();
343        
344        for (int i = 0; i < nodeList.getLength(); i++)
345        {
346            Node node = nodeList.item(i);
347            list.add(getAttributes(node));
348        }
349        
350        return list;
351    }
352    
353    /**
354     * Filters the list of field declarations and only keep valid ones.
355     * @param fieldList The list of fields to filter
356     * @return The filtered list
357     */
358    protected List<Map<String, Object>> filterListNode(List<Map<String, Object>> fieldList)
359    {
360        return fieldList.stream().filter(fieldMap -> 
361        {
362            String fieldName = (String) fieldMap.get("name");
363            if (fieldName == null)
364            {
365                getLogger().warn("'name' attribute for field node cannot be null. Field will be ignored.");
366                return false;
367            }
368            else if (!isNameValid(fieldName))
369            {
370                // https://lucene.apache.org/solr/guide/6_6/defining-fields.html#DefiningFields-FieldProperties
371                getLogger().warn("Invalid field name: '{}'. Field names should consist of alphanumeric or underscore characters only and not start with a digit (Ametys also supports dash character). Field will be ignored.", fieldName);
372                return false;
373            }
374            return true;
375        }).collect(Collectors.toList());
376    }
377    
378}