001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.solr.schema;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.util.ArrayList;
021import java.util.HashMap;
022import java.util.List;
023import java.util.Map;
024
025import org.apache.avalon.framework.component.Component;
026import org.apache.avalon.framework.service.ServiceException;
027import org.apache.avalon.framework.service.ServiceManager;
028import org.apache.avalon.framework.service.Serviceable;
029import org.apache.excalibur.source.Source;
030import org.apache.excalibur.source.SourceResolver;
031import org.apache.excalibur.xml.dom.DOMParser;
032import org.apache.excalibur.xml.xpath.XPathProcessor;
033import org.apache.solr.client.solrj.request.schema.AnalyzerDefinition;
034import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
035import org.apache.solr.client.solrj.response.schema.SchemaRepresentation;
036import org.w3c.dom.Document;
037import org.w3c.dom.Element;
038import org.w3c.dom.NamedNodeMap;
039import org.w3c.dom.Node;
040import org.w3c.dom.NodeList;
041import org.xml.sax.InputSource;
042import org.xml.sax.SAXException;
043
044import org.ametys.cms.contenttype.MetadataType;
045import org.ametys.runtime.plugin.component.AbstractLogEnabled;
046
047/**
048 * Component providing helper methods to work with search schema and fields.
049 */
050public class SchemaHelper extends AbstractLogEnabled implements Component, Serviceable
051{
052    
053    /** The component role. */
054    public static final String ROLE = SchemaHelper.class.getName();
055    
056    /** The source resolver. */
057    protected SourceResolver _sourceResolver;
058    
059    /** A DOM parser. */
060    protected DOMParser _domParser;
061    
062    /** A XPath processor. */
063    protected XPathProcessor _xPathProcessor;
064    
065    @Override
066    public void service(ServiceManager manager) throws ServiceException
067    {
068        _sourceResolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
069        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
070        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
071    }
072    
073    /**
074     * Get the solr schema type from the metadata type.
075     * @param metaType The metadata type.
076     * @return The solr schema type.
077     */
078    public static String getSchemaType(MetadataType metaType)
079    {
080        String type = null;
081        
082        switch (metaType)
083        {
084            case STRING:
085            case USER:
086            case CONTENT:
087            case SUB_CONTENT:
088                type = "string";
089                break;
090            case LONG:
091                type = "long";
092                break;
093            case DOUBLE:
094                type = "double";
095                break;
096            case BOOLEAN:
097                type = "boolean";
098                break;
099            case DATE:
100            case DATETIME:
101                type = "date";
102                break;
103            case GEOCODE:
104                type = "location_rpt";
105                break;
106            case RICH_TEXT:
107                // TODO?
108                break;
109            case BINARY:
110            case FILE:
111            case COMPOSITE:
112            case REFERENCE:
113            default:
114                break;
115        }
116        
117        return type;
118    }
119    
120    /**
121     * Get the schema at the corresponding location (source URI).
122     * @param location The location, as a source URI.
123     * @return The schema representation.
124     */
125    public SchemaRepresentation getSchema(String location)
126    {
127        Source source = null;
128        
129        try
130        {
131            source = _sourceResolver.resolveURI(location);
132            
133            if (source.exists())
134            {
135                try (InputStream is = source.getInputStream())
136                {
137                    return readSchema(is);
138                }
139            }
140        }
141        catch (IOException | SAXException e)
142        {
143            getLogger().error("Error reading the schema from location '" + location + "'", e);
144        }
145        finally
146        {
147            if (source != null)
148            {
149                _sourceResolver.release(source);
150            }
151        }
152        
153        return null;
154    }
155    
156    /**
157     * Read the static schema.
158     * @param is An input stream on the schema XML.
159     * @return The representation of the schema.
160     * @throws IOException If an error occurs reading the stream.
161     * @throws SAXException If an error occurs parsing the XML.
162     */
163    public SchemaRepresentation readSchema(InputStream is) throws IOException, SAXException
164    {
165        SchemaRepresentation schema = new SchemaRepresentation();
166        
167        InputSource source = new InputSource(is);
168        Document document = _domParser.parseDocument(source);
169        
170        Element root = document.getDocumentElement();
171        
172        String name = _xPathProcessor.evaluateAsString(root, "/schema/@name");
173        float version = _xPathProcessor.evaluateAsNumber(root, "/schema/@version").floatValue();
174        String uniqueKey = _xPathProcessor.evaluateAsString(root, "/schema/uniqueKey");
175        String defaultOp = _xPathProcessor.evaluateAsString(root, "/schema/solrQueryParser/@defaultOperator");
176        String defaultSearchField = _xPathProcessor.evaluateAsString(root, "/schema/defaultSearchField");
177        
178        schema.setName(name);
179        schema.setVersion(version);
180        schema.setUniqueKey(uniqueKey);
181        schema.setDefaultOperator(defaultOp);
182        schema.setDefaultSearchField(defaultSearchField);
183        
184        NodeList fieldTypeNodes = _xPathProcessor.selectNodeList(root, "/schema/fieldType | /schema/types/fieldType");
185        schema.setFieldTypes(getFieldTypes(fieldTypeNodes));
186        
187        NodeList fieldNodes = _xPathProcessor.selectNodeList(root, "/schema/field | /schema/fields/field");
188        schema.setFields(getList(fieldNodes));
189        
190        NodeList dynFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/dynamicField | /schema/fields/dynamicField");
191        schema.setDynamicFields(getList(dynFieldNodes));
192        
193        NodeList copyFieldNodes = _xPathProcessor.selectNodeList(root, "/schema/copyField");
194        schema.setCopyFields(getList(copyFieldNodes));
195        
196        Node similarityNode = _xPathProcessor.selectSingleNode(root, "/schema/similarity");
197        if (similarityNode != null)
198        {
199            schema.setSimilarity(getAttributes(similarityNode));
200        }
201        
202        return schema;
203    }
204    
205    /**
206     * Get the field type definitions from the corresponding DOM nodes.
207     * @param fieldTypeNodes The field type nodes.
208     * @return The list of field type definitions.
209     */
210    protected List<FieldTypeDefinition> getFieldTypes(NodeList fieldTypeNodes)
211    {
212        List<FieldTypeDefinition> definitions = new ArrayList<>();
213        
214        for (int i = 0; i < fieldTypeNodes.getLength(); i++)
215        {
216            Node fieldTypeNode = fieldTypeNodes.item(i);
217            definitions.add(getFieldType(fieldTypeNode));
218        }
219        
220        return definitions;
221    }
222    
223    /**
224     * Get a field type definition from the corresponding DOM node.
225     * @param fieldTypeNode The field type DOM node.
226     * @return The field type definition.
227     */
228    protected FieldTypeDefinition getFieldType(Node fieldTypeNode)
229    {
230        FieldTypeDefinition fieldType = new FieldTypeDefinition();
231        
232        fieldType.setAttributes(getAttributes(fieldTypeNode));
233        
234        NodeList analyzerNodes = _xPathProcessor.selectNodeList(fieldTypeNode, "analyzer");
235        
236        for (int i = 0; i < analyzerNodes.getLength(); i++)
237        {
238            Node analyzerNode = analyzerNodes.item(i);
239            Node typeNode = analyzerNode.getAttributes().getNamedItem("type");
240            String type = typeNode != null ? typeNode.getNodeValue() : "";
241            
242            AnalyzerDefinition analyzer = getAnalyzer(analyzerNode);
243            
244            switch (type)
245            {
246                case "index":
247                    fieldType.setIndexAnalyzer(analyzer);
248                    break;
249                case "query":
250                    fieldType.setQueryAnalyzer(analyzer);
251                    break;
252                case "multiterm":
253                    fieldType.setMultiTermAnalyzer(analyzer);
254                    break;
255                default:
256                    fieldType.setAnalyzer(analyzer);
257                    break;
258            }
259        }
260        
261        Node similarityNode = _xPathProcessor.selectSingleNode(fieldTypeNode, "similarity");
262        if (similarityNode != null)
263        {
264            fieldType.setSimilarity(getAttributes(similarityNode));
265        }
266        
267        return fieldType;
268    }
269    
270    /**
271     * Get an analyzer definition from the corresponding DOM node.
272     * @param analyzerNode The analyzer node.
273     * @return The analyzer definition.
274     */
275    protected AnalyzerDefinition getAnalyzer(Node analyzerNode)
276    {
277        AnalyzerDefinition analyzer = new AnalyzerDefinition();
278        
279        analyzer.setAttributes(getAttributes(analyzerNode));
280        
281        NodeList charFilterNodes = _xPathProcessor.selectNodeList(analyzerNode, "charFilter");
282        analyzer.setCharFilters(getList(charFilterNodes));
283        
284        Node tokenizerNode = _xPathProcessor.selectSingleNode(analyzerNode, "tokenizer");
285        if (tokenizerNode != null)
286        {
287            analyzer.setTokenizer(getAttributes(tokenizerNode));
288        }
289        
290        NodeList filterNodes = _xPathProcessor.selectNodeList(analyzerNode, "filter");
291        analyzer.setFilters(getList(filterNodes));
292        
293        return analyzer;
294    }
295    
296    /**
297     * Extract a DOM node attributes as a Map.
298     * @param node The node.
299     * @return The attributes as a Map.
300     */
301    protected Map<String, Object> getAttributes(Node node)
302    {
303        Map<String, Object> map = new HashMap<>();
304        
305        NamedNodeMap attributes = node.getAttributes();
306        for (int i = 0; i < attributes.getLength(); i++)
307        {
308            Node attribute = attributes.item(i);
309            
310            String name = attribute.getNodeName();
311            String value = attribute.getNodeValue();
312            
313            // Filter out ametys-specific attributes.
314            if (!name.startsWith("ametys"))
315            {
316                map.put(name, value);
317            }
318        }
319        
320        return map;
321    }
322    
323    /**
324     * Extract the list of DOM node attributes.
325     * @param nodeList The node list.
326     * @return A List of the attribute values as Maps.
327     */
328    protected List<Map<String, Object>> getList(NodeList nodeList)
329    {
330        List<Map<String, Object>> list = new ArrayList<>();
331        
332        for (int i = 0; i < nodeList.getLength(); i++)
333        {
334            Node node = nodeList.item(i);
335            list.add(getAttributes(node));
336        }
337        
338        return list;
339    }
340    
341}