001/*
002 *  Copyright 2014 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.contentio.in.xml;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.util.HashMap;
021import java.util.HashSet;
022import java.util.Map;
023import java.util.Properties;
024import java.util.Set;
025
026import javax.xml.transform.OutputKeys;
027import javax.xml.transform.TransformerException;
028import javax.xml.transform.dom.DOMResult;
029import javax.xml.transform.sax.TransformerHandler;
030
031import org.apache.avalon.framework.configuration.Configuration;
032import org.apache.avalon.framework.configuration.ConfigurationException;
033import org.apache.avalon.framework.service.ServiceException;
034import org.apache.avalon.framework.service.ServiceManager;
035import org.apache.commons.lang3.StringUtils;
036import org.apache.excalibur.source.Source;
037import org.apache.excalibur.source.SourceResolver;
038import org.apache.excalibur.xml.dom.DOMParser;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.excalibur.xml.xpath.PrefixResolver;
041import org.apache.excalibur.xml.xpath.XPathProcessor;
042import org.apache.excalibur.xml.xslt.XSLTProcessor;
043import org.apache.excalibur.xml.xslt.XSLTProcessorException;
044import org.w3c.dom.Document;
045import org.w3c.dom.NamedNodeMap;
046import org.w3c.dom.Node;
047import org.xml.sax.InputSource;
048import org.xml.sax.SAXException;
049
050import org.ametys.plugins.contentio.AbstractContentImporter;
051import org.ametys.plugins.contentio.ContentImporterHelper;
052
053/**
054 * Abstract {@link XmlContentImporter} class which provides base XML importer configuration and logic.
055 */
056public abstract class AbstractXmlContentImporter extends AbstractContentImporter implements XmlContentImporter
057{
058    /** The service manager. */
059    protected ServiceManager _manager;
060    
061    /** The source resolver. */
062    protected SourceResolver _srcResolver;
063    
064    /** A DOM parser. */
065    protected DOMParser _domParser;
066    
067    /** The XPath processor. */
068    protected XPathProcessor _xPathProcessor;
069    
070    /** The runtime XSLT processor. */
071    protected XSLTProcessor _xsltProcessor;
072    
073    /** The prefix resolver. */
074    protected PrefixResolver _prefixResolver;
075    
076    /** The XSL transformer handler. */
077    protected TransformerHandler _xslTransformerHandler;
078    
079    /** The configured XML transformation stylesheet. */
080    protected String _xsl;
081    
082    @Override
083    public void service(ServiceManager serviceManager) throws ServiceException
084    {
085        super.service(serviceManager);
086        _manager = serviceManager;
087        _srcResolver = (SourceResolver) serviceManager.lookup(SourceResolver.ROLE);
088        _domParser = (DOMParser) serviceManager.lookup(DOMParser.ROLE);
089        _xPathProcessor = (XPathProcessor) serviceManager.lookup(XPathProcessor.ROLE);
090        _xsltProcessor = (XSLTProcessor) serviceManager.lookup(XSLTProcessor.ROLE + "/xalan");
091    }
092    
093    @Override
094    public void configure(Configuration configuration) throws ConfigurationException
095    {
096        super.configure(configuration);
097        
098        configureXml(configuration.getChild("xml"));
099    }
100    
101    /**
102     * Configure XML-specific properties.
103     * @param configuration the XML configuration.
104     * @throws ConfigurationException if an error occurs.
105     */
106    protected void configureXml(Configuration configuration) throws ConfigurationException
107    {
108        _xsl = configuration.getChild("xsl").getAttribute("src", null);
109        
110        configureNamespaces(configuration.getChild("namespaces"));
111    }
112    
113    @Override
114    protected void configureContentCreation(Configuration configuration) throws ConfigurationException
115    {
116        // Override default configuration to be more permissive.
117        String typesStr = configuration.getChild("content-types").getValue("");
118        _contentTypes = StringUtils.split(typesStr, ", ");
119        
120        String mixins = configuration.getChild("mixins").getValue("");
121        _mixins = StringUtils.split(mixins, ", ");
122        
123        _language = configuration.getChild("language").getValue("");
124        
125        configureWorkflow(configuration);
126    }
127    
128    /**
129     * Configure the namespace to use.
130     * @param configuration the namespaces configuration, can be null.
131     * @throws ConfigurationException if an error occurs.
132     */
133    protected void configureNamespaces(Configuration configuration) throws ConfigurationException
134    {
135        Map<String, String> namespaces = new HashMap<>();
136        
137        for (Configuration nsConf : configuration.getChildren("namespace"))
138        {
139            String prefix = nsConf.getAttribute("prefix", "");
140            String namespace = nsConf.getAttribute("uri");
141            
142            namespaces.put(prefix, namespace);
143        }
144        
145        _prefixResolver = new DefaultPrefixResolver(namespaces);
146    }
147    
148    /**
149     * Get the prefix resolver.
150     * @return the prefix resolver.
151     */
152    protected PrefixResolver getPrefixResolver()
153    {
154        return _prefixResolver;
155    }
156    
157    @Override
158    public boolean supports(InputStream is, String name) throws IOException
159    {
160        try
161        {
162            Document doc = _domParser.parseDocument(new InputSource(is));
163            
164            return supports(doc);
165        }
166        catch (SAXException e)
167        {
168            throw new IOException("Error parsing the document.", e);
169        }
170    }
171    
172    @Override
173    public Set<String> importContents(InputStream is, Map<String, Object> params) throws IOException
174    {
175        Set<String> contentIds = new HashSet<>();
176        SAXParser saxParser = null;
177        
178        try
179        {
180            Document document = null;
181            
182            // Either parse the document (no XSL) or transform 
183            if (_xsl == null)
184            {
185                document = _domParser.parseDocument(new InputSource(is));
186            }
187            else
188            {
189                // Initialize the XSL transformer.
190                initializeXslTransformerHandler();
191                
192                // Transform the XML doc with the configured XSL.
193                DOMResult result = new DOMResult();
194                _xslTransformerHandler.setResult(result);
195                saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
196                saxParser.parse(new InputSource(is), _xslTransformerHandler);
197                Node node = result.getNode();
198                
199                if (node instanceof Document)
200                {
201                    document = (Document) node;
202                }
203            }
204            
205            if (document != null)
206            {
207                if (getLogger().isDebugEnabled())
208                {
209                    getLogger().debug("Importing contents from document:\n {}", ContentImporterHelper.serializeNode(document, true));
210                }
211                
212                contentIds = importContents(document, params);
213            }
214        }
215        catch (ServiceException e)
216        {
217            getLogger().error("Unable to get a SAX parser.", e);
218            throw new IOException("Unable to get a SAX parser.", e);
219        }
220        catch (SAXException e)
221        {
222            // Should never happen: the XML has already been parsed in supports().
223            getLogger().error("Error parsing the XML document.", e);
224            throw new IOException("Error parsing the XML document.", e);
225        }
226        catch (TransformerException e)
227        {
228            getLogger().error("Error parsing the XML document.", e);
229            throw new IOException("Error parsing the XML document.", e);
230        }
231        catch (XSLTProcessorException e)
232        {
233            getLogger().error("Error parsing the XML document.", e);
234            throw new IOException("Error parsing the XML document.", e);
235        }
236        finally
237        {
238            _manager.release(saxParser);
239        }
240        
241        return contentIds;
242    }
243    
244    /**
245     * Import the contents from the XML DOM {@link Document}.
246     * @param document the XML Document.
247     * @param params the import parameters.
248     * @return a Set of the imported content IDs.
249     * @throws IOException if an error occurs importing the contents.
250     */
251    protected abstract Set<String> importContents(Document document, Map<String, Object> params) throws IOException;
252    
253    /**
254     * Initialize the transformer from the configured XSL.
255     * @throws IOException if an errors occurs reading the XSL.
256     * @throws XSLTProcessorException of an error occurs during the XSL transformer manipulation
257     */
258    protected void initializeXslTransformerHandler() throws IOException, XSLTProcessorException
259    {
260        if (_xslTransformerHandler == null && StringUtils.isNotEmpty(_xsl))
261        {
262            Source xslSource = null;
263            
264            try
265            {
266                xslSource = _srcResolver.resolveURI(_xsl);
267                
268                _xslTransformerHandler = _xsltProcessor.getTransformerHandler(xslSource);
269                
270                Properties format = new Properties();
271                format.put(OutputKeys.METHOD, "xml");
272                format.put(OutputKeys.INDENT, "no");
273                format.put(OutputKeys.ENCODING, "UTF-8");
274                
275                _xslTransformerHandler.getTransformer().setOutputProperties(format);
276            }
277            finally
278            {
279                _srcResolver.release(xslSource);
280            }
281        }
282    }
283    
284    /**
285     * Get a node's text content, without trimming it.
286     * @param node the node, can be null.
287     * @param defaultValue the default value.
288     * @return the node's text content, or the default value if the given node is null.
289     */
290    protected String getTextContent(Node node, String defaultValue)
291    {
292        return getTextContent(node, defaultValue, false);
293    }
294    
295    /**
296     * Get a node's text content, optionally trimmed.
297     * @param node the node, can be null.
298     * @param defaultValue the default value.
299     * @param trim true to trim the text content, false otherwise.
300     * @return the node's text content, or the default value if the given node is null.
301     */
302    protected String getTextContent(Node node, String defaultValue, boolean trim)
303    {
304        String value = defaultValue;
305        if (node != null)
306        {
307            value = trim ? node.getTextContent().trim() : node.getTextContent();
308        }
309        return value;
310    }
311    
312    /**
313     * Get a node's attribute value (trimmed).
314     * @param node the node, can be null.
315     * @param name the attribute name.
316     * @param defaultValue the default value.
317     * @return the node's attribute value, or the default value if the given node is null
318     * or the attribute doesn't exist.
319     */
320    protected String getAttributeValue(Node node, String name, String defaultValue)
321    {
322        return getAttributeValue(node, name, defaultValue, true);
323    }
324    
325    /**
326     * Get a node's attribute value, optionally trimmed.
327     * @param node the node, can be null.
328     * @param name the attribute name.
329     * @param defaultValue the default value.
330     * @param trim true
331     * @return the node's attribute value, or the default value if the given node is null
332     * or the attribute doesn't exist.
333     */
334    protected String getAttributeValue(Node node, String name, String defaultValue, boolean trim)
335    {
336        String value = defaultValue;
337        if (node != null)
338        {
339            NamedNodeMap attributes = node.getAttributes();
340            Node attrNode = attributes.getNamedItem(name);
341            if (attrNode != null)
342            {
343                value = trim ? attrNode.getTextContent().trim() : attrNode.getTextContent();
344            }
345        }
346        return value;
347    }
348    
349    /**
350     * Configurable XML prefix resolver.
351     */
352    protected static class DefaultPrefixResolver implements PrefixResolver
353    {
354        
355        /** Map of namespace URIs, indexed by prefix. */
356        private Map<String, String> _namespaces;
357        
358        /**
359         * Constructor.
360         * @param namespaces the namespaces to resolve, indexed by prefix.
361         */
362        public DefaultPrefixResolver(Map<String, String> namespaces)
363        {
364            _namespaces = new HashMap<>(namespaces);
365        }
366        
367        @Override
368        public String prefixToNamespace(String prefix)
369        {
370            return _namespaces.get(prefix);
371        }
372    }
373    
374}