001/*
002 *  Copyright 2014 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.contentio.in.xml;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.util.HashMap;
021import java.util.HashSet;
022import java.util.Map;
023import java.util.Properties;
024import java.util.Set;
025
026import javax.xml.transform.OutputKeys;
027import javax.xml.transform.TransformerException;
028import javax.xml.transform.dom.DOMResult;
029import javax.xml.transform.sax.TransformerHandler;
030
031import org.apache.avalon.framework.configuration.Configuration;
032import org.apache.avalon.framework.configuration.ConfigurationException;
033import org.apache.avalon.framework.service.ServiceException;
034import org.apache.avalon.framework.service.ServiceManager;
035import org.apache.commons.lang3.StringUtils;
036import org.apache.excalibur.source.Source;
037import org.apache.excalibur.source.SourceResolver;
038import org.apache.excalibur.xml.dom.DOMParser;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.excalibur.xml.xpath.PrefixResolver;
041import org.apache.excalibur.xml.xpath.XPathProcessor;
042import org.apache.excalibur.xml.xslt.XSLTProcessor;
043import org.apache.excalibur.xml.xslt.XSLTProcessorException;
044import org.w3c.dom.Document;
045import org.w3c.dom.Element;
046import org.w3c.dom.Node;
047import org.xml.sax.InputSource;
048import org.xml.sax.SAXException;
049
050import org.ametys.plugins.contentio.ContentImporterHelper;
051import org.ametys.plugins.contentio.in.AbstractContentImporter;
052
053/**
054 * Abstract {@link XmlContentImporter} class which provides base XML importer configuration and logic.
055 */
056public abstract class AbstractXmlContentImporter extends AbstractContentImporter implements XmlContentImporter
057{
058    /** The service manager. */
059    protected ServiceManager _manager;
060    
061    /** The source resolver. */
062    protected SourceResolver _srcResolver;
063    
064    /** A DOM parser. */
065    protected DOMParser _domParser;
066    
067    /** The XPath processor. */
068    protected XPathProcessor _xPathProcessor;
069    
070    /** The runtime XSLT processor. */
071    protected XSLTProcessor _xsltProcessor;
072    
073    /** The prefix resolver. */
074    protected PrefixResolver _prefixResolver;
075    
076    /** The XSL transformer handler. */
077    protected TransformerHandler _xslTransformerHandler;
078    
079    /** The configured XML transformation stylesheet. */
080    protected String _xsl;
081    
082    @Override
083    public void service(ServiceManager serviceManager) throws ServiceException
084    {
085        super.service(serviceManager);
086        _manager = serviceManager;
087        _srcResolver = (SourceResolver) serviceManager.lookup(SourceResolver.ROLE);
088        _domParser = (DOMParser) serviceManager.lookup(DOMParser.ROLE);
089        _xPathProcessor = (XPathProcessor) serviceManager.lookup(XPathProcessor.ROLE);
090        _xsltProcessor = (XSLTProcessor) serviceManager.lookup(XSLTProcessor.ROLE + "/xalan");
091    }
092    
093    @Override
094    public void configure(Configuration configuration) throws ConfigurationException
095    {
096        super.configure(configuration);
097        configureXml(configuration.getChild("xml"));
098    }
099    
100    /**
101     * Configure XML-specific properties.
102     * @param configuration the XML configuration.
103     * @throws ConfigurationException if an error occurs.
104     */
105    protected void configureXml(Configuration configuration) throws ConfigurationException
106    {
107        _xsl = configuration.getChild("xsl").getAttribute("src", null);
108        configureNamespaces(configuration.getChild("namespaces"));
109    }
110    
111    @Override
112    protected void configureContentCreation(Configuration configuration) throws ConfigurationException
113    {
114        // Override default configuration to be more permissive.
115        String typesStr = configuration.getChild("content-types").getValue("");
116        _contentTypes = StringUtils.split(typesStr, ", ");
117        
118        String mixins = configuration.getChild("mixins").getValue("");
119        _mixins = StringUtils.split(mixins, ", ");
120        
121        _language = configuration.getChild("language").getValue("");
122        
123        configureWorkflow(configuration);
124    }
125    
126    /**
127     * Configure the namespace to use.
128     * @param configuration the namespaces configuration, can be null.
129     * @throws ConfigurationException if an error occurs.
130     */
131    protected void configureNamespaces(Configuration configuration) throws ConfigurationException
132    {
133        Map<String, String> namespaces = new HashMap<>();
134        
135        for (Configuration nsConf : configuration.getChildren("namespace"))
136        {
137            String prefix = nsConf.getAttribute("prefix", "");
138            String namespace = nsConf.getAttribute("uri");
139            
140            namespaces.put(prefix, namespace);
141        }
142        
143        _prefixResolver = new DefaultPrefixResolver(namespaces);
144    }
145    
146    /**
147     * Get the prefix resolver.
148     * @return the prefix resolver.
149     */
150    protected PrefixResolver getPrefixResolver()
151    {
152        return _prefixResolver;
153    }
154    
155    @Override
156    public boolean supports(InputStream is, String name) throws IOException
157    {
158        try
159        {
160            Document doc = _domParser.parseDocument(new InputSource(is));
161            
162            return supports(doc);
163        }
164        catch (SAXException e)
165        {
166            throw new IOException("Error parsing the document.", e);
167        }
168    }
169    
170    @Override
171    public Set<String> importContents(InputStream is, Map<String, Object> params) throws IOException
172    {
173        Set<String> contentIds = new HashSet<>();
174        SAXParser saxParser = null;
175        
176        try
177        {
178            Document document = null;
179            
180            // Either parse the document (no XSL) or transform 
181            if (_xsl == null)
182            {
183                document = _domParser.parseDocument(new InputSource(is));
184            }
185            else
186            {
187                // Initialize the XSL transformer.
188                initializeXslTransformerHandler();
189                
190                // Transform the XML doc with the configured XSL.
191                DOMResult result = new DOMResult();
192                _xslTransformerHandler.setResult(result);
193                saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
194                saxParser.parse(new InputSource(is), _xslTransformerHandler);
195                Node node = result.getNode();
196                
197                if (node instanceof Document)
198                {
199                    document = (Document) node;
200                }
201            }
202            
203            if (document != null)
204            {
205                if (getLogger().isDebugEnabled())
206                {
207                    getLogger().debug("Importing contents from document:\n {}", ContentImporterHelper.serializeNode(document, true));
208                }
209                
210                contentIds = importContents(document, params);
211            }
212        }
213        catch (ServiceException e)
214        {
215            getLogger().error("Unable to get a SAX parser.", e);
216            throw new IOException("Unable to get a SAX parser.", e);
217        }
218        catch (SAXException | TransformerException | XSLTProcessorException e)
219        {
220            getLogger().error("Error parsing the XML document.", e);
221            throw new IOException("Error parsing the XML document.", e);
222        }
223        finally
224        {
225            _manager.release(saxParser);
226        }
227        
228        return contentIds;
229    }
230    
231    /**
232     * Import the contents from the XML DOM {@link Document}.
233     * @param document the XML Document.
234     * @param params the import parameters.
235     * @return a Set of the imported content IDs.
236     * @throws IOException if an error occurs importing the contents.
237     */
238    protected abstract Set<String> importContents(Document document, Map<String, Object> params) throws IOException;
239    
240    /**
241     * Initialize the transformer from the configured XSL.
242     * @throws IOException if an errors occurs reading the XSL.
243     * @throws XSLTProcessorException of an error occurs during the XSL transformer manipulation
244     */
245    protected void initializeXslTransformerHandler() throws IOException, XSLTProcessorException
246    {
247        if (_xslTransformerHandler == null && StringUtils.isNotEmpty(_xsl))
248        {
249            Source xslSource = null;
250            
251            try
252            {
253                xslSource = _srcResolver.resolveURI(_xsl);
254                
255                _xslTransformerHandler = _xsltProcessor.getTransformerHandler(xslSource);
256                
257                Properties format = new Properties();
258                format.put(OutputKeys.METHOD, "xml");
259                format.put(OutputKeys.INDENT, "no");
260                format.put(OutputKeys.ENCODING, "UTF-8");
261                
262                _xslTransformerHandler.getTransformer().setOutputProperties(format);
263            }
264            finally
265            {
266                _srcResolver.release(xslSource);
267            }
268        }
269    }
270    
271    /**
272     * Get a node's text content, without trimming it.
273     * @param node the node, can be null.
274     * @param defaultValue the default value.
275     * @return the node's text content, or the default value if the given node is null.
276     */
277    protected String getTextContent(Node node, String defaultValue)
278    {
279        return getTextContent(node, defaultValue, false);
280    }
281    
282    /**
283     * Get a node's text content, optionally trimmed.
284     * @param node the node, can be null.
285     * @param defaultValue the default value.
286     * @param trim true to trim the text content, false otherwise.
287     * @return the node's text content, or the default value if the given node is null.
288     */
289    protected String getTextContent(Node node, String defaultValue, boolean trim)
290    {
291        String value = defaultValue;
292        if (node != null)
293        {
294            value = trim ? node.getTextContent().trim() : node.getTextContent();
295        }
296        
297        return value;
298    }
299    
300    /**
301     * Get an element attribute value (trimmed).
302     * @param element the {@link Element}, can be null.
303     * @param name the attribute name.
304     * @param defaultValue the default value.
305     * @return the node's attribute value, or the default value if the given node is null
306     * or the attribute doesn't exist.
307     */
308    protected String getAttributeValue(Element element, String name, String defaultValue)
309    {
310        return getAttributeValue(element, name, defaultValue, true);
311    }
312    
313    /**
314     * Get a node's attribute value, optionally trimmed.
315     * @param element the {@link Element}, can be null.
316     * @param name the attribute name.
317     * @param defaultValue the default value.
318     * @param trim true
319     * @return the node's attribute value, or the default value if the given node is null or the attribute doesn't exist.
320     */
321    protected String getAttributeValue(Element element, String name, String defaultValue, boolean trim)
322    {
323        String value = defaultValue;
324        if (element != null)
325        {
326            value = element.getAttribute(name);
327        }
328        
329        return value != null ? trim ? value.trim() : value : null;
330    }
331    
332    /**
333     * Configurable XML prefix resolver.
334     */
335    protected static class DefaultPrefixResolver implements PrefixResolver
336    {
337        /** Map of namespace URIs, indexed by prefix. */
338        private Map<String, String> _namespaces;
339        
340        /**
341         * Constructor.
342         * @param namespaces the namespaces to resolve, indexed by prefix.
343         */
344        public DefaultPrefixResolver(Map<String, String> namespaces)
345        {
346            _namespaces = new HashMap<>(namespaces);
347        }
348        
349        @Override
350        public String prefixToNamespace(String prefix)
351        {
352            return _namespaces.get(prefix);
353        }
354    }
355}