001/*
002 *  Copyright 2014 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.contentio.in.xml;
017
018import java.io.IOException;
019import java.io.InputStream;
020import java.util.HashMap;
021import java.util.HashSet;
022import java.util.Map;
023import java.util.Properties;
024import java.util.Set;
025
026import javax.xml.transform.OutputKeys;
027import javax.xml.transform.TransformerException;
028import javax.xml.transform.dom.DOMResult;
029import javax.xml.transform.sax.TransformerHandler;
030
031import org.apache.avalon.framework.configuration.Configuration;
032import org.apache.avalon.framework.configuration.ConfigurationException;
033import org.apache.avalon.framework.service.ServiceException;
034import org.apache.avalon.framework.service.ServiceManager;
035import org.apache.commons.lang3.StringUtils;
036import org.apache.excalibur.source.Source;
037import org.apache.excalibur.source.SourceResolver;
038import org.apache.excalibur.xml.dom.DOMParser;
039import org.apache.excalibur.xml.sax.SAXParser;
040import org.apache.excalibur.xml.xpath.PrefixResolver;
041import org.apache.excalibur.xml.xpath.XPathProcessor;
042import org.apache.excalibur.xml.xslt.XSLTProcessor;
043import org.apache.excalibur.xml.xslt.XSLTProcessorException;
044import org.w3c.dom.Document;
045import org.w3c.dom.NamedNodeMap;
046import org.w3c.dom.Node;
047import org.xml.sax.InputSource;
048import org.xml.sax.SAXException;
049
050import org.ametys.plugins.contentio.AbstractContentImporter;
051import org.ametys.plugins.contentio.ContentImporterHelper;
052
053/**
054 * Abstract {@link XmlContentImporter} class which provides base XML importer configuration and logic.
055 */
056public abstract class AbstractXmlContentImporter extends AbstractContentImporter implements XmlContentImporter
057{
058    
059    /** The source resolver. */
060    protected SourceResolver _srcResolver;
061    
062    /** A DOM parser. */
063    protected DOMParser _domParser;
064    
065    /** A SAX parser. */
066    protected SAXParser _saxParser;
067    
068    /** The XPath processor. */
069    protected XPathProcessor _xPathProcessor;
070    
071    /** The runtime XSLT processor. */
072    protected XSLTProcessor _xsltProcessor;
073    
074    /** The prefix resolver. */
075    protected PrefixResolver _prefixResolver;
076    
077    /** The XSL transformer handler. */
078    protected TransformerHandler _xslTransformerHandler;
079    
080    /** The configured XML transformation stylesheet. */
081    protected String _xsl;
082    
083    @Override
084    public void service(ServiceManager serviceManager) throws ServiceException
085    {
086        super.service(serviceManager);
087        _srcResolver = (SourceResolver) serviceManager.lookup(SourceResolver.ROLE);
088        _domParser = (DOMParser) serviceManager.lookup(DOMParser.ROLE);
089        _xPathProcessor = (XPathProcessor) serviceManager.lookup(XPathProcessor.ROLE);
090        
091        _saxParser = (SAXParser) serviceManager.lookup(SAXParser.ROLE);
092        _xsltProcessor = (XSLTProcessor) serviceManager.lookup(XSLTProcessor.ROLE + "/xalan");
093    }
094    
095    @Override
096    public void configure(Configuration configuration) throws ConfigurationException
097    {
098        super.configure(configuration);
099        
100        configureXml(configuration.getChild("xml"));
101    }
102    
103    /**
104     * Configure XML-specific properties.
105     * @param configuration the XML configuration.
106     * @throws ConfigurationException if an error occurs.
107     */
108    protected void configureXml(Configuration configuration) throws ConfigurationException
109    {
110        _xsl = configuration.getChild("xsl").getAttribute("src", null);
111        
112        configureNamespaces(configuration.getChild("namespaces"));
113    }
114    
115    @Override
116    protected void configureContentCreation(Configuration configuration) throws ConfigurationException
117    {
118        // Override default configuration to be more permissive.
119        String typesStr = configuration.getChild("content-types").getValue("");
120        _contentTypes = StringUtils.split(typesStr, ", ");
121        
122        String mixins = configuration.getChild("mixins").getValue("");
123        _mixins = StringUtils.split(mixins, ", ");
124        
125        _language = configuration.getChild("language").getValue("");
126        
127        configureWorkflow(configuration);
128    }
129    
130    /**
131     * Configure the namespace to use.
132     * @param configuration the namespaces configuration, can be null.
133     * @throws ConfigurationException if an error occurs.
134     */
135    protected void configureNamespaces(Configuration configuration) throws ConfigurationException
136    {
137        Map<String, String> namespaces = new HashMap<>();
138        
139        for (Configuration nsConf : configuration.getChildren("namespace"))
140        {
141            String prefix = nsConf.getAttribute("prefix", "");
142            String namespace = nsConf.getAttribute("uri");
143            
144            namespaces.put(prefix, namespace);
145        }
146        
147        _prefixResolver = new DefaultPrefixResolver(namespaces);
148    }
149    
150    /**
151     * Get the prefix resolver.
152     * @return the prefix resolver.
153     */
154    protected PrefixResolver getPrefixResolver()
155    {
156        return _prefixResolver;
157    }
158    
159    @Override
160    public boolean supports(InputStream is, String name) throws IOException
161    {
162        try
163        {
164            Document doc = _domParser.parseDocument(new InputSource(is));
165            
166            return supports(doc);
167        }
168        catch (SAXException e)
169        {
170            throw new IOException("Error parsing the document.", e);
171        }
172    }
173    
174    @Override
175    public Set<String> importContents(InputStream is, Map<String, Object> params) throws IOException
176    {
177        Set<String> contentIds = new HashSet<>();
178        
179        try
180        {
181            Document document = null;
182            
183            // Either parse the document (no XSL) or transform 
184            if (_xsl == null)
185            {
186                document = _domParser.parseDocument(new InputSource(is));
187            }
188            else
189            {
190                // Initialize the XSL transformer.
191                initializeXslTransformerHandler();
192                
193                // Transform the XML doc with the configured XSL.
194                DOMResult result = new DOMResult();
195                _xslTransformerHandler.setResult(result);
196                _saxParser.parse(new InputSource(is), _xslTransformerHandler);
197                Node node = result.getNode();
198                
199                if (node instanceof Document)
200                {
201                    document = (Document) node;
202                }
203            }
204            
205            if (document != null)
206            {
207                if (getLogger().isDebugEnabled())
208                {
209                    getLogger().debug("Importing contents from document:\n" + ContentImporterHelper.serializeNode(document, true));
210                }
211                
212                contentIds = importContents(document, params);
213            }
214        }
215        catch (SAXException e)
216        {
217            // Should never happen: the XML has already been parsed in supports().
218            getLogger().error("Error parsing the XML document.", e);
219            throw new IOException("Error parsing the XML document.", e);
220        }
221        catch (TransformerException e)
222        {
223            getLogger().error("Error parsing the XML document.", e);
224            throw new IOException("Error parsing the XML document.", e);
225        }
226        catch (XSLTProcessorException e)
227        {
228            getLogger().error("Error parsing the XML document.", e);
229            throw new IOException("Error parsing the XML document.", e);
230        }
231        
232        return contentIds;
233    }
234    
235    /**
236     * Import the contents from the XML DOM {@link Document}.
237     * @param document the XML Document.
238     * @param params the import parameters.
239     * @return a Set of the imported content IDs.
240     * @throws IOException if an error occurs importing the contents.
241     */
242    protected abstract Set<String> importContents(Document document, Map<String, Object> params) throws IOException;
243    
244    /**
245     * Initialize the transformer from the configured XSL.
246     * @throws IOException if an errors occurs reading the XSL.
247     * @throws XSLTProcessorException of an error occurs during the XSL transformer manipulation
248     */
249    protected void initializeXslTransformerHandler() throws IOException, XSLTProcessorException
250    {
251        if (_xslTransformerHandler == null && StringUtils.isNotEmpty(_xsl))
252        {
253            Source xslSource = null;
254            
255            try
256            {
257                xslSource = _srcResolver.resolveURI(_xsl);
258                
259                _xslTransformerHandler = _xsltProcessor.getTransformerHandler(xslSource);
260                
261                Properties format = new Properties();
262                format.put(OutputKeys.METHOD, "xml");
263                format.put(OutputKeys.INDENT, "no");
264                format.put(OutputKeys.ENCODING, "UTF-8");
265                
266                _xslTransformerHandler.getTransformer().setOutputProperties(format);
267            }
268            finally
269            {
270                _srcResolver.release(xslSource);
271            }
272        }
273    }
274    
275    /**
276     * Get a node's text content, without trimming it.
277     * @param node the node, can be null.
278     * @param defaultValue the default value.
279     * @return the node's text content, or the default value if the given node is null.
280     */
281    protected String getTextContent(Node node, String defaultValue)
282    {
283        return getTextContent(node, defaultValue, false);
284    }
285    
286    /**
287     * Get a node's text content, optionally trimmed.
288     * @param node the node, can be null.
289     * @param defaultValue the default value.
290     * @param trim true to trim the text content, false otherwise.
291     * @return the node's text content, or the default value if the given node is null.
292     */
293    protected String getTextContent(Node node, String defaultValue, boolean trim)
294    {
295        String value = defaultValue;
296        if (node != null)
297        {
298            value = trim ? node.getTextContent().trim() : node.getTextContent();
299        }
300        return value;
301    }
302    
303    /**
304     * Get a node's attribute value (trimmed).
305     * @param node the node, can be null.
306     * @param name the attribute name.
307     * @param defaultValue the default value.
308     * @return the node's attribute value, or the default value if the given node is null
309     * or the attribute doesn't exist.
310     */
311    protected String getAttributeValue(Node node, String name, String defaultValue)
312    {
313        return getAttributeValue(node, name, defaultValue, true);
314    }
315    
316    /**
317     * Get a node's attribute value, optionally trimmed.
318     * @param node the node, can be null.
319     * @param name the attribute name.
320     * @param defaultValue the default value.
321     * @param trim true
322     * @return the node's attribute value, or the default value if the given node is null
323     * or the attribute doesn't exist.
324     */
325    protected String getAttributeValue(Node node, String name, String defaultValue, boolean trim)
326    {
327        String value = defaultValue;
328        if (node != null)
329        {
330            NamedNodeMap attributes = node.getAttributes();
331            Node attrNode = attributes.getNamedItem(name);
332            if (attrNode != null)
333            {
334                value = trim ? attrNode.getTextContent().trim() : attrNode.getTextContent();
335            }
336        }
337        return value;
338    }
339    
340    /**
341     * Configurable XML prefix resolver.
342     */
343    protected static class DefaultPrefixResolver implements PrefixResolver
344    {
345        
346        /** Map of namespace URIs, indexed by prefix. */
347        private Map<String, String> _namespaces;
348        
349        /**
350         * Constructor.
351         * @param namespaces the namespaces to resolve, indexed by prefix.
352         */
353        public DefaultPrefixResolver(Map<String, String> namespaces)
354        {
355            _namespaces = new HashMap<>(namespaces);
356        }
357        
358        @Override
359        public String prefixToNamespace(String prefix)
360        {
361            return _namespaces.get(prefix);
362        }
363    }
364    
365}