001/* 002 * Copyright 2014 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.contentio.in.xml; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.util.HashMap; 021import java.util.HashSet; 022import java.util.Map; 023import java.util.Properties; 024import java.util.Set; 025 026import javax.xml.transform.OutputKeys; 027import javax.xml.transform.TransformerException; 028import javax.xml.transform.dom.DOMResult; 029import javax.xml.transform.sax.TransformerHandler; 030 031import org.apache.avalon.framework.configuration.Configuration; 032import org.apache.avalon.framework.configuration.ConfigurationException; 033import org.apache.avalon.framework.service.ServiceException; 034import org.apache.avalon.framework.service.ServiceManager; 035import org.apache.commons.lang3.StringUtils; 036import org.apache.excalibur.source.Source; 037import org.apache.excalibur.source.SourceResolver; 038import org.apache.excalibur.xml.dom.DOMParser; 039import org.apache.excalibur.xml.sax.SAXParser; 040import org.apache.excalibur.xml.xpath.PrefixResolver; 041import org.apache.excalibur.xml.xpath.XPathProcessor; 042import org.apache.excalibur.xml.xslt.XSLTProcessor; 043import org.apache.excalibur.xml.xslt.XSLTProcessorException; 044import org.w3c.dom.Document; 045import org.w3c.dom.Element; 046import org.w3c.dom.Node; 047import org.xml.sax.InputSource; 048import org.xml.sax.SAXException; 049 050import org.ametys.plugins.contentio.ContentImporterHelper; 051import org.ametys.plugins.contentio.in.AbstractContentImporter; 052 053/** 054 * Abstract {@link XmlContentImporter} class which provides base XML importer configuration and logic. 055 */ 056public abstract class AbstractXmlContentImporter extends AbstractContentImporter implements XmlContentImporter 057{ 058 /** The service manager. */ 059 protected ServiceManager _manager; 060 061 /** The source resolver. */ 062 protected SourceResolver _srcResolver; 063 064 /** A DOM parser. */ 065 protected DOMParser _domParser; 066 067 /** The XPath processor. */ 068 protected XPathProcessor _xPathProcessor; 069 070 /** The runtime XSLT processor. */ 071 protected XSLTProcessor _xsltProcessor; 072 073 /** The prefix resolver. */ 074 protected PrefixResolver _prefixResolver; 075 076 /** The XSL transformer handler. */ 077 protected TransformerHandler _xslTransformerHandler; 078 079 /** The configured XML transformation stylesheet. */ 080 protected String _xsl; 081 082 @Override 083 public void service(ServiceManager serviceManager) throws ServiceException 084 { 085 super.service(serviceManager); 086 _manager = serviceManager; 087 _srcResolver = (SourceResolver) serviceManager.lookup(SourceResolver.ROLE); 088 _domParser = (DOMParser) serviceManager.lookup(DOMParser.ROLE); 089 _xPathProcessor = (XPathProcessor) serviceManager.lookup(XPathProcessor.ROLE); 090 _xsltProcessor = (XSLTProcessor) serviceManager.lookup(XSLTProcessor.ROLE + "/xalan"); 091 } 092 093 @Override 094 public void configure(Configuration configuration) throws ConfigurationException 095 { 096 super.configure(configuration); 097 configureXml(configuration.getChild("xml")); 098 } 099 100 /** 101 * Configure XML-specific properties. 102 * @param configuration the XML configuration. 103 * @throws ConfigurationException if an error occurs. 104 */ 105 protected void configureXml(Configuration configuration) throws ConfigurationException 106 { 107 _xsl = configuration.getChild("xsl").getAttribute("src", null); 108 configureNamespaces(configuration.getChild("namespaces")); 109 } 110 111 @Override 112 protected void configureContentCreation(Configuration configuration) throws ConfigurationException 113 { 114 // Override default configuration to be more permissive. 115 String typesStr = configuration.getChild("content-types").getValue(""); 116 _contentTypes = StringUtils.split(typesStr, ", "); 117 118 String mixins = configuration.getChild("mixins").getValue(""); 119 _mixins = StringUtils.split(mixins, ", "); 120 121 _language = configuration.getChild("language").getValue(""); 122 123 configureWorkflow(configuration); 124 } 125 126 /** 127 * Configure the namespace to use. 128 * @param configuration the namespaces configuration, can be null. 129 * @throws ConfigurationException if an error occurs. 130 */ 131 protected void configureNamespaces(Configuration configuration) throws ConfigurationException 132 { 133 Map<String, String> namespaces = new HashMap<>(); 134 135 for (Configuration nsConf : configuration.getChildren("namespace")) 136 { 137 String prefix = nsConf.getAttribute("prefix", ""); 138 String namespace = nsConf.getAttribute("uri"); 139 140 namespaces.put(prefix, namespace); 141 } 142 143 _prefixResolver = new DefaultPrefixResolver(namespaces); 144 } 145 146 /** 147 * Get the prefix resolver. 148 * @return the prefix resolver. 149 */ 150 protected PrefixResolver getPrefixResolver() 151 { 152 return _prefixResolver; 153 } 154 155 @Override 156 public boolean supports(InputStream is, String name) throws IOException 157 { 158 try 159 { 160 Document doc = _domParser.parseDocument(new InputSource(is)); 161 162 return supports(doc); 163 } 164 catch (SAXException e) 165 { 166 throw new IOException("Error parsing the document.", e); 167 } 168 } 169 170 @Override 171 public Set<String> importContents(InputStream is, Map<String, Object> params) throws IOException 172 { 173 Set<String> contentIds = new HashSet<>(); 174 SAXParser saxParser = null; 175 176 try 177 { 178 Document document = null; 179 180 // Either parse the document (no XSL) or transform 181 if (_xsl == null) 182 { 183 document = _domParser.parseDocument(new InputSource(is)); 184 } 185 else 186 { 187 // Initialize the XSL transformer. 188 initializeXslTransformerHandler(); 189 190 // Transform the XML doc with the configured XSL. 191 DOMResult result = new DOMResult(); 192 _xslTransformerHandler.setResult(result); 193 saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE); 194 saxParser.parse(new InputSource(is), _xslTransformerHandler); 195 Node node = result.getNode(); 196 197 if (node instanceof Document) 198 { 199 document = (Document) node; 200 } 201 } 202 203 if (document != null) 204 { 205 if (getLogger().isDebugEnabled()) 206 { 207 getLogger().debug("Importing contents from document:\n {}", ContentImporterHelper.serializeNode(document, true)); 208 } 209 210 contentIds = importContents(document, params); 211 } 212 } 213 catch (ServiceException e) 214 { 215 getLogger().error("Unable to get a SAX parser.", e); 216 throw new IOException("Unable to get a SAX parser.", e); 217 } 218 catch (SAXException | TransformerException | XSLTProcessorException e) 219 { 220 getLogger().error("Error parsing the XML document.", e); 221 throw new IOException("Error parsing the XML document.", e); 222 } 223 finally 224 { 225 _manager.release(saxParser); 226 } 227 228 return contentIds; 229 } 230 231 /** 232 * Import the contents from the XML DOM {@link Document}. 233 * @param document the XML Document. 234 * @param params the import parameters. 235 * @return a Set of the imported content IDs. 236 * @throws IOException if an error occurs importing the contents. 237 */ 238 protected abstract Set<String> importContents(Document document, Map<String, Object> params) throws IOException; 239 240 /** 241 * Initialize the transformer from the configured XSL. 242 * @throws IOException if an errors occurs reading the XSL. 243 * @throws XSLTProcessorException of an error occurs during the XSL transformer manipulation 244 */ 245 protected void initializeXslTransformerHandler() throws IOException, XSLTProcessorException 246 { 247 if (_xslTransformerHandler == null && StringUtils.isNotEmpty(_xsl)) 248 { 249 Source xslSource = null; 250 251 try 252 { 253 xslSource = _srcResolver.resolveURI(_xsl); 254 255 _xslTransformerHandler = _xsltProcessor.getTransformerHandler(xslSource); 256 257 Properties format = new Properties(); 258 format.put(OutputKeys.METHOD, "xml"); 259 format.put(OutputKeys.INDENT, "no"); 260 format.put(OutputKeys.ENCODING, "UTF-8"); 261 262 _xslTransformerHandler.getTransformer().setOutputProperties(format); 263 } 264 finally 265 { 266 _srcResolver.release(xslSource); 267 } 268 } 269 } 270 271 /** 272 * Get a node's text content, without trimming it. 273 * @param node the node, can be null. 274 * @param defaultValue the default value. 275 * @return the node's text content, or the default value if the given node is null. 276 */ 277 protected String getTextContent(Node node, String defaultValue) 278 { 279 return getTextContent(node, defaultValue, false); 280 } 281 282 /** 283 * Get a node's text content, optionally trimmed. 284 * @param node the node, can be null. 285 * @param defaultValue the default value. 286 * @param trim true to trim the text content, false otherwise. 287 * @return the node's text content, or the default value if the given node is null. 288 */ 289 protected String getTextContent(Node node, String defaultValue, boolean trim) 290 { 291 String value = defaultValue; 292 if (node != null) 293 { 294 value = trim ? node.getTextContent().trim() : node.getTextContent(); 295 } 296 297 return value; 298 } 299 300 /** 301 * Get an element attribute value (trimmed). 302 * @param element the {@link Element}, can be null. 303 * @param name the attribute name. 304 * @param defaultValue the default value. 305 * @return the node's attribute value, or the default value if the given node is null 306 * or the attribute doesn't exist. 307 */ 308 protected String getAttributeValue(Element element, String name, String defaultValue) 309 { 310 return getAttributeValue(element, name, defaultValue, true); 311 } 312 313 /** 314 * Get a node's attribute value, optionally trimmed. 315 * @param element the {@link Element}, can be null. 316 * @param name the attribute name. 317 * @param defaultValue the default value. 318 * @param trim true 319 * @return the node's attribute value, or the default value if the given node is null or the attribute doesn't exist. 320 */ 321 protected String getAttributeValue(Element element, String name, String defaultValue, boolean trim) 322 { 323 String value = defaultValue; 324 if (element != null) 325 { 326 value = element.getAttribute(name); 327 } 328 329 return value != null ? trim ? value.trim() : value : null; 330 } 331 332 /** 333 * Configurable XML prefix resolver. 334 */ 335 protected static class DefaultPrefixResolver implements PrefixResolver 336 { 337 /** Map of namespace URIs, indexed by prefix. */ 338 private Map<String, String> _namespaces; 339 340 /** 341 * Constructor. 342 * @param namespaces the namespaces to resolve, indexed by prefix. 343 */ 344 public DefaultPrefixResolver(Map<String, String> namespaces) 345 { 346 _namespaces = new HashMap<>(namespaces); 347 } 348 349 @Override 350 public String prefixToNamespace(String prefix) 351 { 352 return _namespaces.get(prefix); 353 } 354 } 355}