001/* 002 * Copyright 2014 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.contentio.in.xml; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.io.StringWriter; 021import java.util.HashMap; 022import java.util.HashSet; 023import java.util.Map; 024import java.util.Properties; 025import java.util.Set; 026 027import javax.xml.transform.OutputKeys; 028import javax.xml.transform.Transformer; 029import javax.xml.transform.TransformerException; 030import javax.xml.transform.TransformerFactory; 031import javax.xml.transform.dom.DOMResult; 032import javax.xml.transform.dom.DOMSource; 033import javax.xml.transform.sax.TransformerHandler; 034import javax.xml.transform.stream.StreamResult; 035 036import org.apache.avalon.framework.configuration.Configuration; 037import org.apache.avalon.framework.configuration.ConfigurationException; 038import org.apache.avalon.framework.service.ServiceException; 039import org.apache.avalon.framework.service.ServiceManager; 040import org.apache.commons.lang3.StringUtils; 041import org.apache.excalibur.source.Source; 042import org.apache.excalibur.source.SourceResolver; 043import org.apache.excalibur.xml.dom.DOMParser; 044import org.apache.excalibur.xml.sax.SAXParser; 045import org.apache.excalibur.xml.xpath.PrefixResolver; 046import org.apache.excalibur.xml.xpath.XPathProcessor; 047import org.apache.excalibur.xml.xslt.XSLTProcessor; 048import org.apache.excalibur.xml.xslt.XSLTProcessorException; 049import org.w3c.dom.Document; 050import org.w3c.dom.NamedNodeMap; 051import org.w3c.dom.Node; 052import org.xml.sax.InputSource; 053import org.xml.sax.SAXException; 054 055import org.ametys.plugins.contentio.AbstractContentImporter; 056 057/** 058 * Abstract {@link XmlContentImporter} class which provides base XML importer configuration and logic. 059 */ 060public abstract class AbstractXmlContentImporter extends AbstractContentImporter implements XmlContentImporter 061{ 062 063 /** The source resolver. */ 064 protected SourceResolver _srcResolver; 065 066 /** A DOM parser. */ 067 protected DOMParser _domParser; 068 069 /** A SAX parser. */ 070 protected SAXParser _saxParser; 071 072 /** The XPath processor. */ 073 protected XPathProcessor _xPathProcessor; 074 075 /** The runtime XSLT processor. */ 076 protected XSLTProcessor _xsltProcessor; 077 078 /** The prefix resolver. */ 079 protected PrefixResolver _prefixResolver; 080 081 /** The XSL transformer handler. */ 082 protected TransformerHandler _xslTransformerHandler; 083 084 /** The configured XML transformation stylesheet. */ 085 protected String _xsl; 086 087 @Override 088 public void service(ServiceManager serviceManager) throws ServiceException 089 { 090 super.service(serviceManager); 091 _srcResolver = (SourceResolver) serviceManager.lookup(SourceResolver.ROLE); 092 _domParser = (DOMParser) serviceManager.lookup(DOMParser.ROLE); 093 _xPathProcessor = (XPathProcessor) serviceManager.lookup(XPathProcessor.ROLE); 094 095 _saxParser = (SAXParser) serviceManager.lookup(SAXParser.ROLE); 096 _xsltProcessor = (XSLTProcessor) serviceManager.lookup(XSLTProcessor.ROLE + "/xalan"); 097 } 098 099 @Override 100 public void configure(Configuration configuration) throws ConfigurationException 101 { 102 super.configure(configuration); 103 104 configureXml(configuration.getChild("xml")); 105 } 106 107 /** 108 * Configure XML-specific properties. 109 * @param configuration the XML configuration. 110 * @throws ConfigurationException if an error occurs. 111 */ 112 protected void configureXml(Configuration configuration) throws ConfigurationException 113 { 114 _xsl = configuration.getChild("xsl").getAttribute("src", null); 115 116 configureNamespaces(configuration.getChild("namespaces")); 117 } 118 119 @Override 120 protected void configureContentCreation(Configuration configuration) throws ConfigurationException 121 { 122 // Override default configuration to be more permissive. 123 String typesStr = configuration.getChild("content-types").getValue(""); 124 _contentTypes = StringUtils.split(typesStr, ", "); 125 126 String mixins = configuration.getChild("mixins").getValue(""); 127 _mixins = StringUtils.split(mixins, ", "); 128 129 _language = configuration.getChild("language").getValue(""); 130 131 configureWorkflow(configuration); 132 } 133 134 /** 135 * Configure the namespace to use. 136 * @param configuration the namespaces configuration, can be null. 137 * @throws ConfigurationException if an error occurs. 138 */ 139 protected void configureNamespaces(Configuration configuration) throws ConfigurationException 140 { 141 Map<String, String> namespaces = new HashMap<>(); 142 143 for (Configuration nsConf : configuration.getChildren("namespace")) 144 { 145 String prefix = nsConf.getAttribute("prefix", ""); 146 String namespace = nsConf.getAttribute("uri"); 147 148 namespaces.put(prefix, namespace); 149 } 150 151 _prefixResolver = new DefaultPrefixResolver(namespaces); 152 } 153 154 /** 155 * Get the prefix resolver. 156 * @return the prefix resolver. 157 */ 158 protected PrefixResolver getPrefixResolver() 159 { 160 return _prefixResolver; 161 } 162 163 @Override 164 public boolean supports(InputStream is, String name) throws IOException 165 { 166 try 167 { 168 Document doc = _domParser.parseDocument(new InputSource(is)); 169 170 return supports(doc); 171 } 172 catch (SAXException e) 173 { 174 throw new IOException("Error parsing the document.", e); 175 } 176 } 177 178 @Override 179 public Set<String> importContents(InputStream is, Map<String, Object> params) throws IOException 180 { 181 Set<String> contentIds = new HashSet<>(); 182 183 try 184 { 185 Document document = null; 186 187 // Either parse the document (no XSL) or transform 188 if (_xsl == null) 189 { 190 document = _domParser.parseDocument(new InputSource(is)); 191 } 192 else 193 { 194 // Initialize the XSL transformer. 195 initializeXslTransformerHandler(); 196 197 // Transform the XML doc with the configured XSL. 198 DOMResult result = new DOMResult(); 199 _xslTransformerHandler.setResult(result); 200 _saxParser.parse(new InputSource(is), _xslTransformerHandler); 201 Node node = result.getNode(); 202 203 if (node instanceof Document) 204 { 205 document = (Document) node; 206 } 207 } 208 209 if (document != null) 210 { 211 if (getLogger().isDebugEnabled()) 212 { 213 getLogger().debug("Importing contents from document:\n" + serializeNode(document, true)); 214 } 215 216 contentIds = importContents(document, params); 217 } 218 } 219 catch (SAXException e) 220 { 221 // Should never happen: the XML has already been parsed in supports(). 222 getLogger().error("Error parsing the XML document.", e); 223 throw new IOException("Error parsing the XML document.", e); 224 } 225 catch (TransformerException e) 226 { 227 getLogger().error("Error parsing the XML document.", e); 228 throw new IOException("Error parsing the XML document.", e); 229 } 230 catch (XSLTProcessorException e) 231 { 232 getLogger().error("Error parsing the XML document.", e); 233 throw new IOException("Error parsing the XML document.", e); 234 } 235 236 return contentIds; 237 } 238 239 /** 240 * Import the contents from the XML DOM {@link Document}. 241 * @param document the XML Document. 242 * @param params the import parameters. 243 * @return a Set of the imported content IDs. 244 * @throws IOException if an error occurs importing the contents. 245 */ 246 protected abstract Set<String> importContents(Document document, Map<String, Object> params) throws IOException; 247 248 /** 249 * Initialize the transformer from the configured XSL. 250 * @throws IOException if an errors occurs reading the XSL. 251 * @throws XSLTProcessorException of an error occurs during the XSL transformer manipulation 252 */ 253 protected void initializeXslTransformerHandler() throws IOException, XSLTProcessorException 254 { 255 if (_xslTransformerHandler == null && StringUtils.isNotEmpty(_xsl)) 256 { 257 Source xslSource = null; 258 259 try 260 { 261 xslSource = _srcResolver.resolveURI(_xsl); 262 263 _xslTransformerHandler = _xsltProcessor.getTransformerHandler(xslSource); 264 265 Properties format = new Properties(); 266 format.put(OutputKeys.METHOD, "xml"); 267 format.put(OutputKeys.INDENT, "no"); 268 format.put(OutputKeys.ENCODING, "UTF-8"); 269 270 _xslTransformerHandler.getTransformer().setOutputProperties(format); 271 } 272 finally 273 { 274 _srcResolver.release(xslSource); 275 } 276 } 277 } 278 279 /** 280 * Get a node's text content, without trimming it. 281 * @param node the node, can be null. 282 * @param defaultValue the default value. 283 * @return the node's text content, or the default value if the given node is null. 284 */ 285 protected String getTextContent(Node node, String defaultValue) 286 { 287 return getTextContent(node, defaultValue, false); 288 } 289 290 /** 291 * Get a node's text content, optionally trimmed. 292 * @param node the node, can be null. 293 * @param defaultValue the default value. 294 * @param trim true to trim the text content, false otherwise. 295 * @return the node's text content, or the default value if the given node is null. 296 */ 297 protected String getTextContent(Node node, String defaultValue, boolean trim) 298 { 299 String value = defaultValue; 300 if (node != null) 301 { 302 value = trim ? node.getTextContent().trim() : node.getTextContent(); 303 } 304 return value; 305 } 306 307 /** 308 * Get a node's attribute value (trimmed). 309 * @param node the node, can be null. 310 * @param name the attribute name. 311 * @param defaultValue the default value. 312 * @return the node's attribute value, or the default value if the given node is null 313 * or the attribute doesn't exist. 314 */ 315 protected String getAttributeValue(Node node, String name, String defaultValue) 316 { 317 return getAttributeValue(node, name, defaultValue, true); 318 } 319 320 /** 321 * Get a node's attribute value, optionally trimmed. 322 * @param node the node, can be null. 323 * @param name the attribute name. 324 * @param defaultValue the default value. 325 * @param trim true 326 * @return the node's attribute value, or the default value if the given node is null 327 * or the attribute doesn't exist. 328 */ 329 protected String getAttributeValue(Node node, String name, String defaultValue, boolean trim) 330 { 331 String value = defaultValue; 332 if (node != null) 333 { 334 NamedNodeMap attributes = node.getAttributes(); 335 Node attrNode = attributes.getNamedItem(name); 336 if (attrNode != null) 337 { 338 value = trim ? attrNode.getTextContent().trim() : attrNode.getTextContent(); 339 } 340 } 341 return value; 342 } 343 344 /** 345 * Serialize a XML node as a String, with XML declaration and without indentation. 346 * @param node the node. 347 * @return the XML string. 348 * @throws TransformerException if an error occurs. 349 */ 350 protected String serializeNode(Node node) throws TransformerException 351 { 352 return serializeNode(node, false, false); 353 } 354 355 /** 356 * Serialize a XML node as a String, with XML declaration. 357 * @param node the node. 358 * @param indent true to indent the result, false otherwise. 359 * @return the XML string. 360 * @throws TransformerException if an error occurs. 361 */ 362 protected String serializeNode(Node node, boolean indent) throws TransformerException 363 { 364 return serializeNode(node, indent, false); 365 } 366 367 /** 368 * Serialize a XML node as a String. 369 * @param node the node. 370 * @param indent true to indent the result, false otherwise. 371 * @param omitXmlDeclaration true to omit XML declaration, false otherwise. 372 * @return the XML string. 373 * @throws TransformerException if an error occurs. 374 */ 375 protected String serializeNode(Node node, boolean indent, boolean omitXmlDeclaration) throws TransformerException 376 { 377 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 378 379 Properties format = new Properties(); 380 format.put(OutputKeys.METHOD, "xml"); 381 format.put(OutputKeys.OMIT_XML_DECLARATION, omitXmlDeclaration ? "yes" : "no"); 382 format.put(OutputKeys.INDENT, indent ? "yes" : "no"); 383 format.put(OutputKeys.ENCODING, "UTF-8"); 384 385 transformer.setOutputProperties(format); 386 387 StringWriter writer = new StringWriter(); 388 DOMSource domSource = new DOMSource(node); 389 StreamResult result = new StreamResult(writer); 390 391 transformer.transform(domSource, result); 392 393 return writer.toString(); 394 } 395 396 /** 397 * Configurable XML prefix resolver. 398 */ 399 protected static class DefaultPrefixResolver implements PrefixResolver 400 { 401 402 /** Map of namespace URIs, indexed by prefix. */ 403 private Map<String, String> _namespaces; 404 405 /** 406 * Constructor. 407 * @param namespaces the namespaces to resolve, indexed by prefix. 408 */ 409 public DefaultPrefixResolver(Map<String, String> namespaces) 410 { 411 _namespaces = new HashMap<>(namespaces); 412 } 413 414 @Override 415 public String prefixToNamespace(String prefix) 416 { 417 return _namespaces.get(prefix); 418 } 419 } 420 421}