/*
 *  Copyright 2014 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.contentio.in.xml;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.TransformerHandler;

import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.commons.lang3.StringUtils;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.sax.SAXParser;
import org.apache.excalibur.xml.xpath.PrefixResolver;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.apache.excalibur.xml.xslt.XSLTProcessor;
import org.apache.excalibur.xml.xslt.XSLTProcessorException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import org.ametys.plugins.contentio.AbstractContentImporter;
import org.ametys.plugins.contentio.ContentImporterHelper;

/**
 * Abstract {@link XmlContentImporter} class which provides base XML importer configuration and logic.
 * <p>
 * The incoming XML is either parsed directly into a DOM {@link Document} or, when an XSL stylesheet
 * is configured, first transformed with it. The resulting document is then handed to
 * {@link #importContents(Document, Map)}, which subclasses must implement.
 */
public abstract class AbstractXmlContentImporter extends AbstractContentImporter implements XmlContentImporter
{
    /** The service manager. */
    protected ServiceManager _manager;
    
    /** The source resolver. */
    protected SourceResolver _srcResolver;
    
    /** A DOM parser. */
    protected DOMParser _domParser;
    
    /** The XPath processor. */
    protected XPathProcessor _xPathProcessor;
    
    /** The runtime XSLT processor. */
    protected XSLTProcessor _xsltProcessor;
    
    /** The prefix resolver. */
    protected PrefixResolver _prefixResolver;
    
    /** The XSL transformer handler, lazily created by {@link #initializeXslTransformerHandler()}. */
    protected TransformerHandler _xslTransformerHandler;
    
    /** The configured XML transformation stylesheet URI, null (or empty) when no transformation is configured. */
    protected String _xsl;
    
    @Override
    public void service(ServiceManager serviceManager) throws ServiceException
    {
        super.service(serviceManager);
        _manager = serviceManager;
        _srcResolver = (SourceResolver) serviceManager.lookup(SourceResolver.ROLE);
        _domParser = (DOMParser) serviceManager.lookup(DOMParser.ROLE);
        _xPathProcessor = (XPathProcessor) serviceManager.lookup(XPathProcessor.ROLE);
        _xsltProcessor = (XSLTProcessor) serviceManager.lookup(XSLTProcessor.ROLE + "/xalan");
    }
    
    @Override
    public void configure(Configuration configuration) throws ConfigurationException
    {
        super.configure(configuration);
        
        configureXml(configuration.getChild("xml"));
    }
    
    /**
     * Configure XML-specific properties: the optional XSL stylesheet (<code>xsl/@src</code>)
     * and the namespace declarations (<code>namespaces</code>).
     * @param configuration the XML configuration.
     * @throws ConfigurationException if an error occurs.
     */
    protected void configureXml(Configuration configuration) throws ConfigurationException
    {
        _xsl = configuration.getChild("xsl").getAttribute("src", null);
        
        configureNamespaces(configuration.getChild("namespaces"));
    }
    
    @Override
    protected void configureContentCreation(Configuration configuration) throws ConfigurationException
    {
        // Override default configuration to be more permissive:
        // every value defaults to an empty string instead of being required.
        String typesStr = configuration.getChild("content-types").getValue("");
        // Both ',' and ' ' act as separator characters.
        _contentTypes = StringUtils.split(typesStr, ", ");
        
        String mixins = configuration.getChild("mixins").getValue("");
        _mixins = StringUtils.split(mixins, ", ");
        
        _language = configuration.getChild("language").getValue("");
        
        configureWorkflow(configuration);
    }
    
    /**
     * Configure the namespaces to use and build the corresponding prefix resolver.
     * @param configuration the namespaces configuration, can be null (no namespace is then declared).
     * @throws ConfigurationException if a <code>namespace</code> element has no <code>uri</code> attribute.
     */
    protected void configureNamespaces(Configuration configuration) throws ConfigurationException
    {
        Map<String, String> namespaces = new HashMap<>();
        
        if (configuration != null)
        {
            for (Configuration nsConf : configuration.getChildren("namespace"))
            {
                // A missing prefix is mapped to the empty string.
                String prefix = nsConf.getAttribute("prefix", "");
                String namespace = nsConf.getAttribute("uri");
                
                namespaces.put(prefix, namespace);
            }
        }
        
        _prefixResolver = new DefaultPrefixResolver(namespaces);
    }
    
    /**
     * Get the prefix resolver.
     * @return the prefix resolver.
     */
    protected PrefixResolver getPrefixResolver()
    {
        return _prefixResolver;
    }
    
    @Override
    public boolean supports(InputStream is, String name) throws IOException
    {
        try
        {
            // The stream is parsed as a DOM document; the decision is delegated to supports(Document).
            Document doc = _domParser.parseDocument(new InputSource(is));
            
            return supports(doc);
        }
        catch (SAXException e)
        {
            throw new IOException("Error parsing the document.", e);
        }
    }
    
    @Override
    public Set<String> importContents(InputStream is, Map<String, Object> params) throws IOException
    {
        Set<String> contentIds = new HashSet<>();
        SAXParser saxParser = null;
        
        try
        {
            Document document = null;
            
            // Either parse the document (no XSL) or transform it.
            // An empty stylesheet URI is treated like a missing one, consistently with
            // initializeXslTransformerHandler(), which creates no handler in that case.
            if (StringUtils.isEmpty(_xsl))
            {
                document = _domParser.parseDocument(new InputSource(is));
            }
            else
            {
                // Initialize the XSL transformer.
                initializeXslTransformerHandler();
                
                // Transform the XML doc with the configured XSL.
                DOMResult result = new DOMResult();
                _xslTransformerHandler.setResult(result);
                saxParser = (SAXParser) _manager.lookup(SAXParser.ROLE);
                saxParser.parse(new InputSource(is), _xslTransformerHandler);
                Node node = result.getNode();
                
                // Only a full document can be imported; any other result is ignored.
                if (node instanceof Document)
                {
                    document = (Document) node;
                }
            }
            
            if (document != null)
            {
                if (getLogger().isDebugEnabled())
                {
                    getLogger().debug("Importing contents from document:\n {}", ContentImporterHelper.serializeNode(document, true));
                }
                
                contentIds = importContents(document, params);
            }
        }
        catch (ServiceException e)
        {
            getLogger().error("Unable to get a SAX parser.", e);
            throw new IOException("Unable to get a SAX parser.", e);
        }
        catch (SAXException e)
        {
            // Should never happen: the XML has already been parsed in supports().
            getLogger().error("Error parsing the XML document.", e);
            throw new IOException("Error parsing the XML document.", e);
        }
        catch (TransformerException | XSLTProcessorException e)
        {
            getLogger().error("Error parsing the XML document.", e);
            throw new IOException("Error parsing the XML document.", e);
        }
        finally
        {
            // The SAX parser is only looked up on the XSL path.
            if (saxParser != null)
            {
                _manager.release(saxParser);
            }
        }
        
        return contentIds;
    }
    
    /**
     * Import the contents from the XML DOM {@link Document}.
     * @param document the XML Document.
     * @param params the import parameters.
     * @return a Set of the imported content IDs.
     * @throws IOException if an error occurs importing the contents.
     */
    protected abstract Set<String> importContents(Document document, Map<String, Object> params) throws IOException;
    
    /**
     * Initialize the transformer handler from the configured XSL.
     * Does nothing if the handler already exists or if no XSL is configured.
     * <p>
     * NOTE(review): the handler is created once and cached in an instance field, then reused by every
     * call to {@link #importContents(InputStream, Map)}; confirm that the handler supports being reused
     * and that imports are not run concurrently on the same importer.
     * @throws IOException if an error occurs reading the XSL.
     * @throws XSLTProcessorException if an error occurs during the XSL transformer manipulation
     */
    protected void initializeXslTransformerHandler() throws IOException, XSLTProcessorException
    {
        if (_xslTransformerHandler == null && StringUtils.isNotEmpty(_xsl))
        {
            Source xslSource = null;
            
            try
            {
                xslSource = _srcResolver.resolveURI(_xsl);
                
                TransformerHandler handler = _xsltProcessor.getTransformerHandler(xslSource);
                
                Properties format = new Properties();
                format.put(OutputKeys.METHOD, "xml");
                format.put(OutputKeys.INDENT, "no");
                format.put(OutputKeys.ENCODING, "UTF-8");
                
                handler.getTransformer().setOutputProperties(format);
                
                // Only publish the handler once it is fully configured.
                _xslTransformerHandler = handler;
            }
            finally
            {
                // resolveURI() may have failed before a source was obtained.
                if (xslSource != null)
                {
                    _srcResolver.release(xslSource);
                }
            }
        }
    }
    
    /**
     * Get a node's text content, without trimming it.
     * @param node the node, can be null.
     * @param defaultValue the default value.
     * @return the node's text content, or the default value if the given node is null.
     */
    protected String getTextContent(Node node, String defaultValue)
    {
        return getTextContent(node, defaultValue, false);
    }
    
    /**
     * Get a node's text content, optionally trimmed.
     * @param node the node, can be null.
     * @param defaultValue the default value.
     * @param trim true to trim the text content, false otherwise.
     * @return the node's text content, or the default value if the given node is null.
     *         Can be null if the node itself has no text content (DOM defines it as null for some node types).
     */
    protected String getTextContent(Node node, String defaultValue, boolean trim)
    {
        if (node == null)
        {
            return defaultValue;
        }
        
        String text = node.getTextContent();
        // Node.getTextContent() may return null: only trim an actual value.
        return trim && text != null ? text.trim() : text;
    }
    
    /**
     * Get a node's attribute value (trimmed).
     * @param node the node, can be null.
     * @param name the attribute name.
     * @param defaultValue the default value.
     * @return the node's attribute value, or the default value if the given node is null
     *         or the attribute doesn't exist.
     */
    protected String getAttributeValue(Node node, String name, String defaultValue)
    {
        return getAttributeValue(node, name, defaultValue, true);
    }
    
    /**
     * Get a node's attribute value, optionally trimmed.
     * @param node the node, can be null.
     * @param name the attribute name.
     * @param defaultValue the default value.
     * @param trim true to trim the attribute value, false otherwise.
     * @return the node's attribute value, or the default value if the given node is null,
     *         cannot hold attributes, or the attribute doesn't exist.
     */
    protected String getAttributeValue(Node node, String name, String defaultValue, boolean trim)
    {
        if (node == null)
        {
            return defaultValue;
        }
        
        // Node.getAttributes() returns null for any node which is not an element.
        NamedNodeMap attributes = node.getAttributes();
        Node attrNode = attributes != null ? attributes.getNamedItem(name) : null;
        if (attrNode == null)
        {
            return defaultValue;
        }
        
        String text = attrNode.getTextContent();
        return trim && text != null ? text.trim() : text;
    }
    
    /**
     * Configurable XML prefix resolver, backed by a prefix-to-URI map.
     */
    protected static class DefaultPrefixResolver implements PrefixResolver
    {
        
        /** Map of namespace URIs, indexed by prefix. */
        private final Map<String, String> _namespaces;
        
        /**
         * Constructor.
         * @param namespaces the namespaces to resolve, indexed by prefix. The map is copied.
         */
        public DefaultPrefixResolver(Map<String, String> namespaces)
        {
            _namespaces = new HashMap<>(namespaces);
        }
        
        @Override
        public String prefixToNamespace(String prefix)
        {
            // Returns null for an undeclared prefix.
            return _namespaces.get(prefix);
        }
    }
    
}