/*
 *  Copyright 2016 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.webcontentio.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.net.HttpURLConnection;
import java.net.URL;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.avalon.framework.logger.AbstractLogEnabled;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
import org.ametys.cms.data.Binary;
import org.ametys.cms.data.Geocode;
import org.ametys.cms.data.RichText;
import org.ametys.cms.data.type.ModelItemTypeConstants;
import org.ametys.cms.repository.Content;
import org.ametys.core.util.URIUtils;
import org.ametys.plugins.repository.data.holder.ModifiableModelAwareDataHolder;
import org.ametys.plugins.repository.data.holder.group.impl.ModifiableModelAwareComposite;
import org.ametys.plugins.repository.data.holder.group.impl.ModifiableModelAwareRepeater;
import org.ametys.plugins.repository.data.holder.group.impl.ModifiableModelAwareRepeaterEntry;
import org.ametys.plugins.repository.model.CompositeDefinition;
import org.ametys.plugins.repository.model.RepeaterDefinition;
import org.ametys.plugins.webcontentio.ContentImporter;
import org.ametys.runtime.model.ElementDefinition;
import org.ametys.runtime.model.ModelItem;
import org.ametys.runtime.model.type.ElementType;
import org.ametys.web.repository.content.ModifiableWebContent;
import org.ametys.web.repository.page.ModifiablePage;

/**
 * Default XML content importer.
 * <p>
 * The imported file must be an XML document whose root element is <code>content</code>,
 * carrying a <code>type</code> attribute (the content type id) and a <code>title</code> child.
 * Every child element of the root whose local name matches a definition of the content
 * is imported as an attribute (element, composite or repeater).
 */
public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable
{
    /** Extracts the quoted <code>filename</code> parameter from a <code>Content-Disposition</code> header value. */
    private static final Pattern CONTENT_DISPOSITION_FILENAME = Pattern.compile("filename=\"([^\"]+)\"");

    private DOMParser _domParser;
    private XPathProcessor _xPathProcessor;
    private ContentTypeExtensionPoint _contentTypeExtensionPoint;

    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
        _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
    }

    /**
     * Imports the given XML file into the given content: sets the content type read from
     * <code>/content/@type</code>, then imports every attribute found under <code>/content</code>.
     * @param file the XML file to import
     * @param content the content to fill
     * @param params the import parameters (not read by this implementation)
     * @throws IOException if the file cannot be read or parsed, if the <code>content</code> root is missing,
     *                     if the content type is missing or unknown, or if no <code>title</code> element is present
     */
    @Override
    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
    {
        Document doc = getXmlDocFromFile(file);

        if (doc == null)
        {
            throw new IOException("Unable to retrieve the xml document from the file received.");
        }

        Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content");

        // Fail with an explicit message rather than handing a null context node to the XPath processor
        if (xmlContent == null)
        {
            throw new IOException("Invalid file content : no root element 'content' found.");
        }

        String contentTypeId = _xPathProcessor.evaluateAsString(xmlContent, "@type");

        if (StringUtils.isEmpty(contentTypeId))
        {
            throw new IOException("Invalid file content : no content type specified.");
        }

        if (!_contentTypeExtensionPoint.hasExtension(contentTypeId))
        {
            throw new IOException("Invalid file content : the specified content type does not exist.");
        }

        content.setTypes(new String[] {contentTypeId});

        Node title = _xPathProcessor.selectSingleNode(xmlContent, "title");

        if (title == null)
        {
            throw new IOException("Invalid file content : no title found, but it is mandatory.");
        }

        _importAttributes(content, xmlContent);
    }

    @Override
    public String[] getMimeTypes()
    {
        // handles xml mime-types
        return new String[] {"application/xml", "text/xml"};
    }

    @Override
    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
    {
        // Nothing to do
    }

    /**
     * Parses the given file as a UTF-8 XML document.
     * @param file the file to parse
     * @return the parsed document, or <code>null</code> if the file is not valid XML (the error is logged)
     * @throws FileNotFoundException if the file cannot be opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws IOException if an error occurs while reading the file
     */
    private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException
    {
        // try-with-resources: the stream and the reader are closed whatever the outcome of the parsing
        try (InputStream is = new FileInputStream(file);
             Reader reader = new InputStreamReader(is, "UTF-8"))
        {
            return _domParser.parseDocument(new InputSource(reader));
        }
        catch (SAXException e)
        {
            getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e);
            return null;
        }
    }

    /**
     * Imports every child element of the given node that matches a definition of the content.
     * Child elements without a matching definition are skipped.
     * @param content the content to fill
     * @param xmlContent the <code>content</code> XML node
     * @throws IOException if an error occurs while importing an attribute
     */
    private void _importAttributes(ModifiableWebContent content, Node xmlContent) throws IOException
    {
        NodeList attributesNodes = xmlContent.getChildNodes();
        for (int i = 0; i < attributesNodes.getLength(); i++)
        {
            Node attributeNode = attributesNodes.item(i);

            if (attributeNode.getNodeType() == Node.ELEMENT_NODE && content.hasDefinition(attributeNode.getLocalName()))
            {
                ModelItem attributeDefinition = content.getDefinition(attributeNode.getLocalName());
                _importAttribute(content, attributeDefinition, attributeNode);
            }
        }
    }

    /**
     * Imports one attribute, dispatching on the kind of its definition (repeater, composite or element).
     * Does nothing if the definition is <code>null</code> or of another kind.
     * @param dataHolder the data holder to fill
     * @param attributeDefinition the definition of the attribute, can be <code>null</code>
     * @param attributeNode the XML node holding the attribute value
     * @throws IOException if an error occurs while importing the attribute
     */
    private void _importAttribute(ModifiableModelAwareDataHolder dataHolder, ModelItem attributeDefinition, Node attributeNode) throws IOException
    {
        if (attributeDefinition instanceof RepeaterDefinition)
        {
            _setRepeater(dataHolder, (RepeaterDefinition) attributeDefinition, attributeNode);
        }
        else if (attributeDefinition instanceof CompositeDefinition)
        {
            _setComposite(dataHolder, (CompositeDefinition) attributeDefinition, attributeNode);
        }
        else if (attributeDefinition instanceof ElementDefinition)
        {
            // The wildcard is captured by the type parameter of _setAttribute: no raw type needed
            _setAttribute(dataHolder, (ElementDefinition<?>) attributeDefinition, attributeNode);
        }
    }

    /**
     * Imports a repeater: one repeater entry is added for each <code>entry</code> child of the node,
     * and each child element of an entry is imported into it.
     * The repeater is only retrieved/created if at least one <code>entry</code> is present.
     * @param dataHolder the data holder to fill
     * @param repeaterDefinition the definition of the repeater
     * @param repeaterNode the XML node of the repeater
     * @throws IOException if an error occurs while importing a sub-attribute
     */
    private void _setRepeater(ModifiableModelAwareDataHolder dataHolder, RepeaterDefinition repeaterDefinition, Node repeaterNode) throws IOException
    {
        NodeList entryNodes = _xPathProcessor.selectNodeList(repeaterNode, "entry");
        if (entryNodes.getLength() > 0)
        {
            ModifiableModelAwareRepeater repeaterData = dataHolder.getRepeater(repeaterDefinition.getName(), true);
            for (int i = 0; i < entryNodes.getLength(); i++)
            {
                Node entryNode = entryNodes.item(i);
                ModifiableModelAwareRepeaterEntry entryData = repeaterData.addEntry();

                NodeList subDataNodes = entryNode.getChildNodes();
                for (int j = 0; j < subDataNodes.getLength(); j++)
                {
                    Node subDataNode = subDataNodes.item(j);
                    if (subDataNode.getNodeType() == Node.ELEMENT_NODE)
                    {
                        String subDataName = subDataNode.getLocalName();
                        ModelItem childDefinition = repeaterDefinition.getChild(subDataName);

                        _importAttribute(entryData, childDefinition, subDataNode);
                    }
                }
            }
        }
    }

    /**
     * Imports a composite: each child element of the node is imported into the composite.
     * The composite is only retrieved/created if the node has at least one child node.
     * @param dataHolder the data holder to fill
     * @param compositeDefinition the definition of the composite
     * @param compositeNode the XML node of the composite
     * @throws IOException if an error occurs while importing a sub-attribute
     */
    private void _setComposite(ModifiableModelAwareDataHolder dataHolder, CompositeDefinition compositeDefinition, Node compositeNode) throws IOException
    {
        NodeList subDataNodes = compositeNode.getChildNodes();
        if (subDataNodes.getLength() > 0)
        {
            ModifiableModelAwareComposite compositeData = dataHolder.getComposite(compositeDefinition.getName(), true);
            for (int i = 0; i < subDataNodes.getLength(); i++)
            {
                Node subDataNode = subDataNodes.item(i);
                if (subDataNode.getNodeType() == Node.ELEMENT_NODE)
                {
                    String subDataName = subDataNode.getLocalName();
                    ModelItem childDefinition = compositeDefinition.getChild(subDataName);

                    _importAttribute(compositeData, childDefinition, subDataNode);
                }
            }
        }
    }

    /**
     * Imports a simple attribute. For a multiple attribute, one value is read from each
     * <code>value</code> child of the node; otherwise the value is read from the node itself.
     * Nothing is set if no value could be read.
     * @param <T> the type of the attribute value
     * @param dataHolder the data holder to fill
     * @param attributeDefinition the definition of the attribute
     * @param attributeNode the XML node of the attribute
     * @throws IOException if an error occurs while reading a value
     */
    private <T> void _setAttribute(ModifiableModelAwareDataHolder dataHolder, ElementDefinition<T> attributeDefinition, Node attributeNode) throws IOException
    {
        ElementType<T> type = attributeDefinition.getType();

        if (attributeDefinition.isMultiple())
        {
            NodeList valuesNodeList = _xPathProcessor.selectNodeList(attributeNode, "value");
            List<T> values = new ArrayList<>();
            for (int i = 0; i < valuesNodeList.getLength(); i++)
            {
                _getSingleAttributeValue(valuesNodeList.item(i), type)
                    .ifPresent(values::add);
            }

            if (!values.isEmpty())
            {
                // The array is created with the managed class of the type as its component type
                @SuppressWarnings("unchecked")
                T[] valuesAsArray = (T[]) Array.newInstance(type.getManagedClass(), values.size());
                dataHolder.setValue(attributeDefinition.getName(), values.toArray(valuesAsArray));
            }
        }
        else
        {
            _getSingleAttributeValue(attributeNode, type)
                .ifPresent(value -> dataHolder.setValue(attributeDefinition.getName(), value));
        }
    }

    /**
     * Reads a single value from the given node, according to the id of the given type:
     * binary/file, geocode and rich text have a dedicated reading, any other type is cast from the text content.
     * @param <T> the type of the value
     * @param valueNode the XML node holding the value
     * @param type the type of the value
     * @return the value, or an empty Optional if there is none
     * @throws IOException if an error occurs while reading the value
     */
    @SuppressWarnings("unchecked")
    private <T> Optional<T> _getSingleAttributeValue(Node valueNode, ElementType<T> type) throws IOException
    {
        String id = type.getId();
        if (ModelItemTypeConstants.BINARY_ELEMENT_TYPE_ID.equals(id) || ModelItemTypeConstants.FILE_ELEMENT_TYPE_ID.equals(id))
        {
            return (Optional<T>) _getSingleBinaryAttributeValue(valueNode);
        }
        else if (ModelItemTypeConstants.GEOCODE_ELEMENT_TYPE_ID.equals(id))
        {
            return (Optional<T>) _getSingleGeocodeAttributeValue(valueNode);
        }
        else if (ModelItemTypeConstants.RICH_TEXT_ELEMENT_TYPE_ID.equals(id))
        {
            return (Optional<T>) _getSingleRichTextAttributeValue(valueNode);
        }
        else
        {
            return _getSingleDefaultAttributeValue(valueNode, type);
        }
    }

    /**
     * Reads a geocode from the <code>latitude</code> and <code>longitude</code> children of the given node.
     * @param geocodeNode the XML node of the geocode
     * @return the geocode
     * @throws IllegalArgumentException if the latitude or the longitude is missing, empty or not a number
     */
    private Optional<Geocode> _getSingleGeocodeAttributeValue(Node geocodeNode)
    {
        // A missing child element is handled like an empty one instead of failing with a NullPointerException
        Node latitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "latitude");
        String latitude = latitudeNode != null ? latitudeNode.getTextContent() : null;

        Node longitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "longitude");
        String longitude = longitudeNode != null ? longitudeNode.getTextContent() : null;

        if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude))
        {
            return Optional.of(new Geocode(Double.valueOf(latitude), Double.valueOf(longitude)));
        }
        else
        {
            throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'.");
        }
    }

    /**
     * Reads a binary: the text content of the node is a URL, whose target is downloaded in memory.
     * The filename is taken from the URL path, or else from the <code>Content-Disposition</code>
     * response header, or else is "unknown".
     * @param binaryNode the XML node holding the URL of the binary
     * @return the binary, or an empty Optional if the node has no text content
     * @throws IllegalArgumentException if the binary cannot be fetched from the URL
     */
    private Optional<Binary> _getSingleBinaryAttributeValue(Node binaryNode)
    {
        String value = binaryNode.getTextContent();
        if (StringUtils.isEmpty(value))
        {
            return Optional.empty();
        }

        try
        {
            // NOTE(review): the URL comes straight from the imported file - confirm that imports are only accepted from trusted sources
            URL url = new URL(value);
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(1000);
            connection.setReadTimeout(2000);

            String contentType = StringUtils.defaultString(connection.getContentType(), "application/unknown");
            String contentEncoding = StringUtils.defaultString(connection.getContentEncoding(), "");
            String contentDisposition = StringUtils.defaultString(connection.getHeaderField("Content-Disposition"), "");
            String filename = URIUtils.decode(FilenameUtils.getName(connection.getURL().getPath()));
            if (StringUtils.isEmpty(filename))
            {
                // find() and not matches(): the filename parameter is usually only a part of the header
                // value, e.g. 'attachment; filename="file.pdf"'
                Matcher matcher = CONTENT_DISPOSITION_FILENAME.matcher(contentDisposition);
                filename = matcher.find() ? matcher.group(1) : "unknown";
            }

            try (InputStream is = connection.getInputStream())
            {
                // Buffer the whole response so that the binary remains readable once the connection stream is closed
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                IOUtils.copy(is, bos);

                Binary binary = new Binary();
                binary.setLastModificationDate(ZonedDateTime.now());
                binary.setInputStream(new ByteArrayInputStream(bos.toByteArray()));

                if (StringUtils.isNotEmpty(filename))
                {
                    binary.setFilename(filename);
                }
                if (StringUtils.isNotEmpty(contentType))
                {
                    binary.setMimeType(contentType);
                }
                if (StringUtils.isNotEmpty(contentEncoding))
                {
                    binary.setEncoding(contentEncoding);
                }

                return Optional.of(binary);
            }
        }
        catch (Exception e)
        {
            throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e);
        }
    }

    /**
     * Reads a rich text: the first <code>article</code> child element of the node is serialized
     * and used as the UTF-8 XML content of the rich text.
     * @param richTextNode the XML node of the rich text
     * @return the rich text, or an empty Optional if the node has no <code>article</code> child element
     * @throws IOException if the <code>article</code> element cannot be serialized
     */
    private Optional<RichText> _getSingleRichTextAttributeValue(Node richTextNode) throws IOException
    {
        NodeList docbookNodes = richTextNode.getChildNodes();
        for (int i = 0; i < docbookNodes.getLength(); i++)
        {
            Node docbookNode = docbookNodes.item(i);
            if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName()))
            {
                try
                {
                    String docbook = _serializeNode(docbookNode);

                    RichText richText = new RichText();
                    richText.setEncoding("UTF-8");
                    richText.setLastModificationDate(ZonedDateTime.now());
                    richText.setMimeType("text/xml");
                    richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8")));
                    return Optional.of(richText);
                }
                catch (TransformerException e)
                {
                    throw new IOException("Error serializing a docbook node.", e);
                }
            }
        }

        // No article found, return an empty Optional
        return Optional.empty();
    }

    /**
     * Serializes the given DOM node as an XML string.
     * @param node the node to serialize
     * @return the serialized node
     * @throws TransformerException if the serialization fails
     */
    private String _serializeNode(Node node) throws TransformerException
    {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();

        Properties format = new Properties();
        format.setProperty(OutputKeys.METHOD, "xml");
        format.setProperty(OutputKeys.ENCODING, "UTF-8");

        transformer.setOutputProperties(format);

        StringWriter writer = new StringWriter();
        DOMSource domSource = new DOMSource(node);
        StreamResult result = new StreamResult(writer);

        transformer.transform(domSource, result);

        return writer.toString();
    }

    /**
     * Reads a value by casting the text content of the node with the given type.
     * @param <T> the type of the value
     * @param valueNode the XML node holding the value
     * @param type the type used to cast the text content
     * @return the value, or an empty Optional if the cast yields <code>null</code>
     */
    private <T> Optional<T> _getSingleDefaultAttributeValue(Node valueNode, ElementType<T> type)
    {
        String valueAsString = valueNode.getTextContent();
        // ofNullable: a null cast result means "no value", it must not raise a NullPointerException
        return Optional.ofNullable(type.castValue(valueAsString));
    }
}