001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.webcontentio.xml; 017 018import java.io.ByteArrayInputStream; 019import java.io.ByteArrayOutputStream; 020import java.io.File; 021import java.io.FileInputStream; 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.InputStreamReader; 026import java.io.Reader; 027import java.io.StringWriter; 028import java.io.UnsupportedEncodingException; 029import java.lang.reflect.Array; 030import java.net.HttpURLConnection; 031import java.net.URL; 032import java.time.ZonedDateTime; 033import java.util.ArrayList; 034import java.util.List; 035import java.util.Map; 036import java.util.Objects; 037import java.util.Optional; 038import java.util.Properties; 039import java.util.regex.Matcher; 040import java.util.regex.Pattern; 041 042import javax.xml.transform.OutputKeys; 043import javax.xml.transform.Transformer; 044import javax.xml.transform.TransformerException; 045import javax.xml.transform.TransformerFactory; 046import javax.xml.transform.dom.DOMSource; 047import javax.xml.transform.stream.StreamResult; 048 049import org.apache.avalon.framework.logger.AbstractLogEnabled; 050import org.apache.avalon.framework.service.ServiceException; 051import org.apache.avalon.framework.service.ServiceManager; 052import org.apache.avalon.framework.service.Serviceable; 053import org.apache.commons.io.FilenameUtils; 054import org.apache.commons.io.IOUtils; 055import org.apache.commons.lang3.StringUtils; 056import org.apache.excalibur.xml.dom.DOMParser; 057import org.apache.excalibur.xml.xpath.XPathProcessor; 058import org.w3c.dom.Document; 059import org.w3c.dom.Node; 060import org.w3c.dom.NodeList; 061import org.xml.sax.InputSource; 062import org.xml.sax.SAXException; 063 064import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 065import org.ametys.cms.data.Binary; 066import org.ametys.cms.data.Geocode; 067import org.ametys.cms.data.RichText; 068import org.ametys.cms.data.type.ModelItemTypeConstants; 069import org.ametys.cms.repository.Content; 070import org.ametys.core.util.URIUtils; 071import org.ametys.plugins.repository.data.holder.ModifiableModelAwareDataHolder; 072import org.ametys.plugins.repository.data.holder.group.ModifiableModelAwareComposite; 073import org.ametys.plugins.repository.data.holder.group.ModifiableModelAwareRepeater; 074import org.ametys.plugins.repository.data.holder.group.ModifiableModelAwareRepeaterEntry; 075import org.ametys.plugins.repository.model.CompositeDefinition; 076import org.ametys.plugins.repository.model.RepeaterDefinition; 077import org.ametys.plugins.webcontentio.ContentImporter; 078import org.ametys.runtime.model.ElementDefinition; 079import org.ametys.runtime.model.ModelItem; 080import org.ametys.runtime.model.type.ElementType; 081import org.ametys.web.repository.content.ModifiableWebContent; 082import org.ametys.web.repository.page.ModifiablePage; 083 084/** 085 * Default XML content importer 086 */ 087public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable 088{ 089 private DOMParser _domParser; 090 private XPathProcessor _xPathProcessor; 091 private ContentTypeExtensionPoint _contentTypeExtensionPoint; 092 093 @Override 094 public void service(ServiceManager manager) throws ServiceException 095 { 096 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 097 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 098 _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 099 } 100 101 @Override 102 public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException 103 { 104 Document doc = getXmlDocFromFile(file); 105 106 if (doc == null) 107 { 108 throw new IOException("Unable to retrieve the xml document from the file received."); 109 } 110 111 Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content"); 112 113 String contentTypeId = _xPathProcessor.evaluateAsString(xmlContent, "@type"); 114 115 if (StringUtils.isEmpty(contentTypeId)) 116 { 117 throw new IOException("Invalid file content : no content type specified."); 118 } 119 120 if (!_contentTypeExtensionPoint.hasExtension(contentTypeId)) 121 { 122 throw new IOException("Invalid file content : the specified content type does not exist."); 123 } 124 125 content.setTypes(new String[] {contentTypeId}); 126 127 Node title = _xPathProcessor.selectSingleNode(xmlContent, "title"); 128 129 if (title == null) 130 { 131 throw new IOException("Invalid file content : no title found, but it is mandatory."); 132 } 133 134 _importAttributes(content, xmlContent); 135 } 136 137 @Override 138 public String[] getMimeTypes() 139 { 140 // handles xml mime-types 141 return new String[] {"application/xml", "text/xml"}; 142 } 143 144 @Override 145 public void postTreatment(ModifiablePage page, Content content, File file) throws IOException 146 { 147 // Nothing to do 148 } 149 150 private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException 151 { 152 InputStream is = new FileInputStream(file); 153 Reader reader = new InputStreamReader(is, "UTF-8"); 154 Document doc = null; 155 try 156 { 157 doc = _domParser.parseDocument(new InputSource(reader)); 158 } 159 catch (SAXException e) 160 { 161 getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e); 162 } 163 return doc; 164 } 165 166 private void _importAttributes(ModifiableWebContent content, Node xmlContent) throws IOException 167 { 168 NodeList attributesNodes = xmlContent.getChildNodes(); 169 for (int i = 0; i < attributesNodes.getLength(); i++) 170 { 171 Node attributeNode = attributesNodes.item(i); 172 173 if (attributeNode.getNodeType() == Node.ELEMENT_NODE && content.hasDefinition(attributeNode.getLocalName())) 174 { 175 ModelItem attributeDefinition = content.getDefinition(attributeNode.getLocalName()); 176 _importAttribute(content, attributeDefinition, attributeNode); 177 } 178 } 179 } 180 181 @SuppressWarnings("unchecked") 182 private void _importAttribute(ModifiableModelAwareDataHolder dataHolder, ModelItem attributeDefinition, Node attributeNode) throws IOException 183 { 184 if (attributeDefinition != null) 185 { 186 if (attributeDefinition instanceof RepeaterDefinition) 187 { 188 _setRepeater(dataHolder, (RepeaterDefinition) attributeDefinition, attributeNode); 189 } 190 else if (attributeDefinition instanceof CompositeDefinition) 191 { 192 _setComposite(dataHolder, (CompositeDefinition) attributeDefinition, attributeNode); 193 } 194 else if (attributeDefinition instanceof ElementDefinition) 195 { 196 _setAttribute(dataHolder, (ElementDefinition) attributeDefinition, attributeNode); 197 } 198 } 199 } 200 201 private void _setRepeater(ModifiableModelAwareDataHolder dataHolder, RepeaterDefinition repeaterDefinition, Node repeaterNode) throws IOException 202 { 203 NodeList entryNodes = _xPathProcessor.selectNodeList(repeaterNode, "entry"); 204 if (entryNodes.getLength() > 0) 205 { 206 ModifiableModelAwareRepeater repeaterData = dataHolder.getRepeater(repeaterDefinition.getName(), true); 207 for (int i = 0; i < entryNodes.getLength(); i++) 208 { 209 Node entryNode = entryNodes.item(i); 210 ModifiableModelAwareRepeaterEntry entryData = repeaterData.addEntry(); 211 212 NodeList subDataNodes = entryNode.getChildNodes(); 213 for (int j = 0; j < subDataNodes.getLength(); j++) 214 { 215 Node subDataNode = subDataNodes.item(j); 216 if (subDataNode.getNodeType() == Node.ELEMENT_NODE) 217 { 218 String subDataName = subDataNode.getLocalName(); 219 ModelItem childDefinition = repeaterDefinition.getChild(subDataName); 220 221 _importAttribute(entryData, childDefinition, subDataNode); 222 } 223 } 224 } 225 } 226 } 227 228 private void _setComposite(ModifiableModelAwareDataHolder dataHolder, CompositeDefinition compositeDefinition, Node compositeNode) throws IOException 229 { 230 NodeList subDataNodes = compositeNode.getChildNodes(); 231 if (subDataNodes.getLength() > 0) 232 { 233 ModifiableModelAwareComposite compositeData = dataHolder.getComposite(compositeDefinition.getName(), true); 234 for (int i = 0; i < subDataNodes.getLength(); i++) 235 { 236 Node subDataNode = subDataNodes.item(i); 237 if (subDataNode.getNodeType() == Node.ELEMENT_NODE) 238 { 239 String subDataName = subDataNode.getLocalName(); 240 ModelItem childDefinition = compositeDefinition.getChild(subDataName); 241 242 _importAttribute(compositeData, childDefinition, subDataNode); 243 } 244 } 245 } 246 } 247 248 @SuppressWarnings("unchecked") 249 private <T> void _setAttribute(ModifiableModelAwareDataHolder dataHolder, ElementDefinition<T> attributeDefinition, Node attributeNode) throws IOException 250 { 251 ElementType<T> type = attributeDefinition.getType(); 252 253 if (attributeDefinition.isMultiple()) 254 { 255 NodeList valuesNodeList = _xPathProcessor.selectNodeList(attributeNode, "value"); 256 List<T> values = new ArrayList<>(); 257 for (int i = 0; i < valuesNodeList.getLength(); i++) 258 { 259 _getSingleAttributeValue(valuesNodeList.item(i), type) 260 .ifPresent(value -> values.add(value)); 261 } 262 263 if (!values.isEmpty()) 264 { 265 T[] valuesAsArray = (T[]) Array.newInstance(type.getManagedClass(), values.size()); 266 dataHolder.setValue(attributeDefinition.getName(), values.toArray(valuesAsArray)); 267 } 268 } 269 else 270 { 271 _getSingleAttributeValue(attributeNode, type) 272 .ifPresent(value -> dataHolder.setValue(attributeDefinition.getName(), value)); 273 } 274 } 275 276 @SuppressWarnings("unchecked") 277 private <T> Optional<T> _getSingleAttributeValue(Node valueNode, ElementType<T> type) throws IOException 278 { 279 String id = type.getId(); 280 if (ModelItemTypeConstants.BINARY_ELEMENT_TYPE_ID.equals(id) || ModelItemTypeConstants.FILE_ELEMENT_TYPE_ID.equals(id)) 281 { 282 return (Optional<T>) _getSingleBinaryAttributeValue(valueNode); 283 } 284 else if (ModelItemTypeConstants.GEOCODE_ELEMENT_TYPE_ID.equals(id)) 285 { 286 return (Optional<T>) _getSingleGeocodeAttributeValue(valueNode); 287 } 288 else if (ModelItemTypeConstants.RICH_TEXT_ELEMENT_TYPE_ID.equals(id)) 289 { 290 return (Optional<T>) _getSingleRichTextAttributeValue(valueNode); 291 } 292 else 293 { 294 return _getSingleDefaultAttributeValue(valueNode, type); 295 } 296 } 297 298 private Optional<Geocode> _getSingleGeocodeAttributeValue(Node geocodeNode) 299 { 300 Node latitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "latitude"); 301 String latitude = latitudeNode.getTextContent(); 302 303 Node longitudeNode = _xPathProcessor.selectSingleNode(geocodeNode, "longitude"); 304 String longitude = longitudeNode.getTextContent(); 305 306 if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude)) 307 { 308 return Optional.of(new Geocode(Double.valueOf(latitude), Double.valueOf(longitude))); 309 } 310 else 311 { 312 throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'."); 313 } 314 } 315 316 private Optional<Binary> _getSingleBinaryAttributeValue(Node binaryNode) 317 { 318 String value = binaryNode.getTextContent(); 319 if (StringUtils.isNotEmpty(value)) 320 { 321 try 322 { 323 Pattern pattern = Pattern.compile("filename=\"([^\"]+)\""); 324 325 URL url = new URL(value); 326 HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 327 connection.setConnectTimeout(1000); 328 connection.setReadTimeout(2000); 329 330 String contentType = Objects.toString(connection.getContentType(), "application/unknown"); 331 String contentEncoding = Objects.toString(connection.getContentEncoding(), ""); 332 String contentDisposition = Objects.toString(connection.getHeaderField("Content-Disposition"), ""); 333 String filename = URIUtils.decode(FilenameUtils.getName(connection.getURL().getPath())); 334 if (StringUtils.isEmpty(filename)) 335 { 336 Matcher matcher = pattern.matcher(contentDisposition); 337 if (matcher.matches()) 338 { 339 filename = matcher.group(1); 340 } 341 else 342 { 343 filename = "unknown"; 344 } 345 } 346 347 try (InputStream is = connection.getInputStream()) 348 { 349 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 350 IOUtils.copy(is, bos); 351 352 Binary binary = new Binary(); 353 binary.setLastModificationDate(ZonedDateTime.now()); 354 binary.setInputStream(new ByteArrayInputStream(bos.toByteArray())); 355 356 if (StringUtils.isNotEmpty(filename)) 357 { 358 binary.setFilename(filename); 359 } 360 if (StringUtils.isNotEmpty(contentType)) 361 { 362 binary.setMimeType(contentType); 363 } 364 if (StringUtils.isNotEmpty(contentEncoding)) 365 { 366 binary.setEncoding(contentEncoding); 367 } 368 369 return Optional.of(binary); 370 } 371 } 372 catch (Exception e) 373 { 374 throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e); 375 } 376 } 377 else 378 { 379 return Optional.empty(); 380 } 381 } 382 383 private Optional<RichText> _getSingleRichTextAttributeValue(Node richTextNode) throws IOException 384 { 385 NodeList docbookNodes = richTextNode.getChildNodes(); 386 for (int i = 0; i < docbookNodes.getLength(); i++) 387 { 388 Node docbookNode = docbookNodes.item(i); 389 if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName())) 390 { 391 try 392 { 393 String docbook = _serializeNode(docbookNode); 394 395 RichText richText = new RichText(); 396 richText.setEncoding("UTF-8"); 397 richText.setLastModificationDate(ZonedDateTime.now()); 398 richText.setMimeType("text/xml"); 399 richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8"))); 400 return Optional.of(richText); 401 } 402 catch (TransformerException e) 403 { 404 throw new IOException("Error serializing a docbook node.", e); 405 } 406 } 407 } 408 409 // No article found, return an empty Optional 410 return Optional.empty(); 411 } 412 413 private String _serializeNode(Node node) throws TransformerException 414 { 415 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 416 417 Properties format = new Properties(); 418 format.put(OutputKeys.METHOD, "xml"); 419 format.put(OutputKeys.ENCODING, "UTF-8"); 420 421 transformer.setOutputProperties(format); 422 423 StringWriter writer = new StringWriter(); 424 DOMSource domSource = new DOMSource(node); 425 StreamResult result = new StreamResult(writer); 426 427 transformer.transform(domSource, result); 428 429 return writer.toString(); 430 } 431 432 private <T> Optional<T> _getSingleDefaultAttributeValue(Node valueNode, ElementType<T> type) 433 { 434 String valueAsString = valueNode.getTextContent(); 435 return Optional.of(type.castValue(valueAsString)); 436 } 437}