/*
 *  Copyright 2016 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.webcontentio.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.avalon.framework.logger.AbstractLogEnabled;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.xpath.XPathProcessor;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import org.ametys.cms.contenttype.ContentTypesHelper;
import org.ametys.cms.contenttype.MetadataDefinition;
import org.ametys.cms.contenttype.RepeaterDefinition;
import org.ametys.cms.repository.Content;
import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata;
import org.ametys.plugins.repository.metadata.ModifiableCompositeMetadata;
import org.ametys.plugins.repository.metadata.ModifiableRichText;
import org.ametys.plugins.webcontentio.ContentImporter;
import org.ametys.runtime.parameter.ParameterHelper;
import org.ametys.runtime.parameter.ParameterHelper.ParameterType;
import org.ametys.web.repository.content.ModifiableWebContent;
import org.ametys.web.repository.page.ModifiablePage;

/**
 * Default XML content importer.<br>
 * The imported file is expected to have a <code>content</code> root element carrying a
 * <code>type</code> attribute and a mandatory <code>title</code> child. Each child element
 * of the root is imported as a metadata when the content type declares a metadata
 * definition of the same name; other elements are silently skipped.
 */
public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable
{
    /** Captures the quoted file name of a <code>Content-Disposition</code> header value. */
    private static final Pattern __FILENAME_PATTERN = Pattern.compile("filename=\"([^\"]+)\"");
    
    private DOMParser _domParser;
    private XPathProcessor _xPathProcessor;
    private ContentTypesHelper _cTypeHelper;
    
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _domParser = (DOMParser) manager.lookup(DOMParser.ROLE);
        _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE);
        _cTypeHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
    }
    
    @Override
    public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException
    {
        Document doc = getXmlDocFromFile(file);
        
        if (doc == null)
        {
            throw new IOException("Unable to retrieve the xml document from the file received.");
        }
        
        Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content");
        
        if (xmlContent == null)
        {
            // Fail with a clear message instead of a NullPointerException further down.
            throw new IOException("Invalid file content : no 'content' root element found.");
        }
        
        String contentType = _xPathProcessor.evaluateAsString(xmlContent, "@type");
        
        // A missing attribute may be reported as null or as an empty string: reject both.
        if (StringUtils.isEmpty(contentType))
        {
            throw new IOException("Invalid file content : no content type specified.");
        }
        content.setTypes(new String[] {contentType});
        
        Node title = _xPathProcessor.selectSingleNode(xmlContent, "title");
        
        if (title == null)
        {
            throw new IOException("Invalid file content : no title found, but it is mandatory.");
        }
        
        _importMetadata(content, xmlContent);
    }
    
    @Override
    public String[] getMimeTypes()
    {
        // handles xml mime-types
        return new String[] {"application/xml", "text/xml"};
    }
    
    @Override
    public void postTreatment(ModifiablePage page, Content content, File file) throws IOException
    {
        // Nothing to do
    }
    
    /**
     * Parse the given file as a DOM document, reading it as UTF-8.
     * @param file the XML file to parse.
     * @return the parsed document, or <code>null</code> if the file is not well-formed (the error is logged).
     * @throws FileNotFoundException if the file cannot be opened.
     * @throws UnsupportedEncodingException kept for signature compatibility.
     * @throws IOException if an error occurs while reading the file.
     */
    private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException
    {
        // try-with-resources: the stream and reader are always released, even on a parse error.
        try (InputStream is = new FileInputStream(file);
             Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8))
        {
            return _domParser.parseDocument(new InputSource(reader));
        }
        catch (SAXException e)
        {
            getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e);
            return null;
        }
    }
    
    /**
     * Import every child element of the given node as a metadata of the content.
     * @param content the content to populate.
     * @param domNode the DOM node whose child elements are the metadata to import.
     * @throws IOException if an error occurs.
     */
    private void _importMetadata(ModifiableWebContent content, Node domNode) throws IOException
    {
        NodeList childNodes = domNode.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++)
        {
            Node metadataNode = childNodes.item(i);
            
            if (metadataNode.getNodeType() == Node.ELEMENT_NODE)
            {
                MetadataDefinition metaDef = _cTypeHelper.getMetadataDefinition(metadataNode.getLocalName(), content);
                _importMetadata(content.getMetadataHolder(), metaDef, metadataNode);
            }
        }
    }
    
    /**
     * Import a single metadata, dispatching on whether its definition is a repeater.
     * Nothing is done when no definition is provided.
     * @param contentNode the metadata holder.
     * @param metaDef the metadata definition, may be <code>null</code>.
     * @param domNode the metadata DOM node.
     * @throws IOException if an error occurs.
     */
    private void _importMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException
    {
        if (metaDef != null)
        {
            if (metaDef instanceof RepeaterDefinition)
            {
                _setRepeater(contentNode, (RepeaterDefinition) metaDef, domNode);
            }
            else
            {
                _setMetadata(contentNode, metaDef, domNode);
            }
        }
    }
    
    /**
     * Import a repeater: each <code>entry</code> child becomes a composite named after
     * its 1-based position, populated from the entry's child elements.
     * @param meta the metadata holder.
     * @param repeaterDef the repeater definition.
     * @param domNode the repeater DOM node.
     * @throws IOException if an error occurs.
     */
    private void _setRepeater(ModifiableCompositeMetadata meta, RepeaterDefinition repeaterDef, Node domNode) throws IOException
    {
        ModifiableCompositeMetadata repeaterMeta = meta.getCompositeMetadata(repeaterDef.getName(), true);
        
        NodeList entryNodes = _xPathProcessor.selectNodeList(domNode, "entry");
        for (int i = 0; i < entryNodes.getLength(); i++)
        {
            Node entryNode = entryNodes.item(i);
            String entryName = Integer.toString(i + 1);
            
            ModifiableCompositeMetadata entryMeta = repeaterMeta.getCompositeMetadata(entryName, true);
            NodeList subMetaNodes = entryNode.getChildNodes();
            for (int j = 0; j < subMetaNodes.getLength(); j++)
            {
                Node subMetaNode = subMetaNodes.item(j);
                if (subMetaNode.getNodeType() == Node.ELEMENT_NODE)
                {
                    String subMetaName = subMetaNode.getLocalName();
                    MetadataDefinition childDef = repeaterDef.getMetadataDefinition(subMetaName);
                    
                    _importMetadata(entryMeta, childDef, subMetaNode);
                }
            }
        }
    }
    
    /**
     * Import a non-repeater metadata according to the type declared by its definition.
     * Reference and user metadata, as well as unknown types, are ignored.
     * @param contentNode the metadata holder.
     * @param metaDef the metadata definition.
     * @param domNode the metadata DOM node.
     * @throws IOException if an error occurs.
     */
    private void _setMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException
    {
        switch (metaDef.getType())
        {
            case STRING:
                setStringMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
                break;
            case BOOLEAN:
                setBooleanMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
                break;
            case DOUBLE:
                setDoubleMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
                break;
            case LONG:
                setLongMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
                break;
            case COMPOSITE:
                setCompositeMetadata(contentNode, metaDef, domNode);
                break;
            case DATE:
            case DATETIME:
                setDateMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode));
                break;
            case BINARY:
            case FILE:
                String[] values = getMetadataValues(metaDef, domNode);
                if (values.length > 0)
                {
                    // Only the first value is used: it is the URL the file is fetched from.
                    setBinaryMetadata(contentNode, metaDef, values[0]);
                }
                break;
            case GEOCODE:
                Node latitudeNode = _xPathProcessor.selectSingleNode(domNode, "latitude");
                Node longitudeNode = _xPathProcessor.selectSingleNode(domNode, "longitude");
                // A missing element is passed as null so that setGeocodeMetadata reports
                // the invalid geocode itself instead of failing here with a NullPointerException.
                String latitude = latitudeNode != null ? latitudeNode.getTextContent() : null;
                String longitude = longitudeNode != null ? longitudeNode.getTextContent() : null;
                setGeocodeMetadata(contentNode, metaDef, latitude, longitude);
                break;
            case REFERENCE:
                break;
            case RICH_TEXT:
                setRichTextMetadata(contentNode, domNode, metaDef.getName());
                break;
            case USER:
                break;
            default:
                break;
        }
    }
    
    /**
     * Extract the raw String values of a metadata node.
     * @param metaDef the metadata definition.
     * @param domNode the metadata DOM node.
     * @return the text of each <code>value</code> child for a multiple metadata (possibly empty),
     *         otherwise a one-element array holding the text content of the node itself.
     */
    private String[] getMetadataValues(MetadataDefinition metaDef, Node domNode)
    {
        String[] values;
        if (metaDef.isMultiple())
        {
            NodeList valuesNodeList = _xPathProcessor.selectNodeList(domNode, "value");
            values = new String[valuesNodeList.getLength()];
            for (int i = 0; i < valuesNodeList.getLength(); i++)
            {
                values[i] = valuesNodeList.item(i).getTextContent();
            }
        }
        else
        {
            values = new String[1];
            values[0] = domNode.getTextContent();
        }
        return values;
    }
    
    /**
     * Set a string metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param values the metadata values; for a single-valued metadata only the first one is used.
     */
    private void setStringMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
    {
        if (values != null)
        {
            if (metaDef.isMultiple())
            {
                meta.setMetadata(metaDef.getName(), values);
            }
            else if (values.length > 0)
            {
                meta.setMetadata(metaDef.getName(), values[0]);
            }
        }
    }
    
    /**
     * Set a boolean metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param values the metadata values; for a single-valued metadata only the first one is used.
     */
    protected void setBooleanMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
    {
        if (values != null)
        {
            if (metaDef.isMultiple())
            {
                boolean[] bValues = new boolean[values.length];
                for (int i = 0; i < values.length; i++)
                {
                    bValues[i] = Boolean.parseBoolean(values[i]);
                }
                
                meta.setMetadata(metaDef.getName(), bValues);
            }
            else if (values.length > 0)
            {
                meta.setMetadata(metaDef.getName(), Boolean.parseBoolean(values[0]));
            }
        }
    }
    
    /**
     * Set a long metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param values the metadata values; for a single-valued metadata only the first one is used.
     */
    protected void setLongMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
    {
        if (values != null)
        {
            if (metaDef.isMultiple())
            {
                long[] lValues = new long[values.length];
                for (int i = 0; i < values.length; i++)
                {
                    lValues[i] = Long.parseLong(values[i]);
                }
                
                meta.setMetadata(metaDef.getName(), lValues);
            }
            else if (values.length > 0)
            {
                meta.setMetadata(metaDef.getName(), Long.parseLong(values[0]));
            }
        }
    }
    
    /**
     * Set a double metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param values the metadata values; for a single-valued metadata only the first one is used.
     */
    protected void setDoubleMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
    {
        if (values != null)
        {
            if (metaDef.isMultiple())
            {
                double[] dValues = new double[values.length];
                for (int i = 0; i < values.length; i++)
                {
                    dValues[i] = Double.parseDouble(values[i]);
                }
                
                meta.setMetadata(metaDef.getName(), dValues);
            }
            else if (values.length > 0)
            {
                meta.setMetadata(metaDef.getName(), Double.parseDouble(values[0]));
            }
        }
    }
    
    /**
     * Set a date or datetime metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param values the metadata values; for a single-valued metadata only the first one is used.
     */
    protected void setDateMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values)
    {
        if (values != null)
        {
            if (metaDef.isMultiple())
            {
                Date[] dValues = new Date[values.length];
                for (int i = 0; i < values.length; i++)
                {
                    dValues[i] = parseDate(values[i]);
                }
                
                meta.setMetadata(metaDef.getName(), dValues);
            }
            else if (values.length > 0)
            {
                meta.setMetadata(metaDef.getName(), parseDate(values[0]));
            }
        }
    }
    
    /**
     * Parse a String value as a Date.<br>
     * Allowed formats:
     * <ul>
     *   <li>yyyy-MM-dd</li>
     *   <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li>
     * </ul>
     * @param value the String value.
     * @return the parsed Date or <code>null</code> if the value can't be parsed.
     */
    protected Date parseDate(String value)
    {
        return parseDate(value, false);
    }
    
    /**
     * Parse a String value as a Date.<br>
     * Allowed formats:
     * <ul>
     *   <li>yyyy-MM-dd</li>
     *   <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li>
     * </ul>
     * @param value the String value.
     * @param throwException true to throw an exception if the value can't be parsed, false to return null.
     * @return the parsed Date or <code>null</code> if the value can't be parsed and throwException is false.
     */
    protected Date parseDate(String value, boolean throwException)
    {
        Date dateValue = null;
        
        try
        {
            // A yyyy-MM-dd value carries neither time nor zone, so it has to be parsed as a
            // LocalDate; parsing it directly as a ZonedDateTime can never succeed.
            // NOTE(review): the date is anchored at midnight in the JVM default time zone - confirm
            // this is the zone expected by the repository.
            ZonedDateTime startOfDay = LocalDate.parse(value, DateTimeFormatter.ISO_LOCAL_DATE).atStartOfDay(ZoneId.systemDefault());
            dateValue = Date.from(startOfDay.toInstant());
        }
        catch (RuntimeException e)
        {
            // Not a plain local date (or a null value): fall back to the generic date caster.
            dateValue = (Date) ParameterHelper.castValue(value, ParameterType.DATE);
        }
        
        if (dateValue == null && throwException)
        {
            throw new IllegalArgumentException("'" + value + "' could not be cast as a Date.");
        }
        
        return dateValue;
    }
    
    /**
     * Set a geocode metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param latitude the geocode latitude as a String.
     * @param longitude the geocode longitude as a String.
     * @throws IllegalArgumentException if the latitude or the longitude is null or empty.
     */
    protected void setGeocodeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String latitude, String longitude)
    {
        if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude))
        {
            double dLong = Double.parseDouble(longitude);
            double dLat = Double.parseDouble(latitude);
            
            ModifiableCompositeMetadata geoCode = meta.getCompositeMetadata(metaDef.getName(), true);
            geoCode.setMetadata("longitude", dLong);
            geoCode.setMetadata("latitude", dLat);
        }
        else
        {
            throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'.");
        }
    }
    
    /**
     * Set a composite metadata.
     * @param meta the metadata holder.
     * @param metaDef the metadata definition.
     * @param domNode the metadata DOM node.
     * @throws IOException if an error occurs.
     */
    protected void setCompositeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, Node domNode) throws IOException
    {
        NodeList subMetaNodes = domNode.getChildNodes();
        if (subMetaNodes.getLength() > 0)
        {
            ModifiableCompositeMetadata composite = meta.getCompositeMetadata(metaDef.getName(), true);
            for (int i = 0; i < subMetaNodes.getLength(); i++)
            {
                Node subMetaNode = subMetaNodes.item(i);
                if (subMetaNode.getNodeType() == Node.ELEMENT_NODE)
                {
                    MetadataDefinition childDef = metaDef.getMetadataDefinition(subMetaNode.getLocalName());
                    
                    _importMetadata(composite, childDef, subMetaNode);
                }
            }
        }
    }
    
    /**
     * Set a file metadata by downloading the file designated by the given URL.<br>
     * The file name is taken from the URL path, then from the <code>Content-Disposition</code>
     * header, and defaults to <code>unknown</code>. Nothing is done for an empty value.
     * @param meta the metadata holder.
     * @param metaDef The metadata definition
     * @param value The URL of the file to fetch
     * @throws IllegalArgumentException if the file cannot be fetched from the URL.
     */
    protected void setBinaryMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String value)
    {
        if (StringUtils.isNotEmpty(value))
        {
            try
            {
                // NOTE(review): the URL comes from the imported file and is fetched as-is;
                // restrict the allowed hosts upstream if imported files are not trusted.
                URL url = new URL(value);
                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
                connection.setConnectTimeout(1000);
                connection.setReadTimeout(2000);
                
                String contentType = StringUtils.defaultString(connection.getContentType(), "application/unknown");
                String contentEncoding = StringUtils.defaultString(connection.getContentEncoding(), "");
                String contentDisposition = StringUtils.defaultString(connection.getHeaderField("Content-Disposition"), "");
                String filename = URLDecoder.decode(FilenameUtils.getName(connection.getURL().getPath()), "UTF-8");
                if (StringUtils.isEmpty(filename))
                {
                    // find() and not matches(): the header usually holds other tokens around
                    // the file name, e.g. attachment; filename="report.pdf"
                    Matcher matcher = __FILENAME_PATTERN.matcher(contentDisposition);
                    if (matcher.find())
                    {
                        filename = matcher.group(1);
                    }
                    else
                    {
                        filename = "unknown";
                    }
                }
                
                try (InputStream is = connection.getInputStream())
                {
                    // Buffer the whole payload: the connection stream is closed when leaving this block.
                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
                    IOUtils.copy(is, bos);
                    
                    ModifiableBinaryMetadata binaryMeta = meta.getBinaryMetadata(metaDef.getName(), true);
                    binaryMeta.setLastModified(new Date());
                    binaryMeta.setInputStream(new ByteArrayInputStream(bos.toByteArray()));
                    
                    if (StringUtils.isNotEmpty(filename))
                    {
                        binaryMeta.setFilename(filename);
                    }
                    if (StringUtils.isNotEmpty(contentType))
                    {
                        binaryMeta.setMimeType(contentType);
                    }
                    if (StringUtils.isNotEmpty(contentEncoding))
                    {
                        binaryMeta.setEncoding(contentEncoding);
                    }
                }
            }
            catch (Exception e)
            {
                throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e);
            }
        }
    }
    
    /**
     * Set a RichText metadata from the <code>article</code> child elements of the given node,
     * each one being serialized and stored as UTF-8 XML.
     * @param meta the metadata holder.
     * @param domNode the metadata node.
     * @param name the metadata name.
     * @throws IOException if an error occurs.
     */
    protected void setRichTextMetadata(ModifiableCompositeMetadata meta, Node domNode, String name) throws IOException
    {
        NodeList docbookNodes = domNode.getChildNodes();
        for (int i = 0; i < docbookNodes.getLength(); i++)
        {
            Node docbookNode = docbookNodes.item(i);
            if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName()))
            {
                try
                {
                    String docbook = serializeNode(docbookNode);
                    ModifiableRichText richText = meta.getRichText(name, true);
                    
                    richText.setEncoding("UTF-8");
                    richText.setLastModified(new Date());
                    richText.setMimeType("text/xml");
                    richText.setInputStream(new ByteArrayInputStream(docbook.getBytes(StandardCharsets.UTF_8)));
                }
                catch (TransformerException e)
                {
                    throw new IOException("Error serializing a docbook node.", e);
                }
            }
        }
    }
    
    /**
     * Serialize a XML node as a String.
     * @param node the node.
     * @return the XML string.
     * @throws TransformerException if an error occurs.
     */
    protected String serializeNode(Node node) throws TransformerException
    {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        
        Properties format = new Properties();
        format.put(OutputKeys.METHOD, "xml");
        format.put(OutputKeys.ENCODING, "UTF-8");
        
        transformer.setOutputProperties(format);
        
        StringWriter writer = new StringWriter();
        DOMSource domSource = new DOMSource(node);
        StreamResult result = new StreamResult(writer);
        
        transformer.transform(domSource, result);
        
        return writer.toString();
    }
}