001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.webcontentio.xml; 017 018import java.io.ByteArrayInputStream; 019import java.io.ByteArrayOutputStream; 020import java.io.File; 021import java.io.FileInputStream; 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.InputStreamReader; 026import java.io.Reader; 027import java.io.StringWriter; 028import java.io.UnsupportedEncodingException; 029import java.net.HttpURLConnection; 030import java.net.URL; 031import java.net.URLDecoder; 032import java.util.Date; 033import java.util.Map; 034import java.util.Properties; 035import java.util.regex.Matcher; 036import java.util.regex.Pattern; 037 038import javax.xml.transform.OutputKeys; 039import javax.xml.transform.Transformer; 040import javax.xml.transform.TransformerException; 041import javax.xml.transform.TransformerFactory; 042import javax.xml.transform.dom.DOMSource; 043import javax.xml.transform.stream.StreamResult; 044 045import org.apache.avalon.framework.logger.AbstractLogEnabled; 046import org.apache.avalon.framework.service.ServiceException; 047import org.apache.avalon.framework.service.ServiceManager; 048import org.apache.avalon.framework.service.Serviceable; 049import org.apache.commons.io.FilenameUtils; 050import org.apache.commons.io.IOUtils; 051import org.apache.commons.lang.StringUtils; 052import org.apache.excalibur.xml.dom.DOMParser; 053import org.apache.excalibur.xml.xpath.XPathProcessor; 054import org.joda.time.format.ISODateTimeFormat; 055import org.w3c.dom.Document; 056import org.w3c.dom.Node; 057import org.w3c.dom.NodeList; 058import org.xml.sax.InputSource; 059import org.xml.sax.SAXException; 060 061import org.ametys.cms.contenttype.ContentTypesHelper; 062import org.ametys.cms.contenttype.MetadataDefinition; 063import org.ametys.cms.contenttype.RepeaterDefinition; 064import org.ametys.cms.repository.Content; 065import org.ametys.plugins.repository.metadata.ModifiableBinaryMetadata; 066import org.ametys.plugins.repository.metadata.ModifiableCompositeMetadata; 067import org.ametys.plugins.repository.metadata.ModifiableRichText; 068import org.ametys.plugins.webcontentio.ContentImporter; 069import org.ametys.runtime.parameter.ParameterHelper; 070import org.ametys.runtime.parameter.ParameterHelper.ParameterType; 071import org.ametys.web.repository.content.ModifiableWebContent; 072import org.ametys.web.repository.page.ModifiablePage; 073 074/** 075 * Default XML content importer 076 */ 077public class XmlContentImporter extends AbstractLogEnabled implements ContentImporter, Serviceable 078{ 079 private DOMParser _domParser; 080 private XPathProcessor _xPathProcessor; 081 private ContentTypesHelper _cTypeHelper; 082 083 @Override 084 public void service(ServiceManager manager) throws ServiceException 085 { 086 _domParser = (DOMParser) manager.lookup(DOMParser.ROLE); 087 _xPathProcessor = (XPathProcessor) manager.lookup(XPathProcessor.ROLE); 088 _cTypeHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 089 } 090 091 @Override 092 public void importContent(File file, ModifiableWebContent content, Map<String, String> params) throws IOException 093 { 094 Document doc = getXmlDocFromFile(file); 095 096 if (doc == null) 097 { 098 throw new IOException("Unable to retrieve the xml document from the file received."); 099 } 100 101 Node xmlContent = _xPathProcessor.selectSingleNode(doc, "/content"); 102 String contentType = _xPathProcessor.evaluateAsString(xmlContent, "@type"); 103 104 if (contentType == null) 105 { 106 throw new IOException("Invalid file content : no content type specified."); 107 } 108 content.setTypes(new String[] {contentType}); 109 110 Node title = _xPathProcessor.selectSingleNode(xmlContent, "title"); 111 112 if (title == null) 113 { 114 throw new IOException("Invalid file content : no title found, but it is mandatory."); 115 } 116 117 _importMetadata(content, xmlContent); 118 } 119 120 @Override 121 public String[] getMimeTypes() 122 { 123 // handles xml mime-types 124 return new String[] {"application/xml", "text/xml"}; 125 } 126 127 @Override 128 public void postTreatment(ModifiablePage page, Content content, File file) throws IOException 129 { 130 // Nothing to do 131 } 132 133 private Document getXmlDocFromFile(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException 134 { 135 InputStream is = new FileInputStream(file); 136 Reader reader = new InputStreamReader(is, "UTF-8"); 137 Document doc = null; 138 try 139 { 140 doc = _domParser.parseDocument(new InputSource(reader)); 141 } 142 catch (SAXException e) 143 { 144 getLogger().error("[IMPORT] Unable to parse imported file " + file.getName(), e); 145 } 146 return doc; 147 } 148 149 private void _importMetadata(ModifiableWebContent content, Node domNode) throws IOException 150 { 151 NodeList childNodes = domNode.getChildNodes(); 152 for (int i = 0; i < childNodes.getLength(); i++) 153 { 154 Node metadataNode = childNodes.item(i); 155 156 if (metadataNode.getNodeType() == Node.ELEMENT_NODE) 157 { 158 MetadataDefinition metaDef = _cTypeHelper.getMetadataDefinition(metadataNode.getLocalName(), content); 159 _importMetadata(content.getMetadataHolder(), metaDef, metadataNode); 160 } 161 } 162 } 163 164 private void _importMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException 165 { 166 if (metaDef != null) 167 { 168 if (metaDef instanceof RepeaterDefinition) 169 { 170 _setRepeater(contentNode, (RepeaterDefinition) metaDef, domNode); 171 } 172 else 173 { 174 _setMetadata(contentNode, metaDef, domNode); 175 } 176 } 177 } 178 179 private void _setRepeater(ModifiableCompositeMetadata meta, RepeaterDefinition repeaterDef, Node domNode) throws IOException 180 { 181 ModifiableCompositeMetadata repeaterMeta = meta.getCompositeMetadata(repeaterDef.getName(), true); 182 183 NodeList entryNodes = _xPathProcessor.selectNodeList(domNode, "entry"); 184 for (int i = 0; i < entryNodes.getLength(); i++) 185 { 186 Node entryNode = entryNodes.item(i); 187 String entryName = Integer.toString(i + 1); 188 189 ModifiableCompositeMetadata entryMeta = repeaterMeta.getCompositeMetadata(entryName, true); 190 NodeList subMetaNodes = entryNode.getChildNodes(); 191 for (int j = 0; j < subMetaNodes.getLength(); j++) 192 { 193 Node subMetaNode = subMetaNodes.item(j); 194 if (subMetaNode.getNodeType() == Node.ELEMENT_NODE) 195 { 196 String subMetaName = subMetaNode.getLocalName(); 197 MetadataDefinition childDef = repeaterDef.getMetadataDefinition(subMetaName); 198 199 _importMetadata(entryMeta, childDef, subMetaNode); 200 } 201 } 202 } 203 } 204 205 private void _setMetadata(ModifiableCompositeMetadata contentNode, MetadataDefinition metaDef, Node domNode) throws IOException 206 { 207 switch (metaDef.getType()) 208 { 209 case STRING: 210 setStringMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode)); 211 break; 212 case BOOLEAN: 213 setBooleanMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode)); 214 break; 215 case DOUBLE: 216 setDoubleMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode)); 217 break; 218 case LONG: 219 setLongMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode)); 220 break; 221 case COMPOSITE: 222 setCompositeMetadata(contentNode, metaDef, domNode); 223 break; 224 case DATE: 225 case DATETIME: 226 setDateMetadata(contentNode, metaDef, getMetadataValues(metaDef, domNode)); 227 break; 228 case BINARY: 229 case FILE: 230 String[] values = getMetadataValues(metaDef, domNode); 231 if (values.length > 0) 232 { 233 setBinaryMetadata(contentNode, metaDef, values[0]); 234 } 235 break; 236 case GEOCODE: 237 Node latitudeNode = _xPathProcessor.selectSingleNode(domNode, "latitude"); 238 Node longitudeNode = _xPathProcessor.selectSingleNode(domNode, "longitude"); 239 setGeocodeMetadata(contentNode, metaDef, latitudeNode.getTextContent(), longitudeNode.getTextContent()); 240 break; 241 case REFERENCE: 242 break; 243 case RICH_TEXT: 244 setRichTextMetadata(contentNode, domNode, metaDef.getName()); 245 break; 246 case USER: 247 break; 248 default: 249 break; 250 } 251 } 252 253 private String[] getMetadataValues(MetadataDefinition metaDef, Node domNode) 254 { 255 String[] values; 256 if (metaDef.isMultiple()) 257 { 258 NodeList valuesNodeList = _xPathProcessor.selectNodeList(domNode, "value"); 259 values = new String[valuesNodeList.getLength()]; 260 for (int i = 0; i < valuesNodeList.getLength(); i++) 261 { 262 values[i] = valuesNodeList.item(i).getTextContent(); 263 } 264 } 265 else 266 { 267 values = new String[1]; 268 values[0] = domNode.getTextContent(); 269 } 270 return values; 271 } 272 273 private void setStringMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values) 274 { 275 if (values != null) 276 { 277 if (metaDef.isMultiple()) 278 { 279 meta.setMetadata(metaDef.getName(), values); 280 } 281 else 282 { 283 meta.setMetadata(metaDef.getName(), values[0]); 284 } 285 } 286 } 287 288 /** 289 * Set a boolean metadata. 290 * @param meta the metadata holder. 291 * @param metaDef the metadata definition. 292 * @param values the metadata values. 293 */ 294 protected void setBooleanMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values) 295 { 296 if (values != null) 297 { 298 if (metaDef.isMultiple()) 299 { 300 boolean[] bValues = new boolean[values.length]; 301 for (int i = 0; i < values.length; i++) 302 { 303 bValues[i] = Boolean.parseBoolean(values[i]); 304 } 305 306 meta.setMetadata(metaDef.getName(), bValues); 307 } 308 else 309 { 310 meta.setMetadata(metaDef.getName(), Boolean.parseBoolean(values[0])); 311 } 312 } 313 } 314 315 /** 316 * Set a long metadata. 317 * @param meta the metadata holder. 318 * @param metaDef the metadata definition. 319 * @param values the metadata values. 320 */ 321 protected void setLongMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values) 322 { 323 if (values != null) 324 { 325 if (metaDef.isMultiple()) 326 { 327 long[] lValues = new long[values.length]; 328 for (int i = 0; i < values.length; i++) 329 { 330 lValues[i] = Long.parseLong(values[i]); 331 } 332 333 meta.setMetadata(metaDef.getName(), lValues); 334 } 335 else 336 { 337 meta.setMetadata(metaDef.getName(), Long.parseLong(values[0])); 338 } 339 } 340 } 341 342 /** 343 * Set a double metadata. 344 * @param meta the metadata holder. 345 * @param metaDef the metadata definition. 346 * @param values the metadata values. 347 */ 348 protected void setDoubleMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values) 349 { 350 if (values != null) 351 { 352 if (metaDef.isMultiple()) 353 { 354 double[] dValues = new double[values.length]; 355 for (int i = 0; i < values.length; i++) 356 { 357 dValues[i] = Double.parseDouble(values[i]); 358 } 359 360 meta.setMetadata(metaDef.getName(), dValues); 361 } 362 else 363 { 364 meta.setMetadata(metaDef.getName(), Double.parseDouble(values[0])); 365 } 366 } 367 } 368 369 /** 370 * Set a date or datetime metadata. 371 * @param meta the metadata holder. 372 * @param metaDef the metadata definition. 373 * @param values the metadata values. 374 */ 375 protected void setDateMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String[] values) 376 { 377 if (values != null) 378 { 379 if (metaDef.isMultiple()) 380 { 381 Date[] dValues = new Date[values.length]; 382 for (int i = 0; i < values.length; i++) 383 { 384 dValues[i] = parseDate(values[i]); 385 } 386 387 meta.setMetadata(metaDef.getName(), dValues); 388 } 389 else 390 { 391 meta.setMetadata(metaDef.getName(), parseDate(values[0])); 392 } 393 } 394 } 395 396 /** 397 * Parse a String value as a Date.<br> 398 * Allowed formats: 399 * <ul> 400 * <li>yyyy-MM-dd</li> 401 * <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li> 402 * </ul> 403 * @param value the String value. 404 * @return the parsed Date or <code>null</code> if the value can't be parsed. 405 */ 406 protected Date parseDate(String value) 407 { 408 return parseDate(value, false); 409 } 410 411 /** 412 * Parse a String value as a Date.<br> 413 * Allowed formats: 414 * <ul> 415 * <li>yyyy-MM-dd</li> 416 * <li>yyyy-MM-dd'T'HH:mm:ss.SSSZZ</li> 417 * </ul> 418 * @param value the String value. 419 * @param throwException true to throw an exception if the value can't be parsed, false to return null. 420 * @return the parsed Date or <code>null</code> if the value can't be parsed and throwException is false. 421 */ 422 protected Date parseDate(String value, boolean throwException) 423 { 424 Date dateValue = null; 425 426 try 427 { 428 dateValue = ISODateTimeFormat.date().parseDateTime(value).toDate(); 429 } 430 catch (Exception e) 431 { 432 dateValue = (Date) ParameterHelper.castValue(value, ParameterType.DATE); 433 } 434 435 if (dateValue == null && throwException) 436 { 437 throw new IllegalArgumentException("'" + value + "' could not be cast as a Date."); 438 } 439 440 return dateValue; 441 } 442 443 /** 444 * Set a geocode metadata. 445 * @param meta the metadata holder. 446 * @param metaDef the metadata definition. 447 * @param latitude the geocode latitude as a String. 448 * @param longitude the geocode longitude as a String. 449 */ 450 protected void setGeocodeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String latitude, String longitude) 451 { 452 if (StringUtils.isNotEmpty(latitude) && StringUtils.isNotEmpty(longitude)) 453 { 454 double dLong = Double.parseDouble(longitude); 455 double dLat = Double.parseDouble(latitude); 456 457 ModifiableCompositeMetadata geoCode = meta.getCompositeMetadata(metaDef.getName(), true); 458 geoCode.setMetadata("longitude", dLong); 459 geoCode.setMetadata("latitude", dLat); 460 } 461 else 462 { 463 throw new IllegalArgumentException("Invalid geocode values: latitude='" + latitude + "', longitude='" + longitude + "'."); 464 } 465 } 466 467 /** 468 * Set a composite metadata. 469 * @param meta the metadata holder. 470 * @param domNode the metadata DOM node. 471 * @param metaDef the metadata definition. 472 * @throws IOException If an error occurres 473 */ 474 protected void setCompositeMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, Node domNode) throws IOException 475 { 476 NodeList subMetaNodes = domNode.getChildNodes(); 477 if (subMetaNodes.getLength() > 0) 478 { 479 ModifiableCompositeMetadata composite = meta.getCompositeMetadata(metaDef.getName(), true); 480 for (int i = 0; i < subMetaNodes.getLength(); i++) 481 { 482 Node subMetaNode = subMetaNodes.item(i); 483 if (subMetaNode.getNodeType() == Node.ELEMENT_NODE) 484 { 485 MetadataDefinition childDef = metaDef.getMetadataDefinition(subMetaNode.getLocalName()); 486 487 _importMetadata(composite, childDef, subMetaNode); 488 } 489 } 490 } 491 } 492 493 /** 494 * Set a file metadata. 495 * @param meta the metadata holder. 496 * @param metaDef The metadata definition 497 * @param value The value 498 */ 499 protected void setBinaryMetadata(ModifiableCompositeMetadata meta, MetadataDefinition metaDef, String value) 500 { 501 if (StringUtils.isNotEmpty(value)) 502 { 503 504 try 505 { 506 Pattern pattern = Pattern.compile("filename=\"([^\"]+)\""); 507 508 URL url = new URL(value); 509 HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 510 connection.setConnectTimeout(1000); 511 connection.setReadTimeout(2000); 512 513 String contentType = StringUtils.defaultString(connection.getContentType(), "application/unknown"); 514 String contentEncoding = StringUtils.defaultString(connection.getContentEncoding(), ""); 515 String contentDisposition = StringUtils.defaultString(connection.getHeaderField("Content-Disposition"), ""); 516 String filename = URLDecoder.decode(FilenameUtils.getName(connection.getURL().getPath()), "UTF-8"); 517 if (StringUtils.isEmpty(filename)) 518 { 519 Matcher matcher = pattern.matcher(contentDisposition); 520 if (matcher.matches()) 521 { 522 filename = matcher.group(1); 523 } 524 else 525 { 526 filename = "unknown"; 527 } 528 } 529 530 try (InputStream is = connection.getInputStream()) 531 { 532 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 533 IOUtils.copy(is, bos); 534 535 ModifiableBinaryMetadata binaryMeta = meta.getBinaryMetadata(metaDef.getName(), true); 536 binaryMeta.setLastModified(new Date()); 537 binaryMeta.setInputStream(new ByteArrayInputStream(bos.toByteArray())); 538 539 if (StringUtils.isNotEmpty(filename)) 540 { 541 binaryMeta.setFilename(filename); 542 } 543 if (StringUtils.isNotEmpty(contentType)) 544 { 545 binaryMeta.setMimeType(contentType); 546 } 547 if (StringUtils.isNotEmpty(contentEncoding)) 548 { 549 binaryMeta.setEncoding(contentEncoding); 550 } 551 } 552 } 553 catch (Exception e) 554 { 555 throw new IllegalArgumentException("Unable to fetch file from URL '" + value + "', it will be ignored.", e); 556 } 557 } 558 } 559 560 /** 561 * Set a RichText metadata. 562 * @param meta the metadata holder. 563 * @param domNode the metadata node. 564 * @param name the metadata name. 565 * @throws IOException if an error occurs. 566 */ 567 protected void setRichTextMetadata(ModifiableCompositeMetadata meta, Node domNode, String name) throws IOException 568 { 569 NodeList docbookNodes = domNode.getChildNodes(); 570 for (int i = 0; i < docbookNodes.getLength(); i++) 571 { 572 Node docbookNode = docbookNodes.item(i); 573 if (docbookNode.getNodeType() == Node.ELEMENT_NODE && "article".equals(docbookNode.getLocalName())) 574 { 575 try 576 { 577 String docbook = serializeNode(docbookNode); 578 ModifiableRichText richText = meta.getRichText(name, true); 579 580 richText.setEncoding("UTF-8"); 581 richText.setLastModified(new Date()); 582 richText.setMimeType("text/xml"); 583 richText.setInputStream(new ByteArrayInputStream(docbook.getBytes("UTF-8"))); 584 } 585 catch (TransformerException e) 586 { 587 throw new IOException("Error serializing a docbook node.", e); 588 } 589 } 590 } 591 } 592 593 /** 594 * Serialize a XML node as a String. 595 * @param node the node. 596 * @return the XML string. 597 * @throws TransformerException if an error occurs. 598 */ 599 protected String serializeNode(Node node) throws TransformerException 600 { 601 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 602 603 Properties format = new Properties(); 604 format.put(OutputKeys.METHOD, "xml"); 605 format.put(OutputKeys.ENCODING, "UTF-8"); 606 607 transformer.setOutputProperties(format); 608 609 StringWriter writer = new StringWriter(); 610 DOMSource domSource = new DOMSource(node); 611 StreamResult result = new StreamResult(writer); 612 613 transformer.transform(domSource, result); 614 615 return writer.toString(); 616 } 617}