/*
 *  Copyright 2010 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.cms.transformation.htmledition;

import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Date;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.cocoon.Constants;
import org.apache.cocoon.components.ContextHelper;
import org.apache.cocoon.environment.Context;
import org.apache.cocoon.environment.ObjectModelHelper;
import org.apache.cocoon.environment.Request;
import org.apache.cocoon.xml.AttributesImpl;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceResolver;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

import org.ametys.cms.data.NamedResource;
import org.ametys.cms.data.RichText;
import org.ametys.core.upload.Upload;
import org.ametys.core.upload.UploadManager;
import org.ametys.core.user.CurrentUserProvider;
import org.ametys.core.util.DateUtils;
import org.ametys.core.util.ImageHelper;
import org.ametys.plugins.explorer.resources.Resource;
import org.ametys.plugins.repository.AmetysObjectResolver;
import org.ametys.plugins.repository.AmetysRepositoryException;
import org.ametys.plugins.repository.UnknownAmetysObjectException;
import org.ametys.plugins.repository.metadata.File;
import org.ametys.plugins.repository.metadata.ModifiableFile;
import org.ametys.plugins.repository.metadata.ModifiableFolder;
import org.ametys.plugins.repository.metadata.ModifiableResource;
import org.ametys.plugins.repository.metadata.ModifiableRichText;

/**
 * This transformer extracts uploaded files' ids from the incoming HTML for further processing.<br>
 * Every <code>img</code> element is inspected: images that are not yet stored with the rich text
 * (temporary uploads, inline <code>data:</code> images, images referenced by URL) are stored as
 * rich text attachments and the element is rewritten to point to the stored file.
 * At the end of the document, attachments that are no longer referenced are removed.
 */
public class UploadedDataHTMLEditionHandler extends AbstractHTMLEditionHandler
{
    /** Matches a base64 inline image; group 1 is the image sub-type (png, jpeg or gif). */
    private static final Pattern __INLINE_IMAGE_MARKER = Pattern.compile("^data:image/(png|jpeg|gif);base64,.*");
    
    private UploadManager _uploadManager;
    private CurrentUserProvider _userProvider;
    private SourceResolver _resolver;
    private AmetysObjectResolver _ametysResolver;
    private Context _cocoonContext;
    
    /** True when the start of the current img tag was dropped, so that its end tag is dropped as well. */
    private boolean _tagToIgnore;
    /** Names of the attachments referenced by the document; other attachments are removed in {@link #endDocument()}. */
    private Set<String> _usedLocalFiles = new HashSet<>();
    /** The rich text being edited: either a {@link RichText} or a {@link ModifiableRichText}. */
    private Object _richText;
    private Map _objectModel;
    
    
    @Override
    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
    {
        super.contextualize(context);
        _cocoonContext = (Context) _context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
    }
    
    @Override
    public void service(ServiceManager sManager) throws ServiceException
    {
        super.service(sManager);
        _uploadManager = (UploadManager) sManager.lookup(UploadManager.ROLE);
        _userProvider = (CurrentUserProvider) sManager.lookup(CurrentUserProvider.ROLE);
        _resolver = (SourceResolver) sManager.lookup(SourceResolver.ROLE);
        _ametysResolver = (AmetysObjectResolver) sManager.lookup(AmetysObjectResolver.ROLE);
    }
    
    @Override
    public void startDocument() throws SAXException
    {
        _tagToIgnore = false;
        _objectModel = ContextHelper.getObjectModel(_context);
        // The rich text to fill is provided by the caller through the parent context
        Map parentContextParameters = (Map) _objectModel.get(ObjectModelHelper.PARENT_CONTEXT);
        _richText = parentContextParameters.get("richText");
        
        super.startDocument();
    }
    
    @Override
    public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
    {
        if ("img".equals(raw))
        {
            String type = attrs.getValue("data-ametys-type");
            
            if ("temp".equals(type))
            {
                Attributes newAttrs = _getAttributesForTemp(attrs);
                super.startElement(uri, loc, raw, newAttrs);
                return;
            }
            else if ("explorer".equals(type))
            {
                Attributes newAttrs = _processResource(attrs);
                super.startElement(uri, loc, raw, newAttrs);
                return;
            }
            else if ("local".equals(type))
            {
                Attributes newAttrs = _processLocal(attrs);
                super.startElement(uri, loc, raw, newAttrs);
                return;
            }
            else if (type == null && !"marker".equals(attrs.getValue("marker")))
            {
                // image is copied from elsewhere, fetch it in the content
                Attributes newAttrs = _getAttributesForCopiedImage(attrs);
                if (newAttrs == null)
                {
                    // the image could not be fetched: drop the start tag here, the end tag in endElement
                    _tagToIgnore = true;
                    return;
                }
                
                super.startElement(uri, loc, raw, newAttrs);
                return;
            }
        }
        
        super.startElement(uri, loc, raw, attrs);
    }
    
    /**
     * Fetch and store an image that has no Ametys type, i.e. that was copied from elsewhere.
     * The image is either an inline base64 image, an URL starting with '/', or a http(s) URL.
     * @param attrs the img tag attributes.
     * @return the new img tag attributes, or null if the image cannot be fetched and the tag has to be ignored.
     */
    private Attributes _getAttributesForCopiedImage(Attributes attrs)
    {
        String src = attrs.getValue("src");
        if (src == null)
        {
            getLogger().warn("Don't know how to fetch image with no src attribute. Image is ignored.");
            return null;
        }
        
        // The final filename
        String fileName;
        // The new attributes, will be filled with image width and height.
        AttributesImpl newAttrs = new AttributesImpl();
        
        Matcher m = __INLINE_IMAGE_MARKER.matcher(src);
        if (m.matches())
        {
            fileName = _handleInlineImage(src, m, newAttrs);
        }
        else
        {
            String initialFileName = _getInitialFileName(src);
            
            if (src.startsWith("/"))
            {
                try
                {
                    fileName = _handleInternalFile(src, newAttrs, initialFileName);
                }
                catch (Exception e)
                {
                    // unable to fetch image, do not keep the img tag
                    getLogger().warn("Unable to fetch internal image from URL '" + src + "'. Image is ignored.", e);
                    return null;
                }
            }
            else if (src.startsWith("http://") || src.startsWith("https://"))
            {
                try
                {
                    fileName = _handleRemoteFile(src, newAttrs, initialFileName);
                }
                catch (Exception e)
                {
                    // unable to fetch image, do not keep the img tag
                    getLogger().warn("Unable to fetch external image from URL '" + src + "'. Image is ignored.", e);
                    return null;
                }
            }
            else
            {
                getLogger().warn("Don't know how to fetch image at '" + src + "'. Image is ignored.");
                return null;
            }
        }
        
        _copyAttributes(attrs, newAttrs);
        
        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
        
        return newAttrs;
    }
    
    /**
     * Decode and store an inline base64 image.
     * @param src the value of the src attribute, matching {@link #__INLINE_IMAGE_MARKER}.
     * @param m the successful matcher of {@link #__INLINE_IMAGE_MARKER} on src.
     * @param newAttrs the attributes to fill with the image dimensions.
     * @return the name of the stored file.
     */
    private String _handleInlineImage(String src, Matcher m, AttributesImpl newAttrs)
    {
        String mimetype = m.group(1);
        // The base64 payload starts right after "data:image/<type>;base64,"
        String imageAsBase64 = src.substring(m.end(1) + ";base64,".length());
        byte[] imageAsBytes = org.apache.commons.codec.binary.Base64.decodeBase64(imageAsBase64);
        String fileName = _storeFile("paste." + mimetype, new ByteArrayInputStream(imageAsBytes), null, null);
        
        try (InputStream is = new ByteArrayInputStream(imageAsBytes))
        {
            _addDimensionAttributes(is, newAttrs);
        }
        catch (IOException e)
        {
            // Ignore: the image is stored, only its width and height attributes are missing
        }
        
        return fileName;
    }
    
    /**
     * Compute the file name from an image URL: the part after the last '/', without the query string.
     * @param src the image URL.
     * @return the file name to use for storing the image.
     */
    private String _getInitialFileName(String src)
    {
        int j = src.lastIndexOf('/');
        int k = src.indexOf('?', j);
        String initialFileName = k == -1 ? src.substring(j + 1) : src.substring(j + 1, k);
        
        return _escapeReservedSuffixes(initialFileName);
    }
    
    /**
     * Replace the '_max' and '_crop' sequences of a file name.
     * @param fileName the file name.
     * @return the file name with '_max' replaced by '_Max' and '_crop' replaced by '_Crop'.
     */
    private String _escapeReservedSuffixes(String fileName)
    {
        // FIXME CMS-3090 A uploaded image can not contain '_max' or '_crop', replace it by '_Max', '_Crop'
        return fileName.replace("_max", "_Max").replace("_crop", "_Crop");
    }
    
    /**
     * Fetch and store an image whose URL starts with '/'.
     * @param src the image URL, starting with '/'.
     * @param newAttrs the attributes to fill with the image dimensions.
     * @param initialFileName the wished file name.
     * @return the name of the stored file.
     * @throws MalformedURLException if the URI to resolve is malformed.
     * @throws IOException if the image cannot be read.
     * @throws URISyntaxException if src is not a valid URI.
     */
    private String _handleInternalFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException, URISyntaxException
    {
        // it may be an internal URL
        Request request = ContextHelper.getRequest(_context);
        String contextPath = request.getContextPath();
        Source source = null;
        
        try
        {
            String uriToResolve;
            
            if (src.startsWith(contextPath))
            {
                // it is an Ametys URL
                // first decode it
                String decodedPath = new URI(src).getPath();
                
                // then resolve it internally, without the context path
                uriToResolve = "cocoon:/" + decodedPath.substring(contextPath.length());
            }
            else
            {
                // the URL is outside the application: make it absolute on the current server
                uriToResolve = _getRequestURI(request).toString() + src;
            }
            
            // Resolve the computed URI, not the raw src attribute
            source = _resolver.resolveURI(uriToResolve);
            
            try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
            {
                try (InputStream is = source.getInputStream())
                {
                    IOUtils.copy(is, bos);
                }
                
                // The content is read twice (storage, then dimensions), hence the in-memory copy
                byte[] content = bos.toByteArray();
                String fileName = _storeFile(initialFileName, new ByteArrayInputStream(content), null, null);
                
                try (InputStream is = new ByteArrayInputStream(content))
                {
                    _addDimensionAttributes(is, newAttrs);
                }
                
                return fileName;
            }
        }
        finally
        {
            if (source != null)
            {
                _resolver.release(source);
            }
        }
    }
    
    /**
     * Fetch and store an image from a http or https URL.
     * @param src the image URL.
     * @param newAttrs the attributes to fill with the image dimensions.
     * @param initialFileName the wished file name.
     * @return the name of the stored file.
     * @throws MalformedURLException if src is not a valid URL.
     * @throws IOException if the image cannot be downloaded or read.
     */
    private String _handleRemoteFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException
    {
        String fileName;
        URL url = new URL(src);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        // Short timeouts (in milliseconds), so that an unreachable server does not block the edition
        connection.setConnectTimeout(1000);
        connection.setReadTimeout(2000);
        
        try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
        {
            try (InputStream is = connection.getInputStream())
            {
                IOUtils.copy(is, bos);
            }
            
            // The content is read twice (storage, then dimensions), hence the in-memory copy
            byte[] content = bos.toByteArray();
            fileName = _storeFile(initialFileName, new ByteArrayInputStream(content), null, null);
            
            try (InputStream is = new ByteArrayInputStream(content))
            {
                _addDimensionAttributes(is, newAttrs);
            }
        }
        return fileName;
    }
    
    /**
     * Copy the attributes, except 'data-ametys-src' and 'data-ametys-type'.
     * @param attrs the attributes to copy.
     * @param newAttrs the attributes to copy to.
     */
    private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
    {
        for (int i = 0; i < attrs.getLength(); i++)
        {
            String name = attrs.getQName(i);
            
            if (!"data-ametys-src".equals(name) && !"data-ametys-type".equals(name))
            {
                newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
            }
        }
    }
    
    /**
     * Get the cms uri
     * @param request The request
     * @return the uri without context path: scheme, server name, and port when it is not the default one (443 for a secure request, 80 otherwise)
     */
    private StringBuilder _getRequestURI(Request request)
    {
        StringBuilder sb = new StringBuilder();
        sb.append(request.getScheme());
        sb.append("://");
        sb.append(request.getServerName());
        
        int defaultPort = request.isSecure() ? 443 : 80;
        if (request.getServerPort() != defaultPort)
        {
            sb.append(":");
            sb.append(request.getServerPort());
        }
        
        return sb;
    }
    
    /**
     * Store a file that has just been uploaded as rich text data.
     * @param attrs the img tag attributes; 'data-ametys-temp-src' holds the upload id.
     * @return the new img tag attributes, referencing the stored file as a local file.
     */
    private Attributes _getAttributesForTemp(Attributes attrs)
    {
        // data has just been uploaded, must change the value, and store the id for further processing
        String id = attrs.getValue("data-ametys-temp-src");
        
        Upload upload = _uploadManager.getUpload(_userProvider.getUser(), id);
        
        String initialFileName = _escapeReservedSuffixes(upload.getFilename());
        String fileName = _storeFile(initialFileName, upload.getInputStream(), upload.getMimeType(), upload.getUploadedDate());
        
        AttributesImpl newAttrs = new AttributesImpl();
        
        _copyAttributes(attrs, newAttrs);
        
        if (!"marker".equals(attrs.getValue("marker")))
        {
            try (InputStream is = upload.getInputStream())
            {
                _addDimensionAttributes(is, newAttrs);
            }
            catch (IOException e)
            {
                // Ignore: the file is stored, only its width and height attributes are missing
            }
        }
        
        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
        
        return newAttrs;
    }
    
    /**
     * Store a file as rich text data.
     * If a file with the same name already exists, a numeric suffix ('-2', '-3', ...) is inserted before the extension.
     * @param initialFileName the initial file name.
     * @param is an input stream on the file.
     * @param mimeType the file mime type. Can be null to guess it from the file name.
     * @param lastModified the last modification date. Can be null to use the current date.
     * @return the final file name.
     */
    protected String _storeFile(String initialFileName, InputStream is, String mimeType, ZonedDateTime lastModified)
    {
        String fileName = initialFileName;
        int count = 2;
        
        if (_richText instanceof RichText)
        {
            RichText richText = (RichText) _richText;
            
            while (richText.hasAttachment(fileName))
            {
                fileName = _getNumberedFileName(initialFileName, count++);
            }
            
            NamedResource resource = new NamedResource();
            
            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
            
            resource.setFilename(fileName);
            resource.setLastModificationDate(lastModified != null ? lastModified : ZonedDateTime.now(ZoneOffset.UTC));
            
            try
            {
                resource.setInputStream(is);
            }
            catch (IOException e)
            {
                throw new AmetysRepositoryException("Unable to save attachment " + initialFileName, e);
            }
            
            richText.addAttachment(resource);
        }
        else
        {
            ModifiableRichText richText = (ModifiableRichText) _richText;
            
            while (richText.getAdditionalDataFolder().hasFile(fileName))
            {
                fileName = _getNumberedFileName(initialFileName, count++);
            }
            
            ModifiableFile file = richText.getAdditionalDataFolder().addFile(fileName);
            ModifiableResource resource = file.getResource();
            resource.setLastModified(Optional.ofNullable(lastModified).map(DateUtils::asDate).orElseGet(Date::new));
            
            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
            
            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
            resource.setInputStream(is);
        }
        
        // store the file usage, so that it won't be deleted immediately
        _usedLocalFiles.add(fileName);
        
        return fileName;
    }
    
    /**
     * Insert a numeric suffix in a file name, before the extension if any.
     * @param initialFileName the initial file name.
     * @param count the number to insert.
     * @return the file name with '-count' inserted before the last '.', or appended if there is no '.'.
     */
    private String _getNumberedFileName(String initialFileName, int count)
    {
        int i = initialFileName.lastIndexOf('.');
        return i == -1 ? initialFileName + '-' + count : initialFileName.substring(0, i) + '-' + count + initialFileName.substring(i);
    }
    
    /**
     * Process a local file.
     * The file is marked as used, and the image dimensions are added unless the tag is a marker.
     * @param attrs the img tag attributes.
     * @return the new img tag attributes.
     */
    protected Attributes _processLocal(Attributes attrs)
    {
        // src contains the fileName
        String filename = attrs.getValue("data-ametys-src");
        _usedLocalFiles.add(filename);
        
        AttributesImpl newAttrs = new AttributesImpl(attrs);
        if (!"marker".equals(attrs.getValue("marker")))
        {
            if (_richText instanceof RichText)
            {
                NamedResource file = ((RichText) _richText).getAttachment(filename);
                try (InputStream is = file.getInputStream())
                {
                    _addDimensionAttributes(is, newAttrs);
                }
                catch (IOException e)
                {
                    // Ignore: only the width and height attributes are missing
                }
            }
            else
            {
                File file = ((ModifiableRichText) _richText).getAdditionalDataFolder().getFile(filename);
                try (InputStream is = file.getResource().getInputStream())
                {
                    _addDimensionAttributes(is, newAttrs);
                }
                catch (IOException e)
                {
                    // Ignore: only the width and height attributes are missing
                }
            }
        }
        
        return newAttrs;
    }
    
    /**
     * Process a resource.
     * The image dimensions are added unless the tag is a marker.
     * @param attrs the img tag attributes.
     * @return the new img tag attributes, or the given attributes if the resource does not exist.
     */
    protected Attributes _processResource(Attributes attrs)
    {
        String ametysSrc = attrs.getValue("data-ametys-src");
        
        Resource resource = null;
        try
        {
            resource = _ametysResolver.resolveById(ametysSrc);
        }
        catch (UnknownAmetysObjectException ex)
        {
            getLogger().warn("Link to unexisting resource image " + ametysSrc, ex);
            return attrs;
        }
        
        AttributesImpl newAttrs = new AttributesImpl(attrs);
        if (!"marker".equals(attrs.getValue("marker")))
        {
            try (InputStream is = resource.getInputStream())
            {
                _addDimensionAttributes(is, newAttrs);
            }
            catch (IOException e)
            {
                // Ignore: only the width and height attributes are missing
            }
        }
        
        return newAttrs;
    }
    
    /**
     * Add an image's width and height to the XML attributes.
     * An attribute that is already present is left untouched; nothing is added if the image cannot be decoded.
     * @param inputStream an input stream on the image.
     * @param attrs the attributes to fill.
     * @throws IOException if an error occurs during reading dimension
     */
    protected void _addDimensionAttributes(InputStream inputStream, AttributesImpl attrs) throws IOException
    {
        // We need to call Thumbnail to get image dimension with EXIF orientation tag
        BufferedImage img = ImageHelper.read(inputStream);
        if (img == null)
        {
            return;
        }
        
        if (attrs.getValue("width") == null)
        {
            attrs.addCDATAAttribute("width", Integer.toString(img.getWidth()));
        }
        if (attrs.getValue("height") == null)
        {
            attrs.addCDATAAttribute("height", Integer.toString(img.getHeight()));
        }
    }
    
    @Override
    public void endElement(String uri, String loc, String raw) throws SAXException
    {
        if ("img".equals(raw) && _tagToIgnore)
        {
            // ignore img tag: its start tag was not sent either
            _tagToIgnore = false;
            return;
        }
        
        super.endElement(uri, loc, raw);
    }
    
    @Override
    public void endDocument() throws SAXException
    {
        if (_richText instanceof RichText)
        {
            RichText richText = (RichText) _richText;
            
            // Look for unused files (collected first, then removed)
            Set<String> unusedLocalFiles = richText.getAttachmentNames()
                    .stream()
                    .filter(fileName -> !_usedLocalFiles.contains(fileName))
                    .collect(Collectors.toSet());
            // Remove unused files
            unusedLocalFiles.stream()
                            .forEach(richText::removeAttachment);
        }
        else
        {
            ModifiableRichText richText = (ModifiableRichText) _richText;
            
            // removing unused files
            ModifiableFolder folder = richText.getAdditionalDataFolder();
            for (File file : folder.getFiles())
            {
                String fileName = file.getName();
                
                if (!_usedLocalFiles.contains(fileName))
                {
                    folder.remove(fileName);
                }
            }
        }
        
        super.endDocument();
    }
}