001/* 002 * Copyright 2020 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.ametys.cms.data; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.net.URL; 022import java.util.Map; 023import java.util.Optional; 024 025import org.apache.avalon.framework.component.Component; 026import org.apache.avalon.framework.context.ContextException; 027import org.apache.avalon.framework.context.Contextualizable; 028import org.apache.cocoon.Constants; 029import org.apache.cocoon.environment.Context; 030import org.apache.cocoon.xml.AttributesImpl; 031import org.apache.commons.lang3.StringUtils; 032import org.apache.excalibur.xml.sax.ContentHandlerProxy; 033import org.apache.http.HttpEntity; 034import org.apache.http.client.config.RequestConfig; 035import org.apache.http.client.methods.CloseableHttpResponse; 036import org.apache.http.client.methods.HttpGet; 037import org.apache.http.entity.ContentType; 038import org.apache.http.impl.client.CloseableHttpClient; 039import org.apache.http.impl.client.HttpClientBuilder; 040import org.slf4j.Logger; 041import org.xml.sax.Attributes; 042import org.xml.sax.ContentHandler; 043import org.xml.sax.SAXException; 044 045import org.ametys.cms.repository.comment.CommentsDAO; 046import org.ametys.runtime.plugin.component.AbstractLogEnabled; 047 048/** 049 * Factory for the transformer that imports a rich text from docbook. 050 */ 051public class RichTextImportHandlerFactory extends AbstractLogEnabled implements Component, Contextualizable 052{ 053 /** Avalon role. */ 054 public static final String ROLE = RichTextImportHandlerFactory.class.getName(); 055 private Context _cocoonContext; 056 057 public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException 058 { 059 _cocoonContext = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT); 060 } 061 062 /** 063 * Creates a handler proxy to import the rich text 064 * @param contentHandler the contentHandler to pass SAX events to 065 * @param richText the rich text 066 * @param files the attachments of this rich text 067 * @return the created handler 068 */ 069 public RichTextImportHandler createHandlerProxy(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files) 070 { 071 return new RichTextImportHandler(contentHandler, richText, files, getLogger()); 072 } 073 074 /** 075 * This transformer imports the rich text from docbook. 076 */ 077 public class RichTextImportHandler extends ContentHandlerProxy 078 { 079 private static final String __ATTACHMENT_IMAGE_TAG_NAME = "imagedata"; 080 private static final String __ATTACHMENT_VIDEO_TAG_NAME = "videodata"; 081 private static final String __ATTACHMENT_AUDIO_TAG_NAME = "audiodata"; 082 private static final String __ATTACHMENT_TYPE_ATTRIBUTE_NAME = "type"; 083 private static final String __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE = "local"; 084 085 private static final String __ANNOTATION_TAG_NAME = "phrase"; 086 private static final String __ANNOTATION_NAME_ATTRIBUTE_NAME = "role"; 087 private static final String __ANNOTATION_CLASS_ATTRIBUTE_NAME = "class"; 088 private static final String __ANNOTATION_CLASS_ATTRIBUTE_VALUE = "semantic"; 089 090 091 private RichText _richText; 092 private Map<String, InputStream> _files; 093 private Logger _logger; 094 095 private boolean _isCurrentlyInAnnotation; 096 private String _currentAnnotationName; 097 private StringBuilder _currentAnnotationValue; 098 private int _cptrElementsInsideCurrentAnnotation; 099 100 /** 101 * Creates a handler proxy to import a rich text 102 * @param contentHandler the contentHandler to pass SAX events to 103 * @param richText the rich text 104 * @param files the attachments of this rich text 105 * @param logger the logger 106 */ 107 public RichTextImportHandler(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files, Logger logger) 108 { 109 super(contentHandler); 110 _richText = richText; 111 _files = files; 112 _logger = logger; 113 } 114 115 @Override 116 public void startDocument() throws SAXException 117 { 118 // Remove all existing attachments from the rich text. 119 _richText.removeAttachments(); 120 121 // Remove all existing annotations from the rich text. 122 _richText.removeAllAnnotations(); 123 124 super.startDocument(); 125 } 126 127 @Override 128 public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException 129 { 130 // A new attachment starts being saxed 131 boolean isAttachment = _isAttachment(loc); 132 String type = attrs.getValue(__ATTACHMENT_TYPE_ATTRIBUTE_NAME); 133 Attributes newAttrs = attrs; 134 if (isAttachment && __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE.equals(type)) 135 { 136 newAttrs = _processAttachment(attrs); 137 } 138 139 // A new semantic annotation starts being saxed 140 String clazz = attrs.getValue(__ANNOTATION_CLASS_ATTRIBUTE_NAME); 141 String annotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME); 142 if (__ANNOTATION_TAG_NAME.equals(loc) && __ANNOTATION_CLASS_ATTRIBUTE_VALUE.equals(clazz) && annotationName != null) 143 { 144 _processAnnotation(attrs); 145 } 146 else if (_isCurrentlyInAnnotation) 147 { 148 // A new element is being SAXed inside the current annotation 149 _cptrElementsInsideCurrentAnnotation++; 150 } 151 152 super.startElement(uri, loc, raw, newAttrs); 153 } 154 155 private boolean _isAttachment(String loc) 156 { 157 return __ATTACHMENT_IMAGE_TAG_NAME.equals(loc) || __ATTACHMENT_VIDEO_TAG_NAME.equals(loc) || __ATTACHMENT_AUDIO_TAG_NAME.equals(loc); 158 } 159 160 private Attributes _processAttachment(Attributes attrs) throws SAXException 161 { 162 String fileRefAttribute = attrs.getValue("fileref"); 163 String filename; 164 165 if (CommentsDAO.URL_VALIDATOR.matcher(fileRefAttribute).matches()) 166 { 167 try 168 { 169 NamedResource attachment = new NamedResource(); 170 171 RequestConfig requestConfig = RequestConfig.custom() 172 .setConnectTimeout(2000) 173 .setSocketTimeout(2000) 174 .build(); 175 176 URL url = new URL(fileRefAttribute); 177 String path = url.getPath(); 178 filename = path.substring(path.lastIndexOf("/") + 1); 179 180 try (CloseableHttpClient httpclient = HttpClientBuilder.create() 181 .setDefaultRequestConfig(requestConfig) 182 .useSystemProperties() 183 .build()) 184 { 185 HttpGet httpGet = new HttpGet(fileRefAttribute); 186 try (CloseableHttpResponse httpResponse = httpclient.execute(httpGet)) 187 { 188 if (httpResponse.getStatusLine().getStatusCode() != 200) 189 { 190 _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text"); 191 } 192 else 193 { 194 HttpEntity entity = httpResponse.getEntity(); 195 try (InputStream is = entity.getContent()) 196 { 197 if (is == null) 198 { 199 _logger.warn("The attachment named '" + filename + "' of the imported rich text is empty"); 200 } 201 else 202 { 203 attachment.setInputStream(is); 204 } 205 206 String mimeType = Optional.ofNullable(ContentType.get(entity)) 207 .map(ContentType::getMimeType) 208 .filter(StringUtils::isNotEmpty) 209 .orElse(_cocoonContext.getMimeType(filename.toLowerCase())); 210 211 attachment.setMimeType(mimeType); 212 attachment.setFilename(filename); 213 214 _richText.addAttachment(attachment); 215 } 216 } 217 } 218 } 219 } 220 catch (IOException e) 221 { 222 throw new SAXException("Unable to process the attachment '" + fileRefAttribute + "'. An error occured while setting its content", e); 223 } 224 } 225 else 226 { 227 // file reference is of the form ownerId@dataName;fileName 228 int indexOfFilenameSeparator = fileRefAttribute.lastIndexOf(';'); 229 filename = fileRefAttribute.substring(indexOfFilenameSeparator + 1); 230 231 if (indexOfFilenameSeparator == -1) 232 { 233 throw new IllegalArgumentException("A local image should have a file reference of the form <protocol>://<protocol-specific-part>;<filename> : " + fileRefAttribute); 234 } 235 236 if (_files.containsKey(filename)) 237 { 238 try 239 { 240 NamedResource attachment = new NamedResource(); 241 String mimeType = _cocoonContext.getMimeType(filename.toLowerCase()); 242 attachment.setMimeType(mimeType); 243 attachment.setFilename(filename); 244 attachment.setInputStream(_files.get(filename)); 245 _richText.addAttachment(attachment); 246 } 247 catch (IOException e) 248 { 249 throw new SAXException("Unable to process the attachment '" + filename + "'. An error occured while setting its content", e); 250 } 251 } 252 } 253 254 AttributesImpl newAttrs = new AttributesImpl(); 255 _copyAttributes(attrs, newAttrs); 256 newAttrs.addCDATAAttribute("fileref", filename); 257 return newAttrs; 258 } 259 260 /** 261 * Copy the attributes except the fileref attribute 262 * @param attrs the attributes to copy. 263 * @param newAttrs the attributes to copy to. 264 */ 265 private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs) 266 { 267 for (int i = 0; i < attrs.getLength(); i++) 268 { 269 String name = attrs.getQName(i); 270 271 if (!"fileref".equals(name)) 272 { 273 newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i)); 274 } 275 } 276 } 277 278 private void _processAnnotation(Attributes attrs) 279 { 280 _isCurrentlyInAnnotation = true; 281 _currentAnnotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME); 282 _currentAnnotationValue = new StringBuilder(); 283 _cptrElementsInsideCurrentAnnotation = 0; 284 } 285 286 @Override 287 public void characters(char[] ch, int start, int length) throws SAXException 288 { 289 if (_isCurrentlyInAnnotation) 290 { 291 _currentAnnotationValue.append(ch, start, length); 292 } 293 294 super.characters(ch, start, length); 295 } 296 297 @Override 298 public void endElement(String uri, String loc, String raw) throws SAXException 299 { 300 if (_isCurrentlyInAnnotation) 301 { 302 if (_cptrElementsInsideCurrentAnnotation == 0) 303 { 304 // When the semantic annotation is fully saxed, add it to the rich text 305 _richText.addAnnotations(_currentAnnotationName, _currentAnnotationValue.toString()); 306 _isCurrentlyInAnnotation = false; 307 } 308 else 309 { 310 _cptrElementsInsideCurrentAnnotation--; 311 } 312 } 313 314 super.endElement(uri, loc, raw); 315 } 316 } 317}