001/* 002 * Copyright 2020 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.ametys.cms.data; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.net.URL; 022import java.util.Map; 023import java.util.Optional; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.avalon.framework.component.Component; 028import org.apache.avalon.framework.context.ContextException; 029import org.apache.avalon.framework.context.Contextualizable; 030import org.apache.cocoon.Constants; 031import org.apache.cocoon.environment.Context; 032import org.apache.cocoon.xml.AttributesImpl; 033import org.apache.commons.lang3.StringUtils; 034import org.apache.excalibur.xml.sax.ContentHandlerProxy; 035import org.apache.http.HttpEntity; 036import org.apache.http.client.config.RequestConfig; 037import org.apache.http.client.methods.CloseableHttpResponse; 038import org.apache.http.client.methods.HttpGet; 039import org.apache.http.entity.ContentType; 040import org.apache.http.impl.client.CloseableHttpClient; 041import org.apache.http.impl.client.HttpClientBuilder; 042import org.slf4j.Logger; 043import org.xml.sax.Attributes; 044import org.xml.sax.ContentHandler; 045import org.xml.sax.SAXException; 046 047import org.ametys.core.util.HttpUrlUtils; 048import org.ametys.runtime.plugin.component.AbstractLogEnabled; 049 050/** 051 * Factory for the transformer that imports a rich text from docbook. 052 */ 053public class RichTextImportHandlerFactory extends AbstractLogEnabled implements Component, Contextualizable 054{ 055 /** Avalon role. */ 056 public static final String ROLE = RichTextImportHandlerFactory.class.getName(); 057 private Context _cocoonContext; 058 059 public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException 060 { 061 _cocoonContext = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT); 062 } 063 064 /** 065 * Creates a handler proxy to import the rich text 066 * @param contentHandler the contentHandler to pass SAX events to 067 * @param richText the rich text 068 * @param files the attachments of this rich text 069 * @return the created handler 070 */ 071 public RichTextImportHandler createHandlerProxy(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files) 072 { 073 return new RichTextImportHandler(contentHandler, richText, files, getLogger()); 074 } 075 076 /** 077 * This transformer imports the rich text from docbook. 078 */ 079 public class RichTextImportHandler extends ContentHandlerProxy 080 { 081 private static final String __ATTACHMENT_IMAGE_TAG_NAME = "imagedata"; 082 private static final String __ATTACHMENT_VIDEO_TAG_NAME = "videodata"; 083 private static final String __ATTACHMENT_AUDIO_TAG_NAME = "audiodata"; 084 private static final String __ATTACHMENT_TYPE_ATTRIBUTE_NAME = "type"; 085 private static final String __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE = "local"; 086 087 // Local attachment URI is of the form ownerId@dataName;fileName 088 private static final Pattern __LOCAL_ATTACHMENT_URI_VALIDATOR = Pattern.compile("^(?:[^@;]+)@(?:[^@;]+);([^@;]+)$"); 089 090 private static final String __ANNOTATION_TAG_NAME = "phrase"; 091 private static final String __ANNOTATION_NAME_ATTRIBUTE_NAME = "role"; 092 private static final String __ANNOTATION_CLASS_ATTRIBUTE_NAME = "class"; 093 private static final String __ANNOTATION_CLASS_ATTRIBUTE_VALUE = "semantic"; 094 095 private RichText _richText; 096 private Map<String, InputStream> _files; 097 private Logger _logger; 098 099 private boolean _isCurrentlyInAnnotation; 100 private String _currentAnnotationName; 101 private StringBuilder _currentAnnotationValue; 102 private int _cptrElementsInsideCurrentAnnotation; 103 104 /** 105 * Creates a handler proxy to import a rich text 106 * @param contentHandler the contentHandler to pass SAX events to 107 * @param richText the rich text 108 * @param files the attachments of this rich text 109 * @param logger the logger 110 */ 111 public RichTextImportHandler(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files, Logger logger) 112 { 113 super(contentHandler); 114 _richText = richText; 115 _files = files; 116 _logger = logger; 117 } 118 119 @Override 120 public void startDocument() throws SAXException 121 { 122 // Remove all existing attachments from the rich text. 123 _richText.removeAttachments(); 124 125 // Remove all existing annotations from the rich text. 126 _richText.removeAllAnnotations(); 127 128 super.startDocument(); 129 } 130 131 @Override 132 public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException 133 { 134 // A new attachment starts being saxed 135 boolean isAttachment = _isAttachment(loc); 136 String type = attrs.getValue(__ATTACHMENT_TYPE_ATTRIBUTE_NAME); 137 Attributes newAttrs = attrs; 138 if (isAttachment && __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE.equals(type)) 139 { 140 newAttrs = _processAttachment(attrs); 141 } 142 143 // A new semantic annotation starts being saxed 144 String clazz = attrs.getValue(__ANNOTATION_CLASS_ATTRIBUTE_NAME); 145 String annotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME); 146 if (__ANNOTATION_TAG_NAME.equals(loc) && __ANNOTATION_CLASS_ATTRIBUTE_VALUE.equals(clazz) && annotationName != null) 147 { 148 _processAnnotation(attrs); 149 } 150 else if (_isCurrentlyInAnnotation) 151 { 152 // A new element is being SAXed inside the current annotation 153 _cptrElementsInsideCurrentAnnotation++; 154 } 155 156 super.startElement(uri, loc, raw, newAttrs); 157 } 158 159 private boolean _isAttachment(String loc) 160 { 161 return __ATTACHMENT_IMAGE_TAG_NAME.equals(loc) || __ATTACHMENT_VIDEO_TAG_NAME.equals(loc) || __ATTACHMENT_AUDIO_TAG_NAME.equals(loc); 162 } 163 164 private Attributes _processAttachment(Attributes attrs) throws SAXException 165 { 166 String fileRefAttribute = attrs.getValue("fileref"); 167 String filename = fileRefAttribute; 168 169 if (HttpUrlUtils.HTTP_URL_VALIDATOR.matcher(fileRefAttribute).matches()) 170 { 171 try 172 { 173 NamedResource attachment = new NamedResource(); 174 175 RequestConfig requestConfig = RequestConfig.custom() 176 .setConnectTimeout(2000) 177 .setSocketTimeout(2000) 178 .build(); 179 180 URL url = new URL(fileRefAttribute); 181 String path = url.getPath(); 182 filename = path.substring(path.lastIndexOf("/") + 1); 183 184 try (CloseableHttpClient httpclient = HttpClientBuilder.create() 185 .setDefaultRequestConfig(requestConfig) 186 .useSystemProperties() 187 .build()) 188 { 189 HttpGet httpGet = new HttpGet(fileRefAttribute); 190 try (CloseableHttpResponse httpResponse = httpclient.execute(httpGet)) 191 { 192 int statusCode = httpResponse.getStatusLine().getStatusCode(); 193 if (statusCode != 200) 194 { 195 _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text. Status code is: " + statusCode); 196 } 197 else 198 { 199 HttpEntity entity = httpResponse.getEntity(); 200 try (InputStream is = entity.getContent()) 201 { 202 if (is == null) 203 { 204 _logger.warn("The attachment named '" + filename + "' of the imported rich text is empty"); 205 } 206 else 207 { 208 attachment.setInputStream(is); 209 } 210 211 String mimeType = Optional.ofNullable(ContentType.get(entity)) 212 .map(ContentType::getMimeType) 213 .filter(StringUtils::isNotEmpty) 214 .orElse(_cocoonContext.getMimeType(filename.toLowerCase())); 215 216 attachment.setMimeType(mimeType); 217 attachment.setFilename(filename); 218 219 _richText.addAttachment(attachment); 220 } 221 } 222 } 223 } 224 } 225 catch (IOException e) 226 { 227 throw new SAXException("Unable to process the attachment '" + fileRefAttribute + "'. An error occured while setting its content", e); 228 } 229 } 230 else 231 { 232 Matcher uriMatcher = __LOCAL_ATTACHMENT_URI_VALIDATOR.matcher(fileRefAttribute); 233 if (uriMatcher.matches()) 234 { 235 filename = uriMatcher.group(1); 236 if (_files.containsKey(filename)) 237 { 238 try 239 { 240 NamedResource attachment = new NamedResource(); 241 String mimeType = _cocoonContext.getMimeType(filename.toLowerCase()); 242 attachment.setMimeType(mimeType); 243 attachment.setFilename(filename); 244 attachment.setInputStream(_files.get(filename)); 245 _richText.addAttachment(attachment); 246 } 247 catch (IOException e) 248 { 249 throw new SAXException("Unable to process the attachment '" + filename + "'. An error occured while setting its content", e); 250 } 251 } 252 else 253 { 254 _logger.warn("The file named '" + filename + "' is not an attachment of the imported rich text"); 255 } 256 } 257 else 258 { 259 // No URL format matches 260 _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text. URL format is not valid."); 261 } 262 } 263 264 AttributesImpl newAttrs = new AttributesImpl(); 265 _copyAttributes(attrs, newAttrs); 266 newAttrs.addCDATAAttribute("fileref", filename); 267 return newAttrs; 268 } 269 270 /** 271 * Copy the attributes except the fileref attribute 272 * @param attrs the attributes to copy. 273 * @param newAttrs the attributes to copy to. 274 */ 275 private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs) 276 { 277 for (int i = 0; i < attrs.getLength(); i++) 278 { 279 String name = attrs.getQName(i); 280 281 if (!"fileref".equals(name)) 282 { 283 newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i)); 284 } 285 } 286 } 287 288 private void _processAnnotation(Attributes attrs) 289 { 290 _isCurrentlyInAnnotation = true; 291 _currentAnnotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME); 292 _currentAnnotationValue = new StringBuilder(); 293 _cptrElementsInsideCurrentAnnotation = 0; 294 } 295 296 @Override 297 public void characters(char[] ch, int start, int length) throws SAXException 298 { 299 if (_isCurrentlyInAnnotation) 300 { 301 _currentAnnotationValue.append(ch, start, length); 302 } 303 304 super.characters(ch, start, length); 305 } 306 307 @Override 308 public void endElement(String uri, String loc, String raw) throws SAXException 309 { 310 if (_isCurrentlyInAnnotation) 311 { 312 if (_cptrElementsInsideCurrentAnnotation == 0) 313 { 314 // When the semantic annotation is fully saxed, add it to the rich text 315 _richText.addAnnotations(_currentAnnotationName, _currentAnnotationValue.toString()); 316 _isCurrentlyInAnnotation = false; 317 } 318 else 319 { 320 _cptrElementsInsideCurrentAnnotation--; 321 } 322 } 323 324 super.endElement(uri, loc, raw); 325 } 326 } 327}