001/* 002 * Copyright 2020 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.ametys.cms.data; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.net.URI; 022import java.net.URISyntaxException; 023import java.util.Map; 024import java.util.Optional; 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.avalon.framework.component.Component; 029import org.apache.avalon.framework.context.ContextException; 030import org.apache.avalon.framework.context.Contextualizable; 031import org.apache.cocoon.Constants; 032import org.apache.cocoon.environment.Context; 033import org.apache.cocoon.xml.AttributesImpl; 034import org.apache.commons.lang3.StringUtils; 035import org.apache.excalibur.xml.sax.ContentHandlerProxy; 036import org.apache.http.HttpEntity; 037import org.apache.http.client.config.RequestConfig; 038import org.apache.http.client.methods.CloseableHttpResponse; 039import org.apache.http.client.methods.HttpGet; 040import org.apache.http.entity.ContentType; 041import org.apache.http.impl.client.CloseableHttpClient; 042import org.apache.http.impl.client.HttpClientBuilder; 043import org.slf4j.Logger; 044import org.xml.sax.Attributes; 045import org.xml.sax.ContentHandler; 046import org.xml.sax.SAXException; 047 048import org.ametys.core.util.HttpUtils; 049import org.ametys.runtime.plugin.component.AbstractLogEnabled; 050 051/** 052 * Factory for the transformer that imports a rich text from docbook. 053 */ 054public class RichTextImportHandlerFactory extends AbstractLogEnabled implements Component, Contextualizable 055{ 056 /** Avalon role. */ 057 public static final String ROLE = RichTextImportHandlerFactory.class.getName(); 058 private Context _cocoonContext; 059 060 public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException 061 { 062 _cocoonContext = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT); 063 } 064 065 /** 066 * Creates a handler proxy to import the rich text 067 * @param contentHandler the contentHandler to pass SAX events to 068 * @param richText the rich text 069 * @param files the attachments of this rich text 070 * @return the created handler 071 */ 072 public RichTextImportHandler createHandlerProxy(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files) 073 { 074 return new RichTextImportHandler(contentHandler, richText, files, getLogger()); 075 } 076 077 /** 078 * This transformer imports the rich text from docbook. 079 */ 080 public class RichTextImportHandler extends ContentHandlerProxy 081 { 082 private static final String __ATTACHMENT_IMAGE_TAG_NAME = "imagedata"; 083 private static final String __ATTACHMENT_VIDEO_TAG_NAME = "videodata"; 084 private static final String __ATTACHMENT_AUDIO_TAG_NAME = "audiodata"; 085 private static final String __ATTACHMENT_TYPE_ATTRIBUTE_NAME = "type"; 086 private static final String __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE = "local"; 087 088 // Local attachment URI is of the form ownerId@dataName;fileName 089 private static final Pattern __LOCAL_ATTACHMENT_URI_VALIDATOR = Pattern.compile("^(?:[^@;]+)@(?:[^@;]+);([^@;]+)$"); 090 091 private static final String __ANNOTATION_TAG_NAME = "phrase"; 092 private static final String __ANNOTATION_NAME_ATTRIBUTE_NAME = "role"; 093 private static final String __ANNOTATION_CLASS_ATTRIBUTE_NAME = "class"; 094 private static final String __ANNOTATION_CLASS_ATTRIBUTE_VALUE = "semantic"; 095 096 private RichText _richText; 097 private Map<String, InputStream> _files; 098 private Logger _logger; 099 100 private boolean _isCurrentlyInAnnotation; 101 private String _currentAnnotationName; 102 private StringBuilder _currentAnnotationValue; 103 private int _cptrElementsInsideCurrentAnnotation; 104 105 /** 106 * Creates a handler proxy to import a rich text 107 * @param contentHandler the contentHandler to pass SAX events to 108 * @param richText the rich text 109 * @param files the attachments of this rich text 110 * @param logger the logger 111 */ 112 public RichTextImportHandler(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files, Logger logger) 113 { 114 super(contentHandler); 115 _richText = richText; 116 _files = files; 117 _logger = logger; 118 } 119 120 @Override 121 public void startDocument() throws SAXException 122 { 123 // Remove all existing attachments from the rich text. 124 _richText.removeAttachments(); 125 126 // Remove all existing annotations from the rich text. 127 _richText.removeAllAnnotations(); 128 129 super.startDocument(); 130 } 131 132 @Override 133 public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException 134 { 135 // A new attachment starts being saxed 136 boolean isAttachment = _isAttachment(loc); 137 String type = attrs.getValue(__ATTACHMENT_TYPE_ATTRIBUTE_NAME); 138 Attributes newAttrs = attrs; 139 if (isAttachment && __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE.equals(type)) 140 { 141 newAttrs = _processAttachment(attrs); 142 } 143 144 // A new semantic annotation starts being saxed 145 String clazz = attrs.getValue(__ANNOTATION_CLASS_ATTRIBUTE_NAME); 146 String annotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME); 147 if (__ANNOTATION_TAG_NAME.equals(loc) && __ANNOTATION_CLASS_ATTRIBUTE_VALUE.equals(clazz) && annotationName != null) 148 { 149 _processAnnotation(attrs); 150 } 151 else if (_isCurrentlyInAnnotation) 152 { 153 // A new element is being SAXed inside the current annotation 154 _cptrElementsInsideCurrentAnnotation++; 155 } 156 157 super.startElement(uri, loc, raw, newAttrs); 158 } 159 160 private boolean _isAttachment(String loc) 161 { 162 return __ATTACHMENT_IMAGE_TAG_NAME.equals(loc) || __ATTACHMENT_VIDEO_TAG_NAME.equals(loc) || __ATTACHMENT_AUDIO_TAG_NAME.equals(loc); 163 } 164 165 private Attributes _processAttachment(Attributes attrs) throws SAXException 166 { 167 String fileRefAttribute = attrs.getValue("fileref"); 168 String filename = fileRefAttribute; 169 170 if (StringUtils.isEmpty(fileRefAttribute) || HttpUtils.HTTP_URL_VALIDATOR.matcher(fileRefAttribute).matches()) 171 { 172 try 173 { 174 NamedResource attachment = new NamedResource(); 175 176 RequestConfig requestConfig = RequestConfig.custom() 177 .setConnectTimeout(2000) 178 .setSocketTimeout(2000) 179 .build(); 180 181 URI uri = new URI(fileRefAttribute); 182 String path = uri.getPath(); 183 filename = path.substring(path.lastIndexOf("/") + 1); 184 185 try (CloseableHttpClient httpclient = HttpClientBuilder.create() 186 .setDefaultRequestConfig(requestConfig) 187 .useSystemProperties() 188 .build()) 189 { 190 HttpGet httpGet = new HttpGet(fileRefAttribute); 191 try (CloseableHttpResponse httpResponse = httpclient.execute(httpGet)) 192 { 193 int statusCode = httpResponse.getStatusLine().getStatusCode(); 194 if (statusCode != 200) 195 { 196 _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text. Status code is: " + statusCode); 197 } 198 else 199 { 200 HttpEntity entity = httpResponse.getEntity(); 201 try (InputStream is = entity.getContent()) 202 { 203 if (is == null) 204 { 205 _logger.warn("The attachment named '" + filename + "' of the imported rich text is empty"); 206 } 207 else 208 { 209 attachment.setInputStream(is); 210 } 211 212 String mimeType = Optional.ofNullable(ContentType.get(entity)) 213 .map(ContentType::getMimeType) 214 .filter(StringUtils::isNotEmpty) 215 .orElse(_cocoonContext.getMimeType(filename.toLowerCase())); 216 217 attachment.setMimeType(mimeType); 218 attachment.setFilename(filename); 219 220 _richText.addAttachment(attachment); 221 } 222 } 223 } 224 } 225 } 226 catch (URISyntaxException | IOException e) 227 { 228 throw new SAXException("Unable to process the attachment '" + fileRefAttribute + "'. An error occured while setting its content", e); 229 } 230 } 231 else 232 { 233 Matcher uriMatcher = __LOCAL_ATTACHMENT_URI_VALIDATOR.matcher(fileRefAttribute); 234 if (uriMatcher.matches()) 235 { 236 filename = uriMatcher.group(1); 237 if (_files.containsKey(filename)) 238 { 239 try 240 { 241 NamedResource attachment = new NamedResource(); 242 String mimeType = _cocoonContext.getMimeType(filename.toLowerCase()); 243 attachment.setMimeType(mimeType); 244 attachment.setFilename(filename); 245 attachment.setInputStream(_files.get(filename)); 246 _richText.addAttachment(attachment); 247 } 248 catch (IOException e) 249 { 250 throw new SAXException("Unable to process the attachment '" + filename + "'. An error occured while setting its content", e); 251 } 252 } 253 else 254 { 255 _logger.warn("The file named '" + filename + "' is not an attachment of the imported rich text"); 256 } 257 } 258 else 259 { 260 // No URL format matches 261 _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text. URL format is not valid."); 262 } 263 } 264 265 AttributesImpl newAttrs = new AttributesImpl(); 266 _copyAttributes(attrs, newAttrs); 267 newAttrs.addCDATAAttribute("fileref", filename); 268 return newAttrs; 269 } 270 271 /** 272 * Copy the attributes except the fileref attribute 273 * @param attrs the attributes to copy. 274 * @param newAttrs the attributes to copy to. 275 */ 276 private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs) 277 { 278 for (int i = 0; i < attrs.getLength(); i++) 279 { 280 String name = attrs.getQName(i); 281 282 if (!"fileref".equals(name)) 283 { 284 newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i)); 285 } 286 } 287 } 288 289 private void _processAnnotation(Attributes attrs) 290 { 291 _isCurrentlyInAnnotation = true; 292 _currentAnnotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME); 293 _currentAnnotationValue = new StringBuilder(); 294 _cptrElementsInsideCurrentAnnotation = 0; 295 } 296 297 @Override 298 public void characters(char[] ch, int start, int length) throws SAXException 299 { 300 if (_isCurrentlyInAnnotation) 301 { 302 _currentAnnotationValue.append(ch, start, length); 303 } 304 305 super.characters(ch, start, length); 306 } 307 308 @Override 309 public void endElement(String uri, String loc, String raw) throws SAXException 310 { 311 if (_isCurrentlyInAnnotation) 312 { 313 if (_cptrElementsInsideCurrentAnnotation == 0) 314 { 315 // When the semantic annotation is fully saxed, add it to the rich text 316 _richText.addAnnotations(_currentAnnotationName, _currentAnnotationValue.toString()); 317 _isCurrentlyInAnnotation = false; 318 } 319 else 320 { 321 _cptrElementsInsideCurrentAnnotation--; 322 } 323 } 324 325 super.endElement(uri, loc, raw); 326 } 327 } 328}