001/*
002 *  Copyright 2020 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016
017package org.ametys.cms.data;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.net.URI;
022import java.net.URISyntaxException;
023import java.util.Map;
024import java.util.Optional;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.avalon.framework.component.Component;
029import org.apache.avalon.framework.context.ContextException;
030import org.apache.avalon.framework.context.Contextualizable;
031import org.apache.cocoon.Constants;
032import org.apache.cocoon.environment.Context;
033import org.apache.cocoon.xml.AttributesImpl;
034import org.apache.commons.lang3.StringUtils;
035import org.apache.excalibur.xml.sax.ContentHandlerProxy;
036import org.apache.http.HttpEntity;
037import org.apache.http.client.config.RequestConfig;
038import org.apache.http.client.methods.CloseableHttpResponse;
039import org.apache.http.client.methods.HttpGet;
040import org.apache.http.entity.ContentType;
041import org.apache.http.impl.client.CloseableHttpClient;
042import org.apache.http.impl.client.HttpClientBuilder;
043import org.slf4j.Logger;
044import org.xml.sax.Attributes;
045import org.xml.sax.ContentHandler;
046import org.xml.sax.SAXException;
047
048import org.ametys.core.util.HttpUtils;
049import org.ametys.runtime.plugin.component.AbstractLogEnabled;
050
051/**
052 * Factory for the transformer that imports a rich text from docbook.
053 */
054public class RichTextImportHandlerFactory extends AbstractLogEnabled implements Component, Contextualizable
055{
056    /** Avalon role. */
057    public static final String ROLE = RichTextImportHandlerFactory.class.getName();
058    private Context _cocoonContext;
059    
060    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
061    {
062        _cocoonContext = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
063    }
064
065    /**
066     * Creates a handler proxy to import the rich text
067     * @param contentHandler the contentHandler to pass SAX events to
068     * @param richText the rich text
069     * @param files the attachments of this rich text
070     * @return the created handler
071     */
072    public RichTextImportHandler createHandlerProxy(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files)
073    {
074        return new RichTextImportHandler(contentHandler, richText, files, getLogger());
075    }
076    
077    /**
078     * This transformer imports the rich text from docbook.
079     */
080    public class RichTextImportHandler extends ContentHandlerProxy
081    {
082        private static final String __ATTACHMENT_IMAGE_TAG_NAME = "imagedata";
083        private static final String __ATTACHMENT_VIDEO_TAG_NAME = "videodata";
084        private static final String __ATTACHMENT_AUDIO_TAG_NAME = "audiodata";
085        private static final String __ATTACHMENT_TYPE_ATTRIBUTE_NAME = "type";
086        private static final String __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE = "local";
087
088        // Local attachment URI is of the form ownerId@dataName;fileName
089        private static final Pattern __LOCAL_ATTACHMENT_URI_VALIDATOR = Pattern.compile("^(?:[^@;]+)@(?:[^@;]+);([^@;]+)$");
090
091        private static final String __ANNOTATION_TAG_NAME = "phrase";
092        private static final String __ANNOTATION_NAME_ATTRIBUTE_NAME = "role";
093        private static final String __ANNOTATION_CLASS_ATTRIBUTE_NAME = "class";
094        private static final String __ANNOTATION_CLASS_ATTRIBUTE_VALUE = "semantic";
095
096        private RichText _richText;
097        private Map<String, InputStream> _files;
098        private Logger _logger;
099
100        private boolean _isCurrentlyInAnnotation;
101        private String _currentAnnotationName;
102        private StringBuilder _currentAnnotationValue;
103        private int _cptrElementsInsideCurrentAnnotation;
104        
105        /**
106         * Creates a handler proxy to import a rich text
107         * @param contentHandler the contentHandler to pass SAX events to
108         * @param richText the rich text
109         * @param files the attachments of this rich text
110         * @param logger the logger
111         */
112        public RichTextImportHandler(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files, Logger logger)
113        {
114            super(contentHandler);
115            _richText = richText;
116            _files = files;
117            _logger = logger;
118        }
119
120        @Override
121        public void startDocument() throws SAXException
122        {
123            // Remove all existing attachments from the rich text.
124            _richText.removeAttachments();
125
126            // Remove all existing annotations from the rich text.
127            _richText.removeAllAnnotations();
128
129            super.startDocument();
130        }
131
132        @Override
133        public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
134        {
135            // A new attachment starts being saxed
136            boolean isAttachment = _isAttachment(loc);
137            String type = attrs.getValue(__ATTACHMENT_TYPE_ATTRIBUTE_NAME);
138            Attributes newAttrs = attrs;
139            if (isAttachment && __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE.equals(type))
140            {
141                newAttrs = _processAttachment(attrs);
142            }
143
144            // A new semantic annotation starts being saxed
145            String clazz = attrs.getValue(__ANNOTATION_CLASS_ATTRIBUTE_NAME);
146            String annotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME);
147            if (__ANNOTATION_TAG_NAME.equals(loc) && __ANNOTATION_CLASS_ATTRIBUTE_VALUE.equals(clazz) && annotationName != null)
148            {
149                _processAnnotation(attrs);
150            }
151            else if (_isCurrentlyInAnnotation)
152            {
153                // A new element is being SAXed inside the current annotation
154                _cptrElementsInsideCurrentAnnotation++;
155            }
156
157            super.startElement(uri, loc, raw, newAttrs);
158        }
159        
160        private boolean _isAttachment(String loc)
161        {
162            return __ATTACHMENT_IMAGE_TAG_NAME.equals(loc) || __ATTACHMENT_VIDEO_TAG_NAME.equals(loc) || __ATTACHMENT_AUDIO_TAG_NAME.equals(loc);
163        }
164
165        private Attributes _processAttachment(Attributes attrs) throws SAXException
166        {
167            String fileRefAttribute = attrs.getValue("fileref");
168            String filename = fileRefAttribute;
169            
170            if (StringUtils.isEmpty(fileRefAttribute) || HttpUtils.HTTP_URL_VALIDATOR.matcher(fileRefAttribute).matches())
171            {
172                try
173                {
174                    NamedResource attachment = new NamedResource();
175    
176                    RequestConfig requestConfig = RequestConfig.custom()
177                            .setConnectTimeout(2000)
178                            .setSocketTimeout(2000)
179                            .build();
180                    
181                    URI uri = new URI(fileRefAttribute);
182                    String path = uri.getPath();
183                    filename = path.substring(path.lastIndexOf("/") + 1);
184                    
185                    try (CloseableHttpClient httpclient = HttpClientBuilder.create()
186                                                                           .setDefaultRequestConfig(requestConfig)
187                                                                           .useSystemProperties()
188                                                                           .build())
189                    {
190                        HttpGet httpGet = new HttpGet(fileRefAttribute);
191                        try (CloseableHttpResponse httpResponse = httpclient.execute(httpGet))
192                        {
193                            int statusCode = httpResponse.getStatusLine().getStatusCode();
194                            if (statusCode != 200)
195                            {
196                                _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text. Status code is: " + statusCode);
197                            }
198                            else
199                            {
200                                HttpEntity entity = httpResponse.getEntity();
201                                try (InputStream is = entity.getContent())
202                                {
203                                    if (is == null)
204                                    {
205                                        _logger.warn("The attachment named '" + filename + "' of the imported rich text is empty");
206                                    }
207                                    else
208                                    {
209                                        attachment.setInputStream(is);
210                                    }
211                                    
212                                    String mimeType = Optional.ofNullable(ContentType.get(entity))
213                                            .map(ContentType::getMimeType)
214                                            .filter(StringUtils::isNotEmpty)
215                                            .orElse(_cocoonContext.getMimeType(filename.toLowerCase()));
216
217                                    attachment.setMimeType(mimeType);
218                                    attachment.setFilename(filename);
219                                    
220                                    _richText.addAttachment(attachment);
221                                }
222                            }
223                        }
224                    }
225                }
226                catch (URISyntaxException | IOException e)
227                {
228                    throw new SAXException("Unable to process the attachment '" + fileRefAttribute + "'. An error occured while setting its content", e);
229                }
230            }
231            else
232            {
233                Matcher uriMatcher = __LOCAL_ATTACHMENT_URI_VALIDATOR.matcher(fileRefAttribute);
234                if (uriMatcher.matches())
235                {
236                    filename = uriMatcher.group(1);
237                    if (_files.containsKey(filename))
238                    {
239                        try
240                        {
241                            NamedResource attachment = new NamedResource();
242                            String mimeType = _cocoonContext.getMimeType(filename.toLowerCase());
243                            attachment.setMimeType(mimeType);
244                            attachment.setFilename(filename);
245                            attachment.setInputStream(_files.get(filename));
246                            _richText.addAttachment(attachment);
247                        }
248                        catch (IOException e)
249                        {
250                            throw new SAXException("Unable to process the attachment '" + filename + "'. An error occured while setting its content", e);
251                        }
252                    }
253                    else
254                    {
255                        _logger.warn("The file named '" + filename + "' is not an attachment of the imported rich text");
256                    }
257                }
258                else
259                {
260                    // No URL format matches
261                    _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text. URL format is not valid.");
262                }
263            }
264            
265            AttributesImpl newAttrs = new AttributesImpl();
266            _copyAttributes(attrs, newAttrs);
267            newAttrs.addCDATAAttribute("fileref", filename);
268            return newAttrs;
269        }
270        
271        /**
272         * Copy the attributes except the fileref attribute
273         * @param attrs the attributes to copy.
274         * @param newAttrs the attributes to copy to.
275         */
276        private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
277        {
278            for (int i = 0; i < attrs.getLength(); i++)
279            {
280                String name = attrs.getQName(i);
281
282                if (!"fileref".equals(name))
283                {
284                    newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
285                }
286            }
287        }
288
289        private void _processAnnotation(Attributes attrs)
290        {
291            _isCurrentlyInAnnotation = true;
292            _currentAnnotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME);
293            _currentAnnotationValue = new StringBuilder();
294            _cptrElementsInsideCurrentAnnotation = 0;
295        }
296
297        @Override
298        public void characters(char[] ch, int start, int length) throws SAXException
299        {
300            if (_isCurrentlyInAnnotation)
301            {
302                _currentAnnotationValue.append(ch, start, length);
303            }
304            
305            super.characters(ch, start, length);
306        }
307
308        @Override
309        public void endElement(String uri, String loc, String raw) throws SAXException
310        {
311            if (_isCurrentlyInAnnotation)
312            {
313                if (_cptrElementsInsideCurrentAnnotation == 0)
314                {
315                    // When the semantic annotation is fully saxed, add it to the rich text
316                    _richText.addAnnotations(_currentAnnotationName, _currentAnnotationValue.toString());
317                    _isCurrentlyInAnnotation = false;
318                }
319                else
320                {
321                    _cptrElementsInsideCurrentAnnotation--;
322                }
323            }
324            
325            super.endElement(uri, loc, raw);
326        }
327    }
328}