001/*
002 *  Copyright 2020 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016
017package org.ametys.cms.data;
018
019import java.io.IOException;
020import java.io.InputStream;
021import java.net.URL;
022import java.util.Map;
023import java.util.Optional;
024
025import org.apache.avalon.framework.component.Component;
026import org.apache.avalon.framework.context.ContextException;
027import org.apache.avalon.framework.context.Contextualizable;
028import org.apache.cocoon.Constants;
029import org.apache.cocoon.environment.Context;
030import org.apache.cocoon.xml.AttributesImpl;
031import org.apache.commons.lang3.StringUtils;
032import org.apache.excalibur.xml.sax.ContentHandlerProxy;
033import org.apache.http.HttpEntity;
034import org.apache.http.client.config.RequestConfig;
035import org.apache.http.client.methods.CloseableHttpResponse;
036import org.apache.http.client.methods.HttpGet;
037import org.apache.http.entity.ContentType;
038import org.apache.http.impl.client.CloseableHttpClient;
039import org.apache.http.impl.client.HttpClientBuilder;
040import org.slf4j.Logger;
041import org.xml.sax.Attributes;
042import org.xml.sax.ContentHandler;
043import org.xml.sax.SAXException;
044
045import org.ametys.cms.repository.comment.CommentsDAO;
046import org.ametys.runtime.plugin.component.AbstractLogEnabled;
047
048/**
049 * Factory for the transformer that imports a rich text from docbook.
050 */
051public class RichTextImportHandlerFactory extends AbstractLogEnabled implements Component, Contextualizable
052{
053    /** Avalon role. */
054    public static final String ROLE = RichTextImportHandlerFactory.class.getName();
055    private Context _cocoonContext;
056    
057    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
058    {
059        _cocoonContext = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
060    }
061
062    /**
063     * Creates a handler proxy to import the rich text
064     * @param contentHandler the contentHandler to pass SAX events to
065     * @param richText the rich text
066     * @param files the attachments of this rich text
067     * @return the created handler
068     */
069    public RichTextImportHandler createHandlerProxy(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files)
070    {
071        return new RichTextImportHandler(contentHandler, richText, files, getLogger());
072    }
073    
074    /**
075     * This transformer imports the rich text from docbook.
076     */
077    public class RichTextImportHandler extends ContentHandlerProxy
078    {
079        private static final String __ATTACHMENT_IMAGE_TAG_NAME = "imagedata";
080        private static final String __ATTACHMENT_VIDEO_TAG_NAME = "videodata";
081        private static final String __ATTACHMENT_AUDIO_TAG_NAME = "audiodata";
082        private static final String __ATTACHMENT_TYPE_ATTRIBUTE_NAME = "type";
083        private static final String __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE = "local";
084
085        private static final String __ANNOTATION_TAG_NAME = "phrase";
086        private static final String __ANNOTATION_NAME_ATTRIBUTE_NAME = "role";
087        private static final String __ANNOTATION_CLASS_ATTRIBUTE_NAME = "class";
088        private static final String __ANNOTATION_CLASS_ATTRIBUTE_VALUE = "semantic";
089
090
091        private RichText _richText;
092        private Map<String, InputStream> _files;
093        private Logger _logger;
094
095        private boolean _isCurrentlyInAnnotation;
096        private String _currentAnnotationName;
097        private StringBuilder _currentAnnotationValue;
098        private int _cptrElementsInsideCurrentAnnotation;
099        
100        /**
101         * Creates a handler proxy to import a rich text
102         * @param contentHandler the contentHandler to pass SAX events to
103         * @param richText the rich text
104         * @param files the attachments of this rich text
105         * @param logger the logger
106         */
107        public RichTextImportHandler(ContentHandler contentHandler, RichText richText, Map<String, InputStream> files, Logger logger)
108        {
109            super(contentHandler);
110            _richText = richText;
111            _files = files;
112            _logger = logger;
113        } 
114
115        @Override
116        public void startDocument() throws SAXException
117        {
118            // Remove all existing attachments from the rich text.
119            _richText.removeAttachments();
120
121            // Remove all existing annotations from the rich text.
122            _richText.removeAllAnnotations();
123
124            super.startDocument();
125        }
126
127        @Override
128        public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
129        {
130            // A new attachment starts being saxed
131            boolean isAttachment = _isAttachment(loc);
132            String type = attrs.getValue(__ATTACHMENT_TYPE_ATTRIBUTE_NAME);
133            Attributes newAttrs = attrs;
134            if (isAttachment && __ATTACHMENT_TYPE_ATTRIBUTE_LOCAL_VALUE.equals(type))
135            {
136                newAttrs = _processAttachment(attrs);
137            }
138
139            // A new semantic annotation starts being saxed
140            String clazz = attrs.getValue(__ANNOTATION_CLASS_ATTRIBUTE_NAME);
141            String annotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME);
142            if (__ANNOTATION_TAG_NAME.equals(loc) && __ANNOTATION_CLASS_ATTRIBUTE_VALUE.equals(clazz) && annotationName != null) 
143            {
144                _processAnnotation(attrs);
145            }
146            else if (_isCurrentlyInAnnotation)
147            {
148                // A new element is being SAXed inside the current annotation
149                _cptrElementsInsideCurrentAnnotation++;
150            }
151
152            super.startElement(uri, loc, raw, newAttrs);
153        }
154        
155        private boolean _isAttachment(String loc)
156        {
157            return __ATTACHMENT_IMAGE_TAG_NAME.equals(loc) || __ATTACHMENT_VIDEO_TAG_NAME.equals(loc) || __ATTACHMENT_AUDIO_TAG_NAME.equals(loc);
158        }
159
160        private Attributes _processAttachment(Attributes attrs) throws SAXException
161        {
162            String fileRefAttribute = attrs.getValue("fileref");
163            String filename;
164            
165            if (CommentsDAO.URL_VALIDATOR.matcher(fileRefAttribute).matches())
166            {
167                try
168                {
169                    NamedResource attachment = new NamedResource();
170    
171                    RequestConfig requestConfig = RequestConfig.custom()
172                            .setConnectTimeout(2000)
173                            .setSocketTimeout(2000)
174                            .build();
175                    
176                    URL url = new URL(fileRefAttribute);
177                    String path = url.getPath();
178                    filename = path.substring(path.lastIndexOf("/") + 1);
179                    
180                    try (CloseableHttpClient httpclient = HttpClientBuilder.create()
181                                                                           .setDefaultRequestConfig(requestConfig)
182                                                                           .useSystemProperties()
183                                                                           .build())
184                    {
185                        HttpGet httpGet = new HttpGet(fileRefAttribute);
186                        try (CloseableHttpResponse httpResponse = httpclient.execute(httpGet))
187                        {
188                            if (httpResponse.getStatusLine().getStatusCode() != 200)
189                            {
190                                _logger.warn("Can't import file with url '" + fileRefAttribute + "' in the imported rich text");
191                            }
192                            else
193                            {
194                                HttpEntity entity = httpResponse.getEntity();
195                                try (InputStream is = entity.getContent())
196                                {
197                                    if (is == null)
198                                    {
199                                        _logger.warn("The attachment named '" + filename + "' of the imported rich text is empty");
200                                    }
201                                    else
202                                    {
203                                        attachment.setInputStream(is);
204                                    }
205                                    
206                                    String mimeType = Optional.ofNullable(ContentType.get(entity))
207                                            .map(ContentType::getMimeType)
208                                            .filter(StringUtils::isNotEmpty)
209                                            .orElse(_cocoonContext.getMimeType(filename.toLowerCase()));
210
211                                    attachment.setMimeType(mimeType);
212                                    attachment.setFilename(filename);
213                                    
214                                    _richText.addAttachment(attachment);
215                                }
216                            }
217                        }
218                    }
219                }
220                catch (IOException e)
221                {
222                    throw new SAXException("Unable to process the attachment '" + fileRefAttribute + "'. An error occured while setting its content", e);
223                }
224            }
225            else
226            {
227                // file reference is of the form ownerId@dataName;fileName
228                int indexOfFilenameSeparator = fileRefAttribute.lastIndexOf(';');
229                filename = fileRefAttribute.substring(indexOfFilenameSeparator + 1);
230    
231                if (indexOfFilenameSeparator == -1)
232                {
233                    throw new IllegalArgumentException("A local image should have a file reference of the form <protocol>://<protocol-specific-part>;<filename> : " + fileRefAttribute);
234                }
235    
236                if (_files.containsKey(filename))
237                {
238                    try
239                    {
240                        NamedResource attachment = new NamedResource();
241                        String mimeType = _cocoonContext.getMimeType(filename.toLowerCase());
242                        attachment.setMimeType(mimeType);
243                        attachment.setFilename(filename);
244                        attachment.setInputStream(_files.get(filename));
245                        _richText.addAttachment(attachment);
246                    }
247                    catch (IOException e)
248                    {
249                        throw new SAXException("Unable to process the attachment '" + filename + "'. An error occured while setting its content", e);
250                    }
251                }
252            }
253            
254            AttributesImpl newAttrs = new AttributesImpl();
255            _copyAttributes(attrs, newAttrs);
256            newAttrs.addCDATAAttribute("fileref", filename);
257            return newAttrs;
258        }
259        
260        /**
261         * Copy the attributes except the fileref attribute
262         * @param attrs the attributes to copy.
263         * @param newAttrs the attributes to copy to.
264         */
265        private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
266        {
267            for (int i = 0; i < attrs.getLength(); i++)
268            {
269                String name = attrs.getQName(i);
270
271                if (!"fileref".equals(name))
272                {
273                    newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
274                }
275            }
276        }
277
278        private void _processAnnotation(Attributes attrs)
279        {
280            _isCurrentlyInAnnotation = true;
281            _currentAnnotationName = attrs.getValue(__ANNOTATION_NAME_ATTRIBUTE_NAME);
282            _currentAnnotationValue = new StringBuilder();
283            _cptrElementsInsideCurrentAnnotation = 0;
284        }
285
286        @Override
287        public void characters(char[] ch, int start, int length) throws SAXException
288        {
289            if (_isCurrentlyInAnnotation)
290            {
291                _currentAnnotationValue.append(ch, start, length);
292            }
293            
294            super.characters(ch, start, length);
295        }
296
297        @Override
298        public void endElement(String uri, String loc, String raw) throws SAXException
299        {    
300            if (_isCurrentlyInAnnotation)
301            {
302                if (_cptrElementsInsideCurrentAnnotation == 0)
303                {                
304                    // When the semantic annotation is fully saxed, add it to the rich text
305                    _richText.addAnnotations(_currentAnnotationName, _currentAnnotationValue.toString());
306                    _isCurrentlyInAnnotation = false;
307                }
308                else 
309                {
310                    _cptrElementsInsideCurrentAnnotation--;                
311                }
312            }
313            
314            super.endElement(uri, loc, raw);
315        }
316    }
317}