001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.transformation.htmledition;
017
018import java.awt.image.BufferedImage;
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.HttpURLConnection;
023import java.net.MalformedURLException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.net.URL;
027import java.time.ZoneOffset;
028import java.time.ZonedDateTime;
029import java.util.HashSet;
030import java.util.Map;
031import java.util.Set;
032import java.util.regex.Matcher;
033import java.util.regex.Pattern;
034import java.util.stream.Collectors;
035
036import org.apache.avalon.framework.context.ContextException;
037import org.apache.avalon.framework.service.ServiceException;
038import org.apache.avalon.framework.service.ServiceManager;
039import org.apache.cocoon.Constants;
040import org.apache.cocoon.components.ContextHelper;
041import org.apache.cocoon.environment.Context;
042import org.apache.cocoon.environment.ObjectModelHelper;
043import org.apache.cocoon.environment.Request;
044import org.apache.cocoon.xml.AttributesImpl;
045import org.apache.commons.io.IOUtils;
046import org.apache.commons.io.output.ByteArrayOutputStream;
047import org.apache.excalibur.source.Source;
048import org.apache.excalibur.source.SourceResolver;
049import org.xml.sax.Attributes;
050import org.xml.sax.SAXException;
051
052import org.ametys.cms.data.NamedResource;
053import org.ametys.cms.data.RichText;
054import org.ametys.core.upload.Upload;
055import org.ametys.core.upload.UploadManager;
056import org.ametys.core.user.CurrentUserProvider;
057import org.ametys.core.util.ImageHelper;
058import org.ametys.plugins.explorer.resources.Resource;
059import org.ametys.plugins.repository.AmetysObjectResolver;
060import org.ametys.plugins.repository.AmetysRepositoryException;
061import org.ametys.plugins.repository.UnknownAmetysObjectException;
062
063/**
064 * This transformer extracts uploaded files' ids from the incoming HTML for further processing.
065 */
066public class UploadedDataHTMLEditionHandler extends AbstractHTMLEditionHandler
067{
068    private static final Pattern __INLINE_IMAGE_MARKER = Pattern.compile("^data:image/(png|jpeg|gif);base64,.*");
069    
070    private UploadManager _uploadManager;
071    private CurrentUserProvider _userProvider;
072    private SourceResolver _resolver;
073    private AmetysObjectResolver _ametysResolver;
074    private Context _cocoonContext;
075    
076    private boolean _tagToIgnore;
077    private Set<String> _usedLocalFiles = new HashSet<>();
078    private RichText _richText;
079    private Map _objectModel;
080
081
082    @Override
083    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
084    {
085        super.contextualize(context);
086        _cocoonContext = (Context) _context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
087    }
088    
089    @Override
090    public void service(ServiceManager sManager) throws ServiceException
091    {
092        super.service(sManager);
093        _uploadManager = (UploadManager) sManager.lookup(UploadManager.ROLE);
094        _userProvider = (CurrentUserProvider) sManager.lookup(CurrentUserProvider.ROLE);
095        _resolver = (SourceResolver) sManager.lookup(SourceResolver.ROLE);
096        _ametysResolver = (AmetysObjectResolver) sManager.lookup(AmetysObjectResolver.ROLE);
097    }
098    
099    @Override
100    public void startDocument() throws SAXException
101    {
102        _tagToIgnore = false;
103        _objectModel = ContextHelper.getObjectModel(_context);
104        Map parentContextParameters = (Map) _objectModel.get(ObjectModelHelper.PARENT_CONTEXT);
105        _richText = (RichText) parentContextParameters.get("richText");
106        
107        super.startDocument();
108    }
109    
110    @Override
111    public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
112    {
113        if ("img".equals(raw))
114        {
115            String type = attrs.getValue("data-ametys-type");
116            
117            if ("temp".equals(type))
118            {
119                Attributes newAttrs = _getAttributesForTemp(attrs);
120                super.startElement(uri, loc, raw, newAttrs);
121                return;
122            }
123            else if ("explorer".equals(type))
124            {
125                Attributes newAttrs = _processResource(attrs);
126                super.startElement(uri, loc, raw, newAttrs);
127                return;
128            }
129            else if ("local".equals(type))
130            {
131                Attributes newAttrs = _processLocal(attrs);
132                super.startElement(uri, loc, raw, newAttrs);
133                return;
134            }
135            else if (type == null && !"marker".equals(attrs.getValue("marker")))
136            {
137                // image is copied from elsewhere, fetch it in the content
138                String src = attrs.getValue("src");
139                if (src == null)
140                {
141                    _tagToIgnore = true;
142                    getLogger().warn("Don't know how to fetch image with no src attribute. Image is ignored.");
143                    return;
144                }
145                
146                // The final filename
147                String fileName = null;
148                // The new attributes, will be filled with image width and height.
149                AttributesImpl newAttrs = new AttributesImpl();
150
151                Matcher m = __INLINE_IMAGE_MARKER.matcher(src);
152                if (m.matches())
153                {
154                    String mimetype = m.group(1);
155                    String imageAsBase64 = src.substring(19 + mimetype.length());
156                    byte[] imageAsBytes = org.apache.commons.codec.binary.Base64.decodeBase64(imageAsBase64);
157                    fileName = _storeFile("paste." + mimetype, new ByteArrayInputStream(imageAsBytes), null, null);
158                    
159                    try (InputStream is = new ByteArrayInputStream(imageAsBytes))
160                    {
161                        _addDimensionAttributes(is, newAttrs);
162                    }
163                    catch (IOException e)
164                    {
165                        // Ignore
166                    }
167                }
168                else
169                {
170                    
171                    String initialFileName = _getInitialFileName(src);
172                    
173                    if (src.startsWith("/"))
174                    {
175                        try
176                        {
177                            fileName = _handleInternalFile(src, newAttrs, initialFileName);
178                        }
179                        catch (Exception e)
180                        {
181                            // unable to fetch image, do not keep the img tag
182                            _tagToIgnore = true;
183                            getLogger().warn("Unable to fetch internal image from URL '" + src + "'. Image is ignored.", e);
184                            return;
185                        }
186                    }
187                    else if (src.startsWith("http://") || src.startsWith("https://"))
188                    {
189                        try
190                        {
191                            fileName = _handleRemoteFile(src, newAttrs, initialFileName);
192                        }
193                        catch (Exception e)
194                        {
195                            // unable to fetch image, do not keep the img tag
196                            _tagToIgnore = true;
197                            getLogger().warn("Unable to fetch external image from URL '" + src + "'. Image is ignored.", e);
198                            return;
199                        }
200                    }
201                    else
202                    {
203                        _tagToIgnore = true;
204                        getLogger().warn("Don't know how to fetch image at '" + src + "'. Image is ignored.");
205                        return;
206                    }
207                }
208                    
209                _copyAttributes(attrs, newAttrs);
210                
211                newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
212                newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
213                
214                super.startElement(uri, loc, raw, newAttrs);
215                return;
216            }
217        }
218        
219        super.startElement(uri, loc, raw, attrs);
220    }
221    
222    private String _getInitialFileName(String src)
223    {
224        int j = src.lastIndexOf('/');
225        int k = src.indexOf('?', j);
226        String initialFileName;
227        
228        if (k == -1)
229        {
230            initialFileName = src.substring(j + 1);
231        }
232        else
233        {
234            initialFileName = src.substring(j + 1, k);
235        }
236        
237        // FIXME CMS-3090 An uploaded image can not contain '_max' or '_crop', replace it by '_Max', '_Crop'
238        return initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
239    }
240
241    private String _handleInternalFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException, URISyntaxException
242    {
243        // it may be an internal URL
244        Request request = ContextHelper.getRequest(_context);
245        String contextPath = request.getContextPath();
246        Source source = null;
247        
248        try
249        {
250            String modifiedSrc = src;
251            
252            if (src.startsWith(contextPath))
253            {
254                // it is an Ametys URL
255                // first decode it
256                modifiedSrc = new URI(modifiedSrc).getPath();
257                
258                modifiedSrc = "cocoon:/" + src.substring(contextPath.length());
259            }
260            else
261            {
262                StringBuilder sb = _getRequestURI(request);
263                
264                modifiedSrc = sb.toString() + modifiedSrc;
265            }
266            
267            source = _resolver.resolveURI(src);
268            
269            try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
270            {
271                try (InputStream is = source.getInputStream())
272                {
273                    IOUtils.copy(is, bos);
274                }
275                
276                String fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
277                
278                try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
279                {
280                    _addDimensionAttributes(is, newAttrs);
281                }
282                
283                return fileName;
284            }
285        }
286        finally
287        {
288            if (source != null)
289            {
290                _resolver.release(source);
291            }
292        }
293
294    }
295    
296    private String _handleRemoteFile(String src, AttributesImpl newAttrs, String initialFileName) throws URISyntaxException, MalformedURLException, IOException
297    {
298        String fileName;
299        URL url = new URI(src).toURL();
300        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
301        connection.setRequestProperty("User-Agent", "Mozilla"); // some servers only answer to browsers ...
302        connection.setConnectTimeout(1000);
303        connection.setReadTimeout(2000);
304        
305        try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
306        {
307            try (InputStream is = connection.getInputStream())
308            {
309                IOUtils.copy(is, bos);
310            }
311            
312            fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
313            
314            try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
315            {
316                _addDimensionAttributes(is, newAttrs);
317            }
318        }
319        return fileName;
320    }
321    
322    /**
323     * Copy the attributes.
324     * @param attrs the attributes to copy.
325     * @param newAttrs the attributes to copy to.
326     */
327    private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
328    {
329        for (int i = 0; i < attrs.getLength(); i++)
330        {
331            String name = attrs.getQName(i);
332            
333            if (!"data-ametys-src".equals(name) && !"data-ametys-type".equals(name))
334            {
335                newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
336            }
337        }
338    }
339
340    /**
341     * Get the cms uri
342     * @param request The request
343     * @return the uri without context path
344     */
345    private StringBuilder _getRequestURI(Request request)
346    {
347        StringBuilder sb = new StringBuilder();
348        sb.append(request.getScheme());
349        sb.append("://");
350        sb.append(request.getServerName());
351        
352        if (request.isSecure())
353        {
354            if (request.getServerPort() != 443)
355            {
356                sb.append(":");
357                sb.append(request.getServerPort());
358            }
359        }
360        else
361        {
362            if (request.getServerPort() != 80)
363            {
364                sb.append(":");
365                sb.append(request.getServerPort());
366            }
367        }
368        return sb;
369    }
370
371    private Attributes _getAttributesForTemp(Attributes attrs)
372    {
373        // data has just been uploaded, must change the value, and store the id for further processing
374        String id = attrs.getValue("data-ametys-temp-src");
375        
376        Upload upload = _uploadManager.getUpload(_userProvider.getUser(), id);
377        
378        String initialFileName = upload.getFilename();
379        // FIXME CMS-3090 An uploaded image can not contain '_max', replace it by '_Max'
380        initialFileName = initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
381        String fileName = _storeFile(initialFileName, upload.getInputStream(), upload.getMimeType(), upload.getUploadedDate());
382        
383        AttributesImpl newAttrs = new AttributesImpl();
384        
385        _copyAttributes(attrs, newAttrs);
386        
387        if (!"marker".equals(attrs.getValue("marker")))
388        {
389            try (InputStream is = upload.getInputStream())
390            {
391                _addDimensionAttributes(is, newAttrs);
392            }
393            catch (IOException e)
394            {
395                // Ignore
396            }
397        }
398        
399        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
400        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
401        
402        return newAttrs;
403    }
404    
405    /**
406     * Store a file as rich text data.
407     * @param initialFileName the initial file name.
408     * @param is an input stream on the file.
409     * @param mimeType the file mime type.
410     * @param lastModified the last modification date.
411     * @return the final file name.
412     */
413    protected String _storeFile(String initialFileName, InputStream is, String mimeType, ZonedDateTime lastModified)
414    {
415        String fileName = initialFileName;
416        int count = 2;
417        
418        while (_richText.hasAttachment(fileName))
419        {
420            int i = initialFileName.lastIndexOf('.');
421            fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
422        }
423    
424        NamedResource resource = new NamedResource();
425
426        String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
427        resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
428        
429        resource.setFilename(fileName);
430        resource.setLastModificationDate(lastModified != null ? lastModified : ZonedDateTime.now(ZoneOffset.UTC));
431        
432        try
433        {
434            resource.setInputStream(is);
435        }
436        catch (IOException e)
437        {
438            throw new AmetysRepositoryException("Unable to save attachment " + initialFileName, e);
439        }
440    
441        _richText.addAttachment(resource);
442        
443        // store the file usage, so that it won't be deleted immediately
444        _usedLocalFiles.add(fileName);
445        
446        return fileName;
447    }
448    
449    /**
450     * Process a local file.
451     * @param attrs the img tag attributes.
452     * @return the new img tag attributes.
453     */
454    protected Attributes _processLocal(Attributes attrs)
455    {
456        // src contains the fileName
457        String filename = attrs.getValue("data-ametys-src");
458        _usedLocalFiles.add(filename);
459        
460        AttributesImpl newAttrs = new AttributesImpl(attrs);
461        if (!"marker".equals(attrs.getValue("marker")))
462        {
463            NamedResource file = _richText.getAttachment(filename);
464            if (file != null)
465            {
466                try (InputStream is = file.getInputStream())
467                {
468                    _addDimensionAttributes(is, newAttrs);
469                }
470                catch (IOException e)
471                {
472                    // Ignore
473                }
474            }
475        }
476        
477        return newAttrs;
478    }
479
480    /**
481     * Process a resource.
482     * @param attrs the img tag attributes.
483     * @return the new img tag attributes.
484     */
485    protected Attributes _processResource(Attributes attrs)
486    {
487        String ametys_src = attrs.getValue("data-ametys-src");
488        
489        Resource resource = null;
490        try
491        {
492            resource = _ametysResolver.resolveById(ametys_src);
493        }
494        catch (UnknownAmetysObjectException ex)
495        {
496            getLogger().warn("Link to unexisting resource image " + ametys_src, ex);
497            return attrs;
498        }
499        
500        AttributesImpl newAttrs = new AttributesImpl(attrs);
501        if (!"marker".equals(attrs.getValue("marker")))
502        {
503            try (InputStream is = resource.getInputStream())
504            {
505                _addDimensionAttributes(is, newAttrs);
506            }
507            catch (IOException e)
508            {
509                // Ignore
510            }
511        }
512        
513        return newAttrs;
514    }
515
516    /**
517     * Add an image's width and height to the XML attributes.
518     * @param inputStream an input stream on the image.
519     * @param attrs the attributes to fill.
520     * @throws IOException if an error occurs during reading dimension
521     */
522    protected void _addDimensionAttributes(InputStream inputStream, AttributesImpl attrs) throws IOException
523    {
524        // We need to call Thumbnail to get image dimension with EXIF orientation tag
525        BufferedImage img = ImageHelper.read(inputStream);
526        if (img != null && attrs.getValue("width") == null)
527        {
528            attrs.addCDATAAttribute("width", Integer.toString(img.getWidth()));
529        }
530        if (img != null && attrs.getValue("height") == null)
531        {
532            attrs.addCDATAAttribute("height", Integer.toString(img.getHeight()));
533        }
534    }
535        
536    @Override
537    public void endElement(String uri, String loc, String raw) throws SAXException
538    {
539        if ("img".equals(raw) && _tagToIgnore)
540        {
541            // ignore img tag
542            _tagToIgnore = false;
543            return;
544        }
545        
546        super.endElement(uri, loc, raw);
547    }
548    
549    @Override
550    public void endDocument() throws SAXException
551    {
552        // Look for unused files
553        Set<String> unusedLocalFiles = _richText.getAttachmentNames()
554                                                .stream()
555                                                .filter(fileName -> !_usedLocalFiles.contains(fileName))
556                                                .collect(Collectors.toSet());
557        // Remove unused files        
558        unusedLocalFiles.stream()
559                        .forEach(_richText::removeAttachment);
560        
561        super.endDocument();
562    }
563}