001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.transformation.htmledition;
017
018import java.awt.image.BufferedImage;
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.HttpURLConnection;
023import java.net.MalformedURLException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.net.URL;
027import java.time.ZoneOffset;
028import java.time.ZonedDateTime;
029import java.util.Date;
030import java.util.HashSet;
031import java.util.Map;
032import java.util.Optional;
033import java.util.Set;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036import java.util.stream.Collectors;
037
038import org.apache.avalon.framework.context.ContextException;
039import org.apache.avalon.framework.service.ServiceException;
040import org.apache.avalon.framework.service.ServiceManager;
041import org.apache.cocoon.Constants;
042import org.apache.cocoon.components.ContextHelper;
043import org.apache.cocoon.environment.Context;
044import org.apache.cocoon.environment.ObjectModelHelper;
045import org.apache.cocoon.environment.Request;
046import org.apache.cocoon.xml.AttributesImpl;
047import org.apache.commons.io.IOUtils;
048import org.apache.commons.io.output.ByteArrayOutputStream;
049import org.apache.excalibur.source.Source;
050import org.apache.excalibur.source.SourceResolver;
051import org.xml.sax.Attributes;
052import org.xml.sax.SAXException;
053
054import org.ametys.cms.data.NamedResource;
055import org.ametys.cms.data.RichText;
056import org.ametys.cms.repository.Content;
057import org.ametys.core.upload.Upload;
058import org.ametys.core.upload.UploadManager;
059import org.ametys.core.user.CurrentUserProvider;
060import org.ametys.core.util.DateUtils;
061import org.ametys.core.util.ImageHelper;
062import org.ametys.core.util.StringUtils;
063import org.ametys.plugins.explorer.resources.Resource;
064import org.ametys.plugins.repository.AmetysObjectResolver;
065import org.ametys.plugins.repository.AmetysRepositoryException;
066import org.ametys.plugins.repository.UnknownAmetysObjectException;
067import org.ametys.plugins.repository.metadata.CompositeMetadata;
068import org.ametys.plugins.repository.metadata.File;
069import org.ametys.plugins.repository.metadata.Folder;
070import org.ametys.plugins.repository.metadata.ModifiableFile;
071import org.ametys.plugins.repository.metadata.ModifiableFolder;
072import org.ametys.plugins.repository.metadata.ModifiableResource;
073import org.ametys.plugins.repository.metadata.ModifiableRichText;
074
075/**
076 * This transformer extracts uploaded files' ids from the incoming HTML for further processing.
077 */
078public class UploadedDataHTMLEditionHandler extends AbstractHTMLEditionHandler
079{
080    private static final Pattern __INLINE_IMAGE_MARKER = Pattern.compile("^data:image/(png|jpeg|gif);base64,.*");
081    
082    private UploadManager _uploadManager;
083    private CurrentUserProvider _userProvider;
084    private SourceResolver _resolver;
085    private AmetysObjectResolver _ametysResolver;
086    private Context _cocoonContext;
087    
088    private boolean _tagToIgnore;
089    private Set<String> _usedLocalFiles = new HashSet<>();
090    private Object _richText;
091    private Map _objectModel;
092
093
094    @Override
095    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
096    {
097        super.contextualize(context);
098        _cocoonContext = (Context) _context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
099    }
100    
101    @Override
102    public void service(ServiceManager sManager) throws ServiceException
103    {
104        super.service(sManager);
105        _uploadManager = (UploadManager) sManager.lookup(UploadManager.ROLE);
106        _userProvider = (CurrentUserProvider) sManager.lookup(CurrentUserProvider.ROLE);
107        _resolver = (SourceResolver) sManager.lookup(SourceResolver.ROLE);
108        _ametysResolver = (AmetysObjectResolver) sManager.lookup(AmetysObjectResolver.ROLE);
109    }
110    
111    @Override
112    public void startDocument() throws SAXException
113    {
114        _tagToIgnore = false;
115        _objectModel = ContextHelper.getObjectModel(_context);
116        Map parentContextParameters = (Map) _objectModel.get(ObjectModelHelper.PARENT_CONTEXT);
117        _richText = parentContextParameters.get("richText");
118        
119        super.startDocument();
120    }
121    
122    @Override
123    public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
124    {
125        if ("img".equals(raw))
126        {
127            String type = attrs.getValue("data-ametys-type");
128            
129            if ("temp".equals(type))
130            {
131                Attributes newAttrs = _getAttributesForTemp(attrs);
132                super.startElement(uri, loc, raw, newAttrs);
133                return;
134            }
135            else if ("explorer".equals(type))
136            {
137                Attributes newAttrs = _processResource(attrs);
138                super.startElement(uri, loc, raw, newAttrs);
139                return;
140            }
141            else if ("local".equals(type))
142            {
143                Attributes newAttrs = _processLocal(attrs);
144                super.startElement(uri, loc, raw, newAttrs);
145                return;
146            }
147            else if (type == null && !"marker".equals(attrs.getValue("marker")))
148            {
149                // image is copied from elsewhere, fetch it in the content
150                String src = attrs.getValue("src");
151                if (src == null)
152                {
153                    _tagToIgnore = true;
154                    getLogger().warn("Don't know how to fetch image with no src attribute. Image is ignored.");
155                    return;
156                }
157                
158                String ametys_src = attrs.getValue("data-ametys-src");
159
160                // The final filename
161                String fileName = null;
162                // The new attributes, will be filled with image width and height.
163                AttributesImpl newAttrs = new AttributesImpl();
164
165                Matcher m = __INLINE_IMAGE_MARKER.matcher(src);
166                if (m.matches())
167                {
168                    String mimetype = m.group(1);
169                    String imageAsBase64 = src.substring(19 + mimetype.length());
170                    byte[] imageAsBytes = org.apache.commons.codec.binary.Base64.decodeBase64(imageAsBase64);
171                    
172                    String generateKey = StringUtils.generateKey();
173                    fileName = _storeFile("paste-" + generateKey + "." + mimetype, new ByteArrayInputStream(imageAsBytes), null, null);
174                    
175                    try (InputStream is = new ByteArrayInputStream(imageAsBytes))
176                    {
177                        _addDimensionAttributes(is, newAttrs);
178                    }
179                    catch (IOException e)
180                    {
181                        // Ignore
182                    }
183                }
184                else
185                {
186                    
187                    String initialFileName = _getInitialFileName(src);
188                    
189                    if (src.startsWith("/"))
190                    {
191                        try
192                        {
193                            fileName = _handleInternalFile(src, newAttrs, initialFileName);
194                        }
195                        catch (Exception e)
196                        {
197                            // unable to fetch image, do not keep the img tag
198                            _tagToIgnore = true;
199                            getLogger().warn("Unable to fetch internal image from URL '" + src + "'. Image is ignored.", e);
200                            return;
201                        }
202                    }
203                    else if (src.startsWith("http://") || src.startsWith("https://"))
204                    {
205                        try
206                        {
207                            fileName = _handleRemoteFile(src, newAttrs, initialFileName);
208                        }
209                        catch (Exception e)
210                        {
211                            // unable to fetch image, do not keep the img tag
212                            _tagToIgnore = true;
213                            getLogger().warn("Unable to fetch external image from URL '" + src + "'. Image is ignored.", e);
214                            return;
215                        }
216                    }
217                    else
218                    {
219                        _tagToIgnore = true;
220                        getLogger().warn("Don't know how to fetch image at '" + src + "'. Image is ignored.");
221                        return;
222                    }
223                }
224                    
225                _copyAttributes(attrs, newAttrs);
226                
227                newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", ametys_src.replaceAll("\\.", "/") + ";" + fileName);
228                newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
229                
230                super.startElement(uri, loc, raw, newAttrs);
231                return;
232            }
233        }
234        
235        super.startElement(uri, loc, raw, attrs);
236    }
237    
238    private String _getInitialFileName(String src)
239    {
240        int j = src.lastIndexOf('/');
241        int k = src.indexOf('?', j);
242        String initialFileName;
243        
244        if (k == -1)
245        {
246            initialFileName = src.substring(j + 1);
247        }
248        else
249        {
250            initialFileName = src.substring(j + 1, k);
251        }
252        
253        // FIXME CMS-3090 A uploaded image can not contain '_max' or '_crop', replace it by '_Max', '_Crop'
254        return initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
255    }
256
257    private String _handleInternalFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException, URISyntaxException
258    {
259        // it may be an internal URL
260        Request request = ContextHelper.getRequest(_context);
261        String contextPath = request.getContextPath();
262        Source source = null;
263        
264        try
265        {
266            String modifiedSrc = src;
267            
268            if (src.startsWith(contextPath))
269            {
270                // it is an Ametys URL
271                // first decode it
272                modifiedSrc = new URI(modifiedSrc).getPath();
273                
274                modifiedSrc = "cocoon:/" + src.substring(contextPath.length());
275            }
276            else
277            {
278                StringBuilder sb = _getRequestURI(request);
279                
280                modifiedSrc = sb.toString() + modifiedSrc;
281            }
282            
283            source = _resolver.resolveURI(src);
284            
285            try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
286            {
287                try (InputStream is = source.getInputStream())
288                {
289                    IOUtils.copy(is, bos);
290                }
291                
292                String fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
293                
294                try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
295                {
296                    _addDimensionAttributes(is, newAttrs);
297                }
298                
299                return fileName;
300            }
301        }
302        finally
303        {
304            if (source != null)
305            {
306                _resolver.release(source);
307            }
308        }
309
310    }
311    
312    private String _handleRemoteFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException
313    {
314        String fileName;
315        URL url = new URL(src);
316        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
317        connection.setConnectTimeout(1000);
318        connection.setReadTimeout(2000);
319        
320        try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
321        {
322            try (InputStream is = connection.getInputStream())
323            {
324                IOUtils.copy(is, bos);
325            }
326            
327            fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
328            
329            try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
330            {
331                _addDimensionAttributes(is, newAttrs);
332            }
333        }
334        return fileName;
335    }
336    
337    /**
338     * Copy the attributes.
339     * @param attrs the attributes to copy.
340     * @param newAttrs the attributes to copy to.
341     */
342    private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
343    {
344        for (int i = 0; i < attrs.getLength(); i++)
345        {
346            String name = attrs.getQName(i);
347            
348            if (!"data-ametys-src".equals(name) && !"data-ametys-type".equals(name))
349            {
350                newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
351            }
352        }
353    }
354
355    /**
356     * Get the cms uri
357     * @param request The request
358     * @return the uri without context path
359     */
360    private StringBuilder _getRequestURI(Request request)
361    {
362        StringBuilder sb = new StringBuilder();
363        sb.append(request.getScheme());
364        sb.append("://");
365        sb.append(request.getServerName());
366        
367        if (request.isSecure())
368        {
369            if (request.getServerPort() != 443)
370            {
371                sb.append(":");
372                sb.append(request.getServerPort());
373            }
374        }
375        else
376        {
377            if (request.getServerPort() != 80)
378            {
379                sb.append(":");
380                sb.append(request.getServerPort());
381            }
382        }
383        return sb;
384    }
385
386    private Attributes _getAttributesForTemp(Attributes attrs)
387    {
388        // data has just been uploaded, must change the value, and store the id for further processing
389        String id = attrs.getValue("data-ametys-temp-src");
390        String src = attrs.getValue("data-ametys-src");
391        
392        Upload upload = _uploadManager.getUpload(_userProvider.getUser(), id);
393        
394        String initialFileName = upload.getFilename();
395        // FIXME CMS-3090 A uploaded image can not contain '_max', replace it by '_Max'
396        initialFileName = initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
397        String fileName = _storeFile(initialFileName, upload.getInputStream(), upload.getMimeType(), upload.getUploadedDate());
398        
399        AttributesImpl newAttrs = new AttributesImpl();
400        
401        _copyAttributes(attrs, newAttrs);
402        
403        if (!"marker".equals(attrs.getValue("marker")))
404        {
405            try (InputStream is = upload.getInputStream())
406            {
407                _addDimensionAttributes(is, newAttrs);
408            }
409            catch (IOException e)
410            {
411                // Ignore
412            }
413        }
414        
415        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", src.replaceAll("\\.", "/") + ";" + fileName);
416        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
417        
418        return newAttrs;
419    }
420    
421    /**
422     * Store a file as rich text data.
423     * @param initialFileName the initial file name.
424     * @param is an input stream on the file.
425     * @param mimeType the file mime type.
426     * @param lastModified the last modification date.
427     * @return the final file name.
428     */
429    protected String _storeFile(String initialFileName, InputStream is, String mimeType, ZonedDateTime lastModified)
430    {
431        String fileName = initialFileName;
432        int count = 2;
433        
434        if (_richText instanceof RichText)
435        {
436            RichText richText = (RichText) _richText;
437            
438            while (richText.hasAttachment(fileName))
439            {
440                int i = initialFileName.lastIndexOf('.');
441                fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
442            }
443        
444            NamedResource resource = new NamedResource();
445    
446            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
447            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
448            
449            resource.setFilename(fileName);
450            resource.setLastModificationDate(lastModified != null ? lastModified : ZonedDateTime.now(ZoneOffset.UTC));
451            
452            try
453            {
454                resource.setInputStream(is);
455            }
456            catch (IOException e)
457            {
458                throw new AmetysRepositoryException("Unable to save attachment " + initialFileName, e);
459            }
460        
461            richText.addAttachment(resource);
462        }
463        else
464        {
465            ModifiableRichText richText = (ModifiableRichText) _richText;
466            
467            while (richText.getAdditionalDataFolder().hasFile(fileName))
468            {
469                int i = initialFileName.lastIndexOf('.');
470                fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
471            }
472            
473            ModifiableFile file = richText.getAdditionalDataFolder().addFile(fileName);
474            ModifiableResource resource = file.getResource();
475            resource.setLastModified(Optional.ofNullable(lastModified).map(DateUtils::asDate).orElseGet(Date::new));
476            
477            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
478            
479            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
480            resource.setInputStream(is);
481        }
482        
483        // store the file usage, so that it won't be deleted immediately
484        _usedLocalFiles.add(fileName);
485        
486        return fileName;
487    }
488    
489    /**
490     * Process a local file.
491     * @param attrs the img tag attributes.
492     * @return the new img tag attributes.
493     */
494    protected Attributes _processLocal(Attributes attrs)
495    {
496        // src is of the form contentId@attributePath;fileName
497        String ametys_src = attrs.getValue("data-ametys-src");
498        int i = ametys_src.indexOf('@');
499        int j = ametys_src.indexOf(';', i);
500        String id = ametys_src.substring(0, i);
501        String attributePath = ametys_src.substring(i + 1, j);
502        String filename = ametys_src.substring(j + 1);
503        
504        if (j == -1)
505        {
506            throw new IllegalArgumentException("A local image from inline editor should have an data-ametys-src attribute of the form <protocol>://<protocol-specific-part>;<filename> : " + ametys_src);
507        }
508        
509        AttributesImpl newAttrs = new AttributesImpl(attrs);
510        
511        _usedLocalFiles.add(filename);
512
513        if (!"marker".equals(attrs.getValue("marker")))
514        {
515            Content content = _ametysResolver.resolveById(id);
516            
517            // new mode
518            if (_richText instanceof RichText)
519            {
520                RichText richText = content.getValue(attributePath);
521                NamedResource file = richText.getAttachment(filename);
522                
523                try (InputStream is = file.getInputStream())
524                {
525                    _addDimensionAttributes(is, newAttrs);
526                }
527                catch (IOException e)
528                {
529                    // Ignore
530                }
531            }
532            // old mode
533            else
534            {
535                Folder folder = _getMeta(content.getMetadataHolder(), attributePath).getAdditionalDataFolder();
536                File file = folder.getFile(filename);
537                
538                try (InputStream is = file.getResource().getInputStream())
539                {
540                    _addDimensionAttributes(is, newAttrs);
541                }
542                catch (IOException e)
543                {
544                    // Ignore
545                }
546            }
547        }
548        
549        return newAttrs;
550    }
551
552    /**
553     * Process a resource.
554     * @param attrs the img tag attributes.
555     * @return the new img tag attributes.
556     */
557    protected Attributes _processResource(Attributes attrs)
558    {
559        String ametys_src = attrs.getValue("data-ametys-src");
560        
561        Resource resource = null;
562        try
563        {
564            resource = _ametysResolver.resolveById(ametys_src);
565        }
566        catch (UnknownAmetysObjectException ex)
567        {
568            getLogger().warn("Link to unexisting resource image " + ametys_src, ex);
569            return attrs;
570        }
571        
572        AttributesImpl newAttrs = new AttributesImpl(attrs);
573        if (!"marker".equals(attrs.getValue("marker")))
574        {
575            try (InputStream is = resource.getInputStream())
576            {
577                _addDimensionAttributes(is, newAttrs);
578            }
579            catch (IOException e)
580            {
581                // Ignore
582            }
583        }
584        
585        return newAttrs;
586    }
587
588    /**
589     * Add an image's width and height to the XML attributes.
590     * @param inputStream an input stream on the image.
591     * @param attrs the attributes to fill.
592     * @throws IOException if an error occurs during reading dimension
593     */
594    protected void _addDimensionAttributes(InputStream inputStream, AttributesImpl attrs) throws IOException
595    {
596        // We need to call Thumbnail to get image dimension with EXIF orientation tag
597        BufferedImage img = ImageHelper.read(inputStream);
598        if (img != null && attrs.getValue("width") == null)
599        {
600            attrs.addCDATAAttribute("width", Integer.toString(img.getWidth()));
601        }
602        if (img != null && attrs.getValue("height") == null)
603        {
604            attrs.addCDATAAttribute("height", Integer.toString(img.getHeight()));
605        }
606    }
607        
608    @Override
609    public void endElement(String uri, String loc, String raw) throws SAXException
610    {
611        if ("img".equals(raw) && _tagToIgnore)
612        {
613            // ignore img tag
614            _tagToIgnore = false;
615            return;
616        }
617        
618        super.endElement(uri, loc, raw);
619    }
620    
621    @Override
622    public void endDocument() throws SAXException
623    {
624        if (_richText instanceof RichText)
625        {
626            RichText richText = (RichText) _richText;
627            
628            // Look for unused files
629            Set<String> unusedLocalFiles = richText.getAttachmentNames()
630                                                    .stream()
631                                                    .filter(fileName -> !_usedLocalFiles.contains(fileName))
632                                                    .collect(Collectors.toSet());
633            // Remove unused files        
634            unusedLocalFiles.stream()
635                            .forEach(richText::removeAttachment);
636        }
637        else
638        {
639            ModifiableRichText richText = (ModifiableRichText) _richText;
640
641            // removing unused files
642            ModifiableFolder folder = richText.getAdditionalDataFolder();
643            for (File file : folder.getFiles())
644            {
645                String fileName = file.getName();
646                
647                if (!_usedLocalFiles.contains(fileName))
648                {
649                    folder.remove(fileName);
650                }
651            }
652        }
653        
654        super.endDocument();
655    }
656    
657    /** 
658     * Get the rich text meta
659     * @param meta The composite meta
660     * @param metadataName The metadata name (with /)
661     * @return The rich text meta
662     * @deprecated still there for legacy purposes
663     */
664    protected org.ametys.plugins.repository.metadata.RichText _getMeta(CompositeMetadata meta, String metadataName)
665    {
666        int pos = metadataName.indexOf("/");
667        if (pos == -1)
668        {
669            return meta.getRichText(metadataName);
670        }
671        else
672        {
673            return _getMeta(meta.getCompositeMetadata(metadataName.substring(0, pos)), metadataName.substring(pos + 1));
674        }
675    }
676}