/*
 *  Copyright 2010 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
016package org.ametys.cms.transformation.htmledition;
017
018import java.awt.image.BufferedImage;
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.HttpURLConnection;
023import java.net.MalformedURLException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.net.URL;
027import java.time.ZoneOffset;
028import java.time.ZonedDateTime;
029import java.util.Date;
030import java.util.HashSet;
031import java.util.Map;
032import java.util.Optional;
033import java.util.Set;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036import java.util.stream.Collectors;
037
038import org.apache.avalon.framework.context.ContextException;
039import org.apache.avalon.framework.service.ServiceException;
040import org.apache.avalon.framework.service.ServiceManager;
041import org.apache.cocoon.Constants;
042import org.apache.cocoon.components.ContextHelper;
043import org.apache.cocoon.environment.Context;
044import org.apache.cocoon.environment.ObjectModelHelper;
045import org.apache.cocoon.environment.Request;
046import org.apache.cocoon.xml.AttributesImpl;
047import org.apache.commons.io.IOUtils;
048import org.apache.commons.io.output.ByteArrayOutputStream;
049import org.apache.excalibur.source.Source;
050import org.apache.excalibur.source.SourceResolver;
051import org.xml.sax.Attributes;
052import org.xml.sax.SAXException;
053
054import org.ametys.cms.data.NamedResource;
055import org.ametys.cms.data.RichText;
056import org.ametys.core.upload.Upload;
057import org.ametys.core.upload.UploadManager;
058import org.ametys.core.user.CurrentUserProvider;
059import org.ametys.core.util.DateUtils;
060import org.ametys.core.util.ImageHelper;
061import org.ametys.plugins.explorer.resources.Resource;
062import org.ametys.plugins.repository.AmetysObjectResolver;
063import org.ametys.plugins.repository.AmetysRepositoryException;
064import org.ametys.plugins.repository.UnknownAmetysObjectException;
065import org.ametys.plugins.repository.metadata.File;
066import org.ametys.plugins.repository.metadata.ModifiableFile;
067import org.ametys.plugins.repository.metadata.ModifiableFolder;
068import org.ametys.plugins.repository.metadata.ModifiableResource;
069import org.ametys.plugins.repository.metadata.ModifiableRichText;
070
071/**
072 * This transformer extracts uploaded files' ids from the incoming HTML for further processing.
073 */
074public class UploadedDataHTMLEditionHandler extends AbstractHTMLEditionHandler
075{
076    private static final Pattern __INLINE_IMAGE_MARKER = Pattern.compile("^data:image/(png|jpeg|gif);base64,.*");
077    
078    private UploadManager _uploadManager;
079    private CurrentUserProvider _userProvider;
080    private SourceResolver _resolver;
081    private AmetysObjectResolver _ametysResolver;
082    private Context _cocoonContext;
083    
084    private boolean _tagToIgnore;
085    private Set<String> _usedLocalFiles = new HashSet<>();
086    private Object _richText;
087    private Map _objectModel;
088
089
090    @Override
091    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
092    {
093        super.contextualize(context);
094        _cocoonContext = (Context) _context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
095    }
096    
097    @Override
098    public void service(ServiceManager sManager) throws ServiceException
099    {
100        super.service(sManager);
101        _uploadManager = (UploadManager) sManager.lookup(UploadManager.ROLE);
102        _userProvider = (CurrentUserProvider) sManager.lookup(CurrentUserProvider.ROLE);
103        _resolver = (SourceResolver) sManager.lookup(SourceResolver.ROLE);
104        _ametysResolver = (AmetysObjectResolver) sManager.lookup(AmetysObjectResolver.ROLE);
105    }
106    
107    @Override
108    public void startDocument() throws SAXException
109    {
110        _tagToIgnore = false;
111        _objectModel = ContextHelper.getObjectModel(_context);
112        Map parentContextParameters = (Map) _objectModel.get(ObjectModelHelper.PARENT_CONTEXT);
113        _richText = parentContextParameters.get("richText");
114        
115        super.startDocument();
116    }
117    
118    @Override
119    public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
120    {
121        if ("img".equals(raw))
122        {
123            String type = attrs.getValue("data-ametys-type");
124            
125            if ("temp".equals(type))
126            {
127                Attributes newAttrs = _getAttributesForTemp(attrs);
128                super.startElement(uri, loc, raw, newAttrs);
129                return;
130            }
131            else if ("explorer".equals(type))
132            {
133                Attributes newAttrs = _processResource(attrs);
134                super.startElement(uri, loc, raw, newAttrs);
135                return;
136            }
137            else if ("local".equals(type))
138            {
139                Attributes newAttrs = _processLocal(attrs);
140                super.startElement(uri, loc, raw, newAttrs);
141                return;
142            }
143            else if (type == null && !"marker".equals(attrs.getValue("marker")))
144            {
145                // image is copied from elsewhere, fetch it in the content
146                String src = attrs.getValue("src");
147                if (src == null)
148                {
149                    _tagToIgnore = true;
150                    getLogger().warn("Don't know how to fetch image with no src attribute. Image is ignored.");
151                    return;
152                }
153                
154                // The final filename
155                String fileName = null;
156                // The new attributes, will be filled with image width and height.
157                AttributesImpl newAttrs = new AttributesImpl();
158
159                Matcher m = __INLINE_IMAGE_MARKER.matcher(src);
160                if (m.matches())
161                {
162                    String mimetype = m.group(1);
163                    String imageAsBase64 = src.substring(19 + mimetype.length());
164                    byte[] imageAsBytes = org.apache.commons.codec.binary.Base64.decodeBase64(imageAsBase64);
165                    fileName = _storeFile("paste." + mimetype, new ByteArrayInputStream(imageAsBytes), null, null);
166                    
167                    try (InputStream is = new ByteArrayInputStream(imageAsBytes))
168                    {
169                        _addDimensionAttributes(is, newAttrs);
170                    }
171                    catch (IOException e)
172                    {
173                        // Ignore
174                    }
175                }
176                else
177                {
178                    
179                    String initialFileName = _getInitialFileName(src);
180                    
181                    if (src.startsWith("/"))
182                    {
183                        try
184                        {
185                            fileName = _handleInternalFile(src, newAttrs, initialFileName);
186                        }
187                        catch (Exception e)
188                        {
189                            // unable to fetch image, do not keep the img tag
190                            _tagToIgnore = true;
191                            getLogger().warn("Unable to fetch internal image from URL '" + src + "'. Image is ignored.", e);
192                            return;
193                        }
194                    }
195                    else if (src.startsWith("http://") || src.startsWith("https://"))
196                    {
197                        try
198                        {
199                            fileName = _handleRemoteFile(src, newAttrs, initialFileName);
200                        }
201                        catch (Exception e)
202                        {
203                            // unable to fetch image, do not keep the img tag
204                            _tagToIgnore = true;
205                            getLogger().warn("Unable to fetch external image from URL '" + src + "'. Image is ignored.", e);
206                            return;
207                        }
208                    }
209                    else
210                    {
211                        _tagToIgnore = true;
212                        getLogger().warn("Don't know how to fetch image at '" + src + "'. Image is ignored.");
213                        return;
214                    }
215                }
216                    
217                _copyAttributes(attrs, newAttrs);
218                
219                newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
220                newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
221                
222                super.startElement(uri, loc, raw, newAttrs);
223                return;
224            }
225        }
226        
227        super.startElement(uri, loc, raw, attrs);
228    }
229    
230    private String _getInitialFileName(String src)
231    {
232        int j = src.lastIndexOf('/');
233        int k = src.indexOf('?', j);
234        String initialFileName;
235        
236        if (k == -1)
237        {
238            initialFileName = src.substring(j + 1);
239        }
240        else
241        {
242            initialFileName = src.substring(j + 1, k);
243        }
244        
245        // FIXME CMS-3090 A uploaded image can not contain '_max' or '_crop', replace it by '_Max', '_Crop'
246        return initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
247    }
248
249    private String _handleInternalFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException, URISyntaxException
250    {
251        // it may be an internal URL
252        Request request = ContextHelper.getRequest(_context);
253        String contextPath = request.getContextPath();
254        Source source = null;
255        
256        try
257        {
258            String modifiedSrc = src;
259            
260            if (src.startsWith(contextPath))
261            {
262                // it is an Ametys URL
263                // first decode it
264                modifiedSrc = new URI(modifiedSrc).getPath();
265                
266                modifiedSrc = "cocoon:/" + src.substring(contextPath.length());
267            }
268            else
269            {
270                StringBuilder sb = _getRequestURI(request);
271                
272                modifiedSrc = sb.toString() + modifiedSrc;
273            }
274            
275            source = _resolver.resolveURI(src);
276            
277            try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
278            {
279                try (InputStream is = source.getInputStream())
280                {
281                    IOUtils.copy(is, bos);
282                }
283                
284                String fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
285                
286                try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
287                {
288                    _addDimensionAttributes(is, newAttrs);
289                }
290                
291                return fileName;
292            }
293        }
294        finally
295        {
296            if (source != null)
297            {
298                _resolver.release(source);
299            }
300        }
301
302    }
303    
304    private String _handleRemoteFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException
305    {
306        String fileName;
307        URL url = new URL(src);
308        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
309        connection.setConnectTimeout(1000);
310        connection.setReadTimeout(2000);
311        
312        try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
313        {
314            try (InputStream is = connection.getInputStream())
315            {
316                IOUtils.copy(is, bos);
317            }
318            
319            fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
320            
321            try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
322            {
323                _addDimensionAttributes(is, newAttrs);
324            }
325        }
326        return fileName;
327    }
328    
329    /**
330     * Copy the attributes.
331     * @param attrs the attributes to copy.
332     * @param newAttrs the attributes to copy to.
333     */
334    private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
335    {
336        for (int i = 0; i < attrs.getLength(); i++)
337        {
338            String name = attrs.getQName(i);
339            
340            if (!"data-ametys-src".equals(name) && !"data-ametys-type".equals(name))
341            {
342                newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
343            }
344        }
345    }
346
347    /**
348     * Get the cms uri
349     * @param request The request
350     * @return the uri without context path
351     */
352    private StringBuilder _getRequestURI(Request request)
353    {
354        StringBuilder sb = new StringBuilder();
355        sb.append(request.getScheme());
356        sb.append("://");
357        sb.append(request.getServerName());
358        
359        if (request.isSecure())
360        {
361            if (request.getServerPort() != 443)
362            {
363                sb.append(":");
364                sb.append(request.getServerPort());
365            }
366        }
367        else
368        {
369            if (request.getServerPort() != 80)
370            {
371                sb.append(":");
372                sb.append(request.getServerPort());
373            }
374        }
375        return sb;
376    }
377
378    private Attributes _getAttributesForTemp(Attributes attrs)
379    {
380        // data has just been uploaded, must change the value, and store the id for further processing
381        String id = attrs.getValue("data-ametys-temp-src");
382        
383        Upload upload = _uploadManager.getUpload(_userProvider.getUser(), id);
384        
385        String initialFileName = upload.getFilename();
386        // FIXME CMS-3090 A uploaded image can not contain '_max', replace it by '_Max'
387        initialFileName = initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
388        String fileName = _storeFile(initialFileName, upload.getInputStream(), upload.getMimeType(), upload.getUploadedDate());
389        
390        AttributesImpl newAttrs = new AttributesImpl();
391        
392        _copyAttributes(attrs, newAttrs);
393        
394        if (!"marker".equals(attrs.getValue("marker")))
395        {
396            try (InputStream is = upload.getInputStream())
397            {
398                _addDimensionAttributes(is, newAttrs);
399            }
400            catch (IOException e)
401            {
402                // Ignore
403            }
404        }
405        
406        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
407        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
408        
409        return newAttrs;
410    }
411    
412    /**
413     * Store a file as rich text data.
414     * @param initialFileName the initial file name.
415     * @param is an input stream on the file.
416     * @param mimeType the file mime type.
417     * @param lastModified the last modification date.
418     * @return the final file name.
419     */
420    protected String _storeFile(String initialFileName, InputStream is, String mimeType, ZonedDateTime lastModified)
421    {
422        String fileName = initialFileName;
423        int count = 2;
424        
425        if (_richText instanceof RichText)
426        {
427            RichText richText = (RichText) _richText;
428            
429            while (richText.hasAttachment(fileName))
430            {
431                int i = initialFileName.lastIndexOf('.');
432                fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
433            }
434        
435            NamedResource resource = new NamedResource();
436    
437            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
438            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
439            
440            resource.setFilename(fileName);
441            resource.setLastModificationDate(lastModified != null ? lastModified : ZonedDateTime.now(ZoneOffset.UTC));
442            
443            try
444            {
445                resource.setInputStream(is);
446            }
447            catch (IOException e)
448            {
449                throw new AmetysRepositoryException("Unable to save attachment " + initialFileName, e);
450            }
451        
452            richText.addAttachment(resource);
453        }
454        else
455        {
456            ModifiableRichText richText = (ModifiableRichText) _richText;
457            
458            while (richText.getAdditionalDataFolder().hasFile(fileName))
459            {
460                int i = initialFileName.lastIndexOf('.');
461                fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
462            }
463            
464            ModifiableFile file = richText.getAdditionalDataFolder().addFile(fileName);
465            ModifiableResource resource = file.getResource();
466            resource.setLastModified(Optional.ofNullable(lastModified).map(DateUtils::asDate).orElseGet(Date::new));
467            
468            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
469            
470            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
471            resource.setInputStream(is);
472        }
473        
474        // store the file usage, so that it won't be deleted immediately
475        _usedLocalFiles.add(fileName);
476        
477        return fileName;
478    }
479    
480    /**
481     * Process a local file.
482     * @param attrs the img tag attributes.
483     * @return the new img tag attributes.
484     */
485    protected Attributes _processLocal(Attributes attrs)
486    {
487        // src contains the fileName
488        String filename = attrs.getValue("data-ametys-src");
489        _usedLocalFiles.add(filename);
490        
491        AttributesImpl newAttrs = new AttributesImpl(attrs);
492        if (!"marker".equals(attrs.getValue("marker")))
493        {
494            if (_richText instanceof RichText)
495            {
496                NamedResource file = ((RichText) _richText).getAttachment(filename);
497                try (InputStream is = file.getInputStream())
498                {
499                    _addDimensionAttributes(is, newAttrs);
500                }
501                catch (IOException e)
502                {
503                    // Ignore
504                }
505            }
506            else
507            {
508                File file = ((ModifiableRichText) _richText).getAdditionalDataFolder().getFile(filename);
509                try (InputStream is = file.getResource().getInputStream())
510                {
511                    _addDimensionAttributes(is, newAttrs);
512                }
513                catch (IOException e)
514                {
515                    // Ignore
516                }
517            }
518        }
519        
520        return newAttrs;
521    }
522
523    /**
524     * Process a resource.
525     * @param attrs the img tag attributes.
526     * @return the new img tag attributes.
527     */
528    protected Attributes _processResource(Attributes attrs)
529    {
530        String ametys_src = attrs.getValue("data-ametys-src");
531        
532        Resource resource = null;
533        try
534        {
535            resource = _ametysResolver.resolveById(ametys_src);
536        }
537        catch (UnknownAmetysObjectException ex)
538        {
539            getLogger().warn("Link to unexisting resource image " + ametys_src, ex);
540            return attrs;
541        }
542        
543        AttributesImpl newAttrs = new AttributesImpl(attrs);
544        if (!"marker".equals(attrs.getValue("marker")))
545        {
546            try (InputStream is = resource.getInputStream())
547            {
548                _addDimensionAttributes(is, newAttrs);
549            }
550            catch (IOException e)
551            {
552                // Ignore
553            }
554        }
555        
556        return newAttrs;
557    }
558
559    /**
560     * Add an image's width and height to the XML attributes.
561     * @param inputStream an input stream on the image.
562     * @param attrs the attributes to fill.
563     * @throws IOException if an error occurs during reading dimension
564     */
565    protected void _addDimensionAttributes(InputStream inputStream, AttributesImpl attrs) throws IOException
566    {
567        // We need to call Thumbnail to get image dimension with EXIF orientation tag
568        BufferedImage img = ImageHelper.read(inputStream);
569        if (img != null && attrs.getValue("width") == null)
570        {
571            attrs.addCDATAAttribute("width", Integer.toString(img.getWidth()));
572        }
573        if (img != null && attrs.getValue("height") == null)
574        {
575            attrs.addCDATAAttribute("height", Integer.toString(img.getHeight()));
576        }
577    }
578        
579    @Override
580    public void endElement(String uri, String loc, String raw) throws SAXException
581    {
582        if ("img".equals(raw) && _tagToIgnore)
583        {
584            // ignore img tag
585            _tagToIgnore = false;
586            return;
587        }
588        
589        super.endElement(uri, loc, raw);
590    }
591    
592    @Override
593    public void endDocument() throws SAXException
594    {
595        if (_richText instanceof RichText)
596        {
597            RichText richText = (RichText) _richText;
598            
599            // Look for unused files
600            Set<String> unusedLocalFiles = richText.getAttachmentNames()
601                                                    .stream()
602                                                    .filter(fileName -> !_usedLocalFiles.contains(fileName))
603                                                    .collect(Collectors.toSet());
604            // Remove unused files        
605            unusedLocalFiles.stream()
606                            .forEach(richText::removeAttachment);
607        }
608        else
609        {
610            ModifiableRichText richText = (ModifiableRichText) _richText;
611
612            // removing unused files
613            ModifiableFolder folder = richText.getAdditionalDataFolder();
614            for (File file : folder.getFiles())
615            {
616                String fileName = file.getName();
617                
618                if (!_usedLocalFiles.contains(fileName))
619                {
620                    folder.remove(fileName);
621                }
622            }
623        }
624        
625        super.endDocument();
626    }
627}