001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.transformation.htmledition;
017
018import java.awt.image.BufferedImage;
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.HttpURLConnection;
023import java.net.MalformedURLException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.net.URL;
027import java.time.ZoneOffset;
028import java.time.ZonedDateTime;
029import java.util.Date;
030import java.util.HashSet;
031import java.util.Map;
032import java.util.Optional;
033import java.util.Set;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036import java.util.stream.Collectors;
037
038import org.apache.avalon.framework.context.ContextException;
039import org.apache.avalon.framework.service.ServiceException;
040import org.apache.avalon.framework.service.ServiceManager;
041import org.apache.cocoon.Constants;
042import org.apache.cocoon.components.ContextHelper;
043import org.apache.cocoon.environment.Context;
044import org.apache.cocoon.environment.ObjectModelHelper;
045import org.apache.cocoon.environment.Request;
046import org.apache.cocoon.xml.AttributesImpl;
047import org.apache.commons.io.IOUtils;
048import org.apache.commons.io.output.ByteArrayOutputStream;
049import org.apache.excalibur.source.Source;
050import org.apache.excalibur.source.SourceResolver;
051import org.xml.sax.Attributes;
052import org.xml.sax.SAXException;
053
054import org.ametys.cms.data.NamedResource;
055import org.ametys.cms.data.RichText;
056import org.ametys.core.upload.Upload;
057import org.ametys.core.upload.UploadManager;
058import org.ametys.core.user.CurrentUserProvider;
059import org.ametys.core.util.DateUtils;
060import org.ametys.core.util.ImageHelper;
061import org.ametys.plugins.explorer.resources.Resource;
062import org.ametys.plugins.repository.AmetysObjectResolver;
063import org.ametys.plugins.repository.AmetysRepositoryException;
064import org.ametys.plugins.repository.UnknownAmetysObjectException;
065import org.ametys.plugins.repository.metadata.File;
066import org.ametys.plugins.repository.metadata.ModifiableFile;
067import org.ametys.plugins.repository.metadata.ModifiableFolder;
068import org.ametys.plugins.repository.metadata.ModifiableResource;
069import org.ametys.plugins.repository.metadata.ModifiableRichText;
070
071/**
072 * This transformer extracts uploaded files' ids from the incoming HTML for further processing.
073 */
074public class UploadedDataHTMLEditionHandler extends AbstractHTMLEditionHandler
075{
076    private static final Pattern __INLINE_IMAGE_MARKER = Pattern.compile("^data:image/(png|jpeg|gif);base64,.*");
077    
078    private UploadManager _uploadManager;
079    private CurrentUserProvider _userProvider;
080    private SourceResolver _resolver;
081    private AmetysObjectResolver _ametysResolver;
082    private Context _cocoonContext;
083    
084    private boolean _tagToIgnore;
085    private Set<String> _usedLocalFiles = new HashSet<>();
086    private Object _richText; // FIXME only handle org.ametys.cms.data.RichText when old API will finally be removed
087    private Map _objectModel;
088
089
090    @Override
091    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
092    {
093        super.contextualize(context);
094        _cocoonContext = (Context) _context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
095    }
096    
097    @Override
098    public void service(ServiceManager sManager) throws ServiceException
099    {
100        super.service(sManager);
101        _uploadManager = (UploadManager) sManager.lookup(UploadManager.ROLE);
102        _userProvider = (CurrentUserProvider) sManager.lookup(CurrentUserProvider.ROLE);
103        _resolver = (SourceResolver) sManager.lookup(SourceResolver.ROLE);
104        _ametysResolver = (AmetysObjectResolver) sManager.lookup(AmetysObjectResolver.ROLE);
105    }
106    
107    @Override
108    public void startDocument() throws SAXException
109    {
110        _tagToIgnore = false;
111        _objectModel = ContextHelper.getObjectModel(_context);
112        Map parentContextParameters = (Map) _objectModel.get(ObjectModelHelper.PARENT_CONTEXT);
113        _richText = parentContextParameters.get("richText");
114        
115        super.startDocument();
116    }
117    
118    @Override
119    public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
120    {
121        if ("img".equals(raw))
122        {
123            String type = attrs.getValue("data-ametys-type");
124            
125            if ("temp".equals(type))
126            {
127                Attributes newAttrs = _getAttributesForTemp(attrs);
128                super.startElement(uri, loc, raw, newAttrs);
129                return;
130            }
131            else if ("explorer".equals(type))
132            {
133                Attributes newAttrs = _processResource(attrs);
134                super.startElement(uri, loc, raw, newAttrs);
135                return;
136            }
137            else if ("local".equals(type))
138            {
139                Attributes newAttrs = _processLocal(attrs);
140                super.startElement(uri, loc, raw, newAttrs);
141                return;
142            }
143            else if (type == null && !"marker".equals(attrs.getValue("marker")))
144            {
145                // image is copied from elsewhere, fetch it in the content
146                String src = attrs.getValue("src");
147                if (src == null)
148                {
149                    _tagToIgnore = true;
150                    getLogger().warn("Don't know how to fetch image with no src attribute. Image is ignored.");
151                    return;
152                }
153                
154                // The final filename
155                String fileName = null;
156                // The new attributes, will be filled with image width and height.
157                AttributesImpl newAttrs = new AttributesImpl();
158
159                Matcher m = __INLINE_IMAGE_MARKER.matcher(src);
160                if (m.matches())
161                {
162                    String mimetype = m.group(1);
163                    String imageAsBase64 = src.substring(19 + mimetype.length());
164                    byte[] imageAsBytes = org.apache.commons.codec.binary.Base64.decodeBase64(imageAsBase64);
165                    fileName = _storeFile("paste." + mimetype, new ByteArrayInputStream(imageAsBytes), null, null);
166                    
167                    try (InputStream is = new ByteArrayInputStream(imageAsBytes))
168                    {
169                        _addDimensionAttributes(is, newAttrs);
170                    }
171                    catch (IOException e)
172                    {
173                        // Ignore
174                    }
175                }
176                else
177                {
178                    
179                    String initialFileName = _getInitialFileName(src);
180                    
181                    if (src.startsWith("/"))
182                    {
183                        try
184                        {
185                            fileName = _handleInternalFile(src, newAttrs, initialFileName);
186                        }
187                        catch (Exception e)
188                        {
189                            // unable to fetch image, do not keep the img tag
190                            _tagToIgnore = true;
191                            getLogger().warn("Unable to fetch internal image from URL '" + src + "'. Image is ignored.", e);
192                            return;
193                        }
194                    }
195                    else if (src.startsWith("http://") || src.startsWith("https://"))
196                    {
197                        try
198                        {
199                            fileName = _handleRemoteFile(src, newAttrs, initialFileName);
200                        }
201                        catch (Exception e)
202                        {
203                            // unable to fetch image, do not keep the img tag
204                            _tagToIgnore = true;
205                            getLogger().warn("Unable to fetch external image from URL '" + src + "'. Image is ignored.", e);
206                            return;
207                        }
208                    }
209                    else
210                    {
211                        _tagToIgnore = true;
212                        getLogger().warn("Don't know how to fetch image at '" + src + "'. Image is ignored.");
213                        return;
214                    }
215                }
216                    
217                _copyAttributes(attrs, newAttrs);
218                
219                newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
220                newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
221                
222                super.startElement(uri, loc, raw, newAttrs);
223                return;
224            }
225        }
226        
227        super.startElement(uri, loc, raw, attrs);
228    }
229    
230    private String _getInitialFileName(String src)
231    {
232        int j = src.lastIndexOf('/');
233        int k = src.indexOf('?', j);
234        String initialFileName;
235        
236        if (k == -1)
237        {
238            initialFileName = src.substring(j + 1);
239        }
240        else
241        {
242            initialFileName = src.substring(j + 1, k);
243        }
244        
245        // FIXME CMS-3090 A uploaded image can not contain '_max' or '_crop', replace it by '_Max', '_Crop'
246        return initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
247    }
248
249    private String _handleInternalFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException, URISyntaxException
250    {
251        // it may be an internal URL
252        Request request = ContextHelper.getRequest(_context);
253        String contextPath = request.getContextPath();
254        Source source = null;
255        
256        try
257        {
258            String modifiedSrc = src;
259            
260            if (src.startsWith(contextPath))
261            {
262                // it is an Ametys URL
263                // first decode it
264                modifiedSrc = new URI(modifiedSrc).getPath();
265                
266                modifiedSrc = "cocoon:/" + src.substring(contextPath.length());
267            }
268            else
269            {
270                StringBuilder sb = _getRequestURI(request);
271                
272                modifiedSrc = sb.toString() + modifiedSrc;
273            }
274            
275            source = _resolver.resolveURI(src);
276            
277            try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
278            {
279                try (InputStream is = source.getInputStream())
280                {
281                    IOUtils.copy(is, bos);
282                }
283                
284                String fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
285                
286                try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
287                {
288                    _addDimensionAttributes(is, newAttrs);
289                }
290                
291                return fileName;
292            }
293        }
294        finally
295        {
296            if (source != null)
297            {
298                _resolver.release(source);
299            }
300        }
301
302    }
303    
304    private String _handleRemoteFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException
305    {
306        String fileName;
307        URL url = new URL(src);
308        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
309        connection.setRequestProperty("User-Agent", "Mozilla"); // some servers only answer to browsers ...
310        connection.setConnectTimeout(1000);
311        connection.setReadTimeout(2000);
312        
313        try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
314        {
315            try (InputStream is = connection.getInputStream())
316            {
317                IOUtils.copy(is, bos);
318            }
319            
320            fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
321            
322            try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
323            {
324                _addDimensionAttributes(is, newAttrs);
325            }
326        }
327        return fileName;
328    }
329    
330    /**
331     * Copy the attributes.
332     * @param attrs the attributes to copy.
333     * @param newAttrs the attributes to copy to.
334     */
335    private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
336    {
337        for (int i = 0; i < attrs.getLength(); i++)
338        {
339            String name = attrs.getQName(i);
340            
341            if (!"data-ametys-src".equals(name) && !"data-ametys-type".equals(name))
342            {
343                newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
344            }
345        }
346    }
347
348    /**
349     * Get the cms uri
350     * @param request The request
351     * @return the uri without context path
352     */
353    private StringBuilder _getRequestURI(Request request)
354    {
355        StringBuilder sb = new StringBuilder();
356        sb.append(request.getScheme());
357        sb.append("://");
358        sb.append(request.getServerName());
359        
360        if (request.isSecure())
361        {
362            if (request.getServerPort() != 443)
363            {
364                sb.append(":");
365                sb.append(request.getServerPort());
366            }
367        }
368        else
369        {
370            if (request.getServerPort() != 80)
371            {
372                sb.append(":");
373                sb.append(request.getServerPort());
374            }
375        }
376        return sb;
377    }
378
379    private Attributes _getAttributesForTemp(Attributes attrs)
380    {
381        // data has just been uploaded, must change the value, and store the id for further processing
382        String id = attrs.getValue("data-ametys-temp-src");
383        
384        Upload upload = _uploadManager.getUpload(_userProvider.getUser(), id);
385        
386        String initialFileName = upload.getFilename();
387        // FIXME CMS-3090 A uploaded image can not contain '_max', replace it by '_Max'
388        initialFileName = initialFileName.replaceAll("_max", "_Max").replaceAll("_crop", "_Crop");
389        String fileName = _storeFile(initialFileName, upload.getInputStream(), upload.getMimeType(), upload.getUploadedDate());
390        
391        AttributesImpl newAttrs = new AttributesImpl();
392        
393        _copyAttributes(attrs, newAttrs);
394        
395        if (!"marker".equals(attrs.getValue("marker")))
396        {
397            try (InputStream is = upload.getInputStream())
398            {
399                _addDimensionAttributes(is, newAttrs);
400            }
401            catch (IOException e)
402            {
403                // Ignore
404            }
405        }
406        
407        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", fileName);
408        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
409        
410        return newAttrs;
411    }
412    
413    /**
414     * Store a file as rich text data.
415     * @param initialFileName the initial file name.
416     * @param is an input stream on the file.
417     * @param mimeType the file mime type.
418     * @param lastModified the last modification date.
419     * @return the final file name.
420     */
421    protected String _storeFile(String initialFileName, InputStream is, String mimeType, ZonedDateTime lastModified)
422    {
423        String fileName = initialFileName;
424        int count = 2;
425        
426        if (_richText instanceof RichText)
427        {
428            RichText richText = (RichText) _richText;
429            
430            while (richText.hasAttachment(fileName))
431            {
432                int i = initialFileName.lastIndexOf('.');
433                fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
434            }
435        
436            NamedResource resource = new NamedResource();
437    
438            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
439            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
440            
441            resource.setFilename(fileName);
442            resource.setLastModificationDate(lastModified != null ? lastModified : ZonedDateTime.now(ZoneOffset.UTC));
443            
444            try
445            {
446                resource.setInputStream(is);
447            }
448            catch (IOException e)
449            {
450                throw new AmetysRepositoryException("Unable to save attachment " + initialFileName, e);
451            }
452        
453            richText.addAttachment(resource);
454        }
455        else
456        {
457            ModifiableRichText richText = (ModifiableRichText) _richText;
458            
459            while (richText.getAdditionalDataFolder().hasFile(fileName))
460            {
461                int i = initialFileName.lastIndexOf('.');
462                fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
463            }
464            
465            ModifiableFile file = richText.getAdditionalDataFolder().addFile(fileName);
466            ModifiableResource resource = file.getResource();
467            resource.setLastModified(Optional.ofNullable(lastModified).map(DateUtils::asDate).orElseGet(Date::new));
468            
469            String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
470            
471            resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
472            resource.setInputStream(is);
473        }
474        
475        // store the file usage, so that it won't be deleted immediately
476        _usedLocalFiles.add(fileName);
477        
478        return fileName;
479    }
480    
481    /**
482     * Process a local file.
483     * @param attrs the img tag attributes.
484     * @return the new img tag attributes.
485     */
486    protected Attributes _processLocal(Attributes attrs)
487    {
488        // src contains the fileName
489        String filename = attrs.getValue("data-ametys-src");
490        _usedLocalFiles.add(filename);
491        
492        AttributesImpl newAttrs = new AttributesImpl(attrs);
493        if (!"marker".equals(attrs.getValue("marker")))
494        {
495            if (_richText instanceof RichText)
496            {
497                NamedResource file = ((RichText) _richText).getAttachment(filename);
498                if (file != null)
499                {
500                    try (InputStream is = file.getInputStream())
501                    {
502                        _addDimensionAttributes(is, newAttrs);
503                    }
504                    catch (IOException e)
505                    {
506                        // Ignore
507                    }
508                }
509            }
510            else
511            {
512                File file = ((ModifiableRichText) _richText).getAdditionalDataFolder().getFile(filename);
513                try (InputStream is = file.getResource().getInputStream())
514                {
515                    _addDimensionAttributes(is, newAttrs);
516                }
517                catch (IOException e)
518                {
519                    // Ignore
520                }
521            }
522        }
523        
524        return newAttrs;
525    }
526
527    /**
528     * Process a resource.
529     * @param attrs the img tag attributes.
530     * @return the new img tag attributes.
531     */
532    protected Attributes _processResource(Attributes attrs)
533    {
534        String ametys_src = attrs.getValue("data-ametys-src");
535        
536        Resource resource = null;
537        try
538        {
539            resource = _ametysResolver.resolveById(ametys_src);
540        }
541        catch (UnknownAmetysObjectException ex)
542        {
543            getLogger().warn("Link to unexisting resource image " + ametys_src, ex);
544            return attrs;
545        }
546        
547        AttributesImpl newAttrs = new AttributesImpl(attrs);
548        if (!"marker".equals(attrs.getValue("marker")))
549        {
550            try (InputStream is = resource.getInputStream())
551            {
552                _addDimensionAttributes(is, newAttrs);
553            }
554            catch (IOException e)
555            {
556                // Ignore
557            }
558        }
559        
560        return newAttrs;
561    }
562
563    /**
564     * Add an image's width and height to the XML attributes.
565     * @param inputStream an input stream on the image.
566     * @param attrs the attributes to fill.
567     * @throws IOException if an error occurs during reading dimension
568     */
569    protected void _addDimensionAttributes(InputStream inputStream, AttributesImpl attrs) throws IOException
570    {
571        // We need to call Thumbnail to get image dimension with EXIF orientation tag
572        BufferedImage img = ImageHelper.read(inputStream);
573        if (img != null && attrs.getValue("width") == null)
574        {
575            attrs.addCDATAAttribute("width", Integer.toString(img.getWidth()));
576        }
577        if (img != null && attrs.getValue("height") == null)
578        {
579            attrs.addCDATAAttribute("height", Integer.toString(img.getHeight()));
580        }
581    }
582        
583    @Override
584    public void endElement(String uri, String loc, String raw) throws SAXException
585    {
586        if ("img".equals(raw) && _tagToIgnore)
587        {
588            // ignore img tag
589            _tagToIgnore = false;
590            return;
591        }
592        
593        super.endElement(uri, loc, raw);
594    }
595    
596    @Override
597    public void endDocument() throws SAXException
598    {
599        if (_richText instanceof RichText)
600        {
601            RichText richText = (RichText) _richText;
602            
603            // Look for unused files
604            Set<String> unusedLocalFiles = richText.getAttachmentNames()
605                                                    .stream()
606                                                    .filter(fileName -> !_usedLocalFiles.contains(fileName))
607                                                    .collect(Collectors.toSet());
608            // Remove unused files        
609            unusedLocalFiles.stream()
610                            .forEach(richText::removeAttachment);
611        }
612        else
613        {
614            ModifiableRichText richText = (ModifiableRichText) _richText;
615
616            // removing unused files
617            ModifiableFolder folder = richText.getAdditionalDataFolder();
618            for (File file : folder.getFiles())
619            {
620                String fileName = file.getName();
621                
622                if (!_usedLocalFiles.contains(fileName))
623                {
624                    folder.remove(fileName);
625                }
626            }
627        }
628        
629        super.endDocument();
630    }
631}