001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.transformation.htmledition;
017
018import java.awt.image.BufferedImage;
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.HttpURLConnection;
023import java.net.MalformedURLException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.net.URL;
027import java.util.Date;
028import java.util.HashSet;
029import java.util.Map;
030import java.util.Set;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033
034import org.apache.avalon.framework.context.ContextException;
035import org.apache.avalon.framework.service.ServiceException;
036import org.apache.avalon.framework.service.ServiceManager;
037import org.apache.cocoon.Constants;
038import org.apache.cocoon.components.ContextHelper;
039import org.apache.cocoon.environment.Context;
040import org.apache.cocoon.environment.ObjectModelHelper;
041import org.apache.cocoon.environment.Request;
042import org.apache.cocoon.xml.AttributesImpl;
043import org.apache.commons.io.IOUtils;
044import org.apache.commons.io.output.ByteArrayOutputStream;
045import org.apache.excalibur.source.Source;
046import org.apache.excalibur.source.SourceResolver;
047import org.xml.sax.Attributes;
048import org.xml.sax.SAXException;
049
050import org.ametys.cms.repository.Content;
051import org.ametys.core.upload.Upload;
052import org.ametys.core.upload.UploadManager;
053import org.ametys.core.user.CurrentUserProvider;
054import org.ametys.core.util.ImageHelper;
055import org.ametys.plugins.explorer.resources.Resource;
056import org.ametys.plugins.repository.AmetysObjectResolver;
057import org.ametys.plugins.repository.UnknownAmetysObjectException;
058import org.ametys.plugins.repository.metadata.CompositeMetadata;
059import org.ametys.plugins.repository.metadata.File;
060import org.ametys.plugins.repository.metadata.Folder;
061import org.ametys.plugins.repository.metadata.ModifiableFile;
062import org.ametys.plugins.repository.metadata.ModifiableFolder;
063import org.ametys.plugins.repository.metadata.ModifiableResource;
064import org.ametys.plugins.repository.metadata.ModifiableRichText;
065import org.ametys.plugins.repository.metadata.RichText;
066
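/**
 * This handler processes the <code>img</code> tags of the incoming HTML: uploaded, pasted (inline base64),
 * internal and remote images are stored as additional data of the rich text, missing image dimensions are
 * added to the tags, and the rich text files that are no longer referenced are removed at the end of the document.
 */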
070public class UploadedDataHTMLEditionHandler extends AbstractHTMLEditionHandler
071{
072    private static final Pattern __INLINE_IMAGE_MARKER = Pattern.compile("^data:image/(png|jpeg|gif);base64,.*");
073    
074    private UploadManager _uploadManager;
075    private CurrentUserProvider _userProvider;
076    private SourceResolver _resolver;
077    private AmetysObjectResolver _ametysResolver;
078    private Context _cocoonContext;
079    
080    private boolean _tagToIgnore;
081    private Set<String> _usedLocalFiles = new HashSet<>();
082    private ModifiableRichText _richText;
083    private Map _objectModel;
084
085
086    @Override
087    public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException
088    {
089        super.contextualize(context);
090        _cocoonContext = (Context) _context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
091    }
092    
093    @Override
094    public void service(ServiceManager sManager) throws ServiceException
095    {
096        super.service(sManager);
097        _uploadManager = (UploadManager) sManager.lookup(UploadManager.ROLE);
098        _userProvider = (CurrentUserProvider) sManager.lookup(CurrentUserProvider.ROLE);
099        _resolver = (SourceResolver) sManager.lookup(SourceResolver.ROLE);
100        _ametysResolver = (AmetysObjectResolver) sManager.lookup(AmetysObjectResolver.ROLE);
101    }
102    
103    @Override
104    public void startDocument() throws SAXException
105    {
106        _tagToIgnore = false;
107        _objectModel = ContextHelper.getObjectModel(_context);
108        Map parentContextParameters = (Map) _objectModel.get(ObjectModelHelper.PARENT_CONTEXT);
109        _richText = (ModifiableRichText) parentContextParameters.get("richText");
110        
111        super.startDocument();
112    }
113    
114    @Override
115    public void startElement(String uri, String loc, String raw, Attributes attrs) throws SAXException
116    {
117        if ("img".equals(raw))
118        {
119            String type = attrs.getValue("data-ametys-type");
120            
121            if ("temp".equals(type))
122            {
123                Attributes newAttrs = _getAttributesForTemp(attrs);
124                super.startElement(uri, loc, raw, newAttrs);
125                return;
126            }
127            else if ("explorer".equals(type))
128            {
129                Attributes newAttrs = _processResource(attrs);
130                super.startElement(uri, loc, raw, newAttrs);
131                return;
132            }
133            else if ("local".equals(type))
134            {
135                Attributes newAttrs = _processLocal(attrs);
136                super.startElement(uri, loc, raw, newAttrs);
137                return;
138            }
139            else if (type == null && !"marker".equals(attrs.getValue("marker")))
140            {
141                // image is copied from elsewhere, fetch it in the content
142                String src = attrs.getValue("src");
143                if (src == null)
144                {
145                    _tagToIgnore = true;
146                    getLogger().warn("Don't know how to fetch image with no src attribute. Image is ignored.");
147                    return;
148                }
149                
150                String ametys_src = attrs.getValue("data-ametys-src");
151
152                // The final filename
153                String fileName = null;
154                // The new attributes, will be filled with image width and height.
155                AttributesImpl newAttrs = new AttributesImpl();
156
157                Matcher m = __INLINE_IMAGE_MARKER.matcher(src);
158                if (m.matches())
159                {
160                    String mimetype = m.group(1);
161                    String imageAsBase64 = src.substring(19 + mimetype.length());
162                    byte[] imageAsBytes = org.apache.commons.codec.binary.Base64.decodeBase64(imageAsBase64);
163                    fileName = _storeFile("paste." + mimetype, new ByteArrayInputStream(imageAsBytes), null, null);
164                    
165                    try (InputStream is = new ByteArrayInputStream(imageAsBytes))
166                    {
167                        _addDimensionAttributes(is, newAttrs);
168                    }
169                    catch (IOException e)
170                    {
171                        // Ignore
172                    }
173                }
174                else
175                {
176                    
177                    String initialFileName = _getInitialFileName(src);
178                    
179                    if (src.startsWith("/"))
180                    {
181                        try
182                        {
183                            fileName = _handleInternalFile(src, newAttrs, initialFileName);
184                        }
185                        catch (Exception e)
186                        {
187                            // unable to fetch image, do not keep the img tag
188                            _tagToIgnore = true;
189                            getLogger().warn("Unable to fetch internal image from URL '" + src + "'. Image is ignored.", e);
190                            return;
191                        }
192                    }
193                    else if (src.startsWith("http://") || src.startsWith("https://"))
194                    {
195                        try
196                        {
197                            fileName = _handleRemoteFile(src, newAttrs, initialFileName);
198                        }
199                        catch (Exception e)
200                        {
201                            // unable to fetch image, do not keep the img tag
202                            _tagToIgnore = true;
203                            getLogger().warn("Unable to fetch external image from URL '" + src + "'. Image is ignored.", e);
204                            return;
205                        }
206                    }
207                    else
208                    {
209                        _tagToIgnore = true;
210                        getLogger().warn("Don't know how to fetch image at '" + src + "'. Image is ignored.");
211                        return;
212                    }
213                }
214                    
215                _copyAttributes(attrs, newAttrs);
216                
217                newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", ametys_src.replaceAll("\\.", "/") + ";" + fileName);
218                newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
219                
220                super.startElement(uri, loc, raw, newAttrs);
221                return;
222            }
223        }
224        
225        super.startElement(uri, loc, raw, attrs);
226    }
227    
228    private String _getInitialFileName(String src)
229    {
230        int j = src.lastIndexOf('/');
231        int k = src.indexOf('?', j);
232        String initialFileName;
233        
234        if (k == -1)
235        {
236            initialFileName = src.substring(j + 1);
237        }
238        else
239        {
240            initialFileName = src.substring(j + 1, k);
241        }
242        
243        // FIXME CMS-3090 A uploaded image can not contain '_max', replace it by '_Max'
244        return initialFileName.replaceAll("_max", "_Max");
245    }
246
247    private String _handleInternalFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException, URISyntaxException
248    {
249        // it may be an internal URL
250        Request request = ContextHelper.getRequest(_context);
251        String contextPath = request.getContextPath();
252        Source source = null;
253        
254        try
255        {
256            String modifiedSrc = src;
257            
258            if (src.startsWith(contextPath))
259            {
260                // it is an Ametys URL
261                // first decode it
262                modifiedSrc = new URI(modifiedSrc).getPath();
263                
264                modifiedSrc = "cocoon:/" + src.substring(contextPath.length());
265            }
266            else
267            {
268                StringBuilder sb = _getRequestURI(request);
269                
270                modifiedSrc = sb.toString() + modifiedSrc;
271            }
272            
273            source = _resolver.resolveURI(src);
274            
275            try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
276            {
277                try (InputStream is = source.getInputStream())
278                {
279                    IOUtils.copy(is, bos);
280                }
281                
282                String fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
283                
284                try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
285                {
286                    _addDimensionAttributes(is, newAttrs);
287                }
288                
289                return fileName;
290            }
291        }
292        finally
293        {
294            if (source != null)
295            {
296                _resolver.release(source);
297            }
298        }
299
300    }
301    
302    private String _handleRemoteFile(String src, AttributesImpl newAttrs, String initialFileName) throws MalformedURLException, IOException
303    {
304        String fileName;
305        URL url = new URL(src);
306        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
307        connection.setConnectTimeout(1000);
308        connection.setReadTimeout(2000);
309        
310        try (ByteArrayOutputStream bos = new ByteArrayOutputStream())
311        {
312            try (InputStream is = connection.getInputStream())
313            {
314                IOUtils.copy(is, bos);
315            }
316            
317            fileName = _storeFile(initialFileName, new ByteArrayInputStream(bos.toByteArray()), null, null);
318            
319            try (InputStream is = new ByteArrayInputStream(bos.toByteArray()))
320            {
321                _addDimensionAttributes(is, newAttrs);
322            }
323        }
324        return fileName;
325    }
326    
327    /**
328     * Copy the attributes.
329     * @param attrs the attributes to copy.
330     * @param newAttrs the attributes to copy to.
331     */
332    private void _copyAttributes(Attributes attrs, AttributesImpl newAttrs)
333    {
334        for (int i = 0; i < attrs.getLength(); i++)
335        {
336            String name = attrs.getQName(i);
337            
338            if (!"data-ametys-src".equals(name) && !"data-ametys-type".equals(name))
339            {
340                newAttrs.addAttribute(attrs.getURI(i), attrs.getLocalName(i), name, attrs.getType(i), attrs.getValue(i));
341            }
342        }
343    }
344
345    /**
346     * Get the cms uri
347     * @param request The request
348     * @return the uri without context path
349     */
350    private StringBuilder _getRequestURI(Request request)
351    {
352        StringBuilder sb = new StringBuilder();
353        sb.append(request.getScheme());
354        sb.append("://");
355        sb.append(request.getServerName());
356        
357        if (request.isSecure())
358        {
359            if (request.getServerPort() != 443)
360            {
361                sb.append(":");
362                sb.append(request.getServerPort());
363            }
364        }
365        else
366        {
367            if (request.getServerPort() != 80)
368            {
369                sb.append(":");
370                sb.append(request.getServerPort());
371            }
372        }
373        return sb;
374    }
375
376    private Attributes _getAttributesForTemp(Attributes attrs)
377    {
378        // data has just been uploaded, must change the value, and store the id for further processing
379        String id = attrs.getValue("data-ametys-temp-src");
380        String src = attrs.getValue("data-ametys-src");
381        
382        Upload upload = _uploadManager.getUpload(_userProvider.getUser(), id);
383        
384        String initialFileName = upload.getFilename();
385        // FIXME CMS-3090 A uploaded image can not contain '_max', replace it by '_Max'
386        initialFileName = initialFileName.replaceAll("_max", "_Max");
387        String fileName = _storeFile(initialFileName, upload.getInputStream(), upload.getMimeType(), upload.getUploadedDate());
388        
389        AttributesImpl newAttrs = new AttributesImpl();
390        
391        _copyAttributes(attrs, newAttrs);
392        
393        if (!"marker".equals(attrs.getValue("marker")))
394        {
395            try (InputStream is = upload.getInputStream())
396            {
397                _addDimensionAttributes(is, newAttrs);
398            }
399            catch (IOException e)
400            {
401                // Ignore
402            }
403        }
404        
405        newAttrs.addAttribute("", "data-ametys-src", "data-ametys-src", "CDATA", src.replaceAll("\\.", "/") + ";" + fileName);
406        newAttrs.addAttribute("", "data-ametys-type", "data-ametys-type", "CDATA", "local");
407        
408        return newAttrs;
409    }
410    
411    /**
412     * Store a file as rich text data.
413     * @param initialFileName the initial file name.
414     * @param is an input stream on the file.
415     * @param mimeType the file mime type.
416     * @param lastModified the last modification date.
417     * @return the final file name.
418     */
419    protected String _storeFile(String initialFileName, InputStream is, String mimeType, Date lastModified)
420    {
421        String fileName = initialFileName;
422        int count = 2;
423        
424        while (_richText.getAdditionalDataFolder().hasFile(fileName))
425        {
426            int i = initialFileName.lastIndexOf('.');
427            fileName = i == -1 ? initialFileName + '-' + (count++) : initialFileName.substring(0, i) + '-' + (count++) + initialFileName.substring(i);
428        }
429        
430        ModifiableFile file = _richText.getAdditionalDataFolder().addFile(fileName);
431        ModifiableResource resource = file.getResource();
432        resource.setLastModified(lastModified != null ? lastModified : new Date());
433        
434        String finalMimeType = mimeType != null ? mimeType : _cocoonContext.getMimeType(fileName.toLowerCase());
435        
436        resource.setMimeType(finalMimeType != null ? finalMimeType : "application/unknown");
437        resource.setInputStream(is);
438        
439        // store the file usage, so that it won't be deleted immediately
440        _usedLocalFiles.add(fileName);
441        
442        return fileName;
443    }
444    
445    /**
446     * Process a local file.
447     * @param attrs the img tag attributes.
448     * @return the new img tag attributes.
449     */
450    protected Attributes _processLocal(Attributes attrs)
451    {
452        // src is of the form contentId@metadataName;fileName
453        String ametys_src = attrs.getValue("data-ametys-src");
454        int i = ametys_src.indexOf('@');
455        int j = ametys_src.lastIndexOf(';');
456        String id = ametys_src.substring(0, i);
457        String metadataName = ametys_src.substring(i + 1, j);
458        String filename = ametys_src.substring(j + 1);
459        
460        if (j == -1)
461        {
462            throw new IllegalArgumentException("A local image from inline editor should have an data-ametys-src attribute of the form <protocol>://<protocol-specific-part>;<filename> : " + ametys_src);
463        }
464        
465        _usedLocalFiles.add(filename);
466        
467        Content content = _ametysResolver.resolveById(id);
468        Folder folder = _getMeta(content.getMetadataHolder(), metadataName).getAdditionalDataFolder();
469        File file = folder.getFile(filename);
470        
471        AttributesImpl newAttrs = new AttributesImpl(attrs);
472        if (!"marker".equals(attrs.getValue("marker")))
473        {
474            try (InputStream is = file.getResource().getInputStream())
475            {
476                _addDimensionAttributes(is, newAttrs);
477            }
478            catch (IOException e)
479            {
480                // Ignore
481            }
482        }
483        
484        return newAttrs;
485    }
486
487    /**
488     * Process a resource.
489     * @param attrs the img tag attributes.
490     * @return the new img tag attributes.
491     */
492    protected Attributes _processResource(Attributes attrs)
493    {
494        String ametys_src = attrs.getValue("data-ametys-src");
495        
496        Resource resource = null;
497        try
498        {
499            resource = _ametysResolver.resolveById(ametys_src);
500        }
501        catch (UnknownAmetysObjectException ex)
502        {
503            getLogger().warn("Link to unexisting resource image " + ametys_src, ex);
504            return attrs;
505        }
506        
507        AttributesImpl newAttrs = new AttributesImpl(attrs);
508        if (!"marker".equals(attrs.getValue("marker")))
509        {
510            try (InputStream is = resource.getInputStream())
511            {
512                _addDimensionAttributes(is, newAttrs);
513            }
514            catch (IOException e)
515            {
516                // Ignore
517            }
518        }
519        
520        return newAttrs;
521    }
522
523    /**
524     * Add an image's width and height to the XML attributes.
525     * @param inputStream an input stream on the image.
526     * @param attrs the attributes to fill.
527     * @throws IOException if an error occurs during reading dimension
528     */
529    protected void _addDimensionAttributes(InputStream inputStream, AttributesImpl attrs) throws IOException
530    {
531        // We need to call Thumbnail to get image dimension with EXIF orientation tag
532        BufferedImage img = ImageHelper.read(inputStream);
533        if (img != null && attrs.getValue("width") == null)
534        {
535            attrs.addCDATAAttribute("width", Integer.toString(img.getWidth()));
536        }
537        if (img != null && attrs.getValue("height") == null)
538        {
539            attrs.addCDATAAttribute("height", Integer.toString(img.getHeight()));
540        }
541    }
542        
543    @Override
544    public void endElement(String uri, String loc, String raw) throws SAXException
545    {
546        if ("img".equals(raw) && _tagToIgnore)
547        {
548            // ignore img tag
549            _tagToIgnore = false;
550            return;
551        }
552        
553        super.endElement(uri, loc, raw);
554    }
555    
556    @Override
557    public void endDocument() throws SAXException
558    {
559        // removing unused files
560        ModifiableFolder folder = _richText.getAdditionalDataFolder();
561        for (File file : folder.getFiles())
562        {
563            String fileName = file.getName();
564            
565            if (!_usedLocalFiles.contains(fileName))
566            {
567                folder.remove(fileName);
568            }
569        }
570        
571        super.endDocument();
572    }
573    
574    /** 
575     * Get the rich text meta
576     * @param meta The composite meta
577     * @param metadataName The metadata name (with /)
578     * @return The rich text meta
579     */
580    protected RichText _getMeta(CompositeMetadata meta, String metadataName)
581    {
582        int pos = metadataName.indexOf("/");
583        if (pos == -1)
584        {
585            return meta.getRichText(metadataName);
586        }
587        else
588        {
589            return _getMeta(meta.getCompositeMetadata(metadataName.substring(0, pos)), metadataName.substring(pos + 1));
590        }
591    }
592}