001/*
002 *  Copyright 2012 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.core.cocoon;
017
018import java.util.Arrays;
019import java.util.HashSet;
020import java.util.Set;
021
022import javax.xml.transform.Result;
023
024import org.apache.avalon.framework.configuration.Configuration;
025import org.apache.avalon.framework.configuration.ConfigurationException;
026import org.apache.avalon.framework.logger.LogEnabled;
027import org.apache.avalon.framework.logger.Logger;
028import org.apache.avalon.framework.service.ServiceException;
029import org.apache.avalon.framework.service.ServiceManager;
030import org.apache.avalon.framework.service.Serviceable;
031import org.xml.sax.Attributes;
032import org.xml.sax.SAXException;
033import org.xml.sax.helpers.AttributesImpl;
034
035/**
036 * Inherits from cocoon's serializers block XHTMLSerializer.<p>
037 * The following configuration can be used:
038 * <pre>
039 * &lt;omit-xml-declaration&gt;yes|no&lt;/omit-xml-declaration&gt;
040 * &lt;tags-to-collapse&gt;input,meta&lt;/tags-to-collapse&gt;
041 * &lt;namespace-allowed&gt;&lt;/namespace-allowed&gt;
042 * &lt;namespace-allowed&gt;http://www.w3.org/XML/1998/namespace&lt;/namespace-allowed&gt;
043 * &lt;namespace-allowed&gt;http://www.w3.org/1999/xhtml&lt;/namespace-allowed&gt;
044 * </pre>
045 * Empty tags are not collapsed except the ones configured with
046 * <code>tags-to-collapse</code>.<br>
047 * If there is no such configuration, default tags to collaspe are:
048 * <ul>
049 *   <li>input</li>
050 *   <li>img</li>
051 *   <li>meta</li>
052 *   <li>link</li>
053 *   <li>hr</li>
054 *   <li>br</li>
055 * </ul>
056 * Namespace tags and attributes are filtered to product valid XHTML.
057 * This is configureable using <code>namespace-allowed</code>, by default
058 * the only following namespaces are allowed:
059 * <ul>
060 *   <li>"" (empty namespace)</li>
061 *   <li>"http://www.w3.org/XML/1998/namespace"</li>
062 *   <li>"http://www.w3.org/1999/xhtml"</li>
063 *   <li>"http://www.w3.org/2000/svg"</li>
064 *   <li>"http://www.w3.org/1998/Math/MathML"</li>
065 * </ul>
066 * Content of <code>script</code> tags will be exported in a single comment.<p>
067 * <code>omit-xml-declaration</code> is set to <code>yes</code> by default for
068 * compatibility purpose (IE 6).
069 * If <code>omit-xml-declaration</code> is set to <code>no</code>,
070 * <code>Content-Type</code> meta tag will be dropped if present.<br>
071 * @since 1.1.5 this serializer is JAXP compliant with the processing instruction
072 *              <code>javax.xml.transform.*-output-escaping processing</code>.
073 * @see Result
074 */
075public class XHTMLSerializer extends org.apache.cocoon.components.serializers.XHTMLSerializer implements LogEnabled, Serviceable
076{   
077    /** List of the tags to collapse. */
078    private static final Set<String> __NAMESPACES_ALLOWED = new HashSet<>(Arrays.asList(
079            new String[] {"", "http://www.w3.org/XML/1998/namespace", XHTML1_NAMESPACE, "http://www.w3.org/2000/svg",
080                "http://www.w3.org/1998/Math/MathML"}));
081
082    /** List of the tags to collapse. */
083    private static final Set<String> __COLLAPSE_TAGS = new HashSet<>(Arrays.asList(
084            new String[] {"input", "img", "meta", "link", "hr", "br"}));
085
086    /** Head tag. */
087    private static final String __HEAD_TAG = "head";
088    /** Meta tag. */
089    private static final String __META_TAG = "meta";
090    /** Meta HTTP equiv attribute name. */
091    private static final String __META_HTTP_EQUIV_ATTR = "http-equiv";
092    /** Meta HTTP equiv attribute value for content type. */
093    private static final String __META_HTTP_EQUIV_CTYPE_VALUE = "Content-Type";
094    /** Meta content attribute name. */
095    private static final String __META_CONTENT_ATTR = "content";
096    /** Script tag. */
097    private static final String __SCRIPT_TAG = "script";
098    /** Style tag. */
099    private static final String __STYLE_TAG = "style";
100
101    /** The XHTMLSerializerExtensionPoint instance */
102    protected XHTMLSerializerExtensionPoint _xhtmlSerializerExtensionPoint;
103
104    /** Buffer to store script tag content. */
105    private StringBuilder _buffer;
106
107    /** Buffer to store tag to collapse. */
108    private Set<String> _tagsToCollapse;
109
110    /** Namespaces allowed. */
111    private Set<String> _namespacesAllowed;
112
113    /** Namespaces prefixe filtered. */
114    private Set<String> _namespacesPrefixFiltered;
115
116    /** Inside filtered tag: greater than 0 if we are inside a filtered tag. */
117    private int _insideFilteredTag;
118
119    /** Inline resource context: greater than 0 if we are inside a style or a script tag. */
120    private int _insideInlineResourceTag;
121    private int _tagsInsideInlineResourceTag;
122
123    /**
124     * Flag for disabling output escaping states encountered with
125     * <code>javax.xml.transform.*-output-escaping</code> processing instructions.
126     */
127    private boolean _disableOutputEscaping;
128
129    /** Define whether to put XML declaration in the head of the document. */
130    private boolean _omitXmlDeclaration;
131
132    /** Meta http-equiv="Content-Type" context. True if we are inside a meta "content-type" tag.*/
133    private boolean _isMetaContentType;
134
135    private Logger _logger;
136
137    @Override
138    public void enableLogging(Logger logger)
139    {
140        _logger = logger;
141    }
142
143    @Override
144    public void service(ServiceManager manager) throws ServiceException
145    {
146        if (manager.hasService(XHTMLSerializerExtensionPoint.ROLE))
147        {
148            _xhtmlSerializerExtensionPoint = (XHTMLSerializerExtensionPoint) manager.lookup(XHTMLSerializerExtensionPoint.ROLE);
149        }
150    }
151
152    @Override
153    public void configure(Configuration conf) throws ConfigurationException
154    {
155        super.configure(conf);
156
157        String omitXmlDeclaration = conf.getChild("omit-xml-declaration").getValue(null);
158        // Default to yes (omit).
159        this._omitXmlDeclaration = !"no".equals(omitXmlDeclaration);
160
161        // Tags to collapse
162        String tagsToCollapse = conf.getChild("tags-to-collapse").getValue(null);
163
164        if (tagsToCollapse != null)
165        {
166            _tagsToCollapse = new HashSet<>();
167            for (String tag : tagsToCollapse.split(","))
168            {
169                _tagsToCollapse.add(tag.trim());
170            }
171        }
172        else
173        {
174            _tagsToCollapse = __COLLAPSE_TAGS;
175        }
176
177        // Namespaces allowed
178        Configuration[] namespacesAllowed = conf.getChildren("namespace-allowed");
179
180        if (namespacesAllowed.length > 0)
181        {
182            _namespacesAllowed = new HashSet<>();
183            for (Configuration namespaceAllowed : namespacesAllowed)
184            {
185                String namespace = namespaceAllowed.getValue("");
186                _namespacesAllowed.add(namespace.trim());
187            }
188        }
189        else if (_xhtmlSerializerExtensionPoint != null)
190        {
191            _namespacesAllowed = _xhtmlSerializerExtensionPoint.getAllowedNamespaces();
192        }
193        else
194        {
195            _namespacesAllowed = __NAMESPACES_ALLOWED;
196        }
197    }
198
199    @Override
200    public void startPrefixMapping(String prefix, String uri) throws SAXException
201    {
202        if (_namespacesAllowed.contains(uri))
203        {
204            super.startPrefixMapping(prefix, uri);
205        }
206        else
207        {
208            _namespacesPrefixFiltered.add(prefix);
209        }
210    }
211
212    @Override
213    public void startElement(String nsuri, String local, String qual, Attributes attributes) throws SAXException
214    {
215        if  (_insideInlineResourceTag > 0)
216        {
217            if (_logger.isWarnEnabled())
218            {
219                _logger.warn("Tags are forbidden inside a <script> or <style> tag : <" + local + ">");
220            }
221
222            _tagsInsideInlineResourceTag++;
223        }
224        else if (_namespacesAllowed.contains(nsuri))
225        {
226            if (_insideFilteredTag == 0)
227            {
228                super.startElement(nsuri, local, qual, _filterAttributes(attributes));
229            }
230        }
231        else
232        {
233            // Ignore nested content as the namespace is filtered
234            _insideFilteredTag++;
235        }
236    }
237
238    private Attributes _filterAttributes(Attributes attributes)
239    {
240        AttributesImpl attributesFiltered = new AttributesImpl();
241
242        for (int i = 0; i < attributes.getLength(); i++)
243        {
244            String uri = attributes.getURI(i);
245
246            // Filter attribute with not allowed namespace
247            if (_namespacesAllowed.contains(uri))
248            {
249                String qName = attributes.getQName(i);
250
251                // Filter attribute xmlns and xmlns:xxx
252                if (!qName.equals("xmlns") && !qName.startsWith("xmlns:"))
253                {
254                    attributesFiltered.addAttribute(uri, attributes.getLocalName(i), qName,
255                            attributes.getType(i), attributes.getValue(i));
256                }
257            }
258        }
259
260        return attributesFiltered;
261    }
262    
263    @Override
264    public void startElementImpl(String uri, String local, String qual, String[][] lNamespaces, String[][] attributes) throws SAXException
265    {
266        if ((local.equalsIgnoreCase(__SCRIPT_TAG) && isJsScript(local, attributes)) 
267                || local.equalsIgnoreCase(__STYLE_TAG))
268        {
269            _insideInlineResourceTag++;
270        }
271
272        _isMetaContentType = isMetaContentType(local, attributes);
273
274        // Always ignore the content-type meta tag because we do not want
275        // it in non omit mode and because we create it in omit mode (see below)
276        if (!_isMetaContentType)
277        {
278            super.startElementImpl(uri, local, qual, lNamespaces, attributes);
279        }
280
281        // Create our own content-type meta tag in omit mode
282        if (_omitXmlDeclaration && local.equalsIgnoreCase(__HEAD_TAG))
283        {
284            // Create our own meta content type element as Xalan creates one but
285            // places it in the last children (after an potential title)
286            String qua = namespaces.qualify(XHTML1_NAMESPACE, __META_TAG, __META_TAG);
287            String[][] attrs = new String[2][ATTRIBUTE_LENGTH];
288
289            attrs[0][ATTRIBUTE_NSURI] = "";
290            attrs[0][ATTRIBUTE_LOCAL] = __META_HTTP_EQUIV_ATTR;
291            attrs[0][ATTRIBUTE_QNAME] = __META_HTTP_EQUIV_ATTR;
292            attrs[0][ATTRIBUTE_VALUE] = __META_HTTP_EQUIV_CTYPE_VALUE;
293            attrs[1][ATTRIBUTE_NSURI] = "";
294            attrs[1][ATTRIBUTE_LOCAL] = __META_CONTENT_ATTR;
295            attrs[1][ATTRIBUTE_QNAME] = __META_CONTENT_ATTR;
296            attrs[1][ATTRIBUTE_VALUE] = this.getMimeType();
297
298            super.startElementImpl(XHTML1_NAMESPACE, __META_TAG, qua, new String[0][0], attrs);
299            super.endElementImpl(XHTML1_NAMESPACE, __META_TAG, qua);
300        }
301    }
302
303    @Override
304    public void characters(char[] ch, int start, int length) throws SAXException
305    {
306        if (_insideFilteredTag == 0)
307        {
308            if (_disableOutputEscaping)
309            {
310                // Close current element if necessary
311                closeElement(false);
312                // Let content pass through unchanged
313                write(ch, start, length);
314            }
315            else
316            {
317                super.characters(ch, start, length);
318            }
319        }
320    }
321
322    @Override
323    public void charactersImpl(char[] data, int start, int length) throws SAXException
324    {
325        if (_insideInlineResourceTag > 0)
326        {
327            _buffer.append(data, start, length);
328        }
329        else
330        {
331            super.charactersImpl(data, start, length);
332        }
333    }
334
335    @Override
336    public void ignorableWhitespace(char[] data, int start, int length) throws SAXException
337    {
338        if (_insideFilteredTag == 0)
339        {
340            super.ignorableWhitespace(data, start, length);
341        }
342    }
343
344    @Override
345    public void comment(char[] data, int start, int length) throws SAXException
346    {
347        if (_insideFilteredTag == 0)
348        {
349            if (_insideInlineResourceTag > 0)
350            {
351                _buffer.append(data, start, length);
352            }
353            else
354            {
355                super.comment(data, start, length);
356            }
357        }
358    }
359
360    @Override
361    public void processingInstruction(String target, String data) throws SAXException
362    {
363        if (_insideFilteredTag == 0)
364        {
365            if (Result.PI_DISABLE_OUTPUT_ESCAPING.equals(target))
366            {
367                // Start unescaping
368                _disableOutputEscaping = true;
369            }
370            else if (Result.PI_ENABLE_OUTPUT_ESCAPING.equals(target))
371            {
372                // Stop unescapping
373                _disableOutputEscaping = false;
374            }
375            else
376            {
377                super.processingInstruction(target, data);
378            }
379        }
380    }
381
382    @Override
383    public void endElement(String nsuri, String local, String qual) throws SAXException
384    {
385        if (_tagsInsideInlineResourceTag > 0)
386        {
387            _tagsInsideInlineResourceTag--;
388        }
389        else if (_namespacesAllowed.contains(nsuri))
390        {
391            if (_insideFilteredTag == 0)
392            {
393                super.endElement(nsuri, local, qual);
394            }
395        }
396        else
397        {
398            // Finish to ignore parsed nested content as the namespace is filtered
399            _insideFilteredTag--;
400        }
401    }
402
403    @Override
404    public void endElementImpl(String uri, String local, String qual) throws SAXException
405    {
406        String namespaceUri = uri;
407        if (uri.length() == 0)
408        {
409            namespaceUri = XHTML1_NAMESPACE;
410        }
411
412        if (local.equalsIgnoreCase(__SCRIPT_TAG) && _insideInlineResourceTag > 0)
413        {
414            _insideInlineResourceTag--;
415            if (_buffer.length() > 0)
416            {
417                char[] content = new char[_buffer.length() + 5];
418                content[0] = '\n';
419                content[content.length - 4] = '\n';
420                content[content.length - 3] = '/';
421                content[content.length - 2] = '/';
422                content[content.length - 1] = ' ';
423                _buffer.getChars(0, _buffer.length(), content, 1);
424                _buffer.setLength(0);
425                super.comment(content, 0, content.length);
426            }
427        }
428        else if (local.equalsIgnoreCase(__STYLE_TAG))
429        {
430            _insideInlineResourceTag--;
431            if (_buffer.length() > 0)
432            {
433                char[] content = new char[_buffer.length() + 2];
434                content[0] = '\n';
435                content[content.length - 1] = '\n';
436                _buffer.getChars(0, _buffer.length(), content, 1);
437                _buffer.setLength(0);
438                super.comment(content, 0, content.length);
439            }
440        }
441
442        if (XHTML1_NAMESPACE.equals(namespaceUri))
443        {
444            // If the element is not in the list of the tags to collapse, close it without collapsing
445            if (!_tagsToCollapse.contains(local))
446            {
447                this.closeElement(false);
448            }
449        }
450
451        // Ignore the content-type meta tag, see startElementImpl
452        if (!_isMetaContentType)
453        {
454            super.endElementImpl(namespaceUri, local, qual);
455        }
456        else
457        {
458            _isMetaContentType = false;
459        }
460    }
461
462    @Override
463    public void endPrefixMapping(String prefix) throws SAXException
464    {
465        if (!_namespacesPrefixFiltered.contains(prefix))
466        {
467            super.endPrefixMapping(prefix);
468        }
469    }
470
471    private boolean isMetaContentType(String local, String[][] attributes)
472    {
473        if (local.equalsIgnoreCase(__META_TAG))
474        {
475            for (String[] attr : attributes)
476            {
477                if (attr[ATTRIBUTE_LOCAL].equalsIgnoreCase(__META_HTTP_EQUIV_ATTR)
478                        && attr[ATTRIBUTE_VALUE].equalsIgnoreCase(__META_HTTP_EQUIV_CTYPE_VALUE))
479                {
480                    return true;
481                }
482            }
483        }
484        return false;
485    }
486    
487    private boolean isJsScript(String local, String[][] attributes)
488    {
489        boolean hasTypeAttr = false;
490        
491        if (local.equalsIgnoreCase(__SCRIPT_TAG))
492        {
493            for (String[] attr : attributes)
494            {
495                if (attr[ATTRIBUTE_LOCAL].equalsIgnoreCase("type"))
496                {
497                    hasTypeAttr = true;
498                    
499                    if (attr[ATTRIBUTE_VALUE].equalsIgnoreCase("text/javascript"))
500                    {
501                        return true;
502                    }
503                }
504            }
505        }
506        
507        return !hasTypeAttr;
508    }
509
510    @Override
511    public void recycle()
512    {
513        super.recycle();
514
515        if (_buffer == null)
516        {
517            _buffer = new StringBuilder(512);
518        }
519        else
520        {
521            if (_buffer.capacity() >  100 * 1024)
522            {
523                // Garbage collect previous buffer is it exceed 100 Kb
524                _buffer = new StringBuilder(512);
525            }
526            else
527            {
528                // Clear buffer but keep capacity
529                _buffer.setLength(0);
530            }
531        }
532
533        // Clear parsing state aware attributes
534        if (_namespacesPrefixFiltered == null)
535        {
536            _namespacesPrefixFiltered = new HashSet<>();
537        }
538        else
539        {
540            _namespacesPrefixFiltered.clear();
541        }
542        _insideFilteredTag = 0;
543        _insideInlineResourceTag = 0;
544        _disableOutputEscaping = false;
545        _isMetaContentType = false;
546    }
547}