001/* 002 * Copyright 2012 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.core.cocoon; 017 018import java.util.Arrays; 019import java.util.HashSet; 020import java.util.Set; 021 022import javax.xml.transform.Result; 023 024import org.apache.avalon.framework.configuration.Configuration; 025import org.apache.avalon.framework.configuration.ConfigurationException; 026import org.apache.avalon.framework.logger.LogEnabled; 027import org.apache.avalon.framework.logger.Logger; 028import org.apache.avalon.framework.service.ServiceException; 029import org.apache.avalon.framework.service.ServiceManager; 030import org.apache.avalon.framework.service.Serviceable; 031import org.xml.sax.Attributes; 032import org.xml.sax.SAXException; 033import org.xml.sax.helpers.AttributesImpl; 034 035/** 036 * Inherits from cocoon's serializers block XHTMLSerializer.<p> 037 * The following configuration can be used: 038 * <pre> 039 * <omit-xml-declaration>yes|no</omit-xml-declaration> 040 * <tags-to-collapse>input,meta</tags-to-collapse> 041 * <namespace-allowed></namespace-allowed> 042 * <namespace-allowed>http://www.w3.org/XML/1998/namespace</namespace-allowed> 043 * <namespace-allowed>http://www.w3.org/1999/xhtml</namespace-allowed> 044 * </pre> 045 * Empty tags are not collapsed except the ones configured with 046 * <code>tags-to-collapse</code>.<br> 047 * If there is no such configuration, default tags to collaspe are: 048 * <ul> 049 * <li>input</li> 050 * <li>img</li> 051 * <li>meta</li> 052 * <li>link</li> 053 * <li>hr</li> 054 * <li>br</li> 055 * </ul> 056 * Namespace tags and attributes are filtered to product valid XHTML. 057 * This is configureable using <code>namespace-allowed</code>, by default 058 * the only following namespaces are allowed: 059 * <ul> 060 * <li>"" (empty namespace)</li> 061 * <li>"http://www.w3.org/XML/1998/namespace"</li> 062 * <li>"http://www.w3.org/1999/xhtml"</li> 063 * <li>"http://www.w3.org/2000/svg"</li> 064 * <li>"http://www.w3.org/1998/Math/MathML"</li> 065 * </ul> 066 * Content of <code>script</code> tags will be exported in a single comment.<p> 067 * <code>omit-xml-declaration</code> is set to <code>yes</code> by default for 068 * compatibility purpose (IE 6). 069 * If <code>omit-xml-declaration</code> is set to <code>no</code>, 070 * <code>Content-Type</code> meta tag will be dropped if present.<br> 071 * @since 1.1.5 this serializer is JAXP compliant with the processing instruction 072 * <code>javax.xml.transform.*-output-escaping processing</code>. 073 * @see Result 074 */ 075public class XHTMLSerializer extends org.apache.cocoon.components.serializers.XHTMLSerializer implements LogEnabled, Serviceable 076{ 077 /** List of the tags to collapse. */ 078 private static final Set<String> __NAMESPACES_ALLOWED = new HashSet<>(Arrays.asList( 079 new String[] {"", "http://www.w3.org/XML/1998/namespace", XHTML1_NAMESPACE, "http://www.w3.org/2000/svg", 080 "http://www.w3.org/1998/Math/MathML"})); 081 082 /** List of the tags to collapse. */ 083 private static final Set<String> __COLLAPSE_TAGS = new HashSet<>(Arrays.asList( 084 new String[] {"input", "img", "meta", "link", "hr", "br"})); 085 086 /** Head tag. */ 087 private static final String __HEAD_TAG = "head"; 088 /** Meta tag. */ 089 private static final String __META_TAG = "meta"; 090 /** Meta HTTP equiv attribute name. */ 091 private static final String __META_HTTP_EQUIV_ATTR = "http-equiv"; 092 /** Meta HTTP equiv attribute value for content type. */ 093 private static final String __META_HTTP_EQUIV_CTYPE_VALUE = "Content-Type"; 094 /** Meta content attribute name. */ 095 private static final String __META_CONTENT_ATTR = "content"; 096 /** Script tag. */ 097 private static final String __SCRIPT_TAG = "script"; 098 /** Style tag. */ 099 private static final String __STYLE_TAG = "style"; 100 101 /** The XHTMLSerializerExtensionPoint instance */ 102 protected XHTMLSerializerExtensionPoint _xhtmlSerializerExtensionPoint; 103 104 /** Buffer to store script tag content. */ 105 private StringBuilder _buffer; 106 107 /** Buffer to store tag to collapse. */ 108 private Set<String> _tagsToCollapse; 109 110 /** Namespaces allowed. */ 111 private Set<String> _namespacesAllowed; 112 113 /** Namespaces prefixe filtered. */ 114 private Set<String> _namespacesPrefixFiltered; 115 116 /** Inside filtered tag: greater than 0 if we are inside a filtered tag. */ 117 private int _insideFilteredTag; 118 119 /** Inline resource context: greater than 0 if we are inside a style or a script tag. */ 120 private int _insideInlineResourceTag; 121 private int _tagsInsideInlineResourceTag; 122 123 /** 124 * Flag for disabling output escaping states encountered with 125 * <code>javax.xml.transform.*-output-escaping</code> processing instructions. 126 */ 127 private boolean _disableOutputEscaping; 128 129 /** Define whether to put XML declaration in the head of the document. */ 130 private boolean _omitXmlDeclaration; 131 132 /** Meta http-equiv="Content-Type" context. True if we are inside a meta "content-type" tag.*/ 133 private boolean _isMetaContentType; 134 135 private Logger _logger; 136 137 @Override 138 public void enableLogging(Logger logger) 139 { 140 _logger = logger; 141 } 142 143 @Override 144 public void service(ServiceManager manager) throws ServiceException 145 { 146 if (manager.hasService(XHTMLSerializerExtensionPoint.ROLE)) 147 { 148 _xhtmlSerializerExtensionPoint = (XHTMLSerializerExtensionPoint) manager.lookup(XHTMLSerializerExtensionPoint.ROLE); 149 } 150 } 151 152 @Override 153 public void configure(Configuration conf) throws ConfigurationException 154 { 155 super.configure(conf); 156 157 String omitXmlDeclaration = conf.getChild("omit-xml-declaration").getValue(null); 158 // Default to yes (omit). 159 this._omitXmlDeclaration = !"no".equals(omitXmlDeclaration); 160 161 // Tags to collapse 162 String tagsToCollapse = conf.getChild("tags-to-collapse").getValue(null); 163 164 if (tagsToCollapse != null) 165 { 166 _tagsToCollapse = new HashSet<>(); 167 for (String tag : tagsToCollapse.split(",")) 168 { 169 _tagsToCollapse.add(tag.trim()); 170 } 171 } 172 else 173 { 174 _tagsToCollapse = __COLLAPSE_TAGS; 175 } 176 177 // Namespaces allowed 178 Configuration[] namespacesAllowed = conf.getChildren("namespace-allowed"); 179 180 if (namespacesAllowed.length > 0) 181 { 182 _namespacesAllowed = new HashSet<>(); 183 for (Configuration namespaceAllowed : namespacesAllowed) 184 { 185 String namespace = namespaceAllowed.getValue(""); 186 _namespacesAllowed.add(namespace.trim()); 187 } 188 } 189 else if (_xhtmlSerializerExtensionPoint != null) 190 { 191 _namespacesAllowed = _xhtmlSerializerExtensionPoint.getAllowedNamespaces(); 192 } 193 else 194 { 195 _namespacesAllowed = __NAMESPACES_ALLOWED; 196 } 197 } 198 199 @Override 200 public void startPrefixMapping(String prefix, String uri) throws SAXException 201 { 202 if (_namespacesAllowed.contains(uri)) 203 { 204 super.startPrefixMapping(prefix, uri); 205 } 206 else 207 { 208 _namespacesPrefixFiltered.add(prefix); 209 } 210 } 211 212 @Override 213 public void startElement(String nsuri, String local, String qual, Attributes attributes) throws SAXException 214 { 215 if (_insideInlineResourceTag > 0) 216 { 217 if (_logger.isWarnEnabled()) 218 { 219 _logger.warn("Tags are forbidden inside a <script> or <style> tag : <" + local + ">"); 220 } 221 222 _tagsInsideInlineResourceTag++; 223 } 224 else if (_namespacesAllowed.contains(nsuri)) 225 { 226 if (_insideFilteredTag == 0) 227 { 228 super.startElement(nsuri, local, qual, _filterAttributes(attributes)); 229 } 230 } 231 else 232 { 233 // Ignore nested content as the namespace is filtered 234 _insideFilteredTag++; 235 } 236 } 237 238 private Attributes _filterAttributes(Attributes attributes) 239 { 240 AttributesImpl attributesFiltered = new AttributesImpl(); 241 242 for (int i = 0; i < attributes.getLength(); i++) 243 { 244 String uri = attributes.getURI(i); 245 246 // Filter attribute with not allowed namespace 247 if (_namespacesAllowed.contains(uri)) 248 { 249 String qName = attributes.getQName(i); 250 251 // Filter attribute xmlns and xmlns:xxx 252 if (!qName.equals("xmlns") && !qName.startsWith("xmlns:")) 253 { 254 attributesFiltered.addAttribute(uri, attributes.getLocalName(i), qName, 255 attributes.getType(i), attributes.getValue(i)); 256 } 257 } 258 } 259 260 return attributesFiltered; 261 } 262 263 @Override 264 public void startElementImpl(String uri, String local, String qual, String[][] lNamespaces, String[][] attributes) throws SAXException 265 { 266 if (local.equalsIgnoreCase(__SCRIPT_TAG) && isJsScript(local, attributes) 267 || local.equalsIgnoreCase(__STYLE_TAG)) 268 { 269 _insideInlineResourceTag++; 270 } 271 272 _isMetaContentType = isMetaContentType(local, attributes); 273 274 // Always ignore the content-type meta tag because we do not want 275 // it in non omit mode and because we create it in omit mode (see below) 276 if (!_isMetaContentType) 277 { 278 super.startElementImpl(uri, local, qual, lNamespaces, attributes); 279 } 280 281 // Create our own content-type meta tag in omit mode 282 if (_omitXmlDeclaration && local.equalsIgnoreCase(__HEAD_TAG)) 283 { 284 // Create our own meta content type element as Xalan creates one but 285 // places it in the last children (after an potential title) 286 String qua = namespaces.qualify(XHTML1_NAMESPACE, __META_TAG, __META_TAG); 287 String[][] attrs = new String[2][ATTRIBUTE_LENGTH]; 288 289 attrs[0][ATTRIBUTE_NSURI] = ""; 290 attrs[0][ATTRIBUTE_LOCAL] = __META_HTTP_EQUIV_ATTR; 291 attrs[0][ATTRIBUTE_QNAME] = __META_HTTP_EQUIV_ATTR; 292 attrs[0][ATTRIBUTE_VALUE] = __META_HTTP_EQUIV_CTYPE_VALUE; 293 attrs[1][ATTRIBUTE_NSURI] = ""; 294 attrs[1][ATTRIBUTE_LOCAL] = __META_CONTENT_ATTR; 295 attrs[1][ATTRIBUTE_QNAME] = __META_CONTENT_ATTR; 296 attrs[1][ATTRIBUTE_VALUE] = this.getMimeType(); 297 298 super.startElementImpl(XHTML1_NAMESPACE, __META_TAG, qua, new String[0][0], attrs); 299 super.endElementImpl(XHTML1_NAMESPACE, __META_TAG, qua); 300 } 301 } 302 303 @Override 304 public void characters(char[] ch, int start, int length) throws SAXException 305 { 306 if (_insideFilteredTag == 0) 307 { 308 if (_disableOutputEscaping) 309 { 310 // Close current element if necessary 311 closeElement(false); 312 // Let content pass through unchanged 313 write(ch, start, length); 314 } 315 else 316 { 317 super.characters(ch, start, length); 318 } 319 } 320 } 321 322 @Override 323 public void charactersImpl(char[] data, int start, int length) throws SAXException 324 { 325 if (_insideInlineResourceTag > 0) 326 { 327 _buffer.append(data, start, length); 328 } 329 else 330 { 331 super.charactersImpl(data, start, length); 332 } 333 } 334 335 @Override 336 public void ignorableWhitespace(char[] data, int start, int length) throws SAXException 337 { 338 if (_insideFilteredTag == 0) 339 { 340 super.ignorableWhitespace(data, start, length); 341 } 342 } 343 344 @Override 345 public void comment(char[] data, int start, int length) throws SAXException 346 { 347 if (_insideFilteredTag == 0) 348 { 349 if (_insideInlineResourceTag > 0) 350 { 351 _buffer.append(data, start, length); 352 } 353 else 354 { 355 super.comment(data, start, length); 356 } 357 } 358 } 359 360 @Override 361 public void processingInstruction(String target, String data) throws SAXException 362 { 363 if (_insideFilteredTag == 0) 364 { 365 if (Result.PI_DISABLE_OUTPUT_ESCAPING.equals(target)) 366 { 367 // Start unescaping 368 _disableOutputEscaping = true; 369 } 370 else if (Result.PI_ENABLE_OUTPUT_ESCAPING.equals(target)) 371 { 372 // Stop unescapping 373 _disableOutputEscaping = false; 374 } 375 else 376 { 377 super.processingInstruction(target, data); 378 } 379 } 380 } 381 382 @Override 383 public void endElement(String nsuri, String local, String qual) throws SAXException 384 { 385 if (_tagsInsideInlineResourceTag > 0) 386 { 387 _tagsInsideInlineResourceTag--; 388 } 389 else if (_namespacesAllowed.contains(nsuri)) 390 { 391 if (_insideFilteredTag == 0) 392 { 393 super.endElement(nsuri, local, qual); 394 } 395 } 396 else 397 { 398 // Finish to ignore parsed nested content as the namespace is filtered 399 _insideFilteredTag--; 400 } 401 } 402 403 @Override 404 public void endElementImpl(String uri, String local, String qual) throws SAXException 405 { 406 String namespaceUri = uri; 407 if (uri.length() == 0) 408 { 409 namespaceUri = XHTML1_NAMESPACE; 410 } 411 412 if (local.equalsIgnoreCase(__SCRIPT_TAG) && _insideInlineResourceTag > 0) 413 { 414 _insideInlineResourceTag--; 415 if (_buffer.length() > 0) 416 { 417 char[] content = new char[_buffer.length() + 5]; 418 content[0] = '\n'; 419 content[content.length - 4] = '\n'; 420 content[content.length - 3] = '/'; 421 content[content.length - 2] = '/'; 422 content[content.length - 1] = ' '; 423 _buffer.getChars(0, _buffer.length(), content, 1); 424 _buffer.setLength(0); 425 super.comment(content, 0, content.length); 426 } 427 } 428 else if (local.equalsIgnoreCase(__STYLE_TAG)) 429 { 430 _insideInlineResourceTag--; 431 if (_buffer.length() > 0) 432 { 433 char[] content = new char[_buffer.length() + 2]; 434 content[0] = '\n'; 435 content[content.length - 1] = '\n'; 436 _buffer.getChars(0, _buffer.length(), content, 1); 437 _buffer.setLength(0); 438 super.comment(content, 0, content.length); 439 } 440 } 441 442 if (XHTML1_NAMESPACE.equals(namespaceUri)) 443 { 444 // If the element is not in the list of the tags to collapse, close it without collapsing 445 if (!_tagsToCollapse.contains(local)) 446 { 447 this.closeElement(false); 448 } 449 } 450 451 // Ignore the content-type meta tag, see startElementImpl 452 if (!_isMetaContentType) 453 { 454 super.endElementImpl(namespaceUri, local, qual); 455 } 456 else 457 { 458 _isMetaContentType = false; 459 } 460 } 461 462 @Override 463 public void endPrefixMapping(String prefix) throws SAXException 464 { 465 if (!_namespacesPrefixFiltered.contains(prefix)) 466 { 467 super.endPrefixMapping(prefix); 468 } 469 } 470 471 private boolean isMetaContentType(String local, String[][] attributes) 472 { 473 if (local.equalsIgnoreCase(__META_TAG)) 474 { 475 for (String[] attr : attributes) 476 { 477 if (attr[ATTRIBUTE_LOCAL].equalsIgnoreCase(__META_HTTP_EQUIV_ATTR) 478 && attr[ATTRIBUTE_VALUE].equalsIgnoreCase(__META_HTTP_EQUIV_CTYPE_VALUE)) 479 { 480 return true; 481 } 482 } 483 } 484 return false; 485 } 486 487 private boolean isJsScript(String local, String[][] attributes) 488 { 489 boolean hasTypeAttr = false; 490 491 if (local.equalsIgnoreCase(__SCRIPT_TAG)) 492 { 493 for (String[] attr : attributes) 494 { 495 if (attr[ATTRIBUTE_LOCAL].equalsIgnoreCase("type")) 496 { 497 hasTypeAttr = true; 498 499 if (attr[ATTRIBUTE_VALUE].equalsIgnoreCase("text/javascript")) 500 { 501 return true; 502 } 503 } 504 } 505 } 506 507 return !hasTypeAttr; 508 } 509 510 @Override 511 public void recycle() 512 { 513 super.recycle(); 514 515 if (_buffer == null) 516 { 517 _buffer = new StringBuilder(512); 518 } 519 else 520 { 521 if (_buffer.capacity() > 100 * 1024) 522 { 523 // Garbage collect previous buffer is it exceed 100 Kb 524 _buffer = new StringBuilder(512); 525 } 526 else 527 { 528 // Clear buffer but keep capacity 529 _buffer.setLength(0); 530 } 531 } 532 533 // Clear parsing state aware attributes 534 if (_namespacesPrefixFiltered == null) 535 { 536 _namespacesPrefixFiltered = new HashSet<>(); 537 } 538 else 539 { 540 _namespacesPrefixFiltered.clear(); 541 } 542 _insideFilteredTag = 0; 543 _insideInlineResourceTag = 0; 544 _disableOutputEscaping = false; 545 _isMetaContentType = false; 546 } 547}