001/* 002 * Copyright 2011 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.glossary.transformation; 017 018import java.util.Collections; 019import java.util.HashMap; 020import java.util.HashSet; 021import java.util.Iterator; 022import java.util.Locale; 023import java.util.Map; 024import java.util.Set; 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.avalon.framework.component.Component; 029import org.apache.avalon.framework.context.Context; 030import org.apache.avalon.framework.context.ContextException; 031import org.apache.avalon.framework.context.Contextualizable; 032import org.apache.avalon.framework.service.ServiceException; 033import org.apache.avalon.framework.service.ServiceManager; 034import org.apache.avalon.framework.service.Serviceable; 035import org.apache.cocoon.components.ContextHelper; 036import org.apache.cocoon.environment.Request; 037import org.apache.cocoon.transformation.I18nTransformer; 038import org.apache.cocoon.xml.AttributesImpl; 039import org.apache.cocoon.xml.XMLUtils; 040import org.apache.commons.lang.StringUtils; 041import org.xml.sax.Attributes; 042import org.xml.sax.SAXException; 043 044import org.ametys.cms.repository.Content; 045import org.ametys.cms.transformation.AbstractEnhancementHandler; 046import org.ametys.cms.transformation.URIResolverExtensionPoint; 047import org.ametys.plugins.glossary.DefaultDefinition; 048import org.ametys.plugins.glossary.Definition; 049import org.ametys.plugins.glossary.GlossaryHelper; 050import org.ametys.plugins.repository.AmetysObjectIterable; 051import org.ametys.plugins.repository.AmetysObjectResolver; 052import org.ametys.plugins.repository.TraversableAmetysObject; 053import org.ametys.plugins.repository.query.expression.Expression; 054import org.ametys.plugins.repository.query.expression.Expression.Operator; 055import org.ametys.web.renderingcontext.RenderingContext; 056import org.ametys.web.renderingcontext.RenderingContextHandler; 057import org.ametys.web.repository.content.WebContent; 058import org.ametys.web.repository.page.Page; 059import org.ametys.web.repository.page.PageQueryHelper; 060import org.ametys.web.repository.site.SiteManager; 061import org.ametys.web.tags.TagExpression; 062 063/** 064 * Definition enhancement handler. 065 */ 066public class DefinitionEnhancementHandler extends AbstractEnhancementHandler implements Component, Serviceable, Contextualizable 067{ 068 /** The Avalon role. */ 069 public static final String ROLE = DefinitionEnhancementHandler.class.getName(); 070 071 /** The tags in which to ignore the glossary words. */ 072 private static final Set<String> __IGNORE_TAGS = new HashSet<>(); 073 static 074 { 075 __IGNORE_TAGS.add("head"); 076 __IGNORE_TAGS.add("script"); 077 __IGNORE_TAGS.add("style"); 078 __IGNORE_TAGS.add("option"); 079 __IGNORE_TAGS.add("a"); 080 __IGNORE_TAGS.add("h1"); 081 __IGNORE_TAGS.add("h2"); 082 __IGNORE_TAGS.add("h3"); 083 __IGNORE_TAGS.add("h4"); 084 __IGNORE_TAGS.add("h5"); 085 __IGNORE_TAGS.add("h6"); 086 } 087 088 /** The namespaces in which to ignore the glossary words. */ 089 private static final Set<String> __IGNORE_NAMESPACES = new HashSet<>(); 090 static 091 { 092 __IGNORE_NAMESPACES.add(I18nTransformer.I18N_NAMESPACE_URI); 093 } 094 095 /** The ametys object resolver. */ 096 protected AmetysObjectResolver _resolver; 097 098 /** The site manager */ 099 protected SiteManager _siteManager; 100 101 /** The page URI resolver. */ 102 protected URIResolverExtensionPoint _uriResolver; 103 104 /** The avalon context. */ 105 protected Context _context; 106 107 /** The word definitions. */ 108 protected Map<String, Definition> _definitions; 109 110 /** The glossary page href. */ 111 protected String _glossaryHref; 112 113 /** Ignored namespace stack. */ 114 protected Map<String, Integer> _ignoredNamespaceStack; 115 116 /** True if we are processing a paragraph, false otherwise. */ 117 private int _inIgnoredTag; 118 119 private RenderingContextHandler _renderingContextHandler; 120 121 @Override 122 public void service(ServiceManager serviceManager) throws ServiceException 123 { 124 _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE); 125 _uriResolver = (URIResolverExtensionPoint) serviceManager.lookup(URIResolverExtensionPoint.ROLE); 126 _siteManager = (SiteManager) serviceManager.lookup(SiteManager.ROLE); 127 _renderingContextHandler = (RenderingContextHandler) serviceManager.lookup(RenderingContextHandler.ROLE); 128 } 129 130 @Override 131 public void contextualize(Context context) throws ContextException 132 { 133 _context = context; 134 } 135 136 @Override 137 public void startDocument() throws SAXException 138 { 139 super.startDocument(); 140 141 _glossaryHref = null; 142 _inIgnoredTag = 0; 143 144 _ignoredNamespaceStack = new HashMap<>(); 145 for (String ignoredNamespace : __IGNORE_NAMESPACES) 146 { 147 _ignoredNamespaceStack.put(ignoredNamespace, 0); 148 } 149 } 150 151 @Override 152 public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException 153 { 154 super.startElement(uri, localName, qName, atts); 155 156 if (__IGNORE_TAGS.contains(localName.toLowerCase())) 157 { 158 _inIgnoredTag++; 159 } 160 if (__IGNORE_NAMESPACES.contains(uri)) 161 { 162 _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) + 1); 163 } 164 } 165 166 @Override 167 public void endElement(String uri, String localName, String qName) throws SAXException 168 { 169 if (__IGNORE_NAMESPACES.contains(uri)) 170 { 171 _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) - 1); 172 } 173 if (__IGNORE_TAGS.contains(localName.toLowerCase())) 174 { 175 _inIgnoredTag--; 176 } 177 178 super.endElement(uri, localName, qName); 179 } 180 181 @Override 182 public void characters(char[] ch, int start, int length) throws SAXException 183 { 184 if (_searchCharacters()) 185 { 186 Request request = ContextHelper.getRequest(_context); 187 Content content = (Content) request.getAttribute(Content.class.getName()); 188 _charactersWithDefinitions(ch, start, length, content); 189 } 190 else 191 { 192 super.characters(ch, start, length); 193 } 194 } 195 196 /** 197 * Test if the currently processed characters have to be searched for glossary words or ignored. 198 * @return true to search the characters for glossary words, false to ignore. 199 */ 200 protected boolean _searchCharacters() 201 { 202 // Test if we are in a tag to ignore. 203 boolean search = _inIgnoredTag < 1 && !_inUnmodifiableContent; 204 205 // Test if we are in a tag which has a namespace to ignore. 206 Iterator<Integer> nsIt = _ignoredNamespaceStack.values().iterator(); 207 while (nsIt.hasNext() && search) 208 { 209 // If the "namespace level" is greater than 0, we are in an ignored namespace. 210 if (nsIt.next() > 0) 211 { 212 search = false; 213 } 214 } 215 216 return search; 217 } 218 219 /** 220 * SAX characters, generating definition tags on words that are present in the glossary. 221 * @param ch the characters from the XML document. 222 * @param start the start position in the array. 223 * @param length the number of characters to read from the array. 224 * @param content the content. 225 * @throws SAXException if an error occurs generating the XML. 226 */ 227 protected void _charactersWithDefinitions(char[] ch, int start, int length, Content content) throws SAXException 228 { 229 Request request = ContextHelper.getRequest(_context); 230 231 // Get site name and language. 232 String siteName = null; 233 if (content instanceof WebContent) 234 { 235 siteName = ((WebContent) content).getSiteName(); 236 } 237 if (siteName == null) 238 { 239 siteName = (String) request.getAttribute("site"); 240 } 241 String language = content.getLanguage(); 242 if (language == null) 243 { 244 language = (String) request.getAttribute("sitemapLanguage"); 245 } 246 247 if (language == null) 248 { 249 Map objectModel = ContextHelper.getObjectModel(_context); 250 language = org.apache.cocoon.i18n.I18nUtils.findLocale(objectModel, "locale", null, Locale.getDefault(), true).getLanguage(); 251 } 252 253 // Build a pattern to detect the glossary words. 254 Map<String, Definition> words = _getDefinitions(siteName, language); 255 if (!words.isEmpty()) 256 { 257 Pattern pattern = _getWordsPattern(words.keySet()); 258 259 // Match the pattern. 260 String str = new String(ch, start, length); 261 Matcher matcher = pattern.matcher(str); 262 263 int previousMatch = start; 264 while (matcher.find()) 265 { 266 // Get a link on the tagged glossary page. 267 String pageLink = _getGlossaryPageHref(siteName, language); 268 269 int startIndex = matcher.start(); 270 int endIndex = matcher.end(); 271 272 int wordIndex = start + startIndex; 273 274 // The matched word. 275 String word = str.substring(startIndex, endIndex).toLowerCase(); 276 Definition definition = words.get(word); 277 278 // A null definition (word not present in the map) should never happen, but protect anyway. 279 if (definition != null && StringUtils.isNotEmpty(word)) 280 { 281 String defContent = definition.getContent(); 282 283 AttributesImpl attrs = new AttributesImpl(); 284 attrs.addCDATAAttribute("title", defContent); 285 286 // Generate all the characters until the matched word. 287 super.characters(ch, previousMatch, wordIndex - previousMatch); 288 289 // Generate the definition tag. 290 XMLUtils.startElement(_contentHandler, "dfn", attrs); 291 292 // If a glossary page is tagged in this site and sitemap, generate a link to it. 293 if (StringUtils.isNotEmpty(pageLink)) 294 { 295 RenderingContext currentContext = _renderingContextHandler.getRenderingContext(); 296 if (!(currentContext == RenderingContext.BACK)) 297 { 298 pageLink = pageLink + "?letter=" + word.charAt(0) + "#" + definition.getWord(); 299 } 300 301 AttributesImpl linkAttrs = new AttributesImpl(); 302 linkAttrs.addCDATAAttribute("href", pageLink); 303 XMLUtils.startElement(_contentHandler, "a", linkAttrs); 304 } 305 306 // Generate the word itself. 307 super.characters(ch, wordIndex, endIndex - startIndex); 308 309 if (StringUtils.isNotEmpty(pageLink)) 310 { 311 XMLUtils.endElement(_contentHandler, "a"); 312 } 313 314 XMLUtils.endElement(_contentHandler, "dfn"); 315 } 316 317 previousMatch = start + endIndex; 318 } 319 320 // Generate the end of the input. This will generate the whole input, unchanged, 321 // if no glossary word was present. 322 super.characters(ch, previousMatch, start + length - previousMatch); 323 } 324 else 325 { 326 super.characters(ch, start, length); 327 } 328 } 329 330 /** 331 * Get all the words with definitions to display. 332 * @param siteName the site name. 333 * @param lang the language. 334 * @return an exhaustive set of the words. 335 */ 336 protected Map<String, Definition> _getDefinitions(String siteName, String lang) 337 { 338 if (_definitions == null) 339 { 340 _definitions = _getWordsAndDefinitions(siteName, lang); 341 } 342 343 return Collections.unmodifiableMap(_definitions); 344 } 345 346 /** 347 * Get all the words with definitions to display. 348 * @param siteName the site name. 349 * @param lang the language. 350 * @return an exhaustive set of the words. 351 */ 352 protected Map<String, Definition> _getWordsAndDefinitions(String siteName, String lang) 353 { 354 Map<String, Definition> words = new HashMap<>(); 355 356 TraversableAmetysObject definitionsNode = GlossaryHelper.getDefinitionsNode(_siteManager.getSite(siteName), lang); 357 AmetysObjectIterable<DefaultDefinition> definitions = definitionsNode.getChildren(); 358 359 for (DefaultDefinition definition : definitions) 360 { 361 if (definition.displayOnText()) 362 { 363 for (String word : definition.getAllForms()) 364 { 365 words.put(word.toLowerCase(), definition); 366 } 367 } 368 } 369 370 return words; 371 } 372 373 /** 374 * Get a regexp that matches any of the definition words. 375 * @param words the words. 376 * @return the pattern. 377 */ 378 protected Pattern _getWordsPattern(Set<String> words) 379 { 380 StringBuilder pattern = new StringBuilder(); 381 pattern.append("\\b(?:"); 382 383 Iterator<String> wordIt = words.iterator(); 384 for (int i = 0; wordIt.hasNext(); i++) 385 { 386 if (i > 0) 387 { 388 pattern.append('|'); 389 } 390 391 // Quote 392 pattern.append("\\Q").append(wordIt.next()).append("\\E"); 393 } 394 395 pattern.append(")\\b"); 396 397 return Pattern.compile(pattern.toString(), Pattern.CASE_INSENSITIVE); 398 } 399 400 /** 401 * Get the glossary page in a given site and language. 402 * @param siteName the site name. 403 * @param language the language. 404 * @return the glossary page. 405 */ 406 protected String _getGlossaryPageHref(String siteName, String language) 407 { 408 if (_glossaryHref == null) 409 { 410 Page glossaryPage = _getGlossaryPage(siteName, language); 411 if (glossaryPage != null) 412 { 413 // FIXME CMS-2611 Force absolute 414 Request request = ContextHelper.getRequest(_context); 415 boolean absolute = request.getAttribute("forceAbsoluteUrl") != null ? (Boolean) request.getAttribute("forceAbsoluteUrl") : false; 416 417 _glossaryHref = _uriResolver.getResolverForType("page").resolve(glossaryPage.getId(), false, absolute, false); 418 } 419 else 420 { 421 _glossaryHref = ""; 422 } 423 } 424 425 return _glossaryHref; 426 } 427 428 /** 429 * Get the glossary page in a given site and language. 430 * @param siteName the site name. 431 * @param language the language. 432 * @return the glossary page. 433 */ 434 protected Page _getGlossaryPage(String siteName, String language) 435 { 436 Page page = null; 437 438 Expression glossaryExpr = new TagExpression(Operator.EQ, GlossaryHelper.GLOSSARY_PAGE_TAG); 439 String xpath = PageQueryHelper.getPageXPathQuery(siteName, language, null, glossaryExpr, null); 440 441 try (AmetysObjectIterable<Page> pages = _resolver.query(xpath);) 442 { 443 Iterator<Page> it = pages.iterator(); 444 if (it.hasNext()) 445 { 446 page = it.next(); 447 448 if (it.hasNext()) 449 { 450 getLogger().warn(String.format("More than one page is tagged 'GLOSSARY' in site %s and sitemap %s, please tag a single page.", siteName, language)); 451 } 452 } 453 } 454 455 return page; 456 } 457}