/*
 *  Copyright 2011 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.glossary.transformation;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.context.Context;
import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.components.ContextHelper;
import org.apache.cocoon.environment.Request;
import org.apache.cocoon.transformation.I18nTransformer;
import org.apache.cocoon.xml.AttributesImpl;
import org.apache.cocoon.xml.XMLUtils;
import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

import org.ametys.cms.repository.Content;
import org.ametys.cms.transformation.AbstractEnhancementHandler;
import org.ametys.cms.transformation.URIResolverExtensionPoint;
import org.ametys.plugins.glossary.DefaultDefinition;
import org.ametys.plugins.glossary.Definition;
import org.ametys.plugins.glossary.GlossaryHelper;
import org.ametys.plugins.repository.AmetysObjectIterable;
import org.ametys.plugins.repository.AmetysObjectResolver;
import org.ametys.plugins.repository.TraversableAmetysObject;
import org.ametys.plugins.repository.query.expression.Expression;
import org.ametys.plugins.repository.query.expression.Expression.Operator;
import org.ametys.web.WebHelper;
import org.ametys.web.renderingcontext.RenderingContext;
import org.ametys.web.renderingcontext.RenderingContextHandler;
import org.ametys.web.repository.page.Page;
import org.ametys.web.repository.page.PageQueryHelper;
import org.ametys.web.repository.site.SiteManager;
import org.ametys.web.tags.TagExpression;

/**
 * Definition enhancement handler.<br>
 * While SAX events go through, text nodes are searched for glossary words. Each
 * recognized word is wrapped in a <code>dfn</code> element whose <code>title</code>
 * attribute holds the definition and, when a glossary page is tagged for the site
 * and language, in a link to that page.<br>
 * Text located inside one of the ignored tags or ignored namespaces is forwarded untouched.
 */
public class DefinitionEnhancementHandler extends AbstractEnhancementHandler implements Component, Serviceable, Contextualizable
{
    /** The Avalon role. */
    public static final String ROLE = DefinitionEnhancementHandler.class.getName();
    
    /** The tags in which to ignore the glossary words. */
    private static final Set<String> __IGNORE_TAGS = new HashSet<>();
    static
    {
        __IGNORE_TAGS.add("head");
        __IGNORE_TAGS.add("script");
        __IGNORE_TAGS.add("style");
        __IGNORE_TAGS.add("option");
        __IGNORE_TAGS.add("a");
        __IGNORE_TAGS.add("h1");
        __IGNORE_TAGS.add("h2");
        __IGNORE_TAGS.add("h3");
        __IGNORE_TAGS.add("h4");
        __IGNORE_TAGS.add("h5");
        __IGNORE_TAGS.add("h6");
    }
    
    /** The namespaces in which to ignore the glossary words. */
    private static final Set<String> __IGNORE_NAMESPACES = new HashSet<>();
    static
    {
        __IGNORE_NAMESPACES.add(I18nTransformer.I18N_NAMESPACE_URI);
    }
    
    /** The ametys object resolver. */
    protected AmetysObjectResolver _resolver;
    
    /** The site manager */
    protected SiteManager _siteManager;
    
    /** The page URI resolver. */
    protected URIResolverExtensionPoint _uriResolver;
    
    /** The avalon context. */
    protected Context _context;
    
    /** The word definitions, indexed by lower-cased word form. Loaded lazily, reset at each document start. */
    protected Map<String, Definition> _definitions;
    
    /** The glossary page href. Resolved lazily, reset at each document start; empty if no glossary page was found. */
    protected String _glossaryHref;
    
    /** For each ignored namespace, the number of currently open elements belonging to it. */
    protected Map<String, Integer> _ignoredNamespaceStack;
    
    /** The number of currently open ignored tags: text is searched for glossary words only when it is 0. */
    private int _inIgnoredTag;
    
    /** The compiled pattern matching the glossary words. Built lazily, reset at each document start. */
    private Pattern _wordsPattern;
    
    private RenderingContextHandler _renderingContextHandler;
    
    @Override
    public void service(ServiceManager serviceManager) throws ServiceException
    {
        _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE);
        _uriResolver = (URIResolverExtensionPoint) serviceManager.lookup(URIResolverExtensionPoint.ROLE);
        _siteManager = (SiteManager) serviceManager.lookup(SiteManager.ROLE);
        _renderingContextHandler = (RenderingContextHandler) serviceManager.lookup(RenderingContextHandler.ROLE);
    }
    
    @Override
    public void contextualize(Context context) throws ContextException
    {
        _context = context;
    }
    
    @Override
    public void startDocument() throws SAXException
    {
        super.startDocument();
        
        // Drop everything computed for a previous document: the site and language may differ
        // from one document to the next, so definitions and pattern must not outlive the href.
        _glossaryHref = null;
        _definitions = null;
        _wordsPattern = null;
        _inIgnoredTag = 0;
        
        _ignoredNamespaceStack = new HashMap<>();
        for (String ignoredNamespace : __IGNORE_NAMESPACES)
        {
            _ignoredNamespaceStack.put(ignoredNamespace, 0);
        }
    }
    
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
    {
        super.startElement(uri, localName, qName, atts);
        
        // Locale.ROOT: tag names must not be lower-cased with locale-specific rules (e.g. Turkish dotless i).
        if (__IGNORE_TAGS.contains(localName.toLowerCase(Locale.ROOT)))
        {
            _inIgnoredTag++;
        }
        if (__IGNORE_NAMESPACES.contains(uri))
        {
            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) + 1);
        }
    }
    
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
        if (__IGNORE_NAMESPACES.contains(uri))
        {
            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) - 1);
        }
        if (__IGNORE_TAGS.contains(localName.toLowerCase(Locale.ROOT)))
        {
            _inIgnoredTag--;
        }
        
        super.endElement(uri, localName, qName);
    }
    
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        if (_searchCharacters())
        {
            Request request = ContextHelper.getRequest(_context);
            Content content = (Content) request.getAttribute(Content.class.getName());
            _charactersWithDefinitions(ch, start, length, content);
        }
        else
        {
            super.characters(ch, start, length);
        }
    }
    
    /**
     * Test if the currently processed characters have to be searched for glossary words or ignored.
     * Characters are ignored when an ignored tag is open, when the inherited
     * <code>_inUnmodifiableContent</code> flag is set, or when an element of an ignored namespace is open.
     * @return true to search the characters for glossary words, false to ignore.
     */
    protected boolean _searchCharacters()
    {
        // Test if we are in a tag to ignore.
        if (_inIgnoredTag >= 1 || _inUnmodifiableContent)
        {
            return false;
        }
        
        // Test if we are in a tag which has a namespace to ignore.
        for (Integer namespaceLevel : _ignoredNamespaceStack.values())
        {
            // If the "namespace level" is greater than 0, we are in an ignored namespace.
            if (namespaceLevel > 0)
            {
                return false;
            }
        }
        
        return true;
    }
    
    /**
     * SAX characters, generating definition tags on words that are present in the glossary.
     * Every input character is forwarded exactly once, either as plain text or inside a definition tag.
     * @param ch the characters from the XML document.
     * @param start the start position in the array.
     * @param length the number of characters to read from the array.
     * @param content the content. Can be null, in which case the language is resolved from the request.
     * @throws SAXException if an error occurs generating the XML.
     */
    protected void _charactersWithDefinitions(char[] ch, int start, int length, Content content) throws SAXException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Get site name and language.
        String siteName = WebHelper.getSiteName(request, content);
        String language = _resolveLanguage(request, content);
        
        Map<String, Definition> words = _getDefinitions(siteName, language);
        if (words.isEmpty())
        {
            // No glossary word at all: forward the input unchanged.
            super.characters(ch, start, length);
            return;
        }
        
        // Build the pattern detecting the glossary words only once per document.
        if (_wordsPattern == null)
        {
            _wordsPattern = _getWordsPattern(words.keySet());
        }
        
        String str = new String(ch, start, length);
        Matcher matcher = _wordsPattern.matcher(str);
        
        // Index (in ch) of the first character that has not been generated yet.
        int previousMatch = start;
        while (matcher.find())
        {
            // The matched word, normalized the same way as the keys of the definition map.
            String word = matcher.group().toLowerCase(Locale.ROOT);
            Definition definition = words.get(word);
            
            // A null definition (word not present in the map) should not happen, but protect anyway.
            // previousMatch is deliberately left untouched, so that the skipped text is still
            // generated with the next segment instead of being lost.
            if (definition == null || StringUtils.isEmpty(word))
            {
                continue;
            }
            
            int wordIndex = start + matcher.start();
            int wordLength = matcher.end() - matcher.start();
            
            // Get a link on the tagged glossary page.
            String pageLink = _getGlossaryPageHref(siteName, language);
            
            // Generate all the characters until the matched word.
            super.characters(ch, previousMatch, wordIndex - previousMatch);
            
            // Generate the definition tag around the word.
            _generateDefinition(ch, wordIndex, wordLength, word, definition, pageLink);
            
            previousMatch = wordIndex + wordLength;
        }
        
        // Generate the end of the input. This will generate the whole input, unchanged,
        // if no glossary word was present.
        super.characters(ch, previousMatch, start + length - previousMatch);
    }
    
    /**
     * Determine the language used to look up the glossary: the content language, else the
     * "sitemapLanguage" request attribute, else the locale found by Cocoon in the object model.
     * @param request the current request.
     * @param content the content, can be null.
     * @return the language.
     */
    private String _resolveLanguage(Request request, Content content)
    {
        String language = content != null ? content.getLanguage() : null;
        if (language == null)
        {
            language = (String) request.getAttribute("sitemapLanguage");
        }
        
        if (language == null)
        {
            language = org.apache.cocoon.i18n.I18nUtils.findLocale(ContextHelper.getObjectModel(_context), "locale", null, Locale.getDefault(), true).getLanguage();
        }
        
        return language;
    }
    
    /**
     * Generate a <code>dfn</code> element around a matched word, with a nested link to the glossary page if any.
     * @param ch the characters from the XML document.
     * @param wordIndex the position of the matched word in the array.
     * @param wordLength the number of characters of the matched word.
     * @param word the matched word, in lower case.
     * @param definition the definition of the word.
     * @param glossaryHref the glossary page href, empty or null if there is no glossary page.
     * @throws SAXException if an error occurs generating the XML.
     */
    private void _generateDefinition(char[] ch, int wordIndex, int wordLength, String word, Definition definition, String glossaryHref) throws SAXException
    {
        AttributesImpl attrs = new AttributesImpl();
        attrs.addCDATAAttribute("title", definition.getContent());
        
        XMLUtils.startElement(_contentHandler, "dfn", attrs);
        
        // If a glossary page is tagged in this site and sitemap, generate a link to it.
        boolean hasLink = StringUtils.isNotEmpty(glossaryHref);
        if (hasLink)
        {
            String pageLink = glossaryHref;
            
            // Except in the BACK rendering context, point to the letter and anchor of the word.
            RenderingContext currentContext = _renderingContextHandler.getRenderingContext();
            if (currentContext != RenderingContext.BACK)
            {
                pageLink = pageLink + "?letter=" + word.charAt(0) + "#" + definition.getWord();
            }
            
            AttributesImpl linkAttrs = new AttributesImpl();
            linkAttrs.addCDATAAttribute("href", pageLink);
            XMLUtils.startElement(_contentHandler, "a", linkAttrs);
        }
        
        // Generate the word itself, as written in the document.
        super.characters(ch, wordIndex, wordLength);
        
        if (hasLink)
        {
            XMLUtils.endElement(_contentHandler, "a");
        }
        
        XMLUtils.endElement(_contentHandler, "dfn");
    }
    
    /**
     * Get all the words with definitions to display.
     * The definitions are loaded on first call and kept until the next document starts.
     * @param siteName the site name.
     * @param lang the language.
     * @return an unmodifiable map of the definitions, indexed by lower-cased word form.
     */
    protected Map<String, Definition> _getDefinitions(String siteName, String lang)
    {
        if (_definitions == null)
        {
            _definitions = _getWordsAndDefinitions(siteName, lang);
        }
        
        return Collections.unmodifiableMap(_definitions);
    }
    
    /**
     * Load all the words with definitions to display.
     * Only the definitions flagged to be displayed on text are kept; each of their forms is a key of the map.
     * @param siteName the site name.
     * @param lang the language.
     * @return a map of the definitions, indexed by lower-cased word form.
     */
    protected Map<String, Definition> _getWordsAndDefinitions(String siteName, String lang)
    {
        Map<String, Definition> words = new HashMap<>();
        
        TraversableAmetysObject definitionsNode = GlossaryHelper.getDefinitionsNode(_siteManager.getSite(siteName), lang);
        
        // Close the iterable once read, as done for the page query in _getGlossaryPage.
        try (AmetysObjectIterable<DefaultDefinition> definitions = definitionsNode.getChildren())
        {
            for (DefaultDefinition definition : definitions)
            {
                if (definition.displayOnText())
                {
                    for (String word : definition.getAllForms())
                    {
                        // Locale.ROOT: same normalization as the one applied on matched words.
                        words.put(word.toLowerCase(Locale.ROOT), definition);
                    }
                }
            }
        }
        
        return words;
    }
    
    /**
     * Get a regexp that matches any of the definition words, ignoring case, between word boundaries.
     * Longer words come first in the alternation, so that when a word is the beginning
     * of another one (e.g. "foo" and "foo bar"), the longest possible word is matched.
     * @param words the words.
     * @return the pattern.
     */
    protected Pattern _getWordsPattern(Set<String> words)
    {
        // Deterministic order: longest first, then alphabetical.
        List<String> orderedWords = new ArrayList<>(words);
        Collections.sort(orderedWords, new Comparator<String>()
        {
            @Override
            public int compare(String first, String second)
            {
                int byLength = Integer.compare(second.length(), first.length());
                return byLength != 0 ? byLength : first.compareTo(second);
            }
        });
        
        StringBuilder pattern = new StringBuilder("\\b(?:");
        
        String separator = "";
        for (String word : orderedWords)
        {
            // Pattern.quote also handles words that contain "\E", unlike a bare \Q...\E wrapping.
            pattern.append(separator).append(Pattern.quote(word));
            separator = "|";
        }
        
        pattern.append(")\\b");
        
        // UNICODE_CASE: without it, only US-ASCII letters are matched case-insensitively.
        return Pattern.compile(pattern.toString(), Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    }
    
    /**
     * Get the href of the glossary page in a given site and language.
     * The href is resolved on first call and kept until the next document starts.
     * @param siteName the site name.
     * @param language the language.
     * @return the glossary page href, or an empty string if there is no glossary page.
     */
    protected String _getGlossaryPageHref(String siteName, String language)
    {
        if (_glossaryHref == null)
        {
            Page glossaryPage = _getGlossaryPage(siteName, language);
            if (glossaryPage != null)
            {
                // FIXME CMS-2611 Force absolute
                Request request = ContextHelper.getRequest(_context);
                boolean absolute = Boolean.TRUE.equals(request.getAttribute("forceAbsoluteUrl"));
                
                _glossaryHref = _uriResolver.getResolverForType("page").resolve(glossaryPage.getId(), false, absolute, false);
            }
            else
            {
                // Empty (and not null) so that the page is not searched again for this document.
                _glossaryHref = "";
            }
        }
        
        return _glossaryHref;
    }
    
    /**
     * Get the glossary page in a given site and language, i.e. the first page returned
     * by the query on the glossary tag. A warning is logged if several pages are tagged.
     * @param siteName the site name.
     * @param language the language.
     * @return the glossary page, or null if no page is tagged.
     */
    protected Page _getGlossaryPage(String siteName, String language)
    {
        Page page = null;
        
        Expression glossaryExpr = new TagExpression(Operator.EQ, GlossaryHelper.GLOSSARY_PAGE_TAG);
        String xpath = PageQueryHelper.getPageXPathQuery(siteName, language, null, glossaryExpr, null);
        
        try (AmetysObjectIterable<Page> pages = _resolver.query(xpath))
        {
            Iterator<Page> it = pages.iterator();
            if (it.hasNext())
            {
                page = it.next();
                
                if (it.hasNext())
                {
                    getLogger().warn(String.format("More than one page is tagged 'GLOSSARY' in site %s and sitemap %s, please tag a single page.", siteName, language));
                }
            }
        }
        
        return page;
    }
}