001/* 002 * Copyright 2011 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.glossary.transformation; 017 018import java.util.Collections; 019import java.util.HashMap; 020import java.util.HashSet; 021import java.util.Iterator; 022import java.util.Map; 023import java.util.Set; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.avalon.framework.component.Component; 028import org.apache.avalon.framework.context.Context; 029import org.apache.avalon.framework.context.ContextException; 030import org.apache.avalon.framework.context.Contextualizable; 031import org.apache.avalon.framework.service.ServiceException; 032import org.apache.avalon.framework.service.ServiceManager; 033import org.apache.avalon.framework.service.Serviceable; 034import org.apache.cocoon.components.ContextHelper; 035import org.apache.cocoon.environment.Request; 036import org.apache.cocoon.transformation.I18nTransformer; 037import org.apache.cocoon.xml.AttributesImpl; 038import org.apache.cocoon.xml.XMLUtils; 039import org.apache.commons.lang.StringUtils; 040import org.xml.sax.Attributes; 041import org.xml.sax.SAXException; 042 043import org.ametys.cms.repository.Content; 044import org.ametys.cms.transformation.AbstractEnhancementHandler; 045import org.ametys.cms.transformation.URIResolverExtensionPoint; 046import org.ametys.plugins.glossary.DefaultDefinition; 047import org.ametys.plugins.glossary.Definition; 048import org.ametys.plugins.glossary.GlossaryHelper; 049import org.ametys.plugins.repository.AmetysObjectIterable; 050import org.ametys.plugins.repository.AmetysObjectResolver; 051import org.ametys.plugins.repository.TraversableAmetysObject; 052import org.ametys.plugins.repository.query.expression.Expression; 053import org.ametys.plugins.repository.query.expression.Expression.Operator; 054import org.ametys.web.renderingcontext.RenderingContext; 055import org.ametys.web.renderingcontext.RenderingContextHandler; 056import org.ametys.web.repository.content.WebContent; 057import org.ametys.web.repository.page.Page; 058import org.ametys.web.repository.page.PageQueryHelper; 059import org.ametys.web.repository.site.SiteManager; 060import org.ametys.web.tags.TagExpression; 061 062/** 063 * Definition enhancement handler. 064 */ 065public class DefinitionEnhancementHandler extends AbstractEnhancementHandler implements Component, Serviceable, Contextualizable 066{ 067 /** The Avalon role. */ 068 public static final String ROLE = DefinitionEnhancementHandler.class.getName(); 069 070 /** The tags in which to ignore the glossary words. */ 071 private static final Set<String> __IGNORE_TAGS = new HashSet<>(); 072 static 073 { 074 __IGNORE_TAGS.add("head"); 075 __IGNORE_TAGS.add("script"); 076 __IGNORE_TAGS.add("style"); 077 __IGNORE_TAGS.add("option"); 078 __IGNORE_TAGS.add("a"); 079 __IGNORE_TAGS.add("h1"); 080 __IGNORE_TAGS.add("h2"); 081 __IGNORE_TAGS.add("h3"); 082 __IGNORE_TAGS.add("h4"); 083 __IGNORE_TAGS.add("h5"); 084 __IGNORE_TAGS.add("h6"); 085 } 086 087 /** The namespaces in which to ignore the glossary words. */ 088 private static final Set<String> __IGNORE_NAMESPACES = new HashSet<>(); 089 static 090 { 091 __IGNORE_NAMESPACES.add(I18nTransformer.I18N_NAMESPACE_URI); 092 } 093 094 /** The ametys object resolver. */ 095 protected AmetysObjectResolver _resolver; 096 097 /** The site manager */ 098 protected SiteManager _siteManager; 099 100 /** The page URI resolver. */ 101 protected URIResolverExtensionPoint _uriResolver; 102 103 /** The avalon context. */ 104 protected Context _context; 105 106 /** The word definitions. */ 107 protected Map<String, Definition> _definitions; 108 109 /** The glossary page href. */ 110 protected String _glossaryHref; 111 112 /** Ignored namespace stack. */ 113 protected Map<String, Integer> _ignoredNamespaceStack; 114 115 /** True if we are processing a paragraph, false otherwise. */ 116 private int _inIgnoredTag; 117 118 private RenderingContextHandler _renderingContextHandler; 119 120 @Override 121 public void service(ServiceManager serviceManager) throws ServiceException 122 { 123 _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE); 124 _uriResolver = (URIResolverExtensionPoint) serviceManager.lookup(URIResolverExtensionPoint.ROLE); 125 _siteManager = (SiteManager) serviceManager.lookup(SiteManager.ROLE); 126 _renderingContextHandler = (RenderingContextHandler) serviceManager.lookup(RenderingContextHandler.ROLE); 127 } 128 129 @Override 130 public void contextualize(Context context) throws ContextException 131 { 132 _context = context; 133 } 134 135 @Override 136 public void startDocument() throws SAXException 137 { 138 super.startDocument(); 139 140 _glossaryHref = null; 141 _inIgnoredTag = 0; 142 143 _ignoredNamespaceStack = new HashMap<>(); 144 for (String ignoredNamespace : __IGNORE_NAMESPACES) 145 { 146 _ignoredNamespaceStack.put(ignoredNamespace, 0); 147 } 148 } 149 150 @Override 151 public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException 152 { 153 super.startElement(uri, localName, qName, atts); 154 155 if (__IGNORE_TAGS.contains(localName.toLowerCase())) 156 { 157 _inIgnoredTag++; 158 } 159 if (__IGNORE_NAMESPACES.contains(uri)) 160 { 161 _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) + 1); 162 } 163 } 164 165 @Override 166 public void endElement(String uri, String localName, String qName) throws SAXException 167 { 168 if (__IGNORE_NAMESPACES.contains(uri)) 169 { 170 _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) - 1); 171 } 172 if (__IGNORE_TAGS.contains(localName.toLowerCase())) 173 { 174 _inIgnoredTag--; 175 } 176 177 super.endElement(uri, localName, qName); 178 } 179 180 @Override 181 public void characters(char[] ch, int start, int length) throws SAXException 182 { 183 if (_searchCharacters()) 184 { 185 Request request = ContextHelper.getRequest(_context); 186 Content content = (Content) request.getAttribute(Content.class.getName()); 187 _charactersWithDefinitions(ch, start, length, content); 188 } 189 else 190 { 191 super.characters(ch, start, length); 192 } 193 } 194 195 /** 196 * Test if the currently processed characters have to be searched for glossary words or ignored. 197 * @return true to search the characters for glossary words, false to ignore. 198 */ 199 protected boolean _searchCharacters() 200 { 201 // Test if we are in a tag to ignore. 202 boolean search = _inIgnoredTag < 1 && !_inUnmodifiableContent; 203 204 // Test if we are in a tag which has a namespace to ignore. 205 Iterator<Integer> nsIt = _ignoredNamespaceStack.values().iterator(); 206 while (nsIt.hasNext() && search) 207 { 208 // If the "namespace level" is greater than 0, we are in an ignored namespace. 209 if (nsIt.next() > 0) 210 { 211 search = false; 212 } 213 } 214 215 return search; 216 } 217 218 /** 219 * SAX characters, generating definition tags on words that are present in the glossary. 220 * @param ch the characters from the XML document. 221 * @param start the start position in the array. 222 * @param length the number of characters to read from the array. 223 * @param content the content. 224 * @throws SAXException if an error occurs generating the XML. 225 */ 226 protected void _charactersWithDefinitions(char[] ch, int start, int length, Content content) throws SAXException 227 { 228 Request request = ContextHelper.getRequest(_context); 229 230 // Get site name and language. 231 String siteName = null; 232 if (content instanceof WebContent) 233 { 234 siteName = ((WebContent) content).getSiteName(); 235 } 236 if (siteName == null) 237 { 238 siteName = (String) request.getAttribute("site"); 239 } 240 String language = content.getLanguage(); 241 242 // Build a pattern to detect the glossary words. 243 Map<String, Definition> words = _getDefinitions(siteName, language); 244 if (!words.isEmpty()) 245 { 246 Pattern pattern = _getWordsPattern(words.keySet()); 247 248 // Match the pattern. 249 String str = new String(ch, start, length); 250 Matcher matcher = pattern.matcher(str); 251 252 int previousMatch = start; 253 while (matcher.find()) 254 { 255 // Get a link on the tagged glossary page. 256 String pageLink = _getGlossaryPageHref(siteName, language); 257 258 int startIndex = matcher.start(); 259 int endIndex = matcher.end(); 260 261 int wordIndex = start + startIndex; 262 263 // The matched word. 264 String word = str.substring(startIndex, endIndex).toLowerCase(); 265 Definition definition = words.get(word); 266 267 // A null definition (word not present in the map) should never happen, but protect anyway. 268 if (definition != null && StringUtils.isNotEmpty(word)) 269 { 270 String defContent = definition.getContent(); 271 272 AttributesImpl attrs = new AttributesImpl(); 273 attrs.addCDATAAttribute("title", defContent); 274 275 // Generate all the characters until the matched word. 276 super.characters(ch, previousMatch, wordIndex - previousMatch); 277 278 // Generate the definition tag. 279 XMLUtils.startElement(_contentHandler, "dfn", attrs); 280 281 // If a glossary page is tagged in this site and sitemap, generate a link to it. 282 if (StringUtils.isNotEmpty(pageLink)) 283 { 284 RenderingContext currentContext = _renderingContextHandler.getRenderingContext(); 285 if (!(currentContext == RenderingContext.BACK)) 286 { 287 pageLink = pageLink + "?letter=" + word.charAt(0) + "#" + definition.getWord(); 288 } 289 290 AttributesImpl linkAttrs = new AttributesImpl(); 291 linkAttrs.addCDATAAttribute("href", pageLink); 292 XMLUtils.startElement(_contentHandler, "a", linkAttrs); 293 } 294 295 // Generate the word itself. 296 super.characters(ch, wordIndex, endIndex - startIndex); 297 298 if (StringUtils.isNotEmpty(pageLink)) 299 { 300 XMLUtils.endElement(_contentHandler, "a"); 301 } 302 303 XMLUtils.endElement(_contentHandler, "dfn"); 304 } 305 306 previousMatch = start + endIndex; 307 } 308 309 // Generate the end of the input. This will generate the whole input, unchanged, 310 // if no glossary word was present. 311 super.characters(ch, previousMatch, start + length - previousMatch); 312 } 313 else 314 { 315 super.characters(ch, start, length); 316 } 317 } 318 319 /** 320 * Get all the words with definitions to display. 321 * @param siteName the site name. 322 * @param lang the language. 323 * @return an exhaustive set of the words. 324 */ 325 protected Map<String, Definition> _getDefinitions(String siteName, String lang) 326 { 327 if (_definitions == null) 328 { 329 _definitions = _getWordsAndDefinitions(siteName, lang); 330 } 331 332 return Collections.unmodifiableMap(_definitions); 333 } 334 335 /** 336 * Get all the words with definitions to display. 337 * @param siteName the site name. 338 * @param lang the language. 339 * @return an exhaustive set of the words. 340 */ 341 protected Map<String, Definition> _getWordsAndDefinitions(String siteName, String lang) 342 { 343 Map<String, Definition> words = new HashMap<>(); 344 345 TraversableAmetysObject definitionsNode = GlossaryHelper.getDefinitionsNode(_siteManager.getSite(siteName), lang); 346 AmetysObjectIterable<DefaultDefinition> definitions = definitionsNode.getChildren(); 347 348 for (DefaultDefinition definition : definitions) 349 { 350 if (definition.displayOnText()) 351 { 352 for (String word : definition.getAllForms()) 353 { 354 words.put(word.toLowerCase(), definition); 355 } 356 } 357 } 358 359 return words; 360 } 361 362 /** 363 * Get a regexp that matches any of the definition words. 364 * @param words the words. 365 * @return the pattern. 366 */ 367 protected Pattern _getWordsPattern(Set<String> words) 368 { 369 StringBuilder pattern = new StringBuilder(); 370 pattern.append("\\b(?:"); 371 372 Iterator<String> wordIt = words.iterator(); 373 for (int i = 0; wordIt.hasNext(); i++) 374 { 375 if (i > 0) 376 { 377 pattern.append('|'); 378 } 379 380 // Quote 381 pattern.append("\\Q").append(wordIt.next()).append("\\E"); 382 } 383 384 pattern.append(")\\b"); 385 386 return Pattern.compile(pattern.toString(), Pattern.CASE_INSENSITIVE); 387 } 388 389 /** 390 * Get the glossary page in a given site and language. 391 * @param siteName the site name. 392 * @param language the language. 393 * @return the glossary page. 394 */ 395 protected String _getGlossaryPageHref(String siteName, String language) 396 { 397 if (_glossaryHref == null) 398 { 399 Page glossaryPage = _getGlossaryPage(siteName, language); 400 if (glossaryPage != null) 401 { 402 // FIXME CMS-2611 Force absolute 403 Request request = ContextHelper.getRequest(_context); 404 boolean absolute = request.getAttribute("forceAbsoluteUrl") != null ? (Boolean) request.getAttribute("forceAbsoluteUrl") : false; 405 406 _glossaryHref = _uriResolver.getResolverForType("page").resolve(glossaryPage.getId(), false, absolute, false); 407 } 408 else 409 { 410 _glossaryHref = ""; 411 } 412 } 413 414 return _glossaryHref; 415 } 416 417 /** 418 * Get the glossary page in a given site and language. 419 * @param siteName the site name. 420 * @param language the language. 421 * @return the glossary page. 422 */ 423 protected Page _getGlossaryPage(String siteName, String language) 424 { 425 Page page = null; 426 427 Expression glossaryExpr = new TagExpression(Operator.EQ, GlossaryHelper.GLOSSARY_PAGE_TAG); 428 String xpath = PageQueryHelper.getPageXPathQuery(siteName, language, null, glossaryExpr, null); 429 430 try (AmetysObjectIterable<Page> pages = _resolver.query(xpath);) 431 { 432 Iterator<Page> it = pages.iterator(); 433 if (it.hasNext()) 434 { 435 page = it.next(); 436 437 if (it.hasNext()) 438 { 439 getLogger().warn(String.format("More than one page is tagged 'GLOSSARY' in site %s and sitemap %s, please tag a single page.", siteName, language)); 440 } 441 } 442 } 443 444 return page; 445 } 446}