001/*
002 *  Copyright 2011 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.glossary.transformation;
017
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.context.Context;
import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.components.ContextHelper;
import org.apache.cocoon.environment.Request;
import org.apache.cocoon.transformation.I18nTransformer;
import org.apache.cocoon.xml.AttributesImpl;
import org.apache.cocoon.xml.XMLUtils;
import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

import org.ametys.cms.repository.Content;
import org.ametys.cms.transformation.AbstractEnhancementHandler;
import org.ametys.cms.transformation.URIResolverExtensionPoint;
import org.ametys.plugins.glossary.DefaultDefinition;
import org.ametys.plugins.glossary.Definition;
import org.ametys.plugins.glossary.GlossaryHelper;
import org.ametys.plugins.repository.AmetysObjectIterable;
import org.ametys.plugins.repository.AmetysObjectResolver;
import org.ametys.plugins.repository.TraversableAmetysObject;
import org.ametys.plugins.repository.query.expression.Expression;
import org.ametys.plugins.repository.query.expression.Expression.Operator;
import org.ametys.web.renderingcontext.RenderingContext;
import org.ametys.web.renderingcontext.RenderingContextHandler;
import org.ametys.web.repository.content.WebContent;
import org.ametys.web.repository.page.Page;
import org.ametys.web.repository.page.PageQueryHelper;
import org.ametys.web.repository.site.SiteManager;
import org.ametys.web.tags.TagExpression;
062
063/**
064 * Definition enhancement handler.
065 */
066public class DefinitionEnhancementHandler extends AbstractEnhancementHandler implements Component, Serviceable, Contextualizable
067{
068    /** The Avalon role. */
069    public static final String ROLE = DefinitionEnhancementHandler.class.getName();
070    
071    /** The tags in which to ignore the glossary words. */
072    private static final Set<String> __IGNORE_TAGS = new HashSet<>();
073    static
074    {
075        __IGNORE_TAGS.add("head");
076        __IGNORE_TAGS.add("script");
077        __IGNORE_TAGS.add("style");
078        __IGNORE_TAGS.add("option");
079        __IGNORE_TAGS.add("a");
080        __IGNORE_TAGS.add("h1");
081        __IGNORE_TAGS.add("h2");
082        __IGNORE_TAGS.add("h3");
083        __IGNORE_TAGS.add("h4");
084        __IGNORE_TAGS.add("h5");
085        __IGNORE_TAGS.add("h6");
086    }
087    
088    /** The namespaces in which to ignore the glossary words. */
089    private static final Set<String> __IGNORE_NAMESPACES = new HashSet<>();
090    static
091    {
092        __IGNORE_NAMESPACES.add(I18nTransformer.I18N_NAMESPACE_URI);
093    }
094    
    /** The ametys object resolver. */
    protected AmetysObjectResolver _resolver;
    
    /** The site manager */
    protected SiteManager _siteManager;
    
    /** The page URI resolver. */
    protected URIResolverExtensionPoint _uriResolver;
    
    /** The avalon context. */
    protected Context _context;
    
    /** The word definitions, indexed by lower-cased word form. Loaded lazily on first use. */
    protected Map<String, Definition> _definitions;
    
    /** The glossary page href: null until resolved, empty when no glossary page was found. */
    protected String _glossaryHref;
    
    /** For each ignored namespace URI, the number of currently open elements in that namespace. */
    protected Map<String, Integer> _ignoredNamespaceStack;
    
    /** Number of currently open elements whose tag is ignored; characters are searched only when it is lower than 1. */
    private int _inIgnoredTag;
    
    /** The rendering context handler, used to tell whether the current rendering context is the back-office one. */
    private RenderingContextHandler _renderingContextHandler;
120    
    /**
     * Looks up the components this handler relies on.
     * @param serviceManager the service manager to fetch the components from.
     * @throws ServiceException if one of the lookups fails.
     */
    @Override
    public void service(ServiceManager serviceManager) throws ServiceException
    {
        _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE);
        _uriResolver = (URIResolverExtensionPoint) serviceManager.lookup(URIResolverExtensionPoint.ROLE);
        _siteManager = (SiteManager) serviceManager.lookup(SiteManager.ROLE);
        _renderingContextHandler = (RenderingContextHandler) serviceManager.lookup(RenderingContextHandler.ROLE);
    }
129    
    /**
     * Stores the Avalon context, from which the current request is retrieved later on.
     * @param context the Avalon context.
     * @throws ContextException declared by the interface; never thrown here.
     */
    @Override
    public void contextualize(Context context) throws ContextException
    {
        _context = context;
    }
135    
136    @Override
137    public void startDocument() throws SAXException
138    {
139        super.startDocument();
140        
141        _glossaryHref = null;
142        _inIgnoredTag = 0;
143        
144        _ignoredNamespaceStack = new HashMap<>();
145        for (String ignoredNamespace : __IGNORE_NAMESPACES)
146        {
147            _ignoredNamespaceStack.put(ignoredNamespace, 0);
148        }
149    }
150    
151    @Override
152    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
153    {
154        super.startElement(uri, localName, qName, atts);
155        
156        if (__IGNORE_TAGS.contains(localName.toLowerCase()))
157        {
158            _inIgnoredTag++;
159        }
160        if (__IGNORE_NAMESPACES.contains(uri))
161        {
162            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) + 1);
163        }
164    }
165    
166    @Override
167    public void endElement(String uri, String localName, String qName) throws SAXException
168    {
169        if (__IGNORE_NAMESPACES.contains(uri))
170        {
171            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) - 1);
172        }
173        if (__IGNORE_TAGS.contains(localName.toLowerCase()))
174        {
175            _inIgnoredTag--;
176        }
177        
178        super.endElement(uri, localName, qName);
179    }
180    
181    @Override
182    public void characters(char[] ch, int start, int length) throws SAXException
183    {
184        if (_searchCharacters())
185        {
186            Request request = ContextHelper.getRequest(_context);
187            Content content = (Content) request.getAttribute(Content.class.getName());
188            _charactersWithDefinitions(ch, start, length, content);
189        }
190        else
191        {
192            super.characters(ch, start, length);
193        }
194    }
195    
196    /**
197     * Test if the currently processed characters have to be searched for glossary words or ignored.
198     * @return true to search the characters for glossary words, false to ignore.
199     */
200    protected boolean _searchCharacters()
201    {
202        // Test if we are in a tag to ignore.
203        boolean search = _inIgnoredTag < 1 && !_inUnmodifiableContent;
204        
205        // Test if we are in a tag which has a namespace to ignore.
206        Iterator<Integer> nsIt = _ignoredNamespaceStack.values().iterator();
207        while (nsIt.hasNext() && search)
208        {
209            // If the "namespace level" is greater than 0, we are in an ignored namespace.
210            if (nsIt.next() > 0)
211            {
212                search = false;
213            }
214        }
215        
216        return search;
217    }
218    
219    /**
220     * SAX characters, generating definition tags on words that are present in the glossary.
221     * @param ch the characters from the XML document.
222     * @param start the start position in the array.
223     * @param length the number of characters to read from the array.
224     * @param content the content.
225     * @throws SAXException if an error occurs generating the XML.
226     */
227    protected void _charactersWithDefinitions(char[] ch, int start, int length, Content content) throws SAXException
228    {
229        Request request = ContextHelper.getRequest(_context);
230        
231        // Get site name and language.
232        String siteName = null;
233        if (content instanceof WebContent)
234        {
235            siteName = ((WebContent) content).getSiteName();
236        }
237        if (siteName == null)
238        {
239            siteName = (String) request.getAttribute("site");
240        }
241        String language = content.getLanguage();
242        if (language == null)
243        {
244            language = (String) request.getAttribute("sitemapLanguage");
245        }
246        
247        if (language == null)
248        {
249            Map objectModel = ContextHelper.getObjectModel(_context);
250            language = org.apache.cocoon.i18n.I18nUtils.findLocale(objectModel, "locale", null, Locale.getDefault(), true).getLanguage();
251        }
252        
253        // Build a pattern to detect the glossary words.
254        Map<String, Definition> words = _getDefinitions(siteName, language);
255        if (!words.isEmpty())
256        {
257            Pattern pattern = _getWordsPattern(words.keySet());
258            
259            // Match the pattern.
260            String str = new String(ch, start, length);
261            Matcher matcher = pattern.matcher(str);
262            
263            int previousMatch = start;
264            while (matcher.find())
265            {
266                // Get a link on the tagged glossary page.
267                String pageLink = _getGlossaryPageHref(siteName, language);
268                
269                int startIndex = matcher.start();
270                int endIndex = matcher.end();
271                
272                int wordIndex = start + startIndex;
273                
274                // The matched word.
275                String word = str.substring(startIndex, endIndex).toLowerCase();
276                Definition definition = words.get(word);
277                
278                // A null definition (word not present in the map) should never happen, but protect anyway.
279                if (definition != null && StringUtils.isNotEmpty(word))
280                {
281                    String defContent = definition.getContent();
282                    
283                    AttributesImpl attrs = new AttributesImpl();
284                    attrs.addCDATAAttribute("title", defContent);
285                    
286                    // Generate all the characters until the matched word.
287                    super.characters(ch, previousMatch, wordIndex - previousMatch);
288                    
289                    // Generate the definition tag.
290                    XMLUtils.startElement(_contentHandler, "dfn", attrs);
291                    
292                    // If a glossary page is tagged in this site and sitemap, generate a link to it.
293                    if (StringUtils.isNotEmpty(pageLink))
294                    {
295                        RenderingContext currentContext = _renderingContextHandler.getRenderingContext();
296                        if (!(currentContext == RenderingContext.BACK))
297                        {
298                            pageLink = pageLink + "?letter=" + word.charAt(0) + "#" + definition.getWord();
299                        }
300                        
301                        AttributesImpl linkAttrs = new AttributesImpl();
302                        linkAttrs.addCDATAAttribute("href", pageLink);
303                        XMLUtils.startElement(_contentHandler, "a", linkAttrs);
304                    }
305                    
306                    // Generate the word itself.
307                    super.characters(ch, wordIndex, endIndex - startIndex);
308                    
309                    if (StringUtils.isNotEmpty(pageLink))
310                    {
311                        XMLUtils.endElement(_contentHandler, "a");
312                    }
313                    
314                    XMLUtils.endElement(_contentHandler, "dfn");
315                }
316                
317                previousMatch = start + endIndex;
318            }
319            
320            // Generate the end of the input. This will generate the whole input, unchanged,
321            // if no glossary word was present.
322            super.characters(ch, previousMatch, start + length - previousMatch);
323        }
324        else
325        {
326            super.characters(ch, start, length);
327        }
328    }
329    
330    /**
331     * Get all the words with definitions to display.
332     * @param siteName the site name.
333     * @param lang the language.
334     * @return an exhaustive set of the words.
335     */
336    protected Map<String, Definition> _getDefinitions(String siteName, String lang)
337    {
338        if (_definitions == null)
339        {
340            _definitions = _getWordsAndDefinitions(siteName, lang);
341        }
342        
343        return Collections.unmodifiableMap(_definitions);
344    }
345    
346    /**
347     * Get all the words with definitions to display.
348     * @param siteName the site name.
349     * @param lang the language.
350     * @return an exhaustive set of the words.
351     */
352    protected Map<String, Definition> _getWordsAndDefinitions(String siteName, String lang)
353    {
354        Map<String, Definition> words = new HashMap<>();
355        
356        TraversableAmetysObject definitionsNode = GlossaryHelper.getDefinitionsNode(_siteManager.getSite(siteName), lang);
357        AmetysObjectIterable<DefaultDefinition> definitions = definitionsNode.getChildren();
358        
359        for (DefaultDefinition definition : definitions)
360        {
361            if (definition.displayOnText())
362            {
363                for (String word : definition.getAllForms())
364                {
365                    words.put(word.toLowerCase(), definition);
366                }
367            }
368        }
369        
370        return words;
371    }
372    
373    /**
374     * Get a regexp that matches any of the definition words.
375     * @param words the words.
376     * @return the pattern.
377     */
378    protected Pattern _getWordsPattern(Set<String> words)
379    {
380        StringBuilder pattern = new StringBuilder();
381        pattern.append("\\b(?:");
382        
383        Iterator<String> wordIt = words.iterator();
384        for (int i = 0; wordIt.hasNext(); i++)
385        {
386            if (i > 0)
387            {
388                pattern.append('|');
389            }
390            
391            // Quote
392            pattern.append("\\Q").append(wordIt.next()).append("\\E");
393        }
394        
395        pattern.append(")\\b");
396        
397        return Pattern.compile(pattern.toString(), Pattern.CASE_INSENSITIVE);
398    }
399    
400    /**
401     * Get the glossary page in a given site and language.
402     * @param siteName the site name.
403     * @param language the language.
404     * @return the glossary page.
405     */
406    protected String _getGlossaryPageHref(String siteName, String language)
407    {
408        if (_glossaryHref == null)
409        {
410            Page glossaryPage = _getGlossaryPage(siteName, language);
411            if (glossaryPage != null)
412            {
413                // FIXME CMS-2611 Force absolute 
414                Request request = ContextHelper.getRequest(_context);
415                boolean absolute = request.getAttribute("forceAbsoluteUrl") != null ? (Boolean) request.getAttribute("forceAbsoluteUrl") : false;
416                
417                _glossaryHref = _uriResolver.getResolverForType("page").resolve(glossaryPage.getId(), false, absolute, false);
418            }
419            else
420            {
421                _glossaryHref = "";
422            }
423        }
424        
425        return _glossaryHref;
426    }
427    
428    /**
429     * Get the glossary page in a given site and language.
430     * @param siteName the site name.
431     * @param language the language.
432     * @return the glossary page.
433     */
434    protected Page _getGlossaryPage(String siteName, String language)
435    {
436        Page page = null;
437        
438        Expression glossaryExpr = new TagExpression(Operator.EQ, GlossaryHelper.GLOSSARY_PAGE_TAG);
439        String xpath = PageQueryHelper.getPageXPathQuery(siteName, language, null, glossaryExpr, null);
440        
441        try (AmetysObjectIterable<Page> pages = _resolver.query(xpath);)
442        {
443            Iterator<Page> it = pages.iterator();
444            if (it.hasNext())
445            {
446                page = it.next();
447                
448                if (it.hasNext())
449                {
450                    getLogger().warn(String.format("More than one page is tagged 'GLOSSARY' in site %s and sitemap %s, please tag a single page.", siteName, language));
451                }
452            }
453        }
454        
455        return page;
456    }    
457}