001/*
002 *  Copyright 2011 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.glossary.transformation;
017
018import java.util.Collections;
019import java.util.HashMap;
020import java.util.HashSet;
021import java.util.Iterator;
022import java.util.Locale;
023import java.util.Map;
024import java.util.Set;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.avalon.framework.component.Component;
029import org.apache.avalon.framework.context.Context;
030import org.apache.avalon.framework.context.ContextException;
031import org.apache.avalon.framework.context.Contextualizable;
032import org.apache.avalon.framework.service.ServiceException;
033import org.apache.avalon.framework.service.ServiceManager;
034import org.apache.avalon.framework.service.Serviceable;
035import org.apache.cocoon.components.ContextHelper;
036import org.apache.cocoon.environment.Request;
037import org.apache.cocoon.transformation.I18nTransformer;
038import org.apache.cocoon.xml.AttributesImpl;
039import org.apache.cocoon.xml.XMLUtils;
040import org.apache.commons.lang.StringUtils;
041import org.xml.sax.Attributes;
042import org.xml.sax.SAXException;
043
044import org.ametys.cms.repository.Content;
045import org.ametys.cms.transformation.AbstractEnhancementHandler;
046import org.ametys.cms.transformation.URIResolverExtensionPoint;
047import org.ametys.plugins.glossary.DefaultDefinition;
048import org.ametys.plugins.glossary.Definition;
049import org.ametys.plugins.glossary.GlossaryHelper;
050import org.ametys.plugins.repository.AmetysObjectIterable;
051import org.ametys.plugins.repository.AmetysObjectResolver;
052import org.ametys.plugins.repository.TraversableAmetysObject;
053import org.ametys.plugins.repository.query.expression.Expression;
054import org.ametys.plugins.repository.query.expression.Expression.Operator;
055import org.ametys.web.WebHelper;
056import org.ametys.web.renderingcontext.RenderingContext;
057import org.ametys.web.renderingcontext.RenderingContextHandler;
058import org.ametys.web.repository.page.Page;
059import org.ametys.web.repository.page.PageQueryHelper;
060import org.ametys.web.repository.site.SiteManager;
061import org.ametys.web.tags.TagExpression;
062
063/**
064 * Definition enhancement handler.
065 */
066public class DefinitionEnhancementHandler extends AbstractEnhancementHandler implements Component, Serviceable, Contextualizable
067{
068    /** The Avalon role. */
069    public static final String ROLE = DefinitionEnhancementHandler.class.getName();
070    
071    /** The tags in which to ignore the glossary words. */
072    private static final Set<String> __IGNORE_TAGS = new HashSet<>();
073    static
074    {
075        __IGNORE_TAGS.add("head");
076        __IGNORE_TAGS.add("script");
077        __IGNORE_TAGS.add("style");
078        __IGNORE_TAGS.add("option");
079        __IGNORE_TAGS.add("a");
080        __IGNORE_TAGS.add("h1");
081        __IGNORE_TAGS.add("h2");
082        __IGNORE_TAGS.add("h3");
083        __IGNORE_TAGS.add("h4");
084        __IGNORE_TAGS.add("h5");
085        __IGNORE_TAGS.add("h6");
086    }
087    
088    /** The namespaces in which to ignore the glossary words. */
089    private static final Set<String> __IGNORE_NAMESPACES = new HashSet<>();
090    static
091    {
092        __IGNORE_NAMESPACES.add(I18nTransformer.I18N_NAMESPACE_URI);
093    }
094    
095    /** The ametys object resolver. */
096    protected AmetysObjectResolver _resolver;
097    
098    /** The site manager */
099    protected SiteManager _siteManager;
100    
101    /** The page URI resolver. */
102    protected URIResolverExtensionPoint _uriResolver;
103    
104    /** The avalon context. */
105    protected Context _context;
106    
107    /** The word definitions. */
108    protected Map<String, Definition> _definitions;
109    
110    /** The glossary page href. */
111    protected String _glossaryHref;
112    
113    /** Ignored namespace stack. */
114    protected Map<String, Integer> _ignoredNamespaceStack;
115    
    /** Nesting depth of the currently open ignored tags; glossary words are only searched when it is lower than 1. */
117    private int _inIgnoredTag;
118    
119    private RenderingContextHandler _renderingContextHandler;
120    
121    @Override
122    public void service(ServiceManager serviceManager) throws ServiceException
123    {
124        _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE);
125        _uriResolver = (URIResolverExtensionPoint) serviceManager.lookup(URIResolverExtensionPoint.ROLE);
126        _siteManager = (SiteManager) serviceManager.lookup(SiteManager.ROLE);
127        _renderingContextHandler = (RenderingContextHandler) serviceManager.lookup(RenderingContextHandler.ROLE);
128    }
129    
130    @Override
131    public void contextualize(Context context) throws ContextException
132    {
133        _context = context;
134    }
135    
136    @Override
137    public void startDocument() throws SAXException
138    {
139        super.startDocument();
140        
141        _glossaryHref = null;
142        _inIgnoredTag = 0;
143        
144        _ignoredNamespaceStack = new HashMap<>();
145        for (String ignoredNamespace : __IGNORE_NAMESPACES)
146        {
147            _ignoredNamespaceStack.put(ignoredNamespace, 0);
148        }
149    }
150    
151    @Override
152    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
153    {
154        super.startElement(uri, localName, qName, atts);
155        
156        if (__IGNORE_TAGS.contains(localName.toLowerCase()))
157        {
158            _inIgnoredTag++;
159        }
160        if (__IGNORE_NAMESPACES.contains(uri))
161        {
162            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) + 1);
163        }
164    }
165    
166    @Override
167    public void endElement(String uri, String localName, String qName) throws SAXException
168    {
169        if (__IGNORE_NAMESPACES.contains(uri))
170        {
171            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) - 1);
172        }
173        if (__IGNORE_TAGS.contains(localName.toLowerCase()))
174        {
175            _inIgnoredTag--;
176        }
177        
178        super.endElement(uri, localName, qName);
179    }
180    
181    @Override
182    public void characters(char[] ch, int start, int length) throws SAXException
183    {
184        if (_searchCharacters())
185        {
186            Request request = ContextHelper.getRequest(_context);
187            Content content = (Content) request.getAttribute(Content.class.getName());
188            _charactersWithDefinitions(ch, start, length, content);
189        }
190        else
191        {
192            super.characters(ch, start, length);
193        }
194    }
195    
196    /**
197     * Test if the currently processed characters have to be searched for glossary words or ignored.
198     * @return true to search the characters for glossary words, false to ignore.
199     */
200    protected boolean _searchCharacters()
201    {
202        // Test if we are in a tag to ignore.
203        boolean search = _inIgnoredTag < 1 && !_inUnmodifiableContent;
204        
205        // Test if we are in a tag which has a namespace to ignore.
206        Iterator<Integer> nsIt = _ignoredNamespaceStack.values().iterator();
207        while (nsIt.hasNext() && search)
208        {
209            // If the "namespace level" is greater than 0, we are in an ignored namespace.
210            if (nsIt.next() > 0)
211            {
212                search = false;
213            }
214        }
215        
216        return search;
217    }
218    
219    /**
220     * SAX characters, generating definition tags on words that are present in the glossary.
221     * @param ch the characters from the XML document.
222     * @param start the start position in the array.
223     * @param length the number of characters to read from the array.
224     * @param content the content.
225     * @throws SAXException if an error occurs generating the XML.
226     */
227    protected void _charactersWithDefinitions(char[] ch, int start, int length, Content content) throws SAXException
228    {
229        Request request = ContextHelper.getRequest(_context);
230        
231        // Get site name and language.
232        String siteName = WebHelper.getSiteName(request, content);
233        
234        String language = content.getLanguage();
235        if (language == null)
236        {
237            language = (String) request.getAttribute("sitemapLanguage");
238        }
239        
240        if (language == null)
241        {
242            Map objectModel = ContextHelper.getObjectModel(_context);
243            language = org.apache.cocoon.i18n.I18nUtils.findLocale(objectModel, "locale", null, Locale.getDefault(), true).getLanguage();
244        }
245        
246        // Build a pattern to detect the glossary words.
247        Map<String, Definition> words = _getDefinitions(siteName, language);
248        if (!words.isEmpty())
249        {
250            Pattern pattern = _getWordsPattern(words.keySet());
251            
252            // Match the pattern.
253            String str = new String(ch, start, length);
254            Matcher matcher = pattern.matcher(str);
255            
256            int previousMatch = start;
257            while (matcher.find())
258            {
259                // Get a link on the tagged glossary page.
260                String pageLink = _getGlossaryPageHref(siteName, language);
261                
262                int startIndex = matcher.start();
263                int endIndex = matcher.end();
264                
265                int wordIndex = start + startIndex;
266                
267                // The matched word.
268                String word = str.substring(startIndex, endIndex).toLowerCase();
269                Definition definition = words.get(word);
270                
271                // A null definition (word not present in the map) should never happen, but protect anyway.
272                if (definition != null && StringUtils.isNotEmpty(word))
273                {
274                    String defContent = definition.getContent();
275                    
276                    AttributesImpl attrs = new AttributesImpl();
277                    attrs.addCDATAAttribute("title", defContent);
278                    
279                    // Generate all the characters until the matched word.
280                    super.characters(ch, previousMatch, wordIndex - previousMatch);
281                    
282                    // Generate the definition tag.
283                    XMLUtils.startElement(_contentHandler, "dfn", attrs);
284                    
285                    // If a glossary page is tagged in this site and sitemap, generate a link to it.
286                    if (StringUtils.isNotEmpty(pageLink))
287                    {
288                        RenderingContext currentContext = _renderingContextHandler.getRenderingContext();
289                        if (!(currentContext == RenderingContext.BACK))
290                        {
291                            pageLink = pageLink + "?letter=" + word.charAt(0) + "#" + definition.getWord();
292                        }
293                        
294                        AttributesImpl linkAttrs = new AttributesImpl();
295                        linkAttrs.addCDATAAttribute("href", pageLink);
296                        XMLUtils.startElement(_contentHandler, "a", linkAttrs);
297                    }
298                    
299                    // Generate the word itself.
300                    super.characters(ch, wordIndex, endIndex - startIndex);
301                    
302                    if (StringUtils.isNotEmpty(pageLink))
303                    {
304                        XMLUtils.endElement(_contentHandler, "a");
305                    }
306                    
307                    XMLUtils.endElement(_contentHandler, "dfn");
308                }
309                
310                previousMatch = start + endIndex;
311            }
312            
313            // Generate the end of the input. This will generate the whole input, unchanged,
314            // if no glossary word was present.
315            super.characters(ch, previousMatch, start + length - previousMatch);
316        }
317        else
318        {
319            super.characters(ch, start, length);
320        }
321    }
322    
323    /**
324     * Get all the words with definitions to display.
325     * @param siteName the site name.
326     * @param lang the language.
327     * @return an exhaustive set of the words.
328     */
329    protected Map<String, Definition> _getDefinitions(String siteName, String lang)
330    {
331        if (_definitions == null)
332        {
333            _definitions = _getWordsAndDefinitions(siteName, lang);
334        }
335        
336        return Collections.unmodifiableMap(_definitions);
337    }
338    
339    /**
340     * Get all the words with definitions to display.
341     * @param siteName the site name.
342     * @param lang the language.
343     * @return an exhaustive set of the words.
344     */
345    protected Map<String, Definition> _getWordsAndDefinitions(String siteName, String lang)
346    {
347        Map<String, Definition> words = new HashMap<>();
348        
349        TraversableAmetysObject definitionsNode = GlossaryHelper.getDefinitionsNode(_siteManager.getSite(siteName), lang);
350        AmetysObjectIterable<DefaultDefinition> definitions = definitionsNode.getChildren();
351        
352        for (DefaultDefinition definition : definitions)
353        {
354            if (definition.displayOnText())
355            {
356                for (String word : definition.getAllForms())
357                {
358                    words.put(word.toLowerCase(), definition);
359                }
360            }
361        }
362        
363        return words;
364    }
365    
366    /**
367     * Get a regexp that matches any of the definition words.
368     * @param words the words.
369     * @return the pattern.
370     */
371    protected Pattern _getWordsPattern(Set<String> words)
372    {
373        StringBuilder pattern = new StringBuilder();
374        pattern.append("\\b(?:");
375        
376        Iterator<String> wordIt = words.iterator();
377        for (int i = 0; wordIt.hasNext(); i++)
378        {
379            if (i > 0)
380            {
381                pattern.append('|');
382            }
383            
384            // Quote
385            pattern.append("\\Q").append(wordIt.next()).append("\\E");
386        }
387        
388        pattern.append(")\\b");
389        
390        return Pattern.compile(pattern.toString(), Pattern.CASE_INSENSITIVE);
391    }
392    
393    /**
394     * Get the glossary page in a given site and language.
395     * @param siteName the site name.
396     * @param language the language.
397     * @return the glossary page.
398     */
399    protected String _getGlossaryPageHref(String siteName, String language)
400    {
401        if (_glossaryHref == null)
402        {
403            Page glossaryPage = _getGlossaryPage(siteName, language);
404            if (glossaryPage != null)
405            {
406                // FIXME CMS-2611 Force absolute 
407                Request request = ContextHelper.getRequest(_context);
408                boolean absolute = request.getAttribute("forceAbsoluteUrl") != null ? (Boolean) request.getAttribute("forceAbsoluteUrl") : false;
409                
410                _glossaryHref = _uriResolver.getResolverForType("page").resolve(glossaryPage.getId(), false, absolute, false);
411            }
412            else
413            {
414                _glossaryHref = "";
415            }
416        }
417        
418        return _glossaryHref;
419    }
420    
421    /**
422     * Get the glossary page in a given site and language.
423     * @param siteName the site name.
424     * @param language the language.
425     * @return the glossary page.
426     */
427    protected Page _getGlossaryPage(String siteName, String language)
428    {
429        Page page = null;
430        
431        Expression glossaryExpr = new TagExpression(Operator.EQ, GlossaryHelper.GLOSSARY_PAGE_TAG);
432        String xpath = PageQueryHelper.getPageXPathQuery(siteName, language, null, glossaryExpr, null);
433        
434        try (AmetysObjectIterable<Page> pages = _resolver.query(xpath);)
435        {
436            Iterator<Page> it = pages.iterator();
437            if (it.hasNext())
438            {
439                page = it.next();
440                
441                if (it.hasNext())
442                {
443                    getLogger().warn(String.format("More than one page is tagged 'GLOSSARY' in site %s and sitemap %s, please tag a single page.", siteName, language));
444                }
445            }
446        }
447        
448        return page;
449    }    
450}