001/*
002 *  Copyright 2011 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.glossary.transformation;
017
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
026
027import org.apache.avalon.framework.component.Component;
028import org.apache.avalon.framework.context.Context;
029import org.apache.avalon.framework.context.ContextException;
030import org.apache.avalon.framework.context.Contextualizable;
031import org.apache.avalon.framework.service.ServiceException;
032import org.apache.avalon.framework.service.ServiceManager;
033import org.apache.avalon.framework.service.Serviceable;
034import org.apache.cocoon.components.ContextHelper;
035import org.apache.cocoon.environment.Request;
036import org.apache.cocoon.transformation.I18nTransformer;
037import org.apache.cocoon.xml.AttributesImpl;
038import org.apache.cocoon.xml.XMLUtils;
039import org.apache.commons.lang.StringUtils;
040import org.xml.sax.Attributes;
041import org.xml.sax.SAXException;
042
043import org.ametys.cms.repository.Content;
044import org.ametys.cms.transformation.AbstractEnhancementHandler;
045import org.ametys.cms.transformation.URIResolverExtensionPoint;
046import org.ametys.plugins.glossary.DefaultDefinition;
047import org.ametys.plugins.glossary.Definition;
048import org.ametys.plugins.glossary.GlossaryHelper;
049import org.ametys.plugins.repository.AmetysObjectIterable;
050import org.ametys.plugins.repository.AmetysObjectResolver;
051import org.ametys.plugins.repository.TraversableAmetysObject;
052import org.ametys.plugins.repository.query.expression.Expression;
053import org.ametys.plugins.repository.query.expression.Expression.Operator;
054import org.ametys.web.renderingcontext.RenderingContext;
055import org.ametys.web.renderingcontext.RenderingContextHandler;
056import org.ametys.web.repository.content.WebContent;
057import org.ametys.web.repository.page.Page;
058import org.ametys.web.repository.page.PageQueryHelper;
059import org.ametys.web.repository.site.SiteManager;
060import org.ametys.web.tags.TagExpression;
061
062/**
063 * Definition enhancement handler.
064 */
065public class DefinitionEnhancementHandler extends AbstractEnhancementHandler implements Component, Serviceable, Contextualizable
066{
067    /** The Avalon role. */
068    public static final String ROLE = DefinitionEnhancementHandler.class.getName();
069    
070    /** The tags in which to ignore the glossary words. */
071    private static final Set<String> __IGNORE_TAGS = new HashSet<>();
072    static
073    {
074        __IGNORE_TAGS.add("head");
075        __IGNORE_TAGS.add("script");
076        __IGNORE_TAGS.add("style");
077        __IGNORE_TAGS.add("option");
078        __IGNORE_TAGS.add("a");
079        __IGNORE_TAGS.add("h1");
080        __IGNORE_TAGS.add("h2");
081        __IGNORE_TAGS.add("h3");
082        __IGNORE_TAGS.add("h4");
083        __IGNORE_TAGS.add("h5");
084        __IGNORE_TAGS.add("h6");
085    }
086    
087    /** The namespaces in which to ignore the glossary words. */
088    private static final Set<String> __IGNORE_NAMESPACES = new HashSet<>();
089    static
090    {
091        __IGNORE_NAMESPACES.add(I18nTransformer.I18N_NAMESPACE_URI);
092    }
093    
094    /** The ametys object resolver. */
095    protected AmetysObjectResolver _resolver;
096    
097    /** The site manager */
098    protected SiteManager _siteManager;
099    
100    /** The page URI resolver. */
101    protected URIResolverExtensionPoint _uriResolver;
102    
103    /** The avalon context. */
104    protected Context _context;
105    
106    /** The word definitions. */
107    protected Map<String, Definition> _definitions;
108    
109    /** The glossary page href. */
110    protected String _glossaryHref;
111    
112    /** Ignored namespace stack. */
113    protected Map<String, Integer> _ignoredNamespaceStack;
114    
    /** Number of currently open elements whose tag is ignored; glossary words are only searched while it is 0. */
116    private int _inIgnoredTag;
117    
118    private RenderingContextHandler _renderingContextHandler;
119    
120    @Override
121    public void service(ServiceManager serviceManager) throws ServiceException
122    {
123        _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE);
124        _uriResolver = (URIResolverExtensionPoint) serviceManager.lookup(URIResolverExtensionPoint.ROLE);
125        _siteManager = (SiteManager) serviceManager.lookup(SiteManager.ROLE);
126        _renderingContextHandler = (RenderingContextHandler) serviceManager.lookup(RenderingContextHandler.ROLE);
127    }
128    
129    @Override
130    public void contextualize(Context context) throws ContextException
131    {
132        _context = context;
133    }
134    
135    @Override
136    public void startDocument() throws SAXException
137    {
138        super.startDocument();
139        
140        _glossaryHref = null;
141        _inIgnoredTag = 0;
142        
143        _ignoredNamespaceStack = new HashMap<>();
144        for (String ignoredNamespace : __IGNORE_NAMESPACES)
145        {
146            _ignoredNamespaceStack.put(ignoredNamespace, 0);
147        }
148    }
149    
150    @Override
151    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
152    {
153        super.startElement(uri, localName, qName, atts);
154        
155        if (__IGNORE_TAGS.contains(localName.toLowerCase()))
156        {
157            _inIgnoredTag++;
158        }
159        if (__IGNORE_NAMESPACES.contains(uri))
160        {
161            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) + 1);
162        }
163    }
164    
165    @Override
166    public void endElement(String uri, String localName, String qName) throws SAXException
167    {
168        if (__IGNORE_NAMESPACES.contains(uri))
169        {
170            _ignoredNamespaceStack.put(uri, _ignoredNamespaceStack.get(uri) - 1);
171        }
172        if (__IGNORE_TAGS.contains(localName.toLowerCase()))
173        {
174            _inIgnoredTag--;
175        }
176        
177        super.endElement(uri, localName, qName);
178    }
179    
180    @Override
181    public void characters(char[] ch, int start, int length) throws SAXException
182    {
183        if (_searchCharacters())
184        {
185            Request request = ContextHelper.getRequest(_context);
186            Content content = (Content) request.getAttribute(Content.class.getName());
187            _charactersWithDefinitions(ch, start, length, content);
188        }
189        else
190        {
191            super.characters(ch, start, length);
192        }
193    }
194    
195    /**
196     * Test if the currently processed characters have to be searched for glossary words or ignored.
197     * @return true to search the characters for glossary words, false to ignore.
198     */
199    protected boolean _searchCharacters()
200    {
201        // Test if we are in a tag to ignore.
202        boolean search = _inIgnoredTag < 1 && !_inUnmodifiableContent;
203        
204        // Test if we are in a tag which has a namespace to ignore.
205        Iterator<Integer> nsIt = _ignoredNamespaceStack.values().iterator();
206        while (nsIt.hasNext() && search)
207        {
208            // If the "namespace level" is greater than 0, we are in an ignored namespace.
209            if (nsIt.next() > 0)
210            {
211                search = false;
212            }
213        }
214        
215        return search;
216    }
217    
218    /**
219     * SAX characters, generating definition tags on words that are present in the glossary.
220     * @param ch the characters from the XML document.
221     * @param start the start position in the array.
222     * @param length the number of characters to read from the array.
223     * @param content the content.
224     * @throws SAXException if an error occurs generating the XML.
225     */
226    protected void _charactersWithDefinitions(char[] ch, int start, int length, Content content) throws SAXException
227    {
228        Request request = ContextHelper.getRequest(_context);
229        
230        // Get site name and language.
231        String siteName = null;
232        if (content instanceof WebContent)
233        {
234            siteName = ((WebContent) content).getSiteName();
235        }
236        if (siteName == null)
237        {
238            siteName = (String) request.getAttribute("site");
239        }
240        String language = content.getLanguage();
241        
242        // Build a pattern to detect the glossary words.
243        Map<String, Definition> words = _getDefinitions(siteName, language);
244        if (!words.isEmpty())
245        {
246            Pattern pattern = _getWordsPattern(words.keySet());
247            
248            // Match the pattern.
249            String str = new String(ch, start, length);
250            Matcher matcher = pattern.matcher(str);
251            
252            int previousMatch = start;
253            while (matcher.find())
254            {
255                // Get a link on the tagged glossary page.
256                String pageLink = _getGlossaryPageHref(siteName, language);
257                
258                int startIndex = matcher.start();
259                int endIndex = matcher.end();
260                
261                int wordIndex = start + startIndex;
262                
263                // The matched word.
264                String word = str.substring(startIndex, endIndex).toLowerCase();
265                Definition definition = words.get(word);
266                
267                // A null definition (word not present in the map) should never happen, but protect anyway.
268                if (definition != null && StringUtils.isNotEmpty(word))
269                {
270                    String defContent = definition.getContent();
271                    
272                    AttributesImpl attrs = new AttributesImpl();
273                    attrs.addCDATAAttribute("title", defContent);
274                    
275                    // Generate all the characters until the matched word.
276                    super.characters(ch, previousMatch, wordIndex - previousMatch);
277                    
278                    // Generate the definition tag.
279                    XMLUtils.startElement(_contentHandler, "dfn", attrs);
280                    
281                    // If a glossary page is tagged in this site and sitemap, generate a link to it.
282                    if (StringUtils.isNotEmpty(pageLink))
283                    {
284                        RenderingContext currentContext = _renderingContextHandler.getRenderingContext();
285                        if (!(currentContext == RenderingContext.BACK))
286                        {
287                            pageLink = pageLink + "?letter=" + word.charAt(0) + "#" + definition.getWord();
288                        }
289                        
290                        AttributesImpl linkAttrs = new AttributesImpl();
291                        linkAttrs.addCDATAAttribute("href", pageLink);
292                        XMLUtils.startElement(_contentHandler, "a", linkAttrs);
293                    }
294                    
295                    // Generate the word itself.
296                    super.characters(ch, wordIndex, endIndex - startIndex);
297                    
298                    if (StringUtils.isNotEmpty(pageLink))
299                    {
300                        XMLUtils.endElement(_contentHandler, "a");
301                    }
302                    
303                    XMLUtils.endElement(_contentHandler, "dfn");
304                }
305                
306                previousMatch = start + endIndex;
307            }
308            
309            // Generate the end of the input. This will generate the whole input, unchanged,
310            // if no glossary word was present.
311            super.characters(ch, previousMatch, start + length - previousMatch);
312        }
313        else
314        {
315            super.characters(ch, start, length);
316        }
317    }
318    
319    /**
320     * Get all the words with definitions to display.
321     * @param siteName the site name.
322     * @param lang the language.
323     * @return an exhaustive set of the words.
324     */
325    protected Map<String, Definition> _getDefinitions(String siteName, String lang)
326    {
327        if (_definitions == null)
328        {
329            _definitions = _getWordsAndDefinitions(siteName, lang);
330        }
331        
332        return Collections.unmodifiableMap(_definitions);
333    }
334    
335    /**
336     * Get all the words with definitions to display.
337     * @param siteName the site name.
338     * @param lang the language.
339     * @return an exhaustive set of the words.
340     */
341    protected Map<String, Definition> _getWordsAndDefinitions(String siteName, String lang)
342    {
343        Map<String, Definition> words = new HashMap<>();
344        
345        TraversableAmetysObject definitionsNode = GlossaryHelper.getDefinitionsNode(_siteManager.getSite(siteName), lang);
346        AmetysObjectIterable<DefaultDefinition> definitions = definitionsNode.getChildren();
347        
348        for (DefaultDefinition definition : definitions)
349        {
350            if (definition.displayOnText())
351            {
352                for (String word : definition.getAllForms())
353                {
354                    words.put(word.toLowerCase(), definition);
355                }
356            }
357        }
358        
359        return words;
360    }
361    
362    /**
363     * Get a regexp that matches any of the definition words.
364     * @param words the words.
365     * @return the pattern.
366     */
367    protected Pattern _getWordsPattern(Set<String> words)
368    {
369        StringBuilder pattern = new StringBuilder();
370        pattern.append("\\b(?:");
371        
372        Iterator<String> wordIt = words.iterator();
373        for (int i = 0; wordIt.hasNext(); i++)
374        {
375            if (i > 0)
376            {
377                pattern.append('|');
378            }
379            
380            // Quote
381            pattern.append("\\Q").append(wordIt.next()).append("\\E");
382        }
383        
384        pattern.append(")\\b");
385        
386        return Pattern.compile(pattern.toString(), Pattern.CASE_INSENSITIVE);
387    }
388    
389    /**
390     * Get the glossary page in a given site and language.
391     * @param siteName the site name.
392     * @param language the language.
393     * @return the glossary page.
394     */
395    protected String _getGlossaryPageHref(String siteName, String language)
396    {
397        if (_glossaryHref == null)
398        {
399            Page glossaryPage = _getGlossaryPage(siteName, language);
400            if (glossaryPage != null)
401            {
402                // FIXME CMS-2611 Force absolute 
403                Request request = ContextHelper.getRequest(_context);
404                boolean absolute = request.getAttribute("forceAbsoluteUrl") != null ? (Boolean) request.getAttribute("forceAbsoluteUrl") : false;
405                
406                _glossaryHref = _uriResolver.getResolverForType("page").resolve(glossaryPage.getId(), false, absolute, false);
407            }
408            else
409            {
410                _glossaryHref = "";
411            }
412        }
413        
414        return _glossaryHref;
415    }
416    
417    /**
418     * Get the glossary page in a given site and language.
419     * @param siteName the site name.
420     * @param language the language.
421     * @return the glossary page.
422     */
423    protected Page _getGlossaryPage(String siteName, String language)
424    {
425        Page page = null;
426        
427        Expression glossaryExpr = new TagExpression(Operator.EQ, GlossaryHelper.GLOSSARY_PAGE_TAG);
428        String xpath = PageQueryHelper.getPageXPathQuery(siteName, language, null, glossaryExpr, null);
429        
430        try (AmetysObjectIterable<Page> pages = _resolver.query(xpath);)
431        {
432            Iterator<Page> it = pages.iterator();
433            if (it.hasNext())
434            {
435                page = it.next();
436                
437                if (it.hasNext())
438                {
439                    getLogger().warn(String.format("More than one page is tagged 'GLOSSARY' in site %s and sitemap %s, please tag a single page.", siteName, language));
440                }
441            }
442        }
443        
444        return page;
445    }    
446}