001/*
002 *  Copyright 2016 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.content;
017
018import org.apache.commons.lang.ArrayUtils;
019import org.xml.sax.Attributes;
020import org.xml.sax.SAXException;
021import org.xml.sax.helpers.DefaultHandler;
022
/**
 * This handler parses a rich text to keep only the content of its text nodes.<br>
 * The content of the elements listed in {@link #__IGNORED_ELEMENTS} (HTML expert) is ignored.<br>
 * Call {@link #getValue()} once the parsing is done to get the extracted string.
 */
public class RichTextHandler extends DefaultHandler
{
    /** List of elements to be ignored during the body text retrieval processing */
    protected static final String[] __IGNORED_ELEMENTS = new String[]{"htmlexpert"};
    
    private static final String __SEPARATOR = " ";
    
    /** Horizontal ellipsis appended to a truncated excerpt (escaped to be independent of the source encoding) */
    private static final String __ELLIPSIS = "\u2026";
    
    private final StringBuilder _bodyText = new StringBuilder();
    private final int _excerptLength;
    
    /** Number of currently opened ignored elements. Text is skipped while it is greater than 0 */
    private int _ignoredDepth;
    /** true when an element was opened since the last piece of text was kept */
    private boolean _newElmt;
    
    /**
     * Creates a rich text handler without any limit of characters
     */
    public RichTextHandler()
    {
        this(0);
    }
    
    /**
     * Creates a rich text handler to parse a rich text with a limit of characters 
     * @param excerptLength The length for content excerpt. A value lower or equal to 0 means no limit.
     */
    public RichTextHandler(int excerptLength)
    {
        super();
        _excerptLength = excerptLength;
    }
    
    /**
     * Appends the received characters to the body text, unless they belong to an ignored element.
     * A single separator is inserted before the first characters following the opening of an element.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        if (_ignoredDepth > 0)
        {
            // Inside an ignored element: keep neither the text nor a separator
            return;
        }
        
        if (_newElmt)
        {
            if (_bodyText.length() > 0)
            {
                _bodyText.append(__SEPARATOR);
            }
            
            // Always reset the flag: a SAX parser may deliver a single text node in several chunks,
            // and the following chunks must not be separated from the first one
            _newElmt = false;
        }
        
        _bodyText.append(ch, start, length);
    }
    
    /**
     * Starts ignoring the text if the element is one of {@link #__IGNORED_ELEMENTS},
     * otherwise records that a new element was opened.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
    {
        if (_isIgnored(localName))
        {
            _ignoredDepth++;
        }
        else
        {
            _newElmt = true;
        }
    }
    
    /**
     * Stops ignoring the text when the outermost ignored element is closed.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
        if (_ignoredDepth > 0 && _isIgnored(localName))
        {
            _ignoredDepth--;
        }
    }
    
    /**
     * Gets the value of the parsed data.<br>
     * Whitespace sequences (including tabulations and line breaks) are collapsed to a single space and the result is trimmed.<br>
     * If an excerpt length greater than 0 was given and the text is longer, the text is cut at the last space found
     * at or before this length (or exactly at this length if there is no such space) and an ellipsis is appended.
     * @return the value of the parsed data
     */
    public String getValue()
    {
        // \s already matches tabulations, carriage returns and line feeds
        String bodyText = _bodyText.toString().replaceAll("\\s+", " ").trim();
        
        if (_excerptLength <= 0 || bodyText.length() <= _excerptLength)
        {
            return bodyText;
        }
        
        int summaryEndIndex = bodyText.lastIndexOf(' ', _excerptLength);
        if (summaryEndIndex == -1)
        {
            // No word boundary before the limit: cut at the limit instead of returning the whole text
            summaryEndIndex = _excerptLength;
            if (Character.isHighSurrogate(bodyText.charAt(summaryEndIndex - 1)))
            {
                // Do not split a surrogate pair
                summaryEndIndex--;
            }
        }
        
        return bodyText.substring(0, summaryEndIndex) + __ELLIPSIS;
    }
    
    /**
     * Determines if the content of an element has to be ignored
     * @param localName The local name of the element
     * @return true if the element is one of {@link #__IGNORED_ELEMENTS}
     */
    private static boolean _isIgnored(String localName)
    {
        for (String ignoredElement : __IGNORED_ELEMENTS)
        {
            if (ignoredElement.equals(localName))
            {
                return true;
            }
        }
        
        return false;
    }
}