001/* 002 * Copyright 2016 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content; 017 018import org.apache.commons.lang.ArrayUtils; 019import org.xml.sax.Attributes; 020import org.xml.sax.SAXException; 021import org.xml.sax.helpers.DefaultHandler; 022 023/** 024 * This handle parse a rich text to get only text nodes. 025 * HTML expert is ignored. 026 * Call {@link #getValue()} to get parse string. 027 */ 028public class RichTextHandler extends DefaultHandler 029{ 030 /** List of elements to be ignored during the body text retrieval processing*/ 031 protected static final String[] __IGNORED_ELEMENTS = new String[]{"htmlexpert"}; 032 033 private static final String __SEPARATOR = " "; 034 035 private StringBuilder _bodyText = new StringBuilder(); 036 private boolean _ignoreElmt; 037 private boolean _newElmt; 038 private int _excerptLength; 039 040 /** 041 * Creates a rich text handler 042 */ 043 public RichTextHandler() 044 { 045 super(); 046 _excerptLength = 0; 047 } 048 049 /** 050 * Creates a rich text handler to parse a rich text with a limit a characters 051 * @param excerptLength The length for content excerpt 052 */ 053 public RichTextHandler(int excerptLength) 054 { 055 super(); 056 _excerptLength = excerptLength; 057 } 058 059 @Override 060 public void characters(char[] ch, int start, int length) throws SAXException 061 { 062 if (_newElmt && _bodyText.length() > 0) 063 { 064 _bodyText.append(__SEPARATOR); 065 _newElmt = false; 066 } 067 068 if (!_ignoreElmt) 069 { 070 String text = new String(ch, start, length); 071 _bodyText.append(text); 072 } 073 } 074 075 @Override 076 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException 077 { 078 if (ArrayUtils.contains(__IGNORED_ELEMENTS, localName)) 079 { 080 _ignoreElmt = true; 081 } 082 else 083 { 084 _newElmt = true; 085 } 086 } 087 088 @Override 089 public void endElement(String uri, String localName, String qName) throws SAXException 090 { 091 if (ArrayUtils.contains(__IGNORED_ELEMENTS, localName)) 092 { 093 _ignoreElmt = false; 094 } 095 } 096 097 /** 098 * Gets the value of the parsed data 099 * @return the value of the parsed data 100 */ 101 public String getValue() 102 { 103 String bodyText = _bodyText.toString(); 104 bodyText = bodyText.replaceAll("\\t|\\r?\\n", " ").replaceAll("\\s+", " ").trim(); 105 106 if (_excerptLength > 0 && bodyText.length() > _excerptLength) 107 { 108 int summaryEndIndex = bodyText.lastIndexOf(' ', _excerptLength); 109 if (summaryEndIndex == -1) 110 { 111 summaryEndIndex = bodyText.length(); 112 } 113 114 return bodyText.substring(0, summaryEndIndex) + "…"; 115 } 116 117 return bodyText; 118 } 119}