001/*
002 *  Copyright 2014 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.query;
017
018import java.util.Collections;
019import java.util.regex.Matcher;
020import java.util.regex.Pattern;
021
022import org.ametys.cms.search.QueryBuilder;
023import org.ametys.runtime.i18n.I18nizableText;
024
025/**
026 * Represents a {@link Query} testing a string field.
027 */
028public class StringQuery extends AbstractFieldQuery
029{
030    
031    /** Escape '{', '}', ':' and '!' characters to prevent the user from changing the query field. */
032    //   '(?<!(?<!\\)\\)([{}:!])'
033    // = '(?<!'                     Negative Lookbehind (Assert that the Regex below does not match)
034    // + '(?<!'                     Negative Lookbehind (Assert that the Regex below does not match)
035    // + '\\)'                      matches the character '\'
036    // + '\\)'                      matches the character '\'
037    // + '([{}:!])'                 Matches a single character in the list, so '{', '}', ':' or '!'
038    //                  In order to match '{', '}', ':' or '!' only if they are not escaped yet (not preceded by a '\')
039    protected static final Pattern FULLTEXT_ESCAPE_CHARS = Pattern.compile("(?<!(?<!\\\\)\\\\)([{}:!])");
040    
041    /** The replacement. */
042    // '\\$1' = '\' + '\$1'. 
043    // We want to escape the found characters, so add a '\' in front of them. '\$1' is the references to the captured subsequences
044    protected static final String FULLTEXT_ESCAPE_CHARS_REPLACEMENT = "\\\\$1";
045    
046    /** The operator. */
047    protected Operator _operator;
048    /** The value to test. */
049    protected String _value;
050    /** The language. */
051    protected String _language;
052    
053    /**
054     * Build a StringQuery testing the existence of the field.
055     * @param fieldPath the field path
056     */
057    public StringQuery(String fieldPath)
058    {
059        this(fieldPath, Operator.EXISTS, null, null);
060    }
061    
062    /**
063     * Build a string query.
064     * @param fieldPath the field's path
065     * @param value the value.
066     */
067    public StringQuery(String fieldPath, String value)
068    {
069        this(fieldPath, value, null);
070    }
071    
072    /**
073     * Build a string query.
074     * @param fieldPath the field's path
075     * @param value the value.
076     * @param language the query language (can be null).
077     */
078    public StringQuery(String fieldPath, String value, String language)
079    {
080        this(fieldPath, Operator.EQ, value, language);
081    }
082    
083    /**
084     * Build a string query.
085     * @param fieldPath the field's path
086     * @param op the operator.
087     * @param value the value.
088     * @param language the query language (can be null).
089     */
090    public StringQuery(String fieldPath, Operator op, String value, String language)
091    {
092        super(fieldPath);
093        _operator = op;
094        _value = value;
095        _language = language;
096    }
097    
098    /**
099     * Get the operator.
100     * @return the operator.
101     */
102    public Operator getOperator()
103    {
104        return _operator;
105    }
106    
107    /**
108     * Get the value.
109     * @return the value.
110     */
111    public String getValue()
112    {
113        return _value;
114    }
115    
116    /**
117     * Get the language.
118     * @return the language.
119     */
120    public String getLanguage()
121    {
122        return _language;
123    }
124    
125    @Override
126    public String build() throws QuerySyntaxException
127    {
128        StringBuilder query = new StringBuilder();
129        
130        if (_operator == Operator.EXISTS)
131        {
132            query.append(_fieldPath).append("_s:").append(QueryHelper.EXISTS_VALUE);
133            return query.toString();
134        }
135        
136        // TODO Params?
137        String language = _language != null ? _language : QueryBuilder.DEFAULT_LANGUAGE;
138        
139        checkStringValue(_value);
140        
141        String escapedValue = escapeStringValue(_value);
142        
143        if (_operator == Operator.NE)
144        {
145            query.append('-');
146        }
147        
148        query.append(_fieldPath);
149        
150        if (_operator == Operator.SEARCH)
151        {
152            // Test query, unstemmed.
153            query.append("_txt_").append(language).append(":(").append(escapedValue).append(')');
154        }
155        else if (_operator == Operator.SEARCH_STEMMED)
156        {
157            // Full-text query.
158            query.append("_txt_stemmed_").append(language).append(":(").append(escapedValue).append(')');
159        }
160        else if (_operator == Operator.LIKE)
161        {
162            // Wildcard query: run a lower-case search.
163            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append(')');
164        }
165        else
166        {
167            // Strict string comparison (enumerator value, ID, ...)
168            query.append("_s:").append('"').append(escapedValue).append('"');
169        }
170        
171        return query.toString();
172    }
173    
174    /**
175     * Ensure that the string value is valid (i.e. the parentheses are balanced),
176     * and throw an exception if it isn't.
177     * @param value the string value to check.
178     * @throws QuerySyntaxException if the value is invalid.
179     */
180    public static void checkStringValue(String value) throws QuerySyntaxException
181    {
182        // The parenthesis nesting level.
183        boolean invalid = false;
184        int level = 0;
185        
186        for (int i = 0; i < value.length() && !invalid; i++)
187        {
188            char ch = value.charAt(i);
189            // The current character is escaped if the previous character is a slash
190            // that is not itself escaped (the previous-previous character must not be a slash).
191            boolean escaped = i > 0 && value.charAt(i - 1) == '\\' && !(i > 1 && value.charAt(i - 2) == '\\');
192            
193            if (ch == '(' && !escaped)
194            {
195                level++;
196            }
197            else if (ch == ')' && !escaped)
198            {
199                level--;
200                if (level < 0)
201                {
202                    // More closing than opening parentheses at this point: the value is invalid.
203                    invalid = true;
204                }
205            }
206        }
207        
208        // If the parentheses are balanced, the level is 0 at this point.
209        if (level != 0 || invalid)
210        {
211            String message = "The string search " + value + " is illegal, check the parentheses.";
212            I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value)));
213            
214            throw new QuerySyntaxException(message, details);
215        }
216    }
217    
218    /**
219     * Escape from a string value the characters that can modify the query field (':', '{' and '}').
220     * @param value the string value.
221     * @return the escaped value.
222     */
223    public static String escapeStringValue(String value)
224    {
225        Matcher matcher = FULLTEXT_ESCAPE_CHARS.matcher(value);
226        if (matcher.find())
227        {
228            return matcher.replaceAll(FULLTEXT_ESCAPE_CHARS_REPLACEMENT);
229        }
230        else
231        {
232            return value;
233        }
234    }
235    
236}