001/*
002 *  Copyright 2021 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.query;
017
import java.util.Collections;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.runtime.i18n.I18nizableText;
026
027/**
028 * Represents a {@link Query} testing a text field.
029 */
030public abstract class AbstractTextQuery extends AbstractFieldQuery
031{
032    /** The operator. */
033    protected Operator _operator;
034    /** The value to test. */
035    protected String _value;
036    /** The language. */
037    protected String _language;
038    /** <code>true</code> if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. */
039    protected boolean _valueAlreadyEscaped;
040    
041    /**
042     * Build a AbstractTextQuery testing the existence of the field.
043     * @param fieldPath the field path
044     */
045    public AbstractTextQuery(String fieldPath)
046    {
047        this(fieldPath, Operator.EXISTS, null, null);
048    }
049    
050    /**
051     * Build a text query.
052     * @param fieldPath the field's path
053     * @param value the value.
054     */
055    public AbstractTextQuery(String fieldPath, String value)
056    {
057        this(fieldPath, value, null);
058    }
059    
060    /**
061     * Build a text query.
062     * @param fieldPath the field's path
063     * @param value the value.
064     * @param language the query language (can be null).
065     */
066    public AbstractTextQuery(String fieldPath, String value, String language)
067    {
068        this(fieldPath, Operator.EQ, value, language);
069    }
070    
071    /**
072     * Build a text query.
073     * @param fieldPath the field's path
074     * @param op the operator.
075     * @param value the value.
076     * @param language the query language (can be null).
077     */
078    public AbstractTextQuery(String fieldPath, Operator op, String value, String language)
079    {
080        this(fieldPath, op, value, language, false);
081    }
082    
083    /**
084     * Build a text query.
085     * @param fieldPath the field's path
086     * @param op the operator.
087     * @param value the value.
088     * @param language the query language (can be null).
089     * @param alreadyEscaped true if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}.
090     */
091    public AbstractTextQuery(String fieldPath, Operator op, String value, String language, boolean alreadyEscaped)
092    {
093        super(fieldPath);
094        _operator = op;
095        _value = value;
096        _language = language;
097        _valueAlreadyEscaped = alreadyEscaped;
098    }
099    
100    /**
101     * Get the operator.
102     * @return the operator.
103     */
104    public Operator getOperator()
105    {
106        return _operator;
107    }
108    
109    /**
110     * Get the value.
111     * @return the value.
112     */
113    public String getValue()
114    {
115        return _value;
116    }
117    
118    /**
119     * Get the language.
120     * @return the language.
121     */
122    public String getLanguage()
123    {
124        return _language;
125    }
126    
127    @Override
128    public String build() throws QuerySyntaxException
129    {
130        StringBuilder query = new StringBuilder();
131                
132        checkStringValue(_value);
133        
134        String escapedValue = _valueAlreadyEscaped ? _value : escapeStringValue(_value, _operator);
135        
136        if (_operator == Operator.NE)
137        {
138            NotQuery.appendNegation(query);
139        }
140        
141        query.append(_fieldPath);
142        
143        if (_operator == Operator.SEARCH)
144        {
145            // Test query, unstemmed.
146            query.append("_txt");
147            if (_language != null)
148            {
149                query.append("_").append(_language);
150            }
151            query.append(":(").append(escapedValue).append(')');
152        }
153        else if (_operator == Operator.SEARCH_STEMMED)
154        {
155            // Full-text query
156            if (_language == null)
157            {
158                throw new IllegalArgumentException("Cannot build a string query with stemming without language");
159            }
160            query.append("_txt_stemmed_").append(_language).append(":(").append(escapedValue).append(')');
161        }
162        else if (_operator == Operator.LIKE)
163        {
164            if (_language != null)
165            {
166                query.append("_").append(_language);
167            }
168            // Wildcard query: run a lower-case search.
169            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append(')');
170        }
171        else if (_operator == Operator.FUZZY)
172        {
173            if (_language != null)
174            {
175                query.append("_").append(_language);
176            }
177            // Run a lower-case fuzzy search with a maximum edit distance of 2 characters.
178            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append("~2").append(')');
179        }
180        else
181        {
182            // Strict string comparison (enumerator value, ID, ...)
183            query.append("_s:").append('"').append(escapedValue).append('"');
184        }
185        
186        return query.toString();
187    }
188    
189    @Override
190    public int hashCode()
191    {
192        final int prime = 31;
193        int result = super.hashCode();
194        result = prime * result + ((_language == null) ? 0 : _language.hashCode());
195        result = prime * result + ((_operator == null) ? 0 : _operator.hashCode());
196        result = prime * result + ((_value == null) ? 0 : _value.hashCode());
197        return result;
198    }
199
200    @Override
201    public boolean equals(Object obj)
202    {
203        if (this == obj)
204        {
205            return true;
206        }
207        if (!super.equals(obj))
208        {
209            return false;
210        }
211        if (getClass() != obj.getClass())
212        {
213            return false;
214        }
215        AbstractTextQuery other = (AbstractTextQuery) obj;
216        if (_language == null)
217        {
218            if (other._language != null)
219            {
220                return false;
221            }
222        }
223        else if (!_language.equals(other._language))
224        {
225            return false;
226        }
227        if (_operator != other._operator)
228        {
229            return false;
230        }
231        if (_value == null)
232        {
233            if (other._value != null)
234            {
235                return false;
236            }
237        }
238        else if (!_value.equals(other._value))
239        {
240            return false;
241        }
242        return true;
243    }
244
245    /**
246     * Ensure that the string value is valid (i.e. the parentheses are balanced),
247     * and throw an exception if it isn't.
248     * @param value the string value to check.
249     * @throws QuerySyntaxException if the value is invalid.
250     */
251    public static void checkStringValue(String value) throws QuerySyntaxException
252    {
253        // The parenthesis nesting level.
254        boolean invalid = false;
255        int level = 0;
256        
257        for (int i = 0; i < value.length() && !invalid; i++)
258        {
259            char ch = value.charAt(i);
260            // The current character is escaped if the previous character is a slash
261            // that is not itself escaped (the previous-previous character must not be a slash).
262            boolean escaped = i > 0 && value.charAt(i - 1) == '\\' && !(i > 1 && value.charAt(i - 2) == '\\');
263            
264            if (ch == '(' && !escaped)
265            {
266                level++;
267            }
268            else if (ch == ')' && !escaped)
269            {
270                level--;
271                if (level < 0)
272                {
273                    // More closing than opening parentheses at this point: the value is invalid.
274                    invalid = true;
275                }
276            }
277        }
278        
279        // If the parentheses are balanced, the level is 0 at this point.
280        if (level != 0 || invalid)
281        {
282            String message = "The string search " + value + " is illegal, check the parentheses.";
283            I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value)));
284            
285            throw new QuerySyntaxException(message, details);
286        }
287    }
288    
289    /**
290     * Escape from a string value the characters that can modify the query field
291     * @param value the string value.
292     * @param operator the operator
293     * @return the escaped value.
294     */
295    public static String escapeStringValue(String value, Operator operator)
296    {
297        switch (operator)
298        {
299            case LIKE:
300                // '*' are allowed characters
301                // So escape all characters except '*'
302                return Stream.of(StringUtils.splitByWholeSeparatorPreserveAllTokens(value, "*"))
303                             .map(ClientUtils::escapeQueryChars)
304                             .collect(Collectors.joining("*"));
305            case SEARCH:
306            case SEARCH_STEMMED:
307                // '*' are allowed characters
308                // So escape all characters (except '*' and whitespaces)
309                return QueryHelper.escapeQueryCharsExceptStarsAndWhitespaces(value);
310
311            default:
312                return ClientUtils.escapeQueryChars(value);
313        }
314    }
315}