/*
 *  Copyright 2021 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
016package org.ametys.cms.search.query;
017
import java.util.Collections;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.runtime.i18n.I18nizableText;
026
027/**
028 * Represents a {@link Query} testing a text field.
029 */
030public abstract class AbstractTextQuery extends AbstractFieldQuery
031{
032    /** The operator. */
033    protected Operator _operator;
034    /** The value to test. */
035    protected String _value;
036    /** The language. */
037    protected String _language;
038    /** <code>true</code> if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. */
039    protected boolean _valueAlreadyEscaped;
040    
041    /**
042     * Build a AbstractTextQuery testing the existence of the field.
043     * @param fieldPath the field path
044     */
045    public AbstractTextQuery(String fieldPath)
046    {
047        this(fieldPath, Operator.EXISTS, null, null);
048    }
049    
050    /**
051     * Build a text query.
052     * @param fieldPath the field's path
053     * @param value the value.
054     */
055    public AbstractTextQuery(String fieldPath, String value)
056    {
057        this(fieldPath, value, null);
058    }
059    
060    /**
061     * Build a text query.
062     * @param fieldPath the field's path
063     * @param value the value.
064     * @param language the query language (can be null).
065     */
066    public AbstractTextQuery(String fieldPath, String value, String language)
067    {
068        this(fieldPath, Operator.EQ, value, language);
069    }
070    
071    /**
072     * Build a text query.
073     * @param fieldPath the field's path
074     * @param op the operator.
075     * @param value the value.
076     * @param language the query language (can be null).
077     */
078    public AbstractTextQuery(String fieldPath, Operator op, String value, String language)
079    {
080        this(fieldPath, op, value, language, false);
081    }
082    
083    /**
084     * Build a text query.
085     * @param fieldPath the field's path
086     * @param op the operator.
087     * @param value the value.
088     * @param language the query language (can be null).
089     * @param alreadyEscaped true if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}.
090     */
091    public AbstractTextQuery(String fieldPath, Operator op, String value, String language, boolean alreadyEscaped)
092    {
093        super(fieldPath);
094        _operator = op;
095        _value = value;
096        _language = language;
097        _valueAlreadyEscaped = alreadyEscaped;
098    }
099    
100    /**
101     * Get the operator.
102     * @return the operator.
103     */
104    public Operator getOperator()
105    {
106        return _operator;
107    }
108    
109    /**
110     * Get the value.
111     * @return the value.
112     */
113    public String getValue()
114    {
115        return _value;
116    }
117    
118    /**
119     * Get the language.
120     * @return the language.
121     */
122    public String getLanguage()
123    {
124        return _language;
125    }
126    
127    @Override
128    public String build() throws QuerySyntaxException
129    {
130        StringBuilder query = new StringBuilder();
131                
132        checkStringValue(_value);
133        
134        String escapedValue = _valueAlreadyEscaped ? _value : escapeStringValue(_value, _operator);
135        
136        if (_operator == Operator.NE)
137        {
138            NotQuery.appendNegation(query);
139        }
140        
141        query.append(_fieldPath);
142        
143        if (_operator == Operator.SEARCH)
144        {
145            // Test query, unstemmed.
146            query.append("_txt");
147            if (_language != null)
148            {
149                query.append("_").append(_language);
150            }
151            query.append(":(").append(escapedValue).append(')');
152        }
153        else if (_operator == Operator.SEARCH_STEMMED)
154        {
155            // Full-text query
156            if (_language == null)
157            {
158                throw new IllegalArgumentException("Cannot build a string query with stemming without language");
159            }
160            query.append("_txt_stemmed_").append(_language).append(":(").append(escapedValue).append(')');
161        }
162        else if (_operator == Operator.LIKE)
163        {
164            if (_language != null)
165            {
166                query.append("_").append(_language);
167            }
168            // Wildcard query: run a lower-case search.
169            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append(')');
170        }
171        else
172        {
173            // Strict string comparison (enumerator value, ID, ...)
174            query.append("_s:").append('"').append(escapedValue).append('"');
175        }
176        
177        return query.toString();
178    }
179    
180    @Override
181    public int hashCode()
182    {
183        final int prime = 31;
184        int result = super.hashCode();
185        result = prime * result + ((_language == null) ? 0 : _language.hashCode());
186        result = prime * result + ((_operator == null) ? 0 : _operator.hashCode());
187        result = prime * result + ((_value == null) ? 0 : _value.hashCode());
188        return result;
189    }
190
191    @Override
192    public boolean equals(Object obj)
193    {
194        if (this == obj)
195        {
196            return true;
197        }
198        if (!super.equals(obj))
199        {
200            return false;
201        }
202        if (getClass() != obj.getClass())
203        {
204            return false;
205        }
206        AbstractTextQuery other = (AbstractTextQuery) obj;
207        if (_language == null)
208        {
209            if (other._language != null)
210            {
211                return false;
212            }
213        }
214        else if (!_language.equals(other._language))
215        {
216            return false;
217        }
218        if (_operator != other._operator)
219        {
220            return false;
221        }
222        if (_value == null)
223        {
224            if (other._value != null)
225            {
226                return false;
227            }
228        }
229        else if (!_value.equals(other._value))
230        {
231            return false;
232        }
233        return true;
234    }
235
236    /**
237     * Ensure that the string value is valid (i.e. the parentheses are balanced),
238     * and throw an exception if it isn't.
239     * @param value the string value to check.
240     * @throws QuerySyntaxException if the value is invalid.
241     */
242    public static void checkStringValue(String value) throws QuerySyntaxException
243    {
244        // The parenthesis nesting level.
245        boolean invalid = false;
246        int level = 0;
247        
248        for (int i = 0; i < value.length() && !invalid; i++)
249        {
250            char ch = value.charAt(i);
251            // The current character is escaped if the previous character is a slash
252            // that is not itself escaped (the previous-previous character must not be a slash).
253            boolean escaped = i > 0 && value.charAt(i - 1) == '\\' && !(i > 1 && value.charAt(i - 2) == '\\');
254            
255            if (ch == '(' && !escaped)
256            {
257                level++;
258            }
259            else if (ch == ')' && !escaped)
260            {
261                level--;
262                if (level < 0)
263                {
264                    // More closing than opening parentheses at this point: the value is invalid.
265                    invalid = true;
266                }
267            }
268        }
269        
270        // If the parentheses are balanced, the level is 0 at this point.
271        if (level != 0 || invalid)
272        {
273            String message = "The string search " + value + " is illegal, check the parentheses.";
274            I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value)));
275            
276            throw new QuerySyntaxException(message, details);
277        }
278    }
279    
280    /**
281     * Escape from a string value the characters that can modify the query field
282     * @param value the string value.
283     * @param operator the operator
284     * @return the escaped value.
285     */
286    public static String escapeStringValue(String value, Operator operator)
287    {
288        switch (operator)
289        {
290            case LIKE:
291                // '*' are allowed characters
292                // So escape all characters except '*'
293                return Stream.of(StringUtils.splitByWholeSeparatorPreserveAllTokens(value, "*"))
294                             .map(ClientUtils::escapeQueryChars)
295                             .collect(Collectors.joining("*"));
296            case SEARCH:
297            case SEARCH_STEMMED:
298                // '*' are allowed characters
299                // So escape all characters (except '*' and whitespaces)
300                return _escapeQueryCharsExceptStarsAndWhitespaces(value);
301
302            default:
303                return ClientUtils.escapeQueryChars(value);
304        }
305    }
306    
307    @SuppressWarnings("all")
308    private static String _escapeQueryCharsExceptStarsAndWhitespaces(String s)
309    {
310        StringBuilder sb = new StringBuilder();
311        for (int i = 0; i < s.length(); i++)
312        {
313            char c = s.charAt(i);
314            // These characters are part of the query syntax and must be escaped (except '*' and whitespaces)
315            if (c == '\\' || c == '+' || c == '-' || c == '!'  || c == '(' || c == ')' || c == ':'
316                || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
317                || c == '?' || c == '|' || c == '&'  || c == ';' || c == '/')
318            {
319                sb.append('\\');
320            }
321            sb.append(c);
322        }
323        return sb.toString();
324    }
325}