001/*
002 *  Copyright 2021 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.query;
017
import java.util.Collections;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.runtime.i18n.I18nizableText;
027
028/**
029 * Represents a {@link Query} testing a text field.
030 */
031public abstract class AbstractTextQuery extends AbstractOperatorQuery<String>
032{
033    /** The language. */
034    protected String _language;
035    /** <code>true</code> if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. */
036    protected boolean _valueAlreadyEscaped;
037    
038    /**
039     * Build a AbstractTextQuery testing the existence of the field.
040     * @param fieldPath the field path
041     */
042    public AbstractTextQuery(String fieldPath)
043    {
044        this(fieldPath, Operator.EXISTS, null, null);
045    }
046    
047    /**
048     * Build a text query.
049     * @param fieldPath the field's path
050     * @param value the value.
051     */
052    public AbstractTextQuery(String fieldPath, String value)
053    {
054        this(fieldPath, value, null);
055    }
056    
057    /**
058     * Build a text query.
059     * @param fieldPath the field's path
060     * @param value the value.
061     * @param language the query language (can be null).
062     */
063    public AbstractTextQuery(String fieldPath, String value, String language)
064    {
065        this(fieldPath, Operator.EQ, value, language);
066    }
067    
068    /**
069     * Build a text query.
070     * @param fieldPath the field's path
071     * @param op the operator.
072     * @param value the value.
073     * @param language the query language (can be null).
074     */
075    public AbstractTextQuery(String fieldPath, Operator op, String value, String language)
076    {
077        this(fieldPath, op, value, language, false);
078    }
079    
080    /**
081     * Build a text query.
082     * @param fieldPath the field's path
083     * @param op the operator.
084     * @param value the value.
085     * @param language the query language (can be null).
086     * @param alreadyEscaped true if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}.
087     */
088    public AbstractTextQuery(String fieldPath, Operator op, String value, String language, boolean alreadyEscaped)
089    {
090        super(fieldPath, op, value);
091        _language = language;
092        _valueAlreadyEscaped = alreadyEscaped;
093    }
094    
095    /**
096     * Get the language.
097     * @return the language.
098     */
099    public String getLanguage()
100    {
101        return _language;
102    }
103    
104    @Override
105    public String build() throws QuerySyntaxException
106    {
107        Operator operator = getOperator();
108        String value = getValue();
109        
110        StringBuilder query = new StringBuilder();
111                
112        checkStringValue(value);
113        
114        String escapedValue = _valueAlreadyEscaped ? value : escapeStringValue(value, operator);
115        
116        if (operator == Operator.NE)
117        {
118            NotQuery.appendNegation(query);
119        }
120        
121        query.append(getFieldName());
122        
123        if (operator == Operator.SEARCH)
124        {
125            // Test query, unstemmed.
126            query.append("_txt");
127            if (_language != null)
128            {
129                query.append("_").append(_language);
130            }
131            query.append(":(").append(escapedValue).append(')');
132        }
133        else if (operator == Operator.SEARCH_STEMMED)
134        {
135            // Full-text query
136            if (_language == null)
137            {
138                throw new IllegalArgumentException("Cannot build a string query with stemming without language");
139            }
140            query.append("_txt_stemmed_").append(_language).append(":(").append(escapedValue).append(')');
141        }
142        else if (operator == Operator.LIKE)
143        {
144            if (_language != null)
145            {
146                query.append("_").append(_language);
147            }
148            // Wildcard query: run a lower-case search.
149            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append(')');
150        }
151        else if (operator == Operator.FUZZY)
152        {
153            if (_language != null)
154            {
155                query.append("_").append(_language);
156            }
157            // Run a lower-case fuzzy search with a maximum edit distance of 2 characters.
158            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append("~2").append(')');
159        }
160        else if (operator == Operator.PHONETIC)
161        {
162            // Full-text query
163            if (_language == null)
164            {
165                throw new IllegalArgumentException("Cannot build a phonetic query without language");
166            }
167            query.append("_phonetic_").append(_language).append(":(").append(escapedValue).append(')');
168        }
169        else if (operator == Operator.NGRAM)
170        {
171            // Full-text query
172            if (_language == null)
173            {
174                throw new IllegalArgumentException("Cannot build a ngram query without language");
175            }
176            query.append("_ngram_").append(_language).append(":(").append(escapedValue).append(')');
177        }
178        else
179        {
180            // Strict string comparison (enumerator value, ID, ...)
181            query.append("_s:").append('"').append(escapedValue).append('"');
182        }
183        
184        return query.toString();
185    }
186    
187    @Override
188    public int hashCode()
189    {
190        return 31 * super.hashCode() + Objects.hash(_language, _valueAlreadyEscaped);
191    }
192
193    @Override
194    public boolean equals(Object obj)
195    {
196        if (!super.equals(obj))
197        {
198            return false;
199        }
200
201        AbstractTextQuery other = (AbstractTextQuery) obj;
202        return Objects.equals(_language, other._language) && Objects.equals(_valueAlreadyEscaped, other._valueAlreadyEscaped);
203    }
204
205    /**
206     * Ensure that the string value is valid (i.e. the parentheses are balanced),
207     * and throw an exception if it isn't.
208     * @param value the string value to check.
209     * @throws QuerySyntaxException if the value is invalid.
210     */
211    public static void checkStringValue(String value) throws QuerySyntaxException
212    {
213        // The parenthesis nesting level.
214        boolean invalid = false;
215        int level = 0;
216        
217        for (int i = 0; i < value.length() && !invalid; i++)
218        {
219            char ch = value.charAt(i);
220            // The current character is escaped if the previous character is a slash
221            // that is not itself escaped (the previous-previous character must not be a slash).
222            boolean escaped = i > 0 && value.charAt(i - 1) == '\\' && !(i > 1 && value.charAt(i - 2) == '\\');
223            
224            if (ch == '(' && !escaped)
225            {
226                level++;
227            }
228            else if (ch == ')' && !escaped)
229            {
230                level--;
231                if (level < 0)
232                {
233                    // More closing than opening parentheses at this point: the value is invalid.
234                    invalid = true;
235                }
236            }
237        }
238        
239        // If the parentheses are balanced, the level is 0 at this point.
240        if (level != 0 || invalid)
241        {
242            String message = "The string search " + value + " is illegal, check the parentheses.";
243            I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value)));
244            
245            throw new QuerySyntaxException(message, details);
246        }
247    }
248    
249    /**
250     * Escape from a string value the characters that can modify the query field
251     * @param value the string value.
252     * @param operator the operator
253     * @return the escaped value.
254     */
255    public static String escapeStringValue(String value, Operator operator)
256    {
257        switch (operator)
258        {
259            case LIKE:
260                // '*' are allowed characters
261                // So escape all characters except '*'
262                return Stream.of(StringUtils.splitByWholeSeparatorPreserveAllTokens(value, "*"))
263                             .map(ClientUtils::escapeQueryChars)
264                             .collect(Collectors.joining("*"));
265            case SEARCH:
266            case SEARCH_STEMMED:
267                // '*' are allowed characters
268                // So escape all characters (except '*' and whitespaces)
269                return QueryHelper.escapeQueryCharsExceptStarsAndWhitespaces(value);
270
271            default:
272                return ClientUtils.escapeQueryChars(value);
273        }
274    }
275}