001/*
002 *  Copyright 2021 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.cms.search.query;
017
import java.util.Collections;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.runtime.i18n.I18nizableText;
027
028/**
029 * Represents a {@link Query} testing a text field.
030 */
031public abstract class AbstractTextQuery extends AbstractOperatorQuery<String>
032{
033    /** The language. */
034    protected String _language;
035    /** <code>true</code> if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. */
036    protected boolean _valueAlreadyEscaped;
037    
038    /**
039     * Build a AbstractTextQuery testing the existence of the field.
040     * @param fieldPath the field path
041     */
042    public AbstractTextQuery(String fieldPath)
043    {
044        this(fieldPath, Operator.EXISTS, null, null);
045    }
046    
047    /**
048     * Build a text query.
049     * @param fieldPath the field's path
050     * @param value the value.
051     */
052    public AbstractTextQuery(String fieldPath, String value)
053    {
054        this(fieldPath, value, null);
055    }
056    
057    /**
058     * Build a text query.
059     * @param fieldPath the field's path
060     * @param value the value.
061     * @param language the query language (can be null).
062     */
063    public AbstractTextQuery(String fieldPath, String value, String language)
064    {
065        this(fieldPath, Operator.EQ, value, language);
066    }
067    
068    /**
069     * Build a text query.
070     * @param fieldPath the field's path
071     * @param op the operator.
072     * @param value the value.
073     * @param language the query language (can be null).
074     */
075    public AbstractTextQuery(String fieldPath, Operator op, String value, String language)
076    {
077        this(fieldPath, op, value, language, false);
078    }
079    
080    /**
081     * Build a text query.
082     * @param fieldPath the field's path
083     * @param op the operator.
084     * @param value the value.
085     * @param language the query language (can be null).
086     * @param alreadyEscaped true if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}.
087     */
088    public AbstractTextQuery(String fieldPath, Operator op, String value, String language, boolean alreadyEscaped)
089    {
090        super(fieldPath, op, value);
091        _language = language;
092        _valueAlreadyEscaped = alreadyEscaped;
093    }
094    
095    /**
096     * Get the language.
097     * @return the language.
098     */
099    public String getLanguage()
100    {
101        return _language;
102    }
103    
104    @Override
105    public String build() throws QuerySyntaxException
106    {
107        Operator operator = getOperator();
108        String value = getValue();
109        
110        StringBuilder query = new StringBuilder();
111        
112        String escapedValue = _valueAlreadyEscaped ? checkStringValue(value) : escapeStringValue(value, operator);
113        
114        if (operator == Operator.NE)
115        {
116            NotQuery.appendNegation(query);
117        }
118        
119        query.append(getFieldName());
120        
121        if (operator == Operator.SEARCH)
122        {
123            // Test query, unstemmed.
124            query.append("_txt");
125            if (_language != null)
126            {
127                query.append("_").append(_language);
128            }
129            query.append(":(").append(escapedValue).append(')');
130        }
131        else if (operator == Operator.SEARCH_STEMMED)
132        {
133            // Full-text query
134            if (_language == null)
135            {
136                throw new IllegalArgumentException("Cannot build a string query with stemming without language");
137            }
138            query.append("_txt_stemmed_").append(_language).append(":(").append(escapedValue).append(')');
139        }
140        else if (operator == Operator.LIKE)
141        {
142            if (_language != null)
143            {
144                query.append("_").append(_language);
145            }
146            // Wildcard query: run a lower-case search.
147            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append(')');
148        }
149        else if (operator == Operator.FUZZY)
150        {
151            if (_language != null)
152            {
153                query.append("_").append(_language);
154            }
155            // Run a lower-case fuzzy search with a maximum edit distance of 2 characters.
156            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append("~2").append(')');
157        }
158        else if (operator == Operator.PHONETIC)
159        {
160            // Full-text query
161            if (_language == null)
162            {
163                throw new IllegalArgumentException("Cannot build a phonetic query without language");
164            }
165            query.append("_phonetic_").append(_language).append(":(").append(escapedValue).append(')');
166        }
167        else if (operator == Operator.NGRAM)
168        {
169            // Full-text query
170            if (_language == null)
171            {
172                throw new IllegalArgumentException("Cannot build a ngram query without language");
173            }
174            query.append("_ngram_").append(_language).append(":(").append(escapedValue).append(')');
175        }
176        else
177        {
178            // Strict string comparison (enumerator value, ID, ...)
179            query.append("_s:").append('"').append(escapedValue).append('"');
180        }
181        
182        return query.toString();
183    }
184    
185    @Override
186    public int hashCode()
187    {
188        return 31 * super.hashCode() + Objects.hash(_language, _valueAlreadyEscaped);
189    }
190
191    @Override
192    public boolean equals(Object obj)
193    {
194        if (!super.equals(obj))
195        {
196            return false;
197        }
198
199        AbstractTextQuery other = (AbstractTextQuery) obj;
200        return Objects.equals(_language, other._language) && Objects.equals(_valueAlreadyEscaped, other._valueAlreadyEscaped);
201    }
202
203    /**
204     * Ensure that the string value is valid (i.e. the parentheses are balanced),
205     * and throw an exception if it isn't.
206     * @param value the string value to check.
207     * @return the value checked
208     * @throws QuerySyntaxException if the value is invalid.
209     */
210    public static String checkStringValue(String value) throws QuerySyntaxException
211    {
212        // The parenthesis nesting level.
213        boolean invalid = false;
214        int level = 0;
215        
216        for (int i = 0; i < value.length() && !invalid; i++)
217        {
218            char ch = value.charAt(i);
219            // The current character is escaped if the previous character is a slash
220            // that is not itself escaped (the previous-previous character must not be a slash).
221            boolean escaped = i > 0 && value.charAt(i - 1) == '\\' && !(i > 1 && value.charAt(i - 2) == '\\');
222            
223            if (ch == '(' && !escaped)
224            {
225                level++;
226            }
227            else if (ch == ')' && !escaped)
228            {
229                level--;
230                if (level < 0)
231                {
232                    // More closing than opening parentheses at this point: the value is invalid.
233                    invalid = true;
234                }
235            }
236        }
237        
238        // If the parentheses are balanced, the level is 0 at this point.
239        if (level != 0 || invalid)
240        {
241            String message = "The string search " + value + " is illegal, check the parentheses.";
242            I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value)));
243            
244            throw new QuerySyntaxException(message, details);
245        }
246        
247        return value;
248    }
249    
250    /**
251     * Escape from a string value the characters that can modify the query field
252     * @param value the string value.
253     * @param operator the operator
254     * @return the escaped value.
255     */
256    public static String escapeStringValue(String value, Operator operator)
257    {
258        switch (operator)
259        {
260            case LIKE:
261                // '*' are allowed characters
262                // So escape all characters except '*'
263                return Stream.of(StringUtils.splitByWholeSeparatorPreserveAllTokens(value, "*"))
264                             .map(ClientUtils::escapeQueryChars)
265                             .collect(Collectors.joining("*"));
266            case SEARCH:
267            case SEARCH_STEMMED:
268                // '*' are allowed characters
269                // So escape all characters (except '*' and whitespaces)
270                return QueryHelper.escapeQueryCharsExceptStarsAndWhitespaces(value);
271
272            default:
273                return ClientUtils.escapeQueryChars(value);
274        }
275    }
276}