001/* 002 * Copyright 2021 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.search.query; 017 018import java.util.Collections; 019import java.util.Objects; 020import java.util.stream.Collectors; 021import java.util.stream.Stream; 022 023import org.apache.commons.lang3.StringUtils; 024import org.apache.solr.client.solrj.util.ClientUtils; 025 026import org.ametys.runtime.i18n.I18nizableText; 027 028/** 029 * Represents a {@link Query} testing a text field. 030 */ 031public abstract class AbstractTextQuery extends AbstractOperatorQuery<String> 032{ 033 /** The language. */ 034 protected String _language; 035 /** <code>true</code> if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. */ 036 protected boolean _valueAlreadyEscaped; 037 038 /** 039 * Build a AbstractTextQuery testing the existence of the field. 040 * @param fieldPath the field path 041 */ 042 public AbstractTextQuery(String fieldPath) 043 { 044 this(fieldPath, Operator.EXISTS, null, null); 045 } 046 047 /** 048 * Build a text query. 049 * @param fieldPath the field's path 050 * @param value the value. 051 */ 052 public AbstractTextQuery(String fieldPath, String value) 053 { 054 this(fieldPath, value, null); 055 } 056 057 /** 058 * Build a text query. 059 * @param fieldPath the field's path 060 * @param value the value. 061 * @param language the query language (can be null). 062 */ 063 public AbstractTextQuery(String fieldPath, String value, String language) 064 { 065 this(fieldPath, Operator.EQ, value, language); 066 } 067 068 /** 069 * Build a text query. 070 * @param fieldPath the field's path 071 * @param op the operator. 072 * @param value the value. 073 * @param language the query language (can be null). 074 */ 075 public AbstractTextQuery(String fieldPath, Operator op, String value, String language) 076 { 077 this(fieldPath, op, value, language, false); 078 } 079 080 /** 081 * Build a text query. 082 * @param fieldPath the field's path 083 * @param op the operator. 084 * @param value the value. 085 * @param language the query language (can be null). 086 * @param alreadyEscaped true if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. 087 */ 088 public AbstractTextQuery(String fieldPath, Operator op, String value, String language, boolean alreadyEscaped) 089 { 090 super(fieldPath, op, value); 091 _language = language; 092 _valueAlreadyEscaped = alreadyEscaped; 093 } 094 095 /** 096 * Get the language. 097 * @return the language. 098 */ 099 public String getLanguage() 100 { 101 return _language; 102 } 103 104 @Override 105 public String build() throws QuerySyntaxException 106 { 107 Operator operator = getOperator(); 108 String value = getValue(); 109 110 StringBuilder query = new StringBuilder(); 111 112 String escapedValue = _valueAlreadyEscaped ? checkStringValue(value) : escapeStringValue(value, operator); 113 114 if (operator == Operator.NE) 115 { 116 NotQuery.appendNegation(query); 117 } 118 119 query.append(getFieldName()); 120 121 if (operator == Operator.SEARCH) 122 { 123 // Test query, unstemmed. 124 query.append("_txt"); 125 if (_language != null) 126 { 127 query.append("_").append(_language); 128 } 129 query.append(":(").append(escapedValue).append(')'); 130 } 131 else if (operator == Operator.SEARCH_STEMMED) 132 { 133 // Full-text query 134 if (_language == null) 135 { 136 throw new IllegalArgumentException("Cannot build a string query with stemming without language"); 137 } 138 query.append("_txt_stemmed_").append(_language).append(":(").append(escapedValue).append(')'); 139 } 140 else if (operator == Operator.LIKE) 141 { 142 if (_language != null) 143 { 144 query.append("_").append(_language); 145 } 146 // Wildcard query: run a lower-case search. 147 query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append(')'); 148 } 149 else if (operator == Operator.FUZZY) 150 { 151 if (_language != null) 152 { 153 query.append("_").append(_language); 154 } 155 // Run a lower-case fuzzy search with a maximum edit distance of 2 characters. 156 query.append("_s_lower:").append('(').append(escapedValue.toLowerCase()).append("~2").append(')'); 157 } 158 else if (operator == Operator.PHONETIC) 159 { 160 // Full-text query 161 if (_language == null) 162 { 163 throw new IllegalArgumentException("Cannot build a phonetic query without language"); 164 } 165 query.append("_phonetic_").append(_language).append(":(").append(escapedValue).append(')'); 166 } 167 else if (operator == Operator.NGRAM) 168 { 169 // Full-text query 170 if (_language == null) 171 { 172 throw new IllegalArgumentException("Cannot build a ngram query without language"); 173 } 174 query.append("_ngram_").append(_language).append(":(").append(escapedValue).append(')'); 175 } 176 else 177 { 178 // Strict string comparison (enumerator value, ID, ...) 179 query.append("_s:").append('"').append(escapedValue).append('"'); 180 } 181 182 return query.toString(); 183 } 184 185 @Override 186 public int hashCode() 187 { 188 return 31 * super.hashCode() + Objects.hash(_language, _valueAlreadyEscaped); 189 } 190 191 @Override 192 public boolean equals(Object obj) 193 { 194 if (!super.equals(obj)) 195 { 196 return false; 197 } 198 199 AbstractTextQuery other = (AbstractTextQuery) obj; 200 return Objects.equals(_language, other._language) && Objects.equals(_valueAlreadyEscaped, other._valueAlreadyEscaped); 201 } 202 203 /** 204 * Ensure that the string value is valid (i.e. the parentheses are balanced), 205 * and throw an exception if it isn't. 206 * @param value the string value to check. 207 * @return the value checked 208 * @throws QuerySyntaxException if the value is invalid. 209 */ 210 public static String checkStringValue(String value) throws QuerySyntaxException 211 { 212 // The parenthesis nesting level. 213 boolean invalid = false; 214 int level = 0; 215 216 for (int i = 0; i < value.length() && !invalid; i++) 217 { 218 char ch = value.charAt(i); 219 // The current character is escaped if the previous character is a slash 220 // that is not itself escaped (the previous-previous character must not be a slash). 221 boolean escaped = i > 0 && value.charAt(i - 1) == '\\' && !(i > 1 && value.charAt(i - 2) == '\\'); 222 223 if (ch == '(' && !escaped) 224 { 225 level++; 226 } 227 else if (ch == ')' && !escaped) 228 { 229 level--; 230 if (level < 0) 231 { 232 // More closing than opening parentheses at this point: the value is invalid. 233 invalid = true; 234 } 235 } 236 } 237 238 // If the parentheses are balanced, the level is 0 at this point. 239 if (level != 0 || invalid) 240 { 241 String message = "The string search " + value + " is illegal, check the parentheses."; 242 I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value))); 243 244 throw new QuerySyntaxException(message, details); 245 } 246 247 return value; 248 } 249 250 /** 251 * Escape from a string value the characters that can modify the query field 252 * @param value the string value. 253 * @param operator the operator 254 * @return the escaped value. 255 */ 256 public static String escapeStringValue(String value, Operator operator) 257 { 258 switch (operator) 259 { 260 case LIKE: 261 // '*' are allowed characters 262 // So escape all characters except '*' 263 return Stream.of(StringUtils.splitByWholeSeparatorPreserveAllTokens(value, "*")) 264 .map(ClientUtils::escapeQueryChars) 265 .collect(Collectors.joining("*")); 266 case SEARCH: 267 case SEARCH_STEMMED: 268 // '*' are allowed characters 269 // So escape all characters (except '*' and whitespaces) 270 return QueryHelper.escapeQueryCharsExceptStarsAndWhitespaces(value); 271 272 default: 273 return ClientUtils.escapeQueryChars(value); 274 } 275 } 276}