/*
 *  Copyright 2021 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.cms.search.query;

import java.util.Collections;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.runtime.i18n.I18nizableText;

/**
 * Represents a {@link Query} testing a text field.
 * <p>
 * The field name returned by <code>getFieldName()</code> is completed with a suffix
 * that depends on the operator and, for most operators, on the language
 * (see {@link #build()}).
 */
public abstract class AbstractTextQuery extends AbstractOperatorQuery<String>
{
    /** The language. */
    protected String _language;
    /** <code>true</code> if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}. */
    protected boolean _valueAlreadyEscaped;
    
    /**
     * Build a AbstractTextQuery testing the existence of the field.
     * The query is created with the {@link Operator#EXISTS} operator, a <code>null</code> value and a <code>null</code> language.
     * @param fieldPath the field path
     */
    public AbstractTextQuery(String fieldPath)
    {
        this(fieldPath, Operator.EXISTS, null, null);
    }
    
    /**
     * Build a text query testing the equality ({@link Operator#EQ}) with the given value, without language.
     * @param fieldPath the field's path
     * @param value the value.
     */
    public AbstractTextQuery(String fieldPath, String value)
    {
        this(fieldPath, value, null);
    }
    
    /**
     * Build a text query testing the equality ({@link Operator#EQ}) with the given value.
     * @param fieldPath the field's path
     * @param value the value.
     * @param language the query language (can be null).
     */
    public AbstractTextQuery(String fieldPath, String value, String language)
    {
        this(fieldPath, Operator.EQ, value, language);
    }
    
    /**
     * Build a text query. The value is considered as not escaped yet.
     * @param fieldPath the field's path
     * @param op the operator.
     * @param value the value.
     * @param language the query language (can be null).
     */
    public AbstractTextQuery(String fieldPath, Operator op, String value, String language)
    {
        this(fieldPath, op, value, language, false);
    }
    
    /**
     * Build a text query.
     * @param fieldPath the field's path
     * @param op the operator.
     * @param value the value.
     * @param language the query language (can be null).
     * @param alreadyEscaped true if the value is already escaped and there is no need to escape again the value during {@link #build() the build of the query}.
     */
    public AbstractTextQuery(String fieldPath, Operator op, String value, String language, boolean alreadyEscaped)
    {
        super(fieldPath, op, value);
        _language = language;
        _valueAlreadyEscaped = alreadyEscaped;
    }
    
    /**
     * Get the language.
     * @return the language (can be null).
     */
    public String getLanguage()
    {
        return _language;
    }
    
    /**
     * Build the query string.
     * <p>
     * The raw value is first validated with {@link #checkStringValue(String)}, then escaped with
     * {@link #escapeStringValue(String, Operator)} unless it was declared as already escaped.
     * The field name is then suffixed depending on the operator:
     * <ul>
     * <li>SEARCH: <code>_txt[_language]:(value)</code></li>
     * <li>SEARCH_STEMMED: <code>_txt_stemmed_language:(value)</code></li>
     * <li>LIKE: <code>[_language]_s_lower:(lower-cased value)</code></li>
     * <li>FUZZY: <code>[_language]_s_lower:(lower-cased value~2)</code></li>
     * <li>PHONETIC: <code>_phonetic_language:(value)</code></li>
     * <li>NGRAM: <code>_ngram_language:(value)</code></li>
     * <li>any other operator: <code>_s:"value"</code>, prefixed by a negation for NE.</li>
     * </ul>
     * @return the query string.
     * @throws QuerySyntaxException if the parentheses of the value are not balanced.
     * @throws IllegalArgumentException if the operator is SEARCH_STEMMED, PHONETIC or NGRAM and no language was provided.
     */
    @Override
    public String build() throws QuerySyntaxException
    {
        Operator operator = getOperator();
        String value = getValue();
        
        // Validate the raw value before producing any part of the query.
        checkStringValue(value);
        
        String escapedValue = _valueAlreadyEscaped ? value : escapeStringValue(value, operator);
        
        StringBuilder query = new StringBuilder();
        
        if (operator == Operator.NE)
        {
            NotQuery.appendNegation(query);
        }
        
        query.append(getFieldName());
        
        if (operator == Operator.SEARCH)
        {
            // Text query, unstemmed.
            query.append("_txt");
            if (_language != null)
            {
                query.append("_").append(_language);
            }
            query.append(":(").append(escapedValue).append(')');
        }
        else if (operator == Operator.SEARCH_STEMMED)
        {
            // Full-text query
            if (_language == null)
            {
                throw new IllegalArgumentException("Cannot build a string query with stemming without language");
            }
            query.append("_txt_stemmed_").append(_language).append(":(").append(escapedValue).append(')');
        }
        else if (operator == Operator.LIKE)
        {
            if (_language != null)
            {
                query.append("_").append(_language);
            }
            // Wildcard query: run a lower-case search.
            // Locale.ROOT keeps the lower-casing independent from the default locale of the JVM.
            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase(Locale.ROOT)).append(')');
        }
        else if (operator == Operator.FUZZY)
        {
            if (_language != null)
            {
                query.append("_").append(_language);
            }
            // Run a lower-case fuzzy search with a maximum edit distance of 2 characters.
            // Locale.ROOT keeps the lower-casing independent from the default locale of the JVM.
            query.append("_s_lower:").append('(').append(escapedValue.toLowerCase(Locale.ROOT)).append("~2").append(')');
        }
        else if (operator == Operator.PHONETIC)
        {
            // Phonetic query
            if (_language == null)
            {
                throw new IllegalArgumentException("Cannot build a phonetic query without language");
            }
            query.append("_phonetic_").append(_language).append(":(").append(escapedValue).append(')');
        }
        else if (operator == Operator.NGRAM)
        {
            // N-gram query
            if (_language == null)
            {
                throw new IllegalArgumentException("Cannot build a ngram query without language");
            }
            query.append("_ngram_").append(_language).append(":(").append(escapedValue).append(')');
        }
        else
        {
            // Strict string comparison (enumerator value, ID, ...)
            query.append("_s:").append('"').append(escapedValue).append('"');
        }
        
        return query.toString();
    }
    
    @Override
    public int hashCode()
    {
        return 31 * super.hashCode() + Objects.hash(_language, _valueAlreadyEscaped);
    }
    
    @Override
    public boolean equals(Object obj)
    {
        if (!super.equals(obj))
        {
            return false;
        }
        
        // NOTE(review): this cast relies on super.equals(obj) rejecting null and objects of another class - confirm in AbstractOperatorQuery.
        AbstractTextQuery other = (AbstractTextQuery) obj;
        return Objects.equals(_language, other._language) && _valueAlreadyEscaped == other._valueAlreadyEscaped;
    }
    
    /**
     * Ensure that the string value is valid (i.e. the parentheses are balanced),
     * and throw an exception if it isn't.
     * <p>
     * A backslash escapes the character that follows it (including another backslash),
     * and escaped parentheses are ignored. For instance <code>\(</code> and <code>\\\(</code>
     * do not open a group, whereas <code>\\(</code> does (the backslash itself is escaped).
     * @param value the string value to check. Must not be null.
     * @throws QuerySyntaxException if the value is invalid.
     */
    public static void checkStringValue(String value) throws QuerySyntaxException
    {
        // The parenthesis nesting level.
        int level = 0;
        // Set as soon as a closing parenthesis has no matching opening one.
        boolean invalid = false;
        // true when the previous character is a backslash that escapes the current character.
        boolean escaped = false;
        
        for (int i = 0; i < value.length() && !invalid; i++)
        {
            char ch = value.charAt(i);
            
            if (escaped)
            {
                // The current character is escaped, whatever it is (parenthesis, backslash, ...): ignore it.
                escaped = false;
            }
            else if (ch == '\\')
            {
                // Unescaped backslash: it escapes the next character.
                escaped = true;
            }
            else if (ch == '(')
            {
                level++;
            }
            else if (ch == ')')
            {
                level--;
                // More closing than opening parentheses at this point: the value is invalid.
                invalid = level < 0;
            }
        }
        
        // If the parentheses are balanced, the level is 0 at this point.
        if (level != 0 || invalid)
        {
            String message = "The string search " + value + " is illegal, check the parentheses.";
            I18nizableText details = new I18nizableText("plugin.cms", "UITOOL_SEARCH_ERROR_QUERY_LABEL", Collections.singletonMap("value", new I18nizableText(value)));
            
            throw new QuerySyntaxException(message, details);
        }
    }
    
    /**
     * Escape from a string value the characters that can modify the query field
     * @param value the string value.
     * @param operator the operator. It determines which characters are kept unescaped.
     * @return the escaped value.
     */
    public static String escapeStringValue(String value, Operator operator)
    {
        switch (operator)
        {
            case LIKE:
                // '*' are allowed characters
                // So escape all characters except '*': escape each part between the stars, then join the parts back with stars
                return Stream.of(StringUtils.splitByWholeSeparatorPreserveAllTokens(value, "*"))
                        .map(ClientUtils::escapeQueryChars)
                        .collect(Collectors.joining("*"));
            case SEARCH:
            case SEARCH_STEMMED:
                // '*' are allowed characters
                // So escape all characters (except '*' and whitespaces)
                return QueryHelper.escapeQueryCharsExceptStarsAndWhitespaces(value);
            
            default:
                return ClientUtils.escapeQueryChars(value);
        }
    }
}