/*
 *  Copyright 2017 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.extraction.component;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.xml.sax.ContentHandler;

import org.ametys.cms.content.ContentHelper;
import org.ametys.cms.contenttype.ContentConstants;
import org.ametys.cms.contenttype.ContentType;
import org.ametys.cms.contenttype.MetadataDefinition;
import org.ametys.cms.contenttype.MetadataType;
import org.ametys.cms.repository.Content;
import org.ametys.cms.search.GetQueryFromJSONHelper;
import org.ametys.cms.search.content.ContentSearcherFactory;
import org.ametys.cms.search.content.ContentSearcherFactory.SimpleContentSearcher;
import org.ametys.cms.search.model.SystemProperty;
import org.ametys.cms.search.model.SystemPropertyExtensionPoint;
import org.ametys.cms.search.query.QuerySyntaxException;
import org.ametys.cms.search.query.StringQuery;
import org.ametys.cms.search.query.Query.Operator;
import org.ametys.cms.search.ui.model.SearchUIModel;
import org.ametys.core.util.JSONUtils;
import org.ametys.core.util.StringUtils;
import org.ametys.plugins.extraction.execution.ExtractionExecutionContext;
import org.ametys.plugins.extraction.execution.ExtractionExecutionContextHierarchyElement;
import org.ametys.plugins.queriesdirectory.Query;
import org.ametys.plugins.repository.AmetysObjectIterable;
import org.ametys.plugins.repository.AmetysObjectResolver;
import org.ametys.plugins.thesaurus.content.ThesaurusItemContentType;

/**
 * This class represents an extraction component with a solr query
 */
public abstract class AbstractSolrExtractionComponent extends AbstractExtractionComponent
{
    /**
     * Regex used to extract variables from a join expression: \$\{(\.\.(?:\/\.\.)*(?:\/[^\/}]+)?)\}
     * a variable is inside a ${}
     * variable starts with .. (to get the direct parent),
     * has several /.. (to get parent of parent of (...))
     * and can have a /metadataName (to specify the metadata to join on)
     */
    private static final String EXTRACT_JOIN_VARIABLES_REGEX = "\\$\\{(\\.\\.(?:\\/\\.\\.)*(?:\\/[^\\/}]+)?)\\}";

    /** Compiled form of {@link #EXTRACT_JOIN_VARIABLES_REGEX}, built once instead of at every extraction */
    private static final Pattern EXTRACT_JOIN_VARIABLES_PATTERN = Pattern.compile(EXTRACT_JOIN_VARIABLES_REGEX);

    /** Marker opening a group inside a clause expression (a group is closed by the matching '}') */
    private static final String GROUP_PREFIX = "#{";

    /** Content types concerned by the solr search */
    protected Set<String> _contentTypes = new HashSet<>();

    /** Reference id of a recorded query */
    protected String _queryReferenceId;

    /** The list of clauses */
    protected List<ExtractionClause> _clauses = new ArrayList<>();

    /** Helper to resolve referenced query infos */
    protected GetQueryFromJSONHelper _getQueryFromJSONHelper;

    /** Util class to manipulate JSON String */
    protected JSONUtils _jsonUtils;

    private AmetysObjectResolver _resolver;
    private SystemPropertyExtensionPoint _systemPropertyExtensionPoint;
    private ContentHelper _contentHelper;
    private ContentSearcherFactory _contentSearcherFactory;

    @Override
    public void service(ServiceManager serviceManager) throws ServiceException
    {
        super.service(serviceManager);
        _jsonUtils = (JSONUtils) serviceManager.lookup(JSONUtils.ROLE);
        _getQueryFromJSONHelper = (GetQueryFromJSONHelper) serviceManager.lookup(GetQueryFromJSONHelper.ROLE);
        _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE);
        _systemPropertyExtensionPoint = (SystemPropertyExtensionPoint) serviceManager.lookup(SystemPropertyExtensionPoint.ROLE);
        _contentHelper = (ContentHelper) serviceManager.lookup(ContentHelper.ROLE);
        _contentSearcherFactory = (ContentSearcherFactory) serviceManager.lookup(ContentSearcherFactory.ROLE);
    }

    /**
     * Reads the clauses and either the query reference ("ref" attribute) or the
     * content types ("contentTypes" attribute) from the component configuration.
     * Both attributes are mutually exclusive.
     * @param configuration the component configuration
     * @throws ConfigurationException if the parent configuration fails or if neither "ref" nor "contentTypes" is set
     * @throws IllegalArgumentException if both "ref" and "contentTypes" are set
     */
    @Override
    public void configure(Configuration configuration) throws ConfigurationException
    {
        super.configure(configuration);

        Configuration clauses = configuration.getChild("clauses");
        for (Configuration clause : clauses.getChildren("clause"))
        {
            addClauses(clause.getValue());
        }

        _contentTypes = new HashSet<>();

        List<String> attributeNames = Arrays.asList(configuration.getAttributeNames());
        if (attributeNames.contains("ref"))
        {
            if (attributeNames.contains("contentTypes"))
            {
                throw new IllegalArgumentException(getLogsPrefix() + "a component with a query reference should not specify a content type");
            }

            _queryReferenceId = configuration.getAttribute("ref");
        }
        else
        {
            String contentTypesString = configuration.getAttribute("contentTypes");
            _contentTypes.addAll(StringUtils.stringToCollection(contentTypesString));
        }
    }

    /**
     * Resolves the referenced query (if any), then computes the groups and join variables of every clause.
     * @param context the execution context
     * @throws Exception if an error occurs
     */
    @Override
    public void prepareComponentExecution(ExtractionExecutionContext context) throws Exception
    {
        super.prepareComponentExecution(context);

        if (_queryReferenceId != null && !_queryReferenceId.isEmpty())
        {
            Query referencedQuery = _resolver.resolveById(_queryReferenceId);
            computeReferencedQueryInfos(referencedQuery.getContent());
        }

        _computeClausesInfos(context);
    }

    /**
     * Manages the stored query referenced by the component.
     * The query string built from the stored query is inserted as the first clause,
     * and the content types of the component are replaced by those of the stored query.
     * @param refQueryContent referenced query content
     * @throws QuerySyntaxException if there is a syntax error in the referenced query
     */
    @SuppressWarnings("unchecked")
    protected void computeReferencedQueryInfos(String refQueryContent) throws QuerySyntaxException
    {
        Map<String, Object> contentMap = _jsonUtils.convertJsonToMap(refQueryContent);
        Map<String, Object> exportParams = (Map<String, Object>) contentMap.get("exportParams");
        String modelId = (String) exportParams.get("model");

        String q;
        if (modelId.contains("solr"))
        {
            // The stored query already carries a solr query string
            Map<String, Object> values = (Map<String, Object>) exportParams.get("values");
            q = (String) values.get("query");

            // A stored query without any "contentTypes" entry must not fail with a NullPointerException
            List<String> contentTypes = (List<String>) values.get("contentTypes");
            _contentTypes = contentTypes != null ? new HashSet<>(contentTypes) : new HashSet<>();
        }
        else
        {
            // The query string has to be built from the search model
            SearchUIModel model = _getQueryFromJSONHelper.getSearchUIModel(exportParams);
            List<String> contentTypesToFill = new ArrayList<>();
            org.ametys.cms.search.query.Query query = _getQueryFromJSONHelper.getQueryFromModel(model, exportParams, contentTypesToFill);

            q = query.build();
            _contentTypes = new HashSet<>(contentTypesToFill);
        }

        ExtractionClause clause = new ExtractionClause();
        clause.setExpression(q);
        _clauses.add(0, clause);
    }

    /**
     * Resolves the clause variables of every clause, then splits each clause into groups
     * and records, for each group, the join variable it uses and the field path to join on.
     * @param context the execution context providing the clause variables
     * @throws IllegalArgumentException if a clause has groups and a join variable outside of them,
     *                                  or if a group uses several distinct join variables
     */
    private void _computeClausesInfos(ExtractionExecutionContext context)
    {
        for (ExtractionClause clause : _clauses)
        {
            String resolvedExpression = _resolveExpression(clause.getExpression(), context.getClauseVariables());
            clause.setExpression(resolvedExpression);

            // Groups have to be extracted from the resolved expression: their complete expression is
            // searched later in the clause expression, which is the resolved one
            Map<String, String> groupExpressions = _extractGroupExpressionsFromClause(resolvedExpression);
            if (groupExpressions.isEmpty())
            {
                // The only group is the entire expression
                // The complete expression is the same as the classic one (there is no characters used to identify the group)
                groupExpressions.put(resolvedExpression, resolvedExpression);
            }
            else if (_hasVariablesOutsideGroups(resolvedExpression, groupExpressions.keySet()))
            {
                throw new IllegalArgumentException(getLogsPrefix() + "if there's at least one group, every variable should be in a group.");
            }

            for (Map.Entry<String, String> groupExpression : groupExpressions.entrySet())
            {
                ExtractionClauseGroup group = new ExtractionClauseGroup();

                group.setCompleteExpression(groupExpression.getKey());
                group.setExpression(groupExpression.getValue());

                // Several occurrences of the same variable count as one
                Set<String> variables = new HashSet<>(_extractVariableFromClauseExpression(groupExpression.getValue()));
                if (variables.size() > 1)
                {
                    throw new IllegalArgumentException(getLogsPrefix() + "only variables with same name are allowed within a single group");
                }

                if (!variables.isEmpty())
                {
                    String variable = variables.iterator().next();

                    // The field to join on is the last segment of the variable
                    String[] pathSegments = variable.split(JOIN_HIERARCHY_SEPARATOR);
                    String fieldPath = pathSegments[pathSegments.length - 1];

                    group.setVariable(variable);
                    group.setFieldPath(fieldPath);
                }

                clause.addGroup(group);
            }
        }
    }

    /**
     * Replaces every <code>${variableName}</code> of the expression by the escaped value of the variable.
     * @param expression the expression to resolve
     * @param queryVariables the values of the variables, indexed by variable name
     * @return the resolved expression
     */
    private String _resolveExpression(String expression, Map<String, String> queryVariables)
    {
        String resolvedExpression = expression;
        for (Map.Entry<String, String> entry : queryVariables.entrySet())
        {
            String variableName = entry.getKey();
            String contentId = entry.getValue();
            String escapedContentId = StringQuery.escapeStringValue(contentId, Operator.EQ);
            resolvedExpression = resolvedExpression.replace("${" + variableName + "}", escapedContentId);
        }

        return resolvedExpression;
    }

    /**
     * Checks if the clause has join variables that are not inside a group.
     * The check compares the number of variables found in the clause and in the groups.
     * @param clauseExpression the whole clause expression
     * @param groupExpressions the expressions of the groups of the clause
     * @return <code>true</code> if the clause has more variables than its groups
     */
    private boolean _hasVariablesOutsideGroups(String clauseExpression, Collection<String> groupExpressions)
    {
        int nbVariablesInGroups = 0;
        for (String groupExpression : groupExpressions)
        {
            nbVariablesInGroups += _extractVariableFromClauseExpression(groupExpression).size();
        }

        return _extractVariableFromClauseExpression(clauseExpression).size() > nbVariablesInGroups;
    }

    /**
     * Extracts the groups of a clause expression. A group starts with <code>#{</code> and ends with the matching <code>}</code>.
     * A group that is never closed is ignored.
     * @param expression the clause expression
     * @return the groups in order of appearance: the key is the complete expression (with <code>#{</code> and <code>}</code>), the value is the inner expression
     */
    Map<String, String> _extractGroupExpressionsFromClause(String expression)
    {
        Map<String, String> groupExpressions = new LinkedHashMap<>();
        int indexOfGroup = expression.indexOf(GROUP_PREFIX);
        while (indexOfGroup != -1)
        {
            StringBuilder currentGroup = new StringBuilder();
            int groupContentStart = indexOfGroup + GROUP_PREFIX.length();

            // Start after the marker: if the expression ends with "#{", the loop below does not run
            // and the next search would otherwise find the same marker again, forever
            int endIndex = groupContentStart;
            int braceLevel = 0;
            for (int i = groupContentStart; i < expression.length(); i++)
            {
                endIndex = i;
                char currentChar = expression.charAt(i);
                if ('{' == currentChar)
                {
                    braceLevel++;
                }
                else if ('}' == currentChar)
                {
                    if (0 == braceLevel)
                    {
                        // This brace closes the group
                        groupExpressions.put(GROUP_PREFIX + currentGroup.toString() + "}", currentGroup.toString());
                        break;
                    }
                    braceLevel--;
                }
                currentGroup.append(currentChar);
            }

            indexOfGroup = expression.indexOf(GROUP_PREFIX, endIndex);
        }
        return groupExpressions;
    }

    /**
     * Extracts the join variables (<code>${..}</code>, <code>${../..}</code>, <code>${../field}</code>, ...) of an expression.
     * @param expression the expression
     * @return the variables without <code>${</code> and <code>}</code>, in order of appearance, with duplicates
     */
    List<String> _extractVariableFromClauseExpression(String expression)
    {
        List<String> variables = new ArrayList<>();

        Matcher matcher = EXTRACT_JOIN_VARIABLES_PATTERN.matcher(expression);
        while (matcher.find())
        {
            variables.add(matcher.group(1));
        }

        return variables;
    }

    /**
     * Searches the contents matching the clauses (without checking rights) and processes them.
     * Nothing is done if the clause queries can not be built.
     * @param contentHandler result document
     * @param context the execution context
     * @throws Exception if an error occurs
     */
    @Override
    public void executeComponent(ContentHandler contentHandler, ExtractionExecutionContext context) throws Exception
    {
        List<String> clauseQueries = _getClauseQueries(context);

        if (clauseQueries != null)
        {
            AmetysObjectIterable<Content> contents = getContentSearcher().withFilterQueryStrings(clauseQueries).setCheckRights(false).search("*:*");
            processContents(contents, contentHandler, context);
        }
    }

    /**
     * Builds the filter queries of the clauses: in each group using a join variable, the group is replaced
     * by the group expression evaluated for each value of the variable, joined with OR.
     * @param context the execution context
     * @return the filter queries, or <code>null</code> if a join variable has no value
     */
    List<String> _getClauseQueries(ExtractionExecutionContext context)
    {
        List<String> clauseQueries = new ArrayList<>();

        for (ExtractionClause clause : _clauses)
        {
            String expression = clause.getExpression();

            for (ExtractionClauseGroup group : clause.getGroups())
            {
                String variable = group.getVariable();

                if (variable != null && !variable.isEmpty())
                {
                    ExtractionExecutionContextHierarchyElement currentContextHierarchyElement = _getCurrentContextElementFromVariable(variable, context.getHierarchyElements());

                    String fieldPath = group.getFieldPath();

                    ExtractionComponent contextComponent = currentContextHierarchyElement.getComponent();
                    MetadataType metadataType = _getMetadataType(fieldPath, contextComponent.getContentTypes());
                    Collection<Object> values = _getValuesFromVariable(fieldPath, metadataType, currentContextHierarchyElement, context.getDefaultLocale());

                    if (values.isEmpty())
                    {
                        getLogger().warn(getLogsPrefix() + "no value found for field '" + fieldPath + "'. The query of this component can't be achieved");
                        return null;
                    }

                    Collection<String> groupExpressions = new ArrayList<>();
                    for (Object value : values)
                    {
                        String valueAsString = _getValueAsString(value, metadataType, fieldPath);
                        groupExpressions.add(group.getExpression().replace("${" + variable + "}", valueAsString));
                    }

                    String groupReplacement = org.apache.commons.lang3.StringUtils.join(groupExpressions, " OR ");
                    expression = expression.replace(group.getCompleteExpression(), "(" + groupReplacement + ")");
                }
            }

            clauseQueries.add(expression);
        }

        return clauseQueries;
    }

    /**
     * Retrieves the element of the context hierarchy targeted by a join variable.
     * @param variable the join variable
     * @param context the hierarchy elements of the execution context
     * @return the targeted hierarchy element
     * @throws IllegalArgumentException if the variable does not target an existing element
     */
    private ExtractionExecutionContextHierarchyElement _getCurrentContextElementFromVariable(String variable, List<ExtractionExecutionContextHierarchyElement> context)
    {
        int lastIndexOfSlash = variable.lastIndexOf(JOIN_HIERARCHY_SEPARATOR);
        int indexOfCurrentContext = -1;
        if (lastIndexOfSlash == -1)
        {
            // No separator: the last element of the hierarchy is targeted
            indexOfCurrentContext = context.size() - 1;
        }
        else
        {
            // NOTE(review): presumably each level before the last separator is 3 characters long ("../") - confirm against JOIN_HIERARCHY_ELEMENT and JOIN_HIERARCHY_SEPARATOR
            int hierarchicalLevel = (lastIndexOfSlash + 1) / 3;
            indexOfCurrentContext = context.size() - hierarchicalLevel;
            if (variable.endsWith(JOIN_HIERARCHY_ELEMENT))
            {
                // The last segment is a hierarchy element too, not a field to join on
                indexOfCurrentContext--;
            }
        }
        if (indexOfCurrentContext < 0 || indexOfCurrentContext >= context.size())
        {
            throw new IllegalArgumentException(getLogsPrefix() + "join on '" + variable + "' does not refer to an existing parent");
        }
        return context.get(indexOfCurrentContext);
    }

    /**
     * Retrieves the field path's metadata type from content types
     * @param fieldPath the field path
     * @param contentTypes the content types
     * @return the metadata type
     * @throws IllegalArgumentException if the field path matches neither a system property nor a metadata of the common ancestor of the content types
     */
    protected MetadataType _getMetadataType(String fieldPath, Collection<String> contentTypes)
    {
        // Manage direct content references
        if (JOIN_HIERARCHY_ELEMENT.equals(fieldPath))
        {
            return MetadataType.CONTENT;
        }

        // Manage System Properties
        String[] pathSegments = fieldPath.split(EXTRACTION_METADATA_PATH_SEPARATOR);
        String propertyName = pathSegments[pathSegments.length - 1];
        if (_systemPropertyExtensionPoint.hasExtension(propertyName))
        {
            SystemProperty systemProperty = _systemPropertyExtensionPoint.getExtension(propertyName);
            return systemProperty.getType();
        }

        // Get content types common ancestor
        ContentType contentTypesAncestor = null;
        String contentTypesAncestorId = _contentTypesHelper.getCommonAncestor(contentTypes);
        if (contentTypesAncestorId != null && _contentTypeExtensionPoint.hasExtension(contentTypesAncestorId))
        {
            contentTypesAncestor = _contentTypeExtensionPoint.getExtension(contentTypesAncestorId);
        }

        // Manage metadata
        if (contentTypesAncestor != null)
        {
            String fieldPathWithClassicSeparator = fieldPath.replaceAll(EXTRACTION_METADATA_PATH_SEPARATOR, ContentConstants.METADATA_PATH_SEPARATOR);
            MetadataDefinition definition = _contentTypesHelper.getMetadataDefinition(fieldPathWithClassicSeparator, contentTypesAncestor);
            if (definition != null)
            {
                return definition.getType();
            }
            throw new IllegalArgumentException(getLogsPrefix() + "join on '" + fieldPath + "'. This metadata is not available for '" + contentTypesAncestor.getId() + "' content type");
        }

        throw new IllegalArgumentException(getLogsPrefix() + "join on '" + fieldPath + "'. This metadata is not available");
    }

    /**
     * Collects the values of the field for all the contents of the hierarchy element.
     * @param fieldPath the path of the field to join on
     * @param metadataType the type of the field
     * @param contextHierarchyElement the hierarchy element providing the contents
     * @param defaultLocale the default locale, used to read the values
     * @return the distinct values, in order of appearance
     */
    private Collection<Object> _getValuesFromVariable(String fieldPath, MetadataType metadataType, ExtractionExecutionContextHierarchyElement contextHierarchyElement, Locale defaultLocale)
    {
        Collection<Object> values = new LinkedHashSet<>();

        boolean isAutoposting = contextHierarchyElement.isAutoposting();
        for (Content content : contextHierarchyElement.getContents())
        {
            values.addAll(_getContentValuesFromVariable(content, fieldPath, metadataType, isAutoposting, defaultLocale));
        }

        return values;
    }

    /**
     * Collects the values of the field for a content. With autoposting, the child terms of
     * the thesaurus terms found among the values are (recursively) added to the values.
     * @param content the content
     * @param fieldPath the path of the field to join on
     * @param metadataType the type of the field
     * @param isAutoposting <code>true</code> to add the child terms of the thesaurus terms
     * @param defaultLocale the default locale, used to read the values
     * @return the distinct values, in order of appearance. Empty if the content has no value for the field
     */
    private Collection<Object> _getContentValuesFromVariable(Content content, String fieldPath, MetadataType metadataType, boolean isAutoposting, Locale defaultLocale)
    {
        Object value = _getContentValue(content, fieldPath, defaultLocale);
        if (value == null)
        {
            return Collections.emptyList();
        }

        Collection<Object> values = new LinkedHashSet<>();
        if (value instanceof Collection<?>)
        {
            values.addAll((Collection<?>) value);
        }
        else
        {
            values.add(value);
        }

        // The result is a copy, in order to iterate on the direct values while adding the children
        Collection<Object> result = new LinkedHashSet<>(values);

        if (isAutoposting && MetadataType.CONTENT == metadataType)
        {
            for (Object object : values)
            {
                Content parent = (Content) object;

                // Manage autoposting only if the current value is a thesaurus term
                if (_contentTypesHelper.isInstanceOf(parent, ThesaurusItemContentType.TERM_CONTENT_TYPE_ID))
                {
                    AmetysObjectIterable<Content> children = _thesaurusDAO.getChildTerms(parent.getId());
                    for (Content child : children)
                    {
                        result.addAll(_getContentValuesFromVariable(child, JOIN_HIERARCHY_ELEMENT, metadataType, isAutoposting, defaultLocale));
                    }
                }
            }
        }

        return result;
    }

    /**
     * Reads the value of the field for a content.
     * @param content the content
     * @param fieldPath the path of the field, or {@link #JOIN_HIERARCHY_ELEMENT} for the content itself
     * @param defaultLocale the default locale, used to read the value
     * @return the content itself, or the value given by the content helper
     */
    private Object _getContentValue(Content content, String fieldPath, Locale defaultLocale)
    {
        if (JOIN_HIERARCHY_ELEMENT.equals(fieldPath))
        {
            return content;
        }

        String fieldPathWithClassicSeparator = fieldPath.replaceAll(EXTRACTION_METADATA_PATH_SEPARATOR, ContentConstants.METADATA_PATH_SEPARATOR);
        return _contentHelper.getValue(content, fieldPathWithClassicSeparator, defaultLocale, true);
    }

    /**
     * Converts a value to its escaped form for the solr query.
     * @param value the value
     * @param metadataType the type of the value
     * @param fieldPath the path of the field the value comes from. Only used in the error message
     * @return the escaped value. For a content, the value is its identifier
     * @throws IllegalArgumentException if the type is not supported
     */
    private String _getValueAsString(Object value, MetadataType metadataType, String fieldPath)
    {
        String valueAsString;
        switch (metadataType)
        {
            case STRING:
            case LONG:
            case DOUBLE:
            case BOOLEAN:
                valueAsString = value.toString();
                break;
            case CONTENT:
                valueAsString = ((Content) value).getId();
                break;
            default:
                throw new IllegalArgumentException(getLogsPrefix() + "join on '" + fieldPath + "'. Metadata type '" + metadataType + "' is not supported by extraction module");
        }

        return StringQuery.escapeStringValue(valueAsString, Operator.EQ);
    }

    /**
     * Retrieves the content searcher to use for solr search
     * @return the content searcher
     */
    protected SimpleContentSearcher getContentSearcher()
    {
        return _contentSearcherFactory.create(_contentTypes);
    }

    /**
     * Process result contents to format the result document
     * @param contents search results
     * @param contentHandler result document
     * @param context component execution context
     * @throws Exception if an error occurs
     */
    protected abstract void processContents(AmetysObjectIterable<Content> contents, ContentHandler contentHandler, ExtractionExecutionContext context) throws Exception;

    /**
     * Adds the clauses, the content types and the query reference to the "data" of the details of the parent.
     * @return the details of the component
     */
    @Override
    public Map<String, Object> getComponentDetailsForTree()
    {
        Map<String, Object> details = super.getComponentDetailsForTree();

        @SuppressWarnings("unchecked")
        Map<String, Object> data = (Map<String, Object>) details.get("data");

        List<String> clauses = new ArrayList<>();
        for (ExtractionClause clause : this.getClauses())
        {
            clauses.add(clause.getExpression());
        }
        data.put("clauses", clauses);

        data.put("useQueryRef", org.apache.commons.lang3.StringUtils.isNotEmpty(_queryReferenceId));
        data.put("contentTypes", this.getContentTypes());
        data.put("queryReferenceId", this.getQueryReferenceId());

        return details;
    }

    /**
     * Retrieves the content types concerned by the solr search
     * @return the content types. This is the internal set of the component, not a copy
     */
    public Set<String> getContentTypes()
    {
        return _contentTypes;
    }

    /**
     * Add content types to component
     * @param contentTypes Array of content types to add
     */
    public void addContentTypes(String... contentTypes)
    {
        _contentTypes.addAll(Arrays.asList(contentTypes));
    }

    /**
     * Retrieves the id of the referenced query
     * @return the id of the referenced query
     */
    public String getQueryReferenceId()
    {
        return _queryReferenceId;
    }

    /**
     * Sets the id of the referenced query
     * @param queryReferenceId The id of the referenced query to set
     */
    public void setQueryReferenceId(String queryReferenceId)
    {
        _queryReferenceId = queryReferenceId;
    }

    /**
     * Retrieves the component clauses
     * @return the component clauses
     */
    public List<ExtractionClause> getClauses()
    {
        return _clauses;
    }

    /**
     * Add clauses to the component. Do not manage clauses' groups
     * @param expressions Array clauses expressions to add
     */
    public void addClauses(String... expressions)
    {
        for (String expression : expressions)
        {
            ExtractionClause clause = new ExtractionClause();
            clause.setExpression(expression);
            _clauses.add(clause);
        }
    }
}