/*
 *  Copyright 2018 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.plugins.extraction.execution;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.commons.lang3.StringUtils;

import org.ametys.cms.contenttype.ContentType;
import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
import org.ametys.cms.repository.Content;
import org.ametys.core.util.FilenameUtils;
import org.ametys.plugins.extraction.component.AbstractSolrExtractionComponent;
import org.ametys.plugins.extraction.component.ExtractionComponent;
import org.ametys.plugins.extraction.edition.EditExtractionNodeManager;
import org.ametys.runtime.model.ElementDefinition;
import org.ametys.runtime.model.ModelHelper;
import org.ametys.runtime.model.type.ElementType;
import org.ametys.runtime.plugin.component.AbstractLogEnabled;

/**
 * The resolver for string paths which can contain variables (format is <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>)
 * and need to be resolved against some given contents.
 */
public class PathResolver extends AbstractLogEnabled implements Component, Serviceable
{
    /** The Avalon role. */
    public static final String ROLE = PathResolver.class.getName();
    
    private static final List<Character> __PATH_SEPARATORS = Arrays.asList('/', '\\');
    private static final Pattern __VARIABLE_REGEXP_PATTERN = Pattern.compile(
            "\\$" // character '$' literally
            + "\\{" // character '{' literally
            + "([\\w-\\/]*)" // capturing group: [any word character or '-' or '/'] between zero and unlimited times
            + "\\}" // character '}' literally
            );
    private static final String __NO_VALUE_OR_BLANK_FOLDER_NAME = "_NOVALUE_";
    
    private EditExtractionNodeManager _editExtractionNodeManager;
    private ContentTypeExtensionPoint _contentTypeExtensionPoint;
    
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _editExtractionNodeManager = (EditExtractionNodeManager) manager.lookup(EditExtractionNodeManager.ROLE);
        _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
    }
    
    /**
     * Returns <code>true</code> if the path contains variables to be resolved.
     * <br>If it returns <code>false</code>, then {@link #resolvePath(String, List, Extraction, Path)}
     * can be called with <code>null</code> parameters for contents and extraction.
     * @param path The relative path to resolve
     * @return <code>true</code> if the path contains variables to be resolved
     */
    public boolean hasVariable(String path)
    {
        return __VARIABLE_REGEXP_PATTERN.matcher(path).find();
    }
    
    /**
     * Returns <code>true</code> if the unresolved path represents a folder, i.e. its last element does not contain a '.' character.
     * <br>An empty path is considered as a folder.
     * @param path The relative path to resolve
     * @return <code>true</code> if the unresolved path represents a folder
     */
    public boolean isFolder(String path)
    {
        if (path.isEmpty())
        {
            return true;
        }
        
        List<String> elements = _splitPathElements(path).getElements();
        String lastElement = elements.get(elements.size() - 1);
        // Strip the variables before looking for a '.', to avoid to take account of '.'
        // in variable names (not possible for the moment but it could change)
        String lastElementWithoutVariables = __VARIABLE_REGEXP_PATTERN.matcher(lastElement).replaceAll("");
        return !lastElementWithoutVariables.contains(".");
    }
    
    /**
     * Resolve the given path, which can contain variables, with the values for the given contents.
     * <br>Thus, the result is a {@link Map} of resolved {@link Path Paths}, each value containing the list of contents for its associated resolved path key.
     * <br>If a variable is multivalued, a content can be in several paths at the same time in the result.
     * <br>
     * <br>For instance, <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>
     * could be resolved to the path <code>foo / a_val1_m_val2_z / bar / qux / val3</code> for some contents.
     * @param path The relative path to resolve. It must not start, nor end with a '/' or a '\' character
     * @param contents The contents. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false.
     * @param extraction The extraction. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false.
     * @param basePath The base absolute path
     * @return The absolute resolved paths mapped with their matching contents.
     * <br>If the returned map contains only one path with a null list, it means that all contents match for that given single path.
     * @throws IllegalArgumentException If the path contains variables that are not in the extracted contents' model
     * @throws NullPointerException If the path contains variables and the extraction is null
     */
    public Map<Path, List<Content>> resolvePath(String path, List<Content> contents, Extraction extraction, Path basePath) throws IllegalArgumentException
    {
        PathWrapper unresolvedPath = _splitPathElements(path);
        
        // Without any variable, neither the contents nor the extraction are needed.
        // This check must come before any access to the extraction, which is allowed to be null in that case.
        boolean containsVariables = unresolvedPath.getElements().stream().anyMatch(this::hasVariable);
        if (!containsVariables)
        {
            return Collections.singletonMap(_toPath(_validPath(unresolvedPath), basePath), null);
        }
        
        Objects.requireNonNull(extraction, "The extraction is mandatory to resolve a path containing variables");
        
        // Fail fast if a variable does not reference an attribute of the extracted content types
        Collection<ContentType> contentTypes = _getFirstLevelContentTypes(extraction);
        for (String element : unresolvedPath.getElements())
        {
            _checkVariableNames(contentTypes, element);
        }
        
        List<Content> contentsToResolve = Optional.ofNullable(contents).orElse(Collections.emptyList());
        Map<Content, Set<PathWrapper>> pathByContent = _pathByContent(unresolvedPath, contentsToResolve);
        Map<PathWrapper, List<Content>> contentsByPath = _contentsByPath(pathByContent);
        
        return contentsByPath.entrySet()
                .stream()
                .collect(Collectors.toMap(
                    e -> _toPath(e.getKey(), basePath),
                    Map.Entry::getValue
                ));
    }
    
    /*
     * Out:
     *      The same path with each element transformed to a valid folder name.
     *      The path made of a single empty element (i.e. the empty path) is returned as is.
     */
    private PathWrapper _validPath(PathWrapper pathWithNoVar)
    {
        List<String> pathElements = pathWithNoVar.getElements();
        if (pathElements.size() == 1 && "".equals(pathElements.get(0)))
        {
            return pathWithNoVar;
        }
        return new PathWrapper(
                pathElements.stream()
                    .map(this::_validPathElementName)
                    .collect(Collectors.toList()));
    }
    
    /*
     * Out:
     *      The path built from the base path followed by the elements of the resolved path
     */
    private Path _toPath(PathWrapper resolvedPath, Path basePath)
    {
        List<String> elements = resolvedPath.getElements();
        return Paths.get(basePath.toString(), elements.toArray(new String[0]));
    }
    
    /*
     * In:
     *      "foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}"
     * Out:
     *      ["foo", "a_${meta1/meta2/meta3}_m_${meta4}_z", "bar", "qux", "${meta5}"]
     */
    private PathWrapper _splitPathElements(String path)
    {
        List<String> res = new ArrayList<>();
        boolean previousCharWasDollar = false;
        boolean inVariable = false;
        int start = 0;
        
        for (int i = 0; i < path.length(); i++)
        {
            char currentChar = path.charAt(i);
            if (!inVariable && __PATH_SEPARATORS.contains(currentChar))
            {
                // A separator outside of a variable closes the current element
                res.add(path.substring(start, i));
                start = i + 1;
            }
            else if (!inVariable && currentChar == '$')
            {
                previousCharWasDollar = true;
            }
            else if (!inVariable && previousCharWasDollar && currentChar == '{')
            {
                // "${" opens a variable: separators are ignored until the closing '}'
                inVariable = true;
            }
            else if (inVariable && currentChar == '}')
            {
                inVariable = false;
            }
            
            if (currentChar != '$')
            {
                previousCharWasDollar = false;
            }
        }
        
        // End of string: the remaining characters form the last element
        res.add(path.substring(start));
        
        return new PathWrapper(res);
    }
    
    /*
     * In:
     *      "a_${meta1/meta2/meta3}_m_${meta4}_z"
     * Will check that the variables:
     *      ["meta1/meta2/meta3", "meta4"]
     * reference element definitions of the given content types,
     * and throw an IllegalArgumentException on the first one which does not
     */
    private void _checkVariableNames(Collection<ContentType> contentTypes, String element) throws IllegalArgumentException
    {
        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(element);
        while (m.find())
        {
            String variableName = m.group(1);
            boolean isElementDefinition = ModelHelper.hasModelItem(variableName, contentTypes)
                    && ModelHelper.getModelItem(variableName, contentTypes) instanceof ElementDefinition;
            if (!isElementDefinition)
            {
                throw new IllegalArgumentException("The variable named '" + variableName + "' can not be used in the extraction result path. It is not an attribute of the defined content types");
            }
        }
    }
    
    /*
     * Out:
     *      The content types of all the extraction components returned by the extraction
     */
    private Collection<ContentType> _getFirstLevelContentTypes(Extraction extraction)
    {
        return extraction.getExtractionComponents().stream()
                .map(this::_getContentTypeIds)
                .flatMap(Collection::stream)
                .map(_contentTypeExtensionPoint::getExtension)
                .collect(Collectors.toList());
    }
    
    /*
     * Out:
     *      The content type ids of the component: the ones of its saved query
     *      if it is a Solr component referencing a query, its own ones otherwise
     */
    private Collection<String> _getContentTypeIds(ExtractionComponent component)
    {
        if (component instanceof AbstractSolrExtractionComponent)
        {
            String queryReferenceId = ((AbstractSolrExtractionComponent) component).getQueryReferenceId();
            if (StringUtils.isNotEmpty(queryReferenceId))
            {
                return _editExtractionNodeManager.getSavedQueryContentTypes(queryReferenceId);
            }
        }
        return component.getContentTypes();
    }
    
    /*
     * Out:
     *      A map with the resolved relative paths for each content
     */
    private Map<Content, Set<PathWrapper>> _pathByContent(PathWrapper unresolvedPath, List<Content> contents)
    {
        Map<Content, Set<PathWrapper>> pathByContent = new HashMap<>();
        for (Content content : contents)
        {
            List<Set<String>> pathElements = _resolvePath(unresolvedPath, content);
            pathByContent.put(content, _getAllPaths(pathElements));
        }
        return pathByContent;
    }
    
    /*
     * Out:
     *      The (resolved) relative paths (as a list of possible elements in a set) for the given content
     */
    private List<Set<String>> _resolvePath(PathWrapper unresolvedPath, Content content)
    {
        List<Set<String>> resolvedPathElements = new ArrayList<>();
        for (String element : unresolvedPath.getElements())
        {
            Set<String> resolvedElements = _resolvePathElement(element, content);
            resolvedPathElements.add(_validPathElementNames(resolvedElements));
        }
        
        return resolvedPathElements;
    }
    
    /*
     * Out:
     *      The (resolved) possible path elements (i.e. folder names) for the given values (i.e. variables resolved for a given content)
     *      It is a set as variables can be multivalued
     */
    private Set<String> _resolvePathElement(String unresolvedElement, Content content)
    {
        Map<String, Set<String>> replacements = new HashMap<>();
        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(unresolvedElement);
        while (m.find())
        {
            String variableName = m.group(1);
            ElementType type = content.getType(variableName);
            Object variableValue = content.getValue(variableName, true);
            Set<String> strValues = _getStringValues(type, variableValue);
            replacements.put("${" + variableName + "}", strValues);
        }
        
        Set<String> pathElements = Collections.singleton(unresolvedElement);
        for (Map.Entry<String, Set<String>> replacement : replacements.entrySet())
        {
            pathElements = _replace(replacement.getKey(), replacement.getValue(), pathElements);
        }
        return pathElements;
    }
    
    /*
     * Out:
     *      The non-null values converted to strings by the given type
     *      (the value is handled as multiple if it is an instance of the array class managed by the type),
     *      or the "no value" folder name if there is no value at all
     */
    @SuppressWarnings("unchecked")
    private Set<String> _getStringValues(ElementType type, Object value)
    {
        Stream<Object> values;
        if (type.getManagedClassArray().isInstance(value))
        {
            values = Arrays.stream((Object[]) value);
        }
        else
        {
            values = Collections.singleton(value).stream();
        }
        
        Set<String> strValues = values.filter(Objects::nonNull)
                .map(type::toString)
                .collect(Collectors.toSet());
        
        if (strValues.isEmpty())
        {
            strValues = Collections.singleton(__NO_VALUE_OR_BLANK_FOLDER_NAME);
        }
        return strValues;
    }
    
    /*
     * In:
     *      toReplace="${metaB}"
     *      replaceBy={ "b1", "b2" }
     *      uncompleteElements={ "a1_${metaB}_${metaC}", "a2_${metaB}_${metaC}" }
     * Out:
     *      { "a1_b1_${metaC}", "a2_b1_${metaC}", "a1_b2_${metaC}", "a2_b2_${metaC}" }
     */
    private Set<String> _replace(String toReplace, Set<String> replaceBy, Set<String> uncompleteElements)
    {
        Set<String> newPossibleElements = new HashSet<>();
        for (String singleReplaceBy : replaceBy)
        {
            for (String uncompleteElement : uncompleteElements)
            {
                newPossibleElements.add(uncompleteElement.replace(toReplace, singleReplaceBy));
            }
        }
        return newPossibleElements;
    }
    
    /*
     * Out:
     *      The given path element names, each one transformed to have a valid folder name
     */
    private Set<String> _validPathElementNames(Set<String> elements)
    {
        return elements.stream()
                .map(this::_validPathElementName)
                .collect(Collectors.toSet());
    }
    
    /*
     * Out:
     *      The transformed path element name to have a valid folder name
     */
    private String _validPathElementName(String element)
    {
        return StringUtils.isBlank(element) ? __NO_VALUE_OR_BLANK_FOLDER_NAME : FilenameUtils.filterName(element);
    }
    
    /*
     * In:
     *      [{a1, a2}, {b}, {c1, c2}]
     * Out:
     *      {[a1, b, c1], [a1, b, c2], [a2, b, c1], [a2, b, c2]}
     *      representing {a1/b/c1, a1/b/c2, a2/b/c1, a2/b/c2}
     */
    private Set<PathWrapper> _getAllPaths(List<Set<String>> pathElements)
    {
        Set<PathWrapper> allPaths = new HashSet<>();
        allPaths.add(null); // root
        for (Set<String> possibleElements : pathElements)
        {
            allPaths = _getAllPathsInCurrentLevel(possibleElements, allPaths);
        }
        return allPaths;
    }
    
    /*
     * Out:
     *      Each path computed in the previous level, extended with each possible element of the current level
     */
    private Set<PathWrapper> _getAllPathsInCurrentLevel(Set<String> possibleElementsInCurrentLevel, Set<PathWrapper> computedPathsInPreviousLevel)
    {
        Set<PathWrapper> paths = new HashSet<>();
        for (PathWrapper computedPathInPreviousLevel : computedPathsInPreviousLevel)
        {
            for (String possibleElement : possibleElementsInCurrentLevel)
            {
                // A null previous path is the root: start from an empty list of elements
                List<String> pathInCurrentLevel = computedPathInPreviousLevel == null
                        ? new ArrayList<>()
                        : new ArrayList<>(computedPathInPreviousLevel.getElements());
                pathInCurrentLevel.add(possibleElement);
                paths.add(new PathWrapper(pathInCurrentLevel));
            }
        }
        return paths;
    }
    
    /*
     * In:
     *      A map with the resolved relative paths for each content (the different possible paths are within a set)
     * Out:
     *      The 'inverted' map, i.e. a map with the list of contents for each path
     */
    private Map<PathWrapper, List<Content>> _contentsByPath(Map<Content, Set<PathWrapper>> pathByContent)
    {
        Map<PathWrapper, List<Content>> contentsByPath = new HashMap<>();
        for (Map.Entry<Content, Set<PathWrapper>> entry : pathByContent.entrySet())
        {
            Content content = entry.getKey();
            for (PathWrapper path : entry.getValue())
            {
                contentsByPath.computeIfAbsent(path, p -> new ArrayList<>()).add(content);
            }
        }
        return contentsByPath;
    }
    
    // Just for readability of the code (PathWrapper in method signatures is better than List<String>)
    private static final class PathWrapper
    {
        private final List<String> _pathElements;
        
        PathWrapper(List<String> pathElements)
        {
            _pathElements = pathElements;
        }
        
        List<String> getElements()
        {
            return _pathElements;
        }
        
        @Override
        public int hashCode()
        {
            return 31 + Objects.hashCode(_pathElements);
        }
        
        @Override
        public boolean equals(Object obj)
        {
            if (this == obj)
            {
                return true;
            }
            if (!(obj instanceof PathWrapper))
            {
                return false;
            }
            return Objects.equals(_pathElements, ((PathWrapper) obj)._pathElements);
        }
        
        @Override
        public String toString()
        {
            return _pathElements.toString();
        }
    }
}