001/* 002 * Copyright 2018 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.extraction.execution; 017 018import java.nio.file.Path; 019import java.nio.file.Paths; 020import java.util.ArrayList; 021import java.util.Arrays; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.HashMap; 025import java.util.HashSet; 026import java.util.List; 027import java.util.Map; 028import java.util.Objects; 029import java.util.Optional; 030import java.util.Set; 031import java.util.regex.Matcher; 032import java.util.regex.Pattern; 033import java.util.stream.Collectors; 034 035import org.apache.avalon.framework.component.Component; 036import org.apache.avalon.framework.service.ServiceException; 037import org.apache.avalon.framework.service.ServiceManager; 038import org.apache.avalon.framework.service.Serviceable; 039import org.apache.commons.lang3.StringUtils; 040 041import org.ametys.cms.repository.Content; 042import org.ametys.cms.search.content.ContentValuesExtractorFactory; 043import org.ametys.cms.search.content.ContentValuesExtractorFactory.SimpleContentValuesExtractor; 044import org.ametys.plugins.extraction.component.AbstractSolrExtractionComponent; 045import org.ametys.plugins.extraction.component.ExtractionComponent; 046import org.ametys.plugins.extraction.edition.EditExtractionNodeManager; 047import org.ametys.plugins.extraction.utils.FilenameUtils; 048import org.ametys.runtime.plugin.component.AbstractLogEnabled; 049 050/** 051 * The resolver for string paths which can contain variables (format is <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>) 052 * and need to be resolved against some given contents. 053 */ 054public class PathResolver extends AbstractLogEnabled implements Component, Serviceable 055{ 056 /** The Avalon role. */ 057 public static final String ROLE = PathResolver.class.getName(); 058 059 private static final List<Character> __PATH_SEPARATORS = Arrays.asList('/', '\\'); 060 private static final Pattern __VARIABLE_REGEXP_PATTERN = Pattern.compile( 061 "\\$" // character '$' literally 062 + "\\{" // character '{' literally 063 + "([\\w-\\/]*)" // capturing group: [any word character or '-' or '/'] between zero and unlimited times 064 + "\\}" // character '}' literally 065 ); 066 private static final String __NO_VALUE_OR_BLANK_FOLDER_NAME = "_NOVALUE_"; 067 068 private ContentValuesExtractorFactory _contentValuesExtractorFactory; 069 private EditExtractionNodeManager _editExtractionNodeManager; 070 071 @Override 072 public void service(ServiceManager manager) throws ServiceException 073 { 074 _contentValuesExtractorFactory = (ContentValuesExtractorFactory) manager.lookup(ContentValuesExtractorFactory.ROLE); 075 _editExtractionNodeManager = (EditExtractionNodeManager) manager.lookup(EditExtractionNodeManager.ROLE); 076 } 077 078 /** 079 * Returns <code>true</code> if the path contains variables to be resolved. 080 * <br>If it returns <code>false</code>, then {@link #resolvePath(String, List, Extraction, Path)} 081 * can be called with <code>null</code> parameters for contents and extraction. 082 * @param path The relative path to resolve 083 * @return <code>true</code> if the path contains variables to be resolved 084 */ 085 public boolean hasVariable(String path) 086 { 087 Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(path); 088 return m.find(); 089 } 090 091 /** 092 * Returns <code>true</code> if the unresolved path represents a folder, i.e. its last element does not contain a '.' character. 093 * @param path The relative path to resolve 094 * @return <code>true</code> if the unresolved path represents a folder 095 */ 096 public boolean isFolder(String path) 097 { 098 PathWrapper unresolvedPath = _splitPathElements(path); 099 if (path.isEmpty()) 100 { 101 return true; 102 } 103 104 List<String> elements = unresolvedPath.getElements(); 105 String lastElement = elements.get(elements.size() - 1); 106 // dummy variable replacement to avoid to take account of '.' 107 // in variable names (not possible for the moment but it could change) 108 Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(lastElement); 109 StringBuffer sb = new StringBuffer(); 110 while (m.find()) 111 { 112 m.group(1); 113 m.appendReplacement(sb, ""); 114 } 115 m.appendTail(sb); 116 return !sb.toString().contains("."); 117 } 118 119 /** 120 * Resolve the given path, which can contain variables, with the values for the given contents. 121 * <br>Thus, the result is a {@link Map} of resolved {@link Path Paths}, each value containg the list of contents for its associated resolved path key. 122 * <br>If a variable is multivalued, a content can be in several paths at the same time in the result. 123 * <br> 124 * <br>For instance, <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code> 125 * could be resolved to the path <code>foo / a_val1_m_val2_z / bar / qux / val3</code> for some contents. 126 * @param path The relative path to resolve. It must not start, nor end with a '/' or a '\' character 127 * @param contents The contents. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false. 128 * @param extraction The extraction. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false. 129 * @param basePath The base absolute path 130 * @return The absolute resolved paths mapped with their matching contents. 131 * <br>If the returned map contains only one path with a null list, it means that all contents match for that given single path. 132 */ 133 public Map<Path, List<Content>> resolvePath(String path, List<Content> contents, Extraction extraction, Path basePath) 134 { 135 PathWrapper unresolvedPath = _splitPathElements(path); 136 137 Set<String> variableNames = new HashSet<>(); 138 for (String element : unresolvedPath.getElements()) 139 { 140 _fillVariableNames(element, variableNames); 141 } 142 143 if (variableNames.isEmpty()) 144 { 145 return Collections.singletonMap(_toPath(_validPath(unresolvedPath), basePath), null); 146 } 147 148 Collection<String> contentTypes = _getFirstLevelContentTypes(extraction); 149 SimpleContentValuesExtractor valuesExtractor = _contentValuesExtractorFactory.create(contentTypes, new ArrayList<>(variableNames)); 150 Map<Content, Set<PathWrapper>> pathByContent = _pathByContent(unresolvedPath, Optional.ofNullable(contents).orElse(Collections.emptyList()), valuesExtractor); 151 152 Map<PathWrapper, List<Content>> contentsByPath = _contentsByPath(pathByContent); 153 154 return contentsByPath.entrySet() 155 .stream() 156 .collect(Collectors.toMap( 157 e -> _toPath(e.getKey(), basePath), 158 e -> e.getValue() 159 )); 160 } 161 162 private PathWrapper _validPath(PathWrapper pathWithNoVar) 163 { 164 List<String> pathElements = pathWithNoVar.getElements(); 165 if (pathElements.size() == 1 && "".equals(pathElements.get(0))) 166 { 167 return pathWithNoVar; 168 } 169 return new PathWrapper( 170 pathElements.stream() 171 .map(this::_validPathElementName) 172 .collect(Collectors.toList())); 173 } 174 175 private Path _toPath(PathWrapper resolvedPath, Path basePath) 176 { 177 List<String> elements = resolvedPath.getElements(); 178 return Paths.get(basePath.toString(), elements.toArray(new String[elements.size()])); 179 } 180 181 /* 182 * In: 183 * "foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}" 184 * Out: 185 * ["foo", "a_${meta1/meta2/meta3}_m_${meta4}_z", "bar", "qux", "${meta5}"] 186 */ 187 private PathWrapper _splitPathElements(String path) 188 { 189 List<String> res = new ArrayList<>(); 190 boolean previousCharWasDollar = false; 191 boolean inVariable = false; 192 int start = 0; 193 int end = 0; 194 195 for (int i = 0; i < path.length(); i++) 196 { 197 char currentChar = path.charAt(i); 198 if (!inVariable && __PATH_SEPARATORS.contains(currentChar)) 199 { 200 end = i; 201 res.add(path.substring(start, end)); 202 start = i + 1; 203 } 204 else if (!inVariable && currentChar == '$') 205 { 206 previousCharWasDollar = true; 207 } 208 else if (!inVariable && previousCharWasDollar && currentChar == '{') 209 { 210 inVariable = true; 211 } 212 else if (inVariable && currentChar == '}') 213 { 214 inVariable = false; 215 } 216 217 if (currentChar != '$') 218 { 219 previousCharWasDollar = false; 220 } 221 } 222 223 // End of string 224 res.add(path.substring(start, path.length())); 225 226 return new PathWrapper(res); 227 } 228 229 /* 230 * In: 231 * "a_${meta1/meta2/meta3}_m_${meta4}_z" 232 * Will fill variableNames with: 233 * ["meta1/meta2/meta3", "meta4"] 234 */ 235 private void _fillVariableNames(String element, Set<String> variableNames) 236 { 237 Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(element); 238 while (m.find()) 239 { 240 String variableName = m.group(1); 241 variableNames.add(variableName); 242 } 243 } 244 245 private Collection<String> _getFirstLevelContentTypes(Extraction extraction) 246 { 247 return extraction.getExtractionComponents().stream() 248 .map(this::_getContentTypes) 249 .flatMap(Collection::stream) 250 .collect(Collectors.toList()); 251 } 252 253 private Collection<String> _getContentTypes(ExtractionComponent component) 254 { 255 if (component instanceof AbstractSolrExtractionComponent) 256 { 257 String queryReferenceId = ((AbstractSolrExtractionComponent) component).getQueryReferenceId(); 258 if (StringUtils.isNotEmpty(queryReferenceId)) 259 { 260 return _editExtractionNodeManager.getSavedQueryContentTypes(queryReferenceId); 261 } 262 } 263 return component.getContentTypes(); 264 } 265 266 /* 267 * Out: 268 * A map with the resolved relative paths for each content 269 */ 270 private Map<Content, Set<PathWrapper>> _pathByContent(PathWrapper unresolvedPath, List<Content> contents, SimpleContentValuesExtractor valuesExtractor) 271 { 272 Map<Content, Set<PathWrapper>> pathByContent = new HashMap<>(); 273 for (Content content : contents) 274 { 275 List<Set<String>> pathElements = _resolvePath(unresolvedPath, content, valuesExtractor); 276 Set<PathWrapper> allPaths = _getAllPaths(pathElements); 277 pathByContent.put(content, allPaths); 278 } 279 return pathByContent; 280 } 281 282 /* 283 * Out: 284 * The (resolved) relative paths (as a list of possible elements in a set) for the given content 285 */ 286 private List<Set<String>> _resolvePath(PathWrapper unresolvedPath, Content content, SimpleContentValuesExtractor valuesExtractor) 287 { 288 List<Set<String>> resolvedPathElements = new ArrayList<>(); 289 Map<String, Object> values = valuesExtractor.getValues(content, null); 290 291 for (String element : unresolvedPath.getElements()) 292 { 293 Set<String> resolvedElements = _resolvePathElement(element, values); 294 resolvedPathElements.add(_validPathElementNames(resolvedElements)); 295 } 296 297 return resolvedPathElements; 298 } 299 300 /* 301 * Out: 302 * The (resolved) possible path elements (i.e. folder names) for the given values (i.e. variables resolved for a given content) 303 * It is a set as variables can be multivalued 304 */ 305 private Set<String> _resolvePathElement(String unresolvedElement, Map<String, Object> values) 306 { 307 Map<String, Set<String>> replacements = new HashMap<>(); 308 Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(unresolvedElement); 309 while (m.find()) 310 { 311 String variableName = m.group(1); 312 Object variableValue = values.get(variableName); 313 Set<String> strValues = _getStringValues(variableValue); 314 replacements.put("${" + variableName + "}", strValues); 315 } 316 317 Set<String> pathElements = Collections.singleton(unresolvedElement); 318 for (String toReplace : replacements.keySet()) 319 { 320 pathElements = _replace(toReplace, replacements.get(toReplace), pathElements); 321 } 322 return pathElements; 323 } 324 325 @SuppressWarnings("unchecked") 326 private Set<String> _getStringValues(Object value) 327 { 328 Collection< Object > values; 329 if (value instanceof Collection< ? >) 330 { 331 values = (Collection< Object >) value; 332 } 333 else 334 { 335 values = Collections.singleton(value); 336 } 337 338 Set<String> strValues = values.stream() 339 .filter(Objects::nonNull) 340 .map(String::valueOf) 341 .collect(Collectors.toSet()); 342 343 if (strValues.isEmpty()) 344 { 345 strValues = Collections.singleton(__NO_VALUE_OR_BLANK_FOLDER_NAME); 346 } 347 return strValues; 348 } 349 350 /* 351 * In: 352 * toReplace="${metaB}" 353 * replaceBy={ "b1", "b2" } 354 * uncompleteElements={ "a1_${metaB}_${metaC}", "a2_${metaB}_${metaC}" } 355 * Out: 356 * { "a1_b1_${metaC}", "a2_b1_${metaC}", "a1_b2_${metaC}", "a2_b2_${metaC}" } 357 */ 358 private Set<String> _replace(String toReplace, Set<String> replaceBy, Set<String> uncompleteElements) 359 { 360 Set<String> newPossibleElements = new HashSet<>(); 361 for (String singleReplaceBy : replaceBy) 362 { 363 for (String uncompleteElement : uncompleteElements) 364 { 365 newPossibleElements.add(uncompleteElement.replace(toReplace, singleReplaceBy)); 366 } 367 } 368 return newPossibleElements; 369 } 370 371 private Set<String> _validPathElementNames(Set<String> elements) 372 { 373 return elements.stream() 374 .map(this::_validPathElementName) 375 .collect(Collectors.toSet()); 376 } 377 378 /* 379 * Out: 380 * The tranformed path element name to have a valid folder name 381 */ 382 private String _validPathElementName(String element) 383 { 384 return StringUtils.isBlank(element) ? __NO_VALUE_OR_BLANK_FOLDER_NAME : FilenameUtils.sanitize(element); 385 } 386 387 /* 388 * In: 389 * [{a1, a2}, {b}, {c1, c2}] 390 * Out: 391 * {[a1, b c1], [a1, b, c2], [a2, b, c1], [a2, b, c2]} 392 * representing {a1/b/c1, a1/b/c2, a2/b/c1, a2/b/c2} 393 */ 394 private Set<PathWrapper> _getAllPaths(List<Set<String>> pathElements) 395 { 396 Set<PathWrapper> allPaths = new HashSet<>(); 397 allPaths.add(null); // root 398 for (Set<String> possibleElements : pathElements) 399 { 400 allPaths = _getAllPathsInCurrentLevel(possibleElements, allPaths); 401 } 402 return allPaths; 403 } 404 405 private Set<PathWrapper> _getAllPathsInCurrentLevel(Set<String> possibleElementsInCurrentLevel, Set<PathWrapper> computedPathsInPreviousLevel) 406 { 407 Set<PathWrapper> paths = new HashSet<>(); 408 for (PathWrapper computedPathInPreviousLevel : computedPathsInPreviousLevel) 409 { 410 for (String possibleElement : possibleElementsInCurrentLevel) 411 { 412 List<String> pathInCurrentLevel; 413 if (computedPathInPreviousLevel == null) // root case 414 { 415 pathInCurrentLevel = new ArrayList<>(); 416 } 417 else 418 { 419 pathInCurrentLevel = new ArrayList<>(computedPathInPreviousLevel.getElements()); 420 } 421 pathInCurrentLevel.add(possibleElement); 422 paths.add(new PathWrapper(pathInCurrentLevel)); 423 } 424 } 425 return paths; 426 } 427 428 /* 429 * In: 430 * A map with the resolved relative paths for each content (the different possible paths are within a set) 431 * Out: 432 * The 'inverted' map, i.e. a map with the list of contents for each path 433 */ 434 private Map<PathWrapper, List<Content>> _contentsByPath(Map<Content, Set<PathWrapper>> pathByContent) 435 { 436 Map<PathWrapper, List<Content>> contentsByPath = new HashMap<>(); 437 for (Content content : pathByContent.keySet()) 438 { 439 Set<PathWrapper> paths = pathByContent.get(content); 440 for (PathWrapper path : paths) 441 { 442 List<Content> contentsForPath; 443 if (contentsByPath.containsKey(path)) 444 { 445 contentsForPath = contentsByPath.get(path); 446 } 447 else 448 { 449 contentsForPath = new ArrayList<>(); 450 contentsByPath.put(path, contentsForPath); 451 } 452 contentsForPath.add(content); 453 } 454 } 455 return contentsByPath; 456 } 457 458 // Just for readability of the code (PathWrapper in method signatures is better than List<String>) 459 private static final class PathWrapper 460 { 461 private List<String> _pathElements; 462 463 PathWrapper(List<String> pathElements) 464 { 465 _pathElements = pathElements; 466 } 467 468 List<String> getElements() 469 { 470 return _pathElements; 471 } 472 473 @Override 474 public int hashCode() 475 { 476 final int prime = 31; 477 int result = 1; 478 result = prime * result + ((_pathElements == null) ? 0 : _pathElements.hashCode()); 479 return result; 480 } 481 482 @Override 483 public boolean equals(Object obj) 484 { 485 if (this == obj) 486 { 487 return true; 488 } 489 if (obj == null) 490 { 491 return false; 492 } 493 if (!(obj instanceof PathWrapper)) 494 { 495 return false; 496 } 497 PathWrapper other = (PathWrapper) obj; 498 if (_pathElements == null) 499 { 500 if (other._pathElements != null) 501 { 502 return false; 503 } 504 } 505 else if (!_pathElements.equals(other._pathElements)) 506 { 507 return false; 508 } 509 return true; 510 } 511 512 @Override 513 public String toString() 514 { 515 return _pathElements.toString(); 516 } 517 } 518}