001/*
002 *  Copyright 2018 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.extraction.execution;
017
018import java.nio.file.Path;
019import java.nio.file.Paths;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Collection;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import java.util.Objects;
029import java.util.Optional;
030import java.util.Set;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033import java.util.stream.Collectors;
034
035import org.apache.avalon.framework.component.Component;
036import org.apache.avalon.framework.service.ServiceException;
037import org.apache.avalon.framework.service.ServiceManager;
038import org.apache.avalon.framework.service.Serviceable;
039import org.apache.commons.lang3.StringUtils;
040
041import org.ametys.cms.repository.Content;
042import org.ametys.cms.search.content.ContentValuesExtractorFactory;
043import org.ametys.cms.search.content.ContentValuesExtractorFactory.SimpleContentValuesExtractor;
044import org.ametys.plugins.extraction.component.AbstractSolrExtractionComponent;
045import org.ametys.plugins.extraction.component.ExtractionComponent;
046import org.ametys.plugins.extraction.edition.EditExtractionNodeManager;
047import org.ametys.plugins.extraction.utils.FilenameUtils;
048import org.ametys.runtime.plugin.component.AbstractLogEnabled;
049
050/**
051 * The resolver for string paths which can contain variables (format is <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>)
052 * and need to be resolved against some given contents.
053 */
054public class PathResolver extends AbstractLogEnabled implements Component, Serviceable
055{
056    /** The Avalon role. */
057    public static final String ROLE = PathResolver.class.getName();
058    
059    private static final List<Character> __PATH_SEPARATORS = Arrays.asList('/', '\\');
060    private static final Pattern __VARIABLE_REGEXP_PATTERN = Pattern.compile(
061            "\\$" // character '$' literally
062            + "\\{" // character '{' literally
063            + "([\\w-\\/]*)" // capturing group: [any word character or '-' or '/'] between zero and unlimited times
064            + "\\}" // character '}' literally
065    );
066    private static final String __NO_VALUE_OR_BLANK_FOLDER_NAME = "_NOVALUE_";
067    
068    private ContentValuesExtractorFactory _contentValuesExtractorFactory;
069    private EditExtractionNodeManager _editExtractionNodeManager;
070    
071    @Override
072    public void service(ServiceManager manager) throws ServiceException
073    {
074        _contentValuesExtractorFactory = (ContentValuesExtractorFactory) manager.lookup(ContentValuesExtractorFactory.ROLE);
075        _editExtractionNodeManager = (EditExtractionNodeManager) manager.lookup(EditExtractionNodeManager.ROLE);
076    }
077    
078    /**
079     * Returns <code>true</code> if the path contains variables to be resolved.
080     * <br>If it returns <code>false</code>, then {@link #resolvePath(String, List, Extraction, Path)}
081     * can be called with <code>null</code> parameters for contents and extraction.
082     * @param path The relative path to resolve
083     * @return <code>true</code> if the path contains variables to be resolved
084     */
085    public boolean hasVariable(String path)
086    {
087        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(path);
088        return m.find();
089    }
090    
091    /**
092     * Returns <code>true</code> if the unresolved path represents a folder, i.e. its last element does not contain a '.' character.
093     * @param path The relative path to resolve
094     * @return <code>true</code> if the unresolved path represents a folder
095     */
096    public boolean isFolder(String path)
097    {
098        PathWrapper unresolvedPath = _splitPathElements(path);
099        if (path.isEmpty())
100        {
101            return true;
102        }
103        
104        List<String> elements = unresolvedPath.getElements();
105        String lastElement = elements.get(elements.size() - 1);
106        // dummy variable replacement to avoid to take account of '.' 
107        // in variable names (not possible for the moment but it could change)
108        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(lastElement);
109        StringBuffer sb = new StringBuffer();
110        while (m.find())
111        {
112            m.group(1);
113            m.appendReplacement(sb, "");
114        }
115        m.appendTail(sb);
116        return !sb.toString().contains(".");
117    }
118    
119    /**
120     * Resolve the given path, which can contain variables, with the values for the given contents.
121     * <br>Thus, the result is a {@link Map} of resolved {@link Path Paths}, each value containg the list of contents for its associated resolved path key.
122     * <br>If a variable is multivalued, a content can be in several paths at the same time in the result.
123     * <br>
124     * <br>For instance, <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>
125     * could be resolved to the path <code>foo / a_val1_m_val2_z / bar / qux / val3</code> for some contents.
126     * @param path The relative path to resolve. It must not start, nor end with a '/' or a '\' character
127     * @param contents The contents. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false.
128     * @param extraction The extraction. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false.
129     * @param basePath The base absolute path
130     * @return The absolute resolved paths mapped with their matching contents.
131     * <br>If the returned map contains only one path with a null list, it means that all contents match for that given single path.
132     */
133    public Map<Path, List<Content>> resolvePath(String path, List<Content> contents, Extraction extraction, Path basePath)
134    {
135        PathWrapper unresolvedPath = _splitPathElements(path);
136        
137        Set<String> variableNames = new HashSet<>();
138        for (String element : unresolvedPath.getElements())
139        {
140            _fillVariableNames(element, variableNames);
141        }
142        
143        if (variableNames.isEmpty())
144        {
145            return Collections.singletonMap(_toPath(_validPath(unresolvedPath), basePath), null);
146        }
147        
148        Collection<String> contentTypes = _getFirstLevelContentTypes(extraction);
149        SimpleContentValuesExtractor valuesExtractor = _contentValuesExtractorFactory.create(contentTypes, new ArrayList<>(variableNames));
150        Map<Content, Set<PathWrapper>> pathByContent = _pathByContent(unresolvedPath, Optional.ofNullable(contents).orElse(Collections.emptyList()), valuesExtractor);
151        
152        Map<PathWrapper, List<Content>> contentsByPath = _contentsByPath(pathByContent);
153        
154        return contentsByPath.entrySet()
155                .stream()
156                .collect(Collectors.toMap(
157                    e -> _toPath(e.getKey(), basePath), 
158                    e -> e.getValue()
159                ));
160    }
161    
162    private PathWrapper _validPath(PathWrapper pathWithNoVar)
163    {
164        List<String> pathElements = pathWithNoVar.getElements();
165        if (pathElements.size() == 1 && "".equals(pathElements.get(0)))
166        {
167            return pathWithNoVar;
168        }
169        return new PathWrapper(
170                pathElements.stream()
171                    .map(this::_validPathElementName)
172                    .collect(Collectors.toList()));
173    }
174    
175    private Path _toPath(PathWrapper resolvedPath, Path basePath)
176    {
177        List<String> elements = resolvedPath.getElements();
178        return Paths.get(basePath.toString(), elements.toArray(new String[elements.size()]));
179    }
180    
181    /*
182     * In:
183     *      "foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}"
184     * Out:
185     *      ["foo", "a_${meta1/meta2/meta3}_m_${meta4}_z", "bar", "qux", "${meta5}"]
186     */
187    private PathWrapper _splitPathElements(String path)
188    {
189        List<String> res = new ArrayList<>();
190        boolean previousCharWasDollar = false;
191        boolean inVariable = false;
192        int start = 0;
193        int end = 0;
194        
195        for (int i = 0; i < path.length(); i++)
196        {
197            char currentChar = path.charAt(i);
198            if (!inVariable && __PATH_SEPARATORS.contains(currentChar))
199            {
200                end = i;
201                res.add(path.substring(start, end));
202                start = i + 1;
203            }
204            else if (!inVariable && currentChar == '$')
205            {
206                previousCharWasDollar = true;
207            }
208            else if (!inVariable && previousCharWasDollar && currentChar == '{')
209            {
210                inVariable = true;
211            }
212            else if (inVariable && currentChar == '}')
213            {
214                inVariable = false;
215            }
216            
217            if (currentChar != '$')
218            {
219                previousCharWasDollar = false;
220            }
221        }
222        
223        // End of string
224        res.add(path.substring(start, path.length()));
225        
226        return new PathWrapper(res);
227    }
228    
229    /*
230     * In:
231     *      "a_${meta1/meta2/meta3}_m_${meta4}_z"
232     * Will fill variableNames with:
233     *      ["meta1/meta2/meta3", "meta4"]
234     */
235    private void _fillVariableNames(String element, Set<String> variableNames)
236    {
237        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(element);
238        while (m.find())
239        {
240            String variableName = m.group(1);
241            variableNames.add(variableName);
242        }
243    }
244    
245    private Collection<String> _getFirstLevelContentTypes(Extraction extraction)
246    {
247        return extraction.getExtractionComponents().stream()
248                .map(this::_getContentTypes)
249                .flatMap(Collection::stream)
250                .collect(Collectors.toList());
251    }
252    
253    private Collection<String> _getContentTypes(ExtractionComponent component)
254    {
255        if (component instanceof AbstractSolrExtractionComponent)
256        {
257            String queryReferenceId = ((AbstractSolrExtractionComponent) component).getQueryReferenceId();
258            if (StringUtils.isNotEmpty(queryReferenceId))
259            {
260                return _editExtractionNodeManager.getSavedQueryContentTypes(queryReferenceId);
261            }
262        }
263        return component.getContentTypes();
264    }
265    
266    /*
267     * Out:
268     *      A map with the resolved relative paths for each content
269     */
270    private Map<Content, Set<PathWrapper>> _pathByContent(PathWrapper unresolvedPath, List<Content> contents, SimpleContentValuesExtractor valuesExtractor)
271    {
272        Map<Content, Set<PathWrapper>> pathByContent = new HashMap<>();
273        for (Content content : contents)
274        {
275            List<Set<String>> pathElements = _resolvePath(unresolvedPath, content, valuesExtractor);
276            Set<PathWrapper> allPaths = _getAllPaths(pathElements);
277            pathByContent.put(content, allPaths);
278        }
279        return pathByContent;
280    }
281    
282    /*
283     * Out:
284     *      The (resolved) relative paths (as a list of possible elements in a set) for the given content
285     */
286    private List<Set<String>> _resolvePath(PathWrapper unresolvedPath, Content content, SimpleContentValuesExtractor valuesExtractor)
287    {
288        List<Set<String>> resolvedPathElements = new ArrayList<>();
289        Map<String, Object> values = valuesExtractor.getValues(content, null, Map.of("externalizable", false));
290        
291        for (String element : unresolvedPath.getElements())
292        {
293            Set<String> resolvedElements = _resolvePathElement(element, values);
294            resolvedPathElements.add(_validPathElementNames(resolvedElements));
295        }
296        
297        return resolvedPathElements;
298    }
299    
300    /*
301     * Out:
302     *      The (resolved) possible path elements (i.e. folder names) for the given values (i.e. variables resolved for a given content)
303     *      It is a set as variables can be multivalued
304     */
305    private Set<String> _resolvePathElement(String unresolvedElement, Map<String, Object> values)
306    {
307        Map<String, Set<String>> replacements = new HashMap<>();
308        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(unresolvedElement);
309        while (m.find())
310        {
311            String variableName = m.group(1);
312            Object variableValue = values.get(variableName);
313            Set<String> strValues = _getStringValues(variableValue);
314            replacements.put("${" + variableName + "}", strValues);
315        }
316        
317        Set<String> pathElements = Collections.singleton(unresolvedElement);
318        for (String toReplace : replacements.keySet())
319        {
320            pathElements = _replace(toReplace, replacements.get(toReplace), pathElements);
321        }
322        return pathElements;
323    }
324    
325    @SuppressWarnings("unchecked")
326    private Set<String> _getStringValues(Object value)
327    {
328        Collection< Object > values;
329        if (value instanceof Collection< ? >)
330        {
331            values = (Collection< Object >) value;
332        }
333        else
334        {
335            values = Collections.singleton(value);
336        }
337        
338        Set<String> strValues = values.stream()
339                .filter(Objects::nonNull)
340                .map(String::valueOf)
341                .collect(Collectors.toSet());
342        
343        if (strValues.isEmpty())
344        {
345            strValues = Collections.singleton(__NO_VALUE_OR_BLANK_FOLDER_NAME);
346        }
347        return strValues;
348    }
349    
350    /*
351     * In:
352     *      toReplace="${metaB}"
353     *      replaceBy={ "b1", "b2" }
354     *      uncompleteElements={ "a1_${metaB}_${metaC}", "a2_${metaB}_${metaC}" }
355     * Out:
356     *      { "a1_b1_${metaC}", "a2_b1_${metaC}", "a1_b2_${metaC}", "a2_b2_${metaC}" }
357     */
358    private Set<String> _replace(String toReplace, Set<String> replaceBy, Set<String> uncompleteElements)
359    {
360        Set<String> newPossibleElements = new HashSet<>();
361        for (String singleReplaceBy : replaceBy)
362        {
363            for (String uncompleteElement : uncompleteElements)
364            {
365                newPossibleElements.add(uncompleteElement.replace(toReplace, singleReplaceBy));
366            }
367        }
368        return newPossibleElements;
369    }
370    
371    private Set<String> _validPathElementNames(Set<String> elements)
372    {
373        return elements.stream()
374                .map(this::_validPathElementName)
375                .collect(Collectors.toSet());
376    }
377    
378    /*
379     * Out:
380     *      The tranformed path element name to have a valid folder name
381     */
382    private String _validPathElementName(String element)
383    {
384        return StringUtils.isBlank(element) ? __NO_VALUE_OR_BLANK_FOLDER_NAME : FilenameUtils.sanitize(element);
385    }
386    
387    /*
388     * In:
389     *      [{a1, a2}, {b}, {c1, c2}]
390     * Out:
391     *      {[a1, b c1], [a1, b, c2], [a2, b, c1], [a2, b, c2]}
392     *      representing {a1/b/c1, a1/b/c2, a2/b/c1, a2/b/c2}
393     */
394    private Set<PathWrapper> _getAllPaths(List<Set<String>> pathElements)
395    {
396        Set<PathWrapper> allPaths = new HashSet<>();
397        allPaths.add(null); // root
398        for (Set<String> possibleElements : pathElements)
399        {
400            allPaths = _getAllPathsInCurrentLevel(possibleElements, allPaths);
401        }
402        return allPaths;
403    }
404    
405    private Set<PathWrapper> _getAllPathsInCurrentLevel(Set<String> possibleElementsInCurrentLevel, Set<PathWrapper> computedPathsInPreviousLevel)
406    {
407        Set<PathWrapper> paths = new HashSet<>();
408        for (PathWrapper computedPathInPreviousLevel : computedPathsInPreviousLevel)
409        {
410            for (String possibleElement : possibleElementsInCurrentLevel)
411            {
412                List<String> pathInCurrentLevel;
413                if (computedPathInPreviousLevel == null) // root case
414                {
415                    pathInCurrentLevel = new ArrayList<>();
416                }
417                else
418                {
419                    pathInCurrentLevel = new ArrayList<>(computedPathInPreviousLevel.getElements());
420                }
421                pathInCurrentLevel.add(possibleElement);
422                paths.add(new PathWrapper(pathInCurrentLevel));
423            }
424        }
425        return paths;
426    }
427    
428    /*
429     * In:
430     *      A map with the resolved relative paths for each content (the different possible paths are within a set)
431     * Out:
432     *      The 'inverted' map, i.e. a map with the list of contents for each path
433     */
434    private Map<PathWrapper, List<Content>> _contentsByPath(Map<Content, Set<PathWrapper>> pathByContent)
435    {
436        Map<PathWrapper, List<Content>> contentsByPath = new HashMap<>();
437        for (Content content : pathByContent.keySet())
438        {
439            Set<PathWrapper> paths = pathByContent.get(content);
440            for (PathWrapper path : paths)
441            {
442                List<Content> contentsForPath;
443                if (contentsByPath.containsKey(path))
444                {
445                    contentsForPath = contentsByPath.get(path);
446                }
447                else
448                {
449                    contentsForPath = new ArrayList<>();
450                    contentsByPath.put(path, contentsForPath);
451                }
452                contentsForPath.add(content);
453            }
454        }
455        return contentsByPath;
456    }
457    
458    // Just for readability of the code (PathWrapper in method signatures is better than List<String>)
459    private static final class PathWrapper
460    {
461        private List<String> _pathElements;
462
463        PathWrapper(List<String> pathElements)
464        {
465            _pathElements = pathElements;
466        }
467        
468        List<String> getElements()
469        {
470            return _pathElements;
471        }
472
473        @Override
474        public int hashCode()
475        {
476            final int prime = 31;
477            int result = 1;
478            result = prime * result + ((_pathElements == null) ? 0 : _pathElements.hashCode());
479            return result;
480        }
481
482        @Override
483        public boolean equals(Object obj)
484        {
485            if (this == obj)
486            {
487                return true;
488            }
489            if (obj == null)
490            {
491                return false;
492            }
493            if (!(obj instanceof PathWrapper))
494            {
495                return false;
496            }
497            PathWrapper other = (PathWrapper) obj;
498            if (_pathElements == null)
499            {
500                if (other._pathElements != null)
501                {
502                    return false;
503                }
504            }
505            else if (!_pathElements.equals(other._pathElements))
506            {
507                return false;
508            }
509            return true;
510        }
511        
512        @Override
513        public String toString()
514        {
515            return _pathElements.toString();
516        }
517    }
518}