001/*
002 *  Copyright 2018 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.extraction.execution;
017
018import java.nio.file.Path;
019import java.nio.file.Paths;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Collection;
023import java.util.Collections;
024import java.util.HashMap;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import java.util.Objects;
029import java.util.Optional;
030import java.util.Set;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033import java.util.stream.Collectors;
034import java.util.stream.Stream;
035
036import org.apache.avalon.framework.component.Component;
037import org.apache.avalon.framework.service.ServiceException;
038import org.apache.avalon.framework.service.ServiceManager;
039import org.apache.avalon.framework.service.Serviceable;
040import org.apache.commons.lang3.StringUtils;
041
042import org.ametys.cms.contenttype.ContentType;
043import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
044import org.ametys.cms.repository.Content;
045import org.ametys.core.util.FilenameUtils;
046import org.ametys.plugins.extraction.component.AbstractSolrExtractionComponent;
047import org.ametys.plugins.extraction.component.ExtractionComponent;
048import org.ametys.plugins.extraction.edition.EditExtractionNodeManager;
049import org.ametys.runtime.model.ElementDefinition;
050import org.ametys.runtime.model.ModelHelper;
051import org.ametys.runtime.model.type.ElementType;
052import org.ametys.runtime.plugin.component.AbstractLogEnabled;
053
054/**
055 * The resolver for string paths which can contain variables (format is <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>)
056 * and need to be resolved against some given contents.
057 */
058public class PathResolver extends AbstractLogEnabled implements Component, Serviceable
059{
    /** The Avalon role. */
    public static final String ROLE = PathResolver.class.getName();
    
    /** The characters treated as separators between two path elements ('/' and '\'). */
    private static final List<Character> __PATH_SEPARATORS = Arrays.asList('/', '\\');
    /** The pattern matching a variable such as <code>${meta1/meta2}</code>. Group 1 captures the variable name. */
    private static final Pattern __VARIABLE_REGEXP_PATTERN = Pattern.compile(
            "\\$" // character '$' literally
            + "\\{" // character '{' literally
            + "([\\w-\\/]*)" // capturing group: [any word character or '-' or '/'] between zero and unlimited times
            + "\\}" // character '}' literally
    );
    /** The name used for a path element which is blank or whose variable has no value. */
    private static final String __NO_VALUE_OR_BLANK_FOLDER_NAME = "_NOVALUE_";
    
    /** The component used to get the content types of a saved query referenced by an extraction component. */
    private EditExtractionNodeManager _editExtractionNodeManager;
    /** The extension point used to get a content type from its id. */
    private ContentTypeExtensionPoint _contentTypeExtensionPoint;
074    
075    @Override
076    public void service(ServiceManager manager) throws ServiceException
077    {
078        _editExtractionNodeManager = (EditExtractionNodeManager) manager.lookup(EditExtractionNodeManager.ROLE);
079        _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
080    }
081    
082    /**
083     * Returns <code>true</code> if the path contains variables to be resolved.
084     * <br>If it returns <code>false</code>, then {@link #resolvePath(String, List, Extraction, Path)}
085     * can be called with <code>null</code> parameters for contents and extraction.
086     * @param path The relative path to resolve
087     * @return <code>true</code> if the path contains variables to be resolved
088     */
089    public boolean hasVariable(String path)
090    {
091        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(path);
092        return m.find();
093    }
094    
095    /**
096     * Returns <code>true</code> if the unresolved path represents a folder, i.e. its last element does not contain a '.' character.
097     * @param path The relative path to resolve
098     * @return <code>true</code> if the unresolved path represents a folder
099     */
100    public boolean isFolder(String path)
101    {
102        PathWrapper unresolvedPath = _splitPathElements(path);
103        if (path.isEmpty())
104        {
105            return true;
106        }
107        
108        List<String> elements = unresolvedPath.getElements();
109        String lastElement = elements.get(elements.size() - 1);
110        // dummy variable replacement to avoid to take account of '.' 
111        // in variable names (not possible for the moment but it could change)
112        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(lastElement);
113        StringBuffer sb = new StringBuffer();
114        while (m.find())
115        {
116            m.group(1);
117            m.appendReplacement(sb, "");
118        }
119        m.appendTail(sb);
120        return !sb.toString().contains(".");
121    }
122    
123    /**
124     * Resolve the given path, which can contain variables, with the values for the given contents.
125     * <br>Thus, the result is a {@link Map} of resolved {@link Path Paths}, each value containg the list of contents for its associated resolved path key.
126     * <br>If a variable is multivalued, a content can be in several paths at the same time in the result.
127     * <br>
128     * <br>For instance, <code>foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}</code>
129     * could be resolved to the path <code>foo / a_val1_m_val2_z / bar / qux / val3</code> for some contents.
130     * @param path The relative path to resolve. It must not start, nor end with a '/' or a '\' character
131     * @param contents The contents. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false.
132     * @param extraction The extraction. Can be null if {@link PathResolver#hasVariable(String)} was called before and returned false.
133     * @param basePath The base absolute path
134     * @return The absolute resolved paths mapped with their matching contents.
135     * <br>If the returned map contains only one path with a null list, it means that all contents match for that given single path.
136     * @throws IllegalArgumentException If the path contains variables that are not in the extracted contents' model
137     */
138    public Map<Path, List<Content>> resolvePath(String path, List<Content> contents, Extraction extraction, Path basePath) throws IllegalArgumentException
139    {
140        PathWrapper unresolvedPath = _splitPathElements(path);
141        Collection<ContentType> contentTypes = _getFirstLevelContentTypes(extraction);
142        
143        Set<String> variableNames = new HashSet<>();
144        for (String element : unresolvedPath.getElements())
145        {
146            _fillVariableNames(contentTypes, element, variableNames);
147        }
148        
149        if (variableNames.isEmpty())
150        {
151            return Collections.singletonMap(_toPath(_validPath(unresolvedPath), basePath), null);
152        }
153        
154        Map<Content, Set<PathWrapper>> pathByContent = _pathByContent(unresolvedPath, Optional.ofNullable(contents).orElse(Collections.emptyList()));
155        Map<PathWrapper, List<Content>> contentsByPath = _contentsByPath(pathByContent);
156        
157        return contentsByPath.entrySet()
158                .stream()
159                .collect(Collectors.toMap(
160                    e -> _toPath(e.getKey(), basePath), 
161                    e -> e.getValue()
162                ));
163    }
164    
165    private PathWrapper _validPath(PathWrapper pathWithNoVar)
166    {
167        List<String> pathElements = pathWithNoVar.getElements();
168        if (pathElements.size() == 1 && "".equals(pathElements.get(0)))
169        {
170            return pathWithNoVar;
171        }
172        return new PathWrapper(
173                pathElements.stream()
174                    .map(this::_validPathElementName)
175                    .collect(Collectors.toList()));
176    }
177    
178    private Path _toPath(PathWrapper resolvedPath, Path basePath)
179    {
180        List<String> elements = resolvedPath.getElements();
181        return Paths.get(basePath.toString(), elements.toArray(new String[elements.size()]));
182    }
183    
184    /*
185     * In:
186     *      "foo/a_${meta1/meta2/meta3}_m_${meta4}_z/bar\qux/${meta5}"
187     * Out:
188     *      ["foo", "a_${meta1/meta2/meta3}_m_${meta4}_z", "bar", "qux", "${meta5}"]
189     */
190    private PathWrapper _splitPathElements(String path)
191    {
192        List<String> res = new ArrayList<>();
193        boolean previousCharWasDollar = false;
194        boolean inVariable = false;
195        int start = 0;
196        int end = 0;
197        
198        for (int i = 0; i < path.length(); i++)
199        {
200            char currentChar = path.charAt(i);
201            if (!inVariable && __PATH_SEPARATORS.contains(currentChar))
202            {
203                end = i;
204                res.add(path.substring(start, end));
205                start = i + 1;
206            }
207            else if (!inVariable && currentChar == '$')
208            {
209                previousCharWasDollar = true;
210            }
211            else if (!inVariable && previousCharWasDollar && currentChar == '{')
212            {
213                inVariable = true;
214            }
215            else if (inVariable && currentChar == '}')
216            {
217                inVariable = false;
218            }
219            
220            if (currentChar != '$')
221            {
222                previousCharWasDollar = false;
223            }
224        }
225        
226        // End of string
227        res.add(path.substring(start, path.length()));
228        
229        return new PathWrapper(res);
230    }
231    
232    /*
233     * In:
234     *      "a_${meta1/meta2/meta3}_m_${meta4}_z"
235     * Will fill variableNames with:
236     *      ["meta1/meta2/meta3", "meta4"]
237     */
238    private void _fillVariableNames(Collection<ContentType> contentTypes, String element, Set<String> variableNames) throws IllegalArgumentException
239    {
240        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(element);
241        while (m.find())
242        {
243            String variableName = m.group(1);
244            if (ModelHelper.hasModelItem(variableName, contentTypes) && ModelHelper.getModelItem(variableName, contentTypes) instanceof ElementDefinition)
245            {
246                variableNames.add(variableName);
247            }
248            else
249            {
250                throw new IllegalArgumentException("The variable named '" + variableName + "' can not be used in the extraction result path. It is not an attribute of the defined content types");
251            }
252        }
253    }
254    
255    private Collection<ContentType> _getFirstLevelContentTypes(Extraction extraction)
256    {
257        return extraction.getExtractionComponents().stream()
258                .map(this::_getContentTypeIds)
259                .flatMap(Collection::stream)
260                .map(_contentTypeExtensionPoint::getExtension)
261                .collect(Collectors.toList());
262    }
263    
264    private Collection<String> _getContentTypeIds(ExtractionComponent component)
265    {
266        if (component instanceof AbstractSolrExtractionComponent)
267        {
268            String queryReferenceId = ((AbstractSolrExtractionComponent) component).getQueryReferenceId();
269            if (StringUtils.isNotEmpty(queryReferenceId))
270            {
271                return _editExtractionNodeManager.getSavedQueryContentTypes(queryReferenceId);
272            }
273        }
274        return component.getContentTypes();
275    }
276    
277    /*
278     * Out:
279     *      A map with the resolved relative paths for each content
280     */
281    private Map<Content, Set<PathWrapper>> _pathByContent(PathWrapper unresolvedPath, List<Content> contents)
282    {
283        Map<Content, Set<PathWrapper>> pathByContent = new HashMap<>();
284        for (Content content : contents)
285        {
286            List<Set<String>> pathElements = _resolvePath(unresolvedPath, content);
287            Set<PathWrapper> allPaths = _getAllPaths(pathElements);
288            pathByContent.put(content, allPaths);
289        }
290        return pathByContent;
291    }
292    
293    /*
294     * Out:
295     *      The (resolved) relative paths (as a list of possible elements in a set) for the given content
296     */
297    private List<Set<String>> _resolvePath(PathWrapper unresolvedPath, Content content)
298    {
299        List<Set<String>> resolvedPathElements = new ArrayList<>();
300        for (String element : unresolvedPath.getElements())
301        {
302            Set<String> resolvedElements = _resolvePathElement(element, content);
303            resolvedPathElements.add(_validPathElementNames(resolvedElements));
304        }
305        
306        return resolvedPathElements;
307    }
308    
309    /*
310     * Out:
311     *      The (resolved) possible path elements (i.e. folder names) for the given values (i.e. variables resolved for a given content)
312     *      It is a set as variables can be multivalued
313     */
314    private Set<String> _resolvePathElement(String unresolvedElement, Content content)
315    {
316        Map<String, Set<String>> replacements = new HashMap<>();
317        Matcher m = __VARIABLE_REGEXP_PATTERN.matcher(unresolvedElement);
318        while (m.find())
319        {
320            String variableName = m.group(1);
321            ElementType type = content.getType(variableName);
322            Object variableValue = content.getValue(variableName, true);
323            Set<String> strValues = _getStringValues(type, variableValue);
324            replacements.put("${" + variableName + "}", strValues);
325        }
326        
327        Set<String> pathElements = Collections.singleton(unresolvedElement);
328        for (String toReplace : replacements.keySet())
329        {
330            pathElements = _replace(toReplace, replacements.get(toReplace), pathElements);
331        }
332        return pathElements;
333    }
334    
335    @SuppressWarnings("unchecked")
336    private Set<String> _getStringValues(ElementType type, Object value)
337    {
338        Stream<Object> values = Stream.empty();
339        if (type.getManagedClassArray().isInstance(value))
340        {
341            values = Arrays.stream((Object[]) value);
342        }
343        else
344        {
345            values = Collections.singleton(value).stream();
346        }
347        
348        Set<String> strValues = values.filter(Objects::nonNull)
349                                      .map(type::toString)
350                                      .collect(Collectors.toSet());
351        
352        if (strValues.isEmpty())
353        {
354            strValues = Collections.singleton(__NO_VALUE_OR_BLANK_FOLDER_NAME);
355        }
356        return strValues;
357    }
358    
359    /*
360     * In:
361     *      toReplace="${metaB}"
362     *      replaceBy={ "b1", "b2" }
363     *      uncompleteElements={ "a1_${metaB}_${metaC}", "a2_${metaB}_${metaC}" }
364     * Out:
365     *      { "a1_b1_${metaC}", "a2_b1_${metaC}", "a1_b2_${metaC}", "a2_b2_${metaC}" }
366     */
367    private Set<String> _replace(String toReplace, Set<String> replaceBy, Set<String> uncompleteElements)
368    {
369        Set<String> newPossibleElements = new HashSet<>();
370        for (String singleReplaceBy : replaceBy)
371        {
372            for (String uncompleteElement : uncompleteElements)
373            {
374                newPossibleElements.add(uncompleteElement.replace(toReplace, singleReplaceBy));
375            }
376        }
377        return newPossibleElements;
378    }
379    
380    private Set<String> _validPathElementNames(Set<String> elements)
381    {
382        return elements.stream()
383                .map(this::_validPathElementName)
384                .collect(Collectors.toSet());
385    }
386    
387    /*
388     * Out:
389     *      The tranformed path element name to have a valid folder name
390     */
391    private String _validPathElementName(String element)
392    {
393        return StringUtils.isBlank(element) ? __NO_VALUE_OR_BLANK_FOLDER_NAME : FilenameUtils.filterName(element);
394    }
395    
396    /*
397     * In:
398     *      [{a1, a2}, {b}, {c1, c2}]
399     * Out:
400     *      {[a1, b c1], [a1, b, c2], [a2, b, c1], [a2, b, c2]}
401     *      representing {a1/b/c1, a1/b/c2, a2/b/c1, a2/b/c2}
402     */
403    private Set<PathWrapper> _getAllPaths(List<Set<String>> pathElements)
404    {
405        Set<PathWrapper> allPaths = new HashSet<>();
406        allPaths.add(null); // root
407        for (Set<String> possibleElements : pathElements)
408        {
409            allPaths = _getAllPathsInCurrentLevel(possibleElements, allPaths);
410        }
411        return allPaths;
412    }
413    
414    private Set<PathWrapper> _getAllPathsInCurrentLevel(Set<String> possibleElementsInCurrentLevel, Set<PathWrapper> computedPathsInPreviousLevel)
415    {
416        Set<PathWrapper> paths = new HashSet<>();
417        for (PathWrapper computedPathInPreviousLevel : computedPathsInPreviousLevel)
418        {
419            for (String possibleElement : possibleElementsInCurrentLevel)
420            {
421                List<String> pathInCurrentLevel;
422                if (computedPathInPreviousLevel == null) // root case
423                {
424                    pathInCurrentLevel = new ArrayList<>();
425                }
426                else
427                {
428                    pathInCurrentLevel = new ArrayList<>(computedPathInPreviousLevel.getElements());
429                }
430                pathInCurrentLevel.add(possibleElement);
431                paths.add(new PathWrapper(pathInCurrentLevel));
432            }
433        }
434        return paths;
435    }
436    
437    /*
438     * In:
439     *      A map with the resolved relative paths for each content (the different possible paths are within a set)
440     * Out:
441     *      The 'inverted' map, i.e. a map with the list of contents for each path
442     */
443    private Map<PathWrapper, List<Content>> _contentsByPath(Map<Content, Set<PathWrapper>> pathByContent)
444    {
445        Map<PathWrapper, List<Content>> contentsByPath = new HashMap<>();
446        for (Content content : pathByContent.keySet())
447        {
448            Set<PathWrapper> paths = pathByContent.get(content);
449            for (PathWrapper path : paths)
450            {
451                List<Content> contentsForPath;
452                if (contentsByPath.containsKey(path))
453                {
454                    contentsForPath = contentsByPath.get(path);
455                }
456                else
457                {
458                    contentsForPath = new ArrayList<>();
459                    contentsByPath.put(path, contentsForPath);
460                }
461                contentsForPath.add(content);
462            }
463        }
464        return contentsByPath;
465    }
466    
467    // Just for readability of the code (PathWrapper in method signatures is better than List<String>)
468    private static final class PathWrapper
469    {
470        private List<String> _pathElements;
471
472        PathWrapper(List<String> pathElements)
473        {
474            _pathElements = pathElements;
475        }
476        
477        List<String> getElements()
478        {
479            return _pathElements;
480        }
481
482        @Override
483        public int hashCode()
484        {
485            final int prime = 31;
486            int result = 1;
487            result = prime * result + ((_pathElements == null) ? 0 : _pathElements.hashCode());
488            return result;
489        }
490
491        @Override
492        public boolean equals(Object obj)
493        {
494            if (this == obj)
495            {
496                return true;
497            }
498            if (obj == null)
499            {
500                return false;
501            }
502            if (!(obj instanceof PathWrapper))
503            {
504                return false;
505            }
506            PathWrapper other = (PathWrapper) obj;
507            if (_pathElements == null)
508            {
509                if (other._pathElements != null)
510                {
511                    return false;
512                }
513            }
514            else if (!_pathElements.equals(other._pathElements))
515            {
516                return false;
517            }
518            return true;
519        }
520        
521        @Override
522        public String toString()
523        {
524            return _pathElements.toString();
525        }
526    }
527}