001/*
002 *  Copyright 2017 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.extraction.component;
017
018import java.util.ArrayList;
019import java.util.Arrays;
020import java.util.Collection;
021import java.util.Collections;
022import java.util.HashSet;
023import java.util.LinkedHashMap;
024import java.util.LinkedHashSet;
025import java.util.List;
026import java.util.Locale;
027import java.util.Map;
028import java.util.Set;
029import java.util.regex.Matcher;
030import java.util.regex.Pattern;
031
032import org.apache.avalon.framework.configuration.Configuration;
033import org.apache.avalon.framework.configuration.ConfigurationException;
034import org.apache.avalon.framework.service.ServiceException;
035import org.apache.avalon.framework.service.ServiceManager;
036import org.xml.sax.ContentHandler;
037
038import org.ametys.cms.content.ContentHelper;
039import org.ametys.cms.contenttype.ContentConstants;
040import org.ametys.cms.contenttype.ContentType;
041import org.ametys.cms.contenttype.MetadataDefinition;
042import org.ametys.cms.contenttype.MetadataType;
043import org.ametys.cms.repository.Content;
044import org.ametys.cms.search.GetQueryFromJSONHelper;
045import org.ametys.cms.search.content.ContentSearcherFactory;
046import org.ametys.cms.search.content.ContentSearcherFactory.SimpleContentSearcher;
047import org.ametys.cms.search.model.SystemProperty;
048import org.ametys.cms.search.model.SystemPropertyExtensionPoint;
049import org.ametys.cms.search.query.QuerySyntaxException;
050import org.ametys.cms.search.query.StringQuery;
051import org.ametys.cms.search.query.Query.Operator;
052import org.ametys.cms.search.ui.model.SearchUIModel;
053import org.ametys.core.util.JSONUtils;
054import org.ametys.core.util.StringUtils;
055import org.ametys.plugins.extraction.execution.ExtractionExecutionContext;
056import org.ametys.plugins.extraction.execution.ExtractionExecutionContextHierarchyElement;
057import org.ametys.plugins.queriesdirectory.Query;
058import org.ametys.plugins.repository.AmetysObjectIterable;
059import org.ametys.plugins.repository.AmetysObjectResolver;
060import org.ametys.plugins.thesaurus.content.ThesaurusItemContentType;
061
062/**
063 * This class represents an extraction component with a solr query
064 */
065public abstract class AbstractSolrExtractionComponent extends AbstractExtractionComponent
066{
    /**
     * Regex used to extract variables from a join expression: \$\{(\.\.(?:\/\.\.)*(?:\/[^\/}]+)?)\}
     * a variable is inside a ${}
     * variable starts with .. (to get the direct parent),
     * has several /.. (to get parent of parent of (...))
     * and can have a /metadataName (to specify the metadata to join on)
     * The only capturing group is the variable itself, without the surrounding ${ and }
     */
    private static final String EXTRACT_JOIN_VARIABLES_REGEX = "\\$\\{(\\.\\.(?:\\/\\.\\.)*(?:\\/[^\\/}]+)?)\\}";
    
    /** Content types concerned by the solr search */
    protected Set<String> _contentTypes = new HashSet<>();
    
    /** Reference id of a recorded query */
    protected String _queryReferenceId;
    
    /** The list of clauses */
    protected List<ExtractionClause> _clauses = new ArrayList<>();
    
    /** Helper to resolve referenced query infos */
    protected GetQueryFromJSONHelper _getQueryFromJSONHelper;
    
    /** Util class to manipulate JSON String */
    protected JSONUtils _jsonUtils;
    
    /** Resolver used to retrieve the referenced query from its id */
    private AmetysObjectResolver _resolver;
    /** Extension point used to detect the system properties referenced by a join */
    private SystemPropertyExtensionPoint _systemPropertyExtensionPoint;
    /** Helper used to read the values of a content for a given field path */
    private ContentHelper _contentHelper;
    /** Factory creating the content searcher used to execute the solr search */
    private ContentSearcherFactory _contentSearcherFactory;
    
095    
096    @Override
097    public void service(ServiceManager serviceManager) throws ServiceException
098    {
099        super.service(serviceManager);
100        _jsonUtils = (JSONUtils) serviceManager.lookup(JSONUtils.ROLE);
101        _getQueryFromJSONHelper = (GetQueryFromJSONHelper) serviceManager.lookup(GetQueryFromJSONHelper.ROLE);
102        _resolver = (AmetysObjectResolver) serviceManager.lookup(AmetysObjectResolver.ROLE);
103        _systemPropertyExtensionPoint = (SystemPropertyExtensionPoint) serviceManager.lookup(SystemPropertyExtensionPoint.ROLE);
104        _contentHelper = (ContentHelper) serviceManager.lookup(ContentHelper.ROLE);
105        _contentSearcherFactory = (ContentSearcherFactory) serviceManager.lookup(ContentSearcherFactory.ROLE);
106    }
107
108    @Override
109    public void configure(Configuration configuration) throws ConfigurationException
110    {
111        super.configure(configuration);
112        
113        Configuration clauses = configuration.getChild("clauses");
114        for (Configuration clause : clauses.getChildren("clause"))
115        {
116            addClauses(clause.getValue());
117        }
118
119        _contentTypes = new HashSet<>();
120        if (Arrays.asList(configuration.getAttributeNames()).contains("ref"))
121        {
122            if (Arrays.asList(configuration.getAttributeNames()).contains("contentTypes"))
123            {
124                throw new IllegalArgumentException(getLogsPrefix() + "a component with a query reference should not specify a content type");
125            }
126            
127            _queryReferenceId = configuration.getAttribute("ref");
128        }
129        else
130        {
131            String contentTypesString = configuration.getAttribute("contentTypes");
132            _contentTypes.addAll(StringUtils.stringToCollection(contentTypesString));
133        }
134    }
135    
136    @Override
137    public void prepareComponentExecution(ExtractionExecutionContext context) throws Exception
138    {
139        super.prepareComponentExecution(context);
140        
141        if (_queryReferenceId != null && !_queryReferenceId.isEmpty())
142        {
143            Query referencedQuery = _resolver.resolveById(_queryReferenceId);
144            computeReferencedQueryInfos(referencedQuery.getContent());
145        }
146        
147        _computeClausesInfos(context);
148    }
149
150    /**
151     * Manages the stored query referenced by the component
152     * @param refQueryContent referenced query content
153     * @throws QuerySyntaxException if there is a syntax error in the referenced query
154     */
155    @SuppressWarnings("unchecked")
156    protected void computeReferencedQueryInfos(String refQueryContent) throws QuerySyntaxException
157    {
158        Map<String, Object> contentMap = _jsonUtils.convertJsonToMap(refQueryContent);
159        Map<String, Object> exportParams = (Map<String, Object>) contentMap.get("exportParams");
160        String modelId = (String) exportParams.get("model");
161        
162        String q;
163        if (modelId.contains("solr"))
164        {
165            Map<String, Object> values = (Map<String, Object>) exportParams.get("values");
166            q = (String) values.get("query");
167            
168            _contentTypes = new HashSet<>((List<String>) values.get("contentTypes"));
169        }
170        else
171        {
172            SearchUIModel model = _getQueryFromJSONHelper.getSearchUIModel(exportParams);
173            List<String> contentTypesToFill = new ArrayList<>();
174            org.ametys.cms.search.query.Query query = _getQueryFromJSONHelper.getQueryFromModel(model, exportParams, contentTypesToFill);
175            
176            q =  query.build();
177            _contentTypes = new HashSet<>(contentTypesToFill);
178        }
179        
180        ExtractionClause clause = new ExtractionClause();
181        clause.setExpression(q);
182        _clauses.add(0, clause);
183    }
184
185    private void _computeClausesInfos(ExtractionExecutionContext context)
186    {
187        for (ExtractionClause clause : _clauses)
188        {
189            String clauseExpression = clause.getExpression();
190            clause.setExpression(_resolveExpression(clauseExpression, context.getClauseVariables()));
191            
192            Map<String, String> groupExpressions = _extractGroupExpressionsFromClause(clauseExpression);
193            if (!groupExpressions.isEmpty())
194            {
195                if (_hasVariablesOutsideGroups(clauseExpression, groupExpressions.keySet()))
196                {
197                    throw new IllegalArgumentException(getLogsPrefix() + "if there's at least one group, every variable should be in a group.");
198                }
199            }
200            else
201            {
202                // The only group is the entire expression
203                // The complete expression is the same as the classic one (there is no characters used to identify the group)
204                groupExpressions.put(clauseExpression, clauseExpression);
205            }
206            
207            for (Map.Entry<String, String> groupExpression : groupExpressions.entrySet())
208            {
209                ExtractionClauseGroup group = new ExtractionClauseGroup();
210                
211                group.setCompleteExpression(groupExpression.getKey());
212                group.setExpression(groupExpression.getValue());
213                
214                Set<String> variables = new HashSet<>(_extractVariableFromClauseExpression(groupExpression.getValue()));
215                if (!variables.isEmpty())
216                {
217                    if (variables.size() > 1)
218                    {
219                        throw new IllegalArgumentException(getLogsPrefix() + "only variables with same name are allowed within a single group");
220                    }
221                    
222                    for (String variable : variables)
223                    {
224                        String[] pathSegments = variable.split(JOIN_HIERARCHY_SEPARATOR);
225                        String fieldPath = pathSegments[pathSegments.length - 1];
226    
227                        group.setVariable(variable);
228                        group.setFieldPath(fieldPath);
229                    }
230                }
231                
232                clause.addGroup(group);
233            }
234        }
235    }
236    
237    private String _resolveExpression(String expression, Map<String, String> queryVariables)
238    {
239        String resolvedExpression = expression;
240        for (Map.Entry<String, String> entry : queryVariables.entrySet())
241        {
242            String variableName = entry.getKey();
243            String contentId = entry.getValue();
244            String escapedContentId = StringQuery.escapeStringValue(contentId, Operator.EQ);
245            resolvedExpression = resolvedExpression.replace("${" + variableName + "}", escapedContentId);
246        }
247        
248        return resolvedExpression;
249    }
250    
251    private boolean _hasVariablesOutsideGroups(String clauseExpression, Collection<String> groupExpressions)
252    {
253        List<String> variablesInClause = _extractVariableFromClauseExpression(clauseExpression);
254        List<String> variablesInGroups = new ArrayList<>();
255        for (String groupExpression : groupExpressions)
256        {
257            variablesInGroups.addAll(_extractVariableFromClauseExpression(groupExpression));
258        }
259        return variablesInClause.size() > variablesInGroups.size();
260    }
261
262    Map<String, String> _extractGroupExpressionsFromClause(String expression)
263    {
264        Map<String, String> groupExpressions = new LinkedHashMap<>();
265        int indexOfGroup = expression.indexOf("#{");
266        while (indexOfGroup != -1)
267        {
268            StringBuilder currentGroup = new StringBuilder();
269            int endIndex = indexOfGroup;
270            int braceLevel = 0;
271            for (int i = indexOfGroup + 2; i < expression.length(); i++)
272            {
273                endIndex = i;
274                char currentChar = expression.charAt(i);
275                if ('{' == currentChar)
276                {
277                    braceLevel++;
278                }
279                else if ('}' == currentChar)
280                {
281                    if (0  == braceLevel)
282                    {
283                        groupExpressions.put("#{" + currentGroup.toString() + "}", currentGroup.toString());
284                        break;
285                    }
286                    braceLevel--;
287                }
288                currentGroup.append(currentChar);
289            }
290            
291            indexOfGroup = expression.indexOf("#{", endIndex);
292        }
293        return groupExpressions;
294    }
295
296    List<String> _extractVariableFromClauseExpression(String expression)
297    {
298        List<String> variables = new ArrayList<>();
299        
300        Pattern pattern = Pattern.compile(EXTRACT_JOIN_VARIABLES_REGEX);
301        Matcher matcher = pattern.matcher(expression);
302        
303        while (matcher.find())
304        {
305            variables.add(matcher.group(1));
306        }
307        
308        return variables;
309    }
310
311    @Override
312    public void executeComponent(ContentHandler contentHandler, ExtractionExecutionContext context) throws Exception
313    {
314        List<String> clauseQueries = _getClauseQueries(context);
315        
316        if (clauseQueries != null)
317        {
318            AmetysObjectIterable<Content> contents = getContentSearcher().withFilterQueryStrings(clauseQueries).setCheckRights(false).search("*:*");
319            processContents(contents, contentHandler, context);
320        }
321    }
322    
323    List<String> _getClauseQueries(ExtractionExecutionContext context)
324    {
325        List<String> clauseQueries = new ArrayList<>();
326        
327        for (ExtractionClause clause : _clauses)
328        {
329            String expression = clause.getExpression();
330            
331            for (ExtractionClauseGroup group : clause.getGroups())
332            {
333                String variable = group.getVariable();
334                
335                if (variable != null && !variable.isEmpty())
336                {
337                    ExtractionExecutionContextHierarchyElement currentContextHierarchyElement = _getCurrentContextElementFromVariable(variable, context.getHierarchyElements());
338                    
339                    String fieldPath = group.getFieldPath();
340                    
341                    ExtractionComponent contextComponent = currentContextHierarchyElement.getComponent();
342                    MetadataType metadataType = _getMetadataType(fieldPath, contextComponent.getContentTypes());
343                    Collection<Object> values = _getValuesFromVariable(fieldPath, metadataType, currentContextHierarchyElement, context.getDefaultLocale());
344                    
345                    if (values.isEmpty())
346                    {
347                        getLogger().warn(getLogsPrefix() + "no value found for field '" + fieldPath + "'. The query of this component can't be achieved");
348                        return null;
349                    }
350                    
351                    Collection<String> groupExpressions = new ArrayList<>();
352                    for (Object value : values)
353                    {
354                        String valueAsString = _getValueAsString(value, metadataType, fieldPath);
355                        groupExpressions.add(group.getExpression().replace("${" + variable + "}", valueAsString));
356                    }
357                    
358                    String groupReplacement =  org.apache.commons.lang3.StringUtils.join(groupExpressions, " OR ");
359                    expression = expression.replace(group.getCompleteExpression(), "(" + groupReplacement + ")");
360                }
361            }
362            
363            clauseQueries.add(expression);
364        }
365        
366        return clauseQueries;
367    }
368
369    private ExtractionExecutionContextHierarchyElement _getCurrentContextElementFromVariable(String variable, List<ExtractionExecutionContextHierarchyElement> context)
370    {
371        int lastIndexOfSlash = variable.lastIndexOf(JOIN_HIERARCHY_SEPARATOR);
372        int indexOfCurrentContext = -1;
373        if (lastIndexOfSlash == -1)
374        {
375            indexOfCurrentContext = context.size() - 1;
376        }
377        else
378        {
379            int hierarchicalLevel = (lastIndexOfSlash + 1) / 3;
380            indexOfCurrentContext = context.size() - hierarchicalLevel;
381            if (variable.endsWith(JOIN_HIERARCHY_ELEMENT))
382            {
383                indexOfCurrentContext--;
384            }
385        }
386        if (indexOfCurrentContext < 0 || indexOfCurrentContext >= context.size())
387        {
388            throw new IllegalArgumentException(getLogsPrefix() + "join on '" + variable + "' does not refer to an existing parent");
389        }
390        return context.get(indexOfCurrentContext);
391    }
392    
393    /**
394     * Retrieves the field path's metadata type from content types
395     * @param fieldPath the field path
396     * @param contentTypes the content types
397     * @return the metadata type
398     */
399    protected MetadataType _getMetadataType(String fieldPath, Collection<String> contentTypes)
400    {
401        // Manage direct content references
402        if (JOIN_HIERARCHY_ELEMENT.equals(fieldPath))
403        {
404            return MetadataType.CONTENT;
405        }
406        
407        // Manage System Properties
408        String[] pathSegments = fieldPath.split(EXTRACTION_METADATA_PATH_SEPARATOR);
409        String propertyName = pathSegments[pathSegments.length - 1];
410        if (_systemPropertyExtensionPoint.hasExtension(propertyName))
411        {
412            SystemProperty systemProperty = _systemPropertyExtensionPoint.getExtension(propertyName);
413            return systemProperty.getType();
414        }
415        
416        // Get content types common ancestor
417        ContentType contentTypesAncestor = null;
418        String contentTypesAncestorId = _contentTypesHelper.getCommonAncestor(contentTypes);
419        if (contentTypesAncestorId != null && _contentTypeExtensionPoint.hasExtension(contentTypesAncestorId))
420        {
421            contentTypesAncestor = _contentTypeExtensionPoint.getExtension(contentTypesAncestorId);
422        }
423        
424        // Manage metadata
425        if (contentTypesAncestor != null)
426        {
427            String fieldPathWthClassicSeparator = fieldPath.replaceAll(EXTRACTION_METADATA_PATH_SEPARATOR, ContentConstants.METADATA_PATH_SEPARATOR);
428            MetadataDefinition definition = _contentTypesHelper.getMetadataDefinition(fieldPathWthClassicSeparator, contentTypesAncestor);
429            if (definition != null)
430            {
431                return definition.getType();
432            }
433            throw new IllegalArgumentException(getLogsPrefix() + "join on '" + fieldPath + "'. This metadata is not available for '" + contentTypesAncestor.getId() + "' content type");
434        }
435        
436        throw new IllegalArgumentException(getLogsPrefix() + "join on '" + fieldPath + "'. This metadata is not available");
437    }
438
439    private Collection<Object> _getValuesFromVariable(String fieldPath, MetadataType metadataType, ExtractionExecutionContextHierarchyElement contextHierarchyElement, Locale defaultLocale)
440    {
441        Collection<Object> values = new LinkedHashSet<>();
442        
443        Iterable<Content> contents = contextHierarchyElement.getContents();
444        for (Content content: contents)
445        {
446            boolean isAutoposting = contextHierarchyElement.isAutoposting();
447            Collection<Object> contentValues = _getContentValuesFromVariable(content, fieldPath, metadataType, isAutoposting, defaultLocale);
448            values.addAll(contentValues);
449        }
450        
451        return values;
452    }
453    
454    private Collection<Object> _getContentValuesFromVariable(Content content, String fieldPath, MetadataType metadataType, boolean isAutoposting, Locale defaultLocale)
455    {
456        Collection<Object> values = new LinkedHashSet<>();
457        
458        Object value = _getContentValue(content, fieldPath, defaultLocale);
459        if (value == null)
460        {
461            return Collections.emptyList();
462        }
463        
464        if (value instanceof Collection<?>)
465        {
466            values.addAll((Collection<?>) value);
467        }
468        else
469        {
470            values.add(value);
471        }
472        
473        Collection<Object> result = new LinkedHashSet<>(values);
474        
475        if (isAutoposting)
476        {
477            switch (metadataType)
478            {
479                case CONTENT:
480                    for (Object object : values)
481                    {
482                        Content parent = (Content) object;
483                        
484                        // Manage autoposting only if the current value is a thesaurus term
485                        if (_contentTypesHelper.isInstanceOf(parent, ThesaurusItemContentType.TERM_CONTENT_TYPE_ID))
486                        {
487                            AmetysObjectIterable<Content> chidren = _thesaurusDAO.getChildTerms(parent.getId());
488                            for (Content child : chidren)
489                            {
490                                Collection<Object> childValues = _getContentValuesFromVariable(child, JOIN_HIERARCHY_ELEMENT, metadataType, isAutoposting, defaultLocale);
491                                result.addAll(childValues);
492                            }
493                        }
494                    }
495                    
496                    break;
497                default:
498                    break;
499            }
500        }
501
502        return result;
503    }
504    
505    private Object _getContentValue(Content content, String fieldPath, Locale defaultLocale)
506    {
507        if (JOIN_HIERARCHY_ELEMENT.equals(fieldPath))
508        {
509            return content;
510        }
511        else 
512        {
513            String fieldPathWthClassicSeparator = fieldPath.replaceAll(EXTRACTION_METADATA_PATH_SEPARATOR, ContentConstants.METADATA_PATH_SEPARATOR);
514            return _contentHelper.getValue(content, fieldPathWthClassicSeparator, defaultLocale, true);
515        }
516    }
517    
518    private String _getValueAsString(Object value, MetadataType metadataType, String fieldPath)
519    {
520        String valueAsString;
521        switch (metadataType)
522        {
523            case STRING:
524            case LONG:
525            case DOUBLE:
526            case BOOLEAN:
527                valueAsString = value.toString();
528                break;
529            case CONTENT:
530                valueAsString = ((Content) value).getId();
531                break;
532            default:
533                throw new IllegalArgumentException(getLogsPrefix() + "join on '" + fieldPath + "'. Metadata type '" + metadataType + "' is not supported by extraction module");
534        }
535        
536        return StringQuery.escapeStringValue(valueAsString, Operator.EQ);
537    }
538    
539    /**
540     * Retrieves the content searcher to use for solr search
541     * @return the content searcher
542     */
543    protected SimpleContentSearcher getContentSearcher()
544    {
545        return _contentSearcherFactory.create(_contentTypes);
546    }
547
    /**
     * Process result contents to format the result document.
     * Called during the component execution with the contents found by the solr search.
     * It is not called when the clause queries can not be computed.
     * @param contents search results
     * @param contentHandler result document
     * @param context component execution context
     * @throws Exception if an error occurs
     */
    protected abstract void processContents(AmetysObjectIterable<Content> contents, ContentHandler contentHandler, ExtractionExecutionContext context) throws Exception;
556
557    @Override
558    public Map<String, Object> getComponentDetailsForTree()
559    {
560        Map<String, Object> details = super.getComponentDetailsForTree();
561        
562        @SuppressWarnings("unchecked")
563        Map<String, Object> data = (Map<String, Object>) details.get("data");
564        
565        List<String> clauses = new ArrayList<>();
566        for (ExtractionClause clause : this.getClauses())
567        {
568            clauses.add(clause.getExpression());
569        }
570        data.put("clauses", clauses);
571        
572        data.put("useQueryRef", org.apache.commons.lang.StringUtils.isNotEmpty(_queryReferenceId));
573        data.put("contentTypes", this.getContentTypes());
574        data.put("queryReferenceId", this.getQueryReferenceId());
575        
576        return details;
577    }
578    
579    public Set<String> getContentTypes()
580    {
581        return _contentTypes;
582    }
583
584    /**
585     * Add content types to component
586     * @param contentTypes Array of content types to add
587     */
588    public void addContentTypes(String... contentTypes)
589    {
590        _contentTypes.addAll(Arrays.asList(contentTypes));
591    }
592
593    /**
594     * Retrieves the id of the referenced query
595     * @return the id of the referenced query
596     */
597    public String getQueryReferenceId()
598    {
599        return _queryReferenceId;
600    }
601    
602    /**
603     * Sets the id of the referenced query
604     * @param queryReferenceId The id of the referenced query to set
605     */
606    public void setQueryReferenceId(String queryReferenceId)
607    {
608        _queryReferenceId = queryReferenceId;
609    }
610
611    /**
612     * Retrieves the component clauses
613     * @return the component clauses
614     */
615    public List<ExtractionClause> getClauses()
616    {
617        return _clauses;
618    }
619
620    /**
621     * Add clauses to the component. Do not manage clauses' groups
622     * @param expressions Array clauses expressions to add
623     */
624    public void addClauses(String... expressions)
625    {
626        for (String expression : expressions)
627        {
628            ExtractionClause clause = new ExtractionClause();
629            clause.setExpression(expression);
630            _clauses.add(clause);
631        }
632    }
633}