001/*
002 *  Copyright 2017 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.extraction.execution;
017
018import java.io.File;
019import java.util.ArrayList;
020import java.util.List;
021import java.util.Optional;
022
023import org.apache.avalon.framework.component.Component;
024import org.apache.avalon.framework.configuration.Configuration;
025import org.apache.avalon.framework.configuration.ConfigurationException;
026import org.apache.avalon.framework.configuration.DefaultConfigurationBuilder;
027import org.apache.avalon.framework.context.Context;
028import org.apache.avalon.framework.context.ContextException;
029import org.apache.avalon.framework.context.Contextualizable;
030import org.apache.avalon.framework.logger.AbstractLogEnabled;
031import org.apache.avalon.framework.service.ServiceException;
032import org.apache.avalon.framework.service.ServiceManager;
033import org.apache.avalon.framework.service.Serviceable;
034import org.apache.cocoon.components.LifecycleHelper;
035import org.apache.commons.lang3.StringUtils;
036
037import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
038import org.ametys.plugins.core.user.UserHelper;
039import org.ametys.plugins.extraction.ExtractionConstants;
040import org.ametys.plugins.extraction.component.CountExtractionComponent;
041import org.ametys.plugins.extraction.component.ExtractionComponent;
042import org.ametys.plugins.extraction.component.MappingQueryExtractionComponent;
043import org.ametys.plugins.extraction.component.QueryExtractionComponent;
044import org.ametys.plugins.extraction.component.ThesaurusExtractionComponent;
045import org.ametys.plugins.extraction.execution.Extraction.ClausesVariable;
046import org.ametys.plugins.extraction.execution.Extraction.ClausesVariableType;
047import org.ametys.plugins.repository.AmetysObjectResolver;
048import org.ametys.plugins.repository.AmetysRepositoryException;
049
050/**
051 * This class reads the extraction definition file
052 */
053public class ExtractionDefinitionReader extends AbstractLogEnabled implements Component, Contextualizable, Serviceable
054{
055    /** The component role. */
056    public static final String ROLE = ExtractionDefinitionReader.class.getName();
057    
058    private Context _context;
059    private ServiceManager _serviceManager;
060    
061    private UserHelper _userHelper;
062    private AmetysObjectResolver _ametysResolver;
063    private ContentTypeExtensionPoint _contentTypeExtensionPoint;
064    
065    public void contextualize(Context context) throws ContextException
066    {
067        _context = context;
068    }
069    
070    public void service(ServiceManager manager) throws ServiceException
071    {
072        _serviceManager = manager;
073        
074        _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE);
075        _ametysResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
076        _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE);
077    }
078    
079    /**
080     * Read the extraction definition file
081     * @param file extraction definition file
082     * @return the extraction components parsed in the definition file
083     * @throws Exception if an error occurs
084     */
085    public Extraction readExtractionDefinitionFile(File file) throws Exception
086    {
087        long startTime = -1;
088        if (getLogger().isDebugEnabled())
089        {
090            startTime = System.currentTimeMillis();
091            getLogger().debug("Reading definition file");
092        }
093        
094        assert file != null;
095        Configuration configuration = new DefaultConfigurationBuilder().buildFromFile(file);
096        
097        Extraction extraction = new Extraction(file.getName());
098        
099        _readExtractionDescription(configuration, extraction);
100        _readExtractionAuthor(configuration, extraction);
101        _readVariablesDefinition(configuration, extraction);
102        _readExtractionDefinitionFile(configuration, extraction);
103        
104        if (getLogger().isDebugEnabled())
105        {
106            long endTime = System.currentTimeMillis();
107            getLogger().debug("Read definition file in " + (endTime - startTime) + "ms");
108        }
109        
110        return extraction;
111    }
112    
113    /**
114     * Read the extraction definition file
115     * @param file extraction definition file
116     * @return the extraction components parsed in the definition file
117     * @throws Exception if an error occurs
118     */
119    public Extraction readVariablesDefinitionsInExtractionDefinitionFile(File file) throws Exception
120    {
121        assert file != null;
122        
123        Configuration configuration = new DefaultConfigurationBuilder().buildFromFile(file);
124        Extraction extraction = new Extraction(file.getName());
125        _readVariablesDefinition(configuration, extraction);
126        
127        return extraction;
128    }
129    
130    private void _readExtractionDescription(Configuration configuration, Extraction extraction)
131    {
132        String descriptionId = configuration.getChild(ExtractionConstants.DESCRIPTION_TAG, true)
133                .getAttribute(ExtractionConstants.DESCRIPTION_IDENTIFIER_ATTRIBUTE_NAME, null);
134        
135        if (StringUtils.isNotBlank(descriptionId))
136        {
137            try
138            {
139                _ametysResolver.resolveById(descriptionId);
140                extraction.setDescriptionId(descriptionId);
141            }
142            catch (AmetysRepositoryException e)
143            {
144                if (getLogger().isWarnEnabled())
145                {
146                    getLogger().warn("Invalid extraction description " + descriptionId + " for configuration " + configuration.getLocation(), e);
147                }
148            }
149        }
150    }
151    
152    private void _readExtractionAuthor(Configuration configuration, Extraction extraction) throws ConfigurationException
153    {
154        Configuration author = configuration.getChild(ExtractionConstants.AUTHOR_TAG, false);
155        if (author != null)
156        {
157            extraction.setAuthor(_userHelper.xml2userIdentity(author));
158        }
159    }
160    
161    private void _readVariablesDefinition(Configuration configuration, Extraction extraction) throws ConfigurationException
162    {
163        for (Configuration child : configuration.getChildren())
164        {
165            switch (child.getName())
166            {
167                case ExtractionConstants.OPTIONAL_COLUMNS_TAG:
168                    extraction.setDisplayOptionalColumnsNames(getDisplayOptionalColumnNames(child));
169                    break;
170                case ExtractionConstants.CLAUSES_VARIABLES_TAG:
171                    extraction.setClausesVariables(getClausesVariables(child));
172                    break;
173                default:
174                    // Do nothing, we only check variables definitions
175            }
176        }
177    }
178    
179    private void _readExtractionDefinitionFile(Configuration configuration, Extraction extraction) throws Exception
180    {
181        for (Configuration child : configuration.getChildren())
182        {
183            switch (child.getName())
184            {
185                case ExtractionConstants.QUERY_COMPONENT_TAG:
186                case ExtractionConstants.THESAURUS_COMPONENT_TAG:
187                case ExtractionConstants.COUNT_COMPONENT_TAG:
188                case ExtractionConstants.MAPPING_QUERY_COMPONENT_TAG:
189                    ExtractionComponent component = _processExtractionComponent(child);
190                    extraction.addExtractionComponent(component);
191                    break;
192                default:
193                    // Do nothing
194            }
195        }
196    }
197
198    private List<String> getDisplayOptionalColumnNames(Configuration configuration) throws ConfigurationException
199    {
200        List<String> names = new ArrayList<>();
201        for (Configuration nameConfiguration : configuration.getChildren("name"))
202        {
203            names.add(nameConfiguration.getValue());
204        }
205        return names;
206    }
207    
208    private List<ClausesVariable> getClausesVariables(Configuration configuration) throws ConfigurationException
209    {
210        List<ClausesVariable> variables = new ArrayList<>();
211        
212        for (Configuration variableConfiguration : configuration.getChildren("variable"))
213        {
214            // Name
215            String name = variableConfiguration.getAttribute("name", null);
216            if (null == name)
217            {
218                throw new ConfigurationException("A clauses variable is not well defined, name is mandatory.");
219            }
220            
221            // Type
222            String typeAsString = variableConfiguration.getAttribute("type", null);
223            // For legacy purpose, if no type is defined, type is the SELECT_CONTENTS one
224            ClausesVariableType type = typeAsString != null ? ClausesVariableType.fromStringValue(typeAsString) : ClausesVariableType.SELECT_CONTENTS;
225            
226            // Content type identifiers
227            List<String> contentTypeIds = _getClausesVariableContentTypeIds(variableConfiguration, type);
228            
229            // Solr request
230            Optional<String> solrRequest = _getClausesVariableSolrRequest(variableConfiguration);
231            
232            variables.add(new ClausesVariable(name, type, contentTypeIds, solrRequest));
233        }
234        
235        return variables;
236    }
237    
238    private List<String> _getClausesVariableContentTypeIds(Configuration variableConfiguration, ClausesVariableType clausesVariableType) throws ConfigurationException
239    {
240        List<String> contentTypeIds = new ArrayList<>();
241        
242        // Look for legacy content type
243        Optional.ofNullable(variableConfiguration.getAttribute("contentType", null))
244                .ifPresent(contentTypeId -> contentTypeIds.add(contentTypeId));
245        
246        // If there is no legacy content type, get the content types' list
247        if (contentTypeIds.isEmpty())
248        {
249            Configuration contentTypesConf = variableConfiguration.getChild("content-types");
250            for (Configuration contentTypeConf : contentTypesConf.getChildren("content-type"))
251            {
252                String id = contentTypeConf.getAttribute("id");
253                contentTypeIds.add(id);
254            }
255        }
256        
257        // Check that there is max one content type for SELECT_CONTENTS variable
258        if (ClausesVariableType.SELECT_CONTENTS.equals(clausesVariableType) && contentTypeIds.size() > 1)
259        {
260            throw new ConfigurationException("Only one content type id can be provided for variables of type selectContents", variableConfiguration);
261        }
262        
263        // Check that all referenced content types are existing 
264        for (String contentTypeId : contentTypeIds)
265        {
266            if (!_contentTypeExtensionPoint.hasExtension(contentTypeId))
267            {
268                throw new ConfigurationException("Only one content type id can be provided for variables of type selectContents", variableConfiguration);
269            }
270        }
271        
272        return contentTypeIds;
273    }
274    
275    private Optional<String> _getClausesVariableSolrRequest(Configuration variableConfiguration)
276    {
277        String solrRequest = variableConfiguration.getChild("solr-request").getValue(null);
278
279        return Optional.ofNullable(solrRequest)
280                       .filter(StringUtils::isNotEmpty);
281    }
282
283    private ExtractionComponent _processExtractionComponent(Configuration componentConfiguration) throws Exception
284    {
285        ExtractionComponent component = null;
286        switch (componentConfiguration.getName())
287        {
288            case "query":
289                component = new QueryExtractionComponent();
290                break;
291            case "count":
292                component = new CountExtractionComponent();
293                break;
294            case "thesaurus":
295                component = new ThesaurusExtractionComponent();
296                break;
297            case "mapping-query":
298                component = new MappingQueryExtractionComponent();
299                break;
300            default:
301                // do nothing
302                break;
303        }
304
305        if (component != null)
306        {
307            LifecycleHelper.setupComponent(component, getLogger(), _context, _serviceManager, componentConfiguration);
308            for (Configuration child : componentConfiguration.getChildren())
309            {
310                ExtractionComponent subComponent = _processExtractionComponent(child);
311                if (null != subComponent)
312                {
313                    component.addSubComponent(subComponent);
314                }
315            }
316        }
317        return component;
318    }
319}