001/* 002 * Copyright 2017 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.extraction.execution; 017 018import java.io.File; 019import java.util.ArrayList; 020import java.util.List; 021import java.util.Optional; 022 023import org.apache.avalon.framework.component.Component; 024import org.apache.avalon.framework.configuration.Configuration; 025import org.apache.avalon.framework.configuration.ConfigurationException; 026import org.apache.avalon.framework.configuration.DefaultConfigurationBuilder; 027import org.apache.avalon.framework.context.Context; 028import org.apache.avalon.framework.context.ContextException; 029import org.apache.avalon.framework.context.Contextualizable; 030import org.apache.avalon.framework.logger.AbstractLogEnabled; 031import org.apache.avalon.framework.service.ServiceException; 032import org.apache.avalon.framework.service.ServiceManager; 033import org.apache.avalon.framework.service.Serviceable; 034import org.apache.cocoon.components.LifecycleHelper; 035import org.apache.commons.lang3.StringUtils; 036 037import org.ametys.cms.contenttype.ContentTypeExtensionPoint; 038import org.ametys.plugins.core.user.UserHelper; 039import org.ametys.plugins.extraction.ExtractionConstants; 040import org.ametys.plugins.extraction.component.CountExtractionComponent; 041import org.ametys.plugins.extraction.component.ExtractionComponent; 042import org.ametys.plugins.extraction.component.MappingQueryExtractionComponent; 043import org.ametys.plugins.extraction.component.QueryExtractionComponent; 044import org.ametys.plugins.extraction.component.ThesaurusExtractionComponent; 045import org.ametys.plugins.extraction.execution.Extraction.ClausesVariable; 046import org.ametys.plugins.extraction.execution.Extraction.ClausesVariableType; 047import org.ametys.plugins.repository.AmetysObjectResolver; 048import org.ametys.plugins.repository.AmetysRepositoryException; 049 050/** 051 * This class reads the extraction definition file 052 */ 053public class ExtractionDefinitionReader extends AbstractLogEnabled implements Component, Contextualizable, Serviceable 054{ 055 /** The component role. */ 056 public static final String ROLE = ExtractionDefinitionReader.class.getName(); 057 058 private Context _context; 059 private ServiceManager _serviceManager; 060 061 private UserHelper _userHelper; 062 private AmetysObjectResolver _ametysResolver; 063 private ContentTypeExtensionPoint _contentTypeExtensionPoint; 064 065 public void contextualize(Context context) throws ContextException 066 { 067 _context = context; 068 } 069 070 public void service(ServiceManager manager) throws ServiceException 071 { 072 _serviceManager = manager; 073 074 _userHelper = (UserHelper) manager.lookup(UserHelper.ROLE); 075 _ametysResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 076 _contentTypeExtensionPoint = (ContentTypeExtensionPoint) manager.lookup(ContentTypeExtensionPoint.ROLE); 077 } 078 079 /** 080 * Read the extraction definition file 081 * @param file extraction definition file 082 * @return the extraction components parsed in the definition file 083 * @throws Exception if an error occurs 084 */ 085 public Extraction readExtractionDefinitionFile(File file) throws Exception 086 { 087 long startTime = -1; 088 if (getLogger().isDebugEnabled()) 089 { 090 startTime = System.currentTimeMillis(); 091 getLogger().debug("Reading definition file"); 092 } 093 094 assert file != null; 095 Configuration configuration = new DefaultConfigurationBuilder().buildFromFile(file); 096 097 Extraction extraction = new Extraction(file.getName()); 098 099 _readExtractionDescription(configuration, extraction); 100 _readExtractionAuthor(configuration, extraction); 101 _readVariablesDefinition(configuration, extraction); 102 _readExtractionDefinitionFile(configuration, extraction); 103 104 if (getLogger().isDebugEnabled()) 105 { 106 long endTime = System.currentTimeMillis(); 107 getLogger().debug("Read definition file in " + (endTime - startTime) + "ms"); 108 } 109 110 return extraction; 111 } 112 113 /** 114 * Read the extraction definition file 115 * @param file extraction definition file 116 * @return the extraction components parsed in the definition file 117 * @throws Exception if an error occurs 118 */ 119 public Extraction readVariablesDefinitionsInExtractionDefinitionFile(File file) throws Exception 120 { 121 assert file != null; 122 123 Configuration configuration = new DefaultConfigurationBuilder().buildFromFile(file); 124 Extraction extraction = new Extraction(file.getName()); 125 _readVariablesDefinition(configuration, extraction); 126 127 return extraction; 128 } 129 130 private void _readExtractionDescription(Configuration configuration, Extraction extraction) 131 { 132 String descriptionId = configuration.getChild(ExtractionConstants.DESCRIPTION_TAG, true) 133 .getAttribute(ExtractionConstants.DESCRIPTION_IDENTIFIER_ATTRIBUTE_NAME, null); 134 135 if (StringUtils.isNotBlank(descriptionId)) 136 { 137 try 138 { 139 _ametysResolver.resolveById(descriptionId); 140 extraction.setDescriptionId(descriptionId); 141 } 142 catch (AmetysRepositoryException e) 143 { 144 if (getLogger().isWarnEnabled()) 145 { 146 getLogger().warn("Invalid extraction description " + descriptionId + " for configuration " + configuration.getLocation(), e); 147 } 148 } 149 } 150 } 151 152 private void _readExtractionAuthor(Configuration configuration, Extraction extraction) throws ConfigurationException 153 { 154 Configuration author = configuration.getChild(ExtractionConstants.AUTHOR_TAG, false); 155 if (author != null) 156 { 157 extraction.setAuthor(_userHelper.xml2userIdentity(author)); 158 } 159 } 160 161 private void _readVariablesDefinition(Configuration configuration, Extraction extraction) throws ConfigurationException 162 { 163 for (Configuration child : configuration.getChildren()) 164 { 165 switch (child.getName()) 166 { 167 case ExtractionConstants.OPTIONAL_COLUMNS_TAG: 168 extraction.setDisplayOptionalColumnsNames(getDisplayOptionalColumnNames(child)); 169 break; 170 case ExtractionConstants.CLAUSES_VARIABLES_TAG: 171 extraction.setClausesVariables(getClausesVariables(child)); 172 break; 173 default: 174 // Do nothing, we only check variables definitions 175 } 176 } 177 } 178 179 private void _readExtractionDefinitionFile(Configuration configuration, Extraction extraction) throws Exception 180 { 181 for (Configuration child : configuration.getChildren()) 182 { 183 switch (child.getName()) 184 { 185 case ExtractionConstants.QUERY_COMPONENT_TAG: 186 case ExtractionConstants.THESAURUS_COMPONENT_TAG: 187 case ExtractionConstants.COUNT_COMPONENT_TAG: 188 case ExtractionConstants.MAPPING_QUERY_COMPONENT_TAG: 189 ExtractionComponent component = _processExtractionComponent(child); 190 extraction.addExtractionComponent(component); 191 break; 192 default: 193 // Do nothing 194 } 195 } 196 } 197 198 private List<String> getDisplayOptionalColumnNames(Configuration configuration) throws ConfigurationException 199 { 200 List<String> names = new ArrayList<>(); 201 for (Configuration nameConfiguration : configuration.getChildren("name")) 202 { 203 names.add(nameConfiguration.getValue()); 204 } 205 return names; 206 } 207 208 private List<ClausesVariable> getClausesVariables(Configuration configuration) throws ConfigurationException 209 { 210 List<ClausesVariable> variables = new ArrayList<>(); 211 212 for (Configuration variableConfiguration : configuration.getChildren("variable")) 213 { 214 // Name 215 String name = variableConfiguration.getAttribute("name", null); 216 if (null == name) 217 { 218 throw new ConfigurationException("A clauses variable is not well defined, name is mandatory."); 219 } 220 221 // Type 222 String typeAsString = variableConfiguration.getAttribute("type", null); 223 // For legacy purpose, if no type is defined, type is the SELECT_CONTENTS one 224 ClausesVariableType type = typeAsString != null ? ClausesVariableType.fromStringValue(typeAsString) : ClausesVariableType.SELECT_CONTENTS; 225 226 // Content type identifiers 227 List<String> contentTypeIds = _getClausesVariableContentTypeIds(variableConfiguration, type); 228 229 // Solr request 230 Optional<String> solrRequest = _getClausesVariableSolrRequest(variableConfiguration); 231 232 variables.add(new ClausesVariable(name, type, contentTypeIds, solrRequest)); 233 } 234 235 return variables; 236 } 237 238 private List<String> _getClausesVariableContentTypeIds(Configuration variableConfiguration, ClausesVariableType clausesVariableType) throws ConfigurationException 239 { 240 List<String> contentTypeIds = new ArrayList<>(); 241 242 // Look for legacy content type 243 Optional.ofNullable(variableConfiguration.getAttribute("contentType", null)) 244 .ifPresent(contentTypeId -> contentTypeIds.add(contentTypeId)); 245 246 // If there is no legacy content type, get the content types' list 247 if (contentTypeIds.isEmpty()) 248 { 249 Configuration contentTypesConf = variableConfiguration.getChild("content-types"); 250 for (Configuration contentTypeConf : contentTypesConf.getChildren("content-type")) 251 { 252 String id = contentTypeConf.getAttribute("id"); 253 contentTypeIds.add(id); 254 } 255 } 256 257 // Check that there is max one content type for SELECT_CONTENTS variable 258 if (ClausesVariableType.SELECT_CONTENTS.equals(clausesVariableType) && contentTypeIds.size() > 1) 259 { 260 throw new ConfigurationException("Only one content type id can be provided for variables of type selectContents", variableConfiguration); 261 } 262 263 // Check that all referenced content types are existing 264 for (String contentTypeId : contentTypeIds) 265 { 266 if (!_contentTypeExtensionPoint.hasExtension(contentTypeId)) 267 { 268 throw new ConfigurationException("Only one content type id can be provided for variables of type selectContents", variableConfiguration); 269 } 270 } 271 272 return contentTypeIds; 273 } 274 275 private Optional<String> _getClausesVariableSolrRequest(Configuration variableConfiguration) 276 { 277 String solrRequest = variableConfiguration.getChild("solr-request").getValue(null); 278 279 return Optional.ofNullable(solrRequest) 280 .filter(StringUtils::isNotEmpty); 281 } 282 283 private ExtractionComponent _processExtractionComponent(Configuration componentConfiguration) throws Exception 284 { 285 ExtractionComponent component = null; 286 switch (componentConfiguration.getName()) 287 { 288 case "query": 289 component = new QueryExtractionComponent(); 290 break; 291 case "count": 292 component = new CountExtractionComponent(); 293 break; 294 case "thesaurus": 295 component = new ThesaurusExtractionComponent(); 296 break; 297 case "mapping-query": 298 component = new MappingQueryExtractionComponent(); 299 break; 300 default: 301 // do nothing 302 break; 303 } 304 305 if (component != null) 306 { 307 LifecycleHelper.setupComponent(component, getLogger(), _context, _serviceManager, componentConfiguration); 308 for (Configuration child : componentConfiguration.getChildren()) 309 { 310 ExtractionComponent subComponent = _processExtractionComponent(child); 311 if (null != subComponent) 312 { 313 component.addSubComponent(subComponent); 314 } 315 } 316 } 317 return component; 318 } 319}