001/* 002 * Copyright 2010 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.cms.content.consistency; 017 018import java.time.ZonedDateTime; 019import java.util.ArrayList; 020import java.util.HashMap; 021import java.util.Iterator; 022import java.util.List; 023import java.util.Map; 024import java.util.Optional; 025import java.util.concurrent.Callable; 026import java.util.concurrent.CancellationException; 027import java.util.concurrent.ExecutionException; 028import java.util.concurrent.Executor; 029import java.util.concurrent.ExecutorService; 030import java.util.concurrent.Executors; 031import java.util.concurrent.Future; 032import java.util.concurrent.ThreadFactory; 033import java.util.concurrent.atomic.AtomicLong; 034 035import javax.jcr.RepositoryException; 036 037import org.apache.avalon.framework.activity.Initializable; 038import org.apache.avalon.framework.component.Component; 039import org.apache.avalon.framework.context.ContextException; 040import org.apache.avalon.framework.context.Contextualizable; 041import org.apache.avalon.framework.logger.Logger; 042import org.apache.avalon.framework.service.ServiceException; 043import org.apache.avalon.framework.service.ServiceManager; 044import org.apache.avalon.framework.service.Serviceable; 045import org.apache.cocoon.Constants; 046import org.apache.cocoon.environment.Context; 047import org.apache.cocoon.util.log.SLF4JLoggerAdapter; 048 049import org.ametys.cms.content.references.OutgoingReferences; 050import org.ametys.cms.repository.Content; 051import org.ametys.cms.repository.ContentQueryHelper; 052import org.ametys.cms.repository.DefaultContent; 053import org.ametys.cms.repository.WorkflowAwareContent; 054import org.ametys.cms.transformation.ConsistencyChecker; 055import org.ametys.cms.transformation.ConsistencyChecker.CHECK; 056import org.ametys.core.engine.BackgroundEngineHelper; 057import org.ametys.core.util.DateUtils; 058import org.ametys.plugins.repository.AmetysObjectIterable; 059import org.ametys.plugins.repository.AmetysObjectResolver; 060import org.ametys.plugins.repository.AmetysRepositoryException; 061import org.ametys.plugins.repository.ModifiableAmetysObject; 062import org.ametys.plugins.repository.ModifiableTraversableAmetysObject; 063import org.ametys.plugins.repository.RepositoryConstants; 064import org.ametys.plugins.repository.collection.AmetysObjectCollectionFactory; 065import org.ametys.plugins.repository.jcr.NameHelper; 066import org.ametys.plugins.repository.jcr.NameHelper.NameComputationMode; 067import org.ametys.plugins.repository.query.QueryHelper; 068import org.ametys.plugins.repository.query.expression.AndExpression; 069import org.ametys.plugins.repository.query.expression.Expression; 070import org.ametys.plugins.repository.query.expression.Expression.Operator; 071import org.ametys.plugins.repository.query.expression.StringExpression; 072import org.ametys.plugins.repositoryapp.RepositoryProvider; 073import org.ametys.runtime.plugin.component.AbstractLogEnabled; 074 075/** 076 * Manage all operation related to checking the consistency of a content 077 */ 078public class ContentConsistencyManager extends AbstractLogEnabled implements Initializable, Contextualizable, Serviceable, Component 079{ 080 081 /** The avalon role */ 082 public static final String ROLE = ContentConsistencyManager.class.getName(); 083 084 private static final String __CONSISTENCY_RESULTS_ROOT_NODE_NAME = "consistencyResults"; 085 086 private static ExecutorService __PARALLEL_THREAD_EXECUTOR; 087 private static final int __THREAD_POOL_SIZE_MULTIPLIER = 4; 088 089 /** The ametys object resolver. */ 090 protected AmetysObjectResolver _resolver; 091 092 /** The consistency checker */ 093 protected ConsistencyChecker _consistencyChecker; 094 095 private Context _cocoonContext; 096 private RepositoryProvider _repositoryProvider; 097 private ServiceManager _manager; 098 099 public void contextualize(org.apache.avalon.framework.context.Context context) throws ContextException 100 { 101 _cocoonContext = (Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT); 102 } 103 104 public void service(ServiceManager manager) throws ServiceException 105 { 106 _manager = manager; 107 _consistencyChecker = (ConsistencyChecker) manager.lookup(ConsistencyChecker.ROLE); 108 _resolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 109 _repositoryProvider = (RepositoryProvider) manager.lookup(RepositoryProvider.ROLE); 110 } 111 112 public void initialize() throws Exception 113 { 114 AsyncConsistencyCheckerThreadFactory threadFactory = new AsyncConsistencyCheckerThreadFactory(); 115 // The thread are doing a lot of heavy IO operations, it's worth going over the number of available processors 116 __PARALLEL_THREAD_EXECUTOR = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * __THREAD_POOL_SIZE_MULTIPLIER, threadFactory); 117 } 118 119 /** 120 * Thread factory for async checker. 121 * Set the thread name format and marks the thread as daemon. 122 */ 123 static class AsyncConsistencyCheckerThreadFactory implements ThreadFactory 124 { 125 private static ThreadFactory _defaultThreadFactory; 126 private static String _nameFormat; 127 private static AtomicLong _count; 128 129 public AsyncConsistencyCheckerThreadFactory() 130 { 131 _defaultThreadFactory = Executors.defaultThreadFactory(); 132 _nameFormat = "ametys-async-consistency-checker-%d"; 133 _count = new AtomicLong(0); 134 } 135 136 public Thread newThread(Runnable r) 137 { 138 Thread thread = _defaultThreadFactory.newThread(r); 139 thread.setName(String.format(_nameFormat, _count.getAndIncrement())); 140 // make the threads low priority daemon to avoid slowing user thread 141 thread.setDaemon(true); 142 thread.setPriority(3); 143 144 return thread; 145 } 146 } 147 148 /** 149 * Getter to provide synthetic access to the manager 150 */ 151 private ServiceManager getManager() 152 { 153 return _manager; 154 } 155 156 /** 157 * Runnable to be used for asynchronous calls 158 */ 159 class AsyncConsistencyChecker implements Callable<String> 160 { 161 /** event to observe */ 162 protected final Logger _logger; 163 private final Content _content; 164 165 public AsyncConsistencyChecker(Content content, org.slf4j.Logger logger) 166 { 167 this._logger = new SLF4JLoggerAdapter(logger); 168 this._content = content; 169 } 170 171 @Override 172 public String call() 173 { 174 Map<String, Object> environmentInformation = null; 175 try 176 { 177 // Create the environment. 178 environmentInformation = BackgroundEngineHelper.createAndEnterEngineEnvironment(getManager(), _cocoonContext, _logger); 179 180 return _checkConsistency(_content); 181 } 182 catch (Exception e) 183 { 184 throw new RuntimeException("Content consistency check for content " + _content.getId() + " failed", e); 185 } 186 finally 187 { 188 BackgroundEngineHelper.leaveEngineEnvironment(environmentInformation); 189 } 190 } 191 } 192 193 /** 194 * Check all contents to see if there references are consistent. 195 * All result will also be stored in the consistency result database table for later access. 196 * @return record describing the results of the checks 197 */ 198 public ConsistenciesReport checkAllContents() 199 { 200 // Get the start date to remove outdated results later 201 ZonedDateTime startDate = ZonedDateTime.now(); 202 try (AmetysObjectIterable<Content> contents = _getContents(null)) 203 { 204 ConsistenciesReport checkReport = _checkContents(contents); 205 // the checkContents only remove possible outdated results for the set of content provided as arguments. 206 // We need to remove all the other result : deleted content, deleted link, etc… 207 removeOutdatedResult(startDate, null); 208 return checkReport; 209 } 210 } 211 212 /** 213 * Remove results older than a given date 214 * @param date the threshold 215 * @param filterExpression an expression to filter the content to consider, null to consider all contents 216 */ 217 protected void removeOutdatedResult(ZonedDateTime date, Expression filterExpression) 218 { 219 String xPathQuery = QueryHelper.getXPathQuery(null, ContentConsistencyResult.CONTENT_CONSISTENCY_RESULT_NODETYPE, filterExpression); 220 xPathQuery += "[@ametys:" + ContentConsistencyResult.DATE + " < xs:dateTime('" + DateUtils.zonedDateTimeToString(date) + "')]"; 221 try (AmetysObjectIterable<ContentConsistencyResult> outdatedResults = _resolver.query(xPathQuery)) 222 { 223 for (ContentConsistencyResult outdatedResult : outdatedResults) 224 { 225 try 226 { 227 outdatedResult.remove(); 228 outdatedResult.saveChanges(); 229 } 230 catch (AmetysRepositoryException e) 231 { 232 getLogger().warn("Failed to remove outdated result '{}' due to repository error", outdatedResult.getId(), e); 233 } 234 } 235 } 236 } 237 238 /** 239 * Record holding the results of a consistency checks on multiple contents 240 * @param results a list of {@link ContentConsistencyResult} for every contents that were checked 241 * @param unchecked a list of content id for every content check that were interrupted, cancelled or failed during execution 242 */ 243 public record ConsistenciesReport(List<String> results, List<String> unchecked) { /* Record */ } 244 245 /** 246 * Check all the content provided by the iterable for broken links. 247 * This method will actually provide {@link Callable} to an {@link Executor} that parallelize the checks. 248 * @param contents an iterable of contents to check 249 * @return record describing the results of the checks 250 */ 251 protected ConsistenciesReport _checkContents(AmetysObjectIterable<Content> contents) 252 { 253 try 254 { 255 List<AsyncConsistencyChecker> checkers = contents.stream() 256 .map(content -> new AsyncConsistencyChecker(content, getLogger())) 257 .toList(); 258 259 // execute all checker and wait for their completion (either success or failure) 260 List<Future<String>> futures = __PARALLEL_THREAD_EXECUTOR.invokeAll(checkers); 261 262 // Refresh the session to retrieve the repository modification from the threads 263 _repositoryProvider.getSession("default").refresh(true); 264 265 Iterator<Future<String>> fIterarot = futures.iterator(); 266 Iterator<Content> cIterator = contents.iterator(); 267 268 List<String> done = new ArrayList<>(); 269 List<String> failed = new ArrayList<>(); 270 271 // both iterator should have the same size as the future iterator was mapped from the content iterator 272 while (fIterarot.hasNext() && cIterator.hasNext()) 273 { 274 Future<String> future = fIterarot.next(); 275 Content content = cIterator.next(); 276 277 try 278 { 279 String result = future.get(); 280 if (result != null) 281 { 282 done.add(result); 283 } 284 } 285 catch (CancellationException | InterruptedException | ExecutionException e) 286 { 287 String contentId = content.getId(); 288 getLogger().error("Failed to retrieve result from content consistency checker thread for content {}", contentId, e); 289 failed.add(contentId); 290 } 291 } 292 return new ConsistenciesReport(done, failed); 293 } 294 catch (InterruptedException e) 295 { 296 getLogger().error("Content consistency check was interrupted", e); 297 return null; 298 } 299 catch (RepositoryException e1) 300 { 301 getLogger().error("Failed to refresh the session"); 302 return null; 303 } 304 } 305 306 private String _checkConsistency(Content content) 307 { 308 int successCount = 0; 309 int unknownCount = 0; 310 int unauthorizedCount = 0; 311 int notFoundCount = 0; 312 int serverErrorCount = 0; 313 314 Map<String, OutgoingReferences> referencesByPath = content.getOutgoingReferences(); 315 316 for (String dataPath : referencesByPath.keySet()) 317 { 318 OutgoingReferences references = referencesByPath.get(dataPath); 319 for (String referenceType : references.keySet()) 320 { 321 for (String referenceValue : references.get(referenceType)) 322 { 323 CHECK check; 324 try 325 { 326 check = _consistencyChecker.checkConsistency(referenceType, referenceValue, content.getId(), dataPath, false).status(); 327 } 328 catch (Exception e) 329 { 330 check = CHECK.SERVER_ERROR; 331 getLogger().debug("An exception occurred while checking reference value {} at dataPath {} for content {}", referenceType + "#" + referenceValue, dataPath, content.getId(), e); 332 } 333 334 switch (check) 335 { 336 case SUCCESS: 337 successCount++; 338 break; 339 case UNKNOWN: 340 unknownCount++; 341 break; 342 case UNAUTHORIZED: 343 unauthorizedCount++; 344 break; 345 case NOT_FOUND: 346 notFoundCount++; 347 break; 348 case SERVER_ERROR: 349 default: 350 serverErrorCount++; 351 break; 352 } 353 } 354 } 355 } 356 357 // Store the result 358 ContentConsistencyResult result = storeResult((WorkflowAwareContent) content, successCount, unknownCount, unauthorizedCount, notFoundCount, serverErrorCount); 359 360 return result != null ? result.getId() : null; 361 } 362 363 /** 364 * Store the result of the consistency check done on the content 365 * @param content the content checked 366 * @param successCount the number of success 367 * @param unknownCount the number of unknown 368 * @param unauthorizedCount the number of unauthorized 369 * @param notFoundCount the number of not found 370 * @param serverErrorCount the number of server error 371 * @return the result 372 */ 373 protected ContentConsistencyResult storeResult(WorkflowAwareContent content, int successCount, int unknownCount, int unauthorizedCount, int notFoundCount, int serverErrorCount) 374 { 375 // Retrieve a pre existing result for the content or create an new one 376 Optional<ContentConsistencyResult> previousResult = _getExistingResultForContent(content.getId()); 377 if (unknownCount > 0 || unauthorizedCount > 0 || serverErrorCount > 0 || notFoundCount > 0) 378 { 379 ContentConsistencyResult result = previousResult.orElseGet(() -> 380 { 381 ModifiableTraversableAmetysObject root = _getOrCreateRootCollection(); 382 String nodeName = NameHelper.getUniqueAmetysObjectName(root, "consistency-result", NameComputationMode.GENERATED_KEY, false); 383 return root.createChild(nodeName, ContentConsistencyResult.CONTENT_CONSISTENCY_RESULT_NODETYPE); 384 } 385 ); 386 387 Map<String, Object> values = new HashMap<>(); 388 values.put(ContentConsistencyResult.CONTENT_ID, content.getId()); 389 values.put(ContentConsistencyResult.TITLE, content.getTitle()); 390 values.put(ContentConsistencyResult.CONTENT_TYPES, content.getTypes()); 391 values.put(ContentConsistencyResult.CREATION_DATE, content.getCreationDate()); 392 values.put(ContentConsistencyResult.CREATOR, content.getCreator()); 393 Optional.ofNullable(content.getLastMajorValidationDate()).ifPresent(date -> values.put(ContentConsistencyResult.LAST_MAJOR_VALIDATION_DATE, date)); 394 content.getLastMajorValidator().ifPresent(user -> values.put(ContentConsistencyResult.LAST_MAJOR_VALIDATOR, user)); 395 Optional.ofNullable(content.getLastValidationDate()).ifPresent(date -> values.put(ContentConsistencyResult.LAST_VALIDATION_DATE, date)); 396 content.getLastValidator().ifPresent(user -> values.put(ContentConsistencyResult.LAST_VALIDATOR, user)); 397 values.put(ContentConsistencyResult.LAST_CONTRIBUTOR, content.getLastContributor()); 398 values.put(ContentConsistencyResult.LAST_MODIFICATION_DATE, content.getLastModified()); 399 values.put(ContentConsistencyResult.WORKFLOW_STEP, content.getCurrentStepId()); 400 values.put(ContentConsistencyResult.DATE, ZonedDateTime.now()); 401 values.put(ContentConsistencyResult.NOT_FOUND, notFoundCount); 402 values.put(ContentConsistencyResult.SERVER_ERROR, serverErrorCount); 403 values.put(ContentConsistencyResult.SUCCESS, successCount); 404 values.put(ContentConsistencyResult.UNAUTHORIZED, unauthorizedCount); 405 values.put(ContentConsistencyResult.UNKNOWN, unknownCount); 406 407 result.synchronizeValues(values); 408 409 result.saveChanges(); 410 return result; 411 } 412 // Remove old result if there is no error any more 413 else if (previousResult.isPresent()) 414 { 415 ContentConsistencyResult result = previousResult.get(); 416 ModifiableAmetysObject parent = result.getParent(); 417 result.remove(); 418 parent.saveChanges(); 419 } 420 return null; 421 } 422 423 private Optional<ContentConsistencyResult> _getExistingResultForContent(String id) 424 { 425 String xPathQuery = QueryHelper.getXPathQuery(null, ContentConsistencyResult.CONTENT_CONSISTENCY_RESULT_NODETYPE, new StringExpression(ContentConsistencyResult.CONTENT_ID, Operator.EQ, id)); 426 try (AmetysObjectIterable<ContentConsistencyResult> query = _resolver.query(xPathQuery)) 427 { 428 return query.stream().findFirst(); 429 } 430 } 431 432 // Synchronized to avoid creation of multiple root 433 private synchronized ModifiableTraversableAmetysObject _getOrCreateRootCollection() 434 { 435 ModifiableTraversableAmetysObject pluginsRoot = _resolver.resolveByPath("/ametys:plugins"); 436 437 ModifiableTraversableAmetysObject cmsNode = null; 438 if (pluginsRoot.hasChild("cms")) 439 { 440 cmsNode = (ModifiableTraversableAmetysObject) pluginsRoot.getChild("cms"); 441 } 442 else 443 { 444 cmsNode = (ModifiableTraversableAmetysObject) pluginsRoot.createChild("cms", "ametys:unstructured"); 445 } 446 447 ModifiableTraversableAmetysObject resultsCollection = null; 448 if (cmsNode.hasChild(__CONSISTENCY_RESULTS_ROOT_NODE_NAME)) 449 { 450 resultsCollection = (ModifiableTraversableAmetysObject) cmsNode.getChild(__CONSISTENCY_RESULTS_ROOT_NODE_NAME); 451 } 452 else 453 { 454 resultsCollection = (ModifiableTraversableAmetysObject) cmsNode.createChild(__CONSISTENCY_RESULTS_ROOT_NODE_NAME, AmetysObjectCollectionFactory.COLLECTION_NODETYPE); 455 // Save the new node instantly to allow other thread to retrieve the newly created node. 456 cmsNode.saveChanges(); 457 } 458 459 return resultsCollection; 460 } 461 462 /** 463 * Get the contents with inconsistency information. 464 * @param filterExpression an expression to filter content to check, or null to check all contents 465 * @return an iterator on contents. 466 */ 467 protected AmetysObjectIterable<Content> _getContents(Expression filterExpression) 468 { 469 String query = ContentQueryHelper.getContentXPathQuery(filterExpression != null ? new AndExpression(new ConsistencyExpression(), filterExpression) : new ConsistencyExpression()); 470 return _resolver.query(query); 471 } 472 473 /** 474 * Expression which tests if contents have consistency informations. 475 */ 476 public static class ConsistencyExpression implements Expression 477 { 478 public String build() 479 { 480 return new StringBuilder() 481 .append(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL).append(':').append(DefaultContent.METADATA_ROOT_OUTGOING_REFERENCES) 482 .append('/').append(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL).append(':').append(DefaultContent.METADATA_OUTGOING_REFERENCES) 483 .append("/*") 484 .append("/@").append(RepositoryConstants.NAMESPACE_PREFIX).append(':').append(DefaultContent.METADATA_OUTGOING_REFERENCE_PROPERTY) 485 .toString(); 486 } 487 } 488}