001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.time.ZoneOffset; 020import java.time.ZonedDateTime; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Map; 027import java.util.Optional; 028import java.util.Set; 029import java.util.concurrent.Future; 030import java.util.function.Function; 031import java.util.stream.Collectors; 032 033import org.apache.avalon.framework.component.Component; 034import org.apache.avalon.framework.context.Context; 035import org.apache.avalon.framework.context.ContextException; 036import org.apache.avalon.framework.context.Contextualizable; 037import org.apache.avalon.framework.service.ServiceException; 038import org.apache.avalon.framework.service.ServiceManager; 039import org.apache.avalon.framework.service.Serviceable; 040import org.apache.cocoon.Constants; 041import org.apache.cocoon.components.ContextHelper; 042import org.apache.cocoon.environment.Request; 043import org.apache.commons.lang3.ArrayUtils; 044import org.apache.commons.lang3.LocaleUtils; 045import org.apache.solr.client.solrj.SolrClient; 046import org.apache.solr.client.solrj.SolrServerException; 047import org.apache.solr.client.solrj.response.UpdateResponse; 048import org.apache.solr.common.SolrInputDocument; 049import org.apache.solr.common.SolrInputField; 050 051import org.ametys.cms.content.indexing.solr.SolrFieldNames; 052import org.ametys.cms.content.indexing.solr.SolrIndexer; 053import org.ametys.cms.content.indexing.solr.SolrResourceIndexer; 054import org.ametys.cms.contenttype.ContentTypesHelper; 055import org.ametys.cms.data.ContentValue; 056import org.ametys.cms.data.type.indexing.IndexableElementType; 057import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper; 058import org.ametys.cms.indexing.IndexingException; 059import org.ametys.cms.indexing.solr.AbstractIndexerCallable; 060import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer; 061import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint; 062import org.ametys.cms.indexing.solr.IndexationResult; 063import org.ametys.cms.indexing.solr.ThreadIndexerHelper; 064import org.ametys.cms.model.CMSDataContext; 065import org.ametys.cms.repository.Content; 066import org.ametys.cms.search.query.AndQuery; 067import org.ametys.cms.search.query.DocumentTypeQuery; 068import org.ametys.cms.search.query.JoinQuery; 069import org.ametys.cms.search.query.OrQuery; 070import org.ametys.cms.search.query.Query; 071import org.ametys.cms.search.query.QuerySyntaxException; 072import org.ametys.cms.search.solr.SolrClientProvider; 073import org.ametys.cms.search.systemprop.FirstValidationSystemProperty; 074import org.ametys.cms.search.systemprop.LastMajorValidationSystemProperty; 075import org.ametys.cms.search.systemprop.LastModifiedSystemProperty; 076import org.ametys.cms.search.systemprop.LastValidationSystemProperty; 077import org.ametys.cms.search.systemprop.TagsSystemProperty; 078import org.ametys.cms.tag.Tag; 079import org.ametys.cms.tag.TagHelper; 080import org.ametys.cms.tag.TagProviderExtensionPoint; 081import org.ametys.core.util.DateUtils; 082import org.ametys.plugins.explorer.resources.Resource; 083import org.ametys.plugins.explorer.resources.ResourceCollection; 084import org.ametys.plugins.repository.AmetysObject; 085import org.ametys.plugins.repository.AmetysObjectIterable; 086import org.ametys.plugins.repository.AmetysObjectResolver; 087import org.ametys.plugins.repository.AmetysRepositoryException; 088import org.ametys.plugins.repository.RepositoryConstants; 089import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder; 090import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite; 091import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater; 092import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry; 093import org.ametys.plugins.repository.model.CompositeDefinition; 094import org.ametys.plugins.repository.model.RepeaterDefinition; 095import org.ametys.plugins.repository.model.RepositoryDataContext; 096import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 097import org.ametys.runtime.model.ElementDefinition; 098import org.ametys.runtime.model.ModelItem; 099import org.ametys.runtime.model.type.DataContext; 100import org.ametys.runtime.model.type.ElementType; 101import org.ametys.runtime.model.type.ModelItemTypeConstants; 102import org.ametys.runtime.plugin.component.AbstractLogEnabled; 103import org.ametys.web.WebConstants; 104import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint; 105import org.ametys.web.repository.page.Page; 106import org.ametys.web.repository.page.Page.PageType; 107import org.ametys.web.repository.page.Zone; 108import org.ametys.web.repository.page.ZoneItem; 109import org.ametys.web.repository.page.ZoneItem.ZoneType; 110import org.ametys.web.repository.site.Site; 111import org.ametys.web.repository.sitemap.Sitemap; 112import org.ametys.web.search.query.PageAttachmentQuery; 113import org.ametys.web.search.query.PageQuery; 114import org.ametys.web.service.Service; 115import org.ametys.web.service.ServiceExtensionPoint; 116 117/** 118 * Component responsible for indexing a page with all its contents. 119 */ 120public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable 121{ 122 /** The avalon role. */ 123 public static final String ROLE = SolrPageIndexer.class.getName(); 124 125 /** The service manager. */ 126 protected ServiceManager _manager; 127 /** The Solr client provider */ 128 protected SolrClientProvider _solrClientProvider; 129 /** The Solr indexer */ 130 protected SolrIndexer _solrIndexer; 131 /** Solr Ametys resources indexer */ 132 protected SolrResourceIndexer _solrResourceIndexer; 133 /** The extension point for PageVisibleAttachmentIndexers */ 134 protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP; 135 /** The additional property indexer extension point. */ 136 protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP; 137 /** The tag provider extension point. */ 138 protected TagProviderExtensionPoint _tagProviderEP; 139 /** The content types helper */ 140 protected ContentTypesHelper _cTypesHelper; 141 /** The thread indexer helper */ 142 protected ThreadIndexerHelper _threadIndexerHelper; 143 144 /** The service extension point. */ 145 protected ServiceExtensionPoint _serviceExtensionPoint; 146 /** The Ametys object resolver*/ 147 protected AmetysObjectResolver _ametysObjectResolver; 148 /** The avalon context */ 149 protected Context _context; 150 /** Cocoon Context */ 151 protected org.apache.cocoon.environment.Context _cocoonContext; 152 153 @Override 154 public void service(ServiceManager manager) throws ServiceException 155 { 156 _manager = manager; 157 _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 158 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 159 _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 160 _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE); 161 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 162 _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE); 163 _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE); 164 _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE); 165 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 166 _threadIndexerHelper = (ThreadIndexerHelper) manager.lookup(ThreadIndexerHelper.ROLE); 167 } 168 169 public void contextualize(Context context) throws ContextException 170 { 171 _context = context; 172 _cocoonContext = (org.apache.cocoon.environment.Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT); 173 } 174 175 /** 176 * Index a page and eventually its children, recursively, in all workspaces and commit<br> 177 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 178 * @param pageId the page to be indexed. 179 * @param indexRecursively to also process children pages. 180 * @param indexAttachments to index page attachments 181 * @throws Exception if an error occurs during indexation. 182 */ 183 public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception 184 { 185 indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments); 186 indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments); 187 } 188 189 /** 190 * Index a page and eventually its children, recursively.<br> 191 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 192 * @param pageId the page to be indexed. 193 * @param workspaceName the workspace where to index 194 * @param indexRecursively to also process children pages. 195 * @param indexAttachments to index page attachments 196 * @throws IndexingException if an error occurs during indexation. 197 */ 198 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException 199 { 200 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 201 indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient); 202 } 203 204 /** 205 * Index a page and eventually its children, recursively.<br> 206 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 207 * @param pageId the page to be indexed. 208 * @param workspaceName the workspace where to index 209 * @param indexRecursively to also process children pages. 210 * @param indexAttachments to index page attachments 211 * @param solrClient The solr client to use 212 * @throws IndexingException if an error occurs during indexation. 213 */ 214 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 215 { 216 Request request = ContextHelper.getRequest(_context); 217 218 // Retrieve the current workspace. 219 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 220 // Retrieve the current site name. 221 String currentSiteName = (String) request.getAttribute("siteName"); 222 223 try 224 { 225 // Force the workspace. 226 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 227 228 getLogger().debug("Indexing page: {}", pageId); 229 230 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 231 { 232 Page page = _ametysObjectResolver.resolveById(pageId); 233 _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 234 } 235 } 236 catch (AmetysRepositoryException e) 237 { 238 String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName); 239 getLogger().error(error, e); 240 throw new IndexingException(error, e); 241 } 242 finally 243 { 244 // Restore the site name. 245 request.setAttribute("siteName", currentSiteName); 246 // Restore context 247 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 248 } 249 } 250 251 private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 252 { 253 // Add callable for each page to index 254 List<Future<Void>> tasks = _asyncIndexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 255 256 // Now that everything is submitted, we can iterate and wait for result 257 IndexationResult.fromTasks(tasks, getLogger()); 258 } 259 260 private List<Future<Void>> _asyncIndexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 261 { 262 List<Future<Void>> tasks = new ArrayList<>(); 263 264 if (page.isIndexable()) 265 { 266 _threadIndexerHelper.submitCallable(new PageIndexerCallable(page, workspaceName, indexAttachments, solrClient)); 267 } 268 else 269 { 270 getLogger().debug("Not indexing page: {} in workspace '{}'", page, workspaceName); 271 } 272 273 if (indexRecursively) 274 { 275 AmetysObjectIterable<? extends Page> children = page.getChildrenPages(); 276 for (Page child : children) 277 { 278 // FIXME index child pages if (and only if) not indexed... see original source. 279// indexPage(child, false, indexRecursively); 280// indexPage(child, false); 281 tasks.addAll(_asyncIndexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient)); 282 } 283 } 284 285 return tasks; 286 } 287 288 /** 289 * Populate the solr input document by adding fields to index. 290 * @param page the page to index. 291 * @param document the solr input document 292 * @throws Exception if something goes wrong when processing the indexation of the page 293 */ 294 protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception 295 { 296 Sitemap sitemap = page.getSitemap(); 297 String sitemapName = sitemap.getName(); 298 Site site = page.getSite(); 299 String siteName = site.getName(); 300 String pageId = page.getId(); 301 String pageTitle = page.getTitle(); 302 String pageLongTitle = page.getLongTitle(); 303 String language = sitemapName; 304 305 // Page id and type 306 document.addField(SolrFieldNames.ID, pageId); 307 document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE); 308 309 // Fulltext 310 CMSDataContext context = CMSDataContext.newInstance() 311 .withLocale(LocaleUtils.toLocale(language)); 312 IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context); 313 if (!pageTitle.equals(pageLongTitle)) 314 { 315 IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context); 316 } 317 318 // Page title 319 _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language); 320 // Page long title 321 _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language); 322 // Title for sorting 323 document.addField(TITLE_SORT, pageTitle); 324 325 document.addField(TEMPLATE, page.getTemplate()); 326 document.addField(PAGE_TYPE, page.getType().name()); 327 document.addField(PAGE_DEPTH, page.getDepth()); 328 329 // Contents (page title shoud be indexed before because the main content can override it). 330 _populatePageContentsDocument(page, document); 331 332 // Parent of the page 333 AmetysObject parent = page.getParent(); 334 if (parent != null) 335 { 336 document.addField(PAGE_PARENT_ID, parent.getId()); 337 } 338 339 // Ancestors of the page 340 List<String> ancestorIds = new ArrayList<>(); 341 while (parent instanceof Page) 342 { 343 ancestorIds.add(parent.getId()); 344 parent = parent.getParent(); 345 } 346 document.addField(PAGE_ANCESTOR_IDS, ancestorIds); 347 348 document.addField(SITE_NAME, siteName); 349 document.addField(SITEMAP_NAME, sitemapName); 350 document.addField(SITE_TYPE, site.getType()); 351 352 // Page tags (strict and tags including ancestor pages). 353 Set<String> tags = page.getTags() 354 .stream() 355 .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName()))) 356 .collect(Collectors.toSet()); 357 document.addField(TagsSystemProperty.TAGS_SOLR_FIELD_NAME, tags); 358 document.addField(TagsSystemProperty.ALL_TAGS_SOLR_FIELD_NAME, _getTagsWithAncestors(page)); 359 360 _populateDatesOfPage(page, document); 361 362 // Attachments 363 _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language); 364 Optional.ofNullable(page.getRootAttachments()) 365 .map(AmetysObject::getId) 366 .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 367 _indexVisibleAttachments(page, document); 368 } 369 370 private void _indexVisibleAttachments(Page page, SolrInputDocument document) 371 { 372 Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds() 373 .stream() 374 .map(_pageVisibleAttachmentIndexerEP::getExtension) 375 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page)) 376 .flatMap(Collection::stream) 377 .collect(Collectors.toList()); 378 document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 379 } 380 381 /** 382 * Populate the solr input document with dates from the page 383 * @param page The page 384 * @param document The Solr document 385 */ 386 protected void _populateDatesOfPage(Page page, SolrInputDocument document) 387 { 388 // Page last modification date 389 ZonedDateTime lastModified = _getLastModificationDate(page); 390 if (lastModified != null) 391 { 392 String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC); 393 // For 'new' search service 394 document.addField(LastModifiedSystemProperty.SOLR_FIELD_NAME, lastModifiedStr); 395 // For 'old' search service 396 document.addField(LastModifiedSystemProperty.SOLR_FIELD_NAME + "_dt", lastModifiedStr); 397 } 398 399 // Page last validation date 400 ZonedDateTime lastValidation = _getLastValidationDate(page); 401 if (lastValidation != null) 402 { 403 String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC); 404 // For 'new' search service 405 document.addField(LastValidationSystemProperty.SOLR_FIELD_NAME, lastValidationStr); 406 } 407 408 // Page first validation date 409 ZonedDateTime firstValidation = _getFirstValidationDate(page); 410 if (firstValidation != null) 411 { 412 String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC); 413 // For 'new' search service 414 document.addField(FirstValidationSystemProperty.SOLR_FIELD_NAME, firstValidationStr); 415 } 416 417 // Page last major validation date 418 ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page); 419 if (lastMajorValidation != null) 420 { 421 String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC); 422 // For 'new' search service 423 document.addField(LastMajorValidationSystemProperty.SOLR_FIELD_NAME, lastMajorValidationStr); 424 } 425 426 // date for sorting 427 SolrInputField dateField = document.getField(DATE_FOR_SORTING); 428 if (dateField == null) 429 { 430 Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES); 431 if (oDateValues != null && !oDateValues.isEmpty()) 432 { 433 document.setField(DATE_FOR_SORTING, oDateValues.iterator().next()); 434 } 435 } 436 } 437 438 private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language) 439 { 440 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName); 441 442 document.addField(fieldName, possiblyTruncatedValue); 443 document.addField(fieldName + "_txt_" + language, fieldValue); 444 document.addField(fieldName + "_txt_stemmed_" + language, fieldValue); 445 document.addField(fieldName + "_txt_ws_" + language, fieldValue); 446 447 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 448 document.addField(fieldName + "_s_ws", fieldValue.toLowerCase()); 449 document.addField(fieldName + "_txt", fieldValue); 450 } 451 /** 452 * Get all the page tags with their ancestors. 453 * @param page The page. 454 * @return All the page tags with their ancestors. 455 */ 456 protected Set<String> _getTagsWithAncestors(Page page) 457 { 458 Set<String> allTags = new HashSet<>(page.getTags()); 459 460 Map<String, Object> tagParams = Map.of("siteName", page.getSiteName()); 461 462 for (String tagName : page.getTags()) 463 { 464 allTags.add(tagName); 465 466 // Get the ancestor tags 467 Tag tag = _tagProviderEP.getTag(tagName, tagParams); 468 for (Tag ancestor : TagHelper.getAncestors(tag, false)) 469 { 470 allTags.add(ancestor.getName()); 471 } 472 } 473 474 return allTags; 475 } 476 477 /** 478 * Index the content of the page.<p> 479 * @param page the page to index. 480 * @param document the document to populate. 481 * @throws Exception if an error occurs. 482 */ 483 protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception 484 { 485 if (page.getType() == PageType.CONTAINER) 486 { 487 for (Zone zone : page.getZones()) 488 { 489 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 490 for (ZoneItem zoneItem : zoneItems) 491 { 492 if (zoneItem.getType() == ZoneType.CONTENT) 493 { 494 try 495 { 496 Content content = zoneItem.getContent(); 497 document.addField(CONTENT_IDS, content.getId()); 498 499 for (String cType : content.getTypes()) 500 { 501 document.addField(PAGE_CONTENT_TYPES, cType); 502 document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets 503 } 504 505 _indexFacetableFields(content, document); 506 } 507 catch (AmetysRepositoryException e) 508 { 509 getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 510 } 511 } 512 else if (zoneItem.getType() == ZoneType.SERVICE) 513 { 514 try 515 { 516 String serviceId = zoneItem.getServiceId(); 517 document.addField(SERVICE_IDS, serviceId); 518 519 Service service = _serviceExtensionPoint.getExtension(serviceId); 520 if (service == null) 521 { 522 getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId()); 523 } 524 else 525 { 526 service.index(zoneItem, document); 527 } 528 } 529 catch (AmetysRepositoryException e) 530 { 531 getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 532 } 533 534 } 535 } 536 } 537 } 538 } 539 540 /** 541 * Index the facetable fields of a content into the page solr document 542 * @param content The content 543 * @param document The main page solr document. 544 */ 545 protected void _indexFacetableFields(Content content, SolrInputDocument document) 546 { 547 try 548 { 549 String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes()); 550 for (ModelItem modelItem : _cTypesHelper.getModelItems(allContentTypes)) 551 { 552 DataContext context = RepositoryDataContext.newInstance() 553 .withObject(content); 554 555 Optional.ofNullable(content.getLanguage()) 556 .map(LocaleUtils::toLocale) 557 .ifPresent(context::withLocale); 558 559 _findAndIndexFacetableField(document, content, modelItem, context); 560 } 561 } 562 catch (IllegalArgumentException e) 563 { 564 getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e); 565 throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e); 566 } 567 } 568 569 /** 570 * Index the facetable fields of a data holder into the page solr document 571 * @param pageDocument The Solr page document 572 * @param dataHolder the parent data holder 573 * @param modelItem the model item 574 * @param context the context of the data to index 575 */ 576 protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context) 577 { 578 String dataName = modelItem.getName(); 579 if (dataHolder.hasValue(dataName)) 580 { 581 if (modelItem instanceof ElementDefinition elementDefinition) 582 { 583 DataContext newContext = context.cloneContext() 584 .addSegmentToDataPath(dataName); 585 586 Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext); 587 for (String value : values) 588 { 589 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value); 590 } 591 } 592 else if (modelItem instanceof RepeaterDefinition repeaterDefinition) 593 { 594 ModelAwareRepeater repeater = dataHolder.getRepeater(dataName); 595 for (ModelAwareRepeaterEntry entry : repeater.getEntries()) 596 { 597 DataContext newContext = context.cloneContext() 598 .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]"); 599 600 for (ModelItem child : repeaterDefinition.getModelItems()) 601 { 602 _findAndIndexFacetableField(pageDocument, entry, child, newContext); 603 } 604 } 605 } 606 else if (modelItem instanceof CompositeDefinition compositeDefinition) 607 { 608 ModelAwareComposite composite = dataHolder.getComposite(dataName); 609 DataContext newContext = context.cloneContext() 610 .addSegmentToDataPath(dataName); 611 612 for (ModelItem child : compositeDefinition.getModelItems()) 613 { 614 _findAndIndexFacetableField(pageDocument, composite, child, newContext); 615 } 616 } 617 } 618 } 619 620 /** 621 * Retrieves the values to index if the field is facetable, or an empty collection 622 * @param dataHolder the data holder 623 * @param elementDefinition the definition of the field 624 * @param context the context of the data to index 625 * @return the values to index if the field is facetable, or an empty collection 626 */ 627 protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context) 628 { 629 String dataName = elementDefinition.getName(); 630 ElementType type = elementDefinition.getType(); 631 if (type instanceof IndexableElementType indexingElementType) 632 { 633 if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context)) 634 { 635 Object value = dataHolder.getValue(dataName, true); 636 if (value instanceof String[] stringValues) 637 { 638 return Arrays.asList(stringValues); 639 } 640 else if (value instanceof String stringValue) 641 { 642 return List.of(stringValue); 643 } 644 } 645 else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId())) 646 { 647 Object value = dataHolder.getValue(dataName, true); 648 if (value instanceof ContentValue[] contentValues) 649 { 650 return Arrays.stream(contentValues) 651 .map(ContentValue::getContentId) 652 .collect(Collectors.toList()); 653 } 654 else if (value instanceof ContentValue contentValue) 655 { 656 return List.of(contentValue.getContentId()); 657 } 658 } 659 } 660 661 return List.of(); 662 } 663 664 /** 665 * Computes the last modification date of a page. 666 * @param page the page. 667 * @return the last modification date or <code>null</code>. 668 */ 669 protected ZonedDateTime _getLastModificationDate(Page page) 670 { 671 return _getLastDate(page, Content::getLastModified); 672 } 673 /** 674 * Computes the first validation date of a page. 675 * @param page the page. 676 * @return the first validation date or <code>null</code>. 677 */ 678 protected ZonedDateTime _getFirstValidationDate(Page page) 679 { 680 return _getFirstDate(page, Content::getFirstValidationDate); 681 } 682 683 /** 684 * Computes the last validation date of a page. 685 * @param page the page. 686 * @return the last validation date or <code>null</code>. 687 */ 688 protected ZonedDateTime _getLastValidationDate(Page page) 689 { 690 return _getLastDate(page, Content::getLastValidationDate); 691 } 692 693 /** 694 * Computes the last major validation date of a page. 695 * @param page the page. 696 * @return the last major validation date or <code>null</code>. 697 */ 698 protected ZonedDateTime _getLastMajorValidationDate(Page page) 699 { 700 return _getLastDate(page, Content::getLastMajorValidationDate); 701 } 702 703 /** 704 * Computes a "last date" of a page, using the simple and naive following algorithm: 705 * <br>From all the dates from each of its contents, keep the greatest of them. 706 * @param page the page. 707 * @param dateRetriever The function to retrieve a Date from a Content of the Page 708 * @return the "last date" or <code>null</code>. 709 */ 710 protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 711 { 712 ZonedDateTime last = null; 713 714 if (page.getType() == PageType.CONTAINER) 715 { 716 for (Zone zone : page.getZones()) 717 { 718 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 719 for (ZoneItem zoneItem : zoneItems) 720 { 721 switch (zoneItem.getType()) 722 { 723 case SERVICE: 724 // A service has no last date 725 break; 726 case CONTENT: 727 try 728 { 729 ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent()); 730 731 if (contentLast != null && (last == null || contentLast.isAfter(last))) 732 { 733 // Keep the latest date 734 last = contentLast; 735 } 736 } 737 catch (AmetysRepositoryException e) 738 { 739 getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 740 } 741 break; 742 default: 743 break; 744 } 745 } 746 } 747 } 748 749 return last; 750 } 751 752 /** 753 * Computes a "first date" of a page, using the simple and naive following algorithm: 754 * <br>From all the dates from each of its contents, keep the lowest of them. 755 * @param page the page. 756 * @param dateRetriever The function to retrieve a Date from a Content of the Page 757 * @return the "first date" or <code>null</code>. 758 */ 759 protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 760 { 761 ZonedDateTime first = null; 762 763 if (page.getType() == PageType.CONTAINER) 764 { 765 for (Zone zone : page.getZones()) 766 { 767 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 768 for (ZoneItem zoneItem : zoneItems) 769 { 770 switch (zoneItem.getType()) 771 { 772 case SERVICE: 773 // A service has no first date 774 break; 775 case CONTENT: 776 try 777 { 778 ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent()); 779 780 if (contentFirst != null && (first == null || contentFirst.isBefore(first))) 781 { 782 // Keep the lowest date 783 first = contentFirst; 784 } 785 } 786 catch (AmetysRepositoryException e) 787 { 788 getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 789 } 790 break; 791 default: 792 break; 793 } 794 } 795 } 796 } 797 798 return first; 799 } 800 801 /** 802 * Populate the solr input document by adding fields to index. 803 * @param page the page to index. 804 * @param document the solr input document 805 * @throws Exception if something goes wrong when processing the indexation of the page 806 */ 807 protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception 808 { 809 Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page"); 810 for (AdditionalPropertyIndexer indexer : indexers) 811 { 812 indexer.index(page, document); 813 } 814 } 815 816 /** 817 * Index page attachments as new entries in the index. 818 * @param collection the collection of attachments 819 * @param page the page whose attachments will be indexed 820 * @throws Exception if something goes wrong when indexing the attachments of the page 821 */ 822 public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception 823 { 824 if (page.isIndexable()) 825 { 826 Request request = ContextHelper.getRequest(_context); 827 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 828 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 829 _indexPageAttachments(collection, page, solrClient); 830 } 831 } 832 833 private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception 834 { 835 if (collection == null) 836 { 837 return; 838 } 839 840 AmetysObjectIterable<AmetysObject> children = collection.getChildren(); 841 for (AmetysObject object : children) 842 { 843 if (object instanceof ResourceCollection) 844 { 845 _indexPageAttachments((ResourceCollection) object, page, solrClient); 846 } 847 else if (object instanceof Resource) 848 { 849 Resource resource = (Resource) object; 850 _indexPageAttachment(resource, page, solrClient); 851 } 852 } 853 } 854 855 /** 856 * Index a page attachment 857 * @param resource the page attachment as a {@link Resource} 858 * @param page the page whose attachment is going to be indexed 859 * @throws Exception if something goes wrong when processing the indexation of the page attachment 860 */ 861 public void indexPageAttachment(Resource resource, Page page) throws Exception 862 { 863 if (page.isIndexable()) 864 { 865 Request request = ContextHelper.getRequest(_context); 866 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 867 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 868 _indexPageAttachment(resource, page, solrClient); 869 } 870 } 871 872 private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception 873 { 874 SolrInputDocument document = new SolrInputDocument(); 875 876 // Prepare resource doc 877 _populatePageAttachmentDocument(resource, document, page); 878 879 // Indexation of the document 880 _indexResourceDocument(resource, document, solrClient); 881 } 882 883 private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception 884 { 885 String language = page.getSitemapName(); 886 887 _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language); 888 889 Site site = page.getSite(); 890 // site name - Store.YES, Index.NOT_ANALYZED 891 document.addField(SolrWebFieldNames.SITE_NAME, site.getName()); 892 893 // site type - Store.YES, Index.NOT_ANALYZED 894 document.addField(SolrWebFieldNames.SITE_TYPE, site.getType()); 895 896 // Added for Solr. 897 // Page site map name - Store.YES, Index.NOT_ANALYZED 898 document.addField(SITEMAP_NAME, page.getSitemapName()); 899 900 // Need the id of the page for unindexing attachment during the unindexing of the page 901 document.addField(ATTACHMENT_PAGE_ID, page.getId()); 902 } 903 904 /** 905 * Index a populated solr input document of type Page. 906 * @param page the page from which the input document is created 907 * @param document the input document to add to the solr index 908 * @param workspaceName The workspace name 909 * @param solrClient The solr client to use 910 * @throws SolrServerException if there is an error on the Solr server 911 * @throws IOException if there is a communication error with the server 912 */ 913 protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 914 { 915 // Retrieve appropriate solr client 916 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 917 918 // Add document 919 UpdateResponse solrResponse = solrClient.add(collectionName, document); 920 int status = solrResponse.getStatus(); 921 922 if (status != 0) 923 { 924 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId()); 925 } 926 927 getLogger().debug("Successful page indexing. Page identifier : {}", page.getId()); 928 } 929 930 /** 931 * Index a populated solr input document of type Resource. 932 * @param resource the resource from which the input document is created 933 * @param document the input document 934 * @param solrClient The solr client to use 935 * @throws SolrServerException if there is an error on the server 936 * @throws IOException if there is a communication error with the server 937 */ 938 protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException 939 { 940 // Retrieve appropriate solr client 941 Request request = ContextHelper.getRequest(_context); 942 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 943 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 944 945 // Add document 946 UpdateResponse solrResponse = solrClient.add(collectionName, document); 947 int status = solrResponse.getStatus(); 948 949 if (status != 0) 950 { 951 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId()); 952 } 953 954 getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId()); 955 } 956 957 /////////////////////////////////////////////////////////////////////////// 958 959 /** 960 * Un-index a page by its ID for all workspaces and commit 961 * @param pageId The page ID. 962 * @param unindexRecursively also unindex child pages if requested. 963 * @param unindexAttachments also unindex page attachments 964 * @throws Exception if an error occurs during index update. 965 */ 966 public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception 967 { 968 unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments); 969 unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments); 970 } 971 972 /** 973 * De-index a page (and optionally its children pages). 974 * @param pageId the page to be de-indexed. 975 * @param workspaceName The workspace where to work in 976 * @param unindexRecursively also unindex child pages if requested. 977 * @param unindexAttachments also unindex page attachments 978 * @throws Exception if an error occurs during index update. 979 */ 980 public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception 981 { 982 Request request = ContextHelper.getRequest(_context); 983 984 // Retrieve the current workspace. 985 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 986 // Retrieve the current site name. 987 String currentSiteName = (String) request.getAttribute("siteName"); 988 989 try 990 { 991 // Force the workspace. 992 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 993 994 getLogger().debug("Unindexing page: {}", pageId); 995 996 _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments); 997 } 998 catch (Exception e) 999 { 1000 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1001 getLogger().error(error, e); 1002 throw new IndexingException(error, e); 1003 } 1004 finally 1005 { 1006 // Restore the site name. 1007 request.setAttribute("siteName", currentSiteName); 1008 // Restore context 1009 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1010 } 1011 } 1012 1013 /** 1014 * Deindex a document of type Page. Also deindex attachments of a page 1015 * @param pageId the id of the page to deindex 1016 * @param workspaceName The workspace name 1017 * @param unindexRecursively also unindex child pages if requested. 1018 * @param unindexAttachments also unindex page attachments 1019 * @throws SolrServerException if there is an error on the server 1020 * @throws IOException if there is a communication error with the server 1021 * @throws QuerySyntaxException if the uri query can't be built because of a syntax error. 1022 */ 1023 protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException 1024 { 1025 // Retrieve appropriate solr client 1026 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 1027 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1028 1029 getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName); 1030 1031 Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively)); 1032 Query query; 1033 if (unindexRecursively && unindexAttachments) 1034 { 1035 // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"} 1036 Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID); 1037 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery)); 1038 query = new OrQuery(attachments, pages); 1039 } 1040 else if (unindexAttachments) 1041 { 1042 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId)); 1043 query = new OrQuery(attachments, pages); 1044 } 1045 else 1046 { 1047 query = pages; 1048 } 1049 1050 // Delete by query 1051 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build()); 1052 int status = solrResponse.getStatus(); 1053 1054 if (status != 0) 1055 { 1056 throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId); 1057 } 1058 1059 getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId); 1060 } 1061 1062 /////////////////////////////////////////////////////////////////////////// 1063 1064 /** 1065 * Reindex a page by its ID for all workspaces and commit 1066 * @param pageId The page ID. 1067 * @param reindexRecursively also reindex child pages if requested. 1068 * @param reindexAttachments also reindex page attachments 1069 * @throws Exception if an error occurs during index update. 1070 */ 1071 public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception 1072 { 1073 reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments); 1074 reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments); 1075 } 1076 1077 1078 /** 1079 * Reindex a page by its ID. 1080 * @param pageId The page ID. 1081 * @param workspaceName The workspace where to work in 1082 * @param reindexRecursively also reindex child pages if requested. 1083 * @param reindexAttachments also reindex page attachments 1084 * @throws IndexingException if an error occurs during index update. 1085 */ 1086 public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException 1087 { 1088 Request request = ContextHelper.getRequest(_context); 1089 1090 // Retrieve the current workspace. 1091 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 1092 // Retrieve the current site name. 1093 String currentSiteName = (String) request.getAttribute("siteName"); 1094 1095 try 1096 { 1097 // Force the workspace. 1098 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1099 1100 getLogger().debug("Reindexing page: {}", pageId); 1101 1102 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 1103 { 1104 Page page = _ametysObjectResolver.resolveById(pageId); 1105 _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments); 1106 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1107 _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient); 1108 } 1109 } 1110 catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e) 1111 { 1112 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1113 getLogger().error(error, e); 1114 throw new IndexingException(error, e); 1115 } 1116 finally 1117 { 1118 // Restore the site name. 1119 request.setAttribute("siteName", currentSiteName); 1120 // Restore context 1121 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1122 } 1123 } 1124 1125 private class PageIndexerCallable extends AbstractIndexerCallable<Page> 1126 { 1127 private boolean _indexAttachments; 1128 1129 @SuppressWarnings("synthetic-access") 1130 public PageIndexerCallable(Page page, String workspaceName, boolean indexAttachments, SolrClient solrClient) 1131 { 1132 super(page, workspaceName, solrClient, _manager, _cocoonContext, _ametysObjectResolver, getLogger()); 1133 this._indexAttachments = indexAttachments; 1134 } 1135 1136 @Override 1137 protected void process(Page page) throws Exception 1138 { 1139 _logger.info("Indexing page: {} in workspace '{}'", page, _workspaceName); 1140 1141 SolrInputDocument document = new SolrInputDocument(); 1142 1143 // Prepare the solr input document by adding fields. 1144 _populatePageDocument(page, document); 1145 1146 // Set the additional properties in the document. 1147 _populateAdditionalProperties(page, document); 1148 1149 // Indexation of ACL initial values 1150 _solrIndexer.indexAclInitValues(page, document); 1151 1152 // Indexation of the document 1153 _indexPageDocument(page, document, _workspaceName, _solrClient); 1154 1155 // Index page attachments documents 1156 if (_indexAttachments) 1157 { 1158 _indexPageAttachments(page.getRootAttachments(), page, _solrClient); 1159 } 1160 } 1161 1162 @Override 1163 protected String getObjectLabel() 1164 { 1165 return "page"; 1166 } 1167 } 1168}