001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.time.ZoneOffset; 020import java.time.ZonedDateTime; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Optional; 029import java.util.Set; 030import java.util.function.Function; 031import java.util.stream.Collectors; 032 033import org.apache.avalon.framework.component.Component; 034import org.apache.avalon.framework.context.Context; 035import org.apache.avalon.framework.context.ContextException; 036import org.apache.avalon.framework.context.Contextualizable; 037import org.apache.avalon.framework.service.ServiceException; 038import org.apache.avalon.framework.service.ServiceManager; 039import org.apache.avalon.framework.service.Serviceable; 040import org.apache.cocoon.components.ContextHelper; 041import org.apache.cocoon.environment.Request; 042import org.apache.commons.lang3.ArrayUtils; 043import org.apache.solr.client.solrj.SolrClient; 044import org.apache.solr.client.solrj.SolrServerException; 045import org.apache.solr.client.solrj.response.UpdateResponse; 046import org.apache.solr.common.SolrInputDocument; 047import org.apache.solr.common.SolrInputField; 048 049import org.ametys.cms.content.indexing.solr.SolrFieldNames; 050import org.ametys.cms.content.indexing.solr.SolrIndexer; 051import org.ametys.cms.content.indexing.solr.SolrResourceIndexer; 052import org.ametys.cms.contenttype.ContentTypesHelper; 053import org.ametys.cms.data.ContentValue; 054import org.ametys.cms.data.type.indexing.IndexableDataContext; 055import org.ametys.cms.data.type.indexing.IndexableElementType; 056import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper; 057import org.ametys.cms.indexing.IndexingException; 058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer; 059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint; 060import org.ametys.cms.model.properties.ElementRefProperty; 061import org.ametys.cms.model.properties.Property; 062import org.ametys.cms.repository.Content; 063import org.ametys.cms.search.query.AndQuery; 064import org.ametys.cms.search.query.DocumentTypeQuery; 065import org.ametys.cms.search.query.JoinQuery; 066import org.ametys.cms.search.query.OrQuery; 067import org.ametys.cms.search.query.Query; 068import org.ametys.cms.search.query.QuerySyntaxException; 069import org.ametys.cms.search.solr.SolrClientProvider; 070import org.ametys.cms.search.solr.field.FirstValidationSearchField; 071import org.ametys.cms.search.solr.field.LastMajorValidationSearchField; 072import org.ametys.cms.search.solr.field.LastModifiedSearchField; 073import org.ametys.cms.search.solr.field.LastValidationSearchField; 074import org.ametys.cms.tag.Tag; 075import org.ametys.cms.tag.TagHelper; 076import org.ametys.cms.tag.TagProviderExtensionPoint; 077import org.ametys.core.util.DateUtils; 078import org.ametys.plugins.explorer.resources.Resource; 079import org.ametys.plugins.explorer.resources.ResourceCollection; 080import org.ametys.plugins.repository.AmetysObject; 081import org.ametys.plugins.repository.AmetysObjectIterable; 082import org.ametys.plugins.repository.AmetysObjectResolver; 083import org.ametys.plugins.repository.AmetysRepositoryException; 084import org.ametys.plugins.repository.RepositoryConstants; 085import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder; 086import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite; 087import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater; 088import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry; 089import org.ametys.plugins.repository.model.CompositeDefinition; 090import org.ametys.plugins.repository.model.RepeaterDefinition; 091import org.ametys.plugins.repository.model.RepositoryDataContext; 092import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 093import org.ametys.runtime.model.ElementDefinition; 094import org.ametys.runtime.model.ModelItem; 095import org.ametys.runtime.model.type.DataContext; 096import org.ametys.runtime.model.type.ElementType; 097import org.ametys.runtime.model.type.ModelItemTypeConstants; 098import org.ametys.runtime.plugin.component.AbstractLogEnabled; 099import org.ametys.web.WebConstants; 100import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint; 101import org.ametys.web.repository.page.Page; 102import org.ametys.web.repository.page.Page.PageType; 103import org.ametys.web.repository.page.Zone; 104import org.ametys.web.repository.page.ZoneItem; 105import org.ametys.web.repository.page.ZoneItem.ZoneType; 106import org.ametys.web.repository.site.Site; 107import org.ametys.web.repository.sitemap.Sitemap; 108import org.ametys.web.search.query.PageAttachmentQuery; 109import org.ametys.web.search.query.PageQuery; 110import org.ametys.web.service.Service; 111import org.ametys.web.service.ServiceExtensionPoint; 112 113/** 114 * Component responsible for indexing a page with all its contents. 115 */ 116public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable 117{ 118 /** The avalon role. */ 119 public static final String ROLE = SolrPageIndexer.class.getName(); 120 121 /** The Solr client provider */ 122 protected SolrClientProvider _solrClientProvider; 123 /** The Solr indexer */ 124 protected SolrIndexer _solrIndexer; 125 /** Solr Ametys resources indexer */ 126 protected SolrResourceIndexer _solrResourceIndexer; 127 /** The extension point for PageVisibleAttachmentIndexers */ 128 protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP; 129 /** The additional property indexer extension point. */ 130 protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP; 131 /** The tag provider extension point. */ 132 protected TagProviderExtensionPoint _tagProviderEP; 133 134 /** The service extension point. */ 135 protected ServiceExtensionPoint _serviceExtensionPoint; 136 /** The Ametys object resolver*/ 137 protected AmetysObjectResolver _ametysObjectResolver; 138 /** The avalon context */ 139 protected Context _context; 140 141 private ContentTypesHelper _cTypesHelper; 142 143 @Override 144 public void service(ServiceManager manager) throws ServiceException 145 { 146 _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 147 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 148 _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 149 _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE); 150 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 151 _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE); 152 _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE); 153 _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE); 154 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 155 } 156 157 public void contextualize(Context context) throws ContextException 158 { 159 _context = context; 160 } 161 162 /** 163 * Index a page and eventually its children, recursively, in all workspaces and commit<br> 164 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 165 * @param pageId the page to be indexed. 166 * @param indexRecursively to also process children pages. 167 * @param indexAttachments to index page attachments 168 * @throws Exception if an error occurs during indexation. 169 */ 170 public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception 171 { 172 indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments); 173 indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments); 174 } 175 176 /** 177 * Index a page and eventually its children, recursively.<br> 178 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 179 * @param pageId the page to be indexed. 180 * @param workspaceName the workspace where to index 181 * @param indexRecursively to also process children pages. 182 * @param indexAttachments to index page attachments 183 * @throws IndexingException if an error occurs during indexation. 184 */ 185 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException 186 { 187 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 188 indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient); 189 } 190 191 /** 192 * Index a page and eventually its children, recursively.<br> 193 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 194 * @param pageId the page to be indexed. 195 * @param workspaceName the workspace where to index 196 * @param indexRecursively to also process children pages. 197 * @param indexAttachments to index page attachments 198 * @param solrClient The solr client to use 199 * @throws IndexingException if an error occurs during indexation. 200 */ 201 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 202 { 203 Request request = ContextHelper.getRequest(_context); 204 205 // Retrieve the current workspace. 206 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 207 // Retrieve the current site name. 208 String currentSiteName = (String) request.getAttribute("siteName"); 209 210 try 211 { 212 // Force the workspace. 213 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 214 215 getLogger().debug("Indexing page: {}", pageId); 216 217 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 218 { 219 Page page = _ametysObjectResolver.resolveById(pageId); 220 _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 221 } 222 } 223 catch (AmetysRepositoryException e) 224 { 225 String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName); 226 getLogger().error(error, e); 227 throw new IndexingException(error, e); 228 } 229 finally 230 { 231 // Restore the site name. 232 request.setAttribute("siteName", currentSiteName); 233 // Restore context 234 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 235 } 236 } 237 238 private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 239 { 240 getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName); 241 242 SolrInputDocument document = new SolrInputDocument(); 243 244 try 245 { 246 // Prepare the solr input document by adding fields. 247 _populatePageDocument(page, document); 248 249 // Set the additional properties in the document. 250 _populateAdditionalProperties(page, document); 251 252 // Indexation of ACL initial values 253 _solrIndexer.indexAclInitValues(page, document); 254 255 // Indexation of the document 256 _indexPageDocument(page, document, workspaceName, solrClient); 257 258 // Index page attachments documents 259 if (indexAttachments) 260 { 261 _indexPageAttachments(page.getRootAttachments(), page, solrClient); 262 } 263 } 264 catch (Exception e) 265 { 266 String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName); 267 getLogger().error(error, e); 268 throw new IndexingException(error, e); 269 } 270 271 if (indexRecursively) 272 { 273 AmetysObjectIterable<? extends Page> children = page.getChildrenPages(); 274 for (Page child : children) 275 { 276 // FIXME index child pages if (and only if) not indexed... see original source. 277// indexPage(child, false, indexRecursively); 278// indexPage(child, false); 279 _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient); 280 } 281 } 282 } 283 284 /** 285 * Populate the solr input document by adding fields to index. 286 * @param page the page to index. 287 * @param document the solr input document 288 * @throws Exception if something goes wrong when processing the indexation of the page 289 */ 290 protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception 291 { 292 Sitemap sitemap = page.getSitemap(); 293 String sitemapName = sitemap.getName(); 294 Site site = page.getSite(); 295 String siteName = site.getName(); 296 String pageId = page.getId(); 297 String pageTitle = page.getTitle(); 298 String pageLongTitle = page.getLongTitle(); 299 String language = sitemapName; 300 301 // Page id and type 302 document.addField(SolrFieldNames.ID, pageId); 303 document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE); 304 305 // Fulltext 306 IndexableDataContext context = IndexableDataContext.newInstance() 307 .withLocale(new Locale(language)); 308 IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context); 309 if (!pageTitle.equals(pageLongTitle)) 310 { 311 IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context); 312 } 313 314 // Page title 315 _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language); 316 // Page long title 317 _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language); 318 // Title for sorting 319 document.addField(TITLE_SORT, pageTitle); 320 321 document.addField(TEMPLATE, page.getTemplate()); 322 document.addField(PAGE_TYPE, page.getType().name()); 323 document.addField(PAGE_DEPTH, page.getDepth()); 324 325 // Contents (page title shoud be indexed before because the main content can override it). 326 _populatePageContentsDocument(page, document); 327 328 // Parent of the page 329 AmetysObject parent = page.getParent(); 330 if (parent != null) 331 { 332 document.addField(PAGE_PARENT_ID, parent.getId()); 333 } 334 335 // Ancestors of the page 336 List<String> ancestorIds = new ArrayList<>(); 337 while (parent instanceof Page) 338 { 339 ancestorIds.add(parent.getId()); 340 parent = parent.getParent(); 341 } 342 document.addField(PAGE_ANCESTOR_IDS, ancestorIds); 343 344 document.addField(SITE_NAME, siteName); 345 document.addField(SITEMAP_NAME, sitemapName); 346 document.addField(SITE_TYPE, site.getType()); 347 348 // Page tags (strict and tags including ancestor pages). 349 Set<String> tags = page.getTags() 350 .stream() 351 .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName()))) 352 .collect(Collectors.toSet()); 353 document.addField(SolrFieldNames.TAGS, tags); 354 document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page)); 355 356 _populateDatesOfPage(page, document); 357 358 // Attachments 359 _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language); 360 Optional.ofNullable(page.getRootAttachments()) 361 .map(AmetysObject::getId) 362 .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 363 _indexVisibleAttachments(page, document); 364 } 365 366 private void _indexVisibleAttachments(Page page, SolrInputDocument document) 367 { 368 Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds() 369 .stream() 370 .map(_pageVisibleAttachmentIndexerEP::getExtension) 371 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page)) 372 .flatMap(Collection::stream) 373 .collect(Collectors.toList()); 374 document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 375 } 376 377 /** 378 * Populate the solr input document with dates from the page 379 * @param page The page 380 * @param document The Solr document 381 */ 382 protected void _populateDatesOfPage(Page page, SolrInputDocument document) 383 { 384 // Page last modification date 385 ZonedDateTime lastModified = _getLastModificationDate(page); 386 if (lastModified != null) 387 { 388 String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC); 389 // For 'new' search service 390 document.addField(LastModifiedSearchField.NAME, lastModifiedStr); 391 // For 'old' search service 392 document.addField(LAST_MODIFIED + "_dt", lastModifiedStr); 393 } 394 395 // Page last validation date 396 ZonedDateTime lastValidation = _getLastValidationDate(page); 397 if (lastValidation != null) 398 { 399 String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC); 400 // For 'new' search service 401 document.addField(LastValidationSearchField.NAME, lastValidationStr); 402 } 403 404 // Page first validation date 405 ZonedDateTime firstValidation = _getFirstValidationDate(page); 406 if (firstValidation != null) 407 { 408 String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC); 409 // For 'new' search service 410 document.addField(FirstValidationSearchField.NAME, firstValidationStr); 411 } 412 413 // Page last major validation date 414 ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page); 415 if (lastMajorValidation != null) 416 { 417 String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC); 418 // For 'new' search service 419 document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr); 420 } 421 422 // date for sorting 423 SolrInputField dateField = document.getField(DATE_FOR_SORTING); 424 if (dateField == null) 425 { 426 Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES); 427 if (oDateValues != null && !oDateValues.isEmpty()) 428 { 429 document.setField(DATE_FOR_SORTING, oDateValues.iterator().next()); 430 } 431 } 432 } 433 434 private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language) 435 { 436 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName); 437 438 document.addField(fieldName, possiblyTruncatedValue); 439 document.addField(fieldName + "_txt_" + language, fieldValue); 440 document.addField(fieldName + "_txt_stemmed_" + language, fieldValue); 441 document.addField(fieldName + "_txt_ws_" + language, fieldValue); 442 443 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 444 document.addField(fieldName + "_s_ws", fieldValue.toLowerCase()); 445 document.addField(fieldName + "_txt", fieldValue); 446 } 447 /** 448 * Get all the page tags with their ancestors. 449 * @param page The page. 450 * @return All the page tags with their ancestors. 451 */ 452 protected Set<String> _getTagsWithAncestors(Page page) 453 { 454 Set<String> allTags = new HashSet<>(page.getTags()); 455 456 Map<String, Object> tagParams = Map.of("siteName", page.getSiteName()); 457 458 for (String tagName : page.getTags()) 459 { 460 allTags.add(tagName); 461 462 // Get the ancestor tags 463 Tag tag = _tagProviderEP.getTag(tagName, tagParams); 464 for (Tag ancestor : TagHelper.getAncestors(tag, false)) 465 { 466 allTags.add(ancestor.getName()); 467 } 468 } 469 470 return allTags; 471 } 472 473 /** 474 * Index the content of the page.<p> 475 * @param page the page to index. 476 * @param document the document to populate. 477 * @throws Exception if an error occurs. 478 */ 479 protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception 480 { 481 if (page.getType() == PageType.CONTAINER) 482 { 483 for (Zone zone : page.getZones()) 484 { 485 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 486 for (ZoneItem zoneItem : zoneItems) 487 { 488 if (zoneItem.getType() == ZoneType.CONTENT) 489 { 490 try 491 { 492 Content content = zoneItem.getContent(); 493 document.addField(CONTENT_IDS, content.getId()); 494 495 for (String cType : content.getTypes()) 496 { 497 document.addField(PAGE_CONTENT_TYPES, cType); 498 document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets 499 } 500 501 _indexFacetableFields(content, document); 502 } 503 catch (AmetysRepositoryException e) 504 { 505 getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 506 } 507 } 508 else if (zoneItem.getType() == ZoneType.SERVICE) 509 { 510 try 511 { 512 String serviceId = zoneItem.getServiceId(); 513 document.addField(SERVICE_IDS, serviceId); 514 515 Service service = _serviceExtensionPoint.getExtension(serviceId); 516 if (service == null) 517 { 518 getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId()); 519 } 520 else 521 { 522 service.index(zoneItem, document); 523 } 524 } 525 catch (AmetysRepositoryException e) 526 { 527 getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 528 } 529 530 } 531 } 532 } 533 } 534 } 535 536 /** 537 * Index the facetable fields of a content into the page solr document 538 * @param content The content 539 * @param document The main page solr document. 540 */ 541 protected void _indexFacetableFields(Content content, SolrInputDocument document) 542 { 543 List<ModelItem> modelItems = new ArrayList<>(); 544 try 545 { 546 String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes()); 547 modelItems.addAll(_cTypesHelper.getModelItems(allContentTypes) 548 .stream() 549 .filter(modelItem -> !(modelItem instanceof Property) || modelItem instanceof ElementRefProperty) 550 .collect(Collectors.toList())); 551 } 552 catch (IllegalArgumentException e) 553 { 554 getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e); 555 throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e); 556 } 557 558 for (ModelItem modelItem : modelItems) 559 { 560 DataContext context = RepositoryDataContext.newInstance() 561 .withObject(content); 562 563 Optional.ofNullable(content.getLanguage()) 564 .map(Locale::new) 565 .ifPresent(context::withLocale); 566 567 _findAndIndexFacetableField(document, content, modelItem, context); 568 } 569 } 570 571 /** 572 * Index the facetable fields of a data holder into the page solr document 573 * @param pageDocument The Solr page document 574 * @param dataHolder the parent data holder 575 * @param modelItem the model item 576 * @param context the context of the data to index 577 */ 578 protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context) 579 { 580 String dataName = modelItem.getName(); 581 if (dataHolder.hasValue(dataName)) 582 { 583 if (modelItem instanceof ElementDefinition elementDefinition) 584 { 585 DataContext newContext = context.cloneContext() 586 .addSegmentToDataPath(dataName); 587 588 Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext); 589 for (String value : values) 590 { 591 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value); 592 } 593 } 594 else if (modelItem instanceof RepeaterDefinition repeaterDefinition) 595 { 596 ModelAwareRepeater repeater = dataHolder.getRepeater(dataName); 597 for (ModelAwareRepeaterEntry entry : repeater.getEntries()) 598 { 599 DataContext newContext = context.cloneContext() 600 .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]"); 601 602 for (ModelItem child : repeaterDefinition.getModelItems()) 603 { 604 _findAndIndexFacetableField(pageDocument, entry, child, newContext); 605 } 606 } 607 } 608 else if (modelItem instanceof CompositeDefinition compositeDefinition) 609 { 610 ModelAwareComposite composite = dataHolder.getComposite(dataName); 611 DataContext newContext = context.cloneContext() 612 .addSegmentToDataPath(dataName); 613 614 for (ModelItem child : compositeDefinition.getModelItems()) 615 { 616 _findAndIndexFacetableField(pageDocument, composite, child, newContext); 617 } 618 } 619 } 620 } 621 622 /** 623 * Retrieves the values to index if the field is facetable, or an empty collection 624 * @param dataHolder the data holder 625 * @param elementDefinition the definition of the field 626 * @param context the context of the data to index 627 * @return the values to index if the field is facetable, or an empty collection 628 */ 629 protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context) 630 { 631 String dataName = elementDefinition.getName(); 632 ElementType type = elementDefinition.getType(); 633 if (type instanceof IndexableElementType indexingElementType) 634 { 635 if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context)) 636 { 637 String dataPath = dataName; 638 if (elementDefinition instanceof ElementRefProperty property) 639 { 640 dataPath = property.getPath(); 641 } 642 643 Object value = dataHolder.getValue(dataPath, true); 644 if (value instanceof String[] stringValues) 645 { 646 return Arrays.asList(stringValues); 647 } 648 else if (value instanceof String stringValue) 649 { 650 return List.of(stringValue); 651 } 652 } 653 else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId())) 654 { 655 String dataPath = dataName; 656 if (elementDefinition instanceof ElementRefProperty property) 657 { 658 dataPath = property.getPath(); 659 } 660 661 Object value = dataHolder.getValue(dataPath, true); 662 if (value instanceof ContentValue[] contentValues) 663 { 664 return Arrays.stream(contentValues) 665 .map(ContentValue::getContentId) 666 .collect(Collectors.toList()); 667 } 668 else if (value instanceof ContentValue contentValue) 669 { 670 return List.of(contentValue.getContentId()); 671 } 672 } 673 } 674 675 return List.of(); 676 } 677 678 /** 679 * Computes the last modification date of a page. 680 * @param page the page. 681 * @return the last modification date or <code>null</code>. 682 */ 683 protected ZonedDateTime _getLastModificationDate(Page page) 684 { 685 return _getLastDate(page, Content::getLastModified); 686 } 687 /** 688 * Computes the first validation date of a page. 689 * @param page the page. 690 * @return the first validation date or <code>null</code>. 691 */ 692 protected ZonedDateTime _getFirstValidationDate(Page page) 693 { 694 return _getFirstDate(page, Content::getFirstValidationDate); 695 } 696 697 /** 698 * Computes the last validation date of a page. 699 * @param page the page. 700 * @return the last validation date or <code>null</code>. 701 */ 702 protected ZonedDateTime _getLastValidationDate(Page page) 703 { 704 return _getLastDate(page, Content::getLastValidationDate); 705 } 706 707 /** 708 * Computes the last major validation date of a page. 709 * @param page the page. 710 * @return the last major validation date or <code>null</code>. 711 */ 712 protected ZonedDateTime _getLastMajorValidationDate(Page page) 713 { 714 return _getLastDate(page, Content::getLastMajorValidationDate); 715 } 716 717 /** 718 * Computes a "last date" of a page, using the simple and naive following algorithm: 719 * <br>From all the dates from each of its contents, keep the greatest of them. 720 * @param page the page. 721 * @param dateRetriever The function to retrieve a Date from a Content of the Page 722 * @return the "last date" or <code>null</code>. 723 */ 724 protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 725 { 726 ZonedDateTime last = null; 727 728 if (page.getType() == PageType.CONTAINER) 729 { 730 for (Zone zone : page.getZones()) 731 { 732 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 733 for (ZoneItem zoneItem : zoneItems) 734 { 735 switch (zoneItem.getType()) 736 { 737 case SERVICE: 738 // A service has no last date 739 break; 740 case CONTENT: 741 try 742 { 743 ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent()); 744 745 if (contentLast != null && (last == null || contentLast.isAfter(last))) 746 { 747 // Keep the latest date 748 last = contentLast; 749 } 750 } 751 catch (AmetysRepositoryException e) 752 { 753 getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 754 } 755 break; 756 default: 757 break; 758 } 759 } 760 } 761 } 762 763 return last; 764 } 765 766 /** 767 * Computes a "first date" of a page, using the simple and naive following algorithm: 768 * <br>From all the dates from each of its contents, keep the lowest of them. 769 * @param page the page. 770 * @param dateRetriever The function to retrieve a Date from a Content of the Page 771 * @return the "first date" or <code>null</code>. 772 */ 773 protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 774 { 775 ZonedDateTime first = null; 776 777 if (page.getType() == PageType.CONTAINER) 778 { 779 for (Zone zone : page.getZones()) 780 { 781 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 782 for (ZoneItem zoneItem : zoneItems) 783 { 784 switch (zoneItem.getType()) 785 { 786 case SERVICE: 787 // A service has no first date 788 break; 789 case CONTENT: 790 try 791 { 792 ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent()); 793 794 if (contentFirst != null && (first == null || contentFirst.isBefore(first))) 795 { 796 // Keep the lowest date 797 first = contentFirst; 798 } 799 } 800 catch (AmetysRepositoryException e) 801 { 802 getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 803 } 804 break; 805 default: 806 break; 807 } 808 } 809 } 810 } 811 812 return first; 813 } 814 815 /** 816 * Populate the solr input document by adding fields to index. 817 * @param page the page to index. 818 * @param document the solr input document 819 * @throws Exception if something goes wrong when processing the indexation of the page 820 */ 821 protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception 822 { 823 Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page"); 824 for (AdditionalPropertyIndexer indexer : indexers) 825 { 826 indexer.index(page, document); 827 } 828 } 829 830 /** 831 * Index page attachments as new entries in the index. 832 * @param collection the collection of attachments 833 * @param page the page whose attachments will be indexed 834 * @throws Exception if something goes wrong when indexing the attachments of the page 835 */ 836 public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception 837 { 838 Request request = ContextHelper.getRequest(_context); 839 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 840 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 841 _indexPageAttachments(collection, page, solrClient); 842 } 843 844 private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception 845 { 846 if (collection == null) 847 { 848 return; 849 } 850 851 AmetysObjectIterable<AmetysObject> children = collection.getChildren(); 852 for (AmetysObject object : children) 853 { 854 if (object instanceof ResourceCollection) 855 { 856 _indexPageAttachments((ResourceCollection) object, page, solrClient); 857 } 858 else if (object instanceof Resource) 859 { 860 Resource resource = (Resource) object; 861 _indexPageAttachment(resource, page, solrClient); 862 } 863 } 864 } 865 866 /** 867 * Index a page attachment 868 * @param resource the page attachment as a {@link Resource} 869 * @param page the page whose attachment is going to be indexed 870 * @throws Exception if something goes wrong when processing the indexation of the page attachment 871 */ 872 public void indexPageAttachment(Resource resource, Page page) throws Exception 873 { 874 Request request = ContextHelper.getRequest(_context); 875 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 876 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 877 _indexPageAttachment(resource, page, solrClient); 878 } 879 880 private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception 881 { 882 SolrInputDocument document = new SolrInputDocument(); 883 884 // Prepare resource doc 885 _populatePageAttachmentDocument(resource, document, page); 886 887 // Indexation of the document 888 _indexResourceDocument(resource, document, solrClient); 889 } 890 891 private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception 892 { 893 String language = page.getSitemapName(); 894 895 _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language); 896 897 Site site = page.getSite(); 898 // site name - Store.YES, Index.NOT_ANALYZED 899 document.addField(SolrWebFieldNames.SITE_NAME, site.getName()); 900 901 // site type - Store.YES, Index.NOT_ANALYZED 902 document.addField(SolrWebFieldNames.SITE_TYPE, site.getType()); 903 904 // Added for Solr. 905 // Page site map name - Store.YES, Index.NOT_ANALYZED 906 document.addField(SITEMAP_NAME, page.getSitemapName()); 907 908 // Need the id of the page for unindexing attachment during the unindexing of the page 909 document.addField(ATTACHMENT_PAGE_ID, page.getId()); 910 } 911 912 /** 913 * Index a populated solr input document of type Page. 914 * @param page the page from which the input document is created 915 * @param document the input document to add to the solr index 916 * @param workspaceName The workspace name 917 * @param solrClient The solr client to use 918 * @throws SolrServerException if there is an error on the Solr server 919 * @throws IOException if there is a communication error with the server 920 */ 921 protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 922 { 923 // Retrieve appropriate solr client 924 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 925 926 // Add document 927 UpdateResponse solrResponse = solrClient.add(collectionName, document); 928 int status = solrResponse.getStatus(); 929 930 if (status != 0) 931 { 932 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId()); 933 } 934 935 getLogger().debug("Successful page indexing. Page identifier : {}", page.getId()); 936 } 937 938 /** 939 * Index a populated solr input document of type Resource. 940 * @param resource the resource from which the input document is created 941 * @param document the input document 942 * @param solrClient The solr client to use 943 * @throws SolrServerException if there is an error on the server 944 * @throws IOException if there is a communication error with the server 945 */ 946 protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException 947 { 948 // Retrieve appropriate solr client 949 Request request = ContextHelper.getRequest(_context); 950 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 951 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 952 953 // Add document 954 UpdateResponse solrResponse = solrClient.add(collectionName, document); 955 int status = solrResponse.getStatus(); 956 957 if (status != 0) 958 { 959 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId()); 960 } 961 962 getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId()); 963 } 964 965 /////////////////////////////////////////////////////////////////////////// 966 967 /** 968 * Un-index a page by its ID for all workspaces and commit 969 * @param pageId The page ID. 970 * @param unindexRecursively also unindex child pages if requested. 971 * @param unindexAttachments also unindex page attachments 972 * @throws Exception if an error occurs during index update. 973 */ 974 public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception 975 { 976 unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments); 977 unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments); 978 } 979 980 /** 981 * De-index a page (and optionally its children pages). 982 * @param pageId the page to be de-indexed. 983 * @param workspaceName The workspace where to work in 984 * @param unindexRecursively also unindex child pages if requested. 985 * @param unindexAttachments also unindex page attachments 986 * @throws Exception if an error occurs during index update. 987 */ 988 public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception 989 { 990 Request request = ContextHelper.getRequest(_context); 991 992 // Retrieve the current workspace. 993 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 994 // Retrieve the current site name. 995 String currentSiteName = (String) request.getAttribute("siteName"); 996 997 try 998 { 999 // Force the workspace. 1000 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1001 1002 getLogger().debug("Unindexing page: {}", pageId); 1003 1004 _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments); 1005 } 1006 catch (Exception e) 1007 { 1008 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1009 getLogger().error(error, e); 1010 throw new IndexingException(error, e); 1011 } 1012 finally 1013 { 1014 // Restore the site name. 1015 request.setAttribute("siteName", currentSiteName); 1016 // Restore context 1017 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1018 } 1019 } 1020 1021 /** 1022 * Deindex a document of type Page. Also deindex attachments of a page 1023 * @param pageId the id of the page to deindex 1024 * @param workspaceName The workspace name 1025 * @param unindexRecursively also unindex child pages if requested. 1026 * @param unindexAttachments also unindex page attachments 1027 * @throws SolrServerException if there is an error on the server 1028 * @throws IOException if there is a communication error with the server 1029 * @throws QuerySyntaxException if the uri query can't be built because of a syntax error. 1030 */ 1031 protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException 1032 { 1033 // Retrieve appropriate solr client 1034 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 1035 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1036 1037 getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName); 1038 1039 Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively)); 1040 Query query; 1041 if (unindexRecursively && unindexAttachments) 1042 { 1043 // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"} 1044 Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID); 1045 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery)); 1046 query = new OrQuery(attachments, pages); 1047 } 1048 else if (unindexAttachments) 1049 { 1050 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId)); 1051 query = new OrQuery(attachments, pages); 1052 } 1053 else 1054 { 1055 query = pages; 1056 } 1057 1058 // Delete by query 1059 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build()); 1060 int status = solrResponse.getStatus(); 1061 1062 if (status != 0) 1063 { 1064 throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId); 1065 } 1066 1067 getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId); 1068 } 1069 1070 /////////////////////////////////////////////////////////////////////////// 1071 1072 /** 1073 * Reindex a page by its ID for all workspaces and commit 1074 * @param pageId The page ID. 1075 * @param reindexRecursively also reindex child pages if requested. 1076 * @param reindexAttachments also reindex page attachments 1077 * @throws Exception if an error occurs during index update. 1078 */ 1079 public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception 1080 { 1081 reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments); 1082 reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments); 1083 } 1084 1085 1086 /** 1087 * Reindex a page by its ID. 1088 * @param pageId The page ID. 1089 * @param workspaceName The workspace where to work in 1090 * @param reindexRecursively also reindex child pages if requested. 1091 * @param reindexAttachments also reindex page attachments 1092 * @throws IndexingException if an error occurs during index update. 1093 */ 1094 public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException 1095 { 1096 Request request = ContextHelper.getRequest(_context); 1097 1098 // Retrieve the current workspace. 1099 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 1100 // Retrieve the current site name. 1101 String currentSiteName = (String) request.getAttribute("siteName"); 1102 1103 try 1104 { 1105 // Force the workspace. 1106 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1107 1108 getLogger().debug("Reindexing page: {}", pageId); 1109 1110 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 1111 { 1112 Page page = _ametysObjectResolver.resolveById(pageId); 1113 _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments); 1114 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1115 _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient); 1116 } 1117 } 1118 catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e) 1119 { 1120 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1121 getLogger().error(error, e); 1122 throw new IndexingException(error, e); 1123 } 1124 finally 1125 { 1126 // Restore the site name. 1127 request.setAttribute("siteName", currentSiteName); 1128 // Restore context 1129 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1130 } 1131 } 1132}