001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.time.ZoneOffset; 020import java.time.ZonedDateTime; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Optional; 029import java.util.Set; 030import java.util.function.Function; 031import java.util.stream.Collectors; 032 033import org.apache.avalon.framework.component.Component; 034import org.apache.avalon.framework.context.Context; 035import org.apache.avalon.framework.context.ContextException; 036import org.apache.avalon.framework.context.Contextualizable; 037import org.apache.avalon.framework.service.ServiceException; 038import org.apache.avalon.framework.service.ServiceManager; 039import org.apache.avalon.framework.service.Serviceable; 040import org.apache.cocoon.components.ContextHelper; 041import org.apache.cocoon.environment.Request; 042import org.apache.commons.lang3.ArrayUtils; 043import org.apache.solr.client.solrj.SolrClient; 044import org.apache.solr.client.solrj.SolrServerException; 045import org.apache.solr.client.solrj.response.UpdateResponse; 046import org.apache.solr.common.SolrInputDocument; 047import org.apache.solr.common.SolrInputField; 048 049import org.ametys.cms.content.indexing.solr.SolrFieldNames; 050import org.ametys.cms.content.indexing.solr.SolrIndexer; 051import org.ametys.cms.content.indexing.solr.SolrResourceIndexer; 052import org.ametys.cms.contenttype.ContentTypesHelper; 053import org.ametys.cms.data.ContentValue; 054import org.ametys.cms.data.type.indexing.IndexableDataContext; 055import org.ametys.cms.data.type.indexing.IndexableElementType; 056import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper; 057import org.ametys.cms.indexing.IndexingException; 058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer; 059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint; 060import org.ametys.cms.model.properties.ElementRefProperty; 061import org.ametys.cms.model.properties.Property; 062import org.ametys.cms.repository.Content; 063import org.ametys.cms.search.query.AndQuery; 064import org.ametys.cms.search.query.DocumentTypeQuery; 065import org.ametys.cms.search.query.JoinQuery; 066import org.ametys.cms.search.query.OrQuery; 067import org.ametys.cms.search.query.Query; 068import org.ametys.cms.search.query.QuerySyntaxException; 069import org.ametys.cms.search.solr.SolrClientProvider; 070import org.ametys.cms.search.solr.field.FirstValidationSearchField; 071import org.ametys.cms.search.solr.field.LastMajorValidationSearchField; 072import org.ametys.cms.search.solr.field.LastModifiedSearchField; 073import org.ametys.cms.search.solr.field.LastValidationSearchField; 074import org.ametys.cms.tag.Tag; 075import org.ametys.cms.tag.TagHelper; 076import org.ametys.cms.tag.TagProviderExtensionPoint; 077import org.ametys.core.util.DateUtils; 078import org.ametys.plugins.explorer.resources.Resource; 079import org.ametys.plugins.explorer.resources.ResourceCollection; 080import org.ametys.plugins.repository.AmetysObject; 081import org.ametys.plugins.repository.AmetysObjectIterable; 082import org.ametys.plugins.repository.AmetysObjectResolver; 083import org.ametys.plugins.repository.AmetysRepositoryException; 084import org.ametys.plugins.repository.RepositoryConstants; 085import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder; 086import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite; 087import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater; 088import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry; 089import org.ametys.plugins.repository.model.CompositeDefinition; 090import org.ametys.plugins.repository.model.RepeaterDefinition; 091import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 092import org.ametys.runtime.model.ElementDefinition; 093import org.ametys.runtime.model.ModelItem; 094import org.ametys.runtime.model.type.DataContext; 095import org.ametys.runtime.model.type.ElementType; 096import org.ametys.runtime.model.type.ModelItemTypeConstants; 097import org.ametys.runtime.plugin.component.AbstractLogEnabled; 098import org.ametys.web.WebConstants; 099import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint; 100import org.ametys.web.repository.page.Page; 101import org.ametys.web.repository.page.Page.PageType; 102import org.ametys.web.repository.page.Zone; 103import org.ametys.web.repository.page.ZoneItem; 104import org.ametys.web.repository.page.ZoneItem.ZoneType; 105import org.ametys.web.repository.site.Site; 106import org.ametys.web.repository.sitemap.Sitemap; 107import org.ametys.web.search.query.PageAttachmentQuery; 108import org.ametys.web.search.query.PageQuery; 109import org.ametys.web.service.Service; 110import org.ametys.web.service.ServiceExtensionPoint; 111 112/** 113 * Component responsible for indexing a page with all its contents. 114 */ 115public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable 116{ 117 /** The avalon role. */ 118 public static final String ROLE = SolrPageIndexer.class.getName(); 119 120 /** The Solr client provider */ 121 protected SolrClientProvider _solrClientProvider; 122 /** The Solr indexer */ 123 protected SolrIndexer _solrIndexer; 124 /** Solr Ametys resources indexer */ 125 protected SolrResourceIndexer _solrResourceIndexer; 126 /** The extension point for PageVisibleAttachmentIndexers */ 127 protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP; 128 /** The additional property indexer extension point. */ 129 protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP; 130 /** The tag provider extension point. */ 131 protected TagProviderExtensionPoint _tagProviderEP; 132 133 /** The service extension point. */ 134 protected ServiceExtensionPoint _serviceExtensionPoint; 135 /** The Ametys object resolver*/ 136 protected AmetysObjectResolver _ametysObjectResolver; 137 /** The avalon context */ 138 protected Context _context; 139 140 private ContentTypesHelper _cTypesHelper; 141 142 @Override 143 public void service(ServiceManager manager) throws ServiceException 144 { 145 _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 146 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 147 _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 148 _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE); 149 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 150 _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE); 151 _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE); 152 _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE); 153 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 154 } 155 156 public void contextualize(Context context) throws ContextException 157 { 158 _context = context; 159 } 160 161 /** 162 * Index a page and eventually its children, recursively, in all workspaces and commit<br> 163 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 164 * @param pageId the page to be indexed. 165 * @param indexRecursively to also process children pages. 166 * @param indexAttachments to index page attachments 167 * @throws Exception if an error occurs during indexation. 168 */ 169 public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception 170 { 171 indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments); 172 indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments); 173 } 174 175 /** 176 * Index a page and eventually its children, recursively.<br> 177 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 178 * @param pageId the page to be indexed. 179 * @param workspaceName the workspace where to index 180 * @param indexRecursively to also process children pages. 181 * @param indexAttachments to index page attachments 182 * @throws IndexingException if an error occurs during indexation. 183 */ 184 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException 185 { 186 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 187 indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient); 188 } 189 190 /** 191 * Index a page and eventually its children, recursively.<br> 192 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 193 * @param pageId the page to be indexed. 194 * @param workspaceName the workspace where to index 195 * @param indexRecursively to also process children pages. 196 * @param indexAttachments to index page attachments 197 * @param solrClient The solr client to use 198 * @throws IndexingException if an error occurs during indexation. 199 */ 200 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 201 { 202 Request request = ContextHelper.getRequest(_context); 203 204 // Retrieve the current workspace. 205 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 206 // Retrieve the current site name. 207 String currentSiteName = (String) request.getAttribute("siteName"); 208 209 try 210 { 211 // Force the workspace. 212 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 213 214 getLogger().debug("Indexing page: {}", pageId); 215 216 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 217 { 218 Page page = _ametysObjectResolver.resolveById(pageId); 219 _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 220 } 221 } 222 catch (AmetysRepositoryException e) 223 { 224 String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName); 225 getLogger().error(error, e); 226 throw new IndexingException(error, e); 227 } 228 finally 229 { 230 // Restore the site name. 231 request.setAttribute("siteName", currentSiteName); 232 // Restore context 233 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 234 } 235 } 236 237 private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 238 { 239 getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName); 240 241 SolrInputDocument document = new SolrInputDocument(); 242 243 try 244 { 245 // Prepare the solr input document by adding fields. 246 _populatePageDocument(page, document); 247 248 // Set the additional properties in the document. 249 _populateAdditionalProperties(page, document); 250 251 // Indexation of ACL initial values 252 _solrIndexer.indexAclInitValues(page, document); 253 254 // Indexation of the document 255 _indexPageDocument(page, document, workspaceName, solrClient); 256 257 // Index page attachments documents 258 if (indexAttachments) 259 { 260 _indexPageAttachments(page.getRootAttachments(), page, solrClient); 261 } 262 } 263 catch (Exception e) 264 { 265 String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName); 266 getLogger().error(error, e); 267 throw new IndexingException(error, e); 268 } 269 270 if (indexRecursively) 271 { 272 AmetysObjectIterable<? extends Page> children = page.getChildrenPages(); 273 for (Page child : children) 274 { 275 // FIXME index child pages if (and only if) not indexed... see original source. 276// indexPage(child, false, indexRecursively); 277// indexPage(child, false); 278 _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient); 279 } 280 } 281 } 282 283 /** 284 * Populate the solr input document by adding fields to index. 285 * @param page the page to index. 286 * @param document the solr input document 287 * @throws Exception if something goes wrong when processing the indexation of the page 288 */ 289 protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception 290 { 291 Sitemap sitemap = page.getSitemap(); 292 String sitemapName = sitemap.getName(); 293 Site site = page.getSite(); 294 String siteName = site.getName(); 295 String pageId = page.getId(); 296 String pageTitle = page.getTitle(); 297 String pageLongTitle = page.getLongTitle(); 298 String language = sitemapName; 299 300 // Page id and type 301 document.addField(SolrFieldNames.ID, pageId); 302 document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE); 303 304 // Fulltext 305 IndexableDataContext context = IndexableDataContext.newInstance() 306 .withLocale(new Locale(language)); 307 IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context); 308 if (!pageTitle.equals(pageLongTitle)) 309 { 310 IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context); 311 } 312 313 // Page title 314 _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language); 315 // Page long title 316 _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language); 317 // Title for sorting 318 document.addField(TITLE_SORT, pageTitle); 319 320 document.addField(TEMPLATE, page.getTemplate()); 321 document.addField(PAGE_TYPE, page.getType().name()); 322 document.addField(PAGE_DEPTH, page.getDepth()); 323 324 // Contents (page title shoud be indexed before because the main content can override it). 325 _populatePageContentsDocument(page, document); 326 327 // Parent of the page 328 AmetysObject parent = page.getParent(); 329 if (parent != null) 330 { 331 document.addField(PAGE_PARENT_ID, parent.getId()); 332 } 333 334 // Ancestors of the page 335 List<String> ancestorIds = new ArrayList<>(); 336 while (parent instanceof Page) 337 { 338 ancestorIds.add(parent.getId()); 339 parent = parent.getParent(); 340 } 341 document.addField(PAGE_ANCESTOR_IDS, ancestorIds); 342 343 document.addField(SITE_NAME, siteName); 344 document.addField(SITEMAP_NAME, sitemapName); 345 document.addField(SITE_TYPE, site.getType()); 346 347 // Page tags (strict and tags including ancestor pages). 348 Set<String> tags = page.getTags() 349 .stream() 350 .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName()))) 351 .collect(Collectors.toSet()); 352 document.addField(SolrFieldNames.TAGS, tags); 353 document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page)); 354 355 _populateDatesOfPage(page, document); 356 357 // Attachments 358 _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language); 359 Optional.ofNullable(page.getRootAttachments()) 360 .map(AmetysObject::getId) 361 .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 362 _indexVisibleAttachments(page, document); 363 } 364 365 private void _indexVisibleAttachments(Page page, SolrInputDocument document) 366 { 367 Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds() 368 .stream() 369 .map(_pageVisibleAttachmentIndexerEP::getExtension) 370 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page)) 371 .flatMap(Collection::stream) 372 .collect(Collectors.toList()); 373 document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 374 } 375 376 /** 377 * Populate the solr input document with dates from the page 378 * @param page The page 379 * @param document The Solr document 380 */ 381 protected void _populateDatesOfPage(Page page, SolrInputDocument document) 382 { 383 // Page last modification date 384 ZonedDateTime lastModified = _getLastModificationDate(page); 385 if (lastModified != null) 386 { 387 String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC); 388 // For 'new' search service 389 document.addField(LastModifiedSearchField.NAME, lastModifiedStr); 390 // For 'old' search service 391 document.addField(LAST_MODIFIED + "_dt", lastModifiedStr); 392 } 393 394 // Page last validation date 395 ZonedDateTime lastValidation = _getLastValidationDate(page); 396 if (lastValidation != null) 397 { 398 String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC); 399 // For 'new' search service 400 document.addField(LastValidationSearchField.NAME, lastValidationStr); 401 } 402 403 // Page first validation date 404 ZonedDateTime firstValidation = _getFirstValidationDate(page); 405 if (firstValidation != null) 406 { 407 String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC); 408 // For 'new' search service 409 document.addField(FirstValidationSearchField.NAME, firstValidationStr); 410 } 411 412 // Page last major validation date 413 ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page); 414 if (lastMajorValidation != null) 415 { 416 String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC); 417 // For 'new' search service 418 document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr); 419 } 420 421 // date for sorting 422 SolrInputField dateField = document.getField(DATE_FOR_SORTING); 423 if (dateField == null) 424 { 425 Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES); 426 if (oDateValues != null && !oDateValues.isEmpty()) 427 { 428 document.setField(DATE_FOR_SORTING, oDateValues.iterator().next()); 429 } 430 } 431 } 432 433 private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language) 434 { 435 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName); 436 437 document.addField(fieldName, possiblyTruncatedValue); 438 document.addField(fieldName + "_txt_" + language, fieldValue); 439 document.addField(fieldName + "_txt_stemmed_" + language, fieldValue); 440 document.addField(fieldName + "_txt_ws_" + language, fieldValue); 441 442 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 443 document.addField(fieldName + "_s_ws", fieldValue.toLowerCase()); 444 document.addField(fieldName + "_txt", fieldValue); 445 } 446 /** 447 * Get all the page tags with their ancestors. 448 * @param page The page. 449 * @return All the page tags with their ancestors. 450 */ 451 protected Set<String> _getTagsWithAncestors(Page page) 452 { 453 Set<String> allTags = new HashSet<>(page.getTags()); 454 455 Map<String, Object> tagParams = Map.of("siteName", page.getSiteName()); 456 457 for (String tagName : page.getTags()) 458 { 459 allTags.add(tagName); 460 461 // Get the ancestor tags 462 Tag tag = _tagProviderEP.getTag(tagName, tagParams); 463 for (Tag ancestor : TagHelper.getAncestors(tag, false)) 464 { 465 allTags.add(ancestor.getName()); 466 } 467 } 468 469 return allTags; 470 } 471 472 /** 473 * Index the content of the page.<p> 474 * @param page the page to index. 475 * @param document the document to populate. 476 * @throws Exception if an error occurs. 477 */ 478 protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception 479 { 480 if (page.getType() == PageType.CONTAINER) 481 { 482 for (Zone zone : page.getZones()) 483 { 484 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 485 for (ZoneItem zoneItem : zoneItems) 486 { 487 if (zoneItem.getType() == ZoneType.CONTENT) 488 { 489 try 490 { 491 Content content = zoneItem.getContent(); 492 document.addField(CONTENT_IDS, content.getId()); 493 494 for (String cType : content.getTypes()) 495 { 496 document.addField(PAGE_CONTENT_TYPES, cType); 497 document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets 498 } 499 500 _indexFacetableFields(content, document); 501 } 502 catch (AmetysRepositoryException e) 503 { 504 getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 505 } 506 } 507 else if (zoneItem.getType() == ZoneType.SERVICE) 508 { 509 try 510 { 511 String serviceId = zoneItem.getServiceId(); 512 document.addField(SERVICE_IDS, serviceId); 513 514 Service service = _serviceExtensionPoint.getExtension(serviceId); 515 if (service == null) 516 { 517 getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId()); 518 } 519 else 520 { 521 service.index(zoneItem, document); 522 } 523 } 524 catch (AmetysRepositoryException e) 525 { 526 getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 527 } 528 529 } 530 } 531 } 532 } 533 } 534 535 /** 536 * Index the facetable fields of a content into the page solr document 537 * @param content The content 538 * @param document The main page solr document. 539 */ 540 protected void _indexFacetableFields(Content content, SolrInputDocument document) 541 { 542 List<ModelItem> modelItems = new ArrayList<>(); 543 try 544 { 545 String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes()); 546 modelItems.addAll(_cTypesHelper.getModelItems(allContentTypes) 547 .stream() 548 .filter(modelItem -> !(modelItem instanceof Property) || modelItem instanceof ElementRefProperty) 549 .collect(Collectors.toList())); 550 } 551 catch (IllegalArgumentException e) 552 { 553 getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e); 554 throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e); 555 } 556 557 for (ModelItem modelItem : modelItems) 558 { 559 DataContext context = DataContext.newInstance() 560 .withObjectId(content.getId()); 561 562 Optional.ofNullable(content.getLanguage()) 563 .map(Locale::new) 564 .ifPresent(context::withLocale); 565 566 _findAndIndexFacetableField(document, content, modelItem, context); 567 } 568 } 569 570 /** 571 * Index the facetable fields of a data holder into the page solr document 572 * @param pageDocument The Solr page document 573 * @param dataHolder the parent data holder 574 * @param modelItem the model item 575 * @param context the context of the data to index 576 */ 577 protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context) 578 { 579 String dataName = modelItem.getName(); 580 if (dataHolder.hasValue(dataName)) 581 { 582 if (modelItem instanceof ElementDefinition elementDefinition) 583 { 584 DataContext newContext = context.cloneContext() 585 .addSegmentToDataPath(dataName); 586 587 Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext); 588 for (String value : values) 589 { 590 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value); 591 } 592 } 593 else if (modelItem instanceof RepeaterDefinition repeaterDefinition) 594 { 595 ModelAwareRepeater repeater = dataHolder.getRepeater(dataName); 596 for (ModelAwareRepeaterEntry entry : repeater.getEntries()) 597 { 598 DataContext newContext = context.cloneContext() 599 .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]"); 600 601 for (ModelItem child : repeaterDefinition.getModelItems()) 602 { 603 _findAndIndexFacetableField(pageDocument, entry, child, newContext); 604 } 605 } 606 } 607 else if (modelItem instanceof CompositeDefinition compositeDefinition) 608 { 609 ModelAwareComposite composite = dataHolder.getComposite(dataName); 610 DataContext newContext = context.cloneContext() 611 .addSegmentToDataPath(dataName); 612 613 for (ModelItem child : compositeDefinition.getModelItems()) 614 { 615 _findAndIndexFacetableField(pageDocument, composite, child, newContext); 616 } 617 } 618 } 619 } 620 621 /** 622 * Retrieves the values to index if the field is facetable, or an empty collection 623 * @param dataHolder the data holder 624 * @param elementDefinition the definition of the field 625 * @param context the context of the data to index 626 * @return the values to index if the field is facetable, or an empty collection 627 */ 628 protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context) 629 { 630 String dataName = elementDefinition.getName(); 631 ElementType type = elementDefinition.getType(); 632 if (type instanceof IndexableElementType indexingElementType) 633 { 634 if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context)) 635 { 636 String dataPath = dataName; 637 if (elementDefinition instanceof ElementRefProperty property) 638 { 639 dataPath = property.getPath(); 640 } 641 642 Object value = dataHolder.getValue(dataPath, true); 643 if (value instanceof String[] stringValues) 644 { 645 return Arrays.asList(stringValues); 646 } 647 else if (value instanceof String stringValue) 648 { 649 return List.of(stringValue); 650 } 651 } 652 else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId())) 653 { 654 String dataPath = dataName; 655 if (elementDefinition instanceof ElementRefProperty property) 656 { 657 dataPath = property.getPath(); 658 } 659 660 Object value = dataHolder.getValue(dataPath, true); 661 if (value instanceof ContentValue[] contentValues) 662 { 663 return Arrays.stream(contentValues) 664 .map(ContentValue::getContentId) 665 .collect(Collectors.toList()); 666 } 667 else if (value instanceof ContentValue contentValue) 668 { 669 return List.of(contentValue.getContentId()); 670 } 671 } 672 } 673 674 return List.of(); 675 } 676 677 /** 678 * Computes the last modification date of a page. 679 * @param page the page. 680 * @return the last modification date or <code>null</code>. 681 */ 682 protected ZonedDateTime _getLastModificationDate(Page page) 683 { 684 return _getLastDate(page, Content::getLastModified); 685 } 686 /** 687 * Computes the first validation date of a page. 688 * @param page the page. 689 * @return the first validation date or <code>null</code>. 690 */ 691 protected ZonedDateTime _getFirstValidationDate(Page page) 692 { 693 return _getFirstDate(page, Content::getFirstValidationDate); 694 } 695 696 /** 697 * Computes the last validation date of a page. 698 * @param page the page. 699 * @return the last validation date or <code>null</code>. 700 */ 701 protected ZonedDateTime _getLastValidationDate(Page page) 702 { 703 return _getLastDate(page, Content::getLastValidationDate); 704 } 705 706 /** 707 * Computes the last major validation date of a page. 708 * @param page the page. 709 * @return the last major validation date or <code>null</code>. 710 */ 711 protected ZonedDateTime _getLastMajorValidationDate(Page page) 712 { 713 return _getLastDate(page, Content::getLastMajorValidationDate); 714 } 715 716 /** 717 * Computes a "last date" of a page, using the simple and naive following algorithm: 718 * <br>From all the dates from each of its contents, keep the greatest of them. 719 * @param page the page. 720 * @param dateRetriever The function to retrieve a Date from a Content of the Page 721 * @return the "last date" or <code>null</code>. 722 */ 723 protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 724 { 725 ZonedDateTime last = null; 726 727 if (page.getType() == PageType.CONTAINER) 728 { 729 for (Zone zone : page.getZones()) 730 { 731 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 732 for (ZoneItem zoneItem : zoneItems) 733 { 734 switch (zoneItem.getType()) 735 { 736 case SERVICE: 737 // A service has no last date 738 break; 739 case CONTENT: 740 try 741 { 742 ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent()); 743 744 if (contentLast != null && (last == null || contentLast.isAfter(last))) 745 { 746 // Keep the latest date 747 last = contentLast; 748 } 749 } 750 catch (AmetysRepositoryException e) 751 { 752 getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 753 } 754 break; 755 default: 756 break; 757 } 758 } 759 } 760 } 761 762 return last; 763 } 764 765 /** 766 * Computes a "first date" of a page, using the simple and naive following algorithm: 767 * <br>From all the dates from each of its contents, keep the lowest of them. 768 * @param page the page. 769 * @param dateRetriever The function to retrieve a Date from a Content of the Page 770 * @return the "first date" or <code>null</code>. 771 */ 772 protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 773 { 774 ZonedDateTime first = null; 775 776 if (page.getType() == PageType.CONTAINER) 777 { 778 for (Zone zone : page.getZones()) 779 { 780 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 781 for (ZoneItem zoneItem : zoneItems) 782 { 783 switch (zoneItem.getType()) 784 { 785 case SERVICE: 786 // A service has no first date 787 break; 788 case CONTENT: 789 try 790 { 791 ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent()); 792 793 if (contentFirst != null && (first == null || contentFirst.isBefore(first))) 794 { 795 // Keep the lowest date 796 first = contentFirst; 797 } 798 } 799 catch (AmetysRepositoryException e) 800 { 801 getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 802 } 803 break; 804 default: 805 break; 806 } 807 } 808 } 809 } 810 811 return first; 812 } 813 814 /** 815 * Populate the solr input document by adding fields to index. 816 * @param page the page to index. 817 * @param document the solr input document 818 * @throws Exception if something goes wrong when processing the indexation of the page 819 */ 820 protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception 821 { 822 Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page"); 823 for (AdditionalPropertyIndexer indexer : indexers) 824 { 825 indexer.index(page, document); 826 } 827 } 828 829 /** 830 * Index page attachments as new entries in the index. 831 * @param collection the collection of attachments 832 * @param page the page whose attachments will be indexed 833 * @throws Exception if something goes wrong when indexing the attachments of the page 834 */ 835 public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception 836 { 837 Request request = ContextHelper.getRequest(_context); 838 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 839 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 840 _indexPageAttachments(collection, page, solrClient); 841 } 842 843 private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception 844 { 845 if (collection == null) 846 { 847 return; 848 } 849 850 AmetysObjectIterable<AmetysObject> children = collection.getChildren(); 851 for (AmetysObject object : children) 852 { 853 if (object instanceof ResourceCollection) 854 { 855 _indexPageAttachments((ResourceCollection) object, page, solrClient); 856 } 857 else if (object instanceof Resource) 858 { 859 Resource resource = (Resource) object; 860 _indexPageAttachment(resource, page, solrClient); 861 } 862 } 863 } 864 865 /** 866 * Index a page attachment 867 * @param resource the page attachment as a {@link Resource} 868 * @param page the page whose attachment is going to be indexed 869 * @throws Exception if something goes wrong when processing the indexation of the page attachment 870 */ 871 public void indexPageAttachment(Resource resource, Page page) throws Exception 872 { 873 Request request = ContextHelper.getRequest(_context); 874 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 875 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 876 _indexPageAttachment(resource, page, solrClient); 877 } 878 879 private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception 880 { 881 SolrInputDocument document = new SolrInputDocument(); 882 883 // Prepare resource doc 884 _populatePageAttachmentDocument(resource, document, page); 885 886 // Indexation of the document 887 _indexResourceDocument(resource, document, solrClient); 888 } 889 890 private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception 891 { 892 String language = page.getSitemapName(); 893 894 _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language); 895 896 Site site = page.getSite(); 897 // site name - Store.YES, Index.NOT_ANALYZED 898 document.addField(SolrWebFieldNames.SITE_NAME, site.getName()); 899 900 // site type - Store.YES, Index.NOT_ANALYZED 901 document.addField(SolrWebFieldNames.SITE_TYPE, site.getType()); 902 903 // Added for Solr. 904 // Page site map name - Store.YES, Index.NOT_ANALYZED 905 document.addField(SITEMAP_NAME, page.getSitemapName()); 906 907 // Need the id of the page for unindexing attachment during the unindexing of the page 908 document.addField(ATTACHMENT_PAGE_ID, page.getId()); 909 } 910 911 /** 912 * Index a populated solr input document of type Page. 913 * @param page the page from which the input document is created 914 * @param document the input document to add to the solr index 915 * @param workspaceName The workspace name 916 * @param solrClient The solr client to use 917 * @throws SolrServerException if there is an error on the Solr server 918 * @throws IOException if there is a communication error with the server 919 */ 920 protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 921 { 922 // Retrieve appropriate solr client 923 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 924 925 // Add document 926 UpdateResponse solrResponse = solrClient.add(collectionName, document); 927 int status = solrResponse.getStatus(); 928 929 if (status != 0) 930 { 931 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId()); 932 } 933 934 getLogger().debug("Successful page indexing. Page identifier : {}", page.getId()); 935 } 936 937 /** 938 * Index a populated solr input document of type Resource. 939 * @param resource the resource from which the input document is created 940 * @param document the input document 941 * @param solrClient The solr client to use 942 * @throws SolrServerException if there is an error on the server 943 * @throws IOException if there is a communication error with the server 944 */ 945 protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException 946 { 947 // Retrieve appropriate solr client 948 Request request = ContextHelper.getRequest(_context); 949 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 950 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 951 952 // Add document 953 UpdateResponse solrResponse = solrClient.add(collectionName, document); 954 int status = solrResponse.getStatus(); 955 956 if (status != 0) 957 { 958 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId()); 959 } 960 961 getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId()); 962 } 963 964 /////////////////////////////////////////////////////////////////////////// 965 966 /** 967 * Un-index a page by its ID for all workspaces and commit 968 * @param pageId The page ID. 969 * @param unindexRecursively also unindex child pages if requested. 970 * @param unindexAttachments also unindex page attachments 971 * @throws Exception if an error occurs during index update. 972 */ 973 public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception 974 { 975 unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments); 976 unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments); 977 } 978 979 /** 980 * De-index a page (and optionally its children pages). 981 * @param pageId the page to be de-indexed. 982 * @param workspaceName The workspace where to work in 983 * @param unindexRecursively also unindex child pages if requested. 984 * @param unindexAttachments also unindex page attachments 985 * @throws Exception if an error occurs during index update. 986 */ 987 public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception 988 { 989 Request request = ContextHelper.getRequest(_context); 990 991 // Retrieve the current workspace. 992 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 993 // Retrieve the current site name. 994 String currentSiteName = (String) request.getAttribute("siteName"); 995 996 try 997 { 998 // Force the workspace. 999 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1000 1001 getLogger().debug("Unindexing page: {}", pageId); 1002 1003 _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments); 1004 } 1005 catch (Exception e) 1006 { 1007 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1008 getLogger().error(error, e); 1009 throw new IndexingException(error, e); 1010 } 1011 finally 1012 { 1013 // Restore the site name. 1014 request.setAttribute("siteName", currentSiteName); 1015 // Restore context 1016 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1017 } 1018 } 1019 1020 /** 1021 * Deindex a document of type Page. Also deindex attachments of a page 1022 * @param pageId the id of the page to deindex 1023 * @param workspaceName The workspace name 1024 * @param unindexRecursively also unindex child pages if requested. 1025 * @param unindexAttachments also unindex page attachments 1026 * @throws SolrServerException if there is an error on the server 1027 * @throws IOException if there is a communication error with the server 1028 * @throws QuerySyntaxException if the uri query can't be built because of a syntax error. 1029 */ 1030 protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException 1031 { 1032 // Retrieve appropriate solr client 1033 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 1034 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1035 1036 getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName); 1037 1038 Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively)); 1039 Query query; 1040 if (unindexRecursively && unindexAttachments) 1041 { 1042 // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"} 1043 Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID); 1044 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery)); 1045 query = new OrQuery(attachments, pages); 1046 } 1047 else if (unindexAttachments) 1048 { 1049 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId)); 1050 query = new OrQuery(attachments, pages); 1051 } 1052 else 1053 { 1054 query = pages; 1055 } 1056 1057 // Delete by query 1058 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build()); 1059 int status = solrResponse.getStatus(); 1060 1061 if (status != 0) 1062 { 1063 throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId); 1064 } 1065 1066 getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId); 1067 } 1068 1069 /////////////////////////////////////////////////////////////////////////// 1070 1071 /** 1072 * Reindex a page by its ID for all workspaces and commit 1073 * @param pageId The page ID. 1074 * @param reindexRecursively also reindex child pages if requested. 1075 * @param reindexAttachments also reindex page attachments 1076 * @throws Exception if an error occurs during index update. 1077 */ 1078 public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception 1079 { 1080 reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments); 1081 reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments); 1082 } 1083 1084 1085 /** 1086 * Reindex a page by its ID. 1087 * @param pageId The page ID. 1088 * @param workspaceName The workspace where to work in 1089 * @param reindexRecursively also reindex child pages if requested. 1090 * @param reindexAttachments also reindex page attachments 1091 * @throws IndexingException if an error occurs during index update. 1092 */ 1093 public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException 1094 { 1095 Request request = ContextHelper.getRequest(_context); 1096 1097 // Retrieve the current workspace. 1098 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 1099 // Retrieve the current site name. 1100 String currentSiteName = (String) request.getAttribute("siteName"); 1101 1102 try 1103 { 1104 // Force the workspace. 1105 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1106 1107 getLogger().debug("Reindexing page: {}", pageId); 1108 1109 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 1110 { 1111 Page page = _ametysObjectResolver.resolveById(pageId); 1112 _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments); 1113 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1114 _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient); 1115 } 1116 } 1117 catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e) 1118 { 1119 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1120 getLogger().error(error, e); 1121 throw new IndexingException(error, e); 1122 } 1123 finally 1124 { 1125 // Restore the site name. 1126 request.setAttribute("siteName", currentSiteName); 1127 // Restore context 1128 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1129 } 1130 } 1131}