001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.time.ZoneOffset; 020import java.time.ZonedDateTime; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.HashSet; 025import java.util.List; 026import java.util.Locale; 027import java.util.Map; 028import java.util.Optional; 029import java.util.Set; 030import java.util.function.Function; 031import java.util.stream.Collectors; 032 033import org.apache.avalon.framework.component.Component; 034import org.apache.avalon.framework.context.Context; 035import org.apache.avalon.framework.context.ContextException; 036import org.apache.avalon.framework.context.Contextualizable; 037import org.apache.avalon.framework.service.ServiceException; 038import org.apache.avalon.framework.service.ServiceManager; 039import org.apache.avalon.framework.service.Serviceable; 040import org.apache.cocoon.components.ContextHelper; 041import org.apache.cocoon.environment.Request; 042import org.apache.commons.lang3.ArrayUtils; 043import org.apache.solr.client.solrj.SolrClient; 044import org.apache.solr.client.solrj.SolrServerException; 045import org.apache.solr.client.solrj.response.UpdateResponse; 046import org.apache.solr.common.SolrInputDocument; 047import org.apache.solr.common.SolrInputField; 048 049import org.ametys.cms.content.indexing.solr.SolrFieldNames; 050import org.ametys.cms.content.indexing.solr.SolrIndexer; 051import org.ametys.cms.content.indexing.solr.SolrResourceIndexer; 052import org.ametys.cms.contenttype.ContentTypesHelper; 053import org.ametys.cms.data.ContentValue; 054import org.ametys.cms.data.type.indexing.IndexableDataContext; 055import org.ametys.cms.data.type.indexing.IndexableElementType; 056import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper; 057import org.ametys.cms.indexing.IndexingException; 058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer; 059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint; 060import org.ametys.cms.repository.Content; 061import org.ametys.cms.search.query.AndQuery; 062import org.ametys.cms.search.query.DocumentTypeQuery; 063import org.ametys.cms.search.query.JoinQuery; 064import org.ametys.cms.search.query.OrQuery; 065import org.ametys.cms.search.query.Query; 066import org.ametys.cms.search.query.QuerySyntaxException; 067import org.ametys.cms.search.solr.SolrClientProvider; 068import org.ametys.cms.search.solr.field.FirstValidationSearchField; 069import org.ametys.cms.search.solr.field.LastMajorValidationSearchField; 070import org.ametys.cms.search.solr.field.LastModifiedSearchField; 071import org.ametys.cms.search.solr.field.LastValidationSearchField; 072import org.ametys.cms.tag.Tag; 073import org.ametys.cms.tag.TagHelper; 074import org.ametys.cms.tag.TagProviderExtensionPoint; 075import org.ametys.core.util.DateUtils; 076import org.ametys.plugins.explorer.resources.Resource; 077import org.ametys.plugins.explorer.resources.ResourceCollection; 078import org.ametys.plugins.repository.AmetysObject; 079import org.ametys.plugins.repository.AmetysObjectIterable; 080import org.ametys.plugins.repository.AmetysObjectResolver; 081import org.ametys.plugins.repository.AmetysRepositoryException; 082import org.ametys.plugins.repository.RepositoryConstants; 083import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder; 084import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite; 085import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater; 086import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry; 087import org.ametys.plugins.repository.model.CompositeDefinition; 088import org.ametys.plugins.repository.model.RepeaterDefinition; 089import org.ametys.plugins.repository.model.RepositoryDataContext; 090import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 091import org.ametys.runtime.model.ElementDefinition; 092import org.ametys.runtime.model.ModelItem; 093import org.ametys.runtime.model.type.DataContext; 094import org.ametys.runtime.model.type.ElementType; 095import org.ametys.runtime.model.type.ModelItemTypeConstants; 096import org.ametys.runtime.plugin.component.AbstractLogEnabled; 097import org.ametys.web.WebConstants; 098import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint; 099import org.ametys.web.repository.page.Page; 100import org.ametys.web.repository.page.Page.PageType; 101import org.ametys.web.repository.page.Zone; 102import org.ametys.web.repository.page.ZoneItem; 103import org.ametys.web.repository.page.ZoneItem.ZoneType; 104import org.ametys.web.repository.site.Site; 105import org.ametys.web.repository.sitemap.Sitemap; 106import org.ametys.web.search.query.PageAttachmentQuery; 107import org.ametys.web.search.query.PageQuery; 108import org.ametys.web.service.Service; 109import org.ametys.web.service.ServiceExtensionPoint; 110 111/** 112 * Component responsible for indexing a page with all its contents. 113 */ 114public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable 115{ 116 /** The avalon role. */ 117 public static final String ROLE = SolrPageIndexer.class.getName(); 118 119 /** The Solr client provider */ 120 protected SolrClientProvider _solrClientProvider; 121 /** The Solr indexer */ 122 protected SolrIndexer _solrIndexer; 123 /** Solr Ametys resources indexer */ 124 protected SolrResourceIndexer _solrResourceIndexer; 125 /** The extension point for PageVisibleAttachmentIndexers */ 126 protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP; 127 /** The additional property indexer extension point. */ 128 protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP; 129 /** The tag provider extension point. */ 130 protected TagProviderExtensionPoint _tagProviderEP; 131 132 /** The service extension point. */ 133 protected ServiceExtensionPoint _serviceExtensionPoint; 134 /** The Ametys object resolver*/ 135 protected AmetysObjectResolver _ametysObjectResolver; 136 /** The avalon context */ 137 protected Context _context; 138 139 private ContentTypesHelper _cTypesHelper; 140 141 @Override 142 public void service(ServiceManager manager) throws ServiceException 143 { 144 _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 145 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 146 _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 147 _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE); 148 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 149 _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE); 150 _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE); 151 _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE); 152 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 153 } 154 155 public void contextualize(Context context) throws ContextException 156 { 157 _context = context; 158 } 159 160 /** 161 * Index a page and eventually its children, recursively, in all workspaces and commit<br> 162 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 163 * @param pageId the page to be indexed. 164 * @param indexRecursively to also process children pages. 165 * @param indexAttachments to index page attachments 166 * @throws Exception if an error occurs during indexation. 167 */ 168 public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception 169 { 170 indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments); 171 indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments); 172 } 173 174 /** 175 * Index a page and eventually its children, recursively.<br> 176 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 177 * @param pageId the page to be indexed. 178 * @param workspaceName the workspace where to index 179 * @param indexRecursively to also process children pages. 180 * @param indexAttachments to index page attachments 181 * @throws IndexingException if an error occurs during indexation. 182 */ 183 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException 184 { 185 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 186 indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient); 187 } 188 189 /** 190 * Index a page and eventually its children, recursively.<br> 191 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 192 * @param pageId the page to be indexed. 193 * @param workspaceName the workspace where to index 194 * @param indexRecursively to also process children pages. 195 * @param indexAttachments to index page attachments 196 * @param solrClient The solr client to use 197 * @throws IndexingException if an error occurs during indexation. 198 */ 199 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 200 { 201 Request request = ContextHelper.getRequest(_context); 202 203 // Retrieve the current workspace. 204 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 205 // Retrieve the current site name. 206 String currentSiteName = (String) request.getAttribute("siteName"); 207 208 try 209 { 210 // Force the workspace. 211 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 212 213 getLogger().debug("Indexing page: {}", pageId); 214 215 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 216 { 217 Page page = _ametysObjectResolver.resolveById(pageId); 218 _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 219 } 220 } 221 catch (AmetysRepositoryException e) 222 { 223 String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName); 224 getLogger().error(error, e); 225 throw new IndexingException(error, e); 226 } 227 finally 228 { 229 // Restore the site name. 230 request.setAttribute("siteName", currentSiteName); 231 // Restore context 232 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 233 } 234 } 235 236 private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 237 { 238 getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName); 239 240 SolrInputDocument document = new SolrInputDocument(); 241 242 try 243 { 244 // Prepare the solr input document by adding fields. 245 _populatePageDocument(page, document); 246 247 // Set the additional properties in the document. 248 _populateAdditionalProperties(page, document); 249 250 // Indexation of ACL initial values 251 _solrIndexer.indexAclInitValues(page, document); 252 253 // Indexation of the document 254 _indexPageDocument(page, document, workspaceName, solrClient); 255 256 // Index page attachments documents 257 if (indexAttachments) 258 { 259 _indexPageAttachments(page.getRootAttachments(), page, solrClient); 260 } 261 } 262 catch (Exception e) 263 { 264 String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName); 265 getLogger().error(error, e); 266 throw new IndexingException(error, e); 267 } 268 269 if (indexRecursively) 270 { 271 AmetysObjectIterable<? extends Page> children = page.getChildrenPages(); 272 for (Page child : children) 273 { 274 // FIXME index child pages if (and only if) not indexed... see original source. 275// indexPage(child, false, indexRecursively); 276// indexPage(child, false); 277 _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient); 278 } 279 } 280 } 281 282 /** 283 * Populate the solr input document by adding fields to index. 284 * @param page the page to index. 285 * @param document the solr input document 286 * @throws Exception if something goes wrong when processing the indexation of the page 287 */ 288 protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception 289 { 290 Sitemap sitemap = page.getSitemap(); 291 String sitemapName = sitemap.getName(); 292 Site site = page.getSite(); 293 String siteName = site.getName(); 294 String pageId = page.getId(); 295 String pageTitle = page.getTitle(); 296 String pageLongTitle = page.getLongTitle(); 297 String language = sitemapName; 298 299 // Page id and type 300 document.addField(SolrFieldNames.ID, pageId); 301 document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE); 302 303 // Fulltext 304 IndexableDataContext context = IndexableDataContext.newInstance() 305 .withLocale(new Locale(language)); 306 IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context); 307 if (!pageTitle.equals(pageLongTitle)) 308 { 309 IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context); 310 } 311 312 // Page title 313 _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language); 314 // Page long title 315 _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language); 316 // Title for sorting 317 document.addField(TITLE_SORT, pageTitle); 318 319 document.addField(TEMPLATE, page.getTemplate()); 320 document.addField(PAGE_TYPE, page.getType().name()); 321 document.addField(PAGE_DEPTH, page.getDepth()); 322 323 // Contents (page title shoud be indexed before because the main content can override it). 324 _populatePageContentsDocument(page, document); 325 326 // Parent of the page 327 AmetysObject parent = page.getParent(); 328 if (parent != null) 329 { 330 document.addField(PAGE_PARENT_ID, parent.getId()); 331 } 332 333 // Ancestors of the page 334 List<String> ancestorIds = new ArrayList<>(); 335 while (parent instanceof Page) 336 { 337 ancestorIds.add(parent.getId()); 338 parent = parent.getParent(); 339 } 340 document.addField(PAGE_ANCESTOR_IDS, ancestorIds); 341 342 document.addField(SITE_NAME, siteName); 343 document.addField(SITEMAP_NAME, sitemapName); 344 document.addField(SITE_TYPE, site.getType()); 345 346 // Page tags (strict and tags including ancestor pages). 347 Set<String> tags = page.getTags() 348 .stream() 349 .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName()))) 350 .collect(Collectors.toSet()); 351 document.addField(SolrFieldNames.TAGS, tags); 352 document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page)); 353 354 _populateDatesOfPage(page, document); 355 356 // Attachments 357 _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language); 358 Optional.ofNullable(page.getRootAttachments()) 359 .map(AmetysObject::getId) 360 .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 361 _indexVisibleAttachments(page, document); 362 } 363 364 private void _indexVisibleAttachments(Page page, SolrInputDocument document) 365 { 366 Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds() 367 .stream() 368 .map(_pageVisibleAttachmentIndexerEP::getExtension) 369 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page)) 370 .flatMap(Collection::stream) 371 .collect(Collectors.toList()); 372 document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 373 } 374 375 /** 376 * Populate the solr input document with dates from the page 377 * @param page The page 378 * @param document The Solr document 379 */ 380 protected void _populateDatesOfPage(Page page, SolrInputDocument document) 381 { 382 // Page last modification date 383 ZonedDateTime lastModified = _getLastModificationDate(page); 384 if (lastModified != null) 385 { 386 String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC); 387 // For 'new' search service 388 document.addField(LastModifiedSearchField.NAME, lastModifiedStr); 389 // For 'old' search service 390 document.addField(LAST_MODIFIED + "_dt", lastModifiedStr); 391 } 392 393 // Page last validation date 394 ZonedDateTime lastValidation = _getLastValidationDate(page); 395 if (lastValidation != null) 396 { 397 String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC); 398 // For 'new' search service 399 document.addField(LastValidationSearchField.NAME, lastValidationStr); 400 } 401 402 // Page first validation date 403 ZonedDateTime firstValidation = _getFirstValidationDate(page); 404 if (firstValidation != null) 405 { 406 String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC); 407 // For 'new' search service 408 document.addField(FirstValidationSearchField.NAME, firstValidationStr); 409 } 410 411 // Page last major validation date 412 ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page); 413 if (lastMajorValidation != null) 414 { 415 String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC); 416 // For 'new' search service 417 document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr); 418 } 419 420 // date for sorting 421 SolrInputField dateField = document.getField(DATE_FOR_SORTING); 422 if (dateField == null) 423 { 424 Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES); 425 if (oDateValues != null && !oDateValues.isEmpty()) 426 { 427 document.setField(DATE_FOR_SORTING, oDateValues.iterator().next()); 428 } 429 } 430 } 431 432 private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language) 433 { 434 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName); 435 436 document.addField(fieldName, possiblyTruncatedValue); 437 document.addField(fieldName + "_txt_" + language, fieldValue); 438 document.addField(fieldName + "_txt_stemmed_" + language, fieldValue); 439 document.addField(fieldName + "_txt_ws_" + language, fieldValue); 440 441 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 442 document.addField(fieldName + "_s_ws", fieldValue.toLowerCase()); 443 document.addField(fieldName + "_txt", fieldValue); 444 } 445 /** 446 * Get all the page tags with their ancestors. 447 * @param page The page. 448 * @return All the page tags with their ancestors. 449 */ 450 protected Set<String> _getTagsWithAncestors(Page page) 451 { 452 Set<String> allTags = new HashSet<>(page.getTags()); 453 454 Map<String, Object> tagParams = Map.of("siteName", page.getSiteName()); 455 456 for (String tagName : page.getTags()) 457 { 458 allTags.add(tagName); 459 460 // Get the ancestor tags 461 Tag tag = _tagProviderEP.getTag(tagName, tagParams); 462 for (Tag ancestor : TagHelper.getAncestors(tag, false)) 463 { 464 allTags.add(ancestor.getName()); 465 } 466 } 467 468 return allTags; 469 } 470 471 /** 472 * Index the content of the page.<p> 473 * @param page the page to index. 474 * @param document the document to populate. 475 * @throws Exception if an error occurs. 476 */ 477 protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception 478 { 479 if (page.getType() == PageType.CONTAINER) 480 { 481 for (Zone zone : page.getZones()) 482 { 483 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 484 for (ZoneItem zoneItem : zoneItems) 485 { 486 if (zoneItem.getType() == ZoneType.CONTENT) 487 { 488 try 489 { 490 Content content = zoneItem.getContent(); 491 document.addField(CONTENT_IDS, content.getId()); 492 493 for (String cType : content.getTypes()) 494 { 495 document.addField(PAGE_CONTENT_TYPES, cType); 496 document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets 497 } 498 499 _indexFacetableFields(content, document); 500 } 501 catch (AmetysRepositoryException e) 502 { 503 getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 504 } 505 } 506 else if (zoneItem.getType() == ZoneType.SERVICE) 507 { 508 try 509 { 510 String serviceId = zoneItem.getServiceId(); 511 document.addField(SERVICE_IDS, serviceId); 512 513 Service service = _serviceExtensionPoint.getExtension(serviceId); 514 if (service == null) 515 { 516 getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId()); 517 } 518 else 519 { 520 service.index(zoneItem, document); 521 } 522 } 523 catch (AmetysRepositoryException e) 524 { 525 getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 526 } 527 528 } 529 } 530 } 531 } 532 } 533 534 /** 535 * Index the facetable fields of a content into the page solr document 536 * @param content The content 537 * @param document The main page solr document. 538 */ 539 protected void _indexFacetableFields(Content content, SolrInputDocument document) 540 { 541 try 542 { 543 String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes()); 544 for (ModelItem modelItem : _cTypesHelper.getModelItems(allContentTypes)) 545 { 546 DataContext context = RepositoryDataContext.newInstance() 547 .withObject(content); 548 549 Optional.ofNullable(content.getLanguage()) 550 .map(Locale::new) 551 .ifPresent(context::withLocale); 552 553 _findAndIndexFacetableField(document, content, modelItem, context); 554 } 555 } 556 catch (IllegalArgumentException e) 557 { 558 getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e); 559 throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e); 560 } 561 } 562 563 /** 564 * Index the facetable fields of a data holder into the page solr document 565 * @param pageDocument The Solr page document 566 * @param dataHolder the parent data holder 567 * @param modelItem the model item 568 * @param context the context of the data to index 569 */ 570 protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context) 571 { 572 String dataName = modelItem.getName(); 573 if (dataHolder.hasValue(dataName)) 574 { 575 if (modelItem instanceof ElementDefinition elementDefinition) 576 { 577 DataContext newContext = context.cloneContext() 578 .addSegmentToDataPath(dataName); 579 580 Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext); 581 for (String value : values) 582 { 583 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value); 584 } 585 } 586 else if (modelItem instanceof RepeaterDefinition repeaterDefinition) 587 { 588 ModelAwareRepeater repeater = dataHolder.getRepeater(dataName); 589 for (ModelAwareRepeaterEntry entry : repeater.getEntries()) 590 { 591 DataContext newContext = context.cloneContext() 592 .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]"); 593 594 for (ModelItem child : repeaterDefinition.getModelItems()) 595 { 596 _findAndIndexFacetableField(pageDocument, entry, child, newContext); 597 } 598 } 599 } 600 else if (modelItem instanceof CompositeDefinition compositeDefinition) 601 { 602 ModelAwareComposite composite = dataHolder.getComposite(dataName); 603 DataContext newContext = context.cloneContext() 604 .addSegmentToDataPath(dataName); 605 606 for (ModelItem child : compositeDefinition.getModelItems()) 607 { 608 _findAndIndexFacetableField(pageDocument, composite, child, newContext); 609 } 610 } 611 } 612 } 613 614 /** 615 * Retrieves the values to index if the field is facetable, or an empty collection 616 * @param dataHolder the data holder 617 * @param elementDefinition the definition of the field 618 * @param context the context of the data to index 619 * @return the values to index if the field is facetable, or an empty collection 620 */ 621 protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context) 622 { 623 String dataName = elementDefinition.getName(); 624 ElementType type = elementDefinition.getType(); 625 if (type instanceof IndexableElementType indexingElementType) 626 { 627 if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context)) 628 { 629 Object value = dataHolder.getValue(dataName, true); 630 if (value instanceof String[] stringValues) 631 { 632 return Arrays.asList(stringValues); 633 } 634 else if (value instanceof String stringValue) 635 { 636 return List.of(stringValue); 637 } 638 } 639 else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId())) 640 { 641 Object value = dataHolder.getValue(dataName, true); 642 if (value instanceof ContentValue[] contentValues) 643 { 644 return Arrays.stream(contentValues) 645 .map(ContentValue::getContentId) 646 .collect(Collectors.toList()); 647 } 648 else if (value instanceof ContentValue contentValue) 649 { 650 return List.of(contentValue.getContentId()); 651 } 652 } 653 } 654 655 return List.of(); 656 } 657 658 /** 659 * Computes the last modification date of a page. 660 * @param page the page. 661 * @return the last modification date or <code>null</code>. 662 */ 663 protected ZonedDateTime _getLastModificationDate(Page page) 664 { 665 return _getLastDate(page, Content::getLastModified); 666 } 667 /** 668 * Computes the first validation date of a page. 669 * @param page the page. 670 * @return the first validation date or <code>null</code>. 671 */ 672 protected ZonedDateTime _getFirstValidationDate(Page page) 673 { 674 return _getFirstDate(page, Content::getFirstValidationDate); 675 } 676 677 /** 678 * Computes the last validation date of a page. 679 * @param page the page. 680 * @return the last validation date or <code>null</code>. 681 */ 682 protected ZonedDateTime _getLastValidationDate(Page page) 683 { 684 return _getLastDate(page, Content::getLastValidationDate); 685 } 686 687 /** 688 * Computes the last major validation date of a page. 689 * @param page the page. 690 * @return the last major validation date or <code>null</code>. 691 */ 692 protected ZonedDateTime _getLastMajorValidationDate(Page page) 693 { 694 return _getLastDate(page, Content::getLastMajorValidationDate); 695 } 696 697 /** 698 * Computes a "last date" of a page, using the simple and naive following algorithm: 699 * <br>From all the dates from each of its contents, keep the greatest of them. 700 * @param page the page. 701 * @param dateRetriever The function to retrieve a Date from a Content of the Page 702 * @return the "last date" or <code>null</code>. 703 */ 704 protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 705 { 706 ZonedDateTime last = null; 707 708 if (page.getType() == PageType.CONTAINER) 709 { 710 for (Zone zone : page.getZones()) 711 { 712 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 713 for (ZoneItem zoneItem : zoneItems) 714 { 715 switch (zoneItem.getType()) 716 { 717 case SERVICE: 718 // A service has no last date 719 break; 720 case CONTENT: 721 try 722 { 723 ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent()); 724 725 if (contentLast != null && (last == null || contentLast.isAfter(last))) 726 { 727 // Keep the latest date 728 last = contentLast; 729 } 730 } 731 catch (AmetysRepositoryException e) 732 { 733 getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 734 } 735 break; 736 default: 737 break; 738 } 739 } 740 } 741 } 742 743 return last; 744 } 745 746 /** 747 * Computes a "first date" of a page, using the simple and naive following algorithm: 748 * <br>From all the dates from each of its contents, keep the lowest of them. 749 * @param page the page. 750 * @param dateRetriever The function to retrieve a Date from a Content of the Page 751 * @return the "first date" or <code>null</code>. 752 */ 753 protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever) 754 { 755 ZonedDateTime first = null; 756 757 if (page.getType() == PageType.CONTAINER) 758 { 759 for (Zone zone : page.getZones()) 760 { 761 AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems(); 762 for (ZoneItem zoneItem : zoneItems) 763 { 764 switch (zoneItem.getType()) 765 { 766 case SERVICE: 767 // A service has no first date 768 break; 769 case CONTENT: 770 try 771 { 772 ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent()); 773 774 if (contentFirst != null && (first == null || contentFirst.isBefore(first))) 775 { 776 // Keep the lowest date 777 first = contentFirst; 778 } 779 } 780 catch (AmetysRepositoryException e) 781 { 782 getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 783 } 784 break; 785 default: 786 break; 787 } 788 } 789 } 790 } 791 792 return first; 793 } 794 795 /** 796 * Populate the solr input document by adding fields to index. 797 * @param page the page to index. 798 * @param document the solr input document 799 * @throws Exception if something goes wrong when processing the indexation of the page 800 */ 801 protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception 802 { 803 Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page"); 804 for (AdditionalPropertyIndexer indexer : indexers) 805 { 806 indexer.index(page, document); 807 } 808 } 809 810 /** 811 * Index page attachments as new entries in the index. 812 * @param collection the collection of attachments 813 * @param page the page whose attachments will be indexed 814 * @throws Exception if something goes wrong when indexing the attachments of the page 815 */ 816 public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception 817 { 818 Request request = ContextHelper.getRequest(_context); 819 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 820 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 821 _indexPageAttachments(collection, page, solrClient); 822 } 823 824 private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception 825 { 826 if (collection == null) 827 { 828 return; 829 } 830 831 AmetysObjectIterable<AmetysObject> children = collection.getChildren(); 832 for (AmetysObject object : children) 833 { 834 if (object instanceof ResourceCollection) 835 { 836 _indexPageAttachments((ResourceCollection) object, page, solrClient); 837 } 838 else if (object instanceof Resource) 839 { 840 Resource resource = (Resource) object; 841 _indexPageAttachment(resource, page, solrClient); 842 } 843 } 844 } 845 846 /** 847 * Index a page attachment 848 * @param resource the page attachment as a {@link Resource} 849 * @param page the page whose attachment is going to be indexed 850 * @throws Exception if something goes wrong when processing the indexation of the page attachment 851 */ 852 public void indexPageAttachment(Resource resource, Page page) throws Exception 853 { 854 Request request = ContextHelper.getRequest(_context); 855 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 856 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 857 _indexPageAttachment(resource, page, solrClient); 858 } 859 860 private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception 861 { 862 SolrInputDocument document = new SolrInputDocument(); 863 864 // Prepare resource doc 865 _populatePageAttachmentDocument(resource, document, page); 866 867 // Indexation of the document 868 _indexResourceDocument(resource, document, solrClient); 869 } 870 871 private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception 872 { 873 String language = page.getSitemapName(); 874 875 _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language); 876 877 Site site = page.getSite(); 878 // site name - Store.YES, Index.NOT_ANALYZED 879 document.addField(SolrWebFieldNames.SITE_NAME, site.getName()); 880 881 // site type - Store.YES, Index.NOT_ANALYZED 882 document.addField(SolrWebFieldNames.SITE_TYPE, site.getType()); 883 884 // Added for Solr. 885 // Page site map name - Store.YES, Index.NOT_ANALYZED 886 document.addField(SITEMAP_NAME, page.getSitemapName()); 887 888 // Need the id of the page for unindexing attachment during the unindexing of the page 889 document.addField(ATTACHMENT_PAGE_ID, page.getId()); 890 } 891 892 /** 893 * Index a populated solr input document of type Page. 894 * @param page the page from which the input document is created 895 * @param document the input document to add to the solr index 896 * @param workspaceName The workspace name 897 * @param solrClient The solr client to use 898 * @throws SolrServerException if there is an error on the Solr server 899 * @throws IOException if there is a communication error with the server 900 */ 901 protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 902 { 903 // Retrieve appropriate solr client 904 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 905 906 // Add document 907 UpdateResponse solrResponse = solrClient.add(collectionName, document); 908 int status = solrResponse.getStatus(); 909 910 if (status != 0) 911 { 912 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId()); 913 } 914 915 getLogger().debug("Successful page indexing. Page identifier : {}", page.getId()); 916 } 917 918 /** 919 * Index a populated solr input document of type Resource. 920 * @param resource the resource from which the input document is created 921 * @param document the input document 922 * @param solrClient The solr client to use 923 * @throws SolrServerException if there is an error on the server 924 * @throws IOException if there is a communication error with the server 925 */ 926 protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException 927 { 928 // Retrieve appropriate solr client 929 Request request = ContextHelper.getRequest(_context); 930 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 931 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 932 933 // Add document 934 UpdateResponse solrResponse = solrClient.add(collectionName, document); 935 int status = solrResponse.getStatus(); 936 937 if (status != 0) 938 { 939 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId()); 940 } 941 942 getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId()); 943 } 944 945 /////////////////////////////////////////////////////////////////////////// 946 947 /** 948 * Un-index a page by its ID for all workspaces and commit 949 * @param pageId The page ID. 950 * @param unindexRecursively also unindex child pages if requested. 951 * @param unindexAttachments also unindex page attachments 952 * @throws Exception if an error occurs during index update. 953 */ 954 public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception 955 { 956 unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments); 957 unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments); 958 } 959 960 /** 961 * De-index a page (and optionally its children pages). 962 * @param pageId the page to be de-indexed. 963 * @param workspaceName The workspace where to work in 964 * @param unindexRecursively also unindex child pages if requested. 965 * @param unindexAttachments also unindex page attachments 966 * @throws Exception if an error occurs during index update. 967 */ 968 public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception 969 { 970 Request request = ContextHelper.getRequest(_context); 971 972 // Retrieve the current workspace. 973 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 974 // Retrieve the current site name. 975 String currentSiteName = (String) request.getAttribute("siteName"); 976 977 try 978 { 979 // Force the workspace. 980 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 981 982 getLogger().debug("Unindexing page: {}", pageId); 983 984 _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments); 985 } 986 catch (Exception e) 987 { 988 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 989 getLogger().error(error, e); 990 throw new IndexingException(error, e); 991 } 992 finally 993 { 994 // Restore the site name. 995 request.setAttribute("siteName", currentSiteName); 996 // Restore context 997 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 998 } 999 } 1000 1001 /** 1002 * Deindex a document of type Page. Also deindex attachments of a page 1003 * @param pageId the id of the page to deindex 1004 * @param workspaceName The workspace name 1005 * @param unindexRecursively also unindex child pages if requested. 1006 * @param unindexAttachments also unindex page attachments 1007 * @throws SolrServerException if there is an error on the server 1008 * @throws IOException if there is a communication error with the server 1009 * @throws QuerySyntaxException if the uri query can't be built because of a syntax error. 1010 */ 1011 protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException 1012 { 1013 // Retrieve appropriate solr client 1014 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 1015 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1016 1017 getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName); 1018 1019 Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively)); 1020 Query query; 1021 if (unindexRecursively && unindexAttachments) 1022 { 1023 // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"} 1024 Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID); 1025 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery)); 1026 query = new OrQuery(attachments, pages); 1027 } 1028 else if (unindexAttachments) 1029 { 1030 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId)); 1031 query = new OrQuery(attachments, pages); 1032 } 1033 else 1034 { 1035 query = pages; 1036 } 1037 1038 // Delete by query 1039 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build()); 1040 int status = solrResponse.getStatus(); 1041 1042 if (status != 0) 1043 { 1044 throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId); 1045 } 1046 1047 getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId); 1048 } 1049 1050 /////////////////////////////////////////////////////////////////////////// 1051 1052 /** 1053 * Reindex a page by its ID for all workspaces and commit 1054 * @param pageId The page ID. 1055 * @param reindexRecursively also reindex child pages if requested. 1056 * @param reindexAttachments also reindex page attachments 1057 * @throws Exception if an error occurs during index update. 1058 */ 1059 public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception 1060 { 1061 reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments); 1062 reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments); 1063 } 1064 1065 1066 /** 1067 * Reindex a page by its ID. 1068 * @param pageId The page ID. 1069 * @param workspaceName The workspace where to work in 1070 * @param reindexRecursively also reindex child pages if requested. 1071 * @param reindexAttachments also reindex page attachments 1072 * @throws IndexingException if an error occurs during index update. 1073 */ 1074 public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException 1075 { 1076 Request request = ContextHelper.getRequest(_context); 1077 1078 // Retrieve the current workspace. 1079 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 1080 // Retrieve the current site name. 1081 String currentSiteName = (String) request.getAttribute("siteName"); 1082 1083 try 1084 { 1085 // Force the workspace. 1086 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1087 1088 getLogger().debug("Reindexing page: {}", pageId); 1089 1090 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 1091 { 1092 Page page = _ametysObjectResolver.resolveById(pageId); 1093 _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments); 1094 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1095 _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient); 1096 } 1097 } 1098 catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e) 1099 { 1100 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1101 getLogger().error(error, e); 1102 throw new IndexingException(error, e); 1103 } 1104 finally 1105 { 1106 // Restore the site name. 1107 request.setAttribute("siteName", currentSiteName); 1108 // Restore context 1109 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1110 } 1111 } 1112}