001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.util.ArrayList; 020import java.util.Collection; 021import java.util.Date; 022import java.util.HashSet; 023import java.util.List; 024import java.util.Map; 025import java.util.Optional; 026import java.util.Set; 027import java.util.function.Function; 028import java.util.stream.Collectors; 029 030import org.apache.avalon.framework.component.Component; 031import org.apache.avalon.framework.context.Context; 032import org.apache.avalon.framework.context.ContextException; 033import org.apache.avalon.framework.context.Contextualizable; 034import org.apache.avalon.framework.service.ServiceException; 035import org.apache.avalon.framework.service.ServiceManager; 036import org.apache.avalon.framework.service.Serviceable; 037import org.apache.cocoon.components.ContextHelper; 038import org.apache.cocoon.environment.Request; 039import org.apache.commons.lang3.ArrayUtils; 040import org.apache.solr.client.solrj.SolrClient; 041import org.apache.solr.client.solrj.SolrServerException; 042import org.apache.solr.client.solrj.response.UpdateResponse; 043import org.apache.solr.common.SolrInputDocument; 044import org.apache.solr.common.SolrInputField; 045 046import org.ametys.cms.content.indexing.solr.SolrContentIndexer; 047import org.ametys.cms.content.indexing.solr.SolrFieldNames; 048import org.ametys.cms.content.indexing.solr.SolrIndexer; 049import org.ametys.cms.content.indexing.solr.SolrResourceIndexer; 050import org.ametys.cms.contenttype.ContentConstants; 051import org.ametys.cms.contenttype.ContentTypesHelper; 052import org.ametys.cms.contenttype.MetadataDefinition; 053import org.ametys.cms.contenttype.RepeaterDefinition; 054import org.ametys.cms.contenttype.indexing.IndexingField; 055import org.ametys.cms.contenttype.indexing.IndexingModel; 056import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 057import org.ametys.cms.indexing.IndexingException; 058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer; 059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint; 060import org.ametys.cms.repository.Content; 061import org.ametys.cms.search.query.AndQuery; 062import org.ametys.cms.search.query.DocumentTypeQuery; 063import org.ametys.cms.search.query.JoinQuery; 064import org.ametys.cms.search.query.OrQuery; 065import org.ametys.cms.search.query.Query; 066import org.ametys.cms.search.query.QuerySyntaxException; 067import org.ametys.cms.search.solr.SolrClientProvider; 068import org.ametys.cms.search.solr.field.FirstValidationSearchField; 069import org.ametys.cms.search.solr.field.LastMajorValidationSearchField; 070import org.ametys.cms.search.solr.field.LastModifiedSearchField; 071import org.ametys.cms.search.solr.field.LastValidationSearchField; 072import org.ametys.cms.tag.Tag; 073import org.ametys.cms.tag.TagHelper; 074import org.ametys.cms.tag.TagProviderExtensionPoint; 075import org.ametys.plugins.explorer.resources.Resource; 076import org.ametys.plugins.explorer.resources.ResourceCollection; 077import org.ametys.plugins.repository.AmetysObject; 078import org.ametys.plugins.repository.AmetysObjectResolver; 079import org.ametys.plugins.repository.AmetysRepositoryException; 080import org.ametys.plugins.repository.RepositoryConstants; 081import org.ametys.plugins.repository.metadata.CompositeMetadata; 082import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 083import org.ametys.runtime.plugin.component.AbstractLogEnabled; 084import org.ametys.web.WebConstants; 085import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint; 086import org.ametys.web.repository.page.Page; 087import org.ametys.web.repository.page.Page.PageType; 088import org.ametys.web.repository.page.Zone; 089import org.ametys.web.repository.page.ZoneItem; 090import org.ametys.web.repository.page.ZoneItem.ZoneType; 091import org.ametys.web.repository.site.Site; 092import org.ametys.web.repository.sitemap.Sitemap; 093import org.ametys.web.search.query.PageAttachmentQuery; 094import org.ametys.web.search.query.PageQuery; 095import org.ametys.web.service.Service; 096import org.ametys.web.service.ServiceExtensionPoint; 097 098/** 099 * Component responsible for indexing a page with all its contents. 100 */ 101public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable 102{ 103 /** The avalon role. */ 104 public static final String ROLE = SolrPageIndexer.class.getName(); 105 106 /** The Solr client provider */ 107 protected SolrClientProvider _solrClientProvider; 108 /** The Solr indexer */ 109 protected SolrIndexer _solrIndexer; 110 /** Solr Ametys contents indexer */ 111 protected SolrContentIndexer _solrContentIndexer; 112 /** Solr Ametys resources indexer */ 113 protected SolrResourceIndexer _solrResourceIndexer; 114 /** The extension point for PageVisibleAttachmentIndexers */ 115 protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP; 116 /** The additional property indexer extension point. */ 117 protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP; 118 /** The tag provider extension point. */ 119 protected TagProviderExtensionPoint _tagProviderEP; 120 121 /** The service extension point. */ 122 protected ServiceExtensionPoint _serviceExtensionPoint; 123 /** The Ametys object resolver*/ 124 protected AmetysObjectResolver _ametysObjectResolver; 125 /** The avalon context */ 126 protected Context _context; 127 128 private ContentTypesHelper _cTypesHelper; 129 130 @Override 131 public void service(ServiceManager manager) throws ServiceException 132 { 133 _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 134 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 135 _solrContentIndexer = (SolrContentIndexer) manager.lookup(SolrContentIndexer.ROLE); 136 _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 137 _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE); 138 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 139 _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE); 140 _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE); 141 _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE); 142 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 143 } 144 145 public void contextualize(Context context) throws ContextException 146 { 147 _context = context; 148 } 149 150 /** 151 * Index a page and eventually its children, recursively, in all workspaces and commit<br> 152 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 153 * @param pageId the page to be indexed. 154 * @param indexRecursively to also process children pages. 155 * @param indexAttachments to index page attachments 156 * @throws Exception if an error occurs during indexation. 157 */ 158 public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception 159 { 160 indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments); 161 indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments); 162 } 163 164 /** 165 * Index a page and eventually its children, recursively.<br> 166 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 167 * @param pageId the page to be indexed. 168 * @param workspaceName the workspace where to index 169 * @param indexRecursively to also process children pages. 170 * @param indexAttachments to index page attachments 171 * @throws IndexingException if an error occurs during indexation. 172 */ 173 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException 174 { 175 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 176 indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient); 177 } 178 179 /** 180 * Index a page and eventually its children, recursively.<br> 181 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 182 * @param pageId the page to be indexed. 183 * @param workspaceName the workspace where to index 184 * @param indexRecursively to also process children pages. 185 * @param indexAttachments to index page attachments 186 * @param solrClient The solr client to use 187 * @throws IndexingException if an error occurs during indexation. 188 */ 189 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 190 { 191 Request request = ContextHelper.getRequest(_context); 192 193 // Retrieve the current workspace. 194 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 195 // Retrieve the current site name. 196 String currentSiteName = (String) request.getAttribute("siteName"); 197 198 try 199 { 200 // Force the workspace. 201 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 202 203 getLogger().debug("Indexing page: {}", pageId); 204 205 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 206 { 207 Page page = _ametysObjectResolver.resolveById(pageId); 208 _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 209 } 210 } 211 catch (AmetysRepositoryException e) 212 { 213 String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName); 214 getLogger().error(error, e); 215 throw new IndexingException(error, e); 216 } 217 finally 218 { 219 // Restore the site name. 220 request.setAttribute("siteName", currentSiteName); 221 // Restore context 222 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 223 } 224 } 225 226 private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 227 { 228 getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName); 229 230 SolrInputDocument document = new SolrInputDocument(); 231 232 try 233 { 234 // Prepare the solr input document by adding fields. 235 _populatePageDocument(page, document); 236 237 // Set the additional properties in the document. 238 _populateAdditionalProperties(page, document); 239 240 // Indexation of ACL initial values 241 _solrIndexer.indexAclInitValues(page, document); 242 243 // Indexation of the document 244 _indexPageDocument(page, document, workspaceName, solrClient); 245 246 // Index page attachments documents 247 if (indexAttachments) 248 { 249 _indexPageAttachments(page.getRootAttachments(), page, solrClient); 250 } 251 } 252 catch (Exception e) 253 { 254 String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName); 255 getLogger().error(error, e); 256 throw new IndexingException(error, e); 257 } 258 259 if (indexRecursively) 260 { 261 for (Page child : page.getChildrenPages()) 262 { 263 // FIXME index child pages if (and only if) not indexed... see original source. 264// indexPage(child, false, indexRecursively); 265// indexPage(child, false); 266 _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient); 267 } 268 } 269 } 270 271 /** 272 * Populate the solr input document by adding fields to index. 273 * @param page the page to index. 274 * @param document the solr input document 275 * @throws Exception if something goes wrong when processing the indexation of the page 276 */ 277 protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception 278 { 279 Sitemap sitemap = page.getSitemap(); 280 String sitemapName = sitemap.getName(); 281 Site site = page.getSite(); 282 String siteName = site.getName(); 283 String pageId = page.getId(); 284 String pageTitle = page.getTitle(); 285 String pageLongTitle = page.getLongTitle(); 286 String language = sitemapName; 287 288 // Page id and type 289 document.addField(SolrFieldNames.ID, pageId); 290 document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE); 291 292 // Fulltext 293 SolrContentIndexer.indexFulltextValue(document, pageTitle, language); 294 if (!pageTitle.equals(pageLongTitle)) 295 { 296 SolrContentIndexer.indexFulltextValue(document, pageLongTitle, language); 297 } 298 299 // Page title 300 _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language); 301 // Page long title 302 _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language); 303 // Title for sorting 304 document.addField(TITLE_SORT, pageTitle); 305 306 document.addField(TEMPLATE, page.getTemplate()); 307 document.addField(PAGE_TYPE, page.getType().name()); 308 document.addField(PAGE_DEPTH, page.getDepth()); 309 310 // Contents (page title shoud be indexed before because the main content can override it). 311 _populatePageContentsDocument(page, document); 312 313 // Parent of the page 314 AmetysObject parent = page.getParent(); 315 if (parent != null) 316 { 317 document.addField(PAGE_PARENT_ID, parent.getId()); 318 } 319 320 // Ancestors of the page 321 List<String> ancestorIds = new ArrayList<>(); 322 while (parent instanceof Page) 323 { 324 ancestorIds.add(parent.getId()); 325 parent = parent.getParent(); 326 } 327 document.addField(PAGE_ANCESTOR_IDS, ancestorIds); 328 329 document.addField(SITE_NAME, siteName); 330 document.addField(SITEMAP_NAME, sitemapName); 331 document.addField(SITE_TYPE, site.getType()); 332 333 // Page tags (strict and tags including ancestor pages). 334 Set<String> tags = page.getTags() 335 .stream() 336 .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName()))) 337 .collect(Collectors.toSet()); 338 document.addField(SolrFieldNames.TAGS, tags); 339 document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page)); 340 341 _populateDatesOfPage(page, document); 342 343 // Attachments 344 _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language); 345 Optional.ofNullable(page.getRootAttachments()) 346 .map(AmetysObject::getId) 347 .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 348 _indexVisibleAttachments(page, document); 349 } 350 351 private void _indexVisibleAttachments(Page page, SolrInputDocument document) 352 { 353 Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds() 354 .stream() 355 .map(_pageVisibleAttachmentIndexerEP::getExtension) 356 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page)) 357 .flatMap(Collection::stream) 358 .collect(Collectors.toList()); 359 document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 360 } 361 362 /** 363 * Populate the solr input document with dates from the page 364 * @param page The page 365 * @param document The Solr document 366 */ 367 protected void _populateDatesOfPage(Page page, SolrInputDocument document) 368 { 369 // Page last modification date 370 Date lastModified = _getLastModificationDate(page); 371 if (lastModified != null) 372 { 373 String lastModifiedStr = SolrIndexer.dateFormat().format(lastModified); 374 // For 'new' search service 375 document.addField(LastModifiedSearchField.NAME, lastModifiedStr); 376 // For 'old' search service 377 document.addField(LAST_MODIFIED + "_dt", lastModifiedStr); 378 } 379 380 // Page last validation date 381 Date lastValidation = _getLastValidationDate(page); 382 if (lastValidation != null) 383 { 384 String lastValidationStr = SolrIndexer.dateFormat().format(lastValidation); 385 // For 'new' search service 386 document.addField(LastValidationSearchField.NAME, lastValidationStr); 387 if (!LAST_VALIDATION.equals(LastValidationSearchField.NAME)) 388 { 389 // For 'old' search service 390 document.addField(LAST_VALIDATION, lastValidationStr); 391 } 392 } 393 394 // Page first validation date 395 Date firstValidation = _getFirstValidationDate(page); 396 if (firstValidation != null) 397 { 398 String firstValidationStr = SolrIndexer.dateFormat().format(firstValidation); 399 // For 'new' search service 400 document.addField(FirstValidationSearchField.NAME, firstValidationStr); 401 } 402 403 // Page last major validation date 404 Date lastMajorValidation = _getLastMajorValidationDate(page); 405 if (lastMajorValidation != null) 406 { 407 String lastMajorValidationStr = SolrIndexer.dateFormat().format(lastMajorValidation); 408 // For 'new' search service 409 document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr); 410 } 411 412 // date for sorting 413 SolrInputField dateField = document.getField(DATE_FOR_SORTING); 414 if (dateField == null) 415 { 416 Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES); 417 if (oDateValues != null && !oDateValues.isEmpty()) 418 { 419 document.setField(DATE_FOR_SORTING, oDateValues.iterator().next()); 420 } 421 } 422 } 423 424 private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language) 425 { 426 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName); 427 428 document.addField(fieldName, possiblyTruncatedValue); 429 document.addField(fieldName + "_txt_" + language, fieldValue); 430 document.addField(fieldName + "_txt_stemmed_" + language, fieldValue); 431 document.addField(fieldName + "_txt_ws_" + language, fieldValue); 432 433 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 434 document.addField(fieldName + "_s_ws", fieldValue.toLowerCase()); 435 document.addField(fieldName + "_txt", fieldValue); 436 } 437 /** 438 * Get all the page tags with their ancestors. 439 * @param page The page. 440 * @return All the page tags with their ancestors. 441 */ 442 protected Set<String> _getTagsWithAncestors(Page page) 443 { 444 Set<String> allTags = new HashSet<>(page.getTags()); 445 446 Map<String, Object> tagParams = Map.of("siteName", page.getSiteName()); 447 448 for (String tagName : page.getTags()) 449 { 450 allTags.add(tagName); 451 452 // Get the ancestor tags 453 Tag tag = _tagProviderEP.getTag(tagName, tagParams); 454 for (Tag ancestor : TagHelper.getAncestors(tag, false)) 455 { 456 allTags.add(ancestor.getName()); 457 } 458 } 459 460 return allTags; 461 } 462 463 /** 464 * Index the content of the page.<p> 465 * @param page the page to index. 466 * @param document the document to populate. 467 * @throws Exception if an error occurs. 468 */ 469 protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception 470 { 471 if (page.getType() == PageType.CONTAINER) 472 { 473 for (Zone zone : page.getZones()) 474 { 475 for (ZoneItem zoneItem : zone.getZoneItems()) 476 { 477 if (zoneItem.getType() == ZoneType.CONTENT) 478 { 479 try 480 { 481 Content content = zoneItem.getContent(); 482 document.addField(CONTENT_IDS, content.getId()); 483 484 for (String cType : content.getTypes()) 485 { 486 document.addField(PAGE_CONTENT_TYPES, cType); 487 document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets 488 } 489 490 _indexFacetableField(content, document); 491 } 492 catch (AmetysRepositoryException e) 493 { 494 getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 495 } 496 } 497 else if (zoneItem.getType() == ZoneType.SERVICE) 498 { 499 try 500 { 501 String serviceId = zoneItem.getServiceId(); 502 document.addField(SERVICE_IDS, serviceId); 503 504 Service service = _serviceExtensionPoint.getExtension(serviceId); 505 if (service == null) 506 { 507 getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId()); 508 } 509 else 510 { 511 service.index(zoneItem, document); 512 } 513 } 514 catch (AmetysRepositoryException e) 515 { 516 getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 517 } 518 519 } 520 } 521 } 522 } 523 } 524 525 /** 526 * Index the facetable fields of a content into the page solr document 527 * @param content The content 528 * @param document The main page solr document. 529 */ 530 protected void _indexFacetableField(Content content, SolrInputDocument document) 531 { 532 IndexingModel indexingModel = null; 533 try 534 { 535 indexingModel = _cTypesHelper.getIndexingModel(content); 536 } 537 catch (RuntimeException e) 538 { 539 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 540 throw e; 541 } 542 543 for (IndexingField field : indexingModel.getFields()) 544 { 545 if (field instanceof MetadataIndexingField) 546 { 547 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 548 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 549 550 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 551 if (definition != null) 552 { 553 _findAndIndexFacetableField(pathSegments, content.getLanguage(), content.getMetadataHolder(), definition, field, document); 554 } 555 } 556 } 557 } 558 559 /** 560 * Index the facetable fields of a content into the page solr document 561 * @param pathSegments The path of metadata 562 * @param lang The language 563 * @param metadata The parent composite metadata 564 * @param definition The metadata definition 565 * @param field The indexing field 566 * @param pageDocument The Solr page document 567 */ 568 protected void _findAndIndexFacetableField(String[] pathSegments, String lang, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, SolrInputDocument pageDocument) 569 { 570 String currentFieldName = pathSegments[0]; 571 572 if (!metadata.hasMetadata(currentFieldName)) 573 { 574 // Nothing to do 575 return; 576 } 577 578 switch (definition.getType()) 579 { 580 case STRING: 581 if (definition.getEnumerator() != null) 582 { 583 String[] strValues = metadata.getStringArray(currentFieldName, new String[0]); 584 for (String value : strValues) 585 { 586 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + currentFieldName + "_s_dv", value); 587 } 588 } 589 break; 590 case CONTENT: 591 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 592 for (String contentId : contentIds) 593 { 594 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + currentFieldName + "_s_dv", contentId); 595 } 596 break; 597 case COMPOSITE: 598 if (pathSegments.length > 1) 599 { 600 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 601 602 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 603 if (definition instanceof RepeaterDefinition) 604 { 605 String[] entries = composite.getMetadataNames(); 606 for (String entry : entries) 607 { 608 _findAndIndexFacetableField(followingSegments, lang, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, pageDocument); 609 } 610 } 611 else 612 { 613 _findAndIndexFacetableField(followingSegments, lang, composite, definition.getMetadataDefinition(followingSegments[0]), field, pageDocument); 614 } 615 } 616 617 break; 618 default: 619 break; 620 621 } 622 } 623 624 /** 625 * Computes the last modification date of a page. 626 * @param page the page. 627 * @return the last modification date or <code>null</code>. 628 */ 629 protected Date _getLastModificationDate(Page page) 630 { 631 return _getLastDate(page, Content::getLastModified); 632 } 633 /** 634 * Computes the first validation date of a page. 635 * @param page the page. 636 * @return the first validation date or <code>null</code>. 637 */ 638 protected Date _getFirstValidationDate(Page page) 639 { 640 return _getFirstDate(page, Content::getFirstValidationDate); 641 } 642 643 /** 644 * Computes the last validation date of a page. 645 * @param page the page. 646 * @return the last validation date or <code>null</code>. 647 */ 648 protected Date _getLastValidationDate(Page page) 649 { 650 return _getLastDate(page, Content::getLastValidationDate); 651 } 652 653 /** 654 * Computes the last major validation date of a page. 655 * @param page the page. 656 * @return the last major validation date or <code>null</code>. 657 */ 658 protected Date _getLastMajorValidationDate(Page page) 659 { 660 return _getLastDate(page, Content::getLastMajorValidationDate); 661 } 662 663 /** 664 * Computes a "last date" of a page, using the simple and naive following algorithm: 665 * <br>From all the dates from each of its contents, keep the greatest of them. 666 * @param page the page. 667 * @param dateRetriever The function to retrieve a Date from a Content of the Page 668 * @return the "last date" or <code>null</code>. 669 */ 670 protected Date _getLastDate(Page page, Function<Content, Date> dateRetriever) 671 { 672 Date last = null; 673 674 if (page.getType() == PageType.CONTAINER) 675 { 676 for (Zone zone : page.getZones()) 677 { 678 for (ZoneItem zoneItem : zone.getZoneItems()) 679 { 680 switch (zoneItem.getType()) 681 { 682 case SERVICE: 683 // A service has no last date 684 break; 685 case CONTENT: 686 try 687 { 688 Date contentLast = dateRetriever.apply(zoneItem.getContent()); 689 690 if (contentLast != null && (last == null || contentLast.after(last))) 691 { 692 // Keep the latest date 693 last = contentLast; 694 } 695 } 696 catch (AmetysRepositoryException e) 697 { 698 getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 699 } 700 break; 701 default: 702 break; 703 } 704 } 705 } 706 } 707 708 return last; 709 } 710 711 /** 712 * Computes a "first date" of a page, using the simple and naive following algorithm: 713 * <br>From all the dates from each of its contents, keep the lowest of them. 714 * @param page the page. 715 * @param dateRetriever The function to retrieve a Date from a Content of the Page 716 * @return the "first date" or <code>null</code>. 717 */ 718 protected Date _getFirstDate(Page page, Function<Content, Date> dateRetriever) 719 { 720 Date first = null; 721 722 if (page.getType() == PageType.CONTAINER) 723 { 724 for (Zone zone : page.getZones()) 725 { 726 for (ZoneItem zoneItem : zone.getZoneItems()) 727 { 728 switch (zoneItem.getType()) 729 { 730 case SERVICE: 731 // A service has no first date 732 break; 733 case CONTENT: 734 try 735 { 736 Date contentFirst = dateRetriever.apply(zoneItem.getContent()); 737 738 if (contentFirst != null && (first == null || contentFirst.before(first))) 739 { 740 // Keep the lowest date 741 first = contentFirst; 742 } 743 } 744 catch (AmetysRepositoryException e) 745 { 746 getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 747 } 748 break; 749 default: 750 break; 751 } 752 } 753 } 754 } 755 756 return first; 757 } 758 759 /** 760 * Populate the solr input document by adding fields to index. 761 * @param page the page to index. 762 * @param document the solr input document 763 * @throws Exception if something goes wrong when processing the indexation of the page 764 */ 765 protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception 766 { 767 Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page"); 768 for (AdditionalPropertyIndexer indexer : indexers) 769 { 770 indexer.index(page, document); 771 } 772 } 773 774 /** 775 * Index page attachments as new entries in the index. 776 * @param collection the collection of attachments 777 * @param page the page whose attachments will be indexed 778 * @throws Exception if something goes wrong when indexing the attachments of the page 779 */ 780 public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception 781 { 782 Request request = ContextHelper.getRequest(_context); 783 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 784 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 785 _indexPageAttachments(collection, page, solrClient); 786 } 787 788 private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception 789 { 790 if (collection == null) 791 { 792 return; 793 } 794 795 for (AmetysObject object : collection.getChildren()) 796 { 797 if (object instanceof ResourceCollection) 798 { 799 _indexPageAttachments((ResourceCollection) object, page, solrClient); 800 } 801 else if (object instanceof Resource) 802 { 803 Resource resource = (Resource) object; 804 _indexPageAttachment(resource, page, solrClient); 805 } 806 } 807 } 808 809 /** 810 * Index a page attachment 811 * @param resource the page attachment as a {@link Resource} 812 * @param page the page whose attachment is going to be indexed 813 * @throws Exception if something goes wrong when processing the indexation of the page attachment 814 */ 815 public void indexPageAttachment(Resource resource, Page page) throws Exception 816 { 817 Request request = ContextHelper.getRequest(_context); 818 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 819 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 820 _indexPageAttachment(resource, page, solrClient); 821 } 822 823 private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception 824 { 825 SolrInputDocument document = new SolrInputDocument(); 826 827 // Prepare resource doc 828 _populatePageAttachmentDocument(resource, document, page); 829 830 // Indexation of the document 831 _indexResourceDocument(resource, document, solrClient); 832 } 833 834 private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception 835 { 836 String language = page.getSitemapName(); 837 838 _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language); 839 840 Site site = page.getSite(); 841 // site name - Store.YES, Index.NOT_ANALYZED 842 document.addField(SolrWebFieldNames.SITE_NAME, site.getName()); 843 844 // site type - Store.YES, Index.NOT_ANALYZED 845 document.addField(SolrWebFieldNames.SITE_TYPE, site.getType()); 846 847 // Added for Solr. 848 // Page site map name - Store.YES, Index.NOT_ANALYZED 849 document.addField(SITEMAP_NAME, page.getSitemapName()); 850 851 // Need the id of the page for unindexing attachment during the unindexing of the page 852 document.addField(ATTACHMENT_PAGE_ID, page.getId()); 853 } 854 855 /** 856 * Index a populated solr input document of type Page. 857 * @param page the page from which the input document is created 858 * @param document the input document to add to the solr index 859 * @param workspaceName The workspace name 860 * @param solrClient The solr client to use 861 * @throws SolrServerException if there is an error on the Solr server 862 * @throws IOException if there is a communication error with the server 863 */ 864 protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 865 { 866 // Retrieve appropriate solr client 867 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 868 869 // Add document 870 UpdateResponse solrResponse = solrClient.add(collectionName, document); 871 int status = solrResponse.getStatus(); 872 873 if (status != 0) 874 { 875 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId()); 876 } 877 878 getLogger().debug("Successful page indexing. Page identifier : {}", page.getId()); 879 } 880 881 /** 882 * Index a populated solr input document of type Resource. 883 * @param resource the resource from which the input document is created 884 * @param document the input document 885 * @param solrClient The solr client to use 886 * @throws SolrServerException if there is an error on the server 887 * @throws IOException if there is a communication error with the server 888 */ 889 protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException 890 { 891 // Retrieve appropriate solr client 892 Request request = ContextHelper.getRequest(_context); 893 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 894 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 895 896 // Add document 897 UpdateResponse solrResponse = solrClient.add(collectionName, document); 898 int status = solrResponse.getStatus(); 899 900 if (status != 0) 901 { 902 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId()); 903 } 904 905 getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId()); 906 } 907 908 /////////////////////////////////////////////////////////////////////////// 909 910 /** 911 * Un-index a page by its ID for all workspaces and commit 912 * @param pageId The page ID. 913 * @param unindexRecursively also unindex child pages if requested. 914 * @param unindexAttachments also unindex page attachments 915 * @throws Exception if an error occurs during index update. 916 */ 917 public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception 918 { 919 unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments); 920 unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments); 921 } 922 923 /** 924 * De-index a page (and optionally its children pages). 925 * @param pageId the page to be de-indexed. 926 * @param workspaceName The workspace where to work in 927 * @param unindexRecursively also unindex child pages if requested. 928 * @param unindexAttachments also unindex page attachments 929 * @throws Exception if an error occurs during index update. 930 */ 931 public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception 932 { 933 Request request = ContextHelper.getRequest(_context); 934 935 // Retrieve the current workspace. 936 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 937 // Retrieve the current site name. 938 String currentSiteName = (String) request.getAttribute("siteName"); 939 940 try 941 { 942 // Force the workspace. 943 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 944 945 getLogger().debug("Unindexing page: {}", pageId); 946 947 _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments); 948 } 949 catch (Exception e) 950 { 951 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 952 getLogger().error(error, e); 953 throw new IndexingException(error, e); 954 } 955 finally 956 { 957 // Restore the site name. 958 request.setAttribute("siteName", currentSiteName); 959 // Restore context 960 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 961 } 962 } 963 964 /** 965 * Deindex a document of type Page. Also deindex attachments of a page 966 * @param pageId the id of the page to deindex 967 * @param workspaceName The workspace name 968 * @param unindexRecursively also unindex child pages if requested. 969 * @param unindexAttachments also unindex page attachments 970 * @throws SolrServerException if there is an error on the server 971 * @throws IOException if there is a communication error with the server 972 * @throws QuerySyntaxException if the uri query can't be built because of a syntax error. 973 */ 974 protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException 975 { 976 // Retrieve appropriate solr client 977 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 978 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 979 980 getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName); 981 982 Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively)); 983 Query query; 984 if (unindexRecursively && unindexAttachments) 985 { 986 // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"} 987 Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID); 988 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery)); 989 query = new OrQuery(attachments, pages); 990 } 991 else if (unindexAttachments) 992 { 993 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId)); 994 query = new OrQuery(attachments, pages); 995 } 996 else 997 { 998 query = pages; 999 } 1000 1001 // Delete by query 1002 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build()); 1003 int status = solrResponse.getStatus(); 1004 1005 if (status != 0) 1006 { 1007 throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId); 1008 } 1009 1010 getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId); 1011 } 1012 1013 /////////////////////////////////////////////////////////////////////////// 1014 1015 /** 1016 * Reindex a page by its ID for all workspaces and commit 1017 * @param pageId The page ID. 1018 * @param reindexRecursively also reindex child pages if requested. 1019 * @param reindexAttachments also reindex page attachments 1020 * @throws Exception if an error occurs during index update. 1021 */ 1022 public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception 1023 { 1024 reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments); 1025 reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments); 1026 } 1027 1028 1029 /** 1030 * Reindex a page by its ID. 1031 * @param pageId The page ID. 1032 * @param workspaceName The workspace where to work in 1033 * @param reindexRecursively also reindex child pages if requested. 1034 * @param reindexAttachments also reindex page attachments 1035 * @throws IndexingException if an error occurs during index update. 1036 */ 1037 public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException 1038 { 1039 Request request = ContextHelper.getRequest(_context); 1040 1041 // Retrieve the current workspace. 1042 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 1043 // Retrieve the current site name. 1044 String currentSiteName = (String) request.getAttribute("siteName"); 1045 1046 try 1047 { 1048 // Force the workspace. 1049 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1050 1051 getLogger().debug("Reindexing page: {}", pageId); 1052 1053 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 1054 { 1055 Page page = _ametysObjectResolver.resolveById(pageId); 1056 _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments); 1057 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1058 _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient); 1059 } 1060 } 1061 catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e) 1062 { 1063 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1064 getLogger().error(error, e); 1065 throw new IndexingException(error, e); 1066 } 1067 finally 1068 { 1069 // Restore the site name. 1070 request.setAttribute("siteName", currentSiteName); 1071 // Restore context 1072 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1073 } 1074 } 1075}