001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.util.ArrayList; 020import java.util.Collection; 021import java.util.Collections; 022import java.util.Date; 023import java.util.HashSet; 024import java.util.List; 025import java.util.Map; 026import java.util.Optional; 027import java.util.Set; 028import java.util.function.Function; 029import java.util.stream.Collectors; 030 031import org.apache.avalon.framework.component.Component; 032import org.apache.avalon.framework.context.Context; 033import org.apache.avalon.framework.context.ContextException; 034import org.apache.avalon.framework.context.Contextualizable; 035import org.apache.avalon.framework.service.ServiceException; 036import org.apache.avalon.framework.service.ServiceManager; 037import org.apache.avalon.framework.service.Serviceable; 038import org.apache.cocoon.components.ContextHelper; 039import org.apache.cocoon.environment.Request; 040import org.apache.commons.lang3.ArrayUtils; 041import org.apache.solr.client.solrj.SolrClient; 042import org.apache.solr.client.solrj.SolrServerException; 043import org.apache.solr.client.solrj.response.UpdateResponse; 044import org.apache.solr.common.SolrInputDocument; 045import org.apache.solr.common.SolrInputField; 046 047import org.ametys.cms.content.indexing.solr.SolrContentIndexer; 048import org.ametys.cms.content.indexing.solr.SolrFieldNames; 049import org.ametys.cms.content.indexing.solr.SolrIndexer; 050import org.ametys.cms.content.indexing.solr.SolrResourceIndexer; 051import org.ametys.cms.contenttype.ContentConstants; 052import org.ametys.cms.contenttype.ContentTypesHelper; 053import org.ametys.cms.contenttype.MetadataDefinition; 054import org.ametys.cms.contenttype.RepeaterDefinition; 055import org.ametys.cms.contenttype.indexing.IndexingField; 056import org.ametys.cms.contenttype.indexing.IndexingModel; 057import org.ametys.cms.contenttype.indexing.MetadataIndexingField; 058import org.ametys.cms.indexing.IndexingException; 059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer; 060import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint; 061import org.ametys.cms.repository.Content; 062import org.ametys.cms.search.query.AndQuery; 063import org.ametys.cms.search.query.DocumentTypeQuery; 064import org.ametys.cms.search.query.JoinQuery; 065import org.ametys.cms.search.query.OrQuery; 066import org.ametys.cms.search.query.Query; 067import org.ametys.cms.search.query.QuerySyntaxException; 068import org.ametys.cms.search.solr.SolrClientProvider; 069import org.ametys.cms.search.solr.field.FirstValidationSearchField; 070import org.ametys.cms.search.solr.field.LastMajorValidationSearchField; 071import org.ametys.cms.search.solr.field.LastModifiedSearchField; 072import org.ametys.cms.search.solr.field.LastValidationSearchField; 073import org.ametys.cms.tag.Tag; 074import org.ametys.cms.tag.TagHelper; 075import org.ametys.cms.tag.TagProviderExtensionPoint; 076import org.ametys.plugins.explorer.resources.Resource; 077import org.ametys.plugins.explorer.resources.ResourceCollection; 078import org.ametys.plugins.repository.AmetysObject; 079import org.ametys.plugins.repository.AmetysObjectResolver; 080import org.ametys.plugins.repository.AmetysRepositoryException; 081import org.ametys.plugins.repository.RepositoryConstants; 082import org.ametys.plugins.repository.metadata.CompositeMetadata; 083import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 084import org.ametys.runtime.plugin.component.AbstractLogEnabled; 085import org.ametys.web.WebConstants; 086import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint; 087import org.ametys.web.repository.page.Page; 088import org.ametys.web.repository.page.Page.PageType; 089import org.ametys.web.repository.page.Zone; 090import org.ametys.web.repository.page.ZoneItem; 091import org.ametys.web.repository.page.ZoneItem.ZoneType; 092import org.ametys.web.repository.site.Site; 093import org.ametys.web.repository.sitemap.Sitemap; 094import org.ametys.web.search.query.PageAttachmentQuery; 095import org.ametys.web.search.query.PageQuery; 096import org.ametys.web.service.Service; 097import org.ametys.web.service.ServiceExtensionPoint; 098 099/** 100 * Component responsible for indexing a page with all its contents. 101 */ 102public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable 103{ 104 /** The avalon role. */ 105 public static final String ROLE = SolrPageIndexer.class.getName(); 106 107 /** The Solr client provider */ 108 protected SolrClientProvider _solrClientProvider; 109 /** The Solr indexer */ 110 protected SolrIndexer _solrIndexer; 111 /** Solr Ametys contents indexer */ 112 protected SolrContentIndexer _solrContentIndexer; 113 /** Solr Ametys resources indexer */ 114 protected SolrResourceIndexer _solrResourceIndexer; 115 /** The extension point for PageVisibleAttachmentIndexers */ 116 protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP; 117 /** The additional property indexer extension point. */ 118 protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP; 119 /** The tag provider extension point. */ 120 protected TagProviderExtensionPoint _tagProviderEP; 121 122 /** The service extension point. */ 123 protected ServiceExtensionPoint _serviceExtensionPoint; 124 /** The Ametys object resolver*/ 125 protected AmetysObjectResolver _ametysObjectResolver; 126 /** The avalon context */ 127 protected Context _context; 128 129 private ContentTypesHelper _cTypesHelper; 130 131 @Override 132 public void service(ServiceManager manager) throws ServiceException 133 { 134 _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE); 135 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 136 _solrContentIndexer = (SolrContentIndexer) manager.lookup(SolrContentIndexer.ROLE); 137 _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE); 138 _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE); 139 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 140 _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE); 141 _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE); 142 _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE); 143 _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE); 144 } 145 146 public void contextualize(Context context) throws ContextException 147 { 148 _context = context; 149 } 150 151 /** 152 * Index a page and eventually its children, recursively, in all workspaces and commit<br> 153 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 154 * @param pageId the page to be indexed. 155 * @param indexRecursively to also process children pages. 156 * @param indexAttachments to index page attachments 157 * @throws Exception if an error occurs during indexation. 158 */ 159 public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception 160 { 161 indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments); 162 indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments); 163 } 164 165 /** 166 * Index a page and eventually its children, recursively.<br> 167 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 168 * @param pageId the page to be indexed. 169 * @param workspaceName the workspace where to index 170 * @param indexRecursively to also process children pages. 171 * @param indexAttachments to index page attachments 172 * @throws IndexingException if an error occurs during indexation. 173 */ 174 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException 175 { 176 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 177 indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient); 178 } 179 180 /** 181 * Index a page and eventually its children, recursively.<br> 182 * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed. 183 * @param pageId the page to be indexed. 184 * @param workspaceName the workspace where to index 185 * @param indexRecursively to also process children pages. 186 * @param indexAttachments to index page attachments 187 * @param solrClient The solr client to use 188 * @throws IndexingException if an error occurs during indexation. 189 */ 190 public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 191 { 192 Request request = ContextHelper.getRequest(_context); 193 194 // Retrieve the current workspace. 195 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 196 // Retrieve the current site name. 197 String currentSiteName = (String) request.getAttribute("siteName"); 198 199 try 200 { 201 // Force the workspace. 202 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 203 204 getLogger().debug("Indexing page: {}", pageId); 205 206 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 207 { 208 Page page = _ametysObjectResolver.resolveById(pageId); 209 _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient); 210 } 211 } 212 catch (AmetysRepositoryException e) 213 { 214 String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName); 215 getLogger().error(error, e); 216 throw new IndexingException(error, e); 217 } 218 finally 219 { 220 // Restore the site name. 221 request.setAttribute("siteName", currentSiteName); 222 // Restore context 223 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 224 } 225 } 226 227 private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException 228 { 229 getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName); 230 231 SolrInputDocument document = new SolrInputDocument(); 232 233 try 234 { 235 // Prepare the solr input document by adding fields. 236 _populatePageDocument(page, document); 237 238 // Set the additional properties in the document. 239 _populateAdditionalProperties(page, document); 240 241 // Indexation of ACL initial values 242 _solrIndexer.indexAclInitValues(page, document); 243 244 // Indexation of the document 245 _indexPageDocument(page, document, workspaceName, solrClient); 246 247 // Index page attachments documents 248 if (indexAttachments) 249 { 250 _indexPageAttachments(page.getRootAttachments(), page, solrClient); 251 } 252 } 253 catch (Exception e) 254 { 255 String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName); 256 getLogger().error(error, e); 257 throw new IndexingException(error, e); 258 } 259 260 if (indexRecursively) 261 { 262 for (Page child : page.getChildrenPages()) 263 { 264 // FIXME index child pages if (and only if) not indexed... see original source. 265// indexPage(child, false, indexRecursively); 266// indexPage(child, false); 267 _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient); 268 } 269 } 270 } 271 272 /** 273 * Populate the solr input document by adding fields to index. 274 * @param page the page to index. 275 * @param document the solr input document 276 * @throws Exception if something goes wrong when processing the indexation of the page 277 */ 278 protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception 279 { 280 Sitemap sitemap = page.getSitemap(); 281 String sitemapName = sitemap.getName(); 282 Site site = page.getSite(); 283 String siteName = site.getName(); 284 String pageId = page.getId(); 285 String pageTitle = page.getTitle(); 286 String pageLongTitle = page.getLongTitle(); 287 String language = sitemapName; 288 289 // Page id and type 290 document.addField(SolrFieldNames.ID, pageId); 291 document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE); 292 293 // Fulltext 294 SolrContentIndexer.indexFulltextValue(document, pageTitle, language); 295 if (!pageTitle.equals(pageLongTitle)) 296 { 297 SolrContentIndexer.indexFulltextValue(document, pageLongTitle, language); 298 } 299 300 // Page title 301 _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language); 302 // Page long title 303 _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language); 304 // Title for sorting 305 document.addField(TITLE_SORT, pageTitle); 306 307 document.addField(TEMPLATE, page.getTemplate()); 308 document.addField(PAGE_TYPE, page.getType().name()); 309 document.addField(PAGE_DEPTH, page.getDepth()); 310 311 // Contents (page title shoud be indexed before because the main content can override it). 312 _populatePageContentsDocument(page, document); 313 314 // Parent of the page 315 AmetysObject parent = page.getParent(); 316 if (parent != null) 317 { 318 document.addField(PAGE_PARENT_ID, parent.getId()); 319 } 320 321 // Ancestors of the page 322 List<String> ancestorIds = new ArrayList<>(); 323 while (parent instanceof Page) 324 { 325 ancestorIds.add(parent.getId()); 326 parent = parent.getParent(); 327 } 328 document.addField(PAGE_ANCESTOR_IDS, ancestorIds); 329 330 document.addField(SITE_NAME, siteName); 331 document.addField(SITEMAP_NAME, sitemapName); 332 document.addField(SITE_TYPE, site.getType()); 333 334 // Page tags (strict and tags including ancestor pages). 335 document.addField(SolrFieldNames.TAGS, page.getTags()); 336 document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page)); 337 338 _populateDatesOfPage(page, document); 339 340 // Attachments 341 _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language); 342 Optional.ofNullable(page.getRootAttachments()) 343 .map(AmetysObject::getId) 344 .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id)); 345 _indexVisibleAttachments(page, document); 346 } 347 348 private void _indexVisibleAttachments(Page page, SolrInputDocument document) 349 { 350 Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds() 351 .stream() 352 .map(_pageVisibleAttachmentIndexerEP::getExtension) 353 .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page)) 354 .flatMap(Collection::stream) 355 .collect(Collectors.toList()); 356 document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values); 357 } 358 359 /** 360 * Populate the solr input document with dates from the page 361 * @param page The page 362 * @param document The Solr document 363 */ 364 protected void _populateDatesOfPage(Page page, SolrInputDocument document) 365 { 366 // Page last modification date 367 Date lastModified = _getLastModificationDate(page); 368 if (lastModified != null) 369 { 370 String lastModifiedStr = SolrIndexer.dateFormat().format(lastModified); 371 // For 'new' search service 372 document.addField(LastModifiedSearchField.NAME, lastModifiedStr); 373 // For 'old' search service 374 document.addField(LAST_MODIFIED + "_dt", lastModifiedStr); 375 } 376 377 // Page last validation date 378 Date lastValidation = _getLastValidationDate(page); 379 if (lastValidation != null) 380 { 381 String lastValidationStr = SolrIndexer.dateFormat().format(lastValidation); 382 // For 'new' search service 383 document.addField(LastValidationSearchField.NAME, lastValidationStr); 384 if (!LAST_VALIDATION.equals(LastValidationSearchField.NAME)) 385 { 386 // For 'old' search service 387 document.addField(LAST_VALIDATION, lastValidationStr); 388 } 389 } 390 391 // Page first validation date 392 Date firstValidation = _getFirstValidationDate(page); 393 if (firstValidation != null) 394 { 395 String firstValidationStr = SolrIndexer.dateFormat().format(firstValidation); 396 // For 'new' search service 397 document.addField(FirstValidationSearchField.NAME, firstValidationStr); 398 } 399 400 // Page last major validation date 401 Date lastMajorValidation = _getLastMajorValidationDate(page); 402 if (lastMajorValidation != null) 403 { 404 String lastMajorValidationStr = SolrIndexer.dateFormat().format(lastMajorValidation); 405 // For 'new' search service 406 document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr); 407 } 408 409 // date for sorting 410 SolrInputField dateField = document.getField(DATE_FOR_SORTING); 411 if (dateField == null) 412 { 413 Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES); 414 if (oDateValues != null && !oDateValues.isEmpty()) 415 { 416 document.setField(DATE_FOR_SORTING, oDateValues.iterator().next()); 417 } 418 } 419 } 420 421 private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language) 422 { 423 String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName); 424 425 document.addField(fieldName, possiblyTruncatedValue); 426 document.addField(fieldName + "_txt_" + language, fieldValue); 427 document.addField(fieldName + "_txt_stemmed_" + language, fieldValue); 428 document.addField(fieldName + "_txt_ws_" + language, fieldValue); 429 430 document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase()); 431 document.addField(fieldName + "_s_ws", fieldValue.toLowerCase()); 432 document.addField(fieldName + "_txt", fieldValue); 433 } 434 /** 435 * Get all the page tags with their ancestors. 436 * @param page The page. 437 * @return All the page tags with their ancestors. 438 */ 439 protected Set<String> _getTagsWithAncestors(Page page) 440 { 441 Set<String> allTags = new HashSet<>(page.getTags()); 442 443 Map<String, Object> tagParams = Collections.singletonMap("siteName", page.getSiteName()); 444 445 for (String tagName : page.getTags()) 446 { 447 allTags.add(tagName); 448 449 // Get the ancestor tags 450 Tag tag = _tagProviderEP.getTag(tagName, tagParams); 451 for (Tag ancestor : TagHelper.getAncestors(tag, false)) 452 { 453 allTags.add(ancestor.getName()); 454 } 455 } 456 457 return allTags; 458 } 459 460 /** 461 * Index the content of the page.<p> 462 * @param page the page to index. 463 * @param document the document to populate. 464 * @throws Exception if an error occurs. 465 */ 466 protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception 467 { 468 if (page.getType() == PageType.CONTAINER) 469 { 470 for (Zone zone : page.getZones()) 471 { 472 for (ZoneItem zoneItem : zone.getZoneItems()) 473 { 474 if (zoneItem.getType() == ZoneType.CONTENT) 475 { 476 try 477 { 478 Content content = zoneItem.getContent(); 479 document.addField(CONTENT_IDS, content.getId()); 480 481 for (String cType : content.getTypes()) 482 { 483 document.addField(PAGE_CONTENT_TYPES, cType); 484 document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets 485 } 486 487 _indexFacetableField(content, document); 488 } 489 catch (AmetysRepositoryException e) 490 { 491 getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 492 } 493 } 494 else if (zoneItem.getType() == ZoneType.SERVICE) 495 { 496 try 497 { 498 String serviceId = zoneItem.getServiceId(); 499 document.addField(SERVICE_IDS, serviceId); 500 501 Service service = _serviceExtensionPoint.getExtension(serviceId); 502 if (service == null) 503 { 504 getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId()); 505 } 506 else 507 { 508 service.index(zoneItem, document); 509 } 510 } 511 catch (AmetysRepositoryException e) 512 { 513 getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 514 } 515 516 } 517 } 518 } 519 } 520 } 521 522 /** 523 * Index the facetable fields of a content into the page solr document 524 * @param content The content 525 * @param document The main page solr document. 526 */ 527 protected void _indexFacetableField(Content content, SolrInputDocument document) 528 { 529 IndexingModel indexingModel = null; 530 try 531 { 532 indexingModel = _cTypesHelper.getIndexingModel(content); 533 } 534 catch (RuntimeException e) 535 { 536 getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e); 537 throw e; 538 } 539 540 for (IndexingField field : indexingModel.getFields()) 541 { 542 if (field instanceof MetadataIndexingField) 543 { 544 String metadataPath = ((MetadataIndexingField) field).getMetadataPath(); 545 String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR); 546 547 MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes()); 548 if (definition != null) 549 { 550 _findAndIndexFacetableField(pathSegments, content.getLanguage(), content.getMetadataHolder(), definition, field, document); 551 } 552 } 553 } 554 } 555 556 /** 557 * Index the facetable fields of a content into the page solr document 558 * @param pathSegments The path of metadata 559 * @param lang The language 560 * @param metadata The parent composite metadata 561 * @param definition The metadata definition 562 * @param field The indexing field 563 * @param pageDocument The Solr page document 564 */ 565 protected void _findAndIndexFacetableField(String[] pathSegments, String lang, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, SolrInputDocument pageDocument) 566 { 567 String currentFieldName = pathSegments[0]; 568 569 if (!metadata.hasMetadata(currentFieldName)) 570 { 571 // Nothing to do 572 return; 573 } 574 575 switch (definition.getType()) 576 { 577 case STRING: 578 if (definition.getEnumerator() != null) 579 { 580 String[] strValues = metadata.getStringArray(currentFieldName, new String[0]); 581 for (String value : strValues) 582 { 583 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + currentFieldName + "_s_dv", value); 584 } 585 } 586 break; 587 case CONTENT: 588 String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]); 589 for (String contentId : contentIds) 590 { 591 pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + currentFieldName + "_s_dv", contentId); 592 } 593 break; 594 case COMPOSITE: 595 if (pathSegments.length > 1) 596 { 597 String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length); 598 599 CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName); 600 if (definition instanceof RepeaterDefinition) 601 { 602 String[] entries = composite.getMetadataNames(); 603 for (String entry : entries) 604 { 605 _findAndIndexFacetableField(followingSegments, lang, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, pageDocument); 606 } 607 } 608 else 609 { 610 _findAndIndexFacetableField(followingSegments, lang, composite, definition.getMetadataDefinition(followingSegments[0]), field, pageDocument); 611 } 612 } 613 614 break; 615 default: 616 break; 617 618 } 619 } 620 621 /** 622 * Computes the last modification date of a page. 623 * @param page the page. 624 * @return the last modification date or <code>null</code>. 625 */ 626 protected Date _getLastModificationDate(Page page) 627 { 628 return _getLastDate(page, Content::getLastModified); 629 } 630 /** 631 * Computes the first validation date of a page. 632 * @param page the page. 633 * @return the first validation date or <code>null</code>. 634 */ 635 protected Date _getFirstValidationDate(Page page) 636 { 637 return _getFirstDate(page, Content::getFirstValidationDate); 638 } 639 640 /** 641 * Computes the last validation date of a page. 642 * @param page the page. 643 * @return the last validation date or <code>null</code>. 644 */ 645 protected Date _getLastValidationDate(Page page) 646 { 647 return _getLastDate(page, Content::getLastValidationDate); 648 } 649 650 /** 651 * Computes the last major validation date of a page. 652 * @param page the page. 653 * @return the last major validation date or <code>null</code>. 654 */ 655 protected Date _getLastMajorValidationDate(Page page) 656 { 657 return _getLastDate(page, Content::getLastMajorValidationDate); 658 } 659 660 /** 661 * Computes a "last date" of a page, using the simple and naive following algorithm: 662 * <br>From all the dates from each of its contents, keep the greatest of them. 663 * @param page the page. 664 * @param dateRetriever The function to retrieve a Date from a Content of the Page 665 * @return the "last date" or <code>null</code>. 666 */ 667 protected Date _getLastDate(Page page, Function<Content, Date> dateRetriever) 668 { 669 Date last = null; 670 671 if (page.getType() == PageType.CONTAINER) 672 { 673 for (Zone zone : page.getZones()) 674 { 675 for (ZoneItem zoneItem : zone.getZoneItems()) 676 { 677 switch (zoneItem.getType()) 678 { 679 case SERVICE: 680 // A service has no last date 681 break; 682 case CONTENT: 683 try 684 { 685 Date contentLast = dateRetriever.apply(zoneItem.getContent()); 686 687 if (contentLast != null && (last == null || contentLast.after(last))) 688 { 689 // Keep the latest date 690 last = contentLast; 691 } 692 } 693 catch (AmetysRepositoryException e) 694 { 695 getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 696 } 697 break; 698 default: 699 break; 700 } 701 } 702 } 703 } 704 705 return last; 706 } 707 708 /** 709 * Computes a "first date" of a page, using the simple and naive following algorithm: 710 * <br>From all the dates from each of its contents, keep the lowest of them. 711 * @param page the page. 712 * @param dateRetriever The function to retrieve a Date from a Content of the Page 713 * @return the "first date" or <code>null</code>. 714 */ 715 protected Date _getFirstDate(Page page, Function<Content, Date> dateRetriever) 716 { 717 Date first = null; 718 719 if (page.getType() == PageType.CONTAINER) 720 { 721 for (Zone zone : page.getZones()) 722 { 723 for (ZoneItem zoneItem : zone.getZoneItems()) 724 { 725 switch (zoneItem.getType()) 726 { 727 case SERVICE: 728 // A service has no first date 729 break; 730 case CONTENT: 731 try 732 { 733 Date contentFirst = dateRetriever.apply(zoneItem.getContent()); 734 735 if (contentFirst != null && (first == null || contentFirst.before(first))) 736 { 737 // Keep the lowest date 738 first = contentFirst; 739 } 740 } 741 catch (AmetysRepositoryException e) 742 { 743 getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e); 744 } 745 break; 746 default: 747 break; 748 } 749 } 750 } 751 } 752 753 return first; 754 } 755 756 /** 757 * Populate the solr input document by adding fields to index. 758 * @param page the page to index. 759 * @param document the solr input document 760 * @throws Exception if something goes wrong when processing the indexation of the page 761 */ 762 protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception 763 { 764 Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page"); 765 for (AdditionalPropertyIndexer indexer : indexers) 766 { 767 indexer.index(page, document); 768 } 769 } 770 771 /** 772 * Index page attachments as new entries in the index. 773 * @param collection the collection of attachments 774 * @param page the page whose attachments will be indexed 775 * @throws Exception if something goes wrong when indexing the attachments of the page 776 */ 777 public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception 778 { 779 Request request = ContextHelper.getRequest(_context); 780 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 781 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 782 _indexPageAttachments(collection, page, solrClient); 783 } 784 785 private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception 786 { 787 if (collection == null) 788 { 789 return; 790 } 791 792 for (AmetysObject object : collection.getChildren()) 793 { 794 if (object instanceof ResourceCollection) 795 { 796 _indexPageAttachments((ResourceCollection) object, page, solrClient); 797 } 798 else if (object instanceof Resource) 799 { 800 Resource resource = (Resource) object; 801 _indexPageAttachment(resource, page, solrClient); 802 } 803 } 804 } 805 806 /** 807 * Index a page attachment 808 * @param resource the page attachment as a {@link Resource} 809 * @param page the page whose attachment is going to be indexed 810 * @throws Exception if something goes wrong when processing the indexation of the page attachment 811 */ 812 public void indexPageAttachment(Resource resource, Page page) throws Exception 813 { 814 Request request = ContextHelper.getRequest(_context); 815 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 816 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 817 _indexPageAttachment(resource, page, solrClient); 818 } 819 820 private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception 821 { 822 SolrInputDocument document = new SolrInputDocument(); 823 824 // Prepare resource doc 825 _populatePageAttachmentDocument(resource, document, page); 826 827 // Indexation of the document 828 _indexResourceDocument(resource, document, solrClient); 829 } 830 831 private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception 832 { 833 String language = page.getSitemapName(); 834 835 _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language); 836 837 Site site = page.getSite(); 838 // site name - Store.YES, Index.NOT_ANALYZED 839 document.addField(SolrWebFieldNames.SITE_NAME, site.getName()); 840 841 // site type - Store.YES, Index.NOT_ANALYZED 842 document.addField(SolrWebFieldNames.SITE_TYPE, site.getType()); 843 844 // Added for Solr. 845 // Page site map name - Store.YES, Index.NOT_ANALYZED 846 document.addField(SITEMAP_NAME, page.getSitemapName()); 847 848 // Need the id of the page for unindexing attachment during the unindexing of the page 849 document.addField(ATTACHMENT_PAGE_ID, page.getId()); 850 } 851 852 /** 853 * Index a populated solr input document of type Page. 854 * @param page the page from which the input document is created 855 * @param document the input document to add to the solr index 856 * @param workspaceName The workspace name 857 * @param solrClient The solr client to use 858 * @throws SolrServerException if there is an error on the Solr server 859 * @throws IOException if there is a communication error with the server 860 */ 861 protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 862 { 863 // Retrieve appropriate solr client 864 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 865 866 // Add document 867 UpdateResponse solrResponse = solrClient.add(collectionName, document); 868 int status = solrResponse.getStatus(); 869 870 if (status != 0) 871 { 872 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId()); 873 } 874 875 getLogger().debug("Successful page indexing. Page identifier : {}", page.getId()); 876 } 877 878 /** 879 * Index a populated solr input document of type Resource. 880 * @param resource the resource from which the input document is created 881 * @param document the input document 882 * @param solrClient The solr client to use 883 * @throws SolrServerException if there is an error on the server 884 * @throws IOException if there is a communication error with the server 885 */ 886 protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException 887 { 888 // Retrieve appropriate solr client 889 Request request = ContextHelper.getRequest(_context); 890 String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 891 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 892 893 // Add document 894 UpdateResponse solrResponse = solrClient.add(collectionName, document); 895 int status = solrResponse.getStatus(); 896 897 if (status != 0) 898 { 899 throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId()); 900 } 901 902 getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId()); 903 } 904 905 /////////////////////////////////////////////////////////////////////////// 906 907 /** 908 * Un-index a page by its ID for all workspaces and commit 909 * @param pageId The page ID. 910 * @param unindexRecursively also unindex child pages if requested. 911 * @param unindexAttachments also unindex page attachments 912 * @throws Exception if an error occurs during index update. 913 */ 914 public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception 915 { 916 unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments); 917 unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments); 918 } 919 920 /** 921 * De-index a page (and optionally its children pages). 922 * @param pageId the page to be de-indexed. 923 * @param workspaceName The workspace where to work in 924 * @param unindexRecursively also unindex child pages if requested. 925 * @param unindexAttachments also unindex page attachments 926 * @throws Exception if an error occurs during index update. 927 */ 928 public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception 929 { 930 Request request = ContextHelper.getRequest(_context); 931 932 // Retrieve the current workspace. 933 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 934 // Retrieve the current site name. 935 String currentSiteName = (String) request.getAttribute("siteName"); 936 937 try 938 { 939 // Force the workspace. 940 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 941 942 getLogger().debug("Unindexing page: {}", pageId); 943 944 _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments); 945 } 946 catch (Exception e) 947 { 948 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 949 getLogger().error(error, e); 950 throw new IndexingException(error, e); 951 } 952 finally 953 { 954 // Restore the site name. 955 request.setAttribute("siteName", currentSiteName); 956 // Restore context 957 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 958 } 959 } 960 961 /** 962 * Deindex a document of type Page. Also deindex attachments of a page 963 * @param pageId the id of the page to deindex 964 * @param workspaceName The workspace name 965 * @param unindexRecursively also unindex child pages if requested. 966 * @param unindexAttachments also unindex page attachments 967 * @throws SolrServerException if there is an error on the server 968 * @throws IOException if there is a communication error with the server 969 * @throws QuerySyntaxException if the uri query can't be built because of a syntax error. 970 */ 971 protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException 972 { 973 // Retrieve appropriate solr client 974 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 975 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 976 977 getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName); 978 979 Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively)); 980 Query query; 981 if (unindexRecursively && unindexAttachments) 982 { 983 // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"} 984 Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID); 985 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery)); 986 query = new OrQuery(attachments, pages); 987 } 988 else if (unindexAttachments) 989 { 990 Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId)); 991 query = new OrQuery(attachments, pages); 992 } 993 else 994 { 995 query = pages; 996 } 997 998 // Delete by query 999 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build()); 1000 int status = solrResponse.getStatus(); 1001 1002 if (status != 0) 1003 { 1004 throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId); 1005 } 1006 1007 getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId); 1008 } 1009 1010 /////////////////////////////////////////////////////////////////////////// 1011 1012 /** 1013 * Reindex a page by its ID for all workspaces and commit 1014 * @param pageId The page ID. 1015 * @param reindexRecursively also reindex child pages if requested. 1016 * @param reindexAttachments also reindex page attachments 1017 * @throws Exception if an error occurs during index update. 1018 */ 1019 public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception 1020 { 1021 reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments); 1022 reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments); 1023 } 1024 1025 1026 /** 1027 * Reindex a page by its ID. 1028 * @param pageId The page ID. 1029 * @param workspaceName The workspace where to work in 1030 * @param reindexRecursively also reindex child pages if requested. 1031 * @param reindexAttachments also reindex page attachments 1032 * @throws IndexingException if an error occurs during index update. 1033 */ 1034 public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException 1035 { 1036 Request request = ContextHelper.getRequest(_context); 1037 1038 // Retrieve the current workspace. 1039 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 1040 // Retrieve the current site name. 1041 String currentSiteName = (String) request.getAttribute("siteName"); 1042 1043 try 1044 { 1045 // Force the workspace. 1046 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 1047 1048 getLogger().debug("Reindexing page: {}", pageId); 1049 1050 if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist 1051 { 1052 Page page = _ametysObjectResolver.resolveById(pageId); 1053 _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments); 1054 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 1055 _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient); 1056 } 1057 } 1058 catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e) 1059 { 1060 String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName); 1061 getLogger().error(error, e); 1062 throw new IndexingException(error, e); 1063 } 1064 finally 1065 { 1066 // Restore the site name. 1067 request.setAttribute("siteName", currentSiteName); 1068 // Restore context 1069 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 1070 } 1071 } 1072}