Source code

001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019import java.time.ZoneOffset;
020import java.time.ZonedDateTime;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Locale;
027import java.util.Map;
028import java.util.Optional;
029import java.util.Set;
030import java.util.function.Function;
031import java.util.stream.Collectors;
032
033import org.apache.avalon.framework.component.Component;
034import org.apache.avalon.framework.context.Context;
035import org.apache.avalon.framework.context.ContextException;
036import org.apache.avalon.framework.context.Contextualizable;
037import org.apache.avalon.framework.service.ServiceException;
038import org.apache.avalon.framework.service.ServiceManager;
039import org.apache.avalon.framework.service.Serviceable;
040import org.apache.cocoon.components.ContextHelper;
041import org.apache.cocoon.environment.Request;
042import org.apache.commons.lang3.ArrayUtils;
043import org.apache.solr.client.solrj.SolrClient;
044import org.apache.solr.client.solrj.SolrServerException;
045import org.apache.solr.client.solrj.response.UpdateResponse;
046import org.apache.solr.common.SolrInputDocument;
047import org.apache.solr.common.SolrInputField;
048
049import org.ametys.cms.content.indexing.solr.SolrFieldNames;
050import org.ametys.cms.content.indexing.solr.SolrIndexer;
051import org.ametys.cms.content.indexing.solr.SolrResourceIndexer;
052import org.ametys.cms.contenttype.ContentTypesHelper;
053import org.ametys.cms.data.ContentValue;
054import org.ametys.cms.data.type.indexing.IndexableDataContext;
055import org.ametys.cms.data.type.indexing.IndexableElementType;
056import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper;
057import org.ametys.cms.indexing.IndexingException;
058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer;
059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint;
060import org.ametys.cms.model.properties.ElementRefProperty;
061import org.ametys.cms.model.properties.Property;
062import org.ametys.cms.repository.Content;
063import org.ametys.cms.search.query.AndQuery;
064import org.ametys.cms.search.query.DocumentTypeQuery;
065import org.ametys.cms.search.query.JoinQuery;
066import org.ametys.cms.search.query.OrQuery;
067import org.ametys.cms.search.query.Query;
068import org.ametys.cms.search.query.QuerySyntaxException;
069import org.ametys.cms.search.solr.SolrClientProvider;
070import org.ametys.cms.search.solr.field.FirstValidationSearchField;
071import org.ametys.cms.search.solr.field.LastMajorValidationSearchField;
072import org.ametys.cms.search.solr.field.LastModifiedSearchField;
073import org.ametys.cms.search.solr.field.LastValidationSearchField;
074import org.ametys.cms.tag.Tag;
075import org.ametys.cms.tag.TagHelper;
076import org.ametys.cms.tag.TagProviderExtensionPoint;
077import org.ametys.core.util.DateUtils;
078import org.ametys.plugins.explorer.resources.Resource;
079import org.ametys.plugins.explorer.resources.ResourceCollection;
080import org.ametys.plugins.repository.AmetysObject;
081import org.ametys.plugins.repository.AmetysObjectIterable;
082import org.ametys.plugins.repository.AmetysObjectResolver;
083import org.ametys.plugins.repository.AmetysRepositoryException;
084import org.ametys.plugins.repository.RepositoryConstants;
085import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder;
086import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite;
087import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater;
088import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry;
089import org.ametys.plugins.repository.model.CompositeDefinition;
090import org.ametys.plugins.repository.model.RepeaterDefinition;
091import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
092import org.ametys.runtime.model.ElementDefinition;
093import org.ametys.runtime.model.ModelItem;
094import org.ametys.runtime.model.type.DataContext;
095import org.ametys.runtime.model.type.ElementType;
096import org.ametys.runtime.model.type.ModelItemTypeConstants;
097import org.ametys.runtime.plugin.component.AbstractLogEnabled;
098import org.ametys.web.WebConstants;
099import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint;
100import org.ametys.web.repository.page.Page;
101import org.ametys.web.repository.page.Page.PageType;
102import org.ametys.web.repository.page.Zone;
103import org.ametys.web.repository.page.ZoneItem;
104import org.ametys.web.repository.page.ZoneItem.ZoneType;
105import org.ametys.web.repository.site.Site;
106import org.ametys.web.repository.sitemap.Sitemap;
107import org.ametys.web.search.query.PageAttachmentQuery;
108import org.ametys.web.search.query.PageQuery;
109import org.ametys.web.service.Service;
110import org.ametys.web.service.ServiceExtensionPoint;
111
112/**
113 * Component responsible for indexing a page with all its contents.
114 */
115public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable
116{
117    /** The Avalon role of this component (the fully qualified name of this class). */
118    public static final String ROLE = SolrPageIndexer.class.getName();
119    
120    /** The Solr client provider, used to get the update client of a workspace */
121    protected SolrClientProvider _solrClientProvider;
122    /** The Solr indexer */
123    protected SolrIndexer _solrIndexer;
124    /** The Solr indexer of Ametys resources */
125    protected SolrResourceIndexer _solrResourceIndexer;
126    /** The extension point for PageVisibleAttachmentIndexers */
127    protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP;
128    /** The additional property indexer extension point. */
129    protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP;
130    /** The tag provider extension point. */
131    protected TagProviderExtensionPoint _tagProviderEP;
132    
133    /** The service extension point. */
134    protected ServiceExtensionPoint _serviceExtensionPoint;
135    /** The Ametys object resolver */
136    protected AmetysObjectResolver _ametysObjectResolver;
137    /** The Avalon context, from which the current request is retrieved */
138    protected Context _context;
139
140    private ContentTypesHelper _cTypesHelper; // Content types helper, used to get the model items of a content's types and mixins
141    
142    @Override
143    public void service(ServiceManager manager) throws ServiceException
144    {
145        _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
146        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
147        _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
148        _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE);
149        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
150        _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE);
151        _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE);
152        _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE);
153        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
154    }
155    
156    public void contextualize(Context context) throws ContextException
157    {
158        _context = context;
159    }
160    
161    /**
162     * Index a page and eventually its children, recursively, in all workspaces and commit<br>
163     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
164     * @param pageId the page to be indexed.
165     * @param indexRecursively to also process children pages.
166     * @param indexAttachments to index page attachments
167     * @throws Exception if an error occurs during indexation.
168     */
169    public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception
170    {
171        indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments);
172        indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments);
173    }
174    
175    /**
176     * Index a page and eventually its children, recursively.<br>
177     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
178     * @param pageId the page to be indexed.
179     * @param workspaceName the workspace where to index
180     * @param indexRecursively to also process children pages.
181     * @param indexAttachments to index page attachments
182     * @throws IndexingException if an error occurs during indexation.
183     */
184    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException
185    {
186        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
187        indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient);
188    }
189    
190    /**
191     * Index a page and eventually its children, recursively.<br>
192     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
193     * @param pageId the page to be indexed.
194     * @param workspaceName the workspace where to index
195     * @param indexRecursively to also process children pages.
196     * @param indexAttachments to index page attachments
197     * @param solrClient The solr client to use
198     * @throws IndexingException if an error occurs during indexation.
199     */
200    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
201    {
202        Request request = ContextHelper.getRequest(_context);
203        
204        // Retrieve the current workspace.
205        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
206        // Retrieve the current site name.
207        String currentSiteName = (String) request.getAttribute("siteName");
208        
209        try
210        {
211            // Force the workspace.
212            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
213    
214            getLogger().debug("Indexing page: {}", pageId);
215            
216            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
217            {
218                Page page = _ametysObjectResolver.resolveById(pageId);
219                _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient);
220            }
221        }
222        catch (AmetysRepositoryException e)
223        {
224            String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName);
225            getLogger().error(error, e);
226            throw new IndexingException(error, e);
227        }
228        finally
229        {
230            // Restore the site name.
231            request.setAttribute("siteName", currentSiteName);
232            // Restore context
233            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
234        }
235    }
236    
237    private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
238    {
239        getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName);
240        
241        SolrInputDocument document = new SolrInputDocument();
242        
243        try
244        {
245            // Prepare the solr input document by adding fields.
246            _populatePageDocument(page, document);
247            
248            // Set the additional properties in the document.
249            _populateAdditionalProperties(page, document);
250            
251            // Indexation of ACL initial values
252            _solrIndexer.indexAclInitValues(page, document);
253            
254            // Indexation of the document
255            _indexPageDocument(page, document, workspaceName, solrClient);
256            
257            // Index page attachments documents
258            if (indexAttachments)
259            {
260                _indexPageAttachments(page.getRootAttachments(), page, solrClient);
261            }
262        }
263        catch (Exception e)
264        {
265            String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName);
266            getLogger().error(error, e);
267            throw new IndexingException(error, e);
268        }
269        
270        if (indexRecursively)
271        {
272            AmetysObjectIterable<? extends Page> children = page.getChildrenPages();
273            for (Page child : children)
274            {
275                // FIXME index child pages if (and only if) not indexed... see original source.
276//                indexPage(child, false, indexRecursively);
277//                indexPage(child, false);
278                _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient);
279            }
280        }
281    }
282    
283    /**
284     * Populate the solr input document by adding fields to index.
285     * @param page the page to index.
286     * @param document the solr input document
287     * @throws Exception if something goes wrong when processing the indexation of the page
288     */
289    protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception
290    {
291        Sitemap sitemap = page.getSitemap();
292        String sitemapName = sitemap.getName();
293        Site site = page.getSite();
294        String siteName = site.getName();
295        String pageId = page.getId();
296        String pageTitle = page.getTitle();
297        String pageLongTitle = page.getLongTitle();
298        String language = sitemapName;
299        
300        // Page id and type
301        document.addField(SolrFieldNames.ID, pageId);
302        document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE);
303        
304        // Fulltext
305        IndexableDataContext context = IndexableDataContext.newInstance()
306                                         .withLocale(new Locale(language));
307        IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context);
308        if (!pageTitle.equals(pageLongTitle))
309        {
310            IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context);
311        }
312        
313        // Page title
314        _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language);
315        // Page long title
316        _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language);
317        // Title for sorting
318        document.addField(TITLE_SORT, pageTitle);
319        
320        document.addField(TEMPLATE, page.getTemplate());
321        document.addField(PAGE_TYPE, page.getType().name());
322        document.addField(PAGE_DEPTH, page.getDepth());
323        
324        // Contents (page title shoud be indexed before because the main content can override it).
325        _populatePageContentsDocument(page, document);
326        
327        // Parent of the page
328        AmetysObject parent = page.getParent();
329        if (parent != null)
330        {
331            document.addField(PAGE_PARENT_ID, parent.getId());
332        }
333
334        // Ancestors of the page
335        List<String> ancestorIds = new ArrayList<>();
336        while (parent instanceof Page)
337        {
338            ancestorIds.add(parent.getId());
339            parent = parent.getParent();
340        }
341        document.addField(PAGE_ANCESTOR_IDS, ancestorIds);
342        
343        document.addField(SITE_NAME, siteName);
344        document.addField(SITEMAP_NAME, sitemapName);
345        document.addField(SITE_TYPE, site.getType());
346        
347        // Page tags (strict and tags including ancestor pages).
348        Set<String> tags = page.getTags()
349                .stream()
350                .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName())))
351                .collect(Collectors.toSet());
352        document.addField(SolrFieldNames.TAGS, tags);
353        document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page));
354        
355        _populateDatesOfPage(page, document);
356        
357        // Attachments
358        _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language);
359        Optional.ofNullable(page.getRootAttachments())
360                .map(AmetysObject::getId)
361                .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id));
362        _indexVisibleAttachments(page, document);
363    }
364    
365    private void _indexVisibleAttachments(Page page, SolrInputDocument document)
366    {
367        Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds()
368                .stream()
369                .map(_pageVisibleAttachmentIndexerEP::getExtension)
370                .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page))
371                .flatMap(Collection::stream)
372                .collect(Collectors.toList());
373        document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values);
374    }
375    
376    /**
377     * Populate the solr input document with dates from the page
378     * @param page The page
379     * @param document The Solr document
380     */
381    protected void _populateDatesOfPage(Page page, SolrInputDocument document)
382    {
383        // Page last modification date
384        ZonedDateTime lastModified = _getLastModificationDate(page);
385        if (lastModified != null)
386        {
387            String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC);
388            // For 'new' search service
389            document.addField(LastModifiedSearchField.NAME, lastModifiedStr);
390            // For 'old' search service
391            document.addField(LAST_MODIFIED + "_dt", lastModifiedStr);
392        }
393        
394        // Page last validation date
395        ZonedDateTime lastValidation = _getLastValidationDate(page);
396        if (lastValidation != null)
397        {
398            String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC);
399            // For 'new' search service
400            document.addField(LastValidationSearchField.NAME, lastValidationStr);
401        }
402        
403        // Page first validation date
404        ZonedDateTime firstValidation = _getFirstValidationDate(page);
405        if (firstValidation != null)
406        {
407            String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC);
408            // For 'new' search service
409            document.addField(FirstValidationSearchField.NAME, firstValidationStr);
410        }
411        
412        // Page last major validation date
413        ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page);
414        if (lastMajorValidation != null)
415        {
416            String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC);
417            // For 'new' search service
418            document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr);
419        }
420        
421        // date for sorting
422        SolrInputField dateField = document.getField(DATE_FOR_SORTING);
423        if (dateField == null)
424        {
425            Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES);
426            if (oDateValues != null && !oDateValues.isEmpty())
427            {
428                document.setField(DATE_FOR_SORTING, oDateValues.iterator().next());
429            }
430        }
431    }
432    
433    private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language)
434    {
435        String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName);
436        
437        document.addField(fieldName, possiblyTruncatedValue);
438        document.addField(fieldName + "_txt_" + language, fieldValue);
439        document.addField(fieldName + "_txt_stemmed_" + language, fieldValue);
440        document.addField(fieldName + "_txt_ws_" + language, fieldValue);
441
442        document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
443        document.addField(fieldName + "_s_ws", fieldValue.toLowerCase());
444        document.addField(fieldName + "_txt", fieldValue);
445    }
446    /**
447     * Get all the page tags with their ancestors.
448     * @param page The page.
449     * @return All the page tags with their ancestors.
450     */
451    protected Set<String> _getTagsWithAncestors(Page page)
452    {
453        Set<String> allTags = new HashSet<>(page.getTags());
454        
455        Map<String, Object> tagParams = Map.of("siteName", page.getSiteName());
456        
457        for (String tagName : page.getTags())
458        {
459            allTags.add(tagName);
460            
461            // Get the ancestor tags
462            Tag tag = _tagProviderEP.getTag(tagName, tagParams);
463            for (Tag ancestor : TagHelper.getAncestors(tag, false))
464            {
465                allTags.add(ancestor.getName());
466            }
467        }
468        
469        return allTags;
470    }
471    
472    /**
473     * Index the content of the page.<p>
474     * @param page the page to index.
475     * @param document the document to populate.
476     * @throws Exception if an error occurs.
477     */
478    protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception
479    {
480        if (page.getType() == PageType.CONTAINER)
481        {
482            for (Zone zone : page.getZones())
483            {
484                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
485                for (ZoneItem zoneItem : zoneItems)
486                {
487                    if (zoneItem.getType() == ZoneType.CONTENT)
488                    {
489                        try
490                        {
491                            Content content = zoneItem.getContent();
492                            document.addField(CONTENT_IDS, content.getId());
493                            
494                            for (String cType : content.getTypes())
495                            {
496                                document.addField(PAGE_CONTENT_TYPES, cType);
497                                document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets
498                            }
499                            
500                            _indexFacetableFields(content, document);
501                        }
502                        catch (AmetysRepositoryException e)
503                        {
504                            getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
505                        }
506                    }
507                    else if (zoneItem.getType() == ZoneType.SERVICE)
508                    {
509                        try
510                        {
511                            String serviceId = zoneItem.getServiceId();
512                            document.addField(SERVICE_IDS, serviceId);
513
514                            Service service = _serviceExtensionPoint.getExtension(serviceId);
515                            if (service == null)
516                            {
517                                getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId());
518                            }
519                            else
520                            {
521                                service.index(zoneItem, document);
522                            }
523                        }
524                        catch (AmetysRepositoryException e)
525                        {
526                            getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
527                        }
528                        
529                    }
530                }
531            }
532        }
533    }
534    
535    /**
536     * Index the facetable fields of a content into the page solr document
537     * @param content The content
538     * @param document The main page solr document.
539     */
540    protected void _indexFacetableFields(Content content, SolrInputDocument document)
541    {
542        List<ModelItem> modelItems = new ArrayList<>();
543        try
544        {
545            String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes());
546            modelItems.addAll(_cTypesHelper.getModelItems(allContentTypes)
547                                           .stream()
548                                           .filter(modelItem -> !(modelItem instanceof Property) || modelItem instanceof ElementRefProperty)
549                                           .collect(Collectors.toList()));
550        }
551        catch (IllegalArgumentException e)
552        {
553            getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e);
554            throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e);
555        }
556        
557        for (ModelItem modelItem : modelItems)
558        {
559            DataContext context = DataContext.newInstance()
560                                             .withObjectId(content.getId());
561            
562            Optional.ofNullable(content.getLanguage())
563                    .map(Locale::new)
564                    .ifPresent(context::withLocale);
565            
566            _findAndIndexFacetableField(document, content, modelItem, context);
567        }
568    }
569    
570    /**
571     * Index the facetable fields of a data holder into the page solr document
572     * @param pageDocument The Solr page document
573     * @param dataHolder the parent data holder
574     * @param modelItem the model item
575     * @param context the context of the data to index
576     */
577    protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context)
578    {
579        String dataName = modelItem.getName();
580        if (dataHolder.hasValue(dataName))
581        {
582            if (modelItem instanceof ElementDefinition elementDefinition)
583            {
584                DataContext newContext = context.cloneContext()
585                                                .addSegmentToDataPath(dataName);
586               
587                Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext);
588                for (String value : values)
589                {
590                    pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value);
591                }
592            }
593            else if (modelItem instanceof RepeaterDefinition repeaterDefinition)
594            {
595                ModelAwareRepeater repeater = dataHolder.getRepeater(dataName);
596                for (ModelAwareRepeaterEntry entry : repeater.getEntries())
597                {
598                    DataContext newContext = context.cloneContext()
599                                                    .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]");
600                    
601                    for (ModelItem child : repeaterDefinition.getModelItems())
602                    {
603                        _findAndIndexFacetableField(pageDocument, entry, child, newContext);
604                    }
605                }
606            }
607            else if (modelItem instanceof CompositeDefinition compositeDefinition)
608            {
609                ModelAwareComposite composite = dataHolder.getComposite(dataName);
610                DataContext newContext = context.cloneContext()
611                                                .addSegmentToDataPath(dataName);
612                
613                for (ModelItem child : compositeDefinition.getModelItems())
614                {
615                    _findAndIndexFacetableField(pageDocument, composite, child, newContext);
616                }
617            }
618        }
619    }
620    
621    /**
622     * Retrieves the values to index if the field is facetable, or an empty collection
623     * @param dataHolder the data holder
624     * @param elementDefinition the definition of the field
625     * @param context the context of the data to index
626     * @return the values to index if the field is facetable, or an empty collection
627     */
628    protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context)
629    {
630        String dataName = elementDefinition.getName();
631        ElementType type = elementDefinition.getType();
632        if (type instanceof IndexableElementType indexingElementType)
633        {
634            if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context))
635            {
636                String dataPath = dataName;
637                if (elementDefinition instanceof ElementRefProperty property)
638                {
639                    dataPath = property.getPath();
640                }
641                
642                Object value = dataHolder.getValue(dataPath, true);
643                if (value instanceof String[] stringValues)
644                {
645                    return Arrays.asList(stringValues);
646                }
647                else if (value instanceof String stringValue)
648                {
649                    return List.of(stringValue);
650                }
651            }
652            else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId()))
653            {
654                String dataPath = dataName;
655                if (elementDefinition instanceof ElementRefProperty property)
656                {
657                    dataPath = property.getPath();
658                }
659                
660                Object value = dataHolder.getValue(dataPath, true);
661                if (value instanceof ContentValue[] contentValues)
662                {
663                    return Arrays.stream(contentValues)
664                            .map(ContentValue::getContentId)
665                            .collect(Collectors.toList());
666                }
667                else if (value instanceof ContentValue contentValue)
668                {
669                    return List.of(contentValue.getContentId());
670                }
671            }
672        }
673        
674        return List.of();
675    }
676    
677    /**
678     * Computes the last modification date of a page.
679     * @param page the page.
680     * @return the last modification date or <code>null</code>.
681     */
682    protected ZonedDateTime _getLastModificationDate(Page page)
683    {
684        return _getLastDate(page, Content::getLastModified);
685    }
686    /**
687     * Computes the first validation date of a page.
688     * @param page the page.
689     * @return the first validation date or <code>null</code>.
690     */
691    protected ZonedDateTime _getFirstValidationDate(Page page)
692    {
693        return _getFirstDate(page, Content::getFirstValidationDate);
694    }
695
696    /**
697     * Computes the last validation date of a page.
698     * @param page the page.
699     * @return the last validation date or <code>null</code>.
700     */
701    protected ZonedDateTime _getLastValidationDate(Page page)
702    {
703        return _getLastDate(page, Content::getLastValidationDate);
704    }
705    
706    /**
707     * Computes the last major validation date of a page.
708     * @param page the page.
709     * @return the last major validation date or <code>null</code>.
710     */
711    protected ZonedDateTime _getLastMajorValidationDate(Page page)
712    {
713        return _getLastDate(page, Content::getLastMajorValidationDate);
714    }
715    
716    /**
717     * Computes a "last date" of a page, using the simple and naive following algorithm:
718     * <br>From all the dates from each of its contents, keep the greatest of them.
719     * @param page the page.
720     * @param dateRetriever The function to retrieve a Date from a Content of the Page
721     * @return the "last date" or <code>null</code>.
722     */
723    protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
724    {
725        ZonedDateTime last = null;
726
727        if (page.getType() == PageType.CONTAINER)
728        {
729            for (Zone zone : page.getZones())
730            {
731                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
732                for (ZoneItem zoneItem : zoneItems)
733                {
734                    switch (zoneItem.getType())
735                    {
736                        case SERVICE:
737                            // A service has no last date
738                            break;
739                        case CONTENT:
740                            try
741                            {
742                                ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent());
743
744                                if (contentLast != null && (last == null || contentLast.isAfter(last)))
745                                {
746                                    // Keep the latest date
747                                    last = contentLast;
748                                }
749                            }
750                            catch (AmetysRepositoryException e)
751                            {
752                                getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
753                            }
754                            break;
755                        default:
756                            break;
757                    }
758                }
759            }
760        }
761        
762        return last;
763    }
764    
765    /**
766     * Computes a "first date" of a page, using the simple and naive following algorithm:
767     * <br>From all the dates from each of its contents, keep the lowest of them.
768     * @param page the page.
769     * @param dateRetriever The function to retrieve a Date from a Content of the Page
770     * @return the "first date" or <code>null</code>.
771     */
772    protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
773    {
774        ZonedDateTime first = null;
775        
776        if (page.getType() == PageType.CONTAINER)
777        {
778            for (Zone zone : page.getZones())
779            {
780                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
781                for (ZoneItem zoneItem : zoneItems)
782                {
783                    switch (zoneItem.getType())
784                    {
785                        case SERVICE:
786                            // A service has no first date
787                            break;
788                        case CONTENT:
789                            try
790                            {
791                                ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent());
792                                
793                                if (contentFirst != null && (first == null || contentFirst.isBefore(first)))
794                                {
795                                    // Keep the lowest date
796                                    first = contentFirst;
797                                }
798                            }
799                            catch (AmetysRepositoryException e)
800                            {
801                                getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
802                            }
803                            break;
804                        default:
805                            break;
806                    }
807                }
808            }
809        }
810        
811        return first;
812    }
813    
814    /**
815     * Populate the solr input document by adding fields to index.
816     * @param page the page to index.
817     * @param document the solr input document
818     * @throws Exception if something goes wrong when processing the indexation of the page
819     */
820    protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception
821    {
822        Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page");
823        for (AdditionalPropertyIndexer indexer : indexers)
824        {
825            indexer.index(page, document);
826        }
827    }
828    
829    /**
830     * Index page attachments as new entries in the index.
831     * @param collection the collection of attachments
832     * @param page the page whose attachments will be indexed
833     * @throws Exception if something goes wrong when indexing the attachments of the page
834     */
835    public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception
836    {
837        Request request = ContextHelper.getRequest(_context);
838        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
839        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
840        _indexPageAttachments(collection, page, solrClient);
841    }
842    
843    private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception
844    {
845        if (collection == null)
846        {
847            return;
848        }
849        
850        AmetysObjectIterable<AmetysObject> children = collection.getChildren();
851        for (AmetysObject object : children)
852        {
853            if (object instanceof ResourceCollection)
854            {
855                _indexPageAttachments((ResourceCollection) object, page, solrClient);
856            }
857            else if (object instanceof Resource)
858            {
859                Resource resource = (Resource) object;
860                _indexPageAttachment(resource, page, solrClient);
861            }
862        }
863    }
864    
865    /**
866     * Index a page attachment
867     * @param resource the page attachment as a {@link Resource}
868     * @param page the page whose attachment is going to be indexed
869     * @throws Exception if something goes wrong when processing the indexation of the page attachment
870     */
871    public void indexPageAttachment(Resource resource, Page page) throws Exception
872    {
873        Request request = ContextHelper.getRequest(_context);
874        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
875        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
876        _indexPageAttachment(resource, page, solrClient);
877    }
878    
879    private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception
880    {
881        SolrInputDocument document = new SolrInputDocument();
882        
883        // Prepare resource doc
884        _populatePageAttachmentDocument(resource, document, page);
885        
886        // Indexation of the document
887        _indexResourceDocument(resource, document, solrClient);
888    }
889    
890    private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception
891    {
892        String language = page.getSitemapName();
893        
894        _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language);
895        
896        Site site = page.getSite();
897        // site name - Store.YES, Index.NOT_ANALYZED
898        document.addField(SolrWebFieldNames.SITE_NAME, site.getName());
899        
900        // site type - Store.YES, Index.NOT_ANALYZED
901        document.addField(SolrWebFieldNames.SITE_TYPE, site.getType());
902        
903        // Added for Solr.
904        // Page site map name - Store.YES, Index.NOT_ANALYZED
905        document.addField(SITEMAP_NAME, page.getSitemapName());
906        
907        // Need the id of the page for unindexing attachment during the unindexing of the page
908        document.addField(ATTACHMENT_PAGE_ID, page.getId());
909    }
910    
911    /**
912     * Index a populated solr input document of type Page.
913     * @param page the page from which the input document is created
914     * @param document the input document to add to the solr index
915     * @param workspaceName The workspace name
916     * @param solrClient The solr client to use
917     * @throws SolrServerException if there is an error on the Solr server
918     * @throws IOException if there is a communication error with the server
919     */
920    protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
921    {
922        // Retrieve appropriate solr client
923        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
924        
925        // Add document
926        UpdateResponse solrResponse = solrClient.add(collectionName, document);
927        int status = solrResponse.getStatus();
928        
929        if (status != 0)
930        {
931            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId());
932        }
933        
934        getLogger().debug("Successful page indexing. Page identifier : {}", page.getId());
935    }
936    
937    /**
938     * Index a populated solr input document of type Resource.
939     * @param resource the resource from which the input document is created
940     * @param document the input document
941     * @param solrClient The solr client to use
942     * @throws SolrServerException if there is an error on the server
943     * @throws IOException if there is a communication error with the server
944     */
945    protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException
946    {
947        // Retrieve appropriate solr client
948        Request request = ContextHelper.getRequest(_context);
949        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
950        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
951        
952        // Add document
953        UpdateResponse solrResponse = solrClient.add(collectionName, document);
954        int status = solrResponse.getStatus();
955        
956        if (status != 0)
957        {
958            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId());
959        }
960        
961        getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId());
962    }
963    
964    ///////////////////////////////////////////////////////////////////////////
965    
966    /**
967     * Un-index a page by its ID  for all workspaces and commit
968     * @param pageId The page ID.
969     * @param unindexRecursively also unindex child pages if requested.
970     * @param unindexAttachments also unindex page attachments
971     * @throws Exception if an error occurs during index update.
972     */
973    public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception
974    {
975        unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments);
976        unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments);
977    }
978    
979    /**
980     * De-index a page (and optionally its children pages).
981     * @param pageId the page to be de-indexed.
982     * @param workspaceName The workspace where to work in 
983     * @param unindexRecursively also unindex child pages if requested.
984     * @param unindexAttachments also unindex page attachments
985     * @throws Exception if an error occurs during index update.
986     */
987    public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception
988    {
989        Request request = ContextHelper.getRequest(_context);
990        
991        // Retrieve the current workspace.
992        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
993        // Retrieve the current site name.
994        String currentSiteName = (String) request.getAttribute("siteName");
995        
996        try
997        {
998            // Force the workspace.
999            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1000    
1001            getLogger().debug("Unindexing page: {}", pageId);
1002            
1003            _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments);
1004        }
1005        catch (Exception e)
1006        {
1007            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1008            getLogger().error(error, e);
1009            throw new IndexingException(error, e);
1010        }
1011        finally
1012        {
1013            // Restore the site name.
1014            request.setAttribute("siteName", currentSiteName);
1015            // Restore context
1016            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1017        }
1018    }
1019    
1020    /**
1021     * Deindex a document of type Page. Also deindex attachments of a page
1022     * @param pageId the id of the page to deindex
1023     * @param workspaceName The workspace name
1024     * @param unindexRecursively also unindex child pages if requested.
1025     * @param unindexAttachments also unindex page attachments
1026     * @throws SolrServerException if there is an error on the server
1027     * @throws IOException if there is a communication error with the server
1028     * @throws QuerySyntaxException if the uri query can't be built because of a syntax error.
1029     */
1030    protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException
1031    {
1032        // Retrieve appropriate solr client
1033        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
1034        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1035        
1036        getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName);
1037        
1038        Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively));
1039        Query query;
1040        if (unindexRecursively && unindexAttachments)
1041        {
1042            // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"}
1043            Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID);
1044            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery));
1045            query = new OrQuery(attachments, pages);
1046        }
1047        else if (unindexAttachments)
1048        {
1049            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId));
1050            query = new OrQuery(attachments, pages);
1051        }
1052        else
1053        {
1054            query = pages;
1055        }
1056        
1057        // Delete by query
1058        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build());
1059        int status = solrResponse.getStatus();
1060        
1061        if (status != 0)
1062        {
1063            throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId);
1064        }
1065        
1066        getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId);
1067    }
1068    
1069    ///////////////////////////////////////////////////////////////////////////
1070    
1071    /**
1072     * Reindex a page by its ID for all workspaces and commit
1073     * @param pageId The page ID.
1074     * @param reindexRecursively also reindex child pages if requested.
1075     * @param reindexAttachments also reindex page attachments
1076     * @throws Exception if an error occurs during index update.
1077     */
1078    public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception
1079    {
1080        reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments);
1081        reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments);
1082    }
1083  
1084    
1085    /**
1086     * Reindex a page by its ID.
1087     * @param pageId The page ID.
1088     * @param workspaceName The workspace where to work in 
1089     * @param reindexRecursively also reindex child pages if requested.
1090     * @param reindexAttachments also reindex page attachments
1091     * @throws IndexingException if an error occurs during index update.
1092     */
1093    public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException
1094    {
1095        Request request = ContextHelper.getRequest(_context);
1096        
1097        // Retrieve the current workspace.
1098        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
1099        // Retrieve the current site name.
1100        String currentSiteName = (String) request.getAttribute("siteName");
1101        
1102        try
1103        {
1104            // Force the workspace.
1105            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1106    
1107            getLogger().debug("Reindexing page: {}", pageId);
1108            
1109            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
1110            {
1111                Page page = _ametysObjectResolver.resolveById(pageId);
1112                _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments);
1113                SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1114                _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient);
1115            }
1116        }
1117        catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e)
1118        {
1119            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1120            getLogger().error(error, e);
1121            throw new IndexingException(error, e);
1122        }
1123        finally
1124        {
1125            // Restore the site name.
1126            request.setAttribute("siteName", currentSiteName);
1127            // Restore context
1128            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1129        }
1130    }
1131}