Source code

001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019import java.time.ZoneOffset;
020import java.time.ZonedDateTime;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Locale;
027import java.util.Map;
028import java.util.Optional;
029import java.util.Set;
030import java.util.function.Function;
031import java.util.stream.Collectors;
032
033import org.apache.avalon.framework.component.Component;
034import org.apache.avalon.framework.context.Context;
035import org.apache.avalon.framework.context.ContextException;
036import org.apache.avalon.framework.context.Contextualizable;
037import org.apache.avalon.framework.service.ServiceException;
038import org.apache.avalon.framework.service.ServiceManager;
039import org.apache.avalon.framework.service.Serviceable;
040import org.apache.cocoon.components.ContextHelper;
041import org.apache.cocoon.environment.Request;
042import org.apache.commons.lang3.ArrayUtils;
043import org.apache.solr.client.solrj.SolrClient;
044import org.apache.solr.client.solrj.SolrServerException;
045import org.apache.solr.client.solrj.response.UpdateResponse;
046import org.apache.solr.common.SolrInputDocument;
047import org.apache.solr.common.SolrInputField;
048
049import org.ametys.cms.content.indexing.solr.SolrFieldNames;
050import org.ametys.cms.content.indexing.solr.SolrIndexer;
051import org.ametys.cms.content.indexing.solr.SolrResourceIndexer;
052import org.ametys.cms.contenttype.ContentTypesHelper;
053import org.ametys.cms.data.ContentValue;
054import org.ametys.cms.data.type.indexing.IndexableDataContext;
055import org.ametys.cms.data.type.indexing.IndexableElementType;
056import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper;
057import org.ametys.cms.indexing.IndexingException;
058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer;
059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint;
060import org.ametys.cms.model.properties.ElementRefProperty;
061import org.ametys.cms.model.properties.Property;
062import org.ametys.cms.repository.Content;
063import org.ametys.cms.search.query.AndQuery;
064import org.ametys.cms.search.query.DocumentTypeQuery;
065import org.ametys.cms.search.query.JoinQuery;
066import org.ametys.cms.search.query.OrQuery;
067import org.ametys.cms.search.query.Query;
068import org.ametys.cms.search.query.QuerySyntaxException;
069import org.ametys.cms.search.solr.SolrClientProvider;
070import org.ametys.cms.search.solr.field.FirstValidationSearchField;
071import org.ametys.cms.search.solr.field.LastMajorValidationSearchField;
072import org.ametys.cms.search.solr.field.LastModifiedSearchField;
073import org.ametys.cms.search.solr.field.LastValidationSearchField;
074import org.ametys.cms.tag.Tag;
075import org.ametys.cms.tag.TagHelper;
076import org.ametys.cms.tag.TagProviderExtensionPoint;
077import org.ametys.core.util.DateUtils;
078import org.ametys.plugins.explorer.resources.Resource;
079import org.ametys.plugins.explorer.resources.ResourceCollection;
080import org.ametys.plugins.repository.AmetysObject;
081import org.ametys.plugins.repository.AmetysObjectIterable;
082import org.ametys.plugins.repository.AmetysObjectResolver;
083import org.ametys.plugins.repository.AmetysRepositoryException;
084import org.ametys.plugins.repository.RepositoryConstants;
085import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder;
086import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite;
087import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater;
088import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry;
089import org.ametys.plugins.repository.model.CompositeDefinition;
090import org.ametys.plugins.repository.model.RepeaterDefinition;
091import org.ametys.plugins.repository.model.RepositoryDataContext;
092import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
093import org.ametys.runtime.model.ElementDefinition;
094import org.ametys.runtime.model.ModelItem;
095import org.ametys.runtime.model.type.DataContext;
096import org.ametys.runtime.model.type.ElementType;
097import org.ametys.runtime.model.type.ModelItemTypeConstants;
098import org.ametys.runtime.plugin.component.AbstractLogEnabled;
099import org.ametys.web.WebConstants;
100import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint;
101import org.ametys.web.repository.page.Page;
102import org.ametys.web.repository.page.Page.PageType;
103import org.ametys.web.repository.page.Zone;
104import org.ametys.web.repository.page.ZoneItem;
105import org.ametys.web.repository.page.ZoneItem.ZoneType;
106import org.ametys.web.repository.site.Site;
107import org.ametys.web.repository.sitemap.Sitemap;
108import org.ametys.web.search.query.PageAttachmentQuery;
109import org.ametys.web.search.query.PageQuery;
110import org.ametys.web.service.Service;
111import org.ametys.web.service.ServiceExtensionPoint;
112
113/**
114 * Component responsible for indexing a page with all its contents.
115 */
116public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable
117{
118    /** The avalon role. */
119    public static final String ROLE = SolrPageIndexer.class.getName();
120    
121    /** The Solr client provider */
122    protected SolrClientProvider _solrClientProvider;
123    /** The Solr indexer */
124    protected SolrIndexer _solrIndexer;
125    /** Solr Ametys resources indexer */
126    protected SolrResourceIndexer _solrResourceIndexer;
127    /** The extension point for PageVisibleAttachmentIndexers */
128    protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP;
129    /** The additional property indexer extension point. */
130    protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP;
131    /** The tag provider extension point. */
132    protected TagProviderExtensionPoint _tagProviderEP;
133    
134    /** The service extension point. */
135    protected ServiceExtensionPoint _serviceExtensionPoint;
136    /** The Ametys object resolver*/
137    protected AmetysObjectResolver _ametysObjectResolver;
138    /** The avalon context */
139    protected Context _context;
140
141    private ContentTypesHelper _cTypesHelper;
142    
143    @Override
144    public void service(ServiceManager manager) throws ServiceException
145    {
146        _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
147        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
148        _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
149        _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE);
150        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
151        _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE);
152        _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE);
153        _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE);
154        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
155    }
156    
157    public void contextualize(Context context) throws ContextException
158    {
159        _context = context;
160    }
161    
162    /**
163     * Index a page and eventually its children, recursively, in all workspaces and commit<br>
164     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
165     * @param pageId the page to be indexed.
166     * @param indexRecursively to also process children pages.
167     * @param indexAttachments to index page attachments
168     * @throws Exception if an error occurs during indexation.
169     */
170    public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception
171    {
172        indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments);
173        indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments);
174    }
175    
176    /**
177     * Index a page and eventually its children, recursively.<br>
178     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
179     * @param pageId the page to be indexed.
180     * @param workspaceName the workspace where to index
181     * @param indexRecursively to also process children pages.
182     * @param indexAttachments to index page attachments
183     * @throws IndexingException if an error occurs during indexation.
184     */
185    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException
186    {
187        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
188        indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient);
189    }
190    
191    /**
192     * Index a page and eventually its children, recursively.<br>
193     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
194     * @param pageId the page to be indexed.
195     * @param workspaceName the workspace where to index
196     * @param indexRecursively to also process children pages.
197     * @param indexAttachments to index page attachments
198     * @param solrClient The solr client to use
199     * @throws IndexingException if an error occurs during indexation.
200     */
201    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
202    {
203        Request request = ContextHelper.getRequest(_context);
204        
205        // Retrieve the current workspace.
206        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
207        // Retrieve the current site name.
208        String currentSiteName = (String) request.getAttribute("siteName");
209        
210        try
211        {
212            // Force the workspace.
213            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
214    
215            getLogger().debug("Indexing page: {}", pageId);
216            
217            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
218            {
219                Page page = _ametysObjectResolver.resolveById(pageId);
220                _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient);
221            }
222        }
223        catch (AmetysRepositoryException e)
224        {
225            String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName);
226            getLogger().error(error, e);
227            throw new IndexingException(error, e);
228        }
229        finally
230        {
231            // Restore the site name.
232            request.setAttribute("siteName", currentSiteName);
233            // Restore context
234            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
235        }
236    }
237    
238    private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
239    {
240        getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName);
241        
242        SolrInputDocument document = new SolrInputDocument();
243        
244        try
245        {
246            // Prepare the solr input document by adding fields.
247            _populatePageDocument(page, document);
248            
249            // Set the additional properties in the document.
250            _populateAdditionalProperties(page, document);
251            
252            // Indexation of ACL initial values
253            _solrIndexer.indexAclInitValues(page, document);
254            
255            // Indexation of the document
256            _indexPageDocument(page, document, workspaceName, solrClient);
257            
258            // Index page attachments documents
259            if (indexAttachments)
260            {
261                _indexPageAttachments(page.getRootAttachments(), page, solrClient);
262            }
263        }
264        catch (Exception e)
265        {
266            String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName);
267            getLogger().error(error, e);
268            throw new IndexingException(error, e);
269        }
270        
271        if (indexRecursively)
272        {
273            AmetysObjectIterable<? extends Page> children = page.getChildrenPages();
274            for (Page child : children)
275            {
276                // FIXME index child pages if (and only if) not indexed... see original source.
277//                indexPage(child, false, indexRecursively);
278//                indexPage(child, false);
279                _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient);
280            }
281        }
282    }
283    
284    /**
285     * Populate the solr input document by adding fields to index.
286     * @param page the page to index.
287     * @param document the solr input document
288     * @throws Exception if something goes wrong when processing the indexation of the page
289     */
290    protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception
291    {
292        Sitemap sitemap = page.getSitemap();
293        String sitemapName = sitemap.getName();
294        Site site = page.getSite();
295        String siteName = site.getName();
296        String pageId = page.getId();
297        String pageTitle = page.getTitle();
298        String pageLongTitle = page.getLongTitle();
299        String language = sitemapName;
300        
301        // Page id and type
302        document.addField(SolrFieldNames.ID, pageId);
303        document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE);
304        
305        // Fulltext
306        IndexableDataContext context = IndexableDataContext.newInstance()
307                                         .withLocale(new Locale(language));
308        IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context);
309        if (!pageTitle.equals(pageLongTitle))
310        {
311            IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context);
312        }
313        
314        // Page title
315        _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language);
316        // Page long title
317        _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language);
318        // Title for sorting
319        document.addField(TITLE_SORT, pageTitle);
320        
321        document.addField(TEMPLATE, page.getTemplate());
322        document.addField(PAGE_TYPE, page.getType().name());
323        document.addField(PAGE_DEPTH, page.getDepth());
324        
325        // Contents (page title shoud be indexed before because the main content can override it).
326        _populatePageContentsDocument(page, document);
327        
328        // Parent of the page
329        AmetysObject parent = page.getParent();
330        if (parent != null)
331        {
332            document.addField(PAGE_PARENT_ID, parent.getId());
333        }
334
335        // Ancestors of the page
336        List<String> ancestorIds = new ArrayList<>();
337        while (parent instanceof Page)
338        {
339            ancestorIds.add(parent.getId());
340            parent = parent.getParent();
341        }
342        document.addField(PAGE_ANCESTOR_IDS, ancestorIds);
343        
344        document.addField(SITE_NAME, siteName);
345        document.addField(SITEMAP_NAME, sitemapName);
346        document.addField(SITE_TYPE, site.getType());
347        
348        // Page tags (strict and tags including ancestor pages).
349        Set<String> tags = page.getTags()
350                .stream()
351                .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName())))
352                .collect(Collectors.toSet());
353        document.addField(SolrFieldNames.TAGS, tags);
354        document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page));
355        
356        _populateDatesOfPage(page, document);
357        
358        // Attachments
359        _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language);
360        Optional.ofNullable(page.getRootAttachments())
361                .map(AmetysObject::getId)
362                .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id));
363        _indexVisibleAttachments(page, document);
364    }
365    
366    private void _indexVisibleAttachments(Page page, SolrInputDocument document)
367    {
368        Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds()
369                .stream()
370                .map(_pageVisibleAttachmentIndexerEP::getExtension)
371                .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page))
372                .flatMap(Collection::stream)
373                .collect(Collectors.toList());
374        document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values);
375    }
376    
377    /**
378     * Populate the solr input document with dates from the page
379     * @param page The page
380     * @param document The Solr document
381     */
382    protected void _populateDatesOfPage(Page page, SolrInputDocument document)
383    {
384        // Page last modification date
385        ZonedDateTime lastModified = _getLastModificationDate(page);
386        if (lastModified != null)
387        {
388            String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC);
389            // For 'new' search service
390            document.addField(LastModifiedSearchField.NAME, lastModifiedStr);
391            // For 'old' search service
392            document.addField(LAST_MODIFIED + "_dt", lastModifiedStr);
393        }
394        
395        // Page last validation date
396        ZonedDateTime lastValidation = _getLastValidationDate(page);
397        if (lastValidation != null)
398        {
399            String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC);
400            // For 'new' search service
401            document.addField(LastValidationSearchField.NAME, lastValidationStr);
402        }
403        
404        // Page first validation date
405        ZonedDateTime firstValidation = _getFirstValidationDate(page);
406        if (firstValidation != null)
407        {
408            String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC);
409            // For 'new' search service
410            document.addField(FirstValidationSearchField.NAME, firstValidationStr);
411        }
412        
413        // Page last major validation date
414        ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page);
415        if (lastMajorValidation != null)
416        {
417            String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC);
418            // For 'new' search service
419            document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr);
420        }
421        
422        // date for sorting
423        SolrInputField dateField = document.getField(DATE_FOR_SORTING);
424        if (dateField == null)
425        {
426            Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES);
427            if (oDateValues != null && !oDateValues.isEmpty())
428            {
429                document.setField(DATE_FOR_SORTING, oDateValues.iterator().next());
430            }
431        }
432    }
433    
434    private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language)
435    {
436        String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName);
437        
438        document.addField(fieldName, possiblyTruncatedValue);
439        document.addField(fieldName + "_txt_" + language, fieldValue);
440        document.addField(fieldName + "_txt_stemmed_" + language, fieldValue);
441        document.addField(fieldName + "_txt_ws_" + language, fieldValue);
442
443        document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
444        document.addField(fieldName + "_s_ws", fieldValue.toLowerCase());
445        document.addField(fieldName + "_txt", fieldValue);
446    }
447    /**
448     * Get all the page tags with their ancestors.
449     * @param page The page.
450     * @return All the page tags with their ancestors.
451     */
452    protected Set<String> _getTagsWithAncestors(Page page)
453    {
454        Set<String> allTags = new HashSet<>(page.getTags());
455        
456        Map<String, Object> tagParams = Map.of("siteName", page.getSiteName());
457        
458        for (String tagName : page.getTags())
459        {
460            allTags.add(tagName);
461            
462            // Get the ancestor tags
463            Tag tag = _tagProviderEP.getTag(tagName, tagParams);
464            for (Tag ancestor : TagHelper.getAncestors(tag, false))
465            {
466                allTags.add(ancestor.getName());
467            }
468        }
469        
470        return allTags;
471    }
472    
473    /**
474     * Index the content of the page.<p>
475     * @param page the page to index.
476     * @param document the document to populate.
477     * @throws Exception if an error occurs.
478     */
479    protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception
480    {
481        if (page.getType() == PageType.CONTAINER)
482        {
483            for (Zone zone : page.getZones())
484            {
485                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
486                for (ZoneItem zoneItem : zoneItems)
487                {
488                    if (zoneItem.getType() == ZoneType.CONTENT)
489                    {
490                        try
491                        {
492                            Content content = zoneItem.getContent();
493                            document.addField(CONTENT_IDS, content.getId());
494                            
495                            for (String cType : content.getTypes())
496                            {
497                                document.addField(PAGE_CONTENT_TYPES, cType);
498                                document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets
499                            }
500                            
501                            _indexFacetableFields(content, document);
502                        }
503                        catch (AmetysRepositoryException e)
504                        {
505                            getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
506                        }
507                    }
508                    else if (zoneItem.getType() == ZoneType.SERVICE)
509                    {
510                        try
511                        {
512                            String serviceId = zoneItem.getServiceId();
513                            document.addField(SERVICE_IDS, serviceId);
514
515                            Service service = _serviceExtensionPoint.getExtension(serviceId);
516                            if (service == null)
517                            {
518                                getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId());
519                            }
520                            else
521                            {
522                                service.index(zoneItem, document);
523                            }
524                        }
525                        catch (AmetysRepositoryException e)
526                        {
527                            getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
528                        }
529                        
530                    }
531                }
532            }
533        }
534    }
535    
536    /**
537     * Index the facetable fields of a content into the page solr document
538     * @param content The content
539     * @param document The main page solr document.
540     */
541    protected void _indexFacetableFields(Content content, SolrInputDocument document)
542    {
543        List<ModelItem> modelItems = new ArrayList<>();
544        try
545        {
546            String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes());
547            modelItems.addAll(_cTypesHelper.getModelItems(allContentTypes)
548                                           .stream()
549                                           .filter(modelItem -> !(modelItem instanceof Property) || modelItem instanceof ElementRefProperty)
550                                           .collect(Collectors.toList()));
551        }
552        catch (IllegalArgumentException e)
553        {
554            getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e);
555            throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e);
556        }
557        
558        for (ModelItem modelItem : modelItems)
559        {
560            DataContext context = RepositoryDataContext.newInstance()
561                                                       .withObject(content);
562            
563            Optional.ofNullable(content.getLanguage())
564                    .map(Locale::new)
565                    .ifPresent(context::withLocale);
566            
567            _findAndIndexFacetableField(document, content, modelItem, context);
568        }
569    }
570    
571    /**
572     * Index the facetable fields of a data holder into the page solr document
573     * @param pageDocument The Solr page document
574     * @param dataHolder the parent data holder
575     * @param modelItem the model item
576     * @param context the context of the data to index
577     */
578    protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context)
579    {
580        String dataName = modelItem.getName();
581        if (dataHolder.hasValue(dataName))
582        {
583            if (modelItem instanceof ElementDefinition elementDefinition)
584            {
585                DataContext newContext = context.cloneContext()
586                                                .addSegmentToDataPath(dataName);
587               
588                Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext);
589                for (String value : values)
590                {
591                    pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value);
592                }
593            }
594            else if (modelItem instanceof RepeaterDefinition repeaterDefinition)
595            {
596                ModelAwareRepeater repeater = dataHolder.getRepeater(dataName);
597                for (ModelAwareRepeaterEntry entry : repeater.getEntries())
598                {
599                    DataContext newContext = context.cloneContext()
600                                                    .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]");
601                    
602                    for (ModelItem child : repeaterDefinition.getModelItems())
603                    {
604                        _findAndIndexFacetableField(pageDocument, entry, child, newContext);
605                    }
606                }
607            }
608            else if (modelItem instanceof CompositeDefinition compositeDefinition)
609            {
610                ModelAwareComposite composite = dataHolder.getComposite(dataName);
611                DataContext newContext = context.cloneContext()
612                                                .addSegmentToDataPath(dataName);
613                
614                for (ModelItem child : compositeDefinition.getModelItems())
615                {
616                    _findAndIndexFacetableField(pageDocument, composite, child, newContext);
617                }
618            }
619        }
620    }
621    
622    /**
623     * Retrieves the values to index if the field is facetable, or an empty collection
624     * @param dataHolder the data holder
625     * @param elementDefinition the definition of the field
626     * @param context the context of the data to index
627     * @return the values to index if the field is facetable, or an empty collection
628     */
629    protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context)
630    {
631        String dataName = elementDefinition.getName();
632        ElementType type = elementDefinition.getType();
633        if (type instanceof IndexableElementType indexingElementType)
634        {
635            if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context))
636            {
637                String dataPath = dataName;
638                if (elementDefinition instanceof ElementRefProperty property)
639                {
640                    dataPath = property.getPath();
641                }
642                
643                Object value = dataHolder.getValue(dataPath, true);
644                if (value instanceof String[] stringValues)
645                {
646                    return Arrays.asList(stringValues);
647                }
648                else if (value instanceof String stringValue)
649                {
650                    return List.of(stringValue);
651                }
652            }
653            else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId()))
654            {
655                String dataPath = dataName;
656                if (elementDefinition instanceof ElementRefProperty property)
657                {
658                    dataPath = property.getPath();
659                }
660                
661                Object value = dataHolder.getValue(dataPath, true);
662                if (value instanceof ContentValue[] contentValues)
663                {
664                    return Arrays.stream(contentValues)
665                            .map(ContentValue::getContentId)
666                            .collect(Collectors.toList());
667                }
668                else if (value instanceof ContentValue contentValue)
669                {
670                    return List.of(contentValue.getContentId());
671                }
672            }
673        }
674        
675        return List.of();
676    }
677    
678    /**
679     * Computes the last modification date of a page.
680     * @param page the page.
681     * @return the last modification date or <code>null</code>.
682     */
683    protected ZonedDateTime _getLastModificationDate(Page page)
684    {
685        return _getLastDate(page, Content::getLastModified);
686    }
687    /**
688     * Computes the first validation date of a page.
689     * @param page the page.
690     * @return the first validation date or <code>null</code>.
691     */
692    protected ZonedDateTime _getFirstValidationDate(Page page)
693    {
694        return _getFirstDate(page, Content::getFirstValidationDate);
695    }
696
697    /**
698     * Computes the last validation date of a page.
699     * @param page the page.
700     * @return the last validation date or <code>null</code>.
701     */
702    protected ZonedDateTime _getLastValidationDate(Page page)
703    {
704        return _getLastDate(page, Content::getLastValidationDate);
705    }
706    
707    /**
708     * Computes the last major validation date of a page.
709     * @param page the page.
710     * @return the last major validation date or <code>null</code>.
711     */
712    protected ZonedDateTime _getLastMajorValidationDate(Page page)
713    {
714        return _getLastDate(page, Content::getLastMajorValidationDate);
715    }
716    
717    /**
718     * Computes a "last date" of a page, using the simple and naive following algorithm:
719     * <br>From all the dates from each of its contents, keep the greatest of them.
720     * @param page the page.
721     * @param dateRetriever The function to retrieve a Date from a Content of the Page
722     * @return the "last date" or <code>null</code>.
723     */
724    protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
725    {
726        ZonedDateTime last = null;
727
728        if (page.getType() == PageType.CONTAINER)
729        {
730            for (Zone zone : page.getZones())
731            {
732                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
733                for (ZoneItem zoneItem : zoneItems)
734                {
735                    switch (zoneItem.getType())
736                    {
737                        case SERVICE:
738                            // A service has no last date
739                            break;
740                        case CONTENT:
741                            try
742                            {
743                                ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent());
744
745                                if (contentLast != null && (last == null || contentLast.isAfter(last)))
746                                {
747                                    // Keep the latest date
748                                    last = contentLast;
749                                }
750                            }
751                            catch (AmetysRepositoryException e)
752                            {
753                                getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
754                            }
755                            break;
756                        default:
757                            break;
758                    }
759                }
760            }
761        }
762        
763        return last;
764    }
765    
766    /**
767     * Computes a "first date" of a page, using the simple and naive following algorithm:
768     * <br>From all the dates from each of its contents, keep the lowest of them.
769     * @param page the page.
770     * @param dateRetriever The function to retrieve a Date from a Content of the Page
771     * @return the "first date" or <code>null</code>.
772     */
773    protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
774    {
775        ZonedDateTime first = null;
776        
777        if (page.getType() == PageType.CONTAINER)
778        {
779            for (Zone zone : page.getZones())
780            {
781                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
782                for (ZoneItem zoneItem : zoneItems)
783                {
784                    switch (zoneItem.getType())
785                    {
786                        case SERVICE:
787                            // A service has no first date
788                            break;
789                        case CONTENT:
790                            try
791                            {
792                                ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent());
793                                
794                                if (contentFirst != null && (first == null || contentFirst.isBefore(first)))
795                                {
796                                    // Keep the lowest date
797                                    first = contentFirst;
798                                }
799                            }
800                            catch (AmetysRepositoryException e)
801                            {
802                                getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
803                            }
804                            break;
805                        default:
806                            break;
807                    }
808                }
809            }
810        }
811        
812        return first;
813    }
814    
815    /**
816     * Populate the solr input document by adding fields to index.
817     * @param page the page to index.
818     * @param document the solr input document
819     * @throws Exception if something goes wrong when processing the indexation of the page
820     */
821    protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception
822    {
823        Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page");
824        for (AdditionalPropertyIndexer indexer : indexers)
825        {
826            indexer.index(page, document);
827        }
828    }
829    
830    /**
831     * Index page attachments as new entries in the index.
832     * @param collection the collection of attachments
833     * @param page the page whose attachments will be indexed
834     * @throws Exception if something goes wrong when indexing the attachments of the page
835     */
836    public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception
837    {
838        Request request = ContextHelper.getRequest(_context);
839        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
840        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
841        _indexPageAttachments(collection, page, solrClient);
842    }
843    
844    private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception
845    {
846        if (collection == null)
847        {
848            return;
849        }
850        
851        AmetysObjectIterable<AmetysObject> children = collection.getChildren();
852        for (AmetysObject object : children)
853        {
854            if (object instanceof ResourceCollection)
855            {
856                _indexPageAttachments((ResourceCollection) object, page, solrClient);
857            }
858            else if (object instanceof Resource)
859            {
860                Resource resource = (Resource) object;
861                _indexPageAttachment(resource, page, solrClient);
862            }
863        }
864    }
865    
866    /**
867     * Index a page attachment
868     * @param resource the page attachment as a {@link Resource}
869     * @param page the page whose attachment is going to be indexed
870     * @throws Exception if something goes wrong when processing the indexation of the page attachment
871     */
872    public void indexPageAttachment(Resource resource, Page page) throws Exception
873    {
874        Request request = ContextHelper.getRequest(_context);
875        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
876        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
877        _indexPageAttachment(resource, page, solrClient);
878    }
879    
880    private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception
881    {
882        SolrInputDocument document = new SolrInputDocument();
883        
884        // Prepare resource doc
885        _populatePageAttachmentDocument(resource, document, page);
886        
887        // Indexation of the document
888        _indexResourceDocument(resource, document, solrClient);
889    }
890    
891    private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception
892    {
893        String language = page.getSitemapName();
894        
895        _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language);
896        
897        Site site = page.getSite();
898        // site name - Store.YES, Index.NOT_ANALYZED
899        document.addField(SolrWebFieldNames.SITE_NAME, site.getName());
900        
901        // site type - Store.YES, Index.NOT_ANALYZED
902        document.addField(SolrWebFieldNames.SITE_TYPE, site.getType());
903        
904        // Added for Solr.
905        // Page site map name - Store.YES, Index.NOT_ANALYZED
906        document.addField(SITEMAP_NAME, page.getSitemapName());
907        
908        // Need the id of the page for unindexing attachment during the unindexing of the page
909        document.addField(ATTACHMENT_PAGE_ID, page.getId());
910    }
911    
912    /**
913     * Index a populated solr input document of type Page.
914     * @param page the page from which the input document is created
915     * @param document the input document to add to the solr index
916     * @param workspaceName The workspace name
917     * @param solrClient The solr client to use
918     * @throws SolrServerException if there is an error on the Solr server
919     * @throws IOException if there is a communication error with the server
920     */
921    protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
922    {
923        // Retrieve appropriate solr client
924        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
925        
926        // Add document
927        UpdateResponse solrResponse = solrClient.add(collectionName, document);
928        int status = solrResponse.getStatus();
929        
930        if (status != 0)
931        {
932            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId());
933        }
934        
935        getLogger().debug("Successful page indexing. Page identifier : {}", page.getId());
936    }
937    
938    /**
939     * Index a populated solr input document of type Resource.
940     * @param resource the resource from which the input document is created
941     * @param document the input document
942     * @param solrClient The solr client to use
943     * @throws SolrServerException if there is an error on the server
944     * @throws IOException if there is a communication error with the server
945     */
946    protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException
947    {
948        // Retrieve appropriate solr client
949        Request request = ContextHelper.getRequest(_context);
950        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
951        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
952        
953        // Add document
954        UpdateResponse solrResponse = solrClient.add(collectionName, document);
955        int status = solrResponse.getStatus();
956        
957        if (status != 0)
958        {
959            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId());
960        }
961        
962        getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId());
963    }
964    
965    ///////////////////////////////////////////////////////////////////////////
966    
967    /**
968     * Un-index a page by its ID  for all workspaces and commit
969     * @param pageId The page ID.
970     * @param unindexRecursively also unindex child pages if requested.
971     * @param unindexAttachments also unindex page attachments
972     * @throws Exception if an error occurs during index update.
973     */
974    public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception
975    {
976        unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments);
977        unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments);
978    }
979    
980    /**
981     * De-index a page (and optionally its children pages).
982     * @param pageId the page to be de-indexed.
983     * @param workspaceName The workspace where to work in 
984     * @param unindexRecursively also unindex child pages if requested.
985     * @param unindexAttachments also unindex page attachments
986     * @throws Exception if an error occurs during index update.
987     */
988    public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception
989    {
990        Request request = ContextHelper.getRequest(_context);
991        
992        // Retrieve the current workspace.
993        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
994        // Retrieve the current site name.
995        String currentSiteName = (String) request.getAttribute("siteName");
996        
997        try
998        {
999            // Force the workspace.
1000            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1001    
1002            getLogger().debug("Unindexing page: {}", pageId);
1003            
1004            _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments);
1005        }
1006        catch (Exception e)
1007        {
1008            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1009            getLogger().error(error, e);
1010            throw new IndexingException(error, e);
1011        }
1012        finally
1013        {
1014            // Restore the site name.
1015            request.setAttribute("siteName", currentSiteName);
1016            // Restore context
1017            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1018        }
1019    }
1020    
1021    /**
1022     * Deindex a document of type Page. Also deindex attachments of a page
1023     * @param pageId the id of the page to deindex
1024     * @param workspaceName The workspace name
1025     * @param unindexRecursively also unindex child pages if requested.
1026     * @param unindexAttachments also unindex page attachments
1027     * @throws SolrServerException if there is an error on the server
1028     * @throws IOException if there is a communication error with the server
1029     * @throws QuerySyntaxException if the uri query can't be built because of a syntax error.
1030     */
1031    protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException
1032    {
1033        // Retrieve appropriate solr client
1034        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
1035        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1036        
1037        getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName);
1038        
1039        Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively));
1040        Query query;
1041        if (unindexRecursively && unindexAttachments)
1042        {
1043            // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"}
1044            Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID);
1045            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery));
1046            query = new OrQuery(attachments, pages);
1047        }
1048        else if (unindexAttachments)
1049        {
1050            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId));
1051            query = new OrQuery(attachments, pages);
1052        }
1053        else
1054        {
1055            query = pages;
1056        }
1057        
1058        // Delete by query
1059        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build());
1060        int status = solrResponse.getStatus();
1061        
1062        if (status != 0)
1063        {
1064            throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId);
1065        }
1066        
1067        getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId);
1068    }
1069    
1070    ///////////////////////////////////////////////////////////////////////////
1071    
1072    /**
1073     * Reindex a page by its ID for all workspaces and commit
1074     * @param pageId The page ID.
1075     * @param reindexRecursively also reindex child pages if requested.
1076     * @param reindexAttachments also reindex page attachments
1077     * @throws Exception if an error occurs during index update.
1078     */
1079    public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception
1080    {
1081        reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments);
1082        reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments);
1083    }
1084  
1085    
1086    /**
1087     * Reindex a page by its ID.
1088     * @param pageId The page ID.
1089     * @param workspaceName The workspace where to work in 
1090     * @param reindexRecursively also reindex child pages if requested.
1091     * @param reindexAttachments also reindex page attachments
1092     * @throws IndexingException if an error occurs during index update.
1093     */
1094    public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException
1095    {
1096        Request request = ContextHelper.getRequest(_context);
1097        
1098        // Retrieve the current workspace.
1099        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
1100        // Retrieve the current site name.
1101        String currentSiteName = (String) request.getAttribute("siteName");
1102        
1103        try
1104        {
1105            // Force the workspace.
1106            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1107    
1108            getLogger().debug("Reindexing page: {}", pageId);
1109            
1110            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
1111            {
1112                Page page = _ametysObjectResolver.resolveById(pageId);
1113                _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments);
1114                SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1115                _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient);
1116            }
1117        }
1118        catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e)
1119        {
1120            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1121            getLogger().error(error, e);
1122            throw new IndexingException(error, e);
1123        }
1124        finally
1125        {
1126            // Restore the site name.
1127            request.setAttribute("siteName", currentSiteName);
1128            // Restore context
1129            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1130        }
1131    }
1132}