Source code

001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019import java.time.ZoneOffset;
020import java.time.ZonedDateTime;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Map;
027import java.util.Optional;
028import java.util.Set;
029import java.util.concurrent.Future;
030import java.util.function.Function;
031import java.util.stream.Collectors;
032
033import org.apache.avalon.framework.component.Component;
034import org.apache.avalon.framework.context.Context;
035import org.apache.avalon.framework.context.ContextException;
036import org.apache.avalon.framework.context.Contextualizable;
037import org.apache.avalon.framework.service.ServiceException;
038import org.apache.avalon.framework.service.ServiceManager;
039import org.apache.avalon.framework.service.Serviceable;
040import org.apache.cocoon.Constants;
041import org.apache.cocoon.components.ContextHelper;
042import org.apache.cocoon.environment.Request;
043import org.apache.commons.lang3.ArrayUtils;
044import org.apache.commons.lang3.LocaleUtils;
045import org.apache.solr.client.solrj.SolrClient;
046import org.apache.solr.client.solrj.SolrServerException;
047import org.apache.solr.client.solrj.response.UpdateResponse;
048import org.apache.solr.common.SolrInputDocument;
049import org.apache.solr.common.SolrInputField;
050
051import org.ametys.cms.content.indexing.solr.SolrFieldNames;
052import org.ametys.cms.content.indexing.solr.SolrIndexer;
053import org.ametys.cms.content.indexing.solr.SolrResourceIndexer;
054import org.ametys.cms.contenttype.ContentTypesHelper;
055import org.ametys.cms.data.ContentValue;
056import org.ametys.cms.data.type.indexing.IndexableElementType;
057import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper;
058import org.ametys.cms.indexing.IndexingException;
059import org.ametys.cms.indexing.solr.AbstractIndexerCallable;
060import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer;
061import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint;
062import org.ametys.cms.indexing.solr.IndexationResult;
063import org.ametys.cms.indexing.solr.ThreadIndexerHelper;
064import org.ametys.cms.model.CMSDataContext;
065import org.ametys.cms.repository.Content;
066import org.ametys.cms.search.query.AndQuery;
067import org.ametys.cms.search.query.DocumentTypeQuery;
068import org.ametys.cms.search.query.JoinQuery;
069import org.ametys.cms.search.query.OrQuery;
070import org.ametys.cms.search.query.Query;
071import org.ametys.cms.search.query.QuerySyntaxException;
072import org.ametys.cms.search.solr.SolrClientProvider;
073import org.ametys.cms.search.systemprop.FirstValidationSystemProperty;
074import org.ametys.cms.search.systemprop.LastMajorValidationSystemProperty;
075import org.ametys.cms.search.systemprop.LastModifiedSystemProperty;
076import org.ametys.cms.search.systemprop.LastValidationSystemProperty;
077import org.ametys.cms.search.systemprop.TagsSystemProperty;
078import org.ametys.cms.tag.Tag;
079import org.ametys.cms.tag.TagHelper;
080import org.ametys.cms.tag.TagProviderExtensionPoint;
081import org.ametys.core.util.DateUtils;
082import org.ametys.plugins.explorer.resources.Resource;
083import org.ametys.plugins.explorer.resources.ResourceCollection;
084import org.ametys.plugins.repository.AmetysObject;
085import org.ametys.plugins.repository.AmetysObjectIterable;
086import org.ametys.plugins.repository.AmetysObjectResolver;
087import org.ametys.plugins.repository.AmetysRepositoryException;
088import org.ametys.plugins.repository.RepositoryConstants;
089import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder;
090import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite;
091import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater;
092import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry;
093import org.ametys.plugins.repository.model.CompositeDefinition;
094import org.ametys.plugins.repository.model.RepeaterDefinition;
095import org.ametys.plugins.repository.model.RepositoryDataContext;
096import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
097import org.ametys.runtime.model.ElementDefinition;
098import org.ametys.runtime.model.ModelItem;
099import org.ametys.runtime.model.type.DataContext;
100import org.ametys.runtime.model.type.ElementType;
101import org.ametys.runtime.model.type.ModelItemTypeConstants;
102import org.ametys.runtime.plugin.component.AbstractLogEnabled;
103import org.ametys.web.WebConstants;
104import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint;
105import org.ametys.web.repository.page.Page;
106import org.ametys.web.repository.page.Page.PageType;
107import org.ametys.web.repository.page.Zone;
108import org.ametys.web.repository.page.ZoneItem;
109import org.ametys.web.repository.page.ZoneItem.ZoneType;
110import org.ametys.web.repository.site.Site;
111import org.ametys.web.repository.sitemap.Sitemap;
112import org.ametys.web.search.query.PageAttachmentQuery;
113import org.ametys.web.search.query.PageQuery;
114import org.ametys.web.service.Service;
115import org.ametys.web.service.ServiceExtensionPoint;
116
117/**
118 * Component responsible for indexing a page with all its contents.
119 */
120public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable
121{
122    /** The avalon role. */
123    public static final String ROLE = SolrPageIndexer.class.getName();
124    
125    /** The service manager. */
126    protected ServiceManager _manager;
127    /** The Solr client provider */
128    protected SolrClientProvider _solrClientProvider;
129    /** The Solr indexer */
130    protected SolrIndexer _solrIndexer;
131    /** Solr Ametys resources indexer */
132    protected SolrResourceIndexer _solrResourceIndexer;
133    /** The extension point for PageVisibleAttachmentIndexers */
134    protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP;
135    /** The additional property indexer extension point. */
136    protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP;
137    /** The tag provider extension point. */
138    protected TagProviderExtensionPoint _tagProviderEP;
139    /** The content types helper */
140    protected ContentTypesHelper _cTypesHelper;
141    /** The thread indexer helper */
142    protected ThreadIndexerHelper _threadIndexerHelper;
143    
144    /** The service extension point. */
145    protected ServiceExtensionPoint _serviceExtensionPoint;
146    /** The Ametys object resolver*/
147    protected AmetysObjectResolver _ametysObjectResolver;
148    /** The avalon context */
149    protected Context _context;
150    /** Cocoon Context */
151    protected org.apache.cocoon.environment.Context _cocoonContext;
152    
153    @Override
154    public void service(ServiceManager manager) throws ServiceException
155    {
156        _manager = manager;
157        _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
158        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
159        _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
160        _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE);
161        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
162        _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE);
163        _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE);
164        _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE);
165        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
166        _threadIndexerHelper = (ThreadIndexerHelper) manager.lookup(ThreadIndexerHelper.ROLE);
167    }
168    
169    public void contextualize(Context context) throws ContextException
170    {
171        _context = context;
172        _cocoonContext = (org.apache.cocoon.environment.Context) context.get(Constants.CONTEXT_ENVIRONMENT_CONTEXT);
173    }
174    
175    /**
176     * Index a page and eventually its children, recursively, in all workspaces and commit<br>
177     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
178     * @param pageId the page to be indexed.
179     * @param indexRecursively to also process children pages.
180     * @param indexAttachments to index page attachments
181     * @throws Exception if an error occurs during indexation.
182     */
183    public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception
184    {
185        indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments);
186        indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments);
187    }
188    
189    /**
190     * Index a page and eventually its children, recursively.<br>
191     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
192     * @param pageId the page to be indexed.
193     * @param workspaceName the workspace where to index
194     * @param indexRecursively to also process children pages.
195     * @param indexAttachments to index page attachments
196     * @throws IndexingException if an error occurs during indexation.
197     */
198    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException
199    {
200        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
201        indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient);
202    }
203    
204    /**
205     * Index a page and eventually its children, recursively.<br>
206     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
207     * @param pageId the page to be indexed.
208     * @param workspaceName the workspace where to index
209     * @param indexRecursively to also process children pages.
210     * @param indexAttachments to index page attachments
211     * @param solrClient The solr client to use
212     * @throws IndexingException if an error occurs during indexation.
213     */
214    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
215    {
216        Request request = ContextHelper.getRequest(_context);
217        
218        // Retrieve the current workspace.
219        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
220        // Retrieve the current site name.
221        String currentSiteName = (String) request.getAttribute("siteName");
222        
223        try
224        {
225            // Force the workspace.
226            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
227    
228            getLogger().debug("Indexing page: {}", pageId);
229            
230            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
231            {
232                Page page = _ametysObjectResolver.resolveById(pageId);
233                _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient);
234            }
235        }
236        catch (AmetysRepositoryException e)
237        {
238            String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName);
239            getLogger().error(error, e);
240            throw new IndexingException(error, e);
241        }
242        finally
243        {
244            // Restore the site name.
245            request.setAttribute("siteName", currentSiteName);
246            // Restore context
247            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
248        }
249    }
250    
251    private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
252    {
253        // Add callable for each page to index
254        List<Future<Void>> tasks = _asyncIndexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient);
255        
256        // Now that everything is submitted, we can iterate and wait for result
257        IndexationResult.fromTasks(tasks, getLogger());
258    }
259    
260    private List<Future<Void>> _asyncIndexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
261    {
262        List<Future<Void>> tasks = new ArrayList<>();
263
264        if (page.isIndexable())
265        {
266            _threadIndexerHelper.submitCallable(new PageIndexerCallable(page, workspaceName, indexAttachments, solrClient));
267        }
268        else
269        {
270            getLogger().debug("Not indexing page: {} in workspace '{}'", page, workspaceName);
271        }
272        
273        if (indexRecursively)
274        {
275            AmetysObjectIterable<? extends Page> children = page.getChildrenPages();
276            for (Page child : children)
277            {
278                // FIXME index child pages if (and only if) not indexed... see original source.
279//                indexPage(child, false, indexRecursively);
280//                indexPage(child, false);
281                tasks.addAll(_asyncIndexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient));
282            }
283        }
284        
285        return tasks;
286    }
287    
288    /**
289     * Populate the solr input document by adding fields to index.
290     * @param page the page to index.
291     * @param document the solr input document
292     * @throws Exception if something goes wrong when processing the indexation of the page
293     */
294    protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception
295    {
296        Sitemap sitemap = page.getSitemap();
297        String sitemapName = sitemap.getName();
298        Site site = page.getSite();
299        String siteName = site.getName();
300        String pageId = page.getId();
301        String pageTitle = page.getTitle();
302        String pageLongTitle = page.getLongTitle();
303        String language = sitemapName;
304        
305        // Page id and type
306        document.addField(SolrFieldNames.ID, pageId);
307        document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE);
308        
309        // Fulltext
310        CMSDataContext context = CMSDataContext.newInstance()
311                                               .withLocale(LocaleUtils.toLocale(language));
312        IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context);
313        if (!pageTitle.equals(pageLongTitle))
314        {
315            IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context);
316        }
317        
318        // Page title
319        _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language);
320        // Page long title
321        _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language);
322        // Title for sorting
323        document.addField(TITLE_SORT, pageTitle);
324        
325        document.addField(TEMPLATE, page.getTemplate());
326        document.addField(PAGE_TYPE, page.getType().name());
327        document.addField(PAGE_DEPTH, page.getDepth());
328        
329        // Contents (page title shoud be indexed before because the main content can override it).
330        _populatePageContentsDocument(page, document);
331        
332        // Parent of the page
333        AmetysObject parent = page.getParent();
334        if (parent != null)
335        {
336            document.addField(PAGE_PARENT_ID, parent.getId());
337        }
338
339        // Ancestors of the page
340        List<String> ancestorIds = new ArrayList<>();
341        while (parent instanceof Page)
342        {
343            ancestorIds.add(parent.getId());
344            parent = parent.getParent();
345        }
346        document.addField(PAGE_ANCESTOR_IDS, ancestorIds);
347        
348        document.addField(SITE_NAME, siteName);
349        document.addField(SITEMAP_NAME, sitemapName);
350        document.addField(SITE_TYPE, site.getType());
351        
352        // Page tags (strict and tags including ancestor pages).
353        Set<String> tags = page.getTags()
354                .stream()
355                .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName())))
356                .collect(Collectors.toSet());
357        document.addField(TagsSystemProperty.TAGS_SOLR_FIELD_NAME, tags);
358        document.addField(TagsSystemProperty.ALL_TAGS_SOLR_FIELD_NAME, _getTagsWithAncestors(page));
359        
360        _populateDatesOfPage(page, document);
361        
362        // Attachments
363        _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language);
364        Optional.ofNullable(page.getRootAttachments())
365                .map(AmetysObject::getId)
366                .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id));
367        _indexVisibleAttachments(page, document);
368    }
369    
370    private void _indexVisibleAttachments(Page page, SolrInputDocument document)
371    {
372        Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds()
373                .stream()
374                .map(_pageVisibleAttachmentIndexerEP::getExtension)
375                .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page))
376                .flatMap(Collection::stream)
377                .collect(Collectors.toList());
378        document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values);
379    }
380    
381    /**
382     * Populate the solr input document with dates from the page
383     * @param page The page
384     * @param document The Solr document
385     */
386    protected void _populateDatesOfPage(Page page, SolrInputDocument document)
387    {
388        // Page last modification date
389        ZonedDateTime lastModified = _getLastModificationDate(page);
390        if (lastModified != null)
391        {
392            String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC);
393            // For 'new' search service
394            document.addField(LastModifiedSystemProperty.SOLR_FIELD_NAME, lastModifiedStr);
395            // For 'old' search service
396            document.addField(LastModifiedSystemProperty.SOLR_FIELD_NAME + "_dt", lastModifiedStr);
397        }
398        
399        // Page last validation date
400        ZonedDateTime lastValidation = _getLastValidationDate(page);
401        if (lastValidation != null)
402        {
403            String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC);
404            // For 'new' search service
405            document.addField(LastValidationSystemProperty.SOLR_FIELD_NAME, lastValidationStr);
406        }
407        
408        // Page first validation date
409        ZonedDateTime firstValidation = _getFirstValidationDate(page);
410        if (firstValidation != null)
411        {
412            String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC);
413            // For 'new' search service
414            document.addField(FirstValidationSystemProperty.SOLR_FIELD_NAME, firstValidationStr);
415        }
416        
417        // Page last major validation date
418        ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page);
419        if (lastMajorValidation != null)
420        {
421            String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC);
422            // For 'new' search service
423            document.addField(LastMajorValidationSystemProperty.SOLR_FIELD_NAME, lastMajorValidationStr);
424        }
425        
426        // date for sorting
427        SolrInputField dateField = document.getField(DATE_FOR_SORTING);
428        if (dateField == null)
429        {
430            Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES);
431            if (oDateValues != null && !oDateValues.isEmpty())
432            {
433                document.setField(DATE_FOR_SORTING, oDateValues.iterator().next());
434            }
435        }
436    }
437    
438    private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language)
439    {
440        String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName);
441        
442        document.addField(fieldName, possiblyTruncatedValue);
443        document.addField(fieldName + "_txt_" + language, fieldValue);
444        document.addField(fieldName + "_txt_stemmed_" + language, fieldValue);
445        document.addField(fieldName + "_txt_ws_" + language, fieldValue);
446
447        document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
448        document.addField(fieldName + "_s_ws", fieldValue.toLowerCase());
449        document.addField(fieldName + "_txt", fieldValue);
450    }
451    /**
452     * Get all the page tags with their ancestors.
453     * @param page The page.
454     * @return All the page tags with their ancestors.
455     */
456    protected Set<String> _getTagsWithAncestors(Page page)
457    {
458        Set<String> allTags = new HashSet<>(page.getTags());
459        
460        Map<String, Object> tagParams = Map.of("siteName", page.getSiteName());
461        
462        for (String tagName : page.getTags())
463        {
464            allTags.add(tagName);
465            
466            // Get the ancestor tags
467            Tag tag = _tagProviderEP.getTag(tagName, tagParams);
468            for (Tag ancestor : TagHelper.getAncestors(tag, false))
469            {
470                allTags.add(ancestor.getName());
471            }
472        }
473        
474        return allTags;
475    }
476    
477    /**
478     * Index the content of the page.<p>
479     * @param page the page to index.
480     * @param document the document to populate.
481     * @throws Exception if an error occurs.
482     */
483    protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception
484    {
485        if (page.getType() == PageType.CONTAINER)
486        {
487            for (Zone zone : page.getZones())
488            {
489                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
490                for (ZoneItem zoneItem : zoneItems)
491                {
492                    if (zoneItem.getType() == ZoneType.CONTENT)
493                    {
494                        try
495                        {
496                            Content content = zoneItem.getContent();
497                            document.addField(CONTENT_IDS, content.getId());
498                            
499                            for (String cType : content.getTypes())
500                            {
501                                document.addField(PAGE_CONTENT_TYPES, cType);
502                                document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets
503                            }
504                            
505                            _indexFacetableFields(content, document);
506                        }
507                        catch (AmetysRepositoryException e)
508                        {
509                            getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
510                        }
511                    }
512                    else if (zoneItem.getType() == ZoneType.SERVICE)
513                    {
514                        try
515                        {
516                            String serviceId = zoneItem.getServiceId();
517                            document.addField(SERVICE_IDS, serviceId);
518
519                            Service service = _serviceExtensionPoint.getExtension(serviceId);
520                            if (service == null)
521                            {
522                                getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId());
523                            }
524                            else
525                            {
526                                service.index(zoneItem, document);
527                            }
528                        }
529                        catch (AmetysRepositoryException e)
530                        {
531                            getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
532                        }
533                        
534                    }
535                }
536            }
537        }
538    }
539    
540    /**
541     * Index the facetable fields of a content into the page solr document
542     * @param content The content
543     * @param document The main page solr document.
544     */
545    protected void _indexFacetableFields(Content content, SolrInputDocument document)
546    {
547        try
548        {
549            String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes());
550            for (ModelItem modelItem : _cTypesHelper.getModelItems(allContentTypes))
551            {
552                DataContext context = RepositoryDataContext.newInstance()
553                                                           .withObject(content);
554                
555                Optional.ofNullable(content.getLanguage())
556                        .map(LocaleUtils::toLocale)
557                        .ifPresent(context::withLocale);
558                
559                _findAndIndexFacetableField(document, content, modelItem, context);
560            }
561        }
562        catch (IllegalArgumentException e)
563        {
564            getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e);
565            throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e);
566        }
567    }
568    
569    /**
570     * Index the facetable fields of a data holder into the page solr document
571     * @param pageDocument The Solr page document
572     * @param dataHolder the parent data holder
573     * @param modelItem the model item
574     * @param context the context of the data to index
575     */
576    protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context)
577    {
578        String dataName = modelItem.getName();
579        if (dataHolder.hasValue(dataName))
580        {
581            if (modelItem instanceof ElementDefinition elementDefinition)
582            {
583                DataContext newContext = context.cloneContext()
584                                                .addSegmentToDataPath(dataName);
585               
586                Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext);
587                for (String value : values)
588                {
589                    pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value);
590                }
591            }
592            else if (modelItem instanceof RepeaterDefinition repeaterDefinition)
593            {
594                ModelAwareRepeater repeater = dataHolder.getRepeater(dataName);
595                for (ModelAwareRepeaterEntry entry : repeater.getEntries())
596                {
597                    DataContext newContext = context.cloneContext()
598                                                    .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]");
599                    
600                    for (ModelItem child : repeaterDefinition.getModelItems())
601                    {
602                        _findAndIndexFacetableField(pageDocument, entry, child, newContext);
603                    }
604                }
605            }
606            else if (modelItem instanceof CompositeDefinition compositeDefinition)
607            {
608                ModelAwareComposite composite = dataHolder.getComposite(dataName);
609                DataContext newContext = context.cloneContext()
610                                                .addSegmentToDataPath(dataName);
611                
612                for (ModelItem child : compositeDefinition.getModelItems())
613                {
614                    _findAndIndexFacetableField(pageDocument, composite, child, newContext);
615                }
616            }
617        }
618    }
619    
620    /**
621     * Retrieves the values to index if the field is facetable, or an empty collection
622     * @param dataHolder the data holder
623     * @param elementDefinition the definition of the field
624     * @param context the context of the data to index
625     * @return the values to index if the field is facetable, or an empty collection
626     */
627    protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context)
628    {
629        String dataName = elementDefinition.getName();
630        ElementType type = elementDefinition.getType();
631        if (type instanceof IndexableElementType indexingElementType)
632        {
633            if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context))
634            {
635                Object value = dataHolder.getValue(dataName, true);
636                if (value instanceof String[] stringValues)
637                {
638                    return Arrays.asList(stringValues);
639                }
640                else if (value instanceof String stringValue)
641                {
642                    return List.of(stringValue);
643                }
644            }
645            else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId()))
646            {
647                Object value = dataHolder.getValue(dataName, true);
648                if (value instanceof ContentValue[] contentValues)
649                {
650                    return Arrays.stream(contentValues)
651                            .map(ContentValue::getContentId)
652                            .collect(Collectors.toList());
653                }
654                else if (value instanceof ContentValue contentValue)
655                {
656                    return List.of(contentValue.getContentId());
657                }
658            }
659        }
660        
661        return List.of();
662    }
663    
664    /**
665     * Computes the last modification date of a page.
666     * @param page the page.
667     * @return the last modification date or <code>null</code>.
668     */
669    protected ZonedDateTime _getLastModificationDate(Page page)
670    {
671        return _getLastDate(page, Content::getLastModified);
672    }
673    /**
674     * Computes the first validation date of a page.
675     * @param page the page.
676     * @return the first validation date or <code>null</code>.
677     */
678    protected ZonedDateTime _getFirstValidationDate(Page page)
679    {
680        return _getFirstDate(page, Content::getFirstValidationDate);
681    }
682
683    /**
684     * Computes the last validation date of a page.
685     * @param page the page.
686     * @return the last validation date or <code>null</code>.
687     */
688    protected ZonedDateTime _getLastValidationDate(Page page)
689    {
690        return _getLastDate(page, Content::getLastValidationDate);
691    }
692    
693    /**
694     * Computes the last major validation date of a page.
695     * @param page the page.
696     * @return the last major validation date or <code>null</code>.
697     */
698    protected ZonedDateTime _getLastMajorValidationDate(Page page)
699    {
700        return _getLastDate(page, Content::getLastMajorValidationDate);
701    }
702    
703    /**
704     * Computes a "last date" of a page, using the simple and naive following algorithm:
705     * <br>From all the dates from each of its contents, keep the greatest of them.
706     * @param page the page.
707     * @param dateRetriever The function to retrieve a Date from a Content of the Page
708     * @return the "last date" or <code>null</code>.
709     */
710    protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
711    {
712        ZonedDateTime last = null;
713
714        if (page.getType() == PageType.CONTAINER)
715        {
716            for (Zone zone : page.getZones())
717            {
718                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
719                for (ZoneItem zoneItem : zoneItems)
720                {
721                    switch (zoneItem.getType())
722                    {
723                        case SERVICE:
724                            // A service has no last date
725                            break;
726                        case CONTENT:
727                            try
728                            {
729                                ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent());
730
731                                if (contentLast != null && (last == null || contentLast.isAfter(last)))
732                                {
733                                    // Keep the latest date
734                                    last = contentLast;
735                                }
736                            }
737                            catch (AmetysRepositoryException e)
738                            {
739                                getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
740                            }
741                            break;
742                        default:
743                            break;
744                    }
745                }
746            }
747        }
748        
749        return last;
750    }
751    
752    /**
753     * Computes a "first date" of a page, using the simple and naive following algorithm:
754     * <br>From all the dates from each of its contents, keep the lowest of them.
755     * @param page the page.
756     * @param dateRetriever The function to retrieve a Date from a Content of the Page
757     * @return the "first date" or <code>null</code>.
758     */
759    protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
760    {
761        ZonedDateTime first = null;
762        
763        if (page.getType() == PageType.CONTAINER)
764        {
765            for (Zone zone : page.getZones())
766            {
767                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
768                for (ZoneItem zoneItem : zoneItems)
769                {
770                    switch (zoneItem.getType())
771                    {
772                        case SERVICE:
773                            // A service has no first date
774                            break;
775                        case CONTENT:
776                            try
777                            {
778                                ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent());
779                                
780                                if (contentFirst != null && (first == null || contentFirst.isBefore(first)))
781                                {
782                                    // Keep the lowest date
783                                    first = contentFirst;
784                                }
785                            }
786                            catch (AmetysRepositoryException e)
787                            {
788                                getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
789                            }
790                            break;
791                        default:
792                            break;
793                    }
794                }
795            }
796        }
797        
798        return first;
799    }
800    
801    /**
802     * Populate the solr input document by adding fields to index.
803     * @param page the page to index.
804     * @param document the solr input document
805     * @throws Exception if something goes wrong when processing the indexation of the page
806     */
807    protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception
808    {
809        Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page");
810        for (AdditionalPropertyIndexer indexer : indexers)
811        {
812            indexer.index(page, document);
813        }
814    }
815    
816    /**
817     * Index page attachments as new entries in the index.
818     * @param collection the collection of attachments
819     * @param page the page whose attachments will be indexed
820     * @throws Exception if something goes wrong when indexing the attachments of the page
821     */
822    public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception
823    {
824        if (page.isIndexable())
825        {
826            Request request = ContextHelper.getRequest(_context);
827            String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
828            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
829            _indexPageAttachments(collection, page, solrClient);
830        }
831    }
832    
833    private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception
834    {
835        if (collection == null)
836        {
837            return;
838        }
839        
840        AmetysObjectIterable<AmetysObject> children = collection.getChildren();
841        for (AmetysObject object : children)
842        {
843            if (object instanceof ResourceCollection)
844            {
845                _indexPageAttachments((ResourceCollection) object, page, solrClient);
846            }
847            else if (object instanceof Resource)
848            {
849                Resource resource = (Resource) object;
850                _indexPageAttachment(resource, page, solrClient);
851            }
852        }
853    }
854    
855    /**
856     * Index a page attachment
857     * @param resource the page attachment as a {@link Resource}
858     * @param page the page whose attachment is going to be indexed
859     * @throws Exception if something goes wrong when processing the indexation of the page attachment
860     */
861    public void indexPageAttachment(Resource resource, Page page) throws Exception
862    {
863        if (page.isIndexable())
864        {
865            Request request = ContextHelper.getRequest(_context);
866            String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
867            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
868            _indexPageAttachment(resource, page, solrClient);
869        }
870    }
871    
872    private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception
873    {
874        SolrInputDocument document = new SolrInputDocument();
875        
876        // Prepare resource doc
877        _populatePageAttachmentDocument(resource, document, page);
878        
879        // Indexation of the document
880        _indexResourceDocument(resource, document, solrClient);
881    }
882    
883    private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception
884    {
885        String language = page.getSitemapName();
886        
887        _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language);
888        
889        Site site = page.getSite();
890        // site name - Store.YES, Index.NOT_ANALYZED
891        document.addField(SolrWebFieldNames.SITE_NAME, site.getName());
892        
893        // site type - Store.YES, Index.NOT_ANALYZED
894        document.addField(SolrWebFieldNames.SITE_TYPE, site.getType());
895        
896        // Added for Solr.
897        // Page site map name - Store.YES, Index.NOT_ANALYZED
898        document.addField(SITEMAP_NAME, page.getSitemapName());
899        
900        // Need the id of the page for unindexing attachment during the unindexing of the page
901        document.addField(ATTACHMENT_PAGE_ID, page.getId());
902    }
903    
904    /**
905     * Index a populated solr input document of type Page.
906     * @param page the page from which the input document is created
907     * @param document the input document to add to the solr index
908     * @param workspaceName The workspace name
909     * @param solrClient The solr client to use
910     * @throws SolrServerException if there is an error on the Solr server
911     * @throws IOException if there is a communication error with the server
912     */
913    protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
914    {
915        // Retrieve appropriate solr client
916        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
917        
918        // Add document
919        UpdateResponse solrResponse = solrClient.add(collectionName, document);
920        int status = solrResponse.getStatus();
921        
922        if (status != 0)
923        {
924            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId());
925        }
926        
927        getLogger().debug("Successful page indexing. Page identifier : {}", page.getId());
928    }
929    
930    /**
931     * Index a populated solr input document of type Resource.
932     * @param resource the resource from which the input document is created
933     * @param document the input document
934     * @param solrClient The solr client to use
935     * @throws SolrServerException if there is an error on the server
936     * @throws IOException if there is a communication error with the server
937     */
938    protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException
939    {
940        // Retrieve appropriate solr client
941        Request request = ContextHelper.getRequest(_context);
942        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
943        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
944        
945        // Add document
946        UpdateResponse solrResponse = solrClient.add(collectionName, document);
947        int status = solrResponse.getStatus();
948        
949        if (status != 0)
950        {
951            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId());
952        }
953        
954        getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId());
955    }
956    
957    ///////////////////////////////////////////////////////////////////////////
958    
959    /**
960     * Un-index a page by its ID  for all workspaces and commit
961     * @param pageId The page ID.
962     * @param unindexRecursively also unindex child pages if requested.
963     * @param unindexAttachments also unindex page attachments
964     * @throws Exception if an error occurs during index update.
965     */
966    public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception
967    {
968        unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments);
969        unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments);
970    }
971    
972    /**
973     * De-index a page (and optionally its children pages).
974     * @param pageId the page to be de-indexed.
975     * @param workspaceName The workspace where to work in
976     * @param unindexRecursively also unindex child pages if requested.
977     * @param unindexAttachments also unindex page attachments
978     * @throws Exception if an error occurs during index update.
979     */
980    public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception
981    {
982        Request request = ContextHelper.getRequest(_context);
983        
984        // Retrieve the current workspace.
985        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
986        // Retrieve the current site name.
987        String currentSiteName = (String) request.getAttribute("siteName");
988        
989        try
990        {
991            // Force the workspace.
992            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
993    
994            getLogger().debug("Unindexing page: {}", pageId);
995            
996            _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments);
997        }
998        catch (Exception e)
999        {
1000            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1001            getLogger().error(error, e);
1002            throw new IndexingException(error, e);
1003        }
1004        finally
1005        {
1006            // Restore the site name.
1007            request.setAttribute("siteName", currentSiteName);
1008            // Restore context
1009            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1010        }
1011    }
1012    
1013    /**
1014     * Deindex a document of type Page. Also deindex attachments of a page
1015     * @param pageId the id of the page to deindex
1016     * @param workspaceName The workspace name
1017     * @param unindexRecursively also unindex child pages if requested.
1018     * @param unindexAttachments also unindex page attachments
1019     * @throws SolrServerException if there is an error on the server
1020     * @throws IOException if there is a communication error with the server
1021     * @throws QuerySyntaxException if the uri query can't be built because of a syntax error.
1022     */
1023    protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException
1024    {
1025        // Retrieve appropriate solr client
1026        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
1027        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1028        
1029        getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName);
1030        
1031        Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively));
1032        Query query;
1033        if (unindexRecursively && unindexAttachments)
1034        {
1035            // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"}
1036            Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID);
1037            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery));
1038            query = new OrQuery(attachments, pages);
1039        }
1040        else if (unindexAttachments)
1041        {
1042            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId));
1043            query = new OrQuery(attachments, pages);
1044        }
1045        else
1046        {
1047            query = pages;
1048        }
1049        
1050        // Delete by query
1051        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build());
1052        int status = solrResponse.getStatus();
1053        
1054        if (status != 0)
1055        {
1056            throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId);
1057        }
1058        
1059        getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId);
1060    }
1061    
1062    ///////////////////////////////////////////////////////////////////////////
1063    
1064    /**
1065     * Reindex a page by its ID for all workspaces and commit
1066     * @param pageId The page ID.
1067     * @param reindexRecursively also reindex child pages if requested.
1068     * @param reindexAttachments also reindex page attachments
1069     * @throws Exception if an error occurs during index update.
1070     */
1071    public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception
1072    {
1073        reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments);
1074        reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments);
1075    }
1076  
1077    
1078    /**
1079     * Reindex a page by its ID.
1080     * @param pageId The page ID.
1081     * @param workspaceName The workspace where to work in
1082     * @param reindexRecursively also reindex child pages if requested.
1083     * @param reindexAttachments also reindex page attachments
1084     * @throws IndexingException if an error occurs during index update.
1085     */
1086    public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException
1087    {
1088        Request request = ContextHelper.getRequest(_context);
1089        
1090        // Retrieve the current workspace.
1091        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
1092        // Retrieve the current site name.
1093        String currentSiteName = (String) request.getAttribute("siteName");
1094        
1095        try
1096        {
1097            // Force the workspace.
1098            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1099    
1100            getLogger().debug("Reindexing page: {}", pageId);
1101            
1102            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
1103            {
1104                Page page = _ametysObjectResolver.resolveById(pageId);
1105                _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments);
1106                SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1107                _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient);
1108            }
1109        }
1110        catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e)
1111        {
1112            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1113            getLogger().error(error, e);
1114            throw new IndexingException(error, e);
1115        }
1116        finally
1117        {
1118            // Restore the site name.
1119            request.setAttribute("siteName", currentSiteName);
1120            // Restore context
1121            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1122        }
1123    }
1124    
1125    private class PageIndexerCallable extends AbstractIndexerCallable<Page>
1126    {
1127        private boolean _indexAttachments;
1128        
1129        @SuppressWarnings("synthetic-access")
1130        public PageIndexerCallable(Page page, String workspaceName, boolean indexAttachments, SolrClient solrClient)
1131        {
1132            super(page, workspaceName, solrClient, _manager, _cocoonContext, _ametysObjectResolver, getLogger());
1133            this._indexAttachments = indexAttachments;
1134        }
1135        
1136        @Override
1137        protected void process(Page page) throws Exception
1138        {
1139            _logger.info("Indexing page: {} in workspace '{}'", page, _workspaceName);
1140            
1141            SolrInputDocument document = new SolrInputDocument();
1142            
1143            // Prepare the solr input document by adding fields.
1144            _populatePageDocument(page, document);
1145            
1146            // Set the additional properties in the document.
1147            _populateAdditionalProperties(page, document);
1148            
1149            // Indexation of ACL initial values
1150            _solrIndexer.indexAclInitValues(page, document);
1151            
1152            // Indexation of the document
1153            _indexPageDocument(page, document, _workspaceName, _solrClient);
1154            
1155            // Index page attachments documents
1156            if (_indexAttachments)
1157            {
1158                _indexPageAttachments(page.getRootAttachments(), page, _solrClient);
1159            }
1160        }
1161
1162        @Override
1163        protected String getObjectLabel()
1164        {
1165            return "page";
1166        }
1167    }
1168}