001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019import java.time.ZoneOffset;
020import java.time.ZonedDateTime;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Locale;
027import java.util.Map;
028import java.util.Optional;
029import java.util.Set;
030import java.util.function.Function;
031import java.util.stream.Collectors;
032
033import org.apache.avalon.framework.component.Component;
034import org.apache.avalon.framework.context.Context;
035import org.apache.avalon.framework.context.ContextException;
036import org.apache.avalon.framework.context.Contextualizable;
037import org.apache.avalon.framework.service.ServiceException;
038import org.apache.avalon.framework.service.ServiceManager;
039import org.apache.avalon.framework.service.Serviceable;
040import org.apache.cocoon.components.ContextHelper;
041import org.apache.cocoon.environment.Request;
042import org.apache.commons.lang3.ArrayUtils;
043import org.apache.solr.client.solrj.SolrClient;
044import org.apache.solr.client.solrj.SolrServerException;
045import org.apache.solr.client.solrj.response.UpdateResponse;
046import org.apache.solr.common.SolrInputDocument;
047import org.apache.solr.common.SolrInputField;
048
049import org.ametys.cms.content.indexing.solr.SolrFieldNames;
050import org.ametys.cms.content.indexing.solr.SolrIndexer;
051import org.ametys.cms.content.indexing.solr.SolrResourceIndexer;
052import org.ametys.cms.contenttype.ContentTypesHelper;
053import org.ametys.cms.data.ContentValue;
054import org.ametys.cms.data.type.indexing.IndexableElementType;
055import org.ametys.cms.data.type.indexing.IndexableElementTypeHelper;
056import org.ametys.cms.indexing.IndexingException;
057import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer;
058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint;
059import org.ametys.cms.model.properties.ElementRefProperty;
060import org.ametys.cms.model.properties.Property;
061import org.ametys.cms.repository.Content;
062import org.ametys.cms.search.query.AndQuery;
063import org.ametys.cms.search.query.DocumentTypeQuery;
064import org.ametys.cms.search.query.JoinQuery;
065import org.ametys.cms.search.query.OrQuery;
066import org.ametys.cms.search.query.Query;
067import org.ametys.cms.search.query.QuerySyntaxException;
068import org.ametys.cms.search.solr.SolrClientProvider;
069import org.ametys.cms.search.solr.field.FirstValidationSearchField;
070import org.ametys.cms.search.solr.field.LastMajorValidationSearchField;
071import org.ametys.cms.search.solr.field.LastModifiedSearchField;
072import org.ametys.cms.search.solr.field.LastValidationSearchField;
073import org.ametys.cms.tag.Tag;
074import org.ametys.cms.tag.TagHelper;
075import org.ametys.cms.tag.TagProviderExtensionPoint;
076import org.ametys.core.util.DateUtils;
077import org.ametys.plugins.explorer.resources.Resource;
078import org.ametys.plugins.explorer.resources.ResourceCollection;
079import org.ametys.plugins.repository.AmetysObject;
080import org.ametys.plugins.repository.AmetysObjectIterable;
081import org.ametys.plugins.repository.AmetysObjectResolver;
082import org.ametys.plugins.repository.AmetysRepositoryException;
083import org.ametys.plugins.repository.RepositoryConstants;
084import org.ametys.plugins.repository.data.holder.ModelAwareDataHolder;
085import org.ametys.plugins.repository.data.holder.group.ModelAwareComposite;
086import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeater;
087import org.ametys.plugins.repository.data.holder.group.ModelAwareRepeaterEntry;
088import org.ametys.plugins.repository.model.CompositeDefinition;
089import org.ametys.plugins.repository.model.RepeaterDefinition;
090import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
091import org.ametys.runtime.model.ElementDefinition;
092import org.ametys.runtime.model.ModelItem;
093import org.ametys.runtime.model.type.DataContext;
094import org.ametys.runtime.model.type.ElementType;
095import org.ametys.runtime.model.type.ModelItemTypeConstants;
096import org.ametys.runtime.plugin.component.AbstractLogEnabled;
097import org.ametys.web.WebConstants;
098import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint;
099import org.ametys.web.repository.page.Page;
100import org.ametys.web.repository.page.Page.PageType;
101import org.ametys.web.repository.page.Zone;
102import org.ametys.web.repository.page.ZoneItem;
103import org.ametys.web.repository.page.ZoneItem.ZoneType;
104import org.ametys.web.repository.site.Site;
105import org.ametys.web.repository.sitemap.Sitemap;
106import org.ametys.web.search.query.PageAttachmentQuery;
107import org.ametys.web.search.query.PageQuery;
108import org.ametys.web.service.Service;
109import org.ametys.web.service.ServiceExtensionPoint;
110
111/**
112 * Component responsible for indexing a page with all its contents.
113 */
114public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable
115{
    /** The avalon role. */
    public static final String ROLE = SolrPageIndexer.class.getName();
    
    /** The Solr client provider, used to get the update client of a workspace */
    protected SolrClientProvider _solrClientProvider;
    /** The Solr indexer */
    protected SolrIndexer _solrIndexer;
    /** Solr Ametys resources indexer */
    protected SolrResourceIndexer _solrResourceIndexer;
    /** The extension point for PageVisibleAttachmentIndexers */
    protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP;
    /** The additional property indexer extension point. */
    protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP;
    /** The tag provider extension point. */
    protected TagProviderExtensionPoint _tagProviderEP;
    
    /** The service extension point. */
    protected ServiceExtensionPoint _serviceExtensionPoint;
    /** The Ametys object resolver */
    protected AmetysObjectResolver _ametysObjectResolver;
    /** The avalon context, used to retrieve the current request */
    protected Context _context;

    /** The content types helper, used to get the model items of the types of a content */
    private ContentTypesHelper _cTypesHelper;
140    
141    @Override
142    public void service(ServiceManager manager) throws ServiceException
143    {
144        _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
145        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
146        _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
147        _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE);
148        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
149        _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE);
150        _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE);
151        _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE);
152        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
153    }
154    
155    public void contextualize(Context context) throws ContextException
156    {
157        _context = context;
158    }
159    
160    /**
161     * Index a page and eventually its children, recursively, in all workspaces and commit<br>
162     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
163     * @param pageId the page to be indexed.
164     * @param indexRecursively to also process children pages.
165     * @param indexAttachments to index page attachments
166     * @throws Exception if an error occurs during indexation.
167     */
168    public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception
169    {
170        indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments);
171        indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments);
172    }
173    
174    /**
175     * Index a page and eventually its children, recursively.<br>
176     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
177     * @param pageId the page to be indexed.
178     * @param workspaceName the workspace where to index
179     * @param indexRecursively to also process children pages.
180     * @param indexAttachments to index page attachments
181     * @throws IndexingException if an error occurs during indexation.
182     */
183    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException
184    {
185        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
186        indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient);
187    }
188    
189    /**
190     * Index a page and eventually its children, recursively.<br>
191     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
192     * @param pageId the page to be indexed.
193     * @param workspaceName the workspace where to index
194     * @param indexRecursively to also process children pages.
195     * @param indexAttachments to index page attachments
196     * @param solrClient The solr client to use
197     * @throws IndexingException if an error occurs during indexation.
198     */
199    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
200    {
201        Request request = ContextHelper.getRequest(_context);
202        
203        // Retrieve the current workspace.
204        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
205        // Retrieve the current site name.
206        String currentSiteName = (String) request.getAttribute("siteName");
207        
208        try
209        {
210            // Force the workspace.
211            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
212    
213            getLogger().debug("Indexing page: {}", pageId);
214            
215            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
216            {
217                Page page = _ametysObjectResolver.resolveById(pageId);
218                _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient);
219            }
220        }
221        catch (AmetysRepositoryException e)
222        {
223            String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName);
224            getLogger().error(error, e);
225            throw new IndexingException(error, e);
226        }
227        finally
228        {
229            // Restore the site name.
230            request.setAttribute("siteName", currentSiteName);
231            // Restore context
232            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
233        }
234    }
235    
236    private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
237    {
238        getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName);
239        
240        SolrInputDocument document = new SolrInputDocument();
241        
242        try
243        {
244            // Prepare the solr input document by adding fields.
245            _populatePageDocument(page, document);
246            
247            // Set the additional properties in the document.
248            _populateAdditionalProperties(page, document);
249            
250            // Indexation of ACL initial values
251            _solrIndexer.indexAclInitValues(page, document);
252            
253            // Indexation of the document
254            _indexPageDocument(page, document, workspaceName, solrClient);
255            
256            // Index page attachments documents
257            if (indexAttachments)
258            {
259                _indexPageAttachments(page.getRootAttachments(), page, solrClient);
260            }
261        }
262        catch (Exception e)
263        {
264            String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName);
265            getLogger().error(error, e);
266            throw new IndexingException(error, e);
267        }
268        
269        if (indexRecursively)
270        {
271            AmetysObjectIterable<? extends Page> children = page.getChildrenPages();
272            for (Page child : children)
273            {
274                // FIXME index child pages if (and only if) not indexed... see original source.
275//                indexPage(child, false, indexRecursively);
276//                indexPage(child, false);
277                _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient);
278            }
279        }
280    }
281    
282    /**
283     * Populate the solr input document by adding fields to index.
284     * @param page the page to index.
285     * @param document the solr input document
286     * @throws Exception if something goes wrong when processing the indexation of the page
287     */
288    protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception
289    {
290        Sitemap sitemap = page.getSitemap();
291        String sitemapName = sitemap.getName();
292        Site site = page.getSite();
293        String siteName = site.getName();
294        String pageId = page.getId();
295        String pageTitle = page.getTitle();
296        String pageLongTitle = page.getLongTitle();
297        String language = sitemapName;
298        
299        // Page id and type
300        document.addField(SolrFieldNames.ID, pageId);
301        document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE);
302        
303        // Fulltext
304        DataContext context = DataContext.newInstance()
305                                         .withLocale(new Locale(language));
306        IndexableElementTypeHelper.indexFulltextValue(document, pageTitle, context);
307        if (!pageTitle.equals(pageLongTitle))
308        {
309            IndexableElementTypeHelper.indexFulltextValue(document, pageLongTitle, context);
310        }
311        
312        // Page title
313        _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language);
314        // Page long title
315        _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language);
316        // Title for sorting
317        document.addField(TITLE_SORT, pageTitle);
318        
319        document.addField(TEMPLATE, page.getTemplate());
320        document.addField(PAGE_TYPE, page.getType().name());
321        document.addField(PAGE_DEPTH, page.getDepth());
322        
323        // Contents (page title shoud be indexed before because the main content can override it).
324        _populatePageContentsDocument(page, document);
325        
326        // Parent of the page
327        AmetysObject parent = page.getParent();
328        if (parent != null)
329        {
330            document.addField(PAGE_PARENT_ID, parent.getId());
331        }
332
333        // Ancestors of the page
334        List<String> ancestorIds = new ArrayList<>();
335        while (parent instanceof Page)
336        {
337            ancestorIds.add(parent.getId());
338            parent = parent.getParent();
339        }
340        document.addField(PAGE_ANCESTOR_IDS, ancestorIds);
341        
342        document.addField(SITE_NAME, siteName);
343        document.addField(SITEMAP_NAME, sitemapName);
344        document.addField(SITE_TYPE, site.getType());
345        
346        // Page tags (strict and tags including ancestor pages).
347        Set<String> tags = page.getTags()
348                .stream()
349                .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName())))
350                .collect(Collectors.toSet());
351        document.addField(SolrFieldNames.TAGS, tags);
352        document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page));
353        
354        _populateDatesOfPage(page, document);
355        
356        // Attachments
357        _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language);
358        Optional.ofNullable(page.getRootAttachments())
359                .map(AmetysObject::getId)
360                .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id));
361        _indexVisibleAttachments(page, document);
362    }
363    
364    private void _indexVisibleAttachments(Page page, SolrInputDocument document)
365    {
366        Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds()
367                .stream()
368                .map(_pageVisibleAttachmentIndexerEP::getExtension)
369                .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page))
370                .flatMap(Collection::stream)
371                .collect(Collectors.toList());
372        document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values);
373    }
374    
375    /**
376     * Populate the solr input document with dates from the page
377     * @param page The page
378     * @param document The Solr document
379     */
380    protected void _populateDatesOfPage(Page page, SolrInputDocument document)
381    {
382        // Page last modification date
383        ZonedDateTime lastModified = _getLastModificationDate(page);
384        if (lastModified != null)
385        {
386            String lastModifiedStr = DateUtils.zonedDateTimeToString(lastModified, ZoneOffset.UTC);
387            // For 'new' search service
388            document.addField(LastModifiedSearchField.NAME, lastModifiedStr);
389            // For 'old' search service
390            document.addField(LAST_MODIFIED + "_dt", lastModifiedStr);
391        }
392        
393        // Page last validation date
394        ZonedDateTime lastValidation = _getLastValidationDate(page);
395        if (lastValidation != null)
396        {
397            String lastValidationStr = DateUtils.zonedDateTimeToString(lastValidation, ZoneOffset.UTC);
398            // For 'new' search service
399            document.addField(LastValidationSearchField.NAME, lastValidationStr);
400        }
401        
402        // Page first validation date
403        ZonedDateTime firstValidation = _getFirstValidationDate(page);
404        if (firstValidation != null)
405        {
406            String firstValidationStr = DateUtils.zonedDateTimeToString(firstValidation, ZoneOffset.UTC);
407            // For 'new' search service
408            document.addField(FirstValidationSearchField.NAME, firstValidationStr);
409        }
410        
411        // Page last major validation date
412        ZonedDateTime lastMajorValidation = _getLastMajorValidationDate(page);
413        if (lastMajorValidation != null)
414        {
415            String lastMajorValidationStr = DateUtils.zonedDateTimeToString(lastMajorValidation, ZoneOffset.UTC);
416            // For 'new' search service
417            document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr);
418        }
419        
420        // date for sorting
421        SolrInputField dateField = document.getField(DATE_FOR_SORTING);
422        if (dateField == null)
423        {
424            Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES);
425            if (oDateValues != null && !oDateValues.isEmpty())
426            {
427                document.setField(DATE_FOR_SORTING, oDateValues.iterator().next());
428            }
429        }
430    }
431    
432    private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language)
433    {
434        String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName);
435        
436        document.addField(fieldName, possiblyTruncatedValue);
437        document.addField(fieldName + "_txt_" + language, fieldValue);
438        document.addField(fieldName + "_txt_stemmed_" + language, fieldValue);
439        document.addField(fieldName + "_txt_ws_" + language, fieldValue);
440
441        document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
442        document.addField(fieldName + "_s_ws", fieldValue.toLowerCase());
443        document.addField(fieldName + "_txt", fieldValue);
444    }
445    /**
446     * Get all the page tags with their ancestors.
447     * @param page The page.
448     * @return All the page tags with their ancestors.
449     */
450    protected Set<String> _getTagsWithAncestors(Page page)
451    {
452        Set<String> allTags = new HashSet<>(page.getTags());
453        
454        Map<String, Object> tagParams = Map.of("siteName", page.getSiteName());
455        
456        for (String tagName : page.getTags())
457        {
458            allTags.add(tagName);
459            
460            // Get the ancestor tags
461            Tag tag = _tagProviderEP.getTag(tagName, tagParams);
462            for (Tag ancestor : TagHelper.getAncestors(tag, false))
463            {
464                allTags.add(ancestor.getName());
465            }
466        }
467        
468        return allTags;
469    }
470    
471    /**
472     * Index the content of the page.<p>
473     * @param page the page to index.
474     * @param document the document to populate.
475     * @throws Exception if an error occurs.
476     */
477    protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception
478    {
479        if (page.getType() == PageType.CONTAINER)
480        {
481            for (Zone zone : page.getZones())
482            {
483                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
484                for (ZoneItem zoneItem : zoneItems)
485                {
486                    if (zoneItem.getType() == ZoneType.CONTENT)
487                    {
488                        try
489                        {
490                            Content content = zoneItem.getContent();
491                            document.addField(CONTENT_IDS, content.getId());
492                            
493                            for (String cType : content.getTypes())
494                            {
495                                document.addField(PAGE_CONTENT_TYPES, cType);
496                                document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets
497                            }
498                            
499                            _indexFacetableFields(content, document);
500                        }
501                        catch (AmetysRepositoryException e)
502                        {
503                            getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
504                        }
505                    }
506                    else if (zoneItem.getType() == ZoneType.SERVICE)
507                    {
508                        try
509                        {
510                            String serviceId = zoneItem.getServiceId();
511                            document.addField(SERVICE_IDS, serviceId);
512
513                            Service service = _serviceExtensionPoint.getExtension(serviceId);
514                            if (service == null)
515                            {
516                                getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId());
517                            }
518                            else
519                            {
520                                service.index(zoneItem, document);
521                            }
522                        }
523                        catch (AmetysRepositoryException e)
524                        {
525                            getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
526                        }
527                        
528                    }
529                }
530            }
531        }
532    }
533    
534    /**
535     * Index the facetable fields of a content into the page solr document
536     * @param content The content
537     * @param document The main page solr document.
538     */
539    protected void _indexFacetableFields(Content content, SolrInputDocument document)
540    {
541        List<ModelItem> modelItems = new ArrayList<>();
542        try
543        {
544            String[] allContentTypes = ArrayUtils.addAll(content.getTypes(), content.getMixinTypes());
545            modelItems.addAll(_cTypesHelper.getModelItems(allContentTypes)
546                                           .stream()
547                                           .filter(modelItem -> !(modelItem instanceof Property) || modelItem instanceof ElementRefProperty)
548                                           .collect(Collectors.toList()));
549        }
550        catch (IllegalArgumentException e)
551        {
552            getLogger().error("indexContent > Error getting the model items of content " + content.getId(), e);
553            throw new RuntimeException("indexContent > Error getting the model items of content " + content.getId(), e);
554        }
555        
556        for (ModelItem modelItem : modelItems)
557        {
558            DataContext context = DataContext.newInstance()
559                                             .withObjectId(content.getId());
560            
561            Optional.ofNullable(content.getLanguage())
562                    .map(Locale::new)
563                    .ifPresent(context::withLocale);
564            
565            _findAndIndexFacetableField(document, content, modelItem, context);
566        }
567    }
568    
569    /**
570     * Index the facetable fields of a data holder into the page solr document
571     * @param pageDocument The Solr page document
572     * @param dataHolder the parent data holder
573     * @param modelItem the model item
574     * @param context the context of the data to index
575     */
576    protected void _findAndIndexFacetableField(SolrInputDocument pageDocument, ModelAwareDataHolder dataHolder, ModelItem modelItem, DataContext context)
577    {
578        String dataName = modelItem.getName();
579        if (dataHolder.hasValue(dataName))
580        {
581            if (modelItem instanceof ElementDefinition elementDefinition)
582            {
583                DataContext newContext = context.cloneContext()
584                                                .addSegmentToDataPath(dataName);
585               
586                Collection<String> values = _getValuesToIndex(dataHolder, elementDefinition, newContext);
587                for (String value : values)
588                {
589                    pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + dataName + "_s_dv", value);
590                }
591            }
592            else if (modelItem instanceof RepeaterDefinition repeaterDefinition)
593            {
594                ModelAwareRepeater repeater = dataHolder.getRepeater(dataName);
595                for (ModelAwareRepeaterEntry entry : repeater.getEntries())
596                {
597                    DataContext newContext = context.cloneContext()
598                                                    .addSegmentToDataPath(dataName + "[" + entry.getPosition() + "]");
599                    
600                    for (ModelItem child : repeaterDefinition.getModelItems())
601                    {
602                        _findAndIndexFacetableField(pageDocument, entry, child, newContext);
603                    }
604                }
605            }
606            else if (modelItem instanceof CompositeDefinition compositeDefinition)
607            {
608                ModelAwareComposite composite = dataHolder.getComposite(dataName);
609                DataContext newContext = context.cloneContext()
610                                                .addSegmentToDataPath(dataName);
611                
612                for (ModelItem child : compositeDefinition.getModelItems())
613                {
614                    _findAndIndexFacetableField(pageDocument, composite, child, newContext);
615                }
616            }
617        }
618    }
619    
620    /**
621     * Retrieves the values to index if the field is facetable, or an empty collection
622     * @param dataHolder the data holder
623     * @param elementDefinition the definition of the field
624     * @param context the context of the data to index
625     * @return the values to index if the field is facetable, or an empty collection
626     */
627    protected Collection<String> _getValuesToIndex(ModelAwareDataHolder dataHolder, ElementDefinition elementDefinition, DataContext context)
628    {
629        String dataName = elementDefinition.getName();
630        ElementType type = elementDefinition.getType();
631        if (type instanceof IndexableElementType indexingElementType)
632        {
633            if (ModelItemTypeConstants.STRING_TYPE_ID.equals(type.getId()) && indexingElementType.isFacetable(context))
634            {
635                String dataPath = dataName;
636                if (elementDefinition instanceof ElementRefProperty property)
637                {
638                    dataPath = property.getPath();
639                }
640                
641                Object value = dataHolder.getValue(dataPath, true);
642                if (value instanceof String[] stringValues)
643                {
644                    return Arrays.asList(stringValues);
645                }
646                else if (value instanceof String stringValue)
647                {
648                    return List.of(stringValue);
649                }
650            }
651            else if (org.ametys.cms.data.type.ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(type.getId()))
652            {
653                String dataPath = dataName;
654                if (elementDefinition instanceof ElementRefProperty property)
655                {
656                    dataPath = property.getPath();
657                }
658                
659                Object value = dataHolder.getValue(dataPath, true);
660                if (value instanceof ContentValue[] contentValues)
661                {
662                    return Arrays.stream(contentValues)
663                            .map(ContentValue::getContentId)
664                            .collect(Collectors.toList());
665                }
666                else if (value instanceof ContentValue contentValue)
667                {
668                    return List.of(contentValue.getContentId());
669                }
670            }
671        }
672        
673        return List.of();
674    }
675    
676    /**
677     * Computes the last modification date of a page.
678     * @param page the page.
679     * @return the last modification date or <code>null</code>.
680     */
681    protected ZonedDateTime _getLastModificationDate(Page page)
682    {
683        return _getLastDate(page, Content::getLastModified);
684    }
685    /**
686     * Computes the first validation date of a page.
687     * @param page the page.
688     * @return the first validation date or <code>null</code>.
689     */
690    protected ZonedDateTime _getFirstValidationDate(Page page)
691    {
692        return _getFirstDate(page, Content::getFirstValidationDate);
693    }
694
695    /**
696     * Computes the last validation date of a page.
697     * @param page the page.
698     * @return the last validation date or <code>null</code>.
699     */
700    protected ZonedDateTime _getLastValidationDate(Page page)
701    {
702        return _getLastDate(page, Content::getLastValidationDate);
703    }
704    
705    /**
706     * Computes the last major validation date of a page.
707     * @param page the page.
708     * @return the last major validation date or <code>null</code>.
709     */
710    protected ZonedDateTime _getLastMajorValidationDate(Page page)
711    {
712        return _getLastDate(page, Content::getLastMajorValidationDate);
713    }
714    
715    /**
716     * Computes a "last date" of a page, using the simple and naive following algorithm:
717     * <br>From all the dates from each of its contents, keep the greatest of them.
718     * @param page the page.
719     * @param dateRetriever The function to retrieve a Date from a Content of the Page
720     * @return the "last date" or <code>null</code>.
721     */
722    protected ZonedDateTime _getLastDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
723    {
724        ZonedDateTime last = null;
725
726        if (page.getType() == PageType.CONTAINER)
727        {
728            for (Zone zone : page.getZones())
729            {
730                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
731                for (ZoneItem zoneItem : zoneItems)
732                {
733                    switch (zoneItem.getType())
734                    {
735                        case SERVICE:
736                            // A service has no last date
737                            break;
738                        case CONTENT:
739                            try
740                            {
741                                ZonedDateTime contentLast = dateRetriever.apply(zoneItem.getContent());
742
743                                if (contentLast != null && (last == null || contentLast.isAfter(last)))
744                                {
745                                    // Keep the latest date
746                                    last = contentLast;
747                                }
748                            }
749                            catch (AmetysRepositoryException e)
750                            {
751                                getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
752                            }
753                            break;
754                        default:
755                            break;
756                    }
757                }
758            }
759        }
760        
761        return last;
762    }
763    
764    /**
765     * Computes a "first date" of a page, using the simple and naive following algorithm:
766     * <br>From all the dates from each of its contents, keep the lowest of them.
767     * @param page the page.
768     * @param dateRetriever The function to retrieve a Date from a Content of the Page
769     * @return the "first date" or <code>null</code>.
770     */
771    protected ZonedDateTime _getFirstDate(Page page, Function<Content, ZonedDateTime> dateRetriever)
772    {
773        ZonedDateTime first = null;
774        
775        if (page.getType() == PageType.CONTAINER)
776        {
777            for (Zone zone : page.getZones())
778            {
779                AmetysObjectIterable<? extends ZoneItem> zoneItems = zone.getZoneItems();
780                for (ZoneItem zoneItem : zoneItems)
781                {
782                    switch (zoneItem.getType())
783                    {
784                        case SERVICE:
785                            // A service has no first date
786                            break;
787                        case CONTENT:
788                            try
789                            {
790                                ZonedDateTime contentFirst = dateRetriever.apply(zoneItem.getContent());
791                                
792                                if (contentFirst != null && (first == null || contentFirst.isBefore(first)))
793                                {
794                                    // Keep the lowest date
795                                    first = contentFirst;
796                                }
797                            }
798                            catch (AmetysRepositoryException e)
799                            {
800                                getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
801                            }
802                            break;
803                        default:
804                            break;
805                    }
806                }
807            }
808        }
809        
810        return first;
811    }
812    
813    /**
814     * Populate the solr input document by adding fields to index.
815     * @param page the page to index.
816     * @param document the solr input document
817     * @throws Exception if something goes wrong when processing the indexation of the page
818     */
819    protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception
820    {
821        Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page");
822        for (AdditionalPropertyIndexer indexer : indexers)
823        {
824            indexer.index(page, document);
825        }
826    }
827    
828    /**
829     * Index page attachments as new entries in the index.
830     * @param collection the collection of attachments
831     * @param page the page whose attachments will be indexed
832     * @throws Exception if something goes wrong when indexing the attachments of the page
833     */
834    public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception
835    {
836        Request request = ContextHelper.getRequest(_context);
837        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
838        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
839        _indexPageAttachments(collection, page, solrClient);
840    }
841    
842    private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception
843    {
844        if (collection == null)
845        {
846            return;
847        }
848        
849        AmetysObjectIterable<AmetysObject> children = collection.getChildren();
850        for (AmetysObject object : children)
851        {
852            if (object instanceof ResourceCollection)
853            {
854                _indexPageAttachments((ResourceCollection) object, page, solrClient);
855            }
856            else if (object instanceof Resource)
857            {
858                Resource resource = (Resource) object;
859                _indexPageAttachment(resource, page, solrClient);
860            }
861        }
862    }
863    
864    /**
865     * Index a page attachment
866     * @param resource the page attachment as a {@link Resource}
867     * @param page the page whose attachment is going to be indexed
868     * @throws Exception if something goes wrong when processing the indexation of the page attachment
869     */
870    public void indexPageAttachment(Resource resource, Page page) throws Exception
871    {
872        Request request = ContextHelper.getRequest(_context);
873        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
874        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
875        _indexPageAttachment(resource, page, solrClient);
876    }
877    
878    private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception
879    {
880        SolrInputDocument document = new SolrInputDocument();
881        
882        // Prepare resource doc
883        _populatePageAttachmentDocument(resource, document, page);
884        
885        // Indexation of the document
886        _indexResourceDocument(resource, document, solrClient);
887    }
888    
889    private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception
890    {
891        String language = page.getSitemapName();
892        
893        _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language);
894        
895        Site site = page.getSite();
896        // site name - Store.YES, Index.NOT_ANALYZED
897        document.addField(SolrWebFieldNames.SITE_NAME, site.getName());
898        
899        // site type - Store.YES, Index.NOT_ANALYZED
900        document.addField(SolrWebFieldNames.SITE_TYPE, site.getType());
901        
902        // Added for Solr.
903        // Page site map name - Store.YES, Index.NOT_ANALYZED
904        document.addField(SITEMAP_NAME, page.getSitemapName());
905        
906        // Need the id of the page for unindexing attachment during the unindexing of the page
907        document.addField(ATTACHMENT_PAGE_ID, page.getId());
908    }
909    
910    /**
911     * Index a populated solr input document of type Page.
912     * @param page the page from which the input document is created
913     * @param document the input document to add to the solr index
914     * @param workspaceName The workspace name
915     * @param solrClient The solr client to use
916     * @throws SolrServerException if there is an error on the Solr server
917     * @throws IOException if there is a communication error with the server
918     */
919    protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
920    {
921        // Retrieve appropriate solr client
922        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
923        
924        // Add document
925        UpdateResponse solrResponse = solrClient.add(collectionName, document);
926        int status = solrResponse.getStatus();
927        
928        if (status != 0)
929        {
930            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId());
931        }
932        
933        getLogger().debug("Successful page indexing. Page identifier : {}", page.getId());
934    }
935    
936    /**
937     * Index a populated solr input document of type Resource.
938     * @param resource the resource from which the input document is created
939     * @param document the input document
940     * @param solrClient The solr client to use
941     * @throws SolrServerException if there is an error on the server
942     * @throws IOException if there is a communication error with the server
943     */
944    protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException
945    {
946        // Retrieve appropriate solr client
947        Request request = ContextHelper.getRequest(_context);
948        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
949        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
950        
951        // Add document
952        UpdateResponse solrResponse = solrClient.add(collectionName, document);
953        int status = solrResponse.getStatus();
954        
955        if (status != 0)
956        {
957            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId());
958        }
959        
960        getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId());
961    }
962    
963    ///////////////////////////////////////////////////////////////////////////
964    
965    /**
966     * Un-index a page by its ID  for all workspaces and commit
967     * @param pageId The page ID.
968     * @param unindexRecursively also unindex child pages if requested.
969     * @param unindexAttachments also unindex page attachments
970     * @throws Exception if an error occurs during index update.
971     */
972    public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception
973    {
974        unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments);
975        unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments);
976    }
977    
978    /**
979     * De-index a page (and optionally its children pages).
980     * @param pageId the page to be de-indexed.
981     * @param workspaceName The workspace where to work in 
982     * @param unindexRecursively also unindex child pages if requested.
983     * @param unindexAttachments also unindex page attachments
984     * @throws Exception if an error occurs during index update.
985     */
986    public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception
987    {
988        Request request = ContextHelper.getRequest(_context);
989        
990        // Retrieve the current workspace.
991        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
992        // Retrieve the current site name.
993        String currentSiteName = (String) request.getAttribute("siteName");
994        
995        try
996        {
997            // Force the workspace.
998            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
999    
1000            getLogger().debug("Unindexing page: {}", pageId);
1001            
1002            _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments);
1003        }
1004        catch (Exception e)
1005        {
1006            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1007            getLogger().error(error, e);
1008            throw new IndexingException(error, e);
1009        }
1010        finally
1011        {
1012            // Restore the site name.
1013            request.setAttribute("siteName", currentSiteName);
1014            // Restore context
1015            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1016        }
1017    }
1018    
1019    /**
1020     * Deindex a document of type Page. Also deindex attachments of a page
1021     * @param pageId the id of the page to deindex
1022     * @param workspaceName The workspace name
1023     * @param unindexRecursively also unindex child pages if requested.
1024     * @param unindexAttachments also unindex page attachments
1025     * @throws SolrServerException if there is an error on the server
1026     * @throws IOException if there is a communication error with the server
1027     * @throws QuerySyntaxException if the uri query can't be built because of a syntax error.
1028     */
1029    protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException
1030    {
1031        // Retrieve appropriate solr client
1032        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
1033        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1034        
1035        getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName);
1036        
1037        Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively));
1038        Query query;
1039        if (unindexRecursively && unindexAttachments)
1040        {
1041            // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"}
1042            Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID);
1043            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery));
1044            query = new OrQuery(attachments, pages);
1045        }
1046        else if (unindexAttachments)
1047        {
1048            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId));
1049            query = new OrQuery(attachments, pages);
1050        }
1051        else
1052        {
1053            query = pages;
1054        }
1055        
1056        // Delete by query
1057        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build());
1058        int status = solrResponse.getStatus();
1059        
1060        if (status != 0)
1061        {
1062            throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId);
1063        }
1064        
1065        getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId);
1066    }
1067    
1068    ///////////////////////////////////////////////////////////////////////////
1069    
1070    /**
1071     * Reindex a page by its ID for all workspaces and commit
1072     * @param pageId The page ID.
1073     * @param reindexRecursively also reindex child pages if requested.
1074     * @param reindexAttachments also reindex page attachments
1075     * @throws Exception if an error occurs during index update.
1076     */
1077    public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception
1078    {
1079        reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments);
1080        reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments);
1081    }
1082  
1083    
1084    /**
1085     * Reindex a page by its ID.
1086     * @param pageId The page ID.
1087     * @param workspaceName The workspace where to work in 
1088     * @param reindexRecursively also reindex child pages if requested.
1089     * @param reindexAttachments also reindex page attachments
1090     * @throws IndexingException if an error occurs during index update.
1091     */
1092    public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException
1093    {
1094        Request request = ContextHelper.getRequest(_context);
1095        
1096        // Retrieve the current workspace.
1097        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
1098        // Retrieve the current site name.
1099        String currentSiteName = (String) request.getAttribute("siteName");
1100        
1101        try
1102        {
1103            // Force the workspace.
1104            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1105    
1106            getLogger().debug("Reindexing page: {}", pageId);
1107            
1108            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
1109            {
1110                Page page = _ametysObjectResolver.resolveById(pageId);
1111                _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments);
1112                SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1113                _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient);
1114            }
1115        }
1116        catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e)
1117        {
1118            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1119            getLogger().error(error, e);
1120            throw new IndexingException(error, e);
1121        }
1122        finally
1123        {
1124            // Restore the site name.
1125            request.setAttribute("siteName", currentSiteName);
1126            // Restore context
1127            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1128        }
1129    }
1130}