Source code

001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019import java.util.ArrayList;
020import java.util.Collection;
021import java.util.Date;
022import java.util.HashSet;
023import java.util.List;
024import java.util.Map;
025import java.util.Optional;
026import java.util.Set;
027import java.util.function.Function;
028import java.util.stream.Collectors;
029
030import org.apache.avalon.framework.component.Component;
031import org.apache.avalon.framework.context.Context;
032import org.apache.avalon.framework.context.ContextException;
033import org.apache.avalon.framework.context.Contextualizable;
034import org.apache.avalon.framework.service.ServiceException;
035import org.apache.avalon.framework.service.ServiceManager;
036import org.apache.avalon.framework.service.Serviceable;
037import org.apache.cocoon.components.ContextHelper;
038import org.apache.cocoon.environment.Request;
039import org.apache.commons.lang3.ArrayUtils;
040import org.apache.solr.client.solrj.SolrClient;
041import org.apache.solr.client.solrj.SolrServerException;
042import org.apache.solr.client.solrj.response.UpdateResponse;
043import org.apache.solr.common.SolrInputDocument;
044import org.apache.solr.common.SolrInputField;
045
046import org.ametys.cms.content.indexing.solr.SolrContentIndexer;
047import org.ametys.cms.content.indexing.solr.SolrFieldNames;
048import org.ametys.cms.content.indexing.solr.SolrIndexer;
049import org.ametys.cms.content.indexing.solr.SolrResourceIndexer;
050import org.ametys.cms.contenttype.ContentConstants;
051import org.ametys.cms.contenttype.ContentTypesHelper;
052import org.ametys.cms.contenttype.MetadataDefinition;
053import org.ametys.cms.contenttype.RepeaterDefinition;
054import org.ametys.cms.contenttype.indexing.IndexingField;
055import org.ametys.cms.contenttype.indexing.IndexingModel;
056import org.ametys.cms.contenttype.indexing.MetadataIndexingField;
057import org.ametys.cms.indexing.IndexingException;
058import org.ametys.cms.indexing.solr.AdditionalPropertyIndexer;
059import org.ametys.cms.indexing.solr.AdditionalPropertyIndexerExtensionPoint;
060import org.ametys.cms.repository.Content;
061import org.ametys.cms.search.query.AndQuery;
062import org.ametys.cms.search.query.DocumentTypeQuery;
063import org.ametys.cms.search.query.JoinQuery;
064import org.ametys.cms.search.query.OrQuery;
065import org.ametys.cms.search.query.Query;
066import org.ametys.cms.search.query.QuerySyntaxException;
067import org.ametys.cms.search.solr.SolrClientProvider;
068import org.ametys.cms.search.solr.field.FirstValidationSearchField;
069import org.ametys.cms.search.solr.field.LastMajorValidationSearchField;
070import org.ametys.cms.search.solr.field.LastModifiedSearchField;
071import org.ametys.cms.search.solr.field.LastValidationSearchField;
072import org.ametys.cms.tag.Tag;
073import org.ametys.cms.tag.TagHelper;
074import org.ametys.cms.tag.TagProviderExtensionPoint;
075import org.ametys.plugins.explorer.resources.Resource;
076import org.ametys.plugins.explorer.resources.ResourceCollection;
077import org.ametys.plugins.repository.AmetysObject;
078import org.ametys.plugins.repository.AmetysObjectResolver;
079import org.ametys.plugins.repository.AmetysRepositoryException;
080import org.ametys.plugins.repository.RepositoryConstants;
081import org.ametys.plugins.repository.metadata.CompositeMetadata;
082import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
083import org.ametys.runtime.plugin.component.AbstractLogEnabled;
084import org.ametys.web.WebConstants;
085import org.ametys.web.indexing.solr.page.attachment.PageVisibleAttachmentIndexerExtensionPoint;
086import org.ametys.web.repository.page.Page;
087import org.ametys.web.repository.page.Page.PageType;
088import org.ametys.web.repository.page.Zone;
089import org.ametys.web.repository.page.ZoneItem;
090import org.ametys.web.repository.page.ZoneItem.ZoneType;
091import org.ametys.web.repository.site.Site;
092import org.ametys.web.repository.sitemap.Sitemap;
093import org.ametys.web.search.query.PageAttachmentQuery;
094import org.ametys.web.search.query.PageQuery;
095import org.ametys.web.service.Service;
096import org.ametys.web.service.ServiceExtensionPoint;
097
/**
 * Component responsible for indexing a page with all its contents.<br>
 * For each page, a Solr input document is populated (titles, tags, dates, contents, services, attachments)
 * and handed over for indexation; children pages can be processed recursively.
 */
public class SolrPageIndexer extends AbstractLogEnabled implements Component, Serviceable, SolrWebFieldNames, Contextualizable
{
    /** The avalon role. */
    public static final String ROLE = SolrPageIndexer.class.getName();
    
    /** The Solr client provider, used to obtain the update client of a workspace. */
    protected SolrClientProvider _solrClientProvider;
    /** The Solr indexer. */
    protected SolrIndexer _solrIndexer;
    /** The Solr indexer for Ametys contents. */
    protected SolrContentIndexer _solrContentIndexer;
    /** The Solr indexer for Ametys resources. */
    protected SolrResourceIndexer _solrResourceIndexer;
    /** The extension point for PageVisibleAttachmentIndexers. */
    protected PageVisibleAttachmentIndexerExtensionPoint _pageVisibleAttachmentIndexerEP;
    /** The additional property indexer extension point. */
    protected AdditionalPropertyIndexerExtensionPoint _additionalPropertiesIndexerEP;
    /** The tag provider extension point. */
    protected TagProviderExtensionPoint _tagProviderEP;
    
    /** The service extension point. */
    protected ServiceExtensionPoint _serviceExtensionPoint;
    /** The Ametys object resolver. */
    protected AmetysObjectResolver _ametysObjectResolver;
    /** The avalon context, from which the current request is retrieved. */
    protected Context _context;

    /** The content types helper, used to get indexing models and metadata definitions of contents. */
    private ContentTypesHelper _cTypesHelper;
129    
    /**
     * Looks up, from the service manager, every component this indexer relies on.
     * @param manager the service manager used for the lookups.
     * @throws ServiceException if thrown by one of the lookups.
     */
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _ametysObjectResolver = (AmetysObjectResolver) manager.lookup(AmetysObjectResolver.ROLE);
        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
        _solrContentIndexer = (SolrContentIndexer) manager.lookup(SolrContentIndexer.ROLE);
        _solrResourceIndexer = (SolrResourceIndexer) manager.lookup(SolrResourceIndexer.ROLE);
        _pageVisibleAttachmentIndexerEP = (PageVisibleAttachmentIndexerExtensionPoint) manager.lookup(PageVisibleAttachmentIndexerExtensionPoint.ROLE);
        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
        _serviceExtensionPoint = (ServiceExtensionPoint) manager.lookup(ServiceExtensionPoint.ROLE);
        _additionalPropertiesIndexerEP = (AdditionalPropertyIndexerExtensionPoint) manager.lookup(AdditionalPropertyIndexerExtensionPoint.ROLE);
        _tagProviderEP = (TagProviderExtensionPoint) manager.lookup(TagProviderExtensionPoint.ROLE);
        _cTypesHelper = (ContentTypesHelper) manager.lookup(ContentTypesHelper.ROLE);
    }
144    
145    public void contextualize(Context context) throws ContextException
146    {
147        _context = context;
148    }
149    
150    /**
151     * Index a page and eventually its children, recursively, in all workspaces and commit<br>
152     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
153     * @param pageId the page to be indexed.
154     * @param indexRecursively to also process children pages.
155     * @param indexAttachments to index page attachments
156     * @throws Exception if an error occurs during indexation.
157     */
158    public void indexPage(String pageId, boolean indexRecursively, boolean indexAttachments) throws Exception
159    {
160        indexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, indexRecursively, indexAttachments);
161        indexPage(pageId, WebConstants.LIVE_WORKSPACE, indexRecursively, indexAttachments);
162    }
163    
164    /**
165     * Index a page and eventually its children, recursively.<br>
166     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
167     * @param pageId the page to be indexed.
168     * @param workspaceName the workspace where to index
169     * @param indexRecursively to also process children pages.
170     * @param indexAttachments to index page attachments
171     * @throws IndexingException if an error occurs during indexation.
172     */
173    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments) throws IndexingException
174    {
175        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
176        indexPage(pageId, workspaceName, indexRecursively, indexAttachments, solrClient);
177    }
178    
179    /**
180     * Index a page and eventually its children, recursively.<br>
181     * By default, children pages will be actually indexed if indexRecursively is true and if those pages are not already indexed.
182     * @param pageId the page to be indexed.
183     * @param workspaceName the workspace where to index
184     * @param indexRecursively to also process children pages.
185     * @param indexAttachments to index page attachments
186     * @param solrClient The solr client to use
187     * @throws IndexingException if an error occurs during indexation.
188     */
189    public void indexPage(String pageId, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
190    {
191        Request request = ContextHelper.getRequest(_context);
192        
193        // Retrieve the current workspace.
194        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
195        // Retrieve the current site name.
196        String currentSiteName = (String) request.getAttribute("siteName");
197        
198        try
199        {
200            // Force the workspace.
201            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
202    
203            getLogger().debug("Indexing page: {}", pageId);
204            
205            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
206            {
207                Page page = _ametysObjectResolver.resolveById(pageId);
208                _indexPage(page, workspaceName, indexRecursively, indexAttachments, solrClient);
209            }
210        }
211        catch (AmetysRepositoryException e)
212        {
213            String error = String.format("Failed to index page %s in workspace %s", pageId, workspaceName);
214            getLogger().error(error, e);
215            throw new IndexingException(error, e);
216        }
217        finally
218        {
219            // Restore the site name.
220            request.setAttribute("siteName", currentSiteName);
221            // Restore context
222            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
223        }
224    }
225    
226    private void _indexPage(Page page, String workspaceName, boolean indexRecursively, boolean indexAttachments, SolrClient solrClient) throws IndexingException
227    {
228        getLogger().info("Indexing page: {} in workspace '{}'", page, workspaceName);
229        
230        SolrInputDocument document = new SolrInputDocument();
231        
232        try
233        {
234            // Prepare the solr input document by adding fields.
235            _populatePageDocument(page, document);
236            
237            // Set the additional properties in the document.
238            _populateAdditionalProperties(page, document);
239            
240            // Indexation of ACL initial values
241            _solrIndexer.indexAclInitValues(page, document);
242            
243            // Indexation of the document
244            _indexPageDocument(page, document, workspaceName, solrClient);
245            
246            // Index page attachments documents
247            if (indexAttachments)
248            {
249                _indexPageAttachments(page.getRootAttachments(), page, solrClient);
250            }
251        }
252        catch (Exception e)
253        {
254            String error = String.format("Failed to index page %s in workspace %s", page.getId(), workspaceName);
255            getLogger().error(error, e);
256            throw new IndexingException(error, e);
257        }
258        
259        if (indexRecursively)
260        {
261            for (Page child : page.getChildrenPages())
262            {
263                // FIXME index child pages if (and only if) not indexed... see original source.
264//                indexPage(child, false, indexRecursively);
265//                indexPage(child, false);
266                _indexPage(child, workspaceName, indexRecursively, indexAttachments, solrClient);
267            }
268        }
269    }
270    
271    /**
272     * Populate the solr input document by adding fields to index.
273     * @param page the page to index.
274     * @param document the solr input document
275     * @throws Exception if something goes wrong when processing the indexation of the page
276     */
277    protected void _populatePageDocument(Page page, SolrInputDocument document) throws Exception
278    {
279        Sitemap sitemap = page.getSitemap();
280        String sitemapName = sitemap.getName();
281        Site site = page.getSite();
282        String siteName = site.getName();
283        String pageId = page.getId();
284        String pageTitle = page.getTitle();
285        String pageLongTitle = page.getLongTitle();
286        String language = sitemapName;
287        
288        // Page id and type
289        document.addField(SolrFieldNames.ID, pageId);
290        document.addField(SolrFieldNames.DOCUMENT_TYPE, SolrWebFieldNames.TYPE_PAGE);
291        
292        // Fulltext
293        SolrContentIndexer.indexFulltextValue(document, pageTitle, language);
294        if (!pageTitle.equals(pageLongTitle))
295        {
296            SolrContentIndexer.indexFulltextValue(document, pageLongTitle, language);
297        }
298        
299        // Page title
300        _indexStringFields(document, pageId, PAGE_TITLE, pageTitle, language);
301        // Page long title
302        _indexStringFields(document, pageId, PAGE_LONG_TITLE, pageLongTitle, language);
303        // Title for sorting
304        document.addField(TITLE_SORT, pageTitle);
305        
306        document.addField(TEMPLATE, page.getTemplate());
307        document.addField(PAGE_TYPE, page.getType().name());
308        document.addField(PAGE_DEPTH, page.getDepth());
309        
310        // Contents (page title shoud be indexed before because the main content can override it).
311        _populatePageContentsDocument(page, document);
312        
313        // Parent of the page
314        AmetysObject parent = page.getParent();
315        if (parent != null)
316        {
317            document.addField(PAGE_PARENT_ID, parent.getId());
318        }
319
320        // Ancestors of the page
321        List<String> ancestorIds = new ArrayList<>();
322        while (parent instanceof Page)
323        {
324            ancestorIds.add(parent.getId());
325            parent = parent.getParent();
326        }
327        document.addField(PAGE_ANCESTOR_IDS, ancestorIds);
328        
329        document.addField(SITE_NAME, siteName);
330        document.addField(SITEMAP_NAME, sitemapName);
331        document.addField(SITE_TYPE, site.getType());
332        
333        // Page tags (strict and tags including ancestor pages).
334        Set<String> tags = page.getTags()
335                .stream()
336                .filter(tagName -> _tagProviderEP.hasTag(tagName, Map.of("siteName", page.getSiteName())))
337                .collect(Collectors.toSet());
338        document.addField(SolrFieldNames.TAGS, tags);
339        document.addField(SolrFieldNames.ALL_TAGS, _getTagsWithAncestors(page));
340        
341        _populateDatesOfPage(page, document);
342        
343        // Attachments
344        _solrResourceIndexer.indexResourceCollection(page.getRootAttachments(), document, language);
345        Optional.ofNullable(page.getRootAttachments())
346                .map(AmetysObject::getId)
347                .ifPresent(id -> document.addField(PAGE_OUTGOING_REFEERENCES_RESOURCE_IDS, id));
348        _indexVisibleAttachments(page, document);
349    }
350    
351    private void _indexVisibleAttachments(Page page, SolrInputDocument document)
352    {
353        Collection<String> values = _pageVisibleAttachmentIndexerEP.getExtensionsIds()
354                .stream()
355                .map(_pageVisibleAttachmentIndexerEP::getExtension)
356                .map(attachmentIndexer -> attachmentIndexer.getVisibleAttachmentIds(page))
357                .flatMap(Collection::stream)
358                .collect(Collectors.toList());
359        document.addField(PAGE_VISIBLE_ATTACHMENT_RESOURCE_IDS, values);
360    }
361    
362    /**
363     * Populate the solr input document with dates from the page
364     * @param page The page
365     * @param document The Solr document
366     */
367    protected void _populateDatesOfPage(Page page, SolrInputDocument document)
368    {
369        // Page last modification date
370        Date lastModified = _getLastModificationDate(page);
371        if (lastModified != null)
372        {
373            String lastModifiedStr = SolrIndexer.dateFormat().format(lastModified);
374            // For 'new' search service
375            document.addField(LastModifiedSearchField.NAME, lastModifiedStr);
376            // For 'old' search service
377            document.addField(LAST_MODIFIED + "_dt", lastModifiedStr);
378        }
379        
380        // Page last validation date
381        Date lastValidation = _getLastValidationDate(page);
382        if (lastValidation != null)
383        {
384            String lastValidationStr = SolrIndexer.dateFormat().format(lastValidation);
385            // For 'new' search service
386            document.addField(LastValidationSearchField.NAME, lastValidationStr);
387            if (!LAST_VALIDATION.equals(LastValidationSearchField.NAME))
388            {
389                // For 'old' search service
390                document.addField(LAST_VALIDATION, lastValidationStr);
391            }
392        }
393        
394        // Page first validation date
395        Date firstValidation = _getFirstValidationDate(page);
396        if (firstValidation != null)
397        {
398            String firstValidationStr = SolrIndexer.dateFormat().format(firstValidation);
399            // For 'new' search service
400            document.addField(FirstValidationSearchField.NAME, firstValidationStr);
401        }
402        
403        // Page last major validation date
404        Date lastMajorValidation = _getLastMajorValidationDate(page);
405        if (lastMajorValidation != null)
406        {
407            String lastMajorValidationStr = SolrIndexer.dateFormat().format(lastMajorValidation);
408            // For 'new' search service
409            document.addField(LastMajorValidationSearchField.NAME, lastMajorValidationStr);
410        }
411        
412        // date for sorting
413        SolrInputField dateField = document.getField(DATE_FOR_SORTING);
414        if (dateField == null)
415        {
416            Collection<Object> oDateValues = document.getFieldValues(CONTENT_INTERESTING_DATES);
417            if (oDateValues != null && !oDateValues.isEmpty())
418            {
419                document.setField(DATE_FOR_SORTING, oDateValues.iterator().next());
420            }
421        }
422    }
423    
424    private void _indexStringFields(SolrInputDocument document, String documentId, String fieldName, String fieldValue, String language)
425    {
426        String possiblyTruncatedValue = SolrIndexer.truncateUtf8StringValue(fieldValue, getLogger(), documentId, fieldName);
427        
428        document.addField(fieldName, possiblyTruncatedValue);
429        document.addField(fieldName + "_txt_" + language, fieldValue);
430        document.addField(fieldName + "_txt_stemmed_" + language, fieldValue);
431        document.addField(fieldName + "_txt_ws_" + language, fieldValue);
432
433        document.addField(fieldName + "_s_lower", possiblyTruncatedValue.toLowerCase());
434        document.addField(fieldName + "_s_ws", fieldValue.toLowerCase());
435        document.addField(fieldName + "_txt", fieldValue);
436    }
437    /**
438     * Get all the page tags with their ancestors.
439     * @param page The page.
440     * @return All the page tags with their ancestors.
441     */
442    protected Set<String> _getTagsWithAncestors(Page page)
443    {
444        Set<String> allTags = new HashSet<>(page.getTags());
445        
446        Map<String, Object> tagParams = Map.of("siteName", page.getSiteName());
447        
448        for (String tagName : page.getTags())
449        {
450            allTags.add(tagName);
451            
452            // Get the ancestor tags
453            Tag tag = _tagProviderEP.getTag(tagName, tagParams);
454            for (Tag ancestor : TagHelper.getAncestors(tag, false))
455            {
456                allTags.add(ancestor.getName());
457            }
458        }
459        
460        return allTags;
461    }
462    
463    /**
464     * Index the content of the page.<p>
465     * @param page the page to index.
466     * @param document the document to populate.
467     * @throws Exception if an error occurs.
468     */
469    protected void _populatePageContentsDocument(Page page, SolrInputDocument document) throws Exception
470    {
471        if (page.getType() == PageType.CONTAINER)
472        {
473            for (Zone zone : page.getZones())
474            {
475                for (ZoneItem zoneItem : zone.getZoneItems())
476                {
477                    if (zoneItem.getType() == ZoneType.CONTENT)
478                    {
479                        try
480                        {
481                            Content content = zoneItem.getContent();
482                            document.addField(CONTENT_IDS, content.getId());
483                            
484                            for (String cType : content.getTypes())
485                            {
486                                document.addField(PAGE_CONTENT_TYPES, cType);
487                                document.addField(PAGE_CONTENT_TYPES + "_s_dv", cType); // facets
488                            }
489                            
490                            _indexFacetableField(content, document);
491                        }
492                        catch (AmetysRepositoryException e)
493                        {
494                            getLogger().error("Failed to index content referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
495                        }
496                    }
497                    else if (zoneItem.getType() == ZoneType.SERVICE)
498                    {
499                        try
500                        {
501                            String serviceId = zoneItem.getServiceId();
502                            document.addField(SERVICE_IDS, serviceId);
503
504                            Service service = _serviceExtensionPoint.getExtension(serviceId);
505                            if (service == null)
506                            {
507                                getLogger().error("The service id '{}' does not exist. It is referenced in the page {}/{}/{} ({} in zoneitem {})", serviceId, page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId());
508                            }
509                            else
510                            {
511                                service.index(zoneItem, document);
512                            }
513                        }
514                        catch (AmetysRepositoryException e)
515                        {
516                            getLogger().error("Failed to index service referenced in the page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
517                        }
518                        
519                    }
520                }
521            }
522        }
523    }
524    
525    /**
526     * Index the facetable fields of a content into the page solr document
527     * @param content The content
528     * @param document The main page solr document.
529     */
530    protected void _indexFacetableField(Content content, SolrInputDocument document)
531    {
532        IndexingModel indexingModel = null;
533        try
534        {
535            indexingModel = _cTypesHelper.getIndexingModel(content);
536        }
537        catch (RuntimeException e)
538        {
539            getLogger().error("indexContent > Error getting the indexing model of content " + content.getId(), e);
540            throw e;
541        }
542        
543        for (IndexingField field : indexingModel.getFields())
544        {
545            if (field instanceof MetadataIndexingField)
546            {
547                String metadataPath = ((MetadataIndexingField) field).getMetadataPath();
548                String[] pathSegments = metadataPath.split(ContentConstants.METADATA_PATH_SEPARATOR);
549                
550                MetadataDefinition definition = _cTypesHelper.getMetadataDefinition(pathSegments[0], content.getTypes(), content.getMixinTypes());
551                if (definition != null)
552                {
553                    _findAndIndexFacetableField(pathSegments, content.getLanguage(), content.getMetadataHolder(), definition, field, document);
554                }
555            }
556        }
557    }
558    
559    /**
560     * Index the facetable fields of a content into the page solr document
561     * @param pathSegments The path of metadata
562     * @param lang The language
563     * @param metadata The parent composite metadata
564     * @param definition The metadata definition
565     * @param field The indexing field
566     * @param pageDocument The Solr page document
567     */
568    protected void _findAndIndexFacetableField(String[] pathSegments, String lang, CompositeMetadata metadata, MetadataDefinition definition, IndexingField field, SolrInputDocument pageDocument)
569    {
570        String currentFieldName = pathSegments[0];
571        
572        if (!metadata.hasMetadata(currentFieldName))
573        {
574            // Nothing to do
575            return;
576        }
577        
578        switch (definition.getType())
579        {
580            case STRING:
581                if (definition.getEnumerator() != null)
582                {
583                    String[] strValues = metadata.getStringArray(currentFieldName, new String[0]);
584                    for (String value : strValues)
585                    {
586                        pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + currentFieldName + "_s_dv", value);
587                    }
588                }
589                break;
590            case CONTENT:
591                String[] contentIds = metadata.getStringArray(currentFieldName, new String[0]);
592                for (String contentId : contentIds)
593                {
594                    pageDocument.addField(FACETABLE_CONTENT_FIELD_PREFIX + currentFieldName + "_s_dv", contentId);
595                }
596                break;
597            case COMPOSITE:
598                if (pathSegments.length > 1)
599                {
600                    String[] followingSegments = ArrayUtils.subarray(pathSegments, 1, pathSegments.length);
601                    
602                    CompositeMetadata composite = metadata.getCompositeMetadata(currentFieldName);
603                    if (definition instanceof RepeaterDefinition)
604                    {
605                        String[] entries = composite.getMetadataNames();
606                        for (String entry : entries)
607                        {
608                            _findAndIndexFacetableField(followingSegments, lang, composite.getCompositeMetadata(entry), definition.getMetadataDefinition(followingSegments[0]), field, pageDocument);
609                        }
610                    }
611                    else
612                    {
613                        _findAndIndexFacetableField(followingSegments, lang, composite, definition.getMetadataDefinition(followingSegments[0]), field, pageDocument);
614                    }
615                }
616                
617                break;
618            default:
619                break;
620            
621        }
622    }
623
624    /**
625     * Computes the last modification date of a page.
626     * @param page the page.
627     * @return the last modification date or <code>null</code>.
628     */
629    protected Date _getLastModificationDate(Page page)
630    {
631        return _getLastDate(page, Content::getLastModified);
632    }
633    /**
634     * Computes the first validation date of a page.
635     * @param page the page.
636     * @return the first validation date or <code>null</code>.
637     */
638    protected Date _getFirstValidationDate(Page page)
639    {
640        return _getFirstDate(page, Content::getFirstValidationDate);
641    }
642
643    /**
644     * Computes the last validation date of a page.
645     * @param page the page.
646     * @return the last validation date or <code>null</code>.
647     */
648    protected Date _getLastValidationDate(Page page)
649    {
650        return _getLastDate(page, Content::getLastValidationDate);
651    }
652    
653    /**
654     * Computes the last major validation date of a page.
655     * @param page the page.
656     * @return the last major validation date or <code>null</code>.
657     */
658    protected Date _getLastMajorValidationDate(Page page)
659    {
660        return _getLastDate(page, Content::getLastMajorValidationDate);
661    }
662    
663    /**
664     * Computes a "last date" of a page, using the simple and naive following algorithm:
665     * <br>From all the dates from each of its contents, keep the greatest of them.
666     * @param page the page.
667     * @param dateRetriever The function to retrieve a Date from a Content of the Page
668     * @return the "last date" or <code>null</code>.
669     */
670    protected Date _getLastDate(Page page, Function<Content, Date> dateRetriever)
671    {
672        Date last = null;
673
674        if (page.getType() == PageType.CONTAINER)
675        {
676            for (Zone zone : page.getZones())
677            {
678                for (ZoneItem zoneItem : zone.getZoneItems())
679                {
680                    switch (zoneItem.getType())
681                    {
682                        case SERVICE:
683                            // A service has no last date
684                            break;
685                        case CONTENT:
686                            try
687                            {
688                                Date contentLast = dateRetriever.apply(zoneItem.getContent());
689
690                                if (contentLast != null && (last == null || contentLast.after(last)))
691                                {
692                                    // Keep the latest date
693                                    last = contentLast;
694                                }
695                            }
696                            catch (AmetysRepositoryException e)
697                            {
698                                getLogger().error("Failed to index last date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
699                            }
700                            break;
701                        default:
702                            break;
703                    }
704                }
705            }
706        }
707        
708        return last;
709    }
710    
711    /**
712     * Computes a "first date" of a page, using the simple and naive following algorithm:
713     * <br>From all the dates from each of its contents, keep the lowest of them.
714     * @param page the page.
715     * @param dateRetriever The function to retrieve a Date from a Content of the Page
716     * @return the "first date" or <code>null</code>.
717     */
718    protected Date _getFirstDate(Page page, Function<Content, Date> dateRetriever)
719    {
720        Date first = null;
721        
722        if (page.getType() == PageType.CONTAINER)
723        {
724            for (Zone zone : page.getZones())
725            {
726                for (ZoneItem zoneItem : zone.getZoneItems())
727                {
728                    switch (zoneItem.getType())
729                    {
730                        case SERVICE:
731                            // A service has no first date
732                            break;
733                        case CONTENT:
734                            try
735                            {
736                                Date contentFirst = dateRetriever.apply(zoneItem.getContent());
737                                
738                                if (contentFirst != null && (first == null || contentFirst.before(first)))
739                                {
740                                    // Keep the lowest date
741                                    first = contentFirst;
742                                }
743                            }
744                            catch (AmetysRepositoryException e)
745                            {
746                                getLogger().error("Failed to index first date for content in page {}/{}/{} ({} in zoneitem {})", page.getSiteName(), page.getSitemapName(), page.getPathInSitemap(), page.getId(), zoneItem.getId(), e);
747                            }
748                            break;
749                        default:
750                            break;
751                    }
752                }
753            }
754        }
755        
756        return first;
757    }
758    
759    /**
760     * Populate the solr input document by adding fields to index.
761     * @param page the page to index.
762     * @param document the solr input document
763     * @throws Exception if something goes wrong when processing the indexation of the page
764     */
765    protected void _populateAdditionalProperties(Page page, SolrInputDocument document) throws Exception
766    {
767        Collection<AdditionalPropertyIndexer> indexers = _additionalPropertiesIndexerEP.getIndexers("page");
768        for (AdditionalPropertyIndexer indexer : indexers)
769        {
770            indexer.index(page, document);
771        }
772    }
773    
774    /**
775     * Index page attachments as new entries in the index.
776     * @param collection the collection of attachments
777     * @param page the page whose attachments will be indexed
778     * @throws Exception if something goes wrong when indexing the attachments of the page
779     */
780    public void indexPageAttachments(ResourceCollection collection, Page page) throws Exception
781    {
782        Request request = ContextHelper.getRequest(_context);
783        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
784        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
785        _indexPageAttachments(collection, page, solrClient);
786    }
787    
788    private void _indexPageAttachments(ResourceCollection collection, Page page, SolrClient solrClient) throws Exception
789    {
790        if (collection == null)
791        {
792            return;
793        }
794        
795        for (AmetysObject object : collection.getChildren())
796        {
797            if (object instanceof ResourceCollection)
798            {
799                _indexPageAttachments((ResourceCollection) object, page, solrClient);
800            }
801            else if (object instanceof Resource)
802            {
803                Resource resource = (Resource) object;
804                _indexPageAttachment(resource, page, solrClient);
805            }
806        }
807    }
808    
809    /**
810     * Index a page attachment
811     * @param resource the page attachment as a {@link Resource}
812     * @param page the page whose attachment is going to be indexed
813     * @throws Exception if something goes wrong when processing the indexation of the page attachment
814     */
815    public void indexPageAttachment(Resource resource, Page page) throws Exception
816    {
817        Request request = ContextHelper.getRequest(_context);
818        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
819        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
820        _indexPageAttachment(resource, page, solrClient);
821    }
822    
823    private void _indexPageAttachment(Resource resource, Page page, SolrClient solrClient) throws Exception
824    {
825        SolrInputDocument document = new SolrInputDocument();
826        
827        // Prepare resource doc
828        _populatePageAttachmentDocument(resource, document, page);
829        
830        // Indexation of the document
831        _indexResourceDocument(resource, document, solrClient);
832    }
833    
834    private void _populatePageAttachmentDocument(Resource resource, SolrInputDocument document, Page page) throws Exception
835    {
836        String language = page.getSitemapName();
837        
838        _solrResourceIndexer.indexResource(resource, document, TYPE_PAGE_RESOURCE, language);
839        
840        Site site = page.getSite();
841        // site name - Store.YES, Index.NOT_ANALYZED
842        document.addField(SolrWebFieldNames.SITE_NAME, site.getName());
843        
844        // site type - Store.YES, Index.NOT_ANALYZED
845        document.addField(SolrWebFieldNames.SITE_TYPE, site.getType());
846        
847        // Added for Solr.
848        // Page site map name - Store.YES, Index.NOT_ANALYZED
849        document.addField(SITEMAP_NAME, page.getSitemapName());
850        
851        // Need the id of the page for unindexing attachment during the unindexing of the page
852        document.addField(ATTACHMENT_PAGE_ID, page.getId());
853    }
854    
855    /**
856     * Index a populated solr input document of type Page.
857     * @param page the page from which the input document is created
858     * @param document the input document to add to the solr index
859     * @param workspaceName The workspace name
860     * @param solrClient The solr client to use
861     * @throws SolrServerException if there is an error on the Solr server
862     * @throws IOException if there is a communication error with the server
863     */
864    protected void _indexPageDocument(Page page, SolrInputDocument document, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
865    {
866        // Retrieve appropriate solr client
867        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
868        
869        // Add document
870        UpdateResponse solrResponse = solrClient.add(collectionName, document);
871        int status = solrResponse.getStatus();
872        
873        if (status != 0)
874        {
875            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + page.getId());
876        }
877        
878        getLogger().debug("Successful page indexing. Page identifier : {}", page.getId());
879    }
880    
881    /**
882     * Index a populated solr input document of type Resource.
883     * @param resource the resource from which the input document is created
884     * @param document the input document
885     * @param solrClient The solr client to use
886     * @throws SolrServerException if there is an error on the server
887     * @throws IOException if there is a communication error with the server
888     */
889    protected void _indexResourceDocument(Resource resource, SolrInputDocument document, SolrClient solrClient) throws SolrServerException, IOException
890    {
891        // Retrieve appropriate solr client
892        Request request = ContextHelper.getRequest(_context);
893        String workspaceName = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
894        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
895        
896        // Add document
897        UpdateResponse solrResponse = solrClient.add(collectionName, document);
898        int status = solrResponse.getStatus();
899        
900        if (status != 0)
901        {
902            throw new IOException("Ametys Page indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Resource id : " + resource.getId());
903        }
904        
905        getLogger().debug("Successful resource indexing. Resource identifier : {}", resource.getId());
906    }
907    
908    ///////////////////////////////////////////////////////////////////////////
909    
910    /**
911     * Un-index a page by its ID  for all workspaces and commit
912     * @param pageId The page ID.
913     * @param unindexRecursively also unindex child pages if requested.
914     * @param unindexAttachments also unindex page attachments
915     * @throws Exception if an error occurs during index update.
916     */
917    public void unindexPage(String pageId, boolean unindexRecursively, boolean unindexAttachments) throws Exception
918    {
919        unindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, unindexRecursively, unindexAttachments);
920        unindexPage(pageId, WebConstants.LIVE_WORKSPACE, unindexRecursively, unindexAttachments);
921    }
922    
923    /**
924     * De-index a page (and optionally its children pages).
925     * @param pageId the page to be de-indexed.
926     * @param workspaceName The workspace where to work in 
927     * @param unindexRecursively also unindex child pages if requested.
928     * @param unindexAttachments also unindex page attachments
929     * @throws Exception if an error occurs during index update.
930     */
931    public void unindexPage(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws Exception
932    {
933        Request request = ContextHelper.getRequest(_context);
934        
935        // Retrieve the current workspace.
936        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
937        // Retrieve the current site name.
938        String currentSiteName = (String) request.getAttribute("siteName");
939        
940        try
941        {
942            // Force the workspace.
943            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
944    
945            getLogger().debug("Unindexing page: {}", pageId);
946            
947            _unindexPageDocument(pageId, workspaceName, unindexRecursively, unindexAttachments);
948        }
949        catch (Exception e)
950        {
951            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
952            getLogger().error(error, e);
953            throw new IndexingException(error, e);
954        }
955        finally
956        {
957            // Restore the site name.
958            request.setAttribute("siteName", currentSiteName);
959            // Restore context
960            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
961        }
962    }
963    
964    /**
965     * Deindex a document of type Page. Also deindex attachments of a page
966     * @param pageId the id of the page to deindex
967     * @param workspaceName The workspace name
968     * @param unindexRecursively also unindex child pages if requested.
969     * @param unindexAttachments also unindex page attachments
970     * @throws SolrServerException if there is an error on the server
971     * @throws IOException if there is a communication error with the server
972     * @throws QuerySyntaxException if the uri query can't be built because of a syntax error.
973     */
974    protected void _unindexPageDocument(String pageId, String workspaceName, boolean unindexRecursively, boolean unindexAttachments) throws SolrServerException, IOException, QuerySyntaxException
975    {
976        // Retrieve appropriate solr client
977        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
978        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
979        
980        getLogger().info("Unindexing page {} in workspace '{}'", pageId, workspaceName);
981        
982        Query pages = new AndQuery(new DocumentTypeQuery(TYPE_PAGE), new PageQuery(pageId, unindexRecursively));
983        Query query;
984        if (unindexRecursively && unindexAttachments)
985        {
986            // {!ametys join=pageId q=page-ancestorIds:"page://xxxx"}
987            Query joinQuery = new JoinQuery(() -> PAGE_ANCESTOR_IDS + ":\"" + pageId + "\"", ATTACHMENT_PAGE_ID);
988            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new OrQuery(new PageAttachmentQuery(pageId), joinQuery));
989            query = new OrQuery(attachments, pages);
990        }
991        else if (unindexAttachments)
992        {
993            Query attachments = new AndQuery(new DocumentTypeQuery(TYPE_PAGE_RESOURCE), new PageAttachmentQuery(pageId));
994            query = new OrQuery(attachments, pages);
995        }
996        else
997        {
998            query = pages;
999        }
1000        
1001        // Delete by query
1002        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query.build());
1003        int status = solrResponse.getStatus();
1004        
1005        if (status != 0)
1006        {
1007            throw new IOException("Ametys Page de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Page id : " + pageId);
1008        }
1009        
1010        getLogger().debug("Successful page de-indexing{}. Page identifier : {}", unindexRecursively ? " with its children" : "", pageId);
1011    }
1012    
1013    ///////////////////////////////////////////////////////////////////////////
1014    
1015    /**
1016     * Reindex a page by its ID for all workspaces and commit
1017     * @param pageId The page ID.
1018     * @param reindexRecursively also reindex child pages if requested.
1019     * @param reindexAttachments also reindex page attachments
1020     * @throws Exception if an error occurs during index update.
1021     */
1022    public void reindexPage(String pageId, boolean reindexRecursively, boolean reindexAttachments) throws Exception
1023    {
1024        reindexPage(pageId, RepositoryConstants.DEFAULT_WORKSPACE, reindexRecursively, reindexAttachments);
1025        reindexPage(pageId, WebConstants.LIVE_WORKSPACE, reindexRecursively, reindexAttachments);
1026    }
1027  
1028    
1029    /**
1030     * Reindex a page by its ID.
1031     * @param pageId The page ID.
1032     * @param workspaceName The workspace where to work in 
1033     * @param reindexRecursively also reindex child pages if requested.
1034     * @param reindexAttachments also reindex page attachments
1035     * @throws IndexingException if an error occurs during index update.
1036     */
1037    public void reindexPage(String pageId, String workspaceName, boolean reindexRecursively, boolean reindexAttachments) throws IndexingException
1038    {
1039        Request request = ContextHelper.getRequest(_context);
1040        
1041        // Retrieve the current workspace.
1042        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
1043        // Retrieve the current site name.
1044        String currentSiteName = (String) request.getAttribute("siteName");
1045        
1046        try
1047        {
1048            // Force the workspace.
1049            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
1050    
1051            getLogger().debug("Reindexing page: {}", pageId);
1052            
1053            if (_ametysObjectResolver.hasAmetysObjectForId(pageId)) // In 'live' the page may not exist
1054            {
1055                Page page = _ametysObjectResolver.resolveById(pageId);
1056                _unindexPageDocument(pageId, workspaceName, reindexRecursively, reindexAttachments);
1057                SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
1058                _indexPage(page, workspaceName, reindexRecursively, reindexAttachments, solrClient);
1059            }
1060        }
1061        catch (AmetysRepositoryException | QuerySyntaxException | SolrServerException | IOException e)
1062        {
1063            String error = String.format("Failed to unindex page %s in workspace %s", pageId, workspaceName);
1064            getLogger().error(error, e);
1065            throw new IndexingException(error, e);
1066        }
1067        finally
1068        {
1069            // Restore the site name.
1070            request.setAttribute("siteName", currentSiteName);
1071            // Restore context
1072            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
1073        }
1074    }
1075}