001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019import java.util.List;
020import java.util.Map;
021
022import org.apache.avalon.framework.context.Context;
023import org.apache.avalon.framework.context.ContextException;
024import org.apache.avalon.framework.context.Contextualizable;
025import org.apache.avalon.framework.service.ServiceException;
026import org.apache.avalon.framework.service.ServiceManager;
027import org.apache.avalon.framework.service.Serviceable;
028import org.apache.cocoon.components.ContextHelper;
029import org.apache.cocoon.environment.Request;
030import org.apache.solr.client.solrj.SolrClient;
031import org.apache.solr.client.solrj.SolrServerException;
032import org.apache.solr.client.solrj.response.UpdateResponse;
033import org.apache.solr.client.solrj.util.ClientUtils;
034
035import org.ametys.cms.content.archive.ArchiveConstants;
036import org.ametys.cms.content.indexing.solr.SolrFieldNames;
037import org.ametys.cms.content.indexing.solr.SolrIndexer;
038import org.ametys.cms.indexing.IndexingException;
039import org.ametys.cms.search.solr.SolrClientProvider;
040import org.ametys.core.schedule.progression.ContainerProgressionTracker;
041import org.ametys.core.schedule.progression.ProgressionTrackerFactory;
042import org.ametys.core.schedule.progression.SimpleProgressionTracker;
043import org.ametys.plugins.repository.AmetysObjectIterable;
044import org.ametys.plugins.repository.AmetysRepositoryException;
045import org.ametys.plugins.repository.RepositoryConstants;
046import org.ametys.plugins.repository.UnknownAmetysObjectException;
047import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
048import org.ametys.runtime.i18n.I18nizableText;
049import org.ametys.runtime.plugin.component.AbstractLogEnabled;
050import org.ametys.web.WebConstants;
051import org.ametys.web.indexing.SiteIndexer;
052import org.ametys.web.repository.page.Page;
053import org.ametys.web.repository.site.Site;
054import org.ametys.web.repository.site.SiteManager;
055import org.ametys.web.repository.sitemap.Sitemap;
056
057/**
058 * Solr implementation of {@link SiteIndexer}.
059 */
060public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
061{
    /** The site manager, used to resolve a site from its name. */
    protected SiteManager _siteManager;
    /** The solr indexer, used to index contents and resources and to commit. */
    protected SolrIndexer _solrIndexer;
    /** The solr page indexer, used to index the pages of a sitemap. */
    protected SolrPageIndexer _solrPageIndexer;
    /** The extension point giving access to the providers of additional site and sitemap documents. */
    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
    /** The Solr client provider, used to obtain update clients and collection names. */
    protected SolrClientProvider _solrClientProvider;

    /** The component context, from which the current request is retrieved. */
    private Context _context;
074    
075    @Override
076    public void contextualize(Context context) throws ContextException
077    {
078        _context = context;
079    }
080    
081    @Override
082    public void service(ServiceManager manager) throws ServiceException
083    {
084        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
085        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
086        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
087        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
088        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
089    }
090    
091    @Override
092    public void indexSite(Site site) throws IndexingException
093    {
094        indexSite(site, ProgressionTrackerFactory.createContainerProgressionTracker("Index site '" + site.getName() + "'", getLogger()));
095    }
096    
097    @Override
098    public void indexSite(Site site, ContainerProgressionTracker progressionTracker) throws IndexingException
099    {
100        indexSite(site.getName(), progressionTracker);
101    }
102    
103    @Override
104    public void indexSite(String siteName) throws IndexingException
105    {
106        indexSite(siteName, ProgressionTrackerFactory.createContainerProgressionTracker("Index site '" + siteName + "'", getLogger()));
107    }
108    
109    /**
110     * Index a site in all workspaces
111     * @param siteName  the name of the site to index.
112     * @param progressionTracker The progression of the indexation
113     * @throws IndexingException If an error occurs while indexing the site.
114     */
115    public void indexSite(String siteName, ContainerProgressionTracker progressionTracker) throws IndexingException
116    {
117        progressionTracker.addContainerStep(RepositoryConstants.DEFAULT_WORKSPACE, new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_STEPS_LABEL", List.of(RepositoryConstants.DEFAULT_WORKSPACE)));
118        progressionTracker.addContainerStep(WebConstants.LIVE_WORKSPACE, new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_STEPS_LABEL", List.of(WebConstants.LIVE_WORKSPACE)));
119        progressionTracker.addContainerStep(ArchiveConstants.ARCHIVE_WORKSPACE, new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_STEPS_LABEL", List.of(ArchiveConstants.ARCHIVE_WORKSPACE)));
120        
121        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE, (ContainerProgressionTracker) progressionTracker.getStep(RepositoryConstants.DEFAULT_WORKSPACE));
122        indexSite(siteName, WebConstants.LIVE_WORKSPACE, (ContainerProgressionTracker) progressionTracker.getStep(WebConstants.LIVE_WORKSPACE));
123        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE, (ContainerProgressionTracker) progressionTracker.getStep(ArchiveConstants.ARCHIVE_WORKSPACE));
124    }
125    
126    @Override
127    public void indexSite(String siteName, String workspaceName) throws IndexingException
128    {
129        indexSite(siteName, workspaceName, ProgressionTrackerFactory.createContainerProgressionTracker("Index site '" + siteName + "' for workspace " + workspaceName, getLogger()));
130    }
131    
132    @Override
133    public void indexSite(String siteName, String workspaceName, ContainerProgressionTracker progressionTracker) throws IndexingException
134    {
135        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
136        _indexSite(siteName, workspaceName, solrClient, true, progressionTracker);
137    }
138    
139    @Override
140    public void indexSite(String siteName, String workspaceName, SolrClient solrClient, ContainerProgressionTracker progressionTracker) throws IndexingException
141    {
142        // Pass false for commit as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client)
143        boolean commit = false;
144        _indexSite(siteName, workspaceName, solrClient, commit, progressionTracker);
145    }
146    
147    private void _createProgressionTrackerStepsForSubIndexSite(ContainerProgressionTracker progressionTracker, boolean commit) throws AmetysRepositoryException
148    {
149        progressionTracker.addSimpleStep("unindexing", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_UNINDEXING_DOCUMENT_SUB_STEP_LABEL"));
150
151        progressionTracker.addSimpleStep("contents", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_CONTENT_STEP_LABEL"));
152        
153        progressionTracker.addSimpleStep("sitemaps", new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_SITEMAPS_STEP_LABEL"));
154        
155        progressionTracker.addSimpleStep("resources", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_RESOURCES_STEP_LABEL"));
156        
157        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
158        {
159            SiteDocumentProvider extension = _siteDocProviderEP.getExtension(docProviderId);
160            progressionTracker.addContainerStep("documents", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_DOCUMENTS_PROVIDER_STEPS_LABEL", Map.of("0", extension.getLabel())));
161        }
162
163        if (commit)
164        {
165            progressionTracker.addSimpleStep("commit", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_SAVING_SUB_STEP_LABEL"));
166        }
167    }
168    
169    private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commit, ContainerProgressionTracker progressionTracker) throws IndexingException
170    {
171        Request request = ContextHelper.getRequest(_context);
172        
173        // Retrieve the current workspace.
174        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
175        // Retrieve the current site name.
176        String currentSiteName = (String) request.getAttribute("siteName");
177        
178        try
179        {
180            // Force the workspace.
181            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
182            
183            // Get the site in the given workspace.
184            Site site = null;
185            try
186            {
187                site = _siteManager.getSite(siteName);
188            }
189            catch (UnknownAmetysObjectException e)    
190            {
191                // Site might not exist in the desired workspace (archive for example)
192                return;
193            }
194            
195            _createProgressionTrackerStepsForSubIndexSite(progressionTracker, commit);
196            
197            // Set the site name in the request.
198            request.setAttribute("siteName", siteName);
199
200            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
201            
202            // Delete all documents from this site in current workspace
203            _unindexSiteDocuments(siteName, workspaceName, solrClient, progressionTracker.getStep("unindexing"));
204            
205            // Index the site's contents in current workspace
206            _indexContents(site, workspaceName, solrClient, progressionTracker.getStep("contents"));
207            
208            // Index the site's sitemaps and pages in current workspace
209            _indexSitemaps(site, workspaceName, solrClient, progressionTracker.getStep("sitemaps"));
210            
211            // Index the site's resources in current workspace
212            _indexResources(site, workspaceName, solrClient, progressionTracker.getStep("resources"));
213            
214            // Add additional site documents
215            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
216            {
217                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
218                
219                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
220                
221                docProvider.indexSiteDocuments(site, solrClient, progressionTracker.getStep("documents-" + docProviderId));
222            }
223            
224            if (commit)
225            {
226                _solrIndexer.commit(workspaceName, solrClient);
227                ((SimpleProgressionTracker) progressionTracker.getStep("commit")).increment();
228            }
229        }
230        catch (Exception e)
231        {
232            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
233            getLogger().error(error, e);
234            throw new IndexingException(error, e);
235        }
236        finally
237        {
238            // Restore the site name.
239            request.setAttribute("siteName", currentSiteName);
240            // Restore context
241            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
242        }
243    }
244    
245    @Override
246    public void indexSitemap(Sitemap sitemap) throws IndexingException
247    {
248        indexSitemap(sitemap.getSiteName(), sitemap.getName());
249    }
250    
251    @Override
252    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
253    {
254        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
255        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
256    }
257    
258    @Override
259    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
260    {
261        Request request = ContextHelper.getRequest(_context);
262        
263        // Retrieve the current workspace.
264        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
265        
266        try
267        {
268            // Force the workspace.
269            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
270            request.setAttribute("siteName", siteName);
271            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
272            
273            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
274            
275            _indexSitemap(siteName, sitemapName, workspaceName, solrClient);
276            
277            _solrIndexer.commit(workspaceName, solrClient);
278        }
279        catch (Exception e)
280        {
281            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
282            getLogger().error(error, e);
283            throw new IndexingException(error, e);
284        }
285        finally
286        {
287            // Restore context
288            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
289        }
290    }
291    
292    @Override
293    public void unindexSite(String siteName) throws IndexingException
294    {
295        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
296        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
297        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
298    }
299   
300    @Override
301    public void unindexSite(String siteName, String workspaceName) throws IndexingException
302    {
303        Request request = ContextHelper.getRequest(_context);
304        
305        // Retrieve the current workspace.
306        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
307        
308        try
309        {
310            // Force the workspace.
311            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
312            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
313            
314            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
315            
316            _unindexSiteDocuments(siteName, workspaceName, solrClient);
317            
318        }
319        catch (Exception e)
320        {
321            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
322            getLogger().error(error, e);
323            throw new IndexingException(error, e);
324        }
325        finally
326        {
327            // Restore context
328            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
329        }
330    }
331    
332    @Override
333    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
334    {
335        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
336        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
337    }
338
339    @Override
340    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
341    {
342        Request request = ContextHelper.getRequest(_context);
343        
344        // Retrieve the current workspace.
345        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
346        
347        try
348        {
349            // Force the workspace.
350            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
351            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
352            
353            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
354            
355            _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
356        }
357        catch (Exception e)
358        {
359            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
360            getLogger().error(error, e);
361            throw new IndexingException(error, e);
362        }
363        finally
364        {
365            // Restore context
366            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
367        }
368    }
369    
370    private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws Exception
371    {
372        // The sitemap node may not exist if site was created but not yet configured
373        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
374        {
375            
376            AmetysObjectIterable<Sitemap> sitemaps = site.getSitemaps();
377            long nbOfSitemaps = sitemaps.getSize();
378            
379            progressionTracker.setSize(nbOfSitemaps);
380
381            for (Sitemap sitemap : sitemaps)
382            {
383                getLogger().info("Indexing sitemap {} started", sitemap.getName());
384                
385                long start = System.currentTimeMillis();
386                
387                _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient);
388                
389                long end = System.currentTimeMillis();
390                
391                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
392                
393                progressionTracker.increment();
394            }
395        }
396    }
397    
398    private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception
399    {
400        // Get the sitemap in the given workspace.
401        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
402        
403        // First delete the directory if exists
404        _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
405
406        // Index pages of this sitemap
407        AmetysObjectIterable<? extends Page> children = sitemap.getChildrenPages();
408        for (Page page : children)
409        {
410            // Index page recursively, without committing.
411            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient);
412        }
413        
414        // Add additional sitemap documents
415        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
416        {
417            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
418            
419            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
420            
421            docProvider.indexSitemapDocuments(sitemap, solrClient);
422        }
423    }
424    
425    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
426    {
427        _unindexSiteDocuments(siteName, workspaceName, solrClient, ProgressionTrackerFactory.createSimpleProgressionTracker("Unindex site document for site '" + siteName + "' for workspace " + workspaceName, getLogger()));
428    }
429    
430    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws SolrServerException, IOException
431    {
432        // query
433        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
434        
435        // delete
436        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
437        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
438        int status = solrResponse.getStatus();
439        
440        if (status != 0)
441        {
442            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
443        }
444        
445        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
446        
447        progressionTracker.increment();
448    }
449    
450    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException
451    {
452        // query
453        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
454        
455        // delete
456        String collectionName = _solrClientProvider.getCollectionName(workspace);
457        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
458        int status = solrResponse.getStatus();
459        
460        if (status != 0)
461        {
462            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
463        }
464        
465        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
466    }
467    
468    /**
469     * Index the contents of a site.
470     * @param site The site to index.
471     * @param workspaceName The workspace name
472     * @param solrClient The solr client to use
473     * @param progressionTracker The progression of the indexation
474     * @throws Exception If an error occurs indexing the contents.
475     */
476    protected void _indexContents(Site site, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws Exception
477    {
478        _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient, progressionTracker);
479    }
480    
481    /**
482     * Index the resources of a site.
483     * @param site The site to index.
484     * @param workspaceName The workspace name
485     * @param solrClient The solr client to use
486     * @param progressionTracker The progression of the indexation
487     * @throws Exception If an error occurs indexing the resources.
488     */
489    protected void _indexResources(Site site, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws Exception
490    {
491        try
492        {
493            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient, progressionTracker);
494        }
495        catch (UnknownAmetysObjectException e)
496        {
497            // Ignore if the resource root is not present.
498            progressionTracker.increment();
499        }
500    }
501}