001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.context.Context;
021import org.apache.avalon.framework.context.ContextException;
022import org.apache.avalon.framework.context.Contextualizable;
023import org.apache.avalon.framework.service.ServiceException;
024import org.apache.avalon.framework.service.ServiceManager;
025import org.apache.avalon.framework.service.Serviceable;
026import org.apache.cocoon.components.ContextHelper;
027import org.apache.cocoon.environment.Request;
028import org.apache.solr.client.solrj.SolrClient;
029import org.apache.solr.client.solrj.SolrServerException;
030import org.apache.solr.client.solrj.response.UpdateResponse;
031import org.apache.solr.client.solrj.util.ClientUtils;
032
033import org.ametys.cms.content.archive.ArchiveConstants;
034import org.ametys.cms.content.indexing.solr.SolrFieldNames;
035import org.ametys.cms.content.indexing.solr.SolrIndexer;
036import org.ametys.cms.indexing.IndexingException;
037import org.ametys.cms.search.solr.SolrClientProvider;
038import org.ametys.plugins.repository.AmetysObjectIterable;
039import org.ametys.plugins.repository.RepositoryConstants;
040import org.ametys.plugins.repository.UnknownAmetysObjectException;
041import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
042import org.ametys.runtime.plugin.component.AbstractLogEnabled;
043import org.ametys.web.WebConstants;
044import org.ametys.web.indexing.SiteIndexer;
045import org.ametys.web.repository.page.Page;
046import org.ametys.web.repository.site.Site;
047import org.ametys.web.repository.site.SiteManager;
048import org.ametys.web.repository.sitemap.Sitemap;
049
050/**
051 * Solr implementation of {@link SiteIndexer}.
052 */
053public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
054{
055    
056    /** The site manager. */
057    protected SiteManager _siteManager;
058    /** The solr indexer. */
059    protected SolrIndexer _solrIndexer;
060    /** The solr page indexer. */
061    protected SolrPageIndexer _solrPageIndexer;
062    /** The site document provider handler. */
063    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
064    /** The Solr client provider */
065    protected SolrClientProvider _solrClientProvider;
066
067    private Context _context;
068    
069    @Override
070    public void contextualize(Context context) throws ContextException
071    {
072        _context = context;
073    }
074    
075    @Override
076    public void service(ServiceManager manager) throws ServiceException
077    {
078        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
079        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
080        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
081        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
082        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
083    }
084    
085    @Override
086    public void indexSite(Site site) throws IndexingException
087    {
088        indexSite(site.getName());
089    }
090    
091    @Override
092    public void indexSite(String siteName) throws IndexingException
093    {
094        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
095        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
096        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
097    }
098    
099    @Override
100    public void indexSite(String siteName, String workspaceName) throws IndexingException
101    {
102        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
103        _indexSite(siteName, workspaceName, solrClient, true);
104    }
105    
106    @Override
107    public void indexSite(String siteName, String workspaceName, SolrClient solrClient) throws IndexingException
108    {
109        // Pass false for commit as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client)
110        boolean commit = false;
111        _indexSite(siteName, workspaceName, solrClient, commit);
112    }
113    
114    private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commit) throws IndexingException
115    {
116        Request request = ContextHelper.getRequest(_context);
117        
118        // Retrieve the current workspace.
119        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
120        // Retrieve the current site name.
121        String currentSiteName = (String) request.getAttribute("siteName");
122        
123        try
124        {
125            // Force the workspace.
126            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
127            
128            // Get the site in the given workspace.
129            Site site = null;
130            try
131            {
132                site = _siteManager.getSite(siteName);
133            }
134            catch (UnknownAmetysObjectException e)    
135            {
136                // Site might not exist in the desired workspace (archive for example)
137                return;
138            }
139            
140            // Set the site name in the request.
141            request.setAttribute("siteName", siteName);
142
143            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
144            
145            // Delete all documents from this site in current workspace
146            _unindexSiteDocuments(siteName, workspaceName, solrClient);
147            
148            // Index the site's contents in current workspace
149            _indexContents(site, workspaceName, solrClient);
150            
151            // Index the site's sitemaps and pages in current workspace
152            _indexSitemaps(site, workspaceName, solrClient);
153            
154            // Index the site's resources in current workspace
155            _indexResources(site, workspaceName, solrClient);
156            
157            // Add additional site documents
158            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
159            {
160                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
161                
162                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
163                
164                docProvider.indexSiteDocuments(site, solrClient);
165            }
166            
167            if (commit)
168            {
169                _solrIndexer.commit(workspaceName, solrClient);
170            }
171        }
172        catch (Exception e)
173        {
174            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
175            getLogger().error(error, e);
176            throw new IndexingException(error, e);
177        }
178        finally
179        {
180            // Restore the site name.
181            request.setAttribute("siteName", currentSiteName);
182            // Restore context
183            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
184        }
185    }
186    
187    @Override
188    public void indexSitemap(Sitemap sitemap) throws IndexingException
189    {
190        indexSitemap(sitemap.getSiteName(), sitemap.getName());
191    }
192    
193    @Override
194    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
195    {
196        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
197        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
198    }
199    
200    @Override
201    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
202    {
203        Request request = ContextHelper.getRequest(_context);
204        
205        // Retrieve the current workspace.
206        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
207        
208        try
209        {
210            // Force the workspace.
211            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
212            request.setAttribute("siteName", siteName);
213            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
214            
215            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
216            
217            _indexSitemap(siteName, sitemapName, workspaceName, solrClient);
218            
219            _solrIndexer.commit(workspaceName, solrClient);
220        }
221        catch (Exception e)
222        {
223            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
224            getLogger().error(error, e);
225            throw new IndexingException(error, e);
226        }
227        finally
228        {
229            // Restore context
230            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
231        }
232    }
233    
234    @Override
235    public void unindexSite(String siteName) throws IndexingException
236    {
237        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
238        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
239        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
240    }
241   
242    @Override
243    public void unindexSite(String siteName, String workspaceName) throws IndexingException
244    {
245        Request request = ContextHelper.getRequest(_context);
246        
247        // Retrieve the current workspace.
248        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
249        
250        try
251        {
252            // Force the workspace.
253            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
254            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
255            
256            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
257            
258            _unindexSiteDocuments(siteName, workspaceName, solrClient);
259            
260        }
261        catch (Exception e)
262        {
263            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
264            getLogger().error(error, e);
265            throw new IndexingException(error, e);
266        }
267        finally
268        {
269            // Restore context
270            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
271        }
272    }
273    
274    @Override
275    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
276    {
277        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
278        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
279    }
280
281    @Override
282    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
283    {
284        Request request = ContextHelper.getRequest(_context);
285        
286        // Retrieve the current workspace.
287        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
288        
289        try
290        {
291            // Force the workspace.
292            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
293            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
294            
295            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
296            
297            _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
298        }
299        catch (Exception e)
300        {
301            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
302            getLogger().error(error, e);
303            throw new IndexingException(error, e);
304        }
305        finally
306        {
307            // Restore context
308            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
309        }
310    }
311    
312    private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient) throws Exception
313    {
314        // The sitemap node may not exist if site was created but not yet configured
315        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
316        {
317            AmetysObjectIterable<Sitemap> sitemaps = site.getSitemaps();
318            for (Sitemap sitemap : sitemaps)
319            {
320                getLogger().info("Indexing sitemap {} started", sitemap.getName());
321                
322                long start = System.currentTimeMillis();
323                
324                _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient);
325                
326                long end = System.currentTimeMillis();
327                
328                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
329            }
330        }
331    }
332    
333    private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception
334    {
335        // Get the sitemap in the given workspace.
336        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
337        
338        // First delete the directory if exists
339        _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
340
341        // Index pages of this sitemap
342        AmetysObjectIterable<? extends Page> children = sitemap.getChildrenPages();
343        for (Page page : children)
344        {
345            // Index page recursively, without committing.
346            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient);
347        }
348        
349        // Add additional sitemap documents
350        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
351        {
352            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
353            
354            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
355            
356            docProvider.indexSitemapDocuments(sitemap, solrClient);
357        }
358    }
359    
360    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
361    {
362        // query
363        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
364        
365        // delete
366        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
367        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
368        int status = solrResponse.getStatus();
369        
370        if (status != 0)
371        {
372            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
373        }
374        
375        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
376    }
377    
378    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException
379    {
380        // query
381        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
382        
383        // delete
384        String collectionName = _solrClientProvider.getCollectionName(workspace);
385        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
386        int status = solrResponse.getStatus();
387        
388        if (status != 0)
389        {
390            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
391        }
392        
393        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
394    }
395    
396    /**
397     * Index the contents of a site.
398     * @param site The site to index.
399     * @param workspaceName The workspace name
400     * @param solrClient The solr client to use
401     * @throws Exception If an error occurs indexing the contents.
402     */
403    protected void _indexContents(Site site, String workspaceName, SolrClient solrClient) throws Exception
404    {
405        _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient);
406    }
407    
408    /**
409     * Index the resources of a site.
410     * @param site The site to index.
411     * @param workspaceName The workspace name
412     * @param solrClient The solr client to use
413     * @throws Exception If an error occurs indexing the resources.
414     */
415    protected void _indexResources(Site site, String workspaceName, SolrClient solrClient) throws Exception
416    {
417        try
418        {
419            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient);
420        }
421        catch (UnknownAmetysObjectException e)
422        {
423            // Ignore if the resource root is not present.
424        }
425    }
426}