001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.context.Context;
021import org.apache.avalon.framework.context.ContextException;
022import org.apache.avalon.framework.context.Contextualizable;
023import org.apache.avalon.framework.service.ServiceException;
024import org.apache.avalon.framework.service.ServiceManager;
025import org.apache.avalon.framework.service.Serviceable;
026import org.apache.cocoon.components.ContextHelper;
027import org.apache.cocoon.environment.Request;
028import org.apache.solr.client.solrj.SolrClient;
029import org.apache.solr.client.solrj.SolrServerException;
030import org.apache.solr.client.solrj.response.UpdateResponse;
031import org.apache.solr.client.solrj.util.ClientUtils;
032
033import org.ametys.cms.content.indexing.solr.SolrFieldNames;
034import org.ametys.cms.content.indexing.solr.SolrIndexer;
035import org.ametys.cms.indexing.IndexingException;
036import org.ametys.cms.repository.RequestAttributeWorkspaceSelector;
037import org.ametys.cms.search.solr.SolrClientProvider;
038import org.ametys.plugins.repository.RepositoryConstants;
039import org.ametys.plugins.repository.UnknownAmetysObjectException;
040import org.ametys.runtime.plugin.component.AbstractLogEnabled;
041import org.ametys.web.WebConstants;
042import org.ametys.web.indexing.SiteIndexer;
043import org.ametys.web.repository.page.Page;
044import org.ametys.web.repository.site.Site;
045import org.ametys.web.repository.site.SiteManager;
046import org.ametys.web.repository.sitemap.Sitemap;
047
048/**
049 * Solr implementation of {@link SiteIndexer}.
050 */
051public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
052{
053    
    /** The site manager, used to resolve a site from its name in the currently forced workspace. */
    protected SiteManager _siteManager;
    /** The solr indexer, used to index contents and resources and to commit/optimize a workspace index. */
    protected SolrIndexer _solrIndexer;
    /** The solr page indexer, used to index the pages of a sitemap. */
    protected SolrPageIndexer _solrPageIndexer;
    /** The extension point listing the providers of additional site and sitemap documents. */
    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
    /** The Solr client provider, giving the update client and collection name of a workspace. */
    protected SolrClientProvider _solrClientProvider;

    /** The Avalon context, from which the current request is retrieved. */
    private Context _context;
066    
067    @Override
068    public void contextualize(Context context) throws ContextException
069    {
070        _context = context;
071    }
072    
073    @Override
074    public void service(ServiceManager manager) throws ServiceException
075    {
076        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
077        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
078        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
079        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
080        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
081    }
082    
083    @Override
084    public void indexSite(Site site) throws IndexingException
085    {
086        indexSite(site.getName());
087    }
088    
089    @Override
090    public void indexSite (String siteName) throws IndexingException
091    {
092        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
093        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
094        // TODO index archives workspace
095    }
096    
097    @Override
098    public void indexSite(String siteName, String workspaceName) throws IndexingException
099    {
100        Request request = ContextHelper.getRequest(_context);
101        
102        // Retrieve the current workspace.
103        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
104        // Retrieve the current site name.
105        String currentSiteName = (String) request.getAttribute("siteName");
106        
107        try
108        {
109            // Force the workspace.
110            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
111            
112            // Get the site in the given workspace.
113            Site site = _siteManager.getSite(siteName);
114            
115            // Site might not exist in the desired workspace (archive for example)
116            if (site == null)
117            {
118                return;
119            }
120            
121            // Set the site name in the request.
122            request.setAttribute("siteName", siteName);
123
124            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
125            
126            // Delete all documents from this site in current workspace
127            _unindexSiteDocuments(siteName, workspaceName);
128            
129            // Index the site's contents in current workspace
130            _indexContents(site, workspaceName);
131            
132            // Index the site's sitemaps and pages in current workspace
133            _indexSitemaps(site, workspaceName);
134            
135            // Index the site's resources in current workspace
136            _indexResources(site, workspaceName);
137            
138            // Add additional site documents
139            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
140            {
141                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
142                
143                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
144                
145                docProvider.indexSiteDocuments(site);
146            }
147            
148            _solrIndexer.commit(workspaceName);
149            _solrIndexer.optimize(workspaceName);
150        }
151        catch (Exception e)
152        {
153            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
154            getLogger().error(error, e);
155            throw new IndexingException(error, e);
156        }
157        finally
158        {
159            // Restore the site name.
160            request.setAttribute("siteName", currentSiteName);
161            // Restore context
162            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
163        }
164    }
165    
166    @Override
167    public void indexSitemap(Sitemap sitemap) throws IndexingException
168    {
169        indexSitemap(sitemap.getSiteName(), sitemap.getName());
170    }
171    
172    @Override
173    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
174    {
175        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
176        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
177    }
178    
179    @Override
180    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
181    {
182        Request request = ContextHelper.getRequest(_context);
183        
184        // Retrieve the current workspace.
185        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
186        
187        try
188        {
189            // Force the workspace.
190            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
191            request.setAttribute("siteName", siteName);
192            
193            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
194            
195            _indexSitemap (siteName, sitemapName, workspaceName);
196            
197            _solrIndexer.commit(workspaceName);
198            _solrIndexer.optimize(workspaceName);
199        }
200        catch (Exception e)
201        {
202            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
203            getLogger().error(error, e);
204            throw new IndexingException(error, e);
205        }
206        finally
207        {
208            // Restore context
209            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
210        }
211    }
212    
213    @Override
214    public void unindexSite(String siteName) throws IndexingException
215    {
216        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
217        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
218        // TODO unindex archives workspace
219    }
220   
221    @Override
222    public void unindexSite(String siteName, String workspaceName) throws IndexingException
223    {
224        Request request = ContextHelper.getRequest(_context);
225        
226        // Retrieve the current workspace.
227        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
228        
229        try
230        {
231            // Force the workspace.
232            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
233            
234            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
235            
236            _unindexSiteDocuments(siteName, workspaceName);
237            _solrIndexer.commit(workspaceName);
238            
239        }
240        catch (Exception e)
241        {
242            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
243            getLogger().error(error, e);
244            throw new IndexingException(error, e);
245        }
246        finally
247        {
248            // Restore context
249            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
250        }
251    }
252    
253    @Override
254    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
255    {
256        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
257        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
258    }
259
260    @Override
261    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
262    {
263        Request request = ContextHelper.getRequest(_context);
264        
265        // Retrieve the current workspace.
266        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
267        
268        try
269        {
270            // Force the workspace.
271            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
272            
273            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
274            
275            _unindexSitemapDocuments(siteName, sitemapName, workspaceName);
276            
277            _solrIndexer.commit(workspaceName);
278        }
279        catch (Exception e)
280        {
281            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
282            getLogger().error(error, e);
283            throw new IndexingException(error, e);
284        }
285        finally
286        {
287            // Restore context
288            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
289        }
290    }
291    
292    private void _indexSitemaps(Site site, String workspaceName) throws Exception
293    {
294        for (Sitemap sitemap : site.getSitemaps())
295        {
296            getLogger().info("Indexing sitemap {} started", sitemap.getName());
297            
298            long start = System.currentTimeMillis();
299            
300            _indexSitemap(site.getName(), sitemap.getName(), workspaceName);
301            
302            long end = System.currentTimeMillis();
303            
304            getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
305        }
306    }
307    
308    private void _indexSitemap (String siteName, String sitemapName, String workspaceName) throws Exception
309    {
310        // Get the sitemap in the given workspace.
311        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
312        
313        // First delete the directory if exists
314        _unindexSitemapDocuments(siteName, sitemapName, workspaceName);
315
316        // Index pages of this sitemap
317        for (Page page : sitemap.getChildrenPages())
318        {
319            // Index page recursively, without committing.
320            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, false);
321        }
322        
323        // Add additional sitemap documents
324        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
325        {
326            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
327            
328            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
329            
330            docProvider.indexSitemapDocuments(sitemap);
331        }
332    }
333    
334    private void _unindexSiteDocuments(String siteName, String workspaceName) throws SolrServerException, IOException
335    {
336        // query
337        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
338        
339        // delete
340        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
341        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
342        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
343        int status = solrResponse.getStatus();
344        
345        if (status != 0)
346        {
347            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
348        }
349        
350        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
351    }
352    
353    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace) throws SolrServerException, IOException
354    {
355        // query
356        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
357        
358        // delete
359        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspace);
360        String collectionName = _solrClientProvider.getCollectionName(workspace);
361        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
362        int status = solrResponse.getStatus();
363        
364        if (status != 0)
365        {
366            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
367        }
368        
369        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
370    }
371    
372    /**
373     * Index the contents of a site.
374     * @param site The site to index.
375     * @param workspaceName The workspace name
376     * @throws Exception If an error occurs indexing the contents.
377     */
378    protected void _indexContents(Site site, String workspaceName) throws Exception
379    {
380        _solrIndexer.indexContents(site.getContents(), workspaceName, false);
381    }
382    
383    /**
384     * Index the resources of a site.
385     * @param site The site to index.
386     * @param workspaceName The workspace name
387     * @throws Exception If an error occurs indexing the resources.
388     */
389    protected void _indexResources(Site site, String workspaceName) throws Exception
390    {
391        try
392        {
393            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, false);
394        }
395        catch (UnknownAmetysObjectException e)
396        {
397            // Ignore if the resource root is not present.
398        }
399    }
400}