001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.context.Context;
021import org.apache.avalon.framework.context.ContextException;
022import org.apache.avalon.framework.context.Contextualizable;
023import org.apache.avalon.framework.service.ServiceException;
024import org.apache.avalon.framework.service.ServiceManager;
025import org.apache.avalon.framework.service.Serviceable;
026import org.apache.cocoon.components.ContextHelper;
027import org.apache.cocoon.environment.Request;
028import org.apache.solr.client.solrj.SolrClient;
029import org.apache.solr.client.solrj.SolrServerException;
030import org.apache.solr.client.solrj.response.UpdateResponse;
031import org.apache.solr.client.solrj.util.ClientUtils;
032
033import org.ametys.cms.content.archive.ArchiveConstants;
034import org.ametys.cms.content.indexing.solr.SolrFieldNames;
035import org.ametys.cms.content.indexing.solr.SolrIndexer;
036import org.ametys.cms.indexing.IndexingException;
037import org.ametys.cms.search.solr.SolrClientProvider;
038import org.ametys.plugins.repository.RepositoryConstants;
039import org.ametys.plugins.repository.UnknownAmetysObjectException;
040import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
041import org.ametys.runtime.plugin.component.AbstractLogEnabled;
042import org.ametys.web.WebConstants;
043import org.ametys.web.indexing.SiteIndexer;
044import org.ametys.web.repository.page.Page;
045import org.ametys.web.repository.site.Site;
046import org.ametys.web.repository.site.SiteManager;
047import org.ametys.web.repository.sitemap.Sitemap;
048
049/**
050 * Solr implementation of {@link SiteIndexer}.
051 */
052public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
053{
054    
055    /** The site manager. */
056    protected SiteManager _siteManager;
057    /** The solr indexer. */
058    protected SolrIndexer _solrIndexer;
059    /** The solr page indexer. */
060    protected SolrPageIndexer _solrPageIndexer;
061    /** The site document provider handler. */
062    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
063    /** The Solr client provider */
064    protected SolrClientProvider _solrClientProvider;
065
066    private Context _context;
067    
068    @Override
069    public void contextualize(Context context) throws ContextException
070    {
071        _context = context;
072    }
073    
074    @Override
075    public void service(ServiceManager manager) throws ServiceException
076    {
077        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
078        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
079        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
080        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
081        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
082    }
083    
084    @Override
085    public void indexSite(Site site) throws IndexingException
086    {
087        indexSite(site.getName());
088    }
089    
090    @Override
091    public void indexSite (String siteName) throws IndexingException
092    {
093        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
094        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
095        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
096    }
097    
098    @Override
099    public void indexSite(String siteName, String workspaceName) throws IndexingException
100    {
101        Request request = ContextHelper.getRequest(_context);
102        
103        // Retrieve the current workspace.
104        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
105        // Retrieve the current site name.
106        String currentSiteName = (String) request.getAttribute("siteName");
107        
108        try
109        {
110            // Force the workspace.
111            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
112            
113            // Get the site in the given workspace.
114            Site site = _siteManager.getSite(siteName);
115            
116            // Site might not exist in the desired workspace (archive for example)
117            if (site == null)
118            {
119                return;
120            }
121            
122            // Set the site name in the request.
123            request.setAttribute("siteName", siteName);
124
125            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
126            
127            // Delete all documents from this site in current workspace
128            _unindexSiteDocuments(siteName, workspaceName);
129            
130            // Index the site's contents in current workspace
131            _indexContents(site, workspaceName);
132            
133            // Index the site's sitemaps and pages in current workspace
134            _indexSitemaps(site, workspaceName);
135            
136            // Index the site's resources in current workspace
137            _indexResources(site, workspaceName);
138            
139            // Add additional site documents
140            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
141            {
142                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
143                
144                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
145                
146                docProvider.indexSiteDocuments(site);
147            }
148            
149            _solrIndexer.commit(workspaceName);
150            _solrIndexer.optimize(workspaceName);
151        }
152        catch (Exception e)
153        {
154            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
155            getLogger().error(error, e);
156            throw new IndexingException(error, e);
157        }
158        finally
159        {
160            // Restore the site name.
161            request.setAttribute("siteName", currentSiteName);
162            // Restore context
163            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
164        }
165    }
166    
167    @Override
168    public void indexSitemap(Sitemap sitemap) throws IndexingException
169    {
170        indexSitemap(sitemap.getSiteName(), sitemap.getName());
171    }
172    
173    @Override
174    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
175    {
176        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
177        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
178    }
179    
180    @Override
181    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
182    {
183        Request request = ContextHelper.getRequest(_context);
184        
185        // Retrieve the current workspace.
186        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
187        
188        try
189        {
190            // Force the workspace.
191            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
192            request.setAttribute("siteName", siteName);
193            
194            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
195            
196            _indexSitemap (siteName, sitemapName, workspaceName);
197            
198            _solrIndexer.commit(workspaceName);
199            _solrIndexer.optimize(workspaceName);
200        }
201        catch (Exception e)
202        {
203            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
204            getLogger().error(error, e);
205            throw new IndexingException(error, e);
206        }
207        finally
208        {
209            // Restore context
210            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
211        }
212    }
213    
214    @Override
215    public void unindexSite(String siteName) throws IndexingException
216    {
217        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
218        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
219        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
220    }
221   
222    @Override
223    public void unindexSite(String siteName, String workspaceName) throws IndexingException
224    {
225        Request request = ContextHelper.getRequest(_context);
226        
227        // Retrieve the current workspace.
228        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
229        
230        try
231        {
232            // Force the workspace.
233            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
234            
235            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
236            
237            _unindexSiteDocuments(siteName, workspaceName);
238            _solrIndexer.commit(workspaceName);
239            
240        }
241        catch (Exception e)
242        {
243            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
244            getLogger().error(error, e);
245            throw new IndexingException(error, e);
246        }
247        finally
248        {
249            // Restore context
250            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
251        }
252    }
253    
254    @Override
255    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
256    {
257        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
258        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
259    }
260
261    @Override
262    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
263    {
264        Request request = ContextHelper.getRequest(_context);
265        
266        // Retrieve the current workspace.
267        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
268        
269        try
270        {
271            // Force the workspace.
272            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
273            
274            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
275            
276            _unindexSitemapDocuments(siteName, sitemapName, workspaceName);
277            
278            _solrIndexer.commit(workspaceName);
279        }
280        catch (Exception e)
281        {
282            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
283            getLogger().error(error, e);
284            throw new IndexingException(error, e);
285        }
286        finally
287        {
288            // Restore context
289            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
290        }
291    }
292    
293    private void _indexSitemaps(Site site, String workspaceName) throws Exception
294    {
295        // The sitemap node may not exist if site was created but not yet configured
296        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
297        {
298            for (Sitemap sitemap : site.getSitemaps())
299            {
300                getLogger().info("Indexing sitemap {} started", sitemap.getName());
301                
302                long start = System.currentTimeMillis();
303                
304                _indexSitemap(site.getName(), sitemap.getName(), workspaceName);
305                
306                long end = System.currentTimeMillis();
307                
308                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
309            }
310        }
311    }
312    
313    private void _indexSitemap (String siteName, String sitemapName, String workspaceName) throws Exception
314    {
315        // Get the sitemap in the given workspace.
316        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
317        
318        // First delete the directory if exists
319        _unindexSitemapDocuments(siteName, sitemapName, workspaceName);
320
321        // Index pages of this sitemap
322        for (Page page : sitemap.getChildrenPages())
323        {
324            // Index page recursively, without committing.
325            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, false);
326        }
327        
328        // Add additional sitemap documents
329        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
330        {
331            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
332            
333            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
334            
335            docProvider.indexSitemapDocuments(sitemap);
336        }
337    }
338    
339    private void _unindexSiteDocuments(String siteName, String workspaceName) throws SolrServerException, IOException
340    {
341        // query
342        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
343        
344        // delete
345        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
346        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
347        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
348        int status = solrResponse.getStatus();
349        
350        if (status != 0)
351        {
352            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
353        }
354        
355        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
356    }
357    
358    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace) throws SolrServerException, IOException
359    {
360        // query
361        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
362        
363        // delete
364        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspace);
365        String collectionName = _solrClientProvider.getCollectionName(workspace);
366        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
367        int status = solrResponse.getStatus();
368        
369        if (status != 0)
370        {
371            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
372        }
373        
374        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
375    }
376    
377    /**
378     * Index the contents of a site.
379     * @param site The site to index.
380     * @param workspaceName The workspace name
381     * @throws Exception If an error occurs indexing the contents.
382     */
383    protected void _indexContents(Site site, String workspaceName) throws Exception
384    {
385        _solrIndexer.indexContents(site.getContents(), workspaceName, true, false);
386    }
387    
388    /**
389     * Index the resources of a site.
390     * @param site The site to index.
391     * @param workspaceName The workspace name
392     * @throws Exception If an error occurs indexing the resources.
393     */
394    protected void _indexResources(Site site, String workspaceName) throws Exception
395    {
396        try
397        {
398            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, false);
399        }
400        catch (UnknownAmetysObjectException e)
401        {
402            // Ignore if the resource root is not present.
403        }
404    }
405}