001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.context.Context;
021import org.apache.avalon.framework.context.ContextException;
022import org.apache.avalon.framework.context.Contextualizable;
023import org.apache.avalon.framework.service.ServiceException;
024import org.apache.avalon.framework.service.ServiceManager;
025import org.apache.avalon.framework.service.Serviceable;
026import org.apache.cocoon.components.ContextHelper;
027import org.apache.cocoon.environment.Request;
028import org.apache.solr.client.solrj.SolrClient;
029import org.apache.solr.client.solrj.SolrServerException;
030import org.apache.solr.client.solrj.response.UpdateResponse;
031import org.apache.solr.client.solrj.util.ClientUtils;
032
033import org.ametys.cms.content.archive.ArchiveConstants;
034import org.ametys.cms.content.indexing.solr.SolrFieldNames;
035import org.ametys.cms.content.indexing.solr.SolrIndexer;
036import org.ametys.cms.indexing.IndexingException;
037import org.ametys.cms.search.solr.SolrClientProvider;
038import org.ametys.plugins.repository.RepositoryConstants;
039import org.ametys.plugins.repository.UnknownAmetysObjectException;
040import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
041import org.ametys.runtime.plugin.component.AbstractLogEnabled;
042import org.ametys.web.WebConstants;
043import org.ametys.web.indexing.SiteIndexer;
044import org.ametys.web.repository.page.Page;
045import org.ametys.web.repository.site.Site;
046import org.ametys.web.repository.site.SiteManager;
047import org.ametys.web.repository.sitemap.Sitemap;
048
049/**
050 * Solr implementation of {@link SiteIndexer}.
051 */
052public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
053{
054    
055    /** The site manager. */
056    protected SiteManager _siteManager;
057    /** The solr indexer. */
058    protected SolrIndexer _solrIndexer;
059    /** The solr page indexer. */
060    protected SolrPageIndexer _solrPageIndexer;
061    /** The site document provider handler. */
062    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
063    /** The Solr client provider */
064    protected SolrClientProvider _solrClientProvider;
065
066    private Context _context;
067    
068    @Override
069    public void contextualize(Context context) throws ContextException
070    {
071        _context = context;
072    }
073    
074    @Override
075    public void service(ServiceManager manager) throws ServiceException
076    {
077        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
078        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
079        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
080        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
081        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
082    }
083    
084    @Override
085    public void indexSite(Site site) throws IndexingException
086    {
087        indexSite(site.getName());
088    }
089    
090    @Override
091    public void indexSite(String siteName) throws IndexingException
092    {
093        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
094        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
095        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
096    }
097    
098    @Override
099    public void indexSite(String siteName, String workspaceName) throws IndexingException
100    {
101        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
102        _indexSite(siteName, workspaceName, solrClient, true);
103    }
104    
105    @Override
106    public void indexSite(String siteName, String workspaceName, SolrClient solrClient) throws IndexingException
107    {
108        // Pass false for commit as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client)
109        boolean commit = false;
110        _indexSite(siteName, workspaceName, solrClient, commit);
111    }
112    
113    private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commit) throws IndexingException
114    {
115        Request request = ContextHelper.getRequest(_context);
116        
117        // Retrieve the current workspace.
118        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
119        // Retrieve the current site name.
120        String currentSiteName = (String) request.getAttribute("siteName");
121        
122        try
123        {
124            // Force the workspace.
125            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
126            
127            // Get the site in the given workspace.
128            Site site = null;
129            try
130            {
131                site = _siteManager.getSite(siteName);
132            }
133            catch (UnknownAmetysObjectException e)    
134            {
135                // Site might not exist in the desired workspace (archive for example)
136                return;
137            }
138            
139            // Set the site name in the request.
140            request.setAttribute("siteName", siteName);
141
142            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
143            
144            // Delete all documents from this site in current workspace
145            _unindexSiteDocuments(siteName, workspaceName, solrClient);
146            
147            // Index the site's contents in current workspace
148            _indexContents(site, workspaceName, solrClient);
149            
150            // Index the site's sitemaps and pages in current workspace
151            _indexSitemaps(site, workspaceName, solrClient);
152            
153            // Index the site's resources in current workspace
154            _indexResources(site, workspaceName, solrClient);
155            
156            // Add additional site documents
157            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
158            {
159                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
160                
161                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
162                
163                docProvider.indexSiteDocuments(site, solrClient);
164            }
165            
166            if (commit)
167            {
168                _solrIndexer.commit(workspaceName, solrClient);
169            }
170        }
171        catch (Exception e)
172        {
173            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
174            getLogger().error(error, e);
175            throw new IndexingException(error, e);
176        }
177        finally
178        {
179            // Restore the site name.
180            request.setAttribute("siteName", currentSiteName);
181            // Restore context
182            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
183        }
184    }
185    
186    @Override
187    public void indexSitemap(Sitemap sitemap) throws IndexingException
188    {
189        indexSitemap(sitemap.getSiteName(), sitemap.getName());
190    }
191    
192    @Override
193    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
194    {
195        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
196        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
197    }
198    
199    @Override
200    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
201    {
202        Request request = ContextHelper.getRequest(_context);
203        
204        // Retrieve the current workspace.
205        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
206        
207        try
208        {
209            // Force the workspace.
210            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
211            request.setAttribute("siteName", siteName);
212            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
213            
214            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
215            
216            _indexSitemap(siteName, sitemapName, workspaceName, solrClient);
217            
218            _solrIndexer.commit(workspaceName, solrClient);
219        }
220        catch (Exception e)
221        {
222            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
223            getLogger().error(error, e);
224            throw new IndexingException(error, e);
225        }
226        finally
227        {
228            // Restore context
229            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
230        }
231    }
232    
233    @Override
234    public void unindexSite(String siteName) throws IndexingException
235    {
236        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
237        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
238        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
239    }
240   
241    @Override
242    public void unindexSite(String siteName, String workspaceName) throws IndexingException
243    {
244        Request request = ContextHelper.getRequest(_context);
245        
246        // Retrieve the current workspace.
247        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
248        
249        try
250        {
251            // Force the workspace.
252            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
253            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
254            
255            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
256            
257            _unindexSiteDocuments(siteName, workspaceName, solrClient);
258            
259        }
260        catch (Exception e)
261        {
262            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
263            getLogger().error(error, e);
264            throw new IndexingException(error, e);
265        }
266        finally
267        {
268            // Restore context
269            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
270        }
271    }
272    
273    @Override
274    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
275    {
276        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
277        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
278    }
279
280    @Override
281    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
282    {
283        Request request = ContextHelper.getRequest(_context);
284        
285        // Retrieve the current workspace.
286        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
287        
288        try
289        {
290            // Force the workspace.
291            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
292            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
293            
294            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
295            
296            _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
297        }
298        catch (Exception e)
299        {
300            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
301            getLogger().error(error, e);
302            throw new IndexingException(error, e);
303        }
304        finally
305        {
306            // Restore context
307            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
308        }
309    }
310    
311    private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient) throws Exception
312    {
313        // The sitemap node may not exist if site was created but not yet configured
314        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
315        {
316            for (Sitemap sitemap : site.getSitemaps())
317            {
318                getLogger().info("Indexing sitemap {} started", sitemap.getName());
319                
320                long start = System.currentTimeMillis();
321                
322                _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient);
323                
324                long end = System.currentTimeMillis();
325                
326                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
327            }
328        }
329    }
330    
331    private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception
332    {
333        // Get the sitemap in the given workspace.
334        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
335        
336        // First delete the directory if exists
337        _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
338
339        // Index pages of this sitemap
340        for (Page page : sitemap.getChildrenPages())
341        {
342            // Index page recursively, without committing.
343            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient);
344        }
345        
346        // Add additional sitemap documents
347        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
348        {
349            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
350            
351            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
352            
353            docProvider.indexSitemapDocuments(sitemap, solrClient);
354        }
355    }
356    
357    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
358    {
359        // query
360        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
361        
362        // delete
363        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
364        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
365        int status = solrResponse.getStatus();
366        
367        if (status != 0)
368        {
369            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
370        }
371        
372        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
373    }
374    
375    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException
376    {
377        // query
378        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
379        
380        // delete
381        String collectionName = _solrClientProvider.getCollectionName(workspace);
382        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
383        int status = solrResponse.getStatus();
384        
385        if (status != 0)
386        {
387            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
388        }
389        
390        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
391    }
392    
393    /**
394     * Index the contents of a site.
395     * @param site The site to index.
396     * @param workspaceName The workspace name
397     * @param solrClient The solr client to use
398     * @throws Exception If an error occurs indexing the contents.
399     */
400    protected void _indexContents(Site site, String workspaceName, SolrClient solrClient) throws Exception
401    {
402        _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient);
403    }
404    
405    /**
406     * Index the resources of a site.
407     * @param site The site to index.
408     * @param workspaceName The workspace name
409     * @param solrClient The solr client to use
410     * @throws Exception If an error occurs indexing the resources.
411     */
412    protected void _indexResources(Site site, String workspaceName, SolrClient solrClient) throws Exception
413    {
414        try
415        {
416            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient);
417        }
418        catch (UnknownAmetysObjectException e)
419        {
420            // Ignore if the resource root is not present.
421        }
422    }
423}