001/*
002 *  Copyright 2015 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.indexing.solr;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.context.Context;
021import org.apache.avalon.framework.context.ContextException;
022import org.apache.avalon.framework.context.Contextualizable;
023import org.apache.avalon.framework.service.ServiceException;
024import org.apache.avalon.framework.service.ServiceManager;
025import org.apache.avalon.framework.service.Serviceable;
026import org.apache.cocoon.components.ContextHelper;
027import org.apache.cocoon.environment.Request;
028import org.apache.solr.client.solrj.SolrClient;
029import org.apache.solr.client.solrj.SolrServerException;
030import org.apache.solr.client.solrj.response.UpdateResponse;
031import org.apache.solr.client.solrj.util.ClientUtils;
032
033import org.ametys.cms.content.archive.ArchiveConstants;
034import org.ametys.cms.content.indexing.solr.SolrFieldNames;
035import org.ametys.cms.content.indexing.solr.SolrIndexer;
036import org.ametys.cms.indexing.IndexingException;
037import org.ametys.cms.search.solr.SolrClientProvider;
038import org.ametys.plugins.repository.RepositoryConstants;
039import org.ametys.plugins.repository.UnknownAmetysObjectException;
040import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
041import org.ametys.runtime.plugin.component.AbstractLogEnabled;
042import org.ametys.web.WebConstants;
043import org.ametys.web.indexing.SiteIndexer;
044import org.ametys.web.repository.page.Page;
045import org.ametys.web.repository.site.Site;
046import org.ametys.web.repository.site.SiteManager;
047import org.ametys.web.repository.sitemap.Sitemap;
048
049/**
050 * Solr implementation of {@link SiteIndexer}.
051 */
052public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
053{
054    
055    /** The site manager. */
056    protected SiteManager _siteManager;
057    /** The solr indexer. */
058    protected SolrIndexer _solrIndexer;
059    /** The solr page indexer. */
060    protected SolrPageIndexer _solrPageIndexer;
061    /** The site document provider handler. */
062    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
063    /** The Solr client provider */
064    protected SolrClientProvider _solrClientProvider;
065
066    private Context _context;
067    
068    @Override
069    public void contextualize(Context context) throws ContextException
070    {
071        _context = context;
072    }
073    
074    @Override
075    public void service(ServiceManager manager) throws ServiceException
076    {
077        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
078        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
079        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
080        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
081        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
082    }
083    
084    @Override
085    public void indexSite(Site site) throws IndexingException
086    {
087        indexSite(site.getName());
088    }
089    
090    @Override
091    public void indexSite(String siteName) throws IndexingException
092    {
093        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
094        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
095        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
096    }
097    
098    @Override
099    public void indexSite(String siteName, String workspaceName) throws IndexingException
100    {
101        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
102        _indexSite(siteName, workspaceName, solrClient, true);
103    }
104    
105    @Override
106    public void indexSite(String siteName, String workspaceName, SolrClient solrClient) throws IndexingException
107    {
108        // Pass false for commitAndOptimize as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client)
109        boolean commitAndOptimize = false;
110        _indexSite(siteName, workspaceName, solrClient, commitAndOptimize);
111    }
112    
113    private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commitAndOptimize) throws IndexingException
114    {
115        Request request = ContextHelper.getRequest(_context);
116        
117        // Retrieve the current workspace.
118        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
119        // Retrieve the current site name.
120        String currentSiteName = (String) request.getAttribute("siteName");
121        
122        try
123        {
124            // Force the workspace.
125            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
126            
127            // Get the site in the given workspace.
128            Site site = _siteManager.getSite(siteName);
129            
130            // Site might not exist in the desired workspace (archive for example)
131            if (site == null)
132            {
133                return;
134            }
135            
136            // Set the site name in the request.
137            request.setAttribute("siteName", siteName);
138
139            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
140            
141            // Delete all documents from this site in current workspace
142            _unindexSiteDocuments(siteName, workspaceName, solrClient);
143            
144            // Index the site's contents in current workspace
145            _indexContents(site, workspaceName, solrClient);
146            
147            // Index the site's sitemaps and pages in current workspace
148            _indexSitemaps(site, workspaceName, solrClient);
149            
150            // Index the site's resources in current workspace
151            _indexResources(site, workspaceName, solrClient);
152            
153            // Add additional site documents
154            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
155            {
156                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
157                
158                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
159                
160                docProvider.indexSiteDocuments(site, solrClient);
161            }
162            
163            if (commitAndOptimize)
164            {
165                _solrIndexer.commit(workspaceName, solrClient);
166                _solrIndexer.optimize(workspaceName, solrClient);
167            }
168        }
169        catch (Exception e)
170        {
171            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
172            getLogger().error(error, e);
173            throw new IndexingException(error, e);
174        }
175        finally
176        {
177            // Restore the site name.
178            request.setAttribute("siteName", currentSiteName);
179            // Restore context
180            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
181        }
182    }
183    
184    @Override
185    public void indexSitemap(Sitemap sitemap) throws IndexingException
186    {
187        indexSitemap(sitemap.getSiteName(), sitemap.getName());
188    }
189    
190    @Override
191    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
192    {
193        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
194        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
195    }
196    
197    @Override
198    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
199    {
200        Request request = ContextHelper.getRequest(_context);
201        
202        // Retrieve the current workspace.
203        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
204        
205        try
206        {
207            // Force the workspace.
208            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
209            request.setAttribute("siteName", siteName);
210            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
211            
212            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
213            
214            _indexSitemap(siteName, sitemapName, workspaceName, solrClient);
215            
216            _solrIndexer.commit(workspaceName, solrClient);
217            _solrIndexer.optimize(workspaceName, solrClient);
218        }
219        catch (Exception e)
220        {
221            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
222            getLogger().error(error, e);
223            throw new IndexingException(error, e);
224        }
225        finally
226        {
227            // Restore context
228            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
229        }
230    }
231    
232    @Override
233    public void unindexSite(String siteName) throws IndexingException
234    {
235        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
236        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
237        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
238    }
239   
240    @Override
241    public void unindexSite(String siteName, String workspaceName) throws IndexingException
242    {
243        Request request = ContextHelper.getRequest(_context);
244        
245        // Retrieve the current workspace.
246        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
247        
248        try
249        {
250            // Force the workspace.
251            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
252            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
253            
254            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
255            
256            _unindexSiteDocuments(siteName, workspaceName, solrClient);
257            
258        }
259        catch (Exception e)
260        {
261            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
262            getLogger().error(error, e);
263            throw new IndexingException(error, e);
264        }
265        finally
266        {
267            // Restore context
268            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
269        }
270    }
271    
272    @Override
273    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
274    {
275        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
276        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
277    }
278
279    @Override
280    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
281    {
282        Request request = ContextHelper.getRequest(_context);
283        
284        // Retrieve the current workspace.
285        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
286        
287        try
288        {
289            // Force the workspace.
290            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
291            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
292            
293            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
294            
295            _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
296        }
297        catch (Exception e)
298        {
299            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
300            getLogger().error(error, e);
301            throw new IndexingException(error, e);
302        }
303        finally
304        {
305            // Restore context
306            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
307        }
308    }
309    
310    private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient) throws Exception
311    {
312        // The sitemap node may not exist if site was created but not yet configured
313        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
314        {
315            for (Sitemap sitemap : site.getSitemaps())
316            {
317                getLogger().info("Indexing sitemap {} started", sitemap.getName());
318                
319                long start = System.currentTimeMillis();
320                
321                _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient);
322                
323                long end = System.currentTimeMillis();
324                
325                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
326            }
327        }
328    }
329    
330    private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception
331    {
332        // Get the sitemap in the given workspace.
333        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
334        
335        // First delete the directory if exists
336        _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
337
338        // Index pages of this sitemap
339        for (Page page : sitemap.getChildrenPages())
340        {
341            // Index page recursively, without committing.
342            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient);
343        }
344        
345        // Add additional sitemap documents
346        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
347        {
348            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
349            
350            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
351            
352            docProvider.indexSitemapDocuments(sitemap, solrClient);
353        }
354    }
355    
356    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
357    {
358        // query
359        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
360        
361        // delete
362        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
363        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
364        int status = solrResponse.getStatus();
365        
366        if (status != 0)
367        {
368            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
369        }
370        
371        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
372    }
373    
374    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException
375    {
376        // query
377        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
378        
379        // delete
380        String collectionName = _solrClientProvider.getCollectionName(workspace);
381        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
382        int status = solrResponse.getStatus();
383        
384        if (status != 0)
385        {
386            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
387        }
388        
389        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
390    }
391    
392    /**
393     * Index the contents of a site.
394     * @param site The site to index.
395     * @param workspaceName The workspace name
396     * @param solrClient The solr client to use
397     * @throws Exception If an error occurs indexing the contents.
398     */
399    protected void _indexContents(Site site, String workspaceName, SolrClient solrClient) throws Exception
400    {
401        _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient);
402    }
403    
404    /**
405     * Index the resources of a site.
406     * @param site The site to index.
407     * @param workspaceName The workspace name
408     * @param solrClient The solr client to use
409     * @throws Exception If an error occurs indexing the resources.
410     */
411    protected void _indexResources(Site site, String workspaceName, SolrClient solrClient) throws Exception
412    {
413        try
414        {
415            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient);
416        }
417        catch (UnknownAmetysObjectException e)
418        {
419            // Ignore if the resource root is not present.
420        }
421    }
422}