/*
 *  Copyright 2015 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.web.indexing.solr;

import java.io.IOException;

import org.apache.avalon.framework.context.Context;
import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.components.ContextHelper;
import org.apache.cocoon.environment.Request;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.cms.content.archive.ArchiveConstants;
import org.ametys.cms.content.indexing.solr.SolrFieldNames;
import org.ametys.cms.content.indexing.solr.SolrIndexer;
import org.ametys.cms.indexing.IndexingException;
import org.ametys.cms.search.solr.SolrClientProvider;
import org.ametys.plugins.repository.RepositoryConstants;
import org.ametys.plugins.repository.UnknownAmetysObjectException;
import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
import org.ametys.runtime.plugin.component.AbstractLogEnabled;
import org.ametys.web.WebConstants;
import org.ametys.web.indexing.SiteIndexer;
import org.ametys.web.repository.page.Page;
import org.ametys.web.repository.site.Site;
import org.ametys.web.repository.site.SiteManager;
import org.ametys.web.repository.sitemap.Sitemap;

/**
 * Solr implementation of {@link SiteIndexer}.
 * <p>
 * Every operation temporarily forces the repository workspace on the current
 * request (through {@link RequestAttributeWorkspaceSelector}) and restores the
 * previous value once done, whether the operation succeeded or not.
 */
public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
{
    /** Name of the request attribute holding the current site name. */
    private static final String __REQUEST_ATTR_SITE_NAME = "siteName";
    
    /** The site manager. */
    protected SiteManager _siteManager;
    /** The solr indexer. */
    protected SolrIndexer _solrIndexer;
    /** The solr page indexer. */
    protected SolrPageIndexer _solrPageIndexer;
    /** The site document provider handler. */
    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
    /** The Solr client provider */
    protected SolrClientProvider _solrClientProvider;
    
    /** The avalon context, used to retrieve the current request. */
    private Context _context;
    
    @Override
    public void contextualize(Context context) throws ContextException
    {
        _context = context;
    }
    
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
    }
    
    @Override
    public void indexSite(Site site) throws IndexingException
    {
        indexSite(site.getName());
    }
    
    @Override
    public void indexSite(String siteName) throws IndexingException
    {
        // Index the site in the default, live and archive workspaces, in that order.
        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
    }
    
    @Override
    public void indexSite(String siteName, String workspaceName) throws IndexingException
    {
        // NOTE(review): the 'false' flag presumably requests a client without auto-commit
        // (an explicit commit is requested right below) - confirm against SolrClientProvider.
        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
        _indexSite(siteName, workspaceName, solrClient, true);
    }
    
    @Override
    public void indexSite(String siteName, String workspaceName, SolrClient solrClient) throws IndexingException
    {
        // Pass false for commit as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client)
        boolean commit = false;
        _indexSite(siteName, workspaceName, solrClient, commit);
    }
    
    /**
     * Index a whole site (contents, sitemaps and pages, resources and additional
     * documents) in the given workspace, after having removed the documents
     * already indexed for this site. Does nothing if the site does not exist in
     * the workspace.
     * @param siteName The name of the site to index
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @param commit <code>true</code> to commit once the indexing is done
     * @throws IndexingException If an error occurs during the indexing
     */
    private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commit) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        // Retrieve the current site name.
        String currentSiteName = (String) request.getAttribute(__REQUEST_ATTR_SITE_NAME);
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            
            // Get the site in the given workspace.
            Site site = null;
            try
            {
                site = _siteManager.getSite(siteName);
            }
            catch (UnknownAmetysObjectException e)
            {
                // Site might not exist in the desired workspace (archive for example)
                return;
            }
            
            // Set the site name in the request.
            request.setAttribute(__REQUEST_ATTR_SITE_NAME, siteName);
            
            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
            
            // Delete all documents from this site in current workspace
            _unindexSiteDocuments(siteName, workspaceName, solrClient);
            
            // Index the site's contents in current workspace
            _indexContents(site, workspaceName, solrClient);
            
            // Index the site's sitemaps and pages in current workspace
            _indexSitemaps(site, workspaceName, solrClient);
            
            // Index the site's resources in current workspace
            _indexResources(site, workspaceName, solrClient);
            
            // Add additional site documents
            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
            {
                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
                
                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
                
                docProvider.indexSiteDocuments(site, solrClient);
            }
            
            if (commit)
            {
                _solrIndexer.commit(workspaceName, solrClient);
            }
        }
        catch (Exception e)
        {
            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore the site name.
            request.setAttribute(__REQUEST_ATTR_SITE_NAME, currentSiteName);
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    @Override
    public void indexSitemap(Sitemap sitemap) throws IndexingException
    {
        indexSitemap(sitemap.getSiteName(), sitemap.getName());
    }
    
    @Override
    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
    {
        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
    }
    
    @Override
    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        // Retrieve the current site name, so that it can be restored afterwards
        // (the attribute is overwritten below for the duration of the indexing).
        String currentSiteName = (String) request.getAttribute(__REQUEST_ATTR_SITE_NAME);
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            // Set the site name in the request.
            request.setAttribute(__REQUEST_ATTR_SITE_NAME, siteName);
            
            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
            
            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
            
            _indexSitemap(siteName, sitemapName, workspaceName, solrClient);
            
            _solrIndexer.commit(workspaceName, solrClient);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore the site name.
            request.setAttribute(__REQUEST_ATTR_SITE_NAME, currentSiteName);
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    @Override
    public void unindexSite(String siteName) throws IndexingException
    {
        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
    }
    
    @Override
    public void unindexSite(String siteName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            // NOTE(review): no explicit commit is done here; the 'true' flag presumably
            // requests an auto-committing client - confirm against SolrClientProvider.
            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);
            
            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
            
            _unindexSiteDocuments(siteName, workspaceName, solrClient);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    @Override
    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
    {
        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
    }
    
    @Override
    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
            
            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
            
            _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    /**
     * Index all the sitemaps of a site, logging the time spent on each of them.
     * @param site The site whose sitemaps are to be indexed
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the sitemaps
     */
    private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient) throws Exception
    {
        // The sitemap node may not exist if site was created but not yet configured
        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
        {
            for (Sitemap sitemap : site.getSitemaps())
            {
                getLogger().info("Indexing sitemap {} started", sitemap.getName());
                
                long start = System.currentTimeMillis();
                
                _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient);
                
                long end = System.currentTimeMillis();
                
                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
            }
        }
    }
    
    /**
     * Index a sitemap: remove the documents already indexed for it, index its
     * pages, then let every {@link SiteDocumentProvider} add its own documents.
     * No commit is requested by this method itself.
     * @param siteName The site name
     * @param sitemapName The sitemap name
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the sitemap
     */
    private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception
    {
        // Get the sitemap in the given workspace.
        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
        
        // First delete the directory if exists
        _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
        
        // Index pages of this sitemap
        for (Page page : sitemap.getChildrenPages())
        {
            // Index page recursively, without committing.
            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient);
        }
        
        // Add additional sitemap documents
        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
        {
            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
            
            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
            
            docProvider.indexSitemapDocuments(sitemap, solrClient);
        }
    }
    
    /**
     * Delete from the index every document matching the given site.
     * @param siteName The site name
     * @param workspaceName The workspace name, used to determine the Solr collection
     * @param solrClient The solr client to use
     * @throws SolrServerException If the Solr server reports an error
     * @throws IOException If a communication error occurs, or if the status of the Solr response is not 0
     */
    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
    {
        // query
        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
        
        // delete
        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
        int status = solrResponse.getStatus();
        
        if (status != 0)
        {
            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
        }
        
        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
    }
    
    /**
     * Delete from the index every document matching the given site and sitemap.
     * @param siteName The site name
     * @param sitemapName The sitemap name
     * @param workspace The workspace name, used to determine the Solr collection
     * @param solrClient The solr client to use
     * @throws SolrServerException If the Solr server reports an error
     * @throws IOException If a communication error occurs, or if the status of the Solr response is not 0
     */
    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException
    {
        // query
        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
        
        // delete
        String collectionName = _solrClientProvider.getCollectionName(workspace);
        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
        int status = solrResponse.getStatus();
        
        if (status != 0)
        {
            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
        }
        
        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
    }
    
    /**
     * Index the contents of a site.
     * @param site The site to index.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the contents.
     */
    protected void _indexContents(Site site, String workspaceName, SolrClient solrClient) throws Exception
    {
        _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient);
    }
    
    /**
     * Index the resources of a site.
     * @param site The site to index.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the resources.
     */
    protected void _indexResources(Site site, String workspaceName, SolrClient solrClient) throws Exception
    {
        try
        {
            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient);
        }
        catch (UnknownAmetysObjectException e)
        {
            // Ignore if the resource root is not present.
        }
    }
}