/*
 * Copyright 2015 Anyware Services
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.ametys.web.indexing.solr;

import java.io.IOException;

import org.apache.avalon.framework.context.Context;
import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.components.ContextHelper;
import org.apache.cocoon.environment.Request;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.cms.content.archive.ArchiveConstants;
import org.ametys.cms.content.indexing.solr.SolrFieldNames;
import org.ametys.cms.content.indexing.solr.SolrIndexer;
import org.ametys.cms.indexing.IndexingException;
import org.ametys.cms.search.solr.SolrClientProvider;
import org.ametys.plugins.repository.RepositoryConstants;
import org.ametys.plugins.repository.UnknownAmetysObjectException;
import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector;
import org.ametys.runtime.plugin.component.AbstractLogEnabled;
import org.ametys.web.WebConstants;
import org.ametys.web.indexing.SiteIndexer;
import org.ametys.web.repository.page.Page;
import org.ametys.web.repository.site.Site;
import org.ametys.web.repository.site.SiteManager;
import org.ametys.web.repository.sitemap.Sitemap;

/**
 * Solr implementation of {@link SiteIndexer}.
 * <p>
 * Every operation temporarily forces the repository workspace on the current
 * request (through {@link RequestAttributeWorkspaceSelector}) and restores the
 * previous value once done, whether the operation succeeded or not.
 */
public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
{
    /** The site manager. */
    protected SiteManager _siteManager;
    /** The solr indexer. */
    protected SolrIndexer _solrIndexer;
    /** The solr page indexer. */
    protected SolrPageIndexer _solrPageIndexer;
    /** The site document provider handler. */
    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
    /** The Solr client provider */
    protected SolrClientProvider _solrClientProvider;

    /** The Avalon context, used to retrieve the current request. */
    private Context _context;

    @Override
    public void contextualize(Context context) throws ContextException
    {
        _context = context;
    }

    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
    }

    @Override
    public void indexSite(Site site) throws IndexingException
    {
        indexSite(site.getName());
    }

    @Override
    public void indexSite(String siteName) throws IndexingException
    {
        // Index the site in the default, live and archive workspaces, in that order.
        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
        indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
    }

    @Override
    public void indexSite(String siteName, String workspaceName) throws IndexingException
    {
        // The client is obtained here, so this method is responsible for the commit and optimize operations.
        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);
        _indexSite(siteName, workspaceName, solrClient, true);
    }

    @Override
    public void indexSite(String siteName, String workspaceName, SolrClient solrClient) throws IndexingException
    {
        // Pass false for commitAndOptimize as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client)
        boolean commitAndOptimize = false;
        _indexSite(siteName, workspaceName, solrClient, commitAndOptimize);
    }

    /**
     * Index a whole site (contents, sitemaps and pages, resources, additional documents) in a given workspace.
     * Does nothing if the site does not exist in that workspace.
     * The forced workspace and the "siteName" attribute of the current request are restored afterwards.
     * @param siteName The name of the site to index.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @param commitAndOptimize <code>true</code> to commit and optimize once the site is indexed.
     * @throws IndexingException If an error occurs indexing the site.
     */
    private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commitAndOptimize) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);

        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        // Retrieve the current site name.
        String currentSiteName = (String) request.getAttribute("siteName");

        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);

            // Get the site in the given workspace.
            Site site = _siteManager.getSite(siteName);

            // Site might not exist in the desired workspace (archive for example)
            if (site == null)
            {
                return;
            }

            // Set the site name in the request.
            request.setAttribute("siteName", siteName);

            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);

            // Delete all documents from this site in current workspace
            _unindexSiteDocuments(siteName, workspaceName, solrClient);

            // Index the site's contents in current workspace
            _indexContents(site, workspaceName, solrClient);

            // Index the site's sitemaps and pages in current workspace
            _indexSitemaps(site, workspaceName, solrClient);

            // Index the site's resources in current workspace
            _indexResources(site, workspaceName, solrClient);

            // Add additional site documents
            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
            {
                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);

                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);

                docProvider.indexSiteDocuments(site, solrClient);
            }

            if (commitAndOptimize)
            {
                _solrIndexer.commit(workspaceName, solrClient);
                _solrIndexer.optimize(workspaceName, solrClient);
            }
        }
        catch (Exception e)
        {
            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore the site name.
            request.setAttribute("siteName", currentSiteName);
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }

    @Override
    public void indexSitemap(Sitemap sitemap) throws IndexingException
    {
        indexSitemap(sitemap.getSiteName(), sitemap.getName());
    }

    @Override
    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
    {
        // Sitemaps are indexed in the default and live workspaces only.
        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
    }

    @Override
    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);

        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        // Retrieve the current site name, so that it can be restored once the sitemap is indexed.
        String currentSiteName = (String) request.getAttribute("siteName");

        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            // Set the site name in the request.
            request.setAttribute("siteName", siteName);
            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false);

            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);

            _indexSitemap(siteName, sitemapName, workspaceName, solrClient);

            _solrIndexer.commit(workspaceName, solrClient);
            _solrIndexer.optimize(workspaceName, solrClient);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore the site name, the same way the site indexing does,
            // so that the forced value does not leak to the rest of the request processing.
            request.setAttribute("siteName", currentSiteName);
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }

    @Override
    public void unindexSite(String siteName) throws IndexingException
    {
        // Unindex the site from the default, live and archive workspaces, in that order.
        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
        unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE);
    }

    @Override
    public void unindexSite(String siteName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);

        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);

        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true);

            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);

            _unindexSiteDocuments(siteName, workspaceName, solrClient);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }

    @Override
    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
    {
        // Sitemaps are unindexed from the default and live workspaces only.
        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
    }

    @Override
    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);

        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);

        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);

            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);

            _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }

    /**
     * Index all the sitemaps of a site, logging the time spent on each of them.
     * @param site The site whose sitemaps are to be indexed.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the sitemaps.
     */
    private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient) throws Exception
    {
        // The sitemap node may not exist if site was created but not yet configured
        if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps"))
        {
            for (Sitemap sitemap : site.getSitemaps())
            {
                getLogger().info("Indexing sitemap {} started", sitemap.getName());

                long start = System.currentTimeMillis();

                _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient);

                long end = System.currentTimeMillis();

                getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
            }
        }
    }

    /**
     * Index a sitemap: delete its existing documents, index its pages, then let
     * the site document providers add their own sitemap documents.
     * No commit is done here.
     * @param siteName The name of the site.
     * @param sitemapName The name of the sitemap.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the sitemap.
     */
    private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception
    {
        // Get the sitemap in the given workspace.
        // NOTE(review): the site indexing guards against getSite() returning null; there is no such guard here,
        // so a site missing from the workspace would surface as a NullPointerException - confirm whether a guard is wanted.
        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);

        // First delete the directory if exists
        _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient);

        // Index pages of this sitemap
        for (Page page : sitemap.getChildrenPages())
        {
            // Index page recursively, without committing.
            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient);
        }

        // Add additional sitemap documents
        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
        {
            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);

            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);

            docProvider.indexSitemapDocuments(sitemap, solrClient);
        }
    }

    /**
     * Delete all the documents matching a site from the collection of a workspace.
     * @param siteName The name of the site.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws SolrServerException If the Solr client fails to process the deletion.
     * @throws IOException If a communication error occurs, or if the status of the Solr response is not 0.
     */
    private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException
    {
        // query
        String query = "site:" + ClientUtils.escapeQueryChars(siteName);

        // delete
        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
        int status = solrResponse.getStatus();

        if (status != 0)
        {
            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
        }

        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
    }

    /**
     * Delete all the documents matching a sitemap of a site from the collection of a workspace.
     * @param siteName The name of the site.
     * @param sitemapName The name of the sitemap.
     * @param workspace The workspace name
     * @param solrClient The solr client to use
     * @throws SolrServerException If the Solr client fails to process the deletion.
     * @throws IOException If a communication error occurs, or if the status of the Solr response is not 0.
     */
    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException
    {
        // query
        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);

        // delete
        String collectionName = _solrClientProvider.getCollectionName(workspace);
        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
        int status = solrResponse.getStatus();

        if (status != 0)
        {
            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
        }

        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
    }

    /**
     * Index the contents of a site.
     * @param site The site to index.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the contents.
     */
    protected void _indexContents(Site site, String workspaceName, SolrClient solrClient) throws Exception
    {
        _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient);
    }

    /**
     * Index the resources of a site.
     * @param site The site to index.
     * @param workspaceName The workspace name
     * @param solrClient The solr client to use
     * @throws Exception If an error occurs indexing the resources.
     */
    protected void _indexResources(Site site, String workspaceName, SolrClient solrClient) throws Exception
    {
        try
        {
            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient);
        }
        catch (UnknownAmetysObjectException e)
        {
            // Ignore if the resource root is not present.
        }
    }
}