001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019import java.util.List; 020import java.util.Map; 021 022import org.apache.avalon.framework.context.Context; 023import org.apache.avalon.framework.context.ContextException; 024import org.apache.avalon.framework.context.Contextualizable; 025import org.apache.avalon.framework.service.ServiceException; 026import org.apache.avalon.framework.service.ServiceManager; 027import org.apache.avalon.framework.service.Serviceable; 028import org.apache.cocoon.components.ContextHelper; 029import org.apache.cocoon.environment.Request; 030import org.apache.solr.client.solrj.SolrClient; 031import org.apache.solr.client.solrj.SolrServerException; 032import org.apache.solr.client.solrj.response.UpdateResponse; 033import org.apache.solr.client.solrj.util.ClientUtils; 034 035import org.ametys.cms.content.archive.ArchiveConstants; 036import org.ametys.cms.content.indexing.solr.SolrFieldNames; 037import org.ametys.cms.content.indexing.solr.SolrIndexer; 038import org.ametys.cms.indexing.IndexingException; 039import org.ametys.cms.search.solr.SolrClientProvider; 040import org.ametys.core.schedule.progression.ContainerProgressionTracker; 041import org.ametys.core.schedule.progression.ProgressionTrackerFactory; 042import org.ametys.core.schedule.progression.SimpleProgressionTracker; 043import org.ametys.plugins.repository.AmetysObjectIterable; 044import org.ametys.plugins.repository.AmetysRepositoryException; 045import org.ametys.plugins.repository.RepositoryConstants; 046import org.ametys.plugins.repository.UnknownAmetysObjectException; 047import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 048import org.ametys.runtime.i18n.I18nizableText; 049import org.ametys.runtime.plugin.component.AbstractLogEnabled; 050import org.ametys.web.WebConstants; 051import org.ametys.web.indexing.SiteIndexer; 052import org.ametys.web.repository.page.Page; 053import org.ametys.web.repository.site.Site; 054import org.ametys.web.repository.site.SiteManager; 055import org.ametys.web.repository.sitemap.Sitemap; 056 057/** 058 * Solr implementation of {@link SiteIndexer}. 059 */ 060public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable 061{ 062 /** The site manager. */ 063 protected SiteManager _siteManager; 064 /** The solr indexer. */ 065 protected SolrIndexer _solrIndexer; 066 /** The solr page indexer. */ 067 protected SolrPageIndexer _solrPageIndexer; 068 /** The site document provider handler. */ 069 protected SiteDocumentProviderExtensionPoint _siteDocProviderEP; 070 /** The Solr client provider */ 071 protected SolrClientProvider _solrClientProvider; 072 073 private Context _context; 074 075 @Override 076 public void contextualize(Context context) throws ContextException 077 { 078 _context = context; 079 } 080 081 @Override 082 public void service(ServiceManager manager) throws ServiceException 083 { 084 _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE); 085 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 086 _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE); 087 _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE); 088 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 089 } 090 091 @Override 092 public void indexSite(Site site) throws IndexingException 093 { 094 indexSite(site, ProgressionTrackerFactory.createContainerProgressionTracker("Index site '" + site.getName() + "'", getLogger())); 095 } 096 097 @Override 098 public void indexSite(Site site, ContainerProgressionTracker progressionTracker) throws IndexingException 099 { 100 indexSite(site.getName(), progressionTracker); 101 } 102 103 @Override 104 public void indexSite(String siteName) throws IndexingException 105 { 106 indexSite(siteName, ProgressionTrackerFactory.createContainerProgressionTracker("Index site '" + siteName + "'", getLogger())); 107 } 108 109 /** 110 * Index a site in all workspaces 111 * @param siteName the name of the site to index. 112 * @param progressionTracker The progression of the indexation 113 * @throws IndexingException If an error occurs while indexing the site. 114 */ 115 public void indexSite(String siteName, ContainerProgressionTracker progressionTracker) throws IndexingException 116 { 117 progressionTracker.addContainerStep(RepositoryConstants.DEFAULT_WORKSPACE, new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_STEPS_LABEL", List.of(RepositoryConstants.DEFAULT_WORKSPACE))); 118 progressionTracker.addContainerStep(WebConstants.LIVE_WORKSPACE, new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_STEPS_LABEL", List.of(WebConstants.LIVE_WORKSPACE))); 119 progressionTracker.addContainerStep(ArchiveConstants.ARCHIVE_WORKSPACE, new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_STEPS_LABEL", List.of(ArchiveConstants.ARCHIVE_WORKSPACE))); 120 121 indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE, (ContainerProgressionTracker) progressionTracker.getStep(RepositoryConstants.DEFAULT_WORKSPACE)); 122 indexSite(siteName, WebConstants.LIVE_WORKSPACE, (ContainerProgressionTracker) progressionTracker.getStep(WebConstants.LIVE_WORKSPACE)); 123 indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE, (ContainerProgressionTracker) progressionTracker.getStep(ArchiveConstants.ARCHIVE_WORKSPACE)); 124 } 125 126 @Override 127 public void indexSite(String siteName, String workspaceName) throws IndexingException 128 { 129 indexSite(siteName, workspaceName, ProgressionTrackerFactory.createContainerProgressionTracker("Index site '" + siteName + "' for workspace " + workspaceName, getLogger())); 130 } 131 132 @Override 133 public void indexSite(String siteName, String workspaceName, ContainerProgressionTracker progressionTracker) throws IndexingException 134 { 135 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false); 136 _indexSite(siteName, workspaceName, solrClient, true, progressionTracker); 137 } 138 139 @Override 140 public void indexSite(String siteName, String workspaceName, SolrClient solrClient, ContainerProgressionTracker progressionTracker) throws IndexingException 141 { 142 // Pass false for commit as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client) 143 boolean commit = false; 144 _indexSite(siteName, workspaceName, solrClient, commit, progressionTracker); 145 } 146 147 private void _createProgressionTrackerStepsForSubIndexSite(ContainerProgressionTracker progressionTracker, boolean commit) throws AmetysRepositoryException 148 { 149 progressionTracker.addSimpleStep("unindexing", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_UNINDEXING_DOCUMENT_SUB_STEP_LABEL")); 150 151 progressionTracker.addSimpleStep("contents", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_CONTENT_STEP_LABEL")); 152 153 progressionTracker.addSimpleStep("sitemaps", new I18nizableText("plugin.web", "PLUGINS_WEB_SCHEDULABLE_SITE_INDEXATION_SITEMAPS_STEP_LABEL")); 154 155 progressionTracker.addSimpleStep("resources", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_RESOURCES_STEP_LABEL")); 156 157 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 158 { 159 SiteDocumentProvider extension = _siteDocProviderEP.getExtension(docProviderId); 160 progressionTracker.addContainerStep("documents", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_DOCUMENTS_PROVIDER_STEPS_LABEL", Map.of("0", extension.getLabel()))); 161 } 162 163 if (commit) 164 { 165 progressionTracker.addSimpleStep("commit", new I18nizableText("plugin.cms", "PLUGINS_CMS_SCHEDULER_GLOBAL_INDEXATION_SAVING_SUB_STEP_LABEL")); 166 } 167 } 168 169 private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commit, ContainerProgressionTracker progressionTracker) throws IndexingException 170 { 171 Request request = ContextHelper.getRequest(_context); 172 173 // Retrieve the current workspace. 174 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 175 // Retrieve the current site name. 176 String currentSiteName = (String) request.getAttribute("siteName"); 177 178 try 179 { 180 // Force the workspace. 181 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 182 183 // Get the site in the given workspace. 184 Site site = null; 185 try 186 { 187 site = _siteManager.getSite(siteName); 188 } 189 catch (UnknownAmetysObjectException e) 190 { 191 // Site might not exist in the desired workspace (archive for example) 192 return; 193 } 194 195 _createProgressionTrackerStepsForSubIndexSite(progressionTracker, commit); 196 197 // Set the site name in the request. 198 request.setAttribute("siteName", siteName); 199 200 getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName); 201 202 // Delete all documents from this site in current workspace 203 _unindexSiteDocuments(siteName, workspaceName, solrClient, progressionTracker.getStep("unindexing")); 204 205 // Index the site's contents in current workspace 206 _indexContents(site, workspaceName, solrClient, progressionTracker.getStep("contents")); 207 208 // Index the site's sitemaps and pages in current workspace 209 _indexSitemaps(site, workspaceName, solrClient, progressionTracker.getStep("sitemaps")); 210 211 // Index the site's resources in current workspace 212 _indexResources(site, workspaceName, solrClient, progressionTracker.getStep("resources")); 213 214 // Add additional site documents 215 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 216 { 217 SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId); 218 219 getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider); 220 221 docProvider.indexSiteDocuments(site, solrClient, progressionTracker.getStep("documents-" + docProviderId)); 222 } 223 224 if (commit) 225 { 226 _solrIndexer.commit(workspaceName, solrClient); 227 ((SimpleProgressionTracker) progressionTracker.getStep("commit")).increment(); 228 } 229 } 230 catch (Exception e) 231 { 232 String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName); 233 getLogger().error(error, e); 234 throw new IndexingException(error, e); 235 } 236 finally 237 { 238 // Restore the site name. 239 request.setAttribute("siteName", currentSiteName); 240 // Restore context 241 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 242 } 243 } 244 245 @Override 246 public void indexSitemap(Sitemap sitemap) throws IndexingException 247 { 248 indexSitemap(sitemap.getSiteName(), sitemap.getName()); 249 } 250 251 @Override 252 public void indexSitemap(String siteName, String sitemapName) throws IndexingException 253 { 254 indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE); 255 indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE); 256 } 257 258 @Override 259 public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException 260 { 261 Request request = ContextHelper.getRequest(_context); 262 263 // Retrieve the current workspace. 264 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 265 266 try 267 { 268 // Force the workspace. 269 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 270 request.setAttribute("siteName", siteName); 271 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false); 272 273 getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName); 274 275 _indexSitemap(siteName, sitemapName, workspaceName, solrClient); 276 277 _solrIndexer.commit(workspaceName, solrClient); 278 } 279 catch (Exception e) 280 { 281 String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName); 282 getLogger().error(error, e); 283 throw new IndexingException(error, e); 284 } 285 finally 286 { 287 // Restore context 288 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 289 } 290 } 291 292 @Override 293 public void unindexSite(String siteName) throws IndexingException 294 { 295 unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE); 296 unindexSite(siteName, WebConstants.LIVE_WORKSPACE); 297 unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE); 298 } 299 300 @Override 301 public void unindexSite(String siteName, String workspaceName) throws IndexingException 302 { 303 Request request = ContextHelper.getRequest(_context); 304 305 // Retrieve the current workspace. 306 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 307 308 try 309 { 310 // Force the workspace. 311 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 312 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 313 314 getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName); 315 316 _unindexSiteDocuments(siteName, workspaceName, solrClient); 317 318 } 319 catch (Exception e) 320 { 321 String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName); 322 getLogger().error(error, e); 323 throw new IndexingException(error, e); 324 } 325 finally 326 { 327 // Restore context 328 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 329 } 330 } 331 332 @Override 333 public void unindexSitemap(String siteName, String sitemapName) throws IndexingException 334 { 335 unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE); 336 unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE); 337 } 338 339 @Override 340 public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException 341 { 342 Request request = ContextHelper.getRequest(_context); 343 344 // Retrieve the current workspace. 345 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 346 347 try 348 { 349 // Force the workspace. 350 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 351 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 352 353 getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName); 354 355 _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient); 356 } 357 catch (Exception e) 358 { 359 String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName); 360 getLogger().error(error, e); 361 throw new IndexingException(error, e); 362 } 363 finally 364 { 365 // Restore context 366 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 367 } 368 } 369 370 private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws Exception 371 { 372 // The sitemap node may not exist if site was created but not yet configured 373 if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps")) 374 { 375 376 AmetysObjectIterable<Sitemap> sitemaps = site.getSitemaps(); 377 long nbOfSitemaps = sitemaps.getSize(); 378 379 progressionTracker.setSize(nbOfSitemaps); 380 381 for (Sitemap sitemap : sitemaps) 382 { 383 getLogger().info("Indexing sitemap {} started", sitemap.getName()); 384 385 long start = System.currentTimeMillis(); 386 387 _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient); 388 389 long end = System.currentTimeMillis(); 390 391 getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start); 392 393 progressionTracker.increment(); 394 } 395 } 396 } 397 398 private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception 399 { 400 // Get the sitemap in the given workspace. 401 Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName); 402 403 // First delete the directory if exists 404 _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient); 405 406 // Index pages of this sitemap 407 AmetysObjectIterable<? extends Page> children = sitemap.getChildrenPages(); 408 for (Page page : children) 409 { 410 // Index page recursively, without committing. 411 _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient); 412 } 413 414 // Add additional sitemap documents 415 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 416 { 417 SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId); 418 419 getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider); 420 421 docProvider.indexSitemapDocuments(sitemap, solrClient); 422 } 423 } 424 425 private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 426 { 427 _unindexSiteDocuments(siteName, workspaceName, solrClient, ProgressionTrackerFactory.createSimpleProgressionTracker("Unindex site document for site '" + siteName + "' for workspace " + workspaceName, getLogger())); 428 } 429 430 private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws SolrServerException, IOException 431 { 432 // query 433 String query = "site:" + ClientUtils.escapeQueryChars(siteName); 434 435 // delete 436 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 437 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query); 438 int status = solrResponse.getStatus(); 439 440 if (status != 0) 441 { 442 throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName); 443 } 444 445 getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName); 446 447 progressionTracker.increment(); 448 } 449 450 private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException 451 { 452 // query 453 String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName); 454 455 // delete 456 String collectionName = _solrClientProvider.getCollectionName(workspace); 457 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query); 458 int status = solrResponse.getStatus(); 459 460 if (status != 0) 461 { 462 throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName); 463 } 464 465 getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName); 466 } 467 468 /** 469 * Index the contents of a site. 470 * @param site The site to index. 471 * @param workspaceName The workspace name 472 * @param solrClient The solr client to use 473 * @param progressionTracker The progression of the indexation 474 * @throws Exception If an error occurs indexing the contents. 475 */ 476 protected void _indexContents(Site site, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws Exception 477 { 478 _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient, progressionTracker); 479 } 480 481 /** 482 * Index the resources of a site. 483 * @param site The site to index. 484 * @param workspaceName The workspace name 485 * @param solrClient The solr client to use 486 * @param progressionTracker The progression of the indexation 487 * @throws Exception If an error occurs indexing the resources. 488 */ 489 protected void _indexResources(Site site, String workspaceName, SolrClient solrClient, SimpleProgressionTracker progressionTracker) throws Exception 490 { 491 try 492 { 493 _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient, progressionTracker); 494 } 495 catch (UnknownAmetysObjectException e) 496 { 497 // Ignore if the resource root is not present. 498 progressionTracker.increment(); 499 } 500 } 501}