001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019 020import org.apache.avalon.framework.context.Context; 021import org.apache.avalon.framework.context.ContextException; 022import org.apache.avalon.framework.context.Contextualizable; 023import org.apache.avalon.framework.service.ServiceException; 024import org.apache.avalon.framework.service.ServiceManager; 025import org.apache.avalon.framework.service.Serviceable; 026import org.apache.cocoon.components.ContextHelper; 027import org.apache.cocoon.environment.Request; 028import org.apache.solr.client.solrj.SolrClient; 029import org.apache.solr.client.solrj.SolrServerException; 030import org.apache.solr.client.solrj.response.UpdateResponse; 031import org.apache.solr.client.solrj.util.ClientUtils; 032 033import org.ametys.cms.content.archive.ArchiveConstants; 034import org.ametys.cms.content.indexing.solr.SolrFieldNames; 035import org.ametys.cms.content.indexing.solr.SolrIndexer; 036import org.ametys.cms.indexing.IndexingException; 037import org.ametys.cms.search.solr.SolrClientProvider; 038import org.ametys.plugins.repository.AmetysObjectIterable; 039import org.ametys.plugins.repository.RepositoryConstants; 040import org.ametys.plugins.repository.UnknownAmetysObjectException; 041import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 042import org.ametys.runtime.plugin.component.AbstractLogEnabled; 043import org.ametys.web.WebConstants; 044import org.ametys.web.indexing.SiteIndexer; 045import org.ametys.web.repository.page.Page; 046import org.ametys.web.repository.site.Site; 047import org.ametys.web.repository.site.SiteManager; 048import org.ametys.web.repository.sitemap.Sitemap; 049 050/** 051 * Solr implementation of {@link SiteIndexer}. 052 */ 053public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable 054{ 055 056 /** The site manager. */ 057 protected SiteManager _siteManager; 058 /** The solr indexer. */ 059 protected SolrIndexer _solrIndexer; 060 /** The solr page indexer. */ 061 protected SolrPageIndexer _solrPageIndexer; 062 /** The site document provider handler. */ 063 protected SiteDocumentProviderExtensionPoint _siteDocProviderEP; 064 /** The Solr client provider */ 065 protected SolrClientProvider _solrClientProvider; 066 067 private Context _context; 068 069 @Override 070 public void contextualize(Context context) throws ContextException 071 { 072 _context = context; 073 } 074 075 @Override 076 public void service(ServiceManager manager) throws ServiceException 077 { 078 _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE); 079 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 080 _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE); 081 _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE); 082 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 083 } 084 085 @Override 086 public void indexSite(Site site) throws IndexingException 087 { 088 indexSite(site.getName()); 089 } 090 091 @Override 092 public void indexSite(String siteName) throws IndexingException 093 { 094 indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE); 095 indexSite(siteName, WebConstants.LIVE_WORKSPACE); 096 indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE); 097 } 098 099 @Override 100 public void indexSite(String siteName, String workspaceName) throws IndexingException 101 { 102 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false); 103 _indexSite(siteName, workspaceName, solrClient, true); 104 } 105 106 @Override 107 public void indexSite(String siteName, String workspaceName, SolrClient solrClient) throws IndexingException 108 { 109 // Pass false for commit as caller provided a SolrClient and thus will take care of commit operation (if it is a NoAutoCommit Solr client) 110 boolean commit = false; 111 _indexSite(siteName, workspaceName, solrClient, commit); 112 } 113 114 private void _indexSite(String siteName, String workspaceName, SolrClient solrClient, boolean commit) throws IndexingException 115 { 116 Request request = ContextHelper.getRequest(_context); 117 118 // Retrieve the current workspace. 119 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 120 // Retrieve the current site name. 121 String currentSiteName = (String) request.getAttribute("siteName"); 122 123 try 124 { 125 // Force the workspace. 126 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 127 128 // Get the site in the given workspace. 129 Site site = null; 130 try 131 { 132 site = _siteManager.getSite(siteName); 133 } 134 catch (UnknownAmetysObjectException e) 135 { 136 // Site might not exist in the desired workspace (archive for example) 137 return; 138 } 139 140 // Set the site name in the request. 141 request.setAttribute("siteName", siteName); 142 143 getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName); 144 145 // Delete all documents from this site in current workspace 146 _unindexSiteDocuments(siteName, workspaceName, solrClient); 147 148 // Index the site's contents in current workspace 149 _indexContents(site, workspaceName, solrClient); 150 151 // Index the site's sitemaps and pages in current workspace 152 _indexSitemaps(site, workspaceName, solrClient); 153 154 // Index the site's resources in current workspace 155 _indexResources(site, workspaceName, solrClient); 156 157 // Add additional site documents 158 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 159 { 160 SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId); 161 162 getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider); 163 164 docProvider.indexSiteDocuments(site, solrClient); 165 } 166 167 if (commit) 168 { 169 _solrIndexer.commit(workspaceName, solrClient); 170 } 171 } 172 catch (Exception e) 173 { 174 String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName); 175 getLogger().error(error, e); 176 throw new IndexingException(error, e); 177 } 178 finally 179 { 180 // Restore the site name. 181 request.setAttribute("siteName", currentSiteName); 182 // Restore context 183 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 184 } 185 } 186 187 @Override 188 public void indexSitemap(Sitemap sitemap) throws IndexingException 189 { 190 indexSitemap(sitemap.getSiteName(), sitemap.getName()); 191 } 192 193 @Override 194 public void indexSitemap(String siteName, String sitemapName) throws IndexingException 195 { 196 indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE); 197 indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE); 198 } 199 200 @Override 201 public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException 202 { 203 Request request = ContextHelper.getRequest(_context); 204 205 // Retrieve the current workspace. 206 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 207 208 try 209 { 210 // Force the workspace. 211 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 212 request.setAttribute("siteName", siteName); 213 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, false); 214 215 getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName); 216 217 _indexSitemap(siteName, sitemapName, workspaceName, solrClient); 218 219 _solrIndexer.commit(workspaceName, solrClient); 220 } 221 catch (Exception e) 222 { 223 String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName); 224 getLogger().error(error, e); 225 throw new IndexingException(error, e); 226 } 227 finally 228 { 229 // Restore context 230 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 231 } 232 } 233 234 @Override 235 public void unindexSite(String siteName) throws IndexingException 236 { 237 unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE); 238 unindexSite(siteName, WebConstants.LIVE_WORKSPACE); 239 unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE); 240 } 241 242 @Override 243 public void unindexSite(String siteName, String workspaceName) throws IndexingException 244 { 245 Request request = ContextHelper.getRequest(_context); 246 247 // Retrieve the current workspace. 248 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 249 250 try 251 { 252 // Force the workspace. 253 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 254 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName, true); 255 256 getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName); 257 258 _unindexSiteDocuments(siteName, workspaceName, solrClient); 259 260 } 261 catch (Exception e) 262 { 263 String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName); 264 getLogger().error(error, e); 265 throw new IndexingException(error, e); 266 } 267 finally 268 { 269 // Restore context 270 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 271 } 272 } 273 274 @Override 275 public void unindexSitemap(String siteName, String sitemapName) throws IndexingException 276 { 277 unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE); 278 unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE); 279 } 280 281 @Override 282 public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException 283 { 284 Request request = ContextHelper.getRequest(_context); 285 286 // Retrieve the current workspace. 287 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 288 289 try 290 { 291 // Force the workspace. 292 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 293 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 294 295 getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName); 296 297 _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient); 298 } 299 catch (Exception e) 300 { 301 String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName); 302 getLogger().error(error, e); 303 throw new IndexingException(error, e); 304 } 305 finally 306 { 307 // Restore context 308 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 309 } 310 } 311 312 private void _indexSitemaps(Site site, String workspaceName, SolrClient solrClient) throws Exception 313 { 314 // The sitemap node may not exist if site was created but not yet configured 315 if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps")) 316 { 317 AmetysObjectIterable<Sitemap> sitemaps = site.getSitemaps(); 318 for (Sitemap sitemap : sitemaps) 319 { 320 getLogger().info("Indexing sitemap {} started", sitemap.getName()); 321 322 long start = System.currentTimeMillis(); 323 324 _indexSitemap(site.getName(), sitemap.getName(), workspaceName, solrClient); 325 326 long end = System.currentTimeMillis(); 327 328 getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start); 329 } 330 } 331 } 332 333 private void _indexSitemap(String siteName, String sitemapName, String workspaceName, SolrClient solrClient) throws Exception 334 { 335 // Get the sitemap in the given workspace. 336 Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName); 337 338 // First delete the directory if exists 339 _unindexSitemapDocuments(siteName, sitemapName, workspaceName, solrClient); 340 341 // Index pages of this sitemap 342 AmetysObjectIterable<? extends Page> children = sitemap.getChildrenPages(); 343 for (Page page : children) 344 { 345 // Index page recursively, without committing. 346 _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, solrClient); 347 } 348 349 // Add additional sitemap documents 350 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 351 { 352 SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId); 353 354 getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider); 355 356 docProvider.indexSitemapDocuments(sitemap, solrClient); 357 } 358 } 359 360 private void _unindexSiteDocuments(String siteName, String workspaceName, SolrClient solrClient) throws SolrServerException, IOException 361 { 362 // query 363 String query = "site:" + ClientUtils.escapeQueryChars(siteName); 364 365 // delete 366 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 367 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query); 368 int status = solrResponse.getStatus(); 369 370 if (status != 0) 371 { 372 throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName); 373 } 374 375 getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName); 376 } 377 378 private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace, SolrClient solrClient) throws SolrServerException, IOException 379 { 380 // query 381 String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName); 382 383 // delete 384 String collectionName = _solrClientProvider.getCollectionName(workspace); 385 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query); 386 int status = solrResponse.getStatus(); 387 388 if (status != 0) 389 { 390 throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName); 391 } 392 393 getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName); 394 } 395 396 /** 397 * Index the contents of a site. 398 * @param site The site to index. 399 * @param workspaceName The workspace name 400 * @param solrClient The solr client to use 401 * @throws Exception If an error occurs indexing the contents. 402 */ 403 protected void _indexContents(Site site, String workspaceName, SolrClient solrClient) throws Exception 404 { 405 _solrIndexer.indexContents(site.getContents(), workspaceName, true, solrClient); 406 } 407 408 /** 409 * Index the resources of a site. 410 * @param site The site to index. 411 * @param workspaceName The workspace name 412 * @param solrClient The solr client to use 413 * @throws Exception If an error occurs indexing the resources. 414 */ 415 protected void _indexResources(Site site, String workspaceName, SolrClient solrClient) throws Exception 416 { 417 try 418 { 419 _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, solrClient); 420 } 421 catch (UnknownAmetysObjectException e) 422 { 423 // Ignore if the resource root is not present. 424 } 425 } 426}