001/* 002 * Copyright 2015 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.indexing.solr; 017 018import java.io.IOException; 019 020import org.apache.avalon.framework.context.Context; 021import org.apache.avalon.framework.context.ContextException; 022import org.apache.avalon.framework.context.Contextualizable; 023import org.apache.avalon.framework.service.ServiceException; 024import org.apache.avalon.framework.service.ServiceManager; 025import org.apache.avalon.framework.service.Serviceable; 026import org.apache.cocoon.components.ContextHelper; 027import org.apache.cocoon.environment.Request; 028import org.apache.solr.client.solrj.SolrClient; 029import org.apache.solr.client.solrj.SolrServerException; 030import org.apache.solr.client.solrj.response.UpdateResponse; 031import org.apache.solr.client.solrj.util.ClientUtils; 032 033import org.ametys.cms.content.archive.ArchiveConstants; 034import org.ametys.cms.content.indexing.solr.SolrFieldNames; 035import org.ametys.cms.content.indexing.solr.SolrIndexer; 036import org.ametys.cms.indexing.IndexingException; 037import org.ametys.cms.search.solr.SolrClientProvider; 038import org.ametys.plugins.repository.RepositoryConstants; 039import org.ametys.plugins.repository.UnknownAmetysObjectException; 040import org.ametys.plugins.repository.provider.RequestAttributeWorkspaceSelector; 041import org.ametys.runtime.plugin.component.AbstractLogEnabled; 042import org.ametys.web.WebConstants; 043import org.ametys.web.indexing.SiteIndexer; 044import org.ametys.web.repository.page.Page; 045import org.ametys.web.repository.site.Site; 046import org.ametys.web.repository.site.SiteManager; 047import org.ametys.web.repository.sitemap.Sitemap; 048 049/** 050 * Solr implementation of {@link SiteIndexer}. 051 */ 052public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable 053{ 054 055 /** The site manager. */ 056 protected SiteManager _siteManager; 057 /** The solr indexer. */ 058 protected SolrIndexer _solrIndexer; 059 /** The solr page indexer. */ 060 protected SolrPageIndexer _solrPageIndexer; 061 /** The site document provider handler. */ 062 protected SiteDocumentProviderExtensionPoint _siteDocProviderEP; 063 /** The Solr client provider */ 064 protected SolrClientProvider _solrClientProvider; 065 066 private Context _context; 067 068 @Override 069 public void contextualize(Context context) throws ContextException 070 { 071 _context = context; 072 } 073 074 @Override 075 public void service(ServiceManager manager) throws ServiceException 076 { 077 _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE); 078 _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE); 079 _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE); 080 _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE); 081 _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE); 082 } 083 084 @Override 085 public void indexSite(Site site) throws IndexingException 086 { 087 indexSite(site.getName()); 088 } 089 090 @Override 091 public void indexSite (String siteName) throws IndexingException 092 { 093 indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE); 094 indexSite(siteName, WebConstants.LIVE_WORKSPACE); 095 indexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE); 096 } 097 098 @Override 099 public void indexSite(String siteName, String workspaceName) throws IndexingException 100 { 101 Request request = ContextHelper.getRequest(_context); 102 103 // Retrieve the current workspace. 104 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 105 // Retrieve the current site name. 106 String currentSiteName = (String) request.getAttribute("siteName"); 107 108 try 109 { 110 // Force the workspace. 111 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 112 113 // Get the site in the given workspace. 114 Site site = _siteManager.getSite(siteName); 115 116 // Site might not exist in the desired workspace (archive for example) 117 if (site == null) 118 { 119 return; 120 } 121 122 // Set the site name in the request. 123 request.setAttribute("siteName", siteName); 124 125 getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName); 126 127 // Delete all documents from this site in current workspace 128 _unindexSiteDocuments(siteName, workspaceName); 129 130 // Index the site's contents in current workspace 131 _indexContents(site, workspaceName); 132 133 // Index the site's sitemaps and pages in current workspace 134 _indexSitemaps(site, workspaceName); 135 136 // Index the site's resources in current workspace 137 _indexResources(site, workspaceName); 138 139 // Add additional site documents 140 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 141 { 142 SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId); 143 144 getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider); 145 146 docProvider.indexSiteDocuments(site); 147 } 148 149 _solrIndexer.commit(workspaceName); 150 _solrIndexer.optimize(workspaceName); 151 } 152 catch (Exception e) 153 { 154 String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName); 155 getLogger().error(error, e); 156 throw new IndexingException(error, e); 157 } 158 finally 159 { 160 // Restore the site name. 161 request.setAttribute("siteName", currentSiteName); 162 // Restore context 163 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 164 } 165 } 166 167 @Override 168 public void indexSitemap(Sitemap sitemap) throws IndexingException 169 { 170 indexSitemap(sitemap.getSiteName(), sitemap.getName()); 171 } 172 173 @Override 174 public void indexSitemap(String siteName, String sitemapName) throws IndexingException 175 { 176 indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE); 177 indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE); 178 } 179 180 @Override 181 public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException 182 { 183 Request request = ContextHelper.getRequest(_context); 184 185 // Retrieve the current workspace. 186 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 187 188 try 189 { 190 // Force the workspace. 191 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 192 request.setAttribute("siteName", siteName); 193 194 getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName); 195 196 _indexSitemap (siteName, sitemapName, workspaceName); 197 198 _solrIndexer.commit(workspaceName); 199 _solrIndexer.optimize(workspaceName); 200 } 201 catch (Exception e) 202 { 203 String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName); 204 getLogger().error(error, e); 205 throw new IndexingException(error, e); 206 } 207 finally 208 { 209 // Restore context 210 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 211 } 212 } 213 214 @Override 215 public void unindexSite(String siteName) throws IndexingException 216 { 217 unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE); 218 unindexSite(siteName, WebConstants.LIVE_WORKSPACE); 219 unindexSite(siteName, ArchiveConstants.ARCHIVE_WORKSPACE); 220 } 221 222 @Override 223 public void unindexSite(String siteName, String workspaceName) throws IndexingException 224 { 225 Request request = ContextHelper.getRequest(_context); 226 227 // Retrieve the current workspace. 228 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 229 230 try 231 { 232 // Force the workspace. 233 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 234 235 getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName); 236 237 _unindexSiteDocuments(siteName, workspaceName); 238 _solrIndexer.commit(workspaceName); 239 240 } 241 catch (Exception e) 242 { 243 String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName); 244 getLogger().error(error, e); 245 throw new IndexingException(error, e); 246 } 247 finally 248 { 249 // Restore context 250 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 251 } 252 } 253 254 @Override 255 public void unindexSitemap(String siteName, String sitemapName) throws IndexingException 256 { 257 unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE); 258 unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE); 259 } 260 261 @Override 262 public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException 263 { 264 Request request = ContextHelper.getRequest(_context); 265 266 // Retrieve the current workspace. 267 String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request); 268 269 try 270 { 271 // Force the workspace. 272 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName); 273 274 getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName); 275 276 _unindexSitemapDocuments(siteName, sitemapName, workspaceName); 277 278 _solrIndexer.commit(workspaceName); 279 } 280 catch (Exception e) 281 { 282 String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName); 283 getLogger().error(error, e); 284 throw new IndexingException(error, e); 285 } 286 finally 287 { 288 // Restore context 289 RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp); 290 } 291 } 292 293 private void _indexSitemaps(Site site, String workspaceName) throws Exception 294 { 295 // The sitemap node may not exist if site was created but not yet configured 296 if (site.getNode().hasNode(RepositoryConstants.NAMESPACE_PREFIX_INTERNAL + ":sitemaps")) 297 { 298 for (Sitemap sitemap : site.getSitemaps()) 299 { 300 getLogger().info("Indexing sitemap {} started", sitemap.getName()); 301 302 long start = System.currentTimeMillis(); 303 304 _indexSitemap(site.getName(), sitemap.getName(), workspaceName); 305 306 long end = System.currentTimeMillis(); 307 308 getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start); 309 } 310 } 311 } 312 313 private void _indexSitemap (String siteName, String sitemapName, String workspaceName) throws Exception 314 { 315 // Get the sitemap in the given workspace. 316 Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName); 317 318 // First delete the directory if exists 319 _unindexSitemapDocuments(siteName, sitemapName, workspaceName); 320 321 // Index pages of this sitemap 322 for (Page page : sitemap.getChildrenPages()) 323 { 324 // Index page recursively, without committing. 325 _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, false); 326 } 327 328 // Add additional sitemap documents 329 for (String docProviderId : _siteDocProviderEP.getExtensionsIds()) 330 { 331 SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId); 332 333 getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider); 334 335 docProvider.indexSitemapDocuments(sitemap); 336 } 337 } 338 339 private void _unindexSiteDocuments(String siteName, String workspaceName) throws SolrServerException, IOException 340 { 341 // query 342 String query = "site:" + ClientUtils.escapeQueryChars(siteName); 343 344 // delete 345 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName); 346 String collectionName = _solrClientProvider.getCollectionName(workspaceName); 347 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query); 348 int status = solrResponse.getStatus(); 349 350 if (status != 0) 351 { 352 throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName); 353 } 354 355 getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName); 356 } 357 358 private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace) throws SolrServerException, IOException 359 { 360 // query 361 String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName); 362 363 // delete 364 SolrClient solrClient = _solrClientProvider.getUpdateClient(workspace); 365 String collectionName = _solrClientProvider.getCollectionName(workspace); 366 UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query); 367 int status = solrResponse.getStatus(); 368 369 if (status != 0) 370 { 371 throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName); 372 } 373 374 getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName); 375 } 376 377 /** 378 * Index the contents of a site. 379 * @param site The site to index. 380 * @param workspaceName The workspace name 381 * @throws Exception If an error occurs indexing the contents. 382 */ 383 protected void _indexContents(Site site, String workspaceName) throws Exception 384 { 385 _solrIndexer.indexContents(site.getContents(), workspaceName, true, false); 386 } 387 388 /** 389 * Index the resources of a site. 390 * @param site The site to index. 391 * @param workspaceName The workspace name 392 * @throws Exception If an error occurs indexing the resources. 393 */ 394 protected void _indexResources(Site site, String workspaceName) throws Exception 395 { 396 try 397 { 398 _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, false); 399 } 400 catch (UnknownAmetysObjectException e) 401 { 402 // Ignore if the resource root is not present. 403 } 404 } 405}