/*
 *  Copyright 2015 Anyware Services
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.ametys.web.indexing.solr;

import java.io.IOException;

import org.apache.avalon.framework.context.Context;
import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.cocoon.components.ContextHelper;
import org.apache.cocoon.environment.Request;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.client.solrj.util.ClientUtils;

import org.ametys.cms.content.indexing.solr.SolrFieldNames;
import org.ametys.cms.content.indexing.solr.SolrIndexer;
import org.ametys.cms.indexing.IndexingException;
import org.ametys.cms.repository.RequestAttributeWorkspaceSelector;
import org.ametys.cms.search.solr.SolrClientProvider;
import org.ametys.plugins.repository.RepositoryConstants;
import org.ametys.plugins.repository.UnknownAmetysObjectException;
import org.ametys.runtime.plugin.component.AbstractLogEnabled;
import org.ametys.web.WebConstants;
import org.ametys.web.indexing.SiteIndexer;
import org.ametys.web.repository.page.Page;
import org.ametys.web.repository.site.Site;
import org.ametys.web.repository.site.SiteManager;
import org.ametys.web.repository.sitemap.Sitemap;

/**
 * Solr implementation of {@link SiteIndexer}.<br>
 * Every public operation temporarily forces the workspace on the current request
 * and restores the previous request state before returning, whether or not the
 * operation succeeded.
 */
public class SolrSiteIndexer extends AbstractLogEnabled implements SiteIndexer, Serviceable, Contextualizable
{
    
    /** The site manager. */
    protected SiteManager _siteManager;
    /** The solr indexer. */
    protected SolrIndexer _solrIndexer;
    /** The solr page indexer. */
    protected SolrPageIndexer _solrPageIndexer;
    /** The site document provider handler. */
    protected SiteDocumentProviderExtensionPoint _siteDocProviderEP;
    /** The Solr client provider */
    protected SolrClientProvider _solrClientProvider;
    
    /** The component context, used to retrieve the current request. */
    private Context _context;
    
    @Override
    public void contextualize(Context context) throws ContextException
    {
        _context = context;
    }
    
    @Override
    public void service(ServiceManager manager) throws ServiceException
    {
        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
        _solrIndexer = (SolrIndexer) manager.lookup(SolrIndexer.ROLE);
        _solrPageIndexer = (SolrPageIndexer) manager.lookup(SolrPageIndexer.ROLE);
        _siteDocProviderEP = (SiteDocumentProviderExtensionPoint) manager.lookup(SiteDocumentProviderExtensionPoint.ROLE);
        _solrClientProvider = (SolrClientProvider) manager.lookup(SolrClientProvider.ROLE);
    }
    
    @Override
    public void indexSite(Site site) throws IndexingException
    {
        indexSite(site.getName());
    }
    
    @Override
    public void indexSite (String siteName) throws IndexingException
    {
        indexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
        indexSite(siteName, WebConstants.LIVE_WORKSPACE);
        // TODO index archives workspace
    }
    
    @Override
    public void indexSite(String siteName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Remember the request state so that it can be restored in the finally block.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        String currentSiteName = (String) request.getAttribute("siteName");
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            
            // Get the site in the given workspace.
            Site site = _siteManager.getSite(siteName);
            
            // Site might not exist in the desired workspace (archive for example)
            if (site == null)
            {
                return;
            }
            
            // Set the site name in the request.
            request.setAttribute("siteName", siteName);
            
            getLogger().info("Indexing site '{}' in workspace '{}'", siteName, workspaceName);
            
            // Delete all documents from this site in current workspace
            _unindexSiteDocuments(siteName, workspaceName);
            
            // Index the site's contents in current workspace
            _indexContents(site, workspaceName);
            
            // Index the site's sitemaps and pages in current workspace
            _indexSitemaps(site, workspaceName);
            
            // Index the site's resources in current workspace
            _indexResources(site, workspaceName);
            
            // Add additional site documents
            for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
            {
                SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
                
                getLogger().debug("Indexing additional documents for site {} with provider {}", siteName, docProvider);
                
                docProvider.indexSiteDocuments(site);
            }
            
            // Nothing above commits: a single commit and optimize ends the whole site indexing.
            _solrIndexer.commit(workspaceName);
            _solrIndexer.optimize(workspaceName);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to index site %s in workspace %s", siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore the site name.
            request.setAttribute("siteName", currentSiteName);
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    @Override
    public void indexSitemap(Sitemap sitemap) throws IndexingException
    {
        indexSitemap(sitemap.getSiteName(), sitemap.getName());
    }
    
    @Override
    public void indexSitemap(String siteName, String sitemapName) throws IndexingException
    {
        indexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
        indexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
    }
    
    @Override
    public void indexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Remember the request state so that it can be restored in the finally block.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        String currentSiteName = (String) request.getAttribute("siteName");
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            request.setAttribute("siteName", siteName);
            
            getLogger().info("Indexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
            
            _indexSitemap (siteName, sitemapName, workspaceName);
            
            _solrIndexer.commit(workspaceName);
            _solrIndexer.optimize(workspaceName);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to index sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore the site name, as indexSite(String, String) does: the attribute was
            // overwritten above and must not leak into the rest of the request processing.
            request.setAttribute("siteName", currentSiteName);
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    @Override
    public void unindexSite(String siteName) throws IndexingException
    {
        unindexSite(siteName, RepositoryConstants.DEFAULT_WORKSPACE);
        unindexSite(siteName, WebConstants.LIVE_WORKSPACE);
        // TODO unindex archives workspace
    }
    
    @Override
    public void unindexSite(String siteName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            
            getLogger().info("Unindexing site '{}' in workspace {}", siteName, workspaceName);
            
            _unindexSiteDocuments(siteName, workspaceName);
            _solrIndexer.commit(workspaceName);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to unindex the site %s for workspace %s", siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    @Override
    public void unindexSitemap(String siteName, String sitemapName) throws IndexingException
    {
        unindexSitemap(siteName, sitemapName, RepositoryConstants.DEFAULT_WORKSPACE);
        unindexSitemap(siteName, sitemapName, WebConstants.LIVE_WORKSPACE);
    }
    
    @Override
    public void unindexSitemap(String siteName, String sitemapName, String workspaceName) throws IndexingException
    {
        Request request = ContextHelper.getRequest(_context);
        
        // Retrieve the current workspace.
        String currentWsp = RequestAttributeWorkspaceSelector.getForcedWorkspace(request);
        
        try
        {
            // Force the workspace.
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, workspaceName);
            
            getLogger().info("Unindexing sitemap '{}' of site '{}' in workspace '{}'", sitemapName, siteName, workspaceName);
            
            _unindexSitemapDocuments(siteName, sitemapName, workspaceName);
            
            _solrIndexer.commit(workspaceName);
        }
        catch (Exception e)
        {
            String error = String.format("Failed to unindex sitemap '%s' for site '%s' and workspace '%s'", sitemapName, siteName, workspaceName);
            getLogger().error(error, e);
            throw new IndexingException(error, e);
        }
        finally
        {
            // Restore context
            RequestAttributeWorkspaceSelector.setForcedWorkspace(request, currentWsp);
        }
    }
    
    /**
     * Index every sitemap of a site, logging the time spent on each of them.
     * Does not commit.
     * @param site The site whose sitemaps are indexed.
     * @param workspaceName The workspace name
     * @throws Exception If an error occurs indexing one of the sitemaps.
     */
    private void _indexSitemaps(Site site, String workspaceName) throws Exception
    {
        for (Sitemap sitemap : site.getSitemaps())
        {
            getLogger().info("Indexing sitemap {} started", sitemap.getName());
            
            long start = System.currentTimeMillis();
            
            _indexSitemap(site.getName(), sitemap.getName(), workspaceName);
            
            long end = System.currentTimeMillis();
            
            getLogger().info("Indexing sitemap {} ended in {} ms", sitemap.getName(), end - start);
        }
    }
    
    /**
     * Replace the indexed documents of a sitemap: delete the existing ones, then index
     * its pages and the documents of the additional providers. Does not commit.
     * @param siteName The site name
     * @param sitemapName The sitemap name
     * @param workspaceName The workspace name
     * @throws Exception If an error occurs indexing the sitemap.
     */
    private void _indexSitemap (String siteName, String sitemapName, String workspaceName) throws Exception
    {
        // Get the sitemap in the given workspace.
        // NOTE(review): indexSite(String, String) guards against getSite() returning null;
        // here a site missing from the workspace would fail with a NullPointerException - confirm
        // whether the sitemap indexing should be skipped instead.
        Sitemap sitemap = _siteManager.getSite(siteName).getSitemap(sitemapName);
        
        // First delete the directory if exists
        _unindexSitemapDocuments(siteName, sitemapName, workspaceName);
        
        // Index pages of this sitemap
        for (Page page : sitemap.getChildrenPages())
        {
            // Index page recursively, without committing.
            _solrPageIndexer.indexPage(page.getId(), workspaceName, true, true, false);
        }
        
        // Add additional sitemap documents
        for (String docProviderId : _siteDocProviderEP.getExtensionsIds())
        {
            SiteDocumentProvider docProvider = _siteDocProviderEP.getExtension(docProviderId);
            
            getLogger().debug("Indexing additional documents for sitemap {} with provider {}", sitemapName, docProvider);
            
            docProvider.indexSitemapDocuments(sitemap);
        }
    }
    
    /**
     * Delete from the index of a workspace all the documents matching a site.
     * Does not commit.
     * @param siteName The site name
     * @param workspaceName The workspace name
     * @throws SolrServerException If the Solr server reports an error.
     * @throws IOException If a communication error occurs, or if the Solr response status is not 0.
     */
    private void _unindexSiteDocuments(String siteName, String workspaceName) throws SolrServerException, IOException
    {
        // query (the site name is escaped, so it cannot alter the query syntax)
        String query = "site:" + ClientUtils.escapeQueryChars(siteName);
        
        // delete
        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspaceName);
        String collectionName = _solrClientProvider.getCollectionName(workspaceName);
        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
        int status = solrResponse.getStatus();
        
        if (status != 0)
        {
            throw new IOException("Ametys Site de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Site name : " + siteName);
        }
        
        getLogger().debug("Successful site de-indexing. Site identifier : {}", siteName);
    }
    
    /**
     * Delete from the index of a workspace all the documents matching a sitemap of a site.
     * Does not commit.
     * @param siteName The site name
     * @param sitemapName The sitemap name
     * @param workspace The workspace name
     * @throws SolrServerException If the Solr server reports an error.
     * @throws IOException If a communication error occurs, or if the Solr response status is not 0.
     */
    private void _unindexSitemapDocuments(String siteName, String sitemapName, String workspace) throws SolrServerException, IOException
    {
        // query (both names are escaped, so they cannot alter the query syntax)
        String query = "site:" + ClientUtils.escapeQueryChars(siteName) + " AND sitemap:" + ClientUtils.escapeQueryChars(sitemapName);
        
        // delete
        SolrClient solrClient = _solrClientProvider.getUpdateClient(workspace);
        String collectionName = _solrClientProvider.getCollectionName(workspace);
        UpdateResponse solrResponse = solrClient.deleteByQuery(collectionName, query);
        int status = solrResponse.getStatus();
        
        if (status != 0)
        {
            throw new IOException("Ametys sitemap de-indexing - Expecting status code of '0' in the Solr response but got : '" + status + "'. Sitemap name : " + siteName + "/" + sitemapName);
        }
        
        getLogger().debug("Successful sitemap de-indexing. Sitemap name : {}/{}", siteName, sitemapName);
    }
    
    /**
     * Index the contents of a site.
     * @param site The site to index.
     * @param workspaceName The workspace name
     * @throws Exception If an error occurs indexing the contents.
     */
    protected void _indexContents(Site site, String workspaceName) throws Exception
    {
        _solrIndexer.indexContents(site.getContents(), workspaceName, false);
    }
    
    /**
     * Index the resources of a site.
     * @param site The site to index.
     * @param workspaceName The workspace name
     * @throws Exception If an error occurs indexing the resources.
     */
    protected void _indexResources(Site site, String workspaceName) throws Exception
    {
        try
        {
            _solrIndexer.indexResources(site.getResources(), SolrFieldNames.TYPE_RESOURCE, site.getRootResources(), workspaceName, false);
        }
        catch (UnknownAmetysObjectException e)
        {
            // Ignore if the resource root is not present: there is simply nothing to index.
            getLogger().debug("No resources indexed for site '{}' in workspace '{}': an expected repository object is missing", site.getName(), workspaceName);
        }
    }
}