001/* 002 * Copyright 2010 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.robots; 017 018import java.io.IOException; 019import java.util.Calendar; 020import java.util.Date; 021import java.util.GregorianCalendar; 022import java.util.Timer; 023import java.util.TimerTask; 024import java.util.concurrent.TimeUnit; 025 026import org.apache.avalon.framework.activity.Disposable; 027import org.apache.avalon.framework.activity.Initializable; 028import org.apache.avalon.framework.component.Component; 029import org.apache.avalon.framework.logger.LogEnabled; 030import org.apache.avalon.framework.logger.Logger; 031import org.apache.avalon.framework.service.ServiceException; 032import org.apache.avalon.framework.service.ServiceManager; 033import org.apache.avalon.framework.service.Serviceable; 034import org.apache.commons.lang.StringUtils; 035import org.apache.http.HttpResponse; 036import org.apache.http.client.ClientProtocolException; 037import org.apache.http.client.config.RequestConfig; 038import org.apache.http.client.methods.HttpGet; 039import org.apache.http.impl.client.CloseableHttpClient; 040import org.apache.http.impl.client.HttpClientBuilder; 041 042import org.ametys.core.util.URLEncoder; 043import org.ametys.plugins.repository.AmetysObjectIterable; 044import org.ametys.runtime.config.Config; 045import org.ametys.web.repository.site.Site; 046import org.ametys.web.repository.site.SiteManager; 047import org.ametys.web.site.SiteConfigurationExtensionPoint; 048 049/** 050 * Periodically ping three search engine : Google, Yahoo and Bing 051 * 052 */ 053public class PingSitemapTask extends TimerTask implements Component, Initializable, LogEnabled, Serviceable, Disposable 054{ 055 /** Avalon role */ 056 public static final String ROLE = PingSitemapTask.class.getName(); 057 058 private static final String EXECUTION_HOUR_PARAMETER = "robots.sitemap.ping.cron.expression"; 059 060 private static final String PING_SITEMAP_TIMER_NAME = "PingSitemapScheduler"; 061 062 private static final String PING_ACTIVATED_SITE_PARAM = "ping_activated"; 063 064 private static final String BING_URL = "http://www.bing.com/webmaster/ping.aspx?siteMap="; 065 066 private static final String GOOGLE_URL = "http://www.google.com/webmasters/sitemaps/ping?sitemap="; 067 068 private static final String SITEMAP_XML_PATH = "/sitemap.xml"; 069 070 private Logger _logger; 071 072 private SiteManager _siteManager; 073 074 private Timer _timer; 075 076 private SiteConfigurationExtensionPoint _siteConfigurationEP; 077 078 private CloseableHttpClient getHttpClient() 079 { 080 RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(2000).setSocketTimeout(2000).build(); 081 return HttpClientBuilder.create().useSystemProperties().setDefaultRequestConfig(requestConfig).build(); 082 } 083 084 private String getSitemapUrl(Site site) 085 { 086 return site.getUrl() + SITEMAP_XML_PATH; 087 } 088 089 private void handleResponse(String uri, HttpGet pingRrequest, HttpResponse response) 090 { 091 if (response.getStatusLine().getStatusCode() == 200) 092 { 093 if (_logger.isDebugEnabled()) 094 { 095 _logger.debug("Sitemap ping sent and received successfully to: " + pingRrequest.getURI().getHost()); 096 } 097 } 098 else 099 { 100 if (_logger.isWarnEnabled()) 101 { 102 _logger.warn("Unable to to ping search engin with request: " + uri + ", response: " + response.getStatusLine()); 103 } 104 } 105 } 106 107 private void pingSearchEngine(String searchEngineUrl, String sitemapUrl) throws Exception 108 { 109 String uri = searchEngineUrl + URLEncoder.encodePath(sitemapUrl); 110 111 try (CloseableHttpClient httpClient = getHttpClient()) 112 { 113 HttpGet pingRequest = new HttpGet(uri); 114 handleResponse(uri, pingRequest, httpClient.execute(pingRequest)); 115 } 116 catch (Exception e) 117 { 118 if (e instanceof IOException || e instanceof ClientProtocolException) 119 { 120 if (_logger.isWarnEnabled()) 121 { 122 _logger.warn("Unable to ping search engine: " + uri, e); 123 } 124 } 125 else 126 { 127 throw e; 128 } 129 } 130 } 131 132 @Override 133 public void dispose() 134 { 135 _logger = null; 136 137 cancel(); 138 _timer.cancel(); 139 _timer = null; 140 } 141 142 @Override 143 public void enableLogging(Logger logger) 144 { 145 _logger = logger; 146 } 147 148 @Override 149 public void initialize() throws Exception 150 { 151 if (_logger.isDebugEnabled()) 152 { 153 _logger.debug("Initializing the Sitemap Ping..."); 154 } 155 156 // Schedule a timer to run each night. 157 String hourStr = Config.getInstance().getValueAsString(EXECUTION_HOUR_PARAMETER); 158 int hour = 0; 159 int minute = 0; 160 if (StringUtils.isNotEmpty(hourStr) && hourStr.indexOf(':') > 0) 161 { 162 String[] hourArray = StringUtils.split(hourStr, ':'); 163 hour = Integer.parseInt(hourArray[0]); 164 minute = Integer.parseInt(hourArray[1]); 165 } 166 167 GregorianCalendar calendar = new GregorianCalendar(); 168 calendar.set(Calendar.AM_PM, hour < 12 ? Calendar.AM : Calendar.PM); 169 calendar.set(Calendar.HOUR, hour % 12); 170 calendar.set(Calendar.MINUTE, minute); 171 calendar.set(Calendar.SECOND, 0); 172 calendar.set(Calendar.MILLISECOND, 0); 173 174 // Each day. 175 long period = TimeUnit.DAYS.toMillis(1); 176 Date firstTime = calendar.getTime(); 177 178 Date now = new Date(); 179 180 // If the given time today is past, schedule for tomorrow. 181 if (firstTime.compareTo(now) < 0) 182 { 183 calendar.add(Calendar.DAY_OF_MONTH, 1); 184 firstTime = calendar.getTime(); 185 } 186 187 if (_logger.isInfoEnabled()) 188 { 189 _logger.info("Sitemap Ping: the sitemap ping will run each day, starting " + firstTime.toString()); 190 } 191 192 _timer = new Timer(PING_SITEMAP_TIMER_NAME, true); 193 _timer.schedule(this, firstTime, period); 194 } 195 196 @Override 197 public void run() 198 { 199 AmetysObjectIterable<Site> sites = _siteManager.getSites(); 200 for (Site site : sites) 201 { 202 String url = getSitemapUrl(site); 203 Boolean activated = _siteConfigurationEP.getValueAsBoolean(site.getName(), PING_ACTIVATED_SITE_PARAM); 204 if (Boolean.TRUE.equals(activated)) 205 { 206 try 207 { 208 pingSearchEngine(GOOGLE_URL, url); 209 pingSearchEngine(BING_URL, url); 210 } 211 catch (Exception e) 212 { 213 _logger.error("Ping error : " + url, e); 214 } 215 } 216 } 217 } 218 219 @Override 220 public void service(ServiceManager manager) throws ServiceException 221 { 222 _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE); 223 _siteConfigurationEP = (SiteConfigurationExtensionPoint) manager.lookup(SiteConfigurationExtensionPoint.ROLE); 224 } 225 226}