001/* 002 * Copyright 2010 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.robots; 017 018import java.io.IOException; 019import java.util.Calendar; 020import java.util.Date; 021import java.util.GregorianCalendar; 022import java.util.Timer; 023import java.util.TimerTask; 024import java.util.concurrent.TimeUnit; 025 026import org.apache.avalon.framework.activity.Disposable; 027import org.apache.avalon.framework.activity.Initializable; 028import org.apache.avalon.framework.component.Component; 029import org.apache.avalon.framework.logger.LogEnabled; 030import org.apache.avalon.framework.logger.Logger; 031import org.apache.avalon.framework.service.ServiceException; 032import org.apache.avalon.framework.service.ServiceManager; 033import org.apache.avalon.framework.service.Serviceable; 034import org.apache.commons.lang.StringUtils; 035import org.apache.http.HttpResponse; 036import org.apache.http.client.ClientProtocolException; 037import org.apache.http.client.config.RequestConfig; 038import org.apache.http.client.methods.HttpGet; 039import org.apache.http.impl.client.CloseableHttpClient; 040import org.apache.http.impl.client.HttpClientBuilder; 041 042import org.ametys.core.util.URIUtils; 043import org.ametys.plugins.repository.AmetysObjectIterable; 044import org.ametys.runtime.config.Config; 045import org.ametys.web.repository.site.Site; 046import org.ametys.web.repository.site.SiteManager; 047 048/** 049 * Periodically ping three search engine : Google, Yahoo and Bing 050 * 051 */ 052public class PingSitemapTask extends TimerTask implements Component, Initializable, LogEnabled, Serviceable, Disposable 053{ 054 /** Avalon role */ 055 public static final String ROLE = PingSitemapTask.class.getName(); 056 057 private static final String EXECUTION_HOUR_PARAMETER = "robots.sitemap.ping.cron.expression"; 058 059 private static final String PING_SITEMAP_TIMER_NAME = "PingSitemapScheduler"; 060 061 private static final String PING_ACTIVATED_SITE_PARAM = "ping_activated"; 062 063 private static final String BING_URL = "http://www.bing.com/webmaster/ping.aspx?siteMap="; 064 065 private static final String GOOGLE_URL = "http://www.google.com/webmasters/sitemaps/ping?sitemap="; 066 067 private static final String SITEMAP_XML_PATH = "/sitemap.xml"; 068 069 private Logger _logger; 070 071 private SiteManager _siteManager; 072 073 private Timer _timer; 074 075 private CloseableHttpClient getHttpClient() 076 { 077 RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(2000).setSocketTimeout(2000).build(); 078 return HttpClientBuilder.create().useSystemProperties().setDefaultRequestConfig(requestConfig).build(); 079 } 080 081 private String getSitemapUrl(Site site) 082 { 083 return site.getUrl() + SITEMAP_XML_PATH; 084 } 085 086 private void handleResponse(String uri, HttpGet pingRrequest, HttpResponse response) 087 { 088 if (response.getStatusLine().getStatusCode() == 200) 089 { 090 if (_logger.isDebugEnabled()) 091 { 092 _logger.debug("Sitemap ping sent and received successfully to: " + pingRrequest.getURI().getHost()); 093 } 094 } 095 else 096 { 097 if (_logger.isWarnEnabled()) 098 { 099 _logger.warn("Unable to to ping search engin with request: " + uri + ", response: " + response.getStatusLine()); 100 } 101 } 102 } 103 104 private void pingSearchEngine(String searchEngineUrl, String sitemapUrl) throws Exception 105 { 106 String uri = searchEngineUrl + URIUtils.encodePath(sitemapUrl); 107 108 try (CloseableHttpClient httpClient = getHttpClient()) 109 { 110 HttpGet pingRequest = new HttpGet(uri); 111 handleResponse(uri, pingRequest, httpClient.execute(pingRequest)); 112 } 113 catch (Exception e) 114 { 115 if (e instanceof IOException || e instanceof ClientProtocolException) 116 { 117 if (_logger.isWarnEnabled()) 118 { 119 _logger.warn("Unable to ping search engine: " + uri, e); 120 } 121 } 122 else 123 { 124 throw e; 125 } 126 } 127 } 128 129 @Override 130 public void dispose() 131 { 132 _logger = null; 133 134 cancel(); 135 _timer.cancel(); 136 _timer = null; 137 } 138 139 @Override 140 public void enableLogging(Logger logger) 141 { 142 _logger = logger; 143 } 144 145 @Override 146 public void initialize() throws Exception 147 { 148 if (_logger.isDebugEnabled()) 149 { 150 _logger.debug("Initializing the Sitemap Ping..."); 151 } 152 153 // Schedule a timer to run each night. 154 String hourStr = Config.getInstance().getValue(EXECUTION_HOUR_PARAMETER); 155 int hour = 0; 156 int minute = 0; 157 if (StringUtils.isNotEmpty(hourStr) && hourStr.indexOf(':') > 0) 158 { 159 String[] hourArray = StringUtils.split(hourStr, ':'); 160 hour = Integer.parseInt(hourArray[0]); 161 minute = Integer.parseInt(hourArray[1]); 162 } 163 164 GregorianCalendar calendar = new GregorianCalendar(); 165 calendar.set(Calendar.AM_PM, hour < 12 ? Calendar.AM : Calendar.PM); 166 calendar.set(Calendar.HOUR, hour % 12); 167 calendar.set(Calendar.MINUTE, minute); 168 calendar.set(Calendar.SECOND, 0); 169 calendar.set(Calendar.MILLISECOND, 0); 170 171 // Each day. 172 long period = TimeUnit.DAYS.toMillis(1); 173 Date firstTime = calendar.getTime(); 174 175 Date now = new Date(); 176 177 // If the given time today is past, schedule for tomorrow. 178 if (firstTime.compareTo(now) < 0) 179 { 180 calendar.add(Calendar.DAY_OF_MONTH, 1); 181 firstTime = calendar.getTime(); 182 } 183 184 if (_logger.isInfoEnabled()) 185 { 186 _logger.info("Sitemap Ping: the sitemap ping will run each day, starting " + firstTime.toString()); 187 } 188 189 _timer = new Timer(PING_SITEMAP_TIMER_NAME, true); 190 _timer.schedule(this, firstTime, period); 191 } 192 193 @Override 194 public void run() 195 { 196 AmetysObjectIterable<Site> sites = _siteManager.getSites(); 197 for (Site site : sites) 198 { 199 String url = getSitemapUrl(site); 200 Boolean activated = site.getValue(PING_ACTIVATED_SITE_PARAM); 201 if (Boolean.TRUE.equals(activated)) 202 { 203 try 204 { 205 pingSearchEngine(GOOGLE_URL, url); 206 pingSearchEngine(BING_URL, url); 207 } 208 catch (Exception e) 209 { 210 _logger.error("Ping error : " + url, e); 211 } 212 } 213 } 214 } 215 216 @Override 217 public void service(ServiceManager manager) throws ServiceException 218 { 219 _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE); 220 } 221 222}