001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.robots;
017
018import java.io.IOException;
019import java.util.Calendar;
020import java.util.Date;
021import java.util.GregorianCalendar;
022import java.util.Timer;
023import java.util.TimerTask;
024import java.util.concurrent.TimeUnit;
025
026import org.apache.avalon.framework.activity.Disposable;
027import org.apache.avalon.framework.activity.Initializable;
028import org.apache.avalon.framework.component.Component;
029import org.apache.avalon.framework.logger.LogEnabled;
030import org.apache.avalon.framework.logger.Logger;
031import org.apache.avalon.framework.service.ServiceException;
032import org.apache.avalon.framework.service.ServiceManager;
033import org.apache.avalon.framework.service.Serviceable;
034import org.apache.commons.lang.StringUtils;
035import org.apache.http.HttpResponse;
036import org.apache.http.client.ClientProtocolException;
037import org.apache.http.client.config.RequestConfig;
038import org.apache.http.client.methods.HttpGet;
039import org.apache.http.impl.client.CloseableHttpClient;
040import org.apache.http.impl.client.HttpClientBuilder;
041
042import org.ametys.core.util.URLEncoder;
043import org.ametys.plugins.repository.AmetysObjectIterable;
044import org.ametys.runtime.config.Config;
045import org.ametys.web.repository.site.Site;
046import org.ametys.web.repository.site.SiteManager;
047import org.ametys.web.site.SiteConfigurationExtensionPoint;
048
049/**
050 * Periodically ping three search engine : Google, Yahoo and Bing
051 * 
052 */
053public class PingSitemapTask extends TimerTask implements Component, Initializable, LogEnabled, Serviceable, Disposable
054{
055    /** Avalon role */
056    public static final String ROLE = PingSitemapTask.class.getName();
057    
058    private static final String EXECUTION_HOUR_PARAMETER = "robots.sitemap.ping.cron.expression";
059
060    private static final String PING_SITEMAP_TIMER_NAME = "PingSitemapScheduler";
061
062    private static final String PING_ACTIVATED_SITE_PARAM = "ping_activated";
063
064    private static final String BING_URL = "http://www.bing.com/webmaster/ping.aspx?siteMap=";
065
066    private static final String GOOGLE_URL = "http://www.google.com/webmasters/sitemaps/ping?sitemap=";
067
068    private static final String SITEMAP_XML_PATH = "/sitemap.xml";
069
070    private Logger _logger;
071
072    private SiteManager _siteManager;
073
074    private Timer _timer;
075
076    private SiteConfigurationExtensionPoint _siteConfigurationEP;
077
078    private CloseableHttpClient getHttpClient()
079    {
080        RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(2000).setSocketTimeout(2000).build();
081        return HttpClientBuilder.create().useSystemProperties().setDefaultRequestConfig(requestConfig).build();
082    }
083
084    private String getSitemapUrl(Site site)
085    {
086        return site.getUrl() + SITEMAP_XML_PATH;
087    }
088
089    private void handleResponse(String uri, HttpGet pingRrequest, HttpResponse response)
090    {
091        if (response.getStatusLine().getStatusCode() == 200)
092        {
093            if (_logger.isDebugEnabled())
094            {
095                _logger.debug("Sitemap ping sent and received successfully to: " + pingRrequest.getURI().getHost());
096            }
097        }
098        else
099        {
100            if (_logger.isWarnEnabled())
101            {
102                _logger.warn("Unable to to ping search engin with request: " + uri + ", response: " + response.getStatusLine());
103            }
104        }
105    }
106
107    private void pingSearchEngine(String searchEngineUrl, String sitemapUrl) throws Exception
108    {
109        String uri = searchEngineUrl + URLEncoder.encodePath(sitemapUrl);
110        
111        try (CloseableHttpClient httpClient = getHttpClient())
112        {
113            HttpGet pingRequest = new HttpGet(uri);
114            handleResponse(uri, pingRequest, httpClient.execute(pingRequest));
115        }
116        catch (Exception e)
117        {
118            if (e instanceof IOException || e instanceof ClientProtocolException)
119            {
120                if (_logger.isWarnEnabled())
121                {
122                    _logger.warn("Unable to ping search engine: " + uri, e);
123                }
124            }
125            else
126            {
127                throw e;
128            }
129        }
130    }
131
132    @Override
133    public void dispose()
134    {
135        _logger = null;
136
137        cancel();
138        _timer.cancel();
139        _timer = null;
140    }
141
142    @Override
143    public void enableLogging(Logger logger)
144    {
145        _logger = logger;
146    }
147
148    @Override
149    public void initialize() throws Exception
150    {
151        if (_logger.isDebugEnabled())
152        {
153            _logger.debug("Initializing the Sitemap Ping...");
154        }
155
156        // Schedule a timer to run each night.
157        String hourStr = Config.getInstance().getValueAsString(EXECUTION_HOUR_PARAMETER);
158        int hour = 0;
159        int minute = 0;
160        if (StringUtils.isNotEmpty(hourStr) && hourStr.indexOf(':') > 0)
161        {
162            String[] hourArray = StringUtils.split(hourStr, ':');
163            hour = Integer.parseInt(hourArray[0]);
164            minute = Integer.parseInt(hourArray[1]);
165        }
166
167        GregorianCalendar calendar = new GregorianCalendar();
168        calendar.set(Calendar.AM_PM, hour < 12 ? Calendar.AM : Calendar.PM);
169        calendar.set(Calendar.HOUR, hour % 12);
170        calendar.set(Calendar.MINUTE, minute);
171        calendar.set(Calendar.SECOND, 0);
172        calendar.set(Calendar.MILLISECOND, 0);
173
174        // Each day.
175        long period = TimeUnit.DAYS.toMillis(1);
176        Date firstTime = calendar.getTime();
177
178        Date now = new Date();
179
180        // If the given time today is past, schedule for tomorrow.
181        if (firstTime.compareTo(now) < 0)
182        {
183            calendar.add(Calendar.DAY_OF_MONTH, 1);
184            firstTime = calendar.getTime();
185        }
186
187        if (_logger.isInfoEnabled())
188        {
189            _logger.info("Sitemap Ping: the sitemap ping will run each day, starting " + firstTime.toString());
190        }
191
192        _timer = new Timer(PING_SITEMAP_TIMER_NAME, true);
193        _timer.schedule(this, firstTime, period);
194    }
195
196    @Override
197    public void run()
198    {
199        AmetysObjectIterable<Site> sites = _siteManager.getSites();
200        for (Site site : sites)
201        {
202            String url = getSitemapUrl(site);
203            Boolean activated = _siteConfigurationEP.getValueAsBoolean(site.getName(), PING_ACTIVATED_SITE_PARAM);
204            if (Boolean.TRUE.equals(activated))
205            {
206                try
207                {
208                    pingSearchEngine(GOOGLE_URL, url);
209                    pingSearchEngine(BING_URL, url);
210                }
211                catch (Exception e)
212                {
213                    _logger.error("Ping error : " + url, e);
214                }
215            }
216        }
217    }
218
219    @Override
220    public void service(ServiceManager manager) throws ServiceException
221    {
222        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
223        _siteConfigurationEP = (SiteConfigurationExtensionPoint) manager.lookup(SiteConfigurationExtensionPoint.ROLE);
224    }
225
226}