001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.robots;
017
018import java.io.IOException;
019import java.util.Calendar;
020import java.util.Date;
021import java.util.GregorianCalendar;
022import java.util.Timer;
023import java.util.TimerTask;
024import java.util.concurrent.TimeUnit;
025
026import org.apache.avalon.framework.activity.Disposable;
027import org.apache.avalon.framework.activity.Initializable;
028import org.apache.avalon.framework.component.Component;
029import org.apache.avalon.framework.logger.LogEnabled;
030import org.apache.avalon.framework.logger.Logger;
031import org.apache.avalon.framework.service.ServiceException;
032import org.apache.avalon.framework.service.ServiceManager;
033import org.apache.avalon.framework.service.Serviceable;
034import org.apache.commons.lang.StringUtils;
035import org.apache.http.HttpResponse;
036import org.apache.http.client.ClientProtocolException;
037import org.apache.http.client.config.RequestConfig;
038import org.apache.http.client.methods.HttpGet;
039import org.apache.http.impl.client.CloseableHttpClient;
040import org.apache.http.impl.client.HttpClientBuilder;
041
042import org.ametys.core.util.URIUtils;
043import org.ametys.plugins.repository.AmetysObjectIterable;
044import org.ametys.runtime.config.Config;
045import org.ametys.web.repository.site.Site;
046import org.ametys.web.repository.site.SiteManager;
047
048/**
049 * Periodically ping three search engine : Google, Yahoo and Bing
050 * 
051 */
052public class PingSitemapTask extends TimerTask implements Component, Initializable, LogEnabled, Serviceable, Disposable
053{
054    /** Avalon role */
055    public static final String ROLE = PingSitemapTask.class.getName();
056    
057    private static final String EXECUTION_HOUR_PARAMETER = "robots.sitemap.ping.cron.expression";
058
059    private static final String PING_SITEMAP_TIMER_NAME = "PingSitemapScheduler";
060
061    private static final String PING_ACTIVATED_SITE_PARAM = "ping_activated";
062
063    private static final String BING_URL = "http://www.bing.com/webmaster/ping.aspx?siteMap=";
064
065    private static final String GOOGLE_URL = "http://www.google.com/webmasters/sitemaps/ping?sitemap=";
066
067    private static final String SITEMAP_XML_PATH = "/sitemap.xml";
068
069    private Logger _logger;
070
071    private SiteManager _siteManager;
072
073    private Timer _timer;
074
075    private CloseableHttpClient getHttpClient()
076    {
077        RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(2000).setSocketTimeout(2000).build();
078        return HttpClientBuilder.create().useSystemProperties().setDefaultRequestConfig(requestConfig).build();
079    }
080
081    private String getSitemapUrl(Site site)
082    {
083        return site.getUrl() + SITEMAP_XML_PATH;
084    }
085
086    private void handleResponse(String uri, HttpGet pingRrequest, HttpResponse response)
087    {
088        if (response.getStatusLine().getStatusCode() == 200)
089        {
090            if (_logger.isDebugEnabled())
091            {
092                _logger.debug("Sitemap ping sent and received successfully to: " + pingRrequest.getURI().getHost());
093            }
094        }
095        else
096        {
097            if (_logger.isWarnEnabled())
098            {
099                _logger.warn("Unable to to ping search engin with request: " + uri + ", response: " + response.getStatusLine());
100            }
101        }
102    }
103
104    private void pingSearchEngine(String searchEngineUrl, String sitemapUrl) throws Exception
105    {
106        String uri = searchEngineUrl + URIUtils.encodePath(sitemapUrl);
107        
108        try (CloseableHttpClient httpClient = getHttpClient())
109        {
110            HttpGet pingRequest = new HttpGet(uri);
111            handleResponse(uri, pingRequest, httpClient.execute(pingRequest));
112        }
113        catch (Exception e)
114        {
115            if (e instanceof IOException || e instanceof ClientProtocolException)
116            {
117                if (_logger.isWarnEnabled())
118                {
119                    _logger.warn("Unable to ping search engine: " + uri, e);
120                }
121            }
122            else
123            {
124                throw e;
125            }
126        }
127    }
128
129    @Override
130    public void dispose()
131    {
132        _logger = null;
133
134        cancel();
135        _timer.cancel();
136        _timer = null;
137    }
138
139    @Override
140    public void enableLogging(Logger logger)
141    {
142        _logger = logger;
143    }
144
145    @Override
146    public void initialize() throws Exception
147    {
148        if (_logger.isDebugEnabled())
149        {
150            _logger.debug("Initializing the Sitemap Ping...");
151        }
152
153        // Schedule a timer to run each night.
154        String hourStr = Config.getInstance().getValue(EXECUTION_HOUR_PARAMETER);
155        int hour = 0;
156        int minute = 0;
157        if (StringUtils.isNotEmpty(hourStr) && hourStr.indexOf(':') > 0)
158        {
159            String[] hourArray = StringUtils.split(hourStr, ':');
160            hour = Integer.parseInt(hourArray[0]);
161            minute = Integer.parseInt(hourArray[1]);
162        }
163
164        GregorianCalendar calendar = new GregorianCalendar();
165        calendar.set(Calendar.AM_PM, hour < 12 ? Calendar.AM : Calendar.PM);
166        calendar.set(Calendar.HOUR, hour % 12);
167        calendar.set(Calendar.MINUTE, minute);
168        calendar.set(Calendar.SECOND, 0);
169        calendar.set(Calendar.MILLISECOND, 0);
170
171        // Each day.
172        long period = TimeUnit.DAYS.toMillis(1);
173        Date firstTime = calendar.getTime();
174
175        Date now = new Date();
176
177        // If the given time today is past, schedule for tomorrow.
178        if (firstTime.compareTo(now) < 0)
179        {
180            calendar.add(Calendar.DAY_OF_MONTH, 1);
181            firstTime = calendar.getTime();
182        }
183
184        if (_logger.isInfoEnabled())
185        {
186            _logger.info("Sitemap Ping: the sitemap ping will run each day, starting " + firstTime.toString());
187        }
188
189        _timer = new Timer(PING_SITEMAP_TIMER_NAME, true);
190        _timer.schedule(this, firstTime, period);
191    }
192
193    @Override
194    public void run()
195    {
196        AmetysObjectIterable<Site> sites = _siteManager.getSites();
197        for (Site site : sites)
198        {
199            String url = getSitemapUrl(site);
200            Boolean activated = site.getValue(PING_ACTIVATED_SITE_PARAM);
201            if (Boolean.TRUE.equals(activated))
202            {
203                try
204                {
205                    pingSearchEngine(GOOGLE_URL, url);
206                    pingSearchEngine(BING_URL, url);
207                }
208                catch (Exception e)
209                {
210                    _logger.error("Ping error : " + url, e);
211                }
212            }
213        }
214    }
215
216    @Override
217    public void service(ServiceManager manager) throws ServiceException
218    {
219        _siteManager = (SiteManager) manager.lookup(SiteManager.ROLE);
220    }
221
222}