001/*
002 *  Copyright 2013 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.site.cache.monitoring.process.access.impl;
017
018import java.sql.Timestamp;
019import java.text.DateFormat;
020import java.text.ParseException;
021import java.util.Date;
022import java.util.HashMap;
023import java.util.Map;
024import java.util.regex.Matcher;
025import java.util.regex.Pattern;
026
027import org.apache.commons.lang.StringUtils;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import org.ametys.plugins.site.cache.monitoring.Constants;
032import org.ametys.plugins.site.cache.monitoring.process.access.ResourceAccess;
033import org.ametys.plugins.site.cache.monitoring.process.access.ResourceAccessUtils;
034
035/**
036 * Apache resource access. Represent an access to a resource from Apache.
037 * These objects are created will parsing the Apache access logs.
038 */
039public class HTTPServerResourceAccess implements ResourceAccess
040{
041    /** logger */
042    protected static final Logger _LOGGER = LoggerFactory.getLogger(HTTPServerResourceAccess.class);
043    
044    private static final Pattern __PATTERN;
045    static
046    {
047        final String sPattern = "^([A-Za-z0-9@-]+) (\\S+) (\\S+) \\S+ .* \\[([^\\]]+)\\] \"([^\"]+)\" (\\d{3})/(\\d{3}) [\\d-]\\d* (-|1) \"([^\"]+)\" \"([^\"]+)\"$";
048        __PATTERN = Pattern.compile(sPattern);
049    }
050    
051    private enum Field
052    {
053        UNIQUE_ID,
054        SITE,
055        REMOTE_HOST_NAME,
056        DATE,
057        HTTP_METHOD,
058        HTTP_PATH,
059        HTTP_QUERY_STRING,
060        HTTP_PROTOCOL,
061        ORI_STATUS_CODE,
062        RET_STATUS_CODE,
063        CACHE_HIT,
064        REFERER,
065        USER_AGENT
066    }
067    
068    private final String _uniqueID;
069    private final String _site;
070    private final Date _date;
071    private final String _httpMethod;
072    private final String _httpPath;
073    private final String _httpPathHash;
074    private final String _httpQueryString;
075    private final String _originalStatusCode;
076    private final String _returnedStatusCode;
077    private final boolean _cacheHit;
078    
079    /**
080     * Constructor
081     * @param params the parameters used 
082     */
083    protected HTTPServerResourceAccess(Map<Field, Object> params)
084    {
085        _uniqueID = (String) params.get(Field.UNIQUE_ID);
086        _site = (String) params.get(Field.SITE);
087        _date = (Date) params.get(Field.DATE);
088        _httpMethod = (String) params.get(Field.HTTP_METHOD);
089        
090        // Calculate a hash for the path.
091        _httpPath = (String) params.get(Field.HTTP_PATH);
092        String hash = ResourceAccessUtils.toHash(_httpPath);
093        _httpPathHash = hash;
094        
095        String qs = (String) params.get(Field.HTTP_QUERY_STRING);
096        _httpQueryString = StringUtils.defaultIfEmpty(qs, "-");
097        
098        _originalStatusCode = (String) params.get(Field.ORI_STATUS_CODE);
099        _returnedStatusCode = (String) params.get(Field.RET_STATUS_CODE);
100        _cacheHit = (Boolean) params.get(Field.CACHE_HIT);
101    }
102    
103    /**
104     * Create a new record instance
105     * @param entry the server access log entry
106     * @param df the date format to use for the record
107     * @return the created record
108     */
109    public static HTTPServerResourceAccess createRecord(String entry, DateFormat df)
110    {
111        Matcher m = __PATTERN.matcher(entry);
112        
113        if (m.matches())
114        {
115            boolean success = true;
116            
117            Map<Field, Object> params = new HashMap<>();
118            params.put(Field.UNIQUE_ID, m.group(1));
119            params.put(Field.SITE, m.group(2));
120            params.put(Field.REMOTE_HOST_NAME, m.group(3));
121            
122            try
123            {
124                params.put(Field.DATE, df.parse(m.group(4)));
125            }
126            catch (NumberFormatException e)
127            {
128                success = false;
129                
130                // Could happen if there is a synchronization issue with the
131                // DateFormat instance used to parse the date.
132                String msg = "NumberFormatException when trying to parse the resource from the httpserver access logs.\nInput catched string to be parsed '%s'";
133                _LOGGER.error(String.format(msg, m.group(4)));
134            }
135            catch (ParseException e)
136            {
137                _LOGGER.error("Error while parsing the a date from the httpserver access logs");
138                success = false;
139            }
140            
141            String[] httpReq = StringUtils.split(m.group(5), ' ');
142            params.put(Field.HTTP_METHOD, httpReq[0]);
143            params.put(Field.HTTP_PATH, StringUtils.substringBefore(httpReq[1], "?"));
144            params.put(Field.HTTP_QUERY_STRING, StringUtils.substringAfter(httpReq[1], "?"));
145            params.put(Field.HTTP_PROTOCOL, httpReq[2]);
146            
147            params.put(Field.ORI_STATUS_CODE, m.group(6));
148            params.put(Field.RET_STATUS_CODE, m.group(7));
149            params.put(Field.CACHE_HIT, !"1".equals(m.group(8)));
150            params.put(Field.REFERER, m.group(9));
151            params.put(Field.USER_AGENT, m.group(10));
152            
153            if (success)
154            {
155                return new HTTPServerResourceAccess(params);
156            }
157        }
158        
159        _LOGGER.error("Access log entry does not match the pattern\nEntry: " + entry);
160        return null;
161    }
162    
163    @Override
164    public String getInsertStatementId()
165    {
166        return "FrontCacheMonitoringAccess.insertHTTPServerResourceAccess";
167    }
168    
169    @Override
170    public Map<String, Object> getInsertStatementParameters()
171    {
172        Map<String, Object> params = new HashMap<>();
173        
174        params.put("tableName", Constants.SQL_TABLE_NAME_HTTPSERVER_ACCESS);
175        
176        // (Unique_Id, Site, Request_Date, Method, Path_Hash, Path, Query_String,
177        // Ori_Status_Code, Ret_Status_Code, Cache_Hit, Created_At)
178        
179        params.put("Unique_Id", _uniqueID);
180        params.put("Site", _site);
181        params.put("Request_Date", new Timestamp(_date.getTime()));
182        params.put("Method", _httpMethod);
183        params.put("Path_Hash", _httpPathHash);
184        params.put("Path", _httpPath);
185        params.put("Query_String", _httpQueryString);
186        params.put("Ori_Status_Code", _originalStatusCode);
187        params.put("Ret_Status_Code", _returnedStatusCode);
188        params.put("Cache_Hit", _cacheHit);
189        params.put("Created_At", new Timestamp(System.currentTimeMillis()));
190        
191        return params;
192    }
193    
194    /**
195     * Indicates if this record should be persisted in the database.
196     * If it returns false, it means that this record must be filtered out and must not be inserted into the database.
197     * @param date The date at which the httpserver log importer has been started.
198     * @return True is this record is newer than the date passed as an argument.
199     */
200    public boolean isOfInterest(Date date)
201    {
202        return date.before(_date);
203    }
204    
205    @Override
206    public String toString()
207    {
208        final StringBuilder sb = new StringBuilder();
209        sb.append('[').append(_uniqueID).append("] ").append(_site).append(' ').append(_date);
210        sb.append(' ').append(_httpMethod).append(' ').append(_httpPath).append(StringUtils.isNotEmpty(_httpQueryString) ? '?' + _httpQueryString : "").append(' ');
211        sb.append(_originalStatusCode).append('/').append(_returnedStatusCode);
212        sb.append(' ').append("cache-hit:").append(_cacheHit);
213        return sb.toString();
214    }
215}