001/* 002 * Copyright 2013 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.site.cache.monitoring.process.access.impl; 017 018import java.sql.Timestamp; 019import java.text.DateFormat; 020import java.text.ParseException; 021import java.util.Date; 022import java.util.HashMap; 023import java.util.Map; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.commons.lang.StringUtils; 028import org.slf4j.Logger; 029import org.slf4j.LoggerFactory; 030 031import org.ametys.plugins.site.cache.monitoring.Constants; 032import org.ametys.plugins.site.cache.monitoring.process.access.ResourceAccess; 033import org.ametys.plugins.site.cache.monitoring.process.access.ResourceAccessUtils; 034 035/** 036 * Apache resource access. Represent an access to a resource from Apache. 037 * These objects are created will parsing the Apache access logs. 038 */ 039public class HTTPServerResourceAccess implements ResourceAccess 040{ 041 /** logger */ 042 protected static final Logger _LOGGER = LoggerFactory.getLogger(HTTPServerResourceAccess.class); 043 044 private static final Pattern __PATTERN; 045 static 046 { 047 final String sPattern = "^([A-Za-z0-9@-]+) (\\S+) (\\S+) \\S+ .* \\[([^\\]]+)\\] \"([^\"]+)\" (\\d{3})/(\\d{3}) [\\d-]\\d* (-|1) \"([^\"]+)\" \"([^\"]+)\"$"; 048 __PATTERN = Pattern.compile(sPattern); 049 } 050 051 private enum Field 052 { 053 UNIQUE_ID, 054 SITE, 055 REMOTE_HOST_NAME, 056 DATE, 057 HTTP_METHOD, 058 HTTP_PATH, 059 HTTP_QUERY_STRING, 060 HTTP_PROTOCOL, 061 ORI_STATUS_CODE, 062 RET_STATUS_CODE, 063 CACHE_HIT, 064 REFERER, 065 USER_AGENT 066 } 067 068 private final String _uniqueID; 069 private final String _site; 070 private final Date _date; 071 private final String _httpMethod; 072 private final String _httpPath; 073 private final String _httpPathHash; 074 private final String _httpQueryString; 075 private final String _originalStatusCode; 076 private final String _returnedStatusCode; 077 private final boolean _cacheHit; 078 079 /** 080 * Constructor 081 * @param params the parameters used 082 */ 083 protected HTTPServerResourceAccess(Map<Field, Object> params) 084 { 085 _uniqueID = (String) params.get(Field.UNIQUE_ID); 086 _site = (String) params.get(Field.SITE); 087 _date = (Date) params.get(Field.DATE); 088 _httpMethod = (String) params.get(Field.HTTP_METHOD); 089 090 // Calculate a hash for the path. 091 _httpPath = (String) params.get(Field.HTTP_PATH); 092 String hash = ResourceAccessUtils.toHash(_httpPath); 093 _httpPathHash = hash; 094 095 String qs = (String) params.get(Field.HTTP_QUERY_STRING); 096 _httpQueryString = StringUtils.defaultIfEmpty(qs, "-"); 097 098 _originalStatusCode = (String) params.get(Field.ORI_STATUS_CODE); 099 _returnedStatusCode = (String) params.get(Field.RET_STATUS_CODE); 100 _cacheHit = (Boolean) params.get(Field.CACHE_HIT); 101 } 102 103 /** 104 * Create a new record instance 105 * @param entry the server access log entry 106 * @param df the date format to use for the record 107 * @return the created record 108 */ 109 public static HTTPServerResourceAccess createRecord(String entry, DateFormat df) 110 { 111 Matcher m = __PATTERN.matcher(entry); 112 113 if (m.matches()) 114 { 115 boolean success = true; 116 117 Map<Field, Object> params = new HashMap<>(); 118 params.put(Field.UNIQUE_ID, m.group(1)); 119 params.put(Field.SITE, m.group(2)); 120 params.put(Field.REMOTE_HOST_NAME, m.group(3)); 121 122 try 123 { 124 params.put(Field.DATE, df.parse(m.group(4))); 125 } 126 catch (NumberFormatException e) 127 { 128 success = false; 129 130 // Could happen if there is a synchronization issue with the 131 // DateFormat instance used to parse the date. 132 String msg = "NumberFormatException when trying to parse the resource from the httpserver access logs.\nInput catched string to be parsed '%s'"; 133 _LOGGER.error(String.format(msg, m.group(4))); 134 } 135 catch (ParseException e) 136 { 137 _LOGGER.error("Error while parsing the a date from the httpserver access logs"); 138 success = false; 139 } 140 141 String[] httpReq = StringUtils.split(m.group(5), ' '); 142 params.put(Field.HTTP_METHOD, httpReq[0]); 143 params.put(Field.HTTP_PATH, StringUtils.substringBefore(httpReq[1], "?")); 144 params.put(Field.HTTP_QUERY_STRING, StringUtils.substringAfter(httpReq[1], "?")); 145 params.put(Field.HTTP_PROTOCOL, httpReq[2]); 146 147 params.put(Field.ORI_STATUS_CODE, m.group(6)); 148 params.put(Field.RET_STATUS_CODE, m.group(7)); 149 params.put(Field.CACHE_HIT, !"1".equals(m.group(8))); 150 params.put(Field.REFERER, m.group(9)); 151 params.put(Field.USER_AGENT, m.group(10)); 152 153 if (success) 154 { 155 return new HTTPServerResourceAccess(params); 156 } 157 } 158 159 _LOGGER.error("Access log entry does not match the pattern\nEntry: " + entry); 160 return null; 161 } 162 163 @Override 164 public String getInsertStatementId() 165 { 166 return "FrontCacheMonitoringAccess.insertHTTPServerResourceAccess"; 167 } 168 169 @Override 170 public Map<String, Object> getInsertStatementParameters() 171 { 172 Map<String, Object> params = new HashMap<>(); 173 174 params.put("tableName", Constants.SQL_TABLE_NAME_HTTPSERVER_ACCESS); 175 176 // (Unique_Id, Site, Request_Date, Method, Path_Hash, Path, Query_String, 177 // Ori_Status_Code, Ret_Status_Code, Cache_Hit, Created_At) 178 179 params.put("Unique_Id", _uniqueID); 180 params.put("Site", _site); 181 params.put("Request_Date", new Timestamp(_date.getTime())); 182 params.put("Method", _httpMethod); 183 params.put("Path_Hash", _httpPathHash); 184 params.put("Path", _httpPath); 185 params.put("Query_String", _httpQueryString); 186 params.put("Ori_Status_Code", _originalStatusCode); 187 params.put("Ret_Status_Code", _returnedStatusCode); 188 params.put("Cache_Hit", _cacheHit); 189 params.put("Created_At", new Timestamp(System.currentTimeMillis())); 190 191 return params; 192 } 193 194 /** 195 * Indicates if this record should be persisted in the database. 196 * If it returns false, it means that this record must be filtered out and must not be inserted into the database. 197 * @param date The date at which the httpserver log importer has been started. 198 * @return True is this record is newer than the date passed as an argument. 199 */ 200 public boolean isOfInterest(Date date) 201 { 202 return date.before(_date); 203 } 204 205 @Override 206 public String toString() 207 { 208 final StringBuilder sb = new StringBuilder(); 209 sb.append('[').append(_uniqueID).append("] ").append(_site).append(' ').append(_date); 210 sb.append(' ').append(_httpMethod).append(' ').append(_httpPath).append(StringUtils.isNotEmpty(_httpQueryString) ? '?' + _httpQueryString : "").append(' '); 211 sb.append(_originalStatusCode).append('/').append(_returnedStatusCode); 212 sb.append(' ').append("cache-hit:").append(_cacheHit); 213 return sb.toString(); 214 } 215}