package tw.org.nchc.demo; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Locale; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; public class AccessLogParser { private String ip; private String protocol; private String method; private String url; private String code; private String byteSize; private String referrer; private String agent; private long timestamp; private static Pattern p = Pattern .compile("([^ ]*) ([^ ]*) ([^ ]*) \\[([^]]*)\\] \"([^\"]*)\"" + " ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\".*"); public AccessLogParser(String line) throws ParseException, Exception{ Matcher matcher = p.matcher(line); if(matcher.matches()){ this.ip = matcher.group(1); // IP address of the client requesting the web page. if(isIpAddress(ip)){ SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z",Locale.US); this.timestamp = sdf.parse(matcher.group(4)).getTime(); String[] http = matcher.group(5).split(" "); this.method = http[0]; this.url = http[1]; this.protocol = http[2]; this.code = matcher.group(6); this.byteSize = matcher.group(7); this.referrer = matcher.group(8); this.agent = matcher.group(9); } } } public static boolean isIpAddress(String inputString) { StringTokenizer tokenizer = new StringTokenizer(inputString, "."); if (tokenizer.countTokens() != 4) { return false; } try { for (int i = 0; i < 4; i++) { String t = tokenizer.nextToken(); int chunk = Integer.parseInt(t); if ((chunk & 255) != chunk) { return false; } } } catch (NumberFormatException e) { return false; } if (inputString.indexOf("..") >= 0) { return false; } return true; } public String getIp() { return ip; } public String getProtocol() { return protocol; } public String getMethod() { return method; } public String getUrl() { return url; } public String getCode() { return code; } public String getByteSize() { return byteSize; } public String getReferrer() { return referrer; } public String getAgent() { return agent; } public long getTimestamp() { return timestamp; } }