wiki:LogParser

Version 6 (modified by waue, 16 years ago) (diff)

--

目的

This program parses your Apache access logs and stores the parsed fields in HBase.

如何使用

  • 1. Upload the Apache logs (/var/log/apache2/access.log*) to HDFS (default: /user/waue/apache-log):
$ bin/hadoop dfs -put /var/log/apache2/ apache-log
  • 2. The "dir" parameter in main must point to the directory that contains the logs.
  • 3. You should manually filter out exceptional log lines (for example, entries whose client address is not a dotted IPv4 address),
    ex:  ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
    

結果

1 執行以下指令

	hql > select * from apache-log;

2 結果

+-------------------------+-------------------------+-------------------------+

| Row                     | Column                  | Cell                    |

+-------------------------+-------------------------+-------------------------+

| 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|

|                         |                         |  MSIE 4.01; Windows 95) |

+-------------------------+-------------------------+-------------------------+

| 118.170.101.250         | http:bytesize           | 318                     |

+-------------------------+-------------------------+-------------------------+

..........(skip)........

+-------------------------+-------------------------+-------------------------+

| 87.65.93.58             | http:method             | OPTIONS                 |

+-------------------------+-------------------------+-------------------------+

| 87.65.93.58             | http:protocol           | HTTP/1.1                |

+-------------------------+-------------------------+-------------------------+

| 87.65.93.58             | referrer:-              | *                       |

+-------------------------+-------------------------+-------------------------+

| 87.65.93.58             | url:*                   | -                       |

+-------------------------+-------------------------+-------------------------+

31 row(s) in set. (0.58 sec)


LogParser.java

package tw.org.nchc.code;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Locale;

import java.util.StringTokenizer;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Parses a single line of an Apache access log (combined log format) into its
 * individual fields.
 *
 * <p>A line is only decoded when it matches the combined-log layout AND its
 * first field is a dotted-decimal IPv4 address. Otherwise no exception is
 * raised and the getters return their defaults: {@code null} for the string
 * fields and {@code 0} for the timestamp. For a line that matches the layout
 * but whose host is not an IPv4 address (for example {@code ::1}),
 * {@link #getIp()} still returns the raw host field.
 */
public class LogParser {

  /**
   * Layout: host ident user [time] "request" status bytes "referrer" "agent",
   * followed by anything.
   */
  private static final Pattern LOG_LINE = Pattern
      .compile("([^ ]*) ([^ ]*) ([^ ]*) \\[([^]]*)\\] \"([^\"]*)\""
          + " ([^ ]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\".*");

  /** Timestamp layout inside the square brackets, e.g. 29/Jun/2008:07:35:15 +0800. */
  private static final String TIME_FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";

  private final String ip;
  private final String protocol;
  private final String method;
  private final String url;
  private final String code;
  private final String byteSize;
  private final String referrer;
  private final String agent;
  private final long timestamp;

  /**
   * Parses one access-log line.
   *
   * @param line the raw log line; must not be {@code null}
   * @throws ParseException if the line matches the log layout and carries an
   *         IPv4 host, but its timestamp cannot be parsed or its request field
   *         does not contain at least "METHOD URL PROTOCOL"
   * @throws Exception kept in the signature for compatibility with existing
   *         callers
   */
  public LogParser(String line) throws ParseException, Exception {
    String parsedIp = null;
    String parsedProtocol = null;
    String parsedMethod = null;
    String parsedUrl = null;
    String parsedCode = null;
    String parsedByteSize = null;
    String parsedReferrer = null;
    String parsedAgent = null;
    long parsedTimestamp = 0L;

    Matcher matcher = LOG_LINE.matcher(line);
    if (matcher.matches()) {
      // IP address of the client requesting the web page.
      parsedIp = matcher.group(1);
      if (isIpAddress(parsedIp)) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per line.
        SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT, Locale.US);
        parsedTimestamp = sdf.parse(matcher.group(4)).getTime();

        String request = matcher.group(5);
        String[] http = request.split(" ");
        if (http.length < 3) {
          // e.g. "-" logged for timed-out connections: report it as a parse
          // failure instead of an ArrayIndexOutOfBoundsException.
          throw new ParseException(
              "Malformed request field \"" + request
                  + "\": expected \"METHOD URL PROTOCOL\"",
              matcher.start(5));
        }
        parsedMethod = http[0];
        parsedUrl = http[1];
        parsedProtocol = http[2];
        parsedCode = matcher.group(6);
        parsedByteSize = matcher.group(7);
        parsedReferrer = matcher.group(8);
        parsedAgent = matcher.group(9);
      }
    }

    this.ip = parsedIp;
    this.protocol = parsedProtocol;
    this.method = parsedMethod;
    this.url = parsedUrl;
    this.code = parsedCode;
    this.byteSize = parsedByteSize;
    this.referrer = parsedReferrer;
    this.agent = parsedAgent;
    this.timestamp = parsedTimestamp;
  }

  /**
   * Tells whether the given text is a dotted-decimal IPv4 address: exactly
   * four dot-separated chunks, each made only of ASCII digits with a value in
   * the range 0-255.
   *
   * @param inputString candidate address; {@code null} yields {@code false}
   * @return {@code true} if the text is a well-formed IPv4 address
   */
  public static boolean isIpAddress(String inputString) {
    if (inputString == null) {
      return false;
    }
    // Limit -1 keeps leading/trailing empty chunks, so ".1.2.3.4" and
    // "1.2.3.4." are rejected instead of silently collapsing to four tokens.
    String[] chunks = inputString.split("\\.", -1);
    if (chunks.length != 4) {
      return false;
    }
    for (String chunk : chunks) {
      if (!isOctet(chunk)) {
        return false;
      }
    }
    return true;
  }

  /** Returns true when the chunk is a non-empty run of ASCII digits with value 0-255. */
  private static boolean isOctet(String chunk) {
    if (chunk.length() == 0) {
      return false;
    }
    for (int i = 0; i < chunk.length(); i++) {
      char c = chunk.charAt(i);
      // Rejects signs ("+1", "-1") and any other non-digit character.
      if (c < '0' || c > '9') {
        return false;
      }
    }
    try {
      return Integer.parseInt(chunk) <= 255;
    } catch (NumberFormatException e) {
      // All digits but too large for an int: certainly not an octet.
      return false;
    }
  }

  /** @return the host field of the line, or {@code null} if the line did not match the log layout */
  public String getIp() {
    return ip;
  }

  /** @return the third token of the request field (e.g. HTTP/1.1), or {@code null} if not decoded */
  public String getProtocol() {
    return protocol;
  }

  /** @return the first token of the request field (e.g. GET), or {@code null} if not decoded */
  public String getMethod() {
    return method;
  }

  /** @return the second token of the request field, or {@code null} if not decoded */
  public String getUrl() {
    return url;
  }

  /** @return the status-code field as text, or {@code null} if not decoded */
  public String getCode() {
    return code;
  }

  /** @return the byte-count field as text, or {@code null} if not decoded */
  public String getByteSize() {
    return byteSize;
  }

  /** @return the quoted referrer field, or {@code null} if not decoded */
  public String getReferrer() {
    return referrer;
  }

  /** @return the quoted user-agent field, or {@code null} if not decoded */
  public String getAgent() {
    return agent;
  }

  /** @return the request time in milliseconds since the epoch, or {@code 0} if not decoded */
  public long getTimestamp() {
    return timestamp;
  }

}