source: sample/hadoop-0.16/tw/org/nchc/code/SnortBase.java @ 32

Last change on this file since 32 was 32, checked in by waue, 16 years ago

SnortParser is OK:
it parses a Snort log file into my own specific format.

The next step is to code SnortBase so that it uploads the parsed file into HBase.

File size: 7.5 KB
Line 
1/**
2 * Program: SnortBase.java
3 * Editor: Waue Chen
4 * From :  NCHC. Taiwan
5 * Last Update Date: 07/02/2008
6 */
7/**
8 * Purpose :
9 *  This program will parse your apache log and store it into Hbase.
10 *
11 * HowToUse :
12 *  Make sure of three things :
13 *  1. Upload apache logs ( /var/log/apache2/access.log* ) to \
14 *    hdfs (default: /user/waue/apache-log) \
15 *   $ bin/hadoop dfs -put /var/log/apache2/ apache-log
16 *  2. parameter "dir" in main contains the logs.
17 *  3. you should filter the exception contents manually, \
18 *    ex:  ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
19 * 
20 * Check Result:
21 *  Go to hbase console, type :
22 *    hql > select * from apache-log;
23
24+-------------------------+-------------------------+-------------------------+
25| Row                     | Column                  | Cell                    |
26+-------------------------+-------------------------+-------------------------+
27| 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|
28|                         |                         |  MSIE 4.01; Windows 95) |
29+-------------------------+-------------------------+-------------------------+
30| 118.170.101.250         | http:bytesize           | 318                     |
31+-------------------------+-------------------------+-------------------------+
32..........(skip)........
33+-------------------------+-------------------------+-------------------------+
34| 87.65.93.58             | http:method             | OPTIONS                 |
35+-------------------------+-------------------------+-------------------------+
36| 87.65.93.58             | http:protocol           | HTTP/1.1                |
37+-------------------------+-------------------------+-------------------------+
38| 87.65.93.58             | referrer:-              | *                       |
39+-------------------------+-------------------------+-------------------------+
40| 87.65.93.58             | url:*                   | -                       |
41+-------------------------+-------------------------+-------------------------+
4231 row(s) in set. (0.58 sec)
43
44
45
46 */
47package tw.org.nchc.code;
48
49import java.io.IOException;
50
51import org.apache.hadoop.fs.FileStatus;
52import org.apache.hadoop.fs.FileSystem;
53import org.apache.hadoop.fs.Path;
54import org.apache.hadoop.hbase.HBaseAdmin;
55import org.apache.hadoop.hbase.HBaseConfiguration;
56import org.apache.hadoop.hbase.HColumnDescriptor;
57import org.apache.hadoop.hbase.HTable;
58import org.apache.hadoop.hbase.HTableDescriptor;
59import org.apache.hadoop.io.Text;
60import org.apache.hadoop.io.Writable;
61import org.apache.hadoop.io.WritableComparable;
62import org.apache.hadoop.mapred.ClusterStatus;
63import org.apache.hadoop.mapred.JobClient;
64import org.apache.hadoop.mapred.JobConf;
65import org.apache.hadoop.mapred.MapReduceBase;
66import org.apache.hadoop.mapred.Mapper;
67import org.apache.hadoop.mapred.OutputCollector;
68import org.apache.hadoop.mapred.Reporter;
69
70// import AccessLogParser
71/**
72 * Access_log fetcher. TODO: FgnStatLog, Error_log, Access_log (Default,
73 * W3CExtended, IISw3cExtended)
74 */
75public class SnortBase {
76  static HBaseConfiguration conf = new HBaseConfiguration();
77
78  public static final String TABLE = "table.name";
79
80  static String tableName;
81
82  static HTable table = null;
83 
84  static void print(String str){
85    System.out.println("STR  = "+str);
86  }
87  public static class MapClass extends MapReduceBase implements
88      Mapper<WritableComparable, Text, Text, Writable> {
89
90    @Override
91    // MapReduceBase.configure(JobConf job)
92    // Default implementation that does nothing.
93    public void configure(JobConf job) {
94      // String get(String name,String defaultValue)
95      // Get the value of the name property. If no such property exists,\
96      //  then defaultValue is returned.
97      tableName = job.get(TABLE, "");
98    }
99
100    public void map(WritableComparable key, Text value,
101        OutputCollector<Text, Writable> output, Reporter reporter)
102        throws IOException {
103     
104      try {
105        /*
106        print(value.toString());
107        FileWriter out = new FileWriter(new File(
108        "/home/waue/mr-result.txt"));
109        out.write(value.toString());
110        out.flush();
111        out.close();
112        */
113//        SnortParser log = new SnortParser(value.toString(),0);
114       
115        if (table == null)
116          table = new HTable(conf, new Text(tableName));
117        /*
118        long lockId = table.startUpdate(new Text(log.getIp()));
119        table.put(lockId, new Text("http:protocol"), log.getProtocol()
120            .getBytes());
121        table.put(lockId, new Text("http:method"), log.getMethod()
122            .getBytes());
123        table.put(lockId, new Text("http:code"), log.getCode()
124            .getBytes());
125        table.put(lockId, new Text("http:bytesize"), log.getByteSize()
126            .getBytes());
127        table.put(lockId, new Text("http:agent"), log.getAgent()
128            .getBytes());
129        table.put(lockId, new Text("url:" + log.getUrl()), log
130            .getReferrer().getBytes());
131        table.put(lockId, new Text("referrer:" + log.getReferrer()),
132            log.getUrl().getBytes());
133        table.commit(lockId, log.getTimestamp());
134        */
135       
136      } catch (Exception e) {
137        e.printStackTrace();
138      }
139     
140    }
141  }
142
143  // do it to resolve warning : FileSystem.listPaths
144  static public Path[] listPaths(FileSystem fsm, Path path)
145      throws IOException {
146    FileStatus[] fss = fsm.listStatus(path);
147    int length = fss.length;
148    Path[] pi = new Path[length];
149    for (int i = 0; i < length; i++) {
150      pi[i] = fss[i].getPath();
151    }
152    return pi;
153  }
154
155  public static void runMapReduce(String table, String dir)
156      throws IOException {
157    Path tempDir = new Path("/tmp/Mylog/");
158    Path InputDir = new Path(dir);
159    FileSystem fs = FileSystem.get(conf);
160    JobConf jobConf = new JobConf(conf, SnortBase.class);
161    jobConf.setJobName("apache log fetcher");
162    jobConf.set(TABLE, table);
163    Path[] in = listPaths(fs, InputDir);
164    if (fs.isFile(InputDir)) {
165      jobConf.setInputPath(InputDir);
166    } else {
167      for (int i = 0; i < in.length; i++) {
168        if (fs.isFile(in[i])) {
169          jobConf.addInputPath(in[i]);
170        } else {
171          Path[] sub = listPaths(fs, in[i]);
172          for (int j = 0; j < sub.length; j++) {
173            if (fs.isFile(sub[j])) {
174              jobConf.addInputPath(sub[j]);
175            }
176          }
177        }
178      }
179    }
180    jobConf.setOutputPath(tempDir);
181    jobConf.setMapperClass(MapClass.class);
182    JobClient client = new JobClient(jobConf);
183    ClusterStatus cluster = client.getClusterStatus();
184    jobConf.setNumMapTasks(cluster.getMapTasks());
185    jobConf.setNumReduceTasks(0);
186    JobClient.runJob(jobConf);
187    fs.delete(tempDir);
188    fs.close();
189  }
190
191  public static void creatTable(String table) throws IOException {
192    HBaseAdmin admin = new HBaseAdmin(conf);
193    if (!admin.tableExists(new Text(table))) {
194      System.out.println("1. " + table
195          + " table creating ... please wait");
196      HTableDescriptor tableDesc = new HTableDescriptor(table);
197      tableDesc.addFamily(new HColumnDescriptor("http:"));
198      tableDesc.addFamily(new HColumnDescriptor("url:"));
199      tableDesc.addFamily(new HColumnDescriptor("referrer:"));
200      admin.createTable(tableDesc);
201    } else {
202      System.out.println("1. " + table + " table already exists.");
203    }
204    System.out.println("2. access_log files fetching using map/reduce");
205  }
206
207  public static void main(String[] args) throws IOException {
208    String table_name = "apache-log2";
209    String dir = "/user/waue/apache-log";
210   
211    // if (eclipseRun) {
212    // table_name = "log";
213    // dir = "apache-log";
214    // } else if (args.length < 2) {
215    // System.out
216    // .println("Usage: logfetcher <access_log file or directory>
217    // <table_name>");
218    // System.exit(1);
219    // } else {
220    // table_name = args[1];
221    // dir = args[0];
222    // }
223
224    creatTable(table_name);
225    runMapReduce(table_name, dir);
226
227  }
228
229}
Note: See TracBrowser for help on using the repository browser.