Context Navigation

Changes between Version 15 and Version 16 of LogParser

Timestamp:: Jul 7, 2008, 5:51:16 PM (17 years ago)
Author:: waue
Comment:: --

Legend:

: Unmodified
: Added
: Removed
: Modified

LogParser

-                      v15
+                      v16
  = !LogParserGo.java =
+{{{
+public class LogParserGo {
+        static HBaseConfiguration conf = new HBaseConfiguration();
+        public static final String TABLE = "table.name";
+        static String tableName;
+        static HTable table = null;
+        public static class MapClass;
+        static public Path[] listPaths(FileSystem fsm, Path path);
+        public static void runMapReduce(String table, String dir);
+        public static void creatTable(String table) ;
+        public static void main(String[] args) ;
+}}}
+LogParserGo共宣告了以下幾個全域變數及方法：
+HBaseConfiguration conf為重要的控制設定參數，其定義了很多方法可以設定或取得map reduce程式運作所需要的值
+定義 TABLE 為 "table.name"，table.name為 name property
+string tableName 為資料表名稱
+HTable table 定義一個用來操作HBase資料表的變數
+class MapClass 為實做map的一個內部類別
+Path[] listPaths 是個可以列出指定路徑下的檔案和目錄的方法；原本API所提供的版本在0.16即已宣告為 Deprecated，因此為了消除warning而在此自行實做
+void runMapReduce(String table, String dir) 跑MapReduce的程序
+void creatTable(String table)  建立hbase的資料表
+void main(String[] args)  main 函數
+1~4為變數，較為單純；之後將說明5~9的函數功能
+------------------------------------
 {{{
         public static class MapClass extends MapReduceBase implements
 …
+        }
 }}}
+此內部類別繼承了 [http://hadoop.apache.org/core/docs/r0.16.4/api/org/apache/hadoop/mapred/MapReduceBase.html org.apache.hadoop.mapred.MapReduceBase] ，並實做Mapper<WritableComparable, Text, Text, Writable> 介面，
+不見得所有map reduce程式都需要實做此介面，但若有要讓map能分配工作就需要寫在下面此函數中：[[BR]]
+map(WritableComparable key, Text value, OutputCollector<Text, Writable> output, Reporter reporter) [[BR]]
+變數key為hbase中的row key，value則為值，output 可以透過collect() 功能將值寫入hbase的table中。但在此範例中，
+並沒有用到 output的寫入方式，reporter也沒有用到。[[br]]
+此方法因為有IO的存取，因此要宣告 throws IOException，且內容用 try 區塊包起來。[[br]]
+首先 LogParser log = new LogParser(value.toString()); value的值為要解析(parse)的內容中的某一行。因為在基於hdfs的map-reduce架構上，hadoop會幫我們把資料整合起來，因此程式的邏輯只要處理好這一行即可。LogParser 在下面會介紹到，目前只要知道log物件是原始資料value透過 LogParser 處理過的產物。透過log物件的方法 getIP()、getProtocol() ...等，我們可以輕易取得需要的資料，再用 table.put( Row_Key , Column_Qualify_Name , Value) 方法將Value值填入Row_Key中的Column_Qualify_Name欄位中。接著研究table物件。[[br]]
+table是全域變數之一，由 [http://hadoop.apache.org/hbase/docs/current/api/org/apache/hadoop/hbase/HTable.html org.apache.hadoop.hbase.HTable] 類別定義。產生出HTable物件'''必定要'''給兩個初始化的值，一個是另一個全域變數也是重要的設定檔conf，另一個是tableName也就是資料表的名稱
+configure(JobConf conf) 此方法覆寫(override)了 org.apache.hadoop.mapred.MapReduceBase.configure(JobConf)，
+內容只是取得並回傳Table的名字而已
+------------------------------
+{{{
+        static public Path[] listPaths(FileSystem fsm, Path path)
+                        throws IOException {
+                FileStatus[] fss = fsm.listStatus(path);
+                int length = fss.length;
+                Path[] pi = new Path[length];
+                for (int i = 0; i < length; i++) {
+                        pi[i] = fss[i].getPath();
+                }
+                return pi;
+        }
+}}}
+{{{
+        public static void runMapReduce(String table, String dir)
+                        throws IOException {
+                Path tempDir = new Path("/tmp/Mylog/");
+                Path InputDir = new Path(dir);
+                FileSystem fs = FileSystem.get(conf);
+                JobConf jobConf = new JobConf(conf, LogParserGo.class);
+                jobConf.setJobName("apache log fetcher");
+                jobConf.set(TABLE, table);
+                Path[] in = listPaths(fs, InputDir);
+                if (fs.isFile(InputDir)) {
+                        jobConf.setInputPath(InputDir);
+                } else {
+                        for (int i = 0; i < in.length; i++) {
+                                if (fs.isFile(in[i])) {
+                                        jobConf.addInputPath(in[i]);
+                                } else {
+                                        Path[] sub = listPaths(fs, in[i]);
+                                        for (int j = 0; j < sub.length; j++) {
+                                                if (fs.isFile(sub[j])) {
+                                                        jobConf.addInputPath(sub[j]);
+                                                }
+                                        }
+                                }
+                        }
+                }
+                jobConf.setOutputPath(tempDir);
+                jobConf.setMapperClass(MapClass.class);
+                JobClient client = new JobClient(jobConf);
+                ClusterStatus cluster = client.getClusterStatus();
+                jobConf.setNumMapTasks(cluster.getMapTasks());
+                jobConf.setNumReduceTasks(0);
+                JobClient.runJob(jobConf);
+                fs.delete(tempDir);
+                fs.close();
+        }
+}}}
+{{{
+        public static void creatTable(String table) throws IOException {
+                HBaseAdmin admin = new HBaseAdmin(conf);
+                if (!admin.tableExists(new Text(table))) {
+                        System.out.println("1. " + table
+                                        + " table creating ... please wait");
+                        HTableDescriptor tableDesc = new HTableDescriptor(table);
+                        tableDesc.addFamily(new HColumnDescriptor("http:"));
+                        tableDesc.addFamily(new HColumnDescriptor("url:"));
+                        tableDesc.addFamily(new HColumnDescriptor("referrer:"));
+                        admin.createTable(tableDesc);
+                } else {
+                        System.out.println("1. " + table + " table already exists.");
+                }
+                System.out.println("2. access_log files fetching using map/reduce");
+        }
+}}}
+{{{
+        public static void main(String[] args) throws IOException {
+                String table_name = "apache-log2";
+                String dir = "/user/waue/apache-log";
+                creatTable(table_name);
+                runMapReduce(table_name, dir);
+        }
+}
+}}}
  = LogParser.java =