Changeset 31 for sample/hadoop-0.16/tw/org
- Timestamp:
- Jul 17, 2008, 3:35:18 PM (16 years ago)
- Location:
- sample/hadoop-0.16/tw/org/nchc
- Files:
-
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
sample/hadoop-0.16/tw/org/nchc/code/LogParser.java
r30 r31 1 /** 2 * Program: LogParser.java 3 * Editor: Waue Chen 4 * From : NCHC. Taiwan 5 * Last Update Date: 07/02/2008 6 */ 7 1 8 package tw.org.nchc.code; 2 9 … … 10 17 11 18 12 public class AccessLogParser {19 public class LogParser { 13 20 private String ip; 14 21 private String protocol; … … 27 34 28 35 29 public AccessLogParser(String line) throws ParseException, Exception{36 public LogParser(String line) throws ParseException, Exception{ 30 37 31 38 Matcher matcher = p.matcher(line); -
sample/hadoop-0.16/tw/org/nchc/code/LogParserGo.java
r30 r31 1 1 /** 2 * Program: Log Fetcher.java2 * Program: LogParserGo.java 3 3 * Editor: Waue Chen 4 4 * From : NCHC. Taiwn … … 6 6 */ 7 7 /** 8 * Copyright 2007 The Apache Software Foundation 9 * 10 * Licensed to the Apache Software Foundation (ASF) under one 11 * or more contributor license agreements. See the NOTICE file 12 * distributed with this work for additional information 13 * regarding copyright ownership. The ASF licenses this file 14 * to you under the Apache License, Version 2.0 (the 15 * "License"); you may not use this file except in compliance 16 * with the License. You may obtain a copy of the License at 17 * 18 * http://www.apache.org/licenses/LICENSE-2.0 19 * 20 * Unless required by applicable law or agreed to in writing, software 21 * distributed under the License is distributed on an "AS IS" BASIS, 22 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 * See the License for the specific language governing permissions and 24 * limitations under the License. 8 * Purpose : 9 * This program will parse your apache log and store it into Hbase. 10 * 11 * HowToUse : 12 * Make sure two thing : 13 * 1. Upload apache logs ( /var/log/apache2/access.log* ) to \ 14 * hdfs (default: /user/waue/apache-log) \ 15 * $ bin/hadoop dfs -put /var/log/apache2/ apache-log 16 * 2. parameter "dir" in main contains the logs. 17 * 3. you should filter the exception contents manually, \ 18 * ex: ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "... 
19 * 20 * Check Result: 21 * Go to hbase console, type : 22 * hql > select * from apache-log; 23 24 +-------------------------+-------------------------+-------------------------+ 25 | Row | Column | Cell | 26 +-------------------------+-------------------------+-------------------------+ 27 | 118.170.101.250 | http:agent | Mozilla/4.0 (compatible;| 28 | | | MSIE 4.01; Windows 95) | 29 +-------------------------+-------------------------+-------------------------+ 30 | 118.170.101.250 | http:bytesize | 318 | 31 +-------------------------+-------------------------+-------------------------+ 32 ..........(skip)........ 33 +-------------------------+-------------------------+-------------------------+ 34 | 87.65.93.58 | http:method | OPTIONS | 35 +-------------------------+-------------------------+-------------------------+ 36 | 87.65.93.58 | http:protocol | HTTP/1.1 | 37 +-------------------------+-------------------------+-------------------------+ 38 | 87.65.93.58 | referrer:- | * | 39 +-------------------------+-------------------------+-------------------------+ 40 | 87.65.93.58 | url:* | - | 41 +-------------------------+-------------------------+-------------------------+ 42 31 row(s) in set. 
(0.58 sec) 43 44 45 25 46 */ 26 47 package tw.org.nchc.code; 27 48 28 49 import java.io.IOException; 29 import java.text.ParseException;30 50 31 51 import org.apache.hadoop.fs.FileStatus; … … 47 67 import org.apache.hadoop.mapred.OutputCollector; 48 68 import org.apache.hadoop.mapred.Reporter; 69 49 70 // import AccessLogParser 50 71 /** … … 52 73 * W3CExtended, IISw3cExtended) 53 74 */ 54 public class Log Fetcher{75 public class LogParserGo { 55 76 static HBaseConfiguration conf = new HBaseConfiguration(); 56 77 … … 60 81 61 82 static HTable table = null; 62 63 static boolean eclipseRun = false; 64 83 84 static void print(String str){ 85 System.out.println("STR = "+str); 86 } 65 87 public static class MapClass extends MapReduceBase implements 66 88 Mapper<WritableComparable, Text, Text, Writable> { 67 89 68 90 @Override 91 // MapReduceBase.configure(JobConf job) 92 // Default implementation that does nothing. 69 93 public void configure(JobConf job) { 94 // String get(String name,String defaultValue) 95 // Get the value of the name property. If no such property exists,\ 96 // then defaultValue is returned. 
70 97 tableName = job.get(TABLE, ""); 71 98 } … … 74 101 OutputCollector<Text, Writable> output, Reporter reporter) 75 102 throws IOException { 103 76 104 try { 105 /* 106 print(value.toString()); 107 FileWriter out = new FileWriter(new File( 108 "/home/waue/mr-result.txt")); 109 out.write(value.toString()); 110 out.flush(); 111 out.close(); 112 */ 113 LogParser log = new LogParser(value.toString()); 77 114 78 AccessLogParser log = new AccessLogParser(value.toString());79 115 if (table == null) 80 116 table = new HTable(conf, new Text(tableName)); … … 95 131 log.getUrl().getBytes()); 96 132 table.commit(lockId, log.getTimestamp()); 97 } catch (ParseException e) { 98 e.printStackTrace(); 133 99 134 } catch (Exception e) { 100 135 e.printStackTrace(); 101 136 } 102 } 103 } 104 // do it to resolve warning : FileSystem.listPaths 105 static public Path[] listPaths(FileSystem fsm,Path path) throws IOException 106 { 137 138 } 139 } 140 141 // do it to resolve warning : FileSystem.listPaths 142 static public Path[] listPaths(FileSystem fsm, Path path) 143 throws IOException { 107 144 FileStatus[] fss = fsm.listStatus(path); 108 145 int length = fss.length; 109 146 Path[] pi = new Path[length]; 110 for (int i=0 ; i< length; i++) 111 { 147 for (int i = 0; i < length; i++) { 112 148 pi[i] = fss[i].getPath(); 113 149 } 114 150 return pi; 115 } 151 } 152 116 153 public static void runMapReduce(String table, String dir) 117 154 throws IOException { 118 Path tempDir = new Path(" log/temp");155 Path tempDir = new Path("/tmp/Mylog/"); 119 156 Path InputDir = new Path(dir); 120 157 FileSystem fs = FileSystem.get(conf); 121 JobConf jobConf = new JobConf(conf, Log Fetcher.class);158 JobConf jobConf = new JobConf(conf, LogParserGo.class); 122 159 jobConf.setJobName("apache log fetcher"); 123 160 jobConf.set(TABLE, table); … … 140 177 } 141 178 jobConf.setOutputPath(tempDir); 142 179 143 180 jobConf.setMapperClass(MapClass.class); 144 181 … … 150 187 JobClient.runJob(jobConf); 151 188 
152 fs.delete(tempDir); 189 fs.delete(tempDir); 153 190 fs.close(); 154 191 } … … 171 208 172 209 public static void main(String[] args) throws IOException { 173 String table_name = "log"; 174 String dir = "apache-log"; 175 176 if (eclipseRun) { 177 table_name = "log"; 178 dir = "apache-log"; 179 } else if (args.length < 2) { 180 System.out 181 .println("Usage: logfetcher <access_log file or directory> <table_name>"); 182 System.exit(1); 183 } else { 184 table_name = args[1]; 185 dir = args[0]; 186 } 210 String table_name = "apache-log2"; 211 String dir = "/user/waue/apache-log"; 212 213 // if (eclipseRun) { 214 // table_name = "log"; 215 // dir = "apache-log"; 216 // } else if (args.length < 2) { 217 // System.out 218 // .println("Usage: logfetcher <access_log file or directory> 219 // <table_name>"); 220 // System.exit(1); 221 // } else { 222 // table_name = args[1]; 223 // dir = args[0]; 224 // } 225 187 226 creatTable(table_name); 188 227 runMapReduce(table_name, dir); -
sample/hadoop-0.16/tw/org/nchc/code/WordCount.java
r27 r31 56 56 57 57 // mapper: emits (token, 1) for every word occurrence 58 private static class MapClass extends MapReduceBase implements 59 Mapper<LongWritable, Text, Text, IntWritable> { 58 private static class MapClass extends MapReduceBase 59 implements Mapper<LongWritable, Text, Text, IntWritable> 60 { 60 61 61 62 // reuse objects to save overhead of object creation … … 77 78 78 79 // reducer: sums up all the counts 79 private static class ReduceClass extends MapReduceBase implements 80 Reducer<Text, IntWritable, Text, IntWritable> { 80 private static class ReduceClass extends MapReduceBase 81 implements Reducer<Text, IntWritable, Text, IntWritable> 82 { 81 83 82 84 // reuse objects … … 105 107 int reduceTasks = 1; 106 108 JobConf conf = new JobConf(WordCount.class); 107 conf.setJobName("wordcount");109 // conf.setJobName("wordcount"); 108 110 109 111 conf.setNumMapTasks(mapTasks); … … 118 120 119 121 conf.setMapperClass(MapClass.class); 120 conf.setCombinerClass(ReduceClass.class);122 // conf.setCombinerClass(ReduceClass.class); 121 123 conf.setReducerClass(ReduceClass.class); 122 124 -
sample/hadoop-0.16/tw/org/nchc/code/WordCountIntoHBase.java
r27 r31 45 45 // $Input_Path. Please make sure the path is correct and contains input 46 46 // files 47 static final String Input_Path = "/user/waue/ simple";47 static final String Input_Path = "/user/waue/input"; 48 48 49 49 // Hbase table name, the program will create it -
sample/hadoop-0.16/tw/org/nchc/demo/LogFetcher.java
r29 r31 6 6 */ 7 7 /** 8 * Copyright 2007 The Apache Software Foundation 9 * 10 * Licensed to the Apache Software Foundation (ASF) under one 11 * or more contributor license agreements. See the NOTICE file 12 * distributed with this work for additional information 13 * regarding copyright ownership. The ASF licenses this file 14 * to you under the Apache License, Version 2.0 (the 15 * "License"); you may not use this file except in compliance 16 * with the License. You may obtain a copy of the License at 17 * 18 * http://www.apache.org/licenses/LICENSE-2.0 19 * 20 * Unless required by applicable law or agreed to in writing, software 21 * distributed under the License is distributed on an "AS IS" BASIS, 22 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 * See the License for the specific language governing permissions and 24 * limitations under the License. 8 * Purpose : 9 * This program will parse your apache log and store it into Hbase. 10 * 11 * HowToUse : 12 * Make sure two thing : 13 * 1. Upload apache logs ( /var/log/apache2/access.log* ) to \ 14 * hdfs (default: /user/waue/apache-log) \ 15 * $ bin/hadoop dfs -put /var/log/apache2/ apache-log 16 * 2. parameter "dir" in main contains the logs. 17 * 3. you should filter the exception contents manually, \ 18 * ex: ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "... 
19 * 20 * Check Result: 21 * Go to hbase console, type : 22 * hql > select * from apache-log; 23 24 +-------------------------+-------------------------+-------------------------+ 25 | Row | Column | Cell | 26 +-------------------------+-------------------------+-------------------------+ 27 | 118.170.101.250 | http:agent | Mozilla/4.0 (compatible;| 28 | | | MSIE 4.01; Windows 95) | 29 +-------------------------+-------------------------+-------------------------+ 30 | 118.170.101.250 | http:bytesize | 318 | 31 +-------------------------+-------------------------+-------------------------+ 32 ..........(skip)........ 33 +-------------------------+-------------------------+-------------------------+ 34 | 87.65.93.58 | http:method | OPTIONS | 35 +-------------------------+-------------------------+-------------------------+ 36 | 87.65.93.58 | http:protocol | HTTP/1.1 | 37 +-------------------------+-------------------------+-------------------------+ 38 | 87.65.93.58 | referrer:- | * | 39 +-------------------------+-------------------------+-------------------------+ 40 | 87.65.93.58 | url:* | - | 41 +-------------------------+-------------------------+-------------------------+ 42 31 row(s) in set. (0.58 sec) 43 25 44 */ 45 46 26 47 package tw.org.nchc.demo; 27 48
Note: See TracChangeset for help on using the changeset viewer.