Ignore:
Timestamp:
Jul 17, 2008, 3:35:18 PM (16 years ago)
Author:
waue
Message:

update some new ..

Location:
sample/hadoop-0.16/tw/org/nchc
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • sample/hadoop-0.16/tw/org/nchc/code/LogParser.java

    r30 r31  
     1/**
     2 * Program: LogParser.java
     3 * Editor: Waue Chen
     4 * From :  NCHC. Taiwan
     5 * Last Update Date: 07/02/2008
     6 */
     7
    18package tw.org.nchc.code;
    29
     
    1017
    1118
    12 public class AccessLogParser {
     19public class LogParser {
    1320  private String ip;
    1421  private String protocol;
     
    2734 
    2835 
    29   public AccessLogParser(String line) throws ParseException, Exception{
     36  public LogParser(String line) throws ParseException, Exception{
    3037   
    3138   Matcher matcher = p.matcher(line);
  • sample/hadoop-0.16/tw/org/nchc/code/LogParserGo.java

    r30 r31  
    11/**
    2  * Program: LogFetcher.java
     2 * Program: LogParserGo.java
    33 * Editor: Waue Chen
    44 * From :  NCHC. Taiwan
     
    66 */
    77/**
    8  * Copyright 2007 The Apache Software Foundation
    9  *
    10  * Licensed to the Apache Software Foundation (ASF) under one
    11  * or more contributor license agreements.  See the NOTICE file
    12  * distributed with this work for additional information
    13  * regarding copyright ownership.  The ASF licenses this file
    14  * to you under the Apache License, Version 2.0 (the
    15  * "License"); you may not use this file except in compliance
    16  * with the License.  You may obtain a copy of the License at
    17  *
    18  *     http://www.apache.org/licenses/LICENSE-2.0
    19  *
    20  * Unless required by applicable law or agreed to in writing, software
    21  * distributed under the License is distributed on an "AS IS" BASIS,
    22  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    23  * See the License for the specific language governing permissions and
    24  * limitations under the License.
     8 * Purpose :
     9 *  This program will parse your apache log and store it into Hbase.
     10 *
     11 * HowToUse :
     12 *  Make sure of three things :
     13 *  1. Upload apache logs ( /var/log/apache2/access.log* ) to \
     14 *    hdfs (default: /user/waue/apache-log) \
     15 *   $ bin/hadoop dfs -put /var/log/apache2/ apache-log
     16 *  2. parameter "dir" in main contains the logs.
     17 *  3. you should filter the exception contents manually, \
     18 *    ex:  ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
     19 * 
     20 * Check Result:
     21 *  Go to hbase console, type :
     22 *    hql > select * from apache-log;
     23
     24+-------------------------+-------------------------+-------------------------+
     25| Row                     | Column                  | Cell                    |
     26+-------------------------+-------------------------+-------------------------+
     27| 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|
     28|                         |                         |  MSIE 4.01; Windows 95) |
     29+-------------------------+-------------------------+-------------------------+
     30| 118.170.101.250         | http:bytesize           | 318                     |
     31+-------------------------+-------------------------+-------------------------+
     32..........(skip)........
     33+-------------------------+-------------------------+-------------------------+
     34| 87.65.93.58             | http:method             | OPTIONS                 |
     35+-------------------------+-------------------------+-------------------------+
     36| 87.65.93.58             | http:protocol           | HTTP/1.1                |
     37+-------------------------+-------------------------+-------------------------+
     38| 87.65.93.58             | referrer:-              | *                       |
     39+-------------------------+-------------------------+-------------------------+
     40| 87.65.93.58             | url:*                   | -                       |
     41+-------------------------+-------------------------+-------------------------+
     4231 row(s) in set. (0.58 sec)
     43
     44
     45
    2546 */
    2647package tw.org.nchc.code;
    2748
    2849import java.io.IOException;
    29 import java.text.ParseException;
    3050
    3151import org.apache.hadoop.fs.FileStatus;
     
    4767import org.apache.hadoop.mapred.OutputCollector;
    4868import org.apache.hadoop.mapred.Reporter;
     69
    4970// import AccessLogParser
    5071/**
     
    5273 * W3CExtended, IISw3cExtended)
    5374 */
    54 public class LogFetcher {
     75public class LogParserGo {
    5576  static HBaseConfiguration conf = new HBaseConfiguration();
    5677
     
    6081
    6182  static HTable table = null;
    62 
    63   static boolean eclipseRun = false;
    64 
     83 
     84  static void print(String str){
     85    System.out.println("STR  = "+str);
     86  }
    6587  public static class MapClass extends MapReduceBase implements
    6688      Mapper<WritableComparable, Text, Text, Writable> {
    6789
    6890    @Override
     91    // MapReduceBase.configure(JobConf job)
     92    // Default implementation that does nothing.
    6993    public void configure(JobConf job) {
     94      // String get(String name,String defaultValue)
     95      // Get the value of the name property. If no such property exists,\
     96      //  then defaultValue is returned.
    7097      tableName = job.get(TABLE, "");
    7198    }
     
    74101        OutputCollector<Text, Writable> output, Reporter reporter)
    75102        throws IOException {
     103     
    76104      try {
     105        /*
     106        print(value.toString());
     107        FileWriter out = new FileWriter(new File(
     108        "/home/waue/mr-result.txt"));
     109        out.write(value.toString());
     110        out.flush();
     111        out.close();
     112        */
     113        LogParser log = new LogParser(value.toString());
    77114       
    78         AccessLogParser log = new AccessLogParser(value.toString());
    79115        if (table == null)
    80116          table = new HTable(conf, new Text(tableName));
     
    95131            log.getUrl().getBytes());
    96132        table.commit(lockId, log.getTimestamp());
    97       } catch (ParseException e) {
    98         e.printStackTrace();
     133       
    99134      } catch (Exception e) {
    100135        e.printStackTrace();
    101136      }
    102     }
    103   }
    104 //   do it to resolve warning : FileSystem.listPaths
    105   static public Path[] listPaths(FileSystem fsm,Path path) throws IOException
    106   {
     137     
     138    }
     139  }
     140
     141  // do it to resolve warning : FileSystem.listPaths
     142  static public Path[] listPaths(FileSystem fsm, Path path)
     143      throws IOException {
    107144    FileStatus[] fss = fsm.listStatus(path);
    108145    int length = fss.length;
    109146    Path[] pi = new Path[length];
    110     for (int i=0 ; i< length; i++)
    111     {
     147    for (int i = 0; i < length; i++) {
    112148      pi[i] = fss[i].getPath();
    113149    }
    114150    return pi;
    115   }
     151  }
     152
    116153  public static void runMapReduce(String table, String dir)
    117154      throws IOException {
    118     Path tempDir = new Path("log/temp");
     155    Path tempDir = new Path("/tmp/Mylog/");
    119156    Path InputDir = new Path(dir);
    120157    FileSystem fs = FileSystem.get(conf);
    121     JobConf jobConf = new JobConf(conf, LogFetcher.class);
     158    JobConf jobConf = new JobConf(conf, LogParserGo.class);
    122159    jobConf.setJobName("apache log fetcher");
    123160    jobConf.set(TABLE, table);
     
    140177    }
    141178    jobConf.setOutputPath(tempDir);
    142    
     179
    143180    jobConf.setMapperClass(MapClass.class);
    144181
     
    150187    JobClient.runJob(jobConf);
    151188
    152     fs.delete(tempDir);   
     189    fs.delete(tempDir);
    153190    fs.close();
    154191  }
     
    171208
    172209  public static void main(String[] args) throws IOException {
    173     String table_name = "log";
    174     String dir = "apache-log";
    175 
    176     if (eclipseRun) {
    177       table_name = "log";
    178       dir = "apache-log";
    179     } else if (args.length < 2) {
    180       System.out
    181           .println("Usage: logfetcher <access_log file or directory> <table_name>");
    182       System.exit(1);
    183     } else {
    184       table_name = args[1];
    185       dir = args[0];
    186     }
     210    String table_name = "apache-log2";
     211    String dir = "/user/waue/apache-log";
     212   
     213    // if (eclipseRun) {
     214    // table_name = "log";
     215    // dir = "apache-log";
     216    // } else if (args.length < 2) {
     217    // System.out
     218    // .println("Usage: logfetcher <access_log file or directory>
     219    // <table_name>");
     220    // System.exit(1);
     221    // } else {
     222    // table_name = args[1];
     223    // dir = args[0];
     224    // }
     225
    187226    creatTable(table_name);
    188227    runMapReduce(table_name, dir);
  • sample/hadoop-0.16/tw/org/nchc/code/WordCount.java

    r27 r31  
    5656
    5757  // mapper: emits (token, 1) for every word occurrence
    58   private static class MapClass extends MapReduceBase implements
    59       Mapper<LongWritable, Text, Text, IntWritable> {
     58  private static class MapClass extends MapReduceBase
     59  implements Mapper<LongWritable, Text, Text, IntWritable>
     60  {
    6061
    6162    // reuse objects to save overhead of object creation
     
    7778
    7879  // reducer: sums up all the counts
    79   private static class ReduceClass extends MapReduceBase implements
    80       Reducer<Text, IntWritable, Text, IntWritable> {
     80  private static class ReduceClass extends MapReduceBase
     81  implements Reducer<Text, IntWritable, Text, IntWritable>
     82  {
    8183
    8284    // reuse objects
     
    105107    int reduceTasks = 1;
    106108    JobConf conf = new JobConf(WordCount.class);
    107     conf.setJobName("wordcount");
     109//    conf.setJobName("wordcount");
    108110
    109111    conf.setNumMapTasks(mapTasks);
     
    118120
    119121    conf.setMapperClass(MapClass.class);
    120     conf.setCombinerClass(ReduceClass.class);
     122//    conf.setCombinerClass(ReduceClass.class);
    121123    conf.setReducerClass(ReduceClass.class);
    122124
  • sample/hadoop-0.16/tw/org/nchc/code/WordCountIntoHBase.java

    r27 r31  
    4545  // $Input_Path. Please make sure the path is correct and contains input
    4646  // files
    47   static final String Input_Path = "/user/waue/simple";
     47  static final String Input_Path = "/user/waue/input";
    4848
    4949  // Hbase table name, the program will create it
  • sample/hadoop-0.16/tw/org/nchc/demo/LogFetcher.java

    r29 r31  
    66 */
    77/**
    8  * Copyright 2007 The Apache Software Foundation
    9  *
    10  * Licensed to the Apache Software Foundation (ASF) under one
    11  * or more contributor license agreements.  See the NOTICE file
    12  * distributed with this work for additional information
    13  * regarding copyright ownership.  The ASF licenses this file
    14  * to you under the Apache License, Version 2.0 (the
    15  * "License"); you may not use this file except in compliance
    16  * with the License.  You may obtain a copy of the License at
    17  *
    18  *     http://www.apache.org/licenses/LICENSE-2.0
    19  *
    20  * Unless required by applicable law or agreed to in writing, software
    21  * distributed under the License is distributed on an "AS IS" BASIS,
    22  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    23  * See the License for the specific language governing permissions and
    24  * limitations under the License.
     8 * Purpose :
     9 *  This program will parse your apache log and store it into Hbase.
     10 *
     11 * HowToUse :
     12 *  Make sure of three things :
     13 *  1. Upload apache logs ( /var/log/apache2/access.log* ) to \
     14 *    hdfs (default: /user/waue/apache-log) \
     15 *   $ bin/hadoop dfs -put /var/log/apache2/ apache-log
     16 *  2. parameter "dir" in main contains the logs.
     17 *  3. you should filter the exception contents manually, \
     18 *    ex:  ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
     19 * 
     20 * Check Result:
     21 *  Go to hbase console, type :
     22 *    hql > select * from apache-log;
     23
     24+-------------------------+-------------------------+-------------------------+
     25| Row                     | Column                  | Cell                    |
     26+-------------------------+-------------------------+-------------------------+
     27| 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|
     28|                         |                         |  MSIE 4.01; Windows 95) |
     29+-------------------------+-------------------------+-------------------------+
     30| 118.170.101.250         | http:bytesize           | 318                     |
     31+-------------------------+-------------------------+-------------------------+
     32..........(skip)........
     33+-------------------------+-------------------------+-------------------------+
     34| 87.65.93.58             | http:method             | OPTIONS                 |
     35+-------------------------+-------------------------+-------------------------+
     36| 87.65.93.58             | http:protocol           | HTTP/1.1                |
     37+-------------------------+-------------------------+-------------------------+
     38| 87.65.93.58             | referrer:-              | *                       |
     39+-------------------------+-------------------------+-------------------------+
     40| 87.65.93.58             | url:*                   | -                       |
     41+-------------------------+-------------------------+-------------------------+
     4231 row(s) in set. (0.58 sec)
     43
    2544 */
     45
     46
    2647package tw.org.nchc.demo;
    2748
Note: See TracChangeset for help on using the changeset viewer.