source: sample/hadoop-0.17/tw/org/nchc/code/WordCount.java @ 192

Last change on this file since 192 was 20, checked in by waue, 16 years ago

Backed up the modified hadoop 0.17 package here.
Currently continuing development on hadoop 0.16 + hbase 1.3

File size: 3.7 KB
/**
 * Program: WordCount.java
 * Editor: Waue Chen
 * From: NCHC, Taiwan
 * Last Update Date: 07/02/2008
 * Upgrade to 0.17
 */

/**
 * Purpose :
 *  Count the words in text files stored on the Hadoop file system and
 *  write the result back to HDFS
 *
 * HowToUse :
 *  Make sure the Hadoop file system is running correctly.
 *  Put text files in the local directory "/local_src/input".
 *  You can use the following command to upload "/local_src/input" to the HDFS input directory:
 *    $ bin/hadoop dfs -put /local_src/input input
 *  Then modify the filepath parameter in the constructor to the correct path and run this code.
 *
 * Check Result:
 *  inspect http://localhost:50070 with a web browser
 */
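/*
 * Run sketch (illustrative; the jar name is an assumption, not part of this
 * repository): after packaging the class into a jar, the job can be launched
 * and the single reducer's output inspected with
 *   $ bin/hadoop jar nchc.jar tw.org.nchc.code.WordCount
 *   $ bin/hadoop dfs -cat counts1/part-00000
 */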
package tw.org.nchc.code;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCount {
  private String filepath;

  private String outputPath;

  public WordCount() {
    filepath = "/user/waue/input/";
    outputPath = "counts1";
  }

  public WordCount(String path, String output) {
    filepath = path;
    outputPath = output;
  }

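  // Usage sketch (illustrative, not in the original source): callers can point
  // the job at other HDFS paths via the two-argument constructor, e.g.
  //   WordCount wc = new WordCount("/user/waue/another_input/", "counts2");
  // main() below uses the no-argument constructor and its defaults.
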
  // mapper: emits (token, 1) for every word occurrence
  private static class MapClass extends MapReduceBase implements
      Mapper<LongWritable, Text, Text, IntWritable> {

    // reuse objects to save overhead of object creation
    private final static IntWritable one = new IntWritable(1);

    private Text word = new Text();

    public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
      }
    }
  }
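  // Map step example (illustrative): the input line "to be or not to be"
  // produces the pairs (to,1), (be,1), (or,1), (not,1), (to,1), (be,1);
  // the framework then groups these pairs by key before the reduce step.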

  // reducer: sums up all the counts
  private static class ReduceClass extends MapReduceBase implements
      Reducer<Text, IntWritable, Text, IntWritable> {

    // reuse objects
    private final static IntWritable SumValue = new IntWritable();

    public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      // sum up values
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      SumValue.set(sum);
      output.collect(key, SumValue);
    }
  }
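  // Reduce step example (illustrative): for the key "to" the iterator delivers
  // the values [1, 1], so the pair ("to", 2) is written to the output.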

  /**
   * Runs the demo.
   */
  public static void main(String[] args) throws IOException {
    WordCount wc = new WordCount();

    int mapTasks = 1;
    int reduceTasks = 1;
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    // 0.16
    // conf.setInputPath(new Path(wc.filepath));
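    // Note: Convert is a helper class elsewhere in this package and is not
    // shown here; presumably it wraps the 0.17-style calls
    // FileInputFormat.setInputPaths(conf, path) and
    // FileOutputFormat.setOutputPath(conf, path), which replace the
    // JobConf.setInputPath / setOutputPath methods of 0.16 kept above and
    // below as commented-out lines.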
    Convert.setInputPath(conf, new Path(wc.filepath));
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    // 0.16
    // conf.setOutputPath(new Path(wc.outputPath));
    Convert.setOutputPath(conf, new Path(wc.outputPath));

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);
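    // ReduceClass serves as both the combiner and the reducer; since summing
    // counts is associative and commutative, map-side partial sums yield the
    // same final result while reducing shuffle traffic.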

    // Delete the output directory if it exists already
    Path outputDir = new Path(wc.outputPath);
    // 0.16
    FileSystem.get(conf).delete(outputDir, true);

    JobClient.runJob(conf);
  }
}