source: sample/WordCount.java @ 18

Last change on this file since 18 was 18, checked in by waue, 16 years ago

upgrade 0.16 to 0.17

File size: 3.7 KB
RevLine 
[9]1/**
2 * Program: WordCount.java
3 * Editor: Waue Chen
4 * From :  NCHC. Taiwan
[18]5 * Last Update Date: 07/02/2008
6 * Upgrade to 0.17
[7]7 */
[9]8
9/**
10 * Purpose :
11 *  Count the occurrences of each word in the input text files and
11 *  store the result of WordCount.java on the Hadoop file system
12 *
13 * HowToUse :
14 *  Make sure Hadoop file system is running correctly.
15 *  Put text file on the directory "/local_src/input"
16 *  You can use the following command to upload "/local_src/input" to the HDFS input dir
17 *    $ bin/hadoop dfs -put /local_src/input input
18 *  Then modify the $filepath parameter in the constructor to be correct and run this code.
19 * 
20 *
21 * Check Result:
22 *  Inspect http://localhost:50070 with a web browser
23 */
[8]24package tw.org.nchc.code;
[7]25
26import java.io.IOException;
27import java.util.Iterator;
28import java.util.StringTokenizer;
29
30import org.apache.hadoop.fs.FileSystem;
31import org.apache.hadoop.fs.Path;
32import org.apache.hadoop.io.IntWritable;
33import org.apache.hadoop.io.LongWritable;
34import org.apache.hadoop.io.Text;
35import org.apache.hadoop.mapred.JobClient;
36import org.apache.hadoop.mapred.JobConf;
37import org.apache.hadoop.mapred.MapReduceBase;
38import org.apache.hadoop.mapred.Mapper;
39import org.apache.hadoop.mapred.OutputCollector;
40import org.apache.hadoop.mapred.Reducer;
41import org.apache.hadoop.mapred.Reporter;
42
43public class WordCount {
[9]44  private String filepath;
[18]45
[9]46  private String outputPath;
[18]47
48  public WordCount() {
[9]49    filepath = "/user/waue/input/";
50    outputPath = "counts1";
51  }
[18]52
53  public WordCount(String path, String output) {
[9]54    filepath = path;
55    outputPath = output;
56  }
[18]57
[7]58  // mapper: emits (token, 1) for every word occurrence
59  private static class MapClass extends MapReduceBase implements
60      Mapper<LongWritable, Text, Text, IntWritable> {
61
62    // reuse objects to save overhead of object creation
63    private final static IntWritable one = new IntWritable(1);
[18]64
[7]65    private Text word = new Text();
66
67    public void map(LongWritable key, Text value,
68        OutputCollector<Text, IntWritable> output, Reporter reporter)
69        throws IOException {
70      String line = ((Text) value).toString();
71      StringTokenizer itr = new StringTokenizer(line);
72      while (itr.hasMoreTokens()) {
73        word.set(itr.nextToken());
74        output.collect(word, one);
75      }
76    }
77  }
78
79  // reducer: sums up all the counts
80  private static class ReduceClass extends MapReduceBase implements
81      Reducer<Text, IntWritable, Text, IntWritable> {
82
83    // reuse objects
84    private final static IntWritable SumValue = new IntWritable();
85
86    public void reduce(Text key, Iterator<IntWritable> values,
87        OutputCollector<Text, IntWritable> output, Reporter reporter)
88        throws IOException {
89      // sum up values
90      int sum = 0;
91      while (values.hasNext()) {
92        sum += values.next().get();
93      }
94      SumValue.set(sum);
95      output.collect(key, SumValue);
96    }
97  }
98
99  /**
100   * Runs the demo.
101   */
102  public static void main(String[] args) throws IOException {
[9]103    WordCount wc = new WordCount();
[18]104
[9]105    int mapTasks = 1;
[7]106    int reduceTasks = 1;
107    JobConf conf = new JobConf(WordCount.class);
108    conf.setJobName("wordcount");
109
110    conf.setNumMapTasks(mapTasks);
111    conf.setNumReduceTasks(reduceTasks);
[18]112    // 0.16
113    // conf.setInputPath(new Path(wc.filepath));
114    Convert.setInputPath(conf, new Path(wc.filepath));
[7]115    conf.setOutputKeyClass(Text.class);
116    conf.setOutputValueClass(IntWritable.class);
[18]117    // 0.16
118    // conf.setOutputPath(new Path(wc.outputPath));
119    Convert.setOutputPath(conf, new Path(wc.outputPath));
[7]120
121    conf.setMapperClass(MapClass.class);
122    conf.setCombinerClass(ReduceClass.class);
123    conf.setReducerClass(ReduceClass.class);
[18]124
[7]125    // Delete the output directory if it exists already
[9]126    Path outputDir = new Path(wc.outputPath);
[18]127    // 0.16
128    FileSystem.get(conf).delete(outputDir,true);
[7]129
130    JobClient.runJob(conf);
131  }
132}
Note: See TracBrowser for help on using the repository browser.