source: sample/hadoop-0.16/tw/org/nchc/code/WordCount.java @ 27

Last change on this file since 27 was 27, checked in by waue, 16 years ago

test!

File size: 3.6 KB
Line 
1/**
2 * Program: WordCount.java
3 * Editor: Waue Chen
4 * From :  NCHC. Taiwan
5 * Last Update Date: 07/02/2008
6 */
7
8/**
9 * Purpose :
10 *  Count the occurrences of each word in the input text files and store the result in the Hadoop file system
11 *
12 * HowToUse :
13 *  Make sure Hadoop file system is running correctly.
14 *  Put text file on the directory "/local_src/input"
15 *  You can use the instruction to upload "/local_src/input" to HDFS input dir
16 *    $ bin/hadoop dfs -put /local_src/input input
17 *  Then modify the filepath value in the constructor so that it points to the input directory, and run this code.
18 * 
19 *
20 * Check Result:
21 *  Inspect http://localhost:50070 with a web browser
22 */
23package tw.org.nchc.code;
24
25import java.io.IOException;
26import java.util.Iterator;
27import java.util.StringTokenizer;
28
29import org.apache.hadoop.fs.FileSystem;
30import org.apache.hadoop.fs.Path;
31import org.apache.hadoop.io.IntWritable;
32import org.apache.hadoop.io.LongWritable;
33import org.apache.hadoop.io.Text;
34import org.apache.hadoop.mapred.JobClient;
35import org.apache.hadoop.mapred.JobConf;
36import org.apache.hadoop.mapred.MapReduceBase;
37import org.apache.hadoop.mapred.Mapper;
38import org.apache.hadoop.mapred.OutputCollector;
39import org.apache.hadoop.mapred.Reducer;
40import org.apache.hadoop.mapred.Reporter;
41
42public class WordCount {
43  private String filepath;
44
45  private String outputPath;
46
47  public WordCount() {
48    filepath = "/user/waue/input/";
49    outputPath = "counts1";
50  }
51
52  public WordCount(String path, String output) {
53    filepath = path;
54    outputPath = output;
55  }
56
57  // mapper: emits (token, 1) for every word occurrence
58  private static class MapClass extends MapReduceBase implements
59      Mapper<LongWritable, Text, Text, IntWritable> {
60
61    // reuse objects to save overhead of object creation
62    private final static IntWritable one = new IntWritable(1);
63
64    private Text word = new Text();
65
66    public void map(LongWritable key, Text value,
67        OutputCollector<Text, IntWritable> output, Reporter reporter)
68        throws IOException {
69      String line = ((Text) value).toString();
70      StringTokenizer itr = new StringTokenizer(line);
71      while (itr.hasMoreTokens()) {
72        word.set(itr.nextToken());
73        output.collect(word, one);
74      }
75    }
76  }
77
78  // reducer: sums up all the counts
79  private static class ReduceClass extends MapReduceBase implements
80      Reducer<Text, IntWritable, Text, IntWritable> {
81
82    // reuse objects
83    private final static IntWritable SumValue = new IntWritable();
84
85    public void reduce(Text key, Iterator<IntWritable> values,
86        OutputCollector<Text, IntWritable> output, Reporter reporter)
87        throws IOException {
88      // sum up values
89      int sum = 0;
90      while (values.hasNext()) {
91        sum += values.next().get();
92      }
93      SumValue.set(sum);
94      output.collect(key, SumValue);
95    }
96  }
97
98  /**
99   * Runs the demo.
100   */
101  public static void main(String[] args) throws IOException {
102    WordCount wc = new WordCount();
103
104    int mapTasks = 1;
105    int reduceTasks = 1;
106    JobConf conf = new JobConf(WordCount.class);
107    conf.setJobName("wordcount");
108
109    conf.setNumMapTasks(mapTasks);
110    conf.setNumReduceTasks(reduceTasks);
111   
112    conf.setInputPath(new Path(wc.filepath));
113
114    conf.setOutputKeyClass(Text.class);
115    conf.setOutputValueClass(IntWritable.class);
116
117    conf.setOutputPath(new Path(wc.outputPath));
118
119    conf.setMapperClass(MapClass.class);
120    conf.setCombinerClass(ReduceClass.class);
121    conf.setReducerClass(ReduceClass.class);
122
123    // Delete the output directory if it exists already
124    Path outputDir = new Path(wc.outputPath);
125    FileSystem.get(conf).delete(outputDir);
126    JobClient.runJob(conf);
127  }
128}
Note: See TracBrowser for help on using the repository browser.