source: sample/hadoop-0.16/tw/org/nchc/code/WordCount.java @ 123

Last change on this file since 123 was 31, checked in by waue, 16 years ago

update some new ..

File size: 3.6 KB
Line 
1/**
2 * Program: WordCount.java
3 * Editor: Waue Chen
4 * From :  NCHC. Taiwan
5 * Last Update Date: 07/02/2008
6 */
7
8/**
9 * Purpose :
10 *  Count the occurrences of each word in the input text files with a
10 *  MapReduce job and store the result on the Hadoop file system
11 *
12 * HowToUse :
13 *  Make sure Hadoop file system is running correctly.
14 *  Put text file on the directory "/local_src/input"
15 *  You can use the instruction to upload "/local_src/input" to HDFS input dir
16 *    $ bin/hadoop dfs -put /local_src/input input
17 *  Then set the filepath field in the constructor to the correct input path and run this code.
18 * 
19 *
20 * Check Result:
21 *  Inspect http://localhost:50070 with a web browser
22 */
23package tw.org.nchc.code;
24
25import java.io.IOException;
26import java.util.Iterator;
27import java.util.StringTokenizer;
28
29import org.apache.hadoop.fs.FileSystem;
30import org.apache.hadoop.fs.Path;
31import org.apache.hadoop.io.IntWritable;
32import org.apache.hadoop.io.LongWritable;
33import org.apache.hadoop.io.Text;
34import org.apache.hadoop.mapred.JobClient;
35import org.apache.hadoop.mapred.JobConf;
36import org.apache.hadoop.mapred.MapReduceBase;
37import org.apache.hadoop.mapred.Mapper;
38import org.apache.hadoop.mapred.OutputCollector;
39import org.apache.hadoop.mapred.Reducer;
40import org.apache.hadoop.mapred.Reporter;
41
42public class WordCount {
43  private String filepath;
44
45  private String outputPath;
46
47  public WordCount() {
48    filepath = "/user/waue/input/";
49    outputPath = "counts1";
50  }
51
52  public WordCount(String path, String output) {
53    filepath = path;
54    outputPath = output;
55  }
56
57  // mapper: emits (token, 1) for every word occurrence
58  private static class MapClass extends MapReduceBase
59  implements Mapper<LongWritable, Text, Text, IntWritable> 
60  {
61
62    // reuse objects to save overhead of object creation
63    private final static IntWritable one = new IntWritable(1);
64
65    private Text word = new Text();
66
67    public void map(LongWritable key, Text value,
68        OutputCollector<Text, IntWritable> output, Reporter reporter)
69        throws IOException {
70      String line = ((Text) value).toString();
71      StringTokenizer itr = new StringTokenizer(line);
72      while (itr.hasMoreTokens()) {
73        word.set(itr.nextToken());
74        output.collect(word, one);
75      }
76    }
77  }
78
79  // reducer: sums up all the counts
80  private static class ReduceClass extends MapReduceBase
81  implements Reducer<Text, IntWritable, Text, IntWritable> 
82  {
83
84    // reuse objects
85    private final static IntWritable SumValue = new IntWritable();
86
87    public void reduce(Text key, Iterator<IntWritable> values,
88        OutputCollector<Text, IntWritable> output, Reporter reporter)
89        throws IOException {
90      // sum up values
91      int sum = 0;
92      while (values.hasNext()) {
93        sum += values.next().get();
94      }
95      SumValue.set(sum);
96      output.collect(key, SumValue);
97    }
98  }
99
100  /**
101   * Runs the demo.
102   */
103  public static void main(String[] args) throws IOException {
104    WordCount wc = new WordCount();
105
106    int mapTasks = 1;
107    int reduceTasks = 1;
108    JobConf conf = new JobConf(WordCount.class);
109//    conf.setJobName("wordcount");
110
111    conf.setNumMapTasks(mapTasks);
112    conf.setNumReduceTasks(reduceTasks);
113   
114    conf.setInputPath(new Path(wc.filepath));
115
116    conf.setOutputKeyClass(Text.class);
117    conf.setOutputValueClass(IntWritable.class);
118
119    conf.setOutputPath(new Path(wc.outputPath));
120
121    conf.setMapperClass(MapClass.class);
122//    conf.setCombinerClass(ReduceClass.class);
123    conf.setReducerClass(ReduceClass.class);
124
125    // Delete the output directory if it exists already
126    Path outputDir = new Path(wc.outputPath);
127    FileSystem.get(conf).delete(outputDir);
128    JobClient.runJob(conf);
129  }
130}
Note: See TracBrowser for help on using the repository browser.