Changes between Initial Version and Version 1 of waue/WordCount


Ignore:
Timestamp:
Feb 6, 2009, 4:33:13 PM (15 years ago)
Author:
waue
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • waue/WordCount

    v1 v1  
     1{{{
     2#!java
     3       
     4import java.io.IOException;
     5import java.util.*;
     6       
     7import org.apache.hadoop.fs.Path;
     8import org.apache.hadoop.conf.*;
     9import org.apache.hadoop.io.*;
     10import org.apache.hadoop.mapred.*;
     11import org.apache.hadoop.util.*;
     12       
     13public class WordCount {
     14       
     15 public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
     16    private final static IntWritable one = new IntWritable(1);
     17    private Text word = new Text();
     18       
     19    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
     20        String line = value.toString();
     21        StringTokenizer tokenizer = new StringTokenizer(line);
     22        while (tokenizer.hasMoreTokens()) {
     23            word.set(tokenizer.nextToken());
     24            output.collect(word, one);
     25        }
     26    }
     27 }
     28       
     29 public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
     30
     31    public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
     32        int sum = 0;
     33        while (values.hasNext()) {
     34            sum += values.next().get();
     35        }
     36        output.collect(key, new IntWritable(sum));
     37    }
     38 }
     39       
     40 public static void main(String[] args) throws Exception {
     41    JobConf conf = new JobConf(WordCount.class);
     42    conf.setJobName("wordcount");
     43       
     44    conf.setOutputKeyClass(Text.class);
     45    conf.setOutputValueClass(IntWritable.class);
     46       
     47    conf.setMapperClass(Map.class);
     48    conf.setCombinerClass(Reduce.class);
     49    conf.setReducerClass(Reduce.class);
     50       
     51    conf.setInputFormat(TextInputFormat.class);
     52    conf.setOutputFormat(TextOutputFormat.class);
     53       
     54    FileInputFormat.setInputPaths(conf, new Path(args[0]));
     55    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
     56       
     57    JobClient.runJob(conf);
     58 }
     59       
     60}
     61}}}