Version 1 (modified by waue, 17 years ago) (diff) |
---|
/* * hadoop sample code */ import java.io.IOException; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; public class WordCount { // mapper: emits (token, 1) for every word occurrence private static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { // reuse objects to save overhead of object creation private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = ((Text) value).toString(); StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); output.collect(word, one); } } } // reducer: sums up all the counts private static class ReduceClass extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { // reuse objects private final static IntWritable SumValue = new IntWritable(); public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { // sum up values int sum = 0; while (values.hasNext()) { sum += values.next().get(); } SumValue.set(sum); output.collect(key, SumValue); } } /** * Runs the demo. */ public static void main(String[] args) throws IOException { String filename = "/user/waue/test/132.txt"; String outputPath = "sample-counts"; int mapTasks = 20; int reduceTasks = 1; JobConf conf = new JobConf(DemoWordCount.class); conf.setJobName("wordcount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); conf.setInputPath(new Path(filename)); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setOutputPath(new Path(outputPath)); conf.setMapperClass(MapClass.class); conf.setCombinerClass(ReduceClass.class); conf.setReducerClass(ReduceClass.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir); JobClient.runJob(conf); } }
Attachments (1)
- WordCount.java (2.7 KB) - added by waue 17 years ago.
Download all attachments as: .zip