Context Navigation

DemoWordCount.java @ 218

Last change on this file since 218 was 27, checked in by waue, 16 years ago
test!
File size: 3.4 KB

Line
1	/**
2	* Program: DemoWordCount.java
3	* Editor: Waue Chen
4	* From : NCHC. Taiwan
5	* Last Update Date: 07/02/2008
6	*/
7	/*
8	* Cloud9: A MapReduce Library for Hadoop
9	*/
10
11	package tw.org.nchc.demo;
12
13	import java.io.IOException;
14	import java.util.Iterator;
15	import java.util.StringTokenizer;
16
17	import org.apache.hadoop.fs.FileSystem;
18	import org.apache.hadoop.fs.Path;
19	import org.apache.hadoop.io.IntWritable;
20	import org.apache.hadoop.io.LongWritable;
21	import org.apache.hadoop.io.Text;
22	import org.apache.hadoop.mapred.JobClient;
23	import org.apache.hadoop.mapred.JobConf;
24	import org.apache.hadoop.mapred.MapReduceBase;
25	import org.apache.hadoop.mapred.Mapper;
26	import org.apache.hadoop.mapred.OutputCollector;
27	import org.apache.hadoop.mapred.Reducer;
28	import org.apache.hadoop.mapred.Reporter;
29
30	/**
31	* <p>Simple word count demo. Counts words in the Bible+Shakespeare sample
32	* collection. Expected trace of MapReduce operation:</p>
33	*
34	* <pre>
35	* Map input records=156215
36	* Map output records=1734298
37	* Map input bytes=9068074
38	* Map output bytes=15919397
39	* Combine input records=1734298
40	* Combine output records=135372
41	* Reduce input groups=41788
42	* Reduce input records=135372
43	* Reduce output records=41788
44	* </pre>
45	*
46	*/
47	public class DemoWordCount {
48
49	// mapper: emits (token, 1) for every word occurrence
50	private static class MapClass extends MapReduceBase implements
51	Mapper<LongWritable, Text, Text, IntWritable> {
52
53	// reuse objects to save overhead of object creation
54	private final static IntWritable one = new IntWritable(1);
55	private Text word = new Text();
56
57	public void map(LongWritable key, Text value,
58	OutputCollector<Text, IntWritable> output, Reporter reporter)
59	throws IOException {
60	String line = ((Text) value).toString();
61	StringTokenizer itr = new StringTokenizer(line);
62	while (itr.hasMoreTokens()) {
63	word.set(itr.nextToken());
64	output.collect(word, one);
65	}
66	}
67	}
68
69	// reducer: sums up all the counts
70	private static class ReduceClass extends MapReduceBase implements
71	Reducer<Text, IntWritable, Text, IntWritable> {
72
73	// reuse objects
74	private final static IntWritable SumValue = new IntWritable();
75
76	public void reduce(Text key, Iterator<IntWritable> values,
77	OutputCollector<Text, IntWritable> output, Reporter reporter)
78	throws IOException {
79	// sum up values
80	int sum = 0;
81	while (values.hasNext()) {
82	sum += values.next().get();
83	}
84	SumValue.set(sum);
85	output.collect(key, SumValue);
86	}
87	}
88
89	private DemoWordCount() {
90	}
91
92	/**
93	* Runs the demo.
94	*/
95	public static void main(String[] args) throws IOException {
96	String filename = "/user/waue/test/132.txt";
97	String outputPath = "sample-counts";
98	int mapTasks = 20;
99	int reduceTasks = 1;
100
101	JobConf conf = new JobConf(DemoWordCount.class);
102	conf.setJobName("wordcount");
103
104	conf.setNumMapTasks(mapTasks);
105	conf.setNumReduceTasks(reduceTasks);
106
107	conf.setInputPath(new Path(filename));
108
109	conf.setOutputKeyClass(Text.class);
110	conf.setOutputValueClass(IntWritable.class);
111
112	conf.setOutputPath(new Path(outputPath));
113	conf.setMapperClass(MapClass.class);
114	conf.setCombinerClass(ReduceClass.class);
115	conf.setReducerClass(ReduceClass.class);
116
117	// Delete the output directory if it exists already
118	Path outputDir = new Path(outputPath);
119	FileSystem.get(conf).delete(outputDir);
120	JobClient.runJob(conf);
121	}
122	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: sample/hadoop-0.16/tw/org/nchc/demo/DemoWordCount.java @ 218

Download in other formats: