Context Navigation

source: sample/hadoop-0.16/tw/org/nchc/demo/DemoWordCount.java @ 25

Last change on this file since 25 was 25, checked in by waue, 16 years ago
downgrade from 0.17 to 0.16 test for work -> not yet
File size: 3.4 KB

Rev	Line
[21]	1	/**
	2	* Program: DemoWordCount.java
	3	* Editor: Waue Chen
	4	* From : NCHC, Taiwan
	5	* Last Update Date: 07/02/2008
	6	* Downgraded from the Hadoop 0.17 API to the 0.16 API
	7	*/
	8	/*
	9	* Cloud9: A MapReduce Library for Hadoop
	10	*/
	11
	12	package tw.org.nchc.demo;
	13
	14	import java.io.IOException;
	15	import java.util.Iterator;
	16	import java.util.StringTokenizer;
	17
	18	import org.apache.hadoop.fs.FileSystem;
	19	import org.apache.hadoop.fs.Path;
	20	import org.apache.hadoop.io.IntWritable;
	21	import org.apache.hadoop.io.LongWritable;
	22	import org.apache.hadoop.io.Text;
	23	import org.apache.hadoop.mapred.JobClient;
	24	import org.apache.hadoop.mapred.JobConf;
	25	import org.apache.hadoop.mapred.MapReduceBase;
	26	import org.apache.hadoop.mapred.Mapper;
	27	import org.apache.hadoop.mapred.OutputCollector;
	28	import org.apache.hadoop.mapred.Reducer;
	29	import org.apache.hadoop.mapred.Reporter;
	30
	31	/**
	32	* <p>Simple word count demo. Counts words in the Bible+Shakespeare sample
	33	* collection. Expected trace of MapReduce operation:</p>
	34	*
	35	* <pre>
	36	* Map input records=156215
	37	* Map output records=1734298
	38	* Map input bytes=9068074
	39	* Map output bytes=15919397
	40	* Combine input records=1734298
	41	* Combine output records=135372
	42	* Reduce input groups=41788
	43	* Reduce input records=135372
	44	* Reduce output records=41788
	45	* </pre>
	46	*
	47	*/
	48	public class DemoWordCount {
	49
	50	// mapper: emits (token, 1) for every word occurrence
	51	private static class MapClass extends MapReduceBase implements
	52	Mapper<LongWritable, Text, Text, IntWritable> {
	53
	54	// reuse objects to save overhead of object creation
	55	private final static IntWritable one = new IntWritable(1);
	56	private Text word = new Text();
	57
	58	public void map(LongWritable key, Text value,
	59	OutputCollector<Text, IntWritable> output, Reporter reporter)
	60	throws IOException {
	61	String line = ((Text) value).toString();
	62	StringTokenizer itr = new StringTokenizer(line);
	63	while (itr.hasMoreTokens()) {
	64	word.set(itr.nextToken());
	65	output.collect(word, one);
	66	}
	67	}
	68	}
	69
	70	// reducer: sums up all the counts
	71	private static class ReduceClass extends MapReduceBase implements
	72	Reducer<Text, IntWritable, Text, IntWritable> {
	73
	74	// reuse objects
	75	private final static IntWritable SumValue = new IntWritable();
	76
	77	public void reduce(Text key, Iterator<IntWritable> values,
	78	OutputCollector<Text, IntWritable> output, Reporter reporter)
	79	throws IOException {
	80	// sum up values
	81	int sum = 0;
	82	while (values.hasNext()) {
	83	sum += values.next().get();
	84	}
	85	SumValue.set(sum);
	86	output.collect(key, SumValue);
	87	}
	88	}
	89
	90	private DemoWordCount() {
	91	}
	92
	93	/**
	94	* Runs the demo.
	95	*/
	96	public static void main(String[] args) throws IOException {
	97	String filename = "/user/waue/test/132.txt";
	98	String outputPath = "sample-counts";
	99	int mapTasks = 20;
	100	int reduceTasks = 1;
	101
	102	JobConf conf = new JobConf(DemoWordCount.class);
	103	conf.setJobName("wordcount");
	104
	105	conf.setNumMapTasks(mapTasks);
	106	conf.setNumReduceTasks(reduceTasks);
[25]	107
	108	conf.setInputPath(new Path(filename));
	109
[21]	110	conf.setOutputKeyClass(Text.class);
	111	conf.setOutputValueClass(IntWritable.class);
[25]	112
	113	conf.setOutputPath(new Path(outputPath));
[21]	114	conf.setMapperClass(MapClass.class);
	115	conf.setCombinerClass(ReduceClass.class);
	116	conf.setReducerClass(ReduceClass.class);
	117
	118	// Delete the output directory if it exists already
	119	Path outputDir = new Path(outputPath);
[25]	120	FileSystem.get(conf).delete(outputDir);
[21]	121	JobClient.runJob(conf);
	122	}
	123	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: