Context Navigation

DemoWordCount.java

Last change on this file was 20, checked in by waue, 16 years ago
將改完的 hadoop 0.17版package 放來備份目前繼續開發 hadoop 0.16 + hbase 1.3
File size: 3.6 KB

Rev	Line
[20]	1	/**
	2	* Program: DemoWordCount.java
	3	* Editor: Waue Chen
	4	* From : NCHC. Taiwan
	5	* Last Update Date: 07/02/2008
	6	* Upgrade to 0.17
	7	*/
	8	/*
	9	* Cloud9: A MapReduce Library for Hadoop
	10	*/
	11
	12	package tw.org.nchc.demo;
	13
	14	import java.io.IOException;
	15	import java.util.Iterator;
	16	import java.util.StringTokenizer;
	17
	18	import org.apache.hadoop.fs.FileSystem;
	19	import org.apache.hadoop.fs.Path;
	20	import org.apache.hadoop.io.IntWritable;
	21	import org.apache.hadoop.io.LongWritable;
	22	import org.apache.hadoop.io.Text;
	23	import org.apache.hadoop.mapred.JobClient;
	24	import org.apache.hadoop.mapred.JobConf;
	25	import org.apache.hadoop.mapred.MapReduceBase;
	26	import org.apache.hadoop.mapred.Mapper;
	27	import org.apache.hadoop.mapred.OutputCollector;
	28	import org.apache.hadoop.mapred.Reducer;
	29	import org.apache.hadoop.mapred.Reporter;
	30
	31	import tw.org.nchc.code.Convert;
	32
	33	/**
	34	* <p>Simple word count demo. Counts words in the Bible+Shakespeare sample
	35	* collection. Expected trace of MapReduce operation:</p>
	36	*
	37	* <pre>
	38	* Map input records=156215
	39	* Map output records=1734298
	40	* Map input bytes=9068074
	41	* Map output bytes=15919397
	42	* Combine input records=1734298
	43	* Combine output records=135372
	44	* Reduce input groups=41788
	45	* Reduce input records=135372
	46	* Reduce output records=41788
	47	* </pre>
	48	*
	49	*/
	50	public class DemoWordCount {
	51
	52	// mapper: emits (token, 1) for every word occurrence
	53	private static class MapClass extends MapReduceBase implements
	54	Mapper<LongWritable, Text, Text, IntWritable> {
	55
	56	// reuse objects to save overhead of object creation
	57	private final static IntWritable one = new IntWritable(1);
	58	private Text word = new Text();
	59
	60	public void map(LongWritable key, Text value,
	61	OutputCollector<Text, IntWritable> output, Reporter reporter)
	62	throws IOException {
	63	String line = ((Text) value).toString();
	64	StringTokenizer itr = new StringTokenizer(line);
	65	while (itr.hasMoreTokens()) {
	66	word.set(itr.nextToken());
	67	output.collect(word, one);
	68	}
	69	}
	70	}
	71
	72	// reducer: sums up all the counts
	73	private static class ReduceClass extends MapReduceBase implements
	74	Reducer<Text, IntWritable, Text, IntWritable> {
	75
	76	// reuse objects
	77	private final static IntWritable SumValue = new IntWritable();
	78
	79	public void reduce(Text key, Iterator<IntWritable> values,
	80	OutputCollector<Text, IntWritable> output, Reporter reporter)
	81	throws IOException {
	82	// sum up values
	83	int sum = 0;
	84	while (values.hasNext()) {
	85	sum += values.next().get();
	86	}
	87	SumValue.set(sum);
	88	output.collect(key, SumValue);
	89	}
	90	}
	91
	92	private DemoWordCount() {
	93	}
	94
	95	/**
	96	* Runs the demo.
	97	*/
	98	public static void main(String[] args) throws IOException {
	99	String filename = "/user/waue/test/132.txt";
	100	String outputPath = "sample-counts";
	101	int mapTasks = 20;
	102	int reduceTasks = 1;
	103
	104	JobConf conf = new JobConf(DemoWordCount.class);
	105	conf.setJobName("wordcount");
	106
	107	conf.setNumMapTasks(mapTasks);
	108	conf.setNumReduceTasks(reduceTasks);
	109	//0.16
	110	// conf.setInputPath(new Path(filename));
	111	Convert.setInputPath(conf, new Path(filename));
	112	conf.setOutputKeyClass(Text.class);
	113	conf.setOutputValueClass(IntWritable.class);
	114	// 0.16
	115	// conf.setOutputPath(new Path(outputPath));
	116	Convert.setInputPath(conf, new Path(outputPath));
	117	conf.setMapperClass(MapClass.class);
	118	conf.setCombinerClass(ReduceClass.class);
	119	conf.setReducerClass(ReduceClass.class);
	120
	121	// Delete the output directory if it exists already
	122	Path outputDir = new Path(outputPath);
	123	// 0.16
	124	// FileSystem.get(conf).delete(outputDir);
	125	FileSystem.get(conf).delete(outputDir,true);
	126	JobClient.runJob(conf);
	127	}
	128	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: sample/hadoop-0.17/tw/org/nchc/demo/DemoWordCount.java

Download in other formats: