Context Navigation

WordCount.java @ 127

Last change on this file since 127 was 31, checked in by waue, 16 years ago
update some new ..
File size: 3.6 KB

Rev	Line
[9]	1	/**
	2	* Program: WordCount.java
	3	* Editor: Waue Chen
	4	* From : NCHC. Taiwan
[18]	5	* Last Update Date: 07/02/2008
[7]	6	*/
[9]	7
	8	/**
	9	* Purpose :
	10	* Count how often each word occurs in the input text files and store the result on the Hadoop file system
	11	*
	12	* HowToUse :
	13	* Make sure Hadoop file system is running correctly.
	14	* Put text file on the directory "/local_src/input"
	15	* You can use the following command to upload "/local_src/input" to the HDFS input dir
	16	* $ bin/hadoop dfs -put /local_src/input input
	17	* Then modify the filepath value in the constructor to point to the HDFS input dir and run this code.
	18	*
	19	*
	20	* Check Result:
	21	* Inspect http://localhost:50070 with a web browser
	22	*/
[8]	23	package tw.org.nchc.code;
[7]	24
	25	import java.io.IOException;
	26	import java.util.Iterator;
	27	import java.util.StringTokenizer;
	28
	29	import org.apache.hadoop.fs.FileSystem;
	30	import org.apache.hadoop.fs.Path;
	31	import org.apache.hadoop.io.IntWritable;
	32	import org.apache.hadoop.io.LongWritable;
	33	import org.apache.hadoop.io.Text;
	34	import org.apache.hadoop.mapred.JobClient;
	35	import org.apache.hadoop.mapred.JobConf;
	36	import org.apache.hadoop.mapred.MapReduceBase;
	37	import org.apache.hadoop.mapred.Mapper;
	38	import org.apache.hadoop.mapred.OutputCollector;
	39	import org.apache.hadoop.mapred.Reducer;
	40	import org.apache.hadoop.mapred.Reporter;
	41
	42	public class WordCount {
[9]	43	private String filepath;
[18]	44
[9]	45	private String outputPath;
[18]	46
	47	public WordCount() {
[9]	48	filepath = "/user/waue/input/";
	49	outputPath = "counts1";
	50	}
[18]	51
	52	public WordCount(String path, String output) {
[9]	53	filepath = path;
	54	outputPath = output;
	55	}
[18]	56
[7]	57	// mapper: emits (token, 1) for every word occurrence
[31]	58	private static class MapClass extends MapReduceBase
	59	implements Mapper<LongWritable, Text, Text, IntWritable>
	60	{
[7]	61
	62	// reuse objects to save overhead of object creation
	63	private final static IntWritable one = new IntWritable(1);
[18]	64
[7]	65	private Text word = new Text();
	66
	67	public void map(LongWritable key, Text value,
	68	OutputCollector<Text, IntWritable> output, Reporter reporter)
	69	throws IOException {
	70	String line = ((Text) value).toString();
	71	StringTokenizer itr = new StringTokenizer(line);
	72	while (itr.hasMoreTokens()) {
	73	word.set(itr.nextToken());
	74	output.collect(word, one);
	75	}
	76	}
	77	}
	78
	79	// reducer: sums up all the counts
[31]	80	private static class ReduceClass extends MapReduceBase
	81	implements Reducer<Text, IntWritable, Text, IntWritable>
	82	{
[7]	83
	84	// reuse objects
	85	private final static IntWritable SumValue = new IntWritable();
	86
	87	public void reduce(Text key, Iterator<IntWritable> values,
	88	OutputCollector<Text, IntWritable> output, Reporter reporter)
	89	throws IOException {
	90	// sum up values
	91	int sum = 0;
	92	while (values.hasNext()) {
	93	sum += values.next().get();
	94	}
	95	SumValue.set(sum);
	96	output.collect(key, SumValue);
	97	}
	98	}
	99
	100	/**
	101	* Runs the demo.
	102	*/
	103	public static void main(String[] args) throws IOException {
[9]	104	WordCount wc = new WordCount();
[18]	105
[9]	106	int mapTasks = 1;
[7]	107	int reduceTasks = 1;
	108	JobConf conf = new JobConf(WordCount.class);
[31]	109	// conf.setJobName("wordcount");
[7]	110
	111	conf.setNumMapTasks(mapTasks);
	112	conf.setNumReduceTasks(reduceTasks);
[25]	113
	114	conf.setInputPath(new Path(wc.filepath));
	115
[7]	116	conf.setOutputKeyClass(Text.class);
	117	conf.setOutputValueClass(IntWritable.class);
	118
[25]	119	conf.setOutputPath(new Path(wc.outputPath));
	120
[7]	121	conf.setMapperClass(MapClass.class);
[31]	122	// conf.setCombinerClass(ReduceClass.class);
[7]	123	conf.setReducerClass(ReduceClass.class);
[18]	124
[7]	125	// Delete the output directory if it exists already
[9]	126	Path outputDir = new Path(wc.outputPath);
[25]	127	FileSystem.get(conf).delete(outputDir);
[7]	128	JobClient.runJob(conf);
	129	}
	130	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: sample/hadoop-0.16/tw/org/nchc/code/WordCount.java @ 127

Download in other formats: