Context Navigation

close Warning: Can't use blame annotator:
svn blame failed on sample/WordCount.java: 200029 - Couldn't perform atomic initialization 20014 - Can't find a temporary directory: Internal error

source: sample/WordCount.java @ 11

Last change on this file since 11 was 9, checked in by waue, 18 years ago
comment
File size: 3.6 KB

Rev	Line
	1	/**
	2	* Program: WordCount.java
	3	* Editor: Waue Chen
	4	* From : NCHC. Taiwan
	5	* Last Update Date: 06/13/2008
	6	*/
	7
	8	/**
	9	* Purpose :
	10	* Count the occurrences of each word in the input text files and store the result in the Hadoop file system
	11	*
	12	* HowToUse :
	13	* Make sure Hadoop file system is running correctly.
	14	* Put text file on the directory "/local_src/input"
	15	* You can use the following command to upload "/local_src/input" to the HDFS input dir
	16	* $ bin/hadoop dfs -put /local_src/input input
	17	* Then modify the filepath parameter in the constructor so that it is correct, and run this code.
	18	*
	19	*
	20	* Check Result:
	21	* Inspect http://localhost:50070 with a web browser
	22	*/
	23	package tw.org.nchc.code;
	24
	25	import java.io.IOException;
	26	import java.util.Iterator;
	27	import java.util.StringTokenizer;
	28
	29	import org.apache.hadoop.fs.FileSystem;
	30	import org.apache.hadoop.fs.Path;
	31	import org.apache.hadoop.io.IntWritable;
	32	import org.apache.hadoop.io.LongWritable;
	33	import org.apache.hadoop.io.Text;
	34	import org.apache.hadoop.mapred.JobClient;
	35	import org.apache.hadoop.mapred.JobConf;
	36	import org.apache.hadoop.mapred.MapReduceBase;
	37	import org.apache.hadoop.mapred.Mapper;
	38	import org.apache.hadoop.mapred.OutputCollector;
	39	import org.apache.hadoop.mapred.Reducer;
	40	import org.apache.hadoop.mapred.Reporter;
	41
	42
	43	public class WordCount {
	44	private String filepath;
	45	private String outputPath;
	46
	47	public WordCount(){
	48	filepath = "/user/waue/input/";
	49	outputPath = "counts1";
	50	}
	51	public WordCount(String path,String output){
	52	filepath = path;
	53	outputPath = output;
	54	}
	55	// mapper: emits (token, 1) for every word occurrence
	56	private static class MapClass extends MapReduceBase implements
	57	Mapper<LongWritable, Text, Text, IntWritable> {
	58
	59	// reuse objects to save overhead of object creation
	60	private final static IntWritable one = new IntWritable(1);
	61	private Text word = new Text();
	62
	63	public void map(LongWritable key, Text value,
	64	OutputCollector<Text, IntWritable> output, Reporter reporter)
	65	throws IOException {
	66	String line = ((Text) value).toString();
	67	StringTokenizer itr = new StringTokenizer(line);
	68	while (itr.hasMoreTokens()) {
	69	word.set(itr.nextToken());
	70	output.collect(word, one);
	71	}
	72	}
	73	}
	74
	75	// reducer: sums up all the counts
	76	private static class ReduceClass extends MapReduceBase implements
	77	Reducer<Text, IntWritable, Text, IntWritable> {
	78
	79	// reuse objects
	80	private final static IntWritable SumValue = new IntWritable();
	81
	82	public void reduce(Text key, Iterator<IntWritable> values,
	83	OutputCollector<Text, IntWritable> output, Reporter reporter)
	84	throws IOException {
	85	// sum up values
	86	int sum = 0;
	87	while (values.hasNext()) {
	88	sum += values.next().get();
	89	}
	90	SumValue.set(sum);
	91	output.collect(key, SumValue);
	92	}
	93	}
	94
	95
	96	/**
	97	* Runs the demo.
	98	*/
	99	public static void main(String[] args) throws IOException {
	100	WordCount wc = new WordCount();
	101
	102	int mapTasks = 1;
	103	int reduceTasks = 1;
	104	JobConf conf = new JobConf(WordCount.class);
	105	conf.setJobName("wordcount");
	106
	107	conf.setNumMapTasks(mapTasks);
	108	conf.setNumReduceTasks(reduceTasks);
	109
	110	conf.setInputPath(new Path(wc.filepath));
	111	conf.setOutputKeyClass(Text.class);
	112	conf.setOutputValueClass(IntWritable.class);
	113	conf.setOutputPath(new Path(wc.outputPath));
	114
	115	conf.setMapperClass(MapClass.class);
	116	conf.setCombinerClass(ReduceClass.class);
	117	conf.setReducerClass(ReduceClass.class);
	118
	119	// Delete the output directory if it exists already
	120	Path outputDir = new Path(wc.outputPath);
	121	FileSystem.get(conf).delete(outputDir);
	122
	123	JobClient.runJob(conf);
	124	}
	125	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: