source: sample/hadoop-0.17/tw/org/nchc/code/WordCount.java @ 20

Last change on this file since 20 was 20, checked in by waue, 16 years ago

Backed up the finished hadoop 0.17 package here.
Development continues for now on hadoop 0.16 + hbase 1.3.

File size: 3.7 KB
/**
 * Program: WordCount.java
 * Editor: Waue Chen
 * From: NCHC, Taiwan
 * Last Update Date: 07/02/2008
 * Upgrade to 0.17
 */

/**
 * Purpose:
 *  Store the result of WordCount.java from HBase to the Hadoop file system
 *
 * HowToUse:
 *  Make sure the Hadoop file system is running correctly.
 *  Put a text file in the directory "/local_src/input".
 *  You can use this instruction to upload "/local_src/input" to the HDFS input dir:
 *    $ bin/hadoop dfs -put /local_src/input input
 *  Then modify the $filepath parameter in the constructor to be correct and run this code.
 *
 * Check Result:
 *  Inspect http://localhost:50070 with a web browser
 */
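/*
 * A minimal usage sketch (the jar name "nchc.jar" below is an assumption, not
 * part of this project): after compiling and packaging this class, the job can
 * be submitted and its single-reducer output inspected from the Hadoop 0.17
 * command line roughly as follows.
 *
 *   $ bin/hadoop jar nchc.jar tw.org.nchc.code.WordCount
 *   $ bin/hadoop dfs -cat counts1/part-00000
 *
 * "counts1" is the default outputPath set in the no-argument constructor below.
 */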
package tw.org.nchc.code;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class WordCount {
  private String filepath;

  private String outputPath;

  public WordCount() {
    filepath = "/user/waue/input/";
    outputPath = "counts1";
  }

  public WordCount(String path, String output) {
    filepath = path;
    outputPath = output;
  }

  // mapper: emits (token, 1) for every word occurrence
  private static class MapClass extends MapReduceBase implements
      Mapper<LongWritable, Text, Text, IntWritable> {

    // reuse objects to save the overhead of object creation
    private final static IntWritable one = new IntWritable(1);

    private Text word = new Text();

    public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        output.collect(word, one);
      }
    }
  }
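  /*
   * For illustration (not part of the original code): given the input line
   * "hello hadoop hello", the map() above emits the pairs
   * ("hello", 1), ("hadoop", 1), ("hello", 1); summing happens in the reducer.
   */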
78
79  // reducer: sums up all the counts
80  private static class ReduceClass extends MapReduceBase implements
81      Reducer<Text, IntWritable, Text, IntWritable> {
82
83    // reuse objects
84    private final static IntWritable SumValue = new IntWritable();
85
86    public void reduce(Text key, Iterator<IntWritable> values,
87        OutputCollector<Text, IntWritable> output, Reporter reporter)
88        throws IOException {
89      // sum up values
90      int sum = 0;
91      while (values.hasNext()) {
92        sum += values.next().get();
93      }
94      SumValue.set(sum);
95      output.collect(key, SumValue);
96    }
97  }
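  /*
   * For illustration (not part of the original code): for the key "hello" with
   * values [1, 1, 1], reduce() emits ("hello", 3). Because ReduceClass is also
   * registered as the combiner in main(), partial sums may already be computed
   * on the map side before the shuffle.
   */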
98
99  /**
100   * Runs the demo.
101   */
102  public static void main(String[] args) throws IOException {
103    WordCount wc = new WordCount();
104
105    int mapTasks = 1;
106    int reduceTasks = 1;
107    JobConf conf = new JobConf(WordCount.class);
108    conf.setJobName("wordcount");
109
110    conf.setNumMapTasks(mapTasks);
111    conf.setNumReduceTasks(reduceTasks);
112    // 0.16
113    // conf.setInputPath(new Path(wc.filepath));
114    Convert.setInputPath(conf, new Path(wc.filepath));
115    conf.setOutputKeyClass(Text.class);
116    conf.setOutputValueClass(IntWritable.class);
117    // 0.16
118    // conf.setOutputPath(new Path(wc.outputPath));
119    Convert.setOutputPath(conf, new Path(wc.outputPath));
120
121    conf.setMapperClass(MapClass.class);
122    conf.setCombinerClass(ReduceClass.class);
123    conf.setReducerClass(ReduceClass.class);
124
125    // Delete the output directory if it exists already
126    Path outputDir = new Path(wc.outputPath);
127    // 0.16
128    FileSystem.get(conf).delete(outputDir,true);
129
130    JobClient.runJob(conf);
131  }
132}