Context Navigation

WordCount.java @ 64

Last change on this file since 64 was 31, checked in by waue, 16 years ago
update some new ..
File size: 3.6 KB

Line
1	/**
2	* Program: WordCount.java
3	* Editor: Waue Chen
4	* From : NCHC. Taiwan
5	* Last Update Date: 07/02/2008
6	*/
7
8	/**
9	* Purpose :
10	* Store the result of WordCount.java from Hbase to Hadoop file system
11	*
12	* HowToUse :
13	* Make sure Hadoop file system is running correctly.
14	* Put text file on the directory "/local_src/input"
15	* You can use the instruction to upload "/local_src/input" to HDFS input dir
16	* $ bin/hadoop dfs -put /local_src/input input
17	* Then modify the $filepath parameter in the constructor to be correct and run this code.
18	*
19	*
20	* Check Result:
21	* inspect http://localhost:50070 by web explorer
22	*/
23	package tw.org.nchc.code;
24
25	import java.io.IOException;
26	import java.util.Iterator;
27	import java.util.StringTokenizer;
28
29	import org.apache.hadoop.fs.FileSystem;
30	import org.apache.hadoop.fs.Path;
31	import org.apache.hadoop.io.IntWritable;
32	import org.apache.hadoop.io.LongWritable;
33	import org.apache.hadoop.io.Text;
34	import org.apache.hadoop.mapred.JobClient;
35	import org.apache.hadoop.mapred.JobConf;
36	import org.apache.hadoop.mapred.MapReduceBase;
37	import org.apache.hadoop.mapred.Mapper;
38	import org.apache.hadoop.mapred.OutputCollector;
39	import org.apache.hadoop.mapred.Reducer;
40	import org.apache.hadoop.mapred.Reporter;
41
42	public class WordCount {
43	private String filepath;
44
45	private String outputPath;
46
47	public WordCount() {
48	filepath = "/user/waue/input/";
49	outputPath = "counts1";
50	}
51
52	public WordCount(String path, String output) {
53	filepath = path;
54	outputPath = output;
55	}
56
57	// mapper: emits (token, 1) for every word occurrence
58	private static class MapClass extends MapReduceBase
59	implements Mapper<LongWritable, Text, Text, IntWritable>
60	{
61
62	// reuse objects to save overhead of object creation
63	private final static IntWritable one = new IntWritable(1);
64
65	private Text word = new Text();
66
67	public void map(LongWritable key, Text value,
68	OutputCollector<Text, IntWritable> output, Reporter reporter)
69	throws IOException {
70	String line = ((Text) value).toString();
71	StringTokenizer itr = new StringTokenizer(line);
72	while (itr.hasMoreTokens()) {
73	word.set(itr.nextToken());
74	output.collect(word, one);
75	}
76	}
77	}
78
79	// reducer: sums up all the counts
80	private static class ReduceClass extends MapReduceBase
81	implements Reducer<Text, IntWritable, Text, IntWritable>
82	{
83
84	// reuse objects
85	private final static IntWritable SumValue = new IntWritable();
86
87	public void reduce(Text key, Iterator<IntWritable> values,
88	OutputCollector<Text, IntWritable> output, Reporter reporter)
89	throws IOException {
90	// sum up values
91	int sum = 0;
92	while (values.hasNext()) {
93	sum += values.next().get();
94	}
95	SumValue.set(sum);
96	output.collect(key, SumValue);
97	}
98	}
99
100	/**
101	* Runs the demo.
102	*/
103	public static void main(String[] args) throws IOException {
104	WordCount wc = new WordCount();
105
106	int mapTasks = 1;
107	int reduceTasks = 1;
108	JobConf conf = new JobConf(WordCount.class);
109	// conf.setJobName("wordcount");
110
111	conf.setNumMapTasks(mapTasks);
112	conf.setNumReduceTasks(reduceTasks);
113
114	conf.setInputPath(new Path(wc.filepath));
115
116	conf.setOutputKeyClass(Text.class);
117	conf.setOutputValueClass(IntWritable.class);
118
119	conf.setOutputPath(new Path(wc.outputPath));
120
121	conf.setMapperClass(MapClass.class);
122	// conf.setCombinerClass(ReduceClass.class);
123	conf.setReducerClass(ReduceClass.class);
124
125	// Delete the output directory if it exists already
126	Path outputDir = new Path(wc.outputPath);
127	FileSystem.get(conf).delete(outputDir);
128	JobClient.runJob(conf);
129	}
130	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: sample/hadoop-0.16/tw/org/nchc/code/WordCount.java @ 64

Download in other formats: