用Hadoop 0.18 改成 Hadoop 0.20 程式易出現的問題
前言
- 下面程式碼雖然看起來是跑 wordcount,但出來的結果卻像是跑 helloworld(輸入內容被原樣輸出)
import java.io.IOException; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class wordcount { static public class wcmapper extends Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line); while (tokenizer.hasMoreTokens()) { word.set(tokenizer.nextToken()); output.collect(word, one); } } } static public class wcreducer extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } output.collect(key, new IntWritable(sum)); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "wordcount"); job.setJarByClass(wordcount.class); FileInputFormat.setInputPaths(job, "/user/shunfa/input"); FileOutputFormat.setOutputPath(job, new Path( "/user/shunfa/output-wordcount11")); job.setMapperClass(wcmapper.class); job.setReducerClass(wcreducer.class); job.waitForCompletion(true); } }
輸入檔內容為
i am little cat you are small dog haha
- 預期結果
am 1 are 1 cat 1 ....
- 執行結果:
0 i am little cat 12 you are small dog 32 haha ...
- 解決方法提示
若已經用 org.apache.hadoop.mapreduce.Mapper 與 org.apache.hadoop.mapreduce.Reducer(新 API)來實作 map() 與 reduce() function,
implement 時不可用舊 API 的簽名 map(LongWritable key, Text value, OutputCollector&lt;Text, IntWritable&gt; output, Reporter reporter)(簽名不符就不會 override,框架會改跑預設的 identity map/reduce),
應改成 map(LongWritable key, Text value, Context context) 來實作;reduce() 同理,應改成 reduce(Text key, Iterable&lt;IntWritable&gt; values, Context context)
Last modified 15 years ago
Last modified on Jan 22, 2010, 5:56:48 PM
Attachments (1)
- shunfa.jar (2.9 KB) - added by waue 15 years ago.
Download all attachments as: .zip