
Problems that commonly appear when converting Hadoop 0.18 programs to Hadoop 0.20

Introduction

  • The code below looks like it runs wordcount, but the result it produces is that of helloword: each input line comes back unchanged.
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class wordcount {
  static public class wcmapper extends
      Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

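    // NOTE: the signature below copies the old 0.18 "mapred" API. It does
    // NOT override Mapper.map(LongWritable, Text, Context) of the new
    // 0.20 API, so the framework never calls it and the default identity
    // map runs instead.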
    public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);
      while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, one);
      }
    }
  }

  static public class wcreducer extends
      Reducer<Text, IntWritable, Text, IntWritable> {
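    // NOTE: same problem here -- this is the old 0.18 signature, not an
    // override of Reducer.reduce(Text, Iterable<IntWritable>, Context),
    // so the default identity reduce runs instead.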
    public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
  }

  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");

    job.setJarByClass(wordcount.class);

    FileInputFormat.setInputPaths(job, "/user/shunfa/input");
    FileOutputFormat.setOutputPath(job, new Path(
        "/user/shunfa/output-wordcount11"));

    job.setMapperClass(wcmapper.class);
    job.setReducerClass(wcreducer.class);
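    // note: no setOutputKeyClass()/setOutputValueClass() calls; this job
    // still runs only because the identity map emits the default
    // (LongWritable, Text) pairs produced by TextInputFormat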
    job.waitForCompletion(true);
  }
}

The input file contains:

i am little cat
you are small dog
haha
  • Expected result:
am 1
are 1
cat 1
....
  • Actual result (the byte offset and raw text of each input line, i.e. the output of the default identity map and reduce):
0   i am little cat
12    you are small dog
32  haha
...
  • Hint for the fix

Since the map() and reduce() functions are implemented from org.apache.hadoop.mapreduce.Mapper and org.apache.hadoop.mapreduce.Reducer (the new 0.20 API), they must not be declared with the old signature

map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)

That signature belongs to the 0.18 mapred API; it does not override the new API's map(), so Hadoop silently runs the default identity map and reduce instead.

Declare the methods as

map(LongWritable key, Text value, Context context)

and, analogously, reduce(Text key, Iterable<IntWritable> values, Context context). Annotating both methods with @Override turns this silent mistake into a compile error.
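A minimal corrected sketch, keeping the class names and HDFS paths of the listing above, might look like the following. The setOutputKeyClass()/setOutputValueClass() calls are an addition not present in the original: without them the fixed job fails with a type mismatch, since the default output key class is LongWritable.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class wordcount {
  static public class wcmapper extends
      Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override // the compiler now rejects any signature that does not override
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer tokenizer = new StringTokenizer(value.toString());
      while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        context.write(word, one); // replaces output.collect(word, one)
      }
    }
  }

  static public class wcreducer extends
      Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      context.write(key, new IntWritable(sum));
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setJarByClass(wordcount.class);

    FileInputFormat.setInputPaths(job, "/user/shunfa/input");
    FileOutputFormat.setOutputPath(job, new Path(
        "/user/shunfa/output-wordcount11"));

    job.setMapperClass(wcmapper.class);
    job.setReducerClass(wcreducer.class);
    // without these two calls the fixed job fails with a type mismatch,
    // because the default output key class is LongWritable, not Text
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.waitForCompletion(true);
  }
}

With these signatures the framework actually dispatches to wcmapper.map() and wcreducer.reduce(), and the job produces the expected word counts.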
