wiki:waue/2010/0118

Version 3 (modified by waue, 14 years ago) (diff)

--

原本的程式碼:

  /**
   * Original version of the word-index mapper, kept here as the code that
   * produced the failure logged on this page.
   *
   * For each whitespace-separated, lower-cased token of the input line it
   * collects the pair (token, "<input file name>:<original line>").
   *
   * NOTE(review): the class uses "extends Mapper", yet map() takes
   * OutputCollector/Reporter parameters. If Mapper here is
   * org.apache.hadoop.mapreduce.Mapper (new API), this method only overloads
   * map() and the framework never calls it -- which would match the
   * "Type mismatch in key from map" error. Confirm against the imports.
   */
  public static class wordindexM extends
      Mapper<LongWritable, Text, Text, Text> {
    public void map(LongWritable key, Text value,
        OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      // The input split is cast to FileSplit to obtain the source file's path.
      FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
      
      String line = value.toString();
      // Tokens come from the lower-cased line; the emitted value keeps the
      // line in its original case.
      StringTokenizer st = new StringTokenizer(line.toLowerCase());
      while (st.hasMoreTokens()) {
        String word = st.nextToken();
        // Two new Text objects are allocated for every token.
        output.collect(new Text(word), new Text(fileSplit.getPath()
            .getName()
            + ":" + line));
      }
    }
  }

遇到問題:

10/01/18 20:52:39 INFO input.FileInputFormat: Total input paths to process : 2
10/01/18 20:52:39 INFO mapred.JobClient: Running job: job_201001181452_0038
10/01/18 20:52:40 INFO mapred.JobClient:  map 0% reduce 0%
10/01/18 20:52:50 INFO mapred.JobClient: Task Id : attempt_201001181452_0038_m_000000_0, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable

  • 已解決
  public static class wordindexM extends
      Mapper<LongWritable, Text, Text, Text> {
    public void map(LongWritable key, Text value,
        OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
      Text map_key = new Text();
      Text map_value = new Text();
      String line = value.toString();
      StringTokenizer st = new StringTokenizer(line.toLowerCase());
      while (st.hasMoreTokens()) {
        String word = st.nextToken();
        map_key.set(word);
        map_value.set(fileSplit.getPath().getName() + ":" + line);
        output.collect(map_key,map_value);
      }
    }
      }