wiki:waue/2009/0716

map 的輸入 key 為 LongWritable,然而將此 key 轉型為 Text 輸出時會遇到 run-time error

錯誤訊息:Type mismatch in key from map: expected org.apache.hadoop.io.LongWritable, recieved org.apache.hadoop.io.Text

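Map 的 key 從 LongWritable 轉成 String 再包成 Text 輸出時,就會遇到上述錯誤。實際原因是 keyvalue.java 的 JobConf 沒有宣告 map 輸出的 key 型別,Hadoop 預設為 LongWritable,與 mapper 實際輸出的 Text 不符;在 JobConf 加上 conf.setMapOutputKeyClass(Text.class) 也可以排除此錯誤。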

  • keyvalue.java
package nchc.keyvalue;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

/**
 * Driver that configures and runs the key/value demonstration job
 * (mapper {@code kvM}, reducer {@code kvR}) and reports the elapsed time.
 *
 * <p>Usage: {@code keyvalue <inDir> <outDir> <m> <r>} where {@code m} and
 * {@code r} are the requested numbers of map and reduce tasks. When started
 * without any argument the job runs with {@code input output 1 1}.
 */
public class keyvalue{
  /** Arguments used when the program is started without any. */
  private static final String[] DEFAULT_ARGS = {"input", "output", "1", "1"};

  /** Usage line printed when the arguments are incomplete or malformed. */
  private static final String USAGE = "keyvalue <inDir> <outDir> <m> <r>";

  /**
   * Entry point.
   *
   * @param args {@code <inDir> <outDir> <m> <r>}; may be empty, in which
   *             case {@link #DEFAULT_ARGS} are used
   */
  public static void main(String[] args) {
    // Only fall back to the defaults when nothing was supplied, so that
    // real command-line arguments are honoured instead of being overwritten.
    if (args.length == 0) {
      args = DEFAULT_ARGS;
    }

    if (args.length < 4) {
      System.out.println(USAGE);
      return;
    }

    final int numMapTasks;
    final int numReduceTasks;
    try {
      numMapTasks = Integer.parseInt(args[2]);
      numReduceTasks = Integer.parseInt(args[3]);
    } catch (NumberFormatException e) {
      // <m> or <r> is not an integer: treat it like any other usage error.
      System.out.println(USAGE);
      return;
    }

    JobConf conf = new JobConf(keyvalue.class);
    conf.setJobName("keyValue");
    FileInputFormat.setInputPaths(conf, args[0]);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);
    // No key/value classes are declared, so the job uses JobConf's defaults
    // for the map output (LongWritable key, Text value). kvM must therefore
    // emit LongWritable keys; to emit another key type, declare it here with
    // conf.setMapOutputKeyClass(...), otherwise the map task fails with
    // "Type mismatch in key from map".
    conf.setMapperClass(kvM.class);
    conf.setReducerClass(kvR.class);

    boolean succeeded = false;
    long start = System.nanoTime();
    try {
      JobClient.runJob(conf);
      succeeded = true;
    } catch (Exception e) {
      // Top-level boundary: report the failure, still print the timing below.
      e.printStackTrace();
    }
    long period = System.nanoTime() - start;
    System.err.println(period*(1e-9) + " secs.");

    if (!succeeded) {
      // Make a failed job visible to the calling shell/script.
      System.exit(1);
    }
  }
}

  • kvM.java (有 runtime error)
package nchc.keyvalue;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Mapper that re-emits every input record with its key converted to text.
 *
 * <p>The incoming {@link LongWritable} key is rendered with
 * {@code toString()} and wrapped in a new {@link Text}; the value is passed
 * through untouched.
 *
 * <p>Because the emitted key is a {@code Text}, the job has to declare
 * {@code Text} as its map output key class (for example with
 * {@code JobConf.setMapOutputKeyClass}). The "Type mismatch in key from map"
 * error reported for this version indicates the job expected
 * {@code LongWritable} instead.
 */
public class kvM extends MapReduceBase implements
		Mapper<LongWritable, Text, Text, Text> {

	/**
	 * Emits {@code (key.toString(), value)}.
	 *
	 * @param key    input key, converted to its string form for the output
	 * @param value  input value, forwarded unchanged
	 * @param output collector receiving the converted pair
	 * @param report unused
	 * @throws IOException if the collector fails to accept the pair
	 */
	public void map(LongWritable key, Text value,
			OutputCollector<Text, Text> output, Reporter report)
			throws IOException {
		final String keyAsString = key.toString();
		output.collect(new Text(keyAsString), value);
	}

}
  • kvR.java (有 runtime error)
package nchc.keyvalue;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Reducer that writes one decorated output record per incoming value.
 *
 * <p>For a key {@code k} and each of its values {@code v} the record
 * {@code ("< k , ", "v>")} is collected, so a key with several values
 * produces several output records rather than a single merged one.
 */
public class kvR extends MapReduceBase implements
		Reducer< Text, Text, Text, Text> {

	/**
	 * Emits a {@code "< key , "} / {@code "value>"} pair for every value.
	 *
	 * @param key    key shared by all {@code values}
	 * @param values values grouped under {@code key}
	 * @param output collector receiving the decorated pairs
	 * @param report unused
	 * @throws IOException if the collector fails to accept a pair
	 */
	public void reduce(Text key, Iterator<Text> values,
			OutputCollector<Text, Text> output, Reporter report)
			throws IOException {
		while (values.hasNext()) {
			// Build the key part first, then advance the iterator.
			final String left = "< " + key + " , ";
			final String right = values.next() + ">";
			output.collect(new Text(left), new Text(right));
		}
	}
}

改成以下的map 與 reduce 檔就可以正常運作

  • kvM.java
package nchc.keyvalue;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Identity mapper: forwards every input record to the output unchanged.
 *
 * <p>Both the key ({@link LongWritable}) and the value ({@link Text}) keep
 * their input types, so the map output key type is {@code LongWritable}.
 */
public class kvM extends MapReduceBase implements
    Mapper<LongWritable, Text, LongWritable, Text> {

  /**
   * Collects {@code (key, value)} exactly as received.
   *
   * @param key    input key, forwarded as is
   * @param value  input value, forwarded as is
   * @param output collector receiving the pair
   * @param report unused
   * @throws IOException if the collector fails to accept the pair
   */
  public void map(
      final LongWritable key,
      final Text value,
      final OutputCollector<LongWritable, Text> output,
      final Reporter report) throws IOException {
    output.collect(key, value);
  }

}
  • kvR.java
package nchc.keyvalue;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Reducer that turns each {@code (LongWritable key, Text value)} into a
 * decorated text record.
 *
 * <p>For a key {@code k} and each of its values {@code v} the record
 * {@code ("< k , ", "v>")} is collected. Values are not merged: a key with
 * several values yields one output record per value.
 */
public class kvR extends MapReduceBase implements
    Reducer< LongWritable, Text, Text, Text> {

  /**
   * Emits a {@code "< key , "} / {@code "value>"} pair for every value.
   *
   * @param key    key shared by all {@code values}
   * @param values values grouped under {@code key}
   * @param output collector receiving the decorated pairs
   * @param report unused
   * @throws IOException if the collector fails to accept a pair
   */
  public void reduce(LongWritable key, Iterator<Text> values,
      OutputCollector<Text, Text> output, Reporter report)
      throws IOException {
    while (values.hasNext()) {
      // Build the key part first, then advance the iterator.
      final String left = "< " + key + " , ";
      final String right = values.next() + ">";
      output.collect(new Text(left), new Text(right));
    }
  }
}

然而在輸出中看不到 key 被 reduce 起來:kvR 對同一個 key 的每一個 value 都各自 collect 一次,所以相同的 key 仍會輸出成多行

  • output/part-00000
    < 0 , 	This eBook is for the use of anyone anywhere at no cost and with>
    < 0 , 	This eBook is for the use of anyone anywhere at no cost and with>
    < 66 , 	almost no restrictions whatsoever.  You may copy it, give it away or>
    < 66 , 	almost no restrictions whatsoever.  You may copy it, give it away or>
    < 136 , 	re-use it under the terms of the Project Gutenberg License included>
    < 136 , 	re-use it under the terms of the Project Gutenberg License included>
    < 205 , 	with this eBook or online at www.gutenberg.net>
    < 205 , 	with this eBook or online at www.gutenberg.org>
    
    .... (省略)
    
Last modified 16 years ago Last modified on Jul 17, 2009, 10:17:01 AM