= Hadoop Debug =

 * Original version
{{{
#!java
public static class wordindexM extends Mapper<LongWritable, Text, Text, Text> {
	public void map(LongWritable key, Text value,
			OutputCollector<Text, Text> output, Reporter reporter)
			throws IOException {
		FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
		String line = value.toString();
		StringTokenizer st = new StringTokenizer(line.toLowerCase());
		while (st.hasMoreTokens()) {
			String word = st.nextToken();
			output.collect(new Text(word), new Text(fileSplit.getPath()
					.getName() + ":" + line));
		}
	}
}
}}}

Problem encountered:
{{{
#!text
10/01/18 20:52:39 INFO input.FileInputFormat: Total input paths to process : 2
10/01/18 20:52:39 INFO mapred.JobClient: Running job: job_201001181452_0038
10/01/18 20:52:40 INFO mapred.JobClient:  map 0% reduce 0%
10/01/18 20:52:50 INFO mapred.JobClient: Task Id : attempt_201001181452_0038_m_000000_0, Status : FAILED
java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.LongWritable
}}}

 * Solved
{{{
#!java
public static class wordindexM extends Mapper<LongWritable, Text, Text, Text> {
	public void map(LongWritable key, Text value,
			OutputCollector<Text, Text> output, Reporter reporter)
			throws IOException {
		FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
		Text map_key = new Text();
		Text map_value = new Text();
		String line = value.toString();
		StringTokenizer st = new StringTokenizer(line.toLowerCase());
		while (st.hasMoreTokens()) {
			String word = st.nextToken();
			map_key.set(word);
			map_value.set(fileSplit.getPath().getName() + ":" + line);
			output.collect(map_key, map_value);
		}
	}
}
}}}

 * Analysis
Writing to the output stream with output.collect via ''' new Text(word) ''' looks like a perfectly reasonable way to emit Text-typed data, and in Hadoop 0.18 it works without problems. From Hadoop 0.20 onward, however, if you hit the ''' "Type mismatch in key from xxx" ''' error, switching to the Text.set() method, reusing a single Text instance as in the solved version, can resolve it!
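Stripped of the Hadoop API, the mapper's inner loop is just tokenization plus pair construction, and can be exercised as plain Java. This is a minimal sketch for illustration only: the class name `WordIndexSketch`, the method `index`, and the use of an in-memory list in place of output.collect are all made up here, not part of Hadoop.

```java
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

// Hypothetical stand-alone sketch of the mapper's inner loop:
// lowercase the line for tokenizing, then pair each word with "filename:line".
public class WordIndexSketch {
    public static List<Map.Entry<String, String>> index(String fileName, String line) {
        List<Map.Entry<String, String>> pairs = new ArrayList<>();
        StringTokenizer st = new StringTokenizer(line.toLowerCase());
        while (st.hasMoreTokens()) {
            // In the real mapper this would be output.collect(key, value);
            // here the pairs are collected in a list for inspection.
            pairs.add(new SimpleEntry<>(st.nextToken(), fileName + ":" + line));
        }
        return pairs;
    }

    public static void main(String[] args) {
        for (Map.Entry<String, String> e : index("a.txt", "Hello Hadoop")) {
            System.out.println(e.getKey() + " -> " + e.getValue());
        }
    }
}
```

Note that the emitted value keeps the original (non-lowercased) line, matching the mapper above, while the keys are the lowercased tokens.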