{{{
#!html
ITRI HBase Advanced Course
HBase Examples
}}}

 [wiki:waue/2011/0426_4_5 Previous < ] Stage 6 [wiki:waue/2011/0426_4_7 > Next]

= Example 6: WordCountHBase =

== Description: ==

 * This program reads the files under the input path, counts the words they contain, and writes the results back into an HTable.
 * Note: after placing the hbase libraries into hadoop's lib directory, you must restart both hbase and hadoop before running this example, or errors will occur.

{{{
$ bin/hadoop dfs -mkdir input
$ bin/hadoop dfs -put README.txt input
$ bin/hadoop jar ItriMenu.jar CountToHBaseReducer input
}}}

== Notes: ==

 1. The source path on hdfs is "/user/$YOUR_NAME/input". The data must be uploaded to this hdfs folder beforehand, and the folder may contain only files, not sub-folders.
 2. When the computation finishes, the program stores the results in the hbase table wordcounthbase.

== Code ==

{{{
#!java
package itri;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class CountToHBaseReducer {

    public static class HtMap extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private IntWritable one = new IntWritable(1);

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Lower-case the input string, then split it on spaces
            String s[] = value.toString().toLowerCase().trim().split(" ");
            for (String m : s) {
                // Write <word, 1> to the output stream
                context.write(new Text(m), one);
            }
        }
    }

    // TableReducer<KEYIN, VALUEIN, KEYOUT>
    // This was originally TableReducer<Text, IntWritable, NullWritable>,
    // but changing KEYOUT to LongWritable also works. This shows that many
    // classes are acceptable here: any Writable class in
    // org.apache.hadoop.io.* that implements write() should do.
    public static class HtReduce extends
            TableReducer<Text, IntWritable, LongWritable> {

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable i : values) {
                sum += i.get();
            }

            // org.apache.hadoop.hbase.client.Put
            // Used to perform Put operations for a single row.
            // new Put(byte[] row)
            Put put = new Put(Bytes.toBytes(key.toString()));

            // add(byte[] family, byte[] qualifier, byte[] value)
            // main() sets the output format class to TableOutputFormat;
            // TableReducer requires the output value class to be Put or Delete
            put.add(Bytes.toBytes("content"), Bytes.toBytes("word"), Bytes
                    .toBytes(key.toString()));
            put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes
                    .toBytes(String.valueOf(sum)));

            // NullWritable.get(): Returns the single instance of this class.
            // NullWritable.write(): Serialize the fields of this object to out.
            context.write(new LongWritable(), put);
            // context.write(NullWritable.get(), put) also works
        }
    }

    public static void main(String argv[]) throws Exception {
        // String[] argc = { "input" }; argv = argc; // in hdfs
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, argv)
                .getRemainingArgs();
        if (otherArgs.length < 1) {
            System.out.println("CountToHBaseReducer <input>");
            return;
        }
        String input = otherArgs[0];
        String tablename = "wordcounthbase";

        // OUTPUT_TABLE = "hbase.mapred.outputtable"
        // conf.set sets a name/value pair, as in core-site.xml;
        // it tells the program: hbase.mapred.outputtable --> wordcounthbase
        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);

        // Create the hbase table first; without it the job would fail
        CreateTable.createHBaseTable(tablename, "content");

        Job job = new Job(conf, "WordCount table with " + input);
        job.setJarByClass(CountToHBaseReducer.class);
        job.setMapperClass(HtMap.class);
        job.setReducerClass(HtReduce.class);

        // The map output of this example is <Text, IntWritable>, so the
        // set{Map|Reduce}Output{Key|Value}Class() declarations could
        // actually be omitted
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // InputFormat has only one sub-interface:
        // FileInputFormat <- (SequenceFileInputFormat, TextInputFormat)
        // TextInputFormat is the most common; its default input is
        // <LongWritable, Text>. HBase also provides its own subclass,
        // TableInputFormat.
        job.setInputFormatClass(TextInputFormat.class);

        // TableOutputFormat
        // Declaring this line makes the reduce output go to an HBase table
        job.setOutputFormatClass(TableOutputFormat.class);

        // The input file used to be set with Config.setInputPath(Path), but
        // the design changed to FileInputFormat.addInputPath(Job, Path()) --
        // presumably because some file operations do not need to run a
        // mapreduce Job, so it was moved outside
        FileInputFormat.addInputPath(job, new Path(input));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
}}}
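The job above calls CreateTable.createHBaseTable() before submitting, but that helper class is defined in an earlier example and is not shown on this page. The sketch below is a minimal, assumed reconstruction of such a helper using the HBase client API of this era (HBaseAdmin, HTableDescriptor, HColumnDescriptor); only the class name and method signature come from the call above, the body is illustrative rather than the course's original code.

{{{
#!java
package itri;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateTable {
    // Assumed helper: (re)creates the table with a single column family
    public static void createHBaseTable(String tablename, String family)
            throws IOException {
        HTableDescriptor htd = new HTableDescriptor(tablename);
        htd.addFamily(new HColumnDescriptor(family));

        HBaseAdmin admin = new HBaseAdmin(HBaseConfiguration.create());
        if (admin.tableExists(tablename)) {
            // Drop any table left over from a previous run
            admin.disableTable(tablename);
            admin.deleteTable(tablename);
        }
        admin.createTable(htd);
    }
}
}}}

Dropping a pre-existing table first keeps repeated runs idempotent; without that check, admin.createTable() would throw TableExistsException on the second run.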
= Test Run =

{{{
$ /opt/hbase/bin/hbase shell
hbase(main):x:0> list
wordcounthbase
1 row(s) in 0.0240 seconds
hbase(main):x:0> scan 'wordcounthbase'
.....
 zeller column=content:count, timestamp=1285674576293, value=1
 zero column=content:count, timestamp=1285674576293, value=8
 zero, column=content:count, timestamp=1285674576293, value=2
 zero-compressed column=content:count, timestamp=1285674576293, value=1
.....
hbase(main):x:0> exit
}}}
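Besides the hbase shell, the result table can also be read back from Java. The following is a hedged sketch (a hypothetical ScanWordCount class, not part of the course code) that scans every row with the HTable/Scan client API, assuming the table and family names used in the example above.

{{{
#!java
package itri;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanWordCount {
    public static void main(String[] args) throws Exception {
        // Table name assumed to match the main example
        HTable table = new HTable(HBaseConfiguration.create(),
                "wordcounthbase");
        ResultScanner scanner = table.getScanner(new Scan());
        for (Result r : scanner) {
            // The row key is the word; content:count holds its count
            String word = Bytes.toString(r.getRow());
            String count = Bytes.toString(r.getValue(
                    Bytes.toBytes("content"), Bytes.toBytes("count")));
            System.out.println(word + " = " + count);
        }
        scanner.close();
        table.close();
    }
}
}}}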