{{{ #!html
HBase Advanced Course
Programming Exercises
}}}

[wiki:NCHCCloudCourse100929_4_HBEX5 Previous Lab < ] Lab 6 [wiki:NCHCCloudCourse100929_4_HBEX7 > Next Lab]

= Example 6: WordCountHBase =

== Description ==

 * This program reads the files under the given input path, counts the occurrences of each word, and writes the results back into an HTable.
 * Note that after placing the HBase libraries into Hadoop's lib directory, you must restart both HBase and Hadoop before running this example, otherwise it will fail with errors.

{{{
$ bin/hadoop dfs -mkdir input
$ bin/hadoop dfs -put README.txt input
$ bin/hadoop jar TCRCExample.jar CountToHBaseReducer input
}}}

== Notes ==

 1. The source path on HDFS is "/user/$YOUR_NAME/input". The input data must be uploaded to this HDFS directory beforehand, and the directory may contain only files, not subdirectories.
 2. When the job finishes, the results are stored in the HBase table named wordcount.

== Source Code ==

{{{
#!java
package org.nchc.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class CountToHBaseReducer {
    public static class HtMap extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private IntWritable one = new IntWritable(1);

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Convert the input line to lower case, then split it on spaces
            String s[] = value.toString().toLowerCase().trim().split(" ");

            for (String m : s) {
                // Write each word to the output stream with a count of 1
                context.write(new Text(m), one);
            }
        }
    }

    public static class HtReduce extends
            TableReducer<Text, IntWritable, LongWritable> {

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable i : values) {
                sum += i.get();
            }

            // Use the word as the row key and store the count in content:count
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes
                    .toBytes(String.valueOf(sum)));

            context.write(new LongWritable(), put);
        }
    }

    public static void main(String args[]) throws Exception {
        // eclipse
        // String[] argv = { "/user/hadoop/input" };
        // args = argv;

        String tablename = "wordcount";
        String family = "content";

        Configuration conf = new Configuration();
        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);

        // Create the HBase table first; the job fails if the table does not exist
        CreateTable.createHBaseTable(tablename, family);

        Job job = new Job(conf, "WordCount table with " + args[0]);

        job.setJarByClass(CountToHBaseReducer.class);

        job.setMapperClass(HtMap.class);
        job.setReducerClass(HtReduce.class);

        // The map output of this example is <Text, IntWritable>, so the
        // set{Map|Reduce}Output{Key|Value}Class() declarations could
        // actually be omitted
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // FileInputFormat is the common base class of input formats such as
        // SequenceFileInputFormat and TextInputFormat; TextInputFormat is the
        // most commonly used and produces <LongWritable, Text> records by
        // default. HBase additionally provides its own TableInputFormat.
        job.setInputFormatClass(TextInputFormat.class);

        // TableOutputFormat
        // Declaring this makes the reducer write its output to an HBase table
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
}}}

= Running the Test =

{{{
$ /opt/hbase/bin/hbase shell
hbase(main):x:0> list
wordcount
1 row(s) in 0.0240 seconds
hbase(main):x:0> scan 'wordcount'
.....
 zeller            column=content:count, timestamp=1285674576293, value=1
 zero              column=content:count, timestamp=1285674576293, value=8
 zero,             column=content:count, timestamp=1285674576293, value=2
 zero-compressed   column=content:count, timestamp=1285674576293, value=1
.....
hbase(main):x:0> exit
}}}
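Besides checking with the HBase shell, the result can also be read back through the HBase client API. The sketch below, assuming the 0.90-era client API used in this course, looks up the count stored for the word "zero" seen in the scan output above; the ReadCount class name and the hard-coded row key are illustrative only.

{{{
#!java
package org.nchc.hbase;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class ReadCount {
    public static void main(String[] args) throws Exception {
        // The reducer used the word itself as the row key
        // and stored the count as a string in content:count.
        HTable table = new HTable(new HBaseConfiguration(), "wordcount");
        Get get = new Get(Bytes.toBytes("zero"));
        Result result = table.get(get);
        byte[] value = result.getValue(Bytes.toBytes("content"),
                Bytes.toBytes("count"));
        System.out.println("zero = " + Bytes.toString(value));
        table.close();
    }
}
}}}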
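Note that main() calls CreateTable.createHBaseTable() to create the wordcount table before the job is submitted; that class comes from an earlier exercise and is not shown on this page. For readers working through this example on its own, the following is a minimal sketch of what such a helper could look like, assuming the 0.90-era HBaseAdmin API; it is not the course's official implementation.

{{{
#!java
package org.nchc.hbase;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateTable {
    // Create (or re-create) a table with a single column family.
    public static void createHBaseTable(String tablename, String family)
            throws IOException {
        HTableDescriptor htd = new HTableDescriptor(tablename);
        htd.addFamily(new HColumnDescriptor(family));

        HBaseAdmin admin = new HBaseAdmin(new HBaseConfiguration());
        if (admin.tableExists(tablename)) {
            // Drop any existing table so the counts start from a clean state.
            admin.disableTable(tablename);
            admin.deleteTable(tablename);
        }
        admin.createTable(htd);
    }
}
}}}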