HBase Program Examples
Example 1
package hbase020;

// WordCountHBase
//
// Description:
//   This program reads the files under the input path, does a word count
//   on their contents, and writes the results back into an HTable.
//
// How to run:
//   Run it on the Hadoop 0.20 platform. Add the HBase parameters using the
//   method from reference 2, package this code as XX.jar, then execute:
//   ---------------------------
//   hadoop jar XX.jar WordCountHBase <hdfs_input>
//   ---------------------------
//
// Result:
//   ---------------------------
//   $ hbase shell
//   > scan 'wordcount'
//   ROW          COLUMN+CELL
//   am           column=content:count, timestamp=1264406245488, value=1
//   chen         column=content:count, timestamp=1264406245488, value=1
//   hi,          column=content:count, timestamp=1264406245488, value=2
//   ...... (omitted)
//   ---------------------------
//
// Notes:
//   1. The source path on HDFS is "/user/$YOUR_NAME/input".
//      Upload your data to this HDFS folder first; the folder may only
//      contain files, not subfolders.
//   2. When the job finishes, the results are stored in the HBase table
//      "wordcount".
//
// References:
//   1. Code adapted from: http://blog.ring.idv.tw/comment.ser?i=337
//   2. Running MapReduce programs against HBase:
//      http://wiki.apache.org/hadoop/Hbase/MapReduce

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class WordCountHBase {

    public static class HtMap extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private IntWritable one = new IntWritable(1);

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Lowercase the input line, then split it on spaces
            String s[] = value.toString().toLowerCase().trim().split(" ");

            for (String m : s) {
                // Write each word to the output stream
                context.write(new Text(m), one);
            }
        }
    }

    // TableReducer<KEYIN, VALUEIN, KEYOUT>
    // This was originally TableReducer<Text, IntWritable, NullWritable>,
    // but LongWritable also works here, which shows the output key class
    // is flexible: any class in org.apache.hadoop.io.* that implements
    // Writable's write() should do.
    public static class HtReduce extends
            TableReducer<Text, IntWritable, LongWritable> {

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable i : values) {
                sum += i.get();
            }

            // org.apache.hadoop.hbase.client.Put
            // Used to perform Put operations for a single row.
            // new Put(byte[] row)
            Put put = new Put(Bytes.toBytes(key.toString()));

            // add(byte[] family, byte[] qualifier, byte[] value)
            // main() sets the output format class to TableOutputFormat,
            // and TableReducer requires the reduce output value to be a
            // Put or a Delete.
            put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes
                    .toBytes(String.valueOf(sum)));

            // NullWritable.get(): Returns the single instance of this class.
            // NullWritable.write(): Serialize the fields of this object to out.
            context.write(new LongWritable(), put);
            // or: context.write(NullWritable.get(), put)
        }
    }

    public static void createHBaseTable(String tablename) throws IOException {
        // HTableDescriptor contains the name of an HTable and its column
        // families; it describes the table's properties.
        HTableDescriptor htd = new HTableDescriptor(tablename);

        // HColumnDescriptor contains information about a column family,
        // such as the number of versions, compression settings, etc.
        // Column families are added via HTableDescriptor.addFamily().
        htd.addFamily(new HColumnDescriptor("content:"));

        // HBaseConfiguration picks up the settings in hbase-site.xml
        HBaseConfiguration config = new HBaseConfiguration();

        // Table-level operations go through HBaseAdmin
        HBaseAdmin admin = new HBaseAdmin(config);

        // Check whether the table already exists
        if (admin.tableExists(tablename)) {
            // Disable it
            admin.disableTable(tablename);
            // Delete it
            admin.deleteTable(tablename);
        }

        System.out.println("create new table: " + tablename);
        // Create the table
        admin.createTable(htd);
    }

    public static void main(String args[]) throws Exception {
        // debug: hard-code the input path
        String[] argv = { "/user/waue/input" };
        args = argv;
        String input = args[0];

        String tablename = "wordcount";

        Configuration conf = new Configuration();
        // OUTPUT_TABLE = "hbase.mapred.outputtable"
        // conf.set sets a name/value pair, just like in core-site.xml;
        // this tells the job that hbase.mapred.outputtable --> wordcount
        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);

        // Create the HBase table first, otherwise the job will fail
        createHBaseTable(tablename);

        Job job = new Job(conf, "WordCount table with " + input);

        job.setJarByClass(WordCountHBase.class);
        job.setNumReduceTasks(1);
        job.setMapperClass(HtMap.class);
        job.setReducerClass(HtReduce.class);

        // The map output here is <Text, IntWritable>, so these
        // set{Map|Reduce}Output{Key|Value}Class() calls could be omitted
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // FileInputFormat is the usual InputFormat base class, with
        // SequenceFileInputFormat, TextInputFormat, etc. beneath it.
        // TextInputFormat is the most common and defaults to
        // <LongWritable, Text> input. HBase additionally provides its own
        // subclass, TableInputFormat.
        job.setInputFormatClass(TextInputFormat.class);

        // TableOutputFormat
        // This line makes the reducer write its output into an HBase table
        job.setOutputFormatClass(TableOutputFormat.class);

        // The old Config.setInputPath(Path) became
        // FileInputFormat.addInputPath(Job, Path), presumably because some
        // file operations do not need a MapReduce job, so the call was
        // moved out of the job configuration itself.
        FileInputFormat.addInputPath(job, new Path(input));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
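The comment in main() mentions that HBase also ships a TableInputFormat for the opposite direction, reading an HBase table as MapReduce input. The sketch below shows that path: it scans the wordcount table produced above and dumps each row back out as text on HDFS. This is a minimal, untested sketch against the HBase 0.20 mapreduce API; the class name ReadWordCountHBase and the output path "/user/waue/output" are made up for illustration.

package hbase020;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ReadWordCountHBase {

    // TableMapper fixes the input types to <ImmutableBytesWritable, Result>;
    // only the output key/value classes are ours to choose.
    public static class ScanMap extends TableMapper<Text, Text> {
        public void map(ImmutableBytesWritable row, Result values,
                Context context) throws IOException, InterruptedException {
            String word = Bytes.toString(row.get());
            String count = Bytes.toString(values.getValue(
                    Bytes.toBytes("content"), Bytes.toBytes("count")));
            context.write(new Text(word), new Text(count));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "scan wordcount table");
        job.setJarByClass(ReadWordCountHBase.class);

        // initTableMapperJob wires up TableInputFormat, the table name,
        // the Scan, and the mapper in one call, mirroring what main()
        // above does by hand for the output side.
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"));
        TableMapReduceUtil.initTableMapperJob("wordcount", scan,
                ScanMap.class, Text.class, Text.class, job);

        job.setNumReduceTasks(0); // map-only: dump rows straight to HDFS
        // "/user/waue/output" is a hypothetical output path
        FileOutputFormat.setOutputPath(job, new Path("/user/waue/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}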
Example 2
package hbase020;

// Description:
//   This program first creates a table, then puts one record into it,
//   reads the record back with get, and finally dumps the whole table
//   with scan.
//
// Reference: code adapted from
//   http://hadoop.apache.org/hbase/docs/current/api/org/apache/hadoop/hbase/client/package-summary.html#package_description

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

// Class that has nothing but a main.
// Does a Put, Get and a Scan against an hbase table.
public class MyLittleHBaseClient {
    private static String table_name = "mytable";
    private static String the_row = "myLittleRow";
    private static String column_family = "family";
    private static String column_qualifier = "someQualifier";
    private static String cell_value = "somvalue";

    public static void createHBaseTable(String tablename) throws IOException {
        // HTableDescriptor contains the name of an HTable and its column
        // families; it describes the table's properties.
        HTableDescriptor htd = new HTableDescriptor(tablename);

        // HColumnDescriptor contains information about a column family,
        // such as the number of versions, compression settings, etc.
        // Column families are added via HTableDescriptor.addFamily().
        htd.addFamily(new HColumnDescriptor(column_family + ":"));

        // HBaseConfiguration picks up the settings in hbase-site.xml
        HBaseConfiguration config = new HBaseConfiguration();

        // Table-level operations go through HBaseAdmin
        HBaseAdmin admin = new HBaseAdmin(config);

        // Check whether the table already exists
        if (admin.tableExists(tablename)) {
            // Disable it
            admin.disableTable(tablename);
            // Delete it
            admin.deleteTable(tablename);
        }

        System.out.println("create new table: " + tablename);
        // Create the table
        admin.createTable(htd);
    }

    public static void main(String[] args) throws IOException {
        // You need a configuration object to tell the client where to
        // connect. When you create a HBaseConfiguration, it reads in
        // whatever you've set into your hbase-site.xml and in
        // hbase-default.xml, as long as these can be found on the
        // CLASSPATH.
        HBaseConfiguration config = new HBaseConfiguration();

        // create table automatically
        createHBaseTable(table_name);

        // This instantiates an HTable object that connects you to the
        // "mytable" table.
        HTable table = new HTable(config, table_name);

        // To add to a row, use Put. A Put constructor takes the name of
        // the row you want to insert into as a byte array. In HBase, the
        // Bytes class has utility for converting all kinds of java types
        // to byte arrays. Below, we convert the String "myLittleRow" into
        // a byte array to use as a row key for our update. Once you have a
        // Put instance, you can adorn it by setting the names of columns
        // you want to update on the row, the timestamp to use in your
        // update, etc. If no timestamp, the server applies current time to
        // the edits.
        Put p = new Put(Bytes.toBytes(the_row));

        // To set the value you'd like to update in the row, specify the
        // column family, column qualifier, and value of the table cell
        // you'd like to update. The column family must already exist in
        // your table schema (here the table was created above with the
        // family "family"). The qualifier can be anything. All must be
        // specified as byte arrays, as hbase is all about byte arrays.
        p.add(Bytes.toBytes(column_family), Bytes.toBytes(column_qualifier),
                Bytes.toBytes(cell_value));

        // Once you've adorned your Put instance with all the updates you
        // want to make, commit it as follows. (HTable#put takes the Put
        // instance you've been building and pushes the changes you made
        // into hbase.)
        table.put(p);

        // Now, to retrieve the data we just wrote. The values that come
        // back are Result instances. Generally, a Result is an object that
        // will package up the hbase return into the form you find most
        // palatable.
        Get g = new Get(Bytes.toBytes(the_row));
        Result r = table.get(g);
        byte[] value = r.getValue(Bytes.toBytes(column_family), Bytes
                .toBytes(column_qualifier));

        // If we convert the value bytes, we should get back the value we
        // inserted at this location.
        String valueStr = Bytes.toString(value);
        System.out.println("GET: " + valueStr);

        // Sometimes, you won't know the row you're looking for. In this
        // case, you use a Scanner. This will give you a cursor-like
        // interface to the contents of the table. To set up a Scanner, do
        // like you did above making a Put and a Get: create a Scan and
        // adorn it with column names, etc.
        Scan s = new Scan();
        s.addColumn(Bytes.toBytes(column_family), Bytes
                .toBytes(column_qualifier));
        ResultScanner scanner = table.getScanner(s);
        try {
            // Scanners return Result instances and are iterable, so the
            // simplest way to walk the table is a foreach loop:
            for (Result rr : scanner) {
                // print out the row we found and the columns we were
                // looking for
                System.out.println("Found row: " + rr);
            }
        } finally {
            // Make sure you close your scanners when you are done!
            // That's why we have it inside a try/finally clause.
            scanner.close();
        }
    }
}
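Put, Get, and Scan cover writing and reading; the remaining basic client operation is Delete, which the example above does not touch. Below is a minimal sketch against the same 0.20 client API, reusing the table/row/family/qualifier names from MyLittleHBaseClient; the class name DeleteExample is made up for illustration.

package hbase020;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;

public class DeleteExample {
    public static void main(String[] args) throws IOException {
        HBaseConfiguration config = new HBaseConfiguration();
        HTable table = new HTable(config, "mytable");

        // A Delete built from just the row key removes every cell in the
        // row; the commented calls below show how to narrow it to one
        // column family or one column instead.
        Delete d = new Delete(Bytes.toBytes("myLittleRow"));
        // d.deleteFamily(Bytes.toBytes("family"));
        // d.deleteColumn(Bytes.toBytes("family"), Bytes.toBytes("someQualifier"));
        table.delete(d);

        System.out.println("deleted row: myLittleRow");
    }
}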