wiki:waue/2010/0402

Version 3 (modified by waue, 14 years ago) (diff)

--

hadoop + Hbase + thrift + php
程式碼解析

零、前言

  • thrift 是讓 Java 以外的程式語言也能直接存取 hbase 的中介函式庫
  • 此篇介紹的是如何用php透過 thrift 對 hbase 操作
hadoop /opt/hadoop
hbase /opt/hbase
網頁根目錄 /var/www/
hbase 的php碼目錄 /var/www/hbase
thrift php /var/www/hbase/thrift
  • 測試程式之前,請先確定
    • hbase , hadoop 都有正常運作中
    • $ bin/hbase thrift start 尚在執行

一、php引用thrift lib

<?php
// Shared bootstrap for the examples on this page: load the Thrift PHP runtime
// and the generated HBase bindings, then open a buffered, binary-protocol
// connection to the HBase Thrift gateway.
//
// The full "<?php" open tag is used because the short "<?" form is only parsed
// when the short_open_tag ini setting is enabled; with it disabled the code
// would be sent to the browser as plain text.

// Kept in $GLOBALS on purpose.
// NOTE(review): the bundled Thrift PHP files presumably resolve their own
// includes through this global - confirm before replacing it with a local.
$GLOBALS['THRIFT_ROOT'] = '/var/www/hbase/thrift';

require_once( $GLOBALS['THRIFT_ROOT'].'/Thrift.php' );
require_once( $GLOBALS['THRIFT_ROOT'].'/transport/TSocket.php' );
require_once( $GLOBALS['THRIFT_ROOT'].'/transport/TBufferedTransport.php' );
require_once( $GLOBALS['THRIFT_ROOT'].'/protocol/TBinaryProtocol.php' );
require_once( $GLOBALS['THRIFT_ROOT'].'/packages/Hbase/Hbase.php' );

// Host and port of the machine running "bin/hbase thrift start".
$socket = new TSocket( 'secuse.nchc.org.tw', 9090 );
$socket->setSendTimeout( 10000 ); // Ten seconds (too long for production, but this is just a demo ;)
$socket->setRecvTimeout( 20000 ); // Twenty seconds

// Layering: socket -> buffered transport -> binary protocol -> HBase client.
$transport = new TBufferedTransport( $socket );
$protocol = new TBinaryProtocol( $transport );
$client = new HbaseClient( $protocol );

// The connection stays open for the rest of the page; it is closed at the end.
$transport->open();
?>
........
其他html 碼
<?
或是 下面提到的各式讀寫操作

?>
.......
<?php
// Release the Thrift connection opened by the bootstrap at the top of the page.
// Full "<?php" tag: the short "<?" form depends on the short_open_tag setting.
$transport->close();
?>
  • 所有的程式碼都必須包含這些引入函式庫、開啟關閉socket 的敘述

二、各種對hbase的操作

2.1 列出hbase 裡的所有 table

!#php
// Fetch every table name from HBase and print them in sorted order.
echo( "listing tables...\n" );
$tables = $client->getTableNames();
sort( $tables );
foreach ( $tables as $name ) {
  echo( "  found: {$name}\n" );
}

2.2 刪除table

!#php
$name = "hbase table name";

// Disable the table first if it is still enabled, then delete it.
if ( $client->isTableEnabled( $name ) ) {
  echo( "    disabling table: {$name}\n" );
  $client->disableTable( $name );
}

// The delete runs unconditionally, whether or not a disable was needed.
echo( "    deleting table: {$name}\n" );
$client->deleteTable( $name );

2.3 新增table

  • 我們先定義columns 的物件結構如下
    !#php
    // Column family "entry:" with its maxVersions attribute set to 10.
    $entryFamily = new ColumnDescriptor( array(
      'name' => 'entry:',
      'maxVersions' => 10
    ) );

    // Column family "unused:" with every other attribute left at its default.
    $unusedFamily = new ColumnDescriptor( array( 'name' => 'unused:' ) );

    // Descriptor list handed to createTable() in the next step.
    $columns = array( $entryFamily, $unusedFamily );
    
  • 將剛剛的column 放到table 內
    !#php
    $t = "table name";
    printf( "creating table: %s\n", $t );

    // An AlreadyExists exception from createTable() is reported as a warning
    // instead of aborting the page.
    try {
      $client->createTable( $t, $columns );
    } catch ( AlreadyExists $alreadyExists ) {
      printf( "WARN: %s\n", $alreadyExists->message );
    }
    
    

2.4 列出 table 內的 column family

!#php
$t = "table name";
printf( "column families in %s:\n", $t );

// The result is walked as a collection of descriptor objects; asort() orders
// it by value while keeping the original keys.
$families = $client->getColumnDescriptors( $t );
asort( $families );
foreach ( $families as $descriptor ) {
  printf( "  column: %s, maxVer: %s\n", $descriptor->name, $descriptor->maxVersions );
}

2.5 寫入資料

!#php
$t = "table name";
$row = "row name";
// ASCII prefix followed by two multi-byte characters given as raw UTF-8 bytes.
$valid = "foobar-\xE7\x94\x9F\xE3\x83\x93";

// A single mutation: store $valid in column "entry:foo".
$mutations = array(
  new Mutation( array(
    'column' => 'entry:foo',
    'value' => $valid
  ) ),
);

// Apply the mutation list to the given row of the table.
$client->mutateRow( $t, $row, $mutations );

2.6 讀取資料

get 取得一個 column value

  • get 取得一個 column value 的用法

$table_name = 't1';
$row_name = '1';
$fam_col_name = 'f1:c1';

// get() is called with table, row and "family:column"; its result is an array.
$arr = $client->get( $table_name, $row_name, $fam_col_name );
foreach ( $arr as $k => $v ) {
  // $k is the array key (unused); $v is the cell, read via ->value and ->timestamp.
  echo ("value = {$v->value} , <br> ");
  echo ("timestamp = {$v->timestamp} <br>");
}

getRow 取得一整個row

  • getRow($tableName, $row) 用法
    $table_name = "table name";
    $row_name = "row name";

    // getRow() returns an array; each element is a TRowResult. The numeric key
    // carries no information, so only the values are walked.
    $rowResults = $client->getRow( $table_name, $row_name );
    foreach ( $rowResults as $TRowResult ) {
        // printTRowResult() is not defined in this snippet; it must be provided
        // elsewhere by the page.
        printTRowResult( $TRowResult );
    }
    

scan 一整個table

!#php
$table_name = 't1';
$start_row = ""; // empty string: begin at the first row
$family = array( "f1","f2","f3" );

// scannerOpen() returns the handle that scannerGet() and scannerClose() take
// (the original author describes it as an incrementing number).
$scanner = $client->scannerOpen( $table_name, $start_row , $family );

// scannerGet() delivers a single row per call, so it is called repeatedly;
// an empty/null result means nothing is left to read and ends the loop.
while ( ( $batch = $client->scannerGet( $scanner ) ) != null ) {
	// $batch is an array of TRowResult objects
	foreach ( $batch as $TRowResult ) {
		echo ("	row = {$TRowResult->row} ; <br> ");
		// ->columns is walked as "family:column" => Tcell pairs
		foreach ( $TRowResult->columns as $family_column => $Tcell ) {
			echo ("family:column = $family_column ");
			echo ("	value = {$Tcell->value} ");
			echo (" timestamp = {$Tcell->timestamp}  <br>");
		}
	}
}

echo( "<br> ----------------- " );
echo( "<br> Scanner finished <br>" );
$client->scannerClose( $scanner );