wiki:waue/2010/0128

Version 2 (modified by waue, 14 years ago) (diff)

--

HBase Indexed Table
Using HBase TableIndexed from Thrift with unique keys

http://kdpeterson.net/blog/2009/09/using-hbase-tableindexed-from-thrift-with-unique-keys.html

HBase is primarily a sorted distributed hash map, but it does support secondary keys through a contrib package called Transactional HBase. The secondary keys are provided by a component called TableIndexed?.

http://mail-archives.apache.org/mod_mbox/hadoop-hbase-user/200908.mbox/%3C73d592f60908180339j3231f7f5w688260109ab3463@mail.gmail.com%3E

> If you're using the default key generator
> (org.apache.hadoop.hbase.client.tableindexed.SimpleIndexKeyGenerator),
> it actually appends the base table row key for you.  So even though
> the column value may be the same for multiple rows, the secondary
> index table will still have 1 row for each row with the value in the
> original table.  Here is relevant method from SimpleIndexKeyGenerator:
>
>  public byte[] createIndexKey(byte[] rowKey, Map<byte[], byte[]> columns) {
>    return Bytes.add(columns.get(column), rowKey);
>  }
>
> So, say you have a table "mytable", with the columns:
>    info:keycol       (say this is the one you want to index)
>    info:col2
>    info:col3
>
> If you define your table with the index specification -- new
> IndexSpecification("keycol", Bytes.toBytes("info:keycol")) -- then
> HBase will create the secondary index table named "mytable-by_keycol".
>
> Then, say you add the following rows to "mytable":
>
> "row1":  info:keycol="one", info:col2="abc", info:col3="def"
> "row2":  info:keycol="one", info:col2="ghi", info:col3="jkl"
>
> At this point, your index table ("mytable-by_keycol") will have the
> following rows:
>
> "onerow1": info:keycol="one", __INDEX__:ROW="row1"
> "onerow2": info:keycol="one", __INDEX__:ROW="row2"
>
> So you wind up with 2 rows in the index table (with unique row keys)
> pointing back at the original table rows, even though we've only
> stored a single distinct value for info:keycol.
>
> To access the rows by the secondary index to create a scanner using
> IndexedTable.getIndexedScanner(...).  I don't think there's support
> for using the indexes when performing a random read with
> HTable.getRow()/HTable.get().  (But maybe I'm wrong?)

Hadoop HBase數據導入和索引測試 http://bbs.telewiki.cn/blog/qianling/entry/hadoop_hbase%E6%95%B0%E6%8D%AE%E5%AF%BC%E5%85%A5%E5%92%8C%E7%B4%A2%E5%BC%95%E6%B5%8B%E8%AF%95

Secondary indexes in HBase

http://rajeev1982.blogspot.com/2009/06/secondary-indexes-in-hbase.html

import java.io.IOException;
import java.util.Date;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Scanner;
import org.apache.hadoop.hbase.client.tableindexed.IndexSpecification;
import org.apache.hadoop.hbase.client.tableindexed.IndexedTable;
import org.apache.hadoop.hbase.client.tableindexed.IndexedTableAdmin;
import org.apache.hadoop.hbase.filter.ColumnValueFilter;
import org.apache.hadoop.hbase.filter.ColumnValueFilter.CompareOp;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;

public class SecondaryIndexTest {
    public void writeToTable() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

        String row = "test_row";
        BatchUpdate update = null;

        for (int i = 0; i < 100; i++) {
            update = new BatchUpdate(row + i);
            update.put("columnfamily1:column1", Bytes.toBytes("value1-" + i));
            update.put("columnfamily1:column2", Bytes.toBytes("value2-" + i));
            table.commit(update);
        }

        table.close();
    }

    public void readAllRowsFromSecondaryIndex() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

        Scanner scanner = table.getIndexedScanner("column1",
            HConstants.EMPTY_START_ROW, null, null, new byte[][] {
            Bytes.toBytes("columnfamily1:column1"),
                Bytes.toBytes("columnfamily1:column2") });

        for (RowResult rowResult : scanner) {
            System.out.println(Bytes.toString(
                rowResult.get(Bytes.toBytes("columnfamily1:column1")).getValue())
                + ", " + Bytes.toString(rowResult.get(
                Bytes.toBytes("columnfamily1:column2")).getValue()
                ));
        }

        table.close();
    }

    public void readFilteredRowsFromSecondaryIndex() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTable table = new IndexedTable(conf, Bytes.toBytes("test_table"));

        ColumnValueFilter filter = 
            new ColumnValueFilter(Bytes.toBytes("columnfamily1:column1"), 
            CompareOp.LESS, Bytes.toBytes("value1-40"));

        Scanner scanner = table.getIndexedScanner("column1", 
            HConstants.EMPTY_START_ROW, null, filter, 
            new byte[][] { Bytes.toBytes("columnfamily1:column1"),
                Bytes.toBytes("columnfamily1:column2")
            });

        for (RowResult rowResult : scanner) {
            System.out.println(Bytes.toString(
                rowResult.get(Bytes.toBytes("columnfamily1:column1")).getValue())
                + ", " + Bytes.toString(rowResult.get(
                Bytes.toBytes("columnfamily1:column2")).getValue()
                ));
        }

        table.close();
    }

    public void createTableWithSecondaryIndexes() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        HTableDescriptor desc = new HTableDescriptor("test_table");

        desc.addFamily(new HColumnDescriptor("columnfamily1:column1"));
        desc.addFamily(new HColumnDescriptor("columnfamily1:column2"));

        desc.addIndex(new IndexSpecification("column1",
            Bytes.toBytes("columnfamily1:column1")));
        desc.addIndex(new IndexSpecification("column2",
            Bytes.toBytes("columnfamily1:column2")));

        IndexedTableAdmin admin = null;
        admin = new IndexedTableAdmin(conf);

        if (admin.tableExists(Bytes.toBytes("test_table"))) {
            if (admin.isTableEnabled("test_table")) {
                admin.disableTable(Bytes.toBytes("test_table"));
            }

            admin.deleteTable(Bytes.toBytes("test_table"));
        }

        if (admin.tableExists(Bytes.toBytes("test_table-column1"))) {
            if (admin.isTableEnabled("test_table-column1")) {
                admin.disableTable(Bytes.toBytes("test_table-column1"));
            }

            admin.deleteTable(Bytes.toBytes("test_table-column1"));
        }

        admin.createTable(desc);
    }

    public void addSecondaryIndexToExistingTable() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTableAdmin admin = null;
        admin = new IndexedTableAdmin(conf);

        admin.addIndex(Bytes.toBytes("test_table"), 
            new IndexSpecification("column2", 
            Bytes.toBytes("columnfamily1:column2")));
    }

    public void removeSecondaryIndexToExistingTable() throws IOException {
        HBaseConfiguration conf = new HBaseConfiguration();
        conf.addResource(new Path("/opt/hbase-0.19.3/conf/hbase-site.xml"));

        IndexedTableAdmin admin = null;
        admin = new IndexedTableAdmin(conf);

        admin.removeIndex(Bytes.toBytes("test_table"), "column2");
    }

    public static void main(String[] args) throws IOException {
        SecondaryIndexTest test = new SecondaryIndexTest();

        test.createTableWithSecondaryIndexes();
        test.writeToTable();
        test.addSecondaryIndexToExistingTable();
        test.removeSecondaryIndexToExistingTable();
        test.readAllRowsFromSecondaryIndex();
        test.readFilteredRowsFromSecondaryIndex();

        System.out.println("Done!");
    }
}