source: sample/HBaseRecord2.java @ 10

/**
 * Program: HBaseRecord2.java
 * Editor: Waue Chen
 * From: NCHC, Taiwan
 * Last Update Date: 06/13/2008
 */

/**
 * Purpose:
 *  1. Auto-generate an HTable.
 *  2. Parse your records and store them in HBase.
 *
 * HowToUse:
 *  Make sure the Hadoop file system and HBase are running correctly.
 *  1. Put test.txt in the t1 directory; its content is
 *  ---------------
 *  name:locate:years
 *  waue:taiwan:1981
 *  shellon:taiwan:1981
 *  ---------------
 *  2. hadoop_root/$ bin/hadoop dfs -put t1 t1
 *  3. hbase_root/$ bin/hbase shell
 *  4. hql > create table t1_table("person");
 *  5. Go back to Eclipse and run this code; the resulting table is
 *  t1_table -> person
 *    ----------------------------
 *    | name    | locate | years |
 *    | waue    | taiwan | 1981  |
 *    | shellon | taiwan | 1981  |
 *    ----------------------------
 * Check Result:
 *  Go to the hbase console and type:
 *    hql > select * from t1_table;
 *
 *  08/06/06 12:20:48 INFO hbase.HTable: Creating scanner over t1_table starting at key
 *  +-------------------------+-------------------------+-------------------------+
 *  | Row                     | Column                  | Cell                    |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 0                       | person:locate           | locate                  |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 0                       | person:name             | name                    |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 0                       | person:years            | years                   |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 19                      | person:locate           | taiwan                  |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 19                      | person:name             | waue                    |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 19                      | person:years            | 1981                    |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 36                      | person:locate           | taiwan                  |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 36                      | person:name             | shellon                 |
 *  +-------------------------+-------------------------+-------------------------+
 *  | 36                      | person:years            | 1981                    |
 *  +-------------------------+-------------------------+-------------------------+
 *  3 row(s) in set. (0.04 sec)
 */
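
/*
 * A possible alternative to step 5 above (a sketch; the jar name below is
 * hypothetical, not from the original notes): pack the compiled class into
 * a jar and submit the job from the command line instead of Eclipse:
 *
 *   hadoop_root/$ bin/hadoop jar nchc-samples.jar tw.org.nchc.code.HBaseRecord2
 */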

package tw.org.nchc.code;
63
64import java.io.IOException;
65import java.util.Iterator;
66
67import org.apache.hadoop.fs.Path;
68import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
69import org.apache.hadoop.hbase.mapred.TableReduce;
70import org.apache.hadoop.io.LongWritable;
71import org.apache.hadoop.io.MapWritable;
72import org.apache.hadoop.io.Text;
73import org.apache.hadoop.mapred.JobClient;
74import org.apache.hadoop.mapred.JobConf;
75import org.apache.hadoop.mapred.OutputCollector;
76import org.apache.hadoop.mapred.Reporter;
77import org.apache.hadoop.mapred.lib.IdentityMapper;
78import org.apache.hadoop.mapred.lib.IdentityReducer;
79

public class HBaseRecord2 {

  /* Define parameters */
  // one column family: person; three column qualifiers: name, locate, years
  final String colstr;
  // HBase table name
  String Table_Name = "Record1";
  // split character
  static String sp = ":";
  // file path in the Hadoop file system (not the physical file system)
  String file_path = "/user/waue/t1";

  public HBaseRecord2() {
    colstr = "person:name,locate,years";
  }

  public HBaseRecord2(String str) {
    colstr = str;
  }
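
  /*
   * Usage sketch (the schema below is hypothetical): new
   * HBaseRecord2("animal:kind,legs,home") would target column family
   * "animal" with qualifiers kind, legs, home. Note that in this sample
   * ReduceClass writes the fixed default columns, so the no-argument
   * constructor is the one exercised by main().
   */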

  private static class ReduceClass extends TableReduce<LongWritable, Text> {

    // column names: family "person" with qualifiers name, locate, years;
    // these match the default colstr "person:name,locate,years"
    private static final Text col_name = new Text("person:name");
    private static final Text col_local = new Text("person:locate");
    private static final Text col_year = new Text("person:years");

    // this map holds the columns of the current row
    private MapWritable map = new MapWritable();

    // in this sample the map phase is a pass-through (IdentityMapper);
    // all of the work happens here in reduce
    public void reduce(LongWritable key, Iterator<Text> values,
        OutputCollector<Text, MapWritable> output, Reporter reporter)
        throws IOException {

      // Text.toString() decodes the record; using getBytes() directly would
      // also require getLength(), since the backing array can be oversized
      String stro = values.next().toString();
      // each record looks like "waue:taiwan:1981" -> name, locate, years
      String str[] = stro.split(sp);
      byte b_name[] = str[0].getBytes();
      byte b_local[] = str[1].getBytes();
      byte b_year[] = str[2].getBytes();

      // cell contents must be ImmutableBytesWritable
      ImmutableBytesWritable w_name = new ImmutableBytesWritable(b_name);
      ImmutableBytesWritable w_local = new ImmutableBytesWritable(b_local);
      ImmutableBytesWritable w_year = new ImmutableBytesWritable(b_year);

      // populate the current row
      map.clear();
      map.put(col_name, w_name);
      map.put(col_local, w_local);
      map.put(col_year, w_year);

      // emit the row, using the key (the line's byte offset) as the row id
      output.collect(new Text(key.toString()), map);
    }
  }
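
  /*
   * Worked example of one reduce call, using the sample input above:
   * key = 19 (the byte offset of the line "waue:taiwan:1981", per the
   * scan output in the header comment), value = "waue:taiwan:1981".
   * split(":") yields {"waue", "taiwan", "1981"}, so the call emits
   *
   *   row "19" -> { person:name   = "waue",
   *                 person:locate = "taiwan",
   *                 person:years  = "1981" }
   *
   * which matches the select * output shown in the header comment.
   */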

  /**
   * Runs the demo.
   */
  public static void main(String[] args) throws IOException {
    // parse colstr to split the column family from the column qualifiers
    HBaseRecord2 work = new HBaseRecord2();

    String tmp[] = work.colstr.split(":");
    String Column_Family = tmp[0] + ":";
    String CF[] = { Column_Family };
    // the qualifiers ("name,locate,years") follow the colon
    String CQ[] = tmp[1].split(",");
    // create the table only if it does not already exist; we do not allow
    // two tables with the same name but different structures

    BuildHTable build_table = new BuildHTable(work.Table_Name, CF);
    if (!build_table.checkTableExist(work.Table_Name)) {
      if (!build_table.createTable()) {
        System.out.println("create table error !");
      }
    } else {
      System.out.println("Table \"" + work.Table_Name
          + "\" already exists !");
    }

    JobConf conf = new JobConf(HBaseRecord2.class);
    int mapTasks = 1;
    int reduceTasks = 1;
    // job name; modify it to anything you like
    conf.setJobName("NCHC_PersonDataBase");

    // the HBase table name must be correct; this program writes to Record1
    TableReduce.initJob(work.Table_Name, ReduceClass.class, conf);

    // below is the map-reduce profile
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.setInputPath(new Path(work.file_path));
    conf.setMapperClass(IdentityMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(ReduceClass.class);
    JobClient.runJob(conf);
  }
}
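
/*
 * BuildHTable lives elsewhere in this repository and is not part of this
 * file. The sketch below (the class name BuildHTableSketch is mine, and the
 * HBase 0.1-era admin calls are an assumption) only illustrates the two
 * methods main() relies on; the real class may differ.
 */
class BuildHTableSketch {

  private final String tableName;
  private final String[] families;

  BuildHTableSketch(String tableName, String[] families) {
    this.tableName = tableName;
    this.families = families;
  }

  // true if a table with this name is already registered with the master
  boolean checkTableExist(String name) throws IOException {
    org.apache.hadoop.hbase.HBaseAdmin admin = new org.apache.hadoop.hbase.HBaseAdmin(
        new org.apache.hadoop.hbase.HBaseConfiguration());
    return admin.tableExists(new Text(name));
  }

  // creates the table with one column descriptor per family, e.g. "person:"
  boolean createTable() throws IOException {
    org.apache.hadoop.hbase.HBaseAdmin admin = new org.apache.hadoop.hbase.HBaseAdmin(
        new org.apache.hadoop.hbase.HBaseConfiguration());
    org.apache.hadoop.hbase.HTableDescriptor desc = new org.apache.hadoop.hbase.HTableDescriptor(
        tableName);
    for (String family : families) {
      desc.addFamily(new org.apache.hadoop.hbase.HColumnDescriptor(family));
    }
    admin.createTable(desc);
    return true;
  }
}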