source: sample/WordCountIntoHBase.java @ 8

Last change on this file since 8 was 8, checked in by waue, 16 years ago
File size: 3.7 KB
Line 
1/**
2 * Program: WordCountIntoHBase.java
3 * Editor: Waue Chen
4 * From :  NCHC. Taiwan
5 * Last Update Date: 06/10/2008
6 */
7
8/**
9 * Purpose :
10 *  Store every line from $Input_Path to HBase
11 *
12 * HowToUse :
13 *  Make sure Hadoop file system and HBase are running correctly.
14 *  Use Hadoop instruction to add input-text-files to $Input_Path.
15 *  ($ bin/hadoop dfs -put local_dir hdfs_dir)
16 *  Then run the program with BuildHTable.java after \
17 *  modifying these setup parameters.
18 *
19 * Check Result :
20 *  View the result by hbase instruction (hql> select * from $Table_Name).
21 *  Or run WordCountFromHBase.java, then inspect http://localhost:60070 in a web browser.
22 */
23
24package tw.org.nchc.code;
25
26import java.io.IOException;
27import java.util.Iterator;
28
29import org.apache.hadoop.fs.Path;
30import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
31import org.apache.hadoop.hbase.mapred.TableReduce;
32import org.apache.hadoop.io.LongWritable;
33import org.apache.hadoop.io.MapWritable;
34import org.apache.hadoop.io.Text;
35import org.apache.hadoop.mapred.JobClient;
36import org.apache.hadoop.mapred.JobConf;
37import org.apache.hadoop.mapred.OutputCollector;
38import org.apache.hadoop.mapred.Reporter;
39import org.apache.hadoop.mapred.lib.IdentityMapper;
40import org.apache.hadoop.mapred.lib.IdentityReducer;
41
42public class WordCountIntoHBase {
43
44  /* setup parameters */
45  // $Input_Path. Please make sure the path is correct and contains input files
46  static final String Input_Path = "/user/waue/simple";
47  // Hbase table name, the program will create it
48  static final String Table_Name = "word_count5";
49  // column name, the program will create it
50  static final String colstr = "word:text" ;
51 
52  // constructor
53  private WordCountIntoHBase() {
54  }
55
56  private static class ReduceClass extends TableReduce<LongWritable, Text> {
57    // set (column_family:column_qualify)
58    private static final Text col = new Text(WordCountIntoHBase.colstr);
59    // this map holds the columns per row
60    private MapWritable map = new MapWritable();
61    public void reduce(LongWritable key, Iterator<Text> values,
62        OutputCollector<Text, MapWritable> output, Reporter reporter)
63        throws IOException {
64      // contents must be ImmutableBytesWritable
65      ImmutableBytesWritable bytes = 
66        new ImmutableBytesWritable(values.next().getBytes());     
67      map.clear();
68      // write data
69      map.put(col, bytes);
70      // add the row with the key as the row id
71      output.collect(new Text(key.toString()), map);
72    }
73  }
74
75  /**
76   * Runs the demo.
77   */
78  public static void main(String[] args) throws IOException { 
79    // parse colstr to split column family and column qualify
80    String tmp[] = colstr.split(":");
81    String Column_Family = tmp[0]+":";
82    String CF[] = {Column_Family};
83    // check whether create table or not , we don't admit \
84    // the same name but different structure
85    BuildHTable build_table = new BuildHTable(Table_Name,CF);
86    if (!build_table.checkTableExist(Table_Name)) {
87      if (!build_table.createTable()) {
88        System.out.println("create table error !");
89      }
90    }else{
91      System.out.println("Table \"" + Table_Name +"\" has already existed !");
92    }
93    int mapTasks = 1;
94    int reduceTasks = 1;
95    JobConf conf = new JobConf(WordCountIntoHBase.class);
96    conf.setJobName(Table_Name);
97
98    // must initialize the TableReduce before running job
99    TableReduce.initJob(Table_Name, ReduceClass.class, conf);
100    conf.setNumMapTasks(mapTasks);
101    conf.setNumReduceTasks(reduceTasks);
102    conf.setInputPath(new Path(Input_Path));
103    conf.setMapperClass(IdentityMapper.class);
104    conf.setCombinerClass(IdentityReducer.class);
105    conf.setReducerClass(ReduceClass.class);
106
107    JobClient.runJob(conf);
108  }
109}
Note: See TracBrowser for help on using the repository browser.