Changeset 9 for sample


Timestamp: Jun 13, 2008, 5:45:02 PM
Author: waue
Message: comment
Location: sample
Files: 3 edited

  • sample/HBaseRecord.java

    r8 → r9

    - /*
    -  *  NCHC Hbase with map reduce sample code
    -  *  DemoHBaseSlink.java
    + /**
    +  * Program: HBaseRecord.java
    +  * Editor: Waue Chen
    +  * From :  NCHC. Taiwn
    +  * Last Update Date: 06/01/2008
       */
      
    - package tw.org.nchc.code;
    -
    - import java.io.IOException;
    - import java.util.Iterator;
    -
    - import org.apache.hadoop.fs.Path;
    - import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    - import org.apache.hadoop.hbase.mapred.TableReduce;
    - import org.apache.hadoop.io.LongWritable;
    - import org.apache.hadoop.io.MapWritable;
    - import org.apache.hadoop.io.Text;
    - import org.apache.hadoop.mapred.JobClient;
    - import org.apache.hadoop.mapred.JobConf;
    - import org.apache.hadoop.mapred.OutputCollector;
    - import org.apache.hadoop.mapred.Reporter;
    - import org.apache.hadoop.mapred.lib.IdentityMapper;
    - import org.apache.hadoop.mapred.lib.IdentityReducer;
    -
      /**
    -  * This sample code will put the indicate data to Hbase.
    -  * 1. put test.txt in t1 directory which content is
    - ---------------
    - name:locate:years
    - waue:taiwan:1981
    - shellon:taiwan:1981
    - ---------------
    -  * 2. hadoop_root/$ bin/hadoop dfs -put t1 t1
    -  * 3. hbase_root/$ bin/hbase shell
    -  * 4. hql > create table t1_table("person");
    -  * 5. Come to Eclipse and run this code, and we will let database as that
    -  t1_table -> person
    -   ----------------
    -   |  name | locate | years |
    -   ----------------
    -   | waue  | taiwan | 1981 |
    -   ----------------
    -   | shellon | taiwan | 1981 |
    -  * 6. Go to hbase console, type : hql > select * from t1_table;
    -
    +  * Purpose :
    +  *  Parse your record and then store in HBase.
    +  *
    +  * HowToUse :
    +  *  Make sure Hadoop file system and Hbase are running correctly.
    +  *  1. put test.txt in t1 directory which content is
    +  ---------------
    +  name:locate:years
    +  waue:taiwan:1981
    +  shellon:taiwan:1981
    +  ---------------
    +  *  2. hadoop_root/$ bin/hadoop dfs -put t1 t1
    +  *  3. hbase_root/$ bin/hbase shell
    +  *  4. hql > create table t1_table("person");
    +  *  5. Come to Eclipse and run this code, and we will let database as that
    +  t1_table -> person
    +    ----------------
    +    |  name | locate | years |
    +    | waue  | taiwan | 1981 |
    +    | shellon | taiwan | 1981 |
    +    ----------------
    +  * Check Result:
    +  *  Go to hbase console, type :
    +  *    hql > select * from t1_table;
      08/06/06 12:20:48 INFO hbase.HTable: Creating scanner over t1_table starting at key
      +-------------------------+-------------------------+-------------------------+
    …
      +-------------------------+-------------------------+-------------------------+
      3 row(s) in set. (0.04 sec)
    -  **/
    +  */
    +
    +
    +
    +
    + package tw.org.nchc.code;
    +
    + import java.io.IOException;
    + import java.util.Iterator;
    +
    + import org.apache.hadoop.fs.Path;
    + import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    + import org.apache.hadoop.hbase.mapred.TableReduce;
    + import org.apache.hadoop.io.LongWritable;
    + import org.apache.hadoop.io.MapWritable;
    + import org.apache.hadoop.io.Text;
    + import org.apache.hadoop.mapred.JobClient;
    + import org.apache.hadoop.mapred.JobConf;
    + import org.apache.hadoop.mapred.OutputCollector;
    + import org.apache.hadoop.mapred.Reporter;
    + import org.apache.hadoop.mapred.lib.IdentityMapper;
    + import org.apache.hadoop.mapred.lib.IdentityReducer;
    +
      public class HBaseRecord {
      
        /* Denify parameter */
        // one column family: person; three column qualifier: name,locate,years
    -   static private String  baseId1 = "person:name";
    +   static private String  baseId1 ="person:name";
        static private String  baseId2 ="person:locate";
        static private String  baseId3 ="person:years";
    …
        public static void main(String[] args) throws IOException {
          // which path of input files in Hadoop file system
    -
      
          HBaseRecord setup = new HBaseRecord();
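
    Nothing in the hunks above shows the reduce side that actually writes these columns, but the imports (TableReduce, MapWritable, ImmutableBytesWritable, IdentityMapper) imply the usual 0.1-era pattern: pass each input line through unchanged, then split it on ':' in a TableReduce subclass and collect one MapWritable of person: columns per row. Below is a minimal sketch of that pattern only; the exact TableReduce generic signature, the class name ReduceClass, and the choice of the name field as row key are assumptions, not code from this changeset.

        // Sketch only: assumes the HBase 0.1-era TableReduce contract of
        // collecting (Text row key, MapWritable of column -> ImmutableBytesWritable).
        public static class ReduceClass extends TableReduce<LongWritable, Text> {
          public void reduce(LongWritable key, Iterator<Text> values,
              OutputCollector<Text, MapWritable> output, Reporter reporter)
              throws IOException {
            // one input line, e.g. "waue:taiwan:1981"
            String[] fields = values.next().toString().split(":");
            MapWritable row = new MapWritable();
            row.put(new Text("person:name"),
                new ImmutableBytesWritable(fields[0].getBytes()));
            row.put(new Text("person:locate"),
                new ImmutableBytesWritable(fields[1].getBytes()));
            row.put(new Text("person:years"),
                new ImmutableBytesWritable(fields[2].getBytes()));
            // row key: reuse the name field (an assumption, not shown in the diff)
            output.collect(new Text(fields[0]), row);
          }
        }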
  • sample/WordCount.java

    r8 → r9

    - /*
    -  * map reduce sample code
    + /**
    +  * Program: WordCount.java
    +  * Editor: Waue Chen
    +  * From :  NCHC. Taiwn
    +  * Last Update Date: 06/13/2008
    +  */
    +
    + /**
    +  * Purpose :
    +  *  Store the result of WordCount.java from Hbase to Hadoop file system
    +  *
    +  * HowToUse :
    +  *  Make sure Hadoop file system is running correctly.
    +  *  Put text file on the directory "/local_src/input"
    +  *  You can use the instruction to upload "/local_src/input" to HDFS input dir
    +  *    $ bin/hadoop dfs -put /local_src/input input
    +  *  Then modify the $filepath parameter in construtor to be correct and run this code.
    +  *
    +  *
    +  * Check Result:
    +  *  inspect http://localhost:50070 by web explorer
       */
      package tw.org.nchc.code;
    …
      
      public class WordCount {
    -
    +   private String filepath;
    +   private String outputPath;
    +
    +   public WordCount(){
    +     filepath = "/user/waue/input/";
    +     outputPath = "counts1";
    +   }
    +   public WordCount(String path,String output){
    +     filepath = path;
    +     outputPath = output;
    +   }
        // mapper: emits (token, 1) for every word occurrence
        private static class MapClass extends MapReduceBase implements
    …
         */
        public static void main(String[] args) throws IOException {
    -     String filename = "/user/waue/input/";
    -     String outputPath = "sample-counts";
    -     int mapTasks = 20;
    +     WordCount wc = new WordCount();
    +
    +     int mapTasks = 1;
          int reduceTasks = 1;
    -
          JobConf conf = new JobConf(WordCount.class);
          conf.setJobName("wordcount");
    …
          conf.setNumReduceTasks(reduceTasks);
      
    -     conf.setInputPath(new Path(filename));
    +     conf.setInputPath(new Path(wc.filepath));
          conf.setOutputKeyClass(Text.class);
          conf.setOutputValueClass(IntWritable.class);
    -     conf.setOutputPath(new Path(outputPath));
    +     conf.setOutputPath(new Path(wc.outputPath));
      
          conf.setMapperClass(MapClass.class);
    …
      
          // Delete the output directory if it exists already
    -     Path outputDir = new Path(outputPath);
    +     Path outputDir = new Path(wc.outputPath);
          FileSystem.get(conf).delete(outputDir);
      
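
    The main change here replaces the hard-coded filename and outputPath locals with instance fields and two constructors, so main() builds a WordCount and reads wc.filepath and wc.outputPath into the JobConf. A short usage sketch; the alternate paths below are made-up examples:

        // default job: reads /user/waue/input/ and writes to counts1
        WordCount wc = new WordCount();

        // same job pointed elsewhere (both arguments are hypothetical)
        WordCount custom = new WordCount("/user/waue/input2/", "counts2");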
  • sample/WordCountFromHBase.java

    r8 → r9

       * Editor: Waue Chen
       * From :  NCHC. Taiwn
    -  * Last Update Date: 06/10/2008
    +  * Last Update Date: 06/13/2008
       */
      
      /**
       * Purpose :
    -  *  Store the result of WordCountIntoHbase.java from Hbase to Hadoop file system
    +  *  Word counting from Hbase then store result in Hadoop file system
       *
       * HowToUse :
    -  *  Make sure Hadoop file system and HBase are running correctly.
    -  *  Then run the program with BuildHTable.java after \
    -  *  modifying these setup parameters.
    +  *  Make sure Hadoop file system are running and HBase has correct data.
    +  *  Suggest to run WordCountIntoHBase first.
    +  *  finally, modify these setup parameters and run.
       *
       * Check Result:
    -  *  inspect http://localhost:60070 by web explorer
    +  *
    +  *  inspect http://localhost:50070 by web explorer
       */
      
    …
          String line = Text.decode( ((ImmutableBytesWritable) cols.get(textcol) )
              .get() );
    +
          //let us know what is "line"
          /*
    …
          // the result is the contents of merged files "
      
    +     //StringTokenizer will divide a line into a word
          StringTokenizer itr = new StringTokenizer(line);
          // set every word as one
          while (itr.hasMoreTokens()) {
    -       word.set(itr.nextToken());
    +       // nextToken will return this value in String and point to next \
    +       // Text.set() = Set to contain the contents of a string.
    +       word.set(itr.nextToken());
    +       // OutputCollector.collect = collect(K key, V value) \
    +       //  Adds a key/value pair to the output.
            output.collect(word, one);
          }
    …
        // reuse objects
        private final static IntWritable SumValue = new IntWritable();
    -
    +
    +   // this sample's reduce() format is the same as map() \
    +   //  reduce is a method waiting for implement \
    +   //  four type in this sample is (Text , Iterator<IntWritable>, \
    +   //    OutputCollector<Text, IntWritable> , Reporter ) ;
        public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
    -     // sum up values
    +     // sum up value
          int sum = 0;
    -     while (values.hasNext()) {
    -       sum += values.next().get();
    +     // "key" is word , "value" is sum
    +     // why values.hasNext(), not key.hasNext()
    +     while (values.hasNext()) {
    +       // next() will return this value and pointer to next event \
    +       //  IntWritable.get() will transfer IntWritable to Int
    +       sum += values.next().get();
          }
    +     // IntWritable.set(int) will transfer Int to IntWritable
          SumValue.set(sum);
    +     // hense we set outputPath in main, the output.collect will put
    +     //  data in Hadoop
          output.collect(key, SumValue);
        }
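
    The comments added in this hunk spell out the two halves of the counting logic: the mapper splits each decoded line with StringTokenizer and collects (word, 1), and the reducer sums those ones per word before collecting (word, sum). The same logic can be sanity-checked outside MapReduce; a self-contained sketch with a made-up input line:

        import java.util.HashMap;
        import java.util.Map;
        import java.util.StringTokenizer;

        public class WordCountSketch {
          public static void main(String[] args) {
            // stand-in for one line decoded from the HBase column
            String line = "waue taiwan waue shellon";
            Map<String, Integer> counts = new HashMap<String, Integer>();

            // "map" half: StringTokenizer divides the line into words,
            // every occurrence counts as one
            StringTokenizer itr = new StringTokenizer(line);
            while (itr.hasMoreTokens()) {
              String word = itr.nextToken();
              // "reduce" half: sum up the ones per word
              Integer sum = counts.get(word);
              counts.put(word, sum == null ? 1 : sum + 1);
            }
            // prints each word with its count, e.g. waue=2
            System.out.println(counts);
          }
        }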