Ignore:
Timestamp:
Jun 13, 2008, 5:45:02 PM (16 years ago)
Author:
waue
Message:

comment

File:
1 edited

Legend:

Unmodified
Added
Removed
  • sample/WordCountFromHBase.java

    r8 r9  
    33 * Editor: Waue Chen
    44 * From :  NCHC. Taiwan
    5  * Last Update Date: 06/10/2008
     5 * Last Update Date: 06/13/2008
    66 */
    77
    88/**
    99 * Purpose :
    10  *  Store the result of WordCountIntoHbase.java from Hbase to Hadoop file system
     10 *  Word counting from Hbase then store result in Hadoop file system
    1111 *
    1212 * HowToUse :
    13  *  Make sure Hadoop file system and HBase are running correctly.
    14  *  Then run the program with BuildHTable.java after \
    15  *  modifying these setup parameters.
     13 *  Make sure the Hadoop file system is running and HBase has correct data.
     14 *  It is suggested to run WordCountIntoHBase first.
     15 *  Finally, modify these setup parameters and run.
    1616 *
    1717 * Check Result:
    18  *  inspect http://localhost:60070 by web explorer
     18 * 
     19 *  inspect http://localhost:50070 with a web browser
    1920 */
    2021
     
    8081      String line = Text.decode( ((ImmutableBytesWritable) cols.get(textcol) )
    8182          .get() );
     83     
    8284      //let us know what is "line"
    8385      /*
     
    9092      // the result is the contents of merged files "
    9193     
     94      // StringTokenizer splits a line into individual words
    9295      StringTokenizer itr = new StringTokenizer(line);
    9396      // set every word as one
    9497      while (itr.hasMoreTokens()) {
    95         word.set(itr.nextToken());       
     98        // nextToken() returns the current token as a String and advances to the next one \
     99        // Text.set() = Set to contain the contents of a string.
     100        word.set(itr.nextToken()); 
     101        // OutputCollector.collect = collect(K key, V value) \
     102        //  Adds a key/value pair to the output.
    96103        output.collect(word, one);
    97104      }
     
    105112    // reuse objects
    106113    private final static IntWritable SumValue = new IntWritable();
    107 
     114   
     115    // this sample's reduce() format is the same as map() \
     116    //  reduce is a method that must be implemented \
     117    //  the four parameter types in this sample are (Text , Iterator<IntWritable>, \
     118    //    OutputCollector<Text, IntWritable> , Reporter ) ;
    108119    public void reduce(Text key, Iterator<IntWritable> values,
    109120        OutputCollector<Text, IntWritable> output, Reporter reporter)
    110121        throws IOException {
    111       // sum up values
     122      // sum up value
    112123      int sum = 0;
    113       while (values.hasNext()) {
    114         sum += values.next().get();
     124      // "key" is word , "value" is sum
     125      // why values.hasNext(), not key.hasNext()
     126      while (values.hasNext()) {
     127        // next() returns the current value and advances to the next element \
     128        //  IntWritable.get() converts the IntWritable to an int
     129        sum += values.next().get();
    115130      }
     131      // IntWritable.set(int) stores the int value in the IntWritable
    116132      SumValue.set(sum);
     133      // since we set outputPath in main, output.collect will put
     134      //  data in Hadoop
    117135      output.collect(key, SumValue);
    118136    }
Note: See TracChangeset for help on using the changeset viewer.