- Timestamp: Jun 13, 2008, 5:45:02 PM
- Location: sample
- Files: 3 edited
sample/HBaseRecord.java
r8 → r9:

-/*
- * NCHC Hbase with map reduce sample code
- * DemoHBaseSlink.java
+/**
+ * Program: HBaseRecord.java
+ * Editor: Waue Chen
+ * From : NCHC. Taiwn
+ * Last Update Date: 06/01/2008
  */

-package tw.org.nchc.code;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapred.TableReduce;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-import org.apache.hadoop.mapred.lib.IdentityReducer;
-
 /**
- * This sample code will put the indicate data to Hbase.
- * 1. put test.txt in t1 directory which content is
- ---------------
- name:locate:years
- waue:taiwan:1981
- shellon:taiwan:1981
- ---------------
- * 2. hadoop_root/$ bin/hadoop dfs -put t1 t1
- * 3. hbase_root/$ bin/hbase shell
- * 4. hql > create table t1_table("person");
- * 5. Come to Eclipse and run this code, and we will let database as that
- t1_table -> person
- ----------------
- | name | locate | years |
- ----------------
- | waue | taiwan | 1981 |
- ----------------
- | shellon | taiwan | 1981 |
- * 6. Go to hbase console, type : hql > select * from t1_table;
+ * Purpose :
+ *   Parse your record and then store in HBase.
+ *
+ * HowToUse :
+ *   Make sure Hadoop file system and Hbase are running correctly.
+ * 1. put test.txt in t1 directory which content is
+ ---------------
+ name:locate:years
+ waue:taiwan:1981
+ shellon:taiwan:1981
+ ---------------
+ * 2. hadoop_root/$ bin/hadoop dfs -put t1 t1
+ * 3. hbase_root/$ bin/hbase shell
+ * 4. hql > create table t1_table("person");
+ * 5. Come to Eclipse and run this code, and we will let database as that
+ t1_table -> person
+ ----------------
+ | name | locate | years |
+ | waue | taiwan | 1981 |
+ | shellon | taiwan | 1981 |
+ ----------------
+ * Check Result:
+ *   Go to hbase console, type :
+ *     hql > select * from t1_table;
 08/06/06 12:20:48 INFO hbase.HTable: Creating scanner over t1_table starting at key
 +-------------------------+-------------------------+-------------------------+
 …
 +-------------------------+-------------------------+-------------------------+
 3 row(s) in set. (0.04 sec)
- **/
+ */
+
+package tw.org.nchc.code;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapred.TableReduce;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.mapred.lib.IdentityReducer;

 public class HBaseRecord {

 	/* Denify parameter */
 	// one column family: person; three column qualifier: name,locate,years
-	static private String baseId1 = "person:name";
+	static private String baseId1 ="person:name";
 	static private String baseId2 ="person:locate";
 	static private String baseId3 ="person:years";
 …
 	public static void main(String[] args) throws IOException {
 		// which path of input files in Hadoop file system
-
 		HBaseRecord setup = new HBaseRecord();
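The change itself is cosmetic (header rewritten, usage notes gain Purpose/HowToUse/Check Result sections, package and imports moved below the comment), but the import list hints at how the class works: an IdentityMapper feeds a TableReduce that turns each name:locate:years line into one HBase row. Below is a minimal sketch of such a reducer under the HBase 0.1-era mapred API implied by those imports; the class name RecordReduceSketch, the exact reduce signature, and the field-splitting logic are illustrative assumptions, not the committed code.

    package tw.org.nchc.code;

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapred.TableReduce;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.MapWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;

    // Illustrative sketch only: parse "name:locate:years" records and emit
    // one MapWritable per record, using the person: qualifiers that
    // HBaseRecord defines (baseId1..baseId3).
    public class RecordReduceSketch extends TableReduce<LongWritable, Text> {

        public void reduce(LongWritable key, Iterator<Text> values,
                OutputCollector<Text, MapWritable> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                // e.g. "waue:taiwan:1981" -> ["waue", "taiwan", "1981"]
                String[] fields = values.next().toString().split(":");
                MapWritable row = new MapWritable();
                row.put(new Text("person:name"),
                        new ImmutableBytesWritable(fields[0].getBytes()));
                row.put(new Text("person:locate"),
                        new ImmutableBytesWritable(fields[1].getBytes()));
                row.put(new Text("person:years"),
                        new ImmutableBytesWritable(fields[2].getBytes()));
                // the collected key becomes the HBase row key
                output.collect(new Text(fields[0]), row);
            }
        }
    }

Job setup would then bind the reducer to t1_table (the table created in step 4 of the usage notes); the exact TableReduce initialization call varies by HBase version, so treat that part as an assumption as well.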
sample/WordCount.java
r8 → r9:

-/*
- * map reduce sample code
+/**
+ * Program: WordCount.java
+ * Editor: Waue Chen
+ * From : NCHC. Taiwn
+ * Last Update Date: 06/13/2008
+ */
+
+/**
+ * Purpose :
+ *   Store the result of WordCount.java from Hbase to Hadoop file system
+ *
+ * HowToUse :
+ *   Make sure Hadoop file system is running correctly.
+ *   Put text file on the directory "/local_src/input"
+ *   You can use the instruction to upload "/local_src/input" to HDFS input dir
+ *     $ bin/hadoop dfs -put /local_src/input input
+ *   Then modify the $filepath parameter in construtor to be correct and run this code.
+ *
+ * Check Result:
+ *   inspect http://localhost:50070 by web explorer
  */
 package tw.org.nchc.code;
 …
 public class WordCount {
+	private String filepath;
+	private String outputPath;
+
+	public WordCount(){
+		filepath = "/user/waue/input/";
+		outputPath = "counts1";
+	}
+	public WordCount(String path,String output){
+		filepath = path;
+		outputPath = output;
+	}
 	// mapper: emits (token, 1) for every word occurrence
 …
 	public static void main(String[] args) throws IOException {
-		String filename = "/user/waue/input/";
-		String outputPath = "sample-counts";
-		int mapTasks = 20;
+		WordCount wc = new WordCount();
+
+		int mapTasks = 1;
 		int reduceTasks = 1;
 		JobConf conf = new JobConf(WordCount.class);
 		conf.setJobName("wordcount");
 …
 		conf.setNumReduceTasks(reduceTasks);
-		conf.setInputPath(new Path(filename));
+		conf.setInputPath(new Path(wc.filepath));
 		conf.setOutputKeyClass(Text.class);
 		conf.setOutputValueClass(IntWritable.class);
-		conf.setOutputPath(new Path(outputPath));
+		conf.setOutputPath(new Path(wc.outputPath));
 …
 		// Delete the output directory if it exists already
-		Path outputDir = new Path(outputPath);
+		Path outputDir = new Path(wc.outputPath);
 		FileSystem.get(conf).delete(outputDir);
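The functional change here replaces main()'s hard-coded filename/outputPath locals with instance fields and two constructors, so the same driver can be pointed at other paths without editing main(); it also drops mapTasks from 20 to 1. A hedged sketch of how a caller might exercise the new constructors (the command-line argument handling is illustrative, not part of the commit):

    // default paths: reads /user/waue/input/, writes counts1;
    // the two-arg form overrides both
    WordCount wc = (args.length == 2)
            ? new WordCount(args[0], args[1]) // caller-supplied input/output
            : new WordCount();                // fall back to the defaults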
sample/WordCountFromHBase.java
r8 → r9:

  * Editor: Waue Chen
  * From : NCHC. Taiwn
- * Last Update Date: 06/10/2008
+ * Last Update Date: 06/13/2008
  */

 /**
  * Purpose :
- *   Store the result of WordCountIntoHbase.java from Hbase to Hadoop file system
+ *   Word counting from Hbase then store result in Hadoop file system
  *
  * HowToUse :
- *   Make sure Hadoop file system and HBase are running correctly.
- *   Then run the program with BuildHTable.java after \
- *   modifying these setup parameters.
+ *   Make sure Hadoop file system are running and HBase has correct data.
+ *   Suggest to run WordCountIntoHBase first.
+ *   finally, modify these setup parameters and run.
  *
  * Check Result:
- *   inspect http://localhost:60070 by web explorer
+ *
+ *   inspect http://localhost:50070 by web explorer
  */
 …
 		String line = Text.decode(((ImmutableBytesWritable) cols.get(textcol))
 				.get());
+
 		//let us know what is "line"
 		/*
 …
 		// the result is the contents of merged files "

+		//StringTokenizer will divide a line into a word
 		StringTokenizer itr = new StringTokenizer(line);
 		// set every word as one
 		while (itr.hasMoreTokens()) {
-			word.set(itr.nextToken());
+			// nextToken will return this value in String and point to next \
+			// Text.set() = Set to contain the contents of a string.
+			word.set(itr.nextToken());
+			// OutputCollector.collect = collect(K key, V value) \
+			// Adds a key/value pair to the output.
 			output.collect(word, one);
 		}
 …
 		// reuse objects
 		private final static IntWritable SumValue = new IntWritable();

+		// this sample's reduce() format is the same as map() \
+		// reduce is a method waiting for implement \
+		// four type in this sample is (Text , Iterator<IntWritable>, \
+		// OutputCollector<Text, IntWritable> , Reporter ) ;
 		public void reduce(Text key, Iterator<IntWritable> values,
 				OutputCollector<Text, IntWritable> output, Reporter reporter)
 				throws IOException {
-			// sum up values
+			// sum up value
 			int sum = 0;
+			// "key" is word , "value" is sum
+			// why values.hasNext(), not key.hasNext()
 			while (values.hasNext()) {
+				// next() will return this value and pointer to next event \
+				// IntWritable.get() will transfer IntWritable to Int
 				sum += values.next().get();
 			}
+			// IntWritable.set(int) will transfer Int to IntWritable
 			SumValue.set(sum);
+			// hense we set outputPath in main, the output.collect will put
+			// data in Hadoop
 			output.collect(key, SumValue);
 		}
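This revision is almost entirely comment additions walking through the tokenize-and-sum mechanics. The Writable calls those comments describe can be exercised outside MapReduce; here is a small self-contained demo of them (the class name TokenizerDemo and the sample line are made up for illustration, assuming hadoop-core on the classpath):

    import java.util.StringTokenizer;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;

    public class TokenizerDemo {
        public static void main(String[] args) {
            Text word = new Text();
            // StringTokenizer divides a line into words on whitespace
            StringTokenizer itr = new StringTokenizer("hbase hadoop hbase");
            int sum = 0;
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken()); // Text.set(String) reuses one object
                if (word.toString().equals("hbase")) {
                    sum++; // stand-in for the reducer's summing loop
                }
            }
            IntWritable sumValue = new IntWritable();
            sumValue.set(sum);                  // int -> IntWritable
            System.out.println(sumValue.get()); // IntWritable -> int, prints 2
        }
    }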