- Timestamp: Jun 13, 2008, 5:45:02 PM
- Location: sample
- Files: 3 edited
sample/HBaseRecord.java
r8 -> r9:

-/*
- * NCHC Hbase with map reduce sample code
- * DemoHBaseSlink.java
+/**
+ * Program: HBaseRecord.java
+ * Editor: Waue Chen
+ * From : NCHC. Taiwn
+ * Last Update Date: 06/01/2008
  */
 
-package tw.org.nchc.code;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapred.TableReduce;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.MapWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-import org.apache.hadoop.mapred.lib.IdentityReducer;
-
 /**
- * This sample code will put the indicate data to Hbase.
- * 1. put test.txt in t1 directory which content is
----------------
-name:locate:years
-waue:taiwan:1981
-shellon:taiwan:1981
----------------
- * 2. hadoop_root/$ bin/hadoop dfs -put t1 t1
- * 3. hbase_root/$ bin/hbase shell
- * 4. hql > create table t1_table("person");
- * 5. Come to Eclipse and run this code, and we will let database as that
- t1_table -> person
- ----------------
- | name    | locate | years |
- ----------------
- | waue    | taiwan | 1981  |
- ----------------
- | shellon | taiwan | 1981  |
- * 6. Go to hbase console, type : hql > select * from t1_table;
+ * Purpose :
+ * Parse your record and then store in HBase.
+ *
+ * HowToUse :
+ * Make sure Hadoop file system and Hbase are running correctly.
+ * 1. put test.txt in t1 directory which content is
+---------------
+name:locate:years
+waue:taiwan:1981
+shellon:taiwan:1981
+---------------
+ * 2. hadoop_root/$ bin/hadoop dfs -put t1 t1
+ * 3. hbase_root/$ bin/hbase shell
+ * 4. hql > create table t1_table("person");
+ * 5. Come to Eclipse and run this code, and we will let database as that
+ t1_table -> person
+ ----------------
+ | name    | locate | years |
+ | waue    | taiwan | 1981  |
+ | shellon | taiwan | 1981  |
+ ----------------
+ * Check Result:
+ * Go to hbase console, type :
+ * hql > select * from t1_table;
 08/06/06 12:20:48 INFO hbase.HTable: Creating scanner over t1_table starting at key
 +-------------------------+-------------------------+-------------------------+
 …
 +-------------------------+-------------------------+-------------------------+
 3 row(s) in set. (0.04 sec)
-**/
+ */
+
+package tw.org.nchc.code;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapred.TableReduce;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
 
 public class HBaseRecord {
 
 	/* Denify parameter */
 	// one column family: person; three column qualifier: name,locate,years
-	static private String baseId1 =
+	static private String baseId1 ="person:name";
 	static private String baseId2 ="person:locate";
 	static private String baseId3 ="person:years";
…
 	public static void main(String[] args) throws IOException {
 		// which path of input files in Hadoop file system
-
-
+
 		HBaseRecord setup = new HBaseRecord();
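The r9 comment describes the job's purpose (parse name:locate:years records and store them under the person column family), but the reduce step itself lies outside the visible hunks. As a rough illustration only, here is a minimal sketch of such a step against the HBase 0.1-era TableReduce contract suggested by the file's imports, i.e. a Text row key paired with a MapWritable of column name to ImmutableBytesWritable value. The class name RecordReduceSketch, the choice of the name field as row key, and that exact contract are all assumptions, not code from the changeset.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical sketch: parse "name:locate:years" lines and emit one HBase row
// per record, assuming the 0.1-era TableReduce reduces to (Text, MapWritable).
public class RecordReduceSketch extends TableReduce<LongWritable, Text> {
	public void reduce(LongWritable key, Iterator<Text> values,
			OutputCollector<Text, MapWritable> output, Reporter reporter)
			throws IOException {
		while (values.hasNext()) {
			// one input line, e.g. "waue:taiwan:1981"
			String[] fields = values.next().toString().split(":");
			MapWritable row = new MapWritable();
			row.put(new Text("person:name"),
					new ImmutableBytesWritable(fields[0].getBytes()));
			row.put(new Text("person:locate"),
					new ImmutableBytesWritable(fields[1].getBytes()));
			row.put(new Text("person:years"),
					new ImmutableBytesWritable(fields[2].getBytes()));
			// row key chosen here as the name field (an assumption)
			output.collect(new Text(fields[0]), row);
		}
	}
}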
sample/WordCount.java
r8 -> r9:

-/*
- * map reduce sample code
+/**
+ * Program: WordCount.java
+ * Editor: Waue Chen
+ * From : NCHC. Taiwn
+ * Last Update Date: 06/13/2008
+ */
+
+/**
+ * Purpose :
+ * Store the result of WordCount.java from Hbase to Hadoop file system
+ *
+ * HowToUse :
+ * Make sure Hadoop file system is running correctly.
+ * Put text file on the directory "/local_src/input"
+ * You can use the instruction to upload "/local_src/input" to HDFS input dir
+ * $ bin/hadoop dfs -put /local_src/input input
+ * Then modify the $filepath parameter in construtor to be correct and run this code.
+ *
+ *
+ * Check Result:
+ * inspect http://localhost:50070 by web explorer
  */
 package tw.org.nchc.code;
…
 public class WordCount {
+	private String filepath;
+	private String outputPath;
+
+	public WordCount(){
+		filepath = "/user/waue/input/";
+		outputPath = "counts1";
+	}
+	public WordCount(String path,String output){
+		filepath = path;
+		outputPath = output;
+	}
 	// mapper: emits (token, 1) for every word occurrence
…
 	public static void main(String[] args) throws IOException {
-		String filename = "/user/waue/input/";
-		String outputPath = "sample-counts";
-		int mapTasks = 20;
+		WordCount wc = new WordCount();
+
+		int mapTasks = 1;
 		int reduceTasks = 1;
-
 		JobConf conf = new JobConf(WordCount.class);
 		conf.setJobName("wordcount");
…
 		conf.setNumReduceTasks(reduceTasks);
 
-		conf.setInputPath(new Path(filename));
+		conf.setInputPath(new Path(wc.filepath));
 		conf.setOutputKeyClass(Text.class);
 		conf.setOutputValueClass(IntWritable.class);
-		conf.setOutputPath(new Path(outputPath));
+		conf.setOutputPath(new Path(wc.outputPath));
…
 		// Delete the output directory if it exists already
-		Path outputDir = new Path(outputPath);
+		Path outputDir = new Path(wc.outputPath);
 		FileSystem.get(conf).delete(outputDir);
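The "// mapper: emits (token, 1) for every word occurrence" comment survives unchanged in the diff, but MapClass itself is not shown. For reference, a self-contained mapper of that shape in the old org.apache.hadoop.mapred API looks like the sketch below; the class name WordCountMapSketch is illustrative, not the changeset's own code.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Illustrative mapper: emits (token, 1) for every word occurrence in a line.
public class WordCountMapSketch extends MapReduceBase implements
		Mapper<LongWritable, Text, Text, IntWritable> {
	// reuse objects across map() calls, as the samples in this changeset do
	private final static IntWritable one = new IntWritable(1);
	private final Text word = new Text();

	public void map(LongWritable key, Text value,
			OutputCollector<Text, IntWritable> output, Reporter reporter)
			throws IOException {
		// split the input line into tokens and emit (token, 1) for each
		StringTokenizer itr = new StringTokenizer(value.toString());
		while (itr.hasMoreTokens()) {
			word.set(itr.nextToken());
			output.collect(word, one);
		}
	}
}

With the constructors r9 adds, the same job can be built against any path pair; for example, new WordCount("/user/waue/input/", "counts1") reproduces the defaults.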
sample/WordCountFromHBase.java
r8 -> r9:

  * Editor: Waue Chen
  * From : NCHC. Taiwn
- * Last Update Date: 06/10/2008
+ * Last Update Date: 06/13/2008
  */
 
 /**
  * Purpose :
- * Store the result of WordCountIntoHbase.java from Hbase to Hadoop file system
+ * Word counting from Hbase then store result in Hadoop file system
  *
  * HowToUse :
- * Make sure Hadoop file system and HBase are running correctly.
- * Then run the program with BuildHTable.java after \
- * modifying these setup parameters.
+ * Make sure Hadoop file system are running and HBase has correct data.
+ * Suggest to run WordCountIntoHBase first.
+ * finally, modify these setup parameters and run.
  *
  * Check Result:
- * inspect http://localhost:60070 by web explorer
+ *
+ * inspect http://localhost:50070 by web explorer
  */
…
 		String line = Text.decode( ((ImmutableBytesWritable) cols.get(textcol) )
 				.get() );
+
 		//let us know what is "line"
 		/*
…
 		// the result is the contents of merged files "
 
+		//StringTokenizer will divide a line into a word
 		StringTokenizer itr = new StringTokenizer(line);
 		// set every word as one
 		while (itr.hasMoreTokens()) {
-			word.set(itr.nextToken());
+			// nextToken will return this value in String and point to next \
+			// Text.set() = Set to contain the contents of a string.
+			word.set(itr.nextToken());
+			// OutputCollector.collect = collect(K key, V value) \
+			// Adds a key/value pair to the output.
 			output.collect(word, one);
 		}
…
 		// reuse objects
 		private final static IntWritable SumValue = new IntWritable();
 
+		// this sample's reduce() format is the same as map() \
+		// reduce is a method waiting for implement \
+		// four type in this sample is (Text , Iterator<IntWritable>, \
+		// OutputCollector<Text, IntWritable> , Reporter ) ;
 		public void reduce(Text key, Iterator<IntWritable> values,
 				OutputCollector<Text, IntWritable> output, Reporter reporter)
 				throws IOException {
-			// sum up values
+			// sum up value
 			int sum = 0;
-			while (values.hasNext()) {
-				sum += values.next().get();
+			// "key" is word , "value" is sum
+			// why values.hasNext(), not key.hasNext()
+			while (values.hasNext()) {
+				// next() will return this value and pointer to next event \
+				// IntWritable.get() will transfer IntWritable to Int
+				sum += values.next().get();
 			}
+			// IntWritable.set(int) will transfer Int to IntWritable
 			SumValue.set(sum);
+			// hense we set outputPath in main, the output.collect will put
+			// data in Hadoop
 			output.collect(key, SumValue);
 		}
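The new comments ask why the loop advances values.hasNext() rather than key.hasNext(): the framework calls reduce() once per distinct word, so the key is fixed for the whole call and only the counts grouped under it are iterated. A standalone version of that summing reducer (the class name WordCountReduceSketch is illustrative) looks like this:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Illustrative reducer: sums the 1-counts emitted for each word.
public class WordCountReduceSketch extends MapReduceBase implements
		Reducer<Text, IntWritable, Text, IntWritable> {
	// reused across reduce() calls instead of allocating a new IntWritable
	private final static IntWritable SumValue = new IntWritable();

	public void reduce(Text key, Iterator<IntWritable> values,
			OutputCollector<Text, IntWritable> output, Reporter reporter)
			throws IOException {
		int sum = 0;
		while (values.hasNext()) {
			sum += values.next().get(); // unwrap each IntWritable to an int
		}
		SumValue.set(sum);
		output.collect(key, SumValue); // emit (word, total count)
	}
}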