| 1 | {{{ |
| 2 | #!java |
| 3 | package tsmc; |
| 4 | |
| 5 | // TSMC serial program number 2 |
| 6 | // 0. Run this after TSMC1LoadFile. |
| 7 | // 1. Create several .txt files under /tmp/income/. |
| 8 | // Each line of a .txt file has the format: |
| 9 | // <any>:store:product:<other> |
| 10 | // xxx:T01:P4:000 |
| 11 | // ooo:T02:P1:bbs |
| 12 | // oo:T03:P1:0sf0 |
| 13 | // 2. Put /tmp/tsmc/income into HDFS as /user/xxxx/income. |
| 14 | // 3. Run this program. |
| 15 | // |
| 16 | |
| 17 | import java.io.IOException; |
| 18 | |
| 19 | import org.apache.hadoop.conf.Configuration; |
| 20 | import org.apache.hadoop.fs.Path; |
| 21 | import org.apache.hadoop.hbase.client.Put; |
| 22 | import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; |
| 23 | import org.apache.hadoop.hbase.mapreduce.TableReducer; |
| 24 | import org.apache.hadoop.hbase.util.Bytes; |
| 25 | import org.apache.hadoop.io.IntWritable; |
| 26 | import org.apache.hadoop.io.LongWritable; |
| 27 | import org.apache.hadoop.io.Text; |
| 28 | import org.apache.hadoop.mapreduce.Job; |
| 29 | import org.apache.hadoop.mapreduce.Mapper; |
| 30 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; |
| 31 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; |
| 32 | |
| 33 | public class TSMC2Count { |
| 34 | public static class HtMap extends |
| 35 | Mapper<LongWritable, Text, Text, IntWritable> { |
| 36 | private IntWritable one = new IntWritable(1); |
| 37 | public void map(LongWritable key, Text value, Context context) |
| 38 | throws IOException, InterruptedException { |
| 39 | String s[] = value.toString().trim().split(":"); |
| 40 | // xxx:T01:P4:oooo => T01@P4 |
| 41 | String str = s[1] + "@" + s[2]; |
| 42 | context.write(new Text(str), one); |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | public static class HtReduce extends |
| 47 | TableReducer<Text, IntWritable, LongWritable> { |
| 48 | public void reduce(Text key, Iterable<IntWritable> values, |
| 49 | Context context) throws IOException, InterruptedException { |
| 50 | int sum = 0; |
| 51 | for (IntWritable i : values) { |
| 52 | sum += i.get(); |
| 53 | } |
| 54 | // T01@P4 => ( row = T01 , Column= Turnover:P4 ) |
| 55 | String[] str = (key.toString()).split("@"); |
| 56 | byte[] row = (str[0]).getBytes(); |
| 57 | byte[] family = Bytes.toBytes("Turnover"); |
| 58 | byte[] qualifier = (str[1]).getBytes(); |
| 59 | byte[] summary = Bytes.toBytes(String.valueOf(sum)); |
| 60 | Put put = new Put(row); |
| 61 | put.add(family, qualifier, summary ); |
| 62 | context.write(new LongWritable(), put); |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | public static void main(String args[]) throws Exception { |
| 67 | // debug |
| 68 | |
| 69 | String input = "income"; |
| 70 | |
| 71 | String tablename = "tsmc"; |
| 72 | |
| 73 | Configuration conf = new Configuration(); |
| 74 | |
| 75 | conf.set(TableOutputFormat.OUTPUT_TABLE, tablename); |
| 76 | |
| 77 | Job job = new Job(conf, "Count to tsmc"); |
| 78 | |
| 79 | job.setJarByClass(TSMC2Count.class); |
| 80 | |
| 81 | job.setMapperClass(HtMap.class); |
| 82 | job.setReducerClass(HtReduce.class); |
| 83 | |
| 84 | job.setMapOutputKeyClass(Text.class); |
| 85 | job.setMapOutputValueClass(IntWritable.class); |
| 86 | |
| 87 | job.setInputFormatClass(TextInputFormat.class); |
| 88 | |
| 89 | job.setOutputFormatClass(TableOutputFormat.class); |
| 90 | |
| 91 | FileInputFormat.addInputPath(job, new Path(input)); |
| 92 | |
| 93 | System.exit(job.waitForCompletion(true) ? 0 : 1); |
| 94 | } |
| 95 | } |
| 96 | }}} |