package tsmc; // TSMC serial program number 2 // 0. after TSMC1LoadFile // 1. build several .txt files to /tmp/income/ // txt format is as: // <any>:store:product:<other> // xxx:T01:P4:000 // ooo:T02:P1:bbs // oo:T03:P1:0sf0 // 2. put /tmp/tsmc/income to hdfs: /user/xxxx/income // 3. run it // import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; import org.apache.hadoop.hbase.mapreduce.TableReducer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; public class TSMC2Count { public static class HtMap extends Mapper<LongWritable, Text, Text, IntWritable> { private IntWritable one = new IntWritable(1); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String s[] = value.toString().trim().split(":"); // xxx:T01:P4:oooo => T01@P4 String str = s[1] + "@" + s[2]; context.write(new Text(str), one); } } public static class HtReduce extends TableReducer<Text, IntWritable, LongWritable> { public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable i : values) { sum += i.get(); } // T01@P4 => ( row = T01 , Column= Turnover:P4 ) String[] str = (key.toString()).split("@"); byte[] row = (str[0]).getBytes(); byte[] family = Bytes.toBytes("Turnover"); byte[] qualifier = (str[1]).getBytes(); byte[] summary = Bytes.toBytes(String.valueOf(sum)); Put put = new Put(row); put.add(family, qualifier, summary ); context.write(new LongWritable(), put); } } public static void 
main(String args[]) throws Exception { // debug String input = "income"; String tablename = "tsmc"; Configuration conf = new Configuration(); conf.set(TableOutputFormat.OUTPUT_TABLE, tablename); Job job = new Job(conf, "Count to tsmc"); job.setJarByClass(TSMC2Count.class); job.setMapperClass(HtMap.class); job.setReducerClass(HtReduce.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TableOutputFormat.class); FileInputFormat.addInputPath(job, new Path(input)); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
// Last modified on Feb 3, 2010, 9:56:35 PM