{{{
#!java

package org.nchc.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class HelloHadoopV3 {

	public static void main(String[] args) throws IOException,
			InterruptedException, ClassNotFoundException {

		// to run inside Eclipse instead, hard-code the arguments:
		// String[] argv = { "/home/hadoop/input", "/home/hadoop/output-hh3" };
		// args = argv;

		String hdfs_input = "HH3_input";
		String hdfs_output = "HH3_output";

		Configuration conf = new Configuration();
		// let GenericOptionsParser strip the generic Hadoop options and
		// hand back the remaining application arguments
		String[] otherArgs = new GenericOptionsParser(conf, args)
				.getRemainingArgs();
		// if the number of arguments is not exactly 2, print a usage hint
		if (otherArgs.length != 2) {
			System.err.println("Usage: hadoop jar HelloHadoopV3.jar <local_input> <local_output>");
			System.exit(2);
		}
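
		// note: since GenericOptionsParser ran first, the generic Hadoop
		// options also work on the command line, e.g.
		//   hadoop jar HelloHadoopV3.jar -D mapred.reduce.tasks=2 <in> <out>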
		Job job = new Job(conf, "Hadoop Hello World");
		job.setJarByClass(HelloHadoopV3.class);
		// set the map and reduce classes; this lab expects the classes
		// written in the HelloHadoopV2 exercise (see the sketch after
		// this listing)
		job.setMapperClass(HelloMapperV2.class); // fill in the correct mapper
		job.setCombinerClass(HelloReducerV2.class); // fill in the correct combiner
		job.setReducerClass(HelloReducerV2.class); // fill in the correct reducer

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// use the checkAndDelete helper to remove any leftover paths, so
		// the job cannot fail because they already exist
		CheckAndDelete.checkAndDelete(hdfs_input, conf);
		CheckAndDelete.checkAndDelete(hdfs_output, conf);

		// upload the local input files to HDFS
		PutToHdfs.putToHdfs(otherArgs[0], hdfs_input, conf);

		// set the HDFS input and output paths for the job
		FileInputFormat.addInputPath(job, new Path(hdfs_input));
		FileOutputFormat.setOutputPath(job, new Path(hdfs_output));
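
		// addInputPath can be called more than once to add extra input
		// directories; setOutputPath must point at a directory that does
		// not exist yet, which is why HH3_output was deleted above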

		long start = System.nanoTime();

		// run the job and wait for it to finish; waitForCompletion may
		// only be called once per Job instance
		boolean status = job.waitForCompletion(true);

		// report the result and the elapsed time
		if (status) {
			// copy the results from HDFS back to the local output path
			GetFromHdfs.getFromHdfs(hdfs_output, otherArgs[1], conf);
			System.err.println("HelloHadoopV3 job finished!");
			long time = System.nanoTime() - start;
			System.err.println(time * (1E-9) + " secs.");
		} else {
			System.err.println("HelloHadoopV3 job failed!");
			System.exit(1);
		}
	}
}
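
// ---------------------------------------------------------------------
// The three helper classes used above (CheckAndDelete, PutToHdfs,
// GetFromHdfs) come from the earlier labs; in the course each one lives
// in its own file under org.nchc.hadoop. What follows is only a minimal
// sketch of what they could look like, built on the standard FileSystem
// API; it is an assumption, not the official course code.
// ---------------------------------------------------------------------

class CheckAndDelete {
	// delete the given HDFS path recursively if it already exists
	static boolean checkAndDelete(String path, Configuration conf)
			throws IOException {
		org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem
				.get(conf);
		Path dst = new Path(path);
		if (fs.exists(dst)) {
			return fs.delete(dst, true);
		}
		return true;
	}
}

class PutToHdfs {
	// copy a local file or directory into HDFS
	static void putToHdfs(String src, String dst, Configuration conf)
			throws IOException {
		org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem
				.get(conf);
		fs.copyFromLocalFile(new Path(src), new Path(dst));
	}
}

class GetFromHdfs {
	// copy a file or directory from HDFS back to the local disk
	static void getFromHdfs(String src, String dst, Configuration conf)
			throws IOException {
		org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem
				.get(conf);
		fs.copyToLocalFile(new Path(src), new Path(dst));
	}
}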
}}}
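
The lab leaves `setMapperClass`, `setCombinerClass` and `setReducerClass` for you to fill in; the listing above assumes the `HelloMapperV2` / `HelloReducerV2` classes from the HelloHadoopV2 exercise. As a minimal, hypothetical sketch of classes that satisfy the declared `Text`/`Text` types (not the official course code), they could look like:

{{{
#!java
package org.nchc.hadoop;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// with the default TextInputFormat the mapper receives the byte offset
// of each line as the key and the line itself as the value
class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> {
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// emit the byte offset as the key and the line as the value
		context.write(new Text(key.toString()), value);
	}
}

// concatenates every value that arrived with the same key; because its
// input and output types match, it can also serve as the combiner
class HelloReducerV2 extends Reducer<Text, Text, Text, Text> {
	public void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		StringBuilder sb = new StringBuilder();
		for (Text v : values) {
			sb.append(v.toString());
		}
		context.write(key, new Text(sb.toString()));
	}
}
}}}

Once everything compiles into `HelloHadoopV3.jar`, run it as the usage string says: `hadoop jar HelloHadoopV3.jar <local_input> <local_output>`. The program uploads `<local_input>` to `HH3_input` on HDFS, runs the job, and copies `HH3_output` back to `<local_output>`.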