= Hello Hadoop V3 =

{{{
#!java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
// HelloHadoopV3
// Description:
//   This program reuses the map and reduce classes from HelloHadoopV2.
//   It automatically uploads the input files to HDFS, runs the job, and
//   automatically copies the result back to the local file system. It
//   also provides a usage message, argument parsing, and prints the
//   elapsed run time.
//
// How to test:
//   Run this program on the Hadoop 0.20 platform:
//   ---------------------------
//   hadoop jar HelloHadoopV3.jar /home/$yourname/input /home/$yourname/output-hh3
//   ---------------------------
//
// Notes:
//   1. The first argument is a local input folder. Make sure it contains
//      data and has no subdirectories.
//   2. The second argument is a local result folder. It is created by the
//      program, so do not create it in advance; if it already exists,
//      delete it first.

public class HelloHadoopV3 {

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {

        // for debugging:
        // String[] argv = {"/home/waue/input","/home/waue/output-hh3"};
        // args = argv;

        String hdfs_input = "HH3_input";
        String hdfs_output = "HH3_output";

        Configuration conf = new Configuration();
        // parse the command-line arguments
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        // if the number of arguments is not 2, print a usage message
        if (otherArgs.length != 2) {
            System.err
                    .println("Usage: hadoop jar HelloHadoopV3.jar <local_input> <local_output>");
            System.exit(2);
        }
        Job job = new Job(conf, "Hadoop Hello World");
        job.setJarByClass(HelloHadoopV3.class);
        // set the map and reduce classes (reused from HelloHadoopV2);
        // note that a reducer may double as a combiner only when its
        // logic is commutative and associative and its input and output
        // types match
        job.setMapperClass(HelloMapperV2.class);
        job.setCombinerClass(HelloReducerV2.class);
        job.setReducerClass(HelloReducerV2.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // use checkAndDelete to remove any leftover HDFS directories, so
        // the job does not fail because a path from a previous run exists
        CheckAndDelete.checkAndDelete(hdfs_input, conf);
        CheckAndDelete.checkAndDelete(hdfs_output, conf);

        // upload the local input files to HDFS
        PutToHdfs.putToHdfs(args[0], hdfs_input, conf);

        // set the HDFS input and output paths for the job
        FileInputFormat.addInputPath(job, new Path(hdfs_input));
        FileOutputFormat.setOutputPath(job, new Path(hdfs_output));

        long start = System.nanoTime();

        // run the job and wait for it to finish; waitForCompletion()
        // should be called only once per Job instance
        boolean status = job.waitForCompletion(true);

        // report the result and the elapsed time
        if (status) {
            // copy the result from HDFS back to the local result folder
            GetFromHdfs.getFromHdfs(hdfs_output, args[1], conf);
            System.err.println("Hello Hadoop V3 job finished!");
            long time = System.nanoTime() - start;
            System.err.println(time * (1E-9) + " secs.");
        } else {
            System.err.println("Hello Hadoop V3 job failed!");
            System.exit(1);
        }
    }
}

}}}
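
The driver expects `HelloMapperV2` and `HelloReducerV2` from the HelloHadoopV2 example; they are not shown on this page. If you do not have them handy, a stand-in with the same names and type signatures (`Text` keys and values, and a reduce that is safe to reuse as a combiner) could look like the sketch below. This is an invented word-count variant for illustration, not the original V2 code.

{{{
#!java
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative stand-ins only: the real classes are defined in the
// HelloHadoopV2 example.

class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> {
    // emit (word, "1") for every token in the input line
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            context.write(new Text(itr.nextToken()), new Text("1"));
        }
    }
}

class HelloReducerV2 extends Reducer<Text, Text, Text, Text> {
    // sum the numeric values for each word; addition is commutative and
    // associative, so this reduce is also safe to use as a combiner
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (Text v : values) {
            sum += Long.parseLong(v.toString());
        }
        context.write(key, new Text(Long.toString(sum)));
    }
}
}}}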
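
The driver also calls three helper classes, `CheckAndDelete`, `PutToHdfs`, and `GetFromHdfs`, which come from the earlier examples in this series. For reference, a minimal sketch of what they might look like follows, assuming each one is a thin wrapper around the standard `org.apache.hadoop.fs.FileSystem` API; the class and method names match the calls above, but the bodies are illustrative, not the original implementations.

{{{
#!java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketches only: the real helpers ship with the earlier
// examples. These versions use the standard FileSystem API.

class CheckAndDelete {
    // delete the given HDFS path (recursively) if it already exists
    static void checkAndDelete(String path, Configuration conf)
            throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(path);
        if (fs.exists(p)) {
            fs.delete(p, true); // true = recursive
        }
    }
}

class PutToHdfs {
    // copy a local folder (or file) up to HDFS
    static void putToHdfs(String src, String dst, Configuration conf)
            throws IOException {
        FileSystem fs = FileSystem.get(conf);
        fs.copyFromLocalFile(false, new Path(src), new Path(dst));
    }
}

class GetFromHdfs {
    // copy an HDFS folder (or file) down to the local file system
    static void getFromHdfs(String src, String dst, Configuration conf)
            throws IOException {
        FileSystem fs = FileSystem.get(conf);
        fs.copyToLocalFile(false, new Path(src), new Path(dst));
    }
}
}}}

On Hadoop 0.20 these `FileSystem` calls are the usual way for driver code to stage data into and out of HDFS.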