| 1 | {{{ |
| 2 | #!html |
| 3 | <div style="text-align: center; color:#151B8D"><big style="font-weight: bold;"><big><big> |
| 4 | Hadoop 進階課程 |
| 5 | </big></big></big></div> <div style="text-align: center; color:#7E2217"><big style="font-weight: bold;"><big> |
| 6 | 範例練習 |
| 7 | </big></big></div> |
| 8 | }}} |
| 9 | |
| 10 | [wiki:NCHCCloudCourse100928_4_EXM2 上一關 < ] 第三關 [wiki:NCHCCloudCourse100928_4_EXM4 > 下一關] |
| 11 | |
| 12 | {{{ |
| 13 | #!java |
| 14 | package org.nchc.hadoop; |
| 15 | import java.io.IOException; |
| 16 | |
| 17 | import org.apache.hadoop.conf.Configuration; |
| 18 | import org.apache.hadoop.fs.Path; |
| 19 | import org.apache.hadoop.io.Text; |
| 20 | import org.apache.hadoop.mapreduce.Job; |
| 21 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; |
| 22 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
| 23 | import org.apache.hadoop.util.GenericOptionsParser; |
| 24 | |
| 25 | // HelloHadoopV3 |
| 26 | // 說明: |
| 27 | // 此程式碼再利用了 HelloHadoopV2 的 map , reduce 檔,並且 |
| 28 | // 自動將檔案上傳到hdfs上運算並自動取回結果,還有 |
| 29 | // 提示訊息 、參數輸入 與 印出運算時間 的功能 |
| 30 | // |
| 31 | // 測試方法: |
| 32 | // 將此程式運作在hadoop 0.20 平台上,執行: |
| 33 | // --------------------------- |
| 34 | // hadoop jar HelloHadoopV3.jar /home/$yourname/input /home/$yourname/output-hh3 |
| 35 | // --------------------------- |
| 36 | // |
| 37 | // 注意: |
| 38 | // 1. 第一個輸入的參數是在local 的 輸入資料夾,請確認此資料夾內有資料並無子目錄 |
| 39 | // 2. 第二個輸入的參數是在local 的 運算結果資料夾,由程式產生不用事先建立,若有請刪除之 |
| 40 | |
| 41 | |
| 42 | public class HelloHadoopV3 { |
| 43 | |
| 44 | public static void main(String[] args) throws IOException, |
| 45 | InterruptedException, ClassNotFoundException { |
| 46 | |
| 47 | // debug using |
| 48 | // String[] argv = {"/home/hadooper/input","/home/hadooper/output-hh3"}; |
| 49 | // args = argv; |
| 50 | |
| 51 | String hdfs_input = "HH3_input"; |
| 52 | String hdfs_output = "HH3_output"; |
| 53 | |
| 54 | Configuration conf = new Configuration(); |
| 55 | // 宣告取得參數 |
| 56 | String[] otherArgs = new GenericOptionsParser(conf, args) |
| 57 | .getRemainingArgs(); |
| 58 | // 如果參數數量不為2 則印出提示訊息 |
| 59 | if (otherArgs.length != 2) { |
| 60 | System.err |
| 61 | .println("Usage: hadoop jar HelloHadoopV3.jar <local_input> <local_output>"); |
| 62 | System.exit(2); |
| 63 | } |
| 64 | Job job = new Job(conf, "Hadoop Hello World"); |
| 65 | job.setJarByClass(HelloHadoopV3.class); |
| 66 | // set map and reduce class |
| 67 | job.setMapperClass(HelloMapperV2.class); |
| 68 | job.setCombinerClass(HelloReducerV2.class); |
| 69 | job.setReducerClass(HelloReducerV2.class); |
| 70 | |
| 71 | job.setMapOutputKeyClass(Text.class); |
| 72 | job.setMapOutputValueClass(Text.class); |
| 73 | |
| 74 | job.setOutputKeyClass(Text.class); |
| 75 | job.setOutputValueClass(Text.class); |
| 76 | |
| 77 | |
| 78 | // 用 checkAndDelete 函式防止overhead的錯誤 |
| 79 | CheckAndDelete.checkAndDelete(hdfs_input, conf); |
| 80 | CheckAndDelete.checkAndDelete(hdfs_output, conf); |
| 81 | |
| 82 | // 放檔案到hdfs |
| 83 | PutToHdfs.putToHdfs(args[0], hdfs_input, conf); |
| 84 | |
| 85 | // 設定hdfs 的輸入輸出來源路定 |
| 86 | FileInputFormat.addInputPath(job, new Path(hdfs_input)); |
| 87 | FileOutputFormat.setOutputPath(job, new Path(hdfs_output)); |
| 88 | |
| 89 | |
| 90 | long start = System.nanoTime(); |
| 91 | |
| 92 | job.waitForCompletion(true); |
| 93 | |
| 94 | // 把hdfs的結果取下 |
| 95 | GetFromHdfs.getFromHdfs(hdfs_output, args[1], conf); |
| 96 | |
| 97 | boolean status = job.waitForCompletion(true); |
| 98 | // 計算時間 |
| 99 | if (status) { |
| 100 | System.err.println("Integrate Alert Job Finished !"); |
| 101 | long time = System.nanoTime() - start; |
| 102 | System.err.println(time * (1E-9) + " secs."); |
| 103 | |
| 104 | } else { |
| 105 | System.err.println("Integrate Alert Job Failed !"); |
| 106 | System.exit(1); |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | }}} |
| 112 | |
| 113 | |