14 | | * HelloHadoopV2 |
| 15 | {{{ |
| 16 | #!text |
| 17 | HelloHadoopV2 |
| 18 | 說明: |
| 19 | 此程式碼比HelloHadoop 增加 |
| 20 | * 檢查輸出資料夾是否存在並刪除 |
| 21 | * input 資料夾內的檔案若有兩個以上,則資料不會被覆蓋 |
| 22 | * map 與 reduce 拆開以利程式再利用 |
| 23 | |
| 24 | 測試方法: |
| 25 | 將此程式運作在hadoop 0.20 平台上,執行: |
| 26 | --------------------------- |
| 27 | hadoop jar HelloHadoopV2.jar |
| 28 | --------------------------- |
| 29 | |
| 30 | 注意: |
| 31 | 1. 在hdfs 上來源檔案的路徑為 "/user/$YOUR_NAME/input" |
| 32 | 請注意必須先放資料到此hdfs上的資料夾內,且此資料夾內只能放檔案,不可再放資料夾 |
| 33 | 2. 運算完後,程式會將執行結果放在 hdfs 上的輸出路徑 "/user/$YOUR_NAME/output-hh2" |
| 34 | }}} |
| 35 | |
| 36 | |
| 37 | * 請注意以下有三個 java 檔案,請先編譯 !HelloMapperV2.java 與 !HelloReducerV2.java,最後再編譯 !HelloHadoopV2.java |
| 38 | |
| 39 | |
| 40 | = !HelloMapperV2.java = |
| 41 | |
| 42 | {{{ |
| 43 | #!java |
| 44 | package org.nchc.hadoop; |
| 45 | import java.io.IOException; |
| 46 | |
| 47 | import org.apache.hadoop.io.LongWritable; |
| 48 | import org.apache.hadoop.io.Text; |
| 49 | import org.apache.hadoop.mapreduce.Mapper; |
| 50 | |
| 51 | public class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> { |
| 52 | /** Writes each input record back out unchanged, keyed by the string form of the incoming key. */ |
| 53 | @Override |
| 54 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { |
| 55 | context.write(new Text(key.toString()), value); |
| 56 | } |
| 57 | |
| 58 | } |
| 59 | |
| 60 | }}} |
| 61 | |
| 62 | = !HelloReducerV2.java = |
| 63 | |
| 64 | {{{ |
| 65 | #!java |
| 66 | package org.nchc.hadoop; |
| 67 | import java.io.IOException; |
| 68 | |
| 69 | import org.apache.hadoop.io.Text; |
| 70 | import org.apache.hadoop.mapreduce.Reducer; |
| 71 | |
| 72 | public class HelloReducerV2 extends Reducer<Text, Text, Text, Text> { |
| 73 | /** Concatenates all values of one key, each followed by " &&", and writes the result under that key. */ |
| 74 | @Override |
| 75 | public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { |
| 76 | // StringBuilder avoids re-copying the accumulated text on every iteration. |
| 77 | StringBuilder joined = new StringBuilder(); |
| 78 | Text finalKey = new Text(); |
| 79 | Text finalValue = new Text(); |
| 80 | // Append every value that shares this key; each one (including the last) is followed by " &&". |
| 81 | for (Text tmp : values) { |
| 82 | joined.append(tmp.toString()).append(" &&"); |
| 83 | } |
| 84 | |
| 85 | finalKey.set(key); |
| 86 | finalValue.set(joined.toString()); |
| 87 | context.write(finalKey, finalValue); |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | }}} |
| 92 | |
| 93 | = !HelloHadoopV2.java = |
| 94 | |
25 | | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
26 | | |
27 | | // HelloHadoopV2 |
28 | | // 說明: |
29 | | // 此程式碼比HelloHadoop 增加 |
30 | | // * 檢查輸出資料夾是否存在並刪除 |
31 | | // * input 資料夾內的檔案若有兩個以上,則資料不會被覆蓋 |
32 | | // * map 與 reduce 拆開以利程式再利用 |
33 | | // |
34 | | // 測試方法: |
35 | | // 將此程式運作在hadoop 0.20 平台上,執行: |
36 | | // --------------------------- |
37 | | // hadoop jar HelloHadoopV2.jar |
38 | | // --------------------------- |
39 | | // |
40 | | // 注意: |
41 | | // 1. 在hdfs 上來源檔案的路徑為 "/user/$YOUR_NAME/input" |
42 | | // 請注意必須先放資料到此hdfs上的資料夾內,且此資料夾內只能放檔案,不可再放資料夾 |
43 | | // 2. 運算完後,程式會將執行結果放在 hdfs 上的輸出路徑 "/user/$YOUR_NAME/output-hh2" |
44 | | // |
| 105 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
87 | | |
88 | | * HelloMapperV2 |
89 | | |
90 | | {{{ |
91 | | #!java |
92 | | package org.nchc.hadoop; |
93 | | import java.io.IOException; |
94 | | |
95 | | import org.apache.hadoop.io.LongWritable; |
96 | | import org.apache.hadoop.io.Text; |
97 | | import org.apache.hadoop.mapreduce.Mapper; |
98 | | |
99 | | public class HelloMapperV2 extends Mapper<LongWritable, Text, Text, Text> { |
100 | | /** Writes each input record back out unchanged, keyed by the string form of the incoming key. */ |
101 | | @Override |
102 | | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { |
103 | | context.write(new Text(key.toString()), value); |
104 | | } |
105 | | |
106 | | } |
107 | | |
108 | | }}} |
109 | | |
110 | | * HelloReducerV2 |
111 | | |
112 | | {{{ |
113 | | #!java |
114 | | package org.nchc.hadoop; |
115 | | import java.io.IOException; |
116 | | |
117 | | import org.apache.hadoop.io.Text; |
118 | | import org.apache.hadoop.mapreduce.Reducer; |
119 | | |
120 | | public class HelloReducerV2 extends Reducer<Text, Text, Text, Text> { |
121 | | /** Concatenates all values of one key, each followed by " &&", and writes the result under that key. */ |
122 | | @Override |
123 | | public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { |
124 | | // StringBuilder avoids re-copying the accumulated text on every iteration. |
125 | | StringBuilder joined = new StringBuilder(); |
126 | | Text finalKey = new Text(); |
127 | | Text finalValue = new Text(); |
128 | | // Append every value that shares this key; each one (including the last) is followed by " &&". |
129 | | for (Text tmp : values) { |
130 | | joined.append(tmp.toString()).append(" &&"); |
131 | | } |
132 | | |
133 | | finalKey.set(key); |
134 | | finalValue.set(joined.toString()); |
135 | | context.write(finalKey, finalValue); |
136 | | } |
137 | | } |
138 | | |
139 | | }}} |