| | 1 | ◢ <[wiki:III140705/Lab20 實作二十]> | <[wiki:III140705 回課程大綱]> ▲ | <[wiki:III140705/Lab22 實作二十二]> ◣ |
| | 2 | |
| | 3 | = 實作二十一 Lab21 = |
| | 4 | |
| | 5 | {{{ |
| | 6 | #!html |
| | 7 | <p style="text-align: center;"><big style="font-weight: bold;"><big> 修改檔案輸入格式<br/> KeyValueTextInputFormat </big></big></p> |
| | 8 | }}} |
| | 9 | |
| | 10 | [[PageOutline]] |
| | 11 | |
| | 12 | {{{ |
| | 13 | #!text |
| | 14 | 請先連線至 nodeN.3du.me , N 為您的報名編號 |
| | 15 | }}} |
| | 16 | |
| | 17 | {{{ |
| | 18 | cd ~/hadoop_labs/lab012 |
| | 19 | ant |
| | 20 | mkdir -p kv_input |
| | 21 | printf "A\t1\n" > kv_input/input1 |
| | 22 | printf "B\t2\n" >> kv_input/input1 |
| | 23 | printf "C\t3\n" >> kv_input/input1 |
| | 24 | printf "A\t1\n" > kv_input/input2 |
| | 25 | printf "C\t2\n" >> kv_input/input2 |
| | 26 | printf "B\t1\n" >> kv_input/input2 |
| | 27 | hadoop fs -put kv_input kv_input |
| | 28 | hadoop jar WordCount.jar kv_input kv_output |
| | 29 | hadoop fs -ls kv_output |
| | 30 | hadoop fs -cat kv_output/part-* |
| | 31 | |
| | 32 | export HADOOP_CONF_DIR=~/hadoop/conf.local/ |
| | 33 | hadoop jar WordCount.jar kv_input kv_output |
| | 34 | ls -al kv_output |
| | 35 | cat kv_output/part-* |
| | 36 | unset HADOOP_CONF_DIR |
| | 37 | }}} |
| | 38 | |
| | 39 | * Reference: |
| | 40 | * http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.html |
| | 41 | * http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/lib/input/TextInputFormat.html |
| | 42 | {{{ |
| | 43 | #!text |
| | 44 | public class TextInputFormat |
| | 45 | extends FileInputFormat<LongWritable,Text> |
| | 46 | }}} |
| | 47 | * http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/lib/input/KeyValueTextInputFormat.html |
| | 48 | {{{ |
| | 49 | #!text |
| | 50 | public class KeyValueTextInputFormat |
| | 51 | extends FileInputFormat<Text,Text> |
| | 52 | }}} |
| | 53 | * http://hadoop.apache.org/docs/r1.0.4/api/org/apache/hadoop/mapreduce/lib/input/NLineInputFormat.html |
| | 54 | {{{ |
| | 55 | #!text |
| | 56 | public class NLineInputFormat |
| | 57 | extends FileInputFormat<LongWritable,Text> |
| | 58 | |
| | 59 | NLineInputFormat splits the input so that every N lines form one split. |
| | 60 | }}} |
| | 61 | |
| | 62 | == 實作習題 == |
| | 63 | |
| | 64 | <問題 1> |