1 | | {{{ |
2 | | #!html |
3 | | <div style="text-align: center;"><big |
4 | | style="font-weight: bold;"><big><big>實作五: Hadoop 程式編譯</big></big></big></div> |
5 | | }}} |
6 | | [[PageOutline]] |
7 | | |
8 | | |
9 | | == 前言:啟動Hadoop環境 == |
10 | | |
11 | | * 重新啟動昨天的環境 |
12 | | * 在 node1 上操作 |
13 | | {{{ |
14 | | $ cd /opt/hadoop |
15 | | $ bin/start-dfs.sh |
16 | | $ ssh node02 "/opt/hadoop/bin/start-mapred.sh" |
17 | | }}} |
18 | | * 請檢查 hadoop 是否正確運作. |
19 | | |
20 | | == 練習 1 : Word Count 初級版 == |
21 | | |
22 | | * 上傳內容到hdfs內 |
23 | | |
24 | | {{{ |
25 | | $ cd /opt/hadoop |
26 | | $ bin/hadoop dfs -mkdir input |
27 | | $ echo "I like NCHC Cloud Course." > input1 |
28 | | $ echo "I like nchc Cloud Course, and we enjoy this course." > input2 |
29 | | $ bin/hadoop dfs -put input1 input |
30 | | $ bin/hadoop dfs -put input2 input |
31 | | $ bin/hadoop dfs -ls input |
32 | | }}} |
33 | | |
34 | | * 點此連結 [attachment:wiki:jazz/Hadoop_Lab6:WordCount.java?format=raw WordCount.java] 並將它存到 /opt/hadoop 目錄下 |
35 | | |
36 | | |
37 | | * 編譯、打包並執行程式 |
38 | | |
39 | | {{{ |
40 | | $ mkdir MyJava |
41 | | $ javac -classpath hadoop-*-core.jar -d MyJava WordCount.java |
42 | | $ jar -cvf wordcount.jar -C MyJava . |
43 | | $ bin/hadoop jar wordcount.jar WordCount input/ output/ |
44 | | $ bin/hadoop dfs -cat output/part-00000 |
45 | | }}} |
46 | | |
47 | | ----- |
48 | | |
49 | | == 練習 2 : Word Count 進階版 == |
50 | | |
51 | | {{{ |
52 | | $ echo "\." >pattern.txt && echo "\," >>pattern.txt |
53 | | $ bin/hadoop dfs -put pattern.txt ./ |
54 | | $ mkdir MyJava2 |
55 | | }}} |
56 | | |
57 | | * 點此連結 [attachment:wiki:jazz/Hadoop_Lab6:WordCount2.java?format=raw WordCount2.java] 並將它存到 /opt/hadoop 目錄下 |
58 | | |
59 | | {{{ |
60 | | $ javac -classpath hadoop-*-core.jar -d MyJava2 WordCount2.java |
61 | | $ jar -cvf wordcount2.jar -C MyJava2 . |
62 | | $ bin/hadoop jar wordcount2.jar WordCount2 input output2 -skip pattern.txt |
63 | | $ bin/hadoop dfs -cat output2/part-00000 |
64 | | $ bin/hadoop jar wordcount2.jar WordCount2 -Dwordcount.case.sensitive=false input output3 -skip pattern.txt |
65 | | $ bin/hadoop dfs -cat output3/part-00000 |
66 | | }}} |