| 1 | {{{ |
| 2 | #!html |
| 3 | <div style="text-align: center;"><big |
| 4 | style="font-weight: bold;"><big><big>實作二: HDFS Shell操作練習</big></big></big></div> |
| 5 | }}} |
| 6 | [[PageOutline]] |
| 7 | |
| 8 | == 前言 == |
| 9 | |
| 10 | * 此部份接續實作一 |
| 11 | |
| 12 | = Content 1. HDFS Shell基本操作 = |
| 13 | == 1.1 瀏覽你的HDFS目錄 == |
| 14 | |
| 15 | {{{ |
| 16 | /opt/hadoop$ bin/hadoop fs -ls |
| 17 | }}} |
| 18 | |
| 19 | == 1.2 上傳資料到HDFS目錄 == |
| 20 | * 上傳 |
| 21 | |
| 22 | {{{ |
| 23 | /opt/hadoop$ bin/hadoop fs -put conf input |
| 24 | }}} |
| 25 | |
| 26 | * 檢查 |
| 27 | |
| 28 | {{{ |
| 29 | /opt/hadoop$ bin/hadoop fs -ls |
| 30 | /opt/hadoop$ bin/hadoop fs -ls input |
| 31 | }}} |
| 32 | |
| 33 | == 1.3 下載HDFS的資料到本地目錄 == |
| 34 | |
| 35 | * 下載 |
| 36 | |
| 37 | {{{ |
| 38 | /opt/hadoop$ bin/hadoop fs -get input fromHDFS |
| 39 | }}} |
| 40 | |
| 41 | * 檢查 |
| 42 | |
| 43 | {{{ |
| 44 | /opt/hadoop$ ls -al | grep fromHDFS |
| 45 | /opt/hadoop$ ls -al fromHDFS |
| 46 | }}} |
| 47 | |
| 48 | == 1.4 刪除檔案 == |
| 49 | |
| 50 | {{{ |
| 51 | /opt/hadoop$ bin/hadoop fs -ls input |
| 52 | /opt/hadoop$ bin/hadoop fs -rm input/masters |
| 53 | }}} |
| 54 | |
| 55 | == 1.5 直接看檔案 == |
| 56 | |
| 57 | {{{ |
| 58 | /opt/hadoop$ bin/hadoop fs -ls input |
| 59 | /opt/hadoop$ bin/hadoop fs -cat input/slaves |
| 60 | }}} |
| 61 | |
| 62 | == 1.6 更多指令操作 == |
| 63 | |
| 64 | {{{ |
| 65 | hadooper@vPro:/opt/hadoop$ bin/hadoop fs |
| 66 | |
| 67 | Usage: java FsShell |
| 68 | [-ls <path>] |
| 69 | [-lsr <path>] |
| 70 | [-du <path>] |
| 71 | [-dus <path>] |
| 72 | [-count[-q] <path>] |
| 73 | [-mv <src> <dst>] |
| 74 | [-cp <src> <dst>] |
| 75 | [-rm <path>] |
| 76 | [-rmr <path>] |
| 77 | [-expunge] |
| 78 | [-put <localsrc> ... <dst>] |
| 79 | [-copyFromLocal <localsrc> ... <dst>] |
| 80 | [-moveFromLocal <localsrc> ... <dst>] |
| 81 | [-get [-ignoreCrc] [-crc] <src> <localdst>] |
| 82 | [-getmerge <src> <localdst> [addnl]] |
| 83 | [-cat <src>] |
| 84 | [-text <src>] |
| 85 | [-copyToLocal [-ignoreCrc] [-crc] <src> <localdst>] |
| 86 | [-moveToLocal [-crc] <src> <localdst>] |
| 87 | [-mkdir <path>] |
| 88 | [-setrep [-R] [-w] <rep> <path/file>] |
| 89 | [-touchz <path>] |
| 90 | [-test -[ezd] <path>] |
| 91 | [-stat [format] <path>] |
| 92 | [-tail [-f] <file>] |
| 93 | [-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...] |
| 94 | [-chown [-R] [OWNER][:[GROUP]] PATH...] |
| 95 | [-chgrp [-R] GROUP PATH...] |
| 96 | [-help [cmd]] |
| 97 | |
| 98 | Generic options supported are |
| 99 | -conf <configuration file> specify an application configuration file |
| 100 | -D <property=value> use value for given property |
| 101 | -fs <local|namenode:port> specify a namenode |
| 102 | -jt <local|jobtracker:port> specify a job tracker |
| 103 | -files <comma separated list of files> specify comma separated files to be copied to the map reduce cluster |
| 104 | -libjars <comma separated list of jars> specify comma separated jar files to include in the classpath. |
| 105 | -archives <comma separated list of archives> specify comma separated archives to be unarchived on the compute machines. |
| 106 | The general command line syntax is |
| 107 | bin/hadoop command [genericOptions] [commandOptions] |
| 108 | |
| 109 | }}} |
| 110 | |
| 111 | |
| 112 | |
| 113 | = Content 2. 使用網頁GUI瀏覽資訊 = |
| 114 | |
| 115 | * [http://localhost:50030 Map/Reduce Administration] |
| 116 | * [http://localhost:50070 NameNode] |
| 117 | |
| 118 | = Content 3. 更多HDFS shell 的用法 = |
| 119 | |
| 120 | * bin/hadoop fs <args> ,下面則列出 <args> 的用法 |
| 121 | * 以下操作預設的目錄在 /user/<$username>/ 下 |
| 122 | {{{ |
| 123 | $ bin/hadoop fs -ls input |
| 124 | Found 4 items |
| 125 | -rw-r--r-- 2 hadooper supergroup 115045564 2009-04-02 11:51 /user/hadooper/input/1.txt |
| 126 | -rw-r--r-- 2 hadooper supergroup 987864 2009-04-02 11:51 /user/hadooper/input/2.txt |
| 127 | -rw-r--r-- 2 hadooper supergroup 1573048 2009-04-02 11:51 /user/hadooper/input/3.txt |
| 128 | -rw-r--r-- 2 hadooper supergroup 25844527 2009-04-02 11:51 /user/hadooper/input/4.txt |
| 129 | }}} |
| 130 | * 完整的路徑則是 '''hdfs://node:port/path''' 如: |
| 131 | {{{ |
| 132 | $ bin/hadoop fs -ls hdfs://gm1.nchc.org.tw:9000/user/hadooper/input |
| 133 | Found 4 items |
| 134 | -rw-r--r-- 2 hadooper supergroup 115045564 2009-04-02 11:51 /user/hadooper/input/1.txt |
| 135 | -rw-r--r-- 2 hadooper supergroup 987864 2009-04-02 11:51 /user/hadooper/input/2.txt |
| 136 | -rw-r--r-- 2 hadooper supergroup 1573048 2009-04-02 11:51 /user/hadooper/input/3.txt |
| 137 | -rw-r--r-- 2 hadooper supergroup 25844527 2009-04-02 11:51 /user/hadooper/input/4.txt |
| 138 | }}} |
| 139 | |
| 140 | == -cat == |
| 141 | * 將路徑指定文件的內容輸出到stdout |
| 142 | {{{ |
| 143 | $ bin/hadoop fs -cat quota/hadoop-env.sh |
| 144 | }}} |
| 145 | == -chgrp == |
| 146 | * 改變文件所屬的組 |
| 147 | {{{ |
| 148 | $ bin/hadoop fs -chgrp -R hadooper own |
| 149 | }}} |
| 150 | == -chmod == |
| 151 | * 改變文件的權限 |
| 152 | {{{ |
| 153 | $ bin/hadoop fs -chmod -R 755 own |
| 154 | }}} |
| 155 | == -chown == |
| 156 | * 改變文件的擁有者 |
| 157 | {{{ |
| 158 | $ bin/hadoop fs -chown -R hadooper own |
| 159 | }}} |
| 160 | == -copyFromLocal, -put == |
| 161 | * 從local放檔案到hdfs |
| 162 | {{{ |
| 163 | $ bin/hadoop fs -put input dfs_input |
| 164 | }}} |
| 165 | == -copyToLocal, -get == |
| 166 | * 把hdfs上的檔案下載到 local |
| 167 | {{{ |
| 168 | $ bin/hadoop fs -get dfs_input input1 |
| 169 | }}} |
| 170 | == -cp == |
| 171 | * 將文件從hdfs原本路徑複製到hdfs目標路徑 |
| 172 | {{{ |
| 173 | $ bin/hadoop fs -cp own hadooper |
| 174 | }}} |
| 175 | == -du == |
| 176 | * 顯示目錄中所有文件的大小 |
| 177 | {{{ |
| 178 | $ bin/hadoop fs -du input |
| 179 | |
| 180 | Found 4 items |
| 181 | 115045564 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/1.txt |
| 182 | 987864 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/2.txt |
| 183 | 1573048 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/3.txt |
| 184 | 25844527 hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/4.txt |
| 185 | }}} |
| 186 | == -dus == |
| 187 | * 顯示該目錄/文件的總大小 |
| 188 | {{{ |
| 189 | $ bin/hadoop fs -dus input |
| 190 | |
| 191 | hdfs://gm1.nchc.org.tw:9000/user/hadooper/input 143451003 |
| 192 | }}} |
| 193 | == -expunge == |
| 194 | * 清空垃圾桶 |
| 195 | {{{ |
| 196 | $ bin/hadoop fs -expunge |
| 197 | }}} |
| 198 | == -getmerge == |
| 199 | * 將來源目錄<src>下所有的文件都集合到本地端一個<localdst>檔案內 |
| 200 | * bin/hadoop fs -getmerge <src> <localdst> |
| 201 | {{{ |
| 202 | $ echo "this is one; " >> in1/input |
| 203 | $ echo "this is two; " >> in1/input2 |
| 204 | $ bin/hadoop fs -put in1 in1 |
| 205 | $ bin/hadoop fs -getmerge in1 merge.txt |
| 206 | $ cat ./merge.txt |
| 207 | }}} |
| 208 | |
| 209 | == -ls == |
| 210 | * 列出文件或目錄的資訊 |
| 211 | * 文件名 <副本數> 文件大小 修改日期 修改時間 權限 用戶ID 組ID |
| 212 | * 目錄名 <dir> 修改日期 修改時間 權限 用戶ID 組ID |
| 213 | {{{ |
| 214 | $ bin/hadoop fs -ls |
| 215 | }}} |
| 216 | == -lsr == |
| 217 | * ls命令的遞迴版本 |
| 218 | {{{ |
| 219 | $ bin/hadoop fs -lsr / |
| 220 | }}} |
| 221 | == -mkdir == |
| 222 | * 建立資料夾 |
| 223 | {{{ |
| 224 | $ bin/hadoop fs -mkdir a b c |
| 225 | }}} |
| 226 | == -moveFromLocal == |
| 227 | * 將local端的資料夾剪下移動到hdfs上 |
| 228 | {{{ |
| 229 | $ bin/hadoop fs -moveFromLocal in1 in2 |
| 230 | }}} |
| 231 | == -mv == |
| 232 | * 更改資料的名稱 |
| 233 | {{{ |
| 234 | $ bin/hadoop fs -mv in2 in3 |
| 235 | }}} |
| 236 | == -rm == |
| 237 | * 刪除指定的檔案(不可為資料夾) |
| 238 | {{{ |
| 239 | $ bin/hadoop fs -rm in1/input |
| 240 | }}} |
| 241 | == -rmr == |
| 242 | * 遞迴刪除資料夾(包含在內的所有檔案) |
| 243 | {{{ |
| 244 | $ bin/hadoop fs -rmr in1 |
| 245 | }}} |
| 246 | == -setrep == |
| 247 | * 設定副本係數 |
| 248 | * bin/hadoop fs -setrep [-R] [-w] <rep> <path/file> |
| 249 | {{{ |
| 250 | $ bin/hadoop fs -setrep -w 2 -R input |
| 251 | Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/1.txt |
| 252 | Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/2.txt |
| 253 | Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/3.txt |
| 254 | Replication 2 set: hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/4.txt |
| 255 | Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/1.txt ... done |
| 256 | Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/2.txt ... done |
| 257 | Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/3.txt ... done |
| 258 | Waiting for hdfs://gm1.nchc.org.tw:9000/user/hadooper/input/4.txt ... done |
| 259 | }}} |
| 260 | == -stat == |
| 261 | * 印出時間資訊 |
| 262 | {{{ |
| 263 | $ bin/hadoop fs -stat input |
| 264 | 2009-04-02 03:51:29 |
| 265 | }}} |
| 266 | == -tail == |
| 267 | * 將文件的最後1k內容輸出 |
| 268 | * 用法 : bin/hadoop fs -tail [-f] 檔案 (-f 參數用來顯示如果檔案增大,則秀出被append上的內容) |
| 269 | {{{ |
| 270 | $ bin/hadoop fs -tail input/1.txt |
| 271 | }}} |
| 272 | == -test == |
| 273 | * 測試檔案, -e 檢查文件是否存在(1=存在, 0=否), -z 檢查文件是否為空(1=空, 0=不為空), -d 檢查是否為目錄(1=是, 0=否) |
| 274 | * 要用echo $? 來看回傳值為 0 or 1 |
| 275 | * 用法: bin/hadoop fs -test -[ezd] URI |
| 276 | |
| 277 | {{{ |
| 278 | $ bin/hadoop fs -test -e /user/hadooper/input/5.txt |
| 279 | $ bin/hadoop fs -test -z /user/hadooper/input/5.txt |
| 280 | test: File does not exist: /user/hadooper/input/5.txt |
| 281 | $ bin/hadoop fs -test -d /user/hadooper/input/5.txt |
| 282 | |
| 283 | test: File does not exist: /user/hadooper/input/5.txt |
| 284 | }}} |
| 285 | == -text == |
| 286 | * 將檔案(如壓縮檔, textrecordinputstream)輸出為純文字格式 |
| 287 | * bin/hadoop fs -text <src> |
| 288 | {{{ |
| 289 | $ hadoop fs -text macadr-eth1.txt.gz |
| 290 | 00:1b:fc:61:75:b1 |
| 291 | 00:1b:fc:58:9c:23 |
| 292 | }}} |
| 293 | * ps : 目前沒支援zip的函式庫 |
| 294 | {{{ |
| 295 | $ bin/hadoop fs -text b/a.txt.zip |
| 296 | PK |
| 297 | ���:��H{ |
| 298 | a.txtUT b��Ib��IUx��sssss |
| 299 | test |
| 300 | PK |
| 301 | ���:��H{ |
| 302 | ��a.txtUTb��IUxPK@C |
| 303 | }}} |
| 304 | == -touchz == |
| 305 | * 建立一個空文件 |
| 306 | {{{ |
| 307 | $ bin/hadoop fs -touchz b/kk |
| 308 | $ bin/hadoop fs -test -z b/kk |
| 309 | $ echo $? |
| 310 | 1 |
| 311 | $ bin/hadoop fs -test -z b/a.txt.zip |
| 312 | $ echo $? |
| 313 | 0 |
| 314 | }}} |