| 7 | = 簡介 = |
| 8 | * Hive 是由facebook所捐贈給hadoop的項目 |
| 9 | * 功能類似前一天測試的cloudbase |
| 10 | * 更簡便的一點是,Hive已經整合在Hadoop 0.19.1的版本內(但 0.20.0 卻又不見了),幾乎不用特別做設定,並且也有提供自己的交互查詢模式,就不用特別再安裝其他東西了 |
| 11 | * 感覺上操作起來有點像hbase !! |
| 12 | |
| 13 | = 安裝 = |
| 14 | |
| 15 | |
| 16 | |
| 17 | = 測試 = |
| 18 | [hadoop@gp1 bin]$ export HADOOP=/home/hadoop/hadoop-0.19.1 |
| 19 | [hadoop@gp1 bin]$ export HIVE_HOME=/home/hadoop/hadoop-0.19.1/contrib/hive/ |
| 20 | |
| 21 | [hadoop@gp1 bin]$ cd $HIVE_HOME |
| 22 | [hadoop@gp1 hive]$ bin/hive |
| 23 | |
| 24 | hive> CREATE TABLE pokes (foo INT, bar STRING); |
| 25 | OK |
| 26 | Time taken: 0.251 seconds |
| 27 | hive> CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING); |
| 28 | OK |
| 29 | Time taken: 0.106 seconds |
| 30 | hive> SHOW TABLES; |
| 31 | OK |
| 32 | invites pokes |
| 33 | Time taken: 0.107 seconds |
| 34 | hive> DESCRIBE invites; |
| 35 | OK |
| 36 | foo int |
| 37 | bar string |
| 38 | ds string |
| 39 | Time taken: 0.151 seconds |
| 40 | hive> ALTER TABLE pokes ADD COLUMNS (new_col INT); |
| 41 | OK |
| 42 | Time taken: 0.117 seconds |
| 43 | hive> ALTER TABLE invites ADD COLUMNS (new_col2 INT COMMENT 'a comment'); |
| 44 | OK |
| 45 | Time taken: 0.152 seconds |
| 46 | hive> LOAD DATA LOCAL INPATH './examples/files/kv1.txt' OVERWRITE INTO TABLE pokes; |
| 47 | Copying data from file:/home/hadoop/hadoop-0.19.1/contrib/hive/examples/files/kv1.txt |
| 48 | Loading data to table pokes |
| 49 | OK |
| 50 | Time taken: 0.288 seconds |
| 51 | hive> LOAD DATA LOCAL INPATH './examples/files/kv2.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-15'); |
| 52 | Copying data from file:/home/hadoop/hadoop-0.19.1/contrib/hive/examples/files/kv2.txt |
| 53 | Loading data to table invites partition {ds=2008-08-15} |
| 54 | OK |
| 55 | Time taken: 0.524 seconds |
| 56 | hive> LOAD DATA LOCAL INPATH './examples/files/kv3.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-08'); |
| 57 | Copying data from file:/home/hadoop/hadoop-0.19.1/contrib/hive/examples/files/kv3.txt |
| 58 | Loading data to table invites partition {ds=2008-08-08} |
| 59 | OK |
| 60 | Time taken: 0.406 seconds |
| 61 | |
| 62 | hive> INSERT OVERWRITE DIRECTORY '/tmp/hdfs_out' SELECT a.* FROM invites a; |
| 63 | Total MapReduce jobs = 1 |
| 64 | Starting Job = job_200902261245_0002, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0002 |
| 65 | Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0002 |
| 66 | map = 0%, reduce =0% |
| 67 | map = 50%, reduce =0% |
| 68 | map = 100%, reduce =0% |
| 69 | Ended Job = job_200902261245_0002 |
| 70 | Moving data to: /tmp/hdfs_out |
| 71 | OK |
| 72 | Time taken: 18.551 seconds |
| 73 | |
| 74 | hive> select count(1) from pokes; |
| 75 | Total MapReduce jobs = 2 |
| 76 | Number of reducers = 1 |
| 77 | In order to change number of reducers use: |
| 78 | set mapred.reduce.tasks = <number> |
| 79 | Starting Job = job_200902261245_0003, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0003 |
| 80 | Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0003 |
| 81 | map = 0%, reduce =0% |
| 82 | map = 50%, reduce =0% |
| 83 | map = 100%, reduce =0% |
| 84 | map = 100%, reduce =17% |
| 85 | map = 100%, reduce =100% |
| 86 | Ended Job = job_200902261245_0003 |
| 87 | Starting Job = job_200902261245_0004, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0004 |
| 88 | Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0004 |
| 89 | map = 0%, reduce =0% |
| 90 | map = 50%, reduce =0% |
| 91 | map = 100%, reduce =0% |
| 92 | map = 100%, reduce =100% |
| 93 | Ended Job = job_200902261245_0004 |
| 94 | OK |
| 95 | 500 |
| 96 | Time taken: 57.285 seconds |
| 97 | |
| 98 | hive> INSERT OVERWRITE DIRECTORY '/tmp/hdfs_out' SELECT a.* FROM invites a; |
| 99 | Total MapReduce jobs = 1 |
| 100 | Starting Job = job_200902261245_0005, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0005 |
| 101 | Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0005 |
| 102 | map = 0%, reduce =0% |
| 103 | map = 50%, reduce =0% |
| 104 | map = 100%, reduce =0% |
| 105 | Ended Job = job_200902261245_0005 |
| 106 | Moving data to: /tmp/hdfs_out |
| 107 | OK |
| 108 | Time taken: 18.349 seconds |
| 109 | |
| 110 | hive> INSERT OVERWRITE DIRECTORY '/tmp/reg_5' SELECT COUNT(1) FROM invites a; |
| 111 | Total MapReduce jobs = 2 |
| 112 | Number of reducers = 1 |
| 113 | In order to change number of reducers use: |
| 114 | set mapred.reduce.tasks = <number> |
| 115 | Starting Job = job_200902261245_0006, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0006 |
| 116 | Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0006 |
| 117 | map = 0%, reduce =0% |
| 118 | map = 50%, reduce =0% |
| 119 | map = 100%, reduce =0% |
| 120 | map = 100%, reduce =17% |
| 121 | map = 100%, reduce =100% |
| 122 | Ended Job = job_200902261245_0006 |
| 123 | Starting Job = job_200902261245_0007, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0007 |
| 124 | Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0007 |
| 125 | map = 0%, reduce =0% |
| 126 | map = 50%, reduce =0% |
| 127 | map = 100%, reduce =0% |
| 128 | map = 100%, reduce =17% |
| 129 | map = 100%, reduce =100% |
| 130 | Ended Job = job_200902261245_0007 |
| 131 | Moving data to: /tmp/reg_5 |
| 132 | OK |
| 133 | Time taken: 70.956 seconds |