Changes between Version 2 and Version 3 of waue/Hadoop_DRBL

Timestamp: Feb 12, 2009, 5:04:21 PM
Author: waue

 * download Hadoop 0.18.3
{{{
$ cd /opt
$ wget http://ftp.twaren.net/Unix/Web/apache/hadoop/core/hadoop-0.18.3/hadoop-0.18.3.tar.gz
$ tar zxvf hadoop-0.18.3.tar.gz
$ ln -sf hadoop-0.18.3 hadoop
}}}
 * set up the JAVA_HOME environment variable
{{{
$ echo "export JAVA_HOME=/usr/lib/jvm/java-6-sun" >> ~/.bash_profile
$ source ~/.bash_profile
}}}
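 * optionally run a quick sanity check before editing any config files; a minimal sketch, assuming the /opt/hadoop symlink created above:
{{{
$ echo $JAVA_HOME
$ /opt/hadoop/bin/hadoop version    # should report 0.18.3 if JAVA_HOME is picked up
}}}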
 * edit hadoop-0.18.3/conf/hadoop-env.sh
{{{
#!diff
--- hadoop-0.18.3/conf/hadoop-env.sh.org
+++ hadoop-0.18.3/conf/hadoop-env.sh
@@ -6,7 +6,9 @@
 # remote nodes.
 
 # The java implementation to use.  Required.
-# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+export JAVA_HOME=/usr/lib/jvm/java-6-sun
+export HADOOP_HOME=/opt/hadoop-0.18.3
+export HADOOP_CONF_DIR=$HADOOP_HOME/conf
 
 # Extra Java CLASSPATH elements.  Optional.
 # export HADOOP_CLASSPATH=
}}}

 * edit hadoop-0.18.3/conf/hadoop-site.xml
{{{
#!diff
--- hadoop-0.18.3/conf/hadoop-site.xml.org
+++ hadoop-0.18.3/conf/hadoop-site.xml
@@ -4,5 +4,21 @@
 <!-- Put site-specific property overrides in this file. -->
 
 <configuration>
-
+  <property>
+    <name>fs.default.name</name>
+    <value>hdfs://192.168.1.254:9000/</value>
+    <description>
+      The name of the default file system. Either the literal string
+      "local" or a host:port for NDFS.
+    </description>
+  </property>
+  <property>
+    <name>mapred.job.tracker</name>
+    <value>hdfs://192.168.1.254:9001</value>
+    <description>
+      The host and port that the MapReduce job tracker runs at. If
+      "local", then jobs are run in-process as a single map and
+      reduce task.
+    </description>
+  </property>
 </configuration>
}}}
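 * optionally make sure the edited file is still well-formed XML; a minimal check, assuming xmllint (from the libxml2-utils package) is installed:
{{{
$ xmllint --noout /opt/hadoop-0.18.3/conf/hadoop-site.xml && echo "XML OK"
}}}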

 * here is the current DRBL setup
{{{
******************************************************
         NIC    NIC IP                    Clients
+------------------------------+
|         DRBL SERVER          |
|                              |
|    +-- [eth0] X.X.X.X        +- to WAN
|                              |
|    +-- [eth1] 192.168.1.254  +- to clients group 1 [ 16 clients, their IP
|                              |             from 192.168.1.1 - 192.168.1.16]
+------------------------------+
******************************************************
Total clients: 16
******************************************************
}}}
 * Hadoop uses SSH connections between its nodes, so we have to set up passwordless SSH keys; dsh lets us run one command on every client at once (a quick test follows below).
{{{
$ ssh-keygen
$ cp .ssh/id_rsa.pub .ssh/authorized_keys
$ sudo apt-get install dsh
$ mkdir -p .dsh
$ for ((i=1;i<=16;i++)); do echo "192.168.1.$i" >> .dsh/machines.list; done
}}}
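 * the passwordless login can then be tested with dsh once the clients are up (if they are not booted yet, run this after the reboot steps at the end); a minimal sketch, forcing ssh in case the local dsh default is rsh:
{{{
$ dsh -a -r ssh uptime    # one line per client, with no password prompt
}}}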

 * edit /etc/rc.local so the DRBL server starts the Hadoop daemons (namenode, jobtracker, tasktracker) at boot
{{{
#!diff
--- /etc/rc.local.org   2008-11-07 18:09:10.000000000 +0800
+++ /etc/rc.local       2008-11-07 17:58:14.000000000 +0800
@@ -11,4 +11,9 @@
 #
 # By default this script does nothing.
 
+echo 3 > /proc/sys/vm/drop_caches
+/opt/hadoop-0.18.3/bin/hadoop namenode -format
+/opt/hadoop-0.18.3/bin/hadoop-daemon.sh start namenode
+/opt/hadoop-0.18.3/bin/hadoop-daemon.sh start jobtracker
+/opt/hadoop-0.18.3/bin/hadoop-daemon.sh start tasktracker
 exit 0
}}}
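 * to try this without rebooting, the same commands can be run by hand; a quick check with jps (shipped with the Sun JDK), which should then list NameNode, JobTracker and TaskTracker:
{{{
$ sudo sh /etc/rc.local
$ sudo jps
}}}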
 * create an init script, hadoop_datanode, so that each DRBL client starts a datanode at boot (note the EOF delimiter is unquoted, so every $ inside the script must be escaped as \$)
{{{
$ cat > hadoop_datanode << EOF
}}}
{{{
#! /bin/sh
set -e

# /etc/init.d/hadoop_datanode: start and stop the Hadoop DFS datanode on a DRBL client

export PATH="\${PATH:+\$PATH:}/usr/sbin:/sbin"

case "\$1" in
  start)
        echo -n "starting datanode:"
        /opt/hadoop-0.18.3/bin/hadoop-daemon.sh start datanode
        echo "[OK]"
        ;;
  stop)
        echo -n "stopping datanode:"
        /opt/hadoop-0.18.3/bin/hadoop-daemon.sh stop datanode
        echo "[OK]"
        ;;

  *)
        echo "Usage: /etc/init.d/hadoop_datanode {start|stop}"
        exit 1
esac

exit 0
EOF
}}}
{{{
$ chmod a+x hadoop_datanode
$ sudo /opt/drbl/sbin/drbl-cp-host hadoop_datanode /etc/init.d/
$ sudo /opt/drbl/bin/drbl-doit update-rc.d hadoop_datanode defaults 99
}}}
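 * optionally confirm the script reached every client; a minimal check, assuming this DRBL install keeps its per-client roots under /tftpboot/nodes (the usual default):
{{{
$ ls -l /tftpboot/nodes/192.168.1.1/etc/init.d/hadoop_datanode
}}}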
 * shut down the DRBL clients
 * reboot the DRBL server
 * use "Wake on LAN" to boot the DRBL clients again
 * browse http://192.168.1.254:50070 for the DFS status
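 * once the datanodes have registered, the cluster can be exercised from the server; a short smoke test, assuming the examples jar shipped with 0.18.3:
{{{
$ /opt/hadoop-0.18.3/bin/hadoop dfsadmin -report    # should list 16 live datanodes
$ /opt/hadoop-0.18.3/bin/hadoop fs -mkdir input
$ /opt/hadoop-0.18.3/bin/hadoop fs -put /opt/hadoop-0.18.3/conf/*.xml input
$ /opt/hadoop-0.18.3/bin/hadoop jar /opt/hadoop-0.18.3/hadoop-0.18.3-examples.jar wordcount input output
$ /opt/hadoop-0.18.3/bin/hadoop fs -cat 'output/part-*' | head
}}}
 The jobtracker's own status page is at http://192.168.1.254:50030.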