= Deploy Hadoop to PC Classroom using DRBL =

 * Java is required for Hadoop, so you need to install a Java runtime or JDK first.
{{{
~$ echo "deb http://free.nchc.org.tw/debian/ etch non-free" > /tmp/etch-non-free.list
~$ sudo mv /tmp/etch-non-free.list /etc/apt/sources.list.d/.
~$ sudo apt-get update
~$ sudo apt-get install sun-java5-jdk
}}}
 * Download Hadoop 0.18.2.
{{{
~$ wget http://ftp.twaren.net/Unix/Web/apache/hadoop/core/hadoop-0.18.2/hadoop-0.18.2.tar.gz
~$ tar zxvf hadoop-0.18.2.tar.gz
}}}
 * Set the JAVA_HOME environment variable.
{{{
~$ echo "export JAVA_HOME=/usr/lib/jvm/java-1.5.0-sun" >> ~/.bash_profile
~$ source ~/.bash_profile
}}}
 * Edit hadoop-0.18.2/conf/hadoop-env.sh.
{{{
#!diff
--- hadoop-0.18.2/conf/hadoop-env.sh.org	2008-11-06 22:57:40.000000000 +0800
+++ hadoop-0.18.2/conf/hadoop-env.sh	2008-11-06 22:58:42.000000000 +0800
@@ -6,7 +6,9 @@
 # remote nodes.
 
 # The java implementation to use.  Required.
-# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+export JAVA_HOME=/usr/lib/jvm/java-1.5.0-sun
+export HADOOP_HOME=/home/jazz/hadoop-0.18.2
+export HADOOP_CONF_DIR=$HADOOP_HOME/conf
 
 # Extra Java CLASSPATH elements.  Optional.
 # export HADOOP_CLASSPATH=
}}}
 * Here is the current DRBL setup:
{{{
Your DRBL environment configuration:
******************************************************
         NIC    NIC IP                   Clients
+------------------------------+
|         DRBL SERVER          |
|                              |
|    +-- [eth0] 140.110.25.101 +- to WAN
|                              |
|    +-- [eth1] 192.168.61.254 +- to clients group 1 [ 16 clients, their IP
|                              |   from 192.168.61.1 - 192.168.61.16]
+------------------------------+
******************************************************
Total clients: 16
******************************************************
}}}
 * Hadoop uses SSH for internal communication, so we have to exchange SSH keys first.
{{{
~$ ssh-keygen
~$ cp .ssh/id_rsa.pub .ssh/authorized_keys
~$ sudo apt-get install dsh
~$ mkdir -p .dsh
~$ for ((i=1;i<=16;i++)); do echo "192.168.61.$i" >> .dsh/machines.list; done
}}}
 * Edit hadoop-0.18.2/conf/hadoop-site.xml.
{{{
#!diff
--- hadoop-0.18.2/conf/hadoop-site.xml.org	2008-11-06 23:11:18.000000000 +0800
+++ hadoop-0.18.2/conf/hadoop-site.xml	2008-11-07 17:05:11.000000000 +0800
@@ -4,5 +4,31 @@
 <!-- Put site-specific property overrides in this file. -->
 
 <configuration>
-
+<property>
+  <name>fs.default.name</name>
+  <value>hdfs://192.168.61.254:9000/</value>
+  <description>
+    The name of the default file system. Either the literal string
+    "local" or a host:port for NDFS.
+  </description>
+</property>
+<property>
+  <name>mapred.job.tracker</name>
+  <value>192.168.61.254:9001</value>
+  <description>
+    The host and port that the MapReduce job tracker runs at. If
+    "local", then jobs are run in-process as a single map and
+    reduce task.
+  </description>
+</property>
+<property>
+  <name>dfs.data.dir</name>
+  <value>/hadoop/dfs/data</value>
+  <description>Determines where on the local filesystem an DFS data node
+    should store its blocks. If this is a comma-delimited
+    list of directories, then data will be stored in all named
+    directories, typically on different devices.
+    Directories that do not exist are ignored.
+  </description>
+</property>
 </configuration>
}}}
 * Edit /etc/rc.local for the DRBL server acting as the Hadoop namenode.
{{{
#!diff
--- /etc/rc.local.org	2008-11-07 18:09:10.000000000 +0800
+++ /etc/rc.local	2008-11-07 17:58:14.000000000 +0800
@@ -11,4 +11,7 @@
 #
 # By default this script does nothing.
 
+echo 3 > /proc/sys/vm/drop_caches
+/home/jazz/hadoop-0.18.2/bin/hadoop namenode -format
+/home/jazz/hadoop-0.18.2/bin/hadoop-daemon.sh start namenode
 exit 0
}}}
 * Edit rc.local for the DRBL clients acting as datanodes.
{{{
~$ cat > rc.local << EOF
}}}
{{{
#!/bin/sh -e
echo 3 > /proc/sys/vm/drop_caches
/home/jazz/hadoop-0.18.2/bin/hadoop-daemon.sh start datanode
exit 0
EOF
}}}
{{{
~$ chmod a+x rc.local
~$ sudo /opt/drbl/sbin/drbl-cp-host rc.local /etc/
~$ sudo su -
~# dsh -a update-rc.d rc.local defaults
}}}
 * Shut down the DRBL clients.
 * Reboot the DRBL server.
 * Use "Wake on LAN" to start the DRBL clients.
 * Browse http://192.168.61.254:50070 for the DFS status.
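Before trusting dsh with the key exchange and the `dsh -a` commands above, the generated machines list can be sanity-checked. A minimal sketch in plain sh, assuming the 16-client range 192.168.61.1-16 from the DRBL setup shown earlier (a scratch file is used here for illustration; the real list lives in ~/.dsh/machines.list):

```shell
#!/bin/sh
# Regenerate the dsh machines list into a scratch file and verify it.
LIST=$(mktemp)
for i in $(seq 1 16); do
    echo "192.168.61.$i" >> "$LIST"
done
# Count entries that are well-formed addresses in the client subnet.
COUNT=$(grep -c '^192\.168\.61\.[0-9][0-9]*$' "$LIST")
echo "$COUNT"
rm -f "$LIST"
```

If the count printed is not 16, a typo in the loop bounds or subnet shows up here rather than later as a hung dsh run.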
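To double-check which namenode URI the datanodes will read from hadoop-site.xml, the fs.default.name value can be pulled out with sed. A sketch using an inlined sample fragment (an assumption for self-containment; in practice, point CONF at hadoop-0.18.2/conf/hadoop-site.xml):

```shell
#!/bin/sh
# Write a sample hadoop-site.xml fragment, then extract the configured
# namenode URI that the datanodes will connect to.
CONF=$(mktemp)
cat > "$CONF" <<'EOF'
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://192.168.61.254:9000/</value>
  </property>
</configuration>
EOF
NAMENODE=$(sed -n 's|.*<value>\(hdfs://[^<]*\)</value>.*|\1|p' "$CONF")
echo "$NAMENODE"    # hdfs://192.168.61.254:9000/
rm -f "$CONF"
```

The URI printed here should match the address of the DFS status page checked in the last step (same host, web UI on port 50070 instead of 9000).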