#!/bin/bash
source install_lang
####### garbage here #############
# Print the current value of the Good variable on one line.
# Good is not assigned anywhere in this file; presumably the sourced
# install_lang provides it -- TODO confirm.
mainFunction() {
  echo "${Good}"
}
# Print the current value of the Bra_Bra_Bra variable on one line.
# Bra_Bra_Bra is not assigned anywhere in this file; presumably the sourced
# install_lang provides it -- TODO confirm.
braBraBra() {
  echo "${Bra_Bra_Bra}"
}
####### garbage end ###############
####### fafa code here ###########
# Assumed parameters:
#   /home/nutchuser/NutchEZ_source holds three files:
#     install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
#   The installation path is /opt/NutchEZ.
Install_source='/home/nutchuser/NutchEZ_source'
NutchEZ_HOME='/opt/NutchEZ'
# Address of eth0 as printed on ifconfig's "inet addr" line: strip everything
# up to "addr:", strip from "Bcast" onwards, then cut at the first space.
MasterIP_Address=$(
  /sbin/ifconfig eth0 \
    | grep 'inet addr' \
    | sed -e 's/^.*addr://g' -e 's/Bcast.*$//g' -e 's/ .*// '
)
# Ask the operator for the installation details.
# Globals written:
#   Admin_email - administrator's e-mail address, as typed
#   MasterDNS   - DNS name of the master node, as typed
# Input is read from stdin; the prompts are only shown when stdin is a terminal.
set_install_information () {
  # -r keeps backslashes in the typed value instead of treating them as escapes.
  read -r -p "Please enter administrator's e-mail address: " Admin_email
  read -r -p "Please enter the Master DNS: " MasterDNS
}
# Echo back the values collected for the installation: one line for the
# administrator's e-mail address (Admin_email) and one for the master DNS
# name (MasterDNS).
show_info () {
  printf '%s\n' \
    "Administrator's e-mail address is ${Admin_email}." \
    "The master DNS is: ${MasterDNS}"
}
# Ask the operator to confirm the entered installation details.
# Globals written:
#   confirm - the raw answer as typed (the prompt offers 1 for Yes, 2 for No);
#             this function does not validate it.
confirm_install_information () {
  # -r keeps backslashes in the answer instead of treating them as escapes.
  read -r -p "Please confirm your install information: 1.Yes 2.No " confirm
}
# Unpack Nutch 1.0 and configure it.
# Steps: extract /opt/nutch-1.0.tar.gz inside /opt, rename the unpacked
# /opt/nutch-1.0 tree to $NutchEZ_HOME, hand the tree to nutchuser, then run
# set_Nutch_conf.
# Globals read: NutchEZ_HOME
# Returns: non-zero as soon as cd, tar or mv fails, so that later steps never
#          run against a missing or half-installed tree; otherwise the status
#          of set_Nutch_conf.
Install_Nutch () {
  cd /opt || return 1
  # Use "tar zxvf" instead for a verbose file listing.
  tar zxf /opt/nutch-1.0.tar.gz || return 1
  # Target the configured install path rather than a relative "NutchEZ" name,
  # so the tree always ends up where the following steps expect it.
  mv /opt/nutch-1.0 "$NutchEZ_HOME" || return 1
  chown -R nutchuser:nutchuser "$NutchEZ_HOME"
  set_Nutch_conf
}
# Run the Nutch configuration steps one after another, in this order:
# set_hadoop-env, set_haoop-site, set_nutch-site, set_crawl-urlfilter.
# The return status is that of the last step.
set_Nutch_conf () {
  local step
  for step in set_hadoop-env set_haoop-site set_nutch-site set_crawl-urlfilter; do
    "$step"
  done
}
# set $NutchEZ_HOME/conf/hadoop-env.sh
# Appends the JAVA_HOME, HADOOP_HOME, HADOOP_LOG_DIR and HADOOP_SLAVES exports
# to hadoop-env.sh in $NutchEZ_HOME/conf.
# Globals read: NutchEZ_HOME (also written into the HADOOP_HOME and
#               HADOOP_SLAVES lines, so they follow the configured path)
# Side effects: leaves the working directory at $NutchEZ_HOME/conf.
# Returns: 1 without writing anything when the conf directory cannot be entered.
set_hadoop-env () {
  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
  # Without this check a failed cd would append to hadoop-env.sh in whatever
  # directory the caller happened to be in.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat >> hadoop-env.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-6-sun
export HADOOP_HOME=$NutchEZ_HOME
export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
EOF
}
# set $NutchEZ_HOME/conf/hadoop-site.xml
# Overwrites hadoop-site.xml in $NutchEZ_HOME/conf with two properties,
# fs.default.name ($MasterDNS:9000) and mapred.job.tracker ($MasterDNS:9001),
# wrapped in the <configuration>/<property> markup an XML configuration file
# needs.
# Globals read: NutchEZ_HOME, MasterDNS
# Side effects: leaves the working directory at $NutchEZ_HOME/conf.
# Returns: 1 without writing anything when the conf directory cannot be entered.
set_haoop-site () {
  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
  # Without this check a failed cd would create hadoop-site.xml in whatever
  # directory the caller happened to be in.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat > hadoop-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>$MasterDNS:9000</value>
    <description>The name of the default file system. Either the literal string "local" or a host:port for NDFS.</description>
  </property>
  <property>
    <name>mapred.job.tracker</name>
    <value>$MasterDNS:9001</value>
    <description>The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task.</description>
  </property>
</configuration>
EOF
}
# Overwrites nutch-site.xml in $NutchEZ_HOME/conf with the four http.agent.*
# properties (name, description, url, email), wrapped in the
# <configuration>/<property> markup an XML configuration file needs.
# Globals read: NutchEZ_HOME, MasterDNS (agent URL), Admin_email (agent e-mail)
# Side effects: leaves the working directory at $NutchEZ_HOME/conf.
# Returns: 1 without writing anything when the conf directory cannot be entered.
# NOTE(review): this file defines set_nutch-site a second time further down;
# bash keeps the most recent definition, so once that later definition has
# been read, calls resolve to it rather than to this one.
set_nutch-site () {
  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
  # Without this check a failed cd would create nutch-site.xml in whatever
  # directory the caller happened to be in.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat > nutch-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>http.agent.name</name>
    <value>nutchuser</value>
    <description>HTTP 'User-Agent' request header.</description>
  </property>
  <property>
    <name>http.agent.description</name>
    <value>MyTest</value>
    <description>Further description</description>
  </property>
  <property>
    <name>http.agent.url</name>
    <value>$MasterDNS</value>
    <description>A URL to advertise in the User-Agent header.</description>
  </property>
  <property>
    <name>http.agent.email</name>
    <value>$Admin_email</value>
    <description>An email address</description>
  </property>
</configuration>
EOF
}
# Helper for set_crawl-urlfilter: swap the line that follows a marker line.
#   $1    - file to edit in place
#   $2    - grep pattern that identifies the marker line
#   $3... - texts for sed's one-line "a" command (so a literal backslash is
#           written as \\). Each text is appended directly below the marker,
#           in the order given, so a later text ends up above an earlier one.
# The line right after the marker is deleted before anything is appended.
# Returns 1 and leaves the file untouched unless exactly one line matches $2.
_crawl_urlfilter_swap_rule () {
  local file=$1 marker=$2
  local line_no text
  shift 2
  line_no=$(grep -n -- "$marker" "$file" | sed 's/:.*//g')
  # An empty value would make sed delete line 1 and append after every line;
  # several matches cannot be used as a single address either.
  if ! [[ $line_no =~ ^[0-9]+$ ]]; then
    echo "set_crawl-urlfilter: '$marker' not found exactly once in $file" >&2
    return 1
  fi
  sed -i "$((line_no + 1))d" "$file" || return 1
  for text in "$@"; do
    sed -i "${line_no}a ${text}" "$file" || return 1
  done
}

# Rewrite four rules in $NutchEZ_HOME/conf/crawl-urlfilter.txt. For each of
# the marker comments below, the line following it is replaced:
#   "skip file:, ftp:, & mailto: urls"        -> -^(ftp|mailto):
#   "skip image and other suffixes we can"    -> the suffix list given below
#   "skip URLs containing certain characters" -> -[*!@]
#   "skip everything else"                    -> an extra comment line plus +.*
# Globals read: NutchEZ_HOME
# Returns: 0 when all four rules were rewritten, 1 when any marker was missing
#          or ambiguous (the remaining rules are still attempted).
set_crawl-urlfilter () {
  local filter_file="$NutchEZ_HOME/conf/crawl-urlfilter.txt"
  local status=0
  echo "set $NutchEZ_HOME/conf/crawl-urlfilter.txt"
  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip file:, ftp:, & mailto: urls' \
    '-^(ftp|mailto):' || status=1
  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip image and other suffixes we can' \
    '-\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' || status=1
  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip URLs containing certain characters as probable queries, etc.' \
    '-[*!@]' || status=1
  # "+.*" is appended first, so the comment line lands between the marker and it.
  _crawl_urlfilter_swap_rule "$filter_file" \
    'skip everything else' \
    '+.*' \
    '# accecpt anything else' || status=1
  return "$status"
}
# Announce the step, then run "$NutchEZ_HOME/bin/hadoop namenode -format".
# Globals read: NutchEZ_HOME
# Returns: the exit status of the hadoop command.
format_HDFS () {
  echo "format HDFS..."
  # Quoted so an install path containing whitespace is still one command word.
  "$NutchEZ_HOME/bin/hadoop" namenode -format
}
# Announce the step, then run "$NutchEZ_HOME/bin/start-all.sh".
# Globals read: NutchEZ_HOME
# Returns: the exit status of start-all.sh.
start_up_NutchEZ () {
  echo "start up NutchEZ..."
  # Quoted so an install path containing whitespace is still one command word.
  "$NutchEZ_HOME/bin/start-all.sh"
}
# install tomcat
# Steps: extract apache-tomcat-6.0.18.tar.gz inside /opt, move the unpacked
# tree to $NutchEZ_HOME/tomcat, hand $NutchEZ_HOME to nutchuser, unpack
# nutch-1.0.war into a fresh web/ directory, install that directory as
# Tomcat's ROOT webapp (the shipped ROOT is kept as ROOT-ori), create
# $NutchEZ_HOME/search, then run set_server and set_nutch-site.
# Globals read: NutchEZ_HOME
# Side effects: leaves the working directory at $NutchEZ_HOME.
# Returns: 1 as soon as a cd, tar, mv, mkdir or jar step fails, so later steps
#          never act on a missing tree; otherwise the status of set_nutch-site.
Install_Tomcat () {
  cd /opt/ || return 1
  # Use "tar zxvf" instead for a verbose file listing.
  tar zxf apache-tomcat-6.0.18.tar.gz || return 1
  mv apache-tomcat-6.0.18 "$NutchEZ_HOME" || return 1
  cd "$NutchEZ_HOME" || return 1
  mv apache-tomcat-6.0.18 tomcat || return 1
  chown -R nutchuser:nutchuser "$NutchEZ_HOME"
  mkdir "$NutchEZ_HOME/web" || return 1
  # jar's extract mode unpacks into the current directory and treats any
  # argument after the archive as an entry to extract, not as a destination.
  # Run it from inside web/ (in a subshell, so this function's cwd is kept).
  ( cd "$NutchEZ_HOME/web" && jar -xvf "$NutchEZ_HOME/nutch-1.0.war" ) || return 1
  mv "$NutchEZ_HOME/tomcat/webapps/ROOT" "$NutchEZ_HOME/tomcat/webapps/ROOT-ori" || return 1
  mv "$NutchEZ_HOME/web" "$NutchEZ_HOME/tomcat/webapps/ROOT" || return 1
  mkdir "$NutchEZ_HOME/search"
  set_server
  set_nutch-site
}
# Edit $NutchEZ_HOME/tomcat/conf/server.xml in place: print the file's path,
# look up a line number with grep, delete the six lines that follow that line,
# then append text directly below it.
# Globals read: NutchEZ_HOME.  Globals written: Line_NO.
set_server () {
echo "$NutchEZ_HOME/tomcat/conf/server.xml"
# NOTE(review): the grep pattern below is the empty string, which matches
# every line, so Line_NO receives every line number of the file (one per line)
# instead of a single number, and the arithmetic on it further down cannot
# work. The pattern that was meant to locate the block to replace appears to
# be missing -- confirm against the intended server.xml change.
Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '' | sed 's/:.*//g'`
# Delete lines Line_NO+1 through Line_NO+6.
sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml
# Append replacement text after line Line_NO.
# NOTE(review): as written, no text follows the "a \" command; the intended
# replacement content is probably missing as well -- confirm.
sed -i ''$Line_NO'a \
' $NutchEZ_HOME/tomcat/conf/server.xml
}
# Insert the four http.agent.* properties (name, description, url, email) as
# <property> elements directly below the <configuration> line of
# $NutchEZ_HOME/conf/nutch-site.xml.
# Globals read: NutchEZ_HOME, MasterDNS (agent URL), Admin_email (agent e-mail)
# Returns: 1 without touching the file unless exactly one line contains
#          <configuration>; otherwise the exit status of sed.
# The path echoed is the file that is actually edited.
# NOTE(review): the original message named
# tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml although the edit has
# always targeted conf/nutch-site.xml -- confirm which file was intended.
set_nutch-site () {
  local site_xml="$NutchEZ_HOME/conf/nutch-site.xml"
  local line_NO
  echo "$site_xml"
  # Find the line number after which the settings are inserted.
  line_NO=$(grep -n '<configuration>' "$site_xml" | sed 's/:.*//g')
  # An empty or multi-line result is not a usable sed address.
  if ! [[ $line_NO =~ ^[0-9]+$ ]]; then
    echo "set_nutch-site: <configuration> not found exactly once in $site_xml" >&2
    return 1
  fi
  # Add the settings. They are fed to sed's "r" command through stdin, so the
  # values are inserted verbatim and never parsed as part of the sed script.
  sed -i "${line_NO}r /dev/stdin" "$site_xml" << EOF
<property>
  <name>http.agent.name</name>
  <value>waue</value>
  <description>HTTP 'User-Agent' request header.</description>
</property>
<property>
  <name>http.agent.description</name>
  <value>MyTest</value>
  <description>Further description</description>
</property>
<property>
  <name>http.agent.url</name>
  <value>$MasterDNS</value>
  <description>A URL to advertise in the User-Agent header.</description>
</property>
<property>
  <name>http.agent.email</name>
  <value>$Admin_email</value>
  <description>An email address</description>
</property>
EOF
}
# Announce the step, then run "$NutchEZ_HOME/tomcat/bin/startup.sh".
# Globals read: NutchEZ_HOME
# Returns: the exit status of startup.sh.
start_up_tomcat () {
  echo "start up tomcat..."
  # Quoted so an install path containing whitespace is still one command word.
  "$NutchEZ_HOME/tomcat/bin/startup.sh"
}