| 1 | #!/bin/bash |
|---|
| 2 | source install_lang |
|---|
| 3 | ####### garbage here ############# |
|---|
# Print the message held in the global Good (not defined in this file;
# presumably provided by the sourced install_lang -- confirm).
mainFunction() {
  echo "${Good}"
}
|---|
# Print the message held in the global Bra_Bra_Bra (not defined in this file;
# presumably provided by the sourced install_lang -- confirm).
braBraBra() {
  echo "${Bra_Bra_Bra}"
}
|---|
| 12 | ####### garbage end ############### |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | ####### fafa code here ########### |
|---|
| 17 | |
|---|
# Parameter assumptions:
#   /home/nutchuser/NutchEZ_source contains 3 files:
#     install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
#   The installation path is /opt/nutchEZ.

Nutch_install_folder=/opt/nutchEZ
Install_source=/home/nutchuser/NutchEZ_source
NutchEZ_HOME=/opt/nutchEZ

# IPv4 address of eth0. Only the dotted address itself is captured, so no
# trailing blanks end up in the value. Both the "inet addr:1.2.3.4" and the
# "inet 1.2.3.4" ifconfig output formats are accepted; "inet6" lines do not
# match because the pattern requires a blank right after "inet".
MasterIP=$(/sbin/ifconfig eth0 \
  | sed -n 's/^.*inet \(addr:\)\{0,1\}\([0-9.][0-9.]*\).*$/\2/p')
|---|
| 27 | |
|---|
| 28 | |
|---|
# Ask the operator for the administrator e-mail address and the master DNS
# name.
# Globals written: Admin_email, admin_email, MasterDNS
# Returns: the status of the last read (non-zero on end of input).
set_install_information() {
  # The rest of the script reads $Admin_email, so that is the name that must
  # be filled in. -r keeps backslashes in the answer literal.
  read -r -p "Please enter administrator's e-mail address: " Admin_email
  # The lower-case name used to be the one written here; keep it populated in
  # case something outside this file still reads it.
  admin_email=$Admin_email
  read -r -p "Please enter the Master DNS: " MasterDNS
}
|---|
| 33 | |
|---|
# Print the collected installation settings (globals Admin_email and
# MasterDNS), one per line.
show_info() {
  printf '%s\n' \
    "Administrator's e-mail address is $Admin_email." \
    "The master DNS is: $MasterDNS"
}
|---|
| 38 | |
|---|
# Ask the operator to confirm the settings.
# Globals written: confirm (the raw answer; the prompt offers 1 = Yes, 2 = No,
# but the answer is not validated here).
# Returns: the status of read (non-zero on end of input).
confirm_install_information() {
  # -r stops read from treating backslashes in the answer as escapes.
  read -r -p "Please confirm your install infomation: 1.Yes 2.No " confirm
}
|---|
| 42 | |
|---|
# Unpack the Nutch 1.0 tarball next to $NutchEZ_HOME, rename the unpacked
# nutch-1.0 directory to $NutchEZ_HOME, hand it to nutchuser and write the
# Nutch configuration.
# Globals read: Install_source, NutchEZ_HOME
# Returns: 1 if unpacking or renaming fails, otherwise the status of
#          set_Nutch_conf.
Install_Nutch() {
  local parent_dir
  parent_dir=$(dirname "$NutchEZ_HOME")

  # tar takes the destination through -C; a bare trailing path would be read
  # as the name of an archive member to extract.
  tar zxvf "$Install_source/nutch-1.0.tar.gz" -C "$parent_dir" || return 1

  # Use absolute paths so the rename does not depend on the caller's
  # working directory. Assumes the archive unpacks to "nutch-1.0".
  mv "$parent_dir/nutch-1.0" "$NutchEZ_HOME" || return 1

  chown -R nutchuser:nutchuser "$NutchEZ_HOME"
  set_Nutch_conf
}
|---|
| 49 | |
|---|
# Write every Nutch/Hadoop configuration file by running the four set_*
# steps in order.
# Returns: the status of the last step.
set_Nutch_conf() {
  set_hadoop-env
  set_hadoop-site   # was misspelled "set_haoop-site", so this step never ran
  set_nutch-site
  set_crawl-urlfilter
}
|---|
| 56 | |
|---|
# Append the Hadoop environment settings to $NutchEZ_HOME/conf/hadoop-env.sh.
# Globals read: NutchEZ_HOME
# Returns: the status of the append.
set_hadoop-env() {
  local env_file="$NutchEZ_HOME/conf/hadoop-env.sh"

  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
  # Write to the file announced above rather than to ./hadoop-env.sh in the
  # caller's working directory, and point Hadoop at the directory this script
  # installs into ($NutchEZ_HOME) instead of a hard-coded /opt/nutch.
  cat >> "$env_file" << EOF
export JAVA_HOME=/usr/lib/jvm/java-6-sun
export HADOOP_HOME=$NutchEZ_HOME
export HADOOP_LOG_DIR=/tmp/nutch/logs
export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
EOF
}
|---|
| 67 | |
|---|
# Write $NutchEZ_HOME/conf/hadoop-site.xml, pointing the default file system
# (port 9000) and the job tracker (port 9001) at $MasterDNS.
# Globals read: NutchEZ_HOME, MasterDNS
# Returns: 1 if MasterDNS is empty, otherwise the status of the write.
set_hadoop-site() {
  # An empty host would produce values like ":9000"; refuse instead of
  # silently writing an unusable configuration.
  if [[ -z "$MasterDNS" ]]; then
    echo "set_hadoop-site: MasterDNS is not set" >&2
    return 1
  fi

  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
  cat > "$NutchEZ_HOME/conf/hadoop-site.xml" << EOF
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>$MasterDNS:9000</value>
    <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
  </property>
  <property>
    <name>mapred.job.tracker</name>
    <value>$MasterDNS:9001</value>
    <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
  </property>
</configuration>
EOF
}
|---|
| 86 | |
|---|
# Write $NutchEZ_HOME/conf/nutch-site.xml with the crawler's HTTP agent
# identity (name, description, URL and contact e-mail).
# Globals read: NutchEZ_HOME, MasterDNS, Admin_email
# Returns: the status of the write.
set_nutch-site() {
  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
  # The last property carries the administrator's e-mail address, so its name
  # must be http.agent.email (it used to be filled with $MasterDNS).
  cat > "$NutchEZ_HOME/conf/nutch-site.xml" << EOF
<configuration>
  <property>
    <name>http.agent.name</name>
    <value>nutchuser</value>
    <description>HTTP 'User-Agent' request header. </description>
  </property>
  <property>
    <name>http.agent.description</name>
    <value>MyTest</value>
    <description>Further description</description>
  </property>
  <property>
    <name>http.agent.url</name>
    <value>$MasterDNS</value>
    <description>A URL to advertise in the User-Agent header. </description>
  </property>
  <property>
    <name>http.agent.email</name>
    <value>$Admin_email</value>
    <description>An email address
    </description>
  </property>
</configuration>
EOF
}
|---|
| 115 | |
|---|
| 116 | |
|---|
# Replace the line that follows the first line containing a marker.
# Arguments: $1 - file to edit in place
#            $2 - literal marker text to look for
#            $3 - text that replaces the line after the marker line
# Returns:   1 (file untouched) when the marker is missing or no temporary
#            file can be created, otherwise the status of the final write.
_nutchez_replace_line_after() {
  local file=$1 marker=$2 replacement=$3
  local tmp line status
  local replaced=0 drop_next=0

  if ! grep -q -F -- "$marker" "$file"; then
    echo "marker not found in $file: $marker" >&2
    return 1
  fi

  tmp=$(mktemp) || return 1
  while IFS= read -r line || [[ -n "$line" ]]; do
    if (( drop_next )); then
      # This is the line right after the marker: it has been replaced.
      drop_next=0
      continue
    fi
    printf '%s\n' "$line"
    if (( ! replaced )) && [[ "$line" == *"$marker"* ]]; then
      printf '%s\n' "$replacement"
      replaced=1
      drop_next=1
    fi
  done < "$file" > "$tmp"

  # Copy the content back instead of renaming, so the original file keeps
  # its owner and mode.
  cat "$tmp" > "$file"
  status=$?
  rm -f -- "$tmp"
  return "$status"
}

# Relax the two filter rules in $NutchEZ_HOME/conf/crawl-urlfilter.txt:
# the rule under the "skip URLs containing certain characters" comment
# becomes -[*!@], and the rule under the "accept hosts in MY.DOMAIN.NAME"
# comment becomes +^http://([a-z0-9]*\.)*.*/
# Globals read: NutchEZ_HOME
# Returns: non-zero if either marker comment is missing or the file cannot
#          be rewritten.
set_crawl-urlfilter() {
  local filter_file="$NutchEZ_HOME/conf/crawl-urlfilter.txt"

  echo "set $NutchEZ_HOME/conf/crawl-urlfilter.txt"

  # The replacement text is written verbatim. Passing it through sed's "a"
  # command would drop the backslash in "\." and, with an empty line number,
  # would delete line 1 and append after every line.
  _nutchez_replace_line_after "$filter_file" \
    'skip URLs containing certain characters as probable queries, etc.' \
    '-[*!@]' || return 1
  _nutchez_replace_line_after "$filter_file" \
    'accept hosts in MY.DOMAIN.NAME' \
    '+^http://([a-z0-9]*\.)*.*/'
}
|---|
| 126 | |
|---|
# Format the HDFS name node by running "$NutchEZ_HOME/bin/hadoop namenode -format".
# Globals read: NutchEZ_HOME
# Returns: the exit status of the hadoop command.
format_HDFS() {
  echo "format HDFS..."
  # Quoted so an install path containing blanks is still run as one command.
  "$NutchEZ_HOME/bin/hadoop" namenode -format
}
|---|
| 131 | |
|---|
| 132 | |
|---|
# Start the NutchEZ services by running "$NutchEZ_HOME/bin/start-all.sh".
# Globals read: NutchEZ_HOME
# Returns: the exit status of start-all.sh.
start_up_NutchEZ() {
  echo "start up NutchEZ..."
  # Quoted so an install path containing blanks is still run as one command.
  "$NutchEZ_HOME/bin/start-all.sh"
}
|---|
| 137 | |
|---|
# Install Tomcat 6.0.18 under $NutchEZ_HOME/tomcat and deploy the Nutch web
# application as its ROOT webapp.
# Steps: unpack Tomcat, unpack nutch-1.0.war into a fresh "web" directory,
# swap it in as webapps/ROOT (the stock ROOT is kept as ROOT-ori), create the
# "search" directory, patch server.xml and the webapp's nutch-site.xml, and
# finally hand the whole tree to nutchuser.
# Globals read: Install_source, NutchEZ_HOME
# Returns: 1 as soon as a step fails, otherwise the status of the final chown.
Install_Tomcat() {
  # tar takes the destination through -C; a bare trailing path would be read
  # as the name of an archive member to extract.
  tar zxvf "$Install_source/apache-tomcat-6.0.18.tar.gz" -C "$NutchEZ_HOME" || return 1
  mv "$NutchEZ_HOME/apache-tomcat-6.0.18" "$NutchEZ_HOME/tomcat" || return 1

  mkdir "$NutchEZ_HOME/web" || return 1
  # jar always extracts into the current directory (extra arguments name the
  # entries to extract), so change into the target in a subshell first.
  ( cd "$NutchEZ_HOME/web" && jar -xvf "$NutchEZ_HOME/nutch-1.0.war" ) || return 1

  mv "$NutchEZ_HOME/tomcat/webapps/ROOT" "$NutchEZ_HOME/tomcat/webapps/ROOT-ori" || return 1
  mv "$NutchEZ_HOME/web" "$NutchEZ_HOME/tomcat/webapps/ROOT" || return 1
  mkdir "$NutchEZ_HOME/search" || return 1

  set_server || return 1
  set_tomcat_nutch-site || return 1

  # Done last so that everything created above belongs to nutchuser too.
  chown -R nutchuser:nutchuser "$NutchEZ_HOME"
}

# Replace the HTTP connector on port 8080 in
# $NutchEZ_HOME/tomcat/conf/server.xml with one that also sets
# URIEncoding="UTF-8".
# Assumes the existing <Connector> element spans exactly three lines, starting
# at the line that contains its port and protocol attributes.
# Globals read: NutchEZ_HOME
# Returns: 1 if the connector is not found or no temporary file can be
#          created, otherwise the status of sed.
set_server() {
  local server_xml="$NutchEZ_HOME/tomcat/conf/server.xml"
  local line_no snippet status

  echo "$NutchEZ_HOME/tomcat/conf/server.xml"

  line_no=$(grep -n -m 1 -F '<Connector port="8080" protocol="HTTP/1.1"' "$server_xml" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_server: HTTP connector not found in $server_xml" >&2
    return 1
  fi

  snippet=$(mktemp) || return 1
  cat > "$snippet" << 'EOF'
    <Connector port="8080" protocol="HTTP/1.1"
               connectionTimeout="20000"
               redirectPort="8443" URIEncoding="UTF-8"/>
EOF

  # "r" queues the new element at the first old line and "d" then removes the
  # three old lines, so the replacement lands exactly where the old element
  # was.
  sed -i -e "${line_no}r $snippet" -e "${line_no},$((line_no + 2))d" "$server_xml"
  status=$?
  rm -f -- "$snippet"
  return "$status"
}

# Insert the HTTP agent properties right after the <configuration> line of the
# web application's nutch-site.xml.
# This function used to be a second definition of set_nutch-site, which
# silently replaced the function of the same name that writes
# $NutchEZ_HOME/conf/nutch-site.xml; it has its own name now.
# NOTE(review): the target is the file announced by the echo below; the
# previous code edited "/conf/nutch-site.xml" through an undefined variable --
# confirm the webapp copy is the intended one.
# Globals read: NutchEZ_HOME, MasterDNS, Admin_email
# Returns: 1 if <configuration> is not found or no temporary file can be
#          created, otherwise the status of sed.
set_tomcat_nutch-site() {
  local site_xml="$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
  local line_no snippet status

  echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"

  # Find the line number after which the settings are inserted.
  line_no=$(grep -n -m 1 -F '<configuration>' "$site_xml" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_tomcat_nutch-site: <configuration> not found in $site_xml" >&2
    return 1
  fi

  # Build the settings in a temporary file and let sed read it in, so the
  # values need no sed escaping.
  snippet=$(mktemp) || return 1
  cat > "$snippet" << EOF
<property>
  <name>http.agent.name</name>
  <value>waue</value>
  <description>HTTP 'User-Agent' request header. </description>
</property>
<property>
  <name>http.agent.description</name>
  <value>MyTest</value>
  <description>Further description</description>
</property>
<property>
  <name>http.agent.url</name>
  <value>$MasterDNS</value>
  <description>A URL to advertise in the User-Agent header. </description>
</property>
<property>
  <name>http.agent.email</name>
  <value>$Admin_email</value>
  <description>An email address
  </description>
</property>
EOF

  sed -i -e "${line_no}r $snippet" "$site_xml"
  status=$?
  rm -f -- "$snippet"
  return "$status"
}
|---|
| 193 | |
|---|
| 194 | |
|---|
# Start Tomcat by running "$NutchEZ_HOME/tomcat/bin/startup.sh".
# Globals read: NutchEZ_HOME
# Returns: the exit status of startup.sh.
start_up_tomcat() {
  echo "start up tomcat..."
  # Quoted so an install path containing blanks is still run as one command.
  "$NutchEZ_HOME/tomcat/bin/startup.sh"
}
|---|