Changeset 130 for nutchez-0.2
- Timestamp: May 27, 2010, 10:57:57 AM (14 years ago)
- Location: nutchez-0.2/src/test
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
nutchez-0.2/src/test/install
r125 r130 31 31 format_HDFS 32 32 start_up_NutchEZ 33 install_tomcat33 Install_Tomcat 34 34 start_up_tomcat 35 35 36 36 # 安裝流程結束,並進入網頁管理頁面設定爬網網址...等 37 37 echo "Install Successfully!!" 38 echo "Visit http://$MasterIP_Address: portNO"38 echo "Visit http://$MasterIP_Address:8080" 39 39 elif [ $confirm -eq 2 ]; then 40 40 main -
nutchez-0.2/src/test/install_func.sh
r126 r130 19 19 # /home/nutchuser/NutchEZ_source下有3個檔案 20 20 # install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz 21 # 安裝路徑為/opt/nutchEZ 22 23 Nutch_install_folder=/opt/nutchEZ 21 # 安裝路徑為/opt/NutchEZ 22 24 23 Install_source=/home/nutchuser/NutchEZ_source 25 NutchEZ_HOME=/opt/ nutchEZ26 MasterIP =`/sbin/ifconfig eth0 | grep 'inet addr' | sed 's/^.*addr://g' | sed 's/Bcast.*$//g'`24 NutchEZ_HOME=/opt/NutchEZ 25 MasterIP_Address=`/sbin/ifconfig eth0 | grep 'inet addr' | sed 's/^.*addr://g' | sed 's/Bcast.*$//g' | sed 's/ .*// '` 27 26 28 27 29 28 set_install_information () { 30 read -p "Please enter administrator's e-mail address: " admin_email29 read -p "Please enter administrator's e-mail address: " Admin_email 31 30 read -p "Please enter the Master DNS: " MasterDNS 32 31 } … … 42 41 43 42 Install_Nutch () { 44 tar zxvf $Install_source/nutch-1.0.tar.gz /opt/ 45 mv nutch-1.0 nutchEZ 46 chown -R nutchuser:nutchuser /opt/nutchEZ 43 cd /opt 44 tar zxf /opt/nutch-1.0.tar.gz 45 # tar zxvf /opt/nutch-1.0.tar.gz 46 mv /opt/nutch-1.0 NutchEZ 47 chown -R nutchuser:nutchuser $NutchEZ_HOME 47 48 set_Nutch_conf 48 49 } … … 58 59 set_hadoop-env () { 59 60 echo "set $NutchEZ_HOME/conf/hadoop-env.sh" 61 cd $NutchEZ_HOME/conf/ 60 62 cat >> hadoop-env.sh << EOF 61 63 export JAVA_HOME=/usr/lib/jvm/java-6-sun 62 export HADOOP_HOME=/opt/ nutch63 export HADOOP_LOG_DIR=/tmp/ nutch/logs64 export HADOOP_SLAVES=/opt/ nutch/conf/slaves64 export HADOOP_HOME=/opt/NutchEZ 65 export HADOOP_LOG_DIR=/tmp/NutchEZ/logs 66 export HADOOP_SLAVES=/opt/NutchEZ/conf/slaves 65 67 EOF 66 68 } 67 69 68 70 # set $NutchEZ_HOME/conf/hadoop-site.xml 69 set_ha doop-site () {71 set_haoop-site () { 70 72 echo "set $NutchEZ_HOME/conf/hadoop-site.xml" 71 cat > $NutchEZ_HOME/conf/hadoop-site.xml << EOF 73 cd $NutchEZ_HOME/conf/ 74 cat > hadoop-site.xml << EOF 72 75 <configuration> 73 76 <property> … … 87 90 set_nutch-site () { 88 91 echo "set $NutchEZ_HOME/conf/nutch-site.xml" 89 cat > 
$NutchEZ_HOME/conf/nutch-site.xml << EOF 92 cd $NutchEZ_HOME/conf/ 93 cat > nutch-site.xml << EOF 90 94 <configuration> 91 95 <property> … … 116 120 117 121 set_crawl-urlfilter () { 118 echo "set NutchEZ_HOME/conf/set_crawl-urlfilter.txt" 119 Line_NO=`cat '$NutchEZ'/conf/crawl-urlfilter.txt | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'` 120 sed -i ''$((Line_NO+1))'d' $NutchEZ/conf/crawl-urlfilter.txt 121 sed -i ''$Line_NO'a -[*!@]' $NutchEZ/conf/crawl-urlfilter.txt 122 Line_NO=`cat crawl-urlfilter.txt | grep -n 'accept hosts in MY.DOMAIN.NAME' | sed 's/:.*//g'` 123 sed -i ''$((Line_NO+1))'d' $NutchEZ/conf/crawl-urlfilter.txt 124 sed -i ''$Line_NO'a +^http://([a-z0-9]*\.)*.*/' $NutchEZ/conf/crawl-urlfilter.txt 122 echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt" 123 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip file:, ftp:, & mailto: urls' | sed 's/:.*//g'` 124 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 125 sed -i ''$Line_NO'a -^(ftp|mailto):' $NutchEZ_HOME/conf/crawl-urlfilter.txt 126 127 128 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip image and other suffixes we can' | sed 's/:.*//g'` 129 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 130 sed -i ''$Line_NO'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' $NutchEZ_HOME/conf/crawl-urlfilter.txt 131 132 133 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip URLs containing certain characters as probable queries, etc.' 
| sed 's/:.*//g'` 134 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 135 sed -i ''$Line_NO'a -[*!@]' $NutchEZ_HOME/conf/crawl-urlfilter.txt 136 137 138 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip everything else' | sed 's/:.*//g'` 139 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 140 sed -i ''$Line_NO'a +.*' $NutchEZ_HOME/conf/crawl-urlfilter.txt 141 sed -i ''$Line_NO'a # accecpt anything else' $NutchEZ_HOME/conf/crawl-urlfilter.txt 125 142 } 126 143 … … 130 147 } 131 148 132 133 149 start_up_NutchEZ (){ 134 150 echo "start up NutchEZ..." … … 138 154 # install tomcat 139 155 Install_Tomcat () { 140 tar zxvf $Install_source/apache-tomcat-6.0.18.tar.gz $NutchEZ_HOME 141 mv $NutchEZ_HOME/apache-tomcat-6.0.18 $NutchEZ_HOME/tomcat 156 cd /opt/ 157 # tar zxf apache-tomcat-6.0.18.tar.gz 158 tar zxf apache-tomcat-6.0.18.tar.gz 159 mv apache-tomcat-6.0.18 $NutchEZ_HOME 160 cd $NutchEZ_HOME 161 mv apache-tomcat-6.0.18 tomcat 142 162 chown -R nutchuser:nutchuser $NutchEZ_HOME 143 163 mkdir $NutchEZ_HOME/web … … 150 170 } 151 171 172 152 173 set_server () { 153 174 echo "$NutchEZ_HOME/tomcat/conf/server.xml" 154 Line_NO=`cat '$NutchEZ'/tomcat/conf/server.xml | grep -n '<Connector port="8080" protocol="HTTP/1.1"' | sed 's/:.*//g'` 155 sed -i ''$Line_NO','$(($Line_NO+2))'d' server.xml 156 sed -i ''$Line_NO'a <Connector port="8080" protocol="HTTP/1.1"\ 175 Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '<!-- A "Connector" using the shared thread pool-->' | sed 's/:.*//g'` 176 177 sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml 178 sed -i ''$Line_NO'a <Connector port="8080" protocol="HTTP/1.1"\ 157 179 connectionTimeout="20000"\ 158 redirectPort="8443" URIEncoding="UTF-8" />\159 ' $NutchEZ/tomcat/conf/server.xml 160 } 161 180 redirectPort="8443" URIEncoding="UTF-8"\ 181 useBodyEncodingForURI="true" />\ 182 ' $NutchEZ_HOME/tomcat/conf/server.xml 183 } 162 184 163 185 
set_nutch-site () { … … 165 187 166 188 # 搜尋加入設定的行號位址 167 line_NO=`cat '$NutchEZ'/conf/nutch-site.xml| grep -n '<'configuration'>' | sed 's/:.*//g'`189 line_NO=`cat $NutchEZ_HOME'/conf/nutch-site.xml' | grep -n '<'configuration'>' | sed 's/:.*//g'` 168 190 169 191 # 加入設定檔 … … 189 211 </description>\ 190 212 </property>\ 191 ' $NutchEZ /conf/nutch-site.xml213 ' $NutchEZ_HOME/conf/nutch-site.xml 192 214 } 193 215
Note: See TracChangeset for help on using the changeset viewer.