Changeset 137 for nutchez-0.2
- Timestamp: May 31, 2010, 11:02:58 AM (14 years ago)
- Location: nutchez-0.2/src/test
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
nutchez-0.2/src/test/install
r136 r137 6 6 source ./client_install_func.sh 7 7 ### real code ##### 8 main () { 8 9 # 執行環境檢查 10 check_info () { 9 11 check_root 10 12 check_systemInfo … … 14 16 #check_dialog 15 17 #check_php 18 } 16 19 17 #set install mode 20 main () { 21 echo "歡迎使用NutchEZ\ 此安裝程序會為您新建一個nutchuser帳號" 18 22 set_install_information 19 23 show_info 20 24 read -p "Please confirm your install infomation: 1.Yes 2.No " confirm 21 25 if [ $confirm -eq 1 ]; then 22 23 26 # create_nutchuser_account 24 27 # make_ssh_key … … 26 29 # 解壓縮 27 30 # tar -zxvf nutchez-0.2-20100524.tar.gz 28 # mv -rnutchez /opt/31 # mv nutchez /opt/ 29 32 Install_Nutch 30 33 Install_Tomcat 31 #chown -R nutchuser:nutchuser /opt/nutchez34 chown -R nutchuser:nutchuser /opt/nutchez 32 35 # make_client_install 33 36 37 # 啟動系統 34 38 format_HDFS 35 39 start_up_NutchEZ … … 44 48 } 45 49 50 check_info 46 51 main -
nutchez-0.2/src/test/install_func.sh
r136 r137 12 12 ####### garbage end ############### 13 13 14 15 16 14 ####### fafa code here ########### 17 18 # 參數假設 19 # /home/nutchuser/NutchEZ_source下有3個檔案 20 # install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz 21 # 安裝路徑為/opt/NutchEZ 22 23 Install_source=/home/nutchuser/NutchEZ_source 24 NutchEZ_HOME=/opt/NutchEZ 15 User_HOME=/home/nutchuser/nutchez 16 NutchEZ_HOME=/opt/nutchez 17 Nutch_HOME=$NutchEZ_HOME/nutch 18 Tomcat_HOME=$NutchEZ_HOME/tomcat 19 Index_DB=$User_HOME/search 25 20 MasterIP_Address=`/sbin/ifconfig eth0 | grep 'inet addr' | sed 's/^.*addr://g' | sed 's/Bcast.*$//g' | sed 's/ .*// '` 26 21 22 # DNS暫時以IP取代 23 MasterDNS=$MasterIP_Address 27 24 28 set_install_information () { 29 read -p "Please enter administrator's e-mail address: " Admin_email 25 function set_install_information () { 26 read -p "Please enter nutchuser's password : " nutchuser_passwd 27 read -p "Please enter nutchuser's password again: " nutchuser_passwd_confirm 28 if [$nutchuser_passwd != $nutchuser_passwd_confirm] 29 set_install_information 30 fi 31 read -p "Please enter Administrator's e-mail address: " Admin_email 30 32 read -p "Please enter the Master DNS: " MasterDNS 31 33 } 32 34 33 show_info () {35 function show_info () { 34 36 echo "Administrator's e-mail address is $Admin_email." 
35 37 echo "The master DNS is: $MasterDNS" 36 38 } 37 39 38 confirm_install_information () {40 function confirm_install_information () { 39 41 read -p "Please confirm your install infomation: 1.Yes 2.No " confirm 40 42 } 41 43 42 # set $NutchEZ_HOME/conf/hadoop-env.sh 43 set_hadoop-env () { 44 echo "set $NutchEZ_HOME/conf/hadoop-env.sh" 45 cd $NutchEZ_HOME/conf/ 46 cat >> hadoop-env.sh << EOF 47 export JAVA_HOME=/usr/lib/jvm/java-6-sun 48 export HADOOP_HOME=$NutchEZ_HOME 49 export HADOOP_LOG_DIR=/tmp/NutchEZ/logs 50 export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves 51 export HADOOP_CONF_DIR=$NutchEZ_HOME/conf 52 export HADOOP_PID_DIR=/tmp/hadoop/pid 53 export NUTCH_HOME=$NutchEZ_HOME 54 export NUTCH_CONF_DIR=$NutchEZ_HOME/conf 55 EOF 56 } 57 58 # set $NutchEZ_HOME/conf/hadoop-site.xml 59 set_haoop-site () { 60 echo "set $NutchEZ_HOME/conf/hadoop-site.xml" 61 cd $NutchEZ_HOME/conf/ 44 function set_haoop-site () { 45 echo "set $Nutch_HOME/conf/hadoop-site.xml" 46 cd $Nutch_HOME/conf/ 62 47 cat > hadoop-site.xml << EOF 63 48 <configuration> 64 <property>49 <property> 65 50 <name>fs.default.name</name> 66 <value>$MasterDNS:9000</value> 67 <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description> 68 </property> 69 <property> 51 <value>hdfs://$MasterIP_Address:9000</value> 52 </property> 53 <property> 70 54 <name>mapred.job.tracker</name> 71 <value>$MasterDNS:9001</value> 72 <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. 
</description> 73 </property> 55 <value>$MasterIP_Address:9001</value> 56 </property> 57 <property> 58 <name>hadoop.tmp.dir</name> 59 <value>/var/nutchez/nutch-nutchuser</value> 60 </property> 74 61 </configuration> 75 62 EOF 76 63 } 77 64 78 set_nutch-site () { 79 echo "set $NutchEZ_HOME/conf/nutch-site.xml" 80 cd $NutchEZ_HOME/conf/ 81 cat > nutch-site.xml << EOF 82 <configuration> 83 <property> 84 <name>http.agent.name</name> 85 <value>nutchuser</value> 86 <description>HTTP 'User-Agent' request header. </description> 87 </property> 88 <property> 89 <name>http.agent.description</name> 90 <value>MyTest</value> 91 <description>Further description</description> 92 </property> 93 <property> 94 <name>http.agent.url</name> 95 <value>$MasterDNS</value> 96 <description>A URL to advertise in the User-Agent header. </description> 97 </property> 98 <property> 99 <name>$MasterDNS</name> 100 <value>$Admin_email</value> 101 <description>An email address 102 </description> 103 </property> 104 </configuration> 105 EOF 65 # 修改nutch-site.xml中-http.agent.url, http.agent.email 66 function set_nutch-site () { 67 echo "set $Nutch_HOME/conf/nutch-site.xml" 68 Line_NO=`cat $Nutch_HOME'/conf/nutch-site.xml' | grep -n 'http.agent.url' | sed 's/:.*//g'` 69 echo "debug...http.agent.url line number = $Line_NO..." 70 sed -i ''$((Line_NO+1))'d' $Nutch_HOME/conf/nutch-site.xml 71 echo "debug...edit http.agent.url delete line $((Line_NO+1))..." 72 sed -i ''$Line_NO'a <value>'$MasterIP_Address'</value>' $Nutch_HOME/conf/nutch-site.xml 73 echo "debug...edit http.agent.url done..." 74 75 Line_NO=`cat $Nutch_HOME'/conf/nutch-site.xml' | grep -n 'http.agent.email' | sed 's/:.*//g'` 76 echo "debug...http.agent.email line number = $Line_NO..." 77 78 sed -i ''$((Line_NO+1))'d' $Nutch_HOME/conf/nutch-site.xml 79 echo "debug...edit http.agent.email delete line $((Line_NO+1))..." 
80 sed -i ''$Line_NO'a <value>'$Admin_email'</value>' $Nutch_HOME/conf/nutch-site.xml 81 echo "debug...edit http.agent.email done..." 106 82 } 107 83 108 109 set_crawl-urlfilter () { 110 echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt" 111 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip file:, ftp:, & mailto: urls' | sed 's/:.*//g'` 112 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 113 sed -i ''$Line_NO'a -^(ftp|mailto):' $NutchEZ_HOME/conf/crawl-urlfilter.txt 114 115 116 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip image and other suffixes we can' | sed 's/:.*//g'` 117 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 118 sed -i ''$Line_NO'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' $NutchEZ_HOME/conf/crawl-urlfilter.txt 119 120 121 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'` 122 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 123 sed -i ''$Line_NO'a -[*!@]' $NutchEZ_HOME/conf/crawl-urlfilter.txt 124 125 126 Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip everything else' | sed 's/:.*//g'` 127 sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt 128 sed -i ''$Line_NO'a +.*' $NutchEZ_HOME/conf/crawl-urlfilter.txt 129 sed -i ''$Line_NO'a # accecpt anything else' $NutchEZ_HOME/conf/crawl-urlfilter.txt 84 function format_HDFS () { 85 echo "format HDFS..." 86 $Nutch_HOME/bin/hadoop namenode -format 130 87 } 131 88 132 f ormat_HDFS (){133 echo " format HDFS..."134 $NutchE Z_HOME/bin/hadoop namenode -format89 function start_up_NutchEZ (){ 90 echo "start up NutchEZ..." 91 $NutchE_HOME/bin/start-all.sh 135 92 } 136 93 137 start_up_NutchEZ (){ 138 echo "start up NutchEZ..." 
139 $NutchEZ_HOME/bin/start-all.sh 140 } 141 142 set_server () { 143 echo "$NutchEZ_HOME/tomcat/conf/server.xml" 144 Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '<!-- A "Connector" using the shared thread pool-->' | sed 's/:.*//g'` 145 146 sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml 147 sed -i ''$Line_NO'a <Connector port="8080" protocol="HTTP/1.1"\ 148 connectionTimeout="20000"\ 149 redirectPort="8443" URIEncoding="UTF-8"\ 150 useBodyEncodingForURI="true" />\ 151 ' $NutchEZ_HOME/tomcat/conf/server.xml 152 } 153 154 155 set_nutch-site2 () { 156 echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml" 157 158 # 搜尋加入設定的行號位址 159 line_NO=`cat $NutchEZ_HOME'/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml' | grep -n '<'configuration'>' | sed 's/:.*//g'` 160 161 # 加入設定檔 162 sed -i ''$line_NO'a <property>\ 163 <name>http.agent.name</name>\ 164 <value>nutch</value>\ 165 <description>HTTP 'User-Agent' request header. </description> \ 166 </property>\ 167 <property>\ 168 <name>http.agent.description</name>\ 169 <value>MyTest</value>\ 170 <description>Further description</description> \ 171 </property>\ 172 <property>\ 173 <name>http.agent.url</name> \ 174 <value>localhost</value> \ 175 <description>A URL to advertise in the User-Agent header. </description> \ 176 </property>\ 177 <property>\ 178 <name>http.agent.email</name>\ 179 <value>'$Admin_email'</value> \ 180 <description>An email address \ 181 </description> \ 182 </property>\ 183 <property>\ 184 <name>plugin.folders</name>\ 185 <value>'$NutchEZ_HOME'/plugins</value>\ 186 <description>Directories where nutch plugins are located. 
</description>\ 187 </property>\ 188 <property>\ 189 <name>plugin.includes</name>\ 190 <value>protocol-(http|httpclient)|urlfilter-regex|parse-(text|html|js|ext|msexcel|mspowerpoint|msword|oo|pdf|rss|swf|zip)|index-(more|basic|anchor)|query-(more|basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>\ 191 <description> Regular expression naming plugin directory names</description>\ 192 </property>\ 193 <property>\ 194 <name>parse.plugin.file</name>\ 195 <value>parse-plugins.xml</value>\ 196 <description>The name of the file that defines the associations between\ 197 content-types and parsers.</description>\ 198 </property>\ 199 <property>\ 200 <name>db.max.outlinks.per.page</name>\ 201 <value>-1</value>\ 202 <description> </description>\ 203 </property> \ 204 <property>\ 205 <name>http.content.limit</name> \ 206 <value>-1</value>\ 207 </property>\ 208 <property>\ 209 <name>indexer.mergeFactor</name>\ 210 <value>500</value>\ 211 <description>The factor that determines the frequency of Lucene segment\ 212 merges. This must not be less than 2, higher values increase indexing\ 213 speed but lead to increased RAM usage, and increase the number of\ 214 open file handles (which may lead to "Too many open files" errors).\ 215 NOTE: the "segments" here have nothing to do with Nutch segments, they\ 216 are a low-level data unit used by Lucene.\ 217 </description>\ 218 </property>\ 219 220 <property>\ 221 <name>indexer.minMergeDocs</name>\ 222 <value>500</value>\ 223 <description>This number determines the minimum number of Lucene\ 224 Documents buffered in memory between Lucene segment merges. 
Larger\ 225 values increase indexing speed and increase RAM usage.\ 226 </description>\ 227 </property>\ 228 229 ' $NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml 230 } 231 232 233 set_Nutch_conf () { 94 function Install_Nutch () { 234 95 set_hadoop-env 235 96 set_haoop-site 236 97 set_nutch-site 237 set_crawl-urlfilter238 98 } 239 99 240 241 Install_Nutch () { 242 cd /opt 243 tar zxf /opt/nutch-1.0.tar.gz 244 # tar zxvf /opt/nutch-1.0.tar.gz 245 mv /opt/nutch-1.0 NutchEZ 246 chown -R nutchuser:nutchuser $NutchEZ_HOME 247 set_Nutch_conf 100 function Install_Tomcat () { 101 # 設定nutch的搜尋引擎頁面到tomcat 102 cd $Nutch_HOME 103 mkdir web 104 cd web 105 jar -xvf ../nutch-1.0.war 106 mv $Tomcat_HOME/webapps/ROOT $Tomcat_HOME/webapps/ROOT-ori 107 cd $Nutch_HOME 108 mv $Nutch_HOME/web $Tomcat_HOME/webapps/ROOT 109 mkdir $Index_DB 248 110 } 249 111 250 # install tomcat 251 Install_Tomcat () { 252 cd /opt/ 253 # tar zxvf apache-tomcat-6.0.18.tar.gz 254 tar zxf apache-tomcat-6.0.18.tar.gz 255 mv apache-tomcat-6.0.18 $NutchEZ_HOME 256 cd $NutchEZ_HOME 257 mv apache-tomcat-6.0.18 tomcat 258 mkdir web 259 # mkdir $NutchEZ_HOME/search 260 chown -R nutchuser:nutchuser $NutchEZ_HOME 261 jar -xvf nutch-1.0.war web 262 mv $NutchEZ_HOME/tomcat/webapps/ROOT $NutchEZ_HOME/tomcat/webapps/ROOT-ori 263 mv $NutchEZ_HOME/web $NutchEZ_HOME/tomcat/webapps/ROOT 264 set_server 265 #set_nutch-site2 112 function start_up_tomcat () { 113 echo "start up tomcat..." 114 $Tomcat_HOME/bin/startup.sh 266 115 } 267 268 start_up_tomcat () {269 echo "start up tomcat..."270 $NutchEZ_HOME/tomcat/bin/startup.sh271 }
Note: See TracChangeset for help on using the changeset viewer.