source: nutchez-0.2/src/test/install_func.sh @ 136

Last change on this file since 136 was 136, checked in by shunfa, 14 years ago

install流程修改

  • Property svn:executable set to *
File size: 8.4 KB
Line 
1#!/bin/bash
2source install_lang
####### garbage here #############
# Print the message held in the global $Good on its own line.
# NOTE(review): $Good is presumably defined by the sourced install_lang
# file - confirm.
mainFunction() {
  # printf rather than echo so values such as "-n" are printed verbatim.
  printf '%s\n' "$Good"
}
# Print the message held in the global $Bra_Bra_Bra on its own line.
# NOTE(review): $Bra_Bra_Bra is presumably defined by the sourced
# install_lang file - confirm.
braBraBra() {
  # printf rather than echo so values such as "-n" are printed verbatim.
  printf '%s\n' "$Bra_Bra_Bra"
}
####### garbage end ###############
13
14
15
####### fafa code here ###########
17
# Assumed parameters:
#   /home/nutchuser/NutchEZ_source holds three files:
#   install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
#   The installation path is /opt/NutchEZ.

Install_source=/home/nutchuser/NutchEZ_source
NutchEZ_HOME=/opt/NutchEZ
# Address taken from the "inet addr:" field that ifconfig prints for eth0:
# everything up to "addr:" is dropped, then the "Bcast..." tail, then
# anything after the first blank. Stays empty when ifconfig prints no
# "inet addr" line for eth0.
MasterIP_Address=$(/sbin/ifconfig eth0 \
  | grep 'inet addr' \
  | sed -e 's/^.*addr://' -e 's/Bcast.*$//' -e 's/ .*//')
26
27
# Ask the operator for the installation details.
# Globals written: Admin_email, MasterDNS (one line of stdin each).
set_install_information () {
  # -r keeps backslashes in the typed answers instead of treating them as
  # escape characters.
  read -r -p "Please enter administrator's e-mail address:  " Admin_email
  read -r -p "Please enter the Master DNS:  " MasterDNS
}
32
# Echo back the values collected by the prompts so they can be reviewed.
# Globals read: Admin_email, MasterDNS.
show_info () {
  printf '%s\n' "Administrator's e-mail address is $Admin_email."
  printf '%s\n' "The master DNS is: $MasterDNS"
}
37
# Ask the operator to confirm the entered information.
# Globals written: confirm (the raw answer; the prompt offers 1 = Yes,
# 2 = No, but the answer is not validated here).
confirm_install_information () {
  # -r keeps the answer exactly as typed.
  read -r -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
}
# Append the NutchEZ environment settings to
# $NutchEZ_HOME/conf/hadoop-env.sh.
# Globals read: NutchEZ_HOME (expanded into the generated lines).
# Side effect:  the working directory is left at $NutchEZ_HOME/conf/.
# Returns:      1 when the conf directory cannot be entered, otherwise the
#               status of the append.
set_hadoop-env () {
  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
  # Stop here if the directory is missing; otherwise the lines would be
  # appended to a hadoop-env.sh in whatever directory we happen to be in.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat >> hadoop-env.sh << EOF
export JAVA_HOME=/usr/lib/jvm/java-6-sun
export HADOOP_HOME=$NutchEZ_HOME
export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
export HADOOP_CONF_DIR=$NutchEZ_HOME/conf
export HADOOP_PID_DIR=/tmp/hadoop/pid
export NUTCH_HOME=$NutchEZ_HOME
export NUTCH_CONF_DIR=$NutchEZ_HOME/conf
EOF
}
57
# Write (overwriting any existing file) $NutchEZ_HOME/conf/hadoop-site.xml
# with the default file system and job tracker both pointing at the master.
# Globals read: NutchEZ_HOME, MasterDNS.
# Side effect:  the working directory is left at $NutchEZ_HOME/conf/.
# Returns:      1 when the conf directory cannot be entered, otherwise the
#               status of the write.
set_haoop-site () {
  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
  # Stop here if the directory is missing; otherwise a hadoop-site.xml in
  # the current directory would be overwritten instead.
  cd "$NutchEZ_HOME/conf/" || return 1
  cat > hadoop-site.xml << EOF
<configuration>
<property>
    <name>fs.default.name</name>
    <value>$MasterDNS:9000</value>
    <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
</property>
<property>
    <name>mapred.job.tracker</name>
    <value>$MasterDNS:9001</value>
    <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
</property>
</configuration>
EOF
}
77
# Write (overwriting any existing file) $NutchEZ_HOME/conf/nutch-site.xml
# with the crawler's HTTP agent identity.
# Globals read: NutchEZ_HOME, MasterDNS, Admin_email.
# Side effect:  the working directory is left at $NutchEZ_HOME/conf/.
# Returns:      1 when the conf directory cannot be entered, otherwise the
#               status of the write.
set_nutch-site () {
  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
  # Stop here if the directory is missing; otherwise a nutch-site.xml in
  # the current directory would be overwritten instead.
  cd "$NutchEZ_HOME/conf/" || return 1
  # The last property carries the administrator's e-mail address, so it is
  # named http.agent.email (it used to be named after the value of
  # $MasterDNS, which is not a property name).
  cat > nutch-site.xml << EOF
<configuration>
<property>
  <name>http.agent.name</name>
  <value>nutchuser</value>
  <description>HTTP 'User-Agent' request header. </description>
</property>
<property>
  <name>http.agent.description</name>
  <value>MyTest</value>
  <description>Further description</description>
</property>
<property>
  <name>http.agent.url</name>
  <value>$MasterDNS</value>
  <description>A URL to advertise in the User-Agent header. </description>
</property>
<property>
  <name>http.agent.email</name>
  <value>$Admin_email</value>
  <description>An email address
  </description>
</property>
</configuration>
EOF
}
107
108
# Print the number of the first line of file $1 that contains the literal
# text $2. Prints a diagnostic to stderr and returns 1 when no line matches.
_crawl_urlfilter_marker_line () {
  local file=$1 marker=$2 line_no
  line_no=$(grep -n -m 1 -F -- "$marker" "$file" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_crawl-urlfilter: marker not found in $file: $marker" >&2
    return 1
  fi
  echo "$line_no"
}

# Rewrite four rules in $NutchEZ_HOME/conf/crawl-urlfilter.txt in place.
# For each marker comment, the line directly below it is deleted and a
# replacement rule is inserted right after the marker.
# Globals read: NutchEZ_HOME.
# Returns:      1 as soon as a marker comment cannot be found (edits made
#               for earlier markers stay in place), otherwise the status of
#               the last sed.
# Without a line number the sed commands below would delete line 1 and
# append the rule after every line, hence the lookup through the helper.
set_crawl-urlfilter () {
  local filter="$NutchEZ_HOME/conf/crawl-urlfilter.txt"
  local line_no

  # Names the file that is really edited (the message used to say
  # set_crawl-urlfilter.txt, which does not exist).
  echo "set $filter"

  line_no=$(_crawl_urlfilter_marker_line "$filter" \
    'skip file:, ftp:, & mailto: urls') || return 1
  sed -i "$((line_no + 1))d" "$filter"
  sed -i "${line_no}a -^(ftp|mailto):" "$filter"

  line_no=$(_crawl_urlfilter_marker_line "$filter" \
    'skip image and other suffixes we can') || return 1
  sed -i "$((line_no + 1))d" "$filter"
  # Single quotes: sed must see "\\" (it prints one backslash) and "$".
  sed -i "${line_no}"'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' "$filter"

  line_no=$(_crawl_urlfilter_marker_line "$filter" \
    'skip URLs containing certain characters as probable queries, etc.') || return 1
  sed -i "$((line_no + 1))d" "$filter"
  sed -i "${line_no}a -[*!@]" "$filter"

  line_no=$(_crawl_urlfilter_marker_line "$filter" \
    'skip everything else') || return 1
  sed -i "$((line_no + 1))d" "$filter"
  # Both lines are appended after the marker, so the one appended last
  # ends up first: the comment lands above the "+.*" rule.
  sed -i "${line_no}a +.*" "$filter"
  sed -i "${line_no}a # accecpt anything else" "$filter"
}
131
# Run "hadoop namenode -format" using the hadoop launcher under
# $NutchEZ_HOME/bin.
# Globals read: NutchEZ_HOME.
# Returns:      the exit status of the hadoop command.
format_HDFS () {
  echo "format HDFS..."
  # Quoted so an install path containing blanks is still one command word.
  "$NutchEZ_HOME/bin/hadoop" namenode -format
}
136
# Run $NutchEZ_HOME/bin/start-all.sh.
# Globals read: NutchEZ_HOME.
# Returns:      the exit status of start-all.sh.
start_up_NutchEZ () {
  echo "start up NutchEZ..."
  # Quoted so an install path containing blanks is still one command word.
  "$NutchEZ_HOME/bin/start-all.sh"
}
141
# Replace the block that follows the 'A "Connector" using the shared thread
# pool' comment in $NutchEZ_HOME/tomcat/conf/server.xml with an HTTP
# connector on port 8080 that uses UTF-8 URI encoding.
# Globals read: NutchEZ_HOME.
# Returns:      1 (file untouched) when the marker comment is not found,
#               otherwise the status of the last sed.
set_server () {
  local server_xml="$NutchEZ_HOME/tomcat/conf/server.xml"
  local line_no

  echo "$server_xml"

  line_no=$(grep -n -m 1 -F -- \
    '<!-- A "Connector" using the shared thread pool-->' "$server_xml" \
    | cut -d: -f1)
  # Without a line number the commands below would delete lines 1-6 of the
  # file and append the connector after every line.
  if [[ -z "$line_no" ]]; then
    echo "set_server: Connector marker comment not found in $server_xml" >&2
    return 1
  fi

  # The six lines below the marker are removed unconditionally.
  # NOTE(review): presumably the commented-out executor Connector of the
  # stock server.xml - confirm against the shipped file.
  sed -i "$((line_no + 1)),$((line_no + 6))d" "$server_xml"
  sed -i "${line_no}"'a    <Connector port="8080" protocol="HTTP/1.1"\
               connectionTimeout="20000"\
               redirectPort="8443" URIEncoding="UTF-8"\
               useBodyEncodingForURI="true" />\
' "$server_xml"
}
153
154
# Insert the NutchEZ search settings right after the first <configuration>
# line of $NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml.
# Globals read: NutchEZ_HOME, Admin_email (both expanded into the XML).
# Returns:      1 (file untouched) when no <configuration> line exists or
#               no temporary file can be created, otherwise the status of
#               the sed that performs the insertion.
#
# The XML is written to a temporary file and inserted with sed's "r"
# command. The earlier one-piece sed "a" script needed a trailing backslash
# on every line; two blank lines lacked it, which ended the text early and
# made sed reject the rest of the script as unknown commands.
set_nutch-site2 () {
  local site_xml="$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
  local line_no snippet status

  echo "$site_xml"

  # Find the line number after which the settings are inserted.
  line_no=$(grep -n -m 1 -F -- '<configuration>' "$site_xml" | cut -d: -f1)
  if [[ -z "$line_no" ]]; then
    echo "set_nutch-site2: no <configuration> line in $site_xml" >&2
    return 1
  fi

  snippet=$(mktemp) || return 1

  # The settings to add.
  cat > "$snippet" << EOF
<property>
  <name>http.agent.name</name>
  <value>nutch</value>
  <description>HTTP 'User-Agent' request header. </description>
</property>
<property>
  <name>http.agent.description</name>
  <value>MyTest</value>
  <description>Further description</description>
</property>
<property>
  <name>http.agent.url</name>
  <value>localhost</value>
  <description>A URL to advertise in the User-Agent header. </description>
</property>
<property>
  <name>http.agent.email</name>
  <value>$Admin_email</value>
  <description>An email address
  </description>
</property>
<property>
  <name>plugin.folders</name>
  <value>$NutchEZ_HOME/plugins</value>
  <description>Directories where nutch plugins are located. </description>
</property>
<property>
  <name>plugin.includes</name>
  <value>protocol-(http|httpclient)|urlfilter-regex|parse-(text|html|js|ext|msexcel|mspowerpoint|msword|oo|pdf|rss|swf|zip)|index-(more|basic|anchor)|query-(more|basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
  <description> Regular expression naming plugin directory names</description>
 </property>
 <property>
  <name>parse.plugin.file</name>
  <value>parse-plugins.xml</value>
  <description>The name of the file that defines the associations between
  content-types and parsers.</description>
 </property>
 <property>
   <name>db.max.outlinks.per.page</name>
   <value>-1</value>
   <description> </description>
 </property>
 <property>
   <name>http.content.limit</name>
   <value>-1</value>
 </property>
<property>
  <name>indexer.mergeFactor</name>
  <value>500</value>
  <description>The factor that determines the frequency of Lucene segment
  merges. This must not be less than 2, higher values increase indexing
  speed but lead to increased RAM usage, and increase the number of
  open file handles (which may lead to "Too many open files" errors).
  NOTE: the "segments" here have nothing to do with Nutch segments, they
  are a low-level data unit used by Lucene.
  </description>
</property>

<property>
  <name>indexer.minMergeDocs</name>
  <value>500</value>
  <description>This number determines the minimum number of Lucene
  Documents buffered in memory between Lucene segment merges. Larger
  values increase indexing speed and increase RAM usage.
  </description>
</property>

EOF

  sed -i "${line_no}r ${snippet}" "$site_xml"
  status=$?
  rm -f -- "$snippet"
  return "$status"
}
231
232
# Generate all Nutch/Hadoop configuration files, in order: hadoop-env.sh,
# hadoop-site.xml, nutch-site.xml, crawl-urlfilter.txt.
# Returns: 1 as soon as one step reports failure (later steps are skipped),
#          0 when all four succeed.
set_Nutch_conf () {
  set_hadoop-env || return 1
  set_haoop-site || return 1
  set_nutch-site || return 1
  set_crawl-urlfilter || return 1
}
239
240
# Unpack /opt/nutch-1.0.tar.gz, move the result to $NutchEZ_HOME, hand it
# to nutchuser and generate the configuration files.
# Globals read: NutchEZ_HOME.
# Side effect:  the working directory is changed to /opt (set_Nutch_conf
#               may change it again).
# Returns:      1 as soon as one step fails, otherwise the status of
#               set_Nutch_conf.
Install_Nutch () {
  cd /opt || return 1
  # Use "tar zxvf" instead to list the files while unpacking.
  tar zxf /opt/nutch-1.0.tar.gz || return 1
  # Move to the configured home rather than a hard-coded "NutchEZ", so the
  # chown and the configuration steps below operate on the same directory.
  mv /opt/nutch-1.0 "$NutchEZ_HOME" || return 1
  chown -R nutchuser:nutchuser "$NutchEZ_HOME" || return 1
  set_Nutch_conf
}
249
# Install Tomcat: unpack /opt/apache-tomcat-6.0.18.tar.gz, move it to
# $NutchEZ_HOME/tomcat, unpack nutch-1.0.war and install it as Tomcat's
# ROOT web application (the stock ROOT is kept as ROOT-ori), then patch
# server.xml through set_server.
# Globals read: NutchEZ_HOME.
# Side effect:  the working directory is left at $NutchEZ_HOME.
# Returns:      1 as soon as one step fails, otherwise the status of
#               set_server.
Install_Tomcat () {
  cd /opt/ || return 1
  # Use "tar zxvf" instead to list the files while unpacking.
  tar zxf apache-tomcat-6.0.18.tar.gz || return 1
  mv apache-tomcat-6.0.18 "$NutchEZ_HOME" || return 1
  cd "$NutchEZ_HOME" || return 1
  mv apache-tomcat-6.0.18 tomcat || return 1
  mkdir -p web || return 1
  # Names given after the archive tell jar which entries to extract; they
  # are not a destination directory. The archive is therefore unpacked from
  # inside web/, in a subshell so the working directory here is unchanged.
  # NOTE(review): assumes nutch-1.0.war sits directly in $NutchEZ_HOME.
  ( cd web && jar -xvf ../nutch-1.0.war ) || return 1
  mv "$NutchEZ_HOME/tomcat/webapps/ROOT" "$NutchEZ_HOME/tomcat/webapps/ROOT-ori" || return 1
  mv "$NutchEZ_HOME/web" "$NutchEZ_HOME/tomcat/webapps/ROOT" || return 1
  # Ownership is fixed last so the files unpacked above are covered too.
  chown -R nutchuser:nutchuser "$NutchEZ_HOME" || return 1
  set_server
  #set_nutch-site2
}
267
# Run Tomcat's startup.sh from $NutchEZ_HOME/tomcat/bin.
# Globals read: NutchEZ_HOME.
# Returns:      the exit status of startup.sh.
start_up_tomcat () {
  echo "start up tomcat..."
  # Quoted so an install path containing blanks is still one command word.
  "$NutchEZ_HOME/tomcat/bin/startup.sh"
}
Note: See TracBrowser for help on using the repository browser.