Changeset 137 for nutchez-0.2


Ignore:
Timestamp:
May 31, 2010, 11:02:58 AM (14 years ago)
Author:
shunfa
Message:

修改 install, install_func.sh

Location:
nutchez-0.2/src/test
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • nutchez-0.2/src/test/install

    r136 r137  
    66source ./client_install_func.sh
    77### real code #####
    8 main () {
     8
     9# 執行環境檢查
     10check_info () {
    911  check_root
    1012  check_systemInfo
     
    1416  #check_dialog
    1517  #check_php
     18}
    1619
    17   #set install mode
     20main () {
     21  echo "歡迎使用NutchEZ\ 此安裝程序會為您新建一個nutchuser帳號"
    1822  set_install_information
    1923  show_info
    2024  read -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
    2125  if [ $confirm -eq 1 ]; then
    22 
    2326    # create_nutchuser_account
    2427    # make_ssh_key
     
    2629    # 解壓縮
    2730    # tar -zxvf nutchez-0.2-20100524.tar.gz
    28     # mv -r nutchez /opt/
     31    # mv nutchez /opt/
    2932    Install_Nutch
    3033    Install_Tomcat
    31     # chown -R nutchuser:nutchuser /opt/nutchez
     34    chown -R nutchuser:nutchuser /opt/nutchez
    3235    # make_client_install
    3336
     37    # 啟動系統
    3438    format_HDFS
    3539    start_up_NutchEZ
     
    4448}
    4549
     50check_info
    4651main
  • nutchez-0.2/src/test/install_func.sh

    r136 r137  
    1212####### garbage end ###############
    1313
    14 
    15 
    1614####### fafa code here ###########
    17 
    18 # 參數假設
    19 # /home/nutchuser/NutchEZ_source下有3個檔案
    20 # install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
    21 # 安裝路徑為/opt/NutchEZ
    22 
    23 Install_source=/home/nutchuser/NutchEZ_source
    24 NutchEZ_HOME=/opt/NutchEZ
     15User_HOME=/home/nutchuser/nutchez
     16NutchEZ_HOME=/opt/nutchez
     17Nutch_HOME=$NutchEZ_HOME/nutch
     18Tomcat_HOME=$NutchEZ_HOME/tomcat
     19Index_DB=$User_HOME/search
    2520MasterIP_Address=`/sbin/ifconfig eth0 | grep 'inet addr' |  sed 's/^.*addr://g' | sed 's/Bcast.*$//g' | sed 's/ .*// '`
    2621
     22# DNS暫時以IP取代
     23MasterDNS=$MasterIP_Address
    2724
    28 set_install_information () {
    29   read -p "Please enter administrator's e-mail address:  " Admin_email
     25function set_install_information () {
     26  read -p "Please enter nutchuser's password :  " nutchuser_passwd
     27  read -p "Please enter nutchuser's password again:  " nutchuser_passwd_confirm
     28  if [$nutchuser_passwd != $nutchuser_passwd_confirm]
     29    set_install_information
     30  fi
     31  read -p "Please enter Administrator's e-mail address: " Admin_email
    3032  read -p "Please enter the Master DNS:  " MasterDNS
    3133}
    3234
    33 show_info () {
     35function show_info () {
    3436  echo "Administrator's e-mail address is $Admin_email."
    3537  echo "The master DNS is: $MasterDNS"
    3638}
    3739
    38 confirm_install_information () {
     40function confirm_install_information () {
    3941  read -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
    4042}
    4143
    42 # set $NutchEZ_HOME/conf/hadoop-env.sh
    43 set_hadoop-env () {
    44   echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
    45   cd $NutchEZ_HOME/conf/
    46   cat >> hadoop-env.sh << EOF
    47 export JAVA_HOME=/usr/lib/jvm/java-6-sun
    48 export HADOOP_HOME=$NutchEZ_HOME
    49 export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
    50 export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
    51 export HADOOP_CONF_DIR=$NutchEZ_HOME/conf
    52 export HADOOP_PID_DIR=/tmp/hadoop/pid
    53 export NUTCH_HOME=$NutchEZ_HOME
    54 export NUTCH_CONF_DIR=$NutchEZ_HOME/conf
    55 EOF
    56 }
    57 
    58 # set $NutchEZ_HOME/conf/hadoop-site.xml
    59 set_haoop-site () {
    60   echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
    61   cd $NutchEZ_HOME/conf/
     44function set_haoop-site () {
     45  echo "set $Nutch_HOME/conf/hadoop-site.xml"
     46  cd $Nutch_HOME/conf/
    6247  cat > hadoop-site.xml << EOF
    6348<configuration>
    64 <property>
     49  <property>
    6550    <name>fs.default.name</name>
    66     <value>$MasterDNS:9000</value>
    67     <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
    68 </property>
    69 <property>
     51    <value>hdfs://$MasterIP_Address:9000</value>
     52  </property>
     53  <property>
    7054    <name>mapred.job.tracker</name>
    71     <value>$MasterDNS:9001</value>
    72     <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
    73 </property>
     55    <value>$MasterIP_Address:9001</value>
     56  </property>
     57  <property>
     58    <name>hadoop.tmp.dir</name>
     59    <value>/var/nutchez/nutch-nutchuser</value>
     60  </property>
    7461</configuration>
    7562EOF
    7663}
    7764
    78 set_nutch-site () {
    79   echo "set $NutchEZ_HOME/conf/nutch-site.xml"
    80   cd $NutchEZ_HOME/conf/
    81   cat > nutch-site.xml << EOF
    82 <configuration>
    83 <property>
    84   <name>http.agent.name</name>
    85   <value>nutchuser</value>
    86   <description>HTTP 'User-Agent' request header. </description>
    87 </property>
    88 <property>
    89   <name>http.agent.description</name>
    90   <value>MyTest</value>
    91   <description>Further description</description>
    92 </property>
    93 <property>
    94   <name>http.agent.url</name>
    95   <value>$MasterDNS</value>
    96   <description>A URL to advertise in the User-Agent header. </description>
    97 </property>
    98 <property>
    99   <name>$MasterDNS</name>
    100   <value>$Admin_email</value>
    101   <description>An email address
    102   </description>
    103 </property>
    104 </configuration>
    105 EOF
     65# 修改nutch-site.xml中-http.agent.url, http.agent.email
     66function set_nutch-site () {
     67  echo "set $Nutch_HOME/conf/nutch-site.xml"
     68  Line_NO=`cat $Nutch_HOME'/conf/nutch-site.xml' | grep -n 'http.agent.url' | sed 's/:.*//g'`
     69  echo "debug...http.agent.url line number = $Line_NO..."
     70  sed -i ''$((Line_NO+1))'d' $Nutch_HOME/conf/nutch-site.xml
     71  echo "debug...edit http.agent.url delete line $((Line_NO+1))..."
     72  sed -i ''$Line_NO'a <value>'$MasterIP_Address'</value>' $Nutch_HOME/conf/nutch-site.xml
     73  echo "debug...edit http.agent.url done..."
     74
     75  Line_NO=`cat $Nutch_HOME'/conf/nutch-site.xml' | grep -n 'http.agent.email' | sed 's/:.*//g'`
     76  echo "debug...http.agent.email line number = $Line_NO..."
     77
     78  sed -i ''$((Line_NO+1))'d' $Nutch_HOME/conf/nutch-site.xml
     79  echo "debug...edit http.agent.email delete line $((Line_NO+1))..."
     80  sed -i ''$Line_NO'a <value>'$Admin_email'</value>' $Nutch_HOME/conf/nutch-site.xml
     81  echo "debug...edit http.agent.email done..."
    10682}
    10783
    108 
    109 set_crawl-urlfilter () {
    110   echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt"
    111   Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip file:, ftp:, & mailto: urls' | sed 's/:.*//g'`
    112   sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    113   sed -i ''$Line_NO'a -^(ftp|mailto):' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    114 
    115 
    116   Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip image and other suffixes we can' | sed 's/:.*//g'`
    117   sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    118   sed -i ''$Line_NO'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    119 
    120 
    121   Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'`
    122   sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    123   sed -i ''$Line_NO'a -[*!@]' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    124 
    125 
    126   Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip everything else' | sed 's/:.*//g'`
    127   sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    128   sed -i ''$Line_NO'a +.*' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    129   sed -i ''$Line_NO'a # accecpt anything else' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     84function format_HDFS () {
     85  echo "format HDFS..."
     86  $Nutch_HOME/bin/hadoop namenode -format
    13087}
    13188
    132 format_HDFS () {
    133   echo "format HDFS..."
    134   $NutchEZ_HOME/bin/hadoop namenode -format
     89function start_up_NutchEZ (){
     90  echo "start up NutchEZ..."
     91  $NutchE_HOME/bin/start-all.sh
    13592}
    13693
    137 start_up_NutchEZ (){
    138   echo "start up NutchEZ..."
    139   $NutchEZ_HOME/bin/start-all.sh
    140 }
    141 
    142 set_server () {
    143   echo "$NutchEZ_HOME/tomcat/conf/server.xml"
    144   Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '<!-- A "Connector" using the shared thread pool-->' | sed 's/:.*//g'`
    145 
    146   sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml
    147   sed -i ''$Line_NO'a    <Connector port="8080" protocol="HTTP/1.1"\
    148                connectionTimeout="20000"\
    149                redirectPort="8443" URIEncoding="UTF-8"\
    150                useBodyEncodingForURI="true" />\
    151 ' $NutchEZ_HOME/tomcat/conf/server.xml
    152 }
    153 
    154 
    155 set_nutch-site2 () {
    156   echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
    157  
    158   # 搜尋加入設定的行號位址
    159   line_NO=`cat $NutchEZ_HOME'/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml' | grep -n '<'configuration'>' | sed 's/:.*//g'`
    160  
    161   # 加入設定檔
    162   sed -i ''$line_NO'a  <property>\
    163   <name>http.agent.name</name>\
    164   <value>nutch</value>\
    165   <description>HTTP 'User-Agent' request header. </description> \
    166 </property>\
    167 <property>\
    168   <name>http.agent.description</name>\
    169   <value>MyTest</value>\
    170   <description>Further description</description> \
    171 </property>\
    172 <property>\
    173   <name>http.agent.url</name> \
    174   <value>localhost</value> \
    175   <description>A URL to advertise in the User-Agent header. </description> \
    176 </property>\
    177 <property>\
    178   <name>http.agent.email</name>\
    179   <value>'$Admin_email'</value> \
    180   <description>An email address \
    181   </description> \
    182 </property>\
    183 <property>\
    184   <name>plugin.folders</name>\
    185   <value>'$NutchEZ_HOME'/plugins</value>\
    186   <description>Directories where nutch plugins are located. </description>\
    187 </property>\
    188 <property>\
    189   <name>plugin.includes</name>\
    190   <value>protocol-(http|httpclient)|urlfilter-regex|parse-(text|html|js|ext|msexcel|mspowerpoint|msword|oo|pdf|rss|swf|zip)|index-(more|basic|anchor)|query-(more|basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>\
    191   <description> Regular expression naming plugin directory names</description>\
    192  </property>\
    193  <property>\
    194   <name>parse.plugin.file</name>\
    195   <value>parse-plugins.xml</value>\
    196   <description>The name of the file that defines the associations between\
    197   content-types and parsers.</description>\
    198  </property>\
    199  <property>\
    200    <name>db.max.outlinks.per.page</name>\
    201    <value>-1</value>\
    202    <description> </description>\
    203  </property> \
    204  <property>\
    205    <name>http.content.limit</name> \
    206    <value>-1</value>\
    207  </property>\
    208 <property>\
    209   <name>indexer.mergeFactor</name>\
    210   <value>500</value>\
    211   <description>The factor that determines the frequency of Lucene segment\
    212   merges. This must not be less than 2, higher values increase indexing\
    213   speed but lead to increased RAM usage, and increase the number of\
    214   open file handles (which may lead to "Too many open files" errors).\
    215   NOTE: the "segments" here have nothing to do with Nutch segments, they\
    216   are a low-level data unit used by Lucene.\
    217   </description>\
    218 </property>\
    219 
    220 <property>\
    221   <name>indexer.minMergeDocs</name>\
    222   <value>500</value>\
    223   <description>This number determines the minimum number of Lucene\
    224   Documents buffered in memory between Lucene segment merges. Larger\
    225   values increase indexing speed and increase RAM usage.\
    226   </description>\
    227 </property>\
    228 
    229 ' $NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml
    230 }
    231 
    232 
    233 set_Nutch_conf () {
     94function Install_Nutch () {
    23495  set_hadoop-env
    23596  set_haoop-site
    23697  set_nutch-site
    237   set_crawl-urlfilter
    23898}
    23999
    240 
    241 Install_Nutch () {
    242   cd /opt
    243   tar zxf /opt/nutch-1.0.tar.gz
    244 #  tar zxvf /opt/nutch-1.0.tar.gz
    245   mv /opt/nutch-1.0  NutchEZ
    246   chown -R nutchuser:nutchuser $NutchEZ_HOME
    247   set_Nutch_conf
     100function Install_Tomcat () {
     101# 設定nutch的搜尋引擎頁面到tomcat 
     102  cd $Nutch_HOME
     103  mkdir web
     104  cd web
     105  jar -xvf ../nutch-1.0.war
     106  mv $Tomcat_HOME/webapps/ROOT $Tomcat_HOME/webapps/ROOT-ori
     107  cd $Nutch_HOME
     108  mv $Nutch_HOME/web $Tomcat_HOME/webapps/ROOT
     109  mkdir $Index_DB
    248110}
    249111
    250 # install tomcat
    251 Install_Tomcat () {
    252   cd /opt/
    253 #  tar zxvf apache-tomcat-6.0.18.tar.gz
    254   tar zxf apache-tomcat-6.0.18.tar.gz
    255   mv apache-tomcat-6.0.18 $NutchEZ_HOME
    256   cd $NutchEZ_HOME
    257   mv  apache-tomcat-6.0.18 tomcat
    258   mkdir web
    259   # mkdir $NutchEZ_HOME/search
    260   chown -R nutchuser:nutchuser $NutchEZ_HOME
    261   jar -xvf nutch-1.0.war web
    262   mv $NutchEZ_HOME/tomcat/webapps/ROOT $NutchEZ_HOME/tomcat/webapps/ROOT-ori
    263   mv $NutchEZ_HOME/web $NutchEZ_HOME/tomcat/webapps/ROOT
    264   set_server
    265   #set_nutch-site2
     112function start_up_tomcat () {
     113  echo "start up tomcat..."
     114  $Tomcat_HOME/bin/startup.sh
    266115}
    267 
    268 start_up_tomcat () {
    269   echo "start up tomcat..."
    270   $NutchEZ_HOME/tomcat/bin/startup.sh
    271 }
Note: See TracChangeset for help on using the changeset viewer.