Changeset 130 for nutchez-0.2


Ignore:
Timestamp:
May 27, 2010, 10:57:57 AM (14 years ago)
Author:
shunfa
Message:

Modify install and install_func.sh

Location:
nutchez-0.2/src/test
Files:
2 edited

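Legend:

Unmodified (line shows both the old and the new line number)
Added (line shows only the new, right-hand line number)
Removed (line shows only the old, left-hand line number)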
  • nutchez-0.2/src/test/install

    r125 r130  
    3131    format_HDFS
    3232    start_up_NutchEZ
    33     install_tomcat
     33    Install_Tomcat
    3434    start_up_tomcat
    3535
    3636    # 安裝流程結束,並進入網頁管理頁面設定爬網網址...等 
    3737    echo "Install Successfully!!"
    38     echo "Visit http://$MasterIP_Address:portNO"
     38    echo "Visit http://$MasterIP_Address:8080"
    3939  elif [ $confirm -eq 2 ]; then
    4040    main
  • nutchez-0.2/src/test/install_func.sh

    r126 r130  
    1919# /home/nutchuser/NutchEZ_source下有3個檔案
    2020# install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
    21 # 安裝路徑為/opt/nutchEZ
    22 
    23 Nutch_install_folder=/opt/nutchEZ
     21# 安裝路徑為/opt/NutchEZ
     22
    2423Install_source=/home/nutchuser/NutchEZ_source
    25 NutchEZ_HOME=/opt/nutchEZ
    26 MasterIP=`/sbin/ifconfig eth0 | grep 'inet addr' |  sed 's/^.*addr://g' | sed 's/Bcast.*$//g'`
     24NutchEZ_HOME=/opt/NutchEZ
     25MasterIP_Address=`/sbin/ifconfig eth0 | grep 'inet addr' |  sed 's/^.*addr://g' | sed 's/Bcast.*$//g' | sed 's/ .*// '`
    2726
    2827
    2928set_install_information () {
    30   read -p "Please enter administrator's e-mail address:  " admin_email
     29  read -p "Please enter administrator's e-mail address:  " Admin_email
    3130  read -p "Please enter the Master DNS:  " MasterDNS
    3231}
     
    4241
    4342Install_Nutch () {
    44   tar zxvf $Install_source/nutch-1.0.tar.gz /opt/
    45   mv nutch-1.0 nutchEZ
    46   chown -R nutchuser:nutchuser /opt/nutchEZ
     43  cd /opt
     44  tar zxf /opt/nutch-1.0.tar.gz
     45#  tar zxvf /opt/nutch-1.0.tar.gz
     46  mv /opt/nutch-1.0  NutchEZ
     47  chown -R nutchuser:nutchuser $NutchEZ_HOME
    4748  set_Nutch_conf
    4849}
     
    5859set_hadoop-env () {
    5960  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
     61  cd $NutchEZ_HOME/conf/
    6062  cat >> hadoop-env.sh << EOF
    6163export JAVA_HOME=/usr/lib/jvm/java-6-sun
    62 export HADOOP_HOME=/opt/nutch
    63 export HADOOP_LOG_DIR=/tmp/nutch/logs
    64 export HADOOP_SLAVES=/opt/nutch/conf/slaves
     64export HADOOP_HOME=/opt/NutchEZ
     65export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
     66export HADOOP_SLAVES=/opt/NutchEZ/conf/slaves
    6567EOF
    6668}
    6769
    6870# set $NutchEZ_HOME/conf/hadoop-site.xml
    69 set_hadoop-site () {
     71set_haoop-site () {
    7072  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
    71   cat > $NutchEZ_HOME/conf/hadoop-site.xml << EOF
     73  cd $NutchEZ_HOME/conf/
     74  cat > hadoop-site.xml << EOF
    7275<configuration>
    7376<property>
     
    8790set_nutch-site () {
    8891  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
    89   cat > $NutchEZ_HOME/conf/nutch-site.xml << EOF
     92  cd $NutchEZ_HOME/conf/
     93  cat > nutch-site.xml << EOF
    9094<configuration>
    9195<property>
     
    116120
    117121set_crawl-urlfilter () {
    118   echo "set NutchEZ_HOME/conf/set_crawl-urlfilter.txt"
    119   Line_NO=`cat '$NutchEZ'/conf/crawl-urlfilter.txt | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'`
    120   sed -i ''$((Line_NO+1))'d' $NutchEZ/conf/crawl-urlfilter.txt
    121   sed -i ''$Line_NO'a -[*!@]' $NutchEZ/conf/crawl-urlfilter.txt
    122   Line_NO=`cat crawl-urlfilter.txt | grep -n 'accept hosts in MY.DOMAIN.NAME' | sed 's/:.*//g'`
    123   sed -i ''$((Line_NO+1))'d' $NutchEZ/conf/crawl-urlfilter.txt
    124   sed -i ''$Line_NO'a +^http://([a-z0-9]*\.)*.*/' $NutchEZ/conf/crawl-urlfilter.txt
     122  echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt"
     123  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip file:, ftp:, & mailto: urls' | sed 's/:.*//g'`
     124  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     125  sed -i ''$Line_NO'a -^(ftp|mailto):' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     126
     127
     128  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip image and other suffixes we can' | sed 's/:.*//g'`
     129  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     130  sed -i ''$Line_NO'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     131
     132
     133  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'`
     134  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     135  sed -i ''$Line_NO'a -[*!@]' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     136
     137
     138  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip everything else' | sed 's/:.*//g'`
     139  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     140  sed -i ''$Line_NO'a +.*' $NutchEZ_HOME/conf/crawl-urlfilter.txt
     141  sed -i ''$Line_NO'a # accecpt anything else' $NutchEZ_HOME/conf/crawl-urlfilter.txt
    125142}
    126143
     
    130147}
    131148
    132 
    133149start_up_NutchEZ (){
    134150  echo "start up NutchEZ..."
     
    138154# install tomcat
    139155Install_Tomcat () {
    140   tar zxvf $Install_source/apache-tomcat-6.0.18.tar.gz $NutchEZ_HOME
    141   mv $NutchEZ_HOME/apache-tomcat-6.0.18 $NutchEZ_HOME/tomcat
     156  cd /opt/
     157#  tar zxf apache-tomcat-6.0.18.tar.gz
     158  tar zxf apache-tomcat-6.0.18.tar.gz
     159  mv apache-tomcat-6.0.18 $NutchEZ_HOME
     160  cd $NutchEZ_HOME
     161  mv  apache-tomcat-6.0.18 tomcat
    142162  chown -R nutchuser:nutchuser $NutchEZ_HOME
    143163  mkdir $NutchEZ_HOME/web
     
    150170}
    151171
     172
    152173set_server () {
    153174  echo "$NutchEZ_HOME/tomcat/conf/server.xml"
    154   Line_NO=`cat '$NutchEZ'/tomcat/conf/server.xml | grep -n '<Connector port="8080" protocol="HTTP/1.1"' | sed 's/:.*//g'`
    155   sed -i ''$Line_NO','$(($Line_NO+2))'d' server.xml
    156   sed -i ''$Line_NO'a <Connector port="8080" protocol="HTTP/1.1"\
     175  Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '<!-- A "Connector" using the shared thread pool-->' | sed 's/:.*//g'`
     176
     177  sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml
     178  sed -i ''$Line_NO'a    <Connector port="8080" protocol="HTTP/1.1"\
    157179               connectionTimeout="20000"\
    158                redirectPort="8443" URIEncoding="UTF-8"/>\
    159 ' $NutchEZ/tomcat/conf/server.xml
    160 }
    161 
     180               redirectPort="8443" URIEncoding="UTF-8"\
     181               useBodyEncodingForURI="true" />\
     182' $NutchEZ_HOME/tomcat/conf/server.xml
     183}
    162184
    163185set_nutch-site () {
     
    165187 
    166188  # 搜尋加入設定的行號位址
    167   line_NO=`cat '$NutchEZ'/conf/nutch-site.xml | grep -n '<'configuration'>' | sed 's/:.*//g'`
     189  line_NO=`cat $NutchEZ_HOME'/conf/nutch-site.xml' | grep -n '<'configuration'>' | sed 's/:.*//g'`
    168190 
    169191  # 加入設定檔
     
    189211  </description>\
    190212</property>\
    191 ' $NutchEZ/conf/nutch-site.xml
     213' $NutchEZ_HOME/conf/nutch-site.xml
    192214}
    193215
Note: See TracChangeset for help on using the changeset viewer.