Changeset 69


Ignore:
Timestamp:
May 27, 2009, 5:01:46 PM (15 years ago)
Author:
waue
Message:

script ok version

Location:
nutchez-0.1
Files:
2 deleted
7 edited

Legend:

Unmodified
Added
Removed
  • nutchez-0.1/bin/nutchez

    r68 r69  
    44# Description: Easily use Nutch
    55# .
    6 export NUTCH_CONF_DIR=~/.nutchez
    7 NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"
    86
    9 . ./nutchez-func.sh
     7# beginning ..
     8bin=`dirname "$0"`
     9bin=`cd "$bin"; pwd`
    1010
    11 
    12 # root ?
    13 #check_if_root
    14 
    15 # show url lists
    16 
    17 setup_nutchez
     11. "$bin"/nutchez-func.sh
     12init_nutchez
    1813
    1914CHECK=0
  • nutchez-0.1/bin/nutchez-func.sh

    r68 r69  
    55# .
    66
    7 . /etc/nutch/hadoop-env.sh || . $NUTCHEZ_SCRIPT_PATH/conf/hadoop-env.sh
    87
    98: ${DIALOG=dialog}
    109
    11 # display more for debug
    12 VERB=0
     10# set to 1 to display more debug output
     11VERB=1
     12
     13init_nutchez () {
     14  if ! [ -e ~/.nutchez ] ;then
     15    # copy from /etc/nutch
     16    mkdir ~/.nutchez
     17    cp -rf /etc/nutch/* ~/.nutchez
     18    mkdir ~/.nutchez/log
     19    chown -R $LOGNAME:$LOGNAME ~/.nutchez
     20  fi
     21  export NUTCH_CONF_DIR=~/.nutchez
     22  export HADOOP_CONF_DIR=~/.nutchez
     23  export HADOOP_LOG_DIR=~/.nutchez/log
     24  . ~/.nutchez/hadoop-env.sh || . /etc/nutch/hadoop-env.sh
     25}
    1326
    1427echo_vb () {
     
    3851  echo_vb "7. chang tmp as txt"
    3952  rm ~/.nutchez/sav/n.*.txt
    40   mv /tmp/n.url.tmp ~/.nutchez/sav/
    41   mv /tmp/n.robot.tmp ~/.nutchez/sav/
    42   mv /tmp/n.crawler.tmp ~/.nutchez/sav/
    43   mv /tmp/n.tomcat.tmp ~/.nutchez/sav/
     53  mv /tmp/n.urls.tmp ~/.nutchez/sav/n.urls.txt
     54  mv /tmp/n.robot.tmp ~/.nutchez/sav/n.robot.txt
     55  mv /tmp/n.crawler.tmp ~/.nutchez/sav/n.crawler.txt
     56  mv /tmp/n.tomcat.tmp ~/.nutchez/sav/n.tomcat.txt
    4457}
    4558
     
    4760  echo_vb "7. delete tmp"
    4861  rm /tmp/n.*.tmp
    49 }
    50 
    51 init_nutchez () {
    52   if ! [ -e ~/.nutchez ] ;then
    53     # copy from /etc/nutch
    54     cp -rf /etc/nutch/* ~/.nutchez
    55     chown -R $LOGNAME:$LOGNAME ~/.nutchez
    56   fi
    5762}
    5863
     
    7176  if [ -e ~/.nutchez/nutch-site.xml ] ; then
    7277    # set nutch-site.xml
    73     sed -ie "s/>user</>$ROBOT</" ~/.nutchez/nutch-site.xml
     78    sed -i -e "4s/<value>[a-zA-Z0-9]*</<value>$ROBOT</" ~/.nutchez/nutch-site.xml
    7479  fi
    7580
     
    8691    fi
    8792    # change explorer port
    88     sed -ie "s/8080/>$PORT</" ~/.nutchez/tomcat/conf/server.xml
     93    sed -i -e "s/<Connector port=\"[0-9]*\"/<Connector port=\"$PORT\"/" ~/.nutchez/tomcat/conf/server.xml
     94  fi
    8995}
    9096
     
    9399  # show urls : ok =0 ,cancel = 1
    94100  echo_vb "2. show_urls !"
    95   test_file  ~/.nutchez/sav/n.url.txt
     101  test_file  ~/.nutchez/sav/n.urls.txt
    96102  echo_vb "2.1 test_file ~/.nutchez/sav return : $?"
    97103  # dialog begin
    98   dialog  --editbox ~/.nutchez/sav/n.url.txt 16 51 2>/tmp/n.url.tmp
     104  dialog  --editbox ~/.nutchez/sav/n.urls.txt 16 51 2>/tmp/n.urls.tmp
    99105  RET=$?
    100   echo_vb "2.1 cat url: `cat /tmp/n.url.tmp`"
     106  echo_vb "2.1 cat url: `cat /tmp/n.urls.tmp`"
    101107  return $RET
    102108}
     
    126132final_confirm () {
    127133  echo_vb "6. final_confirm : start =0 , back =1 "
    128   tempfile=`tempfile 2>/dev/null` || tempfile=/tmp/n.finalcheck.tmp
    129   #trap "rm -f $tempfile" 0 1 2 5 15
     134  tempfile=/tmp/n.finalcheck.tmp
    130135
    131136  echo " \n 1. The url list is : \n " > $tempfile
    132   cat /tmp/n.url.tmp >> $tempfile
     137  cat /tmp/n.urls.tmp >> $tempfile
    133138  echo " \n 2. The robot name is : \n" >> $tempfile
    134139  cat /tmp/n.robot.tmp >> $tempfile
     
    142147  #read READ
    143148  $DIALOG --title "Check It !!" --clear \
    144         --yesno "$MSG" 16 51
     149        --yesno "$MSG" 26 51
    145150  RET=$?
    146151  echo_vb "final return = $RET"
     
    152157start_crawl () {
    153158
    154   ROBOT=`cat ~/.nutchez/sav`
    155   URLS=`cat ~/.nutchez/sav`
    156   DEPTH=`cat ~/.nutchez/sav`
    157   PORT=`cat ~/.nutchez/sav`
     159  ROBOT=`cat ~/.nutchez/sav/n.robot.txt`
     160  URLS=`cat ~/.nutchez/sav/n.urls.txt`
     161  DEPTH=`cat ~/.nutchez/sav/n.crawler.txt`
     162  PORT=`cat ~/.nutchez/sav/n.tomcat.txt`
    158163
    159164  echo_vb "7. start_crawl"
    160   setup_nutch
     165  setup_nutchez
    161166  install_tomcat
    162   # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
    163   echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
     167  echo_vb "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
     168  echo_vb "$NUTCH_CONF_DIR"
     169  /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
    164170}
    165171
    166172start_tomcat () {
    167173  echo_vb "8. start_tomcat "
    168   /opt/nutch/tomcat/bin/startup.sh
     174  echo_vb "/opt/nutch/tomcat/bin/startup.sh"
     175  if [ -e /tmp/search ]
     176    rm -rf /tmp/search
     177  fi
     178  ln -sf ~/.nutchez/search/ /tmp/
     179  ~/.nutchez/tomcat/bin/shutdown.sh
     180  ~/.nutchez/tomcat/bin/startup.sh
    169181}
    170182
     
    173185  FIREFOX=`which firefox`
    174186  RET=$?
    175   if [ RET == 0 ];then
     187  if [ $RET == 0 ];then
    176188    $FIREFOX -D 0.0 http://localhost:$PORT
    177   else
    178     $DIALOG --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:8080" 0 0
     189    RET=$?
     190  fi
     191  if ! [ $RET == 0 ];then
     192    $DIALOG --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:$PORT" 0 0
    179193  fi
    180194}
  • nutchez-0.1/debian/nutchez.postinst

    r68 r69  
    99
    1010ln -sf /opt/nutch/bin/nutchez /usr/local/sbin/
     11ln -sf /opt/nutch/bin/nutchez-func.sh /usr/local/sbin/
    1112
    1213setup_hdfsadm_user() {
  • nutchez-0.1/debian/nutchez.postrm

    r68 r69  
    33echo "$1"
    44
    5 if [ "$1" != remove ]
    6 then
     5if [ "$1" != remove ]; then
    76  exit 0
    87fi
    98
    10 rm -rf /tmp/hsperfdata*
    119if [ -e HOME/.nutchez ]; then
    1210  rm -rf $HOME/.nutchez
    1311fi
    14 rm /usr/local/sbin/nutchez*
    1512
     13rm -f /usr/local/sbin/nutchez*
     14
     15rm -rf /tmp/search
     16
  • nutchez-0.1/debian/nutchez.prerm

    r66 r69  
    11#!/bin/sh
    22
    3 /opt/nutch/bin/stop-all.sh
     3#/opt/nutch/bin/stop-all.sh
    44#su -c /opt/nutch/bin/stop-all.sh hdfsadm -
  • nutchez-0.1/tomcat/conf/server.xml

    r66 r69  
    2020     Documentation at /docs/config/server.html
    2121 -->
    22 <Server port="8005" shutdown="SHUTDOWN">
     22<Server port="8083" shutdown="SHUTDOWN">
    2323
    2424  <!--APR library loader. Documentation at /docs/apr.html -->
     
    6565         Define a non-SSL HTTP/1.1 Connector on port 8080
    6666    -->
    67     <Connector port="8080" protocol="HTTP/1.1"
     67    <Connector port="8083" protocol="HTTP/1.1"
    6868               connectionTimeout="20000"
    6969               redirectPort="8443" URIEncoding="UTF-8"
     
    7272    <!--
    7373    <Connector executor="tomcatThreadPool"
    74                port="8080" protocol="HTTP/1.1"
     74               port="8083" protocol="HTTP/1.1"
    7575               connectionTimeout="20000"
    7676               redirectPort="8443" />
     
    8181         described in the APR documentation -->
    8282    <!--
    83     <Connector port="8443" protocol="HTTP/1.1" SSLEnabled="true"
     83    <Connector port="8083" protocol="HTTP/1.1" SSLEnabled="true"
    8484               maxThreads="150" scheme="https" secure="true"
    8585               clientAuth="false" sslProtocol="TLS" URIEncoding="UTF-8"/>
     
    8787
    8888    <!-- Define an AJP 1.3 Connector on port 8009 -->
    89     <Connector port="8009" protocol="AJP/1.3" redirectPort="8443" />
     89    <Connector port="8083" protocol="AJP/1.3" redirectPort="8443" />
    9090
    9191
  • nutchez-0.1/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml

    r68 r69  
    66    <property>
    77        <name>searcher.dir</name>
    8         <value>../../../../../search</value>
     8        <value>/tmp/search</value>
    99    </property>
    1010</configuration>
Note: See TracChangeset for help on using the changeset viewer.