source: nutchez-0.1/bin/nutchez-func.sh @ 91

Last change on this file since 91 was 91, checked in by waue, 15 years ago

fix some bugs

  • Property svn:executable set to *
File size: 5.4 KB
RevLine 
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Helper functions that make Nutch easy to use.
#


# Dialog front-end used for the prompts. It keeps any value already present
# in the environment and only falls back to "dialog" when DIALOG is unset.
# The expansion is quoted so the value is never glob-expanded or word-split
# while being evaluated as an argument of ":".
: "${DIALOG=dialog}"

# Set to 1 to display extra message boxes for debugging.
VERB=0
[66]12
#######################################
# Create the per-user ~/.nutchez directory on first use.
# Does nothing when ~/.nutchez already exists. Otherwise creates it together
# with its log/ sub-directory, copies /etc/nutch/conf and /etc/nutch/sav into
# it and hands the tree to $LOGNAME.
# Globals:   LOGNAME (read)
# Arguments: none
# Returns:   0 on success or when already initialised, 1 if any step failed
#######################################
init_nutchez () {
  local base rc=0
  base=~/.nutchez

  # Already initialised: leave the existing data untouched.
  if [ -e "$base" ]; then
    return 0
  fi

  # -p creates the base directory and log/ in one idempotent step.
  mkdir -p "$base/log" || return 1

  # The copies stay best-effort (later steps still run), but a failure is
  # now reflected in the return status instead of being lost.
  cp -rf /etc/nutch/conf "$base/" || rc=1
  cp -rf /etc/nutch/sav "$base/" || rc=1
  chown -R "${LOGNAME}:${LOGNAME}" "$base" || rc=1

  return "$rc"
}
24
#######################################
# Show a debug message box, but only when verbose mode is on (VERB is 1).
# Globals:   VERB (read), DIALOG (read)
# Arguments: $1 - text to display
# Returns:   status of the dialog command when verbose, 0 otherwise
#######################################
echo_dialog_v () {
  # Quoted with a default so an unset or empty VERB means "quiet" instead of
  # making the test itself fail with "unary operator expected".
  if [ "${VERB:-0}" -eq 1 ]; then
    # DIALOG is deliberately left unquoted so it may carry extra options.
    ${DIALOG:-dialog} --msgbox "$1" 16 51
  fi
}
30
#######################################
# Make sure a file exists, creating it with a single empty line if missing.
# In verbose mode the outcome (and the content of an existing file) is shown
# through echo_dialog_v.
# Arguments: $1 - path of the file to check
# Returns:   status of the last command run (redirect or echo_dialog_v)
#######################################
test_file () {
  local path=$1

  if [ ! -e "$path" ]; then
    echo_dialog_v "test_file: \n can not find $path"
    # Quoted target: an unquoted path containing spaces is an
    # "ambiguous redirect" in bash and the file would never be created.
    echo "" > "$path"
  else
    echo_dialog_v "test_file: \n Touch  $path ! \n Its content is \n $(cat -- "$path")"
  fi
}
39
#######################################
# Abort the script unless it is running as root (bash $UID equal to 0).
# Globals:   UID (read), LOGNAME (read)
# Arguments: none
# Outputs:   an explanation on stderr when not root
# Returns:   0 when root; otherwise the shell exits with status 1
#######################################
check_if_root() {
  local script_name=${0##*/}

  if [ "$UID" != "0" ]; then
    echo_dialog_v "Hi [$LOGNAME] !! "
    echo_dialog_v "You need to run this script \"$script_name\" as root."
    # echo_dialog_v is silent unless VERB=1, so also report on stderr;
    # otherwise the script would exit without telling the user why.
    printf 'You need to run this script "%s" as root.\n' "$script_name" >&2
    exit 1
  fi
}
47
#######################################
# Promote the answers collected in /tmp/n.<name>.tmp to the saved settings
# ~/.nutchez/sav/n.<name>.txt for urls, robot, crawler and tomcat.
# Arguments: none
# Outputs:   a warning on stderr for every temp file that is missing
# Returns:   0 if all four files were promoted, 1 otherwise
#######################################
promote_tempfile () {
  local sav_dir name src rc=0
  sav_dir=~/.nutchez/sav

  echo_dialog_v "7. chang tmp as txt"

  # Each saved file is overwritten only when its replacement really exists.
  # Deleting all saved files up front would lose a setting whenever one of
  # the temp files is missing, and rm would complain on an unmatched glob.
  for name in urls robot crawler tomcat; do
    src="/tmp/n.${name}.tmp"
    if [ -e "$src" ]; then
      mv -f "$src" "$sav_dir/n.${name}.txt" || rc=1
    else
      printf 'promote_tempfile: %s not found, keeping saved value\n' "$src" >&2
      rc=1
    fi
  done

  return "$rc"
}
56
#######################################
# Remove every /tmp/n.*.tmp work file.
# Arguments: none
# Returns:   status of rm
#######################################
clean_tempfile () {
  echo_dialog_v "7. delete tmp"
  # -f: when nothing matches, the glob stays literal; without -f rm would
  # print an error and return non-zero for an already clean state.
  rm -f /tmp/n.*.tmp
}
61
#######################################
# Prepare the crawl inputs under ~/.nutchez:
#   * urls/urls.txt becomes a fresh copy of sav/n.urls.txt
#   * the <value> on line 4 of conf/nutch-site.xml (if that file exists)
#     is replaced with $ROBOT
# Globals:   ROBOT (read)
# Arguments: none
# Returns:   0 on success, 1 if any step failed
#######################################
setup_nutchez () {
  local base site_xml robot_esc rc=0
  base=~/.nutchez
  site_xml="$base/conf/nutch-site.xml"

  # Make the url list directory; -p makes this a no-op when it exists.
  mkdir -p "$base/urls" || rc=1

  # cp -f replaces a previous urls.txt, so no separate rm is needed.
  cp -f "$base/sav/n.urls.txt" "$base/urls/urls.txt" || rc=1

  if [ -e "$site_xml" ]; then
    # Escape backslash, "/" and "&" so the robot name is inserted literally
    # into the sed replacement.
    robot_esc=$(printf '%s' "$ROBOT" | sed -e 's|[\\/&]|\\&|g')
    # [^<]* matches whatever value is currently stored; an alphanumeric-only
    # pattern stops matching once a name with "-", "_" or a space was saved.
    sed -i -e "4s/<value>[^<]*</<value>${robot_esc}</" "$site_xml" || rc=1
  fi

  return "$rc"
}
[66]80
#######################################
# Install a private Tomcat copy under ~/.nutchez (first run only) and set
# the Connector port on line 67 of its conf/server.xml to $PORT.
# Globals:   PORT (read), LOGNAME (read)
# Arguments: none
# Outputs:   an error on stderr when PORT is invalid or server.xml is missing
# Returns:   status of sed, or 1 on a rejected PORT / missing server.xml
#######################################
install_tomcat (){
  local base server_xml
  base=~/.nutchez
  server_xml="$base/tomcat/conf/server.xml"

  if [ ! -e "$base/tomcat" ]; then
    # Install tomcat into the home directory.
    cp -rf /opt/nutch/tomcat "$base/"
    chown -R "${LOGNAME}:${LOGNAME}" "$base/tomcat/"
    # Make the search dir.
    mkdir -p "$base/search"
  fi

  # PORT is spliced into a sed program: accept digits only so a stray
  # "/" or "&" can neither break nor alter the substitution.
  case "$PORT" in
    '' | *[!0-9]*)
      printf 'install_tomcat: invalid port "%s"\n' "$PORT" >&2
      return 1
      ;;
  esac

  if [ ! -f "$server_xml" ]; then
    printf 'install_tomcat: %s not found\n' "$server_xml" >&2
    return 1
  fi

  # Change the explorer port.
  sed -i -e "67s/<Connector port=\"[0-9]*\"/<Connector port=\"${PORT}\"/" "$server_xml"
}
94
95
#######################################
# Let the user edit the saved URL list in a dialog edit box.
# The edited text (dialog writes it to stderr) is captured in
# /tmp/n.urls.tmp.
# Globals:   DIALOG (read), RET (written)
# Arguments: none
# Returns:   exit status of the dialog command (ok = 0, cancel = 1)
#######################################
show_urls (){
  local urls_file
  urls_file=~/.nutchez/sav/n.urls.txt

  echo_dialog_v "2. show_urls !"
  test_file "$urls_file"
  echo_dialog_v "2.1 test_file ~/.nutchez/sav return : $?"

  # Use the configurable $DIALOG front-end like the rest of this file
  # instead of a hard-coded "dialog"; left unquoted so it may carry options.
  ${DIALOG:-dialog} --title "The URLS that you want" --editbox "$urls_file" 16 51 2>/tmp/n.urls.tmp
  # RET is intentionally left global, as in the original implementation.
  RET=$?
  echo_dialog_v "2.1 cat url: $(cat /tmp/n.urls.tmp)"
  return $RET
}
107
#######################################
# Ask for the crawler agent name in an input box pre-filled with the saved
# value; the answer (dialog writes it to stderr) lands in /tmp/n.robot.tmp.
# Globals:   DIALOG (read)
# Arguments: none
# Returns:   status of the final echo_dialog_v call
#######################################
setup_robot () {
  local robot_file
  robot_file=~/.nutchez/sav/n.robot.txt

  test_file "$robot_file"
  echo_dialog_v "3. setup_robot"

  # Use the configurable $DIALOG front-end like the rest of this file
  # instead of a hard-coded "dialog"; left unquoted so it may carry options.
  ${DIALOG:-dialog} --nocancel --inputbox " This agent name \n" 16 51 \
    "$(cat "$robot_file")" 2>/tmp/n.robot.tmp
  echo_dialog_v "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}
115
#######################################
# Ask for the crawl depth in an input box pre-filled with the saved value;
# the answer (dialog writes it to stderr) lands in /tmp/n.crawler.tmp.
# Globals:   DIALOG (read)
# Arguments: none
# Returns:   status of the final echo_dialog_v call
#######################################
setup_crawler () {
  local crawler_file
  crawler_file=~/.nutchez/sav/n.crawler.txt

  echo_dialog_v "4. setup_crawler"
  test_file "$crawler_file"

  # Use the configurable $DIALOG front-end like the rest of this file
  # instead of a hard-coded "dialog"; left unquoted so it may carry options.
  ${DIALOG:-dialog} --nocancel --inputbox " Depth  \n " 16 51 \
    "$(cat "$crawler_file")" 2>/tmp/n.crawler.tmp
  # Report the value just entered here: the crawler file, not the robot one.
  echo_dialog_v "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}
122
#######################################
# Ask for the explorer (Tomcat) port in an input box pre-filled with the
# saved value; the answer (dialog writes it to stderr) lands in
# /tmp/n.tomcat.tmp.
# Globals:   DIALOG (read)
# Arguments: none
# Returns:   status of the final echo_dialog_v call
#######################################
setup_tomcat () {
  local tomcat_file
  tomcat_file=~/.nutchez/sav/n.tomcat.txt

  echo_dialog_v "5. setup_tomcat"
  test_file "$tomcat_file"

  # Use the configurable $DIALOG front-end like the rest of this file
  # instead of a hard-coded "dialog"; left unquoted so it may carry options.
  ${DIALOG:-dialog} --nocancel --inputbox " explorer port \n " 16 51 \
    "$(cat "$tomcat_file")" 2>/tmp/n.tomcat.tmp
  echo_dialog_v "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}
129
#######################################
# Show a summary of the four collected answers (/tmp/n.urls.tmp,
# /tmp/n.robot.tmp, /tmp/n.crawler.tmp, /tmp/n.tomcat.tmp) in a yes/no box
# with "ok", "reset" (extra button) and "exit" buttons.
# Globals:   DIALOG (read), MSG (written), RET (written)
# Arguments: none
# Returns:   exit status of the dialog command
#######################################
final_confirm () {
  echo_dialog_v "6. final_confirm : start =0 , back =1 "

  # Assemble the summary in memory; no scratch file with a predictable name
  # in /tmp is needed. The "\n" sequences are emitted literally (printf %s)
  # and passed on to the dialog program as part of the text.
  MSG=$(
    printf '%s\n' ' \n 1. The url list is : \n '
    cat /tmp/n.urls.tmp
    printf '%s\n' ' \n 2. The robot name is : \n'
    cat /tmp/n.robot.tmp
    printf '%s\n' ' \n 3. The crawled depth is : \n '
    cat /tmp/n.crawler.tmp
    printf '%s\n' ' \n 4. The explorer port is : \n '
    cat /tmp/n.tomcat.tmp
  )
  echo_dialog_v "6.1 final message :\n $MSG"

  # DIALOG is deliberately left unquoted so it may carry extra options.
  ${DIALOG:-dialog} --title "Check It !!" --clear \
    --extra-button --extra-label "reset" --ok-label "ok" --cancel-label "exit" \
    --yesno "$MSG" 26 51
  # MSG and RET are intentionally left global, as in the original.
  RET=$?
  echo_dialog_v "final return = $RET"
  return $RET
}
153
# define parameters
155
#######################################
# Load the saved settings from ~/.nutchez/sav into shell variables.
# Globals:   ROBOT, URLS, DEPTH, PORT (written)
# Arguments: none
# Returns:   status of the last read (the tomcat/port file)
#######################################
set_nutchez_p () {
  local sav_dir
  sav_dir=~/.nutchez/sav

  ROBOT=$(cat "${sav_dir}/n.robot.txt")
  URLS=$(cat "${sav_dir}/n.urls.txt")
  DEPTH=$(cat "${sav_dir}/n.crawler.txt")
  PORT=$(cat "${sav_dir}/n.tomcat.txt")
}
#######################################
# Prepare the environment (setup_nutchez, install_tomcat) and run the Nutch
# crawl over ~/.nutchez/urls into ~/.nutchez/search.
# Globals:   DEPTH (read), NUTCH_CONF_DIR (read, only for the debug message)
# Arguments: none
# Outputs:   an error on stderr when DEPTH is not a number
# Returns:   1 when DEPTH is invalid, otherwise the status of the nutch command
#######################################
start_crawl () {
  echo_dialog_v "7. start_crawl"

  # Refuse an empty or non-numeric depth up front: left unquoted, an empty
  # DEPTH would make "-depth" swallow the following "-topN" option.
  case "$DEPTH" in
    '' | *[!0-9]*)
      printf 'start_crawl: invalid crawl depth "%s"\n' "$DEPTH" >&2
      return 1
      ;;
  esac

  setup_nutchez
  install_tomcat
  echo_dialog_v "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
  echo_dialog_v "nutch conf dir = $NUTCH_CONF_DIR"
  /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search \
    -depth "$DEPTH" -topN 5000 -threads 1000
}
171
#######################################
# (Re)start the private Tomcat under ~/.nutchez/tomcat.
# Re-creates the /tmp/search link to ~/.nutchez/search/, force-kills any
# process whose command line contains catalina.startup.Bootstrap, then runs
# startup.sh and waits three seconds.
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   status of the final sleep
#######################################
start_tomcat () {
  local pid_tc

  echo_dialog_v "8. start_tomcat "
  echo_dialog_v "/opt/nutch/tomcat/bin/startup.sh"

  # rm -rf is already a no-op when the path is absent.
  rm -rf /tmp/search
  ln -sf ~/.nutchez/search/ /tmp/

  # pgrep never reports itself, unlike a "ps | grep" pipeline whose own grep
  # process also matches the pattern.
  pid_tc=$(pgrep -f 'catalina\.startup\.Bootstrap')
  if [ -z "$pid_tc" ]; then
    echo "starting tomcat"
  else
    echo "tomcat had been started and the pid is $pid_tc"
    echo "stop it first"
    # Branch on the real exit status of kill: "$?" is never an empty string,
    # so testing it with -z always took the error branch.
    # pid_tc is unquoted on purpose: it may hold several PIDs.
    # shellcheck disable=SC2086
    if kill -9 $pid_tc; then
      echo " tomcat ($pid_tc) is  killed ..."
    else
      echo "kill error ..."
    fi
  fi

  ~/.nutchez/tomcat/bin/startup.sh
  sleep 3
}
198
#######################################
# Present the result: open http://localhost:$PORT in firefox when it is
# available; if firefox is missing or exits non-zero, show the URL in a
# message box instead.
# Globals:   PORT (read), DIALOG (read), FIREFOX (written), RET (written)
# Arguments: none
# Returns:   status of the message box when it is shown, 0 otherwise
#######################################
show_report () {
  local url="http://localhost:$PORT"

  echo_dialog_v "9. show_report "

  # command -v is the shell's own lookup; "which" is an external program
  # whose output and exit status are not standardised.
  FIREFOX=$(command -v firefox)
  RET=$?
  if [ "$RET" -eq 0 ]; then
    # NOTE(review): the purpose of "-D 0.0" is not evident from this file;
    # it is kept exactly as the original passed it.
    "$FIREFOX" -D 0.0 "$url"
    RET=$?
  fi

  if [ "$RET" -ne 0 ]; then
    # DIALOG is deliberately left unquoted so it may carry extra options.
    ${DIALOG:-dialog} --msgbox "Congratulations! \n you can explore the url: \n  $url" 0 0
  fi
}
Note: See TracBrowser for help on using the repository browser.