source: nutchez-0.1/bin/nutchez-func.sh @ 68

Last change on this file since 68 was 68, checked in by waue, 15 years ago

big modification

  • Property svn:executable set to *
File size: 4.4 KB
RevLine 
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Helper functions for easily using Nutch
# .

# Load the Hadoop environment: try the system-wide file first and fall back
# to the copy under $NUTCHEZ_SCRIPT_PATH/conf when sourcing it fails.
# The fallback path is quoted so a script path containing spaces still works.
. /etc/nutch/hadoop-env.sh || . "${NUTCHEZ_SCRIPT_PATH}/conf/hadoop-env.sh"

# Dialog program used for all message boxes; keeps any value already set in
# the environment and defaults to "dialog" only when DIALOG is unset.
: "${DIALOG=dialog}"

# display more for debug: set to 1 to make echo_vb show its message boxes
VERB=0
[66]13
# echo_vb MESSAGE
#   Show MESSAGE in a 16x51 message box, but only when debug output is
#   enabled (VERB is 1). Otherwise does nothing and returns 0.
# Globals: VERB (read), DIALOG (read)
echo_vb () {
  # VERB is quoted and defaulted so that an unset or empty value simply means
  # "quiet" instead of triggering a "[: -eq: unary operator expected" error.
  if [ "${VERB:-0}" = "1" ]; then
    # DIALOG is deliberately left unquoted so it may carry extra options.
    ${DIALOG:-dialog} --msgbox "$1" 16 51
  fi
}
19
# test_file PATH
#   Make sure PATH exists. When it is missing, it is created (together with
#   any missing parent directories) holding a single empty line. When it is
#   present, its content is only reported through echo_vb and left untouched.
# Arguments: $1 - file to check/create
test_file () {
  if ! test -e "$1"; then
    echo_vb "test_file: \n can not find $1"
    # The parent directory may not exist yet; without it the redirect fails.
    mkdir -p -- "$(dirname -- "$1")" && echo "" > "$1"
  else
    echo_vb "test_file: \n Touch  $1 ! \n Its content is \n $(cat -- "$1")"
  fi
}
28
# check_if_root
#   Abort the whole script with status 1 unless it is running with UID 0.
#   The explanation is written to stderr so the user sees why the script
#   stopped even when debug boxes (echo_vb) are disabled.
# Globals: UID, LOGNAME (read)
check_if_root() {
  if [ "$UID" != "0" ]; then
    # ${0##*/} strips the directory part without spawning basename and copes
    # with spaces in the script path.
    local script_name=${0##*/}
    echo_vb "Hi [$LOGNAME] !! "
    echo_vb "You need to run this script \"$script_name\" as root."
    printf 'You need to run this script "%s" as root.\n' "$script_name" >&2
    exit 1
  fi
}
36
# promote_tempfile
#   Turn the answers collected in /tmp/n.<key>.tmp into the saved settings
#   ~/.nutchez/sav/n.<key>.txt for key in url, robot, crawler, tomcat.
#   The files are renamed to .txt on the way, because the other functions in
#   this file read the saved values from n.<key>.txt. A saved value is only
#   replaced when the matching temp file exists, so a missing temp file no
#   longer wipes the previous setting.
# Returns: 0 on success, 1 if any move failed.
promote_tempfile () {
  local sav_dir key rc=0
  sav_dir=~/.nutchez/sav

  echo_vb "7. chang tmp as txt"
  mkdir -p -- "$sav_dir" || return

  for key in url robot crawler tomcat; do
    if [ -e "/tmp/n.$key.tmp" ]; then
      mv -f -- "/tmp/n.$key.tmp" "$sav_dir/n.$key.txt" || rc=1
    fi
  done
  return "$rc"
}
45
# clean_tempfile
#   Remove every /tmp/n.*.tmp scratch file written by the dialogs.
#   Uses rm -f so that "nothing to delete" is not an error and prints no
#   message on the terminal.
clean_tempfile () {
  echo_vb "7. delete tmp"
  rm -f -- /tmp/n.*.tmp
}
50
# init_nutchez [TEMPLATE_DIR]
#   First-run setup: when ~/.nutchez does not exist yet, create it (including
#   the sav/ sub-directory the dialogs store their answers in), copy the
#   contents of TEMPLATE_DIR into it and hand ownership to $LOGNAME.
#   Does nothing when ~/.nutchez already exists.
# Arguments: $1 - directory to copy from (optional, default /etc/nutch)
# Returns:   0 when nothing had to be done, 1 when TEMPLATE_DIR is missing,
#            otherwise the status of the final chown.
init_nutchez () {
  local src=${1:-/etc/nutch}
  local dest
  dest=~/.nutchez

  if [ -e "$dest" ]; then
    return 0
  fi

  # Bail out before creating anything so a later run can retry the copy.
  if ! [ -d "$src" ]; then
    printf 'init_nutchez: template directory %s not found\n' "$src" >&2
    return 1
  fi

  # The target must exist as a directory: "cp -r a b c target" fails when
  # target is missing and there is more than one source.
  mkdir -p -- "$dest/sav" || return
  # copy from /etc/nutch
  cp -rf -- "$src"/* "$dest"/
  chown -R "$LOGNAME:$LOGNAME" "$dest"
}
[66]58
# setup_nutchez
#   Prepare the per-user Nutch configuration before a crawl:
#     1. (re)create ~/.nutchez/urls/urls.txt from the saved URL list
#        ~/.nutchez/sav/n.url.txt (the name the URL dialog uses);
#     2. if ~/.nutchez/nutch-site.xml exists, replace the first ">user<" on
#        each line with the robot name.
# Globals: ROBOT (read)
setup_nutchez () {
  local base robot_escaped
  base=~/.nutchez

  # make url list dir
  mkdir -p -- "$base/urls"

  # Drop any stale list first so a failed copy does not leave old URLs behind.
  rm -f -- "$base/urls/urls.txt"
  cp -- "$base/sav/n.url.txt" "$base/urls/urls.txt"

  if [ -e "$base/nutch-site.xml" ]; then
    # set nutch-site.xml
    # Escape \, / and & so an arbitrary robot name is inserted literally
    # instead of being interpreted by sed.
    robot_escaped=$(printf '%s' "$ROBOT" | sed -e 's|[\\/&]|\\&|g')
    # "-i -e", not "-ie": the latter asks GNU sed for a backup file with the
    # suffix "e" (nutch-site.xmle).
    sed -i -e "s/>user</>$robot_escaped</" "$base/nutch-site.xml"
  fi
}
[66]77
# install_tomcat [TOMCAT_DIR]
#   Install a private Tomcat into ~/.nutchez/tomcat unless one is already
#   there: copy TOMCAT_DIR, give it to $LOGNAME, make sure the search
#   directory ~/.nutchez/search exists and switch the copy's conf/server.xml
#   from port 8080 to $PORT.
# Arguments: $1 - Tomcat directory to copy (optional, default
#                 /opt/nutch/tomcat)
# Globals:   PORT (read), LOGNAME (read)
# Returns:   non-zero when the copy could not be made.
install_tomcat () {
  local src=${1:-/opt/nutch/tomcat}
  local base
  base=~/.nutchez

  if ! [ -e "$base/tomcat" ]; then
    # isntall tomcat to home
    mkdir -p -- "$base" || return
    cp -rf -- "$src" "$base/tomcat" || return
    chown -R "$LOGNAME:$LOGNAME" "$base/tomcat/"

    # make search dir
    mkdir -p -- "$base/search"

    # change explorer port
    # Only a purely numeric port is written; an empty or malformed value
    # would otherwise corrupt server.xml.
    case "$PORT" in
      '' | *[!0-9]*)
        printf 'install_tomcat: invalid port "%s", keeping 8080\n' "$PORT" >&2
        ;;
      *)
        # Replace the number only; the surrounding quotes in server.xml stay.
        sed -i -e "s/8080/$PORT/" "$base/tomcat/conf/server.xml"
        ;;
    esac
  fi
}
90
91
# show_urls
#   Let the user edit the saved URL list (~/.nutchez/sav/n.url.txt) in an
#   edit box. Whatever the dialog program writes to stderr is captured in
#   /tmp/n.url.tmp.
# Globals: DIALOG (read), RET (written: exit status of the dialog)
# Returns: the dialog's exit status (ok = 0, cancel = 1).
show_urls () {
  local url_file
  url_file=~/.nutchez/sav/n.url.txt

  # show urls : ok =0 ,cancel = 1
  echo_vb "2. show_urls !"
  test_file "$url_file"
  echo_vb "2.1 test_file ~/.nutchez/sav return : $?"

  # dialog begin
  # Use the configurable $DIALOG like the other message boxes in this file.
  ${DIALOG:-dialog} --editbox "$url_file" 16 51 2>/tmp/n.url.tmp
  RET=$?

  echo_vb "2.1 cat url: $(cat /tmp/n.url.tmp)"
  return "$RET"
}
103
# setup_robot
#   Ask for the crawler's agent (robot) name in an input box, pre-filled with
#   the saved value from ~/.nutchez/sav/n.robot.txt. Whatever the dialog
#   program writes to stderr is captured in /tmp/n.robot.tmp.
# Globals: DIALOG (read)
setup_robot () {
  local saved
  saved=~/.nutchez/sav/n.robot.txt

  test_file "$saved"
  echo_vb "3. setup_robot"
  # dialog
  # Use the configurable $DIALOG like the other message boxes in this file.
  ${DIALOG:-dialog} --nocancel --inputbox " This agent name \n" 16 51 \
    "$(cat "$saved")" 2>/tmp/n.robot.tmp
  echo_vb "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}
111
# setup_crawler
#   Ask for the crawl depth in an input box, pre-filled with the saved value
#   from ~/.nutchez/sav/n.crawler.txt. Whatever the dialog program writes to
#   stderr is captured in /tmp/n.crawler.tmp.
# Globals: DIALOG (read)
setup_crawler () {
  local saved
  saved=~/.nutchez/sav/n.crawler.txt

  echo_vb "4. setup_crawler"
  test_file "$saved"
  # Use the configurable $DIALOG like the other message boxes in this file.
  ${DIALOG:-dialog} --nocancel --inputbox " Depth  \n " 16 51 \
    "$(cat "$saved")" 2>/tmp/n.crawler.tmp
  # Report the crawler answer just captured (not the robot temp file).
  echo_vb "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}
118
# setup_tomcat
#   Ask for the port of the search web interface in an input box, pre-filled
#   with the saved value from ~/.nutchez/sav/n.tomcat.txt. Whatever the dialog
#   program writes to stderr is captured in /tmp/n.tomcat.tmp.
# Globals: DIALOG (read)
setup_tomcat () {
  local saved
  saved=~/.nutchez/sav/n.tomcat.txt

  echo_vb "5. setup_tomcat"
  test_file "$saved"
  # Use the configurable $DIALOG like the other message boxes in this file.
  ${DIALOG:-dialog} --nocancel --inputbox " explorer port \n " 16 51 \
    "$(cat "$saved")" 2>/tmp/n.tomcat.tmp
  echo_vb "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}
125
# final_confirm
#   Show a yes/no box summarising the four answers collected so far (URL
#   list, robot name, crawl depth, port), read from the /tmp/n.*.tmp files.
#   The summary is assembled in memory, so no extra temp file is created or
#   left behind and the deprecated "tempfile" utility is not needed.
# Globals: DIALOG (read), MSG (written: summary text),
#          RET (written: exit status of the dialog)
# Returns: the dialog's exit status (start = 0, back = 1).
final_confirm () {
  echo_vb "6. final_confirm : start =0 , back =1 "

  # The "\n" sequences are passed through literally as part of the text.
  # Trailing newlines are removed by the command substitution.
  MSG=$(
    printf '%s\n' ' \n 1. The url list is : \n '
    cat /tmp/n.url.tmp
    printf '%s\n' ' \n 2. The robot name is : \n'
    cat /tmp/n.robot.tmp
    printf '%s\n' ' \n 3. The crawled depth is : \n '
    cat /tmp/n.crawler.tmp
    printf '%s\n' ' \n 4. The explorer port is : \n '
    cat /tmp/n.tomcat.tmp
  )
  echo_vb "6.1 final message :\n $MSG"

  ${DIALOG:-dialog} --title "Check It !!" --clear \
        --yesno "$MSG" 16 51
  RET=$?
  echo_vb "final return = $RET"
  return "$RET"
}
149
# define parameters
151
# start_crawl
#   Load the saved answers from ~/.nutchez/sav/n.<key>.txt into the global
#   crawl parameters, prepare the Nutch configuration and the private Tomcat,
#   then print the crawl command line (the real invocation is still
#   commented out below).
# Globals written: ROBOT (n.robot.txt), URLS (n.url.txt),
#                  DEPTH (n.crawler.txt), PORT (n.tomcat.txt)
start_crawl () {
  local sav_dir
  sav_dir=~/.nutchez/sav

  # Read each value from its own file; sav itself is a directory and cannot
  # be read with cat.
  ROBOT=$(cat "$sav_dir/n.robot.txt")
  URLS=$(cat "$sav_dir/n.url.txt")
  DEPTH=$(cat "$sav_dir/n.crawler.txt")
  PORT=$(cat "$sav_dir/n.tomcat.txt")

  echo_vb "7. start_crawl"
  # The helper defined in this file is setup_nutchez (no setup_nutch exists).
  setup_nutchez
  install_tomcat
  # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
  echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
}
165
# start_tomcat
#   Start Tomcat. The per-user copy under ~/.nutchez/tomcat (the one
#   install_tomcat creates and whose port it adjusts) is preferred; the
#   system-wide /opt/nutch/tomcat is used only when no private copy exists.
# Returns: the exit status of the startup script that was run.
start_tomcat () {
  local user_startup
  user_startup=~/.nutchez/tomcat/bin/startup.sh

  echo_vb "8. start_tomcat "
  if [ -x "$user_startup" ]; then
    "$user_startup"
  else
    /opt/nutch/tomcat/bin/startup.sh
  fi
}
170
# show_report
#   Point the user at the search interface: open it in Firefox when a
#   "firefox" command is available, otherwise show the URL in a message box.
#   The configured $PORT is used in both cases (8080 when PORT is empty).
# Globals: PORT, DIALOG (read); FIREFOX (written: resolved firefox command),
#          RET (written: status of the lookup)
show_report () {
  local port=${PORT:-8080}

  echo_vb "9. show_report "
  # command -v is the shell's own lookup and does not depend on "which".
  FIREFOX=$(command -v firefox)
  RET=$?
  # Compare the variable's value; a bare RET would be the literal string
  # "RET", which never equals 0.
  if [ "$RET" -eq 0 ]; then
    # NOTE(review): the "-D 0.0" arguments are kept exactly as they were;
    # their intended meaning is not evident from this file - confirm.
    "$FIREFOX" -D 0.0 "http://localhost:$port"
  else
    ${DIALOG:-dialog} --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:$port" 0 0
  fi
}
Note: See TracBrowser for help on using the repository browser.