source: nutchez-0.1/bin/nutchez-func.sh @ 68

Last change on this file since 68 was 68, checked in by waue, 15 years ago

big modification

  • Property svn:executable set to *
File size: 4.4 KB
Line 
1#!/bin/bash
2# Author: WeiYu Chen <waue _at_ nchc org tw>
3# License: GPL
4# Description: Easy-to-use helper functions for Nutch
5# .
6
# Load the Hadoop environment: try the system-wide copy first and, if sourcing
# it fails, fall back to the copy under $NUTCHEZ_SCRIPT_PATH/conf.
# The path is quoted so a script path containing spaces still resolves.
. /etc/nutch/hadoop-env.sh || . "${NUTCHEZ_SCRIPT_PATH}/conf/hadoop-env.sh"

# Dialog front end used for the prompts; a DIALOG value that is already set
# (even to the empty string) is kept, otherwise it defaults to "dialog".
: "${DIALOG=dialog}"

# display more for debug
# (echo_vb only shows its messages when VERB is 1)
VERB=0
13
# Show a debug message in a message box, but only in verbose mode.
# Globals:   VERB   (read) - the message is shown only when this equals 1
#            DIALOG (read) - dialog program to run; "dialog" when unset/empty
# Arguments: $1 - message text
# Returns:   0 when not verbose, otherwise the dialog program's exit status
echo_vb () {
  # An unset, empty or non-numeric VERB counts as "not verbose" instead of
  # making the test itself fail with a syntax error.
  if [ "${VERB:-0}" -eq 1 ] 2>/dev/null; then
    # DIALOG stays unquoted on purpose so it may carry extra options.
    ${DIALOG:-dialog} --msgbox "$1" 16 51
  fi
}
19
# Make sure a file exists so that later reads of it do not fail.
# A missing file is created holding one empty line; an existing file is left
# untouched and its content is only reported through echo_vb.
# Arguments: $1 - path of the file to check
test_file () {
  if ! test -e "$1" ; then
    echo_vb "test_file: \n can not find $1"
    # Quoted: an unquoted path with spaces is an "ambiguous redirect".
    echo "" > "$1"
  else
    echo_vb "test_file: \n Touch  $1 ! \n Its content is \n $(cat -- "$1")"
  fi
}
28
# Abort the whole script unless it is running as root.
# Globals: UID, LOGNAME (read)
# Exits:   with status 1 when the current user id is not 0
check_if_root() {
  if [ "$UID" != "0" ]; then
    echo_vb "Hi [$LOGNAME] !! "
    echo_vb "You need to run this script \"${0##*/}\" as root."
    # echo_vb is silent unless VERB is 1, so also report on stderr; otherwise
    # the script would just exit with no explanation.
    printf 'You need to run this script "%s" as root.\n' "${0##*/}" >&2
    exit 1
  fi
}
36
# Promote the answers collected in /tmp/n.<key>.tmp to the saved settings
# ~/.nutchez/sav/n.<key>.txt for the keys url, robot, crawler and tomcat.
# The files are renamed to .txt while moving, because the saved settings are
# read back under the n.<key>.txt names. A key whose temp file is missing
# keeps its previously saved value.
promote_tempfile () {
  local key
  echo_vb "7. chang tmp as txt"
  mkdir -p ~/.nutchez/sav || return 1
  for key in url robot crawler tomcat; do
    if [ -e "/tmp/n.$key.tmp" ]; then
      mv -f "/tmp/n.$key.tmp" ~/.nutchez/sav/"n.$key.txt"
    fi
  done
}
45
# Delete every /tmp/n.*.tmp scratch file.
# Returns: rm's exit status; with -f an empty /tmp is not an error.
clean_tempfile () {
  echo_vb "7. delete tmp"
  # -f: when nothing matches, the glob stays literal and plain rm would
  # complain about a missing file.
  rm -f -- /tmp/n.*.tmp
}
50
# Create the per-user configuration directory ~/.nutchez on first use by
# copying the system configuration into it. Does nothing when ~/.nutchez
# already exists.
# Globals:   LOGNAME (read) - owner and group given to the copy
# Arguments: $1 - source directory (optional, default /etc/nutch)
# Returns:   0 when ~/.nutchez already exists; 1 when the source is missing or
#            the copy fails; otherwise chown's exit status
init_nutchez () {
  local src=${1:-/etc/nutch}
  local dest
  dest=~/.nutchez

  if [ -e "$dest" ]; then
    return 0
  fi

  # Check the source first so that a failed copy never leaves an empty
  # ~/.nutchez behind, which would make every later call skip the copy.
  if ! [ -d "$src" ]; then
    printf 'init_nutchez: cannot find %s\n' "$src" >&2
    return 1
  fi

  # cp cannot copy several sources into a directory that does not exist yet,
  # so create it first.
  mkdir -p "$dest" || return 1
  # copy from /etc/nutch ("/." copies the directory content, hidden files too)
  cp -rf "$src"/. "$dest"/ || return 1
  chown -R "$LOGNAME:$LOGNAME" "$dest"
}
58
# Prepare the crawl input under ~/.nutchez: publish the saved url list as
# urls/urls.txt and write the robot name into nutch-site.xml.
# Globals: ROBOT (read) - replaces the text "user" between ">" and "<" in
#          ~/.nutchez/nutch-site.xml
setup_nutchez () {
  local site url_list robot_escaped
  site=~/.nutchez/nutch-site.xml

  # The url list is saved as sav/n.url.txt (the name the url editor uses);
  # accept a copy that still carries the .tmp suffix it had in /tmp.
  url_list=~/.nutchez/sav/n.url.txt
  if ! [ -e "$url_list" ] && [ -e ~/.nutchez/sav/n.url.tmp ]; then
    url_list=~/.nutchez/sav/n.url.tmp
  fi

  # make url list dir
  mkdir -p ~/.nutchez/urls

  # -f overwrites a list left over from an earlier run
  cp -f "$url_list" ~/.nutchez/urls/urls.txt

  if [ -e "$site" ] ; then
    # set nutch-site.xml
    # Escape the characters that are special in a sed replacement so a robot
    # name containing "/", "&" or "\" is inserted literally.
    robot_escaped=$(printf '%s' "$ROBOT" | sed -e 's|[&/\\]|\\&|g')
    # "-i -e", not "-ie": GNU sed reads "-ie" as "-i" with backup suffix "e"
    # and leaves a stray nutch-site.xmle behind.
    sed -i -e "s/>user</>${robot_escaped}</" "$site"
  fi
}
77
# Install a private Tomcat under ~/.nutchez/tomcat on first use and point it
# at the chosen port. Does nothing when ~/.nutchez/tomcat already exists.
# Globals:   PORT    (read) - port number that replaces 8080 in server.xml
#            LOGNAME (read) - owner and group given to the copy
# Arguments: $1 - Tomcat directory to copy (optional, default /opt/nutch/tomcat)
# Returns:   1 when the copy cannot be made
install_tomcat (){
  local src=${1:-/opt/nutch/tomcat}
  local dest
  dest=~/.nutchez/tomcat

  if ! [ -e "$dest" ] ;then
    # install tomcat to home
    mkdir -p ~/.nutchez || return 1
    cp -rf "$src" "$dest" || return 1
    chown -R "$LOGNAME:$LOGNAME" "$dest"/
    # make search dir
    mkdir -p ~/.nutchez/search
    # change explorer port
    case "$PORT" in
      ''|*[!0-9]*)
        # An empty or non-numeric PORT would corrupt server.xml; keep 8080.
        printf 'install_tomcat: invalid port "%s", keeping 8080\n' "$PORT" >&2
        ;;
      *)
        # Replace only the number: the surrounding XML must stay intact.
        # "-i -e", not "-ie", so GNU sed leaves no server.xmle backup behind.
        sed -i -e "s/8080/$PORT/" "$dest/conf/server.xml"
        ;;
    esac
  fi
}
90
91
# Let the user edit the saved url list in an edit box.
# The dialog's stderr is captured in /tmp/n.url.tmp; the saved file
# ~/.nutchez/sav/n.url.txt itself is not modified here.
# Globals: DIALOG (read) - dialog program to run; "dialog" when unset/empty
#          RET (written) - exit status of the dialog
# Returns: the dialog's exit status (ok = 0, cancel = 1)
show_urls (){
  # show urls : ok =0 ,cancel = 1
  echo_vb "2. show_urls !"
  test_file  ~/.nutchez/sav/n.url.txt
  echo_vb "2.1 test_file ~/.nutchez/sav return : $?"
  # dialog begin
  # Use the configurable front end, like the other prompts that honour DIALOG.
  ${DIALOG:-dialog} --editbox ~/.nutchez/sav/n.url.txt 16 51 2>/tmp/n.url.tmp
  RET=$?
  echo_vb "2.1 cat url: $(cat /tmp/n.url.tmp)"
  return $RET
}
103
# Ask for the crawler's agent name, offering the saved value as the default.
# The dialog's stderr is captured in /tmp/n.robot.tmp.
# Globals: DIALOG (read) - dialog program to run; "dialog" when unset/empty
setup_robot () {
  test_file ~/.nutchez/sav/n.robot.txt
  echo_vb "3. setup_robot"
  # dialog
  # Use the configurable front end, like the other prompts that honour DIALOG.
  ${DIALOG:-dialog} --nocancel --inputbox " This agent name \n" 16 51 \
    "$(cat ~/.nutchez/sav/n.robot.txt)" 2>/tmp/n.robot.tmp
  echo_vb "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}
111
# Ask for the crawl depth, offering the saved value as the default.
# The dialog's stderr is captured in /tmp/n.crawler.tmp.
# Globals: DIALOG (read) - dialog program to run; "dialog" when unset/empty
setup_crawler () {
  echo_vb "4. setup_crawler"
  test_file ~/.nutchez/sav/n.crawler.txt
  # Use the configurable front end, like the other prompts that honour DIALOG.
  ${DIALOG:-dialog} --nocancel --inputbox " Depth  \n " 16 51 \
    "$(cat ~/.nutchez/sav/n.crawler.txt)" 2>/tmp/n.crawler.tmp
  # Report the value just entered (the crawler file, not the robot file).
  echo_vb "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}
118
# Ask for the port of the result explorer, offering the saved value as the
# default. The dialog's stderr is captured in /tmp/n.tomcat.tmp.
# Globals: DIALOG (read) - dialog program to run; "dialog" when unset/empty
setup_tomcat () {
  echo_vb "5. setup_tomcat"
  test_file ~/.nutchez/sav/n.tomcat.txt
  # Use the configurable front end, like the other prompts that honour DIALOG.
  ${DIALOG:-dialog} --nocancel --inputbox " explorer port \n " 16 51 \
    "$(cat ~/.nutchez/sav/n.tomcat.txt)" 2>/tmp/n.tomcat.tmp
  echo_vb "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}
125
# Show a summary of the four collected answers and ask for confirmation.
# Globals: DIALOG (read) - dialog program to run; "dialog" when unset/empty
#          MSG (written) - the summary text
#          RET (written) - exit status of the yes/no box
# Returns: the yes/no box's exit status (start = 0, back = 1)
final_confirm () {
  echo_vb "6. final_confirm : start =0 , back =1 "

  # Build the summary in memory: no scratch file is left behind in /tmp and
  # no predictable temp-file name is needed. The "\n" sequences are kept
  # literal and passed through to the dialog program unchanged.
  MSG=$(
    printf '%s\n' ' \n 1. The url list is : \n '
    cat /tmp/n.url.tmp
    printf '%s\n' ' \n 2. The robot name is : \n'
    cat /tmp/n.robot.tmp
    printf '%s\n' ' \n 3. The crawled depth is : \n '
    cat /tmp/n.crawler.tmp
    printf '%s\n' ' \n 4. The explorer port is : \n '
    cat /tmp/n.tomcat.tmp
  )
  echo_vb "6.1 final message :\n $MSG"
  ${DIALOG:-dialog} --title "Check It !!" --clear \
        --yesno "$MSG" 16 51
  RET=$?
  echo_vb "final return = $RET"
  return $RET
}
149
150# define parameters
151
# Print the saved value for one settings key.
# Arguments: $1 - key (url, robot, crawler or tomcat)
# Returns:   1 when no saved file exists for the key
_nutchez_saved_value () {
  local suffix
  # Values are saved as sav/n.<key>.txt, but may still carry the .tmp suffix
  # they had in /tmp, so accept either name.
  for suffix in txt tmp; do
    if [ -f ~/.nutchez/sav/"n.$1.$suffix" ]; then
      cat ~/.nutchez/sav/"n.$1.$suffix"
      return 0
    fi
  done
  return 1
}

# Load the saved settings, prepare the configuration and the private Tomcat,
# then print the crawl command (the real invocation is still commented out).
# Globals: ROBOT, URLS, DEPTH, PORT (written) - read from ~/.nutchez/sav
start_crawl () {

  # Each value comes from its own file; reading the sav directory itself with
  # cat only yields an error and an empty value.
  ROBOT=$(_nutchez_saved_value robot)
  URLS=$(_nutchez_saved_value url)
  DEPTH=$(_nutchez_saved_value crawler)
  PORT=$(_nutchez_saved_value tomcat)

  echo_vb "7. start_crawl"
  setup_nutchez
  install_tomcat
  # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
  echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
}
165
# Start Tomcat.
# Prefers the per-user copy under ~/.nutchez/tomcat, since that is the copy
# whose server.xml gets the chosen port; falls back to the system-wide
# installation when no executable per-user startup script exists.
# Returns: the startup script's exit status
start_tomcat () {
  echo_vb "8. start_tomcat "
  if [ -x ~/.nutchez/tomcat/bin/startup.sh ]; then
    ~/.nutchez/tomcat/bin/startup.sh
  else
    /opt/nutch/tomcat/bin/startup.sh
  fi
}
170
# Present the result: open the search page in Firefox when it is installed,
# otherwise show the address in a message box.
# Globals: PORT    (read)    - port of the search page; 8080 when unset/empty
#          DIALOG  (read)    - dialog program to run; "dialog" when unset/empty
#          FIREFOX (written) - resolved firefox command, empty when not found
#          RET     (written) - exit status of the firefox lookup
show_report () {
  echo_vb "9. show_report "
  FIREFOX=$(command -v firefox)
  RET=$?
  # Compare the variable's value; the bare word RET never equals 0, which
  # made the Firefox branch unreachable.
  if [ "$RET" -eq 0 ];then
    # NOTE(review): "-D 0.0" is kept as found; confirm what it is meant to do.
    "$FIREFOX" -D 0.0 "http://localhost:${PORT:-8080}"
  else
    # Show the configured port here too, not a fixed 8080.
    ${DIALOG:-dialog} --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:${PORT:-8080}" 0 0
  fi
}
Note: See TracBrowser for help on using the repository browser.