source: nutchez-0.1/bin/nutchez-func.sh @ 77

Last change on this file since 77 was 77, checked in by waue, 15 years ago

good

  • Property svn:executable set to *
File size: 5.0 KB
Line 
1#!/bin/bash
2# Author: WeiYu Chen <waue _at_ nchc org tw>
3# License: GPL
# Description: Helper functions that make Nutch easy to use
5# .
6
7
# Dialog program used for the message boxes. A value already present in the
# environment wins; otherwise "dialog" is used. The expansion is quoted so
# that it is not word-split or glob-expanded while being evaluated.
: "${DIALOG=dialog}"

# Set to 1 to display extra message boxes for debugging.
VERB=0
12
# Create the per-user configuration directory ~/.nutchez on first use.
# If ~/.nutchez does not exist it is created, filled with a copy of
# /etc/nutch/*, given a log/ subdirectory and chown'ed to $LOGNAME.
# If it already exists nothing is touched.
# Globals: LOGNAME (read)
# Returns: non-zero when ~/.nutchez cannot be created; otherwise the status
#          of the last command that ran (0 when nothing had to be done).
init_nutchez () {
  local conf_dir=~/.nutchez

  if ! [ -e "$conf_dir" ]; then
    mkdir -p "$conf_dir" || return
    # copy from /etc/nutch (a failing copy is reported by cp but is not fatal)
    cp -rf /etc/nutch/* "$conf_dir"
    # -p: the tree copied above may already contain a log directory
    mkdir -p "$conf_dir/log"
    chown -R "$LOGNAME:$LOGNAME" "$conf_dir"
  fi
#  export NUTCH_CONF_DIR=~/.nutchez
#  export HADOOP_CONF_DIR=~/.nutchez
#  export HADOOP_LOG_DIR=~/.nutchez/log
#  . ~/.nutchez/hadoop-env.sh || . /etc/nutch/hadoop-env.sh
}
26
# Show a debug message box, but only when verbose mode is on (VERB is 1).
# Globals:   VERB (read), DIALOG (read)
# Arguments: $1 - text to display
# Returns:   0 when quiet, otherwise the status of the dialog program.
echo_vb () {
  # String comparison: an unset, empty or non-numeric VERB just means "quiet"
  # instead of making the test itself fail with a syntax error.
  if [ "${VERB:-0}" = "1" ]; then
    # DIALOG is intentionally unquoted so it may carry extra options.
    ${DIALOG:-dialog} --msgbox "$1" 16 51
  fi
}
32
# Make sure the file named by $1 exists.
# A missing file is created containing a single empty line; an existing file
# is left alone and its content is only reported through echo_vb.
# Arguments: $1 - path of the file (may contain spaces)
# Returns:   status of the last command run in the branch taken.
test_file () {
  if ! test -e "$1"; then
    echo_vb "test_file: \n can not find $1"
    echo "" > "$1"
  else
    echo_vb "test_file: \n Touch  $1 ! \n Its content is \n $(cat -- "$1")"
  fi
}
41
# Terminate the script with status 1 unless it is running as root (UID 0).
# The explanation is only shown through echo_vb, i.e. in verbose mode.
# Globals: UID (read), LOGNAME (read)
check_if_root() {
  if [ "$UID" != "0" ]; then
    echo_vb "Hi [$LOGNAME] !! "
    # ${0##*/} strips the directory part without word-splitting $0
    echo_vb "You need to run this script \"${0##*/}\" as root."
    exit 1
  fi
}
49
# Turn the answers collected in /tmp/n.<kind>.tmp into the saved settings
# ~/.nutchez/sav/n.<kind>.txt for the kinds urls, robot, crawler and tomcat.
# Previously saved n.*.txt files are removed first.
# Returns: non-zero if the save directory cannot be created or any move
#          fails (status of the last failing move), 0 otherwise.
promote_tempfile () {
  local sav_dir=~/.nutchez/sav
  local kind rc=0

  echo_vb "7. chang tmp as txt"
  # The save directory may not exist yet; without it every mv would fail.
  mkdir -p "$sav_dir" || return
  # -f: stay silent when nothing has been saved before
  rm -f "$sav_dir"/n.*.txt
  for kind in urls robot crawler tomcat; do
    mv "/tmp/n.$kind.tmp" "$sav_dir/n.$kind.txt" || rc=$?
  done
  return "$rc"
}
58
# Remove every /tmp/n.*.tmp work file.
# Returns: status of rm; with -f an already clean /tmp is not an error.
clean_tempfile () {
  echo_vb "7. delete tmp"
  rm -f /tmp/n.*.tmp
}
63
# Apply the saved settings to the per-user Nutch configuration:
#   * ~/.nutchez/urls/urls.txt becomes a fresh copy of sav/n.urls.txt
#   * the <value>...</value> on line 4 of ~/.nutchez/nutch-site.xml (if that
#     file exists) is set to $ROBOT
# Globals: ROBOT (read)
# Returns: status of the last command run.
setup_nutchez () {
  local conf_dir=~/.nutchez
  local robot_esc

  # make url list dir
  mkdir -p "$conf_dir/urls"

  # start from a clean url list
  rm -f "$conf_dir/urls/urls.txt"
  cp "$conf_dir/sav/n.urls.txt" "$conf_dir/urls/urls.txt"

  if [ -e "$conf_dir/nutch-site.xml" ]; then
    # set nutch-site.xml
    # Escape the characters that are special in a sed replacement (\ / &)
    # so any robot name is inserted literally.
    robot_esc=$(printf '%s' "$ROBOT" | sed -e 's|[\\/&]|\\&|g')
    # [^<]* matches whatever value is currently stored, not only
    # alphanumeric ones, so a name with other characters can be replaced.
    sed -i -e "4s/<value>[^<]*</<value>$robot_esc</" "$conf_dir/nutch-site.xml"
  fi
}
82
# Install a private Tomcat under ~/.nutchez (first run only) and set the
# connector port on line 67 of its conf/server.xml to $PORT.
# Globals: LOGNAME (read), PORT (read)
# Returns: 1 when PORT is not a plain number (server.xml is left untouched),
#          otherwise the status of the sed edit.
install_tomcat (){
  local conf_dir=~/.nutchez

  if ! [ -e "$conf_dir/tomcat" ]; then
    # install tomcat to home
    cp -rf /opt/nutch/tomcat "$conf_dir/"
    chown -R "$LOGNAME:$LOGNAME" "$conf_dir/tomcat/"
    # make search dir
    mkdir -p "$conf_dir/search"
  fi

  # An empty or non-numeric port would be written into server.xml verbatim
  # and leave the connector definition broken, so refuse it.
  case "$PORT" in
    ''|*[!0-9]*)
      echo "install_tomcat: invalid port '$PORT'" >&2
      return 1
      ;;
  esac

  # change explorer port
  sed -i -e "67s/<Connector port=\"[0-9]*\"/<Connector port=\"$PORT\"/" "$conf_dir/tomcat/conf/server.xml"
}
96
97
# Let the user edit the saved URL list in an edit box.
# The stderr output of the dialog call is captured in /tmp/n.urls.tmp.
# Globals: DIALOG (read), RET (written: status of the dialog call)
# Returns: status of the dialog call (ok = 0, cancel = 1).
show_urls (){
  local saved_urls=~/.nutchez/sav/n.urls.txt

  # show urls : ok =0 ,cancel = 1
  echo_vb "2. show_urls !"
  test_file "$saved_urls"
  echo_vb "2.1 test_file ~/.nutchez/sav return : $?"
  # dialog begin
  # Use the configurable $DIALOG front-end like the other functions do;
  # it is intentionally unquoted so it may carry extra options.
  ${DIALOG:-dialog} --title "The URLS that you want" --editbox "$saved_urls" 16 51 2>/tmp/n.urls.tmp
  RET=$?
  echo_vb "2.1 cat url: $(cat /tmp/n.urls.tmp)"
  return $RET
}
109
# Ask for the agent (robot) name, pre-filled with the saved value.
# The stderr output of the dialog call is captured in /tmp/n.robot.tmp.
# Globals: DIALOG (read)
# Returns: status of the final echo_vb call.
setup_robot () {
  local saved_robot=~/.nutchez/sav/n.robot.txt

  test_file "$saved_robot"
  echo_vb "3. setup_robot"
  # dialog
  # Use the configurable $DIALOG front-end like the other functions do;
  # it is intentionally unquoted so it may carry extra options.
  ${DIALOG:-dialog} --nocancel --inputbox " This agent name \n" 16 51 "$(cat "$saved_robot")" 2>/tmp/n.robot.tmp
  echo_vb "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}
117
# Ask for the crawl depth, pre-filled with the saved value.
# The stderr output of the dialog call is captured in /tmp/n.crawler.tmp.
# Globals: DIALOG (read)
# Returns: status of the final echo_vb call.
setup_crawler () {
  local saved_depth=~/.nutchez/sav/n.crawler.txt

  echo_vb "4. setup_crawler"
  test_file "$saved_depth"
  # Use the configurable $DIALOG front-end like the other functions do;
  # it is intentionally unquoted so it may carry extra options.
  ${DIALOG:-dialog} --nocancel --inputbox " Depth  \n " 16 51 "$(cat "$saved_depth")" 2>/tmp/n.crawler.tmp
  # Report the crawler answer (this used to print the robot temp file).
  echo_vb "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}
124
# Ask for the port the search front-end should listen on, pre-filled with
# the saved value.
# The stderr output of the dialog call is captured in /tmp/n.tomcat.tmp.
# Globals: DIALOG (read)
# Returns: status of the final echo_vb call.
setup_tomcat () {
  local saved_port=~/.nutchez/sav/n.tomcat.txt

  echo_vb "5. setup_tomcat"
  test_file "$saved_port"
  # Use the configurable $DIALOG front-end like the other functions do;
  # it is intentionally unquoted so it may carry extra options.
  ${DIALOG:-dialog} --nocancel --inputbox " explorer port \n " 16 51 "$(cat "$saved_port")" 2>/tmp/n.tomcat.tmp
  echo_vb "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}
131
# Show a summary of all collected answers and ask what to do next.
# The summary is assembled in /tmp/n.finalcheck.tmp from the four
# /tmp/n.<kind>.tmp answer files and then shown in a yes/no box that has an
# additional "reset" button.
# Globals: DIALOG (read), RET (written: status of the dialog call)
# Returns: status of the dialog call.
final_confirm () {
  # Local, so a caller's own "tempfile" or "MSG" variable is not clobbered.
  local tempfile=/tmp/n.finalcheck.tmp
  local MSG

  echo_vb "6. final_confirm : start =0 , back =1 "

  # The "\n" sequences are written literally (printf '%s' never interprets
  # them); they are presumably rendered as line breaks by dialog itself.
  {
    printf '%s\n' ' \n 1. The url list is : \n '
    cat /tmp/n.urls.tmp
    printf '%s\n' ' \n 2. The robot name is : \n'
    cat /tmp/n.robot.tmp
    printf '%s\n' ' \n 3. The crawled depth is : \n '
    cat /tmp/n.crawler.tmp
    printf '%s\n' ' \n 4. The explorer port is : \n '
    cat /tmp/n.tomcat.tmp
  } > "$tempfile"

  MSG=$(cat "$tempfile")
  echo_vb "6.1 final message :\n $MSG"
  #read READ
  $DIALOG --title "Check It !!" --clear \
        --extra-button --extra-label "reset" --ok-label "ok" --cancel-label "exit" \
        --yesno "$MSG" 26 51
  RET=$?
  echo_vb "final return = $RET"
  return $RET
}
155
# Define parameters
157
# Load the saved answers from ~/.nutchez/sav into the global variables
# ROBOT, URLS, DEPTH and PORT.
# Returns: status of the last read (the port file).
set_nutchez_p () {
  local sav_dir=~/.nutchez/sav

  ROBOT=$(cat "$sav_dir/n.robot.txt")
  URLS=$(cat "$sav_dir/n.urls.txt")
  DEPTH=$(cat "$sav_dir/n.crawler.txt")
  PORT=$(cat "$sav_dir/n.tomcat.txt")
}
# Prepare the per-user configuration and run the Nutch crawl.
# Globals: DEPTH (read), NUTCH_CONF_DIR (read, debug output only)
# Returns: 1 when DEPTH is not a plain number (nothing is changed in that
#          case), otherwise the status of the nutch crawl command.
start_crawl () {

  echo_vb "7. start_crawl"

  # Check the depth before any configuration is rewritten: an empty value
  # would otherwise reach nutch as a bare "-depth" with no argument.
  case "$DEPTH" in
    ''|*[!0-9]*)
      echo "start_crawl: invalid crawl depth '$DEPTH'" >&2
      return 1
      ;;
  esac

  setup_nutchez
  install_tomcat
  echo_vb "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
  echo_vb "nutch conf dir = $NUTCH_CONF_DIR"
  /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth "$DEPTH"
}
173
# Publish the crawl result as /tmp/search and restart the private Tomcat
# that lives under ~/.nutchez/tomcat.
# Returns: status of the final sleep.
start_tomcat () {
  local tomcat_bin=~/.nutchez/tomcat/bin

  echo_vb "8. start_tomcat "
  # Name the script that is actually started below (the private copy),
  # not the system-wide one under /opt/nutch.
  echo_vb "~/.nutchez/tomcat/bin/startup.sh"
  if [ -e /tmp/search ]; then
    rm -rf /tmp/search
  fi
  ln -sf ~/.nutchez/search/ /tmp/
  "$tomcat_bin/shutdown.sh"
  "$tomcat_bin/startup.sh"
  # presumably gives Tomcat a moment to come up before the report is shown
  sleep 3
}
185
# Open the search page http://localhost:$PORT in firefox. When firefox is
# not available, or it exits with an error, show the URL in a message box
# instead.
# Globals: PORT (read), DIALOG (read), FIREFOX and RET (written)
# Returns: 0 when firefox succeeded, otherwise the status of the dialog call.
show_report () {
  echo_vb "9. show_report "
  # command -v is the shell built-in way to locate a program; unlike
  # "which" it does not depend on an external tool being installed.
  FIREFOX=$(command -v firefox)
  RET=$?
  if [ "$RET" -eq 0 ]; then
    "$FIREFOX" -D 0.0 "http://localhost:$PORT"
    RET=$?
  fi
  if [ "$RET" -ne 0 ]; then
    $DIALOG --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:$PORT" 0 0
  fi
}
Note: See TracBrowser for help on using the repository browser.