source: nutchez-0.1/bin/nutchez-func.sh @ 67

Last change on this file since 67 was 67, checked in by waue, 15 years ago

v2

  • Property svn:executable set to *
File size: 3.3 KB
Line 
1#!/bin/bash
2# Author: WeiYu Chen <waue _at_ nchc org tw>
3# License: GPL
4# Description: Easy-to-use helper functions for Nutch
5# .
6
# Load the Hadoop environment: try the system-wide copy first and fall back
# to the copy under $NUTCHEZ_SCRIPT_PATH/conf when that fails.
# The fallback path is quoted so a script path containing spaces still works.
. /etc/nutch/hadoop-env.sh || . "${NUTCHEZ_SCRIPT_PATH}/conf/hadoop-env.sh"

# Dialog program to use; a DIALOG value already set by the caller is kept.
: "${DIALOG=dialog}"

# Verbose switch: 1 makes echo_vb show its debug message boxes.
VERB=0
12
13
# Show a debug message box, but only when verbose mode is switched on.
# Globals:   VERB (read)   - 1 enables the box; unset or empty counts as off
#            DIALOG (read) - dialog program to run (defaults to "dialog")
# Arguments: $1 - message text
# Returns:   the dialog program's exit status when the box is shown, else 0
echo_vb () {
  if [ "${VERB:-0}" -eq 1 ]; then
    # DIALOG is left unquoted on purpose so it may carry extra options.
    ${DIALOG:-dialog} --msgbox "$1" 16 51
  fi
}
19
# Make sure a file exists, creating it when it is missing.
# Arguments: $1 - path of the file to check (may contain spaces)
# Outputs:   when $1 did not exist, creates it containing a single newline;
#            in verbose mode a debug box reports what was found
test_file () {
  if ! test -e "$1" ; then
    echo_vb "test_file: \n can not find $1"
    printf '\n' > "$1"
  else
    echo_vb "test_file: \n Touch  $1 ! \n Its content is \n $(cat -- "$1")"
  fi
}
28
# Abort unless the script is running as root.
# Globals:   UID, LOGNAME (read)
# Outputs:   when not root, an explanation on stderr (so the refusal is never
#            silent) plus the debug boxes in verbose mode
# Returns:   normally when root; otherwise exits the shell with status 1
check_if_root() {
  local script_name=${0##*/}

  if [ "$UID" != "0" ]; then
    echo_vb "Hi [$LOGNAME] !! "
    echo_vb "You need to run this script \"${script_name}\" as root."
    printf '%s\n' "You need to run this script \"${script_name}\" as root." >&2
    exit 1
  fi
}
36
# Promote the freshly entered answers: every /tmp/n.<name>.tmp becomes
# /tmp/n.<name>.txt for the url, robot, crawler and tomcat settings.
# All existing /tmp/n.*.txt files are removed first.
# Returns:   0 when all four files were moved, 1 when any move failed
promote_tempfile () {
  local name
  local rc=0

  echo_vb "7. chang tmp as txt"
  # -f: having no old .txt files yet is not an error.
  rm -f -- /tmp/n.*.txt
  for name in url robot crawler tomcat; do
    mv -- "/tmp/n.${name}.tmp" "/tmp/n.${name}.txt" || rc=1
  done
  return "$rc"
}
45
# Discard the pending answers by deleting every /tmp/n.*.tmp file.
# Returns:   the exit status of rm; with -f, having nothing to delete is
#            not an error
clean_tempfile () {
  echo_vb "7. delete tmp"
  rm -f -- /tmp/n.*.tmp
}
50
# First-run setup of the per-user ~/.nutchez directory.
# Does nothing when ~/.nutchez already exists. Otherwise it copies the
# template /etc/nutch/.nutchez into the home directory, chowns the copy to
# $LOGNAME:$LOGNAME, and recreates /tmp/nutch holding a symlink to
# ~/.nutchez/search.
# Globals:   LOGNAME (read), HOME (read through ~)
# Returns:   0 when nothing had to be done; non-zero when the template copy
#            or the mkdir fails (in which case the remaining steps are
#            skipped); otherwise the exit status of the final ln
setup_nutchez() {
  if [ -e ~/.nutchez ]; then
    return 0
  fi

  # Without the template there is nothing to set up, so stop before
  # /tmp/nutch is wiped.
  cp -rf /etc/nutch/.nutchez ~/ || return
  chown -R "$LOGNAME:$LOGNAME" ~/.nutchez

  # rm -rf is silent when /tmp/nutch is absent, so no existence test is needed.
  rm -rf /tmp/nutch
  mkdir /tmp/nutch || return
  ln -sf ~/.nutchez/search /tmp/nutch/
}
63
# Let the user edit the URL list in an edit box.
# The box is opened on /tmp/n.url.txt (created first when missing) and
# whatever the dialog program writes to stderr is captured in
# /tmp/n.url.tmp.
# Globals:   DIALOG (read) - dialog program, defaults to "dialog"
#            RET (written) - exit status of the dialog program
# Returns:   the dialog program's exit status (ok = 0, cancel = 1)
show_urls (){
  echo_vb "2. show_urls !"
  test_file  /tmp/n.url.txt
  echo_vb "2.1 test_file /tmp/n.url.txt return : $?"
  # Honour the DIALOG override like the other dialogs in this file do.
  # Left unquoted on purpose so DIALOG may carry extra options.
  ${DIALOG:-dialog} --editbox /tmp/n.url.txt 16 51 2>/tmp/n.url.tmp
  RET=$?
  echo_vb "2.1 cat url: $(cat /tmp/n.url.tmp)"
  return $RET
}
75
# Ask for the robot (agent) name in an input box.
# The box is pre-filled with the content of /tmp/n.robot.txt (created first
# when missing); whatever the dialog program writes to stderr is captured in
# /tmp/n.robot.tmp.
# Globals:   DIALOG (read) - dialog program, defaults to "dialog"
setup_robot () {
  test_file /tmp/n.robot.txt
  echo_vb "3. setup_robot"
  # Honour the DIALOG override like the other dialogs in this file do.
  # Left unquoted on purpose so DIALOG may carry extra options.
  ${DIALOG:-dialog} --nocancel --inputbox " This agent name \n" 16 51 \
    "$(cat /tmp/n.robot.txt)" 2>/tmp/n.robot.tmp
  echo_vb "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}
83
# Ask for the crawl depth in an input box.
# The box is pre-filled with the content of /tmp/n.crawler.txt (created first
# when missing); whatever the dialog program writes to stderr is captured in
# /tmp/n.crawler.tmp.
# Globals:   DIALOG (read) - dialog program, defaults to "dialog"
setup_crawler () {
  echo_vb "4. setup_crawler"
  test_file /tmp/n.crawler.txt
  # Honour the DIALOG override like the other dialogs in this file do.
  # Left unquoted on purpose so DIALOG may carry extra options.
  ${DIALOG:-dialog} --nocancel --inputbox " Depth  \n " 16 51 \
    "$(cat /tmp/n.crawler.txt)" 2>/tmp/n.crawler.tmp
  # Report the value just entered here (the crawler file, not the robot one).
  echo_vb "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}
90
# Ask for the explorer port in an input box.
# The box is pre-filled with the content of /tmp/n.tomcat.txt (created first
# when missing); whatever the dialog program writes to stderr is captured in
# /tmp/n.tomcat.tmp.
# Globals:   DIALOG (read) - dialog program, defaults to "dialog"
setup_tomcat () {
  echo_vb "5. setup_tomcat"
  test_file /tmp/n.tomcat.txt
  # Honour the DIALOG override like the other dialogs in this file do.
  # Left unquoted on purpose so DIALOG may carry extra options.
  ${DIALOG:-dialog} --nocancel --inputbox " explorer port \n " 16 51 \
    "$(cat /tmp/n.tomcat.txt)" 2>/tmp/n.tomcat.tmp
  echo_vb "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}
97
# Show a summary of the four pending answers and ask for confirmation.
# The summary is assembled in memory from /tmp/n.url.tmp, /tmp/n.robot.tmp,
# /tmp/n.crawler.tmp and /tmp/n.tomcat.tmp, so no scratch file is created
# or left behind.
# Globals:   DIALOG (read) - dialog program, defaults to "dialog"
#            MSG (written) - the summary text
#            RET (written) - exit status of the yes/no dialog
# Returns:   the yes/no dialog's exit status (start = 0, back = 1)
final_confirm () {
  echo_vb "6. final_confirm : start =0 , back =1 "

  # The "\n" sequences are kept literal (printf '%s' does not expand them)
  # and reach the dialog program unchanged.
  MSG=$(
    printf '%s\n' ' \n 1. The url list is : \n '
    cat /tmp/n.url.tmp
    printf '%s\n' ' \n 2. The robot name is : \n'
    cat /tmp/n.robot.tmp
    printf '%s\n' ' \n 3. The crawled depth is : \n '
    cat /tmp/n.crawler.tmp
    printf '%s\n' ' \n 4. The explorer port is : \n '
    cat /tmp/n.tomcat.tmp
  )
  echo_vb "6.1 final message :\n $MSG"

  # Left unquoted on purpose so DIALOG may carry extra options.
  ${DIALOG:-dialog} --title "Check It !!" --clear \
        --yesno "$MSG" 16 51
  RET=$?
  echo_vb "final return = $RET"
  return $RET
}
121
# Define parameters from the saved answers, one file per setting. The values
# are read once, when this file is sourced; a missing file simply leaves the
# variable empty (cat's error message is suppressed for that case).
ROBOT=$(cat /tmp/n.robot.txt 2>/dev/null)
URLS=$(cat /tmp/n.url.txt 2>/dev/null)
DEPTH=$(cat /tmp/n.crawler.txt 2>/dev/null)
PORT=$(cat /tmp/n.tomcat.txt 2>/dev/null)
127
# Prepare the per-user directory and run the Nutch crawl.
# Globals:   DEPTH (read) - crawl depth handed to nutch's -depth option
# Returns:   the exit status of the nutch command
start_crawl () {
  echo_vb "7. start_crawl"
  # The setup function defined in this file is setup_nutchez.
  setup_nutchez
  # NOTE(review): setup_nutchez prepares ~/.nutchez, yet the crawl below uses
  # ~/.nutch/urls and ~/.nutch/search - confirm which directory is intended.
  /opt/nutch/bin/nutch crawl ~/.nutch/urls -dir ~/.nutch/search -threads 2 -depth "$DEPTH"
}
133
# Run the Tomcat startup script found at /opt/nutch/tomcat/bin/startup.sh.
# Returns:   the exit status of that script
start_tomcat () {
  local step_label="8. start_tomcat "
  local launcher=/opt/nutch/tomcat/bin/startup.sh

  echo_vb "$step_label"
  "$launcher"
}
138
# Report step: at present it only announces itself through echo_vb.
# Returns:   the exit status of echo_vb
show_report () {
  local step_label="9. show_report "

  echo_vb "$step_label"
}
Note: See TracBrowser for help on using the repository browser.