#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Easy-to-use wrapper for Nutch
# .
# Nutch install prefix: a non-empty value already present in the
# environment wins, otherwise /opt/nutch is used.
NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"
# Load the helper functions; the path is resolved against the current
# working directory.
source ./nutchez-func.sh

# The root check is currently disabled.
#check_if_root

# Outer loop: repeat the whole wizard until final_confirm returns 0.
CHECK=0
until [ "$CHECK" -ne 0 ]; do
  # Milestone M1: URL list editor. The inner loop ends as soon as
  # show_urls returns 0.
  LOCK=1
  until [ "$LOCK" -ne 1 ]; do
    show_urls
    URL=$?
    echo_vb "$URL"
    # Any non-zero status from show_urls leaves the script right here; the
    # bare exit reuses the status of the failed test in front of it.
    [ "$URL" -eq 0 ] || exit
    LOCK=0
  done

  # Milestone M2: the three setup steps, in order
  # (robot name, crawl parameter, tomcat parameter).
  setup_robot
  setup_crawler
  setup_tomcat

  # Final checklist; its return status decides what happens next.
  final_confirm
  FC=$?
  echo_vb "$FC"
  case "$FC" in
    0)
      # Confirmed: fall out of the outer loop and carry on to M3.
      CHECK=1
      ;;
    *)
      # Anything else: run the wizard again from M1.
      LOCK=1
      CHECK=0
      ;;
  esac
done

# Milestone M3: crawl, start tomcat, then show the result message.
start_crawl
start_tomcat
show_report
# Done
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Easy-to-use wrapper for Nutch
# .
# Source conf/hadoop-env.sh from the Nutch install directory. The path is
# quoted so that an install prefix containing whitespace is still passed
# to `.` as a single file name.
. "$NUTCHEZ_SCRIPT_PATH/conf/hadoop-env.sh"
# Verbosity switch read by echo_vb: 1 prints messages.
VERB=1
#DIALOG=dialog
#NEZ_DIR=/home/waue/.nutchez
#NEZ_DIR_URL
#NEZ_DIR_OTHER
#######################################
# Print a message only when verbose mode is switched on.
# Globals:   VERB (read) - 1 enables output; unset counts as 0 (muted)
# Arguments: $1 - message text, printed verbatim
# Outputs:   the message followed by a newline on stdout
# Returns:   0 when muted, otherwise the status of the write
#######################################
echo_vb() {
  if [ "${VERB:-0}" -eq 1 ]; then
    # Quoted and sent through printf so that whitespace runs, glob
    # characters such as [ ] and *, and leading dashes in the message
    # reach the terminal unchanged.
    printf '%s\n' "$1"
  fi
}
#######################################
# Make sure a file exists, creating it when it is missing.
# An existing path is left untouched; a new file is seeded with a single
# newline.
# Arguments: $1 - path of the file
# Returns:   0 if the path already exists or was created,
#            1 if no path was given,
#            otherwise the status of the failed write
#######################################
test_file() {
  if [ -z "$1" ]; then
    printf 'test_file: missing file name\n' >&2
    return 1
  fi
  # The path is quoted everywhere so names containing spaces work both in
  # the existence test and as the redirection target.
  if ! test -e "$1"; then
    printf '\n' > "$1"
  fi
}
#######################################
# Abort the script unless it is being run by root.
# Globals:   UID (read), LOGNAME (read)
# Outputs:   a notice via echo_vb when the caller is not root
# Returns:   0 when UID is 0; otherwise exits the shell with status 1
#######################################
check_if_root() {
  if [ "$UID" != "0" ]; then
    # ${0##*/} strips the directory part of the script path without
    # word-splitting it, so paths containing spaces are reported correctly.
    echo_vb "[$LOGNAME] You need to run this script \"${0##*/}\" as root."
    exit 1
  fi
}
#######################################
# Open the URL list in a dialog edit box.
# Makes sure ~/n.url.txt exists, then shows it in a 30x50 edit box;
# whatever dialog writes to stderr is captured in ~/n.url.tmp.
# NOTE(review): ~/n.url.tmp is not copied back to ~/n.url.txt here -
# confirm where the edited list is meant to be picked up.
# Returns: the exit status of dialog
#######################################
show_urls() {
  local url_list
  url_list=~/n.url.txt

  echo_vb "show urls : ok =0 ,cancel = 1"
  test_file "$url_list"

  # dialog runs last, so its status becomes the function's status.
  dialog --editbox "$url_list" 30 50 \
    2> ~/n.url.tmp
}
#######################################
# Ask for the agent name in a dialog input box.
# Makes sure ~/n.robot.txt exists; whatever dialog writes to stderr is
# captured in ~/n.robot.tmp.
# Returns: the exit status of dialog
#######################################
setup_robot() {
  echo_vb "setup_robot"
  test_file ~/n.robot.txt

  # The prompt is passed with a literal backslash-n, exactly as written.
  dialog --inputbox 'this agent name \n ex: nutch' 0 0 \
    2> ~/n.robot.tmp
}
#######################################
# Ask for the crawl depth in a dialog input box.
# Makes sure ~/n.crawler.txt exists; whatever dialog writes to stderr is
# captured in ~/n.crawler.tmp.
# Returns: the exit status of dialog
#######################################
setup_crawler() {
  echo_vb "setup_crawler"
  test_file ~/n.crawler.txt

  # The prompt is passed with a literal backslash-n, exactly as written.
  dialog --inputbox 'Depth \n ex: 5' 0 0 \
    2> ~/n.crawler.tmp
}
#######################################
# Ask for the port in a dialog input box.
# Makes sure ~/n.tomcat.txt exists; whatever dialog writes to stderr is
# captured in ~/n.tomcat.tmp.
# Returns: the exit status of dialog
#######################################
setup_tomcat() {
  echo_vb "setup_tomcat"
  test_file ~/n.tomcat.txt

  # The prompt keeps its literal backslash-n and its trailing space.
  dialog --inputbox 'explorer port \n ex:8080 ' 0 0 \
    2> ~/n.tomcat.tmp
}
#######################################
# Show the collected settings in a dialog message box.
# Reads ~/n.url.txt, ~/n.robot.tmp, ~/n.crawler.tmp and ~/n.tomcat.tmp and
# lists their contents under the matching headings.
# NOTE(review): the announced contract is "start =0 , back =1", but a
# --msgbox presumably offers only an OK button - confirm whether a
# --yesno box is what was intended.
# Returns: the exit status of dialog
#######################################
final_confirm() {
  echo_vb "final_confirm : start =0 , back =1 "

  local summary
  # Every answer file is read with cat; a missing file just renders as an
  # empty value, so cat's complaint is silenced on purpose to keep the
  # dialog screen clean.
  summary="urls = \n $(cat ~/n.url.txt 2>/dev/null) \n robot name = \n $(cat ~/n.robot.tmp 2>/dev/null) \n depth = \n $(cat ~/n.crawler.tmp 2>/dev/null) \n explorer port = \n $(cat ~/n.tomcat.tmp 2>/dev/null) \n"

  # dialog runs last, so its status is what the caller sees; no other
  # variable can override it.
  dialog --msgbox "$summary" 0 0
}
#######################################
# Crawl step of the wizard. For now it only announces itself via echo_vb.
# Returns: the exit status of echo_vb
#######################################
start_crawl() {
  local step='start_crawl'
  echo_vb "$step"
}
#######################################
# Tomcat step of the wizard. For now it only announces itself via echo_vb
# (the label keeps its trailing space).
# Returns: the exit status of echo_vb
#######################################
start_tomcat() {
  local step='start_tomcat '
  echo_vb "$step"
}
#######################################
# Report step of the wizard. For now it only announces itself via echo_vb
# (the label keeps its trailing space).
# Returns: the exit status of echo_vb
#######################################
show_report() {
  local step='show_report '
  echo_vb "$step"
}