- nutchez
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Interactive, dialog-driven front end for setting up Nutch.
#
# Flow:
#   M1  edit the URL list (cancel leaves the script)
#   M2  ask for robot name, crawl depth and Tomcat port, then show a summary
#   M3  start the crawl, start Tomcat and show the report
#
# Environment:
#   NUTCHEZ_SCRIPT_PATH  Nutch installation directory (default: /opt/nutch)

NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"

# Locate the helper library. Look next to this script first so the tool can be
# started from any working directory; fall back to the current directory,
# which is where it was historically expected to be.
nez_lib=""
for nez_candidate in \
  "$(dirname -- "${BASH_SOURCE[0]}")/nutchez-func.sh" \
  "./nutchez-func.sh"; do
  if [[ -r "$nez_candidate" ]]; then
    nez_lib=$nez_candidate
    break
  fi
done
if [[ -z "$nez_lib" ]]; then
  printf '%s: cannot find nutchez-func.sh\n' "${0##*/}" >&2
  exit 1
fi
# shellcheck source=/dev/null
. "$nez_lib" || exit 1
unset nez_lib nez_candidate

# Root check is currently disabled.
#check_if_root

#######################################
# Run the wizard: loop over M1/M2 until the summary is confirmed, then do M3.
# Arguments: none
# Returns:   does not return on cancel (exits with status 1)
#######################################
main() {
  local url_status confirm_status

  while true; do
    # Milestone M1: show/edit the URL list. ok = 0, cancel = non-zero.
    url_status=0
    show_urls || url_status=$?
    echo_vb "$url_status"
    if [[ "$url_status" -ne 0 ]]; then
      # Back to the console. Status 1 matches what the bare `exit` after the
      # failed comparison used to produce.
      exit 1
    fi

    # Milestone M2: collect the settings.
    setup_robot    # search engine personality (agent name)
    setup_crawler  # crawl parameters
    setup_tomcat   # tomcat parameters

    # Final checklist. start = 0, anything else = back to M1.
    confirm_status=0
    final_confirm || confirm_status=$?
    echo_vb "$confirm_status"
    if [[ "$confirm_status" -eq 0 ]]; then
      break
    fi
  done

  # Milestone M3.
  start_crawl
  start_tomcat

  # Show the result message.
  show_report
}

main "$@"
# Done
- nutchez-func.sh
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Helper functions for the nutchez front end.
#
# Files used (all under the invoking user's home directory):
#   ~/n.url.txt      URL list shown in the editor and in the summary
#   ~/n.url.tmp      raw editor output from the last show_urls call
#   ~/n.robot.tmp    answer of setup_robot
#   ~/n.crawler.tmp  answer of setup_crawler
#   ~/n.tomcat.tmp   answer of setup_tomcat

# Pull in the environment file shipped under the Nutch directory. A missing
# file is reported but is not fatal, which matches the previous behaviour of
# a failing `.` in a non-interactive bash.
nez_env_file="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}/conf/hadoop-env.sh"
if [[ -r "$nez_env_file" ]]; then
  # shellcheck source=/dev/null
  . "$nez_env_file"
else
  printf 'nutchez: warning: cannot read %s\n' "$nez_env_file" >&2
fi
unset nez_env_file

# 1 = print progress messages through echo_vb, anything else = stay quiet.
VERB=1

#DIALOG=dialog
#NEZ_DIR=/home/waue/.nutchez
#NEZ_DIR_URL
#NEZ_DIR_OTHER

#######################################
# Print a message when verbose mode is on.
# Globals:   VERB (read)
# Arguments: $1 - message
# Outputs:   the message on stdout, unmodified (no word splitting, no globbing)
# Returns:   0
#######################################
echo_vb() {
  if [[ "${VERB:-0}" == "1" ]]; then
    printf '%s\n' "$1"
  fi
}

#######################################
# Make sure a file exists; an existing file is left untouched.
# Arguments: $1 - path of the file
# Returns:   status of the file creation, 0 if the file already existed
#######################################
test_file() {
  if [[ ! -e "$1" ]]; then
    # A single newline, exactly what the original `echo ""` produced.
    echo "" > "$1"
  fi
}

#######################################
# Abort the script unless it is run by root.
# Globals:   UID, LOGNAME (read)
# Outputs:   error message on stderr (shown regardless of VERB)
# Returns:   0 when root; otherwise exits the shell with status 1
#######################################
check_if_root() {
  if [[ "$UID" != "0" ]]; then
    printf '[%s] You need to run this script "%s" as root.\n' \
      "${LOGNAME:-}" "${0##*/}" >&2
    exit 1
  fi
}

#######################################
# Let the user edit the URL list.
# The editor writes its result to ~/n.url.tmp; on OK that result is copied
# back to ~/n.url.txt so the summary and later editor passes see the edit.
# Returns:   exit status of dialog (0 = ok, non-zero = cancel/escape)
#######################################
show_urls() {
  local url_file=~/n.url.txt
  local tmp_file=~/n.url.tmp
  local rc=0

  echo_vb "show urls : ok =0 ,cancel = 1"
  test_file "$url_file"

  # dialog prints the edited text on stderr.
  dialog --editbox "$url_file" 30 50 2> "$tmp_file" || rc=$?
  if [[ "$rc" -eq 0 ]]; then
    cp -- "$tmp_file" "$url_file" || return 1
  fi
  return "$rc"
}

#######################################
# Ask one question with an input box and store the answer.
# Arguments: $1 - setting key (files are ~/n.<key>.txt and ~/n.<key>.tmp)
#            $2 - prompt text
# Returns:   exit status of dialog
#######################################
_nez_ask() {
  local key=$1
  local prompt=$2

  test_file ~/"n.${key}.txt"
  # dialog prints the answer on stderr.
  dialog --inputbox "$prompt" 0 0 2> ~/"n.${key}.tmp"
}

# Ask for the crawler's agent name; answer goes to ~/n.robot.tmp.
setup_robot() {
  echo_vb "setup_robot"
  _nez_ask robot "this agent name \n ex: nutch"
}

# Ask for the crawl depth; answer goes to ~/n.crawler.tmp.
setup_crawler() {
  echo_vb "setup_crawler"
  _nez_ask crawler "Depth \n ex: 5"
}

# Ask for the port; answer goes to ~/n.tomcat.tmp.
setup_tomcat() {
  echo_vb "setup_tomcat"
  _nez_ask tomcat "explorer port \n ex:8080 "
}

#######################################
# Show a summary of everything that was entered.
# Returns:   exit status of dialog (0 = start, non-zero = back)
# NOTE(review): a --msgbox only offers OK, so "back" is reachable through
# Escape only; consider --yesno if an explicit Back button is wanted.
#######################################
final_confirm() {
  local urls robot depth port msg

  echo_vb "final_confirm : start =0 , back =1 "

  # A file that does not exist yet simply shows up as an empty value, hence
  # the deliberate 2>/dev/null.
  urls=$(cat -- ~/n.url.txt 2>/dev/null)
  robot=$(cat -- ~/n.robot.tmp 2>/dev/null)
  depth=$(cat -- ~/n.crawler.tmp 2>/dev/null)
  port=$(cat -- ~/n.tomcat.tmp 2>/dev/null)

  # "\n" is left literal on purpose: dialog turns it into a line break.
  msg="urls = \n ${urls} \n robot name = \n ${robot} \n depth = \n ${depth} \n explorer port = \n ${port} \n"

  dialog --msgbox "$msg" 0 0
  return $?
}

# Placeholder: only announces the step.
start_crawl() {
  echo_vb "start_crawl"
}

# Placeholder: only announces the step.
start_tomcat() {
  echo_vb "start_tomcat "
}

# Placeholder: only announces the step.
show_report() {
  echo_vb "show_report "
}
Last modified 16 years ago
Last modified on May 15, 2009, 4:40:33 PM