wiki:waue/2009/0515

Version 2 (modified by waue, 15 years ago) (diff)

--

  • nutchez
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Easy-to-use setup wizard for Nutch
# .

# Root directory used by the helper library to find conf/hadoop-env.sh;
# an existing environment value wins over the /opt/nutch default.
NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"

# Load the helper functions from the directory this script lives in rather
# than from the caller's current directory, so nutchez can be started from
# anywhere. Stop right away when the library is missing: nothing below can
# work without it.
NUTCHEZ_FUNC_LIB="$(dirname -- "${BASH_SOURCE[0]}")/nutchez-func.sh"
if [ ! -r "$NUTCHEZ_FUNC_LIB" ]; then
  printf '%s\n' "nutchez: cannot read $NUTCHEZ_FUNC_LIB" >&2
  exit 1
fi
# shellcheck source=nutchez-func.sh
. "$NUTCHEZ_FUNC_LIB"
# root ?
#check_if_root

# Wizard loop: run the whole questionnaire again and again until
# final_confirm reports status 0.
CHECK=0
until [ "$CHECK" -ne 0 ]; do

  # milestone M1: let the user edit the URL list.
  # show_urls status: ok = 0, anything else = cancel.
  LOCK=1
  until [ "$LOCK" -ne 1 ]; do
    show_urls
    URL=$?
    echo_vb "$URL"

    # Cancelled: leave nutchez and return to the console with status 1.
    [ "$URL" -eq 0 ] || exit 1

    # Accepted: release the lock and move on to M2.
    LOCK=0
  done

  # milestone M2: collect the remaining settings. The exit status of these
  # three steps is not inspected, so there is no "back" handling here yet.

  # search engine personality (agent name)
  setup_robot

  # crawl parameter (depth)
  setup_crawler

  # tomcat parameter (port)
  setup_tomcat

  # Final checklist: status 0 starts the crawl, any other status sends the
  # user back to M1.
  final_confirm
  FC=$?
  echo_vb "$FC"
  case "$FC" in
    0)
      # go to M3
      CHECK=1
      ;;
    *)
      # go back to M1
      LOCK=1
      CHECK=0
      ;;
  esac
done


# milestone M3: run the crawl, then bring up tomcat.
start_crawl
start_tomcat

# show result message

show_report

# Done 
  • nutchez-func.sh
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Helper functions for the nutchez setup wizard
# .

# Source conf/hadoop-env.sh from the directory named by NUTCHEZ_SCRIPT_PATH.
# When the variable is unset or empty, fall back to /opt/nutch (the default
# the nutchez launcher uses) instead of silently resolving to
# /conf/hadoop-env.sh. The path is quoted so directories containing spaces work.
. "${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}/conf/hadoop-env.sh"

# Verbosity switch read by echo_vb: 1 prints progress messages.
VERB=1
#DIALOG=dialog

#NEZ_DIR=/home/waue/.nutchez
#NEZ_DIR_URL
#NEZ_DIR_OTHER


# Print a message only when verbose mode is switched on.
# Globals:   VERB (read) - the message is printed only when it equals 1
# Arguments: $1 - message text
# Outputs:   the message followed by a newline on stdout
echo_vb () {
  if [ "${VERB:-0}" -eq 1 ]; then
    # Quoted and printed through printf so that runs of whitespace, glob
    # characters and option-like text such as "-n" come out verbatim.
    printf '%s\n' "$1"
  fi
}

# Make sure a file exists, creating it when it is absent.
# Arguments: $1 - path of the file
# A newly created file contains a single newline; an existing file is left
# untouched.
test_file () {
  # The path is quoted everywhere so names containing spaces or glob
  # characters are handled as one word.
  if ! test -e "$1"; then
    printf '\n' > "$1"
  fi
}

# Abort the script unless it is running as root (UID 0).
# Globals: UID, LOGNAME (read)
# Outputs: an explanation on stderr when the caller is not root
# Exits:   with status 1 when the caller is not root; returns normally otherwise
check_if_root() {
   if [ "$UID" != "0" ]; then
     # A fatal message must not depend on the verbose switch, so it is
     # written straight to stderr. "--" keeps basename from treating a
     # script name that starts with "-" as an option.
     printf '%s\n' "[$LOGNAME] You need to run this script \"$(basename -- "$0")\" as root." >&2
     exit 1
   fi
}

# Let the user edit the URL list in a dialog edit box.
# Files:   ~/n.url.txt - the URL list (created when missing)
#          ~/n.url.tmp - receives what dialog writes on stderr
# Returns: dialog's exit status (ok = 0, cancel = 1), or 1 when the edited
#          list could not be saved
show_urls (){
  local rc

  echo_vb "show urls : ok =0 ,cancel = 1"

  test_file  ~/n.url.txt

  # dialog begin
  dialog  --editbox ~/n.url.txt 30 50 2> ~/n.url.tmp
  rc=$?

  # The edited text only exists in the .tmp capture. On OK, write it back to
  # the list that final_confirm displays; otherwise the edit would be lost.
  # On cancel the list is left as it was.
  if [ "$rc" -eq 0 ]; then
    cp -- ~/n.url.tmp ~/n.url.txt || return 1
  fi
  return "$rc"
}

# Ask for the crawler's agent name with a dialog input box.
# Files:   ~/n.robot.txt - created through test_file when missing
#          ~/n.robot.tmp - receives what dialog writes on stderr
# Returns: dialog's exit status
setup_robot() {
  local answer_file
  answer_file=~/n.robot.tmp

  test_file ~/n.robot.txt
  echo_vb "setup_robot"

  # dialog stays the last command so its status becomes the function's status
  dialog --inputbox "this agent name \n ex: nutch" 0 0 2> "$answer_file"
}

# Ask for the crawl depth with a dialog input box.
# Files:   ~/n.crawler.txt - created through test_file when missing
#          ~/n.crawler.tmp - receives what dialog writes on stderr
# Returns: dialog's exit status
setup_crawler() {
  local answer_file
  answer_file=~/n.crawler.tmp

  echo_vb "setup_crawler"
  test_file ~/n.crawler.txt

  # dialog stays the last command so its status becomes the function's status
  dialog --inputbox "Depth  \n ex: 5" 0 0 2> "$answer_file"
}

# Ask for the port to browse on with a dialog input box.
# Files:   ~/n.tomcat.txt - created through test_file when missing
#          ~/n.tomcat.tmp - receives what dialog writes on stderr
# Returns: dialog's exit status
setup_tomcat() {
  local answer_file
  answer_file=~/n.tomcat.tmp

  echo_vb "setup_tomcat"
  test_file ~/n.tomcat.txt

  # dialog stays the last command so its status becomes the function's status
  dialog --inputbox "explorer port \n ex:8080 " 0 0 2> "$answer_file"
}

# Show the collected settings in a dialog message box for a last check.
# Files:   reads ~/n.url.txt, ~/n.robot.tmp, ~/n.crawler.tmp, ~/n.tomcat.tmp
# Globals: MSG (written) - the text handed to dialog
# Returns: dialog's exit status; the caller treats 0 as "start" and anything
#          else as "back"
# NOTE(review): a --msgbox offers only an OK button, so a non-zero status can
# only come from ESC or a dialog error; a --yesno box may be what is wanted.
final_confirm () {
  local rc

  echo_vb "final_confirm : start =0 , back =1 "

  # Every answer file is read with cat. The robot and crawler files used to
  # be executed as commands here, which left their values blank.
  MSG="urls = \n $(cat ~/n.url.txt) \n robot name = \n $(cat ~/n.robot.tmp) \n depth = \n $(cat ~/n.crawler.tmp) \n explorer port = \n $(cat ~/n.tomcat.tmp) \n"

  dialog --msgbox "$MSG" 0 0
  rc=$?

  # Hand back dialog's own status instead of the never-assigned $READ.
  return "$rc"
}

# Placeholder for the crawl step: for now it only announces itself through
# echo_vb and returns echo_vb's status.
start_crawl() {
  local step_label="start_crawl"
  echo_vb "$step_label"
}

# Placeholder for the tomcat start-up step: for now it only announces itself
# through echo_vb and returns echo_vb's status.
start_tomcat() {
  # The trailing blank is part of the original message text.
  local step_label="start_tomcat "
  echo_vb "$step_label"
}

# Placeholder for the result report: for now it only announces itself
# through echo_vb and returns echo_vb's status.
show_report() {
  # The trailing blank is part of the original message text.
  local step_label="show_report "
  echo_vb "$step_label"
}