#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Helper functions that make Nutch easy to use
# .

# Load the Hadoop environment: try the system-wide copy first and fall back
# to the one under $NUTCHEZ_SCRIPT_PATH when the first cannot be sourced.
# The fallback path is quoted so a directory containing spaces still works.
. /etc/nutch/hadoop-env.sh || . "$NUTCHEZ_SCRIPT_PATH/conf/hadoop-env.sh"

# Dialog front-end to run. Only assigned when DIALOG is not already set, so
# the caller's environment can override it.
: "${DIALOG=dialog}"

# Verbose switch: echo_vb shows its message boxes only when VERB is 1.
VERB=0


# Show a message box, but only when verbose mode is on.
# Globals:   VERB (read), DIALOG (read)
# Arguments: $1 - text to display
# Returns:   0 when verbose mode is off, otherwise the dialog exit status
echo_vb () {
  # VERB is quoted and defaulted so that an unset or empty value means
  # "not verbose" instead of making the test itself fail with a syntax error.
  if [ "${VERB:-0}" -eq 1 ]; then
    # DIALOG is left unquoted on purpose so it may carry extra options.
    $DIALOG --msgbox "$1" 16 51
  fi
}

# Make sure a file exists, creating it with a single empty line if it does
# not. In verbose mode the outcome (and the existing content) is reported
# through echo_vb.
# Arguments: $1 - path of the file to check
# Returns:   status of the file creation, or of echo_vb when the file exists
test_file () {
  if ! test -e "$1"; then
    echo_vb "test_file: \n can not find $1"
    # Quoted so that a path containing spaces is not an ambiguous redirect.
    echo "" > "$1"
  else
    echo_vb "test_file: \n Touch  $1 ! \n Its content is \n $(cat "$1")"
  fi
}

# Abort the whole script with status 1 unless it is running as root.
# Globals: UID (read), LOGNAME (read)
# The reason is always written to stderr, because echo_vb shows nothing
# unless verbose mode is on and the script would otherwise exit silently.
check_if_root() {
  if [ "$UID" != "0" ]; then
    echo_vb "Hi [$LOGNAME] !! "
    # ${0##*/} strips the directory part without spawning basename and is
    # safe for script paths that contain spaces.
    echo_vb "You need to run this script \"${0##*/}\" as root."
    printf 'You need to run this script "%s" as root.\n' "${0##*/}" >&2
    exit 1
  fi
}

# Promote the freshly entered settings: drop the previous /tmp/n.*.txt files
# and rename each /tmp/n.<kind>.tmp to /tmp/n.<kind>.txt.
# Returns: status of the last rename
promote_tempfile () {
  local kind

  echo_vb "7. chang tmp as txt"
  # -f: on a first run there is no old .txt file, which is not an error.
  rm -f /tmp/n.*.txt
  for kind in url robot crawler tomcat; do
    mv "/tmp/n.${kind}.tmp" "/tmp/n.${kind}.txt"
  done
}

# Discard the pending settings by deleting every /tmp/n.*.tmp file.
# Returns: status of rm (0 even when there was nothing to delete)
clean_tempfile () {
  echo_vb "7. delete tmp"
  # -f: having no pending .tmp file is a normal situation, not an error.
  rm -f /tmp/n.*.tmp
}

# First-run setup of the per-user ~/.nutchez directory.
# Copies /etc/nutch/.nutchez into the home directory, hands it to $LOGNAME,
# then recreates /tmp/nutch with a link to ~/.nutchez/search.
# Globals: LOGNAME (read)
# Returns: 0 when ~/.nutchez already exists or setup succeeded; 1 when the
#          copy or the /tmp/nutch re-creation failed
setup_nutchez() {
  # Nothing to do once the per-user directory is in place.
  if [ -e ~/.nutchez ]; then
    return 0
  fi

  # Stop here on failure: continuing would wipe /tmp/nutch and leave a link
  # pointing at a directory that was never created.
  cp -rf /etc/nutch/.nutchez ~/ || return 1

  # Best effort, as before: chown reports its own error and setup goes on.
  chown -R "$LOGNAME:$LOGNAME" ~/.nutchez

  # rm -rf is already a no-op when /tmp/nutch does not exist.
  rm -rf /tmp/nutch || return 1
  mkdir /tmp/nutch || return 1
  ln -sf ~/.nutchez/search /tmp/nutch/
}

# Let the user edit the URL list in a dialog edit box.
# The current list is read from /tmp/n.url.txt (created when missing) and
# the edited text is captured from the dialog's stderr into /tmp/n.url.tmp.
# Globals: DIALOG (read), RET (written)
# Returns: the dialog exit status (ok = 0, cancel = 1)
show_urls (){
  echo_vb "2. show_urls !"
  test_file /tmp/n.url.txt
  echo_vb "2.1 test_file /tmp/n.url.txt return : $?"
  # Use the configurable front-end, like echo_vb and final_confirm do,
  # instead of a hard-coded "dialog"; fall back to dialog when unset.
  ${DIALOG:-dialog} --editbox /tmp/n.url.txt 16 51 2>/tmp/n.url.tmp
  RET=$?
  echo_vb "2.1 cat url: $(cat /tmp/n.url.tmp)"
  return "$RET"
}

# Ask for the crawler's agent name in an input box.
# The box is pre-filled from /tmp/n.robot.txt (created when missing) and the
# answer is captured from the dialog's stderr into /tmp/n.robot.tmp.
# Globals: DIALOG (read)
setup_robot () {
  test_file /tmp/n.robot.txt
  echo_vb "3. setup_robot"
  # Use the configurable front-end, like echo_vb and final_confirm do,
  # instead of a hard-coded "dialog"; fall back to dialog when unset.
  ${DIALOG:-dialog} --nocancel --inputbox " This agent name \n" 16 51 \
    "$(cat /tmp/n.robot.txt)" 2>/tmp/n.robot.tmp
  echo_vb "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}

# Ask for the crawl depth in an input box.
# The box is pre-filled from /tmp/n.crawler.txt (created when missing) and
# the answer is captured from the dialog's stderr into /tmp/n.crawler.tmp.
# Globals: DIALOG (read)
setup_crawler () {
  echo_vb "4. setup_crawler"
  test_file /tmp/n.crawler.txt
  # Use the configurable front-end, like echo_vb and final_confirm do,
  # instead of a hard-coded "dialog"; fall back to dialog when unset.
  ${DIALOG:-dialog} --nocancel --inputbox " Depth  \n " 16 51 \
    "$(cat /tmp/n.crawler.txt)" 2>/tmp/n.crawler.tmp
  # Report the value just entered here; the trace used to print the robot
  # file under a "robot" label, copied over from setup_robot.
  echo_vb "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}

# Ask for the port of the search web interface in an input box.
# The box is pre-filled from /tmp/n.tomcat.txt (created when missing) and
# the answer is captured from the dialog's stderr into /tmp/n.tomcat.tmp.
# Globals: DIALOG (read)
setup_tomcat () {
  echo_vb "5. setup_tomcat"
  test_file /tmp/n.tomcat.txt
  # Use the configurable front-end, like echo_vb and final_confirm do,
  # instead of a hard-coded "dialog"; fall back to dialog when unset.
  ${DIALOG:-dialog} --nocancel --inputbox " explorer port \n " 16 51 \
    "$(cat /tmp/n.tomcat.txt)" 2>/tmp/n.tomcat.tmp
  echo_vb "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}

# Show a summary of the pending settings and ask for confirmation.
# Reads /tmp/n.url.tmp, /tmp/n.robot.tmp, /tmp/n.crawler.tmp and
# /tmp/n.tomcat.tmp and presents them in a yes/no box.
# Globals: DIALOG (read), MSG (written), RET (written)
# Returns: the dialog exit status (start = 0, back = 1)
final_confirm () {
  echo_vb "6. final_confirm : start =0 , back =1 "

  # Assemble the summary in memory. This needs neither the deprecated
  # `tempfile` utility nor a predictable /tmp name, and leaves no scratch
  # file behind. The "\n" sequences stay literal here; they are passed on
  # as part of the text handed to the dialog program.
  MSG=$(
    printf '%s\n' ' \n 1. The url list is : \n '
    cat /tmp/n.url.tmp
    printf '%s\n' ' \n 2. The robot name is : \n'
    cat /tmp/n.robot.tmp
    printf '%s\n' ' \n 3. The crawled depth is : \n '
    cat /tmp/n.crawler.tmp
    printf '%s\n' ' \n 4. The explorer port is : \n '
    cat /tmp/n.tomcat.tmp
  )
  echo_vb "6.1 final message :\n $MSG"

  # DIALOG is left unquoted on purpose so it may carry extra options.
  $DIALOG --title "Check It !!" --clear \
        --yesno "$MSG" 16 51
  RET=$?
  echo_vb "final return = $RET"
  return "$RET"
}

# Define parameters: load every saved setting from its own file.
# All four values used to be read from /tmp/n.url.txt, so ROBOT, DEPTH and
# PORT ended up holding the URL list. The loader is a function so the values
# can be refreshed after the settings files change; it is run once right
# away so the variables are populated at this point of the file.
# Globals: URLS, ROBOT, DEPTH, PORT (written)
load_nutchez_params () {
  # 2>/dev/null is intentional: before the first setup run the files do not
  # exist yet, and the corresponding value is simply left empty.
  URLS=$(cat /tmp/n.url.txt 2>/dev/null)
  ROBOT=$(cat /tmp/n.robot.txt 2>/dev/null)
  DEPTH=$(cat /tmp/n.crawler.txt 2>/dev/null)
  PORT=$(cat /tmp/n.tomcat.txt 2>/dev/null)
  return 0
}
load_nutchez_params

# Run the Nutch crawl with the configured depth.
# Globals: DEPTH (read)
# Returns: 1 when DEPTH is not a non-negative integer, otherwise the exit
#          status of the nutch command
start_crawl () {
  echo_vb "7. start_crawl"

  # Refuse to start with an empty or non-numeric depth instead of handing
  # nutch a malformed command line.
  case "$DEPTH" in
    '' | *[!0-9]*)
      printf 'start_crawl: invalid crawl depth "%s"\n' "$DEPTH" >&2
      return 1
      ;;
  esac

  # The setup helper defined in this file is setup_nutchez; the previous
  # call to "setup_nutch" named a function this file does not define.
  setup_nutchez

  # NOTE(review): setup_nutchez prepares ~/.nutchez, while the crawl reads
  # and writes under ~/.nutch -- confirm which directory is intended.
  /opt/nutch/bin/nutch crawl ~/.nutch/urls -dir ~/.nutch/search \
    -threads 2 -depth "$DEPTH"
}

# Launch the Tomcat instance that ships under /opt/nutch.
# Returns: the exit status of the startup script
start_tomcat () {
  local startup_script=/opt/nutch/tomcat/bin/startup.sh

  echo_vb "8. start_tomcat "
  "$startup_script"
}

# Report step: for now it only emits its verbose trace message.
show_report () {
  local step_label="9. show_report "

  echo_vb "$step_label"
}
