#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Easy-to-use helper functions for Nutch
# .


# Dialog program used for the prompts. The default is applied only when
# DIALOG is unset, so it can be overridden from the environment.
# The expansion is quoted so the value is never word-split or globbed.
: "${DIALOG=dialog}"

# Set to 1 to show extra debug message boxes (see echo_vb).
VERB=1

#######################################
# Prepare the per-user configuration directory ~/.nutchez.
# On first use the directory is created, seeded from /etc/nutch and given a
# log sub-directory. The Nutch/Hadoop environment variables are then pointed
# at it and hadoop-env.sh is sourced (falling back to the /etc/nutch copy).
# Globals:   LOGNAME (read)
#            NUTCH_CONF_DIR, HADOOP_CONF_DIR, HADOOP_LOG_DIR (exported)
# Arguments: none
# Returns:   status of the last hadoop-env.sh that was sourced
#######################################
init_nutchez () {
  if ! [ -e ~/.nutchez ]; then
    # copy from /etc/nutch
    mkdir -p ~/.nutchez
    cp -rf /etc/nutch/* ~/.nutchez
    # -p: do not fail if a log directory was already copied from /etc/nutch
    mkdir -p ~/.nutchez/log
    chown -R "$LOGNAME:$LOGNAME" ~/.nutchez
  fi
  export NUTCH_CONF_DIR=~/.nutchez
  export HADOOP_CONF_DIR=~/.nutchez
  export HADOOP_LOG_DIR=~/.nutchez/log
  . ~/.nutchez/hadoop-env.sh || . /etc/nutch/hadoop-env.sh
}

#######################################
# Show a debug message box when verbose mode is on.
# Globals:   VERB (read)   - message is shown only when it equals 1
#            DIALOG (read) - dialog program to run
# Arguments: $1 - message text
# Returns:   status of the dialog program, or 0 when nothing is shown
#######################################
echo_vb () {
  # Treat an unset/empty VERB as "off"; stderr is silenced so a non-numeric
  # value does not print a test error either.
  if [ "${VERB:-0}" -eq 1 ] 2>/dev/null; then
    # DIALOG is intentionally left unquoted so it may carry extra options.
    $DIALOG --msgbox "$1" 16 51
  fi
}

#######################################
# Make sure a file exists, reporting what was found through echo_vb.
# A missing file is created containing a single empty line; an existing
# file has its content shown in the debug message.
# Arguments: $1 - path of the file to check
#######################################
test_file () {
  if ! test -e "$1"; then
    echo_vb "test_file: \n can not find $1"
    # Quoted so a path containing spaces is still a single redirect target.
    echo "" > "$1"
  else
    echo_vb "test_file: \n Touch  $1 ! \n Its content is \n $(cat "$1")"
  fi
}

#######################################
# Abort the script unless it is running as root (UID 0).
# Globals:   UID, LOGNAME (read)
# Arguments: none
# Returns:   0 when running as root; otherwise exits the shell with status 1
#######################################
check_if_root() {
   if [ "$UID" = "0" ]; then
     return 0
   fi
   echo_vb "Hi [$LOGNAME] !! "
   # "--" and quoting keep basename working when $0 contains spaces or
   # starts with a dash.
   echo_vb "You need to run this script \"$(basename -- "$0")\" as root."
   exit 1
}

# Replace the saved answers in ~/.nutchez/sav with the temporary answer
# files from /tmp: the old n.*.txt files are removed, then each
# /tmp/n.<kind>.tmp is moved to ~/.nutchez/sav/n.<kind>.txt.
promote_tempfile () {
  local kind
  echo_vb "7. chang tmp as txt"
  rm ~/.nutchez/sav/n.*.txt
  for kind in urls robot crawler tomcat; do
    mv "/tmp/n.${kind}.tmp" ~/.nutchez/sav/"n.${kind}.txt"
  done
}

# Remove the temporary answer files (/tmp/n.*.tmp).
# Returns 0 even when there is nothing to delete.
clean_tempfile () {
  echo_vb "7. delete tmp"
  # -f: with no matching file the glob stays literal; do not report that as
  # an error.
  rm -f /tmp/n.*.tmp
}

#######################################
# Install the saved URL list as the crawl seed list and write the robot
# (agent) name into nutch-site.xml.
# Globals:   ROBOT (read) - agent name written into the <value> element on
#                           line 4 of ~/.nutchez/nutch-site.xml
# Arguments: none
#######################################
setup_nutchez () {
  local robot_escaped

  # make url list dir
  mkdir -p ~/.nutchez/urls

  # Always start from a fresh copy of the saved URL list.
  rm -f ~/.nutchez/urls/urls.txt
  cp ~/.nutchez/sav/n.urls.txt ~/.nutchez/urls/urls.txt

  if [ -e ~/.nutchez/nutch-site.xml ]; then
    # Escape the characters that are special in a sed replacement (\ / &)
    # so any agent name is inserted literally.
    robot_escaped=$(printf '%s' "$ROBOT" | sed -e 's#[\\/&]#\\&#g')
    # [^<]* (rather than alphanumerics only) so a previously stored name
    # containing punctuation or spaces can still be replaced.
    sed -i -e "4s/<value>[^<]*</<value>${robot_escaped}</" ~/.nutchez/nutch-site.xml
  fi
}

#######################################
# Install a private Tomcat copy under ~/.nutchez (first run only) and set
# its connector port.
# The port is rewritten on every call, not just at install time, so a port
# changed in a later run actually reaches server.xml.
# Globals:   LOGNAME (read), PORT (read)
# Arguments: none
# Returns:   1 if PORT is not a plain number (server.xml is left untouched),
#            otherwise the status of the sed edit
#######################################
install_tomcat (){
  if ! [ -e ~/.nutchez/tomcat ]; then
    # install tomcat to home
    cp -rf /opt/nutch/tomcat ~/.nutchez/
    chown -R "$LOGNAME:$LOGNAME" ~/.nutchez/tomcat/
    # make search dir
    mkdir -p ~/.nutchez/search
  fi

  # PORT is interpolated into a sed script below; accept digits only.
  case $PORT in
    '' | *[!0-9]*)
      printf 'install_tomcat: invalid port "%s", server.xml left unchanged\n' "$PORT" >&2
      return 1
      ;;
  esac

  # change explorer port
  # NOTE(review): this rewrites the port of every line that has a
  # <Connector port="..."> - confirm server.xml defines only one connector.
  sed -i -e "s/<Connector port=\"[0-9]*\"/<Connector port=\"$PORT\"/" ~/.nutchez/tomcat/conf/server.xml
}


#######################################
# Let the user edit the saved URL list in an edit box.
# The dialog's stderr output is captured in /tmp/n.urls.tmp.
# Globals:   DIALOG (read), RET (written)
# Arguments: none
# Returns:   the dialog exit status (ok = 0, cancel = 1)
#######################################
show_urls (){
  echo_vb "2. show_urls !"
  test_file  ~/.nutchez/sav/n.urls.txt
  echo_vb "2.1 test_file ~/.nutchez/sav return : $?"
  # dialog begin
  # Use the configurable $DIALOG like the other prompts in this file.
  $DIALOG --editbox ~/.nutchez/sav/n.urls.txt 16 51 2>/tmp/n.urls.tmp
  RET=$?
  echo_vb "2.1 cat url: $(cat /tmp/n.urls.tmp)"
  return "$RET"
}

#######################################
# Ask for the robot (agent) name, pre-filled with the saved value.
# The dialog's stderr output is captured in /tmp/n.robot.tmp.
# Globals:   DIALOG (read)
# Arguments: none
#######################################
setup_robot () {
  test_file ~/.nutchez/sav/n.robot.txt
  echo_vb "3. setup_robot"
  # Use the configurable $DIALOG like the other prompts in this file.
  $DIALOG --nocancel --inputbox " This agent name \n" 16 51 "$(cat ~/.nutchez/sav/n.robot.txt)" 2>/tmp/n.robot.tmp
  echo_vb "3.1 cat robot : $(cat /tmp/n.robot.tmp)"
}

#######################################
# Ask for the crawl depth, pre-filled with the saved value.
# The dialog's stderr output is captured in /tmp/n.crawler.tmp.
# Globals:   DIALOG (read)
# Arguments: none
#######################################
setup_crawler () {
  echo_vb "4. setup_crawler"
  test_file ~/.nutchez/sav/n.crawler.txt
  # Use the configurable $DIALOG like the other prompts in this file.
  $DIALOG --nocancel --inputbox " Depth  \n " 16 51 "$(cat ~/.nutchez/sav/n.crawler.txt)" 2>/tmp/n.crawler.tmp
  # Report the value just entered here (the crawler file, not the robot one).
  echo_vb "4.1 cat crawler : $(cat /tmp/n.crawler.tmp)"
}

#######################################
# Ask for the Tomcat (explorer) port, pre-filled with the saved value.
# The dialog's stderr output is captured in /tmp/n.tomcat.tmp.
# Globals:   DIALOG (read)
# Arguments: none
#######################################
setup_tomcat () {
  echo_vb "5. setup_tomcat"
  test_file ~/.nutchez/sav/n.tomcat.txt
  # Use the configurable $DIALOG like the other prompts in this file.
  $DIALOG --nocancel --inputbox " explorer port \n " 16 51 "$(cat ~/.nutchez/sav/n.tomcat.txt)" 2>/tmp/n.tomcat.tmp
  echo_vb "5.1 cat tomcat : $(cat /tmp/n.tomcat.tmp)"
}

#######################################
# Show a summary of the four pending answers and ask for confirmation.
# The summary is assembled in /tmp/n.finalcheck.tmp from the temporary
# answer files and shown in a yes/no box.
# Globals:   DIALOG (read); tempfile, MSG, RET (written)
# Arguments: none
# Returns:   the yes/no dialog status (start = 0, back = 1)
#######################################
final_confirm () {
  echo_vb "6. final_confirm : start =0 , back =1 "
  tempfile=/tmp/n.finalcheck.tmp

  # One redirect for the whole summary instead of appending piece by piece.
  {
    echo " \n 1. The url list is : \n "
    cat /tmp/n.urls.tmp
    echo " \n 2. The robot name is : \n"
    cat /tmp/n.robot.tmp
    echo " \n 3. The crawled depth is : \n "
    cat /tmp/n.crawler.tmp
    echo " \n 4. The explorer port is : \n "
    cat /tmp/n.tomcat.tmp
  } > "$tempfile"

  MSG=$(cat "$tempfile")
  echo_vb "6.1 final message :\n $MSG"
  $DIALOG --title "Check It !!" --clear --yesno "$MSG" 26 51
  RET=$?
  echo_vb "final return = $RET"
  return $RET
}

# define parameters

#######################################
# Load the saved answers, prepare the configuration and run the crawl.
# Globals:   ROBOT, URLS, DEPTH, PORT (written from ~/.nutchez/sav/n.*.txt)
#            NUTCH_CONF_DIR (read, shown in a debug message)
# Arguments: none
# Returns:   status of the nutch crawl command
#######################################
start_crawl () {

  ROBOT=$(cat ~/.nutchez/sav/n.robot.txt)
  URLS=$(cat ~/.nutchez/sav/n.urls.txt)
  DEPTH=$(cat ~/.nutchez/sav/n.crawler.txt)
  PORT=$(cat ~/.nutchez/sav/n.tomcat.txt)

  echo_vb "7. start_crawl"
  setup_nutchez
  install_tomcat
  echo_vb "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
  echo_vb "$NUTCH_CONF_DIR"
  # Quoted so the depth is always passed as exactly one argument.
  /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth "$DEPTH"
}

#######################################
# Publish the crawl result as /tmp/search and restart the private Tomcat.
# Arguments: none
# Returns:   status of the Tomcat startup script
#######################################
start_tomcat () {
  echo_vb "8. start_tomcat "
  echo_vb "~/.nutchez/tomcat/bin/startup.sh"
  # Remove whatever is at /tmp/search first: ln cannot replace a real
  # directory with a symlink.
  if [ -e /tmp/search ]; then
    rm -rf /tmp/search
  fi
  ln -sf ~/.nutchez/search/ /tmp/
  # Stop any running instance before starting it again.
  ~/.nutchez/tomcat/bin/shutdown.sh
  ~/.nutchez/tomcat/bin/startup.sh
}

#######################################
# Open the search page in Firefox; if Firefox is missing or exits with an
# error, show the URL in a message box instead.
# Globals:   PORT, DIALOG (read); FIREFOX, RET (written)
# Arguments: none
#######################################
show_report () {
  echo_vb "9. show_report "
  # command -v is a shell builtin, so this works even where `which` is not
  # installed.
  FIREFOX=$(command -v firefox)
  RET=$?
  if [ "$RET" -eq 0 ]; then
    "$FIREFOX" -D 0.0 "http://localhost:$PORT"
    RET=$?
  fi
  if [ "$RET" -ne 0 ]; then
    $DIALOG --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:$PORT" 0 0
  fi
}
