Changeset 94


Ignore:
Timestamp:
Jul 8, 2009, 2:20:41 PM (15 years ago)
Author:
waue
Message:

0.1-2 fix some bug

Location:
nutchez-0.1
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • nutchez-0.1/Makefile

    r90 r94  
    1313  @echo "make deb     - Build Debian Package."
    1414  @echo "make clean   - Clean up Debian Package temparate files."
    15   @echo "make source  - download source tarball from hadoop mirror site."
    1615  @echo "make help    - show Makefile options."
    1716  @echo " "
  • nutchez-0.1/README

    r89 r94  
    33************************************************
    44
    5 NutchEz is developed by NCHC in Taiwan
     5NutchEz is developed by National Center for High-Performance Computing (NCHC) in Taiwan
    66
    7  * How to install :
    8 You should download the deb file and type the instruction at the same dir.
    9 ============================ 
    10 sudo dpkg -i nutchez-*.deb
    11 ============================
     7 * How to launch nutchez  (Type the following instruction)
     8========================================
     9nutchez
     10========================================
    1211
    13 
    14  * How to lunch nutchez
    15 ============
    16 nutchez
    17 ============
    18 
    19  * How to remove nutchez
    20 =====================
     12 * How to remove nutchez  (Type the following instruction)
     13========================================
    2114sudo dpkg -P nutchez
    22 =====================
     15========================================
    2316
    2417 * Where are the program and configuration files
     
    27202. Nutch conf dir is set on ~/.nutchez/conf
    28213. Tomcat home is installed on ~/.nutchez/tomcat
    29 
    30 
     224. The final crawl results are located in ~/.nutchez/search
     235. The nutchez log will be recorded on ~/.nutchez/log
     246. The full path of this README is /etc/nutch/README
  • nutchez-0.1/bin/nutchez

    r91 r94  
    1212  export JAVA_HOME=/usr/lib/jvm/java-6-sun
    1313  export NUTCH_HOME=/opt/nutch
    14 #  export HADOOP_HOME=/opt/nutch
    1514  export NUTCH_CONF_DIR=~/.nutchez/conf
    16 #  export HADOOP_CONF_DIR=~/.nutchez/conf
    1715  export NUTCH_LOG_DIR=~/.nutchez/log
    1816
     17. /opt/nutch/bin/nutchez-func.sh
     18
     19init_nutchez
    1920
    2021
     22tfile=`tempfile 2>/dev/null` || tfile=/tmp/test$$
     23trap "rm -f $tfile" 0 1 2 5 15
    2124
    22 . /opt/nutch/bin/nutchez-func.sh
    23 #. ~/.nutchez/hadoop-env.sh
     25$DIALOG --backtitle "Developed By NCHC" --clear --item-help --title "NutchEz 雛型版" \
     26        --menu " 你好,歡迎使用NutchEz! \n\
     27這套軟體是用來打造專屬於你的搜尋引擎 \n\
     28你有網頁不希望被公開的搜尋引擎找到, \n\
     29卻又希望能有個搜尋介面的困擾嗎? \n\
     30用NutchEz就對了!因為他操作簡單, \n\
     31除了基本的網頁以外,還支援多種格式(ppt,doc,txt...) \n\
     32並且是開源碼軟體,完全免費,安全無虞\n\
     33趕快來使用看看吧!\n\n\
     34          選擇你要的模式:" 20 60 4 \
     35        "1"  "開始建構搜尋內容"    "透過NutchEz來建構專屬於你自己所需的內容的搜尋引擎" \
     36        "2"  "開啟或關閉NutchEz的網頁伺服器"  "若您之前已經執行完1之後才需做網頁伺服器的管理" 2> $tfile
    2437
     38rev=$?
    2539
     40MAIN_CHOISE=`cat $tfile`
    2641
    27 init_nutchez
     42case $rev in
     43  0)
     44    echo_dialog_v "OK '$MAIN_CHOISE' chosen.";;
     45  1)
     46    echo_dialog_v "Cancel pressed."
     47    exit 0 ;;
     48  2)
     49    echo_dialog_v "HELP '$MAIN_CHOISE' chosen.";;
     50  255)
     51    echo_dialog_v "ESC pressed.";;
     52  *)
     53    echo_dialog_v "Unexpected code $MAIN_CHOISE";;
     54esac
     55
     56if [ $MAIN_CHOISE -eq 2 ];then
     57
     58  pid_tc=$(ps axw -eo pid,command |\
     59  grep "catalina" | grep "java" |\
     60  grep "start" | awk '{print $1}')
     61  if [ -z "$pid_tc" ]; then
     62      echo_dialog_v "0. pid = $pid_tc ! no another toddmcat is running"
     63
     64      $DIALOG --title "你的NutchEz網頁伺服器沒打開.." --clear \
     65        --yesno "\n 要開啟NutchEz的網頁伺服器嗎? \n" 15 61
     66      if [ $? -eq 0 ];then
     67          ~/.nutchez/tomcat/bin/startup.sh
     68    PORT=`cat ~/.nutchez/sav/n.tomcat.txt`
     69          $DIALOG --msgbox "已經試著開啟瀏覽伺服器,你可以瀏覽這個網址看看: \n  http://localhost:$PORT" 0 0
     70      else
     71    $DIALOG --msgbox "你選擇不要打開瀏覽伺服器!" 0 0
     72      fi
     73     
     74  else
     75      echo_dialog_v "0. tomcat had been started and the pid is $pid_tc"
     76
     77      $DIALOG --title "偵測到NutchEz的網頁伺服器正在運作.." --clear \
     78        --yesno "\n 你要關閉他嗎? \n" 15 61
     79      if [ $? -eq 0 ];then
     80          ~/.nutchez/tomcat/bin/shutdown.sh
     81    kill -9 $pid_tc
     82          $DIALOG --msgbox "已經試著關閉NutchEz網頁伺服器" 0 0
     83      else
     84          $DIALOG --msgbox "你選擇不要關閉瀏覽伺服器!" 0 0
     85      fi
     86
     87  fi
     88  exit 0
     89
     90elif [ $MAIN_CHOISE -eq 1 ];then
    2891
    2992CHECK=0
     
    3598    show_urls
    3699    URL=$?
    37     echo_vb "2.2 show_urls return $URL"
     100    echo_dialog_v "2.2 show_urls return $URL"
    38101   
    39102# add or delete url:  ok , exit
     
    63126  setup_tomcat
    64127
     128  # continue last search
     129  if [ -e ~/.nutchez/search ];then
     130    continue_previous
     131  fi
    65132  # show the final checklist
    66133  final_confirm
    67134  FC=$?
    68   echo_vb "6.3 final confirm return = $FC "
     135  echo_dialog_v "6.3 final confirm return = $FC "
    69136  #   START , back, cancel
    70137  # start =0 , back =1
     
    90157      ;;
    91158    255)
    92       echo_vb "ESC pressed !!"
     159      echo_dialog_v "ESC pressed !!"
    93160      ;;
    94161  esac
     
    108175# Done
    109176
     177
     178
     179
     180else
     181
     182  echo_dialog_v "Main Choise is $MAIN_CHOISE ! Error !!"
     183
     184fi
  • nutchez-0.1/bin/nutchez-func.sh

    r91 r94  
    66
    77
    8 : ${DIALOG=dialog}
     8#DIALOG='dialog --backtitle "     NutchEz Setup Menu           -- powered by NCHC "'
     9DIALOG=dialog
    910
    1011# set 1 to display more for debug,
     
    3940
    4041check_if_root() {
    41    if [ ! "$UID" = "0" ]; then
     42   if [ ! "$UID" -eq "0" ]; then
    4243     echo_dialog_v "Hi [$LOGNAME] !! "
    4344     echo_dialog_v "You need to run this script \"`basename $0`\" as root."
     
    7172
    7273  cp ~/.nutchez/sav/n.urls.txt ~/.nutchez/urls/urls.txt
     74
     75  if [ $NOCONTINUE -eq 1 ]; then
     76    echo_dialog_v " delete the ~/.nutchez/search/*"
     77    DATE=`date +%y%m%d%H%M%S`
     78    mv ~/.nutchez/search ~/.nutchez/search-$DATE
     79    $DIALOG --msgbox "上次搜尋的結果改放到 ~/.nutchez/search-$DATE " 0 0
     80    rm -f /tmp/search
     81#    rm -rf ~/.nutchez/search/*
     82  fi
    7383
    7484  if [ -e ~/.nutchez/conf/nutch-site.xml ] ; then
     
    100110  echo_dialog_v "2.1 test_file ~/.nutchez/sav return : $?"
    101111  # dialog begin
    102   dialog --title "The URLS that you want" --editbox ~/.nutchez/sav/n.urls.txt 16 51 2>/tmp/n.urls.tmp
     112  dialog --title "請輸入你要抓取的網址(一行一個網址)" --editbox ~/.nutchez/sav/n.urls.txt 16 51 2>/tmp/n.urls.tmp
    103113  RET=$?
    104114  echo_dialog_v "2.1 cat url: `cat /tmp/n.urls.tmp`"
     
    110120  echo_dialog_v "3. setup_robot"
    111121  # dialog
    112   dialog --nocancel --inputbox " This agent name \n" 16 51 "`cat ~/.nutchez/sav/n.robot.txt`" 2>/tmp/n.robot.tmp
     122  dialog --title "設定機器人名稱" --nocancel --inputbox " 這個爬網機器人,你要將他取名為:\n\n (ps: 這個設定只是禮貌性宣告,並不會對結果造成影響) \n" 16 55 "`cat ~/.nutchez/sav/n.robot.txt`" 2>/tmp/n.robot.tmp
    113123  echo_dialog_v "3.1 cat robot : `cat /tmp/n.robot.tmp`"
    114124}
     
    117127  echo_dialog_v "4. setup_crawler"
    118128  test_file ~/.nutchez/sav/n.crawler.txt
    119   dialog --nocancel --inputbox " Depth  \n " 16 51 "`cat ~/.nutchez/sav/n.crawler.txt`" 2>/tmp/n.crawler.tmp
     129  dialog --title "設定抓取深度" --nocancel --inputbox " 對於每個網址,你需要NutchEz爬多深呢?\n\n (ps: 初次體驗建議將深度設為1來感受需要多久)  \n " 16 51 "`cat ~/.nutchez/sav/n.crawler.txt`" 2>/tmp/n.crawler.tmp
    120130  echo_dialog_v "4.1 cat robot : `cat /tmp/n.robot.tmp`"
    121131}
     
    124134  echo_dialog_v "5. setup_tomcat"
    125135  test_file ~/.nutchez/sav/n.tomcat.txt
    126   dialog --nocancel --inputbox " explorer port \n " 16 51 "`cat ~/.nutchez/sav/n.tomcat.txt`" 2>/tmp/n.tomcat.tmp
     136  dialog --title "設定網頁伺服器" --nocancel --inputbox " 你希望NutchEz將網頁伺服器開在哪個port \n\n (ps: 請選擇一個沒用到的port以免造成衝突 \n 也請盡量不要設成80以免造成你誤以為是apache的混淆) \n " 16 51 "`cat ~/.nutchez/sav/n.tomcat.txt`" 2>/tmp/n.tomcat.tmp
    127137  echo_dialog_v "5.1 cat tomcat : `cat /tmp/n.tomcat.tmp`"
    128138}
    129139
     140continue_previous () {
     141  echo_dialog_v "6. setup_tomcat"
     142$DIALOG --title "清除上次搜尋" --clear \
     143        --yesno "你是否要清除上一次爬網所得的結果,\n否則將加入到URL列裡增加搜尋負擔 \n\n ps: 選no的話,會跑相當相當久,\n 請慎重考慮之\n" 16 51
     144
     145case $? in
     146  0)
     147    NOCONTINUE=1;;
     148  1)
     149    NOCONTINUE=0;;
     150  255)
     151    echo "ESC pressed.";;
     152esac
     153echo_dialog_v " 6continue = $CONTINUE"
     154}
     155
    130156final_confirm () {
    131   echo_dialog_v "6. final_confirm : start =0 , back =1 "
     157  echo_dialog_v "7. final_confirm : start =0 , back =1 "
    132158  tempfile=/tmp/n.finalcheck.tmp
    133159
    134   echo " \n 1. The url list is : \n " > $tempfile
     160  echo " \n 1. 你所選擇要爬取的網址為 : \n " > $tempfile
    135161  cat /tmp/n.urls.tmp >> $tempfile
    136   echo " \n 2. The robot name is : \n" >> $tempfile
     162  echo " \n\n 2. 對於這個爬網機器人,你取名為 : \n" >> $tempfile
    137163  cat /tmp/n.robot.tmp >> $tempfile
    138   echo " \n 3. The crawled depth is : \n " >> $tempfile
     164  echo " \n\n 3. 爬網的深度,你設定為 : \n " >> $tempfile
    139165  cat /tmp/n.crawler.tmp >> $tempfile
    140   echo " \n 4. The explorer port is : \n " >> $tempfile
     166  echo " \n\n 4. NutchEz將會把你的搜尋結果呈現在這個Port : \n " >> $tempfile
    141167  cat /tmp/n.tomcat.tmp >> $tempfile
    142 
     168  if [ $NOCONTINUE -eq 0 ];then
     169    echo " \n\n 5. 是否要清除上一次的收尋結果 : \n " >> $tempfile
     170    echo_dialog_v " 7continue = $CONTINUE"
     171    echo "NO" >> $tempfile
     172  elif [ $NOCONTINUE -eq 1 ];then
     173    echo " \n\n 5. 是否要清除上一次的收尋結果繼續搜尋 : \n " >> $tempfile
     174    echo_dialog_v " 7continue = $CONTINUE"
     175    echo "YES" >> $tempfile
     176  else
     177    echo_dialog_v " 無資料可匯入 "
     178  fi
    143179  MSG=`cat $tempfile`
    144   echo_dialog_v "6.1 final message :\n $MSG"
     180  echo_dialog_v "7.1 final message :\n $MSG"
    145181  #read READ
    146   $DIALOG --title "Check It !!" --clear \
     182  $DIALOG --title "請檢查你的選擇 ! \n\n 若所有的設定都是正確的,你可以按 \"ok\",\n 若你按了 \"reset\" 則會重頭開始設定, \n 若你選擇 \"exit\" 則會跳出NutchEz的設定選單 \n ps: reset 與 exit都不會把資料記成預設值,請放心使用 " --clear \
    147183        --extra-button --extra-label "reset" --ok-label "ok" --cancel-label "exit" \
    148184        --yesno "$MSG" 26 51
     
    179215
    180216  pid_tc=$(ps axw -eo pid,command |\
    181  grep "catalina.startup.Bootstrap" |\
    182  grep "start" | awk '{print $1}')
     217  grep "catalina" | grep "java" |\
     218  grep "start" | awk '{print $1}')
    183219  if [ -z "$pid_tc" ]; then
    184       echo "starting tomcat"
     220      echo_dialog_v "no another tomcat is running"
    185221  else
    186       echo "tomcat had been started and the pid is $pid_tc"
    187       echo "stop it first"
     222      echo_dialog_v "tomcat had been started and the pid is $pid_tc"
     223      echo_dialog_v "stop it first"
    188224      kill -9 $pid_tc
    189225      if [ -z $? ];then
    190     echo " tomcat ($pid_tc) is  killed ..."
     226    echo_dialog_v " tomcat ($pid_tc) is  killed ..."
    191227      else
    192     echo "kill error ..."
     228    echo_dialog_v "kill error ..."
    193229      fi
    194230  fi
     231  echo "Starting Tomcat ...."
    195232  ~/.nutchez/tomcat/bin/startup.sh
    196233  sleep 3
     
    201238  FIREFOX=`which firefox`
    202239  RET=$?
    203   if [ $RET == 0 ];then
     240  if [ $RET -eq 0 ];then
    204241    $FIREFOX -D 0.0 http://localhost:$PORT
    205242    RET=$?
    206243  fi
    207   if ! [ $RET == 0 ];then
    208     $DIALOG --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:$PORT" 0 0
    209   fi
    210 }
     244  if ! [ $RET -eq 0 ];then
     245    $DIALOG --msgbox "恭喜你已經完成了! \n 你可以用瀏覽器瀏覽: \n  http://host_ip:$PORT" 0 0
     246  fi
     247}
  • nutchez-0.1/debian/changelog

    r66 r94  
    1 nutchez (0.1-1) unstable; urgency=low
     1nutchez (0.1-2) unstable; urgency=low
    22
    33  * Initial release (Closes: #nnnn)  <nnnn is the bug number of your ITP>
     4  *
    45
    5  -- Wei-Yu Chen <waue0920@gmail.com>  Tue, 12 May 2009 11:15:51 +0800
    6 
     6 -- Wei-Yu Chen <waue0920@gmail.com>  Tue, 07 Jul 2009 11:22:46 +0800
  • nutchez-0.1/debian/nutchez.install

    r90 r94  
    33README*   etc/nutch
    44bin   opt/nutch
    5 bin/nutchez*  usr/local/bin
    65lib   opt/nutch
    7 webapps   opt/nutch
    86tomcat    opt/nutch
    97plugins   opt/nutch
  • nutchez-0.1/debian/nutchez.postinst

    r69 r94  
    88fi
    99
    10 ln -sf /opt/nutch/bin/nutchez /usr/local/sbin/
    11 ln -sf /opt/nutch/bin/nutchez-func.sh /usr/local/sbin/
     10ln -sf /opt/nutch/bin/nutchez /usr/sbin/
     11ln -sf /opt/nutch/bin/nutchez-func.sh /usr/sbin/
    1212
    13 setup_hdfsadm_user() {
    14   if ! getent passwd hdfsadm >/dev/null; then
    15     useradd hdfsadm
    16     mkdir -p /home/hdfsadm/.ssh
    17     mkdir -p /var/log/nutch
    18     ssh-keygen -t rsa -q -f /home/hdfsadm/.ssh/id_rsa -N ""
    19     cp /home/hdfsadm/.ssh/id_rsa.pub /home/hdfsadm/.ssh/authorized_keys
    20     chown hdfsadm:hdfsadm /var/log/nutch
    21     chown -R hdfsadm:hdfsadm /opt/nutch
    22     chown -R hdfsadm:hdfsadm /home/hdfsadm
    23   fi
     13show_message(){
     14  cat /etc/nutch/README
    2415}
    25 
    26 check_root() {
    27   if ! test -e /root/.ssh/id_rsa ; then
    28     ssh-keygen -t rsa -q -f /root/.ssh/id_rsa -N ""
    29   fi
    30   if test -e /root/.ssh/id_rsa.pub ; then
    31     if ! test -e /root/.ssh/authorized_keys ; then
    32       cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
    33     fi
    34   else
    35     ssh-keygen -t rsa -q -f /root/.ssh/id_rsa -N ""
    36     cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
    37   fi
    38 
    39 }
    40 
    41 start_hadoop() {
    42   if getent passwd hdfsadm >/dev/null; then
    43     su -c "/opt/nutch/bin/hadoop namenode -format" hdfsadm -
    44     su -c "/opt/nutch/bin/start-all.sh" hdfsadm -
    45   else
    46     /opt/nutch/conf/hadoop-env.sh
    47     /opt/nutch/bin/hadoop namenode -format
    48     /opt/nutch/bin/start-all.sh
    49   fi
    50 }
    51 show_message() {
     16show_old_message() {
    5217  echo "You can use the instruction : \" nutchez\" to easyly use nutch"
    5318  echo "Enjoy"
    5419}
    55 show_old_message() {
    56   echo "You can quickly start by following ways [in /opt/nutch/ with root privilege]:"
    57   echo "(1) Modify the urls/urls.txt file with indicate urls, one site one line."
    58   echo "(2) Use this instruction \"bin/nutch crawl urls -dir search -depth 4 -topN 50\" to crawl web"
    59   echo "(3) Type \" tomcat/bin/startup.sh \" and use browser to check the result in http://localhost:8080/"
    60   echo "Enjoy !"
    61 }
    62 #setup_hdfsadm_user
    63 #check_root
    64 #start_hadoop
    6520show_message
  • nutchez-0.1/debian/nutchez.postrm

    r80 r94  
    1818done
    1919
    20 rm -f /usr/local/sbin/nutchez*
     20rm -f /usr/sbin/nutchez*
    2121
    2222rm -rf /tmp/search
  • nutchez-0.1/debian/rules

    r66 r94  
    1313  dh_compress
    1414  dh_fixperms
     15  find ./debian/nutchez/ -name ".svn" -type d > svnfolders
     16  rm -rf `cat svnfolders`
    1517  dh_installdeb
    1618  dh_link
Note: See TracChangeset for help on using the changeset viewer.