Changeset 68


Ignore:
Timestamp:
May 22, 2009, 6:36:48 PM (16 years ago)
Author:
waue
Message:

big modification

Location:
nutchez-0.1
Files:
1 deleted
5 edited
1 moved

Legend:

Unmodified
Added
Removed
  • nutchez-0.1/bin/nutchez

    r67 r68  
    44# Description: Eazily use for Nutch
    55# .
    6 
     6export NUTCH_CONF_DIR=~/.nutchez
    77NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"
    88
    99. ./nutchez-func.sh
     10
    1011
    1112# root ?
  • nutchez-0.1/bin/nutchez-func.sh

    r67 r68  
    99: ${DIALOG=dialog}
    1010
     11# display more for debug
    1112VERB=0
    12 
    1313
    1414echo_vb () {
     
    3737promote_tempfile () {
    3838  echo_vb "7. chang tmp as txt"
    39   rm /tmp/n.*.txt
    40   mv /tmp/n.url.tmp /tmp/n.url.txt
    41   mv /tmp/n.robot.tmp /tmp/n.robot.txt
    42   mv /tmp/n.crawler.tmp /tmp/n.crawler.txt
    43   mv /tmp/n.tomcat.tmp /tmp/n.tomcat.txt
     39  rm ~/.nutchez/sav/n.*.txt
     40  mv /tmp/n.url.tmp ~/.nutchez/sav/
     41  mv /tmp/n.robot.tmp ~/.nutchez/sav/
     42  mv /tmp/n.crawler.tmp ~/.nutchez/sav/
     43  mv /tmp/n.tomcat.tmp ~/.nutchez/sav/
    4444}
    4545
     
    4949}
    5050
    51 setup_nutchez() {
     51init_nutchez () {
    5252  if ! [ -e ~/.nutchez ] ;then
    53     cp -rf /etc/nutch/.nutchez ~/
     53    # copy from /etc/nutch
     54    cp -rf /etc/nutch/* ~/.nutchez
    5455    chown -R $LOGNAME:$LOGNAME ~/.nutchez
    55     if [ -e /tmp/nutch ] ;then
    56       rm -rf /tmp/nutch
    57     fi
    58     mkdir /tmp/nutch
    59     ln -sf ~/.nutchez/search /tmp/nutch/
    60 
    6156  fi
    6257}
     58
     59setup_nutchez () {   
     60  if ! [ -e ~/.nutchez/urls ] ; then
     61    # make url list dir
     62    mkdir ~/.nutchez/urls
     63  fi
     64
     65  if  [ -e ~/.nutchez/urls/urls.txt ] ; then
     66    rm ~/.nutchez/urls/urls.txt
     67  fi
     68
     69  cp ~/.nutchez/sav/n.urls.txt ~/.nutchez/urls/urls.txt
     70
     71  if [ -e ~/.nutchez/nutch-site.xml ] ; then
     72    # set nutch-site.xml
     73    sed -ie "s/>user</>$ROBOT</" ~/.nutchez/nutch-site.xml
     74  fi
     75
     76}
     77
     78install_tomcat (){
     79  if ! [ -e ~/.nutchez/tomcat ] ;then
     80    # isntall tomcat to home
     81    cp -rf /opt/nutch/tomcat ~/.nutchez/
     82    chown -R $LOGNAME:$LOGNAME ~/.nutchez/tomcat/
     83    # make search dir
     84    if ! [ -e ~/.nutchez/search ] ;then
     85      mkdir ~/.nutchez/search
     86    fi
     87    # change explorer port
     88    sed -ie "s/8080/>$PORT</" ~/.nutchez/tomcat/conf/server.xml
     89}
     90
    6391
    6492show_urls (){
    6593  # show urls : ok =0 ,cancel = 1
    6694  echo_vb "2. show_urls !"
    67   test_file  /tmp/n.url.txt
    68   echo_vb "2.1 test_file /tmp/n.url.txt return : $?"
     95  test_file  ~/.nutchez/sav/n.url.txt
     96  echo_vb "2.1 test_file ~/.nutchez/sav return : $?"
    6997  # dialog begin
    70   dialog  --editbox /tmp/n.url.txt 16 51 2>/tmp/n.url.tmp
     98  dialog  --editbox ~/.nutchez/sav/n.url.txt 16 51 2>/tmp/n.url.tmp
    7199  RET=$?
    72100  echo_vb "2.1 cat url: `cat /tmp/n.url.tmp`"
     
    75103
    76104setup_robot () {
    77   test_file /tmp/n.robot.txt
     105  test_file ~/.nutchez/sav/n.robot.txt
    78106  echo_vb "3. setup_robot"
    79107  # dialog
    80   dialog --nocancel --inputbox " This agent name \n" 16 51 "`cat /tmp/n.robot.txt`" 2>/tmp/n.robot.tmp
     108  dialog --nocancel --inputbox " This agent name \n" 16 51 "`cat ~/.nutchez/sav/n.robot.txt`" 2>/tmp/n.robot.tmp
    81109  echo_vb "3.1 cat robot : `cat /tmp/n.robot.tmp`"
    82110}
     
    84112setup_crawler () {
    85113  echo_vb "4. setup_crawler"
    86   test_file /tmp/n.crawler.txt
    87   dialog --nocancel --inputbox " Depth  \n " 16 51 "`cat /tmp/n.crawler.txt`" 2>/tmp/n.crawler.tmp
     114  test_file ~/.nutchez/sav/n.crawler.txt
     115  dialog --nocancel --inputbox " Depth  \n " 16 51 "`cat ~/.nutchez/sav/n.crawler.txt`" 2>/tmp/n.crawler.tmp
    88116  echo_vb "4.1 cat robot : `cat /tmp/n.robot.tmp`"
    89117}
     
    91119setup_tomcat () {
    92120  echo_vb "5. setup_tomcat"
    93   test_file /tmp/n.tomcat.txt
    94   dialog --nocancel --inputbox " explorer port \n " 16 51 "`cat /tmp/n.tomcat.txt`" 2>/tmp/n.tomcat.tmp
     121  test_file ~/.nutchez/sav/n.tomcat.txt
     122  dialog --nocancel --inputbox " explorer port \n " 16 51 "`cat ~/.nutchez/sav/n.tomcat.txt`" 2>/tmp/n.tomcat.tmp
    95123  echo_vb "5.1 cat tomcat : `cat /tmp/n.tomcat.tmp`"
    96124}
     
    121149
    122150# define paramaters
    123 ROBOT=`cat /tmp/n.url.txt`
    124 URLS=`cat /tmp/n.url.txt`
    125 DEPTH=`cat /tmp/n.url.txt`
    126 PORT=`cat /tmp/n.url.txt`
    127151
    128152start_crawl () {
     153
     154  ROBOT=`cat ~/.nutchez/sav`
     155  URLS=`cat ~/.nutchez/sav`
     156  DEPTH=`cat ~/.nutchez/sav`
     157  PORT=`cat ~/.nutchez/sav`
     158
    129159  echo_vb "7. start_crawl"
    130160  setup_nutch
    131   /opt/nutch/bin/nutch crawl ~/.nutch/urls -dir ~/.nutch/search -threads 2 -depth $DEPTH
     161  install_tomcat
     162  # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
     163  echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
    132164}
    133165
     
    139171show_report () {
    140172  echo_vb "9. show_report "
     173  FIREFOX=`which firefox`
     174  RET=$?
     175  if [ RET == 0 ];then
     176    $FIREFOX -D 0.0 http://localhost:$PORT
     177  else
     178    $DIALOG --msgbox "Congratulations! \n you can explore the url: \n  http://localhost:8080" 0 0
     179  fi
    141180}
  • nutchez-0.1/debian/nutchez.postinst

    r66 r68  
    77  exit 0
    88fi
     9
     10ln -sf /opt/nutch/bin/nutchez /usr/local/sbin/
    911
    1012setup_hdfsadm_user() {
     
    4749}
    4850show_message() {
     51  echo "You can use the instruction : \" nutchez\" to easyly use nutch"
     52  echo "Enjoy"
     53}
     54show_old_message() {
    4955  echo "You can quickly start by following ways [in /opt/nutch/ with root privilege]:"
    5056  echo "(1) Modify the urls/urls.txt file with indicate urls, one site one line."
  • nutchez-0.1/debian/nutchez.postrm

    r67 r68  
    88fi
    99
    10 setup_hdfsadm_user() {
    11   if ! getent passwd hdfsadm >/dev/null; then
    12     echo "no account found: 'hdfsadm'."
    13   else
    14     userdel hdfsadm
    15     rm -rf /home/hdfsadm
    16     rm -rf /opt/nutch
    17     rm -rf /tmp/hadoop*
    18     rm -rf /tmp/hsperfdata*
    19     rm /usr/local/sbin/nutchez*
    20   fi
    21 }
     10rm -rf /tmp/hsperfdata*
     11if [ -e HOME/.nutchez ]; then
     12  rm -rf $HOME/.nutchez
     13fi
     14rm /usr/local/sbin/nutchez*
    2215
    23 setup_hdfsadm_user
  • nutchez-0.1/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml

    r67 r68  
    66    <property>
    77        <name>searcher.dir</name>
    8         <value>/tmp/nutch/search</value>
     8        <value>../../../../../search</value>
    99    </property>
    1010</configuration>
Note: See TracChangeset for help on using the changeset viewer.