Changeset 68
- Timestamp: May 22, 2009, 6:36:48 PM (16 years ago)
- Location: nutchez-0.1
- Files: 1 deleted, 5 edited, 1 moved
Legend:
- Unmodified
- Added
- Removed
-
nutchez-0.1/bin/nutchez
r67 r68 4 4 # Description: Eazily use for Nutch 5 5 # . 6 6 export NUTCH_CONF_DIR=~/.nutchez 7 7 NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}" 8 8 9 9 . ./nutchez-func.sh 10 10 11 11 12 # root ? -
nutchez-0.1/bin/nutchez-func.sh
r67 r68 9 9 : ${DIALOG=dialog} 10 10 11 # display more for debug 11 12 VERB=0 12 13 13 14 14 echo_vb () { … … 37 37 promote_tempfile () { 38 38 echo_vb "7. chang tmp as txt" 39 rm /tmp/n.*.txt40 mv /tmp/n.url.tmp /tmp/n.url.txt41 mv /tmp/n.robot.tmp /tmp/n.robot.txt42 mv /tmp/n.crawler.tmp /tmp/n.crawler.txt43 mv /tmp/n.tomcat.tmp /tmp/n.tomcat.txt39 rm ~/.nutchez/sav/n.*.txt 40 mv /tmp/n.url.tmp ~/.nutchez/sav/ 41 mv /tmp/n.robot.tmp ~/.nutchez/sav/ 42 mv /tmp/n.crawler.tmp ~/.nutchez/sav/ 43 mv /tmp/n.tomcat.tmp ~/.nutchez/sav/ 44 44 } 45 45 … … 49 49 } 50 50 51 setup_nutchez() {51 init_nutchez () { 52 52 if ! [ -e ~/.nutchez ] ;then 53 cp -rf /etc/nutch/.nutchez ~/ 53 # copy from /etc/nutch 54 cp -rf /etc/nutch/* ~/.nutchez 54 55 chown -R $LOGNAME:$LOGNAME ~/.nutchez 55 if [ -e /tmp/nutch ] ;then56 rm -rf /tmp/nutch57 fi58 mkdir /tmp/nutch59 ln -sf ~/.nutchez/search /tmp/nutch/60 61 56 fi 62 57 } 58 59 setup_nutchez () { 60 if ! [ -e ~/.nutchez/urls ] ; then 61 # make url list dir 62 mkdir ~/.nutchez/urls 63 fi 64 65 if [ -e ~/.nutchez/urls/urls.txt ] ; then 66 rm ~/.nutchez/urls/urls.txt 67 fi 68 69 cp ~/.nutchez/sav/n.urls.txt ~/.nutchez/urls/urls.txt 70 71 if [ -e ~/.nutchez/nutch-site.xml ] ; then 72 # set nutch-site.xml 73 sed -ie "s/>user</>$ROBOT</" ~/.nutchez/nutch-site.xml 74 fi 75 76 } 77 78 install_tomcat (){ 79 if ! [ -e ~/.nutchez/tomcat ] ;then 80 # isntall tomcat to home 81 cp -rf /opt/nutch/tomcat ~/.nutchez/ 82 chown -R $LOGNAME:$LOGNAME ~/.nutchez/tomcat/ 83 # make search dir 84 if ! [ -e ~/.nutchez/search ] ;then 85 mkdir ~/.nutchez/search 86 fi 87 # change explorer port 88 sed -ie "s/8080/>$PORT</" ~/.nutchez/tomcat/conf/server.xml 89 } 90 63 91 64 92 show_urls (){ 65 93 # show urls : ok =0 ,cancel = 1 66 94 echo_vb "2. show_urls !" 67 test_file /tmp/n.url.txt68 echo_vb "2.1 test_file /tmp/n.url.txtreturn : $?"95 test_file ~/.nutchez/sav/n.url.txt 96 echo_vb "2.1 test_file ~/.nutchez/sav return : $?" 
69 97 # dialog begin 70 dialog --editbox /tmp/n.url.txt 16 51 2>/tmp/n.url.tmp98 dialog --editbox ~/.nutchez/sav/n.url.txt 16 51 2>/tmp/n.url.tmp 71 99 RET=$? 72 100 echo_vb "2.1 cat url: `cat /tmp/n.url.tmp`" … … 75 103 76 104 setup_robot () { 77 test_file /tmp/n.robot.txt105 test_file ~/.nutchez/sav/n.robot.txt 78 106 echo_vb "3. setup_robot" 79 107 # dialog 80 dialog --nocancel --inputbox " This agent name \n" 16 51 "`cat /tmp/n.robot.txt`" 2>/tmp/n.robot.tmp108 dialog --nocancel --inputbox " This agent name \n" 16 51 "`cat ~/.nutchez/sav/n.robot.txt`" 2>/tmp/n.robot.tmp 81 109 echo_vb "3.1 cat robot : `cat /tmp/n.robot.tmp`" 82 110 } … … 84 112 setup_crawler () { 85 113 echo_vb "4. setup_crawler" 86 test_file /tmp/n.crawler.txt87 dialog --nocancel --inputbox " Depth \n " 16 51 "`cat /tmp/n.crawler.txt`" 2>/tmp/n.crawler.tmp114 test_file ~/.nutchez/sav/n.crawler.txt 115 dialog --nocancel --inputbox " Depth \n " 16 51 "`cat ~/.nutchez/sav/n.crawler.txt`" 2>/tmp/n.crawler.tmp 88 116 echo_vb "4.1 cat robot : `cat /tmp/n.robot.tmp`" 89 117 } … … 91 119 setup_tomcat () { 92 120 echo_vb "5. setup_tomcat" 93 test_file /tmp/n.tomcat.txt94 dialog --nocancel --inputbox " explorer port \n " 16 51 "`cat /tmp/n.tomcat.txt`" 2>/tmp/n.tomcat.tmp121 test_file ~/.nutchez/sav/n.tomcat.txt 122 dialog --nocancel --inputbox " explorer port \n " 16 51 "`cat ~/.nutchez/sav/n.tomcat.txt`" 2>/tmp/n.tomcat.tmp 95 123 echo_vb "5.1 cat tomcat : `cat /tmp/n.tomcat.tmp`" 96 124 } … … 121 149 122 150 # define paramaters 123 ROBOT=`cat /tmp/n.url.txt`124 URLS=`cat /tmp/n.url.txt`125 DEPTH=`cat /tmp/n.url.txt`126 PORT=`cat /tmp/n.url.txt`127 151 128 152 start_crawl () { 153 154 ROBOT=`cat ~/.nutchez/sav` 155 URLS=`cat ~/.nutchez/sav` 156 DEPTH=`cat ~/.nutchez/sav` 157 PORT=`cat ~/.nutchez/sav` 158 129 159 echo_vb "7. 
start_crawl" 130 160 setup_nutch 131 /opt/nutch/bin/nutch crawl ~/.nutch/urls -dir ~/.nutch/search -threads 2 -depth $DEPTH 161 install_tomcat 162 # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH 163 echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH" 132 164 } 133 165 … … 139 171 show_report () { 140 172 echo_vb "9. show_report " 173 FIREFOX=`which firefox` 174 RET=$? 175 if [ RET == 0 ];then 176 $FIREFOX -D 0.0 http://localhost:$PORT 177 else 178 $DIALOG --msgbox "Congratulations! \n you can explore the url: \n http://localhost:8080" 0 0 179 fi 141 180 } -
nutchez-0.1/debian/nutchez.postinst
r66 r68 7 7 exit 0 8 8 fi 9 10 ln -sf /opt/nutch/bin/nutchez /usr/local/sbin/ 9 11 10 12 setup_hdfsadm_user() { … … 47 49 } 48 50 show_message() { 51 echo "You can use the instruction : \" nutchez\" to easyly use nutch" 52 echo "Enjoy" 53 } 54 show_old_message() { 49 55 echo "You can quickly start by following ways [in /opt/nutch/ with root privilege]:" 50 56 echo "(1) Modify the urls/urls.txt file with indicate urls, one site one line." -
nutchez-0.1/debian/nutchez.postrm
r67 r68 8 8 fi 9 9 10 setup_hdfsadm_user() { 11 if ! getent passwd hdfsadm >/dev/null; then 12 echo "no account found: 'hdfsadm'." 13 else 14 userdel hdfsadm 15 rm -rf /home/hdfsadm 16 rm -rf /opt/nutch 17 rm -rf /tmp/hadoop* 18 rm -rf /tmp/hsperfdata* 19 rm /usr/local/sbin/nutchez* 20 fi 21 } 10 rm -rf /tmp/hsperfdata* 11 if [ -e HOME/.nutchez ]; then 12 rm -rf $HOME/.nutchez 13 fi 14 rm /usr/local/sbin/nutchez* 22 15 23 setup_hdfsadm_user -
nutchez-0.1/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml
r67 r68 6 6 <property> 7 7 <name>searcher.dir</name> 8 <value> /tmp/nutch/search</value>8 <value>../../../../../search</value> 9 9 </property> 10 10 </configuration>
Note: See TracChangeset for help on using the changeset viewer.