Changeset 69
- Timestamp:
- May 27, 2009, 5:01:46 PM (16 years ago)
- Location:
- nutchez-0.1
- Files:
-
- 2 deleted
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
nutchez-0.1/bin/nutchez
r68 r69
  4   4   # Description: Eazily use for Nutch
  5   5   # .
  6     - export NUTCH_CONF_DIR=~/.nutchez
  7     - NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"
  8   6
  9     - . ./nutchez-func.sh
      7 + # begining ..
      8 + bin=`dirname "$0"`
      9 + bin=`cd "$bin"; pwd`
 10  10
 11     -
 12     - # root ?
 13     - #check_if_root
 14     -
 15     - # show url lists
 16     -
 17     - setup_nutchez
     11 + . "$bin"/nutchez-func.sh
     12 + init_nutchez
 18  13
 19  14   CHECK=0
-
nutchez-0.1/bin/nutchez-func.sh
r68 r69
  5   5   # .
  6   6
  7     - . /etc/nutch/hadoop-env.sh || . $NUTCHEZ_SCRIPT_PATH/conf/hadoop-env.sh
  8   7
  9   8   : ${DIALOG=dialog}
 10   9
 11     - # display more for debug
 12     - VERB=0
     10 + # set 1 to display more for debug,
     11 + VERB=1
     12 +
     13 + init_nutchez () {
     14 + if ! [ -e ~/.nutchez ] ;then
     15 + # copy from /etc/nutch
     16 + mkdir ~/.nutchez
     17 + cp -rf /etc/nutch/* ~/.nutchez
     18 + mkdir ~/.nutchez/log
     19 + chown -R $LOGNAME:$LOGNAME ~/.nutchez
     20 + fi
     21 + export NUTCH_CONF_DIR=~/.nutchez
     22 + export HADOOP_CONF_DIR=~/.nutchez
     23 + export HADOOP_LOG_DIR=~/.nutchez/log
     24 + . ~/.nutchez/hadoop-env.sh || . /etc/nutch/hadoop-env.sh
     25 + }
 13  26
 14  27   echo_vb () {
… …
 38  51   echo_vb "7. chang tmp as txt"
 39  52   rm ~/.nutchez/sav/n.*.txt
 40     - mv /tmp/n.url.tmp ~/.nutchez/sav/
 41     - mv /tmp/n.robot.tmp ~/.nutchez/sav/
 42     - mv /tmp/n.crawler.tmp ~/.nutchez/sav/
 43     - mv /tmp/n.tomcat.tmp ~/.nutchez/sav/
     53 + mv /tmp/n.urls.tmp ~/.nutchez/sav/n.urls.txt
     54 + mv /tmp/n.robot.tmp ~/.nutchez/sav/n.robot.txt
     55 + mv /tmp/n.crawler.tmp ~/.nutchez/sav/n.crawler.txt
     56 + mv /tmp/n.tomcat.tmp ~/.nutchez/sav/n.tomcat.txt
 44  57   }
 45  58
… …
 47  60   echo_vb "7. delete tmp"
 48  61   rm /tmp/n.*.tmp
 49     - }
 50     -
 51     - init_nutchez () {
 52     - if ! [ -e ~/.nutchez ] ;then
 53     - # copy from /etc/nutch
 54     - cp -rf /etc/nutch/* ~/.nutchez
 55     - chown -R $LOGNAME:$LOGNAME ~/.nutchez
 56     - fi
 57  62   }
 58  63
… …
 71  76   if [ -e ~/.nutchez/nutch-site.xml ] ; then
 72  77   # set nutch-site.xml
 73     - sed -ie "s/>user</>$ROBOT</" ~/.nutchez/nutch-site.xml
     78 + sed -i -e "4s/<value>[a-zA-Z0-9]*</<value>$ROBOT</" ~/.nutchez/nutch-site.xml
 74  79   fi
 75  80
… …
 86  91   fi
 87  92   # change explorer port
 88     - sed -ie "s/8080/>$PORT</" ~/.nutchez/tomcat/conf/server.xml
     93 + sed -i -e "s/<Connector port=\"[0-9]*\"/<Connector port=\"$PORT\"/" ~/.nutchez/tomcat/conf/server.xml
     94 + fi
 89  95   }
 90  96
… …
 93  99   # show urls : ok =0 ,cancel = 1
 94 100   echo_vb "2. show_urls !"
 95     - test_file ~/.nutchez/sav/n.url.txt
    101 + test_file ~/.nutchez/sav/n.urls.txt
 96 102   echo_vb "2.1 test_file ~/.nutchez/sav return : $?"
 97 103   # dialog begin
 98     - dialog --editbox ~/.nutchez/sav/n.url.txt 16 51 2>/tmp/n.url.tmp
    104 + dialog --editbox ~/.nutchez/sav/n.urls.txt 16 51 2>/tmp/n.urls.tmp
 99 105   RET=$?
100     - echo_vb "2.1 cat url: `cat /tmp/n.url.tmp`"
    106 + echo_vb "2.1 cat url: `cat /tmp/n.urls.tmp`"
101 107   return $RET
102 108   }
… …
126 132   final_confirm () {
127 133   echo_vb "6. final_confirm : start =0 , back =1 "
128     - tempfile=`tempfile 2>/dev/null` || tempfile=/tmp/n.finalcheck.tmp
129     - #trap "rm -f $tempfile" 0 1 2 5 15
    134 + tempfile=/tmp/n.finalcheck.tmp
130 135
131 136   echo " \n 1. The url list is : \n " > $tempfile
132     - cat /tmp/n.url.tmp >> $tempfile
    137 + cat /tmp/n.urls.tmp >> $tempfile
133 138   echo " \n 2. The robot name is : \n" >> $tempfile
134 139   cat /tmp/n.robot.tmp >> $tempfile
… …
142 147   #read READ
143 148   $DIALOG --title "Check It !!" --clear \
144     - --yesno "$MSG" 16 51
    149 + --yesno "$MSG" 26 51
145 150   RET=$?
146 151   echo_vb "final return = $RET"
… …
152 157   start_crawl () {
153 158
154     - ROBOT=`cat ~/.nutchez/sav`
155     - URLS=`cat ~/.nutchez/sav`
156     - DEPTH=`cat ~/.nutchez/sav`
157     - PORT=`cat ~/.nutchez/sav`
    159 + ROBOT=`cat ~/.nutchez/sav/n.robot.txt`
    160 + URLS=`cat ~/.nutchez/sav/n.urls.txt`
    161 + DEPTH=`cat ~/.nutchez/sav/n.crawler.txt`
    162 + PORT=`cat ~/.nutchez/sav/n.tomcat.txt`
158 163
159 164   echo_vb "7. start_crawl"
160     - setup_nutch
    165 + setup_nutchez
161 166   install_tomcat
162     - # /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
163     - echo "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
    167 + echo_vb "/opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH"
    168 + echo_vb "$NUTCH_CONF_DIR"
    169 + /opt/nutch/bin/nutch crawl ~/.nutchez/urls -dir ~/.nutchez/search -depth $DEPTH
164 170   }
165 171
166 172   start_tomcat () {
167 173   echo_vb "8. start_tomcat "
168     - /opt/nutch/tomcat/bin/startup.sh
    174 + echo_vb "/opt/nutch/tomcat/bin/startup.sh"
    175 + if [ -e /tmp/search ]
    176 + rm -rf /tmp/search
    177 + fi
    178 + ln -sf ~/.nutchez/search/ /tmp/
    179 + ~/.nutchez/tomcat/bin/shutdown.sh
    180 + ~/.nutchez/tomcat/bin/startup.sh
169 181   }
170 182
… …
173 185   FIREFOX=`which firefox`
174 186   RET=$?
175     - if [ RET == 0 ];then
    187 + if [ $RET == 0 ];then
176 188   $FIREFOX -D 0.0 http://localhost:$PORT
177     - else
178     - $DIALOG --msgbox "Congratulations! \n you can explore the url: \n http://localhost:8080" 0 0
    189 + RET=$?
    190 + fi
    191 + if ! [ $RET == 0 ];then
    192 + $DIALOG --msgbox "Congratulations! \n you can explore the url: \n http://localhost:$PORT" 0 0
179 193   fi
180 194   }
-
nutchez-0.1/debian/nutchez.postinst
r68 r69
  9   9
 10  10   ln -sf /opt/nutch/bin/nutchez /usr/local/sbin/
     11 + ln -sf /opt/nutch/bin/nutchez-func.sh /usr/local/sbin/
 11  12
 12  13   setup_hdfsadm_user() {
-
nutchez-0.1/debian/nutchez.postrm
r68 r69
  3   3   echo "$1"
  4   4
  5     - if [ "$1" != remove ]
  6     - then
      5 + if [ "$1" != remove ]; then
  7   6   exit 0
  8   7   fi
  9   8
 10     - rm -rf /tmp/hsperfdata*
 11   9   if [ -e HOME/.nutchez ]; then
 12  10   rm -rf $HOME/.nutchez
 13  11   fi
 14     - rm /usr/local/sbin/nutchez*
 15  12
     13 + rm -f /usr/local/sbin/nutchez*
     14 +
     15 + rm -rf /tmp/search
     16 +
-
nutchez-0.1/debian/nutchez.prerm
r66 r69
  1   1   #!/bin/sh
  2   2
  3     - /opt/nutch/bin/stop-all.sh
      3 + #/opt/nutch/bin/stop-all.sh
  4   4   #su -c /opt/nutch/bin/stop-all.sh hdfsadm -
-
nutchez-0.1/tomcat/conf/server.xml
r66 r69
 20  20   Documentation at /docs/config/server.html
 21  21   -->
 22     - <Server port="8005" shutdown="SHUTDOWN">
     22 + <Server port="8083" shutdown="SHUTDOWN">
 23  23
 24  24   <!--APR library loader. Documentation at /docs/apr.html -->
… …
 65  65   Define a non-SSL HTTP/1.1 Connector on port 8080
 66  66   -->
 67     - <Connector port="8080" protocol="HTTP/1.1"
     67 + <Connector port="8083" protocol="HTTP/1.1"
 68  68   connectionTimeout="20000"
 69  69   redirectPort="8443" URIEncoding="UTF-8"
… …
 72  72   <!--
 73  73   <Connector executor="tomcatThreadPool"
 74     - port="8080" protocol="HTTP/1.1"
     74 + port="8083" protocol="HTTP/1.1"
 75  75   connectionTimeout="20000"
 76  76   redirectPort="8443" />
… …
 81  81   described in the APR documentation -->
 82  82   <!--
 83     - <Connector port="8443" protocol="HTTP/1.1" SSLEnabled="true"
     83 + <Connector port="8083" protocol="HTTP/1.1" SSLEnabled="true"
 84  84   maxThreads="150" scheme="https" secure="true"
 85  85   clientAuth="false" sslProtocol="TLS" URIEncoding="UTF-8"/>
… …
 87  87
 88  88   <!-- Define an AJP 1.3 Connector on port 8009 -->
 89     - <Connector port="8009" protocol="AJP/1.3" redirectPort="8443" />
     89 + <Connector port="8083" protocol="AJP/1.3" redirectPort="8443" />
 90  90
 91  91
-
nutchez-0.1/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml
r68 r69
  6   6   <property>
  7   7   <name>searcher.dir</name>
  8     - <value>../../../../../search</value>
      8 + <value>/tmp/search</value>
  9   9   </property>
 10  10   </configuration>
Note: See TracChangeset
for help on using the changeset viewer.