| | 1 | * nutchez |
| | 2 | |
| | 3 | {{{ |
| | 4 | #!sh |
| | 5 | |
| | 6 | #!/bin/bash |
| | 7 | # Author: WeiYu Chen <waue _at_ nchc org tw> |
| | 8 | # License: GPL |
| | 9 | # Description: Easy-to-use wrapper for Nutch |
| | 10 | # . |
| | 11 | |
# Nutch installation prefix; may be overridden from the environment.
NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"

# Load the helper functions used below. Prefer the copy that sits next to this
# script so the wizard also works when started from another directory, and
# fall back to the working directory (the historical behaviour).
nez_lib="$(dirname -- "$0")/nutchez-func.sh"
if [[ ! -r "$nez_lib" ]]; then
  nez_lib=./nutchez-func.sh
fi
# shellcheck source=/dev/null
. "$nez_lib" || {
  printf '%s: cannot load %s\n' "${0##*/}" "$nez_lib" >&2
  exit 1
}

# root ?
#check_if_root

# Wizard loop: run M1 (URL list) and M2 (settings) until the user confirms.
while :; do
  # milestone M1: show the URL list
  # show_urls status: ok = 0, anything else = leave the wizard
  show_urls
  URL=$?
  echo_vb "$URL"

  if [[ "$URL" -ne 0 ]]; then
    # return to the console
    exit 1
  fi

  # milestone M2

  # set up the search engine personality
  setup_robot

  # set up the crawl parameter
  setup_crawler

  # set up the tomcat parameter
  setup_tomcat

  # show the final checklist
  # final_confirm status: start = 0, anything else = back to M1
  final_confirm
  FC=$?
  echo_vb "$FC"

  if [[ "$FC" -eq 0 ]]; then
    # go to M3
    break
  fi
done

# milestone M3
start_crawl
start_tomcat

# show result message
show_report

# Done
| | 81 | }}} |
| | 82 | |
| | 83 | |
| | 84 | * nutchez-func.sh |
| | 85 | {{{ |
| | 86 | #!/bin/bash |
| | 87 | # Author: WeiYu Chen <waue _at_ nchc org tw> |
| | 88 | # License: GPL |
| | 89 | # Description: Easy-to-use wrapper for Nutch |
| | 90 | # . |
| | 91 | |
# Nutch installation prefix. Defaulted here as well so this library can be
# sourced on its own without producing the bogus path "/conf/hadoop-env.sh".
NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"

# Pull in the environment file found under the installation prefix.
# A missing file is reported but is not fatal, so the functions below are
# still defined.
if [[ -r "${NUTCHEZ_SCRIPT_PATH}/conf/hadoop-env.sh" ]]; then
  # shellcheck source=/dev/null
  . "${NUTCHEZ_SCRIPT_PATH}/conf/hadoop-env.sh"
else
  printf 'nutchez: %s not found, continuing without it\n' \
    "${NUTCHEZ_SCRIPT_PATH}/conf/hadoop-env.sh" >&2
fi

# Verbosity switch read by echo_vb: 1 prints messages, any other value mutes them.
VERB=1
| | 95 | #DIALOG=dialog |
| | 96 | |
| | 97 | #NEZ_DIR=/home/waue/.nutchez |
| | 98 | #NEZ_DIR_URL |
| | 99 | #NEZ_DIR_OTHER |
| | 100 | |
| | 101 | |
#######################################
# Print a message only when verbose mode is switched on.
# Globals:   VERB (read) - output is produced when it equals 1;
#            an unset VERB is treated as "off"
# Arguments: $1 - message text
# Outputs:   the message, unchanged, followed by a newline on stdout
#######################################
echo_vb() {
  if [[ "${VERB:-0}" -eq 1 ]]; then
    # printf with a quoted argument keeps inner/trailing blanks and does not
    # expand glob characters such as '*' in the message.
    printf '%s\n' "$1"
  fi
}
| | 107 | |
#######################################
# Make sure a file exists, creating it when it does not.
# Arguments: $1 - path of the file
# Outputs:   an error message on stderr when no path is given
# Returns:   0 when the file exists afterwards, non-zero when the path is
#            empty or the file cannot be created
#######################################
test_file() {
  local path=$1

  if [[ -z "$path" ]]; then
    printf 'test_file: missing file name\n' >&2
    return 1
  fi

  if [[ ! -e "$path" ]]; then
    # A new file starts with a single newline, as `echo ""` produced before.
    printf '\n' > "$path"
  fi
}
| | 113 | |
#######################################
# Abort the script unless it is running as root.
# Globals:   UID, LOGNAME (read)
# Outputs:   an error message on stderr when the caller is not root; it is
#            printed regardless of the verbose setting because the script
#            terminates right after it
# Returns:   0 when running as root; otherwise exits the shell with status 1
#######################################
check_if_root() {
  local uid=${UID:-$(id -u)}

  if [[ "$uid" != "0" ]]; then
    printf '[%s] You need to run this script "%s" as root.\n' \
      "${LOGNAME:-}" "${0##*/}" >&2
    exit 1
  fi
}
| | 120 | |
#######################################
# Let the user edit the URL list in a dialog edit box.
# Files:     ~/n.url.txt - the URL list (created when missing, updated when
#                          the edit is confirmed)
#            ~/n.url.tmp - receives the text dialog emits on stderr
# Outputs:   a trace line through echo_vb
# Returns:   dialog's exit status (0 = ok, non-zero = cancelled), or 1 when
#            a confirmed edit cannot be saved
#######################################
show_urls() {
  local url_file=~/n.url.txt
  local edit_file=~/n.url.tmp
  local rc

  echo_vb "show urls : ok =0 ,cancel = 1"

  test_file "$url_file"

  # dialog begin
  dialog --editbox "$url_file" 30 50 2> "$edit_file"
  rc=$?

  # Store a confirmed edit back into the list; previously the edited text
  # stayed in the .tmp file only and the list itself never changed.
  if [[ "$rc" -eq 0 ]]; then
    cp -- "$edit_file" "$url_file" || return 1
  fi

  return "$rc"
}
| | 130 | |
#######################################
# Ask for the crawler's agent name.
# Files:     ~/n.robot.txt - created through test_file when missing
#            ~/n.robot.tmp - receives what dialog writes to stderr
# Outputs:   a trace line through echo_vb
# Returns:   the exit status of dialog
#######################################
setup_robot() {
  local seed_file=~/n.robot.txt
  local answer_file=~/n.robot.tmp
  local prompt="this agent name \n ex: nutch"

  test_file "$seed_file"
  echo_vb "setup_robot"

  # dialog
  dialog --inputbox "$prompt" 0 0 2> "$answer_file"
}
| | 137 | |
#######################################
# Ask for the crawl depth.
# Files:     ~/n.crawler.txt - created through test_file when missing
#            ~/n.crawler.tmp - receives what dialog writes to stderr
# Outputs:   a trace line through echo_vb
# Returns:   the exit status of dialog
#######################################
setup_crawler() {
  local seed_file=~/n.crawler.txt
  local answer_file=~/n.crawler.tmp
  local prompt="Depth \n ex: 5"

  echo_vb "setup_crawler"
  test_file "$seed_file"

  dialog --inputbox "$prompt" 0 0 2> "$answer_file"
}
| | 143 | |
#######################################
# Ask for the port the web front end is reached on.
# Files:     ~/n.tomcat.txt - created through test_file when missing
#            ~/n.tomcat.tmp - receives what dialog writes to stderr
# Outputs:   a trace line through echo_vb
# Returns:   the exit status of dialog
#######################################
setup_tomcat() {
  local seed_file=~/n.tomcat.txt
  local answer_file=~/n.tomcat.tmp
  local prompt="explorer port \n ex:8080 "

  echo_vb "setup_tomcat"
  test_file "$seed_file"

  dialog --inputbox "$prompt" 0 0 2> "$answer_file"
}
| | 149 | |
#######################################
# Show a summary of the collected settings and ask whether to start.
# Files:     ~/n.url.tmp (or ~/n.url.txt when the .tmp file is absent),
#            ~/n.robot.tmp, ~/n.crawler.tmp, ~/n.tomcat.tmp (all read)
# Outputs:   a trace line through echo_vb
# Returns:   the exit status of dialog: 0 when the user chooses "Start",
#            non-zero ("Back" or ESC) otherwise
#######################################
final_confirm() {
  local url_file=~/n.url.txt
  local urls robot depth port msg

  echo_vb "final_confirm : start =0 , back =1 "

  # The URL edit box stores its output in n.url.tmp; prefer that file when it
  # exists so the summary shows the list as last edited.
  if [[ -e ~/n.url.tmp ]]; then
    url_file=~/n.url.tmp
  fi

  # The answers are file contents and must be read with cat; the old code
  # tried to execute two of the files. A missing file is shown as an empty
  # value, hence the deliberate 2>/dev/null.
  urls=$(cat -- "$url_file" 2>/dev/null)
  robot=$(cat -- ~/n.robot.tmp 2>/dev/null)
  depth=$(cat -- ~/n.crawler.tmp 2>/dev/null)
  port=$(cat -- ~/n.tomcat.tmp 2>/dev/null)

  msg="urls = \n ${urls} \n robot name = \n ${robot} \n depth = \n ${depth} \n explorer port = \n ${port} \n"

  # A yes/no box gives the user a real choice between starting and going
  # back; its exit status is this function's result (no unset $READ).
  dialog --yes-label "Start" --no-label "Back" --yesno "$msg" 0 0
}
| | 157 | |
#######################################
# Placeholder for the crawl step: currently only emits a trace line.
# Outputs:   "start_crawl" through echo_vb
#######################################
start_crawl() {
  local -r banner="start_crawl"
  echo_vb "$banner"
}
| | 161 | |
#######################################
# Placeholder for the tomcat start step: currently only emits a trace line.
# Outputs:   "start_tomcat " through echo_vb
#######################################
start_tomcat() {
  local -r banner="start_tomcat "
  echo_vb "$banner"
}
| | 165 | |
#######################################
# Placeholder for the final report: currently only emits a trace line.
# Outputs:   "show_report " through echo_vb
#######################################
show_report() {
  local -r banner="show_report "
  echo_vb "$banner"
}
| | 169 | |
| | 170 | }}} |