| 1 | * nutchez |
| 2 | |
| 3 | {{{ |
| 4 | #!sh |
| 5 | |
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Easy-to-use setup wizard for Nutch.
#
# Flow (the "milestones" named in the comments below):
#   M1  edit the URL list            (show_urls)
#   M2  robot / crawler / tomcat settings, then a final confirmation
#   M3  start the crawl and tomcat, then show the report
#
# Environment:
#   NUTCHEZ_SCRIPT_PATH  Nutch installation directory (default: /opt/nutch);
#                        read by nutchez-func.sh when it is sourced.

NUTCHEZ_SCRIPT_PATH="${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}"

# Load the helper functions from the directory this script lives in, so the
# wizard also works when it is started from another working directory.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) || exit 1
if ! . "${script_dir}/nutchez-func.sh"; then
  printf 'nutchez: cannot load %s\n' "${script_dir}/nutchez-func.sh" >&2
  exit 1
fi

# root ?
#check_if_root

# Repeat M1 + M2 until the final confirmation is accepted.
while true; do
  # milestone M1: show / edit the url list
  # show_urls status: ok = 0, anything else = leave the wizard
  show_urls
  url_status=$?
  echo_vb "$url_status"

  if [ "$url_status" -ne 0 ]; then
    # return to the console
    exit 1
  fi

  # milestone M2
  # NOTE(review): the "next, back" navigation mentioned for the three steps
  # below is not implemented; their exit statuses are ignored.

  # set up the search engine personality
  setup_robot

  # set up the crawl parameters
  setup_crawler

  # set up the tomcat parameters
  setup_tomcat

  # show the final checklist
  # final_confirm status: start = 0, anything else = back to M1
  final_confirm
  confirm_status=$?
  echo_vb "$confirm_status"

  if [ "$confirm_status" -eq 0 ]; then
    # go to M3
    break
  fi
done

# milestone M3
start_crawl
start_tomcat

# show result message
show_report

# Done
| 81 | }}} |
| 82 | |
| 83 | |
| 84 | * nutchez-func.sh |
| 85 | {{{ |
#!/bin/bash
# Author: WeiYu Chen <waue _at_ nchc org tw>
# License: GPL
# Description: Helper functions for the easy-to-use Nutch wizard (nutchez).
#
# Environment:
#   NUTCHEZ_SCRIPT_PATH  Nutch installation directory; falls back to
#                        /opt/nutch when unset or empty.

# Pull in the Hadoop environment shipped with the Nutch installation.
# Quoted so an installation path containing spaces still works.
. "${NUTCHEZ_SCRIPT_PATH:-/opt/nutch}/conf/hadoop-env.sh"

# 1 = echo_vb prints its messages; any other value silences them.
VERB=1
#DIALOG=dialog

#NEZ_DIR=/home/waue/.nutchez
#NEZ_DIR_URL
#NEZ_DIR_OTHER
| 100 | |
| 101 | |
#######################################
# Print a message only when verbose mode is enabled.
# Globals:   VERB (read) - 1 enables output; unset or any other value
#            silences it.
# Arguments: $1 - message to print
# Outputs:   the message followed by a newline on stdout (verbose mode only)
# Returns:   0
#######################################
echo_vb() {
  if [ "${VERB:-0}" -eq 1 ]; then
    # printf keeps whitespace, glob characters and leading dashes intact,
    # which an unquoted echo would mangle.
    printf '%s\n' "${1-}"
  fi
}
| 107 | |
#######################################
# Make sure a file exists, creating it when it is missing.
# Arguments: $1 - path of the file
# Outputs:   a newly created file contains a single empty line; an existing
#            file is left untouched.
# Returns:   0 when the file already exists, otherwise the status of the
#            write that creates it.
#######################################
test_file() {
  if [ ! -e "$1" ]; then
    printf '\n' > "$1"
  fi
}
| 113 | |
#######################################
# Abort the script unless it is running as root.
# Globals:   UID (read), LOGNAME (read)
# Outputs:   an explanation on stderr when the caller is not root. It is
#            printed unconditionally so the script never exits silently,
#            whatever the verbosity setting is.
# Returns:   0 when running as root; otherwise exits the shell with status 1.
#######################################
check_if_root() {
  if [ "$UID" != "0" ]; then
    # ${0##*/} is the script's base name without forking basename.
    printf '%s\n' \
      "[${LOGNAME:-}] You need to run this script \"${0##*/}\" as root." >&2
    exit 1
  fi
}
| 120 | |
#######################################
# Let the user edit the URL list in a dialog edit box.
# The list lives in ~/n.url.txt; dialog's stderr is captured in
# ~/n.url.tmp. When the dialog is accepted, the captured text is copied back
# to ~/n.url.txt so later steps (and a second pass through this screen) see
# the edited list instead of the stale one.
# Outputs:   ~/n.url.tmp (always rewritten), ~/n.url.txt (on accept)
# Returns:   0 when the dialog was accepted and the list was saved;
#            otherwise the non-zero status of dialog or of the copy.
#######################################
show_urls() {
  local rc=0

  echo_vb "show urls : ok =0 ,cancel = 1"

  test_file ~/n.url.txt

  # dialog begin
  dialog --editbox ~/n.url.txt 30 50 2> ~/n.url.tmp || rc=$?

  if [ "$rc" -eq 0 ]; then
    # Persist the edit; a failed copy is reported as a non-zero status.
    cp -- ~/n.url.tmp ~/n.url.txt || rc=$?
  fi

  return "$rc"
}
| 130 | |
#######################################
# Ask for the crawler's agent name with a dialog input box.
# Ensures ~/n.robot.txt exists, logs the step through echo_vb, then runs the
# input box with its stderr redirected into ~/n.robot.tmp.
# Returns:   the exit status of dialog
#######################################
setup_robot() {
  local prompt answer_file
  prompt="this agent name \n ex: nutch"
  answer_file=~/n.robot.tmp

  test_file ~/n.robot.txt
  echo_vb "setup_robot"
  # dialog
  dialog --inputbox "$prompt" 0 0 2> "$answer_file"
}
| 137 | |
#######################################
# Ask for the crawl depth with a dialog input box.
# Logs the step through echo_vb, ensures ~/n.crawler.txt exists, then runs
# the input box with its stderr redirected into ~/n.crawler.tmp.
# Returns:   the exit status of dialog
#######################################
setup_crawler() {
  local prompt answer_file
  prompt="Depth \n ex: 5"
  answer_file=~/n.crawler.tmp

  echo_vb "setup_crawler"
  test_file ~/n.crawler.txt
  dialog --inputbox "$prompt" 0 0 2> "$answer_file"
}
| 143 | |
#######################################
# Ask for the port to browse on with a dialog input box.
# Logs the step through echo_vb, ensures ~/n.tomcat.txt exists, then runs
# the input box with its stderr redirected into ~/n.tomcat.tmp.
# Returns:   the exit status of dialog
#######################################
setup_tomcat() {
  local prompt answer_file
  prompt="explorer port \n ex:8080 "
  answer_file=~/n.tomcat.tmp

  echo_vb "setup_tomcat"
  test_file ~/n.tomcat.txt
  dialog --inputbox "$prompt" 0 0 2> "$answer_file"
}
| 149 | |
#######################################
# Show a summary of everything entered so far and ask for confirmation.
# Reads the URL list from ~/n.url.txt and the answers of the three setup
# steps from ~/n.robot.tmp, ~/n.crawler.tmp and ~/n.tomcat.tmp. A missing
# file simply shows up as an empty value.
# Globals:   MSG (written) - the text handed to dialog
# Returns:   the exit status of dialog (0 when the message box is accepted).
# NOTE(review): a --msgbox only offers an OK button, so the "back = 1"
# result promised by the log line cannot be produced here; confirm whether a
# --yesno box was intended.
#######################################
final_confirm() {
  local urls robot depth port
  local rc=0

  echo_vb "final_confirm : start =0 , back =1 "

  # Every value is read with cat; the answer files are data, not commands.
  urls=$(cat ~/n.url.txt 2>/dev/null)
  robot=$(cat ~/n.robot.tmp 2>/dev/null)
  depth=$(cat ~/n.crawler.tmp 2>/dev/null)
  port=$(cat ~/n.tomcat.tmp 2>/dev/null)

  MSG="urls = \n ${urls} \n robot name = \n ${robot} \n depth = \n ${depth} \n explorer port = \n ${port} \n"

  dialog --msgbox "$MSG" 0 0 || rc=$?
  # Report dialog's own status rather than an unrelated variable.
  return "$rc"
}
| 157 | |
# Placeholder for launching the crawl: for now it only reports its own name
# through echo_vb.
start_crawl() {
  local step='start_crawl'
  echo_vb "$step"
}
| 161 | |
# Placeholder for starting tomcat: for now it only reports its own name
# through echo_vb.
start_tomcat() {
  local step='start_tomcat '
  echo_vb "$step"
}
| 165 | |
# Placeholder for the result report: for now it only reports its own name
# through echo_vb.
show_report() {
  local step='show_report '
  echo_vb "$step"
}
| 169 | |
| 170 | }}} |