Last change on this file since 249 was 247, checked in by shunfa, 15 years ago
|
將go.sh複製到nutchez/system/目錄下
|
-
Property svn:executable set to
*
|
File size:
1.1 KB
|
Line | |
---|
#!/bin/bash
#
# go.sh - rebuild the search index by running a crawl and restarting tomcat.
#
# Usage: ./go.sh DEPTH
#   DEPTH  crawl depth handed to "nutch crawl -depth"; must be a
#          non-negative integer.
#
# Per the usage text below, the script expects the nutchuser account to
# exist and hadoop to be running already.
crawl_dep=$1

# Print the usage notes to stdout.
print_usage() {
  echo "1. 使用這個shell ,首先你需要有nutchuser這個使用者,並且hadoop 已經開始運作"
  # The path matches the directory the script actually uploads
  # (/home/nutchuser/nutchez/urls); the old text said ".../url/urls.txt".
  echo "2. /home/nutchuser/nutchez/urls/urls.txt 裡面有你要抓的網址"
  echo "3. 執行 ./go.sh [深度] 即可,如:"
  echo " ./go.sh 3"
}

# Reject a missing or non-numeric depth up front. Later steps of this script
# delete the existing search data before crawling, so a bad argument must
# stop the run here. Exit non-zero so callers can tell that nothing ran.
case "$crawl_dep" in
  '' | *[!0-9]*)
    print_usage
    exit 1
    ;;
esac
---|
11 | |
---|
#######################################
# Report the outcome of the command that ran immediately before this call.
# Must be invoked directly after the command being checked, because it reads
# that command's exit status from $?.
# Arguments: $1 - label of the step, used in the message
# Outputs:   "<label> finished " on success, "<label> is error" on failure
#            (both on stdout)
# Returns:   0 on success; on failure it does not return and terminates the
#            script with the failed command's exit status
#######################################
debug_echo() {
  # Capture the status first: any later command (even echo) overwrites $?.
  local status=$?

  if [ "$status" -eq 0 ]; then
    echo "$1 finished "
  else
    echo "$1 is error"
    # A bare "exit" here would return echo's status (0) and hide the failure.
    exit "$status"
  fi
}
---|
21 | |
---|
22 | |
---|
23 | |
---|
# Load the Hadoop environment settings before calling the hadoop/nutch tools.
source /opt/nutchez/nutch/conf/hadoop-env.sh
debug_echo "import hadoop-env.sh"

echo "delete search (local,hdfs) and urls (hdfs) "

# Drop the previous local copy of the search data.
rm -rf /home/nutchuser/nutchez/search

# Remove the old HDFS data. The status is intentionally not checked,
# presumably because these paths may not exist yet (e.g. on a first run).
/opt/nutchez/nutch/bin/hadoop dfs -rmr urls search

# Upload the seed URL directory. Stop here if the upload fails, otherwise the
# crawl below would run without its input.
/opt/nutchez/nutch/bin/hadoop dfs -put /home/nutchuser/nutchez/urls urls
debug_echo "upload urls"

# Crawl the uploaded URLs to the depth given on the command line.
/opt/nutchez/nutch/bin/nutch crawl urls -dir search -depth "$crawl_dep" -topN 5000 -threads 1000
debug_echo "nutch crawl"

# Copy the crawl result from HDFS back to the local filesystem.
/opt/nutchez/nutch/bin/hadoop dfs -get search /home/nutchuser/nutchez/search
debug_echo "download search"

# Restart tomcat. Only the status of startup.sh reaches debug_echo; the
# status of shutdown.sh is not checked.
/opt/nutchez/tomcat/bin/shutdown.sh
/opt/nutchez/tomcat/bin/startup.sh
debug_echo "tomcat restart"
---|
Note: See TracBrowser for help on using the repository browser.