Index: nutchez-0.2/src/test/install
===================================================================
--- nutchez-0.2/src/test/install	(revision 136)
+++ nutchez-0.2/src/test/install	(revision 137)
@@ -6,5 +6,7 @@
 source ./client_install_func.sh
 ### real code #####
-main () {
+
+# 執行環境檢查
+check_info () {
   check_root
   check_systemInfo
@@ -14,11 +16,12 @@
   #check_dialog
   #check_php
+}
 
-  #set install mode
+main () {
+  echo "歡迎使用NutchEZ\ 此安裝程序會為您新建一個nutchuser帳號"
   set_install_information
   show_info
   read -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
   if [ $confirm -eq 1 ]; then
-
     # create_nutchuser_account
     # make_ssh_key
@@ -26,10 +29,11 @@
     # 解壓縮
     # tar -zxvf nutchez-0.2-20100524.tar.gz 
-    # mv -r nutchez /opt/
+    # mv nutchez /opt/
     Install_Nutch
     Install_Tomcat
-    # chown -R nutchuser:nutchuser /opt/nutchez
+    chown -R nutchuser:nutchuser /opt/nutchez
     # make_client_install
 
+    # 啟動系統
     format_HDFS
     start_up_NutchEZ
@@ -44,3 +48,4 @@
 }
 
+check_info
 main
Index: nutchez-0.2/src/test/install_func.sh
===================================================================
--- nutchez-0.2/src/test/install_func.sh	(revision 136)
+++ nutchez-0.2/src/test/install_func.sh	(revision 137)
@@ -12,260 +12,104 @@
 ####### garbage end ###############
 
-
-
 ####### fafa code here ###########
-
-# 參數假設
-# /home/nutchuser/NutchEZ_source下有3個檔案
-# install.sh, nutch-1.0.tar.gz, apache-tomcat-6.0.18.tar.gz
-# 安裝路徑為/opt/NutchEZ
-
-Install_source=/home/nutchuser/NutchEZ_source
-NutchEZ_HOME=/opt/NutchEZ
+User_HOME=/home/nutchuser/nutchez  # base directory of Index_DB below
+NutchEZ_HOME=/opt/nutchez  # parent directory of the Nutch and Tomcat trees
+Nutch_HOME=$NutchEZ_HOME/nutch  # Nutch tree; its conf/ and bin/ are used by the functions below
+Tomcat_HOME=$NutchEZ_HOME/tomcat  # Tomcat tree; its webapps/ and bin/ are used by the functions below
+Index_DB=$User_HOME/search  # directory created at the end of Install_Tomcat
 MasterIP_Address=`/sbin/ifconfig eth0 | grep 'inet addr' |  sed 's/^.*addr://g' | sed 's/Bcast.*$//g' | sed 's/ .*// '`
 
+# For now the IP address stands in for the DNS name
+MasterDNS=$MasterIP_Address
 
-set_install_information () {
-  read -p "Please enter administrator's e-mail address:  " Admin_email
+function set_install_information () {  # Prompt for nutchuser's password (twice), the admin e-mail and the master DNS; answers are stored in globals
+  read -r -p "Please enter nutchuser's password :  " nutchuser_passwd
+  read -r -p "Please enter nutchuser's password again:  " nutchuser_passwd_confirm
+  if [ "$nutchuser_passwd" != "$nutchuser_passwd_confirm" ]; then
+    set_install_information; return  # mismatch: start over, then stop so this outer call does not prompt a second time
+  fi
+  read -p "Please enter Administrator's e-mail address: " Admin_email
   read -p "Please enter the Master DNS:  " MasterDNS
 }
 
-show_info () {
+function show_info () {  # Print the values currently held in $Admin_email and $MasterDNS
   echo "Administrator's e-mail address is $Admin_email."
   echo "The master DNS is: $MasterDNS"
 }
 
-confirm_install_information () {
+function confirm_install_information () {  # Read the user's answer to the 1.Yes / 2.No prompt into the global $confirm
   read -p "Please confirm your install infomation: 1.Yes 2.No  " confirm
 }
 
-# set $NutchEZ_HOME/conf/hadoop-env.sh
-set_hadoop-env () {
-  echo "set $NutchEZ_HOME/conf/hadoop-env.sh"
-  cd $NutchEZ_HOME/conf/
-  cat >> hadoop-env.sh << EOF
-export JAVA_HOME=/usr/lib/jvm/java-6-sun
-export HADOOP_HOME=$NutchEZ_HOME
-export HADOOP_LOG_DIR=/tmp/NutchEZ/logs
-export HADOOP_SLAVES=$NutchEZ_HOME/conf/slaves
-export HADOOP_CONF_DIR=$NutchEZ_HOME/conf
-export HADOOP_PID_DIR=/tmp/hadoop/pid
-export NUTCH_HOME=$NutchEZ_HOME
-export NUTCH_CONF_DIR=$NutchEZ_HOME/conf
-EOF
-}
-
-# set $NutchEZ_HOME/conf/hadoop-site.xml
-set_haoop-site () {
-  echo "set $NutchEZ_HOME/conf/hadoop-site.xml"
-  cd $NutchEZ_HOME/conf/
+function set_haoop-site () {  # Overwrite $Nutch_HOME/conf/hadoop-site.xml with the HDFS, job tracker and tmp dir settings below
+  echo "set $Nutch_HOME/conf/hadoop-site.xml"
+  cd "$Nutch_HOME/conf/" || return 1  # the redirect below is relative; never write it into some other directory
   cat > hadoop-site.xml << EOF
 <configuration>
-<property>
+  <property>
     <name>fs.default.name</name>
-    <value>$MasterDNS:9000</value>
-    <description> The name of the default file system. Either the literal string "local" or a host:port for NDFS. </description>
-</property>
-<property>
+    <value>hdfs://$MasterIP_Address:9000</value>
+  </property>
+  <property>
     <name>mapred.job.tracker</name>
-    <value>$MasterDNS:9001</value>
-    <description> The host and port that the MapReduce job tracker runs at. If "local", then jobs are run in-process as a single map and reduce task. </description>
-</property>
+    <value>$MasterIP_Address:9001</value>
+  </property>
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>/var/nutchez/nutch-nutchuser</value>
+  </property>
 </configuration>
 EOF
 }
 
-set_nutch-site () {
-  echo "set $NutchEZ_HOME/conf/nutch-site.xml"
-  cd $NutchEZ_HOME/conf/
-  cat > nutch-site.xml << EOF
-<configuration>
-<property>
-  <name>http.agent.name</name>
-  <value>nutchuser</value>
-  <description>HTTP 'User-Agent' request header. </description>
-</property>
-<property>
-  <name>http.agent.description</name>
-  <value>MyTest</value>
-  <description>Further description</description>
-</property>
-<property>
-  <name>http.agent.url</name>
-  <value>$MasterDNS</value>
-  <description>A URL to advertise in the User-Agent header. </description>
-</property>
-<property>
-  <name>$MasterDNS</name>
-  <value>$Admin_email</value>
-  <description>An email address
-  </description>
-</property>
-</configuration>
-EOF
+# Rewrite the values of http.agent.url and http.agent.email in nutch-site.xml.
+# For each key, the line following the first line that mentions the key is
+# replaced by a new <value> line (assumes <value> directly follows <name>).
+function set_nutch-site () {
+  local conf=$Nutch_HOME/conf/nutch-site.xml key val
+  echo "set $conf"
+  for key in http.agent.url http.agent.email; do
+    case $key in http.agent.url) val=$MasterIP_Address ;; *) val=$Admin_email ;; esac
+    Line_NO=$(grep -n -m 1 -F -- "$key" "$conf" | cut -d: -f1)
+    echo "debug...$key line number = $Line_NO..."
+    # With no match, Line_NO+1 would be 1: line 1 deleted, <value> appended after every line.
+    if [ -z "$Line_NO" ]; then echo "$key not found in $conf, skipped" >&2; continue; fi
+    sed -i "$((Line_NO+1))d" "$conf"
+    echo "debug...edit $key delete line $((Line_NO+1))..."
+    sed -i "${Line_NO}a <value>${val}</value>" "$conf"
+    echo "debug...edit $key done..."
+  done
 }
 
-
-set_crawl-urlfilter () {
-  echo "set $NutchEZ_HOME/conf/set_crawl-urlfilter.txt"
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip file:, ftp:, & mailto: urls' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a -^(ftp|mailto):' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-
-
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip image and other suffixes we can' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a -\\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|sit|eps|wmf|mpg|xls|gz|rpm|tgz|mov|MOV|exe|jpeg|JPEG|bmp|BMP)$' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-
-
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip URLs containing certain characters as probable queries, etc.' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a -[*!@]' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-
-
-  Line_NO=`cat $NutchEZ_HOME'/conf/crawl-urlfilter.txt' | grep -n 'skip everything else' | sed 's/:.*//g'`
-  sed -i ''$((Line_NO+1))'d' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a +.*' $NutchEZ_HOME/conf/crawl-urlfilter.txt
-  sed -i ''$Line_NO'a # accecpt anything else' $NutchEZ_HOME/conf/crawl-urlfilter.txt
+function format_HDFS () {  # Run "namenode -format" through the hadoop launcher in $Nutch_HOME/bin
+  echo "format HDFS..."
+  $Nutch_HOME/bin/hadoop namenode -format
 }
 
-format_HDFS () {
-  echo "format HDFS..."
-  $NutchEZ_HOME/bin/hadoop namenode -format
+function start_up_NutchEZ (){  # Run start-all.sh from the same $Nutch_HOME/bin directory that format_HDFS uses
+  echo "start up NutchEZ..."
+  "$Nutch_HOME/bin/start-all.sh"  # was $NutchE_HOME, an undefined name that expanded to /bin/start-all.sh
 }
 
-start_up_NutchEZ (){
-  echo "start up NutchEZ..."
-  $NutchEZ_HOME/bin/start-all.sh
-}
-
-set_server () {
-  echo "$NutchEZ_HOME/tomcat/conf/server.xml"
-  Line_NO=`cat $NutchEZ_HOME'/tomcat/conf/server.xml' | grep -n '<!-- A "Connector" using the shared thread pool-->' | sed 's/:.*//g'`
-
-  sed -i ''$((Line_NO+1))','$((Line_NO+6))'d' $NutchEZ_HOME/tomcat/conf/server.xml
-  sed -i ''$Line_NO'a    <Connector port="8080" protocol="HTTP/1.1"\
-               connectionTimeout="20000"\
-               redirectPort="8443" URIEncoding="UTF-8"\
-               useBodyEncodingForURI="true" />\
-' $NutchEZ_HOME/tomcat/conf/server.xml
-}
-
-
-set_nutch-site2 () {
-  echo "$NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml"
-  
-  # 搜尋加入設定的行號位址
-  line_NO=`cat $NutchEZ_HOME'/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml' | grep -n '<'configuration'>' | sed 's/:.*//g'`
-  
-  # 加入設定檔
-  sed -i ''$line_NO'a  <property>\
-  <name>http.agent.name</name>\
-  <value>nutch</value>\
-  <description>HTTP 'User-Agent' request header. </description> \
-</property>\
-<property>\
-  <name>http.agent.description</name>\
-  <value>MyTest</value>\
-  <description>Further description</description> \
-</property>\
-<property>\
-  <name>http.agent.url</name> \
-  <value>localhost</value> \
-  <description>A URL to advertise in the User-Agent header. </description> \
-</property>\
-<property>\
-  <name>http.agent.email</name>\
-  <value>'$Admin_email'</value> \
-  <description>An email address \
-  </description> \
-</property>\
-<property>\
-  <name>plugin.folders</name>\
-  <value>'$NutchEZ_HOME'/plugins</value>\
-  <description>Directories where nutch plugins are located. </description>\
-</property>\
-<property>\
-  <name>plugin.includes</name>\
-  <value>protocol-(http|httpclient)|urlfilter-regex|parse-(text|html|js|ext|msexcel|mspowerpoint|msword|oo|pdf|rss|swf|zip)|index-(more|basic|anchor)|query-(more|basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>\
-  <description> Regular expression naming plugin directory names</description>\
- </property>\
- <property>\
-  <name>parse.plugin.file</name>\
-  <value>parse-plugins.xml</value>\
-  <description>The name of the file that defines the associations between\
-  content-types and parsers.</description>\
- </property>\
- <property>\
-   <name>db.max.outlinks.per.page</name>\
-   <value>-1</value>\
-   <description> </description>\
- </property> \
- <property>\
-   <name>http.content.limit</name> \
-   <value>-1</value>\
- </property>\
-<property>\
-  <name>indexer.mergeFactor</name>\
-  <value>500</value>\
-  <description>The factor that determines the frequency of Lucene segment\
-  merges. This must not be less than 2, higher values increase indexing\
-  speed but lead to increased RAM usage, and increase the number of\
-  open file handles (which may lead to "Too many open files" errors).\
-  NOTE: the "segments" here have nothing to do with Nutch segments, they\
-  are a low-level data unit used by Lucene.\
-  </description>\
-</property>\
-
-<property>\
-  <name>indexer.minMergeDocs</name>\
-  <value>500</value>\
-  <description>This number determines the minimum number of Lucene\
-  Documents buffered in memory between Lucene segment merges. Larger\
-  values increase indexing speed and increase RAM usage.\
-  </description>\
-</property>\
-
-' $NutchEZ_HOME/tomcat/webapps/ROOT/WEB-INF/classes/nutch-site.xml
-}
-
-
-set_Nutch_conf () {
+function Install_Nutch () {  # Run the Nutch config steps in order. NOTE(review): this changeset deletes set_hadoop-env's definition - confirm it is still defined elsewhere, otherwise the first call fails
   set_hadoop-env
   set_haoop-site
   set_nutch-site
-  set_crawl-urlfilter
 }
 
-
-Install_Nutch () {
-  cd /opt
-  tar zxf /opt/nutch-1.0.tar.gz
-#  tar zxvf /opt/nutch-1.0.tar.gz
-  mv /opt/nutch-1.0  NutchEZ
-  chown -R nutchuser:nutchuser $NutchEZ_HOME
-  set_Nutch_conf
+function Install_Tomcat () {
+# Publish the Nutch search web app as Tomcat's ROOT application: unpack nutch-1.0.war
+# into $Nutch_HOME/web, set the stock ROOT aside as ROOT-ori, then move web into its place.
+  mkdir -p "$Nutch_HOME/web" || return 1
+  cd "$Nutch_HOME/web" || return 1
+  jar -xvf ../nutch-1.0.war || return 1  # never swap ROOT for a failed or partial unpack
+  cd "$Nutch_HOME" || return 1
+  mv "$Tomcat_HOME/webapps/ROOT" "$Tomcat_HOME/webapps/ROOT-ori"
+  mv "$Nutch_HOME/web" "$Tomcat_HOME/webapps/ROOT"
+  mkdir -p "$Index_DB"  # -p: also works when the directory exists or its parent is missing
 }
 
-# install tomcat
-Install_Tomcat () {
-  cd /opt/
-#  tar zxvf apache-tomcat-6.0.18.tar.gz
-  tar zxf apache-tomcat-6.0.18.tar.gz
-  mv apache-tomcat-6.0.18 $NutchEZ_HOME
-  cd $NutchEZ_HOME
-  mv  apache-tomcat-6.0.18 tomcat
-  mkdir web
-  # mkdir $NutchEZ_HOME/search
-  chown -R nutchuser:nutchuser $NutchEZ_HOME
-  jar -xvf nutch-1.0.war web
-  mv $NutchEZ_HOME/tomcat/webapps/ROOT $NutchEZ_HOME/tomcat/webapps/ROOT-ori
-  mv $NutchEZ_HOME/web $NutchEZ_HOME/tomcat/webapps/ROOT
-  set_server
-  #set_nutch-site2
+function start_up_tomcat () {  # Run Tomcat's startup.sh from $Tomcat_HOME/bin
+  echo "start up tomcat..."
+  $Tomcat_HOME/bin/startup.sh
 }
-
-start_up_tomcat () {
-  echo "start up tomcat..."
-  $NutchEZ_HOME/tomcat/bin/startup.sh
-}
