Changes between Version 7 and Version 8 of waue/2010/1029


Ignore:
Timestamp:
Nov 1, 2010, 11:12:19 AM (13 years ago)
Author:
waue
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • waue/2010/1029

    v7 v8  
    9090 = 自動化script =
    9191
     92{{{
     93#!sh
     94#!/bin/bash
    9295
     96# prompt
     97if [ "$1" == "" ];then
     98    echo "Usage : fix <JOB_NAME>";
     99    echo " where JOB_NAME is one of: ";
     100    echo "==========="
     101    NN=$(/opt/crawlzilla/nutch/bin/hadoop dfs -ls |grep crawler |awk '{print $8}' | cut -d "/" -f 4)
     102    echo "$NN"
     103    echo "==========="
     104    exit 9;
     105fi
     106
     107# begin
     108
     109JNAME=$1
     110LOGFILE=~/crawlzilla/debug_fix.log
     111META_PATH=/home/crawler/crawlzilla/.tmp
     112
     113### not test
     114JPID="$META_PATH/$JNAME/$JNAME"_count_pid # go.sh need add go.sh's pid
     115JDEPTH="$META_PATH/$JNAME/$JNAME"xxx # go.sh need fix
     116JPTIME="$META_PATH/$JNAME/$JNAME"PassTime
     117### not test
     118
     119
     120DATE=$(date)
     121echo "$JNAME BEGINE at $DATE" >> $LOGFILE
     122
     123echo "1 invertlinks" >> $LOGFILE
     124
     125/opt/crawlzilla/nutch/bin/nutch invertlinks /user/crawler/$JNAME/linkdb -dir /user/crawler/$JNAME/segments/
     126if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     127
     128echo "2 index" >> $LOGFILE
     129SEGS=$(/opt/crawlzilla/nutch/bin/hadoop dfs -ls /user/crawler/$JNAME/segments | grep  segments | awk '{print $8 }')
     130/opt/crawlzilla/nutch/bin/nutch index /user/crawler/$JNAME/index /user/crawler/$JNAME/crawldb /user/crawler/$JNAME/linkdb $SEGS
     131if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     132
     133echo "3 dedup" >> $LOGFILE
     134/opt/crawlzilla/nutch/bin/nutch dedup /user/crawler/$JNAME/index
     135if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     136
     137echo "4 download" >> $LOGFILE
     138/opt/crawlzilla/nutch/bin/hadoop dfs -get $JNAME /home/crawler/crawlzilla/archieve/$JNAME
     139if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     140
     141echo "5 $JNAMEPassTime" >> $LOGFILE
     142echo "0h:0m:0s" >> /home/crawler/crawlzilla/archieve/$JNAME/$JNAME"PassTime"
     143if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     144
     145echo "6 append depth" >> $LOGFILE
     146echo "0" >> /home/crawler/crawlzilla/archieve/$JNAME/.crawl_depth
     147if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     148
     149echo "7 mv index files from part-00000" >> $LOGFILE
     150mv /home/crawler/crawlzilla/archieve/$JNAME/index/part-00000/* /home/crawler/crawlzilla/archieve/$JNAME/index/
     151if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     152
     153echo "8 rmdir part-00000/"  >> $LOGFILE
     154rmdir /home/crawler/crawlzilla/archieve/$JNAME/index/part-00000/
     155if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     156
     157echo "9 tomcat"  >> $LOGFILE
     158cp -rf /opt/crawlzilla/tomcat/webapps/default /opt/crawlzilla/tomcat/webapps/$JNAME
     159if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     160
     161echo "10 nutch-site.xml"  >> $LOGFILE
     162sed -i '8s/search/'${JNAME}'/g' /opt/crawlzilla/tomcat/webapps/$JNAME/WEB-INF/classes/nutch-site.xml
     163if [ ! $? -eq 0 ];then echo "ERROR!!! see $LOGFILE ";exit 8; fi
     164
     165
     166
     167
     168DATE=$(date)
     169echo "$JNAME completed and finished at"$DATE >> $LOGFILE
     170
     171
     172}}}