#!/bin/ksh
# @(#)11  1.1  src/avs/fs/mmfs/samples/nfscluster/takeover, mmfs, avs_rgpfs24, rgpfs24s011a 3/7/07 20:57:09
#

# Pull in the shared NFS helper functions that the rest of this script calls.
# When the helper file is absent, report it and stop; the exit status is 0
# in that case.
[ -f /var/mmfs/etc/nfsfuncs ] || {
  echo "$0: Can't find NFS functions in /var/mmfs/etc"
  exit 0
}
. /var/mmfs/etc/nfsfuncs

# forcedTakeover <me> <failed>
#
# Take over the NFS IP addresses of node <failed> on behalf of node <me>.
#   $1 (me)     - identifier of the node performing the takeover
#   $2 (failed) - identifier of the node being taken over
#
# Does nothing when both arguments are equal.  Otherwise the failed node is
# stopped and recoverNode is run for every address reported by getNfsIPs.
# If at least one recoverNode call succeeded, the marker
# ${SHARED_RECOVERY}/<me>/<failed> is touched, the failed node's statd/sm
# entries are copied under <me>, and startReclaim is invoked when
# checkDynamicGrace returns 1.  If no address could be recovered, the marker
# is unlinked and the function returns.
#
# me, failed and nfsIPs are plain assignments and therefore remain set after
# the call.  The loop variable is deliberately NOT named "ip": the main
# script uses "ip" as its own loop variable around calls to this function and
# reads it again after the call, so assigning it here would overwrite the
# caller's value.
forcedTakeover() {
  me=$1
  failed=$2
  typeset -i do_reclaim=0
  typeset nfsIP

  # Nothing to take over from ourselves.
  [ "$me" = "$failed" ] && return

  msg "Initiating forced IP takeover of $failed due to node failure"

  _mkdir "${SHARED_RECOVERY}/$me"

  stopNode "$failed"
  nfsIPs=$(getNfsIPs "$failed")
  debugmsg "forcedTakeover ips: $nfsIPs"
  # $nfsIPs is left unquoted on purpose: it is split into one word per address.
  for nfsIP in $nfsIPs; do
    # Takeover IP and issue gratuitous ARP to the clients for the node
    # that failed so that clients can reconnect to the new address
    if recoverNode "$nfsIP" "$failed"; then
      do_reclaim=1
    fi
  done

  if [ "$do_reclaim" -eq 0 ]; then
    # did not get IP, takeover failed, remove the entry
    _unlink "${SHARED_RECOVERY}/$me/$failed"
    return
  fi

  # got the IP
  debuglog touch "${SHARED_RECOVERY}/$me/$failed"

  debugmsg "forcedTakeover: File contents:"
  # Unquoted as in the original: the listing is passed as separate words.
  debugmsg $(ls -R "${SHARED_RECOVERY}/$me")
  # The trailing * must stay outside the quotes so the shell expands it.
  _cp "${SHARED_NFS}/$failed/statd/sm/"* "${SHARED_NFS}/$me/statd/sm"
  checkDynamicGrace
  [ $? -eq 1 ] && startReclaim "$failed"
}

# Main
#
# Expects exactly one argument: the GPFS IP of the node to take over.
# The argument is checked against the list from getAllGPFSIPs, then a forced
# takeover is run for that node and for every node reported by
# getFailedNodes for it.

[ $# -eq 1 ] || die "Usage: $0 GPFS-IP"

node=$1
me=$(myGPFSIP)

# Check if failed node is a valid GPFS IP address
valid=0
gpfsIPs=$(getAllGPFSIPs)
# $gpfsIPs is left unquoted on purpose: it is split into one word per address.
for gpfsIP in $gpfsIPs; do
  if [ "$gpfsIP" = "$node" ]; then
    valid=1
    break
  fi
done
[ "$valid" -eq 1 ] || die "$node is not a valid GPFS IP address"

debugmsg "Start forced node recovery for $node"
forcedTakeover "$me" "$node"
debuglog ls -A "$SHARED_RECOVERY/$node"
# recover other IPs that are serviced by the failed node
# The loop variable is not named "ip" because forcedTakeover runs its own
# "for ip in ..." loop on a non-local variable; sharing the name would make
# the _unlink below operate on whatever value that inner loop left behind.
for failedNode in $(getFailedNodes "$node"); do
  debugmsg "Do 2nd recovery for node $failedNode"
  forcedTakeover "$me" "$failedNode"
  _unlink "${SHARED_RECOVERY}/$node/$failedNode"
done