#!/bin/ksh
# @(#)11  1.1  src/avs/fs/mmfs/samples/nfscluster/takeover, mmfs, avs_rgpfs24, rgpfs24s011a 3/7/07 20:57:09
#

# Every helper used below (msg, die, recoverNode, ...) is expected to come
# from the shared function library.  If that file is not installed, report
# it and stop; the exit status is 0 in that case.
[ -f /var/mmfs/etc/nfsfuncs ] || {
  echo "$0: Can't find NFS functions in /var/mmfs/etc"
  exit 0
}
. /var/mmfs/etc/nfsfuncs

# forcedTakeover ME FAILED
#
# Take over the NFS IP addresses of a failed node on behalf of this node.
#
#   $1 (me)     - GPFS IP of the node performing the takeover
#   $2 (failed) - GPFS IP of the node whose NFS IPs are being taken over
#
# Steps, in order:
#   1. Return immediately when ME and FAILED are the same address.
#   2. Create ${SHARED_RECOVERY}/ME and call stopNode for FAILED.
#   3. Call recoverNode for every address reported by getNfsIPs FAILED.
#   4. If at least one recoverNode call succeeded, record the takeover as
#      ${SHARED_RECOVERY}/ME/FAILED; otherwise remove that entry and return.
#   5. Copy FAILED's statd/sm entries under ${SHARED_NFS} to ME's, then call
#      startReclaim FAILED when checkDynamicGrace returns 1.
#
# Side effects: assigns the global variables me, failed and nfsIPs (they are
# intentionally left undeclared, as in the original, in case the sourced
# helpers read them -- NOTE(review): confirm against nfsfuncs).
forcedTakeover() {
  me=$1
  failed=$2
  typeset -i do_reclaim=0
  # Dedicated loop variable.  A plain "for ip in ..." assigns the global
  # "ip", and the callers of this function iterate with "ip" themselves and
  # read it again after the call; reusing the name here silently replaced the
  # caller's GPFS IP with the last NFS IP handled below.
  typeset takeoverIP

  [ "$me" = "$failed" ] && return

  msg "Initiating forced IP takeover of $failed due to node failure"

  _mkdir "${SHARED_RECOVERY}/$me"

  stopNode "$failed"
  nfsIPs=$(getNfsIPs "$failed")
  debugmsg "forcedTakeover ips: $nfsIPs"
  for takeoverIP in $nfsIPs; do
    # Takeover IP and issue gratuitous ARP to the clients for the node
    # that failed so that clients can reconnect to the new address
    recoverNode "$takeoverIP" "$failed"
    [ $? -eq 0 ] && do_reclaim=1
  done

  if [ $do_reclaim -ne 0 ]; then
    # got the IP
    debuglog touch "${SHARED_RECOVERY}/$me/$failed"
  else
    # did not get IP, takeover failed, remove the entry
    _unlink "${SHARED_RECOVERY}/$me/$failed"
    return
  fi
  debugmsg "forcedTakeover: File contents:"
  # Left unquoted on purpose: the listing is flattened into separate words.
  debugmsg $(ls -R "${SHARED_RECOVERY}/$me")
  # The glob must stay outside the quotes so the shell expands it.
  _cp "${SHARED_NFS}/$failed/statd/sm"/* "${SHARED_NFS}/$me/statd/sm"
  checkDynamicGrace
  [ $? -eq 1 ] && startReclaim "$failed"
}
# Main
#
# Usage: takeover GPFS-IP
#
# GPFS-IP is the GPFS address of the failed node.  The script checks it
# against the list from getAllGPFSIPs, runs forcedTakeover for it, and then
# runs forcedTakeover again for every node reported by getFailedNodes for the
# failed node, removing each of those entries from the failed node's
# recovery directory afterwards.

[ $# -eq 1 ] || die "Usage: $0 GPFS-IP"

node=$1
me=$(myGPFSIP)

# Check if failed node is a valid GPFS IP address
let valid=0
gpfsIPs=$(getAllGPFSIPs)
for ip in $gpfsIPs; do
  if [ "$ip" = "$node" ]; then
    valid=1
    break
  fi
done
[ $valid -eq 1 ] || die "$node is not a valid GPFS IP address"

debugmsg "Start forced node recovery for $node"
forcedTakeover "$me" "$node"
debuglog ls -A "$SHARED_RECOVERY/$node"
# recover other IPs that are serviced by the failed node
#
# This loop uses its own variable name rather than "ip": forcedTakeover
# iterates with the global "ip" internally, so after the call "ip" no longer
# held the node being recovered and the _unlink below removed the wrong path.
for prevFailed in $(getFailedNodes "$node"); do
  debugmsg "Do 2nd recovery for node $prevFailed"
  forcedTakeover "$me" "$prevFailed"
  _unlink "${SHARED_RECOVERY}/$node/$prevFailed"
done