[16] | 1 | #!/bin/ksh |
---|
| 2 | # @(#)11 1.1 src/avs/fs/mmfs/samples/nfscluster/takeover, mmfs, avs_rgpfs24, rgpfs24s011a 3/7/07 20:57:09 |
---|
| 3 | # |
---|
| 4 | |
---|
# Load the shared NFS helper functions from /var/mmfs/etc/nfsfuncs.
# Every helper used below (msg, die, debugmsg, recoverNode, ...) is expected
# to come from that file, so stop here when it is not present.
# NOTE(review): the script exits with status 0 even though the helpers are
# missing -- presumably so the caller does not see a failure; confirm.
[ -f /var/mmfs/etc/nfsfuncs ] || {
  echo "$0: Can't find NFS functions in /var/mmfs/etc"
  exit 0
}
. /var/mmfs/etc/nfsfuncs
---|
| 10 | |
---|
# forcedTakeover ME FAILED
#
# Take over the NFS IP addresses of node FAILED on node ME after a node
# failure.
#
#   $1 (ME)     - node performing the takeover
#   $2 (FAILED) - node whose addresses are taken over
#
# Steps, in order:
#   1. Return immediately when ME and FAILED are the same node.
#   2. Create ${SHARED_RECOVERY}/ME, stop FAILED, and call recoverNode for
#      every address reported by getNfsIPs FAILED.
#   3. If at least one recoverNode call succeeded, touch the marker
#      ${SHARED_RECOVERY}/ME/FAILED; otherwise remove that marker and return.
#   4. Copy FAILED's statd/sm entries under ${SHARED_NFS} into ME's, then call
#      startReclaim FAILED only when checkDynamicGrace exits with status 1.
#
# me, failed and nfsIPs are assigned without typeset, so they stay visible to
# the caller after return (unchanged from the original behavior).
# The return status is that of the last command executed.
forcedTakeover() {
  me=$1
  failed=$2
  typeset -i do_reclaim=0
  # Dedicated loop variable: in ksh a name() function shares variable scope
  # with its caller, so looping over a common name such as "ip" here would
  # silently overwrite the caller's own loop variable.
  typeset takeoverIP

  [ "$me" = "$failed" ] && return

  msg "Initiating forced IP takeover of $failed due to node failure"

  _mkdir "${SHARED_RECOVERY}/$me"

  stopNode "$failed"
  nfsIPs=$(getNfsIPs "$failed")
  debugmsg "forcedTakeover ips: $nfsIPs"
  # $nfsIPs is deliberately unquoted: it is a whitespace-separated list.
  for takeoverIP in $nfsIPs; do
    # Takeover IP and issue gratuitous ARP to the clients for the node
    # that failed so that clients can reconnect to the new address
    if recoverNode "$takeoverIP" "$failed"; then
      do_reclaim=1
    fi
  done

  if [ $do_reclaim -ne 0 ]; then
    # got the IP
    debuglog touch "${SHARED_RECOVERY}/$me/$failed"
  else
    # did not get IP, takeover failed, remove the entry
    _unlink "${SHARED_RECOVERY}/$me/$failed"
    return
  fi

  debugmsg "forcedTakeover: File contents:"
  # Left unquoted on purpose: the listing is passed as separate words,
  # exactly as before.
  debugmsg $(ls -R "${SHARED_RECOVERY}/$me")

  # The trailing /* must stay outside the quotes so the shell expands it.
  _cp "${SHARED_NFS}/$failed"/statd/sm/* "${SHARED_NFS}/$me/statd/sm"

  # Reclaim is started only on exit status 1 from checkDynamicGrace.
  checkDynamicGrace
  [ $? -eq 1 ] && startReclaim "$failed"
}
---|
# Main
#
# Usage: <script> GPFS-IP
#
# Runs a forced takeover of the node given as GPFS-IP onto this node, then
# repeats the takeover for every node reported by getFailedNodes for it.
# die is called on a wrong argument count or an unknown address; it is
# presumably defined in the sourced helper file and ends the script -- confirm.

[ $# -eq 1 ] || die "Usage: $0 GPFS-IP"

node=$1
me=$(myGPFSIP)

# Check if failed node is a valid GPFS IP address
# (plain assignment: "let valid=0" returns a non-zero status for the value 0)
valid=0
gpfsIPs=$(getAllGPFSIPs)
for ip in $gpfsIPs; do
  if [ "$ip" = "$node" ]; then
    valid=1
    break
  fi
done
[ $valid -eq 1 ] || die "$node is not a valid GPFS IP address"

debugmsg "Start forced node recovery for $node"
forcedTakeover "$me" "$node"
debuglog ls -A "$SHARED_RECOVERY/$node"

# recover other IPs that are serviced by the failed node
# The loop variable has a name of its own because functions called in the
# body may assign common names such as "ip" without localizing them; the
# _unlink below must still see the node that was just recovered.
for failedPeer in $(getFailedNodes "$node"); do
  debugmsg "Do 2nd recovery for node $failedPeer"
  forcedTakeover "$me" "$failedPeer"
  _unlink "${SHARED_RECOVERY}/$node/$failedPeer"
done
---|