1 | #!/bin/ksh |
---|
2 | # @(#)11 1.1 src/avs/fs/mmfs/samples/nfscluster/takeover, mmfs, avs_rgpfs24, rgpfs24s011a 3/7/07 20:57:09 |
---|
3 | # |
---|
4 | |
---|
# Pull in the shared NFS helper library. Every helper used below comes from
# it, so when it is not installed report that and stop (with exit status 0)
# before anything else runs.
[ -f /var/mmfs/etc/nfsfuncs ] || {
  echo "$0: Can't find NFS functions in /var/mmfs/etc"
  exit 0
}
. /var/mmfs/etc/nfsfuncs
---|
10 | |
---|
# forcedTakeover ME FAILED
#
# Force node ME to take over the NFS IP addresses of node FAILED.
#
# Arguments:
#   $1 - GPFS IP of the node doing the takeover
#   $2 - GPFS IP of the failed node
# Globals read:    SHARED_RECOVERY, SHARED_NFS
# Globals written: me, failed, nfsIPs (left global, as in the original code)
#
# Steps:
#   * returns immediately when ME and FAILED are the same node;
#   * calls stopNode for FAILED, then recoverNode for each address reported
#     by getNfsIPs;
#   * if at least one recoverNode call succeeded, touches the marker
#     ${SHARED_RECOVERY}/ME/FAILED, copies FAILED's statd/sm entries into
#     ME's statd/sm directory and calls startReclaim when checkDynamicGrace
#     returns 1;
#   * otherwise removes the marker and returns.
forcedTakeover() {
  me=$1
  failed=$2
  typeset -i do_reclaim=0
  # The loop variable is deliberately NOT named "ip". The main code calls
  # this function from inside its own "for ip in ..." loop and uses $ip
  # again afterwards; a loop over "ip" here would overwrite that value.
  typeset takeoverIP

  [ "$me" == "$failed" ] && return

  msg "Initiating forced IP takeover of $failed due to node failure"

  _mkdir "${SHARED_RECOVERY}/$me"

  stopNode "$failed"
  nfsIPs=$(getNfsIPs "$failed")
  debugmsg "forcedTakeover ips: $nfsIPs"
  for takeoverIP in $nfsIPs; do
    # Takeover IP and issue gratuitous ARP to the clients for the node
    # that failed so that clients can reconnect to the new address
    recoverNode "$takeoverIP" "$failed" && do_reclaim=1
  done

  if [ "$do_reclaim" -ne 0 ]; then
    # got the IP
    debuglog touch "${SHARED_RECOVERY}/$me/$failed"
  else
    # did not get IP, takeover failed, remove the entry
    _unlink "${SHARED_RECOVERY}/$me/$failed"
    return
  fi
  debugmsg "forcedTakeover: File contents:"
  # Left unquoted on purpose: the listing is passed to debugmsg as separate
  # words, exactly as before.
  debugmsg $(ls -R "${SHARED_RECOVERY}/$me")
  # The trailing glob must stay outside the quotes so it still expands.
  _cp "${SHARED_NFS}/$failed/statd/sm/"* "${SHARED_NFS}/$me/statd/sm"
  checkDynamicGrace
  [ $? -eq 1 ] && startReclaim "$failed"
}
---|
# Main
#
# Usage: takeover GPFS-IP
#
# Forces this node to take over for the failed node GPFS-IP, then does the
# same for every node reported by getFailedNodes for that failed node.

[ $# -eq 1 ] || die "Usage: $0 GPFS-IP"

node=$1
me=$(myGPFSIP)

# Check if failed node is a valid GPFS IP address
valid=0
gpfsIPs=$(getAllGPFSIPs)
for ip in $gpfsIPs; do
  if [ "$ip" == "$node" ]; then
    valid=1
    break
  fi
done
[ "$valid" -eq 1 ] || die "$node is not a valid GPFS IP address"

debugmsg "Start forced node recovery for $node"
forcedTakeover "$me" "$node"
debuglog ls -A "$SHARED_RECOVERY/$node"
# recover other IPs that are serviced by the failed node
#
# The loop variable must not be "ip": forcedTakeover runs its own
# "for ip in ..." loop over global variables, so on return $ip would hold
# an NFS address and the _unlink below would target the wrong entry.
for otherNode in $(getFailedNodes "$node"); do
  debugmsg "Do 2nd recovery for node $otherNode"
  forcedTakeover "$me" "$otherNode"
  _unlink "${SHARED_RECOVERY}/$node/$otherNode"
done
---|