1 | #!/bin/ksh |
---|
2 | # @(#)64 1.17.1.18 src/avs/fs/mmfs/samples/nfscluster/nfsfuncs, mmfs, avs_rgpfs24, rgpfs24s011a 3/7/07 20:57:04 |
---|
3 | # |
---|
4 | |
---|
# The HA-NFS definitions file is required by everything below.  When it is
# missing, say so and leave the shell with status 0.
[ -f /var/mmfs/etc/nfsdefs ] || {
  echo "$0: Can't find NFS defines(nfsdefs) in /var/mmfs/etc"
  exit 0
}

. /var/mmfs/etc/nfsdefs

# Configure path used for programs (appended after the caller's own PATH):
#   1. standard programs
#   2. mmfs programs
#   3. HA stuff
PATH=${PATH}:/sbin:/usr/sbin:/usr/bin:/bin
PATH=${PATH}:/usr/lpp/mmfs/bin:/var/mmfs/etc
PATH=${PATH}:/etc/ha.d/resource.d:/usr/lib/heartbeat
export PATH
---|
17 | |
---|
18 | # Shared storage for HA-NFS |
---|
19 | # The directory structure is as follows: |
---|
20 | # shared/.ha |
---|
21 | # nfs (mirrors /var/lib/nfs) |
---|
22 | # rmtab |
---|
23 | # node1 |
---|
24 | # statd |
---|
25 | # sm |
---|
26 | # sm.bak |
---|
27 | # ... |
---|
28 | # recovery |
---|
29 | # node1 |
---|
30 | # nodeX |
---|
31 | # ... |
---|
32 | # |
---|
33 | # Local storage for node1 |
---|
34 | # /var/lib/nfs |
---|
35 | # rmtab -> shared/.ha/nfs/rmtab |
---|
36 | # (RHEL): |
---|
37 | # statd -> shared/.ha/nfs/node1/statd |
---|
38 | # (SLES): |
---|
39 | # sm -> shared/.ha/nfs/node1/statd/sm |
---|
40 | # sm.bak -> shared/.ha/nfs/node1/statd/sm.bak |
---|
41 | # |
---|
# Locations of the HA-NFS data below the shared root
SHARED_HA=${SHARED_ROOT}/.ha
SHARED_NFS=${SHARED_HA}/nfs
SHARED_RECOVERY=${SHARED_HA}/recovery

# For failover of locks to happen correctly, the lockmgrs on all GPFS nodes
# need to listen on different port numbers so clients are forced to
# re-establish socket connection with the takeover node for reclaims
# Default port number to use for NLM (NFS lock manager)
# For node i, nlmport = NLM_PORT + i
[ -z "$NLM_PORT" ] && NLM_PORT=10000

# Dependencies for distribution - RHEL, SLES
if [ -d /etc/sysconfig/network ]; then
  IFPATH="/etc/sysconfig/network"
else
  IFPATH="/etc/sysconfig/network-scripts"
fi

typeset -i iptakeover=0  # does GPFS perform IP failover?
debug=0                  # debug level for messages to be logged?
notifyfix=0              # SM_NOTIFY fix required for SLES?
monitor=1                # monitoring of daemons required?
customLog=1              # Log file specified
hardMount=1              # Clients use "hard" NFS mounts

# NODELIST must be quoted: with an unset/empty NODELIST the unquoted form
# collapses to `[ -f ]`, which is always true and would enable IP takeover
# even though no node list exists.
[ -f "$NODELIST" ] && iptakeover=1
[ -n "$DEBUG" ] && debug=$DEBUG
[ -n "$NOTIFYFIX" ] && notifyfix=1
[ -n "$MONITOR" ] && monitor=$MONITOR
# No LOGFILE given: fall back to the mmfs log and remember it is not custom
[ -z "$LOGFILE" ] && LOGFILE=/var/mmfs/gen/mmfslog && customLog=0
[ -z "$NFSD_PROCS" ] && NFSD_PROCS=32

# Full path of the remote shell program (GPFS_RSH if given, else rsh)
if [ -n "$GPFS_RSH" ]; then
  GPFS_rshPath=$(which "$GPFS_RSH")
else
  GPFS_rshPath=$(which rsh)
fi
export GPFS_rshPath
---|
80 | |
---|
81 | ################################################################################ |
---|
82 | # Utility functions # |
---|
83 | ################################################################################ |
---|
84 | |
---|
# Print all arguments (joined into one line) on stdout, then terminate the
# shell with exit status 1.
die() {
  echo "$*"; exit 1
}
---|
89 | |
---|
# Run a command, recording it in $LOGFILE only when the debug level allows.
#   $1    - minimum value of $debug at which the command is logged
#   $2... - the command and its arguments (word-split again before running)
# When $debug >= $1 a time-stamped copy of the command line and the command's
# stdout/stderr are appended to $LOGFILE; otherwise the output is discarded.
# Returns the exit status of the command that was run.
_log() {
  level=$(($1))
  shift
  if [ $debug -lt $level ]; then
    $* > /dev/null 2>&1
    return
  fi
  echo "$(date): $*" >> $LOGFILE 2>&1
  $* >> $LOGFILE 2>&1
}

# Run a command, logged at debug level 0 and above
log() {
  _log 0 $*
}

# Run a command, logged at debug level 1 and above
debuglog() {
  _log 1 $*
}

# Run a command, logged at debug level 2 and above
debuglog2() {
  _log 2 $*
}
---|
111 | |
---|
# Append a time-stamped message to $LOGFILE if the debug level allows it.
#   $1    - minimum value of $debug at which the message is written
#   $2... - message text
# Returns non-zero (the status of the level test) when nothing is written.
_msg() {
  level=$1
  shift
  [ $debug -ge $level ] || return
  echo "$(date): $*" >> $LOGFILE 2>&1
}

# Message for level 0 and above; additionally sent to syslog (tag HA-NFS)
msg() {
  _msg 0 $*
  logger -t HA-NFS "$*"
}

# Message written to the log file only, at debug level 1 and above
debugmsg() {
  _msg 1 $*
}

# Message written to the log file only, at debug level 2 and above
debugmsg2() {
  _msg 2 $*
}

# Report an error through msg
err() {
  msg "Error: $*"
}

# Report a warning through msg
warn() {
  msg "Warning: $*"
}
---|
137 | |
---|
# Thin wrappers around file utilities.  Each one runs its command through
# _log at level 2, i.e. the command line and its output only reach $LOGFILE
# when $debug is 2 or higher; the command's exit status is returned.

# Create directories (and missing parents) with mode 0700
_mkdir() {
  _log 2 mkdir -m 0700 -p $*
}

# Remove files/directories recursively, without prompting
_rmdir() {
  _log 2 rm -rf $*
}

# Remove a single name ($1)
_unlink() {
  _log 2 unlink $1
}

# Copy, passing -dpf to cp
_cp() {
  _log 2 cp -dpf $*
}

# Move/rename, overwriting the destination
_mv() {
  _log 2 mv -f $*
}
---|
157 | |
---|
# Decide whether a line (first field) of the nodes file should be skipped.
#   $1 - text to examine; a single leading blank is ignored
# Returns 0 for an empty string or a comment (starts with #), 1 otherwise.
invalid() {
  line=${1## }
  case "$line" in
    ""|\#*) return 0 ;;
  esac
  return 1
}
---|
165 | |
---|
# Return distribution, determined from the banner in /etc/issue.
# Prints one of:
#   SLES_9 - "SUSE LINUX Enterprise Server 9"
#   SLES_8 - "SUSE SLES 8"
#   RH     - Fedora or Red Hat
#   ""     - anything else
# (The two SLES labels used to be swapped with respect to the banner text.)
getDistro() {
  if grep -q "SUSE LINUX Enterprise Server 9" /etc/issue; then
    echo "SLES_9"
  elif grep -q "SUSE SLES 8" /etc/issue; then
    echo "SLES_8"
  elif grep -q "Fedora\|Red Hat" /etc/issue; then
    echo "RH"
  else
    echo ""
  fi
}
---|
178 | |
---|
# Save old log file under a name derived from its last modified time.
# Only acts when a custom log file is in use ($customLog is 1) and $LOGFILE
# exists; the file is renamed to $LOGFILE.<date>.<time>.
rotatelog() {
  [[ $customLog -eq 1 && -f $LOGFILE ]] || return 0

  ext=$(stat -c "%y" $LOGFILE)
  ext=${ext%.*}                      # keep date and time only
  # ${ext// /.} would be simpler but doesn't work with pdksh
  ext=$(echo $ext | sed 's/ /./g')   # replace space with .
  mv $LOGFILE $LOGFILE.$ext
}
---|
189 | |
---|
190 | ################################################################################ |
---|
191 | # Network functions # |
---|
192 | ################################################################################ |
---|
193 | |
---|
# Get IP address from hostname: use /etc/hosts first, then ask `host`.
#   $1 - host name
# Prints the address found; if neither source knows the name, $1 itself is
# printed unchanged.
ipaddr() {
  ip=$(grep -w "${1}" /etc/hosts | grep -v ^# | awk '{print $1}')
  if [ -z "$ip" ]; then
    # The lookup result has to land in $ip - it used to be stored in an
    # unrelated variable, so the resolver answer was silently dropped.
    ip=$(host -n $1 | grep 'has address' | awk '{print $4}')
  fi
  if [ -n "$ip" ]; then
    echo $ip
  else
    echo $1
  fi
}
---|
206 | |
---|
# Get host name from IP address.
#   $1 - IP address
# The second column of the matching /etc/hosts entry wins; otherwise the
# last word of the `host` answer is used, minus its trailing dot.
ipname() {
  name=$(grep -w $1 /etc/hosts | grep -v ^# | awk '{print $2}')
  [ -n "$name" ] || {
    name=$(host -n $1 | grep -v 'not found:')
    name=${name##* }   # Last word is the host name
    name=${name%%.}    # Strip trailing dot
  }
  echo $name
}
---|
217 | |
---|
# Get the short host name (everything before the first dot) for an IP
# address, as resolved by ipname.
#   $1 - IP address
shortipname() {
  name=$(ipname $1)
  echo ${name%%.*}
}
---|
222 | |
---|
# Get matching subnet given two IP addresses
#   $1 - first IP address  (dotted quad)
#   $2 - second IP address (dotted quad)
#   $3 - netmask           (dotted quad)
# Prints the common network number (as an integer) when both addresses fall
# into the same subnet under the mask; prints nothing otherwise, including
# when any of the three arguments is empty.
getsubnet() {
  ip1=$1
  ip2=$2
  mask=$3

  # An empty argument cannot match anything (and would break the arithmetic)
  if [ -z "$ip1" ] || [ -z "$ip2" ] || [ -z "$mask" ]; then
    return 0
  fi

  typeset -i i1 i2 m1

  # Split on "." for the conversions below, remembering the caller's IFS so
  # it can be put back - this function may be called outside of $(...)
  _gsIFS=${IFS-__unset__}
  IFS=.

  # "set --" so that nothing is ever taken as an option to set
  set -- $ip1
  i1=$((($1<<24)+($2<<16)+($3<<8)+$4))

  set -- $ip2
  i2=$((($1<<24)+($2<<16)+($3<<8)+$4))

  set -- $mask
  m1=$((($1<<24)+($2<<16)+($3<<8)+$4))

  if [ "$_gsIFS" = "__unset__" ]; then
    unset IFS
  else
    IFS=$_gsIFS
  fi

  if [[ $((i1&m1)) == $((i2&m1)) ]]; then
    echo $((i1&m1))
  fi
}
---|
244 | |
---|
# Get configuration file for a given IP address from IFPATH
#   $1 - IP address, looked for as IPADDR...='<ip>' in ${IFPATH}/ifcfg-*
# Note: Only the first file that matches the given IP is returned; an empty
# line is printed when no file matches.
getifcfg() {
  echo $(
    grep -lw "^IPADDR.*='$1'" ${IFPATH}/ifcfg-* 2> /dev/null |
      head -n1
  )
}
---|
250 | |
---|
# Get interface name given its IP address
#   $1 - IP address
# The name is derived from the ifcfg-* file found by getifcfg and then
# translated by getcfg-interface (FIX: SLES only).  Prints an empty line
# when no configuration file matches.
getifname() {
  iface=$(getifcfg $1)
  iface=${iface##*/}   # drop the directory part
  iface=${iface#*-}    # drop the "ifcfg-" prefix
  if [ -n "$iface" ]; then
    iface=$(getcfg-interface -- $iface)
  fi
  echo $iface
}
---|
259 | |
---|
# Bring up interface corresponding to a given IP address
#   $1 - IP address
# After ifup, every default gateway found in the routing table is arping'ed
# from the address, through the interface mmgetifconf reports for it.
ifUp() {
  # Check if it is already configured and up
  #if [ -n "$(ifconfig | grep -wo $1)" ]; then
  iface=$(getifname $1)
  debuglog ifup $iface
  #fi

  # Send an arp to the default gateway just in case...
  gwIP=$(route -n | awk '/UG/ {print $2}')
  # -w: match the whole address only, as ifDown does.  Without it 10.0.0.1
  # also selects the interface that carries 10.0.0.10.
  iface=$(mmgetifconf | grep -w $1 | awk '{print $1}')
  if [ -n "$iface" ]; then
    for ip in $gwIP; do
      arping -q -c 5 -s $1 -I $iface $ip
    done
  fi
}
---|
277 | |
---|
# Take down the interface that currently carries a given IP address
#   $1 - IP address
# Nothing is done when mmgetifconf does not list the address.
ifDown() {
  eth=$(mmgetifconf | grep -w $1 | awk '{print $1}')
  debugmsg "Invoking ifdown on $eth for ip $1"

  [ -n "$eth" ] || return 0

  # FIX: SuSE only;
  # ifdown won't work on RedHat for an interface like eth0:0
  debuglog ifdown $eth
}
---|
288 | |
---|
# Bring up "bond" interface
#   $1 - name of the bonding interface
# Loads the bonding module (with $BONDING_MODULE_OPTS), brings the interface
# up and enslaves every available device named by a BONDING_SLAVE* variable.
ifBondUp() {
  iface=$1
  debuglog modprobe bonding $BONDING_MODULE_OPTS
  debuglog ifconfig $iface up

  # Get all slave interfaces from hardware descriptions
  BSINTERFACES=""
  for i in $(set | egrep "^BONDING_SLAVE"); do
    BONDING_SLAVE=${i##*=}
    [ -n "$BONDING_SLAVE" ] || continue

    BSIFACE=$(getcfg-interface -- $BONDING_SLAVE) || {   # FIX: SLES only
      debugmsg "Could not get an interface for slave"
      continue
    }

    # prepare only available slave devices
    if [ ! -d /sys/class/net/$BSIFACE ]; then
      debugmsg "Bonding Slave $BSIFACE is not available. Skipped"
      continue
    fi
    BSINTERFACES="$BSINTERFACES $BSIFACE"
  done

  # enslave the slave ifaces only once
  [ -z "$BSINTERFACES" ] || debuglog ifenslave $iface $BSINTERFACES
}
---|
317 | |
---|
# Check if a given IP address is an alias (virtual)
#   $1 - IP address
# Returns 0 when some ${IFPATH}/ifcfg-* file assigns the address through a
# suffixed IPADDR variable (IPADDR<something>='<ip>'); otherwise the
# non-zero status of grep.
isVirtualIP() {
  grep -qlw "^IPADDR..*='$1'" ${IFPATH}/ifcfg-* 2> /dev/null
}
---|
323 | |
---|
324 | ################################################################################ |
---|
325 | # Nodes list functions # |
---|
326 | ################################################################################ |
---|
327 | |
---|
# Extract GPFS IP, iface and netmask from nodes file with the format:
# GPFS_IP[:eth:mask] NFS_IP1 NFS_IP2 ...

# Print colon-separated field number $1 of the string $2
_nodeField() {
  echo $2 | awk -F: -v n="$1" '{print $n}'
}

# GPFS IP address: first field of $1
getip() {
  _nodeField 1 "$1"
}

# Interface name: second field of $1 (empty when not given)
getiface() {
  _nodeField 2 "$1"
}

# Netmask: third field of $1 (empty when not given)
getnetmask() {
  _nodeField 3 "$1"
}
---|
341 | |
---|
# Get all NFS IP addresses from nodes file
# Reads $NODELIST through file descriptor 3 and prints, one line per node,
# everything after the first field.  Empty and comment lines are skipped.
getAllNfsIPs() {
  exec 3< $NODELIST
  while read -u3 gpfs_if nfs_list; do
    invalid $gpfs_if && continue
    echo $nfs_list
  done
}
---|
353 | |
---|
# Get NFS IP addresses for a given GPFS IP address from nodes file
#   $1 - GPFS IP address
# Prints the NFS addresses of the first $NODELIST entry whose GPFS IP equals
# $1; prints nothing when there is no such entry.
getNfsIPs() {
  exec 3< $NODELIST
  while read -u3 gpfs_if nfs_list; do
    # Empty and comment lines carry no node
    invalid $gpfs_if && continue

    gpfs_ip=$(getip $gpfs_if)
    [ "$gpfs_ip" == "$1" ] || continue

    debugmsg2 "getNfsIPs: $gpfs_ip $nfs_list"
    echo $nfs_list
    break
  done
}
---|
370 | |
---|
# Get interface for a given NFS+GPFS address
#   $1 - GPFS IP address
#   $2 - NFS IP address
# Prints the interface named for $1 in $NODELIST if there is one; otherwise
# the first interface reported by mmgetifconf whose subnet contains $2.
# Prints an empty line when neither source yields an interface.
getEth() {
  gpfs_ip=$1
  nfs_ip=$2
  eth=""

  debugmsg "getEth: gpfs_ip $gpfs_ip nfs_ip $nfs_ip"

  # First try to get eth from node list
  exec 3< $NODELIST
  while read -u3 gpfs_if nfs_list; do
    # Skip empty and comment lines
    if invalid $gpfs_if; then
      continue
    fi
    if [ "$(getip $gpfs_if)" == "$gpfs_ip" ]; then
      iface=$(getiface $gpfs_if)
      if [ -n "$iface" ]; then
        debugmsg "getEth: from $NODELIST $gpfs_ip interface $iface"
        echo $iface
        exec 3<&-
        return
      fi
    fi
  done
  exec 3<&-

  # Now try to get eth from list of interfaces
  tmp=$(mktemp /tmp/mmgetifconf.XXXXXX)
  if [ -z "$tmp" ]; then
    msg "getEth: cannot create temporary file"
    echo ""
    return 1
  fi
  mmgetifconf > $tmp
  exec 3< $tmp
  # Unlink right away: the open descriptor keeps the data readable and no
  # exit path below can leave the file behind.
  rm -f $tmp
  while read -u3 iface ip mask; do
    subnet=$(getsubnet $ip $nfs_ip $mask)
    if [ -n "$subnet" ]; then
      debugmsg "getEth: from mmgetifconf $nfs_ip interface $iface"
      echo $iface
      exec 3<&-
      return
    fi
  done
  exec 3<&-

  msg "getEth: not found $gpfs_ip interface"
  echo ""
}
---|
413 | |
---|
# Get netmask for a given NFS+GPFS IP address
#   $1 - GPFS IP address
#   $2 - NFS IP address
# Sources, in order: the mask given for $1 in $NODELIST, the mask of the
# first mmgetifconf line containing the leading part of $2 (last octet
# removed), and finally the default 255.255.255.0.
getmask() {
  gpfs_ip=$1
  nfs_ip=$2

  # First try to get mask from node list
  exec 3< $NODELIST
  while read -u3 gpfs_if nfs_list; do
    invalid $gpfs_if && continue                           # blank/comment
    [ "$(getip $gpfs_if)" == "$gpfs_ip" ] || continue      # other node
    mask=$(getnetmask $gpfs_if)
    [ -n "$mask" ] || continue                             # no mask listed
    debugmsg "getmask: from $NODELIST $gpfs_ip netmask $mask"
    echo $mask
    return
  done

  # Now try from list of interfaces
  sub_ip=${nfs_ip%.*}
  mask=$(mmgetifconf | grep -m1 $sub_ip | awk '{print $3}')
  if [ -z "$mask" ]; then
    msg "getmask: not found $gpfs_ip netmask default"
    echo "255.255.255.0"
    return
  fi
  debugmsg "getmask: from get_ifconf $gpfs_ip netmask $mask"
  echo $mask
}
---|
449 | |
---|
# Return the next node for a given node and a start point.
#   $1 - GPFS IP of the current node
#   $2 - GPFS IP at which the walk started
# The GPFS IPs from getAllGPFSIPs are treated as a ring.  Prints the entry
# following $1 (wrapping from the last entry to the first); prints nothing
# when $1 is not in the list or when the following entry is $2.
getNextNode() {
  given=$1
  start=$2
  typeset -i next n i=0
  set -A gpfsIPs $(getAllGPFSIPs)
  n=${#gpfsIPs[@]}

  # Locate the given node.  "$i" is expanded explicitly, like in the other
  # tests below, instead of relying on the shell to evaluate a bare name.
  while [ $i -lt $n ]; do
    [ "${gpfsIPs[$i]}" == "$given" ] && break
    i=$((i+1))
  done
  [ $i -eq $n ] && return   # not found
  next=$((i+1))
  [ $next -eq $n ] && next=0   # wrap around
  [ "${gpfsIPs[$next]}" == "$start" ] && return
  echo ${gpfsIPs[$next]}
}
---|
468 | |
---|
469 | |
---|
470 | ################################################################################ |
---|
471 | # GPFS functions # |
---|
472 | ################################################################################ |
---|
473 | |
---|
# Get GPFS IP addresses from nodes file
# Prints, one per line, the IP part of the first field of every $NODELIST
# entry.  Empty and comment lines are skipped.
getAllGPFSIPs() {
  exec 3< $NODELIST
  while read -u3 gpfs_if nfs_list; do
    invalid $gpfs_if && continue
    debugmsg2 "getAllGPFSIPs: $gpfs_if"
    echo $(getip $gpfs_if)
  done
}
---|
486 | |
---|
# Get current node's GPFS IP address
# Prints the first address from getAllGPFSIPs that also shows up (as a whole
# word) in the mmgetifconf output; prints nothing if none does.
myGPFSIP() {
  for ip in $(getAllGPFSIPs); do
    my_ip=$(mmgetifconf | grep -w $ip)
    [ "$my_ip" = "" ] && continue
    echo $ip
    break
  done
}
---|
497 | |
---|
# Check whether a path lies on a GPFS filesystem listed in /etc/fstab
#   $1 - path (e.g. an export)
# Returns 0 when $1 starts with the mount point of some gpfs entry in
# /etc/fstab, 1 otherwise.
isGpfsFS() {
  # Get list of GPFS filesystems from /etc/fstab
  fses1="$(grep -w gpfs /etc/fstab | awk '{print $2}')"
  exp1=$1
  for fs1 in $fses1; do
    debugmsg isGpfsFS: exp=$exp1 fs=$fs1
    # Stripping the mount point changes the path only if it is a prefix
    [ "${exp1##$fs1}" = "$exp1" ] && continue
    debugmsg isGpfsFS: return 0
    return 0
  done
  debugmsg isGpfsFS: $1 return 1
  return 1
}
---|
512 | |
---|
# Mount GPFS filesystems that are to be NFS-exported
# For every export path in /etc/exports, `mount` is run (through debuglog)
# on each gpfs mount point from /etc/fstab that is a prefix of that path.
mountExportedFS() {
  # Get list of GPFS filesystems from /etc/fstab
  fses="$(grep -w gpfs /etc/fstab | awk '{print $2}')"
  # Get list of GPFS exports from /etc/exports
  exports="$(awk '{print $1}' /etc/exports | grep ^/ | sort -u)"
  for exp in $exports; do
    for fs in $fses; do
      [ "${exp##$fs}" = "$exp" ] || debuglog mount $fs
    done
  done
}
---|
527 | |
---|
# Check whether the mounted GPFS filesystem of a device is NFS-exported
#   $1 - device name, matched as "dev=$1" in the gpfs entries of /etc/mtab
# Returns 0 when some path in /etc/exports starts with the mount point of
# that filesystem, 1 otherwise.
isExported() {
  # Get the GPFS filesystem from /etc/mtab
  fs="$(grep -w "gpfs .*dev=$1" /etc/mtab | awk '{print $2}')"
  debugmsg isExported: $fs
  # Get list of GPFS exports from /etc/exports
  exports="$(awk '{print $1}' /etc/exports | grep ^/ | sort -u)"
  for exp in $exports; do
    [ "${exp##$fs}" = "$exp" ] && continue
    debugmsg isExported: $fs return 0
    return 0
  done
  debugmsg isExported: $fs return 1
  return 1
}
---|
543 | |
---|
# Check whether there is anything to serve over NFS
# First mounts the exported GPFS filesystems (mountExportedFS).  Returns 0
# as soon as an /etc/exports path either lies on a GPFS filesystem that is
# mounted according to /etc/mtab, or is not on GPFS at all (per isGpfsFS).
# Returns 1 when every export is on GPFS and none of them is mounted.
isAnyExported() {
  mountExportedFS
  # Get list of mounted GPFS filesystems from /etc/mtab
  fses="$(grep " gpfs .*dev=" /etc/mtab | awk '{print $2}')"
  debugmsg isAnyExported: $fses
  # Get list of GPFS exports from /etc/exports
  exports="$(awk '{print $1}' /etc/exports | grep ^/ | sort -u)"
  for exp in $exports; do
    for fs in $fses; do
      debugmsg isAnyExported: exp=$exp fs=$fs
      [ "${exp##$fs}" = "$exp" ] && continue
      debugmsg isAnyExported: return 0
      return 0
    done
    isGpfsFS $exp
    rc=$?
    # export if not gpfs
    [ $rc -eq 0 ] && continue
    debugmsg isAnyExported: $exp is not GPFS return 0
    return 0
  done
  debugmsg isAnyExported: return 1
  return 1
}
---|
570 | |
---|
# Check whether a path lies on a currently mounted GPFS filesystem
#   $1 - path (e.g. an export)
# Returns 0 when $1 starts with the mount point of a gpfs entry in
# /etc/mtab, 1 otherwise.
isMounted() {
  # Get list of GPFS filesystems from /etc/mtab
  fses="$(grep " gpfs .*dev=" /etc/mtab | awk '{print $2}')"
  debugmsg isMounted: $fses
  exp=$1
  for fs in $fses; do
    [ "${exp##$fs}" = "$exp" ] && continue
    debugmsg isMounted: return 0
    return 0
  done
  debugmsg isMounted: return 1
  return 1
}
---|
586 | |
---|
# Check whether $SHARED_ROOT lives on the GPFS filesystem of a device
#   $1 - device name, matched as "dev=$1" in the gpfs entries of /etc/mtab
# Returns 0 when $SHARED_ROOT starts with that filesystem's mount point,
# 1 otherwise.
isSharedRoot() {
  # Get filesystem from /etc/mtab
  fs=$(grep -w "gpfs .*dev=$1" /etc/mtab | awk '{print $2}')
  [ "${SHARED_ROOT##$fs}" != "$SHARED_ROOT" ] && return 0
  return 1
}
---|
595 | |
---|
# Run mmdsh command
#   $* - passed to "mmdsh -vL" unchanged (as used in this file: the target
#        node list followed by the command)
# The full command line is recorded with debugmsg first; the exit status is
# that of mmdsh.
mmdshcmd() {
  debugmsg "mmdsh -vL $*"
  mmdsh -vL $*
}
---|
601 | |
---|
# Run mmdsh command and return exit code correctly
#   $1    - target nodes
#   $2    - command, given relative to / (it is invoked through mmremote
#           with a path relative to /usr/lpp/mmfs/bin)
#   $3... - parameters for the command
# The command is run via "mmremote onbehalf2".  Afterwards the suffix of a
# file named ${tmpDir}hanfs.<n> is taken as the return code; without such a
# file 0 is returned.
# NOTE(review): "sort -rn" is applied to full path names, so the file picked
# when several exist may not be the numerically largest - confirm intent.
mmdshcmdRC() {
  debugmsg "mmdsh -vL $*"

  # FIX: Use the following from mmglobfuncs
  remoteVerb=hanfs
  tmpDir=/var/mmfs/tmp/
  mmremote=/usr/lpp/mmfs/bin/mmremote
  MMMODE=LC
  NO_LINK=_NO_LINK_

  typeset -i rc=0
  myIP=$(myGPFSIP)
  targets=$1
  cmd=../../../../$2   # relative to /usr/lpp/mmfs/bin
  shift 2
  parms=$*

  # Start without stale result files, run, then collect and clean up
  rm -f $tmpDir/$remoteVerb.*
  mmdsh -vL $targets $mmremote onbehalf2 $myIP $remoteVerb $MMMODE $NO_LINK $cmd $parms
  rcInfo=$(ls $tmpDir$remoteVerb.* 2> /dev/null | sort -rn | head -1)
  rm -f $tmpDir$remoteVerb.*

  [ -n "$rcInfo" ] && rc=${rcInfo#$tmpDir$remoteVerb\.}
  return $rc
}
---|
629 | |
---|
# Run command on all GPFS nodes
#   $* - command and arguments
# The addresses from getAllGPFSIPs are joined with commas and handed to
# mmdshcmd as the target list.
mmdshAll() {
  gpfsIPs=$(getAllGPFSIPs)
  gpfsIPs=$(echo $gpfsIPs | sed 'y/ /,/')
  mmdshcmd $gpfsIPs $*
}
---|
636 | |
---|
# Stop another node
#   $1 - target node
# Starts, in the background via mmdshcmd, "nfsmonitor -e" followed by
# stop.nfs on the target.  Returns 1 when no node is given, otherwise 0
# without waiting for the remote command.
stopNode() {
  if [ -z "$1" ]; then
    return 1
  fi
  cmd="/var/mmfs/etc/nfsmonitor -e && /var/mmfs/etc/stop.nfs"
  mmdshcmd $1 "$cmd" &
  return 0
}
---|
644 | |
---|
# Return 0 (success) if the given node is reported as "up"; 1 otherwise
#   $1 - node, as it appears in the "mmfsadm dump cfgmgr" output
# A node counts as up when that dump has a line matching "$1 .* up ".
# An empty argument yields 1.
# (Earlier variants based on tsstatus / mmgetstate / mmremote mmGetState
# are no longer used.)
isNodeUp() {
  if [ -z "$1" ]; then
    return 1
  fi
  mmfsadm dump cfgmgr | grep -q "$1 .* up "
}
---|
656 | |
---|
# Exit if GPFS is "down" on this node
# Returns 0 while an mmfsd process exists.  Otherwise runs stop.nfs and
# "mmfsadm cleanup", then terminates the shell with status 1.
ifGPFSDownExit() {
  if [ -z "$(pidof mmfsd)" ]; then
    stop.nfs
    mmfsadm cleanup
    exit 1
  fi
  return 0
}
---|
664 | |
---|
665 | ################################################################################ |
---|
666 | # Configuration functions # |
---|
667 | ################################################################################ |
---|
668 | |
---|
# Check status of a service
#   $1 - service/daemon name
# Uses checkproc when /sbin/checkproc exists (with -n for nfsd and lockd),
# else the "status" function from /etc/rc.d/init.d/functions.  Returns the
# status of whichever was used, or 3 when neither is available.
checkStatus() {
  if [ -e /sbin/checkproc ]; then
    case "$1" in
      nfsd|lockd) opts="-n" ;;
      *)          opts="" ;;
    esac
    checkproc $opts $1 > /dev/null 2>&1
    return $?
  fi

  if [ ! -f /etc/rc.d/init.d/functions ]; then
    return 3
  fi

  # Sourcing the init functions must not change our PATH
  savedpath=$PATH
  . /etc/rc.d/init.d/functions
  PATH=$savedpath
  status $1 > /dev/null 2>&1
}
---|
688 | |
---|
# Get service for communicating between GPFS nodes
# Prints $GPFS_RSHD when it is set.  Otherwise the service is derived from
# $GPFS_rshPath: xinetd for /usr/bin/rsh, sshd for /usr/bin/ssh, and an
# empty line (plus a debug message) for anything else.
rshService() {
  if [ -n "$GPFS_RSHD" ]; then
    echo "$GPFS_RSHD"
    return
  fi

  case "$GPFS_rshPath" in
    /usr/bin/rsh)
      echo "xinetd"
      ;;
    /usr/bin/ssh)
      echo "sshd"
      ;;
    *)
      debugmsg "Unsupported service $GPFS_rshPath"
      echo ""
      ;;
  esac
}
---|
702 | |
---|
# Start rsh (or ssh) server for communication between GPFS nodes
# The service name comes from rshService; its init script is started only
# when checkStatus reports the service as not running.
# Returns 1 when rshService knows no service for the configured remote
# shell - there is no init script to run in that case.
startRshd() {
  service=$(rshService)
  # An empty name would otherwise end up executing "/etc/init.d/ start"
  [ -n "$service" ] || return 1
  checkStatus $service
  if [ $? -ne 0 ]; then
    /etc/init.d/$service start
  fi
}
---|
711 | |
---|
# Print the port the NFS lock manager currently uses: the nlockmgr port
# registered with the portmapper or, if none is registered, the value of
# the fs.nfs.nlm_tcpport sysctl.
_getNLMPort() {
  _nlmCur=$(rpcinfo -p 2> /dev/null | grep -m1 nlockmgr | awk '{print $4}')
  [ -z "$_nlmCur" ] && _nlmCur=$(sysctl -n fs.nfs.nlm_tcpport)
  echo $_nlmCur
}

# Configure NLM ports
#   $1 - node name/address (used in the warning message only)
# Makes the lock manager listen on NLM_PORT + <node number>.  When the
# current port differs, both nlm sysctls are set and NFS is restarted.
# Returns 0 when the port is as wanted, 1 when it could not be changed.
configNLMPorts() {
  # Determine which port to use for NLM from the node id
  # and ensure it is set
  typeset -i nlmport curport nodeid=0
  while [ $nodeid -eq 0 ]; do
    #nodeid=$(mmgetstate -k | awk -F: '{print $2}')
    #nodeid=$(mmlscluster | grep -w $1 | awk '{print $1}')
    nodeid=$(mmdsm dsmGetNodeNumber)
    [ $nodeid -eq 0 ] && warn "Cannot get nodeid for $1 from mmgetstate, retrying..."
  done
  nlmport=$NLM_PORT+$nodeid

  # Ensure that nfsd is loaded
  debuglog modprobe nfsd

  # The port is looked up in a helper that works on a plain string: curport
  # is an integer variable, so an empty rpcinfo answer assigned to it
  # directly turns into 0 and the sysctl fallback would never be tried.
  curport=$(_getNLMPort)
  if [ $curport -ne $nlmport ]; then
    debugmsg "Current NLM port used is $curport, should be $nlmport"
    debuglog sysctl -w fs.nfs.nlm_tcpport=$nlmport
    debuglog sysctl -w fs.nfs.nlm_udpport=$nlmport
    nfsService stop
    nfsService start
    # Check if the port got assigned correctly
    curport=$(_getNLMPort)
    if [ $curport -ne $nlmport ]; then
      err "Cannot change existing port $curport to $nlmport for HA-NFS. Terminating..."
      return 1
    fi
  fi
  return 0
}
---|
746 | |
---|
# Get system boot time
# Prints the value of the "btime" line of /proc/stat.
getBootTime() {
  awk '$1 == "btime" {print $2}' /proc/stat
}
---|
751 | |
---|
# Create a place to backup entries (statd/sm) different from /var/lib/nfs
# We need this because sending SM_NOTIFY messages on failover/failback wipes
# out client entries from /var/lib/nfs/sm and these do not get created again
# until next reboot
# This backup will be cleaned up on next reboot
#   $1 - backup directory
# First use (no $1/btime yet): create $1/sm and $1/sm.bak and record the
# current boot time in $1/btime.  Later calls: if the system has booted
# since the recorded time, remove $1/sm and $1/sm.bak and record the new
# boot time; otherwise leave everything alone.
backupSmDir() {
  typeset -i current_btime saved_btime

  if [ ! -f $1/btime ]; then
    _mkdir $1/sm $1/sm.bak
    # Save boot time so we can decide when to cleanup $1
    btime=$(getBootTime)
    debugmsg "Saving current boot time $btime in $1"
    echo $btime > $1/btime
    return
  fi

  current_btime=$(getBootTime)
  saved_btime=$(cat $1/btime)
  [ $current_btime -gt $saved_btime ] || return 0

  # Erase backup smdir since a reboot has happened
  debugmsg "Erasing backup statd dirs in $1"
  _rmdir $1/sm $1/sm.bak
  echo $current_btime > $1/btime
}
---|
777 | |
---|
# Keep the following data from /var/lib/nfs in shared space (GPFS)
# so all nodes have access to it for failover/failback purposes:
#   rmtab
#   sm
#   sm.bak
#   $1 - this node's IP; its statd data lives in $SHARED_NFS/$1/statd
# The local sm and sm.bak directories are replaced by symlinks into the
# shared space (existing entries are moved there first).
# Returns 0 on success, 1 when a symlink cannot be created.
shareSmDir() {
  myip=$1

  sh_rmtab=$SHARED_NFS/rmtab
  [ ! -f $sh_rmtab ] && touch $sh_rmtab && chmod 644 $sh_rmtab
  # No need to share rmtab. Its no longer used to validate NFS requests.
  # ln -sf $sh_rmtab /var/lib/nfs/rmtab

  sh_statd=$SHARED_NFS/$myip/statd
  _mkdir $sh_statd/sm $sh_statd/sm.bak

  # sm lives below statd/ when that exists, directly in /var/lib/nfs if not
  smdir=/var/lib/nfs/sm
  [ -e /var/lib/nfs/statd ] && smdir=/var/lib/nfs/statd/sm

  if [[ -d $smdir && ! -L $smdir ]]; then
    # Move stuff from local smdir to shared
    _mv $smdir/* $sh_statd/sm
    _mv ${smdir}.bak/* $sh_statd/sm.bak
    _rmdir $smdir ${smdir}.bak
  fi

  if [ ! -d $smdir ]; then
    ln -sf $sh_statd/sm $smdir || {
      err "Failed to link $smdir to $sh_statd/sm"
      return 1
    }
    ln -sf $sh_statd/sm.bak ${smdir}.bak || {
      err "Failed to link ${smdir}.bak to $sh_statd/sm.bak"
      return 1
    }
  fi

  if [ -e /var/lib/nfs/statd ]; then
    # Redhat requires rpcuser as uid/gid for statd stuff
    chown -R rpcuser.rpcuser $sh_statd
  fi
  return 0
}
---|
823 | |
---|
# Remove the symlinks created by shareSmDir
# Each of rmtab, sm and sm.bak that is a symlink is unlinked and replaced by
# an empty local file (rmtab) or directory (sm, sm.bak).
unshareSmDir() {
  # sm lives below statd/ when that exists, directly in /var/lib/nfs if not
  smdir=/var/lib/nfs/sm
  if [ -e /var/lib/nfs/statd ]; then
    smdir=/var/lib/nfs/statd/sm
  fi

  [ -L /var/lib/nfs/rmtab ] &&
    _unlink /var/lib/nfs/rmtab &&
    touch /var/lib/nfs/rmtab
  [ -L $smdir ] &&
    _unlink $smdir &&
    _mkdir $smdir
  [ -L ${smdir}.bak ] &&
    _unlink ${smdir}.bak &&
    _mkdir ${smdir}.bak
}
---|
835 | |
---|
# Configure GPFS for HA-NFS - first time only
#   $1 - this node's IP
# Verifies that $SHARED_ROOT exists and is on a GPFS filesystem, creates the
# shared nfs and recovery directories and moves the statd data to shared
# space.  Returns 1 when the shared directory is unusable, otherwise the
# status of shareSmDir.
configHA() {
  myip=$1

  # Check if this is the first time we are configuring
  # FIX: check needed here?

  # Configure NLM ports
  # Note: This is now done by the startup script /etc/init.d/gpfs
  # configNLMPorts $myip

  # Check the shared directory is available and on GPFS
  [ -d $SHARED_ROOT ] || {
    err "Cannot find shared directory $SHARED_ROOT"
    return 1
  }
  df -Tl $SHARED_ROOT | grep -qw gpfs || {
    err "$SHARED_ROOT found but is not on a GPFS filesystem"
    return 1
  }
  debugmsg "Shared fs is $SHARED_ROOT"

  # Create shared data for HA-NFS (statd, rmtab) and recovery
  _mkdir $SHARED_NFS $SHARED_RECOVERY

  shareSmDir $myip
}
---|
866 | |
---|
867 | ################################################################################ |
---|
868 | # NFS functions # |
---|
869 | ################################################################################ |
---|
870 | |
---|
# Start rpc.mountd with the options configured on this system
# (the mountd part of /etc/init.d/nfs, unrolled).  Options are built from
# MOUNTD_PORT, MOUNTD_NFS_V2 and MOUNTD_NFS_V3 after sourcing the init
# functions and the network/nfs sysconfig files, where present.  The daemon
# is launched with startproc when /sbin/startproc exists, else with daemon.
startMountd() {
  savedpath=$PATH
  RPCMOUNTDOPTS=
  if [ -f /etc/init.d/functions ]; then
    . /etc/init.d/functions
  fi
  if [ -f /etc/sysconfig/network ]; then
    . /etc/sysconfig/network
  fi
  if [ -f /etc/sysconfig/nfs ]; then
    . /etc/sysconfig/nfs
  fi
  # The sourced files must not change our PATH
  PATH=$savedpath

  if [ -n "$MOUNTD_PORT" ]; then
    RPCMOUNTDOPTS="$RPCMOUNTDOPTS -p $MOUNTD_PORT"
  fi
  case $MOUNTD_NFS_V2 in
    no|NO) RPCMOUNTDOPTS="$RPCMOUNTDOPTS --no-nfs-version 2" ;;
  esac
  case $MOUNTD_NFS_V3 in
    no|NO) RPCMOUNTDOPTS="$RPCMOUNTDOPTS --no-nfs-version 3" ;;
  esac

  if [ ! -e /sbin/startproc ]; then
    daemon rpc.mountd $RPCMOUNTDOPTS
  else
    debuglog startproc /usr/sbin/rpc.mountd $RPCMOUNTDOPTS
  fi
}
---|
894 | |
---|
# Control the NFS services of this node
#   $1 - action:
#        start        - start the NFS server and lock services, make sure
#                       mountd uses $MOUNTD_PORT (if set), set the number of
#                       nfsd processes and re-export everything
#        stop         - stop the NFS server
#        terminate    - stop the NFS server and the lock service
#        soft-restart - set the number of nfsd processes to 0 and back to
#                       $NFSD_PROCS
# Any other action prints a usage message.
nfsService() {
  nfslock=/etc/init.d/nfslock
  if [ -f /etc/init.d/nfsserver ]; then
    nfs=/etc/init.d/nfsserver
  else
    nfs=/etc/init.d/nfs
  fi

  case $1 in
    start)
      msg "Starting NFS services"
      sysctl -e -q -w fs.nfs.use_underlying_lock_ops=1
      debuglog $nfs start
      debuglog $nfslock start
      if [ -n "$MOUNTD_PORT" ]; then
        # Make sure mountd is bound to the right port if specified
        curport=$(rpcinfo -p 2> /dev/null | grep -m1 mountd | awk '{print $4}')
        if [ "$curport" != "$MOUNTD_PORT" ]; then
          pid=$(pidof rpc.mountd)
          debugmsg "Current mountd port is $curport, should be $MOUNTD_PORT. Stopping current rpc.mountd (pid $pid) and restarting with correct port."
          # No running mountd means there is nothing to kill
          [ -n "$pid" ] && debuglog kill -9 $pid
          startMountd
        fi
      fi
      # Update number of nfsd processes
      debuglog rpc.nfsd $NFSD_PROCS

      # Reload exportfs anyway since starting nfs server may not do this
      # if it was already running
      exportfs -r
      ;;

    stop)
      msg "Stopping NFS services"
      debuglog $nfs stop
      ;;

    terminate)
      msg "Cleaning NFS services"
      debuglog $nfs stop
      debuglog $nfslock stop
      ;;

    soft-restart)
      debuglog rpc.nfsd 0
      debuglog sleep 1   # FIX: required?
      debuglog rpc.nfsd $NFSD_PROCS
      ;;

    *)
      # List the actions that are really implemented above
      echo "Usage: $0 start|stop|terminate|soft-restart"
      ;;
  esac
}
---|
948 | |
---|
949 | ################################################################################ |
---|
950 | # NLM functions # |
---|
951 | ################################################################################ |
---|
952 | |
---|
953 | # Restart rpc.statd |
---|
954 | # Note that we don't want to issue a "nfslock restart" directly since this would |
---|
955 | # involve restarting lockd (which results in lock recovery) which we don't want |
---|
# Does nothing unless /sbin/rpc.statd is installed.
# Globals:  statdpids (set to the PIDs reported by pidof)
# Returns:  status of the "nfslock start" call, or 0 when statd is not installed
restartStatd() {
  if [ -f /sbin/rpc.statd ]; then
    # Kill the statd process if exists (important for registering with portmap).
    # Skip the kill when pidof finds nothing: "kill -9" without a PID only
    # yields a usage error.
    statdpids=$(/sbin/pidof -x /sbin/rpc.statd)
    [ -n "$statdpids" ] && debuglog kill -9 $statdpids
    # Start the statd process
    debuglog /etc/init.d/nfslock start
  fi
}
---|
964 | |
---|
965 | # Release all locks by sending a KILL signal to kernel lockd thread |
---|
# Globals:  lockdpid (set on the non-SuSE path)
# Returns:  status of "nfslock stop" on SuSE; otherwise the status of the
#           kill, or non-zero when no [lockd] process was found
resetLockd() {
  # SuSE (recognised by its nfsserver init script): stopping nfslock is enough
  if [ -f /etc/init.d/nfsserver ]; then
    debuglog /etc/init.d/nfslock stop
    return $?
  fi

  # Redhat fails to kill lockd to start grace period, so do it explicitly
  lockdpid=$(ps -aef | grep -w "\[lockd\]" | awk '{print $2}')
  [ -n "$lockdpid" ] && kill -9 $lockdpid
}
---|
976 | |
---|
977 | # Check grace period support in kernel |
---|
# Note the inverted convention that callers test with "$? -eq 1":
# Returns:  1 when /proc/fs/nfsd/grace exists (dynamic grace available),
#           0 when it does not
checkDynamicGrace() {
  # Try to mount the nfsd filesystem so the grace file can be looked up
  debuglog2 mount -t nfsd nfsd /proc/fs/nfsd
  if [ ! -f /proc/fs/nfsd/grace ]; then
    debugmsg2 "Cannot find /proc/fs/nfsd/grace, will restart lockd (and reclaim all locks) for failover/failback."
    return 0
  fi
  return 1
}
---|
984 | |
---|
985 | # Start grace period |
---|
# Uses /proc/fs/nfsd/grace when checkDynamicGrace reports it (status 1);
# otherwise falls back to resetting lockd.
# Returns:  status of the write to the grace file, or of resetLockd
startGrace() {
  checkDynamicGrace
  case $? in
    1)
      echo 1 > /proc/fs/nfsd/grace
      ;;
    *)
      # Kernel does not support starting grace period through /proc
      # Only thing to do is kill lockd
      msg "Dynamic enabling of grace period not supported in this kernel. Restarting lockd"
      resetLockd
      ;;
  esac
}
---|
997 | |
---|
998 | # Send SM_NOTIFY message to client on server restart |
---|
999 | # Usage: notify statd_dir [server IP] |
---|
# Arguments: $1 - statd directory (its sm/ subdirectory holds the client list)
#            $2 - optional name or address to send the notification as
# Globals:   opts (reset on every call, then set from $2)
# Returns:   status of the notify command run through debuglog2; 0 when
#            neither /sbin/sm-notify nor /sbin/rpc.statd exists
notify() {
  opts=""
  if [ -e /sbin/sm-notify ]; then             # SLES - kernel-space statd
    # sm-notify is run without a directory argument, so the client
    # entries are first copied into /var/lib/nfs/sm
    _cp $1/sm/* /var/lib/nfs/sm
    if [ -n "$2" ]; then
      opts="-m 1 -v $2"
    fi
    debugmsg2 "Notify clients: " && log ls $1/sm
    debuglog2 sm-notify $opts
  elif [ -e /sbin/rpc.statd ]; then           # RHEL - user-space statd
    if [ -n "$2" ]; then
      opts="-P $2"
    fi
    debugmsg2 "Notify clients: " && log ls $1/sm
    debuglog2 rpc.statd -N -n $1 $opts
  fi
}
---|
1013 | |
---|
1014 | # Send SM_NOTIFY message to client on all available interfaces |
---|
1015 | # SLES9 has a bug wherein the NFS client compares the hostname in the notify |
---|
1016 | # message against the hostname it registered during mount to determine whether |
---|
1017 | # to handle SM_NOTIFY requests coming from the server. We try to work around |
---|
1018 | # the problem by (optionally) sending notify messages on combinations of |
---|
1019 | # hostnames and IP address |
---|
# Arguments: $1 - host whose names/addresses are used as notify source
#            $2 - statd directory handed to notify
# Globals:   VIP, notifyfix (read); shortname, host, ip (set)
# With notifyfix equal to 0 only the first applicable notification is sent
# (the VIP one if VIP is set, otherwise the short host name one).
notifyClient() {
  debugmsg2 "notify host:$1, vip:$VIP, statddir:$2"

  # If VIP is specified, always send notify on the VIP
  if [ -n "$VIP" ]; then
    debugmsg "SM_NOTIFY clients for VIP $VIP"
    notify $2 $VIP
    if [ $notifyfix -eq 0 ]; then
      return
    fi
  fi

  # Short host name
  shortname=$(shortipname $1)
  debugmsg "Notify for host $shortname"
  notify $2 $shortname
  if [ $notifyfix -eq 0 ]; then
    return
  fi

  # Full host name, unless it is empty or the same as the short one
  host=$(ipname $1)
  if [[ -z "$host" || "$host" == "$shortname" ]]; then
    :
  else
    debugmsg "Notify for host.domain $host"
    notify $2 $host
  fi

  # IP address, unless it is empty or repeats one of the names already used
  ip=$(ipaddr $1)
  if [[ -z "$ip" || "$ip" == "$host" || "$ip" == "$shortname" ]]; then
    return 0
  fi
  debugmsg "Notify for IP $ip"
  notify $2 $ip
}
---|
1050 | |
---|
1051 | ################################################################################ |
---|
1052 | # Failover functions # |
---|
1053 | ################################################################################ |
---|
1054 | |
---|
# Trigger NLM lock reclaim for the clients of a node by sending them
# SM_NOTIFY on the address(es) they used.
# Arguments: $1 - GPFS IP of the node whose locks are to be reclaimed
# Globals:   SHARED_NFS, iptakeover (read); gpfs_ip, smdir, statedir,
#            nfsip, ip (set)
# Returns:   status of the final _cp
startReclaim() {
  gpfs_ip=$1
  smdir=${SHARED_NFS}/$gpfs_ip/statd/sm
  statedir=/tmp/statd

  msg "Reclaim of NLM locks initiated for node $gpfs_ip"

  # Keep a private copy of the node's client list in sm.bak; sm is refilled
  # from it before every notifyClient call
  _mkdir $statedir/sm $statedir/sm.bak
  _cp $smdir/* $statedir/sm.bak

  if [ $iptakeover -eq 1 ]; then
    # Deliberately unquoted: a node can have several NFS IPs and each one
    # needs its own notifyClient call. Quoting the substitution made the
    # loop run once with the whole list (and once even for an empty list),
    # so notifyClient received a second IP in place of the statd directory.
    for nfsip in $(getNfsIPs $gpfs_ip); do
      _cp $statedir/sm.bak/* $statedir/sm
      notifyClient $nfsip $statedir
    done
  else
    # get VIP from loopback
    ip=$(mmgetifconf | grep -w 'lo' | awk '{print $2}')
    _cp $statedir/sm.bak/* $statedir/sm
    if [[ -n "$ip" && "$ip" != "$gpfs_ip" ]]; then
      notifyClient $ip $statedir
    fi
  fi
  # restore list of lock users
  _cp $statedir/sm.bak/* $smdir
}
---|
1081 | |
---|
1082 | # Find the entry after the one for the node that failed and use it as the |
---|
1083 | # takeover node. If there are no more lines, wrap around to the top. Check that |
---|
1084 | # the node is up; if not, use the next entry. There can be several NFS external |
---|
1085 | # IP addresses for each GPFS IP. |
---|
# Arguments: $1 - GPFS IP of the failed node
# Globals:   NODELIST (read: one "gpfs_interface nfs_ip..." entry per line);
#            failed_node, next, gpfs_ip, gpfs_if, nfs_list (set)
# Outputs:   GPFS IP of the chosen takeover node on stdout
# Does not return until a node that is up has been found; the list is
# re-scanned every 10 seconds.
selectNode() {
  failed_node=$1
  typeset -i next=0
  gpfs_ip=""

  while true; do
    # The node list is read on descriptor 3 so commands run inside the loop
    # keep their own stdin. The redirection is attached to the loop itself,
    # which closes the descriptor again on every exit path (including the
    # "return" below) instead of leaking it as a bare "exec 3<" did.
    while read -u3 gpfs_if nfs_list; do
      # Skip empty and comment lines
      if invalid $gpfs_if; then
        continue
      fi
      gpfs_ip=$(getip $gpfs_if)
      debugmsg2 "selectNode: GPFS IP: $gpfs_ip, NFS IP:$nfs_list"
      if [ $next -eq 1 ]; then
        # Past the failed node's entry: take the first node that is up
        [ "$gpfs_ip" == "$failed_node" ] && continue
        isNodeUp $gpfs_ip
        if [ $? -eq 0 ]; then
          debugmsg "takeover node is $gpfs_ip"
          echo $gpfs_ip
          return
        else
          debugmsg "selectNode: takeover_node $gpfs_ip is down"
          continue
        fi
      else
        [ "$gpfs_ip" == "$failed_node" ] && next=1
      fi
    done 3< "$NODELIST"

    # Didn't find a takeover node, so start from top searching for new node
    debugmsg "selectNode: start from top"
    while read -u3 gpfs_if nfs_list; do
      # Skip empty and comment lines
      if invalid $gpfs_if; then
        continue
      fi
      gpfs_ip=$(getip $gpfs_if)
      debugmsg2 "selectNode: GPFS IP: $gpfs_ip, NFS IP: $nfs_list"
      if [ "$gpfs_ip" != "$failed_node" ]; then
        isNodeUp $gpfs_ip
        if [ $? -eq 0 ]; then
          debugmsg "takeover node is $gpfs_ip"
          echo $gpfs_ip
          return
        else
          debugmsg "selectNode: takeover_node $gpfs_ip is down"
          continue
        fi
      fi
    done 3< "$NODELIST"

    # Nobody is up right now; wait before scanning the list again
    sleep 10
  done
}
---|
1141 | |
---|
# Pick a takeover node for a failed node by walking getNextNode.
# Arguments: $1 - the failed node
# Globals:   failed, candidate (set)
# Outputs:   the first candidate that isNodeUp accepts, on stdout
# Loops until such a node is found; whenever getNextNode yields nothing the
# search restarts from the failed node after a 10 second pause.
selectNode2() {
  failed=$1
  candidate=$(getNextNode $failed $failed)

  # Leave the loop as soon as there is a candidate and it is up
  until [ -n "$candidate" ] && isNodeUp $candidate; do
    if [ -z "$candidate" ]; then
      # Ran out of candidates: wait, then start over from the failed node
      sleep 10
      candidate=$(getNextNode $failed $failed)
    else
      # Candidate is down: move on to the one after it
      candidate=$(getNextNode $candidate $failed)
    fi
  done

  echo $candidate
}
---|
1158 | |
---|
# Bring up the NFS address of a failed node on this node.
# Arguments: $1 - NFS (external) IP address of the failed node
#            $2 - GPFS IP of the failed node (handed to the stonith hook)
# Globals:   failed_nfs_ip, failed_gpfs_ip, numberOfRetries, attemptNo,
#            eth (set)
# Returns:   0 if mmgetifconf lists an interface with the address after the
#            "IPaddr ... start"; 1 if $1 is empty, the address is still
#            reported in use after all retries, or no interface lists it
recoverNode() {
  failed_nfs_ip=$1
  failed_gpfs_ip=$2
  debugmsg "start recoverNode $failed_nfs_ip"
  [ -z "$failed_nfs_ip" ] && return 1

  # Only probe when the address is not already configured locally.
  # -F makes grep treat the address as a literal string; as a regular
  # expression every "." would match any character.
  if [ -z "$(ifconfig | grep -Fwo -e "$failed_nfs_ip")" ]; then
    # Wait up to 20 seconds for "IPaddr ... monitor" to stop answering OK
    typeset -i numberOfRetries=20 attemptNo=0
    while [[ $attemptNo -lt $numberOfRetries &&
             "$(IPaddr $failed_nfs_ip monitor)" == "OK" ]]; do
      attemptNo=$((attemptNo + 1))
      sleep 1
    done
    # Still answering after all retries: call the site's stonith exit, if any
    if [ $attemptNo -eq $numberOfRetries ]; then
      if [ -e /var/mmfs/etc/stonith ]; then
        debugmsg "call /var/mmfs/etc/stonith with $failed_gpfs_ip $failed_nfs_ip"
        /var/mmfs/etc/stonith $failed_gpfs_ip $failed_nfs_ip
        debugmsg "back from call to /var/mmfs/etc/stonith"
      fi
    fi
    # Second wait, up to 15 seconds
    typeset -i numberOfRetries=15 attemptNo=0
    while [[ $attemptNo -lt $numberOfRetries &&
             "$(IPaddr $failed_nfs_ip monitor)" == "OK" ]]; do
      attemptNo=$((attemptNo + 1))
      sleep 1
    done
    if [ $attemptNo -eq $numberOfRetries ]; then
      # Somebody else has failed_nfs_ip - maybe the failed node is not down?
      msg "Error: some other host already has address $failed_nfs_ip. Recovery will not happen."
      return 1
    fi
  fi

  debugmsg "recoverNode $failed_nfs_ip"
  debuglog IPaddr $failed_nfs_ip start

  # Success means some interface now carries the address (literal match)
  eth=$(mmgetifconf | grep -Fw -e "$failed_nfs_ip" | awk '{print $1}')
  debugmsg "Checking if interface for ip $failed_nfs_ip is up"
  if [ -n "$eth" ]; then
    return 0
  fi
  debugmsg "No interface for ip $failed_nfs_ip is up"
  return 1
}
---|
1204 | |
---|
# Take over the NFS addresses of a failed node on this node.
# Arguments: $1 - GPFS IP of this node
#            $2 - GPFS IP of the failed node
# Globals:   SHARED_RECOVERY, SHARED_NFS (read); me, failed, nfsIPs, nfsips,
#            ip, do_reclaim (set)
# The file $SHARED_RECOVERY/<me>/<failed> marks the takeover; if it has gone
# by the time the addresses are up, the takeover is treated as canceled.
IPtakeover() {
  me=$1
  failed=$2
  typeset -i do_reclaim=0

  # A node never takes over from itself
  if [ "$me" == "$failed" ]; then
    return
  fi

  msg "Initiating IP takeover of $failed due to node failure"

  # Record that this node is recovering the failed one
  _mkdir ${SHARED_RECOVERY}/$me
  debuglog touch ${SHARED_RECOVERY}/$me/$failed

  stopNode $failed
  nfsIPs=$(getNfsIPs $failed)
  debugmsg "IPtakeover ips: $nfsIPs"
  for ip in $nfsIPs; do
    # Takeover IP and issue gratuitous ARP to the clients for the node
    # that failed so that clients can reconnect to the new address
    if recoverNode $ip $failed; then
      do_reclaim=1
    fi
  done

  if [ $do_reclaim -eq 0 ]; then
    # did not get IP, takeover failed, remove the entry
    _unlink ${SHARED_RECOVERY}/$me/$failed
    return
  fi

  # got the IP, check if we are still the node to do takeover
  if [ ! -f $SHARED_RECOVERY/$me/$failed ]; then
    # drop the interface
    nfsips=$(getNfsIPs $failed)
    debugmsg "Node $failed recovery canceled"
    for ip in $nfsips; do
      ifDown $ip
    done
    return
  fi

  debugmsg "IPtakeover: File contents:"
  debugmsg $(ls -R ${SHARED_RECOVERY}/$me)
  # Add the failed node's client list to this node's list
  _cp ${SHARED_NFS}/$failed/statd/sm/* ${SHARED_NFS}/$me/statd/sm
  # Reclaim is started here only when dynamic grace is available (status 1)
  checkDynamicGrace
  [ $? -eq 1 ] && startReclaim $failed
}
---|
1249 | |
---|
1250 | # Use mii-diag, mii-tool or ethtool to detect network link status |
---|
1251 | # Return 0 if link beat detected, 1 if invalid (no device), 2 if no link beat |
---|
1252 | # FIX: If none of the tools exist, return 2 since there is no way to detect |
---|
1253 | # link status. This means caller is responsible for handling the error |
---|
1254 | # correctly. |
---|
# Arguments: $1 - interface to test; when omitted the global $eth is used,
#                 which is what this function relied on before
# Globals:   eth (read as fallback); netdev, tool, output, status (set)
netdiag() {
  # Honour the argument callers pass instead of silently ignoring it
  netdev=${1:-$eth}

  # We trust mii-diag works for all interfaces;
  # if it exists, return its status
  tool=$(which mii-diag 2> /dev/null)
  if [ -n "$tool" ]; then
    debuglog2 $tool -s $netdev
    return $?
  fi

  # mii-diag doesn't exist - try both mii-tool and ethtool
  tool=$(which mii-tool 2> /dev/null)
  if [ -n "$tool" ]; then
    output=$($tool $netdev 2> /dev/null)
    if [ $? -eq 0 ]; then
      # The link state is the last word of the mii-tool output
      status=$(echo $output | awk '{print $NF}')
      [ "$status" == "ok" ] && return 0
    fi
  fi

  tool=$(which ethtool 2> /dev/null)
  if [ -n "$tool" ]; then
    output=$($tool $netdev 2> /dev/null)
    if [ $? -eq 0 ]; then
      # Keep the line structure and read the value from the "Link detected"
      # line itself, wherever it appears in the output
      status=$(echo "$output" | awk '/Link detected/ { s = $NF } END { print s }')
      # Quoted: an empty status must compare as "not yes", not break the test
      [ "$status" == "yes" ] && return 0
    fi
  fi

  # We reach here either if no tool exists or if there is an error
  return 2
}
---|
1286 | |
---|
1287 | # Returns 0 if ready, 1 otherwise |
---|
# Arguments: $1 - interface name
# Globals:   eth (set to $1; netdiag reads it)
# Makes up to three netdiag attempts, sleeping 5 seconds after each failure.
checkLinkStatus() {
  eth=$1

  # Check if the interface is down
  if ! debuglog2 ifconfig $eth; then
    return 1
  fi

  for attempt in 1 2 3; do
    debugmsg2 "Checking link status for $eth - attempt $attempt"

    if netdiag $eth; then
      debugmsg2 "Tested the link for $eth, and it is connected"
      return 0
    fi
    debugmsg2 "Tested the link for $eth, and it is NOT connected"
    sleep 5
  done
  return 1
}
---|
1312 | |
---|
# List the entries directly under $SHARED_RECOVERY (IPfailback treats each
# one as a recovery node). Errors from ls are discarded.
# Returns:  status of ls
getRecoveryNodes() {
  { ls -A $SHARED_RECOVERY; } 2> /dev/null
}
---|
1316 | |
---|
# List the entries under $SHARED_RECOVERY/<node> (IPfailback treats each one
# as a node that <node> has taken over). Errors from ls are discarded.
# Arguments: $1 - recovery node (directory name under $SHARED_RECOVERY)
# Returns:   status of ls
getFailedNodes() {
  { ls -A $SHARED_RECOVERY/$1; } 2> /dev/null
}
---|
1320 | |
---|
1321 | |
---|
# Fail this node's NFS addresses back from whichever node took them over.
# Globals:  myip, SHARED_RECOVERY, SHARED_NFS, LOGFILE (read);
#           failover_ips, failover_ip, nfsIPs, nfsip, tmp, up, ip, iface,
#           ifcfg, RC, smdir, gpfsIPs (set); sourcing the interface config
#           may set further variables (BONDING_MASTER is read afterwards)
# Returns:  immediately when no recovery node lists this node as failed;
#           otherwise the status of the last remote nfsnodeback call
IPfailback() {
  # Find the recovery nodes that list this node as failed
  failover_ips=""
  for recovery_node in $(getRecoveryNodes); do
    debugmsg2 "Checking if $recovery_node is recovery node"
    for failed_node in $(getFailedNodes $recovery_node); do
      debugmsg "Failed node is $failed_node"
      if [ "$failed_node" == "$myip" ]; then
        failover_ips="$failover_ips $recovery_node"
      fi
    done
  done

  if [ -z "$failover_ips" ]; then
    debugmsg "No failback is needed"
    return
  fi
  debugmsg "Failover nodes are $failover_ips"

  # If no recovery node holds one of my external IPs there is nothing to
  # wait for. After the loops $failover_ip names the node that holds the
  # address, or the last failover node when none does.
  tmp=""
  nfsIPs=$(getNfsIPs $myip)
  for nfsip in $nfsIPs; do
    for failover_ip in $failover_ips; do
      # -Fw: match the address literally and as a whole word, so that
      # e.g. 10.0.0.1 does not also match 10.0.0.10
      tmp=$(mmdshcmd $failover_ip "/usr/lpp/mmfs/bin/mmgetifconf | grep -Fw $nfsip")
      debugmsg2 check for ip=$nfsip on $failover_ip tmp=$tmp
      [ -n "$tmp" ] && break 2
    done
  done
  if [ -z "$tmp" ]; then
    debugmsg "IP is not in use"
  else
    # wait for any of the NFS IP to be up, but try all IPs at least once
    up=0
    while [ $up -eq 0 ]; do
      for ip in $nfsIPs; do
        iface=$(getifname $ip)
        # if this is a virtual address, the interface is already up
        isVirtualIP $ip
        RC=$?
        if [ $RC -ne 0 ]; then
          debugmsg2 wait for $ip
          ifcfg=$(getifcfg $ip)

          debugmsg2 "Sourcing $ifcfg"
          . $ifcfg

          if [ "$BONDING_MASTER" == "yes" ]; then
            ifBondUp $iface
          else
            debuglog ifconfig $iface up
          fi
        fi
        checkLinkStatus $iface
        if [ $? -eq 0 ]; then
          debugmsg2 "Testing the link for $iface, and it is connected"
          # arping -D probes for the address on the wire; the IP counts as
          # ready when the probe exits non-zero
          if ! arping -q -c 2 -w 3 -D -I $iface $ip ; then
            debugmsg2 $ip is ready
            up=1
          fi
        fi
        # An interface brought up only for this test is taken down again
        [ $RC -ne 0 ] && ifconfig $iface down
      done
      [ $up -eq 0 ] && sleep 5
    done
  fi

  # remove my node from the recovery node list
  debuglog rm $SHARED_RECOVERY/*/$myip > /dev/null 2>&1

  # Save the failover node's list of lock users in /tmp/statd/sm.bak
  debugmsg "Failover IP is $failover_ip"
  smdir=/tmp/statd/sm
  _mkdir $smdir ${smdir}.bak
  _cp ${SHARED_NFS}/$failover_ip/statd/sm/* ${smdir}.bak

  # Get recovery node to free my IP addresses
  mmdshAll "/var/mmfs/etc/nfsgrace $myip" >> $LOGFILE 2>&1

  checkDynamicGrace
  if [ $? -eq 1 ]; then
    # Dynamic grace available: only the failover node runs nfsnodeback
    mmdshcmd $failover_ip "/var/mmfs/etc/nfsnodeback $failover_ip $myip" >> $LOGFILE 2>&1
  else
    # Otherwise every other GPFS node runs nfsnodeback
    gpfsIPs=$(getAllGPFSIPs)
    for ip in $gpfsIPs; do
      debugmsg2 "ip $ip, myip $myip"
      if [ "$ip" != "$myip" ]; then
        mmdshcmd $ip "/var/mmfs/etc/nfsnodeback $ip $failover_ip $myip" >> $LOGFILE 2>&1
      fi
    done
  fi
}
---|
1415 | |
---|