source: gpfs_3.1_ker2.6.20/lpp/mmfs/bin/mmgetstate @ 177

Last change on this file since 177 was 16, checked in by rock, 17 years ago
  • Property svn:executable set to *
File size: 16.9 KB
Line 
1#!/bin/ksh
2# IBM_PROLOG_BEGIN_TAG
3# This is an automatically generated prolog.
4
5
6
7# Licensed Materials - Property of IBM
8
9# (C) COPYRIGHT International Business Machines Corp. 2001,2006
10# All Rights Reserved
11
12# US Government Users Restricted Rights - Use, duplication or
13# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
14
15# IBM_PROLOG_END_TAG
16# @(#)40 1.21 src/avs/fs/mmfs/ts/admin/mmgetstate.sh, mmfs, avs_rgpfs24, rgpfs240610b 12/13/05 01:47:25
17################################################################################
18#
19# Determine the state of the GPFS daemon:  down, arbitrating (initializing),
20# active, etc.  Optionally, display the quorum value, the number of nodes up,
21# and other daemon-related information.
22#
23# Usage:
24#
25#   mmgetstate [-L] [-s] [-v]
26#              [-a | -N {Node[,Node...] | NodeFile | NodeClass}]
27#
28# where:
29#
30#   -L            Display detailed information including quorum value,
31#                 number of nodes up, total number of nodes, etc.
32#
33#   -s            Display summary information: number of local and remote nodes
34#                 that have joined in the cluster, number of quorum nodes, etc.
35#
36#   -v            Display intermediate error messages.
37#
38#   -a            Check all nodes in the GPFS cluster.
39#
40#   -N Node,Node,...  Specify the nodes whose state is desired.
41#   -N NodeFile       NodeClass may be one of several possible node classes
42#   -N NodeClass      (e.g., quorumnodes, managernodes, nsdnodes, etc.)
43#
44#
45# If not explicitly specified otherwise, only the local node is checked.
46#
47#
48# The state of a node can be one of the following:
49#
50#   "arbitrating"   Node is trying to form a quorum with other available
51#                   nodes or trying to acquire the disk fence if single-node
52#                   quorum semantics are in effect.
53#
54#   "active"        GPFS is active and ready for normal operation.
55#
56#   "down"          The GPFS daemon is not running on the node.
57#
58#   "unknown"       Unknown state or error condition.
59#
60# When the state is "down" or "unknown" the values for quorum and nodes up
61# are shown as zero.
62#
63#
64# Obsolete options:
65#
66#   -C NodesetId  Check all nodes in the specified nodeset.
67#                 Assumed to be the same as -a.
68#
69#   -W NodeFile   Check all nodes whose reliable hostnames
70#                 are listed one per line in NodeFilename.
71#                 Cannot be specified with -a, -N, -w, or -n.
72#
73#   -w nodenames  Check all nodes whose reliable hostnames
74#                 are in the comma-separated nodenames list.
75#                 Cannot be specified with -a, -N or -W.
76#                 If both -w and -n are specified, the lists are combined.
77#
78#   -n nodenums   Check all nodes whose node numbers are
79#                 in the comma-separated nodenums list.
80#                 Cannot be specified with -a, -N, or -W.
81#                 If both -w and -n are specified, the lists are combined.
82#
83#   -k            Suppress headers and display the output in colon-separated
84#                 fields format.  Implies -L.  The output has the following
85#                 format:
86#   gpfs:nodeNumber:nodeName:quorumValue:nodesUp:totalNodes:state:remarks
87#
88################################################################################
89
90# Include global declarations and service routines.
91. /usr/lpp/mmfs/bin/mmglobfuncs
92. /usr/lpp/mmfs/bin/mmsdrfsdef
93
# Name under which this script reports itself (presumably consumed by the
# trace and message helpers sourced above - verify against mmglobfuncs).
sourceFile="mmgetstate.sh"

# Command tracing is switched on globally (DEBUG) or for this script only.
if [[ -n $DEBUG || -n $DEBUGmmgetstate ]]
then
  set -x
fi
$mmTRACE_ENTER "$*"


# Script-wide working variables

rc=0                   # exit status handed to cleanupAndExit at the end
usageMsg=449           # message number passed to syntaxError for usage text
typeset -i nodes=0

# Ruler of dashes; the header printing code cuts it down to the header width.
underline="----------------------------------------------------------"
underline="${underline}----------------------------------------------"
106
107
108# Local functions
109
110
######################################################################
#
# Function:  Scan the errMsg file and display on stderr all messages
#            that have originated from the specified node.  Lines that
#            belong to other nodes are kept in errMsg for later calls;
#            lines that carry only the node prefix and no message text
#            are dropped silently.
#
# Input:     $1 - name of the originating node
#
# Globals:   errMsg  - file with the collected messages (rewritten)
#            errMsg2 - scratch file used while filtering
#
# Output:    Error messages (if any).
#
# Returns:   Always zero.
#
######################################################################
function showErrorMessages  # <nodeName>
{
  typeset sourceFile="mmgetstate.sh"
  [[ -n $DEBUG || -n $DEBUGshowErrorMessages ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset nodeName=$1
  typeset errLine currentName

  [[ ! -s $errMsg ]] && return 0

  # Message lines are prefixed with "<nodeName>:".
  [[ -n $nodeName ]] && nodeName="${nodeName}:"

  # Start from an empty but existing scratch file.  If every remaining
  # line belongs to this node nothing is appended below, and the final mv
  # must still succeed and leave errMsg empty; otherwise the same messages
  # would be shown a second time by the caller.
  $rm -f "$errMsg2"
  : > "$errMsg2"

  exec 5<&-
  exec 5< "$errMsg"
  while IFS= read -r -u5 errLine
  do
    # Split on white space (globbing disabled) to get at the first word.
    set -f ; set -- $errLine ; set +f
    currentName=$1

    # If the line came from the node that we are interested in
    # and there is a message, print it out.
    if [[ $currentName = "$nodeName" ]]
    then
      [[ -n $2 ]] && printf '%s\n' "$errLine" >&2
      continue
    fi

    # Save the other lines for later processing.
    printf '%s\n' "$errLine" >> "$errMsg2"

  done  # end of while read -u5 errLine
  exec 5<&-

  # Prepare the file for the next iteration.
  $mv "$errMsg2" "$errMsg"

  return 0

}  #----- end of function showErrorMessages  ----------------
162
163
######################################################################
#
# Function:  Display the result from tsctl clusterNodeInfo issued
#            on a node that has joined the cluster.  Note that the
#            tsctl command will work even before the node has joined
#            the cluster but all counters will return zero.
#
# Input:     None.
#
# Globals:   aflag, nodefile, ourNodeName, tmpfile, errMsg2, underline,
#            mmcmd and the MM_* return code constants are read;
#            nodefile is rebuilt when -a was not specified.
#
# Output:    Summary information from tsctl clusterNodeInfo.
#
# Returns:   0       - no errors encountered.
#            nonzero - the (remapped) return code of the last tsctl
#                      attempt: unexpected error, or none of the nodes
#                      have joined the cluster yet.
#
######################################################################
function showSummaryCounters  #
{
  typeset sourceFile="mmgetstate.sh"
  [[ -n $DEBUG || -n $DEBUGshowSummaryCounters ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodeName
  typeset header
  typeset rc=0


  #-----------------------------------------------------------------------------
  # The tsctl clusterNodeInfo command must be issued on a node that has joined
  # the cluster.  If it is issued on a node on which GPFS has started, but has
  # not joined the cluster yet, the command will work but all counters will be
  # set to zero.  Note that mmcommon onactive can not be used here because it
  # bypasses nodes that are waiting for quorum while the tsctl clusterNodeInfo
  # command will work on such nodes.
  #-----------------------------------------------------------------------------

  # If the -a option was specified, we already have a file
  # with the reliable names of the nodes in the cluster.
  # Otherwise, create the file now.
  [[ -z $aflag ]] &&  \
    getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > "$nodefile"

  # Try the nodes one by one until the command is executed somewhere.
  # Always start with the local node first.  The local node is removed from
  # the rest of the list with an exact, fixed-string, whole-line match so
  # that nodes whose names merely contain the local name (node1-ib,
  # node1.example.com) are still tried.
  # (Assumes one node name per line in nodefile - confirm against getNodeList.)
  for nodeName in $(printf '%s\n' "$ourNodeName" ;  \
                    $grep -v -x -F -e "$ourNodeName" "$nodefile")
  do
    run on1 $nodeName adminCmd tsctl clusterNodeInfo > "$tmpfile" 2> "$errMsg2"
    rc=$(remapRC $?)

    # If acceptable error (daemon down, node has not joined, etc.),
    # try to find some other node to run the command.  Otherwise,
    # the command was executed and either succeeded or failed.
    [[ $rc -eq $MM_DaemonDown || $rc -eq $MM_NotJoined || $rc -eq $MM_QuorumWait ]] ||  \
      break
  done  # end of for nodeName in ...

  # Display header "Summary information:"
  header=$(printInfoMsg 405)
  printf "\n%s\n%.${#header}s\n" "$header" "$underline"

  # Examine the result from the tsctl call.
  if [[ $rc -eq 0 ]]
  then
    # Things must have worked.  Show the results.
    $cat "$tmpfile"
    printf '\n'   # Output a blank separator line.

  elif [[ $rc -eq $MM_DaemonDown || $rc -eq $MM_NotJoined || $rc -eq $MM_QuorumWait ]]
  then
    # Either all nodes are down, or the daemon is down on all nodes,
    # or none of the nodes have joined the cluster.
    printErrorMsg 354 $mmcmd

  else
    # Unexpected error.
    [[ -s $errMsg2 ]] && $cat "$errMsg2" 1>&2
    printErrorMsg 171 "$mmcmd" "tsctl clusterNodeInfo on node $nodeName" $rc
  fi  # end of if [[ $rc -eq 0 ]]

  $rm -f "$errMsg2"
  return $rc

}  #----- end of function showSummaryCounters  ----------------
250
251
252#######################
253# Mainline processing
254#######################
255
256##################################
257# Process each of the arguments.
258##################################
# A help request as the first argument short-circuits option parsing.
# (arg1 is not assigned in this file; presumably it is set by the sourced
# mmglobfuncs - verify.)
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] &&  \
  syntaxError "help" $usageMsg

# Each option may be given only once.  The node selection options
# (-a, -C, -N, -W and the -w/-n pair) are mutually exclusive, except that
# -w and -n may be combined with each other.  The flag variables are left
# unquoted in the invalidCombination calls on purpose so that flags which
# were not specified drop out of the argument list.  User-supplied values
# are always passed quoted.
while getopts :aC:kLn:N:svw:W: OPT
do
  case $OPT in

    a) [[ -n $aflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       aflag="-$OPT"
       all="all"
       [[ -n $Cflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $aflag $Cflag $Wflag $wflag $nflag $Nflag
       ;;

    C) # syntaxError "obsoleteOption" $usageMsg "-$OPT"
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       nodesetId="$OPTARG"
       [[ -n $aflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Cflag $aflag $Wflag $wflag $nflag $Nflag
       ;;

    k) [[ -n $kflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       kflag="-$OPT"
       ;;

    L) [[ -n $Lflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Lflag="-$OPT"
       ;;

    n) [[ -n $nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       nflag="-$OPT"
       nodenums="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $nflag $Cflag $Wflag $aflag $Nflag
       ;;

    N) [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $nflag || -n $wflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Nflag $Cflag $Wflag $aflag $nflag $wflag
       ;;

    s) [[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       sflag="-$OPT"
       ;;

    v) [[ -n $vflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       vflag="-$OPT"
       ;;

    w) [[ -n $wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       wflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $wflag $Cflag $Wflag $aflag $Nflag
       ;;

    W) [[ -n $Wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Wflag="-$OPT"
       Wcoll="$OPTARG"
       [[ -n $Cflag || -n $aflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Wflag $Cflag $aflag $wflag $nflag $Nflag
       ;;

    # An option that requires a value was given without one.
    :) syntaxError "missingValue" $usageMsg "$OPTARG"
       ;;

    # ksh reports options introduced with "+" as "+x"; none are supported.
    +[aCkLnNsvwW])
       syntaxError "invalidOption" $usageMsg "$OPT"
       ;;

    *) syntaxError "invalidOption" $usageMsg "$OPTARG"
       ;;

  esac
done  # end of while getopts :aC:kLn:N:svw:W: OPT do

# Drop the parsed options; nothing may be left over.
shift $(( OPTIND - 1 ))
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg "$1"

# -k implies -L as well.
[[ -n $kflag ]] && Lflag="-L"

# Rather than fail, convert the obsolete -C option to -a.
[[ -n $Cflag ]] && aflag="-a"
352
353
354########################################################################
355# Set up trap exception handling and call the gpfsInit function.
356# It will ensure that the local copy of the mmsdrfs and the rest of the
357# GPFS system files are up-to-date.  There is no need to lock the sdr.
358########################################################################
# SIGKILL cannot be caught, so it is not part of the trap list.
trap pretrap2 HUP INT QUIT

# gpfsInit is run in a subshell; its exit status and printed output are
# handed to setGlobalVar.  The output is left unquoted on purpose so that
# it is passed as separate words.
gpfsInitOutput=$(gpfsInit nolock)
setGlobalVar $? $gpfsInitOutput
362
363
364#######################################################
365# Create a file containing all of the specified nodes.
366#######################################################
# Build $nodefile according to the node selection option in effect.
# With no selection option at all, $nodefile is not created here and the
# query below is limited to the local node.
if [[ -n $aflag ]]
then
  # Get a list of the nodes.
  getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

elif [[ -n $Nflag ]]
then
  # Convert the passed data into a file containing admin node names.
  # The -N value may be a path name, so it is passed as a single word.
  createVerifiedNodefile "$nodenames" $REL_HOSTNAME_Field no $nodefile
  [[ $? -ne 0 ]] && cleanupAndExit

elif [[ -n $Wflag ]]
then
  # Verify the input file is readable.
  if [[ ! -f $Wcoll || ! -r $Wcoll ]]
  then
    printErrorMsg 43 $mmcmd "$Wcoll"
    cleanupAndExit
  fi

  # Filter out comment lines and localhost entries.
  $grep -v -e "localhost" -e "^#" "$Wcoll" > $tmpfile

  # Convert any entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # No node names were specified.
    printErrorMsg 328 $mmcmd "$Wcoll"
    cleanupAndExit
  fi

else
  # Either no option was specified, or we have some combination of -w and -n.

  # Convert the node names list (if any) into a file, one name per line.
  $rm -f $tmpfile
  if [[ -n $nodenames ]]
  then
    for i in $(print -- "$nodenames" | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Append the node number list (if any) to the node file.
  if [[ -n $nodenums ]]
  then
    for i in $(print -- "$nodenums" | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Convert the entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  fi

fi  # end of if [[ -n $aflag ]]


# Ensure we have the proper credentials.
[[ $getCredCalled = no ]] && getCred
435
436
437################################
438# Get the quorum state.
439################################
# Collect the state lines in $tmpfile and any error output in $errMsg.
$rm -f $tmpfile $tmpfile2 $errMsg
if [[ ! -s $nodefile ]]
then
  # This is a request to query the local node only.
  mmremoteOutput=$($mmremote mmGetState $Lflag 2>$errMsg)
  rc=$?
  # Prefix the local result with our node name so that it has the same
  # "<node>: <data>" layout as the lines in the multi-node case.
  [[ -n $mmremoteOutput ]] &&  \
    print -r -- "${ourNodeName}: ${mmremoteOutput}" > $tmpfile
else
  # This is a request to query other nodes as well.
  $mmcommon onall $nodefile $unreachedNodes mmGetState $Lflag > $tmpfile2 2>&1
  rc=$?

  # Split the results into two files:  the first will contain only
  # the lines with the GPFS state information.  The second file
  # will contain all error messages.
  $grep    "mmGetState:" $tmpfile2 > $tmpfile
  $grep -v "mmGetState:" $tmpfile2 > $errMsg

  # Sort the results based on nodesetId and node number.
  $sort -t: -k 3,3 -k 4,4n $tmpfile -o $tmpfile
fi  # end of if [[ ! -s $nodefile ]]

if [[ ! -s $tmpfile ]]
then
  # We didn't get anything back.  Give up.
  [[ $rc -eq 0 ]] && rc=1
  [[ -s $errMsg ]] && $cat $errMsg 1>&2
  # Command failed.
  printErrorMsg 389 $mmcmd
  # NOTE(review): rc is adjusted above but not passed on here; the exit
  # status is whatever cleanupAndExit uses when called without arguments.
  cleanupAndExit
fi
472
473
474################################
475# Display the results.
476################################
477
# Print the appropriate header line.
# This depends on the specified formatting option.
# The underline is cut to the length of the header text.
if [[ -z $kflag ]]
then
  if [[ -n $Lflag ]]
  then
    # "Node number  Node name  Quorum  Nodes up  Total nodes  GPFS state  Remarks"
    header=$(printInfoMsg 496)
    printf "\n%s\n%.${#header}s\n" "$header" "$underline"

  else
    # "Node number  Node name  GPFS state"
    header=$(printInfoMsg 497)
    printf "\n%s\n%.${#header}s\n" "$header" "$underline"
  fi
fi  # end of if [[ -z $kflag ]]

# Process the output from the mmGetState call, one node per line.
exec 3<&-
exec 3< $tmpfile
while IFS= read -r -u3 inLine
do
  # Parse the line.  The separator is switched to ":" only for the split
  # itself so that everything else runs with the default IFS.
  IFS=":"
  set -f ; set -- $inLine ; set +f
  IFS="$IFS_sv"
  nodeName=$1
  magicWord=${2# }
  nodeset=$3
  nodeNumber=$4
  shortName=$5
  quorum=$6
  nodesUp=$7
  nodesTotal=$8
  state=$9
  quorumDesignation=${10}

  # Set the remarks field based upon the quorum designation value.
  if [[ $quorumDesignation = $quorumNode ]]
  then
    remarks=$(printInfoMsg 431)  # "quorum node"
  else
    remarks=""
  fi

  # If verbose output requested, show relevant error messages.
  [[ -n $vflag ]] && showErrorMessages $nodeName

  # Print the desired information.
  if [[ -n $kflag ]]
  then
    # Colon-separated output was requested.
    print "gpfs:$nodeNumber:$shortName:$quorum:$nodesUp:$nodesTotal:$state:$remarks"

  elif [[ -n $Lflag ]]
  then
    # Extended information was requested.
    printf "%8s %4s %-16s %3s %8s %10s %5s %-7s %3s %s\n"  \
           "$nodeNumber" "$BLANKchar" "$shortName" "$quorum" "$nodesUp"  \
           "$nodesTotal" "$BLANKchar" "$state" "$BLANKchar" "$remarks"

  else
    # Default information was requested.
    printf "%8s %4s %-16s %s\n"  \
           "$nodeNumber" "$BLANKchar" "$shortName" "$state"
  fi

done  # end while read -u3 inLine

# Release the descriptor now that all lines have been processed.
exec 3<&-

IFS="$IFS_sv"  # Restore the default IFS settings.
550
# In verbose mode, flush whatever error text is still sitting in the
# message file to stderr.
if [[ -n $vflag && -s $errMsg ]]
then
  $cat $errMsg  1>&2
fi

# With -s, fetch the summary counters from the daemon; the outcome of
# that step becomes the exit status of the command.
[[ -n $sflag ]] && { showSummaryCounters ; rc=$? ; }

# Report the nodes that could not be contacted, if there were any.
if [[ -s $unreachedNodes ]]
then
  # The following nodes could not be reached: . . .
  printErrorMsg 270 $mmcmd
  $cat $unreachedNodes 1>&2
fi

cleanupAndExit $rc
572
Note: See TracBrowser for help on using the repository browser.