#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
#
#
# Licensed Materials - Property of IBM
#
# (C) COPYRIGHT International Business Machines Corp. 2001,2006
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#)40 1.21 src/avs/fs/mmfs/ts/admin/mmgetstate.sh, mmfs, avs_rgpfs24, rgpfs240610b 12/13/05 01:47:25
################################################################################
#
# Determine the state of the GPFS daemon: down, arbitrating (initializing),
# active, etc. Optionally, display the quorum value, the number of nodes up,
# and other daemon-related information.
#
# Usage:
#
#   mmgetstate [-L] [-s] [-v]
#              [-a | -N {Node[,Node...] | NodeFile | NodeClass}]
#
# where:
#
#   -L   Display detailed information including quorum value,
#        number of nodes up, total number of nodes, etc.
#
#   -s   Display summary information: number of local and remote nodes
#        that have joined in the cluster, number of quorum nodes, etc.
#
#   -v   Display intermediate error messages.
#
#   -a   Check all nodes in the GPFS cluster.
#
#   -N Node,Node,...  Specify the nodes whose state is desired.
#   -N NodeFile       NodeClass may be one of several possible node classes
#   -N NodeClass      (e.g., quorumnodes, managernodes, nsdnodes, etc.)
#
#
# If not explicitly specified otherwise, only the local node is checked.
#
#
# The state of a node can be one of the following:
#
#   "arbitrating"  Node is trying to form a quorum with other available
#                  nodes or trying to acquire the disk fence if single-node
#                  quorum semantics are in effect.
#
#   "active"       GPFS is active and ready for normal operation.
#
#   "down"         The GPFS daemon is not running on the node.
#
#   "unknown"      Unknown state or error condition.
#
# When the state is "down" or "unknown" the values for quorum and nodes up
# are shown as zero.
#
#
# Obsolete options:
#
#   -C NodesetId   Check all nodes in the specified nodeset.
#                  Assumed to be the same as -a.
#
#   -W NodeFile    Check all nodes whose reliable hostnames
#                  are listed one per line in NodeFilename.
#                  Cannot be specified with -a, -N, -w, or -n.
#
#   -w nodenames   Check all nodes whose reliable hostnames
#                  are in the comma-separated nodenames list.
#                  Cannot be specified with -a, -N or -W.
#                  If both -w and -n are specified, the lists are combined.
#
#   -n nodenums    Check all nodes whose node numbers are
#                  in the comma-separated nodenums list.
#                  Cannot be specified with -a, -N, or -W.
#                  If both -w and -n are specified, the lists are combined.
#
#   -k             Suppress headers and display the output in colon-separated
#                  fields format. Implies -L. The output has the following
#                  format:
#      gpfs:nodeNumber:nodeName:quorumValue:nodesUp:totalNodes:state:remarks
#
################################################################################

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

sourceFile="mmgetstate.sh"
[[ -n $DEBUG || -n $DEBUGmmgetstate ]] && set -x
$mmTRACE_ENTER "$*"

# Local variables
usageMsg=449
typeset -i nodes=0
rc=0
underline="----------------------------------------------------------"
underline="${underline}----------------------------------------------"


# Local functions

######################################################################
#
# Function:  Scan the errMsg file and display on stderr all messages
#            that have originated from the specified node.
#
# Input:     $1 - name of the originating node
#
# Output:    Error messages (if any).  Lines that belong to other
#            nodes are kept in the errMsg file for later calls;
#            lines that were examined for this node are removed.
#
# Returns:   Always zero.
#
######################################################################
function showErrorMessages  # nodeName
{
  typeset sourceFile="mmgetstate.sh"
  [[ -n $DEBUG || -n $DEBUGshowErrorMessages ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodeName=$1
  typeset errLine
  typeset currentName

  # Nothing to do if there are no pending error messages.
  [[ ! -s $errMsg ]] && return 0

  # Message lines are prefixed with "<nodeName>:" as their first word.
  [[ -n $nodeName ]] && nodeName="${nodeName}:"

  $rm -f $errMsg2
  exec 5<&-
  exec 5< $errMsg
  # -r keeps backslashes in the message text intact.
  while read -r -u5 errLine
  do
    # Split the line into words without expanding glob characters.
    set -f ; set -- $errLine ; set +f
    currentName=$1

    # If the line came from the node that we are interested in
    # and there is a message, print it out.
    # The right-hand side is quoted so that it is compared literally.
    if [[ $currentName = "$nodeName" ]]
    then
      [[ -n $2 ]] && print -u2 -r -- "$errLine"
      continue
    fi

    # Save the other lines for later processing.
    print -r -- "$errLine" >> $errMsg2
  done  # end of while read -u5 errLine
  exec 5<&-

  # Prepare the file for the next iteration.
  # If every line belonged to this node, errMsg2 was never created;
  # remove errMsg in that case so the same messages are not shown again.
  if [[ -f $errMsg2 ]]
  then
    $mv $errMsg2 $errMsg
  else
    $rm -f $errMsg
  fi

  return 0

}  #----- end of function showErrorMessages ----------------


######################################################################
#
# Function:  Display the result from tsctl clusterNodeInfo issued
#            on a node that has joined the cluster.  Note that the
#            tsctl command will work even before the node has joined
#            the cluster but all counters will return zero.
#
# Input:     None.
#
# Output:    Summary information from tsctl clusterNodeInfo.
#
# Returns:   0 - no errors encountered.
#            1 - unexpected error or none of the nodes have joined
#                the cluster yet.
#
######################################################################
function showSummaryCounters  #
{
  typeset sourceFile="mmgetstate.sh"
  [[ -n $DEBUG || -n $DEBUGshowSummaryCounters ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodeName
  typeset header
  typeset rc=0

  #-----------------------------------------------------------------------------
  # The tsctl clusterNodeInfo command must be issued on a node that has joined
  # the cluster.  If it is issued on a node on which GPFS has started, but has
  # not joined the cluster yet, the command will work but all counters will be
  # set to zero.  Note that mmcommon onactive can not be used here because it
  # bypasses nodes that are waiting for quorum while the tsctl clusterNodeInfo
  # command will work on such nodes.
  #-----------------------------------------------------------------------------

  # If the -a option was specified, we already have a file
  # with the reliable names of the nodes in the cluster.
  # Otherwise, create the file now.
  [[ -z $aflag ]] && \
    getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

  # Try the nodes one by one until the command is executed somewhere.
  # Always start with the local node first.
  for nodeName in $(print -- "$ourNodeName" ; $grep -v -w "$ourNodeName" $nodefile)
  do
    run on1 $nodeName adminCmd tsctl clusterNodeInfo > $tmpfile 2> $errMsg2
    rc=$(remapRC $?)

    # If acceptable error (daemon down, node has not joined, etc.),
    # try to find some other node to run the command.  Otherwise,
    # the command was executed and either succeeded or failed.
    if [[ $rc -eq $MM_DaemonDown || $rc -eq $MM_NotJoined || $rc -eq $MM_QuorumWait ]]
    then
      continue
    else
      break
    fi
  done  # end of for nodeName in ...

  # Display header "Summary information:"
  # The underline is truncated to the length of the header text.
  header=$(printInfoMsg 405)
  printf "\n%s\n%.${#header}s\n" "$header" "$underline"

  # Examine the result from the tsctl call.
  if [[ $rc -eq 0 ]]
  then
    # Things must have worked.  Show the results.
    $cat $tmpfile
    print -- ""   # Output a blank separator line.

  elif [[ $rc -eq $MM_DaemonDown || $rc -eq $MM_NotJoined || $rc -eq $MM_QuorumWait ]]
  then
    # Either all nodes are down, or the daemon is down on all nodes,
    # or none of the nodes have joined the cluster.
    printErrorMsg 354 $mmcmd

  else
    # Unexpected error.
    [[ -s $errMsg2 ]] && $cat $errMsg2 1>&2
    printErrorMsg 171 "$mmcmd" "tsctl clusterNodeInfo on node $nodeName" $rc
  fi  # end of if [[ $rc -eq 0 ]]

  $rm -f $errMsg2
  return $rc

}  #----- end of function showSummaryCounters ----------------



#######################
# Mainline processing
#######################


##################################
# Process each of the arguments.
##################################
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] && \
  syntaxError "help" $usageMsg

while getopts :aC:kLn:N:svw:W: OPT
do
  case $OPT in

    a) [[ -n $aflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       aflag="-$OPT"
       all="all"
       [[ -n $Cflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] && \
         syntaxError "invalidCombination" \
           $usageMsg $aflag $Cflag $Wflag $wflag $nflag $Nflag
       ;;

    C) # syntaxError "obsoleteOption" $usageMsg "-$OPT"
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       nodesetId="$OPTARG"
       [[ -n $aflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] && \
         syntaxError "invalidCombination" \
           $usageMsg $Cflag $aflag $Wflag $wflag $nflag $Nflag
       ;;

    k) [[ -n $kflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       kflag="-$OPT"
       ;;

    L) [[ -n $Lflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Lflag="-$OPT"
       ;;

    n) [[ -n $nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       nflag="-$OPT"
       nodenums="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] && \
         syntaxError "invalidCombination" \
           $usageMsg $nflag $Cflag $Wflag $aflag $Nflag
       ;;

    N) [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $nflag || -n $wflag ]] && \
         syntaxError "invalidCombination" \
           $usageMsg $Nflag $Cflag $Wflag $aflag $nflag $wflag
       ;;

    s) [[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       sflag="-$OPT"
       ;;

    v) [[ -n $vflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       vflag="-$OPT"
       ;;

    w) [[ -n $wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       wflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] && \
         syntaxError "invalidCombination" \
           $usageMsg $wflag $Cflag $Wflag $aflag $Nflag
       ;;

    W) [[ -n $Wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Wflag="-$OPT"
       Wcoll="$OPTARG"
       [[ -n $Cflag || -n $aflag || -n $wflag || -n $nflag || -n $Nflag ]] && \
         syntaxError "invalidCombination" \
           $usageMsg $Wflag $Cflag $aflag $wflag $nflag $Nflag
       ;;

    :) syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    +[aCkLnNsvwW])
       syntaxError "invalidOption" $usageMsg "$OPT"
       ;;

    *) syntaxError "invalidOption" $usageMsg $OPTARG
       ;;

  esac
done  # end of while getopts :aC:kLn:N:svw:W: OPT do

shift OPTIND-1
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1

# -k implies -L as well.
[[ -n $kflag ]] && Lflag="-L"

# Rather than fail, convert the obsolete -C option to -a.
[[ -n $Cflag ]] && aflag="-a"


########################################################################
# Set up trap exception handling and call the gpfsInit function.
# It will ensure that the local copy of the mmsdrfs and the rest of the
# GPFS system files are up-to-date.  There is no need to lock the sdr.
########################################################################
trap pretrap2 HUP INT QUIT KILL
gpfsInitOutput=$(gpfsInit nolock)
setGlobalVar $? $gpfsInitOutput


#######################################################
# Create a file containing all of the specified nodes.
#######################################################
if [[ -n $aflag ]]
then
  # Get a list of the nodes.
  getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

elif [[ -n $Nflag ]]
then
  # Convert the passed data into a file containing admin node names.
  createVerifiedNodefile $nodenames $REL_HOSTNAME_Field no $nodefile
  [[ $? -ne 0 ]] && cleanupAndExit

elif [[ -n $Wflag ]]
then
  # Verify the input file is readable.
  if [[ ! -f $Wcoll || ! -r $Wcoll ]]
  then
    printErrorMsg 43 $mmcmd $Wcoll
    cleanupAndExit
  fi

  # Filter out comment lines and localhost entries.
  $grep -v -e "localhost" -e "^#" "$Wcoll" > $tmpfile

  # Convert any entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # No node names were specified.
    printErrorMsg 328 $mmcmd $Wcoll
    cleanupAndExit
  fi

else
  # Either no option was specified, or we have some combination of -w and -n.

  # Convert the node names list (if any) into a file.
  $rm -f $tmpfile
  if [[ -n $nodenames ]]
  then
    for i in $(print $nodenames | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Append the node number list (if any) to the node file.
  if [[ -n $nodenums ]]
  then
    for i in $(print $nodenums | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Convert the entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  fi

fi  # end of if [[ -n $aflag ]]

# Ensure we have the proper credentials.
[[ $getCredCalled = no ]] && getCred


################################
# Get the quorum state.
################################
$rm -f $tmpfile $tmpfile2 $errMsg
if [[ ! -s $nodefile ]]
then
  # This is a request to query the local node only.
  mmremoteOutput=$($mmremote mmGetState $Lflag 2>$errMsg)
  rc=$?
  [[ -n $mmremoteOutput ]] && \
    print "${ourNodeName}: ${mmremoteOutput}" > $tmpfile
else
  # This is a request to query other nodes as well.
  $mmcommon onall $nodefile $unreachedNodes mmGetState $Lflag > $tmpfile2 2>&1
  rc=$?

  # Split the results into two files: the first will contain only
  # the lines with the GPFS state information.  The second file
  # will contain all error messages.
  $grep "mmGetState:" $tmpfile2 > $tmpfile
  $grep -v "mmGetState:" $tmpfile2 > $errMsg

  # Sort the results based on nodesetId and node number.
  $sort -t: -k 3,3 -k 4,4n $tmpfile -o $tmpfile
fi  # end of if [[ ! -s $nodefile ]]

if [[ ! -s $tmpfile ]]
then
  # We didn't get anything back.  Give up.
  [[ $rc -eq 0 ]] && rc=1
  [[ -s $errMsg ]] && $cat $errMsg 1>&2
  # Command failed.
  printErrorMsg 389 $mmcmd
  cleanupAndExit
fi


################################
# Display the results.
################################

# Print the appropriate header line.
# This depends on the specified formatting option.
if [[ -z $kflag ]]
then
  if [[ -n $Lflag ]]
  then
    # "Node number  Node name  Quorum  Nodes up  Total nodes  GPFS state  Remarks"
    header=$(printInfoMsg 496)
    printf "\n%s\n%.${#header}s\n" "$header" "$underline"
  else
    # "Node number  Node name  GPFS state"
    header=$(printInfoMsg 497)
    printf "\n%s\n%.${#header}s\n" "$header" "$underline"
  fi
fi  # end of if [[ -z $kflag ]]

# Process the output from the mmGetState call.
# Each line is split on ":" into its fields; IFS is switched back to the
# saved value ($IFS_sv) for the rest of the loop body and set to ":" again
# just before the next read.
IFS=":"
exec 3<&-
exec 3< $tmpfile
while read -u3 inLine
do
  # Parse the line.
  set -f ; set -- $inLine ; set +f
  nodeName=$1
  magicWord=${2# }
  nodeset=$3
  nodeNumber=$4
  shortName=$5
  quorum=$6
  nodesUp=$7
  nodesTotal=$8
  state=$9
  quorumDesignation=${10}
  IFS="$IFS_sv"

  # Set the remarks field based upon the quorum designation value.
  if [[ $quorumDesignation = $quorumNode ]]
  then
    remarks=$(printInfoMsg 431)  # "quorum node"
  else
    remarks=""
  fi

  # If verbose output requested, show relevant error messages.
  [[ -n $vflag ]] && showErrorMessages $nodeName

  # Print the desired information.
  if [[ -n $kflag ]]
  then
    # Colon-separated output was requested.
    print "gpfs:$nodeNumber:$shortName:$quorum:$nodesUp:$nodesTotal:$state:$remarks"
  elif [[ -n $Lflag ]]
  then
    # Extended information was requested.
    printf "%8s %4s %-16s %3s %8s %10s %5s %-7s %3s %s\n" \
      "$nodeNumber" "$BLANKchar" "$shortName" "$quorum" "$nodesUp" \
      "$nodesTotal" "$BLANKchar" "$state" "$BLANKchar" "$remarks"
  else
    # Default information was requested.
    printf "%8s %4s %-16s %s\n" \
      "$nodeNumber" "$BLANKchar" "$shortName" "$state"
  fi

  IFS=":"  # Change the separator back to ":" for the next iteration.

done  # end of while read -u3 inLine

IFS="$IFS_sv"  # Restore the default IFS settings.

# If verbose output requested, and there are still undisplayed errors,
# show them to the user now.
[[ -s $errMsg && -n $vflag ]] && \
  $cat $errMsg 1>&2

# If summary statistics were requested, get them from the daemon.
if [[ -n $sflag ]]
then
  showSummaryCounters
  rc=$?
fi

# If any nodes could not be reached, tell the user which ones.
if [[ -s $unreachedNodes ]]
then
  # The following nodes could not be reached: . . .
  printErrorMsg 270 $mmcmd
  $cat $unreachedNodes 1>&2
fi

cleanupAndExit $rc