#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
#  
#  
# Licensed Materials - Property of IBM 
#  
# (C) COPYRIGHT International Business Machines Corp. 2001,2006 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 
# @(#)40 1.21 src/avs/fs/mmfs/ts/admin/mmgetstate.sh, mmfs, avs_rgpfs24, rgpfs240610b 12/13/05 01:47:25
################################################################################
#
# Determine the state of the GPFS daemon:  down, arbitrating (initializing),
# active, etc.  Optionally, display the quorum value, the number of nodes up,
# and other daemon-related information.
#
# Usage:
#
#   mmgetstate [-L] [-s] [-v]
#              [-a | -N {Node[,Node...] | NodeFile | NodeClass}]
#
# where:
#
#   -L            Display detailed information including quorum value,
#                 number of nodes up, total number of nodes, etc.
#
#   -s            Display summary information: number of local and remote nodes
#                 that have joined in the cluster, number of quorum nodes, etc.
#
#   -v            Display intermediate error messages.
#
#   -a            Check all nodes in the GPFS cluster.
#
#   -N Node,Node,...  Specify the nodes whose state is desired.
#   -N NodeFile       NodeClass may be one of several possible node classes
#   -N NodeClass      (e.g., quorumnodes, managernodes, nsdnodes, etc.)
#
#
# If not explicitly specified otherwise, only the local node is checked.
#
#
# The state of a node can be one of the following:
#
#   "arbitrating"   Node is trying to form a quorum with other available
#                   nodes or trying to acquire the disk fence if single-node
#                   quorum semantics are in effect.
#
#   "active"        GPFS is active and ready for normal operation.
#
#   "down"          The GPFS daemon is not running on the node.
#
#   "unknown"       Unknown state or error condition.
#
# When the state is "down" or "unknown" the values for quorum and nodes up
# are shown as zero.
#
#
# Obsolete options:
#
#   -C NodesetId  Check all nodes in the specified nodeset.
#                 Assumed to be the same as -a.
#
#   -W NodeFile   Check all nodes whose reliable hostnames
#                 are listed one per line in NodeFile.
#                 Cannot be specified with -a, -N, -w, or -n.
#
#   -w nodenames  Check all nodes whose reliable hostnames
#                 are in the comma-separated nodenames list.
#                 Cannot be specified with -a, -N or -W.
#                 If both -w and -n are specified, the lists are combined.
#
#   -n nodenums   Check all nodes whose node numbers are
#                 in the comma-separated nodenums list.
#                 Cannot be specified with -a, -N, or -W.
#                 If both -w and -n are specified, the lists are combined.
#
#   -k            Suppress headers and display the output in colon-separated
#                 fields format.  Implies -L.  The output has the following
#                 format:
#   gpfs:nodeNumber:nodeName:quorumValue:nodesUp:totalNodes:state:remarks
#
################################################################################

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

sourceFile="mmgetstate.sh"
if [[ -n $DEBUG || -n $DEBUGmmgetstate ]]
then
  # Command tracing was requested through the environment.
  set -x
fi
$mmTRACE_ENTER "$*"


# Script-wide variables

# Exit status of the command; later steps overwrite it with their results.
rc=0

# Integer counter (not referenced again in this file).
typeset -i nodes=0

# Message number handed to syntaxError whenever the usage text is wanted.
usageMsg=449

# A long run of dashes.  The header printf calls truncate it to the
# length of the header text in order to underline that header.
underline="----------------------------------------------------------"
underline="${underline}----------------------------------------------"


# Local functions


######################################################################
#
# Function:  Scan the errMsg file and display on stderr all messages
#            that have originated from the specified node.  The lines
#            that belong to the node are removed from the errMsg file;
#            all other lines are kept, in their original order, for
#            later calls.
#
# Input:     $1 - name of the originating node
#
# Globals:   errMsg  - file with the collected error lines; each line
#                      is expected to start with "<nodeName>:".
#                      Rewritten by this function.
#            errMsg2 - scratch file; it is renamed to errMsg before
#                      the function returns.
#
# Output:    Error messages (if any).
#
# Returns:   Always zero.
#
######################################################################
function showErrorMessages  # <nodeName>
{
  typeset sourceFile="mmgetstate.sh"
  [[ -n $DEBUG || -n $DEBUGshowErrorMessages ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset nodeName=$1
  typeset errLine
  typeset currentName

  # Nothing to do if there are no (more) error messages.
  [[ ! -s $errMsg ]] && return 0

  # The lines in the file carry the node name followed by a colon.
  [[ -n $nodeName ]] && nodeName="${nodeName}:"

  # Start with an existing, empty scratch file.  If every remaining line
  # belongs to this node, nothing is appended below; without the empty
  # file the mv at the end would fail and the messages that were just
  # displayed would stay in errMsg and be shown a second time.
  $rm -f $errMsg2
  : > $errMsg2

  exec 5<&-
  exec 5< $errMsg
  # -r: keep backslashes as they are and never join two lines into one.
  while read -r -u5 errLine
  do
    # Split the line into words with file name generation turned off.
    set -f ; set -- $errLine ; set +f
    currentName=$1

    # If the line came from the node that we are interested in
    # and there is a message, print it out.
    # The right side is quoted to force a literal (non-pattern) comparison.
    if [[ $currentName = "$nodeName" ]]
    then
      [[ -n $2 ]] && print -r -u2 -- "$errLine"
      continue
    fi

    # Save the other lines for later processing.
    print -r -- "$errLine" >> $errMsg2

  done  # end of while read -r -u5 errLine
  exec 5<&-   # Done with the input file.

  # Prepare the file for the next iteration.
  $mv $errMsg2 $errMsg

  return 0

}  #----- end of function showErrorMessages  ----------------


######################################################################
#
# Function:  Run tsctl clusterNodeInfo on the first node that is able
#            to execute it (the local node is tried first) and display
#            its output under the "Summary information" header.
#            The tsctl command works even before a node has joined
#            the cluster, but then all counters are reported as zero.
#
# Input:     None.
#
# Output:    Summary information from tsctl clusterNodeInfo on stdout,
#            or an error message on stderr.
#
# Returns:   0       - no errors encountered.
#            nonzero - the remapped return code from the last node
#                      that was tried (unexpected error, or none of
#                      the nodes have joined the cluster yet).
#
######################################################################
function showSummaryCounters  #
{
  typeset sourceFile="mmgetstate.sh"
  [[ -n $DEBUG || -n $DEBUGshowSummaryCounters ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodeName
  typeset rc=0
  typeset tryNext=""   # "yes" if the last node tried could not serve the request


  #-----------------------------------------------------------------------------
  # tsctl clusterNodeInfo has to run on a node that has joined the cluster.
  # On a node where GPFS is started but has not joined yet, the command
  # works but returns zero for every counter.  mmcommon onactive is not
  # an option here:  it bypasses nodes that are waiting for quorum, while
  # tsctl clusterNodeInfo does work on such nodes.
  #-----------------------------------------------------------------------------

  # With -a the node file already lists the reliable names of all
  # nodes in the cluster.  In all other cases build that list now.
  if [[ -z $aflag ]]
  then
    getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile
  fi

  # Walk through the nodes, local node first, until one of them
  # actually executes the command.
  for nodeName in $(print -- "$ourNodeName" ; $grep -v -w "$ourNodeName" $nodefile)
  do
    run on1 $nodeName adminCmd tsctl clusterNodeInfo > $tmpfile 2> $errMsg2
    rc=$(remapRC $?)

    # Daemon down, node not joined, or node waiting for quorum are
    # acceptable errors:  some other node may still be able to answer.
    tryNext=""
    [[ $rc -eq $MM_DaemonDown || $rc -eq $MM_NotJoined || $rc -eq $MM_QuorumWait ]] &&  \
      tryNext=yes

    # Anything else means the command was executed (it either
    # succeeded or failed), so there is no point in looking further.
    [[ -z $tryNext ]] && break
  done  # end of for nodeName in ...

  # Display header "Summary information:"
  header=$(printInfoMsg 405)
  printf "\n%s\n%.${#header}s\n" "$header" "$underline"

  # Report the outcome of the last tsctl call.
  if [[ $rc -eq 0 ]]
  then
    # Success.  Show the counters followed by a blank separator line.
    $cat $tmpfile
    print -- ""

  elif [[ -n $tryNext ]]
  then
    # Either all nodes are down, or the daemon is down on all nodes,
    # or none of the nodes have joined the cluster.
    printErrorMsg 354 $mmcmd

  else
    # Unexpected error.  Pass on whatever the command wrote to stderr.
    if [[ -s $errMsg2 ]]
    then
      $cat $errMsg2 1>&2
    fi
    printErrorMsg 171 "$mmcmd" "tsctl clusterNodeInfo on node $nodeName" $rc
  fi  # end of if [[ $rc -eq 0 ]]

  $rm -f $errMsg2
  return $rc

}  #----- end of function showSummaryCounters  ----------------


#######################
# Mainline processing
#######################

##################################
# Process each of the arguments.
##################################
# A request for help is recognized only as the first argument.
case $arg1 in
  '-?' | '-h' | '--help' | '--' )
    syntaxError "help" $usageMsg
    ;;
esac

# Each option may be given only once.  The node selection options
# (-a, -C, -N, -W, and the -w/-n pair) exclude each other; the check is
# made as each option is seen, against the options seen before it.
while getopts :aC:kLn:N:svw:W: OPT
do
  case $OPT in

    a) # All nodes in the cluster.
       if [[ -n $aflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       aflag="-$OPT"
       all="all"
       if [[ -n $Cflag$Wflag$wflag$nflag$Nflag ]]
       then
         syntaxError "invalidCombination"  \
           $usageMsg $aflag $Cflag $Wflag $wflag $nflag $Nflag
       fi
       ;;

    C) # Obsolete nodeset option; accepted and later treated as -a.
       # syntaxError "obsoleteOption" $usageMsg "-$OPT"
       if [[ -n $Cflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       Cflag="-$OPT"
       nodesetId="$OPTARG"
       if [[ -n $aflag$Wflag$wflag$nflag$Nflag ]]
       then
         syntaxError "invalidCombination"  \
           $usageMsg $Cflag $aflag $Wflag $wflag $nflag $Nflag
       fi
       ;;

    k) # Colon-separated output.
       if [[ -n $kflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       kflag="-$OPT"
       ;;

    L) # Detailed output.
       if [[ -n $Lflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       Lflag="-$OPT"
       ;;

    n) # Obsolete: comma-separated list of node numbers (may go with -w).
       if [[ -n $nflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       nflag="-$OPT"
       nodenums="$OPTARG"
       if [[ -n $Cflag$Wflag$aflag$Nflag ]]
       then
         syntaxError "invalidCombination"  \
           $usageMsg $nflag $Cflag $Wflag $aflag $Nflag
       fi
       ;;

    N) # Node list, node file, or node class.
       if [[ -n $Nflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       Nflag="-$OPT"
       nodenames="$OPTARG"
       if [[ -n $Cflag$Wflag$aflag$nflag$wflag ]]
       then
         syntaxError "invalidCombination"  \
           $usageMsg $Nflag $Cflag $Wflag $aflag $nflag $wflag
       fi
       ;;

    s) # Summary information.
       if [[ -n $sflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       sflag="-$OPT"
       ;;

    v) # Verbose: show intermediate error messages.
       if [[ -n $vflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       vflag="-$OPT"
       ;;

    w) # Obsolete: comma-separated list of node names (may go with -n).
       if [[ -n $wflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       wflag="-$OPT"
       nodenames="$OPTARG"
       if [[ -n $Cflag$Wflag$aflag$Nflag ]]
       then
         syntaxError "invalidCombination"  \
           $usageMsg $wflag $Cflag $Wflag $aflag $Nflag
       fi
       ;;

    W) # Obsolete: file with node names.
       if [[ -n $Wflag ]]
       then
         syntaxError "multiple" $noUsageMsg "-$OPT"
       fi
       Wflag="-$OPT"
       Wcoll="$OPTARG"
       if [[ -n $Cflag$aflag$wflag$nflag$Nflag ]]
       then
         syntaxError "invalidCombination"  \
           $usageMsg $Wflag $Cflag $aflag $wflag $nflag $Nflag
       fi
       ;;

    :) # An option that requires a value was given without one.
       syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    +[aCkLnNsvwW])
       # A known option letter introduced with "+" instead of "-".
       syntaxError "invalidOption" $usageMsg "$OPT"
       ;;

    *) # Anything else is an unknown option.
       syntaxError "invalidOption" $usageMsg $OPTARG
       ;;

  esac
done  # end of while getopts :aC:kLn:N:svw:W: OPT do

# Drop the processed options.  Nothing may be left after them.
shift OPTIND-1
if [[ $# -ne 0 ]]
then
  syntaxError "extraArg" $usageMsg $1
fi

# The colon-separated format always carries the detailed fields,
# therefore -k implies -L.
if [[ -n $kflag ]]
then
  Lflag="-L"
fi

# The obsolete -C option is not rejected; it is handled like -a.
if [[ -n $Cflag ]]
then
  aflag="-a"
fi


########################################################################
# Set up trap exception handling and call the gpfsInit function.
# It will ensure that the local copy of the mmsdrfs and the rest of the
# GPFS system files are up-to-date.  There is no need to lock the sdr.
########################################################################
# Install pretrap2 as the signal handler, then run gpfsInit without
# locking.  setGlobalVar is given the exit status of gpfsInit together
# with the output that gpfsInit produced, so it must follow immediately.
trap pretrap2 HUP INT QUIT KILL
gpfsInitOutput=$(gpfsInit nolock)
setGlobalVar $? $gpfsInitOutput


##############################################################
# Build $nodefile, the list of nodes to be queried.  The file
# is not created if only the local node is to be checked.
##############################################################
if [[ -n $aflag ]]
then
  # -a (or the obsolete -C):  take all nodes from the mmsdrfs file.
  getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

elif [[ -n $Nflag ]]
then
  # -N:  let createVerifiedNodefile turn the specification
  # into a file containing admin node names.
  createVerifiedNodefile $nodenames $REL_HOSTNAME_Field no $nodefile
  if [[ $? -ne 0 ]]
  then
    cleanupAndExit
  fi

elif [[ -n $Wflag ]]
then
  # -W:  the argument must be a regular file that we can read.
  if [[ ! -f $Wcoll || ! -r $Wcoll ]]
  then
    printErrorMsg 43 $mmcmd $Wcoll
    cleanupAndExit
  fi

  # Keep only the lines that neither mention localhost
  # nor start with a comment character.
  $grep -v -e "localhost" -e "^#" "$Wcoll" > $tmpfile

  if [[ ! -s $tmpfile ]]
  then
    # Nothing is left; no node names were specified.
    printErrorMsg 328 $mmcmd $Wcoll
    cleanupAndExit
  else
    # Convert the remaining entries into admin node names.
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    if [[ $? -ne 0 ]]
    then
      cleanupAndExit
    fi
  fi

else
  # No node selection at all, or some combination of -w and -n.

  # Write the entries of the -w list (node names) followed by the
  # entries of the -n list (node numbers) into a file, one per line.
  # An option that was not specified contributes nothing.
  $rm -f $tmpfile
  for commaList in "$nodenames" "$nodenums"
  do
    [[ -z $commaList ]] && continue
    for i in $(print $commaList | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  done

  # If there is anything in the file, convert it into admin node names.
  # Otherwise there is no node file and only the local node is checked.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    if [[ $? -ne 0 ]]
    then
      cleanupAndExit
    fi
  fi

fi  # end of if [[ -n $aflag ]]


# Ensure we have the proper credentials.
[[ $getCredCalled = no ]] && getCred


################################
# Get the quorum state.
#
# On completion:
#   $tmpfile - one "<nodeName>: mmGetState:..." line per node that answered
#   $errMsg  - everything else (error messages)
#   rc       - return code from the mmremote or mmcommon call
################################
$rm -f $tmpfile $tmpfile2 $errMsg
if [[ ! -s $nodefile ]]
then
  # This is a request to query the local node only.
  mmremoteOutput=$($mmremote mmGetState $Lflag 2>$errMsg)
  rc=$?
  # Prefix the result with the node name so that it has the same
  # layout as the lines collected from mmcommon onall below.
  [[ -n $mmremoteOutput ]] &&  \
    print "${ourNodeName}: ${mmremoteOutput}" > $tmpfile
else
  # This is a request to query other nodes as well.
  $mmcommon onall $nodefile $unreachedNodes mmGetState $Lflag > $tmpfile2 2>&1
  rc=$?

  # Split the results into two files:  the first will contain only
  # the lines with the GPFS state information.  The second file
  # will contain all error messages.
  $grep    "mmGetState:" $tmpfile2 > $tmpfile
  $grep -v "mmGetState:" $tmpfile2 > $errMsg

  # Sort the results based on nodesetId and node number
  # (colon-separated fields 3 and 4; field 4 numerically).
  $sort -t: -k 3,3 -k 4,4n $tmpfile -o $tmpfile
fi  # end of if [[ ! -s $nodefile ]]

if [[ ! -s $tmpfile ]]
then
  # We didn't get anything back.  Give up.
  # Make sure that a failure is reported even if the call returned zero.
  [[ $rc -eq 0 ]] && rc=1
  [[ -s $errMsg ]] && $cat $errMsg 1>&2
  # Command failed.
  printErrorMsg 389 $mmcmd
  # Exit with the return code determined above; it was previously
  # computed but never handed to cleanupAndExit.
  cleanupAndExit $rc
fi


################################
# Display the results.
################################

# Print the appropriate header line.
# This depends on the specified formatting option.
# Print the header line and underline it.  The colon-separated (-k)
# format has no header.
#   496: "Node number  Node name  Quorum  Nodes up  Total nodes  GPFS state  Remarks"
#   497: "Node number  Node name  GPFS state"
if [[ -z $kflag ]]
then
  if [[ -n $Lflag ]]
  then
    header=$(printInfoMsg 496)
  else
    header=$(printInfoMsg 497)
  fi
  printf "\n%s\n%.${#header}s\n" "$header" "$underline"
fi  # end of if [[ -z $kflag ]]

# Go through the mmGetState output, one node per line.
# The fields within a line are separated by colons.
IFS=":"
exec 3<&-
exec 3< $tmpfile
while read -u3 inLine
do
  # Break the line into its fields (IFS is ":" at this point).
  set -f ; set -- $inLine ; set +f
  nodeName=$1
  magicWord=${2# }    # drop the blank that follows "<nodeName>:"
  nodeset=$3
  nodeNumber=$4
  shortName=$5
  quorum=$6
  nodesUp=$7
  nodesTotal=$8
  state=$9
  quorumDesignation=${10}

  # Everything below runs with the default field separators.
  IFS="$IFS_sv"

  # The remarks field is filled in only for quorum nodes.
  remarks=""
  [[ $quorumDesignation = $quorumNode ]] &&  \
    remarks=$(printInfoMsg 431)  # "quorum node"

  # With -v, show the error messages that came from this node.
  if [[ -n $vflag ]]
  then
    showErrorMessages $nodeName
  fi

  # Print the line for this node in the requested format.
  if [[ -z $kflag && -z $Lflag ]]
  then
    # Default information.
    printf "%8s %4s %-16s %s\n"  \
           "$nodeNumber" "$BLANKchar" "$shortName" "$state"

  elif [[ -z $kflag ]]
  then
    # Extended information (-L).
    printf "%8s %4s %-16s %3s %8s %10s %5s %-7s %3s %s\n"  \
           "$nodeNumber" "$BLANKchar" "$shortName" "$quorum" "$nodesUp"  \
           "$nodesTotal" "$BLANKchar" "$state" "$BLANKchar" "$remarks"

  else
    # Colon-separated output (-k).
    print "gpfs:$nodeNumber:$shortName:$quorum:$nodesUp:$nodesTotal:$state:$remarks"
  fi

  # The next line must again be split at the colons.
  IFS=":"

done  # end of while read -u3 inLine

# Back to the default IFS settings.
IFS="$IFS_sv"

# With -v, show whatever error messages have not been displayed yet.
if [[ -s $errMsg && -n $vflag ]]
then
  $cat $errMsg  1>&2
fi

# With -s, get the summary statistics from the daemon.
# The result of that step becomes the return code of the command.
if [[ -n $sflag ]]
then
  showSummaryCounters
  rc=$?
fi

# Tell the user which nodes (if any) could not be reached.
if [[ -s $unreachedNodes ]]
then
  # The following nodes could not be reached: . . .
  printErrorMsg 270 $mmcmd
  $cat $unreachedNodes 1>&2
fi

cleanupAndExit $rc

