#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
#  
#  
# Licensed Materials - Property of IBM 
#  
# (C) COPYRIGHT International Business Machines Corp. 2000,2006 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 
# @(#)78 1.32.1.2 src/avs/fs/mmfs/ts/admin/mmstartup.sh, mmfs, avs_rgpfs24, rgpfs24s003a 5/26/06 09:48:07
##############################################################################
#
#  The GPFS daemons on the specified nodes will be started.
#
#  Usage:
#
#    mmstartup [-a | -N {Node[,Node...] | NodeFile | NodeClass}]
#              [-E EnvVar=Value...] [ -T ]
#
#  where:
#
#    -a            Start the daemon on all nodes in the GPFS cluster.
#
#    -N Node,Node,...  Specify the nodes on which the daemon is to be started.
#    -N NodeFile       NodeClass may be one of several possible node classes
#    -N NodeClass      (e.g., quorumnodes, managernodes, nsdnodes, etc.)
#
#    -E EnvVar=Value  Specify an environment variable to be passed to the GPFS
#                     daemon.  More than one -E option can be specified.
#
#    -T            start tracing at daemon startup and cut a trace report
#                  at daemon shutdown
#
#
#  If not explicitly specified otherwise, the daemon is started on the local
#  node only.
#
#
#  Undocumented options:
#
#    -e seconds    Estimated startup time (in seconds).  During this time
#                  period, the mmsdrfs file is guaranteed not to change.
#                  Specifying a value of 0 disables the locking that would
#                  otherwise take place for large non-sp clusters.
#
#    -f            Force the loading of the kernel extensions (Linux only).
#
#    -G            Force trace records to be cut on all nodes when there is
#                  a daemon failure on some node.
#
#    -t {yes|traceFile}  Start the mm commands tracing facility.  If traceFile
#                   is specified, it must be a fully qualified pathname.
#                   Otherwise, the trace results are appended to file
#                   /tmp/mmfs/mmScriptTrace.
#
#
#  Obsolete options:
#
#    -C NodesetId  Start the daemon on all nodes in the specified nodeset.
#                  Assumed to be the same as -a.
#
#    -W NodeFile   Start the daemon on all nodes whose reliable hostnames
#                  are listed one per line in NodeFile.
#                  Cannot be specified with -a, -N, -w, or -n.
#
#    -w nodenames  Start the daemon on all nodes whose reliable hostnames
#                  are in the comma-separated nodenames list.
#                  Cannot be specified with -a, -N or -W.
#                  If both -w and -n are specified, the lists are combined.
#
#    -n nodenums   Start the daemon on all nodes whose node numbers are
#                  in the comma-separated nodenums list.
#                  Cannot be specified with -a, -N, or -W.
#                  If both -w and -n are specified, the lists are combined.
#
##############################################################################

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

# Name of this script; NOTE(review): presumably consumed by the sourced
# service routines for diagnostics - confirm against mmglobfuncs.
sourceFile="mmstartup.sh"

# Turn on shell tracing if either the global or the per-command debug
# variable is set.
if [[ -n $DEBUG || -n $DEBUGmmstartup ]]
then
  set -x
fi

# Invoke the trace-entry hook with the original command line arguments.
$mmTRACE_ENTER "$*"


# Local variables
#
# Every variable that the option parser appends to or tests must start
# out empty, so that a value inherited from the caller's environment
# cannot leak into the command that is eventually sent to the nodes.

usageMsg=391           # message number handed to syntaxError for the usage text
typeset -i timeout=0
typeset -i nodes=0     # number of lines in $nodefile (set further below)
rc=0                   # exit code handed to cleanupAndExit at the very end
nodenames=""           # argument of -N or -w
nodenums=""            # argument of -n
wcoll=""               # argument of -W (file with node names)
envString=""           # accumulated "-E EnvVar=Value" options for the daemon

# One flag per command line option; empty means "not specified".
aflag=""
Cflag=""
eflag=""
Eflag=""
Estring=""
fflag=""
Gflag=""
nflag=""
Nflag=""
Tflag=""
tflag=""
Wflag=""
wflag=""
nodeList=""



#######################
# Mainline processing.
#######################


##################################
# Process each of the arguments.
##################################
# If the first argument is one of the help-style spellings, hand off to
# syntaxError with the "help" keyword and the usage message number.
case $arg1 in
  '-?' | -h | --help | --)
    syntaxError "help" $usageMsg
    ;;
esac

# Parse the command line options.
#
# The leading ':' in the option string selects silent error reporting:
# a missing option value is delivered as OPT=':' and the offending
# option letter is placed in OPTARG.
#
# Each single-use option remembers that it was seen in its own <x>flag
# variable; a second occurrence is rejected with a "multiple" syntax
# error.  The node selection options (-a, -C, -N, -W, -w, -n) are checked
# against each other as they are encountered; only -w and -n may be
# combined.
while getopts :aC:e:E:fGn:N:t:Tw:W: OPT
do
  case $OPT in

    a) # All nodes in the cluster.
       [[ -n $aflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       aflag="-$OPT"
       [[ -n $Cflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $aflag $Cflag $Wflag $wflag $nflag $Nflag
       ;;

    C) # Obsolete nodeset option; converted to -a after the loop.
       # syntaxError "obsoleteOption" $usageMsg "-$OPT"
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       nodesetId="$OPTARG"
       [[ -n $aflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Cflag $aflag $Wflag $wflag $nflag $Nflag
       ;;

    e) # Estimated startup time; the value is passed through checkIntRange.
       [[ -n $eflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       eflag="-$OPT"
       estimatedStartupTime=$(checkIntRange estimatedStartupTime "$OPTARG")
       [[ $? -ne 0 ]] && cleanupAndExit
       ;;

    E) # more than one -E option is allowed
       envString="${envString} -$OPT $OPTARG"
       ;;

    f) # Only accepted when $osName is Linux.
       [[ -n $fflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       fflag="-$OPT"
       [[ $osName != Linux ]] && syntaxError "invalidOption" $usageMsg "-$OPT"
       ;;

    G) # Global trace scope; combined with -T after the loop.
       [[ -n $Gflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Gflag="-$OPT"
       ;;

    n) # Obsolete: comma-separated node numbers (may be combined with -w).
       [[ -n $nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       nflag="-$OPT"
       nodenums="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $nflag $Cflag $Wflag $aflag $Nflag
       ;;

    N) # Node list, node file, or node class.
       [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $nflag || -n $wflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Nflag $Cflag $Wflag $aflag $nflag $wflag
       ;;

    t) # mm commands tracing; the option and its value are forwarded as is.
       [[ -n $tflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       tflag="-$OPT $OPTARG"
       ;;

    T) # Daemon tracing; the scope is decided after the loop.
       [[ -n $Tflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Tflag="-$OPT"
       ;;

    w) # Obsolete: comma-separated node names (may be combined with -n).
       [[ -n $wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       wflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $wflag $Cflag $Wflag $aflag $Nflag
       ;;

    W) # Obsolete: file with node names, one per line.
       [[ -n $Wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Wflag="-$OPT"
       wcoll="$OPTARG"
       [[ -n $Cflag || -n $aflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Wflag $Cflag $aflag $wflag $nflag $Nflag
       ;;

    :) # An option that requires a value was given without one.
       syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    +[aCeEfGnNtTwW])
       # ksh getopts reports an option introduced with '+' as "+x" in OPT.
       # None of the options has a '+' form, so reject every known letter
       # (including G) and name the option exactly as it was typed.
       syntaxError "invalidOption" $usageMsg "$OPT"
       ;;

    *) # Unknown option letter; getopts leaves it in OPTARG.
       syntaxError "invalidOption" $usageMsg $OPTARG
       ;;

  esac
done

# Drop the options that getopts consumed.  The command takes no
# positional arguments, so anything left over is reported as an error.
shift $(( OPTIND - 1 ))
if [[ $# -ne 0 ]]
then
  syntaxError "extraArg" $usageMsg $1
fi

# The obsolete -C option is not rejected; it is handled as if -a had
# been specified.
if [[ -n $Cflag ]]
then
  aflag="-a"
fi


########################################################################
# Install the signal handler and run gpfsInit.
# gpfsInit will ensure that the local copy of the mmsdrfs and the rest
# of the GPFS system files are up-to-date.  The sdr does not need to be
# locked at this point, hence the "nolock" argument.
########################################################################
trap pretrap2 HUP INT QUIT KILL

# Save the return code right away; it is handed to setGlobalVar together
# with the (deliberately unquoted) output of gpfsInit.
gpfsInitOutput=$(gpfsInit nolock)
gpfsInitRc=$?
setGlobalVar $gpfsInitRc $gpfsInitOutput


#######################################################
# Create a file containing all of the specified nodes.
#
#   -a (or -C)   all nodes returned by getNodeList
#   -N           the names in $nodenames, verified
#   -W           the names listed in the file $wcoll, verified
#   -w and/or -n the names and numbers from the command line, verified
#
# If none of these options was given, $nodefile is not created here
# and the daemon is later started on the local node only.
#######################################################
if [[ -n $aflag ]]
then
  # Get a list of the nodes.
  getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

elif [[ -n $Nflag ]]
then
  # Convert the passed data into a file containing admin node names.
  createVerifiedNodefile $nodenames $REL_HOSTNAME_Field no $nodefile
  [[ $? -ne 0 ]] && cleanupAndExit

elif [[ -n $Wflag ]]
then
  # Verify the input file is readable.
  # The -W argument is stored in $wcoll by the option parser; shell
  # variable names are case sensitive, so it must be spelled that way.
  if [[ ! -f $wcoll || ! -r $wcoll ]]
  then
    printErrorMsg 43 $mmcmd $wcoll
    cleanupAndExit
  fi

  # Filter out comment lines and localhost entries.
  $grep -v -e "localhost" -e "^#" "$wcoll" > $tmpfile

  # Convert any entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # No node names were specified.
    printErrorMsg 328 $mmcmd $wcoll
    cleanupAndExit
  fi

else
  # Either no option was specified, or we have some combination of -w and -n.

  # Convert the node names list (if any) into a file.
  $rm -f $tmpfile
  if [[ -n $nodenames ]]
  then
    for i in $(print $nodenames | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Append the node number list (if any) to the node file.
  if [[ -n $nodenums ]]
  then
    for i in $(print $nodenums | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Convert the entries in the node file into admin node names.
  # An empty or missing $tmpfile means no nodes were named at all.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  fi

fi  # end of if [[ -n $aflag ]]


# Ensure we have the proper credentials: getCred is called only while
# getCredCalled still has the value "no".
if [[ $getCredCalled = no ]]
then
  getCred
fi


##############################################################
# If starting GPFS on a large number of nodes, lock the sdr.
# The goal is to prevent the config data from changing and to
# inform the rest of the nodes that they can trust their data
# without having to check with the server nodes first.
#
# On exit from this section, expirationData is either empty or
# holds the "-e <token>" option to be passed to the nodes.
##############################################################

# Count the nodes to be started (stays 0 if there is no node file).
if [[ -s $nodefile ]]
then
  nodes=$($cat $nodefile | $wc -l)
fi

# Decide whether to lock:
#   without -e : only when more than 32 nodes are being started;
#   with -e    : only when the given value is greater than zero.
lockSdr=no
if [[ -z $eflag ]]
then
  [[ $nodes -gt 32 ]] && lockSdr=yes
elif [[ $estimatedStartupTime -gt 0 ]]
then
  lockSdr=yes
fi

if [[ $lockSdr = yes ]]
then
  # A value given with -e is used as is.  Otherwise, pick
  # a number based on the number of nodes to start.
  if [[ -z $eflag ]]
  then
    if (( nodes < 128 ))
    then
      estimatedStartupTime=90
    elif (( nodes < 256 ))
    then
      estimatedStartupTime=150
    else
      estimatedStartupTime=240
    fi

    # Allow 10 more seconds for every line in the startup mount file.
    fsToMount=$($cat $startupMountFile 2>/dev/null | $wc -l )
    if [[ $fsToMount -gt 0 ]]
    then
      (( estimatedStartupTime = estimatedStartupTime + 10 * $fsToMount ))
    fi
  fi  # end of if [[ -z $eflag ]]

  # Build the special lock id and try to obtain the sdr lock with it.
  expLockId="mmSdrLockExp:$ourNodeName:$estimatedStartupTime"
  gpfsInitOutput=$(gpfsInit $expLockId 2>/dev/null)
  rc=$?

  if [[ $rc -ne 0 ]]
  then
    # gpfsInit failed and we did not get the lock.
    # Ignore the error and continue with the regular processing.
    expirationData=""
  else
    # We got the lock.  Parse the output to get the latest
    # generation number and corresponding timestamp.
    setGlobalVar $rc $gpfsInitOutput

    # Create the expiration token to be passed to the nodes.
    currentTime=$($perl -e 'print time')
    (( expirationTime = currentTime + estimatedStartupTime ))
    expirationData="mmSdrLockExp:$sdrGenNumber:$sdrGenTimestamp:$expirationTime"

    # Start the background process that will free the lock.
    # Reset the sdrLocked var to prevent the unlocking that
    # would otherwise take place as part of cleanupAndExit.
    sdrLocked=no
    $mmcommon expirationDataCleanup $expirationData unlock >/dev/null 2>&1 &

    # Add an option letter in front of the string.
    expirationData="-e $expirationData"
  fi  # end of if [[ $rc -ne 0 ]]

else
  # Do not do anything special if starting just a few nodes,
  # or the user explicitly requested no locking (-e 0 on the command line).
  expirationData=""
fi  # end of if [[ $lockSdr = yes ]]


####################################################################
# If daemon tracing is specified, decide what its scope should be.
#
#   -G (with or without -T)   ->  "-T global"
#   -T alone                  ->  "-T local"
#   neither                   ->  no tracing option at all
####################################################################
if [[ -n $Gflag ]]
then
  Tflag="-T global"
elif [[ -n $Tflag ]]
then
  Tflag="-T local"
else
  Tflag=""
fi  # end of if [[ -n $Gflag ]]


############################################
# Start GPFS daemon on the specified nodes.
############################################

# Starting GPFS ...
printInfoMsg 392 "$(date)" $mmcmd

if [[ -s $nodefile ]]
then
  # A node file exists: run startSubsys on every node listed in it.
  # The file named by $unreachedNodes is examined afterwards.
  $mmcommon onall $nodefile $unreachedNodes  \
    startSubsys $expirationData $fflag $Tflag $tflag $envString
  rc=$?
else
  # No nodes were specified: start the local daemon only.
  $mmremote startSubsys $fflag $Tflag $tflag $envString
  rc=$?
fi

# If any nodes could not be reached, tell the user which ones.
if [[ -s $unreachedNodes ]]
then
  # The following nodes could not be reached: . . .
  printErrorMsg 270 $mmcmd
  $cat $unreachedNodes 1>&2
fi

# Exit with the return code of the start request.
cleanupAndExit $rc

