#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
#  
#  
# Licensed Materials - Property of IBM 
#  
# (C) COPYRIGHT International Business Machines Corp. 1997,2007 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 
# @(#)30 1.103.3.4 src/avs/fs/mmfs/ts/admin/mmrpldisk.sh, mmfs, avs_rgpfs24, rgpfs24s010a 2/15/07 03:27:20
#############################################################################
#
# Usage:
#   mmrpldisk Device DiskName {DiskDesc | -F DescFile} [-v {yes | no}]
#             [-N {all | mount | Node[,Node...] | NodeFile | NodeClass}]
#
# where
#   Device     is the file system device name
#   DiskName   is the name of the disk to replace ($olddisk)
#   DiskDesc   is a single disk descriptor
#   DescFile   is a file containing a single disk descriptor
#   -v         make sure the disk does not belong to another file system
#   -N         parallel restripe options:
#                all        - use all of the nodes in the cluster
#                mount      - use only the nodes on which the fs is mounted
#                node list  - use only the nodes in the list
#                node file  - use only the nodes in the file
#                node class - use only the nodes in the class
#
#############################################################################

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef
. /usr/lpp/mmfs/bin/mmfsfuncs

sourceFile=mmrpldisk.sh
# Turn on shell tracing if either the global or the per-command debug
# variable has a non-empty value.
[[ -n ${DEBUG}${DEBUGmmrpldisk} ]] && set -x
$mmTRACE_ENTER "$*"


# Message number handed to syntaxError when the usage text is wanted.
usageMsg=295


# Private work files.  The naming convention is
#   fn=${tmpDir}fn.${mmcmd}.$$
# and every such file must also be listed in LOCAL_FILES.
tempsdrfs=${tmpDir}tempsdrfs.${mmcmd}.$$
descfile=${tmpDir}descfile.${mmcmd}.$$

LOCAL_FILES=" ${descfile} ${tempsdrfs} "


# Local functions


#######################
# Mainline processing
#######################


#######################################################################
# Process the positional command line arguments:
#   arg1 - file system device name
#   arg2 - name of the disk to be replaced
#   arg3 - either a disk descriptor, or -F followed by a file (arg4)
#          that holds exactly one disk descriptor
# On completion mmDiskDesc holds the descriptor of the new disk and
# the positional arguments consumed here have been shifted away.
#######################################################################
case $arg1 in
  '-?' | '-h' | '--help' | '--' )
    syntaxError "help" $usageMsg
    ;;
esac

if [[ $argc -lt 3 ]]
then
  syntaxError "missingArgs" $usageMsg
fi

device=$arg1     # file system name
olddisk=$arg2    # disk that is to be replaced

if [[ $arg3 = "-F" ]]
then
  # The descriptor comes from a file; the file name is mandatory.
  if [[ -n $arg4 ]]
  then
    # Check that the file exists and take a private copy of it.
    checkUserFile $arg4 $descfile
    if [[ $? -ne 0 ]]
    then
      cleanupAndExit
    fi
    shift 4
  else
    syntaxError "missingFile" $usageMsg
  fi

  # Scan our copy of the file.  Exactly one line that is neither
  # blank nor a comment is expected; it becomes the descriptor.
  exec 3<&-
  exec 3< $descfile
  while read -u3 inputLine
  do
    # Ignore white-space-only lines and comment lines.
    if [[ $inputLine = *([$BLANKchar$TABchar])   ||
          $inputLine = *([$BLANKchar$TABchar])#* ]]
    then
      continue
    fi

    if [[ -n $mmDiskDesc ]]
    then
      # A second descriptor line was found; only one is allowed.
      printErrorMsg 537 $mmcmd
      cleanupAndExit
    else
      mmDiskDesc=$inputLine
    fi
  done

  if [[ -z $mmDiskDesc ]]
  then
    # No descriptor line was found in the file.
    printErrorMsg 538 $mmcmd
    cleanupAndExit
  fi
else
  # Without -F, the third argument is the descriptor itself.
  mmDiskDesc=$arg3
  shift 3
fi


# Parse the optional parameters that follow the positional arguments.
#   -v {yes|no}   stored in vflag as "-v <value>"
#   -N <nodes>    raw value kept in nodeList, and Nflag set to "-N <value>"
# Each option may be given only once.  The leading ':' in the option
# string selects silent error reporting, so a missing option value shows
# up as OPT=":" and an unknown option falls through to the default arm.
while getopts :v:N:  OPT
do
  case $OPT in
    v) [[ -n $vflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       if [[ $OPTARG = yes || $OPTARG = no ]]
       then
         vflag="-v $OPTARG"
       else
         syntaxError "YesNoValue" $noUsageMsg "-$OPT"
       fi
       ;;

    N) [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       nodeList="$OPTARG"
       Nflag="-N $OPTARG"
       ;;

    :) syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    # ksh getopts reports an option introduced with '+' by prefixing
    # the option letter with '+' (e.g. "+v").  Such forms are rejected.
    # The bracket expression must be closed for the pattern to match.
    +[vN])
       syntaxError "invalidOption" $usageMsg $OPT
       ;;

    *) syntaxError "invalidOption" $usageMsg $OPTARG
       ;;
  esac
done

# Drop the options just parsed; nothing else may remain on the command line.
shift OPTIND-1
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1


#######################################################################
# Install the pre-commit signal handler and run gpfsInit.  According
# to the original notes for this step, gpfsInit brings the local copy
# of the mmsdrfs and the other GPFS system files up to date and
# obtains the sdr lock.  Its exit status and output are handed to
# setGlobalVar.
#######################################################################
trap pretrap HUP INT QUIT KILL
gpfsInitOutput=$(gpfsInit $lockId)
setGlobalVar $? $gpfsInitOutput

# On non-AIX systems, determine the host name resolution order.
if [[ $osName != AIX ]]
then
  resolveOrder=$(setHostResolveOrder)
fi


#######################################################################
# Look up the file system in the mmsdrfs file.  An empty answer from
# findFS ends the command.  The command is refused if the file system
# belongs to a cluster other than the home cluster.
#######################################################################
findFSoutput=$(findFS "$device" $mmsdrfsFile)
if [[ -z $findFSoutput ]]
then
  cleanupAndExit
fi

# Split the findFS output into words (with globbing disabled) and pick
# out the values needed below.  The fourth word is not used here.
set -f
set -- $findFSoutput
set +f
fqDeviceName=$1
deviceName=$2
fsHomeCluster=$3
oddState=$5

if [[ $fsHomeCluster != $HOME_CLUSTER ]]
then
  # The command is not allowed for remote file systems.
  printErrorMsg 106 $mmcmd $device $fsHomeCluster
  cleanupAndExit
fi

# Check whether some of the disks in the file system may be in an odd state.
# If findFS reported oddState=yes, a working copy of the mmsdrfs file is
# reconciled with the daemon and committed before any other change is made.
if [[ $oddState = yes ]]
then
  # Some of the disks in the file system appear to be in an odd state.
  # Reconcile the sdrfs file with the GPFS daemon's view of the filesystem.
  # The reconciliation works on $newsdrfs, a copy of the current mmsdrfs file.
  $cp $mmsdrfsFile $newsdrfs
  reconcileSdrfsWithDaemon $deviceName $newsdrfs
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # reconcileSdrfsWithDaemon failed.
    printErrorMsg 171 $mmcmd reconcileSdrfsWithDaemon $rc
    # Tell the user to run mmcommon recoverfs against the file system.
    printErrorMsg 103 $mmcmd $deviceName $deviceName
    cleanupAndExit
  fi

  # Obtain the generation number from the version line of the new sdrfs file.
  # The version line is the first line of the file; it is split on ':'
  # (with globbing disabled) and the sixth field is taken.
  versionLine=$($head -1 $newsdrfs)
  IFS=':'
  set -f ; set -- $versionLine ; set +f
  newGenNumber=$6
  IFS="$IFS_sv"

  # Commit the reconciled version of the sdrfs file to the server
  # so the admin scripts and the daemon are in sync.
  # The output of commitChanges replaces the value of gpfsObjectInfo.
  trap "" HUP INT QUIT KILL    # Disable interrupts until the commit is done.
  gpfsObjectInfo=$(commitChanges  \
    $fsHomeCluster $nsId $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer)
  if [[ $? -ne 0 ]]
  then
    # We were unable to replace the file in the sdr.
    printErrorMsg 381 $mmcmd
    # Tell the user to run mmcommon recoverfs against the filesystem.
    printErrorMsg 190 $mmcmd $deviceName $deviceName
    printErrorMsg 104 $mmcmd mmrpldisk
    cleanupAndExit
  fi
  # The commit is done; switch to the post-commit signal handler.
  trap posttrap HUP INT QUIT KILL
fi  # end of if [[ $oddState = yes ]]


#######################################################################
# The -N values "all" and "mount" are passed on as given.  Any other
# -N value is run through createVerifiedNodefile (requesting the daemon
# node name field), and the contents of the resulting node file are
# joined with commas to form the new -N value.
#######################################################################
if [[ -n $Nflag ]]
then
  case $nodeList in
    all | mount )
      ;;

    * )
      createVerifiedNodefile "$nodeList" $DAEMON_NODENAME_Field no $nodefile
      if [[ $? -ne 0 ]]
      then
        cleanupAndExit
      fi

      # Turn the contents of the node file into a comma-separated list.
      newNodeList=$(print -- $(cat $nodefile) | $sed 's/ /,/g')
      Nflag="-N $newNodeList"
      ;;
  esac
fi


#######################################################################
# The disk name is the first colon-separated field of the descriptor.
# A descriptor without a disk name is rejected.
#######################################################################
IFS=":"
set -f
set -- $mmDiskDesc
set +f
diskName=$1
IFS="$IFS_sv"

case $diskName in
  "" )
    # The disk descriptor does not specify a disk name.
    printErrorMsg 23 $mmcmd
    cleanupAndExit
    ;;
esac


########################################################################
# Create the new version of the mmsdrfs file.
#
# It will have a new generation number and the SG_DISKS lines
# for the involved disks will have new values.
#
# Simultaneously, we will make sure that the new disk does not already
# belong to some other file system, and we will create a list of the
# nodes in the cluster.
#
# The loop reads $mmsdrfsFile line by line and writes $newsdrfs.
# Along the way it sets:
#   newGenNumber          - generation number of the new file
#   preferredNode         - reliable host name of the node running this command
#   fsOddStateList        - other file systems whose "odd state" flag is set
#   oldDiskFound, seqNo   - set when the line of the disk being replaced is seen
#   sgdiskLine            - SG_DISKS line of the replacement disk (not copied
#                           to $newsdrfs here; the validated line is added later)
#   singleDiskFileSystem  - set to "no" if the file system has any other disk
# and it fills $nodefile (all nodes) and $allQuorumNodes (quorum nodes).
########################################################################
$rm -f $newsdrfs $nodefile $allQuorumNodes
IFS=":"
exec 3<&-
exec 3< $mmsdrfsFile
while read -u3 sdrfsLine
do
  # Parse the line.  The line is split on ':' into array v; the literal
  # "-" occupies element 0, so the first field of the line is v[1].
  set -f ; set -A v -- - $sdrfsLine ; set +f

  IFS="$IFS_sv"    # Restore the default IFS settings.
  printLine=true   # Assume the line will be printed.

  case ${v[$LINE_TYPE_Field]} in

    $VERSION_LINE )  # This is the global header line.
       # Increment the generation number.
       newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
       v[$SDRFS_GENNUM_Field]=$newGenNumber
       ;;

    $MEMBER_NODE )  # This line describes a node.
       # Add the reliable node name to nodefile.
       print -- "${v[$REL_HOSTNAME_Field]}" >> $nodefile
       checkForErrors "writing to file $nodefile" $?

       # Create a list of the quorum nodes.
       if [[ ${v[$CORE_QUORUM_Field]} = $quorumNode ]]
       then
         print -- "${v[$REL_HOSTNAME_Field]}" >> $allQuorumNodes
         checkForErrors "writing to file $allQuorumNodes" $?
       fi

       # If this is the line for the node that is executing
       # this command, set the preferredNode variable.
       [[ ${v[$NODE_NUMBER_Field]} = $ourNodeNumber ]] &&  \
         preferredNode=${v[$REL_HOSTNAME_Field]}
       ;;

    $SG_HEADR )  # This is the header line for some file system.
       # Check whether the filesystem has disks that are in an "odd state".
       if [[ -n ${v[$ODD_STATE_Field]} && ${v[$ODD_STATE_Field]} != no ]]
       then
         # Is this filesystem a different one than the one for which
         # this command was invoked?
         if [[ ${v[$DEV_NAME_Field]} != $deviceName ]]
         then
           # The "odd state" flag is set for a different file system
           # than our filesystem.  Add the name of the file system to a
           # list of filesystems to be reported to the user later.
           fsOddStateList="${fsOddStateList} ${v[$DEV_NAME_Field]}"
         else
           # The "odd state" flag is set for the file system
           # for which this command was invoked.
           :  # Allow the command to proceed, since it may succeed.
              # We will report any failures if it does not.
         fi
       else
         # Is this filesystem the one for which this command was invoked?
         if [[ ${v[$DEV_NAME_Field]} = $deviceName ]]
         then
           # Set the "odd state" field in case we don't succeed.
           # We will reset it later if tsrpldisk succeeds.
           v[$ODD_STATE_Field]=mmrpldisk
         fi
       fi
       ;;

    $SG_DISKS )  # This line describes a disk.
       # If this is the line for the new disk (the replacement disk),
       # make sure that it is either a free disk or a replacement disk
       # from a prior mmrpldisk that did not complete.
       if [[ ${v[$DISK_NAME_Field]} = $diskName ]]
       then
         # Is the disk a free disk or a replacement disk from a
         # prior mmrpldisk that did not complete?  A leftover replacement
         # disk belongs to our file system and carries the "mmadd" disk
         # status, which is the status this command assigns to the
         # replacement disk and resets once tsrpldisk has succeeded.
         # The comparison uses the resolved device name, like every
         # other device name comparison in this loop.
         if [[ ${v[$NODESETID_Field]} = $FREE_DISK ||
               ( ${v[$DEV_NAME_Field]} = $deviceName  &&
                 ${v[$DISK_STATUS_Field]} = "mmadd" ) ]]
         then
           # We found the intended replacement disk.
           # Create a copy of the line in $sgdiskLine with a sequence
           # number equal to that of the disk being replaced; this will
           # be passed to the validateAndConvertNsdDescriptor routine.
           # Suppress printing of the line now, since we will use the
           # updated line produced by the validate routine.
           # NOTE(review): seqNo is only assigned when the line of the disk
           # being replaced has been read; if this line precedes it in the
           # file, seqNo is still empty here - confirm that the ordering of
           # the mmsdrfs file rules this out.
           v[$LINE_NUMBER_Field]=$seqNo
           sgdiskLine=$(print_newLine)
           printLine=false
         else
           # The disk already exists in another file system.
           # Issue an error and quit.
           printErrorMsg 265 $mmcmd $diskName ${v[$DEV_NAME_Field]}
           cleanupAndExit
         fi  # end of if the disk is free or a leftover replacement disk

       # If this is the line for the old disk (the disk being replaced),
       # make sure that it belongs to the specified file system and
       # change the disk status to indicate the disk should be deleted
       # (converted to a free disk) if tsrpldisk succeeds.
       elif [[ ${v[$DISK_NAME_Field]} = $olddisk ]]
       then
         if [[ ${v[$DEV_NAME_Field]} = $deviceName ]]
         then
           v[$EXCLUDE_Field]=$includedDisk
           v[$DISK_STATUS_Field]="mmdel"
           oldDiskFound=yes
           seqNo=${v[$LINE_NUMBER_Field]}
         else
           # The disk being replaced is not part of the specified file system.
           # Issue an error and quit.
           printErrorMsg 101 $mmcmd $olddisk $device
           cleanupAndExit
         fi

       # If this line is for some other disk in the same file system,
       # set a flag to indicate that this is not a single disk file system.
       elif [[ ${v[$DEV_NAME_Field]} = $deviceName ]]
       then
         singleDiskFileSystem=no

       fi  # end of if [[ ${v[$DISK_NAME_Field]} = $diskName ]]
       ;;

    * )  # Pass all other lines without a change.
       ;;

  esac  # end of "Change some of the fields . . . "

  # Build and write the line to the new mmsdrfs file.
  if [[ $printLine = true ]]
  then
    print_newLine >> $newsdrfs
    checkForErrors "writing to file $newsdrfs" $?
  fi

  IFS=":"  # Change the separator back to ":" for the next iteration.

done  # end while read -u3 sdrfsLine

IFS="$IFS_sv"    # Restore the default IFS settings.


# The scan of the mmsdrfs file must have found both disks.
case $oldDiskFound in
  "" )
    # The disk to be replaced does not belong to the file system.
    printErrorMsg 315 $mmcmd $olddisk $device
    cleanupAndExit
    ;;
esac

case $sgdiskLine in
  "" )
    # The replacement disk was not found.
    # The disk descriptor should refer to an existing NSD.
    printErrorMsg 415 $mmcmd "$mmDiskDesc"
    cleanupAndExit
    ;;
esac


##############################################################################
# Validate the descriptor of the replacement disk and convert it into the
# format recognized by the tsrpldisk command.  Passing oldDiskUsage tells
# the validate routine not to set the hasData and hasMetadata flags in the
# ts descriptor, so the replacement disk inherits the usage properties of
# the disk being replaced.  If the user specified diskUsage in the mm
# descriptor, the oldDiskUsage parameter has no effect.
##############################################################################
validateDescriptorOutput=$(validateAndConvertNsdDescriptor "$mmDiskDesc"  \
                  $sgdiskLine $deviceName $fsHomeCluster mmadd oldDiskUsage)
rc=$?
if [[ $rc -ne 0 ]]
then
  # The validate routine has already issued a message about the error.
  # Show the complete descriptor as well, as additional help.
  printErrorMsg 386 $mmcmd "$mmDiskDesc"
  cleanupAndExit
fi

# The validate routine returns two words: the updated SG_DISKS line
# and the descriptor in ts format.
set -f
set -- $validateDescriptorOutput
set +f
updatedDiskLine="$1"
tsDiskDesc="$2"

# Append the SG_DISKS line of the replacement disk to the new mmsdrfs file.
print -- $updatedDiskLine >> $newsdrfs
checkForErrors "writing to file $newsdrfs" $?

# Keep a copy of the current mmsdrfs file.  It may be needed
# to restore the mmsdrfs file if the tsrpldisk command fails.
$cp $mmsdrfsFile $oldsdrfs
checkForErrors cp $?


#############################################################################
# Put the new mmsdrfs file in the sdr.  This lets the getEFOptions call
# that tsrpldisk is about to make return a list of disks that does not
# include the disk being replaced.  The commit also acts as the
# "pre-commit" that marks an mmrpldisk as being in progress.
# A file system that consists of a single disk is rejected instead.
#############################################################################
trap "" HUP INT QUIT KILL    # No interrupts until the commit is done.
if [[ $singleDiskFileSystem != no ]]
then
  # mmrpldisk cannot be used to replace the only disk in a file system.
  printErrorMsg 360 $mmcmd $olddisk $device
  cleanupAndExit
else
  # Sort the file into its proper order before committing it.
  LC_ALL=C $SORT_MMSDRFS $newsdrfs -o $newsdrfs

  gpfsObjectInfo=$(commitChanges $fsHomeCluster $nsId  \
                     $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer)
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # The file in the sdr could not be replaced.
    printErrorMsg 381 $mmcmd
    cleanupAndExit
  fi
fi
trap posttrap HUP INT QUIT KILL


################################
# Invoke the tsrpldisk command.
#
# tsrpldisk is run through "mmcommon onactive".  If it fails, a
# recovery version of the mmsdrfs file is built in $newsdrfs,
# committed, and propagated, and the command exits with the
# tsrpldisk return code.
################################
# Issue "Replacing disk . . ." message.
printInfoMsg 96 $olddisk

$mmcommon onactive  \
    $preferredNode $nodefile $NO_FILE_COPY $NO_MOUNT_CHECK NULL $NO_LINK  \
    tsrpldisk "$fqDeviceName -d $olddisk -n $tsDiskDesc $vflag $Nflag"
rc=$?
if [[ $rc -ne 0 ]]
then
  # tsrpldisk failed.
  printErrorMsg 104 $mmcmd tsrpldisk

  # Was the GPFS daemon up anywhere for tsrpldisk to run?
  if [[ $rc -eq $MM_DaemonDown ]]
  then
    # tsrpldisk was never attempted because the daemon was not up anywhere.
    # Make a modified version of the original sdrfs file with the generation
    # number in the global header incremented by 2 but no other changes.
    # The input is $oldsdrfs, the copy of the mmsdrfs file taken before the
    # pre-commit.  The awk program appends every output line to $newsdrfs
    # and prints the new generation number on stdout, where it is captured
    # into newGenNumber.  The header line is rebuilt from its first 27
    # fields, each followed by ':'.
    # NOTE(review): the increment of 2 presumably makes the number exceed
    # that of the pre-committed file (original + 1) - confirm.
    $rm -f $newsdrfs
    newGenNumber=$($awk -F:  '                                                 \
      BEGIN { gen = 0 }                                                        \
      # If this is the global header line, increment the gen number.           \
      /^'$GLOBAL_ID:$VERSION_LINE:'/ {                                         \
        { gen = $'$SDRFS_GENNUM_Field' + 2 }                                   \
        { $'$SDRFS_GENNUM_Field' = gen }                                       \
        { print  $1":" $2":" $3":" $4":" $5":" $6":" $7":" $8":" $9":"$10":"   \
                $11":"$12":"$13":"$14":"$15":"$16":"$17":"$18":"$19":"$20":"   \
                $21":"$22":"$23":"$24":"$25":"$26":"$27":" >> "'$newsdrfs'" }  \
        { next }                                                               \
      }                                                                        \
      # All other lines are printed without change.                            \
      { print $0 >> "'$newsdrfs'" }                                            \
      END { print gen }                                                        \
    ' $oldsdrfs)
    checkForErrors awk $?
  else
    # Since tsrpldisk was attempted but failed, reconcile the
    # sdrfs file with the GPFS daemon's view of the filesystem.
    # The reconcile status goes into rc2 so that rc keeps the
    # tsrpldisk return code for use as the exit code.
    reconcileSdrfsWithDaemon $deviceName $newsdrfs
    rc2=$?
    if [[ $rc2 -ne 0 ]]
    then
      # reconcileSdrfsWithDaemon failed.
      printErrorMsg 171 $mmcmd reconcileSdrfsWithDaemon $rc2
      # Tell the user to run mmcommon recoverfs against the filesystem.
      printErrorMsg 190 $mmcmd $deviceName $deviceName
      cleanupAndExit $rc
    fi
    # Obtain the generation number from the version line of the new sdrfs file.
    # It is the sixth ':'-separated field of the first line.
    versionLine=$($head -1 $newsdrfs)
    IFS=':'
    set -f ; set -- $versionLine ; set +f
    newGenNumber=$6
    IFS="$IFS_sv"
  fi  # end of if [[ $rc -eq $MM_DaemonDown ]]

  # Commit the modified version of the sdrfs file to the server.
  # A failed commit is reported but does not stop the steps that follow.
  trap "" HUP INT QUIT KILL    # Disable interrupts until the commit is done.
  gpfsObjectInfo=$(commitChanges  \
    $fsHomeCluster $nsId $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer)
  if [[ $? -ne 0 ]]
  then
    # We were unable to replace the file in the sdr.
    printErrorMsg 381 $mmcmd
    # Tell the user to run mmcommon recoverfs against the filesystem.
    printErrorMsg 190 $mmcmd $deviceName $deviceName
  fi

  # Unlock the sdr.
  [[ $sdrLocked = yes ]] &&  \
    freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
  sdrLocked=no
  trap posttrap HUP INT QUIT KILL

  # Propagate the new mmsdrfs file.  This process is asynchronous.
  propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber

  # Exit with the return code of the failed tsrpldisk invocation.
  cleanupAndExit $rc
fi


###################################################################
# If here, tsrpldisk was successful.
# Invoke routine to change the SG_DISKS lines with a disk status
# of mmadd and mmdel to a regular (null) disk status and then
# commit the new mmsdrfs file.  This makes the new disks visible
# to future mm commands and to the mmcommon getEFOptions function.
###################################################################
# We finished replacing the disk.
printInfoMsg 99

# Reset the status fields of the two involved disks (replacer/replacee).
# NOTE(review): the label passed to checkForErrors says "updateDiskStatus"
# although the routine that was called is resetDiskStatus - confirm whether
# the label should be changed.
resetDiskStatus $deviceName $newsdrfs mmadd mmdel
checkForErrors updateDiskStatus $?

# Obtain the generation number from the version line of the new sdrfs file.
# It is the sixth ':'-separated field of the first line.
versionLine=$($head -1 $newsdrfs)
IFS=':'
set -f ; set -- $versionLine ; set +f
newGenNumber=$6
IFS="$IFS_sv"

# Commit the new mmsdrfs file.
# The output of commitChanges replaces the value of gpfsObjectInfo.
trap "" HUP INT QUIT KILL    # Disable interrupts until the commit is done.
gpfsObjectInfo=$(commitChanges  \
   $fsHomeCluster $nsId $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer)
rc=$?
if [[ $rc -ne 0 ]]
then
  # We were unable to replace the file in the sdr.
  printErrorMsg 381 $mmcmd
  # Tell the user to run mmcommon recoverfs against the filesystem.
  printErrorMsg 190 $mmcmd $deviceName $deviceName
  cleanupAndExit
fi


###################################################
# Release the sdr lock, if we are holding it.
###################################################
if [[ $sdrLocked = yes ]]
then
  freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
fi
sdrLocked=no
trap posttrap HUP INT QUIT KILL


######################################################################
# Ask the daemon to invalidate its currently-cached mount options.
# All output of the request is collected in $errMsg.  A "daemon down"
# result is not treated as an error; for any other failure the
# collected output, if there is any, is shown.
######################################################################
$mmcommon onactive $preferredNode $allQuorumNodes  \
                   $NO_FILE_COPY $NO_MOUNT_CHECK NULL $NO_LINK  \
                   tsctl resetEFOptions $fqDeviceName > $errMsg 2>&1
rc=$?
if [[ $rc = $MM_DaemonDown ]]
then
  rc=0
fi
if [[ $rc -ne 0 ]]
then
  if [[ -s $errMsg ]]
  then
    cat $errMsg 2>&1
  fi
fi
$rm -f $errMsg


#################################################################
# Distribute the new mmsdrfs file.  This is done asynchronously.
#################################################################
propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber


###################################################
# Run the syncfsconfig user exit, if it is an
# executable file.
###################################################
[[ -x $syncfsconfig ]] && {
   print -- "$mmcmd:  Starting $syncfsconfig ..."
   $syncfsconfig
   print -- "$mmcmd:  $syncfsconfig finished."
}


#####################################################################
# For every other file system whose "odd state" flag was seen while
# the mmsdrfs file was scanned, tell the user to run
# mmcommon recoverfs against that file system.
#####################################################################
for fsname in $fsOddStateList
do
  printErrorMsg 103 $mmcmd $fsname $fsname
done

cleanupAndExit 0

