| 1 | #!/bin/ksh | 
|---|
| 2 | # IBM_PROLOG_BEGIN_TAG | 
|---|
| 3 | # This is an automatically generated prolog. | 
|---|
| 4 | # | 
|---|
| 5 | # | 
|---|
| 6 | # | 
|---|
| 7 | # Licensed Materials - Property of IBM | 
|---|
| 8 | # | 
|---|
| 9 | # (C) COPYRIGHT International Business Machines Corp. 2003,2005 | 
|---|
| 10 | # All Rights Reserved | 
|---|
| 11 | # | 
|---|
| 12 | # US Government Users Restricted Rights - Use, duplication or | 
|---|
| 13 | # disclosure restricted by GSA ADP Schedule Contract with IBM Corp. | 
|---|
| 14 | # | 
|---|
| 15 | # IBM_PROLOG_END_TAG | 
|---|
| 16 | # @(#)55 1.18 src/avs/fs/mmfs/ts/admin/mmexectsmcmd.sh, mmfs, avs_rgpfs24, rgpfs240610b 2/11/05 11:32:21 | 
|---|
| 17 | ############################################################################## | 
|---|
| 18 | # | 
|---|
| 19 | # This script issues TSM commands from a GPFS node that is a TSM client. | 
|---|
| 20 | # | 
|---|
| 21 | # It accepts the following arguments: | 
|---|
| 22 | #   1) The mount point of the pertinent file system | 
|---|
| 23 | #   2) The TSM operation to be performed; supported operations are: | 
|---|
| 24 | #        - selective | 
|---|
| 25 | #        - incremental | 
|---|
| 26 | #        - expire | 
|---|
| 27 | #        - restore     (no longer used since tsrestorefile was dropped) | 
|---|
| 28 | #   3) A qualifier for the fourth parameter: | 
|---|
| 29 | #        - filelist    This value indicates that the fourth parameter | 
|---|
| 30 | #                        is the value for the Tivoli filelist=<> option. | 
|---|
| 31 | #        - nofilelist  This value indicates that the fourth parameter | 
|---|
| 32 | #                        is not to be used as a Tivoli filelist parameter. | 
|---|
| 33 | #                        In this case, the Tivoli filelist option is not used. | 
|---|
| 34 | #   4) If the 3rd parameter was | 
|---|
| 35 | #        - filelist    then the 4th parameter is the full path name of the | 
|---|
| 36 | #                        file containing the filenames to be operated on. | 
|---|
| 37 | #        - nofilelist  then the 4th parameter is another parameter to be | 
|---|
| 38 | #                        passed to the Tivoli command. | 
|---|
| 39 | #   5) The nodename of the master tsbackup process   (used for locking) | 
|---|
| 40 | #   6) The pid number of the master tsbackup process (used for locking) | 
|---|
| 41 | #   7) The backup client process index               (used for file naming) | 
|---|
| 42 | #   8) The name of the TSM server            (checked against dsm.opt file) | 
|---|
| 43 | #   9) The I/O rate level (used to allow other non-backup processes to run) | 
|---|
| 44 | # | 
|---|
| 45 | # The backup client process index is used for constructing the name of | 
|---|
| 46 | # the file for pending (not successfully completed) transactions. | 
|---|
| 47 | # | 
|---|
| 48 | # The program returns: | 
|---|
| 49 | #   0 on success (i.e., all files were successfully backed up) | 
|---|
| 50 | #   1 on partial success (i.e., some but not all files were backed up) | 
|---|
| 51 | #   2 on failure (i.e., no success at all) | 
|---|
| 52 | # | 
|---|
| 53 | ############################################################################## | 
|---|
| 54 |  | 
|---|
| 55 | # Include global declarations and service routines | 
|---|
| 56 | . /usr/lpp/mmfs/bin/mmglobfuncs | 
|---|
| 57 | . /usr/lpp/mmfs/bin/mmsdrfsdef | 
|---|
| 58 | . /usr/lpp/mmfs/bin/mmfsfuncs | 
|---|
| 59 |  | 
|---|
# Record the name of this source file, turn on shell tracing when either the
# global DEBUG variable or the script-specific DEBUGmmexectsmcmd variable is
# non-empty, and issue the trace-entry call with the script arguments.
sourceFile=mmexectsmcmd.sh
if [[ -n $DEBUG ]] || [[ -n $DEBUGmmexectsmcmd ]]
then
  set -x
fi
$mmTRACE_ENTER "$*"
|---|
| 63 |  | 
|---|
| 64 |  | 
|---|
| 65 | #------------------------------------------------- | 
|---|
| 66 | # Local work files.  Names should be of the form: | 
|---|
| 67 | #   fn=${tmpDir}fn.${mmcmd}.$$ | 
|---|
| 68 | #------------------------------------------------- | 
|---|
| 69 |  | 
|---|
| 70 | #tmpCtrlFile=${tmpDir}tmpCtrlFile.mmbackup.$$ | 
|---|
| 71 | #LOCAL_FILES=" $tmpCtrlFile " | 
|---|
| 72 |  | 
|---|
| 73 |  | 
|---|
| 74 | #------------------ | 
|---|
| 75 | # Global variables | 
|---|
| 76 | #------------------ | 
|---|
| 77 |  | 
|---|
# Timestamp taken once at startup.  It is used further down as part of the
# name of the directory that receives the TSM error log.
dateTime=$($date +"%y%m%d_%H:%M:%S")

#tsmDate=$(date '+%m/%d/%y')
#tsmTime=$(date '+%H:%M:%S')

# Path of the TSM command line client.
dsmc=/usr/bin/dsmc

# Locate the TSM client options file.  An explicit DSM_CONFIG setting takes
# precedence; otherwise the per-platform default location is used.
if [[ -n $DSM_CONFIG ]]
then
  dsmoptfile=$DSM_CONFIG
elif [[ $osName = AIX ]]
then
  dsmoptfile=/usr/tivoli/tsm/client/ba/bin/dsm.opt
elif [[ $osName = Linux ]]
then
  dsmoptfile=/opt/tivoli/tsm/client/ba/bin/dsm.opt
else
  print -u2  " Unknown operating system $osName "
  # This is a total failure.  The script header documents return code 1 as
  # "partial success" and 2 as "failure", so exit with 2.  The literal is
  # used because rc_fail has not been assigned yet at this point.
  exit 2
fi
|---|
| 98 |  | 
|---|
# Name of this program as shown in the usage message.
pgm=mmexectsmcmd

# Base directory used for the backup lock.
lockdir=/var/mmfs/etc/mmbackuplock

# Name prefix of the pending-transactions file; the mount point is put in
# front of it and the process index is appended when the file is created.
tpl=.mmbuTSMPendingTransactions

# Return codes of this script.
rc_success=0
rc_psuccess=1
rc_fail=2

# Result keywords that start the status line printed for each outcome.
ops="Operation_Success"
opps="Operation_Partial_Success"
opf="Operation_Failure"

# Text fragments searched for in the TSM error log (dsmerror.log).
errorProcessingString='Error processing'
objectErrorString='No objects on server match'
searchErrorString='No files matching search criteria'
|---|
| 116 |  | 
|---|
| 117 |  | 
|---|
| 118 | #----------------- | 
|---|
| 119 | # local routines | 
|---|
| 120 | #----------------- | 
|---|
| 121 |  | 
|---|
| 122 |  | 
|---|
#--------------------------------------------------------------
# localTrap
#
# Signal handler installed while no backup lock is held yet.
# Nothing has to be unlocked, so the script simply leaves through
# doCleanupAndExit with the failure return code.
#--------------------------------------------------------------
function localTrap
{
  typeset exitCode=$rc_fail

  doCleanupAndExit $exitCode
}
|---|
| 131 |  | 
|---|
| 132 |  | 
|---|
#--------------------------------------------------------------
# localTrap2
#
# Signal handler installed once the backup lock has been obtained.
# It leaves through freeLockAndExit with the failure return code,
# so the lock is released before the script ends.
#--------------------------------------------------------------
function localTrap2
{
  typeset exitCode=$rc_fail

  freeLockAndExit $exitCode
}
|---|
| 141 |  | 
|---|
| 142 |  | 
|---|
#--------------------------------------------------------------
# getBackupLock
#
# Obtain a backup lock before proceeding.  The function returns only
# when the caller holds a valid lock; in every other case it prints an
# Operation_Failure message and ends the script with $rc_fail.
#
# Globals read:  lockdir, masterNode, masterPid, ourNodeName, sn, opf,
#                rc_fail and the command variables mkdir, rm, ls, egrep,
#                mmremote, mmcommon, mmTRACE, mmTRACE_ENTER.
#
# Here is the mechanism by which locks are managed and used:
#
#   1) Begin to obtain a lock by issuing:  mkdir lockdir
#   2) If (1) succeeds, no one else has a lock.  Finish obtaining the
#      lock with mkdirs for lockdir/masternode_masterpid and
#      lockdir/masternode_masterpid/mylocalpid.
#   3) If (1) fails, a local sibling process (started by the same master)
#      may already have created lockdir and lockdir/masternode_masterpid.
#      Try mkdir lockdir/masternode_masterpid/mylocalpid, which succeeds
#      only if lockdir/masternode_masterpid exists.  The mkdir is tried a
#      second time to cover the small window in which a sibling has
#      created lockdir but not yet lockdir/masternode_masterpid.
#   4) If that fails too, the lock belongs to another master process.
#      Its node and pid are taken from the directory name found in
#      lockdir.  If that process is alive, or its state cannot be
#      determined, exit with a message telling the user who holds the
#      lock and how to clear it.
#   5) If the holder has died, remove the stale lock and build a new one.
#   6) Locks are released by freeBackupLock, which issues
#        rmdir lockdir/masternode_masterpid/mylocalpid
#        rmdir lockdir/masternode_masterpid
#        rmdir lockdir
#      The latter two rmdirs fail while sibling processes still hold
#      locks, since those directories are then not empty.
#      Care must be taken to always free locks before exiting.
#
#--------------------------------------------------------------
function getBackupLock
{
  # Enable debug output.
  typeset sourceFile="mmexectsmcmd.sh"
  [[ -n $DEBUG || -n $DEBUGgetBackupLock ]] && set -x
  $mmTRACE_ENTER "$*"

  # Work variables are kept local to this function.
  typeset masterDir="$lockdir/${masterNode}_${masterPid}"
  typeset myLock="$masterDir/$$"
  typeset lsOutput holder lockhldrNode lockhldrPid pidCheckAll pidCheck

  # Try to create the base lock directory.
  if $mkdir "$lockdir" > /dev/null 2>&1
  then
    # The lock was not in use by anyone.  Finish creating our lock.
    # Only the status of the innermost mkdir is checked: it cannot
    # succeed unless the directory above it was created as well.
    $mkdir "$masterDir" > /dev/null 2>&1
    $mkdir "$myLock" > /dev/null 2>&1
    if [[ $? != 0 ]]
    then
      print "$opf: $sn: Unexpected error creating a lock.  Try again.  If the problem persists, contact IBM service."
      freeLockAndExit $rc_fail
    fi
    return 0
  fi

  # At least one other process has a lock.  If it was started by the same
  # master process as we were, creating our own entry succeeds.
  $mkdir "$myLock" > /dev/null 2>&1 && return 0

  # Retry the mkdir just in case the first one failed due to timing.
  $mkdir "$myLock" > /dev/null 2>&1 && return 0

  # The lock belongs to a different master process.  Its directory is
  # named <node>_<pid>.  Take the first entry and split it at the LAST
  # underscore, so that a node name containing underscores stays intact.
  lsOutput=$($ls "$lockdir")
  set -f ; set -- $lsOutput ; set +f
  holder=$1
  if [[ $holder = *_* ]]
  then
    lockhldrNode=${holder%_*}
    lockhldrPid=${holder##*_}
  else
    lockhldrNode=$holder
    lockhldrPid=""
  fi

  # We have the pid and hostname of the process holding the lock.
  # Determine whether the process is still running.  The right-hand side
  # is quoted so the node name is compared literally, not as a pattern.
  if [[ $lockhldrNode = "$ourNodeName" ]]
  then
    pidCheckAll=$($mmremote pid $lockhldrPid)
  else
    pidCheckAll=$($mmcommon on1 $lockhldrNode pid $lockhldrPid)
  fi

  # Keep only a line that is exactly "died" or "alive".
  pidCheck=$(print "$pidCheckAll" | $egrep "^(died|alive)$")
  if [[ $pidCheck != died ]]
  then
    $mmTRACE "$lockhldrNode lock held by $lockhldrNode $lockhldrPid"
    # The process that has the lock is still alive, or we could
    # not determine its status.  Either way, give up.
    if [[ $pidCheck = alive ]]
    then
      print "$opf: $sn:\nFailed to obtain GPFS backup lock.  Process $lockhldrPid on node $lockhldrNode has it.\nIf process $lockhldrPid is no longer running on $lockhldrNode, clear the lock\nby issuing:  \"rm -rf $lockdir\"  on all GPFS nodes.  Otherwise,\ntry the GPFS backup again when process $lockhldrPid on $lockhldrNode is done.\n"
    else
      print "$opf: $sn:\nFailed to obtain GPFS backup lock.  Unable to reach the holder\nof the lock, which is process $lockhldrPid on node $lockhldrNode.\nIf process $lockhldrPid is no longer running on $lockhldrNode, clear the lock\nby issuing:  \"rm -rf $lockdir\"  on all GPFS nodes.  Otherwise,\ntry the GPFS backup again when process $lockhldrPid on $lockhldrNode is done.\n"
    fi
    # No lock of our own exists, so the plain cleanup path is enough.
    doCleanupAndExit $rc_fail
  fi

  # The process that had the lock somehow went away without unlocking.
  # Remove the old lock and establish a new one for the caller.
  $rm -rf "$lockdir" > /dev/null 2>&1
  $mkdir "$lockdir" > /dev/null 2>&1
  if [[ $? != 0 ]]
  then
    # Failed again!  Give up.
    print "$opf: $sn:\nFailed to obtain GPFS backup lock even after clearing the lock.\n"
    doCleanupAndExit $rc_fail
  fi

  # Complete the work of establishing the lock.  The result is checked
  # here as well, so that the script never continues without a lock.
  $mkdir "$masterDir" > /dev/null 2>&1
  $mkdir "$myLock" > /dev/null 2>&1
  if [[ $? != 0 ]]
  then
    print "$opf: $sn: Unexpected error creating a lock.  Try again.  If the problem persists, contact IBM service."
    freeLockAndExit $rc_fail
  fi

}  #------ end of function getBackupLock -----------------
|---|
| 262 |  | 
|---|
| 263 |  | 
|---|
#--------------------------------------------------------------
# freeBackupLock
#
# Release this process's backup lock by removing, innermost first,
#   lockdir/masternode_masterpid/<our pid>
#   lockdir/masternode_masterpid
#   lockdir
# rmdir is used, so a directory that still has entries in it is left
# in place.  All output and errors of the rmdir calls are discarded.
#--------------------------------------------------------------
function freeBackupLock
{
  # Enable debug output.
  typeset sourceFile="mmexectsmcmd.sh"
  if [[ -n $DEBUG || -n $DEBUGfreeBackupLock ]]
  then
    set -x
  fi
  $mmTRACE_ENTER "$*"

  typeset masterDir=$lockdir/$masterNode"_"$masterPid
  typeset lockLevel

  for lockLevel in $masterDir/$$ $masterDir $lockdir
  do
    $rmdir $lockLevel > /dev/null 2>&1
  done

}  #------ end of function freeBackupLock ----------------
|---|
| 279 |  | 
|---|
| 280 |  | 
|---|
#--------------------------------------------------------------
#
# freeLockAndExit
#
# Function:  Release the backup lock, then leave the script by way
#            of doCleanupAndExit.
#
# Input:     $1 - return code with which to exit
#
# Note:      The return code is stored in the global variable rc.
#
#--------------------------------------------------------------
function freeLockAndExit
{
  # Enable debug output.
  typeset sourceFile="mmexectsmcmd.sh"
  if [[ -n $DEBUG || -n $DEBUGfreeLockAndExit ]]
  then
    set -x
  fi
  $mmTRACE_ENTER "$*"

  # Remember the requested return code before doing any work.
  rc=$1

  # Unlock first, then hand the return code to the common exit path.
  freeBackupLock
  doCleanupAndExit $rc

}  #------ end of function freeLockAndExit ---------------
|---|
| 305 |  | 
|---|
| 306 |  | 
|---|
#--------------------------------------------------------------
#
# doCleanupAndExit
#
# Function:  Do the cleanup that is specific to mmexectsmcmd and then
#            leave through the standard cleanupAndExit routine.
#
# Input:     $1 - return code with which to exit
#
# Note:      The return code is stored in the global variable rc.
#
#--------------------------------------------------------------
function doCleanupAndExit
{
  # Enable debug output.
  typeset sourceFile="mmexectsmcmd.sh"
  if [[ -n $DEBUG || -n $DEBUGdoCleanupAndExit ]]
  then
    set -x
  fi
  $mmTRACE_ENTER "$*"

  rc=$1

  # Keep the log directory only when the TSM log file exists and has
  # something in it; otherwise remove the directory.
  if [[ ! -s $logfile ]]
  then
    $rm -rf $logdir > /dev/null 2>&1
  fi

  cleanupAndExit $rc

}  #------ end of function doCleanupAndExit ---------------
|---|
| 333 |  | 
|---|
| 334 |  | 
|---|
| 335 |  | 
|---|
#------------------------
# Start main processing.
#------------------------

# No backup lock is held yet, so an interrupt only has to run the plain
# cleanup path (localTrap).
trap localTrap HUP INT QUIT KILL

# Values assigned to keyword_lc are folded to lower case.
typeset -l keyword_lc

# The script accepts either nine arguments or two arguments.
# Any other count is rejected with a usage message.
if (( argc != 9 && argc != 2 ))
then
  print "$opf: $sn: An incorrect number of parameters was passed."
  print "Usage:\n  $pgm <FSname> <tsmOperation> <tsmOperationOption> <filelistname> <invokingNode> <invokingPid> <clientIndex> <tsmServer> <ioRateValue>"
  doCleanupAndExit $rc_fail
fi

# Call determineMode only when MMMODE or primaryServer is still empty.
if [[ -z $MMMODE || -z $primaryServer ]]
then
  determineMode
fi

# Obtain the local node data; the local node name is the "sn" shown in
# the messages printed by this script.
getLocalNodeData
sn=$ourNodeName
|---|
| 357 |  | 
|---|
| 358 | # | 
|---|
| 359 | # Assign the input parameters as follows: | 
|---|
| 360 | # | 
|---|
| 361 | #   arg1 - "givestatus" | 
|---|
| 362 | #   arg2 - number of seconds to sleep between messages | 
|---|
| 363 | # | 
|---|
| 364 | #    OR | 
|---|
| 365 | # | 
|---|
| 366 | #   arg1 - mountpoint of filesystem to be operated on | 
|---|
| 367 | #   arg2 - TSM command (selective, incremental, expire, or restore) | 
|---|
| 368 | #   arg3 - TSM command option (filelist or nofilelist) | 
|---|
| 369 | #   arg4 - name of the filelist file (if arg3 was filelist) | 
|---|
| 370 | #            or | 
|---|
| 371 | #          some other TSM command parameter (if arg3 was nofilelist) | 
|---|
| 372 | #   arg5 - name of the invoking node (used for obtaining a backup lock) | 
|---|
| 373 | #   arg6 - pid of the invoking process (used for obtaining a backup lock) | 
|---|
| 374 | #   arg7 - process index (used for creating directory in which, if necessary, | 
|---|
| 375 | #                           Tivoli will store a dsmerror.log file) | 
|---|
| 376 | #   arg8 - name of the TSM server (used for checking against the dsm.opt file) | 
|---|
| 377 | #   arg9 - I/O rate value | 
|---|
| 378 | # | 
|---|
| 379 |  | 
|---|
# "givestatus" form of the call: arg2 is the number of seconds to sleep
# between messages.  The loop has no exit condition of its own.
if [[ $arg1 = "givestatus" ]]
then
  typeset -i nSeconds=$arg2
  while :
  do
    sleep $nSeconds
    printInfoMsg 527 mmbackup
  done
fi
|---|
| 389 |  | 
|---|
# First three arguments: mount point, TSM command and the qualifier that
# says how the fourth argument is to be interpreted.
mountPoint=$arg1
tsmCommand=$arg2
tsmCommandOption=$arg3

case $tsmCommandOption in
  filelist)
    # arg4 names the file that lists the files to operate on.
    filelistName=$arg4
    if [[ ! -f $filelistName || ! -r $filelistName ]]
    then
      # The filelist file does not exist or is not readable.
      # Issue an error message and fail the command.
      print "$opf: $sn: Cannot open $filelistName.  Make sure filesystem is mounted on node."
      doCleanupAndExit $rc_fail
    fi
    ;;
  nofilelist)
    # arg4 is an ordinary parameter for the TSM command.
    tsmCommandParm=$arg4
    ;;
  *)
    print "$opf: $sn: Invalid parameter: $tsmCommandOption"
    doCleanupAndExit $rc_fail
    ;;
esac

# Remaining arguments: master node and pid (used for the backup lock),
# process index, TSM server name and I/O rate value.
masterNode=$arg5
masterPid=$arg6
processIndex=$arg7
tsmServer=$arg8
ioRateValue=$arg9
|---|
| 423 |  | 
|---|
| 424 |  | 
|---|
# Build the name of the directory for the TSM error log from the mount
# point, the startup timestamp and the process index, and export it as
# DSM_LOG to tell TSM where to put any error log it generates.
logdir=${mmbackupDir}${mountPoint}_${dateTime}_${processIndex}
logfile=${logdir}/dsmerror.log
DSM_LOG=$logdir
export DSM_LOG

# Create the directory for the TSM error log.
$mkdir -p $logdir > /dev/null 2>&1

# From here on the mount point carries a trailing slash, the form in which
# it is passed to TSM later.
mountPoint=${mountPoint}/

# Obtain the backup lock.  getBackupLock does not return when no lock can
# be obtained; it ends the script with a failure return code instead.
getBackupLock

# A lock is now held, so switch to the trap routine that frees the lock
# before exiting.
trap localTrap2 HUP INT QUIT KILL
|---|
| 445 |  | 
|---|
# Check that the TSM server passed on the command matches the one named in
# the TSM client options file chosen at startup (DSM_CONFIG if set,
# otherwise the platform default dsm.opt).
# If it doesn't, issue an error message and exit with a failing rc.

# Take the lines mentioning "servername" (any case) and drop every line
# that contains an asterisk.  The first word of what remains is taken as
# the keyword and the second word as the server name.
grepOutput=$($grep -i servername $dsmoptfile | $grep -v '*')
set -f ; set -- $grepOutput ; set +f
dsmoptTSMserverKeyword=$1
dsmoptTSMserver=$2

keyword_lc=$dsmoptTSMserverKeyword    # Convert keyword to all lower case.
if [[ $keyword_lc != servername ]]
then
  print "$opf: $sn: unable to find TSM server name in dsm.opt file"
  freeLockAndExit $rc_fail
fi

# The right-hand side is quoted so that the server name from the options
# file is compared literally; unquoted it would be used as a pattern.
if [[ $tsmServer != "$dsmoptTSMserver" ]]
then
  print "$opf: $sn: specified TSM server does not match TSM server in dsm.opt file"
  freeLockAndExit $rc_fail
fi
|---|
| 466 |  | 
|---|
# Depending on the I/O rate value, take a breather to allow other
# non-backup processes to run.
# The scheme implemented here, wait (100 - I/O rate value) seconds, is a
# very rough beginning.  A better scheme would be to calculate the time
# used doing backup work, and then sleep a fraction of that time based on
# the I/O rate value.  (An I/O rate value of 100 would mean don't sleep at
# all, a value of 75 would mean sleep for 1/3 of the time spent doing
# backup, a value of 50 would mean sleep an amount of time equal to the
# time spent on backup, and so on.)
integer numberOfSeconds=100-${ioRateValue}

# Sleep only for a positive number of seconds.  An I/O rate value above
# 100 would otherwise hand a negative argument to sleep.
if (( numberOfSeconds > 0 ))
then
  sleep $numberOfSeconds
fi
|---|
| 481 |  | 
|---|
| 482 |  | 
|---|
| 483 | # We now have obtained a backup lock, verified that the TSM server is | 
|---|
| 484 | # correct, and idled away some time if warranted by the I/O rate value. | 
|---|
| 485 | # Proceed to process Tivoli Storage Manager commands. | 
|---|
| 486 |  | 
|---|
| 487 | # TSM selective or incremental command processing | 
|---|
| 488 | # | 
|---|
| 489 | if [[ $tsmCommand = selective || $tsmCommand = incremental ]] | 
|---|
| 490 | then | 
|---|
| 491 |  | 
|---|
| 492 | # Issue the TSM command to have the list of files backed up. | 
|---|
| 493 | # $dsmc $tsmCommand -filelist=$filelistName > /tmp/out1i 2> /tmp/out2i | 
|---|
| 494 | $dsmc $tsmCommand -filelist=$filelistName > /dev/null 2>&1 | 
|---|
| 495 | rc=$? | 
|---|
| 496 |  | 
|---|
| 497 | # The return code only indicates whether the TSM executable ran successfully. | 
|---|
| 498 | # The TSM client returns a code greater than 0 (zero) only if TSM was | 
|---|
| 499 | # unable to attempt the operations. | 
|---|
| 500 | # Only those return codes output to the error report (dsmerror.log) | 
|---|
| 501 | # indicate the success or failure of specific TSM commands. | 
|---|
| 502 | if [[ $rc -eq 0 ]] | 
|---|
| 503 | then | 
|---|
| 504 |  | 
|---|
| 505 | # Clear the error log.  It will be recreated | 
|---|
| 506 | # by the query command to be issued next. | 
|---|
| 507 | if [[ -a $logfile && -s $logfile ]] | 
|---|
| 508 | then | 
|---|
| 509 | $rm -rf $logfile > /dev/null 2>&1 | 
|---|
| 510 | rc=$? | 
|---|
| 511 | if [[ $rc -gt 0 ]] | 
|---|
| 512 | then | 
|---|
| 513 | print "$opf: $sn: cannot rm $logfile" | 
|---|
| 514 | freeLockAndExit $rc_fail | 
|---|
| 515 | fi | 
|---|
| 516 | fi | 
|---|
| 517 |  | 
|---|
| 518 | # Issue the query command to determine which files were not | 
|---|
| 519 | # backed up successfully. | 
|---|
| 520 | # | 
|---|
| 521 | # dsmc query backup -fromdate=$tsmDate -fromtime=$tsmTime | 
|---|
| 522 | #                   -filelist=$filelistName > /dev/null 2>&1 | 
|---|
| 523 | #   $dsmc query backup -filelist=$filelistName > /tmp/out1q 2> /tmp/out2q | 
|---|
| 524 | $dsmc query backup -filelist=$filelistName > /dev/null 2>&1 | 
|---|
| 525 | rc=$? | 
|---|
| 526 |  | 
|---|
| 527 | if [[ $rc -eq 0 ]] | 
|---|
| 528 | then | 
|---|
| 529 | # If we have an error log file, examine it to determine | 
|---|
| 530 | # the files which were not successfully backed up. | 
|---|
| 531 | if [[ -a $logfile && -s $logfile ]] | 
|---|
| 532 | then | 
|---|
| 533 |  | 
|---|
| 534 | # Process the error log and construct the pending transactions file. | 
|---|
| 535 | # | 
|---|
| 536 | # The following is a line from the pertinent error log file: | 
|---|
| 537 | # 03/13/02   10:37:48 ANS1092E No files matching search criteria | 
|---|
| 538 | # were found | 
|---|
| 539 | #                         or | 
|---|
| 540 | # 11/16/01   15:24:56 ANS1345E No objects on server match | 
|---|
| 541 | # '/log/michail/file_4' | 
|---|
| 542 |  | 
|---|
| 543 | # Check for matching search criteria failure. | 
|---|
| 544 | pendingBackupsList="$mountPoint$tpl"_"$processIndex" | 
|---|
| 545 | $grep "${searchErrorString}" $logfile > ${pendingBackupsList} | 
|---|
| 546 | if [[ -a $pendingBackupsList && -s $pendingBackupsList ]] | 
|---|
| 547 | then | 
|---|
| 548 | print "$opf: $sn: TSM dsmc query command indicated search criteria failure (see file $logfile)." | 
|---|
| 549 | $rm -rf $pendingBackupsList > /dev/null 2>&1 | 
|---|
| 550 | freeLockAndExit $rc_fail | 
|---|
| 551 | fi | 
|---|
| 552 |  | 
|---|
| 553 | # Check for objects which do not match, i.e., | 
|---|
| 554 | # individual files which were not backed up. | 
|---|
| 555 | $grep "${objectErrorString}" $logfile |  \ | 
|---|
| 556 | $cut -f2 -d\' >> ${pendingBackupsList} | 
|---|
| 557 | print "$opps: $sn: TSM dsmc $tsmCommand command partially succeeded (see file $logfile)." | 
|---|
| 558 | freeLockAndExit $rc_psuccess | 
|---|
| 559 | else | 
|---|
| 560 | print "$ops: $sn: TSM dsmc query command did not produce any error log." | 
|---|
| 561 | freeLockAndExit $rc_success | 
|---|
| 562 | fi | 
|---|
| 563 | else | 
|---|
| 564 | print "$opf: $sn: TSM dsmc query command failed to run." | 
|---|
| 565 | freeLockAndExit $rc_fail | 
|---|
| 566 | fi | 
|---|
| 567 | else | 
|---|
| 568 | # If there is an error log file, tell the user to examine it | 
|---|
| 569 | # to determine the cause of the failure. | 
|---|
| 570 | if [[ -a $logfile && -s $logfile ]] | 
|---|
| 571 | then | 
|---|
| 572 | print "$opf: $sn: TSM dsmc $tsmCommand command failed to run (see file $logfile)." | 
|---|
| 573 | else | 
|---|
| 574 | print "$opf: $sn: TSM dsmc $tsmCommand command failed to run." | 
|---|
| 575 | fi | 
|---|
| 576 | freeLockAndExit $rc_fail | 
|---|
| 577 | fi | 
|---|
| 578 |  | 
|---|
| 579 | # TSM expire command processing | 
|---|
| 580 | # | 
|---|
| 581 | elif [[ $tsmCommand = expire ]] | 
|---|
| 582 | then | 
|---|
| 583 |  | 
|---|
| 584 | # Clear the error log.  We are only interested in the error log | 
|---|
| 585 | # created from issuing the expire command. | 
|---|
| 586 | if [[ -a $logfile && -s $logfile ]] | 
|---|
| 587 | then | 
|---|
| 588 | $rm -rf $logfile > /dev/null 2>&1 | 
|---|
| 589 | rc=$? | 
|---|
| 590 | if [[ $rc -gt 0 ]] | 
|---|
| 591 | then | 
|---|
| 592 | print "$opf: $sn: cannot rm $logfile" | 
|---|
| 593 | freeLockAndExit $rc_fail | 
|---|
| 594 | fi | 
|---|
| 595 | fi | 
|---|
| 596 |  | 
|---|
| 597 | # Issue the TSM command to have files in the file system expired. | 
|---|
| 598 | if [[ $tsmCommandOption = filelist ]] | 
|---|
| 599 | then | 
|---|
| 600 | #   $dsmc $tsmCommand -noprompt -filelist=$filelistName > /tmp/out1x 2> /tmp/out2x | 
|---|
| 601 | $dsmc $tsmCommand -noprompt -filelist=$filelistName > /dev/null 2>&1 | 
|---|
| 602 | else | 
|---|
| 603 | #   $dsmc $tsmCommand -noprompt $tsmCommandParm > /tmp/out1x 2>/tmp/out2x | 
|---|
| 604 | $dsmc $tsmCommand -noprompt $tsmCommandParm > /dev/null 2>&1 | 
|---|
| 605 | fi | 
|---|
| 606 | rc=$? | 
|---|
| 607 |  | 
|---|
| 608 | # The return code only indicates whether the TSM executable ran | 
|---|
| 609 | # successfully. | 
|---|
| 610 | # The TSM client returns a code greater than 0 (zero) only if TSM was | 
|---|
| 611 | # unable to attempt the operations. | 
|---|
| 612 | # Only those return codes output to the error report (dsmerror.log) | 
|---|
| 613 | # indicate the success or failure of specific TSM commands. | 
|---|
| 614 | if [[ $rc -eq 0 ]] | 
|---|
| 615 | then | 
|---|
| 616 |  | 
|---|
| 617 | # If we have an error log file examine it to determine the files | 
|---|
| 618 | # which were not successfully expired. | 
|---|
| 619 | if [[ -a $logfile && -s $logfile ]] | 
|---|
| 620 | then | 
|---|
| 621 |  | 
|---|
| 622 | # Process the error log and construct the transactions_pending file. | 
|---|
| 623 | # Check for objects which failed to get processed (i.e expired). | 
|---|
| 624 | pendingExpiresList="$mountPoint$tpl"_"$processIndex" | 
|---|
| 625 | $grep "${errorProcessingString}" $logfile |  \ | 
|---|
| 626 | $cut -f2 -d\' > ${pendingExpiresList} | 
|---|
| 627 |  | 
|---|
| 628 | if [[ -a $pendingExpiresList && -s $pendingExpiresList ]] | 
|---|
| 629 | then | 
|---|
| 630 | print "$opf: $sn: TSM dsmc query command indicated search criteria failure (see file $logfile)." | 
|---|
| 631 | $rm -rf $pendingExpiresList > /dev/null 2>&1 | 
|---|
| 632 | freeLockAndExit $rc_fail | 
|---|
| 633 | fi | 
|---|
| 634 |  | 
|---|
| 635 | # Check for objects which do not match, namely, | 
|---|
| 636 | # individual files which were not expired. | 
|---|
| 637 | $grep "${objectErrorString}" $logfile |  \ | 
|---|
| 638 | $cut -f2 -d\' >> ${pendingExpiresList} | 
|---|
| 639 | print "$opps: $sn: TSM dsmc expire command partially succeeded (see file $logfile)." | 
|---|
| 640 | freeLockAndExit $rc_psuccess | 
|---|
| 641 | else | 
|---|
| 642 | print "$ops: $sn: TSM dsmc expire command did not produce any error log." | 
|---|
| 643 | freeLockAndExit $rc_success | 
|---|
| 644 | fi | 
|---|
| 645 | else | 
|---|
| 646 | # If there is an error log file, tell the user to examine it | 
|---|
| 647 | # to determine the cause of the failure. | 
|---|
| 648 | if [[ -a $logfile && -s $logfile ]] | 
|---|
| 649 | then | 
|---|
| 650 | print "$opf: $sn: TSM dsmc expire command failed to run (see file $logfile)." | 
|---|
| 651 | else | 
|---|
| 652 | print "$opf: $sn: TSM dsmc expire command failed to run." | 
|---|
| 653 | fi | 
|---|
| 654 | freeLockAndExit $rc_fail | 
|---|
| 655 | fi | 
|---|
| 656 |  | 
|---|
| 657 | # TSM restore command processing | 
|---|
| 658 | # | 
|---|
| 659 | elif [[ $tsmCommand = restore ]] | 
|---|
| 660 | then | 
|---|
| 661 |  | 
|---|
# Clear the error log.  We are only interested in the error log
# created from issuing the restore command.
# Only a non-empty log is removed (-s is true only for an existing file of
# non-zero size).  The path is quoted so that a name containing whitespace
# or glob characters cannot make rm -rf act on unintended operands.
if [[ -s "$logfile" ]]
then
  $rm -rf "$logfile" > /dev/null 2>&1
  rc=$?
  if [[ $rc -gt 0 ]]
  then
    # Without a clean log the results of the restore cannot be told apart
    # from earlier entries, so give up here.
    print "$opf: $sn: cannot rm $logfile"
    freeLockAndExit $rc_fail
  fi
fi
|---|
| 674 |  | 
|---|
# Issue the TSM command to have the list of files restored.
# NOTE:  We restore the files specified to the mount point of the file
#        system and not to the snapshot the files were backed up from.
# The file list name and the mount point are quoted so that each is passed
# to dsmc as exactly one argument.  All dsmc output is discarded; results
# are taken from rc and from the error log afterwards.
# Debugging variant that keeps the dsmc output:
# $dsmc $tsmCommand -filelist=$filelistName $mountPoint > /tmp/out1r 2>/tmp/out2r
$dsmc "$tsmCommand" -filelist="$filelistName" "$mountPoint" > /dev/null 2>&1
rc=$?
|---|
| 681 |  | 
|---|
| 682 | # The return code only indicates whether the TSM executable ran successfully. | 
|---|
| 683 | # The TSM client returns a code greater than 0 (zero) only if TSM was | 
|---|
| 684 | # unable to attempt the operations. | 
|---|
| 685 | # Only those return codes output to the error report (dsmerror.log) | 
|---|
| 686 | # indicate the success or failure of specific TSM commands. | 
|---|
| 687 | # NOTE: | 
|---|
| 688 | #   According to TSM documentation ("Return Codes from TSM Executables | 
|---|
| 689 | #   and Shell Scripts"): | 
|---|
| 690 | #     TSM returns 0 even if the file is not successfully backed up, | 
|---|
| 691 | #     archived, or restored if the reason for the unsuccessful process is: | 
|---|
| 692 | #       . the client could not establish a session with the TSM server, or | 
|---|
| 693 | #       . the file does not exist. | 
|---|
| 694 | # WARNING: | 
|---|
| 695 | #   In unit testing it was learned that if a file does not exist, | 
|---|
| 696 | #   the return code on a restore operation is not zero. | 
|---|
| 697 | #   The following code tries to deal with this situation | 
|---|
| 698 | #   (which situation contradicts what was stated above). | 
|---|
| 699 |  | 
|---|
# Evaluate the outcome of the TSM restore command.
# If we have an error log file examine it to determine the files
# which were not successfully restored.  The log is consulted before rc
# because, per the warning above, rc may be non-zero when a file does
# not exist.
# (-s is true only for an existing file of non-zero size, so no separate
# existence test is needed.)
if [[ -s "$logfile" ]]
then

  # Process the error log and construct the transactions_pending file.
  #
  # The following is a line from the pertinent error log file:
  #   04/05/02   15:41:52 ANS4007E Error processing
  #     '/backup_tsm/.backup_snapshot/.NodeCtrl'
  #                         or
  #   11/16/01 15:24:56 ANS1345E No objects on server match
  #     '/log/michail/file_4'
  #
  # In both cases the object name is the second field when the line is
  # split on single quotes.

  # Check for objects which failed to get processed (i.e., restored).
  pendingRestoresList="${mountPoint}${tpl}_${processIndex}"
  $grep "${errorProcessingString}" "$logfile" |
    $cut -f2 -d\' > "$pendingRestoresList"

  # Check for objects which do not match, i.e.,
  # individual files which were not restored.
  $grep "${objectErrorString}" "$logfile" |
    $cut -f2 -d\' >> "$pendingRestoresList"

  if [[ -s "$pendingRestoresList" ]]
  then
    # At least one object name was extracted from the log.
    print "$opps: $sn: TSM dsmc restore command partially succeeded (see file $logfile)."
    freeLockAndExit $rc_psuccess
  else
    # The log has content but none of it names a pending object.
    print "$opf: $sn: TSM dsmc restore command failed (see file $logfile)."
    freeLockAndExit $rc_fail
  fi
elif [[ $rc -eq 0 ]]
then
  print "$ops: $sn: TSM dsmc restore command did not produce any error log."
  freeLockAndExit $rc_success
else
  print "$opf: $sn: TSM dsmc restore command failed to run."
  freeLockAndExit $rc_fail
fi
|---|
| 739 |  | 
|---|
| 740 | else | 
|---|
| 741 | print "$opf: $sn: Wrong value ($tsmCommand) passed for TSM operation." | 
|---|
| 742 | freeLockAndExit $rc_fail | 
|---|
| 743 | fi | 
|---|
| 744 |  | 
|---|
| 745 | freeLockAndExit $rc_success | 
|---|
| 746 |  | 
|---|