[16] | 1 | #!/bin/ksh |
---|
| 2 | ################################################################################ |
---|
| 3 | # |
---|
| 4 | # Module: gpfs.snap |
---|
| 5 | # |
---|
| 6 | # Description: |
---|
| 7 | # This script attempts to collect all of the data likely to be needed |
---|
| 8 | # when reporting a GPFS-related problem. |
---|
| 9 | # |
---|
| 10 | # Syntax: |
---|
| 11 | # gpfs.snap [-c "CmdString"] [-d OutputDirectory] [-p] [-x {1 | 2}] [-y | -z] |
---|
| 12 | # [-a | -W NodeFilename | -w NodeName[,NodeName...] | |
---|
| 13 | # -n NodeNumber[,NodeNumber...]] |
---|
| 14 | # |
---|
| 15 | # -c "CmdString" Run the command string on the specified nodes. |
---|
| 16 | # If -c "CmdString" is used, the data collected is |
---|
| 17 | # just the data for the specified command string; |
---|
| 18 | # the standard data collected by gpfs.snap is not |
---|
| 19 | # collected. CmdString may consist of multiple |
---|
| 20 | # commands separated by semi-colons. |
---|
| 21 | # The d, p, x, y, and z flags have no effect when |
---|
| 22 | # -c "CmdString" is specified. |
---|
| 23 | # -d OutputDirectory Directory to be used for output. |
---|
| 24 | # The default is /tmp/gpfs.snapOut |
---|
| 25 | # -p Skip the problem determination sequence (applies to master only). |
---|
| 26 | # -x 1 Check whether there is enough space but do not collect data. |
---|
| 27 | # 2 Collect data only; do not check whether there is enough space. |
---|
| 28 | # -y Collect snaps only from nodes specified. |
---|
| 29 | # -z Collect data only from this node - no "master" data. |
---|
| 30 | # |
---|
| 31 | # Node specification options: |
---|
| 32 | # -a Collect data on all nodes. This is the default. |
---|
| 33 | # Cannot be specified with -n, -w, or -W. |
---|
| 34 | # -n nodeNumList Collect data on the nodes in the list of node numbers. |
---|
| 35 | # Cannot be specified with -a or -W. |
---|
| 36 | # -w nodeList Collect data on the nodes in the list of node names. |
---|
| 37 | # Cannot be specified with -a or -W. |
---|
| 38 | # -W nodeFile Collect data on the nodes in the file. |
---|
| 39 | # Cannot be specified with -a, -n, or -w. |
---|
| 40 | # |
---|
| 41 | # Outputs: |
---|
| 42 | # If -d option is specified, this output file will be stored in the |
---|
| 43 | # user-specified directory. |
---|
| 44 | # If -d option is not specified, the output file will be put in the |
---|
| 45 | # /tmp/gpfs.snapOut directory. |
---|
| 46 | # When run without the -z flag, snaps from the nodes will all be collected |
---|
| 47 | # into a tar file named all.xxxxx.tar, where xxxxx is a timestamp. |
---|
| 48 | # Otherwise, the file name will be gpfs.snap.node_number.xxxxxxxx.out.tar.Z |
---|
| 49 | # (a compressed file), where xxxxxxxx is the timestamp for the time the |
---|
| 50 | # script was run. |
---|
| 51 | # |
---|
| 52 | # The file contains output from the following commands: |
---|
| 53 | # |
---|
| 54 | # lsdev -C ALWAYS for AIX |
---|
| 55 | # lspv ALWAYS for AIX |
---|
| 56 | # lsattr -El for all physical disks ALWAYS for AIX |
---|
| 57 | # lsvg -o, lsvg -l, lsvg, ls -l /dev/VGs ALWAYS for AIX |
---|
| 58 | # lsfs ALWAYS for AIX |
---|
| 59 | # |
---|
| 60 | # Files |
---|
| 61 | # /etc/fstab (Linux) |
---|
| 62 | # /etc/filesystems (AIX) |
---|
| 63 | # /var/adm/ras/mmfs.log.* |
---|
| 64 | # /var/mmfs/etc/* |
---|
| 65 | # /var/mmfs/gen/* |
---|
| 66 | # /var/mmfs/ssl/* |
---|
| 67 | # /var/mmfs/tmp/* |
---|
| 68 | # |
---|
| 69 | # Miscellaneous commands |
---|
| 70 | # ps -edf ALWAYS |
---|
| 71 | # errpt -a ALWAYS |
---|
| 72 | # df -k ALWAYS |
---|
| 73 | # lslpp -ha ALWAYS |
---|
| 74 | # lssrc -a ALWAYS |
---|
| 75 | # vmstat 5 5 ALWAYS |
---|
| 76 | # vmstat -s ALWAYS |
---|
| 77 | # |
---|
| 78 | # Network stuff (ALWAYS) |
---|
| 79 | # echo $NSorder |
---|
| 80 | # no -a |
---|
| 81 | # netstat -m |
---|
| 82 | # netstat -i -n |
---|
| 83 | # netstat -rn |
---|
| 84 | # netstat -D |
---|
| 85 | # entstat en* |
---|
| 86 | # tokstat tr* |
---|
| 87 | # ifconfig (on all adapters in hats groups) |
---|
| 88 | # |
---|
| 89 | # Dependencies: |
---|
| 90 | # The script must be run as root and requires rsh access to remote nodes. |
---|
| 91 | # |
---|
| 92 | ################################################################################ |
---|
| 93 | #"@(#)44 1.31.1.4 src/avs/fs/mmfs/ts/admin/gpfs.snap.sh, mmfs, avs_rgpfs24, rgpfs24s012a 4/2/07 01:34:01" |
---|
| 94 | |
---|
# Remember when this run started and which version of the script is running.
starttime=$(date)
VERSION=1.31.1.4

# Pull in the shared global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

sourceFile="gpfs.snap.sh"
if [[ -n $DEBUGgpfssnap ]]
then
  set -x
fi
$mmTRACE_ENTER "$*"

# Global variables

mmlsmgr="/usr/lpp/mmfs/bin/mmlsmgr"
outputDelimiter="######################################################################"


# Local work files.  Names should be of the form:
#   fn=${tmpDir}fn.${mmcmd}.$$
# NOTE(review): tmpDir and mmcmd are not set in this section; presumably they
# come from the files sourced above - confirm.

# file replacement for trclist
trcFile="${tmpDir}trcFile.${mmcmd}.$$"
# file replacement for newrclist
newrcFile="${tmpDir}newrcFile.${mmcmd}.$$"
# file equivalent of commalist
commaFile="${tmpDir}commaFile.${mmcmd}.$$"
# file replacement for nodefileclist
nodefilecFile="${tmpDir}nodefilecFile.${mmcmd}.$$"
# file of reachable specified nodes
nodefile2="${tmpDir}nodefile2.${mmcmd}.$$"

# Blank-separated list of every work file named above.
LOCAL_FILES=" ${trcFile} ${newrcFile} ${commaFile} ${nodefilecFile} ${nodefile2} "
---|
| 122 | |
---|
| 123 | |
---|
| 124 | # Local functions |
---|
| 125 | |
---|
| 126 | |
---|
################################################################################
#
# Function:  removefromlist
#
# Purpose:   Remove one or more nodes from the node file and drop the
#            corresponding entries from the global hostarray.
#
# Arguments: $1 - blank-separated list of node names to remove.
#
# Globals:   nodefile  (read)     file with one node name per line; rewritten
#                                 via ${nodefile}.tmp
#            hostarray (modified) entry N is treated as the host name that
#                                 belongs to line N of $nodefile
#            cat, rm, touch, mv   command names
#
# Notes:     hostarray is compacted as soon as an entry is removed, so the
#            array index only advances for nodes that are kept.  (Advancing
#            it unconditionally made a second removal in the same call
#            delete the wrong host name.)
#
################################################################################
function removefromlist
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGremovefromlist ]] && set -x
  $mmTRACE_ENTER "$*"

  # Loop temporaries are local so callers' variables are not clobbered.
  typeset node victim idx src dst removed

  $rm ${nodefile}.tmp 2>/dev/null
  $touch ${nodefile}.tmp

  # idx is the position of the current node in the (already compacted) hostarray.
  idx=0
  for node in $($cat $nodefile)
  do
    removed=0
    for victim in $1
    do
      if [[ $node = "$victim" ]]
      then
        removed=1
        break
      fi
    done

    if [[ $removed = 0 ]]
    then
      # Node stays: keep it in the file and move on to the next array slot.
      echo $node >> ${nodefile}.tmp
      (( idx = idx + 1 ))
    else
      # Node goes: shift every following host name down by one slot and
      # drop the now-duplicated last slot.  idx is deliberately not
      # advanced because the next node's host name now lives at idx.
      dst=$idx
      while :
      do
        (( src = dst + 1 ))
        if [[ -n ${hostarray[$src]} ]]
        then
          hostarray[$dst]=${hostarray[$src]}
          dst=$src
        else
          unset hostarray[$dst]
          break
        fi
      done
    fi
  done

  $mv ${nodefile}.tmp ${nodefile}

}  #----- end of function removefromlist -----------------------
---|
| 175 | |
---|
| 176 | |
---|
################################################################################
#
# Function:  checklist
#
# Purpose:   Probe every node listed in $nodefile (ping, then a remote
#            /bin/hostname through mmdsh) and rebuild the node files and
#            the hostarray from the nodes that answered.
#
# Globals:   nodefile, nodefile2, tmpfile, myhname, cflag, BASELOGDIR,
#            logdate (read); list, i, bad, thostname, hlist, hostarray,
#            already_failed (written).
#            Scratch files /tmp/err, /tmp/hostname and /tmp/hostfile are used.
#
# Result:    $nodefile  - answering nodes whose reported host name is not
#                         $myhname (nodes literally named $myhname are
#                         skipped without being probed)
#            $nodefile2 - every node that answered the mmdsh probe
#            hostarray  - the host names collected in /tmp/hostfile
#
################################################################################
function checklist
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGchecklist ]] && set -x
  $mmTRACE_ENTER "$*"

  # Start from empty result files and no left-over host name list.
  $rm $nodefile2 $tmpfile 2>/dev/null
  $touch $nodefile2 $tmpfile
  list=$($cat $nodefile)
  $rm /tmp/hostfile 2>/dev/null

  for i in $list
  do
    # Entries that are literally the local host name are not probed.
    [[ $i = $myhname ]] && continue

    bad=0
    if $ping -c1 -w5 $i >/dev/null 2>/tmp/err
    then
      $mmdsh -L $i K5MUTE=1 /bin/hostname >/tmp/hostname 2>/tmp/err &
      waitforit
      if [[ -s /tmp/hostname ]]
      then
        # The node answered; remember it before looking at its host name.
        echo $i >> $tmpfile
        thostname=$($awk '{print $2}' /tmp/hostname)
        # An alias of the local node is reachable but is not a remote node.
        [[ $thostname = $myhname ]] && continue
        echo $i >> $nodefile2
        $awk '{print $2}' /tmp/hostname >>/tmp/hostfile
      else
        bad="mmdsh"
      fi
    else
      bad="ping"
    fi

    if [[ $bad != 0 ]]
    then
      # The headline is suppressed when cflag is set; the captured error
      # text is always shown and logged.
      if [[ -z $cflag ]]
      then
        print "\nCannot collect data from $i.  $bad failed:" | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
      fi
      $cat /tmp/err | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
      already_failed="$already_failed $i"
    fi
  done
  $rm /tmp/err 2>/dev/null

  # Swap the work files into their final names:
  #   $nodefile  gets the remote answering nodes,
  #   $nodefile2 gets every answering node.
  $mv $nodefile2 $nodefile 2>/dev/null
  $mv $tmpfile $nodefile2 2>/dev/null
  hlist=$($cat /tmp/hostfile 2>/dev/null)
  # Globbing is switched off while the host names are loaded into the array.
  set -f ; set -A hostarray $hlist ; set +f

}  #----- end of function checklist ----------------------------
---|
| 234 | |
---|
| 235 | |
---|
################################################################################
#
# Function:  addtolist
#
# Purpose:   Append nodes to $nodefile, provided they are not already
#            listed, have not failed before, answer a ping and an mmdsh
#            /bin/hostname probe, and report a host name that is neither
#            $myhname nor already present in hostarray.
#
# Arguments: $1 - blank-separated list of candidate nodes.
#
# Globals:   nodefile, myhname, BASELOGDIR, logdate (read);
#            list, hlist, i, j, k, gotit, bad, already_got, hname,
#            hostarray, already_failed (written).
#            Scratch files /tmp/err and /tmp/hostname are used.
#
################################################################################
function addtolist
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGaddtolist ]] && set -x
  $mmTRACE_ENTER "$*"

  list=$($cat $nodefile)
  hlist=$($cat /tmp/hostfile 2>/dev/null)

  j=0
  for i in $1
  do
    # Never add the local node.
    [[ $i = $myhname ]] && continue

    # Is the candidate already in the node file (as read on entry)?
    gotit=0
    for j in $list
    do
      if [[ $i = $j ]]
      then
        gotit=1
        break
      fi
    done
    bad=0
    [[ $gotit = 1 ]] && continue

    # Do not probe a node again once it has been recorded as failed.
    already_got=0
    for k in $already_failed
    do
      if [[ $i = $k ]]
      then
        already_got=1
        break;
      fi
    done
    [[ $already_got = 1 ]] && continue

    if $ping -c1 -w5 $i >/dev/null 2>/tmp/err
    then
      $mmdsh -L $i K5MUTE=1 /bin/hostname >/tmp/hostname 2>/tmp/err &
      waitforit
      if [[ -s /tmp/hostname ]]
      then
        gotit=0
        j=0
        hname=$($awk '{print $2}' /tmp/hostname)
        # The candidate turned out to be the local node.
        [[ $hname = $myhname ]] && continue

        # Walk hostarray up to its first empty slot looking for the name;
        # j ends up at the match or at that first empty slot.
        while [[ -n ${hostarray[$j]} ]]
        do
          if [[ $hname = ${hostarray[$j]} ]]
          then
            gotit=1
            break
          fi
          (( j = j + 1 ))
        done
        if [[ $gotit = 0 ]]
        then
          hostarray[$j]=$hname
          echo $i >> $nodefile
        fi
      else
        bad="mmdsh"
      fi
    else
      bad="ping"
    fi

    if [[ $bad != 0 ]]
    then
      print "\nCannot collect data from $i.  $bad failed:" | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
      $cat /tmp/err | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
      already_failed="$already_failed $i"
    fi
  done

  $rm /tmp/err 2>/dev/null

}  #----- end of function addtolist ----------------------------
---|
| 323 | |
---|
| 324 | |
---|
################################################################################
#
# Function:  check_space
#
# Purpose:   Estimate how much space the snap needs and compare it with the
#            free space reported by df for $LOGDIR.  On the master (when
#            $nodefile is non-empty) the sizes reported by the other nodes
#            in $BASELOGDIR/pass1outfile are added in, and nodes that
#            reported a lack of space are dropped from the node lists.
#
# Globals:   LOGDIR, BASELOGDIR, NODESDIR, logdate, os, master, nodefile,
#            max_bytes, total_bytes, rlist, glist (read);
#            dfOutput, FREE_SPACE, maxbytes, total_bytes, factor,
#            zipped_bytes, adjusted_bytes, max_zipped_bytes,
#            tmp_max_zipped_bytes, notenough, size, ii, total_block (written).
#
# Exits:     Exits the script with status 1 (after removing $LOGDIR and, on
#            the master, $NODESDIR) when the estimate exceeds the free space.
#
# Notes:     All size comparisons are arithmetic.  Inside [[ ]] the < and >
#            operators compare strings lexically, which picked the wrong
#            compression factor for most sizes (e.g. "550" is not lexically
#            less than "1000000").
#
################################################################################
function check_space
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_space ]] && set -x
  $mmTRACE_ENTER "$*"

  # The following extraction takes into account that the output
  # of df is arranged differently under Linux than under AIX.
  # $dfOutput is deliberately left unquoted so that an entry that df
  # wrapped onto two lines is joined back into a single line.
  dfOutput=$($df -k $LOGDIR | $tail -n +2)
  if [[ $os = "AIX" ]]
  then
    FREE_SPACE=$(print $dfOutput | $awk '{print $3}')
  else
    FREE_SPACE=$(print $dfOutput | $awk '{print $4}')
  fi
  FREE_SPACE=$(expr $FREE_SPACE - 1)

  # Give ourselves a .1 safety margin.
  (( maxbytes = max_bytes + max_bytes / 10 ))
  (( total_bytes = total_bytes + total_bytes / 10 ))

  # Pick the expected size of the compressed file as a percentage of the
  # uncompressed data.
  if (( total_bytes < 1000000 ))
  then
    factor=30
  elif (( total_bytes < 2000000 ))
  then
    factor=25
  else
    factor=20
  fi

  # Guess how large the compressed file will be.
  (( zipped_bytes = total_bytes * factor / 100 ))
  (( adjusted_bytes = total_bytes + zipped_bytes ))
  if (( adjusted_bytes > maxbytes ))
  then
    maxbytes=$adjusted_bytes
  fi

  if [[ $master = 1 && -s $nodefile ]]
  then
    (( max_zipped_bytes = zipped_bytes * 2 ))
    # when we tar our own Z file....

    # Nodes that reported a lack of space are dropped from all node lists.
    notenough=$($grep "There is not enough space" $BASELOGDIR/pass1outfile | $cut -f1 -d :)
    for ii in $notenough
    do
      print "Node $ii reports it does not have enough space in ${BASELOGDIR}/${logdate}\nRemoving from list\n" | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
      removefromlist $ii
      removefromrlist $ii
    done

    # Add the compressed-file sizes reported by the nodes in $rlist.
    for ii in $rlist
    do
      size=$($grep "compressed file" $BASELOGDIR/pass1outfile | $grep $ii | $awk '{print $7}')
      if [[ -n $size ]]
      then
        # zipped_bytes = max at end
        # tmp_max_zipped bytes is max at any point
        (( zipped_bytes = zipped_bytes + size ))
        (( tmp_max_zipped_bytes = zipped_bytes + size ))
        if [[ $tmp_max_zipped_bytes -gt $max_zipped_bytes ]]
        then
          max_zipped_bytes=$tmp_max_zipped_bytes
        fi
      fi
    done

    # Add the total sizes reported by the nodes in $glist.
    for ii in $glist
    do
      size=$($grep "requires about" $BASELOGDIR/pass1outfile | $grep $ii | $awk '{print $5}')
      if [[ -n $size ]]
      then
        (( zipped_bytes = zipped_bytes + size ))
        (( tmp_max_zipped_bytes = zipped_bytes + size ))
        if [[ $tmp_max_zipped_bytes -gt $max_zipped_bytes ]]
        then
          max_zipped_bytes=$tmp_max_zipped_bytes
        fi
      fi
    done

    if [[ "$max_zipped_bytes" -gt "$maxbytes" ]]
    then
      maxbytes=$max_zipped_bytes
    fi
  else
    print "compressed file will be about $zipped_bytes bytes"
  fi

  # df -k reports 1024-byte blocks, so compare in the same unit.
  total_block=$(expr $maxbytes / 1024)

  echo "gpfs.snap requires about $maxbytes bytes" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
  if [[ "$total_block" -gt "$FREE_SPACE" ]]
  then
    echo "gpfs.snap requires about $maxbytes bytes" >> ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
    echo "There is not enough space in ${BASELOGDIR}.  Either increase\nthe filesystem size or choose a different filesystem with the -d option." | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out
    $rm -r ${LOGDIR} 2>/dev/null
    if [[ $master = 1 ]]
    then
      $rm -r $NODESDIR 2>/dev/null
    fi
    exit 1
  fi

  $rm $BASELOGDIR/pass1outfile 2>/dev/null

}  #----- end of function check_space --------------------------
---|
| 434 | |
---|
| 435 | |
---|
################################################################################
#
# Function:  check_waiters2
#
# Purpose:   Handle the per-node "*_waiters" files.
#            Pass 1: gather them into ${LOGDIR}/waiters (local copies plus
#                    copies fetched from the remote nodes, which are then
#                    deleted remotely) and account for their size via addit.
#            Other:  hand every non-empty file in ${LOGDIR}/waiters to tarit.
#
# Globals:   LOGDIR, BASELOGDIR, pass, aflag, rcFile, brcFile, my_hostname
#            (read); firstone, size, list, i (written).
#
################################################################################
function check_waiters2
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_waiters2 ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset remoteNodes

  $mkdir -p ${LOGDIR}/waiters 2>/dev/null

  firstone=1

  if [[ $pass != 1 ]]
  then
    # Collection pass: archive whatever was gathered earlier.
    list=$($ls ${LOGDIR}/waiters)
    for i in $list
    do
      if [[ -s ${LOGDIR}/waiters/${i} ]]
      then
        tarit waiters/${i}
      fi
    done
    return
  fi

  # Sizing pass: first the files that are already visible locally ...
  cp ${BASELOGDIR}/*_waiters ${LOGDIR}/waiters 2>/dev/null

  # ... then the ones on the remote nodes.  The node file to use depends
  # on whether aflag is set.
  if [[ -n $aflag ]]
  then
    remoteNodes=$rcFile
  else
    remoteNodes=$brcFile
  fi

  if [[ -s $remoteNodes ]]
  then
    $mmdsh -F $remoteNodes K5MUTE=1 $rcp ${BASELOGDIR}/\*_waiters $my_hostname:${LOGDIR}/waiters &
    waitforit NULL 60
    $mmdsh -F $remoteNodes K5MUTE=1 rm -f ${BASELOGDIR}/\*_waiters &
    waitforit NULL 60
  fi

  size=$(du -ks ${LOGDIR}/waiters | $cut -f1)
  if [[ -n $size ]]
  then
    addit $size ${LOGDIR}/waiters
  fi

}  #----- end of function check_waiters2 -----------------------
---|
| 484 | |
---|
| 485 | |
---|
################################################################################
#
# Function:  check_files2
#
# Purpose:   Process every ${LOGDIR}/bad.*.files directory.
#            Pass 1: report the directory's size (du -ks) through addit.
#            Other:  hand each file in the directory to tarit, using a path
#                    relative to ${LOGDIR}.
#
# Globals:   LOGDIR, pass (read); dirlist, i, size, basedir, filelist, j
#            (written).
#
################################################################################
function check_files2
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_files2 ]] && set -x
  $mmTRACE_ENTER "$*"

  dirlist=$($ls -d ${LOGDIR}/bad.*.files 2>/dev/null)

  for i in $dirlist
  do
    case $pass in
      1)
        # Sizing pass.
        size=$(du -ks $i | $cut -f1)
        addit $size $i
        ;;
      *)
        # Collection pass.
        basedir=${i##*/}
        filelist=$($ls $i)
        for j in $filelist
        do
          tarit $basedir/$j
        done
        ;;
    esac
  done

}  #----- end of function check_files2 -------------------------
---|
| 511 | |
---|
| 512 | |
---|
################################################################################
#
# Function:  check_files
#
# Purpose:   Compare the checksum (sum) of a file across a set of nodes.
#            When the nodes disagree, the checksum held by the most nodes is
#            taken as "good"; one good copy and every deviating copy are
#            fetched into ${LOGDIR}/bad.<name>.files and the deviating nodes
#            are reported in ${BASELOGDIR}/problem.${my_hostname}.
#
# Arguments: $1 - path of the file to compare
#            $2 - file with the nodes to check; nothing is done if it is empty
#            $3 - optional text appended to the report heading
#
# Globals:   LOGDIR, BASELOGDIR, my_hostname, groupname (read);
#            name, diffs, list, i, num, most, mostsum, badlist, goodlist,
#            goodnode, tsum, sdrfssum (written).
#            <name>.outfile is created in, and removed from, the current
#            directory.
#
# Notes:     For the mmsdrfs file the majority checksum is appended to
#            sdrfssum.  The mismatch branch used to test $base, which this
#            function never sets, instead of $name, so the checksum was not
#            recorded when the nodes disagreed.
#
################################################################################
function check_files
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_files ]] && set -x
  $mmTRACE_ENTER "$*"

  name=$(basename $1)

  if [[ ! -s $2 ]]
  then
    return
  fi

  # One "node: checksum blocks file" line per node is expected in the
  # output file - TODO confirm the exact mmdsh output format.
  $mmdsh -F $2 K5MUTE=1 sum $1 > $name.outfile &
  waitforit NULL 60

  # Count the distinct values of the second field (the checksum).
  diffs=$($sort -uk 2,2 $name.outfile | $wc -l)
  diffs=${diffs##*( )}
  if [[ $diffs -gt 1 ]]
  then
    # Find the checksum that is reported by the largest number of nodes.
    list=$($sort -uk 2,2 $name.outfile | $awk '{print $2}')
    most=0
    for i in $list
    do
      num=$($grep $i $name.outfile | $wc -l)
      if [[ $num -gt $most ]]
      then
        mostsum=$i
        most=$num
      fi
    done
    if [[ $name = mmsdrfs ]]
    then
      sdrfssum="$sdrfssum $mostsum"
    fi

    $mkdir ${LOGDIR}/bad.${name}.files
    badlist=$($grep -v $mostsum $name.outfile | $cut -f1 -d :)
    goodlist=$($grep $mostsum $name.outfile | $cut -f1 -d :)

    # Save one good copy: the local one if this node is in the majority,
    # otherwise the copy of the first majority node.
    echo $goodlist | $grep $my_hostname >/dev/null
    if [[ $? = 0 ]]
    then
      cp $1 ${LOGDIR}/bad.${name}.files/${name}.$my_hostname.good
    else
      goodnode=$(echo $goodlist | $awk '{print $1}')
      $mmdsh -L $goodnode K5MUTE=1 $rcp $1 $my_hostname:${LOGDIR}/bad.${name}.files/${name}.${goodnode}.good &
      waitforit NULL 60
    fi

    if [[ -n $3 ]]
    then
      print "\nThe following nodes $name files are different and are in the minority in $groupname $3" | $tee -a ${BASELOGDIR}/problem.${my_hostname}
    else
      print "\nThe following nodes $name files are different and are in the minority" | $tee -a ${BASELOGDIR}/problem.${my_hostname}
    fi
    echo "$badlist\n" | $tee -a ${BASELOGDIR}/problem.${my_hostname}

    # Fetch every deviating copy for later comparison.
    for i in $badlist
    do
      $mmdsh -L $i K5MUTE=1 $rcp $1 $my_hostname:${LOGDIR}/bad.${name}.files/${name}.$i.bad &
      waitforit NULL 60
    done
  else
    if [[ $name = mmsdrfs ]]
    then
      # NOTE(review): this takes the FIRST blank-separated field, whereas
      # the code above treats the SECOND field as the checksum.  With a
      # "node: checksum ..." line this stores the node prefix rather than
      # the checksum - confirm which is intended.
      tsum=$($cat $name.outfile | $head -1 | $cut -f1 -d " ")
      sdrfssum="$sdrfssum $tsum"
    fi
  fi  # end of if [[ $diffs -gt 1 ]]

  $rm $name.outfile 2>/dev/null

}  #----- end of function check_files --------------------------
---|
| 582 | |
---|
| 583 | |
---|
################################################################################
#
# Function:  check_waiters
#
# Purpose:   Dump the mmfsadm waiters on every node in $nodefile2, report
#            waiters for tmMsgRevoke and the longest waiters, and (unless
#            restricted by yflag/aflag) add the nodes involved to the list
#            of nodes to collect data from.
#
# Globals:   nodefile2, brcFile, bglist, BASELOGDIR, LOGDIR, logdate,
#            my_hostname, yflag, aflag (read);
#            firstone, newlist, line, tnode, thisnode, addr, list (written).
#            The work file "longwaiters" is created in, and removed from,
#            the current directory.
#
# Notes:     Each node stores its dump in ${BASELOGDIR}/<its host>_waiters.
#            The $(hostname ...) substitutions are escaped so that they are
#            evaluated by the shell on each target node.  Unescaped they
#            were expanded on the local node before mmdsh ran, so every
#            node used the local host's name; the node name parsed from the
#            file name below, and the later collection of "*_waiters" files
#            from all nodes into one directory, both need per-node names.
#            NOTE(review): this assumes mmdsh hands the command string to
#            the remote shell unchanged - confirm.
#
#            The extra "yamo" operand makes grep print the file name in
#            front of every match; presumably the file need not exist on
#            the target node - confirm.
#
################################################################################
function check_waiters
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_waiters ]] && set -x
  $mmTRACE_ENTER "$*"

  # Check this cluster.
  firstone=1

  # Save a per-node waiters dump on every node ...
  $mmdsh -F $nodefile2 "K5MUTE=1 mkdir ${BASELOGDIR} 2>/dev/null; K5MUTE=1 $mmfsadm dump waiters > ${BASELOGDIR}/\$(hostname -s)_waiters" &
  waitforit NULL 60
  # ... and gather all waiters locally for the "longest waiters" report.
  $mmdsh -F $nodefile2 "K5MUTE=1 $mmfsadm dump waiters | grep -v '===== dump waiters ====='" > longwaiters 2>/dev/null &
  waitforit NULL 60

  # Look for tmMsgRevoke waiters in the dumps of the nodes in $brcFile.
  if [[ -s $brcFile ]]
  then
    $mmdsh -F $brcFile "K5MUTE=1 grep tmMsgRevoke ${BASELOGDIR}/\$(hostname | cut -d. -f1)_waiters yamo" >> ${BASELOGDIR}/grepped-waiters 2>/dev/null &
    waitforit NULL 60
  fi

  # When bglist is set, also search every dump visible under the local
  # ${BASELOGDIR}.
  if [[ -n $bglist ]]
  then
    $mmdsh -L $my_hostname "K5MUTE=1 grep tmMsgRevoke ${BASELOGDIR}/\*_waiters yamo" >> ${BASELOGDIR}/grepped-waiters 2>/dev/null &
    waitforit NULL 60
  fi

  if [[ -s ${BASELOGDIR}/grepped-waiters ]]
  then
    print "\nThere are waiters for tmMsgRevokes:" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
    if [[ $yflag != 1 ]]
    then
      print "Data will be collected from these nodes:" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
    fi
    newlist=""
    {
      while read line
      do
        # The second colon-separated field is the dump file name; the
        # node name is the part of its base name before the "_".
        tnode=$(echo $line | $cut -f2 -d :)
        thisnode=$(basename $tnode | $cut -f1 -d "_")
        echo $line | $grep "tmMsgRevoke on node" >/dev/null 2>&1
        if [[ $? = 0 ]]
        then
          # Everything after "tmMsgRevoke on node" names the other party.
          addr=${line##*tmMsgRevoke on node}
          print "waiter on $thisnode, tmMsgRevoke from $addr" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
          echo $newlist | $grep -w $addr >/dev/null 2>&1
          if [[ $? != 0 ]]
          then
            newlist="$newlist $addr"
          fi
        else
          print "waiter on $thisnode, tmMsgRevoke" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
        fi
        echo $newlist | $grep -w $thisnode >/dev/null 2>&1
        if [[ $? != 0 ]]
        then
          newlist="$newlist $thisnode"
        fi
      done
      echo "" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
    } < ${BASELOGDIR}/grepped-waiters
    if [[ $yflag != 1 && -z $aflag ]]
    then
      addtolist "$newlist"
    fi
  fi  # end of if [[ -s ${BASELOGDIR}/grepped-waiters ]]

  $rm ${BASELOGDIR}/grepped-waiters 2>/dev/null

  if [[ -s longwaiters ]]
  then
    # Sort numerically, descending, on the fourth field and report the
    # nodes that own the first five lines.
    $sort -nrk 4,4 longwaiters > ${LOGDIR}/long_waiters.sorted
    list=$($cat ${LOGDIR}/long_waiters.sorted | $head -5 | $cut -f1 -d ":")
    if [[ $yflag = 1 ]]
    then
      print "There are long waiters.  The 5 longest waiters are on the following nodes:\n$list\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
    else
      print "There are long waiters.  The 5 longest waiters are on the following nodes, which will be added to the list to collect data from\n$list\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
      addtolist "$list"
    fi
  fi  # end of if [[ -s longwaiters ]]

  $rm longwaiters 2>/dev/null

}  #----- end of function check_waiters ------------------------
---|
| 668 | |
---|
| 669 | |
---|
################################################################################
#
# Function:  check_dumps
#
# Purpose:   Build, in the global variable internal_list, the names of the
#            dump files in a directory that should be collected.  Files
#            whose names contain "shutdown" are ignored, at most maxFiles of
#            the most recent files are considered, and files that appear to
#            be more than 14 days old are skipped.
#
# Arguments: $1 - file name prefix; files matching <prefix>.*.* are examined
#            $2 - maximum number of files; a negative value means "all"
#            $3 - directory that holds the files
#
# Globals:   tmpfile, BASELOGDIR, logdate, months[], days[] (read);
#            filePrefix, maxFiles, dumpDir, internal_list, savedir,
#            numfiles, today, fileLine, month, day, name, i, days (written).
#
# Notes:     The function returns without touching the current directory
#            when $3 is not a directory or cannot be entered; previously an
#            existing non-directory path made it scan the caller's current
#            directory instead.  File descriptor 3 is closed again once the
#            listing has been read.
#
################################################################################
function check_dumps
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_dumps ]] && set -x
  $mmTRACE_ENTER "$*"

  filePrefix=$1
  maxFiles=$2
  dumpDir=$3

  internal_list=""

  if [[ ! -d $dumpDir ]]
  then
    return
  fi

  savedir=$(pwd)
  cd $dumpDir
  if [[ $? -ne 0 ]]
  then
    return
  fi

  numfiles=$($ls -ltr $filePrefix.*.* 2>/dev/null | $grep -v shutdown | $wc -l)
  if [[ $numfiles -eq 0 ]]
  then
    cd $savedir
    return
  fi
  if [[ $maxFiles -gt 0 && $numfiles -gt $maxFiles ]]
  then
    print "There are $numfiles $filePrefix files in $dumpDir.\nBecause these files are large I am only grabbing the latest $maxFiles.\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
  fi

  # Day of the year.  Leading zeros are stripped (there are at most two)
  # so that the value is not taken as an octal number in arithmetic.
  today=$(date +%j)
  today=${today#0}
  today=${today#0}

  if [[ $maxFiles -lt 0 ]]
  then
    # A negative value indicates "collect them all".
    $ls -ltr $filePrefix.*.* 2>/dev/null | $grep -v shutdown > $tmpfile
  else
    # Collect up to the specified number of files.
    $ls -ltr $filePrefix.*.* 2>/dev/null | $grep -v shutdown | $tail -n -$maxFiles > $tmpfile
  fi

  # Create a list of files in global variable $internal_list
  # that will later be collected after we return to the caller.
  exec 3<&-
  exec 3< $tmpfile
  while read -u3 fileLine
  do
    # Fields 6, 7 and 9 of the long listing: month, day of month, name.
    month=$(echo $fileLine | $awk '{print $6}')
    day=$(echo $fileLine | $awk '{print $7}')
    name=$(echo $fileLine | $awk '{print $9}')

    # Convert month and day into a day of the year using the month tables.
    # Note that the scalar "days" is the same variable as element 0 of the
    # days[] array; the loop only reads elements 1 and up.
    days=0
    i=1
    while [[ -n ${months[$i]} ]]
    do
      if [[ $month = ${months[$i]} ]]
      then
        (( days = days + day ))
        break
      else
        (( days = days + ${days[$i]} ))
        (( i = i + 1 ))
      fi
    done  # end of while [[ -n ${months[$i]} ]] do

    if [[ $(( today - days )) -gt 14 ]]
    then
      print "File $name is over 2 weeks old so I am not going to collect it." | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
    else
      if [[ -z $internal_list ]]
      then
        internal_list=$name
      else
        internal_list="${internal_list} $name"
      fi
    fi  # end of if [[ $(( today - days )) -gt 14 ]]
  done  # end of while read -u3 fileLine do
  exec 3<&-
  $rm -f $tmpfile

  cd $savedir

}  #----- end of function check_dumps --------------------------
---|
| 753 | |
---|
| 754 | |
---|
################################################################################
#
# Function:  removefromrlist
#
# Purpose:   Remove a node from the global node lists glist and rlist and
#            rewrite the node file that corresponds to rlist.
#
# Arguments: $1 - name of the node to remove.
#
# Globals:   newrcFile (read); glist, rlist (modified);
#            newglist, newrlist, firstone, i, rcFile (written).
#            On return rcFile is set to $newrcFile, which holds one entry
#            of rlist per line (the file does not exist if rlist is empty).
#
# Notes:     newglist and newrlist are reset on every call.  They used to
#            keep their value from the previous call, so a second call
#            re-added the node removed by the first one, duplicated the
#            other entries of glist, and left a stale rlist when no entry
#            survived.
#
################################################################################
function removefromrlist
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGremovefromrlist ]] && set -x
  $mmTRACE_ENTER "$*"

  # Rebuild glist without the node.
  newglist=""
  for i in $glist
  do
    if [[ $i != $1 ]]
    then
      newglist="$newglist $i"
    fi
  done
  glist=$newglist

  # Rebuild rlist and its node file without the node.
  firstone=1
  newrlist=""
  $rm -f $newrcFile
  for i in $rlist
  do
    if [[ $i != $1 ]]
    then
      if [[ $firstone = 1 ]]
      then
        print -- $i > $newrcFile
        newrlist="$i"
        firstone=0
      else
        print -- $i >> $newrcFile
        newrlist="$newrlist $i"
      fi
    fi
  done
  rlist=$newrlist
  rcFile=$newrcFile

}  #----- end of function removefromrlist ----------------------
---|
| 791 | |
---|
| 792 | |
---|
################################################################################
#
# Function:  check_fs
#
# Purpose:   Split a list of nodes into those on which the marker file
#            ${BASELOGDIR}/yamo written by this node is visible with the
#            same checksum and those on which it is not.
#
# Arguments: $1 - file with the nodes to query through mmdsh
#            $2 - blank-separated list of the nodes to classify
#            $3 - optional; when non-empty, the hostarray entries of the
#                 matching nodes are copied into ghostarray
#
# Globals:   BASELOGDIR, trcFile, hostarray (read);
#            yamosum, thissum, firstone, g, h, i (written);
#            tglist    - nodes whose checksum matches
#            trlist    - nodes whose checksum does not match
#            $trcFile  - the trlist nodes, one per line
#            ghostarray (see $3)
#            The work file "rsumfile" is created in, and removed from, the
#            current directory.
#
# Notes:     The local checksum is taken from ${BASELOGDIR}/yamo, the file
#            that was just written.  It used to be taken from "yamo"
#            relative to the current directory, which yields an empty
#            checksum whenever the current directory is not ${BASELOGDIR}.
#
################################################################################
function check_fs
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGcheck_fs ]] && set -x
  $mmTRACE_ENTER "$*"

  # Write the marker file and remember its checksum.
  echo "yamo" > ${BASELOGDIR}/yamo
  yamosum=$($sum ${BASELOGDIR}/yamo | $cut -f1 -d " ")

  # Ask every node for the checksum of the same path.
  $mmdsh -F $1 "K5MUTE=1 sum ${BASELOGDIR}/yamo 2>/dev/null | awk '{print \$1}'" > rsumfile &
  waitforit

  firstone=1
  trlist=""
  tglist=""
  $rm -f $trcFile

  # g indexes ghostarray; h tracks the position of the current node in $2
  # and is used as the matching index into hostarray.
  g=0
  h=0
  for i in $2
  do
    thissum=$($grep -E "^$i:" rsumfile | $awk '{print $2}')
    if [[ $thissum = $yamosum ]]
    then
      tglist="$tglist $i"
      if [[ -n $3 ]]
      then
        ghostarray[$g]=${hostarray[$h]}
        (( g = g + 1 ))
      fi
    else
      if [[ $firstone = 1 ]]
      then
        print -- $i > $trcFile
        trlist=$i
        firstone=0
      else
        print -- $i >> $trcFile
        trlist="$trlist $i"
      fi
    fi
    (( h = h + 1 ))
  done

  $rm ${BASELOGDIR}/yamo 2>/dev/null
  $rm rsumfile 2>/dev/null

}  #----- end of function check_fs -----------------------------
---|
| 841 | |
---|
| 842 | |
---|
############################################################################
#
# Helper for get_files: append one term to each of the global pattern
# lists greplist, greplistb and greplist2.  Terms are joined with "|";
# when greplist is still empty all three lists are (re)started.
#
############################################################################
function get_files_addPatterns   # <greplistTerm> <greplistbTerm> <greplist2Term>
{
  if [[ -z $greplist ]]
  then
    greplist="$1"
    greplistb="$2"
    greplist2="$3"
  else
    greplist="${greplist}|$1"
    greplistb="${greplistb}|$2"
    greplist2="${greplist2}|$3"
  fi
}


############################################################################
#
# Function:  Collect the mmfs log files.
#
#   Pass 1 (or -x 2): build egrep patterns covering the last 14 days
#            (greplist, greplistb, greplist2); on the master, gather the
#            matching mmfs.log lines from the nodes with mmdsh into
#            mmfslogs.sorted and, in pass 1, account for its size.
#   Pass 2:  add mmfslogs.sorted to the tar file.
#   Always:  concatenate the local mmfs.log.* files into
#            mmfs.logs.${my_hostname} and either account for its size
#            (pass 1) or add it to the tar file (otherwise).
#
# Notes:     Relies on the global arrays "days" and "months" (indexed by
#            month number; element 0 of "months" doubles as the number
#            of months spanned).  Leaves the caller in $BASELOGDIR.
#
############################################################################
function get_files
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGget_files ]] && set -x
  $mmTRACE_ENTER "$*"

  cd $LOGDIR

  if [[ $pass = 1 || $x_arg = 2 ]]
  then
    if [[ $master = 1 ]]
    then
      # Today's date; fields are month, day, hour, minute, 2-digit year.
      ddd=$(date +"%m %e %H %M %y")
      set -- $ddd
      emon=$1
      eday=$2
      eyr=$5
      emon=${emon##0}
      syr=$eyr
      smon=$emon

      # Work out the start date, 14 days before today.
      if [[ $eday -le 14 ]]
      then
        # The window starts in the previous month.
        (( diff = 14 - eday ))
        if [[ $emon -eq 1 ]]
        then
          smon=12
          (( syr = eyr - 1 ))
          (( sday = 31 - diff ))
        else
          (( smon = smon - 1 ))
          (( sday = ${days[$smon]} - diff ))
        fi
      else
        (( sday = eday - 14 ))
      fi
    fi

    # One month if the window does not cross a month boundary, else two.
    if [[ $smon = $emon ]]
    then
      months=1
      endday=$eday
    else
      months=2
      endday=${days[$smon]}
    fi

    curmonth=1
    cmon=$smon
    i=${sday##0}
    while [[ $curmonth -le $months ]]
    do
      if [[ $curmonth -eq 2 ]]
      then
        # Second month: from the 1st up to today.
        i=1
        endday=$eday
        cmon=$emon
      fi

      # Single-digit days [i-ee].
      if [[ $i -lt 10 ]]
      then
        e=${endday##0}
        if [[ $e -lt 10 ]]
        then
          ee=$e
        else
          ee=9
        fi
        get_files_addPatterns \
          "${months[$cmon]} [$i-$ee]" \
          "^${months[$cmon]} *[$i-$ee] " \
          "^${cmon}/0[$i-$ee]"
      fi

      # Two-digit days, one decade (1x, 2x, 3x) at a time.
      ii=2
      while [[ $ii -lt 5 ]]
      do
        (( jj = ii * 10 ))      # first day of the next decade
        (( kk = jj - 11 ))      # last day before this decade
        (( ll = jj - 10 ))      # first day of this decade
        (( mm = ii - 1 ))       # tens digit of this decade
        if [[ $i -lt $jj && $endday -gt $kk ]]
        then
          if [[ $i -gt $kk ]]
          then
            (( s = i - ll ))
          else
            s=0
          fi

          (( e = endday - ll ))
          if [[ $endday -ge $jj ]]
          then
            e=9
          fi
          get_files_addPatterns \
            "${months[$cmon]} ${mm}[$s-$e]" \
            "^${months[$cmon]} ${mm}[$s-$e]" \
            "^${cmon}/${mm}[$s-$e]"
        fi
        (( ii = ii + 1 ))
      done
      (( curmonth = curmonth + 1 ))
    done

    if [[ $master = 1 ]]
    then
      # Pull the matching log lines from the nodes.
      if [[ -n $aflag ]]
      then
        $mmdsh -L $myhname -F $nodefile K5MUTE=1 "grep -E '$greplist' $rasDir/mmfs.log.[0-9]*" > mmfslogs.unsorted 2>/dev/null
      elif [[ -s $nodefile2 ]]
      then
        $mmdsh -F $nodefile2 K5MUTE=1 "grep -E '$greplist' $rasDir/mmfs.log.[0-9]*" > mmfslogs.unsorted 2>/dev/null
      fi
      $sort -k3,5 mmfslogs.unsorted > mmfslogs.sorted 2>/dev/null

      $rm mmfslogs.unsorted 2>/dev/null

      if [[ $pass = 1 ]]
      then
        size=$($ls -l mmfslogs.sorted | $awk '{print $5}')
        if [[ $size != 0 ]]
        then
          addit $size mmfslogs.sorted
        fi
      fi
    fi
  fi  # end of if [[ $pass = 1 || $x_arg = 2 ]]

  if [[ $pass = 2 ]]
  then
    cd $BASELOGDIR
    tarit mmfslogs.sorted
  fi

  # Local copy of all mmfs logs on this node.
  cd $LOGDIR
  $cat $rasDir/mmfs.log.[0-9]* > mmfs.logs.${my_hostname}
  size=$($ls -l mmfs.logs.${my_hostname} | $awk '{print $5}')
  if [[ $pass = 1 ]]
  then
    addit $size mmfs.logs.${my_hostname}
  fi
  cd $BASELOGDIR
  if [[ $pass != 1 ]]
  then
    tarit mmfs.logs.${my_hostname}
  fi

}  #----- end of function get_files ----------------------------
---|
| 1001 | |
---|
| 1002 | |
---|
| 1003 | ############################################################################ |
---|
| 1004 | # |
---|
| 1005 | # Function: Get all of the files in a specified directory. |
---|
| 1006 | # |
---|
| 1007 | # Input: $1 - directory whose files are to be gotten |
---|
| 1008 | # |
---|
| 1009 | # Output: Pass 1: Calculate space for the files in the directory. |
---|
| 1010 | # Pass 2: Add the files in the directory to the tar file. |
---|
| 1011 | # |
---|
| 1012 | # Returns: 0 |
---|
| 1013 | # |
---|
| 1014 | ############################################################################ |
---|
function get_files_dir   # <dirName>
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGget_files_dir ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset dirName=$1
  typeset fileList

  # Nothing to collect if the argument is not a directory.  Without this
  # guard a failed "cd" would leave us listing whatever directory the
  # caller happens to be in (or $HOME, for an empty argument).
  [[ -d $dirName ]] || return 0

  # Generate the list of files to get, but leave out complete.map files.
  # Listing the directory by name avoids changing (and having to restore)
  # the current directory.
  fileList=$($ls -A "$dirName" 2>/dev/null | $grep -v "complete.map")

  # Then invoke get_files_list() to get the files.
  if [[ -n $fileList ]]
  then
    get_files_list "$dirName" "$fileList"
  fi

  return 0

}  #----- end of function get_files_dir ------------------------
---|
| 1038 | |
---|
| 1039 | |
---|
| 1040 | ############################################################################ |
---|
| 1041 | # |
---|
| 1042 | # Function: Get the files specified by means of a directory and a list. |
---|
| 1043 | # |
---|
| 1044 | # Input: $1 - directory from which to get files |
---|
| 1045 | # $2 - list of files to be gotten from the directory |
---|
| 1046 | # $3 - (optional) name of subdir to use in the tar file |
---|
| 1047 | # |
---|
| 1048 | # Output: Pass 1: Calculate space for the specified files. |
---|
| 1049 | # Pass 2: Add the specified files to the tar file. |
---|
| 1050 | # |
---|
| 1051 | # Returns: 0 |
---|
| 1052 | # |
---|
| 1053 | ############################################################################ |
---|
function get_files_list   # <dirName> <fileList> [<subdirName>]
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGget_files_list ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset dirName=$1
  typeset fileList=$2
  typeset subdirName=$3
  typeset relDir e

  # $relDir is the path, relative to $LOGDIR, under which the files will
  # appear in the tar file: either the name given by the caller or the
  # source directory without its leading slash.
  if [[ -n $subdirName ]]
  then
    relDir=$subdirName      # use the name specified as an input
  else
    relDir=${dirName#/}     # remove the leading / character
  fi

  # $dir (global) is the parent part of $relDir; create it under $LOGDIR
  # when $relDir has more than one component.
  if [[ $relDir = */* ]]
  then
    dir=${relDir%/*}
    $mkdir -p ${LOGDIR}/$dir 2>/dev/null
  else
    dir=$relDir
  fi

  # Create a symlink to the passed directory at exactly $LOGDIR/$relDir.
  # Linking "into" $LOGDIR/$dir instead would name the link after the
  # basename of $dirName, which is only the same path when that basename
  # equals the last component of $relDir.
  ln -s $dirName ${LOGDIR}/$relDir

  # Loop through the list passed as the 2nd parameter
  # and calculate the space required if this is pass 1
  # or add the parts to the tar file if this is pass 2.
  for e in $fileList
  do
    # Only regular files are collected.
    if [[ -f $dirName/$e ]]
    then
      if [[ $pass = 1 ]]
      then
        temp_bytes=$($ls -l $dirName/$e | $awk '{ print $5 }')
        addit $temp_bytes "$dirName/$e file"
      else
        # Second argument non-null: tarit must not delete the source.
        tarit "$relDir/$e" 1
      fi
    fi
  done

  # Remove the symlink created earlier.
  $rm ${LOGDIR}/$relDir 2>/dev/null

}  #----- end of function get_files_list -----------------------
---|
| 1106 | |
---|
| 1107 | |
---|
############################################################################
#
# Function:  Run, through doit(), the general system commands whose
#            output is always collected: a platform-specific set
#            (AIX, or everything else) followed by a common set.
#
# Returns:   0
#
############################################################################
function get_always
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGget_always ]] && set -x
  $mmTRACE_ENTER "$*"

  case $os in
    AIX)
      doit "errpt_a" "/usr/bin/errpt -a"
      doit "lscfg_vp" "lscfg -vp"
      doit "lslpp_hac" "/usr/bin/lslpp -hac"
      doit "lssrc_a" "lssrc -a"
      doit "no_a" "no -a"
      [[ $gotvmstat = 1 ]] && doit "vmstat_s" "vmstat -s"

      # Collect the console log only if it is a non-empty file that
      # does not live under /dev.
      conslog=$(lscons)
      [[ $conslog != +(/)dev+(/)* && -s $conslog ]] && doit "lscons" "cat $conslog"
      ;;

    *)
      doit "dmesg" "dmesg"
      doit "fdisk_l" "fdisk -l"
      doit "lsmod" "lsmod"
      doit "lspci" "lspci"
      doit "rpm_qa" "rpm -qa"
      # The first three verifications accumulate in rpm_verify; the
      # last call (no output-control flag) lets doit add it to the tar.
      doit "rpm_verify" "rpm --verify gpfs.base" 1
      doit "rpm_verify" "rpm --verify gpfs.docs" 1
      doit "rpm_verify" "rpm --verify gpfs.gpl" 1
      doit "rpm_verify" "rpm --verify gpfs.msg.en_US"
      doit "uname_a" "uname -a"
      doit "proc_cpuinfo" "cat /proc/cpuinfo"
      doit "proc_version" "cat /proc/version"
      doit "site_mcr" "cat /usr/lpp/mmfs/src/config/site.mcr"
      doit "etc_release" "$grep '[a-zA-Z]' /etc/*release"
      ;;
  esac

  # Commands common to all platforms.
  doit "date" "date"
  doit "df_k" "df -k"
  doit "exportfs" "exportfs"
  doit "gpfs_executables" "$ls -l /usr/lpp/mmfs/bin"
  doit "ipcs_a" "ipcs -a"
  doit "ls_dev" "$ls -l /dev"
  doit "ps_edf" "ps -edf"
  doit "uptime" "uptime"

  doit "mmdevdiscover" "/usr/lpp/mmfs/bin/mmdevdiscover"
  doit "tspreparedisk_S" "/usr/lpp/mmfs/bin/tspreparedisk -S"

  # Include the script trace if one is being written to a real file.
  [[ $mmScriptTrace != /dev/null && -s $mmScriptTrace ]] && doit "mmScriptTrace" "cat $mmScriptTrace"

  return 0

}  #----- end of function get_always ---------------------------
---|
| 1169 | |
---|
| 1170 | |
---|
############################################################################
#
# Function:  Collect network information through doit(): netstat in
#            several flavours, plus per-interface ifconfig output (and,
#            on AIX, odmget and lsattr output for each interface).
#
# Notes:     The per-interface log files (ifconfig, lsattr, odmget_CuAt)
#            are built up with output control 1 and added to the tar
#            file explicitly in pass 2.  Globals INTERFACES and i are
#            modified.
#
############################################################################
function get_net_stuff
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGget_net_stuff ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset netstatCmd

  # Platform-specific netstat calls and interface discovery.
  if [[ $os = "AIX" ]]
  then
    doit "netstat" "netstat -i -n" "1"
    doit "netstat" "netstat -m" "1"
    doit "netstat" "netstat -D" "1"
    INTERFACES=$($lsdev -Cc if | $grep -v Defined | $cut -d " " -f1)
  else
    INTERFACES=$(netstat -i -n | $cut -f1 -d " " | $grep -v "Kernel" | $grep -v "Iface")
  fi

  # Per-interface data.
  for i in $INTERFACES
  do
    doit "ifconfig" "ifconfig $i" 1
    if [[ $os = "AIX" ]]
    then
      doit "odmget_CuAt" "odmget -q name=$i CuAt" 1
      doit "lsattr" "lsattr -El $i" 1
    fi
  done

  # Remaining netstat output; the final call carries no output-control
  # flag, so doit adds the accumulated netstat log to the tar file.
  for netstatCmd in "netstat -i" "netstat -r" "netstat -rn" "netstat -v 2>/dev/null"
  do
    doit "netstat" "$netstatCmd" "1"
  done
  doit "netstat" "netstat -s"

  if [[ $pass = 2 ]]
  then
    tarit "ifconfig"
    if [[ $os = "AIX" ]]
    then
      tarit "lsattr"
      tarit "odmget_CuAt"
    fi
  fi

}  #----- end of function get_net_stuff ------------------------
---|
| 1214 | |
---|
| 1215 | |
---|
############################################################################
#
# Function:  Collect AIX LVM information through doit(): lsfs, lspv
#            (plus "lsattr -El" for every disk lspv reports), lsvg, and
#            per-volume-group details.  Does nothing unless $os is AIX.
#
# Notes:     Uses $lspvOutputFile as a scratch file and file descriptor 3
#            to read it, so that commands started by doit cannot consume
#            the list through stdin.  The lspv, lsvg and getlvodm_u logs
#            are added to the tar file explicitly in pass 2.
#
############################################################################
function get_lvm_stuff
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGget_lvm_stuff ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset pdisk lspvRest i VGs

  [[ $os = "AIX" ]] || return 0

  doit "lsfs" "lsfs"
  doit "lspv" "lspv" 1

  # One "lsattr -El" per physical disk; the disk name is the first
  # field of each lspv output line.
  LC_ALL=C $lspv > $lspvOutputFile
  exec 3<&-
  exec 3< $lspvOutputFile
  while read -u3 pdisk lspvRest
  do
    doit "lspv" "lsattr -El $pdisk" "1"
  done
  # Release the descriptor again; it is not needed past this point and
  # would otherwise stay open on the (deleted) scratch file.
  exec 3<&-
  $rm -f $lspvOutputFile

  doit "lsvg" "lsvg" "1"
  doit "lsvg" "lsvg -o" "1"

  # Details for each volume group reported by "lsvg -o".
  VGs=$($lsvg -o)
  for i in $VGs
  do
    doit "lsvg" "lsvg -l $i" "1"
    doit "getlvodm_u" "getlvodm -u $i" "1"
  done

  # Device entries for every volume group reported by lsvg.
  VGs=$($lsvg)
  for i in $VGs
  do
    doit "lsvg" "$ls -l /dev/$i" "1"
  done

  if [[ $pass = 2 ]]
  then
    tarit lspv
    tarit lsvg
    tarit getlvodm_u
  fi

}  #----- end of function get_lvm_stuff ------------------------
---|
| 1263 | |
---|
| 1264 | |
---|
############################################################################
#
# Function:  Account for the space one collected item will need and
#            record the estimate in ${LOGDIR}/sizes.
#
# Input:     $1 - size of the item in bytes (an empty or non-numeric
#                 value is counted as 0)
#            $2 - description of the item for the sizes file
#
# Output:    total_bytes - running total of all items
#            max_tmp     - space needed while processing this item
#                          (size of tarfile + 2 * sizeof_file)
#            max_bytes   - largest max_tmp seen for ANY item
#            first       - set to 1 once the sizes file has been started
#
############################################################################
function addit   # <bytes> <description>
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGaddit ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nbytes=$1

  # A missing size (e.g. from a failed "ls -l") must not turn the
  # arithmetic below into a syntax error; count it as zero instead.
  case $nbytes in
    '' | *[!0-9]* ) nbytes=0 ;;
  esac

  # Variables are referenced by name inside (( )) so that an unset
  # total_bytes or max_bytes evaluates to 0 rather than to nothing.
  (( total_bytes = total_bytes + nbytes ))
  (( max_tmp = total_bytes + nbytes ))
  if (( max_tmp > max_bytes ))
  then
    max_bytes=$max_tmp
  fi

  # The first call starts a fresh sizes file; later calls append.
  if [[ $first = 1 ]]
  then
    echo "estimate $2 will take $nbytes bytes" >> ${LOGDIR}/sizes
  else
    echo "estimate $2 will take $nbytes bytes" > ${LOGDIR}/sizes
  fi
  first=1

}  #----- end of function addit --------------------------------
---|
| 1291 | |
---|
| 1292 | |
---|
| 1293 | ############################################################################### |
---|
| 1294 | # |
---|
| 1295 | # Function waitforit (wait for the most recently-started background process) |
---|
| 1296 | # |
---|
| 1297 | ############################################################################### |
---|
function waitforit   # [<cmdString> | NULL] [<maxCount>]
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGwaitforit ]] && set -x
  $mmTRACE_ENTER "$*"

  # Input:   $1 - (optional) command string of the process to wait for;
  #               when null or "NULL", the most recently started
  #               background process ($!) is used as is
  #          $2 - (optional) poll limit, one poll per second; default 20
  # Returns: 0 if the process went away in time (or none was found),
  #          1 if it had to be killed after the limit was reached.
  typeset cmdString=$1
  typeset maxCount=$2

  mpid=$!

  if [[ -n $cmdString && $cmdString != NULL ]]
  then
    tmpid=$mpid
    # Take the first 70 bytes of the command string as the text to look
    # for in the ps output.  The string is quoted so that the shell does
    # not glob-expand it or collapse its white space, and it is matched
    # literally (grep -F): a command containing characters such as
    # "[" or "*" would otherwise never match its own ps entry and we
    # would return without waiting for it.
    comm=$(echo "$cmdString" | $head -c 70)
    $sleep 1
    mpid=$($ps -g $mypgid -o pid=PID,args=COMM | $grep -F -e "$comm" | $grep -v grep | $tail -n -1 | $awk '{print $1}')

    # In some cases commands fork themselves.  We need to make sure
    # we have the parent process; try to find the right one.
    set -- $mpid
    words=$#
    if [[ $words != 1 && $words != 0 ]]
    then
      echo "got a multiple: $mpid comm is $comm" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
      # if there is more than one try to get the direct descendent of
      # gpfs.snap or the pid
      mmpid=$($ps -f | $grep -F -e "$comm" | $grep -v grep | $grep -E "$$|$tmpid" | $awk '{print $2}')
      if [[ -z $mmpid ]]
      then
        mpid=$(echo $mpid | $head -n 1)
        mpid=$(echo $mpid | $awk '{print $1}')
      else
        mpid=$mmpid
      fi
    fi
  fi  # end of if [[ -n $cmdString && $cmdString != NULL ]]

  counter=1
  count=${maxCount:-20}

  # Poll once a second until the process is gone or the limit is hit.
  while [[ $counter -le $count && -n $mpid ]]
  do
    if [[ $counter -eq $count ]]
    then
      # Out of time: report the command and kill it.
      if [[ -n $cmdString && $cmdString != NULL ]]
      then
        comm=$cmdString
      else
        comm=$($ps -fp $mpid -o args=ARGS | $tail -n -1)
      fi
      print "\nThe following command timed out!:\n$comm\n" | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out

      # kill any children of the process we are about to kill
      # (for the case where we were called with pipes in the command line)
      $ps -o pid=PID,ppid=PPID | $grep $mpid | $grep -v "grep $mpid" > tmpout
      {
        while read line
        do
          pid=$(echo $line | $awk '{print $1}')
          ppid=$(echo $line | $awk '{print $2}')
          if [[ $ppid = $mpid ]]
          then
            $kill -9 $pid
          fi
        done
      } < tmpout
      $rm tmpout

      echo "killing $mpid"
      $kill -9 $mpid 2>/dev/null
      $sleep 1

      # special check to catch defunct children of c -c (pass 1):
      # if the killed process is left defunct and its parent is wc,
      # kill that parent as well.
      parent=$($ps -fp $mpid | $grep defunct | $awk '{print $3}')
      if [[ -n $parent ]]
      then
        parcomm=$($ps -p $parent | $tail -n -1 | $awk '{print $4}')
        if [[ $parcomm = wc ]]
        then
          $kill -9 $parent 2>/dev/null
        fi
      fi
      set +x
      return 1
    fi  # end of if [[ $counter -eq $count ]]

    $sleep 1
    (( counter = counter + 1 ))
    # mpid becomes empty once ps no longer lists the process.
    mpid=$($ps -p $mpid | $awk '{print $1}' | $grep -v "PID")

  done  # end of while [[ $counter -le $count && -n $mpid ]] do

  return 0

}  #----- end of function waitforit ----------------------------
---|
| 1395 | |
---|
| 1396 | |
---|
| 1397 | ############################################################################### |
---|
| 1398 | # |
---|
| 1399 | # Function doit |
---|
| 1400 | # |
---|
| 1401 | # All arguments are optional except $2. If you do not desire an action, |
---|
| 1402 | # pass "" for the parameter, or just leave out trailing args completely. |
---|
| 1403 | # |
---|
| 1404 | # Arguments: |
---|
| 1405 | # $1 The unique part of the name of the log file as in |
---|
| 1406 | # $LOGDIR/$1.${my_hostname}.${logdate} |
---|
| 1407 | # $2 The command to be run. stdout is redirected to the log file. |
---|
| 1408 | # stderr is redirected to both the screen and the file |
---|
| 1409 | # gpfs.snap_err.${logdate}.out in $BASELOGDIR. |
---|
| 1410 | # On a non-zero return code, an error message is printed to the |
---|
| 1411 | # screen and the gpfs.snap_err.${logdate}.out file. |
---|
| 1412 | # $3 "Output control" flag. |
---|
| 1413 | # If null, the output is appended to the global tar file; |
---|
| 1414 | # If 1, the output is not appended to the global tar file |
---|
| 1415 | # (the log file will be left for further data to be added to it, |
---|
| 1416 | # and a subsequent invocation with a null output control flag |
---|
| 1417 | # will cause the log file to be added to the tar file then) |
---|
| 1418 | # If 2, the output is prepended with the hostname of the node |
---|
| 1419 | # to make the output look like that produced by mmdsh, |
---|
| 1420 | # and then copied to the log file's parent directory |
---|
| 1421 | # (this option is used by the -c option for collecting |
---|
| 1422 | # output data from executing a command string). |
---|
| 1423 | # |
---|
| 1424 | ############################################################################### |
---|
function doit   # <logFile> <cmdToRun> <outputControl>
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGdoit ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset logFile=$1
  typeset cmdToRun=$2
  typeset outputControl=$3
  typeset errTemp=${BASELOGDIR}/gpfs.snap_err.out.temp
  typeset errLog=${BASELOGDIR}/gpfs.snap_err.${logdate}.out

  if [[ $pass = 1 ]]
  then
    # Pass 1: run the command only to measure how much output it
    # produces, and record the estimate through addit.
    if [[ -n $cmdToRun ]]
    then
      ksh -c "PATH=$PATH $cmdToRun" 2>/dev/null | $wc -c >$YAMO &
      # "netstat -D" is given 60 polls instead of the default.
      if [[ $cmdToRun = "netstat -D" ]]
      then
        waitforit "$cmdToRun" "60"
      else
        waitforit "$cmdToRun"
      fi
      # $? is still the return code of waitforit here.
      if [[ $? = 0 && -s $YAMO ]]
      then
        temp_bytes=$($cat $YAMO | $awk '{print $1}')
        temp_bytes2=$(echo "$cmdToRun" | $wc -c)
        # Output size + command length + 153 bytes of fixed overhead.
        tmp2_bytes=$(( temp_bytes + temp_bytes2 + 153 ))
        # Quote the command so that the whole string, not just its
        # first word, becomes the description in the sizes file.
        addit $tmp2_bytes "$cmdToRun"
      fi
    fi
    $rm $YAMO 2>/dev/null
  else
    # Pass 2: run the command for real and append its output, under a
    # header, to the log file.
    if [[ -n $cmdToRun ]]
    then
      print "" >> $LOGDIR/$logFile
      print "$outputDelimiter" >> $LOGDIR/$logFile
      print "Output for $cmdToRun on $($hostname | $cut -d. -f1)" >>${LOGDIR}/$logFile
      print "$outputDelimiter" >> $LOGDIR/$logFile
      ksh -c "PATH=$PATH $cmdToRun" >>$LOGDIR/$logFile 2>$errTemp &
      if [[ $cmdToRun = "netstat -D" ]]
      then
        waitforit "$cmdToRun" "60"
      else
        waitforit
      fi

      # Anything the command wrote to stderr goes to the screen and to
      # the error log.
      if [[ -s $errTemp ]]
      then
        print "\nErrata from $cmdToRun:" | $tee -a $errLog
        $cat $errTemp | $tee -a $errLog
      fi
      # Remove the scratch file whether or not it had any content, so
      # that no empty leftover stays behind in $BASELOGDIR.
      $rm $errTemp 2>/dev/null

      # With no output-control flag the log file is complete and can be
      # added to the tar file now.
      if [[ -z $outputControl ]]
      then
        tarit $logFile
      fi
    fi  # end of if [[ -n $cmdToRun ]]
  fi  # end of if [[ $pass = 1 ]]

}  #----- end of function doit ---------------------------------
---|
| 1484 | |
---|
| 1485 | |
---|
| 1486 | ############################################################################ |
---|
| 1487 | # |
---|
| 1488 | # Function: Append a file to the global tarfile ($tarfile) |
---|
| 1489 | # If $tarfile does not exist yet, create it. |
---|
| 1490 | # |
---|
| 1491 | # Input: $1 - file to be added to (or serve as the start of) $tarfile |
---|
| 1492 | # $2 - "remove" flag (if omitted or empty, delete the input file; otherwise, keep it) |
---|
| 1493 | # |
---|
| 1494 | # Output: $tarfile has been created or augmented |
---|
| 1495 | # |
---|
| 1496 | # Returns: 0 |
---|
| 1497 | # |
---|
| 1498 | # Notes: The caller must be in $BASELOGDIR when calling tarit. |
---|
| 1499 | # The file to be added to $tarfile must be in $LOGDIR. |
---|
| 1500 | # |
---|
| 1501 | ############################################################################ |
---|
function tarit   # <file> [<keepFlag>]
{
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGtarit ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset member=${SUBDIR}/$1
  typeset tarMode

  # Nothing to do if the file is not there.
  [[ -a $member ]] || return 0

  # Append to the tar file if it already exists, otherwise create it.
  if [[ -a $tarfile ]]
  then
    tarMode=-rf
  else
    tarMode=-cf
  fi
  tar $tarMode $tarfile $member

  # Delete the input file unless the caller passed a second argument.
  if [[ -z $2 ]]
  then
    $rm -r ${LOGDIR}/$1 2>/dev/null
  fi

}  #----- end of function tarit --------------------------------
---|
| 1523 | |
---|
| 1524 | |
---|
| 1525 | function do_master_stuff |
---|
| 1526 | { |
---|
| 1527 | typeset sourceFile="gpfs.snap.sh" |
---|
| 1528 | [[ -n $DEBUGgpfssnap || -n $DEBUGdo_master_stuff ]] && set -x |
---|
| 1529 | $mmTRACE_ENTER "$*" |
---|
| 1530 | |
---|
| 1531 | if [[ $os = "Linux" ]] |
---|
| 1532 | then |
---|
| 1533 | line=$($head -n1 $mmsdrfsfile) |
---|
| 1534 | IFS_sv="$IFS" |
---|
| 1535 | IFS=":" |
---|
| 1536 | set -f ; set -A v -- - $line ; set +f |
---|
| 1537 | IFS="$IFS_sv" |
---|
| 1538 | addlist=${v[$PRIMARY_SERVER_Field]} |
---|
| 1539 | echo "Primary server is: ${v[$PRIMARY_SERVER_Field]}" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1540 | if [[ -n ${v[$BACKUP_SERVER_Field]} ]] |
---|
| 1541 | then |
---|
| 1542 | echo "Backup server is: ${v[$BACKUP_SERVER_Field]}" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1543 | addlist="$addlist ${v[$BACKUP_SERVER_Field]}" |
---|
| 1544 | fi |
---|
| 1545 | fi # end of if [[ $os = "Linux" ]] |
---|
| 1546 | |
---|
| 1547 | echo "\nGetting file system manager information . . .\n" |
---|
| 1548 | |
---|
| 1549 | dev2list=$(getUsedDevices) |
---|
| 1550 | if [[ -n $aflag ]] |
---|
| 1551 | then |
---|
| 1552 | devlist=$dev2list |
---|
| 1553 | fi |
---|
| 1554 | |
---|
| 1555 | $mmlsmgr $devlist 2>/dev/null | $grep -v "^file system" | $grep -v "^\-\-\-\-\-\-" > lsout |
---|
| 1556 | |
---|
| 1557 | if [[ -s lsout ]] |
---|
| 1558 | then |
---|
| 1559 | echo "According to mmlsmgr . . ." | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1560 | { |
---|
| 1561 | while read line |
---|
| 1562 | do |
---|
| 1563 | if [[ -z $line ]] |
---|
| 1564 | then |
---|
| 1565 | continue |
---|
| 1566 | fi |
---|
| 1567 | fs=$(echo $line | $cut -f1 -d " ") |
---|
| 1568 | manager=$(echo $line | $cut -f2 -d "(" | $cut -f1 -d ")") |
---|
| 1569 | mannum=$(echo $line | $awk '{print $2}') |
---|
| 1570 | echo $mannum | $grep "\." >/dev/null |
---|
| 1571 | if [[ $? = 0 ]] |
---|
| 1572 | then |
---|
| 1573 | mannum=$($grep $mannum $mmsdrfsfile | $grep MEMBER_NODE | $cut -f5 -d ":") |
---|
| 1574 | fi |
---|
| 1575 | if [[ $mannum != "(none" ]] |
---|
| 1576 | then |
---|
| 1577 | nodesetID=$(findNodesetId $mmsdrfsfile $mannum) |
---|
| 1578 | name=$(getNodeInfo $REL_HOSTNAME_Field $NODE_NUMBER_Field $mannum $nodesetID $mmsdrfsfile) |
---|
| 1579 | addlist="$addlist $name" |
---|
| 1580 | else |
---|
| 1581 | name="" |
---|
| 1582 | fi |
---|
| 1583 | print "The manager of $fs is $manager ($name)" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1584 | done |
---|
| 1585 | } < lsout |
---|
| 1586 | skipone=1 |
---|
| 1587 | else |
---|
| 1588 | print "Couldn't get filesystem manager info from daemon. Trying log files. . ." | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1589 | fi # end of if [[ -s lsout ]] |
---|
| 1590 | |
---|
| 1591 | $rm lsout 2>/dev/null |
---|
| 1592 | |
---|
| 1593 | echo "\nAssessing file system manager data from logs . . .\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1594 | |
---|
| 1595 | list=$($ls -t $rasDir/mmfs.log.[0-9]* 2>/dev/null) |
---|
| 1596 | for fs2 in $dev2list |
---|
| 1597 | do |
---|
| 1598 | gotlast=0;gotresigned=0;gotappointed=0; |
---|
| 1599 | fs=${fs2#/dev/} |
---|
| 1600 | $rm tmplist 2>/dev/null |
---|
| 1601 | for file in $list |
---|
| 1602 | do |
---|
| 1603 | $grep -nE "Cannot mount file system|unmounted because it|as manager|last file system manager" $file | $grep -nE "$fs|last file system manager" | $sort -nrk 1,1 >> tmplist |
---|
| 1604 | done |
---|
| 1605 | if [[ -s tmplist ]] |
---|
| 1606 | then |
---|
| 1607 | { |
---|
| 1608 | while read line |
---|
| 1609 | do |
---|
| 1610 | echo $line | $grep "last file system manager" >/dev/null |
---|
| 1611 | if [[ $? = 0 ]] |
---|
| 1612 | then |
---|
| 1613 | continue |
---|
| 1614 | fi |
---|
| 1615 | echo $line | $grep -E "unmounted | Cannot mount" >/dev/null |
---|
| 1616 | if [[ $? = 0 ]] |
---|
| 1617 | then |
---|
| 1618 | read line |
---|
| 1619 | fi |
---|
| 1620 | manager=$(echo $line | $cut -f2 -d "(" | $cut -f1 -d ")") |
---|
| 1621 | mannum=$(echo $line | $cut -f1 -d "(") |
---|
| 1622 | echo $line | $grep "last file system manager" >/dev/null |
---|
| 1623 | if [[ $? = 0 ]] |
---|
| 1624 | then |
---|
| 1625 | if [[ $gotlast = 1 ]] |
---|
| 1626 | then |
---|
| 1627 | continue |
---|
| 1628 | fi |
---|
| 1629 | mannum=${mannum#*was node } |
---|
| 1630 | gotlast=1 |
---|
| 1631 | message="failed as" |
---|
| 1632 | read message2 |
---|
| 1633 | else |
---|
| 1634 | echo $line | $grep "resigned as" >/dev/null |
---|
| 1635 | if [[ $? = 0 ]] |
---|
| 1636 | then |
---|
| 1637 | if [[ $gotresigned = 1 ]] |
---|
| 1638 | then |
---|
| 1639 | continue |
---|
| 1640 | fi |
---|
| 1641 | mannum=${mannum#* Node } |
---|
| 1642 | gotresigned=1 |
---|
| 1643 | message="resigned as" |
---|
| 1644 | else |
---|
| 1645 | if [[ $gotappointed = 2 ]] |
---|
| 1646 | then |
---|
| 1647 | continue |
---|
| 1648 | else |
---|
| 1649 | mannum=${mannum#* Node } |
---|
| 1650 | if [[ $gotappointed = 0 ]] |
---|
| 1651 | then |
---|
| 1652 | gotappointed=1 |
---|
| 1653 | message="last appointed" |
---|
| 1654 | else |
---|
| 1655 | gotappointed=2 |
---|
| 1656 | message="2nd last appointed" |
---|
| 1657 | fi |
---|
| 1658 | fi |
---|
| 1659 | fi |
---|
| 1660 | fi # end of if [[ $? = 0 ]] |
---|
| 1661 | echo $mannum | $grep "\." >/dev/null |
---|
| 1662 | if [[ $? = 0 ]] |
---|
| 1663 | then |
---|
| 1664 | mannum=$($grep $mannum $mmsdrfsfile | $grep MEMBER_NODE | $cut -f5 -d ":") |
---|
| 1665 | fi |
---|
| 1666 | nodesetID=$(findNodesetId $mmsdrfsfile $mannum) |
---|
| 1667 | name=$(getNodeInfo $REL_HOSTNAME_Field $NODE_NUMBER_Field $mannum $nodesetID $mmsdrfsFile) |
---|
| 1668 | if [[ $message = "failed as" ]] |
---|
| 1669 | then |
---|
| 1670 | outfile=${BASELOGDIR}/problem.${my_hostname} |
---|
| 1671 | else |
---|
| 1672 | outfile=${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1673 | fi |
---|
| 1674 | print "$manager ($name) $message manager of $fs" | $tee -a $outfile |
---|
| 1675 | if [[ -n $message2 ]] |
---|
| 1676 | then |
---|
| 1677 | print "$manager ($name) $message manager of $fs" | $tee -a $outfile |
---|
| 1678 | manager="" |
---|
| 1679 | fi |
---|
| 1680 | addlist="$addlist $name" |
---|
| 1681 | done |
---|
| 1682 | } < tmplist |
---|
| 1683 | if [[ $gotlast = 1 && gotresigned = 1 && gotappointed = 2 ]] |
---|
| 1684 | then |
---|
| 1685 | break |
---|
| 1686 | fi |
---|
| 1687 | fi # end of if [[ -s tmplist ]] |
---|
| 1688 | $rm tmplist 2>/dev/null |
---|
| 1689 | done |
---|
| 1690 | |
---|
| 1691 | if [[ $yflag != 1 && $pflag != 1 ]] |
---|
| 1692 | then |
---|
| 1693 | addtolist "$addlist" |
---|
| 1694 | fi |
---|
| 1695 | |
---|
| 1696 | bigtarfile=${BASELOGDIR}/all.${logdate}.tar |
---|
| 1697 | if [[ -s $nodefile ]] |
---|
| 1698 | then |
---|
| 1699 | mysum=$($sum $spath | $cut -f1 -d " ") |
---|
| 1700 | NODESDIR=${BASELOGDIR}/${logdate} |
---|
| 1701 | $mkdir $NODESDIR |
---|
| 1702 | |
---|
| 1703 | node_list=$($cat $nodefile) |
---|
| 1704 | if [[ -n $node_list ]] |
---|
| 1705 | then |
---|
| 1706 | if [[ $x_arg = 1 ]] |
---|
| 1707 | then |
---|
| 1708 | print "\nWould fork gpfs.snap on nodes $node_list\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1709 | else |
---|
| 1710 | print "\nForking gpfs.snap on nodes:\n$node_list\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 1711 | fi |
---|
| 1712 | |
---|
| 1713 | dir=/$(echo $BASELOGDIR | $cut -f2 -d "/") |
---|
| 1714 | $mmdsh -F $nodefile "K5MUTE=1 df $dir | tail -n -1 | grep 100%" >dfout & |
---|
| 1715 | waitforit NULL 60 |
---|
| 1716 | list=$($cat dfout | $cut -f1 -d :) 2>/dev/null |
---|
| 1717 | $rm dfout 2>/dev/null |
---|
| 1718 | if [[ -n $list ]] |
---|
| 1719 | then |
---|
| 1720 | print "$dir is 100% full on the following nodes:\n$list\nRemoving from list." | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out |
---|
| 1721 | removefromlist "$list" |
---|
| 1722 | fi |
---|
| 1723 | if [[ -s $nodefile ]] |
---|
| 1724 | then |
---|
| 1725 | $mmdsh -F $nodefile K5MUTE=1 mkdir -p ${BASELOGDIR}/${logdate} >/dev/null 2>/dev/null |
---|
| 1726 | $mmdsh -F $nodefile K5MUTE=1 sum $SNAP/gpfs.snap >sumout 2>sumerr |
---|
| 1727 | nnewlist=$($cat sumerr | $cut -f1 -d :) |
---|
| 1728 | newlist2=$($cat sumout | $grep -v "$mysum" | $cut -f1 -d :) |
---|
| 1729 | nnewlist="$nnewlist $newlist2" |
---|
| 1730 | firstone=1 |
---|
| 1731 | $rm -f sumerr sumout $commaFile 2>/dev/null |
---|
| 1732 | for i in $nnewlist |
---|
| 1733 | do |
---|
| 1734 | if [[ $firstone = 1 ]] |
---|
| 1735 | then |
---|
| 1736 | commalist="$i" |
---|
| 1737 | print -- $i > $commaFile |
---|
| 1738 | firstone=0 |
---|
| 1739 | else |
---|
| 1740 | commalist="$commalist,$i" |
---|
| 1741 | print -- $i >> $commaFile |
---|
| 1742 | fi |
---|
| 1743 | done |
---|
| 1744 | if [[ -s $commaFile ]] |
---|
| 1745 | then |
---|
| 1746 | print "There is an outdated or no gpfs.snap in $SNAP on the following nodes:" |
---|
| 1747 | print $commalist |
---|
| 1748 | print "\nAttempting to copy . . .\n" |
---|
| 1749 | if [[ $SNAP != "/usr/lpp/mmfs/bin" ]] |
---|
| 1750 | then |
---|
| 1751 | $mmdsh -F $commaFile K5MUTE=1 mkdir -p $SNAP 2>/dev/null |
---|
| 1752 | fi |
---|
| 1753 | $mmdsh -F $commaFile K5MUTE=1 $rcp $myhname:$spath $spath 2>tmperr |
---|
| 1754 | |
---|
| 1755 | if [[ -s tmperr ]] |
---|
| 1756 | then |
---|
| 1757 | list=$($cat tmperr | $cut -f1 -d ":") |
---|
| 1758 | $cat tmperr |
---|
| 1759 | removefromlist "$list" |
---|
| 1760 | print "copy failed for the following nodes:\n$list\nRemoving them from list." | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out |
---|
| 1761 | $cat $nodefile |
---|
| 1762 | fi |
---|
| 1763 | $rm tmperr 2>/dev/null |
---|
| 1764 | fi |
---|
| 1765 | if [[ $x_arg != 2 ]] |
---|
| 1766 | then |
---|
| 1767 | # Question: What is the "node_args" variable for? |
---|
| 1768 | $mmdsh -F $nodefile K5MUTE=1 "$SNAP/gpfs.snap $node_args -d ${BASELOGDIR}/$(hostname | cut -d. -f1)_${logdate} -x 1 -z" >$BASELOGDIR/pass1outfile & |
---|
| 1769 | fi |
---|
| 1770 | fi |
---|
| 1771 | fi # end of if [[ -n $node_list ]] |
---|
| 1772 | fi # end of if [[ -s $nodefile ]] |
---|
| 1773 | |
---|
| 1774 | } #----- end of function do_master_stuff ---------------------- |
---|
| 1775 | |
---|
| 1776 | |
---|
#-----------------------------------------------------------------------
# printAndExit <msgArg1> [<msgArg2>]
#
# Hands both arguments to printErrorMsg (with "gpfs.snap" inserted
# between them), prints the usage text held in USAGE on stdout and
# terminates the script with exit status 1.  Never returns to the caller.
#-----------------------------------------------------------------------
function printAndExit
{
  # Enable command tracing when either the script-wide or the
  # function-specific debug variable is set.
  if [[ -n $DEBUGgpfssnap ]] || [[ -n $DEBUGprintAndExit ]]
  then
    set -x
  fi

  # The arguments are intentionally left unquoted: an absent or empty
  # second argument is dropped instead of being passed as an empty word.
  printErrorMsg $1 gpfs.snap $2
  print "$USAGE"
  exit 1

}  #----- end of function printAndExit -------------------------
---|
| 1786 | |
---|
| 1787 | |
---|
#-----------------------------------------------------------------------
# getCurrentStanzaList2 <outputFile>
#
# Writes the colon-separated output of "lsfs -c -v mmfs" (stdout and
# stderr combined) into <outputFile>, replacing any previous content.
#
# Returns:
#   0        lsfs succeeded, or lsfs failed with "unknown vfs type";
#            in the latter case <outputFile> holds a header-like line only.
#   non-zero the lsfs return code for any other failure; the captured
#            lsfs output is then also copied to stdout.
#-----------------------------------------------------------------------
function getCurrentStanzaList2   # <outputFile>
{
  # NOTE(review): sourceFile is not read in this function; presumably it
  # is consumed by the trace facility invoked below -- confirm.
  typeset sourceFile="gpfs.snap.sh"
  [[ -n $DEBUGgpfssnap || -n $DEBUGgetCurrentStanzaList2 ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset outfile=$1
  typeset rc=0

  # The output path is quoted everywhere so that a name containing
  # blanks or pattern characters is handled as a single file.
  $rm -f "$outfile"

  #-------------------------------------------------------------------
  # Generate a list of the GPFS file systems in /etc/filesystems.
  # The output of the AIX lsfs -c command looks something like this:
  #
  #  #MountPoint:Device:Vfs:Nodename:Type:Size:Options:AutoMount:Acct
  #  /gpfs/gpfsA:/dev/gpfsA:mmfs:-:mmfs:0:rw:no:no
  #  /gpfs/gpfsB:/dev/gpfsB:mmfs:-:mmfs:0:rw:no:no
  #-------------------------------------------------------------------
  # Tracing is switched off around the lsfs call and restored afterwards.
  set +x
  LC_ALL=C $lsfs -c -v mmfs > "$outfile" 2>&1
  rc=$?
  [[ -n $DEBUGgpfssnap || -n $DEBUGgetCurrentStanzaList2 ]] && set -x

  if [[ $rc -ne 0 ]]
  then
    # Check whether this is a 'not found error'.
    if $grep -q "unknown vfs type" "$outfile"
    then
      # 'not found' is acceptable.  Reset the return code
      # and create a file with an lsfs header-like line only.
      rc=0
      print -- "#MountPoint:Device:Vfs:junk" > "$outfile"
    else
      # If some other error, show the error messages.
      $cat "$outfile"
    fi
  fi

  return $rc

}  #----- end of function getCurrentStanzaList2 ----------------
---|
| 1830 | |
---|
| 1831 | |
---|
| 1832 | |
---|
| 1833 | ############################################# |
---|
| 1834 | # Mainline processing MAIN main |
---|
| 1835 | ############################################# |
---|
| 1836 | |
---|
| 1837 | args=$@ |
---|
| 1838 | set -A months Yam Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec |
---|
| 1839 | set -A days 0 31 29 31 30 31 30 31 31 30 31 30 31 |
---|
| 1840 | export K5MUTE=1 |
---|
| 1841 | GPFSDIR=/usr/lpp/mmfs/bin |
---|
| 1842 | export PATH=/bin:/usr/bin:/etc:/usr/sbin:/sbin:$GPFSDIR |
---|
| 1843 | |
---|
| 1844 | export LANG=en_US |
---|
| 1845 | export LC_MESSAGES=C |
---|
| 1846 | export LC_TIME=C |
---|
| 1847 | pwd=$(pwd) |
---|
| 1848 | result=$(echo $0 | $grep "^"/) |
---|
| 1849 | if [[ -n $result ]] |
---|
| 1850 | then |
---|
| 1851 | spath=$0 |
---|
| 1852 | else |
---|
| 1853 | spath=${pwd}/$0 |
---|
| 1854 | fi |
---|
| 1855 | SNAP=$(dirname $spath) |
---|
| 1856 | pass=1 |
---|
| 1857 | total_bytes=0 |
---|
| 1858 | max_bytes=0 |
---|
| 1859 | |
---|
| 1860 | BASELOGDIR=/tmp/gpfs.snapOut |
---|
| 1861 | $mkdir ${BASELOGDIR} 2>/dev/null |
---|
| 1862 | logdate=$(date +\%m\%d\%H\%M) |
---|
| 1863 | my_hostname=$($hostname | $cut -d. -f1) |
---|
| 1864 | $rm -rf ${BASELOGDIR}/gpfs.snap_err.*.out 2>/dev/null |
---|
| 1865 | $rm -rf ${BASELOGDIR}/gpfs.snap_info.*.out 2>/dev/null |
---|
| 1866 | $rm ${BASELOGDIR}/problem.${my_hostname} 2>/dev/null |
---|
| 1867 | YAMO=/tmp/yamo |
---|
| 1868 | ODMDIR=/etc/objrepos |
---|
| 1869 | SPENV=0 |
---|
| 1870 | os=$($uname) |
---|
| 1871 | if [[ -f $mmfscfg ]] |
---|
| 1872 | then |
---|
| 1873 | logDir=$($awk '$1 == "logDir" {value = $2} END {print value}' $mmfscfg) |
---|
| 1874 | [[ -n $logDir ]] && rasDir="${logDir}/" |
---|
| 1875 | fi |
---|
| 1876 | [[ -z $rasDir ]] && rasDir=/var/adm/ras |
---|
| 1877 | |
---|
| 1878 | |
---|
| 1879 | if [[ $os = "AIX" ]] |
---|
| 1880 | then |
---|
| 1881 | alevel=$($lslpp -L bos.rte | $grep bos.rte | $awk '{print $2}') |
---|
| 1882 | sp_version=$($lslpp -Lc ssp.basic 2>/dev/null | $grep ssp.basic | $cut -f3 -d :) |
---|
| 1883 | if [[ -n $sp_version ]] |
---|
| 1884 | then |
---|
| 1885 | sp_version=${sp_version%.#} |
---|
| 1886 | SPENV=1 |
---|
| 1887 | my_node_number=$(/usr/lpp/ssp/install/bin/node_number) |
---|
| 1888 | PATH=$PATH:/usr/lpp/ssp/bin:/usr/lpp/csd/bin |
---|
| 1889 | fi |
---|
| 1890 | else |
---|
| 1891 | alevel=$($uname -rv) |
---|
| 1892 | fi # end of if [[ $os = "AIX" ]] |
---|
| 1893 | |
---|
| 1894 | myhname=$($hostname) |
---|
| 1895 | if [[ -a /usr/bin/vmstat ]] |
---|
| 1896 | then |
---|
| 1897 | gotvmstat=1 |
---|
| 1898 | fi |
---|
| 1899 | |
---|
| 1900 | # gpfs.snap [-c "CmdString"] [-d OutputDirectory] [-p] [-x {1 | 2}] [-y | -z] |
---|
| 1901 | # [-a | -W NodeFilename | -w NodeName[,NodeName...] | |
---|
| 1902 | # -n NodeNumber[,NodeNumber...]] |
---|
| 1903 | USAGE=\ |
---|
| 1904 | "Usage:\n"\ |
---|
| 1905 | " gpfs.snap [-c \"CmdString\"] [-d OutputDirectory] [-p] [-x {1 | 2}] [-y | -z]\n"\ |
---|
| 1906 | " [ -a | -W NodeFilename | -w NodeName[,NodeName...] | -n NodeNumber[,NodeNumber...]]" |
---|
| 1907 | |
---|
| 1908 | master=1 |
---|
| 1909 | |
---|
# Show the usage text and quit when the first argument asks for help.
# NOTE(review): neither arg1 nor ARG1 is assigned in this part of the
# file; presumably they are derived from $1 elsewhere -- confirm.
# The lower-case help spellings are tested against arg1 (the variable the
# '-?' and '--' tests already use); the original ARG1 tests are kept so
# that nothing that matched before stops matching.
if [[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' || $ARG1 = '-H' || $ARG1 = '--HELP' ]]
then
  # Quoted so the text is printed as one word: unquoted, it would be
  # split on blanks and its bracketed tokens treated as file patterns.
  print "$USAGE"
  exit 1
fi
---|
| 1915 | |
---|
# Parse the command line options.
# The leading ':' in the option string selects silent error reporting by
# getopts: a missing option argument is delivered as OPT=':' and an
# unrecognized option ends up in the default arm of the case statement.
while getopts :ac:d:Dn:pw:W:x:yz OPT
do
  case $OPT in

    # -a: all nodes.  Rejected when repeated or combined with -n, -w, -W.
    a) [[ -n $aflag ]] && printAndExit 36 "-$OPT"
       aflag="-$OPT"
       all="all"
       if [[ -n $nflag || -n $wflag || -n $Wflag ]]
       then
         [[ -n $nflag ]] && printErrorMsg 191 gpfs.snap "-a" "-n"
         [[ -n $wflag ]] && printErrorMsg 191 gpfs.snap "-a" "-w"
         [[ -n $Wflag ]] && printErrorMsg 191 gpfs.snap "-a" "-W"
         print "$USAGE"
         exit 1
       fi
       ;;

    # -c "CmdString": command string to run; rejected when repeated.
    c) [[ -n $cflag ]] && printAndExit 36 "-$OPT"
       cflag="-$OPT"
       cmdString="$OPTARG"
       ;;

    # -d OutputDirectory: create the directory in the background and let
    # waitforit watch it; a waitforit status of 1 is treated as a timeout.
    d) d_argument=$OPTARG
       $mkdir -p "$d_argument" 2>/dev/null &
       waitforit
       if [[ $? = 1 ]]
       then
         print -u2 "Write to $d_argument timed out.  Choose another directory or take the default (/tmp)"
         exit 1
       fi
       BASELOGDIR=$d_argument
       # Remove leftovers of earlier runs from the new output directory.
       $rm -rf ${BASELOGDIR}/gpfs.snap_err.*.out 2>/dev/null
       $rm -rf ${BASELOGDIR}/gpfs.snap_info.*.out 2>/dev/null
       $rm ${BASELOGDIR}/problem.${my_hostname} 2>/dev/null
       # NOTE(review): the source path below joins the default directory
       # with the new BASELOGDIR, which looks unintended -- confirm.
       $mv /tmp/gpfs.snapOut/${BASELOGDIR}/gpfs.snap_err.${logdate}.out ${BASELOGDIR} 2>/dev/null
       ;;

    # -D: debug mode; stderr (and with it the trace) goes to a fixed file.
    D) DEBUGgpfssnap=1
       exec 2>/tmp/gpfs.snap.debug
       set -x
       echo "Writing debug data and redirecting stderr to /tmp/gpfs.snap.debug" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out
       ;;

    # -n NodeNumber[,...]: rejected when repeated or combined with -a, -W.
    n) [[ -n $nflag ]] && printAndExit 36 "-$OPT"
       nflag="-$OPT"
       nodenums="$OPTARG"
       if [[ -n $aflag || -n $Wflag ]]
       then
         [[ -n $aflag ]] && printErrorMsg 191 gpfs.snap "-n" "-a"
         [[ -n $Wflag ]] && printErrorMsg 191 gpfs.snap "-n" "-W"
         print "$USAGE"
         exit 1
       fi
       ;;

    # -p: sets pflag only.
    p) pflag=1
       ;;

    # -w NodeName[,...]: rejected when repeated or combined with -a, -W.
    # The test must read $Wflag; a bare "Wflag" is a non-empty literal,
    # which made the condition always true and every -w fail.
    w) [[ -n $wflag ]] && printAndExit 36 "-$OPT"
       wflag="-$OPT"
       nodenames="$OPTARG"
       if [[ -n $aflag || -n $Wflag ]]
       then
         [[ -n $aflag ]] && printErrorMsg 191 gpfs.snap "-w" "-a"
         [[ -n $Wflag ]] && printErrorMsg 191 gpfs.snap "-w" "-W"
         print "$USAGE"
         exit 1
       fi
       ;;

    # -W NodeFilename: rejected when repeated or combined with -a, -n, -w.
    W) [[ -n $Wflag ]] && printAndExit 36 "-$OPT"
       Wflag="-$OPT"
       wcoll="$OPTARG"
       if [[ -n $aflag || -n $nflag || -n $wflag ]]
       then
         [[ -n $aflag ]] && printErrorMsg 191 gpfs.snap "-W" "-a"
         [[ -n $nflag ]] && printErrorMsg 191 gpfs.snap "-W" "-n"
         [[ -n $wflag ]] && printErrorMsg 191 gpfs.snap "-W" "-w"
         print "$USAGE"
         exit 1
       fi
       ;;

    # -x {1 | 2}: any other argument value is refused.
    x) xflag=1
       x_arg=$OPTARG
       if [[ $x_arg != 1 ]] && [[ $x_arg != 2 ]]
       then
         print "Illegal argument to option x: $x_arg"
         print "$USAGE"
         exit 1
       fi
       ;;

    # -y and -z are mutually exclusive.
    y) yflag=1
       if [[ $zflag = 1 ]]
       then
         printErrorMsg 191 gpfs.snap y z
         print "$USAGE"
         exit 1
       fi
       ;;

    # -z additionally clears the master indicator.
    z) zflag=1
       if [[ $yflag = 1 ]]
       then
         printErrorMsg 191 gpfs.snap y z
         print "$USAGE"
         exit 1
       fi
       master=0
       ;;

    # An option that requires an argument was given without one.
    :) printAndExit 204 "$OPTARG"
       ;;

    # A known option letter reported with a leading '+'.
    +[acdDnpwWxyz])
       printAndExit 13 "$OPT"
       ;;

    # Anything else is an unknown option.
    *) printAndExit 13 "$OPTARG"
       ;;

  esac
done  # end of while getopts do
---|
| 2040 | |
---|
| 2041 | shift OPTIND-1 |
---|
| 2042 | [[ $# != 0 ]] && printAndExit 38 $1 |
---|
| 2043 | |
---|
| 2044 | # If no node selection option was specified, default to -a. |
---|
| 2045 | [[ -z $aflag && -z $nflag && -z $wflag && -z $Wflag ]] && \ |
---|
| 2046 | aflag="-a" |
---|
| 2047 | |
---|
| 2048 | |
---|
| 2049 | ######################################################################## |
---|
| 2050 | # Set up trap exception handling and call the gpfsInit function. |
---|
| 2051 | # It will ensure that the local copy of the mmsdrfs and the rest of the |
---|
| 2052 | # GPFS system files are up-to-date. There is no need to lock the sdr. |
---|
| 2053 | ######################################################################## |
---|
| 2054 | trap pretrap2 HUP INT QUIT KILL |
---|
| 2055 | gpfsInitOutput=$(gpfsInit nolock) |
---|
| 2056 | setGlobalVar $? $gpfsInitOutput |
---|
| 2057 | |
---|
| 2058 | |
---|
| 2059 | ###################################################### |
---|
| 2060 | # Create a file with the names of all affected nodes. |
---|
| 2061 | ###################################################### |
---|
| 2062 | $rm -f $nodefile 2>/dev/null |
---|
| 2063 | $touch -f $nodefile |
---|
| 2064 | |
---|
| 2065 | if [[ -n $aflag ]] |
---|
| 2066 | then |
---|
| 2067 | # Get a list of the nodes that belong to the cluster. |
---|
| 2068 | getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile |
---|
| 2069 | |
---|
| 2070 | # If there are no nodes, issue an appropriate message and return. |
---|
| 2071 | if [[ ! -s $nodefile ]] |
---|
| 2072 | then |
---|
| 2073 | print -u2 "$mmcmd: There are no known GPFS nodes." |
---|
| 2074 | exit 1 |
---|
| 2075 | fi |
---|
| 2076 | elif [[ -n $Wflag ]] |
---|
| 2077 | then |
---|
| 2078 | # Verify input file is readable. |
---|
| 2079 | if [[ ! -f $wcoll || ! -r $wcoll ]] |
---|
| 2080 | then |
---|
| 2081 | printErrorMsg 43 $mmcmd $wcoll |
---|
| 2082 | exit 1 |
---|
| 2083 | fi |
---|
| 2084 | |
---|
| 2085 | # Filter out comment lines and localhost entries. |
---|
| 2086 | $grep -v -e "localhost" -e "^#" "$wcoll" > $nodefile |
---|
| 2087 | if [[ ! -s $nodefile ]] |
---|
| 2088 | then |
---|
| 2089 | # No node names specified |
---|
| 2090 | printErrorMsg 328 $mmcmd $wcoll |
---|
| 2091 | exit 1 |
---|
| 2092 | fi |
---|
| 2093 | else |
---|
| 2094 | # Either no option was specified, or we have some combination of -w and -n. |
---|
| 2095 | |
---|
| 2096 | # Convert the node names list into a file. |
---|
| 2097 | for i in $(print $nodenames | $tr "," " ") |
---|
| 2098 | do |
---|
| 2099 | print $i >> $nodefile |
---|
| 2100 | done |
---|
| 2101 | |
---|
| 2102 | # Convert the node number list into node names |
---|
| 2103 | # and append the names to the file. |
---|
| 2104 | for i in $(print $nodenums | $tr "," " ") |
---|
| 2105 | do |
---|
| 2106 | nodeName=$(getNodeInfo \ |
---|
| 2107 | $REL_HOSTNAME_Field $NODE_NUMBER_Field $i $GLOBAL_ID $mmsdrfsFile) |
---|
| 2108 | if [[ -n $nodeName ]] |
---|
| 2109 | then |
---|
| 2110 | print $nodeName >> $nodefile |
---|
| 2111 | else |
---|
| 2112 | # Node number is not in cluster |
---|
| 2113 | printErrorMsg 352 $mmcmd $i |
---|
| 2114 | fi |
---|
| 2115 | done |
---|
| 2116 | |
---|
| 2117 | # If none of the node numbers resolved correctly, give up. |
---|
| 2118 | [[ ! -s $nodefile && ( -n $nodenames || -n $nodenums ) ]] && exit 1 |
---|
| 2119 | |
---|
| 2120 | fi # end of if [[ -n $aflag ]] |
---|
| 2121 | |
---|
| 2122 | if [[ $master = 1 && -z $cflag ]] |
---|
| 2123 | then |
---|
| 2124 | checklist |
---|
| 2125 | SUBDIR=${my_hostname}.master.${logdate} |
---|
| 2126 | else |
---|
| 2127 | SUBDIR=${my_hostname}.${logdate} |
---|
| 2128 | fi |
---|
| 2129 | tarfile=${BASELOGDIR}/gpfs.snap.${SUBDIR}.out.tar |
---|
| 2130 | LOGDIR=${BASELOGDIR}/${SUBDIR} |
---|
| 2131 | |
---|
| 2132 | if [[ -z $cflag ]] |
---|
| 2133 | then |
---|
| 2134 | echo "$SNAP/gpfs.snap version $VERSION started at $starttime with args:\n$args\n" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2135 | echo "My process id is $$" |
---|
| 2136 | fi |
---|
| 2137 | |
---|
| 2138 | mypgid=$($ps -p $$ -o pgid=PGID | $tail -n -1) |
---|
| 2139 | |
---|
| 2140 | # Check the output directory for space. |
---|
| 2141 | basedir=/$(echo $BASELOGDIR | $cut -f2 -d "/") |
---|
| 2142 | if [[ $os = "AIX" ]] |
---|
| 2143 | then |
---|
| 2144 | $df $basedir | $tail -n -1 | $awk '{print $4}' >/tmp/yamo & |
---|
| 2145 | else |
---|
| 2146 | $df $basedir | $tail -n -1 | $awk '{print $5}' >/tmp/yamo & |
---|
| 2147 | fi |
---|
| 2148 | waitforit "$df $basedir" 60 |
---|
| 2149 | if [[ $? = 1 ]] |
---|
| 2150 | then |
---|
| 2151 | print -u2 "df on $basedir timed out. Solve the problem with $basedir or specify a different directory with -d." |
---|
| 2152 | exit 1 |
---|
| 2153 | fi |
---|
| 2154 | per=$($cat /tmp/yamo) |
---|
| 2155 | if [[ $per = 100% ]] |
---|
| 2156 | then |
---|
| 2157 | print -u2 "$basedir is 100% full. Specify a different directory with -d or clear space." |
---|
| 2158 | exit 1 |
---|
| 2159 | fi |
---|
| 2160 | $mkdir -p ${LOGDIR} |
---|
| 2161 | cd $BASELOGDIR |
---|
| 2162 | |
---|
| 2163 | mmsdrfsfile=/var/mmfs/gen/mmsdrfs |
---|
| 2164 | |
---|
| 2165 | # If the node does not belong to a GPFS cluster, go away quietly. |
---|
| 2166 | if [[ ! -f $mmsdrfsfile ]] |
---|
| 2167 | then |
---|
| 2168 | print -u2 "The node does not belong to a GPFS cluster ($mmsdrfsfile does not exist). Exiting." |
---|
| 2169 | return 0 |
---|
| 2170 | fi |
---|
| 2171 | |
---|
| 2172 | determineMode |
---|
| 2173 | getLocalNodeData |
---|
| 2174 | mygnum=$ourNodeNumber |
---|
| 2175 | mygname=$ourNodeName |
---|
| 2176 | |
---|
| 2177 | mynodeset=$(findNodesetId $mmsdrfsfile $mygnum) |
---|
| 2178 | |
---|
| 2179 | if [[ $mynodeset = "%%home%%" ]] |
---|
| 2180 | then |
---|
| 2181 | mynodeset2=$($grep clusterName $mmsdrfsfile | $grep %%home%% | $cut -f2 -d " " | $cut -f1 -d ":") |
---|
| 2182 | else |
---|
| 2183 | mynodeset2=$mynodeset |
---|
| 2184 | fi |
---|
| 2185 | |
---|
| 2186 | if [[ -z $mygname ]] |
---|
| 2187 | then |
---|
| 2188 | $grep MEMBER_NODE $mmsdrfsfile >/tmp/mmsdrfs2.tmp |
---|
| 2189 | { |
---|
| 2190 | while read line |
---|
| 2191 | do |
---|
| 2192 | rhname=$(echo $line | $cut -f8 -d ":") |
---|
| 2193 | addr=$($ping -c1 -w5 $rhname | $head -n 1 | $cut -f2 -d "(" | $cut -f1 -d ")") |
---|
| 2194 | ilist=$(netstat -i | $awk '{print $1}' | $grep -v -E "Iface|Kernel|Name") |
---|
| 2195 | for i in $ilist |
---|
| 2196 | do |
---|
| 2197 | $ifconfig $i | $grep $addr >/dev/null |
---|
| 2198 | if [[ $? = 0 ]] |
---|
| 2199 | then |
---|
| 2200 | mynodeset=$(echo $line | $cut -f1 -d :) |
---|
| 2201 | mygname=$(echo $line | $cut -f6 -d :) |
---|
| 2202 | mygnum=$(echo $line | $cut -f5 -d :) |
---|
| 2203 | break |
---|
| 2204 | fi |
---|
| 2205 | done |
---|
| 2206 | if [[ -n $mygname ]] |
---|
| 2207 | then |
---|
| 2208 | break |
---|
| 2209 | fi |
---|
| 2210 | done |
---|
| 2211 | } < /tmp/mmsdrfs2.tmp |
---|
| 2212 | $rm /tmp/mmsdrfs2.tmp 2>/dev/null |
---|
| 2213 | fi # end of if [[ -z $mygname ]] |
---|
| 2214 | |
---|
| 2215 | if [[ $os = "AIX" ]] |
---|
| 2216 | then |
---|
| 2217 | [[ -z $cflag ]] && \ |
---|
| 2218 | echo "I am hostname $myhname running AIX level $alevel" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2219 | gpfs_version=$($lslpp -Lc gpfs.base 2>/dev/null | $grep gpfs.base | $cut -f3 -d :) |
---|
| 2220 | gpfs_version=${gpfs_version%.#} |
---|
| 2221 | if [[ -z $gpfs_version ]] |
---|
| 2222 | then |
---|
| 2223 | gpfs_version=$($lslpp -Lc mmfs.base.rte 2>/dev/null | $grep mmfs.base.rte | $cut -f3 -d :) |
---|
| 2224 | gpfs_version=${gpfs_version%.#} |
---|
| 2225 | fi |
---|
| 2226 | fi # end of if [[ $os = "AIX" ]] |
---|
| 2227 | |
---|
| 2228 | if [[ $SPENV = 1 ]] |
---|
| 2229 | then |
---|
| 2230 | [[ -z $cflag ]] && \ |
---|
| 2231 | echo "I am SP node $my_node_number running $sp_version" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2232 | fi |
---|
| 2233 | |
---|
| 2234 | if [[ $os = "Linux" ]] |
---|
| 2235 | then |
---|
| 2236 | [[ -z $cflag ]] && \ |
---|
| 2237 | echo "I am $myhname running Linux level $alevel" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2238 | gpfs_version=$($rpm -q gpfs.base | $awk 'BEGIN{FS="-"} {print $2"-"$3}') |
---|
| 2239 | fi |
---|
| 2240 | |
---|
| 2241 | if [[ -z $gpfs_version ]] |
---|
| 2242 | then |
---|
| 2243 | print -u2 "\nGPFS does not appear to be installed on this machine." |
---|
| 2244 | $rm -r ${LOGDIR} 2>/dev/null |
---|
| 2245 | exit 1 |
---|
| 2246 | fi |
---|
| 2247 | |
---|
| 2248 | rel1=$(echo $gpfs_version | $cut -f1 -d ".") |
---|
| 2249 | rel2=$(echo $gpfs_version | $cut -f2 -d ".") |
---|
| 2250 | |
---|
| 2251 | if [[ $rel1 -gt 2 ]] || [[ $rel1 -eq 2 && $rel2 -ge 3 ]] |
---|
| 2252 | then |
---|
| 2253 | groupname="cluster" |
---|
| 2254 | else |
---|
| 2255 | groupname="nodeset" |
---|
| 2256 | fi |
---|
| 2257 | |
---|
| 2258 | [[ -z $cflag ]] && \ |
---|
| 2259 | echo "I am gpfs node $mygname number $mygnum in $groupname $mynodeset2 running GPFS version $gpfs_version" | $tee -a ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2260 | |
---|
| 2261 | dumpdir=$($cat /var/mmfs/etc/mmfs.cfg | $grep "^dataStructureDump " | $head -1 | $awk '{print $2}') |
---|
| 2262 | if [[ -z $dumpdir ]] |
---|
| 2263 | then |
---|
| 2264 | dumpdir="/tmp/mmfs" |
---|
| 2265 | fi |
---|
| 2266 | |
---|
| 2267 | # If the -c "run this command string on the nodes" option was specified, |
---|
| 2268 | # just collect the data, cleanup temporary files, and exit early. |
---|
| 2269 | if [[ -n $cflag ]] |
---|
| 2270 | then |
---|
| 2271 | $mmdsh -F $nodefile K5MUTE=1 ksh -c \"PATH=$PATH $cmdString\" 2>/dev/null |
---|
| 2272 | $rm -r ${LOGDIR} 2>/dev/null |
---|
| 2273 | $rm -f $nodefile 2>/dev/null |
---|
| 2274 | $rm -f $LOCAL_FILES 2>/dev/null |
---|
| 2275 | $rm /tmp/hostfile 2>/dev/null |
---|
| 2276 | exit 0 |
---|
| 2277 | fi # end of if [[ -n $cflag ]] |
---|
| 2278 | |
---|
| 2279 | # Collect data on the master node if so desired. |
---|
| 2280 | if [[ $master = 1 ]] |
---|
| 2281 | then |
---|
| 2282 | if [[ $pflag != 1 ]] |
---|
| 2283 | then |
---|
| 2284 | print "Checking configuration files . . ." |
---|
| 2285 | nlist2=$($cat $nodefile2) |
---|
| 2286 | check_fs $nodefile2 "$nlist2" |
---|
| 2287 | brcFile=$trcFile |
---|
| 2288 | bglist=$tglist |
---|
| 2289 | |
---|
| 2290 | check_files /var/mmfs/gen/mmsdrfs $nodefile2 |
---|
| 2291 | |
---|
| 2292 | print "Checking for waiters . . ." |
---|
| 2293 | check_waiters |
---|
| 2294 | |
---|
| 2295 | if [[ $os = "Linux" ]] |
---|
| 2296 | then |
---|
| 2297 | set +x |
---|
| 2298 | getCurrentStanzaList stanzafile |
---|
| 2299 | [[ $DEBUGgpfssnap = 1 ]] && set -x |
---|
| 2300 | else |
---|
| 2301 | getCurrentStanzaList2 stanzafile |
---|
| 2302 | fi |
---|
| 2303 | fslist=$($cat stanzafile | $cut -f2 -d :) |
---|
| 2304 | fslist=${fslist#Device} |
---|
| 2305 | $rm stanzafile 2>/dev/null |
---|
| 2306 | fi # end of if [[ $pflag != 1 ]] |
---|
| 2307 | |
---|
| 2308 | do_master_stuff |
---|
| 2309 | |
---|
| 2310 | firstone=1 |
---|
| 2311 | $rm -f $nodefilecFile |
---|
| 2312 | list=$($cat $nodefile) |
---|
| 2313 | for i in $list |
---|
| 2314 | do |
---|
| 2315 | if [[ $firstone = 1 ]] |
---|
| 2316 | then |
---|
| 2317 | nodefilelist=$i |
---|
| 2318 | print -- $i > $nodefilecFile |
---|
| 2319 | firstone=0 |
---|
| 2320 | else |
---|
| 2321 | nodefilelist="$nodefilelist $i" |
---|
| 2322 | print -- $i >> $nodefilecFile |
---|
| 2323 | fi |
---|
| 2324 | done |
---|
| 2325 | check_fs $nodefilecFile "$nodefilelist" "$hostarray" |
---|
| 2326 | rlist=$trlist |
---|
| 2327 | rcFile=$trcFile |
---|
| 2328 | glist=$tglist |
---|
| 2329 | fi # end of if [[ $master = 1 ]] |
---|
| 2330 | |
---|
| 2331 | all="-1" |
---|
| 2332 | check_dumps internaldump $all $dumpdir |
---|
| 2333 | check_dumps trcrpt $all "/tmp/mmfs" |
---|
| 2334 | |
---|
| 2335 | if [[ $x_arg = 2 ]] |
---|
| 2336 | then |
---|
| 2337 | pass=2 |
---|
| 2338 | else |
---|
| 2339 | print "\nDetermining whether there is enough space in ${BASELOGDIR} . . .\n" |
---|
| 2340 | fi |
---|
| 2341 | |
---|
| 2342 | #export mmdshCommandsFile=${BASELOGDIR}/commandfile |
---|
| 2343 | |
---|
| 2344 | while [[ $pass -le 2 ]] |
---|
| 2345 | do |
---|
| 2346 | print "Processing log files . . ." |
---|
| 2347 | get_files |
---|
| 2348 | if [[ $master = 1 && $pass = 2 ]] |
---|
| 2349 | then |
---|
| 2350 | if [[ -s $rcFile ]] |
---|
| 2351 | then |
---|
| 2352 | $mmdsh -F $rcFile K5MUTE=1 cat ${BASELOGDIR}/${logdate}/problem.\* 2>/dev/null | tee -a ${BASELOGDIR}/problem.${my_hostname} |
---|
| 2353 | fi |
---|
| 2354 | for i in $glist |
---|
| 2355 | do |
---|
| 2356 | $cat ${BASELOGDIR}/${logdate}/problem.$i 2>/dev/null | $tee -a ${BASELOGDIR}/problem.${my_hostname} |
---|
| 2357 | done |
---|
| 2358 | # Question: What is the "node_args" variable for? |
---|
| 2359 | $mmdsh -F $nodefile K5MUTE=1 $SNAP/gpfs.snap $node_args -d ${BASELOGDIR}/$(hostname | cut -d. -f1)_${logdate} -x 2 -z >/dev/null 2>/dev/null & |
---|
| 2360 | fi |
---|
| 2361 | |
---|
| 2362 | if [[ $pass = 1 && $gotvmstat = 1 ]] |
---|
| 2363 | then |
---|
| 2364 | ksh -c "vmstat 5 5" > ${LOGDIR}/vmstat_5_5 & |
---|
| 2365 | fi |
---|
| 2366 | |
---|
| 2367 | if [[ -s ${LOGDIR}/long_waiters.sorted ]] |
---|
| 2368 | then |
---|
| 2369 | if [[ $pass = 1 ]] |
---|
| 2370 | then |
---|
| 2371 | size=$($ls -l ${LOGDIR}/long_waiters.sorted | $awk '{print $5}') |
---|
| 2372 | addit $size "${LOGDIR}/long_waiters.sorted" |
---|
| 2373 | else |
---|
| 2374 | tarit long_waiters.sorted |
---|
| 2375 | fi |
---|
| 2376 | fi |
---|
| 2377 | |
---|
| 2378 | if [[ $master = 1 && $pflag != 1 ]] |
---|
| 2379 | then |
---|
| 2380 | doit "dump_list" "$mmdsh -F $nodefile2 ls -l $dumpdir/internaldump\* 2>/dev/null" |
---|
| 2381 | |
---|
| 2382 | print "Processing waiters . . ." |
---|
| 2383 | check_waiters2 |
---|
| 2384 | print "Processing configuration files . . ." |
---|
| 2385 | check_files2 |
---|
| 2386 | fi # end of if [[ $master = 1 && $pflag != 1 ]] |
---|
| 2387 | |
---|
| 2388 | print "Running mm commands . . ." |
---|
| 2389 | if [[ $master = 1 || $yflag = 1 ]] |
---|
| 2390 | then |
---|
| 2391 | doit "mmlsconfig" "/usr/lpp/mmfs/bin/mmlsconfig" |
---|
| 2392 | doit "mmlsmgr" "/usr/lpp/mmfs/bin/mmlsmgr" |
---|
| 2393 | doit "mmlsnode_a" "/usr/lpp/mmfs/bin/mmlsnode -a" |
---|
| 2394 | doit "mmgetstate_a" "/usr/lpp/mmfs/bin/mmgetstate -a" |
---|
| 2395 | doit "tsstatus" "tsstatus" |
---|
| 2396 | # need full pathname for some of these for waitforit to handle properly |
---|
| 2397 | for i in $fslist |
---|
| 2398 | do |
---|
| 2399 | doit "mmdf" "/usr/lpp/mmfs/bin/mmdf $i -q" 1 |
---|
| 2400 | doit "mmlsfs" "/usr/lpp/mmfs/bin/mmlsfs $i" 1 |
---|
| 2401 | doit "mmlsdisk" "/usr/lpp/mmfs/bin/mmlsdisk $i -L" 1 |
---|
| 2402 | doit "mmlspolicy" "/usr/lpp/mmfs/bin/mmlspolicy $i" 1 |
---|
| 2403 | doit "mmlspolicy" "/usr/lpp/mmfs/bin/mmlspolicy $i -L" 1 |
---|
| 2404 | doit "mmlsfileset" "/usr/lpp/mmfs/bin/mmlsfileset $i" 1 |
---|
| 2405 | doit "mmlsfileset" "/usr/lpp/mmfs/bin/mmlsfileset $i -L" 1 |
---|
| 2406 | doit "mmlssnapshot" "/usr/lpp/mmfs/bin/mmlssnapshot $i -d -Q" 1 |
---|
| 2407 | done |
---|
| 2408 | doit "mmlscluster" "mmlscluster" |
---|
| 2409 | doit "mmlsnsd" "mmlsnsd -L" 1 |
---|
| 2410 | doit "mmlsnsd" "mmlsnsd -X" |
---|
| 2411 | doit "mmremotecluster" "mmremotecluster show all" |
---|
| 2412 | doit "mmremotefs" "mmremotefs show all" |
---|
| 2413 | doit "mmauth" "mmauth show" |
---|
| 2414 | fi # end of if [[ $master = 1 || $yflag = 1 ]] |
---|
| 2415 | |
---|
| 2416 | # Be careful not to dump live data that may assert or segfault. |
---|
| 2417 | # We can always ask for additional data later. |
---|
| 2418 | print "Processing dumps . . ." |
---|
| 2419 | doit "mmfsadm_dump_some" "mmfsadm dump version" 1 |
---|
| 2420 | doit "mmfsadm_dump_some" "mmfsadm dump waiters" 1 |
---|
| 2421 | doit "mmfsadm_dump_some" "mmfsadm dump cfgmgr" 1 |
---|
| 2422 | doit "mmfsadm_dump_some" "mmfsadm dump tscomm" 1 |
---|
| 2423 | doit "mmfsadm_dump_some" "mmfsadm dump config" 1 |
---|
| 2424 | doit "mmfsadm_dump_some" "mmfsadm dump mutex" 1 |
---|
| 2425 | doit "mmfsadm_dump_some" "mmfsadm dump sgmgr" 1 |
---|
| 2426 | doit "mmfsadm_dump_some" "mmfsadm dump stripe" 1 |
---|
| 2427 | doit "mmfsadm_dump_some" "mmfsadm dump malloc" 1 |
---|
| 2428 | doit "mmfsadm_dump_some" "mmfsadm dump fs" 1 |
---|
| 2429 | doit "mmfsadm_dump_some" "mmfsadm dump mmap" 1 |
---|
| 2430 | doit "mmfsadm_dump_some" "mmfsadm dump nsd" 1 |
---|
| 2431 | doit "mmfsadm_dump_some" "mmfsadm dump disk" 1 |
---|
| 2432 | doit "mmfsadm_dump_some" "mmfsadm dump alloc stats" 1 |
---|
| 2433 | doit "mmfsadm_dump_some" "mmfsadm dump alloc hist" 1 |
---|
| 2434 | doit "mmfsadm_dump_some" "mmfsadm dump dealloc stats" 1 |
---|
| 2435 | doit "mmfsadm_dump_some" "mmfsadm dump allocmgr" 1 |
---|
| 2436 | doit "mmfsadm_dump_some" "mmfsadm dump allocmgr stats" 1 |
---|
| 2437 | doit "mmfsadm_dump_some" "mmfsadm dump allocmgr hist" |
---|
| 2438 | print "Processing common files . . ." |
---|
| 2439 | get_always |
---|
| 2440 | print "Processing network info . . ." |
---|
| 2441 | get_net_stuff |
---|
| 2442 | print "Processing lvm info . . ." |
---|
| 2443 | get_lvm_stuff |
---|
| 2444 | |
---|
| 2445 | if [[ $os = "AIX" ]] |
---|
| 2446 | then |
---|
| 2447 | console=$(/usr/sbin/lscons) |
---|
| 2448 | if [[ -f $console ]] |
---|
| 2449 | then |
---|
| 2450 | if [[ $pass = 1 ]] |
---|
| 2451 | then |
---|
| 2452 | temp_bytes=$($ls -l $console | $awk '{ print $5 }') |
---|
| 2453 | addit $temp_bytes "$console" |
---|
| 2454 | else |
---|
| 2455 | $cp $console ${LOGDIR}/console |
---|
| 2456 | tarit "console" |
---|
| 2457 | fi |
---|
| 2458 | fi |
---|
| 2459 | fi # end of if [[ $os = "AIX" ]] |
---|
| 2460 | |
---|
| 2461 | print "Processing miscellaneous files . . ." |
---|
| 2462 | get_files_list "/etc" "fstab filesystems trcfmt syslog.conf" |
---|
| 2463 | get_files_dir "/var/mmfs/etc" |
---|
| 2464 | get_files_dir "/var/mmfs/gen" |
---|
| 2465 | get_files_dir "/var/mmfs/ssl" |
---|
| 2466 | get_files_dir "/var/mmfs/ssl/stage" |
---|
| 2467 | get_files_dir "/var/mmfs/tmp" |
---|
| 2468 | get_files_list "$dumpdir" "$internal_list" internaldumps |
---|
| 2469 | savedir=$(pwd) |
---|
| 2470 | cd /var/log 2>/dev/null |
---|
| 2471 | mlist=$($ls messages* 2>/dev/null) |
---|
| 2472 | cd $savedir |
---|
| 2473 | if [[ -n $mlist ]] |
---|
| 2474 | then |
---|
| 2475 | get_files_list "/var/log" "$mlist" |
---|
| 2476 | fi |
---|
| 2477 | mlist="" |
---|
| 2478 | [[ -s /usr/lpp/mmfs/bin/mmfslinux ]] && mlist="$mlist mmfslinux" |
---|
| 2479 | [[ -s /usr/lpp/mmfs/bin/mmfs26 ]] && mlist="$mlist mmfs26" |
---|
| 2480 | [[ -s /usr/lpp/mmfs/bin/mmfs24 ]] && mlist="$mlist mmfs24" |
---|
| 2481 | [[ -s /usr/lpp/mmfs/bin/mmfs ]] && mlist="$mlist mmfs" |
---|
| 2482 | if [[ -n $mlist ]] |
---|
| 2483 | then |
---|
| 2484 | get_files_list "/usr/lpp/mmfs/bin" "$mlist" |
---|
| 2485 | fi |
---|
| 2486 | |
---|
| 2487 | # Get info for whatever group services/topology services pairs are running. |
---|
| 2488 | if [[ $SPENV = 1 && $my_node_number = 0 ]] |
---|
| 2489 | then |
---|
| 2490 | syspar=$(/usr/lpp/ssp/bin/spget_syspar -n) |
---|
| 2491 | syspar=".$syspar" |
---|
| 2492 | fi |
---|
| 2493 | if [[ $SPENV = 1 ]] |
---|
| 2494 | then |
---|
| 2495 | doit "lssrc_rvsd" "lssrc -g rvsd" 1 |
---|
| 2496 | doit "lsvsd_l" "lsvsd -l" |
---|
| 2497 | if [[ -s ./${SUBDIR}/mmsdrfs2 ]] |
---|
| 2498 | then |
---|
| 2499 | if [[ $pass = 1 ]] |
---|
| 2500 | then |
---|
| 2501 | temp_bytes=$($ls -l ./${SUBDIR}/mmsdrfs2 | $awk '{ print $5 }') |
---|
| 2502 | addit $temp_bytes "mmsdrfs2" |
---|
| 2503 | else |
---|
| 2504 | tarit "mmsdrfs2" |
---|
| 2505 | fi |
---|
| 2506 | fi |
---|
| 2507 | fi # end of if [[ $SPENV = 1 ]] |
---|
| 2508 | |
---|
| 2509 | if [[ $master = 1 ]] |
---|
| 2510 | then |
---|
| 2511 | if [[ $pass = 1 ]] |
---|
| 2512 | then |
---|
| 2513 | print "Waiting for remote nodes to report space requirements . . ." |
---|
| 2514 | else |
---|
| 2515 | print "Waiting for remote nodes to collect data . . ." |
---|
| 2516 | fi |
---|
| 2517 | wait |
---|
| 2518 | fi |
---|
| 2519 | |
---|
| 2520 | if [[ $pass = 1 ]] |
---|
| 2521 | then |
---|
| 2522 | if [[ $gotvmstat = 1 ]] |
---|
| 2523 | then |
---|
| 2524 | (( tmpval = $( vmstat | $wc -c ) * 5 )) |
---|
| 2525 | addit $tmpval |
---|
| 2526 | fi |
---|
| 2527 | check_space |
---|
| 2528 | print "It appears we have enough space.\n" |
---|
| 2529 | |
---|
| 2530 | if [[ $x_arg = 1 ]] |
---|
| 2531 | then |
---|
| 2532 | $rm -r ${LOGDIR} 2>/dev/null |
---|
| 2533 | if [[ $master = 1 ]] |
---|
| 2534 | then |
---|
| 2535 | $rm -r $NODESDIR 2>/dev/null |
---|
| 2536 | fi |
---|
| 2537 | exit 0 |
---|
| 2538 | fi |
---|
| 2539 | fi # end of if [[ $pass = 1 ]] |
---|
| 2540 | |
---|
| 2541 | if [[ $pass = 2 ]] |
---|
| 2542 | then |
---|
| 2543 | [[ $gotvmstat = 1 ]] && tarit "vmstat_5_5" |
---|
| 2544 | tarit "mmdf" |
---|
| 2545 | tarit "mmlsdisk" |
---|
| 2546 | tarit "mmlsfs" |
---|
| 2547 | tarit "mmlspolicy" |
---|
| 2548 | tarit "mmlsfileset" |
---|
| 2549 | tarit "mmlssnapshot" |
---|
| 2550 | fi # end of if [[ $pass = 2 ]] (only the vmstat_5_5 entry additionally requires $gotvmstat = 1) |
---|
| 2551 | |
---|
| 2552 | pass=$(expr $pass + 1) |
---|
| 2553 | |
---|
| 2554 | done # end of while [[ $pass -le 2 ]] do |
---|
| 2555 | |
---|
| 2556 | |
---|
| 2557 | # Add this run's error log to the snap archive, but only when it is non-empty (-s). |
---|
| 2558 | if [[ -s gpfs.snap_err.${logdate}.out ]]; then |
---|
| 2559 | $cp gpfs.snap_err.${logdate}.out ${SUBDIR}/gpfs.snap_err.${logdate}.out |
---|
| 2560 | tar -rf $tarfile ${SUBDIR}/gpfs.snap_err.${logdate}.out # append the staged copy to the archive |
---|
| 2561 | fi |
---|
| 2562 | $rm gpfs.snap_err.out.temp 2>/dev/null # scratch file; any error output is discarded |
---|
| 2563 | |
---|
| 2564 | # Problem report for this node: archive it when it has content, otherwise |
---|
| 2565 | # remove the empty file. |
---|
| 2566 | if [[ -a problem.${my_hostname} ]]; then |
---|
| 2567 | if [[ -s problem.${my_hostname} ]]; then |
---|
| 2568 | $cp problem.${my_hostname} ${SUBDIR}/problem.${my_hostname} |
---|
| 2569 | tar -rf $tarfile ${SUBDIR}/problem.${my_hostname} # archive the staged copy, the same way the err/info logs are added |
---|
| 2570 | else |
---|
| 2571 | $rm problem.${my_hostname} 2>/dev/null |
---|
| 2572 | fi |
---|
| 2573 | fi # end of if [[ -a problem.${my_hostname} ]] |
---|
| 2574 | |
---|
| 2575 | endtime=$(date) # time stamp for the closing entry in the info log |
---|
| 2576 | echo "gpfs.snap near completion at $endtime" >> ${BASELOGDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2577 | |
---|
| 2578 | # Stage the info log under ${SUBDIR} and append that copy to the snap archive. NOTE(review): the log is written via ${BASELOGDIR} above but tested by relative name here - presumably the cwd is ${BASELOGDIR}; confirm. |
---|
| 2579 | if [[ -a gpfs.snap_info.${logdate}.out ]]; then |
---|
| 2580 | $cp gpfs.snap_info.${logdate}.out ${SUBDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2581 | tar -rf $tarfile ${SUBDIR}/gpfs.snap_info.${logdate}.out |
---|
| 2582 | fi |
---|
| 2583 | |
---|
| 2584 | # Compress the snap archive: use compress when /bin/compress exists, otherwise gzip. suff records the extension the chosen tool appends. |
---|
| 2585 | if [[ -a /bin/compress ]]; then |
---|
| 2586 | gotcompress=1 |
---|
| 2587 | suff="Z" |
---|
| 2588 | compress $tarfile |
---|
| 2589 | else |
---|
| 2590 | suff="gz" |
---|
| 2591 | gzip $tarfile |
---|
| 2592 | fi |
---|
| 2593 | |
---|
| 2594 | # Master: wrap the local compressed snap plus any snaps received from other nodes into $bigtarfile. Non-master: just name the local compressed snap. |
---|
| 2595 | if [[ $master = 1 ]]; then |
---|
| 2596 | basetar=$(basename ${tarfile}) |
---|
| 2597 | tar -cf $bigtarfile ${basetar}.${suff} |
---|
| 2598 | $rm ${tarfile}.${suff} |
---|
| 2599 | |
---|
| 2600 | wait |
---|
| 2601 | # A non-empty $nodefile means there are remote snaps to gather (wait above lets background jobs finish first). |
---|
| 2602 | if [[ -s $nodefile ]]; then |
---|
| 2603 | print "Getting snaps from remote nodes . . ." |
---|
| 2604 | # NOTE(review): presumably runs $rcp on each node listed in $rcFile; $(hostname ...) is expanded locally before $mmdsh runs - confirm intended. |
---|
| 2605 | if [[ -s $rcFile ]]; then |
---|
| 2606 | $mmdsh -F $rcFile K5MUTE=1 $rcp ${BASELOGDIR}/$(hostname | cut -d. -f1)_${logdate}/gpfs.snap.\*.out.tar.\* $myhname:${BASELOGDIR} |
---|
| 2607 | fi |
---|
| 2608 | # Echo any recorded copy failures to the terminal and append them to the error log. |
---|
| 2609 | if [[ -s rcperr ]]; then |
---|
| 2610 | print "The following nodes had trouble sending the snap file:" | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out |
---|
| 2611 | $tee -a ${BASELOGDIR}/gpfs.snap_err.${logdate}.out < rcperr |
---|
| 2612 | fi |
---|
| 2613 | $rm rcperr 2>/dev/null |
---|
| 2614 | g=0 |
---|
| 2615 | # One pass per $glist entry; g indexes ghostarray. Move each host's snap up into ${BASELOGDIR} and drop its per-host directory. |
---|
| 2616 | for i in $glist; do |
---|
| 2617 | short=$(echo ${ghostarray[$g]} | $cut -f1 -d .) |
---|
| 2618 | $mv ${BASELOGDIR}/${short}_${logdate}/gpfs.snap.${short}.*.out.tar.* ${BASELOGDIR} |
---|
| 2619 | $rm -r ${BASELOGDIR}/${short}_${logdate} |
---|
| 2620 | (( g = g + 1 )) |
---|
| 2621 | done |
---|
| 2622 | |
---|
| 2623 | # Append every snap file found in the current directory to the combined archive, then delete the loose copies. |
---|
| 2624 | tarlist=$($ls gpfs.snap.*.out.tar.*) |
---|
| 2625 | if [[ -n $tarlist ]]; then |
---|
| 2626 | tar -rf $bigtarfile $tarlist |
---|
| 2627 | fi |
---|
| 2628 | $rm $tarlist 2>/dev/null |
---|
| 2629 | fi |
---|
| 2630 | |
---|
| 2631 | print "###############################################################################" |
---|
| 2632 | print "Send file ${bigtarfile} to IBM Service" |
---|
| 2633 | else |
---|
| 2634 | print "###############################################################################" |
---|
| 2635 | print "Send file ${tarfile}.${suff} to IBM Service" |
---|
| 2636 | |
---|
| 2637 | fi # end of if [[ $master = 1 ]] |
---|
| 2638 | |
---|
| 2639 | # Remove temporary files: the ${LOGDIR} and ${NODESDIR} trees, any *waiters files under ${BASELOGDIR}, $nodefile, $LOCAL_FILES and /tmp/hostfile. Error output from the removals is discarded. Afterwards print the completion time and exit with status 0. |
---|
| 2640 | $rm -r ${LOGDIR} 2>/dev/null |
---|
| 2641 | $rm -r ${NODESDIR} 2>/dev/null |
---|
| 2642 | $rm ${BASELOGDIR}/*waiters 2>/dev/null |
---|
| 2643 | $rm -f $nodefile 2>/dev/null |
---|
| 2644 | $rm -f $LOCAL_FILES 2>/dev/null |
---|
| 2645 | $rm /tmp/hostfile 2>/dev/null |
---|
| 2646 | |
---|
| 2647 | endtime=$(date) |
---|
| 2648 | |
---|
| 2649 | echo "gpfs.snap completed at $endtime" |
---|
| 2650 | exit 0 |
---|
| 2651 | |
---|