#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
#
#
# Licensed Materials - Property of IBM
#
# (C) COPYRIGHT International Business Machines Corp. 2000,2007
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#)11 1.59.1.3 src/avs/fs/mmfs/ts/admin/mmchcluster.sh, mmfs, avs_rgpfs24, rgpfs24s009a 12/19/06 13:10:44
###############################################################################
#
# Usage:
#   mmchcluster {[-p PrimaryServer] [-s SecondaryServer]}
# or
#   mmchcluster -p LATEST
# or
#   mmchcluster {[-r RemoteShellCommand] [-R RemoteFileCopyCommand]}
# or
#   mmchcluster -C ClusterName
# or
#   mmchcluster -N {NodeDesc[,NodeDesc...] | NodeFile}
#
# where:
#
#   -p PrimaryServer     specifies the node to be used as the primary server
#                        of the GPFS sdrfs data for this cluster.
#
#      LATEST            requests a check to be made that all currently
#                        available nodes point to the correct primary and
#                        backup server.
#
#   -s SecondaryServer   specifies the node to be used as the backup server
#                        of the GPFS sdrfs data for this cluster (optional).
#                        To remove a backup server, specify -s "".
#
#   -r RemoteShellCommand      specifies the fully qualified pathname for
#                        the remote shell program to be used by GPFS.
#                        The default is /usr/bin/rsh.
#
#   -R RemoteFileCopyCommand   specifies the fully qualified pathname for
#                        the remote file copy program to be used by GPFS.
#                        The default is /usr/bin/rcp.
#
#   -C ClusterName       specifies a new name for the cluster.  If the name
#                        contains dots it is assumed to be a fully qualified
#                        domain name.  Otherwise, the domain will default
#                        to the domain of the primary configuration server.
#
#   -N NodeDesc,NodeDesc,...   specifies a comma-separated list of node
#                        descriptors that specify the admin node
#                        interfaces to be used in the cluster.
#                        The node descriptors have the format:
#                          daemonNodeName:nodeRoles:adminNodeName:
#                        The nodeRoles field is currently just a place-holder
#                        and is ignored.
#
#   -N NodeFile          specifies a file of node descriptors that specify
#                        the admin node interfaces to be used in the cluster.
#                        The lines in the input file have the format:
#                          daemonNodeName:nodeRoles:adminNodeName:
#                        The nodeRoles field is currently just a place-holder
#                        and is ignored.
#
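#       Illustrative NodeFile contents (all node names are hypothetical):
#
#         k145n01:quorum:k145n01a:
#         k145n02::k145n02a:
#         k145n03::
#
#       A descriptor whose adminNodeName field is empty resets the admin
#       interface for that node back to its daemon node name.
#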
# Note:  When used with the -p or -s options, this command will most
#        likely be needed when the current primary server is not available
#        and it will be impossible to obtain the sdr lock and protect
#        against concurrent execution of some other mm command.
#        Under such conditions, the user must ensure that no other mm
#        command is run until the completion of the mmchcluster command,
#        and that as many of the remaining nodes as possible are available.
#
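# Illustrative invocations (node names are hypothetical):
#
#   mmchcluster -p k145n02 -s k145n03             # move the config servers
#   mmchcluster -p LATEST                         # resync all available nodes
#   mmchcluster -r /usr/bin/ssh -R /usr/bin/scp   # change the remote commands
#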
###############################################################################

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

sourceFile="mmchcluster.sh"
[[ -n $DEBUG || -n $DEBUGmmchcluster ]] && set -x
$mmTRACE_ENTER "$*"


# Local work files.  Names should be of the form:
#   fn=${tmpDir}fn.${mmcmd}.$$
allNodes=${tmpDir}allNodes.${mmcmd}.$$
clientNodes=${tmpDir}clientNodes.${mmcmd}.$$
inputNodes=${tmpDir}inputNodes.${mmcmd}.$$
processedNodes=${tmpDir}processedNodes.${mmcmd}.$$
initErrors=${tmpDir}initErrors.${mmcmd}.$$
# Note: Do not include initErrors in LOCAL_FILES yet; we'll do it later.

LOCAL_FILES=" $allNodes $clientNodes $inputNodes $processedNodes "
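
# For illustration (hypothetical expansion): with tmpDir=/var/mmfs/tmp/ and
# mmcmd=mmchcluster, the first work file above would be something like
# /var/mmfs/tmp/allNodes.mmchcluster.12345, where 12345 is the shell's pid.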


# Local declarations

usageMsg=359
newNodeNumbers=""
backupServer=""
rshPath=""
rcpPath=""
integer nodeCount
integer n
rc=0

Cflag=""
Nflag=""
pflag=""
rflag=""
Rflag=""
sflag=""
Carg=""
parg=""
rarg=""
Rarg=""
sarg=""
otherOpt=""


# Local functions


##########################################################################
#
# Function:  Specify the admin network for the GPFS cluster.
#
# Input:     $1 - file or list of node descriptors containing the
#                 adapter information as follows:
#                   daemonNodeName:nodeRoles:adminNodeName:
#
# Returns:   0 - no errors encountered
#            non-zero - unexpected error
#
##########################################################################
function specifyAdminNetwork  # <networkInfo>
{
  typeset sourceFile="mmchcluster.sh"
  [[ -n $DEBUG || -n $DEBUGspecifyAdminNetwork ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset networkInfo="$1"

  typeset failedNodes sdrfsLine mmcommonOutput
  typeset nodeLine nodeName nodeName2 nodeStatus
#  typeset nodeRoles
  typeset hostResult nodeNumber adminNodeName adminIpa
  typeset nodeError newPrimaryName newBackupName commitOptions

  typeset rc=0
  typeset changeMade=""
  typeset fatalError=""
  typeset sharedSdrservPort=""

  # The input parameter may be either a list or a file.  Which is it?
  if [[ -f $networkInfo ]]
  then
    # It is a file; verify its existence and create our own copy.
    checkUserFile $networkInfo $inputNodes
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # It is not a file, so it must be a list.
    # Convert the input node list into a file.
    $rm -f $inputNodes
    IFS=','
    for nodeDesc in $networkInfo
    do
      print -- "$nodeDesc" >> $inputNodes
      checkForErrors "writing to $inputNodes" $?
    done
    IFS="$IFS_sv"   # Restore the default IFS setting.
  fi
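
  # For example (hypothetical input), the list "c5n01::c5n01a:,c5n02::c5n02a:"
  # is split on the commas and written to $inputNodes as two lines:
  #   c5n01::c5n01a:
  #   c5n02::c5n02a: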

  # Check the input data for correctness.
  # We check all the records rather than stop on the first error.
  $rm -f $processedNodes
  $touch $processedNodes   # Ensure the tmp file exists even if empty.
  IFS=":"                  # Change the field separator to ':'.
  exec 3<&-
  exec 3< $inputNodes
  while read -u3 nodeLine
  do
    # Parse the line.
    set -f ; set -- $nodeLine ; set +f
    nodeName=$1
#   nodeRoles=$2
    nodeName2=$3
    IFS="$IFS_sv"   # Restore the default IFS setting.
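
    # With IFS set to ':', a descriptor such as the hypothetical
    # "c5n01:quorum:c5n01a:" parses into $1=c5n01 (daemon node name),
    # $2=quorum (role, currently ignored), and $3=c5n01a (admin node name).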

    # Make sure neither node name is specified more than once.
    $grep -qw $nodeName $processedNodes > /dev/null 2>&1
    if [[ $? -eq 0 ]]
    then
      # The node name is specified twice.
      printErrorMsg 347 $mmcmd $nodeName
      fatalError=yes
    fi

    # Check the admin node name if it was specified.
    if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]
    then
      $grep -qw $nodeName2 $processedNodes > /dev/null 2>&1
      if [[ $? -eq 0 ]]
      then
        # The node is specified twice.
        printErrorMsg 347 $mmcmd $nodeName2
        fatalError=yes
      fi
    fi  # end of if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]

    # Add the node names to the list of processed nodes.
    print -- "${nodeName}:${nodeName2}" >> $processedNodes
    checkForErrors "Writing to file $processedNodes" $?

    IFS=":"   # Change the separator back to ":" for the next iteration.

  done  # end of while read -u3 nodeLine

  IFS="$IFS_sv"   # Restore the default IFS settings.

  # Return to the caller if we encountered an error.
  [[ -n $fatalError ]] && return 1

  # Ensure that the local copy of the mmsdrfs is up-to-date.
  # Set up trap exception handling and obtain the lock.
  trap pretrap HUP INT QUIT KILL
  gpfsInitOutput=$(gpfsInit $lockId)
  setGlobalVar $? $gpfsInitOutput

  # Stop here if the admin network support has not been activated yet.
  if [[ $sdrfsFormatLevel -eq 0 ]]
  then
    print -u2 "$mmcmd: The separate administration network support has not been enabled yet."
    print -u2 "  Run \"mmchconfig release=LATEST\" to activate the new function."
    cleanupAndExit
  fi

  # Determine the lookup order for resolving host names.
  [[ $osName != AIX ]] && resolveOrder=$(setHostResolveOrder)

  # Go through the current mmsdrfs file.  Increment the generation
  # number and build the node name list that will be needed later.
  # Remove all admin network related information.
  $rm -f $newsdrfs $nodefile
  newPrimaryName=""
  newBackupName=""
  IFS=":"   # Change the field separator to ':'.
  exec 3<&-
  exec 3< $mmsdrfsFile
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -A v -- - $sdrfsLine ; set +f

    IFS="$IFS_sv"    # Restore the default IFS settings.
    printLine=true   # Assume the line will be printed.

    case ${v[$LINE_TYPE_Field]} in

      $VERSION_LINE )  # This is the global header line.
        # Save the version line for updating later.
        versionLine=$(print_newLine)
        printLine=false
        ;;

      $NODESET_HDR )
        # If the daemon and the mmsdrserv tcp ports are shared,
        # it will be necessary to ensure that the daemon is down
        # on the config server nodes if their names will change.
        if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
              ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
        then
          sharedSdrservPort=yes
        fi
        ;;

      $MEMBER_NODE )  # This line describes a node.
        # Add the reliable node name to nodefile.
        print -- "${v[$REL_HOSTNAME_Field]}" >> $nodefile
        checkForErrors "writing to file $nodefile" $?

        # Reset the node error flag.
        nodeError=""

        # Obtain the data for this node from the node file.
        nodeLine=$($awk -F: '                             \
          $1 == "'${v[$DAEMON_NODENAME_Field]}'"  ||      \
          $1 == "'${v[$REL_HOSTNAME_Field]}'"     ||      \
          $1 == "'${v[$NODE_NAME_Field]}'"        ||      \
          $1 == "'${v[$ADMIN_SHORTNAME_Field]}'"  ||      \
          $1 == "'${v[$NODE_NUMBER_Field]}'"      ||      \
          $1 == "'${v[$IPA_Field]}'" {                    \
            { print $0 }                                  \
            { exit }                                      \
          }                                               \
        ' $inputNodes)

        if [[ -n $nodeLine ]]
        then
          # We found data for this node.  Parse the input.
          IFS=":"   # Change the field separator to ':'.
          set -f ; set -- $nodeLine ; set +f
          nodeName=$1
          nodeName2=$3
          IFS="$IFS_sv"   # Restore the default IFS setting.

          # Determine the daemon node name.
          if [[ -n ${v[$DAEMON_NODENAME_Field]} ]]
          then
            daemonNodeName=${v[$DAEMON_NODENAME_Field]}
          else
            daemonNodeName=${v[$REL_HOSTNAME_Field]}
          fi

          # Did the user reset or specify the admin node name?
          if [[ -z $nodeName2 ]]
          then
            # The admin node name was null, indicating "reset";
            # set the admin node name to the daemon node name value.
            adminNodeName=$daemonNodeName
            adminShortName=${v[$NODE_NAME_Field]}

          else
            # The admin node name was not null, indicating "specify";
            # Determine the IP address for the specified admin node name.
            hostResult=$($host $nodeName2)
            set -f ; set -- $hostResult ; set +f
            adminNodeName=$1
            adminShortName=${1%%.*}   # Exclude everything after the first dot.
            adminIpa=${3%%,*}
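            # As the parsing above implies, the $host helper is expected to
            # return the resolved name in field 1 and a comma-separated list
            # of IP addresses in field 3, e.g. (hypothetical output):
            #   c5n01a.example.com ... 9.114.68.1,9.114.68.2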

            # Check that the admin node name has a valid IP address.
            if [[ -z $adminIpa ]]
            then
              # An invalid node name was specified.
              printErrorMsg 54 $mmcmd $nodeName2
              fatalError=yes
              break
            fi

            # Invoke the checkAdapter function to ensure that
            # the specified adapter interface exists on the node.
            mmcommonOutput=$($mmcommon on1 ${v[$REL_HOSTNAME_Field]} \
                               checkAdapter $adminIpa 2> $errMsg)
            rc=$?
            set -f ; set -- $mmcommonOutput ; set +f
            nodeStatus=$1
            if [[ $rc != 0 || $nodeStatus != success ]]
            then
              # The checkAdapter call failed.
              # We will not define a new admin node name for this node
              # but we will continue to process the remaining nodes.
              # Tell the world what went wrong with this node.
              if [[ $nodeStatus = ipa_alias ]]
              then
                # IP address aliasing is not supported.
                printErrorMsg 476 $mmcmd $nodeName2
              elif [[ $nodeStatus = ipa_missing ]]
              then
                # The admin IP address is not known on the node.
                printErrorMsg 154 $mmcmd $nodeName2 ${v[$REL_HOSTNAME_Field]}
              elif [[ $rc = $MM_HostDown || $rc = $MM_ConnectTimeout ]]
              then
                # The node cannot be reached.
                printErrorMsg 340 $mmcmd ${v[$REL_HOSTNAME_Field]}
              else
                # Unexpected error.  Display all possible error messages.
                [[ -s $errMsg ]] && $cat $errMsg 1>&2
                [[ $rc -eq 0 ]] && rc=1
                checkForErrors "checkAdapter ${v[$REL_HOSTNAME_Field]}" $rc
              fi

              # Append the node name to the list of failed nodes and
              # set a flag to indicate the node name did not check out.
              failedNodes="${failedNodes}\n\t${nodeName}"
              nodeError=yes

            fi  # end of if [[ $rc != 0 || $nodeStatus != success ]]

          fi  # end of if [[ -z $nodeName2 ]]

          # Update the member line if there was no error.
          if [[ -z $nodeError ]]
          then
            # Remember the new primary or backup server name for updating
            # the version line later if this is one of those servers.
            [[ ${v[$REL_HOSTNAME_Field]} = $primaryServer ]] && \
              newPrimaryName=$adminNodeName
            [[ ${v[$REL_HOSTNAME_Field]} = $backupServer ]] && \
              newBackupName=$adminNodeName

            # Things checked out ok.  Set the node name fields.
            v[$DAEMON_NODENAME_Field]=$daemonNodeName
            v[$REL_HOSTNAME_Field]=$adminNodeName
            v[$ADMIN_SHORTNAME_Field]=$adminShortName
            changeMade=yes
          fi

          $rm -f $errMsg

        fi  # end of if [[ -n $nodeLine ]]
        ;;

      * )  # We are not interested in any other lines.
        ;;

    esac  # end of case ${v[$LINE_TYPE_Field]} in

    # Unless suppressed, write the line to the new mmsdrfs file.
    if [[ $printLine = true ]]
    then
      print_newLine >> $newsdrfs
      checkForErrors "writing to file $newsdrfs" $?
    fi

    IFS=":"   # Change the separator back to ":" for the next iteration.

  done  # end of while read -u3

  IFS="$IFS_sv"   # Restore the default IFS settings.

  # Go through the mmsdrfs file to update the NSD servers' admin node names.
  $rm -f $tmpsdrfs
  IFS=":"
  exec 3<&-
  exec 3< $newsdrfs
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -A v -- - $sdrfsLine ; set +f
    IFS="$IFS_sv"

    # Change some of the fields depending on the type of line.
    case ${v[$LINE_TYPE_Field]} in

      $SG_DISKS )  # This is the line for some disk.

        # If this disk is an NSD with a valid PVID value,
        # make sure the daemon nsd server names are recorded.
        if [[ ${v[$DISK_TYPE_Field]} = nsd && -n ${v[$PVID_Field]} ]]
        then
          # If a server node was specified, check that it is valid and
          # convert it to get the potentially new admin adapter name.
          # We determine whether a server was specified by checking for an
          # admin nsd server name, but we do not use that name for finding
          # the node information, since the old admin node name may
          # no longer exist as a result of the update we just did.
          # We use the daemon node name to find the node instead,
          # since mmchcluster -N does not change daemon node names.
          if [[ -n ${v[$NSD_PRIMARY_NODE_Field]} ]]
          then
            # If no daemon node name has yet been recorded for the
            # primary NSD server, determine and store it now.
            server=${v[$DAEMON_NSD_PRIMARY_Field]}
            if [[ -z $server ]]
            then
              server=$(checkAndConvertNodeValue \
                         ${v[$NSD_PRIMARY_NODE_Field]} $DAEMON_NODENAME_Field)
              checkForErrors "checkAndConvertNodeValue" $?
              v[$DAEMON_NSD_PRIMARY_Field]=$server
            fi
            # Use the primary server's daemon node name to obtain
            # the primary server's admin node name.
            v[$NSD_PRIMARY_NODE_Field]=$(checkAndConvertNodeValue \
                                           $server $REL_HOSTNAME_Field $newsdrfs)
            checkForErrors "checkAndConvertNodeValue $server" $?
          fi
          if [[ -n ${v[$NSD_BACKUP_NODE_Field]} ]]
          then
            # If no daemon node name has yet been recorded for the
            # backup NSD server, determine and store it now.
            backup=${v[$DAEMON_NSD_BACKUP_Field]}
            if [[ -z $backup ]]
            then
              backup=$(checkAndConvertNodeValue \
                         ${v[$NSD_BACKUP_NODE_Field]} $DAEMON_NODENAME_Field)
              checkForErrors "checkAndConvertNodeValue" $?
              v[$DAEMON_NSD_BACKUP_Field]=$backup
            fi
            # Use the backup server's daemon node name to obtain
            # the backup server's admin node name.
            v[$NSD_BACKUP_NODE_Field]=$(checkAndConvertNodeValue \
                                          $backup $REL_HOSTNAME_Field $newsdrfs)
            checkForErrors "checkAndConvertNodeValue $backup" $?
          fi
        fi  # end of if (v[$DISK_TYPE_Field] == "nsd" && -n v[$PVID_Field])
        ;;

      * )  # We are not interested in any other lines.
        ;;

    esac  # end Change some of the fields

    # Build and write the line to the temp version of the mmsdrfs file.
    print_newLine >> $tmpsdrfs
    checkForErrors "writing to file $tmpsdrfs" $?

    IFS=":"   # Change the separator back to ":" for the next iteration.

  done  # end while read -u3 sdrfsLine

  IFS="$IFS_sv"   # Restore the default IFS settings.

  # If a fatal error occurred, or if no changes were made,
  # release the lock, report any failed nodes, and return.
  if [[ -n $fatalError || -z $changeMade ]]
  then
    freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
    if [[ -n $failedNodes ]]
    then
      # Administrative node names were not defined for nodes ...
      printErrorMsg 174 $mmcmd $failedNodes
    fi
    if [[ -n $fatalError ]]
    then
      printErrorMsg 389 $mmcmd   # The command failed.
    else
      printErrorMsg 387 $mmcmd $mmcmd   # Command quitting due to no valid nodes.
    fi
    return 1
  fi

  # Create the updated version line and add it to the new sdrfs file.
  # The generation number is incremented and the server names may change.
  IFS=":"   # Change the field separator to ':'.
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"   # Restore the default IFS setting.
  newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
  v[$SDRFS_GENNUM_Field]=$newGenNumber
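  # Note: the "+1" above is evaluated arithmetically only because
  # newGenNumber is assumed to be declared as an integer variable by the
  # included mmglobfuncs; for a plain string variable this would be a
  # concatenation.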
  [[ -n $newPrimaryName ]] && v[$PRIMARY_SERVER_Field]=$newPrimaryName
  [[ -n $newBackupName ]] && v[$BACKUP_SERVER_Field]=$newBackupName
  print_newLine >> $tmpsdrfs
  checkForErrors "writing to file $tmpsdrfs" $?

  # If the GPFS and mmsdrserv daemons share the same tcp port number,
  # and the names of the primary or backup configuration servers are
  # changing, it is necessary to ensure that the GPFS daemon is down
  # on the server nodes and the mmsdrserv daemon is restarted.
  # Otherwise, the server nodes will continue giving out (stale) Gpfs object
  # information or will return ESDR_NOT_SERVER errors.
  if [[ -n $sharedSdrservPort && ( -n $newPrimaryName || -n $newBackupName ) ]]
  then
    # Get the names of the config servers.
    print -- "${v[$PRIMARY_SERVER_Field]}\n${v[$BACKUP_SERVER_Field]}" > $tmpNodes
    checkForErrors "writing to file $tmpNodes" $?

    # Verify the daemon is down; do not lock the Gpfs object.
    printInfoMsg 453
    verifyDaemonInactive $tmpNodes
    [[ $? -ne 0 ]] && return 1

    commitOptions="initLocalNodeData,KILLSDRSERV"
  else
    commitOptions="initLocalNodeData"
  fi  # end of if [[ -n $sharedSdrservPort ]]

  # Make sure the new sdrfs file is properly sorted.
  LC_ALL=C $SORT_MMSDRFS $tmpsdrfs -o $newsdrfs

  # Put the new mmsdrfs file into the sdr.  This will make the newly-added
  # admin nodes visible to the rest of the nodes in the cluster.
  trap "" HUP INT QUIT KILL
  gpfsObjectInfo=$(commitChanges $nsId $nsId \
                     $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer $commitOptions)
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # We were unable to replace the file in the sdr.
    printErrorMsg 381 $mmcmd
    return 1
  fi

  # Unlock the sdr.
  freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
  trap posttrap HUP INT QUIT KILL

  # Propagate the new mmsdrfs file to all nodes in the cluster.
  # This process is asynchronous.
  propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber initLocalNodeData

  # Report any nodes that did not check successfully.
  if [[ -n $failedNodes ]]
  then
    # Administrative node names were not defined for nodes ...
    printErrorMsg 174 $mmcmd $failedNodes
  fi

  return 0

}  #----- end of function specifyAdminNetwork -------------------


###################################################################
# This function is called if there is an interrupt after the new
# mmsdrfs file was committed on the new primary and backup servers
# but before the change was propagated to the rest of the nodes.
###################################################################
function localPosttrap
{
  $mmTRACE_ENTER "$*"

  # Tell the user which nodes must be up and which command to run.
  printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
  printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
  cleanupAndExit 2

}  #----- end of function localPosttrap ------------------------



######################
# Mainline processing
######################


###################################################
# Process the command arguments.
###################################################
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] && \
  syntaxError "help" $usageMsg

[[ $argc -lt 2 ]] && \
  syntaxError "missingArgs" $usageMsg

while getopts :C:N:p:r:R:s: OPT
do
  case $OPT in

    C) # cluster name
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       Carg=$OPTARG
       ;;

    N) # define/replace the admin (secondary) network
       [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       Narg=$OPTARG
       ;;

    p) # primary server
       [[ -n $pflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       pflag="-$OPT"
       parg=$OPTARG
       otherOpt="-$OPT"
       ;;

    r) # remote shell command
       [[ -n $rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       rflag="-$OPT"
       rarg=$OPTARG
       [[ $rarg = ${rarg#/} ]] && \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$rarg"
       otherOpt="-$OPT"
       ;;

    R) # remote file copy command
       [[ -n $Rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Rflag="-$OPT"
       Rarg=$OPTARG
       [[ $Rarg = ${Rarg#/} ]] && \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$Rarg"
       otherOpt="-$OPT"
       ;;

    s) # secondary server
       [[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       sflag="-$OPT"
       sarg=$OPTARG
       otherOpt="-$OPT"
       ;;

    +[CNprRs]) # Invalid option
       syntaxError "invalidOption" $usageMsg $OPT
       ;;

    :) # Missing argument
       syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    *) # Invalid option
       syntaxError "invalidOption" $usageMsg $OPTARG
       ;;
  esac

done

shift OPTIND-1
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1

[[ -n $sflag && $parg = LATEST ]] && \
  syntaxError "invalidCombination" $usageMsg "-s" "-p LATEST"

[[ -n $rflag && -n $pflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-r" "-p"

[[ -n $rflag && -n $sflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-r" "-s"

[[ -n $Rflag && -n $pflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-R" "-p"

[[ -n $Rflag && -n $sflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-R" "-s"

# The primary GPFS cluster configuration server cannot be removed.
[[ -n $pflag && $parg = "" ]] && \
  syntaxError "missingValue" $usageMsg "-p"

[[ -n $Nflag && -n $otherOpt ]] && \
  syntaxError "invalidCombination" $usageMsg "-N" "$otherOpt"

[[ -n $Cflag && -n $otherOpt ]] && \
  syntaxError "invalidCombination" $usageMsg "-C" "$otherOpt"


#############################################################################
# If the request is to change a remote command, invoke the mmsetrcmd script.
# Keep in mind that rarg and Rarg may include options for the respective
# commands and, therefore, must always be quoted.
#############################################################################
if [[ -n $rflag || -n $Rflag ]]
then
  if [[ -z $Rflag ]]
  then
    $mmsetrcmd "$rflag" "$rarg"
    rc=$?
  elif [[ -z $rflag ]]
  then
    $mmsetrcmd "$Rflag" "$Rarg"
    rc=$?
  else
    $mmsetrcmd "$rflag" "$rarg" "$Rflag" "$Rarg"
    rc=$?
  fi
  cleanupAndExit $rc
fi


#############################################################
# If the request is to specify changes to the admin network,
# invoke the function to do the work and exit.
#############################################################
if [[ -n $Nflag ]]
then
  specifyAdminNetwork "$Narg"
  cleanupAndExit $?
fi


########################################################
# If the request is to change the cluster name,
# invoke the mmsetrcmd script.
########################################################
if [[ -n $Cflag ]]
then
  $mmsetrcmd "$Cflag" "$Carg"
  cleanupAndExit $?
fi


#################################################################
# Set up trap exception handling and call the gpfsInit function.
# It will attempt to ensure that the local copy of the mmsdrfs
# and the rest of the GPFS system files are up-to-date.
# Try to get the lock but do not fail if this is not possible.
#################################################################
trap pretrap HUP INT QUIT KILL

if [[ $parg = LATEST ]]
then
  # The LATEST keyword was specified.  Try to obtain the
  # most recent mmsdrfs file (i.e., the mmsdrfs file with the
  # highest gen number) among all the nodes in the cluster.
  # To do that, use the local mmsdrfs file as a starting point.
  getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
  gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
  rc=$?

else
  # The LATEST keyword was not specified.  Try to obtain
  # the mmsdrfs file from one of the servers with locking.
  gpfsInitOutput=$(gpfsInit $lockId 2> $initErrors)
  rc=$?
  LOCAL_FILES="$LOCAL_FILES $initErrors "
  if [[ $rc -ne 0 ]]
  then
    # We failed to get the sdrfs file with a lock.  Check whether
    # some other mm command currently holds the lock.  If yes, give up.
    $grep -e "Timed out waiting for lock: Try again later." \
          -e "6027-1229" $initErrors > /dev/null 2>&1
    ec=$?
    if [[ $ec -eq 0 ]]
    then
      # Display the messages from gpfsInit.
      $cat $initErrors | \
        $grep -v -e "6027-1227" -e "file is locked. Retrying..." 1>&2
      cleanupAndExit
    fi

    # We failed to get the sdrfs file with a lock.  Display any messages.
    $cat $initErrors 1>&2
    # Processing continues.
    printErrorMsg 437 $mmcmd

    # Now try the gpfsInit again, but this time do not ask for a lock.
    # If there is a backup server, and if it is available,
    # we should be able to get the latest GPFS system files from there.
    gpfsInitOutput=$(gpfsInit nolock 2>/dev/null)
    rc=$?
    if [[ $rc -ne 0 ]]
    then
      # We also failed to get the sdrfs file without locking.  Now try
      # to obtain the most recent mmsdrfs file (i.e., the mmsdrfs file
      # with the highest gen number) among all the nodes in the cluster.
      # To do that, use the local mmsdrfs file as a starting point.
      getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
      gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
      rc=$?
    fi
  fi
fi  # end of if [[ $parg = LATEST ]]

# Check whether we succeeded in obtaining the desired mmsdrfs file.
if [[ $rc -ne 0 ]]
then
  # Not enough nodes are available.
  printErrorMsg 378 $mmcmd
  cleanupAndExit
fi

# Parse the output from the init function.
setGlobalVar $rc $gpfsInitOutput

if [[ $MMMODE = single ]]
then
  # Command currently not valid for cluster type single.
  printErrorMsg 376 $mmcmd single
  cleanupAndExit
fi

if [[ $MMMODE != lc ]]
then
  # Unknown GPFS nodeset type
  printErrorMsg 338 $mmcmd $MMMODE
  cleanupAndExit
fi


#######################################################
# Determine the reliable hostnames of the new servers.
#######################################################
if [[ -n $pflag && $parg != LATEST ]]
then
  # Find the name of the primary server.
  newPrimaryServer=$(checkAndConvertNodeValue $parg $REL_HOSTNAME_Field)
  if [[ $? -ne 0 ]]
  then
    printErrorMsg 352 $mmcmd $parg
    cleanupAndExit
  fi
else
  # If -p not specified, the primary server remains the same.
  newPrimaryServer=$primaryServer
fi  # end of if [[ -n $pflag && $parg != LATEST ]]

if [[ -n $sflag ]]
then
  if [[ -n $sarg ]]
  then
    # Find the name of the secondary server.
    newBackupServer=$(checkAndConvertNodeValue $sarg $REL_HOSTNAME_Field)
    if [[ $? -ne 0 ]]
    then
      printErrorMsg 352 $mmcmd $sarg
      cleanupAndExit
    fi
  else
    # We are deleting the backup server (-s "" was specified).
    newBackupServer=""
  fi
else
  # If -s not specified, the backup server remains the same.
  newBackupServer=$backupServer
fi  # end of if [[ -n $sflag ]]

# Cross check the two server names.
if [[ $newBackupServer = $newPrimaryServer ]]
then
  # The same node was specified as primary and backup server.
  printErrorMsg 346 $mmcmd
  cleanupAndExit
fi

# Check whether anything needs to be done at all.
[[ $newPrimaryServer = $primaryServer && \
   $newBackupServer = $backupServer && \
   $parg != LATEST ]] && \
  cleanupAndExit 0   # Servers are already as desired.


#################################################################
# Go through the current mmsdrfs file.  Increment the generation
# number and change the server names.  Create a file with the
# reliable hostnames of all nodes in the cluster.
#################################################################
$rm -f $newsdrfs $allNodes $clientNodes
IFS=":"   # Change the field separator to ':'.
exec 3<&-
exec 3< $mmsdrfsFile
while read -u3 sdrfsLine
do
  # Parse the line.
  set -f ; set -A v -- - $sdrfsLine ; set +f
  IFS="$IFS_sv"   # Restore the default IFS settings.

  # Change some of the fields depending on the type of line.
  case ${v[$LINE_TYPE_Field]} in

    $VERSION_LINE )
      # Increment the generation number.
      newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
      v[$SDRFS_GENNUM_Field]=$newGenNumber
      v[$PRIMARY_SERVER_Field]=$newPrimaryServer
      v[$BACKUP_SERVER_Field]=$newBackupServer
      ;;

    $NODESET_HDR )
      # If the daemon and the mmsdrserv tcp ports are shared,
      # it will be necessary to ensure that the daemon is down
      # on the old and new config server nodes.
      if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
            ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
      then
        daemonMustBeDown=yes
      fi
      ;;

    $MEMBER_NODE )
      # If this is our node, save the reliable name.
      [[ ${v[$NODE_NUMBER_Field]} = $ourNodeNumber ]] && \
        ourNodeName=${v[$REL_HOSTNAME_Field]}

      # All nodes will go in the allNodes file.
      print -- "${v[$REL_HOSTNAME_Field]}" >> $allNodes
      checkForErrors "writing to file $allNodes" $?

      # The server nodes and the local node will
      # not go in the clientNodes file.
      if [[ ${v[$REL_HOSTNAME_Field]} != $newPrimaryServer &&
            ${v[$REL_HOSTNAME_Field]} != $newBackupServer &&
            ${v[$REL_HOSTNAME_Field]} != $ourNodeName ]]
      then
        print -- "${v[$REL_HOSTNAME_Field]}" >> $clientNodes
        checkForErrors "writing to file $clientNodes" $?
      fi
      ;;

    * )  # Pass all other lines without change.
      ;;

  esac  # end Change some of the fields

  # Build and write the line to the new mmsdrfs file.
  print_newLine >> $newsdrfs
  checkForErrors "writing to file $newsdrfs" $?

  IFS=":"   # Change the separator back to ":" for the next iteration.

done  # end of while read -u3 sdrfsLine

IFS="$IFS_sv"   # Restore the default IFS settings.


#######################################################################
# If the GPFS and mmsdrserv daemons share the same tcp port number,
# it is necessary to ensure that the GPFS daemon is down on the old
# and new configuration server nodes.  Otherwise, the old server nodes
# will continue giving (stale) Gpfs object information, while the new
# servers will not be able to respond to requests because the GPFS
# daemon cannot assume mmsdrserv duties if it is already running.
#######################################################################
if [[ -n $daemonMustBeDown && $parg != LATEST ]]
then
  # Put the old and new server names in a file.
  print -- "$primaryServer\n$backupServer\n" \
           "$newPrimaryServer\n$newBackupServer" > $tmpNodes
  checkForErrors "writing to file $tmpNodes" $?

  # Eliminate duplicate names.
  $sort -u $tmpNodes -o $tmpNodes
  checkForErrors "sort $tmpNodes" $?

  # Verify the daemon is down; do not lock the Gpfs object.
  printInfoMsg 453
  verifyDaemonInactive $tmpNodes
  [[ $? -ne 0 ]] && cleanupAndExit
fi  # end of if [[ -n $daemonMustBeDown ]]


######################################################
# First, put the new mmsdrfs file on the two servers.
# This must succeed no matter what.
######################################################
trap "" HUP INT QUIT KILL
gpfsObjectInfo=$(commitChanges \
                   $nsId $nsId $gpfsObjectInfo $newGenNumber $newsdrfs \
                   $newPrimaryServer FORCE $newBackupServer)
rc=$?
if [[ $rc -ne 0 ]]
then
  # Cannot replace file in the sdr.
  printErrorMsg 381 $mmcmd

  # The mmchcluster failed - get out.
  # Tell the user which nodes must be up and which command to run.
  printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
  printErrorMsg 344 $mmcmd "mmchcluster"
  cleanupAndExit
fi

# Restore interrupts.
trap localPosttrap HUP INT QUIT KILL


#################################################
# Propagate the changes to the non-server nodes.
#################################################
if [[ $ourNodeName != $newPrimaryServer &&
      $ourNodeName != $newBackupServer ]]
then
  $cp $newsdrfs $mmsdrfsFile
  checkForErrors "writing to file $mmsdrfsFile" $?
fi

if [[ -s $clientNodes ]]
then
  # Calculate the checksum of the new mmsdrfs file.
  sumOutput=$($sum $newsdrfs)
  checkForErrors "sum $newsdrfs" $?
  set -f ; set -- $sumOutput ; set +f
  newSum=$1

  #esjxx See if this can be replaced with pushSdr
  # Tell all client nodes to copy the file from us.
  $mmcommon onall $clientNodes $unreachedNodes copyRemoteFile \
    $ourNodeName $mmsdrfsFile $mmsdrfsFile $newSum > $tmpfile 2>&1
  rc=$?

  # Make a list of the nodes that were successfully updated.  For each
  # such node there will be a line in tmpfile that looks like this:
  #   nodename: copyRemoteFile:0
  updatedNodes=$($awk -F: ' {                          \
    if (($2 ~ "copyRemoteFile") && ($3 == "0")) {      \
      { print $1 }                                     \
    }                                                  \
  } ' $tmpfile)
  checkForErrors awk $?

  # Determine the nodes that did not get the new data.
  exec 3<&-
  exec 3< $clientNodes
  while read -u3 nodeName
  do
    for goodNode in $updatedNodes
    do
      [[ $nodeName = $goodNode ]] && \
        break
    done

    [[ $nodeName != $goodNode ]] && \
      failedNodes="${failedNodes}\n\t${nodeName}"
  done

  # If any nodes failed, put out as much information as possible.
  if [[ -n $failedNodes ]]
  then
    # Collect error messages, if any, in file tmpfile2.
    $grep -v "copyRemoteFile:" $tmpfile > $tmpfile2
    [[ -s $tmpfile2 ]] && \
      $cat $tmpfile2 1>&2

    # Tell the user which nodes failed.
    printErrorMsg 377 $mmcmd "$failedNodes"
    # Tell the user which nodes must be up and which command to run.
    printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
    printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
    cleanupAndExit
  fi  # end if [[ -n $failedNodes ]]

fi  # end if [[ -s $clientNodes ]]


##############################
# Unlock the sdr.
##############################
[[ $sdrLocked = yes ]] && \
  freeLockOnServer $primaryServer $ourNodeNumber > /dev/null
sdrLocked=no
trap posttrap HUP INT QUIT KILL

# Issue "command was successful" message.
printErrorMsg 272 $mmcmd
cleanupAndExit 0