#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
#
#
# Licensed Materials - Property of IBM
#
# (C) COPYRIGHT International Business Machines Corp. 2000,2007
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#)11 1.59.1.3 src/avs/fs/mmfs/ts/admin/mmchcluster.sh, mmfs, avs_rgpfs24, rgpfs24s009a 12/19/06 13:10:44
###############################################################################
#
# Usage:
#   mmchcluster {[-p PrimaryServer] [-s SecondaryServer]}
# or
#   mmchcluster -p LATEST
# or
#   mmchcluster {[-r RemoteShellCommand] [-R RemoteFileCopyCommand]}
# or
#   mmchcluster -C ClusterName
# or
#   mmchcluster -N {NodeDesc[,NodeDesc...] | NodeFile}
#
# where:
#
#   -p PrimaryServer    specifies the node to be used as the primary server
#                       of the GPFS sdrfs data for this cluster.
#
#      LATEST           requests a check to be made that all currently
#                       available nodes point to the correct primary and
#                       backup server.
#
#   -s SecondaryServer  specifies the node to be used as the backup server
#                       of the GPFS sdrfs data for this cluster (optional).
#                       To remove a backup server, specify  -s "".
#
#   -r RemoteShellCommand   specifies the fully qualified pathname for
#                       the remote shell program to be used by GPFS.
#                       The default is /usr/bin/rsh.
#
#   -R RemoteFileCopyCommand  specifies the fully qualified pathname for
#                       the remote file copy program to be used by GPFS.
#                       The default is /usr/bin/rcp.
#
#   -C ClusterName      specifies a new name for the cluster.  If the name
#                       contains dots it is assumed to be a fully qualified
#                       domain name.  Otherwise, the domain will default
#                       to the domain of the primary configuration server.
#
#   -N NodeDesc,NodeDesc,...  specifies a comma-separated list of node
#                             descriptors that specify the admin node
#                             interfaces to be used in the cluster.
#                       The node descriptors have the format:
#                         daemonNodeName:nodeRoles:adminNodeName:
#                       The nodeRoles field is currently just a place-holder
#                       and is ignored.
#
#   -N NodeFile         specifies a file of node descriptors that specify
#                       the admin node interfaces to be used in the cluster.
#                       The lines in the input file have the format:
#                         daemonNodeName:nodeRoles:adminNodeName:
#                       The nodeRoles field is currently just a place-holder
#                       and is ignored.
#
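#   Example (for illustration only; host names are hypothetical):
#   an input node file for -N might look like this, where the empty
#   second field is the ignored nodeRoles place-holder and an empty
#   third field resets the admin node name to the daemon node name:
#
#     node01.example.com::node01-admin.example.com:
#     node02.example.com::node02-admin.example.com:
#     node03.example.com::
#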
# Note:  When used with the -p or -s options, this command will most
#        likely be needed when the current primary server is not available,
#        making it impossible to obtain the sdr lock and to protect
#        against concurrent execution of some other mm command.
#        Under such conditions, the user must ensure that no other mm
#        command is run until the mmchcluster command completes,
#        and that as many of the remaining nodes as possible are available.
#
###############################################################################
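
# Illustrative examples (node names are hypothetical):
#   mmchcluster -p c21f1n11 -s c21f1n12           # move the configuration server roles
#   mmchcluster -p LATEST                         # resynchronize all nodes with the servers
#   mmchcluster -r /usr/bin/ssh -R /usr/bin/scp   # switch the remote command programs
#   mmchcluster -C mycluster.example.com          # rename the cluster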

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

sourceFile="mmchcluster.sh"
[[ -n $DEBUG || -n $DEBUGmmchcluster ]] && set -x
$mmTRACE_ENTER "$*"


# Local work files.  Names should be of the form:
#   fn=${tmpDir}fn.${mmcmd}.$$
allNodes=${tmpDir}allNodes.${mmcmd}.$$
clientNodes=${tmpDir}clientNodes.${mmcmd}.$$
inputNodes=${tmpDir}inputNodes.${mmcmd}.$$
processedNodes=${tmpDir}processedNodes.${mmcmd}.$$
initErrors=${tmpDir}initErrors.${mmcmd}.$$
# Note: Do not include initErrors in LOCAL_FILES yet; we'll do it later.

LOCAL_FILES=" $allNodes $clientNodes $inputNodes $processedNodes "


# Local declarations

usageMsg=359
newNodeNumbers=""
backupServer=""
rshPath=""
rcpPath=""
integer nodeCount
integer n
rc=0

Cflag=""
Nflag=""
pflag=""
rflag=""
Rflag=""
sflag=""
Carg=""
parg=""
rarg=""
Rarg=""
sarg=""
otherOpt=""


# Local functions


##########################################################################
#
# Function:  Specify the admin network for the GPFS cluster.
#
# Input:    $1 - file or list of node descriptors containing the
#                adapter information as follows:
#                  daemonNodeName:nodeRoles:adminNodeName:
#
# Returns:   0 - no errors encountered
#            non-zero - unexpected error
#
##########################################################################
function specifyAdminNetwork  # <networkInfo>
{
  typeset sourceFile="mmchcluster.sh"
  [[ -n $DEBUG || -n $DEBUGspecifyAdminNetwork ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset networkInfo="$1"

  typeset failedNodes sdrfsLine mmcommonOutput
  typeset nodeLine nodeName nodeName2 nodeStatus
# typeset nodeRoles
  typeset hostResult nodeNumber adminNodeName adminIpa
  typeset nodeError newPrimaryName newBackupName commitOptions

  typeset rc=0
  typeset changeMade=""
  typeset fatalError=""
  typeset sharedSdrservPort=""

  # The input parameter may be either a list or a file.  Which is it?
  if [[ -f $networkInfo ]]
  then
    # It is a file; verify its existence and create our own copy.
    checkUserFile $networkInfo $inputNodes
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # It is not a file, so it must be a list.
    # Convert the input node list into a file.
    $rm -f $inputNodes
    IFS=','
    for nodeDesc in $networkInfo
    do
      print -- "$nodeDesc" >> $inputNodes
      checkForErrors "writing to $inputNodes" $?
    done
    IFS="$IFS_sv"    # Restore the default IFS setting.
  fi

  # Check the input data for correctness.
  # We check all the records rather than stop on the first error.
  $rm -f $processedNodes
  $touch $processedNodes     # Ensure the tmp file exists even if empty.
  IFS=":"                    # Change the field separator to ':'.
  exec 3<&-
  exec 3< $inputNodes
  while read -u3 nodeLine
  do
    # Parse the line.
    set -f ; set -- $nodeLine ; set +f
    nodeName=$1
#   nodeRoles=$2
    nodeName2=$3
    IFS="$IFS_sv"    # Restore the default IFS setting.

    # Make sure neither node name is specified more than once.
    $grep -qw $nodeName $processedNodes > /dev/null 2>&1
    if [[ $? -eq 0 ]]
    then
      # The node name is specified twice.
      printErrorMsg 347 $mmcmd $nodeName
      fatalError=yes
    fi

    # Check the admin node name if it was specified.
    if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]
    then
      $grep -qw $nodeName2 $processedNodes > /dev/null 2>&1
      if [[ $? -eq 0 ]]
      then
        # The node is specified twice.
        printErrorMsg 347 $mmcmd $nodeName2
        fatalError=yes
      fi
    fi  # end of if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]

    # Add the node names to the list of processed nodes.
    print -- "${nodeName}:${nodeName2}" >> $processedNodes
    checkForErrors "Writing to file $processedNodes" $?

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end of while read -u3 nodeLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # Return to the caller if we encountered an error.
  [[ -n $fatalError ]] && return 1

  # Ensure that the local copy of the mmsdrfs is up-to-date.
  # Set up trap exception handling and obtain the lock.
  trap pretrap HUP INT QUIT KILL
  gpfsInitOutput=$(gpfsInit $lockId)
  setGlobalVar $? $gpfsInitOutput

  # Stop here if the admin network support has not been activated yet.
  if [[ $sdrfsFormatLevel -eq 0 ]]
  then
    print -u2 "$mmcmd:  The separate administration network support has not been enabled yet."
    print -u2 "    Run \"mmchconfig release=LATEST\" to activate the new function."
    cleanupAndExit
  fi

  # Determine the lookup order for resolving host names.
  [[ $osName != AIX ]] && resolveOrder=$(setHostResolveOrder)

  # Go through the current mmsdrfs file.  Increment the generation
  # number and build the node name list that will be needed later.
  # Remove all admin network related information.
  $rm -f $newsdrfs $nodefile
  newPrimaryName=""
  newBackupName=""
  IFS=":"                    # Change the field separator to ':'.
  exec 3<&-
  exec 3< $mmsdrfsFile
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -A v -- - $sdrfsLine ; set +f

    IFS="$IFS_sv"      # Restore the default IFS settings.
    printLine=true     # Assume the line will be printed.

    case ${v[$LINE_TYPE_Field]} in

      $VERSION_LINE )  # This is the global header line.
        # Save the version line for updating later.
        versionLine=$(print_newLine)
        printLine=false
        ;;

      $NODESET_HDR )
        # If the daemon and the mmsdrserv tcp ports are shared,
        # it will be necessary to ensure that the daemon is down
        # on the config server nodes if their names will change.
        if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
              ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
        then
          sharedSdrservPort=yes
        fi
        ;;

      $MEMBER_NODE )   # This line describes a node.
        # Add the reliable node name to nodefile.
        print -- "${v[$REL_HOSTNAME_Field]}" >> $nodefile
        checkForErrors "writing to file $nodefile" $?

        # Reset the node error flag.
        nodeError=""

        # Obtain the data for this node from the node file.
        nodeLine=$($awk -F: '                        \
          $1 == "'${v[$DAEMON_NODENAME_Field]}'" ||  \
          $1 == "'${v[$REL_HOSTNAME_Field]}'"    ||  \
          $1 == "'${v[$NODE_NAME_Field]}'"       ||  \
          $1 == "'${v[$ADMIN_SHORTNAME_Field]}'" ||  \
          $1 == "'${v[$NODE_NUMBER_Field]}'"     ||  \
          $1 == "'${v[$IPA_Field]}'" {               \
            { print $0 }                             \
            { exit }                                 \
          }                                          \
        ' $inputNodes)

        if [[ -n $nodeLine ]]
        then
          # We found data for this node.  Parse the input.
          IFS=":"              # Change the field separator to ':'.
          set -f ; set -- $nodeLine ; set +f
          nodeName=$1
          nodeName2=$3
          IFS="$IFS_sv"        # Restore the default IFS setting.

          # Determine the daemon node name.
          if [[ -n ${v[$DAEMON_NODENAME_Field]} ]]
          then
            daemonNodeName=${v[$DAEMON_NODENAME_Field]}
          else
            daemonNodeName=${v[$REL_HOSTNAME_Field]}
          fi

          # Did the user reset or specify the admin node name?
          if [[ -z $nodeName2 ]]
          then
            # The admin node name was null, indicating "reset";
            # set the admin node name to the daemon node name value.
            adminNodeName=$daemonNodeName
            adminShortName=${v[$NODE_NAME_Field]}

          else
            # The admin node name was not null, indicating "specify";
            # Determine the IP address for the specified admin node name.
            hostResult=$($host $nodeName2)
            set -f ; set -- $hostResult ; set +f
            adminNodeName=$1
            adminShortName=${1%%.*}    # Exclude everything after the first dot.
            adminIpa=${3%%,*}

            # Check that the admin node name has a valid IP address.
            if [[ -z $adminIpa ]]
            then
              # An invalid node name was specified.
              printErrorMsg 54 $mmcmd $nodeName2
              fatalError=yes
              break
            fi

            # Invoke the checkAdapter function to ensure that
            # the specified adapter interface exists on the node.
            mmcommonOutput=$($mmcommon on1 ${v[$REL_HOSTNAME_Field]}  \
               checkAdapter $adminIpa 2> $errMsg)
            rc=$?
            set -f ; set -- $mmcommonOutput ; set +f
            nodeStatus=$1
            if [[ $rc != 0 || $nodeStatus != success ]]
            then
              # The checkAdapter call failed.
              # We will not define a new admin node name for this node
              # but we will continue to process the remaining nodes.
              # Tell the world what went wrong with this node.
              if [[ $nodeStatus = ipa_alias ]]
              then
                # IP address aliasing is not supported.
                printErrorMsg 476 $mmcmd $nodeName2
              elif [[ $nodeStatus = ipa_missing ]]
              then
                # The admin IP address is not known on the node.
                printErrorMsg 154 $mmcmd $nodeName2 ${v[$REL_HOSTNAME_Field]}
              elif [[ $rc = $MM_HostDown || $rc = $MM_ConnectTimeout ]]
              then
                # The node cannot be reached.
                printErrorMsg 340 $mmcmd ${v[$REL_HOSTNAME_Field]}
              else
                # Unexpected error.  Display all possible error messages.
                [[ -s $errMsg ]] && $cat $errMsg 1>&2
                [[ $rc -eq 0 ]] && rc=1
                checkForErrors "checkAdapter ${v[$REL_HOSTNAME_Field]}" $rc
              fi

              # Append the node name to the list of failed nodes and
              # set a flag to indicate the node name did not check out.
              failedNodes="${failedNodes}\n\t${nodeName}"
              nodeError=yes

            fi  # end of if [[ $rc != 0 || $nodeStatus != success ]]

          fi  # end of if [[ -z $nodeName2 ]]

          # Update the member line if there was no error.
          if [[ -z $nodeError ]]
          then
            # Remember the new primary or backup server name for updating
            # the version line later if this is one of those servers.
            [[ ${v[$REL_HOSTNAME_Field]} = $primaryServer ]] &&  \
              newPrimaryName=$adminNodeName
            [[ ${v[$REL_HOSTNAME_Field]} = $backupServer ]]  &&  \
              newBackupName=$adminNodeName

            # Things checked out ok.  Set the node name fields.
            v[$DAEMON_NODENAME_Field]=$daemonNodeName
            v[$REL_HOSTNAME_Field]=$adminNodeName
            v[$ADMIN_SHORTNAME_Field]=$adminShortName
            changeMade=yes
          fi

          $rm -f $errMsg

        fi  # end of if [[ -n $nodeLine ]]
        ;;

      * )  # We are not interested in any other lines.
        ;;

    esac  # end of case ${v[$LINE_TYPE_Field]} in

    # Unless suppressed, write the line to the new mmsdrfs file.
    if [[ $printLine = true ]]
    then
      print_newLine >> $newsdrfs
      checkForErrors "writing to file $newsdrfs" $?
    fi

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end of while read -u3

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # Go through the mmsdrfs file to update the NSD servers' admin node names.
  $rm -f $tmpsdrfs
  IFS=":"
  exec 3<&-
  exec 3< $newsdrfs
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -A v -- - $sdrfsLine ; set +f
    IFS="$IFS_sv"

    # Change some of the fields depending on the type of line.
    case ${v[$LINE_TYPE_Field]} in

      $SG_DISKS )  # This is the line for some disk.

        # If this disk is an NSD with a valid PVID value,
        # make sure the daemon nsd server names are recorded.
        if [[ ${v[$DISK_TYPE_Field]} = nsd && -n ${v[$PVID_Field]} ]]
        then
          # If a server node was specified, check that it is valid and
          # convert it to get the potentially new admin adapter name.
          # We determine whether a server was specified by checking for an
          # admin nsd server name, but we do not use that name for finding
          # the node information, since the old admin node name may
          # no longer exist as a result of the update we just did.
          # We use the daemon node name to find the node instead,
          # since mmchcluster -N does not change daemon node names.
          if [[ -n ${v[$NSD_PRIMARY_NODE_Field]} ]]
          then
            # If no daemon node name has yet been recorded for the
            # primary NSD server, determine and store it now.
            server=${v[$DAEMON_NSD_PRIMARY_Field]}
            if [[ -z $server ]]
            then
              server=$(checkAndConvertNodeValue  \
                 ${v[$NSD_PRIMARY_NODE_Field]} $DAEMON_NODENAME_Field)
              checkForErrors "checkAndConvertNodeValue" $?
              v[$DAEMON_NSD_PRIMARY_Field]=$server
            fi
            # Use the primary server's daemon node name to obtain
            # the primary server's admin node name.
            v[$NSD_PRIMARY_NODE_Field]=$(checkAndConvertNodeValue  \
               $server $REL_HOSTNAME_Field $newsdrfs)
            checkForErrors "checkAndConvertNodeValue $server" $?
          fi
          if [[ -n ${v[$NSD_BACKUP_NODE_Field]} ]]
          then
            # If no daemon node name has yet been recorded for the
            # backup NSD server, determine and store it now.
            backup=${v[$DAEMON_NSD_BACKUP_Field]}
            if [[ -z $backup ]]
            then
              backup=$(checkAndConvertNodeValue  \
                 ${v[$NSD_BACKUP_NODE_Field]} $DAEMON_NODENAME_Field)
              checkForErrors "checkAndConvertNodeValue" $?
              v[$DAEMON_NSD_BACKUP_Field]=$backup
            fi
            # Use the backup server's daemon node name to obtain
            # the backup server's admin node name.
            v[$NSD_BACKUP_NODE_Field]=$(checkAndConvertNodeValue  \
               $backup $REL_HOSTNAME_Field $newsdrfs)
            checkForErrors "checkAndConvertNodeValue $backup" $?
          fi
        fi  # end of if (v[$DISK_TYPE_Field] == "nsd" && -n v[$PVID_Field])
        ;;

      * )  # We are not interested in any other lines.
        ;;

    esac  # end Change some of the fields

    # Build and write the line to the temp version of the mmsdrfs file.
    print_newLine >> $tmpsdrfs
    checkForErrors "writing to file $tmpsdrfs" $?

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end while read -u3 sdrfsLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # If a fatal error occurred, or if no changes were made,
  # release the lock, report any failed nodes, and return.
  if [[ -n $fatalError || -z $changeMade ]]
  then
    freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
    if [[ -n $failedNodes ]]
    then
      # Administrative node names were not defined for nodes ...
      printErrorMsg 174 $mmcmd $failedNodes
    fi
    if [[ -n $fatalError ]]
    then
      printErrorMsg 389 $mmcmd         # The command failed.
    else
      printErrorMsg 387 $mmcmd $mmcmd  # Command quitting due to no valid nodes.
    fi
    return 1
  fi

  # Create the updated version line and add it to the new sdrfs file.
  # The generation number is incremented and the server names may change.
  IFS=":"                    # Change the field separator to ':'.
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"              # Restore the default IFS setting.
  newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
  v[$SDRFS_GENNUM_Field]=$newGenNumber
  [[ -n $newPrimaryName ]] && v[$PRIMARY_SERVER_Field]=$newPrimaryName
  [[ -n $newBackupName ]]  && v[$BACKUP_SERVER_Field]=$newBackupName
  print_newLine >> $tmpsdrfs
  checkForErrors "writing to file $tmpsdrfs" $?

  # If the GPFS and mmsdrserv daemons share the same tcp port number,
  # and the names of the primary or backup configuration servers are
  # changing, it is necessary to ensure that the GPFS daemon is down
  # on the server nodes and the mmsdrserv daemon is restarted.
  # Otherwise, the server nodes will continue giving (stale) Gpfs object
  # information or returning ESDR_NOT_SERVER errors.
  if [[ -n $sharedSdrservPort && ( -n $newPrimaryName || -n $newBackupName ) ]]
  then
    # Get the names of the config servers.
    print -- "${v[$PRIMARY_SERVER_Field]}\n${v[$BACKUP_SERVER_Field]}" > $tmpNodes
    checkForErrors "writing to file $tmpNodes" $?

    # Verify the daemon is down; do not lock the Gpfs object.
    printInfoMsg 453
    verifyDaemonInactive $tmpNodes
    [[ $? -ne 0 ]] && return 1

    commitOptions="initLocalNodeData,KILLSDRSERV"
  else
    commitOptions="initLocalNodeData"
  fi  # end of if [[ -n $sharedSdrservPort ]]

  # Make sure the new sdrfs file is properly sorted.
  LC_ALL=C $SORT_MMSDRFS $tmpsdrfs -o $newsdrfs

  # Put the new mmsdrfs file into the sdr.  This will make the newly-added
  # admin nodes visible to the rest of the nodes in the cluster.
  trap "" HUP INT QUIT KILL
  gpfsObjectInfo=$(commitChanges $nsId $nsId  \
     $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer $commitOptions)
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # We were unable to replace the file in the sdr.
    printErrorMsg 381 $mmcmd
    return 1
  fi

  # Unlock the sdr.
  freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
  trap posttrap HUP INT QUIT KILL

  # Propagate the new mmsdrfs file to all nodes in the cluster.
  # This process is asynchronous.
  propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber initLocalNodeData

  # Report any nodes that did not check successfully.
  if [[ -n $failedNodes ]]
  then
    # Administrative node names were not defined for nodes ...
    printErrorMsg 174 $mmcmd $failedNodes
  fi

  return 0

}  #----- end of function specifyAdminNetwork -------------------


###################################################################
# This function is called if there is an interrupt after the new
# mmsdrfs file was committed on the new primary and backup servers
# but before the change was propagated to the rest of the nodes.
###################################################################
function localPosttrap
{
  $mmTRACE_ENTER "$*"

  # Tell the guy which nodes must be up and which command to run.
  printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
  printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
  cleanupAndExit 2

}  #----- end of function localPosttrap ------------------------



######################
# Mainline processing
######################


###################################################
# Process the command arguments.
###################################################
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] && \
  syntaxError "help" $usageMsg

[[ $argc -lt 2  ]] && \
  syntaxError "missingArgs" $usageMsg

while getopts :C:N:p:r:R:s: OPT
do
  case $OPT in

    C) # cluster name
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       Carg=$OPTARG
       ;;

    N) # define/replace secondary network
       [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       Narg=$OPTARG
       ;;

    p) # primary server
       [[ -n $pflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       pflag="-$OPT"
       parg=$OPTARG
       otherOpt="-$OPT"
       ;;

    r) # remote shell command
       [[ -n $rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       rflag="-$OPT"
       rarg=$OPTARG
       [[ $rarg = ${rarg#/} ]] && \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$rarg"
       otherOpt="-$OPT"
       ;;

    R) # remote file copy command
       [[ -n $Rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Rflag="-$OPT"
       Rarg=$OPTARG
       [[ $Rarg = ${Rarg#/} ]] && \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$Rarg"
       otherOpt="-$OPT"
       ;;

    s) # secondary server
       [[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       sflag="-$OPT"
       sarg=$OPTARG
       otherOpt="-$OPT"
       ;;

    +[CNprRs]) # Invalid option
       syntaxError "invalidOption" $usageMsg $OPT
       ;;

    :) # Missing argument
       syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    *) # Invalid option
       syntaxError "invalidOption" $usageMsg $OPTARG
       ;;
  esac

done

shift OPTIND-1
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1

[[ -n $sflag && $parg = LATEST ]] && \
  syntaxError "invalidCombination" $usageMsg "-s" "-p LATEST"

[[ -n $rflag && -n $pflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-r" "-p"

[[ -n $rflag && -n $sflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-r" "-s"

[[ -n $Rflag && -n $pflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-R" "-p"

[[ -n $Rflag && -n $sflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-R" "-s"

# The primary GPFS cluster configuration server cannot be removed.
[[ -n $pflag && $parg = "" ]] && \
  syntaxError "missingValue" $usageMsg "-p"

[[ -n $Nflag && -n $otherOpt ]] && \
  syntaxError "invalidCombination"  $usageMsg "-N" "$otherOpt"

[[ -n $Cflag && -n $otherOpt ]] && \
  syntaxError "invalidCombination" $usageMsg "-C" "$otherOpt"


#############################################################################
# If the request is to change a remote command, invoke the mmsetrcmd script.
# Keep in mind that rarg and Rarg may include options for the respective
# commands and, therefore, must always be quoted.
#############################################################################
if [[ -n $rflag || -n $Rflag ]]
then
  if [[ -z $Rflag ]]
  then
    $mmsetrcmd "$rflag" "$rarg"
    rc=$?
  elif [[ -z $rflag ]]
  then
    $mmsetrcmd "$Rflag" "$Rarg"
    rc=$?
  else
    $mmsetrcmd "$rflag" "$rarg" "$Rflag" "$Rarg"
    rc=$?
  fi
  cleanupAndExit $rc
fi


#############################################################
# If the request is to specify changes to the admin network,
# invoke the function to do the work and exit.
#############################################################
if [[ -n $Nflag ]]
then
  specifyAdminNetwork "$Narg"
  cleanupAndExit $?
fi


########################################################
# If the request is to change the cluster name,
# invoke the mmsetrcmd script.
########################################################
if [[ -n $Cflag ]]
then
  $mmsetrcmd "$Cflag" "$Carg"
  cleanupAndExit $?
fi


#################################################################
# Set up trap exception handling and call the gpfsInit function.
# It will attempt to ensure that the local copy of the mmsdrfs
# and the rest of the GPFS system files are up-to-date.
# Try to get the lock but do not fail if this is not possible.
#################################################################
trap pretrap HUP INT QUIT KILL

if [[ $parg = LATEST ]]
then
  # The LATEST keyword was specified.  Try to obtain the
  # most recent mmsdrfs file (i.e., the mmsdrfs file with the
  # highest gen number) among all the nodes in the cluster.
  # To do that, use the local mmsdrfs file as a starting point.
  getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
  gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
  rc=$?

else
  # The LATEST keyword was not specified.  Try to obtain
  # the mmsdrfs file from one of the servers with locking.
  gpfsInitOutput=$(gpfsInit $lockId 2> $initErrors)
  rc=$?
  LOCAL_FILES="$LOCAL_FILES $initErrors "
  if [[ $rc -ne 0 ]]
  then
    # We failed to get the sdrfs file with a lock.  Check whether
    # some other mm command currently holds the lock.  If yes, give up.
    $grep -e "Timed out waiting for lock:  Try again later."  \
          -e "6027-1229"  $initErrors > /dev/null 2>&1
    ec=$?
    if [[ $ec -eq 0 ]]
    then
      # Display the messages from gpfsInit.
      $cat $initErrors | \
         $grep -v -e "6027-1227" -e "file is locked. Retrying..." 1>&2
      cleanupAndExit
    fi

    # We failed to get the sdrfs file with a lock.  Display any messages.
    $cat $initErrors 1>&2
    # Processing continues.
    printErrorMsg 437 $mmcmd

    # Now try the gpfsInit again, but this time do not ask for a lock.
    # If there is a backup server, and if it is available,
    # we should be able to get the latest GPFS system files from there.
    gpfsInitOutput=$(gpfsInit nolock 2>/dev/null)
    rc=$?
    if [[ $rc -ne 0 ]]
    then
      # We also failed to get the sdrfs file without locking.  Now try
      # to obtain the most recent mmsdrfs file (i.e., the mmsdrfs file
      # with the highest gen number) among all the nodes in the cluster.
      # To do that, use the local mmsdrfs file as a starting point.
      getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
      gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
      rc=$?
    fi
  fi
fi   # end of if [[ $parg = LATEST ]]

# Check whether we succeeded in obtaining the desired mmsdrfs file.
if [[ $rc -ne 0 ]]
then
  # Not enough nodes are available.
  printErrorMsg 378 $mmcmd
  cleanupAndExit
fi

# Parse the output from the init function.
setGlobalVar $rc $gpfsInitOutput

if [[ $MMMODE = single ]]
then
  # Command currently not valid for cluster type single.
  printErrorMsg 376 $mmcmd single
  cleanupAndExit
fi

if [[ $MMMODE != lc ]]
then
  # Unknown GPFS nodeset type
  printErrorMsg 338 $mmcmd $MMMODE
  cleanupAndExit
fi


#######################################################
# Determine the reliable hostnames of the new servers.
#######################################################
if [[ -n $pflag && $parg != LATEST ]]
then
  # Find the name of the primary server.
  newPrimaryServer=$(checkAndConvertNodeValue $parg $REL_HOSTNAME_Field)
  if [[ $? -ne 0 ]]
  then
    printErrorMsg 352 $mmcmd $parg
    cleanupAndExit
  fi
else
  # If -p not specified, the primary server remains the same.
  newPrimaryServer=$primaryServer
fi  # end of if [[ -n $pflag && $parg != LATEST ]]

if [[ -n $sflag ]]
then
  if [[ -n $sarg ]]
  then
    # Find the name of the secondary server.
    newBackupServer=$(checkAndConvertNodeValue $sarg $REL_HOSTNAME_Field)
    if [[ $? -ne 0 ]]
    then
      printErrorMsg 352 $mmcmd $sarg
      cleanupAndExit
    fi
  else
    # We are deleting the backup server (-s "" was specified).
    newBackupServer=""
  fi
else
  # If -s not specified, the backup server remains the same.
  newBackupServer=$backupServer
fi  # end of if [[ -n $sflag ]]

# Cross check the two server names.
if [[ $newBackupServer = $newPrimaryServer ]]
then
  # The same node was specified as primary and backup server.
  printErrorMsg 346 $mmcmd
  cleanupAndExit
fi

# Check whether anything needs to be done at all.
[[ $newPrimaryServer = $primaryServer &&  \
   $newBackupServer  = $backupServer  &&  \
   $parg != LATEST ]] &&                  \
  cleanupAndExit 0    # Servers are already as desired.


#################################################################
# Go through the current mmsdrfs file.  Increment the generation
# number and change the server names.  Create a file with the
# reliable hostnames of all nodes in the cluster.
#################################################################
$rm -f $newsdrfs $allNodes $clientNodes
IFS=":"         # Change the field separator to ':'.
exec 3<&-
exec 3< $mmsdrfsFile
while read -u3 sdrfsLine
do
  # Parse the line.
  set -f ; set -A v -- - $sdrfsLine ; set +f
  IFS="$IFS_sv"    # Restore the default IFS settings.

  # Change some of the fields depending on the type of line.
  case ${v[$LINE_TYPE_Field]} in

    $VERSION_LINE )
      # Increment the generation number.
      newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
      v[$SDRFS_GENNUM_Field]=$newGenNumber
      v[$PRIMARY_SERVER_Field]=$newPrimaryServer
      v[$BACKUP_SERVER_Field]=$newBackupServer
      ;;

    $NODESET_HDR )
      # If the daemon and the mmsdrserv tcp ports are shared,
      # it will be necessary to ensure that the daemon is down
      # on the old and new config server nodes.
      if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
            ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
      then
        daemonMustBeDown=yes
      fi
      ;;

    $MEMBER_NODE )
      # If this is our node, save the reliable name.
      [[ ${v[$NODE_NUMBER_Field]} = $ourNodeNumber ]] &&  \
        ourNodeName=${v[$REL_HOSTNAME_Field]}

      # All nodes will go in the allNodes file.
      print -- "${v[$REL_HOSTNAME_Field]}" >> $allNodes
      checkForErrors "writing to file $allNodes" $?

      # The server nodes and the local node will
      # not go in the clientNodes file.
      if [[ ${v[$REL_HOSTNAME_Field]} != $newPrimaryServer &&
            ${v[$REL_HOSTNAME_Field]} != $newBackupServer  &&
            ${v[$REL_HOSTNAME_Field]} != $ourNodeName ]]
      then
        print -- "${v[$REL_HOSTNAME_Field]}" >> $clientNodes
        checkForErrors "writing to file $clientNodes" $?
      fi
      ;;

    * )  # Pass all other lines without change.
      ;;

  esac  # end Change some of the fields

  # Build and write the line to the new mmsdrfs file.
  print_newLine >> $newsdrfs
  checkForErrors "writing to file $newsdrfs" $?

  IFS=":"  # Change the separator back to ":" for the next iteration.

done  # end of while read -u3 sdrfsLine

IFS="$IFS_sv"  # Restore the default IFS settings.


#######################################################################
# If the GPFS and mmsdrserv daemons share the same tcp port number,
# it is necessary to ensure that the GPFS daemon is down on the old
# and new configuration server nodes.  Otherwise, the old server nodes
# will continue giving (stale) Gpfs object information, while the new
# servers will not be able to respond to requests because the GPFS
# daemon cannot assume mmsdrserv duties if it is already running.
#######################################################################
if [[ -n $daemonMustBeDown && $parg != LATEST ]]
then
  # Put the old and new server names in a file.
  print -- "$primaryServer\n$backupServer\n"  \
           "$newPrimaryServer\n$newBackupServer" > $tmpNodes
  checkForErrors "writing to file $tmpNodes" $?

  # Eliminate duplicate names.
  $sort -u $tmpNodes -o $tmpNodes
  checkForErrors "sort $tmpNodes" $?

  # Verify the daemon is down; do not lock the Gpfs object.
  printInfoMsg 453
  verifyDaemonInactive $tmpNodes
  [[ $? -ne 0 ]] && cleanupAndExit
fi  # end of if [[ -n $daemonMustBeDown ]]


######################################################
# First, put the new mmsdrfs file on the two servers.
# This must succeed no matter what.
######################################################
trap "" HUP INT QUIT KILL
gpfsObjectInfo=$(commitChanges  \
   $nsId $nsId $gpfsObjectInfo $newGenNumber $newsdrfs  \
   $newPrimaryServer FORCE $newBackupServer)
rc=$?
if [[ $rc -ne 0 ]]
then
  # Cannot replace file in the sdr.
  printErrorMsg 381 $mmcmd

  # The mmchcluster failed - get out.
  # Tell the guy which nodes must be up and which command to run.
  printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
  printErrorMsg 344 $mmcmd "mmchcluster"
  cleanupAndExit
fi

# Restore interrupts.
trap localPosttrap HUP INT QUIT KILL


#################################################
# Propagate the changes to the non-server nodes.
#################################################
if [[ $ourNodeName != $newPrimaryServer &&
      $ourNodeName != $newBackupServer ]]
then
  $cp $newsdrfs $mmsdrfsFile
  checkForErrors "writing to file $mmsdrfsFile" $?
fi

if [[ -s $clientNodes ]]
then
  # Calculate the checksum of the new mmsdrfs file.
  sumOutput=$($sum $newsdrfs)
  checkForErrors "sum $newsdrfs" $?
  set -f ; set -- $sumOutput ; set +f
  newSum=$1

#esjxx See if this can be replaced with pushSdr
  # Tell all client nodes to copy the file from us.
  $mmcommon onall $clientNodes $unreachedNodes copyRemoteFile  \
              $ourNodeName $mmsdrfsFile $mmsdrfsFile $newSum > $tmpfile 2>&1
  rc=$?

  # Make a list of the nodes that were successfully updated.  For each
  # such node there will be a line in tmpfile that looks like this:
  #   nodename: copyRemoteFile:0
  updatedNodes=$($awk -F: ' {                         \
    if (($2 ~ "copyRemoteFile") && ($3 == "0")) {     \
      { print $1 }                                    \
    }                                                 \
  } ' $tmpfile)
  checkForErrors awk $?

  # Determine the nodes that did not get the new data.
  exec 3<&-
  exec 3< $clientNodes
  while read -u3 nodeName
  do
    for goodNode in $updatedNodes
    do
      [[ $nodeName = $goodNode ]] &&  \
        break
    done

    [[ $nodeName != $goodNode ]] &&  \
      failedNodes="${failedNodes}\n\t${nodeName}"
  done

  # If any nodes failed, put out as much information as possible.
  if [[ -n $failedNodes ]]
  then
    # Collect error messages, if any, in file tmpfile2.
    $grep -v "copyRemoteFile:" $tmpfile > $tmpfile2
    [[ -s $tmpfile2 ]] &&  \
      $cat $tmpfile2 1>&2

    # Tell the user which nodes failed.
    printErrorMsg 377 $mmcmd "$failedNodes"
    # Tell the guy which nodes must be up and which command to run.
    printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
    printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
    cleanupAndExit
  fi   # end if [[ -n $failedNodes ]]

fi  # end if [[ -s $clientNodes ]]


##############################
# Unlock the sdr.
##############################
[[ $sdrLocked = yes ]] &&  \
  freeLockOnServer $primaryServer $ourNodeNumber > /dev/null
sdrLocked=no
trap posttrap HUP INT QUIT KILL

# Issue "command was successful" message.
printErrorMsg 272 $mmcmd
cleanupAndExit 0