1 | #!/bin/ksh |
---|
2 | # IBM_PROLOG_BEGIN_TAG |
---|
3 | # This is an automatically generated prolog. |
---|
4 | # |
---|
5 | # |
---|
6 | # |
---|
7 | # Licensed Materials - Property of IBM |
---|
8 | # |
---|
9 | # (C) COPYRIGHT International Business Machines Corp. 2000,2007 |
---|
10 | # All Rights Reserved |
---|
11 | # |
---|
12 | # US Government Users Restricted Rights - Use, duplication or |
---|
13 | # disclosure restricted by GSA ADP Schedule Contract with IBM Corp. |
---|
14 | # |
---|
15 | # IBM_PROLOG_END_TAG |
---|
16 | # @(#)11 1.59.1.3 src/avs/fs/mmfs/ts/admin/mmchcluster.sh, mmfs, avs_rgpfs24, rgpfs24s009a 12/19/06 13:10:44 |
---|
17 | ############################################################################### |
---|
18 | # |
---|
19 | # Usage: |
---|
20 | # mmchcluster {[-p PrimaryServer] [-s SecondaryServer]} |
---|
21 | # or |
---|
22 | # mmchcluster -p LATEST |
---|
23 | # or |
---|
24 | # mmchcluster {[-r RemoteShellCommand] [-R RemoteFileCopyCommand]} |
---|
25 | # or |
---|
26 | # mmchcluster -C ClusterName |
---|
27 | # or |
---|
28 | # mmchcluster -N {NodeDesc[,NodeDesc...] | NodeFile} |
---|
29 | # |
---|
30 | # where: |
---|
31 | # |
---|
32 | # -p PrimaryServer specifies the node to be used as the primary server |
---|
33 | # of the GPFS sdrfs data for this cluster. |
---|
34 | # |
---|
35 | # LATEST requests a check to be made that all currently |
---|
36 | # available nodes point to the correct primary and |
---|
37 | # backup server. |
---|
38 | # |
---|
39 | # -s SecondaryServer specifies the node to be used as the backup server |
---|
40 | # of the GPFS sdrfs data for this cluster (optional). |
---|
41 | # To remove a backup server, specify -s "". |
---|
42 | # |
---|
43 | # -r RemoteShellCommand specifies the fully qualified pathname for |
---|
44 | # the remote shell program to be used by GPFS. |
---|
45 | # The default is /usr/bin/rsh. |
---|
46 | # |
---|
47 | # -R RemoteFileCopyCommand specifies the fully qualified pathname for |
---|
48 | # the remote file copy program to be used by GPFS. |
---|
49 | # The default is /usr/bin/rcp. |
---|
50 | # |
---|
51 | # -C ClusterName specifies a new name for the cluster. If the name |
---|
52 | # contains dots it is assumed to be a fully qualified |
---|
53 | # domain name. Otherwise, the domain will default |
---|
54 | # to the domain of the primary configuration server. |
---|
55 | # |
---|
56 | # -N NodeDesc,NodeDesc,... specifies a comma-separated list of node |
---|
57 | # descriptors that specify the admin node |
---|
58 | # interfaces to be used in the cluster. |
---|
59 | # The node descriptors have the format: |
---|
60 | # daemonNodeName:nodeRoles:adminNodeName: |
---|
61 | # The nodeRoles field is currently just a place-holder |
---|
62 | # and is ignored. |
---|
63 | # |
---|
64 | # -N NodeFile specifies a file of node descriptors that specify |
---|
65 | # the admin node interfaces to be used in the cluster. |
---|
66 | # The lines in the input file have the format: |
---|
67 | # daemonNodeName:nodeRoles:adminNodeName: |
---|
68 | # The nodeRoles field is currently just a place-holder |
---|
69 | # and is ignored. |
---|
70 | # |
---|
71 | # Note: When used with the -p or -s options, this command will most |
---|
72 | # likely be needed when the current primary server is not available |
---|
73 | # and it will be impossible to obtain the sdr lock and protect |
---|
74 | # against concurrent execution of some other mm command. |
---|
75 | # Under such conditions, the user must assure that no other mm |
---|
76 | # command is run until the completion of the mmchcluster command |
---|
77 | # and that as many of the remaining nodes as possible are available. |
---|
78 | # |
---|
79 | ############################################################################### |
---|
80 | |
---|
81 | # Include global declarations and service routines. |
---|
82 | . /usr/lpp/mmfs/bin/mmglobfuncs |
---|
83 | . /usr/lpp/mmfs/bin/mmsdrfsdef |
---|
84 | |
---|
85 | sourceFile="mmchcluster.sh" |
---|
86 | [[ -n $DEBUG || -n $DEBUGmmchcluster ]] && set -x |
---|
87 | $mmTRACE_ENTER "$*" |
---|
88 | |
---|
89 | |
---|
90 | # Local work files. Names should be of the form: |
---|
91 | # fn=${tmpDir}fn.${mmcmd}.$$ |
---|
92 | allNodes=${tmpDir}allNodes.${mmcmd}.$$ |
---|
93 | clientNodes=${tmpDir}clientNodes.${mmcmd}.$$ |
---|
94 | inputNodes=${tmpDir}inputNodes.${mmcmd}.$$ |
---|
95 | processedNodes=${tmpDir}processedNodes.${mmcmd}.$$ |
---|
96 | initErrors=${tmpDir}initErrors.${mmcmd}.$$ |
---|
97 | # Note: Do not include initErrors in LOCAL_FILES yet; we'll do it later. |
---|
98 | |
---|
99 | LOCAL_FILES=" $allNodes $clientNodes $inputNodes $processedNodes " |
---|
100 | |
---|
101 | |
---|
102 | # Local declarations |
---|
103 | |
---|
104 | usageMsg=359 |
---|
105 | newNodeNumbers="" |
---|
106 | backupServer="" |
---|
107 | rshPath="" |
---|
108 | rcpPath="" |
---|
109 | integer nodeCount |
---|
110 | integer n |
---|
111 | rc=0 |
---|
112 | |
---|
113 | Cflag="" |
---|
114 | Nflag="" |
---|
115 | pflag="" |
---|
116 | rflag="" |
---|
117 | Rflag="" |
---|
118 | sflag="" |
---|
119 | Carg="" |
---|
120 | parg="" |
---|
121 | rarg="" |
---|
122 | Rarg="" |
---|
123 | sarg="" |
---|
124 | otherOpt="" |
---|
125 | |
---|
126 | |
---|
127 | # Local functions |
---|
128 | |
---|
129 | |
---|
130 | ########################################################################## |
---|
131 | # |
---|
132 | # Function: Specify the admin network for the GPFS cluster. |
---|
133 | # |
---|
134 | # Input: $1 - file or list of node descriptors containing the |
---|
135 | # adapter information as follows: |
---|
136 | # daemonNodeName:nodeRoles:adminNodeName: |
---|
137 | # |
---|
138 | # Returns: 0 - no errors encountered |
---|
139 | # non-zero - unexpected error |
---|
140 | # |
---|
141 | ########################################################################## |
---|
142 | function specifyAdminNetwork # <networkInfo> |
---|
143 | { |
---|
144 | typeset sourceFile="mmchcluster.sh" |
---|
145 | [[ -n $DEBUG || -n $DEBUGspecifyAdminNetwork ]] && set -x |
---|
146 | $mmTRACE_ENTER "$*" |
---|
147 | typeset networkInfo="$1" |
---|
148 | |
---|
149 | typeset failedNodes sdrfsLine mmcommonOutput |
---|
150 | typeset nodeLine nodeName nodeName2 nodeStatus |
---|
151 | # typeset nodeRoles |
---|
152 | typeset hostResult nodeNumber adminNodeName adminIpa |
---|
153 | typeset nodeError newPrimaryName newBackupName commitOptions |
---|
154 | |
---|
155 | typeset rc=0 |
---|
156 | typeset changeMade="" |
---|
157 | typeset fatalError="" |
---|
158 | typeset sharedSdrservPort="" |
---|
159 | |
---|
160 | # The input parameter may be either a list or a file. Which is it? |
---|
161 | if [[ -f $networkInfo ]] |
---|
162 | then |
---|
163 | # It is a file; verify its existence and create our own copy. |
---|
164 | checkUserFile $networkInfo $inputNodes |
---|
165 | [[ $? -ne 0 ]] && cleanupAndExit |
---|
166 | else |
---|
167 | # It is not a file, so it must be a list. |
---|
168 | # Convert the input node list into a file. |
---|
169 | $rm -f $inputNodes |
---|
170 | IFS=',' |
---|
171 | for nodeDesc in $networkInfo |
---|
172 | do |
---|
173 | print -- "$nodeDesc" >> $inputNodes |
---|
174 | checkForErrors "writing to $inputNodes" $? |
---|
175 | done |
---|
176 | IFS="$IFS_sv" # Restore the default IFS setting. |
---|
177 | fi |
---|
178 | |
---|
179 | # Check the input data for correctness. |
---|
180 | # We check all the records rather than stop on the first error. |
---|
181 | $rm -f $processedNodes |
---|
182 | $touch $processedNodes # Ensure the tmp file exists even if empty. |
---|
183 | IFS=":" # Change the field separator to ':'. |
---|
184 | exec 3<&- |
---|
185 | exec 3< $inputNodes |
---|
186 | while read -u3 nodeLine |
---|
187 | do |
---|
188 | # Parse the line. |
---|
189 | set -f ; set -- $nodeLine ; set +f |
---|
190 | nodeName=$1 |
---|
191 | # nodeRoles=$2 |
---|
192 | nodeName2=$3 |
---|
193 | IFS="$IFS_sv" # Restore the default IFS setting. |
---|
194 | |
---|
195 | # Make sure neither node name is specified more than once. |
---|
196 | $grep -qw $nodeName $processedNodes > /dev/null 2>&1 |
---|
197 | if [[ $? -eq 0 ]] |
---|
198 | then |
---|
199 | # The node name is specified twice. |
---|
200 | printErrorMsg 347 $mmcmd $nodeName |
---|
201 | fatalError=yes |
---|
202 | fi |
---|
203 | |
---|
204 | # Check the admin node name if it was specified. |
---|
205 | if [[ -n $nodeName2 && $nodeName2 != $nodeName ]] |
---|
206 | then |
---|
207 | $grep -qw $nodeName2 $processedNodes > /dev/null 2>&1 |
---|
208 | if [[ $? -eq 0 ]] |
---|
209 | then |
---|
210 | # The node is specified twice. |
---|
211 | printErrorMsg 347 $mmcmd $nodeName2 |
---|
212 | fatalError=yes |
---|
213 | fi |
---|
214 | fi # end of if [[ -n $nodeName2 && $nodeName2 != $nodeName ]] |
---|
215 | |
---|
216 | # Add the node names to the list of processed nodes. |
---|
217 | print -- "${nodeName}:${nodeName2}" >> $processedNodes |
---|
218 | checkForErrors "Writing to file $processedNodes" $? |
---|
219 | |
---|
220 | IFS=":" # Change the separator back to ":" for the next iteration. |
---|
221 | |
---|
222 | done # end of while read -u3 nodeLine |
---|
223 | |
---|
224 | IFS="$IFS_sv" # Restore the default IFS settings. |
---|
225 | |
---|
226 | # Return to the caller if we encountered an error. |
---|
227 | [[ -n $fatalError ]] && return 1 |
---|
228 | |
---|
229 | # Ensure that the local copy of the mmsdrfs is up-to-date. |
---|
230 | # Set up trap exception handling and obtain the lock. |
---|
231 | trap pretrap HUP INT QUIT KILL |
---|
232 | gpfsInitOutput=$(gpfsInit $lockId) |
---|
233 | setGlobalVar $? $gpfsInitOutput |
---|
234 | |
---|
235 | # Stop here if the admin network support has not been activated yet. |
---|
236 | if [[ $sdrfsFormatLevel -eq 0 ]] |
---|
237 | then |
---|
238 | print -u2 "$mmcmd: The separate administration network support has not been enabled yet." |
---|
239 | print -u2 " Run \"mmchconfig release=LATEST\" to activate the new function." |
---|
240 | cleanupAndExit |
---|
241 | fi |
---|
242 | |
---|
243 | # Determine the lookup order for resolving host names. |
---|
244 | [[ $osName != AIX ]] && resolveOrder=$(setHostResolveOrder) |
---|
245 | |
---|
246 | # Go through the current mmsdrfs file. Increment the generation |
---|
247 | # number and build the node name list that will be needed later. |
---|
248 | # Remove all admin network related information. |
---|
249 | $rm -f $newsdrfs $nodefile |
---|
250 | newPrimaryName="" |
---|
251 | newBackupName="" |
---|
252 | IFS=":" # Change the field separator to ':'. |
---|
253 | exec 3<&- |
---|
254 | exec 3< $mmsdrfsFile |
---|
255 | while read -u3 sdrfsLine |
---|
256 | do |
---|
257 | # Parse the line. |
---|
258 | set -f ; set -A v -- - $sdrfsLine ; set +f |
---|
259 | |
---|
260 | IFS="$IFS_sv" # Restore the default IFS settings. |
---|
261 | printLine=true # Assume the line will be printed. |
---|
262 | |
---|
263 | case ${v[$LINE_TYPE_Field]} in |
---|
264 | |
---|
265 | $VERSION_LINE ) # This is the global header line. |
---|
266 | # Save the version line for updating later. |
---|
267 | versionLine=$(print_newLine) |
---|
268 | printLine=false |
---|
269 | ;; |
---|
270 | |
---|
271 | $NODESET_HDR ) |
---|
272 | # If the daemon and the mmsdrserv tcp ports are shared, |
---|
273 | # it will be necessary to ensure that the daemon is down |
---|
274 | # on the config server nodes if there names will change. |
---|
275 | if [[ -z ${v[$GETOBJECT_PORT_Field]} || |
---|
276 | ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]] |
---|
277 | then |
---|
278 | sharedSdrservPort=yes |
---|
279 | fi |
---|
280 | ;; |
---|
281 | |
---|
282 | $MEMBER_NODE ) # This line describes a node. |
---|
283 | # Add the reliable node name to nodefile. |
---|
284 | print -- "${v[$REL_HOSTNAME_Field]}" >> $nodefile |
---|
285 | checkForErrors "writing to file $nodefile" $? |
---|
286 | |
---|
287 | # Reset the node error flag. |
---|
288 | nodeError="" |
---|
289 | |
---|
290 | # Obtain the data for this node from the node file. |
---|
291 | nodeLine=$($awk -F: ' \ |
---|
292 | $1 == "'${v[$DAEMON_NODENAME_Field]}'" || \ |
---|
293 | $1 == "'${v[$REL_HOSTNAME_Field]}'" || \ |
---|
294 | $1 == "'${v[$NODE_NAME_Field]}'" || \ |
---|
295 | $1 == "'${v[$ADMIN_SHORTNAME_Field]}'" || \ |
---|
296 | $1 == "'${v[$NODE_NUMBER_Field]}'" || \ |
---|
297 | $1 == "'${v[$IPA_Field]}'" { \ |
---|
298 | { print $0 } \ |
---|
299 | { exit } \ |
---|
300 | } \ |
---|
301 | ' $inputNodes) |
---|
302 | |
---|
303 | if [[ -n $nodeLine ]] |
---|
304 | then |
---|
305 | # We found data for this node. Parse the input. |
---|
306 | IFS=":" # Change the field separator to ':'. |
---|
307 | set -f ; set -- $nodeLine ; set +f |
---|
308 | nodeName=$1 |
---|
309 | nodeName2=$3 |
---|
310 | IFS="$IFS_sv" # Restore the default IFS setting. |
---|
311 | |
---|
312 | # Determine the daemon node name. |
---|
313 | if [[ -n ${v[$DAEMON_NODENAME_Field]} ]] |
---|
314 | then |
---|
315 | daemonNodeName=${v[$DAEMON_NODENAME_Field]} |
---|
316 | else |
---|
317 | daemonNodeName=${v[$REL_HOSTNAME_Field]} |
---|
318 | fi |
---|
319 | |
---|
320 | # Did the user reset or specify the admin node name? |
---|
321 | if [[ -z $nodeName2 ]] |
---|
322 | then |
---|
323 | # The admin node name was null, indicating "reset"; |
---|
324 | # set the admin node name to the daemon node name value. |
---|
325 | adminNodeName=$daemonNodeName |
---|
326 | adminShortName=${v[$NODE_NAME_Field]} |
---|
327 | |
---|
328 | else |
---|
329 | # The admin node name was not null, indicating "specify"; |
---|
330 | # Determine the IP address for the specified admin node name. |
---|
331 | hostResult=$($host $nodeName2) |
---|
332 | set -f ; set -- $hostResult ; set +f |
---|
333 | adminNodeName=$1 |
---|
334 | adminShortName=${1%% *|.*} # Exclude everything after the first dot. |
---|
335 | adminIpa=${3%%,*} |
---|
336 | |
---|
337 | # Check that the admin node name has a valid IP address. |
---|
338 | if [[ -z $adminIpa ]] |
---|
339 | then |
---|
340 | # An invalid node name was specified. |
---|
341 | printErrorMsg 54 $mmcmd $nodeName2 |
---|
342 | fatalError=yes |
---|
343 | break |
---|
344 | fi |
---|
345 | |
---|
346 | # Invoke the checkAdapter function to ensure that |
---|
347 | # the specified adapter interface exists on the node. |
---|
348 | mmcommonOutput=$($mmcommon on1 ${v[$REL_HOSTNAME_Field]} \ |
---|
349 | checkAdapter $adminIpa 2> $errMsg) |
---|
350 | rc=$? |
---|
351 | set -f ; set -- $mmcommonOutput ; set +f |
---|
352 | nodeStatus=$1 |
---|
353 | if [[ $rc != 0 || $nodeStatus != success ]] |
---|
354 | then |
---|
355 | # The checkAdapter call failed. |
---|
356 | # We will not define a new admin node name for this node |
---|
357 | # but we will continue to process the remaining nodes. |
---|
358 | # Tell the world what went wrong with this node. |
---|
359 | if [[ $nodeStatus = ipa_alias ]] |
---|
360 | then |
---|
361 | # IP address aliasing is not supported. |
---|
362 | printErrorMsg 476 $mmcmd $nodeName2 |
---|
363 | elif [[ $nodeStatus = ipa_missing ]] |
---|
364 | then |
---|
365 | # The admin IP address is not known on the node. |
---|
366 | printErrorMsg 154 $mmcmd $nodeName2 ${v[$REL_HOSTNAME_Field]} |
---|
367 | elif [[ $rc = $MM_HostDown || $rc = $MM_ConnectTimeout ]] |
---|
368 | then |
---|
369 | # The node cannot be reached. |
---|
370 | printErrorMsg 340 $mmcmd ${v[$REL_HOSTNAME_Field]} |
---|
371 | else |
---|
372 | # Unexpected error. Display all possible error messages. |
---|
373 | [[ -s $errMsg ]] && $cat $errMsg 1>&2 |
---|
374 | [[ $rc -eq 0 ]] && rc=1 |
---|
375 | checkForErrors "checkAdapter ${v[$REL_HOSTNAME_Field]}" $rc |
---|
376 | fi |
---|
377 | |
---|
378 | # Append the node name to the list of failed nodes and |
---|
379 | # set a flag to indicate the node name did not check out. |
---|
380 | failedNodes="${failedNodes}\n\t${nodeName}" |
---|
381 | nodeError=yes |
---|
382 | |
---|
383 | fi # end of if [[ $rc != 0 || $nodeStatus != success ]] |
---|
384 | |
---|
385 | fi # end of if [[ -z $nodeName2 ]] |
---|
386 | |
---|
387 | # Update the member line if there was no error. |
---|
388 | if [[ -z $nodeError ]] |
---|
389 | then |
---|
390 | # Remember the new primary or backup server name for updating |
---|
391 | # the version line later if this is one of those servers. |
---|
392 | [[ ${v[$REL_HOSTNAME_Field]} = $primaryServer ]] && \ |
---|
393 | newPrimaryName=$adminNodeName |
---|
394 | [[ ${v[$REL_HOSTNAME_Field]} = $backupServer ]] && \ |
---|
395 | newBackupName=$adminNodeName |
---|
396 | |
---|
397 | # Things checked out ok. Set the node name fields. |
---|
398 | v[$DAEMON_NODENAME_Field]=$daemonNodeName |
---|
399 | v[$REL_HOSTNAME_Field]=$adminNodeName |
---|
400 | v[$ADMIN_SHORTNAME_Field]=$adminShortName |
---|
401 | changeMade=yes |
---|
402 | fi |
---|
403 | |
---|
404 | $rm -f $errMsg |
---|
405 | |
---|
406 | fi # end of if [[ -n $nodeLine ]] |
---|
407 | ;; |
---|
408 | |
---|
409 | * ) # We are not interested in any other lines. |
---|
410 | ;; |
---|
411 | |
---|
412 | esac # end of case ${v[$LINE_TYPE_Field]} in |
---|
413 | |
---|
414 | # Unless suppressed, write the line to the new mmsdrfs file. |
---|
415 | if [[ $printLine = true ]] |
---|
416 | then |
---|
417 | print_newLine >> $newsdrfs |
---|
418 | checkForErrors "writing to file $newsdrfs" $? |
---|
419 | fi |
---|
420 | |
---|
421 | IFS=":" # Change the separator back to ":" for the next iteration. |
---|
422 | |
---|
423 | done # end of while read -u3 |
---|
424 | |
---|
425 | IFS="$IFS_sv" # Restore the default IFS settings. |
---|
426 | |
---|
427 | # Go through the mmsdrfs file to update the NSD servers admin node names. |
---|
428 | $rm -f $tmpsdrfs |
---|
429 | IFS=":" |
---|
430 | exec 3<&- |
---|
431 | exec 3< $newsdrfs |
---|
432 | while read -u3 sdrfsLine |
---|
433 | do |
---|
434 | # Parse the line. |
---|
435 | set -f ; set -A v -- - $sdrfsLine ; set +f |
---|
436 | IFS="$IFS_sv" |
---|
437 | |
---|
438 | # Change some of the fields depending on the type of line. |
---|
439 | case ${v[$LINE_TYPE_Field]} in |
---|
440 | |
---|
441 | $SG_DISKS ) # This is the line for some disk. |
---|
442 | |
---|
443 | # If this disk is an NSD with a valid PVID value, |
---|
444 | # make sure the daemon nsd server names are recorded. |
---|
445 | if [[ ${v[$DISK_TYPE_Field]} = nsd && -n ${v[$PVID_Field]} ]] |
---|
446 | then |
---|
447 | # If a server node was specified, check that it is valid and |
---|
448 | # convert it to get the potentially new admin adapter name. |
---|
449 | # We determine whether a server was specified by checking for an |
---|
450 | # admin nsd server name, but we do not use that name for finding |
---|
451 | # the node information, since the old admin node name may |
---|
452 | # no longer exist as a result of the update we just did. |
---|
453 | # We use the daemon node name to find the node instead, |
---|
454 | # since mmchcluster -N does not change daemon node names. |
---|
455 | if [[ -n ${v[$NSD_PRIMARY_NODE_Field]} ]] |
---|
456 | then |
---|
457 | # If no daemon node name has yet been recorded for the |
---|
458 | # primary NSD server, determine and store it now. |
---|
459 | server=${v[$DAEMON_NSD_PRIMARY_Field]} |
---|
460 | if [[ -z $server ]] |
---|
461 | then |
---|
462 | server=$(checkAndConvertNodeValue \ |
---|
463 | ${v[$NSD_PRIMARY_NODE_Field]} $DAEMON_NODENAME_Field) |
---|
464 | checkForErrors "checkAndConvertNodeValue" $? |
---|
465 | v[$DAEMON_NSD_PRIMARY_Field]=$server |
---|
466 | fi |
---|
467 | # Use the primary server's daemon node name to obtain |
---|
468 | # the primary server's admin node name. |
---|
469 | v[$NSD_PRIMARY_NODE_Field]=$(checkAndConvertNodeValue \ |
---|
470 | $server $REL_HOSTNAME_Field $newsdrfs) |
---|
471 | checkForErrors "checkAndConvertNodeValue $server" $? |
---|
472 | fi |
---|
473 | if [[ -n ${v[$NSD_BACKUP_NODE_Field]} ]] |
---|
474 | then |
---|
475 | # If no daemon node name has yet been recorded for the |
---|
476 | # backup NSD server, determine and store it now. |
---|
477 | backup=${v[$DAEMON_NSD_BACKUP_Field]} |
---|
478 | if [[ -z $backup ]] |
---|
479 | then |
---|
480 | backup=$(checkAndConvertNodeValue \ |
---|
481 | ${v[$NSD_BACKUP_NODE_Field]} $DAEMON_NODENAME_Field) |
---|
482 | checkForErrors "checkAndConvertNodeValue" $? |
---|
483 | v[$DAEMON_NSD_BACKUP_Field]=$backup |
---|
484 | fi |
---|
485 | # Use the backup server's daemon node name to obtain |
---|
486 | # the backup server's admin node name. |
---|
487 | v[$NSD_BACKUP_NODE_Field]=$(checkAndConvertNodeValue \ |
---|
488 | $backup $REL_HOSTNAME_Field $newsdrfs) |
---|
489 | checkForErrors "checkAndConvertNodeValue $backup" $? |
---|
490 | fi |
---|
491 | fi # end of if (v[$DISK_TYPE_Field] == "nsd" && -n v[$PVID_Field]) |
---|
492 | ;; |
---|
493 | |
---|
494 | * ) # We are not interested in any other lines. |
---|
495 | ;; |
---|
496 | |
---|
497 | esac # end Change some of the fields |
---|
498 | |
---|
499 | # Build and write the line to the temp version of the mmsdrfs file. |
---|
500 | print_newLine >> $tmpsdrfs |
---|
501 | checkForErrors "writing to file $tmpsdrfs" $? |
---|
502 | |
---|
503 | IFS=":" # Change the separator back to ":" for the next iteration. |
---|
504 | |
---|
505 | done # end while read -u3 sdrfsLine |
---|
506 | |
---|
507 | IFS="$IFS_sv" # Restore the default IFS settings. |
---|
508 | |
---|
509 | # If a fatal error occurred, or if no changes were made, |
---|
510 | # release the lock, report any failed nodes, and return. |
---|
511 | if [[ -n $fatalError || -z $changeMade ]] |
---|
512 | then |
---|
513 | freeLockOnServer $primaryServer $ourNodeNumber >/dev/null |
---|
514 | if [[ -n $failedNodes ]] |
---|
515 | then |
---|
516 | # Administrative node names were not defined for nodes ... |
---|
517 | printErrorMsg 174 $mmcmd $failedNodes |
---|
518 | fi |
---|
519 | if [[ -n $fatalError ]] |
---|
520 | then |
---|
521 | printErrorMsg 389 $mmcmd # The command failed. |
---|
522 | else |
---|
523 | printErrorMsg 387 $mmcmd $mmcmd # Command quitting due to no valid nodes. |
---|
524 | fi |
---|
525 | return 1 |
---|
526 | fi |
---|
527 | |
---|
528 | # Create the updated version line and add it to the new sdrfs file. |
---|
529 | # The generation number is incremented and the server names may change. |
---|
530 | IFS=":" # Change the field separator to ':'. |
---|
531 | set -f ; set -A v -- - $versionLine ; set +f |
---|
532 | IFS="$IFS_sv" # Restore the default IFS setting. |
---|
533 | newGenNumber=${v[$SDRFS_GENNUM_Field]}+1 |
---|
534 | v[$SDRFS_GENNUM_Field]=$newGenNumber |
---|
535 | [[ -n $newPrimaryName ]] && v[$PRIMARY_SERVER_Field]=$newPrimaryName |
---|
536 | [[ -n $newBackupName ]] && v[$BACKUP_SERVER_Field]=$newBackupName |
---|
537 | print_newLine >> $tmpsdrfs |
---|
538 | checkForErrors "writing to file $tmpsdrfs" $? |
---|
539 | |
---|
540 | # If the GPFS and mmsdrserv daemons share the same tcp port number, |
---|
541 | # and the names of the primary or backup configuration servers are |
---|
542 | # changing, it is necessary to ensure that the GPFS daemon is down |
---|
543 | # on the server nodes and the mmsdrserv daemon is restarted. |
---|
544 | # Otherwise, the server nodes will continue giving (stale) Gpfs object |
---|
545 | # or return ESDR_NOT_SERVER errors. |
---|
546 | if [[ -n $sharedSdrservPort && ( -n $newPrimaryName || -n $newBackupName ) ]] |
---|
547 | then |
---|
548 | # Get the names of the config servers. |
---|
549 | print -- "${v[$PRIMARY_SERVER_Field]}\n${v[$BACKUP_SERVER_Field]}" > $tmpNodes |
---|
550 | checkForErrors "writing to file $tmpNodes" $? |
---|
551 | |
---|
552 | # Verify the daemon is down; do not lock the Gpfs object. |
---|
553 | printInfoMsg 453 |
---|
554 | verifyDaemonInactive $tmpNodes |
---|
555 | [[ $? -ne 0 ]] && return 1 |
---|
556 | |
---|
557 | commitOptions="initLocalNodeData,KILLSDRSERV" |
---|
558 | else |
---|
559 | commitOptions="initLocalNodeData" |
---|
560 | fi # end of if [[ -n $sharedSdrservPort ]] |
---|
561 | |
---|
562 | # Make sure the new sdrfs file is properly sorted. |
---|
563 | LC_ALL=C $SORT_MMSDRFS $tmpsdrfs -o $newsdrfs |
---|
564 | |
---|
565 | # Put the new mmsdrfs file into the sdr. This will make the newly-added |
---|
566 | # admin nodes visible to the rest of the nodes in the cluster. |
---|
567 | trap "" HUP INT QUIT KILL |
---|
568 | gpfsObjectInfo=$(commitChanges $nsId $nsId \ |
---|
569 | $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer $commitOptions) |
---|
570 | rc=$? |
---|
571 | if [[ $rc -ne 0 ]] |
---|
572 | then |
---|
573 | # We were unable to replace the file in the sdr. |
---|
574 | printErrorMsg 381 $mmcmd |
---|
575 | return 1 |
---|
576 | fi |
---|
577 | |
---|
578 | # Unlock the sdr. |
---|
579 | freeLockOnServer $primaryServer $ourNodeNumber >/dev/null |
---|
580 | trap posttrap HUP INT QUIT KILL |
---|
581 | |
---|
582 | # Propagate the new mmsdrfs file to all nodes in the cluster. |
---|
583 | # This process is asynchronous. |
---|
584 | propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber initLocalNodeData |
---|
585 | |
---|
586 | # Report any nodes that did not check successfully. |
---|
587 | if [[ -n $failedNodes ]] |
---|
588 | then |
---|
589 | # Administrative node names were not defined for nodes ... |
---|
590 | printErrorMsg 174 $mmcmd $failedNodes |
---|
591 | fi |
---|
592 | |
---|
593 | return 0 |
---|
594 | |
---|
595 | } #----- end of function specifyAdminNetwork ------------------- |
---|
596 | |
---|
597 | |
---|
598 | ################################################################### |
---|
599 | # This function is called if there is an interrupt after the new |
---|
600 | # mmsdrfs file was committed on the new primary and backup servers |
---|
601 | # but before the change was propagated to the rest of the nodes. |
---|
602 | ################################################################### |
---|
603 | function localPosttrap |
---|
604 | { |
---|
605 | $mmTRACE_ENTER "$*" |
---|
606 | |
---|
607 | # Tell the guy which nodes must be up and which command to run. |
---|
608 | printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer" |
---|
609 | printErrorMsg 344 $mmcmd "mmchcluster -p LATEST" |
---|
610 | cleanupAndExit 2 |
---|
611 | |
---|
612 | } #----- end of function localPosttrap ------------------------ |
---|
613 | |
---|
614 | |
---|
615 | |
---|
616 | ###################### |
---|
617 | # Mainline processing |
---|
618 | ###################### |
---|
619 | |
---|
620 | |
---|
621 | ################################################### |
---|
622 | # Process the command arguments. |
---|
623 | ################################################### |
---|
624 | [[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] && \ |
---|
625 | syntaxError "help" $usageMsg |
---|
626 | |
---|
627 | [[ $argc -lt 2 ]] && \ |
---|
628 | syntaxError "missingArgs" $usageMsg |
---|
629 | |
---|
630 | while getopts :C:N:p:r:R:s: OPT |
---|
631 | do |
---|
632 | case $OPT in |
---|
633 | |
---|
634 | C) # cluster name |
---|
635 | [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT" |
---|
636 | Cflag="-$OPT" |
---|
637 | Carg=$OPTARG |
---|
638 | ;; |
---|
639 | |
---|
640 | N) # define/replace secondary network |
---|
641 | [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT" |
---|
642 | Nflag="-$OPT" |
---|
643 | Narg=$OPTARG |
---|
644 | ;; |
---|
645 | |
---|
646 | p) # primary server |
---|
647 | [[ -n $pflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT" |
---|
648 | pflag="-$OPT" |
---|
649 | parg=$OPTARG |
---|
650 | otherOpt="-$OPT" |
---|
651 | ;; |
---|
652 | |
---|
653 | r) # remote shell command |
---|
654 | [[ -n $rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT" |
---|
655 | rflag="-$OPT" |
---|
656 | rarg=$OPTARG |
---|
657 | [[ $rarg = ${rarg#/} ]] && \ |
---|
658 | syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$rarg" |
---|
659 | otherOpt="-$OPT" |
---|
660 | ;; |
---|
661 | |
---|
662 | R) # remote file copy command |
---|
663 | [[ -n $Rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT" |
---|
664 | Rflag="-$OPT" |
---|
665 | Rarg=$OPTARG |
---|
666 | [[ $Rarg = ${Rarg#/} ]] && \ |
---|
667 | syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$Rarg" |
---|
668 | otherOpt="-$OPT" |
---|
669 | ;; |
---|
670 | |
---|
671 | s) # secondary server |
---|
672 | [[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT" |
---|
673 | sflag="-$OPT" |
---|
674 | sarg=$OPTARG |
---|
675 | otherOpt="-$OPT" |
---|
676 | ;; |
---|
677 | |
---|
678 | +[CNprRs]) # Invalid option |
---|
679 | syntaxError "invalidOption" $usageMsg $OPT |
---|
680 | ;; |
---|
681 | |
---|
682 | :) # Missing argument |
---|
683 | syntaxError "missingValue" $usageMsg $OPTARG |
---|
684 | ;; |
---|
685 | |
---|
686 | *) # Invalid option |
---|
687 | syntaxError "invalidOption" $usageMsg $OPTARG |
---|
688 | ;; |
---|
689 | esac |
---|
690 | |
---|
691 | done |
---|
692 | |
---|
693 | shift OPTIND-1 |
---|
694 | [[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1 |
---|
695 | |
---|
696 | [[ -n $sflag && $parg = LATEST ]] && \ |
---|
697 | syntaxError "invalidCombination" $usageMsg "-s" "-p LATEST" |
---|
698 | |
---|
699 | [[ -n $rflag && -n $pflag ]] && \ |
---|
700 | syntaxError "invalidCombination" $usageMsg "-r" "-p" |
---|
701 | |
---|
702 | [[ -n $rflag && -n $sflag ]] && \ |
---|
703 | syntaxError "invalidCombination" $usageMsg "-r" "-s" |
---|
704 | |
---|
705 | [[ -n $Rflag && -n $pflag ]] && \ |
---|
706 | syntaxError "invalidCombination" $usageMsg "-R" "-p" |
---|
707 | |
---|
708 | [[ -n $Rflag && -n $sflag ]] && \ |
---|
709 | syntaxError "invalidCombination" $usageMsg "-R" "-s" |
---|
710 | |
---|
711 | # The primary GPFS cluster configuration server cannot be removed. |
---|
712 | [[ -n $pflag && $parg = "" ]] && \ |
---|
713 | syntaxError "missingValue" $usageMsg "-p" |
---|
714 | |
---|
715 | [[ -n $Nflag && -n $otherOpt ]] && \ |
---|
716 | syntaxError "invalidCombination" $usageMsg "-N" "$otherOpt" |
---|
717 | |
---|
718 | [[ -n $Cflag && -n $otherOpt ]] && \ |
---|
719 | syntaxError "invalidCombination" $usageMsg "-C" "$otherOpt" |
---|
720 | |
---|
721 | |
---|
722 | ############################################################################# |
---|
723 | # If the request is to change a remote command, invoke the mmsetrcmd script. |
---|
724 | # Keep in mind that rarg and Rarg may include options for the respective |
---|
725 | # commands and, therefore, must always be quoted. |
---|
726 | ############################################################################# |
---|
727 | if [[ -n $rflag || -n $Rflag ]] |
---|
728 | then |
---|
729 | if [[ -z $Rflag ]] |
---|
730 | then |
---|
731 | $mmsetrcmd "$rflag" "$rarg" |
---|
732 | rc=$? |
---|
733 | elif [[ -z $rflag ]] |
---|
734 | then |
---|
735 | $mmsetrcmd "$Rflag" "$Rarg" |
---|
736 | rc=$? |
---|
737 | else |
---|
738 | $mmsetrcmd "$rflag" "$rarg" "$Rflag" "$Rarg" |
---|
739 | rc=$? |
---|
740 | fi |
---|
741 | cleanupAndExit $rc |
---|
742 | fi |
---|
743 | |
---|
744 | |
---|
745 | ############################################################# |
---|
746 | # If the request is to specify changes to the admin network, |
---|
747 | # invoke the function to do the work and exit. |
---|
748 | ############################################################# |
---|
749 | if [[ -n $Nflag ]] |
---|
750 | then |
---|
751 | specifyAdminNetwork "$Narg" |
---|
752 | cleanupAndExit $? |
---|
753 | fi |
---|
754 | |
---|
755 | |
---|
756 | ######################################################## |
---|
757 | # If the request is to change the cluster name, |
---|
758 | # invoke the mmsetrcmd script. |
---|
759 | ######################################################## |
---|
760 | if [[ -n $Cflag ]] |
---|
761 | then |
---|
762 | $mmsetrcmd "$Cflag" "$Carg" |
---|
763 | cleanupAndExit $? |
---|
764 | fi |
---|
765 | |
---|
766 | |
---|
767 | ################################################################# |
---|
768 | # Set up trap exception handling and call the gpfsInit function. |
---|
769 | # It will attempt to ensure that the local copy of the mmsdrfs |
---|
770 | # and the rest of the GPFS system files are up-to-date. |
---|
771 | # Try to get the lock but do not fail if this is not possible. |
---|
772 | ################################################################# |
---|
773 | trap pretrap HUP INT QUIT KILL |
---|
774 | |
---|
775 | if [[ $parg = LATEST ]] |
---|
776 | then |
---|
777 | # The LATEST keyword was specified. Try to obtain the |
---|
778 | # most recent mmsdrfs file (i.e., the mmsdrfs file with the |
---|
779 | # highest gen number) among all the nodes in the cluster. |
---|
780 | # To do that, use the local mmsdrfs file as a starting point. |
---|
781 | getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes |
---|
782 | gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile) |
---|
783 | rc=$? |
---|
784 | |
---|
785 | else |
---|
786 | # The LATEST keyword was not specified. Try to obtain |
---|
787 | # the mmsdrfs file from one of the servers with locking. |
---|
788 | gpfsInitOutput=$(gpfsInit $lockId 2> $initErrors) |
---|
789 | rc=$? |
---|
790 | LOCAL_FILES="$LOCAL_FILES $initErrors " |
---|
791 | if [[ $rc -ne 0 ]] |
---|
792 | then |
---|
793 | # We failed to get the sdrfs file with a lock. Check whether |
---|
794 | # some other mm command currently holds the lock. If yes, give up. |
---|
795 | $grep -e "Timed out waiting for lock: Try again later." \ |
---|
796 | -e "6027-1229" $initErrors > /dev/null 2>&1 |
---|
797 | ec=$? |
---|
798 | if [[ $ec -eq 0 ]] |
---|
799 | then |
---|
800 | # Display the messages from gpfsInit. |
---|
801 | $cat $initErrors | \ |
---|
802 | $grep -v -e "6027-1227" -e "file is locked. Retrying..." 1>&2 |
---|
803 | cleanupAndExit |
---|
804 | fi |
---|
805 | |
---|
806 | # We failed to get the sdrfs file with a lock. Display any messages. |
---|
807 | $cat $initErrors 1>&2 |
---|
808 | # Processing continues. |
---|
809 | printErrorMsg 437 $mmcmd |
---|
810 | |
---|
811 | # Now try the gpfsInit again, but this time do not ask for a lock. |
---|
812 | # If there is a backup server, and if it is available, |
---|
813 | # we should be able to get the latest GPFS system files from there. |
---|
814 | gpfsInitOutput=$(gpfsInit nolock 2>/dev/null) |
---|
815 | rc=$? |
---|
816 | if [[ $rc -ne 0 ]] |
---|
817 | then |
---|
818 | # We also failed to get the sdrfs file without locking. Now try |
---|
819 | # to obtain the most recent mmsdrfs file (i.e., the mmsdrfs file |
---|
820 | # with the highest gen number) among all the nodes in the cluster. |
---|
821 | # To do that, use the local mmsdrfs file as a starting point. |
---|
822 | getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes |
---|
823 | gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile) |
---|
824 | rc=$? |
---|
825 | fi |
---|
826 | fi |
---|
827 | fi # end of if [[ $parg = LATEST ]] |
---|
828 | |
---|
829 | # Check whether we succeeded in obtaining the desired mmsdrfs file. |
---|
830 | if [[ $rc -ne 0 ]] |
---|
831 | then |
---|
832 | # Not enough nodes are available. |
---|
833 | printErrorMsg 378 $mmcmd |
---|
834 | cleanupAndExit |
---|
835 | fi |
---|
836 | |
---|
837 | # Parse the output from the init function. |
---|
838 | setGlobalVar $rc $gpfsInitOutput |
---|
839 | |
---|
840 | if [[ $MMMODE = single ]] |
---|
841 | then |
---|
842 | # Command currently not valid for cluster type single. |
---|
843 | printErrorMsg 376 $mmcmd single |
---|
844 | cleanupAndExit |
---|
845 | fi |
---|
846 | |
---|
847 | if [[ $MMMODE != lc ]] |
---|
848 | then |
---|
849 | # Unknown GPFS nodeset type |
---|
850 | printErrorMsg 338 $mmcmd $MMMODE |
---|
851 | cleanupAndExit |
---|
852 | fi |
---|
853 | |
---|
854 | |
---|
855 | ####################################################### |
---|
856 | # Determine the reliable hostnames of the new servers. |
---|
857 | ####################################################### |
---|
858 | if [[ -n $pflag && $parg != LATEST ]] |
---|
859 | then |
---|
860 | # Find the name of the primary server. |
---|
861 | newPrimaryServer=$(checkAndConvertNodeValue $parg $REL_HOSTNAME_Field) |
---|
862 | if [[ $? -ne 0 ]] |
---|
863 | then |
---|
864 | printErrorMsg 352 $mmcmd $parg |
---|
865 | cleanupAndExit |
---|
866 | fi |
---|
867 | else |
---|
868 | # If -p not specified, the primary server remains the same. |
---|
869 | newPrimaryServer=$primaryServer |
---|
870 | fi # end of if [[ -n $parg && $parg != LATEST ]] |
---|
871 | |
---|
872 | if [[ -n $sflag ]] |
---|
873 | then |
---|
874 | if [[ -n $sarg ]] |
---|
875 | then |
---|
876 | # Find the name of the secondary server. |
---|
877 | newBackupServer=$(checkAndConvertNodeValue $sarg $REL_HOSTNAME_Field) |
---|
878 | if [[ $? -ne 0 ]] |
---|
879 | then |
---|
880 | printErrorMsg 352 $mmcmd $sarg |
---|
881 | cleanupAndExit |
---|
882 | fi |
---|
883 | else |
---|
884 | # We are deleting the backup server (-s "" was specified). |
---|
885 | newBackupServer="" |
---|
886 | fi |
---|
887 | else |
---|
888 | # If -s not specified, the backup server remains the same. |
---|
889 | newBackupServer=$backupServer |
---|
890 | fi # end of if [[ -n $sarg ]] |
---|
891 | |
---|
892 | # Cross check the two server names. |
---|
893 | if [[ $newBackupServer = $newPrimaryServer ]] |
---|
894 | then |
---|
895 | # The same node was specified as primary and backup server. |
---|
896 | printErrorMsg 346 $mmcmd |
---|
897 | cleanupAndExit |
---|
898 | fi |
---|
899 | |
---|
900 | # Check whether anything needs to be done at all. |
---|
901 | [[ $newPrimaryServer = $primaryServer && \ |
---|
902 | $newBackupServer = $backupServer && \ |
---|
903 | $parg != LATEST ]] && \ |
---|
904 | cleanupAndExit 0 # Servers are already as desired. |
---|
905 | |
---|
906 | |
---|
907 | ################################################################# |
---|
908 | # Go through the current mmsdrfs file. Increment the generation |
---|
909 | # number and change the server names. Create a file with the |
---|
910 | # reliable hostnames of all nodes in the cluster. |
---|
911 | ################################################################# |
---|
912 | $rm -f $newsdrfs $allNodes $clientNodes |
---|
913 | IFS=":" # Change the field separator to ':'. |
---|
914 | exec 3<&- |
---|
915 | exec 3< $mmsdrfsFile |
---|
916 | while read -u3 sdrfsLine |
---|
917 | do |
---|
918 | # Parse the line. |
---|
919 | set -f ; set -A v -- - $sdrfsLine ; set +f |
---|
920 | IFS="$IFS_sv" # Restore the default IFS settings. |
---|
921 | |
---|
922 | # Change some of the fields depending on the type of line. |
---|
923 | case ${v[$LINE_TYPE_Field]} in |
---|
924 | |
---|
925 | $VERSION_LINE ) |
---|
926 | # Increment the generation number. |
---|
927 | newGenNumber=${v[$SDRFS_GENNUM_Field]}+1 |
---|
928 | v[$SDRFS_GENNUM_Field]=$newGenNumber |
---|
929 | v[$PRIMARY_SERVER_Field]=$newPrimaryServer |
---|
930 | v[$BACKUP_SERVER_Field]=$newBackupServer |
---|
931 | ;; |
---|
932 | |
---|
933 | $NODESET_HDR ) |
---|
934 | # If the daemon and the mmsdrserv tcp ports are shared, |
---|
935 | # it will be necessary to ensure that the daemon is down |
---|
936 | # on the old and new config server nodes. |
---|
937 | if [[ -z ${v[$GETOBJECT_PORT_Field]} || |
---|
938 | ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]] |
---|
939 | then |
---|
940 | daemonMustBeDown=yes |
---|
941 | fi |
---|
942 | ;; |
---|
943 | |
---|
944 | $MEMBER_NODE ) |
---|
945 | # If this is our node, save the reliable name. |
---|
946 | [[ ${v[$NODE_NUMBER_Field]} = $ourNodeNumber ]] && \ |
---|
947 | ourNodeName=${v[$REL_HOSTNAME_Field]} |
---|
948 | |
---|
949 | # All nodes will go in the allNodes file. |
---|
950 | print -- "${v[$REL_HOSTNAME_Field]}" >> $allNodes |
---|
951 | checkForErrors "writing to file $allNodes" $? |
---|
952 | |
---|
953 | # The server nodes and the local node will |
---|
954 | # not go in the clientNodes file. |
---|
955 | if [[ ${v[$REL_HOSTNAME_Field]} != $newPrimaryServer && |
---|
956 | ${v[$REL_HOSTNAME_Field]} != $newBackupServer && |
---|
957 | ${v[$REL_HOSTNAME_Field]} != $ourNodeName ]] |
---|
958 | then |
---|
959 | print -- "${v[$REL_HOSTNAME_Field]}" >> $clientNodes |
---|
960 | checkForErrors "writing to file $clientNodes" $? |
---|
961 | fi |
---|
962 | ;; |
---|
963 | |
---|
964 | * ) # Pass all other lines without change. |
---|
965 | ;; |
---|
966 | |
---|
967 | esac # end Change some of the fields |
---|
968 | |
---|
969 | # Build and write the line to the new mmsdrfs file. |
---|
970 | print_newLine >> $newsdrfs |
---|
971 | checkForErrors "writing to file $newsdrfs" $? |
---|
972 | |
---|
973 | IFS=":" # Change the separator back to ":" for the next iteration. |
---|
974 | |
---|
975 | done # end of while read -u3 sdrfsLine |
---|
976 | |
---|
977 | IFS="$IFS_sv" # Restore the default IFS settings. |
---|
978 | |
---|
979 | |
---|
980 | ####################################################################### |
---|
981 | # If the GPFS and mmsdrserv daemons share the same tcp port number, |
---|
982 | # it is necessary to ensure that the GPFS daemon is down on the old |
---|
983 | # and new configuration server nodes. Otherwise, the old server nodes |
---|
984 | # will continue giving (stale) Gpfs object information, while the new |
---|
985 | # servers will not be able to respond to requests because the GPFS |
---|
986 | # daemon cannot assume mmsdrserv duties if it is already running. |
---|
987 | ####################################################################### |
---|
988 | if [[ -n $daemonMustBeDown && $parg != LATEST ]] |
---|
989 | then |
---|
990 | # Put the old and new server names in a file. |
---|
991 | print -- "$primaryServer\n$backupServer\n" \ |
---|
992 | "$newPrimaryServer\n$newBackupServer" > $tmpNodes |
---|
993 | checkForErrors "writing to file $tmpNodes" $? |
---|
994 | |
---|
995 | # Eliminate duplicate names. |
---|
996 | $sort -u $tmpNodes -o $tmpNodes |
---|
997 | checkForErrors "sort $tmpNodes" $? |
---|
998 | |
---|
999 | # Verify the daemon is down; do not lock the Gpfs object. |
---|
1000 | printInfoMsg 453 |
---|
1001 | verifyDaemonInactive $tmpNodes |
---|
1002 | [[ $? -ne 0 ]] && cleanupAndExit |
---|
1003 | fi # end of if [[ -n $daemonMustBeDown ]] |
---|
1004 | |
---|
1005 | |
---|
1006 | ###################################################### |
---|
1007 | # First, put the new mmsdrfs file on the two servers. |
---|
1008 | # This must succeed no matter what. |
---|
1009 | ###################################################### |
---|
1010 | trap "" HUP INT QUIT KILL |
---|
1011 | gpfsObjectInfo=$(commitChanges \ |
---|
1012 | $nsId $nsId $gpfsObjectInfo $newGenNumber $newsdrfs \ |
---|
1013 | $newPrimaryServer FORCE $newBackupServer) |
---|
1014 | rc=$? |
---|
1015 | if [[ $rc -ne 0 ]] |
---|
1016 | then |
---|
1017 | # Cannot replace file in the sdr. |
---|
1018 | printErrorMsg 381 $mmcmd |
---|
1019 | |
---|
1020 | # The mmchcluster failed - get out. |
---|
1021 | # Tell the guy which nodes must be up and which command to run. |
---|
1022 | printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer" |
---|
1023 | printErrorMsg 344 $mmcmd "mmchcluster" |
---|
1024 | cleanupAndExit |
---|
1025 | fi |
---|
1026 | |
---|
1027 | # Restore interrupts. |
---|
1028 | trap localPosttrap HUP INT QUIT KILL |
---|
1029 | |
---|
1030 | |
---|
1031 | ################################################# |
---|
1032 | # Propagate the changes to the non-server nodes. |
---|
1033 | ################################################# |
---|
1034 | if [[ $ourNodeName != $newPrimaryServer && |
---|
1035 | $ourNodeName != $newBackupServer ]] |
---|
1036 | then |
---|
1037 | $cp $newsdrfs $mmsdrfsFile |
---|
1038 | checkForErrors "writing to file $mmsdrfsFile" $? |
---|
1039 | fi |
---|
1040 | |
---|
1041 | if [[ -s $clientNodes ]] |
---|
1042 | then |
---|
1043 | # Calculate the checksum of the new mmsdrfs file. |
---|
1044 | sumOutput=$($sum $newsdrfs) |
---|
1045 | checkForErrors "sum $newsdrfs" $? |
---|
1046 | set -f ; set -- $sumOutput ; set +f |
---|
1047 | newSum=$1 |
---|
1048 | |
---|
1049 | #esjxx See if this can be replaced with pushSdr |
---|
1050 | # Tell all client nodes to copy the file from us. |
---|
1051 | $mmcommon onall $clientNodes $unreachedNodes copyRemoteFile \ |
---|
1052 | $ourNodeName $mmsdrfsFile $mmsdrfsFile $newSum > $tmpfile 2>&1 |
---|
1053 | rc=$? |
---|
1054 | |
---|
1055 | # Make a list of the nodes that were successfully updated. For each |
---|
1056 | # such node there will be a line in tmpfile that looks like this: |
---|
1057 | # nodename: copyRemoteFile:0 |
---|
1058 | updatedNodes=$($awk -F: ' { \ |
---|
1059 | if (($2 ~ "copyRemoteFile") && ($3 == "0")) { \ |
---|
1060 | { print $1 } \ |
---|
1061 | } \ |
---|
1062 | } ' $tmpfile) |
---|
1063 | checkForErrors awk $? |
---|
1064 | |
---|
1065 | # Determine the nodes that did not get the new data. |
---|
1066 | exec 3<&- |
---|
1067 | exec 3< $clientNodes |
---|
1068 | while read -u3 nodeName |
---|
1069 | do |
---|
1070 | for goodNode in $updatedNodes |
---|
1071 | do |
---|
1072 | [[ $nodeName = $goodNode ]] && \ |
---|
1073 | break |
---|
1074 | done |
---|
1075 | |
---|
1076 | [[ $nodeName != $goodNode ]] && \ |
---|
1077 | failedNodes="${failedNodes}\n\t${nodeName}" |
---|
1078 | done |
---|
1079 | |
---|
1080 | # If any nodes failed, put out as much information as possible. |
---|
1081 | if [[ -n $failedNodes ]] |
---|
1082 | then |
---|
1083 | # Collect error messages, if any, in file tmpfile2. |
---|
1084 | $grep -v "copyRemoteFile:" $tmpfile > $tmpfile2 |
---|
1085 | [[ -s $tmpfile2 ]] && \ |
---|
1086 | $cat $tmpfile2 1>&2 |
---|
1087 | |
---|
1088 | # Tell the user which nodes failed. |
---|
1089 | printErrorMsg 377 $mmcmd "$failedNodes" |
---|
1090 | # Tell the guy which nodes must be up and which command to run. |
---|
1091 | printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer" |
---|
1092 | printErrorMsg 344 $mmcmd "mmchcluster -p LATEST" |
---|
1093 | cleanupAndExit |
---|
1094 | fi # end if [[ -n $failedNodes ]] |
---|
1095 | |
---|
1096 | fi # end if [[ ! -s $clientNodes ]] |
---|
1097 | |
---|
1098 | |
---|
1099 | ############################## |
---|
1100 | # Unlock the sdr. |
---|
1101 | ############################## |
---|
1102 | [[ $sdrLocked = yes ]] && \ |
---|
1103 | freeLockOnServer $primaryServer $ourNodeNumber > /dev/null |
---|
1104 | sdrLocked=no |
---|
1105 | trap posttrap HUP INT QUIT KILL |
---|
1106 | |
---|
1107 | # Issue "command was successful" message. |
---|
1108 | printErrorMsg 272 $mmcmd |
---|
1109 | cleanupAndExit 0 |
---|
1110 | |
---|