source: gpfs_3.1_ker2.6.20/lpp/mmfs/bin/mmchcluster @ 214

#!/bin/ksh
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.



# Licensed Materials - Property of IBM

# (C) COPYRIGHT International Business Machines Corp. 2000,2007
# All Rights Reserved

# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.

# IBM_PROLOG_END_TAG
# @(#)11 1.59.1.3 src/avs/fs/mmfs/ts/admin/mmchcluster.sh, mmfs, avs_rgpfs24, rgpfs24s009a 12/19/06 13:10:44
###############################################################################
#
# Usage:
#   mmchcluster {[-p PrimaryServer] [-s SecondaryServer]}
# or
#   mmchcluster -p LATEST
# or
#   mmchcluster {[-r RemoteShellCommand] [-R RemoteFileCopyCommand]}
# or
#   mmchcluster -C ClusterName
# or
#   mmchcluster -N {NodeDesc[,NodeDesc...] | NodeFile}
#
# where:
#
#   -p PrimaryServer    specifies the node to be used as the primary server
#                       of the GPFS sdrfs data for this cluster.
#
#      LATEST           requests a check to be made that all currently
#                       available nodes point to the correct primary and
#                       backup server.
#
#   -s SecondaryServer  specifies the node to be used as the backup server
#                       of the GPFS sdrfs data for this cluster (optional).
#                       To remove a backup server, specify  -s "".
#
#   -r RemoteShellCommand   specifies the fully qualified pathname for
#                       the remote shell program to be used by GPFS.
#                       The default is /usr/bin/rsh.
#
#   -R RemoteFileCopyCommand  specifies the fully qualified pathname for
#                       the remote file copy program to be used by GPFS.
#                       The default is /usr/bin/rcp.
#
#   -C ClusterName      specifies a new name for the cluster.  If the name
#                       contains dots it is assumed to be a fully qualified
#                       domain name.  Otherwise, the domain will default
#                       to the domain of the primary configuration server.
#
#   -N NodeDesc,NodeDesc,...  specifies a comma-separated list of node
#                             descriptors that specify the admin node
#                             interfaces to be used in the cluster.
#                       The node descriptors have the format:
#                         daemonNodeName:nodeRoles:adminNodeName:
#                       The nodeRoles field is currently just a place-holder
#                       and is ignored.
#
#   -N NodeFile         specifies a file of node descriptors that specify
#                       the admin node interfaces to be used in the cluster.
#                       The lines in the input file have the format:
#                         daemonNodeName:nodeRoles:adminNodeName:
#                       The nodeRoles field is currently just a place-holder
#                       and is ignored.
#
# Note:  When used with the -p or -s options, this command is most likely
#        to be needed when the current primary server is not available,
#        in which case it is impossible to obtain the sdr lock that
#        protects against concurrent execution of other mm commands.
#        Under such conditions, the user must ensure that no other mm
#        command is run until the mmchcluster command completes, and
#        that as many of the remaining nodes as possible are available.
#
###############################################################################
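# Illustrative invocations (a sketch for orientation only; the host
# names c21f1n01/c21f1n02 and the file /tmp/nodedescs are hypothetical
# and not part of this source):
#
#   mmchcluster -p c21f1n01 -s c21f1n02          # move the config servers
#   mmchcluster -p LATEST                        # re-sync all nodes with the servers
#   mmchcluster -r /usr/bin/ssh -R /usr/bin/scp  # switch the remote commands
#   mmchcluster -N /tmp/nodedescs                # assign admin node interfaces
#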

# Include global declarations and service routines.
. /usr/lpp/mmfs/bin/mmglobfuncs
. /usr/lpp/mmfs/bin/mmsdrfsdef

sourceFile="mmchcluster.sh"
[[ -n $DEBUG || -n $DEBUGmmchcluster ]] && set -x
$mmTRACE_ENTER "$*"


# Local work files.  Names should be of the form:
#   fn=${tmpDir}fn.${mmcmd}.$$
allNodes=${tmpDir}allNodes.${mmcmd}.$$
clientNodes=${tmpDir}clientNodes.${mmcmd}.$$
inputNodes=${tmpDir}inputNodes.${mmcmd}.$$
processedNodes=${tmpDir}processedNodes.${mmcmd}.$$
initErrors=${tmpDir}initErrors.${mmcmd}.$$
# Note: Do not include initErrors in LOCAL_FILES yet; we'll do it later.

LOCAL_FILES=" $allNodes $clientNodes $inputNodes $processedNodes "


# Local declarations

usageMsg=359
newNodeNumbers=""
backupServer=""
rshPath=""
rcpPath=""
integer nodeCount
integer n
rc=0

Cflag=""
Nflag=""
pflag=""
rflag=""
Rflag=""
sflag=""
Carg=""
parg=""
rarg=""
Rarg=""
sarg=""
otherOpt=""


# Local functions


##########################################################################
#
# Function:  Specify the admin network for the GPFS cluster.
#
# Input:    $1 - file or list of node descriptors containing the
#                adapter information as follows:
#                  daemonNodeName:nodeRoles:adminNodeName:
#
# Returns:   0 - no errors encountered
#            non-zero - unexpected error
#
##########################################################################
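# For illustration only (hypothetical host names, not part of this
# source): an input file passed to this function might contain
#
#   c21f1n01::c21f1n01-admin:    # use adapter c21f1n01-admin for admin traffic
#   c21f1n02::c21f1n02-admin:
#   c21f1n03:::                  # empty admin name: reset to the daemon name
#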
function specifyAdminNetwork  # <networkInfo>
{
  typeset sourceFile="mmchcluster.sh"
  [[ -n $DEBUG || -n $DEBUGspecifyAdminNetwork ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset networkInfo="$1"

  typeset failedNodes sdrfsLine mmcommonOutput
  typeset nodeLine nodeName nodeName2 nodeStatus
# typeset nodeRoles
  typeset hostResult nodeNumber adminNodeName adminIpa
  typeset nodeError newPrimaryName newBackupName commitOptions

  typeset rc=0
  typeset changeMade=""
  typeset fatalError=""
  typeset sharedSdrservPort=""

  # The input parameter may be either a list or a file.  Which is it?
  if [[ -f $networkInfo ]]
  then
    # It is a file; verify its existence and create our own copy.
    checkUserFile $networkInfo $inputNodes
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # It is not a file, so it must be a list.
    # Convert the input node list into a file.
    $rm -f $inputNodes
    IFS=','
    for nodeDesc in $networkInfo
    do
      print -- "$nodeDesc" >> $inputNodes
      checkForErrors "writing to $inputNodes" $?
    done
    IFS="$IFS_sv"    # Restore the default IFS setting.
  fi

  # Check the input data for correctness.
  # We check all the records rather than stop on the first error.
  $rm -f $processedNodes
  $touch $processedNodes     # Ensure the tmp file exists even if empty.
  IFS=":"                    # Change the field separator to ':'.
  exec 3<&-
  exec 3< $inputNodes
  while read -u3 nodeLine
  do
    # Parse the line.
    set -f ; set -- $nodeLine ; set +f
    nodeName=$1
#   nodeRoles=$2
    nodeName2=$3
    IFS="$IFS_sv"    # Restore the default IFS setting.

    # Make sure neither node name is specified more than once.
    $grep -qw $nodeName $processedNodes > /dev/null 2>&1
    if [[ $? -eq 0 ]]
    then
      # The node name is specified twice.
      printErrorMsg 347 $mmcmd $nodeName
      fatalError=yes
    fi

    # Check the admin node name if it was specified.
    if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]
    then
      $grep -qw $nodeName2 $processedNodes > /dev/null 2>&1
      if [[ $? -eq 0 ]]
      then
        # The node is specified twice.
        printErrorMsg 347 $mmcmd $nodeName2
        fatalError=yes
      fi
    fi  # end of if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]

    # Add the node names to the list of processed nodes.
    print -- "${nodeName}:${nodeName2}" >> $processedNodes
    checkForErrors "Writing to file $processedNodes" $?

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end of while read -u3 nodeLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # Return to the caller if we encountered an error.
  [[ -n $fatalError ]] && return 1

  # Ensure that the local copy of the mmsdrfs is up-to-date.
  # Set up trap exception handling and obtain the lock.
  trap pretrap HUP INT QUIT KILL
  gpfsInitOutput=$(gpfsInit $lockId)
  setGlobalVar $? $gpfsInitOutput

  # Stop here if the admin network support has not been activated yet.
  if [[ $sdrfsFormatLevel -eq 0 ]]
  then
    print -u2 "$mmcmd:  The separate administration network support has not been enabled yet."
    print -u2 "    Run \"mmchconfig release=LATEST\" to activate the new function."
    cleanupAndExit
  fi

  # Determine the lookup order for resolving host names.
  [[ $osName != AIX ]] && resolveOrder=$(setHostResolveOrder)

  # Go through the current mmsdrfs file.  Increment the generation
  # number and build the node name list that will be needed later.
  # Remove all admin network related information.
  $rm -f $newsdrfs $nodefile
  newPrimaryName=""
  newBackupName=""
  IFS=":"                    # Change the field separator to ':'.
  exec 3<&-
  exec 3< $mmsdrfsFile
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -A v -- - $sdrfsLine ; set +f

    IFS="$IFS_sv"      # Restore the default IFS settings.
    printLine=true     # Assume the line will be printed.

    case ${v[$LINE_TYPE_Field]} in

      $VERSION_LINE )  # This is the global header line.
        # Save the version line for updating later.
        versionLine=$(print_newLine)
        printLine=false
        ;;

      $NODESET_HDR )
        # If the daemon and the mmsdrserv tcp ports are shared,
        # it will be necessary to ensure that the daemon is down
        # on the config server nodes if their names are to change.
        if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
              ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
        then
          sharedSdrservPort=yes
        fi
        ;;

      $MEMBER_NODE )   # This line describes a node.
        # Add the reliable node name to nodefile.
        print -- "${v[$REL_HOSTNAME_Field]}" >> $nodefile
        checkForErrors "writing to file $nodefile" $?

        # Reset the node error flag.
        nodeError=""

        # Obtain the data for this node from the node file.
        nodeLine=$($awk -F: '                        \
          $1 == "'${v[$DAEMON_NODENAME_Field]}'" ||  \
          $1 == "'${v[$REL_HOSTNAME_Field]}'"    ||  \
          $1 == "'${v[$NODE_NAME_Field]}'"       ||  \
          $1 == "'${v[$ADMIN_SHORTNAME_Field]}'" ||  \
          $1 == "'${v[$NODE_NUMBER_Field]}'"     ||  \
          $1 == "'${v[$IPA_Field]}'" {               \
            { print $0 }                             \
            { exit }                                 \
          }                                          \
        ' $inputNodes)

        if [[ -n $nodeLine ]]
        then
          # We found data for this node.  Parse the input.
          IFS=":"              # Change the field separator to ':'.
          set -f ; set -- $nodeLine ; set +f
          nodeName=$1
          nodeName2=$3
          IFS="$IFS_sv"        # Restore the default IFS setting.

          # Determine the daemon node name.
          if [[ -n ${v[$DAEMON_NODENAME_Field]} ]]
          then
            daemonNodeName=${v[$DAEMON_NODENAME_Field]}
          else
            daemonNodeName=${v[$REL_HOSTNAME_Field]}
          fi

          # Did the user reset or specify the admin node name?
          if [[ -z $nodeName2 ]]
          then
            # The admin node name was null, indicating "reset";
            # set the admin node name to the daemon node name value.
            adminNodeName=$daemonNodeName
            adminShortName=${v[$NODE_NAME_Field]}

          else
            # The admin node name was not null, indicating "specify";
            # Determine the IP address for the specified admin node name.
            hostResult=$($host $nodeName2)
            set -f ; set -- $hostResult ; set +f
            adminNodeName=$1
            adminShortName=${1%%.*}    # Exclude everything after the first dot.
            adminIpa=${3%%,*}

            # Check that the admin node name has a valid IP address.
            if [[ -z $adminIpa ]]
            then
              # An invalid node name was specified.
              printErrorMsg 54 $mmcmd $nodeName2
              fatalError=yes
              break
            fi

            # Invoke the checkAdapter function to ensure that
            # the specified adapter interface exists on the node.
            mmcommonOutput=$($mmcommon on1 ${v[$REL_HOSTNAME_Field]}  \
               checkAdapter $adminIpa 2> $errMsg)
            rc=$?
            set -f ; set -- $mmcommonOutput ; set +f
            nodeStatus=$1
            if [[ $rc != 0 || $nodeStatus != success ]]
            then
              # The checkAdapter call failed.
              # We will not define a new admin node name for this node
              # but we will continue to process the remaining nodes.
              # Tell the world what went wrong with this node.
              if [[ $nodeStatus = ipa_alias ]]
              then
                # IP address aliasing is not supported.
                printErrorMsg 476 $mmcmd $nodeName2
              elif [[ $nodeStatus = ipa_missing ]]
              then
                # The admin IP address is not known on the node.
                printErrorMsg 154 $mmcmd $nodeName2 ${v[$REL_HOSTNAME_Field]}
              elif [[ $rc = $MM_HostDown || $rc = $MM_ConnectTimeout ]]
              then
                # The node cannot be reached.
                printErrorMsg 340 $mmcmd ${v[$REL_HOSTNAME_Field]}
              else
                # Unexpected error.  Display all possible error messages.
                [[ -s $errMsg ]] && $cat $errMsg 1>&2
                [[ $rc -eq 0 ]] && rc=1
                checkForErrors "checkAdapter ${v[$REL_HOSTNAME_Field]}" $rc
              fi

              # Append the node name to the list of failed nodes and
              # set a flag to indicate the node name did not check out.
              failedNodes="${failedNodes}\n\t${nodeName}"
              nodeError=yes

            fi  # end of if [[ $rc != 0 || $nodeStatus != success ]]

          fi  # end of if [[ -z $nodeName2 ]]

          # Update the member line if there was no error.
          if [[ -z $nodeError ]]
          then
            # Remember the new primary or backup server name for updating
            # the version line later if this is one of those servers.
            [[ ${v[$REL_HOSTNAME_Field]} = $primaryServer ]] &&  \
              newPrimaryName=$adminNodeName
            [[ ${v[$REL_HOSTNAME_Field]} = $backupServer ]]  &&  \
              newBackupName=$adminNodeName

            # Things checked out ok.  Set the node name fields.
            v[$DAEMON_NODENAME_Field]=$daemonNodeName
            v[$REL_HOSTNAME_Field]=$adminNodeName
            v[$ADMIN_SHORTNAME_Field]=$adminShortName
            changeMade=yes
          fi

          $rm -f $errMsg

        fi  # end of if [[ -n $nodeLine ]]
        ;;

      * )  # We are not interested in any other lines.
        ;;

    esac  # end of case ${v[$LINE_TYPE_Field]} in

    # Unless suppressed, write the line to the new mmsdrfs file.
    if [[ $printLine = true ]]
    then
      print_newLine >> $newsdrfs
      checkForErrors "writing to file $newsdrfs" $?
    fi

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end of while read -u3

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # Go through the mmsdrfs file to update the NSD servers' admin node names.
  $rm -f $tmpsdrfs
  IFS=":"
  exec 3<&-
  exec 3< $newsdrfs
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -A v -- - $sdrfsLine ; set +f
    IFS="$IFS_sv"

    # Change some of the fields depending on the type of line.
    case ${v[$LINE_TYPE_Field]} in

      $SG_DISKS )  # This is the line for some disk.

        # If this disk is an NSD with a valid PVID value,
        # make sure the daemon nsd server names are recorded.
        if [[ ${v[$DISK_TYPE_Field]} = nsd && -n ${v[$PVID_Field]} ]]
        then
          # If a server node was specified, check that it is valid and
          # convert it to get the potentially new admin adapter name.
          # We determine whether a server was specified by checking for an
          # admin nsd server name, but we do not use that name for finding
          # the node information, since the old admin node name may
          # no longer exist as a result of the update we just did.
          # We use the daemon node name to find the node instead,
          # since mmchcluster -N does not change daemon node names.
          if [[ -n ${v[$NSD_PRIMARY_NODE_Field]} ]]
          then
            # If no daemon node name has yet been recorded for the
            # primary NSD server, determine and store it now.
            server=${v[$DAEMON_NSD_PRIMARY_Field]}
            if [[ -z $server ]]
            then
              server=$(checkAndConvertNodeValue  \
                 ${v[$NSD_PRIMARY_NODE_Field]} $DAEMON_NODENAME_Field)
              checkForErrors "checkAndConvertNodeValue" $?
              v[$DAEMON_NSD_PRIMARY_Field]=$server
            fi
            # Use the primary server's daemon node name to obtain
            # the primary server's admin node name.
            v[$NSD_PRIMARY_NODE_Field]=$(checkAndConvertNodeValue  \
               $server $REL_HOSTNAME_Field $newsdrfs)
            checkForErrors "checkAndConvertNodeValue $server" $?
          fi
          if [[ -n ${v[$NSD_BACKUP_NODE_Field]} ]]
          then
            # If no daemon node name has yet been recorded for the
            # backup NSD server, determine and store it now.
            backup=${v[$DAEMON_NSD_BACKUP_Field]}
            if [[ -z $backup ]]
            then
              backup=$(checkAndConvertNodeValue  \
                 ${v[$NSD_BACKUP_NODE_Field]} $DAEMON_NODENAME_Field)
              checkForErrors "checkAndConvertNodeValue" $?
              v[$DAEMON_NSD_BACKUP_Field]=$backup
            fi
            # Use the backup server's daemon node name to obtain
            # the backup server's admin node name.
            v[$NSD_BACKUP_NODE_Field]=$(checkAndConvertNodeValue  \
               $backup $REL_HOSTNAME_Field $newsdrfs)
            checkForErrors "checkAndConvertNodeValue $backup" $?
          fi
        fi  # end of if (v[$DISK_TYPE_Field] == "nsd" && -n v[$PVID_Field])
        ;;

      * )  # We are not interested in any other lines.
        ;;

    esac  # end Change some of the fields

    # Build and write the line to the temp version of the mmsdrfs file.
    print_newLine >> $tmpsdrfs
    checkForErrors "writing to file $tmpsdrfs" $?

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end while read -u3 sdrfsLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # If a fatal error occurred, or if no changes were made,
  # release the lock, report any failed nodes, and return.
  if [[ -n $fatalError || -z $changeMade ]]
  then
    freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
    if [[ -n $failedNodes ]]
    then
      # Administrative node names were not defined for nodes ...
      printErrorMsg 174 $mmcmd $failedNodes
    fi
    if [[ -n $fatalError ]]
    then
      printErrorMsg 389 $mmcmd         # The command failed.
    else
      printErrorMsg 387 $mmcmd $mmcmd  # Command quitting due to no valid nodes.
    fi
    return 1
  fi

  # Create the updated version line and add it to the new sdrfs file.
  # The generation number is incremented and the server names may change.
  IFS=":"                    # Change the field separator to ':'.
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"              # Restore the default IFS setting.
  newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
  v[$SDRFS_GENNUM_Field]=$newGenNumber
  [[ -n $newPrimaryName ]] && v[$PRIMARY_SERVER_Field]=$newPrimaryName
  [[ -n $newBackupName ]]  && v[$BACKUP_SERVER_Field]=$newBackupName
  print_newLine >> $tmpsdrfs
  checkForErrors "writing to file $tmpsdrfs" $?

  # If the GPFS and mmsdrserv daemons share the same tcp port number,
  # and the names of the primary or backup configuration servers are
  # changing, it is necessary to ensure that the GPFS daemon is down
  # on the server nodes and the mmsdrserv daemon is restarted.
  # Otherwise, the server nodes will continue serving (stale) Gpfs
  # object information or will return ESDR_NOT_SERVER errors.
  if [[ -n $sharedSdrservPort && ( -n $newPrimaryName || -n $newBackupName ) ]]
  then
    # Get the names of the config servers.
    print -- "${v[$PRIMARY_SERVER_Field]}\n${v[$BACKUP_SERVER_Field]}" > $tmpNodes
    checkForErrors "writing to file $tmpNodes" $?

    # Verify the daemon is down; do not lock the Gpfs object.
    printInfoMsg 453
    verifyDaemonInactive $tmpNodes
    [[ $? -ne 0 ]] && return 1

    commitOptions="initLocalNodeData,KILLSDRSERV"
  else
    commitOptions="initLocalNodeData"
  fi  # end of if [[ -n $sharedSdrservPort ]]

  # Make sure the new sdrfs file is properly sorted.
  LC_ALL=C $SORT_MMSDRFS $tmpsdrfs -o $newsdrfs

  # Put the new mmsdrfs file into the sdr.  This will make the newly-added
  # admin nodes visible to the rest of the nodes in the cluster.
  trap "" HUP INT QUIT KILL
  gpfsObjectInfo=$(commitChanges $nsId $nsId  \
     $gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer $commitOptions)
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # We were unable to replace the file in the sdr.
    printErrorMsg 381 $mmcmd
    return 1
  fi

  # Unlock the sdr.
  freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
  trap posttrap HUP INT QUIT KILL

  # Propagate the new mmsdrfs file to all nodes in the cluster.
  # This process is asynchronous.
  propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber initLocalNodeData

  # Report any nodes that did not check successfully.
  if [[ -n $failedNodes ]]
  then
    # Administrative node names were not defined for nodes ...
    printErrorMsg 174 $mmcmd $failedNodes
  fi

  return 0

}  #----- end of function specifyAdminNetwork -------------------


###################################################################
# This function is called if there is an interrupt after the new
# mmsdrfs file was committed on the new primary and backup servers
# but before the change was propagated to the rest of the nodes.
###################################################################
function localPosttrap
{
  $mmTRACE_ENTER "$*"

  # Tell the guy which nodes must be up and which command to run.
  printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
  printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
  cleanupAndExit 2

}  #----- end of function localPosttrap ------------------------



######################
# Mainline processing
######################


###################################################
# Process the command arguments.
###################################################
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] && \
  syntaxError "help" $usageMsg

[[ $argc -lt 2  ]] && \
  syntaxError "missingArgs" $usageMsg

while getopts :C:N:p:r:R:s: OPT
do
  case $OPT in

    C) # cluster name
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       Carg=$OPTARG
       ;;

    N) # define/replace secondary network
       [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       Narg=$OPTARG
       ;;

    p) # primary server
       [[ -n $pflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       pflag="-$OPT"
       parg=$OPTARG
       otherOpt="-$OPT"
       ;;

    r) # remote shell command
       [[ -n $rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       rflag="-$OPT"
       rarg=$OPTARG
       [[ $rarg = ${rarg#/} ]] && \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$rarg"
       otherOpt="-$OPT"
       ;;

    R) # remote file copy command
       [[ -n $Rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Rflag="-$OPT"
       Rarg=$OPTARG
       [[ $Rarg = ${Rarg#/} ]] && \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$Rarg"
       otherOpt="-$OPT"
       ;;

    s) # secondary server
       [[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       sflag="-$OPT"
       sarg=$OPTARG
       otherOpt="-$OPT"
       ;;

    +[CNprRs]) # Invalid option
       syntaxError "invalidOption" $usageMsg $OPT
       ;;

    :) # Missing argument
       syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    *) # Invalid option
       syntaxError "invalidOption" $usageMsg $OPTARG
       ;;
  esac

done

shift OPTIND-1
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1

[[ -n $sflag && $parg = LATEST ]] && \
  syntaxError "invalidCombination" $usageMsg "-s" "-p LATEST"

[[ -n $rflag && -n $pflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-r" "-p"

[[ -n $rflag && -n $sflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-r" "-s"

[[ -n $Rflag && -n $pflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-R" "-p"

[[ -n $Rflag && -n $sflag ]] && \
  syntaxError "invalidCombination" $usageMsg "-R" "-s"

# The primary GPFS cluster configuration server cannot be removed.
[[ -n $pflag && $parg = "" ]] && \
  syntaxError "missingValue" $usageMsg "-p"

[[ -n $Nflag && -n $otherOpt ]] && \
  syntaxError "invalidCombination"  $usageMsg "-N" "$otherOpt"

[[ -n $Cflag && -n $otherOpt ]] && \
  syntaxError "invalidCombination" $usageMsg "-C" "$otherOpt"


#############################################################################
# If the request is to change a remote command, invoke the mmsetrcmd script.
# Keep in mind that rarg and Rarg may include options for the respective
# commands and, therefore, must always be quoted.
#############################################################################
if [[ -n $rflag || -n $Rflag ]]
then
  if [[ -z $Rflag ]]
  then
    $mmsetrcmd "$rflag" "$rarg"
    rc=$?
  elif [[ -z $rflag ]]
  then
    $mmsetrcmd "$Rflag" "$Rarg"
    rc=$?
  else
    $mmsetrcmd "$rflag" "$rarg" "$Rflag" "$Rarg"
    rc=$?
  fi
  cleanupAndExit $rc
fi


#############################################################
# If the request is to specify changes to the admin network,
# invoke the function to do the work and exit.
#############################################################
if [[ -n $Nflag ]]
then
  specifyAdminNetwork "$Narg"
  cleanupAndExit $?
fi


########################################################
# If the request is to change the cluster name,
# invoke the mmsetrcmd script.
########################################################
if [[ -n $Cflag ]]
then
  $mmsetrcmd "$Cflag" "$Carg"
  cleanupAndExit $?
fi


#################################################################
# Set up trap exception handling and call the gpfsInit function.
# It will attempt to ensure that the local copy of the mmsdrfs
# and the rest of the GPFS system files are up-to-date.
# Try to get the lock but do not fail if this is not possible.
#################################################################
trap pretrap HUP INT QUIT KILL

if [[ $parg = LATEST ]]
then
  # The LATEST keyword was specified.  Try to obtain the
  # most recent mmsdrfs file (i.e., the mmsdrfs file with the
  # highest gen number) among all the nodes in the cluster.
  # To do that, use the local mmsdrfs file as a starting point.
  getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
  gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
  rc=$?

else
  # The LATEST keyword was not specified.  Try to obtain
  # the mmsdrfs file from one of the servers with locking.
  gpfsInitOutput=$(gpfsInit $lockId 2> $initErrors)
  rc=$?
  LOCAL_FILES="$LOCAL_FILES $initErrors "
  if [[ $rc -ne 0 ]]
  then
    # We failed to get the sdrfs file with a lock.  Check whether
    # some other mm command currently holds the lock.  If yes, give up.
    $grep -e "Timed out waiting for lock:  Try again later."  \
          -e "6027-1229"  $initErrors > /dev/null 2>&1
    ec=$?
    if [[ $ec -eq 0 ]]
    then
      # Display the messages from gpfsInit.
      $cat $initErrors | \
         $grep -v -e "6027-1227" -e "file is locked. Retrying..." 1>&2
      cleanupAndExit
    fi

    # We failed to get the sdrfs file with a lock.  Display any messages.
    $cat $initErrors 1>&2
    # Processing continues.
    printErrorMsg 437 $mmcmd

    # Now try the gpfsInit again, but this time do not ask for a lock.
    # If there is a backup server, and if it is available,
    # we should be able to get the latest GPFS system files from there.
    gpfsInitOutput=$(gpfsInit nolock 2>/dev/null)
    rc=$?
    if [[ $rc -ne 0 ]]
    then
      # We also failed to get the sdrfs file without locking.  Now try
      # to obtain the most recent mmsdrfs file (i.e., the mmsdrfs file
      # with the highest gen number) among all the nodes in the cluster.
      # To do that, use the local mmsdrfs file as a starting point.
      getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
      gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
      rc=$?
    fi
  fi
fi   # end of if [[ $parg = LATEST ]]

# Check whether we succeeded in obtaining the desired mmsdrfs file.
if [[ $rc -ne 0 ]]
then
  # Not enough nodes are available.
  printErrorMsg 378 $mmcmd
  cleanupAndExit
fi

# Parse the output from the init function.
setGlobalVar $rc $gpfsInitOutput

if [[ $MMMODE = single ]]
then
  # Command currently not valid for cluster type single.
  printErrorMsg 376 $mmcmd single
  cleanupAndExit
fi

if [[ $MMMODE != lc ]]
then
  # Unknown GPFS nodeset type
  printErrorMsg 338 $mmcmd $MMMODE
  cleanupAndExit
fi


#######################################################
# Determine the reliable hostnames of the new servers.
#######################################################
if [[ -n $pflag && $parg != LATEST ]]
then
  # Find the name of the primary server.
  newPrimaryServer=$(checkAndConvertNodeValue $parg $REL_HOSTNAME_Field)
  if [[ $? -ne 0 ]]
  then
    printErrorMsg 352 $mmcmd $parg
    cleanupAndExit
  fi
else
  # If -p not specified, the primary server remains the same.
  newPrimaryServer=$primaryServer
fi  # end of if [[ -n $pflag && $parg != LATEST ]]

if [[ -n $sflag ]]
then
  if [[ -n $sarg ]]
  then
    # Find the name of the secondary server.
    newBackupServer=$(checkAndConvertNodeValue $sarg $REL_HOSTNAME_Field)
    if [[ $? -ne 0 ]]
    then
      printErrorMsg 352 $mmcmd $sarg
      cleanupAndExit
    fi
  else
    # We are deleting the backup server (-s "" was specified).
    newBackupServer=""
  fi
else
  # If -s not specified, the backup server remains the same.
  newBackupServer=$backupServer
fi  # end of if [[ -n $sflag ]]

# Cross check the two server names.
if [[ $newBackupServer = $newPrimaryServer ]]
then
  # The same node was specified as primary and backup server.
  printErrorMsg 346 $mmcmd
  cleanupAndExit
fi

# Check whether anything needs to be done at all.
[[ $newPrimaryServer = $primaryServer &&  \
   $newBackupServer  = $backupServer  &&  \
   $parg != LATEST ]] &&                  \
  cleanupAndExit 0    # Servers are already as desired.


#################################################################
# Go through the current mmsdrfs file.  Increment the generation
# number and change the server names.  Create a file with the
# reliable hostnames of all nodes in the cluster.
#################################################################
$rm -f $newsdrfs $allNodes $clientNodes
IFS=":"         # Change the field separator to ':'.
exec 3<&-
exec 3< $mmsdrfsFile
while read -u3 sdrfsLine
do
  # Parse the line.
  set -f ; set -A v -- - $sdrfsLine ; set +f
  IFS="$IFS_sv"    # Restore the default IFS settings.

  # Change some of the fields depending on the type of line.
  case ${v[$LINE_TYPE_Field]} in

    $VERSION_LINE )
      # Increment the generation number.
      newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
      v[$SDRFS_GENNUM_Field]=$newGenNumber
      v[$PRIMARY_SERVER_Field]=$newPrimaryServer
      v[$BACKUP_SERVER_Field]=$newBackupServer
      ;;

    $NODESET_HDR )
      # If the daemon and the mmsdrserv tcp ports are shared,
      # it will be necessary to ensure that the daemon is down
      # on the old and new config server nodes.
      if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
            ${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
      then
        daemonMustBeDown=yes
      fi
      ;;

    $MEMBER_NODE )
      # If this is our node, save the reliable name.
      [[ ${v[$NODE_NUMBER_Field]} = $ourNodeNumber ]] &&  \
        ourNodeName=${v[$REL_HOSTNAME_Field]}

      # All nodes will go in the allNodes file.
      print -- "${v[$REL_HOSTNAME_Field]}" >> $allNodes
      checkForErrors "writing to file $allNodes" $?

      # The server nodes and the local node will
      # not go in the clientNodes file.
      if [[ ${v[$REL_HOSTNAME_Field]} != $newPrimaryServer &&
            ${v[$REL_HOSTNAME_Field]} != $newBackupServer  &&
            ${v[$REL_HOSTNAME_Field]} != $ourNodeName ]]
      then
        print -- "${v[$REL_HOSTNAME_Field]}" >> $clientNodes
        checkForErrors "writing to file $clientNodes" $?
      fi
      ;;

    * )  # Pass all other lines without change.
      ;;

  esac  # end Change some of the fields

  # Build and write the line to the new mmsdrfs file.
  print_newLine >> $newsdrfs
  checkForErrors "writing to file $newsdrfs" $?

  IFS=":"  # Change the separator back to ":" for the next iteration.

done  # end of while read -u3 sdrfsLine

IFS="$IFS_sv"  # Restore the default IFS settings.


#######################################################################
# If the GPFS and mmsdrserv daemons share the same tcp port number,
# it is necessary to ensure that the GPFS daemon is down on the old
# and new configuration server nodes.  Otherwise, the old server nodes
# will continue giving (stale) Gpfs object information, while the new
# servers will not be able to respond to requests because the GPFS
# daemon cannot assume mmsdrserv duties if it is already running.
#######################################################################
if [[ -n $daemonMustBeDown && $parg != LATEST ]]
then
  # Put the old and new server names in a file.
  print -- "$primaryServer\n$backupServer\n"  \
           "$newPrimaryServer\n$newBackupServer" > $tmpNodes
  checkForErrors "writing to file $tmpNodes" $?

  # Eliminate duplicate names.
  $sort -u $tmpNodes -o $tmpNodes
  checkForErrors "sort $tmpNodes" $?

  # Verify the daemon is down; do not lock the Gpfs object.
  printInfoMsg 453
  verifyDaemonInactive $tmpNodes
  [[ $? -ne 0 ]] && cleanupAndExit
fi  # end of if [[ -n $daemonMustBeDown ]]


######################################################
# First, put the new mmsdrfs file on the two servers.
# This must succeed no matter what.
######################################################
trap "" HUP INT QUIT KILL
gpfsObjectInfo=$(commitChanges  \
   $nsId $nsId $gpfsObjectInfo $newGenNumber $newsdrfs  \
   $newPrimaryServer FORCE $newBackupServer)
rc=$?
if [[ $rc -ne 0 ]]
then
  # Cannot replace file in the sdr.
  printErrorMsg 381 $mmcmd

  # The mmchcluster failed - get out.
  # Tell the guy which nodes must be up and which command to run.
  printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
  printErrorMsg 344 $mmcmd "mmchcluster"
  cleanupAndExit
fi

# Restore interrupts.
trap localPosttrap HUP INT QUIT KILL


#################################################
# Propagate the changes to the non-server nodes.
#################################################
if [[ $ourNodeName != $newPrimaryServer &&
      $ourNodeName != $newBackupServer ]]
then
  $cp $newsdrfs $mmsdrfsFile
  checkForErrors "writing to file $mmsdrfsFile" $?
fi

if [[ -s $clientNodes ]]
then
  # Calculate the checksum of the new mmsdrfs file.
  sumOutput=$($sum $newsdrfs)
  checkForErrors "sum $newsdrfs" $?
  set -f ; set -- $sumOutput ; set +f
  newSum=$1

#esjxx See if this can be replaced with pushSdr
  # Tell all client nodes to copy the file from us.
  $mmcommon onall $clientNodes $unreachedNodes copyRemoteFile  \
              $ourNodeName $mmsdrfsFile $mmsdrfsFile $newSum > $tmpfile 2>&1
  rc=$?

  # Make a list of the nodes that were successfully updated.  For each
  # such node there will be a line in tmpfile that looks like this:
  #   nodename: copyRemoteFile:0
  updatedNodes=$($awk -F: ' {                         \
    if (($2 ~ "copyRemoteFile") && ($3 == "0")) {     \
      { print $1 }                                    \
    }                                                 \
  } ' $tmpfile)
  checkForErrors awk $?

  # Determine the nodes that did not get the new data.
  exec 3<&-
  exec 3< $clientNodes
  while read -u3 nodeName
  do
    for goodNode in $updatedNodes
    do
      [[ $nodeName = $goodNode ]] &&  \
        break
    done

    [[ $nodeName != $goodNode ]] &&  \
      failedNodes="${failedNodes}\n\t${nodeName}"
  done

  # If any nodes failed, put out as much information as possible.
  if [[ -n $failedNodes ]]
  then
    # Collect error messages, if any, in file tmpfile2.
    $grep -v "copyRemoteFile:" $tmpfile > $tmpfile2
    [[ -s $tmpfile2 ]] &&  \
      $cat $tmpfile2 1>&2

    # Tell the user which nodes failed.
    printErrorMsg 377 $mmcmd "$failedNodes"
    # Tell the guy which nodes must be up and which command to run.
    printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
    printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
    cleanupAndExit
  fi   # end if [[ -n $failedNodes ]]

fi  # end if [[ -s $clientNodes ]]


##############################
# Unlock the sdr.
##############################
[[ $sdrLocked = yes ]] &&  \
  freeLockOnServer $primaryServer $ourNodeNumber > /dev/null
sdrLocked=no
trap posttrap HUP INT QUIT KILL

# Issue "command was successful" message.
printErrorMsg 272 $mmcmd
cleanupAndExit 0