Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

mmchcluster @ 195

Last change on this file since 195 was 16, checked in by rock, 17 years ago

Property svn:executable set to ``*
File size: 36.9 KB

Rev	Line
[16]	1	#!/bin/ksh
	2	# IBM_PROLOG_BEGIN_TAG
	3	# This is an automatically generated prolog.
	4	#
	5	#
	6	#
	7	# Licensed Materials - Property of IBM
	8	#
	9	# (C) COPYRIGHT International Business Machines Corp. 2000,2007
	10	# All Rights Reserved
	11	#
	12	# US Government Users Restricted Rights - Use, duplication or
	13	# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
	14	#
	15	# IBM_PROLOG_END_TAG
	16	# @(#)11 1.59.1.3 src/avs/fs/mmfs/ts/admin/mmchcluster.sh, mmfs, avs_rgpfs24, rgpfs24s009a 12/19/06 13:10:44
	17	###############################################################################
	18	#
	19	# Usage:
	20	# mmchcluster {[-p PrimaryServer] [-s SecondaryServer]}
	21	# or
	22	# mmchcluster -p LATEST
	23	# or
	24	# mmchcluster {[-r RemoteShellCommand] [-R RemoteFileCopyCommand]}
	25	# or
	26	# mmchcluster -C ClusterName
	27	# or
	28	# mmchcluster -N {NodeDesc[,NodeDesc...] | NodeFile}
	29	#
	30	# where:
	31	#
	32	# -p PrimaryServer specifies the node to be used as the primary server
	33	# of the GPFS sdrfs data for this cluster.
	34	#
	35	# LATEST requests a check to be made that all currently
	36	# available nodes point to the correct primary and
	37	# backup server.
	38	#
	39	# -s SecondaryServer specifies the node to be used as the backup server
	40	# of the GPFS sdrfs data for this cluster (optional).
	41	# To remove a backup server, specify -s "".
	42	#
	43	# -r RemoteShellCommand specifies the fully qualified pathname for
	44	# the remote shell program to be used by GPFS.
	45	# The default is /usr/bin/rsh.
	46	#
	47	# -R RemoteFileCopyCommand specifies the fully qualified pathname for
	48	# the remote file copy program to be used by GPFS.
	49	# The default is /usr/bin/rcp.
	50	#
	51	# -C ClusterName specifies a new name for the cluster. If the name
	52	# contains dots it is assumed to be a fully qualified
	53	# domain name. Otherwise, the domain will default
	54	# to the domain of the primary configuration server.
	55	#
	56	# -N NodeDesc,NodeDesc,... specifies a comma-separated list of node
	57	# descriptors that specify the admin node
	58	# interfaces to be used in the cluster.
	59	# The node descriptors have the format:
	60	# daemonNodeName:nodeRoles:adminNodeName:
	61	# The nodeRoles field is currently just a place-holder
	62	# and is ignored.
	63	#
	64	# -N NodeFile specifies a file of node descriptors that specify
	65	# the admin node interfaces to be used in the cluster.
	66	# The lines in the input file have the format:
	67	# daemonNodeName:nodeRoles:adminNodeName:
	68	# The nodeRoles field is currently just a place-holder
	69	# and is ignored.
	70	#
	71	# Note: When used with the -p or -s options, this command will most
	72	# likely be needed when the current primary server is not available
	73	# and it will be impossible to obtain the sdr lock and protect
	74	# against concurrent execution of some other mm command.
	75	# Under such conditions, the user must assure that no other mm
	76	# command is run until the completion of the mmchcluster command
	77	# and that as many of the remaining nodes as possible are available.
	78	#
	79	###############################################################################
	80
	81	# Include global declarations and service routines.
	82	. /usr/lpp/mmfs/bin/mmglobfuncs
	83	. /usr/lpp/mmfs/bin/mmsdrfsdef
	84
	85	sourceFile="mmchcluster.sh"
	86	[[ -n $DEBUG \|\| -n $DEBUGmmchcluster ]] && set -x
	87	$mmTRACE_ENTER "$*"
	88
	89
	90	# Local work files. Names should be of the form:
	91	# fn=${tmpDir}fn.${mmcmd}.$$
	92	allNodes=${tmpDir}allNodes.${mmcmd}.$$
	93	clientNodes=${tmpDir}clientNodes.${mmcmd}.$$
	94	inputNodes=${tmpDir}inputNodes.${mmcmd}.$$
	95	processedNodes=${tmpDir}processedNodes.${mmcmd}.$$
	96	initErrors=${tmpDir}initErrors.${mmcmd}.$$
	97	# Note: Do not include initErrors in LOCAL_FILES yet; we'll do it later.
	98
	99	LOCAL_FILES=" $allNodes $clientNodes $inputNodes $processedNodes "
	100
	101
	102	# Local declarations
	103
	104	usageMsg=359
	105	newNodeNumbers=""
	106	backupServer=""
	107	rshPath=""
	108	rcpPath=""
	109	integer nodeCount
	110	integer n
	111	rc=0
	112
	113	Cflag=""
	114	Nflag=""
	115	pflag=""
	116	rflag=""
	117	Rflag=""
	118	sflag=""
	119	Carg=""
	120	parg=""
	121	rarg=""
	122	Rarg=""
	123	sarg=""
	124	otherOpt=""
	125
	126
	127	# Local functions
	128
	129
	130	##########################################################################
	131	#
	132	# Function: Specify the admin network for the GPFS cluster.
	133	#
	134	# Input: $1 - file or list of node descriptors containing the
	135	# adapter information as follows:
	136	# daemonNodeName:nodeRoles:adminNodeName:
	137	#
	138	# Returns: 0 - no errors encountered
	139	# non-zero - unexpected error
	140	#
	141	##########################################################################
	142	function specifyAdminNetwork # <networkInfo>
	143	{
	144	typeset sourceFile="mmchcluster.sh"
	145	[[ -n $DEBUG \|\| -n $DEBUGspecifyAdminNetwork ]] && set -x
	146	$mmTRACE_ENTER "$*"
	147	typeset networkInfo="$1"
	148
	149	typeset failedNodes sdrfsLine mmcommonOutput
	150	typeset nodeLine nodeName nodeName2 nodeStatus
	151	# typeset nodeRoles
	152	typeset hostResult nodeNumber adminNodeName adminIpa
	153	typeset nodeError newPrimaryName newBackupName commitOptions
	154
	155	typeset rc=0
	156	typeset changeMade=""
	157	typeset fatalError=""
	158	typeset sharedSdrservPort=""
	159
	160	# The input parameter may be either a list or a file. Which is it?
	161	if [[ -f $networkInfo ]]
	162	then
	163	# It is a file; verify its existence and create our own copy.
	164	checkUserFile $networkInfo $inputNodes
	165	[[ $? -ne 0 ]] && cleanupAndExit
	166	else
	167	# It is not a file, so it must be a list.
	168	# Convert the input node list into a file.
	169	$rm -f $inputNodes
	170	IFS=','
	171	for nodeDesc in $networkInfo
	172	do
	173	print -- "$nodeDesc" >> $inputNodes
	174	checkForErrors "writing to $inputNodes" $?
	175	done
	176	IFS="$IFS_sv" # Restore the default IFS setting.
	177	fi
	178
	179	# Check the input data for correctness.
	180	# We check all the records rather than stop on the first error.
	181	$rm -f $processedNodes
	182	$touch $processedNodes # Ensure the tmp file exists even if empty.
	183	IFS=":" # Change the field separator to ':'.
	184	exec 3<&-
	185	exec 3< $inputNodes
	186	while read -u3 nodeLine
	187	do
	188	# Parse the line.
	189	set -f ; set -- $nodeLine ; set +f
	190	nodeName=$1
	191	# nodeRoles=$2
	192	nodeName2=$3
	193	IFS="$IFS_sv" # Restore the default IFS setting.
	194
	195	# Make sure neither node name is specified more than once.
	196	$grep -qw $nodeName $processedNodes > /dev/null 2>&1
	197	if [[ $? -eq 0 ]]
	198	then
	199	# The node name is specified twice.
	200	printErrorMsg 347 $mmcmd $nodeName
	201	fatalError=yes
	202	fi
	203
	204	# Check the admin node name if it was specified.
	205	if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]
	206	then
	207	$grep -qw $nodeName2 $processedNodes > /dev/null 2>&1
	208	if [[ $? -eq 0 ]]
	209	then
	210	# The node is specified twice.
	211	printErrorMsg 347 $mmcmd $nodeName2
	212	fatalError=yes
	213	fi
	214	fi # end of if [[ -n $nodeName2 && $nodeName2 != $nodeName ]]
	215
	216	# Add the node names to the list of processed nodes.
	217	print -- "${nodeName}:${nodeName2}" >> $processedNodes
	218	checkForErrors "Writing to file $processedNodes" $?
	219
	220	IFS=":" # Change the separator back to ":" for the next iteration.
	221
	222	done # end of while read -u3 nodeLine
	223
	224	IFS="$IFS_sv" # Restore the default IFS settings.
	225
	226	# Return to the caller if we encountered an error.
	227	[[ -n $fatalError ]] && return 1
	228
	229	# Ensure that the local copy of the mmsdrfs is up-to-date.
	230	# Set up trap exception handling and obtain the lock.
	231	trap pretrap HUP INT QUIT KILL
	232	gpfsInitOutput=$(gpfsInit $lockId)
	233	setGlobalVar $? $gpfsInitOutput
	234
	235	# Stop here if the admin network support has not been activated yet.
	236	if [[ $sdrfsFormatLevel -eq 0 ]]
	237	then
	238	print -u2 "$mmcmd: The separate administration network support has not been enabled yet."
	239	print -u2 " Run \"mmchconfig release=LATEST\" to activate the new function."
	240	cleanupAndExit
	241	fi
	242
	243	# Determine the lookup order for resolving host names.
	244	[[ $osName != AIX ]] && resolveOrder=$(setHostResolveOrder)
	245
	246	# Go through the current mmsdrfs file. Increment the generation
	247	# number and build the node name list that will be needed later.
	248	# Remove all admin network related information.
	249	$rm -f $newsdrfs $nodefile
	250	newPrimaryName=""
	251	newBackupName=""
	252	IFS=":" # Change the field separator to ':'.
	253	exec 3<&-
	254	exec 3< $mmsdrfsFile
	255	while read -u3 sdrfsLine
	256	do
	257	# Parse the line.
	258	set -f ; set -A v -- - $sdrfsLine ; set +f
	259
	260	IFS="$IFS_sv" # Restore the default IFS settings.
	261	printLine=true # Assume the line will be printed.
	262
	263	case ${v[$LINE_TYPE_Field]} in
	264
	265	$VERSION_LINE ) # This is the global header line.
	266	# Save the version line for updating later.
	267	versionLine=$(print_newLine)
	268	printLine=false
	269	;;
	270
	271	$NODESET_HDR )
	272	# If the daemon and the mmsdrserv tcp ports are shared,
	273	# it will be necessary to ensure that the daemon is down
	274	# on the config server nodes if there names will change.
	275	if [[ -z ${v[$GETOBJECT_PORT_Field]} \|\|
	276	${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
	277	then
	278	sharedSdrservPort=yes
	279	fi
	280	;;
	281
	282	$MEMBER_NODE ) # This line describes a node.
	283	# Add the reliable node name to nodefile.
	284	print -- "${v[$REL_HOSTNAME_Field]}" >> $nodefile
	285	checkForErrors "writing to file $nodefile" $?
	286
	287	# Reset the node error flag.
	288	nodeError=""
	289
	290	# Obtain the data for this node from the node file.
	291	nodeLine=$($awk -F: ' \
	292	$1 == "'${v[$DAEMON_NODENAME_Field]}'" \|\| \
	293	$1 == "'${v[$REL_HOSTNAME_Field]}'" \|\| \
	294	$1 == "'${v[$NODE_NAME_Field]}'" \|\| \
	295	$1 == "'${v[$ADMIN_SHORTNAME_Field]}'" \|\| \
	296	$1 == "'${v[$NODE_NUMBER_Field]}'" \|\| \
	297	$1 == "'${v[$IPA_Field]}'" { \
	298	{ print $0 } \
	299	{ exit } \
	300	} \
	301	' $inputNodes)
	302
	303	if [[ -n $nodeLine ]]
	304	then
	305	# We found data for this node. Parse the input.
	306	IFS=":" # Change the field separator to ':'.
	307	set -f ; set -- $nodeLine ; set +f
	308	nodeName=$1
	309	nodeName2=$3
	310	IFS="$IFS_sv" # Restore the default IFS setting.
	311
	312	# Determine the daemon node name.
	313	if [[ -n ${v[$DAEMON_NODENAME_Field]} ]]
	314	then
	315	daemonNodeName=${v[$DAEMON_NODENAME_Field]}
	316	else
	317	daemonNodeName=${v[$REL_HOSTNAME_Field]}
	318	fi
	319
	320	# Did the user reset or specify the admin node name?
	321	if [[ -z $nodeName2 ]]
	322	then
	323	# The admin node name was null, indicating "reset";
	324	# set the admin node name to the daemon node name value.
	325	adminNodeName=$daemonNodeName
	326	adminShortName=${v[$NODE_NAME_Field]}
	327
	328	else
	329	# The admin node name was not null, indicating "specify";
	330	# Determine the IP address for the specified admin node name.
	331	hostResult=$($host $nodeName2)
	332	set -f ; set -- $hostResult ; set +f
	333	adminNodeName=$1
	334	adminShortName=${1%% \|.} # Exclude everything after the first dot.
	335	adminIpa=${3%%,*}
	336
	337	# Check that the admin node name has a valid IP address.
	338	if [[ -z $adminIpa ]]
	339	then
	340	# An invalid node name was specified.
	341	printErrorMsg 54 $mmcmd $nodeName2
	342	fatalError=yes
	343	break
	344	fi
	345
	346	# Invoke the checkAdapter function to ensure that
	347	# the specified adapter interface exists on the node.
	348	mmcommonOutput=$($mmcommon on1 ${v[$REL_HOSTNAME_Field]} \
	349	checkAdapter $adminIpa 2> $errMsg)
	350	rc=$?
	351	set -f ; set -- $mmcommonOutput ; set +f
	352	nodeStatus=$1
	353	if [[ $rc != 0 \|\| $nodeStatus != success ]]
	354	then
	355	# The checkAdapter call failed.
	356	# We will not define a new admin node name for this node
	357	# but we will continue to process the remaining nodes.
	358	# Tell the world what went wrong with this node.
	359	if [[ $nodeStatus = ipa_alias ]]
	360	then
	361	# IP address aliasing is not supported.
	362	printErrorMsg 476 $mmcmd $nodeName2
	363	elif [[ $nodeStatus = ipa_missing ]]
	364	then
	365	# The admin IP address is not known on the node.
	366	printErrorMsg 154 $mmcmd $nodeName2 ${v[$REL_HOSTNAME_Field]}
	367	elif [[ $rc = $MM_HostDown \|\| $rc = $MM_ConnectTimeout ]]
	368	then
	369	# The node cannot be reached.
	370	printErrorMsg 340 $mmcmd ${v[$REL_HOSTNAME_Field]}
	371	else
	372	# Unexpected error. Display all possible error messages.
	373	[[ -s $errMsg ]] && $cat $errMsg 1>&2
	374	[[ $rc -eq 0 ]] && rc=1
	375	checkForErrors "checkAdapter ${v[$REL_HOSTNAME_Field]}" $rc
	376	fi
	377
	378	# Append the node name to the list of failed nodes and
	379	# set a flag to indicate the node name did not check out.
	380	failedNodes="${failedNodes}\n\t${nodeName}"
	381	nodeError=yes
	382
	383	fi # end of if [[ $rc != 0 \|\| $nodeStatus != success ]]
	384
	385	fi # end of if [[ -z $nodeName2 ]]
	386
	387	# Update the member line if there was no error.
	388	if [[ -z $nodeError ]]
	389	then
	390	# Remember the new primary or backup server name for updating
	391	# the version line later if this is one of those servers.
	392	[[ ${v[$REL_HOSTNAME_Field]} = $primaryServer ]] && \
	393	newPrimaryName=$adminNodeName
	394	[[ ${v[$REL_HOSTNAME_Field]} = $backupServer ]] && \
	395	newBackupName=$adminNodeName
	396
	397	# Things checked out ok. Set the node name fields.
	398	v[$DAEMON_NODENAME_Field]=$daemonNodeName
	399	v[$REL_HOSTNAME_Field]=$adminNodeName
	400	v[$ADMIN_SHORTNAME_Field]=$adminShortName
	401	changeMade=yes
	402	fi
	403
	404	$rm -f $errMsg
	405
	406	fi # end of if [[ -n $nodeLine ]]
	407	;;
	408
	409	* ) # We are not interested in any other lines.
	410	;;
	411
	412	esac # end of case ${v[$LINE_TYPE_Field]} in
	413
	414	# Unless suppressed, write the line to the new mmsdrfs file.
	415	if [[ $printLine = true ]]
	416	then
	417	print_newLine >> $newsdrfs
	418	checkForErrors "writing to file $newsdrfs" $?
	419	fi
	420
	421	IFS=":" # Change the separator back to ":" for the next iteration.
	422
	423	done # end of while read -u3
	424
	425	IFS="$IFS_sv" # Restore the default IFS settings.
	426
	427	# Go through the mmsdrfs file to update the NSD servers admin node names.
	428	$rm -f $tmpsdrfs
	429	IFS=":"
	430	exec 3<&-
	431	exec 3< $newsdrfs
	432	while read -u3 sdrfsLine
	433	do
	434	# Parse the line.
	435	set -f ; set -A v -- - $sdrfsLine ; set +f
	436	IFS="$IFS_sv"
	437
	438	# Change some of the fields depending on the type of line.
	439	case ${v[$LINE_TYPE_Field]} in
	440
	441	$SG_DISKS ) # This is the line for some disk.
	442
	443	# If this disk is an NSD with a valid PVID value,
	444	# make sure the daemon nsd server names are recorded.
	445	if [[ ${v[$DISK_TYPE_Field]} = nsd && -n ${v[$PVID_Field]} ]]
	446	then
	447	# If a server node was specified, check that it is valid and
	448	# convert it to get the potentially new admin adapter name.
	449	# We determine whether a server was specified by checking for an
	450	# admin nsd server name, but we do not use that name for finding
	451	# the node information, since the old admin node name may
	452	# no longer exist as a result of the update we just did.
	453	# We use the daemon node name to find the node instead,
	454	# since mmchcluster -N does not change daemon node names.
	455	if [[ -n ${v[$NSD_PRIMARY_NODE_Field]} ]]
	456	then
	457	# If no daemon node name has yet been recorded for the
	458	# primary NSD server, determine and store it now.
	459	server=${v[$DAEMON_NSD_PRIMARY_Field]}
	460	if [[ -z $server ]]
	461	then
	462	server=$(checkAndConvertNodeValue \
	463	${v[$NSD_PRIMARY_NODE_Field]} $DAEMON_NODENAME_Field)
	464	checkForErrors "checkAndConvertNodeValue" $?
	465	v[$DAEMON_NSD_PRIMARY_Field]=$server
	466	fi
	467	# Use the primary server's daemon node name to obtain
	468	# the primary server's admin node name.
	469	v[$NSD_PRIMARY_NODE_Field]=$(checkAndConvertNodeValue \
	470	$server $REL_HOSTNAME_Field $newsdrfs)
	471	checkForErrors "checkAndConvertNodeValue $server" $?
	472	fi
	473	if [[ -n ${v[$NSD_BACKUP_NODE_Field]} ]]
	474	then
	475	# If no daemon node name has yet been recorded for the
	476	# backup NSD server, determine and store it now.
	477	backup=${v[$DAEMON_NSD_BACKUP_Field]}
	478	if [[ -z $backup ]]
	479	then
	480	backup=$(checkAndConvertNodeValue \
	481	${v[$NSD_BACKUP_NODE_Field]} $DAEMON_NODENAME_Field)
	482	checkForErrors "checkAndConvertNodeValue" $?
	483	v[$DAEMON_NSD_BACKUP_Field]=$backup
	484	fi
	485	# Use the backup server's daemon node name to obtain
	486	# the backup server's admin node name.
	487	v[$NSD_BACKUP_NODE_Field]=$(checkAndConvertNodeValue \
	488	$backup $REL_HOSTNAME_Field $newsdrfs)
	489	checkForErrors "checkAndConvertNodeValue $backup" $?
	490	fi
	491	fi # end of if (v[$DISK_TYPE_Field] == "nsd" && -n v[$PVID_Field])
	492	;;
	493
	494	* ) # We are not interested in any other lines.
	495	;;
	496
	497	esac # end Change some of the fields
	498
	499	# Build and write the line to the temp version of the mmsdrfs file.
	500	print_newLine >> $tmpsdrfs
	501	checkForErrors "writing to file $tmpsdrfs" $?
	502
	503	IFS=":" # Change the separator back to ":" for the next iteration.
	504
	505	done # end while read -u3 sdrfsLine
	506
	507	IFS="$IFS_sv" # Restore the default IFS settings.
	508
	509	# If a fatal error occurred, or if no changes were made,
	510	# release the lock, report any failed nodes, and return.
	511	if [[ -n $fatalError \|\| -z $changeMade ]]
	512	then
	513	freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
	514	if [[ -n $failedNodes ]]
	515	then
	516	# Administrative node names were not defined for nodes ...
	517	printErrorMsg 174 $mmcmd $failedNodes
	518	fi
	519	if [[ -n $fatalError ]]
	520	then
	521	printErrorMsg 389 $mmcmd # The command failed.
	522	else
	523	printErrorMsg 387 $mmcmd $mmcmd # Command quitting due to no valid nodes.
	524	fi
	525	return 1
	526	fi
	527
	528	# Create the updated version line and add it to the new sdrfs file.
	529	# The generation number is incremented and the server names may change.
	530	IFS=":" # Change the field separator to ':'.
	531	set -f ; set -A v -- - $versionLine ; set +f
	532	IFS="$IFS_sv" # Restore the default IFS setting.
	533	newGenNumber=${v[$SDRFS_GENNUM_Field]}+1
	534	v[$SDRFS_GENNUM_Field]=$newGenNumber
	535	[[ -n $newPrimaryName ]] && v[$PRIMARY_SERVER_Field]=$newPrimaryName
	536	[[ -n $newBackupName ]] && v[$BACKUP_SERVER_Field]=$newBackupName
	537	print_newLine >> $tmpsdrfs
	538	checkForErrors "writing to file $tmpsdrfs" $?
	539
	540	# If the GPFS and mmsdrserv daemons share the same tcp port number,
	541	# and the names of the primary or backup configuration servers are
	542	# changing, it is necessary to ensure that the GPFS daemon is down
	543	# on the server nodes and the mmsdrserv daemon is restarted.
	544	# Otherwise, the server nodes will continue giving (stale) Gpfs object
	545	# or return ESDR_NOT_SERVER errors.
	546	if [[ -n $sharedSdrservPort && ( -n $newPrimaryName \|\| -n $newBackupName ) ]]
	547	then
	548	# Get the names of the config servers.
	549	print -- "${v[$PRIMARY_SERVER_Field]}\n${v[$BACKUP_SERVER_Field]}" > $tmpNodes
	550	checkForErrors "writing to file $tmpNodes" $?
	551
	552	# Verify the daemon is down; do not lock the Gpfs object.
	553	printInfoMsg 453
	554	verifyDaemonInactive $tmpNodes
	555	[[ $? -ne 0 ]] && return 1
	556
	557	commitOptions="initLocalNodeData,KILLSDRSERV"
	558	else
	559	commitOptions="initLocalNodeData"
	560	fi # end of if [[ -n $sharedSdrservPort ]]
	561
	562	# Make sure the new sdrfs file is properly sorted.
	563	LC_ALL=C $SORT_MMSDRFS $tmpsdrfs -o $newsdrfs
	564
	565	# Put the new mmsdrfs file into the sdr. This will make the newly-added
	566	# admin nodes visible to the rest of the nodes in the cluster.
	567	trap "" HUP INT QUIT KILL
	568	gpfsObjectInfo=$(commitChanges $nsId $nsId \
	569	$gpfsObjectInfo $newGenNumber $newsdrfs $primaryServer $commitOptions)
	570	rc=$?
	571	if [[ $rc -ne 0 ]]
	572	then
	573	# We were unable to replace the file in the sdr.
	574	printErrorMsg 381 $mmcmd
	575	return 1
	576	fi
	577
	578	# Unlock the sdr.
	579	freeLockOnServer $primaryServer $ourNodeNumber >/dev/null
	580	trap posttrap HUP INT QUIT KILL
	581
	582	# Propagate the new mmsdrfs file to all nodes in the cluster.
	583	# This process is asynchronous.
	584	propagateSdrfsFile async $nodefile $newsdrfs $newGenNumber initLocalNodeData
	585
	586	# Report any nodes that did not check successfully.
	587	if [[ -n $failedNodes ]]
	588	then
	589	# Administrative node names were not defined for nodes ...
	590	printErrorMsg 174 $mmcmd $failedNodes
	591	fi
	592
	593	return 0
	594
	595	} #----- end of function specifyAdminNetwork -------------------
	596
	597
	598	###################################################################
	599	# This function is called if there is an interrupt after the new
	600	# mmsdrfs file was committed on the new primary and backup servers
	601	# but before the change was propagated to the rest of the nodes.
	602	###################################################################
	603	function localPosttrap
	604	{
	605	$mmTRACE_ENTER "$*"
	606
	607	# Tell the guy which nodes must be up and which command to run.
	608	printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
	609	printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
	610	cleanupAndExit 2
	611
	612	} #----- end of function localPosttrap ------------------------
	613
	614
	615
	616	######################
	617	# Mainline processing
	618	######################
	619
	620
	621	###################################################
	622	# Process the command arguments.
	623	###################################################
	624	[[ $arg1 = '-?' \|\| $arg1 = '-h' \|\| $arg1 = '--help' \|\| $arg1 = '--' ]] && \
	625	syntaxError "help" $usageMsg
	626
	627	[[ $argc -lt 2 ]] && \
	628	syntaxError "missingArgs" $usageMsg
	629
	630	while getopts :C:N:p:r:R:s: OPT
	631	do
	632	case $OPT in
	633
	634	C) # cluster name
	635	[[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
	636	Cflag="-$OPT"
	637	Carg=$OPTARG
	638	;;
	639
	640	N) # define/replace secondary network
	641	[[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
	642	Nflag="-$OPT"
	643	Narg=$OPTARG
	644	;;
	645
	646	p) # primary server
	647	[[ -n $pflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
	648	pflag="-$OPT"
	649	parg=$OPTARG
	650	otherOpt="-$OPT"
	651	;;
	652
	653	r) # remote shell command
	654	[[ -n $rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
	655	rflag="-$OPT"
	656	rarg=$OPTARG
	657	[[ $rarg = ${rarg#/} ]] && \
	658	syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$rarg"
	659	otherOpt="-$OPT"
	660	;;
	661
	662	R) # remote file copy command
	663	[[ -n $Rflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
	664	Rflag="-$OPT"
	665	Rarg=$OPTARG
	666	[[ $Rarg = ${Rarg#/} ]] && \
	667	syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$Rarg"
	668	otherOpt="-$OPT"
	669	;;
	670
	671	s) # secondary server
	672	[[ -n $sflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
	673	sflag="-$OPT"
	674	sarg=$OPTARG
	675	otherOpt="-$OPT"
	676	;;
	677
	678	+[CNprRs]) # Invalid option
	679	syntaxError "invalidOption" $usageMsg $OPT
	680	;;
	681
	682	:) # Missing argument
	683	syntaxError "missingValue" $usageMsg $OPTARG
	684	;;
	685
	686	*) # Invalid option
	687	syntaxError "invalidOption" $usageMsg $OPTARG
	688	;;
	689	esac
	690
	691	done
	692
	693	shift OPTIND-1
	694	[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg $1
	695
	696	[[ -n $sflag && $parg = LATEST ]] && \
	697	syntaxError "invalidCombination" $usageMsg "-s" "-p LATEST"
	698
	699	[[ -n $rflag && -n $pflag ]] && \
	700	syntaxError "invalidCombination" $usageMsg "-r" "-p"
	701
	702	[[ -n $rflag && -n $sflag ]] && \
	703	syntaxError "invalidCombination" $usageMsg "-r" "-s"
	704
	705	[[ -n $Rflag && -n $pflag ]] && \
	706	syntaxError "invalidCombination" $usageMsg "-R" "-p"
	707
	708	[[ -n $Rflag && -n $sflag ]] && \
	709	syntaxError "invalidCombination" $usageMsg "-R" "-s"
	710
	711	# The primary GPFS cluster configuration server cannot be removed.
	712	[[ -n $pflag && $parg = "" ]] && \
	713	syntaxError "missingValue" $usageMsg "-p"
	714
	715	[[ -n $Nflag && -n $otherOpt ]] && \
	716	syntaxError "invalidCombination" $usageMsg "-N" "$otherOpt"
	717
	718	[[ -n $Cflag && -n $otherOpt ]] && \
	719	syntaxError "invalidCombination" $usageMsg "-C" "$otherOpt"
	720
	721
	722	#############################################################################
	723	# If the request is to change a remote command, invoke the mmsetrcmd script.
	724	# Keep in mind that rarg and Rarg may include options for the respective
	725	# commands and, therefore, must always be quoted.
	726	#############################################################################
	727	if [[ -n $rflag \|\| -n $Rflag ]]
	728	then
	729	if [[ -z $Rflag ]]
	730	then
	731	$mmsetrcmd "$rflag" "$rarg"
	732	rc=$?
	733	elif [[ -z $rflag ]]
	734	then
	735	$mmsetrcmd "$Rflag" "$Rarg"
	736	rc=$?
	737	else
	738	$mmsetrcmd "$rflag" "$rarg" "$Rflag" "$Rarg"
	739	rc=$?
	740	fi
	741	cleanupAndExit $rc
	742	fi
	743
	744
	745	#############################################################
	746	# If the request is to specify changes to the admin network,
	747	# invoke the function to do the work and exit.
	748	#############################################################
	749	if [[ -n $Nflag ]]
	750	then
	751	specifyAdminNetwork "$Narg"
	752	cleanupAndExit $?
	753	fi
	754
	755
	756	########################################################
	757	# If the request is to change the cluster name,
	758	# invoke the mmsetrcmd script.
	759	########################################################
	760	if [[ -n $Cflag ]]
	761	then
	762	$mmsetrcmd "$Cflag" "$Carg"
	763	cleanupAndExit $?
	764	fi
	765
	766
	767	#################################################################
	768	# Set up trap exception handling and call the gpfsInit function.
	769	# It will attempt to ensure that the local copy of the mmsdrfs
	770	# and the rest of the GPFS system files are up-to-date.
	771	# Try to get the lock but do not fail if this is not possible.
	772	#################################################################
	773	trap pretrap HUP INT QUIT KILL
	774
	775	if [[ $parg = LATEST ]]
	776	then
	777	# The LATEST keyword was specified. Try to obtain the
	778	# most recent mmsdrfs file (i.e., the mmsdrfs file with the
	779	# highest gen number) among all the nodes in the cluster.
	780	# To do that, use the local mmsdrfs file as a starting point.
	781	getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
	782	gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
	783	rc=$?
	784
	785	else
	786	# The LATEST keyword was not specified. Try to obtain
	787	# the mmsdrfs file from one of the servers with locking.
	788	gpfsInitOutput=$(gpfsInit $lockId 2> $initErrors)
	789	rc=$?
	790	LOCAL_FILES="$LOCAL_FILES $initErrors "
	791	if [[ $rc -ne 0 ]]
	792	then
	793	# We failed to get the sdrfs file with a lock. Check whether
	794	# some other mm command currently holds the lock. If yes, give up.
	795	$grep -e "Timed out waiting for lock: Try again later." \
	796	-e "6027-1229" $initErrors > /dev/null 2>&1
	797	ec=$?
	798	if [[ $ec -eq 0 ]]
	799	then
	800	# Display the messages from gpfsInit.
	801	$cat $initErrors \| \
	802	$grep -v -e "6027-1227" -e "file is locked. Retrying..." 1>&2
	803	cleanupAndExit
	804	fi
	805
	806	# We failed to get the sdrfs file with a lock. Display any messages.
	807	$cat $initErrors 1>&2
	808	# Processing continues.
	809	printErrorMsg 437 $mmcmd
	810
	811	# Now try the gpfsInit again, but this time do not ask for a lock.
	812	# If there is a backup server, and if it is available,
	813	# we should be able to get the latest GPFS system files from there.
	814	gpfsInitOutput=$(gpfsInit nolock 2>/dev/null)
	815	rc=$?
	816	if [[ $rc -ne 0 ]]
	817	then
	818	# We also failed to get the sdrfs file without locking. Now try
	819	# to obtain the most recent mmsdrfs file (i.e., the mmsdrfs file
	820	# with the highest gen number) among all the nodes in the cluster.
	821	# To do that, use the local mmsdrfs file as a starting point.
	822	getNodeList $REL_HOSTNAME_Field $HOME_CLUSTER $mmsdrfsFile > $allNodes
	823	gpfsInitOutput=$(gpfsInitFromNonServer $allNodes $mmsdrfsFile)
	824	rc=$?
	825	fi
	826	fi
	827	fi # end of if [[ $parg = LATEST ]]
	828
	829	# Check whether we succeeded in obtaining the desired mmsdrfs file.
	830	if [[ $rc -ne 0 ]]
	831	then
	832	# Not enough nodes are available.
	833	printErrorMsg 378 $mmcmd
	834	cleanupAndExit
	835	fi
	836
	837	# Parse the output from the init function.
	838	setGlobalVar $rc $gpfsInitOutput
	839
	840	if [[ $MMMODE = single ]]
	841	then
	842	# Command currently not valid for cluster type single.
	843	printErrorMsg 376 $mmcmd single
	844	cleanupAndExit
	845	fi
	846
	847	if [[ $MMMODE != lc ]]
	848	then
	849	# Unknown GPFS nodeset type
	850	printErrorMsg 338 $mmcmd $MMMODE
	851	cleanupAndExit
	852	fi
	853
	854
	855	#######################################################
	856	# Determine the reliable hostnames of the new servers.
	857	#######################################################
	858	if [[ -n $pflag && $parg != LATEST ]]
	859	then
	860	# Find the name of the primary server.
	861	newPrimaryServer=$(checkAndConvertNodeValue $parg $REL_HOSTNAME_Field)
	862	if [[ $? -ne 0 ]]
	863	then
	864	printErrorMsg 352 $mmcmd $parg
	865	cleanupAndExit
	866	fi
	867	else
	868	# If -p not specified, the primary server remains the same.
	869	newPrimaryServer=$primaryServer
	870	fi # end of if [[ -n $parg && $parg != LATEST ]]
	871
	872	if [[ -n $sflag ]]
	873	then
	874	if [[ -n $sarg ]]
	875	then
	876	# Find the name of the secondary server.
	877	newBackupServer=$(checkAndConvertNodeValue $sarg $REL_HOSTNAME_Field)
	878	if [[ $? -ne 0 ]]
	879	then
	880	printErrorMsg 352 $mmcmd $sarg
	881	cleanupAndExit
	882	fi
	883	else
	884	# We are deleting the backup server (-s "" was specified).
	885	newBackupServer=""
	886	fi
	887	else
	888	# If -s not specified, the backup server remains the same.
	889	newBackupServer=$backupServer
	890	fi # end of if [[ -n $sarg ]]
	891
	892	# Cross check the two server names.
	893	if [[ $newBackupServer = $newPrimaryServer ]]
	894	then
	895	# The same node was specified as primary and backup server.
	896	printErrorMsg 346 $mmcmd
	897	cleanupAndExit
	898	fi
	899
	900	# Check whether anything needs to be done at all.
	901	[[ $newPrimaryServer = $primaryServer && \
	902	$newBackupServer = $backupServer && \
	903	$parg != LATEST ]] && \
	904	cleanupAndExit 0 # Servers are already as desired.
	905
	906
	907	#################################################################
	908	# Go through the current mmsdrfs file. Increment the generation
	909	# number and change the server names. Create a file with the
	910	# reliable hostnames of all nodes in the cluster.
	911	#################################################################
	912	$rm -f $newsdrfs $allNodes $clientNodes
	913	IFS=":" # Change the field separator to ':'.
	914	exec 3<&-
	915	exec 3< $mmsdrfsFile
	916	while read -u3 sdrfsLine
	917	do
	918	# Parse the line: split it on ':' into array v, with globbing disabled during the split.
	919	set -f ; set -A v -- - $sdrfsLine ; set +f # The '-' fills v[0], so the line's fields start at v[1].
	920	IFS="$IFS_sv" # Restore the default IFS settings.
	921
	922	# Change some of the fields depending on the type of line.
	923	case ${v[$LINE_TYPE_Field]} in
	924
	925	$VERSION_LINE )
	926	# Increment the generation number and record the new server names.
	927	newGenNumber=${v[$SDRFS_GENNUM_Field]}+1 # NOTE(review): evaluated arithmetically only if newGenNumber is declared integer elsewhere - confirm.
	928	v[$SDRFS_GENNUM_Field]=$newGenNumber
	929	v[$PRIMARY_SERVER_Field]=$newPrimaryServer
	930	v[$BACKUP_SERVER_Field]=$newBackupServer
	931	;;
	932
	933	$NODESET_HDR )
	934	# If the daemon and the mmsdrserv tcp ports are shared,
	935	# it will be necessary to ensure that the daemon is down
	936	# on the old and new config server nodes.
	937	if [[ -z ${v[$GETOBJECT_PORT_Field]} ||
	938	${v[$TCP_PORT_Field]} = ${v[$GETOBJECT_PORT_Field]} ]]
	939	then
	940	daemonMustBeDown=yes
	941	fi
	942	;;
	943
	944	$MEMBER_NODE )
	945	# If this is our node, save the reliable name.
	946	[[ ${v[$NODE_NUMBER_Field]} = $ourNodeNumber ]] && \
	947	ourNodeName=${v[$REL_HOSTNAME_Field]}
	948
	949	# All nodes will go in the allNodes file.
	950	print -- "${v[$REL_HOSTNAME_Field]}" >> $allNodes
	951	checkForErrors "writing to file $allNodes" $?
	952
	953	# The server nodes and the local node will
	954	# not go in the clientNodes file.
	955	if [[ ${v[$REL_HOSTNAME_Field]} != $newPrimaryServer &&
	956	${v[$REL_HOSTNAME_Field]} != $newBackupServer &&
	957	${v[$REL_HOSTNAME_Field]} != $ourNodeName ]]
	958	then
	959	print -- "${v[$REL_HOSTNAME_Field]}" >> $clientNodes
	960	checkForErrors "writing to file $clientNodes" $?
	961	fi
	962	;;
	963
	964	* ) # Pass all other lines without change.
	965	;;
	966
	967	esac # end Change some of the fields
	968
	969	# Build and write the line to the new mmsdrfs file.
	970	print_newLine >> $newsdrfs
	971	checkForErrors "writing to file $newsdrfs" $?
	972
	973	IFS=":" # Change the separator back to ":" for the next iteration.
	974
	975	done # end of while read -u3 sdrfsLine
	976
	977	IFS="$IFS_sv" # Restore the default IFS settings.
	978
	979
	980	#######################################################################
	981	# If the GPFS and mmsdrserv daemons share the same tcp port number,
	982	# it is necessary to ensure that the GPFS daemon is down on the old
	983	# and new configuration server nodes. Otherwise, the old server nodes
	984	# will continue giving (stale) Gpfs object information, while the new
	985	# servers will not be able to respond to requests because the GPFS
	986	# daemon cannot assume mmsdrserv duties if it is already running.
	987	#######################################################################
	988	if [[ -n $daemonMustBeDown && $parg != LATEST ]]
	989	then
	990	# Put the old and new server names in a file, one per line, as ONE print argument (separate arguments are joined with a blank, which would defeat sort -u).
	991	print -- \
	992	"$primaryServer\n$backupServer\n$newPrimaryServer\n$newBackupServer" > $tmpNodes
	993	checkForErrors "writing to file $tmpNodes" $? # NOTE(review): an empty backup server name leaves an empty line in the file - confirm verifyDaemonInactive tolerates it.
	994
	995	# Eliminate duplicate names.
	996	$sort -u $tmpNodes -o $tmpNodes
	997	checkForErrors "sort $tmpNodes" $?
	998
	999	# Verify the daemon is down; do not lock the Gpfs object.
	1000	printInfoMsg 453
	1001	verifyDaemonInactive $tmpNodes
	1002	[[ $? -ne 0 ]] && cleanupAndExit
	1003	fi # end of if [[ -n $daemonMustBeDown && $parg != LATEST ]]
	1004
	1005
	1006	######################################################
	1007	# First, put the new mmsdrfs file on the two servers.
	1008	# This must succeed no matter what.
	1009	######################################################
	1010	trap "" HUP INT QUIT KILL # Ignore interrupts during the commit. NOTE(review): KILL cannot be caught or ignored, so listing it has no effect.
	1011	gpfsObjectInfo=$(commitChanges \
	1012	$nsId $nsId $gpfsObjectInfo $newGenNumber $newsdrfs \
	1013	$newPrimaryServer FORCE $newBackupServer)
	1014	rc=$? # Exit status of commitChanges; its standard output was captured into gpfsObjectInfo.
	1015	if [[ $rc -ne 0 ]]
	1016	then
	1017	# Cannot replace file in the sdr.
	1018	printErrorMsg 381 $mmcmd
	1019
	1020	# The mmchcluster failed - get out.
	1021	# Tell the user which nodes must be up and which command to run.
	1022	printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
	1023	printErrorMsg 344 $mmcmd "mmchcluster"
	1024	cleanupAndExit
	1025	fi
	1026
	1027	# Restore interrupts: from here on the listed signals run localPosttrap.
	1028	trap localPosttrap HUP INT QUIT KILL
	1029
	1030
	1031	#################################################
	1032	# Propagate the changes to the non-server nodes.
	1033	#################################################
	1034	if [[ $ourNodeName != $newPrimaryServer &&
	1035	$ourNodeName != $newBackupServer ]]
	1036	then # The local node is neither of the new servers: install the new file locally.
	1037	$cp $newsdrfs $mmsdrfsFile
	1038	checkForErrors "writing to file $mmsdrfsFile" $?
	1039	fi
	1040
	1041	if [[ -s $clientNodes ]] # Only when there is at least one client node to update.
	1042	then
	1043	# Calculate the checksum of the new mmsdrfs file; the first word of the sum output is used.
	1044	sumOutput=$($sum $newsdrfs)
	1045	checkForErrors "sum $newsdrfs" $?
	1046	set -f ; set -- $sumOutput ; set +f
	1047	newSum=$1
	1048
	1049	#esjxx See if this can be replaced with pushSdr
	1050	# Tell all client nodes to copy the file from us; all output is collected in tmpfile.
	1051	$mmcommon onall $clientNodes $unreachedNodes copyRemoteFile \
	1052	$ourNodeName $mmsdrfsFile $mmsdrfsFile $newSum > $tmpfile 2>&1
	1053	rc=$? # NOTE(review): rc is not examined in this section; per-node results are taken from tmpfile below.
	1054
	1055	# Make a list of the nodes that were successfully updated. For each
	1056	# such node there will be a line in tmpfile that looks like this:
	1057	# nodename: copyRemoteFile:0
	1058	updatedNodes=$($awk -F: ' { \
	1059	if (($2 ~ "copyRemoteFile") && ($3 == "0")) { \
	1060	{ print $1 } \
	1061	} \
	1062	} ' $tmpfile)
	1063	checkForErrors awk $?
	1064
	1065	# Determine the nodes that did not get the new data: every client node missing from updatedNodes.
	1066	exec 3<&-
	1067	exec 3< $clientNodes
	1068	while read -u3 nodeName
	1069	do
	1070	for goodNode in $updatedNodes
	1071	do
	1072	[[ $nodeName = $goodNode ]] && \
	1073	break
	1074	done
	1075
	1076	[[ $nodeName != $goodNode ]] && \
	1077	failedNodes="${failedNodes}\n\t${nodeName}"
	1078	done
	1079
	1080	# If any nodes failed, put out as much information as possible.
	1081	if [[ -n $failedNodes ]]
	1082	then
	1083	# Collect error messages, if any, in file tmpfile2 (every tmpfile line that is not a copyRemoteFile result).
	1084	$grep -v "copyRemoteFile:" $tmpfile > $tmpfile2
	1085	[[ -s $tmpfile2 ]] && \
	1086	$cat $tmpfile2 1>&2
	1087
	1088	# Tell the user which nodes failed.
	1089	printErrorMsg 377 $mmcmd "$failedNodes"
	1090	# Tell the user which nodes must be up and which command to run.
	1091	printErrorMsg 350 $mmcmd "\n\t$newPrimaryServer\t$newBackupServer"
	1092	printErrorMsg 344 $mmcmd "mmchcluster -p LATEST"
	1093	cleanupAndExit
	1094	fi # end if [[ -n $failedNodes ]]
	1095
	1096	fi # end if [[ -s $clientNodes ]]
	1097
	1098
	1099	##############################
	1100	# Unlock the sdr.
	1101	##############################
	1102	[[ $sdrLocked = yes ]] && \
	1103	freeLockOnServer $primaryServer $ourNodeNumber > /dev/null # Released on $primaryServer, i.e. the primary server as it was before this change.
	1104	sdrLocked=no # Record that the lock is no longer held.
	1105	trap posttrap HUP INT QUIT KILL # Switch the signal handler from localPosttrap to posttrap.
	1106
	1107	# Issue "command was successful" message.
	1108	printErrorMsg 272 $mmcmd
	1109	cleanupAndExit 0
	1110

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gpfs_3.1_ker2.6.20/lpp/mmfs/bin/mmchcluster @ 195

Download in other formats: