#!/bin/ksh # IBM_PROLOG_BEGIN_TAG # This is an automatically generated prolog. # # # # Licensed Materials - Property of IBM # # (C) COPYRIGHT International Business Machines Corp. 1999,2006 # All Rights Reserved # # US Government Users Restricted Rights - Use, duplication or # disclosure restricted by GSA ADP Schedule Contract with IBM Corp. # # IBM_PROLOG_END_TAG # @(#)88 1.233.1.72 src/avs/fs/mmfs/ts/admin/mmsdrfsdef.sh, mmfs, avs_rgpfs24, rgpfs24s006a 9/14/06 13:36:59 ############################################################################### # # This file contains declarations for manipulating the mmsdrfs file. # The mmsdrfs file is kept in the sdr and contains global information # pertaining to GPFS in a given SP partition or GPFS cluster. # # The mmsdrfs file is shadowed on each node in /var/mmfs/gen/mmsdrfs. # # The following is an example of a mmsdrfs file: # # %%9999%%:00_VERSION_LINE::1:3:78::lc:c154n02.ibm.com:c154n03.ibm.com:1000:: ... # %%9999%%:03_COMMENT::1: # %%9999%%:03_COMMENT::2: This is a machine generated file. Do not edit! 
# %%9999%%:03_COMMENT::3: # %%home%%:10_NODESET_HDR:::3:TCP:500:1191:complete:6668::700:700:AIX: # %%home%%:20_MEMBER_NODE::1:1:c54n01:9.114.54.21:c54n01.ibm.com:manager::-1:0: # %%home%%:20_MEMBER_NODE::2:3:c54n03:9.114.54.23:c54n03.ibm.com:client::-1:2: # %%home%%:20_MEMBER_NODE::3:2:c54n02:9.114.54.22:c54n02.ibm.com:manager::-1:1: # %%home%%:30_SG_HEADR:gpfsXYZ::152:: # %%home%%:40_SG_ETCFS:gpfsXYZ:1:/mmfs/gpfsXYZ: # %%home%%:40_SG_ETCFS:gpfsXYZ:2: dev = /dev/gpfsXYZ # %%home%%:40_SG_ETCFS:gpfsXYZ:3: vfs = mmfs # %%home%%:40_SG_ETCFS:gpfsXYZ:4: nodename = - # %%home%%:40_SG_ETCFS:gpfsXYZ:5: mount = false # %%home%%:40_SG_ETCFS:gpfsXYZ:6: type = mmfs # %%home%%:40_SG_ETCFS:gpfsXYZ:7: account = false # %%home%%:50_SG_MOUNT:gpfsXYZ::rw:mtime:atime:userquota;groupquota: # %%home%%:60_SG_DISKS:gpfsXYZ:1:mmfslv01:4288:4004:dataAndMetadata: # %%home%%:60_SG_DISKS:gpfsXYZ:2:mmfslv02:4288:4005:dataOnly: # # Each line is a collection of colon-separated fields. # # The first field is always a cluster name. '%%home%%' indicates the # local cluster. '%%9999%%' indicates global information. # # The second field is a line type identifier. The first 2 characters # are used to specify a collating sequence for sorting the records # within the file. # # The third field, depending on the record type, is either unused, # or is a file system device name. This field allows related # records within the same nodeset to be grouped together. # # The fourth field is used, where needed, to sequence related records. # # The rest of the fields are collectively known as data fields. # The number and meaning of the data fields depends on the line type. # # The following types of lines are recognized. Lines that have # a different format are ignored without generating an error. # Additional data fields are also silently ignored. This should # provide for some backward compatibility with future releases. # # - 00_VERSION_LINE Identifies the version and generation number of # the mmsdrfs file. 
The meaning of the data fields # is as follows: # # SDRFS_FORMAT The format level of the file. # It is incremented every time a new piece of # information is added to the mmsdrfs file. # SDRFS_VERSION The major version of the file; currently 3. # SDRFS_GENNUM The generation number of the file. # It is incremented every time the # mmsdrfs file is changed. This number # is also kept in the SDR Gpfs class. # RUNNING_CMD When not null, daemon is not allowed to start # and file systems cannot be mounted. # CLUSTER_TYPE Type of cluster environment: lc, or single. # PRIMARY_SERVER Reliable name of primary sdr repository. # BACKUP_SERVER Reliable name of backup sdr repository. # HIGHEST_GPFS_DISK_NBR Highest number used to generate # a global name for an NSD disk. # RSH_PATH Full path name of command to execute in # place of rsh; for example, /bin/ssh. # RCP_PATH Full path name of command to execute in # place of rcp; for example, /bin/scp. # CLUSTERID Cluster identifier. # CLUSTER_SUBTYPE Always lc2 # GENNUM_TSTAMP Time stamp of the generation number. # CLUSTER_NAME Cluster name. # # - 03_COMMENT There can be any number of comment lines following # the global version line and at the beginning of each # nodeset. Currently, there are only three such lines # at the beginning of the mmsdrfs file. They are used # to warn against editing the file. # # - 10_NODESET_HDR There is one such line per GPFS nodeset. # The meaning of the data fields is as follows: # # NODE_COUNT Number of member nodes in the set # COMM_PROTOCOL Communication protocol: TCP # JOB_KEY (obsolete) # TCP_PORT Port number for TCP # CONVERSION_STATE (obsolete) # EVENTS_PORT Port number for Events Exporter (obsolete) # LAPI_MODE (obsolete) # MIN_DAEMON_VERSION Lowest release level on any node # MAX_DAEMON_VERSION Highest release level on any node # OS_ENVIRONMENT AIX, Linux, or mixed. # # - 20_MEMBER_NODE There is one such line for each node in the cluster. 
# The meaning of the data fields is as follows: # # NODE_NUMBER Number associated with the node # NODE_NAME Name specified when nodeset was created # IPA IP address for NODE_NAME # REL_HOSTNAME Reliable hostname # DESIGNATION client, manager # LAPI_ADAPTER (obsolete) # LAPI_WINDOW (obsolete) # SWITCH_NODE_NUMBER (obsolete) # ADDNODE_STATE 'new' or null. Indicates whether the node # should be considered for quorum. # ADAPTER_TYPE adapter type for the above IPA: css0, en0, .. # BACKUP_IPA (obsolete) # BACKUP_ADAPTER_TYPE (obsolete) # DAEMON_VERSION Confirmed release level installed on the node. # PRODUCT_VERSION Fileset or rpm release string identifier. # OS_NAME Operating system: AIX or Linux # # - 30_SG_HEADR A header record identifying a file system. # # - 40_SG_ETCFS /etc/filesystems information. # The meaning of the data fields is as follows: # # ETCFS_TEXT The exact text that should appear in /etc/filesystems # # - 50_SG_MOUNT File system mount options. The meaning of the # data fields is as follows: # # RW_OPT 'rw' or 'ro' # MTIME_OPT 'mtime' or 'nomtime' # ATIME_OPT 'atime' or 'noatime' # QUOTA_OPT 'userquota;groupquota' or NULL # OTHER_OPT comma-separated list of additional options # # - 60_SG_DISKS There is one such record for each disk in the file # system. The meaning of the data fields is as follows: # # VSD_NAME Name of the VSD disk # DISK_SIZE Size of the disk # FAILURE_GROUP Failure group to which the disk belongs # DISK_USAGE The following values are possible: # dataOnly, metadataOnly, descOnly, dataAndMetadata, # or unknown (fs created by an old release) # PVID Unique pvid (nsd disks) # DISK_TYPE 'nsd', 'vsd', 'lv', 'disk', ... 
#       NSD_PRIMARY_NODE  Name of primary NSD server
#       NSD_BACKUP_NODE   Name of secondary NSD server
#       DISK_SUBTYPE      'pr', 'ssa', or 'other'
#       VPATH_FLAG        'vpath' or 'notvpath'
#       NSD_SUBTYPE       disk type of the disk underlying the nsd
#                         (e.g., vsd, lv, hdisk, vpath, generic)
#       NAME_SOURCE       source of disk name ('user' for user-supplied
#                         or 'cmd' for command-generated)
#       NSD_SUBTYPE_DISKNAME  disk name of the disk underlying the nsd
#                         (needed by the daemon if the subtype is lv)
#
#  - 70_MMFSCFG    mmfs.cfg file information.  Since this file can have
#                  colons as part of the text, the lines can be split
#                  in a variable number of fields.  This is taken into
#                  account by the routines that manipulate these lines.
#                  The mmfs.cfg data should not contain trailing
#                  colons (a colon followed by a space is acceptable).
#
###############################################################################

###################################################################
# Pull in operating system dependent declarations and functions.
# The file is optional; nothing happens if it does not exist.
# The path is quoted so that a blank in $mmcmdDir or $osName is
# not word-split before it reaches the dot command.
###################################################################
[[ -e "${mmcmdDir}/mmsdrfsdef.${osName}" ]] &&  \
  . "${mmcmdDir}/mmsdrfsdef.${osName}"


###########################################################################
#
# Function:  Increment the generation number and optionally delete
#            or replace one or more lines in the mmsdrfs file.
#
# Input:     $1 - mmsdrfs file to use
#            $2 - the first one or more consecutive fields of
#                 the line to delete or replace
#            $3 - replacement line
#
# Output:    the new mmsdrfs generation number
#            (0 is printed if the file has no global version line)
#
# Returns:   0 - everything worked
#            1 - unexpected error
#
# Examples:  To just increment the generation number without
#            any other changes, use
#              newGenNumber=$(updateSdrfsFile $sdrfs)
#
#            To increment the generation number and remove
#            all lines for a particular nodeset, use
#              newGenNumber=$(updateSdrfsFile $sdrfs $nodesetId)
#
#            To increment the generation number and replace
#            a particular line, use
#              newGenNumber=$(updateSdrfsFile $sdrfs $oldLine $newLine)
#
# Notes:     $2 is applied as a regular expression anchored at the
#            beginning of the line.  Every matching line is dropped;
#            if $3 is given, it is written in place of each of them.
#            The global version line is always rewritten as exactly
#            27 colon-terminated fields.
#
###########################################################################
function updateSdrfsFile  # <sdrfsFile> [<lineToRemove> [<newLine>]]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGupdateSdrfsFile ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset sdrfs=$1
  typeset lineToRemove=$2
  typeset newLine=$3

  typeset outfile=${sdrfs}tmp

  [[ ! -f $sdrfs ]] && return 1
  $rm -f "$outfile"

  # All shell values are handed to awk with -v instead of being spliced
  # into the program text.  Lines containing blanks, slashes or double
  # quotes (e.g. SG_ETCFS or COMMENT lines) can therefore be used as the
  # pattern or the replacement without corrupting the awk script.
  # An empty pattern disables the delete/replace rule altogether; an empty
  # replacement (or the historical "::" marker) means "delete only".
  $awk -F:                                        \
       -v hdrPrefix="$GLOBAL_ID:$VERSION_LINE:"   \
       -v genField="$SDRFS_GENNUM_Field"          \
       -v rmPattern="$lineToRemove"               \
       -v newLine="$newLine"                      \
       -v outfile="$outfile"                      '
    BEGIN { gen = 0 }

    # Global header line: increment the generation number and
    # rewrite the line with a fixed number of fields.
    index($0, hdrPrefix) == 1 {
      gen = $genField + 1
      $genField = gen
      for (i = 1; i <= 27; i++)
        printf "%s:", $i >> outfile
      printf "\n" >> outfile
      next
    }

    # Line selected for removal: drop it, or write the replacement.
    rmPattern != "" && $0 ~ ("^" rmPattern) {
      if (newLine != "" && newLine != "::")
        print newLine >> outfile
      next
    }

    # All other lines are echoed without change.
    { print $0 >> outfile }

    END { print gen }
  ' "$sdrfs"
  checkForErrors awk $?

  # The file was updated successfully.
  $mv "$outfile" "$sdrfs"
  checkForErrors "mv $outfile $sdrfs" $?

  return 0

}  #----- end of function updateSdrfsFile --------------------


######################################################################
#
# Function:  Append the specified mmfs.cfg file to the mmsdrfs file.
#            Each mmfs.cfg line is preceded by the 4 leading fields
#            common to each mmsdrfs line.  The generation number
#            is not affected by this function.
#
# Input:     $1 - nodesetId
#            $2 - mmfs.cfg file to append
#            $3 - mmsdrfs file to append to
#
# Output:    The new mmsdrfs file
#
# Returns:   0 - worked
#            1 - unexpected error
#
# Example:   appendCfgFile $nodesetId $newcfg $tmpsdrfs
#
# Notes:     The mmfs.cfg lines are copied verbatim: leading and trailing
#            blanks and backslashes are preserved, and a last line that
#            is not terminated by a newline is still appended.
#            File descriptor 3 is used for reading and is closed on exit.
#            IFS is set to $IFS_sv before returning.
#
######################################################################
function appendCfgFile  # <nodesetId> <mmfs.cfg file> <mmsdrfs file>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGappendCfgFile ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodesetId=$1
  typeset cfgFile=$2
  typeset sdrfsFile=$3

  typeset -i lineNumber=0
  typeset -i rc=0
  typeset cfgLine

  [[ ! -f $sdrfsFile ]] && return 1

  exec 3<&-
  exec 3< "$cfgFile"

  # IFS is emptied for the read only (to preserve blanks and tabs);
  # -r keeps backslashes intact.  The extra test picks up a final
  # line that has no terminating newline.
  while IFS= read -r -u3 cfgLine || [[ -n $cfgLine ]]
  do
    (( lineNumber += 1 ))
    print -r -- "$nodesetId:$MMFSCFG::$lineNumber:$cfgLine" >> "$sdrfsFile"
    rc=$?
    # Stop at the first failed write so that the error is not masked
    # by a later successful one.
    [[ $rc -ne 0 ]] && break
  done
  exec 3<&-
  checkForErrors "writing to file $sdrfsFile" $rc

  IFS="$IFS_sv"  # Leave IFS at the saved default, as callers expect.

  return 0

}  #----- end of function appendCfgFile ----------------------


######################################################################
#
# Function:  Append the specified file to the mmsdrfs file.
#            Each line is preceded by the 4 leading fields
#            common to each mmsdrfs line.  The generation number
#            is not affected by this function.
#
# Input:     $1 - clusterName
#            $2 - file to append
#            $3 - line type value
#            $4 - mmsdrfs file to append to
#
# Output:    The new mmsdrfs file
#
# Returns:   0 - worked
#            1 - unexpected error
#
# Example:   appendFile $clusterName $filename $lineType $tmpsdrfs
#
######################################################################
function appendFile  # <clusterName> <file> <lineType> <mmsdrfs file>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGappendFile ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset clusterName=$1
  typeset fileName=$2
  typeset lineType=$3
  typeset sdrfsFile=$4

  [[ ! -f $sdrfsFile ]] && return 1

  # NR is the 1-based number of the input line; it becomes the
  # sequence field of the generated mmsdrfs line.
  $awk -v prefix="$clusterName:$lineType::" '
    { print prefix NR ":" $0 }
  ' "$fileName" >> "$sdrfsFile"
  checkForErrors "awk" $?

  return 0

}  #----- end of function appendFile -------------------------


######################################################################
#
# Function:  Set the running command field in the GPFS object.
#
# Input:     $1 - Value for the RUNNING_CMD field, or null.
#            $2 - Name of the primary config server.
#
# Output:    None.
#
# Returns:   Zero if successful, non-zero otherwise.
#            NOTE: all mmsdrcli errors are currently ignored and
#            the function always returns zero (see #esjxx below).
#
######################################################################
function setRunningCommand  # <commandName> <primaryServer>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGsetRunningCommand ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset commandName="$1"
  typeset primaryServer=$2

  typeset rc=0
  typeset sdrservPort mmsdrcliResult gpfsObject timeout

  [[ $commandName = null || $commandName = NULL ]] && commandName=""

  # Retrieve the tcp port number for the mmsdrserv daemon.
  sdrservPort=$(getSdrservPort "$mmsdrfsFile")
  checkForErrors getSdrservPort $?

  # If the mmsdrserv TCP port number is set to 0, return.
  # The user does not want to use the mmsdrserv daemon.
  [[ $sdrservPort -eq 0 ]] && return 0

  # Find the value of the mmsdrservTimeout parameter.
  timeout=$(showCfgValue mmsdrservTimeout)
  [[ -z $timeout ]] && timeout=10

  # Retrieve the current value of the Gpfs object.
  # Messages from mmsdrcli go to the mmsdrserv log.
  mmsdrcliResult=$($mmsdrcli getObj $primaryServer $sdrservPort $timeout 2>>"$mmsdrservLog")
  rc=$?

  if [[ $mmsdrcliResult = Gpfs* && $rc -eq 0 ]]
  then
    # The mmsdrcli call worked; parse the result and construct
    # the new Gpfs object.  It will include the new running_cmd value.
    # $1 of the parsed result is the "Gpfs" keyword and is not needed.
    IFS=':'
    set -f ; set -- $mmsdrcliResult ; set +f
    gpfsObject="$2:$3:$commandName:$5:$6:"
    IFS="$IFS_sv"

    # Set the new Gpfs object.
    $mmsdrcli setObj $primaryServer $sdrservPort $timeout "$gpfsObject" 2>>"$mmsdrservLog"
    rc=$?
    #esjxx - decide how to handle errors
  else
    :
    #esjxx - decide how to handle errors (restart mmsdrserv, etc.)
  fi  # end of if [[ $mmsdrcliResult = Gpfs* && $rc -eq 0 ]]

  $mmTRACE_EXIT "rc=$rc"
  return 0  #esjxx Temporarily ignore all errors.
  return $rc   # Not reached while the line above is in place.

}  #----- end of function setRunningCommand -----------------


################################################################
#
# Function:  Set the running command field in the global version
#            line of the mmsdrfs file ($mmsdrfsFile).
#
# Input:     $1 - Value for the RUNNING_CMD field, or null.
#
# Output:    None.
#
# Returns:   Zero if successful, fatal exit otherwise.
#
################################################################
function updateGpfsObject  # <cmdName>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGupdateGpfsObject ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset cmdName=$1

  # The work file must sit next to the file that is read and replaced
  # below ($mmsdrfsFile).  It was previously derived from $sdrfs, a
  # variable this function never sets.
  typeset outfile=${mmsdrfsFile}tmp

  $rm -f "$outfile"
  [[ $cmdName = null || $cmdName = NULL ]] && cmdName=""

  # Values are passed with -v so that they cannot alter the awk program.
  $awk -F:                                        \
       -v hdrPrefix="$GLOBAL_ID:$VERSION_LINE:"   \
       -v cmdField="$RUNNING_CMD_Field"           \
       -v cmdName="$cmdName"                      '
    # Global header line: set the running command field and rewrite
    # the line as exactly 27 colon-terminated fields.
    index($0, hdrPrefix) == 1 {
      $cmdField = cmdName
      for (i = 1; i <= 27; i++)
        printf "%s:", $i
      printf "\n"
      next
    }

    # All other lines are echoed without change.
    { print $0 }
  ' "$mmsdrfsFile" > "$outfile"
  checkForErrors "updateGpfsObject: awk: " $?

  # The file was updated successfully.
  $mv "$outfile" "$mmsdrfsFile"
  checkForErrors "updateGpfsObject: mv $outfile $mmsdrfsFile " $?

  return 0

}  #----- end of function updateGpfsObject ------------------


##################################################################
#
# Function:  Retrieve the GPFS object from the mmsdrfs file
#
# Input:     none
#
# Output:    The GPFS object information, built from fields 4, 6
#            and 7 of the first line of $mmsdrfsFile and written
#            to stdout as "<field4>:<field6>:<field7>:"
#
# Returns:   Zero if successful, non-zero otherwise.
#
##################################################################
function getGpfsObjectFromFile
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetGpfsObjectFromFile ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset versionLine=""

  # Read and parse the first line of the mmsdrfs file.
  versionLine=$($head -n 1 "$mmsdrfsFile")
  IFS=':'
  set -f ; set -- $versionLine ; set +f
  IFS="$IFS_sv"

  # At this point $2 has the line type (should be $VERSION_LINE),
  # $6 contains the gen number, and $7 - the running command field.

  # Perform a quick sanity check
  if [[ $2 != $VERSION_LINE ]]
  then
    # Corrupted mmsdrfs file
    printErrorMsg 278 $mmcmd 126
    print -u2 -r -- "$versionLine"
    return 1
  fi

  # Generate and display the Gpfs object
  print -r -- "$4:$6:$7:"
  return 0

}  #----- end of function getGpfsObjectFromFile -----------------


######################################################################
#
# Function:  Get the GPFS object information.
#            Note:  This function is expected to be called only
#                   on the primary or backup config server nodes.
#
# Input:     None.
#
# Output:    The Gpfs object.
#
# Returns:   Zero if successful, non-zero otherwise.
#
######################################################################
function getGpfsObject  #
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetGpfsObject ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset rc=0
  typeset gpfsObject
  typeset sdrservPort mmsdrcliResult timeout

  # Retrieve the tcp port number for the mmsdrserv daemon.
  sdrservPort=$(getSdrservPort "$mmsdrfsFile")
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    printErrorMsg 171 "getGpfsObject" "getSdrservPort" $rc
    return $rc
  fi

  # If the mmsdrserv TCP port number is set to 0,
  # the user does not want to use the mmsdrserv daemon.
  if [[ $sdrservPort -ne 0 ]]
  then
    # Find the value of the mmsdrservTimeout parameter.
    timeout=$(showCfgValue mmsdrservTimeout)
    [[ -z $timeout ]] && timeout=10

    # Retrieve the current value of the Gpfs object
    # using the mmsdrserv daemon.  Ignore errors.
    mmsdrcliResult=$($mmsdrcli getObj $ourNodeName $sdrservPort $timeout 2>>"$mmsdrservLog")
    rc=$?
    $mmTRACE "mmsdrcli getObj returned GpfsObj=$mmsdrcliResult rc=$rc"
  fi  # end of if [[ $sdrservPort -ne 0 ]]

  if [[ $mmsdrcliResult = Gpfs* && $rc -eq 0 ]]
  then
    # The mmsdrcli call worked; parse the result and construct
    # the Gpfs object to be returned to the caller.
    # $1 of the parsed result is the "Gpfs" keyword and is not needed.
    IFS=':'
    set -f ; set -- $mmsdrcliResult ; set +f
    gpfsObject="$2:$3:$4"
    IFS="$IFS_sv"
  else
    # The mmsdrcli call was not made or was not successful; get the
    # information from the header line in the mmsdrfs file.
    gpfsObject=$(getGpfsObjectFromFile)
    rc=$?

    # Try to restart the mmsdrserv daemon so that it is ready
    # for the next request that comes along.  Ignore errors.
    startSdrServ $sdrservPort >> "$mmsdrservLog" 2>&1
  fi  # end of if [[ $mmsdrcliResult = Gpfs* && $rc -eq 0 ]]

  # Show the result and return.
  [[ $rc -eq 0 ]] && print -r -- "$gpfsObject"
  return $rc

}  #----- end of function getGpfsObject -------------------------


###########################################################################
#
# Function:  Return a global attribute associated with the cluster.
#
# Input:     $1 - NODESET_HDR field to include in the list
#            $2 - nodesetId for the nodeset;
#                   defaults to $HOME_CLUSTER
#                   $GLOBAL_ID indicates all nodesets (obsolete)
#            $3 - mmsdrfs file to use; defaults to $mmsdrfsFile
#
# Output:    List of the requested fields.
#            Nothing is written if the mmsdrfs file does not exist.
#
# Returns:   0
#
# Examples:
#   commProtocol=$(getNodesetInfo $COMM_PROTOCOL_Field $HOME_CLUSTER $sdrfs)
#
###########################################################################
function getNodesetInfo  # <outputField> [<nodesetId> [<sdrfsFile>]]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetNodesetInfo ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset outputField=$1
  typeset nodesetId=$2
  typeset sdrfs=$3

  [[ -z $nodesetId ]] && nodesetId=$HOME_CLUSTER
  [[ -z $sdrfs ]] && sdrfs=$mmsdrfsFile
  [[ ! -f $sdrfs ]] && return 0

  # Generate a list with the requested information.
  # Appending "" to a -v variable forces a string (not numeric)
  # comparison, which is what a quoted literal would give.
  $awk -F:                               \
       -v lineTag=":$NODESET_HDR:"       \
       -v idField="$NODESETID_Field"     \
       -v wantedId="$nodesetId"          \
       -v globalId="$GLOBAL_ID"          \
       -v outField="$outputField"        '
    BEGIN { anyNodeset = ((wantedId "") == (globalId "")) }

    index($0, lineTag) > 0 && (anyNodeset || $idField == (wantedId "")) {
      print $outField
    }
  ' "$sdrfs"
  checkForErrors awk $?

  return 0

}  #----- end of function getNodesetInfo -------------------------


###########################################################################
#
# Function:  Create a list with a given value for all nodes in the cluster.
#
# Input:     $1 - MEMBER_NODE field to include in the list
#            $2 - nodesetId for the nodeset;
#                   defaults to $HOME_CLUSTER
#                   $GLOBAL_ID indicates all nodesets (obsolete)
#            $3 - mmsdrfs file to use; defaults to $mmsdrfsFile
#
# Output:    List of the requested fields, one per line.
#            Nothing is written if the mmsdrfs file does not exist.
#
# Returns:   0
#
# Example:   nodeList=$(getNodeList $NODE_NUMBER_Field $nodesetId $sdrfs)
#
###########################################################################
function getNodeList  # <outputField> [<nodesetId> [<sdrfsFile>]]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetNodeList ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset outputField=$1
  typeset nodesetId=$2
  typeset sdrfs=$3

  [[ -z $nodesetId ]] && nodesetId=$HOME_CLUSTER
  [[ -z $sdrfs ]] && sdrfs=$mmsdrfsFile
  [[ ! -f $sdrfs ]] && return 0

  # Generate a list with the requested node information.
  # Shell values are handed to awk with -v rather than being spliced
  # into the program, so that blanks or quotes in them cannot break
  # the script.  Appending "" to a -v variable forces a string (not
  # numeric) comparison, which is what a quoted literal would give.
  $awk -F:                               \
       -v lineTag=":$MEMBER_NODE:"       \
       -v idField="$NODESETID_Field"     \
       -v wantedId="$nodesetId"          \
       -v globalId="$GLOBAL_ID"          \
       -v outField="$outputField"        '
    BEGIN { anyNodeset = ((wantedId "") == (globalId "")) }

    index($0, lineTag) > 0 && (anyNodeset || $idField == (wantedId "")) {
      print $outField
    }
  ' "$sdrfs"
  checkForErrors awk $?

  return 0

}  #----- end of function getNodeList -----------------------------


###############################################################################
#
# Function:  Get data from a specified field for the node whose data for
#            a specified data field matches a specified value (whew!)
#
# Input:     $1 - field of the MEMBER_NODE line whose data is to be returned
#            $2 - field of the MEMBER_NODE line whose data is to be found
#            $3 - value to search for in the field specified by the 2nd parm
#            $4 - nodesetId for the nodeset; $GLOBAL_ID indicates all nodesets.
#            $5 - mmsdrfs file to use for the search
#
# Output:    requested field for the desired node.
#
# Returns:   0
#
# Notes:     $4 defaults to $HOME_CLUSTER and $5 to $mmsdrfsFile.
#            The search value is compared as a string.  Nothing is
#            written if no node matches or the file does not exist.
#
# Example:
#   nodeNumber=$(getNodeInfo $NODE_NUMBER_Field $NODE_NAME_Field \
#                            $node_name $HOME_CLUSTER $sdrfs)
#
###############################################################################
function getNodeInfo  # <retField> <srchField> <srchVal> [<nodesetId> [<sdrfsFile>]]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetNodeInfo ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset retField=$1
  typeset srchField=$2
  typeset srchVal=$3
  typeset nodesetId=$4
  typeset sdrfs=$5

  [[ -z $nodesetId ]] && nodesetId=$HOME_CLUSTER
  [[ -z $sdrfs ]] && sdrfs=$mmsdrfsFile
  [[ ! -f $sdrfs ]] && return 0

  # Find the requested node information.
  # Shell values are handed to awk with -v rather than being spliced
  # into the program, so that a search value containing blanks or
  # quotes cannot break the script.  Appending "" to a -v variable
  # forces a string (not numeric) comparison.
  $awk -F:                               \
       -v lineTag=":$MEMBER_NODE:"       \
       -v idField="$NODESETID_Field"     \
       -v wantedId="$nodesetId"          \
       -v globalId="$GLOBAL_ID"          \
       -v srchField="$srchField"         \
       -v srchVal="$srchVal"             \
       -v retField="$retField"           '
    BEGIN { anyNodeset = ((wantedId "") == (globalId "")) }

    index($0, lineTag) > 0 &&
    (anyNodeset || $idField == (wantedId "")) &&
    $srchField == (srchVal "") {
      print $retField
    }
  ' "$sdrfs"
  checkForErrors awk $?

  return 0

}  #----- end of function getNodeInfo ------------------------


#####################################################################
#
# Function:  Return all nodes in GPFS nodesets
# Input:     None
# Output:    Columnar list of all nodes
# Returns:   The return code of getNodeList
#
# Note:      Used by SMIT
#
#####################################################################
function getUsedNodes
{
  getNodeList $NODE_NAME_Field $GLOBAL_ID "$mmsdrfsFile"
  return $?

}  #----- end of function getUsedNodes -----------------------


#####################################################################
#
# Function:  Return all GPFS file system devices
# Input:     None
# Output:    Columnar list of all devices, each prefixed with /dev/
#
# Note:      Used by SMIT
#
#####################################################################
function getUsedDevices
{
  $awk -F:                               \
       -v lineTag=":$SG_HEADR:"          \
       -v devField="$DEV_NAME_Field"     '
    index($0, lineTag) > 0 { print "/dev/" $devField }
  ' "$mmsdrfsFile"
  checkForErrors awk $?

  return 0

}  #----- end of function getUsedDevices ----------------------


###############################################################################
#
# Function:  Create a file with certain information for all nodes
#            in the cluster.
#
# Input:     $1 - MEMBER_NODE field to include in the file
#            $2 - nodesetId for the nodeset.
#                 $GLOBAL_ID indicates all nodesets.
#            $3 - mmsdrfs file to use
#            $4 - name of the file to create
#
# Output:    Number of records in the output file
#            (nothing is written to stdout if the mmsdrfs file does
#            not exist or no output file name is given; the output
#            file is not created if no node matches)
#
# Returns:   0
#
# Example:
#   nodeCount=$(getNodeFile $REL_HOSTNAME_Field $nodesetId $mmsdrfsFile $nodefile)
#
###############################################################################
function getNodeFile  # <outputField> <nodesetId> <sdrfsFile> <outfile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetNodeFile ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset outputField=$1
  typeset nodesetId=$2
  typeset sdrfs=$3
  typeset outfile=$4

  [[ ! -f $sdrfs ]] && return 0
  [[ -z $outfile ]] && return 0
  $rm -f "$outfile"

  # Generate the requested node information.
  # Shell values are handed to awk with -v rather than being spliced
  # into the program.  Appending "" to a -v variable forces a string
  # (not numeric) comparison.
  $awk -F:                               \
       -v lineTag=":$MEMBER_NODE:"       \
       -v idField="$NODESETID_Field"     \
       -v wantedId="$nodesetId"          \
       -v globalId="$GLOBAL_ID"          \
       -v outField="$outputField"        \
       -v outfile="$outfile"             '
    BEGIN { n = 0; anyNodeset = ((wantedId "") == (globalId "")) }

    index($0, lineTag) > 0 && (anyNodeset || $idField == (wantedId "")) {
      print $outField >> outfile
      n = n + 1
    }

    END { print n }
  ' "$sdrfs"
  checkForErrors awk $?

  return 0

}  #----- end of function getNodeFile ---------------------------


#########################################################################
#
# Function:  Create a file containing the names of all the nodes in the
#            cluster that are within a specified SG_DISKS node class.
#
# Input:     $1 - SG_DISKS node class for which node names are desired
#                 (currently, the only supported class is nsdnodes)
#            $2 - name of the file to create
#
# Output:    The output file contains the sorted, unique names of the
#            primary and backup NSD servers found in $mmsdrfsFile.
#
# Returns:   zero if any nodes were found (or if no output file name
#            was given), non-zero otherwise
#
# Example:
#   getNsdNodeClass nsdnodes $nodefile
#   rc=$?
#
#########################################################################
function getNsdNodeClass  # <nodeClass> <outfile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetNsdNodeClass ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodeClass=$1
  typeset outfile=$2

  [[ -z $outfile ]] && return 0
  $rm -f "$outfile"

  # Generate the requested node information.
  # awk itself appends to the output file, so the file is left
  # absent when nothing matches.
  $awk -F:                                         \
       -v lineTag=":$SG_DISKS:"                    \
       -v nodeClass="$nodeClass"                   \
       -v primaryField="$NSD_PRIMARY_NODE_Field"   \
       -v backupField="$NSD_BACKUP_NODE_Field"     \
       -v outfile="$outfile"                       '
    index($0, lineTag) > 0 && (nodeClass "") == "nsdnodes" {
      if ($primaryField != "")
        print $primaryField >> outfile
      if ($backupField != "")
        print $backupField >> outfile
      next
    }
  ' "$mmsdrfsFile"
  checkForErrors awk $?

  # Return with return code 1 if no nodes were found.
  if [[ ! -s $outfile ]]
  then
    # No nodes matched the input specification.
    printErrorMsg 345 $mmcmd
    return 1
  fi

  # Since the same nsd server may be specified on multiple
  # SG_DISKS lines, sort the output file for uniqueness.
  $sort -u "$outfile" -o "$outfile"
  checkForErrors "sort -u $outfile" $?

  return 0

}  #----- end of function getNsdNodeClass -----------------------------


##############################################################################
#
# Function:  Create a file containing the names of all the nodes in the
#            cluster that are within a specified MEMBER_NODE node class.
#
# Input:     $1 - node class (quorumnodes, nonquorumnodes, managernodes,
#                 clientnodes, or all)
#            $2 - field of the MEMBER_NODE line to be returned in the
#                   output node file
#            $3 - "output the input node value" flag (yes or no)
#            $4 - output node file
#
# Output:    The output node file has been created.
#            Each line of this file looks like:
#               nodeOutputValue nodeInputValue   (if 3rd arg is yes)
#            or just
#               nodeOutputValue                  (if 3rd arg is not yes)
#            The "input value" written here is the node class name.
#            If the daemon node name is requested but is empty for a
#            node, the value of $REL_HOSTNAME_Field is written instead.
#
# Returns:   0 indicates complete success (nodes were found, no errors)
#            1 indicates the class was found to contain no nodes
#            2 indicates there was an error in the input parameters
#
# Example:
#   createNodefileForClass "$nodeList" $DAEMON_NODENAME_Field no $nodefile
#   [[ $? -ne 0 ]] && cleanupAndExit
#
##############################################################################
function createNodefileForClass  # <nodeClass> <desiredField>
                                 # <outputInputNodeValue> <outfile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcreateNodefileForClass ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodeClass=$1
  typeset desiredField=$2
  typeset outputInputNodeValue=$3
  typeset outfile=$4

  # Declared local so that they do not leak into the caller's scope.
  typeset nodeClassField nodeClassFieldValue

  [[ -z $outfile ]] && return 2
  $rm -f "$outfile"

  # Map the class name onto the MEMBER_NODE field that identifies
  # the class and the value that field must have.
  case $nodeClass in
    quorumnodes )
      nodeClassField=$CORE_QUORUM_Field
      nodeClassFieldValue=$quorumNode
      ;;
    nonquorumnodes )
      nodeClassField=$CORE_QUORUM_Field
      nodeClassFieldValue=$nonQuorumNode
      ;;
    managernodes )
      nodeClassField=$DESIGNATION_Field
      nodeClassFieldValue=$MANAGER
      ;;
    clientnodes )
      nodeClassField=$DESIGNATION_Field
      nodeClassFieldValue=$CLIENT
      ;;
    all )
      nodeClassField=$LINE_TYPE_Field
      nodeClassFieldValue=$MEMBER_NODE
      ;;
    * )
      # unknown class
      print -u2 "createNodefileForClass:  class $nodeClass not supported"
      return 2
      ;;
  esac  # end of case $nodeClass in

  # Generate the requested node information.
  # Shell values are handed to awk with -v rather than being spliced
  # into the program.  Appending "" to a -v variable forces a string
  # (not numeric) comparison.
  $awk -F:                                      \
       -v lineTag=":$MEMBER_NODE:"              \
       -v classField="$nodeClassField"          \
       -v classValue="$nodeClassFieldValue"     \
       -v wantedField="$desiredField"           \
       -v daemonField="$DAEMON_NODENAME_Field"  \
       -v adminField="$REL_HOSTNAME_Field"      \
       -v echoClass="$outputInputNodeValue"     \
       -v nodeClass="$nodeClass"                \
       -v outfile="$outfile"                    '
    index($0, lineTag) > 0 && $classField == (classValue "") {
      # Fall back to the admin node name when the daemon node name
      # was requested but is not set for this node.
      outField = wantedField
      if (wantedField == daemonField && $daemonField == "")
        outField = adminField

      if ((echoClass "") == "yes")
        print $outField " " nodeClass >> outfile
      else
        print $outField >> outfile
      next
    }
  ' "$mmsdrfsFile"
  checkForErrors awk $?

  # Return with return code 1 if no nodes were found.
  if [[ ! -s $outfile ]]
  then
    # No nodes matched the input specification.
    printErrorMsg 345 $mmcmd
    return 1
  fi

  return 0

}  #----- end of function createNodefileForClass --------------------


###############################################################################
#
# Function:  Convert input node data to an output file containing desired
#            node data.
#
# Input:    $1 - input node list (comma-separated), node file, or node class
#           $2 - field of the MEMBER_NODE line to be returned in the
#                  output node file
#           $3 - "output the input node value" flag (yes or no)
#           $4 - output node file
#
# Output:   The output node file has been created.
#           Each line of this file looks like:
#             nodeOutputValue nodeInputValue   (if 3rd arg is yes)
#           or just
#             nodeOutputValue                  (if 3rd arg is not yes)
#
# Returns:  zero indicates complete success (nodes were found, no errors);
#           non-zero indicates an input node was not found in the sdrfs file,
#           or that no nodes were found satisfying the input specification
#
# Note:  The "node identifiers" in the input list or file could be the
#        short or long admin node name, the short or long communications
#        ("daemon") node name, the node number, a node number range,
#        or the IP address corresponding to the daemon node name.
#
#        The scratch file $tmpNodes holds the normalized input and is
#        removed once the input has been processed.  File descriptor 3
#        is used to read it and is closed again after the read loop.
#
# Example:
#    createVerifiedNodefile "$nodeList" $DAEMON_NODENAME_Field no $nodefile
#    [[ $? -ne 0 ]] && cleanupAndExit
#    (proceed to use $nodefile . . . )
#
###############################################################################
function createVerifiedNodefile  # <nodeInput> <desiredField> <outputInputValue> <outputFile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcreateVerifiedNodefile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset inputData=$1
  typeset desiredField=$2
  typeset outputInputNodeValue=$3
  typeset outputFile=$4

  typeset nodeName errorFound inValue desiredValue inputNodefile
  typeset hostResult ipa      # scratch values for the IP address fallback
  typeset -l inputData_lc=$inputData
  typeset -i int1 int2 n

  # The input data can be a node list, a node file, or a node class.
  # Determine which it is, and if necessary, convert it into a file.
  if [[ $inputData = /* ]]
  then
    # The input data is an absolute pathname node file.
    inputNodefile=$inputData

  elif [[ $inputData = *,* ]]
  then
    # The input data is a node list.  Convert the list into a file.
    $rm -f $tmpNodes
    IFS=','
    for inValue in $inputData
    do
      print -- "$inValue" >> $tmpNodes
      checkForErrors "writing to $tmpNodes" $?
    done
    IFS="$IFS_sv"    # Restore the default IFS setting.
    inputNodefile=$tmpNodes

  elif [[ $inputData_lc = quorumnodes    ||
          $inputData_lc = nonquorumnodes ||
          $inputData_lc = managernodes   ||
          $inputData_lc = clientnodes    ||
          $inputData_lc = all            ]]
  then
    # Create the desired output for the node class and return.
    createNodefileForClass $inputData_lc $desiredField $outputInputNodeValue $outputFile
    return $?

  elif [[ $inputData_lc = nsdnodes ]]
  then
    # The nsdnodes node class must be handled differently
    # because it is determined from the SG_DISKS lines.
    # Create a file containing all the nodes in the class.
    getNsdNodeClass $inputData_lc $tmpNodes
    [[ $? -ne 0 ]] && return 1
    inputNodefile=$tmpNodes

  elif [[ $inputData = +([0-9])*([0-9])"-"+([0-9])*([0-9]) ]]
  then
    # The input data is a range of node numbers.
    # Overwrite (do not append to) the scratch file so that leftovers
    # from an earlier invocation cannot be mistaken for input.
    print -- "$inputData" > $tmpNodes
    checkForErrors "writing to $tmpNodes" $?
    inputNodefile=$tmpNodes

  else
    # The input data is a single node or a relative pathname file.
    # Run it through the checking routine to determine whether it is
    # the name of an existing node.  If it is, use it as a node name.
    # Otherwise, use the data as an input node file.
    nodeName=$(checkAndConvertNodeValue $inputData $desiredField 2>/dev/null)
    if [[ $? -eq 0 ]]
    then
      # The input value was a node name.
      # Print the desired value(s) to the output file and return.
      if [[ $outputInputNodeValue = yes ]]
      then
        print -- "$nodeName $inputData" > $outputFile
        checkForErrors "writing to file $outputFile" $?
      else
        print -- "$nodeName" > $outputFile
        checkForErrors "writing to file $outputFile" $?
      fi
      return 0
    elif [[ -s $inputData ]]
    then
      # The input value is not a node name but there is a file by that name.
      # Assume the file contains node names.
      inputNodefile=$inputData
    else
      # An invalid node name was specified.
      printErrorMsg 54 $mmcmd $inputData
      # No nodes matched the input specification.
      printErrorMsg 345 $mmcmd
      return 1
    fi  # end of if [[ $? -eq 0 ]]
  fi  # end of if [[ $inputData = /* ]]

  # If we are dealing with an input file provided by the user,
  # ensure the file is OK and create our own copy.
  if [[ $inputNodefile != $tmpNodes ]]
  then
    checkUserFile $inputNodefile $tmpNodes
    [[ $? -ne 0 ]] && return 1
    inputNodefile=$tmpNodes
  fi

  # Create a file containing the desired node data
  # for each node passed in the input file.
  errorFound=""
  $rm -f $outputFile    # Make sure the file is empty.
  exec 3<&-
  exec 3< $inputNodefile
  while read -u3 inValue
  do
    # Skip empty and comment lines.
    [[ $inValue = *([$BLANKchar$TABchar])   ]] && continue
    [[ $inValue = *([$BLANKchar$TABchar])#* ]] && continue

    if [[ $inValue = +([0-9])*([0-9])"-"+([0-9])*([0-9]) ]]
    then
      # The user specified a range of node numbers.
      # Parse the input to determine the min and max node numbers.
      IFS='-'
      set -f ; set -- $inValue ; set +f
      int1=$1
      int2=$2
      IFS="$IFS_sv"

      # Check the range for correctness.
      if [[ $int1 -gt $int2 ]]
      then
        printErrorMsg 569 $mmcmd $int1 $int2
        errorFound=true
        continue
      fi

      n=$int1-1
      while (( (n=n+1) <= $int2 ))
      do
        # Find the desired values for the nodes in the range.
        # Since a range was specified, we are looking at node numbers.
        # The second awk rule stops the scan at the first line that
        # follows the MEMBER_NODE lines.
        desiredValue=$($awk -F: '                        \
          $'$LINE_TYPE_Field' == "'$MEMBER_NODE'" {      \
            if ($'$NODE_NUMBER_Field' == "'$n'") {       \
              { print $'$desiredField' }                 \
              { exit }                                   \
            }                                            \
          }                                              \
          $'$LINE_TYPE_Field' == "'$MMFSCFG'" {          \
            { exit }                                     \
          }                                              \
        ' $mmsdrfsFile)
        checkForErrors awk $?

        # If the node was found, append a line to the output file.
        # Node numbers in the range that do not exist are skipped silently.
        if [[ -n $desiredValue ]]
        then
          # Append the desired value to the output file.
          if [[ $outputInputNodeValue = yes ]]
          then
            print -- "$desiredValue $n" >> $outputFile
            checkForErrors "writing to file $outputFile" $?
          else
            print -- "$desiredValue" >> $outputFile
            checkForErrors "writing to file $outputFile" $?
          fi
        fi  # end of if [[ -n $desiredValue ]]
      done  # end of while (( (n=n+1) <= $int2 ))

    else
      # Find the desired value for this node.
      # The node may have been specified using the short or long admin
      # node name, the short or long daemon node name, the node number,
      # or the IP address corresponding to the daemon node name.
      # If the daemon node name is requested but its field is empty,
      # the reliable hostname is returned instead.
      # The second awk rule stops the scan at the first line that
      # follows the MEMBER_NODE lines.
      desiredValue=$($awk -F: '                                     \
        $'$LINE_TYPE_Field' == "'$MEMBER_NODE'" {                   \
          if ($'$ADMIN_SHORTNAME_Field' == "'$inValue'" ||          \
              $'$REL_HOSTNAME_Field'    == "'$inValue'" ||          \
              $'$NODE_NAME_Field'       == "'$inValue'" ||          \
              $'$DAEMON_NODENAME_Field' == "'$inValue'" ||          \
              $'$NODE_NUMBER_Field'     == "'$inValue'" ||          \
              $'$IPA_Field'             == "'$inValue'") {          \
            if ('$desiredField' == '$DAEMON_NODENAME_Field' &&      \
                $'$DAEMON_NODENAME_Field' == "") {                  \
              { print $'$REL_HOSTNAME_Field' }                      \
            } else {                                                \
              { print $'$desiredField' }                            \
            }                                                       \
            { exit }                                                \
          }                                                         \
        }                                                           \
        $'$LINE_TYPE_Field' == "'$MMFSCFG'" {                       \
          { exit }                                                  \
        }                                                           \
      ' $mmsdrfsFile)
      checkForErrors awk $?

      # Did we fail to find the node and the input value
      # appears to be an IP address?  If so, check whether
      # it is the IP address of an admin network adapter.
      if [[ -z $desiredValue &&
            $inValue = +([0-9]).+([0-9]).+([0-9]).+([0-9]) ]]
      then
        hostResult=$($host $inValue)
        set -f ; set -- $hostResult ; set +f
        nodeName=$1
        ipa=${3%%,*}    # Exclude everything after the first comma.
        if [[ $ipa = $inValue ]]
        then
          # If the IP address corresponds to an admin node name,
          # obtain the desired value for the node.
          desiredValue=$($awk -F: '                              \
            $'$LINE_TYPE_Field' == "'$MEMBER_NODE'" {            \
              if ($'$REL_HOSTNAME_Field' == "'$nodeName'") {     \
                { print $'$desiredField' }                       \
                { exit }                                         \
              }                                                  \
            }                                                    \
            $'$LINE_TYPE_Field' == "'$MMFSCFG'" {                \
              { exit }                                           \
            }                                                    \
          ' $mmsdrfsFile)
          checkForErrors awk $?
        fi  # end of if [[ $ipa = $inValue ]]
      fi  # end of if [[ -z $desiredValue && ...

      # If the node was found, append a line to the output file.
      # If the node was not found, issue an error message.
      if [[ -n $desiredValue ]]
      then
        # Append the desired value to the output file.
        if [[ $outputInputNodeValue = yes ]]
        then
          print -- "$desiredValue $inValue" >> $outputFile
          checkForErrors "writing to file $outputFile" $?
        else
          print -- "$desiredValue" >> $outputFile
          checkForErrors "writing to file $outputFile" $?
        fi
      else
        # An invalid node name was specified.
        printErrorMsg 54 $mmcmd $inValue
        errorFound=true
      fi  # end of if [[ -n $desiredValue ]]

    fi  # end of if [[ $inValue = +([0-9])*([0-9])"-"+([0-9])*([0-9]) ]]

  done  # end of while read -u3 inValue

  # The input has been consumed; release the file descriptor.
  exec 3<&-

  # Get rid of the temp file that was used.
  $rm -f $tmpNodes

  # Is the output file empty or non-existent?
  if [[ ! -s $outputFile ]]
  then
    # No nodes matched the input specification.
    printErrorMsg 345 $mmcmd
    return 1
  fi

  # Exit with a non-zero return code if an error was reported.
  [[ -n $errorFound ]] && return 1

  return 0

}  #----- end of function createVerifiedNodefile -------------------


##############################################################################
#
# Function:  Check and convert input node data to an output node value.
#
# Input:     $1 - input node data value
#            $2 - field of the MEMBER_NODE line to be returned
#            $3 - (optional) the sdrfs file to be used
#                   (If not specified, the official one is used.)
#
# Output:    the desired output node value
#
# Returns:   zero indicates complete success
#            non-zero indicates input data was not found in the sdrfs file
#
# Note:  The value passed as the input node data could be the short or
#        long GPFS admin node name, the short or long GPFS communications
#        ("daemon") node name, the node number, or the IP address
#        corresponding to the daemon node name.
#
# Example:
#   nodeName=$(checkAndConvertNodeValue $ipa $REL_HOSTNAME_Field)
#   [[ $? -ne 0 ]] && cleanupAndExit
#   (proceed to use $nodeName . . .
#   )
#
# Lookup: only lines containing ":$MEMBER_NODE:" are examined and the
# first matching line wins.  If the daemon node name is requested but
# its field is empty, the reliable hostname is returned instead.
# Error message 54 is issued when nothing matches.
#
##############################################################################
function checkAndConvertNodeValue  # <inputValue> <desiredField> [<sdrfsFile>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcheckAndConvertNodeValue ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset inputValue=$1
  typeset desiredField=$2
  typeset sdrfsFile=$3

  typeset outputValue

  # If the sdrfs file was not passed, use the official one.
  [[ -z $sdrfsFile ]] && sdrfsFile=$mmsdrfsFile

  # Find the specified node value for the specified node.
  # The node may have been specified using the short or long admin
  # node name, the short or long daemon node name, the node number,
  # or the IP address corresponding to the daemon node name.
  outputValue=$($awk -F: '                                        \
    /':$MEMBER_NODE:'/ {                                          \
      if ($'$ADMIN_SHORTNAME_Field' == "'$inputValue'" ||         \
          $'$REL_HOSTNAME_Field'    == "'$inputValue'" ||         \
          $'$NODE_NAME_Field'       == "'$inputValue'" ||         \
          $'$DAEMON_NODENAME_Field' == "'$inputValue'" ||         \
          $'$NODE_NUMBER_Field'     == "'$inputValue'" ||         \
          $'$IPA_Field'             == "'$inputValue'") {         \
        if ('$desiredField' == '$DAEMON_NODENAME_Field' &&        \
            $'$DAEMON_NODENAME_Field' == "") {                    \
          { print $'$REL_HOSTNAME_Field' }                        \
        } else {                                                  \
          { print $'$desiredField' }                              \
        }                                                         \
        { exit }                                                  \
      }                                                           \
    }                                                             \
  ' $sdrfsFile)
  checkForErrors awk $?

  if [[ -z $outputValue ]]
  then
    # An invalid node name was specified.
    printErrorMsg 54 $mmcmd $inputValue
    return 1
  fi

  # Quote the value and end option parsing explicitly: field values
  # such as "-1" would otherwise be taken for a print option.
  print -- "$outputValue"
  return 0

}  #----- end of function checkAndConvertNodeValue -----------------


######################################################################
#
# Function:  Find the nodeset to which this node belongs.
#
# Input:     $1 - mmsdrfs file to use
#            $2 - node number (optional)
#
# Output:    nodesetId.  Zero indicates that the node does not
#            belong to any nodeset.
#
# Returns:   Always zero
#
# Example:   nodesetId=$(findNodesetId $mmsdrfsFile $ourNodeNumber)
#
######################################################################
function findNodesetId  # <sdrfsFile> [<nodeNumber>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGfindNodesetId ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset sdrfs=$1
  typeset nodeNumber=$2

  typeset nodesetId=0

  # No node number given: fall back to the number of the local node.
  if [[ -z $nodeNumber ]]; then
    getLocalNodeData
    nodeNumber=$ourNodeNumber
  fi

  # Look up the nodeset id on the MEMBER_NODE line of that node.
  # The first matching line ends the search.
  if [[ $nodeNumber -ne 0 && -f $sdrfs ]]; then
    nodesetId=$($awk -F: '
      /':$MEMBER_NODE:'/ && $'$NODE_NUMBER_Field' == "'$nodeNumber'" {
        print $'$NODESETID_Field'
        exit 0
      }
    ' $sdrfs)
    checkForErrors awk $?

    # An empty result means the node is in no nodeset.
    nodesetId=${nodesetId:-0}
  fi

  # Hand the answer back on stdout.
  print -- "$nodesetId"
  return 0

}  #----- end of function findNodesetId --------------------


##################################################################
#
# Function:  Prints a line for the mmsdrfs file
#
# Input:     Array elements v[1], v[2], ... are expected to have
#            the values to be included in the mmsdrfs line.
#
##################################################################
function print_newLine
{
  typeset line savedIFS
  typeset -i idx

  # So that the colon separators are shown and the tabs in the
  # /etc/filesystems entries survive, run with IFS = newline only.
  savedIFS=$IFS
  IFS="
"

  # Every line type starts with the same five fields.
  line=${v[1]}:${v[2]}:${v[3]}:${v[4]}:${v[5]}

  case ${v[$LINE_TYPE_Field]} in
    $SG_ETCFS )
      # The mount point line of a stanza carries a trailing ":".
      if [[ ${v[$LINE_NUMBER_Field]} = $MOUNT_POINT_Line ]]; then
        line=$line:
      fi
      ;;

    $COMMENT_LINE )
      # Nothing to add.
      ;;

    * )
      # All other line types: fields 6 through 27 plus a trailing ":".
      idx=5
      while (( (idx=idx+1) <= 27 ))
      do
        line=$line:${v[$idx]}
      done
      line=$line:
      ;;
  esac

  print -- "$line"

  # Put back the IFS value that was in effect on entry.
  IFS=$savedIFS

}  #----- end of function print_newLine -------------------------


##################################################################
#
# Function:  Prints a line for the mmsdrfs file
#
# Input:     Variables v1, v2, ... are expected to have
#            the values to be included in the mmsdrfs line.
#
##################################################################
function print_newLine_var
{
  typeset line savedIFS

  # So that the colon separators are shown and the tabs in the
  # /etc/filesystems entries survive, run with IFS = newline only.
  savedIFS=$IFS
  IFS="
"

  # Every line type starts with the same five fields.
  line=$v1:$v2:$v3:$v4:$v5

  case $v2 in
    $SG_ETCFS )
      # The mount point line of a stanza carries a trailing ":".
      if [[ $v4 = $MOUNT_POINT_Line ]]; then
        line=$line:
      fi
      ;;

    $COMMENT_LINE )
      # Nothing to add.
      ;;

    * )
      # mmfs.cfg or unknown line type: pass every variable.  This may
      # leave surplus ":"s at the end of the line, which is harmless.
      line=$line:$v6:$v7:$v8:$v9:$v10:$v11:$v12:$v13:$v14:$v15
      line=$line:$v16:$v17:$v18:$v19:$v20:$v21:$v22:$v23:$v24:$v25:$v26:$v27:
      ;;
  esac

  print -- "$line"

  # Put back the IFS value that was in effect on entry.
  IFS=$savedIFS

}  #----- end of function print_newLine_var ------------------------


###############################################################################
#
# Function:  Invoke the appropriate routine for the current environment
#            to commit the changes to the sdr.
#
# Input:     $1 - nodeset id (the nodeset affected by the command)
#            $2 - local nodeset id (the nodeset to which this node belongs)
#            $3 - gpfsObjectInfo
#            $4 - new version 2 generation number
#            $5 - mmsdrfs file to write to the sdr
#            $6 - primary server for storing the data
#            The following parameters are optional
#            $7 - if FORCE, do not check locks, gen number, and server validity;
#                 if KILLSDRSERV, kill the currently running mmsdrserv daemon
#                 if initLocalNodeData, recreate file mmfsNodeData.
#                 Note: FORCE implies KILLSDRSERV as well.
#            $8 - backup server for storing the data
#
# Output:    Updated Gpfs object
#
# Returns:   0 - sdr data committed successfully
#            1 - error encountered
#
###############################################################################
function commitChanges  # <nodesetId> <localNodesetId> <gpfsObjectInfo> <newGenNumber> <newsdrfs> <primaryServer> [<commitOption>] [<backupServer>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcommitChanges ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset nodesetId=$1
  typeset localNodesetId=$2
  typeset gpfsObjectInfo=$3
  typeset -i newGenNumber=$4
  typeset newsdrfs=$5
  typeset primaryServer=$6
  typeset commitOption=$7
  typeset backupServer=$8

  typeset commitOutput rc checksum sumOutput
  typeset versionLine genNumber runningCmnd

  # Refuse to go on with a malformed mmsdrfs file.
  checkSdrfsFile $newsdrfs
  checkForErrors "checkSdrfsFile" $?

  # Give the optional parameters their defaults.
  if [[ -z $commitOption || $commitOption = null ]]; then
    commitOption=NO_FORCE
  fi
  if [[ $backupServer = "_NOSECONDARY_" ]]; then
    backupServer=""
  fi

  # The checksum is the first word of the sum output.
  sumOutput=$($sum $newsdrfs)
  checkForErrors "sum $newsdrfs" $?
  set -f ; set -- $sumOutput ; set +f
  checksum=$1

  # Decide who performs the actual commit.
  if [[ $primaryServer = $ourNodeName ]]; then
    # This node is the primary server: commit right here.
    # That also rebuilds the local environment.
    commitOutput=$(commitToPrimaryServer $newsdrfs $checksum $newGenNumber  \
                     $ourNodeName $primaryServer $commitOption $backupServer)
    rc=$?

  else
    # Some other node is the primary server: ship the request to it.
    commitOutput=$($mmcommon on1 $primaryServer  \
                     commitToPrimaryServer $newsdrfs $checksum $newGenNumber  \
                     $ourNodeName $primaryServer $commitOption $backupServer)
    rc=$?

    if [[ $rc -eq 0 ]]; then
      # Put the mmsdrfs file in its place to make the refresh official.
      if [[ $newsdrfs != $mmsdrfsFile ]]; then
        $cp $newsdrfs $mmsdrfsFile
        $mmsync $mmsdrfsFile

        # Non-privileged commands must be able to read this file.
        $chmod a+r $mmsdrfsFile >/dev/null 2>&1
      fi

      # Recreate the local node data when asked to.
      if [[ $commitOption = *initLocalNodeData* ]]; then
        $rm -f $mmfsNodeData
        getLocalNodeData $mmsdrfsFile
      fi

      # Rebuild the local GPFS system files.  Should that fail, remove
      # one of the system files so that the node goes through gpfsInit
      # on its next command; the commit request itself still succeeds.
      updateMmfsEnvironment $HOME_CLUSTER $mmsdrfsFile
      if [[ $? -ne 0 ]]; then
        $rm -f $mmfscfgFile
      fi
    fi  # end of if [[ $rc -eq 0 ]]
  fi  # end of if [[ $primaryServer = $ourNodeName ]]

  # Pass a commit failure on to the caller.
  if [[ $rc -ne 0 ]]; then
    return $rc
  fi

  # Success: return the latest value of the Gpfs object.
  print -- "$commitOutput"
  return 0

}  #----- end of function commitChanges ---------------------------


###############################################################################
#
# Function:  Retrieves the specified mmsdrfs file from the client and
#            propagates the changes to the backup server.  If an error
#            is encountered, the mmsdrfs file is restored from its
#            cached versions on the local node.
#
# Input:     $1 - mmsdrfs file to commit.
#            $2 - checksum of the new mmsdrfs file.
#            $3 - the generation number of the new file.
#            $4 - reliable name of the client requesting the commit.
#            $5 - reliable name of the primary server where the data
#                 will be stored (the node that executes this function).
#            $6 - if FORCE, do not check locks, gen number, and server validity.
#                 if KILLSDRSERV, kill the currently running mmsdrserv daemon.
#                 if initLocalNodeData, recreate file mmfsNodeData.
#                 Note: FORCE implies KILLSDRSERV as well.
#            $7 - reliable name of the backup server.
#
# Output:    Updated Gpfs object
#
# Returns:   0 - successful commit
#            1 - error encountered
#
###############################################################################
function commitToPrimaryServer  # <newsdrfs> <newSum> <newGenNumber> <clientHostname> <primaryServer> <commitOption> [<backupServer>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcommitToPrimaryServer ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset newsdrfs=$1
  typeset newSum=$2
  typeset -i newGenNumber=$3
  typeset clientHostname=$4
  typeset primaryServer=$5
  typeset commitOption=$6
  typeset backupServer=$7

  typeset -i oldGenNumberP oldGenNumberB
  typeset mmmodeP primaryServerP backupServerP
  typeset mmmodeB primaryServerB backupServerB
  typeset createShadowResult copyRemoteFileResult magicWord
  typeset lockHolderInfo lockHolderPid lockHolderHostname
  typeset sumOutput rcpSum rc gpfsObjectInfo
  typeset sdrservPort sdrfsFormatLevel

#  if [[ $commitOption != FORCE ]]
#  then
#    #esj Add 'Am I the primary server?' check here
#
#    # Make sure the client requesting the commit has the sdr lock
#    lockHolderInfo=$($tail -n -1 $haslock 2>/dev/null)
#    IFS=':'
#    set -f ; set -- $lockHolderInfo ; set +f
#    IFS="$IFS_sv"
#    lockHolderPid=$1
#    lockHolderHostname=$2
#
#    if [[ $lockHolderHostname != $clientHostname ]]
#    then
#      # If somebody else has the lock, see if the process is still alive.
##esj add the logic for the checkLock function
#      [[ -n $lockHolderHostname ]] &&  \
#        checkLock $lockHolderPid $lockHolderHostname
#      # Regardless, our guy is not allowed to continue.
#      print -u2 "$clientHostname does not have the sdr lock"
#      return 1
#    fi
#  fi  # end if [[ $commitOption != FORCE ]]

  # Stage the new file as $mmsdrfsTmp.
  if [[ $primaryServer = $clientHostname ]]; then
    # The client is this very node: a local copy will do.
    $cp $newsdrfs $mmsdrfsTmp
    checkForErrors "cp $newsdrfs $mmsdrfsTmp" $?

  else
    # Fetch the file from the client ...
    $rcp ${clientHostname}:${newsdrfs} $mmsdrfsTmp
    checkForErrors "$rcp ${clientHostname}:${newsdrfs}" $?

    # ... and make sure that it arrived intact.
    sumOutput=$($sum $mmsdrfsTmp)
    checkForErrors "sum $mmsdrfsTmp" $?
    set -f ; set -- $sumOutput ; set +f
    rcpSum=$1
    if [[ $newSum -ne $rcpSum ]]; then
      # Error retrieving data from client.
      printErrorMsg 379 $mmcmd $clientHostname $primaryServer
      cleanupAndExit
    fi
  fi

  # Create the shadow file.  createShadow also writes the 'uncommitted'
  # record that marks the start of the commit processing.
  createShadowResult=$(createShadow 0 $newSum $commitOption)
  rc=$?
  checkForErrors "createShadow" $rc

  # Split the createShadow output into its fields.
  IFS=':'
  set -f ; set -- $createShadowResult ; set +f
  IFS="$IFS_sv"
# magicWord=$1        # not needed for now
# mmmodeP=$2          # not needed for now
  oldGenNumberP=$3
  primaryServerP=$4
  backupServerP=$5

  # For a regular commit (i.e., the servers are not being changed),
  # verify the generation number and settle on the backup server.
  if [[ $commitOption != FORCE ]]; then
    # The client must not commit a back level file.
    if [[ $newGenNumber -lt $oldGenNumberP ]]; then
      # Client copy of mmsdrfs is not at the latest level.
      printErrorMsg 380 $mmcmd $clientHostname
      cleanupAndExit
    fi

    if [[ -z $backupServer ]]; then
      backupServer=$backupServerP
    fi
  fi  # end of if [[ $commitOption != FORCE ]]

  # A backup server identical to the primary counts as no backup
  # server at all.  (This should never happen!)
  if [[ $backupServer = $primaryServer ]]; then
    backupServer=""
  fi

  # Have the backup repository, if any, create its shadow file too.
  if [[ -n $backupServer ]]; then
    $mmcommon on1 $backupServer  \
       createShadow $oldGenNumberP $newSum $commitOption > $tmpfile 2>&1

    # Keep only the result line; drop traces, warnings, and the like.
    createShadowResult=$($grep ^createShadow $tmpfile)

    # Split the remote createShadow output into its fields.
    IFS=':'
    set -f ; set -- $createShadowResult ; set +f
    IFS="$IFS_sv"
    magicWord=$1
#   mmmodeB=$2        # not needed for now
    oldGenNumberB=$3
    primaryServerB=$4
    backupServerB=$5

    if [[ $magicWord != createShadow ]]; then
      # The remote call failed: undo the local preparations and give up.
      $rm -f $uncommitted.$newSum ${mmsdrfsPrev}.$oldGenNumberP $mmsdrfsTmp

      # Show the mmcommon output.
      if [[ -s $tmpfile ]]; then
        $cat $tmpfile 1>&2
      fi

      # Backup server problem
      printErrorMsg 383 $mmcmd $backupServer
      cleanupAndExit
    fi

    # What the backup reports must agree with what the primary has.
    if [[ $commitOption != FORCE ]]; then
      if [[ $oldGenNumberP  -ne $oldGenNumberB  ||
            $primaryServerP !=  $primaryServerB ||
            $backupServerP  !=  $backupServerB  ]]
      then
        # The mmsdrfs files on the two servers are not the same.
        printErrorMsg 382 $mmcmd $primaryServer $backupServer
        cleanupAndExit
      fi
    fi  # end of if [[ $commitOption != FORCE ]]
  fi  # end if [[ -n $backupServer ]]

  # Recreate the local node data when asked to.
  if [[ $commitOption = *initLocalNodeData* ]]; then
    $rm -f $mmfsNodeData
    getLocalNodeData $mmsdrfsTmp
  fi

  # Apply the changes to this node.  Should something go wrong
  # further down, the changes are reversed later.
  updateMmfsEnvironment $HOME_CLUSTER $mmsdrfsTmp
  rc=$?

  # A failed environment update does not fail the commit.  Instead,
  # one of the system files is removed so that this server node goes
  # through gpfsInit on its next command.
  if [[ $rc -ne 0 ]]; then
    $rm -f $mmfscfgFile
  fi

  # Preparations are complete.  Move the new file into place.
  $mv $mmsdrfsTmp $mmsdrfsFile
  $mmsync $mmsdrfsFile

  # Have the backup server pick up the new file.
  if [[ -n $backupServer ]]; then
    $mmcommon on1 $backupServer  \
       copyRemoteFile $primaryServer $mmsdrfsFile $mmsdrfsFile $newSum > $tmpfile 2>&1
    rc=$?

    # Keep only the result line; drop traces, warnings, and the like.
    copyRemoteFileResult=$($grep ^copyRemoteFile $tmpfile)

    # Split the copyRemoteFile output into its fields.
    IFS=':'
    set -f ; set -- $copyRemoteFileResult ; set +f
    IFS="$IFS_sv"
    magicWord=$1
    rc=$2

    if [[ $magicWord != copyRemoteFile || $rc -ne 0 ]]; then
      # The copy failed.  Show the mmcommon output.
      if [[ -s $tmpfile ]]; then
        $cat $tmpfile 1>&2
      fi

      # Bring back the previous mmsdrfs file and give up.
      $mv ${mmsdrfsPrev}.$oldGenNumberP $mmsdrfsFile
      $mmsync $mmsdrfsFile
      $rm -f $uncommitted.$newSum

      # Backup server problem.
      printErrorMsg 383 $mmcmd $backupServer

      # Reverse the changes made on the primary server node.
      if [[ $commitOption = *initLocalNodeData* ]]; then
        $rm -f $mmfsNodeData
        getLocalNodeData
      fi
      updateMmfsEnvironment $HOME_CLUSTER $mmsdrfsFile
      cleanupAndExit
    fi  # end of if [[ $magicWord != copyRemoteFile || $rc -ne 0 ]]
  fi  # end if [[ -n $backupServer ]]

  # Remove the uncommitted record.  THIS IS THE REAL COMMIT POINT.
  $rm -f $uncommitted.$newSum ${mmsdrfsPrev}.$oldGenNumberP

#esjxx - handle this better; avoid reading the file
  sdrfsFormatLevel=$($head -1 $mmsdrfsFile | $GETVALUE $SDRFS_FORMAT_Field)
  gpfsObjectInfo="$sdrfsFormatLevel:$newGenNumber:"

  # Remove the uncommitted record from the backup server as well.
  # Do not wait for this to finish, but let error messages show up.
  if [[ -n $backupServer ]]; then
    $mmcommon on1 $backupServer removeUncommitted $oldGenNumberP $newSum &
  fi

  # Non-privileged commands must be able to read the mmsdrfs file.
  $chmod a+r $mmsdrfsFile >/dev/null 2>&1

  # Restart or reset the mmsdrserv daemon.
  if [[ $commitOption = *KILLSDRSERV* || $commitOption = FORCE ]]; then
    # The mmsdrserv daemon was killed earlier; start it again now,
    # here first and then, if that worked, on the backup server.
    startSdrServ CURRENT >> $mmsdrservLog 2>&1
    rc=$?
    if [[ -n $backupServer && $rc -eq 0 ]]; then
      $mmcommon on1 $backupServer startSdrServ CURRENT >> $mmsdrservLog 2>&1
    fi
  else
    # The daemon is still running; a reset is sufficient.
    resetSdrServ $gpfsObjectInfo $ourNodeName CURRENT >> $mmsdrservLog 2>&1
  fi

  # Asynchronously invoke the user exit for backing up the mmsdrfs file.
  if [[ -x $mmsdrbackup ]]; then
    $mmsdrbackup $newGenNumber >/dev/null 2>&1 &
  fi

  # Hand the new Gpfs object back to the client.
  print -- "$gpfsObjectInfo"
  return 0

}  #----- end of function commitToPrimaryServer --------------------


###########################################################################
#
# Function:  Prepare for commit processing: make a copy of the current
#            mmsdrfs file and create an uncommitted record to signify
#            the start of the commit processing.
#
# Input:     $1 - generation number of the mmsdrfs file to be shadowed
#                 (zero, if the number is not known yet)
#            $2 - uncommitted record version (this is the checksum
#                 of the new mmsdrfs file)
#            $3 - if FORCE, ignore previous uncommitted or shadow files.
#                 if KILLSDRSERV, kill the currently running mmsdrserv daemon.
#                 Note: FORCE implies KILLSDRSERV as well.
#
# Output:    Colon-separated string containing the fields:
#              createShadow:$mmmode:$genNumber:$primaryServer:$backupServer
#
# Returns:   0 - success
#            1 - error encountered
#
###########################################################################
function createShadow  # <expectedGenNumber> <version> [<commitOption>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcreateShadow ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset expectedGenNumber=$1
  typeset version=$2
  typeset commitOption=$3

  typeset -i genNumber
  typeset mmmode primaryServer backupServer rc
  typeset needToCopyFile=no
  typeset versionLine=""

  if [[ $commitOption = FORCE ]]; then
    # Wipe all traces of earlier uncommitted or shadow files.
    # A forced commit is most likely a clean-up after
    # some previous mess anyway.
    $rm -f ${mmsdrfsPrev}.* $uncommitted.*
# else
#   # If uncommitted or shadow file exists, a previous commit
#   # request did not complete; invoke the cleanupSdr function.
#   if [[ -e ${mmsdrfsPrev}.* || -e $uncommitted.* ]]
#   then
##esj add the logic for the cleanupSdr function
#     cleanupSdr
#     checkForErrors "cleanupSdr" $?
#   fi
  fi

  # Without an mmsdrfs file there is nothing to shadow.
  if [[ ! -e $mmsdrfsFile ]]; then
    if [[ -z $ourNodeName ]]; then
      ourNodeName=$($hostname)
    fi
    # Node is not member of a cluster.
    printErrorMsg 352 $mmcmd $ourNodeName
    cleanupAndExit
  fi

  # Read the version line of the mmsdrfs file into the array v.
  versionLine=$($head -1 $mmsdrfsFile)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"

  # The first line must really be a version line.
  if [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]]; then
    corruptedSdrFileExit 132 "$versionLine"
  fi

  # Pick up the values that are reported back to the caller.
  mmmode=${v[$CLUSTER_TYPE_Field]}
  genNumber=${v[$SDRFS_GENNUM_Field]}
  primaryServer=${v[$PRIMARY_SERVER_Field]}
  backupServer=${v[$BACKUP_SERVER_Field]}

  # Compare the generation number against the expected one.
  if [[ $expectedGenNumber -eq $genNumber ||
        $expectedGenNumber -eq 0          ||
        $commitOption = FORCE             ]]
  then
    # Things are assumed to be in order if the number is as expected,
    # if no number was given (the file is already known to be OK),
    # or if this is a FORCED commit (changing servers).
    needToCopyFile=no

  elif [[ $expectedGenNumber -lt $genNumber ]]
  then
    # A current gen number above the expected one means that
    # the information on the primary is incorrect.
    printErrorMsg 382 $mmcmd $primaryServer $backupServer
    cleanupAndExit

  else
    # A current gen number below the expected one means that
    # the backup server has to be brought up to date.
    needToCopyFile=yes
  fi  # end if [[ $expectedGenNumber -eq $genNumber ...

  if [[ $needToCopyFile = yes ]]; then
    # The local file is behind: fetch the current one from the primary.
    copyRemoteFile $primaryServer $mmsdrfsFile $mmsdrfsFile 0 > /dev/null
    checkForErrors "copyRemoteFile ${primaryServer}:${mmsdrfsFile}" $?

    # Read the version line of the refreshed file into the array v.
    versionLine=$($head -1 $mmsdrfsFile)
    IFS=':'
    set -f ; set -A v -- - $versionLine ; set +f
    IFS="$IFS_sv"

    # The first line must really be a version line.
    if [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]]; then
      corruptedSdrFileExit 133 "$versionLine"
    fi

    # Pick up the values that are reported back to the caller.
    mmmode=${v[$CLUSTER_TYPE_Field]}
    genNumber=${v[$SDRFS_GENNUM_Field]}
    primaryServer=${v[$PRIMARY_SERVER_Field]}
    backupServer=${v[$BACKUP_SERVER_Field]}
  fi  # end if [[ $needToCopyFile = yes ]]

  # Create a shadow file.
  $cp $mmsdrfsFile ${mmsdrfsPrev}.$genNumber
  checkForErrors "cp $mmsdrfsFile ${mmsdrfsPrev}.$genNumber" $?

  # Create an 'uncommitted' record.
  # THIS IS WHERE THE COMMIT PROTOCOL REALLY STARTS.
  $touch $uncommitted.$version
  checkForErrors "touch $uncommitted.$version " $?
  $mmsync ${mmsdrfsPrev}.$genNumber $uncommitted.$version

  # If requested, kill the current instance of the mmsdrserv daemon.
  # It will be restarted shortly (from commitToPrimaryServer).
  if [[ $commitOption = *KILLSDRSERV* || $commitOption = FORCE ]]; then
    killSdrServ
  fi

  print -- "createShadow:$mmmode:$genNumber:$primaryServer:$backupServer"
  return 0

}  #----- end of function createShadow -----------------------------


##############################################################################
#
# Function:  Copies the specified file from the remote node and stores
#            it on the local node as the specified local file.
#
# Input:     $1 - remote node to get file from
#            $2 - file to get from remote node
#            $3 - name of local file to be created by copying
#                 the remote file to the local node
#            $4 - checksum of the remote file, 0 if not known.
#
# Output:    If successful, the file is copied.
#
# Returns:   0 - successful copy
#            non-zero - error encountered
#
##############################################################################
function copyRemoteFile  # <remoteNode> <remoteFile> <localFile> [<remoteSum>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcopyRemoteFile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset remoteNode=$1
  typeset remoteFile=$2
  typeset localFile=$3
  typeset remoteSum=${4:-0}    # an omitted checksum counts as "not known"

  typeset rc ec localSum sumOutput versionLine

  # Copy the remote file into a temporary file next to the target.
  $rcp ${remoteNode}:${remoteFile} ${localFile}tmp$$
  rc=$?

  # A known checksum must match the checksum of what was received.
  if [[ $rc = 0 && $remoteSum != 0 ]]; then
    sumOutput=$($sum ${localFile}tmp$$)
    rc=$?
    set -f ; set -- $sumOutput ; set +f
    localSum=$1
    if [[ $rc = 0 && $remoteSum != $localSum ]]; then
      printErrorMsg 379 $mmcmd $remoteNode $ourNodeName
      rc=1
    fi
  fi
  $mmsync ${localFile}tmp$$

  # A new version of the mmsdrfs file may require
  # the GPFS files on this node to be refreshed.
  if [[ $rc -eq 0 && $localFile = $mmsdrfsFile ]]; then
    # Update the local system files.
    updateMmfsEnvironment $HOME_CLUSTER ${localFile}tmp$$
    ec=$?

    # A failed environment update does not fail the request.  Instead,
    # one of the system files is removed so that this server node goes
    # through gpfsInit on its next command.
    if [[ $ec -ne 0 ]]; then
      $rm -f $mmfscfgFile
    fi
  fi  # end of if [[ $rc -eq 0 && $localFile = $mmsdrfsFile ]]

  # Install the temporary file on success; discard it otherwise.
  if [[ $rc -eq 0 ]]; then
    $mv ${localFile}tmp$$ $localFile
    $mmsync $localFile
  else
    $rm -f ${localFile}tmp$$
  fi

  # Non-privileged commands must be able to read the mmsdrfs file.
  $chmod a+r $mmsdrfsFile >/dev/null 2>&1

  # Report the outcome both on stdout and as the return code.
  print -- "copyRemoteFile:$rc"
  return $rc

}  #----- end of function copyRemoteFile -------------------------


###########################################################################
#
# Function:  Parse the output from a gpfsInit call and set
#            certain global and local variables accordingly.
#
# Input:     $1 - return code from gpfsInit
#            $2 - output string from gpfsInit
#
# Output:    None
#
# Returns:   0 - gpfsInit finished successfully
#            If error, no return - processing stopped.
#
###########################################################################
function setGlobalVar  # <gpfsInitRc> <gpfsInitOutput>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGsetGlobalVar ]] && set -x
  $mmTRACE_ENTER "$*"

  # A failed gpfsInit ends the processing right here.
  if [[ $1 -ne 0 ]]; then
    cleanupAndExit
  fi

  # The remaining arguments are the words of the gpfsInit output.
  # All of them land in global variables, except for the two
  # remote command paths, which stay local to this function.
  mmmode=$2
  sdrLocked=$3
  nsId=$4
  ourNodeNumber=$5
  gpfsObjectInfo=$6
  primaryServer=$7
  backupServer=$8
  ourNodeName=$9
  typeset rshPath=${10}
  typeset rcpPath=${11}
  sdrGenNumber=${12}
  sdrGenTimestamp=${13}
  environmentType=${14}
  ourShortName=${15}
  sdrfsFormatLevel=${16}

  # The placeholder for "no backup server" becomes an empty value.
  if [[ $backupServer = "_NOSECONDARY_" ]]; then
    backupServer=""
  fi

  nodesetId=$nsId
  fsHomeCluster=$nsId

  # Make the GPFS nodeset type globally available.
  export MMMODE=$mmmode

  # Non-default remote commands replace the built-in ones
  # and are exported as well.
  if [[ -n $rshPath && $rshPath != "_DEFAULT_" ]]; then
    rsh="$rshPath"
    export GPFS_rshPath="$rshPath"
  fi

  if [[ -n $rcpPath && $rcpPath != "_DEFAULT_" ]]; then
    rcp="$rcpPath"
    export GPFS_rcpPath="$rcpPath"
  fi

  # Work out the environment type: it defaults to the mode; any value
  # that differs from the mode, other than lc2, is turned into rpd.
  environmentType=${environmentType:-$MMMODE}
  if [[ $mmmode != $environmentType && $environmentType != lc2 ]]; then
    environmentType="rpd"
  fi
  export environmentType=$environmentType

  return 0

}  #----- end of function setGlobalVar --------------------------


###########################################################################
#
# Function:  Make sure that the local copies of the mmsdrfs, mmfs.cfg,
#            and all other relevant system files are up to date.
#            If requested, get the sdr lock.
#
# Input:     $1 - Process id to be used as an identifier for
#                 the sdr lock, or the string 'nolock'.
#
# Output:    The function prints a line with the following values:
#              mmmode          - command mode (lc, single, ...)
#              sdrLocked      - yes or no
#              nodesetId      - HOME_CLUSTER
#              ourNodeNumber  - the node number assigned to this node
#              gpfsObjectInfo - the most current value of the Gpfs object
#              primaryServer  - hostname of primary server or _NOPRIMARY_
#              backupServer   - hostname of backup server or _NOSECONDARY_
#              ourNodeName    - the reliable hostname of this node
#              rshPath        - remote shell command or _DEFAULT_
#              rcpPath        - remote file copy command or _DEFAULT_
#              sdrGenNumber   - most recent mmsdrfs generation number
#              sdrGenTimestamp - timestamp of the mmsdrfs generation number
#              environmentType - environment within which GPFS is defined
#              ourShortName   - the unqualified name of the GPFS adapter
#              sdrfsFormatLevel- the format level of the mmsdrfs file
#
# Note:      sdrLocked is a global variable and its value is not
#            altered unless a lock is obtained.
#
# Returns:   0 - files refreshed successfully; SDR locked, if requested.
#            1 - unexpected error
#
###########################################################################
function gpfsInit  # <lockId>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgpfsInit ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockId=$1

  typeset -i localGenNumber=0
  typeset localGenTimestamp=0
  typeset versionLine=""
  typeset mmmode=""
  typeset rc=0
  typeset primary backup initOutput

  # Note: sdrfsVersion, rshPath, rcpPath and environmentType are assigned
  # below without a typeset, so they are not local to this function.

  ###############################################
  # Preliminary work common to all environments
  ###############################################
  if [[ ! -f $mmsdrfsFile ]]
  then
    # Either the node does not belong to a cluster
    # or the config information is lost/corrupted.
    printErrorMsg 282 $mmcmd
    cleanupAndExit
  fi

  # Parse the version line from the local mmsdrfs file.
  # The line is split on ':' into array v.  The lone '-' after '--'
  # becomes v[0], so the fields of the version line start at v[1].
  # Globbing is turned off (set -f) while the line is being split.
  versionLine=$($head -1 $mmsdrfsFile)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"

  # Perform a quick sanity check.
  [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]] &&  \
    corruptedSdrFileExit 128 "$versionLine"

  # Retrieve the cluster environment, the generation number,
  # and the primary and backup server names.
  if [[ -z ${v[$CLUSTER_TYPE_Field]} ]]
  then
    # If the cluster type field is null, assume sp.
    mmmode=sp
  else
    mmmode=${v[$CLUSTER_TYPE_Field]}
  fi

  sdrfsVersion=${v[$SDRFS_VERSION_Field]}
  localGenNumber=${v[$SDRFS_GENNUM_Field]}
  primary=${v[$PRIMARY_SERVER_Field]}
  backup=${v[$BACKUP_SERVER_Field]}
  # An empty backup server is passed on as an explicit placeholder so
  # that the argument positions of gpfsClusterInit do not shift.
  [[ -z $backup ]] && backup="_NOSECONDARY_"
  localGenTimestamp=${v[$GENNUM_TSTAMP_Field]}
  [[ -z $localGenTimestamp ]] && localGenTimestamp=0
  rshPath=${v[$RSH_PATH_Field]}
  rcpPath=${v[$RCP_PATH_Field]}
  # The environment type defaults to the cluster type.  Any value that
  # differs from the cluster type, other than lc2, is mapped to rpd.
  environmentType=${v[$CLUSTER_SUBTYPE_Field]}
  [[ -z $environmentType ]] && environmentType=$mmmode
  [[ $mmmode != $environmentType && $environmentType != lc2 ]] &&  \
    environmentType="rpd"

  # Make the mode value globally available.
  export MMMODE=$mmmode
  export environmentType=$environmentType

  # Export remote commands if necessary.
  if [[ -n $rshPath && $rshPath != "_DEFAULT_" ]]
  then
    rsh="$rshPath"
    export GPFS_rshPath="$rshPath"
  fi

  if [[ -n $rcpPath && $rcpPath != "_DEFAULT_" ]]
  then
    rcp="$rcpPath"
    export GPFS_rcpPath="$rcpPath"
  fi

  ###############################################
  # Depending on the environment, invoke
  # the appropriate initialization routine.
  ###############################################
  if [[ $MMMODE = lc || $MMMODE = single ]]
  then
    # Refuse to work with configuration data older than the current
    # mmsdrfs version; the user has to migrate the cluster first.
    if [[ $sdrfsVersion -lt $CURRENT_SDRFS_VERSION ]]
    then
      print -u2 "$mmcmd: The GPFS cluster configuration data is not up to date."
      print -u2 " Migrate the cluster using mmexportfs/mmimportfs."
      print -u2 " See the GPFS Concepts, Planning, and Installation Guide for instructions."
      cleanupAndExit
    fi

    # gpfsClusterInit runs inside a command substitution; its result
    # line is captured here and printed at the end of this function.
    initOutput=$(gpfsClusterInit $lockId $localGenNumber  \
                   $primary $backup $localGenTimestamp)
    rc=$?

  elif [[ $MMMODE = sp || $MMMODE = rpd || $MMMODE = hacmp ]]
  then
    print -u2 "$mmcmd: GPFS cluster type $MMMODE is no longer supported."
    print -u2 " You must move to a supported GPFS environment (cluster type lc)."
    print -u2 " See the GPFS Concepts, Planning, and Installation Guide for instructions."
    cleanupAndExit

  else
    # Unknown GPFS execution environment
    printErrorMsg 338 $mmcmd $MMMODE
    cleanupAndExit
  fi

  # An empty result line is treated as a failure even if rc is zero.
  [[ -z $initOutput || $rc -ne 0 ]] &&  \
    return 1

  # Things must have worked.
  print -- "$initOutput"
  return 0

}  #----- end of function gpfsInit --------------------------------


###########################################################################
#
# Function:  Make sure that the local copy of the mmsdrfs file
#            and any other relevant system files are up to date.
#            If requested, get the sdr lock.
#
# Input:     $1 - Process id to be used as an identifier for
#                 the sdr lock, or the string 'nolock'.
#            $2 - generation number of local sdrfs file
#            $3 - primary server for obtaining sdrfs data
#            $4 - secondary server for obtaining sdrfs data
#            $5 - generation number time stamp
#
# Output:    This function prints a line with the following values:
#              mmmode         - command execution environment
#              sdrLocked      - yes or no
#              nodesetId      - HOME_CLUSTER
#              ourNodeNumber  - the node number of this node
#              gpfsObjectInfo - the most current value of the Gpfs object
#              primaryServer  - primary server for obtaining sdrfs data
#              backupServer   - secondary server for obtaining sdrfs data
#              ourNodeName    - the reliable hostname of this node
#              rshPath        - remote shell command or _DEFAULT_
#              rcpPath        - remote file copy command or _DEFAULT_
#              sdrGenNumber   - most recent mmsdrfs generation number
#              sdrGenTimestamp - timestamp of the mmsdrfs generation number
#              environmentType - environment within which GPFS is defined
#              ourShortName   - the unqualified name of the GPFS adapter
#              sdrfsFormatLevel- the format level of the mmsdrfs file
#
# Note:      sdrLocked is a global variable and its value is not
#            altered unless a lock is obtained.
#
# Returns:   0 - files refreshed successfully; sdr locked, if requested.
#            non-zero - unexpected error
#
###########################################################################
function gpfsClusterInit  # <lockId> <localGenNumber> <primaryServer>
                          # <backupServer> <localGenTimestamp>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgpfsClusterInit ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockId=$1
  typeset localGenNumber=$2
  typeset primaryServer=$3
  typeset backupServer=$4
  typeset localGenTimestamp=$5

  # Note: envLevel is declared but not used in this function, while
  # mmcommonOutput and environmentType are assigned further down
  # without having been declared with typeset.
  typeset gpfsObjectInfo kword sdrservPort mmsdrcliResult
  typeset genNumber runningCmnd junk sdrfsFile timeout
  typeset envLevel firstChoice secondChoice rshPath rcpPath
  typeset expirationData currentTime genTimestamp sdrfsFormatLevel
  typeset validGenNumber validGenTimestamp expirationTime
  typeset newKeyGenNumber committedKeyGenNumber secLevel
  typeset needToRefresh=true
  typeset versionLine=""
  typeset lockResult=""
  typeset rc=0

  # Some initialization
  # A shadow file left over from an earlier call must not be mistaken
  # for fresh data; its existence is tested after the refresh below.
  $rm -f $mmsdrfsShadow
  [[ $backupServer = "_NOSECONDARY_" ]] && backupServer=""

  # Find our node name and node number.
  getLocalNodeData

  # Processing depends on whether a lock is requested or not.
  if [[ $lockId != nolock ]]
  then
    # If a lock is requested, get it.
    # This will also ensure that we have the latest mmsdrfs data.
    lockResult=$(getLockOnServer $primaryServer $lockId WAIT $localGenNumber)
    rc=$?

    # Look at the result from the lock request.
    # If it is more than one word, we assume it is an error message.
    set -f ; set -- $lockResult ; set +f
    gpfsObjectInfo=$1
    junk=$2
    if [[ -z $gpfsObjectInfo || $gpfsObjectInfo = fail || -n $junk || $rc -ne 0 ]]
    then
      # Print error message (if any) from getLockOnServer, as well as
      # a message stating we were unable to get the lock.  Then exit.
      [[ -n $junk ]] && print -u2 "$lockResult"
      printErrorMsg 398 $mmcmd $primaryServer
      return 1
    fi

    # If we get here, the lock was successfully obtained.
    sdrLocked=yes

  else
    # No lock needed.  See if the local mmsdrfs data can be trusted.

    # If this node is one of the repository server nodes,
    # its mmsdrfs file is up to date by definition.
    if [[ $ourNodeName = $primaryServer || $ourNodeName = $backupServer ]]
    then
      needToRefresh=false

    # If the sdr is locked as part of mmstartup processing,
    # we might be able to save a trip to the server.
    elif [[ -s $mmSdrLockExp ]]
    then
      # Find the value of the gen number and the time period
      # for which it is promised not to change.
      # The first line of the file is split on ':' into the keyword,
      # the generation number, its timestamp and the expiration time.
      expirationData=$($head -1 $mmSdrLockExp 2>/dev/null)
      IFS=':'
      set -f ; set -- $expirationData ; set +f
      kword=$1
      validGenNumber=$2
      validGenTimestamp=$3
      expirationTime=$4
      IFS="$IFS_sv"
      [[ $kword != mmSdrLockExp ]] &&  \
        corruptedSdrFileExit 140 "$expirationData"

      # See if the local data is current.
      currentTime=$($perl -e 'print time')
      if [[ $currentTime -lt $expirationTime     &&
            $localGenNumber -eq $validGenNumber  &&
            $localGenTimestamp -eq $validGenTimestamp ]]
      then
        # The local data is guaranteed to be up to date.
        $mmTRACE "Local data guaranteed - expiration time $expirationTime"
        needToRefresh=false

      elif [[ $currentTime -ge $expirationTime ]]
      then
        # If the mmSdrLockExp file is old, get rid of it.
        $mmTRACE "Removing the mmSdrLockExp file"
        $rm -f $mmSdrLockExp

      else
        # The promise has not expired, but it covers a different
        # generation than the local one; needToRefresh stays true.
        :  # do nothing
      fi  # end of if [[ $currentTime -lt $expirationTime && ... ]]

    # Try to get the Gpfs object using the mmsdrserv daemon.
    # If something goes wrong, ignore the error and continue
    # with the traditional refresh processing.
    else
      # Retrieve the tcp port number for the mmsdrserv daemon.
      sdrservPort=$(getSdrservPort $mmsdrfsFile)
      checkForErrors getSdrservPort $?

      # If the mmsdrserv TCP port number is set to 0,
      # the user does not want to use the mmsdrserv daemon.
      if [[ $sdrservPort -ne 0 ]]
      then
        # Find the value of the mmsdrservTimeout parameter.
        timeout=$(showCfgValue mmsdrservTimeout)
        [[ -z $timeout ]] && timeout=10

        # If we have two servers, we will spread the work by picking a
        # server depending on our node number.  If the first choice fails,
        # then we'll try the second one.
        # (Nodes with an even node number try the backup server first.)
        if [[ -n $backupServer && $ourNodeNumber%2 -eq 0 ]]
        then
          firstChoice=$backupServer
          secondChoice=$primaryServer
        else
          firstChoice=$primaryServer
          secondChoice=$backupServer
        fi  # end of if [[ -n $backupServer && $ourNodeNumber%2 -eq 0 ]]

#esjdbg  # Show msgs during development and testing only.
#esjdbg  mmsdrcliResult=$($mmsdrcli getObjSdrfs  \
#esjdbg    $firstChoice $sdrservPort $timeout $localGenNumber $mmsdrfsShadow)
        # Error output of mmsdrcli is appended to the mmsdrserv log.
        mmsdrcliResult=$($mmsdrcli getObjSdrfs  \
          $firstChoice $sdrservPort $timeout    \
          $localGenNumber $mmsdrfsShadow 2>>$mmsdrservLog)
        rc=$?

        # A usable answer starts with the string "Gpfs".
        if [[ -z $mmsdrcliResult || $mmsdrcliResult != Gpfs* || $rc -ne 0 ]]
        then
          # The first attempt failed; try the other server (if there is one).
          if [[ -n $secondChoice ]]
          then
#esjdbg  # Show msgs during development and testing only.
#esjdbg  mmsdrcliResult=$($mmsdrcli getObjSdrfs  \
#esjdbg    $secondChoice $sdrservPort $timeout $localGenNumber $mmsdrfsShadow)
            mmsdrcliResult=$($mmsdrcli getObjSdrfs  \
              $secondChoice $sdrservPort $timeout   \
              $localGenNumber $mmsdrfsShadow 2>>$mmsdrservLog)
            rc=$?
          fi
        fi  # end of if [[ -z $mmsdrcliResult || $mmsdrcliResult != Gpfs* ...

        if [[ $mmsdrcliResult = Gpfs* && $rc -eq 0 ]]
        then
          # The mmsdrcli call worked; we either already have the latest data,
          # or the latest data was put in file mmsdrfsShadow.
          $mmTRACE "Local data verified via mmsdrcli getObjSdrfs."
          needToRefresh=false
        else
          :  # mmsdrcli failed; we'll try sendClusterSDRFiles in a second.
        fi  # end of if [[ $mmsdrcliResult = Gpfs* && $rc -eq 0 ]]

      fi  # end of if [[ $sdrservPort -ne 0 ]]

    fi  # end if [[ $ourNodeName = $primaryServer || $ourNodeName = $backupServer ]]

    # We come here if we still need to get the data from one of the servers.
    if [[ $needToRefresh = true ]]
    then
      # If we have two servers, we will spread the work by picking a
      # server depending on our node number.  If the first choice fails,
      # then we'll try the second one.
      if [[ -n $backupServer && $ourNodeNumber%2 -eq 0 ]]
      then
        firstChoice=$backupServer
        secondChoice=$primaryServer
      else
        firstChoice=$primaryServer
        secondChoice=$backupServer
      fi  # end of if [[ -n $backupServer && $ourNodeNumber%2 -eq 0 ]]

      # Tell the first-choice server to send the latest version of
      # the mmsdrfs file if our local version is not current.
      # If sent, it will initially be stored in a shadow file
      # until the rest of the system files are successfully updated.
      mmcommonOutput=$($mmcommon on1 $firstChoice  \
         sendClusterSDRFiles $localGenNumber $ourNodeName $mmsdrfsShadow $lockId)
      rc=$?

      # Look at the result from the sendClusterSDRFiles request.
      # If it is more than one word, we assume it is an error message.
      set -f ; set -- $mmcommonOutput ; set +f
      gpfsObjectInfo=$1
      junk=$2
      if [[ -z $gpfsObjectInfo || $gpfsObjectInfo = fail || -n $junk || $rc -ne 0 ]]
      then
        # Give up if there is no second server to try.
        if [[ -z $secondChoice ]]
        then
          # Print error message from sendClusterSDRFiles.
          [[ -n $junk ]] && print -u2 "$mmcommonOutput"

          # Attempt to get data from the first-choice server failed.
          printErrorMsg 342 $mmcmd $firstChoice
          return 1
        fi

        # Issue a warning: Attempt to get data from the first-choice server failed.
        printErrorMsg 341 $mmcmd $firstChoice
        rc=0

        # Tell the second-choice server to send the latest version of
        # the mmsdrfs file if our local version is not current.
        mmcommonOutput=$($mmcommon on1 $secondChoice  \
           sendClusterSDRFiles $localGenNumber $ourNodeName $mmsdrfsShadow $lockId)
        rc=$?

        # Look at the result from the sendClusterSDRFiles request.
        # If it is more than one word, we assume it is an error message.
        set -f ; set -- $mmcommonOutput ; set +f
        gpfsObjectInfo=$1
        junk=$2
        if [[ -z $gpfsObjectInfo || $gpfsObjectInfo = fail || -n $junk || $rc -ne 0 ]]
        then
          # Print error message from sendClusterSDRFiles.
          [[ -n $junk ]] && print -u2 "$mmcommonOutput"

          # Attempt to get data from the second-choice server failed.
          printErrorMsg 342 $mmcmd $secondChoice
          return 1
        fi
      fi  # end if [[ -z $gpfsObjectInfo || -n $junk || $rc -ne 0 ]]

    fi  # end if [[ $needToRefresh = true ]]

  fi  # end if [[ $lockId != nolock ]]

  # At this point we have the lock (if it was requested) and the latest
  # mmsdrfs information is either already in file mmsdrfsFile or it was
  # put by one of the servers in file mmsdrfsShadow.

  # Decide which file to use to get the latest information.
  if [[ -f $mmsdrfsShadow ]]
  then
    sdrfsFile=$mmsdrfsShadow
  else
    sdrfsFile=$mmsdrfsFile
  fi

  # Parse the version line of the mmsdrfs file.
  # The lone '-' after '--' becomes v[0], so the fields start at v[1].
  versionLine=$($head -1 $sdrfsFile)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"

  # Perform a quick sanity check.
  [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]] &&  \
    corruptedSdrFileExit 130 "$versionLine"

  # Extract the fields that we must return to the caller.
  # From here on primaryServer and backupServer hold the values found
  # in the file, not the ones that were passed in as arguments.
  sdrfsFormatLevel=${v[$SDRFS_FORMAT_Field]}
  genNumber=${v[$SDRFS_GENNUM_Field]}
  runningCmnd=${v[$RUNNING_CMD_Field]}
  primaryServer=${v[$PRIMARY_SERVER_Field]}
  backupServer=${v[$BACKUP_SERVER_Field]}
  rshPath=${v[$RSH_PATH_Field]}
  rcpPath=${v[$RCP_PATH_Field]}
  genTimestamp=${v[$GENNUM_TSTAMP_Field]}
  [[ -z $genTimestamp ]] && genTimestamp=0
  environmentType=${v[$CLUSTER_SUBTYPE_Field]}
  [[ -z $environmentType ]] && environmentType=$MMMODE
  newKeyGenNumber=${v[$NEW_KEY_Field]}
  [[ -z $newKeyGenNumber ]] && newKeyGenNumber=0
  # A missing committed key generation defaults to the new key generation.
  committedKeyGenNumber=${v[$COMMITTED_KEY_Field]}
  [[ -z $committedKeyGenNumber ]] && committedKeyGenNumber=$newKeyGenNumber
  secLevel=${v[$SECLEVEL_Field]}
  [[ -z $secLevel ]] && secLevel=0

  # Build the Gpfs object information.
  # This replaces whatever value was taken from the server replies above.
  gpfsObjectInfo="$sdrfsFormatLevel:$genNumber:$runningCmnd"

  # Rebuild the GPFS environment if necessary.
  # A non-zero return from checkMmfsEnvironment triggers the rebuild.
  checkMmfsEnvironment $sdrfsFile $genNumber $newKeyGenNumber  \
                       $committedKeyGenNumber $secLevel
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # Rebuild the rest of the system files.
    updateMmfsEnvironment $HOME_CLUSTER $sdrfsFile
    checkForErrors updateMmfsEnvironment $?
  fi

  if [[ -f $mmsdrfsShadow ]]
  then
    # Things seem to have worked ok.
    # Commit the latest level.
    $mv $mmsdrfsShadow $mmsdrfsFile
    checkForErrors mv $?
    $mmsync $mmsdrfsFile

    # Make sure that non-privileged commands can read this file.
    $chmod a+r $mmsdrfsFile >/dev/null 2>&1
  fi

  # Put out the results and return.
  # Empty values are replaced by placeholders so that the result line
  # always has the same number of blank-separated words.
  [[ -z $backupServer ]] && backupServer="_NOSECONDARY_"
  [[ -z $rshPath ]] && rshPath="_DEFAULT_"
  [[ -z $rcpPath ]] && rcpPath="_DEFAULT_"
  print -- "$MMMODE $sdrLocked $HOME_CLUSTER $ourNodeNumber $gpfsObjectInfo"  \
           "$primaryServer $backupServer $ourNodeName $rshPath $rcpPath"      \
           "$genNumber $genTimestamp $environmentType $ourShortName $sdrfsFormatLevel"
  return 0

}  #----- end of function gpfsClusterInit ------------------------


#############################################################################
#
# Function:  When the primary and backup servers are not available,
#            try to get the latest mmsdrfs data by polling all nodes
#            and retrieving the data from the node with the highest
#            generation number.
#
# Input:     $1 - file with the names of the nodes to consider
#
# Output:    This function prints a line with the following values:
#              mmmode         - command mode
#              sdrLocked      - yes or no
#              nodesetId      - HOME_CLUSTER
#              ourNodeNumber  - the node number of this node
#              gpfsObjectInfo - the most current value of the Gpfs object
#              primaryServer  - primary server for obtaining sdrfs data
#              backupServer   - secondary server for obtaining sdrfs data
#              ourNodeName    - the reliable hostname of this node
#              rshPath        - remote shell command or _DEFAULT_
#              rcpPath        - remote file copy command or _DEFAULT_
#              sdrGenNumber   - most recent mmsdrfs generation number
#              sdrGenTimestamp - timestamp of the mmsdrfs generation number
#              environmentType - environment within which GPFS is defined
#              ourShortName   - the unqualified name of the GPFS adapter
#              sdrfsFormatLevel- the format level of the mmsdrfs file
#
# Note:      sdrLocked is a global variable and its value is not
#            altered unless a lock is obtained.
#
# Returns:   0 - files refreshed successfully
#            non-zero - unexpected error
#
#############################################################################
function gpfsInitFromNonServer  # <nodeNamesFile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgpfsInitFromNonServer ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset nodeNamesFile=$1

  typeset localGenNumber primaryServer backupServer runningCmnd
  typeset mostRecentPrimaryServer mostRecentBackupServer rshPath rcpPath
  typeset mostRecentGenNumber mmmode junk sdrfsFormatLevel
  typeset gpfsObjectInfo mmcommonOutput nodeName
  typeset environmentType mostRecentGenTimestamp
  typeset -i ourNodeNumber
  typeset rc=0
  # Index of the node name in a parsed line of mmdsh output (see below).
  typeset mmdshNodeName=0
  typeset versionLine=""

  if [[ ! -f $mmsdrfsFile ]]
  then
    # Either the node does not belong to a cluster
    # or the config information is lost/corrupted.
    printErrorMsg 282 $mmcmd
    cleanupAndExit
  fi

  # Parse the version line from the local mmsdrfs file.
  # The lone '-' after '--' becomes v[0], so the fields start at v[1].
  versionLine=$($head -1 $mmsdrfsFile)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"

  # Perform a quick sanity check.
  [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]] &&  \
    corruptedSdrFileExit 131 "$versionLine"

  # Retrieve the cluster environment, the generation number,
  # and a few other things.
  if [[ -z ${v[$CLUSTER_TYPE_Field]} ]]
  then
    mmmode=sp
  else
    mmmode=${v[$CLUSTER_TYPE_Field]}
  fi

  # Retrieve other needed information.
  sdrfsFormatLevel=${v[$SDRFS_FORMAT_Field]}
  localGenNumber=${v[$SDRFS_GENNUM_Field]}
  runningCmnd=${v[$RUNNING_CMD_Field]}
  primaryServer=${v[$PRIMARY_SERVER_Field]}
  backupServer=${v[$BACKUP_SERVER_Field]}
  rshPath=${v[$RSH_PATH_Field]}
  rcpPath=${v[$RCP_PATH_Field]}
  mostRecentGenTimestamp=${v[$GENNUM_TSTAMP_Field]}
  [[ -z $mostRecentGenTimestamp ]] && mostRecentGenTimestamp=0
  environmentType=${v[$CLUSTER_SUBTYPE_Field]}
  [[ -z $environmentType ]] && environmentType=$mmmode
  [[ $mmmode != $environmentType && $environmentType != lc2 ]] &&  \
    environmentType="rpd"

  # Export global variables.
  export MMMODE=$mmmode
  export environmentType=$environmentType

  if [[ -n $rshPath && $rshPath != "_DEFAULT_" ]]
  then
    rsh="$rshPath"
    export GPFS_rshPath="$rshPath"
  fi

  if [[ -n $rcpPath && $rcpPath != "_DEFAULT_" ]]
  then
    rcp="$rcpPath"
    export GPFS_rcpPath="$rcpPath"
  fi

  # Only cluster types lc and single are handled by this function.
  if [[ $MMMODE != lc && $MMMODE != single ]]
  then
    # Unknown GPFS nodeset type
    printErrorMsg 338 $mmcmd $MMMODE
    cleanupAndExit
  fi

  # Find our node name and node number.
  getLocalNodeData

  # If there are no other nodes, assume the local node has good data.
  if [[ ! -s $nodeNamesFile ]]
  then
    # Build the Gpfs object information.
    gpfsObjectInfo="$sdrfsFormatLevel:$localGenNumber:$runningCmnd"

    # Put out the results and return.
    [[ -z $backupServer ]] && backupServer="_NOSECONDARY_"
    [[ -z $rshPath ]] && rshPath="_DEFAULT_"
    [[ -z $rcpPath ]] && rcpPath="_DEFAULT_"
    print -- "$MMMODE $sdrLocked $HOME_CLUSTER $ourNodeNumber $gpfsObjectInfo"  \
             "$primaryServer $backupServer $ourNodeName $rshPath $rcpPath"      \
             "$localGenNumber $mostRecentGenTimestamp $environmentType"         \
             "$ourShortName $sdrfsFormatLevel"
    return 0
  fi

  # Collect the version lines from the local copies
  # of the mmsdrfs file on each of the nodes.
  $mmcommon onall $nodeNamesFile $unreachedNodes  \
     adminCmd $head -1 $mmsdrfsFile > $tmpfile

  # If no version lines were returned, either all the nodes are down
  # or something is very, very wrong.
  if [[ ! -s $tmpfile ]]
  then
    :   #esj handle this case better
    checkForErrors "gpfsInitFromNonServer: All nodes are down or sdr data corrupted" 1
  fi

  # Sort the information based on the generation number field.
  # Note:  The generation number is field 6 but because mmdsh
  #        prepends "nodename: " to each line, we specify sort key 7.
  # The sort is numeric and descending, so the lines with the highest
  # generation number come first.
  $sort -t: -k 7,7nr -o $tmpfile $tmpfile
  checkForErrors "gpfsInitFromNonServer: sort $tmpfile" $?

  # Extract the most recent generation number, primary and backup
  # server names.  Create a list of the nodes with good mmsdrfs data.

  # Prepare the file for reading.
  # IFS stays set to ':' until the read loop below has finished.
  IFS=":"
  exec 3<&-
  exec 3< $tmpfile

  # Read the first line.
  read -u3 versionLine

  # Parse the line.  Unlike elsewhere in the code, here we do not have to shift
  # the fields to account for v[0] not having a value.  This is one of the few
  # instances where v[0] has meaningful information, i.e., the node name
  # prepended by the mmdsh command.
  set -f ; set -A v - $versionLine ; set +f

  # Start a list of nodes with good mmsdrfs data.
  print -- "${v[$mmdshNodeName]}" > $tmpfile2

  # Get the most recent values for the fields of interest.
  mostRecentGenNumber=${v[$SDRFS_GENNUM_Field]}
  mostRecentPrimaryServer=${v[$PRIMARY_SERVER_Field]}
  mostRecentBackupServer=${v[$BACKUP_SERVER_Field]}
  mostRecentGenTimestamp=${v[$GENNUM_TSTAMP_Field]}
  [[ -z $mostRecentGenTimestamp ]] && mostRecentGenTimestamp=0

  # Go through the rest of the file to be sure that there are no discrepancies.
  while read -u3 versionLine
  do
    # Parse the line.  See parsing comments above.
    set -f ; set -A v - $versionLine ; set +f

    # Stop if we are past the section with the most recent nodes.
    [[ $mostRecentGenNumber -gt ${v[$SDRFS_GENNUM_Field]} ]] &&  \
      break

    # Verify that the information is consistent.
    if [[ $mostRecentPrimaryServer != ${v[$PRIMARY_SERVER_Field]} ||
          $mostRecentBackupServer  != ${v[$BACKUP_SERVER_Field]}  ]]
    then
      checkForErrors  \
        "gpfsInitFromNonServer: sdr data corrupted (multiple server definitions)" 1
    fi

    # Add the node to the list of nodes with good mmsdrfs data.
    print -- "${v[$mmdshNodeName]}" >> $tmpfile2

  done  # end while read -u3 versionLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # If we already have the latest level of the mmsdrfs file, we are done.
  if [[ $localGenNumber = $mostRecentGenNumber ]]
  then
    # Verify that the information is consistent.
    if [[ $primaryServer != $mostRecentPrimaryServer ||
          $backupServer  != $mostRecentBackupServer  ]]
    then
      checkForErrors  \
        "gpfsInitFromNonServer: sdr data corrupted (multiple server definitions)" 1
    fi

    # Build the Gpfs object information.
    gpfsObjectInfo="$sdrfsFormatLevel:$localGenNumber:$runningCmnd"

    # Put out the results and return.
    [[ -z $backupServer ]] && backupServer="_NOSECONDARY_"
    [[ -z $rshPath ]] && rshPath="_DEFAULT_"
    [[ -z $rcpPath ]] && rcpPath="_DEFAULT_"
    print -- "$MMMODE $sdrLocked $HOME_CLUSTER $ourNodeNumber $gpfsObjectInfo"  \
             "$primaryServer $backupServer $ourNodeName $rshPath $rcpPath"      \
             "$mostRecentGenNumber $mostRecentGenTimestamp $environmentType"    \
             "$ourShortName $sdrfsFormatLevel"
    return 0
  fi

  # If we get here, we know that our version of the mmsdrfs file is not
  # the latest.  Request the latest mmsdrfs file from one of the nodes
  # that are listed in file tmpfile2.  Keep trying until successful.
  # Note: file descriptor 3 is reopened here and is not closed again
  # before this function returns.
  exec 3<&-
  exec 3< $tmpfile2
  while read -u3 nodeName
  do
    mmcommonOutput=$($mmcommon on1 $nodeName  \
       sendClusterSDRFiles $localGenNumber $ourNodeName $mmsdrfsShadow nolock)
    rc=$?

    # Look at the result from the sendClusterSDRFiles request.
    # If it is more than one word, we assume it is an error message.
    set -f ; set -- $mmcommonOutput ; set +f
    gpfsObjectInfo=$1
    junk=$2
    if [[ -z $gpfsObjectInfo || $gpfsObjectInfo = fail || -n $junk || $rc -ne 0 ]]
    then
      # If this node failed, try the next one.
      continue
    else
      # We should have the latest mmsdrfs file.
      break
    fi
  done  # end while read -u3 nodeName

  # If none of the sendClusterSDRFiles requests was successful, give up.
  # (The variables still hold the result of the last attempt.)
  if [[ -z $gpfsObjectInfo || $gpfsObjectInfo = fail || -n $junk || $rc -ne 0 ]]
  then
    checkForErrors  \
      "gpfsInitFromNonServer: multiple sendClusterSDRFiles failure" 1
  fi

  # At this point we have the latest mmsdrfs information in file mmsdrfsShadow.
  # Parse the version line to collect the last missing pieces of information.
  versionLine=$($head -1 $mmsdrfsShadow)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"
  [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]] &&  \
    checkForErrors "gpfsInitFromNonServer: Missing or invalid $mmsdrfsShadow" 1

  sdrfsFormatLevel=${v[$SDRFS_FORMAT_Field]}
  primaryServer=${v[$PRIMARY_SERVER_Field]}
  backupServer=${v[$BACKUP_SERVER_Field]}
  rshPath=${v[$RSH_PATH_Field]}
  rcpPath=${v[$RCP_PATH_Field]}
  mostRecentGenTimestamp=${v[$GENNUM_TSTAMP_Field]}
  [[ -z $mostRecentGenTimestamp ]] && mostRecentGenTimestamp=0
  environmentType=${v[$CLUSTER_SUBTYPE_Field]}
  [[ -z $environmentType ]] && environmentType=$MMMODE
  [[ $mmmode != $environmentType && $environmentType != lc2 ]] &&  \
    environmentType="rpd"

  # Rebuild the GPFS environment.
  updateMmfsEnvironment $HOME_CLUSTER $mmsdrfsShadow
  checkForErrors updateMmfsEnvironment $?

  # Things seem to have worked OK.  Commit the latest level.
  $mv $mmsdrfsShadow $mmsdrfsFile
  checkForErrors "gpfsInitFromNonServer: mv $mmsdrfsShadow" $?
  $mmsync $mmsdrfsFile

  # Make sure that non-privileged commands can read this file.
  $chmod a+r $mmsdrfsFile >/dev/null 2>&1

  # Put out the results and return.
  # gpfsObjectInfo is the value returned by the node that sent the file.
  [[ -z $backupServer ]] && backupServer="_NOSECONDARY_"
  [[ -z $rshPath ]] && rshPath="_DEFAULT_"
  [[ -z $rcpPath ]] && rcpPath="_DEFAULT_"
  print -- "$MMMODE $sdrLocked $HOME_CLUSTER $ourNodeNumber $gpfsObjectInfo"  \
           "$primaryServer $backupServer $ourNodeName $rshPath $rcpPath"      \
           "$mostRecentGenNumber $mostRecentGenTimestamp $environmentType"    \
           "$ourShortName $sdrfsFormatLevel"
  return 0

}  #----- end of function gpfsInitFromNonServer --------------------


###########################################################################
#
# Function:  If cluster type is lc or single, this function works exactly
#            as gpfsInit.  For other cluster types (sp, rpd, hacmp) the
#            function will return the same information as gpfsInit but
#            there will be no retrieval of data from the primary server
#            and locking will be simulated.
#            This is intended to support
#            mmexportfs and mmdelnode to allow a system to be migrated
#            to cluster type lc.
#
# Input:     $1 - Process id to be used as an identifier for
#                 the sdr lock, or the string 'nolock'.
#
# Output:    The function prints a line with the following values:
#              mmmode         - command mode (sp, hacmp, ...)
#              sdrLocked      - yes or no
#              nodesetId      - nodeset identifier or 0 (if the node
#                               is not a member of any known nodeset)
#              ourNodeNumber  - the node number assigned to this node
#              gpfsObjectInfo - the most current value of the Gpfs object
#              primaryServer  - hostname of primary server or _NOPRIMARY_
#              backupServer   - hostname of backup server or _NOSECONDARY_
#              ourNodeName    - the reliable hostname of this node
#              rshPath        - remote shell command or _DEFAULT_
#              rcpPath        - remote file copy command or _DEFAULT_
#              sdrGenNumber   - most recent mmsdrfs generation number
#              sdrGenTimestamp - timestamp of the mmsdrfs generation number
#              environmentType - environment within which GPFS is defined
#              ourShortName   - the unqualified name of the GPFS adapter
#              sdrfsFormatLevel- the format level of the mmsdrfs file
#
# Returns:   0 - no errors
#            1 - unexpected error
#
###########################################################################
function gpfsInitGeneric  # <lockId>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgpfsInitGeneric ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockId=$1

  typeset localGenNumber localGenTimestamp versionLine mmmode nodesetId
  typeset primaryServer backupServer initOutput gpfsObjectInfo
  typeset sdrfsFormatLevel rshPath rcpPath environmentType
  typeset rc=0

  # If the mmsdrfs file is missing,
  # there is a need for manual intervention.
  if [[ ! -f $mmsdrfsFile ]]
  then
    # Either the node does not belong to a cluster
    # or the config information is lost/corrupted.
    printErrorMsg 282 $mmcmd
    cleanupAndExit
  fi

  # Read and parse the first line of the mmsdrfs file.
  # The lone '-' after '--' becomes v[0], so the fields start at v[1].
  versionLine=$($head -1 $mmsdrfsFile)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"

  # Perform a quick sanity check.
  [[ ${v[$LINE_TYPE_Field]} != $VERSION_LINE ]] &&  \
    corruptedSdrFileExit 134 "$versionLine"

  # Retrieve cluster environment information.
  # Empty fields are replaced right away by the placeholders that are
  # used in the result line.
  mmmode=${v[$CLUSTER_TYPE_Field]}
  [[ -z $mmmode ]] && mmmode=sp
  sdrfsFormatLevel=${v[$SDRFS_FORMAT_Field]}
  localGenNumber=${v[$SDRFS_GENNUM_Field]}
  # The third component (the running command in gpfsClusterInit)
  # is left empty here.
  gpfsObjectInfo="${sdrfsFormatLevel}:${localGenNumber}:"
  primaryServer=${v[$PRIMARY_SERVER_Field]}
  [[ -z $primaryServer ]] && primaryServer="_NOPRIMARY_"
  backupServer=${v[$BACKUP_SERVER_Field]}
  [[ -z $backupServer ]] && backupServer="_NOSECONDARY_"
  localGenTimestamp=${v[$GENNUM_TSTAMP_Field]}
  [[ -z $localGenTimestamp ]] && localGenTimestamp=0
  rshPath=${v[$RSH_PATH_Field]}
  [[ -z $rshPath ]] && rshPath="_DEFAULT_"
  rcpPath=${v[$RCP_PATH_Field]}
  [[ -z $rcpPath ]] && rcpPath="_DEFAULT_"
  environmentType=${v[$CLUSTER_SUBTYPE_Field]}
  [[ -z $environmentType ]] && environmentType=$mmmode
  [[ $mmmode != $environmentType && $environmentType != lc2 ]] &&  \
    environmentType="rpd"

  # Make the mode value globally available.
  export MMMODE=$mmmode
  export environmentType=$environmentType

  # Export remote commands if necessary.
  if [[ -n $rshPath && $rshPath != "_DEFAULT_" ]]
  then
    rsh="$rshPath"
    export GPFS_rshPath="$rshPath"
  fi

  if [[ -n $rcpPath && $rcpPath != "_DEFAULT_" ]]
  then
    rcp="$rcpPath"
    export GPFS_rcpPath="$rcpPath"
  fi

  # Depending on the environment,
  # invoke the appropriate initialization routine.
  if [[ $MMMODE = lc || $MMMODE = single ]]
  then
    # We are dealing with a supported GPFS cluster environment.
    # Everything works as expected.
    initOutput=$(gpfsClusterInit $lockId $localGenNumber  \
                   $primaryServer $backupServer $localGenTimestamp)
    rc=$?

  elif [[ $MMMODE = sp || $MMMODE = rpd || $MMMODE = hacmp ]]
  then
    # We are dealing with an obsolete GPFS cluster environment.
    # Pretend that the local file is up to date and return the
    # global values.  If a lock is requested, pretend the lock
    # is indeed obtained.  Make up any missing information.
    # This function should be called only in a strictly controlled
    # environment where the user is responsible for not creating
    # conflicts.

    # Ignore the lock request.  We are relying on the user
    # not to run two commands at the same time.
    sdrLocked=no

    # Determine the local node data.  If file mmfsNodeData is
    # missing, there is a need for manual intervention.
    [[ ! -f $mmfsNodeData ]] &&  \
      checkForErrors "gpfsInitGeneric: Missing file $mmfsNodeData" 1
    getLocalNodeData

    # Find out the nodeset to which this node belongs.
    nodesetId=$(findNodesetId $mmsdrfsFile $ourNodeNumber)
    [[ -z $nodesetId ]] && nodesetId=0

    # Build the result line from the local values only.
    initOutput=$(print -- "$MMMODE $sdrLocked $nodesetId $ourNodeNumber"   \
                          "$gpfsObjectInfo $primaryServer $backupServer"   \
                          "$ourNodeName $rshPath $rcpPath $localGenNumber" \
                          "$localGenTimestamp $environmentType"            \
                          "$ourShortName $sdrfsFormatLevel")
    rc=0

  else
    # Unknown GPFS execution environment
    printErrorMsg 338 $mmcmd $MMMODE
    cleanupAndExit
  fi  # end if [[ $MMMODE = lc || $MMMODE = single ]]

  # An empty result line is treated as a failure even if rc is zero.
  [[ -z $initOutput || $rc -ne 0 ]] &&  \
    return 1

  # Things must have worked.
  print -- "$initOutput"
  return 0

}  #----- end of function gpfsInitGeneric ------------------------


####################################################################
#
# Function:  Removes file mmSdrLockExp after the data is not valid
#            any more.  If requested, unlocks the sdr.
#            This function is intended to run as a background job.
#
# Input:     $1 - expiration data.  The format is:
#                   mmSdrLockExp:<genNumber>:<genTimestamp>:<expirationTime>
#            $2 - lock action: unlock or doNotUnlock
#
# Output:    None.
#
# Returns:   Always zero.
#
####################################################################
function expirationDataCleanup  # <expirationData> <lockAction>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGexpirationDataCleanup ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset expirationData=$1
  typeset lockAction=$2

  typeset recordTag expiresAt now remaining
  typeset graceSeconds=5

  # Split the colon-separated record.  The first field is the keyword,
  # the fourth field is the time at which the data expires.
  IFS=':'
  set -f ; set -- $expirationData ; set +f
  recordTag=$1
  expiresAt=$4
  IFS="$IFS_sv"

  # Anything that does not carry the expected keyword is garbage.
  if [[ $recordTag != mmSdrLockExp ]]
  then
    corruptedSdrFileExit 138 "$expirationData"
  fi

  # Sleep until the expiration time is reached (if it is still ahead).
  now=$($perl -e 'print time')
  remaining=$(( expiresAt - now ))
  if (( remaining > 0 ))
  then
    sleep $remaining
  fi

  # The expiration data is stale from here on; get rid of the file.
  $mmTRACE "Removing the mmSdrLockExp file"
  $rm -f $mmSdrLockExp

  # When asked to unlock, give in-flight processes a short grace
  # period before the lock is released on the primary server.
  if [[ $lockAction = unlock ]]
  then
    sleep $graceSeconds

    # getLocalNodeData sets up the node variables needed below.
    # Note: primaryServer is deliberately not local to this function.
    getLocalNodeData
    primaryServer=$($head -1 $mmsdrfsFile | $GETVALUE $PRIMARY_SERVER_Field)
    freeLockOnServer $primaryServer > /dev/null
  fi

  $mmTRACE_EXIT 0
  return 0

}  #----- end of function expirationDataCleanup ----------------


####################################################################
#
# Function:  Used to check operations after the lock directory
#            is created, but before the lock function completes.
#            Verifies that a command executed successfully.
#            If the return code from the command is not zero,
#            the function issues a message, performs cleanup
#            and stops processing.
#
# Input:     $1 - name of the command to check
#            $2 - return code from the execution of the command
#
####################################################################
function checkForErrorsAndUnlock  # <command> <return_code>
{
  # [[ ]] does not word-split $2.  With the old "[ $2 != 0 ]" form an
  # empty or multi-word return code made the test itself fail, which
  # silently skipped the cleanup below.  Anything other than the
  # literal string 0 is now treated as a failure.
  if [[ $2 != 0 ]]
  then
    # Give up the global sdr lock before bailing out.
    $mmTRACE "sdr lock RELEASED"
    $rm -rf $lockdir 2>/dev/null
    # Unexpected error
    printErrorMsg 171 "$mmcmd" "$1" "$2"
    cleanupAndExit
  fi

}   #----- end of function checkForErrorsAndUnlock ----------------


####################################################################
#
# Function:  Check operations after the local env lock directory
#            is created, but before the lock function completes.
#            Verifies that a command executed successfully.
#            If the return code from the command is not zero,
#            the function issues a message, performs cleanup
#            and stops processing.
#
# Input:     $1 - name of the command to check
#            $2 - return code from the execution of the command
#
####################################################################
function checkForErrorsAndUnlockEnvLock  # <command> <return_code>
{
  # Same contract as checkForErrorsAndUnlock, but the directory that
  # is removed is the local mmfsEnv lock directory.
  if [[ $2 != 0 ]]
  then
    $mmTRACE "mmfsEnvLock RELEASED"
    $rm -rf $mmfsEnvLockDir 2>/dev/null
    # Unexpected error
    printErrorMsg 171 "$mmcmd" "$1" "$2"
    cleanupAndExit
  fi

}   #----- end of function checkForErrorsAndUnlockEnvLock ----------


###############################################################################
#
# Function:  Obtain the global sdr lock.  If the lock is not available,
#            verify that the process that holds the lock is still alive.
#
#            This function should be executed only on the primary server node.
#
# Input:     $1 - lockId
#
# Output:    The current value of the Gpfs object, or 'fail'.
#
# Returns:   0 - no problems found
#            1 - unexpected error
#
###############################################################################
function getLock  # <lockId>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetLock ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockId=$1

  # The lock id exactly as passed by the caller.  It is kept separately
  # because the "set --" calls below overwrite the positional parameters.
  typeset requestId=$1
  typeset pidCheckAll pidCheck
  typeset lockHolderInfo lockHolderHostname lockHolderPid
  typeset lockDuration currentTime lockExpirationTime
  typeset safetyInterval=8
  typeset rc=0

#esj Call a function that verifies that this still is a valid server.

  # Look at the lockId string.  Normally, it consists of <pid>:<hostname>.
  # But if this is the special mmstartup lock, then the format is:
  #   mmSdrLockExp:<hostname>:<duration>
  # If this is the case, calculate the lock's expiration time and add
  # it to the lock id string.  This information will be used later to
  # see if the lock can be freed.
  if [[ $lockId = mmSdrLockExp* ]]
  then
    IFS=':'
    set -f ; set -- $lockId ; set +f
    IFS="$IFS_sv"
    lockHolderHostname=$2
    lockDuration=$3
    currentTime=$($perl -e 'print time')
    (( lockExpirationTime = currentTime + lockDuration + safetyInterval ))
    lockId="mmSdrLockExp:$lockHolderHostname:$lockDuration:$lockExpirationTime"
    lockId="$lockId:$5:$6:$7:$8"   # for future extensions
  fi  # end of if [[ $lockId = mmSdrLockExp* ]]

  # We are on the primary server.  Get the lock.  This is
  # equivalent to being able to create the lock directory.
  $mkdir $lockdir 2>/dev/null
  if [[ $? = 0 ]]
  then
    # We have the lock.  Record the holder and return the Gpfs object.
    getLockRecordHolder "$lockId" GRANTED
    rc=$?
    $mmTRACE_EXIT "rc=$rc"
    return $rc
  fi

  # Somebody has the lock (mkdir failed).  See if we can steal it.
  # Get the pid and hostname of the process that has the lock.
  lockHolderInfo=$($tail -n -1 $haslock 2>/dev/null)
  IFS=':'
  set -f ; set -- $lockHolderInfo ; set +f
  IFS="$IFS_sv"
  lockHolderPid=$1
  lockHolderHostname=$2
  lockDuration=$3
  lockExpirationTime=$4
  if [[ -z $lockHolderPid ]]
  then
    # If we do not find a process id, we are either jumping in the
    # middle of some other process getting the lock (the lock
    # directory was created but the haslock file has not been
    # created yet), or the haslock file is empty (this can happen
    # if the node dies before the haslock file makes it to disk).
    # As far as we are concerned, our lock attempt failed.  We give
    # the other process a moment to finish writing the file and if
    # there is still no pid recorded, we assume that something went
    # wrong with the other process and clean up the leftovers.
    # Hopefully, we will get the lock on one of the retries.
    $sleep 2
    [[ ! -s $haslock ]] && $rm -r $lockdir 2>/dev/null
    print -- "fail"
    return 0
  fi

  if [[ $lockHolderPid = mmSdrLockExp ]]
  then
    # If this is a special lock (with an expiration time associated with it),
    # see if the lock has expired.  If not, pause for a while before giving up.
    # The idea is to extend the overall lock waiting period without changing
    # the rest of the code too much.
    currentTime=$($perl -e 'print time')
    if [[ $currentTime -le $lockExpirationTime ]]
    then
      $mmTRACE "$requestId lock held by $lockHolderHostname $lockHolderPid"
      $sleep 15
      print -- "fail"
      return 0
    else
      $mmTRACE "$requestId lock held by $lockHolderHostname has expired"
    fi
  else
    # We have the pid and hostname of the process holding the lock.
    # See if this process is still alive.  The return code is captured
    # for the local check as well, so a failed local query is not
    # reported as success further down.
    if [[ $lockHolderHostname = $ourNodeName ]]
    then
      pidCheckAll=$($mmremote pid $lockHolderPid)
      rc=$?
    else
      pidCheckAll=$($mmcommon on1 $lockHolderHostname pid $lockHolderPid)
      rc=$?
    fi
    pidCheck=$(print -- "$pidCheckAll" | $egrep "^(died|alive)$")

    if [[ $pidCheck = alive ]]
    then
      # The process that has the lock is still alive; give up.
      $mmTRACE "$requestId lock held by $lockHolderHostname $lockHolderPid"
      print -- "fail"
      return 0
    elif [[ $pidCheck != died ]]
    then
      # We could not determine the status of the lock holder.
      # Propagate error messages, if any, and fail with a non-zero
      # return code even if the query command itself returned zero.
      $mmTRACE "$requestId lock held by $lockHolderHostname $lockHolderPid"
      [[ -n $pidCheckAll ]] && print -u2 "$pidCheckAll"
      printErrorMsg 440 $mmcmd $lockHolderHostname
      [[ $rc -eq 0 ]] && rc=1
      return $rc
    fi
  fi  # end of if [[ $lockHolderPid = mmSdrLockExp ]]

  # The other process went away without unlocking or the lock expired.
  # Steal the lock.
  $rm -r $lockdir 2>/dev/null
  $sleep 3   # Got to give competing processes time to not see it.
  $mkdir $lockdir 2>/dev/null
  if [[ $? = 0 ]]
  then
    # We have the lock.  Record the holder and return the Gpfs object.
    getLockRecordHolder "$lockId" STOLEN
    rc=$?
    $mmTRACE_EXIT "rc=$rc"
    return $rc
  fi

  # We couldn't steal the lock (mkdir failed); give up.
  $mmTRACE_EXIT "$requestId lock request failed"
  print -- "fail"
  return 0

}  #----- end of function getLock -------------------------------


###############################################################################
#
# Function:  Helper for getLock.  Called right after the lock directory
#            has been created.  Writes the lock id into the haslock file
#            and prints the current value of the Gpfs object.
#            If the Gpfs object does not have the expected format, the
#            lock directory is removed again so that the lock is not
#            left behind by a request that is reported as failed.
#
# Input:     $1 - lockId to record in the haslock file
#            $2 - word used in the trace message (GRANTED or STOLEN)
#
# Output:    The current value of the Gpfs object (on success only).
#
# Returns:   0 - no problems found
#            1 - unexpected value for the Gpfs object
#
###############################################################################
function getLockRecordHolder  # <lockId> <traceTag>
{
  typeset lockId=$1
  typeset traceTag=$2
  typeset gpfsObjectInfo v2genNumber

  # Create a file with the lock id in it.
  print -- "$lockId" >$haslock
  checkForErrorsAndUnlock "write to haslock file" $?
  $mmsync $haslock
  $mmTRACE "$lockId lock $traceTag"

  # Find the Gpfs object.
  gpfsObjectInfo=$(getGpfsObject)
  checkForErrorsAndUnlock "getLock: getGpfsObject" $?

  # The object must have at least two colon-separated fields.
  IFS=':'
  set -f ; set -- $gpfsObjectInfo ; set +f
  IFS="$IFS_sv"
  v2genNumber=$2
  if [[ -z $v2genNumber ]]
  then
    # Unexpected value for the Gpfs object.  Release the lock;
    # the caller is told that the request did not succeed.
    $mmTRACE "sdr lock RELEASED"
    $rm -rf $lockdir 2>/dev/null
    printErrorMsg 286 getLock $gpfsObjectInfo
    return 1
  fi

  # Everything worked just fine; return the gpfsObjectInfo.
  print -- "$gpfsObjectInfo"
  return 0

}  #----- end of function getLockRecordHolder -------------------


#############################################################################
#
# Function:  Try to obtain the sdr lock on the specified server.
#            If not available, keep retrying for up to 60 seconds
#            before giving up.  As a side effect, if necessary,
#            the latest version of the mmsdrfs file is retrieved
#            and stored on the local node.
#
# Input:     $1 - lockServer
#            $2 - lockId
#            $3 - WAIT or NOWAIT
#            $4 - generation number of the sdr file on the client
#
# Output:    The current value of the Gpfs object, or 'fail'.
#
# Returns:   0 - no problems found
#            1 - unexpected error
#
#############################################################################
function getLockOnServer  # <lockServer> <lockId> <waitIndicator> <genNumber>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetLockOnServer ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockServer=$1
  typeset lockId=$2
  typeset waitIndicator=$3
  typeset genNumber=$4

  # Seconds to sleep before the next attempt; starts at 4 and grows by 1.
  typeset -i pause=4
  typeset reply firstWord extraWord

  # A lock id without a colon has no hostname yet; append ours.
  if [[ $lockId = ${lockId%:*} ]]; then
    lockId="${lockId}:${ourNodeName}"
  fi

  # Ask for the lock over and over: locally if this node is the lock
  # server, through the lock server otherwise.  The loop ends when the
  # lock is obtained, when an error shows up, or when the accumulated
  # pauses (4 + 5 + ... + 11 = 60 seconds) are used up.
  while true; do
    if [[ $ourNodeName != $lockServer ]]; then
      # Not the primary server; forward the request.
      reply=$($mmcommon on1 $lockServer sendClusterSDRFiles \
                $genNumber $ourNodeName $mmsdrfsShadow $lockId)
    else
      # We are on the primary server; go straight to getLock.
      reply=$(getLock $lockId)
    fi

    # A valid reply is exactly one word.  No output at all, or more
    # than one word (taken to be an error message), ends the attempt.
    set -f ; set -- $reply ; set +f
    firstWord=$1
    extraWord=$2
    if [[ -z $firstWord || -n $extraWord ]]; then
      if [[ -n $extraWord ]]; then
        print -u2 "$reply"
      fi
      return 1
    fi

    # Anything other than 'fail' is the Gpfs object: the lock is ours.
    # Tell the user if we had to wait for it.
    if [[ $reply != fail ]]; then
      (( pause > 4 )) && printErrorMsg 128 $mmcmd
      print -- "$firstWord"
      return 0
    fi

    # The lock is busy.  With NOWAIT there is no second attempt.
    case $waitIndicator in
      NOWAIT)
        print -- "fail"
        return 0
        ;;
    esac

    # Once the pause has grown past 11 seconds we have waited over
    # 60 seconds in total; it is time to give up.
    if (( pause > 11 )); then
      printErrorMsg 129 $mmcmd
      print -- "fail"
      return 0
    fi

    # Announce the wait on the first retry only, then pause.
    #
    # Note:  If this is a special lock (with an expiration time associated
    #        with it), there is an additional pause in the getLock function
    #        which increases the total lock waiting period.
    (( pause == 4 )) && printErrorMsg 127 $mmcmd
    $sleep $pause
    (( pause += 1 ))
  done  # end while true

  return 1   # should never get here

}  #----- end of function getLockOnServer ---------------------


####################################################################
#
# Function:  Free the global sdr lock.
#
# Input:     $1 - nodeNumber; 0 means this node is the lock server
#            $2 - lock server (optional; defaults to $primaryServer)
#
# Output:    'ok' or 'error'
#
# Returns:   0
#
####################################################################
function freeLock  # <nodeNumber> [<lockServer>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGfreeLock ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset nodeNumber=$1
  typeset lockServer=$2

  typeset reply verdict extra
  typeset rc=0

  lockServer=${lockServer:-$primaryServer}

  # Node number zero: we are the lock server, so the lock directory
  # can be removed right here.
  if [[ $nodeNumber -eq 0 ]]; then
    $mmTRACE "lock RELEASED"
    if $rm -r $lockdir 2>/dev/null; then
      print -- "ok"
    else
      print -- "error"
    fi
    return 0
  fi

  # Any other node passes the request on to the lock server.
  # A usable answer consists of exactly one word.
  reply=$($mmcommon on1 $lockServer unlock 0)
  set -f ; set -- $reply ; set +f
  verdict=$1
  extra=$2
  if [[ -n $verdict && -z $extra ]]; then
    print -- "$verdict"
  else
    print -- "error"
  fi

  return 0

}  #----- end of function freeLock -----------------------


#############################################################
#
# Function:  Free the global sdr lock.
#
# Input:     $1 - lockServer
#            $2 - local node number (recomputed by this function)
#
# Output:    Whatever freeLock prints: 'ok' or 'error'
#
# Returns:   0
#
#############################################################
function freeLockOnServer  # <lockServer> [<nodeNumber>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGfreeLockOnServer ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockServer=$1
  typeset nodeNumber=$2

  # freeLock removes the lock itself only when it is handed node
  # number 0.  Use 0 when this node is the lock server and a non-zero
  # value in every other case.
  nodeNumber=1
  [[ $ourNodeName = $lockServer ]] && nodeNumber=0

  # Release the lock.
  if [[ $getCredCalled = no ]]; then
    getCred
  fi
  freeLock $nodeNumber $lockServer

  return 0

}  #----- end of function freeLockOnServer -----------------------


##########################################################################
#
# Function:  Get the local mmfsEnv lock.  The lock is designed to protect
#            the process of updating the mmfs environment.  The scope of
#            the lock is the local node only.
#
# Input:     $1 - lockId
#            $2 - (optional) NOWAIT indicator
#
# Output:    'granted', or 'fail'.
#
# Returns:   0 - no problems found
#            1 - unexpected error
#
##########################################################################
function getLocalEnvLock  # <lockId> [NOWAIT]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetLocalEnvLock ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockId=$1
  typeset waitIndicator=$2

  typeset -i waited=0
  typeset reply tag verdict

  # Poll getEnvLock every 3 seconds until the lock is granted,
  # an error shows up, or more than 30 seconds have gone by.
  while true; do
    reply=$(getEnvLock $lockId)

    # The expected reply has the form getEnvLock:<result>.
    IFS=':'
    set -f ; set -- $reply ; set +f
    tag=$1
    verdict=$2
    IFS="$IFS_sv"

    # Anything without the magic word is treated as an error message.
    if [[ $tag != getEnvLock ]]; then
      if [[ -n $reply ]]; then
        print -u2 "$reply"
      fi
      # Unexpected error obtaining the local environment update lock
      printErrorMsg 509 $mmcmd
      $mmTRACE_EXIT "rc=1 Unexpected error from getEnvLock $lockId"
      return 1
    fi

    # Any result other than 'fail' means the lock is ours.
    if [[ $verdict != fail ]]; then
      print -- "granted"
      $mmTRACE_EXIT "mmfsEnvLock $lockId granted - wait time $waited"
      return 0
    fi

    # The lock is busy and the caller does not want to wait.
    if [[ $waitIndicator = NOWAIT ]]; then
      print -- "fail"
      $mmTRACE_EXIT "mmfsEnvLock $lockId NOT granted - NOWAIT option"
      return 0
    fi

    # The lock is busy and we have been waiting for over 30 seconds.
    if (( waited > 30 )); then
      # $mmcmd: Local update lock busy for more than 30 seconds.
      printErrorMsg 510 $mmcmd
      print -- "fail"
      $mmTRACE_EXIT "mmfsEnvLock $lockId NOT granted - timeout $waited"
      return 0
    fi

    # Pause for 3 seconds and go around again.
    $sleep 3
    (( waited += 3 ))
    $mmTRACE "waiting for mmfsEnvLock $lockId $waited sec"
  done  # end while true

  # We should never get here.
  printErrorMsg 171 $mmcmd "function getLocalEnvLock" 1
  return 1

}  #----- end of function getLocalEnvLock --------------------------


############################################################################
#
# Function:  Obtain the mmfsEnv lock.  If the lock is not available,
#            verify that the process that holds the lock is still alive.
#
# Input:     $1 - lockId (pid of the requesting process)
#
# Output:    'granted' or 'fail'.
#
# Returns:   0 - no problems found
#            1 - unexpected error
#
############################################################################
function getEnvLock  # <lockId>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetEnvLock ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset lockId=$1

  typeset format lockHolderInfo lockHolderPid currentProcessInfo

  # Note: the strings actually written to stdout are
  # 'getEnvLock:granted' and 'getEnvLock:fail'.

  # Ensure the lock id has some value.
  [[ -z $lockId ]] && lockId=$$

  # Setup the variable describing the output that we want from the ps command.
  # We want to get the process id, the parent id, and the full command string.
  # Headers should be suppressed.  The value is expanded unquoted on
  # purpose: on Linux it carries an extra option after a blank.
  if [[ $osName = AIX ]]
  then
    format="pid=,ppid=,args="
  else  # osName = Linux
    format="pid,ppid,args --no-headers"
  fi

  # Get the lock.  This is equivalent to being able
  # to create the mmfsEnvLock directory.
  $mkdir $mmfsEnvLockDir 2>/dev/null
  if [[ $? = 0 ]]
  then
    # We have the lock.  Record who is holding it.
    getEnvLockRecordHolder "$lockId" "$format"

    # Everything worked just fine.
    print -- "getEnvLock:granted"
    $mmTRACE_EXIT "mmfsEnvLock $lockId GRANTED"
    return 0
  fi

  # Somebody has the lock (mkdir failed).  See if we can steal it.
  # Get the pid of the process that has the lock.
  lockHolderInfo=$($cat $hasEnvLock 2>/dev/null)
  lockHolderPid=${lockHolderInfo%% *}
  if [[ -z $lockHolderPid ]]
  then
    # If we do not find a process id, we are either jumping in the
    # middle of some other process getting the lock (the lock
    # directory was created but the hasEnvLock file has not been
    # created yet), or the hasEnvLock file is empty (this can happen
    # if the node dies before the hasEnvLock file makes it to disk).
    # As far as we are concerned, our lock attempt failed.  We give
    # the other process a moment to finish writing the file and if
    # there is still no pid recorded, we assume that something went
    # wrong with the other process and clean up the leftovers.
    # Hopefully, we will get the lock on one of the retries.
    $sleep 2
    [[ ! -s $hasEnvLock ]] && $rm -r $mmfsEnvLockDir 2>/dev/null
    print -- "getEnvLock:fail"
    $mmTRACE_EXIT "mmfsEnvLock $lockId failed - unknown lock holder"
    return 0
  fi

  # We have the pid of the process holding the lock.
  # See if this process is still alive: the ps output for that pid
  # must be identical to what was recorded when the lock was taken.
  currentProcessInfo=$($ps -o $format -p $lockHolderPid)
  currentProcessInfo=${currentProcessInfo##+( )}  # strip leading blanks

  # The right-hand side is quoted so that it is compared literally.
  # Unquoted it would be a pattern, and a command line containing
  # characters such as [ ] * ? could fail to match itself, which
  # would let us steal the lock from a live process.
  if [[ $lockHolderInfo = "$currentProcessInfo" ]]
  then
    # The process that has the lock is still alive - give up.
    print -- "getEnvLock:fail"
    $mmTRACE_EXIT "mmfsEnvLock $lockId failed - held by $lockHolderInfo"
    return 0
  fi

  # The other process went away without unlocking.  Steal the lock.
  $rm -r $mmfsEnvLockDir 2>/dev/null
  $mmTRACE "$lockId removed stale mmfsEnvLock"
  $sleep 2   # Give competing processes time to not see it.
  $mkdir $mmfsEnvLockDir 2>/dev/null
  if [[ $? = 0 ]]
  then
    # We have the lock.  Record who is holding it.
    getEnvLockRecordHolder "$lockId" "$format"

    # Everything worked just fine.
    print -- "getEnvLock:granted"
    $mmTRACE_EXIT "mmfsEnvLock $lockId STOLEN"
    return 0
  fi

  # We couldn't steal the lock (mkdir failed); give up.
  print -- "getEnvLock:fail"
  $mmTRACE_EXIT "$lockId getEnvLock request failed"
  return 0

}  #----- end of function getEnvLock -------------------


############################################################################
#
# Function:  Helper for getEnvLock.  Called right after the mmfsEnvLock
#            directory has been created.  Writes a line identifying the
#            lock holder (the ps output for the given pid or, if ps
#            shows nothing, the lock id itself) into the hasEnvLock file.
#
# Input:     $1 - lockId (pid of the requesting process)
#            $2 - output format specification for the ps command
#
# Output:    None
#
# Returns:   Return code from syncing the hasEnvLock file (not used
#            by getEnvLock).  A failure to write the file is handled
#            by checkForErrorsAndUnlockEnvLock.
#
############################################################################
function getEnvLockRecordHolder  # <lockId> <psFormat>
{
  typeset lockId=$1
  typeset format=$2
  typeset lockHolderInfo

  lockHolderInfo=$($ps -o $format -p $lockId)
  lockHolderInfo=${lockHolderInfo##+( )}  # strip leading blanks
  [[ -z $lockHolderInfo ]] && lockHolderInfo=$lockId

  # -r keeps backslashes in the command string intact, so that the
  # recorded line can later be compared with fresh ps output.
  print -r -- "$lockHolderInfo" > $hasEnvLock
  checkForErrorsAndUnlockEnvLock "write to hasEnvLock file" $?
  $mmsync $hasEnvLock

}  #----- end of function getEnvLockRecordHolder -------


####################################################################
#
# Function:  Free the local mmfsEnv lock.
#
# Input:     (none)
#
# Output:    'ok' or 'error'
#
# Returns:   0 - no problems found
#            non-zero - unexpected error
#
####################################################################
function freeEnvLock  #
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGfreeEnvLock ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset rc=0

  # Remove the lock.  The return code of the rm command is also
  # the return code of this function.
  $rm -r $mmfsEnvLockDir 2>/dev/null
  rc=$?
  if [[ $rc -eq 0 ]]
  then
    print -- "ok"
    $mmTRACE_EXIT "mmfsEnvLock RELEASED"
  else
    print -- "error"
    $mmTRACE_EXIT "freeEnvLock failed - rc=$rc"
  fi

  return $rc

}  #----- end of function freeEnvLock --------------------------


########################################################################
#
# Function:  Confirm that the specified /dev entry was created by GPFS.
#
# Input:     $1 - purpose for the check: mount or delete
#            $2 - fully-qualified device name
#            $3 - expected major number for the /dev entry
#            $4 - expected minor number for the /dev entry or NULL.
#                 NULL indicates bypass the minor number check.
#
# Output:    None on stdout.  If the entry exists but does not have the
#            expected attributes, explanatory messages go to stderr.
#
# Returns:   0 - Device created by GPFS
#            1 - Device not created by GPFS or unexpected error
#
########################################################################
function confirmMajorMinor  # <purpose> <device> <majorNumber> [<minorNumber>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGconfirmMajorMinor ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset purpose=$1
  typeset etcDevice=$2
  typeset majorNumber=$3
  typeset minorNumber=$4

  typeset lsOutput major minor

  # majorNumber should contain the major number that was
  # used when the /dev entry was first created by us.
  # If this value has not been passed by the caller,
  # figure it out from the suffix of file mmfsVfsNumber*.
  if [[ -z $majorNumber || $majorNumber -eq 0 ]]
  then
    majorNumber=$($ls ${mmfsVfsNumber}+([0-9]) 2>/dev/null)
    majorNumber=${majorNumber#$mmfsVfsNumber}

    # If still not known, use the default value.
    [[ -z $majorNumber ]] && \
      majorNumber=$defaultMajorNumber
  fi

  # If minor number is not specified, bypass those checks.
  [[ -z $minorNumber ]] && minorNumber=NULL

  # Get more detailed information about the /dev entry.
  lsOutput=$($ls -lL $etcDevice)
  if [[ -n $lsOutput ]]
  then
    # There is an entry in /dev with the same name.
    # The output from the ls command looks something like this:
    #   crw-r--r--   1 root     system     2,101 Mar 06 18:54 /dev/gpfs
    # Figure out the major and minor numbers.  They sit on either
    # side of the first comma.  Any number of blanks around the comma
    # is tolerated (ls may pad the minor number, e.g. "2,   5").
    if [[ $lsOutput = *,* ]]
    then
      major=${lsOutput%%,*}      # text in front of the first comma
      major=${major%%+( )}       # drop trailing blanks
      major=${major##* }         # keep the last word
      minor=${lsOutput#*,}       # text after the first comma
      minor=${minor##+( )}       # drop leading blanks
      minor=${minor%% *}         # keep the first word
    else
      # No comma: this is not the listing of a device special file.
      # Use zeros so that the numeric comparisons below stay valid.
      major=0
      minor=0
    fi

    # If the /dev entry was created by GPFS, and has the needed
    # attributes, return OK.
    if [[ $lsOutput = ${fsDeviceType}* ]]
    then
      # If the rest of the parms check out, return 0.
      [[ $major -eq $majorNumber && ($minorNumber = NULL || $minorNumber = $minor) ]] && \
        return 0

      # When deleting /dev entries, we can relax some of the conditions.
      if [[ $purpose = delete ]]
      then
        [[ $osName = Linux || $osName = AIX && $major -eq $majorNumber ]] && \
          return 0
      fi
    fi

    # Something is wrong.  Issue appropriate messages.
    if [[ $lsOutput != ${fsDeviceType}* ]]
    then
      print -u2 "$mmcmd: $etcDevice is expected to be a '$fsDeviceType' device."
    fi

    # The major number matters on AIX and whenever we are mounting.
    if [[ ($osName = AIX || $purpose = mount) && $major -ne $majorNumber ]]
    then
      print -u2 "$mmcmd: $etcDevice is expected to have major number $majorNumber."
    fi

    if [[ $purpose = mount && $minorNumber != NULL && $minorNumber != $minor ]]
    then
      print -u2 "$mmcmd: $etcDevice is expected to have minor number $minorNumber."
    elif [[ $minor -lt $minMinorNumber ]]
    then
      print -u2 "$mmcmd: $etcDevice is expected to have minor number $minMinorNumber or higher."
    fi

    # Show the attributes of the current /dev entry.
    print -u2 "$mmcmd: The current $etcDevice entry is:"
    print -u2 "$lsOutput"

  fi  # end of if [[ -n $lsOutput ]]

  # At this point, either there is no /dev entry,
  # or one or more of its attributes are bad.
  return 1

}  #----- end of function confirmMajorMinor ---------------------


########################################################################
#
# Function:  Create an entry in /dev
#
# Input:     $1 - fully-qualified device name
#            $2 - major number for the /dev entry
#                 (defaults to $defaultMajorNumber if empty)
#            $3 - minor number for the /dev entry.  If omitted, the
#                 first unused minor number in the OS-dependent range
#                 is used.
#
# Output:    None
#
# Returns:   0 - device created
#            non-zero - unexpected error
#
########################################################################
function createDevEntry  # <device> <major> [<minor>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcreateDevEntry ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset device=$1
  typeset major=$2
  typeset minor=$3

  typeset devType minorMin minorMax deviceName fqDeviceName inuse devListing
  typeset -i intValue

  # Pessimistic default: the device counts as "not created" until
  # mknod says otherwise.  This covers the case where the search
  # below does not find any free minor number.
  typeset rc=1

  # Set OS-dependent values.
  if [[ $osName = AIX ]]
  then
    devType="c"
    minorMin=100
    minorMax=65535
  elif [[ $osName = Linux ]]
  then
    devType="b"
    minorMin=100
    minorMax=255
  else
    checkForErrors "createDevEntry: Unknown operating system $osName" 1
  fi

  # Verify input parameters.
  deviceName=${device##+(/)dev+(/)}  # name stripped of /dev/ prefix
  fqDeviceName="/dev/$deviceName"    # fully-qualified name (with /dev/ prefix)
  [[ -z $major ]] && \
    major=$defaultMajorNumber
  if [[ -n $minor ]]
  then
    intValue=$(checkIntRange minorNumber $minor $minorMin $minorMax)
    rc=$?
    [[ $rc -ne 0 ]] && return $rc
  fi

  # Take one snapshot of the existing /dev entries.  It is used for
  # every "is this major/minor pair taken" check below.
  devListing=$($ls -lL /dev 2>/dev/null)

  # Create the /dev entry.
  if [[ -n $minor ]]
  then
    # If specific major and minor numbers are requested,
    # create the device with those values.
    inuse=$(print -r -- "$devListing" | $grep "^${devType}.* $major, *$minor ")
    if [[ -z $inuse ]]
    then
      $mknod $fqDeviceName $devType $major $minor
      rc=$?
    else
      # A device with the specified major/minor numbers already exists.
      printErrorMsg 461 $mmcmd $major $minor
      rc=1
    fi
  else
    # If minor number is not specified, start exhaustive search
    # over the whole range, minorMax included.
    rc=1
    minor=$minorMin
    while [[ $minor -le $minorMax ]]
    do
      inuse=$(print -r -- "$devListing" | $grep "^${devType}.* $major, *$minor ")
      if [[ -z $inuse ]]
      then
        $mknod $fqDeviceName $devType $major $minor
        rc=$?
        break
      fi
      minor=$((minor + 1))
    done  # end while [[ $minor -le $minorMax ]]
  fi  # end of if [[ -n $minor ]]

  # Print error message, if necessary, and return.
  [[ $rc -ne 0 ]] && print -u2 "$mmcmd: Device entry $device not created."
  return $rc

}  #----- end of function createDevEntry ----------------------


####################################################################
#
# Function:  Remove the device and mount point for a file system
#
# Input:     $1 - fully-qualified device name
#            $2 - mount point to be removed or _NO_MOUNT_POINT_DELETE_
#            $3 - major number for /dev entries
#
# Output:    None
#
# Returns:   0 - all errors are ignored
#
####################################################################
function removeMountPoint  # <device> <mountpoint> <majorNumber>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGremoveMountPoint ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset etcDevice=$1
  typeset etcMountpoint=$2
  typeset majorNumber=$3

  # Local and empty by default, so that "assume not mounted" really
  # holds for an unknown OS and no value leaks in from (or out to)
  # the caller's environment.
  typeset stillMounted=""

  # Remove the /dev entry provided it was created by GPFS.
  confirmMajorMinor delete $etcDevice $majorNumber 2>/dev/null
  [[ $? -eq 0 ]] && $rm -f $etcDevice 2>/dev/null

  # If we had to remove only the /dev entry, there is nothing more to do.
  [[ $etcMountpoint = "_NO_MOUNT_POINT_DELETE_" ]] && \
    return 0

  # Make sure the file system was not left mounted by
  # a previous gpfs invocation that ended abnormally.
  # If yes, force unmount the file system before removing the mount point.
  if [[ $osName = AIX ]]
  then
    # The first awk picks the device column of the mmfs entries
    # (it is the 2nd or the 1st column depending on where the vfs
    # type shows up); the second awk keeps only our device.
    stillMounted=$($mount \
      | $awk '{ if ($4 == "mmfs") print $2; if ($3 == "mmfs") print $1}' \
      | $awk '{ if ($1 == "'"$etcDevice"'") print $1 }')
  elif [[ $osName = Linux ]]
  then
    stillMounted=$($mount -t gpfs \
      | $awk '{ if ($1 == "'"$etcDevice"'") print $1 }')
  else
    :  # assume not mounted (should never be here)
  fi

  if [[ -n $stillMounted ]]
  then
    $unmount -f $etcDevice 2>/dev/null
    [[ $? -eq 0 ]] && stillMounted=""
  fi

  # If the file system is not mounted, remove the mount point.
  [[ -z $stillMounted ]] && \
    $rmdir $etcMountpoint 2>/dev/null

  return 0

}  #----- end of function removeMountPoint ----------------------


##########################################################################
#
# Function:  Rebuild the GPFS environment if the provided mmsdrfs
#            file has a newer generation number.  The input file is
#            deleted before return from this function.
#
# Input:     $1 - mmsdrfs file to use.  The generation number is taken
#                 from the part of the name that follows $mmsdrfsGen.
#            $2 - checksum for the mmsdrfs file.
#            $3 - key file generated by openssl, or NULL.
#            $4 - checksum for the key file, or 0.
#
# Output:    none
#
# Returns:   0 - the environment is up to date (either it already was,
#                or it has just been rebuilt)
#            Errors are handled by checkForErrors / cleanupAndExit.
#
##########################################################################
function upgradeSystemFiles  # <sdrfsFile> <checksum> [<keyFile> <checksum>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGupgradeSystemFiles ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset sdrfs=$1
  typeset originalSum=$2
  typeset keyFile=$3
  typeset keyFileOriginalSum=$4

  typeset rc=0
  typeset envLockResult envLevel
  typeset currentGenNumber requestedGenNumber
  typeset sumOutput checksum keyGenNumber fname

  # Add the input file to the list of files that are removed on exit.
  # Note: The new format mmauth key file should not be deleted.
  [[ $keyFile = NULL ]] && keyFile=""
  LOCAL_FILES="$LOCAL_FILES $sdrfs"
  [[ $keyFile != ${genkeyData}* ]] && LOCAL_FILES="$LOCAL_FILES $keyFile"

  # Verify the file was copied correctly.  Only the first word of
  # the sum output (the checksum itself) is compared.
  sumOutput=$($sum $sdrfs)
  rc=$?
  set -f ; set -- $sumOutput ; set +f
  checksum=$1
  if [[ $rc -ne 0 || $checksum != $originalSum ]]
  then
    # A checksum mismatch is an error even if sum itself succeeded.
    [[ $rc -eq 0 ]] && rc=1
    checkForErrors "upgradeSystemFiles: sum $sdrfs" $rc
  fi

  if [[ -n $keyFile ]]
  then
    sumOutput=$($sum $keyFile)
    rc=$?
    set -f ; set -- $sumOutput ; set +f
    checksum=$1
    if [[ $rc -ne 0 || $checksum != $keyFileOriginalSum ]]
    then
      [[ $rc -eq 0 ]] && rc=1
      checkForErrors "upgradeSystemFiles: sum $keyFile" $rc
    fi

    # If this is an old format mmauth genkey file,
    # move the staged key file into the ssl directory.
    if [[ $keyFile = ${mmauthKeyGen}* ]]
    then
      keyGenNumber=${keyFile#$mmauthKeyGen}
      $mv $keyFile ${privateKey}$keyGenNumber
      checkForErrors "mv $keyFile ${privateKey}$keyGenNumber" $?
    fi
  fi  # end of if [[ -n $keyFile ]]

  # Obtain the local mmfsEnv update lock.
  if [[ $envLocked != yes ]]
  then
    envLockResult=$(getLocalEnvLock $$)
    rc=$?
    if [[ $rc -ne 0 || $envLockResult != granted ]]
    then
      # We failed to obtain the local environment update lock.
      printErrorMsg 511 $mmcmd
      cleanupAndExit
    fi
    envLocked=yes
  fi

  # Find out the current and requested level of the system files.
  # Both are encoded as numeric suffixes of file names.
  envLevel=$($ls ${mmfsEnvLevel}+([0-9]) 2>/dev/null)
  currentGenNumber=${envLevel#$mmfsEnvLevel}
  [[ $currentGenNumber != +([0-9]) ]] && currentGenNumber=0
  requestedGenNumber=${sdrfs#$mmsdrfsGen}

  # Verify that the input file has a reasonable name,
  # and is more recent than the current mmsdrfs file.
  if [[ -n $requestedGenNumber && $currentGenNumber -ge $requestedGenNumber ]]
  then
    # We are already up-to-date.
    freeEnvLock > /dev/null
    envLocked=no
    return 0
  fi

  # We are not up-to-date.  Rebuild the files.
  updateMmfsEnvironment $HOME_CLUSTER $sdrfs
  checkForErrors updateMmfsEnvironment $?

  # Things seem to have worked OK.  Commit the latest level.
  $mv $sdrfs $mmsdrfsFile
  checkForErrors "upgradeSystemFiles: mv $sdrfs" $?

  # Make sure that non-privileged commands can read this file.
  $chmod a+r $mmsdrfsFile >/dev/null 2>&1
  $mmsync $mmsdrfsFile ${mmfsEnvLevel}$requestedGenNumber 2>/dev/null

  # Clean up any leftover old files.
  for fname in $($ls ${mmsdrfsGen}+([0-9]) 2>/dev/null)
  do
    [[ ${fname#$mmsdrfsGen} -lt $requestedGenNumber ]] && \
      $rm -f $fname
  done

  # Release the local mmfsEnv update lock and return.
  freeEnvLock > /dev/null
  envLocked=no

  return 0

}  #----- end of function upgradeSystemFiles ---------------------


##########################################################################
#
# Function:  Verify that there is no need to rebuild the GPFS environment.
#
# Input:     $1 - mmsdrfs file to use
#            $2 - expected mmsdrfs gen number value
#            $3 - expected new private key gen number
#            $4 - expected committed private key gen number
#            $5 - security level indicator: 0 (disabled), 1 (enabled)
#
# Output:    none
#
# Returns:   0 - no need to rebuild the environment
#            1 - something is not right
#
# Notes:     The /dev entry and mount point checks are performed only
#            if the MOUNT_POINT_CHECK environment variable is set.
#            It may name a single device, or be one of all, all_local,
#            all_remote; the last three are treated alike here.
#
##########################################################################
function checkMmfsEnvironment  # <sdrfs> <genNumber> <newKeyGen> <committedKeyGen> <secLevel>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcheckMmfsEnvironment ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset sdrfs=$1
  typeset genNumber=$2
  typeset newKeyGenNumber=$3
  typeset committedKeyGenNumber=$4
  typeset secLevel=$5

  typeset rc=0
  typeset envLevel sdrfsLine nodesetIdField lineTypeField devNameField
  typeset lineNumberField fqDevName mountPoint existingMajorNumber
  typeset mountPointParentDir mountPointBaseName mountPointExists
  typeset stanzaLine mountType automountDir linkTarget
  typeset currentCommittedKeyLevel currentNewKeyLevel

  # Update the GPFS environment if the mmsdrfs file will be replaced
  # or if the current gen number is different than the last known one.
  # Note:  The latter can happen if we are a server node and there was
  #        a failure in updateMmfsEnvironment after the main file is committed.
  envLevel=$($ls ${mmfsEnvLevel}+([0-9]) 2>/dev/null)
  [[ -f $mmsdrfsShadow || ${envLevel#$mmfsEnvLevel} != $genNumber ]] && \
    return 1

  # Check the current level of the private key files.
  currentNewKeyLevel=$($ls ${mmfsNewKeyLevel}+([0-9]) 2>/dev/null)
  [[ ${currentNewKeyLevel#$mmfsNewKeyLevel} != $newKeyGenNumber ]] && \
    return 1

  currentCommittedKeyLevel=$($ls ${mmfsCommittedKeyLevel}+([0-9]) 2>/dev/null)
  [[ ${currentCommittedKeyLevel#$mmfsCommittedKeyLevel} != $committedKeyGenNumber ]] && \
    return 1

  # If security is enabled, verify the authorized_keys file exists.
  [[ $secLevel -gt 0 && ! -s $authorizedKeys ]] && \
    return 1

  # Ensure the mmfs.cfg file is present.
  [[ ! -s $mmfscfgFile ]] && \
    return 1

  # Ensure the device major number is what it should be.
  [[ $osName = Linux ]] && checkVfsNumber
  [[ $currentMajorNumber -ne $neededMajorNumber ]] && \
    return 1

  # To avoid potential deadlocks with GPFS recovery processing,
  # mount point checking will be performed only prior to daemon startup
  # and when mounting a specific file system.  This process is controlled
  # with the MOUNT_POINT_CHECK environment variable.
  [[ -z $MOUNT_POINT_CHECK ]] && return 0

  # Ensure all file system /dev entries and mount points are present.
  # IFS is ':' only while a record is being split into fields; it is put
  # back to $IFS_sv before any other work (and before any return).
  IFS=":"
  exec 3<&-
  exec 3< $sdrfs
  while read -u3 sdrfsLine
  do
    # Parse the line.
    set -f ; set -- $sdrfsLine ; set +f
    nodesetIdField=$1
    lineTypeField=$2
    devNameField=$3
    lineNumberField=$4
    stanzaLine=$5
    IFS="$IFS_sv"

    if [[ $lineTypeField = $SG_ETCFS         &&
          ($MOUNT_POINT_CHECK = $devNameField ||
           $MOUNT_POINT_CHECK = all           ||
           $MOUNT_POINT_CHECK = all_local     ||
           $MOUNT_POINT_CHECK = all_remote)   ]]
    then
      # This is a stanza line for a file system that we want to check.
      if [[ $lineNumberField -eq $MOUNT_POINT_Line ]]
      then
        fqDevName="/dev/$devNameField"
        mountPoint="${stanzaLine%:}"

        # Ensure there is a /dev entry for the file system.
        [[ ! -r $fqDevName ]] && return 1

        # Ensure the device name has the correct major number.
        if [[ $osName = Linux ]]
        then
          # The fifth column of the ls -l output for a device node
          # is "<major>,"; strip everything from the comma on.
          existingMajorNumber=$(LC_ALL=C $ls -l $fqDevName 2>/dev/null | \
                                $awk ' { print $5 } ' )
          existingMajorNumber=${existingMajorNumber%,*}
          [[ $existingMajorNumber -ne $neededMajorNumber ]] && \
            return 1

          # In the Linux environment the mount point checking will be postponed
          # until we know the type of mounting in effect for this file system.

        else
          # If this is AIX, we can check the mount point right here.
          # Since there are no special links when the automounter is
          # involved, it is sufficient to verify that there is an entry
          # for the mount point in the mount point's parent directory.
          # This eliminates potentially troublesome stat() calls into GPFS.

          # Start from a clean slate so that a match found for a
          # previously checked file system cannot hide a missing entry.
          mountPointExists=""
          mountPointParentDir=$(dirname $mountPoint)
          if [[ -n $mountPointParentDir ]]
          then
            mountPointBaseName=$(basename $mountPoint)
            [[ -n $mountPointBaseName ]] && \
              mountPointExists=$(LC_ALL=C $ls -1 $mountPointParentDir 2>/dev/null | \
                                 $grep "^$mountPointBaseName$")
          fi
          [[ -z $mountPointExists ]] && return 1
        fi  # end of if [[ $osName = Linux ]]

      elif [[ $lineNumberField -eq $MOUNT_Line && $osName = Linux ]]
      then
        # If this is Linux, finish the mount point checking.
        # $mountPoint still holds the value saved while processing
        # the MOUNT_POINT_Line record of this file system.

        # Find out the type of mounting - regular vs. automount.
        mountType=${stanzaLine#*=$BLANKchar}

        # Verify the right mount point is in place.
        if [[ $mountType = automount ]]
        then
          # The mount point must be a symlink.
          if [[ -L $mountPoint ]]
          then
            # This is indeed a symlink.  Verify the target is correct.
            # First, find out the value of the automount directory.
            # This is done once; the value is reused for later records.
            if [[ -z $automountDir ]]
            then
              automountDir=$(showCfgValue automountDir)
              [[ -z $automountDir ]] && automountDir=$defaultAutomountDir

              # Make sure the directory exists.
              [[ ! -e $automountDir ]] && return 1
            fi

            # Determine the current target of the symlink
            # (the last blank-separated word of the ls -l output).
            linkTarget=$(LC_ALL=C $ls -l $mountPoint 2>/dev/null)
            linkTarget=${linkTarget##*$BLANKchar}

            # The expected target is built from the device name of the
            # record being processed.  (The former code used a variable
            # named deviceName, which is never set in this function.)
            if [[ $linkTarget = ${automountDir}/${devNameField} ]]
            then
              # The link looks good.
              mountPointExists=yes
            else
              # The link needs to be rebuilt.
              return 1
            fi

          else
            # Either the mount point is missing or it is not a symlink.
            return 1
          fi  # end of if [[ -L $mountPoint ]]

        else
          # The mount point must be a directory.
          if [[ ! -d $mountPoint ]]
          then
            # This is either a symlink (presumably from a file system
            # that used to have the automount attribute specified),
            # or the mount point is missing altogether.
            return 1
          else
            :  # Things look good.
          fi  # end of if [[ ! -d $mountPoint ]]

        fi  # end of if [[ $mountType = automount ]]

      else
        :  # We are not interested in the other stanza lines.
      fi  # end of if [[ $lineNumberField -eq $MOUNT_POINT_Line ]]

    fi  # end of if [[ $lineTypeField = $SG_ETCFS && ...

    IFS=":"  # Prepare for the next iteration.
  done  # end while read -u3 sdrfsLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # If we get here, there is no need to rebuild the GPFS environment.
  # Everything seems to be in order.
  return 0

}  #----- end of function checkMmfsEnvironment ---------------------


##############################################################################
#
# Function:  Recreate all files that are needed for proper daemon operations.
#            Reconcile the content of the local /etc/filesystems file with
#            the file system information in the mmsdrfs file.
#
# Input:     $1 - nodesetId to which this node belongs
#            $2 - sdrfs file to use
#
# Output:    None
#
# Returns:   0 - mmfs environment is up to date
#            1 - unexpected error
#
##############################################################################
function updateMmfsEnvironment  # <nodesetId> <sdrfs>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGupdateMmfsEnvironment ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset nodesetId=$1
  typeset sdrfs=$2

  typeset nodesetIdField lineTypeField devNameField lineNumberField
  typeset sdrfsLine sdrDeviceNameList fsDeleted junk vsdPath
  typeset etcMountpoint etcDevice cfgLine genNumber fsLine disksFound
  typeset majorNumber envLockResult startupMountDeviceList fsName
  typeset remoteFileSystem remoteDeviceName options automountOption
  typeset secLevel clusterName fname mountOption automountFileSystemsFound
  typeset automountDir keyLine ourClusterName cipherList defaultCipherList
  typeset primaryServer backupServer mmsdrservTcpPort tscTcpPort
  typeset restartAutomounter currentKnownClusters sdrfsFormatLevel
  typeset keyGenNumber certificateLine
  typeset nodeIndex=0
  typeset ourNodeIndex=0
  typeset fsNumber=0
  typeset firstLine=yes
  typeset n=0
  typeset rc=0
  typeset rc1=0
  typeset rc2=0

  ###########################################
  # If our node number is not known, get it.
########################################### [[ -z $ourNodeNumber ]] && \ getLocalNodeData ########################################################### # Ensure the authorization key files are up to date. ########################################################### if [[ -z $MMAUTH_GENKEY_RUNNING ]] then updateKeyFiles $sdrfs rc=$? [[ $rc -ne 0 ]] && return 1 fi # end of if [[ -z $MMAUTH_GENKEY_RUNNING ]] ######################################### # Obtain the local mmfsEnv update lock. ######################################### if [[ $envLocked != yes ]] then envLockResult=$(getLocalEnvLock $$) rc=$? if [[ $rc -ne 0 || $envLockResult != granted ]] then # We failed to obtain the local environment update lock. printErrorMsg 511 $mmcmd return 1 fi envLocked=yes fi # end of if [[ $envLocked != yes ]] ####################################################################### # Decide what should be the major number value for the /dev entries. ####################################################################### checkVfsNumber majorNumber=$neededMajorNumber ####################################################################### # Based on the current information in the mmsdrfs file, recreate # all configuration files (mmfs.cfg, security files, etc.). # Recreate /dev entries and mount points as necessary. ####################################################################### $rm -f $stanza $tmpCfg $remainingFs $tmpDirectMap \ ${mmfsEnvLevel}+([0-9]) ${mmfsVfsNumber}+([0-9]) ${exclDiskFile}* \ $tmpAuthKeys $tmpAuthCertificate ${tmpKnownCluster}* ${tmpKnownCertificate}* sdrDeviceNameList="" IFS=":" # Change the field separator to ':'. exec 3<&- exec 3< $sdrfs while read -u3 sdrfsLine do # Parse the line. set -f ; set -- $sdrfsLine ; set +f nodesetIdField=$1 clusterName=$1 lineTypeField=$2 devNameField=$3 lineNumberField=$4 # Generate the new versions of the different system files. case $lineTypeField in $VERSION_LINE ) # Retrieve the format and gen number of the file. 
sdrfsFormatLevel=$4 genNumber=$6 # Retrieve our cluster name. ourClusterName=${18} # Retrieve the security level currently in effect. # But if genkey processing is in progress, pretend # secLevel is 0 (there is no key to work with yet). secLevel=${20} [[ -n $MMAUTH_GENKEY_RUNNING ]] && secLevel=0 keyGenNumber=${19} [[ -z $keyGenNumber ]] && keyGenNumber=0 # Retrieve the names of the configuration servers. primaryServer=$9 backupServer=${10} ;; $NODESET_HDR ) if [[ $secLevel -gt 0 && $clusterName = $HOME_CLUSTER ]] then # Start building the authorzed_keys file with # the information for the local cluster. # Note: In early 2.3 clusters (prior to PTF3) the cipher # list was kept only in the MMFSCFG section. defaultCipherList=$7 [[ -z $defaultCipherList ]] && defaultCipherList=AUTHONLY print -- "clusterName=$ourClusterName" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? print -- "cipherList=$defaultCipherList" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? certificateLine="" firstLine=yes # Add the local public keys. if [[ -f $newPublicKey ]] then exec 4<&- exec 4< $newPublicKey IFS="" # Reset IFS to preserve blanks and tabs. while read -u4 keyLine do if [[ $keyLine = "clusterName="* ]] then : # Skip the line; we have already put this information out. elif [[ $keyLine = "clusterID="* ]] then # Echo the line without a change. print -- "$keyLine" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "genkeyFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "genkeyCompatibleFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "keyGenNumber="* ]] then # Replace the keyword. print -- "newKeyGenNumber=${keyLine#keyGenNumber=}" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "keyDigest="* ]] then # Replace the keyword. print -- "newKeyDigest=${keyLine#keyDigest=}" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "publicKey=" ]] then # Replace the keyword. 
print -- "newPublicKey=" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "certificate=" ]] then if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # Replace the keyword. print -- "newCertificate=" >> $tmpAuthKeys rc=$? print -- "newCertificate=" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? # Start building the authorzed_certificate file with # the information for the local cluster. print -- "clusterName=$ourClusterName" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] certificateLine=yes else if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # All other lines are echoed without a change. print -- "$keyLine" >> $tmpAuthKeys rc=$? # If this is part of the certificate, add it to the cert file. if [[ -n $certificateLine ]] then print -- "$keyLine" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? fi else # else ifdef if [[ -e $MULTIPLE_KEYS_5 ]] # If this is NOT part of the certificate, print the line. if [[ -z $certificateLine ]] then print -- "$keyLine" >> $tmpAuthKeys rc=$? fi fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] fi # end of if [[ $keyLine = "clusterName="* ]] checkForErrors "writing to file $tmpAuthKeys" $rc done # end while read -u4 keyLine IFS=":" # Change the separator back to ":" for the next iteration. fi # end of if [[ -f $newPublicKey ]] certificateLine="" if [[ -f $committedPublicKey ]] then exec 4<&- exec 4< $committedPublicKey IFS="" # Reset IFS to preserve blanks and tabs. while read -u4 keyLine do # If sdrfsFormatLevel is 0, or genkey was not run since # the GPFS 3.1 code was activated, there will be only # one public key file ($committedPublicKey) and it will # not contain the stanza lines; generate the missing data. # Note that this does not apply to $newPublicKey; if that # file exists, it is guaranteed to have the stanza format. 
if [[ -n $firstLine && $keyLine != "clusterName="* ]] then print -- "clusterID=0" >> $tmpAuthKeys print -- "committedKeyGenNumber=$keyGenNumber" >> $tmpAuthKeys print -- "committedPublicKey=" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? fi firstLine="" if [[ $keyLine = "clusterName="* ]] then : # Skip the line; we have already put this information out. elif [[ $keyLine = "clusterID="* && -f $newPublicKey ]] then : # Skip the line; we have already put this information out. elif [[ $keyLine = "clusterID="* ]] then # Echo the line without a change. print -- "$keyLine" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "genkeyFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "genkeyCompatibleFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "keyGenNumber="* ]] then # Replace the keyword. print -- "committedKeyGenNumber=${keyLine#keyGenNumber=}" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "keyDigest="* ]] then # Replace the keyword. print -- "committedKeyDigest=${keyLine#keyDigest=}" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "publicKey=" ]] then # Replace the keyword. print -- "committedPublicKey=" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "certificate=" ]] then if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # Replace the keyword. print -- "committedCertificate=" >> $tmpAuthKeys rc=$? print -- "committedCertificate=" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? # If not already done, start creating the certificate file. if [[ ! -s $tmpAuthCertificates ]] then print -- "clusterName=$ourClusterName" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? fi fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] certificateLine=yes else if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # All other lines are echoed without a change. print -- "$keyLine" >> $tmpAuthKeys rc=$? 
# If this is part of the certificate, add it to the cert file. if [[ -n $certificateLine ]] then print -- "$keyLine" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? fi else # else ifdef if [[ -e $MULTIPLE_KEYS_5 ]] # If this is NOT part of the certificate, print the line. if [[ -z $certificateLine ]] then print -- "$keyLine" >> $tmpAuthKeys rc=$? fi fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] fi # end of if [[ $keyLine = "clusterName="* ]] checkForErrors "writing to file $tmpAuthKeys" $rc done # end while read -u4 keyLine IFS=":" # Change the separator back to ":" for the next iteration. fi # end of if [[ -f $committedPublicKey ]] certificateLine="" # Make a copy of the local cluster's certificates. if [[ -s $tmpAuthCertificates ]] then $cp $tmpAuthCertificates $localCertificates checkForErrors "cp $tmpAuthCertificates $localCertificates" $? fi # Make a copy of the local cluster's key information # but exclude the cipherList stanza. The local keys # will be added to the known_cluster files further down. if [[ -s $tmpAuthKeys ]] then $grep -v "^cipherList=" $tmpAuthKeys > $localAuthKeys checkForErrors "writing to file $localAuthKeys" $? fi fi # end of if [[ $secLevel -gt 0 && $clusterName = $HOME_CLUSTER ]] # Determine the TCP port numbers for the mmfsd and sdrserv daemons. if [[ -n ${17} ]] then mmsdrservTcpPort=${17} # GETOBJECT_PORT_Field else mmsdrservTcpPort=$8 # TCP_PORT_Field fi tscTcpPort=$8 # TCP_PORT_Field ;; $MEMBER_NODE ) # We are interested only in information pertaining to our nodeset. [[ $nodesetIdField != $nodesetId ]] && \ continue # Determine the index of this node relative # to the rest of the nodes in the cluster. [[ $5 = $ourNodeNumber ]] && \ ourNodeIndex=$nodeIndex (( nodeIndex += 1 )) ;; $SG_HEADR ) # Remember the minor number. minorNumber=$5 # See if this is a remote file system. remoteFileSystem=$7 remoteDeviceName=$8 ;; $SG_ETCFS ) # Get the stanza line. 
stanzaLine=$5 # Collect all stanza lines for all filesystems into the stanza file. # Make sure the device name and mount point exist. # This processing is operating system dependent. if [[ $osName = AIX ]] then if [[ $lineNumberField -eq $MOUNT_POINT_Line ]] then # This is the first line of a stanza. Add a blank line separator. print -- "" >> $stanza # Add the fully-qualified device name to the list of device names sdrDeviceNameList="$sdrDeviceNameList /dev/${devNameField}" # Save the information about the file system to be used # later when checking mount points and /dev entries. printf "%s %s %s %s " "/dev/${devNameField}" "${stanzaLine%:}" \ "$majorNumber" "$minorNumber" >> $remainingFs checkForErrors "writing to file $remainingFs" $? # Save the mount point. etcMountpoint="${stanzaLine%:}" # Make sure the mount point stanza line ends with a ':'. stanzaLine="$stanzaLine:" elif [[ $lineNumberField -eq $MOUNT_Line ]] then mountOption=${stanzaLine#*=$BLANKchar} if [[ $mountOption = mmfs ]] then # If this file system is to be mounted when GPFS is started, # add its name to the corresponding list. startupMountDeviceList="$startupMountDeviceList /dev/${devNameField}" (( fsNumber += 1 )) elif [[ $mountOption = automount ]] then automountFileSystemsFound=yes fi # Add the mount option to the remainingFs file. printf " %s\n" "$mountOption" >> $remainingFs checkForErrors "writing to file $remainingFs" $? fi # end of if [[ $lineNumberField -eq $MOUNT_POINT_Line ]] # To preserve the tabs in the /etc/filesystem # temporarily set IFS to new line only. IFS=" " # Add the stanza line to the stanza file. print -- "$stanzaLine" >> $stanza checkForErrors "writing to file $stanza" $? IFS=":" # Change the separator back to ":" for the next iteration. 
elif [[ $osName = Linux ]] then case $lineNumberField in $MOUNT_POINT_Line ) # Add the fully-qualified device name to the list of device names sdrDeviceNameList="$sdrDeviceNameList /dev/${devNameField}" # Save the information about the file system to be used # later when checking mount points and /dev entries. printf "%s %s %s %s " "/dev/${devNameField}" "${stanzaLine%:}" \ "$majorNumber" "$minorNumber" >> $remainingFs checkForErrors "writing to file $remainingFs" $? # Save the mount point. etcMountpoint="${stanzaLine%:}" ;; $MOUNT_Line ) # Figure out the autostart option. mountOption=${stanzaLine#*=$BLANKchar} if [[ $mountOption = mmfs ]] then automountOption="autostart" startupMountDeviceList="$startupMountDeviceList /dev/${devNameField}" (( fsNumber += 1 )) elif [[ $mountOption = automount ]] then automountOption="automount" automountFileSystemsFound=yes else automountOption="noauto" fi # Add the mount option to the remainingFs file. printf " %s\n" "$mountOption" >> $remainingFs checkForErrors "writing to file $remainingFs" $? ;; * ) : # Do nothing. There is no equivalent for this in Linux. ;; esac # case $lineNumberField in else : # This should never happen. fi # end if [[ $osName = AIX ]] ;; $SG_MOUNT ) # Build the options string. options="" [[ -n $5 ]] && options="${options},$5" [[ -n $6 ]] && options="${options},$6" [[ -n $7 ]] && options="${options},$7" [[ -n $9 ]] && options="${options},$9" [[ -n $8 ]] && options="${options},quota=$8" # Add the device name. if [[ -n $remoteFileSystem ]] then options="${options},dev=${nodesetIdField}:${remoteDeviceName}" options="${options},ldev=${devNameField}" else options="${options},dev=${devNameField}" fi # Add the autostart option name. [[ $osName = Linux ]] && \ options="${options},${automountOption}" # Ensure the string does not start with a comma. options="${options#,}" # Complete the stanza. if [[ $osName = AIX ]] then # Create the last line of the /etc/filesystems stanza. 
stanzaLine="${OPTIONS_Line_Prefix}${options}" # To preserve the tabs in the /etc/filesystems # temporarily set IFS to new line only. IFS=" " # Add the stanza line to the stanza file. print -- "$stanzaLine" >> $stanza checkForErrors "writing to file $stanza" $? IFS=":" # Change the separator back to ":" for the next iteration. # If this file system is automounted, add an entry # in the direct map file for the automounter. if [[ $mountOption = automount ]] then printf "%s %s %s\n" "${etcMountpoint}" "-fstype=mmfs,${options}" \ ":/dev/${devNameField}" >> $tmpDirectMap checkForErrors "writing to file $tmpDirectMap" $? fi elif [[ $osName = Linux ]] then # Create the /etc/fstab line. printf "%-20s %-20s %-10s %-15s 0 0\n" \ "/dev/${devNameField}" "${etcMountpoint}" \ "gpfs" "${options}" >> $stanza checkForErrors "writing to file $stanza" $? else : # This should never happen. fi # end if [[ $osName = AIX ]] ;; $SG_DISKS ) # Put all excluded disks in individual files for each file system. if [[ ${18} = $excludedDisk ]] then print -- "$5" >> "${exclDiskFile}.${devNameField}" checkForErrors "writing to file ${exclDiskFile}.${devNameField}" $? fi disksFound=yes ;; $MMFSCFG ) # this is a line from the mmfs.cfg file for our nodeset # The mmfs.cfg information is everything past the first 4 fields. shift 4 cfgLine="$*" # To preserve tabs, temporarily set IFS to new line only. IFS=" " # Strip trailing colons and add the line to the mmfs.cfg file. print -- "${cfgLine%%+(:)}" >> $tmpCfg checkForErrors "writing to file $tmpCfg" $? IFS=":" # Change the separator back to ":" for the next iteration. ;; $AUTHORIZED_CLUSTER ) # remote cluster authorization information. if [[ $secLevel -gt 0 ]] then cipherList=$7 [[ -z $cipherList ]] && cipherList=$defaultCipherList print -- "clusterName=$clusterName" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? print -- "cipherList=$cipherList" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? 
certificateLine="" firstLine=yes fi # end of if [[ $secLevel -gt 0 ]] ;; $AUTHORIZED_KEY ) # a public key for an authorized cluster. if [[ $secLevel -gt 0 ]] then # The key information is everything past the first 4 fields. shift 4 keyLine=$* # To preserve tabs, temporarily set IFS to new line only. IFS=" " # Strip trailing colons and add the line to the authorized keys file. keyLine="${keyLine%%+(:)}" if [[ $lineNumberField -eq 1 ]] then if [[ $keyLine = "clusterName="* ]] then # This key file was generated by the 3.1 version of mmauth genkey. # The line identifies the cluster on which the key was generated. : # Skip the line; we have already put this information out. else # This key file was generated by the 2.3 version of mmauth genkey. # Simulate the missing stanza lines and let the daemon deal with it. print -- "clusterID=0" >> $tmpAuthKeys print -- "committedKeyGenNumber=0" >> $tmpAuthKeys print -- "committedPublicKey=" >> $tmpAuthKeys print -- "$keyLine" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? fi # end of if [[ $keyLine = "clusterName="* ]] elif [[ $keyLine = "clusterID="* ]] then # Echo the line without a change. print -- "$keyLine" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "genkeyFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "genkeyCompatibleFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "keyGenNumber="* ]] then # Replace the keyword. print -- "committedKeyGenNumber=${keyLine#keyGenNumber=}" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "keyDigest="* ]] then # Replace the keyword. print -- "committedKeyDigest=${keyLine#keyDigest=}" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "publicKey=" ]] then # Replace the keyword. print -- "committedPublicKey=" >> $tmpAuthKeys rc=$? elif [[ $keyLine = "certificate=" ]] then if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # Replace the keyword. print -- "committedCertificate=" >> $tmpAuthKeys rc=$? 
print -- "committedCertificate=" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? # Start a new section in the certificate file. print -- "clusterName=$clusterName" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] certificateLine=yes else if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # All other lines are echoed without a change. print -- "$keyLine" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? # If this is part of the certificate, add it to the cert file. if [[ -n $certificateLine ]] then print -- "$keyLine" >> $tmpAuthCertificates checkForErrors "writing to file $tmpAuthCertificates" $? fi else # else ifdef if [[ -e $MULTIPLE_KEYS_5 ]] # If this is NOT part of the certificate, print the line. if [[ -z $certificateLine ]] then print -- "$keyLine" >> $tmpAuthKeys checkForErrors "writing to file $tmpAuthKeys" $? fi fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] fi # end of if [[ $lineNumberField -eq 1 ]] IFS=":" # Change the separator back to ":" for the next iteration. fi # end of if [[ $secLevel -gt 0 ]] ;; $REM_CLUSTER_KEY ) # a public key for some other cluster. # The key information is everything past the first 4 fields. shift 4 keyLine=$* # To preserve tabs, temporarily set IFS to new line only. IFS=" " # Strip trailing colons and add the line to the appropriate known_clusters file. keyLine="${keyLine%%+(:)}" if [[ $lineNumberField -eq 1 ]] then certificateLine="" if [[ $keyLine = "clusterName="* ]] then # This key file was generated by the 3.1 version of mmauth genkey. # The line identifies the cluster on which the key was generated. # Recreate the line to ensure it has the correct cluster name. print -- "clusterName=${clusterName}" >> ${tmpKnownCluster}${clusterName} checkForErrors "writing to file ${tmpKnownCluster}${clusterName}" $? 
print -- "clusterName=${clusterName}" >> ${tmpKnownCertificate}${clusterName} checkForErrors "writing to file ${tmpKnownCertificate}${clusterName}" $? else # This key file was generated by the 2.3 version of mmauth genkey. # Simulate the missing stanza lines and let the daemon deal with it. print -- "clusterName=${clusterName}" >> ${tmpKnownCluster}${clusterName} print -- "clusterID=0" >> ${tmpKnownCluster}${clusterName} print -- "committedKeyGenNumber=0" >> ${tmpKnownCluster}${clusterName} print -- "committedPublicKey=" >> ${tmpKnownCluster}${clusterName} print -- "$keyLine" >> ${tmpKnownCluster}${clusterName} checkForErrors "writing to file ${tmpKnownCluster}${clusterName}" $? fi # end of if [[ $keyLine = "clusterName="* ]] elif [[ $keyLine = "clusterID="* ]] then # Echo the line without a change. print -- "$keyLine" >> ${tmpKnownCluster}${clusterName} rc=$? elif [[ $keyLine = "genkeyFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "genkeyCompatibleFormat="* ]] then : # Skip the line; the daemon does not need this information. elif [[ $keyLine = "keyGenNumber="* ]] then # Replace the keyword. print -- "committedKeyGenNumber=${keyLine#keyGenNumber=}" >> \ ${tmpKnownCluster}${clusterName} rc=$? elif [[ $keyLine = "keyDigest="* ]] then # Replace the keyword. print -- "committedKeyDigest=${keyLine#keyDigest=}" >> \ ${tmpKnownCluster}${clusterName} rc=$? elif [[ $keyLine = "publicKey=" ]] then # Replace the keyword. print -- "committedPublicKey=" >> ${tmpKnownCluster}${clusterName} rc=$? elif [[ $keyLine = "certificate=" ]] then if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # Replace the keyword. print -- "committedCertificate=" >> ${tmpKnownCluster}${clusterName} rc=$? print -- "committedCertificate=" >> ${tmpKnownCertificate}${clusterName} checkForErrors "writing to file ${tmpKnownCertificate}${clusterName}" $? 
fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] certificateLine=yes else if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # All other lines are echoed without a change. print -- "$keyLine" >> ${tmpKnownCluster}${clusterName} checkForErrors "writing to file ${tmpKnownCluster}${clusterName}" $? # If this is part of the certificate, add it to the cert file. if [[ -n $certificateLine ]] then print -- "$keyLine" >> ${tmpKnownCertificate}${clusterName} checkForErrors "writing to file ${tmpKnownCertificate}${clusterName}" $? fi else # else ifdef if [[ -e $MULTIPLE_KEYS_5 ]] # If this is NOT part of the certificate, print the line. if [[ -z $certificateLine ]] then print -- "$keyLine" >> ${tmpKnownCluster}${clusterName} checkForErrors "writing to file ${tmpKnownCluster}${clusterName}" $? fi fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] fi # end of if [[ $lineNumberField -eq 1 ]] IFS=":" # Change the separator back to ":" for the next iteration. ;; * ) # We are not interested in any other lines. ;; esac # end case $lineTypeField done # end while read -u3 sdrfsLine IFS="$IFS_sv" # Restore the default IFS settings. ########################################################## # Generate a list of the file systems that are currently # present in the local /etc/filesystems file. ########################################################## getCurrentStanzaList $oldstanza rc=$? checkForErrors "updateMmfsEnvironment: getCurrentStanzaList $oldstanza" $rc ######################################################################## # Remove all stanzas that are currently present in /etc/filesystems. # If a file system was deleted or moved to a different nodeset since # the last time this function was executed, we have to do this anyway. # If a file system still exists, rather than try to figure out if # anything in the stanza changed, we first delete everything that is # mmfs-related from the /etc/filesystems file and then append our # stanza file which contains the most current sdr data. 
######################################################################## removeAllStanzas $newstanza rc=$? checkForErrors "updateMmfsEnvironment: removeAllStanzas $newstanza" $rc ######################################################################## # Remove the device name and mount point for any file system # that does not belong to this nodeset any more. ######################################################################## exec 3<&- exec 3< $oldstanza read -u3 fsLine # Skip the header line from the lsfs output. while read -u3 fsLine do # Parse the line. The first field is the file system # mount point. The second field is the device name. # We are not interested in the rest of the information. IFS=":" set -f ; set -- $fsLine ; set +f etcMountpoint=$1 etcDevice=$2 IFS="$IFS_sv" # See if the device name is present in the mmsdrfs file. fsDeleted=true for fsName in $sdrDeviceNameList do if [[ $fsName = $etcDevice ]] then fsDeleted=false break fi done # If necessary, remove the device name and mount point. # All errors are ignored. [[ $fsDeleted = true ]] && \ removeMountPoint $etcDevice $etcMountpoint $currentMajorNumber done # end read -u3 fsLine ########################################################### # Make sure there are /dev entries and mount points for # all file systems that still exist in the nodeset. ########################################################### if [[ -s $remainingFs ]] then # Find the value of the automount directory. # Note: This is really needed on Linux only. [[ $osName = Linux ]] && \ automountDir=$(showCfgValue automountDir no $ourShortName $tmpCfg) [[ -z $automountDir ]] && automountDir=$defaultAutomountDir [[ $osName = Linux ]] && $mkdir -p $automountDir # Verify the device name and mount points. exec 3<&- exec 3< $remainingFs while read -u3 createMountPointParms do createMountPoint $createMountPointParms $automountDir checkForErrors "createMountPoint $createMountPointParms $automountDir" $? 
done # end read -u3 createMountPointParms fi # end of if [[ -s $remainingFs ]] ########################################################### # Determine the values of the dynamic mmfs.cfg parameters # that are unique to this node. These values are always # determined dynamically every time mmfs.cfg is generated. ########################################################### # Start a node override section for this node. print -- "[${ourShortName}]" >> $tmpCfg checkForErrors "writing to file $tmpCfg" $? # Determine the value of the psspVsd mmfs.cfg parameter. if [[ $osName = AIX ]] then vsdPath=$(LC_ALL=C $ls -l $vsdatalst 2>/dev/null) if [[ $vsdPath = *${vsdatalstPSSP}* && -x $vsdatalstPSSP ]] then print -- "psspVsd yes" >> $tmpCfg checkForErrors "writing to file $tmpCfg" $? elif [[ $vsdPath = *${vsdatalstRSCT}* && -x $vsdatalstRSCT ]] then print -- "psspVsd no" >> $tmpCfg checkForErrors "writing to file $tmpCfg" $? else : # Do nothing. fi fi # end of if [[ $osName = AIX ]] # Determine the value of the takeOverSdrServ mmfs.cfg parameter. if [[ ( $ourNodeName = $primaryServer || $ourNodeName = $backupServer ) && $mmsdrservTcpPort = $tscTcpPort ]] then print -- "takeOverSdrServ yes" >> $tmpCfg checkForErrors "writing to file $tmpCfg" $? else print -- "takeOverSdrServ no" >> $tmpCfg checkForErrors "writing to file $tmpCfg" $? fi # end of if [[ $ourNodeName = $primaryServer || ... ########################################################## # Create a file with the names of the file systems that # must be mounted when the GPFS daemon is started. # The ordering of the file systems depends on the index # of this node within the cluster. This is done to avoid # the simultaneous mounting of the same file system from # all of the nodes in the cluster. 
########################################################## if [[ -n $startupMountDeviceList ]] then startupMountFile1=${tmpDir}startupMountFile1.${mmcmd}.$$ startupMountFile2=${tmpDir}startupMountFile2.${mmcmd}.$$ $rm -f $startupMountFile $startupMountFile1 $startupMountFile2 n=0 [[ $fsNumber -gt 0 ]] && \ (( ourNodeIndex = ourNodeIndex % fsNumber )) for fsName in $startupMountDeviceList do if [[ $n -ge $ourNodeIndex ]] then print -- "$fsName" >> $startupMountFile1 else print -- "$fsName" >> $startupMountFile2 fi (( n += 1 )) done # Combine the two parts of the startup mounts file. # Since this isn't a critical file, ignore errors. $cat $startupMountFile1 $startupMountFile2 >> $startupMountFile 2>/dev/null $rm -f $startupMountFile1 $startupMountFile2 else # There are no file systems to mount at startup time. # Clear any leftover file. $rm -f $startupMountFile fi # end of if [[ -n $startupMountDeviceList ]] ########################################################### $mmTRACE "Start critical section - copying system files" ########################################################### # Add the latest stanza information to /etc/filesystems. if [[ -s $stanza ]] then $cat $stanza >> $newstanza checkForErrors "cat $stanza >> $newstanza" $? fi # See if the /etc/filesystems information needs to be updated. $diff $newstanza $etcFilesystems >/dev/null 2>/dev/null rc=$? if [[ $rc -ne 0 ]] then $cp $newstanza $etcFilesystems rc=$? $mmsync $etcFilesystems checkForErrors "cp $newstanza $etcFilesystems" $rc fi # See if the mmfs.cfg information needs to be updated. $diff $tmpCfg $mmfscfg >/dev/null 2>/dev/null rc=$? if [[ $rc -ne 0 ]] then $mv -f $tmpCfg $mmfscfg checkForErrors "mv tmpCfg $mmfscfg" $? $mmsync $mmfscfg fi # See if the automounter map information needs to be updated. if [[ -f $tmpDirectMap ]] then $diff $tmpDirectMap $mmDirectMap >/dev/null 2>/dev/null rc=$? if [[ $rc -ne 0 ]] then $mv -f $tmpDirectMap $mmDirectMap checkForErrors "mv tmpDirectMap $mmDirectMap" $? 
$mmsync $mmDirectMap restartAutomounter=yes else restartAutomounter="" fi elif [[ -s $mmDirectMap ]] then $rm -f $mmDirectMap $touch $mmDirectMap $mmsync $mmDirectMap restartAutomounter=yes else restartAutomounter="" fi # end of if [[ -f $tmpDirectMap ]] # Update the authorized_keys file. if [[ $secLevel -gt 0 ]] then # Security is enabled. Replace the authorized_keys file # with its latest version, provided there is a difference. $diff $tmpAuthKeys $authorizedKeys >/dev/null 2>/dev/null rc=$? if [[ $rc -ne 0 ]] then $mv -f $tmpAuthKeys $authorizedKeys checkForErrors "mv $tmpAuthKeys $authorizedKeys" $? $mmsync $authorizedKeys 2>/dev/null fi if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef if [[ -s $tmpAuthCertificates ]] then $diff $tmpAuthCertificates $authorizedCertificates >/dev/null 2>/dev/null rc=$? if [[ $rc -ne 0 ]] then $mv -f $tmpAuthCertificates $authorizedCertificates checkForErrors "mv $tmpAuthCertificates $authorizedCertificates" $? $mmsync $authorizedCertificates 2>/dev/null fi fi # end of if [[ -s $tmpAuthCertificates ]] fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] else # Security is not enabled. Ensure there is no left-over file. $rm -f $authorizedKeys $authorizedCertificates fi # end of if [[ $secLevel -gt 0 ]] # Create the known_cluster files for all presently defined remote clusters. for fname in $($ls ${tmpKnownCluster}* 2>/dev/null) do # Extract the cluster name and put it on the list of known clusters. clusterName=${fname#$tmpKnownCluster} currentKnownClusters="$currentKnownClusters $clusterName " # Append the local cluster's keys. if [[ -s $localAuthKeys ]] then $cat $localAuthKeys >> $fname checkForErrors "writing to file $fname" $? fi # If the new known_cluster file is different from the existing one, # replace the existing file with the latest version. $diff $fname ${knownCluster}.$clusterName >/dev/null 2>/dev/null rc=$? 
if [[ $rc -ne 0 ]] then $mv -f $fname ${knownCluster}.$clusterName checkForErrors "mv $fname ${knownCluster}.$clusterName" $? $mmsync ${knownCluster}.$clusterName fi done # end of for fname in $($ls ${tmpKnownCluster}* 2>/dev/null) # Remove obsolete known_cluster files, if any. for fname in $($ls ${knownCluster}* 2>/dev/null) do [[ $currentKnownClusters != *" ${fname#$knownCluster.} "* ]] && \ $rm -f $fname done if [[ -e $MULTIPLE_KEYS_5 ]] ; then # start ifdef # Create the known_certificate files for all presently defined remote clusters. currentKnownClusters="" for fname in $($ls ${tmpKnownCertificate}* 2>/dev/null) do # Extract the cluster name and put it on the list of known clusters. clusterName=${fname#$tmpKnownCertificate} currentKnownClusters="$currentKnownClusters $clusterName " # Append the local cluster's certificates. if [[ -s $localCertificates ]] then $cat $localCertificates >> $fname checkForErrors "writing to file $fname" $? fi # If the new known_cluster file is different from the existing one, # replace the existing file with the latest version. $diff $fname ${knownCertificate}.$clusterName >/dev/null 2>/dev/null rc=$? if [[ $rc -ne 0 ]] then $mv -f $fname ${knownCertificate}.$clusterName checkForErrors "mv $fname ${knownCertificate}.$clusterName" $? $mmsync ${knownCertificate}.$clusterName fi done # end of for fname in $($ls ${tmpKnownCertificate}* 2>/dev/null) # Remove obsolete known_certificate files, if any. for fname in $($ls ${knownCertificate}* 2>/dev/null) do [[ $currentKnownClusters != *" ${fname#$knownCertificate.} "* ]] && \ $rm -f $fname done fi # end ifdef if [[ -e $MULTIPLE_KEYS_5 ]] # Clear false errors from the diffs above. rc=0 ############################################################## $mmTRACE "End critical section - copying system files" ############################################################## # Create an empty file. 
The suffix in the file name shows the gen number # of the mmsdrfs file that was used to create the system files. $touch ${mmfsEnvLevel}$genNumber # Create another empty file. Its file name suffix will show # the value of the major number used when creating /dev entries. $touch ${mmfsVfsNumber}$majorNumber ######################################### # Release the local mmfsEnv update lock. ######################################### if [[ $envLocked = yes ]] then freeEnvLock > /dev/null envLocked=no fi ######################################### # Do some AIX-specific processing. ######################################### if [[ $osName = AIX ]] then # Verify there is an mmfs entry in /etc/auto_mount. [[ -n $automountFileSystemsFound ]] && \ checkAutomountDefine # Restart the automounter if the automount map file changed. # Hide all messages if automount mounts are not presently used. if [[ -n $restartAutomounter ]] then if [[ -n $automountFileSystemsFound ]] then $automount else [[ -x $automount ]] && $automount >/dev/null 2>/dev/null fi fi # Generate the file listing the NSD physical volumes # if there are disks and the file does not exist. [[ ! -f $nsdpvol && -n $disksFound ]] && \ listNsdPhysicalVolumes $nsdpvol fi # end of if [[ $osName = AIX ]] $mmTRACE_EXIT "rc=$rc" return $rc } #----- end of function updateMmfsEnvironment --------------- ###################################################################### # # Function: Ensure the required levels of the authorization key # files are present on this node. 
#
# Input:    $1 - sdrfs file to use
#
# Output:   None explicit.  As a side effect, missing key files are
#           retrieved from a repository server, and the private key
#           links, the public key link and the key level indicator
#           files are (re)created as needed.
#
# Returns:  0 - no problems found
#           1 - unexpected error
#           other non-zero - return code passed back from
#                            parseKeyFiles or generatePublicKeyFiles
#
######################################################################
function updateKeyFiles  # <sdrfsFile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGupdateKeyFiles ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset sdrfsFile=$1

  typeset versionLine primaryServer backupServer ourClusterName
  typeset fname firstChoice secondChoice linkTarget keyLevel
  typeset committedKeyGenNumber newKeyGenNumber keyGenNumber
  typeset currentCommittedKeyLevel currentNewKeyLevel
  typeset rebuildCommittedFiles rebuildNewFiles neededKeyGenNumbers
  typeset ourClusterId sdrfsFormatLevel keyfileFormatLevel keyFile
  typeset rc=0
  typeset secLevel=0

  # Parse the version line of the mmsdrfs file and get needed data.
  versionLine=$($head -1 $sdrfsFile)
  IFS=':'
  set -f ; set -A v -- - $versionLine ; set +f
  IFS="$IFS_sv"
  primaryServer=${v[$PRIMARY_SERVER_Field]}
  backupServer=${v[$BACKUP_SERVER_Field]}
  [[ $backupServer = "_NOSECONDARY_" ]] && backupServer=""
  ourClusterName=${v[$CLUSTER_NAME_Field]}
  ourClusterId=${v[$CLUSTERID_Field]#gpfs}
  [[ -z $ourClusterId ]] && ourClusterId=0
  newKeyGenNumber=${v[$NEW_KEY_Field]}
  [[ -z $newKeyGenNumber ]] && newKeyGenNumber=0
  committedKeyGenNumber=${v[$COMMITTED_KEY_Field]}
  # A missing or zero committed gen number is treated as "same as new".
  [[ -z $committedKeyGenNumber || $committedKeyGenNumber -eq 0 ]] && \
    committedKeyGenNumber=$newKeyGenNumber
  secLevel=${v[$SECLEVEL_Field]}
  [[ -z $secLevel ]] && secLevel=0
  sdrfsFormatLevel=${v[$SDRFS_FORMAT_Field]}
  keyfileFormatLevel=${v[$KEYFILE_FORMAT_Field]}
  [[ -z $keyfileFormatLevel ]] && keyfileFormatLevel=0

  # Determine which file to retrieve.  This depends on what version of
  # mmauth generated the file.
  if [[ $keyfileFormatLevel = $CURRENT_KEYFILE_FORMAT ]]
  then
    keyFile=$genkeyData
  else
    keyFile=$privateKey
  fi

  #---------------------------------------------------------------------
  #
  # Verify that the key files indeed exist and that they are correctly
  # linked.  Here are the basic rules:
  #
  # - If newKeyGenNumber = 0, the user has not created yet a private
  #   key ("mmauth genkey new" was never issued).  The authorized_keys
  #   file will not be created and no files will be created in the ssl
  #   directory.
  #
  # - If newKeyGenNumber > 0 and committedKeyGenNumber = 0, the mmsdrfs
  #   file does not have valid information - see the initialization
  #   code above.  This is a "should never happen" case.
  #   Note:  The above turns out to be an acceptable case for early 2.3
  #          clusters (prior to PTF3 ??); pretend committedKeyGenNumber
  #          is not there.
  #
  # - If committedKeyGenNumber > newKeyGenNumber, the mmsdrfs file
  #   does not have valid information.  This is a "should never happen".
  #
  # - If committedKeyGenNumber = newKeyGenNumber, the daemon will
  #   see only one key in the authorized_keys file with a record
  #   header "committedPublicKey=".  This is the normal case.
  #
  #   The link committedPrivateKey (ssl/id_rsa_committed) must
  #   exist and point to file ${privateKey}$committedKeyGenNumber.
  #   The link newPrivateKey (ssl/id_rsa_new) should not exist.
  #
  # - If committedKeyGenNumber < newKeyGenNumber, the daemon will
  #   see two keys in the authorized_keys file.  The record headers
  #   will be "committedPublicKey=" and "newPublicKey=".
  #
  #   The link committedPrivateKey (ssl/id_rsa_committed) must
  #   exist and point to file ${privateKey}$committedKeyGenNumber.
  #   The link newPrivateKey (ssl/id_rsa_new) must exist and point
  #   to file ${privateKey}$newKeyGenNumber.
  #
  #---------------------------------------------------------------------

  # Perform quick sanity checks.
  [[ $committedKeyGenNumber -eq 0 && $newKeyGenNumber -gt 0 && $sdrfsFormatLevel -gt 0 ||
     $committedKeyGenNumber -gt $newKeyGenNumber ]] && \
    corruptedSdrFileExit 138 "$versionLine"

  # If a key has not been generated yet, there isn't much to do.
  if [[ $newKeyGenNumber -eq 0 ]]
  then
    # Create key level indicators.  The suffix in the file name shows the
    # generation number of the key files currently in effect.
    # At this point, both gen numbers are zero.
    $rm -f ${mmfsNewKeyLevel}+([0-9]) ${mmfsCommittedKeyLevel}+([0-9])
    $touch ${mmfsNewKeyLevel}${newKeyGenNumber}
    $touch ${mmfsCommittedKeyLevel}${committedKeyGenNumber}
    return 0
  fi

  # Find out the current level of the key files.
  # The level is the numeric suffix of the indicator file name;
  # anything that is not purely numeric is treated as level 0.
  currentNewKeyLevel=$($ls ${mmfsNewKeyLevel}+([0-9]) 2>/dev/null)
  currentNewKeyLevel=${currentNewKeyLevel#$mmfsNewKeyLevel}
  [[ $currentNewKeyLevel != +([0-9]) ]] && currentNewKeyLevel=0

  currentCommittedKeyLevel=$($ls ${mmfsCommittedKeyLevel}+([0-9]) 2>/dev/null)
  currentCommittedKeyLevel=${currentCommittedKeyLevel#$mmfsCommittedKeyLevel}
  [[ $currentCommittedKeyLevel != +([0-9]) ]] && currentCommittedKeyLevel=0

  # If the key was generated and successfully committed,
  # there will only be a "committed" key.
  if [[ $committedKeyGenNumber -eq $newKeyGenNumber ]]
  then
    # There should be only a "committed" key in this case.
    $rm -f $newPrivateKey $newPublicKey $newCertificate

    # Verify that all of the committed key files and needed links exist.
    if [[ $currentCommittedKeyLevel -eq $committedKeyGenNumber ]]
    then
      # The link target is the last blank-separated word of the ls -l output.
      linkTarget=$(LC_ALL=C $ls -l $committedPrivateKey 2>/dev/null)
      linkTarget=${linkTarget##*$BLANKchar}
      if [[ -f ${privateKey}$committedKeyGenNumber                       &&
            -L $committedPrivateKey                                      &&
            $linkTarget = ${privateKey#$sslDir}$committedKeyGenNumber    &&
            -f $committedPublicKey                                       &&
            -f $committedCertificate                                     &&
            -f ${keyFile}${committedKeyGenNumber}                        ]]
      then
        return 0
      fi
    fi  # end if [[ $currentCommittedKeyLevel -eq $committedKeyGenNumber ]]

    rebuildNewFiles=""
    rebuildCommittedFiles=yes
    neededKeyGenNumbers=$committedKeyGenNumber

  # If a new key was generated, but not yet committed,
  # and there already is a committed previous key,
  # there will be two keys: "new" and "committed".
  else  # [[ $committedKeyGenNumber -lt $newKeyGenNumber ]]

    # Assume, for now, that neither link is in order.
    rebuildNewFiles=yes
    rebuildCommittedFiles=yes

    # Verify that all of the committed key files and needed links exist.
    if [[ $currentCommittedKeyLevel -eq $committedKeyGenNumber ]]
    then
      linkTarget=$(LC_ALL=C $ls -l $committedPrivateKey 2>/dev/null)
      linkTarget=${linkTarget##*$BLANKchar}
      if [[ -f ${privateKey}$committedKeyGenNumber                       &&
            -L $committedPrivateKey                                      &&
            $linkTarget = ${privateKey#$sslDir}$committedKeyGenNumber    &&
            -f $committedPublicKey                                       &&
            -f $committedCertificate                                     &&
            -f ${keyFile}${committedKeyGenNumber}                        ]]
      then
        rebuildCommittedFiles=""
      else
        neededKeyGenNumbers=$committedKeyGenNumber
      fi
    else
      neededKeyGenNumbers=$committedKeyGenNumber
    fi  # end if [[ $currentCommittedKeyLevel -eq $committedKeyGenNumber ]]

    # Verify that all of the uncommitted key files and needed links exist.
    if [[ $currentNewKeyLevel -eq $newKeyGenNumber ]]
    then
      linkTarget=$(LC_ALL=C $ls -l $newPrivateKey 2>/dev/null)
      linkTarget=${linkTarget##*$BLANKchar}
      if [[ -f ${privateKey}$newKeyGenNumber                       &&
            -L $newPrivateKey                                      &&
            $linkTarget = ${privateKey#$sslDir}$newKeyGenNumber    &&
            -f $newPublicKey                                       &&
            -f $newCertificate                                     &&
            -f ${keyFile}${newKeyGenNumber}                        ]]
      then
        rebuildNewFiles=""
      else
        neededKeyGenNumbers="$neededKeyGenNumbers $newKeyGenNumber"
      fi
    else
      neededKeyGenNumbers="$neededKeyGenNumbers $newKeyGenNumber"
    fi  # end if [[ $currentNewKeyLevel -eq $newKeyGenNumber ]]

    # If both links are OK, we are done.
    [[ -z $rebuildNewFiles && -z $rebuildCommittedFiles ]] && \
      return 0

  fi  # end of if [[ $committedKeyGenNumber -eq $newKeyGenNumber ]]

  # If here, something is not quite all right; rebuild the files.

  # Remove the level indicators for the files and links that need work.
  $rm -f ${mmfsNewKeyLevel}+([0-9]) ${mmfsCommittedKeyLevel}+([0-9])

  # If we have two servers, we will spread the work by picking a server
  # depending on our node number.  If the first choice fails, then we'll
  # try the second one.
  if [[ -n $backupServer && $ourNodeNumber%2 -eq 0 ]]
  then
    firstChoice=$backupServer
    secondChoice=$primaryServer
  else
    firstChoice=$primaryServer
    secondChoice=$backupServer
  fi

  # Retrieve the missing files.
  for keyGenNumber in $neededKeyGenNumbers
  do
    if [[ ! -f ${keyFile}$keyGenNumber ]]
    then
      # If this node is one of the repository server nodes, it should
      # have had the key file.  Before giving up, try to get the file
      # from the other server, assuming one exists.
      if [[ $ourNodeName = $primaryServer || $ourNodeName = $backupServer ]]
      then
        # Test backupServer rather than secondChoice here: secondChoice
        # is cleared below, so on a second pass through this loop it no
        # longer tells us whether another server exists.
        if [[ -z $backupServer ]]
        then
          # The key file was not found and there is no backup server.
          # Tell the user to recover the file or regenerate it.
          printErrorMsg 507 $mmcmd ${keyFile}$keyGenNumber
          return 1
        elif [[ $ourNodeName = $primaryServer ]]
        then
          # If we are on the primary server, try the backup.
          firstChoice=$backupServer
        else
          # If we are on the backup server, try the primary.
          firstChoice=$primaryServer
        fi
        # There is no second choice any more.
        secondChoice=""
      fi  # end of if [[ $ourNodeName = $primaryServer || ...

      # Get the file from the first choice server.
      # The restrictive umask keeps the copied key file private.
      umask 077
      $rcp -p ${firstChoice}:${keyFile}$keyGenNumber ${keyFile}$keyGenNumber
      rc=$?
      umask $UMASK_sv

      if [[ $rc -ne 0 || ! -f ${keyFile}$keyGenNumber ]]
      then
        # Give up if a backup server is not defined.
        if [[ -z $secondChoice ]]
        then
          # Attempt to get data from primary server failed.
          printErrorMsg 342 $mmcmd $firstChoice
          # The key file was not found.
          # Tell the user to recover the file or regenerate it.
          printErrorMsg 507 $mmcmd ${keyFile}$keyGenNumber
          return 1
        fi

        # Issue a warning: Attempt to get data from the server failed.
        printErrorMsg 341 $mmcmd $firstChoice
        rc=0

        # Get the file from the second server.
        umask 077
        $rcp -p ${secondChoice}:${keyFile}$keyGenNumber ${keyFile}$keyGenNumber
        rc=$?
        umask $UMASK_sv

        if [[ $rc -ne 0 || ! -f ${keyFile}$keyGenNumber ]]
        then
          # Attempt to get data from the server failed.
          printErrorMsg 342 $mmcmd $secondChoice
          # The key file was not found.
          # Tell the user to recover it or regenerate it.
          printErrorMsg 507 $mmcmd ${keyFile}$keyGenNumber
          return 1
        fi
      fi  # end if [[ $rc -ne 0 || ! -f ${keyFile}$keyGenNumber ]]
    fi  # end if [[ ! -f ${keyFile}$keyGenNumber ]]
  done  # end of for keyGenNumber in $neededKeyGenNumbers

  # Obtain the local mmfsEnv update lock.
  if [[ $envLocked != yes ]]
  then
    envLockResult=$(getLocalEnvLock $$)
    rc=$?
    if [[ $rc -ne 0 || $envLockResult != granted ]]
    then
      # $mmcmd: Failed to obtain the local environment update lock
      printErrorMsg 511 $mmcmd
      return 1
    fi
    envLocked=yes
  fi

  # Recreate the security files as necessary.
  if [[ -n $rebuildCommittedFiles ]]
  then
    # If we are dealing with keys generated by mmauth 3.1 (or later),
    # all of the information is contained in the genkeyData file.
    # Otherwise, we have to generate the public key and certificate files.
    if [[ $keyfileFormatLevel = $CURRENT_KEYFILE_FORMAT ]]
    then
      parseKeyFiles ${keyFile}${committedKeyGenNumber} $committedKeyGenNumber  \
        ${privateKey}$committedKeyGenNumber $committedPublicKey $committedCertificate
      rc=$?
    else
      # Generate the corresponding public keys.
      generatePublicKeyFiles $secLevel $ourClusterName $committedKeyGenNumber
      rc=$?
    fi
    [[ $rc -ne 0 ]] && return $rc

    # Recreate the link to the private key file.
    # The link is created from inside $sslDir with relative names.
    $rm -f $committedPrivateKey
    cd $sslDir
    $ln -sf ${privateKey#$sslDir}$committedKeyGenNumber ${committedPrivateKey#$sslDir}
    rc=$?
    cd - >/dev/null
    checkForErrors  \
      "ln -sf ${privateKey#$sslDir}$committedKeyGenNumber ${committedPrivateKey#$sslDir}" $rc
  fi  # end of if [[ -n $rebuildCommittedFiles ]]

  if [[ -n $rebuildNewFiles ]]
  then
    # We must be dealing with keys generated by mmauth 3.1 (or later).
    parseKeyFiles ${keyFile}${newKeyGenNumber} $newKeyGenNumber  \
      ${privateKey}$newKeyGenNumber $newPublicKey $newCertificate
    rc=$?
    [[ $rc -ne 0 ]] && return $rc

    # Recreate the link to the private key file.
    $rm -f $newPrivateKey
    cd $sslDir
    $ln -sf ${privateKey#$sslDir}$newKeyGenNumber ${newPrivateKey#$sslDir}
    rc=$?
    cd - >/dev/null
    checkForErrors  \
      "ln -sf ${privateKey#$sslDir}$newKeyGenNumber ${newPrivateKey#$sslDir}" $rc
  fi  # end of if [[ -n $rebuildNewFiles ]]

  # Create a link to the most recent public key files.
  if [[ -s $newPublicKey ]]
  then
    $ln -sf $newPublicKey $publicKey
    checkForErrors "ln -sf $newPublicKey $publicKey" $?
  elif [[ -s $committedPublicKey ]]
  then
    $ln -sf $committedPublicKey $publicKey
    checkForErrors "ln -sf $committedPublicKey $publicKey" $?
  elif [[ $secLevel -gt 0 ]]
  then
    checkForErrors "updateKeyFiles: missing public key file" 1
  else
    :  # seclevel is 0; it is OK if no key files were found.
  fi  # end of if [[ -s $newPublicKey ]]

  # Create key level indicators.  The suffix in the file name shows the
  # generation number of the key files currently in effect.
  $rm -f ${mmfsNewKeyLevel}+([0-9]) ${mmfsCommittedKeyLevel}+([0-9])
  $touch ${mmfsNewKeyLevel}${newKeyGenNumber}
  $touch ${mmfsCommittedKeyLevel}${committedKeyGenNumber}

  # Release the local mmfsEnv update lock.
  if [[ $envLocked = yes ]]
  then
    freeEnvLock > /dev/null
    envLocked=no
  fi

  # Clean up any leftover old files.
  # Note: The id_rsa link and the mmfsKeyLevel indicator are not used anymore.
  #       They have been replaced with their "new" and "committed" counterparts.
  $rm -f $privateKey ${mmsdrfsDir}mmfsKeyLevel+([0-9])

  # Remove private key files older than the committed generation.
  for fname in $($ls ${privateKey}+([0-9]) 2>/dev/null)
  do
    keyLevel=${fname#$privateKey}
    [[ $keyLevel -lt $committedKeyGenNumber ]] && \
      $rm -f $fname
  done  # end of for fname in $($ls ${privateKey}+([0-9]) 2>/dev/null)

  # Remove genkey data files older than the committed generation.
  for fname in $($ls ${genkeyData}+([0-9]) 2>/dev/null)
  do
    keyLevel=${fname#$genkeyData}
    [[ $keyLevel -lt $committedKeyGenNumber ]] && \
      $rm -f $fname
  done  # end of for fname in $($ls ${genkeyData}+([0-9]) 2>/dev/null)

  return $rc

}  #----- end of function updateKeyFiles -------------------


######################################################################
#
# Function:  Parse a file generated by mmauth genkey into its
#            components: private key, public key and certificate.
#
# Input:    $1 - mmauth genkey file to use
#           $2 - key generation number
#           $3 - name of private key file to create
#           $4 - name of public key file to create
#           $5 - name of certificate key file to create
#
# Output:   None explicit; security files generated.
#           The public key file receives the six header stanzas, the
#           publicKey= section and the certificate= section; the
#           private key and certificate files receive only the data
#           lines of their respective sections.
#
# Returns:  0 - no problems found
#           1 - unexpected error
#
######################################################################
function parseKeyFiles  # <genkeyFile> <keyGenNumber> <privateKeyFile> <publicKeyFile> <certificateFile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGparseKeyFiles ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset genkeyFile=$1
  typeset keyGenNumber=$2
  typeset privateKeyFile=$3
  typeset publicKeyFile=$4
  typeset certificateFile=$5

  typeset keyLine publicKeyLine privateKeyLine certificateLine
  typeset rc=0

  [[ ! -f $genkeyFile ]] && \
    checkForErrors "parseKeyFiles: missing input file $genkeyFile" 19

  # The certificate file is the last parameter; if it is empty,
  # not all of the parameters were supplied.
  [[ -z $certificateFile ]] && \
    checkForErrors "parseKeyFiles: missing input parameters" 1

  $rm -f $tmpPublicKey $tmpPrivateKey $tmpCertificate

  # Prepare the file for reading.
  IFS=""    # Reset IFS to preserve blanks and tabs.
  exec 4<&-
  exec 4< $genkeyFile

  # Read the first line.  It must be the clusterName stanza.
  # Note: -r is used on all reads so that the key data is copied verbatim.
  read -r -u4 keyLine
  [[ $keyLine != "clusterName="* ]] && \
    checkForErrors "parseKeyFiles: invalid input file $genkeyFile" 11

  # Start creating the new public key file.
  print -- "$keyLine" >> $tmpPublicKey
  checkForErrors "writing to file $tmpPublicKey" $?

  # Read the second line.  It must be the clusterID stanza.
  read -r -u4 keyLine
  [[ $keyLine != "clusterID="* ]] && \
    checkForErrors "parseKeyFiles: invalid input file $genkeyFile" 12
  print -- "$keyLine" >> $tmpPublicKey
  checkForErrors "writing to file $tmpPublicKey" $?

  # Read the third line.  It must be the format level for the file.
  read -r -u4 keyLine
  [[ $keyLine != "genkeyFormat="* ]] && \
    checkForErrors "parseKeyFiles: invalid input file $genkeyFile" 13
  print -- "$keyLine" >> $tmpPublicKey
  checkForErrors "writing to file $tmpPublicKey" $?

  # Read the fourth line.  It must be the compatible format level.
  read -r -u4 keyLine
  [[ $keyLine != "genkeyCompatibleFormat="* ]] && \
    checkForErrors "parseKeyFiles: invalid input file $genkeyFile" 14
  print -- "$keyLine" >> $tmpPublicKey
  checkForErrors "writing to file $tmpPublicKey" $?

  # Read the fifth line.  It must contain the key generation number,
  # and that number must match the one requested by the caller.
  read -r -u4 keyLine
  [[ $keyLine != "keyGenNumber="* ]] && \
    checkForErrors "parseKeyFiles: invalid input file $genkeyFile" 15
  [[ $keyGenNumber != ${keyLine#keyGenNumber=} ]] && \
    checkForErrors "parseKeyFiles: invalid generation number" 16
  print -- "$keyLine" >> $tmpPublicKey
  checkForErrors "writing to file $tmpPublicKey" $?

  # Read the sixth line.  It must contain the key SHA digest.
  read -r -u4 keyLine
  [[ $keyLine != "keyDigest="* ]] && \
    checkForErrors "parseKeyFiles: invalid input file $genkeyFile" 17
  print -- "$keyLine" >> $tmpPublicKey
  checkForErrors "writing to file $tmpPublicKey" $?

  # Parse the rest of the lines.
  while read -r -u4 keyLine
  do
    if [[ $keyLine = "privateKey=" ]]
    then
      # This is the beginning of the private key portion of the file.
      # The stanza line itself is not copied anywhere.
      privateKeyLine=yes
      publicKeyLine=""
      certificateLine=""

    elif [[ $keyLine = "publicKey=" ]]
    then
      # This is the beginning of the public key portion of the file.
      privateKeyLine=""
      publicKeyLine=yes
      certificateLine=""
      print -- "$keyLine" >> $tmpPublicKey
      checkForErrors "writing to file $tmpPublicKey" $?

    elif [[ $keyLine = "certificate=" ]]
    then
      # This is the beginning of the certificate portion of the file.
      privateKeyLine=""
      publicKeyLine=""
      certificateLine=yes
      print -- "$keyLine" >> $tmpPublicKey
      checkForErrors "writing to file $tmpPublicKey" $?

    else
      # This is not a stanza line but actual key data.
      # Append the line to the correct file(s).
      if [[ -n $privateKeyLine ]]
      then
        print -- "$keyLine" >> $tmpPrivateKey
        checkForErrors "writing to file $tmpPrivateKey" $?

      elif [[ -n $publicKeyLine ]]
      then
        print -- "$keyLine" >> $tmpPublicKey
        checkForErrors "writing to file $tmpPublicKey" $?

      elif [[ -n $certificateLine ]]
      then
        # Certificate data goes to both the certificate file
        # and the public key file.
        print -- "$keyLine" >> $tmpCertificate
        checkForErrors "writing to file $tmpCertificate" $?
        print -- "$keyLine" >> $tmpPublicKey
        checkForErrors "writing to file $tmpPublicKey" $?

      else
        :  # noop; should never be here
      fi  # end of if [[ -n $privateKeyLine ]]

    fi  # end of if [[ $keyLine = "privateKey=" ]]

  done  # end while read -u4 keyLine

  exec 4<&-          # Done with the input file.
  IFS="$IFS_sv"      # Restore the default IFS settings.

  # If the existing key files are different from the new ones,
  # replace them.  Otherwise, leave the existing files as is.
  # Each new file must be compared against the file it will replace.
  $diff $tmpPrivateKey $privateKeyFile >/dev/null 2>/dev/null
  if [[ $? -ne 0 ]]
  then
    $mv -f $tmpPrivateKey $privateKeyFile
    checkForErrors "mv $tmpPrivateKey $privateKeyFile" $?
    $mmsync $privateKeyFile
  fi

  $diff $tmpPublicKey $publicKeyFile >/dev/null 2>/dev/null
  if [[ $? -ne 0 ]]
  then
    $mv -f $tmpPublicKey $publicKeyFile
    checkForErrors "mv $tmpPublicKey $publicKeyFile" $?
    $mmsync $publicKeyFile
  fi

  $diff $tmpCertificate $certificateFile >/dev/null 2>/dev/null
  if [[ $? -ne 0 ]]
  then
    $mv -f $tmpCertificate $certificateFile
    checkForErrors "mv $tmpCertificate $certificateFile" $?
    $mmsync $certificateFile
  fi

  return $rc

}  #----- end of function parseKeyFiles -------------------


######################################################################
#
# Function:  Ensure the required public authorization key files and
#            self signed certificates are present on this node.
#            Note:  This is needed only for key files generated with
#                   mmauth genkey prior to 3.1.
#
# Input:    $1 - security level in effect for the cluster
#           $2 - our cluster name
#           $3 - generation number of the key file
#
# Output:   none
#
# Returns:  0 - no problems found
#           1 - unexpected error
#
######################################################################
function generatePublicKeyFiles  # <secLevel> <clusterName> <keyGenNumber>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgeneratePublicKeyFiles ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset secLevel=$1
  typeset clusterName=$2
  typeset keyGenNumber=$3

  typeset rc=0
  typeset rsaRc=0
  typeset reqRc=0
  typeset randOpt randFile
  typeset privKeyFile=${privateKey}$keyGenNumber

  # The caller is expected to have retrieved the private key file already.
  # Without it there is nothing to do: quietly succeed if security is
  # disabled, otherwise report the missing file.
  if [[ ! -e $privKeyFile ]]
  then
    [[ $secLevel -gt 0 ]] || return 0
    checkForErrors "generatePublicKeyFiles: missing file $privKeyFile" 1
  fi

  # When the sslrandfile user exit is installed, whatever it prints
  # becomes the argument of the -rand option of openssl req.
  if [[ -x $sslrandfile ]]
  then
    randFile=$($sslrandfile 2>/dev/null)
    if [[ -n $randFile ]]
    then
      randOpt="-rand $randFile"
    fi
  fi

  # Build the openssl config file with our cluster name substituted in.
  $sed 's/= gpfsCluster/= '$clusterName'/' $opensslConfFile > $certConfigFile
  checkForErrors "creating $certConfigFile" $?

  # First pass: derive the public key and the self signed certificate
  # from the private key with all output discarded.  The exit codes are
  # kept so that failures can be retried below when security is enabled.
  $rm -f $publicKey $tmpPublicKey $committedCertificate

  $openssl rsa -in $privKeyFile -pubout -out $tmpPublicKey  \
    >/dev/null 2>/dev/null
  rsaRc=$?

  $openssl req $randOpt -new -x509 -days 16427 -key $privKeyFile  \
    -out $committedCertificate -config $certConfigFile >/dev/null 2>/dev/null
  reqRc=$?

  # Second pass, only with security enabled: rerun whichever command
  # failed, this time letting openssl show its messages, and treat a
  # repeated failure as an error.
  if [[ $secLevel -gt 0 && $rsaRc -ne 0 ]]
  then
    $openssl rsa -in $privKeyFile -pubout -out $tmpPublicKey
    rsaRc=$?
    checkForErrors  \
      "openssl rsa -in $privKeyFile -pubout -out $tmpPublicKey" $rsaRc
  fi

  if [[ $secLevel -gt 0 && $reqRc -ne 0 ]]
  then
    $openssl req $randOpt -new -x509 -days 16427 -key $privKeyFile  \
      -out $committedCertificate -config $certConfigFile
    reqRc=$?
    checkForErrors  \
      "openssl req $randOpt -new -x509 -days 16427 -key $privKeyFile -out $committedCertificate -config $certConfigFile" $reqRc
  fi

  # Nothing more to do unless a public key was successfully produced.
  if [[ ! -e $tmpPublicKey || $rsaRc -ne 0 ]]
  then
    return 0
  fi

  # Install the new public key only if it differs from the committed one;
  # otherwise the existing file is left untouched.
  $diff $tmpPublicKey $committedPublicKey >/dev/null 2>/dev/null
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    $mv -f $tmpPublicKey $committedPublicKey
    checkForErrors "mv $tmpPublicKey $committedPublicKey" $?
    $mmsync $committedPublicKey
  fi

  return 0

}  #----- end of function generatePublicKeyFiles -------------------


######################################################################
#
# Function:  Ensure the given public key file was indeed created by
#            the mmauth genkey command on the specified cluster.
#            Create a local copy of the file that has certain generic
#            stanza keywords replaced with the keywords that are
#            known to the GPFS daemon.
#
# Input:    $1 - key file to verify
#           $2 - current format level of the mmsdrfs file
#           $3 - name of the file to create
#           $4 - expected cluster name
#           $5 - (optional) alternate allowed cluster name
#
# Output:   None
#
# Returns:  0 - no errors encountered
#           non-zero - unexpected error
#
# Notes:    The function reads the key file through file descriptor 3
#           and temporarily sets IFS to the empty string while copying
#           the body of the file; IFS is restored from $IFS_sv.
#           On a format error the partially written $3 is removed.
#
######################################################################
function verifyPublicKeyFile  # <keyFile> <sdrfsFormatLevel> <newKeyFile>
                              # <clusterName> [<clusterName2>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGverifyPublicKeyFile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset keyFile=$1
  typeset sdrfsFormatLevel=$2
  typeset newKeyFile=$3
  typeset clusterName=$4
  typeset clusterName2=$5

  typeset rc=0
  typeset keyLine originatingCluster
  typeset keyGenNumberFound publicKeyFound publicKeyValueFound
  typeset clusterIdFound keyDigestFound
  typeset certificateFound certificateValueFound
  typeset intValue genkeyFormatLevel genkeyFormatFound
  typeset genkeyCompatibleFormatLevel genkeyCompatibleFormatFound

  # Verify the existence of the file and create our own copy.
  checkUserFile $keyFile $tmpPublicKey
  [[ $? -ne 0 ]] && return 1

  # If the mmsdrfs file is at release 2.3 level, accept the key file as is.
  if [[ $sdrfsFormatLevel -eq 0 ]]
  then
    $cp $tmpPublicKey $newKeyFile
    checkForErrors "cp $tmpPublicKey $newKeyFile" $?
    return 0
  fi

  # Prepare the input file for reading.
  exec 3<&-
  exec 3< $tmpPublicKey

  # Process the first line; it must identify the originating cluster.
  read -u3 keyLine
  if [[ $keyLine != "clusterName="* || $keyLine = "clusterName=" ]]
  then
    # If the file does not have any of the stanza key words, assume it was
    # created by a 2.3 version of mmauth genkey.  Accept it at face value.
    # (grep returns 1 only when none of the patterns matched.)
    $grep -q -e "^clusterName=" -e "^clusterID=" -e "^keyDigest="  \
             -e "^genkeyFormat=" -e "^genkeyCompatibleFormat="      \
             -e "^keyGenNumber=" -e "^publicKey=" $tmpPublicKey >/dev/null 2>&1
    rc=$?
    if [[ $rc -eq 1 ]]
    then
      $cp $tmpPublicKey $newKeyFile
      checkForErrors "cp $tmpPublicKey $newKeyFile" $?
      return 0
    fi

    # Otherwise, the file has invalid format.
    print -u2 "$mmcmd: File $keyFile does not have a valid format."
    print -u2 " Ensure the file is generated with the mmauth genkey command."
    return 1
  fi

  # Get the name of the cluster on which the file was generated.
  originatingCluster="${keyLine#clusterName=}"

  # Ensure the originating cluster is the one we expect.
  # Note that if we are in the process of changing the cluster
  # name, we will accept both the old and new names as valid.
  if [[ $originatingCluster != $clusterName  &&
        $originatingCluster != $clusterName2 ]]
  then
    # The file was not generated on the expected cluster.
    print -u2 "$mmcmd: File $keyFile was generated on cluster $originatingCluster."
    print -u2 " The expected cluster is $clusterName."
    return 1
  fi

  # The input file looks OK so far.  Start creating the local copy.
  print -- "clusterName=$clusterName" > $newKeyFile
  checkForErrors "writing to file $newKeyFile" $?

  # Process the rest of the lines.
  # Verify that all known keywords and the actual key are present.
  IFS=""     # Reset IFS to preserve blanks and tabs.
  while read -u3 keyLine
  do
    if [[ $keyLine = "clusterName="* ]]
    then
      # Multiple cluster name lines found.
      print -u2 "$mmcmd: Multiple lines with the clusterName parameter found."
      rc=1
      break

    elif [[ $keyLine = "clusterID="* ]]
    then
      if [[ -n $clusterIdFound ]]
      then
        # Multiple clusterID lines found.
        print -u2 "$mmcmd: Multiple lines with the clusterID parameter found."
        rc=1
        break
      else
        clusterIdFound=yes
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    elif [[ $keyLine = "genkeyFormat="* ]]
    then
      if [[ -n $genkeyFormatFound ]]
      then
        # Multiple format level lines found.
        print -u2 "$mmcmd: Multiple lines with the genkeyFormat parameter found."
        rc=1
        break
      else
        genkeyFormatFound=yes
      fi

      # Retrieve the value.
      genkeyFormatLevel=${keyLine#genkeyFormat=}
      intValue=$(checkIntRange genkeyFormat $genkeyFormatLevel)
      if [[ $? -ne 0 ]]
      then
        # Integer expected.
        rc=1
        break
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    elif [[ $keyLine = "genkeyCompatibleFormat="* ]]
    then
      if [[ -n $genkeyCompatibleFormatFound ]]
      then
        # Multiple compatible format lines found.
        print -u2 "$mmcmd: Multiple lines with genkeyCompatibleFormat found."
        rc=1
        break
      else
        genkeyCompatibleFormatFound=yes
      fi

      # Retrieve the value.
      genkeyCompatibleFormatLevel=${keyLine#genkeyCompatibleFormat=}
      intValue=$(checkIntRange genkeyCompatibleFormat $genkeyCompatibleFormatLevel)
      if [[ $? -ne 0 ]]
      then
        # Integer expected.
        rc=1
        break
      fi

      if [[ $genkeyCompatibleFormatLevel -gt $COMPATIBLE_KEYFILE_FORMAT ]]
      then
        # Our level of the code cannot understand this file.
        print -u2 "$mmcmd: Incompatible key file format."
        rc=1
        break
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    elif [[ $keyLine = "keyGenNumber="* ]]
    then
      if [[ -n $keyGenNumberFound ]]
      then
        # Multiple key generation numbers found.
        print -u2 "$mmcmd: Multiple lines with the keyGenNumber parameter found."
        rc=1
        break
      else
        keyGenNumberFound=yes
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    elif [[ $keyLine = "keyDigest="* ]]
    then
      if [[ -n $keyDigestFound ]]
      then
        # Multiple keyDigest lines found.
        print -u2 "$mmcmd: Multiple lines with the keyDigest parameter found."
        rc=1
        break
      else
        keyDigestFound=yes
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    elif [[ $keyLine = "publicKey=" ]]
    then
      if [[ -n $publicKeyFound ]]
      then
        # Multiple public keys found.
        print -u2 "$mmcmd: Multiple lines with the publicKey parameter found."
        rc=1
        break
      else
        publicKeyFound=yes
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    elif [[ $keyLine = "certificate=" ]]
    then
      if [[ -n $certificateFound ]]
      then
        # Multiple certificates found.
        print -u2 "$mmcmd: Multiple lines with the certificate parameter found."
        rc=1
        break
      else
        certificateFound=yes
      fi

      # Pass the line without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    else
      # If we have already seen the publicKey keyword,
      # but have not reached the certificate section,
      # assume this is the actual key value.
      [[ -n $publicKeyFound && -z $certificateFound ]] &&  \
        publicKeyValueFound=yes

      # If we have already seen the certificate keyword,
      # and have moved beyond the key value,
      # assume this is the actual certificate.
      [[ -n $publicKeyValueFound && -n $certificateFound ]] &&  \
        certificateValueFound=yes

      # All other lines are echoed without a change.
      print -- "$keyLine" >> $newKeyFile
      checkForErrors "writing to file $newKeyFile" $?

    fi  # end of if [[ $keyLine = "keyGenNumber="* ]]

  done  # end while read -u3 keyLine

  IFS="$IFS_sv"

  if [[ $rc -eq 0 ]]
  then
    # If we went through the entire file without a problem so far,
    # make sure that nothing is missing.
    if [[ -z $keyGenNumberFound ]]
    then
      # Missing key generation number line.
      print -u2 "$mmcmd: Missing line with the keyGenNumber parameter."
      rc=1
    fi

    if [[ -z $genkeyFormatFound ]]
    then
      # Missing genkeyFormat line.
      print -u2 "$mmcmd: Missing line with the genkeyFormat parameter."
      rc=1
    fi

    if [[ -z $genkeyCompatibleFormatFound ]]
    then
      # Missing genkeyCompatibleFormat line.
      print -u2 "$mmcmd: Missing line with the genkeyCompatibleFormat parameter."
      rc=1
    fi

    if [[ -z $certificateFound ]]
    then
      # Missing certificate header line.
      print -u2 "$mmcmd: Missing line with the certificate parameter."
      rc=1
    fi

    if [[ -z $certificateValueFound ]]
    then
      # Missing certificate value.
      print -u2 "$mmcmd: Missing certificate value."
      rc=1
    fi

    if [[ -z $publicKeyFound ]]
    then
      # Missing public key header line.
      print -u2 "$mmcmd: Missing line with the publicKey parameter."
      rc=1
    fi

    if [[ -z $publicKeyValueFound ]]
    then
      # Missing public key value.
      print -u2 "$mmcmd: Missing public key value."
      rc=1
    fi
  fi  # end of if [[ $rc -eq 0 ]]

  if [[ $rc -ne 0 ]]
  then
    # The input key file has invalid format.
    print -u2 "$mmcmd: File $keyFile has invalid format."
    $rm -f $newKeyFile
  fi

  return $rc

}  #----- end of function verifyPublicKeyFile -------------------


####################################################################
#
# Function:  Remove all mmfs-related system files.  Remove from
#            /etc/filesystems all stanzas for mmfs file systems.
#            Remove /dev entries and mount points.
#
# Input:     None
#
# Output:    None
#
# Returns:   0 - errors are ignored
#
####################################################################
function removeFromCluster  # (no parameters)
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGremoveFromCluster ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset rc=0
  typeset fsLine

  # Generate a list of the file systems that are currently
  # present, depending on the operating system, in the local
  # /etc/filesystems or /etc/fstab file.  Ignore any errors.
  getCurrentStanzaList $oldstanza > /dev/null 2>&1

  # Figure out the major number value for the /dev entries.
  [[ $osName = Linux ]] && checkVfsNumber

  # Remove the device name and mount point for any file system
  # that does not belong to this nodeset any more.
  exec 3<&-
  exec 3< $oldstanza
  read -u3 fsLine      # Skip the header line from the lsfs output.
  while read -u3 fsLine
  do
    # Parse the line.  The first field is the file system
    # mount point.  The second field is the device name.
    # We are not interested in the rest of the information.
    IFS=":"
    set -f ; set -- $fsLine ; set +f
    IFS="$IFS_sv"
    [[ -z $1 || -z $2 ]] && continue

    removeMountPoint $2 $1 $currentMajorNumber >/dev/null 2>&1
  done

  # Remove all mmfs stanzas that are currently present in /etc/filesystems.
  removeAllStanzas $newstanza >/dev/null 2>&1
  [[ $? -ne 0 ]] && return 0
  $cp $newstanza $etcFilesystems
  $mmsync $etcFilesystems

  # Remove the mmfs entry from /etc/auto_mount.
  [[ $osName = AIX ]] && clearAutomountDefine

  # Ensure the sdrserv daemon is not running on this node.
  killSdrServ >/dev/null 2>&1

  # Remove all GPFS system files.
  # The "<dir>/*" globs are generated only when the directory variable
  # is non-empty, so that an unset variable can never turn the argument
  # into "/*" for rm -rf.
  $rm -rf $mmsdrfsFile $mmfsNodeData $mmSdrLockExp $nsdmap $nsdpvol  \
          ${mmfsEnvLevel}+([0-9]) ${mmsdrfsGen}+([0-9]) $mmfscfg      \
          $startupMountFile $mmDirectMap ${mmfsVfsNumber}+([0-9])     \
          ${sslDir:+${sslDir}/*} ${mmbackupDir:+${mmbackupDir}/*}     \
          ${mmpmonDir:+${mmpmonDir}/*}                                \
          ${mmfsNewKeyLevel}+([0-9]) ${mmfsCommittedKeyLevel}+([0-9])

  return 0

}  #----- end of function removeFromCluster -------------------


###########################################################################
#
# Function:  Verifies that the daemon is inactive on a set of nodes.
#            Optionally, marks the running_cmnd attribute of the Gpfs
#            object which prevents the daemon from starting on any node.
#
# Input:     $1 - file with the reliable hostnames of the nodes to verify
#            $2 - executing command (optional)
#
# Output:    none
#
# Returns:   0 - Daemon inactive on all nodes
#            1 - Error detected, or daemon active on one or more nodes
#
###########################################################################
function verifyDaemonInactive  # <nodefile> [<commandName>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGverifyDaemonInactive ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset nodefile=$1
  typeset commandName=$2

  typeset mmcommonOutput nodeName result junk errMsgLine skipThisNode
  typeset errorFound=""
  typeset rc=0

  #######################################################################
  # If requested, lock the gpfs object to prevent daemons from starting.
  #######################################################################
  if [[ -n $commandName ]]
  then
    [[ $getCredCalled = no ]] && getCred
    setRunningCommand "$commandName" $primaryServer
    checkForErrors setRunningCommand $?
    gpfsLocked=yes     # global flag; not declared local in this function
  fi

  #######################################################################
  # If the MMBYPASSDAEMONDOWN variable is set, assume GPFS is down on
  # all relevant nodes.  This should be used only when the user is in
  # full control of the system and knows the state of GPFS on all nodes.
  # Intended for testing purposes only.
  #######################################################################
  if [[ -n $MMBYPASSDAEMONDOWN ]]
  then
    $mmTRACE_EXIT "rc=0 verifyDaemonInactive bypassed"
    return 0
  fi

  ##################################################################
  # Make sure that the daemon is not running on any of the nodes
  # that are listed in the input node names file.
  ##################################################################
  $mmcommon onall $nodefile $unreachedNodes active >$tmpfile 2>&1
  rc=$?
  if [[ ! -s $tmpfile ]]
  then
    # We didn't even get out of this box.
    # Hopefully, mmcommon has issued some message.
    checkForErrors "verifyDaemonInactive: mmcommon onall" $rc
  else
    # If this is a single node cluster, prepend the node name to
    # the front of each record in tmpfile, since mmcommon onall
    # does not use mmdsh for single node clusters.
    if [[ $MMMODE = single ]]
    then
      $sed "s/^/$ourNodeName: /" $tmpfile > $tmpfile2
      $mv $tmpfile2 $tmpfile
      checkForErrors "mv $tmpfile2 $tmpfile" $?
    fi  # end of if [[ $MMMODE = single ]]

    # Parse the output from the daemon active test.
    # Each record is expected to look like "<node>: <result>".
    $rm -f $errMsg $tmpfile2
    exec 3<&-
    exec 3< $tmpfile
    while read -u3 mmcommonOutput
    do
      set -f ; set -- $mmcommonOutput ; set +f
      nodeName=$1
      result=$2
      junk=$3

      if [[ $result = "active" && -z $junk ]]
      then
        errorFound=yes
        # MMFS is still active on this node.
        printErrorMsg 63 $mmcmd "${nodeName%:}"
        # Add the name to the list of nodes for which we have a definitive answer.
        print -- "$nodeName" >> $tmpfile2
        checkForErrors "writing to file $tmpfile2" $?

      elif [[ $result = "down" && -z $junk ]]
      then
        # This is the correct result.
        # Add the name to the list of nodes for which we have a definitive answer.
        print -- "$nodeName" >> $tmpfile2
        checkForErrors "writing to file $tmpfile2" $?

      elif [[ -n $result ]]
      then
        # Filter out 'no response' type messages.
        result=$(print -- "$mmcommonOutput" |  \
                 $grep -v -e '2501-006' -e 'No response'         \
                          -e '2501-018' -e 'Connection refused'  \
                          -e '6027-1617' -e 'There are no available nodes on which to run')
        if [[ -z $result ]]
        then
          # If the node is down, the daemon must be down too.
          # Add the name to the list of nodes for which we have a definitive answer.
          print -- "$nodeName" >> $tmpfile2
          checkForErrors "writing to file $tmpfile2" $?
        else
          # Unexpected output; must be an error.
          # Collect the lines in a separate file for later.
          print -- "$mmcommonOutput" >> $errMsg
          checkForErrors "writing to file $errMsg" $?
        fi
      fi  # end of if [[ $result = "active" && -z $junk ]]

    done  # end of while read -u3 mmcommonOutput

  fi  # end of if [[ ! -s $tmpfile ]]

  # Make a pass through the errMsg file and remove all lines that come
  # from nodes for which we have a definitive answer.  Any such lines
  # are most likely warning messages that can be ignored.  The goal is
  # to avoid failing a command if we know for sure that a node is down.
  if [[ -s $errMsg && -s $tmpfile2 ]]
  then
    $rm -f $errMsg2
    $touch $errMsg2
    exec 3<&-
    exec 3< $errMsg
    while read -u3 errMsgLine
    do
      set -f ; set -- $errMsgLine ; set +f
      nodeName=$1

      # See if this line comes from a node that can be skipped.
      skipThisNode=$($grep -w $nodeName $tmpfile2)
      if [[ -z $skipThisNode ]]
      then
        # Must be a legitimate error.
        # Collect the lines in a separate file.
        print -- "$errMsgLine" >> $errMsg2
        checkForErrors "writing to file $errMsg2" $?
        errorFound=yes
      fi
    done  # end of while read -u3 errMsgLine

    $mv $errMsg2 $errMsg
    checkForErrors "mv $errMsg2 $errMsg" $?
  fi  # end of if [[ -s $errMsg && -s $tmpfile2 ]]

  # Fail if an error was seen, or if no node gave a definitive answer.
  if [[ -n $errorFound || ! -s $tmpfile2 ]]
  then
    [[ -s $errMsg ]] && $cat $errMsg 1>&2
    $rm -f $errMsg $errMsg2
    return 1
  fi

  $rm -f $errMsg $errMsg2
  return 0

}  #----- end of function verifyDaemonInactive ------------


#############################################################
#
# Function:  Retrieve the node number and reliable hostname
#            for the local node.
#
# Input:     $1 - name of mmsdrfs file to use (optional)
#
# Output:    None.  The global variables ourNodeNumber and
#            ourNodeName are set (ourShortName is set as well).
#
# Returns:   Zero if successful, non-zero otherwise.
#
#############################################################
function getLocalNodeData  # [<sdrfsFile>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetLocalNodeData ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset sdrfsFile=$1

  typeset nodeData
  typeset nodeLine=""

  # Fall back to the default mmsdrfs file if none (or a bad one) was given.
  [[ ! -f $sdrfsFile ]] && sdrfsFile=$mmsdrfsFile

  # If the mmfsNodeData file is missing, try to restore it.
  if [[ ! -f $mmfsNodeData ]]
  then
    if [[ $MMMODE = lc ]]
    then
      # Try to find the node number by checking all local addresses.
      # guessLocalNodeNumber prints "<number> <ip address> <reliable name>".
      nodeData=$(guessLocalNodeNumber)
      set -f ; set -- $nodeData ; set +f
      ourNodeNumber=$1
      ourNodeName=$3

    elif [[ $MMMODE = single ]]
    then
      ourNodeName=$($hostname)
      ourNodeNumber=1

    else
      # unknown cluster type
      checkForErrors "getLocalNodeData: Unknown environmentType $MMMODE" 1
    fi  # end if [[ $MMMODE = lc ]]

    # Find the line that describes the local node
    # and create the mmfsNodeData file.
    [[ -f $sdrfsFile ]] &&  \
      $awk -F: '
        /':$MEMBER_NODE:'/ {
          if ($'$NODE_NUMBER_Field' == "'$ourNodeNumber'") {
            { print $0 >> "'$mmfsNodeData'" }
            { exit 0 }
          }
        }
      ' $sdrfsFile

    # If the mmfsNodeData file is still missing, give up.
    # Return the locally-determined node number and name
    # and hope for the best.
    if [[ ! -f $mmfsNodeData ]]
    then
      if [[ -z $ourNodeName || -z $ourNodeNumber ]]
      then
        checkForErrors "getLocalNodeData: Missing $mmfsNodeData" 1
      else
        ourShortName=${ourNodeName%%.*}
        return 0
      fi
    fi
  fi  # end if [[ ! -f $mmfsNodeData ]]

  # Read and parse the first line of the mmfsNodeData file.
  nodeLine=$($head -1 $mmfsNodeData)
  IFS=':'
  set -f ; set -- $nodeLine ; set +f
  IFS="$IFS_sv"

  # At this point $2 is the line type (should be $MEMBER_NODE),
  # $5 is the node number, and $8 is the reliable node name.
  # Perform a quick sanity check.
  if [[ $2 != $MEMBER_NODE ]]
  then
    # Corrupted mmfsNodeData file
    corruptedSdrFileExit 129 "$nodeLine"
  fi

  # Set the global variables.
  ourNodeNumber=$5
  ourShortName=$6
  ourNodeName=$8

  return 0

}  #----- end of function getLocalNodeData --------------------


####################################################################
#
# Function:  Make an educated guess as to what this node's
#            node number and reliable name might be.
#
# Input:     None
#
# Output:    node number, IP address, and reliable node name
#            (printed on stdout as one blank-separated line)
#
# Returns:   Zero if successful, non-zero otherwise.
#
####################################################################
function guessLocalNodeNumber  # (no parameters)
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGguessLocalNodeNumber ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset ipa ipaList sdrfsLine
  typeset rc=0

  # If the mmsdrfs file is missing, there is nothing we can do.
  if [[ ! -f $mmsdrfsFile ]]
  then
    printErrorMsg 171 "guessLocalNodeNumber" "$mmsdrfsFile not found" 1
    return 1
  fi

  # Get the node's adapter information.
  LC_ALL=C $ifconfig -a > $adfile
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    printErrorMsg 171 "guessLocalNodeNumber" "ifconfig -a" $rc
    return 1
  fi

  # Prepare a list of all locally-known IP addresses.
  # The assumptions are that the ifconfig output is organized in stanzas.
  # Each stanza begins with an adapter type starting in column 1.
  # All other lines are indented by at least one space or tab character.
  # The IP addresses are prefixed with the string 'inet addr:'.
  if [[ $osName = Linux ]]
  then
    # Print the first "addr:<ip>" token of every line that has one.
    ipaList=$($awk '
      / addr:/ {
        for ( i = 1; i <= NF; i++ ) {
          if ( $i ~ "^addr:" ) {
            { print $i }
            { next }
          }
        }
      }
    ' $adfile)
    rc=$?

  elif [[ $osName = AIX ]]
  then
    ipaList=$($awk '
      $1 == "inet" { print $2 }
    ' $adfile)
    rc=$?

  else
    checkForErrors "guessLocalNodeNumber: Unknown operating system $osName" 1
  fi

  if [[ $rc -ne 0 ]]
  then
    printErrorMsg 171 "guessLocalNodeNumber" "awk" $rc
    return 1
  fi

  # Examine the mmsdrfs file and find a MEMBER_NODE line with
  # an IP address that matches one of the local IP addresses.
  # Return the corresponding node number and reliable name.
  IFS=":"         # Change the field separator to ':'.
  exec 3<&-
  exec 3< $mmsdrfsFile
  while read -u3 sdrfsLine
  do
    # Parse the line.  The extra "-" makes v[1] the first real field.
    set -f ; set -A v -- - $sdrfsLine ; set +f
    IFS="$IFS_sv"

    if [[ ${v[$LINE_TYPE_Field]} = $MEMBER_NODE ]]
    then
      for ipa in $ipaList
      do
        # The Linux list entries still carry the "addr:" prefix.
        if [[ ${ipa#addr:} = ${v[$IPA_Field]} ]]
        then
          print -- "${v[$NODE_NUMBER_Field]} ${v[$IPA_Field]} ${v[$REL_HOSTNAME_Field]}"
          return 0
        fi
      done
    fi

    IFS=":"  # Change the separator back to ":" for the next iteration.

  done  # end while read -u3 sdrfsLine

  IFS="$IFS_sv"  # Restore the default IFS settings.

  # If we get here, there was no matching IP address in the mmsdrfs file.
  return 1

}  #----- end of function guessLocalNodeNumber --------------------


########################################################################
#
# Function:  Verify the correctness of the mmsdrfs file.
#
# Input:     $1 - name of mmsdrfs file
#
# Output:    None
#
# Returns:   0 - no errors
#            nnn - error encountered
#
########################################################################
function checkSdrfsFile  # <sdrfsFile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGcheckSdrfsFile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset sdrfsFile=$1

  typeset sdrfsLine

  #############################################################
  # Read the mmsdrfs file a record at a time.
  # Stop and issue an error if an unknown line is encountered.
  #############################################################
  exec 3<&-
  exec 3< $sdrfsFile
  while read -u3 sdrfsLine
  do
    # Parse the line.
    IFS=':'
    set -f ; set -- $sdrfsLine ; set +f
    IFS="$IFS_sv"

    # $2 is the line type identifier.
    case $2 in
      $VERSION_LINE )        : ;;
      $COMMENT_LINE )        : ;;
      $NODESET_HDR )         : ;;
      $MEMBER_NODE )         : ;;
      $SG_HEADR )            : ;;
      $SG_ETCFS )            : ;;
      $SG_MOUNT )            : ;;
      $SG_DISKS )            : ;;
      $MMFSCFG )             : ;;
      $REM_CLUSTER )         : ;;
      $REM_CLUSTER_KEY )     : ;;
      $AUTHORIZED_CLUSTER )  : ;;
      $AUTHORIZED_KEY )      : ;;
      $AUTHORIZED_FS )       : ;;
      $HSMDATA )             : ;;
      $HSMVERSION )          : ;;

      * )  # Unexpected line; corrupted sdrfs file.
        printErrorMsg 278 $mmcmd 135
        print -u2 "$sdrfsLine"
        return 1
        ;;
    esac

  done  # end while read -u3 sdrfsLine

  return 0

}  #----- end of function checkSdrfsFile -----------------------


########################################################################
#
# Function:  Perform the actions specified as options on the
#            propagateSdrfsFile function call.  Ignore errors.
#
# Input:     $1 - comma-separated list of actions.
#
# Output:    None
#
# Returns:   0 - no errors
#            nnn - error encountered
#
########################################################################
function handlePropagateOptions  # <optionString>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGhandlePropagateOptions ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset optionString=$1

  typeset action

  # Perform additional tasks as per the propagate options parameter.
  # Ignore errors.  It is OK if the daemon is down.
  IFS=","
  for action in $optionString
  do
    IFS="$IFS_sv"    # Restore the default IFS settings.

    case $action in

      rereadnsd )
        $tsctl rereadnsd > /dev/null 2>&1
        listNsdPhysicalVolumes $nsdpvol 2>/dev/null
        ;;

      rereadNodeList )
        $tsctl rereadNodeList > /dev/null 2>&1
        ;;

      rereadContactNodes* )
        # Whatever follows the keyword is passed on as the argument.
        $tsctl rereadContactNodes ${action#rereadContactNodes} > /dev/null 2>&1
        ;;

      rmdir* )
        # Whatever follows the keyword is the directory to remove.
        $rmdir ${action#rmdir} > /dev/null 2>&1
        ;;

      initLocalNodeData )
        $rm -f $mmfsNodeData
        getLocalNodeData > /dev/null 2>&1
        ;;

      * ) :  # Unknown action; do not complain.
        ;;

    esac  # end of case $action in

    IFS=","  # Change the separator back to "," for the next iteration.

  done  # end of for action in $optionString

  IFS="$IFS_sv"  # Restore the default IFS settings.

  return 0

}  #----- end of function handlePropagateOptions --------------------


###########################################################################
#
# Function:  Invoke the mmcommon pushSdr or pushSdr_async function
#            to propagate the specified mmsdrfs file.
#
# Input:     $1 - type of processing: sync or async
#            $2 - file with the reliable hostnames of the nodes to update
#            $3 - name of mmsdrfs file to push
#            $4 - generation number of the file
#            $5 - additional options (e.g. rereadnsd, etc.)
#
# Output:    None
#
# Returns:   0 - no errors
#            nnn - error encountered
#
###########################################################################
function propagateSdrfsFile  # <processingType> <nodefile> <sdrfsFile>
                             # <genNumber> [<options>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGpropagateSdrfsFile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset processingType=$1
  typeset nodefile=$2
  typeset sdrfsFile=$3
  typeset genNumber=$4
  typeset options="$5"

  typeset sumOutput checksum
  typeset rc=0

  # Give up if the node names file is empty.
  [[ ! -s $nodefile ]] &&  \
    return 0

  # Exclude the local host from the list of nodes.
  $grep -v -w $ourNodeName $nodefile > ${nodefile}PushSdr

  # Perform the optional tasks, if any, on the local node.
  [[ -n $options ]] &&  \
    handlePropagateOptions "$options"

  # If the node names file had only our name in it, we are done.
  if [[ ! -s ${nodefile}PushSdr ]]
  then
    $rm -f ${nodefile}PushSdr
    return 0
  fi

  # If the MMDONOTPUSHSDR variable is set, and there are no additional
  # tasks to execute, do not propagate the changes to the rest of the
  # nodes.  The user is expected to issue mmrefresh or each node will
  # upgrade its environment as needed via the gpfsInit processing.
  # Intended for testing purposes only.
  if [[ -n $MMDONOTPUSHSDR && -z $options ]]
  then
    $mmTRACE_EXIT "rc=0 propagateSdrfsFile bypassed"
    return 0
  fi

  # Create a copy of the mmsdrfs file to propagate.
  $cp $sdrfsFile ${mmsdrfsGen}${genNumber}
  checkForErrors "cp $sdrfsFile ${mmsdrfsGen}${genNumber}" $?

  # Calculate the checksum.  Only the first word of the output is used.
  sumOutput=$($sum $sdrfsFile)
  checkForErrors "sum $sdrfsFile" $?
  set -f ; set -- $sumOutput ; set +f
  checksum=$1

  # Propagate the mmsdrfs file.
  if [[ $processingType = async ]]
  then
    # Tell the user asynchronous notification starts.
    printErrorMsg 271 $mmcmd

    # The command is started in the background; rc only reflects
    # whether the job could be launched.
    $mmcommon pushSdr_async  \
      ${nodefile}PushSdr ${mmsdrfsGen}${genNumber} $checksum "$options" &
    rc=$?
  else
    # Ask mmcommon to propagate the mmsdrfs file synchronously.
    # For now, this path is taken only as a result of daemon-initiated
    # processing.  There is no need for informational messages.
    $mmcommon pushSdr  \
      ${nodefile}PushSdr ${mmsdrfsGen}${genNumber} $checksum "$options"
    rc=$?

    # Cleanup files created by this function.
    # In the async path, this is done by the mmcommon pushSdr_async command.
    $rm -f ${nodefile}PushSdr ${mmsdrfsGen}${genNumber}
  fi

  $mmTRACE_EXIT "$rc"
  return $rc

}  #----- end of function propagateSdrfsFile ----------------------


########################################################################
#
# Function:  Invoke the mmcommon pushKey or pushKey_async function
#            to propagate the specified mmsdrfs and auth key files.
#
# Input:     $1 - type of processing: sync or async
#            $2 - file with the reliable hostnames of the nodes to update
#            $3 - name of mmsdrfs file to push
#            $4 - generation number of the file
#            $5 - name of the key file to push
#            $6 - generation number of the key file
#            $7 - additional options
#
# Output:    None
#
# Returns:   0 - no errors
#            nnn - error encountered
#
########################################################################
function propagateKeyFile  # <processingType> <nodefile> <sdrfsFile> <genNumber>
                           # <keyFile> <keyGenNumber> [<options>]
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGpropagateKeyFile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset processingType=$1
  typeset nodefile=$2
  typeset sdrfsFile=$3
  typeset genNumber=$4
  typeset keyFile=$5
  typeset keyGenNumber=$6
  typeset options="$7"

  typeset sumOutput checksum keyChecksum keyDataFile
  typeset rc=0

  # Give up if the node names file is empty.
  [[ ! -s $nodefile ]] &&  \
    return 0

  # Exclude the local host from the list of nodes.
  $grep -v -w $ourNodeName $nodefile > ${nodefile}PushKey

  # Give up if the node names file had only our name in it.
  if [[ ! -s ${nodefile}PushKey ]]
  then
    $rm -f ${nodefile}PushKey
    return 0
  fi

  # Create a copy of the mmsdrfs file to propagate.
  $cp $sdrfsFile ${mmsdrfsGen}${genNumber}
  checkForErrors "cp $sdrfsFile ${mmsdrfsGen}${genNumber}" $?

  # Calculate the checksum.  Only the first word of the output is used.
  sumOutput=$($sum $sdrfsFile)
  checkForErrors "sum $sdrfsFile" $?
  set -f ; set -- $sumOutput ; set +f
  checksum=$1

  # If the file is not already in the stage directory, copy it there.
  # This is the case with old format mmauth genkey files.  The new
  # format files are already put in the stage directory by mmauth.
  if [[ $keyFile = ${genkeyData}* ]]
  then
    keyDataFile=$keyFile
  else
    keyDataFile=${mmauthKeyGen}${keyGenNumber}
    umask 077     # the copy of the key must not be readable by others
    $cp $keyFile $keyDataFile
    checkForErrors "cp $keyFile $keyDataFile" $?
    umask $UMASK_sv
  fi

  # Calculate the checksum.
  sumOutput=$($sum $keyFile)
  checkForErrors "sum $keyFile" $?
  set -f ; set -- $sumOutput ; set +f
  keyChecksum=$1

  # Propagate the mmsdrfs file.
  if [[ $processingType = async ]]
  then
    # Tell the user asynchronous notification starts.
    printErrorMsg 271 $mmcmd

    # The command is started in the background; rc only reflects
    # whether the job could be launched.
    $mmcommon pushKey_async  \
      ${nodefile}PushKey ${mmsdrfsGen}${genNumber} $checksum  \
      $keyDataFile $keyChecksum "$options" &
    rc=$?
  else
    # Ask mmcommon to propagate the mmsdrfs file synchronously.
    $mmcommon pushKey  \
      ${nodefile}PushKey ${mmsdrfsGen}${genNumber} $checksum  \
      $keyDataFile $keyChecksum "$options"
    rc=$?

    # Cleanup files created by this function.
    # In the async path, this is done by the mmcommon pushKey_async command.
    $rm -f ${nodefile}PushKey ${mmsdrfsGen}${genNumber}

    # Remove the key data file only if it is the temporary copy made
    # above; a file that mmauth staged itself must be left alone.
    [[ $keyDataFile != ${genkeyData}* ]] && $rm -f $keyDataFile
  fi

  $mmTRACE_EXIT "$rc"
  return $rc

}  #----- end of function propagateKeyFile ----------------------


####################################################################
#
# Function:  Start a new automounter daemon.
#
# Input:     $1 - value of the automountDir mmfs.cfg parameter
#
# Output:    None
#
# Returns:   0 - success (or a suitable automounter is already running)
#            1 - automount command is not installed
#            otherwise, the return code of the automount command
#
####################################################################
function startAutomounter  # <automountDir>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGstartAutomounter ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset automountDir=$1

  typeset pid
  typeset rc=0

  # Check whether the automount command is installed.
  if [[ ! -x $automount ]]
  then
    # Warn the user that automount mounting will not work.
    printErrorMsg 321 $mmcmd
    $automount
    return 1
  fi

  # If AIX, just run the automount command and return.
  if [[ $osName = AIX ]]
  then
    checkAutomountDefine
    $automount
    return 0
  fi

  # In the Linux environment we need to do a few more things.

  # Ensure the automount directory exists.
  $mkdir -p $automountDir

  # Determine whether the automounter is already running and
  # whether it was started with the correct directory parameter.
  # The awk script prints "OK" if a process with the right directory
  # is found, the pid of a process using mmdynamicmap with a different
  # directory, or nothing if no such process exists.
  pid=$($ps -eo "pid args" |  \
        $awk '
          /mmdynamicmap/ && ! /this process/ {
            if ( $3 == "'$automountDir'" ) {
              { pid = "OK" }
              { exit }
            } else {
              { pid = $1 }
            }
          }
          END { print pid }
        ')

  # If the automounter is already running, but the directory is
  # not the right one, kill the current process before restarting.
  if [[ -n $pid && $pid != "OK" ]]
  then
    $kill -KILL $pid
    $sleep 5
    pid=""
  fi

  # Start the new automount daemon.
  if [[ -z $pid ]]
  then
    $automount $automountDir program $mmdynamicmap
    rc=$?
  fi

  return $rc

}  #----- end of function startAutomounter ------------------


##########################################################################
#
# Function:  Retrieve the mmsdrserv TCP port number.
#
# Input:     $1 - mmsdrfs file to use
#
# Output:    The mmsdrserv TCP port number.
#
# Returns:   Zero if successful, non-zero otherwise.
#
##########################################################################
function getSdrservPort  # <sdrfsFile>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGgetSdrservPort ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset sdrfs=$1

  typeset sdrservPort

  [[ ! -f $sdrfs ]] &&  \
    checkForErrors "getSdrservPort: missing file $sdrfs" 1

  # If the mmsdrserv TCP port number is explicitly set, return
  # its value.  Otherwise, return the current tscTcpPort value.
  # Only the first NODESET_HDR line of the home cluster is examined.
  sdrservPort=$($awk -F: '
    /^'$HOME_CLUSTER:$NODESET_HDR:'/ {
      if ( $'$GETOBJECT_PORT_Field' == "" ) {
        { print $'$TCP_PORT_Field' }
      } else {
        { print $'$GETOBJECT_PORT_Field' }
      }
      { exit }
    }
  ' $sdrfs)
  checkForErrors awk $?

  # Neither field was set (or no header line was found).
  [[ -z $sdrservPort ]] && sdrservPort=$defaultTcpPort

  print -- $sdrservPort
  return 0

}  #----- end of function getSdrservPort ------------------


####################################################################
#
# Function:  Terminate the mmsdrserv daemon.
#
# Input:     None
#
# Output:    None
#
# Returns:   0 - mmsdrserv terminated.
#            $MM_TimedOut (documented as 78, ETIMEDOUT) - still
#              running after 30 seconds.
#
####################################################################
function killSdrServ  # (no parameters)
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGkillSdrServ ]] && set -x
  $mmTRACE_ENTER "$*"

  typeset pid rc
  typeset -i waitPeriod=0   # number of retry rounds (about 1 second each)

  # Find the pid of the currently-running mmsdrserv daemon.
  pid=$($cat $mmsdrservPid 2>/dev/null)

  # If mmsdrserv is currently running, kill it.
  if [[ -n $pid ]]
  then
    $kill -TERM $pid
    $sleep 1
  fi

  # Wait until all mmsdrserv threads have disappeared.
  # If necessary, send more SIGTERM signals.
  while true
  do
    # See if there are any mmsdrserv threads still running.
    pid=$($ps -eo "pid args" | $awk '/mmsdrserv / && !/this process/ {print $1}')

    if [[ -z $pid ]]
    then
      # All mmsdrserv threads are gone; nothing more to do.
      rc=0
      break

    elif [[ $waitPeriod -gt 30 ]]
    then
      # mmsdrserv did not terminate in 30 seconds; give up.
      rc=$MM_TimedOut
      break

    else
      # Kill the remaining mmsdrserv threads.
      $kill -TERM $pid
      $sleep 1
      (( waitPeriod += 1 ))
    fi  # end of if [[ -z $pid ]]

  done  # end while true

  return $rc

}  #----- end of function killSdrServ -------------------


####################################################################
#
# Function:  Ensure the mmsdrserv daemon is running and is aware
#            of the value of the latest Gpfs object.
#
# Input:     $1 - mmsdrserv port number or CURRENT
#
# Output:    None
#
# Returns:   0 - mmsdrserv started.
#            non-zero - problem encountered.
#
####################################################################
function startSdrServ  # <sdrservPort>
{
  typeset sourceFile="mmsdrfsdef.sh"
  [[ -n $DEBUG || -n $DEBUGstartSdrServ ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset sdrservPort=$1

  typeset rc
  typeset maxThreads timeout

  # If the mmsdrserv TCP port number is not known, figure it out.
  if [[ $sdrservPort = CURRENT ]]
  then
    sdrservPort=$(getSdrservPort $mmsdrfsFile)
    checkForErrors getSdrservPort $?
  fi

  # If the mmsdrserv TCP port number is set to 0, return.
  # The user does not want to use the mmsdrserv daemon.
  [[ $sdrservPort -eq 0 ]] && return 0

  # Find the value of the mmsdrservTimeout parameter.
  timeout=$(showCfgValue mmsdrservTimeout)
  [[ -z $timeout ]] && timeout=10

  # Find the value of the mmsdrservWorkerPool parameter.
  maxThreads=$(showCfgValue mmsdrservWorkerPool)
  [[ -z $maxThreads ]] && maxThreads=10

  # Start the mmsdrserv daemon.
  $mmsdrserv $sdrservPort $timeout $maxThreads $mmsdrservLog
  rc=$?
  $mmTRACE "mmsdrserv $sdrservPort $timeout $maxThreads $mmsdrservLog returned rc=$rc"

# # If something goes wrong, decide what to do.
# if [[ $rc -ne 0 ]]
# then
#
#   128)  # ESDR_OK                "Ok"
#   129)  # ESDR_CONN_REJECTED     "Connection rejected by the server"
#   130)  # ESDR_BAD_ATTACHMENT    "Invalid attachment length"
#   131)  # ESDR_INVALREQUEST      "Unknown request type"
#   132)  # ESDR_NOT_SERVER        "Node is not a server"
#   133)  # ESDR_NOT_PRIMSERVER    "Node is not a primary server"
#   134)  # ESDR_NOT_SECSERVER     "Node is not a secondary server"
#   135)  # ESDR_SECSERVER_DOWN    "Secondary server could not be reached"
#   136)  # ESDR_BAD_HOSTNAME      "Hostname resolution failure"
#   137)  # ESDR_NODEDATA_READ_ERR "Error reading mmfsNodeData from disk"
#   138)  # ESDR_SDRFS_READ_ERR    "Error reading mmsdrfs from disk"
#   139)  # ESDR_SDRFS_WRITE_ERR   "Error writing mmsdrfs to disk"
#   140)  # ESDR_SDRFS_FORMAT_ERR  "Format of the mmsdrfs file is invalid"
#   141)  # ESDR_OBJ_FORMAT_ERR    "Format of the GPFS Object is invalid"
#   142)  # ESDR_NOMEM             "Out of memory"
#   143)  # ESDR_CONNREFUSED       "Unable to connect to the server"
#   144)  # ESDR_CONNRESET         "Connection shut down"
#   145)  # ESDR_ADDRINUSE         "Port already in use"
#   146)  # ESDR_SERVER_ERR        "Internal server error"
#   147)  # ESDR_DAEMON_CRASHED    "Daemon crashed or was killed with a signal"
#   148)  # ESDR_SERVER_DISABLED   "Daemon disabled by request or inconsistency"
# fi

  [[ $rc = 128 ]] && rc=0   # ESDR_OK is 128

  return $rc

}  #----- end of function startSdrServ ---------------------


####################################################################
#
# Function:  Ensure the mmsdrserv daemon is running and is aware
#            of the value of the latest Gpfs object.
#
# Input:     $1 - Gpfs object or CURRENT
#            $2 - mmsdrserv host name or DEFAULT
#            $3 - mmsdrserv port number or CURRENT
#
# Output:    None
#
# Returns:   0 - mmsdrserv reset.
#            non-zero - problem encountered.
# #################################################################### function resetSdrServ # { typeset sourceFile="mmsdrfsdef.sh" [[ -n $DEBUG || -n $DEBUGresetSdrServ ]] && set -x $mmTRACE_ENTER "$*" typeset gpfsObject=$1 typeset serverHost=$2 typeset sdrservPort=$3 typeset rc timeout typeset needToKillSdrServ="" typeset needToStartSdrServ="" # If the mmsdrserv TCP port number is not known, figure it out. if [[ $sdrservPort = CURRENT ]] then sdrservPort=$(getSdrservPort $mmsdrfsFile) checkForErrors getSdrservPort $? fi # If the mmsdrserv TCP port number is set to 0, return. # The user does not want to use the mmsdrserv daemon. [[ $sdrservPort -eq 0 ]] && return 0 # If the mmsdrserv server host name is not known, # assume the primary configuration data server. [[ $serverHost = DEFAULT ]] && serverHost=$primaryServer # If the value of the Gpfs object is not known, # determine it from the current mmsdrfs file. [[ $gpfsObject = CURRENT ]] && gpfsObject=$(getGpfsObjectFromFile) # Find the value of the mmsdrservTimeout parameter. timeout=$(showCfgValue mmsdrservTimeout) [[ -z $timeout ]] && timeout=10 # Issue a setObj request. This will ensure the Gpfs object # is updated on both configuration servers and will ensure # that mmsdrserv will return the correct mmsdrfs file. #esjdbg # Show msgs during development and testing only. #esjdbg $mmsdrcli setObj $serverHost $sdrservPort $timeout "$gpfsObject" $mmsdrcli setObj $serverHost $sdrservPort $timeout "$gpfsObject" 2>>$mmsdrservLog rc=$? $mmTRACE "mmsdrcli setObj $gpfsObjectInfo returned rc=$rc" # If something goes wrong, decide what to do. # - If mmsdrserv is not running, restart the daemon. # - If something else goes wrong, first kill the current # incarnation of the daemon and then restart it. if [[ $rc -ne 0 ]] then [[ $rc = 128 ]] && rc=0 # ESDR_OK is 128 needToKilltSdrServ=yes : #esjx consider using case in place of the if. 
# 128) # ESDR_OK "Ok" # 129) # ESDR_CONN_REJECTED "Connection rejected by the server" # 130) # ESDR_BAD_ATTACHMENT "Invalid attachment length" # 131) # ESDR_INVALREQUEST "Unknown request type" # 132) # ESDR_NOT_SERVER "Node is not a server" # 133) # ESDR_NOT_PRIMSERVER "Node is not a primary server" # 134) # ESDR_NOT_SECSERVER "Node is not a secondary server" # 135) # ESDR_SECSERVER_DOWN "Secondary server could not be reached" # 136) # ESDR_BAD_HOSTNAME "Hostname resolution failure" # 137) # ESDR_NODEDATA_READ_ERR "Error reading mmfsNodeData from disk" # 138) # ESDR_SDRFS_READ_ERR "Error reading mmsdrfs from disk" # 139) # ESDR_SDRFS_WRITE_ERR "Error writing mmsdrfs to disk" # 140) # ESDR_SDRFS_FORMAT_ERR "Format of the mmsdrfs file is invalid" # 141) # ESDR_OBJ_FORMAT_ERR "Format of the GPFS Object is invalid" # 142) # ESDR_NOMEM "Out of memory" # 143) # ESDR_CONNREFUSED "Unable to connect to the server" # 144) # ESDR_CONNRESET "Connection shut down" # 145) # ESDR_ADDRINUSE "Port already in use" # 146) # ESDR_SERVER_ERR "Internal server error" # 147) # ESDR_DAEMON_CRASHED "Daemon crashed or was killed with a signal" # 148) # ESDR_SERVER_DISABLED "Daemon disabled by request or inconsistency" fi # end of if [[ $rc -ne 0 ]] # If necessary, kill the current instance of the mmsdrserv daemon. if [[ -n $needToKilltSdrServ ]] then # call killSdrServ on both the primary and backup server nodes. killSdrServ >> $mmsdrservLog 2>&1 [[ -n $backupServer ]] && \ $mmcommon on1 $backupServer killSdrServ >> $mmsdrservLog 2>&1 fi # end of if [[ -n $needToKilltSdrServ ]] # If the setObj call fails because the mmsdrserv is not running, # restart the mmsdrserv daemon. This will automatically load the # correct Gpfs object. # # Note: This should be done only if we are already on the primary server. esjxx ??? # if [[ -n $needToStartSdrServ && $serverHost = $primaryServer ]] then startSdrServ CURRENT >> $mmsdrservLog 2>&1 rc=$? 
[[ -n $backupServer && $rc -eq 0 ]] && \ $mmcommon on1 $backupServer startSdrServ CURRENT >> $mmsdrservLog 2>&1 fi # end of if [[ -n $needToStartSdrServ ]] return $rc } #----- end of function resetSdrServ -------------------- ########################################################################## # # Function: This function produces the data needed by the AIX lspv # command to indicate which physical disks are in use by GPFS. # # Note: This code should also work on nodes that have GPFS # installed but are not members of a cluster, i.e., # when there is no mmsdrfs file on the node. # # Input: $1 - name of file in which to store the NSD information # # Output: Each record of the output file consists of the following # blank-separated fields: # # physicalVolumeName nsdName # # Returns: 0 - file created successfully # If an error is encountered, this function invokes the # cleanupAndExit routine which terminates execution. # ########################################################################## function listNsdPhysicalVolumes { typeset sourceFile="mmsdrfsdef.sh" [[ -n $DEBUG || -n $DEBUGlistNsdPhysicalVolumes ]] && set -x $mmTRACE_ENTER "$*" typeset outputFile=$1 typeset nsdLine nsdId deviceName pvolName sdrfsLine # Get a list containing the NSD id and the device name for each NSD. $tspreparedisk -S > $diskNamesFile checkForErrors "listNsdPhysicalVolumes: tspreparedisk -S" $? # For each NSD reported by tspreparedisk, create a line listing # the underlying physical volume and the NSD name. This information # is obtained from the tspreparedisk output and the mmsdrfs file. $rm -f $volGroupFile exec 3<&- exec 3< $diskNamesFile while read -u3 nsdLine do # Parse the line. set -f ; set -- $nsdLine ; set +f nsdId=$1 deviceName=$2 # Skip over the tspreparedisk status line when it is encountered. [[ $nsdId = tspreparedisk* ]] && continue # Depending on the environment, strip /dev/r or just /dev/ # from the beginning of the device name. 
if [[ $osName = AIX ]] then # Strip the leading "/dev/r" from the device name. pvolName=${deviceName##+(/)dev+(/)r} else # Strip the leading "/dev/" from the device name. pvolName=${deviceName#/dev/} fi sdrfsLine="" [[ -f $mmsdrfsFile ]] && sdrfsLine=$($grep -e ":$nsdId:nsd:" $mmsdrfsFile) if [[ -n $sdrfsLine ]] then # The disk appears in the mmsdrfs file. Parse the line and print # a line of output showing the physical disk name and the nsd name. IFS=":" # Change the field separator to ':'. set -f ; set -A v -- - $sdrfsLine ; set +f IFS="$IFS_sv" # Restore the default IFS settings. print -- "$pvolName ${v[$DISK_NAME_Field]}" >> $volGroupFile else # The disk does not appear in the mmsdrfs file. # Print the device name and "gpfs" to indicate that this is a gpfs disk. print -- $pvolName gpfs >> $volGroupFile fi done # end of while read -u3 nsdLine do # Everything seems to have gone ok so far. # Rename the file so that the caller can find it. $touch $volGroupFile $mv $volGroupFile $outputFile checkForErrors "mv $volGroupFile $outputFile" $? # Make sure that non-privileged commands can read this file. 
$chmod a+r $outputFile >/dev/null 2>&1 $rm -f $diskNamesFile $volGroupFile return 0 } #----- end of function listNsdPhysicalVolumes ------------------ ########################################################################### # # Function: Runs the specified command on the local node # # Input: $1 - file system that cannot be mounted, or _NO_MOUNT_CHECK_ # $2 - scope of mount checking # $3 - symbolic link indicator: _LINK_ or _NO_LINK_ # $4 - command to execute; must be mmremote or tsxxxxxxx # $5 - argument list for the remote command # # Output: Depends on the particular command # # Returns: 0 - command completed successfully # return code from the command # ########################################################################### function runLocalCommand # [] { typeset sourceFile="mmsdrfsdef.sh" [[ -n $DEBUG || -n $DEBUGrunLocalCommand ]] && set -x $mmTRACE_ENTER "$*" typeset fsToCheck=$1 typeset scope=$2 typeset sLink=$3 typeset command=$4 shift 4 # Move past required parms. typeset args="$@" typeset rc=0 typeset cmndDir=$mmcmdDir typeset fqDeviceName localDevName headerMsg Coption [[ $sLink = $LINK ]] && cmndDir="${cmndDir}/${links}" if [[ $fsToCheck = $NO_MOUNT_CHECK ]] then # Check whether the daemon is up and accepting commands. # The caller is not interested in mounted file systems. $tsstatus -1 >$errMsg 2>&1 rc=$(remapRC $?) if [[ $rc -ne 0 ]] then if [[ -s $errMsg && $rc -ne $MM_DaemonDown && $rc -ne $MM_QuorumWait ]] then $cat $errMsg 1>&2 fi $rm -f $errMsg return $rc fi $rm -f $errMsg else # Process the mount check scope parameter. It determines the value, # if any, for the -C option on the tsstatus call that will follow. if [[ $scope = $CHECK_ALL || $scope = NULL ]] then Coption="" else Coption="-C $scope" fi # Find out whether anybody has the file system mounted. # # - The only acceptable return code is 2 (the daemon # is up but the file system is not mounted anywhere). 
# # - Return code 0 means that the daemon is up and that # the file system is mounted on at least one node. # The list of node names is returned by tsstatus. # # Note that the tsstatus output comes from the stripe group # manager node which always resides in the cluster that owns # the file system. In the case of remote file systems, the # manager node has no knowledge about the local name for the # file system. Therefore, we have to intercept the tsstaus # output and replace the header line with one that is more # meaningful for the local cluster. To keep the code simple, # we do this for both local and remote file systems. # LC_ALL=C $tsstatus -m $fsToCheck $Coption >$tmpfile 2>$errMsg rc=$(remapRC $?) if [[ $rc -ne 2 ]] then if [[ -s $tmpfile ]] then # Construct a message to be used in place of the first line # in the tsstatus output. If checking a remote file system, # the local device name can be passed as a command argument. if [[ $command = $MOUNT_CHECK_ONLY && $args = ldev=* ]] then localDevName=${args#ldev=} headerMsg=$(printInfoMsg 521 "$localDevName" "($fsToCheck)") else headerMsg=$(printInfoMsg 522 "$fsToCheck") fi # Filter the result from the tsstatus command and replace # the header line with our own version. $awk ' \ /File system/ && /is managed by node/ && /and mounted on:/ { \ { print "'"$headerMsg"'" } \ { next } \ } \ { print $0 } \ ' $tmpfile else # tststatus -m returned no data. Show any error output produced. if [[ -s $errMsg && $rc -ne $MM_DaemonDown && $rc -ne $MM_QuorumWait ]] then $cat $errMsg 1>&2 fi fi # end of if [[ -s $tmpfile ]] $rm -f $errMsg [[ $rc -eq 0 ]] && rc=$MM_FsMounted return $rc fi # end of if [[ $rc -ne 2 ]] $rm -f $errMsg # If this is only a check for mounted file system, force the current # fs manager node to resign (if requested) and return. Note that at # this point we know that the file system is not mounted anywhere. 
if [[ $command = $MOUNT_CHECK_ONLY ]] then [[ $args = $RESIGN_FSMGR ]] && \ $tschmgr $fsToCheck -r >/dev/null 2>&1 return 0 fi fi # end of if [[ $fsToCheck = $NO_MOUNT_CHECK ]] # Do some command specific processing. if [[ $command = tscrfs ]] then # If executing tscrfs, make sure the file system name # does not conflict with any existing entry in /dev. set -f ; set -- $args ; set +f fqDeviceName=$1 if [[ -e $fqDeviceName ]] then # If there is a /dev entry by the same name, make sure # the GPFS environment is up to date before giving up. gpfsInitOutput=$(gpfsInit nolock 2>/dev/null) if [[ -e $fqDeviceName ]] then # Device already exists on node. printErrorMsg 530 $mmcmd $fqDeviceName $ourNodeName return $MM_FsExists fi fi fi # end of if [[ $command = tscrfs ]] # Fix quotes around disk descriptor lists, if any. args=$(print -- "$args" | $tr '"' ' ') # Execute the requested command. $cmndDir/$command $args rc=$(remapRC $?) # Return with the result from the ts command. return $rc } #----- end of function runLocalCommand ----------------------- ###################################################################### # # Function: Runs the specified command on the local node # # Input: $1 - symbolic link indicator: _LINK_ or _NO_LINK_ # $2 - command to execute; this command should not require # the daemon to be up, as we do not check for this. # This is the difference between runLocalCommand # and this routine (runLocalCommand2). # $3 - argument list for the remote command # # Output: Depends on the particular command # # Returns: 0 - command completed successfully # return code from the command # ###################################################################### function runLocalCommand2 # [] { typeset sourceFile="mmsdrfsdef.sh" [[ -n $DEBUG || -n $DEBUGrunLocalCommand2 ]] && set -x $mmTRACE_ENTER "$*" typeset sLink=$1 typeset command=$2 shift 2 # Move past required parms. 
typeset args=$@ typeset rc=0 typeset cmndDir=$mmcmdDir [[ $sLink = $LINK ]] && cmndDir="${cmndDir}/${links}" # Fix quotes around disk descriptor lists, if any. args=$(print -- "$args" | $tr '"' ' ') # Execute the requested command. $cmndDir/$command $args rc=$(remapRC $?) # Return with the result from the command. return $rc } #----- end of function runLocalCommand2 ---------------------- ############################################################################# # # Function: Wrapper function for executing commands on one or more nodes. # Provides similar functionality to the corresponding mmcommon # verbs. See function comments for more details. # # Input: $1 - type of execution (on1, onNode, onall, etc.) # # Output: Depends on the specific command being executed. # # Returns: Return code from the command being executed. # ############################################################################# function run { typeset sourceFile="mmsdrfsdef.sh" [[ -n $DEBUG || -n $DEBUGrun ]] && set -x $mmTRACE_ENTER "$*" typeset argc=$# typeset where=$1 typeset target=$2 typeset arg2="$2" typeset arg3="$3" typeset arg4="$4" typeset arg5="$5" typeset arg6="$6" typeset arg7="$7" typeset arg8="$8" typeset arg9="$9" typeset rc=0 typeset operands remoteCommand rcInfo remoteVerb arguments typeset nodeName chosenNode [[ -z $ourNodeName ]] && getLocalNodeData case $where in #--------------------------------------------------- on1 ) # run on1 [] #--------------------------------------------------- if [[ $argc -lt 3 ]] then operands=" [] " printErrorMsg 260 run $kword "$operands" cleanupAndExit fi # Shift past the hostname and get the command to execute and its args. shift 2 remoteCommand=$* remoteVerb=$1 if [[ $target = $ourNodeName || $MMMODE = single ]] then # If we happen to be the target node, run the command locally. $mmremote $remoteCommand rc=$? else # Invoke mmdsh to execute the command on the remote system, # suppressing prepending of the hostname to the output lines. 
$rm -f $tmpDir$remoteVerb$$.* $mmdsh -svL $target $mmremote onbehalf2 $ourNodeName $remoteVerb$$ \ $MMMODE $NO_LINK mmremote $remoteCommand rc=$? # Determine the return code. The goal is to pass back the return code # from the mmremote command. Because rsh and different versions of ssh # handle the return code differently (e.g., rsh does not propagate # it back), mmremote onbehalf creates a special file that has the # return code appended at the end of its name. If we do not see this # file on our side, we assume that mmdsh returned the return code from # the remote command. Although this is not necessarily always the case # (there could have been a problem with the touch command that creates # the special file), it is the best we can do under the circumstances. rcInfo=$($ls $tmpDir$remoteVerb$$.* 2> /dev/null) $rm -f $tmpDir$remoteVerb$$.* if [[ -n $rcInfo ]] then # The return code was passed back via the empty file mechanism. # Extract the return code from the file name. rc=${rcInfo#$tmpDir$remoteVerb$$\.} fi fi # end of if [[ $target = $ourNodeName || $MMMODE = single ]] ;; #---------------------------------------------------- onNode ) # run onNode [] #---------------------------------------------------- if [[ $argc -lt 3 ]] then operands=" [] " printErrorMsg 260 run $kword "$operands" cleanupAndExit fi # Shift past the hostname and get the command to execute and its args. shift 2 remoteCommand=$* if [[ $target = $ourNodeName || $MMMODE = single ]] then # If we happen to be the target node, run the command locally. $mmremote $remoteCommand rc=$? else # Otherwise, use mmdsh to execute the command on the remote node, # suppressing prepending of the hostname to the output lines. $mmdsh -svL $target $mmremote $remoteCommand rc=$? 
fi # end of if [[ $target = $ourNodeName || $MMMODE = single ]] ;; #---------------------------------------------- onall) # run onall # [] #---------------------------------------------- if [[ $argc -lt 4 ]] then operands=" []" printErrorMsg 260 run $kword "$operands" cleanupAndExit fi # Shift past the reliable names file and the "not reachable" nodes file # and get the command to execute and its args. shift 3 remoteCommand=$* if [[ $MMMODE = single ]] then # Execute the command locally. $mmremote $remoteCommand rc=$? else # Invoke mmdsh to execute command on the remote nodes. $mmdsh -vF $target -R $arg3 $mmremote $remoteCommand rc=$? fi ;; #--------------------------- * ) # Unknown action requested #--------------------------- # Invalid keyword printErrorMsg 133 run $kword cleanupAndExit ;; esac return $rc } #----- end of function run ------------------------------------