source: gpfs_3.1_ker2.6.20/lpp/mmfs/bin/mmsdrrestore @ 177

Last change on this file since 177 was 16, checked in by rock, 17 years ago
  • Property svn:executable set to *
File size: 13.4 KB
RevLine 
[16]1#!/bin/ksh
2# IBM_PROLOG_BEGIN_TAG
3# This is an automatically generated prolog.
4
5
6
7# Licensed Materials - Property of IBM
8
9# (C) COPYRIGHT International Business Machines Corp. 2005,2006
10# All Rights Reserved
11
12# US Government Users Restricted Rights - Use, duplication or
13# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
14
15# IBM_PROLOG_END_TAG
16# @(#)93 1.1.1.3 src/avs/fs/mmfs/ts/admin/mmsdrrestore.sh, mmfs, avs_rgpfs24, rgpfs240610b 2/17/06 16:51:07
17##############################################################################
18#
19# Ensure that the GPFS system files on the specified nodes are at the
20# most recent level.  If the local mmsdrfs file is missing, use the file
21# specified with the -F option from the node specified with the -p option.
22#
23# Usage:
24#   mmsdrrestore [-p NodeName] [-F mmsdrfsFile] [-R remoteFileCopyCommand]
25#                [-a | -N {Node[,Node...] | NodeFile | NodeClass}]
26#
27# where:
28#   -p NodeName        specifies the node from which to obtain a valid
29#                      mmsdrfs file.  This should be either the primary
30#                      configuration server or a node that contains a
31#                      valid backup copy of the mmsdrfs file.
32#                      If not specified, the local node is assumed.
33#
34#   -F mmsdrfsFile     specifies the pathname of the mmsdrfs file to use.
35#                      If not specified, /var/mmfs/gen/mmsdrfs is assumed.
36#
37#   -R remoteFileCopyCommand  specifies the fully-qualified pathname for
38#                      the remote file copy program to be used for staging
39#                      the mmsdrfs file.  The default is /usr/bin/rcp.
40#
41# Future options:
42#   -a                 Restore all nodes in the GPFS cluster.
43#
44#   -N Node,Node,...   specifies the affected nodes in a list or in a file.
45#   -N NodeFile        NodeClass may be one of several possible node classes
46#   -N NodeClass       (e.g., quorumnodes, managernodes, nsdnodes, etc.)
47#
48# Notes:
49#   If not explicitly specified otherwise, only the local node is affected.
50#
51#   If the -p and/or -F parameters are specified, i.e., if the local mmsdrfs
52#   file will be replaced, the command must be first run on the primary and
53#   backup cluster configuration servers.
54#
55#   If the local mmsdrfs file is going to be replaced, the command saves
56#   the original file as /var/mmfs/gen/mmsdrfs.mmsdrrestore.backup
57#
58#   The -R option has to be specified only if the mmsdrfs file needs to be
59#   copied from a different node and the cluster was set to use ssh/scp.
60#   Once the correct mmsdrfs file is staged, the rest of the code will use
61#   the remote commands that have been defined for the cluster.  In other
62#   words, you can not change the remote shell and file copy programs with
63#   the mmsdrrestore command; use mmchcluster for this purpose once the
64#   cluster has been restored.
65#
66##############################################################################
67
68# Include global declarations and service routines.
69. /usr/lpp/mmfs/bin/mmglobfuncs
70. /usr/lpp/mmfs/bin/mmsdrfsdef
71. /usr/lpp/mmfs/bin/mmfsfuncs
72
# Record the name of this script, switch on shell tracing when either
# debug variable is set, and log entry into the command.
sourceFile="mmsdrrestore.sh"
if [[ -n $DEBUG || -n $DEBUGmmsdrrestore ]]
then
  set -x
fi
$mmTRACE_ENTER "$*"


# Local variables

rc=0            # exit status accumulated by the mainline
usageMsg=284    # value handed to syntaxError when the usage text is wanted
82
83
84# Local routines
85
86
#####################################################################
#
# Function:  Verifies that a command executed successfully.  If the
#            return code from the command is not zero, the function
#            optionally issues a message, removes a newly staged
#            mmsdrfs file, puts back the saved original (if there
#            is one), and hands control to cleanupAndExit.
#
# Input:     $1 - text to be printed in the message, or NOMSG if
#                 no message should be issued
#            $2 - return code from the execution of the command
#
# Globals:   mmcmd, mmsdrfsFile, userProvidedFile, rm, mv  (read)
#            sourceFile                                    (set)
#
# Returns:   0 if $2 is zero.  Otherwise cleanupAndExit is called
#            with the value of $2.
#
#####################################################################
function checkForErrorsAndRestoreOldFile
{
  sourceFile="mmsdrrestore.sh"
  [[ -n $DEBUG || -n $DEBUGcheckForErrorsAndRestoreOldFile ]] && set -x
  $mmTRACE_ENTER "$*"
  typeset msgSub=$1
  typeset rc=$2
  typeset backupFile="${mmsdrfsFile}.${mmcmd}.backup"

  # Nothing to undo if the command worked.  The comparison is done
  # inside [[ ]] so that an empty or odd-looking return code cannot
  # break the test itself; anything other than "0" counts as an error.
  [[ $rc = 0 ]] && return 0

  # If asked, issue an "unexpected error" message.
  [[ $msgSub != NOMSG ]] &&  \
    printErrorMsg 171 "$mmcmd" "$msgSub" "$rc"

  # Remove the mmsdrfs file if it was copied from another place.
  [[ -n $userProvidedFile ]] &&  \
    $rm -f "$mmsdrfsFile"

  # Restore the original mmsdrfs file.  The pathnames are quoted so
  # that they reach rm and mv as single operands.
  [[ -f $backupFile ]] &&  \
    $mv -f "$backupFile" "$mmsdrfsFile"

  # Left unquoted on purpose: an empty return code results in a call
  # without arguments, a form this script already uses elsewhere.
  cleanupAndExit $rc

}  #------ end of function checkForErrorsAndRestoreOldFile -------
124
125
126
127#######################
128# Mainline processing
129#######################
130
131
##################################
# Process each of the arguments.
##################################
# arg1 is not assigned in this script; presumably it is set to the
# first command line argument by one of the sourced files.
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] &&  \
  syntaxError "help" $usageMsg

# The leading colon in the option string selects silent error reporting:
# a missing option value is reported through the ":" case and an unknown
# option through the default case, both with the option letter in OPTARG.
# Values that originate from the command line are quoted when they are
# passed on, so that they are not split or expanded as file patterns.
while getopts :aF:N:p:R: OPT
do
  case $OPT in
    a) [[ -n $aflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       aflag="-$OPT"
       [[ -n $Nflag ]] &&  \
         syntaxError "invalidCombination" $usageMsg "$aflag" "$Nflag"
       syntaxError "invalidOption" $usageMsg "$OPT"  #esjx option not allowed yet
       ;;

    F) # source mmsdrfs file
       [[ -n $Farg ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Farg=$OPTARG
       ;;

    N) [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $aflag ]] &&  \
         syntaxError "invalidCombination" $usageMsg "$Nflag" "$aflag"
       syntaxError "invalidOption" $usageMsg "$OPT"  #esjx option not allowed yet
       ;;

    p) # primary server
       [[ -n $parg ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       parg=$OPTARG
       ;;

    R) # remote file copy command; must be given as an absolute path
       [[ -n $Rarg ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Rarg=$OPTARG
       [[ $Rarg != /* ]] &&  \
         syntaxError "absolutePath_2" $noUsageMsg "-$OPT" "$OPTARG"
       ;;

    :) syntaxError "missingValue" $usageMsg "$OPTARG"
       ;;

    +[aFNpR])  # a known option letter introduced with "+" instead of "-"
       syntaxError "invalidOption" $usageMsg "$OPT"
       ;;

    *) syntaxError "invalidOption" $usageMsg "$OPTARG"
       ;;

  esac
done  # end of while getopts :aF:N:p:R: OPT do

# Discard the options that were processed; nothing may follow them.
shift $((OPTIND - 1))
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg "$1"
188
189
###########################################################
# Process the optional remote file copy command parameter.
###########################################################
# Reject a -R value that does not name an executable.
if [[ -n $Rarg && ! -x $Rarg ]]
then
  # The specified remote file copy command is incorrect
  # (it is not an executable).
  printErrorMsg 423 $mmcmd ${Rarg%% *}
  cleanupAndExit
fi

# Use the specified program for the copy done by this script and
# export its path through the GPFS_rcpPath environment variable.
if [[ -n $Rarg ]]
then
  rcp="$Rarg"
  GPFS_rcpPath="$rcp"
  export GPFS_rcpPath
fi
205
206
###################################################################
# If a local mmsdrfs file exists and it is about to be replaced
# (a source node was given, or a source file other than the local
# mmsdrfs file), save a copy of it before proceeding any further.
###################################################################
if [[ -f $mmsdrfsFile ]] &&  \
   [[ -n $parg || ( -n $Farg && $Farg != $mmsdrfsFile ) ]]
then
  $cp $mmsdrfsFile ${mmsdrfsFile}.${mmcmd}.backup
  checkForErrors "cp $mmsdrfsFile ${mmsdrfsFile}.${mmcmd}.backup" $?
fi
216
217
##############################################################
# If we are told where to find the good mmsdrfs file, get it.
##############################################################
# The pathname of the file to restore from is kept in sdrfsSourceFile.
# It must not be stored in sourceFile: that variable carries the name
# of this script (it is set at the top of the file) and is reassigned
# by checkForErrorsAndRestoreOldFile every time that function is called,
# which would make the "no record for this node" message further down
# name the wrong file.
sourceNode=$parg
sdrfsSourceFile=$Farg
[[ -z $sdrfsSourceFile ]] && sdrfsSourceFile=$mmsdrfsFile

if [[ -n $parg ]]
then
  # The file must be copied from a remote node.
  isNodeReachable "$sourceNode"
  if [[ $? -ne 0 ]]
  then
    # The node is not reachable.
    printErrorMsg 340 $mmcmd "$sourceNode"
    checkForErrorsAndRestoreOldFile NOMSG 1
  fi

  # The copy runs with LC_ALL=C and its output is captured, because
  # the text of a failure is examined below.
  rcpResult=$(LC_ALL=C $rcp "${sourceNode}:${sdrfsSourceFile}"  \
                "$mmsdrfsFile" 2>&1)
  rc=$?
  if [[ $rc -ne 0 ]]
  then
    # If there was no need to copy the file, that's fine.
    print -- "$rcpResult" | $grep -e "refer to the same file" >/dev/null
    if [[ $? -ne 0 ]]
    then
      # Show the error from the rcp command and give up.
      print -u2 "$rcpResult"
      checkForErrorsAndRestoreOldFile  \
        "$rcp ${sourceNode}:${sdrfsSourceFile} $mmsdrfsFile" $rc
    fi
  fi  # end of if [[ $rc -ne 0 ]]
  userProvidedFile=yes

elif [[ $sdrfsSourceFile != $mmsdrfsFile ]]
then
  # The file must be copied from some other place within the local node.
  $cp "$sdrfsSourceFile" "$mmsdrfsFile"
  checkForErrorsAndRestoreOldFile "cp $sdrfsSourceFile $mmsdrfsFile" $?
  userProvidedFile=yes

else
  : # The local mmsdrfs file is supposed to be good.
fi  # end of if [[ -n $parg ]]

# At this point we must have an mmsdrfs file to work with.
if [[ ! -f $mmsdrfsFile ]]
then
  # Either the node does not belong to a cluster
  # or the config information is lost/corrupted.
  printErrorMsg 282 $mmcmd
  checkForErrors "$mmcmd: Missing /var/mmfs/gen/mmsdrfs" 1
fi

# If this is a user-specified mmsdrfs file, perform a quick sanity check.
if [[ -n $userProvidedFile ]]
then
  checkSdrfsFile "$mmsdrfsFile"
  checkForErrorsAndRestoreOldFile "$mmcmd: Invalid input file" $?
fi


########################################################################
# See if this node belongs to the cluster defined by this mmsdrfs file.
########################################################################
# Remove the local mmfsNodeData file.  We will derive the information
# based on the currently active IP addresses of the node.
$rm -f $mmfsNodeData

# Determine the node's GPFS node number by checking all local addresses.
# The output is split into words with file name generation turned off;
# the first word is taken as the node number, the third as the node name.
nodeData=$(guessLocalNodeNumber)
rc=$?
set -f ; set -- $nodeData ; set +f
ourNodeNumber=$1
ourNodeName=$3

if [[ -z $ourNodeNumber ]]
then
  # The node does not appear to belong to the cluster represented
  # by this mmsdrfs file.  Either the file is from a different
  # cluster, or not all adapter interfaces have been activated.
  # Or the node indeed is not part of the cluster.

  # Determine the name of the source file for the message.
  sdrfs="$sdrfsSourceFile"
  [[ -n $sourceNode ]] && sdrfs="${sourceNode}:${sdrfsSourceFile}"

  # There is no record for this node in the file.
  printErrorMsg 283 $mmcmd "$sdrfs"
  checkForErrorsAndRestoreOldFile NOMSG 1
fi
309
310
#############################################################################
# From here on the current mmsdrfs file is taken to be the correct one and
# the standard gpfsInit processing is used.  As a result, the primary or
# backup servers named in the current mmsdrfs file will be consulted to
# make sure that this really is the latest version of the file.
#############################################################################
# Delete the current system files so that the environment gets rebuilt
# from the information in the new mmsdrfs file.
$rm -f $mmfscfgFile  \
       $mmfsNodeData  \
       ${mmfsEnvLevel}+([0-9])  \
       ${mmfsNewKeyLevel}+([0-9])  \
       ${mmfsCommittedKeyLevel}+([0-9])  \
       $nsdpvol


########################################################################
# Install the trap handler and run gpfsInit, which brings the local
# copy of the mmsdrfs file and the other GPFS system files up to date.
# The sdr does not have to be locked for this.
########################################################################
trap pretrap2 HUP INT QUIT KILL
gpfsInitOutput=$(gpfsInit nolock)
# Keep this call directly after the assignment: $? must still hold
# the exit status of gpfsInit.
setGlobalVar $? $gpfsInitOutput


###########################################################
# Stop any sdrserv daemon on this node and start it again
# only if the node is the primary or the backup
# configuration server.
###########################################################
killSdrServ >/dev/null 2>&1
if [[ $ourNodeName = $primaryServer || $ourNodeName = $backupServer ]]
then
  startSdrServ CURRENT
fi

# Tell the user that the node was successfully restored.
printErrorMsg 285 $mmcmd $ourNodeName

# Done for now.  The -a and -N options are not implemented yet.
cleanupAndExit 0
347
348
#######################################################
# Create a file containing all of the affected nodes.
# NOTE(review): this follows an unconditional
# "cleanupAndExit 0", so it is presumably never reached
# until the -a and -N options are implemented.
#######################################################
if [[ -n $aflag ]]
then
  # -a: collect every node of the cluster in the node file.
  getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

elif [[ -n $Nflag ]]
then
  # -N: turn the user's node specification into a verified node file.
  createVerifiedNodefile $nodenames $REL_HOSTNAME_Field no $nodefile
  if [[ $? -ne 0 ]]
  then
    cleanupAndExit
  fi
fi  # neither option: only the local node is affected


# Obtain the credentials unless that has been done already.
if [[ $getCredCalled = no ]]
then
  getCred
fi
370
371
372#esjxx - If more than one node is involved AND a new mmsdrfs file was copied,
373#esjxx   push the file to the rest of the affected nodes.  This will be done
374#esjxx   with a new mmremote call that will take care of mmsdrserv as well.
375#esjxx   We will need to pass the names of the primary and backup servers
376#esjxx   and clusterID to ensure that the individual nodes will go to the
377#esjxx   right place for the correct version of the mmsdrfs file.
378#esjxx   If the individual nodes have different information, they should go
379#esjxx   to the correct primary only and specify current gen number 0 or 1.
380#esjxx   This should apply to the backup too ???
381
382
##########################
# Restore the GPFS nodes.
##########################
# An empty or missing node file means that only the local node had
# to be restored, in which case there is nothing left to do here.
if [[ -s $nodefile ]]
then
  # Other nodes are to be restored as well.
  $mmcommon onall $nodefile $unreachedNodes cfg -f
  rc=$?
fi

# Report the nodes that could not be reached, if any.
if [[ -s $unreachedNodes ]]
then
  # The following nodes could not be reached: . . .
  printErrorMsg 270 $mmcmd
  $cat $unreachedNodes >&2
fi

cleanupAndExit $rc
404
Note: See TracBrowser for help on using the repository browser.