1 | #!/bin/ksh |
---|
2 | # IBM_PROLOG_BEGIN_TAG |
---|
3 | # This is an automatically generated prolog. |
---|
4 | # |
---|
5 | # |
---|
6 | # |
---|
7 | # Licensed Materials - Property of IBM |
---|
8 | # |
---|
9 | # (C) COPYRIGHT International Business Machines Corp. 2000,2006 |
---|
10 | # All Rights Reserved |
---|
11 | # |
---|
12 | # US Government Users Restricted Rights - Use, duplication or |
---|
13 | # disclosure restricted by GSA ADP Schedule Contract with IBM Corp. |
---|
14 | # |
---|
15 | # IBM_PROLOG_END_TAG |
---|
16 | # @(#)78 1.32.1.2 src/avs/fs/mmfs/ts/admin/mmstartup.sh, mmfs, avs_rgpfs24, rgpfs24s003a 5/26/06 09:48:07 |
---|
17 | ############################################################################## |
---|
18 | # |
---|
19 | # The GPFS daemons on the specified nodes will be started. |
---|
20 | # |
---|
21 | # Usage: |
---|
22 | # |
---|
23 | # mmstartup [-a | -N {Node[,Node...] | NodeFile | NodeClass}] |
---|
24 | # [-E EnvVar=Value...] [ -T ] |
---|
25 | # |
---|
26 | # where: |
---|
27 | # |
---|
28 | # -a Start the daemon on all nodes in the GPFS cluster. |
---|
29 | # |
---|
30 | # -N Node,Node,... Specify the nodes on which the daemon is to be started. |
---|
31 | # -N NodeFile NodeClass may be one of several possible node classes |
---|
32 | # -N NodeClass (e.g., quorumnodes, managernodes, nsdnodes, etc.) |
---|
33 | # |
---|
34 | # -E EnvVar=Value Specify an environment variable to be passed to the GPFS |
---|
35 | # daemon. More than one -E option can be specified. |
---|
36 | # |
---|
37 | # -T start tracing at daemon startup and cut a trace report |
---|
38 | # at daemon shutdown |
---|
39 | # |
---|
40 | # |
---|
41 | # If not explicitly specified otherwise, the daemon is started on the local |
---|
42 | # node only. |
---|
43 | # |
---|
44 | # |
---|
45 | # Undocumented options: |
---|
46 | # |
---|
47 | # -e seconds Estimated startup time (in seconds). During this time |
---|
48 | # period, the mmsdrfs file is guaranteed not to change. |
---|
49 | # Specifying a value of 0 disables the locking that would |
---|
50 | # otherwise take place for large non-sp clusters. |
---|
51 | # |
---|
52 | # -f Force the loading of the kernel extensions (Linux only). |
---|
53 | # |
---|
54 | # -G Force trace records to be cut on all nodes when there is |
---|
55 | # a daemon failure on some node. |
---|
56 | # |
---|
57 | # -t {yes|traceFile} Start the mm commands tracing facility. If traceFile |
---|
58 | # is specified, it must be a fully qualified pathname. |
---|
59 | # Otherwise, the trace results are appended to file |
---|
60 | # /tmp/mmfs/mmScriptTrace. |
---|
61 | # |
---|
62 | # |
---|
63 | # Obsolete options: |
---|
64 | # |
---|
65 | # -C NodesetId Start the daemon on all nodes in the specified nodeset. |
---|
66 | # Assumed to be the same as -a. |
---|
67 | # |
---|
68 | # -W NodeFile Start the daemon on all nodes whose reliable hostnames |
---|
69 | # are listed one per line in NodeFile. |
---|
70 | # Cannot be specified with -a, -N, -w, or -n. |
---|
71 | # |
---|
72 | # -w nodenames Start the daemon on all nodes whose reliable hostnames |
---|
73 | # are in the comma-separated nodenames list. |
---|
74 | # Cannot be specified with -a, -N or -W. |
---|
75 | # If both -w and -n are specified, the lists are combined. |
---|
76 | # |
---|
77 | # -n nodenums Start the daemon on all nodes whose node numbers are |
---|
78 | # in the comma-separated nodenums list. |
---|
79 | # Cannot be specified with -a, -N, or -W. |
---|
80 | # If both -w and -n are specified, the lists are combined. |
---|
81 | # |
---|
82 | ############################################################################## |
---|
83 | |
---|
84 | # Include global declarations and service routines. |
---|
85 | . /usr/lpp/mmfs/bin/mmglobfuncs |
---|
86 | . /usr/lpp/mmfs/bin/mmsdrfsdef |
---|
87 | |
---|
sourceFile="mmstartup.sh"

# Turn on shell tracing when either the general or the per-command
# debug variable is set, then invoke the trace-entry hook with the
# original command line arguments.
[[ -n $DEBUG || -n $DEBUGmmstartup ]] && set -x
$mmTRACE_ENTER "$*"


# Local variables

usageMsg=391
typeset -i timeout=0
typeset -i nodes=0
rc=0
nodenames=""
nodenums=""
wcoll=""
aflag=""
Cflag=""
eflag=""
Eflag=""
Estring=""
fflag=""
Gflag=""
nflag=""
Nflag=""
Tflag=""
tflag=""
Wflag=""
wflag=""
nodeList=""

# envString accumulates the -E options and is appended to itself each
# time -E is seen, so it is read before it is first written.  Start it
# out empty; otherwise a variable of the same name inherited from the
# caller's environment would be passed along on the daemon start command.
envString=""
---|
116 | |
---|
117 | |
---|
118 | |
---|
119 | ####################### |
---|
120 | # Mainline processing. |
---|
121 | ####################### |
---|
122 | |
---|
123 | |
---|
##################################
# Process each of the arguments.
##################################

# A request for help is recognized through $arg1 only.
# NOTE(review): arg1 is not assigned anywhere in this file; presumably
# it is set by the sourced global declarations - confirm.
[[ $arg1 = '-?' || $arg1 = '-h' || $arg1 = '--help' || $arg1 = '--' ]] &&  \
  syntaxError "help" $usageMsg

# The leading ':' in the option string requests silent error handling:
# a missing option value is reported as OPT=':' and the offending option
# letter is left in OPTARG.  Everything unrecognized ends up in '*'.
while getopts :aC:e:E:fGn:N:t:Tw:W: OPT
do
  case $OPT in

    a) # All nodes.  Mutually exclusive with every other node selector.
       [[ -n $aflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       aflag="-$OPT"
       [[ -n $Cflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $aflag $Cflag $Wflag $wflag $nflag $Nflag
       ;;

    C) # Obsolete nodeset option.  It is accepted here and converted
       # to -a once all options have been parsed.
       # syntaxError "obsoleteOption" $usageMsg "-$OPT"
       [[ -n $Cflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Cflag="-$OPT"
       nodesetId="$OPTARG"
       [[ -n $aflag || -n $Wflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Cflag $aflag $Wflag $wflag $nflag $Nflag
       ;;

    e) # Estimated startup time; the value is validated by checkIntRange.
       [[ -n $eflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       eflag="-$OPT"
       estimatedStartupTime=$(checkIntRange estimatedStartupTime "$OPTARG")
       [[ $? -ne 0 ]] && cleanupAndExit
       ;;

    E) # more than one -E option is allowed
       envString="${envString} -$OPT $OPTARG"
       ;;

    f) # Only valid when $osName is Linux.
       [[ -n $fflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       fflag="-$OPT"
       [[ $osName != Linux ]] && syntaxError "invalidOption" $usageMsg "-$OPT"
       ;;

    G) [[ -n $Gflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Gflag="-$OPT"
       ;;

    n) # Node numbers.  May be combined with -w, so wflag is not checked.
       [[ -n $nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       nflag="-$OPT"
       nodenums="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $nflag $Cflag $Wflag $aflag $Nflag
       ;;

    N) # Node list, node file, or node class.
       [[ -n $Nflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Nflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $nflag || -n $wflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Nflag $Cflag $Wflag $aflag $nflag $wflag
       ;;

    t) # The option letter and its value are kept together so that they
       # can be passed through unchanged on the start command.
       [[ -n $tflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       tflag="-$OPT $OPTARG"
       ;;

    T) [[ -n $Tflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Tflag="-$OPT"
       ;;

    w) # Node names.  May be combined with -n, so nflag is not checked.
       [[ -n $wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       wflag="-$OPT"
       nodenames="$OPTARG"
       [[ -n $Cflag || -n $Wflag || -n $aflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $wflag $Cflag $Wflag $aflag $Nflag
       ;;

    W) # File with node names.
       [[ -n $Wflag ]] && syntaxError "multiple" $noUsageMsg "-$OPT"
       Wflag="-$OPT"
       wcoll="$OPTARG"
       [[ -n $Cflag || -n $aflag || -n $wflag || -n $nflag || -n $Nflag ]] &&  \
         syntaxError "invalidCombination"  \
           $usageMsg $Wflag $Cflag $aflag $wflag $nflag $Nflag
       ;;

    :) # An option that requires a value was given without one.
       syntaxError "missingValue" $usageMsg $OPTARG
       ;;

    +[aCeEfGnNtTwW])
       # A known option letter introduced with '+' instead of '-'.
       # G is part of the list so that +G is reported by name like
       # every other option letter accepted by getopts above.
       syntaxError "invalidOption" $usageMsg "$OPT"
       ;;

    *) syntaxError "invalidOption" $usageMsg $OPTARG
       ;;

  esac
done

# Nothing may follow the options.
shift OPTIND-1
[[ $# != 0 ]] && syntaxError "extraArg" $usageMsg "$1"

# Rather than fail, convert the obsolete -C option to -a.
[[ -n $Cflag ]] && aflag="-a"
---|
227 | |
---|
228 | |
---|
########################################################################
# Set up trap exception handling and call the gpfsInit function.
# It will ensure that the local copy of the mmsdrfs and the rest of the
# GPFS system files are up-to-date.  There is no need to lock the sdr.
#
# KILL is intentionally not in the signal list: SIGKILL cannot be
# caught, and the result of trying to trap it is undefined.
#
# The exit status of gpfsInit is handed to setGlobalVar through $?,
# so no command may be placed between the two statements.
########################################################################
trap pretrap2 HUP INT QUIT
gpfsInitOutput=$(gpfsInit nolock)
setGlobalVar $? $gpfsInitOutput
---|
237 | |
---|
238 | |
---|
#######################################################
# Create a file containing all of the specified nodes.
#
# On completion $nodefile holds the nodes to start.  If no node
# selection option was given, $nodefile is not written here and
# the daemon is later started on the local node only.
#######################################################
if [[ -n $aflag ]]
then
  # Get a list of the nodes.
  getNodeList $REL_HOSTNAME_Field $GLOBAL_ID $mmsdrfsFile > $nodefile

elif [[ -n $Nflag ]]
then
  # Convert the passed data into a file containing admin node names.
  createVerifiedNodefile $nodenames $REL_HOSTNAME_Field no $nodefile
  [[ $? -ne 0 ]] && cleanupAndExit

elif [[ -n $Wflag ]]
then
  # The -W argument is stored in wcoll by the option parsing code.
  # Verify the input file is readable.
  if [[ ! -f $wcoll || ! -r $wcoll ]]
  then
    printErrorMsg 43 $mmcmd $wcoll
    cleanupAndExit
  fi

  # Filter out comment lines and localhost entries.
  $grep -v -e "localhost" -e "^#" "$wcoll" > $tmpfile

  # Convert any entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  else
    # No node names were specified.
    printErrorMsg 328 $mmcmd $wcoll
    cleanupAndExit
  fi

else
  # Either no option was specified, or we have some combination of -w and -n.

  # Convert the node names list (if any) into a file.
  # Start from a clean slate because the loops below append.
  $rm -f $tmpfile
  if [[ -n $nodenames ]]
  then
    for i in $(print $nodenames | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Append the node number list (if any) to the node file.
  if [[ -n $nodenums ]]
  then
    for i in $(print $nodenums | $tr "," " ")
    do
      print -- "$i" >> $tmpfile
    done
  fi

  # Convert the entries in the node file into admin node names.
  if [[ -s $tmpfile ]]
  then
    createVerifiedNodefile $tmpfile $REL_HOSTNAME_Field no $nodefile
    [[ $? -ne 0 ]] && cleanupAndExit
  fi

fi  # end of if [[ -n $aflag ]]
---|
306 | |
---|
307 | |
---|
# Obtain the credentials, unless getCred has been called already.
if [[ $getCredCalled = no ]]
then
  getCred
fi
---|
310 | |
---|
311 | |
---|
##############################################################
# When GPFS is started on many nodes, take the sdr lock for a
# limited time.  This keeps the config data from changing and
# lets the nodes trust their own copy of the data without
# checking with the server nodes first.
#
# The lock is attempted when
#   - no -e option was given and more than 32 nodes are involved, or
#   - the -e option was given with a value greater than zero.
# In all other cases, and whenever the lock cannot be obtained,
# expirationData is left empty.
##############################################################
if [[ -s $nodefile ]]
then
  nodes=$($cat $nodefile | $wc -l)
fi

if [[ ( -z $eflag && $nodes -gt 32 ) ||
      ( -n $eflag && $estimatedStartupTime -gt 0 ) ]]
then
  # A startup time given by the user is taken as is.
  # Otherwise, derive one from the amount of work to be done.
  if [[ -z $eflag ]]
  then
    # Base value, depending on the number of nodes to start.
    if (( nodes < 128 ))
    then
      estimatedStartupTime=90
    elif (( nodes < 256 ))
    then
      estimatedStartupTime=150
    else
      estimatedStartupTime=240
    fi

    # Allow ten more seconds for every line in the startup mount file.
    fsToMount=$($cat $startupMountFile 2>/dev/null | $wc -l )
    if [[ $fsToMount -gt 0 ]]
    then
      (( estimatedStartupTime = estimatedStartupTime + 10 * $fsToMount ))
    fi
  fi  # end of if [[ -z $eflag ]]

  # Build the special lock id and try to obtain the sdr lock with it.
  expLockId="mmSdrLockExp:$ourNodeName:$estimatedStartupTime"
  gpfsInitOutput=$(gpfsInit $expLockId 2>/dev/null)
  rc=$?

  if (( rc != 0 ))
  then
    # gpfsInit failed and the lock was not obtained.
    # This is not an error; carry on with the regular processing.
    expirationData=""

  else
    # The lock is ours.  Let setGlobalVar parse the gpfsInit output
    # to pick up the latest generation number and its timestamp.
    setGlobalVar $rc $gpfsInitOutput

    # Put together the expiration token that is passed to the nodes.
    currentTime=$($perl -e 'print time')
    (( expirationTime = currentTime + estimatedStartupTime ))
    expirationData="mmSdrLockExp:$sdrGenNumber:$sdrGenTimestamp:$expirationTime"

    # The lock is released by a background process.  Clear sdrLocked
    # first, so that cleanupAndExit does not do the unlocking itself.
    sdrLocked=no
    $mmcommon expirationDataCleanup $expirationData unlock >/dev/null 2>&1 &

    # Prefix the token with its option letter.
    expirationData="-e $expirationData"
  fi  # end of if (( rc != 0 ))

else
  # Only a few nodes are being started, or locking was explicitly
  # turned off with -e 0.  Nothing special needs to be done.
  expirationData=""
fi  # end of the sdr lock processing
---|
380 | |
---|
381 | |
---|
####################################################################
# Determine the daemon tracing option to pass on.
#   -G, with or without -T   =>  "-T global"
#   -T alone                 =>  "-T local"
#   neither                  =>  no tracing option
####################################################################
if [[ -n $Gflag ]]
then
  Tflag="-T global"
elif [[ -n $Tflag ]]
then
  Tflag="-T local"
else
  Tflag=""
fi  # end of if [[ -n $Gflag ]]
---|
399 | |
---|
400 | |
---|
############################################
# Start GPFS daemon on the specified nodes.
############################################

# Starting GPFS ...
printInfoMsg 392 "$(date)" $mmcmd

if [[ -s $nodefile ]]
then
  # There is a list of nodes; start the daemon on each of them.
  $mmcommon onall $nodefile $unreachedNodes  \
     startSubsys $expirationData $fflag $Tflag $tflag $envString
  rc=$?
else
  # No nodes were given; start the daemon on the local node only.
  $mmremote startSubsys $fflag $Tflag $tflag $envString
  rc=$?
fi

# Report the nodes, if any, that could not be reached.
[[ -s $unreachedNodes ]] && {
  # The following nodes could not be reached:  . . .
  printErrorMsg 270 $mmcmd
  $cat $unreachedNodes 1>&2
}

# Exit with the return code of the start command.
cleanupAndExit $rc
---|
429 | |
---|