source: gpfs_3.1_ker2.6.20/lpp/mmfs/bin/mmtrace @ 148

Last change on this file since 148 was 16, checked in by rock, 17 years ago
  • Property svn:executable set to *
File size: 18.6 KB
Line 
1#!/bin/ksh
2# IBM_PROLOG_BEGIN_TAG
3# This is an automatically generated prolog.
4
5
6
7# Licensed Materials - Property of IBM
8
9# (C) COPYRIGHT International Business Machines Corp. 2006,2007
10# All Rights Reserved
11
12# US Government Users Restricted Rights - Use, duplication or
13# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
14
15# IBM_PROLOG_END_TAG
16# @(#)28  1.29.1.1  src/avs/fs/mmfs/ts/admin/mmtrace.sh, mmfs, avs_rgpfs24, rgpfs24s010a 2/1/07 12:58:58
17###########################################################################
18#
19# Script to gather GPFS traces for debugging purposes.
20#
21# Syntax: mmtrace [start | stop] [noformat | formatall] [tail | head | cont] [dispatch]
22#                 [trace={io | all | def | same}]
23#                 [trace="trace_class level [trace_class level ...]"]
24#
25# Normal use is to issue "mmtrace" to start trace, rerun failure scenario,
26# then issue "mmtrace stop" when failure event occurs.  If the problem
27# is a daemon failure then "mmtrace stop" can be called from the runmmfs
28# script to capture the event and stop trace before the daemon starts up
29# again.  If the problem is a daemon failure during startup, use the -T
30# option on the mmstartup command.
31#
32# "mmtrace" with no trace arguments can be used to start/restart the trace
33# without modifying the existing trace level settings. "mmfsadm showtrace"
34# will show the current level settings. A set of trace levels can be set
35# permanently using
36#   mmchconfig trace="trace_class level [trace_class level ...]"
37# These will be the default trace levels whenever the GPFS daemon starts,
38# but will not actually produce traces until mmtrace is run to start the
39# AIX trace or the Linux lxtrace command.
40#
41# "mmtrace io" can be used to trace a minimal set of operations that
42# show application requests to GPFS and the resulting disk IO.
43#
44# "mmtrace all" can be used to trace all GPFS traces at level 9,
45# plus all AIX traces (if running on AIX).
46#
47# "mmtrace def" can be used to trace a generic set of GPFS trace levels.
48#
49# "mmtrace same" will not change the GPFS trace levels.
50#
51# See "Modifiable trace levels" at the end of the script if the default
52# traces need to be changed.
53#
54# The "noformat" option can be used to just keep the raw unformatted
55# trcfiles and not spend CPU cycles formatting them into trcrpts while
56# doing some other tests.
57#
58# The "formatall" option can be used to have this script format all the raw
59# trcfiles it finds in the TRCDIR directory (presumably left there by
60# previous mmtrace calls that had the "noformat" option specified.)
61# "formatall" implies "stop" if no other parameter specified.
62#
63# Note that a new trace report is produced each time this script is run.
64# Also, in the AIX environment, system trace is stopped and started
65# regardless of whether it was originally initiated by this script.
66# As a result, this script should be used carefully and generally only
67# at the direction of IBM service for the purposes of debugging GPFS.
68#
69# Trace output is captured to the directory specified by TRCDIR below
70# which by default is /tmp/mmfs.
71#
72###########################################################################
73#
74# Include global declarations and service routines
75. /usr/lpp/mmfs/bin/mmglobfuncs
76. /usr/lpp/mmfs/bin/mmsdrfsdef
77
78sourceFile="mmtrace.sh"
79[[ -n $DEBUG || -n $DEBUGmmtrace ]] && set -x
80
81# Local routines
82
####################################################################
#
# Function:  Locate the active dataStructureDump mmfs.cfg entry
#            for the local node, specified by its short name.
#
#            An entry applies to this node when it appears before any
#            [stanza] header, or inside a stanza whose comma-separated
#            node list contains "common" or one of the local host
#            names (the short name given, plus the canonical name and
#            aliases returned by gethostbyname).  When several
#            applicable entries exist, the last one in the file wins.
#
# Input:     $1 -- Local host short name
#            $mmfscfgFile (global) -- path of the mmfs.cfg file to scan
# Output:    Value of the active dataStructureDump entry on stdout,
#            empty if the file cannot be opened or no entry applies.
# Returns:   Always 0.
#
####################################################################
function getDataDumpDir
{
  typeset value

  # NOTE: the perl program below is enclosed in single quotes, so it
  # must not contain any single quote characters of its own.
  value=$(perl -e '
    use strict;
    use warnings;

    my ($cfg_path, $short_name) = @ARGV;
    $short_name = "" unless defined $short_name;

    # Three-argument open: the path is only ever treated as a file name.
    # An unreadable or missing file simply yields an empty result.
    open(my $cfg, "<", $cfg_path) or exit;

    # Obtain the canonical name and aliases for the specified node
    my ($name, $aliases) = gethostbyname($short_name);

    # Every name this node is known by: short name, canonical name and
    # aliases (the alias list is a single blank-separated string).
    my %is_local;
    foreach my $candidate ($short_name, $name, $aliases)
    {
      next unless defined $candidate;
      $is_local{$_} = 1 foreach split(" ", $candidate);
    }

    my $dataDumpDir;

    # Lines ahead of the first stanza header apply to every node
    my $applies = 1;

    LINE: while (my $line = <$cfg>)
    {
      # Skip comment/blank lines
      next LINE if $line =~ m/^ *#/;
      next LINE if $line =~ m/^ *$/;

      chomp($line);
      if ($line =~ /^ *\[(.*)\]/)
      {
        # Stanza header --> search its node list (brackets stripped)
        $applies = 0;
        foreach my $node (split(",", $1))
        {
          if ($node eq "common" || $is_local{$node})
          {
            $applies = 1;
            last;
          }
        }
      }
      elsif ($applies
             && $line =~ s/^ *dataStructureDump(?![^ \t])[ \t]*//)
      {
        # The keyword must be followed by a blank or end of line, so a
        # longer attribute name that merely starts with
        # "dataStructureDump" is not mistaken for the dump directory.
        $dataDumpDir = $line;
      }
    }

    close($cfg);
    print $dataDumpDir if defined $dataDumpDir;
  ' "$mmfscfgFile" "$1")

  # Return the result
  print -- "$value"
  return 0
} #------------- end of function getDataDumpDir ------------------
164
####################################################################
#
# Function:  Stop the currently running trace, if any.
#            If trace was running then move the raw trace file
#            to the output directory and produce a trace report.
#            The report is run in the background to avoid delays
#            in starting GPFS.
#
#            With trace_format = "formatall" every raw trace file
#            matching $TRCPREF.* is formatted in the foreground, one
#            at a time, and removed once its report has been written.
#            With trace_format = "noformat" the raw file is kept and
#            no report is produced.
#
# Input:     Globals osName, RUNTRCFILE, TRCFILE, TRCPREF, TRCRPTPREF,
#            trace_format, trace_dispatch and the formatting template
#            (TRCRPTFMTAIX or TRCRPTFMTLINUX); on Linux also
#            RUNTRCERRFILE, TRCERRFILE, TRCERRPREF and TRCRPTERRPREF.
# Output:    Progress messages on stdout.  Sets the globals TRCRPTFILE
#            and (Linux) TRCRPTERRFILE; the formatall loop reuses the
#            global TRCFILE as its loop variable.
# Returns:   0.  Calls cleanupAndExit if the raw trace file cannot be
#            moved and "formatall" was not requested.
#
####################################################################
function stopTrace
{
  typeset sourceFile="mmtrace.sh"
  typeset traceSuffix
  [[ -n $DEBUG || -n $DEBUGstopTrace ]] && set -x

  if [[ $osName = AIX ]]
  then
    # Stop the current trace
    $trcstop >/dev/null 2>&1
    if [[ $? -eq 0 ]]
    then
      $mv $RUNTRCFILE $TRCFILE
      if [[ $? = 0 ]]
      then
        print -- "$mmcmd: move $RUNTRCFILE $TRCFILE"
      else
        # tracing may have been going to default trcfile
        $mv /var/adm/ras/trcfile $TRCFILE
        if [[ $? = 0 ]]
        then
          print -- "$mmcmd: move /var/adm/ras/trcfile $TRCFILE"
        else
          # mv command failed
          printErrorMsg 104 "$mmcmd" "mv $RUNTRCFILE $TRCFILE"
          [[ $trace_format != "formatall" ]] && cleanupAndExit
        fi
      fi
      sync
    fi
    if [[ $trace_format = "formatall" ]]
    then
      for TRCFILE in $TRCPREF.*
      do
        # When nothing matches, the pattern is left as a literal word;
        # skip it instead of trying to format a nonexistent file.
        [[ -f $TRCFILE ]] || continue

        TRCRPTFILE=$TRCRPTPREF.${TRCFILE#$TRCPREF.}
        # Start formatting trcfiles, one at a time
        print -- "$mmcmd: formatting $TRCFILE to $TRCRPTFILE"
        $trcrpt -O tid=on${trace_dispatch} -t $TRCRPTFMTAIX -o $TRCRPTFILE $TRCFILE && \
          sync && $rm $TRCFILE
      done
    elif [[ -f $TRCFILE && $trace_format != "noformat" ]]
    then
      # Start formatting the file in the background
      TRCRPTFILE=$TRCRPTPREF.${TRCFILE#$TRCPREF.}
      print -- "$mmcmd: formatting $TRCFILE to $TRCRPTFILE"
      ($trcrpt -O tid=on${trace_dispatch} -t $TRCRPTFMTAIX -o $TRCRPTFILE $TRCFILE && \
         sync && $rm $TRCFILE; ) &
    fi

  elif [[ $osName = Linux ]]
  then
    # Stop the current trace
    $lxtrace off >/dev/null 2>&1
    if [[ $? -eq 0 ]]
    then
      $mv $RUNTRCFILE $TRCFILE
      if [[ $? = 0 ]]
      then
        print -- "$mmcmd: move $RUNTRCFILE $TRCFILE"
      else
        # tracing may have been going to default trcfile
        $mv /tmp/lxtrace.trc $TRCFILE
        if [[ $? = 0 ]]
        then
          print -- "$mmcmd: move /tmp/lxtrace.trc $TRCFILE"
        else
          # mv command failed
          printErrorMsg 104 "$mmcmd" "mv $RUNTRCFILE $TRCFILE"
          [[ $trace_format != "formatall" ]] && cleanupAndExit
        fi
      fi
      sync
    fi

    if [[ ! -s $RUNTRCERRFILE ]]
    then
      # Tracing err file empty --> remove it
      $rm -f $RUNTRCERRFILE
    elif [[ -f $TRCFILE ]]
    then
      # Tracing errors occurred --> pair it with tracing file
      $mv $RUNTRCERRFILE $TRCERRFILE
      if [[ $? -ne 0 ]]
      then
        # mv command failed
        printErrorMsg 104 "$mmcmd" "mv $RUNTRCERRFILE $TRCERRFILE"
        [[ $trace_format != "formatall" ]] && cleanupAndExit
      fi
    fi

    if [[ $trace_format = "formatall" ]]
    then
      for TRCFILE in $TRCPREF.*
      do
        # When nothing matches, the pattern is left as a literal word;
        # skip it instead of trying to format a nonexistent file.
        [[ -f $TRCFILE ]] || continue

        # The error file paired with a raw trace file carries the same
        # date/host suffix.  Derive both report names from the file
        # being formatted in this iteration, so that error files left
        # by earlier "noformat" runs are moved alongside their reports
        # rather than only the one belonging to the current run.
        traceSuffix=${TRCFILE#$TRCPREF.}
        TRCRPTFILE=$TRCRPTPREF.$traceSuffix
        TRCRPTERRFILE=$TRCRPTERRPREF.$traceSuffix
        print -- "$mmcmd: formatting $TRCFILE to $TRCRPTFILE"
        # Start formatting trcfiles, one at a time
        $lxtrace format -t $TRCRPTFMTLINUX -o $TRCRPTFILE $TRCFILE && \
          sync && $rm $TRCFILE
        [[ -f $TRCERRPREF.$traceSuffix ]] && \
          $mv $TRCERRPREF.$traceSuffix $TRCRPTERRFILE && sync
      done
    elif [[ -f $TRCFILE && $trace_format != "noformat" ]]
    then
      # Start formatting the file in the background
      TRCRPTFILE=$TRCRPTPREF.${TRCFILE#$TRCPREF.}
      TRCRPTERRFILE=$TRCRPTERRPREF.${TRCERRFILE#$TRCERRPREF.}
      print -- "$mmcmd: formatting $TRCFILE to $TRCRPTFILE"
      ($lxtrace format -t $TRCRPTFMTLINUX -o $TRCRPTFILE $TRCFILE && \
         sync && $rm $TRCFILE; ) &
      ([[ -f $TRCERRFILE ]] && $mv $TRCERRFILE $TRCRPTERRFILE && sync) &
    fi

  else
    checkForErrors "Unknown operating system $osName" 1
  fi

  return 0
} #------------- end of function stopTrace ------------------
292
####################################################################
#
# Function:  Turn tracing on for the current operating system.
#
# Input:     Globals osName, RUNTRCFILE, TRCFILESIZE, TRCBUFSIZE;
#            on AIX also traceht, trace_settings and trace_dispatch;
#            on Linux also RUNTRCERRFILE.
# Output:    On AIX, sets the global "hooks" to the trace hook option
#            that was passed to the trace command.
# Returns:   0
#
####################################################################
function startTrace
{
  typeset sourceFile="mmtrace.sh"
  [[ -n $DEBUG || -n $DEBUGstartTrace ]] && set -x

  case $osName in
    AIX)
      # Unless everything is being traced, restrict the AIX trace to a
      # fixed list of hooks; "dispatch" appends the dispatching hooks.
      hooks=""
      if [[ $trace_settings != "all" ]]
      then
        hooks="-j 005,006,00A,306,307,308,309"
        if [[ -n $trace_dispatch ]]
        then
          hooks="$hooks,100,101,102,103,106,200"
        fi
      fi

      # The log file size (-L) and memory buffer size (-T) come from
      # TRCFILESIZE and TRCBUFSIZE; -L must be twice the setting of -T.
      # $traceht selects which part of the wrapping buffer is logged.
      $trace -a $traceht -L $TRCFILESIZE -T $TRCBUFSIZE $hooks -o $RUNTRCFILE
      ;;

    Linux)
      # stderr goes to a file so that tracing errors are captured.
      # stdin/stdout are tied to /dev/null to prevent remote shell
      # invocations of mmtrace from waiting on open pipes.
      $lxtrace on $RUNTRCFILE $TRCFILESIZE $TRCBUFSIZE \
        0</dev/null 1>/dev/null 2>$RUNTRCERRFILE
      ;;

    *)
      checkForErrors "Unknown operating system $osName" 1
      ;;
  esac

  return 0
} #------------- end of function startTrace ------------------
329
330
331############################
332# Mainline processing
333############################
334
335#################################
336# Process the command arguments.
337#################################
338
# Option state.  trace_help becomes "true" when an option group is given
# more than once or an unknown word is seen.  traceht holds the AIX trace
# flag matching the head/tail/cont choice ("tail" is the default).
trace_action=""
trace_format=""
trace_settings=""
trace_help=""
trace_headtail=""
trace_dispatch=""
traceht="-l"

# Consume the positional parameters up to the first empty one.
while [[ -n $1 ]]
do
  case $1 in
    start | stop)
      # Start or stop tracing
      if [[ -n $trace_action ]]
      then
        trace_help="true"
      fi
      trace_action=$1
      ;;

    noformat | formatall)
      # Leave traces unformatted, or format every raw trace file
      if [[ -n $trace_format ]]
      then
        trace_help="true"
      fi
      trace_format=$1
      ;;

    head | cont | tail)
      # Head of trace, continuous trace to file, or tail of trace
      if [[ -n $trace_headtail ]]
      then
        trace_help="true"
      fi
      trace_headtail=$1
      case $1 in
        head) traceht="-f" ;;
        cont) traceht=""   ;;
        tail) traceht="-l" ;;
      esac
      ;;

    dispatch)
      # Add AIX dispatch tracehooks
      if [[ -n $trace_dispatch ]]
      then
        trace_help="true"
      fi
      trace_dispatch=",cpuid=on"
      ;;

    trace*)
      # Trace settings
      if [[ -n $trace_settings ]]
      then
        trace_help="true"
      fi
      trace_settings=$1
      ;;

    *)
      # Unknown option/action --> Display usage
      trace_help="true"
      ;;
  esac
  shift
done
401
# Check for help requests: argument parsing sets trace_help when the
# command line is invalid.  Print the usage text on stderr and stop.
if [[ $trace_help = "true" ]]
then
  print -u2 -- "Usage: mmtrace [start | stop] [noformat | formatall] [tail | head | cont] [dispatch]"
  print -u2 -- "               [trace={io | all | def | same}]"
  print -u2 -- "               [trace=\"trace_class level [trace_class level ...]\"]"
  print -u2 -- ""
  print -u2 -- "  If \"formatall\" is specified without a trace action, then"
  print -u2 -- "  \"stop\" is the default. All other command invocations"
  print -u2 -- "  where neither \"start\" nor \"stop\" are specified default"
  print -u2 -- "  to \"start\"."
  print -u2 -- ""
  print -u2 -- "  Specifying \"trace=all\", \"trace=io\", or \"trace=def\""
  print -u2 -- "  enables predefined trace settings."
  print -u2 -- ""
  print -u2 -- "  Specifying \"trace=same\" does not modify the current settings."
  print -u2 -- ""
  print -u2 -- "  If no trace= argument given and no trace levels are"
  print -u2 -- "  currently set, the default trace settings will be used."
  print -u2 -- ""
  print -u2 -- "  A set of trace levels can be set permanently using"
  # The inner quotes must be escaped; left bare they end the string early
  # and the quotation marks disappear from the printed example.
  print -u2 -- "    mmchconfig trace=\"trace_class level [trace_class level ...]\""
  print -u2 -- "  These will be the default trace levels whenever the GPFS daemon"
  print -u2 -- "  starts, but will not actually produce traces until mmtrace"
  print -u2 -- "  is run to start the AIX trace or the Linux lxtrace command."
  print -u2 -- ""
  print -u2 -- "  Default trace output directory is /tmp/mmfs. Override by"
  print -u2 -- "  setting TRCDIR environment variable."
  print -u2 -- ""
  print -u2 -- "  AIX only: (Linux trace only implements \"cont\" function)"
  print -u2 -- "    If \"tail\" is specified, the trace buffer wraps and is"
  print -u2 -- "      written to the trace file when \"mmtrace stop\" is issued."
  print -u2 -- "      (This is the default)"
  print -u2 -- "    If \"head\" is specified, the trace buffer fills only once"
  print -u2 -- "      and is written to the trace file when \"mmtrace stop\" is issued."
  print -u2 -- "    If \"cont\" is specified, the trace file is continuously"
  print -u2 -- "      written to as the trace buffer fills up. This file will wrap."
  print -u2 -- "      The tracefile size defaults to 16M and can be overridden"
  print -u2 -- "      by setting the TRCFILESIZE environment variable."
  print -u2 -- "    If \"dispatch\" is specified, AIX dispatching tracehooks"
  print -u2 -- "      will also be enabled."
  return 1
fi
445
# No explicit action given: "formatall" on its own implies "stop",
# anything else implies "start".
if [[ -z $trace_action ]]
then
  if [[ $trace_format = "formatall" ]]
  then
    trace_action="stop"
  else
    trace_action="start"
  fi
fi

# Retrieve the actual trace classes/levels: drop everything up to and
# including the first "=" of the trace argument.
[[ -n $trace_settings ]] && trace_settings=${trace_settings#*=}
461
# Ensure the GPFS system data is up to date.
gpfsInitOutput=$(gpfsInit nolock)
setGlobalVar $? $gpfsInitOutput

# Define the short hostname
SHORTHOST=$($hostname -s)

# Define the trace directory. The default value may be overridden with the
# following priority utilized:
#     1) TRCDIR environment variable
#     2) dataStructureDump setting in mmfs.cfg
#     3) /tmp/mmfs default
[[ -z $TRCDIR ]] && TRCDIR=$(getDataDumpDir $SHORTHOST)

# Default TRCDIR if overrides NULL or not fully qualified
# (stripping a leading "/" changes nothing only when there was none).
if [[ -z $TRCDIR || ($TRCDIR = ${TRCDIR#/}) ]]
then
  TRCDIR=/tmp/mmfs
fi

# Define the trace report directory and formatting templates
[[ -z $TRCRPTDIR || ($TRCRPTDIR = ${TRCRPTDIR#/}) ]] && TRCRPTDIR=$TRCDIR
[[ -z $TRCRPTFMTAIX ]] && TRCRPTFMTAIX=/etc/trcfmt
[[ -z $TRCRPTFMTLINUX ]] && TRCRPTFMTLINUX=/usr/lpp/mmfs/mmfs.trcfmt

# Get env value for TRCDISPATCH for AIX: any value other than
# off/no/0 turns dispatch tracing on, as the "dispatch" argument does.
[[ -n $TRCDISPATCH && $TRCDISPATCH != off && $TRCDISPATCH != no && $TRCDISPATCH != 0 ]] && trace_dispatch=",cpuid=on"

# Define the trace file name, trace report prefix, and
# file size and trace buffer sizes.
TRCPREF=$TRCDIR/trcfile
TRCERRPREF=$TRCDIR/trcerrfile
TRCRPTPREF=$TRCRPTDIR/trcrpt
TRCRPTERRPREF=$TRCRPTDIR/trcerr
if [[ $osName = AIX ]]
then
  RUNTRCFILE=$TRCDIR/trcfile.$SHORTHOST
  [[ -z $TRCFILESIZE ]] && TRCFILESIZE=16000000
  [[ -z $TRCBUFSIZE ]] && TRCBUFSIZE=8000000

  # Verify file/buffer sizes.  The comparison is done with (( )) so the
  # "2 * size" product is evaluated arithmetically and is never exposed
  # to file name expansion, as an unquoted 2*N word inside [ ] would be.
  if (( $TRCFILESIZE < 2 * $TRCBUFSIZE ))
  then
    print -u2 -- "$osName trace file must be twice the trace buffer size"
    return 1
  fi
elif [[ $osName = Linux ]]
then
  RUNTRCFILE=$TRCDIR/lxtrace.trc.$SHORTHOST
  RUNTRCERRFILE=$TRCDIR/lxtrace.trcerr.$SHORTHOST
  [[ -z $TRCFILESIZE ]] && TRCFILESIZE=16777216
  [[ -z $TRCBUFSIZE ]] && TRCBUFSIZE=65536

  # Verify file/buffer sizes
  if (( $TRCFILESIZE < $TRCBUFSIZE ))
  then
    print -u2 -- "$osName trace file must be larger than the trace buffer size"
    return 1
  fi
fi

# Define the name of the trace file being formatted this time
DATEHMS=$($date +"%y%m%d.%H.%M.%S")
TRCFILE=$TRCPREF.$DATEHMS.$SHORTHOST
TRCERRFILE=$TRCERRPREF.$DATEHMS.$SHORTHOST

# Ensure the trace files/reports directories exist
$mkdir -p $TRCDIR $TRCRPTDIR
530
# Any running trace is always stopped first (a trace report is produced
# if requested), whether the requested action is "stop" or "start".
stopTrace

# For "start", turn tracing back on and enable the GPFS trace levels.
if [[ $trace_action = "start" ]]
then
  # Turn tracing on.
  startTrace

  # Check if gpfs is up (tsstatus exit status 0 or 2).
  # If not, we rely on the trace levels in mmfs.cfg.
  $tsstatus -1 >/dev/null 2>&1
  rc=$?
  if (( rc == 0 || rc == 2 ))
  then

    if [[ -z $trace_settings ]]
    then
      # Check whether any trace levels have been set. If not use default set
      anytrace=$($mmfsadm showtrace 2>/dev/null |
                 $awk '{if ($2 == ":" && $3 != "0") {print "1";exit}}')
      if [[ -z $anytrace ]]
      then
        trace_settings="def"
      fi
    fi

    case $trace_settings in
      io)
        # Minimal trace levels for tracking application requests and
        # disk IO that results from those requests
        $mmfsadm trace all 0 io 1 vnop 1 vnode 1
        ;;
      all)
        # Maximum trace levels for GPFS
        $mmfsadm trace all 9
        ;;
      def)
        # A general set of trace levels for GPFS
        $mmfsadm trace all 4 tm 2 thread 1 mutex 1 vnode 2 ksvfs 3 klockl 2 io 3 pgalloc 1 mb 1 lock 2 fsck 3
        ;;
      same | "")
        # Leave the current trace levels untouched
        :
        ;;
      *)
        # User specified trace settings
        $mmfsadm trace $trace_settings
        ;;
    esac
  fi
fi

cleanupAndExit 0
Note: See TracBrowser for help on using the repository browser.