[16] | 1 | /*************************************************************************** |
---|
| 2 | * |
---|
| 3 | * Copyright (C) 2001 International Business Machines |
---|
| 4 | * All rights reserved. |
---|
| 5 | * |
---|
| 6 | * This file is part of the GPFS mmfslinux kernel module. |
---|
| 7 | * |
---|
| 8 | * Redistribution and use in source and binary forms, with or without |
---|
| 9 | * modification, are permitted provided that the following conditions |
---|
| 10 | * are met: |
---|
| 11 | * |
---|
| 12 | * 1. Redistributions of source code must retain the above copyright notice, |
---|
| 13 | * this list of conditions and the following disclaimer. |
---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
| 15 | * notice, this list of conditions and the following disclaimer in the |
---|
| 16 | * documentation and/or other materials provided with the distribution. |
---|
| 17 | * 3. The name of the author may not be used to endorse or promote products |
---|
| 18 | * derived from this software without specific prior written |
---|
| 19 | * permission. |
---|
| 20 | * |
---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
---|
| 22 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
---|
| 23 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
---|
| 24 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
---|
| 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
---|
| 27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
---|
| 28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
---|
| 29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
---|
| 30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
| 31 | * |
---|
| 32 | *************************************************************************** */ |
---|
| 33 | /* @(#)25 1.65.1.6 src/avs/fs/mmfs/ts/kernext/gpl-linux/tracedev.c, mmfs, avs_rgpfs24, rgpfs24s011a 3/14/07 10:57:03 */ |
---|
| 34 | |
---|
| 35 | /************************************************************************** |
---|
| 36 | * |
---|
| 37 | * Loadable kernel module that implements the trace device. |
---|
| 38 | * |
---|
| 39 | **************************************************************************/ |
---|
| 40 | |
---|
| 41 | #ifndef GPFS_PRINTF |
---|
| 42 | |
---|
| 43 | #ifndef __KERNEL__ |
---|
| 44 | # define __KERNEL__ |
---|
| 45 | #endif |
---|
| 46 | |
---|
| 47 | #ifndef KBUILD_MODNAME |
---|
| 48 | #define KBUILD_MODNAME tracedev |
---|
| 49 | #endif |
---|
| 50 | |
---|
| 51 | /* If trace is built into kernel, pick up GPFS flag definitions from a file |
---|
| 52 | rather than requiring them to be defined on the command line. */ |
---|
| 53 | #ifndef MODULE |
---|
| 54 | /* #include <linux/ktrace.h> */ |
---|
| 55 | #endif |
---|
| 56 | |
---|
| 57 | #include <Shark-gpl.h> |
---|
| 58 | |
---|
| 59 | #include <linux/version.h> |
---|
| 60 | #include <linux/kernel.h> |
---|
| 61 | #include <linux/module.h> |
---|
| 62 | #include <linux/errno.h> |
---|
| 63 | #include <linux/slab.h> |
---|
| 64 | #include <linux/smp_lock.h> |
---|
| 65 | #include <linux/vmalloc.h> |
---|
| 66 | #include <linux/string.h> |
---|
| 67 | |
---|
| 68 | #if LINUX_KERNEL_VERSION > 2060900 |
---|
| 69 | #include <linux/hardirq.h> /* in_interrupt */ |
---|
| 70 | #else |
---|
| 71 | #include <asm/hardirq.h> /* in_interrupt */ |
---|
| 72 | #endif |
---|
| 73 | #include <asm/uaccess.h> /* copy_to/from_user */ |
---|
| 74 | |
---|
| 75 | |
---|
| 76 | #include <stdarg.h> |
---|
| 77 | #include <Trace.h> |
---|
| 78 | #include <lxtrace.h> |
---|
| 79 | #include <verdep.h> |
---|
| 80 | |
---|
| 81 | #ifdef __64BIT__ |
---|
| 82 | # define Int64 long long |
---|
| 83 | # define ARGLEN 8 |
---|
| 84 | # define ARGTYPE Int64 |
---|
| 85 | #else |
---|
| 86 | # define Int32 int |
---|
| 87 | # define ARGLEN 4 |
---|
| 88 | # define ARGTYPE Int32 |
---|
| 89 | #endif /* __64BIT__ */ |
---|
| 90 | |
---|
| 91 | char stringPadding[8]; |
---|
| 92 | |
---|
| 93 | #if LINUX_KERNEL_VERSION > 2060900 || \ |
---|
| 94 | (LINUX_KERNEL_VERSION > 2060000 && (defined(GPFS_ARCH_PPC64) || defined(GPFS_ARCH_X86_64))) |
---|
| 95 | #define EXPORTKDUMPDEV |
---|
| 96 | #endif |
---|
| 97 | |
---|
| 98 | #ifdef EXPORTKDUMPDEV |
---|
| 99 | static int major_kdump = -1; |
---|
| 100 | #endif |
---|
| 101 | |
---|
| 102 | #if defined(MODULE) && (LINUX_KERNEL_VERSION >= 2040900) |
---|
| 103 | MODULE_LICENSE("GPL"); |
---|
| 104 | MODULE_DESCRIPTION ("GPFS portability layer (tracing module)"); |
---|
| 105 | MODULE_AUTHOR ("IBM <gpfs@us.ibm.com>"); |
---|
| 106 | #endif /* MODULE */ |
---|
| 107 | |
---|
| 108 | /* If trace is built into kernel, then this is a dummy module */ |
---|
| 109 | #ifndef KTRACE |
---|
| 110 | |
---|
| 111 | /* the daemon's task structure (for signal) */ |
---|
| 112 | static struct task_struct *taskP; |
---|
| 113 | |
---|
| 114 | /* The writeLock serializes trace writers. It should be avoided by |
---|
| 115 | * other operations in order to allow the writers to continue unimpeded. |
---|
| 116 | * The writeLock must be held when accessing the following fields in the |
---|
| 117 | * trace header element: nWaits, nBuffers, nLost, writeBuf |
---|
| 118 | */ |
---|
| 119 | static spinlock_t writeLock; |
---|
| 120 | |
---|
| 121 | /* The readLock serializes trace operations, as well as most other access |
---|
| 122 | * to the trace header element. Whenever both readLock and writeLock are |
---|
| 123 | * required, readLock is always to be acquired first. |
---|
| 124 | */ |
---|
| 125 | static struct semaphore readLock; |
---|
| 126 | |
---|
| 127 | /* The readFull flag synchronizes access to readBuf by readers and writers. |
---|
| 128 | Writers set this after filling readBuf and wait for this to be clear |
---|
| 129 | before filling readBuf. Readers use this flag to tell if readBuf has |
---|
| 130 | any data and clear this after processing. Using an atomic variable |
---|
| 131 | allows steady-state tracing to be done without readers needing to |
---|
| 132 | acquire a lock that would block writers. Note that atomic operations |
---|
| 133 | do not generally act as memory barriers, so explicit barrier calls may |
---|
| 134 | be necessary before or after accessing readFull. Spinlocks act as |
---|
| 135 | partial memory barriers, so explicit barriers can be avoided in some |
---|
| 136 | cases where spinlocks are used. */ |
---|
| 137 | static atomic_t readFull; |
---|
| 138 | |
---|
| 139 | /* Trace Header Element - THE anchor for the trace state */ |
---|
| 140 | static trcdev_header_t lxthe; |
---|
| 141 | static wait_queue_head_t daemonWaitQ; |
---|
| 142 | |
---|
/* Export pointers to internal data structures for debugging.
   All four members are file-scope statics above; publishing their
   addresses in one named global lets an external debugger (e.g. kdb
   or crash) locate the trace state without symbol access to statics. */
struct
{
  trcdev_header_t *lxtheP;        /* anchor of all trace state (lxthe) */
  wait_queue_head_t *daemonWaitQP;/* queue writers/daemon sleep on */
  struct semaphore *readLockP;    /* serializes readers/state changes */
  spinlock_t *writeLockP;         /* serializes trace writers */
} TraceVarAddrs = { &lxthe, &daemonWaitQ, &readLock, &writeLock };
---|
| 151 | |
---|
/* A trcdev_buffer is dirty if there is any data in it (nextP != beginP) AND the
 * dirtyP has not yet been advanced (by trc_read) past the data (to nextP).
 * NOTE: takes the buffer by value-expression (b.nextP etc.), so the argument
 * must be a struct, not a pointer. Evaluates its argument multiple times. */
#define IS_DIRTY(b) (b.nextP != b.beginP && b.nextP != b.dirtyP)
---|
| 155 | |
---|
/* A trace record passed from a user thread consists of a data header
   followed by the marshalled arguments. The data array is sized so the
   whole struct is exactly LXTRACE_MAX_DATA bytes, matching the upper
   bound enforced on nBytes in trc_write. */
struct trcRec
{
  trc_datahdr_t hdr;                              /* hookword + record metadata */
  char data[LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)]; /* marshalled arguments */
};
---|
| 163 | |
---|
| 164 | |
---|
/* Updating state information requires the writeLock in addition to
 * readLock. The readLock is widely held where the trace header is
 * manipulated, but for the brief period of updating the state field, get the
 * writeLock as well.
 */
static void setTraceState(trcdev_state_t newState)
{
  spin_lock(&writeLock);
  lxthe.state = newState;
  /* Writers sleeping in trc_start_record/trc_fsync_internal re-check
     lxthe.state in their wait conditions, so every state change must
     wake the daemon wait queue. */
  wake_up(&daemonWaitQ);
  spin_unlock(&writeLock);
}
---|
| 177 | |
---|
| 178 | |
---|
| 179 | /* Return true if the specified hookword is currently being traced. */ |
---|
| 180 | static Boolean isTraced(uint hw) |
---|
| 181 | { |
---|
| 182 | return lxthe.state == trc_active;; |
---|
| 183 | } |
---|
| 184 | |
---|
| 185 | |
---|
/* Construct the static trace header element ("lxthe").
 * trc_open will allocate buffers and set the appropriate values. */
static void trc_init()
{
  spin_lock_init(&writeLock);
  sema_init(&readLock, 1);   /* binary semaphore: acts as a sleeping mutex */

  /* Note: Locks are not needed here. There better not be any other
     threads trying to access lxthe at this point. If there were, then
     what would happen if a thread tried to acquire the locks a few
     instructions earlier, before we initialized the locks? */

  lxthe.major = 0; /* dynamic assignment (by register_chrdev in trc_register) */
  lxthe.minor = 0;
  lxthe.bufSize = 0;   /* 0 means "use DEF_TRC_BUFSIZE" at first open */
  lxthe.nOpens = 0;
  lxthe.nWaits = 0;    /* writers that had to sleep for buffer space */
  lxthe.nBuffers = 0;  /* buffers handed to the daemon so far */
  lxthe.nLost = 0;     /* records dropped because caller couldn't block */
  atomic_set(&readFull, 0);
  taskP = NULL;        /* no trace daemon registered yet */
  init_waitqueue_head(&daemonWaitQ);

  /* No buffers until trc_open allocates them */
  lxthe.writeBuf.beginP = NULL;
  lxthe.writeBuf.endP = NULL;
  lxthe.writeBuf.nextP = NULL;
  lxthe.writeBuf.dirtyP = NULL;

  lxthe.readBuf = lxthe.writeBuf;

  lxthe.state = trc_initialized;
}
---|
| 218 | |
---|
/* Destroy the static trace header element (lxthe) */
static void trc_term()
{
  /* Note: Locks are not needed here. We're about to re-initialize, so
     if anyone is still using lxthe at this point, we would clobber them. */

  /* The two buffers are allocated together (one vmalloc of 2*bufSize in
     trc_open), so free them with a single vfree of the lower address.
     MIN is needed because trc_signal_io may have swapped readBuf and
     writeBuf an odd number of times. */
  if (lxthe.writeBuf.beginP)
    vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP));

  /* (re)initialize all fields. Rather than copy all the stuff that happens
   * in trc_init, we can use it here to reset all the fields. */
  trc_init();
}
---|
| 233 | |
---|
| 234 | #ifdef EXPORTKDUMPDEV |
---|
/* Read handler for the kernel-dump device. Treats the file offset
   (*ppos) as a kernel virtual address and copies nBytes from there to
   the user buffer. Returns nBytes on success, -EINVAL if the address
   does not map to valid kernel memory or the copy faults. */
static ssize_t kdump_read(struct file *fileP, char *bufP, size_t nBytes, loff_t *ppos)
{
  int rc= -EINVAL;
#if defined(GPFS_ARCH_X86_64) && LINUX_KERNEL_VERSION >= 2061600
  /* rw_verify_area does not allow kernel addr range,
     so a read() will fail with EINVAL.
     We subtracted the base kernel addr is kdump.c and add back in here. */
  unsigned long highBits = GPFS_KERNEL_OFFSET;
#else
  unsigned long highBits = 0;
#endif

  /* Only copy from addresses backed by real memory; copy_to_user
     returns the number of bytes NOT copied, so 0 means full success. */
  if (virt_addr_valid((unsigned long)*ppos + highBits))
    if (copy_to_user(bufP, (void *)((unsigned long)*ppos + highBits), nBytes)==0)
      rc=nBytes;
  return((ssize_t)rc);
}
---|
| 252 | |
---|
| 253 | static int kdump_open(struct inode *inodeP, struct file *fileP) |
---|
| 254 | { |
---|
| 255 | MY_MODULE_INCREMENT(); |
---|
| 256 | fileP->f_pos=0; |
---|
| 257 | return 0; |
---|
| 258 | } |
---|
| 259 | |
---|
/* Release handler for the kernel-dump device: drop the module
   reference taken in kdump_open. No other state to tear down. */
static int kdump_close(struct inode *inodeP, struct file *fileP)
{
  MY_MODULE_DECREMENT();
  return 0;
}
---|
| 265 | |
---|
| 266 | static loff_t kdump_lseek(struct file *fileP, loff_t offset, int orgin) |
---|
| 267 | { |
---|
| 268 | loff_t rc; |
---|
| 269 | |
---|
| 270 | if (orgin != 0) |
---|
| 271 | return(-EAGAIN); |
---|
| 272 | |
---|
| 273 | fileP->f_pos = offset; |
---|
| 274 | |
---|
| 275 | return(offset); |
---|
| 276 | } |
---|
| 277 | #endif |
---|
/* The device open operation. The first open is initiated by the trace daemon,
 * and comes after registration. It results in the allocation of the trace
 * buffers, and identifying the trace daemon (so it can be signalled when
 * buffers are ready to be read). */
static int trc_open(struct inode *inodeP, struct file *fileP)
{
  int rc = 0;

  /* Serialize multiple opens and prevent state changes */
  down(&readLock);

  /* Only the daemon opens the device O_RDWR, and only does so when turning
   * trace on.
   */
  if ((fileP->f_flags & O_ACCMODE) == O_RDWR)
  {
    /* A second daemon open while tracing is running is an error */
    if (lxthe.state != trc_initialized)
    {
      rc = -EALREADY;
      goto exit;
    }

    /* The first open (lxtrace on) requires initialization of the header. */
    lxthe.minor = MINOR(inodeP->i_rdev);

    /* Only supporting one such device */
    if (lxthe.minor > 0)
    {
      rc = -ENODEV;
      goto exit;
    }

    /* If not configured otherwise, use the default buffer size. */
    if (lxthe.bufSize == 0)
      lxthe.bufSize = DEF_TRC_BUFSIZE;

    /* Allocate dual trace buffers (new records go into the write buffer,
     * and the daemon reads (via trc_read) from the read buffer).
     * One allocation of 2*bufSize; trc_term/trc_close free it as one. */
    lxthe.writeBuf.beginP = vmalloc(2*lxthe.bufSize);
    if (!lxthe.writeBuf.beginP)
    {
      rc = -ENOMEM;
      goto exit;
    }
    lxthe.writeBuf.endP = lxthe.writeBuf.beginP + lxthe.bufSize - 1;
    lxthe.writeBuf.nextP = lxthe.writeBuf.beginP;
    lxthe.writeBuf.dirtyP = lxthe.writeBuf.beginP;
    lxthe.writeBuf.bufNum = 1;

    /* Read buffer is the second half of the same allocation */
    lxthe.readBuf.beginP = lxthe.writeBuf.beginP + lxthe.bufSize;
    lxthe.readBuf.endP = lxthe.readBuf.beginP + lxthe.bufSize - 1;
    lxthe.readBuf.nextP = lxthe.readBuf.beginP;
    lxthe.readBuf.dirtyP = lxthe.readBuf.beginP;
    lxthe.readBuf.bufNum = 2;

    /* Save pointer to the daemon task information, and mark the
     * device open. */
    taskP = current;
    setTraceState(trc_opened);

    /* Since threads that handle VM page-outs also do traces, set flag so
       that we will not get blocked waiting to allocate pages. Otherwise a
       deadlock could occur if the page-out thread was waiting for us to
       empty the trace buffer, and we are waiting for the page-out thread
       to free some pages. */
    current->flags |= PF_MEMALLOC;
  }

  /* Applications must open the trace device O_WRONLY. These opens do not
   * require any processing. If the daemon has turned tracing on, the open
   * is allowed and subsequent write() calls will be handled. If the daemon
   * has NOT turned tracing on, the application open will be granted, but
   * subsequent write() calls will NOOP
   * until the daemon turns trace on (state == trac_active). */

  else if ((fileP->f_flags & O_ACCMODE) != O_WRONLY)
  {
    /* After "trace on", subsequent trace control commands open O_RDONLY. */
    if (lxthe.state != trc_active)
    {
      rc = -EALREADY;
      goto exit;
    }
  }

  lxthe.nOpens += 1;

  MY_MODULE_INCREMENT();

exit:
  up(&readLock);
  return rc;
}
---|
| 371 | |
---|
/* The device read operation. This is to be used only by the trace daemon to
 * retrieve trace buffers for the purposes of writing to the output file.
 * Returns the number of bytes copied (possibly 0 if nothing is pending),
 * -EPERM for callers other than the daemon, -EFBIG if the caller's buffer
 * is too small for the pending data, or -EFAULT on copyout failure. */
static ssize_t trc_read(struct file *fileP, char *bufP, size_t nBytes, loff_t *ppos)
{
  ssize_t nDone = 0;
  ssize_t nReady;

  /* All access to lxthe.readBuf is protected via the readLock. */
  down(&readLock);

  /* Only the trace daemon is allowed to read. */
  if (taskP && taskP->pid != current->pid)
  {
    nDone = -EPERM;
    goto exit;
  }

  /* See if there is data waiting to be processed by the daemon. Read is
   * allowed here during normal operation (trc_active) and as trace is
   * terminating (this to get the last group of buffered records). */
  if ((lxthe.state == trc_active || lxthe.state == trc_stopped) &&
      atomic_read(&readFull))
  {
    /* Be sure that we don't access readBuf until after readFull is set */
    rmb();

    if (IS_DIRTY(lxthe.readBuf))
    {
      /* Make sure that the caller's buffer is large enough to hold
       * what we have. */
      nReady = lxthe.readBuf.nextP - lxthe.readBuf.beginP;
      if (nReady > nBytes)
      {
        nDone = -EFBIG;
        goto exit;
      }

      if (copy_to_user(bufP, lxthe.readBuf.dirtyP, nReady))
      {
        nDone = -EFAULT;
        goto exit;
      }

      /* Advance dirtyP past what the daemon consumed; IS_DIRTY below
         then sees the buffer as clean. */
      nDone = nReady;
      lxthe.readBuf.dirtyP += nDone;
    }

    /* Allow writers to use readBuf: publish our readBuf updates (wmb)
       before clearing readFull, then wake any writer blocked on a
       buffer swap in trc_start_record/trc_fsync_internal. */
    if (!IS_DIRTY(lxthe.readBuf))
    {
      wmb();
      atomic_set(&readFull, 0);
      wake_up(&daemonWaitQ);
    }
  }

exit:
  up(&readLock);
  return nDone;
}
---|
| 432 | |
---|
/* Deliver signal mySig (with sigData) to the task taskP. Rather than
   signalling taskP directly, walk the global thread list under
   rcu_read_lock and signal only the entry that matches taskP.
   NOTE(review): presumably this guards against taskP having exited
   (a stale pointer would not be found in the list) -- confirm against
   the kernel versions this module targets. */
static void my_send_sig_info(int mySig, struct siginfo * sigData, struct task_struct *taskP)
{
  struct task_struct *g, *tsP;
  // read_lock(&tasklist_lock);
  rcu_read_lock();

  DO_EACH_THREAD(g,tsP)
  {
    if (tsP == taskP)
    {
      send_sig_info(mySig, sigData, tsP);
      break;
    }
  } WHILE_EACH_THREAD(g,tsP);
  // read_unlock(&tasklist_lock);
  rcu_read_unlock();
}
---|
| 450 | |
---|
| 451 | /* Internal routine to schedule i/o of the trace buffer. |
---|
| 452 | NOTE that this routine is called while holding the writeLock. */ |
---|
| 453 | static void trc_signal_io() |
---|
| 454 | { |
---|
| 455 | trcdev_buffer_t tBuf; |
---|
| 456 | struct siginfo sigData; |
---|
| 457 | |
---|
| 458 | /* DBGASSERT(atomic_read(&readFull) == 0); */ |
---|
| 459 | |
---|
| 460 | /* Switch the buffers. We don't have to worry about trc_read looking at |
---|
| 461 | readBuf while we do this because it always verify that readFull is |
---|
| 462 | non-zero before accessing readBuf. */ |
---|
| 463 | rmb(); |
---|
| 464 | tBuf = lxthe.readBuf; |
---|
| 465 | lxthe.readBuf = lxthe.writeBuf; |
---|
| 466 | lxthe.writeBuf= tBuf; |
---|
| 467 | lxthe.nBuffers++; /* Number of buffers filled */ |
---|
| 468 | |
---|
| 469 | /* Mark readBuf full so that writers won't switch to it until after the |
---|
| 470 | daemon has processed it. Do write memory barrier to ensure that |
---|
| 471 | our change to readBuf makes it to memory before readFull is set. */ |
---|
| 472 | wmb(); |
---|
| 473 | atomic_set(&readFull, 1); |
---|
| 474 | |
---|
| 475 | /* Reset the (new) writeBuf to a clean state */ |
---|
| 476 | lxthe.writeBuf.dirtyP = lxthe.writeBuf.nextP = lxthe.writeBuf.beginP; |
---|
| 477 | |
---|
| 478 | /* Debug for 471707: Since all trace records begin with a header the |
---|
| 479 | * very first thing in a dirty buffer should be a valid header. If |
---|
| 480 | * this is not the case, print debug information to the log file. |
---|
| 481 | */ |
---|
| 482 | if (IS_DIRTY(tBuf) && (((trc_header_t *)tBuf.beginP)->trMagic != LXTRACE_MAGIC)) |
---|
| 483 | { |
---|
| 484 | printk("trc_signal_io: bad trace buffer! trMagic 0x%X\n", |
---|
| 485 | ((trc_header_t *)tBuf.beginP)->trMagic); |
---|
| 486 | printk("trc_signal_io: begin 0x%x end 0x%X next 0x%X dirty 0x%X isDirty %d\n", |
---|
| 487 | (trc_header_t *)tBuf.beginP, (trc_header_t *)tBuf.endP, |
---|
| 488 | (trc_header_t *)tBuf.nextP, (trc_header_t *)tBuf.dirtyP, |
---|
| 489 | IS_DIRTY(tBuf)); |
---|
| 490 | } |
---|
| 491 | |
---|
| 492 | /* Signal daemon that there is a trace buffer to be read and processed. */ |
---|
| 493 | sigData.si_signo = SIGIO; |
---|
| 494 | sigData.si_errno = 0; |
---|
| 495 | sigData.si_code = SI_KERNEL; |
---|
| 496 | my_send_sig_info(SIGIO, &sigData, taskP); |
---|
| 497 | } |
---|
| 498 | |
---|
| 499 | |
---|
| 500 | /* Return true if trace writer will have to wait for daemon to make room |
---|
| 501 | for a trace record of the specified length. */ |
---|
| 502 | static int writeMustBlock(int len) |
---|
| 503 | { |
---|
| 504 | return (len > lxthe.writeBuf.endP - lxthe.writeBuf.nextP + 1 && |
---|
| 505 | atomic_read(&readFull) && lxthe.state == trc_active); |
---|
| 506 | } |
---|
| 507 | |
---|
| 508 | |
---|
/* Reserves space for a trace record whose data header plus arguments
   totals nBytes. Returns 0 if space was reserved for the trace record,
   or non-zero if space could not be found because the buffer is full
   and cantBlock is set, or because tracing is not enabled. If space
   was reserved successfully, builds a trace header, then copies the
   trace header and the given data header into the trace device buffer.
   After returning from this routine, trc_append_record should be called
   0 or more times, then trc_end_record. Returns with writeLock held iff
   0 was returned. */
static int
trc_start_record(trc_datahdr_t * hdrP, size_t nBytes, int cantBlock)
{
  int trclen;
  trc_header_t tHdr;

  /* Construct the trace record header */
  tHdr.trMagic = LXTRACE_MAGIC;

  /* Wait to set the timestamp (tHdr.trTime) until after all serialization.
   * When multiple threads call trace, they don't necessarily get the
   * writeLock in FIFO order so setting the timestamp here can result
   * in times going backwards in the trace file.
   * do_gettimeofday(&tHdr.trTime);
   */

  tHdr.trProcess = current->pid;
  tHdr.trCPU = smp_processor_id();
  tHdr.trLength = nBytes;
  trclen = nBytes + sizeof(trc_header_t);   /* total bytes to reserve */

  /* Serialize access to writeBuf */
  spin_lock(&writeLock);

  /* If this trace record will not fit in the write buffer, and the read
     buffer is still full, and trace is active, then we must wait for the
     daemon to empty the read buffer. */
  if (writeMustBlock(trclen))
  {
    if (cantBlock)
    {
      /* Caller cannot sleep: count the record as lost and fail */
      lxthe.nLost++;
      spin_unlock(&writeLock);
      return 1;
    }

    if (lxthe.state != trc_active)
    {
      spin_unlock(&writeLock);
      return 1;
    }

    lxthe.nWaits++;
    /* Drop the spinlock while sleeping; re-test under the lock because
       another writer may have consumed the freed space before we
       reacquired it. */
    do
    {
      spin_unlock(&writeLock);
      wait_event(daemonWaitQ, !writeMustBlock(trclen));
      spin_lock(&writeLock);
    } while (writeMustBlock(trclen));
  }

  /* State may have changed (e.g. trace stopped) while we slept */
  if (lxthe.state != trc_active)
  {
    spin_unlock(&writeLock);
    return 1;
  }

  /* Will the trace record fit into the write buffer? If not, then we can
     swap with the read buffer which must be empty at this point (else we
     wouldn't have come out of previous wait loop). */
  if (trclen > lxthe.writeBuf.endP - lxthe.writeBuf.nextP + 1)
  {
    /* Swap write buffer with read buffer and signal daemon to process the
       data. */
    trc_signal_io();

    /* This could be an assert, since write buffer must be empty now. */
    if (trclen > lxthe.writeBuf.endP - lxthe.writeBuf.nextP + 1)
    {
      spin_unlock(&writeLock);
      return 1;
    }
  }

  /* Now that there isn't anything to block the writing of this
   * record, insert the timestamp.
   */
  do_gettimeofday(&tHdr.trTime);

  /* Insert the header stamp into the buffer ahead of the application
     record and remember its location. */
  lxthe.tHdrP = (trc_header_t *)lxthe.writeBuf.nextP;
  tHdr.trBuf = lxthe.writeBuf.bufNum;
  memcpy(lxthe.writeBuf.nextP, &tHdr, sizeof(tHdr));
  lxthe.writeBuf.nextP += sizeof(tHdr);

  /* Move the application trace header directly into the trace buffer and
     remember its location */
  lxthe.hdrP = (trc_datahdr_t *)lxthe.writeBuf.nextP;
  memcpy(lxthe.writeBuf.nextP, hdrP, sizeof(*hdrP));
  lxthe.writeBuf.nextP += sizeof(*hdrP);

  /* Return with writeLock still held */
  return 0;
}
---|
| 613 | |
---|
| 614 | |
---|
| 615 | /* Append a portion of a trace record to the write buffer. Must have |
---|
| 616 | previously called trc_start_record. */ |
---|
| 617 | static void trc_append_record(const void* bufP, size_t nBytes) |
---|
| 618 | { |
---|
| 619 | /* Move the application trace record directly into the trace buffer */ |
---|
| 620 | memcpy(lxthe.writeBuf.nextP, bufP, nBytes); |
---|
| 621 | lxthe.writeBuf.nextP += nBytes; |
---|
| 622 | } |
---|
| 623 | |
---|
| 624 | |
---|
/* Finish a trace record */
static void trc_end_record()
{
  /* Release the writeLock that a successful trc_start_record returned
     holding; the record is now complete in the write buffer. */
  spin_unlock(&writeLock);
}
---|
| 630 | |
---|
| 631 | |
---|
/* The device write operation: accepts one marshalled trace record from
   user space (a trc_datahdr_t followed by argument bytes) and appends it
   to the trace buffer via trc_start_record/trc_append_record. Returns
   nBytes even when the record is not traced, so applications see writes
   as successful no-ops while tracing is off. */
static ssize_t
trc_write(struct file *fileP, const char *bufP, size_t nBytes, loff_t *posP)
{
  struct trcRec tr;
  int rc;
  int dataBytes;

  /* Copy trace record from user address space.
     NOTE(review): the lower bound of 4 appears to guarantee only that the
     leading hookword is present, not the full trc_datahdr_t -- confirm
     that callers never pass 4 <= nBytes < sizeof(tr.hdr). */
  if (nBytes < 4 || nBytes > LXTRACE_MAX_DATA)
    return -EINVAL;
  if (copy_from_user(&tr, bufP, nBytes))
    return -EFAULT;

  /* The beginning of the trace record is a hookword number. Verify that
     the specified hookword is being traced. If not, return as if the trace
     was successful. */
  if (isTraced(tr.hdr.trHook))
  {
    rc = trc_start_record(&tr.hdr, nBytes, false);
    if (rc == 0)
    {
      /* writeLock is held here; append argument bytes (if any), then
         release the lock via trc_end_record. */
      dataBytes = nBytes - sizeof(tr.hdr);
      if (dataBytes > 0)
        trc_append_record(&tr.data[0], dataBytes);
      trc_end_record();
    }
  }
  return nBytes;
}
---|
| 661 | |
---|
/* Before close, a sync of the trace device will flush the records
 * still in the read buffer (even though it might not be full). A
 * close without this call could result in the loss of these records.
 * Must not call fsync from daemon termination signal handler because
 * that could deadlock if a SIGIO is still pending.
 * Always returns 0; fileP/dP/datasync are unused (the signature matches
 * the fsync file operation; trc_fsync passes NULL/NULL/0).
 */
static int
trc_fsync_internal(struct file* fileP, struct dentry* dP, int datasync)
{
  spin_lock(&writeLock);

  /* If read buffer is still full, wait for daemon to process it.
     Drop the spinlock while sleeping and re-test under the lock. */
  while (atomic_read(&readFull) &&
         (lxthe.state == trc_active || lxthe.state == trc_stopped))
  {
    spin_unlock(&writeLock);
    wait_event(daemonWaitQ,
               !(atomic_read(&readFull) &&
                 (lxthe.state == trc_active || lxthe.state == trc_stopped)));
    spin_lock(&writeLock);
  }

  /* Allow fsync during normal operation OR after ioctl(trc_end) has
     disabled further trace writing (allows an fsync before close to
     flush the buffered records). */
  if (lxthe.writeBuf.nextP != lxthe.writeBuf.beginP &&
      (lxthe.state == trc_active || lxthe.state == trc_stopped))
    trc_signal_io();

  spin_unlock(&writeLock);
  return 0;
}
---|
| 694 | |
---|
| 695 | |
---|
/* The externally visible version of trc_fsync_internal: flush buffered
   trace records without a file/dentry context (all arguments NULL/0). */
int trc_fsync()
{
  return trc_fsync_internal(NULL, NULL, 0);
}
---|
| 701 | |
---|
| 702 | |
---|
/* The device close operation. Application closes only decrement the
 * open count; the trace daemon's close (identified by pid) additionally
 * resets the trace state and frees the trace buffers. Always returns 0. */
static int trc_close(struct inode *inodeP, struct file *fileP)
{
  down(&readLock);

  /* The trace daemon only closes the device upon termination. */
  if (taskP && taskP->pid == current->pid)
  {
    /* The final trace daemon close. Reset for subsequent use. */
    setTraceState(trc_initialized);

    /* We don't really need writeLock here since writers won't do anything
       after state is set to trc_initialized, but it doesn't hurt. */
    spin_lock(&writeLock);
    lxthe.nWaits = 0;
    lxthe.nBuffers = 0;
    lxthe.nLost = 0;
    spin_unlock(&writeLock);
    taskP = NULL;
    /* Undo the PF_MEMALLOC set in trc_open for the daemon */
    current->flags &= ~PF_MEMALLOC;

    /* Free the two trace buffers. They are one allocation; MIN picks
       the lower address in case the buffers have been swapped. */
    if (lxthe.writeBuf.beginP)
    {
      vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP));

      lxthe.writeBuf.beginP = NULL;
      lxthe.writeBuf.endP = NULL;
      lxthe.writeBuf.nextP = NULL;
      lxthe.writeBuf.dirtyP = NULL;

      lxthe.readBuf = lxthe.writeBuf;
    }
  }

  lxthe.nOpens -= 1;

  MY_MODULE_DECREMENT();

  up(&readLock);
  return 0;
}
---|
| 745 | |
---|
| 746 | |
---|
| 747 | /* ioctl op used to for low-level access to trace operation. */ |
---|
| 748 | static int trc_ioctl(struct inode *inodeP, struct file *fileP, |
---|
| 749 | unsigned int op, unsigned long kx_args) |
---|
| 750 | { |
---|
| 751 | int h, rc = 0; |
---|
| 752 | Boolean readLockHeld = false; |
---|
| 753 | struct kArgs args_cp; |
---|
| 754 | struct kArgs *args = (struct kArgs *)kx_args; |
---|
| 755 | char *p; |
---|
| 756 | char *newBufP; |
---|
| 757 | char *trc_dumpP; |
---|
| 758 | char *trc_nextP; |
---|
| 759 | struct siginfo sigData; |
---|
| 760 | int waitCount = 0; |
---|
| 761 | |
---|
| 762 | down(&readLock); |
---|
| 763 | readLockHeld = true; |
---|
| 764 | |
---|
| 765 | switch (op) |
---|
| 766 | { |
---|
| 767 | case trc_begin: |
---|
| 768 | if (lxthe.state == trc_active) |
---|
| 769 | { |
---|
| 770 | rc = -EALREADY; |
---|
| 771 | break; |
---|
| 772 | } |
---|
| 773 | if (lxthe.state != trc_opened) |
---|
| 774 | { |
---|
| 775 | rc = -EBADF; |
---|
| 776 | break; |
---|
| 777 | } |
---|
| 778 | setTraceState(trc_active); |
---|
| 779 | break; |
---|
| 780 | |
---|
| 781 | case trc_end: |
---|
| 782 | if (lxthe.state != trc_active) |
---|
| 783 | rc = -EBADF; |
---|
| 784 | else |
---|
| 785 | { |
---|
| 786 | setTraceState(trc_stopped); |
---|
| 787 | up(&readLock); |
---|
| 788 | readLockHeld = false; |
---|
| 789 | trc_fsync(); |
---|
| 790 | |
---|
| 791 | /* Signal the daemon to terminate. */ |
---|
| 792 | sigData.si_signo = SIGTERM; |
---|
| 793 | sigData.si_errno = 0; |
---|
| 794 | sigData.si_code = SI_KERNEL; |
---|
| 795 | my_send_sig_info(SIGTERM, &sigData, taskP); |
---|
| 796 | } |
---|
| 797 | |
---|
| 798 | /* Wait for lxtrace to terminate, but don't wait forever. |
---|
| 799 | At this point the signal has been delivered to lxtrace, |
---|
| 800 | but it may take some time for the process to exit. Since |
---|
| 801 | lxthe.state is changed from trc_stopped to trc_initialized |
---|
| 802 | in trc_close(), which is called when lxtrace exits, if we |
---|
| 803 | return control to the caller right away, there'd be a window |
---|
| 804 | when tracing has ostensibly been stopped, and it should be |
---|
| 805 | OK to start tracing again, but trying to do so would fail |
---|
| 806 | with EALREADY in trc_open because lxthe.state is not what |
---|
| 807 | the code expects. So we give lxtrace some time to terminate. |
---|
| 808 | Something could go seriously wrong, and lxtrace may get stuck, |
---|
| 809 | we don't wait forever. */ |
---|
| 810 | while (lxthe.state == trc_stopped && waitCount++ < 10) |
---|
| 811 | { |
---|
| 812 | current->state = TASK_INTERRUPTIBLE; |
---|
| 813 | schedule_timeout(100); |
---|
| 814 | } |
---|
| 815 | break; |
---|
| 816 | |
---|
| 817 | case trc_bufSize: |
---|
| 818 | |
---|
| 819 | /* The daemon may call ioctl to change the desired buffer size. |
---|
| 820 | On open, buffers of the default size are allocated. This call |
---|
| 821 | frees the current buffers (replacing them with new ones). Any |
---|
| 822 | trace records currently in the buffers will be lost. */ |
---|
| 823 | |
---|
| 824 | if (lxthe.state != trc_opened) |
---|
| 825 | { |
---|
| 826 | rc = -EPERM; |
---|
| 827 | break; |
---|
| 828 | } |
---|
| 829 | |
---|
| 830 | /* get the argument array */ |
---|
| 831 | if (copy_from_user(&args_cp, args, sizeof(args_cp))) |
---|
| 832 | { |
---|
| 833 | rc = -EFAULT; |
---|
| 834 | break; |
---|
| 835 | } |
---|
| 836 | |
---|
| 837 | /* Allocate the new (dual) trace buffers. |
---|
| 838 | * arg1 is the requested buffer size */ |
---|
| 839 | newBufP = vmalloc(2*args_cp.arg1); |
---|
| 840 | if (!newBufP) |
---|
| 841 | { |
---|
| 842 | rc = -ENOMEM; |
---|
| 843 | break; |
---|
| 844 | } |
---|
| 845 | |
---|
| 846 | /* Free the previous buffers. Since the state is currently |
---|
| 847 | * "trc_opened" and we are holding readLock, neither readers nor |
---|
| 848 | * writers can be using the buffers at this time. */ |
---|
| 849 | if (lxthe.writeBuf.beginP) |
---|
| 850 | vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP)); |
---|
| 851 | |
---|
| 852 | lxthe.bufSize = args_cp.arg1; |
---|
| 853 | lxthe.writeBuf.beginP = newBufP; |
---|
| 854 | |
---|
| 855 | lxthe.writeBuf.endP = lxthe.writeBuf.beginP + lxthe.bufSize - 1; |
---|
| 856 | lxthe.writeBuf.nextP = lxthe.writeBuf.beginP; |
---|
| 857 | lxthe.writeBuf.dirtyP = lxthe.writeBuf.beginP; |
---|
| 858 | |
---|
| 859 | lxthe.readBuf.beginP = lxthe.writeBuf.beginP + lxthe.bufSize; |
---|
| 860 | lxthe.readBuf.endP = lxthe.readBuf.beginP + lxthe.bufSize - 1; |
---|
| 861 | lxthe.readBuf.nextP = lxthe.readBuf.beginP; |
---|
| 862 | lxthe.readBuf.dirtyP = lxthe.readBuf.beginP; |
---|
| 863 | break; |
---|
| 864 | |
---|
| 865 | case trc_dump: |
---|
| 866 | |
---|
| 867 | /* format trace header information and return to daemon */ |
---|
| 868 | trc_dumpP = vmalloc(LXTRACE_DUMP_SIZE); |
---|
| 869 | if (trc_dumpP == NULL) |
---|
| 870 | { |
---|
| 871 | rc = -ENOMEM; |
---|
| 872 | break; |
---|
| 873 | } |
---|
| 874 | |
---|
| 875 | if (copy_from_user(&args_cp, args, sizeof(args_cp))) |
---|
| 876 | { |
---|
| 877 | rc = -EFAULT; |
---|
| 878 | break; |
---|
| 879 | } |
---|
| 880 | |
---|
| 881 | /* Block writers so that we can look at writeBuf. */ |
---|
| 882 | spin_lock(&writeLock); |
---|
| 883 | |
---|
| 884 | /* Format the state information suitable for displaying by |
---|
| 885 | * the daemon. |
---|
| 886 | */ |
---|
| 887 | trc_nextP = trc_dumpP; |
---|
| 888 | sprintf(trc_nextP, "Trace Header Element: 0x%08X\n", &lxthe); |
---|
| 889 | trc_nextP += strlen(trc_nextP); |
---|
| 890 | |
---|
| 891 | /* Global information on device number, buffer sizes, |
---|
| 892 | * and lost records. |
---|
| 893 | */ |
---|
| 894 | sprintf(trc_nextP, " Major %d Minor %d bufSize 0x%X nOpens %d " |
---|
| 895 | "nBuffers %d nLost %d nWaits %d Daemon %d\n", |
---|
| 896 | lxthe.major, lxthe.minor, lxthe.bufSize, lxthe.nOpens, |
---|
| 897 | lxthe.nBuffers, lxthe.nLost, lxthe.nWaits, |
---|
| 898 | taskP ? taskP->pid: 0); |
---|
| 899 | trc_nextP += strlen(trc_nextP); |
---|
| 900 | |
---|
| 901 | sprintf(trc_nextP, "\n"); |
---|
| 902 | trc_nextP += strlen(trc_nextP); |
---|
| 903 | |
---|
| 904 | /* Append buffer information */ |
---|
| 905 | sprintf(trc_nextP, " writeBuf: beginP 0x%X endP 0x%X nextP 0x%X " |
---|
| 906 | "dirtyP 0x%X isDirty %d\n", |
---|
| 907 | lxthe.writeBuf.beginP, lxthe.writeBuf.endP, |
---|
| 908 | lxthe.writeBuf.nextP, lxthe.writeBuf.dirtyP, |
---|
| 909 | IS_DIRTY(lxthe.writeBuf)); |
---|
| 910 | trc_nextP += strlen(trc_nextP); |
---|
| 911 | |
---|
| 912 | sprintf(trc_nextP, " readBuf : beginP 0x%X endP 0x%X nextP 0x%X " |
---|
| 913 | "dirtyP 0x%X isDirty %d\n", |
---|
| 914 | lxthe.readBuf.beginP, lxthe.readBuf.endP, |
---|
| 915 | lxthe.readBuf.nextP, lxthe.readBuf.dirtyP, |
---|
| 916 | IS_DIRTY(lxthe.readBuf)); |
---|
| 917 | trc_nextP += strlen(trc_nextP); |
---|
| 918 | |
---|
| 919 | #if 0 |
---|
| 920 | /* verify dumpBuf size */ |
---|
| 921 | sprintf(trc_nextP, " dumpBuf size %d (used %d)\n", |
---|
| 922 | LXTRACE_DUMP_SIZE, (trc_nextP-trc_dumpP)); |
---|
| 923 | trc_nextP += strlen(trc_nextP); |
---|
| 924 | #endif |
---|
| 925 | spin_unlock(&writeLock); |
---|
| 926 | |
---|
| 927 | /* arg1 is the user buffer size, arg2 is the address of the buffer */ |
---|
| 928 | if (copy_to_user((char *)args_cp.arg2, trc_dumpP, |
---|
| 929 | MIN(strlen(trc_dumpP)+1, args_cp.arg1))) |
---|
| 930 | rc = -EFAULT; |
---|
| 931 | |
---|
| 932 | vfree(trc_dumpP); |
---|
| 933 | break; |
---|
| 934 | |
---|
| 935 | default: |
---|
| 936 | rc = -EINVAL; |
---|
| 937 | break; |
---|
| 938 | } |
---|
| 939 | |
---|
| 940 | if (readLockHeld) |
---|
| 941 | up(&readLock); |
---|
| 942 | |
---|
| 943 | return rc; |
---|
| 944 | } |
---|
| 945 | |
---|
| 946 | static struct file_operations trc_ops = |
---|
| 947 | { |
---|
| 948 | llseek: NULL, |
---|
| 949 | read: trc_read, /* read op allows the daemon to retrieve records */ |
---|
| 950 | write: trc_write, /* Trace points write to the device */ |
---|
| 951 | readdir: NULL, |
---|
| 952 | poll: NULL, |
---|
| 953 | ioctl: trc_ioctl, /* control op to change buffering or dump state */ |
---|
| 954 | mmap: NULL, |
---|
| 955 | open: trc_open, /* Prepare the device for tracing */ |
---|
| 956 | flush: NULL, |
---|
| 957 | release: trc_close, /* Terminate tracing and close the device */ |
---|
| 958 | fsync: trc_fsync_internal, /* Sync all buffered data to the daemon */ |
---|
| 959 | fasync: NULL, |
---|
| 960 | lock: NULL, |
---|
| 961 | aio_read: NULL, |
---|
| 962 | aio_write: NULL, |
---|
| 963 | }; |
---|
| 964 | |
---|
| 965 | #ifdef EXPORTKDUMPDEV |
---|
| 966 | static struct file_operations kdump_ops = |
---|
| 967 | { |
---|
| 968 | llseek: kdump_lseek, |
---|
| 969 | read: kdump_read, /* read op allows the daemon to retrieve records */ |
---|
| 970 | write: NULL, /* Trace points write to the device */ |
---|
| 971 | readdir: NULL, |
---|
| 972 | poll: NULL, |
---|
| 973 | ioctl: NULL, /* control op to change buffering or dump state */ |
---|
| 974 | mmap: NULL, |
---|
| 975 | open: kdump_open, /* Prepare the device for tracing */ |
---|
| 976 | flush: NULL, |
---|
| 977 | release: kdump_close, /* Terminate tracing and close the device */ |
---|
| 978 | fsync: NULL, /* Sync all buffered data to the daemon */ |
---|
| 979 | fasync: NULL, |
---|
| 980 | lock: NULL, |
---|
| 981 | aio_read: NULL, |
---|
| 982 | aio_write: NULL, |
---|
| 983 | }; |
---|
| 984 | #endif |
---|
| 985 | /* Register the trace device "/dev/trace" and save the major number in |
---|
| 986 | * the header |
---|
| 987 | */ |
---|
| 988 | static int trc_register() |
---|
| 989 | { |
---|
| 990 | int major = register_chrdev(0, "trace", &trc_ops); |
---|
| 991 | if (major < 0) |
---|
| 992 | return major; |
---|
| 993 | lxthe.major = major; |
---|
| 994 | #ifdef EXPORTKDUMPDEV |
---|
| 995 | major_kdump = register_chrdev(0, "kdump", &kdump_ops); |
---|
| 996 | #endif |
---|
| 997 | return 0; |
---|
| 998 | } |
---|
| 999 | |
---|
| 1000 | /* Unregister the trace device */ |
---|
| 1001 | static void trc_unregister() |
---|
| 1002 | { |
---|
| 1003 | unregister_chrdev(lxthe.major, "trace"); |
---|
| 1004 | lxthe.major = 0; |
---|
| 1005 | #ifdef EXPORTKDUMPDEV |
---|
| 1006 | if (major_kdump >= 0) |
---|
| 1007 | unregister_chrdev(major_kdump, "kdump"); |
---|
| 1008 | major_kdump = 0; |
---|
| 1009 | #endif |
---|
| 1010 | |
---|
| 1011 | } |
---|
| 1012 | |
---|
| 1013 | |
---|
| 1014 | static void |
---|
| 1015 | _STraceArgs(int* trRecLenP, int* stringLenP, int nArgs, int pos, va_list listP) |
---|
| 1016 | { |
---|
| 1017 | int dataLen; |
---|
| 1018 | int i; |
---|
| 1019 | ARGTYPE tmpint; |
---|
| 1020 | char *s; |
---|
| 1021 | int stringLen; |
---|
| 1022 | int stringPadLen; |
---|
| 1023 | |
---|
| 1024 | dataLen = 0; |
---|
| 1025 | |
---|
| 1026 | /* Handle argument lists that include a string parameter */ |
---|
| 1027 | if (pos >= 0 && pos < LXTRACE_MAX_FORMAT_SUBS) |
---|
| 1028 | { |
---|
| 1029 | /* Items (if any) preceeding the string argument */ |
---|
| 1030 | for (i = 0; i < pos; i++) |
---|
| 1031 | { |
---|
| 1032 | tmpint = va_arg(listP, ARGTYPE); |
---|
| 1033 | trc_append_record(&tmpint, ARGLEN); |
---|
| 1034 | dataLen += ARGLEN; |
---|
| 1035 | } |
---|
| 1036 | |
---|
| 1037 | /* Copy the string, making sure it does not overflow the buffer */ |
---|
| 1038 | s = va_arg(listP, char*); |
---|
| 1039 | if (s < (char*)4096) /* bad address */ |
---|
| 1040 | { |
---|
| 1041 | printk("_STrace: bad address 0x%X hook 0x%X\n", s, lxthe.hdrP->trHook); |
---|
| 1042 | s = "<bad address>"; |
---|
| 1043 | } |
---|
| 1044 | stringLen = strlen(s); |
---|
| 1045 | stringLen = MIN(stringLen, |
---|
| 1046 | LXTRACE_MAX_DATA - sizeof(trc_datahdr_t) - |
---|
| 1047 | (nArgs*ARGLEN) - 1 - (ARGLEN-1)); |
---|
| 1048 | trc_append_record(s, stringLen); |
---|
| 1049 | stringPadLen = ARGLEN - (stringLen%ARGLEN); |
---|
| 1050 | trc_append_record(stringPadding, stringPadLen); |
---|
| 1051 | *stringLenP = stringLen + stringPadLen; |
---|
| 1052 | dataLen += stringLen + stringPadLen; |
---|
| 1053 | |
---|
| 1054 | /* Append items following string argument */ |
---|
| 1055 | for (i = pos; i < nArgs; i++) |
---|
| 1056 | { |
---|
| 1057 | tmpint = va_arg(listP, ARGTYPE); |
---|
| 1058 | trc_append_record(&tmpint, ARGLEN); |
---|
| 1059 | dataLen += ARGLEN; |
---|
| 1060 | } |
---|
| 1061 | } |
---|
| 1062 | else /* !IS_SFORMAT */ |
---|
| 1063 | { |
---|
| 1064 | /* Place the fixed parameters in the temporary trace buffer */ |
---|
| 1065 | for (i = 0; i < nArgs; i++) |
---|
| 1066 | { |
---|
| 1067 | tmpint = va_arg(listP, ARGTYPE); |
---|
| 1068 | trc_append_record(&tmpint, ARGLEN); |
---|
| 1069 | dataLen += ARGLEN; |
---|
| 1070 | } |
---|
| 1071 | *stringLenP = 0; |
---|
| 1072 | } |
---|
| 1073 | |
---|
| 1074 | /* Append the float argument */ |
---|
| 1075 | if (pos == _TR_FORMAT_F) |
---|
| 1076 | { |
---|
| 1077 | /* Although the argument is really a double, don't tell the compiler, |
---|
| 1078 | so that it will not generate code using floating point hardware |
---|
| 1079 | that is not supposed to be used in the kernel. */ |
---|
| 1080 | /* double tmpdbl = va_arg(listP, double); */ |
---|
| 1081 | unsigned long long tmpdbl = va_arg(listP, unsigned long long); |
---|
| 1082 | trc_append_record(&tmpdbl, sizeof(tmpdbl)); |
---|
| 1083 | dataLen += sizeof(tmpdbl); |
---|
| 1084 | } |
---|
| 1085 | |
---|
| 1086 | *trRecLenP = sizeof(trc_datahdr_t) + dataLen; |
---|
| 1087 | /* DBGASSERT(*trRecLenP <= LXTRACE_MAX_DATA); */ |
---|
| 1088 | } |
---|
| 1089 | |
---|
| 1090 | |
---|
| 1091 | void _STraceNB(int hookword, int nArgs, int pos, ...) |
---|
| 1092 | { |
---|
| 1093 | trc_datahdr_t hdr; |
---|
| 1094 | int recLen; |
---|
| 1095 | int rc; |
---|
| 1096 | va_list listP; |
---|
| 1097 | int trRecLen; |
---|
| 1098 | int stringLen; |
---|
| 1099 | |
---|
| 1100 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
| 1101 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
| 1102 | is needed to allow non-blocking traces to work. */ |
---|
| 1103 | if (in_interrupt()) |
---|
| 1104 | return; |
---|
| 1105 | |
---|
| 1106 | if (!isTraced(hookword)) |
---|
| 1107 | return; |
---|
| 1108 | |
---|
| 1109 | /* Test for trace formats that aren't supported yet */ |
---|
| 1110 | if ((pos == _TR_FORMAT_I) && (nArgs > LXTRACE_MAX_FORMAT_SUBS)) |
---|
| 1111 | { |
---|
| 1112 | #ifdef DBGASSERTS |
---|
| 1113 | printk("_STrace: too many arguments (hook %X)\n", hookword); |
---|
| 1114 | #endif /* DBGASSERTS */ |
---|
| 1115 | return; |
---|
| 1116 | } |
---|
| 1117 | |
---|
| 1118 | /* Build a data header and append it to the trace file. If there is a |
---|
| 1119 | string, the length is not yet known, so use the maximum. It will be |
---|
| 1120 | patched to the correct value later. */ |
---|
| 1121 | hdr.trHook = hookword; |
---|
| 1122 | hdr.trNArgs = nArgs; |
---|
| 1123 | hdr.trSPos = pos; |
---|
| 1124 | hdr.trSLen = 0; /* invalid if there is a string; fix below */ |
---|
| 1125 | if (pos >= 0 && pos < LXTRACE_MAX_FORMAT_SUBS) |
---|
| 1126 | recLen = LXTRACE_MAX_DATA; |
---|
| 1127 | else |
---|
| 1128 | { |
---|
| 1129 | recLen = sizeof(hdr) + nArgs*ARGLEN; |
---|
| 1130 | if (pos == _TR_FORMAT_F) |
---|
| 1131 | recLen += ARGLEN; |
---|
| 1132 | } |
---|
| 1133 | rc = trc_start_record(&hdr, recLen, true); |
---|
| 1134 | |
---|
| 1135 | /* If the header was successfully written, collect arguments directly into |
---|
| 1136 | the trace buffer */ |
---|
| 1137 | if (rc == 0) |
---|
| 1138 | { |
---|
| 1139 | va_start(listP, pos); |
---|
| 1140 | _STraceArgs(&trRecLen, &stringLen, nArgs, pos, listP); |
---|
| 1141 | va_end(listP); |
---|
| 1142 | |
---|
| 1143 | /* Patch the string and record lengths now that the string has been |
---|
| 1144 | copied */ |
---|
| 1145 | lxthe.hdrP->trSLen = stringLen; |
---|
| 1146 | lxthe.tHdrP->trLength = trRecLen; |
---|
| 1147 | |
---|
| 1148 | /* Trace record complete */ |
---|
| 1149 | trc_end_record(); |
---|
| 1150 | } |
---|
| 1151 | } |
---|
| 1152 | |
---|
| 1153 | void _STrace(int hookword, int nArgs, int pos, ...) |
---|
| 1154 | { |
---|
| 1155 | trc_datahdr_t hdr; |
---|
| 1156 | int recLen; |
---|
| 1157 | int rc; |
---|
| 1158 | va_list listP; |
---|
| 1159 | int trRecLen; |
---|
| 1160 | int stringLen; |
---|
| 1161 | |
---|
| 1162 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
| 1163 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
| 1164 | is needed to allow non-blocking traces to work. */ |
---|
| 1165 | if (in_interrupt()) |
---|
| 1166 | return; |
---|
| 1167 | |
---|
| 1168 | if (!isTraced(hookword)) |
---|
| 1169 | return; |
---|
| 1170 | |
---|
| 1171 | /* Test for trace formats that aren't supported yet */ |
---|
| 1172 | if ((pos == _TR_FORMAT_I) && (nArgs > LXTRACE_MAX_FORMAT_SUBS)) |
---|
| 1173 | { |
---|
| 1174 | #ifdef DBGASSERTS |
---|
| 1175 | printk("_STrace: too many arguments (hook %X)\n", hookword); |
---|
| 1176 | #endif /* DBGASSERTS */ |
---|
| 1177 | return; |
---|
| 1178 | } |
---|
| 1179 | |
---|
| 1180 | /* Build a data header and append it to the trace file. If there is a |
---|
| 1181 | string, the length is not yet known, so use the maximum. It will be |
---|
| 1182 | patched to the correct value later. */ |
---|
| 1183 | hdr.trHook = hookword; |
---|
| 1184 | hdr.trNArgs = nArgs; |
---|
| 1185 | hdr.trSPos = pos; |
---|
| 1186 | hdr.trSLen = 0; /* invalid if there is a string; fix below */ |
---|
| 1187 | if (pos >= 0 && pos < LXTRACE_MAX_FORMAT_SUBS) |
---|
| 1188 | recLen = LXTRACE_MAX_DATA; |
---|
| 1189 | else |
---|
| 1190 | { |
---|
| 1191 | recLen = sizeof(hdr) + nArgs*ARGLEN; |
---|
| 1192 | if (pos == _TR_FORMAT_F) |
---|
| 1193 | recLen += ARGLEN; |
---|
| 1194 | } |
---|
| 1195 | rc = trc_start_record(&hdr, recLen, false); |
---|
| 1196 | |
---|
| 1197 | /* If the header was successfully written, collect arguments directly into |
---|
| 1198 | the trace buffer */ |
---|
| 1199 | if (rc == 0) |
---|
| 1200 | { |
---|
| 1201 | va_start(listP, pos); |
---|
| 1202 | _STraceArgs(&trRecLen, &stringLen, nArgs, pos, listP); |
---|
| 1203 | va_end(listP); |
---|
| 1204 | |
---|
| 1205 | /* Patch the string and record lengths now that the string has been |
---|
| 1206 | copied */ |
---|
| 1207 | lxthe.hdrP->trSLen = stringLen; |
---|
| 1208 | lxthe.tHdrP->trLength = trRecLen; |
---|
| 1209 | |
---|
| 1210 | /* Trace record complete */ |
---|
| 1211 | trc_end_record(); |
---|
| 1212 | } |
---|
| 1213 | } |
---|
| 1214 | |
---|
| 1215 | void _XTraceNB(int hookword, char *fmt, ...) |
---|
| 1216 | { |
---|
| 1217 | trc_datahdr_t hdr; |
---|
| 1218 | int rc; |
---|
| 1219 | va_list vargs; |
---|
| 1220 | int stringLen; |
---|
| 1221 | |
---|
| 1222 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
| 1223 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
| 1224 | is needed to allow non-blocking traces to work. */ |
---|
| 1225 | if (in_interrupt()) |
---|
| 1226 | return; |
---|
| 1227 | |
---|
| 1228 | if (!isTraced(hookword)) |
---|
| 1229 | return; |
---|
| 1230 | |
---|
| 1231 | /* Build a data header and append it to the trace file. Since the length |
---|
| 1232 | is not yet known, use the maximum. It will be patched to the correct |
---|
| 1233 | value later. */ |
---|
| 1234 | hdr.trHook = hookword; |
---|
| 1235 | hdr.trNArgs = 0; |
---|
| 1236 | hdr.trSPos = _TR_FORMAT_X; |
---|
| 1237 | hdr.trSLen = -1; /* invalid; fix below */ |
---|
| 1238 | rc = trc_start_record(&hdr, LXTRACE_MAX_DATA, true); |
---|
| 1239 | |
---|
| 1240 | /* If the header was successfully written, format the string directly |
---|
| 1241 | into the trace buffer */ |
---|
| 1242 | if (rc == 0) |
---|
| 1243 | { |
---|
| 1244 | va_start(vargs, fmt); |
---|
| 1245 | stringLen = vsnprintf(lxthe.writeBuf.nextP, |
---|
| 1246 | LXTRACE_MAX_DATA-sizeof(trc_datahdr_t), fmt, vargs) + 1; |
---|
| 1247 | va_end(vargs); |
---|
| 1248 | if (stringLen > LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)) |
---|
| 1249 | { |
---|
| 1250 | printk("_XTraceNB: argument too long. len=%d max=%d hook=0x%X\n", |
---|
| 1251 | stringLen, LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)-1, hookword); |
---|
| 1252 | stringLen = LXTRACE_MAX_DATA-sizeof(trc_datahdr_t); |
---|
| 1253 | } |
---|
| 1254 | |
---|
| 1255 | /* Patch the string and record lengths now that vsnprintf has calculated |
---|
| 1256 | the length that it formatted */ |
---|
| 1257 | lxthe.hdrP->trSLen = ((stringLen+ARGLEN-1)/ARGLEN)*ARGLEN; |
---|
| 1258 | lxthe.tHdrP->trLength = sizeof(hdr) + lxthe.hdrP->trSLen; |
---|
| 1259 | |
---|
| 1260 | /* Advance pointer into trace buffer by the length of the string just |
---|
| 1261 | appended */ |
---|
| 1262 | lxthe.writeBuf.nextP += lxthe.hdrP->trSLen; |
---|
| 1263 | |
---|
| 1264 | /* Trace record complete */ |
---|
| 1265 | trc_end_record(); |
---|
| 1266 | } |
---|
| 1267 | } |
---|
| 1268 | |
---|
| 1269 | void _XTrace(int hookword, char *fmt, ...) |
---|
| 1270 | { |
---|
| 1271 | trc_datahdr_t hdr; |
---|
| 1272 | int rc; |
---|
| 1273 | va_list vargs; |
---|
| 1274 | int stringLen; |
---|
| 1275 | |
---|
| 1276 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
| 1277 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
| 1278 | is needed to allow non-blocking traces to work. */ |
---|
| 1279 | if (in_interrupt()) |
---|
| 1280 | return; |
---|
| 1281 | |
---|
| 1282 | if (!isTraced(hookword)) |
---|
| 1283 | return; |
---|
| 1284 | |
---|
| 1285 | /* Build a data header and append it to the trace file. Since the length |
---|
| 1286 | is not yet known, use the maximum. It will be patched to the correct |
---|
| 1287 | value later. */ |
---|
| 1288 | hdr.trHook = hookword; |
---|
| 1289 | hdr.trNArgs = 0; |
---|
| 1290 | hdr.trSPos = _TR_FORMAT_X; |
---|
| 1291 | hdr.trSLen = -1; /* invalid; fix below */ |
---|
| 1292 | rc = trc_start_record(&hdr, LXTRACE_MAX_DATA, false); |
---|
| 1293 | |
---|
| 1294 | /* If the header was successfully written, format the string directly |
---|
| 1295 | into the trace buffer */ |
---|
| 1296 | if (rc == 0) |
---|
| 1297 | { |
---|
| 1298 | va_start(vargs, fmt); |
---|
| 1299 | stringLen = vsnprintf(lxthe.writeBuf.nextP, |
---|
| 1300 | LXTRACE_MAX_DATA-sizeof(trc_datahdr_t), fmt, vargs) + 1; |
---|
| 1301 | va_end(vargs); |
---|
| 1302 | if (stringLen > LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)) |
---|
| 1303 | { |
---|
| 1304 | printk("_XTrace: argument too long. len=%d max=%d hook=0x%X\n", |
---|
| 1305 | stringLen, LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)-1, hookword); |
---|
| 1306 | stringLen = LXTRACE_MAX_DATA-sizeof(trc_datahdr_t); |
---|
| 1307 | } |
---|
| 1308 | |
---|
| 1309 | /* Patch the string and record lengths now that vsnprintf has calculated |
---|
| 1310 | the length that it formatted */ |
---|
| 1311 | lxthe.hdrP->trSLen = ((stringLen+ARGLEN-1)/ARGLEN)*ARGLEN; |
---|
| 1312 | lxthe.tHdrP->trLength = sizeof(hdr) + lxthe.hdrP->trSLen; |
---|
| 1313 | |
---|
| 1314 | /* Advance pointer into trace buffer by the length of the string just |
---|
| 1315 | appended */ |
---|
| 1316 | lxthe.writeBuf.nextP += lxthe.hdrP->trSLen; |
---|
| 1317 | |
---|
| 1318 | /* Trace record complete */ |
---|
| 1319 | trc_end_record(); |
---|
| 1320 | } |
---|
| 1321 | } |
---|
| 1322 | |
---|
| 1323 | /* Module initialization */ |
---|
| 1324 | MY_INIT_FUNCTION() |
---|
| 1325 | { |
---|
| 1326 | trc_init(); |
---|
| 1327 | return trc_register(); |
---|
| 1328 | } |
---|
| 1329 | |
---|
| 1330 | MY_EXIT_FUNCTION() |
---|
| 1331 | { |
---|
| 1332 | trc_unregister(); |
---|
| 1333 | trc_term(); |
---|
| 1334 | } |
---|
| 1335 | |
---|
| 1336 | DEFINE_MODULE_INIT(); |
---|
| 1337 | DEFINE_MODULE_EXIT(); |
---|
| 1338 | |
---|
| 1339 | #endif /* GPFS_PRINTF */ |
---|
| 1340 | #endif /* KTRACE */ |
---|