1 | /*************************************************************************** |
---|
2 | * |
---|
3 | * Copyright (C) 2001 International Business Machines |
---|
4 | * All rights reserved. |
---|
5 | * |
---|
6 | * This file is part of the GPFS mmfslinux kernel module. |
---|
7 | * |
---|
8 | * Redistribution and use in source and binary forms, with or without |
---|
9 | * modification, are permitted provided that the following conditions |
---|
10 | * are met: |
---|
11 | * |
---|
12 | * 1. Redistributions of source code must retain the above copyright notice, |
---|
13 | * this list of conditions and the following disclaimer. |
---|
14 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
15 | * notice, this list of conditions and the following disclaimer in the |
---|
16 | * documentation and/or other materials provided with the distribution. |
---|
17 | * 3. The name of the author may not be used to endorse or promote products |
---|
18 | * derived from this software without specific prior written |
---|
19 | * permission. |
---|
20 | * |
---|
21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
---|
22 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
---|
23 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
---|
24 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
---|
26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
---|
27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
---|
28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
---|
29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
---|
30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
31 | * |
---|
32 | *************************************************************************** */ |
---|
33 | /* @(#)25 1.65.1.6 src/avs/fs/mmfs/ts/kernext/gpl-linux/tracedev.c, mmfs, avs_rgpfs24, rgpfs24s011a 3/14/07 10:57:03 */ |
---|
34 | |
---|
35 | /************************************************************************** |
---|
36 | * |
---|
37 | * Loadable kernel module that implements the trace device. |
---|
38 | * |
---|
39 | **************************************************************************/ |
---|
40 | |
---|
41 | #ifndef GPFS_PRINTF |
---|
42 | |
---|
43 | #ifndef __KERNEL__ |
---|
44 | # define __KERNEL__ |
---|
45 | #endif |
---|
46 | |
---|
47 | #ifndef KBUILD_MODNAME |
---|
48 | #define KBUILD_MODNAME tracedev |
---|
49 | #endif |
---|
50 | |
---|
51 | /* If trace is built into kernel, pick up GPFS flag definitions from a file |
---|
52 | rather than requiring them to be defined on the command line. */ |
---|
53 | #ifndef MODULE |
---|
54 | /* #include <linux/ktrace.h> */ |
---|
55 | #endif |
---|
56 | |
---|
57 | #include <Shark-gpl.h> |
---|
58 | |
---|
59 | #include <linux/version.h> |
---|
60 | #include <linux/kernel.h> |
---|
61 | #include <linux/module.h> |
---|
62 | #include <linux/errno.h> |
---|
63 | #include <linux/slab.h> |
---|
64 | #include <linux/smp_lock.h> |
---|
65 | #include <linux/vmalloc.h> |
---|
66 | #include <linux/string.h> |
---|
67 | |
---|
68 | #if LINUX_KERNEL_VERSION > 2060900 |
---|
69 | #include <linux/hardirq.h> /* in_interrupt */ |
---|
70 | #else |
---|
71 | #include <asm/hardirq.h> /* in_interrupt */ |
---|
72 | #endif |
---|
73 | #include <asm/uaccess.h> /* copy_to/from_user */ |
---|
74 | |
---|
75 | |
---|
76 | #include <stdarg.h> |
---|
77 | #include <Trace.h> |
---|
78 | #include <lxtrace.h> |
---|
79 | #include <verdep.h> |
---|
80 | |
---|
81 | #ifdef __64BIT__ |
---|
82 | # define Int64 long long |
---|
83 | # define ARGLEN 8 |
---|
84 | # define ARGTYPE Int64 |
---|
85 | #else |
---|
86 | # define Int32 int |
---|
87 | # define ARGLEN 4 |
---|
88 | # define ARGTYPE Int32 |
---|
89 | #endif /* __64BIT__ */ |
---|
90 | |
---|
91 | char stringPadding[8]; |
---|
92 | |
---|
93 | #if LINUX_KERNEL_VERSION > 2060900 || \ |
---|
94 | (LINUX_KERNEL_VERSION > 2060000 && (defined(GPFS_ARCH_PPC64) || defined(GPFS_ARCH_X86_64))) |
---|
95 | #define EXPORTKDUMPDEV |
---|
96 | #endif |
---|
97 | |
---|
98 | #ifdef EXPORTKDUMPDEV |
---|
99 | static int major_kdump = -1; |
---|
100 | #endif |
---|
101 | |
---|
102 | #if defined(MODULE) && (LINUX_KERNEL_VERSION >= 2040900) |
---|
103 | MODULE_LICENSE("GPL"); |
---|
104 | MODULE_DESCRIPTION ("GPFS portability layer (tracing module)"); |
---|
105 | MODULE_AUTHOR ("IBM <gpfs@us.ibm.com>"); |
---|
106 | #endif /* MODULE */ |
---|
107 | |
---|
108 | /* If trace is built into kernel, then this is a dummy module */ |
---|
109 | #ifndef KTRACE |
---|
110 | |
---|
111 | /* the daemon's task structure (for signal) */ |
---|
112 | static struct task_struct *taskP; |
---|
113 | |
---|
114 | /* The writeLock serializes trace writers. It should be avoided by |
---|
115 | * other operations in order to allow the writers to continue unimpeded. |
---|
116 | * The writeLock must be held when accessing the following fields in the |
---|
117 | * trace header element: nWaits, nBuffers, nLost, writeBuf |
---|
118 | */ |
---|
119 | static spinlock_t writeLock; |
---|
120 | |
---|
121 | /* The readLock serializes trace operations, as well as most other access |
---|
122 | * to the trace header element. Whenever both readLock and writeLock are |
---|
123 | * required, readLock is always to be acquired first. |
---|
124 | */ |
---|
125 | static struct semaphore readLock; |
---|
126 | |
---|
127 | /* The readFull flag synchronizes access to readBuf by readers and writers. |
---|
128 | Writers set this after filling readBuf and wait for this to be clear |
---|
129 | before filling readBuf. Readers use this flag to tell if readBuf has |
---|
130 | any data and clear this after processing. Using an atomic variable |
---|
131 | allows steady-state tracing to be done without readers needing to |
---|
132 | acquire a lock that would block writers. Note that atomic operations |
---|
133 | do not generally act as memory barriers, so explicit barrier calls may |
---|
134 | be necessary before or after accessing readFull. Spinlocks act as |
---|
135 | partial memory barriers, so explicit barriers can be avoided in some |
---|
136 | cases where spinlocks are used. */ |
---|
137 | static atomic_t readFull; |
---|
138 | |
---|
139 | /* Trace Header Element - THE anchor for the trace state */ |
---|
140 | static trcdev_header_t lxthe; |
---|
141 | static wait_queue_head_t daemonWaitQ; |
---|
142 | |
---|
143 | /* Export pointers to internal data structures for debugging */ |
---|
144 | struct |
---|
145 | { |
---|
146 | trcdev_header_t *lxtheP; |
---|
147 | wait_queue_head_t *daemonWaitQP; |
---|
148 | struct semaphore *readLockP; |
---|
149 | spinlock_t *writeLockP; |
---|
150 | } TraceVarAddrs = { &lxthe, &daemonWaitQ, &readLock, &writeLock }; |
---|
151 | |
---|
/* A trcdev_buffer is dirty when it holds data (nextP != beginP) AND dirtyP
 * has not yet been advanced (by trc_read) past that data (to nextP).
 *
 * The argument is a buffer descriptor lvalue, not a pointer.  It is
 * parenthesized in the expansion so that expressions such as *p or
 * array[i] expand correctly.  Note that the argument is evaluated more
 * than once, so it must not have side effects. */
#define IS_DIRTY(b) ((b).nextP != (b).beginP && (b).nextP != (b).dirtyP)
---|
155 | |
---|
156 | /* A trace record passed from a user thread consists of a data header |
---|
157 | followed by the marshalled arguments */ |
---|
158 | struct trcRec |
---|
159 | { |
---|
160 | trc_datahdr_t hdr; |
---|
161 | char data[LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)]; |
---|
162 | }; |
---|
163 | |
---|
164 | |
---|
165 | /* Updating state information requires the writeLock in addition to |
---|
166 | * readLock. The readLock is widely held where the trace header is |
---|
167 | * manipulated, but for the brief period of updating the state field, get the |
---|
168 | * writeLock as well. |
---|
169 | */ |
---|
170 | static void setTraceState(trcdev_state_t newState) |
---|
171 | { |
---|
172 | spin_lock(&writeLock); |
---|
173 | lxthe.state = newState; |
---|
174 | wake_up(&daemonWaitQ); |
---|
175 | spin_unlock(&writeLock); |
---|
176 | } |
---|
177 | |
---|
178 | |
---|
179 | /* Return true if the specified hookword is currently being traced. */ |
---|
180 | static Boolean isTraced(uint hw) |
---|
181 | { |
---|
182 | return lxthe.state == trc_active;; |
---|
183 | } |
---|
184 | |
---|
185 | |
---|
186 | /* Construct the static trace header element ("lxthe"). |
---|
187 | * trc_open will allocate buffers and set the appropriate values. */ |
---|
188 | static void trc_init() |
---|
189 | { |
---|
190 | spin_lock_init(&writeLock); |
---|
191 | sema_init(&readLock, 1); |
---|
192 | |
---|
193 | /* Note: Locks are not needed here. There better not be any other |
---|
194 | threads trying to access lxthe at this point. If there were, then |
---|
195 | what would happen if a thread tried to acquire the locks a few |
---|
196 | instructions earlier, before we initialized the locks? */ |
---|
197 | |
---|
198 | lxthe.major = 0; /* dynamic assignment (by register_chrdev in trc_register) */ |
---|
199 | lxthe.minor = 0; |
---|
200 | lxthe.bufSize = 0; |
---|
201 | lxthe.nOpens = 0; |
---|
202 | lxthe.nWaits = 0; |
---|
203 | lxthe.nBuffers = 0; |
---|
204 | lxthe.nLost = 0; |
---|
205 | atomic_set(&readFull, 0); |
---|
206 | taskP = NULL; |
---|
207 | init_waitqueue_head(&daemonWaitQ); |
---|
208 | |
---|
209 | lxthe.writeBuf.beginP = NULL; |
---|
210 | lxthe.writeBuf.endP = NULL; |
---|
211 | lxthe.writeBuf.nextP = NULL; |
---|
212 | lxthe.writeBuf.dirtyP = NULL; |
---|
213 | |
---|
214 | lxthe.readBuf = lxthe.writeBuf; |
---|
215 | |
---|
216 | lxthe.state = trc_initialized; |
---|
217 | } |
---|
218 | |
---|
219 | /* Destroy the static trace header element (lxthe) */ |
---|
220 | static void trc_term() |
---|
221 | { |
---|
222 | /* Note: Locks are not needed here. We're about to re-initialize, so |
---|
223 | if anyone is still using lxthe at this point, we would clobber them. */ |
---|
224 | |
---|
225 | /* The two buffers are allocated together. Free them both here. */ |
---|
226 | if (lxthe.writeBuf.beginP) |
---|
227 | vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP)); |
---|
228 | |
---|
229 | /* (re)initialize all fields. Rather than copy all the stuff that happens |
---|
230 | * in trc_init, we can use it here to reset all the fields. */ |
---|
231 | trc_init(); |
---|
232 | } |
---|
233 | |
---|
234 | #ifdef EXPORTKDUMPDEV |
---|
235 | static ssize_t kdump_read(struct file *fileP, char *bufP, size_t nBytes, loff_t *ppos) |
---|
236 | { |
---|
237 | int rc= -EINVAL; |
---|
238 | #if defined(GPFS_ARCH_X86_64) && LINUX_KERNEL_VERSION >= 2061600 |
---|
239 | /* rw_verify_area does not allow kernel addr range, |
---|
240 | so a read() will fail with EINVAL. |
---|
241 | We subtracted the base kernel addr is kdump.c and add back in here. */ |
---|
242 | unsigned long highBits = GPFS_KERNEL_OFFSET; |
---|
243 | #else |
---|
244 | unsigned long highBits = 0; |
---|
245 | #endif |
---|
246 | |
---|
247 | if (virt_addr_valid((unsigned long)*ppos + highBits)) |
---|
248 | if (copy_to_user(bufP, (void *)((unsigned long)*ppos + highBits), nBytes)==0) |
---|
249 | rc=nBytes; |
---|
250 | return((ssize_t)rc); |
---|
251 | } |
---|
252 | |
---|
253 | static int kdump_open(struct inode *inodeP, struct file *fileP) |
---|
254 | { |
---|
255 | MY_MODULE_INCREMENT(); |
---|
256 | fileP->f_pos=0; |
---|
257 | return 0; |
---|
258 | } |
---|
259 | |
---|
/* Close operation of the kdump device: drop the module reference taken by
 * kdump_open.  Always succeeds. */
static int kdump_close(struct inode *inodeP, struct file *fileP)
{
  MY_MODULE_DECREMENT();

  return 0;
}
---|
265 | |
---|
266 | static loff_t kdump_lseek(struct file *fileP, loff_t offset, int orgin) |
---|
267 | { |
---|
268 | loff_t rc; |
---|
269 | |
---|
270 | if (orgin != 0) |
---|
271 | return(-EAGAIN); |
---|
272 | |
---|
273 | fileP->f_pos = offset; |
---|
274 | |
---|
275 | return(offset); |
---|
276 | } |
---|
277 | #endif |
---|
278 | /* The device open operation. The first open is initiated by the trace daemon, |
---|
279 | * and comes after registration. It results in the allocation of the trace |
---|
280 | * buffers, and identifying the trace daemon (so it can be signalled when |
---|
281 | * buffers are ready to be read). */ |
---|
282 | static int trc_open(struct inode *inodeP, struct file *fileP) |
---|
283 | { |
---|
284 | int rc = 0; |
---|
285 | |
---|
286 | /* Serialize multiple opens and prevent state changes */ |
---|
287 | down(&readLock); |
---|
288 | |
---|
289 | /* Only the daemon opens the device O_RDWR, and only does so when turning |
---|
290 | * trace on. |
---|
291 | */ |
---|
292 | if ((fileP->f_flags & O_ACCMODE) == O_RDWR) |
---|
293 | { |
---|
294 | if (lxthe.state != trc_initialized) |
---|
295 | { |
---|
296 | rc = -EALREADY; |
---|
297 | goto exit; |
---|
298 | } |
---|
299 | |
---|
300 | /* The first open (lxtrace on) requires initialization of the header. */ |
---|
301 | lxthe.minor = MINOR(inodeP->i_rdev); |
---|
302 | |
---|
303 | /* Only supporting one such device */ |
---|
304 | if (lxthe.minor > 0) |
---|
305 | { |
---|
306 | rc = -ENODEV; |
---|
307 | goto exit; |
---|
308 | } |
---|
309 | |
---|
310 | /* If not configured otherwise, use the default buffer size. */ |
---|
311 | if (lxthe.bufSize == 0) |
---|
312 | lxthe.bufSize = DEF_TRC_BUFSIZE; |
---|
313 | |
---|
314 | /* Allocate dual trace buffers (new records go into the write buffer, |
---|
315 | * and the daemon reads (via trc_read) from the read buffer). */ |
---|
316 | lxthe.writeBuf.beginP = vmalloc(2*lxthe.bufSize); |
---|
317 | if (!lxthe.writeBuf.beginP) |
---|
318 | { |
---|
319 | rc = -ENOMEM; |
---|
320 | goto exit; |
---|
321 | } |
---|
322 | lxthe.writeBuf.endP = lxthe.writeBuf.beginP + lxthe.bufSize - 1; |
---|
323 | lxthe.writeBuf.nextP = lxthe.writeBuf.beginP; |
---|
324 | lxthe.writeBuf.dirtyP = lxthe.writeBuf.beginP; |
---|
325 | lxthe.writeBuf.bufNum = 1; |
---|
326 | |
---|
327 | lxthe.readBuf.beginP = lxthe.writeBuf.beginP + lxthe.bufSize; |
---|
328 | lxthe.readBuf.endP = lxthe.readBuf.beginP + lxthe.bufSize - 1; |
---|
329 | lxthe.readBuf.nextP = lxthe.readBuf.beginP; |
---|
330 | lxthe.readBuf.dirtyP = lxthe.readBuf.beginP; |
---|
331 | lxthe.readBuf.bufNum = 2; |
---|
332 | |
---|
333 | /* Save pointer to the daemon task information, and mark the |
---|
334 | * device open. */ |
---|
335 | taskP = current; |
---|
336 | setTraceState(trc_opened); |
---|
337 | |
---|
338 | /* Since threads that handle VM page-outs also do traces, set flag so |
---|
339 | that we will not get blocked waiting to allocate pages. Otherwise a |
---|
340 | deadlock could occur if the page-out thread was waiting for us to |
---|
341 | empty the trace buffer, and we are waiting for the page-out thread |
---|
342 | to free some pages. */ |
---|
343 | current->flags |= PF_MEMALLOC; |
---|
344 | } |
---|
345 | |
---|
346 | /* Applications must open the trace device O_WRONLY. These opens do not |
---|
347 | * require any processing. If the daemon has turned tracing on, the open |
---|
348 | * is allowed and subsequent write() calls will be handled. If the daemon |
---|
349 | * has NOT turned tracing on, the application open will be granted, but |
---|
350 | * subsequent write() calls will NOOP |
---|
351 | * until the daemon turns trace on (state == trac_active). */ |
---|
352 | |
---|
353 | else if ((fileP->f_flags & O_ACCMODE) != O_WRONLY) |
---|
354 | { |
---|
355 | /* After "trace on", subsequent trace control commands open O_RDONLY. */ |
---|
356 | if (lxthe.state != trc_active) |
---|
357 | { |
---|
358 | rc = -EALREADY; |
---|
359 | goto exit; |
---|
360 | } |
---|
361 | } |
---|
362 | |
---|
363 | lxthe.nOpens += 1; |
---|
364 | |
---|
365 | MY_MODULE_INCREMENT(); |
---|
366 | |
---|
367 | exit: |
---|
368 | up(&readLock); |
---|
369 | return rc; |
---|
370 | } |
---|
371 | |
---|
372 | /* The device read operation. This is to be used only by the trace daemon to |
---|
373 | * retrieve trace buffers for the purposes of writing to the output file. */ |
---|
374 | static ssize_t trc_read(struct file *fileP, char *bufP, size_t nBytes, loff_t *ppos) |
---|
375 | { |
---|
376 | ssize_t nDone = 0; |
---|
377 | ssize_t nReady; |
---|
378 | |
---|
379 | /* All access to lxthe.readBuf is protected via the readLock. */ |
---|
380 | down(&readLock); |
---|
381 | |
---|
382 | /* Only the trace daemon is allowed to read. */ |
---|
383 | if (taskP && taskP->pid != current->pid) |
---|
384 | { |
---|
385 | nDone = -EPERM; |
---|
386 | goto exit; |
---|
387 | } |
---|
388 | |
---|
389 | /* See if there is data waiting to be processed by the daemon. Read is |
---|
390 | * allowed here during normal operation (trc_active) and as trace is |
---|
391 | * terminating (this to get the last group of buffered records). */ |
---|
392 | if ((lxthe.state == trc_active || lxthe.state == trc_stopped) && |
---|
393 | atomic_read(&readFull)) |
---|
394 | { |
---|
395 | /* Be sure that we don't access readBuf until after readFull is set */ |
---|
396 | rmb(); |
---|
397 | |
---|
398 | if (IS_DIRTY(lxthe.readBuf)) |
---|
399 | { |
---|
400 | /* Make sure that the caller's buffer is large enough to hold |
---|
401 | * what we have. */ |
---|
402 | nReady = lxthe.readBuf.nextP - lxthe.readBuf.beginP; |
---|
403 | if (nReady > nBytes) |
---|
404 | { |
---|
405 | nDone = -EFBIG; |
---|
406 | goto exit; |
---|
407 | } |
---|
408 | |
---|
409 | if (copy_to_user(bufP, lxthe.readBuf.dirtyP, nReady)) |
---|
410 | { |
---|
411 | nDone = -EFAULT; |
---|
412 | goto exit; |
---|
413 | } |
---|
414 | |
---|
415 | nDone = nReady; |
---|
416 | lxthe.readBuf.dirtyP += nDone; |
---|
417 | } |
---|
418 | |
---|
419 | /* Allow writers to use readBuf */ |
---|
420 | if (!IS_DIRTY(lxthe.readBuf)) |
---|
421 | { |
---|
422 | wmb(); |
---|
423 | atomic_set(&readFull, 0); |
---|
424 | wake_up(&daemonWaitQ); |
---|
425 | } |
---|
426 | } |
---|
427 | |
---|
428 | exit: |
---|
429 | up(&readLock); |
---|
430 | return nDone; |
---|
431 | } |
---|
432 | |
---|
433 | static void my_send_sig_info(int mySig, struct siginfo * sigData, struct task_struct *taskP) |
---|
434 | { |
---|
435 | struct task_struct *g, *tsP; |
---|
436 | // read_lock(&tasklist_lock); |
---|
437 | rcu_read_lock(); |
---|
438 | |
---|
439 | DO_EACH_THREAD(g,tsP) |
---|
440 | { |
---|
441 | if (tsP == taskP) |
---|
442 | { |
---|
443 | send_sig_info(mySig, sigData, tsP); |
---|
444 | break; |
---|
445 | } |
---|
446 | } WHILE_EACH_THREAD(g,tsP); |
---|
447 | // read_unlock(&tasklist_lock); |
---|
448 | rcu_read_unlock(); |
---|
449 | } |
---|
450 | |
---|
451 | /* Internal routine to schedule i/o of the trace buffer. |
---|
452 | NOTE that this routine is called while holding the writeLock. */ |
---|
453 | static void trc_signal_io() |
---|
454 | { |
---|
455 | trcdev_buffer_t tBuf; |
---|
456 | struct siginfo sigData; |
---|
457 | |
---|
458 | /* DBGASSERT(atomic_read(&readFull) == 0); */ |
---|
459 | |
---|
460 | /* Switch the buffers. We don't have to worry about trc_read looking at |
---|
461 | readBuf while we do this because it always verify that readFull is |
---|
462 | non-zero before accessing readBuf. */ |
---|
463 | rmb(); |
---|
464 | tBuf = lxthe.readBuf; |
---|
465 | lxthe.readBuf = lxthe.writeBuf; |
---|
466 | lxthe.writeBuf= tBuf; |
---|
467 | lxthe.nBuffers++; /* Number of buffers filled */ |
---|
468 | |
---|
469 | /* Mark readBuf full so that writers won't switch to it until after the |
---|
470 | daemon has processed it. Do write memory barrier to ensure that |
---|
471 | our change to readBuf makes it to memory before readFull is set. */ |
---|
472 | wmb(); |
---|
473 | atomic_set(&readFull, 1); |
---|
474 | |
---|
475 | /* Reset the (new) writeBuf to a clean state */ |
---|
476 | lxthe.writeBuf.dirtyP = lxthe.writeBuf.nextP = lxthe.writeBuf.beginP; |
---|
477 | |
---|
478 | /* Debug for 471707: Since all trace records begin with a header the |
---|
479 | * very first thing in a dirty buffer should be a valid header. If |
---|
480 | * this is not the case, print debug information to the log file. |
---|
481 | */ |
---|
482 | if (IS_DIRTY(tBuf) && (((trc_header_t *)tBuf.beginP)->trMagic != LXTRACE_MAGIC)) |
---|
483 | { |
---|
484 | printk("trc_signal_io: bad trace buffer! trMagic 0x%X\n", |
---|
485 | ((trc_header_t *)tBuf.beginP)->trMagic); |
---|
486 | printk("trc_signal_io: begin 0x%x end 0x%X next 0x%X dirty 0x%X isDirty %d\n", |
---|
487 | (trc_header_t *)tBuf.beginP, (trc_header_t *)tBuf.endP, |
---|
488 | (trc_header_t *)tBuf.nextP, (trc_header_t *)tBuf.dirtyP, |
---|
489 | IS_DIRTY(tBuf)); |
---|
490 | } |
---|
491 | |
---|
492 | /* Signal daemon that there is a trace buffer to be read and processed. */ |
---|
493 | sigData.si_signo = SIGIO; |
---|
494 | sigData.si_errno = 0; |
---|
495 | sigData.si_code = SI_KERNEL; |
---|
496 | my_send_sig_info(SIGIO, &sigData, taskP); |
---|
497 | } |
---|
498 | |
---|
499 | |
---|
500 | /* Return true if trace writer will have to wait for daemon to make room |
---|
501 | for a trace record of the specified length. */ |
---|
502 | static int writeMustBlock(int len) |
---|
503 | { |
---|
504 | return (len > lxthe.writeBuf.endP - lxthe.writeBuf.nextP + 1 && |
---|
505 | atomic_read(&readFull) && lxthe.state == trc_active); |
---|
506 | } |
---|
507 | |
---|
508 | |
---|
509 | /* Reserves space for a trace record whose data header plus arguments |
---|
510 | totals nBytes. Returns 0 if space was reserved for the trace record, |
---|
511 | or non-zero if space could not be found because the buffer is full |
---|
512 | and cantBlock is set, or because tracing is not enabled. If space |
---|
513 | was reserved successfully, builds a trace header, then copies the |
---|
514 | trace header and the given data header into the trace device buffer. |
---|
515 | After returning from this routine, trc_append_record should be called |
---|
516 | 0 or more times, then trc_end_record. Returns with writeLock held iff |
---|
517 | 0 was returned. */ |
---|
518 | static int |
---|
519 | trc_start_record(trc_datahdr_t * hdrP, size_t nBytes, int cantBlock) |
---|
520 | { |
---|
521 | int trclen; |
---|
522 | trc_header_t tHdr; |
---|
523 | |
---|
524 | /* Construct the trace record header */ |
---|
525 | tHdr.trMagic = LXTRACE_MAGIC; |
---|
526 | |
---|
527 | /* Wait to set the timestamp (tHdr.trTime) until after all serialization. |
---|
528 | * When multiple threads call trace, they don't necessarily get the |
---|
529 | * writeLock in FIFO order so setting the timestamp here can result |
---|
530 | * in times going backwards in the trace file. |
---|
531 | * do_gettimeofday(&tHdr.trTime); |
---|
532 | */ |
---|
533 | |
---|
534 | tHdr.trProcess = current->pid; |
---|
535 | tHdr.trCPU = smp_processor_id(); |
---|
536 | tHdr.trLength = nBytes; |
---|
537 | trclen = nBytes + sizeof(trc_header_t); |
---|
538 | |
---|
539 | /* Serialize access to writeBuf */ |
---|
540 | spin_lock(&writeLock); |
---|
541 | |
---|
542 | /* If this trace record will not fit in the write buffer, and the read |
---|
543 | buffer is still full, and trace is active, then we must wait for the |
---|
544 | daemon to empty the read buffer. */ |
---|
545 | if (writeMustBlock(trclen)) |
---|
546 | { |
---|
547 | if (cantBlock) |
---|
548 | { |
---|
549 | lxthe.nLost++; |
---|
550 | spin_unlock(&writeLock); |
---|
551 | return 1; |
---|
552 | } |
---|
553 | |
---|
554 | if (lxthe.state != trc_active) |
---|
555 | { |
---|
556 | spin_unlock(&writeLock); |
---|
557 | return 1; |
---|
558 | } |
---|
559 | |
---|
560 | lxthe.nWaits++; |
---|
561 | do |
---|
562 | { |
---|
563 | spin_unlock(&writeLock); |
---|
564 | wait_event(daemonWaitQ, !writeMustBlock(trclen)); |
---|
565 | spin_lock(&writeLock); |
---|
566 | } while (writeMustBlock(trclen)); |
---|
567 | } |
---|
568 | |
---|
569 | if (lxthe.state != trc_active) |
---|
570 | { |
---|
571 | spin_unlock(&writeLock); |
---|
572 | return 1; |
---|
573 | } |
---|
574 | |
---|
575 | /* Will the trace record fit into the write buffer? If not, then we can |
---|
576 | swap with the read buffer which must be empty at this point (else we |
---|
577 | wouldn't have come out of previous wait loop). */ |
---|
578 | if (trclen > lxthe.writeBuf.endP - lxthe.writeBuf.nextP + 1) |
---|
579 | { |
---|
580 | /* Swap write buffer with read buffer and signal daemon to process the |
---|
581 | data. */ |
---|
582 | trc_signal_io(); |
---|
583 | |
---|
584 | /* This could be an assert, since write buffer must be empty now. */ |
---|
585 | if (trclen > lxthe.writeBuf.endP - lxthe.writeBuf.nextP + 1) |
---|
586 | { |
---|
587 | spin_unlock(&writeLock); |
---|
588 | return 1; |
---|
589 | } |
---|
590 | } |
---|
591 | |
---|
592 | /* Now that there isn't anything to block the writing of this |
---|
593 | * record, insert the timestamp. |
---|
594 | */ |
---|
595 | do_gettimeofday(&tHdr.trTime); |
---|
596 | |
---|
597 | /* Insert the header stamp into the buffer ahead of the application |
---|
598 | record and remember its location. */ |
---|
599 | lxthe.tHdrP = (trc_header_t *)lxthe.writeBuf.nextP; |
---|
600 | tHdr.trBuf = lxthe.writeBuf.bufNum; |
---|
601 | memcpy(lxthe.writeBuf.nextP, &tHdr, sizeof(tHdr)); |
---|
602 | lxthe.writeBuf.nextP += sizeof(tHdr); |
---|
603 | |
---|
604 | /* Move the application trace header directly into the trace buffer and |
---|
605 | remember its location */ |
---|
606 | lxthe.hdrP = (trc_datahdr_t *)lxthe.writeBuf.nextP; |
---|
607 | memcpy(lxthe.writeBuf.nextP, hdrP, sizeof(*hdrP)); |
---|
608 | lxthe.writeBuf.nextP += sizeof(*hdrP); |
---|
609 | |
---|
610 | /* Return with writeLock still held */ |
---|
611 | return 0; |
---|
612 | } |
---|
613 | |
---|
614 | |
---|
615 | /* Append a portion of a trace record to the write buffer. Must have |
---|
616 | previously called trc_start_record. */ |
---|
617 | static void trc_append_record(const void* bufP, size_t nBytes) |
---|
618 | { |
---|
619 | /* Move the application trace record directly into the trace buffer */ |
---|
620 | memcpy(lxthe.writeBuf.nextP, bufP, nBytes); |
---|
621 | lxthe.writeBuf.nextP += nBytes; |
---|
622 | } |
---|
623 | |
---|
624 | |
---|
625 | /* Finish a trace record */ |
---|
626 | static void trc_end_record() |
---|
627 | { |
---|
628 | spin_unlock(&writeLock); |
---|
629 | } |
---|
630 | |
---|
631 | |
---|
632 | static ssize_t |
---|
633 | trc_write(struct file *fileP, const char *bufP, size_t nBytes, loff_t *posP) |
---|
634 | { |
---|
635 | struct trcRec tr; |
---|
636 | int rc; |
---|
637 | int dataBytes; |
---|
638 | |
---|
639 | /* Copy trace record from user address space */ |
---|
640 | if (nBytes < 4 || nBytes > LXTRACE_MAX_DATA) |
---|
641 | return -EINVAL; |
---|
642 | if (copy_from_user(&tr, bufP, nBytes)) |
---|
643 | return -EFAULT; |
---|
644 | |
---|
645 | /* The beginning of the trace record is a hookword number. Verify that |
---|
646 | the specified hookword is being traced. If not, return as if the trace |
---|
647 | was successful. */ |
---|
648 | if (isTraced(tr.hdr.trHook)) |
---|
649 | { |
---|
650 | rc = trc_start_record(&tr.hdr, nBytes, false); |
---|
651 | if (rc == 0) |
---|
652 | { |
---|
653 | dataBytes = nBytes - sizeof(tr.hdr); |
---|
654 | if (dataBytes > 0) |
---|
655 | trc_append_record(&tr.data[0], dataBytes); |
---|
656 | trc_end_record(); |
---|
657 | } |
---|
658 | } |
---|
659 | return nBytes; |
---|
660 | } |
---|
661 | |
---|
662 | /* Before close, a sync of the trace device will flush the records |
---|
663 | * still in the read buffer (even though it might not be full). A |
---|
664 | * close without this call could result in the loss of these records. |
---|
665 | * Must not call fsync from daemon termination signal handler because |
---|
666 | * that could deadlock if a SIGIO is still pending. |
---|
667 | */ |
---|
668 | static int |
---|
669 | trc_fsync_internal(struct file* fileP, struct dentry* dP, int datasync) |
---|
670 | { |
---|
671 | spin_lock(&writeLock); |
---|
672 | |
---|
673 | /* If read buffer is still full, wait for daemon to process it */ |
---|
674 | while (atomic_read(&readFull) && |
---|
675 | (lxthe.state == trc_active || lxthe.state == trc_stopped)) |
---|
676 | { |
---|
677 | spin_unlock(&writeLock); |
---|
678 | wait_event(daemonWaitQ, |
---|
679 | !(atomic_read(&readFull) && |
---|
680 | (lxthe.state == trc_active || lxthe.state == trc_stopped))); |
---|
681 | spin_lock(&writeLock); |
---|
682 | } |
---|
683 | |
---|
684 | /* Allow fsync during normal operation OR after ioctl(trc_end) has |
---|
685 | disabled further trace writing (allows an fsync before close to |
---|
686 | flush the buffered records). */ |
---|
687 | if (lxthe.writeBuf.nextP != lxthe.writeBuf.beginP && |
---|
688 | (lxthe.state == trc_active || lxthe.state == trc_stopped)) |
---|
689 | trc_signal_io(); |
---|
690 | |
---|
691 | spin_unlock(&writeLock); |
---|
692 | return 0; |
---|
693 | } |
---|
694 | |
---|
695 | |
---|
696 | /* The externally visible version of trc_fsync_internal */ |
---|
697 | int trc_fsync() |
---|
698 | { |
---|
699 | return trc_fsync_internal(NULL, NULL, 0); |
---|
700 | } |
---|
701 | |
---|
702 | |
---|
703 | /* The device close operation. */ |
---|
704 | static int trc_close(struct inode *inodeP, struct file *fileP) |
---|
705 | { |
---|
706 | down(&readLock); |
---|
707 | |
---|
708 | /* The trace daemon only closes the device upon termination. */ |
---|
709 | if (taskP && taskP->pid == current->pid) |
---|
710 | { |
---|
711 | /* The final trace daemon close. Reset for subsequent use. */ |
---|
712 | setTraceState(trc_initialized); |
---|
713 | |
---|
714 | /* We don't really need writeLock here since writers won't do anything |
---|
715 | after state is set to trc_initialized, but it doesn't hurt. */ |
---|
716 | spin_lock(&writeLock); |
---|
717 | lxthe.nWaits = 0; |
---|
718 | lxthe.nBuffers = 0; |
---|
719 | lxthe.nLost = 0; |
---|
720 | spin_unlock(&writeLock); |
---|
721 | taskP = NULL; |
---|
722 | current->flags &= ~PF_MEMALLOC; |
---|
723 | |
---|
724 | /* Free the two trace buffers. */ |
---|
725 | if (lxthe.writeBuf.beginP) |
---|
726 | { |
---|
727 | vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP)); |
---|
728 | |
---|
729 | lxthe.writeBuf.beginP = NULL; |
---|
730 | lxthe.writeBuf.endP = NULL; |
---|
731 | lxthe.writeBuf.nextP = NULL; |
---|
732 | lxthe.writeBuf.dirtyP = NULL; |
---|
733 | |
---|
734 | lxthe.readBuf = lxthe.writeBuf; |
---|
735 | } |
---|
736 | } |
---|
737 | |
---|
738 | lxthe.nOpens -= 1; |
---|
739 | |
---|
740 | MY_MODULE_DECREMENT(); |
---|
741 | |
---|
742 | up(&readLock); |
---|
743 | return 0; |
---|
744 | } |
---|
745 | |
---|
746 | |
---|
747 | /* ioctl op used for low-level access to trace operation. */ |
---|
748 | static int trc_ioctl(struct inode *inodeP, struct file *fileP, |
---|
749 | unsigned int op, unsigned long kx_args) |
---|
750 | { |
---|
751 | int h, rc = 0; |
---|
752 | Boolean readLockHeld = false; |
---|
753 | struct kArgs args_cp; |
---|
754 | struct kArgs *args = (struct kArgs *)kx_args; |
---|
755 | char *p; |
---|
756 | char *newBufP; |
---|
757 | char *trc_dumpP; |
---|
758 | char *trc_nextP; |
---|
759 | struct siginfo sigData; |
---|
760 | int waitCount = 0; |
---|
761 | |
---|
762 | down(&readLock); |
---|
763 | readLockHeld = true; |
---|
764 | |
---|
765 | switch (op) |
---|
766 | { |
---|
767 | case trc_begin: |
---|
768 | if (lxthe.state == trc_active) |
---|
769 | { |
---|
770 | rc = -EALREADY; |
---|
771 | break; |
---|
772 | } |
---|
773 | if (lxthe.state != trc_opened) |
---|
774 | { |
---|
775 | rc = -EBADF; |
---|
776 | break; |
---|
777 | } |
---|
778 | setTraceState(trc_active); |
---|
779 | break; |
---|
780 | |
---|
781 | case trc_end: |
---|
782 | if (lxthe.state != trc_active) |
---|
783 | rc = -EBADF; |
---|
784 | else |
---|
785 | { |
---|
786 | setTraceState(trc_stopped); |
---|
787 | up(&readLock); |
---|
788 | readLockHeld = false; |
---|
789 | trc_fsync(); |
---|
790 | |
---|
791 | /* Signal the daemon to terminate. */ |
---|
792 | sigData.si_signo = SIGTERM; |
---|
793 | sigData.si_errno = 0; |
---|
794 | sigData.si_code = SI_KERNEL; |
---|
795 | my_send_sig_info(SIGTERM, &sigData, taskP); |
---|
796 | } |
---|
797 | |
---|
798 | /* Wait for lxtrace to terminate, but don't wait forever. |
---|
799 | At this point the signal has been delivered to lxtrace, |
---|
800 | but it may take some time for the process to exit. Since |
---|
801 | lxthe.state is changed from trc_stopped to trc_initialized |
---|
802 | in trc_close(), which is called when lxtrace exits, if we |
---|
803 | return control to the caller right away, there'd be a window |
---|
804 | when tracing has ostensibly been stopped, and it should be |
---|
805 | OK to start tracing again, but trying to do so would fail |
---|
806 | with EALREADY in trc_open because lxthe.state is not what |
---|
807 | the code expects. So we give lxtrace some time to terminate. |
---|
808 | Something could go seriously wrong, and lxtrace may get stuck, |
---|
809 | we don't wait forever. */ |
---|
810 | while (lxthe.state == trc_stopped && waitCount++ < 10) |
---|
811 | { |
---|
812 | current->state = TASK_INTERRUPTIBLE; |
---|
813 | schedule_timeout(100); |
---|
814 | } |
---|
815 | break; |
---|
816 | |
---|
817 | case trc_bufSize: |
---|
818 | |
---|
819 | /* The daemon may call ioctl to change the desired buffer size. |
---|
820 | On open, buffers of the default size are allocated. This call |
---|
821 | frees the current buffers (replacing them with new ones). Any |
---|
822 | trace records currently in the buffers will be lost. */ |
---|
823 | |
---|
824 | if (lxthe.state != trc_opened) |
---|
825 | { |
---|
826 | rc = -EPERM; |
---|
827 | break; |
---|
828 | } |
---|
829 | |
---|
830 | /* get the argument array */ |
---|
831 | if (copy_from_user(&args_cp, args, sizeof(args_cp))) |
---|
832 | { |
---|
833 | rc = -EFAULT; |
---|
834 | break; |
---|
835 | } |
---|
836 | |
---|
837 | /* Allocate the new (dual) trace buffers. |
---|
838 | * arg1 is the requested buffer size */ |
---|
839 | newBufP = vmalloc(2*args_cp.arg1); |
---|
840 | if (!newBufP) |
---|
841 | { |
---|
842 | rc = -ENOMEM; |
---|
843 | break; |
---|
844 | } |
---|
845 | |
---|
846 | /* Free the previous buffers. Since the state is currently |
---|
847 | * "trc_opened" and we are holding readLock, neither readers nor |
---|
848 | * writers can be using the buffers at this time. */ |
---|
849 | if (lxthe.writeBuf.beginP) |
---|
850 | vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP)); |
---|
851 | |
---|
852 | lxthe.bufSize = args_cp.arg1; |
---|
853 | lxthe.writeBuf.beginP = newBufP; |
---|
854 | |
---|
855 | lxthe.writeBuf.endP = lxthe.writeBuf.beginP + lxthe.bufSize - 1; |
---|
856 | lxthe.writeBuf.nextP = lxthe.writeBuf.beginP; |
---|
857 | lxthe.writeBuf.dirtyP = lxthe.writeBuf.beginP; |
---|
858 | |
---|
859 | lxthe.readBuf.beginP = lxthe.writeBuf.beginP + lxthe.bufSize; |
---|
860 | lxthe.readBuf.endP = lxthe.readBuf.beginP + lxthe.bufSize - 1; |
---|
861 | lxthe.readBuf.nextP = lxthe.readBuf.beginP; |
---|
862 | lxthe.readBuf.dirtyP = lxthe.readBuf.beginP; |
---|
863 | break; |
---|
864 | |
---|
865 | case trc_dump: |
---|
866 | |
---|
867 | /* format trace header information and return to daemon */ |
---|
868 | trc_dumpP = vmalloc(LXTRACE_DUMP_SIZE); |
---|
869 | if (trc_dumpP == NULL) |
---|
870 | { |
---|
871 | rc = -ENOMEM; |
---|
872 | break; |
---|
873 | } |
---|
874 | |
---|
875 | if (copy_from_user(&args_cp, args, sizeof(args_cp))) |
---|
876 | { |
---|
877 | rc = -EFAULT; |
---|
878 | break; |
---|
879 | } |
---|
880 | |
---|
881 | /* Block writers so that we can look at writeBuf. */ |
---|
882 | spin_lock(&writeLock); |
---|
883 | |
---|
884 | /* Format the state information suitable for displaying by |
---|
885 | * the daemon. |
---|
886 | */ |
---|
887 | trc_nextP = trc_dumpP; |
---|
888 | sprintf(trc_nextP, "Trace Header Element: 0x%08X\n", &lxthe); |
---|
889 | trc_nextP += strlen(trc_nextP); |
---|
890 | |
---|
891 | /* Global information on device number, buffer sizes, |
---|
892 | * and lost records. |
---|
893 | */ |
---|
894 | sprintf(trc_nextP, " Major %d Minor %d bufSize 0x%X nOpens %d " |
---|
895 | "nBuffers %d nLost %d nWaits %d Daemon %d\n", |
---|
896 | lxthe.major, lxthe.minor, lxthe.bufSize, lxthe.nOpens, |
---|
897 | lxthe.nBuffers, lxthe.nLost, lxthe.nWaits, |
---|
898 | taskP ? taskP->pid: 0); |
---|
899 | trc_nextP += strlen(trc_nextP); |
---|
900 | |
---|
901 | sprintf(trc_nextP, "\n"); |
---|
902 | trc_nextP += strlen(trc_nextP); |
---|
903 | |
---|
904 | /* Append buffer information */ |
---|
905 | sprintf(trc_nextP, " writeBuf: beginP 0x%X endP 0x%X nextP 0x%X " |
---|
906 | "dirtyP 0x%X isDirty %d\n", |
---|
907 | lxthe.writeBuf.beginP, lxthe.writeBuf.endP, |
---|
908 | lxthe.writeBuf.nextP, lxthe.writeBuf.dirtyP, |
---|
909 | IS_DIRTY(lxthe.writeBuf)); |
---|
910 | trc_nextP += strlen(trc_nextP); |
---|
911 | |
---|
912 | sprintf(trc_nextP, " readBuf : beginP 0x%X endP 0x%X nextP 0x%X " |
---|
913 | "dirtyP 0x%X isDirty %d\n", |
---|
914 | lxthe.readBuf.beginP, lxthe.readBuf.endP, |
---|
915 | lxthe.readBuf.nextP, lxthe.readBuf.dirtyP, |
---|
916 | IS_DIRTY(lxthe.readBuf)); |
---|
917 | trc_nextP += strlen(trc_nextP); |
---|
918 | |
---|
919 | #if 0 |
---|
920 | /* verify dumpBuf size */ |
---|
921 | sprintf(trc_nextP, " dumpBuf size %d (used %d)\n", |
---|
922 | LXTRACE_DUMP_SIZE, (trc_nextP-trc_dumpP)); |
---|
923 | trc_nextP += strlen(trc_nextP); |
---|
924 | #endif |
---|
925 | spin_unlock(&writeLock); |
---|
926 | |
---|
927 | /* arg1 is the user buffer size, arg2 is the address of the buffer */ |
---|
928 | if (copy_to_user((char *)args_cp.arg2, trc_dumpP, |
---|
929 | MIN(strlen(trc_dumpP)+1, args_cp.arg1))) |
---|
930 | rc = -EFAULT; |
---|
931 | |
---|
932 | vfree(trc_dumpP); |
---|
933 | break; |
---|
934 | |
---|
935 | default: |
---|
936 | rc = -EINVAL; |
---|
937 | break; |
---|
938 | } |
---|
939 | |
---|
940 | if (readLockHeld) |
---|
941 | up(&readLock); |
---|
942 | |
---|
943 | return rc; |
---|
944 | } |
---|
945 | |
---|
946 | static struct file_operations trc_ops = |
---|
947 | { |
---|
948 | llseek: NULL, |
---|
949 | read: trc_read, /* read op allows the daemon to retrieve records */ |
---|
950 | write: trc_write, /* Trace points write to the device */ |
---|
951 | readdir: NULL, |
---|
952 | poll: NULL, |
---|
953 | ioctl: trc_ioctl, /* control op to change buffering or dump state */ |
---|
954 | mmap: NULL, |
---|
955 | open: trc_open, /* Prepare the device for tracing */ |
---|
956 | flush: NULL, |
---|
957 | release: trc_close, /* Terminate tracing and close the device */ |
---|
958 | fsync: trc_fsync_internal, /* Sync all buffered data to the daemon */ |
---|
959 | fasync: NULL, |
---|
960 | lock: NULL, |
---|
961 | aio_read: NULL, |
---|
962 | aio_write: NULL, |
---|
963 | }; |
---|
964 | |
---|
965 | #ifdef EXPORTKDUMPDEV |
---|
966 | static struct file_operations kdump_ops = |
---|
967 | { |
---|
968 | llseek: kdump_lseek, |
---|
969 | read: kdump_read, /* read op allows the daemon to retrieve records */ |
---|
970 | write: NULL, /* Trace points write to the device */ |
---|
971 | readdir: NULL, |
---|
972 | poll: NULL, |
---|
973 | ioctl: NULL, /* control op to change buffering or dump state */ |
---|
974 | mmap: NULL, |
---|
975 | open: kdump_open, /* Prepare the device for tracing */ |
---|
976 | flush: NULL, |
---|
977 | release: kdump_close, /* Terminate tracing and close the device */ |
---|
978 | fsync: NULL, /* Sync all buffered data to the daemon */ |
---|
979 | fasync: NULL, |
---|
980 | lock: NULL, |
---|
981 | aio_read: NULL, |
---|
982 | aio_write: NULL, |
---|
983 | }; |
---|
984 | #endif |
---|
985 | /* Register the trace device "/dev/trace" and save the major number in |
---|
986 | * the header |
---|
987 | */ |
---|
988 | static int trc_register() |
---|
989 | { |
---|
990 | int major = register_chrdev(0, "trace", &trc_ops); |
---|
991 | if (major < 0) |
---|
992 | return major; |
---|
993 | lxthe.major = major; |
---|
994 | #ifdef EXPORTKDUMPDEV |
---|
995 | major_kdump = register_chrdev(0, "kdump", &kdump_ops); |
---|
996 | #endif |
---|
997 | return 0; |
---|
998 | } |
---|
999 | |
---|
1000 | /* Unregister the trace device */ |
---|
1001 | static void trc_unregister() |
---|
1002 | { |
---|
1003 | unregister_chrdev(lxthe.major, "trace"); |
---|
1004 | lxthe.major = 0; |
---|
1005 | #ifdef EXPORTKDUMPDEV |
---|
1006 | if (major_kdump >= 0) |
---|
1007 | unregister_chrdev(major_kdump, "kdump"); |
---|
1008 | major_kdump = 0; |
---|
1009 | #endif |
---|
1010 | |
---|
1011 | } |
---|
1012 | |
---|
1013 | |
---|
1014 | static void |
---|
1015 | _STraceArgs(int* trRecLenP, int* stringLenP, int nArgs, int pos, va_list listP) |
---|
1016 | { |
---|
1017 | int dataLen; |
---|
1018 | int i; |
---|
1019 | ARGTYPE tmpint; |
---|
1020 | char *s; |
---|
1021 | int stringLen; |
---|
1022 | int stringPadLen; |
---|
1023 | |
---|
1024 | dataLen = 0; |
---|
1025 | |
---|
1026 | /* Handle argument lists that include a string parameter */ |
---|
1027 | if (pos >= 0 && pos < LXTRACE_MAX_FORMAT_SUBS) |
---|
1028 | { |
---|
1029 | /* Items (if any) preceeding the string argument */ |
---|
1030 | for (i = 0; i < pos; i++) |
---|
1031 | { |
---|
1032 | tmpint = va_arg(listP, ARGTYPE); |
---|
1033 | trc_append_record(&tmpint, ARGLEN); |
---|
1034 | dataLen += ARGLEN; |
---|
1035 | } |
---|
1036 | |
---|
1037 | /* Copy the string, making sure it does not overflow the buffer */ |
---|
1038 | s = va_arg(listP, char*); |
---|
1039 | if (s < (char*)4096) /* bad address */ |
---|
1040 | { |
---|
1041 | printk("_STrace: bad address 0x%X hook 0x%X\n", s, lxthe.hdrP->trHook); |
---|
1042 | s = "<bad address>"; |
---|
1043 | } |
---|
1044 | stringLen = strlen(s); |
---|
1045 | stringLen = MIN(stringLen, |
---|
1046 | LXTRACE_MAX_DATA - sizeof(trc_datahdr_t) - |
---|
1047 | (nArgs*ARGLEN) - 1 - (ARGLEN-1)); |
---|
1048 | trc_append_record(s, stringLen); |
---|
1049 | stringPadLen = ARGLEN - (stringLen%ARGLEN); |
---|
1050 | trc_append_record(stringPadding, stringPadLen); |
---|
1051 | *stringLenP = stringLen + stringPadLen; |
---|
1052 | dataLen += stringLen + stringPadLen; |
---|
1053 | |
---|
1054 | /* Append items following string argument */ |
---|
1055 | for (i = pos; i < nArgs; i++) |
---|
1056 | { |
---|
1057 | tmpint = va_arg(listP, ARGTYPE); |
---|
1058 | trc_append_record(&tmpint, ARGLEN); |
---|
1059 | dataLen += ARGLEN; |
---|
1060 | } |
---|
1061 | } |
---|
1062 | else /* !IS_SFORMAT */ |
---|
1063 | { |
---|
1064 | /* Place the fixed parameters in the temporary trace buffer */ |
---|
1065 | for (i = 0; i < nArgs; i++) |
---|
1066 | { |
---|
1067 | tmpint = va_arg(listP, ARGTYPE); |
---|
1068 | trc_append_record(&tmpint, ARGLEN); |
---|
1069 | dataLen += ARGLEN; |
---|
1070 | } |
---|
1071 | *stringLenP = 0; |
---|
1072 | } |
---|
1073 | |
---|
1074 | /* Append the float argument */ |
---|
1075 | if (pos == _TR_FORMAT_F) |
---|
1076 | { |
---|
1077 | /* Although the argument is really a double, don't tell the compiler, |
---|
1078 | so that it will not generate code using floating point hardware |
---|
1079 | that is not supposed to be used in the kernel. */ |
---|
1080 | /* double tmpdbl = va_arg(listP, double); */ |
---|
1081 | unsigned long long tmpdbl = va_arg(listP, unsigned long long); |
---|
1082 | trc_append_record(&tmpdbl, sizeof(tmpdbl)); |
---|
1083 | dataLen += sizeof(tmpdbl); |
---|
1084 | } |
---|
1085 | |
---|
1086 | *trRecLenP = sizeof(trc_datahdr_t) + dataLen; |
---|
1087 | /* DBGASSERT(*trRecLenP <= LXTRACE_MAX_DATA); */ |
---|
1088 | } |
---|
1089 | |
---|
1090 | |
---|
1091 | void _STraceNB(int hookword, int nArgs, int pos, ...) |
---|
1092 | { |
---|
1093 | trc_datahdr_t hdr; |
---|
1094 | int recLen; |
---|
1095 | int rc; |
---|
1096 | va_list listP; |
---|
1097 | int trRecLen; |
---|
1098 | int stringLen; |
---|
1099 | |
---|
1100 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
1101 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
1102 | is needed to allow non-blocking traces to work. */ |
---|
1103 | if (in_interrupt()) |
---|
1104 | return; |
---|
1105 | |
---|
1106 | if (!isTraced(hookword)) |
---|
1107 | return; |
---|
1108 | |
---|
1109 | /* Test for trace formats that aren't supported yet */ |
---|
1110 | if ((pos == _TR_FORMAT_I) && (nArgs > LXTRACE_MAX_FORMAT_SUBS)) |
---|
1111 | { |
---|
1112 | #ifdef DBGASSERTS |
---|
1113 | printk("_STrace: too many arguments (hook %X)\n", hookword); |
---|
1114 | #endif /* DBGASSERTS */ |
---|
1115 | return; |
---|
1116 | } |
---|
1117 | |
---|
1118 | /* Build a data header and append it to the trace file. If there is a |
---|
1119 | string, the length is not yet known, so use the maximum. It will be |
---|
1120 | patched to the correct value later. */ |
---|
1121 | hdr.trHook = hookword; |
---|
1122 | hdr.trNArgs = nArgs; |
---|
1123 | hdr.trSPos = pos; |
---|
1124 | hdr.trSLen = 0; /* invalid if there is a string; fix below */ |
---|
1125 | if (pos >= 0 && pos < LXTRACE_MAX_FORMAT_SUBS) |
---|
1126 | recLen = LXTRACE_MAX_DATA; |
---|
1127 | else |
---|
1128 | { |
---|
1129 | recLen = sizeof(hdr) + nArgs*ARGLEN; |
---|
1130 | if (pos == _TR_FORMAT_F) |
---|
1131 | recLen += ARGLEN; |
---|
1132 | } |
---|
1133 | rc = trc_start_record(&hdr, recLen, true); |
---|
1134 | |
---|
1135 | /* If the header was successfully written, collect arguments directly into |
---|
1136 | the trace buffer */ |
---|
1137 | if (rc == 0) |
---|
1138 | { |
---|
1139 | va_start(listP, pos); |
---|
1140 | _STraceArgs(&trRecLen, &stringLen, nArgs, pos, listP); |
---|
1141 | va_end(listP); |
---|
1142 | |
---|
1143 | /* Patch the string and record lengths now that the string has been |
---|
1144 | copied */ |
---|
1145 | lxthe.hdrP->trSLen = stringLen; |
---|
1146 | lxthe.tHdrP->trLength = trRecLen; |
---|
1147 | |
---|
1148 | /* Trace record complete */ |
---|
1149 | trc_end_record(); |
---|
1150 | } |
---|
1151 | } |
---|
1152 | |
---|
1153 | void _STrace(int hookword, int nArgs, int pos, ...) |
---|
1154 | { |
---|
1155 | trc_datahdr_t hdr; |
---|
1156 | int recLen; |
---|
1157 | int rc; |
---|
1158 | va_list listP; |
---|
1159 | int trRecLen; |
---|
1160 | int stringLen; |
---|
1161 | |
---|
1162 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
1163 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
1164 | is needed to allow non-blocking traces to work. */ |
---|
1165 | if (in_interrupt()) |
---|
1166 | return; |
---|
1167 | |
---|
1168 | if (!isTraced(hookword)) |
---|
1169 | return; |
---|
1170 | |
---|
1171 | /* Test for trace formats that aren't supported yet */ |
---|
1172 | if ((pos == _TR_FORMAT_I) && (nArgs > LXTRACE_MAX_FORMAT_SUBS)) |
---|
1173 | { |
---|
1174 | #ifdef DBGASSERTS |
---|
1175 | printk("_STrace: too many arguments (hook %X)\n", hookword); |
---|
1176 | #endif /* DBGASSERTS */ |
---|
1177 | return; |
---|
1178 | } |
---|
1179 | |
---|
1180 | /* Build a data header and append it to the trace file. If there is a |
---|
1181 | string, the length is not yet known, so use the maximum. It will be |
---|
1182 | patched to the correct value later. */ |
---|
1183 | hdr.trHook = hookword; |
---|
1184 | hdr.trNArgs = nArgs; |
---|
1185 | hdr.trSPos = pos; |
---|
1186 | hdr.trSLen = 0; /* invalid if there is a string; fix below */ |
---|
1187 | if (pos >= 0 && pos < LXTRACE_MAX_FORMAT_SUBS) |
---|
1188 | recLen = LXTRACE_MAX_DATA; |
---|
1189 | else |
---|
1190 | { |
---|
1191 | recLen = sizeof(hdr) + nArgs*ARGLEN; |
---|
1192 | if (pos == _TR_FORMAT_F) |
---|
1193 | recLen += ARGLEN; |
---|
1194 | } |
---|
1195 | rc = trc_start_record(&hdr, recLen, false); |
---|
1196 | |
---|
1197 | /* If the header was successfully written, collect arguments directly into |
---|
1198 | the trace buffer */ |
---|
1199 | if (rc == 0) |
---|
1200 | { |
---|
1201 | va_start(listP, pos); |
---|
1202 | _STraceArgs(&trRecLen, &stringLen, nArgs, pos, listP); |
---|
1203 | va_end(listP); |
---|
1204 | |
---|
1205 | /* Patch the string and record lengths now that the string has been |
---|
1206 | copied */ |
---|
1207 | lxthe.hdrP->trSLen = stringLen; |
---|
1208 | lxthe.tHdrP->trLength = trRecLen; |
---|
1209 | |
---|
1210 | /* Trace record complete */ |
---|
1211 | trc_end_record(); |
---|
1212 | } |
---|
1213 | } |
---|
1214 | |
---|
1215 | void _XTraceNB(int hookword, char *fmt, ...) |
---|
1216 | { |
---|
1217 | trc_datahdr_t hdr; |
---|
1218 | int rc; |
---|
1219 | va_list vargs; |
---|
1220 | int stringLen; |
---|
1221 | |
---|
1222 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
1223 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
1224 | is needed to allow non-blocking traces to work. */ |
---|
1225 | if (in_interrupt()) |
---|
1226 | return; |
---|
1227 | |
---|
1228 | if (!isTraced(hookword)) |
---|
1229 | return; |
---|
1230 | |
---|
1231 | /* Build a data header and append it to the trace file. Since the length |
---|
1232 | is not yet known, use the maximum. It will be patched to the correct |
---|
1233 | value later. */ |
---|
1234 | hdr.trHook = hookword; |
---|
1235 | hdr.trNArgs = 0; |
---|
1236 | hdr.trSPos = _TR_FORMAT_X; |
---|
1237 | hdr.trSLen = -1; /* invalid; fix below */ |
---|
1238 | rc = trc_start_record(&hdr, LXTRACE_MAX_DATA, true); |
---|
1239 | |
---|
1240 | /* If the header was successfully written, format the string directly |
---|
1241 | into the trace buffer */ |
---|
1242 | if (rc == 0) |
---|
1243 | { |
---|
1244 | va_start(vargs, fmt); |
---|
1245 | stringLen = vsnprintf(lxthe.writeBuf.nextP, |
---|
1246 | LXTRACE_MAX_DATA-sizeof(trc_datahdr_t), fmt, vargs) + 1; |
---|
1247 | va_end(vargs); |
---|
1248 | if (stringLen > LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)) |
---|
1249 | { |
---|
1250 | printk("_XTraceNB: argument too long. len=%d max=%d hook=0x%X\n", |
---|
1251 | stringLen, LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)-1, hookword); |
---|
1252 | stringLen = LXTRACE_MAX_DATA-sizeof(trc_datahdr_t); |
---|
1253 | } |
---|
1254 | |
---|
1255 | /* Patch the string and record lengths now that vsnprintf has calculated |
---|
1256 | the length that it formatted */ |
---|
1257 | lxthe.hdrP->trSLen = ((stringLen+ARGLEN-1)/ARGLEN)*ARGLEN; |
---|
1258 | lxthe.tHdrP->trLength = sizeof(hdr) + lxthe.hdrP->trSLen; |
---|
1259 | |
---|
1260 | /* Advance pointer into trace buffer by the length of the string just |
---|
1261 | appended */ |
---|
1262 | lxthe.writeBuf.nextP += lxthe.hdrP->trSLen; |
---|
1263 | |
---|
1264 | /* Trace record complete */ |
---|
1265 | trc_end_record(); |
---|
1266 | } |
---|
1267 | } |
---|
1268 | |
---|
1269 | void _XTrace(int hookword, char *fmt, ...) |
---|
1270 | { |
---|
1271 | trc_datahdr_t hdr; |
---|
1272 | int rc; |
---|
1273 | va_list vargs; |
---|
1274 | int stringLen; |
---|
1275 | |
---|
1276 | /* Trace calls from interrupt level are not supported. If anybody needs |
---|
1277 | them, changing writeLock to use spin_lock_irqsave should be all that |
---|
1278 | is needed to allow non-blocking traces to work. */ |
---|
1279 | if (in_interrupt()) |
---|
1280 | return; |
---|
1281 | |
---|
1282 | if (!isTraced(hookword)) |
---|
1283 | return; |
---|
1284 | |
---|
1285 | /* Build a data header and append it to the trace file. Since the length |
---|
1286 | is not yet known, use the maximum. It will be patched to the correct |
---|
1287 | value later. */ |
---|
1288 | hdr.trHook = hookword; |
---|
1289 | hdr.trNArgs = 0; |
---|
1290 | hdr.trSPos = _TR_FORMAT_X; |
---|
1291 | hdr.trSLen = -1; /* invalid; fix below */ |
---|
1292 | rc = trc_start_record(&hdr, LXTRACE_MAX_DATA, false); |
---|
1293 | |
---|
1294 | /* If the header was successfully written, format the string directly |
---|
1295 | into the trace buffer */ |
---|
1296 | if (rc == 0) |
---|
1297 | { |
---|
1298 | va_start(vargs, fmt); |
---|
1299 | stringLen = vsnprintf(lxthe.writeBuf.nextP, |
---|
1300 | LXTRACE_MAX_DATA-sizeof(trc_datahdr_t), fmt, vargs) + 1; |
---|
1301 | va_end(vargs); |
---|
1302 | if (stringLen > LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)) |
---|
1303 | { |
---|
1304 | printk("_XTrace: argument too long. len=%d max=%d hook=0x%X\n", |
---|
1305 | stringLen, LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)-1, hookword); |
---|
1306 | stringLen = LXTRACE_MAX_DATA-sizeof(trc_datahdr_t); |
---|
1307 | } |
---|
1308 | |
---|
1309 | /* Patch the string and record lengths now that vsnprintf has calculated |
---|
1310 | the length that it formatted */ |
---|
1311 | lxthe.hdrP->trSLen = ((stringLen+ARGLEN-1)/ARGLEN)*ARGLEN; |
---|
1312 | lxthe.tHdrP->trLength = sizeof(hdr) + lxthe.hdrP->trSLen; |
---|
1313 | |
---|
1314 | /* Advance pointer into trace buffer by the length of the string just |
---|
1315 | appended */ |
---|
1316 | lxthe.writeBuf.nextP += lxthe.hdrP->trSLen; |
---|
1317 | |
---|
1318 | /* Trace record complete */ |
---|
1319 | trc_end_record(); |
---|
1320 | } |
---|
1321 | } |
---|
1322 | |
---|
/* Module initialization: set up the trace state with trc_init() and then
 * register the trace device.  Returns the result of trc_register(), i.e.
 * 0 on success or a negative value when registration failed. */
MY_INIT_FUNCTION()
{
  int rc;

  trc_init();

  rc = trc_register();
  if (rc != 0)
  {
    /* Registration failed, so the exit function will not get a chance to
       clean up.  Run the same teardown it performs after unregistering.
       NOTE(review): assumes trc_term() is the counterpart of trc_init()
       and is safe to call while no device is registered -- confirm. */
    trc_term();
  }

  return rc;
}
---|
1329 | |
---|
/* Module exit: unregister the trace device first, then run trc_term(),
 * the reverse of the order used at initialization. */
MY_EXIT_FUNCTION()
{
  trc_unregister();   /* remove the character device(s) */
  trc_term();         /* then tear down the trace state */
}
---|
1335 | |
---|
/* Module entry/exit declarations.  NOTE(review): these macros are defined
 * elsewhere; presumably they hook up the MY_INIT_FUNCTION and
 * MY_EXIT_FUNCTION bodies above as the module's init and exit routines. */
DEFINE_MODULE_INIT();
DEFINE_MODULE_EXIT();
---|
1338 | |
---|
1339 | #endif /* GPFS_PRINTF */ |
---|
1340 | #endif /* KTRACE */ |
---|