/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* @(#)16 1.158.1.9 src/avs/fs/mmfs/ts/kernext/gpl-linux/cxiSystem.c, mmfs, avs_rgpfs24, rgpfs24s007a 10/24/06 19:12:27 */
/*
 * Linux implementation of basic common services
 *
 * Contents:
 *   cxiGetThreadId
 *   getpid
 *   cxiIsSuperUser
 *   DoPanic
 *   logAssertFailed
 *   Kernel memory allocation services:
 *     cxiMallocPinned
 *     cxiFreePinned
 *
 */

#include <Shark-gpl.h>

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/time.h>
#include <linux/file.h>
#include <linux/string.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
#undef memcmp

#define DEFINE_TRACE_GBL_VARS
#include <Logger-gpl.h>
#include <verdep.h>
#include <linux2gpfs.h>
#include <cxiSystem.h>
#include <cxiAtomic.h>
#include <cxi2gpfs.h>
#include <cxiIOBuffer.h>
#include <cxiSharedSeg.h>
#include <cxiCred.h>

#include <Trace.h>
#include <lxtrace.h>
#include <cxiMode.h>
#if LINUX_KERNEL_VERSION >= 2060000
#include <linux/swap.h>
#include <linux/writeback.h>
#endif

#if LINUX_KERNEL_VERSION >= 2040900
/* This is in the Redhat kernel series */
extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
#endif

#ifdef INSTRUMENT_LOCKS
struct BlockingMutexStats BlockingMutexStatsTable[MAX_GPFS_LOCK_NAMES];
#endif  /* INSTRUMENT_LOCKS */

/* We record the daemon's process group since it can uniquely identify
 * a thread as being part of the GPFS daemon.  The pid is unique per thread
 * on Linux due to its clone implementation.
 */
static pid_t DaemonPGrp = -1;

/* Get the kernel thread ID. */
cxiThreadId cxiGetThreadId()
{
  /* ENTER(1); */
  return current->pid;
}

/* Get the kernel process ID. */
pid_t getpid()
{
  /* ENTER(1); */
  return current->pid;
}

/* bufP is the caller's ext_cred_t buffer
 * uCredPP is the ucred struct (NULL on Linux)
 * eCredPP is the ext_cred_t struct * (if successful)
 *
 * cxiPutCred should be called to release these when the operation has completed.
 */
int cxiGetCred(void *bufP, void **uCredPP, void **eCredPP)
{
  ext_cred_t *eCredP = (ext_cred_t *)bufP;

  ENTER(0);
  *uCredPP = NULL;
  *eCredPP = NULL;

  if (!bufP)
  {
    EXIT_RC(0, EINVAL);
    return EINVAL;
  }

  setCred(eCredP);
  *eCredPP = (void *)eCredP;

xerror:
  EXIT(0);
  return 0;
}

/* Release of cxiGetCred() structures (nothing to do on Linux) */
int cxiPutCred(void *userCredP, void *extCredP)
{
  if (userCredP || !extCredP)
    return EINVAL;

  return 0;
}

/* Convert a kernel stack address to the thread ID of the thread that
 * uses that stack
 */
int
cxiStackAddrToThreadId(char* stackP, cxiThreadId* tidP)
{
  struct task_struct * tP;
#if LINUX_KERNEL_VERSION >= 2060000
  /* The kernel stack is based off the thread_info struct in the 2.6 kernel;
   * get the task pointer out of the thread_info struct.
   */
  struct thread_info * iP;
  iP = (struct thread_info *) ((UIntPtr)stackP & ~((UIntPtr)(THREAD_SIZE-1)));
  tP = iP->task;
#else
  /* The kernel stack is based off the task_struct struct in the 2.4 kernel */
  tP = (struct task_struct *) ((UIntPtr)stackP & ~((UIntPtr)(THREAD_SIZE-1)));
#endif
  ENTER(0);
  *tidP = tP->pid;
  EXIT(0);
  return 0;
}

/* Convert a kernel thread pointer to the corresponding thread ID */
int
cxiThreadPtrToThreadId(char* threadP, cxiThreadId* tidP)
{
  struct task_struct * tP;

  ENTER(0);
  tP = (struct task_struct *) threadP;
  *tidP = tP->pid;

  EXIT(0);
  return 0;
}


/* Return true if the caller has maximum authorization (is root) */
Boolean cxiIsSuperUser()
{
  return (current->euid == 0);
}


/* Get the process max filesize limit (ulimit -f) */
Int64 cxiGetMaxFileSize()
{
  if ((signed long)MY_RLIM_CUR(RLIMIT_FSIZE) == -1L)
    return MAX_INT64;
  else
    return (MY_RLIM_CUR(RLIMIT_FSIZE));
}
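
/* A minimal user-space sketch (not compiled into this module) of the same
 * RLIMIT_FSIZE convention used above: an "unlimited" soft limit is reported
 * as RLIM_INFINITY, which cxiGetMaxFileSize maps to MAX_INT64.  The function
 * name and constant below are illustrative only.
 */
#if 0
#include <sys/resource.h>

static long long maxFileSizeSketch(void)
{
  struct rlimit rl;

  if (getrlimit(RLIMIT_FSIZE, &rl) != 0)
    return -1;                        /* error path, unlike the kernel code */
  if (rl.rlim_cur == RLIM_INFINITY)   /* analogous to the -1L check above */
    return 0x7fffffffffffffffLL;      /* stands in for MAX_INT64 */
  return (long long)rl.rlim_cur;
}
#endif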

/* Routine to send a signal to the current thread/process */
void cxiSendSigThread(int sig)
{
  ENTER(0);
  send_sig(sig, current, 0);
  EXIT(0);
}


#ifdef MALLOC_DEBUG
/* This tracks mallocs and frees on a limited basis.
 * Implemented originally to determine whether we were leaking
 * any memory after an unload.  This is not really thread
 * safe for multiple processors unless they're automatically
 * cache coherent without memory barriers (i386).  It is useful
 * for detecting memory leaks on a single processor system.
 */
#define MALLOC_RECORDS 5000  /* max mallocs to track */
struct mallocStat
{
  void *beginP;
  unsigned short size;
  unsigned short type;
};
static struct mallocStat *mstatP = NULL;
unsigned int nextMalloc = 0;

void
MallocDebugStart()
{
  int i;

  ENTER(0);
  if (mstatP == NULL)
    mstatP = vmalloc(MALLOC_RECORDS * sizeof(struct mallocStat));

  if (mstatP == NULL)
  {
    EXIT(0);
    return;
  }

  for (i = 0; i < MALLOC_RECORDS; i++)
  {
    mstatP[i].beginP = NULL;
    mstatP[i].size = 0;
    mstatP[i].type = 0;
  }
  printk("MallocDebugStart 0x%X\n", mstatP);
  EXIT(0);
}

void
MallocDebugEnd()
{
  int i;

  ENTER(0);
  if (mstatP != NULL)
  {
    for (i = 0; i < MALLOC_RECORDS; i++)
    {
      if (mstatP[i].beginP != NULL)
        printk("MallocDebug: beginP 0x%X size %d type %d STILL ALLOCATED!\n",
               mstatP[i].beginP, mstatP[i].size, mstatP[i].type);
    }
  }

  vfree(mstatP);
  mstatP = NULL;
  EXIT(0);
}

void
MallocDebugNew(void *ptr, unsigned short size, unsigned short type)
{
  void *bP;
  int i;
  int j;
  int swrc;
  int oldval;
  int where = nextMalloc;

  ENTER(0);

  if (mstatP == NULL)
  {
    EXIT(0);
    return;
  }

  for (i = where; i < MALLOC_RECORDS + where; i++)
  {
    if (i >= MALLOC_RECORDS)
      j = i - MALLOC_RECORDS;
    else
      j = i;

    bP = mstatP[j].beginP;
    if (bP == NULL)
    {
      swrc = ATOMIC_SWAP(&mstatP[j].beginP, &bP, ptr);
      if (swrc)
      {
        mstatP[j].size = size;
        mstatP[j].type = type;
        break;
      }
    }
  }

  EXIT(0);
}

void
MallocDebugDelete(void *ptr)
{
  void *bP;
  int i;
  int swrc;
  int next;
  int found = 0;

  ENTER(0);
  if (mstatP == NULL)
  {
    EXIT(0);
    return;
  }

  for (i = 0; i < MALLOC_RECORDS; i++)
  {
    bP = mstatP[i].beginP;
    if (bP == ptr)
    {
      next = nextMalloc;
      ATOMIC_SWAP(&nextMalloc, &next, i);

      swrc = ATOMIC_SWAP(&mstatP[i].beginP, &bP, NULL);
      DBGASSERT(swrc);
      found = 1;
      break;
    }
  }

  if (!found)
    printk("MallocDebug: 0x%X not found!\n", ptr);
  EXIT(0);
}
#endif /* MALLOC_DEBUG */

/* Allocate pinned kernel memory */
void* cxiMallocPinned(int nBytes)
{
  void *ptr;

  /* kmalloc only supports requests for up to 131072 bytes.  Anything
     larger than this results in a BUG() call. */
  ENTER(0);
  if (nBytes > 131072)
  {
    EXIT(0);
    return NULL;
  }

  ptr = kmalloc(nBytes, GFP_KERNEL);

#ifdef MALLOC_DEBUG
  MallocDebugNew(ptr, nBytes, 1);
#endif

  EXIT(0);
  return ptr;
}

/* Free pinned kernel memory that was allocated with cxiMallocPinned.
   Must not block on lack of memory resources. */
void cxiFreePinned(void* p)
{
  ENTER(0);
#ifdef MALLOC_DEBUG
  MallocDebugDelete(p);
#endif

  kfree(p);
  EXIT(0);
}

/* Return the fcntl lock owner: the one recorded in the eflock if supplied,
   otherwise the current process's file table. */
void* cxiGetFcntlOwner(eflock_t *flP)
{
  return flP? flP->l_owner: current->files;
}

#if LINUX_KERNEL_VERSION > 2060900
struct lock_manager_operations lm_operations = {
};
#endif

/* Perform local advisory locking. */
int cxiFcntlLock(void *advObjP,
                 int cmd,
                 void *lockStructP,
                 cxiFlock_t *flockP,
                 int (*retryCB)(),
                 cxiOff64_t size,
                 cxiOff64_t offset,
                 ulong *retry_idP)
{
  int len, rc = 0;
  // struct file *fP;
  struct file_lock fl, *flP, *gflP, *cflP;
  Boolean keepLockElement = false;

  /* cast platform independent arguments as appropriate for linux */
  void (*RetryFcn)(struct file_lock*) = (void (*)(struct file_lock*))retryCB;
  // fP = (struct file *)advObjP;
  struct file localFile, *filp = &localFile;
  struct dentry localDEntry, *dp = &localDEntry;
  ENTER(0);
  flP = (struct file_lock *) lockStructP;

  localFile.f_dentry = &localDEntry;
  localDEntry.d_inode = (struct inode *)advObjP;

  /* Lock commands can have two different values.  Convert them at
   * entry to the portability layer so that we only have to check
   * for one of them.
   */
#if !defined(__64BIT__)
  if (cmd == F_GETLK64) cmd = F_GETLK;
  if (cmd == F_SETLK64) cmd = F_SETLK;
  if (cmd == F_SETLKW64) cmd = F_SETLKW;
#endif

  /* Callers have the option of passing a platform dependent lock structure
     (struct file_lock *lockStructP) or the generic (cxiFlock_t *flockP). */
  if (flockP)
  {
    flP = &fl;  /* Use a local file_lock structure */

    /* If there is a potential for blocking, must malloc the locking structure
       so it can persist until the lock becomes available (in Retry()). */

    if (cmd == F_SETLKW)
    {
#ifdef NFS_CLUSTER_LOCKS
      len = sizeof(struct file_lock) +
            sizeof(struct file) +
            sizeof(struct dentry);
#else
      len = sizeof(struct file_lock);
#endif
      flP = (struct file_lock*)cxiMallocUnpinned(len);
      if (flP == NULL)
      {
        rc = ENOMEM;
        goto exit;
      }
      cxiMemset(flP, 0, len);
#ifdef NFS_CLUSTER_LOCKS
      filp = (struct file*)((char *)flP + sizeof(struct file_lock));
      dp = (struct dentry *)((char *)filp + sizeof(struct file));
      filp->f_dentry = dp;
      dp->d_inode = (struct inode *)advObjP;
#endif
    }
    else
      cxiMemset(flP, 0, sizeof(*flP));

    locks_init_lock(flP);  /* Initialize list_head structs */
    if (flockP->l_file == NULL)
      flockP->l_file = filp;

    /* fl_wait needs to be initialized because when unlock happens, the
       linux routine locks_wake_up_blocks invokes our retry routine via
       fl_notify and then calls wake_up(fl_wait) on the assumption that
       the waiter is local. */

    cxiWaitEventInit((cxiWaitEvent_t *)&flP->fl_wait);

    cxiFlockToVFS(flockP, flP);
  }

  /* daemon didn't know the owner and required kernel code to fill it in. */
  if (!flP->fl_owner)
    flP->fl_owner = (fl_owner_t)cxiGetFcntlOwner(NULL);

#if 0
  /* Validate the file pointer.  Kernel locking routines are going to
     use these without verifying them.  If any of them are NULL, find
     out now before they generate a segment violation. */
  if ((!fP) || (!fP->f_dentry) || (!fP->f_dentry->d_inode))
  {
    if (cmd == F_GETLK)
      flP->fl_type = F_UNLCK;
    else
      rc = EINVAL;
    goto exit;
  }
#endif

  /* Note that this all depends on us having serialized such locking for
     this file from before the posix_test_lock() until after the
     posix_block_lock().  The revoke lock that we hold here provides us
     the necessary serialization. */

  TRACE7(TRACE_VNODE, 3, TRCID_FCNTLLOCK_ENTER,
         "cxiFcntlLock posix_lock_file: pid %d owner 0x%X inodeP 0x%X "
         "range 0x%lX-%lX cmd %s type %s\n",
         flP->fl_pid, flP->fl_owner, advObjP, flP->fl_start, flP->fl_end,
         (cmd == F_GETLK) ? "GETLK" : (cmd == F_SETLK) ? "SETLK" : "SETLKW",
         (flP->fl_type == F_RDLCK) ? "RDLCK" :
         (flP->fl_type == F_WRLCK) ? "WRLCK" : "UNLCK");

  if (cmd == F_GETLK)
  {
    /* Check for conflicts.  If found, return the information.
       If there are NO conflicts, return F_UNLCK in fl_type. */
#if LINUX_KERNEL_VERSION >= 2061700
    struct file_lock conf;
    gflP = &conf;
    rc = posix_test_lock(filp, flP, gflP);
    if (rc) {
      rc = 0;
#else
    if (NULL != (gflP = posix_test_lock(&localFile, flP))) {
#endif
      flP->fl_start = gflP->fl_start;
      flP->fl_end = gflP->fl_end;
      flP->fl_type = gflP->fl_type;
      flP->fl_pid = gflP->fl_pid;
      flP->fl_owner = gflP->fl_owner;
    }
    else
      flP->fl_type = F_UNLCK;

    TRACE6(TRACE_VNODE, 3, TRCID_FCNTLLOCK_GETLK,
           "cxiFcntlLock getlk: pid %d owner 0x%X inodeP 0x%X "
           "range 0x%lX-%lX type %s\n",
           flP->fl_pid, flP->fl_owner, advObjP, flP->fl_start, flP->fl_end,
           (flP->fl_type == F_RDLCK) ? "RDLCK" :
           (flP->fl_type == F_WRLCK) ? "WRLCK" : "UNLCK");
  }
  else
  { /* Begin: do the locking, but handle the blocking via our retry routine. */
    /* Test the lock.  What this really does for us is return the blocker
       if one exists.  This is needed to queue up the request if a conflicting
       lock is already held. */

#ifdef NFS_CLUSTER_LOCKS
    if (cmd == F_SETLKW) {
      flP->fl_flags |= FL_SLEEP;
      if (!flP->fl_lmops) {
        flP->fl_lmops = &lm_operations;
        flP->fl_lmops->fl_notify = (void *)RetryFcn;
      }
    }
    rc = POSIX_LOCK_FILE(filp, flP);
    if (rc == -EAGAIN && (cmd == F_SETLKW) &&
        flP->fl_lmops == &lm_operations)
    {
      /* Queue the blocker structures */
      keepLockElement = true;
      if (retry_idP)
        *retry_idP = (ulong)flP;  // returned to caller and saved in sleepElement
    }
#else
#if LINUX_KERNEL_VERSION >= 2061700
    if ((flP->fl_type == F_UNLCK) || !(posix_test_lock(&localFile, flP, cflP)))
#else
    if ((flP->fl_type == F_UNLCK) || !(cflP = posix_test_lock(&localFile, flP)))
#endif
    {
      /* No conflicting lock: get the lock for the caller. */
      rc = POSIX_LOCK_FILE(&localFile, flP);
    }
    else
    { /* Conflicting lock: ..... */
      rc = EAGAIN;

      if (cmd == F_SETLKW)
      {
        /*if (posix_locks_deadlock(flP, cflP))
        {
          rc = EDEADLK;
        }
        else*/
        {
          /* Queue the blocker structures */
          keepLockElement = true;
          if (retry_idP)
            *retry_idP = (ulong)flP;  // returned to caller and saved in sleepElement
#if LINUX_KERNEL_VERSION > 2060900
          flP->fl_lmops = &lm_operations;
          flP->fl_lmops->fl_notify = RetryFcn;
#else
          flP->fl_notify = RetryFcn;
#endif
#if LINUX_KERNEL_VERSION < 2061700
          posix_block_lock(cflP, flP);
#endif
        }
      }
    }
#endif

    TRACE2(TRACE_VNODE, 3, TRCID_FCNTLLOCK_EXIT,
           "cxiFcntlLock posix_lock_file: rc %d retry_id 0x%lX\n", rc, cflP);
  } /* End: do the locking, but handle the blocking via our retry routine. */

exit:

  if (flockP)
  {
    /* Caller wanted results in flockP */
    cxiVFSToFlock((void *)flP, flockP);

    /* If we allocated the locking structure and then didn't need to use
       it (the lock request didn't block), free it. */

    if ((flP != &fl) && (!keepLockElement)) {
      cxiFreeUnpinned(flP);
    }
  }

#ifdef NFS_CLUSTER_LOCKS
  if (rc < 0)
    rc = -rc;  /* make it positive */
#endif
  EXIT_RC(0, rc);
  return rc;
}

void cxiFcntlUnblock(void *retry_idP)
{
  struct file_lock *flP = (struct file_lock *)retry_idP;

  ENTER(0);
  /* Include some sanity checks on the retry id (file_lock)
     before passing it into the routine that does the work.
     It should be properly linked (via its list_head structures)
     in a file_lock_list that has blocked waiters.  Also,
     we would only be backing this out on behalf of the process that
     originally blocked, so verify the pid. */

  if (!list_empty(&flP->fl_block) && !list_empty(&flP->fl_link) &&
      flP->fl_next && flP->fl_pid == getpid())
  {
    POSIX_UNBLOCK_LOCK(flP);
  }
  EXIT(0);
}

int
cxiFcntlReset(void *vfsP, cxiPid_t mmfsd_pid)
{
  int rc = 0;
  struct super_block *sbP = (struct super_block *)vfsP;
  struct list_head *fllP;
  struct file_lock *fl;
  struct dentry *dentryP;

  ENTER(0);
  lock_kernel();

restart:

#if LINUX_KERNEL_VERSION >= 2061600
  //??? find a different way to clear locks; file_lock_list is not exported anymore
#else
  fllP = file_lock_list.next;

  while(fllP != &file_lock_list)
  {
    fl = list_entry(fllP, struct file_lock, fl_link);
    fllP = fllP->next;

    /* If there are mmfs lock structures, release them. */

    if (fl &&
        fl->fl_file &&
        fl->fl_file->f_dentry &&
        fl->fl_file->f_dentry->d_inode)
    {
      dentryP = fl->fl_file->f_dentry;

      /* If this lock belongs to the specified vfs, release advisory locks. */
      if (dentryP->d_sb == sbP)
      {
        /* remove all our locks */
        rc = gpfs_ops.gpfsFcntlReset((void *)dentryP->d_inode, mmfsd_pid);
        if (rc == ENOSYS)
          goto xerror;

        /* After freeing unknown numbers of locks in gpfsFcntlReset (all
           locks for the inode), restart from the top of the lock list */
        goto restart;
      }
    }
  }
#endif

xerror:
  unlock_kernel();
  EXIT_RC(0, rc);
  return rc;
}

void *
cxiGetPrivVfsP(void *vfsP)
{
  struct super_block *sbP = (struct super_block *)vfsP;

  /* Do some sanity checking */
  if ( (sbP->s_magic != GPFS_SUPER_MAGIC) ||
       ((UIntPtr) SBLOCK_PRIVATE(sbP) < GPFS_KERNEL_OFFSET) )
    printSuperList(sbP);
  LOGASSERT( sbP->s_magic == GPFS_SUPER_MAGIC );
  LOGASSERT( (UIntPtr) SBLOCK_PRIVATE(sbP) >= GPFS_KERNEL_OFFSET );

  return (SBLOCK_PRIVATE(sbP));
}


#ifdef NFS_DEBUG
/* These flags are defined in the kernel and control various debug printk
   calls.  This provides us a way to easily turn these on/off for
   debugging our NFS support. */
extern unsigned int nlm_debug;
extern unsigned int nfsd_debug;
extern unsigned int nfs_debug;
extern unsigned int rpc_debug;
#endif

int cxiTrace(cxiTrace_t trace)
{
#ifdef NFS_DEBUG
  int rc = 0;

  ENTER(0);
  switch (trace)
  {
    case cxiTraceNFS:
      nlm_debug = nfsd_debug = nfs_debug = rpc_debug = ~0;
      break;
    case cxiTraceNFSoff:
      nlm_debug = nfsd_debug = nfs_debug = rpc_debug = 0;
      break;
    default:
      rc = EINVAL;
      break;
  }
  EXIT_RC(0, rc);
  return rc;
#else
  return ENOSYS;
#endif
}

void cxiFlockToVFS(eflock_t* lckdatP, void* vP)
{
  struct file_lock* flP = (struct file_lock *)vP;

  ENTER(0);
  if ((flP) && (lckdatP))
  {
    flP->fl_pid   = lckdatP->l_pid;
    flP->fl_owner = lckdatP->l_owner;
    flP->fl_type  = lckdatP->l_type;
    flP->fl_start = lckdatP->l_start;
    flP->fl_flags = FL_POSIX;
#ifdef NFS_CLUSTER_LOCKS
    flP->fl_lmops = lckdatP->l_lmops;
    flP->fl_file  = lckdatP->l_file;
    flP->fl_ops   = NULL;
#else
#if LINUX_KERNEL_VERSION < 2061700
    if (lckdatP->l_caller == L_CALLER_LOCKD)
      flP->fl_flags |= FL_LOCKD;
#endif
#endif
    if (lckdatP->l_len == 0)
      flP->fl_end = FL_OFFSET_MAX;
    else
      flP->fl_end = lckdatP->l_len + lckdatP->l_start - 1;
  }
  EXIT(0);
  return;
}

#ifdef NFS_CLUSTER_LOCKS
int cxiVFSCallback(eflock_t* lckreqP, eflock_t* lckdatP,
                   int(* callback)(void *, void *, int), int result)
{
  struct file_lock fl;
  struct file *fileP;
  struct file_lock conf, *confP = NULL;
  int rc;

  ENTER(0);

  cxiFlockToVFS(lckreqP, &fl);
  fileP = fl.fl_file;
  if (!fileP) {
    return -1;
  }
  if (lckdatP) {
    cxiFlockToVFS(lckdatP, &conf);
    confP = &conf;
  }
  if (!result) {  /* try to get the posix lock */
    rc = POSIX_LOCK_FILE(fileP, &fl);
    if (rc)
      callback(&fl, NULL, EBUSY);
    else {  /* got the posix lock */
      rc = callback(&fl, confP, result);
      if (rc) {  /* too late, free the lock */
        fl.fl_type = F_UNLCK;
        rc = POSIX_LOCK_FILE(fileP, &fl);
      }
    }
  }
  else
    rc = callback(&fl, confP, result);

#ifdef NFS_CLUSTER_LOCKS
  if (rc < 0)
    rc = -rc;  /* make it positive */
#endif
  EXIT_RC(0, rc);
  return rc;
}
#endif

void cxiVFSToFlock(void *vP, eflock_t *lckdatP)
{
  struct file_lock* flP = (struct file_lock *)vP;

  ENTER(0);
  if ((flP) && (lckdatP))
  {
    lckdatP->l_pid   = flP->fl_pid;
    lckdatP->l_owner = flP->fl_owner;
    lckdatP->l_type  = flP->fl_type;
    lckdatP->l_start = flP->fl_start;
    lckdatP->l_flags = flP->fl_flags;
#ifdef NFS_CLUSTER_LOCKS
    lckdatP->l_lmops = flP->fl_lmops;
    lckdatP->l_file  = flP->fl_file;
    if (lckdatP->l_lmops)  /* must be lockd or nfsd */
#else
#if LINUX_KERNEL_VERSION >= 2061700
    if (lckdatP->l_lmops)  /* must be lockd or nfsd */
#else
    if (flP->fl_flags & FL_LOCKD)
#endif
#endif
      lckdatP->l_caller = L_CALLER_LOCKD;
    else
      lckdatP->l_caller = L_CALLER_NULL;
    if (flP->fl_end == FL_OFFSET_MAX)
      lckdatP->l_len = 0;
    else
      lckdatP->l_len = flP->fl_end - flP->fl_start + 1;
  }
  EXIT(0);
  return;
}
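
/* A minimal user-space sketch (not compiled into this module) of the
 * byte-range conversion used by cxiFlockToVFS/cxiVFSToFlock above: a
 * (start, len) pair with len == 0 means "to end of file" and maps to
 * fl_end = FL_OFFSET_MAX; otherwise fl_end = start + len - 1 (inclusive).
 * OFFSET_MAX_SKETCH and the function names below are illustrative only.
 */
#if 0
#define OFFSET_MAX_SKETCH 0x7fffffffffffffffLL

static long long lenToEndSketch(long long start, long long len)
{
  if (len == 0)
    return OFFSET_MAX_SKETCH;   /* lock extends to end of file */
  return start + len - 1;       /* e.g. start 100, len 50 -> end 149 */
}

static long long endToLenSketch(long long start, long long end)
{
  if (end == OFFSET_MAX_SKETCH)
    return 0;                   /* 0 means "to end of file" again */
  return end - start + 1;       /* e.g. start 100, end 149 -> len 50 */
}
#endif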


/* Sleep for the indicated number of milliseconds */
void cxiSleep(int ms)
{
  ENTER(0);
  TRACE1(TRACE_VNODE, 9, TRCID_SLEEP,
         "cxiSleep: begin delay %d\n", ms);
  current->state = TASK_INTERRUPTIBLE;
  /* For large HZ, rearrange the jiffies calculation and
     use a presumably larger word size to minimize overflow risk */
  if (unlikely(HZ > 1000))
    schedule_timeout(((long)ms)*HZ/1000);
  else
    schedule_timeout(ms/(1000/HZ));
  TRACE2(TRACE_VNODE, 9, TRCID_SLEEP_END,
         "cxiSleep: end delay %d HZ %d\n", ms, HZ);
  EXIT(0);
}
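
/* A minimal user-space sketch (not compiled into this module) of the
 * millisecond-to-jiffies conversion above.  For HZ <= 1000 the code divides
 * ms by (1000/HZ); for larger HZ it computes ms*HZ/1000 in a long so that
 * 1000/HZ is never truncated to zero.  hzSketch is illustrative only.
 */
#if 0
static long msToJiffiesSketch(int ms, long hzSketch)
{
  if (hzSketch > 1000)
    return ((long)ms) * hzSketch / 1000;  /* e.g. 100 ms at HZ 1024 -> 102 jiffies */
  return ms / (1000 / hzSketch);          /* e.g. 100 ms at HZ 250  -> 25 jiffies  */
}
#endif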


void cxiOpenNFS(void *iP)
{
  struct inode *inodeP = (struct inode *)iP;
  int refcount;

  /* A reference is placed on the cxiNode here when the first NFS reference
     is added */
  ENTER(0);
  refcount = cxiRefOSNode(NULL, ((cxiNode_t *)(cxiGetCnP(inodeP))), iP, 1);

  TRACE7(TRACE_VNODE, 3, TRCID_OPENNFS,
         "openNFS iP 0x%lX ino %d (0x%X) mode 0x%X nlink %d gen_ip 0x%lX "
         "refcount %d\n",
         inodeP, (inodeP) ? inodeP->i_ino : -1,
         (inodeP) ? inodeP->i_ino : -1,
         (inodeP) ? inodeP->i_mode : -1,
         (inodeP) ? inodeP->i_nlink : -1,
         (inodeP) ? inodeP->PRVINODE : NULL,
         refcount);

  DBGASSERT(refcount != 0);
  EXIT(0);
}


int cxiCloseNFS(void *vP, void *viP)
{
  int rc = 0;
  struct inode *iP = (struct inode *)vP;

  /* If viP is NULL, the file was never actually opened.
     If viP is not NULL, close it. */
  ENTER(0);
  if (viP == NULL)
    rc = 0;
  else {
    if (VP_TO_PVP(iP) != NULL && VP_TO_CNP(iP) != NULL) {
      rc = gpfs_ops.gpfsClose(VP_TO_PVP(iP), VP_TO_CNP(iP), FREAD|FWRITE,
                              (struct MMFSVInfo *)viP, true);
      cxiPutOSNode((void *)iP);
    }
  }

  EXIT_RC(0, rc);
  return rc;
}

static int cxiNFSCluster = 0;

void cxiSetNFSCluster(int set)
{
  cxiNFSCluster = set;
}

/* To avoid failing the NFS client, the NFSD thread is put to sleep.  Another
   node will take over this client and the operation will continue without any
   errors to the application.
*/
void cxiNFSError(int rc, const char *str)
{
  TRACE2(TRACE_VNODE, 9, TRCID_NFS_ERROR,
         "cxiNFSError: %s got rc %d\n", str, rc);
  if (cxiNFSCluster && cxiIsNFSThread() && (rc == ESTALE || rc == -ESTALE))
  {
    TRACE2(TRACE_VNODE, 1, TRCID_NFS_ERROR_1,
           "cxiNFSError: NFS got error %d from %s sleep\n", rc, str);
    cxiSleep(120000);  // wait 120 seconds
  }
}

void * cxiGetNfsP(void *vP)
{
  if (vP && VP_TO_CNP((struct inode *)vP))
    return VP_TO_NFSP((struct inode *)vP);
  else
    return NULL;
}

void cxiSetNfsP(void *vP, void *newP)
{
  if (VP_TO_CNP((struct inode *)vP))
    VP_TO_NFSP((struct inode *)vP) = newP;
}

void * cxiGetCnP(void *vP)
{ return (void *)VP_TO_CNP((struct inode *)vP); }

void * cxiGetPvP(void *vP)
{ return (void *)VP_TO_PVP((struct inode *)vP); }

void * cxiGNPtoVP(void *vP)
{ return (void *)GNP_TO_VP((struct cxiNode_t *)vP); }

/* Main routine of kproc */
static int kprocMain(void *argP)
{
  cxiKProcData_t *kpdP = (cxiKProcData_t *)argP;

  /* Change our process name */
  ENTER(0);
  current->comm[sizeof(current->comm) - 1] = '\0';
  strncpy(current->comm, kpdP->nameP, sizeof(current->comm) - 1);

  /* Change parent of a kernel process so that when it exits, it won't
   * send a SIGCHLD signal to the process that created it, and it won't
   * be left as a zombie.
   */
  DAEMONIZE(kpdP->nameP);

  /* Call the function specified by startKProc */
  kpdP->func(kpdP);
  EXIT(0);
  return 0;
}

/* Create a new kernel process */
cxiPid_t
cxiStartKProc(struct cxiKProcData_t *kpdP)
{
  cxiPid_t pid = kernel_thread(kprocMain, kpdP, kpdP->kprocFlags);
  ENTER(0);
  kpdP->pid = pid > 0 ? pid : KPROC_FAILED_PID;

  TRACE2(TRACE_VNODE, 1, TRCID_CXISTART_KPROC_LINUX,
         "cxiStartKProc %s pid %d \n", kpdP->nameP, kpdP->pid);
  EXIT(0);
  return kpdP->pid;
}

void
cxiStopKProc(struct cxiKProcData_t *kpdP)
{
  cxiPid_t pid;

  ENTER(0);
  cxiBlockingMutexAcquire(&kpdP->lock);

  TRACE2(TRACE_VNODE, 1, TRCID_CXISTOP_KPROC_LINUX,
         "cxiStopKProc: %s pid %d \n", kpdP->nameP, kpdP->pid);

  if (!KPROC_RUNNING(kpdP))
  {
    cxiBlockingMutexRelease(&kpdP->lock);
    EXIT(0);
    return;
  }

  pid = kpdP->pid;              // Cache pid before signal/wait
  kpdP->terminate = true;
  cxiWaitEventSignal(&kpdP->kprocEvent);

  while (kpdP->pid != KPROC_UNASSIGNED_PID)
    cxiWaitEventWait(&kpdP->startStopEvent, &kpdP->lock, 0);

  cxiBlockingMutexRelease(&kpdP->lock);
  EXIT(0);
}

/*-------------------------------------------------------------------
 * logAssertFailed - Subroutine consolidating logGenIF() and
 *                   DoPanic() calls.
 *------------------------------------------------------------------*/

static char PanicMsgBuf[2048];

void cxiPanic(const char* panicStrP)
{
  printk(GPFS_NOTICE "kp %d: cxiPanic: %s\n", cxiGetThreadId(), panicStrP);
  TRACE1(TRACE_ERRLOG, 0, TRCID_PANIC, "cxiPanic: %s\n", panicStrP);
#ifndef DISABLE_KERNEL_PANIC
  BUG();
#endif
}

static void
DoPanic(char* condP, char* filenameP, int lineNum, Int32 retCode,
        Int32 reasonCode, char *dataStr)
{
  const char *p;
  int bytesLeft;

  p = cxiStrrchr(filenameP, '/');
  if (p == NULL)
    p = filenameP;
  else
    p += 1;

  sprintf(PanicMsgBuf, "%s:%d:%d:%d:", p, lineNum, retCode, reasonCode);
  bytesLeft = sizeof(PanicMsgBuf) - strlen(PanicMsgBuf);
  if (dataStr)
  {
    strncat(PanicMsgBuf, dataStr, bytesLeft-1);
    bytesLeft = sizeof(PanicMsgBuf) - strlen(PanicMsgBuf);
  }
  strncat(PanicMsgBuf, ":", bytesLeft-1);
  bytesLeft = sizeof(PanicMsgBuf) - strlen(PanicMsgBuf);
  if (condP)
    strncat(PanicMsgBuf, condP, bytesLeft-1);
  cxiPanic(PanicMsgBuf);
}

#ifdef MODULE
void
logAssertFailed(UInt32 flags,         /* LOG_FATAL_ERROR or LOG_NONFATAL_ERROR */
                char  *srcFileName,   /* __FILE__ */
                UInt32 srcLineNumber, /* __LINE__ */
                Int32  retCode,       /* return code value */
                Int32  reasonCode,    /* normally errno */
                UInt32 logRecTag,     /* tag if have associated error log rec */
                char  *dataStr,       /* assert data string */
                char  *failingExpr)   /* expression that evaluated to false */
{
  int i;

  printk("GPFS logAssertFailed: %s file %s line %d\n",
         failingExpr, srcFileName, srcLineNumber);
  ENTER(0);
  TRACE3(TRACE_ERRLOG, 0, TRCID_MODULE_LOGASSERT_1,
         "logAssertFailed: %s retCode %d reasonCode %d\n",
         failingExpr, retCode, reasonCode);
  TRACE2(TRACE_ERRLOG, 0, TRCID_MODULE_LOGASSERT_2,
         "logAssertFailed: file %s line %d\n", srcFileName, srcLineNumber);
#ifndef GPFS_PRINTF
  /* fsync buffered lxtrace records */
  trc_fsync();

#ifdef STOP_TRACE_ON_FAILURE
  /* Turn off tracing right after the failure occurs.  This may only turn
     off tracing in the kernel. */
  for (i=0 ; i<MAX_TRACE_CLASSES ; i++)
    TraceFlagsP[i] = 0;
#endif

  /* Wait 10 seconds to allow the lxtrace daemon to complete the sync. */
  cxiSleep(10000);
#endif
  gpfs_ops.gpfsDaemonToDie(srcFileName, srcLineNumber, retCode, reasonCode,
                           dataStr, failingExpr);

  DoPanic(failingExpr, srcFileName, srcLineNumber, retCode, reasonCode,
          dataStr);
}
#else /* !MODULE */
void
logAssertFailed(UInt32 flags,
                char  *srcFileName,
                UInt32 srcLineNumber,
                Int32  retCode,
                Int32  reasonCode,
                UInt32 logRecTag,
                char  *dataStr,
                char  *failingExpr);
#endif /* MODULE */
1151 | |
---|
1152 | typedef struct cxiWaitElement_t |
---|
1153 | { |
---|
1154 | cxiWaitList_t waitList; /* previous and next element in chain */ |
---|
1155 | |
---|
1156 | /* Linux would normally organize a wait_queue_head_t with any number |
---|
1157 | * of wait_queue_t elements. However since we're implementing "wakeup |
---|
1158 | * with return code" we have to ensure the OS wakes up the exact sleeper |
---|
1159 | * we want. Thus we have only a one to one relationship to ensure the |
---|
1160 | * OS can only pick our favorite. |
---|
1161 | */ |
---|
1162 | wait_queue_head_t qhead; |
---|
1163 | wait_queue_t qwaiter; |
---|
1164 | int wakeupRC; /* wakeup return code */ |
---|
1165 | |
---|
1166 | } cxiWaitElement_t; |
---|
1167 | |
---|
1168 | |
---|
1169 | #define CXI_WAIT_LIST_ADD(headP, elementP) \ |
---|
1170 | (headP)->prevP->nextP = (elementP); \ |
---|
1171 | (elementP)->prevP = (headP)->prevP; \ |
---|
1172 | (headP)->prevP = (elementP); \ |
---|
1173 | (elementP)->nextP = (headP); |
---|
1174 | |
---|
1175 | #define CXI_WAIT_LIST_REMOVE(elementP) \ |
---|
1176 | (elementP)->prevP->nextP = (elementP)->nextP; \ |
---|
1177 | (elementP)->nextP->prevP = (elementP)->prevP; |
---|
1178 | |
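/* A minimal user-space sketch (not compiled into this module) showing how
 * the CXI_WAIT_LIST_ADD/REMOVE macros above maintain a circular
 * doubly-linked list anchored at a dummy head.  struct sketchNode is a
 * stand-in for cxiWaitList_t; the macros only rely on nextP/prevP members.
 */
#if 0
#include <assert.h>

struct sketchNode { struct sketchNode *nextP, *prevP; };

static void waitListSketch(void)
{
  struct sketchNode head, a, b;

  /* An empty list is a head that points at itself in both directions. */
  head.nextP = head.prevP = &head;

  CXI_WAIT_LIST_ADD(&head, &a);    /* appends at the tail: head <-> a */
  CXI_WAIT_LIST_ADD(&head, &b);    /* head <-> a <-> b */
  assert(head.nextP == &a && a.nextP == &b && b.nextP == &head);

  CXI_WAIT_LIST_REMOVE(&a);        /* unlink a: head <-> b */
  assert(head.nextP == &b && b.prevP == &head);
}
#endif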

/* Initialize abstract wait event with OS specific
 * initialization function
 */
void
cxiWaitEventInit(cxiWaitEvent_t *weP)
{
  spinlock_t *lockP = (spinlock_t *)&weP->lword;

  spin_lock_init(lockP);
  weP->waitList.nextP = weP->waitList.prevP = &weP->waitList;
}

Boolean
cxiWaitEventHasWaiters(cxiWaitEvent_t *weP)
{
  unsigned long flags;
  spinlock_t *lockP = (spinlock_t *)(weP->lword);
  Boolean rc;

  SPIN_LOCK_IRQ(lockP, flags);
  rc = (weP->waitList.nextP != &weP->waitList);
  SPIN_UNLOCK_IRQ(lockP, flags);
  return rc;
}

/* Do not add trace records.  Some callers depend on not being
 * interrupted by the trace daemon.
 */
enum WakeType { wBroadcast, wSignal, wWakeOne };
static inline void
doWakeup(cxiWaitEvent_t *wEventP, enum WakeType wtype, int wakeupRC)
{
  unsigned long flags;
  spinlock_t *lockP = (spinlock_t *)(wEventP->lword);
  cxiWaitList_t *headP;
  cxiWaitList_t *tmpP;
  cxiWaitElement_t *wP;

  SPIN_LOCK_IRQ(lockP, flags);

  /* We wake up from the front back (FIFO semantics).
   * There's only one wait element per wait_queue_head_t so
   * record the return code and wake up the one element.
   */
  headP = &wEventP->waitList;

  for (tmpP = headP->nextP; tmpP != headP; tmpP = tmpP->nextP)
  {
    wP = list_entry(tmpP, cxiWaitElement_t, waitList);
    wP->wakeupRC = wakeupRC;

    wake_up(&wP->qhead);
    if (wtype != wBroadcast)
    {
      /* The difference between wSignal and wWakeOne is that the latter
         guarantees that multiple wake up calls will each pick a different
         thread if more than one is waiting.  With wSignal, if a thread is
         awakened but hasn't had a chance to run, then subsequent wake up
         calls might all wake the same thread.

         On AIX, the calling routine (e_wakeup_one) removes the waiter from
         the queue, unlike Linux where removal is done by the waiting
         thread when it wakes up.  Nothing special has to be done on AIX to
         get the wWakeOne style of wakeup.

         Note: This is an inline routine and the wtype argument is a
         compile-time constant, so the "if" tests in this routine are done
         by the compiler and do not generate any code. */

      if (wtype == wWakeOne)
      {
        /* Move this entry to the tail of the list so that the next wakeup
           call will pick somebody else. */
        CXI_WAIT_LIST_REMOVE(tmpP);
        CXI_WAIT_LIST_ADD(headP, tmpP);
      }
      break;
    }
  }
  SPIN_UNLOCK_IRQ(lockP, flags);
}

int
cxiCopyIn(char *from, char *to, unsigned long size)
{
  /* The daemon needs to bypass access checks since a copy to the
   * shared segment would inadvertently fail.
   */
  ENTER(0);
  if (PROCESS_GROUP(current) == DaemonPGrp)
    __copy_from_user(to, from, size);
  else
    if (copy_from_user(to, from, size))
    {
      EXIT_RC(0, EFAULT);
      return EFAULT;
    }
  EXIT(0);
  return 0;
}

int
cxiCopyOut(char *from, char *to, unsigned long size)
{
  int ignore;
  /* The daemon needs to bypass access checks since a copy to the
   * shared segment would inadvertently fail.
   */
  ENTER(0);
  if (PROCESS_GROUP(current) == DaemonPGrp)
    ignore = __copy_to_user(to, from, size);
  else
    if (copy_to_user(to, from, size))
    {
      EXIT_RC(0, EFAULT);
      return EFAULT;
    }
  EXIT(0);
  return 0;
}

int
cxiCopyInstr(char *from, char *to, unsigned long size, unsigned long *len)
{
  long retval;

  ENTER(0);
  retval = strncpy_from_user(to, from, size);
  if ((retval > 0) && (retval <= size))
  {
    *len = retval;
    EXIT(0);
    return 0;
  }
  *len = 0;
  if (retval < 0)
    retval = EFAULT;
  else
    retval = E2BIG;
  EXIT_RC(0, retval);
  return (int)retval;
}

long cxiSafeGetLong(long* from)
{
#if LINUX_KERNEL_VERSION >= 2060000
  long tmp;
  (void)__get_user_nocheck(tmp, from, sizeof(long));
  return tmp;
#else
  return *from;
#endif
}

int cxiSafeGetInt(int* from)
{
#if LINUX_KERNEL_VERSION >= 2060000
  int tmp;
  __get_user_nocheck(tmp, from, sizeof(int));
  return tmp;
#else
  return *from;
#endif
}

void cxiSafePutLong(long val, long* to)
{
#if LINUX_KERNEL_VERSION >= 2060000
  __put_user_nocheck(val, to, sizeof(long));
#else
  *to = val;
#endif
}

void cxiSafePutInt(int val, int* to)
{
#if LINUX_KERNEL_VERSION >= 2060000
  __put_user_nocheck(val, to, sizeof(int));
#else
  *to = val;
#endif
}
---|

#ifdef GPFS_ARCH_X86_64
/* Check if 64-bit user process */
int
cxiIS64U(char *addr)
{
#if LINUX_KERNEL_VERSION > 2060500
  return !(test_thread_flag(TIF_IA32));
#else
  return !(current->thread.flags & THREAD_IA32);
#endif
}
#endif

int
socket_aio_dequeue()
{
  return -1;
}

/* Transfer data from buffer(s) in user space to or from a buffer in the
   kernel. */
int
cxiUiomove(register char* kBufP,           /* address of kernel buffer */
           register unsigned long nBytes,  /* #bytes to transfer */
           Boolean toKernel,               /* direction of xfer(read/write)*/
           register struct cxiUio_t* uioP) /* user area description */
{
  register struct cxiIovec_t * iovP;
  unsigned long cnt;
  int rc;
#ifdef TRACE_IO_DATA
  char* origKBufP = kBufP;
  int trcdata[4];
#endif
  int ignore;

  ENTER(0);
  TRACE4(TRACE_FOPS, 6, TRCID_CXISYSTEM_037,
         "cxiUiomove enter: kBufP 0x%lX uioP 0x%lX nBytes %d toKernel %d\n",
         kBufP, uioP, nBytes, toKernel);
  if (uioP->uio_resid <= 0)
  {
    EXIT_RC(0, ENOMEM);
    return ENOMEM;
  }
  rc = 0;
  if (uioP->uio_iovcnt == 1)
  {
    /*
     * Fastpath for most common case of iovcnt == 1.  Saves a
     * few instructions.
     */
    iovP = uioP->uio_iov;
    cnt = iovP->iov_len;
    if (cnt <= 0)
    {
      uioP->uio_iovcnt--;
      uioP->uio_iov++;
      uioP->uio_iovdcnt++;
      EXIT(0);
      return 0;
    }
    if (cnt > nBytes)
      cnt = nBytes;

    if (toKernel)
    {
      /* The daemon needs to bypass access checks since a copy to the
       * shared segment would inadvertently fail.  Copies to
       * kernel address space also perform no validity check.
       */
      if (PROCESS_GROUP(current) == DaemonPGrp || uioP->uio_segflg == UIO_SYSSPACE)
        __copy_from_user(kBufP, (char *)iovP->iov_base, cnt);
      else
        if (copy_from_user(kBufP, (char *)iovP->iov_base, cnt))
        {
          EXIT_RC(0, EFAULT);
          return EFAULT;
        }
    }
    else
    {
      /* The daemon needs to bypass access checks since a copy to the
       * shared segment would inadvertently fail.  Copies to
       * kernel address space also perform no validity check.
       */
      if (PROCESS_GROUP(current) == DaemonPGrp || uioP->uio_segflg == UIO_SYSSPACE)
        ignore = __copy_to_user((char *)iovP->iov_base, kBufP, cnt);
      else
        if (copy_to_user((char *)iovP->iov_base, kBufP, cnt))
        {
          EXIT_RC(0, EFAULT);
          return EFAULT;
        }
    }

    iovP->iov_base = (char *)iovP->iov_base + cnt;
    iovP->iov_len -= cnt;
    uioP->uio_resid -= cnt;
    uioP->uio_offset += cnt;
#ifdef TRACE_IO_DATA
    if (cnt >= sizeof(trcdata))
      memcpy(trcdata, origKBufP, sizeof(trcdata));
    else
    {
      memset(trcdata, 0xAA, sizeof(trcdata));
      memcpy(trcdata, origKBufP, cnt);
    }
    TRACE5(TRACE_FOPS, 7, TRCID_CXISYSTEM_039a,
           "uiomove exit 1: rc %d data %08X %08X %08X %08X\n",
           rc, trcdata[0], trcdata[1], trcdata[2], trcdata[3]);
#else
    TRACE1(TRACE_FOPS, 7, TRCID_CXISYSTEM_039,
           "uiomove exit 1: rc %d\n",
           rc);
#endif
    EXIT_RC(0, rc);
    return rc;
  }
  while (nBytes > 0 && uioP->uio_resid && rc == 0)
  {
    if (uioP->uio_iovcnt <= 0)
    {
      EXIT_RC(0, ENOMEM);
      return ENOMEM;
    }
    iovP = uioP->uio_iov;
    cnt = iovP->iov_len;
    if (cnt <= 0)
    {
      uioP->uio_iovcnt--;
      uioP->uio_iov++;
      uioP->uio_iovdcnt++;
      continue;
    }
    if (cnt > nBytes)
      cnt = nBytes;

    if (toKernel)
    {
      /* The daemon needs to bypass access checks since a copy to the
       * shared segment would inadvertently fail.  Copies to
       * kernel address space also perform no validity check.
       */
      if (PROCESS_GROUP(current) == DaemonPGrp || uioP->uio_segflg == UIO_SYSSPACE)
        __copy_from_user(kBufP, (char *)iovP->iov_base, cnt);
      else
        if (copy_from_user(kBufP, (char *)iovP->iov_base, cnt))
        {
          EXIT_RC(0, EFAULT);
          return EFAULT;
        }
    }
    else
    {
      /* The daemon needs to bypass access checks since a copy to the
       * shared segment would inadvertently fail.  Copies to
       * kernel address space also perform no validity check.
       */
      if (PROCESS_GROUP(current) == DaemonPGrp || uioP->uio_segflg == UIO_SYSSPACE)
        ignore = __copy_to_user((char *)iovP->iov_base, kBufP, cnt);
      else
        if (copy_to_user((char *)iovP->iov_base, kBufP, cnt))
        {
          EXIT_RC(0, EFAULT);
          return EFAULT;
        }
    }
    iovP->iov_base = (char *)iovP->iov_base + cnt;
    iovP->iov_len -= cnt;
    uioP->uio_resid -= cnt;
    uioP->uio_offset += cnt;
    kBufP += cnt;
    nBytes -= cnt;
  }
#ifdef TRACE_IO_DATA
  cnt = kBufP - origKBufP;
  if (cnt >= sizeof(trcdata))
    memcpy(trcdata, origKBufP, sizeof(trcdata));
  else
  {
    memset(trcdata, 0xAA, sizeof(trcdata));
    memcpy(trcdata, origKBufP, cnt);
  }
  TRACE5(TRACE_FOPS, 7, TRCID_CXISYSTEM_041a,
         "uiomove exit 2: rc %d data %08X %08X %08X %08X\n",
         rc, trcdata[0], trcdata[1], trcdata[2], trcdata[3]);
#else
  TRACE1(TRACE_FOPS, 7, TRCID_CXISYSTEM_041,
         "uiomove exit 2: rc %d\n",
         rc);
#endif
  EXIT_RC(0, rc);
  return rc;
}

/*
  Try to force some sanity checks at compile time
*/
/* TO DO: revise this to handle comparisons beyond equality/inequality */
/* STATIC_DBGASSERT(sizeof(spinlock_t), SPINLOCK_T_SIZE); */

/* A routine to check that the definitions in our cxiTypes.h
 * files are equivalent to the system definitions.  The module
 * should not load if it receives an error from this routine.
 */
1570 | int |
---|
1571 | cxiCheckTypes() |
---|
1572 | { |
---|
1573 | int rc = 0; |
---|
1574 | ENTER(0); |
---|
1575 | |
---|
1576 | /* Make sure cxiBlockingMutex_t fits in the space provided. If not, |
---|
1577 | the implementation of the cxiBlockingMutex... routines needs to |
---|
1578 | use the embedded space to record a pointer to kmalloc'ed space holding |
---|
1579 | the semaphore. */ |
---|
1580 | if (sizeof(struct semaphore) > GPFS_LINUX_SEM_SIZE) |
---|
1581 | { |
---|
1582 | printk("cxiCheckTypes: semaphore %ld > GPFS_LINUX_SEM_SIZE %ld\n", |
---|
1583 | sizeof(struct semaphore), GPFS_LINUX_SEM_SIZE); |
---|
1584 | rc = 1; |
---|
1585 | } |
---|
1586 | |
---|
1587 | /* Size of spinlock_t is smaller for UP case with gcc 3.x, so just |
---|
1588 | ensure SPINLOCK_T_SIZE is large enough for both the UP and SMP case. */ |
---|
1589 | if (sizeof(spinlock_t) > SPINLOCK_T_SIZE) |
---|
1590 | { |
---|
1591 | printk("cxiCheckTypes: spinlock_t %ld > SPINLOCK_T_SIZE %ld\n", |
---|
1592 | sizeof(spinlock_t), SPINLOCK_T_SIZE); |
---|
1593 | rc = 2; |
---|
1594 | } |
---|
1595 | |
---|
1596 | /* Ensure that size of pid_t matches cxiThreadId (32-bits) */ |
---|
1597 | if (sizeof(pid_t) != sizeof(cxiThreadId)) |
---|
1598 | { |
---|
1599 | printk("cxiCheckTypes: pid_t %ld != cxiThreadId %ld\n", |
---|
1600 | sizeof(pid_t), sizeof(cxiThreadId)); |
---|
1601 | rc = 3; |
---|
1602 | } |
---|
1603 | |
---|
1604 | if (rc > 0) |
---|
1605 | TRACE1(TRACE_TASKING, 2, TRCID_CXISYSTEM_CHKTYPES, |
---|
1606 | "cxiCheckTypes: system type mismatch on type number %d!\n", rc); |
---|
1607 | EXIT_RC(0, rc); |
---|
1608 | return rc; |
---|
1609 | } |
---|
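/* Illustrative sketch (not part of the original module): the TO DO above
 * asks for compile-time checks beyond simple equality.  A common C idiom
 * is a macro that declares an array whose size becomes negative when the
 * condition is false, so the build fails instead of waiting for
 * cxiCheckTypes() to run at module load.  The macro name CXI_STATIC_ASSERT
 * and the tags in the commented examples are hypothetical. */
#define CXI_STATIC_ASSERT(cond, tag) \
  typedef char cxiStaticAssert_##tag[(cond) ? 1 : -1]

/* Example uses, mirroring the run-time checks in cxiCheckTypes():
   CXI_STATIC_ASSERT(sizeof(pid_t) == sizeof(cxiThreadId), pidMatchesThreadId);
   CXI_STATIC_ASSERT(sizeof(spinlock_t) <= SPINLOCK_T_SIZE, spinlockFits);    */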
1610 | |
---|
1611 | /* Routine to get current time of day in nanosecond format. |
---|
1612 | */ |
---|
1613 | int |
---|
1614 | cxiGetTOD(cxiTimeStruc_t *tsP) |
---|
1615 | { |
---|
1616 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
1617 | struct timespec ts; |
---|
1618 | #else |
---|
1619 | struct timeval tv; |
---|
1620 | #endif |
---|
1621 | |
---|
1622 | ENTER(0); |
---|
1623 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
1624 | ts = CURRENT_TIME; |
---|
1625 | tsP->tv_sec = ts.tv_sec; |
---|
1626 | tsP->tv_nsec = ts.tv_nsec; |
---|
1627 | #else |
---|
1628 | /* This call returns microseconds so we fudge it to nanoseconds */ |
---|
1629 | do_gettimeofday(&tv); |
---|
1630 | tsP->tv_sec = tv.tv_sec; |
---|
1631 | tsP->tv_nsec = tv.tv_usec * 1000; |
---|
1632 | #endif |
---|
1633 | |
---|
1634 | EXIT(0); |
---|
1635 | return 0; |
---|
1636 | } |
---|
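/* Illustrative sketch (not part of the original module): a typical caller
 * measures an interval with two cxiGetTOD() calls and subtracts the results.
 * The helper name cxiTimeDiffNsec is hypothetical; tv_sec and tv_nsec are
 * the fields filled in by cxiGetTOD() above. */
static inline long long cxiTimeDiffNsec(const cxiTimeStruc_t *startP,
                                        const cxiTimeStruc_t *endP)
{
  /* Assumes *endP was sampled after *startP */
  return ((long long)(endP->tv_sec - startP->tv_sec)) * 1000000000LL +
         ((long long)endP->tv_nsec - (long long)startP->tv_nsec);
}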
1637 | |
---|
1638 | Boolean |
---|
1639 | cxiIsNFSThread() |
---|
1640 | { |
---|
1641 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1642 | /* Note comparison against a multibyte character constant (not a string |
---|
1643 | constant). Order of characters in word is reversed due to little- |
---|
1644 | endian representation of integers. */ |
---|
1645 | if (* ((int*)&current->comm[0]) != 0x6473666e) // 'dsfn' |
---|
1646 | return false; |
---|
1647 | if (* ((char*)&current->comm[4]) == '\0') |
---|
1648 | return true; |
---|
1649 | return (* ((int*)&current->comm[2]) == 0x00346473); // '4ds' |
---|
1650 | # else |
---|
1651 | if ((strcmp(current->comm, "nfsd") == 0) || |
---|
1652 | (strcmp(current->comm, "nfsd4") == 0)) |
---|
1653 | return true; |
---|
1654 | return false; |
---|
1655 | # endif |
---|
1656 | } |
---|
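/* Illustrative sketch (not part of the original module): the multibyte
 * constants used above are just the first four bytes of current->comm
 * loaded as a little-endian integer, e.g. "nfsd" becomes 0x6473666e with
 * 'n' in the low-order byte.  The helper name cxiNameWord is hypothetical
 * and is shown only to document the encoding. */
static inline unsigned int cxiNameWord(const char *nameP)
{
  /* Build the word byte by byte to make the little-endian layout explicit */
  return ((unsigned int)(unsigned char)nameP[0])       |
         ((unsigned int)(unsigned char)nameP[1] <<  8) |
         ((unsigned int)(unsigned char)nameP[2] << 16) |
         ((unsigned int)(unsigned char)nameP[3] << 24);
}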
1657 | |
---|
1658 | Boolean |
---|
1659 | cxiIsLockdThread() |
---|
1660 | { |
---|
1661 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1662 | /* Note comparison against a multibyte character constant (not a string |
---|
1663 | constant). Order of characters in word is reversed due to little- |
---|
1664 | endian representation of integers. */ |
---|
1665 | if ((* ((int*)&current->comm[0]) != 0x6b636f6c) | // 'kcol' |
---|
1666 | (* ((int*)&current->comm[2]) != 0x00646b63)) // 'dkc' |
---|
1667 | return false; |
---|
1668 | return * ((char*)&current->comm[5]) == '\0'; |
---|
1669 | # else |
---|
1670 | return (strcmp(current->comm, "lockd") == 0); |
---|
1671 | # endif |
---|
1672 | } |
---|
1673 | |
---|
1674 | Boolean |
---|
1675 | cxiIsNFS4Thread() |
---|
1676 | { |
---|
1677 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1678 | /* Note comparison against a multibyte character constant (not a string |
---|
1679 | constant). Order of characters in word is reversed due to little- |
---|
1680 | endian representation of integers. */ |
---|
1681 | if ((* ((int*)&current->comm[0]) != 0x6473666e) | // 'dsfn' |
---|
1682 | (* ((int*)&current->comm[2]) != 0x00346473)) // '4ds' |
---|
1683 | return false; |
---|
1684 | return * ((char*)&current->comm[5]) == '\0'; |
---|
1685 | # else |
---|
1686 | return (strcmp(current->comm, "nfsd4") == 0); |
---|
1687 | # endif |
---|
1688 | } |
---|
1689 | |
---|
1690 | Boolean |
---|
1691 | cxiIsKupdateThread() |
---|
1692 | { |
---|
1693 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
1694 | /* In 2.6 pdflush replaced kupdated and bdflush from 2.4 */ |
---|
1695 | return current_is_pdflush(); |
---|
1696 | #else |
---|
1697 | return (strcmp(current->comm, "kupdated") == 0); |
---|
1698 | #endif |
---|
1699 | } |
---|
1700 | |
---|
1701 | #ifdef SMB_LOCKS |
---|
1702 | Boolean |
---|
1703 | cxiIsSambaOrLockdThread() |
---|
1704 | { |
---|
1705 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1706 | /* Note comparison against a multibyte character constant (not a string |
---|
1707 | constant). Order of characters in word is reversed due to little- |
---|
1708 | endian representation of integers. */ |
---|
1709 | Boolean rc = (((* ((int*)&current->comm[0]) == 0x64626d73) & // 'dbms' |
---|
1710 | (* ((char*)&current->comm[4]) == '\0')) | |
---|
1711 | ((* ((int*)&current->comm[0]) == 0x6b636f6c) & // 'kcol' |
---|
1712 | (* ((int*)&current->comm[2]) == 0x00646b63))); // 'dkc' |
---|
1713 | return rc; |
---|
1714 | # else |
---|
1715 | return ((strcmp(current->comm, "smbd") == 0) | |
---|
1716 | (strcmp(current->comm, "lockd") == 0)); |
---|
1717 | # endif |
---|
1718 | } |
---|
1719 | |
---|
1720 | Boolean |
---|
1721 | cxiIsSambaThread() |
---|
1722 | { |
---|
1723 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1724 | /* Note comparison against a multibyte character constant (not a string |
---|
1725 | constant). Order of characters in word is reversed due to little- |
---|
1726 | endian representation of integers. */ |
---|
1727 | Boolean rc = ((* ((int*)&current->comm[0]) == 0x64626d73) & // 'dbms' |
---|
1728 | (* ((char*)&current->comm[4]) == '\0')); |
---|
1729 | return rc; |
---|
1730 | # else |
---|
1731 | return (strcmp(current->comm, "smbd") == 0); |
---|
1732 | # endif |
---|
1733 | } |
---|
1734 | #endif |
---|
1735 | |
---|
1736 | Boolean |
---|
1737 | cxiIsGPFSThread() |
---|
1738 | { |
---|
1739 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1740 | return (((* ((int*)&current->comm[0]) == 0x73666d6d) & // 'sfmm' |
---|
1741 | (* ((int*)&current->comm[2]) == 0x00647366))); // 'dsf' |
---|
1742 | # else |
---|
1743 | return (strcmp(current->comm, "mmfsd") == 0); |
---|
1744 | # endif |
---|
1745 | } |
---|
1746 | |
---|
1747 | Boolean |
---|
1748 | cxiIsKswapdThread() |
---|
1749 | { |
---|
1750 | #if LINUX_KERNEL_VERSION > 2060000 |
---|
1751 | /* On 2.6, there may be multiple kswapd processes, named kswapd0, kswapd1, |
---|
1752 | * etc. We don't have to depend on the process name to identify kswapd |
---|
1753 | * processes on 2.6 though, there's a better way. */ |
---|
1754 | return current_is_kswapd(); |
---|
1755 | #else |
---|
1756 | # if defined(GPFS_LITTLE_ENDIAN) && !defined(__64BIT__) |
---|
1757 | return ((* ((int*)&current->comm[0]) == 0x6177736b) & // 'awsk' |
---|
1758 | (* ((int*)&current->comm[3]) == 0x00647061)); // ' dpa' |
---|
1759 | # else |
---|
1760 | return (strcmp(current->comm, "kswapd") == 0); |
---|
1761 | # endif |
---|
1762 | #endif |
---|
1763 | } |
---|
1764 | |
---|
1765 | #ifdef INSTRUMENT_LOCKS |
---|
1766 | void InitBlockingMutexStats() |
---|
1767 | { |
---|
1768 | memset(BlockingMutexStatsTable, 0, sizeof(BlockingMutexStatsTable)); |
---|
1769 | } |
---|
1770 | #endif |
---|
1771 | |
---|
1772 | /* Initialize a cxiBlockingMutex_t. Instead of the DBGASSERT, this routine |
---|
1773 | should kmalloc a struct semaphore if bmSem is too small. */ |
---|
1774 | void cxiBlockingMutexInit(cxiBlockingMutex_t* mP, int bmNameIdx) |
---|
1775 | { |
---|
1776 | ENTER(0); |
---|
1777 | DBGASSERT(sizeof(struct semaphore) <= GPFS_LINUX_SEM_SIZE); |
---|
1778 | #ifdef INSTRUMENT_LOCKS |
---|
1779 | DBGASSERT(bmNameIdx < MAX_GPFS_LOCK_NAMES); |
---|
1780 | #endif /* INSTRUMENT_LOCKS */ |
---|
1781 | |
---|
1782 | TRACE2(TRACE_KLOCKL, 3, TRCID_BM_INIT, |
---|
1783 | "cxiBlockingMutexInit: mP 0x%lX idx %d\n", |
---|
1784 | mP, bmNameIdx); |
---|
1785 | init_MUTEX((struct semaphore *)mP->bmSem); |
---|
1786 | mP->bmOwnerP = NULL; |
---|
1787 | mP->lockNameIndex = bmNameIdx; |
---|
1788 | EXIT(0); |
---|
1789 | } |
---|
1790 | |
---|
1791 | |
---|
1792 | /* Enter critical section, blocking this thread if necessary. Mark this |
---|
1793 | thread as the owner of the mutex before returning. */ |
---|
1794 | void |
---|
1795 | REGPARMS cxiBlockingMutexAcquire(cxiBlockingMutex_t* mP) |
---|
1796 | { |
---|
1797 | ENTER(1); |
---|
1798 | TRACE4(TRACE_KLOCKL, 9, TRCID_BM_ACQ, |
---|
1799 | "cxiBlockingMutexAcquire: about to acquire 0x%lX type %d " |
---|
1800 | "current 0x%lX currentOwner 0x%lX\n", |
---|
1801 | mP, mP->lockNameIndex, current, mP->bmOwnerP); |
---|
1802 | |
---|
1803 | DBGASSERTRC(mP->bmOwnerP != (char *)current, |
---|
1804 | PTR_TO_INT32(mP->bmOwnerP), PTR_TO_INT32(mP), 0); |
---|
1805 | |
---|
1806 | #ifdef INSTRUMENT_LOCKS |
---|
1807 | BlockingMutexStatsTable[mP->lockNameIndex].bmsAcquires += 1; |
---|
1808 | if (mP->bmOwnerP != NULL) |
---|
1809 | BlockingMutexStatsTable[mP->lockNameIndex].bmsConflicts += 1; |
---|
1810 | #endif |
---|
1811 | |
---|
1812 | down((struct semaphore *)mP->bmSem); |
---|
1813 | mP->bmOwnerP = (char *)current; |
---|
1814 | |
---|
1815 | TRACE1(TRACE_KLOCKL, 9, TRCID_BM_ACQ_EXIT, |
---|
1816 | "cxiBlockingMutexAcquire: returning after acquiring 0x%lX\n", mP); |
---|
1817 | EXIT(1); |
---|
1818 | } |
---|
1819 | |
---|
1820 | |
---|
1821 | /* Leave critical section and awaken waiting threads */ |
---|
1822 | void |
---|
1823 | REGPARMS cxiBlockingMutexRelease(cxiBlockingMutex_t* mP) |
---|
1824 | { |
---|
1825 | ENTER(1); |
---|
1826 | TRACE4(TRACE_KLOCKL, 9, TRCID_BM_REL, |
---|
1827 | "cxiBlockingMutexRelease: about to release 0x%lX type %d " |
---|
1828 | "current 0x%lX currentOwner 0x%lX\n", |
---|
1829 | mP, mP->lockNameIndex,current, mP->bmOwnerP); |
---|
1830 | |
---|
1831 | if (mP->bmOwnerP == (char *)current) |
---|
1832 | { |
---|
1833 | mP->bmOwnerP = NULL; |
---|
1834 | up((struct semaphore *)mP->bmSem); |
---|
1835 | } |
---|
1836 | EXIT(1); |
---|
1837 | } |
---|
1838 | |
---|
1839 | /* Free resources associated with this cxiBlockingMutex_t in preparation |
---|
1840 | for freeing the storage it occupies */ |
---|
1841 | void cxiBlockingMutexTerm(cxiBlockingMutex_t* mP) |
---|
1842 | { |
---|
1843 | ENTER(0); |
---|
1844 | TRACE2(TRACE_KLOCKL, 3, TRCID_BM_TERM, |
---|
1845 | "cxiBlockingMutexTerm: mP 0x%lX type %d\n", mP, mP->lockNameIndex); |
---|
1846 | |
---|
1847 | /* Verify that mutex is not held */ |
---|
1848 | DBGASSERT(mP->bmOwnerP == NULL); |
---|
1849 | DBGASSERT(atomic_read(&((struct semaphore *)mP->bmSem)->count) == 1); |
---|
1850 | EXIT(0); |
---|
1851 | } |
---|
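/* Illustrative sketch (not part of the original module): the expected life
 * cycle of a cxiBlockingMutex_t using the routines above.  Init once before
 * first use, Acquire/Release around each critical section, Term before the
 * storage is freed.  The example function and its caller are hypothetical. */
#if 0   /* usage illustration only -- never compiled */
static void exampleCriticalSection(cxiBlockingMutex_t *mP)
{
  cxiBlockingMutexAcquire(mP);      /* enter critical section, may block */
  /* ... manipulate the shared state protected by *mP ... */
  cxiBlockingMutexRelease(mP);      /* leave critical section, wake waiters */
}
#endif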
1852 | |
---|
1853 | |
---|
1854 | /* Return true if a cxiBlockingMutex_t is held by the calling process */ |
---|
1855 | Boolean |
---|
1856 | cxiBlockingMutexHeldByCaller(cxiBlockingMutex_t* mP) |
---|
1857 | { |
---|
1858 | Boolean result; |
---|
1859 | char* ownerP; |
---|
1860 | cxiPid_t ownerPid; |
---|
1861 | |
---|
1862 | /* Cache bmOwnerP in case it changes to NULL */ |
---|
1863 | ENTER(0); |
---|
1864 | ownerP = mP->bmOwnerP; |
---|
1865 | if (ownerP == NULL) |
---|
1866 | result = false; |
---|
1867 | else |
---|
1868 | { |
---|
1869 | cxiThreadPtrToThreadId(ownerP, &ownerPid); |
---|
1870 | result = (current->pid == ownerPid); |
---|
1871 | } |
---|
1872 | TRACE2(TRACE_KLOCKL, 9, TRCID_CXISYSTEM_017, |
---|
1873 | "cxiBlockingMutexHeldByCaller: owner 0x%lX returns %d\n", |
---|
1874 | ownerP, result); |
---|
1875 | EXIT_RC(0, result); |
---|
1876 | return result; |
---|
1877 | } |
---|
1878 | |
---|
1879 | |
---|
1880 | /* Return true if a cxiBlockingMutex_t has one or more processes waiting |
---|
1881 | on it */ |
---|
1882 | Boolean cxiBlockingMutexHasWaiters(cxiBlockingMutex_t* mP) |
---|
1883 | { |
---|
1884 | struct semaphore * semP = (struct semaphore *)mP->bmSem; |
---|
1885 | Boolean result; |
---|
1886 | |
---|
1887 | ENTER(0); |
---|
1888 | if ((void*)semP->wait.task_list.next != (void*)&semP->wait.task_list.next) |
---|
1889 | result = true; |
---|
1890 | else |
---|
1891 | result = false; |
---|
1892 | TRACE2(TRACE_KLOCKL, 9, TRCID_CXISYSTEM_018, |
---|
1893 | "cxiBlockingMutexHasWaiters: mP 0x%lX hasWaiters %d\n", |
---|
1894 | mP, result); |
---|
1895 | EXIT_RC(0, result); |
---|
1896 | return result; |
---|
1897 | } |
---|
1898 | |
---|
1899 | |
---|
1900 | /* Wait for a cxiWaitEventSignal, cxiWaitEventBroadcast, or |
---|
1901 | cxiWaitEventBroadcastRC. Drop the associated cxiBlockingMutex_t |
---|
1902 | *mutexP while waiting, and reacquire it before returning. |
---|
1903 | If INTERRUPTIBLE is set in waitFlags, waits interruptibly; |
---|
1904 | otherwise, waits uninterruptibly. |
---|
1905 | Returns THREAD_INTERRUPTED if interrupted before being woken up, |
---|
1906 | THREAD_AWAKENED, if woken up by cxiWaitEventSignal or |
---|
1907 | cxiWaitEventBroadcast, or the result value passed to |
---|
1908 | cxiWaitEventWakeupResult, if woken up by cxiWaitEventWakeupResult. */ |
---|
1909 | int cxiWaitEventWait(cxiWaitEvent_t* weP, cxiBlockingMutex_t* mutexP, |
---|
1910 | int waitFlags) |
---|
1911 | { |
---|
1912 | spinlock_t *lockP = (spinlock_t *)(weP->lword); |
---|
1913 | unsigned long flags; |
---|
1914 | cxiWaitElement_t waitElement; |
---|
1915 | int count = 0; |
---|
1916 | Boolean done; |
---|
1917 | |
---|
1918 | ENTER(0); |
---|
1919 | TRACE3(TRACE_KLOCKL, 3, TRCID_CXISYSTEM_EVENT_WAIT_ENTER, |
---|
1920 | "cxiWaitEventWait enter: weP 0x%lX waitFlags 0x%X about to release " |
---|
1921 | "mutex 0x%lX \n", weP, waitFlags, mutexP); |
---|
1922 | |
---|
1923 | /* Verify that caller is holding the mutex */ |
---|
1924 | DBGASSERTRC(mutexP->bmOwnerP == (char *)current, |
---|
1925 | PTR_TO_INT32(mutexP->bmOwnerP), PTR_TO_INT32(mutexP), 0); |
---|
1926 | |
---|
1927 | /* initialize our wait element */ |
---|
1928 | init_waitqueue_head(&waitElement.qhead); |
---|
1929 | init_waitqueue_entry(&waitElement.qwaiter, current); |
---|
1930 | __add_wait_queue(&waitElement.qhead, &waitElement.qwaiter); |
---|
1931 | waitElement.wakeupRC = 0; |
---|
1932 | |
---|
1933 | /* update our task state to not running any more */ |
---|
1934 | if (waitFlags & INTERRUPTIBLE) |
---|
1935 | current->state = TASK_INTERRUPTIBLE; |
---|
1936 | else |
---|
1937 | current->state = TASK_UNINTERRUPTIBLE; |
---|
1938 | |
---|
1939 | /* add our wait element to the end of the wait list */ |
---|
1940 | SPIN_LOCK_IRQ(lockP, flags); |
---|
1941 | |
---|
1942 | CXI_WAIT_LIST_ADD(&weP->waitList, &waitElement.waitList); |
---|
1943 | |
---|
1944 | SPIN_UNLOCK_IRQ(lockP, flags); |
---|
1945 | |
---|
1946 | /* Release the mutex. Note: calling cxiBlockingMutexRelease here is |
---|
1947 | problematic, because it makes trace calls, which may block the current |
---|
1948 | process, which would overwrite the task state (current->state) we just |
---|
1949 | updated. A way around this would be to move our task state update to |
---|
1950 | after the call to cxiBlockingMutexRelease, but then, before calling |
---|
1951 | schedule(), we would have to re-acquire the wait-list lock and check |
---|
1952 | wakeupRC to see whether somebody has already woken us up since we |
---|
1953 | released the mutex. Since there is a trace at the top of this routine, |
---|
1954 | we don't need the one in cxiBlockingMutexRelease; hence, just do the |
---|
1955 | release right here. */ |
---|
1956 | mutexP->bmOwnerP = NULL; |
---|
1957 | up((struct semaphore *)mutexP->bmSem); |
---|
1958 | |
---|
1959 | again: |
---|
1960 | /* call the scheduler */ |
---|
1961 | schedule(); |
---|
1962 | |
---|
1963 | /* Remove ourself from the wait list ... except: |
---|
1964 | Even though we may enter uninterruptible sleep, this sleep can in |
---|
1965 | fact be interrupted in at least two scenarios: |
---|
1966 | 1) page_alloc code may call wakeup_kswapd(). This should be |
---|
1967 | a very rare event with the current code, since we make an effort |
---|
1968 | to avoid blocking kswapd. |
---|
1969 | 2) While signals are supposed to be ignored during uninterruptible |
---|
1970 | sleep, it turns out that some signals, e.g. SIGSEGV and SIGBUS, |
---|
1971 | cause us to wake up. It doesn't look like the signal has been |
---|
1972 | delivered yet, but sleep is interrupted. The signal will be |
---|
1973 | delivered later (probably when exiting kernel). |
---|
1974 | Our callers can't handle unexpected return from uninterruptible |
---|
1975 | sleep. In either of the two cases above, it should be safe to go |
---|
1976 | back to sleep and wait to be woken up properly. |
---|
1977 | */ |
---|
1978 | SPIN_LOCK_IRQ(lockP, flags); |
---|
1979 | |
---|
1980 | if (waitElement.wakeupRC == 0 && |
---|
1981 | !(waitFlags & INTERRUPTIBLE)) |
---|
1982 | { |
---|
1983 | TRACE3N(TRACE_KLOCKL, 1, TRCID_CXISYSTEM_EVENT_WAIT_INTERRUPTED, |
---|
1984 | "cxiWaitEventWait: interrupted weP 0x%lX mutexP 0x%lX rc %d\n", |
---|
1985 | weP, mutexP, waitElement.wakeupRC); |
---|
1986 | current->state = TASK_UNINTERRUPTIBLE; |
---|
1987 | done = false; |
---|
1988 | } |
---|
1989 | else |
---|
1990 | { |
---|
1991 | CXI_WAIT_LIST_REMOVE(&waitElement.waitList); |
---|
1992 | done = true; |
---|
1993 | } |
---|
1994 | |
---|
1995 | SPIN_UNLOCK_IRQ(lockP, flags); |
---|
1996 | |
---|
1997 | if (!done) |
---|
1998 | goto again; |
---|
1999 | |
---|
2000 | /* re-acquire the mutex */ |
---|
2001 | cxiBlockingMutexAcquire(mutexP); |
---|
2002 | |
---|
2003 | TRACE3(TRACE_KLOCKL, 9, TRCID_CXISYSTEM_EVENT_WAIT_EXIT, |
---|
2004 | "cxiWaitEventWait exit: weP 0x%lX mutexP 0x%lX rc %d\n", |
---|
2005 | weP, mutexP, waitElement.wakeupRC); |
---|
2006 | |
---|
2007 | /* A zero wakeup code means we were interrupted rather than woken up */ |
---|
2008 | EXIT(0); |
---|
2009 | if (waitElement.wakeupRC != 0) |
---|
2010 | return waitElement.wakeupRC; |
---|
2011 | else |
---|
2012 | return THREAD_INTERRUPTED; |
---|
2013 | } |
---|
2014 | |
---|
2015 | /* Wake up one thread waiting on this cxiWaitEvent_t. Must not sleep */ |
---|
2016 | void |
---|
2017 | cxiWaitEventSignal(cxiWaitEvent_t* weP) |
---|
2018 | { |
---|
2019 | /* ENTER(0); */ |
---|
2020 | TRACE1N(TRACE_KLOCKL, 3, TRCID_CXISYSTEM_SIGNAL, |
---|
2021 | "cxiWaitEventSignal: weP 0x%lX\n", weP); |
---|
2022 | |
---|
2023 | doWakeup(weP, wSignal, THREAD_AWAKENED); /* wake up one */ |
---|
2024 | /* EXIT(0); */ |
---|
2025 | } |
---|
2026 | |
---|
2027 | |
---|
2028 | /* Wake up one thread waiting on this cxiWaitEvent_t. This is the same as |
---|
2029 | cxiWaitEventSignal(), except this routine guarantees that multiple wake |
---|
2030 | up calls will each pick a different thread if more than one is waiting. */ |
---|
2031 | void |
---|
2032 | cxiWaitEventWakeupOne(cxiWaitEvent_t* weP) |
---|
2033 | { |
---|
2034 | ENTER(0); |
---|
2035 | TRACE1(TRACE_KLOCKL, 3, TRCID_CXISYSTEM_WAKEUP_ONE, |
---|
2036 | "cxiWaitEventWakeupOne: weP 0x%lX\n", weP); |
---|
2037 | |
---|
2038 | doWakeup(weP, wWakeOne, THREAD_AWAKENED); /* wake up one */ |
---|
2039 | EXIT(0); |
---|
2040 | } |
---|
2041 | |
---|
2042 | |
---|
2043 | /* Wake up all threads waiting on this cxiWaitEvent_t */ |
---|
2044 | void |
---|
2045 | cxiWaitEventBroadcast(cxiWaitEvent_t* weP) |
---|
2046 | { |
---|
2047 | ENTER(0); |
---|
2048 | TRACE1(TRACE_KLOCKL, 3, TRCID_CXISYSTEM_BROADCAST, |
---|
2049 | "cxiWaitEventBroadcast: weP 0x%lX\n", weP); |
---|
2050 | |
---|
2051 | doWakeup(weP, wBroadcast, THREAD_AWAKENED); /* wake up all */ |
---|
2052 | EXIT(0); |
---|
2053 | } |
---|
2054 | |
---|
2055 | |
---|
2056 | /* Wake up all threads waiting on this cxiWaitEvent_t and cause them to |
---|
2057 | return rc from their cxiWaitEventWait calls. */ |
---|
2058 | void |
---|
2059 | cxiWaitEventBroadcastRC(cxiWaitEvent_t* weP, int rc) |
---|
2060 | { |
---|
2061 | ENTER(0); |
---|
2062 | TRACE2(TRACE_KLOCKL, 3, TRCID_CXISYSTEM_BROADCAST_RC, |
---|
2063 | "cxiWaitEventBroadcastRC: weP 0x%lX rc %d\n", weP, rc); |
---|
2064 | |
---|
2065 | doWakeup(weP, wBroadcast, rc); /* wake up all */ |
---|
2066 | EXIT_RC(0, rc); |
---|
2067 | } |
---|
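/* Illustrative sketch (not part of the original module): typical use of
 * cxiWaitEventWait() with the wakeup routines above.  The waiter holds the
 * associated mutex, re-tests its predicate in a loop (a wakeup or an
 * interruption does not by itself guarantee the predicate is true), and the
 * waker updates the predicate and signals while holding the same mutex.
 * The names readyFlag, stateMutex and stateEvent are hypothetical. */
#if 0   /* usage illustration only -- never compiled */
  /* Waiting side */
  cxiBlockingMutexAcquire(&stateMutex);
  while (!readyFlag)
    (void)cxiWaitEventWait(&stateEvent, &stateMutex, INTERRUPTIBLE);
  cxiBlockingMutexRelease(&stateMutex);

  /* Waking side */
  cxiBlockingMutexAcquire(&stateMutex);
  readyFlag = true;
  cxiWaitEventSignal(&stateEvent);   /* or cxiWaitEventBroadcast(&stateEvent) */
  cxiBlockingMutexRelease(&stateMutex);
#endif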
2068 | |
---|
2069 | /* alloc big memory area */ |
---|
2070 | void * |
---|
2071 | cxiBigMalloc(int size) |
---|
2072 | { |
---|
2073 | void *ptr; |
---|
2074 | |
---|
2075 | ENTER(0); |
---|
2076 | ptr = vmalloc(size); |
---|
2077 | |
---|
2078 | #ifdef MALLOC_DEBUG |
---|
2079 | MallocDebugNew(ptr, size, 2); |
---|
2080 | #endif |
---|
2081 | |
---|
2082 | EXIT(0); |
---|
2083 | return ptr; |
---|
2084 | } |
---|
2085 | |
---|
2086 | /* free big memory area */ |
---|
2087 | void |
---|
2088 | cxiBigFree(char *ptr) |
---|
2089 | { |
---|
2090 | ENTER(0); |
---|
2091 | #ifdef MALLOC_DEBUG |
---|
2092 | MallocDebugDelete(ptr); |
---|
2093 | #endif |
---|
2094 | |
---|
2095 | EXIT(0); |
---|
2096 | vfree(ptr); |
---|
2097 | } |
---|
2098 | |
---|
2099 | #ifdef SMB_LOCKS |
---|
2100 | /* Determine if current process has this file open */ |
---|
2101 | void * |
---|
2102 | cxiCheckOpen(struct cxiNode_t* cnP) |
---|
2103 | { |
---|
2104 | int count; |
---|
2105 | int i; |
---|
2106 | struct file** fdList; |
---|
2107 | struct file* fileP; |
---|
2108 | struct inode* inodeP; |
---|
2109 | |
---|
2110 | ENTER(0); |
---|
2111 | #if LINUX_KERNEL_VERSION >= 2061300 |
---|
2112 | count = current->files->fdt->max_fds; |
---|
2113 | fdList = current->files->fdt->fd; |
---|
2114 | #else |
---|
2115 | count = current->files->max_fds; |
---|
2116 | fdList = current->files->fd; |
---|
2117 | #endif |
---|
2118 | inodeP = GNP_TO_VP(cnP); |
---|
2119 | |
---|
2120 | TRACE3(TRACE_VNODE,9,TRCID_CXICHECKOPEN_ENTRY, |
---|
2121 | "cxiCheckOpen: entry. %d files in fd list. Checking for inode %d " |
---|
2122 | "at 0x%x", count, inodeP->i_ino, inodeP); |
---|
2123 | |
---|
2124 | for (i=0; i<count; i++) |
---|
2125 | { |
---|
2126 | fileP = fdList[i]; |
---|
2127 | |
---|
2128 | if (fileP) |
---|
2129 | { |
---|
2130 | if (fdList[i]->f_dentry->d_inode == inodeP) |
---|
2131 | { |
---|
2132 | TRACE1(TRACE_VNODE, 9,TRCID_CXICHECKOPEN_FOUND, |
---|
2133 | "cxiCheckOpen: found open file. vinfoP 0x%x", |
---|
2134 | fileP->private_data); |
---|
2135 | EXIT(0); |
---|
2136 | return fileP->private_data; |
---|
2137 | } |
---|
2138 | } |
---|
2139 | } |
---|
2140 | |
---|
2141 | EXIT(0); |
---|
2142 | return NULL; |
---|
2143 | } |
---|
2144 | |
---|
2145 | int cxiBreakOplock(void *breakArgP, int oplockNew) |
---|
2146 | { |
---|
2147 | /* On Linux, we use its kernel oplock support. The get_lease() |
---|
2148 | * call is the operation to revoke conflicting leases. |
---|
2149 | */ |
---|
2150 | int rc; |
---|
2151 | ENTER(0); |
---|
2152 | |
---|
2153 | /* O_NONBLOCK: prevents the thread from waiting for the lease return. |
---|
2154 | * In the case of a Samba thread, we only want to get EWOULDBLOCK |
---|
2155 | * back if the conflict is held within Samba itself. If a wait is |
---|
2156 | * needed, breakSMBOplock will invoke cxiWaitForBreak. |
---|
2157 | */ |
---|
2158 | |
---|
2159 | /* Linux op to revoke conflicting leases */ |
---|
2160 | rc = abs(REVOKE_LEASE((struct inode *)breakArgP, |
---|
2161 | (cxiIsSambaThread()? 0: O_NONBLOCK) | |
---|
2162 | ((oplockNew==smbOplockShared)? FMODE_READ: FMODE_WRITE))); |
---|
2163 | |
---|
2164 | TRACE3(TRACE_VNODE, 4,TRCID_CXIBREAKOPLOCK, |
---|
2165 | "cxiBreakOplock: exit rc %d inode 0x%lX oplock %d\n", |
---|
2166 | rc, breakArgP, oplockNew); |
---|
2167 | |
---|
2168 | EXIT(0); |
---|
2169 | return rc; |
---|
2170 | } |
---|
2171 | |
---|
2172 | DECLARE_WAIT_QUEUE_HEAD(oplock_break_queue); |
---|
2173 | |
---|
2174 | /* No initialization required on Linux */ |
---|
2175 | int cxiInitBreakQ() { return 0; } |
---|
2176 | |
---|
2177 | /* No initialization required on Linux */ |
---|
2178 | int cxiTermBreakQ() { return 0; } |
---|
2179 | |
---|
2180 | /* Send the notification that the oplock break completed */ |
---|
2181 | int cxiSendBreakMsg(void *ofP) |
---|
2182 | { |
---|
2183 | ENTER(0); |
---|
2184 | /* There is only one oplock_break_queue, and no means to pass the ofP back to |
---|
2185 | * the waiters. This will wake all of them up and they will recheck their |
---|
2186 | * oplock states and wait again if necessary (with a timeout). |
---|
2187 | */ |
---|
2188 | wake_up_interruptible(&oplock_break_queue); |
---|
2189 | |
---|
2190 | TRACE1(TRACE_SMB, 3, TRCID_SEND_BREAK, "cxiSendBreakMsg: ofP 0x%lX\n", ofP); |
---|
2191 | EXIT(0); |
---|
2192 | return 0; |
---|
2193 | } |
---|
2194 | |
---|
2195 | /* Suspend the caller until either the oplock break completes, or the timeout |
---|
2196 | * is reached. |
---|
2197 | */ |
---|
2198 | int cxiWaitForBreak(void *fileArgP, int oplockCurrent, int timeoutSeconds) |
---|
2199 | { |
---|
2200 | DECLARE_WAITQUEUE(wait, current); |
---|
2201 | signed long timeout; |
---|
2202 | |
---|
2203 | ENTER(0); |
---|
2204 | TRACE3(TRACE_SMB, 5, TRCID_BREAKWAIT, |
---|
2205 | "cxiWaitForBreak: file 0x%lX, oplockCurrent %d timeoutSeconds %d\n", |
---|
2206 | fileArgP, oplockCurrent, timeoutSeconds); |
---|
2207 | |
---|
2208 | add_wait_queue(&oplock_break_queue, &wait); |
---|
2209 | timeout = timeoutSeconds * HZ; |
---|
2210 | while (timeout > 0) { |
---|
2211 | set_current_state(TASK_INTERRUPTIBLE); |
---|
2212 | /* Check whether the oplock has been released or downgraded */ |
---|
2213 | if (gpfs_ops.SMBGetOplockState(fileArgP) < oplockCurrent) |
---|
2214 | break; |
---|
2215 | timeout = schedule_timeout(timeout); |
---|
2216 | } |
---|
2217 | set_current_state(TASK_RUNNING); |
---|
2218 | remove_wait_queue(&oplock_break_queue, &wait); |
---|
2219 | |
---|
2220 | TRACE0(TRACE_SMB, 5, TRCID_BREAKWAIT_EXIT, |
---|
2221 | "cxiWaitForBreak exit\n"); |
---|
2222 | |
---|
2223 | EXIT(0); |
---|
2224 | return 0; |
---|
2225 | } |
---|
2226 | #endif |
---|
2227 | |
---|
2228 | |
---|
2229 | /* Get the address of the first byte not addressible by processes */ |
---|
2230 | UIntPtr cxiGetKernelBoundary() |
---|
2231 | { |
---|
2232 | return GPFS_KERNEL_OFFSET; |
---|
2233 | } |
---|
2234 | |
---|
2235 | |
---|
2236 | /* Return true if this process holds the big kernel lock (BKL) */ |
---|
2237 | Boolean cxiHoldsBKL() |
---|
2238 | { |
---|
2239 | return current->lock_depth >= 0; |
---|
2240 | } |
---|
2241 | |
---|
2242 | |
---|
2243 | /* Tell the OS that this thread is involved in handling VM page-out |
---|
2244 | requests and should not be blocked waiting for page allocation. |
---|
2245 | Return true if successful. */ |
---|
2246 | Boolean cxiSetPageoutThread() |
---|
2247 | { |
---|
2248 | if (current->flags & PF_MEMALLOC) |
---|
2249 | return false; |
---|
2250 | current->flags |= PF_MEMALLOC; |
---|
2251 | return true; |
---|
2252 | } |
---|
2253 | |
---|
2254 | |
---|
2255 | /* Tell the OS that this thread is no longer involved in handling VM |
---|
2256 | page-out requests. */ |
---|
2257 | void cxiClearPageoutThread() |
---|
2258 | { |
---|
2259 | current->flags &= ~PF_MEMALLOC; |
---|
2260 | } |
---|
2261 | |
---|
2262 | |
---|
2263 | /* Yield the CPU to allow other processes to run */ |
---|
2264 | void |
---|
2265 | cxiYield() |
---|
2266 | { |
---|
2267 | ENTER(0); |
---|
2268 | schedule(); |
---|
2269 | EXIT(0); |
---|
2270 | } |
---|
2271 | |
---|
2272 | /* Linux filldir has changed signatures depending on kernel level. |
---|
2273 | * We always pass a 64bit offset from the GPFS layer. |
---|
2274 | */ |
---|
2275 | int |
---|
2276 | cxiFillDir(void *vargP, const char *nameP, int namelen, |
---|
2277 | offset_t offset, ino_t ino) |
---|
2278 | { |
---|
2279 | int result; |
---|
2280 | cxiFillDirArg_t *fillDirArgP = (cxiFillDirArg_t *)vargP; |
---|
2281 | filldir_t fnP = (filldir_t)fillDirArgP->fnP; |
---|
2282 | ENTER(0); |
---|
2283 | |
---|
2284 | result = (*fnP)(fillDirArgP->argP, nameP, namelen, |
---|
2285 | (loff_t)offset, ino, 0 /* DT_UNKNOWN */); |
---|
2286 | EXIT_RC(0, result); |
---|
2287 | return result; |
---|
2288 | } |
---|
2289 | |
---|
2290 | #ifdef DISK_LEASE_DMS |
---|
2291 | |
---|
2292 | static struct timer_list DMSTimer[MAX_DMS_INDEX]; |
---|
2293 | static int (*DMSgetNIOsInProgressP)(int); |
---|
2294 | |
---|
2295 | #define PANIC_FOR_REAL 1 |
---|
2296 | |
---|
2297 | static void cxiDMSExpired(unsigned long data) |
---|
2298 | { |
---|
2299 | int idx = data; |
---|
2300 | int nIOs = DMSgetNIOsInProgressP(idx); |
---|
2301 | /* ENTER(0); */ |
---|
2302 | /* This code is executed on the interrupt level -- can't use tracing */ |
---|
2303 | printk("GPFS Deadman Switch timer [%d] has expired; IOs in progress: %d\n", |
---|
2304 | idx, nIOs); |
---|
2305 | #ifdef PANIC_FOR_REAL |
---|
2306 | if (nIOs != 0) |
---|
2307 | panic("GPFS Deadman Switch timer has expired, and there are still" |
---|
2308 | " %d outstanding I/O requests\n", nIOs); |
---|
2309 | #endif |
---|
2310 | } |
---|
2311 | |
---|
2312 | /* |
---|
2313 | Start dead man switch, with the timeout specified by the delay |
---|
2314 | argument (in seconds). |
---|
2315 | */ |
---|
2316 | void cxiStartDMS(int idx, int delay, int (*funcP)(int)) |
---|
2317 | { |
---|
2318 | unsigned long njiffies = delay * HZ; |
---|
2319 | |
---|
2320 | /* Only allow the daemon or other root users to make this kernel call */ |
---|
2321 | if (!cxiIsSuperUser()) |
---|
2322 | return; |
---|
2323 | ENTER(0); |
---|
2324 | |
---|
2325 | /* There can be only one timer active at any given moment */ |
---|
2326 | if (timer_pending(&DMSTimer[idx])) |
---|
2327 | del_timer(&DMSTimer[idx]); |
---|
2328 | |
---|
2329 | init_timer(&DMSTimer[idx]); |
---|
2330 | DMSTimer[idx].expires = jiffies + njiffies; |
---|
2331 | DMSTimer[idx].function = cxiDMSExpired; |
---|
2332 | DMSTimer[idx].data = idx; |
---|
2333 | /* save the pointer to nIOsInProgress to a static var */ |
---|
2334 | DMSgetNIOsInProgressP = funcP; |
---|
2335 | add_timer(&DMSTimer[idx]); |
---|
2336 | TRACE3(TRACE_DLEASE, 2, TRCID_DMS_STARTED, |
---|
2337 | "DMS timer [%d] started, delay %d, time %d\n", |
---|
2338 | idx, delay, jiffies/HZ); |
---|
2339 | EXIT(0); |
---|
2340 | } |
---|
2341 | |
---|
2342 | void cxiStopDMS(int idx) |
---|
2343 | { |
---|
2344 | /* Only allow the daemon or other root users to make this kernel call */ |
---|
2345 | if (!cxiIsSuperUser()) |
---|
2346 | return; |
---|
2347 | ENTER(0); |
---|
2348 | |
---|
2349 | if (timer_pending(&DMSTimer[idx])) |
---|
2350 | del_timer(&DMSTimer[idx]); |
---|
2351 | TRACE2(TRACE_DLEASE, 2, TRCID_DMS_STOPPED, |
---|
2352 | "DMS timer [%d] stopped, time %d\n", idx, jiffies/HZ); |
---|
2353 | EXIT(0); |
---|
2354 | } |
---|
2355 | |
---|
2356 | /* dummy init routine. Since on Linux the timer is |
---|
2357 | stored in a static memory, there's nothing to be done |
---|
2358 | */ |
---|
2359 | int cxiInitDMS(void) |
---|
2360 | { |
---|
2361 | return 0; |
---|
2362 | } |
---|
2363 | |
---|
2364 | void cxiShutdownDMS(void) |
---|
2365 | { |
---|
2366 | int i; |
---|
2367 | |
---|
2368 | ENTER(0); |
---|
2369 | for (i = 0; i < MAX_DMS_INDEX; i++) |
---|
2370 | cxiStopDMS(i); |
---|
2371 | EXIT(0); |
---|
2372 | } |
---|
2373 | |
---|
2374 | #endif /* DISK_LEASE_DMS */ |
---|
2375 | |
---|
2376 | void cxiSetBit(unsigned long *flagP, int flag_bit) |
---|
2377 | { |
---|
2378 | set_bit(flag_bit,flagP); |
---|
2379 | } |
---|
2380 | void cxiClearBit(unsigned long *flagP, int flag_bit) |
---|
2381 | { |
---|
2382 | clear_bit(flag_bit,flagP); |
---|
2383 | } |
---|
2384 | Boolean cxiTestBit(unsigned long *flagP, int flag_bit) |
---|
2385 | { |
---|
2386 | return test_bit(flag_bit,flagP); |
---|
2387 | } |
---|
2388 | |
---|
2389 | /* In order to setup our termination callback routine (gpfs_f_cleanup) |
---|
2390 | * we create a dummy file and add it to our file table. Then, upon |
---|
2391 | * process termination, the release file operation will be called in |
---|
2392 | * order to close the file. The only operation we define for this |
---|
2393 | * dummy file is release (gpfs_f_cleanup). |
---|
2394 | */ |
---|
2395 | int |
---|
2396 | cxiRegisterCleanup() |
---|
2397 | { |
---|
2398 | int code = 0, rc = 0; |
---|
2399 | struct inode *iP = NULL; |
---|
2400 | struct file *fileP = NULL; |
---|
2401 | struct dentry *dentryP = NULL; |
---|
2402 | extern int cleanupFD; |
---|
2403 | extern struct super_block *shutdownSuperP; |
---|
2404 | |
---|
2405 | /* We record the daemon's process group because certain |
---|
2406 | * checks on cxiCopyIn/cxiCopyOut are bypassed for the daemon. |
---|
2407 | */ |
---|
2408 | ENTER(0); |
---|
2409 | DaemonPGrp = PROCESS_GROUP(current); |
---|
2410 | |
---|
2411 | /* Make sure we only create one file */ |
---|
2412 | if (cleanupFD) |
---|
2413 | { |
---|
2414 | EXIT_RC(0, EEXIST); |
---|
2415 | return EEXIST; |
---|
2416 | } |
---|
2417 | |
---|
2418 | DBGASSERT(shutdownSuperP != NULL); |
---|
2419 | |
---|
2420 | /* Allocate an inode struct */ |
---|
2421 | iP = NEW_INODE(shutdownSuperP); |
---|
2422 | if (!iP) |
---|
2423 | { |
---|
2424 | code = 1; |
---|
2425 | rc = ENOMEM; |
---|
2426 | goto xerror; |
---|
2427 | } |
---|
2428 | iP->i_mode = S_IFREG; |
---|
2429 | |
---|
2430 | /* Allocate an available file descriptor */ |
---|
2431 | cleanupFD = get_unused_fd(); |
---|
2432 | if (cleanupFD < 0) |
---|
2433 | { |
---|
2434 | code = 2; |
---|
2435 | rc = ENFILE; |
---|
2436 | goto xerror; |
---|
2437 | } |
---|
2438 | |
---|
2439 | /* Allocate a file struct */ |
---|
2440 | fileP = get_empty_filp(); |
---|
2441 | if (!fileP) |
---|
2442 | { |
---|
2443 | code = 3; |
---|
2444 | rc = ENFILE; |
---|
2445 | goto xerror; |
---|
2446 | } |
---|
2447 | |
---|
2448 | /* Allocate a dentry struct */ |
---|
2449 | dentryP = dget(d_alloc_root(iP)); |
---|
2450 | if (!dentryP) |
---|
2451 | { |
---|
2452 | code = 4; |
---|
2453 | rc = ENOMEM; |
---|
2454 | goto xerror; |
---|
2455 | } |
---|
2456 | |
---|
2457 | /* Initialize and chain our file structure */ |
---|
2458 | fileP->f_dentry = dentryP; |
---|
2459 | fileP->f_op = &gpfs_cleanup_fops; |
---|
2460 | fileP->f_flags = O_RDONLY; |
---|
2461 | atomic_set(&fileP->f_count, 1); |
---|
2462 | |
---|
2463 | /* Just chain it on the current root mount. When |
---|
2464 | * the file is closed its fput() will decrement |
---|
2465 | * the mount count (hence the mntget here) |
---|
2466 | */ |
---|
2467 | fileP->f_vfsmnt = mntget(current->fs->rootmnt); |
---|
2468 | |
---|
2469 | /* Install the descriptor so it gets "closed" upon our termination */ |
---|
2470 | fd_install(cleanupFD, fileP); |
---|
2471 | |
---|
2472 | /* Set FD_CLOEXEC so that forked processes (like mmfsup.scr) do not |
---|
2474 | * inherit this descriptor. We want the cleanup routine to be run |
---|
2474 | * when the last mmfsd process terminates. |
---|
2475 | */ |
---|
2476 | #if LINUX_KERNEL_VERSION >= 2061300 |
---|
2477 | FD_SET(cleanupFD, current->files->fdt->close_on_exec); |
---|
2478 | #else |
---|
2479 | FD_SET(cleanupFD, current->files->close_on_exec); |
---|
2480 | #endif |
---|
2481 | /* Once the descriptor for this dummy file is added to our file table, |
---|
2483 | * it is inherited by all the processes of the daemon. As each |
---|
2483 | * terminates, the files->count is decremented and on the last process |
---|
2484 | * termination all the descriptors will be closed by filp_close. |
---|
2485 | * |
---|
2486 | * The one catch here is that our file table is inherrited by the |
---|
2487 | * kernel threads we start as well as user processes. This would |
---|
2488 | * cause a problem in that daemon termination does not include these |
---|
2489 | * kernel threads which aren't killed until restart (and therefore |
---|
2490 | * the file is never closed). In order for our operation to be |
---|
2491 | * driven at daemon termination, we must remove the file table from |
---|
2492 | * these kernel threads. This is done via cxiReparent() by |
---|
2493 | * the mmap pager kproc. |
---|
2494 | */ |
---|
2495 | |
---|
2496 | xerror: |
---|
2497 | TRACE4(TRACE_VNODE, 1, TRCID_CXIREGISTERCLEANUP_EXIT, |
---|
2498 | "cxiRegisterCleanup: fd %d iP %X rc %d code %d\n", |
---|
2499 | cleanupFD, iP, rc, code); |
---|
2500 | |
---|
2501 | if (rc) |
---|
2502 | { |
---|
2503 | if (dentryP) |
---|
2504 | dput(dentryP); |
---|
2505 | |
---|
2506 | if (cleanupFD) |
---|
2507 | put_unused_fd(cleanupFD); |
---|
2508 | |
---|
2509 | if (fileP) |
---|
2510 | #if LINUX_KERNEL_VERSION > 2060900 |
---|
2511 | fput(fileP); |
---|
2512 | #else |
---|
2513 | put_filp(fileP); |
---|
2514 | #endif |
---|
2515 | |
---|
2516 | if (iP) |
---|
2517 | iput(iP); |
---|
2518 | |
---|
2519 | cleanupFD = 0; |
---|
2520 | } |
---|
2521 | |
---|
2522 | EXIT_RC(0, rc); |
---|
2523 | return rc; |
---|
2524 | } |
---|
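/* Illustrative sketch (not part of the original module): the comments above
 * say the only operation defined for the dummy cleanup file is release
 * (gpfs_f_cleanup), so the gpfs_cleanup_fops table referenced above would
 * have roughly this shape.  The real definition lives elsewhere in the
 * module; this is an assumption shown for orientation only. */
#if 0   /* illustration only -- the real table is defined elsewhere */
struct file_operations gpfs_cleanup_fops =
{
  .release = gpfs_f_cleanup   /* runs when the last daemon process closes it */
};
#endif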
2525 | |
---|
2526 | #ifdef NFS4_ACL |
---|
2527 | /* Linux routines to be called when processing NFSv4 audit/alarm ACL entries */ |
---|
2528 | int cxiAuditWrite(int numargs, ...) { return ENOSYS; } |
---|
2529 | #endif /* NFS4_ACL */ |
---|
2530 | |
---|
2531 | /* Currently no OS specific VFS initialization for Linux */ |
---|
2532 | int |
---|
2533 | cxiInitVFS(int vfsType) |
---|
2534 | { |
---|
2535 | return 0; |
---|
2536 | } |
---|
2537 | |
---|
2538 | UIntPtr |
---|
2539 | cxiGetKernelStackSize() |
---|
2540 | { |
---|
2541 | return (UIntPtr)THREAD_SIZE; |
---|
2542 | } |
---|
2543 | |
---|
2544 | #if defined(DMAPI) || defined(SANERGY) |
---|
2545 | |
---|
2546 | void cxiPathRel(void *ndP) |
---|
2547 | { |
---|
2548 | DBGASSERT( ndP != NULL); |
---|
2549 | path_release( (struct nameidata *) ndP); |
---|
2550 | cxiFreeUnpinned(ndP); |
---|
2551 | } |
---|
2552 | |
---|
2553 | int |
---|
2554 | cxiPathToVfsP(void **privVfsPP, char *kpathname, void **ndPP, void **cnPP, |
---|
2555 | Boolean traverseLink) |
---|
2556 | { |
---|
2557 | struct gpfsVfsData_t *privVfsP = NULL; |
---|
2558 | struct nameidata *ndP; |
---|
2559 | struct inode * iP; |
---|
2560 | cxiNode_t *cnP; |
---|
2561 | int rc = 0; |
---|
2562 | Boolean rel = false; |
---|
2563 | int code = 0; |
---|
2564 | *ndPP = NULL; |
---|
2565 | *privVfsPP = NULL; |
---|
2566 | |
---|
2567 | ENTER(0); |
---|
2568 | if (kpathname == NULL) |
---|
2569 | { |
---|
2570 | code = 1; |
---|
2571 | rc = EINVAL; |
---|
2572 | goto xerror; |
---|
2573 | } |
---|
2574 | |
---|
2575 | ndP = (struct nameidata *)cxiMallocUnpinned(sizeof(struct nameidata)); |
---|
2576 | if (ndP == NULL) |
---|
2577 | { |
---|
2578 | code = 2; |
---|
2579 | rc = ENOMEM; |
---|
2580 | goto xerror; |
---|
2581 | } |
---|
2582 | |
---|
2583 | /* For DMAPI, this is called by dm_path_to_handle or dm_path_to_fshandle, |
---|
2584 | * According to dmapi documentation, we should return the symbolic link |
---|
2585 | * itself instead of the object that link references. |
---|
2586 | * So here we need to use the function that does not traverse the link. */ |
---|
2587 | if (!traverseLink) |
---|
2588 | rc = user_path_walk_link(kpathname, ndP); |
---|
2589 | else |
---|
2590 | rc = user_path_walk(kpathname, ndP); |
---|
2591 | |
---|
2592 | if (rc) |
---|
2593 | { |
---|
2594 | rc = -rc; |
---|
2595 | code = 3; |
---|
2596 | goto xerror; |
---|
2597 | } |
---|
2598 | |
---|
2599 | rel = true; |
---|
2600 | iP = ndP->dentry->d_inode; |
---|
2601 | DBGASSERT(iP != NULL); |
---|
2602 | if (!GPFS_TYPE(iP)) |
---|
2603 | { |
---|
2604 | code = 4; |
---|
2605 | rc = EINVAL; |
---|
2606 | goto xerror; |
---|
2607 | } |
---|
2608 | |
---|
2609 | privVfsP = VP_TO_PVP(iP); |
---|
2610 | |
---|
2611 | if (privVfsP == NULL) |
---|
2612 | { |
---|
2613 | code = 5; |
---|
2614 | rc = ENOENT; goto xerror; |
---|
2615 | } |
---|
2616 | cnP = VP_TO_CNP(iP); |
---|
2617 | *privVfsPP = (void *)privVfsP; |
---|
2618 | *ndPP = (void *)ndP; |
---|
2619 | if (cnPP != NULL) |
---|
2620 | *cnPP = (void *)cnP; |
---|
2621 | |
---|
2622 | xerror: |
---|
2623 | if (rc && ndP) |
---|
2624 | { |
---|
2625 | if (rel) |
---|
2626 | cxiPathRel(ndP); |
---|
2627 | else |
---|
2628 | cxiFreeUnpinned(ndP); |
---|
2629 | } |
---|
2630 | EXIT_RC(0, rc); |
---|
2631 | return rc; |
---|
2632 | } |
---|
2633 | |
---|
2634 | void |
---|
2635 | cxiSetCred(void *eCredPP) |
---|
2636 | { |
---|
2637 | ext_cred_t *eCredP = (ext_cred_t *)eCredPP; |
---|
2638 | setCred(eCredP); |
---|
2639 | } |
---|
2640 | |
---|
2641 | #endif /* DMAPI or SANERGY */ |
---|
2642 | |
---|
2643 | |
---|
2644 | #ifdef KSTACK_CHECK |
---|
2645 | /* Kernel stack checking: for each active thread that is making |
---|
2646 | subroutine calls in the kernel, allocate a stack_history_t. Within |
---|
2647 | each stack_history_t, create a frame_desc_t for each level of |
---|
2648 | subroutine call. Two lists of frame_desc_t's are maintained: one for |
---|
2649 | the current call stack, and one for the deepest call stack seen so |
---|
2650 | far for this thread. Upon exit from the lowest-level routine, check |
---|
2651 | whether the maximum stack depth threshold has been exceeded. If it |
---|
2652 | has, print the traceback of the maximum stack usage. Keep hashes of |
---|
2653 | the tracebacks printed to avoid printing the same traceback more than |
---|
2654 | once. Since cxiTraceExit is not called for every routine exit, |
---|
2655 | maintenance of call chains is not exact; a routine entry with |
---|
2656 | stackUsed less than the current entry implies return of the previous |
---|
2657 | routine. |
---|
2658 | |
---|
2659 | Note that these routines cannot call any other routine that has |
---|
2660 | ENTER/EXIT macros inside of it, to avoid recursion. */ |
---|
2661 | |
---|
2662 | /* Maximum size of a stack frame before it is considered large enough |
---|
2663 | to complain about */ |
---|
2664 | #define STACK_LIMIT_WARNING (THREAD_SIZE - (THREAD_SIZE/3) ) |
---|
2665 | |
---|
2666 | /* Description of one level of a call stack */ |
---|
2667 | typedef struct frame_desc |
---|
2668 | { |
---|
2669 | /* Function name and file name containing the function */ |
---|
2670 | const char * fdFuncNameP; |
---|
2671 | const char * fdFileNameP; |
---|
2672 | |
---|
2673 | /* Pointer to frame_desc of caller, or NULL if this is the first |
---|
2674 | frame. Also used to link free frame descriptors together on the |
---|
2675 | shFreeHeadP free list. */ |
---|
2676 | struct frame_desc * fdCallerP; |
---|
2677 | |
---|
2678 | /* Line number near the beginning of fdFuncNameP */ |
---|
2679 | int fdLineNum; |
---|
2680 | |
---|
2681 | /* Total stack usage up to and including this routine */ |
---|
2682 | int fdStackUsed; |
---|
2683 | |
---|
2684 | /* Reference count for this frame_desc_t. Can be 2 if this descriptor |
---|
2685 | is reachable from both shCurrentP and shMaxP. */ |
---|
2686 | int fdRef; |
---|
2687 | } frame_desc_t; |
---|
2688 | |
---|
2689 | |
---|
2690 | /* Each stack_history is only used by one thread, so no locking is |
---|
2691 | needed within a stack_history. This is allocated as a single page. |
---|
2692 | */ |
---|
2693 | typedef struct stack_history |
---|
2694 | { |
---|
2695 | /* ID of thread to which this stack_history_t belongs */ |
---|
2696 | cxiThreadId shThreadId; |
---|
2697 | |
---|
2698 | /* Bucket index in historyHash that points to this stack_history_t, |
---|
2699 | or -1 if this stack_history_t is on an overflow list */ |
---|
2700 | int shBucketNum; |
---|
2701 | |
---|
2702 | /* Next stack_history_t in same hash overflow list or on free list */ |
---|
2703 | struct stack_history * shNextP; |
---|
2704 | |
---|
2705 | /* Pointer to the frame descriptor for the routine that most recently |
---|
2706 | called fdEnter without a matching fdExit. Following the fdCallerP |
---|
2707 | pointers through these frame descriptors gives the current callback |
---|
2708 | chain. */ |
---|
2709 | frame_desc_t * shCurrentP; |
---|
2710 | |
---|
2711 | /* Pointer to the frame descriptor that had the maximum stack usage |
---|
2712 | seen thus far for this thread. Following the fdCallerP pointers |
---|
2713 | through these frame descriptors gives the callback chain with |
---|
2714 | maximal stack usage. */ |
---|
2715 | frame_desc_t * shMaxP; |
---|
2716 | |
---|
2717 | /* Head of list of free frame_desc_t's */ |
---|
2718 | frame_desc_t * shFreeHeadP; |
---|
2719 | |
---|
2720 | /* Area that holds frame_desc_t's. These will be linked together and |
---|
2721 | put on the list shFreeHeadP. */ |
---|
2722 | #define SH_PREFIX_LEN (sizeof(cxiThreadId) + \ |
---|
2723 | sizeof(int) + \ |
---|
2724 | sizeof(struct stack_history *) + \ |
---|
2725 | 3*sizeof(frame_desc_t *)) |
---|
2726 | #define SH_NFRAMES ((PAGE_SIZE-SH_PREFIX_LEN)/sizeof(frame_desc_t)) |
---|
2727 | frame_desc_t shFrames[SH_NFRAMES]; |
---|
2728 | } stack_history_t; |
---|
2729 | |
---|
2730 | /* Global structures */ |
---|
2731 | struct |
---|
2732 | { |
---|
2733 | /* Global flag controlling whether kernel stack checking is enabled. |
---|
2734 | Initially false; set true during kernel module initialization, |
---|
2735 | then set false again during kernel module termination. */ |
---|
2736 | Boolean shActive; |
---|
2737 | |
---|
2738 | /* Mutex protecting updates to the variables that follow. This cannot |
---|
2739 | be a cxiBlockMutex_t because then the stack checking routines would |
---|
2740 | get called recursively. */ |
---|
2741 | struct semaphore shMutex; |
---|
2742 | |
---|
2743 | /* List of free stack_history_t's and count of how many free entries |
---|
2744 | there are. Excess stack_history_t's beyond a threshold are freed |
---|
2745 | back to the operating system. */ |
---|
2746 | stack_history_t * freeHeadP; |
---|
2747 | int nFree; |
---|
2748 | #define MAX_FREE_STACK_HISTORIES 16 |
---|
2749 | |
---|
2750 | /* Hash table of active stack_history_t's. To find the entry for a |
---|
2751 | particular thread, hash its thread id to a bucket. If any of the |
---|
2752 | entries in bucket[] match the desired thread id, the pointer to |
---|
2753 | the stack_history_t can be returned without acquiring any locks. If |
---|
2754 | the bucket does not contain the desired thread id, look for it on |
---|
2755 | the overflow list under protection of shMutex. */ |
---|
2756 | #define HISTORY_HASH_SIZE 64 |
---|
2757 | #define HISTS_PER_BUCKET 3 |
---|
2758 | struct |
---|
2759 | { |
---|
2760 | struct |
---|
2761 | { |
---|
2762 | stack_history_t * historyP; |
---|
2763 | cxiThreadId threadId; |
---|
2764 | } bucket[HISTS_PER_BUCKET]; |
---|
2765 | stack_history_t * overflowP; |
---|
2766 | } historyHash[HISTORY_HASH_SIZE]; |
---|
2767 | |
---|
2768 | /* List of hash values for tracebacks that have already been printed. |
---|
2769 | Used to avoid printing the same traceback more than once. Nothing |
---|
2770 | is ever deleted from this table, so to find an entry start |
---|
2771 | searching at its hash value and continue until the entry is found |
---|
2772 | or an empty slot is encountered. The total occupancy of the table |
---|
2773 | is limited to MAX_TRACEBACKS to restrict the amount of searching |
---|
2774 | that will be required, and to guarantee that searches will |
---|
2775 | terminate. */ |
---|
2776 | #define TB_HASH_SIZE 64 |
---|
2777 | #define MAX_TRACEBACKS 32 |
---|
2778 | unsigned int tracebackHash[TB_HASH_SIZE]; |
---|
2779 | int nTracebackHashEntries; |
---|
2780 | } SHG; |
---|
2781 | |
---|
2782 | |
---|
2783 | /* Private version of DBGASSERT used only within stack checking code. |
---|
2784 | Cannot use DBGASSERT without risking recursion. */ |
---|
2785 | #ifdef DBGASSERTS |
---|
2786 | #define SH_ASSERT(_ex) \ |
---|
2787 | if (!(_ex)) { \ |
---|
2788 | printk("GPFS stack checking assert failed: " # _ex " file %s line %d\n", \ |
---|
2789 | __FILE__, __LINE__); \ |
---|
2790 | DoPanic(# _ex, __FILE__, __LINE__, 0, 0, ""); \ |
---|
2791 | } else ((void)0) |
---|
2792 | #else |
---|
2793 | #define SH_ASSERT(_ex) ((void)0) |
---|
2794 | #endif |
---|
2795 | |
---|
2796 | |
---|
2797 | /* Initialize and enable stack depth checking */ |
---|
2798 | void shInit() |
---|
2799 | { |
---|
2800 | /* Clear stack checking globals */ |
---|
2801 | cxiMemset(&SHG, 0, sizeof(SHG)); |
---|
2802 | |
---|
2803 | /* Init mutex */ |
---|
2804 | init_MUTEX(&SHG.shMutex); |
---|
2805 | |
---|
2806 | /* Turn on stack depth checking and make sure the change is visible */ |
---|
2807 | SHG.shActive = true; |
---|
2808 | wmb(); |
---|
2809 | } |
---|
2810 | |
---|
2811 | |
---|
2812 | /* Turn off stack depth checking and free all allocated memory. This does |
---|
2813 | not have to return the global state to what it was when the module was |
---|
2814 | first loaded, since it will not be used again. */ |
---|
2815 | void shTerm() |
---|
2816 | { |
---|
2817 | int h; |
---|
2818 | int b; |
---|
2819 | stack_history_t * shP; |
---|
2820 | stack_history_t * shNextP; |
---|
2821 | |
---|
2822 | /* Turn off stack depth checking and make sure the change is visible */ |
---|
2823 | SHG.shActive = false; |
---|
2824 | wmb(); |
---|
2825 | |
---|
2826 | /* Get and then release mutex. This ensures that a thread that is |
---|
2827 | in the middle of writing a traceback finishes writing it before |
---|
2828 | we free the data structures it was using. */ |
---|
2829 | /* ?? although there could be another thread waiting for the mutex ... */ |
---|
2830 | down(&SHG.shMutex); |
---|
2831 | up(&SHG.shMutex); |
---|
2832 | |
---|
2833 | /* Wait briefly to allow threads in the middle of the stack checking |
---|
2834 | code to finish what they are doing */ |
---|
2835 | /* ?? Of course, this is not really safe, but this is debugging code, |
---|
2836 | right? */ |
---|
2837 | schedule_timeout(HZ/2); |
---|
2838 | |
---|
2839 | /* Terminate mutex */ |
---|
2840 | // nothing to do |
---|
2841 | |
---|
2842 | /* Free all stack_history_t's on the free list */ |
---|
2843 | shP = SHG.freeHeadP; |
---|
2844 | while (shP != NULL) |
---|
2845 | { |
---|
2846 | shNextP = shP->shNextP; |
---|
2847 | kfree(shP); |
---|
2848 | shP = shNextP; |
---|
2849 | } |
---|
2850 | |
---|
2851 | /* Free all stack_history_t's in the hash table */ |
---|
2852 | for (h=0 ; h<HISTORY_HASH_SIZE ; h++) |
---|
2853 | { |
---|
2854 | for (b=0 ; b<HISTS_PER_BUCKET ; b++) |
---|
2855 | if (SHG.historyHash[h].bucket[b].historyP != NULL) |
---|
2856 | kfree(SHG.historyHash[h].bucket[b].historyP); |
---|
2857 | shP = SHG.historyHash[h].overflowP; |
---|
2858 | while (shP != NULL) |
---|
2859 | { |
---|
2860 | shNextP = shP->shNextP; |
---|
2861 | kfree(shP); |
---|
2862 | shP = shNextP; |
---|
2863 | } |
---|
2864 | } |
---|
2865 | } |
---|
2866 | |
---|
2867 | |
---|
2868 | /* Allocate and initialize a new stack_history_t */ |
---|
2869 | static stack_history_t * shAllocInit() |
---|
2870 | { |
---|
2871 | stack_history_t * shP; |
---|
2872 | int f; |
---|
2873 | |
---|
2874 | up(&SHG.shMutex); |
---|
2875 | shP = (stack_history_t *) kmalloc(sizeof(stack_history_t), GFP_KERNEL); |
---|
2876 | SH_ASSERT(shP != NULL); |
---|
2877 | down(&SHG.shMutex); |
---|
2878 | cxiMemset(shP, 0, sizeof(stack_history_t)); |
---|
2879 | for (f=0 ; f<=SH_NFRAMES-2 ; f++) |
---|
2880 | shP->shFrames[f].fdCallerP = &shP->shFrames[f+1]; |
---|
2881 | shP->shFreeHeadP = &shP->shFrames[0]; |
---|
2882 | return shP; |
---|
2883 | } |
---|
2884 | |
---|
2885 | |
---|
2886 | /* Get a stack_history_t off the free list or build a new one */ |
---|
2887 | static stack_history_t * shGet() |
---|
2888 | { |
---|
2889 | stack_history_t * shP; |
---|
2890 | |
---|
2891 | /* Use free list if one is available there */ |
---|
2892 | shP = SHG.freeHeadP; |
---|
2893 | if (shP != NULL) |
---|
2894 | { |
---|
2895 | SHG.freeHeadP = shP->shNextP; |
---|
2896 | SHG.nFree -= 1; |
---|
2897 | return shP; |
---|
2898 | } |
---|
2899 | |
---|
2900 | /* Make a new one if necessary */ |
---|
2901 | return shAllocInit(); |
---|
2902 | } |
---|
2903 | |
---|
2904 | |
---|
2905 | /* Free a stack_history_t. Put it on the free list if there are not |
---|
2906 | already too many free, or else free it back to the operating system. |
---|
2907 | */ |
---|
2908 | static void shPut(stack_history_t * shP) |
---|
2909 | { |
---|
2910 | int h; |
---|
2911 | int b; |
---|
2912 | stack_history_t ** shPrevPP; |
---|
2913 | stack_history_t * p; |
---|
2914 | |
---|
2915 | /* Both call stacks should be empty */ |
---|
2916 | SH_ASSERT(shP->shCurrentP == NULL); |
---|
2917 | SH_ASSERT(shP->shMaxP == NULL); |
---|
2918 | |
---|
2919 | /* Must hold mutex while changing the hash table */ |
---|
2920 | down(&SHG.shMutex); |
---|
2921 | |
---|
2922 | /* Clear pointer to this stack_history_t from the hash table */ |
---|
2923 | h = ((int)shP->shThreadId) & (HISTORY_HASH_SIZE-1); |
---|
2924 | b = shP->shBucketNum; |
---|
2925 | if (b != -1) |
---|
2926 | { |
---|
2927 | SH_ASSERT(SHG.historyHash[h].bucket[b].historyP == shP); |
---|
2928 | SHG.historyHash[h].bucket[b].historyP = NULL; |
---|
2929 | SHG.historyHash[h].bucket[b].threadId = 0; |
---|
2930 | } |
---|
2931 | else |
---|
2932 | { |
---|
2933 | shPrevPP = &SHG.historyHash[h].overflowP; |
---|
2934 | p = *shPrevPP; |
---|
2935 | while (p != NULL) |
---|
2936 | { |
---|
2937 | if (p == shP) |
---|
2938 | { |
---|
2939 | *shPrevPP = shP->shNextP; |
---|
2940 | break; |
---|
2941 | } |
---|
2942 | shPrevPP = &p->shNextP; |
---|
2943 | p = *shPrevPP; |
---|
2944 | } |
---|
2945 | } |
---|
2946 | |
---|
2947 | /* If not too many already free, add to free list */ |
---|
2948 | if (SHG.nFree < MAX_FREE_STACK_HISTORIES) |
---|
2949 | { |
---|
2950 | shP->shNextP = SHG.freeHeadP; |
---|
2951 | SHG.freeHeadP = shP; |
---|
2952 | SHG.nFree += 1; |
---|
2953 | up(&SHG.shMutex); |
---|
2954 | return; |
---|
2955 | } |
---|
2956 | |
---|
2957 | /* Otherwise, really free it */ |
---|
2958 | up(&SHG.shMutex); |
---|
2959 | kfree(shP); |
---|
2960 | } |
---|
2961 | |
---|
2962 | |
---|
2963 | /* Find the stack_history_t for the current thread, or allocate one if |
---|
2964 | one does not already exist */ |
---|
2965 | static stack_history_t * shFind() |
---|
2966 | { |
---|
2967 | stack_history_t * shP; |
---|
2968 | cxiThreadId id = current->pid; |
---|
2969 | int h = ((int)id) & (HISTORY_HASH_SIZE-1); |
---|
2970 | int b; |
---|
2971 | |
---|
2972 | /* Look at all entries within the bucket given by the hash of the |
---|
2973 | thread ID. No locking needs to be done for this search. */ |
---|
2974 | for (b=0 ; b<HISTS_PER_BUCKET ; b++) |
---|
2975 | if (SHG.historyHash[h].bucket[b].threadId == id) |
---|
2976 | return SHG.historyHash[h].bucket[b].historyP; |
---|
2977 | |
---|
2978 | /* Must hold mutex while changing the hash table */ |
---|
2979 | down(&SHG.shMutex); |
---|
2980 | |
---|
2981 | /* Search the overflow list */ |
---|
2982 | shP = SHG.historyHash[h].overflowP; |
---|
2983 | while (shP != NULL) |
---|
2984 | { |
---|
2985 | if (shP->shThreadId == id) |
---|
2986 | goto exit; |
---|
2987 | shP = shP->shNextP; |
---|
2988 | } |
---|
2989 | |
---|
2990 | /* No stack_history_t for this thread yet. Get one off the free list |
---|
2991 | or build one. */ |
---|
2992 | shP = shGet(); |
---|
2993 | shP->shThreadId = id; |
---|
2994 | shP->shNextP = NULL; |
---|
2995 | |
---|
2996 | /* Find a slot for the new stack_history_t in the hash table */ |
---|
2997 | for (b=0 ; b<HISTS_PER_BUCKET ; b++) |
---|
2998 | if (SHG.historyHash[h].bucket[b].historyP == NULL) |
---|
2999 | { |
---|
3000 | SHG.historyHash[h].bucket[b].historyP = shP; |
---|
3001 | SHG.historyHash[h].bucket[b].threadId = id; |
---|
3002 | shP->shBucketNum = b; |
---|
3003 | goto exit; |
---|
3004 | } |
---|
3005 | |
---|
3006 | /* No slots available; add new stack_history_t to overflow list */ |
---|
3007 | shP->shBucketNum = -1; |
---|
3008 | shP->shNextP = SHG.historyHash[h].overflowP; |
---|
3009 | SHG.historyHash[h].overflowP = shP; |
---|
3010 | |
---|
3011 | exit: |
---|
3012 | /* Release mutex before returning */ |
---|
3013 | up(&SHG.shMutex); |
---|
3014 | return shP; |
---|
3015 | } |
---|
3016 | |
---|
3017 | |
---|
3018 | /* Allocate a frame descriptor within the given stack_history_t. This |
---|
3019 | cannot be allowed to fail, so if there are no more free descriptors, |
---|
3020 | throw away the bottom frame descriptor and return that. The reference |
---|
3021 | count of the frame descriptor that is returned is undefined. */ |
---|
3022 | static frame_desc_t * fdGet(stack_history_t * shP) |
---|
3023 | { |
---|
3024 | frame_desc_t * fdP; |
---|
3025 | frame_desc_t ** fdPrevPP; |
---|
3026 | int prevRef; |
---|
3027 | |
---|
3028 | /* Look on the free list within the stack_history_t */ |
---|
3029 | fdP = shP->shFreeHeadP; |
---|
3030 | if (fdP != NULL) |
---|
3031 | { |
---|
3032 | shP->shFreeHeadP = fdP->fdCallerP; |
---|
3033 | return fdP; |
---|
3034 | } |
---|
3035 | |
---|
3036 | /* No free descriptors; first try stealing one off the bottom of the |
---|
3037 | current call stack */ |
---|
3038 | fdP = shP->shCurrentP; |
---|
3039 | if (fdP != NULL) |
---|
3040 | { |
---|
3041 | /* Find the bottom entry of the current call stack */ |
---|
3042 | fdPrevPP = &shP->shCurrentP; |
---|
3043 | prevRef = 1; |
---|
3044 | while (fdP->fdCallerP != NULL) |
---|
3045 | { |
---|
3046 | fdPrevPP = &fdP->fdCallerP; |
---|
3047 | prevRef = fdP->fdRef; |
---|
3048 | fdP = *fdPrevPP; |
---|
3049 | } |
---|
3050 | |
---|
3051 | /* Remove the bottom entry of the current call stack */ |
---|
3052 | *fdPrevPP = NULL; |
---|
3053 | |
---|
3054 | /* Reduce the reference count on the entry just removed. The |
---|
3055 | reference count decreases by the reference count of the frame |
---|
3056 | that used to point to *fdP. If *fdP is no longer referenced, no |
---|
3057 | further work is needed. If *fdP is still referenced from the max |
---|
3058 | depth stack (it must be the bottom entry), we will eventually |
---|
3059 | return it, but only after removing it from the bottom of the max |
---|
3060 | depth stack. We know that fdP will be returned, but we have to |
---|
3061 | search through the max depth stack to find the pointer to *fdP. |
---|
3062 | */ |
---|
3063 | fdP->fdRef -= prevRef; |
---|
3064 | if (fdP->fdRef == 0) |
---|
3065 | return fdP; |
---|
3066 | } |
---|
3067 | |
---|
3068 | /* Still no free descriptors; steal the frame descriptor off the |
---|
3069 | bottom of the maximum depth call stack */ |
---|
3070 | fdP = shP->shMaxP; |
---|
3071 | if (fdP != NULL) |
---|
3072 | { |
---|
3073 | /* Find the bottom entry of the max depth call stack */ |
---|
3074 | fdPrevPP = &shP->shMaxP; |
---|
3075 | while (fdP->fdCallerP != NULL) |
---|
3076 | { |
---|
3077 | fdPrevPP = &fdP->fdCallerP; |
---|
3078 | fdP = *fdPrevPP; |
---|
3079 | } |
---|
3080 | |
---|
3081 | /* Remove the bottom entry of the max depth call stack */ |
---|
3082 | *fdPrevPP = NULL; |
---|
3083 | |
---|
3084 | /* The bottom entry of the max depth call stack that was just |
---|
3085 | removed must have a reference count of one; otherwise it would |
---|
3086 | still be on the current call stack and removing the bottom entry |
---|
3087 | of that stack would have reduced the reference count of some |
---|
3088 | frame descriptor from 2 to 0. */ |
---|
3089 | SH_ASSERT(fdP->fdRef == 1); |
---|
3090 | return fdP; |
---|
3091 | } |
---|
3092 | SH_ASSERT(!"cannot alloc frame_desc_t"); |
---|
3093 | return NULL; |
---|
3094 | } |
---|
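/* A minimal sketch (not part of the module build) of the reference-count
   arithmetic behind fdGet() above: a frame descriptor on both the current
   and the max-depth chain carries a count of 2, a frame on only one chain
   carries 1.  Unhooking the bottom frame from the current chain subtracts
   the count held by the frame that pointed at it, so a result of 0 means the
   descriptor is free and a result of 1 means it is still the bottom of the
   max-depth chain.  The values below are hypothetical. */
#if 0
#include <stdio.h>

int main(void)
{
  int fdRef = 2;      /* bottom frame shared by both chains         */
  int prevRef = 1;    /* the frame above it held a single reference */

  fdRef -= prevRef;   /* remove the frame from the current chain    */
  if (fdRef == 0)
    printf("descriptor can be reused immediately\n");
  else
    printf("descriptor still referenced by the max-depth chain (%d)\n", fdRef);
  return 0;
}
#endif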
3095 | |
---|
3096 | |
---|
3097 | /* Decrease the reference count on a frame descriptor. If it becomes |
---|
3098 | zero, return it to the free list */ |
---|
3099 | static void fdDiscard(frame_desc_t * fdP, stack_history_t * shP) |
---|
3100 | //inline static void fdDiscard(frame_desc_t * fdP, stack_history_t * shP) |
---|
3101 | { |
---|
3102 | if (fdP->fdRef > 1) |
---|
3103 | { |
---|
3104 | fdP->fdRef -= 1; |
---|
3105 | TRACE3(TRACE_ENTRYEXIT, 11, TRCID_FDDISCARD1, |
---|
3106 | "fdDiscard: fdP 0x%lX shP 0x%lX rtn %s refcnt now 1\n", |
---|
3107 | fdP, shP, fdP->fdFuncNameP); |
---|
3108 | return; |
---|
3109 | } |
---|
3110 | |
---|
3111 | fdP->fdCallerP = shP->shFreeHeadP; |
---|
3112 | shP->shFreeHeadP = fdP; |
---|
3113 | TRACE3(TRACE_ENTRYEXIT, 11, TRCID_FDDISCARD2, |
---|
3114 | "fdDiscard: fdP 0x%lX shP 0x%lX rtn %s refcnt now 0\n", |
---|
3115 | fdP, shP, fdP->fdFuncNameP); |
---|
3116 | } |
---|
3117 | |
---|
3118 | |
---|
3119 | /* If the maximum stack depth exceeds the threshold, print its |
---|
3120 | traceback if it has not already been printed. Reset the maximum |
---|
3121 | depth stack to empty. Only called when the current stack is already |
---|
3122 | empty. */ |
---|
3123 | static void shDisplay(stack_history_t * shP) |
---|
3124 | { |
---|
3125 | frame_desc_t * fdP; |
---|
3126 | unsigned int tbHash; |
---|
3127 | frame_desc_t * fdNextP; |
---|
3128 | int slot; |
---|
3129 | |
---|
3130 | SH_ASSERT(shP->shCurrentP == NULL); |
---|
3131 | |
---|
3132 | /* If the maximum stack depth is less than the threshold, just free |
---|
3133 | the call chain and return */ |
---|
3134 | fdP = shP->shMaxP; |
---|
3135 | if (fdP == NULL || |
---|
3136 | fdP->fdStackUsed < STACK_LIMIT_WARNING) |
---|
3137 | goto exit; |
---|
3138 | |
---|
3139 | /* Compute a hash of the traceback call chain */ |
---|
3140 | tbHash = 0; |
---|
3141 | while (fdP != NULL) |
---|
3142 | { |
---|
3143 | tbHash <<= 1; |
---|
3144 | tbHash ^= (((unsigned int)fdP->fdStackUsed) << 15) ^ fdP->fdLineNum; |
---|
3145 | fdP = fdP->fdCallerP; |
---|
3146 | } |
---|
3147 | |
---|
3148 | /* Search for the hash of the call chain in the table of tracebacks that |
---|
3149 | have already been printed. Searching the hash table can be done without |
---|
3150 | any locks, since entries are never deleted. The loop must eventually |
---|
3151 | terminate, since the table will not be allowed to fill up. */ |
---|
3152 | search: |
---|
3153 | slot = tbHash % TB_HASH_SIZE; |
---|
3154 | while (SHG.tracebackHash[slot] != 0) |
---|
3155 | { |
---|
3156 | if (SHG.tracebackHash[slot] == tbHash) |
---|
3157 | /* This traceback has already been printed */ |
---|
3158 | goto exit; |
---|
3159 | slot = (slot+1) % TB_HASH_SIZE; |
---|
3160 | } |
---|
3161 | |
---|
3162 | /* The hash of the current max depth traceback was not found in the |
---|
3163 | table and should be inserted at position 'slot'. Do this under |
---|
3164 | protection of the mutex. If 'slot' has been used by the time we |
---|
3165 | get the mutex, drop the mutex and repeat the search. */ |
---|
3166 | down(&SHG.shMutex); |
---|
3167 | if (SHG.nTracebackHashEntries >= MAX_TRACEBACKS) |
---|
3168 | goto exitMutexHeld; |
---|
3169 | if (SHG.tracebackHash[slot] != 0) |
---|
3170 | { |
---|
3171 | up(&SHG.shMutex); |
---|
3172 | goto search; |
---|
3173 | } |
---|
3174 | SHG.tracebackHash[slot] = tbHash; |
---|
3175 | SHG.nTracebackHashEntries += 1; |
---|
3176 | |
---|
3177 | /* Print the traceback */ |
---|
3178 | fdP = shP->shMaxP; |
---|
3179 | printk("\nGPFS kernel stack for process %d(%s) used %d bytes\n", |
---|
3180 | current->pid, current->comm, fdP->fdStackUsed); |
---|
3181 | printk(" stack function\n"); |
---|
3182 | printk(" used\n"); |
---|
3183 | printk(" ----- -----------------------------------------------------\n"); |
---|
3184 | while (fdP != NULL) |
---|
3185 | { |
---|
3186 | printk(" %5d %s at %s:%d\n", |
---|
3187 | fdP->fdStackUsed, fdP->fdFuncNameP, fdP->fdFileNameP, fdP->fdLineNum); |
---|
3188 | fdP = fdP->fdCallerP; |
---|
3189 | } |
---|
3190 | printk(" traceback signature %08X\n", tbHash); |
---|
3191 | |
---|
3192 | /* If the maximum number of allowed tracebacks has been reached, turn |
---|
3193 | off further stack checking. */ |
---|
3194 | if (SHG.nTracebackHashEntries >= MAX_TRACEBACKS) |
---|
3195 | { |
---|
3196 | printk("Maximum number of GPFS deep stack tracebacks reached\n"); |
---|
3197 | printk("GPFS stack checking disabled\n"); |
---|
3198 | SHG.shActive = false; |
---|
3199 | wmb(); |
---|
3200 | } |
---|
3201 | |
---|
3202 | exitMutexHeld: |
---|
3203 | up(&SHG.shMutex); |
---|
3204 | |
---|
3205 | exit: |
---|
3206 | /* Free all stack frame descriptors for the max depth call chain back |
---|
3207 | to the internal free list. */ |
---|
3208 | fdP = shP->shMaxP; |
---|
3209 | while (fdP != NULL) |
---|
3210 | { |
---|
3211 | SH_ASSERT(fdP->fdRef == 1); |
---|
3212 | fdNextP = fdP->fdCallerP; |
---|
3213 | fdP->fdCallerP = shP->shFreeHeadP; |
---|
3214 | shP->shFreeHeadP = fdP; |
---|
3215 | fdP = fdNextP; |
---|
3216 | } |
---|
3217 | shP->shMaxP = NULL; |
---|
3218 | } |
---|
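/* A minimal sketch (not part of the module build) of how shDisplay() above
   derives a traceback signature and checks whether it was already printed:
   a shift-and-xor over each frame's (stackUsed, lineNum) pair, then linear
   probing in a table whose empty slots hold 0.  The table size and frame
   values are hypothetical. */
#if 0
#include <stdio.h>

#define EXAMPLE_TB_HASH_SIZE 8             /* stand-in for TB_HASH_SIZE */

static unsigned int exTable[EXAMPLE_TB_HASH_SIZE];   /* 0 == empty slot */

int main(void)
{
  unsigned int frames[2][2] = { { 5000, 3123 }, { 4200, 2965 } };  /* {used, line} */
  unsigned int tbHash = 0;
  int i, slot;

  for (i = 0; i < 2; i++)
  {
    tbHash <<= 1;
    tbHash ^= (frames[i][0] << 15) ^ frames[i][1];
  }

  slot = tbHash % EXAMPLE_TB_HASH_SIZE;
  while (exTable[slot] != 0 && exTable[slot] != tbHash)
    slot = (slot + 1) % EXAMPLE_TB_HASH_SIZE;        /* linear probe */

  if (exTable[slot] != tbHash)
    exTable[slot] = tbHash;                          /* first time this chain is seen */

  printf("traceback signature %08X stored in slot %d\n", tbHash, slot);
  return 0;
}
#endif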
3219 | |
---|
3220 | |
---|
3221 | /* Process routine entry */ |
---|
3222 | static void fdEntry(frame_desc_t * fdP, stack_history_t * shP) |
---|
3223 | { |
---|
3224 | frame_desc_t * popP; |
---|
3225 | frame_desc_t * p; |
---|
3226 | |
---|
3227 | TRACE5(TRACE_ENTRYEXIT, 11, TRCID_FDENTRY, |
---|
3228 | "fdEntry: fdP 0x%lX shP 0x%lX rtn %s shCurrentP 0x%lX used %d\n", |
---|
3229 | fdP, shP, fdP->fdFuncNameP, shP->shCurrentP, fdP->fdStackUsed); |
---|
3230 | |
---|
3231 | /* If this is the first call by this thread, set up the two call chains */ |
---|
3232 | if (shP->shCurrentP == NULL) |
---|
3233 | { |
---|
3234 | SH_ASSERT(shP->shMaxP == NULL); |
---|
3235 | shP->shCurrentP = fdP; |
---|
3236 | shP->shMaxP = fdP; |
---|
3237 | fdP->fdCallerP = NULL; |
---|
3238 | fdP->fdRef = 2; |
---|
3239 | return; |
---|
3240 | } |
---|
3241 | else |
---|
3242 | SH_ASSERT(shP->shMaxP != NULL); |
---|
3243 | |
---|
3244 | /* Process routine exits implied by the number of bytes of stack that |
---|
3245 | are currently in use. The test needs to be for strict less than |
---|
3246 | because inlined routines share the same stack frame as their |
---|
3247 | caller, but both routines will do entry/exit processing. */ |
---|
3248 | popP = shP->shCurrentP; |
---|
3249 | while (fdP->fdStackUsed < popP->fdStackUsed) |
---|
3250 | { |
---|
3251 | p = popP->fdCallerP; |
---|
3252 | shP->shCurrentP = p; |
---|
3253 | TRACE1(TRACE_ENTRYEXIT, 11, TRCID_IMPLIED_EXIT, |
---|
3254 | "fdEntry: implied exit from rtn %s\n", |
---|
3255 | popP->fdFuncNameP); |
---|
3256 | fdDiscard(popP, shP); |
---|
3257 | if (p == NULL) |
---|
3258 | { |
---|
3259 | /* The outermost routine returned before this call without calling |
---|
3260 | fdExit. Test for a large maximum stack, then reset the |
---|
3261 | maximum. */ |
---|
3262 | shDisplay(shP); |
---|
3263 | |
---|
3264 | /* The current routine is the one and only */ |
---|
3265 | shP->shCurrentP = fdP; |
---|
3266 | shP->shMaxP = fdP; |
---|
3267 | fdP->fdCallerP = NULL; |
---|
3268 | fdP->fdRef = 2; |
---|
3269 | return; |
---|
3270 | } |
---|
3271 | popP = p; |
---|
3272 | } |
---|
3273 | |
---|
3274 | /* If this is an extension of the current max depth stack, just add |
---|
3275 | this routine to the top of both stacks */ |
---|
3276 | if (fdP->fdStackUsed > shP->shMaxP->fdStackUsed && |
---|
3277 | shP->shCurrentP == shP->shMaxP) |
---|
3278 | { |
---|
3279 | fdP->fdCallerP = shP->shCurrentP; |
---|
3280 | shP->shCurrentP = fdP; |
---|
3281 | shP->shMaxP = fdP; |
---|
3282 | fdP->fdRef = 2; |
---|
3283 | TRACE2(TRACE_ENTRYEXIT, 11, TRCID_NEWMAX_EXTEND, |
---|
3284 | "fdEntry: extending new max stack %d fdP 0x%lX\n", |
---|
3285 | fdP->fdStackUsed, fdP); |
---|
3286 | return; |
---|
3287 | } |
---|
3288 | |
---|
3289 | /* Make this new routine be the top of the stack */ |
---|
3290 | fdP->fdCallerP = shP->shCurrentP; |
---|
3291 | shP->shCurrentP = fdP; |
---|
3292 | fdP->fdRef = 1; |
---|
3293 | |
---|
3294 | /* If this new routine has a greater stack depth than the previous max, |
---|
3295 | unreference the previous max depth call chain and add additional |
---|
3296 | references to the current one. */ |
---|
3297 | if (fdP->fdStackUsed > shP->shMaxP->fdStackUsed) |
---|
3298 | { |
---|
3299 | popP = shP->shMaxP; |
---|
3300 | do |
---|
3301 | { |
---|
3302 | p = popP->fdCallerP; |
---|
3303 | fdDiscard(popP, shP); |
---|
3304 | popP = p; |
---|
3305 | } while (popP != NULL); |
---|
3306 | p = fdP; |
---|
3307 | do |
---|
3308 | { |
---|
3309 | p->fdRef = 2; |
---|
3310 | p = p->fdCallerP; |
---|
3311 | } while (p != NULL); |
---|
3312 | TRACE2(TRACE_ENTRYEXIT, 11, TRCID_NEWMAX, |
---|
3313 | "fdEntry: new max stack %d fdP 0x%lX\n", |
---|
3314 | fdP->fdStackUsed, fdP); |
---|
3315 | shP->shMaxP = fdP; |
---|
3316 | } |
---|
3317 | } |
---|
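/* A minimal sketch (not part of the module build) of the implied-exit rule
   in fdEntry() above: any frame on the current chain using strictly more
   stack than the routine now being entered must already have returned, so it
   is popped; the strict '<' keeps an inlined routine, which reports the same
   stack usage as its caller, on the chain.  The values are hypothetical. */
#if 0
#include <stdio.h>

int main(void)
{
  int chain[3] = { 5200, 4600, 4000 };     /* bytes used, innermost first     */
  int newFrame = 4600;                     /* usage reported by the new entry */
  int i = 0;

  while (i < 3 && newFrame < chain[i])
  {
    printf("implied exit of frame using %d bytes\n", chain[i]);
    i++;
  }
  printf("new frame (%d bytes) becomes the top above the frame using %d bytes\n",
         newFrame, chain[i]);
  return 0;
}
#endif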
3318 | |
---|
3319 | |
---|
3320 | /* Process routine exit */ |
---|
3321 | static void fdExit(const char * funcnameP) |
---|
3322 | { |
---|
3323 | stack_history_t * shP; |
---|
3324 | frame_desc_t * lastPopP; |
---|
3325 | frame_desc_t * popP; |
---|
3326 | frame_desc_t * p; |
---|
3327 | |
---|
3328 | /* Locate or create stack_history_t for this thread */ |
---|
3329 | shP = shFind(); |
---|
3330 | |
---|
3331 | /* If call stack is already empty, there is nothing to do except free |
---|
3332 | the stack_history_t */ |
---|
3333 | if (shP->shCurrentP == NULL) |
---|
3334 | { |
---|
3335 | SH_ASSERT(shP->shMaxP == NULL); |
---|
3336 | shPut(shP); |
---|
3337 | return; |
---|
3338 | } |
---|
3339 | |
---|
3340 | /* Search backward on the call stack for a routine name that matches |
---|
3341 | the one being exited. In C++, the ENTER/EXIT macros will pass the |
---|
3342 | same string constant (same address) to fdEntry and fdExit. The C |
---|
3343 | versions of the macros may pass two different copies of the same |
---|
3344 | string. This loop cannot pop routines it skips off the stack, since |
---|
3345 | the routine might never be found. */ |
---|
3346 | p = shP->shCurrentP; |
---|
3347 | for (;;) |
---|
3348 | { |
---|
3349 | if (p->fdFuncNameP == funcnameP || |
---|
3350 | cxiStrcmp(p->fdFuncNameP, funcnameP) == 0) |
---|
3351 | { |
---|
3352 | TRACE4(TRACE_ENTRYEXIT, 11, TRCID_FDEXIT, |
---|
3353 | "fdExit: p 0x%lX shP 0x%lX rtn %s shCurrentP 0x%lX\n", |
---|
3354 | p, shP, p->fdFuncNameP, shP->shCurrentP); |
---|
3355 | lastPopP = p; |
---|
3356 | break; |
---|
3357 | } |
---|
3358 | p = p->fdCallerP; |
---|
3359 | if (p == NULL) |
---|
3360 | { |
---|
3361 | /* Routine name not found. Do not pop stack. */ |
---|
3362 | /* printk("No entry found when exitting %s\n", funcnameP); */ |
---|
3363 | TRACE1(TRACE_ENTRYEXIT, 11, TRCID_FDEXIT_NOTFOUND, |
---|
3364 | "No entry found when exitting %s\n", funcnameP); |
---|
3365 | return; |
---|
3366 | } |
---|
3367 | } |
---|
3368 | |
---|
3369 | /* Pop all routines up to and including lastPopP */ |
---|
3370 | p = shP->shCurrentP; |
---|
3371 | do |
---|
3372 | { |
---|
3373 | popP = p; |
---|
3374 | p = popP->fdCallerP; |
---|
3375 | fdDiscard(popP, shP); |
---|
3376 | } while (popP != lastPopP); |
---|
3377 | shP->shCurrentP = p; |
---|
3378 | |
---|
3379 | /* If this was the return of the outermost routine, print new maximum |
---|
3380 | stack depth traceback and discard the stack_history_t */ |
---|
3381 | if (shP->shCurrentP == NULL) |
---|
3382 | { |
---|
3383 | shDisplay(shP); |
---|
3384 | shPut(shP); |
---|
3385 | } |
---|
3386 | } |
---|
3387 | |
---|
3388 | #endif /* KSTACK_CHECK */ |
---|
3389 | |
---|
3390 | |
---|
3391 | #if defined(ENTRYEXIT_TRACE) || defined(KSTACK_CHECK) |
---|
3392 | void cxiTraceEntry(int level, const char * funcnameP, |
---|
3393 | const char * filenameP, int lineNum) |
---|
3394 | { |
---|
3395 | int stackUsed = THREAD_SIZE - (((unsigned long)&stackUsed) & (THREAD_SIZE-1)); |
---|
3396 | #ifdef KSTACK_CHECK |
---|
3397 | stack_history_t * shP; |
---|
3398 | frame_desc_t * fdP; |
---|
3399 | #endif /* KSTACK_CHECK */ |
---|
3400 | |
---|
3401 | #ifdef ENTRYEXIT_TRACE |
---|
3402 | /* Need to use a constant trace level in the TRACE macro call to get |
---|
3403 | the .trclst file (and later the .trcfmt file) built correctly */ |
---|
3404 | if (_TRACE_IS_ON(TRACE_ENTRYEXIT, BASE_ENTEREXIT_LEVEL + level)) |
---|
3405 | { |
---|
3406 | TRACE5(TRACE_ENTRYEXIT, 1, TRCID_KTRACE_LINUX_ENTER, |
---|
3407 | "-->K %s (%s:%d) level %d stackUsed %d\n", |
---|
3408 | funcnameP, filenameP, lineNum, level, stackUsed); |
---|
3409 | } |
---|
3410 | #endif /* ENTRYEXIT_TRACE */ |
---|
3411 | |
---|
3412 | #ifdef KSTACK_CHECK |
---|
3413 | /* Nothing to do if kernel stack checking is disabled */ |
---|
3414 | if (!SHG.shActive) |
---|
3415 | return; |
---|
3416 | |
---|
3417 | /* Do not attempt to keep track of stack usage in interrupt handlers */ |
---|
3418 | if (in_interrupt()) |
---|
3419 | return; |
---|
3420 | |
---|
3421 | /* Locate or create stack_history_t for this thread */ |
---|
3422 | shP = shFind(); |
---|
3423 | |
---|
3424 | /* Get a new frame descriptor and fill it in */ |
---|
3425 | fdP = fdGet(shP); |
---|
3426 | fdP->fdFuncNameP = funcnameP; |
---|
3427 | fdP->fdFileNameP = filenameP; |
---|
3428 | fdP->fdLineNum = lineNum; |
---|
3429 | fdP->fdStackUsed = stackUsed; |
---|
3430 | |
---|
3431 | /* Perform stack checking for this routine entry */ |
---|
3432 | fdEntry(fdP, shP); |
---|
3433 | #endif /* KSTACK_CHECK */ |
---|
3434 | } |
---|
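/* A minimal sketch (not part of the module build) of the stack-usage
   estimate in cxiTraceEntry() above: the kernel stack occupies a
   THREAD_SIZE-aligned region that grows downward, so the address of a local
   variable modulo THREAD_SIZE is its offset from the bottom of the region,
   and THREAD_SIZE minus that offset is the number of bytes already in use.
   The size and address below are hypothetical. */
#if 0
#include <stdio.h>

#define EXAMPLE_THREAD_SIZE 8192UL         /* stand-in for THREAD_SIZE */

int main(void)
{
  unsigned long localAddr = 0xC5001E40UL;  /* hypothetical address of a local  */
  unsigned long offset = localAddr & (EXAMPLE_THREAD_SIZE - 1);   /* 0x1E40 = 7744 */
  unsigned long used = EXAMPLE_THREAD_SIZE - offset;              /* 8192 - 7744 = 448 */

  printf("offset %lu bytes, stack used %lu bytes\n", offset, used);
  return 0;
}
#endif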
3435 | |
---|
3436 | |
---|
3437 | void cxiTraceExit(int level, const char * funcnameP) |
---|
3438 | { |
---|
3439 | #ifdef ENTRYEXIT_TRACE |
---|
3440 | /* Need to use a constant trace level in the TRACE macro call to get |
---|
3441 | the .trclst file (and later the .trcfmt file) built correctly */ |
---|
3442 | if (_TRACE_IS_ON(TRACE_ENTRYEXIT, BASE_ENTEREXIT_LEVEL + level)) |
---|
3443 | TRACE1(TRACE_ENTRYEXIT, 1, TRCID_KTRACE_LINUX_EXIT, |
---|
3444 | "<--K %s\n", funcnameP); |
---|
3445 | #endif /* ENTRYEXIT_TRACE */ |
---|
3446 | |
---|
3447 | #ifdef KSTACK_CHECK |
---|
3448 | /* Nothing to do if kernel stack checking is disabled */ |
---|
3449 | if (!SHG.shActive) |
---|
3450 | return; |
---|
3451 | |
---|
3452 | /* Do not attempt to keep track of stack usage in interrupt handlers */ |
---|
3453 | if (in_interrupt()) |
---|
3454 | return; |
---|
3455 | |
---|
3456 | /* Process routine exit */ |
---|
3457 | fdExit(funcnameP); |
---|
3458 | #endif /* KSTACK_CHECK */ |
---|
3459 | } |
---|
3460 | void cxiTraceExitRC(int level, const char * funcnameP, int rc) |
---|
3461 | { |
---|
3462 | #ifdef ENTRYEXIT_TRACE |
---|
3463 | /* Need to use a constant trace level in the TRACE macro call to get |
---|
3464 | the .trclst file (and later the .trcfmt file) built correctly */ |
---|
3465 | if (_TRACE_IS_ON(TRACE_ENTRYEXIT, BASE_ENTEREXIT_LEVEL + level)) |
---|
3466 | TRACE2(TRACE_ENTRYEXIT, 1, TRCID_KTRACE_LINUX_EXIT_RC, |
---|
3467 | "<--K %s rc %d\n", funcnameP, rc); |
---|
3468 | #endif /* ENTRYEXIT_TRACE */ |
---|
3469 | |
---|
3470 | #ifdef KSTACK_CHECK |
---|
3471 | /* Nothing to do if kernel stack checking is disabled */ |
---|
3472 | if (!SHG.shActive) |
---|
3473 | return; |
---|
3474 | |
---|
3475 | /* Do not attempt to keep track of stack usage in interrupt handlers */ |
---|
3476 | if (in_interrupt()) |
---|
3477 | return; |
---|
3478 | |
---|
3479 | /* Process routine exit */ |
---|
3480 | fdExit(funcnameP); |
---|
3481 | #endif /* KSTACK_CHECK */ |
---|
3482 | } |
---|
3483 | #endif /* defined(ENTRYEXIT_TRACE) || defined(KSTACK_CHECK) */ |
---|
3484 | |
---|
3485 | |
---|
3486 | #ifdef UIDREMAP |
---|
3487 | size_t cxiGetUserEnvironmentSize(void) |
---|
3488 | { |
---|
3489 | return (current->mm->env_end - current->mm->env_start); |
---|
3490 | } |
---|
3491 | |
---|
3492 | int cxiGetUserEnvironment(char* buf, size_t len) |
---|
3493 | { |
---|
3494 | return cxiCopyIn((char*)current->mm->env_start, buf, len); |
---|
3495 | } |
---|
3496 | #endif |
---|
3497 | |
---|
3498 | Boolean cxiHasMountHelper() |
---|
3499 | { |
---|
3500 | return USING_MOUNT_HELPER(); |
---|
3501 | } |
---|
3502 | |
---|
3503 | #ifdef P_NFS4 |
---|
3504 | |
---|
3505 | #include <linux/nfsd/nfs4layoutxdr.h> |
---|
3506 | |
---|
3507 | /* convert ip address to string */ |
---|
3508 | char *IPtoString(int ip, char *buf) |
---|
3509 | { |
---|
3510 | unsigned char *a = (unsigned char *)&ip; |
---|
3511 | |
---|
3512 | sprintf(buf, "%u.%u.%u.%u", a[0], a[1], a[2], a[3]); |
---|
3513 | |
---|
3514 | return buf; |
---|
3515 | } |
---|
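/* A minimal sketch (not part of the module build) of the address string the
   pNFS code below builds from IPtoString(): an NFSv4 universal address has
   the form "a.b.c.d.p1.p2", where p1 and p2 are the high and low bytes of
   the TCP port.  Port 2049 is 0x0801, which is why ".8.1" is appended.  The
   IP address here is hypothetical. */
#if 0
#include <stdio.h>

int main(void)
{
  unsigned short port = 2049;
  char uaddr[32];

  sprintf(uaddr, "%s.%u.%u", "192.168.1.7", port >> 8, port & 0xFF);
  printf("%s\n", uaddr);                   /* prints 192.168.1.7.8.1 */
  return 0;
}
#endif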
3516 | |
---|
3517 | static void printfh(char *s, int *fh) |
---|
3518 | { |
---|
3519 | #ifdef GPFS_PRINTK |
---|
3520 | printk("%s: %d: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
---|
3521 | s, fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6],fh[7],fh[8],fh[9]); |
---|
3522 | #endif |
---|
3523 | } |
---|
3524 | |
---|
3525 | int cxiSetFH(int *fhP, int sid) |
---|
3526 | { |
---|
3527 | struct knfsd_fh *fh = (struct knfsd_fh *)fhP; |
---|
3528 | |
---|
3529 | printfh("cxiSetFH-1", fhP); |
---|
3530 | if (fh->fh_size > 8) { |
---|
3531 | fh->fh_size += 4; // fh_size + 4 for sid |
---|
3532 | fh->fh_fsid_type += max_fsid_type; |
---|
3533 | fhP[(fh->fh_size >> 2)] = sid; |
---|
3534 | fh->fh_fileid_type = 7; // see code in gpfs_decode_fh() |
---|
3535 | #ifdef GPFS_PRINTK |
---|
3536 | printk("cxiSetFH size %d fsid_type %d fileid %d\n", |
---|
3537 | fh->fh_size, fh->fh_fsid_type, fh->fh_fileid_type); |
---|
3538 | #endif |
---|
3539 | printfh("cxiSetFH-2", fhP); |
---|
3540 | return 0; |
---|
3541 | } |
---|
3542 | return ENOENT; |
---|
3543 | } |
---|
3544 | |
---|
3545 | /* Call to NFS server on MDS to get open state */ |
---|
3546 | int cxiOpenState(void *vfsP, void *p) |
---|
3547 | { |
---|
3548 | int rc = ENOENT; |
---|
3549 | struct super_block *sbP = (struct super_block *)vfsP; |
---|
3550 | struct pnfs_get_state *osP = p; |
---|
3551 | struct gpfsVfsData_t *privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP); |
---|
3552 | |
---|
3553 | #ifdef GPFS_PRINTK |
---|
3554 | printk("cxiOpenState1 sb %p p %p \n", sbP, p); |
---|
3555 | printk("cxiOpenState cb_get_state %p\n", |
---|
3556 | sbP->s_export_op->cb_get_state); |
---|
3557 | #endif |
---|
3558 | if (sbP->s_export_op->cb_get_state) |
---|
3559 | rc = sbP->s_export_op->cb_get_state(osP); |
---|
3560 | |
---|
3561 | gpfs_ops.gpfsGetVerifier(privVfsP, osP->verifier); |
---|
3562 | #ifdef GPFS_PRINTK |
---|
3563 | printk("cxiOpenState rc %d devid %x verifier %x:%x\n", |
---|
3564 | rc, osP->devid, osP->verifier[0], osP->verifier[1]); |
---|
3565 | #endif |
---|
3566 | |
---|
3567 | return rc; |
---|
3568 | } |
---|
3569 | /* Call to NFS server on DS to get change open state or close the file */ |
---|
3570 | int cxiChangeState(void *vfsP, void *p) |
---|
3571 | { |
---|
3572 | int rc = ENOENT; |
---|
3573 | struct super_block *sbP = (struct super_block *)vfsP; |
---|
3574 | struct pnfs_get_state *osP = p; |
---|
3575 | |
---|
3576 | if (sbP->s_export_op->cb_change_state) |
---|
3577 | rc = sbP->s_export_op->cb_change_state(osP); |
---|
3578 | #ifdef GPFS_PRINTK |
---|
3579 | printk("cxiChangeState2 sb %p p %p access %d\n", sbP, p, osP->access); |
---|
3580 | #endif |
---|
3581 | |
---|
3582 | return rc; |
---|
3583 | } |
---|
3584 | /* Call to NFS server on MDS to recall layout */ |
---|
3585 | int cxiRecallLayout(void *vfsP, void *vP, void *p) |
---|
3586 | { |
---|
3587 | int rc = ENOENT; |
---|
3588 | struct super_block *sbP = (struct super_block *)vfsP; |
---|
3589 | struct inode *iP = (struct inode *)vP; |
---|
3590 | struct layout_recall lr; |
---|
3591 | |
---|
3592 | lr.fsid = sbP; |
---|
3593 | lr.offset = 0; |
---|
3594 | lr.length = -1; |
---|
3595 | |
---|
3596 | if (iP == NULL) // recall all layouts for this fs |
---|
3597 | lr.layout_type = RECALL_FSID; |
---|
3598 | |
---|
3599 | #ifdef GPFS_PRINTK |
---|
3600 | printk("cxiRecallLayout sbP %p type %d\n", sbP, lr.layout_type); |
---|
3601 | #endif |
---|
3602 | if (sbP->s_export_op->cb_layout_recall) { |
---|
3603 | rc = sbP->s_export_op->cb_layout_recall(sbP, iP, &lr); |
---|
3604 | } |
---|
3605 | else { |
---|
3606 | lr.layout_type = RECALL_FILE; |
---|
3607 | #ifdef GPFS_PRINTK |
---|
3608 | printk("cxiRecallLayout sbP %p iP %p type %d\n", sbP, iP, lr.layout_type); |
---|
3609 | #endif |
---|
3610 | } |
---|
3611 | |
---|
3612 | #ifdef GPFS_PRINTK |
---|
3613 | printk("cxiRecallLayout sbP %p iP %p rc %d\n", sbP, iP, rc); |
---|
3614 | #endif |
---|
3615 | return rc; |
---|
3616 | } |
---|
3617 | |
---|
3618 | /* Get device list |
---|
3619 | |
---|
3620 | gd_type |
---|
3621 | in: requested layout type. |
---|
3622 | out: available layout type. |
---|
3623 | gd_cookie |
---|
3624 | in: cookie returned on the last operation. |
---|
3625 | out: non-zero cookie if some devices did not fit in the buffer. |
---|
3626 | gd_maxcount |
---|
3627 | in: buffer size in bytes. |
---|
3628 | gd_buffer |
---|
3629 | in: pointer to buffer. |
---|
3630 | gd_devlist_len |
---|
3631 | out: number of items returned in the buffer. |
---|
3632 | |
---|
3633 | error: |
---|
3634 | Use the same return codes as used for GETDEVICELIST |
---|
3635 | */ |
---|
3636 | int |
---|
3637 | cxiGetDeviceList(int nDests, int *idList, void *P) |
---|
3638 | { |
---|
3639 | ENTER(0); |
---|
3640 | int rc = 0; |
---|
3641 | int i, len, left; |
---|
3642 | int j = 0; |
---|
3643 | char *p, *tp; |
---|
3644 | char tmp[32]; |
---|
3645 | struct nfsd4_pnfs_getdevlist *dl = (struct nfsd4_pnfs_getdevlist *)P; |
---|
3646 | struct nfsd4_pnfs_devlist *gd_buf = NULL; |
---|
3647 | struct pnfs_filelayout_devaddr *dev; |
---|
3648 | |
---|
3649 | #ifdef GPFS_PRINTK |
---|
3650 | printk("xxx cxiGetDeviceList enter nDests %d idList %p \n", nDests, idList); |
---|
3651 | #endif |
---|
3652 | |
---|
3653 | dl->gd_type = LAYOUT_NFSV4_FILES; |
---|
3654 | dl->gd_cookie = 0; |
---|
3655 | dl->gd_devlist_len = 0; |
---|
3656 | left = dl->gd_maxcount; |
---|
3657 | tp = &tmp[0]; |
---|
3658 | |
---|
3659 | len = sizeof(struct nfsd4_pnfs_devlist) * nDests; |
---|
3660 | #ifdef GPFS_PRINTK |
---|
3661 | printk("xxx cxiGetDeviceList len %d left %d\n", len, left); |
---|
3662 | #endif |
---|
3663 | if (len > left) { /* compare byte lengths, not the device count */ |
---|
3664 | rc = ENOMEM; //??? NFS4ERR_TOOSMALL |
---|
3665 | goto xerror; |
---|
3666 | } |
---|
3667 | gd_buf = (struct nfsd4_pnfs_devlist *)cxiMallocUnpinned(len); |
---|
3668 | if (gd_buf == NULL) { |
---|
3669 | rc = ENOMEM; |
---|
3670 | goto xerror; |
---|
3671 | } |
---|
3672 | memset(gd_buf, 0, len); |
---|
3673 | dl->gd_devlist = gd_buf; |
---|
3674 | |
---|
3675 | #ifdef GPFS_PRINTK |
---|
3676 | printk("xxx cxiGetDeviceList gd_buf %p count %d\n", gd_buf, nDests); |
---|
3677 | #endif |
---|
3678 | for (i = 0; i < nDests; i++) |
---|
3679 | { |
---|
3680 | /* make both device id and device address be the same for now */ |
---|
3681 | gd_buf[j].dev_id = idList[i]; |
---|
3682 | gd_buf[j].dev_lotype = LAYOUT_NFSV4_FILES; |
---|
3683 | if (gd_buf[j].dev_id == INADDR_NONE) |
---|
3684 | continue; |
---|
3685 | |
---|
3686 | IPtoString(gd_buf[j].dev_id, tp); |
---|
3687 | len = (cxiStrlen(tp)); |
---|
3688 | |
---|
3689 | p = (char *)cxiMallocUnpinned(sizeof(struct pnfs_filelayout_devaddr)); |
---|
3690 | if (p == NULL) { |
---|
3691 | rc = ENOMEM; |
---|
3692 | goto xerror; |
---|
3693 | } |
---|
3694 | memset(p, 0, sizeof(struct pnfs_filelayout_devaddr)); |
---|
3695 | gd_buf[j].dev_addr = p; |
---|
3696 | |
---|
3697 | dev = (struct pnfs_filelayout_devaddr *)p; |
---|
3698 | dev->r_addr.len = len + 4; /* for ".8.1" */ |
---|
3699 | |
---|
3700 | p = (char *)cxiMallocUnpinned(dev->r_addr.len+1); |
---|
3701 | if (p == NULL) { |
---|
3702 | rc = ENOMEM; |
---|
3703 | goto xerror; |
---|
3704 | } |
---|
3705 | dev->r_addr.data = p; |
---|
3706 | cxiMemcpy(p, tp, len); |
---|
3707 | p = p + len; |
---|
3708 | cxiStrcpy(p, ".8.1"); /* port 2049 = 0x801 = "8.1" */ |
---|
3709 | |
---|
3710 | dev->r_netid.len = 3; /*'tcp'*/ |
---|
3711 | p = (char *)cxiMallocUnpinned(dev->r_netid.len+1); |
---|
3712 | if (p == NULL) { |
---|
3713 | rc = ENOMEM; |
---|
3714 | goto xerror; |
---|
3715 | } |
---|
3716 | cxiStrcpy(p, "tcp"); |
---|
3717 | dev->r_netid.data = p; |
---|
3718 | |
---|
3719 | left = left - 1; |
---|
3720 | dl->gd_devlist_len++; |
---|
3721 | |
---|
3722 | TRACE4(TRACE_VNODE, 2, TRCID_CXI_GET_DEVICELIST_P1, |
---|
3723 | "gpfsGetDeviceList index %d len %d ip %s left %d\n", |
---|
3724 | i, dev->r_addr.len, dev->r_addr.data, left); |
---|
3725 | #ifdef GPFS_PRINTK |
---|
3726 | printk("xxx cxiGetDeviceList index %d id %d len %d ip %s left %d ops %p %p\n", |
---|
3727 | i, gd_buf[j].dev_id, dev->r_addr.len, |
---|
3728 | dev->r_addr.data, left, dl->gd_ops, dl->gd_ops->devaddr_encode); |
---|
3729 | #endif |
---|
3730 | |
---|
3731 | j++; |
---|
3732 | } |
---|
3733 | |
---|
3734 | exit: |
---|
3735 | |
---|
3736 | TRACE2(TRACE_VNODE, 2, TRCID_CXI_GET_DEVICELIST_EXIT, |
---|
3737 | "cxiGetDeviceList exit: rc %d len %d", rc, len); |
---|
3738 | return rc; |
---|
3739 | |
---|
3740 | xerror: |
---|
3741 | |
---|
3742 | if (gd_buf != NULL) { |
---|
3743 | for (i = 0; i < j; i++) |
---|
3744 | { |
---|
3745 | dev = gd_buf[i].dev_addr; |
---|
3746 | if (dev) { |
---|
3747 | cxiFreeUnpinned(dev->r_addr.data); |
---|
3748 | cxiFreeUnpinned(dev->r_netid.data); |
---|
3749 | cxiFreeUnpinned(dev); |
---|
3750 | } |
---|
3751 | } |
---|
3752 | cxiFreeUnpinned(gd_buf); |
---|
3753 | } |
---|
3754 | goto exit; |
---|
3755 | } |
---|
3756 | |
---|
3757 | int |
---|
3758 | cxiGetDeviceInfo(void *P) |
---|
3759 | { |
---|
3760 | ENTER(0); |
---|
3761 | int rc = 0; /* success path falls through to the exit trace */ |
---|
3762 | int len; |
---|
3763 | char *p, *tp; |
---|
3764 | char tmp[32]; |
---|
3765 | struct nfsd4_pnfs_getdevinfo *da = (struct nfsd4_pnfs_getdevinfo *)P; |
---|
3766 | tp = &tmp[0]; |
---|
3767 | struct pnfs_filelayout_devaddr *dev; |
---|
3768 | |
---|
3769 | IPtoString(da->gd_dev_id, tp); |
---|
3770 | |
---|
3771 | dev = (struct pnfs_filelayout_devaddr *)cxiMallocUnpinned(sizeof(struct pnfs_filelayout_devaddr)); |
---|
3772 | if (dev == NULL) { |
---|
3773 | rc = ENOMEM; |
---|
3774 | goto xerror; |
---|
3775 | } |
---|
3776 | da->gd_devaddr = dev; |
---|
3777 | |
---|
3778 | len = (cxiStrlen(tp)); |
---|
3779 | dev->r_addr.len = len + 4; /* for ".8.1" */ |
---|
3780 | |
---|
3781 | p = (char *)cxiMallocUnpinned(dev->r_addr.len+1); |
---|
3782 | if (p == NULL) { |
---|
3783 | cxiFreeUnpinned(dev); |
---|
3784 | rc = ENOMEM; |
---|
3785 | goto xerror; |
---|
3786 | } |
---|
3787 | dev->r_addr.data = p; |
---|
3788 | cxiMemcpy(p, tp, len); |
---|
3789 | p = p + len; |
---|
3790 | cxiStrcpy(p, ".8.1"); /* port 2049 = 0x801 = "8.1" */ |
---|
3791 | |
---|
3792 | dev->r_netid.len = 3; /*'tcp'*/ |
---|
3793 | p = (char *)cxiMallocUnpinned(dev->r_netid.len+1); |
---|
3794 | if (p == NULL) { |
---|
3795 | cxiFreeUnpinned(dev->r_addr.data); |
---|
3796 | cxiFreeUnpinned(dev); |
---|
3797 | rc = ENOMEM; |
---|
3798 | goto xerror; |
---|
3799 | } |
---|
3800 | cxiStrcpy(p, "tcp"); |
---|
3801 | dev->r_netid.data = p; |
---|
3802 | |
---|
3803 | TRACE2(TRACE_VNODE, 2, TRCID_GPFSOPS_GET_DEVICELINFO_P1, |
---|
3804 | "gpfsGetDeviceInfo len %d ip %s\n", |
---|
3805 | dev->r_addr.len, dev->r_addr.data); |
---|
3806 | |
---|
3807 | #ifdef GPFS_PRINTK |
---|
3808 | printk("xxx cxiGetDeviceInfo id %d len %d ip %s\n", |
---|
3809 | da->gd_dev_id, dev->r_addr.len, dev->r_addr.data); |
---|
3810 | #endif |
---|
3811 | |
---|
3812 | xerror: |
---|
3813 | |
---|
3814 | TRACE1(TRACE_VNODE, 2, TRCID_CXI_GET_DEVICELINFO_EXIT, |
---|
3815 | "cxiGetDeviceInfo exit: rc %d\n", rc); |
---|
3816 | |
---|
3817 | return rc; |
---|
3818 | } |
---|
3819 | /* get layout |
---|
3820 | lg_type |
---|
3821 | in: requested layout type. |
---|
3822 | out: available layout type. |
---|
3823 | lg_offset |
---|
3824 | in: requested offset. |
---|
3825 | out: returned offset. |
---|
3826 | lg_length |
---|
3827 | in: requested length. |
---|
3828 | out: returned length. |
---|
3829 | lg_mxcnt |
---|
3830 | in: buffer size in bytes. |
---|
3831 | lg_llist |
---|
3832 | in: pointer to buffer. |
---|
3833 | lg_layout |
---|
3834 | out: number of items returned in the buffer. |
---|
3835 | |
---|
3836 | If the file is big(?) return all nodes in the layout. |
---|
3837 | If the file is small return no layout or just one node, chosen at |
---|
3838 | random, but make sure it is the same node for the same file. |
---|
3839 | */ |
---|
3840 | int |
---|
3841 | cxiGetLayout(int nDests, int *idList, cxiVattr_t *vattr, int myAddr, void *P) |
---|
3842 | { |
---|
3843 | ENTER(0); |
---|
3844 | char *p = NULL, *n = NULL; |
---|
3845 | int i, rc, left, len = 0; |
---|
3846 | struct nfsd4_pnfs_layoutget *gl = (struct nfsd4_pnfs_layoutget *)P; |
---|
3847 | struct nfsd4_pnfs_layoutlist *lg_buf = NULL; |
---|
3848 | struct nfsd4_pnfs_filelayout *layout = NULL; |
---|
3849 | |
---|
3850 | TRACE2(TRACE_VNODE, 2, TRCID_CXI_GET_LAYOUT_ENTER, |
---|
3851 | "cxiGetLayout: nDests %d myAddr %x\n", nDests,myAddr); |
---|
3852 | |
---|
3853 | /* set node id in fh and increase fh size by 4 */ |
---|
3854 | rc = cxiSetFH((int *)&gl->lg_fh, myAddr); |
---|
3855 | if (rc != 0) |
---|
3856 | goto xerror; |
---|
3857 | |
---|
3858 | gl->lg_type = LAYOUT_NFSV4_FILES; |
---|
3859 | gl->lg_offset = 0; |
---|
3860 | gl->lg_length = MAX_UINT64; /* The maximum file size */ |
---|
3861 | |
---|
3862 | layout = (struct nfsd4_pnfs_filelayout *)cxiMallocUnpinned(sizeof(struct nfsd4_pnfs_filelayout)); |
---|
3863 | if (layout == NULL) { |
---|
3864 | rc = ENOMEM; |
---|
3865 | goto xerror; |
---|
3866 | } |
---|
3867 | gl->lg_layout = layout; |
---|
3868 | layout->lg_stripe_type = STRIPE_DENSE; |
---|
3869 | layout->lg_commit_through_mds = true; |
---|
3870 | layout->lg_stripe_unit = vattr->va_blocksize; /* preferred blocksize */ |
---|
3871 | layout->lg_file_size = vattr->va_size; /* file size in bytes */ |
---|
3872 | layout->lg_llistlen = 0; |
---|
3873 | |
---|
3874 | left = gl->lg_mxcnt; |
---|
3875 | |
---|
3876 | len = sizeof(struct nfsd4_pnfs_layoutlist) * nDests; |
---|
3877 | if (len > left) { |
---|
3878 | rc = ENOMEM; // NFS4ERR_TOOSMALL |
---|
3879 | goto xerror; |
---|
3880 | } |
---|
3881 | lg_buf = (struct nfsd4_pnfs_layoutlist *)cxiMallocUnpinned(len); |
---|
3882 | if (lg_buf == NULL) { |
---|
3883 | rc = ENOMEM; |
---|
3884 | goto xerror; |
---|
3885 | } |
---|
3886 | memset(lg_buf, 0, len); |
---|
3887 | layout->lg_llist = lg_buf; |
---|
3888 | left = left - len; |
---|
3889 | |
---|
3890 | for (i = 0; i < nDests; i++) |
---|
3891 | { |
---|
3892 | /* make both device id and device address be the same for now */ |
---|
3893 | lg_buf[i].dev_ids.len = 1; //??? can return a list of dev ids ???? |
---|
3894 | lg_buf[i].dev_ids.list = (u32 *)cxiMallocUnpinned(sizeof(u32)*lg_buf[i].dev_ids.len); |
---|
3895 | if (lg_buf[i].dev_ids.list == NULL) { |
---|
3896 | rc = ENOMEM; |
---|
3897 | goto xerror; |
---|
3898 | } |
---|
3899 | lg_buf[i].dev_ids.list[0] = idList[i]; |
---|
3900 | layout->lg_llistlen++; |
---|
3901 | lg_buf[i].fhp = (struct knfsd_fh *)&gl->lg_fh; |
---|
3902 | |
---|
3903 | #ifdef GPFS_PRINTK |
---|
3904 | printk("cxiGetLayout index %d id %d xid 0x%lX len %d\n", |
---|
3905 | i, idList[i], idList[i], len); |
---|
3906 | #endif |
---|
3907 | TRACE3(TRACE_VNODE, 2, TRCID_CXI_GET_LAYOUT_P1, |
---|
3908 | "cxiGetLayout index %d id 0x%lX len %d\n", |
---|
3909 | i, idList[i], len); |
---|
3910 | |
---|
3911 | } |
---|
3912 | if (i == 0) { |
---|
3913 | layout->lg_llistlen = 0; |
---|
3914 | cxiFreeUnpinned(lg_buf); |
---|
3915 | } |
---|
3916 | |
---|
3917 | #ifdef GPFS_PRINTK |
---|
3918 | printk("cxiGetLayout: type %d iomode %d offset %lld length %lld minlength %lld mxcnt %d ops %p layouts %p\n", |
---|
3919 | gl->lg_type, gl->lg_iomode, gl->lg_offset, gl->lg_length, gl->lg_minlength, |
---|
3920 | gl->lg_mxcnt, gl->lg_ops, gl->lg_layout); |
---|
3921 | |
---|
3922 | printfh("cxiGetLayout:", gl->lg_fh); |
---|
3923 | |
---|
3924 | printk("cxiGetLayout: layout stripe_type %d stripe_unit %lld file_size %lld llistlen %d llist %p\n", |
---|
3925 | layout->lg_stripe_type, layout->lg_stripe_unit,layout->lg_file_size, |
---|
3926 | layout->lg_llistlen,layout->lg_llist); |
---|
3927 | #endif |
---|
3928 | |
---|
3929 | exit: |
---|
3930 | |
---|
3931 | TRACE3(TRACE_VNODE, 2, TRCID_CXI_GET_LAYOUT_EXIT, |
---|
3932 | "cxiGetLayout exit: rc %d len %d p 0x%lX", rc, len, p); |
---|
3933 | |
---|
3934 | return rc; |
---|
3935 | |
---|
3936 | xerror: |
---|
3937 | |
---|
3938 | if (lg_buf) { |
---|
3939 | gl->lg_length = 0; |
---|
3940 | for (i = 0; i < nDests; i++) |
---|
3941 | { |
---|
3942 | cxiFreeUnpinned(lg_buf[i].dev_ids.list); |
---|
3943 | } |
---|
3944 | cxiFreeUnpinned(lg_buf); |
---|
3945 | } |
---|
3946 | if (layout) |
---|
3947 | cxiFreeUnpinned(layout); |
---|
3948 | |
---|
3949 | goto exit; |
---|
3950 | } |
---|
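/* A minimal sketch (not part of the module build) of what the STRIPE_DENSE
   layout returned by cxiGetLayout() above implies for a pNFS client: with a
   dense file layout the data server index for a byte offset is
   (offset / stripe_unit) modulo the number of devices in the layout list.
   The sizes below are hypothetical. */
#if 0
#include <stdio.h>

int main(void)
{
  unsigned long long offset = 3ULL * 1048576 + 4096;   /* byte 3 MiB + 4 KiB */
  unsigned long long stripeUnit = 1048576ULL;          /* 1 MiB stripe unit  */
  int nDevices = 2;

  int idx = (int)((offset / stripeUnit) % nDevices);
  printf("offset %llu is served by device index %d\n", offset, idx);   /* 1 */
  return 0;
}
#endif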
3951 | #endif |
---|
3952 | |
---|
3953 | int cxiCheckThreadState(cxiThreadId tid) |
---|
3954 | { |
---|
3955 | struct task_struct *t, *g; |
---|
3956 | int rc = ENOENT; |
---|
3957 | |
---|
3958 | // read_lock(&tasklist_lock); |
---|
3959 | rcu_read_lock(); |
---|
3960 | |
---|
3961 | DO_EACH_THREAD(g,t) |
---|
3962 | { |
---|
3963 | /* We are looking for a thread with the given tid and the same command |
---|
3964 | name as the caller (the caller must be another mmfsd thread). */ |
---|
3965 | if (t->pid == tid && |
---|
3966 | cxiStrcmp(t->comm, current->comm) == 0) |
---|
3967 | { |
---|
3968 | rc = 0; |
---|
3969 | break; |
---|
3970 | } |
---|
3971 | } WHILE_EACH_THREAD(g,t); |
---|
3972 | // read_unlock(&tasklist_lock); |
---|
3973 | rcu_read_unlock(); |
---|
3974 | |
---|
3975 | return rc; |
---|
3976 | } |
---|
3977 | |
---|