Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

super.c @ 65

Last change on this file since 65 was 16, checked in by rock, 17 years ago

File size: 71.0 KB

Line
1	/***************************************************************************
2	*
3	* Copyright (C) 2001 International Business Machines
4	* All rights reserved.
5	*
6	* This file is part of the GPFS mmfslinux kernel module.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* 1. Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* 2. Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* 3. The name of the author may not be used to endorse or promote products
18	* derived from this software without specific prior written
19	* permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*
32	*************************************************************************** */
33	/* @(#)24 1.157.1.8 src/avs/fs/mmfs/ts/kernext/gpl-linux/super.c, mmfs, avs_rgpfs24, rgpfs24s011a 4/5/07 11:15:55 */
34	/*
35	* Superblock operations
36	*
37	* Contents:
38	* TraceBKL
39	* gpfs_s_read_inode2
40	* gpfs_s_read_inode
41	* gpfs_s_delete_inode
42	* gpfs_s_notify_change
43	* gpfs_s_put_super
44	* gpfs_s_statfs
45	* gpfs_s_umount_begin
46	* gpfs_s_remount
47	* gpfs_s_write_inode
48	* gpfs_s_clear_inode
49	* gpfs_s_write_super
50	* gpfs_s_fs_locations
51	* gpfs_fill_super
52	* gpfs_reg_fs
53	* gpfs_unreg_fs
54	* kill_mmfsd
55	* get_myinode
56	* exec_mmfs
57	* fork_mount_helper
58	* vfsUserCleanup
59	* cxiSetMountInfo
60	* cxiUnmount
61	* cxiReactivateOSNode
62	* cxiNewOSNode
63	* cxiFreeOSNode
64	* cxiDeleteMmap
65	* cxiReinitOSNode
66	* cxiFindOSNode
67	* cxiDumpOSNode
68	* cxiRefOSNode
69	* cxiInactiveOSNode
70	* cxiPutOSNode
71	* cxiDestroyOSNode
72	* cxiSetOSNodeType
73	* cxiUpdateInode
74	* cxiCanUncacheOSNode
75	* cxiAddOSNode
76	*
77	*/
78
79	#include <Shark-gpl.h>
80
81	#include <linux/string.h>
82	#include <linux/module.h>
83	#include <linux/errno.h>
84	#include <linux/fs.h>
85	#include <linux/smp_lock.h>
86
87	#ifndef GPFS_ARCH_X86_64
88	#define __KERNEL_SYSCALLS__
89	#endif
90	#include <linux/unistd.h>
91	#include <asm/uaccess.h> /* KERNEL_DS */
92
93	#define FOOBAR #error Do not do this
94
95	/* GPFS headers */
96	#include <verdep.h>
97	#include <linux2gpfs.h>
98	#include <cxiSystem.h>
99	#include <cxiTypes.h>
100	#include <cxiAtomic.h>
101	#include <cxi2gpfs.h>
102	#include <cxiIOBuffer.h>
103	#include <cxiSharedSeg.h>
104	#include <cxiCred.h>
105	#include <linux2gpfs.h>
106	#include <Trace.h>
107	#include <cxiVFSStats.h>
108	#include <linux/kmod.h>
109	#if LINUX_KERNEL_VERSION > 2060000
110	#include <linux/wait.h>
111	#endif
112
113	/* forward declaration */
114	int vfsUserCleanup(struct super_block *sbP,
115	struct gpfsVfsData_t *privVfsP, Boolean force);
116
117	extern struct file_system_type gpfs_fs_type;
118
119	static DECLARE_WAIT_QUEUE_HEAD(pwq);
120
121	int mmfsd_module_active = 0;
122	static int mmfsd_id = -1;
123	static int mount_id = -1;
124	char mountCmd[CXI_MAXPATHLEN+1] = "M ";
125	char mmfs_path[CXI_MAXPATHLEN+1] = "";
126	char bin_path[CXI_MAXPATHLEN+1];
127	static char mount_opt[CXI_MAXPATHLEN+1];
128
129	static unsigned int unusedInodeNum = 1;
130	static struct inode *unusedInodeP = NULL;
131	static struct super_block *unusedSuperP = NULL;
132	struct super_block *shutdownSuperP = NULL;
133
134	static spinlock_t inode_lock;
135
/* Trace helper: records whether the big kernel lock is held and the
   current task's lock depth.  Only compiled for VERBOSETRACE builds. */
#ifdef VERBOSETRACE
void TraceBKL()
{
  TRACE2(TRACE_VNODE, 10, TRCID_VNODE_BKL,
         "BKL %d lock_depth %d\n",
         kernel_locked(),
         current->lock_depth);
}
#endif
144
145	#include <linux/pagemap.h>
146
147	#if HAS_SOP_ALLOC_INODE
148	static struct kmem_cache * gpfsInodeCacheP;
149	struct gpfs_bloated_inode
150	{
151	struct inode inode;
152	char cxiNode[CXINODE_SIZE];
153	};
154
155	static void
156	gpfs_init_once(void * iP, struct kmem_cache * cacheP, unsigned long flags)
157	{
158	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
159	SLAB_CTOR_CONSTRUCTOR)
160	inode_init_once((struct inode *)iP);
161	}
162
163	int
164	gpfs_init_inodecache(void)
165	{
166	gpfsInodeCacheP = kmem_cache_create("gpfsInodeCache",
167	sizeof(struct gpfs_bloated_inode), 0,
168	SLAB_HWCACHE_ALIGN\|SLAB_RECLAIM_ACCOUNT,
169	gpfs_init_once, NULL);
170	if (gpfsInodeCacheP == NULL)
171	return -ENOMEM;
172	return 0;
173	}
174
175	struct inode *
176	gpfs_alloc_inode(struct super_block *sbP)
177	{
178	struct inode * iP;
179
180	iP = (struct inode *)kmem_cache_alloc(gpfsInodeCacheP, GFP_KERNEL);
181	TRACE1N(TRACE_VNODE, 1, TRCID_LINUXOPS_GPFS_ALLOC_INODE_EXIT,
182	"gpfs_alloc_inode: inode 0x%lX\n", iP);
183	return iP;
184	}
185
186	void
187	gpfs_destroy_inode(struct inode *iP)
188	{
189	TRACE1N(TRACE_VNODE, 1, TRCID_LINUXOPS_GPFS_DESTROY_INODE,
190	"gpfs_destroy_inode: inode 0x%lX\n", (void *)iP);
191	kmem_cache_free(gpfsInodeCacheP, (void *)iP);
192	}
193
194	void
195	gpfs_destroy_inodecache(void)
196	{
197	while (kmem_cache_shrink(gpfsInodeCacheP) != 0)
198	cxiSleep(40);
199	kmem_cache_destroy(gpfsInodeCacheP);
200	}
201
202	#endif /* HAS_SOP_ALLOC_INODE */
203
204	/* This routine is called from iget() just after allocating a new inode.
205	This is a variant of the normal read_inode operation that allows passing an
206	opaque parameter through iget4 into read_inode2. We need the parameter to
207	know whether read_inode2 is being called from a normal lookup opration,
208	where we are already holding a distributed lock on the file, or from nfs
209	calling iget, where we need to get the lock inside of read_inode2.
210
211	Note: In the Linux source the call to read_inode2 is labelled a "reiserfs
212	specific hack" with the additional warning "We don't want this to last, and
213	are looking for VFS changes that will allow us to get rid of it." If and
214	when such a change is made, we will hopefully be able to adapt our code
215	accordingly. Otherwise, if read_inode2 goes away without a suitable
216	replacement, we will have to use a more expensive approach, e.g., a global
217	table where lookup would leave some state before calling iget. */
218	void
219	gpfs_s_read_inode2(struct inode iP, void opaque)
220	{
221	struct gpfsVfsData_t *privVfsP;
222	ino_t inum = iP->i_ino;
223	cxiNode_t *cnP;
224	int rc;
225
226	ENTER(0);
227	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_ENTER,
228	"gpfs_s_read_inode2 enter: inode 0x%lX inode %d\n",
229	iP, inum);
230	/* BKL is sometimes held at entry */
231
232	#if HAS_SOP_ALLOC_INODE
233	cnP = (cxiNode_t )&((struct gpfs_bloated_inode )iP)->cxiNode;
234	#else
235	/* allocate cxiNode_t */
236	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
237	{
238	/* need to allocate separate storage for the cxiNode_t */
239	cnP = (cxiNode_t *)cxiMallocUnpinned(CXINODE_SIZE);
240	if (cnP == NULL)
241	goto exit_bad;
242	}
243	else
244	{
245	/* we can store the cxiNode_t in the part of the iP->u
246	* union after the PRVINODE field
247	*/
248	cnP = (cxiNode_t *)(&iP->PRVINODE + 1);
249	}
250	#endif
251
252	memset(cnP, 0, CXINODE_SIZE);
253
254	/*TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_1,
255	"gpfs_s_read_inode2: iP 0x%lX cnP 0x%lX uSize-void* %d nodeSize %d",
256	iP, cnP, sizeof(iP->PRVINODE) - sizeof(void *), CXINODE_SIZE);
257
258	*/
259	/* connect cxiNode_t to struct inode */
260	cnP->osNodeP = iP;
261	iP->PRVINODE = cnP;
262
263	/* get inode attributes */
264	privVfsP = VP_TO_PVP(iP);
265	rc = gpfs_ops.gpfsInodeRead(privVfsP, cnP, inum, opaque);
266
267	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_EXIT,
268	"gpfs_s_read_inode2 exit: inode 0x%lX rc %d",
269	iP, rc);
270
271	if (rc == 0)
272	{
273	EXIT(0);
274	return; // success!
275	}
276
277	/* undo cxiNode_t allocation */
278	cnP->osNodeP = NULL;
279	iP->PRVINODE = NULL;
280
281	#if !HAS_SOP_ALLOC_INODE
282	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
283	cxiFreeUnpinned(cnP);
284	#endif
285
286	exit_bad:
287	/* make_bad_inode will initialize iP so that all operations return EIO;
288	also set i_nlink to zero so that the bad inode will be thrown out of
289	the cache at the next opportunity */
290	make_bad_inode(iP);
291	iP->i_nlink = 0;
292	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_EXIT_BAD,
293	"gpfs_s_read_inode2 exit: inode 0x%lX rc BADINODE",
294	iP);
295
296	if (rc)
297	cxiErrorNFS(rc);
298
299	EXIT(0);
300	}
301
302	/* The following routine should never be called, since we have a read_inode2
303	operation. However, knfsd checks the operation table and refuses to export
304	a file system if its read_inode operation ptr is NULL. Hence, we need to
305	have one, even if it never gets called. */
306	void
307	gpfs_s_read_inode(struct inode *iP)
308	{
309	/* only iget will use read_inode; this shouldn't happen as long as
310	gpfs_nfsd_iget is being invoked via fh_to_dentry/gpfs_fh_to_dentry */
311	ENTER(0);
312	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_HUH,
313	"gpfs_s_read_inode: ? calling make_bad_inode");
314	make_bad_inode(iP);
315	EXIT(0);
316	}
317
318
319	/* The following routine is called from iput when the i_count goes to zero and
320	the link count in the inode is zero, which presumably means that the file
321	was deleted. If so, we should free the disk space occupied by the file. */
322	void
323	gpfs_s_delete_inode(struct inode *iP)
324	{
325	cxiNode_t *cnP;
326	ext_cred_t eCred;
327	Boolean isGPFS = cxiIsGPFSThread();
328	struct gpfsVfsData_t *privVfsP;
329
330	ENTER(0);
331	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_DELETE_INODE,
332	"gpfs_s_delete_inode enter: inode 0x%lX inode %d gpfsThread %d\n",
333	iP, iP->i_ino, isGPFS);
334	TraceBKL();
335
336	#if MUST_TRUNCATE_INODE_PAGES
337	truncate_inode_pages(&iP->i_data, 0);
338	#endif
339
340	cnP = VP_TO_CNP(iP);
341
342	if (!cnP)
343	{
344	/* The cxiNode_t is allocated in gpfs_s_read_inode2, so if cnP is NULL,
345	this means gpfs_s_read_inode2 failed and has marked this as a bad
346	inode. No further actions necessary in this case. */
347	goto xerror;
348	}
349
350	if (TestCtFlag(cnP, destroyIfDelInode))
351	{
352	privVfsP = VP_TO_PVP(iP);
353	DBGASSERT(privVfsP != NULL);
354
355	/* ?? "eCred is passed all the way to the daemon, and then is ignored
356	there," FBS 5/24/01 */
357	setCred(&eCred);
358
359	gpfs_ops.gpfsInodeDelete(privVfsP, cnP, isGPFS, &eCred);
360
361	iP->PRVINODE = NULL;
362	cnP->osNodeP = NULL;
363
364	#if !HAS_SOP_ALLOC_INODE
365	/* If necessary, free the cxiNode_t structure which was allocated
366	* in gpfs_s_read_inode2.
367	*/
368	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
369	cxiFreeUnpinned(cnP);
370	#endif
371	}
372
373	xerror:
374	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_DELETE_INODE_1,
375	"gpfs_s_delete_inode exit: inode 0x%lX cnP 0x%lX\n",
376	iP, cnP);
377
378	clear_inode(iP);
379	EXIT(0);
380	}
381
382	int
383	gpfs_s_notify_change(struct dentry dentryP, struct iattr attrP)
384	{
385	int rc;
386
387	ENTER(0);
388	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_NOTIFY_ENTER,
389	"gpfs_s_notify_change enter: inode 0x%lX attr 0x%lX\n",
390	dentryP->d_inode, attrP);
391	TraceBKL();
392
393	rc = gpfs_i_setattr_internal(dentryP->d_inode, attrP);
394
395	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_NOTIFY_EXIT,
396	"gpfs_s_notify_change exit: inode 0x%lX rc %d\n",
397	dentryP->d_inode, rc);
398	EXIT(0);
399	if (rc)
400	return (-rc);
401	return rc;
402	}
403
404	/* put_super is called just before the super_block is freed in do_unmount */
405	void
406	gpfs_s_put_super(struct super_block *sbP)
407	{
408	int rc = 0;
409	struct gpfsVfsData_t *privVfsP;
410
411	ENTER(0);
412	LOGASSERT(sbP != NULL);
413	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
414	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_PUTSUPER_ENTER,
415	"gpfs_s_put_super enter: sbP 0x%lX sbP->s_dev 0x%X\n",
416	sbP, sbP->s_dev);
417	TraceBKL();
418
419	rc = cxiUnmount(sbP, false, true);
420
421	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_PUTSUPER_EXIT,
422	"gpfs_s_put_super exit: rc %d\n", rc);
423
424	EXIT(0);
425	}
426
427	int
428	gpfs_s_statfs(struct dentry den, struct KSTATFS bufP)
429	{
430	struct super_block *sbP = den->d_sb;
431	int rc;
432	int code = 0;
433	int len = sizeof(struct KSTATFS);
434	struct gpfsVfsData_t privVfsP = (struct gpfsVfsData_t )SBLOCK_PRIVATE(sbP);
435	cxiStatfs_t statfs;
436
437	VFS_STAT_START(statfsCall);
438	ENTER(0);
439	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_STATFS_ENTER,
440	"gpfs_s_statfs enter: sbP 0x%lX len %d\n", sbP, len);
441	memset(bufP, 0, len);
442	/* BKL is held at entry */
443
444	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
445	LOGASSERT(privVfsP != NULL);
446	rc = gpfs_ops.gpfsStatfs(privVfsP, &statfs);
447	if (rc)
448	{
449	rc = -rc;
450	code = 1;
451	goto xerror;
452	}
453
454	bufP->f_type = GPFS_SUPER_MAGIC;
455	bufP->f_bsize = statfs.f_bsize;
456	bufP->f_blocks = statfs.f_blocks;
457	bufP->f_bfree = statfs.f_bfree;
458	bufP->f_bavail = statfs.f_bavail;
459	bufP->f_files = statfs.f_files;
460	bufP->f_ffree = statfs.f_ffree;
461	bufP->f_namelen = statfs.f_name_max;
462	bufP->f_fsid.val[0] = statfs.f_fsid.val[0];
463	bufP->f_fsid.val[1] = statfs.f_fsid.val[1];
464
465	/* If filesystem size cannot be represented by the OS statfs structure,
466	increase the "block size" and reduce the numbers */
467	if (sizeof(bufP->f_blocks) < sizeof(statfs.f_blocks))
468	{
469	while (bufP->f_blocks != statfs.f_blocks)
470	{
471	statfs.f_bsize <<= 1; // double f_bsize
472	statfs.f_blocks >>= 1; // halve the rest
473	statfs.f_bfree >>= 1;
474	statfs.f_bavail >>= 1;
475	bufP->f_bsize = statfs.f_bsize;
476	bufP->f_blocks = statfs.f_blocks;
477	bufP->f_bfree = statfs.f_bfree;
478	bufP->f_bavail = statfs.f_bavail;
479	}
480	}
481
482	xerror:
483	TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_STATFS_EXIT,
484	"gpfs_s_statfs exit: f_blocks %lld f_bfree %lld f_files %d f_free %d "
485	"f_bsize %d code %d rc %d\n",
486	statfs.f_blocks, statfs.f_bfree, bufP->f_files, bufP->f_ffree,
487	bufP->f_bsize, code, rc);
488
489	if (rc)
490	cxiErrorNFS(rc);
491
492	VFS_STAT_STOP;
493	EXIT(0);
494	return rc;
495	}
496
497	/* umount_begin is called only when the force option is used */
498	void
499	#if LINUX_KERNEL_VERSION >= 2061700
500	gpfs_s_umount_begin(struct vfsmount *vfs, int flags)
501	#else
502	gpfs_s_umount_begin(struct super_block * sbP)
503	#endif
504	{
505	int dmrc = 0;
506	struct gpfsVfsData_t *privVfsP;
507	#if LINUX_KERNEL_VERSION >= 2061700
508	struct super_block * sbP;
509	LOGASSERT(vfs != NULL);
510	LOGASSERT(vfs->mnt_sb != NULL);
511	sbP = vfs->mnt_sb;
512	#endif
513
514	ENTER(0);
515	LOGASSERT(sbP != NULL);
516	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
517	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UMOUNT_ENTER,
518	"gpfs_s_umount_begin enter: sbP 0x%lX sbP->s_dev 0x%X "
519	"root vfsmount 0x%X pwd vfsmount 0x%X\n", sbP, sbP->s_dev,
520	current->fs ? current->fs->rootmnt : NULL,
521	current->fs ? current->fs->pwdmnt : NULL);
522	TraceBKL();
523
524	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
525
526	/* We may need to generate a preunmount DMAPI event, since this
527	* is a user initiated force unmount and we need to inform any
528	* DM application before we start flushing out VFS users.
529	*/
530	if (privVfsP)
531	{
532	#ifdef DMAPI
533	Boolean doDMEvents = false;
534	struct dentry *dP = NULL;
535	struct inode *iP = NULL;
536	cxiNode_t *cnP = NULL;
537
538	dP = sbP->s_root;
539	if (dP != NULL)
540	iP = dP->d_inode;
541	if (iP != NULL)
542	cnP = VP_TO_CNP(iP);
543
544	/* Generate preunmount event. We have to present this because
545	* vfsUserCleanup() may potentially kill processes on forced unmount.
546	* Since the DM application may have an open file in this file system
547	* we have to warn him. The DM application may not however receive
548	* the final unmount event if we can't get everything released. If
549	* VFS users still exist after this, then no mntput() and subsequent
550	* gpfs_s_put_super() will occur.
551	*/
552	dmrc = gpfs_ops.gpfsDmUnmountEvent(true, true, privVfsP, cnP,
553	&doDMEvents, NULL, NULL, NULL, 0);
554	#endif
555
556	/* Force unmount */
557	vfsUserCleanup(sbP, privVfsP, true);
558
559	if (sbP->s_root)
560	printDentryTree(sbP->s_root, 10);
561	}
562
563	exit:
564	TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_UMOUNT_EXIT,
565	"gpfs_s_umount_begin exit: sbP 0x%lX privVfsP 0x%lX dmrc %d "
566	"s_active %d s_count 0x%X active files %d\n",
567	sbP, privVfsP, dmrc, atomic_read(&sbP->s_active),
568	sbP->s_count, !list_empty(&sbP->s_files));
569
570	/* Module count is decremented later on in do_unmount via gpfs_s_put_super */
571	EXIT(0);
572	}
573
574	int
575	gpfs_s_remount(struct super_block sbP, int flags, char *data)
576	{
577	ENTER(0);
578	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOUNT,
579	"gpfs_s_remount: called\n");
580	TraceBKL();
581	EXIT(0);
582	return 0;
583	}
584
585	void
586	gpfs_s_write_inode(struct inode *inode)
587	{
588	ENTER(0);
589	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITEINODE,
590	"gpfs_s_write_inode: called\n");
591	TraceBKL();
592	EXIT(0);
593	}
594
595
596	/* This routine is called from iput() just before the storage of the
597	Linux inode is freed */
598	void
599	gpfs_s_clear_inode(struct inode *iP)
600	{
601	int code = 0;
602	struct gpfsVfsData_t *privVfsP;
603	cxiNode_t *cnP;
604
605	ENTER(0);
606	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_CLEARINODE,
607	"gpfs_s_clear_inode enter: inode 0x%lX inode %d generic_ip 0x%lX\n",
608	iP, iP->i_ino, iP->PRVINODE);
609	TRACE3(TRACE_VNODE, 5, TRCID_LINUXOPS_CLEARINODE_DETAILS,
610	"gpfs_s_clear_inode: cnP 0x%lX privVfsP 0x%lX tooBig %d\n",
611	VP_TO_CNP(iP), VP_TO_PVP(iP),
612	NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE));
613
614	DBGASSERT(atomic_read((atomic_t *)&iP->i_count) == 0);
615
616	cnP = VP_TO_CNP(iP);
617	privVfsP = VP_TO_PVP(iP);
618
619	if (cnP)
620	{
621	if (privVfsP)
622	gpfs_ops.gpfsRele(privVfsP, cnP, (void *)iP, vnOp);
623
624	/* if necessary, free the cxiNode_t storage that we allocated in
625	gpfs_s_read_inode2 */
626	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
627	{
628	iP->PRVINODE = NULL;
629	cxiFreeUnpinned(cnP);
630	}
631	}
632
633	xerror:
634	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_CLEARINODE_EXIT,
635	"gpfs_s_clear_inode exit: inode 0x%lX generic_ip 0x%lX code %d\n",
636	iP, iP->PRVINODE, code);
637	EXIT(0);
638	}
639
640	void
641	gpfs_s_write_super(struct super_block * sbP)
642	{
643	int rc = 0;
644	struct gpfsVfsData_t *privVfsP;
645
646	ENTER(0);
647	LOGASSERT(sbP != NULL);
648	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
649	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
650	LOGASSERT(privVfsP != NULL);
651	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITESUPER,
652	"gpfs_s_write_super enter: sbP 0x%lX\n", sbP);
653
654	/* We have to either adhere to the s_dirt semantics or
655	* ignore all syncs. Once a file systems write_super gets
656	* called, sync_supers() restarts the super block scan. If
657	* we don't turn off s_dirt then sync_supers() will be caught
658	* in a loop. Alternatively if we only ignored kupdated then
659	*
660	* 1) a person could write to a file (which turns on s_dirt)
661	* 2) kupdated could run (and be ignored) but the s_dirt is turned off
662	* 3) the user attempts a sync from the command line sync, but that
663	* does nothing since s_dirt was off
664	* 4) the user expected the sync to have done something before he
665	* halts the machine.
666	*/
667	sbP->s_dirt = 0;
668
669	/*
670	* jcw: Another way to handle this would be never turn on the s_dirt flag,
671	* and not to even have a write_super callback. Then neither kupdated nor
672	* sync would do anything. The sync watchdog in the GPFS daemon would
673	* substitute for kupdated. To regain the semantics of sync, we would
674	* create dummy inodes that would have I_DIRTY set, and link one such inode
675	* onto each GPFS superblock. Then sync would notice the dirty inodes
676	* and call back through their write_inode callbacks. This would be
677	* the only use of I_DIRTY by GPFS, so it could be reinterpreted to mean
678	* "sync this file system". For now, s_dirt is still set and reset, but
679	* s_dirt gets reset for all file systems before they have all been synced,
680	* so the race described above can occur. The permanently-dirty inode
681	* needs to be implemented to fix this.
682	*/
683	/* goto xerror; */
684
685	/* BKL is held at entry */
686	TRACE0(TRACE_VNODE, 3, TRCID_LINUXOPS_WRITESUPER_3,
687	"gpfs_s_write_super: performing sync");
688
689	rc = gpfs_ops.gpfsSyncfs(privVfsP);
690	if (rc) {
691	cxiErrorNFS(rc);
692	rc = -rc;
693	}
694	xerror:
695	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITESUPER_5,
696	"gpfs_s_write_super exit: sbP 0x%lX rc %d\n", sbP, rc);
697	EXIT(0);
698	}
699
700
701
702	#if LINUX_KERNEL_VERSION >= 2060000
703	int gpfs_get_sb(struct file_system_type *fsTypeP,
704	int flags, const char devNameP, void dataP, struct vfsmount *mnt)
705	{
706	struct super_block *sbP;
707	int sb_ret = 0;
708
709	ENTER(0);
710	sb_ret = get_sb_nodev(fsTypeP, flags, dataP, gpfs_fill_super, mnt);
711
712	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_GET_SB,
713	"gpfs_get_sb: flags 0x%X dataP 0x%X sbP %d\n",
714	flags, dataP, sb_ret);
715
716	EXIT(0);
717	return sb_ret;
718	}
719
720	int
721	gpfs_fill_super(struct super_block sbP, void dataP, int silent)
722	#else
723	struct super_block *
724	gpfs_fill_super(struct super_block sbP, void dataP, int silent)
725	#endif
726	{
727	int kernel_unlock = 0;
728	struct inode *rootIP = NULL;
729	struct dentry *rootDP = NULL;
730	char *myBufP = NULL;
731	char *sgNameP;
732	char *strP;
733	char *mountpointP;
734	char *optionsP;
735	int rc = 0;
736	int mountHelperID = -1;
737	int code = 0;
738	int namelen;
739	struct gpfsVfsData_t *privVfsP;
740	cxiNode_t *cnRootP;
741	cxiIno_t rootINum;
742	char bname[BDEVNAME_SIZE];
743	Boolean restricted = false;
744
745	ENTER(0);
746	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_1,
747	"gpfs_fill_super enter: sbP 0x%lX dev 0x%X silent %d data '%s'\n",
748	sbP, sbP->s_dev, silent, ((char *)dataP == NULL) ? "" : dataP);
749
750	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_2,
751	"gpfs_fill_super: dev name '%s'\n",
752	(sbP->s_bdev == NULL) ? "" : SBLOCK_BDEVNAME(sbP,bname));
753
754	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_3,
755	"gpfs_fill_super: s_flags 0x%x\n", sbP->s_flags);
756
757	/* A mount increases reference count on module */
758	#if LINUX_KERNEL_VERSION < 2060000
759	MY_MODULE_INCREMENT();
760	#endif
761
762	if (dataP == NULL \|\| (char )dataP == '\0')
763	{
764	rc = EINVAL;
765	code = 1;
766	goto xerror;
767	}
768
769	if (strlen((char *)dataP) > CXI_MAXPATHLEN)
770	{
771	rc = ENAMETOOLONG;
772	code = 2;
773	goto xerror;
774	}
775
776	sbP->s_magic = GPFS_SUPER_MAGIC;
777	sbP->s_op = &gpfs_sops;
778	#if LINUX_KERNEL_VERSION > 2060000
779	sbP->s_export_op = &gpfs_export_ops;
780	#endif
781
782	SBLOCK_PRIVATE(sbP) = NULL;
783
784	sbP->s_root = NULL;
785	sbP->s_blocksize = 0;
786	sbP->s_blocksize_bits = 0;
787	/* maximum filesize (avoid sign bit due to use with loff_t) */
788	sbP->s_maxbytes = 0x7FFFFFFFFFFFFFFFULL;
789
790	myBufP = (char )cxiMallocPinned(strlen((char )dataP) + 1);
791	if (myBufP == NULL)
792	{
793	code = 3;
794	rc = ENOMEM;
795	goto xerror;
796	}
797	strcpy(myBufP, (char *)dataP);
798	optionsP = myBufP;
799
800	/* This is the syntax parser for the options field. At
801	* least one option must be "dev=<devname>".
802	*/
803	sgNameP = NULL;
804	strP = myBufP;
805
806	while(strP)
807	{
808	if (!strncmp(strP, "dev=", 4))
809	{
810	sgNameP = (char *)strchr(strP, '=') + 1;
811	strP = (char )strchr(strP, ','); / more options */
812	if (strP)
813	namelen = strP - sgNameP;
814	else
815	namelen = strlen(sgNameP);
816
817	/* Copy the sgName into the first part of the
818	* buffer, null terminate it, then append the
819	* full option list.
820	*/
821	strncpy(myBufP, sgNameP, namelen);
822	sgNameP = myBufP;
823	sgNameP[namelen] = '\0';
824
825	optionsP = myBufP + namelen + 1;
826	/* Move the options next (if there are any) */
827	strcpy(optionsP, strP?(char *)strP:"");
828	break;
829	}
830	else
831	{
832	strP = (char *)strchr(strP, ',');
833	if (strP) strP++;
834	}
835	}
836
837	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_OPTIONS,
838	"gpfs_fill_super: optionsP \"%s\"\n",
839	strP ? (char *) strP:"");
840
841	while (strP)
842	{
843	/* look for rs option */
844	strP = (char *)strchr(strP, ',');
845	if (strP)
846	strP++;
847	if (strP)
848	{
849	if (!strncmp(strP, "rs", 2))
850	{
851	restricted = true;
852	break;
853	}
854	}
855	}
856
857	if (sgNameP == NULL \|\| *sgNameP == '\0')
858	{
859	code = 4;
860	rc = EINVAL;
861	goto xerror;
862	}
863	mountpointP = sgNameP; /* ??? */
864
865	if (restricted)
866	{
867	/* restricted mount - make it readonly */
868	sbP->s_flags \|= MS_RDONLY;
869	}
870
871	strcpy(mmfs_path, bin_path);
872	strcat(mmfs_path, "/mmfsmount");
873
874	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_STARTHELPER,
875	"gpfs_fill_super: start mount helper '%s'\n", mmfs_path);
876
877	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_STARTHELPER1,
878	"gpfs_fill_super: s_flags 0x%x (rs %d), mountpointP %s\n",
879	sbP->s_flags, restricted, mountpointP);
880
881	if (strlen(sgNameP) > CXI_MAXPATHLEN)
882	{
883	rc = ENAMETOOLONG;
884	code = 5;
885	goto xerror;
886	}
887	rc = gpfs_ops.gpfsReady();
888	if (rc != 0)
889	{
890	rc = EAGAIN;
891	code = 6;
892	goto xerror;
893	}
894
895	/* Start a new process that will receive and forward all messages during the
896	* mount process to the mount invoker. The current process will wait for
897	* this new process (in HandleMBUnmount()) and the daemon to be connected with
898	* a socket and only than call SFSMountFS() that does the real mount work.
899	*/
900	strcpy(&mountCmd[2], sgNameP); // "M /dev/gpfs1"
901	if (cxiHasMountHelper())
902	mountHelperID = fork_mount_helper(mountCmd);
903	else
904	{
905	/* Use special pid (-1) when not using mount helper */
906	mountHelperID = -1;
907	}
908
909	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_HELPERRC,
910	"gpfs_fill_super: mount helper mountHelperID %d\n", mountHelperID);
911
912	#if LINUX_KERNEL_VERSION < 2060000
913	/* BKL is not held during get_sb in 2.6 */
914	if (kernel_locked())
915	{
916	unlock_kernel();
917	kernel_unlock = 1;
918	}
919	#else
920	/*
921	* In 2.5, a bunch of calls originating from sys_sync will try to down
922	* s_umount and block, because it's already downed in get_sb_nodev, and won't
923	* be upped until get_sb returns (in do_kern_mount). During gpfsMount, we'll
924	* call mmcommon getEFOption, and that will at some point try to do a sync
925	* (e.g. in gpfsClusterInit, two times), and mount will deadlock. One way
926	* to fix this is to take out relevant sync's in the shell scripts, but this
927	* is dodgy because we might end up pulling a new sdr from another node, and
928	* that's a long and compelex path, I don't think one can guarantee there
929	* won't be any syscalls that desire s_umount along the way. Need to think
930	* how to fix this right. For now, up the semaphore for the duration of
931	* the gpfsMount (possibly opening up a window for other races e.g. with
932	* unmount).
933	*/
934	up_write(&sbP->s_umount);
935	#endif
936	rc = gpfs_ops.gpfsMount((void *)sbP, PAGE_SIZE, sgNameP, mountpointP,
937	optionsP,
938	(struct gpfsVfsData_t **)&(SBLOCK_PRIVATE(sbP)),
939	&cnRootP, /* returned root cxiNode_t */
940	&rootINum, /* returned root inode number */
941	NULL, /* not a soft mount */
942	mountHelperID /* mount helper id */,
943	-1U, /* no unique mount ID specified */
944	(sbP->s_flags & MS_RDONLY), /* is it readonly */
945	true); /* allocate pinned memory */
946
947	#if LINUX_KERNEL_VERSION < 2060000
948	/* BKL is not held during get_sb in 2.5 */
949	if (kernel_unlock)
950	lock_kernel();
951	#else
952	down_write(&sbP->s_umount);
953	#endif
954
955	if (rc)
956	{
957	code = 7;
958	goto xerror;
959	}
960
961	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
962	DBGASSERT(cnRootP != NULL);
963	rootIP = (struct inode *)cnRootP->osNodeP;
964
965	DBGASSERT(rootIP != NULL);
966	DBGASSERT(rootIP->PRVINODE == cnRootP);
967	DBGASSERT(cnRootP->osNodeP == rootIP);
968
969	/* Successful mount in daemon. Allocate root directory cache entry */
970	rootDP = d_alloc_root(rootIP);
971	if (!rootDP)
972	{
973	rc = gpfs_ops.gpfsUnmount(privVfsP, true);
974	if (rc == 0 \|\| rc == ENOSYS)
975	gpfs_ops.gpfsFinishUnmount(privVfsP);
976
977	code = 8;
978	goto xerror;
979	}
980
981	rootDP->d_op = &gpfs_dops_valid;
982	sbP->s_root = rootDP;
983
984	sbP->s_dirt = 1; /* keep it on for sync to work */
985
986	if (myBufP != NULL)
987	cxiFreePinned(myBufP);
988
989	#if (LINUX_KERNEL_VERSION < 2060000)
990	unlock_super(sbP);
991	#endif
992	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_SUCCEED,
993	"gpfs_fill_super exit: success sbP 0x%lX\n", sbP);
994	EXIT(0);
995	#if LINUX_KERNEL_VERSION >= 2060000
996	return 0;
997	#else
998	return sbP;
999	#endif
1000
1001	xerror:
1002	if (rootDP)
1003	dput(rootDP);
1004	if (rootIP)
1005	iput(rootIP);
1006
1007	if (myBufP != NULL)
1008	cxiFreePinned(myBufP);
1009
1010	#if LINUX_KERNEL_VERSION < 2060000
1011	unlock_super(sbP);
1012
1013	sbP->s_dev = 0;
1014	#endif
1015
1016	/* An unmount decrements module use count */
1017	#if LINUX_KERNEL_VERSION < 2060000
1018	MY_MODULE_DECREMENT();
1019	#endif
1020
1021	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_FAILED,
1022	"gpfs_fill_super: failed code %d rc %d\n", code, rc);
1023	EXIT(0);
1024	#if LINUX_KERNEL_VERSION >= 2060000
1025	return -rc;
1026	#else
1027	return NULL;
1028	#endif
1029	}
1030
1031	int
1032	gpfs_reg_fs()
1033	{
1034	int rc;
1035
1036	ENTER(0);
1037	spin_lock_init(&inode_lock);
1038
1039	rc = register_filesystem(&gpfs_fs_type);
1040	if (rc)
1041	goto xerror;
1042
1043	/* We create a dummy super block for purposes of instantiating
1044	* a shutdown file descriptor. When the daemon dies this file
1045	* will be closed and its special ops will be called.
1046	* See cxiRegisterCleanup()
1047	*/
1048	shutdownSuperP = cxiMallocPinned(sizeof(struct super_block));
1049	if (!shutdownSuperP)
1050	{
1051	unregister_filesystem(&gpfs_fs_type);
1052	rc = -ENOMEM;
1053	goto xerror;
1054	}
1055
1056	SET_SUPER_BLOCK(shutdownSuperP, &null_sops);
1057
1058	xerror:
1059	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REGFS,
1060	"gpfs_reg_fs shutdownSuperP 0x%lX rc %d\n",
1061	shutdownSuperP, rc);
1062	EXIT(0);
1063	return rc;
1064	}
1065
1066	void
1067	gpfs_unreg_fs()
1068	{
1069	int rc;
1070
1071	ENTER(0);
1072	rc = unregister_filesystem(&gpfs_fs_type);
1073
1074	if (shutdownSuperP)
1075	{
1076	UNSET_SUPER_BLOCK(shutdownSuperP);
1077	cxiFreePinned(shutdownSuperP);
1078	shutdownSuperP = NULL;
1079	}
1080
1081	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_UNREGFS,
1082	"gpfs_unreg_fs rc %d\n", rc);
1083	EXIT(0);
1084	}
1085
1086	void
1087	kill_mmfsd(void)
1088	{
1089	ENTER(0);
1090	if (mmfsd_id != -1)
1091	{
1092	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_KILLMMFSD,
1093	"kill_mmfsd: pid %X\n", mmfsd_id);
1094
1095	kill_proc(mmfsd_id, SIGTERM, 1);
1096	if (mmfsd_id != -1)
1097	#if LINUX_KERNEL_VERSION > 2060000
1098	wait_event(pwq,0);
1099	#else
1100	sleep_on(&pwq);
1101	#endif
1102	}
1103	EXIT(0);
1104	}
1105
1106	/*
1107	* Note: since this function is executed as kernel_thread "main" routine,
1108	* it may not be safe to use stack at all, e.g. call non-inlined functions,
1109	* at least in the success path. See comments e.g. in asm-i386/unistd.h
1110	*/
1111	int
1112	exec_mmfs(void *nothing)
1113	{
1114	static char *argv[] = { mmfs_path, mount_opt, NULL };
1115	static char *envp[] = { "HOME=/", NULL };
1116	int rc;
1117
1118	ENTER(0);
1119	set_fs(KERNEL_DS);
1120
1121	rc = EXEC_HELPER(mmfs_path, argv, envp, 1 /* wait if possible */);
1122
1123	xerror:
1124	if(rc)
1125	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_EXECMMFS_EXIT,
1126	"exec_mmfs: exit rc -1 errno %d path %s\n", errno, mmfs_path);
1127	EXIT(0);
1128	return rc;
1129	}
1130
1131	int
1132	fork_mount_helper(char *data)
1133	{
1134
1135	ENTER(0);
1136	strcpy(mount_opt, data);
1137	mount_id = kernel_thread(exec_mmfs, 0, 0);
1138
1139	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_FORK_MOUNTHELPER,
1140	"fork_mount_helper: new pid %d\n", mount_id);
1141
1142	EXIT(0);
1143	return mount_id;
1144	}
1145
1146	/* Set device id and other information for a file system being mounted */
1147	int
1148	cxiSetMountInfo(void *osVfsP, cxiDev_t sgDevID,
1149	int bsize, void osRootNodeP, cxiNode_t cnRootP,
1150	Boolean releRootP, void gnRootP,
1151	fsid_t fsid)/* (out) maintain hold on root */
1152	{
1153	struct super_block sbP = (struct super_block )osVfsP;
1154	struct inode rootIP = (struct inode )osRootNodeP; // root dir inode
1155	int i;
1156
1157	ENTER(0);
1158	TRACE4(TRACE_VNODE, 1, TRCID_SET_MOUNT_INFO,
1159	"cxiSetMountInfo: sbP 0x%lX rootIP 0x%lX cnRootP 0x%lX "
1160	"gnRootP 0x%lX\n", sbP, rootIP, cnRootP, gnRootP);
1161	DBGASSERT(sbP != NULL);
1162
1163	/* This is the auto remount case where mmfsd died/killed and restarted. */
1164	if (gnRootP == cnRootP)
1165	{
1166	/* Since the OS independent layer looked up and held the
1167	* root vnode, we've got too many hold counts for a reconnect.
1168	* Tell upper layer that we must release.
1169	*/
1170	*releRootP = true;
1171	}
1172	else
1173	{
1174	/* Don't attempt to release the root VFS node */
1175	*releRootP = false;
1176	sbP->s_blocksize = bsize;
1177	for (i = sbP->s_blocksize, sbP->s_blocksize_bits = 0; i != 1; i >>= 1)
1178	sbP->s_blocksize_bits++;
1179	}
1180	if (rootIP != NULL)
1181	{
1182	DBGASSERT(rootIP->i_ino == INODENUM_ROOTDIR_FILE);
1183	DBGASSERT(rootIP->PRVINODE == cnRootP);
1184	}
1185
1186	EXIT(0);
1187	return 0;
1188	}
1189
1190	/* Attempt whatever we can to get holders of VFS elements
1191	* (dcache entries, etc) to leave.
1192	*/
1193	int
1194	vfsUserCleanup(struct super_block *sbP,
1195	struct gpfsVfsData_t *privVfsP, Boolean force)
1196	{
1197	struct siginfo sinfo;
1198	struct task_struct g, tsP;
1199	Boolean killit;
1200	int rc;
1201
1202	ENTER(0);
1203
1204	#ifndef GPFS_ARCH_POWER
1205	/* Forced unmount doesn't really work very well on Linux since
1206	* the VFS layer is very stateful. If a process is sitting in
1207	* the file system, its vmount count will not go to zero and a
1208	* proper unmount can occur. We're experimenting with the
1209	* semantics (akin to umount -k on other OSes) where processes
1210	* are killed if they are within a forced unmounted file system.
1211	*
1212	* Note that this doesn't get everyone. If you have a file open
1213	* in GPFS but don't have your current working directory in GPFS
1214	* then you're not killed. To kill those user (or close their
1215	* files) you'd have to traipse thru the file table. There's
1216	* a lot of OS specific code there that we wouldn't want to get
1217	* into.
1218	*/
1219	if (force)
1220	{
1221	sinfo.si_signo = SIGKILL;
1222	sinfo.si_errno = 0;
1223	sinfo.si_code = SI_KERNEL;
1224	sinfo.si_addr = vfsUserCleanup;
1225	sinfo.si_pid = current->pid;
1226	sinfo.si_uid = current->uid;
1227
1228	// read_lock(&tasklist_lock);
1229	rcu_read_lock();
1230
1231	DO_EACH_THREAD(g,tsP)
1232	{
1233	task_lock(tsP);
1234	if (tsP->fs && tsP->fs->pwdmnt && tsP->fs->pwdmnt->mnt_sb == sbP)
1235	killit = true;
1236	else
1237	killit = false;
1238	task_unlock(tsP);
1239
1240	if (killit)
1241	send_sig_info(SIGKILL, &sinfo, tsP);
1242	} WHILE_EACH_THREAD(g,tsP);
1243	// read_unlock(&tasklist_lock);
1244	rcu_read_unlock();
1245	}
1246	#endif
1247
1248	/* Purge cached OS VFS nodes/cxiNodes. */
1249	rc = gpfs_ops.gpfsUncache(privVfsP);
1250
1251	EXIT(0);
1252	return rc;
1253	}
1254
1255	/* Called by gpfs_s_put_super() when the last holder of the superblock
1256	* is gone. We should be able to successfully clean up and become
1257	* unmounted.
1258	*/
1259	int
1260	cxiUnmount(void *osVfsP, Boolean force, Boolean doDMEvents)
1261	{
1262	int rc = 0;
1263	int dmrc = 0;
1264	struct super_block sbP = (struct super_block )osVfsP;
1265	struct gpfsVfsData_t *privVfsP;
1266	#ifdef DMAPI
1267	Boolean dmDoUnmountEvent = false;
1268	void *sgUidP = NULL;
1269	void *eventlistP = NULL;
1270	void *sessLocP = NULL;
1271	struct dentry *dP = NULL;
1272	struct inode *iP = NULL;
1273	cxiNode_t *cnP = NULL;
1274	#endif
1275
1276	ENTER(0);
1277	LOGASSERT(sbP != NULL);
1278	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
1279
1280	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CXIUNMOUNT_ENTER,
1281	"cxiUnmount: enter privVfsP 0x%lX sbP 0x%lX force %d doDM %d\n",
1282	privVfsP, sbP, force, doDMEvents);
1283
1284	if (privVfsP == NULL)
1285	goto exit;
1286
1287	#ifdef DMAPI
1288	dP = sbP->s_root;
1289	if (dP != NULL)
1290	iP = dP->d_inode;
1291	if (iP != NULL)
1292	cnP = VP_TO_CNP(iP);
1293
1294	/* Generate preunmount event */
1295	if (doDMEvents)
1296	{
1297	rc = gpfs_ops.gpfsDmUnmountEvent(true, force, privVfsP, cnP,
1298	&dmDoUnmountEvent, &sgUidP,
1299	&eventlistP, &sessLocP, 0);
1300	/* We should continue unmount even if it fails. Otherwise, linux
1301	screwup and cannot remount unless we shutdown the daemon */
1302	}
1303	#endif
1304
1305	/* The superblock is unallocated by the kernel after gpfs_s_put_super /
1306	cxiUnmount, regardless of any errors here because it doesn't check
1307	a return code from the filesystem specific put_super call, so we need to
1308	proceed through these calls even if an error occurs; not cleaning up
1309	things in gpfsFinishUnmount (ie, the gpfs mount list) after an error
1310	with unmount causes havoc when the daemon later restarts. */
1311
1312	rc = vfsUserCleanup(sbP, privVfsP, force);
1313	if (rc == ENOSYS)
1314	rc = 0;
1315
1316	rc = gpfs_ops.gpfsUnmount(privVfsP, force);
1317	if (rc == ENOSYS)
1318	rc = 0;
1319
1320	gpfs_ops.gpfsFinishUnmount(privVfsP);
1321	SBLOCK_PRIVATE(sbP) = NULL;
1322
1323	#ifdef DMAPI
1324	if (dmDoUnmountEvent)
1325	dmrc = gpfs_ops.gpfsDmUnmountEvent(false, force, NULL, NULL,
1326	&dmDoUnmountEvent, &sgUidP,
1327	&eventlistP, &sessLocP, rc);
1328	#endif
1329	sbP->s_dirt = 0;
1330
1331	printSuperList(sbP);
1332
1333	/* An unmount decrements module use count */
1334	#if LINUX_KERNEL_VERSION < 2060000
1335	MY_MODULE_DECREMENT();
1336	#endif
1337
1338	exit:
1339	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_CXIUNMOUNT_EXIT,
1340	"cxiUnmount: exit rc %d dmrc %d\n", rc, dmrc);
1341	EXIT(0);
1342	return rc;
1343	}
1344
1345	int
1346	cxiReactivateOSNode(void osVfsP, cxiNode_t cnP, void **osNodePP)
1347	{
1348	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REACTIVATE_VNODE,
1349	"cxiReactivateOSNode: sbP 0x%lX cxiNodeP 0x%lX osNodePP 0x%lX\n",
1350	osVfsP, cnP, osNodePP);
1351	LOGASSERT(0); // not implemented on linux
1352	return 0;
1353	}
1354
1355
1356	#if LINUX_KERNEL_VERSION >= 2060000
1357	static int
1358	inodeFindActor(struct inode iP, void opaqueP)
1359	{
1360	/* iget4 can be called on one thread which goes to create a new
1361	* inode (get_new_inode, gpfs_s_read_inode2, gpfsInodeRead, readOSNode)
1362	* but before that thread completes initializing the cxiNode_t, another
1363	* thread calls iget4 and gets here (find_inode, inodeFindActor).
1364	* Similar races exist when an inode is being deleted.
1365	*
1366	* Ideally, we'd like to spin_unlock() on the inode_lock and call
1367	* wait_on_inode() but we cannot release the inode_lock here (find_inode
1368	* is depending on it to protect its list_entry() calls). Fortunately,
1369	* iget4 does exactly this wait for the inode upon return from
1370	* find_inode. Returning zero here would cause get_new_inode to be
1371	* called (which would assert when it found the first thread had
1372	* already allocated the gnode). Return 1 and iget4 will do the
1373	* necessary wait.
1374	*
1375	* We can't call anything here that could sleep because we are holding
1376	* the inode_lock and sleeping can result in a hang
1377	* TRACE4N does not block and is ok here.
1378	*/
1379
1380	TRACE4N(TRACE_VNODE, 2, TRCID_LINUXOPS_INODEFINDACTOR,
1381	"inodeFindActor: iP 0x%lX i_state 0x%x cxiNodeP 0x%lX isBad %d\n",
1382	iP, iP->i_state, VP_TO_CNP(iP), is_bad_inode(iP));
1383
1384	if (iP->i_state & INODE_IN_CACHE)
1385	return 1;
1386
1387	if (VP_TO_CNP(iP) == NULL)
1388	{
1389	if (iP->i_state == 0)
1390	return 0;
1391	else
1392	return 1;
1393	}
1394
1395	return gpfs_ops.gpfsInodeFindActor(VP_TO_CNP(iP), iP->i_ino, opaqueP);
1396	}
1397
1398	static int
1399	inodeInitLocked(struct inode iP, void opaqueP)
1400	{
1401	cxiIGetArg_t argsP = (cxiIGetArg_t )opaqueP;
1402
1403	iP->i_ino = argsP->extInodeNum;
1404	return 0;
1405	}
1406	#else
1407	static int
1408	inodeFindActor(struct inode iP, unsigned long inodeNum, void opaqueP)
1409	{
1410	/* iget4 can be called on one thread which goes to create a new
1411	* inode (get_new_inode, gpfs_s_read_inode2, gpfsInodeRead, readOSNode)
1412	* but before that thread completes initializing the cxiNode_t, another
1413	* thread calls iget4 and gets here (find_inode, inodeFindActor).
1414	* Similar races exist when an inode is being deleted.
1415	*
1416	* Ideally, we'd like to spin_unlock() on the inode_lock and call
1417	* wait_on_inode() but we cannot release the inode_lock here (find_inode
1418	* is depending on it to protect its list_entry() calls). Fortunately,
1419	* iget4 does exactly this wait for the inode upon return from
1420	* find_inode. Returning zero here would cause get_new_inode to be
1421	* called (which would assert when it found the first thread had
1422	* already allocated the gnode). Return 1 and iget4 will do the
1423	* necessary wait.
1424	*
1425	* We can't call anything here that could sleep because we are holding
1426	* the inode_lock and sleeping can result in a hang
1427	* TRACE3N does not block and is ok here.
1428	*/
1429
1430	TRACE3N(TRACE_VNODE, 2, TRCID_LINUXOPS_INODEFINDACTOR2,
1431	"inodeFindActor: iP 0x%lX i_state 0x%x cxiNodeP 0x%lX\n",
1432	iP, iP->i_state, VP_TO_CNP(iP));
1433
1434	if (iP->i_state & INODE_IN_CACHE)
1435	return 1;
1436
1437	if (VP_TO_CNP(iP) == NULL)
1438	{
1439	if (iP->i_state == 0)
1440	return 0;
1441	else
1442	return 1;
1443	}
1444
1445	return gpfs_ops.gpfsInodeFindActor(VP_TO_CNP(iP), inodeNum, opaqueP);
1446	}
1447	#endif
1448
1449
1450	int
1451	cxiNewOSNode(void osVfsP, cxiNode_t cnPP, void *osNodePP,
1452	cxiIno_t inum, int nodeSize, void *opaqueP)
1453	{
1454	struct super_block sbP = (struct super_block )osVfsP;
1455	struct inode *iP;
1456	int rc;
1457	int loop_count = 0;
1458	int sleep_count = 0;
1459
1460	ENTER(0);
1461	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE,
1462	"cxiNewOSNode: sbP 0x%lX inum %d size %d",
1463	sbP, inum, nodeSize);
1464
1465	/* The requested nodeSize must match CXINODE_SIZE */
1466	if (nodeSize != CXINODE_SIZE)
1467	goto bad_node_size;
1468
1469	repeat:
1470	#if LINUX_KERNEL_VERSION >= 2060000
1471	iP = iget5_locked(sbP, inum, inodeFindActor, inodeInitLocked, opaqueP);
1472	#else
1473	iP = iget4(sbP, inum, inodeFindActor, opaqueP);
1474	#endif
1475	if (iP == NULL)
1476	{
1477	*cnPP = NULL;
1478	*osNodePP = NULL;
1479	rc = ENOMEM;
1480	goto xerror;
1481	}
1482
1483	#if !HAS_SOP_READ_INODE2
1484	/* We fill in the inode as opposed to a read_inode
1485	* operation executed with iget()
1486	*/
1487	if (iP->i_state & I_NEW)
1488	{
1489	gpfs_s_read_inode2(iP, opaqueP);
1490	unlock_new_inode(iP);
1491	}
1492	#endif
1493
1494	if (is_bad_inode(iP))
1495	{
1496	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_NEW_BAD,
1497	"cxiNewOSNode: BAD INODE 0x%X\n", iP);
1498	*cnPP = NULL;
1499	*osNodePP = NULL;
1500	iput(iP);
1501	rc = EIO;
1502	goto xerror;
1503	}
1504
1505	/* Did we get the right inode ?
1506	* When inodeFindActor is called from find_inode() and the inode
1507	* is in transition it might return found without checking sanpId
1508	* so go check again.
1509	*/
1510	#if LINUX_KERNEL_VERSION >= 2060000
1511	if (!inodeFindActor(iP, opaqueP))
1512	#else
1513	if (!inodeFindActor(iP, iP->i_ino, opaqueP))
1514	#endif
1515	{
1516	if (sleep_count > 10)
1517	{
1518	TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_NEW_VNODE_2,
1519	"cxiNewOSNode: rc ESTALE inode 0x%lX ino %d i_state 0x%x "
1520	"cxiNodeP 0x%lX isBad %d\n", iP, iP->i_ino, iP->i_state,
1521	VP_TO_CNP(iP), is_bad_inode(iP));
1522
1523	*cnPP = NULL;
1524	*osNodePP = NULL;
1525	iput(iP);
1526	rc = EIO;
1527	goto xerror;
1528	}
1529
1530	if (loop_count > 1000)
1531	{
1532	cxiSleep(10);
1533	sleep_count++;
1534	loop_count = 0;
1535	}
1536
1537	loop_count++;
1538	iput(iP);
1539	goto repeat;
1540	}
1541
1542	DBGASSERT(iP->PRVINODE != NULL);
1543	cnPP = (cxiNode_t )iP->PRVINODE;
1544	*osNodePP = iP;
1545	rc = 0;
1546
1547	xerror:
1548	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_EXIT,
1549	"cxiNewOSNode: exit osNodeP 0x%lX cnP 0x%lX rc %d\n",
1550	osNodePP, cnPP, rc);
1551	EXIT(0);
1552	return rc;
1553
1554	bad_node_size:
1555	/* The requested nodeSize does not match CXINODE_SIZE.
1556	Whoever called us is an incompitble version of the code or was
1557	somehow not compiled correctly. */
1558	TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_BAD,
1559	"cxiNewOSNode: requested nodeSize %d does not match CXINODE_SIZE %d",
1560	nodeSize, CXINODE_SIZE);
1561	printk("mmfs: module inconsistency detected in cxiNewOSNode:\n"
1562	" requested nodeSize %d does not match CXINODE_SIZE %d\n",
1563	nodeSize, CXINODE_SIZE);
1564	LOGASSERT(!"nodeSize != CXINODE_SIZE");
1565	EXIT(0);
1566	return ELIBBAD;
1567	}
1568
1569
1570	/* The linux kernel decrements the inode count and deallocates the
1571	* inode after gpfs_s_put_inode() is called therefore this routine
1572	* doesn't perform a delete.
1573	*/
1574	void
1575	cxiFreeOSNode(void osVfsP, struct cxiNode_t cnP, void *osNodeP)
1576	{
1577	struct super_block sbP = (struct super_block )osVfsP;
1578	struct inode iP = (struct inode )osNodeP;
1579
1580	ENTER(0);
1581	TRACE5(TRACE_VNODE, 2, TRCID_LINUXOPS_DELETE_VNODE,
1582	"cxiFreeOSNode enter: sbP 0x%lX cxiNodeP 0x%lX "
1583	"iP 0x%lX inode %d i_count %d\n",
1584	sbP, cnP, iP,
1585	iP ? iP->i_ino : -1,
1586	iP ? atomic_read((atomic_t *)&iP->i_count) : 0);
1587
1588	DBGASSERT(cnP->osNodeP == iP);
1589	cnP->osNodeP = NULL;
1590
1591	if (iP)
1592	{
1593	DBGASSERT(atomic_read((atomic_t *)&iP->i_count) == 0);
1594	iP->i_op = NULL;
1595	iP->i_fop = NULL;
1596	if (iP->i_mapping)
1597	iP->i_mapping->a_ops = &gpfs_aops_after_inode_delete;
1598	iP->i_size = 0;
1599	iP->i_nlink = 0;
1600	}
1601	EXIT(0);
1602	}
1603
1604	void
1605	cxiDeleteMmap(cxiVmid_t segid)
1606	{
1607	TRACE1(TRACE_VNODE, 2, TRCID_LINUXOPS_DELETE_MMAP,
1608	"cxiDeleteMmap: segid 0x%X\n", segid);
1609	}
1610
1611	void
1612	cxiReinitOSNode(void osVfsP, struct cxiNode_t cnP, void *osNodeP)
1613	{
1614	struct super_block sbP = (struct super_block )osVfsP;
1615	struct inode iP = (struct inode )osNodeP;
1616
1617	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REINIT_VNODE,
1618	"cxiReinitOSNode: sbP 0x%lX cnP 0x%lX iP 0x%lX\n",
1619	sbP, cnP, iP);
1620	LOGASSERT(0); // not implemented on linux
1621	}
1622
1623	void
1624	cxiDumpOSNode(cxiNode_t *cnP)
1625	{
1626	struct inode iP = (struct inode )cnP->osNodeP;
1627	struct list_head dListP, dHeadP;
1628	struct dentry *dentry;
1629
1630	ENTER(0);
1631	TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_DUMP_VNODE,
1632	"cxiDumpOSNode: cxiNodeP 0x%lX iP 0x%lX\n", cnP, iP);
1633	if (iP)
1634	{
1635	printInode(iP);
1636
1637	dHeadP = &iP->i_dentry;
1638	spin_lock(&dcache_lock);
1639	for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next)
1640	{
1641	dentry = list_entry(dListP, struct dentry, d_alias);
1642	printDentry(dentry);
1643	}
1644	spin_unlock(&dcache_lock);
1645	}
1646	EXIT(0);
1647	}
1648
1649	#if LINUX_KERNEL_VERSION >= 2060000
1650	static int
1651	igrabInodeFindActor(struct inode iP, void opaqueP)
1652	{
1653	/* igrab can be called while another thread is doing a finial iput
1654	* so instead we are call ilookup5. ilookup5 processes stuff under
1655	* the inode_lock so if we are in here and find the inode then
1656	* ilookup5 will increase i_count
1657	*
1658	* We can't call anything here that could sleep because we are holding
1659	* the inode_lock and sleeping can result in a hang
1660	* TRACE4N does not block and is ok here.
1661	*/
1662
1663	TRACE3N(TRACE_VNODE, 2, TRCID_LINUXOPS_IGRABINODEFINDACTOR,
1664	"igrabInodeFindActor: iP 0x%lX i_state 0x%x inode 0x%lX \n",
1665	iP, iP->i_state, (struct inode *) opaqueP);
1666
1667	if (iP->i_state & INODE_BEING_RELEASED)
1668	return 0;
1669
1670	if (iP != (struct inode *) opaqueP)
1671	return 0;
1672
1673	return 1;
1674	}
1675	#endif
1676
1677	/* On linux we can't just decrement the i_count
1678	* thus this routine will only accept a positive
1679	* increment. If you want to put a reference then
1680	* call cxiPutOSNode() which calls back thru the VFS
1681	* layer.
1682	*/
1683	int
1684	cxiRefOSNode(void osVfsP, cxiNode_t cnP, void *osNodeP, int inc)
1685	{
1686	return cxiRefOsNode(osVfsP,cnP,osNodeP,inc,false);
1687	}
1688
1689	int
1690	cxiRefOsNode(void osVfsP, cxiNode_t cnP, void *osNodeP, int inc,
1691	Boolean calledFromRevoke)
1692	{
1693	struct inode iP = (struct inode )osNodeP;
1694	struct inode *riP = NULL;
1695	int holdCount;
1696	int ino;
1697
1698	ENTER(0);
1699	DBGASSERT(iP != NULL);
1700	DBGASSERT(inc == 1);
1701
1702	#if LINUX_KERNEL_VERSION >= 2060000
1703	/* The igrab() may fail if this inode is actively going
1704	* thru a release.
1705	*/
1706	if(osVfsP)
1707	{
1708	/* we already have a hold */
1709	riP = igrab(iP);
1710	}
1711	/* we may not currently have a hold so use ilookup5 */
1712	else if(GPFS_TYPE(iP))
1713	{
1714	riP = ilookup5(iP->i_sb, iP->i_ino, igrabInodeFindActor, (void*)iP);
1715	}
1716	#else
1717	/* The igrab() may fail if this inode is actively going
1718	* thru a release.
1719	*/
1720	riP = igrab(iP);
1721	#endif
1722	if (riP)
1723	{
1724	DBGASSERT(!(iP->i_state & INODE_BEING_RELEASED));
1725	holdCount = atomic_read((atomic_t *)&riP->i_count);
1726	ino = riP->i_ino;
1727	}
1728	else
1729	{
1730	holdCount = 0;
1731	ino = -1;
1732	/* If this function is called from revoke handler check of this inode
1733	is being released
1734	*/
1735	if (calledFromRevoke && (iP->i_state & INODE_BEING_RELEASED) )
1736	holdCount = -1;
1737	}
1738	TRACE5(TRACE_VNODE, 2, TRCID_LINUXOPS_REF_VNODE,
1739	"cxiRefOSNode exit: sbP 0x%lX cxiNodeP 0x%lX iP 0x%lX inode %d "
1740	"i_count to %d", osVfsP, cnP, iP, ino, holdCount);
1741	EXIT(0);
1742	return holdCount;
1743	}
1744
1745	/* Determines if OS node is inactive */
1746	int
1747	cxiInactiveOSNode(void osVfsP, struct cxiNode_t cnP, void *osNodeP,
1748	Boolean canCacheP, Boolean hasReferencesP)
1749	{
1750	struct inode iP = (struct inode )osNodeP;
1751	struct super_block sbP = (struct super_block )osVfsP;
1752	int holdCount;
1753
1754	ENTER(0);
1755	DBGASSERT(cnP->osNodeP == iP);
1756
1757	*canCacheP = false;
1758	*hasReferencesP = false;
1759
1760	holdCount = atomic_read((atomic_t *)&iP->i_count);
1761	if (holdCount > 0)
1762	*hasReferencesP = true;
1763
1764	TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_INACTIVE_VNODE,
1765	"cxiInactiveOSNode: sbP 0x%lX cxiNodeP 0x%lX iP 0x%lX "
1766	"i_count %d canCache %d hasReferences %d\n", sbP, cnP, iP,
1767	holdCount, canCacheP, hasReferencesP);
1768
1769	EXIT(0);
1770	return holdCount;
1771	}
1772
1773	void
1774	cxiPutOSNode(void *vP)
1775	{
1776	struct inode iP = (struct inode )vP;
1777	int holdCount;
1778
1779	ENTER(0);
1780	DBGASSERT(iP != NULL);
1781	DBGASSERT(!(iP->i_state & INODE_BEING_RELEASED));
1782	holdCount = atomic_read((atomic_t *)&iP->i_count);
1783	DBGASSERT(holdCount > 0);
1784
1785	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_PUT_OSNODE,
1786	"cxiPutOSNode enter: iP 0x%lX inode %d i_count to %d\n",
1787	iP, iP->i_ino, holdCount-1);
1788
1789	iput(iP);
1790
1791	EXIT(0);
1792	return;
1793	}
1794
1795	void
1796	cxiDestroyOSNode(void *vP)
1797	{
1798	struct inode iP = (struct inode )vP;
1799	int holdCount;
1800
1801	ENTER(0);
1802	DBGASSERT(iP != NULL);
1803	holdCount = atomic_read((atomic_t *)&iP->i_count);
1804	DBGASSERT(holdCount > 0);
1805
1806	TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_DESTROY_OSNODE,
1807	"cxiDestroyOSNode enter: iP 0x%lX inode %d i_count %d i_nlink %d\n",
1808	iP, iP->i_ino, holdCount, iP->i_nlink);
1809
1810	iP->i_nlink = 0;
1811	EXIT(0);
1812	return;
1813	}
1814
1815	void
1816	cxiSetOSNodeType(struct cxiNode_t *cnP, cxiMode_t mode, cxiDev_t dev)
1817	{
1818	ENTER(0);
1819	if (S_ISDIR(mode))
1820	cnP->nType = cxiVDIR;
1821	else if (S_ISREG(mode))
1822	cnP->nType = cxiVREG;
1823	else if (S_ISLNK(mode))
1824	cnP->nType = cxiVLNK;
1825	else if (S_ISCHR(mode))
1826	cnP->nType = cxiVCHR;
1827	else if (S_ISBLK(mode))
1828	cnP->nType = cxiVBLK;
1829	else if (S_ISFIFO(mode))
1830	cnP->nType = cxiVFIFO;
1831	else if (S_ISSOCK(mode))
1832	cnP->nType = cxiVSOCK;
1833	else
1834	DBGASSERT(0);
1835	EXIT(0);
1836	}
1837
1838	void
1839	cxiUpdateInode(cxiNode_t cnP, cxiVattr_t attrP, int what)
1840	{
1841	struct inode iP = (struct inode )cnP->osNodeP;
1842
1843	ENTER(0);
1844	if (iP != NULL)
1845	{
1846	if (what & CXIUP_ATIME)
1847	{
1848	CXITIME_TO_INODETIME(attrP->va_atime, iP->i_atime);
1849	EXIT(0);
1850	return;
1851	}
1852	if (what & CXIUP_MODE)
1853	{
1854	iP->i_mode = attrP->va_mode;
1855	CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
1856	}
1857	if (what & CXIUP_OWN)
1858	{
1859	iP->i_mode = attrP->va_mode;
1860	iP->i_uid = attrP->va_uid;
1861	iP->i_gid = attrP->va_gid;
1862	CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
1863	}
1864	if (what & CXIUP_NLINK)
1865	{
1866	iP->i_nlink = attrP->va_nlink;
1867	}
1868	if (what & CXIUP_SIZE)
1869	{
1870	iP->i_size = attrP->va_size;
1871	iP->i_blocks = attrP->va_blocks;
1872	}
1873	if (what & CXIUP_SIZE_BIG)
1874	{
1875	spin_lock(&inode_lock);
1876	if (attrP->va_size > iP->i_size)
1877	{
1878	iP->i_size = attrP->va_size;
1879	iP->i_blocks = attrP->va_blocks;
1880	}
1881	spin_unlock(&inode_lock);
1882	}
1883	if (what & CXIUP_TIMES)
1884	{
1885	CXITIME_TO_INODETIME(attrP->va_atime, iP->i_atime);
1886	CXITIME_TO_INODETIME(attrP->va_mtime, iP->i_mtime);
1887	CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
1888	}
1889	if (what & CXIUP_PERM)
1890	{
1891	iP->i_mode = attrP->va_mode;
1892	iP->i_uid = attrP->va_uid;
1893	iP->i_gid = attrP->va_gid;
1894	cnP->xinfo = attrP->va_xinfo;
1895	setIopTable(iP, (attrP->va_xinfo & VA_XPERM) != 0);
1896	cnP->icValid \|= CXI_IC_PERM;
1897	}
1898	if ((what & CXIUP_NLINK) && TestCtFlag(cnP,destroyIfDelInode))
1899	{
1900	cxiDropInvalidDCacheEntry(cnP);
1901
1902	/* swapd must be notified to prune dcache entries */
1903	if (TestCtFlag(cnP, pruneDCacheNeeded))
1904	gpfs_ops.gpfsSwapdEnqueue(cnP);
1905	}
1906	}
1907
1908	TRACE4(TRACE_VNODE, 3, TRCID_CXIUPDATE_INODE_3,
1909	"cxiUpdateInode: iP 0x%X atime 0x%X mtime 0x%X ctime 0x%X\n",
1910	iP, GET_INODETIME_SEC(iP->i_atime), GET_INODETIME_SEC(iP->i_mtime),
1911	GET_INODETIME_SEC(iP->i_ctime));
1912
1913	TRACE7(TRACE_VNODE, 3, TRCID_CXIUPDATE_INODE_1,
1914	"cxiUpdateInode: what %d mode 0x%X uid %d gid %d nlink %d size %lld"
1915	" blocks %d\n",
1916	what, iP->i_mode, iP->i_uid, iP->i_gid, iP->i_nlink,
1917	iP->i_size, iP->i_blocks);
1918	EXIT(0);
1919	}
1920
1921	/* Determine if operating system specific node belongs to a particular VFS and
1922	can be uncached. Returns OS node if it exists, the determination of
1923	whether it can be uncached or not. */
1924	Boolean
1925	cxiCanUncacheOSNode(void osVfsP, struct cxiNode_t cnP, void **vP)
1926	{
1927	struct inode iP = (struct inode )cnP->osNodeP;
1928	int count = 0;
1929
1930	ENTER(0);
1931	if (iP != NULL && iP->i_sb == osVfsP)
1932	{
1933	count = atomic_read((atomic_t *)&iP->i_count);
1934	vP = (void )iP;
1935	}
1936	else
1937	*vP = NULL;
1938
1939	TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_CANUNCACHE_OSNODE,
1940	"cxiCanUncacheOSNode: cxiNode 0x%lx vP 0x%lX osVfsP 0x%lX "
1941	"i_sb 0x%lX inode %d i_count %d\n", cnP, vP, osVfsP,
1942	(iP ? iP->i_sb : 0), (iP ? iP->i_ino : 0), count);
1943	EXIT(0);
1944	return (count == 0);
1945	}
1946
1947
1948	/* Add operating system specific node to the lookup cache.
1949	This routine is called with the necessary distributed lock held to
1950	guarantee that the lookup cache entry is valid. */
1951	#ifdef CCL
1952	void *
1953	cxiAddOSNode(void dentryP, void vP, DentryOpTableTypes dopTabType, int lookup)
1954	#else
1955	void *
1956	cxiAddOSNode(void dentryP, void vP, int lookup)
1957	#endif
1958	{
1959	struct inode iP = (struct inode )vP;
1960	struct dentry dP = (struct dentry )dentryP;
1961
1962	ENTER(0);
1963	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_ADD_OSNODE,
1964	"cxiAddOSNode: dentry 0x%lX vP 0x%lX unhashed %d",
1965	dentryP, vP, d_unhashed(dP));
1966
1967	/* mark dentry valid */
1968	#ifdef CCL
1969	switch(dopTabType)
1970	{
1971	/* Positive dcache entry for inexact file name match for Samba user.
1972	Only valid for other Samba users.
1973	Not valid for local/NFS users. Forces lookup for local/NFS users. */
1974	case DOpOnlyValidIfSamba:
1975	dP->d_op = &gpfs_dops_valid_if_Samba;
1976	break;
1977	/* Negative dcache entry for exact file name match for local/NFS user.
1978	Only valid for other local/NFS users.
1979	Not valid for Samba users. Forces lookup for Samba users. */
1980	case DOpInvalidIfSamba:
1981	dP->d_op = &gpfs_dops_invalid_if_Samba;
1982	break;
1983	default:
1984	dP->d_op = &gpfs_dops_valid;
1985	break;
1986	}
1987	#else
1988	dP->d_op = &gpfs_dops_valid;
1989	#endif
1990
1991	if (!d_unhashed(dP))
1992	{
1993	/* hook up dentry and inode */
1994	d_instantiate(dP, iP);
1995	dP = NULL;
1996	}
1997	else
1998	{
1999	#if LINUX_KERNEL_VERSION >= 2060000
2000	if (lookup)
2001	{
2002	dP = d_splice_alias(iP, dP);
2003	goto exit;
2004	}
2005	#endif
2006	/* hook up dentry and inode */
2007	d_instantiate(dP, iP);
2008
2009	/* if not yet done so, add to hash list */
2010	d_rehash(dP);
2011
2012	dP = NULL;
2013	}
2014	exit:
2015
2016	EXIT(0);
2017	return dP;
2018	}
2019
2020
2021	#ifdef NFS4_CLUSTER
2022	/* get list of fs locations, return number of locations */
2023	int
2024	gpfs_s_fs_locations(struct super_block sbP, char *bufP)
2025	{
2026	int rc;
2027	int code = 0;
2028	int loc_count;
2029	struct gpfsVfsData_t privVfsP = (struct gpfsVfsData_t )SBLOCK_PRIVATE(sbP);
2030	LOGASSERT(privVfsP != NULL);
2031
2032	ENTER(0);
2033	VFS_STAT_START(fsLocationCall);
2034
2035	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCFS_ENTER,
2036	"gpfs_s_fs_locations enter: sbP 0x%lX\n", sbP);
2037
2038	rc = gpfs_ops.gpfsFsLocations(privVfsP, bufP, &loc_count);
2039	if (rc)
2040	{
2041	rc = -rc;
2042	code = 1;
2043	goto xerror;
2044	}
2045	rc = loc_count;
2046
2047	xerror:
2048	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCFS_EXIT,
2049	"gpfs_s_fs_locations exit: sbP 0x%lX code %d rc %d\n",
2050	sbP, code, rc);
2051	VFS_STAT_STOP;
2052	EXIT(0);
2053	return rc;
2054	}
2055	#endif
2056
2057
2058	/* Functions for converting between an NFS file handle and a dentry.
2059	We define our own functions rather than using the generic ones in
2060	fs/nfsd/nfsfh.c so we can revalidate the file inode, since it could have
2061	been changed by another node. */
2062
2063	static struct dentry *
2064	gpfs_nfsd_iget_dentry(struct inode *inode, __u32 generation)
2065	{
2066	struct list_head *lp;
2067	struct dentry *result;
2068
2069	ENTER(0);
2070	TRACE2(TRACE_VNODE, 3, TRCID_NFSD_IGET_DENTRY_1,
2071	"gpfs_nfsd_iget_dentry: inode %d generation %d",
2072	inode->i_ino, generation);
2073
2074	/* Now find a dentry. If possible, get a well-connected one. */
2075	spin_lock(&dcache_lock);
2076	for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next)
2077	{
2078	result = list_entry(lp, struct dentry, d_alias);
2079
2080	if (!(result->d_flags & DCACHE_DFLAGS_DISCONNECTED))
2081	{
2082	dget_locked(result);
2083	#if LINUX_KERNEL_VERSION >= 2060000
2084	result->d_flags \|= DCACHE_REFERENCED;
2085	#else
2086	result->d_vfs_flags \|= DCACHE_REFERENCED;
2087	#endif
2088	spin_unlock(&dcache_lock);
2089
2090	if (result->d_inode != inode)
2091	{
2092	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_31,
2093	"gpfs_nfsd_iget_dentry:0 dentry flags 0x%x count %d inode 0x%lX "
2094	"time %lu",
2095	result->d_flags, atomic_read(&result->d_count),
2096	result->d_inode, result->d_time);
2097
2098	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_41,
2099	"gpfs_nfsd_iget_dentry:0 Inode %lu nlink %d count %d gen %u %u "
2100	"state %lu flags 0x%x",
2101	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2102	inode->i_generation, generation, inode->i_state, inode->i_flags);
2103
2104	dput(result);
2105	goto build_dentry;
2106	}
2107	if (gpfs_i_revalidate(result))
2108	{
2109	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_3,
2110	"gpfs_nfsd_iget_dentry:1 dentry flags 0x%x count %d inode 0x%lX "
2111	"time %lu",
2112	result->d_flags, atomic_read(&result->d_count),
2113	result->d_inode, result->d_time);
2114
2115	TRACE7(TRACE_VNODE, 1, TRCID_NFSD_IGET_4,
2116	"gpfs_nfsd_iget_dentry:1 Inode %lu nlink %d count %d gen %u %u "
2117	"state %lu flags 0x%x",
2118	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2119	inode->i_generation, generation, inode->i_state, inode->i_flags);
2120
2121	iput(inode);
2122	dput(result);
2123	EXIT(0);
2124	return ERR_PTR(-ESTALE);
2125	}
2126	if (generation &&
2127	generation != 0xffffffff && /* GENNUM_UNKNOWN */
2128	inode->i_generation != generation)
2129	{
2130	/* we didn't find the right inode.. */
2131	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_5,
2132	"gpfs_nfsd_iget_dentry:2 dentry flags 0x%x count %d inode 0x%lX "
2133	"time %lu",
2134	result->d_flags, atomic_read(&result->d_count),
2135	result->d_inode, result->d_time);
2136
2137	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_6,
2138	"gpfs_nfsd_iget_dentry:2 Inode %lu nlink %d count %d gen %u %u "
2139	"state %lu flags 0x%x",
2140	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2141	inode->i_generation, generation, inode->i_state, inode->i_flags);
2142
2143	iput(inode);
2144	dput(result);
2145	EXIT(0);
2146	return ERR_PTR(-ESTALE);
2147	}
2148	iput(inode);
2149	EXIT(0);
2150	return result;
2151	}
2152	}
2153	spin_unlock(&dcache_lock);
2154
2155	build_dentry:
2156
2157	#if LINUX_KERNEL_VERSION < 2060000
2158	result = d_alloc_root(inode);
2159	#else
2160	result = d_alloc_anon(inode);
2161	#endif
2162	if (result == NULL)
2163	{
2164	iput(inode);
2165	EXIT(0);
2166	return ERR_PTR(-ENOMEM);
2167	}
2168	#if LINUX_KERNEL_VERSION < 2060000
2169	result->d_flags \|= DCACHE_DFLAGS_DISCONNECTED;
2170	#endif
2171
2172	if (gpfs_i_revalidate(result))
2173	{
2174	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_7,
2175	"gpfs_nfsd_iget:3 dentry flags 0x%x count %d inode 0x%lX time %lu",
2176	result->d_flags, atomic_read(&result->d_count),
2177	result->d_inode, result->d_time);
2178
2179	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_8,
2180	"gpfs_nfsd_iget:3 Inode %lu nlink %d count %d gen %u %u "
2181	"state %lu flags 0x%x",
2182	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2183	inode->i_generation, generation, inode->i_state, inode->i_flags);
2184
2185	/* The dput call here is releases the dcache entry that was
2186	* allocated by to d_alloc_root. It also results in an iput effectively
2187	* removing the hold we place by our iget call above.
2188	*/
2189	dput(result);
2190	EXIT(0);
2191	return ERR_PTR(-ESTALE);
2192	}
2193	if (generation &&
2194	generation != 0xffffffff && /* GENNUM_UNKNOWN */
2195	inode->i_generation != generation)
2196	{
2197	/* we didn't find the right inode.. */
2198	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_9,
2199	"gpfs_nfsd_iget:4 dentry flags 0x%x count %d inode 0x%lX time %lu",
2200	result->d_flags, atomic_read(&result->d_count),
2201	result->d_inode, result->d_time);
2202
2203	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_10,
2204	"gpfs_nfsd_iget:4 Inode %lu nlink %d count %d gen %u %u "
2205	"state %lu flags 0x%x",
2206	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2207	inode->i_generation, generation, inode->i_state, inode->i_flags);
2208
2209	/* Release the dcache entry. This also does an iput. */
2210	dput(result);
2211	EXIT(0);
2212	return ERR_PTR(-ESTALE);
2213	}
2214	EXIT(0);
2215	return result;
2216	}
2217
2218	static struct dentry *
2219	gpfs_nfsd_iget(struct super_block *sbP, unsigned long ino,
2220	cxiIGetArg_t *argP, __u32 generation)
2221	{
2222	int rc;
2223	struct inode *inode;
2224	struct gpfsVfsData_t *privVfsP;
2225
2226	ENTER(0);
2227	TRACE6(TRACE_VNODE, 3, TRCID_NFSD_IGET_1,
2228	"gpfs_nfsd_iget: sbP 0x%lX extino %d inode %d snapid %d "
2229	"fileset %d generation %d",
2230	sbP, ino, argP->inodeNum, argP->snapId, argP->filesetId,
2231	generation);
2232
2233	/* get the inode */
2234	if (ino == 0)
2235	{
2236	EXIT(0);
2237	return ERR_PTR(-ESTALE);
2238	}
2239
2240	/* Callers have set inodeNum/snapId in argP. vattrP is NULL and
2241	* readInodeCalled is false, but these will be set appropriately in
2242	* gpfsNFSIget after it obtains the attributes.
2243	*/
2244
2245	privVfsP = (struct gpfsVfsData_t *)cxiGetPrivVfsP(sbP);
2246	rc = gpfs_ops.gpfsNFSIget(privVfsP, argP, generation, (void **)&inode);
2247
2248	if (rc)
2249	{
2250	cxiErrorNFS(rc);
2251
2252	EXIT(0);
2253	return ERR_PTR(-rc);
2254	}
2255
2256	if (inode == NULL)
2257	{
2258	EXIT(0);
2259	return ERR_PTR(-ENOMEM);
2260	}
2261
2262	if (is_bad_inode(inode))
2263	{
2264	EXIT(0);
2265	return ERR_PTR(-ESTALE);
2266	}
2267
2268	/* gpfsNFSIget will have called findOrCreateLinux/cxiNewOSNode which
2269	* makes the iget call along with the inodeFindActor validation.
2270	*/
2271
2272	EXIT(0);
2273	return(gpfs_nfsd_iget_dentry(inode,generation));
2274
2275	}
2276
2277	#if LINUX_KERNEL_VERSION >= 2060000
2278	/* export_operations for nfsd communication with our file system
2279	* via gpfs_export_ops
2280	*/
2281
2282	/*
2283	* gpfs_get_dparent: (get_parent) find the parent dentry for a given dentry
2284	*/
2285	struct dentry gpfs_get_dparent(struct dentry child)
2286	{
2287	int rc = 0;
2288	struct dentry *result = NULL;
2289	struct gpfsVfsData_t *privVfsP;
2290	ext_cred_t eCred;
2291	cxiNode_t *dcnP;
2292	cxiIno_t iNum = (cxiIno_t)-1;
2293	cxiNode_t *cnP = NULL;
2294	struct inode *newInodeP = NULL;
2295	struct dentry *retP;
2296
2297	ENTER(0);
2298	VFS_INC(get_parentCall);
2299
2300	TRACE2(TRACE_VNODE, 3, TRCID_GET_DPARENT_ENTER,
2301	"gpfs_get_dparent: dentry 0x%lX inode 0x%d",
2302	child, child->d_inode->i_ino);
2303
2304	dcnP = VP_TO_CNP(child->d_inode);
2305	privVfsP = VP_TO_PVP(child->d_inode);
2306	DBGASSERT(privVfsP != NULL);
2307
2308	setCred(&eCred);
2309
2310	if (!dcnP)
2311	{
2312	/* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
2313	where "count" entries are to be pruned, but the last one is
2314	found to be recently referenced. When this happens, count is
2315	decremented, but the loop is not terminated. The result is that
2316	it continues to prune entries past where it should (prunes
2317	everything). If our patch for this is not applied, the result
2318	is a kernel failure as the cxiNode is referenced. Checking
2319	here (and revalidate) allows us to reject the call instead. */
2320	PRINTINODE(child->d_inode);
2321	result = (struct dentry *)ERR_PTR(-ESTALE);
2322	goto xerror;
2323	}
2324
2325	rc = gpfs_ops.gpfsLookup(privVfsP, (void *)child->d_inode, dcnP,
2326	NULL, (char *)"..",
2327	(void **)&newInodeP, &cnP, &iNum, NULL,
2328	NULL, &eCred, (void **)&retP);
2329	if (rc == 0)
2330	{
2331	DBGASSERT(cnP != NULL);
2332	DBGASSERT(iNum != -1);
2333	DBGASSERT(newInodeP != NULL);
2334	DBGASSERT(newInodeP->PRVINODE == cnP);
2335	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
2336	result = gpfs_nfsd_iget_dentry(newInodeP, (__u32)newInodeP->i_generation);
2337	}
2338	else
2339	{
2340	cxiErrorNFS(rc);
2341	result = (struct dentry *)ERR_PTR(-rc);
2342	iNum = -1;
2343	}
2344
2345	xerror:
2346
2347	TRACE4(TRACE_VNODE, 3, TRCID_GET_DPARENT_EXIT,
2348	"gpfs_get_dparent dentry 0x%lX inode %d result %lX err%d \n",
2349	child, iNum, result, IS_ERR(result)? PTR_ERR(result): 0);
2350	EXIT(0);
2351	return result;
2352	}
2353
2354	/*
2355	* gpfs_get_dentry: (get_dentry) find dentry for the inode given a file handle
2356	*/
2357	struct dentry gpfs_get_dentry(struct super_block sbP, void * vdata)
2358	{
2359	__u32 *data=vdata;
2360	unsigned long ino;
2361	cxiIGetArg_t arg;
2362	__u32 generation;
2363	struct dentry *result;
2364
2365	ENTER(0);
2366	VFS_INC(get_dentryCall);
2367
2368	ino = data[0];
2369	if (IS_SNAPROOTDIR_EXT_INO(ino))
2370	arg.inodeNum = SNAPROOTDIR_INT_INO;
2371	else if (IS_SNAPLINKDIR_EXT_INO(ino))
2372	arg.inodeNum = data[3];
2373	else
2374	arg.inodeNum = ino;
2375	arg.snapId = data[1];
2376	generation = data[2];
2377	arg.extInodeNum = ino;
2378	arg.filesetId = (unsigned)-1; //FIXME
2379
2380	arg.vattrP = NULL;
2381	arg.readInodeCalled = false;
2382	result = gpfs_nfsd_iget(sbP, ino, &arg, generation);
2383	EXIT(0);
2384	return result;
2385	}
2386
2387	/* It is acceptable to create a disconnected dentry for pNFS since it is used
2388	only for read/write. The check if it was exported is not required since
2389	the call to the MDS will verify that the file is open.
2390	*/
2391	static int gpfs_acceptable(void expv, struct dentry dentry)
2392	{
2393	if (dentry && dentry->d_inode) {
2394	#ifdef GPFS_PRINTK
2395	printk("gpfs_acceptable ino %d\n", dentry->d_inode->i_ino);
2396	#endif
2397	return 1;
2398	}
2399	return 0;
2400	}
2401
2402	/*
2403	* gpfs_decode_fh: (decode_fh) decode a file handle returning ptr to it's dentry
2404	*/
2405	struct dentry *
2406	gpfs_decode_fh(struct super_block sbP, __u32 fh,
2407	int len, int fhtype,
2408	int (acceptable)(void context, struct dentry *de),
2409	void *context)
2410	{
2411	#if LINUX_KERNEL_VERSION == 2060800
2412	int len = *lenP;
2413	#endif
2414	struct dentry *result;
2415	__u32 parent[4]={0};
2416
2417	ENTER(0);
2418	VFS_INC(decode_fhCall);
2419
2420	#ifdef GPFS_PRINTK
2421	printk("gpfs_decode_fh %08x %08x %08x %08x %08x %08x %08x\n",
2422	fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6]);
2423	#endif
2424	TRACE4(TRACE_VNODE, 3, TRCID_DECODE_FH_1,
2425	"gpfs_decode_fh: sbP 0x%lX fh 0x%lX, len %d type %d",
2426	sbP, fh, len, fhtype);
2427	if (fhtype > 4 && fhtype < 8 && len >= 5)
2428	{
2429	parent[0]=fh[3]; /* ino */
2430	parent[1]=fh[4]; /* p_sid */
2431	if (len>5)
2432	{
2433	parent[2]=fh[5]; /* generation */
2434	parent[3]=fh[3]; /* ino */
2435	}
2436
2437	if (cxiIsLockdThread() // check for lockd thread
2438	#ifdef P_NFS4
2439	\|\| fhtype == 7 // it is a pNFS fh, disconnected fh is acceptable.
2440	#endif
2441	)
2442	result = sbP->s_export_op->find_exported_dentry(sbP, fh, parent,
2443	gpfs_acceptable, context);
2444	else
2445	result = sbP->s_export_op->find_exported_dentry(sbP, fh, parent,
2446	acceptable, context);
2447	TRACE4(TRACE_VNODE, 3, TRCID_DECODE_FH_2,
2448	"gpfs_decode_fh: sbP 0x%lX fh 0x%lX result %lX err %d",
2449	sbP, fh, result, IS_ERR(result)? PTR_ERR(result): 0);
2450	#if LINUX_KERNEL_VERSION == 2060800
2451	*lenP = 0;
2452	#endif
2453	if (IS_ERR(result))
2454	cxiErrorNFS(PTR_ERR(result));
2455
2456	EXIT(0);
2457	return result;
2458	}
2459
2460	TRACE2(TRACE_VNODE, 3, TRCID_DECODE_FH_3,
2461	"gpfs_decode_fh: sbP 0x%lX fh 0x%lX -EINVAL",
2462	sbP, fh);
2463	EXIT(0);
2464	return ERR_PTR(-EINVAL);
2465	}
2466
2467	/*
2468	* gpfs_encode_fh: (encode_fh) encode a file handle from the given dentry
2469	*/
2470	int
2471	gpfs_encode_fh(struct dentry dentry, __u32 fh, int *lenp,
2472	int need_parent)
2473	{
2474	UInt32 d_sid, p_sid;
2475
2476	ENTER(0);
2477	VFS_INC(encode_fhCall);
2478
2479	if (*lenp < 5)
2480	{
2481	EXIT(0);
2482	return 255;
2483	}
2484
2485	if (gpfs_ops.gpfsGetSnapIdPair(VP_TO_CNP(dentry->d_inode),
2486	&d_sid, &p_sid) != 0)
2487	{
2488	EXIT(0);
2489	return 255;
2490	}
2491
2492	fh[0] = (__u32) dentry->d_inode->i_ino;
2493	fh[1] = d_sid;
2494	fh[2] = (__u32) dentry->d_inode->i_generation;
2495	fh[3] = (__u32) dentry->d_parent->d_inode->i_ino;
2496	fh[4] = p_sid;
2497	if (*lenp > 5)
2498	{
2499	/* There was enough room to compelete parent */
2500	fh[5] = (__u32) dentry->d_parent->d_inode->i_generation;
2501	*lenp = 6;
2502	}
2503	else
2504	*lenp = 5;
2505
2506	EXIT(0);
2507	return *lenp;
2508	}
2509	#else
2510	struct dentry gpfs_fh_to_dentry(struct super_block sbP, __u32 *fh,
2511	int len, int fhtype, int parent)
2512	{
2513	unsigned long ino;
2514	cxiIGetArg_t arg;
2515	__u32 generation;
2516	struct dentry *result;
2517
2518	ENTER(0);
2519	TRACE5(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_1,
2520	"gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX, len %d type %d parent %d",
2521	sbP, fh, len, fhtype, parent);
2522
2523	if (fhtype == 3 && len >= 5)
2524	{
2525	if (parent)
2526	{
2527	ino = fh[3];
2528	if (IS_SNAPROOTDIR_EXT_INO(ino))
2529	arg.inodeNum = SNAPROOTDIR_INT_INO;
2530	else if (IS_SNAPLINKDIR_EXT_INO(ino))
2531	arg.inodeNum = IS_SNAPROOTDIR_EXT_INO(fh[0]) ?
2532	SNAPROOTDIR_INT_INO : fh[0];
2533	else
2534	arg.inodeNum = ino;
2535	arg.snapId = fh[4];
2536	generation = 0xffffffff; /* GENNUM_UNKNOWN */
2537	}
2538	else
2539	{
2540	ino = fh[0];
2541	if (IS_SNAPROOTDIR_EXT_INO(ino))
2542	arg.inodeNum = SNAPROOTDIR_INT_INO;
2543	else if (IS_SNAPLINKDIR_EXT_INO(ino))
2544	arg.inodeNum = fh[3];
2545	else
2546	arg.inodeNum = ino;
2547	arg.snapId = fh[1];
2548	generation = fh[2];
2549	}
2550	arg.filesetId = (unsigned)-1; // FIXME
2551	arg.vattrP = NULL;
2552	arg.readInodeCalled = false;
2553	result = gpfs_nfsd_iget(sbP, ino, &arg, generation);
2554
2555	TRACE4(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_2,
2556	"gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX result %lX err %d",
2557	sbP, fh, result, IS_ERR(result)? PTR_ERR(result): 0);
2558
2559	EXIT(0);
2560	return result;
2561	}
2562
2563	TRACE2(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_3,
2564	"gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX -EINVAL",
2565	sbP, fh);
2566
2567	EXIT(0);
2568	return ERR_PTR(-EINVAL);
2569	}
2570
2571	int gpfs_dentry_to_fh(struct dentry dentry, __u32 fh, int *lenp,
2572	int need_parent)
2573	{
2574	UInt32 d_sid, p_sid;
2575
2576	if (*lenp < 5)
2577	return 255;
2578
2579	ENTER(0);
2580	if (gpfs_ops.gpfsGetSnapIdPair(VP_TO_CNP(dentry->d_inode),
2581	&d_sid, &p_sid) != 0)
2582	{
2583	EXIT(0);
2584	return 255;
2585	}
2586	fh[0] = (__u32) dentry->d_inode->i_ino;
2587	fh[1] = d_sid;
2588	fh[2] = (__u32) dentry->d_inode->i_generation;
2589	fh[3] = (__u32) dentry->d_parent->d_inode->i_ino;
2590	fh[4] = p_sid;
2591
2592	*lenp = 5;
2593	EXIT(0);
2594	return 3;
2595	}
2596	#endif
2597
2598	void
2599	printSuper(struct super_block *sbP)
2600	{
2601	if (!_TRACE_IS_ON(TRACE_VNODE, 3))
2602	return;
2603
2604	/* private field won't make much sense for non-GPFS file systems */
2605	TRACE4N(TRACE_VNODE, 3, TRCID_PRINTSUPER_1,
2606	"printSuper: sbP 0x%lX magic 0x%lX type 0x%lX private 0x%lX\n",
2607	sbP, sbP->s_magic, sbP->s_type, SBLOCK_PRIVATE(sbP));
2608
2609	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTSUPER_3,
2610	"printSuper: s_dev 0x%X count 0x%X active %d\n",
2611	sbP->s_dev, sbP->s_count, atomic_read(&sbP->s_active));
2612	}
2613
2614	void
2615	printSuperList(struct super_block *sbP)
2616	{
2617	struct list_head *lP;
2618	struct super_block *sP;
2619
2620	if (!_TRACE_IS_ON(TRACE_VNODE, 5))
2621	return;
2622
2623	/* Run through all super blocks starting from provided GPFS super block. */
2624	/* Ideally we would lock sb_lock, but we can't access it,
2625	so small probability of this breaking, which is why it is at
2626	a higher trace level (vnode 5). */
2627	TRACE0N(TRACE_VNODE, 5, TRCID_PRINTALLSUPER_1,
2628	"printSuperList:\n");
2629	printSuper(sbP);
2630	list_for_each(lP, &sbP->s_list)
2631	{
2632	sP = sb_entry(lP);
2633	printSuper(sP);
2634	}
2635	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/super.c @ 65

Download in other formats: