Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

super.c @ 145

Last change on this file since 145 was 16, checked in by rock, 17 years ago

File size: 71.0 KB

Rev	Line
[16]	1	/***************************************************************************
	2	*
	3	* Copyright (C) 2001 International Business Machines
	4	* All rights reserved.
	5	*
	6	* This file is part of the GPFS mmfslinux kernel module.
	7	*
	8	* Redistribution and use in source and binary forms, with or without
	9	* modification, are permitted provided that the following conditions
	10	* are met:
	11	*
	12	* 1. Redistributions of source code must retain the above copyright notice,
	13	* this list of conditions and the following disclaimer.
	14	* 2. Redistributions in binary form must reproduce the above copyright
	15	* notice, this list of conditions and the following disclaimer in the
	16	* documentation and/or other materials provided with the distribution.
	17	* 3. The name of the author may not be used to endorse or promote products
	18	* derived from this software without specific prior written
	19	* permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	25	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	26	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
	27	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	28	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
	29	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
	30	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*
	32	*************************************************************************** */
	33	/* @(#)24 1.157.1.8 src/avs/fs/mmfs/ts/kernext/gpl-linux/super.c, mmfs, avs_rgpfs24, rgpfs24s011a 4/5/07 11:15:55 */
	34	/*
	35	* Superblock operations
	36	*
	37	* Contents:
	38	* TraceBKL
	39	* gpfs_s_read_inode2
	40	* gpfs_s_read_inode
	41	* gpfs_s_delete_inode
	42	* gpfs_s_notify_change
	43	* gpfs_s_put_super
	44	* gpfs_s_statfs
	45	* gpfs_s_umount_begin
	46	* gpfs_s_remount
	47	* gpfs_s_write_inode
	48	* gpfs_s_clear_inode
	49	* gpfs_s_write_super
	50	* gpfs_s_fs_locations
	51	* gpfs_fill_super
	52	* gpfs_reg_fs
	53	* gpfs_unreg_fs
	54	* kill_mmfsd
	55	* get_myinode
	56	* exec_mmfs
	57	* fork_mount_helper
	58	* vfsUserCleanup
	59	* cxiSetMountInfo
	60	* cxiUnmount
	61	* cxiReactivateOSNode
	62	* cxiNewOSNode
	63	* cxiFreeOSNode
	64	* cxiDeleteMmap
	65	* cxiReinitOSNode
	66	* cxiFindOSNode
	67	* cxiDumpOSNode
	68	* cxiRefOSNode
	69	* cxiInactiveOSNode
	70	* cxiPutOSNode
	71	* cxiDestroyOSNode
	72	* cxiSetOSNodeType
	73	* cxiUpdateInode
	74	* cxiCanUncacheOSNode
	75	* cxiAddOSNode
	76	*
	77	*/
	78
	79	#include <Shark-gpl.h>
	80
	81	#include <linux/string.h>
	82	#include <linux/module.h>
	83	#include <linux/errno.h>
	84	#include <linux/fs.h>
	85	#include <linux/smp_lock.h>
	86
	87	#ifndef GPFS_ARCH_X86_64
	88	#define __KERNEL_SYSCALLS__
	89	#endif
	90	#include <linux/unistd.h>
	91	#include <asm/uaccess.h> /* KERNEL_DS */
	92
	93	#define FOOBAR #error Do not do this
	94
	95	/* GPFS headers */
	96	#include <verdep.h>
	97	#include <linux2gpfs.h>
	98	#include <cxiSystem.h>
	99	#include <cxiTypes.h>
	100	#include <cxiAtomic.h>
	101	#include <cxi2gpfs.h>
	102	#include <cxiIOBuffer.h>
	103	#include <cxiSharedSeg.h>
	104	#include <cxiCred.h>
	105	#include <linux2gpfs.h>
	106	#include <Trace.h>
	107	#include <cxiVFSStats.h>
	108	#include <linux/kmod.h>
	109	#if LINUX_KERNEL_VERSION > 2060000
	110	#include <linux/wait.h>
	111	#endif
	112
	113	/* forward declaration */
	114	int vfsUserCleanup(struct super_block *sbP,
	115	struct gpfsVfsData_t *privVfsP, Boolean force);
	116
	117	extern struct file_system_type gpfs_fs_type;
	118
	119	static DECLARE_WAIT_QUEUE_HEAD(pwq);
	120
	121	int mmfsd_module_active = 0;
	122	static int mmfsd_id = -1;
	123	static int mount_id = -1;
	124	char mountCmd[CXI_MAXPATHLEN+1] = "M ";
	125	char mmfs_path[CXI_MAXPATHLEN+1] = "";
	126	char bin_path[CXI_MAXPATHLEN+1];
	127	static char mount_opt[CXI_MAXPATHLEN+1];
	128
	129	static unsigned int unusedInodeNum = 1;
	130	static struct inode *unusedInodeP = NULL;
	131	static struct super_block *unusedSuperP = NULL;
	132	struct super_block *shutdownSuperP = NULL;
	133
	134	static spinlock_t inode_lock;
	135
	/* Emit a trace record showing whether the big kernel lock is held
	   (kernel_locked()) and the current task's lock_depth.  Only compiled
	   when VERBOSETRACE is defined. */
	#ifdef VERBOSETRACE
	void TraceBKL()
	{
	  TRACE2(TRACE_VNODE, 10, TRCID_VNODE_BKL,
	         "BKL %d lock_depth %d\n", kernel_locked(), current->lock_depth);
	}
	#endif
	144
	145	#include <linux/pagemap.h>
	146
	147	#if HAS_SOP_ALLOC_INODE
	148	static struct kmem_cache * gpfsInodeCacheP;
	149	struct gpfs_bloated_inode
	150	{
	151	struct inode inode;
	152	char cxiNode[CXINODE_SIZE];
	153	};
	154
	155	static void
	156	gpfs_init_once(void * iP, struct kmem_cache * cacheP, unsigned long flags)
	157	{
	158	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
	159	SLAB_CTOR_CONSTRUCTOR)
	160	inode_init_once((struct inode *)iP);
	161	}
	162
	163	int
	164	gpfs_init_inodecache(void)
	165	{
	166	gpfsInodeCacheP = kmem_cache_create("gpfsInodeCache",
	167	sizeof(struct gpfs_bloated_inode), 0,
	168	SLAB_HWCACHE_ALIGN\|SLAB_RECLAIM_ACCOUNT,
	169	gpfs_init_once, NULL);
	170	if (gpfsInodeCacheP == NULL)
	171	return -ENOMEM;
	172	return 0;
	173	}
	174
	175	struct inode *
	176	gpfs_alloc_inode(struct super_block *sbP)
	177	{
	178	struct inode * iP;
	179
	180	iP = (struct inode *)kmem_cache_alloc(gpfsInodeCacheP, GFP_KERNEL);
	181	TRACE1N(TRACE_VNODE, 1, TRCID_LINUXOPS_GPFS_ALLOC_INODE_EXIT,
	182	"gpfs_alloc_inode: inode 0x%lX\n", iP);
	183	return iP;
	184	}
	185
	186	void
	187	gpfs_destroy_inode(struct inode *iP)
	188	{
	189	TRACE1N(TRACE_VNODE, 1, TRCID_LINUXOPS_GPFS_DESTROY_INODE,
	190	"gpfs_destroy_inode: inode 0x%lX\n", (void *)iP);
	191	kmem_cache_free(gpfsInodeCacheP, (void *)iP);
	192	}
	193
	194	void
	195	gpfs_destroy_inodecache(void)
	196	{
	197	while (kmem_cache_shrink(gpfsInodeCacheP) != 0)
	198	cxiSleep(40);
	199	kmem_cache_destroy(gpfsInodeCacheP);
	200	}
	201
	202	#endif /* HAS_SOP_ALLOC_INODE */
	203
	204	/* This routine is called from iget() just after allocating a new inode.
	205	This is a variant of the normal read_inode operation that allows passing an
	206	opaque parameter through iget4 into read_inode2. We need the parameter to
	207	know whether read_inode2 is being called from a normal lookup opration,
	208	where we are already holding a distributed lock on the file, or from nfs
	209	calling iget, where we need to get the lock inside of read_inode2.
	210
	211	Note: In the Linux source the call to read_inode2 is labelled a "reiserfs
	212	specific hack" with the additional warning "We don't want this to last, and
	213	are looking for VFS changes that will allow us to get rid of it." If and
	214	when such a change is made, we will hopefully be able to adapt our code
	215	accordingly. Otherwise, if read_inode2 goes away without a suitable
	216	replacement, we will have to use a more expensive approach, e.g., a global
	217	table where lookup would leave some state before calling iget. */
	218	void
	219	gpfs_s_read_inode2(struct inode iP, void opaque)
	220	{
	221	struct gpfsVfsData_t *privVfsP;
	222	ino_t inum = iP->i_ino;
	223	cxiNode_t *cnP;
	224	int rc;
	225
	226	ENTER(0);
	227	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_ENTER,
	228	"gpfs_s_read_inode2 enter: inode 0x%lX inode %d\n",
	229	iP, inum);
	230	/* BKL is sometimes held at entry */
	231
	232	#if HAS_SOP_ALLOC_INODE
	233	cnP = (cxiNode_t )&((struct gpfs_bloated_inode )iP)->cxiNode;
	234	#else
	235	/* allocate cxiNode_t */
	236	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
	237	{
	238	/* need to allocate separate storage for the cxiNode_t */
	239	cnP = (cxiNode_t *)cxiMallocUnpinned(CXINODE_SIZE);
	240	if (cnP == NULL)
	241	goto exit_bad;
	242	}
	243	else
	244	{
	245	/* we can store the cxiNode_t in the part of the iP->u
	246	* union after the PRVINODE field
	247	*/
	248	cnP = (cxiNode_t *)(&iP->PRVINODE + 1);
	249	}
	250	#endif
	251
	252	memset(cnP, 0, CXINODE_SIZE);
	253
	254	/*TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_1,
	255	"gpfs_s_read_inode2: iP 0x%lX cnP 0x%lX uSize-void* %d nodeSize %d",
	256	iP, cnP, sizeof(iP->PRVINODE) - sizeof(void *), CXINODE_SIZE);
	257
	258	*/
	259	/* connect cxiNode_t to struct inode */
	260	cnP->osNodeP = iP;
	261	iP->PRVINODE = cnP;
	262
	263	/* get inode attributes */
	264	privVfsP = VP_TO_PVP(iP);
	265	rc = gpfs_ops.gpfsInodeRead(privVfsP, cnP, inum, opaque);
	266
	267	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_EXIT,
	268	"gpfs_s_read_inode2 exit: inode 0x%lX rc %d",
	269	iP, rc);
	270
	271	if (rc == 0)
	272	{
	273	EXIT(0);
	274	return; // success!
	275	}
	276
	277	/* undo cxiNode_t allocation */
	278	cnP->osNodeP = NULL;
	279	iP->PRVINODE = NULL;
	280
	281	#if !HAS_SOP_ALLOC_INODE
	282	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
	283	cxiFreeUnpinned(cnP);
	284	#endif
	285
	286	exit_bad:
	287	/* make_bad_inode will initialize iP so that all operations return EIO;
	288	also set i_nlink to zero so that the bad inode will be thrown out of
	289	the cache at the next opportunity */
	290	make_bad_inode(iP);
	291	iP->i_nlink = 0;
	292	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_EXIT_BAD,
	293	"gpfs_s_read_inode2 exit: inode 0x%lX rc BADINODE",
	294	iP);
	295
	296	if (rc)
	297	cxiErrorNFS(rc);
	298
	299	EXIT(0);
	300	}
	301
	302	/* The following routine should never be called, since we have a read_inode2
	303	operation. However, knfsd checks the operation table and refuses to export
	304	a file system if its read_inode operation ptr is NULL. Hence, we need to
	305	have one, even if it never gets called. */
	306	void
	307	gpfs_s_read_inode(struct inode *iP)
	308	{
	309	/* only iget will use read_inode; this shouldn't happen as long as
	310	gpfs_nfsd_iget is being invoked via fh_to_dentry/gpfs_fh_to_dentry */
	311	ENTER(0);
	312	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_HUH,
	313	"gpfs_s_read_inode: ? calling make_bad_inode");
	314	make_bad_inode(iP);
	315	EXIT(0);
	316	}
	317
	318
	319	/* The following routine is called from iput when the i_count goes to zero and
	320	the link count in the inode is zero, which presumably means that the file
	321	was deleted. If so, we should free the disk space occupied by the file. */
	322	void
	323	gpfs_s_delete_inode(struct inode *iP)
	324	{
	325	cxiNode_t *cnP;
	326	ext_cred_t eCred;
	327	Boolean isGPFS = cxiIsGPFSThread();
	328	struct gpfsVfsData_t *privVfsP;
	329
	330	ENTER(0);
	331	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_DELETE_INODE,
	332	"gpfs_s_delete_inode enter: inode 0x%lX inode %d gpfsThread %d\n",
	333	iP, iP->i_ino, isGPFS);
	334	TraceBKL();
	335
	336	#if MUST_TRUNCATE_INODE_PAGES
	337	truncate_inode_pages(&iP->i_data, 0);
	338	#endif
	339
	340	cnP = VP_TO_CNP(iP);
	341
	342	if (!cnP)
	343	{
	344	/* The cxiNode_t is allocated in gpfs_s_read_inode2, so if cnP is NULL,
	345	this means gpfs_s_read_inode2 failed and has marked this as a bad
	346	inode. No further actions necessary in this case. */
	347	goto xerror;
	348	}
	349
	350	if (TestCtFlag(cnP, destroyIfDelInode))
	351	{
	352	privVfsP = VP_TO_PVP(iP);
	353	DBGASSERT(privVfsP != NULL);
	354
	355	/* ?? "eCred is passed all the way to the daemon, and then is ignored
	356	there," FBS 5/24/01 */
	357	setCred(&eCred);
	358
	359	gpfs_ops.gpfsInodeDelete(privVfsP, cnP, isGPFS, &eCred);
	360
	361	iP->PRVINODE = NULL;
	362	cnP->osNodeP = NULL;
	363
	364	#if !HAS_SOP_ALLOC_INODE
	365	/* If necessary, free the cxiNode_t structure which was allocated
	366	* in gpfs_s_read_inode2.
	367	*/
	368	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
	369	cxiFreeUnpinned(cnP);
	370	#endif
	371	}
	372
	373	xerror:
	374	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_DELETE_INODE_1,
	375	"gpfs_s_delete_inode exit: inode 0x%lX cnP 0x%lX\n",
	376	iP, cnP);
	377
	378	clear_inode(iP);
	379	EXIT(0);
	380	}
	381
	382	int
	383	gpfs_s_notify_change(struct dentry dentryP, struct iattr attrP)
	384	{
	385	int rc;
	386
	387	ENTER(0);
	388	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_NOTIFY_ENTER,
	389	"gpfs_s_notify_change enter: inode 0x%lX attr 0x%lX\n",
	390	dentryP->d_inode, attrP);
	391	TraceBKL();
	392
	393	rc = gpfs_i_setattr_internal(dentryP->d_inode, attrP);
	394
	395	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_NOTIFY_EXIT,
	396	"gpfs_s_notify_change exit: inode 0x%lX rc %d\n",
	397	dentryP->d_inode, rc);
	398	EXIT(0);
	399	if (rc)
	400	return (-rc);
	401	return rc;
	402	}
	403
	404	/* put_super is called just before the super_block is freed in do_unmount */
	405	void
	406	gpfs_s_put_super(struct super_block *sbP)
	407	{
	408	int rc = 0;
	409	struct gpfsVfsData_t *privVfsP;
	410
	411	ENTER(0);
	412	LOGASSERT(sbP != NULL);
	413	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
	414	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_PUTSUPER_ENTER,
	415	"gpfs_s_put_super enter: sbP 0x%lX sbP->s_dev 0x%X\n",
	416	sbP, sbP->s_dev);
	417	TraceBKL();
	418
	419	rc = cxiUnmount(sbP, false, true);
	420
	421	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_PUTSUPER_EXIT,
	422	"gpfs_s_put_super exit: rc %d\n", rc);
	423
	424	EXIT(0);
	425	}
	426
	427	int
	428	gpfs_s_statfs(struct dentry den, struct KSTATFS bufP)
	429	{
	430	struct super_block *sbP = den->d_sb;
	431	int rc;
	432	int code = 0;
	433	int len = sizeof(struct KSTATFS);
	434	struct gpfsVfsData_t privVfsP = (struct gpfsVfsData_t )SBLOCK_PRIVATE(sbP);
	435	cxiStatfs_t statfs;
	436
	437	VFS_STAT_START(statfsCall);
	438	ENTER(0);
	439	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_STATFS_ENTER,
	440	"gpfs_s_statfs enter: sbP 0x%lX len %d\n", sbP, len);
	441	memset(bufP, 0, len);
	442	/* BKL is held at entry */
	443
	444	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
	445	LOGASSERT(privVfsP != NULL);
	446	rc = gpfs_ops.gpfsStatfs(privVfsP, &statfs);
	447	if (rc)
	448	{
	449	rc = -rc;
	450	code = 1;
	451	goto xerror;
	452	}
	453
	454	bufP->f_type = GPFS_SUPER_MAGIC;
	455	bufP->f_bsize = statfs.f_bsize;
	456	bufP->f_blocks = statfs.f_blocks;
	457	bufP->f_bfree = statfs.f_bfree;
	458	bufP->f_bavail = statfs.f_bavail;
	459	bufP->f_files = statfs.f_files;
	460	bufP->f_ffree = statfs.f_ffree;
	461	bufP->f_namelen = statfs.f_name_max;
	462	bufP->f_fsid.val[0] = statfs.f_fsid.val[0];
	463	bufP->f_fsid.val[1] = statfs.f_fsid.val[1];
	464
	465	/* If filesystem size cannot be represented by the OS statfs structure,
	466	increase the "block size" and reduce the numbers */
	467	if (sizeof(bufP->f_blocks) < sizeof(statfs.f_blocks))
	468	{
	469	while (bufP->f_blocks != statfs.f_blocks)
	470	{
	471	statfs.f_bsize <<= 1; // double f_bsize
	472	statfs.f_blocks >>= 1; // halve the rest
	473	statfs.f_bfree >>= 1;
	474	statfs.f_bavail >>= 1;
	475	bufP->f_bsize = statfs.f_bsize;
	476	bufP->f_blocks = statfs.f_blocks;
	477	bufP->f_bfree = statfs.f_bfree;
	478	bufP->f_bavail = statfs.f_bavail;
	479	}
	480	}
	481
	482	xerror:
	483	TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_STATFS_EXIT,
	484	"gpfs_s_statfs exit: f_blocks %lld f_bfree %lld f_files %d f_free %d "
	485	"f_bsize %d code %d rc %d\n",
	486	statfs.f_blocks, statfs.f_bfree, bufP->f_files, bufP->f_ffree,
	487	bufP->f_bsize, code, rc);
	488
	489	if (rc)
	490	cxiErrorNFS(rc);
	491
	492	VFS_STAT_STOP;
	493	EXIT(0);
	494	return rc;
	495	}
	496
	497	/* umount_begin is called only when the force option is used */
	498	void
	499	#if LINUX_KERNEL_VERSION >= 2061700
	500	gpfs_s_umount_begin(struct vfsmount *vfs, int flags)
	501	#else
	502	gpfs_s_umount_begin(struct super_block * sbP)
	503	#endif
	504	{
	505	int dmrc = 0;
	506	struct gpfsVfsData_t *privVfsP;
	507	#if LINUX_KERNEL_VERSION >= 2061700
	508	struct super_block * sbP;
	509	LOGASSERT(vfs != NULL);
	510	LOGASSERT(vfs->mnt_sb != NULL);
	511	sbP = vfs->mnt_sb;
	512	#endif
	513
	514	ENTER(0);
	515	LOGASSERT(sbP != NULL);
	516	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
	517	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UMOUNT_ENTER,
	518	"gpfs_s_umount_begin enter: sbP 0x%lX sbP->s_dev 0x%X "
	519	"root vfsmount 0x%X pwd vfsmount 0x%X\n", sbP, sbP->s_dev,
	520	current->fs ? current->fs->rootmnt : NULL,
	521	current->fs ? current->fs->pwdmnt : NULL);
	522	TraceBKL();
	523
	524	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
	525
	526	/* We may need to generate a preunmount DMAPI event, since this
	527	* is a user initiated force unmount and we need to inform any
	528	* DM application before we start flushing out VFS users.
	529	*/
	530	if (privVfsP)
	531	{
	532	#ifdef DMAPI
	533	Boolean doDMEvents = false;
	534	struct dentry *dP = NULL;
	535	struct inode *iP = NULL;
	536	cxiNode_t *cnP = NULL;
	537
	538	dP = sbP->s_root;
	539	if (dP != NULL)
	540	iP = dP->d_inode;
	541	if (iP != NULL)
	542	cnP = VP_TO_CNP(iP);
	543
	544	/* Generate preunmount event. We have to present this because
	545	* vfsUserCleanup() may potentially kill processes on forced unmount.
	546	* Since the DM application may have an open file in this file system
	547	* we have to warn him. The DM application may not however receive
	548	* the final unmount event if we can't get everything released. If
	549	* VFS users still exist after this, then no mntput() and subsequent
	550	* gpfs_s_put_super() will occur.
	551	*/
	552	dmrc = gpfs_ops.gpfsDmUnmountEvent(true, true, privVfsP, cnP,
	553	&doDMEvents, NULL, NULL, NULL, 0);
	554	#endif
	555
	556	/* Force unmount */
	557	vfsUserCleanup(sbP, privVfsP, true);
	558
	559	if (sbP->s_root)
	560	printDentryTree(sbP->s_root, 10);
	561	}
	562
	563	exit:
	564	TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_UMOUNT_EXIT,
	565	"gpfs_s_umount_begin exit: sbP 0x%lX privVfsP 0x%lX dmrc %d "
	566	"s_active %d s_count 0x%X active files %d\n",
	567	sbP, privVfsP, dmrc, atomic_read(&sbP->s_active),
	568	sbP->s_count, !list_empty(&sbP->s_files));
	569
	570	/* Module count is decremented later on in do_unmount via gpfs_s_put_super */
	571	EXIT(0);
	572	}
	573
	574	int
	575	gpfs_s_remount(struct super_block sbP, int flags, char *data)
	576	{
	577	ENTER(0);
	578	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOUNT,
	579	"gpfs_s_remount: called\n");
	580	TraceBKL();
	581	EXIT(0);
	582	return 0;
	583	}
	584
	585	void
	586	gpfs_s_write_inode(struct inode *inode)
	587	{
	588	ENTER(0);
	589	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITEINODE,
	590	"gpfs_s_write_inode: called\n");
	591	TraceBKL();
	592	EXIT(0);
	593	}
	594
	595
	596	/* This routine is called from iput() just before the storage of the
	597	Linux inode is freed */
	598	void
	599	gpfs_s_clear_inode(struct inode *iP)
	600	{
	601	int code = 0;
	602	struct gpfsVfsData_t *privVfsP;
	603	cxiNode_t *cnP;
	604
	605	ENTER(0);
	606	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_CLEARINODE,
	607	"gpfs_s_clear_inode enter: inode 0x%lX inode %d generic_ip 0x%lX\n",
	608	iP, iP->i_ino, iP->PRVINODE);
	609	TRACE3(TRACE_VNODE, 5, TRCID_LINUXOPS_CLEARINODE_DETAILS,
	610	"gpfs_s_clear_inode: cnP 0x%lX privVfsP 0x%lX tooBig %d\n",
	611	VP_TO_CNP(iP), VP_TO_PVP(iP),
	612	NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE));
	613
	614	DBGASSERT(atomic_read((atomic_t *)&iP->i_count) == 0);
	615
	616	cnP = VP_TO_CNP(iP);
	617	privVfsP = VP_TO_PVP(iP);
	618
	619	if (cnP)
	620	{
	621	if (privVfsP)
	622	gpfs_ops.gpfsRele(privVfsP, cnP, (void *)iP, vnOp);
	623
	624	/* if necessary, free the cxiNode_t storage that we allocated in
	625	gpfs_s_read_inode2 */
	626	if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
	627	{
	628	iP->PRVINODE = NULL;
	629	cxiFreeUnpinned(cnP);
	630	}
	631	}
	632
	633	xerror:
	634	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_CLEARINODE_EXIT,
	635	"gpfs_s_clear_inode exit: inode 0x%lX generic_ip 0x%lX code %d\n",
	636	iP, iP->PRVINODE, code);
	637	EXIT(0);
	638	}
	639
	640	void
	641	gpfs_s_write_super(struct super_block * sbP)
	642	{
	643	int rc = 0;
	644	struct gpfsVfsData_t *privVfsP;
	645
	646	ENTER(0);
	647	LOGASSERT(sbP != NULL);
	648	LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
	649	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
	650	LOGASSERT(privVfsP != NULL);
	651	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITESUPER,
	652	"gpfs_s_write_super enter: sbP 0x%lX\n", sbP);
	653
	654	/* We have to either adhere to the s_dirt semantics or
	655	* ignore all syncs. Once a file systems write_super gets
	656	* called, sync_supers() restarts the super block scan. If
	657	* we don't turn off s_dirt then sync_supers() will be caught
	658	* in a loop. Alternatively if we only ignored kupdated then
	659	*
	660	* 1) a person could write to a file (which turns on s_dirt)
	661	* 2) kupdated could run (and be ignored) but the s_dirt is turned off
	662	* 3) the user attempts a sync from the command line sync, but that
	663	* does nothing since s_dirt was off
	664	* 4) the user expected the sync to have done something before he
	665	* halts the machine.
	666	*/
	667	sbP->s_dirt = 0;
	668
	669	/*
	670	* jcw: Another way to handle this would be never turn on the s_dirt flag,
	671	* and not to even have a write_super callback. Then neither kupdated nor
	672	* sync would do anything. The sync watchdog in the GPFS daemon would
	673	* substitute for kupdated. To regain the semantics of sync, we would
	674	* create dummy inodes that would have I_DIRTY set, and link one such inode
	675	* onto each GPFS superblock. Then sync would notice the dirty inodes
	676	* and call back through their write_inode callbacks. This would be
	677	* the only use of I_DIRTY by GPFS, so it could be reinterpreted to mean
	678	* "sync this file system". For now, s_dirt is still set and reset, but
	679	* s_dirt gets reset for all file systems before they have all been synced,
	680	* so the race described above can occur. The permanently-dirty inode
	681	* needs to be implemented to fix this.
	682	*/
	683	/* goto xerror; */
	684
	685	/* BKL is held at entry */
	686	TRACE0(TRACE_VNODE, 3, TRCID_LINUXOPS_WRITESUPER_3,
	687	"gpfs_s_write_super: performing sync");
	688
	689	rc = gpfs_ops.gpfsSyncfs(privVfsP);
	690	if (rc) {
	691	cxiErrorNFS(rc);
	692	rc = -rc;
	693	}
	694	xerror:
	695	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITESUPER_5,
	696	"gpfs_s_write_super exit: sbP 0x%lX rc %d\n", sbP, rc);
	697	EXIT(0);
	698	}
	699
	700
	701
	702	#if LINUX_KERNEL_VERSION >= 2060000
	703	int gpfs_get_sb(struct file_system_type *fsTypeP,
	704	int flags, const char devNameP, void dataP, struct vfsmount *mnt)
	705	{
	706	struct super_block *sbP;
	707	int sb_ret = 0;
	708
	709	ENTER(0);
	710	sb_ret = get_sb_nodev(fsTypeP, flags, dataP, gpfs_fill_super, mnt);
	711
	712	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_GET_SB,
	713	"gpfs_get_sb: flags 0x%X dataP 0x%X sbP %d\n",
	714	flags, dataP, sb_ret);
	715
	716	EXIT(0);
	717	return sb_ret;
	718	}
	719
	720	int
	721	gpfs_fill_super(struct super_block sbP, void dataP, int silent)
	722	#else
	723	struct super_block *
	724	gpfs_fill_super(struct super_block sbP, void dataP, int silent)
	725	#endif
	726	{
	727	int kernel_unlock = 0;
	728	struct inode *rootIP = NULL;
	729	struct dentry *rootDP = NULL;
	730	char *myBufP = NULL;
	731	char *sgNameP;
	732	char *strP;
	733	char *mountpointP;
	734	char *optionsP;
	735	int rc = 0;
	736	int mountHelperID = -1;
	737	int code = 0;
	738	int namelen;
	739	struct gpfsVfsData_t *privVfsP;
	740	cxiNode_t *cnRootP;
	741	cxiIno_t rootINum;
	742	char bname[BDEVNAME_SIZE];
	743	Boolean restricted = false;
	744
	745	ENTER(0);
	746	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_1,
	747	"gpfs_fill_super enter: sbP 0x%lX dev 0x%X silent %d data '%s'\n",
	748	sbP, sbP->s_dev, silent, ((char *)dataP == NULL) ? "" : dataP);
	749
	750	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_2,
	751	"gpfs_fill_super: dev name '%s'\n",
	752	(sbP->s_bdev == NULL) ? "" : SBLOCK_BDEVNAME(sbP,bname));
	753
	754	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_3,
	755	"gpfs_fill_super: s_flags 0x%x\n", sbP->s_flags);
	756
	757	/* A mount increases reference count on module */
	758	#if LINUX_KERNEL_VERSION < 2060000
	759	MY_MODULE_INCREMENT();
	760	#endif
	761
	762	if (dataP == NULL \|\| (char )dataP == '\0')
	763	{
	764	rc = EINVAL;
	765	code = 1;
	766	goto xerror;
	767	}
	768
	769	if (strlen((char *)dataP) > CXI_MAXPATHLEN)
	770	{
	771	rc = ENAMETOOLONG;
	772	code = 2;
	773	goto xerror;
	774	}
	775
	776	sbP->s_magic = GPFS_SUPER_MAGIC;
	777	sbP->s_op = &gpfs_sops;
	778	#if LINUX_KERNEL_VERSION > 2060000
	779	sbP->s_export_op = &gpfs_export_ops;
	780	#endif
	781
	782	SBLOCK_PRIVATE(sbP) = NULL;
	783
	784	sbP->s_root = NULL;
	785	sbP->s_blocksize = 0;
	786	sbP->s_blocksize_bits = 0;
	787	/* maximum filesize (avoid sign bit due to use with loff_t) */
	788	sbP->s_maxbytes = 0x7FFFFFFFFFFFFFFFULL;
	789
	790	myBufP = (char )cxiMallocPinned(strlen((char )dataP) + 1);
	791	if (myBufP == NULL)
	792	{
	793	code = 3;
	794	rc = ENOMEM;
	795	goto xerror;
	796	}
	797	strcpy(myBufP, (char *)dataP);
	798	optionsP = myBufP;
	799
	800	/* This is the syntax parser for the options field. At
	801	* least one option must be "dev=<devname>".
	802	*/
	803	sgNameP = NULL;
	804	strP = myBufP;
	805
	806	while(strP)
	807	{
	808	if (!strncmp(strP, "dev=", 4))
	809	{
	810	sgNameP = (char *)strchr(strP, '=') + 1;
	811	strP = (char )strchr(strP, ','); / more options */
	812	if (strP)
	813	namelen = strP - sgNameP;
	814	else
	815	namelen = strlen(sgNameP);
	816
	817	/* Copy the sgName into the first part of the
	818	* buffer, null terminate it, then append the
	819	* full option list.
	820	*/
	821	strncpy(myBufP, sgNameP, namelen);
	822	sgNameP = myBufP;
	823	sgNameP[namelen] = '\0';
	824
	825	optionsP = myBufP + namelen + 1;
	826	/* Move the options next (if there are any) */
	827	strcpy(optionsP, strP?(char *)strP:"");
	828	break;
	829	}
	830	else
	831	{
	832	strP = (char *)strchr(strP, ',');
	833	if (strP) strP++;
	834	}
	835	}
	836
	837	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_OPTIONS,
	838	"gpfs_fill_super: optionsP \"%s\"\n",
	839	strP ? (char *) strP:"");
	840
	841	while (strP)
	842	{
	843	/* look for rs option */
	844	strP = (char *)strchr(strP, ',');
	845	if (strP)
	846	strP++;
	847	if (strP)
	848	{
	849	if (!strncmp(strP, "rs", 2))
	850	{
	851	restricted = true;
	852	break;
	853	}
	854	}
	855	}
	856
	857	if (sgNameP == NULL \|\| *sgNameP == '\0')
	858	{
	859	code = 4;
	860	rc = EINVAL;
	861	goto xerror;
	862	}
	863	mountpointP = sgNameP; /* ??? */
	864
	865	if (restricted)
	866	{
	867	/* restricted mount - make it readonly */
	868	sbP->s_flags \|= MS_RDONLY;
	869	}
	870
	871	strcpy(mmfs_path, bin_path);
	872	strcat(mmfs_path, "/mmfsmount");
	873
	874	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_STARTHELPER,
	875	"gpfs_fill_super: start mount helper '%s'\n", mmfs_path);
	876
	877	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_STARTHELPER1,
	878	"gpfs_fill_super: s_flags 0x%x (rs %d), mountpointP %s\n",
	879	sbP->s_flags, restricted, mountpointP);
	880
	881	if (strlen(sgNameP) > CXI_MAXPATHLEN)
	882	{
	883	rc = ENAMETOOLONG;
	884	code = 5;
	885	goto xerror;
	886	}
	887	rc = gpfs_ops.gpfsReady();
	888	if (rc != 0)
	889	{
	890	rc = EAGAIN;
	891	code = 6;
	892	goto xerror;
	893	}
	894
	895	/* Start a new process that will receive and forward all messages during the
	896	* mount process to the mount invoker. The current process will wait for
	897	* this new process (in HandleMBUnmount()) and the daemon to be connected with
	898	* a socket and only than call SFSMountFS() that does the real mount work.
	899	*/
	900	strcpy(&mountCmd[2], sgNameP); // "M /dev/gpfs1"
	901	if (cxiHasMountHelper())
	902	mountHelperID = fork_mount_helper(mountCmd);
	903	else
	904	{
	905	/* Use special pid (-1) when not using mount helper */
	906	mountHelperID = -1;
	907	}
	908
	909	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_HELPERRC,
	910	"gpfs_fill_super: mount helper mountHelperID %d\n", mountHelperID);
	911
	912	#if LINUX_KERNEL_VERSION < 2060000
	913	/* BKL is not held during get_sb in 2.6 */
	914	if (kernel_locked())
	915	{
	916	unlock_kernel();
	917	kernel_unlock = 1;
	918	}
	919	#else
	920	/*
	921	* In 2.5, a bunch of calls originating from sys_sync will try to down
	922	* s_umount and block, because it's already downed in get_sb_nodev, and won't
	923	* be upped until get_sb returns (in do_kern_mount). During gpfsMount, we'll
	924	* call mmcommon getEFOption, and that will at some point try to do a sync
	925	* (e.g. in gpfsClusterInit, two times), and mount will deadlock. One way
	926	* to fix this is to take out relevant sync's in the shell scripts, but this
	927	* is dodgy because we might end up pulling a new sdr from another node, and
	928	* that's a long and compelex path, I don't think one can guarantee there
	929	* won't be any syscalls that desire s_umount along the way. Need to think
	930	* how to fix this right. For now, up the semaphore for the duration of
	931	* the gpfsMount (possibly opening up a window for other races e.g. with
	932	* unmount).
	933	*/
	934	up_write(&sbP->s_umount);
	935	#endif
	936	rc = gpfs_ops.gpfsMount((void *)sbP, PAGE_SIZE, sgNameP, mountpointP,
	937	optionsP,
	938	(struct gpfsVfsData_t **)&(SBLOCK_PRIVATE(sbP)),
	939	&cnRootP, /* returned root cxiNode_t */
	940	&rootINum, /* returned root inode number */
	941	NULL, /* not a soft mount */
	942	mountHelperID /* mount helper id */,
	943	-1U, /* no unique mount ID specified */
	944	(sbP->s_flags & MS_RDONLY), /* is it readonly */
	945	true); /* allocate pinned memory */
	946
	947	#if LINUX_KERNEL_VERSION < 2060000
	948	/* BKL is not held during get_sb in 2.5 */
	949	if (kernel_unlock)
	950	lock_kernel();
	951	#else
	952	down_write(&sbP->s_umount);
	953	#endif
	954
	955	if (rc)
	956	{
	957	code = 7;
	958	goto xerror;
	959	}
	960
	961	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
	962	DBGASSERT(cnRootP != NULL);
	963	rootIP = (struct inode *)cnRootP->osNodeP;
	964
	965	DBGASSERT(rootIP != NULL);
	966	DBGASSERT(rootIP->PRVINODE == cnRootP);
	967	DBGASSERT(cnRootP->osNodeP == rootIP);
	968
	969	/* Successful mount in daemon. Allocate root directory cache entry */
	970	rootDP = d_alloc_root(rootIP);
	971	if (!rootDP)
	972	{
	973	rc = gpfs_ops.gpfsUnmount(privVfsP, true);
	974	if (rc == 0 \|\| rc == ENOSYS)
	975	gpfs_ops.gpfsFinishUnmount(privVfsP);
	976
	977	code = 8;
	978	goto xerror;
	979	}
	980
	981	rootDP->d_op = &gpfs_dops_valid;
	982	sbP->s_root = rootDP;
	983
	984	sbP->s_dirt = 1; /* keep it on for sync to work */
	985
	986	if (myBufP != NULL)
	987	cxiFreePinned(myBufP);
	988
	989	#if (LINUX_KERNEL_VERSION < 2060000)
	990	unlock_super(sbP);
	991	#endif
	992	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_SUCCEED,
	993	"gpfs_fill_super exit: success sbP 0x%lX\n", sbP);
	994	EXIT(0);
	995	#if LINUX_KERNEL_VERSION >= 2060000
	996	return 0;
	997	#else
	998	return sbP;
	999	#endif
	1000
	1001	xerror:
	1002	if (rootDP)
	1003	dput(rootDP);
	1004	if (rootIP)
	1005	iput(rootIP);
	1006
	1007	if (myBufP != NULL)
	1008	cxiFreePinned(myBufP);
	1009
	1010	#if LINUX_KERNEL_VERSION < 2060000
	1011	unlock_super(sbP);
	1012
	1013	sbP->s_dev = 0;
	1014	#endif
	1015
	1016	/* An unmount decrements module use count */
	1017	#if LINUX_KERNEL_VERSION < 2060000
	1018	MY_MODULE_DECREMENT();
	1019	#endif
	1020
	1021	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_FAILED,
	1022	"gpfs_fill_super: failed code %d rc %d\n", code, rc);
	1023	EXIT(0);
	1024	#if LINUX_KERNEL_VERSION >= 2060000
	1025	return -rc;
	1026	#else
	1027	return NULL;
	1028	#endif
	1029	}
	1030
	1031	int
	1032	gpfs_reg_fs()
	1033	{
	1034	int rc;
	1035
	1036	ENTER(0);
	1037	spin_lock_init(&inode_lock);
	1038
	1039	rc = register_filesystem(&gpfs_fs_type);
	1040	if (rc)
	1041	goto xerror;
	1042
	1043	/* We create a dummy super block for purposes of instantiating
	1044	* a shutdown file descriptor. When the daemon dies this file
	1045	* will be closed and its special ops will be called.
	1046	* See cxiRegisterCleanup()
	1047	*/
	1048	shutdownSuperP = cxiMallocPinned(sizeof(struct super_block));
	1049	if (!shutdownSuperP)
	1050	{
	1051	unregister_filesystem(&gpfs_fs_type);
	1052	rc = -ENOMEM;
	1053	goto xerror;
	1054	}
	1055
	1056	SET_SUPER_BLOCK(shutdownSuperP, &null_sops);
	1057
	1058	xerror:
	1059	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REGFS,
	1060	"gpfs_reg_fs shutdownSuperP 0x%lX rc %d\n",
	1061	shutdownSuperP, rc);
	1062	EXIT(0);
	1063	return rc;
	1064	}
	1065
	1066	void
	1067	gpfs_unreg_fs()
	1068	{
	1069	int rc;
	1070
	1071	ENTER(0);
	1072	rc = unregister_filesystem(&gpfs_fs_type);
	1073
	1074	if (shutdownSuperP)
	1075	{
	1076	UNSET_SUPER_BLOCK(shutdownSuperP);
	1077	cxiFreePinned(shutdownSuperP);
	1078	shutdownSuperP = NULL;
	1079	}
	1080
	1081	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_UNREGFS,
	1082	"gpfs_unreg_fs rc %d\n", rc);
	1083	EXIT(0);
	1084	}
	1085
	1086	void
	1087	kill_mmfsd(void)
	1088	{
	1089	ENTER(0);
	1090	if (mmfsd_id != -1)
	1091	{
	1092	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_KILLMMFSD,
	1093	"kill_mmfsd: pid %X\n", mmfsd_id);
	1094
	1095	kill_proc(mmfsd_id, SIGTERM, 1);
	1096	if (mmfsd_id != -1)
	1097	#if LINUX_KERNEL_VERSION > 2060000
	1098	wait_event(pwq,0);
	1099	#else
	1100	sleep_on(&pwq);
	1101	#endif
	1102	}
	1103	EXIT(0);
	1104	}
	1105
	1106	/*
	1107	* Note: since this function is executed as kernel_thread "main" routine,
	1108	* it may not be safe to use stack at all, e.g. call non-inlined functions,
	1109	* at least in the success path. See comments e.g. in asm-i386/unistd.h
	1110	*/
	1111	int
	1112	exec_mmfs(void *nothing)
	1113	{
	1114	static char *argv[] = { mmfs_path, mount_opt, NULL };
	1115	static char *envp[] = { "HOME=/", NULL };
	1116	int rc;
	1117
	1118	ENTER(0);
	1119	set_fs(KERNEL_DS);
	1120
	1121	rc = EXEC_HELPER(mmfs_path, argv, envp, 1 /* wait if possible */);
	1122
	1123	xerror:
	1124	if(rc)
	1125	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_EXECMMFS_EXIT,
	1126	"exec_mmfs: exit rc -1 errno %d path %s\n", errno, mmfs_path);
	1127	EXIT(0);
	1128	return rc;
	1129	}
	1130
	1131	int
	1132	fork_mount_helper(char *data)
	1133	{
	1134
	1135	ENTER(0);
	1136	strcpy(mount_opt, data);
	1137	mount_id = kernel_thread(exec_mmfs, 0, 0);
	1138
	1139	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_FORK_MOUNTHELPER,
	1140	"fork_mount_helper: new pid %d\n", mount_id);
	1141
	1142	EXIT(0);
	1143	return mount_id;
	1144	}
	1145
	1146	/* Set device id and other information for a file system being mounted */
	1147	int
	1148	cxiSetMountInfo(void *osVfsP, cxiDev_t sgDevID,
	1149	int bsize, void osRootNodeP, cxiNode_t cnRootP,
	1150	Boolean releRootP, void gnRootP,
	1151	fsid_t fsid)/* (out) maintain hold on root */
	1152	{
	1153	struct super_block sbP = (struct super_block )osVfsP;
	1154	struct inode rootIP = (struct inode )osRootNodeP; // root dir inode
	1155	int i;
	1156
	1157	ENTER(0);
	1158	TRACE4(TRACE_VNODE, 1, TRCID_SET_MOUNT_INFO,
	1159	"cxiSetMountInfo: sbP 0x%lX rootIP 0x%lX cnRootP 0x%lX "
	1160	"gnRootP 0x%lX\n", sbP, rootIP, cnRootP, gnRootP);
	1161	DBGASSERT(sbP != NULL);
	1162
	1163	/* This is the auto remount case where mmfsd died/killed and restarted. */
	1164	if (gnRootP == cnRootP)
	1165	{
	1166	/* Since the OS independent layer looked up and held the
	1167	* root vnode, we've got too many hold counts for a reconnect.
	1168	* Tell upper layer that we must release.
	1169	*/
	1170	*releRootP = true;
	1171	}
	1172	else
	1173	{
	1174	/* Don't attempt to release the root VFS node */
	1175	*releRootP = false;
	1176	sbP->s_blocksize = bsize;
	1177	for (i = sbP->s_blocksize, sbP->s_blocksize_bits = 0; i != 1; i >>= 1)
	1178	sbP->s_blocksize_bits++;
	1179	}
	1180	if (rootIP != NULL)
	1181	{
	1182	DBGASSERT(rootIP->i_ino == INODENUM_ROOTDIR_FILE);
	1183	DBGASSERT(rootIP->PRVINODE == cnRootP);
	1184	}
	1185
	1186	EXIT(0);
	1187	return 0;
	1188	}
	1189
	1190	/* Attempt whatever we can to get holders of VFS elements
	1191	* (dcache entries, etc) to leave.
	1192	*/
	1193	int
	1194	vfsUserCleanup(struct super_block *sbP,
	1195	struct gpfsVfsData_t *privVfsP, Boolean force)
	1196	{
	1197	struct siginfo sinfo;
	1198	struct task_struct g, tsP;
	1199	Boolean killit;
	1200	int rc;
	1201
	1202	ENTER(0);
	1203
	1204	#ifndef GPFS_ARCH_POWER
	1205	/* Forced unmount doesn't really work very well on Linux since
	1206	* the VFS layer is very stateful. If a process is sitting in
	1207	* the file system, its vmount count will not go to zero and a
	1208	* proper unmount can occur. We're experimenting with the
	1209	* semantics (akin to umount -k on other OSes) where processes
	1210	* are killed if they are within a forced unmounted file system.
	1211	*
	1212	* Note that this doesn't get everyone. If you have a file open
	1213	* in GPFS but don't have your current working directory in GPFS
	1214	* then you're not killed. To kill those user (or close their
	1215	* files) you'd have to traipse thru the file table. There's
	1216	* a lot of OS specific code there that we wouldn't want to get
	1217	* into.
	1218	*/
	1219	if (force)
	1220	{
	1221	sinfo.si_signo = SIGKILL;
	1222	sinfo.si_errno = 0;
	1223	sinfo.si_code = SI_KERNEL;
	1224	sinfo.si_addr = vfsUserCleanup;
	1225	sinfo.si_pid = current->pid;
	1226	sinfo.si_uid = current->uid;
	1227
	1228	// read_lock(&tasklist_lock);
	1229	rcu_read_lock();
	1230
	1231	DO_EACH_THREAD(g,tsP)
	1232	{
	1233	task_lock(tsP);
	1234	if (tsP->fs && tsP->fs->pwdmnt && tsP->fs->pwdmnt->mnt_sb == sbP)
	1235	killit = true;
	1236	else
	1237	killit = false;
	1238	task_unlock(tsP);
	1239
	1240	if (killit)
	1241	send_sig_info(SIGKILL, &sinfo, tsP);
	1242	} WHILE_EACH_THREAD(g,tsP);
	1243	// read_unlock(&tasklist_lock);
	1244	rcu_read_unlock();
	1245	}
	1246	#endif
	1247
	1248	/* Purge cached OS VFS nodes/cxiNodes. */
	1249	rc = gpfs_ops.gpfsUncache(privVfsP);
	1250
	1251	EXIT(0);
	1252	return rc;
	1253	}
	1254
	1255	/* Called by gpfs_s_put_super() when the last holder of the superblock
	1256	* is gone. We should be able to successfully clean up and become
	1257	* unmounted.
	1258	*/
	1259	int
	1260	cxiUnmount(void *osVfsP, Boolean force, Boolean doDMEvents)
	1261	{
	1262	int rc = 0;
	1263	int dmrc = 0;
	1264	struct super_block sbP = (struct super_block )osVfsP;
	1265	struct gpfsVfsData_t *privVfsP;
	1266	#ifdef DMAPI
	1267	Boolean dmDoUnmountEvent = false;
	1268	void *sgUidP = NULL;
	1269	void *eventlistP = NULL;
	1270	void *sessLocP = NULL;
	1271	struct dentry *dP = NULL;
	1272	struct inode *iP = NULL;
	1273	cxiNode_t *cnP = NULL;
	1274	#endif
	1275
	1276	ENTER(0);
	1277	LOGASSERT(sbP != NULL);
	1278	privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
	1279
	1280	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CXIUNMOUNT_ENTER,
	1281	"cxiUnmount: enter privVfsP 0x%lX sbP 0x%lX force %d doDM %d\n",
	1282	privVfsP, sbP, force, doDMEvents);
	1283
	1284	if (privVfsP == NULL)
	1285	goto exit;
	1286
	1287	#ifdef DMAPI
	1288	dP = sbP->s_root;
	1289	if (dP != NULL)
	1290	iP = dP->d_inode;
	1291	if (iP != NULL)
	1292	cnP = VP_TO_CNP(iP);
	1293
	1294	/* Generate preunmount event */
	1295	if (doDMEvents)
	1296	{
	1297	rc = gpfs_ops.gpfsDmUnmountEvent(true, force, privVfsP, cnP,
	1298	&dmDoUnmountEvent, &sgUidP,
	1299	&eventlistP, &sessLocP, 0);
	1300	/* We should continue unmount even if it fails. Otherwise, linux
	1301	screwup and cannot remount unless we shutdown the daemon */
	1302	}
	1303	#endif
	1304
	1305	/* The superblock is unallocated by the kernel after gpfs_s_put_super /
	1306	cxiUnmount, regardless of any errors here because it doesn't check
	1307	a return code from the filesystem specific put_super call, so we need to
	1308	proceed through these calls even if an error occurs; not cleaning up
	1309	things in gpfsFinishUnmount (ie, the gpfs mount list) after an error
	1310	with unmount causes havoc when the daemon later restarts. */
	1311
	1312	rc = vfsUserCleanup(sbP, privVfsP, force);
	1313	if (rc == ENOSYS)
	1314	rc = 0;
	1315
	1316	rc = gpfs_ops.gpfsUnmount(privVfsP, force);
	1317	if (rc == ENOSYS)
	1318	rc = 0;
	1319
	1320	gpfs_ops.gpfsFinishUnmount(privVfsP);
	1321	SBLOCK_PRIVATE(sbP) = NULL;
	1322
	1323	#ifdef DMAPI
	1324	if (dmDoUnmountEvent)
	1325	dmrc = gpfs_ops.gpfsDmUnmountEvent(false, force, NULL, NULL,
	1326	&dmDoUnmountEvent, &sgUidP,
	1327	&eventlistP, &sessLocP, rc);
	1328	#endif
	1329	sbP->s_dirt = 0;
	1330
	1331	printSuperList(sbP);
	1332
	1333	/* An unmount decrements module use count */
	1334	#if LINUX_KERNEL_VERSION < 2060000
	1335	MY_MODULE_DECREMENT();
	1336	#endif
	1337
	1338	exit:
	1339	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_CXIUNMOUNT_EXIT,
	1340	"cxiUnmount: exit rc %d dmrc %d\n", rc, dmrc);
	1341	EXIT(0);
	1342	return rc;
	1343	}
	1344
	1345	int
	1346	cxiReactivateOSNode(void osVfsP, cxiNode_t cnP, void **osNodePP)
	1347	{
	1348	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REACTIVATE_VNODE,
	1349	"cxiReactivateOSNode: sbP 0x%lX cxiNodeP 0x%lX osNodePP 0x%lX\n",
	1350	osVfsP, cnP, osNodePP);
	1351	LOGASSERT(0); // not implemented on linux
	1352	return 0;
	1353	}
	1354
	1355
	1356	#if LINUX_KERNEL_VERSION >= 2060000
	1357	static int
	1358	inodeFindActor(struct inode iP, void opaqueP)
	1359	{
	1360	/* iget4 can be called on one thread which goes to create a new
	1361	* inode (get_new_inode, gpfs_s_read_inode2, gpfsInodeRead, readOSNode)
	1362	* but before that thread completes initializing the cxiNode_t, another
	1363	* thread calls iget4 and gets here (find_inode, inodeFindActor).
	1364	* Similar races exist when an inode is being deleted.
	1365	*
	1366	* Ideally, we'd like to spin_unlock() on the inode_lock and call
	1367	* wait_on_inode() but we cannot release the inode_lock here (find_inode
	1368	* is depending on it to protect its list_entry() calls). Fortunately,
	1369	* iget4 does exactly this wait for the inode upon return from
	1370	* find_inode. Returning zero here would cause get_new_inode to be
	1371	* called (which would assert when it found the first thread had
	1372	* already allocated the gnode). Return 1 and iget4 will do the
	1373	* necessary wait.
	1374	*
	1375	* We can't call anything here that could sleep because we are holding
	1376	* the inode_lock and sleeping can result in a hang
	1377	* TRACE4N does not block and is ok here.
	1378	*/
	1379
	1380	TRACE4N(TRACE_VNODE, 2, TRCID_LINUXOPS_INODEFINDACTOR,
	1381	"inodeFindActor: iP 0x%lX i_state 0x%x cxiNodeP 0x%lX isBad %d\n",
	1382	iP, iP->i_state, VP_TO_CNP(iP), is_bad_inode(iP));
	1383
	1384	if (iP->i_state & INODE_IN_CACHE)
	1385	return 1;
	1386
	1387	if (VP_TO_CNP(iP) == NULL)
	1388	{
	1389	if (iP->i_state == 0)
	1390	return 0;
	1391	else
	1392	return 1;
	1393	}
	1394
	1395	return gpfs_ops.gpfsInodeFindActor(VP_TO_CNP(iP), iP->i_ino, opaqueP);
	1396	}
	1397
	1398	static int
	1399	inodeInitLocked(struct inode iP, void opaqueP)
	1400	{
	1401	cxiIGetArg_t argsP = (cxiIGetArg_t )opaqueP;
	1402
	1403	iP->i_ino = argsP->extInodeNum;
	1404	return 0;
	1405	}
	1406	#else
	1407	static int
	1408	inodeFindActor(struct inode iP, unsigned long inodeNum, void opaqueP)
	1409	{
	1410	/* iget4 can be called on one thread which goes to create a new
	1411	* inode (get_new_inode, gpfs_s_read_inode2, gpfsInodeRead, readOSNode)
	1412	* but before that thread completes initializing the cxiNode_t, another
	1413	* thread calls iget4 and gets here (find_inode, inodeFindActor).
	1414	* Similar races exist when an inode is being deleted.
	1415	*
	1416	* Ideally, we'd like to spin_unlock() on the inode_lock and call
	1417	* wait_on_inode() but we cannot release the inode_lock here (find_inode
	1418	* is depending on it to protect its list_entry() calls). Fortunately,
	1419	* iget4 does exactly this wait for the inode upon return from
	1420	* find_inode. Returning zero here would cause get_new_inode to be
	1421	* called (which would assert when it found the first thread had
	1422	* already allocated the gnode). Return 1 and iget4 will do the
	1423	* necessary wait.
	1424	*
	1425	* We can't call anything here that could sleep because we are holding
	1426	* the inode_lock and sleeping can result in a hang
	1427	* TRACE3N does not block and is ok here.
	1428	*/
	1429
	1430	TRACE3N(TRACE_VNODE, 2, TRCID_LINUXOPS_INODEFINDACTOR2,
	1431	"inodeFindActor: iP 0x%lX i_state 0x%x cxiNodeP 0x%lX\n",
	1432	iP, iP->i_state, VP_TO_CNP(iP));
	1433
	1434	if (iP->i_state & INODE_IN_CACHE)
	1435	return 1;
	1436
	1437	if (VP_TO_CNP(iP) == NULL)
	1438	{
	1439	if (iP->i_state == 0)
	1440	return 0;
	1441	else
	1442	return 1;
	1443	}
	1444
	1445	return gpfs_ops.gpfsInodeFindActor(VP_TO_CNP(iP), inodeNum, opaqueP);
	1446	}
	1447	#endif
	1448
	1449
	1450	int
	1451	cxiNewOSNode(void osVfsP, cxiNode_t cnPP, void *osNodePP,
	1452	cxiIno_t inum, int nodeSize, void *opaqueP)
	1453	{
	1454	struct super_block sbP = (struct super_block )osVfsP;
	1455	struct inode *iP;
	1456	int rc;
	1457	int loop_count = 0;
	1458	int sleep_count = 0;
	1459
	1460	ENTER(0);
	1461	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE,
	1462	"cxiNewOSNode: sbP 0x%lX inum %d size %d",
	1463	sbP, inum, nodeSize);
	1464
	1465	/* The requested nodeSize must match CXINODE_SIZE */
	1466	if (nodeSize != CXINODE_SIZE)
	1467	goto bad_node_size;
	1468
	1469	repeat:
	1470	#if LINUX_KERNEL_VERSION >= 2060000
	1471	iP = iget5_locked(sbP, inum, inodeFindActor, inodeInitLocked, opaqueP);
	1472	#else
	1473	iP = iget4(sbP, inum, inodeFindActor, opaqueP);
	1474	#endif
	1475	if (iP == NULL)
	1476	{
	1477	*cnPP = NULL;
	1478	*osNodePP = NULL;
	1479	rc = ENOMEM;
	1480	goto xerror;
	1481	}
	1482
	1483	#if !HAS_SOP_READ_INODE2
	1484	/* We fill in the inode as opposed to a read_inode
	1485	* operation executed with iget()
	1486	*/
	1487	if (iP->i_state & I_NEW)
	1488	{
	1489	gpfs_s_read_inode2(iP, opaqueP);
	1490	unlock_new_inode(iP);
	1491	}
	1492	#endif
	1493
	1494	if (is_bad_inode(iP))
	1495	{
	1496	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_NEW_BAD,
	1497	"cxiNewOSNode: BAD INODE 0x%X\n", iP);
	1498	*cnPP = NULL;
	1499	*osNodePP = NULL;
	1500	iput(iP);
	1501	rc = EIO;
	1502	goto xerror;
	1503	}
	1504
	1505	/* Did we get the right inode ?
	1506	* When inodeFindActor is called from find_inode() and the inode
	1507	* is in transition it might return found without checking sanpId
	1508	* so go check again.
	1509	*/
	1510	#if LINUX_KERNEL_VERSION >= 2060000
	1511	if (!inodeFindActor(iP, opaqueP))
	1512	#else
	1513	if (!inodeFindActor(iP, iP->i_ino, opaqueP))
	1514	#endif
	1515	{
	1516	if (sleep_count > 10)
	1517	{
	1518	TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_NEW_VNODE_2,
	1519	"cxiNewOSNode: rc ESTALE inode 0x%lX ino %d i_state 0x%x "
	1520	"cxiNodeP 0x%lX isBad %d\n", iP, iP->i_ino, iP->i_state,
	1521	VP_TO_CNP(iP), is_bad_inode(iP));
	1522
	1523	*cnPP = NULL;
	1524	*osNodePP = NULL;
	1525	iput(iP);
	1526	rc = EIO;
	1527	goto xerror;
	1528	}
	1529
	1530	if (loop_count > 1000)
	1531	{
	1532	cxiSleep(10);
	1533	sleep_count++;
	1534	loop_count = 0;
	1535	}
	1536
	1537	loop_count++;
	1538	iput(iP);
	1539	goto repeat;
	1540	}
	1541
	1542	DBGASSERT(iP->PRVINODE != NULL);
	1543	cnPP = (cxiNode_t )iP->PRVINODE;
	1544	*osNodePP = iP;
	1545	rc = 0;
	1546
	1547	xerror:
	1548	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_EXIT,
	1549	"cxiNewOSNode: exit osNodeP 0x%lX cnP 0x%lX rc %d\n",
	1550	osNodePP, cnPP, rc);
	1551	EXIT(0);
	1552	return rc;
	1553
	1554	bad_node_size:
	1555	/* The requested nodeSize does not match CXINODE_SIZE.
	1556	Whoever called us is an incompitble version of the code or was
	1557	somehow not compiled correctly. */
	1558	TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_BAD,
	1559	"cxiNewOSNode: requested nodeSize %d does not match CXINODE_SIZE %d",
	1560	nodeSize, CXINODE_SIZE);
	1561	printk("mmfs: module inconsistency detected in cxiNewOSNode:\n"
	1562	" requested nodeSize %d does not match CXINODE_SIZE %d\n",
	1563	nodeSize, CXINODE_SIZE);
	1564	LOGASSERT(!"nodeSize != CXINODE_SIZE");
	1565	EXIT(0);
	1566	return ELIBBAD;
	1567	}
	1568
	1569
	1570	/* The linux kernel decrements the inode count and deallocates the
	1571	* inode after gpfs_s_put_inode() is called therefore this routine
	1572	* doesn't perform a delete.
	1573	*/
	1574	void
	1575	cxiFreeOSNode(void osVfsP, struct cxiNode_t cnP, void *osNodeP)
	1576	{
	1577	struct super_block sbP = (struct super_block )osVfsP;
	1578	struct inode iP = (struct inode )osNodeP;
	1579
	1580	ENTER(0);
	1581	TRACE5(TRACE_VNODE, 2, TRCID_LINUXOPS_DELETE_VNODE,
	1582	"cxiFreeOSNode enter: sbP 0x%lX cxiNodeP 0x%lX "
	1583	"iP 0x%lX inode %d i_count %d\n",
	1584	sbP, cnP, iP,
	1585	iP ? iP->i_ino : -1,
	1586	iP ? atomic_read((atomic_t *)&iP->i_count) : 0);
	1587
	1588	DBGASSERT(cnP->osNodeP == iP);
	1589	cnP->osNodeP = NULL;
	1590
	1591	if (iP)
	1592	{
	1593	DBGASSERT(atomic_read((atomic_t *)&iP->i_count) == 0);
	1594	iP->i_op = NULL;
	1595	iP->i_fop = NULL;
	1596	if (iP->i_mapping)
	1597	iP->i_mapping->a_ops = &gpfs_aops_after_inode_delete;
	1598	iP->i_size = 0;
	1599	iP->i_nlink = 0;
	1600	}
	1601	EXIT(0);
	1602	}
	1603
	1604	void
	1605	cxiDeleteMmap(cxiVmid_t segid)
	1606	{
	1607	TRACE1(TRACE_VNODE, 2, TRCID_LINUXOPS_DELETE_MMAP,
	1608	"cxiDeleteMmap: segid 0x%X\n", segid);
	1609	}
	1610
	1611	void
	1612	cxiReinitOSNode(void osVfsP, struct cxiNode_t cnP, void *osNodeP)
	1613	{
	1614	struct super_block sbP = (struct super_block )osVfsP;
	1615	struct inode iP = (struct inode )osNodeP;
	1616
	1617	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REINIT_VNODE,
	1618	"cxiReinitOSNode: sbP 0x%lX cnP 0x%lX iP 0x%lX\n",
	1619	sbP, cnP, iP);
	1620	LOGASSERT(0); // not implemented on linux
	1621	}
	1622
	1623	void
	1624	cxiDumpOSNode(cxiNode_t *cnP)
	1625	{
	1626	struct inode iP = (struct inode )cnP->osNodeP;
	1627	struct list_head dListP, dHeadP;
	1628	struct dentry *dentry;
	1629
	1630	ENTER(0);
	1631	TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_DUMP_VNODE,
	1632	"cxiDumpOSNode: cxiNodeP 0x%lX iP 0x%lX\n", cnP, iP);
	1633	if (iP)
	1634	{
	1635	printInode(iP);
	1636
	1637	dHeadP = &iP->i_dentry;
	1638	spin_lock(&dcache_lock);
	1639	for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next)
	1640	{
	1641	dentry = list_entry(dListP, struct dentry, d_alias);
	1642	printDentry(dentry);
	1643	}
	1644	spin_unlock(&dcache_lock);
	1645	}
	1646	EXIT(0);
	1647	}
	1648
	1649	#if LINUX_KERNEL_VERSION >= 2060000
	1650	static int
	1651	igrabInodeFindActor(struct inode iP, void opaqueP)
	1652	{
	1653	/* igrab can be called while another thread is doing a finial iput
	1654	* so instead we are call ilookup5. ilookup5 processes stuff under
	1655	* the inode_lock so if we are in here and find the inode then
	1656	* ilookup5 will increase i_count
	1657	*
	1658	* We can't call anything here that could sleep because we are holding
	1659	* the inode_lock and sleeping can result in a hang
	1660	* TRACE4N does not block and is ok here.
	1661	*/
	1662
	1663	TRACE3N(TRACE_VNODE, 2, TRCID_LINUXOPS_IGRABINODEFINDACTOR,
	1664	"igrabInodeFindActor: iP 0x%lX i_state 0x%x inode 0x%lX \n",
	1665	iP, iP->i_state, (struct inode *) opaqueP);
	1666
	1667	if (iP->i_state & INODE_BEING_RELEASED)
	1668	return 0;
	1669
	1670	if (iP != (struct inode *) opaqueP)
	1671	return 0;
	1672
	1673	return 1;
	1674	}
	1675	#endif
	1676
	1677	/* On linux we can't just decrement the i_count
	1678	* thus this routine will only accept a positive
	1679	* increment. If you want to put a reference then
	1680	* call cxiPutOSNode() which calls back thru the VFS
	1681	* layer.
	1682	*/
	1683	int
	1684	cxiRefOSNode(void osVfsP, cxiNode_t cnP, void *osNodeP, int inc)
	1685	{
	1686	return cxiRefOsNode(osVfsP,cnP,osNodeP,inc,false);
	1687	}
	1688
	1689	int
	1690	cxiRefOsNode(void osVfsP, cxiNode_t cnP, void *osNodeP, int inc,
	1691	Boolean calledFromRevoke)
	1692	{
	1693	struct inode iP = (struct inode )osNodeP;
	1694	struct inode *riP = NULL;
	1695	int holdCount;
	1696	int ino;
	1697
	1698	ENTER(0);
	1699	DBGASSERT(iP != NULL);
	1700	DBGASSERT(inc == 1);
	1701
	1702	#if LINUX_KERNEL_VERSION >= 2060000
	1703	/* The igrab() may fail if this inode is actively going
	1704	* thru a release.
	1705	*/
	1706	if(osVfsP)
	1707	{
	1708	/* we already have a hold */
	1709	riP = igrab(iP);
	1710	}
	1711	/* we may not currently have a hold so use ilookup5 */
	1712	else if(GPFS_TYPE(iP))
	1713	{
	1714	riP = ilookup5(iP->i_sb, iP->i_ino, igrabInodeFindActor, (void*)iP);
	1715	}
	1716	#else
	1717	/* The igrab() may fail if this inode is actively going
	1718	* thru a release.
	1719	*/
	1720	riP = igrab(iP);
	1721	#endif
	1722	if (riP)
	1723	{
	1724	DBGASSERT(!(iP->i_state & INODE_BEING_RELEASED));
	1725	holdCount = atomic_read((atomic_t *)&riP->i_count);
	1726	ino = riP->i_ino;
	1727	}
	1728	else
	1729	{
	1730	holdCount = 0;
	1731	ino = -1;
	1732	/* If this function is called from revoke handler check of this inode
	1733	is being released
	1734	*/
	1735	if (calledFromRevoke && (iP->i_state & INODE_BEING_RELEASED) )
	1736	holdCount = -1;
	1737	}
	1738	TRACE5(TRACE_VNODE, 2, TRCID_LINUXOPS_REF_VNODE,
	1739	"cxiRefOSNode exit: sbP 0x%lX cxiNodeP 0x%lX iP 0x%lX inode %d "
	1740	"i_count to %d", osVfsP, cnP, iP, ino, holdCount);
	1741	EXIT(0);
	1742	return holdCount;
	1743	}
	1744
	1745	/* Determines if OS node is inactive */
	1746	int
	1747	cxiInactiveOSNode(void osVfsP, struct cxiNode_t cnP, void *osNodeP,
	1748	Boolean canCacheP, Boolean hasReferencesP)
	1749	{
	1750	struct inode iP = (struct inode )osNodeP;
	1751	struct super_block sbP = (struct super_block )osVfsP;
	1752	int holdCount;
	1753
	1754	ENTER(0);
	1755	DBGASSERT(cnP->osNodeP == iP);
	1756
	1757	*canCacheP = false;
	1758	*hasReferencesP = false;
	1759
	1760	holdCount = atomic_read((atomic_t *)&iP->i_count);
	1761	if (holdCount > 0)
	1762	*hasReferencesP = true;
	1763
	1764	TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_INACTIVE_VNODE,
	1765	"cxiInactiveOSNode: sbP 0x%lX cxiNodeP 0x%lX iP 0x%lX "
	1766	"i_count %d canCache %d hasReferences %d\n", sbP, cnP, iP,
	1767	holdCount, canCacheP, hasReferencesP);
	1768
	1769	EXIT(0);
	1770	return holdCount;
	1771	}
	1772
	1773	void
	1774	cxiPutOSNode(void *vP)
	1775	{
	1776	struct inode iP = (struct inode )vP;
	1777	int holdCount;
	1778
	1779	ENTER(0);
	1780	DBGASSERT(iP != NULL);
	1781	DBGASSERT(!(iP->i_state & INODE_BEING_RELEASED));
	1782	holdCount = atomic_read((atomic_t *)&iP->i_count);
	1783	DBGASSERT(holdCount > 0);
	1784
	1785	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_PUT_OSNODE,
	1786	"cxiPutOSNode enter: iP 0x%lX inode %d i_count to %d\n",
	1787	iP, iP->i_ino, holdCount-1);
	1788
	1789	iput(iP);
	1790
	1791	EXIT(0);
	1792	return;
	1793	}
	1794
	1795	void
	1796	cxiDestroyOSNode(void *vP)
	1797	{
	1798	struct inode iP = (struct inode )vP;
	1799	int holdCount;
	1800
	1801	ENTER(0);
	1802	DBGASSERT(iP != NULL);
	1803	holdCount = atomic_read((atomic_t *)&iP->i_count);
	1804	DBGASSERT(holdCount > 0);
	1805
	1806	TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_DESTROY_OSNODE,
	1807	"cxiDestroyOSNode enter: iP 0x%lX inode %d i_count %d i_nlink %d\n",
	1808	iP, iP->i_ino, holdCount, iP->i_nlink);
	1809
	1810	iP->i_nlink = 0;
	1811	EXIT(0);
	1812	return;
	1813	}
	1814
	1815	void
	1816	cxiSetOSNodeType(struct cxiNode_t *cnP, cxiMode_t mode, cxiDev_t dev)
	1817	{
	1818	ENTER(0);
	1819	if (S_ISDIR(mode))
	1820	cnP->nType = cxiVDIR;
	1821	else if (S_ISREG(mode))
	1822	cnP->nType = cxiVREG;
	1823	else if (S_ISLNK(mode))
	1824	cnP->nType = cxiVLNK;
	1825	else if (S_ISCHR(mode))
	1826	cnP->nType = cxiVCHR;
	1827	else if (S_ISBLK(mode))
	1828	cnP->nType = cxiVBLK;
	1829	else if (S_ISFIFO(mode))
	1830	cnP->nType = cxiVFIFO;
	1831	else if (S_ISSOCK(mode))
	1832	cnP->nType = cxiVSOCK;
	1833	else
	1834	DBGASSERT(0);
	1835	EXIT(0);
	1836	}
	1837
	1838	void
	1839	cxiUpdateInode(cxiNode_t cnP, cxiVattr_t attrP, int what)
	1840	{
	1841	struct inode iP = (struct inode )cnP->osNodeP;
	1842
	1843	ENTER(0);
	1844	if (iP != NULL)
	1845	{
	1846	if (what & CXIUP_ATIME)
	1847	{
	1848	CXITIME_TO_INODETIME(attrP->va_atime, iP->i_atime);
	1849	EXIT(0);
	1850	return;
	1851	}
	1852	if (what & CXIUP_MODE)
	1853	{
	1854	iP->i_mode = attrP->va_mode;
	1855	CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
	1856	}
	1857	if (what & CXIUP_OWN)
	1858	{
	1859	iP->i_mode = attrP->va_mode;
	1860	iP->i_uid = attrP->va_uid;
	1861	iP->i_gid = attrP->va_gid;
	1862	CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
	1863	}
	1864	if (what & CXIUP_NLINK)
	1865	{
	1866	iP->i_nlink = attrP->va_nlink;
	1867	}
	1868	if (what & CXIUP_SIZE)
	1869	{
	1870	iP->i_size = attrP->va_size;
	1871	iP->i_blocks = attrP->va_blocks;
	1872	}
	1873	if (what & CXIUP_SIZE_BIG)
	1874	{
	1875	spin_lock(&inode_lock);
	1876	if (attrP->va_size > iP->i_size)
	1877	{
	1878	iP->i_size = attrP->va_size;
	1879	iP->i_blocks = attrP->va_blocks;
	1880	}
	1881	spin_unlock(&inode_lock);
	1882	}
	1883	if (what & CXIUP_TIMES)
	1884	{
	1885	CXITIME_TO_INODETIME(attrP->va_atime, iP->i_atime);
	1886	CXITIME_TO_INODETIME(attrP->va_mtime, iP->i_mtime);
	1887	CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
	1888	}
	1889	if (what & CXIUP_PERM)
	1890	{
	1891	iP->i_mode = attrP->va_mode;
	1892	iP->i_uid = attrP->va_uid;
	1893	iP->i_gid = attrP->va_gid;
	1894	cnP->xinfo = attrP->va_xinfo;
	1895	setIopTable(iP, (attrP->va_xinfo & VA_XPERM) != 0);
	1896	cnP->icValid \|= CXI_IC_PERM;
	1897	}
	1898	if ((what & CXIUP_NLINK) && TestCtFlag(cnP,destroyIfDelInode))
	1899	{
	1900	cxiDropInvalidDCacheEntry(cnP);
	1901
	1902	/* swapd must be notified to prune dcache entries */
	1903	if (TestCtFlag(cnP, pruneDCacheNeeded))
	1904	gpfs_ops.gpfsSwapdEnqueue(cnP);
	1905	}
	1906	}
	1907
	1908	TRACE4(TRACE_VNODE, 3, TRCID_CXIUPDATE_INODE_3,
	1909	"cxiUpdateInode: iP 0x%X atime 0x%X mtime 0x%X ctime 0x%X\n",
	1910	iP, GET_INODETIME_SEC(iP->i_atime), GET_INODETIME_SEC(iP->i_mtime),
	1911	GET_INODETIME_SEC(iP->i_ctime));
	1912
	1913	TRACE7(TRACE_VNODE, 3, TRCID_CXIUPDATE_INODE_1,
	1914	"cxiUpdateInode: what %d mode 0x%X uid %d gid %d nlink %d size %lld"
	1915	" blocks %d\n",
	1916	what, iP->i_mode, iP->i_uid, iP->i_gid, iP->i_nlink,
	1917	iP->i_size, iP->i_blocks);
	1918	EXIT(0);
	1919	}
	1920
	1921	/* Determine if operating system specific node belongs to a particular VFS and
	1922	can be uncached. Returns OS node if it exists, the determination of
	1923	whether it can be uncached or not. */
	1924	Boolean
	1925	cxiCanUncacheOSNode(void osVfsP, struct cxiNode_t cnP, void **vP)
	1926	{
	1927	struct inode iP = (struct inode )cnP->osNodeP;
	1928	int count = 0;
	1929
	1930	ENTER(0);
	1931	if (iP != NULL && iP->i_sb == osVfsP)
	1932	{
	1933	count = atomic_read((atomic_t *)&iP->i_count);
	1934	vP = (void )iP;
	1935	}
	1936	else
	1937	*vP = NULL;
	1938
	1939	TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_CANUNCACHE_OSNODE,
	1940	"cxiCanUncacheOSNode: cxiNode 0x%lx vP 0x%lX osVfsP 0x%lX "
	1941	"i_sb 0x%lX inode %d i_count %d\n", cnP, vP, osVfsP,
	1942	(iP ? iP->i_sb : 0), (iP ? iP->i_ino : 0), count);
	1943	EXIT(0);
	1944	return (count == 0);
	1945	}
	1946
	1947
	1948	/* Add operating system specific node to the lookup cache.
	1949	This routine is called with the necessary distributed lock held to
	1950	guarantee that the lookup cache entry is valid. */
	1951	#ifdef CCL
	1952	void *
	1953	cxiAddOSNode(void dentryP, void vP, DentryOpTableTypes dopTabType, int lookup)
	1954	#else
	1955	void *
	1956	cxiAddOSNode(void dentryP, void vP, int lookup)
	1957	#endif
	1958	{
	1959	struct inode iP = (struct inode )vP;
	1960	struct dentry dP = (struct dentry )dentryP;
	1961
	1962	ENTER(0);
	1963	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_ADD_OSNODE,
	1964	"cxiAddOSNode: dentry 0x%lX vP 0x%lX unhashed %d",
	1965	dentryP, vP, d_unhashed(dP));
	1966
	1967	/* mark dentry valid */
	1968	#ifdef CCL
	1969	switch(dopTabType)
	1970	{
	1971	/* Positive dcache entry for inexact file name match for Samba user.
	1972	Only valid for other Samba users.
	1973	Not valid for local/NFS users. Forces lookup for local/NFS users. */
	1974	case DOpOnlyValidIfSamba:
	1975	dP->d_op = &gpfs_dops_valid_if_Samba;
	1976	break;
	1977	/* Negative dcache entry for exact file name match for local/NFS user.
	1978	Only valid for other local/NFS users.
	1979	Not valid for Samba users. Forces lookup for Samba users. */
	1980	case DOpInvalidIfSamba:
	1981	dP->d_op = &gpfs_dops_invalid_if_Samba;
	1982	break;
	1983	default:
	1984	dP->d_op = &gpfs_dops_valid;
	1985	break;
	1986	}
	1987	#else
	1988	dP->d_op = &gpfs_dops_valid;
	1989	#endif
	1990
	1991	if (!d_unhashed(dP))
	1992	{
	1993	/* hook up dentry and inode */
	1994	d_instantiate(dP, iP);
	1995	dP = NULL;
	1996	}
	1997	else
	1998	{
	1999	#if LINUX_KERNEL_VERSION >= 2060000
	2000	if (lookup)
	2001	{
	2002	dP = d_splice_alias(iP, dP);
	2003	goto exit;
	2004	}
	2005	#endif
	2006	/* hook up dentry and inode */
	2007	d_instantiate(dP, iP);
	2008
	2009	/* if not yet done so, add to hash list */
	2010	d_rehash(dP);
	2011
	2012	dP = NULL;
	2013	}
	2014	exit:
	2015
	2016	EXIT(0);
	2017	return dP;
	2018	}
	2019
	2020
	2021	#ifdef NFS4_CLUSTER
	2022	/* get list of fs locations, return number of locations */
	2023	int
	2024	gpfs_s_fs_locations(struct super_block sbP, char *bufP)
	2025	{
	2026	int rc;
	2027	int code = 0;
	2028	int loc_count;
	2029	struct gpfsVfsData_t privVfsP = (struct gpfsVfsData_t )SBLOCK_PRIVATE(sbP);
	2030	LOGASSERT(privVfsP != NULL);
	2031
	2032	ENTER(0);
	2033	VFS_STAT_START(fsLocationCall);
	2034
	2035	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCFS_ENTER,
	2036	"gpfs_s_fs_locations enter: sbP 0x%lX\n", sbP);
	2037
	2038	rc = gpfs_ops.gpfsFsLocations(privVfsP, bufP, &loc_count);
	2039	if (rc)
	2040	{
	2041	rc = -rc;
	2042	code = 1;
	2043	goto xerror;
	2044	}
	2045	rc = loc_count;
	2046
	2047	xerror:
	2048	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCFS_EXIT,
	2049	"gpfs_s_fs_locations exit: sbP 0x%lX code %d rc %d\n",
	2050	sbP, code, rc);
	2051	VFS_STAT_STOP;
	2052	EXIT(0);
	2053	return rc;
	2054	}
	2055	#endif
	2056
	2057
	2058	/* Functions for converting between an NFS file handle and a dentry.
	2059	We define our own functions rather than using the generic ones in
	2060	fs/nfsd/nfsfh.c so we can revalidate the file inode, since it could have
	2061	been changed by another node. */
	2062
	2063	static struct dentry *
	2064	gpfs_nfsd_iget_dentry(struct inode *inode, __u32 generation)
	2065	{
	2066	struct list_head *lp;
	2067	struct dentry *result;
	2068
	2069	ENTER(0);
	2070	TRACE2(TRACE_VNODE, 3, TRCID_NFSD_IGET_DENTRY_1,
	2071	"gpfs_nfsd_iget_dentry: inode %d generation %d",
	2072	inode->i_ino, generation);
	2073
	2074	/* Now find a dentry. If possible, get a well-connected one. */
	2075	spin_lock(&dcache_lock);
	2076	for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next)
	2077	{
	2078	result = list_entry(lp, struct dentry, d_alias);
	2079
	2080	if (!(result->d_flags & DCACHE_DFLAGS_DISCONNECTED))
	2081	{
	2082	dget_locked(result);
	2083	#if LINUX_KERNEL_VERSION >= 2060000
	2084	result->d_flags \|= DCACHE_REFERENCED;
	2085	#else
	2086	result->d_vfs_flags \|= DCACHE_REFERENCED;
	2087	#endif
	2088	spin_unlock(&dcache_lock);
	2089
	2090	if (result->d_inode != inode)
	2091	{
	2092	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_31,
	2093	"gpfs_nfsd_iget_dentry:0 dentry flags 0x%x count %d inode 0x%lX "
	2094	"time %lu",
	2095	result->d_flags, atomic_read(&result->d_count),
	2096	result->d_inode, result->d_time);
	2097
	2098	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_41,
	2099	"gpfs_nfsd_iget_dentry:0 Inode %lu nlink %d count %d gen %u %u "
	2100	"state %lu flags 0x%x",
	2101	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
	2102	inode->i_generation, generation, inode->i_state, inode->i_flags);
	2103
	2104	dput(result);
	2105	goto build_dentry;
	2106	}
	2107	if (gpfs_i_revalidate(result))
	2108	{
	2109	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_3,
	2110	"gpfs_nfsd_iget_dentry:1 dentry flags 0x%x count %d inode 0x%lX "
	2111	"time %lu",
	2112	result->d_flags, atomic_read(&result->d_count),
	2113	result->d_inode, result->d_time);
	2114
	2115	TRACE7(TRACE_VNODE, 1, TRCID_NFSD_IGET_4,
	2116	"gpfs_nfsd_iget_dentry:1 Inode %lu nlink %d count %d gen %u %u "
	2117	"state %lu flags 0x%x",
	2118	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
	2119	inode->i_generation, generation, inode->i_state, inode->i_flags);
	2120
	2121	iput(inode);
	2122	dput(result);
	2123	EXIT(0);
	2124	return ERR_PTR(-ESTALE);
	2125	}
	2126	if (generation &&
	2127	generation != 0xffffffff && /* GENNUM_UNKNOWN */
	2128	inode->i_generation != generation)
	2129	{
	2130	/* we didn't find the right inode.. */
	2131	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_5,
	2132	"gpfs_nfsd_iget_dentry:2 dentry flags 0x%x count %d inode 0x%lX "
	2133	"time %lu",
	2134	result->d_flags, atomic_read(&result->d_count),
	2135	result->d_inode, result->d_time);
	2136
	2137	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_6,
	2138	"gpfs_nfsd_iget_dentry:2 Inode %lu nlink %d count %d gen %u %u "
	2139	"state %lu flags 0x%x",
	2140	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
	2141	inode->i_generation, generation, inode->i_state, inode->i_flags);
	2142
	2143	iput(inode);
	2144	dput(result);
	2145	EXIT(0);
	2146	return ERR_PTR(-ESTALE);
	2147	}
	2148	iput(inode);
	2149	EXIT(0);
	2150	return result;
	2151	}
	2152	}
	2153	spin_unlock(&dcache_lock);
	2154
	2155	build_dentry:
	2156
	2157	#if LINUX_KERNEL_VERSION < 2060000
	2158	result = d_alloc_root(inode);
	2159	#else
	2160	result = d_alloc_anon(inode);
	2161	#endif
	2162	if (result == NULL)
	2163	{
	2164	iput(inode);
	2165	EXIT(0);
	2166	return ERR_PTR(-ENOMEM);
	2167	}
	2168	#if LINUX_KERNEL_VERSION < 2060000
	2169	result->d_flags \|= DCACHE_DFLAGS_DISCONNECTED;
	2170	#endif
	2171
	2172	if (gpfs_i_revalidate(result))
	2173	{
	2174	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_7,
	2175	"gpfs_nfsd_iget:3 dentry flags 0x%x count %d inode 0x%lX time %lu",
	2176	result->d_flags, atomic_read(&result->d_count),
	2177	result->d_inode, result->d_time);
	2178
	2179	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_8,
	2180	"gpfs_nfsd_iget:3 Inode %lu nlink %d count %d gen %u %u "
	2181	"state %lu flags 0x%x",
	2182	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
	2183	inode->i_generation, generation, inode->i_state, inode->i_flags);
	2184
	2185	/* The dput call here is releases the dcache entry that was
	2186	* allocated by to d_alloc_root. It also results in an iput effectively
	2187	* removing the hold we place by our iget call above.
	2188	*/
	2189	dput(result);
	2190	EXIT(0);
	2191	return ERR_PTR(-ESTALE);
	2192	}
	2193	if (generation &&
	2194	generation != 0xffffffff && /* GENNUM_UNKNOWN */
	2195	inode->i_generation != generation)
	2196	{
	2197	/* we didn't find the right inode.. */
	2198	TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_9,
	2199	"gpfs_nfsd_iget:4 dentry flags 0x%x count %d inode 0x%lX time %lu",
	2200	result->d_flags, atomic_read(&result->d_count),
	2201	result->d_inode, result->d_time);
	2202
	2203	TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_10,
	2204	"gpfs_nfsd_iget:4 Inode %lu nlink %d count %d gen %u %u "
	2205	"state %lu flags 0x%x",
	2206	inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
	2207	inode->i_generation, generation, inode->i_state, inode->i_flags);
	2208
	2209	/* Release the dcache entry. This also does an iput. */
	2210	dput(result);
	2211	EXIT(0);
	2212	return ERR_PTR(-ESTALE);
	2213	}
	2214	EXIT(0);
	2215	return result;
	2216	}
	2217
	2218	static struct dentry *
	2219	gpfs_nfsd_iget(struct super_block *sbP, unsigned long ino,
	2220	cxiIGetArg_t *argP, __u32 generation)
	2221	{
	2222	int rc;
	2223	struct inode *inode;
	2224	struct gpfsVfsData_t *privVfsP;
	2225
	2226	ENTER(0);
	2227	TRACE6(TRACE_VNODE, 3, TRCID_NFSD_IGET_1,
	2228	"gpfs_nfsd_iget: sbP 0x%lX extino %d inode %d snapid %d "
	2229	"fileset %d generation %d",
	2230	sbP, ino, argP->inodeNum, argP->snapId, argP->filesetId,
	2231	generation);
	2232
	2233	/* get the inode */
	2234	if (ino == 0)
	2235	{
	2236	EXIT(0);
	2237	return ERR_PTR(-ESTALE);
	2238	}
	2239
	2240	/* Callers have set inodeNum/snapId in argP. vattrP is NULL and
	2241	* readInodeCalled is false, but these will be set appropriately in
	2242	* gpfsNFSIget after it obtains the attributes.
	2243	*/
	2244
	2245	privVfsP = (struct gpfsVfsData_t *)cxiGetPrivVfsP(sbP);
	2246	rc = gpfs_ops.gpfsNFSIget(privVfsP, argP, generation, (void **)&inode);
	2247
	2248	if (rc)
	2249	{
	2250	cxiErrorNFS(rc);
	2251
	2252	EXIT(0);
	2253	return ERR_PTR(-rc);
	2254	}
	2255
	2256	if (inode == NULL)
	2257	{
	2258	EXIT(0);
	2259	return ERR_PTR(-ENOMEM);
	2260	}
	2261
	2262	if (is_bad_inode(inode))
	2263	{
	2264	EXIT(0);
	2265	return ERR_PTR(-ESTALE);
	2266	}
	2267
	2268	/* gpfsNFSIget will have called findOrCreateLinux/cxiNewOSNode which
	2269	* makes the iget call along with the inodeFindActor validation.
	2270	*/
	2271
	2272	EXIT(0);
	2273	return(gpfs_nfsd_iget_dentry(inode,generation));
	2274
	2275	}
	2276
	2277	#if LINUX_KERNEL_VERSION >= 2060000
	2278	/* export_operations for nfsd communication with our file system
	2279	* via gpfs_export_ops
	2280	*/
	2281
	2282	/*
	2283	* gpfs_get_dparent: (get_parent) find the parent dentry for a given dentry
	2284	*/
	2285	struct dentry gpfs_get_dparent(struct dentry child)
	2286	{
	2287	int rc = 0;
	2288	struct dentry *result = NULL;
	2289	struct gpfsVfsData_t *privVfsP;
	2290	ext_cred_t eCred;
	2291	cxiNode_t *dcnP;
	2292	cxiIno_t iNum = (cxiIno_t)-1;
	2293	cxiNode_t *cnP = NULL;
	2294	struct inode *newInodeP = NULL;
	2295	struct dentry *retP;
	2296
	2297	ENTER(0);
	2298	VFS_INC(get_parentCall);
	2299
	2300	TRACE2(TRACE_VNODE, 3, TRCID_GET_DPARENT_ENTER,
	2301	"gpfs_get_dparent: dentry 0x%lX inode 0x%d",
	2302	child, child->d_inode->i_ino);
	2303
	2304	dcnP = VP_TO_CNP(child->d_inode);
	2305	privVfsP = VP_TO_PVP(child->d_inode);
	2306	DBGASSERT(privVfsP != NULL);
	2307
	2308	setCred(&eCred);
	2309
	2310	if (!dcnP)
	2311	{
	2312	/* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
	2313	where "count" entries are to be pruned, but the last one is
	2314	found to be recently referenced. When this happens, count is
	2315	decremented, but the loop is not terminated. The result is that
	2316	it continues to prune entries past where it should (prunes
	2317	everything). If our patch for this is not applied, the result
	2318	is a kernel failure as the cxiNode is referenced. Checking
	2319	here (and revalidate) allows us to reject the call instead. */
	2320	PRINTINODE(child->d_inode);
	2321	result = (struct dentry *)ERR_PTR(-ESTALE);
	2322	goto xerror;
	2323	}
	2324
	2325	rc = gpfs_ops.gpfsLookup(privVfsP, (void *)child->d_inode, dcnP,
	2326	NULL, (char *)"..",
	2327	(void **)&newInodeP, &cnP, &iNum, NULL,
	2328	NULL, &eCred, (void **)&retP);
	2329	if (rc == 0)
	2330	{
	2331	DBGASSERT(cnP != NULL);
	2332	DBGASSERT(iNum != -1);
	2333	DBGASSERT(newInodeP != NULL);
	2334	DBGASSERT(newInodeP->PRVINODE == cnP);
	2335	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
	2336	result = gpfs_nfsd_iget_dentry(newInodeP, (__u32)newInodeP->i_generation);
	2337	}
	2338	else
	2339	{
	2340	cxiErrorNFS(rc);
	2341	result = (struct dentry *)ERR_PTR(-rc);
	2342	iNum = -1;
	2343	}
	2344
	2345	xerror:
	2346
	2347	TRACE4(TRACE_VNODE, 3, TRCID_GET_DPARENT_EXIT,
	2348	"gpfs_get_dparent dentry 0x%lX inode %d result %lX err%d \n",
	2349	child, iNum, result, IS_ERR(result)? PTR_ERR(result): 0);
	2350	EXIT(0);
	2351	return result;
	2352	}
	2353
	2354	/*
	2355	* gpfs_get_dentry: (get_dentry) find dentry for the inode given a file handle
	2356	*/
	2357	struct dentry gpfs_get_dentry(struct super_block sbP, void * vdata)
	2358	{
	2359	__u32 *data=vdata;
	2360	unsigned long ino;
	2361	cxiIGetArg_t arg;
	2362	__u32 generation;
	2363	struct dentry *result;
	2364
	2365	ENTER(0);
	2366	VFS_INC(get_dentryCall);
	2367
	2368	ino = data[0];
	2369	if (IS_SNAPROOTDIR_EXT_INO(ino))
	2370	arg.inodeNum = SNAPROOTDIR_INT_INO;
	2371	else if (IS_SNAPLINKDIR_EXT_INO(ino))
	2372	arg.inodeNum = data[3];
	2373	else
	2374	arg.inodeNum = ino;
	2375	arg.snapId = data[1];
	2376	generation = data[2];
	2377	arg.extInodeNum = ino;
	2378	arg.filesetId = (unsigned)-1; //FIXME
	2379
	2380	arg.vattrP = NULL;
	2381	arg.readInodeCalled = false;
	2382	result = gpfs_nfsd_iget(sbP, ino, &arg, generation);
	2383	EXIT(0);
	2384	return result;
	2385	}
	2386
	2387	/* It is acceptable to create a disconnected dentry for pNFS since it is used
	2388	only for read/write. The check if it was exported is not required since
	2389	the call to the MDS will verify that the file is open.
	2390	*/
	2391	static int gpfs_acceptable(void expv, struct dentry dentry)
	2392	{
	2393	if (dentry && dentry->d_inode) {
	2394	#ifdef GPFS_PRINTK
	2395	printk("gpfs_acceptable ino %d\n", dentry->d_inode->i_ino);
	2396	#endif
	2397	return 1;
	2398	}
	2399	return 0;
	2400	}
	2401
	2402	/*
	2403	* gpfs_decode_fh: (decode_fh) decode a file handle returning ptr to it's dentry
	2404	*/
	2405	struct dentry *
	2406	gpfs_decode_fh(struct super_block sbP, __u32 fh,
	2407	int len, int fhtype,
	2408	int (acceptable)(void context, struct dentry *de),
	2409	void *context)
	2410	{
	2411	#if LINUX_KERNEL_VERSION == 2060800
	2412	int len = *lenP;
	2413	#endif
	2414	struct dentry *result;
	2415	__u32 parent[4]={0};
	2416
	2417	ENTER(0);
	2418	VFS_INC(decode_fhCall);
	2419
	2420	#ifdef GPFS_PRINTK
	2421	printk("gpfs_decode_fh %08x %08x %08x %08x %08x %08x %08x\n",
	2422	fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6]);
	2423	#endif
	2424	TRACE4(TRACE_VNODE, 3, TRCID_DECODE_FH_1,
	2425	"gpfs_decode_fh: sbP 0x%lX fh 0x%lX, len %d type %d",
	2426	sbP, fh, len, fhtype);
	2427	if (fhtype > 4 && fhtype < 8 && len >= 5)
	2428	{
	2429	parent[0]=fh[3]; /* ino */
	2430	parent[1]=fh[4]; /* p_sid */
	2431	if (len>5)
	2432	{
	2433	parent[2]=fh[5]; /* generation */
	2434	parent[3]=fh[3]; /* ino */
	2435	}
	2436
	2437	if (cxiIsLockdThread() // check for lockd thread
	2438	#ifdef P_NFS4
	2439	\|\| fhtype == 7 // it is a pNFS fh, disconnected fh is acceptable.
	2440	#endif
	2441	)
	2442	result = sbP->s_export_op->find_exported_dentry(sbP, fh, parent,
	2443	gpfs_acceptable, context);
	2444	else
	2445	result = sbP->s_export_op->find_exported_dentry(sbP, fh, parent,
	2446	acceptable, context);
	2447	TRACE4(TRACE_VNODE, 3, TRCID_DECODE_FH_2,
	2448	"gpfs_decode_fh: sbP 0x%lX fh 0x%lX result %lX err %d",
	2449	sbP, fh, result, IS_ERR(result)? PTR_ERR(result): 0);
	2450	#if LINUX_KERNEL_VERSION == 2060800
	2451	*lenP = 0;
	2452	#endif
	2453	if (IS_ERR(result))
	2454	cxiErrorNFS(PTR_ERR(result));
	2455
	2456	EXIT(0);
	2457	return result;
	2458	}
	2459
	2460	TRACE2(TRACE_VNODE, 3, TRCID_DECODE_FH_3,
	2461	"gpfs_decode_fh: sbP 0x%lX fh 0x%lX -EINVAL",
	2462	sbP, fh);
	2463	EXIT(0);
	2464	return ERR_PTR(-EINVAL);
	2465	}
	2466
	2467	/*
	2468	* gpfs_encode_fh: (encode_fh) encode a file handle from the given dentry
	2469	*/
	2470	int
	2471	gpfs_encode_fh(struct dentry dentry, __u32 fh, int *lenp,
	2472	int need_parent)
	2473	{
	2474	UInt32 d_sid, p_sid;
	2475
	2476	ENTER(0);
	2477	VFS_INC(encode_fhCall);
	2478
	2479	if (*lenp < 5)
	2480	{
	2481	EXIT(0);
	2482	return 255;
	2483	}
	2484
	2485	if (gpfs_ops.gpfsGetSnapIdPair(VP_TO_CNP(dentry->d_inode),
	2486	&d_sid, &p_sid) != 0)
	2487	{
	2488	EXIT(0);
	2489	return 255;
	2490	}
	2491
	2492	fh[0] = (__u32) dentry->d_inode->i_ino;
	2493	fh[1] = d_sid;
	2494	fh[2] = (__u32) dentry->d_inode->i_generation;
	2495	fh[3] = (__u32) dentry->d_parent->d_inode->i_ino;
	2496	fh[4] = p_sid;
	2497	if (*lenp > 5)
	2498	{
	2499	/* There was enough room to compelete parent */
	2500	fh[5] = (__u32) dentry->d_parent->d_inode->i_generation;
	2501	*lenp = 6;
	2502	}
	2503	else
	2504	*lenp = 5;
	2505
	2506	EXIT(0);
	2507	return *lenp;
	2508	}
	2509	#else
	2510	struct dentry gpfs_fh_to_dentry(struct super_block sbP, __u32 *fh,
	2511	int len, int fhtype, int parent)
	2512	{
	2513	unsigned long ino;
	2514	cxiIGetArg_t arg;
	2515	__u32 generation;
	2516	struct dentry *result;
	2517
	2518	ENTER(0);
	2519	TRACE5(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_1,
	2520	"gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX, len %d type %d parent %d",
	2521	sbP, fh, len, fhtype, parent);
	2522
	2523	if (fhtype == 3 && len >= 5)
	2524	{
	2525	if (parent)
	2526	{
	2527	ino = fh[3];
	2528	if (IS_SNAPROOTDIR_EXT_INO(ino))
	2529	arg.inodeNum = SNAPROOTDIR_INT_INO;
	2530	else if (IS_SNAPLINKDIR_EXT_INO(ino))
	2531	arg.inodeNum = IS_SNAPROOTDIR_EXT_INO(fh[0]) ?
	2532	SNAPROOTDIR_INT_INO : fh[0];
	2533	else
	2534	arg.inodeNum = ino;
	2535	arg.snapId = fh[4];
	2536	generation = 0xffffffff; /* GENNUM_UNKNOWN */
	2537	}
	2538	else
	2539	{
	2540	ino = fh[0];
	2541	if (IS_SNAPROOTDIR_EXT_INO(ino))
	2542	arg.inodeNum = SNAPROOTDIR_INT_INO;
	2543	else if (IS_SNAPLINKDIR_EXT_INO(ino))
	2544	arg.inodeNum = fh[3];
	2545	else
	2546	arg.inodeNum = ino;
	2547	arg.snapId = fh[1];
	2548	generation = fh[2];
	2549	}
	2550	arg.filesetId = (unsigned)-1; // FIXME
	2551	arg.vattrP = NULL;
	2552	arg.readInodeCalled = false;
	2553	result = gpfs_nfsd_iget(sbP, ino, &arg, generation);
	2554
	2555	TRACE4(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_2,
	2556	"gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX result %lX err %d",
	2557	sbP, fh, result, IS_ERR(result)? PTR_ERR(result): 0);
	2558
	2559	EXIT(0);
	2560	return result;
	2561	}
	2562
	2563	TRACE2(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_3,
	2564	"gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX -EINVAL",
	2565	sbP, fh);
	2566
	2567	EXIT(0);
	2568	return ERR_PTR(-EINVAL);
	2569	}
	2570
	2571	int gpfs_dentry_to_fh(struct dentry dentry, __u32 fh, int *lenp,
	2572	int need_parent)
	2573	{
	2574	UInt32 d_sid, p_sid;
	2575
	2576	if (*lenp < 5)
	2577	return 255;
	2578
	2579	ENTER(0);
	2580	if (gpfs_ops.gpfsGetSnapIdPair(VP_TO_CNP(dentry->d_inode),
	2581	&d_sid, &p_sid) != 0)
	2582	{
	2583	EXIT(0);
	2584	return 255;
	2585	}
	2586	fh[0] = (__u32) dentry->d_inode->i_ino;
	2587	fh[1] = d_sid;
	2588	fh[2] = (__u32) dentry->d_inode->i_generation;
	2589	fh[3] = (__u32) dentry->d_parent->d_inode->i_ino;
	2590	fh[4] = p_sid;
	2591
	2592	*lenp = 5;
	2593	EXIT(0);
	2594	return 3;
	2595	}
	2596	#endif
	2597
	2598	void
	2599	printSuper(struct super_block *sbP)
	2600	{
	2601	if (!_TRACE_IS_ON(TRACE_VNODE, 3))
	2602	return;
	2603
	2604	/* private field won't make much sense for non-GPFS file systems */
	2605	TRACE4N(TRACE_VNODE, 3, TRCID_PRINTSUPER_1,
	2606	"printSuper: sbP 0x%lX magic 0x%lX type 0x%lX private 0x%lX\n",
	2607	sbP, sbP->s_magic, sbP->s_type, SBLOCK_PRIVATE(sbP));
	2608
	2609	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTSUPER_3,
	2610	"printSuper: s_dev 0x%X count 0x%X active %d\n",
	2611	sbP->s_dev, sbP->s_count, atomic_read(&sbP->s_active));
	2612	}
	2613
	2614	void
	2615	printSuperList(struct super_block *sbP)
	2616	{
	2617	struct list_head *lP;
	2618	struct super_block *sP;
	2619
	2620	if (!_TRACE_IS_ON(TRACE_VNODE, 5))
	2621	return;
	2622
	2623	/* Run through all super blocks starting from provided GPFS super block. */
	2624	/* Ideally we would lock sb_lock, but we can't access it,
	2625	so small probability of this breaking, which is why it is at
	2626	a higher trace level (vnode 5). */
	2627	TRACE0N(TRACE_VNODE, 5, TRCID_PRINTALLSUPER_1,
	2628	"printSuperList:\n");
	2629	printSuper(sbP);
	2630	list_for_each(lP, &sbP->s_list)
	2631	{
	2632	sP = sb_entry(lP);
	2633	printSuper(sP);
	2634	}
	2635	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/super.c @ 145

Download in other formats: