Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

inode.c @ 146

Last change on this file since 146 was 16, checked in by rock, 17 years ago

File size: 61.1 KB

Rev	Line
[16]	1	/***************************************************************************
	2	*
	3	* Copyright (C) 2001 International Business Machines
	4	* All rights reserved.
	5	*
	6	* This file is part of the GPFS mmfslinux kernel module.
	7	*
	8	* Redistribution and use in source and binary forms, with or without
	9	* modification, are permitted provided that the following conditions
	10	* are met:
	11	*
	12	* 1. Redistributions of source code must retain the above copyright notice,
	13	* this list of conditions and the following disclaimer.
	14	* 2. Redistributions in binary form must reproduce the above copyright
	15	* notice, this list of conditions and the following disclaimer in the
	16	* documentation and/or other materials provided with the distribution.
	17	* 3. The name of the author may not be used to endorse or promote products
	18	* derived from this software without specific prior written
	19	* permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	25	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	26	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
	27	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	28	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
	29	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
	30	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*
	32	*************************************************************************** */
	33	/* @(#)01 1.90.1.4 src/avs/fs/mmfs/ts/kernext/gpl-linux/inode.c, mmfs, avs_rgpfs24, rgpfs24s012a 4/17/07 15:54:47 */
	34	/*
	35	* Inode operations
	36	*
	37	* Contents:
	38	* printInode
	39	* printDentry
	40	* cxiSetOSNode
	41	* cxiInvalidatePerm
	42	* getIattr
	43	* get_umask
	44	* setCred
	45	* gpfs_i_create
	46	* gpfs_i_lookup
	47	* gpfs_i_link
	48	* gpfs_i_unlink
	49	* gpfs_i_symlink
	50	* gpfs_i_mkdir
	51	* gpfs_i_rmdir
	52	* gpfs_i_mknod
	53	* gpfs_i_rename
	54	* gpfs_i_readlink
	55	* gpfs_i_follow_link
	56	* gpfs_i_readpage (in mmap.c)
	57	* gpfs_i_writepage (in mmap.c)
	58	* gpfs_i_bmap
	59	* gpfs_i_truncate
	60	* gpfs_i_permission
	61	* gpfs_i_smap
	62	* gpfs_i_updatepage
	63	* gpfs_i_revalidate
	64	* gpfs_i_setattr
	65	* gpfs_i_setattr_internal
	66	* gpfs_i_getattr
	67	* gpfs_i_getattr_internal
	68	* gpfs_i_lock
	69	* gpfs_i_getxattr
	70	* gpfs_i_setxattr
	71	* gpfs_i_listxattr
	72	* gpfs_i_removexattr
	73	*/
	74
	75	#include <Shark-gpl.h>
	76
	77	#include <linux/fs.h>
	78	#include <linux/sched.h>
	79	#include <linux/slab.h>
	80	#include <linux/errno.h>
	81	#include <linux/smp_lock.h>
	82	#include <linux/mm.h>
	83	#include <linux/highmem.h>
	84	#include <linux/kdev_t.h>
	85
	86	#include <verdep.h>
	87	#include <cxiMode.h>
	88	#include <cxiSystem.h>
	89	#include <cxi2gpfs.h>
	90	#include <cxiVFSStats.h>
	91	#include <cxiCred.h>
	92
	93	#include <linux2gpfs.h>
	94	#include <Trace.h>
	95
	96	#if LINUX_KERNEL_VERSION > 2060000
	97	#include <cxiTSFattr.h>
	98	#endif
	99
	100	#ifdef MODULE
	101	#include <linux/module.h>
	102	#endif /* MODULE */
	103
	104	void
	105	printInode(struct inode *iP)
	106	{
	107	TRACE7(TRACE_VNODE, 3, TRCID_PRINTINODE_1,
	108	"printInode: iP 0x%lX inode %d (0x%X) i_count %d dev 0x%X "
	109	"mode 0x%X nlink %d\n",
	110	iP, iP->i_ino, iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
	111	KDEV_INT(iP->i_rdev), iP->i_mode, iP->i_nlink);
	112
	113	TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_2,
	114	"printInode: uid %d gid %d rdev 0x%X atime 0x%X mtime 0x%X "
	115	"ctime 0x%X\n", iP->i_uid, iP->i_gid, KDEV_INT(iP->i_rdev),
	116	GET_INODETIME_SEC(iP->i_atime), GET_INODETIME_SEC(iP->i_mtime),
	117	GET_INODETIME_SEC(iP->i_ctime));
	118
	119	TRACE5(TRACE_VNODE, 3, TRCID_PRINTINODE_4,
	120	"printInode: size %lld blksize 0x%X blocks %d ver 0x%X op 0x%lX\n",
	121	iP->i_size, iP->i_blocks, iP->i_blocks, iP->i_version,
	122	iP->i_op);
	123
	124	TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_5,
	125	"printInode: fop 0x%lX sb 0x%lX flags 0x%X state 0x%X gen %d "
	126	"generic 0x%lX\n", iP->i_fop, iP->i_sb, iP->i_flags, iP->i_state,
	127	iP->i_generation, iP->PRVINODE);
	128
	129	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_6,
	130	"printInode: list 0x%lX next 0x%lX prev 0x%lX\n",
	131	&(iP->i_list), iP->i_list.next, iP->i_list.prev);
	132
	133	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_7,
	134	"printInode: dentry 0x%lX next 0x%lX prev 0x%lX\n",
	135	&(iP->i_dentry), iP->i_dentry.next, iP->i_dentry.prev);
	136
	137	#if LINUX_KERNEL_VERSION < 2050000
	138	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_8,
	139	"printInode: hash 0x%lX next 0x%lX prev 0x%lX\n",
	140	&(iP->i_hash), iP->i_hash.next, iP->i_hash.prev);
	141	#else
	142	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_9,
	143	"printInode: hash 0x%lX next 0x%lX prev 0x%lX\n",
	144	&(iP->i_hash), iP->i_hash.next, *iP->i_hash.pprev);
	145	#endif
	146	}
	147
	148	void
	149	printDentry(struct dentry *dP)
	150	{
	151	struct inode *iP = dP->d_inode;
	152
	153	if (!_TRACE_IS_ON(TRACE_VNODE, 3))
	154	return;
	155
	156	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_1,
	157	"printDentry: dentry 0x%lX count %d name '%s'\n",
	158	dP, atomic_read((atomic_t *)&dP->d_count), dP->d_name.name);
	159
	160	TRACE5N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_2,
	161	"printDentry: time 0x%X op 0x%lX flags 0x%X parent 0x%lX "
	162	"inode 0x%X\n", dP->d_time, dP->d_op, dP->d_flags,
	163	dP->d_parent, iP);
	164
	165	if (iP)
	166	{
	167	if (!list_empty(&iP->i_dentry))
	168	TRACE4N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3A,
	169	"printDentry: i_ino %d i_count %d "
	170	"i_dentry next 0x%lX i_dentry prev 0x%lX\n",
	171	iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
	172	list_entry(iP->i_dentry.next, struct dentry, d_alias),
	173	list_entry(iP->i_dentry.prev, struct dentry, d_alias));
	174	else
	175	TRACE2N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3B,
	176	"printDentry: i_ino %d i_count %d\n",
	177	iP->i_ino, atomic_read((atomic_t *)&iP->i_count));
	178	}
	179
	180	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3C,
	181	"printDentry: &d_hash 0x%lX d_hash.next 0x%lX d_hash.prev 0x%lX\n",
	182	&dP->d_child, dP->d_child.next, dP->d_child.prev);
	183
	184	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_4,
	185	"printDentry: &child 0x%lX child.next 0x%lX child.prev 0x%lX\n",
	186	&dP->d_child, dP->d_child.next, dP->d_child.prev);
	187
	188	if (!list_empty(&dP->d_subdirs))
	189	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_5,
	190	"printDentry: &subdirs 0x%lX subdir next 0x%lX "
	191	"subdir prev 0x%lX\n", &dP->d_subdirs,
	192	list_entry(dP->d_subdirs.next, struct dentry, d_child),
	193	list_entry(dP->d_subdirs.prev, struct dentry, d_child));
	194	}
	195
	196	/* Print directory entry tree up to maxPrint elements.
	197	* If maxPrint is 0 then there is no upper limit.
	198	*/
	199	void
	200	printDentryTree(struct dentry *entryDP, int maxPrint)
	201	{
	202	int count = 0;
	203	struct list_head *lhP;
	204	struct dentry *siblingDP;
	205	struct dentry *parentDP;
	206
	207	/* Check trace level required by printDentry() */
	208	if (!_TRACE_IS_ON(TRACE_VNODE, 3))
	209	return;
	210
	211	spin_lock(&dcache_lock);
	212
	213	parentDP = entryDP;
	214	lhP = parentDP->d_subdirs.next;
	215
	216	printDentry(parentDP);
	217	if (maxPrint > 0 && ++count >= maxPrint)
	218	goto xerror;
	219
	220	if (list_empty(&parentDP->d_subdirs))
	221	goto xerror;
	222
	223	do
	224	{
	225	while (lhP != &parentDP->d_subdirs)
	226	{
	227	siblingDP = list_entry(lhP, struct dentry, d_child);
	228
	229	printDentry(siblingDP);
	230	if (maxPrint > 0 && ++count >= maxPrint)
	231	goto xerror;
	232
	233	if (!list_empty(&siblingDP->d_subdirs))
	234	{
	235	parentDP = siblingDP;
	236	lhP = siblingDP->d_subdirs.next;
	237	continue;
	238	}
	239
	240	lhP = siblingDP->d_child.next;
	241	parentDP = siblingDP->d_parent;
	242	}
	243
	244	siblingDP = siblingDP->d_parent;
	245	parentDP = siblingDP->d_parent;
	246	lhP = siblingDP->d_child.next;
	247	}
	248	while (lhP != entryDP->d_child.next);
	249
	250	xerror:
	251	spin_unlock(&dcache_lock);
	252
	253	return;
	254	}
	255
	256	/* Set the inode operations table for a regular file or directory. Call
	257	with xperm set to true if the file has extended permission attributes
	258	(i.e. an ACL). This routine is a no-op if the inode is not a regular
	259	file or directory.
	260
	261	If the file does not have extended attributes, the table that is used
	262	will have a null value for the permission routine pointer. This will
	263	cause Linux to perform access checks directly instead of acquiring the
	264	kernel lock and calling GPFS, giving better performance. */
	265	void setIopTable(struct inode *iP, Boolean xperm)
	266	{
	267	struct inode_operations newopP, stdopP, *xopP;
	268	struct list_head *lp;
	269	int count = 0;
	270
	271	/* Choose the correct inode operations table based on whether this is a
	272	directory or a regular file. Assume that the file has extended
	273	attributes so that GPFS permission checking will be required. */
	274	ENTER(0);
	275	if (S_ISDIR(iP->i_mode))
	276	xopP = &gpfs_dir_iops_xperm;
	277	else if (S_ISREG(iP->i_mode))
	278	xopP = &gpfs_iops_xperm;
	279	else
	280	{
	281	EXIT(0);
	282	return;
	283	}
	284
	285	/* If the file really does have extended attributes (or if the token has
	286	been lost so that we do not know the status), set extended permission
	287	table and exit. */
	288	if (xperm)
	289	{
	290	iP->i_op = xopP;
	291	EXIT(0);
	292	return;
	293	}
	294
	295	/* Get address of an inode operations table that has a generic permission
	296	routine pointer. */
	297	iP->i_op = S_ISDIR(iP->i_mode) ? &gpfs_dir_iops_stdperm : &gpfs_iops_stdperm;
	298	EXIT(0);
	299	}
	300
	301
	302	void
	303	cxiSetOSNode(void osVfsP, cxiNode_t cnP, cxiVattr_t *attrP)
	304	{
	305	struct super_block sbP = (struct super_block )osVfsP;
	306	struct inode inodeP = (struct inode )cnP->osNodeP;
	307
	308	ENTER(0);
	309	DBGASSERT(inodeP != NULL);
	310	DBGASSERT(inodeP->PRVINODE == cnP);
	311	DBGASSERT(inodeP->i_sb == sbP);
	312
	313	inodeP->i_mode = attrP->va_mode;
	314	inodeP->i_nlink = attrP->va_nlink;
	315	inodeP->i_uid = attrP->va_uid;
	316	inodeP->i_gid = attrP->va_gid;
	317	inodeP->i_rdev = cxiDevToKernelDev(cxiDev32ToDev(attrP->va_rdev));
	318
	319	CXITIME_TO_INODETIME(attrP->va_atime, inodeP->i_atime);
	320	CXITIME_TO_INODETIME(attrP->va_mtime, inodeP->i_mtime);
	321	CXITIME_TO_INODETIME(attrP->va_ctime, inodeP->i_ctime);
	322
	323	inodeP->i_size = attrP->va_size;
	324	inodeP->i_blocks = attrP->va_blocksize;
	325	inodeP->i_blocks = attrP->va_blocks;
	326	inodeP->i_generation = attrP->va_gen;
	327	inodeP->i_flags = 0;
	328
	329	cnP->xinfo = attrP->va_xinfo;
	330
	331	switch (inodeP->i_mode & S_IFMT)
	332	{
	333	case S_IFREG:
	334	setIopTable(inodeP, (attrP->va_xinfo & VA_XPERM) != 0);
	335	if (cxiIsNFSThread())
	336	inodeP->i_fop = &gpfs_fops_no_sendfile;
	337	else
	338	inodeP->i_fop = &gpfs_fops;
	339	break;
	340
	341	case S_IFDIR:
	342	setIopTable(inodeP, (attrP->va_xinfo & VA_XPERM) != 0);
	343	inodeP->i_fop = &gpfs_dir_fops;
	344	break;
	345
	346	case S_IFLNK:
	347	inodeP->i_op = &gpfs_link_iops;
	348	inodeP->i_fop = &gpfs_fops;
	349	break;
	350
	351	case S_IFBLK:
	352	case S_IFCHR:
	353	case S_IFIFO:
	354	case S_IFSOCK:
	355	/* Set vector table for special files, gpfs will not get
	356	* these operations.
	357	*/
	358	#if LINUX_KERNEL_VERSION >= 2060000
	359	init_special_inode(inodeP, inodeP->i_mode, inodeP->i_rdev);
	360	#else
	361	init_special_inode(inodeP, inodeP->i_mode,
	362	kdev_t_to_nr(inodeP->i_rdev));
	363	#endif
	364	break;
	365	}
	366	if (inodeP->i_mapping)
	367	inodeP->i_mapping->a_ops = &gpfs_aops;
	368
	369	cnP->icValid = CXI_IC_ALL;
	370
	371	TRACE7(TRACE_VNODE, 2, TRCID_LINUXOPS_SETINODE,
	372	"cxiSetOSNode: inodeP 0x%lX inode %d i_count %d i_mode 0x%X "
	373	"i_xinfo 0x%X i_nlink %d i_size %lld\n",
	374	inodeP, inodeP->i_ino, atomic_read((atomic_t *)&inodeP->i_count),
	375	inodeP->i_mode, attrP->va_xinfo, inodeP->i_nlink, inodeP->i_size);
	376	EXIT(0);
	377	return;
	378	}
	379
	380
	381	/* The following function is called from cxiInvalidateAttr when the
	382	CXI_IC_PERM option was specified, which indicates that permission related
	383	attributes cached in the struct inode (owner, mode, etc.) are no longer
	384	known to be valid. */
	385	void
	386	cxiInvalidatePerm(cxiNode_t *cnP)
	387	{
	388	struct inode inodeP = (struct inode )cnP->osNodeP;
	389
	390	ENTER(0);
	391	TRACE3(TRACE_VNODE, 2, TRCID_CXIINVA_PERM,
	392	"cxiInvalidatePerm: cnP 0x%lX std %d dir std %d",
	393	cnP, inodeP->i_op == &gpfs_iops_stdperm,
	394	inodeP->i_op == &gpfs_dir_iops_stdperm);
	395
	396	/* Set the inode operation table to gpfs_..._xperm; the next permission
	397	check will then go through our gpfs_i_permission function, which will
	398	revalidate permission attributes and set the inode operation table
	399	back to gpfs_..._stdperm, if appropriate. Note: since symlinks always
	400	have permission iop set, setIopTable is a noop for symlinks. */
	401	setIopTable(inodeP, true);
	402	EXIT(0);
	403	}
	404
	405	static void
	406	getIattr(struct inode inodeP, struct iattr attrP)
	407	{
	408	ENTER(0);
	409	// attrP->ia_valid = ??? ;
	410	attrP->ia_mode = inodeP->i_mode;
	411	attrP->ia_uid = inodeP->i_uid;
	412	attrP->ia_gid = inodeP->i_gid;
	413	attrP->ia_size = inodeP->i_size;
	414	attrP->ia_atime = inodeP->i_atime;
	415	attrP->ia_mtime = inodeP->i_mtime;
	416	attrP->ia_ctime = inodeP->i_ctime;
	417	EXIT(0);
	418	return;
	419	}
	420
	421	static inline int
	422	get_umask()
	423	{
	424	return (current->fs->umask);
	425	}
	426
	427
	428	/* Record credentials of current thread */
	429	void
	430	setCred(ext_cred_t *credP)
	431	{
	432	int nGroups;
	433
	434	ENTER(0);
	435	credP->principal = current->fsuid; /* user id */
	436	credP->group = current->fsgid; /* primary group id */
	437
	438	#if LINUX_KERNEL_VERSION > 2060300
	439	nGroups = MIN(current->group_info->ngroups, MIN(ECRED_NGROUPS, NGROUPS_SMALL));
	440	#else
	441	nGroups = MIN(current->ngroups, ECRED_NGROUPS);
	442	#endif
	443	credP->num_groups = nGroups;
	444	if (nGroups > 0)
	445	#if LINUX_KERNEL_VERSION > 2060300
	446	memcpy(credP->eGroups, current->group_info->blocks[0], nGroups*sizeof(gid_t));
	447	/* ?? This is incorrect. Linux 2.6 supports a very large list of
	448	groups by allocating a page for each bunch of groups. Only if
	449	there are <= NGROUPS_SMALL groups is the space in
	450	group_info->small_block used. GPFS will only see the prefix of
	451	the group set. */
	452	/* To save kernel stack space, the GPFS ext_cred_t should keep a
	453	pointer to the array of groups. The group set cannot change
	454	during a GPFS system call since the caller can only make one
	455	system call at a time. */
	456	#else
	457	memcpy(credP->eGroups, current->groups, nGroups*sizeof(gid_t));
	458	#endif
	459	EXIT(0);
	460	}
	461
	462	/* inode_operations */
	463
	464	/* Called with a negative (no inode) dir cache entry.
	465	* If this call succeeds, we fill in with d_instantiate().
	466	*/
	467
	468	int
	469	gpfs_i_create(struct inode diP, struct dentry dentryP, int mode
	470	#if LINUX_KERNEL_VERSION >= 2060000
	471	, struct nameidata *ni
	472	#endif
	473	)
	474	{
	475	int rc;
	476	struct gpfsVfsData_t *privVfsP;
	477	cxiNode_t *dcnP;
	478	cxiNode_t *cnP = NULL;
	479	cxiIno_t iNum = (cxiIno_t)-1;
	480	struct inode *newInodeP = NULL;
	481	int flags = FWRITE \| FCREAT \| FEXCL;
	482	cxiMode_t umask = get_umask();
	483	ext_cred_t eCred;
	484	struct dentry *retP;
	485
	486	VFS_STAT_START(createCall);
	487	ENTER(0);
	488	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_ENTER,
	489	"gpfs_i_create enter: iP 0x%lX dentryP 0x%lX mode 0x%X name '%s'\n",
	490	diP, dentryP, mode, dentryP->d_name.name);
	491	/* BKL is held at entry */
	492
	493	dcnP = VP_TO_CNP(diP);
	494	privVfsP = VP_TO_PVP(diP);
	495	LOGASSERT(privVfsP != NULL);
	496
	497	retry:
	498
	499	setCred(&eCred);
	500	rc = gpfs_ops.gpfsCreate(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum, 0,
	501	flags, dentryP, (char *)dentryP->d_name.name,
	502	mode, umask, NULL, &eCred);
	503	if (rc == 0)
	504	{
	505	DBGASSERT(cnP != NULL);
	506	DBGASSERT(iNum != -1);
	507	DBGASSERT(newInodeP != NULL);
	508	DBGASSERT(newInodeP->PRVINODE == cnP);
	509	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
	510	cnP->createRaceLoserThreadId = 0;
	511	}
	512
	513	/* linux would normally serialize the creates on a directory (via the
	514	* parent directory semaphore) to ensure that a create didn't fail with
	515	* EEXIST. However in a multinode environment we may perform a lookup
	516	* on one node (thinking the file doesn't exist) yet a create is
	517	* performed on a different node before linux can call the physical
	518	* file systems create. We attempt to reconcile this case by marking
	519	* the fact that this happened and checking the FEXCL flag at gpfs_f_open()
	520	* to see if we should have failed this with EEXIST.
	521	*/
	522	if (rc == EEXIST)
	523	{
	524	/* Make sure that this create call is part of the linux open call. NFS
	525	and mknod calls create without an open, so check that this is not one
	526	of those calls. On the open call the open flags are available and if
	527	the FEXCL was on fail it with EEXIST. */
	528	int mode1;
	529
	530	/* Skip if NFS create call. */
	531	if (cxiIsNFSThread())
	532	goto retExist;
	533
	534	/* ??? if (sys_mknod call) goto xerror; */
	535
	536	/* Do it only if trying to create a regular file. */
	537	if (((mode & S_IFMT) != 0) && !(mode & S_IFREG))
	538	goto retExist;
	539
	540	setCred(&eCred); // rebuild since gpfsCreate may remap ids
	541	rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
	542	dentryP, (char *)dentryP->d_name.name,
	543	(void **)&newInodeP, &cnP, &iNum, NULL,
	544	&mode1, &eCred, (void **)&retP);
	545	if (rc == ENOENT)
	546	goto retry;
	547	if (!rc)
	548	{
	549	/* If the file that was found was a directory than return the
	550	return code that linux would have returned. */
	551	if (S_ISDIR(newInodeP->i_mode))
	552	{
	553	rc = EISDIR;
	554	goto retExist;
	555	}
	556	cnP->createRaceLoserThreadId = cxiGetThreadId();
	557	}
	558	}
	559
	560	retExist:
	561	if (rc)
	562	{
	563	d_drop(dentryP);
	564	goto xerror;
	565	}
	566	diP->i_sb->s_dirt = 1;
	567
	568	xerror:
	569	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_EXIT,
	570	"gpfs_i_create exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
	571	newInodeP, iNum, iNum, rc);
	572
	573	if (rc)
	574	cxiErrorNFS(rc);
	575
	576	VFS_STAT_STOP;
	577	EXIT(0);
	578	return -rc;
	579	}
	580
	581	/* If this routine successfully finds the file, it should
	582	* add the dentry to the hash list with d_add() and return
	583	* null. If a failure occurs then return non null and the
	584	* dentry will be dput() by the linux lfs layer
	585	*/
	586	struct dentry *
	587	gpfs_i_lookup(struct inode diP, struct dentry dentryP
	588	#if LINUX_KERNEL_VERSION >= 2060000
	589	, struct nameidata *ni
	590	#endif
	591	)
	592	{
	593	int code = 0;
	594	int rc = 0;
	595	struct dentry *retP = NULL;
	596	struct gpfsVfsData_t *privVfsP;
	597	ext_cred_t eCred;
	598	cxiNode_t *dcnP;
	599	cxiMode_t mode = 0;
	600	cxiIno_t iNum = (cxiIno_t)-1;
	601	cxiNode_t *cnP = NULL;
	602	struct inode *newInodeP = NULL;
	603
	604	VFS_STAT_START(lookupCall);
	605	ENTER(0);
	606	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_ENTER,
	607	"gpfs_i_lookup enter: diP 0x%lX dentryP 0x%lX name '%s'\n",
	608	diP, dentryP, dentryP->d_name.name);
	609	/* BKL is held at entry */
	610
	611	dcnP = VP_TO_CNP(diP);
	612	privVfsP = VP_TO_PVP(diP);
	613	LOGASSERT(privVfsP != NULL);
	614
	615	setCred(&eCred);
	616
	617	if (!dcnP)
	618	{
	619	/* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
	620	where "count" entries are to be pruned, but the last one is
	621	found to be recently referenced. When this happens, count is
	622	decremented, but the loop is not terminated. The result is that
	623	it continues to prune entries past where it should (prunes
	624	everything). If our patch for this is not applied, the result
	625	is a kernel failure as the cxiNode is referenced. Checking
	626	here (and revalidate) allows us to reject the call instead. */
	627
	628	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_STALE,
	629	"cxiNode for inode 0x%lX (ino 0x%X) was FREED!\n",
	630	diP, diP->i_ino);
	631
	632	/* Although we may like to know more about this inode, it is not
	633	* ok to call PRINTINODE(iP) here.
	634	*/
	635	rc = ESTALE;
	636	code = 1;
	637	retP = (struct dentry *)ERR_PTR(-rc);
	638	goto xerror;
	639	}
	640
	641	rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
	642	dentryP, (char *)dentryP->d_name.name,
	643	(void **)&newInodeP, &cnP, &iNum, NULL,
	644	&mode, &eCred, (void **)&retP);
	645
	646	if (rc == 0)
	647	{
	648	DBGASSERT(cnP != NULL);
	649	DBGASSERT(iNum != -1);
	650	DBGASSERT(newInodeP != NULL);
	651	DBGASSERT(newInodeP->PRVINODE == cnP);
	652	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
	653	}
	654	else if (rc != ENOENT) // internal failure
	655	{
	656	cxiErrorNFS(rc);
	657	code = 2;
	658	retP = (struct dentry *)ERR_PTR(-rc);
	659	goto xerror;
	660	}
	661	else if (diP->i_nlink == 0) // ENOENT but unlinked parent
	662	{
	663	/* This odd code is here because this function would normally
	664	* exit with a negative dcache entry on ENOENT. However if
	665	* we allow a negative dcache entry in a directory thats been
	666	* deleted (but we're still sitting in it) then the d_count
	667	* will never go to zero and we'll strand any open file that
	668	* is associated with the parent directory. If we drop the
	669	* dentry and return the ENOENT then the VFS will dput the
	670	* dentry. The scenario that gave us trouble was:
	671	*
	672	* NODE 1 NODE 2
	673	* `rm -rf dirA` `rm -rf dirA`
	674	* ==========================================================
	675	* gpfs_f_open("dirA", ...)
	676	* gpfs_f_readdir(...)
	677	* [read "fileA", "fileB"] gpfs_f_open("dirA", ...)
	678	* gpfs_f_readdir(...)
	679	* [read "fileA", "fileB"]
	680	*
	681	* gpfs_i_lookup("fileA")
	682	* gpfs_i_unlink("fileA")
	683	* gpfs_s_delete_inode(fileA's inode)
	684	* gpfs_i_lookup("fileB")
	685	* gpfs_i_unlink("fileB")
	686	* gpfs_s_delete_inode(fileB's inode)
	687	* ...
	688	* gpfs_i_rmdir("dirA", ...)
	689	* gpfs_s_delete_inode(dirA's inode)
	690	* destroyOnLastClose=1 for dirA <======
	691	*
	692	* gpfs_i_lookup("fileA")
	693	* [creates a negative dentry for fileA,
	694	* increments dirA's reference count]
	695	* gpfs_i_lookup("fileB")
	696	* [creates a negative dentry for fileB,
	697	* increments dirA's reference count]
	698	*/
	699	DBGASSERT(dentryP->d_inode == NULL);
	700	dentryP->d_op = NULL;
	701	d_drop(dentryP);
	702
	703	code = 3;
	704	retP = (struct dentry *)ERR_PTR(-rc);
	705	goto xerror;
	706	}
	707
	708	PRINTDENTRY(dentryP);
	709
	710	xerror:
	711	TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_EXIT,
	712	"gpfs_i_lookup exit: new inode 0x%lX iNum %d (0x%X) cnP 0x%lX retP 0x%lX "
	713	"code %d rc %d\n", newInodeP, iNum, iNum, cnP, retP, code, rc);
	714
	715	VFS_STAT_STOP;
	716	EXIT(0);
	717	return retP;
	718	}
	719
	720	int
	721	gpfs_i_link(struct dentry oldDentryP, struct inode diP,
	722	struct dentry *dentryP)
	723	{
	724	int rc = 0;
	725	struct inode *iP = oldDentryP->d_inode;
	726	cxiNode_t *dcnP;
	727	cxiNode_t *cnP = NULL;
	728	struct gpfsVfsData_t *privVfsP;
	729	char *tnameP;
	730	ext_cred_t eCred;
	731
	732	VFS_STAT_START(linkCall);
	733	ENTER(0);
	734	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_ENTER,
	735	"gpfs_i_link enter: diP 0x%lX dentryP 0x%lX "
	736	"dentryP 0x%lX name '%s'\n", diP, oldDentryP, dentryP,
	737	dentryP->d_name.name);
	738	/* BKL is held at entry */
	739
	740	cnP = VP_TO_CNP(iP);
	741	dcnP = VP_TO_CNP(diP);
	742	privVfsP = VP_TO_PVP(diP);
	743	LOGASSERT(privVfsP != NULL);
	744
	745	setCred(&eCred);
	746	rc = gpfs_ops.gpfsLink(privVfsP, cnP, dcnP,
	747	dentryP, (char *)dentryP->d_name.name, &eCred);
	748	if (rc)
	749	{
	750	d_drop(dentryP);
	751	goto xerror;
	752	}
	753	iP->i_sb->s_dirt = 1;
	754
	755	xerror:
	756	PRINTINODE(iP);
	757	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_EXIT,
	758	"gpfs_i_link exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);
	759
	760	if (rc)
	761	cxiErrorNFS(rc);
	762
	763	VFS_STAT_STOP;
	764	EXIT(0);
	765	return -rc;
	766	}
	767
	768	int
	769	gpfs_i_unlink(struct inode diP, struct dentry dentryP)
	770	{
	771	int rc = 0;
	772	struct gpfsVfsData_t *privVfsP;
	773	struct inode *iP = dentryP->d_inode;
	774	cxiNode_t *dcnP;
	775	cxiNode_t *cnP;
	776	ext_cred_t eCred;
	777	struct dentry_operations *orig_d_opP;
	778
	779	VFS_STAT_START(removeCall);
	780	ENTER(0);
	781	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_ENTER,
	782	"gpfs_i_unlink enter: diP 0x%lX iP 0x%lX dentryP 0x%lX name '%s'\n",
	783	diP, iP, dentryP, dentryP->d_name.name);
	784	/* BKL is held at entry */
	785
	786	cnP = VP_TO_CNP(iP);
	787
	788	dcnP = VP_TO_CNP(diP);
	789	privVfsP = VP_TO_PVP(diP);
	790	LOGASSERT(privVfsP != NULL);
	791
	792	/* Regarding dcache entry update: upon returning from gpfs_i_unlink, the VFS
	793	layer will turn the dentry into a valid, negative dcache entry by calling
	794	d_delete(). If another node then creates a new file with the same name,
	795	the BR token revoke for the directory block will invalidate the negative
	796	dcache entry. However, there is a window between the gpfsRemove() and
	797	the d_delete(), where a BR token revoke would not recognize that it
	798	should invalidate the dcache entry, because d_delete() has not yet turned
	799	it into a negative dcache entry. To fix this, we mark the dentry as
	800	"valid with d_delete pending"; the meaning of this state is "the dentry
	801	is still valid, but a BR token revoke should mark it as 'needing
	802	revalidation', even if it does not (yet) look like a negative dcache
	803	entry". Note that we don't want to mark "valid with d_delete pending"
	804	entries as invalid in the BR revoke handler, because we don't know for
	805	sure that the file is in fact going to be deleted. The unlink operation
	806	may fail, for any number of reasons, and the dentry should not be marked
	807	as invalid prematurely. It's safe to mark a dentry as 'needing
	808	revalidation', however. Ideally, we should swap d_op inside gpfsRemove
	809	while we are holding the BR lock on the directory. However, (1) there is
	810	local synchronization in the VFS (our caller is holding the i_sem
	811	semaphore on the directory) that will prevent other threads from doing a
	812	lookup or create that might change the state back to just plain "valid"
	813	before the gpfsRemove has happened, and (2) a BR revoke that happens
	814	before the gpfsRemove might unnecessarily mark the dentry as 'needing
	815	revalidation'; this is sub-optimal, but it doesn't hurt. Also see
	816	comment in gpfs_i_rmdir. */
	817	orig_d_opP = dentryP->d_op;
	818	dentryP->d_op = &gpfs_dops_ddeletepending;
	819
	820	setCred(&eCred);
	821	rc = gpfs_ops.gpfsRemove(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
	822	&eCred);
	823	if (rc)
	824	{
	825	d_drop(dentryP);
	826	if (dentryP->d_op == &gpfs_dops_ddeletepending)
	827	dentryP->d_op = orig_d_opP;
	828	goto xerror;
	829	}
	830	diP->i_sb->s_dirt = 1;
	831
	832	/* d_delete will be called at VFS layer if rc == 0 */
	833
	834	xerror:
	835	PRINTINODE(iP);
	836	PRINTDENTRY(dentryP);
	837	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_EXIT,
	838	"gpfs_i_unlink exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);
	839
	840	if (rc)
	841	cxiErrorNFS(rc);
	842
	843	VFS_STAT_STOP;
	844	EXIT(0);
	845	return -rc;
	846	}
	847
	848	int
	849	gpfs_i_symlink(struct inode diP, struct dentry dentryP,
	850	const char *symlinkTargetP)
	851	{
	852	int rc = 0;
	853	cxiNode_t *dcnP;
	854	cxiNode_t *cnP;
	855	cxiIno_t iNum = (cxiIno_t)-1;
	856	struct inode *newInodeP = NULL;
	857	struct gpfsVfsData_t *privVfsP;
	858	ext_cred_t eCred;
	859
	860	VFS_STAT_START(symlinkCall);
	861	ENTER(0);
	862	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK1,
	863	"gpfs_i_symlink enter: iP 0x%lX dentryP 0x%lX symlinkTargetP '%s'\n",
	864	diP, dentryP, symlinkTargetP);
	865	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK2,
	866	"gpfs_i_symlink: newLinkName '%s'\n", dentryP->d_name.name);
	867	/* BKL is held at entry */
	868
	869	dcnP = VP_TO_CNP(diP);
	870	privVfsP = VP_TO_PVP(diP);
	871	LOGASSERT(privVfsP != NULL);
	872
	873	setCred(&eCred);
	874	rc = gpfs_ops.gpfsSymlink(privVfsP, dcnP, (void **)&newInodeP, &cnP,
	875	&iNum, dentryP, (char *)dentryP->d_name.name,
	876	(char *)symlinkTargetP, &eCred);
	877	if (rc == 0)
	878	{
	879	DBGASSERT(cnP != NULL);
	880	DBGASSERT(iNum != -1);
	881	DBGASSERT(newInodeP != NULL);
	882	DBGASSERT(newInodeP->PRVINODE == cnP);
	883	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
	884	}
	885	else
	886	{
	887	d_drop(dentryP);
	888	goto xerror;
	889	}
	890	diP->i_sb->s_dirt = 1;
	891
	892	xerror:
	893	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK_EXIT,
	894	"gpfs_i_symlink exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
	895	newInodeP, iNum, iNum, rc);
	896
	897	if (rc)
	898	cxiErrorNFS(rc);
	899
	900	VFS_STAT_STOP;
	901	EXIT(0);
	902	return -rc;
	903	}
	904
	905	int
	906	gpfs_i_mkdir(struct inode diP, struct dentry dentryP, int mode)
	907	{
	908	int rc = 0;
	909	struct gpfsVfsData_t *privVfsP;
	910	cxiNode_t *dcnP;
	911	cxiNode_t *cnP;
	912	cxiMode_t umask;
	913	ext_cred_t eCred;
	914	cxiIno_t iNum = (cxiIno_t)-1;
	915	struct inode *newInodeP = NULL;
	916
	917	VFS_STAT_START(mkdirCall);
	918	ENTER(0);
	919	umask = get_umask(); /* LFS should not apply umask and we may not */
	920
	921	dcnP = VP_TO_CNP(diP);
	922	privVfsP = VP_TO_PVP(diP);
	923	LOGASSERT(privVfsP != NULL);
	924
	925	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_ENTER,
	926	"gpfs_i_mkdir enter: diP 0x%lX mode 0x%X name '%s'\n",
	927	diP, mode, dentryP->d_name.name);
	928	/* BKL is held at entry */
	929
	930	setCred(&eCred);
	931	rc = gpfs_ops.gpfsMkdir(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum,
	932	dentryP, (char *)dentryP->d_name.name, mode, umask,
	933	&eCred);
	934
	935	if (rc == 0)
	936	{
	937	DBGASSERT(cnP != NULL);
	938	DBGASSERT(iNum != -1);
	939	DBGASSERT(newInodeP != NULL);
	940	DBGASSERT(newInodeP->PRVINODE == cnP);
	941	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
	942	}
	943	else
	944	{
	945	d_drop(dentryP);
	946	goto xerror;
	947	}
	948	diP->i_sb->s_dirt = 1;
	949
	950	xerror:
	951	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_EXIT,
	952	"gpfs_i_mkdir exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
	953	newInodeP, iNum, iNum, rc);
	954
	955	if (rc)
	956	cxiErrorNFS(rc);
	957
	958	VFS_STAT_STOP;
	959	EXIT(0);
	960	return -rc;
	961	}
	962
	963	int
	964	gpfs_i_rmdir(struct inode diP, struct dentry dentryP)
	965	{
	966	int rc;
	967	struct inode *iP = dentryP->d_inode;
	968	cxiNode_t *dcnP;
	969	cxiNode_t *cnP;
	970	struct gpfsVfsData_t *privVfsP;
	971	ext_cred_t eCred;
	972	struct dentry_operations *orig_d_opP;
	973
	974	VFS_STAT_START(rmdirCall);
	975	ENTER(0);
	976	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_ENTER,
	977	"gpfs_i_rmdir enter: diP 0x%lX iP 0x%lX name '%s'\n",
	978	diP, iP, dentryP->d_name.name);
	979	/* BKL is held at entry */
	980
	981	cnP = VP_TO_CNP(iP);
	982	dcnP = VP_TO_CNP(diP);
	983	privVfsP = VP_TO_PVP(diP);
	984	LOGASSERT(privVfsP != NULL);
	985
	986	/* See comment in gpfs_i_unlink. Note that Linux kernel processes
	987	directory dentries a little differently from regular file
	988	dentries. In particular, it doesn't appear that a successful
	989	rmdir call results in the removed directory dentry being turned
	990	into a valid negative dentry; the dentry just gets unhashed and
	991	recycled if it had no references at the time of rmdir. If the
	992	dentry did have extra references, e.g. due to a process using the
	993	directory in question as cwd, the dentry is unhashed, but it
	994	remains a positive dentry pointing to the deleted inode, and will
	995	remain as such until the dentry ref count goes to zero, at which
	996	point the dentry is recycled. So there's no apparent need to
	997	mark directory dentries as 'needing revalidation' during BR token
	998	revoke (we do know that we need to do this for regular files).
	999	However, this particular aspect of Linux kernel operation is not
	1000	guaranteed to always work in this fashion, so we might as well
	1001	try to stay on the safe side of things, and treat directories the
	1002	same way as regular files. It doesn't appear that marking a
	1003	dentry as 'needing revalidation' has any ill effects besides extra
	1004	cycles required for revalidation, and BR token revoke handler
	1005	racing with an unsuccessful gpfsRmdir is a rare enough event to
	1006	tolerate this extra performance hit. */
	1007	orig_d_opP = dentryP->d_op;
	1008	dentryP->d_op = &gpfs_dops_ddeletepending;
	1009
	1010	setCred(&eCred);
	1011	rc = gpfs_ops.gpfsRmdir(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
	1012	&eCred);
	1013	if (rc)
	1014	{
	1015	if (rc == EEXIST)
	1016	rc = ENOTEMPTY;
	1017	if (dentryP->d_op == &gpfs_dops_ddeletepending)
	1018	dentryP->d_op = orig_d_opP;
	1019	/* d_drop(dentryP); */
	1020	goto xerror;
	1021	}
	1022	diP->i_sb->s_dirt = 1;
	1023
	1024	/* d_delete will be called at VFS layer if rc == 0 */
	1025	xerror:
	1026	PRINTINODE(iP);
	1027	PRINTDENTRY(dentryP);
	1028	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_EXIT,
	1029	"gpfs_i_rmdir exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);
	1030
	1031	if (rc)
	1032	cxiErrorNFS(rc);
	1033
	1034	VFS_STAT_STOP;
	1035	EXIT(0);
	1036	return -rc;
	1037	}
	1038
	1039	int
	1040	#if LINUX_KERNEL_VERSION >= 2050000
	1041	gpfs_i_mknod(struct inode diP, struct dentry dentryP, int mode, dev_t rdev)
	1042	#else
	1043	gpfs_i_mknod(struct inode diP, struct dentry dentryP, int mode, int rdev)
	1044	#endif
	1045	{
	1046	int rc = 0;
	1047	struct gpfsVfsData_t *privVfsP;
	1048	cxiNode_t *dcnP;
	1049	cxiNode_t *cnP;
	1050	cxiIno_t iNum = (cxiIno_t)-1;
	1051	struct inode *newInodeP = NULL;
	1052	cxiMode_t umask = get_umask();
	1053	ext_cred_t eCred;
	1054	cxiDev32_t rdev32;
	1055
	1056	VFS_STAT_START(mknodCall);
	1057	ENTER(0);
	1058	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_ENTER,
	1059	"gpfs_i_mknod enter: diP 0x%lX mode 0x%X rdev 0x%X name '%s'\n",
	1060	diP, mode, (int)rdev, dentryP->d_name.name);
	1061	/* BKL is held at entry */
	1062
	1063	dcnP = VP_TO_CNP(diP);
	1064	privVfsP = VP_TO_PVP(diP);
	1065	LOGASSERT(privVfsP != NULL);
	1066
	1067	setCred(&eCred);
	1068	rdev32 = cxiDevToDev32(rdev);
	1069	rc = gpfs_ops.gpfsMknod(privVfsP, dcnP, (void **)&newInodeP, &cnP,
	1070	&iNum, dentryP, (char *)dentryP->d_name.name,
	1071	mode, umask, (cxiDev_t)rdev32, &eCred);
	1072	if (rc == 0)
	1073	{
	1074	DBGASSERT(cnP != NULL);
	1075	DBGASSERT(iNum != -1);
	1076	DBGASSERT(newInodeP != NULL);
	1077	DBGASSERT(newInodeP->PRVINODE == cnP);
	1078	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
	1079	}
	1080	else
	1081	{
	1082	d_drop(dentryP);
	1083	goto xerror;
	1084	}
	1085	diP->i_sb->s_dirt = 1;
	1086
	1087	/* Set vector table for special files, gpfs will not get these operations.*/
	1088	#if LINUX_KERNEL_VERSION >= 2060000
	1089	init_special_inode(newInodeP, newInodeP->i_mode, newInodeP->i_rdev);
	1090	#else
	1091	init_special_inode(newInodeP, newInodeP->i_mode,
	1092	kdev_t_to_nr(newInodeP->i_rdev));
	1093	#endif
	1094
	1095	xerror:
	1096	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_EXIT,
	1097	"gpfs_i_mknod exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
	1098	newInodeP, iNum, iNum, rc);
	1099
	1100	VFS_STAT_STOP;
	1101	EXIT(0);
	1102	return -rc;
	1103	}
	1104
	1105	int
	1106	gpfs_i_rename(struct inode diP, struct dentry dentryP,
	1107	struct inode tdiP, struct dentry tDentryP)
	1108	{
	1109	int rc = 0;
	1110	struct inode *iP = dentryP->d_inode;
	1111	struct inode *tiP = tDentryP->d_inode;
	1112	struct gpfsVfsData_t *privVfsP;
	1113	cxiNode_t sourceCNP, sourceDirCNP, targetCNP, targetDirCNP;
	1114	ext_cred_t eCred;
	1115
	1116	VFS_STAT_START(renameCall);
	1117	ENTER(0);
	1118	TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_1,
	1119	"gpfs_i_rename enter: iP 0x%lX dvP 0x%lX name '%s'"
	1120	" tiP 0x%lX tdiP 0x%lX new name '%s'\n",
	1121	iP, diP, dentryP->d_name.name, tiP, tdiP, tDentryP->d_name.name);
	1122	/* BKL is held at entry */
	1123
	1124	/* Do not allow simple rename across mount points */
	1125	if (diP->i_sb != tdiP->i_sb)
	1126	{
	1127	rc = EXDEV;
	1128	goto xerror;
	1129	}
	1130
	1131	sourceCNP = VP_TO_CNP(iP);
	1132	sourceDirCNP = VP_TO_CNP(diP);
	1133
	1134	targetCNP = (tiP != NULL) ? VP_TO_CNP(tiP) : NULL;
	1135	targetDirCNP = VP_TO_CNP(tdiP);
	1136
	1137	privVfsP = VP_TO_PVP(iP);
	1138	LOGASSERT(privVfsP != NULL);
	1139
	1140	setCred(&eCred);
	1141	rc = gpfs_ops.gpfsRename(privVfsP, sourceCNP, sourceDirCNP,
	1142	(char *)dentryP->d_name.name, targetCNP,
	1143	targetDirCNP, (char *)tDentryP->d_name.name,
	1144	&eCred);
	1145	if (rc == 0)
	1146	{
	1147	gpfs_i_getattr_internal(iP);
	1148	gpfs_i_getattr_internal(diP);
	1149
	1150	if (tiP)
	1151	gpfs_i_getattr_internal(tiP);
	1152
	1153	if (tdiP != diP)
	1154	gpfs_i_getattr_internal(tdiP);
	1155
	1156	diP->i_sb->s_dirt = 1;
	1157	}
	1158
	1159	xerror:
	1160	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_EXIT,
	1161	"gpfs_i_rename exit: iP 0x%lX rc %d\n", iP, rc);
	1162
	1163	if (rc)
	1164	cxiErrorNFS(rc);
	1165
	1166	VFS_STAT_STOP;
	1167	EXIT(0);
	1168	return -rc;
	1169	}
	1170
	1171	int
	1172	gpfs_i_readlink(struct dentry dentryP, char bufP, int buflen)
	1173	{
	1174	int rc = 0;
	1175	Boolean gotBKL = false;
	1176	struct cxiUio_t tmpUio;
	1177	cxiIovec_t tmpIovec;
	1178	struct inode *iP = dentryP->d_inode;
	1179	struct gpfsVfsData_t *privVfsP;
	1180	cxiNode_t *cnP;
	1181
	1182	VFS_STAT_START(readlinkCall);
	1183	ENTER(0);
	1184	TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_ENTER,
	1185	"gpfs_i_readlink enter: dentryP 0x%lX bufP 0x%lX len %d "
	1186	"iP 0x%lX name '%s'\n",
	1187	dentryP, bufP, buflen, iP, dentryP->d_name.name);
	1188
	1189	/* BKL is not held at entry, except for NFS calls */
	1190	TraceBKL();
	1191	if (current->lock_depth >= 0) /* kernel lock is held by me */
	1192	{
	1193	gotBKL = true;
	1194	unlock_kernel();
	1195	}
	1196
	1197	cnP = VP_TO_CNP(iP);
	1198	privVfsP = VP_TO_PVP(iP);
	1199	LOGASSERT(privVfsP != NULL);
	1200
	1201	tmpIovec.iov_base = bufP; /* base memory address */
	1202	tmpIovec.iov_len = buflen; /* length of transfer for this area */
	1203
	1204	tmpUio.uio_iov = &tmpIovec; /* ptr to array of iovec structs */
	1205	tmpUio.uio_iovcnt = 1; /* #iovec elements left to be processed */
	1206	tmpUio.uio_iovdcnt = 0; /* #iovec elements already processed */
	1207	tmpUio.uio_offset = 0; /* byte offset in file/dev to read/write */
	1208	tmpUio.uio_resid = buflen; /* #bytes left in data area */
	1209	tmpUio.uio_segflg = UIO_USERSPACE; /* copy to user space buffer */
	1210	tmpUio.uio_fmode = 0; /* file modes from open file struct */
	1211
	1212	rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);
	1213
	1214	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_EXIT,
	1215	"gpfs_i_readlink exit: iP 0x%lX uio_resid %ld offset %d rc %d\n",
	1216	iP, tmpUio.uio_resid, tmpUio.uio_offset, rc);
	1217
	1218	VFS_STAT_STOP;
	1219
	1220	if (gotBKL) /* If held kernel lock on entry then reacquire it */
	1221	lock_kernel();
	1222
	1223	if (rc)
	1224	cxiErrorNFS(rc);
	1225
	1226	EXIT(0);
	1227	if (rc)
	1228	return (-rc);
	1229
	1230	return (buflen - tmpUio.uio_resid);
	1231	}
	1232
	1233	#if LINUX_KERNEL_VERSION >= 2061600
	1234	void* gpfs_i_follow_link(struct dentry dentry, struct nameidata nd)
	1235	#else
	1236	int gpfs_i_follow_link(struct dentry dentry, struct nameidata nd)
	1237	#endif
	1238	{
	1239	int rc;
	1240	Boolean gotBKL = false;
	1241	struct cxiUio_t tmpUio;
	1242	cxiIovec_t tmpIovec;
	1243	struct inode *iP = dentry->d_inode;
	1244	struct gpfsVfsData_t *privVfsP;
	1245	cxiNode_t *cnP;
	1246	char *buf = NULL;
	1247
	1248	ENTER(0);
	1249	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_ENTER,
	1250	"gpfs_i_follow_link enter: inode 0x%lX name '%s'\n",
	1251	dentry->d_inode, dentry->d_name.name);
	1252
	1253	/* BKL is not held at entry, except for NFS calls */
	1254	TraceBKL();
	1255	if (current->lock_depth >= 0) /* kernel lock is held by me */
	1256	{
	1257	gotBKL = true;
	1258	unlock_kernel();
	1259	}
	1260
	1261	/* Allocate a temporary buffer to hold the symlink contents */
	1262	buf = cxiMallocPinned(CXI_PATH_MAX+1);
	1263	if (buf == NULL)
	1264	{
	1265	rc = -ENOMEM;
	1266	goto xerror;
	1267	}
	1268
	1269	cnP = VP_TO_CNP(iP);
	1270	privVfsP = VP_TO_PVP(iP);
	1271	LOGASSERT(privVfsP != NULL);
	1272
	1273	tmpIovec.iov_base = buf; /* base memory address */
	1274	tmpIovec.iov_len = PATH_MAX; /* length of transfer for this area */
	1275
	1276	tmpUio.uio_iov = &tmpIovec; /* ptr to array of iovec structs */
	1277	tmpUio.uio_iovcnt = 1; /* #iovec elements left to be processed */
	1278	tmpUio.uio_iovdcnt = 0; /* #iovec elements already processed */
	1279	tmpUio.uio_offset = 0; /* byte offset in file/dev to read/write */
	1280	tmpUio.uio_resid = PATH_MAX; /* #bytes left in data area */
	1281	tmpUio.uio_segflg = UIO_SYSSPACE; /* copy to kernel space buffer */
	1282	tmpUio.uio_fmode = 0; /* file modes from open file struct */
	1283
	1284	/* Read symlink contents */
	1285	rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);
	1286	if (rc)
	1287	{
	1288	cxiErrorNFS(rc);
	1289	rc = -rc;
	1290	goto xerror;
	1291	}
	1292
	1293	/* set end of string */
	1294	buf[PATH_MAX - tmpUio.uio_resid] = 0;
	1295
	1296	TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_FOLLOW_LINK_1,
	1297	"gpfs_i_follow_link readlink rc %d data '%s'\n", rc, buf);
	1298
	1299	VFS_FOLLOW_LINK(rc, nd, buf);
	1300
	1301	exit:
	1302	if (buf)
	1303	cxiFreePinned(buf);
	1304
	1305	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_2,
	1306	"gpfs_i_follow_link exit: inode 0x%lX rc %d\n",
	1307	dentry->d_inode, rc);
	1308
	1309	if (gotBKL) /* If held kernel lock on entry then reacquire it */
	1310	lock_kernel();
	1311
	1312	EXIT(0);
	1313
	1314	#if LINUX_KERNEL_VERSION >= 2061600
	1315	return NULL; /* no cookie */
	1316	#else
	1317	return rc;
	1318	#endif
	1319
	1320	xerror:
	1321	path_release(nd);
	1322	goto exit;
	1323
	1324	}
	1325
	1326	#ifdef HAS_IOP_PUT_LINK
	1327
	1328	#if LINUX_KERNEL_VERSION >= 2061600
	1329	void gpfs_i_put_link(struct dentry dentry, struct nameidata nd, void* cookie)
	1330	#else
	1331	void gpfs_i_put_link(struct dentry dentry, struct nameidata nd)
	1332	#endif
	1333	{
	1334	char *buf = nd_get_link(nd);
	1335	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_PUTLINK,
	1336	"gpfs_i_put_link dentry 0x%lX nd 0x%lX buf 0x%lX\n", dentry, nd,
	1337	!IS_ERR(buf)? buf : NULL);
	1338	if (!IS_ERR(buf))
	1339	cxiFreePinned(buf);
	1340	}
	1341
	1342	#endif /* HAS_IOP_PUT_LINK */
	1343
	1344	int
	1345	gpfs_i_bmap(struct inode *iP, int fragment)
	1346	{
	1347	ENTER(0);
	1348	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_BMAP,
	1349	"gpfs_i_bmap: rc ENOSYS\n");
	1350	TraceBKL();
	1351	EXIT(0);
	1352	return -ENOSYS;
	1353	}
	1354
	1355	void
	1356	gpfs_i_truncate(struct inode *iP)
	1357	{
	1358	ENTER(0);
	1359	/* Nothing to do since the file size was updated on the notify_change
	1360	* call which preceeded this call
	1361	*/
	1362	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_TRUNCATE,
	1363	"gpfs_i_truncate: inode 0x%lX\n", iP);
	1364	TraceBKL();
	1365	EXIT(0);
	1366	}
	1367
	1368	int
	1369	gpfs_i_permission(struct inode *iP, int mode
	1370	#if LINUX_KERNEL_VERSION >= 2060000
	1371	, struct nameidata *ni
	1372	#endif
	1373	)
	1374	{
	1375	cxiNode_t *cnP;
	1376	struct gpfsVfsData_t *privVfsP;
	1377	ext_cred_t eCred;
	1378	int rc = 0;
	1379
	1380	VFS_STAT_START(accessCall);
	1381	ENTER(0);
	1382
	1383	/* BKL is held at entry */
	1384
	1385	cnP = VP_TO_CNP(iP);
	1386
	1387	TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_ENTER,
	1388	"gpfs_i_permission enter: iP 0x%lX mode 0x%X uid %d gid %d "
	1389	"i_mode 0x%X i_xinfo 0x%X", iP, mode, current->fsuid,
	1390	current->fsgid, iP->i_mode, cnP->xinfo);
	1391
	1392	privVfsP = VP_TO_PVP(iP);
	1393	LOGASSERT(privVfsP != NULL);
	1394
	1395	if (mode) /* call permission check only if got access mode */
	1396	{
	1397	setCred(&eCred);
	1398	rc = gpfs_ops.gpfsAccess(privVfsP, cnP, mode, ACC_SELF, &eCred);
	1399	}
	1400
	1401	xerror:
	1402	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_EXIT,
	1403	"gpfs_i_permission exit: iP 0x%lX std %d dir std %d rc %d",
	1404	iP, iP->i_op == &gpfs_iops_stdperm, iP->i_op == &gpfs_dir_iops_stdperm,
	1405	rc);
	1406
	1407	if (rc)
	1408	cxiErrorNFS(rc);
	1409
	1410	VFS_STAT_STOP;
	1411	EXIT(0);
	1412	return -rc;
	1413	}
	1414
	1415	int
	1416	gpfs_i_smap(struct inode *iP, int sector)
	1417	{
	1418	ENTER(0);
	1419	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_SMAP,
	1420	"gpfs_i_smap: rc ENOSYS\n");
	1421	TraceBKL();
	1422	EXIT(0);
	1423	return -ENOSYS;
	1424	}
	1425
	1426	int
	1427	gpfs_i_updatepage(struct file fP, struct page pageP, const char *bufP,
	1428	unsigned long offset, uint count, int sync)
	1429	{
	1430	ENTER(0);
	1431	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_UPDATEPAGE,
	1432	"gpfs_i_updatepage: rc ENOSYS\n");
	1433	TraceBKL();
	1434	EXIT(0);
	1435	return -ENOSYS;
	1436	}
	1437
	1438	int
	1439	gpfs_i_revalidate(struct dentry *dentryP)
	1440	{
	1441	int rc;
	1442	int code = 0;
	1443	struct inode *iP = dentryP->d_inode;
	1444	cxiNode_t *cnP;
	1445	cxiVattr_t vattr;
	1446	struct gpfsVfsData_t *privVfsP;
	1447
	1448	ENTER(0);
	1449	VFS_INC(revalidateCount);
	1450	TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_ENTER,
	1451	"gpfs_i_revalidate enter: dentryP 0x%lX iP 0x%lX ino 0x%X name '%s'\n",
	1452	dentryP, dentryP->d_inode,
	1453	(iP) ? iP->i_ino : -1, dentryP->d_name.name);
	1454	/* BKL is usually not held, but seems to be held when coming here as
	1455	part of setting an ACL */
	1456
	1457	if (iP == NULL)
	1458	{
	1459	code = 1;
	1460	rc = ENOENT;
	1461	goto xerror;
	1462	}
	1463	cnP = VP_TO_CNP(iP);
	1464
	1465	if (!cnP)
	1466	{
	1467	/* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
	1468	where "count" entries are to be pruned, but the last one is
	1469	found to be recently referenced. When this happens, count is
	1470	decremented, but the loop is not terminated. The result is that
	1471	it continues to prune entries past where it should (prunes
	1472	everything). If our patch for this is not applied, the result
	1473	is a kernel failure as the cxiNode is referenced. Checking
	1474	here (and lookup) allows us to reject the call instead. */
	1475
	1476	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REVALIDATE_STALE,
	1477	"gpfs_i_revalidate: cxiNode for iP 0x%lX (ino %d) was FREED!\n",
	1478	iP, iP->i_ino);
	1479
	1480	/* Although we may like to know more about this inode, it is not
	1481	* ok to call PRINTINODE(iP) here.
	1482	*/
	1483
	1484	rc = ESTALE;
	1485	code = 2;
	1486	goto xerror;
	1487	}
	1488
	1489	if ((cnP->icValid & CXI_IC_STAT) == CXI_IC_STAT)
	1490	{
	1491	rc = 0;
	1492	code = 3;
	1493	goto xerror;
	1494	}
	1495
	1496	privVfsP = VP_TO_PVP(iP);
	1497	LOGASSERT(privVfsP != NULL);
	1498
	1499	/* This has the effect of calling us back under a lock and
	1500	* setting the inode attributes at the OS level (since this
	1501	* operating system caches this info in the vfs layer)
	1502	*/
	1503	rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
	1504	PRINTINODE(iP);
	1505
	1506	#if 0
	1507	/* Delay briefly to give token revoke races a chance to happen, if there
	1508	are any. Time delay is in jiffies (10ms). */
	1509	# define howLong 5
	1510	TRACE1(TRACE_VNODE, 4, TRCID_REVAL_DELAY,
	1511	"gpfs_i_revalidate: begin delay %d\n", howLong);
	1512	current->state = TASK_INTERRUPTIBLE;
	1513	schedule_timeout(howLong);
	1514	TRACE1(TRACE_VNODE, 14, TRCID_REVAL_DELAY_END,
	1515	"gpfs_i_revalidate: end delay %d\n", howLong);
	1516	#endif
	1517
	1518	xerror:
	1519	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_EXIT,
	1520	"gpfs_i_revalidate exit: dentry 0x%lX code %d rc %d\n",
	1521	dentryP, code, rc);
	1522
	1523	if (rc)
	1524	cxiErrorNFS(rc);
	1525
	1526	EXIT(0);
	1527	return -rc;
	1528	}
	1529
	1530	int
	1531	gpfs_i_setattr(struct dentry dentryP, struct iattr iattrP)
	1532	{
	1533	int rc;
	1534
	1535	VFS_STAT_START(setattrCall);
	1536	ENTER(0);
	1537	rc = gpfs_i_setattr_internal(dentryP->d_inode, iattrP);
	1538
	1539	VFS_STAT_STOP;
	1540	EXIT(0);
	1541	return -rc;
	1542	}
	1543
	1544	int
	1545	gpfs_i_setattr_internal(struct inode iP, struct iattr aP)
	1546	{
	1547	int rc = 0;
	1548	int code = 0;
	1549	long arg1; /* must be large enough on 64bit to contain */
	1550	long arg2; /* either a pointer or integer */
	1551	long arg3;
	1552	cxiTimeStruc_t atime, mtime, ctime;
	1553	cxiNode_t *cnP;
	1554	struct gpfsVfsData_t *privVfsP;
	1555	ext_cred_t eCred;
	1556	unsigned int ia_valid;
	1557
	1558	ENTER(0);
	1559	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_ENTER,
	1560	"gpfs_i_setattr enter: iP 0x%lX ia_valid 0x%X\n", iP, aP->ia_valid);
	1561	/* ?? Callers of this are inconsistent about whether the BKL is held */
	1562
	1563	cnP = VP_TO_CNP(iP);
	1564	privVfsP = VP_TO_PVP(iP);
	1565	LOGASSERT(privVfsP != NULL);
	1566
	1567	ia_valid = aP->ia_valid;
	1568
	1569	/* Change file size */
	1570	if (ia_valid & ATTR_SIZE)
	1571	{
	1572	arg1 = (long)&aP->ia_size;
	1573	arg2 = 0;
	1574	arg3 = 0;
	1575
	1576	/* call gpfsSetattr, unless we know that new size is the same */
	1577	if (!(cnP->icValid & CXI_IC_ATTR) \|\|
	1578	((struct inode *)cnP->osNodeP)->i_size != aP->ia_size)
	1579	{
	1580	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
	1581	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_SIZE, arg1, arg2, arg3,
	1582	&eCred);
	1583	if (rc != 0)
	1584	{
	1585	code = 1;
	1586	goto xerror;
	1587	}
	1588
	1589	/* gpfsSetattr(... V_SIZE ...) will have updated ctime and mtime.
	1590	No need to do this again. */
	1591	ia_valid &= ~(ATTR_MTIME \| ATTR_CTIME);
	1592	}
	1593	}
	1594
	1595	/* Change file mode */
	1596	if (ia_valid & ATTR_MODE)
	1597	{
	1598	arg1 = (long)aP->ia_mode;
	1599	arg2 = 0;
	1600	arg3 = 0;
	1601
	1602	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
	1603	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_MODE, arg1, arg2, arg3, &eCred);
	1604	if (rc != 0)
	1605	{
	1606	code = 2;
	1607	goto xerror;
	1608	}
	1609	}
	1610
	1611	/* Change uid or gid */
	1612	if (ia_valid & (ATTR_UID \| ATTR_GID))
	1613	{
	1614	arg1 = 0;
	1615	arg2 = 0;
	1616	arg3 = 0;
	1617
	1618	if (ia_valid & ATTR_UID)
	1619	arg2 = (long)aP->ia_uid;
	1620	else
	1621	arg1 \|= T_OWNER_AS_IS;
	1622
	1623	if (ia_valid & ATTR_GID)
	1624	arg3 = (long)aP->ia_gid;
	1625	else
	1626	arg1 \|= T_GROUP_AS_IS;
	1627
	1628	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
	1629	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_OWN, arg1, arg2, arg3, &eCred);
	1630	if (rc != 0)
	1631	{
	1632	code = 3;
	1633	goto xerror;
	1634	}
	1635	}
	1636
	1637	/* Change access, modification, or change time */
	1638	if (ia_valid & (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME))
	1639	{
	1640	arg1 = 0;
	1641	arg2 = 0;
	1642	arg3 = 0;
	1643
	1644	if (ia_valid & ATTR_ATIME)
	1645	{
	1646	CXITIME_FROM_INODETIME(atime, aP->ia_atime);
	1647	arg1 = (long)&atime;
	1648	}
	1649	if (ia_valid & ATTR_MTIME)
	1650	{
	1651	CXITIME_FROM_INODETIME(mtime, aP->ia_mtime);
	1652	arg2 = (long)&mtime;
	1653	}
	1654	if (ia_valid & ATTR_CTIME)
	1655	{
	1656	CXITIME_FROM_INODETIME(ctime, aP->ia_ctime);
	1657	arg3 = (long)&ctime;
	1658	}
	1659	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
	1660	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_STIME, arg1, arg2, arg3, &eCred);
	1661	if (rc != 0)
	1662	{
	1663	code = 4;
	1664	goto xerror;
	1665	}
	1666	}
	1667
	1668	xerror:
	1669
	1670	if (rc == 0)
	1671	{
	1672	/* For NFS we might need to write the inode but the check will be done
	1673	* in gpfsSyncNFS().
	1674	*/
	1675	if (cxiAllowNFSFsync())
	1676	{
	1677	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
	1678	rc = gpfs_ops.gpfsSyncNFS(privVfsP, cnP, 0, &eCred);
	1679	}
	1680
	1681	iP->i_sb->s_dirt = 1;
	1682	}
	1683	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_EXIT,
	1684	"gpfs_i_setattr exit: iP 0x%lX code %d rc %d\n", iP, code, rc);
	1685
	1686	if (rc)
	1687	cxiErrorNFS(rc);
	1688
	1689	EXIT(0);
	1690	return rc;
	1691	}
	1692
	1693	#if LINUX_KERNEL_VERSION >= 2050000
	1694	int
	1695	gpfs_i_getattr(struct vfsmount mntP, struct dentry dentryP,
	1696	struct kstat *kstatP)
	1697	#else
	1698	int
	1699	gpfs_i_getattr(struct dentry dentryP, struct iattr iattrP)
	1700	#endif
	1701	{
	1702	int rc;
	1703	struct inode *iP = dentryP->d_inode;
	1704	cxiNode_t *cnP;
	1705
	1706	VFS_STAT_START(getattrCall);
	1707	ENTER(0);
	1708
	1709	cnP = VP_TO_CNP(iP);
	1710
	1711	if (cnP && ((cnP->icValid & CXI_IC_STAT) == CXI_IC_STAT)) /* attr are vaild */
	1712	rc = 0;
	1713	else
	1714	rc = gpfs_i_getattr_internal(iP);
	1715
	1716	if (!rc)
	1717	#if LINUX_KERNEL_VERSION >= 2050000
	1718	generic_fillattr(iP, kstatP);
	1719	#else
	1720	getIattr(iP, iattrP);
	1721	#endif
	1722	else
	1723	rc = -rc;
	1724
	1725	VFS_STAT_STOP;
	1726	EXIT(0);
	1727	return rc;
	1728	}
	1729
	1730	int
	1731	gpfs_i_getattr_internal(struct inode *iP)
	1732	{
	1733	int rc = 0;
	1734	cxiNode_t *cnP;
	1735	struct gpfsVfsData_t *privVfsP;
	1736	cxiVattr_t vattr;
	1737
	1738	ENTER(0);
	1739	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_ENTER,
	1740	"gpfs_i_getattr enter: iP 0x%lX\n", iP);
	1741	/* BKL is held at entry */
	1742
	1743	privVfsP = VP_TO_PVP(iP);
	1744	LOGASSERT(privVfsP != NULL);
	1745	cnP = VP_TO_CNP(iP);
	1746
	1747	/* This has the effect of calling us back under a lock and
	1748	* setting the inode attributes at the OS level (since this
	1749	* operating system caches this info in the vfs layer)
	1750	*/
	1751	rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
	1752	PRINTINODE(iP);
	1753
	1754	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_EXIT,
	1755	"gpfs_i_getattr exit: iP 0x%lX rc %d\n", iP, rc);
	1756
	1757	if (rc)
	1758	cxiErrorNFS(rc);
	1759
	1760	EXIT(0);
	1761	return rc;
	1762	}
	1763
	1764	#if LINUX_KERNEL_VERSION > 2060000
	1765	#include <cxiAclUser.h>
	1766
	1767	#define XATTR_SECURITY_PREFIX "security."
	1768	#define XATTR_TRUSTED_PREFIX "trusted."
	1769	#define XATTR_USER_PREFIX "user."
	1770	#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access"
	1771	#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default"
	1772
	1773	static const char *
	1774	test_prefix(const char name, const char prefix)
	1775	{
	1776	while (prefix && name == *prefix) {
	1777	name++;
	1778	prefix++;
	1779	}
	1780	return *prefix ? NULL : name;
	1781	}
	1782
	1783	/*
	1784	* Inode operation getxattr()
	1785	*
	1786	*/
	1787	ssize_t
	1788	gpfs_i_getxattr(struct dentry dentry, const char name, void *buf,
	1789	size_t buf_size)
	1790	{
	1791	int rc;
	1792	cxiNode_t *cnP;
	1793	struct gpfsVfsData_t *privVfsP;
	1794	struct tsxattr xattr;
	1795	struct tsxattrs xattrs;
	1796	ext_cred_t eCred;
	1797	void *argP = &xattrs;
	1798	int flags = 0;
	1799	struct inode *iP = dentry->d_inode;
	1800	mm_segment_t oldfs;
	1801	const char *n;
	1802
	1803	ENTER(0);
	1804	VFS_STAT_START(getxattrCall);
	1805
	1806	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_ENTER,
	1807	"gpfs_i_getxattr enter: iP 0x%lX name %s buf 0x%lX size %d\n",
	1808	iP, (name) ? name : "NULL", buf, buf_size);
	1809
	1810	if (iP == NULL)
	1811	{
	1812	rc = ENOENT;
	1813	goto xerror;
	1814	}
	1815
	1816	#ifdef CONFIG_FS_POSIX_ACL
	1817	if (n = test_prefix(name, XATTR_NAME_ACL_ACCESS)) {
	1818	if (n && (strcmp(n, "") != 0)) {
	1819	rc = EINVAL;
	1820	goto xerror;
	1821	}
	1822	rc = gpfs_get_posix_acl(dentry, ACL_TYPE_ACCESS, buf, buf_size);
	1823	goto xerror2;
	1824	}
	1825	if (S_ISDIR(iP->i_mode))
	1826	{
	1827	if (n = test_prefix(name, XATTR_NAME_ACL_DEFAULT)) {
	1828	if (n && (strcmp(n, "") != 0)) {
	1829	rc = EINVAL;
	1830	goto xerror;
	1831	}
	1832	rc = gpfs_get_posix_acl(dentry, ACL_TYPE_DEFAULT, buf, buf_size);
	1833	goto xerror2;
	1834	}
	1835	}
	1836	#endif
	1837	if (n = test_prefix(name, XATTR_SECURITY_PREFIX)) {
	1838	if (n && (strcmp(n, "") == 0)) {
	1839	rc = EINVAL;
	1840	goto xerror;
	1841	}
	1842	goto xattr;
	1843	}
	1844	if (n = test_prefix(name, XATTR_TRUSTED_PREFIX)) {
	1845	if (n && (strcmp(n, "") == 0)) {
	1846	rc = EINVAL;
	1847	goto xerror;
	1848	}
	1849	if (!capable(CAP_SYS_ADMIN)) {
	1850	rc = EPERM;
	1851	goto xerror;
	1852	}
	1853	goto xattr;
	1854	}
	1855	if (n = test_prefix(name, XATTR_USER_PREFIX)) {
	1856	if (n && (strcmp(n, "") == 0)) {
	1857	rc = EINVAL;
	1858	goto xerror;
	1859	}
	1860	goto xattr;
	1861	}
	1862	rc = EOPNOTSUPP;
	1863	goto xerror;
	1864
	1865	xattr:
	1866	setCred(&eCred);
	1867	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
	1868	xattrs.nattrs = 1; // no of attributes to get or set
	1869	xattrs.attrs = &xattr; // attributes to get or set
	1870
	1871	xattr.keyP = (char*) name; // attribute key
	1872	xattr.keyLen = strlen(name) + 1; // key length
	1873	xattr.valueP = buf; // attribute value
	1874	xattr.valueLen = buf_size; // length of attribute value
	1875
	1876	privVfsP = VP_TO_PVP(iP);
	1877	LOGASSERT(privVfsP != NULL);
	1878	cnP = VP_TO_CNP(iP);
	1879
	1880	oldfs = get_fs();
	1881	set_fs(get_ds());
	1882
	1883	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, GET_XATTR, argP,
	1884	NULL, &eCred);
	1885
	1886	set_fs(oldfs);
	1887	if (!rc)
	1888	{
	1889	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT0,
	1890	"gpfs_i_getxattr exit: iP 0x%lX len %d\n", iP, xattr.valueLen);
	1891	VFS_STAT_STOP;
	1892	EXIT(0);
	1893	if (xattr.valueLen < 0)
	1894	rc = ENODATA;
	1895	else
	1896	return (xattr.valueLen);
	1897	}
	1898
	1899	xerror:
	1900	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT,
	1901	"gpfs_i_getxattr exit: iP 0x%lX rc %d\n", iP, rc);
	1902
	1903	if (rc)
	1904	cxiErrorNFS(rc);
	1905
	1906	VFS_STAT_STOP;
	1907	EXIT(0);
	1908	return (-rc);
	1909
	1910	xerror2:
	1911	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT2,
	1912	"gpfs_i_getxattr exit2: iP 0x%lX rc %d\n", iP, rc);
	1913
	1914	if (rc)
	1915	cxiErrorNFS(rc);
	1916
	1917	VFS_STAT_STOP;
	1918	EXIT(0);
	1919	return (rc);
	1920	}
	1921
	1922	/*
	1923	* Inode operation setxattr()
	1924	*
	1925	*/
	1926	int
	1927	gpfs_i_setxattr(struct dentry dentry, const char name, const void *buf,
	1928	size_t buf_size, int ext_flags)
	1929	{
	1930	int rc;
	1931	cxiNode_t *cnP;
	1932	struct gpfsVfsData_t *privVfsP;
	1933	struct tsxattr xattr;
	1934	struct tsxattrs xattrs;
	1935	ext_cred_t eCred;
	1936	void *argP = &xattrs;
	1937	int flags = 0;
	1938	struct inode *iP = dentry->d_inode;
	1939	mm_segment_t oldfs;
	1940	const char *n;
	1941
	1942	ENTER(0);
	1943	VFS_STAT_START(setxattrCall);
	1944
	1945	TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_SETEXTATTR_ENTER,
	1946	"gpfs_i_setxattr enter: iP 0x%lX name %s buf 0x%lX size %d flags 0x%X\n",
	1947	iP, (name) ? name : "NULL", buf, buf_size, ext_flags);
	1948
	1949	if (iP == NULL)
	1950	{
	1951	rc = ENOENT;
	1952	goto xerror;
	1953	}
	1954
	1955	#ifdef CONFIG_FS_POSIX_ACL
	1956	if (n = test_prefix(name, XATTR_NAME_ACL_ACCESS)) {
	1957	if (n && (strcmp(n, "") != 0)) {
	1958	rc = EINVAL;
	1959	goto xerror;
	1960	}
	1961	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
	1962	return EPERM;
	1963	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_ACCESS, buf, buf_size);
	1964	goto xerror;
	1965	}
	1966	if (S_ISDIR(iP->i_mode))
	1967	{
	1968	if (n = test_prefix(name, XATTR_NAME_ACL_DEFAULT)) {
	1969	if (n && (strcmp(n, "") != 0)) {
	1970	rc = EINVAL;
	1971	goto xerror;
	1972	}
	1973	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
	1974	return EPERM;
	1975	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_DEFAULT, buf, buf_size);
	1976	goto xerror;
	1977	}
	1978	}
	1979	#endif
	1980	if (n = test_prefix(name, XATTR_SECURITY_PREFIX)) {
	1981	if (n && (strcmp(n, "") == 0)) {
	1982	rc = EINVAL;
	1983	goto xerror;
	1984	}
	1985	goto xattr;
	1986	}
	1987	if (n = test_prefix(name, XATTR_TRUSTED_PREFIX)) {
	1988	if (n && (strcmp(n, "") == 0)) {
	1989	rc = EINVAL;
	1990	goto xerror;
	1991	}
	1992	if (!capable(CAP_SYS_ADMIN)) {
	1993	rc = EPERM;
	1994	goto xerror;
	1995	}
	1996	goto xattr;
	1997	}
	1998	if (n = test_prefix(name, XATTR_USER_PREFIX)) {
	1999	if (n && (strcmp(n, "") == 0)) {
	2000	rc = EINVAL;
	2001	goto xerror;
	2002	}
	2003	goto xattr;
	2004	}
	2005	rc = EOPNOTSUPP;
	2006	goto xerror;
	2007
	2008	xattr:
	2009	setCred(&eCred);
	2010	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
	2011	xattrs.nattrs = 1; // no of attributes to get or set
	2012	xattrs.attrs = &xattr; // attributes to get or set
	2013
	2014	xattr.keyP = (char*) name; // attribute key
	2015	xattr.keyLen = strlen(name) + 1; // key length
	2016	xattr.valueP = (char *)buf; // attribute value
	2017	xattr.valueLen = buf_size; // length of attribute value
	2018
	2019	privVfsP = VP_TO_PVP(iP);
	2020	LOGASSERT(privVfsP != NULL);
	2021	cnP = VP_TO_CNP(iP);
	2022
	2023	oldfs = get_fs();
	2024	set_fs(get_ds());
	2025
	2026	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, SET_XATTR, argP,
	2027	NULL, &eCred);
	2028	set_fs(oldfs);
	2029	xerror:
	2030	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_SETEXTATTR_EXIT,
	2031	"gpfs_i_setxattr exit: iP 0x%lX rc %d\n", iP, rc);
	2032
	2033	if (rc)
	2034	cxiErrorNFS(rc);
	2035
	2036	VFS_STAT_STOP;
	2037	EXIT(0);
	2038	return (-rc);
	2039	}
	2040
	2041	/*
	2042	* Inode operation listxattr()
	2043	*
	2044	* Copy a list of attribute names into the buffer
	2045	* provided, or compute the buffer size required.
	2046	* Buffer is NULL to compute the size of the buffer required.
	2047	*
	2048	* Returns a negative error number on failure, or the number of bytes
	2049	* used / required on success.
	2050	*/
	2051	ssize_t
	2052	gpfs_i_listxattr(struct dentry dentry, char buf, size_t buf_size)
	2053	{
	2054	int rc;
	2055	cxiNode_t *cnP;
	2056	struct gpfsVfsData_t *privVfsP;
	2057	struct tsxattr xattr;
	2058	struct tsxattrs xattrs;
	2059	ext_cred_t eCred;
	2060	void *argP = &xattrs;
	2061	int flags = 0;
	2062	struct inode *iP = dentry->d_inode;
	2063	mm_segment_t oldfs;
	2064
	2065	ENTER(0);
	2066	VFS_STAT_START(listxattrCall);
	2067
	2068	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXATTR_IN,
	2069	"gpfs_i_listxattr enter: iP 0x%lX buf 0x%lX buf_size %d\n",
	2070	iP, buf, buf_size);
	2071
	2072
	2073	if (iP == NULL)
	2074	{
	2075	rc = ENOENT;
	2076	goto xerror;
	2077	}
	2078	setCred(&eCred);
	2079	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
	2080	xattrs.nattrs = 0; // get all attribute name
	2081	xattrs.attrs = &xattr; // attributes to get or set
	2082
	2083	xattr.keyP = NULL; // attribute key
	2084	xattr.keyLen = 0; // key length
	2085	xattr.valueP = buf; // attribute value
	2086	xattr.valueLen = buf_size; // length of attribute value
	2087
	2088	privVfsP = VP_TO_PVP(iP);
	2089	LOGASSERT(privVfsP != NULL);
	2090	cnP = VP_TO_CNP(iP);
	2091
	2092	oldfs = get_fs();
	2093	set_fs(get_ds());
	2094
	2095	/* which names can we show ??? */
	2096	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, LIST_XATTR, argP,
	2097	NULL, &eCred);
	2098
	2099	set_fs(oldfs);
	2100	if (!rc)
	2101	{
	2102	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXTATTR_EXIT0,
	2103	"gpfs_i_listxattr exit: iP 0x%lX len %d\n", iP, xattr.valueLen);
	2104	VFS_STAT_STOP;
	2105	EXIT(0);
	2106	return (xattr.valueLen);
	2107	}
	2108
	2109	xerror:
	2110	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXTATTR_EXIT,
	2111	"gpfs_i_listxattr exit: iP 0x%lX rc %d\n", iP, rc);
	2112
	2113	if (rc)
	2114	cxiErrorNFS(rc);
	2115
	2116	VFS_STAT_STOP;
	2117	EXIT(0);
	2118	return (-rc);
	2119	}
	2120
	2121	/*
	2122	* Inode operation removexattr()
	2123	*
	2124	*/
	2125	int
	2126	gpfs_i_removexattr(struct dentry dentry, const char name)
	2127	{
	2128	int rc;
	2129	cxiNode_t *cnP;
	2130	struct gpfsVfsData_t *privVfsP;
	2131	struct tsxattr xattr;
	2132	struct tsxattrs xattrs;
	2133	ext_cred_t eCred;
	2134	void *argP = &xattrs;
	2135	int flags = 0;
	2136	struct inode *iP = dentry->d_inode;
	2137	mm_segment_t oldfs;
	2138	const char *n;
	2139
	2140	ENTER(0);
	2141	VFS_STAT_START(removexattrCall);
	2142
	2143	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOVEXATTR_IN,
	2144	"gpfs_i_removexattr enter: iP 0x%lX name %s\n", iP, (name) ? name : "NULL");
	2145
	2146	if (iP == NULL)
	2147	{
	2148	rc = ENOENT;
	2149	goto xerror;
	2150	}
	2151	#ifdef CONFIG_FS_POSIX_ACL
	2152	if (n = test_prefix(name, XATTR_NAME_ACL_ACCESS)) {
	2153	if (n && (strcmp(n, "") != 0)) {
	2154	rc = EINVAL;
	2155	goto xerror;
	2156	}
	2157	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
	2158	return EPERM;
	2159	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_ACCESS, NULL, -1);
	2160	goto xerror;
	2161	}
	2162	if (S_ISDIR(iP->i_mode))
	2163	{
	2164	if (n = test_prefix(name, XATTR_NAME_ACL_DEFAULT)) {
	2165	if (n && (strcmp(n, "") != 0)) {
	2166	rc = EINVAL;
	2167	goto xerror;
	2168	}
	2169	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
	2170	return EPERM;
	2171	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_DEFAULT, NULL, -1);
	2172	goto xerror;
	2173	}
	2174	}
	2175	#endif
	2176	if (n = test_prefix(name, XATTR_SECURITY_PREFIX)) {
	2177	if (n && (strcmp(n, "") == 0)) {
	2178	rc = EINVAL;
	2179	goto xerror;
	2180	}
	2181	goto xattr;
	2182	}
	2183	if (n = test_prefix(name, XATTR_TRUSTED_PREFIX)) {
	2184	if (n && (strcmp(n, "") == 0)) {
	2185	rc = EINVAL;
	2186	goto xerror;
	2187	}
	2188	if (!capable(CAP_SYS_ADMIN)) {
	2189	rc = EPERM;
	2190	goto xerror;
	2191	}
	2192	goto xattr;
	2193	}
	2194	if (n = test_prefix(name, XATTR_USER_PREFIX)) {
	2195	if (n && (strcmp(n, "") == 0)) {
	2196	rc = EINVAL;
	2197	goto xerror;
	2198	}
	2199	goto xattr;
	2200	}
	2201	rc = EOPNOTSUPP;
	2202	goto xerror;
	2203
	2204	xattr:
	2205	setCred(&eCred);
	2206	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
	2207	xattrs.nattrs = 1; // no of attributes to get or set
	2208	xattrs.attrs = &xattr; // attributes to delete
	2209
	2210	xattr.keyP = (char*) name; // attribute key
	2211	xattr.keyLen = strlen(name) + 1; // key length
	2212	xattr.valueP = NULL; // attribute value
	2213	xattr.valueLen = -1; // length < zero means delete
	2214
	2215	privVfsP = VP_TO_PVP(iP);
	2216	LOGASSERT(privVfsP != NULL);
	2217	cnP = VP_TO_CNP(iP);
	2218
	2219	oldfs = get_fs();
	2220	set_fs(get_ds());
	2221
	2222	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, SET_XATTR, argP,
	2223	NULL, &eCred);
	2224	set_fs(oldfs);
	2225
	2226	xerror:
	2227	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOVEXATTR_EXIT,
	2228	"gpfs_i_removexattr exit: iP 0x%lX rc %d\n", iP, rc);
	2229
	2230	if (rc)
	2231	cxiErrorNFS(rc);
	2232
	2233	VFS_STAT_STOP;
	2234	EXIT(0);
	2235	return (-rc);
	2236	}
	2237	#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/inode.c @ 146

Download in other formats: