Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

inode.c @ 16

Last change on this file since 16 was 16, checked in by rock, 16 years ago

File size: 61.1 KB

Line
1	/***************************************************************************
2	*
3	* Copyright (C) 2001 International Business Machines
4	* All rights reserved.
5	*
6	* This file is part of the GPFS mmfslinux kernel module.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* 1. Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* 2. Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* 3. The name of the author may not be used to endorse or promote products
18	* derived from this software without specific prior written
19	* permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*
32	*************************************************************************** */
33	/* @(#)01 1.90.1.4 src/avs/fs/mmfs/ts/kernext/gpl-linux/inode.c, mmfs, avs_rgpfs24, rgpfs24s012a 4/17/07 15:54:47 */
34	/*
35	* Inode operations
36	*
37	* Contents:
38	* printInode
39	* printDentry
40	* cxiSetOSNode
41	* cxiInvalidatePerm
42	* getIattr
43	* get_umask
44	* setCred
45	* gpfs_i_create
46	* gpfs_i_lookup
47	* gpfs_i_link
48	* gpfs_i_unlink
49	* gpfs_i_symlink
50	* gpfs_i_mkdir
51	* gpfs_i_rmdir
52	* gpfs_i_mknod
53	* gpfs_i_rename
54	* gpfs_i_readlink
55	* gpfs_i_follow_link
56	* gpfs_i_readpage (in mmap.c)
57	* gpfs_i_writepage (in mmap.c)
58	* gpfs_i_bmap
59	* gpfs_i_truncate
60	* gpfs_i_permission
61	* gpfs_i_smap
62	* gpfs_i_updatepage
63	* gpfs_i_revalidate
64	* gpfs_i_setattr
65	* gpfs_i_setattr_internal
66	* gpfs_i_getattr
67	* gpfs_i_getattr_internal
68	* gpfs_i_lock
69	* gpfs_i_getxattr
70	* gpfs_i_setxattr
71	* gpfs_i_listxattr
72	* gpfs_i_removexattr
73	*/
74
75	#include <Shark-gpl.h>
76
77	#include <linux/fs.h>
78	#include <linux/sched.h>
79	#include <linux/slab.h>
80	#include <linux/errno.h>
81	#include <linux/smp_lock.h>
82	#include <linux/mm.h>
83	#include <linux/highmem.h>
84	#include <linux/kdev_t.h>
85
86	#include <verdep.h>
87	#include <cxiMode.h>
88	#include <cxiSystem.h>
89	#include <cxi2gpfs.h>
90	#include <cxiVFSStats.h>
91	#include <cxiCred.h>
92
93	#include <linux2gpfs.h>
94	#include <Trace.h>
95
96	#if LINUX_KERNEL_VERSION > 2060000
97	#include <cxiTSFattr.h>
98	#endif
99
100	#ifdef MODULE
101	#include <linux/module.h>
102	#endif /* MODULE */
103
104	void
105	printInode(struct inode *iP)
106	{
107	TRACE7(TRACE_VNODE, 3, TRCID_PRINTINODE_1,
108	"printInode: iP 0x%lX inode %d (0x%X) i_count %d dev 0x%X "
109	"mode 0x%X nlink %d\n",
110	iP, iP->i_ino, iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
111	KDEV_INT(iP->i_rdev), iP->i_mode, iP->i_nlink);
112
113	TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_2,
114	"printInode: uid %d gid %d rdev 0x%X atime 0x%X mtime 0x%X "
115	"ctime 0x%X\n", iP->i_uid, iP->i_gid, KDEV_INT(iP->i_rdev),
116	GET_INODETIME_SEC(iP->i_atime), GET_INODETIME_SEC(iP->i_mtime),
117	GET_INODETIME_SEC(iP->i_ctime));
118
119	TRACE5(TRACE_VNODE, 3, TRCID_PRINTINODE_4,
120	"printInode: size %lld blksize 0x%X blocks %d ver 0x%X op 0x%lX\n",
121	iP->i_size, iP->i_blocks, iP->i_blocks, iP->i_version,
122	iP->i_op);
123
124	TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_5,
125	"printInode: fop 0x%lX sb 0x%lX flags 0x%X state 0x%X gen %d "
126	"generic 0x%lX\n", iP->i_fop, iP->i_sb, iP->i_flags, iP->i_state,
127	iP->i_generation, iP->PRVINODE);
128
129	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_6,
130	"printInode: list 0x%lX next 0x%lX prev 0x%lX\n",
131	&(iP->i_list), iP->i_list.next, iP->i_list.prev);
132
133	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_7,
134	"printInode: dentry 0x%lX next 0x%lX prev 0x%lX\n",
135	&(iP->i_dentry), iP->i_dentry.next, iP->i_dentry.prev);
136
137	#if LINUX_KERNEL_VERSION < 2050000
138	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_8,
139	"printInode: hash 0x%lX next 0x%lX prev 0x%lX\n",
140	&(iP->i_hash), iP->i_hash.next, iP->i_hash.prev);
141	#else
142	TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_9,
143	"printInode: hash 0x%lX next 0x%lX prev 0x%lX\n",
144	&(iP->i_hash), iP->i_hash.next, *iP->i_hash.pprev);
145	#endif
146	}
147
148	void
149	printDentry(struct dentry *dP)
150	{
151	struct inode *iP = dP->d_inode;
152
153	if (!_TRACE_IS_ON(TRACE_VNODE, 3))
154	return;
155
156	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_1,
157	"printDentry: dentry 0x%lX count %d name '%s'\n",
158	dP, atomic_read((atomic_t *)&dP->d_count), dP->d_name.name);
159
160	TRACE5N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_2,
161	"printDentry: time 0x%X op 0x%lX flags 0x%X parent 0x%lX "
162	"inode 0x%X\n", dP->d_time, dP->d_op, dP->d_flags,
163	dP->d_parent, iP);
164
165	if (iP)
166	{
167	if (!list_empty(&iP->i_dentry))
168	TRACE4N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3A,
169	"printDentry: i_ino %d i_count %d "
170	"i_dentry next 0x%lX i_dentry prev 0x%lX\n",
171	iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
172	list_entry(iP->i_dentry.next, struct dentry, d_alias),
173	list_entry(iP->i_dentry.prev, struct dentry, d_alias));
174	else
175	TRACE2N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3B,
176	"printDentry: i_ino %d i_count %d\n",
177	iP->i_ino, atomic_read((atomic_t *)&iP->i_count));
178	}
179
180	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3C,
181	"printDentry: &d_hash 0x%lX d_hash.next 0x%lX d_hash.prev 0x%lX\n",
182	&dP->d_child, dP->d_child.next, dP->d_child.prev);
183
184	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_4,
185	"printDentry: &child 0x%lX child.next 0x%lX child.prev 0x%lX\n",
186	&dP->d_child, dP->d_child.next, dP->d_child.prev);
187
188	if (!list_empty(&dP->d_subdirs))
189	TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_5,
190	"printDentry: &subdirs 0x%lX subdir next 0x%lX "
191	"subdir prev 0x%lX\n", &dP->d_subdirs,
192	list_entry(dP->d_subdirs.next, struct dentry, d_child),
193	list_entry(dP->d_subdirs.prev, struct dentry, d_child));
194	}
195
196	/* Print directory entry tree up to maxPrint elements.
197	* If maxPrint is 0 then there is no upper limit.
198	*/
199	void
200	printDentryTree(struct dentry *entryDP, int maxPrint)
201	{
202	int count = 0;
203	struct list_head *lhP;
204	struct dentry *siblingDP;
205	struct dentry *parentDP;
206
207	/* Check trace level required by printDentry() */
208	if (!_TRACE_IS_ON(TRACE_VNODE, 3))
209	return;
210
211	spin_lock(&dcache_lock);
212
213	parentDP = entryDP;
214	lhP = parentDP->d_subdirs.next;
215
216	printDentry(parentDP);
217	if (maxPrint > 0 && ++count >= maxPrint)
218	goto xerror;
219
220	if (list_empty(&parentDP->d_subdirs))
221	goto xerror;
222
223	do
224	{
225	while (lhP != &parentDP->d_subdirs)
226	{
227	siblingDP = list_entry(lhP, struct dentry, d_child);
228
229	printDentry(siblingDP);
230	if (maxPrint > 0 && ++count >= maxPrint)
231	goto xerror;
232
233	if (!list_empty(&siblingDP->d_subdirs))
234	{
235	parentDP = siblingDP;
236	lhP = siblingDP->d_subdirs.next;
237	continue;
238	}
239
240	lhP = siblingDP->d_child.next;
241	parentDP = siblingDP->d_parent;
242	}
243
244	siblingDP = siblingDP->d_parent;
245	parentDP = siblingDP->d_parent;
246	lhP = siblingDP->d_child.next;
247	}
248	while (lhP != entryDP->d_child.next);
249
250	xerror:
251	spin_unlock(&dcache_lock);
252
253	return;
254	}
255
256	/* Set the inode operations table for a regular file or directory. Call
257	with xperm set to true if the file has extended permission attributes
258	(i.e. an ACL). This routine is a no-op if the inode is not a regular
259	file or directory.
260
261	If the file does not have extended attributes, the table that is used
262	will have a null value for the permission routine pointer. This will
263	cause Linux to perform access checks directly instead of acquiring the
264	kernel lock and calling GPFS, giving better performance. */
265	void setIopTable(struct inode *iP, Boolean xperm)
266	{
267	struct inode_operations newopP, stdopP, *xopP;
268	struct list_head *lp;
269	int count = 0;
270
271	/* Choose the correct inode operations table based on whether this is a
272	directory or a regular file. Assume that the file has extended
273	attributes so that GPFS permission checking will be required. */
274	ENTER(0);
275	if (S_ISDIR(iP->i_mode))
276	xopP = &gpfs_dir_iops_xperm;
277	else if (S_ISREG(iP->i_mode))
278	xopP = &gpfs_iops_xperm;
279	else
280	{
281	EXIT(0);
282	return;
283	}
284
285	/* If the file really does have extended attributes (or if the token has
286	been lost so that we do not know the status), set extended permission
287	table and exit. */
288	if (xperm)
289	{
290	iP->i_op = xopP;
291	EXIT(0);
292	return;
293	}
294
295	/* Get address of an inode operations table that has a generic permission
296	routine pointer. */
297	iP->i_op = S_ISDIR(iP->i_mode) ? &gpfs_dir_iops_stdperm : &gpfs_iops_stdperm;
298	EXIT(0);
299	}
300
301
302	void
303	cxiSetOSNode(void osVfsP, cxiNode_t cnP, cxiVattr_t *attrP)
304	{
305	struct super_block sbP = (struct super_block )osVfsP;
306	struct inode inodeP = (struct inode )cnP->osNodeP;
307
308	ENTER(0);
309	DBGASSERT(inodeP != NULL);
310	DBGASSERT(inodeP->PRVINODE == cnP);
311	DBGASSERT(inodeP->i_sb == sbP);
312
313	inodeP->i_mode = attrP->va_mode;
314	inodeP->i_nlink = attrP->va_nlink;
315	inodeP->i_uid = attrP->va_uid;
316	inodeP->i_gid = attrP->va_gid;
317	inodeP->i_rdev = cxiDevToKernelDev(cxiDev32ToDev(attrP->va_rdev));
318
319	CXITIME_TO_INODETIME(attrP->va_atime, inodeP->i_atime);
320	CXITIME_TO_INODETIME(attrP->va_mtime, inodeP->i_mtime);
321	CXITIME_TO_INODETIME(attrP->va_ctime, inodeP->i_ctime);
322
323	inodeP->i_size = attrP->va_size;
324	inodeP->i_blocks = attrP->va_blocksize;
325	inodeP->i_blocks = attrP->va_blocks;
326	inodeP->i_generation = attrP->va_gen;
327	inodeP->i_flags = 0;
328
329	cnP->xinfo = attrP->va_xinfo;
330
331	switch (inodeP->i_mode & S_IFMT)
332	{
333	case S_IFREG:
334	setIopTable(inodeP, (attrP->va_xinfo & VA_XPERM) != 0);
335	if (cxiIsNFSThread())
336	inodeP->i_fop = &gpfs_fops_no_sendfile;
337	else
338	inodeP->i_fop = &gpfs_fops;
339	break;
340
341	case S_IFDIR:
342	setIopTable(inodeP, (attrP->va_xinfo & VA_XPERM) != 0);
343	inodeP->i_fop = &gpfs_dir_fops;
344	break;
345
346	case S_IFLNK:
347	inodeP->i_op = &gpfs_link_iops;
348	inodeP->i_fop = &gpfs_fops;
349	break;
350
351	case S_IFBLK:
352	case S_IFCHR:
353	case S_IFIFO:
354	case S_IFSOCK:
355	/* Set vector table for special files, gpfs will not get
356	* these operations.
357	*/
358	#if LINUX_KERNEL_VERSION >= 2060000
359	init_special_inode(inodeP, inodeP->i_mode, inodeP->i_rdev);
360	#else
361	init_special_inode(inodeP, inodeP->i_mode,
362	kdev_t_to_nr(inodeP->i_rdev));
363	#endif
364	break;
365	}
366	if (inodeP->i_mapping)
367	inodeP->i_mapping->a_ops = &gpfs_aops;
368
369	cnP->icValid = CXI_IC_ALL;
370
371	TRACE7(TRACE_VNODE, 2, TRCID_LINUXOPS_SETINODE,
372	"cxiSetOSNode: inodeP 0x%lX inode %d i_count %d i_mode 0x%X "
373	"i_xinfo 0x%X i_nlink %d i_size %lld\n",
374	inodeP, inodeP->i_ino, atomic_read((atomic_t *)&inodeP->i_count),
375	inodeP->i_mode, attrP->va_xinfo, inodeP->i_nlink, inodeP->i_size);
376	EXIT(0);
377	return;
378	}
379
380
381	/* The following function is called from cxiInvalidateAttr when the
382	CXI_IC_PERM option was specified, which indicates that permission related
383	attributes cached in the struct inode (owner, mode, etc.) are no longer
384	known to be valid. */
385	void
386	cxiInvalidatePerm(cxiNode_t *cnP)
387	{
388	struct inode inodeP = (struct inode )cnP->osNodeP;
389
390	ENTER(0);
391	TRACE3(TRACE_VNODE, 2, TRCID_CXIINVA_PERM,
392	"cxiInvalidatePerm: cnP 0x%lX std %d dir std %d",
393	cnP, inodeP->i_op == &gpfs_iops_stdperm,
394	inodeP->i_op == &gpfs_dir_iops_stdperm);
395
396	/* Set the inode operation table to gpfs_..._xperm; the next permission
397	check will then go through our gpfs_i_permission function, which will
398	revalidate permission attributes and set the inode operation table
399	back to gpfs_..._stdperm, if appropriate. Note: since symlinks always
400	have permission iop set, setIopTable is a noop for symlinks. */
401	setIopTable(inodeP, true);
402	EXIT(0);
403	}
404
405	static void
406	getIattr(struct inode inodeP, struct iattr attrP)
407	{
408	ENTER(0);
409	// attrP->ia_valid = ??? ;
410	attrP->ia_mode = inodeP->i_mode;
411	attrP->ia_uid = inodeP->i_uid;
412	attrP->ia_gid = inodeP->i_gid;
413	attrP->ia_size = inodeP->i_size;
414	attrP->ia_atime = inodeP->i_atime;
415	attrP->ia_mtime = inodeP->i_mtime;
416	attrP->ia_ctime = inodeP->i_ctime;
417	EXIT(0);
418	return;
419	}
420
421	static inline int
422	get_umask()
423	{
424	return (current->fs->umask);
425	}
426
427
428	/* Record credentials of current thread */
429	void
430	setCred(ext_cred_t *credP)
431	{
432	int nGroups;
433
434	ENTER(0);
435	credP->principal = current->fsuid; /* user id */
436	credP->group = current->fsgid; /* primary group id */
437
438	#if LINUX_KERNEL_VERSION > 2060300
439	nGroups = MIN(current->group_info->ngroups, MIN(ECRED_NGROUPS, NGROUPS_SMALL));
440	#else
441	nGroups = MIN(current->ngroups, ECRED_NGROUPS);
442	#endif
443	credP->num_groups = nGroups;
444	if (nGroups > 0)
445	#if LINUX_KERNEL_VERSION > 2060300
446	memcpy(credP->eGroups, current->group_info->blocks[0], nGroups*sizeof(gid_t));
447	/* ?? This is incorrect. Linux 2.6 supports a very large list of
448	groups by allocating a page for each bunch of groups. Only if
449	there are <= NGROUPS_SMALL groups is the space in
450	group_info->small_block used. GPFS will only see the prefix of
451	the group set. */
452	/* To save kernel stack space, the GPFS ext_cred_t should keep a
453	pointer to the array of groups. The group set cannot change
454	during a GPFS system call since the caller can only make one
455	system call at a time. */
456	#else
457	memcpy(credP->eGroups, current->groups, nGroups*sizeof(gid_t));
458	#endif
459	EXIT(0);
460	}
461
462	/* inode_operations */
463
464	/* Called with a negative (no inode) dir cache entry.
465	* If this call succeeds, we fill in with d_instantiate().
466	*/
467
468	int
469	gpfs_i_create(struct inode diP, struct dentry dentryP, int mode
470	#if LINUX_KERNEL_VERSION >= 2060000
471	, struct nameidata *ni
472	#endif
473	)
474	{
475	int rc;
476	struct gpfsVfsData_t *privVfsP;
477	cxiNode_t *dcnP;
478	cxiNode_t *cnP = NULL;
479	cxiIno_t iNum = (cxiIno_t)-1;
480	struct inode *newInodeP = NULL;
481	int flags = FWRITE \| FCREAT \| FEXCL;
482	cxiMode_t umask = get_umask();
483	ext_cred_t eCred;
484	struct dentry *retP;
485
486	VFS_STAT_START(createCall);
487	ENTER(0);
488	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_ENTER,
489	"gpfs_i_create enter: iP 0x%lX dentryP 0x%lX mode 0x%X name '%s'\n",
490	diP, dentryP, mode, dentryP->d_name.name);
491	/* BKL is held at entry */
492
493	dcnP = VP_TO_CNP(diP);
494	privVfsP = VP_TO_PVP(diP);
495	LOGASSERT(privVfsP != NULL);
496
497	retry:
498
499	setCred(&eCred);
500	rc = gpfs_ops.gpfsCreate(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum, 0,
501	flags, dentryP, (char *)dentryP->d_name.name,
502	mode, umask, NULL, &eCred);
503	if (rc == 0)
504	{
505	DBGASSERT(cnP != NULL);
506	DBGASSERT(iNum != -1);
507	DBGASSERT(newInodeP != NULL);
508	DBGASSERT(newInodeP->PRVINODE == cnP);
509	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
510	cnP->createRaceLoserThreadId = 0;
511	}
512
513	/* linux would normally serialize the creates on a directory (via the
514	* parent directory semaphore) to ensure that a create didn't fail with
515	* EEXIST. However in a multinode environment we may perform a lookup
516	* on one node (thinking the file doesn't exist) yet a create is
517	* performed on a different node before linux can call the physical
518	* file systems create. We attempt to reconcile this case by marking
519	* the fact that this happened and checking the FEXCL flag at gpfs_f_open()
520	* to see if we should have failed this with EEXIST.
521	*/
522	if (rc == EEXIST)
523	{
524	/* Make sure that this create call is part of the linux open call. NFS
525	and mknod calls create without an open, so check that this is not one
526	of those calls. On the open call the open flags are available and if
527	the FEXCL was on fail it with EEXIST. */
528	int mode1;
529
530	/* Skip if NFS create call. */
531	if (cxiIsNFSThread())
532	goto retExist;
533
534	/* ??? if (sys_mknod call) goto xerror; */
535
536	/* Do it only if trying to create a regular file. */
537	if (((mode & S_IFMT) != 0) && !(mode & S_IFREG))
538	goto retExist;
539
540	setCred(&eCred); // rebuild since gpfsCreate may remap ids
541	rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
542	dentryP, (char *)dentryP->d_name.name,
543	(void **)&newInodeP, &cnP, &iNum, NULL,
544	&mode1, &eCred, (void **)&retP);
545	if (rc == ENOENT)
546	goto retry;
547	if (!rc)
548	{
549	/* If the file that was found was a directory than return the
550	return code that linux would have returned. */
551	if (S_ISDIR(newInodeP->i_mode))
552	{
553	rc = EISDIR;
554	goto retExist;
555	}
556	cnP->createRaceLoserThreadId = cxiGetThreadId();
557	}
558	}
559
560	retExist:
561	if (rc)
562	{
563	d_drop(dentryP);
564	goto xerror;
565	}
566	diP->i_sb->s_dirt = 1;
567
568	xerror:
569	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_EXIT,
570	"gpfs_i_create exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
571	newInodeP, iNum, iNum, rc);
572
573	if (rc)
574	cxiErrorNFS(rc);
575
576	VFS_STAT_STOP;
577	EXIT(0);
578	return -rc;
579	}
580
581	/* If this routine successfully finds the file, it should
582	* add the dentry to the hash list with d_add() and return
583	* null. If a failure occurs then return non null and the
584	* dentry will be dput() by the linux lfs layer
585	*/
586	struct dentry *
587	gpfs_i_lookup(struct inode diP, struct dentry dentryP
588	#if LINUX_KERNEL_VERSION >= 2060000
589	, struct nameidata *ni
590	#endif
591	)
592	{
593	int code = 0;
594	int rc = 0;
595	struct dentry *retP = NULL;
596	struct gpfsVfsData_t *privVfsP;
597	ext_cred_t eCred;
598	cxiNode_t *dcnP;
599	cxiMode_t mode = 0;
600	cxiIno_t iNum = (cxiIno_t)-1;
601	cxiNode_t *cnP = NULL;
602	struct inode *newInodeP = NULL;
603
604	VFS_STAT_START(lookupCall);
605	ENTER(0);
606	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_ENTER,
607	"gpfs_i_lookup enter: diP 0x%lX dentryP 0x%lX name '%s'\n",
608	diP, dentryP, dentryP->d_name.name);
609	/* BKL is held at entry */
610
611	dcnP = VP_TO_CNP(diP);
612	privVfsP = VP_TO_PVP(diP);
613	LOGASSERT(privVfsP != NULL);
614
615	setCred(&eCred);
616
617	if (!dcnP)
618	{
619	/* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
620	where "count" entries are to be pruned, but the last one is
621	found to be recently referenced. When this happens, count is
622	decremented, but the loop is not terminated. The result is that
623	it continues to prune entries past where it should (prunes
624	everything). If our patch for this is not applied, the result
625	is a kernel failure as the cxiNode is referenced. Checking
626	here (and revalidate) allows us to reject the call instead. */
627
628	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_STALE,
629	"cxiNode for inode 0x%lX (ino 0x%X) was FREED!\n",
630	diP, diP->i_ino);
631
632	/* Although we may like to know more about this inode, it is not
633	* ok to call PRINTINODE(iP) here.
634	*/
635	rc = ESTALE;
636	code = 1;
637	retP = (struct dentry *)ERR_PTR(-rc);
638	goto xerror;
639	}
640
641	rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
642	dentryP, (char *)dentryP->d_name.name,
643	(void **)&newInodeP, &cnP, &iNum, NULL,
644	&mode, &eCred, (void **)&retP);
645
646	if (rc == 0)
647	{
648	DBGASSERT(cnP != NULL);
649	DBGASSERT(iNum != -1);
650	DBGASSERT(newInodeP != NULL);
651	DBGASSERT(newInodeP->PRVINODE == cnP);
652	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
653	}
654	else if (rc != ENOENT) // internal failure
655	{
656	cxiErrorNFS(rc);
657	code = 2;
658	retP = (struct dentry *)ERR_PTR(-rc);
659	goto xerror;
660	}
661	else if (diP->i_nlink == 0) // ENOENT but unlinked parent
662	{
663	/* This odd code is here because this function would normally
664	* exit with a negative dcache entry on ENOENT. However if
665	* we allow a negative dcache entry in a directory thats been
666	* deleted (but we're still sitting in it) then the d_count
667	* will never go to zero and we'll strand any open file that
668	* is associated with the parent directory. If we drop the
669	* dentry and return the ENOENT then the VFS will dput the
670	* dentry. The scenario that gave us trouble was:
671	*
672	* NODE 1 NODE 2
673	* `rm -rf dirA` `rm -rf dirA`
674	* ==========================================================
675	* gpfs_f_open("dirA", ...)
676	* gpfs_f_readdir(...)
677	* [read "fileA", "fileB"] gpfs_f_open("dirA", ...)
678	* gpfs_f_readdir(...)
679	* [read "fileA", "fileB"]
680	*
681	* gpfs_i_lookup("fileA")
682	* gpfs_i_unlink("fileA")
683	* gpfs_s_delete_inode(fileA's inode)
684	* gpfs_i_lookup("fileB")
685	* gpfs_i_unlink("fileB")
686	* gpfs_s_delete_inode(fileB's inode)
687	* ...
688	* gpfs_i_rmdir("dirA", ...)
689	* gpfs_s_delete_inode(dirA's inode)
690	* destroyOnLastClose=1 for dirA <======
691	*
692	* gpfs_i_lookup("fileA")
693	* [creates a negative dentry for fileA,
694	* increments dirA's reference count]
695	* gpfs_i_lookup("fileB")
696	* [creates a negative dentry for fileB,
697	* increments dirA's reference count]
698	*/
699	DBGASSERT(dentryP->d_inode == NULL);
700	dentryP->d_op = NULL;
701	d_drop(dentryP);
702
703	code = 3;
704	retP = (struct dentry *)ERR_PTR(-rc);
705	goto xerror;
706	}
707
708	PRINTDENTRY(dentryP);
709
710	xerror:
711	TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_EXIT,
712	"gpfs_i_lookup exit: new inode 0x%lX iNum %d (0x%X) cnP 0x%lX retP 0x%lX "
713	"code %d rc %d\n", newInodeP, iNum, iNum, cnP, retP, code, rc);
714
715	VFS_STAT_STOP;
716	EXIT(0);
717	return retP;
718	}
719
720	int
721	gpfs_i_link(struct dentry oldDentryP, struct inode diP,
722	struct dentry *dentryP)
723	{
724	int rc = 0;
725	struct inode *iP = oldDentryP->d_inode;
726	cxiNode_t *dcnP;
727	cxiNode_t *cnP = NULL;
728	struct gpfsVfsData_t *privVfsP;
729	char *tnameP;
730	ext_cred_t eCred;
731
732	VFS_STAT_START(linkCall);
733	ENTER(0);
734	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_ENTER,
735	"gpfs_i_link enter: diP 0x%lX dentryP 0x%lX "
736	"dentryP 0x%lX name '%s'\n", diP, oldDentryP, dentryP,
737	dentryP->d_name.name);
738	/* BKL is held at entry */
739
740	cnP = VP_TO_CNP(iP);
741	dcnP = VP_TO_CNP(diP);
742	privVfsP = VP_TO_PVP(diP);
743	LOGASSERT(privVfsP != NULL);
744
745	setCred(&eCred);
746	rc = gpfs_ops.gpfsLink(privVfsP, cnP, dcnP,
747	dentryP, (char *)dentryP->d_name.name, &eCred);
748	if (rc)
749	{
750	d_drop(dentryP);
751	goto xerror;
752	}
753	iP->i_sb->s_dirt = 1;
754
755	xerror:
756	PRINTINODE(iP);
757	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_EXIT,
758	"gpfs_i_link exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);
759
760	if (rc)
761	cxiErrorNFS(rc);
762
763	VFS_STAT_STOP;
764	EXIT(0);
765	return -rc;
766	}
767
768	int
769	gpfs_i_unlink(struct inode diP, struct dentry dentryP)
770	{
771	int rc = 0;
772	struct gpfsVfsData_t *privVfsP;
773	struct inode *iP = dentryP->d_inode;
774	cxiNode_t *dcnP;
775	cxiNode_t *cnP;
776	ext_cred_t eCred;
777	struct dentry_operations *orig_d_opP;
778
779	VFS_STAT_START(removeCall);
780	ENTER(0);
781	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_ENTER,
782	"gpfs_i_unlink enter: diP 0x%lX iP 0x%lX dentryP 0x%lX name '%s'\n",
783	diP, iP, dentryP, dentryP->d_name.name);
784	/* BKL is held at entry */
785
786	cnP = VP_TO_CNP(iP);
787
788	dcnP = VP_TO_CNP(diP);
789	privVfsP = VP_TO_PVP(diP);
790	LOGASSERT(privVfsP != NULL);
791
792	/* Regarding dcache entry update: upon returning from gpfs_i_unlink, the VFS
793	layer will turn the dentry into a valid, negative dcache entry by calling
794	d_delete(). If another node then creates a new file with the same name,
795	the BR token revoke for the directory block will invalidate the negative
796	dcache entry. However, there is a window between the gpfsRemove() and
797	the d_delete(), where a BR token revoke would not recognize that it
798	should invalidate the dcache entry, because d_delete() has not yet turned
799	it into a negative dcache entry. To fix this, we mark the dentry as
800	"valid with d_delete pending"; the meaning of this state is "the dentry
801	is still valid, but a BR token revoke should mark it as 'needing
802	revalidation', even if it does not (yet) look like a negative dcache
803	entry". Note that we don't want to mark "valid with d_delete pending"
804	entries as invalid in the BR revoke handler, because we don't know for
805	sure that the file is in fact going to be deleted. The unlink operation
806	may fail, for any number of reasons, and the dentry should not be marked
807	as invalid prematurely. It's safe to mark a dentry as 'needing
808	revalidation', however. Ideally, we should swap d_op inside gpfsRemove
809	while we are holding the BR lock on the directory. However, (1) there is
810	local synchronization in the VFS (our caller is holding the i_sem
811	semaphore on the directory) that will prevent other threads from doing a
812	lookup or create that might change the state back to just plain "valid"
813	before the gpfsRemove has happened, and (2) a BR revoke that happens
814	before the gpfsRemove might unnecessarily mark the dentry as 'needing
815	revalidation'; this is sub-optimal, but it doesn't hurt. Also see
816	comment in gpfs_i_rmdir. */
817	orig_d_opP = dentryP->d_op;
818	dentryP->d_op = &gpfs_dops_ddeletepending;
819
820	setCred(&eCred);
821	rc = gpfs_ops.gpfsRemove(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
822	&eCred);
823	if (rc)
824	{
825	d_drop(dentryP);
826	if (dentryP->d_op == &gpfs_dops_ddeletepending)
827	dentryP->d_op = orig_d_opP;
828	goto xerror;
829	}
830	diP->i_sb->s_dirt = 1;
831
832	/* d_delete will be called at VFS layer if rc == 0 */
833
834	xerror:
835	PRINTINODE(iP);
836	PRINTDENTRY(dentryP);
837	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_EXIT,
838	"gpfs_i_unlink exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);
839
840	if (rc)
841	cxiErrorNFS(rc);
842
843	VFS_STAT_STOP;
844	EXIT(0);
845	return -rc;
846	}
847
848	int
849	gpfs_i_symlink(struct inode diP, struct dentry dentryP,
850	const char *symlinkTargetP)
851	{
852	int rc = 0;
853	cxiNode_t *dcnP;
854	cxiNode_t *cnP;
855	cxiIno_t iNum = (cxiIno_t)-1;
856	struct inode *newInodeP = NULL;
857	struct gpfsVfsData_t *privVfsP;
858	ext_cred_t eCred;
859
860	VFS_STAT_START(symlinkCall);
861	ENTER(0);
862	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK1,
863	"gpfs_i_symlink enter: iP 0x%lX dentryP 0x%lX symlinkTargetP '%s'\n",
864	diP, dentryP, symlinkTargetP);
865	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK2,
866	"gpfs_i_symlink: newLinkName '%s'\n", dentryP->d_name.name);
867	/* BKL is held at entry */
868
869	dcnP = VP_TO_CNP(diP);
870	privVfsP = VP_TO_PVP(diP);
871	LOGASSERT(privVfsP != NULL);
872
873	setCred(&eCred);
874	rc = gpfs_ops.gpfsSymlink(privVfsP, dcnP, (void **)&newInodeP, &cnP,
875	&iNum, dentryP, (char *)dentryP->d_name.name,
876	(char *)symlinkTargetP, &eCred);
877	if (rc == 0)
878	{
879	DBGASSERT(cnP != NULL);
880	DBGASSERT(iNum != -1);
881	DBGASSERT(newInodeP != NULL);
882	DBGASSERT(newInodeP->PRVINODE == cnP);
883	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
884	}
885	else
886	{
887	d_drop(dentryP);
888	goto xerror;
889	}
890	diP->i_sb->s_dirt = 1;
891
892	xerror:
893	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK_EXIT,
894	"gpfs_i_symlink exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
895	newInodeP, iNum, iNum, rc);
896
897	if (rc)
898	cxiErrorNFS(rc);
899
900	VFS_STAT_STOP;
901	EXIT(0);
902	return -rc;
903	}
904
905	int
906	gpfs_i_mkdir(struct inode diP, struct dentry dentryP, int mode)
907	{
908	int rc = 0;
909	struct gpfsVfsData_t *privVfsP;
910	cxiNode_t *dcnP;
911	cxiNode_t *cnP;
912	cxiMode_t umask;
913	ext_cred_t eCred;
914	cxiIno_t iNum = (cxiIno_t)-1;
915	struct inode *newInodeP = NULL;
916
917	VFS_STAT_START(mkdirCall);
918	ENTER(0);
919	umask = get_umask(); /* LFS should not apply umask and we may not */
920
921	dcnP = VP_TO_CNP(diP);
922	privVfsP = VP_TO_PVP(diP);
923	LOGASSERT(privVfsP != NULL);
924
925	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_ENTER,
926	"gpfs_i_mkdir enter: diP 0x%lX mode 0x%X name '%s'\n",
927	diP, mode, dentryP->d_name.name);
928	/* BKL is held at entry */
929
930	setCred(&eCred);
931	rc = gpfs_ops.gpfsMkdir(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum,
932	dentryP, (char *)dentryP->d_name.name, mode, umask,
933	&eCred);
934
935	if (rc == 0)
936	{
937	DBGASSERT(cnP != NULL);
938	DBGASSERT(iNum != -1);
939	DBGASSERT(newInodeP != NULL);
940	DBGASSERT(newInodeP->PRVINODE == cnP);
941	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
942	}
943	else
944	{
945	d_drop(dentryP);
946	goto xerror;
947	}
948	diP->i_sb->s_dirt = 1;
949
950	xerror:
951	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_EXIT,
952	"gpfs_i_mkdir exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
953	newInodeP, iNum, iNum, rc);
954
955	if (rc)
956	cxiErrorNFS(rc);
957
958	VFS_STAT_STOP;
959	EXIT(0);
960	return -rc;
961	}
962
963	int
964	gpfs_i_rmdir(struct inode diP, struct dentry dentryP)
965	{
966	int rc;
967	struct inode *iP = dentryP->d_inode;
968	cxiNode_t *dcnP;
969	cxiNode_t *cnP;
970	struct gpfsVfsData_t *privVfsP;
971	ext_cred_t eCred;
972	struct dentry_operations *orig_d_opP;
973
974	VFS_STAT_START(rmdirCall);
975	ENTER(0);
976	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_ENTER,
977	"gpfs_i_rmdir enter: diP 0x%lX iP 0x%lX name '%s'\n",
978	diP, iP, dentryP->d_name.name);
979	/* BKL is held at entry */
980
981	cnP = VP_TO_CNP(iP);
982	dcnP = VP_TO_CNP(diP);
983	privVfsP = VP_TO_PVP(diP);
984	LOGASSERT(privVfsP != NULL);
985
986	/* See comment in gpfs_i_unlink. Note that Linux kernel processes
987	directory dentries a little differently from regular file
988	dentries. In particular, it doesn't appear that a successful
989	rmdir call results in the removed directory dentry being turned
990	into a valid negative dentry; the dentry just gets unhashed and
991	recycled if it had no references at the time of rmdir. If the
992	dentry did have extra references, e.g. due to a process using the
993	directory in question as cwd, the dentry is unhashed, but it
994	remains a positive dentry pointing to the deleted inode, and will
995	remain as such until the dentry ref count goes to zero, at which
996	point the dentry is recycled. So there's no apparent need to
997	mark directory dentries as 'needing revalidation' during BR token
998	revoke (we do know that we need to do this for regular files).
999	However, this particular aspect of Linux kernel operation is not
1000	guaranteed to always work in this fashion, so we might as well
1001	try to stay on the safe side of things, and treat directories the
1002	same way as regular files. It doesn't appear that marking a
1003	dentry as 'needing revalidation' has any ill effects besides extra
1004	cycles required for revalidation, and BR token revoke handler
1005	racing with an unsuccessful gpfsRmdir is a rare enough event to
1006	tolerate this extra performance hit. */
1007	orig_d_opP = dentryP->d_op;
1008	dentryP->d_op = &gpfs_dops_ddeletepending;
1009
1010	setCred(&eCred);
1011	rc = gpfs_ops.gpfsRmdir(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
1012	&eCred);
1013	if (rc)
1014	{
1015	if (rc == EEXIST)
1016	rc = ENOTEMPTY;
1017	if (dentryP->d_op == &gpfs_dops_ddeletepending)
1018	dentryP->d_op = orig_d_opP;
1019	/* d_drop(dentryP); */
1020	goto xerror;
1021	}
1022	diP->i_sb->s_dirt = 1;
1023
1024	/* d_delete will be called at VFS layer if rc == 0 */
1025	xerror:
1026	PRINTINODE(iP);
1027	PRINTDENTRY(dentryP);
1028	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_EXIT,
1029	"gpfs_i_rmdir exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);
1030
1031	if (rc)
1032	cxiErrorNFS(rc);
1033
1034	VFS_STAT_STOP;
1035	EXIT(0);
1036	return -rc;
1037	}
1038
1039	int
1040	#if LINUX_KERNEL_VERSION >= 2050000
1041	gpfs_i_mknod(struct inode diP, struct dentry dentryP, int mode, dev_t rdev)
1042	#else
1043	gpfs_i_mknod(struct inode diP, struct dentry dentryP, int mode, int rdev)
1044	#endif
1045	{
1046	int rc = 0;
1047	struct gpfsVfsData_t *privVfsP;
1048	cxiNode_t *dcnP;
1049	cxiNode_t *cnP;
1050	cxiIno_t iNum = (cxiIno_t)-1;
1051	struct inode *newInodeP = NULL;
1052	cxiMode_t umask = get_umask();
1053	ext_cred_t eCred;
1054	cxiDev32_t rdev32;
1055
1056	VFS_STAT_START(mknodCall);
1057	ENTER(0);
1058	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_ENTER,
1059	"gpfs_i_mknod enter: diP 0x%lX mode 0x%X rdev 0x%X name '%s'\n",
1060	diP, mode, (int)rdev, dentryP->d_name.name);
1061	/* BKL is held at entry */
1062
1063	dcnP = VP_TO_CNP(diP);
1064	privVfsP = VP_TO_PVP(diP);
1065	LOGASSERT(privVfsP != NULL);
1066
1067	setCred(&eCred);
1068	rdev32 = cxiDevToDev32(rdev);
1069	rc = gpfs_ops.gpfsMknod(privVfsP, dcnP, (void **)&newInodeP, &cnP,
1070	&iNum, dentryP, (char *)dentryP->d_name.name,
1071	mode, umask, (cxiDev_t)rdev32, &eCred);
1072	if (rc == 0)
1073	{
1074	DBGASSERT(cnP != NULL);
1075	DBGASSERT(iNum != -1);
1076	DBGASSERT(newInodeP != NULL);
1077	DBGASSERT(newInodeP->PRVINODE == cnP);
1078	DBGASSERT(cnP->osNodeP == (void *)newInodeP);
1079	}
1080	else
1081	{
1082	d_drop(dentryP);
1083	goto xerror;
1084	}
1085	diP->i_sb->s_dirt = 1;
1086
1087	/* Set vector table for special files, gpfs will not get these operations.*/
1088	#if LINUX_KERNEL_VERSION >= 2060000
1089	init_special_inode(newInodeP, newInodeP->i_mode, newInodeP->i_rdev);
1090	#else
1091	init_special_inode(newInodeP, newInodeP->i_mode,
1092	kdev_t_to_nr(newInodeP->i_rdev));
1093	#endif
1094
1095	xerror:
1096	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_EXIT,
1097	"gpfs_i_mknod exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
1098	newInodeP, iNum, iNum, rc);
1099
1100	VFS_STAT_STOP;
1101	EXIT(0);
1102	return -rc;
1103	}
1104
1105	int
1106	gpfs_i_rename(struct inode diP, struct dentry dentryP,
1107	struct inode tdiP, struct dentry tDentryP)
1108	{
1109	int rc = 0;
1110	struct inode *iP = dentryP->d_inode;
1111	struct inode *tiP = tDentryP->d_inode;
1112	struct gpfsVfsData_t *privVfsP;
1113	cxiNode_t sourceCNP, sourceDirCNP, targetCNP, targetDirCNP;
1114	ext_cred_t eCred;
1115
1116	VFS_STAT_START(renameCall);
1117	ENTER(0);
1118	TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_1,
1119	"gpfs_i_rename enter: iP 0x%lX dvP 0x%lX name '%s'"
1120	" tiP 0x%lX tdiP 0x%lX new name '%s'\n",
1121	iP, diP, dentryP->d_name.name, tiP, tdiP, tDentryP->d_name.name);
1122	/* BKL is held at entry */
1123
1124	/* Do not allow simple rename across mount points */
1125	if (diP->i_sb != tdiP->i_sb)
1126	{
1127	rc = EXDEV;
1128	goto xerror;
1129	}
1130
1131	sourceCNP = VP_TO_CNP(iP);
1132	sourceDirCNP = VP_TO_CNP(diP);
1133
1134	targetCNP = (tiP != NULL) ? VP_TO_CNP(tiP) : NULL;
1135	targetDirCNP = VP_TO_CNP(tdiP);
1136
1137	privVfsP = VP_TO_PVP(iP);
1138	LOGASSERT(privVfsP != NULL);
1139
1140	setCred(&eCred);
1141	rc = gpfs_ops.gpfsRename(privVfsP, sourceCNP, sourceDirCNP,
1142	(char *)dentryP->d_name.name, targetCNP,
1143	targetDirCNP, (char *)tDentryP->d_name.name,
1144	&eCred);
1145	if (rc == 0)
1146	{
1147	gpfs_i_getattr_internal(iP);
1148	gpfs_i_getattr_internal(diP);
1149
1150	if (tiP)
1151	gpfs_i_getattr_internal(tiP);
1152
1153	if (tdiP != diP)
1154	gpfs_i_getattr_internal(tdiP);
1155
1156	diP->i_sb->s_dirt = 1;
1157	}
1158
1159	xerror:
1160	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_EXIT,
1161	"gpfs_i_rename exit: iP 0x%lX rc %d\n", iP, rc);
1162
1163	if (rc)
1164	cxiErrorNFS(rc);
1165
1166	VFS_STAT_STOP;
1167	EXIT(0);
1168	return -rc;
1169	}
1170
1171	int
1172	gpfs_i_readlink(struct dentry dentryP, char bufP, int buflen)
1173	{
1174	int rc = 0;
1175	Boolean gotBKL = false;
1176	struct cxiUio_t tmpUio;
1177	cxiIovec_t tmpIovec;
1178	struct inode *iP = dentryP->d_inode;
1179	struct gpfsVfsData_t *privVfsP;
1180	cxiNode_t *cnP;
1181
1182	VFS_STAT_START(readlinkCall);
1183	ENTER(0);
1184	TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_ENTER,
1185	"gpfs_i_readlink enter: dentryP 0x%lX bufP 0x%lX len %d "
1186	"iP 0x%lX name '%s'\n",
1187	dentryP, bufP, buflen, iP, dentryP->d_name.name);
1188
1189	/* BKL is not held at entry, except for NFS calls */
1190	TraceBKL();
1191	if (current->lock_depth >= 0) /* kernel lock is held by me */
1192	{
1193	gotBKL = true;
1194	unlock_kernel();
1195	}
1196
1197	cnP = VP_TO_CNP(iP);
1198	privVfsP = VP_TO_PVP(iP);
1199	LOGASSERT(privVfsP != NULL);
1200
1201	tmpIovec.iov_base = bufP; /* base memory address */
1202	tmpIovec.iov_len = buflen; /* length of transfer for this area */
1203
1204	tmpUio.uio_iov = &tmpIovec; /* ptr to array of iovec structs */
1205	tmpUio.uio_iovcnt = 1; /* #iovec elements left to be processed */
1206	tmpUio.uio_iovdcnt = 0; /* #iovec elements already processed */
1207	tmpUio.uio_offset = 0; /* byte offset in file/dev to read/write */
1208	tmpUio.uio_resid = buflen; /* #bytes left in data area */
1209	tmpUio.uio_segflg = UIO_USERSPACE; /* copy to user space buffer */
1210	tmpUio.uio_fmode = 0; /* file modes from open file struct */
1211
1212	rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);
1213
1214	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_EXIT,
1215	"gpfs_i_readlink exit: iP 0x%lX uio_resid %ld offset %d rc %d\n",
1216	iP, tmpUio.uio_resid, tmpUio.uio_offset, rc);
1217
1218	VFS_STAT_STOP;
1219
1220	if (gotBKL) /* If held kernel lock on entry then reacquire it */
1221	lock_kernel();
1222
1223	if (rc)
1224	cxiErrorNFS(rc);
1225
1226	EXIT(0);
1227	if (rc)
1228	return (-rc);
1229
1230	return (buflen - tmpUio.uio_resid);
1231	}
1232
1233	#if LINUX_KERNEL_VERSION >= 2061600
1234	void* gpfs_i_follow_link(struct dentry dentry, struct nameidata nd)
1235	#else
1236	int gpfs_i_follow_link(struct dentry dentry, struct nameidata nd)
1237	#endif
1238	{
1239	int rc;
1240	Boolean gotBKL = false;
1241	struct cxiUio_t tmpUio;
1242	cxiIovec_t tmpIovec;
1243	struct inode *iP = dentry->d_inode;
1244	struct gpfsVfsData_t *privVfsP;
1245	cxiNode_t *cnP;
1246	char *buf = NULL;
1247
1248	ENTER(0);
1249	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_ENTER,
1250	"gpfs_i_follow_link enter: inode 0x%lX name '%s'\n",
1251	dentry->d_inode, dentry->d_name.name);
1252
1253	/* BKL is not held at entry, except for NFS calls */
1254	TraceBKL();
1255	if (current->lock_depth >= 0) /* kernel lock is held by me */
1256	{
1257	gotBKL = true;
1258	unlock_kernel();
1259	}
1260
1261	/* Allocate a temporary buffer to hold the symlink contents */
1262	buf = cxiMallocPinned(CXI_PATH_MAX+1);
1263	if (buf == NULL)
1264	{
1265	rc = -ENOMEM;
1266	goto xerror;
1267	}
1268
1269	cnP = VP_TO_CNP(iP);
1270	privVfsP = VP_TO_PVP(iP);
1271	LOGASSERT(privVfsP != NULL);
1272
1273	tmpIovec.iov_base = buf; /* base memory address */
1274	tmpIovec.iov_len = PATH_MAX; /* length of transfer for this area */
1275
1276	tmpUio.uio_iov = &tmpIovec; /* ptr to array of iovec structs */
1277	tmpUio.uio_iovcnt = 1; /* #iovec elements left to be processed */
1278	tmpUio.uio_iovdcnt = 0; /* #iovec elements already processed */
1279	tmpUio.uio_offset = 0; /* byte offset in file/dev to read/write */
1280	tmpUio.uio_resid = PATH_MAX; /* #bytes left in data area */
1281	tmpUio.uio_segflg = UIO_SYSSPACE; /* copy to kernel space buffer */
1282	tmpUio.uio_fmode = 0; /* file modes from open file struct */
1283
1284	/* Read symlink contents */
1285	rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);
1286	if (rc)
1287	{
1288	cxiErrorNFS(rc);
1289	rc = -rc;
1290	goto xerror;
1291	}
1292
1293	/* set end of string */
1294	buf[PATH_MAX - tmpUio.uio_resid] = 0;
1295
1296	TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_FOLLOW_LINK_1,
1297	"gpfs_i_follow_link readlink rc %d data '%s'\n", rc, buf);
1298
1299	VFS_FOLLOW_LINK(rc, nd, buf);
1300
1301	exit:
1302	if (buf)
1303	cxiFreePinned(buf);
1304
1305	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_2,
1306	"gpfs_i_follow_link exit: inode 0x%lX rc %d\n",
1307	dentry->d_inode, rc);
1308
1309	if (gotBKL) /* If held kernel lock on entry then reacquire it */
1310	lock_kernel();
1311
1312	EXIT(0);
1313
1314	#if LINUX_KERNEL_VERSION >= 2061600
1315	return NULL; /* no cookie */
1316	#else
1317	return rc;
1318	#endif
1319
1320	xerror:
1321	path_release(nd);
1322	goto exit;
1323
1324	}
1325
#ifdef HAS_IOP_PUT_LINK

/*
 * Inode operation put_link(): release the link buffer recorded in the
 * nameidata.  The pointer obtained from nd_get_link(nd) is freed with
 * cxiFreePinned unless it is an error pointer (IS_ERR).
 *
 * The cookie argument (kernels >= 2.6.16) is not used.
 */
#if LINUX_KERNEL_VERSION >= 2061600
void gpfs_i_put_link(struct dentry *dentry, struct nameidata *nd, void* cookie)
#else
void gpfs_i_put_link(struct dentry *dentry, struct nameidata *nd)
#endif
{
  char *buf = nd_get_link(nd);
  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_PUTLINK,
         "gpfs_i_put_link dentry 0x%lX nd 0x%lX buf 0x%lX\n", dentry, nd,
         !IS_ERR(buf)? buf : NULL);
  if (!IS_ERR(buf))
    cxiFreePinned(buf);
}

#endif /* HAS_IOP_PUT_LINK */
1343
1344	int
1345	gpfs_i_bmap(struct inode *iP, int fragment)
1346	{
1347	ENTER(0);
1348	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_BMAP,
1349	"gpfs_i_bmap: rc ENOSYS\n");
1350	TraceBKL();
1351	EXIT(0);
1352	return -ENOSYS;
1353	}
1354
1355	void
1356	gpfs_i_truncate(struct inode *iP)
1357	{
1358	ENTER(0);
1359	/* Nothing to do since the file size was updated on the notify_change
1360	* call which preceeded this call
1361	*/
1362	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_TRUNCATE,
1363	"gpfs_i_truncate: inode 0x%lX\n", iP);
1364	TraceBKL();
1365	EXIT(0);
1366	}
1367
1368	int
1369	gpfs_i_permission(struct inode *iP, int mode
1370	#if LINUX_KERNEL_VERSION >= 2060000
1371	, struct nameidata *ni
1372	#endif
1373	)
1374	{
1375	cxiNode_t *cnP;
1376	struct gpfsVfsData_t *privVfsP;
1377	ext_cred_t eCred;
1378	int rc = 0;
1379
1380	VFS_STAT_START(accessCall);
1381	ENTER(0);
1382
1383	/* BKL is held at entry */
1384
1385	cnP = VP_TO_CNP(iP);
1386
1387	TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_ENTER,
1388	"gpfs_i_permission enter: iP 0x%lX mode 0x%X uid %d gid %d "
1389	"i_mode 0x%X i_xinfo 0x%X", iP, mode, current->fsuid,
1390	current->fsgid, iP->i_mode, cnP->xinfo);
1391
1392	privVfsP = VP_TO_PVP(iP);
1393	LOGASSERT(privVfsP != NULL);
1394
1395	if (mode) /* call permission check only if got access mode */
1396	{
1397	setCred(&eCred);
1398	rc = gpfs_ops.gpfsAccess(privVfsP, cnP, mode, ACC_SELF, &eCred);
1399	}
1400
1401	xerror:
1402	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_EXIT,
1403	"gpfs_i_permission exit: iP 0x%lX std %d dir std %d rc %d",
1404	iP, iP->i_op == &gpfs_iops_stdperm, iP->i_op == &gpfs_dir_iops_stdperm,
1405	rc);
1406
1407	if (rc)
1408	cxiErrorNFS(rc);
1409
1410	VFS_STAT_STOP;
1411	EXIT(0);
1412	return -rc;
1413	}
1414
1415	int
1416	gpfs_i_smap(struct inode *iP, int sector)
1417	{
1418	ENTER(0);
1419	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_SMAP,
1420	"gpfs_i_smap: rc ENOSYS\n");
1421	TraceBKL();
1422	EXIT(0);
1423	return -ENOSYS;
1424	}
1425
1426	int
1427	gpfs_i_updatepage(struct file fP, struct page pageP, const char *bufP,
1428	unsigned long offset, uint count, int sync)
1429	{
1430	ENTER(0);
1431	TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_UPDATEPAGE,
1432	"gpfs_i_updatepage: rc ENOSYS\n");
1433	TraceBKL();
1434	EXIT(0);
1435	return -ENOSYS;
1436	}
1437
1438	int
1439	gpfs_i_revalidate(struct dentry *dentryP)
1440	{
1441	int rc;
1442	int code = 0;
1443	struct inode *iP = dentryP->d_inode;
1444	cxiNode_t *cnP;
1445	cxiVattr_t vattr;
1446	struct gpfsVfsData_t *privVfsP;
1447
1448	ENTER(0);
1449	VFS_INC(revalidateCount);
1450	TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_ENTER,
1451	"gpfs_i_revalidate enter: dentryP 0x%lX iP 0x%lX ino 0x%X name '%s'\n",
1452	dentryP, dentryP->d_inode,
1453	(iP) ? iP->i_ino : -1, dentryP->d_name.name);
1454	/* BKL is usually not held, but seems to be held when coming here as
1455	part of setting an ACL */
1456
1457	if (iP == NULL)
1458	{
1459	code = 1;
1460	rc = ENOENT;
1461	goto xerror;
1462	}
1463	cnP = VP_TO_CNP(iP);
1464
1465	if (!cnP)
1466	{
1467	/* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
1468	where "count" entries are to be pruned, but the last one is
1469	found to be recently referenced. When this happens, count is
1470	decremented, but the loop is not terminated. The result is that
1471	it continues to prune entries past where it should (prunes
1472	everything). If our patch for this is not applied, the result
1473	is a kernel failure as the cxiNode is referenced. Checking
1474	here (and lookup) allows us to reject the call instead. */
1475
1476	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REVALIDATE_STALE,
1477	"gpfs_i_revalidate: cxiNode for iP 0x%lX (ino %d) was FREED!\n",
1478	iP, iP->i_ino);
1479
1480	/* Although we may like to know more about this inode, it is not
1481	* ok to call PRINTINODE(iP) here.
1482	*/
1483
1484	rc = ESTALE;
1485	code = 2;
1486	goto xerror;
1487	}
1488
1489	if ((cnP->icValid & CXI_IC_STAT) == CXI_IC_STAT)
1490	{
1491	rc = 0;
1492	code = 3;
1493	goto xerror;
1494	}
1495
1496	privVfsP = VP_TO_PVP(iP);
1497	LOGASSERT(privVfsP != NULL);
1498
1499	/* This has the effect of calling us back under a lock and
1500	* setting the inode attributes at the OS level (since this
1501	* operating system caches this info in the vfs layer)
1502	*/
1503	rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
1504	PRINTINODE(iP);
1505
1506	#if 0
1507	/* Delay briefly to give token revoke races a chance to happen, if there
1508	are any. Time delay is in jiffies (10ms). */
1509	# define howLong 5
1510	TRACE1(TRACE_VNODE, 4, TRCID_REVAL_DELAY,
1511	"gpfs_i_revalidate: begin delay %d\n", howLong);
1512	current->state = TASK_INTERRUPTIBLE;
1513	schedule_timeout(howLong);
1514	TRACE1(TRACE_VNODE, 14, TRCID_REVAL_DELAY_END,
1515	"gpfs_i_revalidate: end delay %d\n", howLong);
1516	#endif
1517
1518	xerror:
1519	TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_EXIT,
1520	"gpfs_i_revalidate exit: dentry 0x%lX code %d rc %d\n",
1521	dentryP, code, rc);
1522
1523	if (rc)
1524	cxiErrorNFS(rc);
1525
1526	EXIT(0);
1527	return -rc;
1528	}
1529
1530	int
1531	gpfs_i_setattr(struct dentry dentryP, struct iattr iattrP)
1532	{
1533	int rc;
1534
1535	VFS_STAT_START(setattrCall);
1536	ENTER(0);
1537	rc = gpfs_i_setattr_internal(dentryP->d_inode, iattrP);
1538
1539	VFS_STAT_STOP;
1540	EXIT(0);
1541	return -rc;
1542	}
1543
1544	int
1545	gpfs_i_setattr_internal(struct inode iP, struct iattr aP)
1546	{
1547	int rc = 0;
1548	int code = 0;
1549	long arg1; /* must be large enough on 64bit to contain */
1550	long arg2; /* either a pointer or integer */
1551	long arg3;
1552	cxiTimeStruc_t atime, mtime, ctime;
1553	cxiNode_t *cnP;
1554	struct gpfsVfsData_t *privVfsP;
1555	ext_cred_t eCred;
1556	unsigned int ia_valid;
1557
1558	ENTER(0);
1559	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_ENTER,
1560	"gpfs_i_setattr enter: iP 0x%lX ia_valid 0x%X\n", iP, aP->ia_valid);
1561	/* ?? Callers of this are inconsistent about whether the BKL is held */
1562
1563	cnP = VP_TO_CNP(iP);
1564	privVfsP = VP_TO_PVP(iP);
1565	LOGASSERT(privVfsP != NULL);
1566
1567	ia_valid = aP->ia_valid;
1568
1569	/* Change file size */
1570	if (ia_valid & ATTR_SIZE)
1571	{
1572	arg1 = (long)&aP->ia_size;
1573	arg2 = 0;
1574	arg3 = 0;
1575
1576	/* call gpfsSetattr, unless we know that new size is the same */
1577	if (!(cnP->icValid & CXI_IC_ATTR) \|\|
1578	((struct inode *)cnP->osNodeP)->i_size != aP->ia_size)
1579	{
1580	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
1581	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_SIZE, arg1, arg2, arg3,
1582	&eCred);
1583	if (rc != 0)
1584	{
1585	code = 1;
1586	goto xerror;
1587	}
1588
1589	/* gpfsSetattr(... V_SIZE ...) will have updated ctime and mtime.
1590	No need to do this again. */
1591	ia_valid &= ~(ATTR_MTIME \| ATTR_CTIME);
1592	}
1593	}
1594
1595	/* Change file mode */
1596	if (ia_valid & ATTR_MODE)
1597	{
1598	arg1 = (long)aP->ia_mode;
1599	arg2 = 0;
1600	arg3 = 0;
1601
1602	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
1603	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_MODE, arg1, arg2, arg3, &eCred);
1604	if (rc != 0)
1605	{
1606	code = 2;
1607	goto xerror;
1608	}
1609	}
1610
1611	/* Change uid or gid */
1612	if (ia_valid & (ATTR_UID \| ATTR_GID))
1613	{
1614	arg1 = 0;
1615	arg2 = 0;
1616	arg3 = 0;
1617
1618	if (ia_valid & ATTR_UID)
1619	arg2 = (long)aP->ia_uid;
1620	else
1621	arg1 \|= T_OWNER_AS_IS;
1622
1623	if (ia_valid & ATTR_GID)
1624	arg3 = (long)aP->ia_gid;
1625	else
1626	arg1 \|= T_GROUP_AS_IS;
1627
1628	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
1629	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_OWN, arg1, arg2, arg3, &eCred);
1630	if (rc != 0)
1631	{
1632	code = 3;
1633	goto xerror;
1634	}
1635	}
1636
1637	/* Change access, modification, or change time */
1638	if (ia_valid & (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME))
1639	{
1640	arg1 = 0;
1641	arg2 = 0;
1642	arg3 = 0;
1643
1644	if (ia_valid & ATTR_ATIME)
1645	{
1646	CXITIME_FROM_INODETIME(atime, aP->ia_atime);
1647	arg1 = (long)&atime;
1648	}
1649	if (ia_valid & ATTR_MTIME)
1650	{
1651	CXITIME_FROM_INODETIME(mtime, aP->ia_mtime);
1652	arg2 = (long)&mtime;
1653	}
1654	if (ia_valid & ATTR_CTIME)
1655	{
1656	CXITIME_FROM_INODETIME(ctime, aP->ia_ctime);
1657	arg3 = (long)&ctime;
1658	}
1659	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
1660	rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_STIME, arg1, arg2, arg3, &eCred);
1661	if (rc != 0)
1662	{
1663	code = 4;
1664	goto xerror;
1665	}
1666	}
1667
1668	xerror:
1669
1670	if (rc == 0)
1671	{
1672	/* For NFS we might need to write the inode but the check will be done
1673	* in gpfsSyncNFS().
1674	*/
1675	if (cxiAllowNFSFsync())
1676	{
1677	setCred(&eCred); // rebuild since gpfsSetattr may remap ids
1678	rc = gpfs_ops.gpfsSyncNFS(privVfsP, cnP, 0, &eCred);
1679	}
1680
1681	iP->i_sb->s_dirt = 1;
1682	}
1683	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_EXIT,
1684	"gpfs_i_setattr exit: iP 0x%lX code %d rc %d\n", iP, code, rc);
1685
1686	if (rc)
1687	cxiErrorNFS(rc);
1688
1689	EXIT(0);
1690	return rc;
1691	}
1692
1693	#if LINUX_KERNEL_VERSION >= 2050000
1694	int
1695	gpfs_i_getattr(struct vfsmount mntP, struct dentry dentryP,
1696	struct kstat *kstatP)
1697	#else
1698	int
1699	gpfs_i_getattr(struct dentry dentryP, struct iattr iattrP)
1700	#endif
1701	{
1702	int rc;
1703	struct inode *iP = dentryP->d_inode;
1704	cxiNode_t *cnP;
1705
1706	VFS_STAT_START(getattrCall);
1707	ENTER(0);
1708
1709	cnP = VP_TO_CNP(iP);
1710
1711	if (cnP && ((cnP->icValid & CXI_IC_STAT) == CXI_IC_STAT)) /* attr are vaild */
1712	rc = 0;
1713	else
1714	rc = gpfs_i_getattr_internal(iP);
1715
1716	if (!rc)
1717	#if LINUX_KERNEL_VERSION >= 2050000
1718	generic_fillattr(iP, kstatP);
1719	#else
1720	getIattr(iP, iattrP);
1721	#endif
1722	else
1723	rc = -rc;
1724
1725	VFS_STAT_STOP;
1726	EXIT(0);
1727	return rc;
1728	}
1729
1730	int
1731	gpfs_i_getattr_internal(struct inode *iP)
1732	{
1733	int rc = 0;
1734	cxiNode_t *cnP;
1735	struct gpfsVfsData_t *privVfsP;
1736	cxiVattr_t vattr;
1737
1738	ENTER(0);
1739	TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_ENTER,
1740	"gpfs_i_getattr enter: iP 0x%lX\n", iP);
1741	/* BKL is held at entry */
1742
1743	privVfsP = VP_TO_PVP(iP);
1744	LOGASSERT(privVfsP != NULL);
1745	cnP = VP_TO_CNP(iP);
1746
1747	/* This has the effect of calling us back under a lock and
1748	* setting the inode attributes at the OS level (since this
1749	* operating system caches this info in the vfs layer)
1750	*/
1751	rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
1752	PRINTINODE(iP);
1753
1754	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_EXIT,
1755	"gpfs_i_getattr exit: iP 0x%lX rc %d\n", iP, rc);
1756
1757	if (rc)
1758	cxiErrorNFS(rc);
1759
1760	EXIT(0);
1761	return rc;
1762	}
1763
1764	#if LINUX_KERNEL_VERSION > 2060000
1765	#include <cxiAclUser.h>
1766
1767	#define XATTR_SECURITY_PREFIX "security."
1768	#define XATTR_TRUSTED_PREFIX "trusted."
1769	#define XATTR_USER_PREFIX "user."
1770	#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access"
1771	#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default"
1772
/*
 * Check whether the string name starts with the string prefix.
 *
 * Returns a pointer to the first character of name following the
 * prefix (pointing at the terminating NUL when name equals prefix), or
 * NULL when name does not start with prefix.
 */
static const char *
test_prefix(const char *name, const char *prefix)
{
  /* Advance both strings while the prefix has characters left and they
     match; a shorter name stops here on its NUL terminator. */
  while (*prefix && *name == *prefix) {
    name++;
    prefix++;
  }
  /* The prefix matched only if it was consumed completely. */
  return *prefix ? NULL : name;
}
1782
1783	/*
1784	* Inode operation getxattr()
1785	*
1786	*/
1787	ssize_t
1788	gpfs_i_getxattr(struct dentry dentry, const char name, void *buf,
1789	size_t buf_size)
1790	{
1791	int rc;
1792	cxiNode_t *cnP;
1793	struct gpfsVfsData_t *privVfsP;
1794	struct tsxattr xattr;
1795	struct tsxattrs xattrs;
1796	ext_cred_t eCred;
1797	void *argP = &xattrs;
1798	int flags = 0;
1799	struct inode *iP = dentry->d_inode;
1800	mm_segment_t oldfs;
1801	const char *n;
1802
1803	ENTER(0);
1804	VFS_STAT_START(getxattrCall);
1805
1806	TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_ENTER,
1807	"gpfs_i_getxattr enter: iP 0x%lX name %s buf 0x%lX size %d\n",
1808	iP, (name) ? name : "NULL", buf, buf_size);
1809
1810	if (iP == NULL)
1811	{
1812	rc = ENOENT;
1813	goto xerror;
1814	}
1815
1816	#ifdef CONFIG_FS_POSIX_ACL
1817	if (n = test_prefix(name, XATTR_NAME_ACL_ACCESS)) {
1818	if (n && (strcmp(n, "") != 0)) {
1819	rc = EINVAL;
1820	goto xerror;
1821	}
1822	rc = gpfs_get_posix_acl(dentry, ACL_TYPE_ACCESS, buf, buf_size);
1823	goto xerror2;
1824	}
1825	if (S_ISDIR(iP->i_mode))
1826	{
1827	if (n = test_prefix(name, XATTR_NAME_ACL_DEFAULT)) {
1828	if (n && (strcmp(n, "") != 0)) {
1829	rc = EINVAL;
1830	goto xerror;
1831	}
1832	rc = gpfs_get_posix_acl(dentry, ACL_TYPE_DEFAULT, buf, buf_size);
1833	goto xerror2;
1834	}
1835	}
1836	#endif
1837	if (n = test_prefix(name, XATTR_SECURITY_PREFIX)) {
1838	if (n && (strcmp(n, "") == 0)) {
1839	rc = EINVAL;
1840	goto xerror;
1841	}
1842	goto xattr;
1843	}
1844	if (n = test_prefix(name, XATTR_TRUSTED_PREFIX)) {
1845	if (n && (strcmp(n, "") == 0)) {
1846	rc = EINVAL;
1847	goto xerror;
1848	}
1849	if (!capable(CAP_SYS_ADMIN)) {
1850	rc = EPERM;
1851	goto xerror;
1852	}
1853	goto xattr;
1854	}
1855	if (n = test_prefix(name, XATTR_USER_PREFIX)) {
1856	if (n && (strcmp(n, "") == 0)) {
1857	rc = EINVAL;
1858	goto xerror;
1859	}
1860	goto xattr;
1861	}
1862	rc = EOPNOTSUPP;
1863	goto xerror;
1864
1865	xattr:
1866	setCred(&eCred);
1867	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
1868	xattrs.nattrs = 1; // no of attributes to get or set
1869	xattrs.attrs = &xattr; // attributes to get or set
1870
1871	xattr.keyP = (char*) name; // attribute key
1872	xattr.keyLen = strlen(name) + 1; // key length
1873	xattr.valueP = buf; // attribute value
1874	xattr.valueLen = buf_size; // length of attribute value
1875
1876	privVfsP = VP_TO_PVP(iP);
1877	LOGASSERT(privVfsP != NULL);
1878	cnP = VP_TO_CNP(iP);
1879
1880	oldfs = get_fs();
1881	set_fs(get_ds());
1882
1883	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, GET_XATTR, argP,
1884	NULL, &eCred);
1885
1886	set_fs(oldfs);
1887	if (!rc)
1888	{
1889	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT0,
1890	"gpfs_i_getxattr exit: iP 0x%lX len %d\n", iP, xattr.valueLen);
1891	VFS_STAT_STOP;
1892	EXIT(0);
1893	if (xattr.valueLen < 0)
1894	rc = ENODATA;
1895	else
1896	return (xattr.valueLen);
1897	}
1898
1899	xerror:
1900	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT,
1901	"gpfs_i_getxattr exit: iP 0x%lX rc %d\n", iP, rc);
1902
1903	if (rc)
1904	cxiErrorNFS(rc);
1905
1906	VFS_STAT_STOP;
1907	EXIT(0);
1908	return (-rc);
1909
1910	xerror2:
1911	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT2,
1912	"gpfs_i_getxattr exit2: iP 0x%lX rc %d\n", iP, rc);
1913
1914	if (rc)
1915	cxiErrorNFS(rc);
1916
1917	VFS_STAT_STOP;
1918	EXIT(0);
1919	return (rc);
1920	}
1921
1922	/*
1923	* Inode operation setxattr()
1924	*
1925	*/
1926	int
1927	gpfs_i_setxattr(struct dentry dentry, const char name, const void *buf,
1928	size_t buf_size, int ext_flags)
1929	{
1930	int rc;
1931	cxiNode_t *cnP;
1932	struct gpfsVfsData_t *privVfsP;
1933	struct tsxattr xattr;
1934	struct tsxattrs xattrs;
1935	ext_cred_t eCred;
1936	void *argP = &xattrs;
1937	int flags = 0;
1938	struct inode *iP = dentry->d_inode;
1939	mm_segment_t oldfs;
1940	const char *n;
1941
1942	ENTER(0);
1943	VFS_STAT_START(setxattrCall);
1944
1945	TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_SETEXTATTR_ENTER,
1946	"gpfs_i_setxattr enter: iP 0x%lX name %s buf 0x%lX size %d flags 0x%X\n",
1947	iP, (name) ? name : "NULL", buf, buf_size, ext_flags);
1948
1949	if (iP == NULL)
1950	{
1951	rc = ENOENT;
1952	goto xerror;
1953	}
1954
1955	#ifdef CONFIG_FS_POSIX_ACL
1956	if (n = test_prefix(name, XATTR_NAME_ACL_ACCESS)) {
1957	if (n && (strcmp(n, "") != 0)) {
1958	rc = EINVAL;
1959	goto xerror;
1960	}
1961	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
1962	return EPERM;
1963	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_ACCESS, buf, buf_size);
1964	goto xerror;
1965	}
1966	if (S_ISDIR(iP->i_mode))
1967	{
1968	if (n = test_prefix(name, XATTR_NAME_ACL_DEFAULT)) {
1969	if (n && (strcmp(n, "") != 0)) {
1970	rc = EINVAL;
1971	goto xerror;
1972	}
1973	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
1974	return EPERM;
1975	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_DEFAULT, buf, buf_size);
1976	goto xerror;
1977	}
1978	}
1979	#endif
1980	if (n = test_prefix(name, XATTR_SECURITY_PREFIX)) {
1981	if (n && (strcmp(n, "") == 0)) {
1982	rc = EINVAL;
1983	goto xerror;
1984	}
1985	goto xattr;
1986	}
1987	if (n = test_prefix(name, XATTR_TRUSTED_PREFIX)) {
1988	if (n && (strcmp(n, "") == 0)) {
1989	rc = EINVAL;
1990	goto xerror;
1991	}
1992	if (!capable(CAP_SYS_ADMIN)) {
1993	rc = EPERM;
1994	goto xerror;
1995	}
1996	goto xattr;
1997	}
1998	if (n = test_prefix(name, XATTR_USER_PREFIX)) {
1999	if (n && (strcmp(n, "") == 0)) {
2000	rc = EINVAL;
2001	goto xerror;
2002	}
2003	goto xattr;
2004	}
2005	rc = EOPNOTSUPP;
2006	goto xerror;
2007
2008	xattr:
2009	setCred(&eCred);
2010	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
2011	xattrs.nattrs = 1; // no of attributes to get or set
2012	xattrs.attrs = &xattr; // attributes to get or set
2013
2014	xattr.keyP = (char*) name; // attribute key
2015	xattr.keyLen = strlen(name) + 1; // key length
2016	xattr.valueP = (char *)buf; // attribute value
2017	xattr.valueLen = buf_size; // length of attribute value
2018
2019	privVfsP = VP_TO_PVP(iP);
2020	LOGASSERT(privVfsP != NULL);
2021	cnP = VP_TO_CNP(iP);
2022
2023	oldfs = get_fs();
2024	set_fs(get_ds());
2025
2026	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, SET_XATTR, argP,
2027	NULL, &eCred);
2028	set_fs(oldfs);
2029	xerror:
2030	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_SETEXTATTR_EXIT,
2031	"gpfs_i_setxattr exit: iP 0x%lX rc %d\n", iP, rc);
2032
2033	if (rc)
2034	cxiErrorNFS(rc);
2035
2036	VFS_STAT_STOP;
2037	EXIT(0);
2038	return (-rc);
2039	}
2040
2041	/*
2042	* Inode operation listxattr()
2043	*
2044	* Copy a list of attribute names into the buffer
2045	* provided, or compute the buffer size required.
2046	* Buffer is NULL to compute the size of the buffer required.
2047	*
2048	* Returns a negative error number on failure, or the number of bytes
2049	* used / required on success.
2050	*/
2051	ssize_t
2052	gpfs_i_listxattr(struct dentry dentry, char buf, size_t buf_size)
2053	{
2054	int rc;
2055	cxiNode_t *cnP;
2056	struct gpfsVfsData_t *privVfsP;
2057	struct tsxattr xattr;
2058	struct tsxattrs xattrs;
2059	ext_cred_t eCred;
2060	void *argP = &xattrs;
2061	int flags = 0;
2062	struct inode *iP = dentry->d_inode;
2063	mm_segment_t oldfs;
2064
2065	ENTER(0);
2066	VFS_STAT_START(listxattrCall);
2067
2068	TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXATTR_IN,
2069	"gpfs_i_listxattr enter: iP 0x%lX buf 0x%lX buf_size %d\n",
2070	iP, buf, buf_size);
2071
2072
2073	if (iP == NULL)
2074	{
2075	rc = ENOENT;
2076	goto xerror;
2077	}
2078	setCred(&eCred);
2079	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
2080	xattrs.nattrs = 0; // get all attribute name
2081	xattrs.attrs = &xattr; // attributes to get or set
2082
2083	xattr.keyP = NULL; // attribute key
2084	xattr.keyLen = 0; // key length
2085	xattr.valueP = buf; // attribute value
2086	xattr.valueLen = buf_size; // length of attribute value
2087
2088	privVfsP = VP_TO_PVP(iP);
2089	LOGASSERT(privVfsP != NULL);
2090	cnP = VP_TO_CNP(iP);
2091
2092	oldfs = get_fs();
2093	set_fs(get_ds());
2094
2095	/* which names can we show ??? */
2096	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, LIST_XATTR, argP,
2097	NULL, &eCred);
2098
2099	set_fs(oldfs);
2100	if (!rc)
2101	{
2102	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXTATTR_EXIT0,
2103	"gpfs_i_listxattr exit: iP 0x%lX len %d\n", iP, xattr.valueLen);
2104	VFS_STAT_STOP;
2105	EXIT(0);
2106	return (xattr.valueLen);
2107	}
2108
2109	xerror:
2110	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXTATTR_EXIT,
2111	"gpfs_i_listxattr exit: iP 0x%lX rc %d\n", iP, rc);
2112
2113	if (rc)
2114	cxiErrorNFS(rc);
2115
2116	VFS_STAT_STOP;
2117	EXIT(0);
2118	return (-rc);
2119	}
2120
2121	/*
2122	* Inode operation removexattr()
2123	*
2124	*/
2125	int
2126	gpfs_i_removexattr(struct dentry dentry, const char name)
2127	{
2128	int rc;
2129	cxiNode_t *cnP;
2130	struct gpfsVfsData_t *privVfsP;
2131	struct tsxattr xattr;
2132	struct tsxattrs xattrs;
2133	ext_cred_t eCred;
2134	void *argP = &xattrs;
2135	int flags = 0;
2136	struct inode *iP = dentry->d_inode;
2137	mm_segment_t oldfs;
2138	const char *n;
2139
2140	ENTER(0);
2141	VFS_STAT_START(removexattrCall);
2142
2143	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOVEXATTR_IN,
2144	"gpfs_i_removexattr enter: iP 0x%lX name %s\n", iP, (name) ? name : "NULL");
2145
2146	if (iP == NULL)
2147	{
2148	rc = ENOENT;
2149	goto xerror;
2150	}
2151	#ifdef CONFIG_FS_POSIX_ACL
2152	if (n = test_prefix(name, XATTR_NAME_ACL_ACCESS)) {
2153	if (n && (strcmp(n, "") != 0)) {
2154	rc = EINVAL;
2155	goto xerror;
2156	}
2157	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
2158	return EPERM;
2159	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_ACCESS, NULL, -1);
2160	goto xerror;
2161	}
2162	if (S_ISDIR(iP->i_mode))
2163	{
2164	if (n = test_prefix(name, XATTR_NAME_ACL_DEFAULT)) {
2165	if (n && (strcmp(n, "") != 0)) {
2166	rc = EINVAL;
2167	goto xerror;
2168	}
2169	if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
2170	return EPERM;
2171	rc = gpfs_set_posix_acl(dentry, ACL_TYPE_DEFAULT, NULL, -1);
2172	goto xerror;
2173	}
2174	}
2175	#endif
2176	if (n = test_prefix(name, XATTR_SECURITY_PREFIX)) {
2177	if (n && (strcmp(n, "") == 0)) {
2178	rc = EINVAL;
2179	goto xerror;
2180	}
2181	goto xattr;
2182	}
2183	if (n = test_prefix(name, XATTR_TRUSTED_PREFIX)) {
2184	if (n && (strcmp(n, "") == 0)) {
2185	rc = EINVAL;
2186	goto xerror;
2187	}
2188	if (!capable(CAP_SYS_ADMIN)) {
2189	rc = EPERM;
2190	goto xerror;
2191	}
2192	goto xattr;
2193	}
2194	if (n = test_prefix(name, XATTR_USER_PREFIX)) {
2195	if (n && (strcmp(n, "") == 0)) {
2196	rc = EINVAL;
2197	goto xerror;
2198	}
2199	goto xattr;
2200	}
2201	rc = EOPNOTSUPP;
2202	goto xerror;
2203
2204	xattr:
2205	setCred(&eCred);
2206	xattrs.appId = 3; // application id GPFS_ATTR_INTERNAL_APPL_ID
2207	xattrs.nattrs = 1; // no of attributes to get or set
2208	xattrs.attrs = &xattr; // attributes to delete
2209
2210	xattr.keyP = (char*) name; // attribute key
2211	xattr.keyLen = strlen(name) + 1; // key length
2212	xattr.valueP = NULL; // attribute value
2213	xattr.valueLen = -1; // length < zero means delete
2214
2215	privVfsP = VP_TO_PVP(iP);
2216	LOGASSERT(privVfsP != NULL);
2217	cnP = VP_TO_CNP(iP);
2218
2219	oldfs = get_fs();
2220	set_fs(get_ds());
2221
2222	rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, SET_XATTR, argP,
2223	NULL, &eCred);
2224	set_fs(oldfs);
2225
2226	xerror:
2227	TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOVEXATTR_EXIT,
2228	"gpfs_i_removexattr exit: iP 0x%lX rc %d\n", iP, rc);
2229
2230	if (rc)
2231	cxiErrorNFS(rc);
2232
2233	VFS_STAT_STOP;
2234	EXIT(0);
2235	return (-rc);
2236	}
2237	#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/inode.c @ 16

Download in other formats: