source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/super.c @ 16

Last change on this file since 16 was 16, checked in by rock, 16 years ago
File size: 71.0 KB
Line 
1/***************************************************************************
2 *
3 * Copyright (C) 2001 International Business Machines
4 * All rights reserved.
5 *
6 * This file is part of the GPFS mmfslinux kernel module.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *  1. Redistributions of source code must retain the above copyright notice,
13 *     this list of conditions and the following disclaimer.
14 *  2. Redistributions in binary form must reproduce the above copyright
15 *     notice, this list of conditions and the following disclaimer in the
16 *     documentation and/or other materials provided with the distribution.
17 *  3. The name of the author may not be used to endorse or promote products
18 *     derived from this software without specific prior written
19 *     permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 *************************************************************************** */
33/* @(#)24       1.157.1.8  src/avs/fs/mmfs/ts/kernext/gpl-linux/super.c, mmfs, avs_rgpfs24, rgpfs24s011a 4/5/07 11:15:55 */
34/*
35 * Superblock operations
36 *
37 * Contents:
38 *   TraceBKL
39 *   gpfs_s_read_inode2
40 *   gpfs_s_read_inode
41 *   gpfs_s_delete_inode
42 *   gpfs_s_notify_change
43 *   gpfs_s_put_super
44 *   gpfs_s_statfs
45 *   gpfs_s_umount_begin
46 *   gpfs_s_remount
47 *   gpfs_s_write_inode
48 *   gpfs_s_clear_inode
49 *   gpfs_s_write_super
50 *   gpfs_s_fs_locations
51 *   gpfs_fill_super
52 *   gpfs_reg_fs
53 *   gpfs_unreg_fs
54 *   kill_mmfsd
55 *   get_myinode
56 *   exec_mmfs
57 *   fork_mount_helper
58 *   vfsUserCleanup
59 *   cxiSetMountInfo
60 *   cxiUnmount
61 *   cxiReactivateOSNode
62 *   cxiNewOSNode
63 *   cxiFreeOSNode
64 *   cxiDeleteMmap
65 *   cxiReinitOSNode
66 *   cxiFindOSNode
67 *   cxiDumpOSNode
68 *   cxiRefOSNode
69 *   cxiInactiveOSNode
70 *   cxiPutOSNode
71 *   cxiDestroyOSNode
72 *   cxiSetOSNodeType
73 *   cxiUpdateInode
74 *   cxiCanUncacheOSNode
75 *   cxiAddOSNode
76 *
77 */
78
79#include <Shark-gpl.h>
80
81#include <linux/string.h>
82#include <linux/module.h>
83#include <linux/errno.h>
84#include <linux/fs.h>
85#include <linux/smp_lock.h>
86
87#ifndef GPFS_ARCH_X86_64
88#define __KERNEL_SYSCALLS__
89#endif
90#include <linux/unistd.h>
91#include <asm/uaccess.h>   /* KERNEL_DS */
92
93#define FOOBAR #error Do not do this
94
95/* GPFS headers */
96#include <verdep.h>
97#include <linux2gpfs.h>
98#include <cxiSystem.h>
99#include <cxiTypes.h>
100#include <cxiAtomic.h>
101#include <cxi2gpfs.h>
102#include <cxiIOBuffer.h>
103#include <cxiSharedSeg.h>
104#include <cxiCred.h>
105#include <linux2gpfs.h>
106#include <Trace.h>
107#include <cxiVFSStats.h>
108#include <linux/kmod.h>
109#if LINUX_KERNEL_VERSION > 2060000
110#include <linux/wait.h>
111#endif
112
/* forward declaration */
int vfsUserCleanup(struct super_block *sbP, 
                   struct gpfsVfsData_t *privVfsP, Boolean force);

/* File system type descriptor, defined outside this file; it is the object
   handed to register_filesystem()/unregister_filesystem() below */
extern struct file_system_type  gpfs_fs_type;

/* Wait queue that kill_mmfsd() waits on after signalling the daemon */
static DECLARE_WAIT_QUEUE_HEAD(pwq);

int mmfsd_module_active = 0;
static int mmfsd_id = -1;   /* pid signalled by kill_mmfsd(); -1 means none */
static int mount_id = -1;
/* "M " prefix; gpfs_fill_super() copies the device name in at offset 2 */
char mountCmd[CXI_MAXPATHLEN+1] = "M ";
/* rebuilt by gpfs_fill_super() as bin_path followed by "/mmfsmount" */
char mmfs_path[CXI_MAXPATHLEN+1] = "";
char bin_path[CXI_MAXPATHLEN+1];
static char mount_opt[CXI_MAXPATHLEN+1];

static unsigned int unusedInodeNum = 1;
static struct inode *unusedInodeP = NULL;
static struct super_block *unusedSuperP = NULL;
/* Dummy super block: allocated by gpfs_reg_fs(), freed by gpfs_unreg_fs() */
struct super_block *shutdownSuperP = NULL;

/* Set up with spin_lock_init() in gpfs_reg_fs() */
static spinlock_t inode_lock;
135
136/* Routine to trace whether kernel lock is held */
137#ifdef VERBOSETRACE
138void TraceBKL()
139{
140  TRACE2(TRACE_VNODE, 10, TRCID_VNODE_BKL,
141         "BKL %d lock_depth %d\n", kernel_locked(), current->lock_depth);
142}
143#endif
144
145#include <linux/pagemap.h>
146
147#if HAS_SOP_ALLOC_INODE
/* Slab cache from which gpfs_alloc_inode() takes its objects; created by
   gpfs_init_inodecache() and torn down by gpfs_destroy_inodecache() */
static struct kmem_cache * gpfsInodeCacheP;

/* One cache object: a Linux inode immediately followed by raw storage for the
   GPFS cxiNode_t.  The inode must stay the first member because the code in
   this file casts freely between struct inode * and this type. */
struct gpfs_bloated_inode
{
   struct inode inode;
   char cxiNode[CXINODE_SIZE];   /* viewed as a cxiNode_t by gpfs_s_read_inode2 */
};
154
155static void 
156gpfs_init_once(void * iP, struct kmem_cache * cacheP, unsigned long flags)
157{
158  if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 
159      SLAB_CTOR_CONSTRUCTOR)
160     inode_init_once((struct inode *)iP);
161}
162
163int
164gpfs_init_inodecache(void)
165{
166  gpfsInodeCacheP = kmem_cache_create("gpfsInodeCache",
167                                      sizeof(struct gpfs_bloated_inode), 0,
168                                      SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
169                                      gpfs_init_once, NULL);
170  if (gpfsInodeCacheP == NULL)
171    return -ENOMEM;
172  return 0;
173}
174
175struct inode *
176gpfs_alloc_inode(struct super_block *sbP)
177{
178  struct inode * iP;
179
180  iP = (struct inode *)kmem_cache_alloc(gpfsInodeCacheP, GFP_KERNEL);
181  TRACE1N(TRACE_VNODE, 1, TRCID_LINUXOPS_GPFS_ALLOC_INODE_EXIT,
182         "gpfs_alloc_inode: inode 0x%lX\n", iP);
183  return iP;
184}
185
186void 
187gpfs_destroy_inode(struct inode *iP)
188{
189  TRACE1N(TRACE_VNODE, 1, TRCID_LINUXOPS_GPFS_DESTROY_INODE,
190         "gpfs_destroy_inode: inode 0x%lX\n", (void *)iP);
191  kmem_cache_free(gpfsInodeCacheP, (void *)iP);
192}
193
194void 
195gpfs_destroy_inodecache(void)
196{
197  while (kmem_cache_shrink(gpfsInodeCacheP) != 0)
198      cxiSleep(40);
199  kmem_cache_destroy(gpfsInodeCacheP);
200}
201
202#endif /* HAS_SOP_ALLOC_INODE */
203
204/* This routine is called from iget() just after allocating a new inode.
205   This is a variant of the normal read_inode operation that allows passing an
206   opaque parameter through iget4 into read_inode2.  We need the parameter to
207   know whether read_inode2 is being called from a normal lookup opration,
208   where we are already holding a distributed lock on the file, or from nfs
209   calling iget, where we need to get the lock inside of read_inode2.
210
211   Note: In the Linux source the call to read_inode2 is labelled a "reiserfs
212   specific hack" with the additional warning "We don't want this to last, and
213   are looking for VFS changes that will allow us to get rid of it." If and
214   when such a change is made, we will hopefully be able to adapt our code
215   accordingly.  Otherwise, if read_inode2 goes away without a suitable
216   replacement, we will have to use a more expensive approach, e.g., a global
217   table where lookup would leave some state before calling iget. */
218void
219gpfs_s_read_inode2(struct inode *iP, void *opaque)
220{
221  struct gpfsVfsData_t *privVfsP;
222  ino_t inum = iP->i_ino;
223  cxiNode_t *cnP;
224  int rc;
225
226  ENTER(0);
227  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_ENTER,
228         "gpfs_s_read_inode2 enter: inode 0x%lX inode %d\n",
229         iP, inum);
230  /* BKL is sometimes held at entry */
231
232#if HAS_SOP_ALLOC_INODE
233  cnP = (cxiNode_t *)&((struct gpfs_bloated_inode *)iP)->cxiNode;
234#else
235  /* allocate cxiNode_t */
236  if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
237  {
238    /* need to allocate separate storage for the cxiNode_t */
239    cnP = (cxiNode_t *)cxiMallocUnpinned(CXINODE_SIZE);
240    if (cnP == NULL)
241      goto exit_bad;
242  }
243  else
244  {
245    /* we can store the cxiNode_t in the part of the iP->u
246     * union after the PRVINODE field
247     */
248    cnP = (cxiNode_t *)(&iP->PRVINODE + 1);
249  }
250#endif
251
252  memset(cnP, 0, CXINODE_SIZE);
253
254  /*TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_1,
255         "gpfs_s_read_inode2: iP 0x%lX cnP 0x%lX uSize-void* %d nodeSize %d",
256         iP, cnP, sizeof(iP->PRVINODE) - sizeof(void *), CXINODE_SIZE);
257 
258  */
259  /* connect cxiNode_t to struct inode */
260  cnP->osNodeP = iP;
261  iP->PRVINODE = cnP;
262
263  /* get inode attributes */
264  privVfsP = VP_TO_PVP(iP);
265  rc = gpfs_ops.gpfsInodeRead(privVfsP, cnP, inum, opaque);
266
267  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_EXIT,
268         "gpfs_s_read_inode2 exit: inode 0x%lX rc %d",
269         iP, rc);
270
271  if (rc == 0)
272  {
273    EXIT(0);
274    return;  // success!
275  }
276
277  /* undo cxiNode_t allocation */
278  cnP->osNodeP = NULL;
279  iP->PRVINODE = NULL;
280
281#if !HAS_SOP_ALLOC_INODE
282  if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
283    cxiFreeUnpinned(cnP);
284#endif
285
286exit_bad:
287  /* make_bad_inode will initialize iP so that all operations return EIO;
288     also set i_nlink to zero so that the bad inode will be thrown out of
289     the cache at the next opportunity */
290  make_bad_inode(iP);
291  iP->i_nlink = 0;
292  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_EXIT_BAD,
293         "gpfs_s_read_inode2 exit: inode 0x%lX rc BADINODE",
294         iP);
295
296  if (rc)
297    cxiErrorNFS(rc);
298
299  EXIT(0);
300}
301
302/* The following routine should never be called, since we have a read_inode2
303   operation.  However, knfsd checks the operation table and refuses to export
304   a file system if its read_inode operation ptr is NULL.  Hence, we need to
305   have one, even if it never gets called. */
306void
307gpfs_s_read_inode(struct inode *iP)
308{
309  /* only iget will use read_inode; this shouldn't happen as long as
310     gpfs_nfsd_iget is being invoked via fh_to_dentry/gpfs_fh_to_dentry */
311  ENTER(0);
312  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_READINODE_HUH,
313         "gpfs_s_read_inode: ? calling make_bad_inode");
314  make_bad_inode(iP);
315  EXIT(0);
316}
317
318
319/* The following routine is called from iput when the i_count goes to zero and
320   the link count in the inode is zero, which presumably means that the file
321   was deleted.  If so, we should free the disk space occupied by the file. */
322void
323gpfs_s_delete_inode(struct inode *iP)
324{
325  cxiNode_t *cnP;
326  ext_cred_t eCred;
327  Boolean isGPFS = cxiIsGPFSThread();
328  struct gpfsVfsData_t *privVfsP;
329
330  ENTER(0);
331  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_DELETE_INODE,
332        "gpfs_s_delete_inode enter: inode 0x%lX inode %d gpfsThread %d\n",
333   iP, iP->i_ino, isGPFS);
334  TraceBKL();
335
336#if MUST_TRUNCATE_INODE_PAGES
337  truncate_inode_pages(&iP->i_data, 0);
338#endif
339
340  cnP = VP_TO_CNP(iP);
341
342  if (!cnP) 
343  {
344    /* The cxiNode_t is allocated in gpfs_s_read_inode2, so if cnP is NULL,
345       this means gpfs_s_read_inode2 failed and has marked this as a bad
346       inode.  No further actions necessary in this case. */
347    goto xerror;
348  }
349
350  if (TestCtFlag(cnP, destroyIfDelInode))
351  {
352    privVfsP = VP_TO_PVP(iP);
353    DBGASSERT(privVfsP != NULL);
354
355    /* ?? "eCred is passed all the way to the daemon, and then is ignored
356       there," FBS 5/24/01 */
357    setCred(&eCred);
358
359    gpfs_ops.gpfsInodeDelete(privVfsP, cnP, isGPFS, &eCred);
360
361    iP->PRVINODE = NULL;
362    cnP->osNodeP = NULL;
363
364#if !HAS_SOP_ALLOC_INODE
365    /* If necessary, free the cxiNode_t structure which was allocated
366     * in gpfs_s_read_inode2.
367     */
368    if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
369      cxiFreeUnpinned(cnP);
370#endif
371  }
372
373xerror:
374  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_DELETE_INODE_1,
375         "gpfs_s_delete_inode exit: inode 0x%lX cnP 0x%lX\n",
376         iP, cnP);
377
378  clear_inode(iP);
379  EXIT(0);
380}
381
382int 
383gpfs_s_notify_change(struct dentry *dentryP, struct iattr *attrP)
384{
385  int rc;
386
387  ENTER(0);
388  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_NOTIFY_ENTER,
389         "gpfs_s_notify_change enter: inode 0x%lX attr 0x%lX\n",
390         dentryP->d_inode, attrP);
391  TraceBKL();
392
393  rc = gpfs_i_setattr_internal(dentryP->d_inode, attrP);
394
395  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_NOTIFY_EXIT,
396         "gpfs_s_notify_change exit: inode 0x%lX rc %d\n",
397         dentryP->d_inode, rc);
398  EXIT(0);
399  if (rc)
400    return (-rc);
401  return rc;
402}
403
404/* put_super is called just before the super_block is freed in do_unmount */
405void 
406gpfs_s_put_super(struct super_block *sbP)
407{
408  int rc = 0;
409  struct gpfsVfsData_t *privVfsP;
410
411  ENTER(0);
412  LOGASSERT(sbP != NULL);
413  LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
414  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_PUTSUPER_ENTER,
415         "gpfs_s_put_super enter: sbP 0x%lX sbP->s_dev 0x%X\n",
416         sbP, sbP->s_dev);
417  TraceBKL();
418
419  rc = cxiUnmount(sbP, false, true);
420
421  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_PUTSUPER_EXIT,
422         "gpfs_s_put_super exit: rc %d\n", rc);
423
424  EXIT(0);
425}
426
427int 
428gpfs_s_statfs(struct dentry *den, struct KSTATFS *bufP)
429{
430  struct super_block *sbP = den->d_sb;
431  int rc;
432  int code = 0;
433  int len = sizeof(struct KSTATFS);
434  struct gpfsVfsData_t *privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
435  cxiStatfs_t statfs; 
436
437  VFS_STAT_START(statfsCall);
438  ENTER(0);
439  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_STATFS_ENTER,
440         "gpfs_s_statfs enter: sbP 0x%lX len %d\n", sbP, len);
441  memset(bufP, 0, len);
442  /* BKL is held at entry */
443
444  LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
445  LOGASSERT(privVfsP != NULL);
446  rc = gpfs_ops.gpfsStatfs(privVfsP, &statfs);
447  if (rc)
448  {
449    rc = -rc;
450    code = 1;
451    goto xerror;
452  }
453
454  bufP->f_type = GPFS_SUPER_MAGIC;
455  bufP->f_bsize = statfs.f_bsize;
456  bufP->f_blocks = statfs.f_blocks;
457  bufP->f_bfree = statfs.f_bfree;
458  bufP->f_bavail = statfs.f_bavail;
459  bufP->f_files = statfs.f_files;
460  bufP->f_ffree = statfs.f_ffree; 
461  bufP->f_namelen = statfs.f_name_max;
462  bufP->f_fsid.val[0] = statfs.f_fsid.val[0];
463  bufP->f_fsid.val[1] = statfs.f_fsid.val[1];
464
465  /* If filesystem size cannot be represented by the OS statfs structure,
466     increase the "block size" and reduce the numbers */
467  if (sizeof(bufP->f_blocks) < sizeof(statfs.f_blocks))
468  {
469    while (bufP->f_blocks != statfs.f_blocks)
470    {
471      statfs.f_bsize  <<= 1;  // double f_bsize
472      statfs.f_blocks >>= 1;  // halve the rest
473      statfs.f_bfree  >>= 1;
474      statfs.f_bavail >>= 1;
475      bufP->f_bsize = statfs.f_bsize;
476      bufP->f_blocks = statfs.f_blocks;
477      bufP->f_bfree = statfs.f_bfree;
478      bufP->f_bavail = statfs.f_bavail;
479    }
480  }
481
482xerror:
483  TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_STATFS_EXIT,
484         "gpfs_s_statfs exit: f_blocks %lld f_bfree %lld f_files %d f_free %d "
485         "f_bsize %d code %d rc %d\n",
486         statfs.f_blocks, statfs.f_bfree, bufP->f_files, bufP->f_ffree,
487         bufP->f_bsize, code, rc);
488
489  if (rc)
490    cxiErrorNFS(rc);
491
492  VFS_STAT_STOP;
493  EXIT(0);
494  return rc;
495}
496
497/* umount_begin is called only when the force option is used */
498void 
499#if LINUX_KERNEL_VERSION >= 2061700
500gpfs_s_umount_begin(struct vfsmount *vfs, int flags)
501#else
502gpfs_s_umount_begin(struct super_block * sbP)
503#endif
504{
505  int dmrc = 0;
506  struct gpfsVfsData_t *privVfsP;
507#if LINUX_KERNEL_VERSION >= 2061700
508  struct super_block * sbP;
509  LOGASSERT(vfs != NULL);
510  LOGASSERT(vfs->mnt_sb != NULL);
511  sbP = vfs->mnt_sb;
512#endif
513
514  ENTER(0);
515  LOGASSERT(sbP != NULL);
516  LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
517  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UMOUNT_ENTER,
518         "gpfs_s_umount_begin enter: sbP 0x%lX sbP->s_dev 0x%X "
519         "root vfsmount 0x%X pwd vfsmount 0x%X\n", sbP, sbP->s_dev, 
520         current->fs ? current->fs->rootmnt : NULL,
521         current->fs ? current->fs->pwdmnt : NULL);
522  TraceBKL();
523
524  privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
525
526  /* We may need to generate a preunmount DMAPI event, since this
527   * is a user initiated force unmount and we need to inform any
528   * DM application before we start flushing out VFS users.
529   */
530  if (privVfsP)
531  {
532#ifdef DMAPI
533    Boolean doDMEvents = false;
534    struct dentry *dP = NULL;
535    struct inode *iP = NULL;
536    cxiNode_t *cnP = NULL;
537
538    dP = sbP->s_root;
539    if (dP != NULL)
540      iP = dP->d_inode;
541    if (iP != NULL)
542      cnP = VP_TO_CNP(iP);
543
544    /* Generate preunmount event.  We have to present this because
545     * vfsUserCleanup() may potentially kill processes on forced unmount.
546     * Since the DM application may have an open file in this file system
547     * we have to warn him.   The DM application may not however receive
548     * the final unmount event if we can't get everything released.  If
549     * VFS users still exist after this, then no mntput() and subsequent
550     * gpfs_s_put_super() will occur.
551     */
552    dmrc = gpfs_ops.gpfsDmUnmountEvent(true, true, privVfsP, cnP,
553                                       &doDMEvents, NULL, NULL, NULL, 0);
554#endif
555
556    /* Force unmount */
557    vfsUserCleanup(sbP, privVfsP, true);
558
559    if (sbP->s_root)
560      printDentryTree(sbP->s_root, 10);
561  }
562
563exit:
564  TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_UMOUNT_EXIT,
565         "gpfs_s_umount_begin exit: sbP 0x%lX privVfsP 0x%lX dmrc %d "
566         "s_active %d s_count 0x%X active files %d\n", 
567         sbP, privVfsP, dmrc, atomic_read(&sbP->s_active), 
568         sbP->s_count, !list_empty(&sbP->s_files));
569
570  /* Module count is decremented later on in do_unmount via gpfs_s_put_super */
571  EXIT(0);
572}
573
574int 
575gpfs_s_remount(struct super_block *sbP, int *flags, char *data)
576{
577  ENTER(0);
578  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOUNT,
579         "gpfs_s_remount: called\n");
580  TraceBKL();
581  EXIT(0);
582  return 0;
583}
584
585void 
586gpfs_s_write_inode(struct inode *inode)
587{
588  ENTER(0);
589  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITEINODE,
590         "gpfs_s_write_inode: called\n");
591  TraceBKL();
592  EXIT(0);
593}
594
595
596/* This routine is called from iput() just before the storage of the
597   Linux inode is freed */
598void
599gpfs_s_clear_inode(struct inode *iP)
600{
601  int code = 0;
602  struct gpfsVfsData_t *privVfsP;
603  cxiNode_t *cnP; 
604
605  ENTER(0);
606  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_CLEARINODE,
607         "gpfs_s_clear_inode enter: inode 0x%lX inode %d generic_ip 0x%lX\n",
608         iP, iP->i_ino, iP->PRVINODE);
609  TRACE3(TRACE_VNODE, 5, TRCID_LINUXOPS_CLEARINODE_DETAILS, 
610         "gpfs_s_clear_inode: cnP 0x%lX privVfsP 0x%lX tooBig %d\n",
611         VP_TO_CNP(iP), VP_TO_PVP(iP), 
612         NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE));
613
614  DBGASSERT(atomic_read((atomic_t *)&iP->i_count) == 0);
615
616  cnP = VP_TO_CNP(iP);
617  privVfsP = VP_TO_PVP(iP);
618
619  if (cnP)
620  {
621    if (privVfsP)
622      gpfs_ops.gpfsRele(privVfsP, cnP, (void *)iP, vnOp);
623
624    /* if necessary, free the cxiNode_t storage that we allocated in
625       gpfs_s_read_inode2 */
626    if (NODE_TOO_BIG_FOR_OSNODE(CXINODE_SIZE))
627    {
628      iP->PRVINODE = NULL;
629      cxiFreeUnpinned(cnP);
630    }
631  }
632
633xerror:
634  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_CLEARINODE_EXIT,
635         "gpfs_s_clear_inode exit: inode 0x%lX generic_ip 0x%lX code %d\n",
636         iP, iP->PRVINODE, code);
637  EXIT(0);
638}
639
640void 
641gpfs_s_write_super(struct super_block * sbP)
642{
643  int rc = 0;
644  struct gpfsVfsData_t *privVfsP;
645
646  ENTER(0);
647  LOGASSERT(sbP != NULL);
648  LOGASSERT(sbP->s_magic == GPFS_SUPER_MAGIC);
649  privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
650  LOGASSERT(privVfsP != NULL);
651  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITESUPER,
652         "gpfs_s_write_super enter: sbP 0x%lX\n", sbP);
653
654  /* We have to either adhere to the s_dirt semantics or
655   * ignore all syncs.  Once a file systems write_super gets
656   * called, sync_supers() restarts the super block scan.  If
657   * we don't turn off s_dirt then sync_supers() will be caught
658   * in a loop.  Alternatively if we only ignored kupdated then
659   *
660   * 1) a person could write to a file (which turns on s_dirt)
661   * 2) kupdated could run (and be ignored) but the s_dirt is turned off
662   * 3) the user attempts a sync from the command line sync, but that
663   *    does nothing since s_dirt was off
664   * 4) the user expected the sync to have done something before he
665   *    halts the machine.
666   */
667  sbP->s_dirt = 0;
668
669   /*
670   * jcw: Another way to handle this would be never turn on the s_dirt flag,
671   * and not to even have a write_super callback.  Then neither kupdated nor
672   * sync would do anything.  The sync watchdog in the GPFS daemon would
673   * substitute for kupdated.  To regain the semantics of sync, we would
674   * create dummy inodes that would have I_DIRTY set, and link one such inode
675   * onto each GPFS superblock.  Then sync would notice the dirty inodes
676   * and call back through their write_inode callbacks.  This would be
677   * the only use of I_DIRTY by GPFS, so it could be reinterpreted to mean
678   * "sync this file system".  For now, s_dirt is still set and reset, but
679   * s_dirt gets reset for all file systems before they have all been synced,
680   * so the race described above can occur.  The permanently-dirty inode
681   * needs to be implemented to fix this.
682   */
683/*  goto xerror; */
684
685  /* BKL is held at entry */
686  TRACE0(TRACE_VNODE, 3, TRCID_LINUXOPS_WRITESUPER_3,
687         "gpfs_s_write_super: performing sync");
688
689  rc = gpfs_ops.gpfsSyncfs(privVfsP);
690  if (rc) {
691    cxiErrorNFS(rc);
692    rc = -rc;
693  }
694xerror:
695  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_WRITESUPER_5,
696         "gpfs_s_write_super exit: sbP 0x%lX rc %d\n", sbP, rc);
697  EXIT(0);
698}
699
700
701
702#if LINUX_KERNEL_VERSION >= 2060000
703int gpfs_get_sb(struct file_system_type *fsTypeP,
704            int flags, const char *devNameP, void *dataP, struct vfsmount *mnt)
705{
706  struct super_block *sbP;
707  int sb_ret = 0;
708
709  ENTER(0);
710  sb_ret = get_sb_nodev(fsTypeP, flags, dataP, gpfs_fill_super, mnt);
711
712  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_GET_SB,
713           "gpfs_get_sb: flags 0x%X dataP 0x%X sbP %d\n",
714         flags, dataP, sb_ret);
715         
716  EXIT(0);
717  return sb_ret;
718}
719
720int
721gpfs_fill_super(struct super_block *sbP, void *dataP, int silent)
722#else
723struct super_block *
724gpfs_fill_super(struct super_block *sbP, void *dataP, int silent)
725#endif
726{
727  int kernel_unlock = 0;
728  struct inode *rootIP = NULL;
729  struct dentry *rootDP = NULL;
730  char *myBufP = NULL;
731  char *sgNameP;
732  char *strP;
733  char *mountpointP;
734  char *optionsP;
735  int rc = 0;
736  int mountHelperID = -1;
737  int code = 0;
738  int namelen;
739  struct gpfsVfsData_t *privVfsP;
740  cxiNode_t *cnRootP;
741  cxiIno_t rootINum;
742  char bname[BDEVNAME_SIZE];
743  Boolean restricted = false;
744
745  ENTER(0);
746  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_1,
747         "gpfs_fill_super enter: sbP 0x%lX dev 0x%X silent %d data '%s'\n",
748         sbP, sbP->s_dev, silent, ((char *)dataP == NULL) ? "" : dataP);
749
750  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_2,
751         "gpfs_fill_super: dev name '%s'\n", 
752         (sbP->s_bdev == NULL) ? "" : SBLOCK_BDEVNAME(sbP,bname));
753
754  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_3,
755         "gpfs_fill_super: s_flags 0x%x\n", sbP->s_flags);
756
757  /* A mount increases reference count on module */
758#if LINUX_KERNEL_VERSION < 2060000
759  MY_MODULE_INCREMENT();
760#endif
761
762  if (dataP == NULL || *(char *)dataP == '\0')
763  {
764    rc = EINVAL;
765    code = 1;
766    goto xerror;
767  }
768   
769  if (strlen((char *)dataP) > CXI_MAXPATHLEN)
770  {
771    rc = ENAMETOOLONG;
772    code = 2;
773    goto xerror;
774  }
775
776  sbP->s_magic = GPFS_SUPER_MAGIC;
777  sbP->s_op = &gpfs_sops;
778#if LINUX_KERNEL_VERSION > 2060000
779  sbP->s_export_op = &gpfs_export_ops;
780#endif
781
782  SBLOCK_PRIVATE(sbP) = NULL;
783
784  sbP->s_root = NULL;
785  sbP->s_blocksize = 0;
786  sbP->s_blocksize_bits = 0;
787  /* maximum filesize (avoid sign bit due to use with loff_t) */
788  sbP->s_maxbytes = 0x7FFFFFFFFFFFFFFFULL;
789
790  myBufP = (char *)cxiMallocPinned(strlen((char *)dataP) + 1);
791  if (myBufP == NULL)
792  {
793    code = 3;
794    rc = ENOMEM;
795    goto xerror;
796  }
797  strcpy(myBufP, (char *)dataP);
798  optionsP = myBufP;
799
800  /* This is the syntax parser for the options field.  At
801   * least one option must be "dev=<devname>".
802   */
803  sgNameP = NULL;
804  strP = myBufP;
805
806  while(strP)
807  {
808    if (!strncmp(strP, "dev=", 4))
809    {
810      sgNameP = (char *)strchr(strP, '=') + 1;
811      strP = (char *)strchr(strP, ','); /* more options */
812      if (strP)
813        namelen = strP - sgNameP;
814      else
815        namelen = strlen(sgNameP);
816
817      /* Copy the sgName into the first part of the
818       * buffer, null terminate it, then append the
819       * full option list.
820       */
821      strncpy(myBufP, sgNameP, namelen);
822      sgNameP = myBufP;
823      sgNameP[namelen] = '\0';
824
825      optionsP = myBufP + namelen + 1;
826      /* Move the options next (if there are any) */
827      strcpy(optionsP, strP?(char *)strP:"");
828      break;
829    }
830    else
831    {
832      strP = (char *)strchr(strP, ',');
833      if (strP) strP++;
834    }
835  }
836
837  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_OPTIONS,
838         "gpfs_fill_super: optionsP \"%s\"\n",
839         strP ? (char *) strP:"");
840
841  while (strP)
842  {
843    /* look for rs option */
844    strP = (char *)strchr(strP, ',');
845    if (strP) 
846      strP++;
847    if (strP)
848    {
849      if (!strncmp(strP, "rs", 2))
850      {
851        restricted = true;
852        break;
853      }
854    }
855  }
856
857  if (sgNameP == NULL || *sgNameP == '\0')
858  {
859    code = 4;
860    rc = EINVAL;
861    goto xerror;
862  }
863  mountpointP = sgNameP;  /* ??? */
864
865  if (restricted)
866  {
867    /* restricted mount - make it readonly */
868    sbP->s_flags |= MS_RDONLY;
869  }
870
871  strcpy(mmfs_path, bin_path);
872  strcat(mmfs_path, "/mmfsmount");
873
874  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_STARTHELPER,
875         "gpfs_fill_super: start mount helper '%s'\n", mmfs_path);
876
877  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_STARTHELPER1,
878         "gpfs_fill_super: s_flags 0x%x (rs %d), mountpointP %s\n", 
879          sbP->s_flags, restricted, mountpointP);
880
881  if (strlen(sgNameP) > CXI_MAXPATHLEN)
882  {
883    rc = ENAMETOOLONG;
884    code = 5;
885    goto xerror;
886  }
887  rc = gpfs_ops.gpfsReady();
888  if (rc != 0)
889  {
890    rc = EAGAIN;
891    code = 6;
892    goto xerror;
893  }
894
895  /* Start a new process that will receive and forward all messages during the
896   * mount process to the mount invoker. The current process will wait for
897   * this new process (in HandleMBUnmount()) and the daemon to be connected with
898   * a socket and only than call SFSMountFS() that does the real mount work.
899   */
900  strcpy(&mountCmd[2], sgNameP);               // "M /dev/gpfs1"
901  if (cxiHasMountHelper())
902    mountHelperID = fork_mount_helper(mountCmd);
903  else
904  {
905    /* Use special pid (-1) when not using mount helper */
906    mountHelperID = -1;
907  }
908
909  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_HELPERRC,
910         "gpfs_fill_super: mount helper mountHelperID %d\n", mountHelperID);
911
912#if LINUX_KERNEL_VERSION < 2060000
913  /* BKL is not held during get_sb in 2.6 */
914  if (kernel_locked())
915  {
916    unlock_kernel();
917    kernel_unlock = 1;
918  }
919#else
920  /*
921   * In 2.5, a bunch of calls originating from sys_sync will try to down
922   * s_umount and block, because it's already downed in get_sb_nodev, and won't
923   * be upped until get_sb returns (in do_kern_mount).  During gpfsMount, we'll
924   * call mmcommon getEFOption, and that will at some point try to do a sync
925   * (e.g. in gpfsClusterInit, two times), and mount will deadlock.  One way
926   * to fix this is to take out relevant sync's in the shell scripts, but this
927   * is dodgy because we might end up pulling a new sdr from another node, and
928   * that's a long and compelex path, I don't think one can guarantee there
929   * won't be any syscalls that desire s_umount along the way.  Need to think
930   * how to fix this right.  For now, up the semaphore for the duration of
931   * the gpfsMount (possibly opening up a window for other races e.g. with
932   * unmount).
933   */
934  up_write(&sbP->s_umount);
935#endif
936  rc = gpfs_ops.gpfsMount((void *)sbP, PAGE_SIZE, sgNameP, mountpointP,
937                 optionsP,
938                 (struct gpfsVfsData_t **)&(SBLOCK_PRIVATE(sbP)),
939                 &cnRootP,      /* returned root cxiNode_t */
940                 &rootINum,     /* returned root inode number */
941                 NULL,          /* not a soft mount */
942                 mountHelperID  /* mount helper id */,
943                 -1U,           /* no unique mount ID specified */
944                 (sbP->s_flags & MS_RDONLY), /* is it readonly */
945                 true);   /* allocate pinned memory */
946
947#if LINUX_KERNEL_VERSION < 2060000
948  /* BKL is not held during get_sb in 2.5 */
949  if (kernel_unlock)
950    lock_kernel();
951#else
952  down_write(&sbP->s_umount);
953#endif
954
955  if (rc)
956  {
957    code = 7;
958    goto xerror;
959  }
960
961  privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
962  DBGASSERT(cnRootP != NULL);
963  rootIP = (struct inode *)cnRootP->osNodeP;
964
965  DBGASSERT(rootIP != NULL);
966  DBGASSERT(rootIP->PRVINODE == cnRootP);
967  DBGASSERT(cnRootP->osNodeP == rootIP);
968
969  /* Successful mount in daemon.  Allocate root directory cache entry */
970  rootDP = d_alloc_root(rootIP);
971  if (!rootDP)
972  {
973    rc = gpfs_ops.gpfsUnmount(privVfsP, true);
974    if (rc == 0 || rc == ENOSYS)
975      gpfs_ops.gpfsFinishUnmount(privVfsP);
976
977    code = 8;
978    goto xerror;
979  }
980
981  rootDP->d_op = &gpfs_dops_valid;
982  sbP->s_root = rootDP;
983
984  sbP->s_dirt = 1;            /* keep it on for sync to work */
985
986  if (myBufP != NULL)
987    cxiFreePinned(myBufP);
988
989#if (LINUX_KERNEL_VERSION < 2060000)
990  unlock_super(sbP);
991#endif
992  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_SUCCEED,
993         "gpfs_fill_super exit: success sbP 0x%lX\n", sbP);
994  EXIT(0);
995#if LINUX_KERNEL_VERSION >= 2060000
996  return 0;
997#else
998  return sbP;
999#endif
1000
1001xerror:
1002  if (rootDP)
1003    dput(rootDP);
1004  if (rootIP)
1005    iput(rootIP);
1006
1007  if (myBufP != NULL)
1008    cxiFreePinned(myBufP);
1009
1010#if LINUX_KERNEL_VERSION < 2060000
1011  unlock_super(sbP);
1012
1013  sbP->s_dev = 0;
1014#endif
1015
1016  /* An unmount decrements module use count */
1017#if LINUX_KERNEL_VERSION < 2060000
1018  MY_MODULE_DECREMENT();
1019#endif
1020
1021  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READSUPER_FAILED,
1022         "gpfs_fill_super: failed code %d rc %d\n", code, rc);
1023  EXIT(0);
1024#if LINUX_KERNEL_VERSION >= 2060000
1025  return -rc;
1026#else
1027  return NULL;
1028#endif
1029}
1030
1031int
1032gpfs_reg_fs()
1033{
1034  int rc;
1035
1036  ENTER(0);
1037  spin_lock_init(&inode_lock);
1038
1039  rc = register_filesystem(&gpfs_fs_type);
1040  if (rc)
1041    goto xerror;
1042
1043  /* We create a dummy super block for purposes of instantiating
1044   * a shutdown file descriptor.  When the daemon dies this file
1045   * will be closed and its special ops will be called. 
1046   * See cxiRegisterCleanup()
1047   */
1048  shutdownSuperP = cxiMallocPinned(sizeof(struct super_block));
1049  if (!shutdownSuperP)
1050  {
1051    unregister_filesystem(&gpfs_fs_type);
1052    rc = -ENOMEM;
1053    goto xerror;
1054  }
1055 
1056  SET_SUPER_BLOCK(shutdownSuperP, &null_sops);
1057
1058xerror:
1059  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REGFS,
1060         "gpfs_reg_fs shutdownSuperP 0x%lX rc %d\n", 
1061         shutdownSuperP, rc);
1062  EXIT(0);
1063  return rc;
1064}
1065
1066void
1067gpfs_unreg_fs()
1068{
1069  int rc;
1070
1071  ENTER(0);
1072  rc = unregister_filesystem(&gpfs_fs_type);
1073
1074  if (shutdownSuperP)
1075  {
1076    UNSET_SUPER_BLOCK(shutdownSuperP);
1077    cxiFreePinned(shutdownSuperP);
1078    shutdownSuperP = NULL;
1079  }
1080
1081  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_UNREGFS,
1082         "gpfs_unreg_fs rc %d\n", rc);
1083  EXIT(0);
1084}
1085
1086void
1087kill_mmfsd(void)
1088{
1089  ENTER(0);
1090  if (mmfsd_id != -1)
1091  {
1092    TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_KILLMMFSD,
1093          "kill_mmfsd: pid %X\n", mmfsd_id);
1094
1095    kill_proc(mmfsd_id, SIGTERM, 1);
1096    if (mmfsd_id != -1)
1097#if LINUX_KERNEL_VERSION > 2060000
1098      wait_event(pwq,0);
1099#else
1100      sleep_on(&pwq);
1101#endif
1102  }
1103  EXIT(0);
1104}
1105
1106/*
1107 * Note: since this function is executed as kernel_thread "main" routine,
1108 * it may not be safe to use stack at all, e.g. call non-inlined functions,
1109 * at least in the success path.  See comments e.g. in asm-i386/unistd.h
1110 */
1111int
1112exec_mmfs(void *nothing)
1113{
1114  static char *argv[] = { mmfs_path, mount_opt, NULL };
1115  static char *envp[] = { "HOME=/", NULL };
1116  int rc;
1117
1118  ENTER(0);
1119  set_fs(KERNEL_DS);
1120
1121  rc = EXEC_HELPER(mmfs_path, argv, envp, 1 /* wait if possible */);
1122
1123xerror:
1124  if(rc)
1125    TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_EXECMMFS_EXIT,
1126         "exec_mmfs: exit rc -1 errno %d path %s\n", errno, mmfs_path);
1127  EXIT(0);
1128  return rc;
1129}
1130
1131int
1132fork_mount_helper(char *data)
1133{
1134
1135  ENTER(0);
1136  strcpy(mount_opt, data);
1137  mount_id = kernel_thread(exec_mmfs, 0, 0);
1138
1139  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_FORK_MOUNTHELPER,
1140        "fork_mount_helper: new pid %d\n", mount_id);
1141
1142  EXIT(0);
1143  return mount_id;
1144}
1145
1146/* Set device id and other information for a file system being mounted */
1147int
1148cxiSetMountInfo(void *osVfsP, cxiDev_t sgDevID,
1149                int bsize, void *osRootNodeP, cxiNode_t *cnRootP,
1150                Boolean *releRootP, void *gnRootP, 
1151                fsid_t fsid)/* (out) maintain hold on root */
1152{
1153  struct super_block *sbP = (struct super_block *)osVfsP;
1154  struct inode *rootIP = (struct inode *)osRootNodeP; // root dir inode
1155  int i;
1156
1157  ENTER(0);
1158  TRACE4(TRACE_VNODE, 1, TRCID_SET_MOUNT_INFO,
1159         "cxiSetMountInfo: sbP 0x%lX rootIP 0x%lX cnRootP 0x%lX "
1160         "gnRootP 0x%lX\n", sbP, rootIP, cnRootP, gnRootP);
1161  DBGASSERT(sbP != NULL);
1162
1163  /* This is the auto remount case where mmfsd died/killed and restarted. */
1164  if (gnRootP == cnRootP)
1165  {
1166    /* Since the OS independent layer looked up and held the
1167     * root vnode, we've got too many hold counts for a reconnect.
1168     * Tell upper layer that we must release.
1169     */
1170    *releRootP = true;
1171  }
1172  else
1173  {
1174    /* Don't attempt to release the root VFS node */
1175    *releRootP = false;
1176    sbP->s_blocksize = bsize;
1177    for (i = sbP->s_blocksize, sbP->s_blocksize_bits = 0; i != 1; i >>= 1)
1178      sbP->s_blocksize_bits++;
1179  }
1180  if (rootIP != NULL)
1181  {
1182    DBGASSERT(rootIP->i_ino == INODENUM_ROOTDIR_FILE);
1183    DBGASSERT(rootIP->PRVINODE == cnRootP);
1184  }
1185
1186  EXIT(0);
1187  return 0;
1188}
1189
1190/* Attempt whatever we can to get holders of VFS elements
1191 * (dcache entries, etc) to leave.
1192 */
1193int
1194vfsUserCleanup(struct super_block *sbP, 
1195               struct gpfsVfsData_t *privVfsP, Boolean force)
1196{
1197  struct siginfo sinfo;
1198  struct task_struct *g, *tsP;
1199  Boolean killit;
1200  int rc;
1201
1202  ENTER(0);
1203
1204#ifndef GPFS_ARCH_POWER
1205  /* Forced unmount doesn't really work very well on Linux since
1206   * the VFS layer is very stateful.  If a process is sitting in
1207   * the file system, its vmount count will not go to zero and a
1208   * proper unmount can occur.  We're experimenting with the
1209   * semantics (akin to umount -k on other OSes) where processes
1210   * are killed if they are within a forced unmounted file system.
1211   *
1212   * Note that this doesn't get everyone.   If you have a file open
1213   * in GPFS but don't have your current working directory in GPFS
1214   * then you're not killed.   To kill those user (or close their
1215   * files) you'd have to traipse thru the file table.  There's
1216   * a lot of OS specific code there that we wouldn't want to get
1217   * into.
1218   */
1219  if (force)
1220  {
1221    sinfo.si_signo = SIGKILL;
1222    sinfo.si_errno = 0;
1223    sinfo.si_code = SI_KERNEL;
1224    sinfo.si_addr = vfsUserCleanup;
1225    sinfo.si_pid = current->pid;
1226    sinfo.si_uid = current->uid;
1227
1228    // read_lock(&tasklist_lock);
1229    rcu_read_lock();
1230
1231    DO_EACH_THREAD(g,tsP)
1232    {
1233      task_lock(tsP);
1234      if (tsP->fs && tsP->fs->pwdmnt && tsP->fs->pwdmnt->mnt_sb == sbP)
1235        killit = true;
1236      else
1237        killit = false;
1238      task_unlock(tsP);
1239
1240      if (killit)
1241        send_sig_info(SIGKILL, &sinfo, tsP);
1242    } WHILE_EACH_THREAD(g,tsP);
1243    // read_unlock(&tasklist_lock);
1244    rcu_read_unlock();
1245  }
1246#endif
1247       
1248  /* Purge cached OS VFS nodes/cxiNodes. */
1249  rc = gpfs_ops.gpfsUncache(privVfsP);
1250
1251  EXIT(0);
1252  return rc;
1253}
1254
1255/* Called by gpfs_s_put_super() when the last holder of the superblock
1256 * is gone.   We should be able to successfully clean up and become
1257 * unmounted.
1258 */
1259int 
1260cxiUnmount(void *osVfsP, Boolean force, Boolean doDMEvents)
1261{
1262  int rc = 0;
1263  int dmrc = 0;
1264  struct super_block *sbP = (struct super_block *)osVfsP;
1265  struct gpfsVfsData_t *privVfsP;
1266#ifdef DMAPI
1267  Boolean dmDoUnmountEvent = false;
1268  void *sgUidP = NULL;
1269  void *eventlistP = NULL;
1270  void *sessLocP = NULL;
1271  struct dentry *dP = NULL;
1272  struct inode *iP = NULL;
1273  cxiNode_t *cnP = NULL;
1274#endif
1275
1276  ENTER(0);
1277  LOGASSERT(sbP != NULL);
1278  privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
1279
1280  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CXIUNMOUNT_ENTER,
1281         "cxiUnmount: enter privVfsP 0x%lX sbP 0x%lX force %d doDM %d\n", 
1282         privVfsP, sbP, force, doDMEvents);
1283
1284  if (privVfsP == NULL)
1285    goto exit;
1286
1287#ifdef DMAPI
1288  dP = sbP->s_root;
1289  if (dP != NULL)
1290    iP = dP->d_inode;
1291  if (iP != NULL)
1292    cnP = VP_TO_CNP(iP);
1293
1294  /* Generate preunmount event */
1295  if (doDMEvents)
1296  {
1297    rc = gpfs_ops.gpfsDmUnmountEvent(true, force, privVfsP, cnP,
1298                                     &dmDoUnmountEvent, &sgUidP, 
1299                                     &eventlistP, &sessLocP, 0);
1300    /* We should continue unmount even if it fails. Otherwise, linux
1301       screwup and cannot remount unless we shutdown the daemon */
1302  }
1303#endif
1304 
1305  /* The superblock is unallocated by the kernel after gpfs_s_put_super /
1306     cxiUnmount, regardless of any errors here because it doesn't check
1307     a return code from the filesystem specific put_super call, so we need to
1308     proceed through these calls even if an error occurs; not cleaning up
1309     things in gpfsFinishUnmount (ie, the gpfs mount list) after an error
1310     with unmount causes havoc when the daemon later restarts. */
1311
1312  rc = vfsUserCleanup(sbP, privVfsP, force);
1313  if (rc == ENOSYS)
1314    rc = 0;
1315
1316  rc = gpfs_ops.gpfsUnmount(privVfsP, force);
1317  if (rc == ENOSYS)
1318    rc = 0; 
1319
1320  gpfs_ops.gpfsFinishUnmount(privVfsP);
1321  SBLOCK_PRIVATE(sbP) = NULL;
1322
1323#ifdef DMAPI
1324  if (dmDoUnmountEvent)
1325    dmrc = gpfs_ops.gpfsDmUnmountEvent(false, force, NULL, NULL,
1326                                       &dmDoUnmountEvent, &sgUidP, 
1327                                       &eventlistP, &sessLocP, rc);
1328#endif
1329  sbP->s_dirt = 0;
1330
1331  printSuperList(sbP);
1332
1333  /* An unmount decrements module use count */
1334#if LINUX_KERNEL_VERSION < 2060000
1335  MY_MODULE_DECREMENT();
1336#endif
1337
1338exit:
1339  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_CXIUNMOUNT_EXIT,
1340         "cxiUnmount: exit rc %d dmrc %d\n", rc, dmrc);
1341  EXIT(0);
1342  return rc;
1343}
1344
1345int
1346cxiReactivateOSNode(void *osVfsP, cxiNode_t *cnP, void **osNodePP)
1347{
1348  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REACTIVATE_VNODE,
1349         "cxiReactivateOSNode: sbP 0x%lX cxiNodeP 0x%lX osNodePP 0x%lX\n",
1350         osVfsP, cnP, osNodePP);
1351  LOGASSERT(0);   // not implemented on linux
1352  return 0;
1353}
1354
1355
1356#if LINUX_KERNEL_VERSION >= 2060000
1357static int
1358inodeFindActor(struct inode *iP, void *opaqueP)
1359{
1360  /* iget4 can be called on one thread which goes to create a new
1361   * inode (get_new_inode, gpfs_s_read_inode2, gpfsInodeRead, readOSNode)
1362   * but before that thread completes initializing the cxiNode_t, another
1363   * thread calls iget4 and gets here (find_inode, inodeFindActor).
1364   * Similar races exist when an inode is being deleted.
1365   *
1366   * Ideally, we'd like to spin_unlock() on the inode_lock and call
1367   * wait_on_inode() but we cannot release the inode_lock here (find_inode
1368   * is depending on it to protect its list_entry() calls).  Fortunately,
1369   * iget4 does exactly this wait for the inode upon return from
1370   * find_inode.  Returning zero here would cause get_new_inode to be
1371   * called (which would assert when it found the first thread had
1372   * already allocated the gnode).  Return 1 and iget4 will do the
1373   * necessary wait.
1374   *
1375   * We can't call anything here that could sleep because we are holding
1376   * the inode_lock and sleeping can result in a hang
1377   * TRACE4N does not block and is ok here.
1378   */
1379
1380   TRACE4N(TRACE_VNODE, 2, TRCID_LINUXOPS_INODEFINDACTOR,
1381          "inodeFindActor: iP 0x%lX i_state 0x%x cxiNodeP 0x%lX isBad %d\n",
1382          iP, iP->i_state, VP_TO_CNP(iP), is_bad_inode(iP));
1383
1384   if (iP->i_state & INODE_IN_CACHE)
1385     return 1;
1386
1387   if (VP_TO_CNP(iP) == NULL)
1388   {
1389     if (iP->i_state == 0)
1390       return 0;
1391     else
1392       return 1;
1393   }
1394
1395  return gpfs_ops.gpfsInodeFindActor(VP_TO_CNP(iP), iP->i_ino, opaqueP);
1396}
1397
1398static int
1399inodeInitLocked(struct inode *iP, void *opaqueP)
1400{
1401  cxiIGetArg_t *argsP = (cxiIGetArg_t *)opaqueP;
1402
1403  iP->i_ino = argsP->extInodeNum;
1404  return 0;
1405}
1406#else
1407static int
1408inodeFindActor(struct inode *iP, unsigned long inodeNum, void *opaqueP)
1409{
1410  /* iget4 can be called on one thread which goes to create a new
1411   * inode (get_new_inode, gpfs_s_read_inode2, gpfsInodeRead, readOSNode)
1412   * but before that thread completes initializing the cxiNode_t, another
1413   * thread calls iget4 and gets here (find_inode, inodeFindActor).
1414   * Similar races exist when an inode is being deleted.
1415   *
1416   * Ideally, we'd like to spin_unlock() on the inode_lock and call
1417   * wait_on_inode() but we cannot release the inode_lock here (find_inode
1418   * is depending on it to protect its list_entry() calls).  Fortunately,
1419   * iget4 does exactly this wait for the inode upon return from
1420   * find_inode.  Returning zero here would cause get_new_inode to be
1421   * called (which would assert when it found the first thread had
1422   * already allocated the gnode).  Return 1 and iget4 will do the
1423   * necessary wait.
1424   *
1425   * We can't call anything here that could sleep because we are holding
1426   * the inode_lock and sleeping can result in a hang
1427   * TRACE3N does not block and is ok here.
1428   */
1429
1430   TRACE3N(TRACE_VNODE, 2, TRCID_LINUXOPS_INODEFINDACTOR2,
1431          "inodeFindActor: iP 0x%lX i_state 0x%x cxiNodeP 0x%lX\n",
1432          iP, iP->i_state, VP_TO_CNP(iP));
1433
1434   if (iP->i_state & INODE_IN_CACHE)
1435     return 1;
1436
1437   if (VP_TO_CNP(iP) == NULL)
1438   {
1439     if (iP->i_state == 0)
1440       return 0;
1441     else
1442       return 1;
1443   }
1444
1445  return gpfs_ops.gpfsInodeFindActor(VP_TO_CNP(iP), inodeNum, opaqueP);
1446}
1447#endif
1448
1449
1450int
1451cxiNewOSNode(void *osVfsP, cxiNode_t **cnPP, void **osNodePP,
1452             cxiIno_t inum, int nodeSize, void *opaqueP)
1453{
1454  struct super_block *sbP = (struct super_block *)osVfsP;
1455  struct inode *iP;
1456  int rc;
1457  int loop_count = 0;
1458  int sleep_count = 0;
1459
1460  ENTER(0);
1461  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE,
1462         "cxiNewOSNode: sbP 0x%lX inum %d size %d",
1463         sbP, inum, nodeSize);
1464
1465  /* The requested nodeSize must match CXINODE_SIZE */
1466  if (nodeSize != CXINODE_SIZE)
1467    goto bad_node_size;
1468
1469repeat:
1470#if LINUX_KERNEL_VERSION >= 2060000
1471  iP = iget5_locked(sbP, inum, inodeFindActor, inodeInitLocked, opaqueP);
1472#else
1473  iP = iget4(sbP, inum, inodeFindActor, opaqueP);
1474#endif
1475  if (iP == NULL)
1476  {
1477    *cnPP = NULL;
1478    *osNodePP = NULL;
1479    rc = ENOMEM;
1480    goto xerror;
1481  }
1482
1483#if !HAS_SOP_READ_INODE2
1484  /* We fill in the inode as opposed to a read_inode
1485   * operation executed with iget()
1486   */
1487  if (iP->i_state & I_NEW)
1488  {
1489    gpfs_s_read_inode2(iP, opaqueP);
1490    unlock_new_inode(iP);
1491  }
1492#endif
1493 
1494  if (is_bad_inode(iP))
1495  {
1496    TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_NEW_BAD,
1497           "cxiNewOSNode: BAD INODE 0x%X\n", iP);
1498    *cnPP = NULL;
1499    *osNodePP = NULL;
1500    iput(iP);
1501    rc = EIO;
1502    goto xerror;
1503  }
1504
1505  /* Did we get the right inode ?
1506   * When inodeFindActor is called from find_inode() and the inode
1507   * is in transition it might return found without checking sanpId
1508   * so go check again.
1509   */
1510#if LINUX_KERNEL_VERSION >= 2060000
1511  if (!inodeFindActor(iP, opaqueP))
1512#else
1513  if (!inodeFindActor(iP, iP->i_ino, opaqueP))
1514#endif
1515  {
1516    if (sleep_count > 10)
1517    {
1518      TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_NEW_VNODE_2,
1519             "cxiNewOSNode: rc ESTALE inode 0x%lX ino %d i_state 0x%x "
1520             "cxiNodeP 0x%lX isBad %d\n", iP, iP->i_ino, iP->i_state, 
1521             VP_TO_CNP(iP), is_bad_inode(iP));
1522
1523      *cnPP = NULL;
1524      *osNodePP = NULL;
1525      iput(iP);
1526      rc = EIO;
1527      goto xerror;
1528    }
1529
1530    if (loop_count > 1000)
1531    {
1532      cxiSleep(10);
1533      sleep_count++;
1534      loop_count = 0;
1535    }
1536
1537    loop_count++;
1538    iput(iP);
1539    goto repeat;
1540  }
1541 
1542  DBGASSERT(iP->PRVINODE != NULL);
1543  *cnPP = (cxiNode_t *)iP->PRVINODE;
1544  *osNodePP = iP;
1545  rc = 0;
1546
1547xerror:
1548  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_EXIT,
1549         "cxiNewOSNode: exit osNodeP 0x%lX cnP 0x%lX rc %d\n",
1550         *osNodePP, *cnPP, rc);
1551  EXIT(0);
1552  return rc;
1553
1554bad_node_size:
1555  /* The requested nodeSize does not match CXINODE_SIZE.
1556     Whoever called us is an incompitble version of the code or was
1557     somehow not compiled correctly. */
1558  TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_NEW_VNODE_BAD,
1559         "cxiNewOSNode: requested nodeSize %d does not match CXINODE_SIZE %d",
1560         nodeSize, CXINODE_SIZE);
1561  printk("mmfs: module inconsistency detected in cxiNewOSNode:\n"
1562         "      requested nodeSize %d does not match CXINODE_SIZE %d\n",
1563         nodeSize, CXINODE_SIZE);
1564  LOGASSERT(!"nodeSize != CXINODE_SIZE");
1565  EXIT(0);
1566  return ELIBBAD;
1567}
1568
1569
1570/* The linux kernel decrements the inode count and deallocates the
1571 * inode after gpfs_s_put_inode() is called therefore this routine
1572 * doesn't perform a delete.
1573 */
1574void
1575cxiFreeOSNode(void *osVfsP, struct cxiNode_t *cnP, void *osNodeP)
1576{
1577  struct super_block *sbP = (struct super_block *)osVfsP;
1578  struct inode *iP = (struct inode *)osNodeP;
1579
1580  ENTER(0);
1581  TRACE5(TRACE_VNODE, 2, TRCID_LINUXOPS_DELETE_VNODE,
1582         "cxiFreeOSNode enter: sbP 0x%lX cxiNodeP 0x%lX "
1583         "iP 0x%lX inode %d i_count %d\n",
1584         sbP, cnP, iP,
1585         iP ? iP->i_ino : -1,
1586         iP ? atomic_read((atomic_t *)&iP->i_count) : 0);
1587
1588  DBGASSERT(cnP->osNodeP == iP);
1589  cnP->osNodeP = NULL;
1590
1591  if (iP)
1592  {
1593    DBGASSERT(atomic_read((atomic_t *)&iP->i_count) == 0);
1594    iP->i_op = NULL;
1595    iP->i_fop = NULL;
1596    if (iP->i_mapping)
1597      iP->i_mapping->a_ops = &gpfs_aops_after_inode_delete;
1598    iP->i_size = 0;
1599    iP->i_nlink = 0;
1600  }
1601  EXIT(0);
1602}
1603
1604void
1605cxiDeleteMmap(cxiVmid_t segid)
1606{
1607  TRACE1(TRACE_VNODE, 2, TRCID_LINUXOPS_DELETE_MMAP,
1608         "cxiDeleteMmap: segid 0x%X\n", segid);
1609}
1610
1611void
1612cxiReinitOSNode(void *osVfsP, struct cxiNode_t *cnP, void *osNodeP)
1613{
1614  struct super_block *sbP = (struct super_block *)osVfsP;
1615  struct inode *iP = (struct inode *)osNodeP;
1616
1617  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REINIT_VNODE,
1618         "cxiReinitOSNode: sbP 0x%lX cnP 0x%lX iP 0x%lX\n",
1619         sbP, cnP, iP);
1620  LOGASSERT(0);   // not implemented on linux
1621}
1622
1623void
1624cxiDumpOSNode(cxiNode_t *cnP)
1625{
1626  struct inode *iP = (struct inode *)cnP->osNodeP;
1627  struct list_head *dListP, *dHeadP;
1628  struct dentry *dentry;
1629
1630  ENTER(0);
1631  TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_DUMP_VNODE,
1632         "cxiDumpOSNode: cxiNodeP 0x%lX iP 0x%lX\n", cnP, iP);
1633  if (iP)
1634  {
1635    printInode(iP);
1636
1637    dHeadP = &iP->i_dentry;
1638    spin_lock(&dcache_lock);
1639    for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next)
1640    {
1641      dentry = list_entry(dListP, struct dentry, d_alias);
1642      printDentry(dentry);
1643    }
1644    spin_unlock(&dcache_lock);
1645  }
1646  EXIT(0);
1647}
1648
#if LINUX_KERNEL_VERSION >= 2060000
/* Test callback for ilookup5(): matches only the exact inode passed in
 * opaqueP, and only while it is not being released.
 *
 * igrab can be called while another thread is doing a final iput
 * so instead we call ilookup5.  ilookup5 processes stuff under
 * the inode_lock so if we are in here and find the inode then
 * ilookup5 will increase i_count.
 *
 * We can't call anything here that could sleep because we are holding
 * the inode_lock and sleeping can result in a hang.
 * TRACE3N does not block and is ok here.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
igrabInodeFindActor(struct inode *iP, void *opaqueP)
{
  struct inode *wantedIP = (struct inode *) opaqueP;

  TRACE3N(TRACE_VNODE, 2, TRCID_LINUXOPS_IGRABINODEFINDACTOR,
          "igrabInodeFindActor: iP 0x%lX i_state 0x%x inode 0x%lX \n",
          iP, iP->i_state, wantedIP);

  return (!(iP->i_state & INODE_BEING_RELEASED) && iP == wantedIP) ? 1 : 0;
}
#endif
1676
1677/* On linux we can't just decrement the i_count
1678 * thus this routine will only accept a positive
1679 * increment.  If you want to put a reference then
1680 * call cxiPutOSNode() which calls back thru the VFS
1681 * layer.
1682 */
1683int
1684cxiRefOSNode(void *osVfsP, cxiNode_t *cnP, void *osNodeP, int inc)
1685{
1686  return cxiRefOsNode(osVfsP,cnP,osNodeP,inc,false);
1687}
1688
1689int
1690cxiRefOsNode(void *osVfsP, cxiNode_t *cnP, void *osNodeP, int inc,
1691             Boolean calledFromRevoke)
1692{
1693  struct inode *iP = (struct inode *)osNodeP;
1694  struct inode *riP = NULL;
1695  int holdCount;
1696  int ino;
1697
1698  ENTER(0);
1699  DBGASSERT(iP != NULL);
1700  DBGASSERT(inc == 1);
1701
1702#if LINUX_KERNEL_VERSION >= 2060000
1703  /* The igrab() may fail if this inode is actively going
1704   * thru a release.
1705   */
1706  if(osVfsP)
1707  {
1708     /* we already have a hold */
1709     riP = igrab(iP);
1710  }
1711  /* we may not currently have a hold so use ilookup5 */
1712  else if(GPFS_TYPE(iP))
1713  {
1714     riP = ilookup5(iP->i_sb, iP->i_ino, igrabInodeFindActor, (void*)iP);
1715  }
1716#else
1717  /* The igrab() may fail if this inode is actively going
1718   * thru a release.
1719   */
1720  riP = igrab(iP);
1721#endif
1722  if (riP)
1723  {
1724    DBGASSERT(!(iP->i_state & INODE_BEING_RELEASED));
1725    holdCount = atomic_read((atomic_t *)&riP->i_count);
1726    ino = riP->i_ino;
1727  }
1728  else
1729  {
1730    holdCount = 0;
1731    ino = -1;
1732    /* If this function is called from revoke handler check of this inode
1733       is being released
1734    */
1735    if (calledFromRevoke && (iP->i_state & INODE_BEING_RELEASED) )
1736      holdCount = -1;
1737  }
1738  TRACE5(TRACE_VNODE, 2, TRCID_LINUXOPS_REF_VNODE,
1739         "cxiRefOSNode exit: sbP 0x%lX cxiNodeP 0x%lX iP 0x%lX inode %d "
1740         "i_count to %d", osVfsP, cnP, iP, ino, holdCount);
1741  EXIT(0);
1742  return holdCount;
1743}
1744 
1745/* Determines if OS node is inactive */
1746int 
1747cxiInactiveOSNode(void *osVfsP, struct cxiNode_t *cnP, void *osNodeP, 
1748                 Boolean *canCacheP, Boolean *hasReferencesP)
1749{
1750  struct inode *iP = (struct inode *)osNodeP;
1751  struct super_block *sbP = (struct super_block *)osVfsP;
1752  int holdCount;
1753
1754  ENTER(0);
1755  DBGASSERT(cnP->osNodeP == iP);
1756
1757  *canCacheP = false;
1758  *hasReferencesP = false;
1759
1760  holdCount = atomic_read((atomic_t *)&iP->i_count);
1761  if (holdCount > 0)
1762    *hasReferencesP = true;
1763
1764  TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_INACTIVE_VNODE,
1765         "cxiInactiveOSNode: sbP 0x%lX cxiNodeP 0x%lX iP 0x%lX "
1766         "i_count %d canCache %d hasReferences %d\n", sbP, cnP, iP, 
1767         holdCount, *canCacheP, *hasReferencesP);
1768
1769  EXIT(0);
1770  return holdCount;
1771}
1772
1773void
1774cxiPutOSNode(void *vP)
1775{
1776  struct inode *iP = (struct inode *)vP;
1777  int holdCount;
1778
1779  ENTER(0);
1780  DBGASSERT(iP != NULL);
1781  DBGASSERT(!(iP->i_state & INODE_BEING_RELEASED));
1782  holdCount = atomic_read((atomic_t *)&iP->i_count);
1783  DBGASSERT(holdCount > 0);
1784
1785  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_PUT_OSNODE,
1786         "cxiPutOSNode enter: iP 0x%lX inode %d i_count to %d\n",
1787         iP, iP->i_ino, holdCount-1);
1788
1789  iput(iP);
1790
1791  EXIT(0);
1792  return;
1793}
1794
1795void
1796cxiDestroyOSNode(void *vP)
1797{
1798  struct inode *iP = (struct inode *)vP;
1799  int holdCount;
1800
1801  ENTER(0);
1802  DBGASSERT(iP != NULL);
1803  holdCount = atomic_read((atomic_t *)&iP->i_count);
1804  DBGASSERT(holdCount > 0);
1805
1806  TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_DESTROY_OSNODE,
1807         "cxiDestroyOSNode enter: iP 0x%lX inode %d i_count %d i_nlink %d\n",
1808         iP, iP->i_ino, holdCount, iP->i_nlink);
1809
1810  iP->i_nlink = 0;
1811  EXIT(0);
1812  return;
1813}
1814
1815void
1816cxiSetOSNodeType(struct cxiNode_t *cnP, cxiMode_t mode, cxiDev_t dev)
1817{
1818  ENTER(0);
1819  if (S_ISDIR(mode))
1820    cnP->nType = cxiVDIR;
1821  else if (S_ISREG(mode))
1822    cnP->nType = cxiVREG;
1823  else if (S_ISLNK(mode))
1824    cnP->nType = cxiVLNK;
1825  else if (S_ISCHR(mode))
1826    cnP->nType = cxiVCHR;
1827  else if (S_ISBLK(mode))
1828    cnP->nType = cxiVBLK;
1829  else if (S_ISFIFO(mode))
1830    cnP->nType = cxiVFIFO;
1831  else if (S_ISSOCK(mode))
1832    cnP->nType = cxiVSOCK;
1833  else
1834    DBGASSERT(0);
1835  EXIT(0);
1836}
1837
1838void
1839cxiUpdateInode(cxiNode_t *cnP, cxiVattr_t *attrP, int what)
1840{
1841  struct inode *iP = (struct inode *)cnP->osNodeP;
1842
1843  ENTER(0);
1844  if (iP != NULL)
1845  {
1846    if (what & CXIUP_ATIME)
1847    {
1848      CXITIME_TO_INODETIME(attrP->va_atime, iP->i_atime);
1849      EXIT(0);
1850      return;
1851    }
1852    if (what & CXIUP_MODE)
1853    {
1854      iP->i_mode = attrP->va_mode;
1855      CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
1856    }
1857    if (what & CXIUP_OWN)
1858    {
1859      iP->i_mode = attrP->va_mode;
1860      iP->i_uid  = attrP->va_uid;
1861      iP->i_gid  = attrP->va_gid;
1862      CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
1863    }
1864    if (what & CXIUP_NLINK)
1865    {
1866      iP->i_nlink = attrP->va_nlink;
1867    }
1868    if (what & CXIUP_SIZE)
1869    {
1870      iP->i_size = attrP->va_size;
1871      iP->i_blocks = attrP->va_blocks;
1872    }
1873    if (what & CXIUP_SIZE_BIG)
1874    {
1875      spin_lock(&inode_lock);
1876      if (attrP->va_size > iP->i_size)
1877      {
1878        iP->i_size = attrP->va_size;
1879        iP->i_blocks = attrP->va_blocks;
1880      }
1881      spin_unlock(&inode_lock);
1882    }
1883    if (what & CXIUP_TIMES)
1884    {
1885      CXITIME_TO_INODETIME(attrP->va_atime, iP->i_atime);
1886      CXITIME_TO_INODETIME(attrP->va_mtime, iP->i_mtime);
1887      CXITIME_TO_INODETIME(attrP->va_ctime, iP->i_ctime);
1888    }
1889    if (what & CXIUP_PERM)
1890    {
1891      iP->i_mode = attrP->va_mode;
1892      iP->i_uid  = attrP->va_uid;
1893      iP->i_gid  = attrP->va_gid;
1894      cnP->xinfo = attrP->va_xinfo;
1895      setIopTable(iP, (attrP->va_xinfo & VA_XPERM) != 0);
1896      cnP->icValid |= CXI_IC_PERM;
1897    }
1898    if ((what & CXIUP_NLINK) && TestCtFlag(cnP,destroyIfDelInode))
1899    {
1900      cxiDropInvalidDCacheEntry(cnP);
1901
1902      /* swapd must be notified to prune dcache entries */
1903      if (TestCtFlag(cnP, pruneDCacheNeeded))
1904        gpfs_ops.gpfsSwapdEnqueue(cnP);
1905    }
1906  }
1907
1908  TRACE4(TRACE_VNODE, 3, TRCID_CXIUPDATE_INODE_3,
1909     "cxiUpdateInode: iP 0x%X atime 0x%X mtime 0x%X ctime 0x%X\n",
1910      iP, GET_INODETIME_SEC(iP->i_atime), GET_INODETIME_SEC(iP->i_mtime), 
1911      GET_INODETIME_SEC(iP->i_ctime));
1912
1913  TRACE7(TRACE_VNODE, 3, TRCID_CXIUPDATE_INODE_1,
1914     "cxiUpdateInode: what %d mode 0x%X uid %d gid %d nlink %d size %lld"
1915     " blocks %d\n",
1916     what, iP->i_mode, iP->i_uid, iP->i_gid, iP->i_nlink,
1917     iP->i_size, iP->i_blocks);
1918  EXIT(0);
1919}
1920
1921/* Determine if operating system specific node belongs to a particular VFS and
1922   can be uncached.  Returns OS node if it exists, the determination of
1923   whether it can be uncached or not. */
1924Boolean
1925cxiCanUncacheOSNode(void *osVfsP, struct cxiNode_t *cnP, void **vP)
1926{
1927  struct inode *iP = (struct inode *)cnP->osNodeP;
1928  int count = 0;
1929
1930  ENTER(0);
1931  if (iP != NULL && iP->i_sb == osVfsP)
1932  {
1933    count = atomic_read((atomic_t *)&iP->i_count);
1934    *vP = (void *)iP;
1935  }
1936  else
1937    *vP = NULL;
1938
1939  TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_CANUNCACHE_OSNODE,
1940         "cxiCanUncacheOSNode: cxiNode 0x%lx vP 0x%lX osVfsP 0x%lX "
1941         "i_sb 0x%lX inode %d i_count %d\n", cnP, vP, osVfsP,
1942         (iP ? iP->i_sb : 0), (iP ? iP->i_ino : 0), count);
1943  EXIT(0);
1944  return (count == 0);
1945}
1946
1947
1948/* Add operating system specific node to the lookup cache.
1949   This routine is called with the necessary distributed lock held to
1950   guarantee that the lookup cache entry is valid. */
1951#ifdef CCL
1952void * 
1953cxiAddOSNode(void *dentryP, void *vP, DentryOpTableTypes dopTabType, int lookup)
1954#else
1955void *
1956cxiAddOSNode(void *dentryP, void *vP, int lookup)
1957#endif
1958{
1959  struct inode *iP = (struct inode *)vP;
1960  struct dentry *dP = (struct dentry *)dentryP;
1961
1962  ENTER(0);
1963  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_ADD_OSNODE,
1964         "cxiAddOSNode: dentry 0x%lX vP 0x%lX unhashed %d",
1965         dentryP, vP, d_unhashed(dP));
1966
1967  /* mark dentry valid */
1968#ifdef CCL
1969  switch(dopTabType)
1970  {
1971    /* Positive dcache entry for inexact file name match for Samba user.
1972       Only valid for other Samba users.
1973       Not valid for local/NFS users.  Forces lookup for local/NFS users. */
1974    case DOpOnlyValidIfSamba:
1975      dP->d_op = &gpfs_dops_valid_if_Samba;
1976      break;
1977    /* Negative dcache entry for exact file name match for local/NFS user.
1978       Only valid for other local/NFS users.
1979       Not valid for Samba users.  Forces lookup for Samba users. */
1980    case DOpInvalidIfSamba:
1981      dP->d_op = &gpfs_dops_invalid_if_Samba;
1982      break;
1983    default:
1984      dP->d_op = &gpfs_dops_valid;
1985      break;
1986  }
1987#else
1988  dP->d_op = &gpfs_dops_valid;
1989#endif
1990
1991  if (!d_unhashed(dP))
1992  {
1993    /* hook up dentry and inode */
1994    d_instantiate(dP, iP);
1995    dP = NULL;
1996  }
1997  else 
1998  {
1999#if LINUX_KERNEL_VERSION >= 2060000
2000    if (lookup)
2001    { 
2002      dP = d_splice_alias(iP, dP);
2003      goto exit;
2004    }
2005#endif
2006    /* hook up dentry and inode */
2007    d_instantiate(dP, iP);
2008
2009    /* if not yet done so, add to hash list */
2010    d_rehash(dP);
2011
2012    dP = NULL;
2013  }
2014exit:
2015
2016  EXIT(0);
2017  return dP;
2018}
2019
2020
#ifdef NFS4_CLUSTER
/* Get the list of fs locations for the file system mounted on sbP.
 *
 * bufP is passed through to gpfsFsLocations().
 *
 * Returns the number of locations on success, or the negated return
 * code of gpfsFsLocations() on failure.
 */
int
gpfs_s_fs_locations(struct super_block *sbP, char **bufP)
{
  int rc;
  int code = 0;
  int loc_count;
  struct gpfsVfsData_t *privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
  LOGASSERT(privVfsP != NULL);

  ENTER(0);
  VFS_STAT_START(fsLocationCall);

  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCFS_ENTER,
         "gpfs_s_fs_locations enter: sbP 0x%lX\n", sbP);

  rc = gpfs_ops.gpfsFsLocations(privVfsP, bufP, &loc_count);
  if (rc != 0)
  {
    /* Report the failure as a negative value; code marks the spot */
    rc = -rc;
    code = 1;
  }
  else
  {
    rc = loc_count;
  }

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCFS_EXIT,
         "gpfs_s_fs_locations exit: sbP 0x%lX code %d rc %d\n",
         sbP, code, rc);
  VFS_STAT_STOP;
  EXIT(0);
  return rc;
}
#endif
2056
2057
2058/* Functions for converting between an NFS file handle and a dentry.
2059   We define our own functions rather than using the generic ones in
2060   fs/nfsd/nfsfh.c so we can revalidate the file inode, since it could have
2061   been changed by another node. */
2062
2063static struct dentry *
2064gpfs_nfsd_iget_dentry(struct inode *inode, __u32 generation)
2065{
2066  struct list_head *lp;
2067  struct dentry *result;
2068
2069  ENTER(0);
2070  TRACE2(TRACE_VNODE, 3, TRCID_NFSD_IGET_DENTRY_1,
2071         "gpfs_nfsd_iget_dentry: inode %d generation %d",
2072   inode->i_ino,  generation);
2073
2074  /* Now find a dentry.  If possible, get a well-connected one. */
2075  spin_lock(&dcache_lock);
2076  for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next)
2077  {
2078    result = list_entry(lp, struct dentry, d_alias);
2079
2080    if (!(result->d_flags & DCACHE_DFLAGS_DISCONNECTED))
2081    {
2082      dget_locked(result);
2083#if LINUX_KERNEL_VERSION >= 2060000
2084      result->d_flags |= DCACHE_REFERENCED;
2085#else
2086      result->d_vfs_flags |= DCACHE_REFERENCED;
2087#endif
2088      spin_unlock(&dcache_lock);
2089
2090      if (result->d_inode != inode)
2091      {
2092        TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_31,
2093               "gpfs_nfsd_iget_dentry:0 dentry flags 0x%x count %d inode 0x%lX "
2094               "time %lu", 
2095               result->d_flags, atomic_read(&result->d_count),
2096               result->d_inode, result->d_time);
2097
2098        TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_41,
2099               "gpfs_nfsd_iget_dentry:0 Inode %lu nlink %d count %d gen %u %u "
2100               "state %lu flags 0x%x",
2101               inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2102               inode->i_generation, generation, inode->i_state, inode->i_flags);
2103
2104        dput(result);
2105        goto build_dentry;
2106      }
2107      if (gpfs_i_revalidate(result))
2108      {
2109        TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_3,
2110               "gpfs_nfsd_iget_dentry:1 dentry flags 0x%x count %d inode 0x%lX "
2111               "time %lu", 
2112               result->d_flags, atomic_read(&result->d_count),
2113               result->d_inode, result->d_time);
2114
2115        TRACE7(TRACE_VNODE, 1, TRCID_NFSD_IGET_4,
2116               "gpfs_nfsd_iget_dentry:1 Inode %lu nlink %d count %d gen %u %u "
2117               "state %lu flags 0x%x",
2118               inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2119               inode->i_generation, generation, inode->i_state, inode->i_flags);
2120
2121        iput(inode);
2122        dput(result);
2123        EXIT(0);
2124        return ERR_PTR(-ESTALE);
2125      }
2126      if (generation && 
2127          generation != 0xffffffff && /* GENNUM_UNKNOWN */
2128          inode->i_generation != generation)
2129      {
2130        /* we didn't find the right inode.. */
2131        TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_5,
2132               "gpfs_nfsd_iget_dentry:2 dentry flags 0x%x count %d inode 0x%lX "
2133               "time %lu", 
2134               result->d_flags, atomic_read(&result->d_count),
2135               result->d_inode, result->d_time);
2136
2137        TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_6,
2138               "gpfs_nfsd_iget_dentry:2 Inode %lu nlink %d count %d gen %u %u "
2139               "state %lu flags 0x%x",
2140               inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2141               inode->i_generation, generation, inode->i_state, inode->i_flags);
2142
2143        iput(inode);
2144        dput(result);
2145        EXIT(0);
2146        return ERR_PTR(-ESTALE);
2147      }
2148      iput(inode); 
2149      EXIT(0);
2150      return result;
2151    }
2152  }
2153  spin_unlock(&dcache_lock);
2154
2155build_dentry:
2156
2157#if LINUX_KERNEL_VERSION < 2060000
2158  result = d_alloc_root(inode);
2159#else
2160  result = d_alloc_anon(inode);
2161#endif
2162  if (result == NULL)
2163  {
2164    iput(inode);
2165    EXIT(0);
2166    return ERR_PTR(-ENOMEM);
2167  }
2168#if LINUX_KERNEL_VERSION < 2060000
2169  result->d_flags |= DCACHE_DFLAGS_DISCONNECTED;
2170#endif
2171
2172  if (gpfs_i_revalidate(result))
2173  {
2174    TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_7,
2175           "gpfs_nfsd_iget:3 dentry flags 0x%x count %d inode 0x%lX time %lu",
2176           result->d_flags, atomic_read(&result->d_count),
2177           result->d_inode, result->d_time);
2178
2179    TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_8,
2180           "gpfs_nfsd_iget:3 Inode %lu nlink %d count %d gen %u %u "
2181           "state %lu flags 0x%x",
2182           inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2183           inode->i_generation, generation, inode->i_state, inode->i_flags);
2184
2185    /* The dput call here is releases the dcache entry that was
2186     * allocated by to d_alloc_root. It also results in an iput effectively
2187     * removing the hold we place by our iget call above.
2188     */
2189    dput(result);
2190    EXIT(0);
2191    return ERR_PTR(-ESTALE);
2192  }
2193  if (generation && 
2194      generation != 0xffffffff && /* GENNUM_UNKNOWN */
2195      inode->i_generation != generation)
2196  {
2197    /* we didn't find the right inode.. */
2198    TRACE4(TRACE_VNODE, 11, TRCID_NFSD_IGET_9,
2199           "gpfs_nfsd_iget:4 dentry flags 0x%x count %d inode 0x%lX time %lu",
2200           result->d_flags, atomic_read(&result->d_count),
2201           result->d_inode, result->d_time);
2202
2203    TRACE7(TRACE_VNODE, 11, TRCID_NFSD_IGET_10,
2204           "gpfs_nfsd_iget:4 Inode %lu nlink %d count %d gen %u %u "
2205           "state %lu flags 0x%x",
2206           inode->i_ino, inode->i_nlink, atomic_read(&inode->i_count),
2207           inode->i_generation, generation, inode->i_state, inode->i_flags);
2208
2209    /* Release the dcache entry.  This also does an iput. */
2210    dput(result);
2211    EXIT(0);
2212    return ERR_PTR(-ESTALE);
2213  }
2214  EXIT(0);
2215  return result;
2216}
2217
2218static struct dentry *
2219gpfs_nfsd_iget(struct super_block *sbP, unsigned long ino,
2220               cxiIGetArg_t *argP, __u32 generation)
2221{
2222  int rc;
2223  struct inode *inode;
2224  struct gpfsVfsData_t *privVfsP;
2225
2226  ENTER(0);
2227  TRACE6(TRACE_VNODE, 3, TRCID_NFSD_IGET_1,
2228         "gpfs_nfsd_iget: sbP 0x%lX extino %d inode %d snapid %d "
2229         "fileset %d generation %d",
2230         sbP, ino, argP->inodeNum, argP->snapId, argP->filesetId,
2231         generation);
2232
2233  /* get the inode */
2234  if (ino == 0)
2235  {
2236    EXIT(0);
2237    return ERR_PTR(-ESTALE);
2238  }
2239
2240  /* Callers have set inodeNum/snapId in argP.  vattrP is NULL and
2241   * readInodeCalled is false, but these will be set appropriately in
2242   * gpfsNFSIget after it obtains the attributes.
2243   */
2244
2245  privVfsP = (struct gpfsVfsData_t *)cxiGetPrivVfsP(sbP);
2246  rc = gpfs_ops.gpfsNFSIget(privVfsP, argP, generation, (void **)&inode);
2247
2248  if (rc)
2249  {
2250    cxiErrorNFS(rc);
2251
2252    EXIT(0);
2253    return ERR_PTR(-rc);
2254  }
2255
2256  if (inode == NULL)
2257  {
2258    EXIT(0);
2259    return ERR_PTR(-ENOMEM);
2260  }
2261
2262  if (is_bad_inode(inode))
2263  {
2264    EXIT(0);
2265    return ERR_PTR(-ESTALE);
2266  }
2267
2268  /* gpfsNFSIget will have called findOrCreateLinux/cxiNewOSNode which
2269   * makes the iget call along with the inodeFindActor validation.
2270   */
2271
2272  EXIT(0);
2273  return(gpfs_nfsd_iget_dentry(inode,generation));
2274
2275}
2276
2277#if LINUX_KERNEL_VERSION >= 2060000
2278/* export_operations for nfsd communication with our file system
2279 * via gpfs_export_ops
2280 */
2281
2282/*
2283 * gpfs_get_dparent: (get_parent) find the parent dentry for a given dentry
2284 */
2285struct dentry *gpfs_get_dparent(struct dentry * child)
2286{
2287  int rc = 0;
2288  struct dentry *result = NULL;
2289  struct gpfsVfsData_t *privVfsP;
2290  ext_cred_t eCred;
2291  cxiNode_t *dcnP;
2292  cxiIno_t iNum = (cxiIno_t)-1;
2293  cxiNode_t *cnP = NULL;
2294  struct inode *newInodeP = NULL;
2295  struct dentry *retP;
2296
2297  ENTER(0);
2298  VFS_INC(get_parentCall);
2299
2300  TRACE2(TRACE_VNODE, 3, TRCID_GET_DPARENT_ENTER,
2301         "gpfs_get_dparent: dentry 0x%lX inode 0x%d",
2302   child,  child->d_inode->i_ino);
2303
2304  dcnP = VP_TO_CNP(child->d_inode);
2305  privVfsP = VP_TO_PVP(child->d_inode);
2306  DBGASSERT(privVfsP != NULL);
2307
2308  setCred(&eCred);
2309
2310  if (!dcnP)
2311  {
2312    /* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
2313       where "count" entries are to be pruned, but the last one is
2314       found to be recently referenced.  When this happens, count is
2315       decremented, but the loop is not terminated.  The result is that
2316       it continues to prune entries past where it should (prunes
2317       everything).  If our patch for this is not applied, the result
2318       is a kernel failure as the cxiNode is referenced.  Checking
2319       here (and revalidate) allows us to reject the call instead. */
2320    PRINTINODE(child->d_inode);
2321    result = (struct dentry *)ERR_PTR(-ESTALE);
2322    goto xerror;
2323  }
2324
2325  rc = gpfs_ops.gpfsLookup(privVfsP, (void *)child->d_inode, dcnP,
2326                           NULL, (char *)"..",
2327                           (void **)&newInodeP, &cnP, &iNum, NULL,
2328                           NULL, &eCred, (void **)&retP);
2329  if (rc == 0)
2330  {
2331    DBGASSERT(cnP != NULL);
2332    DBGASSERT(iNum != -1);
2333    DBGASSERT(newInodeP != NULL);
2334    DBGASSERT(newInodeP->PRVINODE == cnP);
2335    DBGASSERT(cnP->osNodeP == (void *)newInodeP);
2336    result = gpfs_nfsd_iget_dentry(newInodeP, (__u32)newInodeP->i_generation);
2337  }
2338  else 
2339  {
2340    cxiErrorNFS(rc);
2341    result  = (struct dentry *)ERR_PTR(-rc);
2342    iNum = -1;
2343  }
2344
2345xerror:
2346
2347  TRACE4(TRACE_VNODE, 3, TRCID_GET_DPARENT_EXIT,
2348  "gpfs_get_dparent dentry 0x%lX inode %d result %lX err%d \n",
2349     child, iNum, result, IS_ERR(result)? PTR_ERR(result): 0);
2350  EXIT(0);
2351  return result;
2352}
2353
2354/*
2355 * gpfs_get_dentry: (get_dentry) find dentry for the inode given a file handle
2356 */
2357struct dentry *gpfs_get_dentry(struct super_block *sbP, void * vdata)
2358{
2359  __u32 *data=vdata;
2360  unsigned long ino;
2361  cxiIGetArg_t arg;
2362  __u32 generation;
2363  struct dentry *result;
2364
2365  ENTER(0);
2366  VFS_INC(get_dentryCall);
2367
2368  ino = data[0];
2369  if (IS_SNAPROOTDIR_EXT_INO(ino))
2370    arg.inodeNum = SNAPROOTDIR_INT_INO;
2371  else if (IS_SNAPLINKDIR_EXT_INO(ino))
2372    arg.inodeNum = data[3];
2373  else
2374    arg.inodeNum = ino;
2375  arg.snapId = data[1];
2376  generation = data[2];
2377  arg.extInodeNum = ino;
2378  arg.filesetId = (unsigned)-1; //FIXME
2379   
2380  arg.vattrP = NULL;
2381  arg.readInodeCalled = false;
2382  result = gpfs_nfsd_iget(sbP, ino, &arg, generation);
2383  EXIT(0);
2384  return result;
2385}
2386
2387/* It is acceptable to create a disconnected dentry for pNFS since it is used
2388   only for read/write. The check if it was exported is not required since
2389   the call to the MDS will verify that the file is open.
2390*/
2391static int gpfs_acceptable(void *expv, struct dentry *dentry)
2392{
2393  if (dentry && dentry->d_inode) {
2394#ifdef GPFS_PRINTK
2395    printk("gpfs_acceptable ino %d\n", dentry->d_inode->i_ino);
2396#endif
2397    return 1;
2398  }
2399  return 0;
2400}
2401
2402/*
2403 * gpfs_decode_fh: (decode_fh) decode a file handle returning ptr to it's dentry
2404 */
2405struct dentry *
2406gpfs_decode_fh(struct super_block *sbP, __u32 *fh,
2407               int len, int fhtype,
2408               int (*acceptable)(void *context, struct dentry *de),
2409               void *context)
2410{
2411#if LINUX_KERNEL_VERSION == 2060800
2412  int len = *lenP;
2413#endif
2414  struct dentry *result;
2415  __u32 parent[4]={0};
2416
2417  ENTER(0);
2418  VFS_INC(decode_fhCall);
2419
2420#ifdef GPFS_PRINTK
2421  printk("gpfs_decode_fh %08x %08x %08x %08x %08x %08x %08x\n",
2422    fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6]);
2423#endif
2424  TRACE4(TRACE_VNODE, 3, TRCID_DECODE_FH_1,
2425         "gpfs_decode_fh: sbP 0x%lX fh 0x%lX, len %d type %d",
2426         sbP, fh, len, fhtype);
2427  if (fhtype > 4 && fhtype < 8 && len >= 5)
2428  {
2429    parent[0]=fh[3];  /* ino */
2430    parent[1]=fh[4];  /* p_sid */
2431    if (len>5)
2432    {
2433      parent[2]=fh[5]; /* generation */
2434      parent[3]=fh[3]; /* ino */
2435    }
2436
2437    if (cxiIsLockdThread()  // check for lockd thread
2438#ifdef P_NFS4
2439        || fhtype == 7 // it is a pNFS fh, disconnected fh is acceptable.
2440#endif
2441       )
2442      result = sbP->s_export_op->find_exported_dentry(sbP, fh, parent,
2443                                                      gpfs_acceptable, context);
2444    else
2445      result = sbP->s_export_op->find_exported_dentry(sbP, fh, parent,
2446                                                       acceptable, context);
2447    TRACE4(TRACE_VNODE, 3, TRCID_DECODE_FH_2,
2448           "gpfs_decode_fh: sbP 0x%lX fh 0x%lX result %lX err %d",
2449           sbP, fh, result, IS_ERR(result)? PTR_ERR(result): 0);
2450#if LINUX_KERNEL_VERSION == 2060800
2451    *lenP = 0;
2452#endif
2453    if (IS_ERR(result))
2454      cxiErrorNFS(PTR_ERR(result));
2455
2456    EXIT(0);
2457    return result;
2458  }
2459
2460  TRACE2(TRACE_VNODE, 3, TRCID_DECODE_FH_3,
2461         "gpfs_decode_fh: sbP 0x%lX fh 0x%lX -EINVAL",
2462         sbP, fh);
2463  EXIT(0);
2464  return ERR_PTR(-EINVAL);
2465}
2466
2467/*
2468 * gpfs_encode_fh: (encode_fh) encode a file handle from the given dentry
2469 */
2470int 
2471gpfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp,
2472               int need_parent)
2473{
2474  UInt32 d_sid, p_sid;
2475
2476  ENTER(0);
2477  VFS_INC(encode_fhCall);
2478
2479  if (*lenp < 5)
2480  {
2481    EXIT(0);
2482    return 255;
2483  }
2484
2485  if (gpfs_ops.gpfsGetSnapIdPair(VP_TO_CNP(dentry->d_inode),
2486                                 &d_sid, &p_sid) != 0)
2487  {
2488    EXIT(0);
2489    return 255;
2490  }
2491
2492  fh[0] = (__u32) dentry->d_inode->i_ino;
2493  fh[1] = d_sid;
2494  fh[2] = (__u32) dentry->d_inode->i_generation;
2495  fh[3] = (__u32) dentry->d_parent->d_inode->i_ino;
2496  fh[4] = p_sid;
2497  if (*lenp > 5)
2498  {
2499  /* There was enough room to compelete parent */
2500    fh[5] = (__u32) dentry->d_parent->d_inode->i_generation;
2501    *lenp = 6;
2502  }
2503  else
2504    *lenp = 5;
2505
2506  EXIT(0);
2507  return *lenp;
2508}
2509#else
2510struct dentry *gpfs_fh_to_dentry(struct super_block *sbP, __u32 *fh,
2511                                 int len, int fhtype, int parent)
2512{
2513  unsigned long ino;
2514  cxiIGetArg_t arg;
2515  __u32 generation;
2516  struct dentry *result;
2517
2518  ENTER(0);
2519  TRACE5(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_1,
2520         "gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX, len %d type %d parent %d",
2521         sbP, fh, len, fhtype, parent);
2522
2523  if (fhtype == 3 && len >= 5)
2524  {
2525    if (parent)
2526    {
2527      ino = fh[3];
2528      if (IS_SNAPROOTDIR_EXT_INO(ino))
2529        arg.inodeNum = SNAPROOTDIR_INT_INO;
2530      else if (IS_SNAPLINKDIR_EXT_INO(ino))
2531        arg.inodeNum = IS_SNAPROOTDIR_EXT_INO(fh[0]) ? 
2532                       SNAPROOTDIR_INT_INO : fh[0];
2533      else
2534        arg.inodeNum = ino;
2535      arg.snapId = fh[4];
2536      generation = 0xffffffff; /* GENNUM_UNKNOWN */
2537    }
2538    else
2539    {
2540      ino = fh[0];
2541      if (IS_SNAPROOTDIR_EXT_INO(ino))
2542        arg.inodeNum = SNAPROOTDIR_INT_INO;
2543      else if (IS_SNAPLINKDIR_EXT_INO(ino))
2544        arg.inodeNum = fh[3];
2545      else
2546        arg.inodeNum = ino;
2547      arg.snapId = fh[1];
2548      generation = fh[2];
2549    }
2550    arg.filesetId = (unsigned)-1; // FIXME
2551    arg.vattrP = NULL;
2552    arg.readInodeCalled = false;
2553    result = gpfs_nfsd_iget(sbP, ino, &arg, generation);
2554
2555    TRACE4(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_2,
2556           "gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX result %lX err %d",
2557           sbP, fh, result, IS_ERR(result)? PTR_ERR(result): 0);
2558
2559    EXIT(0);
2560    return result;
2561  }
2562
2563  TRACE2(TRACE_VNODE, 3, TRCID_FH_TO_DENTRY_3,
2564         "gpfs_fh_to_dentry: sbP 0x%lX fh 0x%lX -EINVAL",
2565         sbP, fh);
2566
2567  EXIT(0);
2568  return ERR_PTR(-EINVAL);
2569}
2570
2571int gpfs_dentry_to_fh(struct dentry *dentry, __u32 *fh, int *lenp,
2572                      int need_parent)
2573{
2574  UInt32 d_sid, p_sid;
2575
2576  if (*lenp < 5)
2577    return 255;
2578
2579  ENTER(0);
2580  if (gpfs_ops.gpfsGetSnapIdPair(VP_TO_CNP(dentry->d_inode),
2581                                 &d_sid, &p_sid) != 0)
2582  {
2583    EXIT(0);
2584    return 255;
2585  }
2586  fh[0] = (__u32) dentry->d_inode->i_ino;
2587  fh[1] = d_sid;
2588  fh[2] = (__u32) dentry->d_inode->i_generation;
2589  fh[3] = (__u32) dentry->d_parent->d_inode->i_ino;
2590  fh[4] = p_sid;
2591
2592  *lenp = 5;
2593  EXIT(0);
2594  return 3;
2595}
2596#endif
2597
2598void
2599printSuper(struct super_block *sbP)
2600{
2601  if (!_TRACE_IS_ON(TRACE_VNODE, 3))
2602    return;
2603
2604  /* private field won't make much sense for non-GPFS file systems */
2605  TRACE4N(TRACE_VNODE, 3, TRCID_PRINTSUPER_1,
2606         "printSuper: sbP 0x%lX magic 0x%lX type 0x%lX private 0x%lX\n",
2607         sbP, sbP->s_magic, sbP->s_type, SBLOCK_PRIVATE(sbP));
2608
2609  TRACE3N(TRACE_VNODE, 3, TRCID_PRINTSUPER_3,
2610         "printSuper: s_dev 0x%X count 0x%X active %d\n",
2611         sbP->s_dev, sbP->s_count, atomic_read(&sbP->s_active));
2612}
2613
2614void
2615printSuperList(struct super_block *sbP)
2616{
2617  struct list_head *lP;
2618  struct super_block *sP;
2619
2620  if (!_TRACE_IS_ON(TRACE_VNODE, 5))
2621    return;
2622
2623  /* Run through all super blocks starting from provided GPFS super block. */
2624  /* Ideally we would lock sb_lock, but we can't access it,
2625     so small probability of this breaking, which is why it is at
2626     a higher trace level (vnode 5). */
2627  TRACE0N(TRACE_VNODE, 5, TRCID_PRINTALLSUPER_1,
2628           "printSuperList:\n");
2629  printSuper(sbP);
2630  list_for_each(lP, &sbP->s_list)
2631  {
2632    sP = sb_entry(lP);
2633    printSuper(sP);
2634  }
2635}
Note: See TracBrowser for help on using the repository browser.