/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* @(#)01       1.90.1.4  src/avs/fs/mmfs/ts/kernext/gpl-linux/inode.c, mmfs, avs_rgpfs24, rgpfs24s012a 4/17/07 15:54:47 */
/*
 * Inode operations
 *
 * Contents:
 *   printInode
 *   printDentry
 *   cxiSetOSNode
 *   cxiInvalidatePerm
 *   getIattr
 *   get_umask
 *   setCred
 *   gpfs_i_create
 *   gpfs_i_lookup
 *   gpfs_i_link
 *   gpfs_i_unlink
 *   gpfs_i_symlink
 *   gpfs_i_mkdir
 *   gpfs_i_rmdir
 *   gpfs_i_mknod
 *   gpfs_i_rename
 *   gpfs_i_readlink
 *   gpfs_i_follow_link
 *   gpfs_i_readpage        (in mmap.c)
 *   gpfs_i_writepage       (in mmap.c)
 *   gpfs_i_bmap
 *   gpfs_i_truncate
 *   gpfs_i_permission
 *   gpfs_i_smap
 *   gpfs_i_updatepage
 *   gpfs_i_revalidate
 *   gpfs_i_setattr
 *   gpfs_i_setattr_internal
 *   gpfs_i_getattr
 *   gpfs_i_getattr_internal
 *   gpfs_i_lock
 *   gpfs_i_getxattr
 *   gpfs_i_setxattr
 *   gpfs_i_listxattr
 *   gpfs_i_removexattr
 */

#include <Shark-gpl.h>

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/kdev_t.h>

#include <verdep.h>
#include <cxiMode.h>
#include <cxiSystem.h>
#include <cxi2gpfs.h>
#include <cxiVFSStats.h>
#include <cxiCred.h>

#include <linux2gpfs.h>
#include <Trace.h>

#if LINUX_KERNEL_VERSION > 2060000
#include <cxiTSFattr.h>
#endif

#ifdef MODULE
#include <linux/module.h>
#endif /* MODULE */
 
/* Dump the main fields of a Linux inode to the trace stream.
 *
 * Everything is emitted at TRACE_VNODE level 3.  The fields are read
 * without taking any lock, so the output is only a snapshot.
 *
 * iP - inode to describe; must not be NULL.
 */
void
printInode(struct inode *iP)
{
  /* Value reported under the "blksize" label below.
   * NOTE(review): i_blksize is assumed to exist only on kernels older than
   * 2.6.19; later kernels derive the block size from i_blkbits.  Confirm
   * against the kernel levels this module is built for. */
#if LINUX_KERNEL_VERSION < 2061900
  unsigned long blkSize = iP->i_blksize;
#else
  unsigned long blkSize = 1UL << iP->i_blkbits;
#endif

  TRACE7(TRACE_VNODE, 3, TRCID_PRINTINODE_1,
         "printInode: iP 0x%lX inode %d (0x%X) i_count %d dev 0x%X "
         "mode 0x%X nlink %d\n",
         iP, iP->i_ino, iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
         KDEV_INT(iP->i_rdev), iP->i_mode, iP->i_nlink);

  TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_2,
         "printInode: uid %d gid %d rdev 0x%X atime 0x%X mtime 0x%X "
         "ctime 0x%X\n", iP->i_uid, iP->i_gid, KDEV_INT(iP->i_rdev), 
         GET_INODETIME_SEC(iP->i_atime), GET_INODETIME_SEC(iP->i_mtime), 
         GET_INODETIME_SEC(iP->i_ctime));

  /* The second value is the block size, not a repeat of the block count. */
  TRACE5(TRACE_VNODE, 3, TRCID_PRINTINODE_4,
         "printInode: size %lld blksize 0x%X blocks %d ver 0x%X op 0x%lX\n",
         iP->i_size, blkSize, iP->i_blocks, iP->i_version,
         iP->i_op);

  TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_5,
         "printInode: fop 0x%lX sb 0x%lX flags 0x%X state 0x%X gen %d "
         "generic 0x%lX\n", iP->i_fop, iP->i_sb, iP->i_flags, iP->i_state,
         iP->i_generation, iP->PRVINODE);

  TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_6,
         "printInode: list 0x%lX next 0x%lX prev 0x%lX\n",
         &(iP->i_list), iP->i_list.next, iP->i_list.prev);

  TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_7,
         "printInode: dentry 0x%lX next 0x%lX prev 0x%lX\n",
         &(iP->i_dentry), iP->i_dentry.next, iP->i_dentry.prev);

#if LINUX_KERNEL_VERSION < 2050000
  TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_8,
         "printInode: hash 0x%lX next 0x%lX prev 0x%lX\n",
         &(iP->i_hash), iP->i_hash.next, iP->i_hash.prev);
#else
  /* Here i_hash has a pprev member that points at the previous element's
     next pointer.  It may be NULL when the inode is not on a hash chain,
     so only dereference it when it is set. */
  TRACE3(TRACE_VNODE, 3, TRCID_PRINTINODE_9,
         "printInode: hash 0x%lX next 0x%lX prev 0x%lX\n",
         &(iP->i_hash), iP->i_hash.next,
         (iP->i_hash.pprev ? *iP->i_hash.pprev : NULL));
#endif
}

/* Dump the main fields of a directory cache entry to the trace stream.
 *
 * Nothing is printed unless TRACE_VNODE level 3 is enabled.  If the dentry
 * is positive (d_inode set), the inode number, reference count and alias
 * list neighbours are printed as well.  No locks are taken here.
 *
 * dP - dentry to describe; must not be NULL.
 */
void
printDentry(struct dentry *dP)
{
  struct inode *iP = dP->d_inode;

  if (!_TRACE_IS_ON(TRACE_VNODE, 3))
    return;

  TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_1,
          "printDentry: dentry 0x%lX count %d name '%s'\n",
          dP, atomic_read((atomic_t *)&dP->d_count), dP->d_name.name);

  TRACE5N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_2,
          "printDentry: time 0x%X op 0x%lX flags 0x%X parent 0x%lX "
          "inode 0x%X\n", dP->d_time, dP->d_op, dP->d_flags, 
          dP->d_parent, iP);

  if (iP)    
  {
    if (!list_empty(&iP->i_dentry))
      TRACE4N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3A,
              "printDentry: i_ino %d i_count %d "
              "i_dentry next 0x%lX i_dentry prev 0x%lX\n",
              iP->i_ino, atomic_read((atomic_t *)&iP->i_count), 
              list_entry(iP->i_dentry.next, struct dentry, d_alias),
              list_entry(iP->i_dentry.prev, struct dentry, d_alias));
    else
      TRACE2N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3B,
              "printDentry: i_ino %d i_count %d\n", 
              iP->i_ino, atomic_read((atomic_t *)&iP->i_count));
  }

  /* Report the hash chain linkage (d_hash), not the sibling linkage, which
     is covered by the next trace.
     NOTE(review): the version threshold mirrors the one printInode uses
     for i_hash; confirm that d_hash changed representation at the same
     kernel level.  On the newer layout pprev is printed as-is rather than
     dereferenced, because this routine is also called on dentries that
     have just been dropped from the hash. */
#if LINUX_KERNEL_VERSION < 2050000
  TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3C,
          "printDentry: &d_hash 0x%lX d_hash.next 0x%lX d_hash.prev 0x%lX\n", 
          &dP->d_hash, dP->d_hash.next, dP->d_hash.prev);
#else
  TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3C,
          "printDentry: &d_hash 0x%lX d_hash.next 0x%lX d_hash.prev 0x%lX\n", 
          &dP->d_hash, dP->d_hash.next, dP->d_hash.pprev);
#endif

  TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_4,
          "printDentry: &child 0x%lX child.next 0x%lX child.prev 0x%lX\n", 
          &dP->d_child, dP->d_child.next, dP->d_child.prev);

  if (!list_empty(&dP->d_subdirs))
    TRACE3N(TRACE_VNODE, 3, TRCID_PRINTDENTRY_5,
            "printDentry: &subdirs 0x%lX subdir next 0x%lX "
            "subdir prev 0x%lX\n", &dP->d_subdirs,
            list_entry(dP->d_subdirs.next, struct dentry, d_child),
            list_entry(dP->d_subdirs.prev, struct dentry, d_child));
}

/* Print directory entry tree up to maxPrint elements.
 * If maxPrint is 0 then there is no upper limit.
 *
 * entryDP  - root of the subtree to print; it is always printed first.
 * maxPrint - maximum number of dentries to print.  The limit is only
 *            checked when maxPrint > 0, so 0 (or a negative value) means
 *            unlimited.
 *
 * The walk is iterative and depth-first: each dentry is printed, then its
 * children are visited before its remaining siblings.  The whole walk runs
 * with dcache_lock held.  Nothing is printed unless TRACE_VNODE level 3 is
 * enabled.
 */
void
printDentryTree(struct dentry *entryDP, int maxPrint)
{
  int count = 0;               /* number of dentries printed so far */
  struct list_head *lhP;       /* cursor within parentDP's d_subdirs list */
  struct dentry *siblingDP;    /* dentry currently being visited */
  struct dentry *parentDP;     /* directory whose children lhP is walking */

  /* Check trace level required by printDentry() */
  if (!_TRACE_IS_ON(TRACE_VNODE, 3))
    return;

  spin_lock(&dcache_lock);

  parentDP = entryDP;
  lhP = parentDP->d_subdirs.next;

  /* The root of the subtree counts against maxPrint as well. */
  printDentry(parentDP);
  if (maxPrint > 0 && ++count >= maxPrint)
    goto xerror;

  /* Nothing more to do for a dentry without children.  This also
     guarantees that the inner loop below runs at least once, so siblingDP
     is set before it is read after that loop. */
  if (list_empty(&parentDP->d_subdirs))
    goto xerror;

  do
  {
    /* Walk the children of parentDP until the cursor wraps back to the
       list head. */
    while (lhP != &parentDP->d_subdirs)
    {
      siblingDP = list_entry(lhP, struct dentry, d_child);

      printDentry(siblingDP);
      if (maxPrint > 0 && ++count >= maxPrint)
        goto xerror;

      /* Descend into this child's own children before moving on. */
      if (!list_empty(&siblingDP->d_subdirs))
      {
        parentDP = siblingDP;
        lhP = siblingDP->d_subdirs.next;
        continue;
      }

      /* Leaf: advance to the next sibling at the same level. */
      lhP = siblingDP->d_child.next;
      parentDP = siblingDP->d_parent;
    }
  
    /* This level is exhausted: step up one level and resume with the
       sibling that follows the directory just finished. */
    siblingDP = siblingDP->d_parent;
    parentDP = siblingDP->d_parent;
    lhP = siblingDP->d_child.next;
  } 
  /* The walk is complete once we have stepped back up to entryDP itself,
     i.e. the cursor is entryDP's own next-sibling link. */
  while (lhP != entryDP->d_child.next);

xerror:
  spin_unlock(&dcache_lock);

  return;
}

/* Set the inode operations table for a regular file or directory.  Call
   with xperm set to true if the file has extended permission attributes
   (i.e. an ACL).  This routine is a no-op if the inode is not a regular
   file or directory.

   If the file does not have extended attributes, the table that is used
   will have a null value for the permission routine pointer.  This will
   cause Linux to perform access checks directly instead of acquiring the
   kernel lock and calling GPFS, giving better performance.

   iP    - inode whose i_op pointer is to be set; i_mode must already be
           valid because it selects between the file and directory tables.
   xperm - true to install the gpfs_..._xperm table, false to install the
           gpfs_..._stdperm table. */
void setIopTable(struct inode *iP, Boolean xperm)
{
  struct inode_operations *xopP;

  /* Choose the correct inode operations table based on whether this is a
     directory or a regular file.  Assume that the file has extended
     attributes so that GPFS permission checking will be required. */
  ENTER(0);
  if (S_ISDIR(iP->i_mode))
    xopP = &gpfs_dir_iops_xperm;
  else if (S_ISREG(iP->i_mode))
    xopP = &gpfs_iops_xperm;
  else
  {
    /* Neither a directory nor a regular file: leave i_op untouched. */
    EXIT(0);
    return;
  }

  /* If the file really does have extended attributes (or if the token has
     been lost so that we do not know the status), set extended permission
     table and exit. */
  if (xperm)
  {
    iP->i_op = xopP;
    EXIT(0);
    return;
  }

  /* Get address of an inode operations table that has a generic permission
     routine pointer. */
  iP->i_op = S_ISDIR(iP->i_mode) ? &gpfs_dir_iops_stdperm : &gpfs_iops_stdperm;
  EXIT(0);
}


/* Copy the attributes in *attrP into the Linux inode attached to cnP and
 * select the operation tables that match the file type.
 *
 * osVfsP - the struct super_block the inode must belong to (asserted).
 * cnP    - cxiNode whose osNodeP is the inode to update; its xinfo and
 *          icValid fields are updated as well.
 * attrP  - source attributes.
 *
 * The address space operations are always pointed at gpfs_aops when the
 * inode has a mapping.  On return cnP->icValid is CXI_IC_ALL.
 */
void
cxiSetOSNode(void *osVfsP, cxiNode_t *cnP, cxiVattr_t *attrP)
{
  struct super_block *sbP = (struct super_block *)osVfsP;
  struct inode *inodeP = (struct inode *)cnP->osNodeP;

  ENTER(0);
  DBGASSERT(inodeP != NULL);
  DBGASSERT(inodeP->PRVINODE == cnP);
  DBGASSERT(inodeP->i_sb == sbP);

  inodeP->i_mode = attrP->va_mode;
  inodeP->i_nlink = attrP->va_nlink;
  inodeP->i_uid  = attrP->va_uid;
  inodeP->i_gid  = attrP->va_gid;
  inodeP->i_rdev = cxiDevToKernelDev(cxiDev32ToDev(attrP->va_rdev));

  CXITIME_TO_INODETIME(attrP->va_atime, inodeP->i_atime);
  CXITIME_TO_INODETIME(attrP->va_mtime, inodeP->i_mtime);
  CXITIME_TO_INODETIME(attrP->va_ctime, inodeP->i_ctime);

  inodeP->i_size = attrP->va_size;
  /* The block size belongs in i_blksize; storing it in i_blocks would be
     overwritten by the block count on the next line.
     NOTE(review): i_blksize is assumed to exist only on kernels older than
     2.6.19; on later kernels there is no such field to fill in here.
     Confirm against the kernel levels this module is built for. */
#if LINUX_KERNEL_VERSION < 2061900
  inodeP->i_blksize = attrP->va_blocksize;
#endif
  inodeP->i_blocks = attrP->va_blocks;
  inodeP->i_generation = attrP->va_gen;
  inodeP->i_flags = 0;

  cnP->xinfo = attrP->va_xinfo;

  switch (inodeP->i_mode & S_IFMT)
  {
    case S_IFREG:
      setIopTable(inodeP, (attrP->va_xinfo & VA_XPERM) != 0);
      /* NFS threads get a file operations table without sendfile. */
      if (cxiIsNFSThread())
        inodeP->i_fop = &gpfs_fops_no_sendfile;
      else
        inodeP->i_fop = &gpfs_fops;
      break;

    case S_IFDIR:
      setIopTable(inodeP, (attrP->va_xinfo & VA_XPERM) != 0);
      inodeP->i_fop = &gpfs_dir_fops;
      break;

    case S_IFLNK:
      inodeP->i_op = &gpfs_link_iops;
      inodeP->i_fop = &gpfs_fops;
      break;

    case S_IFBLK:
    case S_IFCHR:
    case S_IFIFO:
    case S_IFSOCK:
      /* Set vector table for special files, gpfs will not get 
       * these operations. 
       */
#if LINUX_KERNEL_VERSION >= 2060000
      init_special_inode(inodeP, inodeP->i_mode, inodeP->i_rdev);
#else
      init_special_inode(inodeP, inodeP->i_mode,
                         kdev_t_to_nr(inodeP->i_rdev));
#endif
      break;

    default:
      /* Unknown file type: leave the operation tables as they are. */
      break;
  }
  if (inodeP->i_mapping)
    inodeP->i_mapping->a_ops = &gpfs_aops;

  cnP->icValid = CXI_IC_ALL;

  TRACE7(TRACE_VNODE, 2, TRCID_LINUXOPS_SETINODE,
         "cxiSetOSNode: inodeP 0x%lX inode %d i_count %d i_mode 0x%X "
         "i_xinfo 0x%X i_nlink %d i_size %lld\n",
         inodeP, inodeP->i_ino, atomic_read((atomic_t *)&inodeP->i_count),
         inodeP->i_mode, attrP->va_xinfo, inodeP->i_nlink, inodeP->i_size);
  EXIT(0);
  return;
}


/* Invoked from cxiInvalidateAttr when CXI_IC_PERM was requested, meaning
   the permission related attributes cached in the struct inode (owner,
   mode, etc.) can no longer be trusted.

   cnP - cxiNode whose attached inode (osNodeP) is switched back to the
         extended-permission operations table. */
void
cxiInvalidatePerm(cxiNode_t *cnP)
{
  struct inode *inodeP = (struct inode *)cnP->osNodeP;
  /* Which standard-permission table, if any, is installed right now;
     used only for the trace below. */
  int hasFileStd = (inodeP->i_op == &gpfs_iops_stdperm);
  int hasDirStd = (inodeP->i_op == &gpfs_dir_iops_stdperm);

  ENTER(0);
  TRACE3(TRACE_VNODE, 2, TRCID_CXIINVA_PERM,
         "cxiInvalidatePerm: cnP 0x%lX std %d dir std %d",
         cnP, hasFileStd, hasDirStd);

  /* Install the gpfs_..._xperm table so that the next permission check is
     routed through gpfs_i_permission.  That function revalidates the
     permission attributes and, when appropriate, switches the inode back
     to the gpfs_..._stdperm table.  Symlinks always have a permission iop
     set, and setIopTable does nothing for them. */
  setIopTable(inodeP, true);
  EXIT(0);
}

/* Fill *attrP with the mode, ownership, size and timestamps currently
   held in *inodeP.

   NOTE(review): ia_valid is not written here (the original carried only a
   commented-out placeholder for it), so callers presumably have to set it
   themselves -- confirm at the call sites. */
static void
getIattr(struct inode *inodeP, struct iattr *attrP)
{
  ENTER(0);

  /* Timestamps */
  attrP->ia_ctime = inodeP->i_ctime;
  attrP->ia_mtime = inodeP->i_mtime;
  attrP->ia_atime = inodeP->i_atime;

  /* Size */
  attrP->ia_size = inodeP->i_size;

  /* Ownership and mode */
  attrP->ia_gid = inodeP->i_gid;
  attrP->ia_uid = inodeP->i_uid;
  attrP->ia_mode = inodeP->i_mode;

  EXIT(0);
}

/* Return the file creation mask (umask) of the current task, as stored in
   current->fs. */
static inline int
get_umask(void)
{
  return (current->fs->umask);
}


/* Capture the credentials of the current thread in *credP: fsuid becomes
   the principal, fsgid the primary group, and as many supplementary
   groups as fit are copied into eGroups (num_groups says how many). */
void
setCred(ext_cred_t *credP)
{
  int nGroups;

  ENTER(0);
  credP->principal = current->fsuid; /* user id */
  credP->group = current->fsgid;     /* primary group id */

  /* Work out how many supplementary groups will be copied. */
#if LINUX_KERNEL_VERSION > 2060300
  nGroups = current->group_info->ngroups;
  if (nGroups > NGROUPS_SMALL)
    nGroups = NGROUPS_SMALL;
#else
  nGroups = current->ngroups;
#endif
  if (nGroups > ECRED_NGROUPS)
    nGroups = ECRED_NGROUPS;

  credP->num_groups = nGroups;

  if (nGroups > 0)
  {
#if LINUX_KERNEL_VERSION > 2060300
    /* ?? Known limitation.  Linux 2.6 supports a very large list of
       groups by allocating a page for each bunch of groups; only when
       there are <= NGROUPS_SMALL groups is the space in
       group_info->small_block used.  Because the count is clamped above,
       GPFS only ever sees a prefix of the group set. */
    /* To save kernel stack space, the GPFS ext_cred_t should instead keep
       a pointer to the array of groups.  The group set cannot change
       during a GPFS system call since the caller can only make one system
       call at a time. */
    memcpy(credP->eGroups, current->group_info->blocks[0], nGroups*sizeof(gid_t));
#else
    memcpy(credP->eGroups, current->groups, nGroups*sizeof(gid_t));
#endif
  }
  EXIT(0);
}

/* inode_operations */

/* Create a file named by dentryP in directory diP (inode_operations
 * create entry point).
 *
 * Called with a negative (no inode) dir cache entry.
 * If this call succeeds, we fill in with d_instantiate(). 
 * NOTE(review): no d_instantiate() call is visible in this routine;
 * presumably it happens inside gpfsCreate/gpfsLookup, which are handed
 * dentryP -- confirm.
 *
 * diP     - parent directory inode.
 * dentryP - negative dentry carrying the name to create.
 * mode    - file type and permission bits requested by the caller.
 * ni      - (2.6 kernels only) not used by this routine.
 *
 * Returns 0 on success or a negative errno value.  On failure the dentry
 * is dropped from the dcache with d_drop().
 */

int
gpfs_i_create(struct inode *diP, struct dentry *dentryP, int mode
#if LINUX_KERNEL_VERSION >= 2060000
              , struct nameidata *ni
#endif
              )
{
  int rc;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *dcnP;
  cxiNode_t *cnP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  struct inode *newInodeP = NULL;
  int flags = FWRITE | FCREAT | FEXCL;
  cxiMode_t umask = get_umask();
  ext_cred_t eCred;
  struct dentry *retP;

  VFS_STAT_START(createCall);
  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_ENTER,
         "gpfs_i_create enter: iP 0x%lX dentryP 0x%lX mode 0x%X name '%s'\n",
         diP, dentryP, mode, dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

retry:

  setCred(&eCred);
  rc = gpfs_ops.gpfsCreate(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum, 0,
                           flags, dentryP, (char *)dentryP->d_name.name,
                           mode, umask, NULL, &eCred);
  if (rc == 0)
  {
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->PRVINODE == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);
    /* We created the file ourselves, so we did not lose a create race. */
    cnP->createRaceLoserThreadId = 0;
  }

  /* linux would normally serialize the creates on a directory (via the 
   * parent directory semaphore) to ensure that a create didn't fail with 
   * EEXIST.  However in a multinode environment we may perform a lookup 
   * on one node (thinking the file doesn't exist) yet a create is 
   * performed on a different node before linux can call the physical
   * file systems create.  We attempt to reconcile this case by marking
   * the fact that this happened and checking the FEXCL flag at gpfs_f_open()
   * to see if we should have failed this with EEXIST.
   */
  if (rc == EEXIST)
  {
    /* Make sure that this create call is part of the linux open call.  NFS
       and mknod calls create without an open, so check that this is not one
       of those calls. On the open call the open flags are available and if
       the FEXCL was on fail it with EEXIST. */
    int mode1;

    /* Skip if NFS create call. */
    if (cxiIsNFSThread())
      goto retExist;

    /* ??? if (sys_mknod call) goto xerror; */

    /* Do it only if trying to create a regular file.  The file type is a
       multi-bit field, so it has to be compared as a whole: a plain bit
       test against S_IFREG would also accept symlink and socket types. */
    if (((mode & S_IFMT) != 0) && !S_ISREG(mode))
      goto retExist;

    setCred(&eCred); // rebuild since gpfsCreate may remap ids
    rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
                             dentryP, (char *)dentryP->d_name.name,
                             (void **)&newInodeP, &cnP, &iNum, NULL,
                             &mode1, &eCred, (void **)&retP);
    /* The file vanished again between the create and the lookup: start
       over with a fresh create. */
    if (rc == ENOENT)
      goto retry;
    if (!rc)
    {
      /* If the file that was found was a directory than return the
         return code that linux would have returned. */
      if (S_ISDIR(newInodeP->i_mode))
      {
        rc = EISDIR;
        goto retExist;
      }
      /* Remember which thread lost the race so that the later open can
         apply the FEXCL check described above. */
      cnP->createRaceLoserThreadId = cxiGetThreadId();
    }
  }

retExist:
  if (rc)
  {
    d_drop(dentryP);
    goto xerror;
  }
  diP->i_sb->s_dirt = 1;

xerror:
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_EXIT,
         "gpfs_i_create exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/* Look up the name carried by dentryP in directory diP
 * (inode_operations lookup entry point).
 *
 * When the file is found this routine should add the dentry to the hash
 * list with d_add() and return null.  When a failure occurs a non-null
 * value is returned and the dentry will be dput() by the linux lfs layer.
 *
 * diP     - directory inode to search.
 * dentryP - dentry holding the name being looked up.
 * ni      - (2.6 kernels only) not used by this routine.
 *
 * Returns whatever gpfsLookup stored through its last argument (initially
 * NULL), or ERR_PTR(-errno) for the failure cases handled here.
 */
struct dentry *
gpfs_i_lookup(struct inode *diP, struct dentry *dentryP
#if LINUX_KERNEL_VERSION >= 2060000
              , struct nameidata *ni
#endif
              )
{
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *dcnP;
  cxiNode_t *cnP = NULL;
  struct inode *newInodeP = NULL;
  struct dentry *retP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiMode_t mode = 0;
  ext_cred_t eCred;
  int rc = 0;
  int code = 0;   /* identifies the exit path in the exit trace */

  VFS_STAT_START(lookupCall);
  ENTER(0);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_ENTER,
         "gpfs_i_lookup enter: diP 0x%lX dentryP 0x%lX name '%s'\n",
         diP, dentryP, dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  setCred(&eCred);

  if (dcnP == NULL)
  {
    /* A bug in linux/fs/dcache.c (prune_dcache) can lead here: "count"
       entries are supposed to be pruned, but when the last one turns out
       to be recently referenced, count is decremented without terminating
       the loop.  Pruning then continues past where it should (everything
       gets pruned).  Without our patch for this, the outcome is a kernel
       failure when the cxiNode is referenced.  Checking here (and in
       revalidate) lets us reject the call instead. */

    TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_STALE,
           "cxiNode for inode 0x%lX (ino 0x%X) was FREED!\n",
           diP, diP->i_ino);

    /* However much we would like more detail about this inode, calling
     * PRINTINODE(iP) here is not ok.
     */
    rc = ESTALE;
    code = 1;
    retP = (struct dentry *)ERR_PTR(-rc);
    goto xerror;
  }

  rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
                           dentryP, (char *)dentryP->d_name.name,
                           (void **)&newInodeP, &cnP, &iNum, NULL,
                           &mode, &eCred, (void **)&retP);

  /* Anything other than success or "not found" is an internal failure. */
  if (rc != 0 && rc != ENOENT)
  {
    cxiErrorNFS(rc);
    code = 2;
    retP = (struct dentry *)ERR_PTR(-rc);
    goto xerror;
  }

  /* Not found, and the parent directory itself has been unlinked. */
  if (rc == ENOENT && diP->i_nlink == 0)
  {
    /* Normally ENOENT leaves this function with a negative dcache entry.
     * Allowing a negative dcache entry inside a directory that has been
     * deleted (while we are still sitting in it) means the d_count never
     * reaches zero, which strands any open file associated with the
     * parent directory.  Dropping the dentry and returning the ENOENT
     * makes the VFS dput the dentry.  The scenario that caused trouble:
     *
     * NODE 1                               NODE 2
     * `rm -rf dirA`                        `rm -rf dirA`
     * ==========================================================
     * gpfs_f_open("dirA", ...)
     * gpfs_f_readdir(...)
     * [read "fileA", "fileB"]              gpfs_f_open("dirA", ...)
     *                                      gpfs_f_readdir(...)
     *                                      [read "fileA", "fileB"]
     *
     *                                      gpfs_i_lookup("fileA")
     *                                      gpfs_i_unlink("fileA")
     *                                      gpfs_s_delete_inode(fileA's inode)
     *                                      gpfs_i_lookup("fileB")
     *                                      gpfs_i_unlink("fileB")
     *                                      gpfs_s_delete_inode(fileB's inode)
     *                                      ...
     *                                      gpfs_i_rmdir("dirA", ...)
     *                                      gpfs_s_delete_inode(dirA's inode)
     * destroyOnLastClose=1 for dirA        <======
     * 
     * gpfs_i_lookup("fileA")
     *  [creates a negative dentry for fileA,
     *   increments dirA's reference count]
     * gpfs_i_lookup("fileB")
     *  [creates a negative dentry for fileB,
     *   increments dirA's reference count]
     */
    DBGASSERT(dentryP->d_inode == NULL);
    dentryP->d_op = NULL;
    d_drop(dentryP);

    code = 3;
    retP = (struct dentry *)ERR_PTR(-rc);
    goto xerror;
  }

  /* Found: sanity check what gpfsLookup handed back. */
  if (rc == 0)
  {
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->PRVINODE == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);
  }

  PRINTDENTRY(dentryP);

xerror:
  TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_EXIT,
         "gpfs_i_lookup exit: new inode 0x%lX iNum %d (0x%X) cnP 0x%lX retP 0x%lX "
         "code %d rc %d\n", newInodeP, iNum, iNum, cnP, retP, code, rc);

  VFS_STAT_STOP;
  EXIT(0);
  return retP;
}

/* Create a hard link (inode_operations link entry point).
 *
 * oldDentryP - dentry of the existing file being linked to.
 * diP        - directory inode in which the new name is created.
 * dentryP    - dentry carrying the new name.
 *
 * Returns 0 on success or a negative errno value.  On failure the new
 * dentry is dropped from the dcache with d_drop().
 */
int
gpfs_i_link(struct dentry *oldDentryP, struct inode *diP,
            struct dentry *dentryP)
{
  int rc = 0;
  struct inode *iP = oldDentryP->d_inode;
  cxiNode_t *dcnP;
  cxiNode_t *cnP = NULL;
  struct gpfsVfsData_t *privVfsP;
  ext_cred_t eCred;

  VFS_STAT_START(linkCall);
  ENTER(0);
  /* Note: the first "dentryP" value in this trace is oldDentryP and the
     second is the new dentry. */
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_ENTER,
         "gpfs_i_link enter: diP 0x%lX dentryP 0x%lX "
         "dentryP 0x%lX name '%s'\n", diP, oldDentryP, dentryP,
         dentryP->d_name.name);
  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);
  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  setCred(&eCred);
  rc = gpfs_ops.gpfsLink(privVfsP, cnP, dcnP,
                         dentryP, (char *)dentryP->d_name.name, &eCred);
  if (rc)
  {
    d_drop(dentryP);
    goto xerror;
  }
  /* Mark the superblock dirty after a successful link. */
  iP->i_sb->s_dirt = 1;

xerror:
  PRINTINODE(iP);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_EXIT,
         "gpfs_i_link exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/* Remove the name carried by dentryP from directory diP
 * (inode_operations unlink entry point).
 *
 * diP     - parent directory inode.
 * dentryP - positive dentry of the file to remove.
 *
 * Returns 0 on success or a negative errno value.  On failure the dentry
 * is dropped from the dcache and its original d_op table is restored.
 */
int
gpfs_i_unlink(struct inode *diP, struct dentry *dentryP)
{
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  struct dentry_operations *savedDopsP;
  cxiNode_t *cnP;
  cxiNode_t *dcnP;
  ext_cred_t cred;
  int rc = 0;

  VFS_STAT_START(removeCall);
  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_ENTER,
         "gpfs_i_unlink enter: diP 0x%lX iP 0x%lX dentryP 0x%lX name '%s'\n",
         diP, iP, dentryP, dentryP->d_name.name);
  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  /* About the dcache entry update.  Once gpfs_i_unlink returns, the VFS
     layer calls d_delete(), which turns the dentry into a valid, negative
     dcache entry.  Should another node then create a new file with the
     same name, the BR token revoke for the directory block invalidates
     that negative entry.

     There is a window, though, between gpfsRemove() and d_delete() in
     which a BR token revoke would not realize that the dcache entry needs
     invalidating, because d_delete() has not yet made it negative.  To
     close that window the dentry is marked "valid with d_delete pending",
     meaning: the dentry is still valid, but a BR token revoke should mark
     it as 'needing revalidation' even though it does not (yet) look like
     a negative dcache entry.

     The BR revoke handler must not mark such entries invalid outright: we
     do not know for sure that the file really will be deleted.  The
     unlink may fail for any number of reasons, and the dentry should not
     be invalidated prematurely.  Marking it 'needing revalidation' is
     safe, however.

     Ideally d_op would be swapped inside gpfsRemove while the BR lock on
     the directory is held.  However, (1) local synchronization in the VFS
     (our caller holds the i_sem semaphore on the directory) keeps other
     threads from doing a lookup or create that could flip the state back
     to plain "valid" before gpfsRemove has happened, and (2) a BR revoke
     arriving before gpfsRemove may needlessly mark the dentry 'needing
     revalidation', which is sub-optimal but harmless.  Also see the
     comment in gpfs_i_rmdir. */
  savedDopsP = dentryP->d_op;
  dentryP->d_op = &gpfs_dops_ddeletepending;

  setCred(&cred);
  rc = gpfs_ops.gpfsRemove(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
                           &cred);
  if (rc == 0)
  {
    /* d_delete will be called at the VFS layer since rc == 0 */
    diP->i_sb->s_dirt = 1;
  }
  else
  {
    d_drop(dentryP);
    /* Put the original table back unless d_op was changed in the
       meantime. */
    if (dentryP->d_op == &gpfs_dops_ddeletepending)
      dentryP->d_op = savedDopsP;
  }

  PRINTINODE(iP);
  PRINTDENTRY(dentryP);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_EXIT,
         "gpfs_i_unlink exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/* Create a symbolic link (inode_operations symlink entry point).
 *
 * diP            - directory inode in which the link is created.
 * dentryP        - dentry carrying the name of the new link.
 * symlinkTargetP - path the link will point to.
 *
 * Returns 0 on success or a negative errno value.  On failure the dentry
 * is dropped from the dcache with d_drop().
 */
int
gpfs_i_symlink(struct inode *diP, struct dentry *dentryP,
               const char *symlinkTargetP)
{
  struct gpfsVfsData_t *privVfsP;
  struct inode *newInodeP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiNode_t *cnP;
  cxiNode_t *dcnP;
  ext_cred_t cred;
  int rc = 0;

  VFS_STAT_START(symlinkCall);
  ENTER(0);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK1,
         "gpfs_i_symlink enter: iP 0x%lX dentryP 0x%lX symlinkTargetP '%s'\n",
         diP, dentryP, symlinkTargetP);
  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK2,
         "gpfs_i_symlink: newLinkName '%s'\n", dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  setCred(&cred);
  rc = gpfs_ops.gpfsSymlink(privVfsP, dcnP, (void **)&newInodeP, &cnP,
                            &iNum, dentryP, (char *)dentryP->d_name.name,
                            (char *)symlinkTargetP, &cred);
  if (rc != 0)
  {
    /* Creation failed: get the dentry out of the dcache. */
    d_drop(dentryP);
  }
  else
  {
    /* Sanity check what gpfsSymlink handed back. */
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->PRVINODE == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);

    diP->i_sb->s_dirt = 1;
  }

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK_EXIT,
         "gpfs_i_symlink exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/* Create a directory (inode_operations mkdir entry point).
 *
 * diP     - parent directory inode.
 * dentryP - dentry carrying the name of the new directory.
 * mode    - permission bits requested by the caller.
 *
 * Returns 0 on success or a negative errno value.  On failure the dentry
 * is dropped from the dcache with d_drop().
 */
int
gpfs_i_mkdir(struct inode *diP, struct dentry *dentryP, int mode)
{
  struct gpfsVfsData_t *privVfsP;
  struct inode *newInodeP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiNode_t *cnP;
  cxiNode_t *dcnP;
  ext_cred_t cred;
  /* LFS should not apply umask and we may not */
  cxiMode_t umask = get_umask();
  int rc = 0;

  VFS_STAT_START(mkdirCall);
  ENTER(0);

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_ENTER,
         "gpfs_i_mkdir enter: diP 0x%lX mode 0x%X name '%s'\n",
         diP, mode, dentryP->d_name.name);
  /* BKL is held at entry */

  setCred(&cred);
  rc = gpfs_ops.gpfsMkdir(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum,
                          dentryP, (char *)dentryP->d_name.name, mode, umask,
                          &cred);

  if (rc != 0)
  {
    /* Creation failed: get the dentry out of the dcache. */
    d_drop(dentryP);
  }
  else
  {
    /* Sanity check what gpfsMkdir handed back. */
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->PRVINODE == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);

    diP->i_sb->s_dirt = 1;
  }

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_EXIT,
         "gpfs_i_mkdir exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/* Remove the directory named by dentryP from directory diP
 * (inode_operations rmdir entry point).
 *
 * diP     - parent directory inode.
 * dentryP - positive dentry of the directory to remove.
 *
 * Returns 0 on success or a negative errno value; an EEXIST result from
 * gpfsRmdir is reported as ENOTEMPTY.  On failure the dentry's original
 * d_op table is restored; the dentry is not dropped.
 */
int
gpfs_i_rmdir(struct inode *diP, struct dentry *dentryP)
{
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  struct dentry_operations *savedDopsP;
  cxiNode_t *cnP;
  cxiNode_t *dcnP;
  ext_cred_t cred;
  int rc;

  VFS_STAT_START(rmdirCall);
  ENTER(0);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_ENTER,
         "gpfs_i_rmdir enter: diP 0x%lX iP 0x%lX name '%s'\n",
         diP, iP, dentryP->d_name.name);
  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);
  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  /* See the comment in gpfs_i_unlink.  The Linux kernel treats directory
     dentries a little differently from regular file dentries.  In
     particular, a successful rmdir does not appear to turn the removed
     directory's dentry into a valid negative dentry: if it had no
     references at the time of rmdir it simply gets unhashed and recycled.
     If it did have extra references (e.g. some process uses the directory
     as its cwd), it is unhashed but stays a positive dentry pointing at
     the deleted inode until its ref count drops to zero, and only then is
     it recycled.

     So there is no apparent need to mark directory dentries as 'needing
     revalidation' during a BR token revoke (for regular files we know it
     is needed).  This aspect of kernel behaviour is not guaranteed to
     stay that way, though, so we stay on the safe side and treat
     directories like regular files.  Marking a dentry 'needing
     revalidation' does not appear to have ill effects beyond the extra
     cycles spent revalidating, and a BR token revoke handler racing with
     an unsuccessful gpfsRmdir is rare enough to tolerate that cost. */
  savedDopsP = dentryP->d_op;
  dentryP->d_op = &gpfs_dops_ddeletepending;

  setCred(&cred);
  rc = gpfs_ops.gpfsRmdir(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
                          &cred);
  if (rc == 0)
  {
    /* d_delete will be called at the VFS layer since rc == 0 */
    diP->i_sb->s_dirt = 1;
  }
  else
  {
    if (rc == EEXIST)
      rc = ENOTEMPTY;
    /* Put the original table back unless d_op was changed in the
       meantime.  Unlike unlink, the dentry is deliberately not dropped
       here (the d_drop call was disabled in the original code). */
    if (dentryP->d_op == &gpfs_dops_ddeletepending)
      dentryP->d_op = savedDopsP;
  }

  PRINTINODE(iP);
  PRINTDENTRY(dentryP);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_EXIT,
         "gpfs_i_rmdir exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/* Create a special file (inode_operations mknod entry point).
 *
 * diP     - parent directory inode.
 * dentryP - dentry carrying the name of the new node.
 * mode    - file type and permission bits requested by the caller.
 * rdev    - device number for the new node.
 *
 * Returns 0 on success or a negative errno value.  On failure the dentry
 * is dropped from the dcache with d_drop() and the error is passed to
 * cxiErrorNFS, as the other create/remove operations in this file do.
 */
int
#if LINUX_KERNEL_VERSION >= 2050000
gpfs_i_mknod(struct inode *diP, struct dentry *dentryP, int mode, dev_t rdev)
#else
gpfs_i_mknod(struct inode *diP, struct dentry *dentryP, int mode, int rdev)
#endif
{
  int rc = 0;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *dcnP;
  cxiNode_t *cnP;
  cxiIno_t iNum = (cxiIno_t)-1;
  struct inode *newInodeP = NULL;
  cxiMode_t umask = get_umask();
  ext_cred_t eCred;
  cxiDev32_t rdev32;

  VFS_STAT_START(mknodCall);
  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_ENTER,
         "gpfs_i_mknod enter: diP 0x%lX mode 0x%X rdev 0x%X name '%s'\n",
         diP, mode, (int)rdev, dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  LOGASSERT(privVfsP != NULL);

  setCred(&eCred);
  /* gpfsMknod is handed the device number in its 32-bit cxi form. */
  rdev32 = cxiDevToDev32(rdev);
  rc = gpfs_ops.gpfsMknod(privVfsP, dcnP, (void **)&newInodeP, &cnP,
                          &iNum, dentryP, (char *)dentryP->d_name.name,
                          mode, umask, (cxiDev_t)rdev32, &eCred);
  if (rc == 0)
  {
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->PRVINODE == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);
  }
  else
  {
    d_drop(dentryP);
    goto xerror;
  }
  diP->i_sb->s_dirt = 1;

  /* Set vector table for special files, gpfs will not get these operations.*/
#if LINUX_KERNEL_VERSION >= 2060000
  init_special_inode(newInodeP, newInodeP->i_mode, newInodeP->i_rdev);
#else
  init_special_inode(newInodeP, newInodeP->i_mode,
                     kdev_t_to_nr(newInodeP->i_rdev));
#endif

xerror:
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_EXIT,
         "gpfs_i_mknod exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);
  
  /* Report failures the same way create, mkdir, symlink, link, unlink and
     rmdir do. */
  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/*
 * gpfs_i_rename - inode operation: rename dentryP (in directory diP) to
 * tDentryP (in directory tdiP).
 *
 * Source and target directories must share a superblock, otherwise EXDEV
 * is reported.  The rename itself is done by gpfs_ops.gpfsRename; after a
 * successful rename the attributes of every inode involved are refreshed
 * through gpfs_i_getattr_internal and the superblock is flagged dirty.
 *
 * Returns 0 on success, otherwise a negated error code.
 */
int
gpfs_i_rename(struct inode *diP, struct dentry *dentryP,
              struct inode *tdiP, struct dentry *tDentryP)
{
  int rc = 0;
  struct inode *srcInodeP = dentryP->d_inode;
  struct inode *tgtInodeP = tDentryP->d_inode;
  struct gpfsVfsData_t *vfsDataP;
  cxiNode_t *srcNodeP, *srcDirNodeP, *tgtNodeP, *tgtDirNodeP;
  ext_cred_t cred;

  VFS_STAT_START(renameCall);
  ENTER(0);
  TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_1,
         "gpfs_i_rename enter: iP 0x%lX dvP 0x%lX name '%s'"
         " tiP 0x%lX tdiP 0x%lX new name '%s'\n",
         srcInodeP, diP, dentryP->d_name.name, tgtInodeP, tdiP,
         tDentryP->d_name.name);
  /* BKL is held at entry */

  /* A plain rename may not cross from one superblock to another. */
  if (tdiP->i_sb != diP->i_sb)
  {
    rc = EXDEV;
    goto xerror;
  }

  srcNodeP = VP_TO_CNP(srcInodeP);
  srcDirNodeP = VP_TO_CNP(diP);

  /* The target name may not exist yet, in which case there is no node. */
  if (tgtInodeP == NULL)
    tgtNodeP = NULL;
  else
    tgtNodeP = VP_TO_CNP(tgtInodeP);
  tgtDirNodeP = VP_TO_CNP(tdiP);

  vfsDataP = VP_TO_PVP(srcInodeP);
  LOGASSERT(vfsDataP != NULL);

  setCred(&cred);
  rc = gpfs_ops.gpfsRename(vfsDataP, srcNodeP, srcDirNodeP,
                           (char *)dentryP->d_name.name, tgtNodeP,
                           tgtDirNodeP, (char *)tDentryP->d_name.name,
                           &cred);
  if (rc != 0)
    goto xerror;

  /* Refresh the attributes of everything the rename touched. */
  gpfs_i_getattr_internal(srcInodeP);
  gpfs_i_getattr_internal(diP);

  if (tgtInodeP != NULL)
    gpfs_i_getattr_internal(tgtInodeP);

  if (diP != tdiP)
    gpfs_i_getattr_internal(tdiP);

  diP->i_sb->s_dirt = 1;

xerror:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_EXIT,
         "gpfs_i_rename exit: iP 0x%lX rc %d\n", srcInodeP, rc);

  if (rc != 0)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

int
gpfs_i_readlink(struct dentry *dentryP, char *bufP, int buflen)
{
  int rc = 0;
  Boolean gotBKL = false;
  struct cxiUio_t tmpUio;
  cxiIovec_t tmpIovec;
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  
  VFS_STAT_START(readlinkCall);
  ENTER(0);
  TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_ENTER,
         "gpfs_i_readlink enter: dentryP 0x%lX bufP 0x%lX len %d "
           "iP 0x%lX name '%s'\n",
         dentryP, bufP, buflen, iP, dentryP->d_name.name);

  /* BKL is not held at entry, except for NFS calls */
  TraceBKL();
  if (current->lock_depth >= 0)  /* kernel lock is held by me */
  {
    gotBKL = true;
    unlock_kernel();
  }

  cnP = VP_TO_CNP(iP);
  privVfsP = VP_TO_PVP(iP);
  LOGASSERT(privVfsP != NULL);

  tmpIovec.iov_base = bufP;          /* base memory address                   */
  tmpIovec.iov_len = buflen;         /* length of transfer for this area      */

  tmpUio.uio_iov = &tmpIovec;        /* ptr to array of iovec structs         */
  tmpUio.uio_iovcnt = 1;             /* #iovec elements left to be processed  */
  tmpUio.uio_iovdcnt = 0;            /* #iovec elements already processed     */
  tmpUio.uio_offset = 0;             /* byte offset in file/dev to read/write */
  tmpUio.uio_resid = buflen;         /* #bytes left in data area              */
  tmpUio.uio_segflg = UIO_USERSPACE; /* copy to user space buffer             */
  tmpUio.uio_fmode = 0;              /* file modes from open file struct      */

  rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_EXIT,
        "gpfs_i_readlink exit: iP 0x%lX uio_resid %ld offset %d rc %d\n",
         iP, tmpUio.uio_resid, tmpUio.uio_offset, rc);

  VFS_STAT_STOP;

  if (gotBKL)        /* If held kernel lock on entry then reacquire it */
    lock_kernel();

  if (rc)
    cxiErrorNFS(rc);

  EXIT(0);
  if (rc)
    return (-rc);

  return (buflen - tmpUio.uio_resid);
}

/*
 * gpfs_i_follow_link - inode operation: resolve the symbolic link at dentry.
 *
 * The link text is read with gpfs_ops.gpfsReadlink into a temporary pinned
 * kernel buffer of CXI_PATH_MAX+1 bytes, NUL-terminated, and passed to
 * VFS_FOLLOW_LINK together with nd.  If the calling task holds the big
 * kernel lock it is dropped for the duration of the call.
 *
 * If the buffer cannot be allocated or gpfsReadlink fails, path_release(nd)
 * is called here before returning the error.
 *
 * Returns:
 *   kernels >= 2.6.16: NULL (no cookie) on success, ERR_PTR(-errno) on
 *                      failure
 *   older kernels:     0 on success, negative errno on failure
 *
 * NOTE(review): the temporary buffer is freed before returning while
 * gpfs_i_put_link frees whatever nd_get_link(nd) yields; confirm that
 * VFS_FOLLOW_LINK does not leave nd pointing at this buffer.
 */
#if LINUX_KERNEL_VERSION >= 2061600
void* gpfs_i_follow_link(struct dentry *dentry, struct nameidata *nd)
#else
int gpfs_i_follow_link(struct dentry *dentry, struct nameidata *nd)
#endif
{
  int rc;
  Boolean gotBKL = false;
  struct cxiUio_t tmpUio;
  cxiIovec_t tmpIovec;
  struct inode *iP = dentry->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  char *buf = NULL;

  ENTER(0);
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_ENTER,
         "gpfs_i_follow_link enter: inode 0x%lX name '%s'\n",
         dentry->d_inode, dentry->d_name.name);

  /* BKL is not held at entry, except for NFS calls */
  TraceBKL();
  if (current->lock_depth >= 0)  /* kernel lock is held by me */
  {
    gotBKL = true;
    unlock_kernel();
  }

  /* Allocate a temporary buffer to hold the symlink contents; the extra
     byte is for the terminating NUL written below. */
  buf = cxiMallocPinned(CXI_PATH_MAX+1);
  if (buf == NULL)
  {
    rc = -ENOMEM;
    goto xerror;
  }

  cnP = VP_TO_CNP(iP);
  privVfsP = VP_TO_PVP(iP);
  LOGASSERT(privVfsP != NULL);

  /* The transfer length must match the allocation above, so the same
     constant (CXI_PATH_MAX) is used for both. */
  tmpIovec.iov_base = buf;          /* base memory address                   */
  tmpIovec.iov_len = CXI_PATH_MAX;  /* length of transfer for this area      */

  tmpUio.uio_iov = &tmpIovec;       /* ptr to array of iovec structs         */
  tmpUio.uio_iovcnt = 1;            /* #iovec elements left to be processed  */
  tmpUio.uio_iovdcnt = 0;           /* #iovec elements already processed     */
  tmpUio.uio_offset = 0;            /* byte offset in file/dev to read/write */
  tmpUio.uio_resid = CXI_PATH_MAX;  /* #bytes left in data area              */
  tmpUio.uio_segflg = UIO_SYSSPACE; /* copy to kernel space buffer           */
  tmpUio.uio_fmode = 0;             /* file modes from open file struct      */

  /* Read symlink contents */
  rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);
  if (rc)
  {
    cxiErrorNFS(rc);
    rc = -rc;
    goto xerror;
  }

  /* set end of string: bytes transferred = CXI_PATH_MAX - residual */
  buf[CXI_PATH_MAX - tmpUio.uio_resid] = 0;

  TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_FOLLOW_LINK_1,
         "gpfs_i_follow_link readlink rc %d data '%s'\n", rc, buf);

  VFS_FOLLOW_LINK(rc, nd, buf);
  goto exit;

xerror:
  /* We failed before reaching VFS_FOLLOW_LINK; release nd ourselves. */
  path_release(nd);

exit:
  if (buf)
    cxiFreePinned(buf);

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_2,
         "gpfs_i_follow_link exit: inode 0x%lX rc %d\n",
         dentry->d_inode, rc);

  if (gotBKL)        /* If held kernel lock on entry then reacquire it */
    lock_kernel();

  EXIT(0);

#if LINUX_KERNEL_VERSION >= 2061600
  /* Report failures to the VFS instead of silently returning "no cookie". */
  return (rc < 0) ? ERR_PTR(rc) : NULL;
#else
  return rc;
#endif
}

#ifdef HAS_IOP_PUT_LINK

/*
 * gpfs_i_put_link - inode operation: release the link buffer recorded in nd.
 *
 * Fetches the pointer with nd_get_link(nd) and, unless IS_ERR() says it is
 * an encoded error, frees it with cxiFreePinned().  The cookie argument of
 * newer kernels is not used.
 */
#if LINUX_KERNEL_VERSION >= 2061600
void gpfs_i_put_link(struct dentry *dentry, struct nameidata *nd, void* cookie)
#else
void gpfs_i_put_link(struct dentry *dentry, struct nameidata *nd)
#endif
{
  char *linkP = nd_get_link(nd);
  char *tracedP = IS_ERR(linkP) ? NULL : linkP;

  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_PUTLINK,
        "gpfs_i_put_link dentry 0x%lX nd 0x%lX buf 0x%lX\n", dentry, nd,
        tracedP);

  /* An error value is not a buffer; there is nothing to free. */
  if (IS_ERR(linkP))
    return;

  cxiFreePinned(linkP);
}

#endif /* HAS_IOP_PUT_LINK */

/*
 * gpfs_i_bmap - inode operation stub for block mapping.
 *
 * Neither argument is examined; the call is traced and -ENOSYS returned.
 */
int
gpfs_i_bmap(struct inode *iP, int fragment)
{
  const int notImplemented = -ENOSYS;

  ENTER(0);
  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_BMAP,
         "gpfs_i_bmap: rc ENOSYS\n");
  TraceBKL();
  EXIT(0);
  return notImplemented;
}

/*
 * gpfs_i_truncate - inode operation: truncate notification for iP.
 *
 * Only traces the call; no file system work is done here.
 */
void
gpfs_i_truncate(struct inode *iP)
{
  ENTER(0);
  /* Nothing to do since the file size was updated on the notify_change
   * call which preceded this call
   */
  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_TRUNCATE,
         "gpfs_i_truncate: inode 0x%lX\n", iP);
  TraceBKL();
  EXIT(0);
}

/*
 * gpfs_i_permission - inode operation: check whether the current task may
 * access iP in the way described by mode.
 *
 * A mode of 0 is accepted without consulting GPFS.  Otherwise the check is
 * delegated to gpfs_ops.gpfsAccess with ACC_SELF and the credentials filled
 * in by setCred().  The nameidata argument of 2.6 kernels is not used.
 *
 * Returns 0 when access is granted, otherwise a negated error code.
 */
int
gpfs_i_permission(struct inode *iP, int mode
#if LINUX_KERNEL_VERSION >= 2060000
                  , struct nameidata *ni
#endif
                  )
{
  cxiNode_t *nodeP;
  struct gpfsVfsData_t *vfsDataP;
  ext_cred_t cred;
  int rc = 0;

  VFS_STAT_START(accessCall);
  ENTER(0);

  /* BKL is held at entry */

  nodeP = VP_TO_CNP(iP);

  TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_ENTER,
         "gpfs_i_permission enter: iP 0x%lX mode 0x%X uid %d gid %d "
         "i_mode 0x%X i_xinfo 0x%X", iP, mode, current->fsuid,
         current->fsgid, iP->i_mode, nodeP->xinfo);

  vfsDataP = VP_TO_PVP(iP);
  LOGASSERT(vfsDataP != NULL);

  /* Only ask GPFS when an access mode was actually supplied. */
  if (mode != 0)
  {
    setCred(&cred);
    rc = gpfs_ops.gpfsAccess(vfsDataP, nodeP, mode, ACC_SELF, &cred);
  }

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_EXIT,
         "gpfs_i_permission exit: iP 0x%lX std %d dir std %d rc %d",
         iP, iP->i_op == &gpfs_iops_stdperm, iP->i_op == &gpfs_dir_iops_stdperm,
         rc);

  if (rc != 0)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}

/*
 * gpfs_i_smap - inode operation stub for sector mapping.
 *
 * Neither argument is examined; the call is traced and -ENOSYS returned.
 */
int
gpfs_i_smap(struct inode *iP, int sector)
{
  const int notImplemented = -ENOSYS;

  ENTER(0);
  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_SMAP,
         "gpfs_i_smap: rc ENOSYS\n");
  TraceBKL();
  EXIT(0);
  return notImplemented;
}

/*
 * gpfs_i_updatepage - inode operation stub for page update.
 *
 * None of the arguments is examined; the call is traced and -ENOSYS
 * returned.
 */
int
gpfs_i_updatepage(struct file *fP, struct page *pageP, const char *bufP,
                  unsigned long offset, uint count, int sync)
{
  const int notImplemented = -ENOSYS;

  ENTER(0);
  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_UPDATEPAGE,
         "gpfs_i_updatepage: rc ENOSYS\n");
  TraceBKL();
  EXIT(0);
  return notImplemented;
}

/*
 * gpfs_i_revalidate - inode operation: make sure the cached attributes of
 * the inode behind dentryP are current.
 *
 * Exit paths (reported as "code" in the exit trace):
 *   1  dentry has no inode                       -> ENOENT
 *   2  inode has no cxiNode attached             -> ESTALE
 *   3  cached stat information is already valid  -> 0, nothing to do
 *   0  attributes were fetched with gpfs_ops.gpfsGetattr
 *
 * Returns 0 on success, otherwise a negated error code.
 */
int
gpfs_i_revalidate(struct dentry *dentryP)
{
  int rc;
  int exitCode = 0;
  struct inode *iP = dentryP->d_inode;
  cxiNode_t *nodeP;
  cxiVattr_t attrs;
  struct gpfsVfsData_t *vfsDataP;

  ENTER(0);
  VFS_INC(revalidateCount);
  TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_ENTER,
         "gpfs_i_revalidate enter: dentryP 0x%lX iP 0x%lX ino 0x%X name '%s'\n",
         dentryP, dentryP->d_inode,
         (iP) ? iP->i_ino : -1,  dentryP->d_name.name);
  /* BKL is usually not held, but seems to be held when coming here as
     part of setting an ACL */

  if (iP == NULL)
  {
    rc = ENOENT;
    exitCode = 1;
    goto xerror;
  }

  nodeP = VP_TO_CNP(iP);
  if (nodeP == NULL)
  {
    /* A missing cxiNode can be the result of a bug in
       linux/fs/dcache.c (prune_dcache): "count" entries are to be
       pruned, but when the last one turns out to be recently referenced,
       count is decremented without terminating the loop, so pruning
       continues past where it should (everything gets pruned).  Without
       our patch for this, referencing the cxiNode is a kernel failure.
       Checking here (and in lookup) lets us reject the call instead. */

    TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REVALIDATE_STALE,
           "gpfs_i_revalidate: cxiNode for iP 0x%lX (ino %d) was FREED!\n",
           iP, iP->i_ino);

    /* Tempting as it is to learn more about this inode, calling
     * PRINTINODE(iP) here is not ok.
     */

    exitCode = 2;
    rc = ESTALE;
    goto xerror;
  }

  if (CXI_IC_STAT == (nodeP->icValid & CXI_IC_STAT))
  {
    /* Every stat-related cache bit is set: nothing to refresh. */
    exitCode = 3;
    rc = 0;
    goto xerror;
  }

  vfsDataP = VP_TO_PVP(iP);
  LOGASSERT(vfsDataP != NULL);

  /* Fetching the attributes causes a call back under a lock that sets the
   * inode attributes at the OS level (this operating system caches the
   * information in the vfs layer).
   */
  rc = gpfs_ops.gpfsGetattr(vfsDataP, nodeP, &attrs, false);
  PRINTINODE(iP);

#if 0
  /* Delay briefly to give token revoke races a chance to happen, if there
     are any.  Time delay is in jiffies (10ms). */
#  define howLong 5
  TRACE1(TRACE_VNODE, 4, TRCID_REVAL_DELAY,
         "gpfs_i_revalidate: begin delay %d\n", howLong);
  current->state = TASK_INTERRUPTIBLE;
  schedule_timeout(howLong);
  TRACE1(TRACE_VNODE, 14, TRCID_REVAL_DELAY_END,
         "gpfs_i_revalidate: end delay %d\n", howLong);
#endif

xerror:
  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_EXIT,
         "gpfs_i_revalidate exit: dentry 0x%lX code %d rc %d\n",
         dentryP, exitCode, rc);

  if (rc != 0)
    cxiErrorNFS(rc);

  EXIT(0);
  return -rc;
}

/*
 * gpfs_i_setattr - inode operation: change attributes of the inode behind
 * dentryP as described by iattrP.
 *
 * Thin wrapper that adds call statistics around gpfs_i_setattr_internal
 * and negates its return code.
 */
int
gpfs_i_setattr(struct dentry *dentryP, struct iattr *iattrP)
{
  int err;

  VFS_STAT_START(setattrCall);
  ENTER(0);
  err = gpfs_i_setattr_internal(dentryP->d_inode, iattrP);

  VFS_STAT_STOP;
  EXIT(0);
  return -err;
}

/*
 * gpfs_i_setattr_internal - push the attribute changes described by aP
 * down to GPFS for inode iP.
 *
 * The ia_valid bits are handled in a fixed order, one gpfsSetattr call per
 * group: size (V_SIZE), mode (V_MODE), owner/group (V_OWN) and finally the
 * timestamps (V_STIME).  The first failing call ends processing.  When
 * everything succeeded, gpfsSyncNFS is invoked if cxiAllowNFSFsync() allows
 * it (its return code then becomes the result) and the superblock is
 * flagged dirty.
 *
 * Returns 0 or a positive error code; the value is NOT negated here.
 */
int
gpfs_i_setattr_internal(struct inode *iP, struct iattr *aP)
{
  int rc = 0;
  int step = 0;   /* which stage failed; reported in the exit trace    */
  long p1;        /* gpfsSetattr arguments: wide enough on 64bit to    */
  long p2;        /*   hold either a pointer or an integer             */
  long p3;
  cxiTimeStruc_t newAtime, newMtime, newCtime;
  cxiNode_t *nodeP;
  struct gpfsVfsData_t *vfsDataP;
  ext_cred_t cred;
  unsigned int pending;

  ENTER(0);
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_ENTER,
         "gpfs_i_setattr enter: iP 0x%lX ia_valid 0x%X\n", iP, aP->ia_valid);
  /* ?? Callers of this are inconsistent about whether the BKL is held */

  nodeP = VP_TO_CNP(iP);
  vfsDataP = VP_TO_PVP(iP);
  LOGASSERT(vfsDataP != NULL);

  /* Work on a private copy of the request bits; the size step may clear
     some of them. */
  pending = aP->ia_valid;

  /* ---- file size ---- */
  if (pending & ATTR_SIZE)
  {
    p1 = (long)&aP->ia_size;
    p2 = 0;
    p3 = 0;

    /* The call is skipped only when the cached attributes are valid and
       already show the requested size. */
    if ((nodeP->icValid & CXI_IC_ATTR) == 0 ||
        aP->ia_size != ((struct inode *)nodeP->osNodeP)->i_size)
    {
      /* credentials are rebuilt before every gpfsSetattr call because
         gpfsSetattr may remap ids */
      setCred(&cred);
      rc = gpfs_ops.gpfsSetattr(vfsDataP, nodeP, V_SIZE, p1, p2, p3,
                                &cred);
      if (rc != 0)
      {
        step = 1;
        goto xerror;
      }

      /* gpfsSetattr(... V_SIZE ...) has already updated ctime and mtime,
         so there is no need to set them again below. */
      pending &= ~(ATTR_MTIME | ATTR_CTIME);
    }
  }

  /* ---- file mode ---- */
  if (pending & ATTR_MODE)
  {
    p1 = (long)aP->ia_mode;
    p2 = 0;
    p3 = 0;

    setCred(&cred);
    rc = gpfs_ops.gpfsSetattr(vfsDataP, nodeP, V_MODE, p1, p2, p3, &cred);
    if (rc != 0)
    {
      step = 2;
      goto xerror;
    }
  }

  /* ---- owner and/or group ---- */
  if (pending & (ATTR_UID | ATTR_GID))
  {
    /* p1 carries "leave as is" flags, p2 the uid, p3 the gid. */
    p1 = 0;
    p2 = 0;
    p3 = 0;

    if (!(pending & ATTR_UID))
      p1 |= T_OWNER_AS_IS;
    else
      p2 = (long)aP->ia_uid;

    if (!(pending & ATTR_GID))
      p1 |= T_GROUP_AS_IS;
    else
      p3 = (long)aP->ia_gid;

    setCred(&cred);
    rc = gpfs_ops.gpfsSetattr(vfsDataP, nodeP, V_OWN, p1, p2, p3, &cred);
    if (rc != 0)
    {
      step = 3;
      goto xerror;
    }
  }

  /* ---- access, modification, or change time ---- */
  if (pending & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
  {
    /* Each argument is either 0 (leave alone) or the address of the
       converted timestamp. */
    p1 = 0;
    p2 = 0;
    p3 = 0;

    if (pending & ATTR_ATIME)
    {
      CXITIME_FROM_INODETIME(newAtime, aP->ia_atime);
      p1 = (long)&newAtime;
    }
    if (pending & ATTR_MTIME)
    {
      CXITIME_FROM_INODETIME(newMtime, aP->ia_mtime);
      p2 = (long)&newMtime;
    }
    if (pending & ATTR_CTIME)
    {
      CXITIME_FROM_INODETIME(newCtime, aP->ia_ctime);
      p3 = (long)&newCtime;
    }
    setCred(&cred);
    rc = gpfs_ops.gpfsSetattr(vfsDataP, nodeP, V_STIME, p1, p2, p3, &cred);
    if (rc != 0)
      step = 4;   /* last stage: control reaches the exit code next */
  }

xerror:

  if (rc == 0)
  {
    /* For NFS the inode might have to be written, but that decision is
     * made inside gpfsSyncNFS().
     */
    if (cxiAllowNFSFsync())
    {
      setCred(&cred);
      rc = gpfs_ops.gpfsSyncNFS(vfsDataP, nodeP, 0, &cred);
    }

    iP->i_sb->s_dirt = 1;
  }
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_EXIT,
         "gpfs_i_setattr exit: iP 0x%lX code %d rc %d\n", iP, step, rc);

  if (rc != 0)
    cxiErrorNFS(rc);

  EXIT(0);
  return rc;
}

#if LINUX_KERNEL_VERSION >= 2050000
int
gpfs_i_getattr(struct vfsmount *mntP, struct dentry *dentryP, 
               struct kstat *kstatP)
#else
int
gpfs_i_getattr(struct dentry *dentryP, struct iattr *iattrP)
#endif
{
  int rc;
  struct inode *iP = dentryP->d_inode;
  cxiNode_t *cnP;

  VFS_STAT_START(getattrCall);
  ENTER(0);

  cnP = VP_TO_CNP(iP);

  if (cnP && ((cnP->icValid & CXI_IC_STAT) == CXI_IC_STAT)) /* attr are vaild */
    rc = 0;
  else
    rc = gpfs_i_getattr_internal(iP);

  if (!rc)
#if LINUX_KERNEL_VERSION >= 2050000
    generic_fillattr(iP, kstatP);
#else
    getIattr(iP, iattrP);
#endif
  else
    rc = -rc;

  VFS_STAT_STOP;
  EXIT(0);
  return rc;
}

/*
 * gpfs_i_getattr_internal - refresh the OS-level attributes of inode iP
 * by calling gpfs_ops.gpfsGetattr.
 *
 * The attribute structure filled in by gpfsGetattr is not used here.
 *
 * Returns 0 or a positive error code; the value is NOT negated here.
 */
int
gpfs_i_getattr_internal(struct inode *iP)
{
  int rc = 0;
  cxiNode_t *nodeP;
  struct gpfsVfsData_t *vfsDataP;
  cxiVattr_t attrs;

  ENTER(0);
  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_ENTER,
         "gpfs_i_getattr enter: iP 0x%lX\n", iP);
  /* BKL is held at entry */

  vfsDataP = VP_TO_PVP(iP);
  LOGASSERT(vfsDataP != NULL);
  nodeP = VP_TO_CNP(iP);

  /* Fetching the attributes causes a call back under a lock that sets the
   * inode attributes at the OS level (this operating system caches the
   * information in the vfs layer).
   */
  rc = gpfs_ops.gpfsGetattr(vfsDataP, nodeP, &attrs, false);
  PRINTINODE(iP);

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_EXIT,
         "gpfs_i_getattr exit: iP 0x%lX rc %d\n", iP, rc);

  if (rc != 0)
    cxiErrorNFS(rc);

  EXIT(0);
  return rc;
}

#if LINUX_KERNEL_VERSION > 2060000
#include <cxiAclUser.h>

#define XATTR_SECURITY_PREFIX "security."
#define XATTR_TRUSTED_PREFIX "trusted."
#define XATTR_USER_PREFIX "user."
#define XATTR_NAME_ACL_ACCESS	"system.posix_acl_access"
#define XATTR_NAME_ACL_DEFAULT	"system.posix_acl_default"

/*
 * test_prefix - check whether name begins with prefix.
 *
 * Returns a pointer to the first character of name after the prefix (which
 * is the terminating NUL when name equals prefix), or NULL when name does
 * not start with prefix.  A NULL name or prefix never matches.
 */
static const char *
test_prefix(const char *name, const char *prefix)
{
  if (name == NULL || prefix == NULL)
    return NULL;

  /* Advance both strings while they agree; a short name stops the loop
     because its NUL differs from the next prefix character. */
  while (*prefix && *name == *prefix) {
    name++;
    prefix++;
  }
  return *prefix ? NULL : name;
}

/*
 * Inode operation getxattr()
 *
 * Fetch the value of extended attribute "name" of the inode behind dentry
 * into buf (buf_size bytes).
 *
 * Name handling:
 *   system.posix_acl_access   (CONFIG_FS_POSIX_ACL only) served by
 *   system.posix_acl_default  gpfs_get_posix_acl; the default ACL is only
 *                             recognised on directories.  Trailing
 *                             characters after either name give EINVAL.
 *   security.* trusted.* user.  passed to gpfs_ops.gpfsFattr(GET_XATTR);
 *                             an empty suffix gives EINVAL and trusted.*
 *                             additionally requires CAP_SYS_ADMIN (EPERM).
 *   anything else             EOPNOTSUPP
 *
 * Returns the value length reported by gpfsFattr on success, -ENODATA when
 * that length is negative, or another negated error code.  For the ACL
 * names the return value of gpfs_get_posix_acl is passed through unchanged.
 *
 * NOTE(review): on the ACL path cxiErrorNFS() is called for any non-zero
 * gpfs_get_posix_acl result; confirm that routine's return convention.
 */
ssize_t
gpfs_i_getxattr(struct dentry *dentry, const char *name, void *buf,
                size_t buf_size)
{
  int rc;
  cxiNode_t *cnP;
  struct gpfsVfsData_t *privVfsP;
  struct tsxattr xattr;
  struct tsxattrs xattrs;
  ext_cred_t eCred;
  void *argP = &xattrs;
  int flags = 0;
  struct inode *iP = dentry->d_inode;
  mm_segment_t oldfs;
  const char *n;

  ENTER(0);
  VFS_STAT_START(getxattrCall);

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_ENTER,
         "gpfs_i_getxattr enter: iP 0x%lX name %s buf 0x%lX size %d\n",
         iP, (name) ? name : "NULL", buf, buf_size);

  if (iP == NULL)
  {
    rc = ENOENT;
    goto xerror;
  }

#ifdef CONFIG_FS_POSIX_ACL
  n = test_prefix(name, XATTR_NAME_ACL_ACCESS);
  if (n != NULL)
  {
    /* The ACL names must match exactly. */
    if (*n != '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    rc = gpfs_get_posix_acl(dentry, ACL_TYPE_ACCESS, buf, buf_size);
    goto xerror2;
  }
  if (S_ISDIR(iP->i_mode))
  {
    n = test_prefix(name, XATTR_NAME_ACL_DEFAULT);
    if (n != NULL)
    {
      if (*n != '\0')
      {
        rc = EINVAL;
        goto xerror;
      }
      rc = gpfs_get_posix_acl(dentry, ACL_TYPE_DEFAULT, buf, buf_size);
      goto xerror2;
    }
  }
#endif
  n = test_prefix(name, XATTR_SECURITY_PREFIX);
  if (n != NULL)
  {
    /* A bare namespace prefix is not a valid attribute name. */
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    goto xattr;
  }
  n = test_prefix(name, XATTR_TRUSTED_PREFIX);
  if (n != NULL)
  {
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    if (!capable(CAP_SYS_ADMIN))
    {
      rc = EPERM;
      goto xerror;
    }
    goto xattr;
  }
  n = test_prefix(name, XATTR_USER_PREFIX);
  if (n != NULL)
  {
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    goto xattr;
  }
  rc = EOPNOTSUPP;
  goto xerror;

xattr:
  setCred(&eCred);
  xattrs.appId = 3;       // application id GPFS_ATTR_INTERNAL_APPL_ID
  xattrs.nattrs = 1;      // no of attributes to get or set
  xattrs.attrs = &xattr;  // attributes to get or set

  xattr.keyP = (char*) name;        // attribute key
  xattr.keyLen = strlen(name) + 1;  // key length
  xattr.valueP = buf;               // attribute value
  xattr.valueLen = buf_size;        // length of attribute value

  privVfsP = VP_TO_PVP(iP);
  LOGASSERT(privVfsP != NULL);
  cnP = VP_TO_CNP(iP);

  /* Switch the address limit to get_ds() around the call and restore it
     afterwards. */
  oldfs = get_fs();
  set_fs(get_ds());

  rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, GET_XATTR, argP,
                          NULL, &eCred);

  set_fs(oldfs);
  if (!rc)
  {
    TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT0,
           "gpfs_i_getxattr exit: iP 0x%lX len %d\n", iP, xattr.valueLen);
    if (xattr.valueLen >= 0)
    {
      VFS_STAT_STOP;
      EXIT(0);
      return (xattr.valueLen);
    }

    /* Negative length: report ENODATA through the common error exit so
       that VFS_STAT_STOP and EXIT run exactly once. */
    rc = ENODATA;
  }

xerror:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT,
         "gpfs_i_getxattr exit: iP 0x%lX rc %d\n", iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return (-rc);

xerror2:
  /* ACL path: rc comes from gpfs_get_posix_acl and is returned as is. */
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETEXTATTR_EXIT2,
         "gpfs_i_getxattr exit2: iP 0x%lX rc %d\n", iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return (rc);
}

/*
 * Inode operation setxattr()
 *
 * Set extended attribute "name" of the inode behind dentry to the
 * buf_size bytes at buf.
 *
 * Name handling:
 *   system.posix_acl_access   (CONFIG_FS_POSIX_ACL only) served by
 *   system.posix_acl_default  gpfs_set_posix_acl; the default ACL is only
 *                             recognised on directories.  Trailing
 *                             characters after either name give EINVAL.
 *                             The caller must own the inode (fsuid) or
 *                             have CAP_FOWNER, otherwise EPERM.
 *   security.* trusted.* user.  passed to gpfs_ops.gpfsFattr(SET_XATTR);
 *                             an empty suffix gives EINVAL and trusted.*
 *                             additionally requires CAP_SYS_ADMIN (EPERM).
 *   anything else             EOPNOTSUPP
 *
 * ext_flags is only traced; gpfsFattr is called with flags 0.
 *
 * Returns 0 on success, otherwise a negated error code.
 */
int
gpfs_i_setxattr(struct dentry *dentry, const char *name, const void *buf,
                size_t buf_size, int ext_flags)
{
  int rc;
  cxiNode_t *cnP;
  struct gpfsVfsData_t *privVfsP;
  struct tsxattr xattr;
  struct tsxattrs xattrs;
  ext_cred_t eCred;
  void *argP = &xattrs;
  int flags = 0;
  struct inode *iP = dentry->d_inode;
  mm_segment_t oldfs;
  const char *n;

  ENTER(0);
  VFS_STAT_START(setxattrCall);

  TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_SETEXTATTR_ENTER,
         "gpfs_i_setxattr enter: iP 0x%lX name %s buf 0x%lX size %d flags 0x%X\n",
         iP, (name) ? name : "NULL", buf, buf_size, ext_flags);

  if (iP == NULL)
  {
    rc = ENOENT;
    goto xerror;
  }

#ifdef CONFIG_FS_POSIX_ACL
  n = test_prefix(name, XATTR_NAME_ACL_ACCESS);
  if (n != NULL)
  {
    /* The ACL names must match exactly. */
    if (*n != '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    /* Only the owner or a CAP_FOWNER holder may change the ACL.  Leave
       through xerror so the error is negated and the exit bookkeeping
       (trace, VFS_STAT_STOP, EXIT) is not skipped. */
    if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
    {
      rc = EPERM;
      goto xerror;
    }
    rc = gpfs_set_posix_acl(dentry, ACL_TYPE_ACCESS, buf, buf_size);
    goto xerror;
  }
  if (S_ISDIR(iP->i_mode))
  {
    n = test_prefix(name, XATTR_NAME_ACL_DEFAULT);
    if (n != NULL)
    {
      if (*n != '\0')
      {
        rc = EINVAL;
        goto xerror;
      }
      if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
      {
        rc = EPERM;
        goto xerror;
      }
      rc = gpfs_set_posix_acl(dentry, ACL_TYPE_DEFAULT, buf, buf_size);
      goto xerror;
    }
  }
#endif
  n = test_prefix(name, XATTR_SECURITY_PREFIX);
  if (n != NULL)
  {
    /* A bare namespace prefix is not a valid attribute name. */
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    goto xattr;
  }
  n = test_prefix(name, XATTR_TRUSTED_PREFIX);
  if (n != NULL)
  {
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    if (!capable(CAP_SYS_ADMIN))
    {
      rc = EPERM;
      goto xerror;
    }
    goto xattr;
  }
  n = test_prefix(name, XATTR_USER_PREFIX);
  if (n != NULL)
  {
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    goto xattr;
  }
  rc = EOPNOTSUPP;
  goto xerror;

xattr:
  setCred(&eCred);
  xattrs.appId = 3;       // application id GPFS_ATTR_INTERNAL_APPL_ID
  xattrs.nattrs = 1;      // no of attributes to get or set
  xattrs.attrs = &xattr;  // attributes to get or set

  xattr.keyP = (char*) name;            // attribute key
  xattr.keyLen = strlen(name) + 1;      // key length
  xattr.valueP = (char *)buf;           // attribute value
  xattr.valueLen = buf_size;            // length of attribute value

  privVfsP = VP_TO_PVP(iP);
  LOGASSERT(privVfsP != NULL);
  cnP = VP_TO_CNP(iP);

  /* Switch the address limit to get_ds() around the call and restore it
     afterwards. */
  oldfs = get_fs();
  set_fs(get_ds());

  rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, SET_XATTR, argP,
                          NULL, &eCred);
  set_fs(oldfs);
xerror:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_SETEXTATTR_EXIT,
         "gpfs_i_setxattr exit: iP 0x%lX rc %d\n", iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return (-rc);
}

/*
 * Inode operation listxattr()
 *
 * Copies the list of attribute names of the inode behind dentry into the
 * supplied buffer, or computes how large that buffer has to be: passing a
 * NULL buffer asks for the required size only.
 *
 * The request goes to gpfs_ops.gpfsFattr(LIST_XATTR) with an attribute
 * count of zero, meaning "all attribute names".
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
ssize_t
gpfs_i_listxattr(struct dentry *dentry, char *buf, size_t buf_size)
{
  int rc;
  cxiNode_t *nodeP;
  struct gpfsVfsData_t *vfsDataP;
  struct tsxattr listAttr;
  struct tsxattrs request;
  ext_cred_t cred;
  void *argP = &request;
  int flags = 0;
  struct inode *iP = dentry->d_inode;
  mm_segment_t savedFs;

  ENTER(0);
  VFS_STAT_START(listxattrCall);

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXATTR_IN,
         "gpfs_i_listxattr enter: iP 0x%lX buf 0x%lX buf_size %d\n",
          iP, buf, buf_size);

  if (iP == NULL)
  {
    rc = ENOENT;
    goto xerror;
  }
  setCred(&cred);

  /* Request header: internal application id, all attribute names. */
  request.appId = 3;           /* GPFS_ATTR_INTERNAL_APPL_ID        */
  request.nattrs = 0;          /* zero means every attribute name   */
  request.attrs = &listAttr;

  /* No key; the value area receives the name list. */
  listAttr.keyP = NULL;
  listAttr.keyLen = 0;
  listAttr.valueP = buf;
  listAttr.valueLen = buf_size;

  vfsDataP = VP_TO_PVP(iP);
  LOGASSERT(vfsDataP != NULL);
  nodeP = VP_TO_CNP(iP);

  /* Switch the address limit to get_ds() around the call and restore it
     afterwards. */
  savedFs = get_fs();
  set_fs(get_ds());

  /* which names can we show ??? */
  rc = gpfs_ops.gpfsFattr(vfsDataP, nodeP, NULL, flags, LIST_XATTR, argP,
                          NULL, &cred);

  set_fs(savedFs);
  if (rc != 0)
    goto xerror;

  /* Success: the length is whatever gpfsFattr left in valueLen. */
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXTATTR_EXIT0,
         "gpfs_i_listxattr exit: iP 0x%lX len %d\n", iP, listAttr.valueLen);
  VFS_STAT_STOP;
  EXIT(0);
  return (listAttr.valueLen);

xerror:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LISTXTATTR_EXIT,
         "gpfs_i_listxattr exit: iP 0x%lX rc %d\n", iP, rc);

  if (rc != 0)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return (-rc);
}

/*
 * Inode operation removexattr()
 *
 * Remove extended attribute "name" from the inode behind dentry.
 *
 * Name handling:
 *   system.posix_acl_access   (CONFIG_FS_POSIX_ACL only) removed via
 *   system.posix_acl_default  gpfs_set_posix_acl(..., NULL, -1); the
 *                             default ACL is only recognised on
 *                             directories.  Trailing characters after
 *                             either name give EINVAL.  The caller must
 *                             own the inode (fsuid) or have CAP_FOWNER,
 *                             otherwise EPERM.
 *   security.* trusted.* user.  passed to gpfs_ops.gpfsFattr(SET_XATTR)
 *                             with a negative value length, which means
 *                             delete; an empty suffix gives EINVAL and
 *                             trusted.* additionally requires
 *                             CAP_SYS_ADMIN (EPERM).
 *   anything else             EOPNOTSUPP
 *
 * Returns 0 on success, otherwise a negated error code.
 */
int
gpfs_i_removexattr(struct dentry *dentry, const char *name)
{
  int rc;
  cxiNode_t *cnP;
  struct gpfsVfsData_t *privVfsP;
  struct tsxattr xattr;
  struct tsxattrs xattrs;
  ext_cred_t eCred;
  void *argP = &xattrs;
  int flags = 0;
  struct inode *iP = dentry->d_inode;
  mm_segment_t oldfs;
  const char *n;

  ENTER(0);
  VFS_STAT_START(removexattrCall);

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOVEXATTR_IN,
         "gpfs_i_removexattr enter: iP 0x%lX name %s\n", iP, (name) ? name : "NULL");

  if (iP == NULL)
  {
    rc = ENOENT;
    goto xerror;
  }
#ifdef CONFIG_FS_POSIX_ACL
  n = test_prefix(name, XATTR_NAME_ACL_ACCESS);
  if (n != NULL)
  {
    /* The ACL names must match exactly. */
    if (*n != '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    /* Only the owner or a CAP_FOWNER holder may remove the ACL.  Leave
       through xerror so the error is negated and the exit bookkeeping
       (trace, VFS_STAT_STOP, EXIT) is not skipped. */
    if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
    {
      rc = EPERM;
      goto xerror;
    }
    rc = gpfs_set_posix_acl(dentry, ACL_TYPE_ACCESS, NULL, -1);
    goto xerror;
  }
  if (S_ISDIR(iP->i_mode))
  {
    n = test_prefix(name, XATTR_NAME_ACL_DEFAULT);
    if (n != NULL)
    {
      if (*n != '\0')
      {
        rc = EINVAL;
        goto xerror;
      }
      if ((current->fsuid != iP->i_uid) && !capable(CAP_FOWNER))
      {
        rc = EPERM;
        goto xerror;
      }
      rc = gpfs_set_posix_acl(dentry, ACL_TYPE_DEFAULT, NULL, -1);
      goto xerror;
    }
  }
#endif
  n = test_prefix(name, XATTR_SECURITY_PREFIX);
  if (n != NULL)
  {
    /* A bare namespace prefix is not a valid attribute name. */
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    goto xattr;
  }
  n = test_prefix(name, XATTR_TRUSTED_PREFIX);
  if (n != NULL)
  {
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    if (!capable(CAP_SYS_ADMIN))
    {
      rc = EPERM;
      goto xerror;
    }
    goto xattr;
  }
  n = test_prefix(name, XATTR_USER_PREFIX);
  if (n != NULL)
  {
    if (*n == '\0')
    {
      rc = EINVAL;
      goto xerror;
    }
    goto xattr;
  }
  rc = EOPNOTSUPP;
  goto xerror;

xattr:
  setCred(&eCred);
  xattrs.appId = 3;       // application id GPFS_ATTR_INTERNAL_APPL_ID
  xattrs.nattrs = 1;      // no of attributes to get or set
  xattrs.attrs = &xattr;  // attributes to delete

  xattr.keyP = (char*) name;            // attribute key
  xattr.keyLen = strlen(name) + 1;      // key length
  xattr.valueP = NULL;                  // attribute value
  xattr.valueLen = -1;                  // length < zero means delete

  privVfsP = VP_TO_PVP(iP);
  LOGASSERT(privVfsP != NULL);
  cnP = VP_TO_CNP(iP);

  /* Switch the address limit to get_ds() around the call and restore it
     afterwards. */
  oldfs = get_fs();
  set_fs(get_ds());

  rc = gpfs_ops.gpfsFattr(privVfsP, cnP, NULL, flags, SET_XATTR, argP,
                          NULL, &eCred);
  set_fs(oldfs);

xerror:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REMOVEXATTR_EXIT,
         "gpfs_i_removexattr exit: iP 0x%lX rc %d\n", iP, rc);

  if (rc)
    cxiErrorNFS(rc);

  VFS_STAT_STOP;
  EXIT(0);
  return (-rc);
}
#endif