source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/file.c @ 223

Last change on this file since 223 was 16, checked in by rock, 17 years ago
File size: 42.6 KB
Line 
1/***************************************************************************
2 *
3 * Copyright (C) 2001 International Business Machines
4 * All rights reserved.
5 *
6 * This file is part of the GPFS mmfslinux kernel module.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *  1. Redistributions of source code must retain the above copyright notice,
13 *     this list of conditions and the following disclaimer.
14 *  2. Redistributions in binary form must reproduce the above copyright
15 *     notice, this list of conditions and the following disclaimer in the
16 *     documentation and/or other materials provided with the distribution.
17 *  3. The name of the author may not be used to endorse or promote products
18 *     derived from this software without specific prior written
19 *     permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 *************************************************************************** */
33/* @(#)95       1.95.1.8  src/avs/fs/mmfs/ts/kernext/gpl-linux/file.c, mmfs, avs_rgpfs24, rgpfs24s012a 4/30/07 20:13:26 */
34/*
35 * File operations
36 *
37 * Contents:
38 *   gpfs_f_llseek
39 *   gpfs_f_readdir
40 *   gpfs_f_poll
41 *   gpfs_f_ioctl
42 *   gpfs_filemap_open
43 *   gpfs_filemap_close
44 *   gpfs_filemap_nopage       (in mmap.c)
45 *   gpfs_filemap_nopagedone   (in mmap.c)
46 *   gpfs_f_mmap
47 *   gpfs_f_open
48 *   gpfs_f_release
49 *   gpfs_f_fsync
50 *   gpfs_f_fasync
51 *   fsyncInternal
52 *   gpfs_f_lock
53 *   gpfs_f_flock
54 *   rdwrInternal
55 *   gpfs_f_read
56 *   gpfs_f_dir_read
57 *   gpfs_f_write
58 *   gpfs_f_readv
59 *   gpfs_f_writev
60 *   gpfs_f_cleanup
61 *
62 */
63
64#include <Shark-gpl.h>
65
66#include <linux/fs.h>
67#include <linux/errno.h>
68#include <linux/stat.h>
69#include <linux/fcntl.h>
70#ifdef MODULE
71#include <linux/module.h>
72#endif
73#include <linux/slab.h>
74#include <linux/smp_lock.h>
75#include <linux/mm.h>
76#include <linux/mman.h>
77#ifdef NFS4_CLUSTER
78#include <linux/nfs4.h>
79#endif /* NFS4_CLUSTER */
80#if LINUX_KERNEL_VERSION >= 2050000
81#include <linux/pagemap.h>
82#include <linux/page-flags.h>
83#endif
84
85#include <linux2gpfs.h>
86#ifdef P_NFS4
87#include <linux/sunrpc/svc.h>
88#include <linux/nfsd/nfsfh.h>
89#include <linux/nfsd/nfsd4_pnfs.h>
90#include <linux/nfsd/pnfsd.h>
91#endif
92#include <cxiTypes.h>
93#include <cxiSystem.h>
94#include <cxiMode.h>
95#include <cxi2gpfs.h>
96#include <cxiVFSStats.h>
97#include <cxiCred.h>
98#include <cxiIOBuffer.h>
99#include <cxiMmap.h>
100#include <Trace.h>
101#include <verdep.h>
102
103
104/* prototypes */
105static int fsyncInternal(struct file *fP);
106
107/* file_operations */
108
109loff_t
110gpfs_f_llseek(struct file *fP, loff_t offset, int origin)
111{
112  struct inode *iP = fP->f_dentry->d_inode;
113  loff_t rc = -EINVAL;
114
115  ENTER(0);
116  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LLSEEK_ENTER,
117         "gpfs_f_llseek enter: fP 0x%lX offset 0x%llX origin %d\n",
118         fP, offset, origin);
119  /* BKL is held at entry */
120
121  switch (origin)
122  {
123    case 2:
124      gpfs_i_getattr_internal(iP);
125      offset += iP->i_size;
126      break;
127
128    case 1:
129      offset += fP->f_pos;
130  }
131
132  if (offset >= 0)
133  {
134    rc = offset;
135    if (offset != fP->f_pos)
136    {
137      fP->f_pos = offset;
138#if LINUX_KERNEL_VERSION < 2050000
139      fP->f_reada = 0;
140#endif
141    }
142  }
143  else
144    cxiErrorNFS(rc);
145
146  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_LLSEEK_EXIT,
147         "gpfs_f_llseek exit: fP 0x%lX offset 0x%llX origin %d rc 0x%llX\n",
148         fP, offset, origin, rc);
149  EXIT(0);
150  return rc;
151}
152
153/* Save everything we need to make the OS-specific filldir call. */
154typedef struct {
155  offset_t  offset;
156  ino_t     ino;
157  int       namelen;
158  char      name[1];
159} fillDirEntry;
160
161/* gpfs_f_readdir provides a buffer for NFS_fillDir to place entries,
162 * and this structure to keep track of its use over successive calls.
163 */
164typedef struct {
165  fillDirEntry   *firstP;  /* first entry */
166  fillDirEntry   *endP;    /* buffer end  */
167  fillDirEntry   *bufferP; /* current location */
168} NFS_fillDirArgs;
169
/* Return the location of our next filldir entry.  Allow for the size
 * of the fillDirEntry struct plus the namelen, rounded up to a doubleword
 * (8 bytes).  Rounding to only 4 bytes would let the second and later
 * entries start on an address that is not a multiple of 8, misaligning
 * any member wider than 4 bytes (e.g. the offset field, if offset_t is
 * 64 bits wide) on architectures that care about natural alignment.
 * The whole expansion is parenthesized so it is safe in any expression.
 */
#define NEXT_FILLDIR_ENTRY(eP, len) \
  ((fillDirEntry *)((caddr_t)(eP)+(((sizeof(fillDirEntry)+(len)+7)>>3)<<3)))

/* Size in bytes of our NFS_fillDir buffer. */
#define FILLDIR_BUFSIZE 700
178
179/* For NFS readdir, we provide our own filldir callback so that we can save
180 * the records until after we release our locks.  We can then make the real
181 * filldir calls without fear they will deadlock when they loopback to the
182 * filesystem for permission checks, etc.
183 */
184int
185NFS_fillDir(void *myArgP, char *nameP, int namelen,
186            offset_t offset, const ino_t ino)
187{
188  NFS_fillDirArgs *argsP = (NFS_fillDirArgs *)myArgP;
189  fillDirEntry *entryP = argsP->bufferP;
190  fillDirEntry *nextP = NEXT_FILLDIR_ENTRY(entryP, namelen);
191
192  /* If this entry will not fit, report the full condition. */
193  if (nextP > argsP->endP)
194    return -EINVAL;
195
196  /* Save filldir information to make the real call later */
197  entryP->offset = offset;
198  entryP->ino = ino;
199  entryP->namelen = namelen;
200  cxiMemcpy(entryP->name, nameP, namelen+1);
201
202  /* Bump the entry location in arg structure for the next call */
203  argsP->bufferP = nextP;
204
205  return 0;
206}
207
208int
209gpfs_f_readdir(struct file *fP, void *direntP, filldir_t filldir)
210{
211  int rc;
212  Boolean klock = false;
213  struct gpfsVfsData_t *privVfsP;
214  cxiNode_t *cnP;
215  struct inode *iP;
216  cxiFillDirArg_t fillDirArg;
217  fillDirEntry *fillDirBuf = NULL;
218  NFS_fillDirArgs filldirArgs;
219
220  VFS_STAT_START(readdirCall);
221  ENTER(0);
222  DBGASSERT(fP != NULL);
223
224  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READDIR_ENTER,
225         "gpfs_f_readdir enter: fP 0x%lX direntP 0x%lX "
226         "filldir 0x%lX pos %lld\n", fP, direntP, filldir, fP->f_pos);
227  /* BKL is held at entry */
228
229  /* Quick check for EOF */
230  if (fP->f_pos == GPFS_DIR_EOF)
231  {
232    rc = 0;  // end-of-directory
233  }
234  else
235  {
236
237    iP = fP->f_dentry->d_inode;
238    DBGASSERT(iP != NULL);
239    cnP = VP_TO_CNP(iP);
240    privVfsP = VP_TO_PVP(iP);
241    DBGASSERT(privVfsP != NULL);
242
243    /* When called by NFS, wait to make the filldar calls until after
244     * we return from gpfsReaddir.  The NFS filldir implementation
245     * includes callbacks (e.g., permission checks) into the filesystem
246     * and these calls may result in getting locks out-of-order and
247     * are therefore subject to deadlock.
248     */
249    if (cxiIsNFSThread())
250    {
251      /* Specify a special filldir function where we will save entry
252       * information.  Upon our return from gpfsReaddir, we no longer
253       * hold any locks so we will then go through these saved entries
254       * and make the real filldir calls.
255       */
256      fillDirArg.fnP = (void *)NFS_fillDir;
257      fillDirArg.argP = &filldirArgs;
258
259      fillDirBuf = (fillDirEntry *)cxiMallocPinned(FILLDIR_BUFSIZE);
260
261      filldirArgs.firstP = fillDirBuf;
262      filldirArgs.endP = (fillDirEntry *)((caddr_t)fillDirBuf + FILLDIR_BUFSIZE);
263      filldirArgs.bufferP = filldirArgs.firstP;
264    }
265    else
266    {
267      /* Unfortunately we can't use the OS version of the filldir
268       * routine directly.  It has different signatures in varying
269       * kernel levels, so we use cxiFillDir() in the portability layer
270       * to handle the different signatures.
271       */
272      fillDirArg.fnP = (void *)filldir;
273      fillDirArg.argP = direntP;
274    }
275
276    if (kernel_locked())
277    {
278      unlock_kernel();
279      klock = true;
280    }
281    rc = gpfs_ops.gpfsReaddir(privVfsP, cnP, &fillDirArg, cxiFillDir,
282                              &fP->f_pos, vnOp, NULL);
283
284    /* Even if gpfsReaddir reports an error, we want to look
285     * to see if there were any entries previously returned.
286     */
287    if (cxiIsNFSThread() && (filldirArgs.bufferP > filldirArgs.firstP))
288    {
289      int fillrc;
290      fillDirEntry *nextP = filldirArgs.firstP;
291
292      /* Set the real filldir fcn/arg pointers */
293      fillDirArg.fnP = (void *)filldir;
294      fillDirArg.argP = direntP;
295
296      while(nextP < filldirArgs.bufferP)
297      {
298        /* Do not overlay any gpfsReadir rc. */
299        fillrc = CALL_FILLDIR(cxiFillDir,
300                              &fillDirArg, 
301                              (nextP->name), 
302                              (nextP->namelen), 
303                              (nextP->offset), 
304                              (nextP->ino));
305        if (fillrc < 0)
306        {
307          rc = 0; /* entry doesn't fit is ok (will resume at offset) */
308
309          /* Reset f_pos based on what we've been able to pass back
310           * to NFS.  This is where they will start on the next call.
311           */
312          fP->f_pos = nextP->offset; /* next offset for nfsd_readdir */
313
314          break;
315        }
316        nextP = NEXT_FILLDIR_ENTRY(nextP, nextP->namelen);
317      }
318    }
319
320    if (klock)
321      lock_kernel();
322  }
323
324  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_READDIR_EXIT,
325         "gpfs_f_readdir exit: fP 0x%lX pos %lld code 0 rc %d\n",
326         fP, fP->f_pos, rc);
327
328  if (fillDirBuf)
329    cxiFreePinned(fillDirBuf);
330
331  if (rc)
332    cxiErrorNFS(rc);
333
334  VFS_STAT_STOP;
335  EXIT(0);
336  return (-rc);
337}
338
339uint
340gpfs_f_poll(struct file *fP, struct poll_table_struct *wait)
341{
342  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_POLL,
343         "gpfs_f_poll: rc bits POLLERR: fP 0x%lX\n", fP);
344  return (uint)0; // ?? which POLL* bits
345}
346
347int
348gpfs_f_ioctl(struct inode *iP, struct file *fP, uint cmd, unsigned long arg)
349{
350  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_IOCTL,
351         "gpfs_f_ioctl: rc -ENOTTY: iP 0x%lX fP 0x%lX cmd %d\n",
352         iP, fP, cmd);
353  return -ENOTTY; // no one can really explain why this errno, but it is common
354}
355
356
357/* called for every child process forked after mmap */
358void gpfs_filemap_open(struct vm_area_struct * vma)
359{
360  int rc = 0;
361  Boolean writeAccess = false;
362  cxiNode_t *cnP;
363  ext_cred_t eCred;
364  struct file *file = vma->vm_file;
365  struct inode *inode = file->f_dentry->d_inode;
366  struct gpfsVfsData_t *privVfsP;
367  struct mm_struct *mm = vma->vm_mm;
368  long long offset;
369  long long length;
370
371  ENTER(0);
372  TRACE4(TRACE_VNODE, 2, TRCID_FM_OPEN,
373         "gpfs_filemap_open enter: vma 0x%lX inode %d icount %d name %s\n",
374         vma, inode->i_ino, atomic_read((atomic_t *)&inode->i_count),
375         file->f_dentry? file->f_dentry->d_name.name: (const unsigned char*)"");
376
377  TRACE2(TRACE_VNODE, 2, TRCID_FM_OPEN_1,
378         "gpfs_filemap_open : mm 0x%lX mm_users %d\n",
379         mm, atomic_read(&mm->mm_users));
380
381  cnP = VP_TO_CNP(inode);
382  privVfsP = VP_TO_PVP(inode);
383  DBGASSERT(privVfsP != NULL);
384
385  if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
386    MMAP_WRITE_ACCESS(writeAccess);
387
388  setCred(&eCred);
389 
390  offset = vma->vm_pgoff<<PAGE_SHIFT;
391  length = vma->vm_end - vma->vm_start;
392  rc = gpfs_ops.gpfsMmap(privVfsP, cnP, (void *)inode, &eCred, NULL,
393                         writeAccess,false, offset,length);
394
395  TRACE2(TRACE_VNODE, 2, TRCID_FM_OPEN_EXIT,
396         "gpfs_filemap_open exit: vma 0x%lX icount %d\n",
397         vma, atomic_read((atomic_t *)&inode->i_count));
398
399  if (rc)
400    cxiErrorNFS(rc);
401
402  EXIT(0);
403}
404
405void gpfs_filemap_close(struct vm_area_struct * vma)
406{
407  struct file *fP = vma->vm_file;
408  struct inode *inode = fP->f_dentry->d_inode;
409  int flags, rc;
410  struct gpfsVfsData_t *privVfsP;
411  cxiNode_t *cnP = VP_TO_CNP(inode);
412  struct mm_struct *mm = vma->vm_mm;
413
414  VFS_STAT_START(unmapCall);
415  ENTER(0);
416
417  if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
418    flags = 0;
419  else
420    flags = CXI_SHM_RDONLY;
421
422  privVfsP = VP_TO_PVP(inode);
423
424  TRACE3(TRACE_VNODE, 2, TRCID_FM_CLOSE_ENTER,
425         "gpfs_filemap_close: vma 0x%lX inode 0x%lX i_count %d\n",
426         vma, inode, (Int32)atomic_read((atomic_t *)&inode->i_count));
427  TRACE3(TRACE_VNODE, 2, TRCID_FM_CLOSE_ENTER1,
428         "gpfs_filemap_close: inode %d, name %s, nrpages %d\n",
429         inode->i_ino,
430         fP->f_dentry? fP->f_dentry->d_name.name: (const unsigned char*)"",
431         inode->i_data.nrpages);
432  TRACE2(TRACE_VNODE, 2, TRCID_FM_CLOSE_ENR,
433         "gpfs_filemap_close: mm 0x%lX mm_users %d\n",
434         mm,atomic_read(&mm->mm_users));
435 
436  rc = gpfs_ops.gpfsUnmap(privVfsP, cnP, flags);
437  cxiPutOSNode((void *)inode);
438
439  TRACE3(TRACE_VNODE, 2, TRCID_FM_CLOSE,
440         "gpfs_filemap_close: vma 0x%lX inode 0x%lX i_count %d\n",
441         vma, inode, (Int32)atomic_read((atomic_t *)&inode->i_count));
442
443  if (rc)
444    cxiErrorNFS(rc);
445
446  VFS_STAT_STOP;
447  EXIT(0);
448}
449
450int
451gpfs_f_mmap(struct file *fP, struct vm_area_struct *vma)
452{
453  int rc;
454  int code = 0;
455  Boolean heldVnode = false;
456  Boolean writeAccess = false;
457  cxiNode_t *cnP;
458  struct gpfsVfsData_t *privVfsP;
459  struct inode *iP = fP->f_dentry->d_inode;
460  ext_cred_t eCred;
461  cxiVattr_t vattr;
462  struct mm_struct *mm = vma->vm_mm;
463  long long offset;
464  long long length;
465
466  VFS_STAT_START(map_lloffCall);
467  ENTER(0);
468  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_MMAP_ENTER,
469         "gpfs_f_mmap enter: fP 0x%lX inum %d vma 0x%1X\n",
470         fP, iP->i_ino, vma);
471
472  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MMAP_ENTER_A,
473         "gpfs_f_mmap: vm_start 0x%lX vm_end 0x%lX, vmpgoff 0x%lX, "
474         "vmflags 0x%lX\n",
475         vma->vm_start, vma->vm_end, vma->vm_pgoff, vma->vm_flags);
476   TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_MMAP_ENTER_A1,
477          "gpfs_f_mmap: inode %d icount %d name %s nrpages %d\n",
478          iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
479          fP->f_dentry ? fP->f_dentry->d_name.name : (const unsigned char*)"",
480          iP->i_data.nrpages);
481  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_MMAP_ENTER_AB,
482         "gpfs_f_mmap: mm 0x%lX mm_users %d\n",
483         mm,atomic_read(&mm->mm_users));
484
485  cnP = VP_TO_CNP(iP);
486  privVfsP = VP_TO_PVP(iP);
487  DBGASSERT(privVfsP != NULL);
488
489  if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
490  {
491    MMAP_WRITE_ACCESS(writeAccess);
492    if (!writeAccess)
493    {
494      /* Patch must be applied at this kernel level for mmap write */
495      code = 1;
496      rc = -EINVAL;
497      goto xerror;
498    }
499  }
500
501  setCred(&eCred);
502
503  offset = vma->vm_pgoff<<PAGE_SHIFT;
504  length = vma->vm_end - vma->vm_start;
505  rc = gpfs_ops.gpfsMmap(privVfsP, cnP, (void *)iP, &eCred, NULL, 
506                         writeAccess, true,offset,length);
507  if (rc != 0)
508  {
509    code = 2;
510    rc = -rc;
511    goto xerror;
512  }
513 
514  heldVnode = true;
515
516  if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE) &&
517      !iP->i_mapping->a_ops->writepage)
518  {
519    code = 3;
520    rc = -EINVAL;
521    goto xerror;
522  }
523
524  if (!iP->i_sb || !S_ISREG(iP->i_mode))
525  {
526    code = 4;
527    rc = -EACCES;
528    goto xerror;
529  }
530
531  if (!iP->i_mapping->a_ops->readpage)
532  {
533    code = 5;
534    rc = -ENOEXEC;
535    goto xerror;
536  }
537
538  /* revalidate linux inode */
539  /* This has the effect of calling us back under a lock and
540   * setting the inode attributes at the OS level (since this
541   * operating system caches this info in the vfs layer)
542   */
543  rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
544  if (rc != 0)
545  {
546    code = 6;
547    rc = -rc;
548    goto xerror;
549  }
550
551#ifdef UPDATE_ATIME
552  UPDATE_ATIME(iP);
553#else
554#if LINUX_KERNEL_VERSION >= 2061600
555  touch_atime(NULL, fP->f_dentry);
556#else
557  update_atime(iP);
558#endif
559#endif
560  vma->vm_ops = &gpfs_vmop;
561
562xerror:
563  if (rc != 0 && heldVnode)
564    cxiPutOSNode((void *)iP); // corresponding hold in gpfsMmap
565
566  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_MMAP_EXIT,
567         "gpfs_f_mmap exit: rc %d code %d\n", rc, code);
568
569  if (rc)
570    cxiErrorNFS(rc);
571
572  VFS_STAT_STOP;
573  EXIT(0);
574  return rc;
575}
576
577int
578gpfs_f_open(struct inode *iP, struct file *fP)
579{
580  int rc = 0;
581  int code = 0;
582  Boolean gotBKL = false;
583  int flags = cxiOpenFlagsXlate(fP->f_flags);
584  int iflags = cxiIsSambaThread()? GPFS_OPEN_NO_SMBLOCK: 0;
585  cxiNode_t *cnP;
586  struct gpfsVfsData_t *privVfsP;
587  ext_cred_t eCred;
588
589  VFS_STAT_START(openCall);
590  ENTER(0);
591  TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_OPEN_ENTER,
592         "gpfs_f_open enter: iP 0x%lX fP 0x%lX f_flags 0x%X dP 0x%lX '%s' "
593         "flags 0x%X isNFS %d\n", iP, fP, fP->f_flags, fP->f_dentry,
594         fP->f_dentry? fP->f_dentry->d_name.name: (const unsigned char*)"",
595         flags, cxiIsNFSThread());
596
597  /* BKL is not held at entry, except for NFS calls */
598  TraceBKL();
599  if (current->lock_depth >= 0)  /* kernel lock is held by me */
600  {
601    gotBKL = true;
602    unlock_kernel();
603  }
604
605  cnP = VP_TO_CNP(iP);
606  privVfsP = VP_TO_PVP(iP);
607  DBGASSERT(privVfsP != NULL);
608
609  /* see comment in gpfs_i_create() on the reason for this code */
610  if (cnP->createRaceLoserThreadId &&
611      cnP->createRaceLoserThreadId == cxiGetThreadId())
612  {
613    int fflags = cxiOpenFlagsXlate(fP->f_flags);
614    int amode;
615
616    cnP->createRaceLoserThreadId = 0;
617    code = EEXIST;
618
619    amode = ((flags & FWRITE ? W_ACC : 0) |
620             (flags & FREAD ? R_ACC : 0)  |
621             (flags & FTRUNC ? W_ACC : 0));
622
623    TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_OPEN_01,
624           "gpfs_f_open fileExist iP 0x%lX cnP 0x%lX fflags 0x%X amode 0x%X\n",
625           iP, cnP, fflags, amode);
626
627    /* Check if FEXCL and FCREAT are on and the file exists return EEXIST
628     * could not do it at create time because the open flgas are not availble
629     * on the create call.
630     */
631    if ((flags & FEXCL) && (flags & FCREAT))
632    {
633      rc = EEXIST;
634      goto xerror;
635    }
636
637    setCred(&eCred);
638    rc = gpfs_ops.gpfsAccess(privVfsP, cnP, amode, ACC_SELF, &eCred);
639    if (rc)
640      goto xerror;
641  }
642
643  if (cxiIsNFSThread() && GNP_IS_FILE(cnP))
644  {
645    int NFSflags;
646    int code;
647
648    BEGIN_FAR_CODE;
649    /* Linux NFS will not do vget so the clone vnode cannot be created then.
650       Need to GetNFS here so the NFS structures will be available. */
651
652#ifdef NFS_CLUSTER_LOCKS //??? temp fix for NFSv4
653    fP->f_mode |= FMODE_READ;
654#endif
655
656    NFSflags = FWRITE|FREAD;
657    rc = gpfs_ops.gpfsGetNFS((void *)iP,
658                             (struct MMFSVInfo **)&fP->private_data,
659                             &NFSflags);
660    if (rc != 0)
661    {
662      code = ENOSYS; //??EGET_NFS;
663      goto xerror;
664    }
665    DBGASSERT((struct MMFSVInfo *)fP->private_data != NULL);
666
667    END_FAR_CODE;
668    goto xerror;
669  }
670
671  setCred(&eCred);   // rebuild since gpfsAccess may have remapped the ids
672  rc = gpfs_ops.gpfsOpen(privVfsP, cnP, flags, iflags, 0,
673                         (struct MMFSVInfo **)&fP->private_data, &eCred);
674
675xerror:
676  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_OPEN_EXIT,
677         "gpfs_f_open exit: iP 0x%lX vinfoP 0x%lX code %d rc %d\n",
678         iP, (struct MMFSVInfo *)fP->private_data, code, rc);
679
680  VFS_STAT_STOP;
681
682  if (gotBKL)        /* If held kernel lock on entry then reacquire it */
683    lock_kernel();
684
685  if (rc)
686    cxiErrorNFS(rc);
687
688  EXIT(0);
689  return (-rc);
690}
691
692int
693gpfs_f_release(struct inode *iP, struct file *fP)
694{
695  int rc = 0;
696  int code = 0;
697  int flags = cxiOpenFlagsXlate(fP->f_flags);
698  struct MMFSVInfo *vinfoP = (struct MMFSVInfo *)fP->private_data;
699  cxiNode_t *cnP;
700  struct gpfsVfsData_t *privVfsP;
701
702  VFS_STAT_START(closeCall);
703  ENTER(0);
704  cnP = VP_TO_CNP(iP);
705  privVfsP = VP_TO_PVP(iP);
706  DBGASSERT(privVfsP != NULL);
707
708  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CLOSE_ENTER,
709         "gpfs_f_release enter: iP 0x%lX f_flags 0x%X flags 0x%X vinfoP 0x%lX\n",
710         iP, fP->f_flags, flags, vinfoP);
711  /* BKL is held if the file was open R/W, otherwise not held */
712
713  /* If nfsd is closing one of its files, schedule it for a delayed close. */
714  if (cnP && VP_TO_NFSP(iP) && cxiIsNFSThread())
715  {
716    DBGASSERT(GNP_IS_FILE(cnP));
717
718    /* On the last NFS release, a watchdog will be set to close the file
719       after a delay. */
720
721    rc = gpfs_ops.gpfsReleaseNFS(iP);
722
723    goto xerror;
724  }
725
726  rc = gpfs_ops.gpfsClose(privVfsP, cnP, flags, vinfoP, true);
727
728  fP->private_data = NULL;  // MMFSVInfo was freed
729
730xerror:
731  TRACE2(TRACE_VNODE, 1, TRCID_CLOSE_EXIT,
732         "gpfs_f_release exit: code %d rc %d\n", code, rc);
733
734  if (rc)
735    cxiErrorNFS(rc);
736
737  VFS_STAT_STOP;
738  EXIT(0);
739  return (-rc);
740}
741
742int
743gpfs_f_fsync(struct file *fP, struct dentry *direntP, int datasync)
744{
745  int rc;
746
747  ENTER(0);
748  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_FSYNC_ENTER,
749         "gpfs_f_fsync enter: fP 0x%lX dirent 0x%lX datasync %d\n",
750         fP, direntP, datasync);
751  /* Linux doc says BKL is held, but it does not seem to be */
752
753  rc = fsyncInternal(fP);
754
755  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FSYNC_EXIT,
756         "gpfs_f_fsync exit: file 0x%lX rc %d\n", fP, rc);
757
758  EXIT(0);
759  return (-rc);
760}
761
762int
763gpfs_f_fasync(int fd, struct file *fP, int on)
764{
765  int rc;
766
767  ENTER(0);
768  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_FASYNC_ENTER,
769         "gpfs_f_fasync enter: fd %d fP 0x%lX on %d\n",
770         fd, fP, on);
771  /* Linux doc says BKL is held, but it does not seem to be */
772
773  rc = fsyncInternal(fP);
774
775  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FASYNC_EXIT,
776         "gpfs_f_fasync exit: fP 0x%lX rc %d\n", fP, rc);
777
778  EXIT(0);
779  return (-rc);
780}
781
782static int
783fsyncInternal(struct file *fP)
784{
785  int rc = 0;
786  cxiNode_t *cnP;
787  struct inode *iP;
788  struct gpfsVfsData_t *privVfsP;
789  ext_cred_t eCred;
790  struct MMFSVInfo *vinfoP;
791
792  VFS_STAT_START(fsyncCall);
793  ENTER(0);
794  VFS_INC(fsyncCall);
795  /* Creating files via nfs can get us here with a null fP. */
796  if (!fP)
797    goto xerror;
798
799  vinfoP = (struct MMFSVInfo *)fP->private_data;
800
801  iP = fP->f_dentry->d_inode;
802  DBGASSERT(iP != NULL);
803
804  cnP = VP_TO_CNP(iP);
805  privVfsP = VP_TO_PVP(iP);
806  DBGASSERT(privVfsP != NULL);
807
808  setCred(&eCred);
809  rc = gpfs_ops.gpfsFsync(privVfsP, vinfoP, cnP, FFILESYNC, &eCred);
810
811  if (rc)
812    cxiErrorNFS(rc);
813
814xerror:
815  VFS_STAT_STOP;
816  EXIT(0);
817  return rc;
818}
819
820#ifdef NFS_CLUSTER_LOCKS
821void
822gpfs_grace(int on_off)
823{
824  gpfs_ops.gpfsGrace(on_off);
825}
826
827/*
828 * arg is lock type F_RDLCK:0 F_WRLCK:1 F_UNLCK:2
829*/
830int
831gpfs_f_set_lease(struct file *fP, long arg, struct file_lock **flPP)
832{
833  int rc = EAGAIN;
834  int NFSflags;
835  int mode, oplockWant, oplockGot, flags;
836  void *cb_token, *cookie;
837  struct file_lock *flP;
838  ext_cred_t eCred;
839  cxiNode_t *cnP;
840  struct gpfsVfsData_t *privVfsP;
841  struct MMFSVInfo *vinfoP = (struct MMFSVInfo *)fP->private_data;
842  struct inode *iP = fP->f_dentry->d_inode;
843
844//VFS_STAT_START(lockctlCall);
845  ENTER(0);
846
847  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_LEASE_ENTER,
848         "gpfs_f_set_lease: fP 0x%lX iP 0x%lX type %s by %s\n",
849          fP, iP, (arg == F_RDLCK) ? "RD" :(arg == F_WRLCK) ? "WR" : "UNLCK",
850          (cxiIsNFSThread()) ? "NFS" : "SAMBA");
851
852  privVfsP = VP_TO_PVP(fP->f_dentry->d_inode);
853  DBGASSERT(privVfsP != NULL);
854
855  flP = *flPP;
856  cookie = NULL;
857  NFSflags = FREAD;
858  mode = FMODE_WRITE;
859  flags = RESERVE_NONE;
860  oplockGot = smbOplockNone;
861  cb_token = iP;
862
863  if (arg == F_UNLCK) {
864    cb_token = NULL;
865    oplockWant = smbOplockNone;
866    flags = RESERVE_DOWNGRADE;
867  }
868  else if (arg == F_RDLCK)
869    oplockWant = smbOplockShared;
870  else if (arg == F_WRLCK) {
871    oplockWant = smbOplockExclusive;
872    NFSflags |= FWRITE;
873    mode = FMODE_READ;
874  }
875  else goto xerror;
876
877  if (cxiIsNFSThread())
878  {
879    rc = gpfs_ops.gpfsGetNFS((void *)iP, (struct MMFSVInfo **)&vinfoP,
880                                                           &NFSflags);
881    if (rc)
882      goto xerror;
883
884    cnP = VP_TO_CNP(iP);
885    setCred(&eCred);
886    rc = gpfs_ops.gpfsOpenNFS(privVfsP, cnP, NFSflags, vinfoP, &eCred);
887    if (rc)
888      goto xerror2;
889  }
890  rc = gpfs_ops.gpfsReserveDelegation(fP, vinfoP , privVfsP, oplockWant, flags,
891                                                             cb_token, cookie);
892  if (rc)
893    goto xerror2;
894
895  lock_kernel();
896  rc = setlease(fP, arg, flPP);
897  unlock_kernel();
898
899  if (rc) {  // if error release the delegation
900    gpfs_ops.gpfsReserveDelegation(fP, vinfoP , privVfsP, smbOplockNone,
901                                   RESERVE_DOWNGRADE, NULL, NULL);
902    if (rc < 0)
903      rc = -rc;  /* make it positive */
904  }
905  else {   // rc=0
906    oplockGot = gpfs_ops.SMBGetOplockStateV(vinfoP);
907    if (oplockGot == oplockWant)
908      goto xerror2;
909    else {   // already lost the delegation
910      __break_lease(iP, FMODE_WRITE);
911    }
912  }
913
914xerror2:
915  if (cxiIsNFSThread())
916    gpfs_ops.gpfsReleaseNFS(iP);
917
918xerror:
919  TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_LEASE_EXIT,
920   "gpfs_f_set_lease: fP 0x%lX flP 0x%lX rc %d oplockWant %d oplockGot %d %s\n",
921    fP, flP, rc, oplockWant, oplockGot, (oplockGot == smbOplockShared) ? "RD" :
922    (oplockGot == smbOplockExclusive) ? "WR" : "NONE");
923
924//VFS_STAT_STOP;
925  EXIT(0);
926  return (-rc);
927}
928#endif
929
930int
931gpfs_f_lock(struct file *fP, int cmd, struct file_lock *flP)
932{
933  int rc = 0;
934  int code = 0;
935  cxiNode_t *cnP;
936  ext_cred_t eCred;
937  struct gpfsVfsData_t *privVfsP;
938  eflock_t lckdat;
939  unsigned long localRetryId = 0;
940  int(* vfs_callback)(void *, void *, int) = NULL;
941
942  VFS_STAT_START(lockctlCall);
943  ENTER(0);
944
945  /* Linux converts flock64 to flock before calling GPFS lock routine,
946     but leaves "cmd" as is. Allow these to go through. */
947#if !defined(__64BIT__)
948  if (cmd == F_GETLK64) cmd = F_GETLK;
949  if (cmd == F_SETLK64) cmd = F_SETLK;
950  if (cmd == F_SETLKW64) cmd = F_SETLKW;
951#endif
952
953  if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
954  {
955    code = 2;
956    rc = ENOSYS;
957    goto xerror;
958  }
959
960  setCred(&eCred);
961  TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCKCTL_ENTER,
962         "gpfs_f_lock enter: pid %d fp 0x%lX range 0x%lX:%lX cmd %s type %s\n",
963         flP->fl_pid, fP, flP->fl_start, flP->fl_end,
964         (cmd == F_GETLK) ? "GETLK" : (cmd == F_SETLK) ? "SETLK" : "SETLKW",
965         (flP->fl_type == F_RDLCK) ? "RDLCK" :
966         (flP->fl_type == F_WRLCK) ? "WRLCK" : "UNLCK");
967
968  TRACE5(TRACE_VNODE, 3, TRCID_LINUXOPS_LOCKCTL_ENTER2,
969         "gpfs_f_lock       : pos 0x%lX iP 0x%lX fl_flags 0x%X uid %d gid %d\n",
970         fP->f_pos, fP->f_dentry->d_inode, flP->fl_flags,
971         eCred.principal, eCred.group);
972  TraceBKL();
973
974  cnP = VP_TO_CNP(fP->f_dentry->d_inode);
975  privVfsP = VP_TO_PVP(fP->f_dentry->d_inode);
976  DBGASSERT(privVfsP != NULL);
977
978  /* convert file_lock to eflock */
979  cxiVFSToFlock((void *)flP, &lckdat);
980#ifdef NFS_CLUSTER_LOCKS
981#if 0
982  if (flP->fl_state == FL_CANCELED && flP->fl_type != F_UNLCK)
983  {
984    //??? just unblock queued lock
985    // use kxCleanupAcquires() which is used only for AIX now
986    rc = 0;
987    goto xerror;
988  }
989#endif
990#if 0
991  if (flP->fl_lmops) // && (flP->fl_flags & (FL_LOCKD|FL_NFSv4)))
992  {
993    if ((flP->fl_flags & FL_SLEEP) &&
994         flP->fl_lmops->fl_vfs_callback &&
995         flP->fl_type != F_UNLCK)
996    {
997      vfs_callback = flP->fl_lmops->fl_vfs_callback;
998      cmd = F_SETLKW;
999    }
1000  }
1001#endif
1002#endif
1003
1004  lckdat.l_whence = SEEK_SET;
1005
1006  rc = gpfs_ops.gpfsFcntl(NULL,    // KernelOperation initialized in gpfsFcntl
1007                          privVfsP,
1008                          NULL,     // struct vnode *vP or NULL
1009                                    // advObjP (advisory lock object) is inode
1010                          fP->f_dentry->d_inode,
1011                          flP,      // struct file_lock
1012                          cnP,
1013                          0,        // offset
1014                          &lckdat,  // struct cxiFlock_t
1015                          cmd,
1016                          vfs_callback, // lockd callback
1017                          &localRetryId,
1018                          &eCred);
1019
1020xerror:
1021
1022  TRACE2(TRACE_VNODE, 11, TRCID_LINUXOPS_LOCKCTL_DIAG2,
1023         "gpfs_f_lock: fP 0x%lX, f_dentry 0x%lX",
1024         fP, fP->f_dentry);
1025
1026  VFS_STAT_STOP;
1027
1028  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_LOCKCTL_EXIT,
1029         "gpfs_f_lock exit: rc %d",
1030         rc);
1031
1032  EXIT(0);
1033  return (-rc);
1034}
1035
1036#ifdef NFS4_CLUSTER
1037/*
1038 * cmd: F_SETLKW or F_SETLK
1039 * fl->fl_flags = FL_FLOCK; if sys_flock(), can use other flags for NFSv4
1040 * fl->fl_start = 0, fl->fl_end = OFFSET_MAX;
1041 * fl->fl_type =
1042 *  LOCK_MAND   allow other processes read
1043 *  or LOCK_MAND&LOCK_RW  allow other processes read and write
1044 *  or F_RDLCK    LOCK_SH -- a shared lock.
1045 *  or F_WRLCK    LOCK_EX -- an exclusive lock.
1046 *  or F_UNLCK    LOCK_UN -- remove an existing lock.
1047 *
1048 *  LOCK_MAND -- a `mandatory' flock. This exists to emulate Windows Share Modes.
1049 */
1050int
1051gpfs_f_flock(struct file *fP, int cmd, struct file_lock *flP)
1052{
1053  int rc = 0;
1054  struct gpfsVfsData_t *privVfsP;
1055  int shareWant = 0;
1056  int flags = 0;
1057  struct MMFSVInfo *vinfoP = (struct MMFSVInfo *)fP->private_data;
1058  privVfsP = VP_TO_PVP(fP->f_dentry->d_inode);
1059  DBGASSERT(privVfsP != NULL);
1060
1061  VFS_STAT_START(flockCall);
1062  ENTER(0);
1063
1064  TRACE5(TRACE_VNODE, 1, TRCID_LINUX_FLOCK_ENTER,
1065        "gpfs_f_flock: enter fP 0x%lX flP 0x%lX cmd %d flags 0x%X type 0x%X\n",
1066         fP, flP, cmd, flP->fl_flags, flP->fl_type);
1067
1068  if ((cmd != F_SETLK) && (cmd != F_SETLKW))
1069  {
1070    rc = ENOSYS;
1071    goto xerror;
1072  }
1073
1074  shareWant |= ALLOW_SHARE_DELETE;
1075
1076  if (flP->fl_flags & FL_FLOCK) {
1077    /* Translate (and validate) the type arguments to our shareWant */
1078    if (flP->fl_type & LOCK_MAND) {
1079       if (flP->fl_type & LOCK_RW)
1080           shareWant |= coRead|coWriteM|coWriteA;
1081       else
1082           shareWant |= coRead|coWriteM|coWriteA|coDenyR;
1083    }
1084    else {
1085      switch (flP->fl_type) {
1086      case F_RDLCK:               /* LOCK_SH */
1087        shareWant |= coRead|coDenyWM|coDenyWA;
1088        break;
1089      case F_WRLCK:               /* LOCK_EX */
1090        shareWant |= coRead|coWriteM|coWriteA|coDenyWM|coDenyWA|coDenyR;
1091        break;
1092      case F_UNLCK:               /* LOCK_UN */
1093        flags |= RESERVE_DOWNGRADE;
1094        shareWant |= 0;
1095        break;
1096      default:
1097        rc = EINVAL;
1098        goto xerror;
1099      }
1100    }
1101  }
1102  else {
1103
1104//??? add code for NFSv4 shares and delegations
1105    shareWant = coNFS4Share;
1106    if (!(flP->fl_flags & FL_SLEEP))
1107      flags |= RESERVE_NOWAIT;
1108
1109  }
1110  /* Call to make the reservation */
1111  rc = gpfs_ops.gpfsReserveShare(fP, vinfoP, privVfsP, flags, shareWant,
1112                                 NULL, NULL);
1113
1114xerror:
1115
1116 TRACE1(TRACE_VNODE, 1, TRCID_LINUX_FLOCK_EXIT,
1117         "gpfs_f_flock: exit rc %d\n", rc);
1118
1119  if (rc)
1120    cxiErrorNFS(rc);
1121
1122  VFS_STAT_STOP;
1123  EXIT(0);
1124  return (-rc);
1125}
1126#endif /* NFS4_CLUSTER */
1127
1128static inline ssize_t
1129rdwrInternal(struct file *fP, cxiRdWr_t op, const struct cxiIovec_t *iovecP,
1130             unsigned long count, loff_t *offsetP)
1131{
1132  int i, rc;
1133  Boolean gotBKL = false;
1134  ssize_t total_len = 0;
1135  struct cxiUio_t tmp_uio;
1136  int flags = cxiOpenFlagsXlate(fP->f_flags);
1137  struct gpfsVfsData_t *privVfsP;
1138  cxiNode_t *cnP;
1139  struct MMFSVInfo *vinfoP = (struct MMFSVInfo *)fP->private_data;
1140  struct inode *iP;
1141  ext_cred_t eCred;
1142  ssize_t tmp_len;
1143
1144  VFS_STAT_START((op == CXI_READ)? readCall: writeCall);
1145  ENTER(0);
1146  DBGASSERT(fP != NULL);
1147  iP = fP->f_dentry->d_inode;
1148  DBGASSERT(iP != NULL);
1149
1150  TRACE11(TRACE_VNODE, 1, TRCID_LINUXOPS_RDWRINT_ENTER,
1151          "gpfs_f_rdwr enter: fP 0x%lX f_flags 0x%X flags 0x%X op %d "
1152          "iovec 0x%lX count %d offset 0x%llX "
1153          "dentry 0x%lX private 0x%lX iP 0x%lX name '%s'\n",
1154          fP, fP->f_flags, flags, op, iovecP, count, *offsetP, fP->f_dentry,
1155          fP->private_data, fP->f_dentry->d_inode, fP->f_dentry->d_name.name);
1156
1157  /* BKL is not held at entry, except for NFS calls */
1158  TraceBKL();
1159  if (current->lock_depth >= 0)  /* kernel lock is held by me */
1160  {
1161    gotBKL = true;
1162    unlock_kernel();
1163  }
1164
1165  privVfsP = VP_TO_PVP(iP);
1166  DBGASSERT(privVfsP != NULL);
1167  cnP = VP_TO_CNP(iP);
1168
1169  tmp_uio.uio_iov = (struct cxiIovec_t *)iovecP; /* ptr to iovec struct array */
1170  tmp_uio.uio_iovcnt = count;         /* #iovec elements left to be processed */
1171  tmp_uio.uio_iovdcnt = 0;            /* #iovec elements already processed    */
1172  tmp_uio.uio_offset = *offsetP;      /* byte offset in file/dev to read/write*/
1173  tmp_uio.uio_segflg = UIO_USERSPACE; /* copy to user space                   */
1174  tmp_uio.uio_fmode = 0;              /* file modes from open file struct     */
1175
1176  for (i = 0; i < count; i++)
1177    total_len += iovecP[i].iov_len;
1178
1179  tmp_uio.uio_resid = total_len; /* #bytes left in data area */
1180
1181 /* We should -EINVAL if total length is not >= 0
1182  * Be careful here because uio_resid is a unsigned
1183  * long not an ssize_t
1184  */
1185  tmp_len = (ssize_t)tmp_uio.uio_resid;
1186  if ( tmp_len  < 0)
1187  {
1188    EXIT(0);
1189    return (-EINVAL);
1190  }
1191
1192  DBGASSERT(vinfoP != NULL);
1193
1194  setCred(&eCred);
1195  if (op == CXI_READ)
1196    rc = gpfs_ops.gpfsRead(privVfsP, NULL, cnP, flags, &tmp_uio,
1197                           vinfoP, NULL, NULL, &eCred, false, true);
1198  else
1199  {
1200    rc = gpfs_ops.gpfsWrite(privVfsP, NULL, cnP, flags, &tmp_uio,
1201                            vinfoP, NULL, NULL, &eCred, false, true);
1202    iP->i_sb->s_dirt = 1;
1203  }
1204
1205  TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_RDWRINT_EXIT,
1206         "gpfs_f_rdwr exit: fP 0x%lX total_len %d uio_resid %ld "
1207         "offset 0x%llX rc %d\n", fP, total_len, tmp_uio.uio_resid,
1208         tmp_uio.uio_offset, rc);
1209
1210  VFS_STAT_STOP;
1211
1212  if (gotBKL)        /* If held kernel lock on entry then reacquire it */
1213    lock_kernel();
1214
1215  if (rc)
1216  {
1217    cxiErrorNFS(rc);
1218
1219    EXIT(0);
1220    return (-rc);
1221  }
1222
1223  *offsetP = tmp_uio.uio_offset;
1224  EXIT(0);
1225  return (total_len - tmp_uio.uio_resid);
1226}
1227
1228ssize_t
1229gpfs_f_read(struct file *fP, char *bufP, size_t count,
1230            loff_t *offsetP)
1231{
1232  ssize_t rc;
1233  cxiIovec_t tmp_iovec;
1234
1235  ENTER(0);
1236  tmp_iovec.iov_base = bufP;    /* base memory address                  */
1237  tmp_iovec.iov_len = count;    /* length of transfer for this area     */
1238
1239  rc = rdwrInternal(fP, CXI_READ, &tmp_iovec, 1, offsetP);
1240
1241  EXIT(0);
1242  return rc;
1243}
1244
1245ssize_t
1246gpfs_f_dir_read(struct file *fP, char *bufP, size_t count,
1247                loff_t *offsetP)
1248{
1249  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_READ_DIR,
1250         "gpfs_f_dir_read: fP 0x%lX EISDIR\n", fP);
1251  return -EISDIR;
1252}
1253
1254ssize_t
1255gpfs_f_write(struct file *fP, const char *bufP, size_t count,
1256             loff_t *offsetP)
1257{
1258  ssize_t rc;
1259  cxiIovec_t tmp_iovec;
1260
1261  ENTER(0);
1262  tmp_iovec.iov_base = (char *)bufP; /* base memory address              */
1263  tmp_iovec.iov_len = count;         /* length of transfer for this area */
1264
1265  rc = rdwrInternal(fP, CXI_WRITE, &tmp_iovec, 1, offsetP);
1266
1267  EXIT(0);
1268  return rc;
1269}
1270
1271ssize_t
1272gpfs_f_readv(struct kiocb *iocb, const struct iovec *iovecP,
1273             unsigned long count, loff_t offsetP)
1274{
1275  int rc;
1276  ENTER(0);
1277  rc = rdwrInternal(iocb->ki_filp, CXI_READ, (const struct cxiIovec_t *)iovecP,
1278                    count, &offsetP);
1279  EXIT(0);
1280  return rc;
1281}
1282
1283ssize_t
1284gpfs_f_writev(struct kiocb *iocb, const struct iovec *iovecP,
1285              unsigned long count, loff_t offsetP)
1286{
1287  int rc;
1288  ENTER(0);
1289  rc = rdwrInternal(iocb->ki_filp, CXI_WRITE, (const struct cxiIovec_t *)iovecP,
1290                    count, &offsetP);
1291  EXIT(0);
1292  return rc;
1293}
1294
1295#ifdef NFS4_CLUSTER
1296int
1297gpfs_f_share(struct file *fP, unsigned int share_access, unsigned int share_deny)
1298{
1299  int err;
1300  struct inode *iP;
1301  struct dentry *dentryP;
1302  int shareHave, shareWant;
1303  struct gpfsVfsData_t *privVfsP;
1304  ext_cred_t eCred;
1305  cxiNode_t *cnP;
1306  int flags = RESERVE_NONE;
1307 
1308  ENTER(0);
1309  err = 0;
1310  dentryP = fP? fP->f_dentry: NULL;
1311  iP = dentryP? dentryP->d_inode: NULL;
1312
1313  TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_SHARE_ENTER,
1314         "gpfs_f_share enter: fP 0x%lX ino %d (%s) access 0x%X deny 0x%X\n",
1315         fP, iP? iP->i_ino: -1,
1316         dentryP? dentryP->d_name.name: (const unsigned char*)"",
1317         share_access, share_deny);
1318
1319  if (fP)
1320    get_file(fP);
1321
1322  /* Validate the file and obtain privVfsP */
1323  if (!iP || !(privVfsP = VP_TO_PVP(iP))) {
1324    err = EBADF;
1325    goto xerror;
1326  }
1327
1328  if ((share_access == 0) && (share_deny == 0))
1329  {
1330    /* This type of request can happen after the server recalls a delegation.
1331     * We reject the request which we recognize since no access/deny flags
1332     * are given.  This then causes the client to open the file at the server
1333     * (no delegation) and continue. */
1334
1335    TRACE3(TRACE_VNODE, 3, TRCID_LINUXOPS_SHARE_RESET,
1336           "gpfs_f_share: RESET (fP 0x%lX iP 0x%lX privVfsP 0x%lX)\n",
1337           fP, iP, iP? VP_TO_PVP(fP->f_dentry->d_inode): NULL);
1338
1339    goto xerror;
1340  }
1341
1342  /* Translate (and validate) the NFS4 share/deny arguments to our shareWant */
1343
1344  /* setup for the XLATE_NFS4 calls */
1345  err = EINVAL;
1346  shareWant = coNFS4Share|ALLOW_SHARE_DELETE;
1347
1348  XLATE_NFS4_ACCESS(share_access, shareWant);
1349  XLATE_NFS4_DENY(share_deny, shareWant);
1350
1351  setCred(&eCred);
1352  cnP = VP_TO_CNP(iP);
1353  /* Call to make the reservation */
1354  err = gpfs_ops.gpfsReserveShare(fP, fP->private_data, privVfsP, flags,
1355                                  shareWant, cnP, &eCred);
1356
1357xerror:
1358  if (fP) fput(fP);
1359
1360  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_SHARE_EXIT, 
1361         "gpfs_f_share exit: rc %d\n", err);
1362  EXIT(0);
1363  return err;
1364}
1365#endif /* NFS4_CLUSTER */
1366
1367extern int cleanupFD;
1368
1369/* gpfs_f_cleanup is a routine that runs when the last mmfsd
1370   process terminates.  It allows us to do some basic cleanup
1371   so that the daemon can be restarted nicely. */
1372
1373int gpfs_f_cleanup(struct inode *iP, struct file *fP)
1374{
1375  int rc = 0;
1376
1377  ENTER(0);
1378  if (cleanupFD)
1379  {
1380    rc = gpfs_ops.gpfsCleanup();
1381    cleanupFD = 0;
1382  }
1383  EXIT(0);
1384  return rc;
1385}
1386
#if LINUX_KERNEL_VERSION >= 2060000 || defined(SUSE_LINUX)
/* gpfs_f_direct_IO() is never called. Open currently requires a "value" in
 * gpfs_aops->direct_IO to be successful when O_DIRECT is supplied on the open
 * call. The linux "generic" file routines eventually call this op. We do not use
 * the generic file routines so gpfs_f_direct_IO is never called.
 *
 * Should it be reached anyway, it asserts and then reports -EINVAL so that
 * a non-void function never falls off its end without a return value.
 */
#if LINUX_KERNEL_VERSION >= 2060000
ssize_t gpfs_f_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iovecP,
                         loff_t in_offset, unsigned long count)
#elif defined(SUSE_LINUX)
int gpfs_f_direct_IO(int rw, struct file *file, struct kiobuf *kiobuf,
                        unsigned long in_offset, int count)
#endif
{
  LOGASSERT(!"gpfs_f_direct_IO not supported");

  /* Only reached if LOGASSERT returns to its caller */
  return -EINVAL;
}

#endif
1405
1406#ifdef P_NFS4
/*
 * printfh2: debug helper that prints ten ints starting at fh, prefixed by
 * the label s.  fh[0] is printed in decimal, fh[1]..fh[9] as 8-digit hex.
 * Compiles to an empty function unless GPFS_PRINTK is defined.
 */
static void printfh2(char *s, int *fh)
{
#ifdef GPFS_PRINTK
  printk("%s: %d: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
         s,
         fh[0],
         fh[1], fh[2], fh[3],
         fh[4], fh[5], fh[6],
         fh[7], fh[8], fh[9]);
#endif
}
1414
1415int gpfs_get_devicelist(struct super_block *sbP, void *p)
1416{
1417  int rc = 0;
1418  struct gpfsVfsData_t *privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
1419
1420//  VFS_STAT_START(gpfs_get_devicelist);
1421  ENTER(0);
1422  DBGASSERT(privVfsP != NULL);
1423
1424  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GET_DEVICELIST_ENTER,
1425         "gpfs_get_devicelist: sbP 0x%lX p 0x%lX\n", sbP, p);
1426
1427  rc = gpfs_ops.gpfsGetDeviceList(privVfsP, p);
1428#ifdef GPFS_PRINTK
1429  printk("gpfs_get_devicelist: rc %d\n", rc);
1430#endif
1431
1432xerror:
1433  TRACE1(TRACE_VNODE, 1, TRCID_GET_DEVICELIST_EXIT,
1434         "gpfs_get_devicelist exit: rc %d\n", rc);
1435
1436//  VFS_STAT_STOP;
1437  EXIT(0);
1438  return (-rc);
1439}
1440
1441int
1442gpfs_layout_get(struct inode *iP, void *p)
1443{
1444  int rc = 0;
1445  int code = 0;
1446  cxiNode_t *cnP;
1447  ext_cred_t eCred;
1448  struct gpfsVfsData_t *privVfsP;
1449  struct nfsd4_pnfs_layoutget *lgp = (struct nfsd4_pnfs_layoutget *)p;
1450
1451//  VFS_STAT_START(gpfs_layout_get);
1452  ENTER(0);
1453  cnP = VP_TO_CNP(iP);
1454  privVfsP = VP_TO_PVP(iP);
1455  DBGASSERT(privVfsP != NULL);
1456
1457  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GET_LAYOUT_ENTER,
1458         "gpfs_layout_get: iP 0x%lX p 0x%lX\n", iP, p);
1459
1460  setCred(&eCred);
1461
1462  rc = gpfs_ops.gpfsGetLayout(privVfsP, cnP, p, &eCred);
1463
1464xerror:
1465  TRACE2(TRACE_VNODE, 1, TRCID_GET_LAYOUT_EXIT,
1466         "gpfs_layout_get exit: code %d rc %d\n", code, rc);
1467
1468//  VFS_STAT_STOP;
1469  EXIT(0);
1470  return (-rc);
1471}
1472
/* pNFS: return layout type.
 * The only value reported is LAYOUT_NFSV4_FILES (1). */
#define LAYOUT_NFSV4_FILES 1
int
gpfs_layout_type()
{
  const int layoutType = LAYOUT_NFSV4_FILES;

#ifdef GPFS_PRINTK
  printk("xxx gpfs_layout_type LAYOUT_NFSV4_FILES=%d\n", LAYOUT_NFSV4_FILES);
#endif

  return layoutType;
}
1483
1484gpfs_get_state(struct inode *iP, void *fh, void *p)
1485{
1486  int rc = 0;
1487  int nodeId, len, code = 0;
1488  cxiNode_t *cnP;
1489  ext_cred_t eCred;
1490  struct gpfsVfsData_t *privVfsP;
1491  struct pnfs_get_state *osP = (struct pnfs_get_state *)p;
1492  struct knfsd_fh *fhP = (struct knfsd_fh *)fh;
1493
1494//  VFS_STAT_START(gpfs_get_state);
1495  ENTER(0);
1496  cnP = VP_TO_CNP(iP);
1497  privVfsP = VP_TO_PVP(iP);
1498  DBGASSERT(privVfsP != NULL);
1499
1500#ifdef GPFS_PRINTK
1501  printk("gpfs_get_state iP %p fh type %d fh size %d\n",
1502          iP, fhP->fh_fsid_type, fhP->fh_size);
1503  printfh2("gpfs_get_state:", (int *)fhP);
1504#endif
1505  len = sizeof(struct pnfs_get_state);
1506  if (fhP->fh_fsid_type >= max_fsid_type && fhP->fh_size > 8) {
1507    nodeId = fhP->fh_base.fh_pad[(fhP->fh_size >> 2) -1];
1508  }
1509  else {
1510    rc = ENOENT;
1511    goto xerror;
1512  }
1513  setCred(&eCred);
1514  osP->devid = gpfs_ops.gpfsGetMyDevID(privVfsP);
1515
1516  rc = gpfs_ops.gpfsGetOpenState(privVfsP, cnP, nodeId, p, len, &eCred);
1517
1518  TRACE7(TRACE_VNODE, 2, TRCID_LINUXOPS_SET_STATEID_ENTER,
1519      "gpfs_get_state: iP 0x%lX mds %x ds %x p 0x%lX len %d verf 0x%lX:0x%lX\n",
1520       iP, nodeId, osP->devid, p, len, osP->verifier[0], osP->verifier[1]);
1521#ifdef GPFS_PRINTK
1522  printk("gpfs_get_state mds-id %x my-id %x verifier %x:%x\n",
1523                        nodeId, osP->devid, osP->verifier[0], osP->verifier[1]);
1524#endif
1525
1526xerror:
1527  TRACE2(TRACE_VNODE, 1, TRCID_SET_STETEID_EXIT,
1528         "gpfs_get_state exit: code %d rc %d\n", code, rc);
1529
1530//  VFS_STAT_STOP;
1531  EXIT(0);
1532  return (-rc);
1533}
1534
1535int
1536gpfs_layout_return(struct inode *iP, void *p)
1537{
1538  int rc = 0;
1539  cxiNode_t *cnP;
1540  struct gpfsVfsData_t *privVfsP;
1541
1542//  VFS_STAT_START(gpfs_layout_return);
1543  ENTER(0);
1544  cnP = VP_TO_CNP(iP);
1545  privVfsP = VP_TO_PVP(iP);
1546  DBGASSERT(privVfsP != NULL);
1547
1548#ifdef GPFS_PRINTK
1549  printk("gpfs_layout_return iP %p\n", iP);
1550#endif
1551  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_LAYOUT_RET_ENTER,
1552         "gpfs_layout_return: iP 0x%lX\n", iP);
1553
1554  rc = gpfs_ops.gpfsLayoutRetrun(privVfsP, cnP, p, sizeof(struct layout_return));
1555
1556xerror:
1557  TRACE2(TRACE_VNODE, 1, TRCID_LAYOUT_RET_EXIT,
1558         "gpfs_layout_return exit: code %d iP 0x%lX\n", rc, iP);
1559
1560//  VFS_STAT_STOP;
1561  EXIT(0);
1562  return (-rc);
1563}
1564int
1565gpfs_get_deviceinfo(struct super_block *sbP, void *p)
1566{
1567  int rc;
1568  struct gpfsVfsData_t *privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
1569
1570//  VFS_STAT_START(gpfs_get_deviceinfo);
1571  ENTER(0);
1572  DBGASSERT(privVfsP != NULL);
1573
1574  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GET_DEVICEINFO_ENTER,
1575         "gpfs_get_deviceinfo: sbP 0x%lX p 0x%lX\n", sbP, p);
1576
1577#ifdef GPFS_PRINTK
1578  printk("gpfs_get_deviceinfo: sbP 0x%lX p 0x%lX\n", sbP, p);
1579#endif
1580  rc = gpfs_ops.gpfsGetDeviceInfo(privVfsP, p);
1581#ifdef GPFS_PRINTK
1582  printk("gpfs_get_deviceinfo: rc %d\n", rc);
1583#endif
1584
1585xerror:
1586  TRACE1(TRACE_VNODE, 1, TRCID_GET_DEVICEINFO_EXIT,
1587         "gpfs_get_deviceinfo exit: rc %d\n", rc);
1588
1589//  VFS_STAT_STOP;
1590  EXIT(0);
1591  return (-rc);
1592}
1593
1594void gpfs_get_verifier(struct super_block *sbP, u32 *p)
1595{
1596  int rc;
1597  struct gpfsVfsData_t *privVfsP = (struct gpfsVfsData_t *)SBLOCK_PRIVATE(sbP);
1598
1599//  VFS_STAT_START(gpfs_get_verifier);
1600  ENTER(0);
1601  DBGASSERT(privVfsP != NULL);
1602
1603#ifdef GPFS_PRINTK
1604  printk("gpfs_get_verifier: sbP 0x%lX p 0x%lX\n", sbP, p);
1605#endif
1606
1607  gpfs_ops.gpfsGetVerifier(privVfsP, p);
1608
1609#ifdef GPFS_PRINTK
1610  printk("gpfs_get_verifier: sbP 0x%lX v1 0x%lX v2 0x%lX\n", sbP, *p, *(p+1));
1611#endif
1612
1613//  VFS_STAT_STOP;
1614  EXIT(0);
1615  return;
1616}
1617
1618#endif
Note: See TracBrowser for help on using the repository browser.