source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/mmap.c @ 223

Last change on this file since 223 was 16, checked in by rock, 17 years ago
File size: 21.7 KB
/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* @(#)26       1.86  src/avs/fs/mmfs/ts/kernext/gpl-linux/mmap.c, mmfs, avs_rgpfs24, rgpfs24s003a 5/8/06 11:04:56 */

#include <Shark-gpl.h>
#include <arch-gpl.h>

#include <linux/mm.h>
#if defined(REDHAT_AS_LINUX) && LINUX_KERNEL_VERSION >= 2042101
#include <linux/mm_inline.h>
#endif

#include <linux/pagemap.h>
#include <linux/module.h>
#include <asm/pgalloc.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>

#include <verdep.h>
#include <cxiSystem.h>
#include <cxi2gpfs.h>
#include <cxiMmap.h>
#include <linux2gpfs.h>
#include <Trace.h>
#include <LockNames.h>


/* True if the paging operations are enabled.  Serialized using PQLockWord. */
static Boolean mmapEnabled = false;

/* Storage for page queue entries */
#define MAX_PAGEQUE_ENTRIES 500
static cxibuf_t Page_queue[MAX_PAGEQUE_ENTRIES];

/* Head of list of free page queue entries, protected by PQLockWord */
static cxibuf_t *PageQueueFreeP;
static cxiBlockingMutex_t PQLockWord;


/* dump page contents
 * flag = 0 ==> after read from disk
 *        1 ==> write
 */
static void dump_page(struct vm_area_struct *vma, struct page *page, int flag)
{
#ifdef TRACE_IO_DATA
  int trcbuf[12];
  char *what = (flag == 1) ? "write" : "read";
  char *kaddr = kmap(page);
  ENTER(0);
  memcpy(trcbuf, kaddr, sizeof(trcbuf));
  kunmap(page);

  TRACE8(TRACE_VNODE, 6, TRCID_MMAP_DIRTY_PAGE_DUMP,
         "dump 0 %s page 0x%lX: vma 0x%08X count %d data %08X %08X %08X %08X\n",
         what, page, vma, page_count(page),
         CPUToBigEnd32(trcbuf[0]),
         CPUToBigEnd32(trcbuf[1]),
         CPUToBigEnd32(trcbuf[2]),
         CPUToBigEnd32(trcbuf[3]));
  TRACE8(TRACE_VNODE, 9, TRCID_MMAP_DIRTY_PAGE_DUMP_A,
         "dump 1 %s page 0x%lX: vma 0x%08X count %d data %08X %08X %08X %08X\n",
         what, page, vma, page_count(page),
         CPUToBigEnd32(trcbuf[4]),
         CPUToBigEnd32(trcbuf[5]),
         CPUToBigEnd32(trcbuf[6]),
         CPUToBigEnd32(trcbuf[7]));
  TRACE8(TRACE_VNODE, 9, TRCID_MMAP_DIRTY_PAGE_DUMP_B,
         "dump 2 %s page 0x%lX: vma 0x%08X count %d data %08X %08X %08X %08X\n",
         what, page, vma, page_count(page),
         CPUToBigEnd32(trcbuf[8]),
         CPUToBigEnd32(trcbuf[9]),
         CPUToBigEnd32(trcbuf[10]),
         CPUToBigEnd32(trcbuf[11]));
  EXIT(0);
#endif
}


/* Disable paging operations */
void mmapKill()
{
  ENTER(0);
  cxiBlockingMutexAcquire(&PQLockWord);
  mmapEnabled = false;
  cxiBlockingMutexRelease(&PQLockWord);
  EXIT(0);
}

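/* Enable paging operations (called during initialization) */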
void EnableMmap()
{
  /* It is ok to change without holding PQLockWord since it is
   * called from initialization
   */
  mmapEnabled = true;
}


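/* Module initialization: initialize PQLockWord, build the free list of
   page queue entries, and enable paging operations. */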
int cxiMmapRegister(void *dummy)
{
  int i;

  ENTER(0);
  TRACE0(TRACE_VNODE, 2, TRCID_MMAP_REG_ENTER,
         "cxiMmapRegister enter\n");

  cxiBlockingMutexInit(&PQLockWord, GPFS_LOCK_MMAP_FREEQ_IDX);

  TRACE2(TRACE_VNODE, 2, TRCID_MMAP_REG_5,
         "cxiMmapRegister: Page_queue addr range [0x%lX - 0x%lX]\n",
         &Page_queue[0], &Page_queue[MAX_PAGEQUE_ENTRIES - 1] );

  /* Initialize page queue entries.  When a page arrives for read or write
     (by readpage or writepage functions), the page information will be
     copied to a free queue entry and that entry will be added to the end
     of the pager kproc queue. */
  PageQueueFreeP = NULL;
  for (i = 0; i < MAX_PAGEQUE_ENTRIES; i++)
  {
    Page_queue[i].av_forw = PageQueueFreeP;
    PageQueueFreeP = &Page_queue[i];
    Page_queue[i].pageP = NULL;
    Page_queue[i].b_vp = NULL;
    Page_queue[i].vinfoP = NULL;
    Page_queue[i].b_baddr = NULL;
    Page_queue[i].b_flags = 0;
    Page_queue[i].b_blkno = 0;
  }

  mmapEnabled = true;
  EXIT(0);
  return 0;
}

/* Module termination */
int cxiMmapUnregister(void *dummy)
{
  ENTER(0);
  TRACE0(TRACE_VNODE, 2, TRCID_MMAP_UNREG_ENTER,
         "cxiMmapUnregister enter\n");
  PageQueueFreeP = NULL;
  mmapEnabled = false;
  EXIT(0);
  return 0;
}

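/* Return the file byte offset corresponding to the buf's page index
   (b_blkno) */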
Int64 getFilePos(cxibuf_t *bufP)
{
  Int64 pos = (Int64) bufP->b_blkno << PAGE_SHIFT;
  ENTER(0);
  TRACE1(TRACE_VNODE, 5, TRCID_MMAP_FILEPOS_ENTER,
         "getFilePos: pos 0x%llX\n", pos);
  EXIT(0);
  return pos;
}

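/* Map the buf's cached page into kernel address space and return the
   mapped address */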
char *VM_Attach(cxibuf_t *bufP)
{
  DBGASSERT(bufP->pageP != NULL);
  return kmap(bufP->pageP);
}

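/* Unmap the buf's page from kernel address space (counterpart of
   VM_Attach) */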
void VM_Detach(cxibuf_t *bufP, char *baddrP)
{
  kunmap(bufP->pageP);
}

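/* Completion handler for a paging I/O request.  Update the page flags to
   reflect the outcome, end writeback or unlock the page, and return
   asynchronous bufs to the free list. */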
void IoDone(cxibuf_t *bufP)
{
  struct page *pageP = bufP->pageP;

  if (pageP != NULL)
  {
    TRACE5(TRACE_VNODE, 2, TRCID_MMAP_IO_ENTER,
           "IoDone enter: b_flags 0x%lX pageP 0x%lX index %d count %d flags 0x%lX\n",
           bufP->b_flags, pageP, pageP->index, page_count(pageP), pageP->flags);

    /* error in read or write operation */
    if ((bufP->b_flags & B_ERROR) != 0)
      SetPageError(pageP);
    else if ((bufP->b_flags & B_READ) != 0)
      SetPageUptodate(pageP);

    TRACE2(TRACE_VNODE, 2, TRCID_MMAP_IO_EXIT,
           "IoDone exit: pageP 0x%lX flags 0x%lX\n",
           pageP, pageP->flags);

#if LINUX_KERNEL_VERSION >= 2050000
    if ((bufP->b_flags & B_READ) == 0)
      /* This was a writeback request.  Signal its completion by clearing
         the writeback flag. */
      end_page_writeback(pageP);
    else
#endif
      PAGE_UNLOCK(pageP);
  }

  /* If this was an asynchronous request, free the buf struct.  For
     synchronous requests, the buf is a stack variable. */
  if ((bufP->b_flags & B_ASYNC) != 0)
  {
    cxiBlockingMutexAcquire(&PQLockWord);
    bufP->av_forw = PageQueueFreeP;
    PageQueueFreeP = bufP;
    cxiBlockingMutexRelease(&PQLockWord);
  }
}

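/* Given a cxiNode, return its underlying Linux inode (vnode) pointer and
   the associated gpfsVfsData_t */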
void getVp(void *gnP, void **vP, struct gpfsVfsData_t **privVfsP)
{
  cxiNode_t *cP = (cxiNode_t *)gnP;
  struct inode *iP = (struct inode *)cP->osNodeP;
  *privVfsP = VP_TO_PVP(iP);
  *vP = cP->osNodeP;
}


/* Flush/invalidate a mapped range:
       CmfProtect  - Remove pages from address space so that new
                     references will cause a page fault or protection fault
       CmfFlush    - Write dirty pages
       CmfInval    - Prevent cached page from being re-used
 */
int cxiMmapFlush(cxiNode_t *cnP, UInt64 start, UInt64 end,
                 enum CmflushOption cmopt)
{
  int rc = 0;
  struct inode *inodeP = cnP->osNodeP;

  ENTER(0);
  TRACE5(TRACE_VNODE, 2, TRCID_MMAP_FLUSH_ENTER,
         "cxiMmapFlush: cnP 0x%lX inodeNum %d opt %d range 0x%llX-0x%llX\n",
         cnP, inodeP->i_ino, cmopt, start, end);

  switch (cmopt)
  {
    case CmfProtect:

      /* Block new modifications to page.  This clears PTEs, which will force
         them to page fault.  It also transfers the dirty bit from the PTE to
         the page struct.  */
      UNMAP_MAPPING_RANGE(inodeP->i_mapping, start, 0);
      break;

    case CmfFlush:
      FILEMAP_FDATASYNC(rc, inodeP->i_mapping);
      if (rc == 0)
        FILEMAP_FDATAWAIT(rc, inodeP->i_mapping);
      break;

    case CmfInval:
      truncate_inode_pages(inodeP->i_mapping, (start & PAGE_CACHE_MASK));
      break;
  }

  TRACE1(TRACE_VNODE, 2, TRCID_MMAP_FLUSH_EXIT,
         "cxiMmapFlush exit: rc %d\n", rc);
  EXIT(0);
  return rc;
}


/* Lock a cache page for inode bufP->b_inodeP at index bufP->b_blkno,
   creating if necessary.  Save pointer to page in bufP->pageP.  On error,
   return with bufP->pageP NULL.  Page will be locked and a reference will
   be added.  Return non-zero if page is already up to date. */
int cxiMmapGetPage(cxibuf_t *bufP)
{
  int rc = 0;
  struct inode *inodeP = (struct inode *)bufP->b_inodeP;
  struct page *pageP = grab_cache_page(inodeP->i_mapping, bufP->b_blkno);

  ENTER(0);
  if (pageP != NULL)
  {
    if (PAGE_UP_TO_DATE(pageP))
      rc = EEXIST;
    else
      ClearPageError(pageP);

    TRACE6(TRACE_VNODE, 1, TRCID_CXIGETPAGE,
           "cxiMmapGetPage: page 0x%lX index %d count %d flags 0x%lX mapping 0x%lX uptodate %d\n",
           pageP, pageP->index, page_count(pageP), pageP->flags,
           pageP->mapping, (rc != 0));
  }
  bufP->pageP = pageP;
  EXIT(0);
  return rc;
}


/* Release/unlock page */
void cxiMmapReleasePage(struct page *pageP)
{
  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_CXIRELPAGE,
         "cxiMmapReleasePage: released page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, pageP->index, page_count(pageP), pageP->flags);

  PAGE_UNLOCK(pageP);
  page_cache_release(pageP);
  EXIT(0);
}


/* Called from do_no_page() to handle page fault.  Add page to cache if not
   already there and add a reference.  If contents are not already up to
   date, then read new contents from disk.  Return NULL if failure. */
struct page *
#if LINUX_KERNEL_VERSION > 2060300
gpfs_filemap_nopage(struct vm_area_struct *area, unsigned long address,
                    int * noShare)
#else
gpfs_filemap_nopage(struct vm_area_struct *area, unsigned long address,
                    int noShare)
#endif
{
  unsigned long index;
  struct page *pageP = NULL;
  struct page **hashP;
  struct file *fileP = area->vm_file;
  struct inode *inodeP;
  struct MMFSVInfo *vinfoP;
  Boolean haveFlushLock = false;
  cxiNode_t *cnP;
  cxibuf_t buf;

  VFS_STAT_START(readpageCall);
  ENTER(0);

  TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGE,
         "gpfs_filemap_nopage enter: area 0x%lX address 0x%lX vm_file 0x%lX "
         "vm_mm 0x%lX mm_users %d noShare %d\n", area, address, fileP,
         area->vm_mm, atomic_read(&area->vm_mm->mm_users), noShare);

  index = area->vm_pgoff + ((address - area->vm_start) >> PAGE_CACHE_SHIFT);

  TRACE4(TRACE_VNODE, 3, TRCID_LINUXOPS_NOPAGE_1,
         "gpfs_filemap_nopage: vm_start 0x%lX vm_end 0x%lX vm_flags 0x%lX "
         "index %d\n", area->vm_start, area->vm_end, area->vm_flags, index);

  /* Check that paging operations are still enabled */
  if (!mmapEnabled)
    goto exit;

  LOGASSERT(fileP != NULL);
  inodeP = fileP->f_dentry->d_inode;
  LOGASSERT(inodeP != NULL);
  cnP = VP_TO_CNP(inodeP);

  /* Remember that there were paging requests under the given instance */
  vinfoP = (struct MMFSVInfo *)fileP->private_data;
  if (vinfoP != NULL)
    ((struct cxiVinfo_t*)vinfoP)->rwPageDone = true;

  /* See if this page is already in the cache, and add a reference if so */
#if LINUX_KERNEL_VERSION >= 2057200
  pageP = find_get_page(inodeP->i_mapping, index);
#else
  hashP = page_hash(inodeP->i_mapping, index);
  pageP = __find_get_page(inodeP->i_mapping, index, hashP);
#endif
  if (pageP)
  {
    /* Page is already cached.  If it is up to date, then we do not need to
       read it.  Hold mmap flush lock until after making pte valid. */
    gpfs_ops.gpfsMmapFlushLock(cnP);
    haveFlushLock = true;

    if (PAGE_UP_TO_DATE(pageP))
      goto exit;

    /* Not up to date.  Release page and go through processRead to fetch
       the data. */
    gpfs_ops.gpfsMmapFlushUnlock(cnP);
    haveFlushLock = false;

    page_cache_release(pageP);
  }

  /* Initialize buf struct for mmap read.  We don't have to fill in a
     data address since the page won't be allocated until after all the
     necessary locks have been obtained in kSFSRead. */
  buf.av_forw = NULL;
  buf.pageP = NULL;
  buf.b_vp = cnP;
  buf.vinfoP = vinfoP;
  buf.privVfsP = VP_TO_PVP(inodeP);
  buf.b_baddr = NULL;
  buf.b_flags = B_READ | B_PFEOF;
  buf.b_blkno = index;
  buf.b_bcount = PAGE_SIZE;
  buf.b_error = 0;
  buf.b_inodeP = inodeP;

  /* Read the page.  If successful, this returns with mmap flush lock held
     and a reference added to page. */
  gpfs_ops.gpfsQueueBufs(&buf);

  pageP = buf.pageP;
  if (pageP)
    haveFlushLock = true;

exit:
#if defined(REDHAT_AS_LINUX) && LINUX_KERNEL_VERSION < 2042100
  /* The noShare flag is only used on earlier kernels (of which Redhat
   * Advanced Server is one).  This code is pretty much common to all
   * the nopage functions and thus was put in the common do_no_page()
   * function.  It's present here for RHAS.
   */
  if (noShare && pageP)
  {
    struct page *newPageP = alloc_page(GFP_HIGHUSER);
    if (newPageP)
    {
      copy_user_highpage(newPageP, pageP, address);
      flush_page_to_ram(newPageP);
    }

    page_cache_release(pageP);
    pageP = newPageP;
  }
#endif

  /* If we return non-NULL, then nopagedone routine will be called. */
  if (pageP)
  {
    TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGE_2,
           "gpfs_filemap_nopage: return page 0x%lX count %d flags 0x%lX "
           "mm_users %d\n", pageP, page_count(pageP), pageP->flags,
           atomic_read(&area->vm_mm->mm_users));

    dump_page(area, pageP, 0);
  }
  else
    TRACE0(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGE_3,
           "gpfs_filemap_nopage: return page NULL");

#if !defined(MMAP_LINUX_PATCH) || LINUX_KERNEL_VERSION >= 2060000
  /* If we don't have the nopagedone patch, release mmap flush lock here.
   * If flush/invalidate runs before do_no_page can make the PTE valid,
   * the application might see stale data and updates could be lost.
   */
  if (haveFlushLock)
    gpfs_ops.gpfsMmapFlushUnlock(cnP);
#endif

  VFS_STAT_STOP;
  EXIT(0);
  return pageP;
}


/* Called from do_no_page() after making PTE valid */
void
gpfs_filemap_nopagedone(struct vm_area_struct *area, unsigned long address,
                        int status)
{
  struct inode *inodeP = area->vm_file->f_dentry->d_inode;
  cxiNode_t *cnP = VP_TO_CNP(inodeP);

  ENTER(0);
  TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGEDONE,
         "gpfs_filemap_nopagedone: cnP 0x%lX area 0x%lX address 0x%lX status %d\n",
         cnP, area, address, status);

  gpfs_ops.gpfsMmapFlushUnlock(cnP);
  EXIT(0);
}


/* Address space operation to read a page from a file.  On entry, the page
   is locked and it is in the page cache.  If this routine is successful,
   it marks the page up to date and unlocks it.  Page faulting of a mapped
   file will call gpfs_filemap_nopage, not this routine.  The main user of
   this routine is the sendfile() system call. */
int
gpfs_i_readpage(struct file *fileP, struct page *pageP)
{
  int rc = 0, rc1 = 0, code = 0;
  struct dentry *dentryP = fileP->f_dentry;
  struct inode *inodeP = dentryP->d_inode;
  cxiNode_t *cnP = VP_TO_CNP(inodeP);
  struct gpfsVfsData_t *privVfsP;
  int index = pageP->index;
  cxibuf_t buf;
  struct page *bufPageP;
  char *kaddr1;
  char *kaddr2;
  ext_cred_t eCred;

  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_ENTER,
         "gpfs_i_readpage enter: fileP 0x%lX cnP 0x%lX inodeP 0x%lX inode %d\n",
         fileP, cnP, inodeP, inodeP->i_ino);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_ENTER_A,
         "gpfs_i_readpage: page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, index, page_count(pageP), pageP->flags);

  /* Unlock the page.  In order to read the page, we will have to obtain a
     file lock and byte range lock, and we can't do that while holding a
     page lock.  The page is not yet marked up to date, so it won't hurt if
     another process attempts to read this page.  We don't have to add a
     reference to the page since our caller is expecting us to return with
     the page unlocked, so he must already have taken care of that. */
  PAGE_UNLOCK(pageP);

  /* Make sure file is open if called from NFS */
  if (cxiIsNFSThread())
  {
    int NFSflags = FREAD;

    BEGIN_FAR_CODE;
    DBGASSERT(GNP_IS_FILE(cnP));
    rc = gpfs_ops.gpfsGetNFS((void *)inodeP,
                             (struct MMFSVInfo **)&fileP->private_data,
                             &NFSflags);
    if (rc != 0)
    {
      code = 1;
      goto xerror;
    }

    DBGASSERT((struct MMFSVInfo *)fileP->private_data != NULL);

    setCred(&eCred);
    privVfsP = VP_TO_PVP(inodeP);
    DBGASSERT(privVfsP != NULL);
    rc = gpfs_ops.gpfsOpenNFS(privVfsP, cnP, FREAD,
                              (struct MMFSVInfo *)fileP->private_data, &eCred);
    if (rc != 0)
    {
      code = 2;
      goto xerror;
    }
    END_FAR_CODE;
  }

  buf.av_forw = NULL;
  buf.pageP = NULL;
  buf.b_vp = cnP;
  buf.vinfoP = (struct MMFSVInfo *)fileP->private_data;
  buf.privVfsP = VP_TO_PVP(inodeP);
  buf.b_baddr = NULL;
  buf.b_flags = B_READ | B_PFEOF | B_SENDFILE;
  buf.b_blkno = index;
  buf.b_bcount = PAGE_SIZE;
  buf.b_error = 0;
  buf.b_inodeP = inodeP;

  /* Read the page.  If successful, this returns with mmap flush lock held
     and a reference added to page. */
  gpfs_ops.gpfsQueueBufs(&buf);

  if (buf.pageP != NULL)
  {
    bufPageP = buf.pageP;
    TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_READPAGE1,
           "gpfs_i_readpage: return page 0x%lX index %d count %d flags 0x%lX\n",
           bufPageP, bufPageP->index, page_count(bufPageP), bufPageP->flags);

    dump_page(NULL, bufPageP, 0);
    if (buf.pageP != pageP)
    {
      /* pageP may have been removed from the page cache by
         truncate_inode_pages.  Since the caller holds a reference, a page
         removed from the page cache by truncate_inode_pages is orphaned and
         will be deleted as soon as its count goes to zero.  Therefore
         grab_cache_page doesn't find it and creates a new page instead.
         Just copy the new page into pageP so that sendfile can use it and
         decrement the count, which will delete the page. */
      kaddr1 = kmap(pageP);
      kaddr2 = kmap(bufPageP);
      memcpy(kaddr1, kaddr2, PAGE_SIZE);
      kunmap(pageP);
      kunmap(bufPageP);
      SetPageUptodate(pageP);
    }

    /* Release reference that was added by gpfsReadpage */
    page_cache_release(bufPageP);

    /* Release mmap flush lock.  This lock is used to block invalidate until
       after PTE is made valid, but we aren't making any PTEs valid here. */
    gpfs_ops.gpfsMmapFlushUnlock(cnP);
  }
  else
  {
    rc = EFAULT;
    code = 3;
  }

  /* Perform release on file if called from NFS */
  if (cxiIsNFSThread())
  {
    DBGASSERT(GNP_IS_FILE(cnP));

    /* On the last NFS release, a watchdog will be set to close the file
       after a delay. */
    rc1 = gpfs_ops.gpfsReleaseNFS(inodeP);
    if ((rc1 != 0) && (rc == 0))
    {
      code = 4;
      rc = rc1;
    }
  }

xerror:

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_EXIT,
         "gpfs_i_readpage exit: inodeP 0x%lX rc %d code %d\n", inodeP, rc,
         code);
  EXIT(0);
  return -rc;
}


/* Address space operation to asynchronously write a page to a file.  On
   entry, the page is locked.  This routine queues a write request to a
   pager kproc and returns.  The kproc will unlock the page when write is
   complete, and that will wake up any waiters. */
int
#if LINUX_KERNEL_VERSION >= 2050000
gpfs_i_writepage(struct page *pageP, struct writeback_control *wbcP)
#else
gpfs_i_writepage(struct page *pageP)
#endif
{
  int rc = 0;
  struct inode *inodeP = (struct inode *) pageP->mapping->host;
  cxiNode_t *cnP = VP_TO_CNP(inodeP);
  cxibuf_t *bufP, buf;

  VFS_STAT_START(writepageCall);
  ENTER(0);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_ENTER,
         "gpfs_i_writepage enter: cnP 0x%lX inodeP 0x%lX inode %d\n",
         cnP, inodeP, inodeP->i_ino);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_ENTER_A,
         "gpfs_i_writepage: page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, pageP->index, page_count(pageP), pageP->flags);
  dump_page(NULL, pageP, 1);

  /* Get a request buffer.  If none are available, allocate one on stack
     and do the write synchronously. */
  cxiBlockingMutexAcquire(&PQLockWord);
  if (PageQueueFreeP == NULL)
  {
    bufP = &buf;
    bufP->b_flags = B_WRITE;
  }
  else
  {
    bufP = PageQueueFreeP;
    PageQueueFreeP = bufP->av_forw;
    bufP->b_flags = B_WRITE | B_ASYNC;
  }
  cxiBlockingMutexRelease(&PQLockWord);

  /* Initialize buffer */
  bufP->av_forw = NULL;
  bufP->pageP = pageP;
  bufP->b_vp = cnP;
  bufP->vinfoP = NULL;
  bufP->privVfsP = VP_TO_PVP(inodeP);
  bufP->b_baddr = NULL;
  bufP->b_blkno = pageP->index;
  bufP->b_bcount = PAGE_SIZE;
  bufP->b_error = 0;
  bufP->b_inodeP = NULL;

#if LINUX_KERNEL_VERSION >= 2050000
  /* Set the page writeback flag and unlock the page.  When write is complete,
     the pager kproc will call IoDone to clear this flag and wake up any
     threads waiting for this write to complete. */
  set_page_writeback(pageP);
  PAGE_UNLOCK(pageP);
#endif

  /* Queue the buffer to a pager kproc and return. */
  gpfs_ops.gpfsQueueBufs(bufP);

exit:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_EXIT,
         "gpfs_i_writepage exit: inodeP 0x%lX rc %d\n", inodeP, rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}