/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* @(#)26 1.86 src/avs/fs/mmfs/ts/kernext/gpl-linux/mmap.c, mmfs, avs_rgpfs24, rgpfs24s003a 5/8/06 11:04:56 */

#include <Shark-gpl.h>
#include <arch-gpl.h>

#include <linux/mm.h>
#if defined(REDHAT_AS_LINUX) && LINUX_KERNEL_VERSION >= 2042101
#include <linux/mm_inline.h>
#endif

#include <linux/pagemap.h>
#include <linux/module.h>
#include <asm/pgalloc.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>

#include <verdep.h>
#include <cxiSystem.h>
#include <cxi2gpfs.h>
#include <cxiMmap.h>
#include <linux2gpfs.h>
#include <Trace.h>
#include <LockNames.h>


/* True if the paging operations are enabled. Serialized using PQLockWord. */
static Boolean mmapEnabled = false;

/* Storage for page queue entries */
#define MAX_PAGEQUE_ENTRIES 500
static cxibuf_t Page_queue[MAX_PAGEQUE_ENTRIES];

/* Head of list of free page queue entries, protected by PQLockWord */
static cxibuf_t *PageQueueFreeP;
static cxiBlockingMutex_t PQLockWord;


/* Dump page contents:
 *   flag = 0 ==> after read from disk
 *          1 ==> write
 */
static void dump_page(struct vm_area_struct *vma, struct page *page, int flag)
{
#ifdef TRACE_IO_DATA
  int trcbuf[12];
  char *what = (flag == 1) ? "write" : "read";
  char *kaddr = kmap(page);
  ENTER(0);
  memcpy(trcbuf, kaddr, sizeof(trcbuf));
  kunmap(page);

  TRACE8(TRACE_VNODE, 6, TRCID_MMAP_DIRTY_PAGE_DUMP,
         "dump 0 %s page 0x%lX: vma 0x%08X count %d data %08X %08X %08X %08X\n",
         what, page, vma, page_count(page),
         CPUToBigEnd32(trcbuf[0]),
         CPUToBigEnd32(trcbuf[1]),
         CPUToBigEnd32(trcbuf[2]),
         CPUToBigEnd32(trcbuf[3]));
  TRACE8(TRACE_VNODE, 9, TRCID_MMAP_DIRTY_PAGE_DUMP_A,
         "dump 1 %s page 0x%lX: vma 0x%08X count %d data %08X %08X %08X %08X\n",
         what, page, vma, page_count(page),
         CPUToBigEnd32(trcbuf[4]),
         CPUToBigEnd32(trcbuf[5]),
         CPUToBigEnd32(trcbuf[6]),
         CPUToBigEnd32(trcbuf[7]));
  TRACE8(TRACE_VNODE, 9, TRCID_MMAP_DIRTY_PAGE_DUMP_B,
         "dump 2 %s page 0x%lX: vma 0x%08X count %d data %08X %08X %08X %08X\n",
         what, page, vma, page_count(page),
         CPUToBigEnd32(trcbuf[8]),
         CPUToBigEnd32(trcbuf[9]),
         CPUToBigEnd32(trcbuf[10]),
         CPUToBigEnd32(trcbuf[11]));
  EXIT(0);
#endif
}
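
/* Illustrative note (not part of the original source): dump_page compiles to
 * an empty stub unless TRACE_IO_DATA is defined. A hedged sketch of one way
 * to enable it; the exact build hook is an assumption about the GPFS
 * portability-layer makefiles:
 *
 *   EXTRA_CFLAGS += -DTRACE_IO_DATA     (in the module Makefile)
 *
 * or, for a single-file experiment, define it above the #ifdef:
 *
 *   #define TRACE_IO_DATA 1
 */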


/* Disable paging operations */
void mmapKill()
{
  ENTER(0);
  cxiBlockingMutexAcquire(&PQLockWord);
  mmapEnabled = false;
  cxiBlockingMutexRelease(&PQLockWord);
  EXIT(0);
}

void EnableMmap()
{
  /* It is OK to change this without holding PQLockWord, since it is
   * called from initialization.
   */
  mmapEnabled = true;
}


int cxiMmapRegister(void *dummy)
{
  int i;

  ENTER(0);
  TRACE0(TRACE_VNODE, 2, TRCID_MMAP_REG_ENTER,
         "cxiMmapRegister enter\n");

  cxiBlockingMutexInit(&PQLockWord, GPFS_LOCK_MMAP_FREEQ_IDX);

  TRACE2(TRACE_VNODE, 2, TRCID_MMAP_REG_5,
         "cxiMmapRegister: Page_queue addr range [0x%lX - 0x%lX]\n",
         &Page_queue[0], &Page_queue[MAX_PAGEQUE_ENTRIES - 1]);

  /* Initialize page queue entries. When a page arrives for read or write
     (via the readpage or writepage functions), the page information will be
     copied to a free queue entry and that entry will be added to the end
     of the pager kproc queue. */
  PageQueueFreeP = NULL;
  for (i = 0; i < MAX_PAGEQUE_ENTRIES; i++)
  {
    Page_queue[i].av_forw = PageQueueFreeP;
    PageQueueFreeP = &Page_queue[i];
    Page_queue[i].pageP = NULL;
    Page_queue[i].b_vp = NULL;
    Page_queue[i].vinfoP = NULL;
    Page_queue[i].b_baddr = NULL;
    Page_queue[i].b_flags = 0;
    Page_queue[i].b_blkno = 0;
  }

  mmapEnabled = true;
  EXIT(0);
  return 0;
}
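
/* Illustrative sketch (not part of the original source): how a page queue
 * entry travels through the free list built above. gpfs_i_writepage pops an
 * entry under PQLockWord, and IoDone pushes it back when the async write
 * completes. A minimal, hedged outline of that pattern, compiled out via
 * #if 0; the example_* names are hypothetical:
 */
#if 0
static cxibuf_t *example_alloc_buf(void)
{
  cxibuf_t *bufP;
  cxiBlockingMutexAcquire(&PQLockWord);
  bufP = PageQueueFreeP;              /* pop head of free list */
  if (bufP != NULL)
    PageQueueFreeP = bufP->av_forw;
  cxiBlockingMutexRelease(&PQLockWord);
  return bufP;                        /* NULL: caller must go synchronous */
}

static void example_free_buf(cxibuf_t *bufP)
{
  cxiBlockingMutexAcquire(&PQLockWord);
  bufP->av_forw = PageQueueFreeP;     /* push back on free list */
  PageQueueFreeP = bufP;
  cxiBlockingMutexRelease(&PQLockWord);
}
#endif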

/* Module termination */
int cxiMmapUnregister(void *dummy)
{
  ENTER(0);
  TRACE0(TRACE_VNODE, 2, TRCID_MMAP_UNREG_ENTER,
         "cxiMmapUnregister enter\n");
  PageQueueFreeP = NULL;
  mmapEnabled = false;
  EXIT(0);
  return 0;
}

Int64 getFilePos(cxibuf_t *bufP)
{
  Int64 pos = (Int64)bufP->b_blkno << PAGE_SHIFT;
  ENTER(0);
  TRACE1(TRACE_VNODE, 5, TRCID_MMAP_FILEPOS_ENTER,
         "getFilePos: pos 0x%llX\n", pos);
  EXIT(0);
  return pos;
}
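
/* Illustrative example (not part of the original source): b_blkno is a page
 * index, so the byte offset is blkno << PAGE_SHIFT. Assuming the common 4K
 * page size (PAGE_SHIFT == 12) for the arithmetic only:
 *
 *   b_blkno = 3  ==>  pos = 3 << 12 = 0x3000 (12288), i.e. the fourth page.
 *
 * The Int64 cast matters: shifting a narrower b_blkno without widening it
 * first would overflow for pages at file offsets of 2^31 bytes and beyond.
 */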

char *VM_Attach(cxibuf_t *bufP)
{
  DBGASSERT(bufP->pageP != NULL);
  return kmap(bufP->pageP);
}

void VM_Detach(cxibuf_t *bufP, char *baddrP)
{
  kunmap(bufP->pageP);
}
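
/* Illustrative sketch (not part of the original source): VM_Attach/VM_Detach
 * wrap kmap/kunmap, so every attach must be paired with a detach on the same
 * buffer or the kmap slot leaks on highmem kernels. A hedged usage outline,
 * compiled out via #if 0; example_touch_page is a hypothetical caller:
 */
#if 0
static void example_touch_page(cxibuf_t *bufP)
{
  char *dataP = VM_Attach(bufP);   /* map page into kernel address space */
  memset(dataP, 0, PAGE_SIZE);     /* ... access page contents ... */
  VM_Detach(bufP, dataP);          /* always unmap, even on error paths */
}
#endif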

void IoDone(cxibuf_t *bufP)
{
  struct page *pageP = bufP->pageP;

  if (pageP != NULL)
  {
    TRACE5(TRACE_VNODE, 2, TRCID_MMAP_IO_ENTER,
           "IoDone enter: b_flags 0x%lX pageP 0x%lX index %d count %d flags 0x%lX\n",
           bufP->b_flags, pageP, pageP->index, page_count(pageP), pageP->flags);

    /* Error in read or write operation */
    if ((bufP->b_flags & B_ERROR) != 0)
      SetPageError(pageP);
    else if ((bufP->b_flags & B_READ) != 0)
      SetPageUptodate(pageP);

    TRACE2(TRACE_VNODE, 2, TRCID_MMAP_IO_EXIT,
           "IoDone exit: pageP 0x%lX flags 0x%lX\n",
           pageP, pageP->flags);

#if LINUX_KERNEL_VERSION >= 2050000
    if ((bufP->b_flags & B_READ) == 0)
      /* This was a writeback request. Signal its completion by clearing the
         writeback flag. */
      end_page_writeback(pageP);
    else
#endif
      PAGE_UNLOCK(pageP);
  }

  /* If this was an asynchronous request, free the buf struct. For
     synchronous requests, the buf is a stack variable. */
  if ((bufP->b_flags & B_ASYNC) != 0)
  {
    cxiBlockingMutexAcquire(&PQLockWord);
    bufP->av_forw = PageQueueFreeP;
    PageQueueFreeP = bufP;
    cxiBlockingMutexRelease(&PQLockWord);
  }
}
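
/* Illustrative sketch (not part of the original source): how a pager kproc
 * completion path might drive IoDone. The function name and err parameter
 * are assumptions for illustration; only the b_flags/b_error handling
 * mirrors this file. Compiled out via #if 0:
 */
#if 0
static void example_complete_io(cxibuf_t *bufP, int err)
{
  if (err != 0)
  {
    bufP->b_error = err;
    bufP->b_flags |= B_ERROR;   /* IoDone will SetPageError() */
  }
  IoDone(bufP);                 /* unlocks page (or ends writeback) and
                                   returns async bufs to the free list */
}
#endif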

void getVp(void *gnP, void **vP, struct gpfsVfsData_t **privVfsP)
{
  cxiNode_t *cP = (cxiNode_t *)gnP;
  struct inode *iP = (struct inode *)cP->osNodeP;
  *privVfsP = VP_TO_PVP(iP);
  *vP = cP->osNodeP;
}


/* Flush/invalidate a mapped range:
     CmfProtect - Remove pages from the address space so that new
                  references will cause a page fault or protection fault
     CmfFlush   - Write dirty pages
     CmfInval   - Prevent cached pages from being re-used
*/
int cxiMmapFlush(cxiNode_t *cnP, UInt64 start, UInt64 end,
                 enum CmflushOption cmopt)
{
  int rc = 0;
  struct inode *inodeP = cnP->osNodeP;

  ENTER(0);
  TRACE5(TRACE_VNODE, 2, TRCID_MMAP_FLUSH_ENTER,
         "cxiMmapFlush: cnP 0x%lX inodeNum %d opt %d range 0x%llX-0x%llX\n",
         cnP, inodeP->i_ino, cmopt, start, end);

  switch (cmopt)
  {
    case CmfProtect:
      /* Block new modifications to pages. This clears PTEs, which will
         force new references to page fault. It also transfers the dirty bit
         from the PTE to the page struct. */
      UNMAP_MAPPING_RANGE(inodeP->i_mapping, start, 0);
      break;

    case CmfFlush:
      FILEMAP_FDATASYNC(rc, inodeP->i_mapping);
      if (rc == 0)
        FILEMAP_FDATAWAIT(rc, inodeP->i_mapping);
      break;

    case CmfInval:
      truncate_inode_pages(inodeP->i_mapping, (start & PAGE_CACHE_MASK));
      break;
  }

  TRACE1(TRACE_VNODE, 2, TRCID_MMAP_FLUSH_EXIT,
         "cxiMmapFlush exit: rc %d\n", rc);
  EXIT(0);
  return rc;
}
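
/* Illustrative sketch (not part of the original source): a typical
 * protect-then-flush-then-invalidate sequence over a mapped range, e.g.
 * before giving up cached file data. The helper name and error policy are
 * assumptions for illustration; compiled out via #if 0:
 */
#if 0
static int example_flush_and_inval(cxiNode_t *cnP, UInt64 start, UInt64 end)
{
  int rc;

  rc = cxiMmapFlush(cnP, start, end, CmfProtect);  /* unmap PTEs first */
  if (rc == 0)
    rc = cxiMmapFlush(cnP, start, end, CmfFlush);  /* write dirty pages */
  if (rc == 0)
    rc = cxiMmapFlush(cnP, start, end, CmfInval);  /* drop cached pages */
  return rc;
}
#endif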


/* Lock a cache page for inode bufP->b_inodeP at index bufP->b_blkno,
   creating it if necessary. Save a pointer to the page in bufP->pageP. On
   error, return with bufP->pageP NULL. The page will be locked and a
   reference will be added. Return non-zero if the page is already up to
   date. */
int cxiMmapGetPage(cxibuf_t *bufP)
{
  int rc = 0;
  struct inode *inodeP = (struct inode *)bufP->b_inodeP;
  struct page *pageP = grab_cache_page(inodeP->i_mapping, bufP->b_blkno);

  ENTER(0);
  if (pageP != NULL)
  {
    if (PAGE_UP_TO_DATE(pageP))
      rc = EEXIST;
    else
      ClearPageError(pageP);

    TRACE6(TRACE_VNODE, 1, TRCID_CXIGETPAGE,
           "cxiMmapGetPage: page 0x%lX index %d count %d flags 0x%lX mapping 0x%lX uptodate %d\n",
           pageP, pageP->index, page_count(pageP), pageP->flags,
           pageP->mapping, (rc != 0));
  }
  bufP->pageP = pageP;
  EXIT(0);
  return rc;
}


/* Release/unlock page */
void cxiMmapReleasePage(struct page *pageP)
{
  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_CXIRELPAGE,
         "cxiMmapReleasePage: released page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, pageP->index, page_count(pageP), pageP->flags);

  PAGE_UNLOCK(pageP);
  page_cache_release(pageP);
  EXIT(0);
}
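
/* Illustrative sketch (not part of the original source): pairing
 * cxiMmapGetPage with cxiMmapReleasePage. An EEXIST return means the page
 * was already up to date, so no read is needed before releasing it. The
 * helper name and ENOMEM choice are assumptions; compiled out via #if 0:
 */
#if 0
static int example_fill_page(cxibuf_t *bufP)
{
  int rc = cxiMmapGetPage(bufP);   /* locks page, adds a reference */
  if (bufP->pageP == NULL)
    return ENOMEM;                 /* assumed error code for illustration */

  if (rc != EEXIST)
  {
    /* ... read data into the page and mark it up to date ... */
  }
  cxiMmapReleasePage(bufP->pageP); /* unlock and drop the reference */
  return 0;
}
#endif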


/* Called from do_no_page() to handle a page fault. Add the page to the
   cache if it is not already there and add a reference. If its contents are
   not already up to date, read new contents from disk. Return NULL on
   failure. */
struct page *
#if LINUX_KERNEL_VERSION > 2060300
gpfs_filemap_nopage(struct vm_area_struct *area, unsigned long address,
                    int *noShare)
#else
gpfs_filemap_nopage(struct vm_area_struct *area, unsigned long address,
                    int noShare)
#endif
{
  unsigned long index;
  struct page *pageP = NULL;
#if LINUX_KERNEL_VERSION < 2057200
  struct page **hashP;
#endif
  struct file *fileP = area->vm_file;
  struct inode *inodeP;
  struct MMFSVInfo *vinfoP;
  Boolean haveFlushLock = false;
  cxiNode_t *cnP;
  cxibuf_t buf;

  VFS_STAT_START(readpageCall);
  ENTER(0);

  TRACE6(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGE,
         "gpfs_filemap_nopage enter: area 0x%lX address 0x%lX vm_file 0x%lX "
         "vm_mm 0x%lX mm_users %d noShare %d\n", area, address, fileP,
         area->vm_mm, atomic_read(&area->vm_mm->mm_users), noShare);

  index = area->vm_pgoff + ((address - area->vm_start) >> PAGE_CACHE_SHIFT);

  TRACE4(TRACE_VNODE, 3, TRCID_LINUXOPS_NOPAGE_1,
         "gpfs_filemap_nopage: vm_start 0x%lX vm_end 0x%lX vm_flags 0x%lX "
         "index %d\n", area->vm_start, area->vm_end, area->vm_flags, index);

  /* Check that paging operations are still enabled */
  if (!mmapEnabled)
    goto exit;

  LOGASSERT(fileP != NULL);
  inodeP = fileP->f_dentry->d_inode;
  LOGASSERT(inodeP != NULL);
  cnP = VP_TO_CNP(inodeP);

  /* Remember that there were paging requests under the given instance */
  vinfoP = (struct MMFSVInfo *)fileP->private_data;
  if (vinfoP != NULL)
    ((struct cxiVinfo_t *)vinfoP)->rwPageDone = true;

  /* See if this page is already in the cache, and add a reference if so */
#if LINUX_KERNEL_VERSION >= 2057200
  pageP = find_get_page(inodeP->i_mapping, index);
#else
  hashP = page_hash(inodeP->i_mapping, index);
  pageP = __find_get_page(inodeP->i_mapping, index, hashP);
#endif
  if (pageP)
  {
    /* Page is already cached. If it is up to date, then we do not need to
       read it. Hold the mmap flush lock until after making the PTE valid. */
    gpfs_ops.gpfsMmapFlushLock(cnP);
    haveFlushLock = true;

    if (PAGE_UP_TO_DATE(pageP))
      goto exit;

    /* Not up to date. Release the page and go through processRead to fetch
       the data. */
    gpfs_ops.gpfsMmapFlushUnlock(cnP);
    haveFlushLock = false;

    page_cache_release(pageP);
  }

  /* Initialize buf struct for mmap read. We don't have to fill in a
     data address since the page won't be allocated until after all the
     necessary locks have been obtained in kSFSRead. */
  buf.av_forw = NULL;
  buf.pageP = NULL;
  buf.b_vp = cnP;
  buf.vinfoP = vinfoP;
  buf.privVfsP = VP_TO_PVP(inodeP);
  buf.b_baddr = NULL;
  buf.b_flags = B_READ | B_PFEOF;
  buf.b_blkno = index;
  buf.b_bcount = PAGE_SIZE;
  buf.b_error = 0;
  buf.b_inodeP = inodeP;

  /* Read the page. If successful, this returns with the mmap flush lock
     held and a reference added to the page. */
  gpfs_ops.gpfsQueueBufs(&buf);

  pageP = buf.pageP;
  if (pageP)
    haveFlushLock = true;

exit:
#if defined(REDHAT_AS_LINUX) && LINUX_KERNEL_VERSION < 2042100
  /* The noShare flag is only used on earlier kernels (of which Redhat
   * Advanced Server is one). This code is pretty much common to all
   * the nopage functions and thus was put in the common do_no_page()
   * function. It's present here for RHAS.
   */
  if (noShare && pageP)
  {
    struct page *newPageP = alloc_page(GFP_HIGHUSER);
    if (newPageP)
    {
      copy_user_highpage(newPageP, pageP, address);
      flush_page_to_ram(newPageP);
    }

    page_cache_release(pageP);
    pageP = newPageP;
  }
#endif

  /* If we return non-NULL, then the nopagedone routine will be called. */
  if (pageP)
  {
    TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGE_2,
           "gpfs_filemap_nopage: return page 0x%lX count %d flags 0x%lX "
           "mm_users %d\n", pageP, page_count(pageP), pageP->flags,
           atomic_read(&area->vm_mm->mm_users));

    dump_page(area, pageP, 0);
  }
  else
    TRACE0(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGE_3,
           "gpfs_filemap_nopage: return page NULL");

#if !defined(MMAP_LINUX_PATCH) || LINUX_KERNEL_VERSION >= 2060000
  /* If we don't have the nopagedone patch, release the mmap flush lock
   * here. If flush/invalidate runs before do_no_page can make the PTE
   * valid, the application might see stale data and updates could be lost.
   */
  if (haveFlushLock)
    gpfs_ops.gpfsMmapFlushUnlock(cnP);
#endif

  VFS_STAT_STOP;
  EXIT(0);
  return pageP;
}


/* Called from do_no_page() after making the PTE valid */
void
gpfs_filemap_nopagedone(struct vm_area_struct *area, unsigned long address,
                        int status)
{
  struct inode *inodeP = area->vm_file->f_dentry->d_inode;
  cxiNode_t *cnP = VP_TO_CNP(inodeP);

  ENTER(0);
  TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_NOPAGEDONE,
         "gpfs_filemap_nopagedone: cnP 0x%lX area 0x%lX address 0x%lX status %d\n",
         cnP, area, address, status);

  gpfs_ops.gpfsMmapFlushUnlock(cnP);
  EXIT(0);
}
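
/* Illustrative sketch (not part of the original source): how the fault
 * handlers above would be wired into a vm_operations_struct by the mmap
 * vnode operation. The structure name is an assumption, and a nopagedone
 * member exists only on kernels carrying the MMAP_LINUX_PATCH nopagedone
 * patch. Compiled out via #if 0:
 */
#if 0
static struct vm_operations_struct gpfs_example_vmops =
{
  .nopage = gpfs_filemap_nopage,         /* fault in a missing page */
#ifdef MMAP_LINUX_PATCH
  .nopagedone = gpfs_filemap_nopagedone, /* patched kernels: runs after the
                                            PTE is made valid, releasing the
                                            mmap flush lock */
#endif
};
/* ...and in the file's mmap operation: vma->vm_ops = &gpfs_example_vmops; */
#endif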


/* Address space operation to read a page from a file. On entry, the page
   is locked and it is in the page cache. If this routine is successful,
   it marks the page up to date and unlocks it. Page faulting of a mapped
   file will call gpfs_filemap_nopage, not this routine. The main user of
   this routine is the sendfile() system call. */
int
gpfs_i_readpage(struct file *fileP, struct page *pageP)
{
  int rc = 0, rc1 = 0, code = 0;
  struct dentry *dentryP = fileP->f_dentry;
  struct inode *inodeP = dentryP->d_inode;
  cxiNode_t *cnP = VP_TO_CNP(inodeP);
  struct gpfsVfsData_t *privVfsP;
  int index = pageP->index;
  cxibuf_t buf;
  struct page *bufPageP;
  char *kaddr1;
  char *kaddr2;
  ext_cred_t eCred;

  ENTER(0);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_ENTER,
         "gpfs_i_readpage enter: fileP 0x%lX cnP 0x%lX inodeP 0x%lX inode %d\n",
         fileP, cnP, inodeP, inodeP->i_ino);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_ENTER_A,
         "gpfs_i_readpage: page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, index, page_count(pageP), pageP->flags);

  /* Unlock the page. In order to read the page, we will have to obtain a
     file lock and byte range lock, and we can't do that while holding a
     page lock. The page is not yet marked up to date, so it won't hurt if
     another process attempts to read this page. We don't have to add a
     reference to the page since our caller is expecting us to return with
     the page unlocked, so it must already have taken care of that. */
  PAGE_UNLOCK(pageP);

  /* Make sure the file is open if called from NFS */
  if (cxiIsNFSThread())
  {
    int NFSflags = FREAD;

    BEGIN_FAR_CODE;
    DBGASSERT(GNP_IS_FILE(cnP));
    rc = gpfs_ops.gpfsGetNFS((void *)inodeP,
                             (struct MMFSVInfo **)&fileP->private_data,
                             &NFSflags);
    if (rc != 0)
    {
      code = 1;
      goto xerror;
    }

    DBGASSERT((struct MMFSVInfo *)fileP->private_data != NULL);

    setCred(&eCred);
    privVfsP = VP_TO_PVP(inodeP);
    DBGASSERT(privVfsP != NULL);
    rc = gpfs_ops.gpfsOpenNFS(privVfsP, cnP, FREAD,
                              (struct MMFSVInfo *)fileP->private_data, &eCred);
    if (rc != 0)
    {
      code = 2;
      goto xerror;
    }
    END_FAR_CODE;
  }

  buf.av_forw = NULL;
  buf.pageP = NULL;
  buf.b_vp = cnP;
  buf.vinfoP = (struct MMFSVInfo *)fileP->private_data;
  buf.privVfsP = VP_TO_PVP(inodeP);
  buf.b_baddr = NULL;
  buf.b_flags = B_READ | B_PFEOF | B_SENDFILE;
  buf.b_blkno = index;
  buf.b_bcount = PAGE_SIZE;
  buf.b_error = 0;
  buf.b_inodeP = inodeP;

  /* Read the page. If successful, this returns with the mmap flush lock
     held and a reference added to the page. */
  gpfs_ops.gpfsQueueBufs(&buf);

  if (buf.pageP != NULL)
  {
    bufPageP = buf.pageP;
    TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_READPAGE1,
           "gpfs_i_readpage: return page 0x%lX index %d count %d flags 0x%lX\n",
           bufPageP, bufPageP->index, page_count(bufPageP), bufPageP->flags);

    dump_page(NULL, bufPageP, 0);
    if (buf.pageP != pageP)
    {
      /* pageP may have been removed from the page cache by
         truncate_inode_pages. Since the caller holds a reference, a page
         removed from the page cache by truncate_inode_pages is orphaned and
         will be deleted as soon as its count goes to zero. In that case
         grab_cache_page doesn't find it and creates a new page instead.
         Just copy the new page into pageP so that sendfile can use it, and
         decrement the count, which will delete the new page. */
      kaddr1 = kmap(pageP);
      kaddr2 = kmap(bufPageP);
      memcpy(kaddr1, kaddr2, PAGE_SIZE);
      kunmap(pageP);
      kunmap(bufPageP);
      SetPageUptodate(pageP);
    }

    /* Release the reference that was added by gpfsReadpage */
    page_cache_release(bufPageP);

    /* Release the mmap flush lock. This lock is used to block invalidate
       until after the PTE is made valid, but we aren't making any PTEs
       valid here. */
    gpfs_ops.gpfsMmapFlushUnlock(cnP);
  }
  else
  {
    rc = EFAULT;
    code = 3;
  }

  /* Perform a release on the file if called from NFS */
  if (cxiIsNFSThread())
  {
    DBGASSERT(GNP_IS_FILE(cnP));

    /* On the last NFS release, a watchdog will be set to close the file
       after a delay. */
    rc1 = gpfs_ops.gpfsReleaseNFS(inodeP);
    if ((rc1 != 0) && (rc == 0))
    {
      code = 4;
      rc = rc1;
    }
  }

xerror:
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_EXIT,
         "gpfs_i_readpage exit: inodeP 0x%lX rc %d code %d\n", inodeP, rc,
         code);
  EXIT(0);
  return -rc;
}


/* Address space operation to asynchronously write a page to a file. On
   entry, the page is locked. This routine queues a write request to a
   pager kproc and returns. The kproc will unlock the page when the write
   is complete, and that will wake up any waiters. */
int
#if LINUX_KERNEL_VERSION >= 2050000
gpfs_i_writepage(struct page *pageP, struct writeback_control *wbcP)
#else
gpfs_i_writepage(struct page *pageP)
#endif
{
  int rc = 0;
  struct inode *inodeP = (struct inode *)pageP->mapping->host;
  cxiNode_t *cnP = VP_TO_CNP(inodeP);
  cxibuf_t *bufP, buf;

  VFS_STAT_START(writepageCall);
  ENTER(0);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_ENTER,
         "gpfs_i_writepage enter: cnP 0x%lX inodeP 0x%lX inode %d\n",
         cnP, inodeP, inodeP->i_ino);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_ENTER_A,
         "gpfs_i_writepage: page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, pageP->index, page_count(pageP), pageP->flags);
  dump_page(NULL, pageP, 1);

  /* Get a request buffer. If none are available, allocate one on the stack
     and do the write synchronously. */
  cxiBlockingMutexAcquire(&PQLockWord);
  if (PageQueueFreeP == NULL)
  {
    bufP = &buf;
    bufP->b_flags = B_WRITE;
  }
  else
  {
    bufP = PageQueueFreeP;
    PageQueueFreeP = bufP->av_forw;
    bufP->b_flags = B_WRITE | B_ASYNC;
  }
  cxiBlockingMutexRelease(&PQLockWord);

  /* Initialize buffer */
  bufP->av_forw = NULL;
  bufP->pageP = pageP;
  bufP->b_vp = cnP;
  bufP->vinfoP = NULL;
  bufP->privVfsP = VP_TO_PVP(inodeP);
  bufP->b_baddr = NULL;
  bufP->b_blkno = pageP->index;
  bufP->b_bcount = PAGE_SIZE;
  bufP->b_error = 0;
  bufP->b_inodeP = NULL;

#if LINUX_KERNEL_VERSION >= 2050000
  /* Set the page writeback flag and unlock the page. When the write is
     complete, the pager kproc will call IoDone to clear this flag and wake
     up any threads waiting for this write to complete. */
  set_page_writeback(pageP);
  PAGE_UNLOCK(pageP);
#endif

  /* Queue the buffer to a pager kproc and return. */
  gpfs_ops.gpfsQueueBufs(bufP);

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_EXIT,
         "gpfs_i_writepage exit: inodeP 0x%lX rc %d\n", inodeP, rc);

  VFS_STAT_STOP;
  EXIT(0);
  return -rc;
}
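
/* Illustrative sketch (not part of the original source): how gpfs_i_readpage
 * and gpfs_i_writepage would be wired into the inode's address space
 * operations. The structure name is an assumption for illustration; the
 * member names are the standard Linux address_space_operations fields of
 * this era. Compiled out via #if 0:
 */
#if 0
static struct address_space_operations gpfs_example_aops =
{
  .readpage  = gpfs_i_readpage,   /* sendfile() and NFS read paths */
  .writepage = gpfs_i_writepage,  /* async writeback via pager kprocs */
};
/* ...installed at inode setup: inodeP->i_mapping->a_ops = &gpfs_example_aops; */
#endif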
---|