source: gpfs_3.1_ker2.6.20/lpp/mmfs/src/gpl-linux/cxiIOBuffer.c @ 223

Last change on this file since 223 was 16, checked in by rock, 17 years ago
File size: 56.0 KB
RevLine 
[16]1/***************************************************************************
2 *
3 * Copyright (C) 2001 International Business Machines
4 * All rights reserved.
5 *
6 * This file is part of the GPFS mmfslinux kernel module.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *  1. Redistributions of source code must retain the above copyright notice,
13 *     this list of conditions and the following disclaimer.
14 *  2. Redistributions in binary form must reproduce the above copyright
15 *     notice, this list of conditions and the following disclaimer in the
16 *     documentation and/or other materials provided with the distribution.
17 *  3. The name of the author may not be used to endorse or promote products
18 *     derived from this software without specific prior written
19 *     permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 *************************************************************************** */
33/* @(#)37       1.62.1.3  src/avs/fs/mmfs/ts/kernext/gpl-linux/cxiIOBuffer.c, mmfs, avs_rgpfs24, rgpfs24s010a 2/8/07 15:40:30 */
34/*
35 * Linux implementation of I/O buffers
36 *
37 * Contents:
38 *   static struct cxiKernelIOBufferDesc_t* kibdAlloc
39 *   static void kibdFree
40 *   static void deallocKernelIOBufferDesc
41 *   static int allocKernelIOBufferDesc
42 *   KibdModuleInit
43 *   KibdModuleTerm
44 *   cxiKibdPin
45 *   cxiKibdUnpin
46 *   cxiKibdUnpinAll
47 *   cxiKibdPinmm
48 *   cxiKibdUnpinmm
49 *
50 *   cxiAttachIOBuffer
51 *   cxiDetachIOBuffer
52 *   cxiUXfer
53 *   cxiKXfer
54 *   cxiKZero
55 *   cxiMapDiscontiguousRW
56 *   cxiUnmapDiscontiguousRW
57 *   cxiMapContiguousRO
58 *   cxiUnmapContiguousRO
59 *   BHioDone
60 *   cxiStartIO
61 *   cxiWaitIO
62 *   cxiKDoIO
63 *   GetDiskInfoX
64 */
65
66#include <Shark-gpl.h>
67
68#include <linux/module.h>
69#include <linux/string.h>
70#include <linux/slab.h>
71#include <linux/vmalloc.h>
72#include <linux/mm.h>
73#include <linux/blkdev.h>
74#include <linux/fs.h>
75#include <linux/smp_lock.h>
76#if LINUX_KERNEL_VERSION >= 2050000
77#include <linux/bio.h>
78#else
79#include <linux/iobuf.h>
80#endif
81
82#include <Logger-gpl.h>
83#include <Trace.h>
84#include <cxiSystem.h>
85#include <linux2gpfs.h>
86#include <verdep.h>
87#include <cxiIOBuffer.h>
88#include <cxiAtomic.h>
89#include <cxiTypes.h>
90#include <linux/mman.h>
91
#ifdef CONFIG_BGL
/* The BG/L flavour of Linux does not define get_user_pages, so provide it
 * here by forwarding all eight arguments to __get_user_pages and passing 0
 * for its extra trailing parameter. */
#define get_user_pages(taskP, mmP, startAddr, nPages, wr, frc, pageList, vmaList) \
  __get_user_pages(taskP, mmP, startAddr, nPages, wr, frc, pageList, vmaList, 0)
#endif
97
/* Fetch a page pointer from a chain of cxiKernelIOBufferDesc_t's.
 *
 * INDEX is a zero-based page index relative to the descriptor KIBDP.  While
 * INDEX is not smaller than the page count of the current descriptor, the
 * macro subtracts that count from INDEX and advances KIBDP to the next
 * descriptor in the chain.  For instance, with a KIBD holding twenty pages
 * and an INDEX of twenty, KIBDP moves to the next KIBD and INDEX becomes
 * zero.
 *
 * KIBDP, INDEX and PAGEP must all be modifiable lvalues; all three may be
 * updated.  Each argument is evaluated more than once, so do not pass
 * expressions with side effects.  PAGEP is set to NULL (and KIBDP is left
 * NULL) when the chain is exhausted before INDEX is reached.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement and can be used safely in an unbraced if/else; invoke it
 * with a trailing semicolon.
 */
#define KIBD_GET_PAGE(KIBDP, INDEX, PAGEP)                  \
  do                                                        \
  {                                                         \
    while ((KIBDP) && (INDEX) >= (KIBDP)->kibdPages)        \
    {                                                       \
      (INDEX) -= (KIBDP)->kibdPages;                        \
      (KIBDP) = (KIBDP)->kibdNextP;                         \
    }                                                       \
    if (KIBDP)                                              \
      (PAGEP) = (struct page *)(KIBDP)->maplist[(INDEX)];   \
    else                                                    \
      (PAGEP) = NULL;                                       \
  } while (0)
116
117/* Spin lock protecting list of all top-level cxiKernelIOBufferDesc_t's.
118   Using a static initializer here (spinlock_t KibdLock = SPIN_LOCK_UNLOCKED)
119   does not work, because SPIN_LOCK_UNLOCKED contains a cast to type spinlock_t.
120   In C++, (but not in C), this causes KibdLock to be put in the bss section,
121   and code to be generated to perform the initialization.  Unfortunately,
122   this initialization code does not get called, because kernel modules do
123   not have the full C++ environment established. */
124spinlock_t KibdLock;
125
126/* Static pointer to slab allocator for cxiKernelIOBufferDesc_t's */
127struct kmem_cache* KibdCacheP = NULL;
128
129/* Static head of doubly-linked list of top-level cxiKernelIOBufferDesc_t's.
130   The list is protected by KibdLock. */
131struct cxiKernelIOBufferDesc_t* KibdGblHeadP = NULL;
132
133/* Count of number of delays in busy wait loop in cxiWaitIO */
134atomic_t cxiWaitIONDelays;
135
136/* Group of Linux buffer_heads allocated together for a multi-page I/O.  A
137   chunk is just less than half a page. */
138#define BUFFER_HEADS_PER_CHUNK \
139  ((PAGE_SIZE/2-(2*sizeof(void*)+sizeof(int)+sizeof(atomic_t))) / \
140  (sizeof(void*)+sizeof(struct buffer_head)))
141
142struct cxiBufHeadChunk_t
143{
144  /* Next and previous chunks of buffers used for an I/O.  The list is
145     circular. */
146  struct cxiBufHeadChunk_t* bhcNextP;
147  struct cxiBufHeadChunk_t* bhcPrevP;
148
149  /* Number of buffer_heads used in this chunk */
150  int nBHUsed;
151
152  /* Number of buffer_heads in this chunk that have been submitted, but
153     whose iodone handler has not finished running.  Always updated
154     with atomic operations, since this field is accessed asynchronously
155     from interrupt level. */
156  atomic_t nBHActive;
157
158#if LINUX_KERNEL_VERSION >= 2050000
159  struct bio *biop[BUFFER_HEADS_PER_CHUNK];
160#endif
161
162  /* Space for buffer_heads */
163  struct buffer_head bh[BUFFER_HEADS_PER_CHUNK];
164};
165
166/* Static pointer to slab allocator for cxiBufHeadChunk_t's */
167struct kmem_cache* BhcCacheP = NULL;
168
169/* Allocate and initialize a new cxiKernelIOBufferDesc_t object.  Uses the
170   slab allocator for this object type. */
171static struct cxiKernelIOBufferDesc_t * 
172kibdAlloc()
173{
174  struct cxiKernelIOBufferDesc_t* kibdP;
175  int i;
176
177  ENTER(0);
178  kibdP = (struct cxiKernelIOBufferDesc_t*)
179            kmem_cache_alloc(KibdCacheP, GFP_KERNEL);
180  TRACE1(TRACE_KSVFS, 14, TRCID_KIBD_NEW,
181         "kibdAlloc: allocated cxiKernelIOBufferDesc_t at 0x%lX\n", kibdP);
182  if (kibdP != NULL)
183  {
184    kibdP->kibdVaddr = NULL;
185    kibdP->kibdPages = 0;
186    kibdP->kibdTotalPages = 0;
187    kibdP->kibdNextP = NULL;
188    kibdP->gblNextP = NULL;
189    kibdP->gblPrevP = NULL;
190
191    for (i=0; i < PAGES_PER_KIBD; i++)
192      kibdP->maplist[i] = NULL;
193  }
194  EXIT(0);
195  return kibdP;
196}
197
198/* Free a cxiKernelIOBufferDesc_t back to its slab allocator */
199static void 
200kibdFree(struct cxiKernelIOBufferDesc_t* kibdP)
201{
202  ENTER(0);
203  TRACE1(TRACE_KSVFS, 14, TRCID_KIBD_DELETE,
204         "kibdFree: freeing cxiKernelIOBufferDesc_t at 0x%lX\n", kibdP);
205  kmem_cache_free(KibdCacheP, (void*)kibdP);
206  EXIT(0);
207}
208
209
210/* Destroy a cxiKernelIOBufferDesc_t object. */
211static void 
212deallocKernelIOBufferDesc(struct cxiKernelIOBufferDesc_t* kibdP)
213{
214  struct cxiKernelIOBufferDesc_t *kibdPrevP;
215  struct page *pageP;
216  int pageIndex = 0;
217  int pageTotal = kibdP->kibdTotalPages;
218
219  ENTER(0);
220  for (;;)
221  {
222    kibdPrevP = kibdP;
223    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
224    if (pageP == NULL)
225      break;
226   
227    page_cache_release(pageP);
228
229    if (kibdPrevP != kibdP)
230    {
231      TRACE4(TRACE_KSVFS, 11, TRCID_DEALLOC_KIBD_1,
232             "deallocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
233             "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr, 
234             kibdPrevP->kibdPages, kibdP);
235
236      pageTotal -= kibdPrevP->kibdPages;
237      kibdFree(kibdPrevP);
238    }
239
240    pageIndex++;
241  }
242
243  if (kibdPrevP != kibdP && kibdPrevP)
244  {
245    TRACE4(TRACE_KSVFS, 11, TRCID_DEALLOC_KIBD_2,
246           "deallocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
247           "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr, 
248           kibdPrevP->kibdPages, kibdP);
249
250    pageTotal -= kibdPrevP->kibdPages;
251    kibdFree(kibdPrevP);
252  }
253
254  /* Make sure all the constituent cxiKernelIODesc_t page counts added
255   * up to the total page count in the first cxiKernelIODesct_t
256   */
257  DBGASSERT(pageTotal == 0);
258  EXIT(0);
259}
260
261
262/* Create a cxiKernelIOBufferDesc_t that maps the given region of
263 * the user address space of this process.  The buffer virtual address
264 * must be on a page boundary.
265 */
266static int 
267allocKernelIOBufferDesc(char* vaddr, int nPages,
268                        struct cxiKernelIOBufferDesc_t** kibdPP)
269{
270  struct cxiKernelIOBufferDesc_t* kibdP;
271  struct cxiKernelIOBufferDesc_t* kibdPrevP = NULL;
272  struct cxiKernelIOBufferDesc_t* kibdHeadP = NULL;
273  int rc;
274  int mapPages = 0;
275  int totalPages = 0;
276  struct page * pageP;
277  struct address_space * addrSpaceP;
278
279  /* Validate parameters */
280  ENTER(0);
281  DBGASSERT(((IntPtr)vaddr & (PAGE_SIZE-1)) == 0);
282
283  if (nPages)
284  {
285    kibdHeadP = kibdPrevP = kibdP = kibdAlloc();
286    if (kibdP == NULL)
287    {
288      rc = -ENOMEM;
289      goto errorExit;
290    }
291  }
292
293  while (nPages) 
294  {
295    mapPages = nPages;
296    if (mapPages > PAGES_PER_KIBD)
297      mapPages = PAGES_PER_KIBD;
298
299    down_read(&current->mm->mmap_sem);
300    rc = get_user_pages(current, current->mm, (unsigned long)vaddr, 
301                        mapPages, VM_WRITE, 0 /* force */, 
302                        (struct page **)kibdP->maplist, NULL);
303    up_read(&current->mm->mmap_sem);
304
305    if (rc != mapPages)
306      goto errorExit;
307
308    kibdP->kibdVaddr = vaddr;
309    kibdP->kibdPages = mapPages;
310
311    TRACE3(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_1,
312           "allocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d\n",
313           kibdP, kibdP->kibdVaddr, kibdPrevP->kibdPages);
314
315    vaddr += mapPages * PAGE_SIZE;
316    totalPages += mapPages;
317
318    nPages -= mapPages;
319    if (nPages)
320    {
321      kibdP = kibdAlloc();
322      if (kibdP == NULL)
323      {
324        rc = -ENOMEM;
325        goto errorExit;
326      }
327      kibdPrevP->kibdNextP = kibdP;
328      kibdPrevP = kibdP;
329    }
330  }
331
332  /* Total page count is kept only in the first one */
333  kibdHeadP->kibdTotalPages = totalPages;
334
335  /* Ensure these pages are't mapped to any inode, otherwise
336   * we won't be able to disclaim them.  We did have a problem
337   * where MAP_SHARED semantics would cause this.
338   */
339  pageP = (struct page *)kibdHeadP->maplist[0];
340  DBGASSERT(pageP != NULL);
341
342  addrSpaceP = pageP->mapping;
343#if LINUX_KERNEL_VERSION >= 2060600 || (defined(SUSE_LINUX) && LINUX_KERNEL_VERSION >= 2060507)
344  /* MAP_ANONYMOUS flags will have PG_anon turned on.  */
345  DBGASSERT(PageAnon(pageP));
346#else
347  DBGASSERT(addrSpaceP == NULL || addrSpaceP->host == NULL);
348#endif
349 
350
351  /* Success! */
352  *kibdPP = kibdHeadP;
353  EXIT(0);
354  return 0;
355
356errorExit:
357  TRACE5(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_2,
358         "allocKernelIOBufferDesc: vaddr 0x%lX mapPages %d totalPages %d "
359         "kibdHeadP 0x%lX rc %d\n", vaddr, mapPages, totalPages, 
360         kibdHeadP, rc);
361
362  /* Unmap and deallocate kiobufs, delete cxiKernelIOBufferDesc_t */
363  if (kibdHeadP)
364  { 
365    kibdHeadP->kibdTotalPages = totalPages;
366    deallocKernelIOBufferDesc(kibdHeadP);
367  }
368 
369  EXIT(0);
370  return ((rc < 0) ? -rc : ENOMEM);
371}
372
373/* Initialization routine - called when module is loaded */
374void 
375KibdModuleInit()
376{
377  int rc;
378
379  ENTER(0);
380  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_INIT,
381         "KibdModuleInit called\n");
382
383  /* Create a slab allocator for cxiKernelIOBufferDesc_t objects */
384  KibdCacheP = kmem_cache_create("kernIOBufDesc",
385                                 sizeof(struct cxiKernelIOBufferDesc_t),
386                                 0 /* offset */,
387                                 0 /* flags */,
388                                 NULL /* ctor */,
389                                 NULL /* dtor */);
390  if (KibdCacheP == NULL)
391    cxiPanic("Cannot create cxiKernelIOBufferDesc_t cache\n");
392
393  spin_lock_init(&KibdLock);
394
395  /* Create a slab allocator for cxiBufHeadChunk_t objects */
396  BhcCacheP = kmem_cache_create("BufHeadChunk",
397                                 sizeof(struct cxiBufHeadChunk_t),
398                                 0 /* offset */,
399                                 0 /* flags */,
400                                 NULL /* ctor */,
401                                 NULL /* dtor */);
402  if (BhcCacheP == NULL)
403    cxiPanic("Cannot create cxiBufHeadChunk_t cache\n");
404
405#if LINUX_KERNEL_VERSION >= 2060000
406  if (gpfs_init_inodecache()!=0)
407    cxiPanic("Cannot create gpfsInodeCache cache\n");
408#endif
409
410  atomic_set(&cxiWaitIONDelays, 0);
411  EXIT(0);
412}
413
414/* Termination routine - called just before module is unloaded */
415void 
416KibdModuleTerm()
417{
418  int rc;
419
420  ENTER(0);
421  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_TERM,
422         "KibdModuleTerm called\n");
423
424  /* Destroy slab allocator for cxiBufHeadChunk_t objects */
425  kmem_cache_destroy(BhcCacheP);
426
427  /* We have to ensure these are all deallocated otherwise
428   * the kmem_cache_destroy of the KibdCacheP will fail.
429   * An attempt to reload GPFS would encounter the slab
430   * cache still existing.
431   */
432  cxiKibdUnpinAll();
433
434#if LINUX_KERNEL_VERSION >= 2050000
435  /* Ugly ugly ugly FIXME
436   * On 2.5, kmem_cache_destroy may or may not succeed in actually destroying
437   * the cache.  Even when kmem_cache_free 's been called for every allocated
438   * chunk, internally, not all of the objects are on the free list.  They'll
439   * get there eventually by the virtue of cache_reap being called from a
440   * timer routine every REAPTIMEOUT_CPUC (default 2*HZ).  If
441   * kmem_cache_destroy is called before all slabs are moved to the free list
442   * (no active slabs left), it'll fail, and when kmem_cache_create is called
443   * again, it'll panic the kernel, and that's what typically happens when GPFS
444   * restarts.  Until we figure out how to do this right, keep calling
445   * cache_shrink until it tells us that it's safe to call cache_destroy
446   */
447  while (kmem_cache_shrink(KibdCacheP) != 0)
448    cxiSleep(400);
449#endif
450
451  /* Destroy slab allocator for cxiKernelIOBufferDesc_t objects */
452  kmem_cache_destroy(KibdCacheP);
453
454#if LINUX_KERNEL_VERSION >= 2060000
455  gpfs_destroy_inodecache();
456#endif
457  EXIT(0);
458}
459
460
461/* Create a cxiKernelIOBufferDesc_t object (or list of cxiKernelIOBufferDesc_t
462   objects) describing an I/O buffer in the user address space of the
463   calling process and link it onto the list of all such objects.  Pins
464   the user-level buffer.  The buffer virtual address must be on a page
465   boundary.  The length can be arbitrarily large, but must be a multiple
466   of the page size.  Returns 0 if successful, non-zero if unsuccessful.
467   */
468int 
469cxiKibdPin(char* vaddr, int len, struct cxiKernelIOBufferDesc_t** kibdPP)
470{
471  int nPages;
472  struct cxiKernelIOBufferDesc_t* headP;
473  struct cxiKernelIOBufferDesc_t* kibdP;
474  int rc;
475
476  /* Validate parameters */
477  ENTER(0);
478  TRACE2(TRACE_KSVFS, 5, TRCID_KIBDPIN_ENTER,
479         "cxiKibdPin: vaddr 0x%lX len 0x%X\n",
480         vaddr, len);
481  DBGASSERT(((IntPtr)vaddr & (PAGE_SIZE-1)) == 0);
482  DBGASSERT((len & (PAGE_SIZE-1)) == 0);
483
484  nPages = len / PAGE_SIZE;
485  rc = allocKernelIOBufferDesc(vaddr, nPages, &headP);
486  if (rc != 0)
487  {
488    EXIT(0);
489    return rc;
490  }
491
492  /* Add this cxiKernelIOBufferDesc_t to the global list before returning */
493  TRACE1(TRACE_KSVFS, 12, TRCID_KIBDPIN_EXIT,
494         "cxiKibdPin exit: returning 0x%lX\n", headP);
495
496  spin_lock(&KibdLock);
497  headP->gblNextP = KibdGblHeadP;
498  if (KibdGblHeadP != NULL)
499    KibdGblHeadP->gblPrevP = headP;
500  KibdGblHeadP = headP;
501  spin_unlock(&KibdLock);
502
503  *kibdPP = headP;
504  EXIT(0);
505  return 0;
506}
507
508
509/* Remove a cxiKernelIOBufferDesc_t object from the list of all
510   such objects, destroy it and all chained cxiKernelIOBufferDesc_t objects
511   associated with it, and unpin the associated user-level buffer. */
512void 
513cxiKibdUnpin(struct cxiKernelIOBufferDesc_t* kibdP)
514{
515  struct cxiKernelIOBufferDesc_t* nextP;
516  struct cxiKernelIOBufferDesc_t* prevP;
517
518  /* Remove this cxiKernelIOBufferDesc_t from the global list */
519  ENTER(0);
520  spin_lock(&KibdLock);
521  nextP = kibdP->gblNextP;
522  prevP = kibdP->gblPrevP;
523  if (nextP != NULL)
524    nextP->gblPrevP = prevP;
525  if (prevP != NULL)
526    prevP->gblNextP = nextP;
527  else
528    KibdGblHeadP = nextP;
529  spin_unlock(&KibdLock);
530
531  /* Free the cxiKernelIOBufferDesc_t */
532  deallocKernelIOBufferDesc(kibdP);
533  EXIT(0);
534}
535
536
537/* Free all cxiKernelIOBufferDesc_t's, and unpin their underlying storage. */
538void 
539cxiKibdUnpinAll()
540{
541  struct cxiKernelIOBufferDesc_t* nextP;
542  struct cxiKernelIOBufferDesc_t* kibdP;
543
544  ENTER(0);
545  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_UNPIN_ALL_ENTER,
546         "cxiKibdUnpinAll entry\n");
547  for (;;)
548  {
549    /* Remove first cxiKernelIOBufferDesc_t on global list */
550    spin_lock(&KibdLock);
551    kibdP = KibdGblHeadP;
552    if (kibdP == NULL)
553    {
554      spin_unlock(&KibdLock);
555      break;
556    }
557    nextP = kibdP->gblNextP;
558    if (nextP != NULL)
559      nextP->gblPrevP = NULL;
560    KibdGblHeadP = nextP;
561    spin_unlock(&KibdLock);
562
563    /* Deallocate the cxiKernelIOBufferDesc_t and unpin its storage */
564    deallocKernelIOBufferDesc(kibdP);
565  }
566  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_UNPIN_ALL_EXIT,
567         "cxiKibdUnpinAll exit\n");
568  EXIT(0);
569}
570
571
572#ifdef MMAP_DIO
573/* Create a cxiKernelIOBufferDesc_t object for a page in user address space
574   that is already pinned.  The page will be mapped into kernel address
575   space.  This is used by mmap routines that want to do direct I/O from
576   user page to disk.  The cxiKernelIOBufferDesc_t that this routine
577   creates can be passed to cxiKDoIO just like one that was created by
578   cxiKibdPin. */
579int 
580cxiKibdPinmm(struct page *pageP, struct cxiKernelIOBufferDesc_t** kibdPP)
581{
582  struct cxiKernelIOBufferDesc_t* kibdP;
583
584  ENTER(0);
585  kibdP = kibdAlloc();
586  if (kibdP == NULL)
587  {
588    EXIT(0);
589    return -ENOMEM;
590  }
591
592  kibdP->kibdVaddr = kmap(pageP);
593  kibdP->maplist[0] = (char *)pageP;
594  kibdP->kibdPages = 1;
595  kibdP->kibdTotalPages = 1;
596
597  *kibdPP = kibdP;
598  EXIT(0);
599  return 0;
600}
601
602
/* Undo cxiKibdPinmm: drop the kernel mapping of pageP with kunmap and
   return the descriptor kibdP to the slab allocator.  The page itself is
   not released here. */
void 
cxiKibdUnpinmm(struct page *pageP, struct cxiKernelIOBufferDesc_t* kibdP)
{
  ENTER(0);

  /* Unmap first, then free the descriptor that described the mapping */
  kunmap(pageP);
  kibdFree(kibdP);

  EXIT(0);
}
612#endif /* MMAP_DIO */
613
614
615/* Attach an I/O buffer to the kernel's virtual address space.  The
616   cxiIOBufferAttachment_t returned in *attachP must be used as a parameter of
617   most of the other operations on cxiIOBuffer_t's. */
618void 
619cxiAttachIOBuffer(struct cxiIOBuffer_t* iobP,
620                  struct cxiIOBufferAttachment_t* attachP)
621{
622  int oldPinCount;
623  int newPinCount;
624  int rc;
625
626  /* Increase the pin count on this I/O buffer.  If the buffer is not already
627     pinned, call the pinBuffer callback routine to arrange for the buffer
628     to be pinned, then try again. */
629  ENTER(0);
630  TRACE1(TRACE_KSVFS, 5, TRCID_ATTACH_ENTER,
631         "cxiAttachIOBuffer: dataPtr 0x%lX\n", OffsetToDataPtr(iobP,0,0));
632  for (;;)
633  {
634    oldPinCount = iobP->pinCount;
635    DBGASSERT(oldPinCount > 0);
636    if (oldPinCount == 0)
637    {
638        DBGASSERT(oldPinCount > 0);
639        break;
640  //      rc = xxx->pinBufferCallback(iobP);
641  //      if (rc != 0)
642  //        return rc;
643    }
644    else
645    {
646      newPinCount = oldPinCount+1;
647      rc = compare_and_swap((atomic_p)&iobP->pinCount, &oldPinCount, 
648                            newPinCount);
649      if (rc == 1)
650        break;
651    }
652  }
653
654  /* Once the pin of the buffer succeeds, it must have a
655   * cxiKernelIOBufferDesc_t.  Use that as the attachment data.
656   */
657  DBGASSERT(iobP->kernelIOBufferDescP != NULL);
658  attachP->kDescP = iobP->kernelIOBufferDescP;
659  TRACE2(TRACE_KSVFS, 11, TRCID_ATTACH_KIBD,
660         "cxiAttachIOBuffer: kernelIOBufferDescP 0x%lX newPinCount %d\n",
661         iobP->kernelIOBufferDescP, newPinCount);
662  EXIT(0);
663}
664
665
666/* Detach a buffer from the kernel's virtual address space. */
667void 
668cxiDetachIOBuffer(struct cxiIOBuffer_t* iobP,
669                  struct cxiIOBufferAttachment_t* attachP)
670{
671  /* Validate attachment data */
672  ENTER(0);
673  TRACE3(TRACE_KSVFS, 5, TRCID_DETACH_KIBD,
674         "cxiDetachIOBuffer: dataPtr 0x%lX kDescP 0x%lX oldPinCount %d\n",
675         OffsetToDataPtr(iobP,0,0), attachP->kDescP, iobP->pinCount);
676  if (attachP->kDescP == NULL)
677  {
678    EXIT(0);
679    return;
680  }
681  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
682
683  /* Decrement I/O buffer pin count */
684  DBGASSERT(iobP->pinCount >= 2);
685  ATOMIC_ADD(&iobP->pinCount, -1);
686
687  /* Invalidate attachment data */
688  attachP->kDescP = NULL;
689  EXIT(0);
690}
691
692
693/* Transfer len bytes beginning at offset bufOffset within I/O buffer *iobP
694   to or from a user buffer.  The direction of the transfer is given with
695   respect to the I/O buffer.  Returns EOK if successful, other error
696   codes if unsuccessful. */
697int 
698cxiUXfer(struct cxiIOBuffer_t* iobP, Boolean toIOBuffer,
699         const struct cxiIOBufferAttachment_t* attachP,
700         void* vkopP, int bufOffset, int len, struct cxiUio_t* uioP)
701{
702  int pageIndex;
703  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
704  int pageOffset;
705  struct page * pageP;
706  int pageLen;
707  unsigned long kaddr;
708  int rc = 0;
709
710  ENTER(0);
711  /* Validate parameters */
712  TRACE5(TRACE_KSVFS, 5, TRCID_UXFER_LINUX,
713         "cxiUXfer: dataPtr 0x%lX kBuf 0x%lX toIOBuf %d offset %d len %d\n",
714         OffsetToDataPtr(iobP,0,0), kibdP, toIOBuffer, bufOffset, len);
715
716  DBGASSERT(bufOffset >= 0);
717  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
718  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
719  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
720  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
721  DBGASSERT(iobP->pinCount >= 2);
722
723  /* Transfer data in or out of as many cxiKernelIOBufferDesc_t's as necessary
724     to satisfy the data move request */
725  pageIndex = bufOffset / PAGE_SIZE;
726  pageOffset = bufOffset % PAGE_SIZE;
727  pageLen = PAGE_SIZE - pageOffset;
728  for (;;)
729  {
730    /* Calculate how many bytes to move in or out of the current page of the
731       I/O buffer */
732    if (len < pageLen)
733      pageLen = len;
734
735    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
736    DBGASSERT(pageP != NULL);
737
738    /* Map current I/O buffer page into the kernel's address space
739       temporarily, then copy data in or out of the page */
740    kaddr = (unsigned long)kmap(pageP);
741    TRACE4(TRACE_KSVFS, 12, TRCID_UXFER_UIOMOVE,
742           "cxiUXfer: uiomove pageIndex %d kaddr 0x%lX pageOffset %d "
743           "pageLen %d\n", pageIndex, kaddr, pageOffset, pageLen);
744
745    rc = cxiUiomove((char *)(kaddr + pageOffset), pageLen, toIOBuffer, uioP);
746    kunmap(pageP);
747
748    /* Leave loop if an error occurred on the move */
749    if (rc != 0)
750      break;
751
752    /* Update length left to copy and test for loop termination */
753    len -= pageLen;
754    if (len <= 0)
755      break;
756
757    /* Set up for next iteration.  If the page just copied is the last
758       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
759    pageOffset = 0;
760    pageLen = PAGE_SIZE;
761    pageIndex += 1;
762  }  /* end of do forever */
763
764  EXIT(0);
765  return rc;
766}
767
768
769/* Perform cross-memory transfer of len bytes from user memory in current
770   task to memory in specified address space.  If toXmem is true then
771   copy is from userAddrP to udataP/xmemP, otherwise the opposite. */
772int 
773cxiXmemXfer(char *userAddrP, int len, char *udataP, cxiXmem_t *xmemP,
774            Boolean toXmem)
775{
776  int rc = 0;
777  int bufOffset, pageIndex, pageOffset, pageLen;
778  void *kaddrP;
779  struct page *pageP;
780  struct cxiKernelIOBufferDesc_t *kibdP = xmemP->kibdP;
781
782  ENTER(0);
783  TRACE5(TRACE_KSVFS, 5, TRCID_XMEMXFER_LINUX,
784         "cxiXmemXfer: userAddrP 0x%lX len %d udataP 0x%lX "
785         "kibdP 0x%lX toXmem %d\n", userAddrP, len, udataP, kibdP, toXmem);
786
787  bufOffset = udataP - kibdP->kibdVaddr;
788  DBGASSERT(bufOffset >= 0);
789  DBGASSERT(bufOffset + len <= kibdP->kibdTotalPages * PAGE_SIZE);
790
791  /* Transfer data in or out of as many cxiKernelIOBufferDesc_t's as necessary
792     to satisfy the data move request */
793  pageIndex = bufOffset / PAGE_SIZE;
794  pageOffset = bufOffset % PAGE_SIZE;
795  pageLen = PAGE_SIZE - pageOffset;
796  for (;;)
797  {
798    /* Calculate how many bytes to move in or out of the current page of the
799       I/O buffer */
800    if (len < pageLen)
801      pageLen = len;
802
803    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
804    DBGASSERT(pageP != NULL);
805
806    /* Map current I/O buffer page into the kernel's address space
807       temporarily, then copy data in or out of the page */
808    kaddrP = kmap(pageP);
809    TRACE4(TRACE_KSVFS, 12, TRCID_XMEMFER_COPY,
810           "cxiXmemXfer: copy pageIndex %d kaddrP 0x%lX pageOffset %d "
811           "pageLen %d\n", pageIndex, kaddrP, pageOffset, pageLen);
812
813    if (toXmem)
814      rc = cxiCopyIn(userAddrP, (char *)kaddrP + pageOffset, pageLen);
815    else
816      rc = cxiCopyOut((char *)kaddrP + pageOffset, userAddrP, pageLen);
817
818    kunmap(pageP);
819
820    /* Leave loop if an error occurred on the move */
821    if (rc != 0)
822      break;
823
824    /* Update length left to copy and test for loop termination */
825    len -= pageLen;
826    if (len <= 0)
827      break;
828
829    /* Set up for next iteration.  If the page just copied is the last
830       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
831    userAddrP += pageLen;
832    pageOffset = 0;
833    pageLen = PAGE_SIZE;
834    pageIndex += 1;
835  }  /* end of do forever */
836
837  EXIT(0);
838  return rc;
839}
840
841
842/* Transfer len bytes beginning at offset bufOffset within I/O buffer *iobP
843   to or from a contiguous kernel buffer.  The direction of the transfer
844   is given with respect to the I/O buffer.  Returns EOK if successful,
845   other error codes if unsuccessful. */
846int 
847cxiKXfer(struct cxiIOBuffer_t* iobP, Boolean toIOBuffer,
848         const struct cxiIOBufferAttachment_t* attachP,
849         int bufOffset, int len, char* kBufP)
850{
851  int pageIndex;
852  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
853  int pageOffset;
854  struct page * pageP;
855  int pageLen;
856  unsigned long kaddr;
857
858  /* Validate parameters */
859  ENTER(0);
860  TRACE6(TRACE_KSVFS, 5, TRCID_KXFER_LINUX,
861         "cxiKXfer: dataPtr 0x%lX kBuf 0x%lX toIOBuf %d offset %d len %d "
862         "kBufP 0x%lX\n", OffsetToDataPtr(iobP,0,0), kibdP,
863         toIOBuffer, bufOffset, len, kBufP);
864
865  DBGASSERT(bufOffset >= 0);
866  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
867  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
868  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
869  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
870  DBGASSERT(iobP->pinCount >= 2);
871
872  /* Transfer data in or out of as many cxiKernelIOBufferDesc_t's as necessary
873     to satisfy the data move request */
874  pageIndex = bufOffset / PAGE_SIZE;
875  pageOffset = bufOffset % PAGE_SIZE;
876  pageLen = PAGE_SIZE - pageOffset;
877  for (;;)
878  {
879    /* Calculate how many bytes to move in or out of the current page of the
880       I/O buffer */
881    if (len < pageLen)
882      pageLen = len;
883
884    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
885    DBGASSERT(pageP != NULL);
886
887    /* Map current I/O buffer page into the kernel's address space
888       temporarily, then copy data in or out of the page */
889    kaddr = (unsigned long)kmap(pageP);
890    TRACE5(TRACE_KSVFS, 12, TRCID_KXFER_MEMCPY,
891           "cxiKXfer: move kibdP 0x%lX pageIndex %d kaddr 0x%lX "
892           "pageOffset %d pageLen %d\n",
893           kibdP, pageIndex, kaddr, pageOffset, pageLen);
894
895    if (toIOBuffer)
896      memcpy((void *)(kaddr + pageOffset), kBufP, pageLen);
897    else
898      memcpy(kBufP, (void *)(kaddr + pageOffset), pageLen);
899    kunmap(pageP);
900
901    /* Update length left to copy and test for loop termination */
902    len -= pageLen;
903    if (len <= 0)
904      break;
905
906    /* Set up for next iteration.  If the page just copied is the last
907       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
908    kBufP += pageLen;
909    pageOffset = 0;
910    pageLen = PAGE_SIZE;
911    pageIndex += 1;
912  }  /* end of do forever */
913
914  EXIT(0);
915  return 0;
916}
917
918
919/* Set len bytes beginning at offset bufOffset within I/O buffer *iobP
920   to zero.  Returns EOK if successful, other error codes if unsuccessful. */
921int 
922cxiKZero(struct cxiIOBuffer_t* iobP,
923         const struct cxiIOBufferAttachment_t* attachP,
924         int bufOffset, int len)
925{
926  int pageIndex;
927  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
928  int pageOffset;
929  struct page * pageP;
930  int pageLen;
931  unsigned long kaddr;
932
933  /* Validate parameters */
934  ENTER(0);
935  TRACE4(TRACE_KSVFS, 5, TRCID_KZERO_LINUX,
936         "cxiKZero: dataPtr 0x%lX kBuf 0x%lX offset %d len %d\n",
937         OffsetToDataPtr(iobP,0,0), kibdP, bufOffset, len);
938
939  DBGASSERT(bufOffset >= 0);
940  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
941  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
942  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
943  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
944  DBGASSERT(iobP->pinCount >= 2);
945
946  /* Zero data in as many cxiKernelIOBufferDesc_t's as necessary to complete
947     the request */
948  pageIndex = bufOffset / PAGE_SIZE;
949  pageOffset = bufOffset % PAGE_SIZE;
950  pageLen = PAGE_SIZE - pageOffset;
951  for (;;)
952  {
953    /* Calculate how many bytes to zero in the current page of the I/O
954       buffer */
955    if (len < pageLen)
956      pageLen = len;
957
958    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
959    DBGASSERT(pageP != NULL);
960
961    /* Map current I/O buffer page into the kernel's address space
962       temporarily, then zero data in the page */
963    kaddr = (unsigned long)kmap(pageP);
964    TRACE4(TRACE_KSVFS, 12, TRCID_KZERO_MEMSET,
965           "cxiKZero: zero pageIndex %d kaddr 0x%lX pageOffset %d pageLen %d\n",
966           pageIndex, kaddr, pageOffset, pageLen);
967    memset((void *)(kaddr + pageOffset), 0, pageLen);
968    kunmap(pageP);
969
970    /* Update length left to zero and test for loop termination */
971    len -= pageLen;
972    if (len <= 0)
973      break;
974
975    /* Set up for next iteration.  If the page just zeroed is the last
976       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
977    pageOffset = 0;
978    pageLen = PAGE_SIZE;
979    pageIndex += 1;
980  }  /* end of do forever */
981
982  EXIT(0);
983  return 0;
984}
985
986
987/* Map an I/O buffer so it can be read and written from kernel code
988   running in the context of a user thread.  Depending on the platform, the
989   addresses at which the I/O buffer gets mapped may not be contiguous.  The
990   details of how the buffer got mapped are handled by the
991   cxiDiscontiguousDirectoryBuffer_t object that is filled in by this call.
992   On some platforms, mapping buffers using this call consumes scarce
993   resources, so all cxiMapDiscontiguousRW calls should be promptly matched by
994   cxiUnmapDiscontiguousRW calls as soon as the operation that required access
995   to the I/O buffer completes.  Returns 0 if successful, other error codes
996   if unsuccessful. */
997int 
998cxiMapDiscontiguousRW(struct cxiIOBuffer_t* iobP,
999                      const struct cxiIOBufferAttachment_t* attachP,
1000                      struct cxiDiscontiguousDirectoryBuffer_t* discontigP)
1001{
1002  /* ?? WARNING: Since this must kmap multiple pages, there is the
1003     possibility of deadlock if multiple threads are part of the way through
1004     executing this code, and LAST_PKMAP pages (512 or 1024) have already
1005     been kmapped.  There needs to be flow control whereby threads reserve
1006     enough pages to complete all of their kmaps before they begin acquiring
1007     pages. */
1008  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
1009  int pageIndex;
1010  int dirIndex;
1011  int mapPages;
1012  struct page * pageP;
1013  unsigned long kaddr;
1014
1015  /* __CXI_BUFFERS_ARE_CONTIGUOUS is not #defined */
1016
1017  /* Validate parameters */
1018  ENTER(0);
1019  TRACE3(TRACE_KSVFS, 4, TRCID_MAP_DISCONTIG_ENTER,
1020         "cxiMapDiscontiguousRW: dataPtr 0x%lX kBufP 0x%lX ioBufLen 0x%X\n",
1021         OffsetToDataPtr(iobP,0,0), kibdP, iobP->ioBufLen);
1022
1023  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
1024  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
1025  DBGASSERT(iobP->pinCount >= 2);
1026
1027  /* The mappable buffer memory may be longer than a directory block */
1028  mapPages = (iobP->ioBufLen + DISCONTIG_PAGE_SIZE - 1) / DISCONTIG_PAGE_SIZE;
1029  mapPages = MIN(mapPages, MAX_PAGES_PER_DIRBLOCK);
1030
1031  pageIndex = 0;
1032  for (dirIndex=0 ; dirIndex<mapPages ; dirIndex++)
1033  {
1034    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
1035    if (pageP == NULL)
1036      break;
1037
1038    kaddr = (unsigned long)kmap(pageP);
1039    TRACE4(TRACE_KSVFS, 12, TRCID_MAP_DISCONTIG_KMAP,
1040           "cxiMapDiscontiguousRW: dirIndex %d kibdP 0x%lX pageP 0x%lX "
1041           "kaddr 0x%lX\n", dirIndex, kibdP, pageP, kaddr);
1042
1043    DBGASSERT(dirIndex < MAX_PAGES_PER_DIRBLOCK);
1044    discontigP->userPagePointerArray[dirIndex] = (char*)kaddr;
1045    discontigP->osPagePointerArray[dirIndex] = (void*)pageP;
1046
1047    pageIndex++;
1048  }
1049
1050  discontigP->mappedLen = dirIndex * DISCONTIG_PAGE_SIZE;
1051  EXIT(0);
1052  return 0;
1053}
1054
1055
1056/* Unmap an I/O buffer previously mapped */
1057void 
1058cxiUnmapDiscontiguousRW(struct cxiIOBuffer_t* iobP,
1059                        struct cxiDiscontiguousDirectoryBuffer_t* discontigP)
1060{
1061  int pageIndex;
1062  struct page * pageP;
1063  int mappedPages;
1064
1065  ENTER(0);
1066  TRACE4(TRACE_KSVFS, 4, TRCID_UNMAP_DISCONTIG_ENTER,
1067         "cxiUnmapDiscontiguousRW: dataPtr 0x%lX kBufP 0x%lX ioBufLen 0x%X "
1068         "mappedLen %d\n", OffsetToDataPtr(iobP,0,0), iobP->kernelIOBufferDescP,
1069         iobP->ioBufLen, discontigP->mappedLen);
1070
1071  /* Unmap all pages in discontiguous map.  If the osPagePointerArray entry
1072   * is NULL, it means that the last mapping was made via MapContiguousBuffer,
1073   * which did not do any kmaps that need to be kunmap'ped.
1074   */
1075  mappedPages = (discontigP->mappedLen + DISCONTIG_PAGE_SIZE - 1) /
1076                DISCONTIG_PAGE_SIZE;
1077
1078  for (pageIndex = 0; pageIndex < mappedPages; pageIndex++)
1079  {
1080    pageP = (struct page *)discontigP->osPagePointerArray[pageIndex];
1081    TRACE3(TRACE_KSVFS, 12, TRCID_UNMAP_DISCONTIG_KUNMAP,
1082           "cxiUnmapDiscontiguousRW: unmap pageIndex %d pageP 0x%lX "
1083           "kaddr 0x%lX\n", pageIndex, pageP, 
1084           discontigP->userPagePointerArray[pageIndex]);
1085
1086    if (pageP != NULL)
1087    {
1088      kunmap(pageP);
1089      discontigP->osPagePointerArray[pageIndex] = NULL;
1090    }
1091    discontigP->userPagePointerArray[pageIndex] = NULL;
1092  }
1093  discontigP->mappedLen = 0;
1094  EXIT(0);
1095}
1096
1097/* Return an address in kernel memory that holds a contigous read-only
1098   copy of a portion of an I/O buffer.  If possible, this will be a
1099   mapping of the I/O buffer.  If necessary, this routine will allocate a
1100   new block of kernel memory and copy the requested data to it.  The
1101   returned cxiContiguousBuffer_t encapsulates what method was used, so
1102   that cxiUnmapContiguousRO can release whatever resources were obtained by
1103   this call.  Returns 0 if successful, other error codes if
1104   unsuccessful. */
1105int 
1106cxiMapContiguousRO(struct cxiIOBuffer_t* iobP,
1107                   const struct cxiIOBufferAttachment_t* attachP,
1108                   int bufOffset, int len, const char** contigBasePP,
1109                   struct cxiContiguousBuffer_t* contigP)
1110{
1111  int pageIndex;
1112  int pageOffset;
1113  int endPageIndex;
1114  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP; 
1115  struct page * pageP;
1116  unsigned long kaddr;
1117  char* tempBufP;
1118  Boolean usedKmalloc;
1119  int rc;
1120
1121  /* Validate parameters */
1122  ENTER(0);
1123  TRACE4(TRACE_KSVFS, 4, TRCID_MAP_CONTIG_ENTER,
1124         "cxiMapContiguousRO: dataPtr 0x%lX kBufP 0x%lX bufOffset %d len %d\n",
1125         OffsetToDataPtr(iobP,0,0), kibdP, bufOffset, len);
1126
1127  DBGASSERT(bufOffset >= 0);
1128  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
1129  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
1130  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
1131  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
1132  DBGASSERT(iobP->pinCount >= 2);
1133
1134  /* If the requested piece of the I/O buffer does not cross a page boundary,
1135     then map the page and return the mapped address within the page */
1136  pageIndex = bufOffset / PAGE_SIZE;
1137  pageOffset = bufOffset % PAGE_SIZE;
1138  endPageIndex = (bufOffset+len-1) / PAGE_SIZE;
1139  if (pageIndex == endPageIndex)
1140  {
1141    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
1142    DBGASSERT(pageP != NULL);
1143
1144    /* Map I/O buffer page into the kernel's address space */
1145    kaddr = (unsigned long)kmap(pageP);
1146
1147    /* Return address within the mapped page, and set map state so
1148       cxiUnmapContiguousRO knows to do kunmap */
1149    *contigBasePP = (char*) (kaddr+pageOffset);
1150    contigP->mallocedBaseP = NULL;
1151    contigP->usedKmalloc = false;
1152    contigP->pageP = pageP;
1153    TRACE2(TRACE_KSVFS, 5, TRCID_MAP_CONTIG_KMAP,
1154           "cxiMapContiguousRO: mapped pageP 0x%lX at 0x%lX\n",
1155           pageP, *contigBasePP);
1156    EXIT(0);
1157    return 0;
1158  }
1159
1160  /* Otherwise, the requested part of the I/O buffer spans page boundaries.
1161     Allocate a contiguous buffer, and copy data from the I/O buffer to the
1162     temporary buffer. */
1163  else
1164  {
1165    if (len <= PAGE_SIZE)
1166    {
1167      tempBufP = (char *)kmalloc(len, GFP_KERNEL);
1168      usedKmalloc = true;
1169    }
1170    else
1171    {
1172      tempBufP = (char*)vmalloc(len);
1173      usedKmalloc = false;
1174    }
1175    if (tempBufP == NULL)
1176    {
1177      EXIT(0);
1178      return -ENOMEM;
1179    }
1180    rc = cxiKXfer(iobP, CXI_XFER_FROM_IOBUFFER, attachP, bufOffset, len,
1181                  tempBufP);
1182    if (rc != 0)
1183    {
1184      if (usedKmalloc)
1185        kfree((void*)tempBufP);
1186      else
1187        vfree((void*)tempBufP);
1188      EXIT(0);
1189      return rc;
1190    }
1191#ifdef MALLOC_DEBUG
1192    MallocDebugNew(tempBufP, len, 4);
1193#endif
1194
1195    /* Return address within the contiguous temporary buffer, and set map
1196       state so cxiUnmapContiguousRO knows to do vfree */
1197    *contigBasePP = tempBufP;
1198    contigP->mallocedBaseP = tempBufP;
1199    contigP->usedKmalloc = usedKmalloc;
1200    contigP->pageP = NULL;
1201    TRACE1(TRACE_KSVFS, 5, TRCID_MAP_CONTIG_VMALLOC,
1202           "cxiMapContiguousRO: copied to 0x%lX\n", tempBufP);
1203    EXIT(0);
1204    return 0;
1205  }
1206}
1207
1208
1209/* Release a mapping or copy obtained with cxiMapContiguousRO */
1210void 
1211cxiUnmapContiguousRO(struct cxiIOBuffer_t* iobP,
1212                     struct cxiContiguousBuffer_t* contigP)
1213{
1214  ENTER(0);
1215  if (contigP->mallocedBaseP != NULL)
1216  {
1217    TRACE2(TRACE_KSVFS, 4, TRCID_UNMAP_CONTIG_VFREE,
1218           "cxiUnmapContiguousRO: dataPtr 0x%lX vfree 0x%lX\n",
1219           OffsetToDataPtr(iobP,0,0), contigP->mallocedBaseP);
1220    DBGASSERT(contigP->pageP == NULL);
1221
1222    if (contigP->usedKmalloc)
1223      kfree((void*)contigP->mallocedBaseP);
1224    else
1225      vfree((void*)contigP->mallocedBaseP);
1226
1227#ifdef MALLOC_DEBUG
1228    MallocDebugDelete(contigP->mallocedBaseP);
1229#endif
1230    contigP->mallocedBaseP = NULL;
1231  }
1232  else
1233  {
1234    TRACE2(TRACE_KSVFS, 4, TRCID_UNMAP_CONTIG_KUNMAP,
1235           "cxiUnmapContiguousRO: dataPtr 0x%lX kunmap 0x%lX\n",
1236           OffsetToDataPtr(iobP,0,0), contigP->pageP);
1237    DBGASSERT(contigP->pageP != NULL);
1238    kunmap((struct page *)contigP->pageP);
1239    contigP->pageP = NULL;
1240  }
1241  EXIT(0);
1242}
1243
1244
1245#if LINUX_KERNEL_VERSION < 2050000
1246/* iodone routine for GPFS buffer_heads.  Unlock buffer and wake up
1247 * waiters, if any.
1248 */
1249static void 
1250BHioDone(struct buffer_head* bhP, int uptodate)
1251{
1252  struct cxiBufHeadChunk_t* bhcP;
1253
1254  mark_buffer_uptodate(bhP, uptodate);
1255  bhcP = (struct cxiBufHeadChunk_t*)bhP->b_private;
1256  unlock_buffer(bhP);
1257  atomic_dec(&bhcP->nBHActive);
1258}
1259
1260/* Start a read or write of the given sectors from dev.  Data should be
1261 * placed into the I/O buffer beginning at byte offset bufOffset.  Returns
1262 * 0 on success, negative values on error.  All of the data to be
1263 * transferred will be in the first cxiKernelIOBufferDesc_t.
1264 */
1265int 
1266cxiStartIO(struct cxiKernelIOBufferDesc_t* kibdHeadP,
1267           Boolean isWrite, cxiDev_t dev, UInt64 startSector, int nSectors,
1268           int bufOffset, struct cxiBufHeadChunk_t** bhcHeadPP)
1269{
1270  int bufEndOffset;
1271  int nTotalPages;
1272  struct cxiBufHeadChunk_t* bhcP;
1273  struct cxiBufHeadChunk_t* bhcHeadP;
1274  struct cxiBufHeadChunk_t* bhcTailP;
1275  int nBHsAllocated;
1276  int pageIndex;
1277  int pageOffset;
1278  int sectorsThisBH;
1279  struct buffer_head* bhP;
1280  struct page* pageP;
1281  struct cxiBufHeadChunk_t* p;
1282  struct cxiKernelIOBufferDesc_t* kibdP = kibdHeadP;
1283  kdev_t kdev = cxiDevToKernelDev(dev); /* Convert to kernel version of dev_t */
1284
1285  /* Validate parameters */
1286  ENTER(0);
1287  TRACE6(TRACE_IO, 4, TRCID_KDOIO_LINUX,
1288         "cxiStartIO: kBuf 0x%lX isWrite %d dev 0x%X sector %llu nSectors %d "
1289         "offset %d\n", kibdP, isWrite, dev, startSector, nSectors, bufOffset);
1290
1291  DBGASSERT(kibdP != NULL);
1292  DBGASSERT(bufOffset >= 0);
1293  DBGASSERT(nSectors > 0);
1294
1295  /* Compute the total number of pages spanned by the portion of the
1296     buffer that will participate in the I/O.  This equals the number
1297     of buffer_heads that will be used. */
1298  bufEndOffset = bufOffset + nSectors*512 - 1;
1299  nTotalPages = (bufEndOffset/PAGE_SIZE) - (bufOffset/PAGE_SIZE) + 1;
1300
1301  /* Allocate the entire list of buffer_head chunks needed for this I/O */
1302  bhcP = (struct cxiBufHeadChunk_t*) kmem_cache_alloc(BhcCacheP, GFP_KERNEL);
1303  bhcHeadP = bhcP;
1304  if (bhcP == NULL)
1305    goto enomem;
1306
1307  bhcP->bhcNextP = bhcP;
1308  bhcP->bhcPrevP = bhcP;
1309  bhcP->nBHUsed = 0;
1310  atomic_set(&bhcP->nBHActive, 0);
1311  nBHsAllocated = BUFFER_HEADS_PER_CHUNK;
1312
1313  while (nBHsAllocated < nTotalPages)
1314  {
1315    bhcP = (struct cxiBufHeadChunk_t*) kmem_cache_alloc(BhcCacheP, GFP_KERNEL);
1316    if (bhcP == NULL) goto enomem;
1317
1318    bhcTailP = bhcHeadP->bhcPrevP;
1319    bhcP->bhcNextP = bhcHeadP;
1320    bhcP->bhcPrevP = bhcTailP;
1321    bhcTailP->bhcNextP = bhcP;
1322    bhcHeadP->bhcPrevP = bhcP;
1323    bhcP->nBHUsed = 0;
1324    atomic_set(&bhcP->nBHActive, 0);
1325    nBHsAllocated += BUFFER_HEADS_PER_CHUNK;
1326  }
1327
1328  /* Build and submit a buffer_head for each page of the current I/O */
1329  bhcP = bhcHeadP;
1330  pageIndex = bufOffset / PAGE_SIZE;
1331  pageOffset = bufOffset % PAGE_SIZE;
1332
1333  DBGASSERT(pageOffset%512 == 0);
1334  sectorsThisBH = MIN((PAGE_SIZE-pageOffset) / 512, nSectors);
1335  while (nSectors > 0)
1336  {
1337    /* Get a buffer_head for the the next page */
1338    if (bhcP->nBHUsed == BUFFER_HEADS_PER_CHUNK)
1339    {
1340      bhcP = bhcP->bhcNextP;
1341      DBGASSERT(bhcP->nBHUsed == 0);
1342    }
1343    bhP = &bhcP->bh[bhcP->nBHUsed];
1344    bhcP->nBHUsed += 1;
1345
1346    /* Initialize the new buffer_head */
1347    memset(bhP, 0, sizeof(*bhP));
1348
1349    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
1350    DBGASSERT(pageP != NULL);
1351
1352    /* Build and submit the buffer_head for the current page */
1353    bhP->b_size = sectorsThisBH * 512;
1354    bhP->b_page = pageP;
1355#ifndef __64BIT__
1356    if (PageHighMem(pageP))
1357      bhP->b_data = (char *)(0 + pageOffset);
1358    else
1359#endif
1360      bhP->b_data = page_address(pageP) + pageOffset;
1361
1362    bhP->b_this_page = bhP;
1363    bhP->b_end_io = BHioDone;
1364    bhP->b_private = (void*)bhcP;
1365    bhP->b_blocknr = startSector;
1366    init_waitqueue_head(&bhP->b_wait);
1367    bhP->b_dev = kdev;
1368    bhP->b_rdev = kdev;
1369    bhP->b_rsector = startSector;
1370    bhP->b_list = BUF_CLEAN;
1371    bhP->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req) |
1372                   (1 << BH_Uptodate);
1373    TRACE6(TRACE_IO, 6, TRCID_KDOIO_LINUX_BH,
1374           "cxiStartIO: bhcP 0x%lX bhP 0x%lX sector %llu sectorsThisBH %d state 0x%X pageP 0x%lX\n",
1375           bhcP, bhP, startSector, sectorsThisBH, bhP->b_state, pageP);
1376
1377    atomic_inc(&bhcP->nBHActive);
1378
1379    generic_make_request(isWrite, bhP);
1380
1381    if (isWrite)
1382      MOD_PGPGOUT(sectorsThisBH);
1383    else
1384      MOD_PGPGIN(sectorsThisBH);
1385
1386    /* Advance to next page */
1387    startSector += sectorsThisBH;
1388    nSectors -= sectorsThisBH;
1389    sectorsThisBH = MIN(nSectors, PAGE_SIZE/512);
1390    pageIndex += 1;
1391    pageOffset = 0;
1392  }
1393
1394  /* Unplug the disk to be sure I/Os actually get started */
1395  run_task_queue(&tq_disk);
1396
1397  /* Set success return code and return list of active buffer_heads */
1398  *bhcHeadPP = bhcHeadP;
1399  EXIT(0);
1400  return 0;
1401
1402enomem:
1403
1404  /* Free buffer_head chunks allocated so far and return failure */
1405  if (bhcHeadP != NULL)
1406  {
1407    bhcP = bhcHeadP;
1408    bhcTailP = bhcHeadP->bhcPrevP;
1409    do
1410    {
1411      p = bhcP;
1412      bhcP = bhcP->bhcNextP;
1413      kmem_cache_free(BhcCacheP, (void*)p);
1414    }
1415    while (p != bhcTailP);
1416  }
1417  EXIT(0);
1418  return -ENOMEM;
1419}
1420
1421/* Routine to set up the disk block size and get disk parameters */
1422int 
1423GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP)
1424{
1425  kdev_t kdev;
1426  int n1KBlocks;
1427
1428  /* Convert to kernel version of dev_t */
1429  ENTER(0);
1430  kdev = cxiDevToKernelDev(devId);
1431
1432  /* Get hardware sector size.  If unknown, assume 512. */
1433#if LINUX_KERNEL_VERSION >= 2040312
1434  diskInfoP->sectorSize = get_hardsect_size(kdev);
1435#else
1436  diskInfoP->sectorSize = get_hardblocksize(kdev);
1437#endif
1438  if (diskInfoP->sectorSize == 0)
1439    diskInfoP->sectorSize = 512;
1440
1441  /* Set blocksize of this device to hardware sector size */
1442  set_blocksize(kdev, diskInfoP->sectorSize);
1443
1444  /* If defined, return number of sectors on device */
1445  n1KBlocks = 0;
1446  if (blk_size[MAJOR(kdev)])
1447    n1KBlocks = blk_size[MAJOR(kdev)][MINOR(kdev)];
1448  diskInfoP->totalSectors = (Int64)n1KBlocks * 1024 / diskInfoP->sectorSize;
1449  TRACE3(TRACE_IO, 2, TRCID_DISKINFO,
1450         "GetDiskInfo: devId %08lX sector size %d totalSectors %lld\n",
1451         devId, diskInfoP->sectorSize, diskInfoP->totalSectors);
1452#if 0
1453  printk("VMALLOC_START=0x%lX VMALLOC_END=0x%lX\n",
1454         VMALLOC_START, VMALLOC_END);
1455#endif
1456
1457  EXIT(0);
1458  return 0;
1459}
1460
1461#else /* >= 2050000 */
1462
1463/* iodone routine for struct bio */
1464static int 
1465bioDone(struct bio *bioP, unsigned int done, int err)
1466{
1467  struct buffer_head *bhP;
1468  struct cxiBufHeadChunk_t *bhcP;
1469
1470  if (bioP->bi_size)
1471    return 1;
1472
1473  /* wakes up waiters who will deallocate bio buffer head chunk */
1474  bhP = (struct buffer_head *)bioP->bi_private;
1475  bhcP = (struct cxiBufHeadChunk_t *)bhP->b_private;
1476
1477  if (test_bit(BIO_UPTODATE, &bioP->bi_flags))
1478    set_buffer_uptodate(bhP);
1479  else
1480    clear_buffer_uptodate(bhP);
1481
1482  unlock_buffer(bhP);
1483  atomic_dec(&bhcP->nBHActive);
1484
1485  return 0;
1486}
1487
1488/* Start a read or write of the given sectors from dev.  Data should be
1489 * placed into the I/O buffer beginning at byte offset bufOffset.  Returns
1490 * 0 on success, negative values on error.  All of the data to be
1491 * transferred will be in the first cxiKernelIOBufferDesc_t.
1492 */
1493int 
1494cxiStartIO(struct cxiKernelIOBufferDesc_t *kibdHeadP,
1495           Boolean isWrite, cxiDev_t dev, UInt64 startSector, int nSectors,
1496           int bufOffset, struct cxiBufHeadChunk_t **bhcHeadPP)
1497{
1498  int i;
1499  int vecsAllocated;
1500  int bufEndOffset;
1501  int nTotalPages;
1502  int iovIndex;
1503  int pageIndex;
1504  int pageOffset;
1505  int sectorsThisPage;
1506  int nBHsAllocated;
1507  struct bio *bioP;
1508  struct buffer_head *bhP;
1509  struct page *pageP;
1510  struct cxiBufHeadChunk_t *p;
1511  struct cxiBufHeadChunk_t *bhcP;
1512  struct cxiBufHeadChunk_t *bhcHeadP;
1513  struct cxiBufHeadChunk_t *bhcTailP;
1514  struct cxiKernelIOBufferDesc_t *kibdP = kibdHeadP;
1515  struct block_device *bdevP = bdget(new_decode_dev(dev));
1516  int maxIOVec = bio_get_nr_vecs(bdevP); /* query max device vectors */
1517  request_queue_t* reqQP;
1518
1519  ENTER(0);
1520
1521  LOGASSERT(bdevP != NULL && bdevP->bd_disk != NULL);
1522
1523  /* Validate parameters */
1524  TRACE6(TRACE_IO, 4, TRCID_KDOIO_LINUX_BIO,
1525         "cxiStartIO: kBuf 0x%lX isWrite %d dev 0x%X sector %llu nSectors %d "
1526         "offset %d\n", kibdP, isWrite, dev, startSector, nSectors, bufOffset);
1527
1528  DBGASSERT(kibdP != NULL);
1529  DBGASSERT(bufOffset >= 0);
1530  DBGASSERT(nSectors > 0);
1531
1532  /* Compute the total number of pages spanned by the portion of the
1533   * buffer that will participate in the I/O.  This equals the number
1534   * of io vectors needed.
1535   */
1536  bufEndOffset = bufOffset + nSectors*512 - 1;
1537  nTotalPages = (bufEndOffset/PAGE_SIZE) - (bufOffset/PAGE_SIZE) + 1;
1538
1539  /* Compute the pageIndex in the kibd struct as well as the offset
1540   * in the first page to read/write.
1541   */
1542  pageIndex = bufOffset / PAGE_SIZE;
1543  pageOffset = bufOffset % PAGE_SIZE;
1544  DBGASSERT(pageOffset%512 == 0);
1545
1546  /* Allocate a single buffer_head chunk and link it to itself.
1547   * Subsequent buffer_head chunks may be needed and are allocated
1548   * below.
1549   */
1550  bhcP = (struct cxiBufHeadChunk_t *)kmem_cache_alloc(BhcCacheP, GFP_KERNEL);
1551  bhcHeadP = bhcP;
1552  if (bhcP == NULL)
1553    goto enomem;
1554
1555  bhcP->bhcNextP = bhcP; /* circular link to itself */
1556  bhcP->bhcPrevP = bhcP;
1557  bhcP->nBHUsed = 0;
1558  atomic_set(&bhcP->nBHActive, 0);
1559  nBHsAllocated = BUFFER_HEADS_PER_CHUNK;
1560
1561  while (nSectors > 0)
1562  {
1563    vecsAllocated = MIN(nTotalPages, maxIOVec);
1564
1565    bioP = bio_alloc(GFP_NOIO, vecsAllocated);
1566    if (bioP == NULL)
1567      goto enomem;
1568
1569   /* Allocate a buffer head and point to it from the bio struct.
1570    * We submit the bio struct directly but wait on the dummy
1571    * buffer_head struc, since primitives exist for waiting/wakeup
1572    * there.  We want to submit bios instead of buffer heads since
1573    * the bio can encapsulate a larger i/o whereas buffer_heads can
1574    * only do a page.
1575    */ 
1576    if (bhcP->nBHUsed == BUFFER_HEADS_PER_CHUNK)
1577    {
1578      bhcP = (struct cxiBufHeadChunk_t *)kmem_cache_alloc(BhcCacheP, 
1579                                                          GFP_KERNEL);
1580      if (bhcP == NULL) 
1581        goto enomem;
1582
1583      bhcTailP = bhcHeadP->bhcPrevP;
1584      bhcP->bhcNextP = bhcHeadP;
1585      bhcP->bhcPrevP = bhcTailP;
1586      bhcTailP->bhcNextP = bhcP;
1587      bhcHeadP->bhcPrevP = bhcP;
1588      bhcP->nBHUsed = 0;
1589
1590      atomic_set(&bhcP->nBHActive, 0);
1591      nBHsAllocated += BUFFER_HEADS_PER_CHUNK;
1592    }
1593
1594    /* Use next available buffer head and increment used count */
1595    bhcP->biop[bhcP->nBHUsed] = bioP;
1596    bhP = &bhcP->bh[bhcP->nBHUsed];
1597    bhcP->nBHUsed++;
1598
1599    bhP->b_state = 0;
1600    atomic_set(&bhP->b_count, 1); /* set to one for unlock_buffer */
1601    bhP->b_this_page = NULL;
1602    bhP->b_page = NULL;
1603    bhP->b_blocknr = 0;
1604    bhP->b_size = 0;
1605    bhP->b_data = NULL;
1606    bhP->b_bdev = NULL;
1607
1608    /* buffer head points to buffer head chunk */
1609    bhP->b_private = (void *)bhcP;
1610
1611    iovIndex = 0;
1612    bioP->bi_vcnt = 0;   /* accumulated below as number of bi_io_vecs */
1613    bioP->bi_idx = 0;    /* used by lower layer for recording current index */
1614    bioP->bi_size = 0;
1615    bioP->bi_bdev = bdevP;
1616    bioP->bi_end_io = bioDone;
1617
1618    /* bio points to buffer head that we'll wait on */
1619    bioP->bi_private = (void *)bhP;
1620    bioP->bi_sector = startSector;
1621
1622    sectorsThisPage = MIN((PAGE_SIZE-pageOffset) / 512, nSectors);
1623
1624    while (iovIndex < vecsAllocated)
1625    {
1626      KIBD_GET_PAGE(kibdP, pageIndex, pageP);
1627      DBGASSERT(pageP != NULL);
1628
1629      bioP->bi_io_vec[iovIndex].bv_page = pageP;
1630      bioP->bi_io_vec[iovIndex].bv_len = sectorsThisPage * 512;
1631      bioP->bi_io_vec[iovIndex].bv_offset = pageOffset;
1632      TRACE6(TRACE_IO, 6, TRCID_KDOIO_LINUX_BIO_PAGE,
1633             "cxiStartIO: bhcP 0x%lX bioP 0x%lX index %d sector %llu sectorsThisPage %d pageP 0x%lX\n",
1634             bhcP, bioP, iovIndex, startSector, sectorsThisPage, pageP);
1635      iovIndex++;
1636
1637      bioP->bi_vcnt = iovIndex;
1638      bioP->bi_size += (sectorsThisPage * 512);
1639
1640      /* Advance to next page */
1641      startSector += sectorsThisPage;
1642      nSectors -= sectorsThisPage;
1643      sectorsThisPage = MIN(nSectors, PAGE_SIZE/512);
1644      pageIndex += 1;
1645      pageOffset = 0;
1646    }
1647
1648    bufOffset += bioP->bi_size;
1649    nTotalPages -= bioP->bi_vcnt;
1650
1651    /* Fill in a couple of fields in this dummy buffer head
1652     * that will be examined in unlock_buffer().
1653     */
1654    set_buffer_locked(bhP);
1655    bhP->b_page = pageP;
1656
1657    atomic_inc(&bhcP->nBHActive);
1658
1659    submit_bio(isWrite, bioP);
1660  }
1661
1662  /* Unplug the device queue to avoid 3ms delay when no other I/O in
1663     progress on the device */
1664  reqQP = bdev_get_queue(bdevP);
1665  if (reqQP->unplug_fn != NULL)
1666    reqQP->unplug_fn(reqQP);
1667
1668  *bhcHeadPP = bhcHeadP;
1669  EXIT(0);
1670  return 0;
1671
1672enomem:
1673
1674  /* Free buffer_head chunks allocated so far and return failure */
1675  if (bhcHeadP != NULL)
1676  {
1677    bhcP = bhcHeadP;
1678    bhcTailP = bhcHeadP->bhcPrevP;
1679    do
1680    {
1681      for (i = 0; i < bhcP->nBHUsed; i++)
1682        bio_put(bhcP->biop[i]);
1683
1684      p = bhcP;
1685      bhcP = bhcP->bhcNextP;
1686      kmem_cache_free(BhcCacheP, (void*)p);
1687    }
1688    while (p != bhcTailP);
1689  }
1690  EXIT(0);
1691  return -ENOMEM;
1692}
1693
1694/* Routine to set up the disk block size and get disk parameters */
1695int 
1696GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP)
1697{
1698  struct block_device *bdevP = bdget(new_decode_dev(devId));
1699
1700  ENTER(0);
1701  LOGASSERT(bdevP != NULL && bdevP->bd_disk != NULL);
1702
1703  diskInfoP->sectorSize = bdev_hardsect_size(bdevP);
1704
1705  if (diskInfoP->sectorSize == 0)
1706    diskInfoP->sectorSize = 512;
1707
1708  /* Set blocksize of this device to hardware sector size */
1709  set_blocksize(bdevP, diskInfoP->sectorSize);
1710
1711  DBGASSERT(bdevP->bd_inode != NULL);
1712  diskInfoP->totalSectors = bdevP->bd_inode->i_size / diskInfoP->sectorSize;
1713
1714  TRACE3(TRACE_IO, 2, TRCID_GET_DISKINFOX,
1715         "GetDiskInfoX: devId %08lX sector size %d totalSectors %lld\n",
1716         devId, diskInfoP->sectorSize, diskInfoP->totalSectors);
1717  EXIT(0);
1718  return 0;
1719}
1720#endif
1721
1722/* Wait for a group of I/Os to complete.  Free the buffer heads after all
1723 * I/O is finished.  Returns -EIO if any buffer_head had an error.
1724 */
1725static int 
1726cxiWaitIO(struct cxiBufHeadChunk_t *bhcHeadP)
1727{
1728  int i;
1729  int rc;
1730  struct buffer_head* bhP;
1731  struct cxiBufHeadChunk_t* bhcP;
1732  struct cxiBufHeadChunk_t* p;
1733#if LINUX_KERNEL_VERSION >= 2050000
1734  struct bio *bioP;
1735#endif
1736
1737  /* Wait for I/O to be complete on all buffer_heads.  Wait on buffer_heads
1738   * in the reverse of the order in which I/O was started.  By waiting on
1739   * the last buffer_head first, it is likely that the calling thread will
1740   * only have to sleep once.
1741   */
1742  ENTER(0);
1743  rc = 0;
1744  DBGASSERT(bhcHeadP != NULL);
1745  bhcP = bhcHeadP->bhcPrevP;
1746  do
1747  {
1748    for (i = bhcP->nBHUsed-1; i >= 0; i--)
1749    {
1750      bhP = &bhcP->bh[i];
1751#if LINUX_KERNEL_VERSION >= 2050000
1752      bioP = bhcP->biop[i];
1753
1754      TRACE5(TRACE_IO, 12, TRCID_KWAITIO_BIO,
1755             "cxiWaitIO: bhP 0x%lX bioP 0x%lX sector %d size %d state 0x%lX\n",
1756             bhP, bioP, bioP->bi_sector, bioP->bi_size, bioP->bi_flags);
1757#else
1758      TRACE4(TRACE_IO, 12, TRCID_KWAITIO_BH,
1759             "cxiWaitIO: bhP 0x%lX sector %d size %d state 0x%lX\n",
1760             bhP, bhP->b_blocknr, bhP->b_size, bhP->b_state);
1761#endif
1762
1763      wait_on_buffer(bhP);
1764      if (!buffer_uptodate(bhP))
1765      {
1766#if LINUX_KERNEL_VERSION >= 2050000
1767        TRACE5(TRACE_IO, 1, TRCID_KWAITIO_BIO_ERR,
1768               "cxiWaitIO: bhP 0x%lX bioP 0x%lX sector %d size %d "
1769               "state 0x%lX\n", bhP, bioP, bioP->bi_sector, bioP->bi_size, 
1770               bioP->bi_flags);
1771#else
1772        TRACE4(TRACE_IO, 1, TRCID_KWAITIO_BH_ERR,
1773               "cxiWaitIO: error bhP 0x%lX sector %d size %d state 0x%lX\n",
1774               bhP, bhP->b_blocknr, bhP->b_size, bhP->b_state);
1775#endif
1776        rc = -EIO;
1777      }
1778#if LINUX_KERNEL_VERSION >= 2050000
1779      bio_put(bioP);
1780      bhcP->biop[i] = NULL;
1781#endif
1782    }
1783
1784    p = bhcP;
1785    bhcP = bhcP->bhcPrevP;
1786
1787    /* All of the I/Os in all of the buffer_heads inside of the
1788     * cxiBufHeadChunk_t pointed to by p are complete (the BH_Lock bits
1789     * have all been turned off).  However, it is possible that some I/O
1790     * completion handlers may not yet have returned from BHioDone and
1791     * therefore may not have finished accessing fields within the chunk
1792     * of buffer_heads.  The nBHActive keeps track of how many
1793     * completion routines have not yet returned.  If this is non-zero,
1794     * the cxiBufHeadChunk_t cannot be freed yet.  Delay briefly to
1795     * allow the interrupt handler on another processor to complete,
1796     * then free the cxiBufHeadChunk_t.  Repeat the delay until the
1797     * cxiBufHeadChunk_t is no longer in use by any interrupt handlers.
1798     */
1799    while (atomic_read(&p->nBHActive) > 0)
1800    {
1801      TRACE2(TRACE_IO, 1, TRCID_KWAITIO_BH_BUSY,
1802             "cxiWaitIO: p 0x%lX waiting for %d I/O completion handlers\n",
1803             p, atomic_read(&p->nBHActive));
1804      cxiSleep(10);
1805      atomic_inc(&cxiWaitIONDelays);
1806    }
1807
1808    kmem_cache_free(BhcCacheP, (void*)p);
1809
1810  } 
1811  while (p != bhcHeadP);
1812
1813  EXIT(0);
1814  return rc;
1815}
1816
1817/* Read or write the given sectors from dev.  Data should be placed into the
1818 * I/O buffer beginning at byte offset bufOffset.  Returns EOK on success,
1819 * negative values on error.  All of the data to be transferred will be in
1820 * the first cxiKernelIOBufferDesc_t.
1821 */
1822int 
1823cxiKDoIO(struct cxiKernelIOBufferDesc_t* kibdP,
1824         Boolean isWrite, cxiDev_t dev, UInt64 startSector, 
1825         int nSectors, int sectorSize, int bufOffset)
1826{
1827  int rc;
1828  struct cxiBufHeadChunk_t* bhcHeadP;
1829
1830  ENTER(0);
1831  DBGASSERT(sectorSize == 512);
1832
1833#ifdef KCSTRACE
1834  current->kcst_info.data[0] = dev;
1835  current->kcst_info.data[1] = startSector;
1836  current->kcst_info.data[2] = nSectors;
1837#endif
1838
1839  rc = cxiStartIO(kibdP, isWrite, dev, startSector, nSectors, 
1840                  bufOffset, &bhcHeadP);
1841  if (rc == 0)
1842    rc = cxiWaitIO(bhcHeadP);
1843
1844#ifdef KCSTRACE
1845  current->kcst_info.data[0] = 0;
1846  current->kcst_info.data[1] = 0;
1847  current->kcst_info.data[2] = 0;
1848#endif
1849
1850  EXIT(0);
1851  return rc;
1852}
Note: See TracBrowser for help on using the repository browser.