/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* @(#)37 1.62.1.3 src/avs/fs/mmfs/ts/kernext/gpl-linux/cxiIOBuffer.c, mmfs, avs_rgpfs24, rgpfs24s010a 2/8/07 15:40:30 */
/*
 * Linux implementation of I/O buffers
 *
 * Contents:
 *   static struct cxiKernelIOBufferDesc_t* kibdAlloc
 *   static void kibdFree
 *   static void deallocKernelIOBufferDesc
 *   static int allocKernelIOBufferDesc
 *   KibdModuleInit
 *   KibdModuleTerm
 *   cxiKibdPin
 *   cxiKibdUnpin
 *   cxiKibdUnpinAll
 *   cxiKibdPinmm
 *   cxiKibdUnpinmm
 *
 *   cxiAttachIOBuffer
 *   cxiDetachIOBuffer
 *   cxiUXfer
 *   cxiKXfer
 *   cxiKZero
 *   cxiMapDiscontiguousRW
 *   cxiUnmapDiscontiguousRW
 *   cxiMapContiguousRO
 *   cxiUnmapContiguousRO
 *   BHioDone
 *   cxiStartIO
 *   cxiWaitIO
 *   cxiKDoIO
 *   GetDiskInfoX
 */

#include <Shark-gpl.h>

#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
#if LINUX_KERNEL_VERSION >= 2050000
#include <linux/bio.h>
#else
#include <linux/iobuf.h>
#endif

#include <Logger-gpl.h>
#include <Trace.h>
#include <cxiSystem.h>
#include <linux2gpfs.h>
#include <verdep.h>
#include <cxiIOBuffer.h>
#include <cxiAtomic.h>
#include <cxiTypes.h>
#include <linux/mman.h>

#ifdef CONFIG_BGL
/* BG/L version of Linux doesn't define get_user_pages, so define it here */
#define get_user_pages(tsk, mm, start, len, write, force, pages, vmas) \
  __get_user_pages(tsk, mm, start, len, write, force, pages, vmas, 0)
#endif

/* Returns a page pointer from a cxiKernelIOBufferDesc_t.
 * The INDEX of the page to return is relative to the
 * KIBDP supplied.  For instance, a KIBD may only contain
 * twenty pages.  If you supply a KIBD and an index of twenty
 * (index starts from zero) then we'll move to the next KIBD
 * in the chain and update the INDEX to be zero.  Thus INDEX,
 * KIBDP, and PAGEP may be updated by this macro.
 */
#define KIBD_GET_PAGE(KIBDP, INDEX, PAGEP)                    \
  while ((KIBDP) && (INDEX) >= (KIBDP)->kibdPages)            \
  {                                                           \
    (INDEX) -= (KIBDP)->kibdPages;                            \
    (KIBDP) = (KIBDP)->kibdNextP;                             \
  }                                                           \
  if (KIBDP)                                                  \
    (PAGEP) = (struct page *)(KIBDP)->maplist[(INDEX)];       \
  else                                                        \
    (PAGEP) = NULL;

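/* Illustrative sketch (not part of the original module): a hypothetical
 * helper showing how KIBD_GET_PAGE is normally driven.  The caller keeps a
 * page index that only ever increases; the macro rebases that index and
 * advances through the descriptor chain as needed.  Compiled out with
 * "#if 0" since it exists purely as an example.
 */
#if 0
static int
kibdCountMappedPages(struct cxiKernelIOBufferDesc_t* kibdHeadP)
{
  struct cxiKernelIOBufferDesc_t* kibdP = kibdHeadP;
  struct page* pageP;
  int pageIndex = 0;
  int nPages = 0;

  for (;;)
  {
    /* May advance kibdP to the next descriptor and rebase pageIndex */
    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    if (pageP == NULL)
      break;
    nPages += 1;
    pageIndex += 1;
  }
  return nPages;
}
#endif
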
---|
/* Spin lock protecting list of all top-level cxiKernelIOBufferDesc_t's.
   Using a static initializer here (spinlock_t KibdLock = SPIN_LOCK_UNLOCKED)
   does not work, because SPIN_LOCK_UNLOCKED contains a cast to type spinlock_t.
   In C++ (but not in C), this causes KibdLock to be put in the bss section,
   and code to be generated to perform the initialization.  Unfortunately,
   this initialization code does not get called, because kernel modules do
   not have the full C++ environment established. */
spinlock_t KibdLock;

/* Static pointer to slab allocator for cxiKernelIOBufferDesc_t's */
struct kmem_cache* KibdCacheP = NULL;

/* Static head of doubly-linked list of top-level cxiKernelIOBufferDesc_t's.
   The list is protected by KibdLock. */
struct cxiKernelIOBufferDesc_t* KibdGblHeadP = NULL;

/* Count of number of delays in busy wait loop in cxiWaitIO */
atomic_t cxiWaitIONDelays;

/* Group of Linux buffer_heads allocated together for a multi-page I/O.  A
   chunk is just less than half a page. */
#define BUFFER_HEADS_PER_CHUNK \
  ((PAGE_SIZE/2-(2*sizeof(void*)+sizeof(int)+sizeof(atomic_t))) / \
   (sizeof(void*)+sizeof(struct buffer_head)))

struct cxiBufHeadChunk_t
{
  /* Next and previous chunks of buffers used for an I/O.  The list is
     circular. */
  struct cxiBufHeadChunk_t* bhcNextP;
  struct cxiBufHeadChunk_t* bhcPrevP;

  /* Number of buffer_heads used in this chunk */
  int nBHUsed;

  /* Number of buffer_heads in this chunk that have been submitted, but
     whose iodone handler has not finished running.  Always updated
     with atomic operations, since this field is accessed asynchronously
     from interrupt level. */
  atomic_t nBHActive;

#if LINUX_KERNEL_VERSION >= 2050000
  struct bio *biop[BUFFER_HEADS_PER_CHUNK];
#endif

  /* Space for buffer_heads */
  struct buffer_head bh[BUFFER_HEADS_PER_CHUNK];
};

/* Static pointer to slab allocator for cxiBufHeadChunk_t's */
struct kmem_cache* BhcCacheP = NULL;

/* Allocate and initialize a new cxiKernelIOBufferDesc_t object.  Uses the
   slab allocator for this object type. */
static struct cxiKernelIOBufferDesc_t *
kibdAlloc()
{
  struct cxiKernelIOBufferDesc_t* kibdP;
  int i;

  ENTER(0);
  kibdP = (struct cxiKernelIOBufferDesc_t*)
            kmem_cache_alloc(KibdCacheP, GFP_KERNEL);
  TRACE1(TRACE_KSVFS, 14, TRCID_KIBD_NEW,
         "kibdAlloc: allocated cxiKernelIOBufferDesc_t at 0x%lX\n", kibdP);
  if (kibdP != NULL)
  {
    kibdP->kibdVaddr = NULL;
    kibdP->kibdPages = 0;
    kibdP->kibdTotalPages = 0;
    kibdP->kibdNextP = NULL;
    kibdP->gblNextP = NULL;
    kibdP->gblPrevP = NULL;

    for (i=0; i < PAGES_PER_KIBD; i++)
      kibdP->maplist[i] = NULL;
  }
  EXIT(0);
  return kibdP;
}

/* Free a cxiKernelIOBufferDesc_t back to its slab allocator */
static void
kibdFree(struct cxiKernelIOBufferDesc_t* kibdP)
{
  ENTER(0);
  TRACE1(TRACE_KSVFS, 14, TRCID_KIBD_DELETE,
         "kibdFree: freeing cxiKernelIOBufferDesc_t at 0x%lX\n", kibdP);
  kmem_cache_free(KibdCacheP, (void*)kibdP);
  EXIT(0);
}


/* Destroy a cxiKernelIOBufferDesc_t object. */
static void
deallocKernelIOBufferDesc(struct cxiKernelIOBufferDesc_t* kibdP)
{
  struct cxiKernelIOBufferDesc_t *kibdPrevP;
  struct page *pageP;
  int pageIndex = 0;
  int pageTotal = kibdP->kibdTotalPages;

  ENTER(0);
  for (;;)
  {
    kibdPrevP = kibdP;
    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    if (pageP == NULL)
      break;

    page_cache_release(pageP);

    if (kibdPrevP != kibdP)
    {
      TRACE4(TRACE_KSVFS, 11, TRCID_DEALLOC_KIBD_1,
             "deallocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
             "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr,
             kibdPrevP->kibdPages, kibdP);

      pageTotal -= kibdPrevP->kibdPages;
      kibdFree(kibdPrevP);
    }

    pageIndex++;
  }

  if (kibdPrevP != kibdP && kibdPrevP)
  {
    TRACE4(TRACE_KSVFS, 11, TRCID_DEALLOC_KIBD_2,
           "deallocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d "
           "kibdNextP 0x%lX\n", kibdPrevP, kibdPrevP->kibdVaddr,
           kibdPrevP->kibdPages, kibdP);

    pageTotal -= kibdPrevP->kibdPages;
    kibdFree(kibdPrevP);
  }

  /* Make sure all the constituent cxiKernelIOBufferDesc_t page counts added
   * up to the total page count in the first cxiKernelIOBufferDesc_t
   */
  DBGASSERT(pageTotal == 0);
  EXIT(0);
}
---|


/* Create a cxiKernelIOBufferDesc_t that maps the given region of
 * the user address space of this process.  The buffer virtual address
 * must be on a page boundary.
 */
static int
allocKernelIOBufferDesc(char* vaddr, int nPages,
                        struct cxiKernelIOBufferDesc_t** kibdPP)
{
  struct cxiKernelIOBufferDesc_t* kibdP;
  struct cxiKernelIOBufferDesc_t* kibdPrevP = NULL;
  struct cxiKernelIOBufferDesc_t* kibdHeadP = NULL;
  int rc;
  int mapPages = 0;
  int totalPages = 0;
  struct page * pageP;
  struct address_space * addrSpaceP;

  /* Validate parameters */
  ENTER(0);
  DBGASSERT(((IntPtr)vaddr & (PAGE_SIZE-1)) == 0);

  if (nPages)
  {
    kibdHeadP = kibdPrevP = kibdP = kibdAlloc();
    if (kibdP == NULL)
    {
      rc = -ENOMEM;
      goto errorExit;
    }
  }

  while (nPages)
  {
    mapPages = nPages;
    if (mapPages > PAGES_PER_KIBD)
      mapPages = PAGES_PER_KIBD;

    down_read(&current->mm->mmap_sem);
    rc = get_user_pages(current, current->mm, (unsigned long)vaddr,
                        mapPages, VM_WRITE, 0 /* force */,
                        (struct page **)kibdP->maplist, NULL);
    up_read(&current->mm->mmap_sem);

    if (rc != mapPages)
      goto errorExit;

    kibdP->kibdVaddr = vaddr;
    kibdP->kibdPages = mapPages;

    TRACE3(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_1,
           "allocKernelIOBufferDesc: kibdP 0x%lX vaddr 0x%lX kibdPages %d\n",
           kibdP, kibdP->kibdVaddr, kibdPrevP->kibdPages);

    vaddr += mapPages * PAGE_SIZE;
    totalPages += mapPages;

    nPages -= mapPages;
    if (nPages)
    {
      kibdP = kibdAlloc();
      if (kibdP == NULL)
      {
        rc = -ENOMEM;
        goto errorExit;
      }
      kibdPrevP->kibdNextP = kibdP;
      kibdPrevP = kibdP;
    }
  }

  /* Total page count is kept only in the first one */
  kibdHeadP->kibdTotalPages = totalPages;

  /* Ensure these pages aren't mapped to any inode, otherwise
   * we won't be able to disclaim them.  We did have a problem
   * where MAP_SHARED semantics would cause this.
   */
  pageP = (struct page *)kibdHeadP->maplist[0];
  DBGASSERT(pageP != NULL);

  addrSpaceP = pageP->mapping;
#if LINUX_KERNEL_VERSION >= 2060600 || (defined(SUSE_LINUX) && LINUX_KERNEL_VERSION >= 2060507)
  /* MAP_ANONYMOUS flags will have PG_anon turned on. */
  DBGASSERT(PageAnon(pageP));
#else
  DBGASSERT(addrSpaceP == NULL || addrSpaceP->host == NULL);
#endif


  /* Success! */
  *kibdPP = kibdHeadP;
  EXIT(0);
  return 0;

errorExit:
  TRACE5(TRACE_KSVFS, 11, TRCID_ALLOC_KIBD_2,
         "allocKernelIOBufferDesc: vaddr 0x%lX mapPages %d totalPages %d "
         "kibdHeadP 0x%lX rc %d\n", vaddr, mapPages, totalPages,
         kibdHeadP, rc);

  /* Unmap and deallocate kiobufs, delete cxiKernelIOBufferDesc_t */
  if (kibdHeadP)
  {
    kibdHeadP->kibdTotalPages = totalPages;
    deallocKernelIOBufferDesc(kibdHeadP);
  }

  EXIT(0);
  return ((rc < 0) ? -rc : ENOMEM);
}

/* Initialization routine - called when module is loaded */
void
KibdModuleInit()
{
  int rc;

  ENTER(0);
  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_INIT,
         "KibdModuleInit called\n");

  /* Create a slab allocator for cxiKernelIOBufferDesc_t objects */
  KibdCacheP = kmem_cache_create("kernIOBufDesc",
                                 sizeof(struct cxiKernelIOBufferDesc_t),
                                 0 /* offset */,
                                 0 /* flags */,
                                 NULL /* ctor */,
                                 NULL /* dtor */);
  if (KibdCacheP == NULL)
    cxiPanic("Cannot create cxiKernelIOBufferDesc_t cache\n");

  spin_lock_init(&KibdLock);

  /* Create a slab allocator for cxiBufHeadChunk_t objects */
  BhcCacheP = kmem_cache_create("BufHeadChunk",
                                sizeof(struct cxiBufHeadChunk_t),
                                0 /* offset */,
                                0 /* flags */,
                                NULL /* ctor */,
                                NULL /* dtor */);
  if (BhcCacheP == NULL)
    cxiPanic("Cannot create cxiBufHeadChunk_t cache\n");

#if LINUX_KERNEL_VERSION >= 2060000
  if (gpfs_init_inodecache()!=0)
    cxiPanic("Cannot create gpfsInodeCache cache\n");
#endif

  atomic_set(&cxiWaitIONDelays, 0);
  EXIT(0);
}

/* Termination routine - called just before module is unloaded */
void
KibdModuleTerm()
{
  int rc;

  ENTER(0);
  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_TERM,
         "KibdModuleTerm called\n");

  /* Destroy slab allocator for cxiBufHeadChunk_t objects */
  kmem_cache_destroy(BhcCacheP);

  /* We have to ensure these are all deallocated, otherwise
   * the kmem_cache_destroy of the KibdCacheP will fail.
   * An attempt to reload GPFS would encounter the slab
   * cache still existing.
   */
  cxiKibdUnpinAll();

#if LINUX_KERNEL_VERSION >= 2050000
  /* Ugly ugly ugly FIXME
   * On 2.5, kmem_cache_destroy may or may not succeed in actually destroying
   * the cache.  Even when kmem_cache_free has been called for every allocated
   * chunk, internally, not all of the objects are on the free list.  They'll
   * get there eventually by the virtue of cache_reap being called from a
   * timer routine every REAPTIMEOUT_CPUC (default 2*HZ).  If
   * kmem_cache_destroy is called before all slabs are moved to the free list
   * (no active slabs left), it'll fail, and when kmem_cache_create is called
   * again, it'll panic the kernel, and that's what typically happens when GPFS
   * restarts.  Until we figure out how to do this right, keep calling
   * cache_shrink until it tells us that it's safe to call cache_destroy
   */
  while (kmem_cache_shrink(KibdCacheP) != 0)
    cxiSleep(400);
#endif

  /* Destroy slab allocator for cxiKernelIOBufferDesc_t objects */
  kmem_cache_destroy(KibdCacheP);

#if LINUX_KERNEL_VERSION >= 2060000
  gpfs_destroy_inodecache();
#endif
  EXIT(0);
}


/* Create a cxiKernelIOBufferDesc_t object (or list of cxiKernelIOBufferDesc_t
   objects) describing an I/O buffer in the user address space of the
   calling process and link it onto the list of all such objects.  Pins
   the user-level buffer.  The buffer virtual address must be on a page
   boundary.  The length can be arbitrarily large, but must be a multiple
   of the page size.  Returns 0 if successful, non-zero if unsuccessful.
*/
int
cxiKibdPin(char* vaddr, int len, struct cxiKernelIOBufferDesc_t** kibdPP)
{
  int nPages;
  struct cxiKernelIOBufferDesc_t* headP;
  struct cxiKernelIOBufferDesc_t* kibdP;
  int rc;

  /* Validate parameters */
  ENTER(0);
  TRACE2(TRACE_KSVFS, 5, TRCID_KIBDPIN_ENTER,
         "cxiKibdPin: vaddr 0x%lX len 0x%X\n",
         vaddr, len);
  DBGASSERT(((IntPtr)vaddr & (PAGE_SIZE-1)) == 0);
  DBGASSERT((len & (PAGE_SIZE-1)) == 0);

  nPages = len / PAGE_SIZE;
  rc = allocKernelIOBufferDesc(vaddr, nPages, &headP);
  if (rc != 0)
  {
    EXIT(0);
    return rc;
  }

  /* Add this cxiKernelIOBufferDesc_t to the global list before returning */
  TRACE1(TRACE_KSVFS, 12, TRCID_KIBDPIN_EXIT,
         "cxiKibdPin exit: returning 0x%lX\n", headP);

  spin_lock(&KibdLock);
  headP->gblNextP = KibdGblHeadP;
  if (KibdGblHeadP != NULL)
    KibdGblHeadP->gblPrevP = headP;
  KibdGblHeadP = headP;
  spin_unlock(&KibdLock);

  *kibdPP = headP;
  EXIT(0);
  return 0;
}


/* Remove a cxiKernelIOBufferDesc_t object from the list of all
   such objects, destroy it and all chained cxiKernelIOBufferDesc_t objects
   associated with it, and unpin the associated user-level buffer. */
void
cxiKibdUnpin(struct cxiKernelIOBufferDesc_t* kibdP)
{
  struct cxiKernelIOBufferDesc_t* nextP;
  struct cxiKernelIOBufferDesc_t* prevP;

  /* Remove this cxiKernelIOBufferDesc_t from the global list */
  ENTER(0);
  spin_lock(&KibdLock);
  nextP = kibdP->gblNextP;
  prevP = kibdP->gblPrevP;
  if (nextP != NULL)
    nextP->gblPrevP = prevP;
  if (prevP != NULL)
    prevP->gblNextP = nextP;
  else
    KibdGblHeadP = nextP;
  spin_unlock(&KibdLock);

  /* Free the cxiKernelIOBufferDesc_t */
  deallocKernelIOBufferDesc(kibdP);
  EXIT(0);
}

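/* Hedged usage sketch (hypothetical caller, not part of the original file):
 * the expected pairing of cxiKibdPin and cxiKibdUnpin around a direct I/O.
 * The buffer must be page aligned and a whole number of pages long; the
 * actual cxiKDoIO call is elided since its parameters depend on the caller.
 */
#if 0
static int
examplePinForIO(char* userBufP, int bufLen)
{
  struct cxiKernelIOBufferDesc_t* kibdP;
  int rc;

  /* bufLen must be a multiple of PAGE_SIZE and userBufP page aligned */
  rc = cxiKibdPin(userBufP, bufLen, &kibdP);
  if (rc != 0)
    return rc;

  /* ... attach the buffer and issue the I/O here ... */

  /* Unpin the pages and free the descriptor chain */
  cxiKibdUnpin(kibdP);
  return 0;
}
#endif
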
---|

/* Free all cxiKernelIOBufferDesc_t's, and unpin their underlying storage. */
void
cxiKibdUnpinAll()
{
  struct cxiKernelIOBufferDesc_t* nextP;
  struct cxiKernelIOBufferDesc_t* kibdP;

  ENTER(0);
  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_UNPIN_ALL_ENTER,
         "cxiKibdUnpinAll entry\n");
  for (;;)
  {
    /* Remove first cxiKernelIOBufferDesc_t on global list */
    spin_lock(&KibdLock);
    kibdP = KibdGblHeadP;
    if (kibdP == NULL)
    {
      spin_unlock(&KibdLock);
      break;
    }
    nextP = kibdP->gblNextP;
    if (nextP != NULL)
      nextP->gblPrevP = NULL;
    KibdGblHeadP = nextP;
    spin_unlock(&KibdLock);

    /* Deallocate the cxiKernelIOBufferDesc_t and unpin its storage */
    deallocKernelIOBufferDesc(kibdP);
  }
  TRACE0(TRACE_KSVFS, 1, TRCID_KIBD_UNPIN_ALL_EXIT,
         "cxiKibdUnpinAll exit\n");
  EXIT(0);
}


#ifdef MMAP_DIO
/* Create a cxiKernelIOBufferDesc_t object for a page in user address space
   that is already pinned.  The page will be mapped into kernel address
   space.  This is used by mmap routines that want to do direct I/O from
   user page to disk.  The cxiKernelIOBufferDesc_t that this routine
   creates can be passed to cxiKDoIO just like one that was created by
   cxiKibdPin. */
int
cxiKibdPinmm(struct page *pageP, struct cxiKernelIOBufferDesc_t** kibdPP)
{
  struct cxiKernelIOBufferDesc_t* kibdP;

  ENTER(0);
  kibdP = kibdAlloc();
  if (kibdP == NULL)
  {
    EXIT(0);
    return -ENOMEM;
  }

  kibdP->kibdVaddr = kmap(pageP);
  kibdP->maplist[0] = (char *)pageP;
  kibdP->kibdPages = 1;
  kibdP->kibdTotalPages = 1;

  *kibdPP = kibdP;
  EXIT(0);
  return 0;
}


/* Free a cxiKernelIOBufferDesc_t that was created by cxiKibdPinmm. */
void
cxiKibdUnpinmm(struct page *pageP, struct cxiKernelIOBufferDesc_t* kibdP)
{
  ENTER(0);
  kunmap(pageP);
  kibdFree(kibdP);
  EXIT(0);
}
#endif /* MMAP_DIO */


/* Attach an I/O buffer to the kernel's virtual address space.  The
   cxiIOBufferAttachment_t returned in *attachP must be used as a parameter of
   most of the other operations on cxiIOBuffer_t's. */
void
cxiAttachIOBuffer(struct cxiIOBuffer_t* iobP,
                  struct cxiIOBufferAttachment_t* attachP)
{
  int oldPinCount;
  int newPinCount;
  int rc;

  /* Increase the pin count on this I/O buffer.  If the buffer is not already
     pinned, call the pinBuffer callback routine to arrange for the buffer
     to be pinned, then try again. */
  ENTER(0);
  TRACE1(TRACE_KSVFS, 5, TRCID_ATTACH_ENTER,
         "cxiAttachIOBuffer: dataPtr 0x%lX\n", OffsetToDataPtr(iobP,0,0));
  for (;;)
  {
    oldPinCount = iobP->pinCount;
    DBGASSERT(oldPinCount > 0);
    if (oldPinCount == 0)
    {
      DBGASSERT(oldPinCount > 0);
      break;
      // rc = xxx->pinBufferCallback(iobP);
      // if (rc != 0)
      //   return rc;
    }
    else
    {
      newPinCount = oldPinCount+1;
      rc = compare_and_swap((atomic_p)&iobP->pinCount, &oldPinCount,
                            newPinCount);
      if (rc == 1)
        break;
    }
  }

  /* Once the pin of the buffer succeeds, it must have a
   * cxiKernelIOBufferDesc_t.  Use that as the attachment data.
   */
  DBGASSERT(iobP->kernelIOBufferDescP != NULL);
  attachP->kDescP = iobP->kernelIOBufferDescP;
  TRACE2(TRACE_KSVFS, 11, TRCID_ATTACH_KIBD,
         "cxiAttachIOBuffer: kernelIOBufferDescP 0x%lX newPinCount %d\n",
         iobP->kernelIOBufferDescP, newPinCount);
  EXIT(0);
}


/* Detach a buffer from the kernel's virtual address space. */
void
cxiDetachIOBuffer(struct cxiIOBuffer_t* iobP,
                  struct cxiIOBufferAttachment_t* attachP)
{
  /* Validate attachment data */
  ENTER(0);
  TRACE3(TRACE_KSVFS, 5, TRCID_DETACH_KIBD,
         "cxiDetachIOBuffer: dataPtr 0x%lX kDescP 0x%lX oldPinCount %d\n",
         OffsetToDataPtr(iobP,0,0), attachP->kDescP, iobP->pinCount);
  if (attachP->kDescP == NULL)
  {
    EXIT(0);
    return;
  }
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);

  /* Decrement I/O buffer pin count */
  DBGASSERT(iobP->pinCount >= 2);
  ATOMIC_ADD(&iobP->pinCount, -1);

  /* Invalidate attachment data */
  attachP->kDescP = NULL;
  EXIT(0);
}

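/* Hedged usage sketch (hypothetical caller, not part of the original file):
 * an attachment is taken before touching a pinned I/O buffer from kernel
 * code and released as soon as the transfer is done.  The cxiKXfer call in
 * the middle is only one example of what the attachment enables.
 */
#if 0
static int
exampleCopyOutOfIOBuffer(struct cxiIOBuffer_t* iobP, char* kBufP, int len)
{
  struct cxiIOBufferAttachment_t attach;
  int rc;

  cxiAttachIOBuffer(iobP, &attach);                 /* bumps the pin count */
  rc = cxiKXfer(iobP, CXI_XFER_FROM_IOBUFFER, &attach,
                0 /* bufOffset */, len, kBufP);     /* copy out of the buffer */
  cxiDetachIOBuffer(iobP, &attach);                 /* drops the pin count */
  return rc;
}
#endif
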
---|

/* Transfer len bytes beginning at offset bufOffset within I/O buffer *iobP
   to or from a user buffer.  The direction of the transfer is given with
   respect to the I/O buffer.  Returns EOK if successful, other error
   codes if unsuccessful. */
int
cxiUXfer(struct cxiIOBuffer_t* iobP, Boolean toIOBuffer,
         const struct cxiIOBufferAttachment_t* attachP,
         void* vkopP, int bufOffset, int len, struct cxiUio_t* uioP)
{
  int pageIndex;
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  int pageOffset;
  struct page * pageP;
  int pageLen;
  unsigned long kaddr;
  int rc = 0;

  ENTER(0);
  /* Validate parameters */
  TRACE5(TRACE_KSVFS, 5, TRCID_UXFER_LINUX,
         "cxiUXfer: dataPtr 0x%lX kBuf 0x%lX toIOBuf %d offset %d len %d\n",
         OffsetToDataPtr(iobP,0,0), kibdP, toIOBuffer, bufOffset, len);

  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* Transfer data in or out of as many cxiKernelIOBufferDesc_t's as necessary
     to satisfy the data move request */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  for (;;)
  {
    /* Calculate how many bytes to move in or out of the current page of the
       I/O buffer */
    if (len < pageLen)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map current I/O buffer page into the kernel's address space
       temporarily, then copy data in or out of the page */
    kaddr = (unsigned long)kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_UXFER_UIOMOVE,
           "cxiUXfer: uiomove pageIndex %d kaddr 0x%lX pageOffset %d "
           "pageLen %d\n", pageIndex, kaddr, pageOffset, pageLen);

    rc = cxiUiomove((char *)(kaddr + pageOffset), pageLen, toIOBuffer, uioP);
    kunmap(pageP);

    /* Leave loop if an error occurred on the move */
    if (rc != 0)
      break;

    /* Update length left to copy and test for loop termination */
    len -= pageLen;
    if (len <= 0)
      break;

    /* Set up for next iteration.  If the page just copied is the last
       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } /* end of do forever */

  EXIT(0);
  return rc;
}


/* Perform cross-memory transfer of len bytes from user memory in current
   task to memory in specified address space.  If toXmem is true then
   copy is from userAddrP to udataP/xmemP, otherwise the opposite. */
int
cxiXmemXfer(char *userAddrP, int len, char *udataP, cxiXmem_t *xmemP,
            Boolean toXmem)
{
  int rc = 0;
  int bufOffset, pageIndex, pageOffset, pageLen;
  void *kaddrP;
  struct page *pageP;
  struct cxiKernelIOBufferDesc_t *kibdP = xmemP->kibdP;

  ENTER(0);
  TRACE5(TRACE_KSVFS, 5, TRCID_XMEMXFER_LINUX,
         "cxiXmemXfer: userAddrP 0x%lX len %d udataP 0x%lX "
         "kibdP 0x%lX toXmem %d\n", userAddrP, len, udataP, kibdP, toXmem);

  bufOffset = udataP - kibdP->kibdVaddr;
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset + len <= kibdP->kibdTotalPages * PAGE_SIZE);

  /* Transfer data in or out of as many cxiKernelIOBufferDesc_t's as necessary
     to satisfy the data move request */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  for (;;)
  {
    /* Calculate how many bytes to move in or out of the current page of the
       I/O buffer */
    if (len < pageLen)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map current I/O buffer page into the kernel's address space
       temporarily, then copy data in or out of the page */
    kaddrP = kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_XMEMFER_COPY,
           "cxiXmemXfer: copy pageIndex %d kaddrP 0x%lX pageOffset %d "
           "pageLen %d\n", pageIndex, kaddrP, pageOffset, pageLen);

    if (toXmem)
      rc = cxiCopyIn(userAddrP, (char *)kaddrP + pageOffset, pageLen);
    else
      rc = cxiCopyOut((char *)kaddrP + pageOffset, userAddrP, pageLen);

    kunmap(pageP);

    /* Leave loop if an error occurred on the move */
    if (rc != 0)
      break;

    /* Update length left to copy and test for loop termination */
    len -= pageLen;
    if (len <= 0)
      break;

    /* Set up for next iteration.  If the page just copied is the last
       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
    userAddrP += pageLen;
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } /* end of do forever */

  EXIT(0);
  return rc;
}


---|
/* Transfer len bytes beginning at offset bufOffset within I/O buffer *iobP
   to or from a contiguous kernel buffer.  The direction of the transfer
   is given with respect to the I/O buffer.  Returns EOK if successful,
   other error codes if unsuccessful. */
int
cxiKXfer(struct cxiIOBuffer_t* iobP, Boolean toIOBuffer,
         const struct cxiIOBufferAttachment_t* attachP,
         int bufOffset, int len, char* kBufP)
{
  int pageIndex;
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  int pageOffset;
  struct page * pageP;
  int pageLen;
  unsigned long kaddr;

  /* Validate parameters */
  ENTER(0);
  TRACE6(TRACE_KSVFS, 5, TRCID_KXFER_LINUX,
         "cxiKXfer: dataPtr 0x%lX kBuf 0x%lX toIOBuf %d offset %d len %d "
         "kBufP 0x%lX\n", OffsetToDataPtr(iobP,0,0), kibdP,
         toIOBuffer, bufOffset, len, kBufP);

  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* Transfer data in or out of as many cxiKernelIOBufferDesc_t's as necessary
     to satisfy the data move request */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  for (;;)
  {
    /* Calculate how many bytes to move in or out of the current page of the
       I/O buffer */
    if (len < pageLen)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map current I/O buffer page into the kernel's address space
       temporarily, then copy data in or out of the page */
    kaddr = (unsigned long)kmap(pageP);
    TRACE5(TRACE_KSVFS, 12, TRCID_KXFER_MEMCPY,
           "cxiKXfer: move kibdP 0x%lX pageIndex %d kaddr 0x%lX "
           "pageOffset %d pageLen %d\n",
           kibdP, pageIndex, kaddr, pageOffset, pageLen);

    if (toIOBuffer)
      memcpy((void *)(kaddr + pageOffset), kBufP, pageLen);
    else
      memcpy(kBufP, (void *)(kaddr + pageOffset), pageLen);
    kunmap(pageP);

    /* Update length left to copy and test for loop termination */
    len -= pageLen;
    if (len <= 0)
      break;

    /* Set up for next iteration.  If the page just copied is the last
       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
    kBufP += pageLen;
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } /* end of do forever */

  EXIT(0);
  return 0;
}


/* Set len bytes beginning at offset bufOffset within I/O buffer *iobP
   to zero.  Returns EOK if successful, other error codes if unsuccessful. */
int
cxiKZero(struct cxiIOBuffer_t* iobP,
         const struct cxiIOBufferAttachment_t* attachP,
         int bufOffset, int len)
{
  int pageIndex;
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  int pageOffset;
  struct page * pageP;
  int pageLen;
  unsigned long kaddr;

  /* Validate parameters */
  ENTER(0);
  TRACE4(TRACE_KSVFS, 5, TRCID_KZERO_LINUX,
         "cxiKZero: dataPtr 0x%lX kBuf 0x%lX offset %d len %d\n",
         OffsetToDataPtr(iobP,0,0), kibdP, bufOffset, len);

  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* Zero data in as many cxiKernelIOBufferDesc_t's as necessary to complete
     the request */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  pageLen = PAGE_SIZE - pageOffset;
  for (;;)
  {
    /* Calculate how many bytes to zero in the current page of the I/O
       buffer */
    if (len < pageLen)
      pageLen = len;

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map current I/O buffer page into the kernel's address space
       temporarily, then zero data in the page */
    kaddr = (unsigned long)kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_KZERO_MEMSET,
           "cxiKZero: zero pageIndex %d kaddr 0x%lX pageOffset %d pageLen %d\n",
           pageIndex, kaddr, pageOffset, pageLen);
    memset((void *)(kaddr + pageOffset), 0, pageLen);
    kunmap(pageP);

    /* Update length left to zero and test for loop termination */
    len -= pageLen;
    if (len <= 0)
      break;

    /* Set up for next iteration.  If the page just zeroed is the last
       page of this cxiKernelIOBufferDesc_t, advance to the next one. */
    pageOffset = 0;
    pageLen = PAGE_SIZE;
    pageIndex += 1;
  } /* end of do forever */

  EXIT(0);
  return 0;
}


---|
/* Map an I/O buffer so it can be read and written from kernel code
   running in the context of a user thread.  Depending on the platform, the
   addresses at which the I/O buffer gets mapped may not be contiguous.  The
   details of how the buffer got mapped are handled by the
   cxiDiscontiguousDirectoryBuffer_t object that is filled in by this call.
   On some platforms, mapping buffers using this call consumes scarce
   resources, so all cxiMapDiscontiguousRW calls should be promptly matched by
   cxiUnmapDiscontiguousRW calls as soon as the operation that required access
   to the I/O buffer completes.  Returns 0 if successful, other error codes
   if unsuccessful. */
int
cxiMapDiscontiguousRW(struct cxiIOBuffer_t* iobP,
                      const struct cxiIOBufferAttachment_t* attachP,
                      struct cxiDiscontiguousDirectoryBuffer_t* discontigP)
{
  /* ?? WARNING: Since this must kmap multiple pages, there is the
     possibility of deadlock if multiple threads are part of the way through
     executing this code, and LAST_PKMAP pages (512 or 1024) have already
     been kmapped.  There needs to be flow control whereby threads reserve
     enough pages to complete all of their kmaps before they begin acquiring
     pages. */
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  int pageIndex;
  int dirIndex;
  int mapPages;
  struct page * pageP;
  unsigned long kaddr;

  /* __CXI_BUFFERS_ARE_CONTIGUOUS is not #defined */

  /* Validate parameters */
  ENTER(0);
  TRACE3(TRACE_KSVFS, 4, TRCID_MAP_DISCONTIG_ENTER,
         "cxiMapDiscontiguousRW: dataPtr 0x%lX kBufP 0x%lX ioBufLen 0x%X\n",
         OffsetToDataPtr(iobP,0,0), kibdP, iobP->ioBufLen);

  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->pinCount >= 2);

  /* The mappable buffer memory may be longer than a directory block */
  mapPages = (iobP->ioBufLen + DISCONTIG_PAGE_SIZE - 1) / DISCONTIG_PAGE_SIZE;
  mapPages = MIN(mapPages, MAX_PAGES_PER_DIRBLOCK);

  pageIndex = 0;
  for (dirIndex=0 ; dirIndex<mapPages ; dirIndex++)
  {
    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    if (pageP == NULL)
      break;

    kaddr = (unsigned long)kmap(pageP);
    TRACE4(TRACE_KSVFS, 12, TRCID_MAP_DISCONTIG_KMAP,
           "cxiMapDiscontiguousRW: dirIndex %d kibdP 0x%lX pageP 0x%lX "
           "kaddr 0x%lX\n", dirIndex, kibdP, pageP, kaddr);

    DBGASSERT(dirIndex < MAX_PAGES_PER_DIRBLOCK);
    discontigP->userPagePointerArray[dirIndex] = (char*)kaddr;
    discontigP->osPagePointerArray[dirIndex] = (void*)pageP;

    pageIndex++;
  }

  discontigP->mappedLen = dirIndex * DISCONTIG_PAGE_SIZE;
  EXIT(0);
  return 0;
}


/* Unmap an I/O buffer previously mapped */
void
cxiUnmapDiscontiguousRW(struct cxiIOBuffer_t* iobP,
                        struct cxiDiscontiguousDirectoryBuffer_t* discontigP)
{
  int pageIndex;
  struct page * pageP;
  int mappedPages;

  ENTER(0);
  TRACE4(TRACE_KSVFS, 4, TRCID_UNMAP_DISCONTIG_ENTER,
         "cxiUnmapDiscontiguousRW: dataPtr 0x%lX kBufP 0x%lX ioBufLen 0x%X "
         "mappedLen %d\n", OffsetToDataPtr(iobP,0,0), iobP->kernelIOBufferDescP,
         iobP->ioBufLen, discontigP->mappedLen);

  /* Unmap all pages in discontiguous map.  If the osPagePointerArray entry
   * is NULL, it means that the last mapping was made via MapContiguousBuffer,
   * which did not do any kmaps that need to be kunmap'ped.
   */
  mappedPages = (discontigP->mappedLen + DISCONTIG_PAGE_SIZE - 1) /
                DISCONTIG_PAGE_SIZE;

  for (pageIndex = 0; pageIndex < mappedPages; pageIndex++)
  {
    pageP = (struct page *)discontigP->osPagePointerArray[pageIndex];
    TRACE3(TRACE_KSVFS, 12, TRCID_UNMAP_DISCONTIG_KUNMAP,
           "cxiUnmapDiscontiguousRW: unmap pageIndex %d pageP 0x%lX "
           "kaddr 0x%lX\n", pageIndex, pageP,
           discontigP->userPagePointerArray[pageIndex]);

    if (pageP != NULL)
    {
      kunmap(pageP);
      discontigP->osPagePointerArray[pageIndex] = NULL;
    }
    discontigP->userPagePointerArray[pageIndex] = NULL;
  }
  discontigP->mappedLen = 0;
  EXIT(0);
}

---|
/* Return an address in kernel memory that holds a contiguous read-only
   copy of a portion of an I/O buffer.  If possible, this will be a
   mapping of the I/O buffer.  If necessary, this routine will allocate a
   new block of kernel memory and copy the requested data to it.  The
   returned cxiContiguousBuffer_t encapsulates what method was used, so
   that cxiUnmapContiguousRO can release whatever resources were obtained by
   this call.  Returns 0 if successful, other error codes if
   unsuccessful. */
int
cxiMapContiguousRO(struct cxiIOBuffer_t* iobP,
                   const struct cxiIOBufferAttachment_t* attachP,
                   int bufOffset, int len, const char** contigBasePP,
                   struct cxiContiguousBuffer_t* contigP)
{
  int pageIndex;
  int pageOffset;
  int endPageIndex;
  struct cxiKernelIOBufferDesc_t* kibdP = iobP->kernelIOBufferDescP;
  struct page * pageP;
  unsigned long kaddr;
  char* tempBufP;
  Boolean usedKmalloc;
  int rc;

  /* Validate parameters */
  ENTER(0);
  TRACE4(TRACE_KSVFS, 4, TRCID_MAP_CONTIG_ENTER,
         "cxiMapContiguousRO: dataPtr 0x%lX kBufP 0x%lX bufOffset %d len %d\n",
         OffsetToDataPtr(iobP,0,0), kibdP, bufOffset, len);

  DBGASSERT(bufOffset >= 0);
  DBGASSERT(bufOffset+len <= iobP->ioBufLen);
  DBGASSERT(attachP->kDescP == iobP->kernelIOBufferDescP);
  DBGASSERT(kibdP->kibdVaddr == OffsetToDataPtr(iobP,0,0));
  DBGASSERT(iobP->ioBufLen/PAGE_SIZE <= kibdP->kibdTotalPages);
  DBGASSERT(iobP->pinCount >= 2);

  /* If the requested piece of the I/O buffer does not cross a page boundary,
     then map the page and return the mapped address within the page */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  endPageIndex = (bufOffset+len-1) / PAGE_SIZE;
  if (pageIndex == endPageIndex)
  {
    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Map I/O buffer page into the kernel's address space */
    kaddr = (unsigned long)kmap(pageP);

    /* Return address within the mapped page, and set map state so
       cxiUnmapContiguousRO knows to do kunmap */
    *contigBasePP = (char*) (kaddr+pageOffset);
    contigP->mallocedBaseP = NULL;
    contigP->usedKmalloc = false;
    contigP->pageP = pageP;
    TRACE2(TRACE_KSVFS, 5, TRCID_MAP_CONTIG_KMAP,
           "cxiMapContiguousRO: mapped pageP 0x%lX at 0x%lX\n",
           pageP, *contigBasePP);
    EXIT(0);
    return 0;
  }

  /* Otherwise, the requested part of the I/O buffer spans page boundaries.
     Allocate a contiguous buffer, and copy data from the I/O buffer to the
     temporary buffer. */
  else
  {
    if (len <= PAGE_SIZE)
    {
      tempBufP = (char *)kmalloc(len, GFP_KERNEL);
      usedKmalloc = true;
    }
    else
    {
      tempBufP = (char*)vmalloc(len);
      usedKmalloc = false;
    }
    if (tempBufP == NULL)
    {
      EXIT(0);
      return -ENOMEM;
    }
    rc = cxiKXfer(iobP, CXI_XFER_FROM_IOBUFFER, attachP, bufOffset, len,
                  tempBufP);
    if (rc != 0)
    {
      if (usedKmalloc)
        kfree((void*)tempBufP);
      else
        vfree((void*)tempBufP);
      EXIT(0);
      return rc;
    }
#ifdef MALLOC_DEBUG
    MallocDebugNew(tempBufP, len, 4);
#endif

    /* Return address within the contiguous temporary buffer, and set map
       state so cxiUnmapContiguousRO knows to do vfree */
    *contigBasePP = tempBufP;
    contigP->mallocedBaseP = tempBufP;
    contigP->usedKmalloc = usedKmalloc;
    contigP->pageP = NULL;
    TRACE1(TRACE_KSVFS, 5, TRCID_MAP_CONTIG_VMALLOC,
           "cxiMapContiguousRO: copied to 0x%lX\n", tempBufP);
    EXIT(0);
    return 0;
  }
}


/* Release a mapping or copy obtained with cxiMapContiguousRO */
void
cxiUnmapContiguousRO(struct cxiIOBuffer_t* iobP,
                     struct cxiContiguousBuffer_t* contigP)
{
  ENTER(0);
  if (contigP->mallocedBaseP != NULL)
  {
    TRACE2(TRACE_KSVFS, 4, TRCID_UNMAP_CONTIG_VFREE,
           "cxiUnmapContiguousRO: dataPtr 0x%lX vfree 0x%lX\n",
           OffsetToDataPtr(iobP,0,0), contigP->mallocedBaseP);
    DBGASSERT(contigP->pageP == NULL);

    if (contigP->usedKmalloc)
      kfree((void*)contigP->mallocedBaseP);
    else
      vfree((void*)contigP->mallocedBaseP);

#ifdef MALLOC_DEBUG
    MallocDebugDelete(contigP->mallocedBaseP);
#endif
    contigP->mallocedBaseP = NULL;
  }
  else
  {
    TRACE2(TRACE_KSVFS, 4, TRCID_UNMAP_CONTIG_KUNMAP,
           "cxiUnmapContiguousRO: dataPtr 0x%lX kunmap 0x%lX\n",
           OffsetToDataPtr(iobP,0,0), contigP->pageP);
    DBGASSERT(contigP->pageP != NULL);
    kunmap((struct page *)contigP->pageP);
    contigP->pageP = NULL;
  }
  EXIT(0);
}

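/* Hedged usage sketch (hypothetical caller, not part of the original file):
 * cxiMapContiguousRO either kmaps a single page or allocates and fills a
 * temporary buffer, and cxiUnmapContiguousRO releases whichever was used,
 * so the two calls must always be paired.
 */
#if 0
static int
examplePeekAtIOBuffer(struct cxiIOBuffer_t* iobP,
                      const struct cxiIOBufferAttachment_t* attachP,
                      int bufOffset, int len)
{
  const char* dataP;
  struct cxiContiguousBuffer_t contig;
  int rc;

  rc = cxiMapContiguousRO(iobP, attachP, bufOffset, len, &dataP, &contig);
  if (rc != 0)
    return rc;

  /* ... read-only use of dataP[0 .. len-1] goes here ... */

  cxiUnmapContiguousRO(iobP, &contig);   /* kunmap or free the copy */
  return 0;
}
#endif
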
---|
| 1244 | |
---|
| 1245 | #if LINUX_KERNEL_VERSION < 2050000 |
---|
| 1246 | /* iodone routine for GPFS buffer_heads. Unlock buffer and wake up |
---|
| 1247 | * waiters, if any. |
---|
| 1248 | */ |
---|
| 1249 | static void |
---|
| 1250 | BHioDone(struct buffer_head* bhP, int uptodate) |
---|
| 1251 | { |
---|
| 1252 | struct cxiBufHeadChunk_t* bhcP; |
---|
| 1253 | |
---|
| 1254 | mark_buffer_uptodate(bhP, uptodate); |
---|
| 1255 | bhcP = (struct cxiBufHeadChunk_t*)bhP->b_private; |
---|
| 1256 | unlock_buffer(bhP); |
---|
| 1257 | atomic_dec(&bhcP->nBHActive); |
---|
| 1258 | } |
---|
| 1259 | |
---|
| 1260 | /* Start a read or write of the given sectors from dev. Data should be |
---|
| 1261 | * placed into the I/O buffer beginning at byte offset bufOffset. Returns |
---|
| 1262 | * 0 on success, negative values on error. All of the data to be |
---|
| 1263 | * transferred will be in the first cxiKernelIOBufferDesc_t. |
---|
| 1264 | */ |
---|
| 1265 | int |
---|
| 1266 | cxiStartIO(struct cxiKernelIOBufferDesc_t* kibdHeadP, |
---|
| 1267 | Boolean isWrite, cxiDev_t dev, UInt64 startSector, int nSectors, |
---|
| 1268 | int bufOffset, struct cxiBufHeadChunk_t** bhcHeadPP) |
---|
| 1269 | { |
---|
| 1270 | int bufEndOffset; |
---|
| 1271 | int nTotalPages; |
---|
| 1272 | struct cxiBufHeadChunk_t* bhcP; |
---|
| 1273 | struct cxiBufHeadChunk_t* bhcHeadP; |
---|
| 1274 | struct cxiBufHeadChunk_t* bhcTailP; |
---|
| 1275 | int nBHsAllocated; |
---|
| 1276 | int pageIndex; |
---|
| 1277 | int pageOffset; |
---|
| 1278 | int sectorsThisBH; |
---|
| 1279 | struct buffer_head* bhP; |
---|
| 1280 | struct page* pageP; |
---|
| 1281 | struct cxiBufHeadChunk_t* p; |
---|
| 1282 | struct cxiKernelIOBufferDesc_t* kibdP = kibdHeadP; |
---|
| 1283 | kdev_t kdev = cxiDevToKernelDev(dev); /* Convert to kernel version of dev_t */ |
---|
| 1284 | |
---|
| 1285 | /* Validate parameters */ |
---|
| 1286 | ENTER(0); |
---|
| 1287 | TRACE6(TRACE_IO, 4, TRCID_KDOIO_LINUX, |
---|
| 1288 | "cxiStartIO: kBuf 0x%lX isWrite %d dev 0x%X sector %llu nSectors %d " |
---|
| 1289 | "offset %d\n", kibdP, isWrite, dev, startSector, nSectors, bufOffset); |
---|
| 1290 | |
---|
| 1291 | DBGASSERT(kibdP != NULL); |
---|
| 1292 | DBGASSERT(bufOffset >= 0); |
---|
| 1293 | DBGASSERT(nSectors > 0); |
---|
| 1294 | |
---|
| 1295 | /* Compute the total number of pages spanned by the portion of the |
---|
| 1296 | buffer that will participate in the I/O. This equals the number |
---|
| 1297 | of buffer_heads that will be used. */ |
---|
| 1298 | bufEndOffset = bufOffset + nSectors*512 - 1; |
---|
| 1299 | nTotalPages = (bufEndOffset/PAGE_SIZE) - (bufOffset/PAGE_SIZE) + 1; |

  /* Allocate the entire list of buffer_head chunks needed for this I/O */
  bhcP = (struct cxiBufHeadChunk_t*) kmem_cache_alloc(BhcCacheP, GFP_KERNEL);
  bhcHeadP = bhcP;
  if (bhcP == NULL)
    goto enomem;

  bhcP->bhcNextP = bhcP;
  bhcP->bhcPrevP = bhcP;
  bhcP->nBHUsed = 0;
  atomic_set(&bhcP->nBHActive, 0);
  nBHsAllocated = BUFFER_HEADS_PER_CHUNK;

  while (nBHsAllocated < nTotalPages)
  {
    bhcP = (struct cxiBufHeadChunk_t*) kmem_cache_alloc(BhcCacheP, GFP_KERNEL);
    if (bhcP == NULL) goto enomem;

    bhcTailP = bhcHeadP->bhcPrevP;
    bhcP->bhcNextP = bhcHeadP;
    bhcP->bhcPrevP = bhcTailP;
    bhcTailP->bhcNextP = bhcP;
    bhcHeadP->bhcPrevP = bhcP;
    bhcP->nBHUsed = 0;
    atomic_set(&bhcP->nBHActive, 0);
    nBHsAllocated += BUFFER_HEADS_PER_CHUNK;
  }

  /* Build and submit a buffer_head for each page of the current I/O */
  bhcP = bhcHeadP;
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;

  DBGASSERT(pageOffset%512 == 0);
  sectorsThisBH = MIN((PAGE_SIZE-pageOffset) / 512, nSectors);
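  /* Continuing the example above: pageOffset = 3584, so the first
   * buffer_head covers (4096-3584)/512 = 1 sector and the remaining
   * 3 sectors fall into the next page.
   */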
  while (nSectors > 0)
  {
    /* Get a buffer_head for the next page */
    if (bhcP->nBHUsed == BUFFER_HEADS_PER_CHUNK)
    {
      bhcP = bhcP->bhcNextP;
      DBGASSERT(bhcP->nBHUsed == 0);
    }
    bhP = &bhcP->bh[bhcP->nBHUsed];
    bhcP->nBHUsed += 1;

    /* Initialize the new buffer_head */
    memset(bhP, 0, sizeof(*bhP));

    KIBD_GET_PAGE(kibdP, pageIndex, pageP);
    DBGASSERT(pageP != NULL);

    /* Build and submit the buffer_head for the current page */
    bhP->b_size = sectorsThisBH * 512;
    bhP->b_page = pageP;
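    /* A highmem page has no permanent kernel-virtual mapping, so b_data
     * cannot hold a usable address for it; only the byte offset within
     * the page is recorded, and b_page identifies the page itself.
     */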
#ifndef __64BIT__
    if (PageHighMem(pageP))
      bhP->b_data = (char *)(0 + pageOffset);
    else
#endif
      bhP->b_data = page_address(pageP) + pageOffset;

    bhP->b_this_page = bhP;
    bhP->b_end_io = BHioDone;
    bhP->b_private = (void*)bhcP;
    bhP->b_blocknr = startSector;
    init_waitqueue_head(&bhP->b_wait);
    bhP->b_dev = kdev;
    bhP->b_rdev = kdev;
    bhP->b_rsector = startSector;
    bhP->b_list = BUF_CLEAN;
    bhP->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req) |
                   (1 << BH_Uptodate);
    TRACE6(TRACE_IO, 6, TRCID_KDOIO_LINUX_BH,
           "cxiStartIO: bhcP 0x%lX bhP 0x%lX sector %llu sectorsThisBH %d state 0x%X pageP 0x%lX\n",
           bhcP, bhP, startSector, sectorsThisBH, bhP->b_state, pageP);

    atomic_inc(&bhcP->nBHActive);

    generic_make_request(isWrite, bhP);

    if (isWrite)
      MOD_PGPGOUT(sectorsThisBH);
    else
      MOD_PGPGIN(sectorsThisBH);

    /* Advance to next page */
    startSector += sectorsThisBH;
    nSectors -= sectorsThisBH;
    sectorsThisBH = MIN(nSectors, PAGE_SIZE/512);
    pageIndex += 1;
    pageOffset = 0;
  }

  /* Unplug the disk to be sure I/Os actually get started */
  run_task_queue(&tq_disk);

  /* Set success return code and return list of active buffer_heads */
  *bhcHeadPP = bhcHeadP;
  EXIT(0);
  return 0;

enomem:

  /* Free buffer_head chunks allocated so far and return failure */
  if (bhcHeadP != NULL)
  {
    bhcP = bhcHeadP;
    bhcTailP = bhcHeadP->bhcPrevP;
    do
    {
      p = bhcP;
      bhcP = bhcP->bhcNextP;
      kmem_cache_free(BhcCacheP, (void*)p);
    }
    while (p != bhcTailP);
  }
  EXIT(0);
  return -ENOMEM;
}

/* Routine to set up the disk block size and get disk parameters */
int
GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP)
{
  kdev_t kdev;
  int n1KBlocks;

  /* Convert to kernel version of dev_t */
  ENTER(0);
  kdev = cxiDevToKernelDev(devId);

  /* Get hardware sector size.  If unknown, assume 512. */
#if LINUX_KERNEL_VERSION >= 2040312
  diskInfoP->sectorSize = get_hardsect_size(kdev);
#else
  diskInfoP->sectorSize = get_hardblocksize(kdev);
#endif
  if (diskInfoP->sectorSize == 0)
    diskInfoP->sectorSize = 512;

  /* Set blocksize of this device to hardware sector size */
  set_blocksize(kdev, diskInfoP->sectorSize);

  /* If defined, return number of sectors on device */
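  /* (blk_size[major][minor] gives the device size in units of 1 KiB
   * blocks, hence the conversion to sectors below.)
   */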
  n1KBlocks = 0;
  if (blk_size[MAJOR(kdev)])
    n1KBlocks = blk_size[MAJOR(kdev)][MINOR(kdev)];
  diskInfoP->totalSectors = (Int64)n1KBlocks * 1024 / diskInfoP->sectorSize;
  TRACE3(TRACE_IO, 2, TRCID_DISKINFO,
         "GetDiskInfo: devId %08lX sector size %d totalSectors %lld\n",
         devId, diskInfoP->sectorSize, diskInfoP->totalSectors);
#if 0
  printk("VMALLOC_START=0x%lX VMALLOC_END=0x%lX\n",
         VMALLOC_START, VMALLOC_END);
#endif

  EXIT(0);
  return 0;
}

#else /* >= 2050000 */

/* iodone routine for struct bio */
static int
bioDone(struct bio *bioP, unsigned int done, int err)
{
  struct buffer_head *bhP;
  struct cxiBufHeadChunk_t *bhcP;

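  /* On these kernels the bi_end_io callback may be invoked more than once
   * as portions of the bio complete; a nonzero residual bi_size means more
   * transfer is outstanding, so only the final call falls through to update
   * the dummy buffer_head and wake the waiter.
   */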
  if (bioP->bi_size)
    return 1;

  /* Wake up waiters, who will deallocate the bio and buffer head chunk */
  bhP = (struct buffer_head *)bioP->bi_private;
  bhcP = (struct cxiBufHeadChunk_t *)bhP->b_private;

  if (test_bit(BIO_UPTODATE, &bioP->bi_flags))
    set_buffer_uptodate(bhP);
  else
    clear_buffer_uptodate(bhP);

  unlock_buffer(bhP);
  atomic_dec(&bhcP->nBHActive);

  return 0;
}

/* Start a read or write of the given sectors from dev.  Data should be
 * placed into the I/O buffer beginning at byte offset bufOffset.  Returns
 * 0 on success, negative values on error.  All of the data to be
 * transferred will be in the first cxiKernelIOBufferDesc_t.
 */
int
cxiStartIO(struct cxiKernelIOBufferDesc_t *kibdHeadP,
           Boolean isWrite, cxiDev_t dev, UInt64 startSector, int nSectors,
           int bufOffset, struct cxiBufHeadChunk_t **bhcHeadPP)
{
  int i;
  int vecsAllocated;
  int bufEndOffset;
  int nTotalPages;
  int iovIndex;
  int pageIndex;
  int pageOffset;
  int sectorsThisPage;
  int nBHsAllocated;
  struct bio *bioP;
  struct buffer_head *bhP;
  struct page *pageP;
  struct cxiBufHeadChunk_t *p;
  struct cxiBufHeadChunk_t *bhcP;
  struct cxiBufHeadChunk_t *bhcHeadP;
  struct cxiBufHeadChunk_t *bhcTailP;
  struct cxiKernelIOBufferDesc_t *kibdP = kibdHeadP;
  struct block_device *bdevP = bdget(new_decode_dev(dev));
  int maxIOVec = bio_get_nr_vecs(bdevP);  /* query max device vectors */
  request_queue_t* reqQP;

  ENTER(0);

  LOGASSERT(bdevP != NULL && bdevP->bd_disk != NULL);

  /* Validate parameters */
  TRACE6(TRACE_IO, 4, TRCID_KDOIO_LINUX_BIO,
         "cxiStartIO: kBuf 0x%lX isWrite %d dev 0x%X sector %llu nSectors %d "
         "offset %d\n", kibdP, isWrite, dev, startSector, nSectors, bufOffset);

  DBGASSERT(kibdP != NULL);
  DBGASSERT(bufOffset >= 0);
  DBGASSERT(nSectors > 0);

  /* Compute the total number of pages spanned by the portion of the
   * buffer that will participate in the I/O.  This equals the number
   * of io vectors needed.
   */
  bufEndOffset = bufOffset + nSectors*512 - 1;
  nTotalPages = (bufEndOffset/PAGE_SIZE) - (bufOffset/PAGE_SIZE) + 1;

  /* Compute the pageIndex in the kibd struct as well as the offset
   * in the first page to read/write.
   */
  pageIndex = bufOffset / PAGE_SIZE;
  pageOffset = bufOffset % PAGE_SIZE;
  DBGASSERT(pageOffset%512 == 0);

  /* Allocate a single buffer_head chunk and link it to itself.
   * Subsequent buffer_head chunks may be needed and are allocated
   * below.
   */
  bhcP = (struct cxiBufHeadChunk_t *)kmem_cache_alloc(BhcCacheP, GFP_KERNEL);
  bhcHeadP = bhcP;
  if (bhcP == NULL)
    goto enomem;

  bhcP->bhcNextP = bhcP;  /* circular link to itself */
  bhcP->bhcPrevP = bhcP;
  bhcP->nBHUsed = 0;
  atomic_set(&bhcP->nBHActive, 0);
  nBHsAllocated = BUFFER_HEADS_PER_CHUNK;

  while (nSectors > 0)
  {
    vecsAllocated = MIN(nTotalPages, maxIOVec);
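    /* A single bio can carry at most maxIOVec pages on this device; when
     * the transfer spans more pages than that, this enclosing loop issues
     * additional bios for the remaining sectors.
     */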

    bioP = bio_alloc(GFP_NOIO, vecsAllocated);
    if (bioP == NULL)
      goto enomem;

    /* Allocate a buffer head and point to it from the bio struct.
     * We submit the bio struct directly but wait on the dummy
     * buffer_head struct, since primitives exist for waiting/wakeup
     * there.  We want to submit bios instead of buffer heads since
     * the bio can encapsulate a larger I/O whereas buffer_heads can
     * only do a page.
     */
    if (bhcP->nBHUsed == BUFFER_HEADS_PER_CHUNK)
    {
      bhcP = (struct cxiBufHeadChunk_t *)kmem_cache_alloc(BhcCacheP,
                                                          GFP_KERNEL);
      if (bhcP == NULL)
        goto enomem;

      bhcTailP = bhcHeadP->bhcPrevP;
      bhcP->bhcNextP = bhcHeadP;
      bhcP->bhcPrevP = bhcTailP;
      bhcTailP->bhcNextP = bhcP;
      bhcHeadP->bhcPrevP = bhcP;
      bhcP->nBHUsed = 0;

      atomic_set(&bhcP->nBHActive, 0);
      nBHsAllocated += BUFFER_HEADS_PER_CHUNK;
    }

    /* Use next available buffer head and increment used count */
    bhcP->biop[bhcP->nBHUsed] = bioP;
    bhP = &bhcP->bh[bhcP->nBHUsed];
    bhcP->nBHUsed++;

    bhP->b_state = 0;
    atomic_set(&bhP->b_count, 1);  /* set to one for unlock_buffer */
    bhP->b_this_page = NULL;
    bhP->b_page = NULL;
    bhP->b_blocknr = 0;
    bhP->b_size = 0;
    bhP->b_data = NULL;
    bhP->b_bdev = NULL;

    /* buffer head points to buffer head chunk */
    bhP->b_private = (void *)bhcP;

    iovIndex = 0;
    bioP->bi_vcnt = 0;  /* accumulated below as number of bi_io_vecs */
    bioP->bi_idx = 0;   /* used by lower layer for recording current index */
    bioP->bi_size = 0;
    bioP->bi_bdev = bdevP;
    bioP->bi_end_io = bioDone;

    /* bio points to buffer head that we'll wait on */
    bioP->bi_private = (void *)bhP;
    bioP->bi_sector = startSector;

    sectorsThisPage = MIN((PAGE_SIZE-pageOffset) / 512, nSectors);

    while (iovIndex < vecsAllocated)
    {
      KIBD_GET_PAGE(kibdP, pageIndex, pageP);
      DBGASSERT(pageP != NULL);

      bioP->bi_io_vec[iovIndex].bv_page = pageP;
      bioP->bi_io_vec[iovIndex].bv_len = sectorsThisPage * 512;
      bioP->bi_io_vec[iovIndex].bv_offset = pageOffset;
      TRACE6(TRACE_IO, 6, TRCID_KDOIO_LINUX_BIO_PAGE,
             "cxiStartIO: bhcP 0x%lX bioP 0x%lX index %d sector %llu sectorsThisPage %d pageP 0x%lX\n",
             bhcP, bioP, iovIndex, startSector, sectorsThisPage, pageP);
      iovIndex++;

      bioP->bi_vcnt = iovIndex;
      bioP->bi_size += (sectorsThisPage * 512);

      /* Advance to next page */
      startSector += sectorsThisPage;
      nSectors -= sectorsThisPage;
      sectorsThisPage = MIN(nSectors, PAGE_SIZE/512);
      pageIndex += 1;
      pageOffset = 0;
    }

    bufOffset += bioP->bi_size;
    nTotalPages -= bioP->bi_vcnt;

    /* Fill in a couple of fields in this dummy buffer head
     * that will be examined in unlock_buffer().
     */
    set_buffer_locked(bhP);
    bhP->b_page = pageP;

    atomic_inc(&bhcP->nBHActive);

    submit_bio(isWrite, bioP);
  }

  /* Unplug the device queue to avoid 3ms delay when no other I/O in
     progress on the device */
  reqQP = bdev_get_queue(bdevP);
  if (reqQP->unplug_fn != NULL)
    reqQP->unplug_fn(reqQP);

  *bhcHeadPP = bhcHeadP;
  EXIT(0);
  return 0;

enomem:

  /* Free buffer_head chunks allocated so far and return failure */
  if (bhcHeadP != NULL)
  {
    bhcP = bhcHeadP;
    bhcTailP = bhcHeadP->bhcPrevP;
    do
    {
      for (i = 0; i < bhcP->nBHUsed; i++)
        bio_put(bhcP->biop[i]);

      p = bhcP;
      bhcP = bhcP->bhcNextP;
      kmem_cache_free(BhcCacheP, (void*)p);
    }
    while (p != bhcTailP);
  }
  EXIT(0);
  return -ENOMEM;
}

/* Routine to set up the disk block size and get disk parameters */
int
GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP)
{
  struct block_device *bdevP = bdget(new_decode_dev(devId));

  ENTER(0);
  LOGASSERT(bdevP != NULL && bdevP->bd_disk != NULL);

  diskInfoP->sectorSize = bdev_hardsect_size(bdevP);

  if (diskInfoP->sectorSize == 0)
    diskInfoP->sectorSize = 512;

  /* Set blocksize of this device to hardware sector size */
  set_blocksize(bdevP, diskInfoP->sectorSize);

  DBGASSERT(bdevP->bd_inode != NULL);
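  /* i_size of the block device inode is the device capacity in bytes */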
  diskInfoP->totalSectors = bdevP->bd_inode->i_size / diskInfoP->sectorSize;

  TRACE3(TRACE_IO, 2, TRCID_GET_DISKINFOX,
         "GetDiskInfoX: devId %08lX sector size %d totalSectors %lld\n",
         devId, diskInfoP->sectorSize, diskInfoP->totalSectors);
  EXIT(0);
  return 0;
}
#endif

/* Wait for a group of I/Os to complete.  Free the buffer heads after all
 * I/O is finished.  Returns -EIO if any buffer_head had an error.
 */
static int
cxiWaitIO(struct cxiBufHeadChunk_t *bhcHeadP)
{
  int i;
  int rc;
  struct buffer_head* bhP;
  struct cxiBufHeadChunk_t* bhcP;
  struct cxiBufHeadChunk_t* p;
#if LINUX_KERNEL_VERSION >= 2050000
  struct bio *bioP;
#endif

  /* Wait for I/O to be complete on all buffer_heads.  Wait on buffer_heads
   * in the reverse of the order in which I/O was started.  By waiting on
   * the last buffer_head first, it is likely that the calling thread will
   * only have to sleep once.
   */
  ENTER(0);
  rc = 0;
  DBGASSERT(bhcHeadP != NULL);
  bhcP = bhcHeadP->bhcPrevP;
  do
  {
    for (i = bhcP->nBHUsed-1; i >= 0; i--)
    {
      bhP = &bhcP->bh[i];
#if LINUX_KERNEL_VERSION >= 2050000
      bioP = bhcP->biop[i];

      TRACE5(TRACE_IO, 12, TRCID_KWAITIO_BIO,
             "cxiWaitIO: bhP 0x%lX bioP 0x%lX sector %d size %d state 0x%lX\n",
             bhP, bioP, bioP->bi_sector, bioP->bi_size, bioP->bi_flags);
#else
      TRACE4(TRACE_IO, 12, TRCID_KWAITIO_BH,
             "cxiWaitIO: bhP 0x%lX sector %d size %d state 0x%lX\n",
             bhP, bhP->b_blocknr, bhP->b_size, bhP->b_state);
#endif

      wait_on_buffer(bhP);
      if (!buffer_uptodate(bhP))
      {
#if LINUX_KERNEL_VERSION >= 2050000
        TRACE5(TRACE_IO, 1, TRCID_KWAITIO_BIO_ERR,
               "cxiWaitIO: bhP 0x%lX bioP 0x%lX sector %d size %d "
               "state 0x%lX\n", bhP, bioP, bioP->bi_sector, bioP->bi_size,
               bioP->bi_flags);
#else
        TRACE4(TRACE_IO, 1, TRCID_KWAITIO_BH_ERR,
               "cxiWaitIO: error bhP 0x%lX sector %d size %d state 0x%lX\n",
               bhP, bhP->b_blocknr, bhP->b_size, bhP->b_state);
#endif
        rc = -EIO;
      }
#if LINUX_KERNEL_VERSION >= 2050000
      bio_put(bioP);
      bhcP->biop[i] = NULL;
#endif
    }

    p = bhcP;
    bhcP = bhcP->bhcPrevP;

    /* All of the I/Os in all of the buffer_heads inside of the
     * cxiBufHeadChunk_t pointed to by p are complete (the BH_Lock bits
     * have all been turned off).  However, it is possible that some I/O
     * completion handlers may not yet have returned from BHioDone and
     * therefore may not have finished accessing fields within the chunk
     * of buffer_heads.  The nBHActive count keeps track of how many
     * completion routines have not yet returned.  If this is non-zero,
     * the cxiBufHeadChunk_t cannot be freed yet.  Delay briefly to
     * allow the interrupt handler on another processor to complete,
     * then free the cxiBufHeadChunk_t.  Repeat the delay until the
     * cxiBufHeadChunk_t is no longer in use by any interrupt handlers.
     */
    while (atomic_read(&p->nBHActive) > 0)
    {
      TRACE2(TRACE_IO, 1, TRCID_KWAITIO_BH_BUSY,
             "cxiWaitIO: p 0x%lX waiting for %d I/O completion handlers\n",
             p, atomic_read(&p->nBHActive));
      cxiSleep(10);
      atomic_inc(&cxiWaitIONDelays);
    }

    kmem_cache_free(BhcCacheP, (void*)p);

  }
  while (p != bhcHeadP);

  EXIT(0);
  return rc;
}

/* Read or write the given sectors from dev.  Data should be placed into the
 * I/O buffer beginning at byte offset bufOffset.  Returns EOK on success,
 * negative values on error.  All of the data to be transferred will be in
 * the first cxiKernelIOBufferDesc_t.
 */
int
cxiKDoIO(struct cxiKernelIOBufferDesc_t* kibdP,
         Boolean isWrite, cxiDev_t dev, UInt64 startSector,
         int nSectors, int sectorSize, int bufOffset)
{
  int rc;
  struct cxiBufHeadChunk_t* bhcHeadP;

  ENTER(0);
  DBGASSERT(sectorSize == 512);

#ifdef KCSTRACE
  current->kcst_info.data[0] = dev;
  current->kcst_info.data[1] = startSector;
  current->kcst_info.data[2] = nSectors;
#endif

  rc = cxiStartIO(kibdP, isWrite, dev, startSector, nSectors,
                  bufOffset, &bhcHeadP);
  if (rc == 0)
    rc = cxiWaitIO(bhcHeadP);

#ifdef KCSTRACE
  current->kcst_info.data[0] = 0;
  current->kcst_info.data[1] = 0;
  current->kcst_info.data[2] = 0;
#endif

  EXIT(0);
  return rc;
}
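
/* Illustrative sketch only, not part of the module (kept under #if 0 like
 * the debug block above): one way a caller could overlap two reads by
 * starting both with cxiStartIO and then waiting on each with cxiWaitIO,
 * which is the same start/wait split cxiKDoIO performs for a single
 * transfer.  The function name and the kibd1/kibd2/sector1/sector2/nSect
 * parameters are hypothetical.
 */
#if 0
static int
exampleOverlappedReads(struct cxiKernelIOBufferDesc_t* kibd1,
                       struct cxiKernelIOBufferDesc_t* kibd2,
                       cxiDev_t dev, UInt64 sector1, UInt64 sector2,
                       int nSect)
{
  struct cxiBufHeadChunk_t* bhc1;
  struct cxiBufHeadChunk_t* bhc2;
  int rc1, rc2;

  /* Queue both reads (isWrite == 0) before waiting on either one */
  rc1 = cxiStartIO(kibd1, 0, dev, sector1, nSect, 0, &bhc1);
  rc2 = cxiStartIO(kibd2, 0, dev, sector2, nSect, 0, &bhc2);

  /* Wait for completion; cxiWaitIO also frees the buffer_head chunks */
  if (rc1 == 0)
    rc1 = cxiWaitIO(bhc1);
  if (rc2 == 0)
    rc2 = cxiWaitIO(bhc2);

  return (rc1 != 0) ? rc1 : rc2;
}
#endif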