/*************************************************************************** * * Copyright (C) 2001 International Business Machines * All rights reserved. * * This file is part of the GPFS mmfslinux kernel module. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* *************************************************************************** */
/* @(#)13 1.45.1.2 src/avs/fs/mmfs/ts/kernext/ibm-linux/cxiIOBuffer-plat.h, mmfs, avs_rgpfs24, rgpfs24s007a 10/25/06 17:16:36 */

/*
 * Abstraction of an I/O buffer, Linux implementation
 *
 * Contents:
 *   struct cxiKernelIOBufferDesc_t
 *   struct cxiIOBufferAttachment_t
 *   InitBufferAttachment
 *   struct cxiContiguousBuffer_t
 *   InitContiguousBuffer
 *   EXTERNC int kxPinKernelIOBuffer
 *   Methods for manipulating cxiIOBuffer_t's
 *   #define __CXI_BUFFERS_ARE_CONTIGUOUS
 *   GetDiskInfoX
 */

#ifndef _h_cxiIOBuffer_plat
#define _h_cxiIOBuffer_plat

/* This header is only meant to be pulled in by the generic cxiIOBuffer
   header, which is expected to define _h_cxiIOBuffer first. */
#ifndef _h_cxiIOBuffer
#error Platform header (XXX-plat.h) should not be included directly
#endif

/* Address of the first byte past the end of memory addressable by
   processes (PAGE_OFFSET), and routine to get this value from the kernel.
   Stacks are below this address. */
EXTERNC UIntPtr KernelBoundary;
EXTERNC int kxGetKernelBoundary(UIntPtr* kBoundP);

/* forward declarations */
struct page;

/* User address space range used for page pool.  Exactly one of the
   GPFS_ARCH_* macros is expected to be defined; each value sits at a fixed
   offset above the matching TM_POOL_START defined further below. */
#ifdef GPFS_ARCH_I386
/* TM pool address + 320MB */
#define LINUX_PAGE_POOL_BASE 0x44000000
#endif
#ifdef GPFS_ARCH_POWER
/* TM pool address + 320MB */
#define LINUX_PAGE_POOL_BASE 0x54000000
#endif
#ifdef GPFS_ARCH_IA64
/* Use shared memory region after TM pool */
/* TM pool address + 4GB */
#define LINUX_PAGE_POOL_BASE 0x6000010100000000
#endif
#ifdef GPFS_ARCH_PPC64
/* TM pool address + 2GB */
#define LINUX_PAGE_POOL_BASE 0x0000002080000000UL
#endif
#ifdef GPFS_ARCH_X86_64
/* TM pool address + 4GB */
#define LINUX_PAGE_POOL_BASE 0x0000005100000000UL
#endif

#define POOL_MMAP_CHUNK_SIZE 0x04000000 /* 64MB */

/* Address where token manager malloc pool begins */
#ifdef GPFS_ARCH_I386
# define TM_POOL_START 0x30000000
#endif
#ifdef GPFS_ARCH_POWER
# define TM_POOL_START 0x40000000
#endif
#ifdef GPFS_ARCH_IA64
# define TM_POOL_START 0x6000010000000000
#endif
#ifdef GPFS_ARCH_PPC64
/* this is the top portion of the brk area.  We count on the daemon never
   growing big enough to get here.
There's only 2TB of address space available in userspace on ppc64, so
   a completely safe spot is hard to find */
# define TM_POOL_START 0x0000002000000000UL
#endif
#ifdef GPFS_ARCH_X86_64
# define TM_POOL_START 0x0000005000000000UL
#endif

/* Buffers in user address space must be aligned to a boundary of this size
   (in bytes) in order to perform an I/O request. */
#define IOBUF_ALIGN_SIZE 512

/* Kernel data structure associated with an I/O buffer.  I/O buffers that
   are pinned (or attached) point to one of these structures from their
   kernelIOBufferDescP field.  It describes the physical pages occupied by
   the I/O buffer using Linux kiobufs.  These are linked together in a
   global list anchored in the kernel so that pinned storage can be
   released when the GPFS daemon terminates abnormally.  Each I/O buffer
   has one cxiKernelIOBufferDesc_t on this global list.  However, since one
   cxiKernelIOBufferDesc_t can map at most PAGES_PER_KIBD pages, large I/O
   buffers require multiple cxiKernelIOBufferDesc_t's, chained through
   kibdNextP.

   NOTE(review): the page table below is a plain char* array, so the
   "kiobufs" wording above may be historical -- confirm against the
   implementation. */
struct cxiKernelIOBufferDesc_t
{
  /* Daemon address for beginning of I/O buffer.  This address must be
     aligned on a page boundary. */
  char* kibdVaddr;

  /* Number of pages described by this cxiKernelIOBufferDesc_t. */
  int kibdPages;

  /* Number of pages described by this chain of cxiKernelIOBufferDesc_t.
   * Only valid for the first cxiKernelIOBufferDesc_t in the chain.
   */
  int kibdTotalPages;

  /* List pointer.  Used for a chain of cxiKernelIOBufferDesc_t's. */
  struct cxiKernelIOBufferDesc_t* kibdNextP;

  /* An I/O buffer is described by a chain of cxiKernelIOBufferDesc_t,
   * of which the head descriptor is placed on a global list.  Thus these
   * fields are only valid for the first cxiKernelIOBufferDesc_t in the
   * chain of descriptors.
   */
  struct cxiKernelIOBufferDesc_t* gblNextP;
  struct cxiKernelIOBufferDesc_t* gblPrevP;

  /* Capacity of one descriptor: the number of PAGE_SIZE pages in 64K.
     The division is integral, so this assumes PAGE_SIZE <= 64K. */
#define PAGES_PER_KIBD (64*1024/PAGE_SIZE) /* 64K */

  /* Per-page table with PAGES_PER_KIBD slots; presumably only the first
     kibdPages entries are meaningful -- confirm against cxiKibdPin. */
  char* maplist[PAGES_PER_KIBD];
};

/* Struct that records the mapping within
 * the daemon address space.
A group of these are allocated * as an array in the shared seg and the memory descriptor * vindex points to the appropriate element. */ struct cxiMemoryMapping_t { char *vaddr; /* daemon address mapping */ #ifdef SSEG_SWIZZLE_PTRS char *kvaddr; #endif int kBytes; /* size of the area in kilobytes */ short vindex; /* index in shared segment mapping array */ }; static inline void InitMemoryMapping(struct cxiMemoryMapping_t *mmP) { mmP->vaddr = NULL; mmP->kBytes = 0; mmP->vindex = -1; #ifdef SSEG_SWIZZLE_PTRS mmP->kvaddr = NULL; #endif } static inline Boolean IsMemoryMappingFree(struct cxiMemoryMapping_t *mmP) { if (mmP->kBytes == 0) return true; return false; } typedef struct cxiMemoryMapping_t cxiMemoryMapping_t; /* Initialization and termination routines. Called at module load and unload, respectively. */ EXTERNC void KibdModuleInit(); EXTERNC void KibdModuleTerm(); /* Create a cxiKernelIOBufferDesc_t object (or list of cxiKernelIOBufferDesc_t objects) describing an I/O buffer in the user address space of the calling process and link it onto the list of all such objects. Pins the user-level buffer. The buffer virtual address must be on a page boundary. The length can be arbitrarily large, but must be a multiple of the page size. Returns 0 if successful, non-zero if unsuccessful. */ EXTERNC int cxiKibdPin(char* vaddr, int len, struct cxiKernelIOBufferDesc_t** kibdPP); /* Remove a cxiKernelIOBufferDesc_t object from the list of all such objects, destroy it and all chained cxiKernelIOBufferDesc_t objects associated with it, and unpin the associated user-level buffer. */ EXTERNC void cxiKibdUnpin(struct cxiKernelIOBufferDesc_t* kibdP); /* Free all cxiKernelIOBufferDesc_t's, and unpin their underlying storage. */ EXTERNC void cxiKibdUnpinAll(); #ifdef MMAP_DIO /* Create a cxiKernelIOBufferDesc_t object for a page in user address space that is already pinned. The page will be mapped into kernel address space. 
This is used by mmap routines that want to do direct I/O from user page to disk. The cxiKernelIOBufferDesc_t that this routine creates can be passed to cxiKDoIO just like one that was created by cxiKibdPin. */ EXTERNC int cxiKibdPinmm(struct page *pageP, struct cxiKernelIOBufferDesc_t** kibdPP); /* Free a cxiKernelIOBufferDesc_t that was created by cxiKibdPinmm. */ EXTERNC void cxiKibdUnpinmm(struct page *pageP, struct cxiKernelIOBufferDesc_t* kibdP); #endif /* MMAP_DIO */ /* Handle that describes a particular cxiIOBuffer_t that has been attached. On Linux, this is a pointer to a cxiLinuxKernelIOBufferDesc_t. */ struct cxiIOBufferAttachment_t { struct cxiKernelIOBufferDesc_t* kDescP; }; /* Initialize a cxiIOBufferAttachment_t */ static inline void InitBufferAttachment(struct cxiIOBufferAttachment_t* baP) { baP->kDescP = NULL; }; /* Result of making a read-only copy of a portion of an I/O buffer. On Linux, this must record the base address of the copy buffer, if one was required. If data was mapped in place, the cxiContiguousBuffer_t records which page was kmapped. */ struct cxiContiguousBuffer_t { /* Base of storage allocated with vmalloc / kmalloc, or NULL if data is referenced in place. */ char* mallocedBaseP; /* True if storage pointed to be mallocedBaseP was allocated using kmalloc. If false, then vmalloc was used. */ Boolean usedKmalloc; /* Pointer used to remember which page to unmap, or NULL if data was copied to mallocedBaseP by mapContiguousRO. */ void* pageP; }; /* Initialize a cxiContiguousBuffer_t */ static inline void InitContiguousBuffer(struct cxiContiguousBuffer_t* cbP) { cbP->mallocedBaseP = NULL; cbP->usedKmalloc = false; cbP->pageP = NULL; } /* Kernel calls used by cxiK... routines to call the Kibd... 
routines */ EXTERNC int kxPinKernelIOBuffer(char* vaddr, int len, struct cxiKernelIOBufferDesc_t** pinnedPP); EXTERNC int kxUnpinKernelIOBuffer(struct cxiKernelIOBufferDesc_t* pinnedP); EXTERNC int kxUnpinAllKernelIOBuffers(); /* Methods for manipulating cxiIOBuffer_t's */ /* Return true if the fields describing the IOBuffer are self-consistent */ #define IOBUFFER_IS_CONSISTENT(IOBP) (true) /* Pin the pages belonging to this I/O buffer */ EXTERNC void KPinIOBuffer(struct cxiIOBuffer_t* iobP); /* Unpin the pages belonging to this I/O buffer */ EXTERNC void KUnpinIOBuffer(struct cxiIOBuffer_t* iobP); /* Split the kernel buffer descriptor into two adjacent I/O buffers */ EXTERNC void KSplitIOBuffer(struct cxiIOBuffer_t* iobP, int frontPages, struct cxiIOBuffer_t* rearBufP); /* Merge the kernel buffer descriptors of two adjacent I/O buffers. The I/O buffer p should be destroyed after this call, since its pages will be merged into the buffer *iobP. */ EXTERNC void KMergeIOBuffer(struct cxiIOBuffer_t* iobP, struct cxiIOBuffer_t* p); /* Read or write the given sectors from dev. Data should be placed into the I/O buffer beginning at byte offset bufOffset. Returns EOK on success, negative values on error. All of the data to be transferred will be in the first cxiKernelIOBufferDesc_t. */ EXTERNC int cxiKDoIO(struct cxiKernelIOBufferDesc_t* kibdP, Boolean isWrite, cxiDev_t dev, UInt64 startSector, int nSectors, int sectorSize, int bufOffset); /* On Linux, I/O buffers can be accessed at contiguous virtual addresses from the daemon process, but not from kernel code */ #ifndef _KERNEL #define __CXI_BUFFERS_ARE_CONTIGUOUS #endif /* Routine to set up the disk block size and get disk parameters */ EXTERNC int GetDiskInfoX(cxiDev_t devId, struct cxiDiskInfo_t* diskInfoP); #endif /* _h_cxiIOBuffer_plat */