/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* */ /* */ /* Licensed Materials - Property of IBM */ /* */ /* (C) COPYRIGHT International Business Machines Corp. 1999,2006 */ /* All Rights Reserved */ /* */ /* US Government Users Restricted Rights - Use, duplication or */ /* disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */ /* */ /* IBM_PROLOG_END_TAG */ /* Implementation of PrefetchedIrregularXfer library. See irreg.h for * interface details. * * Contents: * pixReset * pixInit * pixDeclareAccesses * pixXfer * pixTerm * pixSetTraceLevel * pixGenerateBlocks * pixIssueHint */ #ifdef GPFS_LINUX /* Use 64 bit version of stat, etc. */ #define _LARGEFILE_SOURCE #define _LARGEFILE64_SOURCE #define _FILE_OFFSET_BITS 64 typedef long long offset_t; #endif #ifdef GPFS_AIX /* Use 64 bit version of stat, etc. */ #define _LARGE_FILES #endif #include #include #include #include #include #include #include #include /* from /usr/lpp/mmfs/include */ #include /* ABSOLUTELY needed for correct lseek64 prototype */ #include "irreg.h" /* Forward declarations of internal routines */ void pixGenerateBlocks(struct PrefetchedIrregularXfer * pixP); int pixIssueHint(struct PrefetchedIrregularXfer * pixP); /* Internal routine to clear a PrefetchedIrregularXfer struct */ static void pixReset(struct PrefetchedIrregularXfer * pixP) { pixP->fHandle = -1; pixP->blockSize = -1; pixP->isWrite = 0; pixP->accDescP = NULL; pixP->nAccesses = -1; pixP->nextAccessIndex = 0; pixP->nextPrefetchIndex = 0; pixP->oldestAcceptedBlockIndex = 0; pixP->nAcceptedBlockIndices = 0; pixP->nReleaseBlockIndices = 0; pixP->firstPendingBlockIndex = 0; pixP->nPendingBlockIndices = 0; pixP->netBlocksPrefetched = 0; } /* Initialize (must be first call) */ void pixInit(struct PrefetchedIrregularXfer * pixP) { pixReset(pixP); pixP->verbosity = 0; } /* Declare the sequence of accesses that will be made by subsequent calls to the pixXfer method. The storage passed in will not be modified by this class, but must not be deallocated by the caller until all pixXfer calls have completed. Returns an errno value on failure, or 0 if the call is successful. No access in accDescP may be longer than MAX_ACCESS_BLOCKS times the block size of this file, or else pixXfer will abort the access sequence and return an error. */ int pixDeclareAccesses(struct PrefetchedIrregularXfer * pixP, int fHandle, int isWrite, int nAccesses, const struct pixAccDesc * accDescP) { int rc; struct stat statBuf; /* Clear any prior prefetches */ pixTerm(pixP); /* Compute block size of this file */ rc = fstat(fHandle, &statBuf); if (rc != 0) return errno; /* Set up state for pixXfer() */ pixP->fHandle = fHandle; pixP->blockSize = statBuf.st_blksize; pixP->isWrite = isWrite; pixP->accDescP = accDescP; pixP->nAccesses = nAccesses; /* Other fields already set by pixTerm() */ /* Generate and execute initial prefetches */ pixGenerateBlocks(pixP); rc = pixIssueHint(pixP); return rc; } /* Perform the next read or write access defined by an earlier call to pixDeclareAccesses. The buffer is assumed to be large enough to contain the next access from the previously declared list. Returns an errno value or zero as the function result. If zero is returned, stores the number of bytes transferred in *nBytesP. */ int pixXfer(struct PrefetchedIrregularXfer * pixP, caddr_t bufP, int* nBytesP) { int len; int nBytesXferred; offset_t desiredOffset; offset_t actualOffset; int rc; offset_t blockNum; int blockOffset; int blockLen; int nBlocks; int blockSlot; int i; /* If pixDeclareAccesses has not been called, or if abort has been called, return EINVAL */ if (pixP->fHandle == -1) return EINVAL; /* If all declared accesses have been completed, it is an error to try to do any more */ if (pixP->nextAccessIndex >= pixP->nAccesses) { pixTerm(pixP); return EINVAL; } /* If the next access is too big, abort and return an error */ len = pixP->accDescP[pixP->nextAccessIndex].len; if (len > MAX_ACCESS_BLOCKS*pixP->blockSize) { return EFBIG; } /* Seek to the proper position in the file and read or write the amount of data that was requested */ desiredOffset = pixP->accDescP[pixP->nextAccessIndex].off; if (pixP->verbosity >= 1) printf("XFER at %lld(0x%llX) len %d\n", desiredOffset, desiredOffset, len); actualOffset = lseek64(pixP->fHandle, desiredOffset, SEEK_SET); if (actualOffset != desiredOffset) { pixTerm(pixP); return EINVAL; } if (pixP->isWrite) nBytesXferred = write(pixP->fHandle, bufP, len); else nBytesXferred = read(pixP->fHandle, bufP, len); /* If an error occurred, abort and return the errno value */ if (nBytesXferred == -1) { rc = errno; pixTerm(pixP); return rc; } *nBytesP = nBytesXferred; /* Mark the blocks spanned by the request just done as releasable. They must appear in the blockList table as the oldest blocks prefetched and not yet marked releasable. */ blockNum = desiredOffset / pixP->blockSize; blockOffset = desiredOffset % pixP->blockSize; nBlocks = (desiredOffset + len - 1)/pixP->blockSize - blockNum + 1; blockSlot = (pixP->oldestAcceptedBlockIndex + pixP->nReleaseBlockIndices) % MAX_BLOCKS; for (i=0; iblockList[blockSlot].blockNumber == blockNum); pixP->nReleaseBlockIndices += 1; blockSlot = (blockSlot+1) % MAX_BLOCKS; blockNum += 1; } /* Generate and execute additional prefetches and release blocks that have already been accessed */ pixP->nextAccessIndex += 1; pixGenerateBlocks(pixP); rc = pixIssueHint(pixP); return rc; } /* Abort pending prefetches and release all prefetched blocks. Returns the state of the PrefetchedIrregularXfer object to what it was just after calling pixInit(), except that verbosity is not reset. */ void pixTerm(struct PrefetchedIrregularXfer * pixP) { struct { gpfsFcntlHeader_t hdr; gpfsCancelHints_t cancel; } cancelArg; /* If pixDeclareAccesses has been called, there may be outstanding hints that need to be cancelled */ if (pixP->fHandle != -1) { cancelArg.hdr.totalLength = sizeof(cancelArg); cancelArg.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; cancelArg.hdr.fcntlReserved = 0; cancelArg.cancel.structLen = sizeof(gpfsCancelHints_t); cancelArg.cancel.structType = GPFS_CANCEL_HINTS; (void)gpfs_fcntl(pixP->fHandle, &cancelArg); } /* Clear PrefetchedIrregularXfer object */ pixReset(pixP); } /* Set trace level. A value of 0 does no tracing, while higher values trace more stuff. */ void pixSetTraceLevel(struct PrefetchedIrregularXfer * pixP, int level) { pixP->verbosity = level; } /* Internal routines */ /* Process access descriptions and add entries to blockList for as many complete accesses as possible */ void pixGenerateBlocks(struct PrefetchedIrregularXfer * pixP) { offset_t off; int len; offset_t blockNum; int blockOffset; int blockLen; int nBlocks; int nBlocksFree; int i; int blockSlot; while (pixP->nextPrefetchIndex < pixP->nAccesses) { /* Compute how many blocks are touched by the next access in the list */ off = pixP->accDescP[pixP->nextPrefetchIndex].off; len = pixP->accDescP[pixP->nextPrefetchIndex].len; blockNum = off / pixP->blockSize; blockOffset = off % pixP->blockSize; nBlocks = (off + len - 1)/pixP->blockSize - blockNum + 1; /* If there is not enough room in blockList for all the blocks of this access, give up */ /* ?? Notice that this code will not work properly if a really big request (more blocks than there are in blockList) appears in accDescP. That is the reason for the restriction that no access be longer than MAX_ACCESS_BLOCKS blocks long. */ nBlocksFree = MAX_BLOCKS - pixP->nAcceptedBlockIndices - pixP->nPendingBlockIndices; if (nBlocks > nBlocksFree) break; /* Add descriptions of each of the blocks touched by the next access to blockList */ blockSlot = (pixP->firstPendingBlockIndex + pixP->nPendingBlockIndices) % MAX_BLOCKS; for (i=0; iblockList[blockSlot].blockNumber = blockNum; pixP->blockList[blockSlot].blkOffset = blockOffset; if (len < pixP->blockSize-blockOffset) blockLen = len; else blockLen = pixP->blockSize - blockOffset; pixP->blockList[blockSlot].blkLen = blockLen; blockSlot = (blockSlot+1) % MAX_BLOCKS; pixP->nPendingBlockIndices += 1; blockNum += 1; blockOffset = 0; len -= blockLen; } pixP->nextPrefetchIndex += 1; } } /* Issue as many release/prefetch multiple access range hints as possible from the list of pending block indicies. Return errno if the hint call fails, otherwise return 0. */ int pixIssueHint(struct PrefetchedIrregularXfer * pixP) { struct { gpfsFcntlHeader_t hdr; gpfsMultipleAccessRange_t marh; } accHint; int i; int blockSlot; int tempNPendingBlockIndices; int nRequestedPrefetches; int nRequestedReleases; int rc; int nActualPrefetches; /* Keep making hint calls until not all of the prefetch hints are accepted and there are no more blocks to release */ do { /* Add prefetch blocks to hint */ blockSlot = pixP->firstPendingBlockIndex; tempNPendingBlockIndices = pixP->nPendingBlockIndices; nRequestedPrefetches = 0; for (i=0; iblockList[blockSlot].blockNumber; accHint.marh.accRangeArray[i].start = pixP->blockList[blockSlot].blkOffset; accHint.marh.accRangeArray[i].length = pixP->blockList[blockSlot].blkLen; accHint.marh.accRangeArray[i].isWrite = pixP->isWrite; nRequestedPrefetches += 1; blockSlot = (blockSlot+1) % MAX_BLOCKS; tempNPendingBlockIndices -= 1; } accHint.marh.accRangeCnt = nRequestedPrefetches; /* Add list of blocks to be released to the hint */ nRequestedReleases = 0; for (i=0; inReleaseBlockIndices <= 0) break; accHint.marh.relRangeArray[i].blockNumber = pixP->blockList[pixP->oldestAcceptedBlockIndex].blockNumber; accHint.marh.relRangeArray[i].start = pixP->blockList[pixP->oldestAcceptedBlockIndex].blkOffset; accHint.marh.relRangeArray[i].length = pixP->blockList[pixP->oldestAcceptedBlockIndex].blkLen; accHint.marh.relRangeArray[i].isWrite = pixP->isWrite; nRequestedReleases += 1; pixP->oldestAcceptedBlockIndex = (pixP->oldestAcceptedBlockIndex+1) % MAX_BLOCKS; pixP->nReleaseBlockIndices -= 1; pixP->nAcceptedBlockIndices -= 1; } accHint.marh.relRangeCnt = nRequestedReleases; /* If the hint is empty, return without doing anything */ if (nRequestedPrefetches == 0 && nRequestedReleases == 0) return 0; /* Finish filling in the hint, then issue it to GPFS */ accHint.hdr.totalLength = sizeof(accHint); accHint.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; accHint.hdr.fcntlReserved = 0; accHint.marh.structLen = sizeof(accHint.marh); accHint.marh.structType = GPFS_MULTIPLE_ACCESS_RANGE; if (pixP->verbosity >= 2) { printf("Requesting %d blocks:\n", accHint.marh.accRangeCnt); for (i=0; ifHandle, &accHint); /* If the hint was unsuccessful, clean up and return errno */ if (rc != 0) { rc = errno; pixTerm(pixP); return rc; } pixP->netBlocksPrefetched += accHint.marh.accRangeCnt - nRequestedReleases; if (pixP->verbosity >= 1) printf("Released %d blocks. Prefetched %d of %d requested. " "Net prefetches now %d.\n", nRequestedReleases, accHint.marh.accRangeCnt, nRequestedPrefetches, pixP->netBlocksPrefetched); /* Change state of pending block descriptions to accepted according to how many of the prefetches were started by GPFS. Descriptions of blocks to be released were already updated when forming the hint. */ nActualPrefetches = accHint.marh.accRangeCnt; pixP->firstPendingBlockIndex = (pixP->firstPendingBlockIndex+nActualPrefetches) % MAX_BLOCKS; pixP->nPendingBlockIndices -= nActualPrefetches; pixP->nAcceptedBlockIndices += nActualPrefetches; } while ((nActualPrefetches == nRequestedPrefetches && pixP->nPendingBlockIndices > 0) || pixP->nReleaseBlockIndices > 0); return 0; }