source: gpfs_3.1_ker2.6.20/lpp/mmfs/samples/perf/irreg.c @ 16

Last change on this file since 16 was 16, checked in by rock, 16 years ago
File size: 14.2 KB
Line 
1/* IBM_PROLOG_BEGIN_TAG                                                   */
2/* This is an automatically generated prolog.                             */
3/*                                                                        */
4/*                                                                        */
5/*                                                                        */
6/* Licensed Materials - Property of IBM                                   */
7/*                                                                        */
8/* (C) COPYRIGHT International Business Machines Corp. 1999,2006          */
9/* All Rights Reserved                                                    */
10/*                                                                        */
11/* US Government Users Restricted Rights - Use, duplication or            */
12/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.      */
13/*                                                                        */
14/* IBM_PROLOG_END_TAG                                                     */
15
16/* Implementation of PrefetchedIrregularXfer library.  See irreg.h for
17 * interface details.
18 *
19 * Contents:
20 *   pixReset
21 *   pixInit
22 *   pixDeclareAccesses
23 *   pixXfer
24 *   pixTerm
25 *   pixSetTraceLevel
26 *   pixGenerateBlocks
27 *   pixIssueHint
28 */
29
30#ifdef GPFS_LINUX
31/* Use 64 bit version of stat, etc. */
32#define _LARGEFILE_SOURCE
33#define _LARGEFILE64_SOURCE
34#define _FILE_OFFSET_BITS 64
35
36typedef long long offset_t;
37#endif
38
39#ifdef GPFS_AIX
40/* Use 64 bit version of stat, etc. */
41#define _LARGE_FILES
42#endif
43
44#include <sys/types.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <fcntl.h>
48#include <sys/stat.h>
49#include <assert.h>
50#include <errno.h>
51#include <gpfs_fcntl.h>   /* from /usr/lpp/mmfs/include */
52#include <unistd.h>       /* ABSOLUTELY needed for correct lseek64 prototype */
53#include "irreg.h"
54
55
56/* Forward declarations of internal routines */
57void pixGenerateBlocks(struct PrefetchedIrregularXfer * pixP);
58int pixIssueHint(struct PrefetchedIrregularXfer * pixP);
59
60
61/* Internal routine to clear a PrefetchedIrregularXfer struct */
62static void pixReset(struct PrefetchedIrregularXfer * pixP)
63{
64  pixP->fHandle = -1;
65  pixP->blockSize = -1;
66  pixP->isWrite = 0;
67  pixP->accDescP = NULL;
68  pixP->nAccesses = -1;
69  pixP->nextAccessIndex = 0;
70  pixP->nextPrefetchIndex = 0;
71  pixP->oldestAcceptedBlockIndex = 0;
72  pixP->nAcceptedBlockIndices = 0;
73  pixP->nReleaseBlockIndices = 0;
74  pixP->firstPendingBlockIndex = 0;
75  pixP->nPendingBlockIndices = 0;
76  pixP->netBlocksPrefetched = 0;
77}
78
79
80/* Initialize (must be first call) */
81void pixInit(struct PrefetchedIrregularXfer * pixP)
82{
83  pixReset(pixP);
84  pixP->verbosity = 0;
85}
86
87
88/* Declare the sequence of accesses that will be made by subsequent calls
89   to the pixXfer method.  The storage passed in will not be modified by
90   this class, but must not be deallocated by the caller until all pixXfer
91   calls have completed.  Returns an errno value on failure, or 0 if the
92   call is successful.  No access in accDescP may be longer than
93   MAX_ACCESS_BLOCKS times the block size of this file, or else pixXfer will
94   abort the access sequence and return an error. */
95int pixDeclareAccesses(struct PrefetchedIrregularXfer * pixP, int fHandle,
96                       int isWrite, int nAccesses,
97                       const struct pixAccDesc * accDescP)
98{
99  int rc;
100  struct stat statBuf;
101
102  /* Clear any prior prefetches */
103  pixTerm(pixP);
104
105  /* Compute block size of this file */
106  rc = fstat(fHandle, &statBuf);
107  if (rc != 0)
108    return errno;
109
110  /* Set up state for pixXfer() */
111  pixP->fHandle = fHandle;
112  pixP->blockSize = statBuf.st_blksize;
113  pixP->isWrite = isWrite;
114  pixP->accDescP = accDescP;
115  pixP->nAccesses = nAccesses;
116  /* Other fields already set by pixTerm() */
117
118  /* Generate and execute initial prefetches */
119  pixGenerateBlocks(pixP);
120  rc = pixIssueHint(pixP);
121  return rc;
122}
123
124
125/* Perform the next read or write access defined by an earlier call to
126   pixDeclareAccesses.  The buffer is assumed to be large enough to contain
127   the next access from the previously declared list.  Returns an errno
128   value or zero as the function result.  If zero is returned, stores the
129   number of bytes transferred in *nBytesP. */
130int pixXfer(struct PrefetchedIrregularXfer * pixP, caddr_t bufP, int* nBytesP)
131{
132  int len;
133  int nBytesXferred;
134  offset_t desiredOffset;
135  offset_t actualOffset;
136  int rc;
137  offset_t blockNum;
138  int blockOffset;
139  int blockLen;
140  int nBlocks;
141  int blockSlot;
142  int i;
143
144  /* If pixDeclareAccesses has not been called, or if abort has been called,
145     return EINVAL */
146  if (pixP->fHandle == -1)
147    return EINVAL;
148
149  /* If all declared accesses have been completed, it is an error to try
150     to do any more */
151  if (pixP->nextAccessIndex >= pixP->nAccesses)
152  {
153    pixTerm(pixP);
154    return EINVAL;
155  }
156
157  /* If the next access is too big, abort and return an error */
158  len = pixP->accDescP[pixP->nextAccessIndex].len;
159  if (len > MAX_ACCESS_BLOCKS*pixP->blockSize)
160  {
161    return EFBIG;
162  }
163
164  /* Seek to the proper position in the file and read or write the
165     amount of data that was requested */
166  desiredOffset = pixP->accDescP[pixP->nextAccessIndex].off;
167  if (pixP->verbosity >= 1)
168    printf("XFER at %lld(0x%llX) len %d\n",
169           desiredOffset, desiredOffset, len);
170
171  actualOffset = lseek64(pixP->fHandle, desiredOffset, SEEK_SET);
172  if (actualOffset != desiredOffset)
173  {
174    pixTerm(pixP);
175    return EINVAL;
176  }
177  if (pixP->isWrite)
178    nBytesXferred = write(pixP->fHandle, bufP, len);
179  else
180    nBytesXferred = read(pixP->fHandle, bufP, len);
181
182  /* If an error occurred, abort and return the errno value */
183  if (nBytesXferred == -1)
184  {
185    rc = errno;
186    pixTerm(pixP);
187    return rc;
188  }
189  *nBytesP = nBytesXferred;
190
191  /* Mark the blocks spanned by the request just done as releasable.  They
192     must appear in the blockList table as the oldest blocks prefetched
193     and not yet marked releasable. */
194  blockNum = desiredOffset / pixP->blockSize;
195  blockOffset = desiredOffset % pixP->blockSize;
196  nBlocks = (desiredOffset + len - 1)/pixP->blockSize - blockNum + 1;
197  blockSlot = (pixP->oldestAcceptedBlockIndex + pixP->nReleaseBlockIndices) %
198              MAX_BLOCKS;
199  for (i=0; i<nBlocks; i++)
200  {
201    assert(pixP->blockList[blockSlot].blockNumber == blockNum);
202    pixP->nReleaseBlockIndices += 1;
203    blockSlot = (blockSlot+1) % MAX_BLOCKS;
204    blockNum += 1;
205  }
206
207  /* Generate and execute additional prefetches and release blocks that
208     have already been accessed */
209  pixP->nextAccessIndex += 1;
210  pixGenerateBlocks(pixP);
211  rc = pixIssueHint(pixP);
212  return rc;
213}
214
215
216/* Abort pending prefetches and release all prefetched blocks.  Returns
217   the state of the PrefetchedIrregularXfer object to what it was just
218   after calling pixInit(), except that verbosity is not reset. */
219void pixTerm(struct PrefetchedIrregularXfer * pixP)
220{
221  struct
222  {
223    gpfsFcntlHeader_t hdr;
224    gpfsCancelHints_t cancel;
225  } cancelArg;
226
227  /* If pixDeclareAccesses has been called, there may be outstanding hints
228     that need to be cancelled */
229  if (pixP->fHandle != -1)
230  {
231    cancelArg.hdr.totalLength = sizeof(cancelArg);
232    cancelArg.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
233    cancelArg.hdr.fcntlReserved = 0;
234    cancelArg.cancel.structLen = sizeof(gpfsCancelHints_t);
235    cancelArg.cancel.structType = GPFS_CANCEL_HINTS;
236    (void)gpfs_fcntl(pixP->fHandle, &cancelArg);
237  }
238
239  /* Clear PrefetchedIrregularXfer object */
240  pixReset(pixP);
241}
242
243
244/* Set trace level.  A value of 0 does no tracing, while higher values
245   trace more stuff. */
246void pixSetTraceLevel(struct PrefetchedIrregularXfer * pixP, int level)
247{
248  pixP->verbosity = level;
249}
250
251
252/* Internal routines */
253
254
255/* Process access descriptions and add entries to blockList for as many
256   complete accesses as possible */
257void pixGenerateBlocks(struct PrefetchedIrregularXfer * pixP)
258{
259  offset_t off;
260  int len;
261  offset_t blockNum;
262  int blockOffset;
263  int blockLen;
264  int nBlocks;
265  int nBlocksFree;
266  int i;
267  int blockSlot;
268
269  while (pixP->nextPrefetchIndex < pixP->nAccesses)
270  {
271    /* Compute how many blocks are touched by the next access in the
272       list */
273    off = pixP->accDescP[pixP->nextPrefetchIndex].off;
274    len = pixP->accDescP[pixP->nextPrefetchIndex].len;
275    blockNum = off / pixP->blockSize;
276    blockOffset = off % pixP->blockSize;
277    nBlocks = (off + len - 1)/pixP->blockSize - blockNum + 1;
278
279    /* If there is not enough room in blockList for all the blocks of this
280       access, give up */
281    /* ?? Notice that this code will not work properly if a really big
282       request (more blocks than there are in blockList) appears in
283       accDescP.  That is the reason for the restriction that no access
284       be longer than MAX_ACCESS_BLOCKS blocks long. */
285    nBlocksFree = MAX_BLOCKS - pixP->nAcceptedBlockIndices -
286                  pixP->nPendingBlockIndices;
287    if (nBlocks > nBlocksFree)
288      break;
289
290    /* Add descriptions of each of the blocks touched by the next access
291       to blockList */
292    blockSlot = (pixP->firstPendingBlockIndex + pixP->nPendingBlockIndices) %
293                MAX_BLOCKS;
294    for (i=0; i<nBlocks; i++)
295    {
296      pixP->blockList[blockSlot].blockNumber = blockNum;
297      pixP->blockList[blockSlot].blkOffset = blockOffset;
298      if (len < pixP->blockSize-blockOffset)
299        blockLen = len;
300      else
301        blockLen = pixP->blockSize - blockOffset;
302      pixP->blockList[blockSlot].blkLen = blockLen;
303      blockSlot = (blockSlot+1) % MAX_BLOCKS;
304      pixP->nPendingBlockIndices += 1;
305      blockNum += 1;
306      blockOffset = 0;
307      len -= blockLen;
308    }
309    pixP->nextPrefetchIndex += 1;
310  }
311}
312
313
314/* Issue as many release/prefetch multiple access range hints as possible
315   from the list of pending block indicies.  Return errno if the hint call
316   fails, otherwise return 0. */
317int pixIssueHint(struct PrefetchedIrregularXfer * pixP)
318{
319  struct
320  {
321    gpfsFcntlHeader_t hdr;
322    gpfsMultipleAccessRange_t marh;
323  } accHint;
324  int i;
325  int blockSlot;
326  int tempNPendingBlockIndices;
327  int nRequestedPrefetches;
328  int nRequestedReleases;
329  int rc;
330  int nActualPrefetches;
331
332  /* Keep making hint calls until not all of the prefetch hints are
333     accepted and there are no more blocks to release */
334  do
335  {
336    /* Add prefetch blocks to hint */
337    blockSlot = pixP->firstPendingBlockIndex;
338    tempNPendingBlockIndices = pixP->nPendingBlockIndices;
339    nRequestedPrefetches = 0;
340    for (i=0; i<GPFS_MAX_RANGE_COUNT; i++)
341    {
342      if (tempNPendingBlockIndices <= 0)
343        break;
344      accHint.marh.accRangeArray[i].blockNumber =
345        pixP->blockList[blockSlot].blockNumber;
346      accHint.marh.accRangeArray[i].start =
347        pixP->blockList[blockSlot].blkOffset;
348      accHint.marh.accRangeArray[i].length =
349        pixP->blockList[blockSlot].blkLen;
350      accHint.marh.accRangeArray[i].isWrite =
351        pixP->isWrite;
352      nRequestedPrefetches += 1;
353      blockSlot = (blockSlot+1) % MAX_BLOCKS;
354      tempNPendingBlockIndices -= 1;
355    }
356    accHint.marh.accRangeCnt = nRequestedPrefetches;
357
358    /* Add list of blocks to be released to the hint */
359    nRequestedReleases = 0;
360    for (i=0; i<GPFS_MAX_RANGE_COUNT; i++)
361    {
362      if (pixP->nReleaseBlockIndices <= 0)
363        break;
364      accHint.marh.relRangeArray[i].blockNumber =
365        pixP->blockList[pixP->oldestAcceptedBlockIndex].blockNumber;
366      accHint.marh.relRangeArray[i].start =
367        pixP->blockList[pixP->oldestAcceptedBlockIndex].blkOffset;
368      accHint.marh.relRangeArray[i].length =
369        pixP->blockList[pixP->oldestAcceptedBlockIndex].blkLen;
370      accHint.marh.relRangeArray[i].isWrite =
371        pixP->isWrite;
372      nRequestedReleases += 1;
373      pixP->oldestAcceptedBlockIndex = (pixP->oldestAcceptedBlockIndex+1) %
374                                       MAX_BLOCKS;
375      pixP->nReleaseBlockIndices -= 1;
376      pixP->nAcceptedBlockIndices -= 1;
377    }
378    accHint.marh.relRangeCnt = nRequestedReleases;
379
380    /* If the hint is empty, return without doing anything */
381    if (nRequestedPrefetches == 0  &&  nRequestedReleases == 0)
382      return 0;
383
384    /* Finish filling in the hint, then issue it to GPFS */
385    accHint.hdr.totalLength = sizeof(accHint);
386    accHint.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
387    accHint.hdr.fcntlReserved = 0;
388    accHint.marh.structLen = sizeof(accHint.marh);
389    accHint.marh.structType = GPFS_MULTIPLE_ACCESS_RANGE;
390
391    if (pixP->verbosity >= 2)
392    {
393      printf("Requesting %d blocks:\n", accHint.marh.accRangeCnt);
394      for (i=0; i<accHint.marh.accRangeCnt; i++)
395        printf("  block %lld(0x%llX) offset %d len %d\n",
396               accHint.marh.accRangeArray[i].blockNumber,
397               accHint.marh.accRangeArray[i].blockNumber,
398               accHint.marh.accRangeArray[i].start,
399               accHint.marh.accRangeArray[i].length);
400      printf("Releasing %d blocks:\n", accHint.marh.relRangeCnt);
401      for (i=0; i<accHint.marh.relRangeCnt; i++)
402        printf("  block %lld(0x%llX) offset %d len %d\n",
403               accHint.marh.relRangeArray[i].blockNumber,
404               accHint.marh.relRangeArray[i].blockNumber,
405               accHint.marh.relRangeArray[i].start,
406               accHint.marh.relRangeArray[i].length);
407    }
408
409    rc = gpfs_fcntl(pixP->fHandle, &accHint);
410
411    /* If the hint was unsuccessful, clean up and return errno */
412    if (rc != 0)
413    {
414      rc = errno;
415      pixTerm(pixP);
416      return rc;
417    }
418
419    pixP->netBlocksPrefetched += accHint.marh.accRangeCnt - nRequestedReleases;
420    if (pixP->verbosity >= 1)
421      printf("Released %d blocks.  Prefetched %d of %d requested.  "
422               "Net prefetches now %d.\n",
423             nRequestedReleases, accHint.marh.accRangeCnt,
424             nRequestedPrefetches, pixP->netBlocksPrefetched);
425
426    /* Change state of pending block descriptions to accepted according to
427       how many of the prefetches were started by GPFS.  Descriptions of
428       blocks to be released were already updated when forming the hint. */
429    nActualPrefetches = accHint.marh.accRangeCnt;
430    pixP->firstPendingBlockIndex =
431      (pixP->firstPendingBlockIndex+nActualPrefetches) % MAX_BLOCKS;
432    pixP->nPendingBlockIndices -= nActualPrefetches;
433    pixP->nAcceptedBlockIndices += nActualPrefetches;
434
435  } while ((nActualPrefetches == nRequestedPrefetches  &&
436            pixP->nPendingBlockIndices > 0)  ||
437           pixP->nReleaseBlockIndices > 0);
438  return 0;
439}
Note: See TracBrowser for help on using the repository browser.