[16] | 1 | /* IBM_PROLOG_BEGIN_TAG */ |
---|
| 2 | /* This is an automatically generated prolog. */ |
---|
| 3 | /* */ |
---|
| 4 | /* */ |
---|
| 5 | /* */ |
---|
| 6 | /* Licensed Materials - Property of IBM */ |
---|
| 7 | /* */ |
---|
| 8 | /* (C) COPYRIGHT International Business Machines Corp. 1999,2006 */ |
---|
| 9 | /* All Rights Reserved */ |
---|
| 10 | /* */ |
---|
| 11 | /* US Government Users Restricted Rights - Use, duplication or */ |
---|
| 12 | /* disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */ |
---|
| 13 | /* */ |
---|
| 14 | /* IBM_PROLOG_END_TAG */ |
---|
| 15 | |
---|
| 16 | /* Multinode GPFS performance test program. Measures aggregate data |
---|
| 17 | transfer rate across several nodes for random, strided, or sequential |
---|
| 18 | access patterns. */ |
---|
| 19 | |
---|
#ifdef GPFS_LINUX
/* Use 64 bit version of stat, etc.  These feature-test macros are placed
   ahead of the #include block below so that they are already defined when
   the system headers are read. */
#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64

/* NOTE(review): hard-coded page size; its users are not visible in this
   part of the file (presumably buffer alignment) -- confirm that 4096 is
   right for the target platform. */
#define PAGE_SIZE 4096

/* 64-bit file offset type; used below for seek positions and for the
   lengths returned by read/write */
typedef long long offset_t;

/* Timestamp filled in by GetTOD().  deltaTime() treats tb_high as whole
   seconds and tb_low as nanoseconds.  NOTE(review): presumably a Linux
   stand-in for the structure the AIX build gets from its system headers --
   confirm. */
typedef struct timebasestruct
{
  unsigned int tb_high;       /* high 32 bits, or seconds */
  unsigned int tb_low;        /* low 32 bits, or nanoseconds */
} timebasestruct_t;

/* Store the current time in *tP.  Only declared here; the definition is
   not in this part of the file. */
void GetTOD(timebasestruct_t *tP);
#endif

#ifdef GPFS_AIX
/* Use 64 bit version of stat, etc. */
#define _LARGE_FILES
/* NOTE(review): presumably selects the draft 7 pthreads interface on AIX --
   confirm against the AIX pthread documentation. */
#define _AIX_PTHREADS_D7

/* On AIX the timestamp comes straight from read_real_time(); deltaTime()
   later converts it with time_base_to_time(). */
#define GetTOD(tP) read_real_time((tP),TIMEBASE_SZ)
#endif
---|
| 44 | |
---|
| 45 | #include <pthread.h> |
---|
| 46 | #include <stdio.h> |
---|
| 47 | #include <fcntl.h> |
---|
| 48 | #include <errno.h> |
---|
| 49 | #include <sys/time.h> |
---|
| 50 | #include <sys/stat.h> |
---|
| 51 | #include <sys/ipc.h> |
---|
| 52 | #include <sys/shm.h> |
---|
| 53 | #include <assert.h> |
---|
| 54 | #include <time.h> |
---|
| 55 | #include <stdlib.h> |
---|
| 56 | #include <unistd.h> |
---|
| 57 | #include <ctype.h> |
---|
| 58 | #include <string.h> |
---|
| 59 | #include <gpfs_fcntl.h> |
---|
| 60 | #include "irreg.h" |
---|
| 61 | #ifndef NO_AIO |
---|
| 62 | #ifdef GPFS_LINUX |
---|
| 63 | #define __USE_GNU |
---|
| 64 | #endif |
---|
| 65 | #ifdef GPFS_AIX |
---|
| 66 | #define _AIO_AIX_SOURCE |
---|
| 67 | #endif |
---|
| 68 | #include <aio.h> |
---|
| 69 | #endif |
---|
| 70 | |
---|
| 71 | #ifdef MULTI_NODE |
---|
| 72 | #include <mpi.h> |
---|
| 73 | #endif |
---|
| 74 | |
---|
/* Fallback definitions of O_DIRECT for builds where the system headers
   included above did not provide one.  NOTE(review): the numeric values are
   presumably the platform's own O_DIRECT flag bits (they differ between
   PPC64 Linux, other Linux, and AIX) -- confirm against the target's
   <fcntl.h> before relying on them. */
#ifndef O_DIRECT
#ifdef GPFS_LINUX
#ifdef GPFS_ARCH_PPC64
#define O_DIRECT 0400000
#else
#define O_DIRECT 040000
#endif
#endif
#ifdef GPFS_AIX
#define O_DIRECT 0x08000000
#endif
#endif
---|
| 87 | |
---|
/* Wrappers that hide the differences between the Linux and AIX flavours of
   the reentrant random number and asynchronous I/O interfaces, so the test
   code below can be written once.

     CONST        - qualifier for the aiocb pointer array given to
                    AIO_SUSPEND (const on Linux, empty on AIX)
     RANDRET_T    - type of the value produced by RANDOM_R
     RANDOM_R     - (result pointer, state pointer); the two platforms take
                    these arguments in opposite order.  Note that the
                    expansion already ends in a semicolon.
     INITSTATE_R,
     SETSTATE_R   - the AIX forms take an extra char** argument (_rp) that
                    the Linux forms drop
     AIO_READ,
     AIO_WRITE    - the AIX forms take the file handle as a separate
                    argument; the Linux forms ignore it (the caller stores
                    it in the aiocb instead)
     AIO_SUSPEND  - (count, list); argument order differs per platform
     AIO_RETURN,
     AIO_ERROR    - take the aiocb itself (not a pointer to it) */
#ifdef GPFS_LINUX
#define CONST const
#define RANDRET_T int32_t
#define RANDOM_R(_r,_s) random_r(_s,_r);
#define INITSTATE_R(_sd,_st,_sz,_rp,_rrs) initstate_r(_sd,_st,_sz,_rrs)
#define SETSTATE_R(_rs,_rp,_rrs) setstate_r(_rs,_rrs)

#define AIO_READ(_h,_p) aio_read(_p)
#define AIO_WRITE(_h,_p) aio_write(_p)
#define AIO_SUSPEND(_n,_p) aio_suspend(_p, _n, NULL)
#define AIO_RETURN(_p) aio_return(&(_p))
#define AIO_ERROR(_p) aio_error(&(_p))
/* struct aioinit is only declared by <aio.h>, which is not included when
   NO_AIO is defined; defining the variable unconditionally would leave it
   with an incomplete type and break NO_AIO builds. */
#ifndef NO_AIO
struct aioinit st_aioinit;
#endif
#endif
#ifdef GPFS_AIX
#define CONST
#define RANDRET_T long
#define RANDOM_R(_r,_s) random_r(_r,_s);
#define INITSTATE_R(_sd,_st,_sz,_rp,_rrs) initstate_r(_sd,_st,_sz,_rp,_rrs)
#define SETSTATE_R(_rs,_rp,_rrs) setstate_r(_rs,_rp,_rrs)

#define AIO_READ(_h,_p) aio_read(_h,_p)
#define AIO_WRITE(_h,_p) aio_write(_h,_p)
#define AIO_SUSPEND(_n,_p) aio_suspend(_n,_p)
#define AIO_RETURN(_p) aio_return((_p).aio_handle)
#define AIO_ERROR(_p) aio_error((_p).aio_handle)
#endif
---|
| 115 | |
---|
/* Useful data types */
typedef unsigned int Boolean;
#define true 1
#define false 0
typedef long long Int64;

/* Process rank within job.  Usage() prints its message only when this
   is 0. */
int ProcNum = -1;

/* Total number of processes in job */
int NProcs = -1;

/* Input parameters specified on command line.  (The code that parses the
   command line is not in this part of the file.) */
enum { NoTest, CreateTest, WriteTest, ReadTest, UncacheTest } TestType = NoTest;
enum
{
  NoPattern, RandPattern, RandPatternWithHint, StridedPattern, SeqPattern
} TestPattern = NoPattern;
Boolean LabelledOutput = true;
/* Record size in bytes; -1 until set.  GetNum() refuses the R suffix while
   this is still -1. */
int RecordSize = -1;
Int64 BytesToTransfer = -1;
/* Stride in bytes; 0 makes the strided tests fall back to a stride of
   NThreadsPerProcess * NProcs records. */
Int64 Stride = 0;
int NThreadsPerProcess = 1;
Boolean DoInvalidate = true;
Boolean UseDataShipping = false;
Boolean UseDirectIO = false;
Boolean UseSharedMem = false;
Boolean UseOsync = false;
Boolean DoFsync = false;
Boolean CycleData = true;
Boolean TaskTimes = false;
/* Name of the file under test; used in error messages */
char* FileNameP = NULL;
/* Tracing level; the test loops print one line per record when this
   is >= 2 */
int Verbosity = 0;
int DoFreeAll = false;
Boolean UseAsyncIO = false;
/* Number of aiocb descriptors (and record-sized buffers) each thread uses
   in the AIO tests */
int AIODepth = 0;

/* Maximum number of threads allowed */
#define MAX_THREADS 64

/* Values derived from input parameters */
Int64 RecordsInFile = -1;
Int64 RecordsToTransfer = -1;
---|
| 159 | |
---|
/* Parameter passed to worker threads.  One entry of ParmArray exists per
   possible thread (MAX_THREADS entries). */
struct ThreadParm
{
  /* For sequential and strided access, define the range of record
     numbers that will be accessed by this thread.  The first record to
     be accessed is given by startRecordNum.  If the range from
     startRecordNum to maxRecordNum is smaller than the number of
     records to be processed by this thread, the thread will cycle
     through the set of records from restartRecordNum to maxRecordNum as
     many times as is necessary to process the required number of
     records.  Not used for random access patterns. */
  Int64 startRecordNum;
  Int64 restartRecordNum;
  Int64 maxRecordNum;

  /* Number of records to be processed by this thread */
  Int64 nRecords;

  /* Buffer address (possibly not page-aligned) */
  char *origbufP;

  /* Page-aligned buffer of size RecordSize to be used by this thread.
     (The AIO test advances through it in RecordSize steps, one step per
     AIODepth descriptor.) */
  char* bufP;

  /* Shared memory ID for buffer if using -shm option */
  int shmid;

  /* Index of this entry; printed as the thread number in trace output and
     mixed into the random seed */
  int threadIndex;

  /* Number of seconds spent by this thread from before the first read or
     write until after the last read or write, not counting the file open
     or close. */
  double xferInterval;
} ParmArray[MAX_THREADS];

/* Maximum size of a batch of prefetch requests given to the
   PrefetchedIrregularXfer class at one time.  DoRandTestWithHint()
   allocates this many access descriptors per thread. */
#define MAX_PREFETCHES 1024
---|
| 199 | |
---|
| 200 | |
---|
| 201 | /* Print usage message only on process 0, then exit */ |
---|
| 202 | void Usage() |
---|
| 203 | { |
---|
| 204 | if (ProcNum == 0) |
---|
| 205 | fprintf(stderr, |
---|
| 206 | "\nUsage: gpfsperf operation pattern fn [options]\n" |
---|
| 207 | " operations:\n" |
---|
| 208 | " read - read from the file fn\n" |
---|
| 209 | " write - write to the file fn\n" |
---|
| 210 | " create - create fn if it does not exist, then do a write test\n" |
---|
| 211 | " uncache - remove cached blocks of fn from buffer pool, then exit\n" |
---|
| 212 | " access patterns\n" |
---|
| 213 | " rand - access random records\n" |
---|
| 214 | " randhint - access random records, but use GPFS multiple access range\n" |
---|
| 215 | " hint\n" |
---|
| 216 | " strided - access records according to a non-overlapped strided\n" |
---|
| 217 | " pattern\n" |
---|
| 218 | " seq - access records sequentially\n" |
---|
| 219 | " fn - name of file to be accessed\n" |
---|
| 220 | " options can be:\n" |
---|
| 221 | " -nolabels - produce a single line of output containing all parameters\n" |
---|
| 222 | " and the measured data rate. Format of the output line is\n" |
---|
| 223 | " 'op pattern fn recordSize nBytes fileSize nProcs nThreads \n" |
---|
| 224 | " strideRecs inv ds dio shm fsync reltoken aio osync rate util'\n" |
---|
| 225 | " (default is to produce multi-line labelled output)\n" |
---|
| 226 | " -r recsize - record size (defaults to filesystem block size)\n" |
---|
| 227 | " -n nBytes - number of bytes to transfer (defaults to file size)\n" |
---|
| 228 | " -s stride - number of bytes between successive accesses by the\n" |
---|
| 229 | " same thread. Only meaningful for strided access patterns.\n" |
---|
| 230 | " Must be a multiple of the record size.\n" |
---|
| 231 | " Default is number of threads times number of processes.\n" |
---|
| 232 | " -th nThreads - number of threads per process (defaults to 1)\n" |
---|
| 233 | " -noinv - do not clear blocks of fn from the GPFS file cache before\n" |
---|
| 234 | " starting the test (default is to clear the cache)\n" |
---|
| 235 | " -ds - use GPFS data shipping (default is not to use data\n" |
---|
| 236 | " shipping)\n" |
---|
| 237 | " -aio depth - use Asynch I/O, prefetching to depth (default 0, max 1000)\n" |
---|
| 238 | " -dio - use direct I/O (default is not to use direct I/O)\n" |
---|
| 239 | " -shm - use shared memory buffer, with pinned large pages if\n" |
---|
| 240 | " possible (default is not to use shared memory buffer)\n" |
---|
| 241 | " -reltoken - release byte range token after open\n" |
---|
| 242 | " -fsync - fsync the file at the conclusion of a write or create test\n" |
---|
| 243 | " to insure that no dirty data remain buffered (default is\n" |
---|
| 244 | " not to fsync the file)\n" |
---|
| 245 | " -nocycle - for the seq access pattern when -n is larger than\n" |
---|
| 246 | " filesize / number-of-threads, read the entire file on each\n" |
---|
| 247 | " node rather than repeatedly reading the same segment of\n" |
---|
| 248 | " the file\n" |
---|
| 249 | " -tTimes - print detailed task times\n" |
---|
| 250 | " -v - verbose tracing\n" |
---|
| 251 | " -V - very verbose tracing\n" |
---|
| 252 | "\n" |
---|
| 253 | " Numbers can be given using K, M, G, or T suffixes, in either case, to\n" |
---|
| 254 | " denote 2**10, 2**20, 2**30, or 2**40, respectively, or can have an R\n" |
---|
| 255 | " suffix to denote a multiple of the record size.\n" |
---|
| 256 | "\n" |
---|
| 257 | " Reported data rate is in units of 1000 bytes/second.\n"); |
---|
| 258 | exit(1); |
---|
| 259 | } |
---|
| 260 | |
---|
| 261 | |
---|
| 262 | /* Convert a string to a number. The string may contain yy* prefixes and |
---|
| 263 | suffixes K, M, G, or T in either case. The suffix r or R is interpreted |
---|
| 264 | as the record size as input by the -r parameter. It is an error to use |
---|
| 265 | an R suffix if the -r parameter has not already appeared. */ |
---|
| 266 | long long GetNum(const char* p) |
---|
| 267 | { |
---|
| 268 | Int64 sign = +1; |
---|
| 269 | Int64 num = 0; |
---|
| 270 | const char* saveP = p; |
---|
| 271 | |
---|
| 272 | if (*p == '-') |
---|
| 273 | { |
---|
| 274 | sign = -1; |
---|
| 275 | p += 1; |
---|
| 276 | } |
---|
| 277 | while (isdigit(*p)) |
---|
| 278 | { |
---|
| 279 | num = 10*num + (*p)-'0'; |
---|
| 280 | p += 1; |
---|
| 281 | } |
---|
| 282 | num *= sign; |
---|
| 283 | |
---|
| 284 | switch (*p) |
---|
| 285 | { |
---|
| 286 | case '\0': |
---|
| 287 | return num; |
---|
| 288 | |
---|
| 289 | case '*': |
---|
| 290 | return num * GetNum(p+1); |
---|
| 291 | |
---|
| 292 | case '-': |
---|
| 293 | return num - GetNum(p+1); |
---|
| 294 | |
---|
| 295 | case '+': |
---|
| 296 | return num + GetNum(p+1); |
---|
| 297 | |
---|
| 298 | case 'k': |
---|
| 299 | case 'K': |
---|
| 300 | return 1024LL * num; |
---|
| 301 | |
---|
| 302 | case 'm': |
---|
| 303 | case 'M': |
---|
| 304 | return 1024LL * 1024LL * num; |
---|
| 305 | |
---|
| 306 | case 'g': |
---|
| 307 | case 'G': |
---|
| 308 | return 1024LL * 1024LL * 1024LL * num; |
---|
| 309 | |
---|
| 310 | case 't': |
---|
| 311 | case 'T': |
---|
| 312 | return 1024LL * 1024LL * 1024LL * 1024LL * num; |
---|
| 313 | |
---|
| 314 | case 'r': |
---|
| 315 | case 'R': |
---|
| 316 | if (RecordSize == -1) |
---|
| 317 | { |
---|
| 318 | fprintf(stderr, "Cannot use suffix R before -r parameter\n"); |
---|
| 319 | exit(1); |
---|
| 320 | } |
---|
| 321 | return (Int64)RecordSize * num; |
---|
| 322 | |
---|
| 323 | default: |
---|
| 324 | fprintf(stderr, "Invalid number %s\n", saveP); |
---|
| 325 | exit(1); |
---|
| 326 | } |
---|
| 327 | return 0; /* eliminate compiler warning */ |
---|
| 328 | } |
---|
| 329 | |
---|
| 330 | |
---|
| 331 | /* Convert a long integer to a string with a size suffix. For example, |
---|
| 332 | 2048 displays as 2K. */ |
---|
| 333 | char* int64ToString(Int64 in, char* bufP) |
---|
| 334 | { |
---|
| 335 | Int64 K = 1024LL; |
---|
| 336 | Int64 M = 1024LL*1024LL; |
---|
| 337 | Int64 G = 1024LL*1024LL*1024LL; |
---|
| 338 | |
---|
| 339 | if ((in % G) == 0) |
---|
| 340 | sprintf(bufP, "%lldG", in/G); |
---|
| 341 | else if ((in % M) == 0) |
---|
| 342 | sprintf(bufP, "%lldM", in/M); |
---|
| 343 | else if ((in % K) == 0) |
---|
| 344 | sprintf(bufP, "%lldK", in/K); |
---|
| 345 | else |
---|
| 346 | sprintf(bufP, "%lld", in); |
---|
| 347 | return bufP; |
---|
| 348 | } |
---|
| 349 | |
---|
| 350 | |
---|
/* Return quietly when rc is 0.  For any other value, report the return
   code together with reasonP (a description of the operation that produced
   it) on stderr and exit the program with status 1. */
void CheckRC(int rc, const char* reasonP)
{
  if (rc != 0)
  {
    fprintf(stderr, "RC %d from %s, exitting\n", rc, reasonP);
    exit(1);
  }
}
---|
| 359 | |
---|
| 360 | |
---|
| 361 | /* Compute the difference between two timestamps and return it as a |
---|
| 362 | floating point number of seconds */ |
---|
| 363 | double deltaTime(timebasestruct_t* startP, timebasestruct_t* stopP) |
---|
| 364 | { |
---|
| 365 | double dStartTime, dStopTime; |
---|
| 366 | |
---|
| 367 | #ifdef GPFS_AIX |
---|
| 368 | time_base_to_time(startP, TIMEBASE_SZ); |
---|
| 369 | time_base_to_time(stopP, TIMEBASE_SZ); |
---|
| 370 | #endif |
---|
| 371 | dStartTime = startP->tb_high + .000000001*startP->tb_low; |
---|
| 372 | dStopTime = stopP->tb_high + .000000001*stopP->tb_low; |
---|
| 373 | return dStopTime-dStartTime; |
---|
| 374 | } |
---|
| 375 | |
---|
| 376 | |
---|
| 377 | /* Do a series of random accesses to a file that has already been opened */ |
---|
| 378 | void DoRandTest(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 379 | { |
---|
| 380 | timebasestruct_t time; |
---|
| 381 | uint seed; |
---|
| 382 | struct random_data rState; |
---|
| 383 | char randState[256]; |
---|
| 384 | char* randP; |
---|
| 385 | Int64 nRecordsLeft; |
---|
| 386 | RANDRET_T randRet; |
---|
| 387 | Int64 nextRecordNum; |
---|
| 388 | offset_t desiredOffset; |
---|
| 389 | offset_t actualOffset; |
---|
| 390 | offset_t len; |
---|
| 391 | |
---|
| 392 | /* Set the seed of the random number generator so each thread accesses |
---|
| 393 | a different sequence of records */ |
---|
| 394 | GetTOD(&time); |
---|
| 395 | seed = (time.tb_high ^ time.tb_low) + (parmP->threadIndex+1)*getpid(); |
---|
| 396 | memset(&rState, 0, sizeof(rState)); |
---|
| 397 | |
---|
| 398 | INITSTATE_R(seed, randState, sizeof(randState), &randP, &rState); |
---|
| 399 | SETSTATE_R(randState, &randP, &rState); |
---|
| 400 | |
---|
| 401 | /* Read or write random records */ |
---|
| 402 | nRecordsLeft = parmP->nRecords; |
---|
| 403 | while (nRecordsLeft > 0) |
---|
| 404 | { |
---|
| 405 | /* Generate the next record number */ |
---|
| 406 | RANDOM_R(&randRet, &rState); |
---|
| 407 | nextRecordNum = randRet % RecordsInFile; |
---|
| 408 | |
---|
| 409 | /* Seek to the next record */ |
---|
| 410 | desiredOffset = (offset_t) (nextRecordNum * RecordSize); |
---|
| 411 | actualOffset = lseek64(handle, desiredOffset, SEEK_SET); |
---|
| 412 | if (actualOffset != desiredOffset) |
---|
| 413 | { |
---|
| 414 | fprintf(stderr, "Error lseek64'ing to %lld in %s\n", |
---|
| 415 | desiredOffset, FileNameP); |
---|
| 416 | exit(1); |
---|
| 417 | } |
---|
| 418 | |
---|
| 419 | /* Read or write next record */ |
---|
| 420 | if (Verbosity >= 2) |
---|
| 421 | printf(" th %d: %s record %lld\n", |
---|
| 422 | parmP->threadIndex, labelP, nextRecordNum); |
---|
| 423 | if (TestType == ReadTest) |
---|
| 424 | len = read(handle, parmP->bufP, RecordSize); |
---|
| 425 | else |
---|
| 426 | len = write(handle, parmP->bufP, RecordSize); |
---|
| 427 | if (len != RecordSize) |
---|
| 428 | { |
---|
| 429 | fprintf(stderr, "Wrong length in %s: %lld\n", labelP, len); |
---|
| 430 | exit(1); |
---|
| 431 | } |
---|
| 432 | |
---|
| 433 | /* Decrement number of records remaining and loop */ |
---|
| 434 | nRecordsLeft -= 1; |
---|
| 435 | } |
---|
| 436 | |
---|
| 437 | } |
---|
| 438 | |
---|
| 439 | |
---|
| 440 | /* Do a series of random accesses to a file that has already been opened. |
---|
| 441 | Before each group of accesses, issue a GPFS multiple access range hint |
---|
| 442 | so GPFS can prefetch blocks before they are needed. */ |
---|
| 443 | void DoRandTestWithHint(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 444 | { |
---|
| 445 | struct pixAccDesc * accP; |
---|
| 446 | struct PrefetchedIrregularXfer x; |
---|
| 447 | timebasestruct_t time; |
---|
| 448 | uint seed; |
---|
| 449 | struct random_data rState; |
---|
| 450 | char randState[256]; |
---|
| 451 | char* randP; |
---|
| 452 | Int64 nRecordsLeft; |
---|
| 453 | int nRecordsInChunk; |
---|
| 454 | int i; |
---|
| 455 | RANDRET_T randRet; |
---|
| 456 | Int64 nextRecordNum; |
---|
| 457 | int rc; |
---|
| 458 | int nBytes; |
---|
| 459 | |
---|
| 460 | /* Allocate space to describe a batch of random accesses */ |
---|
| 461 | accP = (struct pixAccDesc *)malloc(MAX_PREFETCHES*sizeof(struct pixAccDesc)); |
---|
| 462 | if (accP == NULL) |
---|
| 463 | { |
---|
| 464 | fprintf(stderr, "Cannot allocate access descriptors\n"); |
---|
| 465 | exit(1); |
---|
| 466 | } |
---|
| 467 | |
---|
| 468 | /* Initialize the PrefetchedIrregularXfer state */ |
---|
| 469 | pixInit(&x); |
---|
| 470 | |
---|
| 471 | /* Turn on tracing in the PrefetchedIrregularXfer class according to |
---|
| 472 | the -v flag */ |
---|
| 473 | if (Verbosity >= 2) |
---|
| 474 | pixSetTraceLevel(&x, 1); |
---|
| 475 | |
---|
| 476 | /* Set the seed of the random number generator so each thread accesses |
---|
| 477 | a different sequence of records */ |
---|
| 478 | GetTOD(&time); |
---|
| 479 | seed = (time.tb_high ^ time.tb_low) + (parmP->threadIndex+1)*getpid(); |
---|
| 480 | memset(&rState, 0, sizeof(rState)); |
---|
| 481 | |
---|
| 482 | INITSTATE_R(seed, randState, sizeof(randState), &randP, &rState); |
---|
| 483 | SETSTATE_R(randState, &randP, &rState); |
---|
| 484 | |
---|
| 485 | /* Read or write random records */ |
---|
| 486 | nRecordsLeft = parmP->nRecords; |
---|
| 487 | while (nRecordsLeft > 0) |
---|
| 488 | { |
---|
| 489 | /* Generate the next batch of record numbers */ |
---|
| 490 | if (nRecordsLeft >= MAX_PREFETCHES) |
---|
| 491 | nRecordsInChunk = MAX_PREFETCHES; |
---|
| 492 | else |
---|
| 493 | nRecordsInChunk = nRecordsLeft; |
---|
| 494 | for (i=0; i<nRecordsInChunk; i++) |
---|
| 495 | { |
---|
| 496 | RANDOM_R(&randRet, &rState); |
---|
| 497 | nextRecordNum = randRet % RecordsInFile; |
---|
| 498 | accP[i].off = (Int64)nextRecordNum * (Int64)RecordSize; |
---|
| 499 | accP[i].len = RecordSize; |
---|
| 500 | } |
---|
| 501 | |
---|
| 502 | /* Give the list of future accesses to the PrefetchedIrregularXfer object |
---|
| 503 | so it can begin issuing GPFS multiple access range hints to prefetch |
---|
| 504 | the data that will be needed */ |
---|
| 505 | rc = pixDeclareAccesses(&x, handle, TestType!=ReadTest, nRecordsInChunk, accP); |
---|
| 506 | if (rc != 0) |
---|
| 507 | { |
---|
| 508 | fprintf(stderr, "Error %d from declareAccesses\n", rc); |
---|
| 509 | exit(1); |
---|
| 510 | } |
---|
| 511 | |
---|
| 512 | /* Perform all of the reads or writes in the current chunk */ |
---|
| 513 | for (i=0; i<nRecordsInChunk; i++) |
---|
| 514 | { |
---|
| 515 | rc = pixXfer(&x, parmP->bufP, &nBytes); |
---|
| 516 | if (rc != 0) |
---|
| 517 | { |
---|
| 518 | fprintf(stderr, "Error %d from xfer %d\n", rc, i); |
---|
| 519 | exit(1); |
---|
| 520 | } |
---|
| 521 | if (nBytes != RecordSize) |
---|
| 522 | { |
---|
| 523 | fprintf(stderr, "xfer %d moved %d bytes instead of %d\n", |
---|
| 524 | i, nBytes, RecordSize); |
---|
| 525 | exit(1); |
---|
| 526 | } |
---|
| 527 | } |
---|
| 528 | |
---|
| 529 | /* Decrement number of records remaining and loop */ |
---|
| 530 | nRecordsLeft -= nRecordsInChunk; |
---|
| 531 | } |
---|
| 532 | |
---|
| 533 | /* Clean up PrefetchedIrregularXfer object, free any prefetched records, |
---|
| 534 | and free storage for access descriptions */ |
---|
| 535 | pixTerm(&x); |
---|
| 536 | free(accP); |
---|
| 537 | } |
---|
| 538 | |
---|
| 539 | |
---|
| 540 | /* Set a MAR for a file on this node. */ |
---|
| 541 | void FreeAllRanges(int handle) |
---|
| 542 | { |
---|
| 543 | struct |
---|
| 544 | { |
---|
| 545 | gpfsFcntlHeader_t hdr; |
---|
| 546 | gpfsFreeRange_t fr; |
---|
| 547 | } freeHint; |
---|
| 548 | int rc; |
---|
| 549 | |
---|
| 550 | /* Issue the free range hint */ |
---|
| 551 | freeHint.hdr.totalLength = sizeof(freeHint); |
---|
| 552 | freeHint.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; |
---|
| 553 | freeHint.hdr.fcntlReserved = 0; |
---|
| 554 | freeHint.fr.structLen = sizeof(gpfsFreeRange_t); |
---|
| 555 | freeHint.fr.structType = GPFS_FREE_RANGE; |
---|
| 556 | freeHint.fr.start = 0; |
---|
| 557 | freeHint.fr.length = 0; |
---|
| 558 | rc = gpfs_fcntl(handle, &freeHint); |
---|
| 559 | if (rc != 0) |
---|
| 560 | { |
---|
| 561 | fprintf(stderr, "gpfs_fcntl free range hint failed for file '%s'. " |
---|
| 562 | "errno=%d errorOffset=%d\n", |
---|
| 563 | FileNameP, errno, freeHint.hdr.errorOffset); |
---|
| 564 | exit(1); |
---|
| 565 | } |
---|
| 566 | } |
---|
| 567 | |
---|
| 568 | |
---|
| 569 | /* Set a MAR for a file on this node. */ |
---|
| 570 | void MARange(int handle, offset_t blockNumber, offset_t startoff, |
---|
| 571 | offset_t blkLen, Boolean isWrite) |
---|
| 572 | { |
---|
| 573 | struct |
---|
| 574 | { |
---|
| 575 | gpfsFcntlHeader_t hdr; |
---|
| 576 | gpfsMultipleAccessRange_t marh; |
---|
| 577 | } accHint; |
---|
| 578 | int rc; |
---|
| 579 | |
---|
| 580 | /* Issue the access range hint */ |
---|
| 581 | accHint.hdr.totalLength = sizeof(accHint); |
---|
| 582 | accHint.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; |
---|
| 583 | accHint.hdr.fcntlReserved = 0; |
---|
| 584 | accHint.marh.structLen = sizeof(accHint.marh); |
---|
| 585 | accHint.marh.structType = GPFS_MULTIPLE_ACCESS_RANGE; |
---|
| 586 | accHint.marh.accRangeCnt = 1; |
---|
| 587 | accHint.marh.relRangeCnt = 0; |
---|
| 588 | accHint.marh.accRangeArray[0].blockNumber = blockNumber; |
---|
| 589 | accHint.marh.accRangeArray[0].start = startoff; |
---|
| 590 | accHint.marh.accRangeArray[0].length = blkLen; |
---|
| 591 | accHint.marh.accRangeArray[0].isWrite = isWrite; |
---|
| 592 | rc = gpfs_fcntl(handle, &accHint); |
---|
| 593 | if (rc != 0) |
---|
| 594 | { |
---|
| 595 | fprintf(stderr, "gpfs_fcntl access range hint failed for file '%s'. " |
---|
| 596 | "errno=%d errorOffset=%d\n", |
---|
| 597 | FileNameP, errno, accHint.hdr.errorOffset); |
---|
| 598 | exit(1); |
---|
| 599 | } |
---|
| 600 | } |
---|
| 601 | |
---|
| 602 | /* Do a series of strided accesses to a file that has already been |
---|
| 603 | opened */ |
---|
| 604 | void DoStridedTest(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 605 | { |
---|
| 606 | Int64 strideRecords; |
---|
| 607 | Int64 nextRecordNum; |
---|
| 608 | Int64 maxRecordNum; |
---|
| 609 | Int64 nRecordsLeft; |
---|
| 610 | offset_t desiredOffset; |
---|
| 611 | offset_t actualOffset; |
---|
| 612 | offset_t len; |
---|
| 613 | |
---|
| 614 | /* Read or write records according to the strided pattern. If the end of |
---|
| 615 | the file is reached, seek back to the start record. */ |
---|
| 616 | if (Stride != 0) |
---|
| 617 | strideRecords = Stride / RecordSize; |
---|
| 618 | else |
---|
| 619 | strideRecords = NThreadsPerProcess * NProcs; |
---|
| 620 | nextRecordNum = parmP->startRecordNum; |
---|
| 621 | maxRecordNum = parmP->maxRecordNum; |
---|
| 622 | nRecordsLeft = parmP->nRecords; |
---|
| 623 | |
---|
| 624 | while (nRecordsLeft > 0) |
---|
| 625 | { |
---|
| 626 | /* Seek to the next record */ |
---|
| 627 | desiredOffset = (offset_t) (nextRecordNum * RecordSize); |
---|
| 628 | actualOffset = lseek64(handle, desiredOffset, SEEK_SET); |
---|
| 629 | if (actualOffset != desiredOffset) |
---|
| 630 | { |
---|
| 631 | fprintf(stderr, "Error lseek64'ing to %lld in %s\n", |
---|
| 632 | desiredOffset, FileNameP); |
---|
| 633 | exit(1); |
---|
| 634 | } |
---|
| 635 | |
---|
| 636 | /* Read or write next record */ |
---|
| 637 | if (Verbosity >= 2) |
---|
| 638 | printf(" th %d: %s record %lld\n", |
---|
| 639 | parmP->threadIndex, labelP, nextRecordNum); |
---|
| 640 | if (TestType == ReadTest) |
---|
| 641 | len = read(handle, parmP->bufP, RecordSize); |
---|
| 642 | else |
---|
| 643 | len = write(handle, parmP->bufP, RecordSize); |
---|
| 644 | if (len != RecordSize) |
---|
| 645 | { |
---|
| 646 | fprintf(stderr, "Wrong length in %s: %lld\n", labelP, len); |
---|
| 647 | exit(1); |
---|
| 648 | } |
---|
| 649 | |
---|
| 650 | /* If next record would be outside of the area to be processed by this |
---|
| 651 | thread, go back to the beginning of the area */ |
---|
| 652 | nextRecordNum += strideRecords; |
---|
| 653 | if (nextRecordNum > maxRecordNum) |
---|
| 654 | nextRecordNum = parmP->restartRecordNum; |
---|
| 655 | nRecordsLeft -= 1; |
---|
| 656 | } |
---|
| 657 | } |
---|
| 658 | |
---|
| 659 | |
---|
| 660 | /* Do a series of sequential accesses to a file that has already been |
---|
| 661 | opened */ |
---|
| 662 | void DoSeqTest(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 663 | { |
---|
| 664 | Int64 nextRecordNum; |
---|
| 665 | offset_t desiredOffset; |
---|
| 666 | offset_t actualOffset; |
---|
| 667 | Int64 maxRecordNum; |
---|
| 668 | Int64 nRecordsLeft; |
---|
| 669 | offset_t len; |
---|
| 670 | |
---|
| 671 | /* Seek to the beginning of the area that this thread should access */ |
---|
| 672 | nextRecordNum = parmP->startRecordNum; |
---|
| 673 | desiredOffset = (offset_t) (nextRecordNum * RecordSize); |
---|
| 674 | actualOffset = lseek64(handle, desiredOffset, SEEK_SET); |
---|
| 675 | if (actualOffset != desiredOffset) |
---|
| 676 | { |
---|
| 677 | fprintf(stderr, "Error lseek64'ing to %lld in %s\n", desiredOffset, FileNameP); |
---|
| 678 | exit(1); |
---|
| 679 | } |
---|
| 680 | |
---|
| 681 | /* Read or write records sequentially. If the end of the area to be |
---|
| 682 | accessed by this thread is reached, seek back to the beginning of the |
---|
| 683 | area. */ |
---|
| 684 | maxRecordNum = parmP->maxRecordNum; |
---|
| 685 | nRecordsLeft = parmP->nRecords; |
---|
| 686 | while (nRecordsLeft > 0) |
---|
| 687 | { |
---|
| 688 | /* Read or write next record */ |
---|
| 689 | if (Verbosity >= 2) |
---|
| 690 | printf(" th %d: %s record %lld\n", |
---|
| 691 | parmP->threadIndex, labelP, nextRecordNum); |
---|
| 692 | if (TestType == ReadTest) |
---|
| 693 | len = read(handle, parmP->bufP, RecordSize); |
---|
| 694 | else |
---|
| 695 | len = write(handle, parmP->bufP, RecordSize); |
---|
| 696 | if (len != RecordSize) |
---|
| 697 | { |
---|
| 698 | fprintf(stderr, "Wrong length in %s: %lld\n", labelP, len); |
---|
| 699 | exit(1); |
---|
| 700 | } |
---|
| 701 | |
---|
| 702 | /* If next record would be outside of the area to be processed by this |
---|
| 703 | thread, seek back to the beginning of the area */ |
---|
| 704 | nextRecordNum += 1; |
---|
| 705 | if (nextRecordNum > maxRecordNum) |
---|
| 706 | { |
---|
| 707 | nextRecordNum = parmP->restartRecordNum; |
---|
| 708 | desiredOffset = (offset_t) (nextRecordNum * RecordSize); |
---|
| 709 | actualOffset = lseek64(handle, desiredOffset, SEEK_SET); |
---|
| 710 | if (actualOffset != desiredOffset) |
---|
| 711 | { |
---|
| 712 | fprintf(stderr, "Error lseek64'ing to %lld in %s\n", |
---|
| 713 | desiredOffset, FileNameP); |
---|
| 714 | exit(1); |
---|
| 715 | } |
---|
| 716 | } |
---|
| 717 | nRecordsLeft -= 1; |
---|
| 718 | } |
---|
| 719 | } |
---|
| 720 | |
---|
| 721 | |
---|
| 722 | /* Do a series of strided accesses to a file that has already been |
---|
| 723 | opened, using AIO interface */ |
---|
| 724 | void DoStridedAIOTest(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 725 | { |
---|
| 726 | #ifndef NO_AIO |
---|
| 727 | Int64 strideRecords; |
---|
| 728 | Int64 nextRecordNum; |
---|
| 729 | Int64 maxRecordNum; |
---|
| 730 | Int64 nRecordsLeft; |
---|
| 731 | int len; |
---|
| 732 | int i, rc; |
---|
| 733 | char *curbufP; |
---|
| 734 | int OpIndex, PrefetchIndex; |
---|
| 735 | struct aiocb *accP; |
---|
| 736 | |
---|
| 737 | /* Allocate space to describe a batch of accesses */ |
---|
| 738 | accP = (struct aiocb *)malloc(AIODepth*sizeof(struct aiocb)); |
---|
| 739 | if (accP == NULL) |
---|
| 740 | { |
---|
| 741 | fprintf(stderr, "Cannot allocate AIO descriptors\n"); |
---|
| 742 | exit(1); |
---|
| 743 | } |
---|
| 744 | |
---|
| 745 | /* Set pointers to the buffers to be read into */ |
---|
| 746 | curbufP = parmP->bufP; |
---|
| 747 | for (i = 0; i < AIODepth; i++) |
---|
| 748 | { |
---|
| 749 | accP[i].aio_buf = curbufP; |
---|
| 750 | accP[i].aio_nbytes = RecordSize; |
---|
| 751 | /*accP[i].aio_offset = 0; set later */ |
---|
| 752 | curbufP += RecordSize; |
---|
| 753 | #ifdef GPFS_AIX |
---|
| 754 | accP[i].aio_whence = SEEK_SET; |
---|
| 755 | accP[i].aio_flag = 0; /* not AIO_SIGNAL */ |
---|
| 756 | accP[i].aio_handle = 0; |
---|
| 757 | #else |
---|
| 758 | accP[i].aio_fildes = handle; |
---|
| 759 | accP[i].aio_lio_opcode = 0; |
---|
| 760 | accP[i].aio_reqprio = 0; |
---|
| 761 | accP[i].aio_sigevent.sigev_notify = SIGEV_NONE; |
---|
| 762 | #endif |
---|
| 763 | } |
---|
| 764 | |
---|
| 765 | /* Read or write records according to the strided pattern. If the end of |
---|
| 766 | the file is reached, seek back to the start record. */ |
---|
| 767 | if (Stride != 0) |
---|
| 768 | strideRecords = Stride / RecordSize; |
---|
| 769 | else |
---|
| 770 | strideRecords = NThreadsPerProcess * NProcs; |
---|
| 771 | nextRecordNum = parmP->startRecordNum; |
---|
| 772 | |
---|
| 773 | maxRecordNum = parmP->maxRecordNum; |
---|
| 774 | nRecordsLeft = parmP->nRecords; |
---|
| 775 | OpIndex = 0; |
---|
| 776 | PrefetchIndex = 0; |
---|
| 777 | while (nRecordsLeft > 0) |
---|
| 778 | { |
---|
| 779 | if (PrefetchIndex >= AIODepth) |
---|
| 780 | { |
---|
| 781 | CONST struct aiocb *accPP[] = { &accP[OpIndex] }; |
---|
| 782 | /* All prefetches done, wait for next buffer before starting next |
---|
| 783 | prefetch */ |
---|
| 784 | do |
---|
| 785 | { |
---|
| 786 | rc = AIO_SUSPEND(1, accPP); |
---|
| 787 | if (rc < 0 && errno != EINTR) |
---|
| 788 | { |
---|
| 789 | fprintf(stderr, "Error %d from aio_suspend %d\n", errno, OpIndex); |
---|
| 790 | exit(1); |
---|
| 791 | } |
---|
| 792 | } while (rc != 0); |
---|
| 793 | len = AIO_RETURN(accP[OpIndex]); |
---|
| 794 | if (len < 0) |
---|
| 795 | { |
---|
| 796 | fprintf(stderr, "Error %d from AIO %d\n", |
---|
| 797 | AIO_ERROR(accP[OpIndex]), OpIndex); |
---|
| 798 | exit(1); |
---|
| 799 | } |
---|
| 800 | if (len != RecordSize) |
---|
| 801 | { |
---|
| 802 | fprintf(stderr, "AIO %d moved %d bytes instead of %d\n", |
---|
| 803 | OpIndex, len, RecordSize); |
---|
| 804 | exit(1); |
---|
| 805 | } |
---|
| 806 | PrefetchIndex = AIODepth; /* don't overflow PrefetchIndex */ |
---|
| 807 | } |
---|
| 808 | |
---|
| 809 | /* Read or write next record */ |
---|
| 810 | accP[OpIndex].aio_offset = (offset_t)nextRecordNum * (offset_t)RecordSize; |
---|
| 811 | if (Verbosity >= 2) |
---|
| 812 | printf(" th %d: %s record %lld\n", |
---|
| 813 | parmP->threadIndex, labelP, nextRecordNum); |
---|
| 814 | /* Start the next read or write */ |
---|
| 815 | do |
---|
| 816 | { |
---|
| 817 | if (TestType == ReadTest) |
---|
| 818 | rc = AIO_READ(handle, &accP[OpIndex]); |
---|
| 819 | else |
---|
| 820 | rc = AIO_WRITE(handle, &accP[OpIndex]); |
---|
| 821 | if (rc < 0) |
---|
| 822 | { |
---|
| 823 | if (errno != EAGAIN) |
---|
| 824 | { |
---|
| 825 | fprintf(stderr, "Error %d from aio_read/write %d\n", errno, OpIndex); |
---|
| 826 | exit(1); |
---|
| 827 | } |
---|
| 828 | } |
---|
| 829 | } while (rc != 0); |
---|
| 830 | |
---|
| 831 | /* If next record would be outside of the area to be processed by this |
---|
| 832 | thread, go back to the beginning of the area */ |
---|
| 833 | nextRecordNum += strideRecords; |
---|
| 834 | if (nextRecordNum > maxRecordNum) |
---|
| 835 | nextRecordNum = parmP->restartRecordNum; |
---|
| 836 | nRecordsLeft -= 1; |
---|
| 837 | OpIndex++; |
---|
| 838 | if (OpIndex >= AIODepth) OpIndex = 0; |
---|
| 839 | PrefetchIndex++; |
---|
| 840 | } |
---|
| 841 | #endif /* ! NO_AIO */ |
---|
| 842 | } |
---|
| 843 | |
---|
| 844 | |
---|
| 845 | /* Do a series of sequential accesses to a file that has already been |
---|
| 846 | opened, using AIO interface */ |
---|
| 847 | void DoSeqAIOTest(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 848 | { |
---|
| 849 | #ifndef NO_AIO |
---|
| 850 | Int64 nextRecordNum; |
---|
| 851 | Int64 maxRecordNum; |
---|
| 852 | Int64 nRecordsLeft; |
---|
| 853 | int len; |
---|
| 854 | int i, rc; |
---|
| 855 | char *curbufP; |
---|
| 856 | int OpIndex, PrefetchIndex; |
---|
| 857 | struct aiocb *accP; |
---|
| 858 | |
---|
| 859 | /* Allocate space to describe a batch of accesses */ |
---|
| 860 | accP = (struct aiocb *)malloc(AIODepth*sizeof(struct aiocb)); |
---|
| 861 | if (accP == NULL) |
---|
| 862 | { |
---|
| 863 | fprintf(stderr, "Cannot allocate AIO descriptors\n"); |
---|
| 864 | exit(1); |
---|
| 865 | } |
---|
| 866 | |
---|
| 867 | /* Set pointers to the buffers to be read into */ |
---|
| 868 | curbufP = parmP->bufP; |
---|
| 869 | for (i = 0; i < AIODepth; i++) |
---|
| 870 | { |
---|
| 871 | accP[i].aio_buf = curbufP; |
---|
| 872 | accP[i].aio_nbytes = RecordSize; |
---|
| 873 | /*accP[i].aio_offset = 0; set later */ |
---|
| 874 | curbufP += RecordSize; |
---|
| 875 | #ifdef GPFS_AIX |
---|
| 876 | accP[i].aio_whence = SEEK_SET; |
---|
| 877 | accP[i].aio_flag = 0; /* not AIO_SIGNAL */ |
---|
| 878 | accP[i].aio_handle = 0; |
---|
| 879 | #else |
---|
| 880 | accP[i].aio_fildes = handle; |
---|
| 881 | accP[i].aio_lio_opcode = 0; |
---|
| 882 | accP[i].aio_reqprio = 0; |
---|
| 883 | accP[i].aio_sigevent.sigev_notify = SIGEV_NONE; |
---|
| 884 | #endif |
---|
| 885 | } |
---|
| 886 | |
---|
| 887 | /* Seek to the beginning of the area that this thread should access */ |
---|
| 888 | nextRecordNum = parmP->startRecordNum; |
---|
| 889 | |
---|
| 890 | /* Read or write records sequentially. If the end of the area to be |
---|
| 891 | accessed by this thread is reached, seek back to the beginning of the |
---|
| 892 | area. */ |
---|
| 893 | maxRecordNum = parmP->maxRecordNum; |
---|
| 894 | nRecordsLeft = parmP->nRecords; |
---|
| 895 | OpIndex = 0; |
---|
| 896 | PrefetchIndex = 0; |
---|
| 897 | while (nRecordsLeft > 0) |
---|
| 898 | { |
---|
| 899 | if (PrefetchIndex >= AIODepth) |
---|
| 900 | { |
---|
| 901 | CONST struct aiocb *accPP[] = { &accP[OpIndex] }; |
---|
| 902 | /* All prefetches done, wait for next buffer before starting next |
---|
| 903 | prefetch */ |
---|
| 904 | do |
---|
| 905 | { |
---|
| 906 | rc = AIO_SUSPEND(1, accPP); |
---|
| 907 | if (rc < 0 && errno != EINTR) |
---|
| 908 | { |
---|
| 909 | fprintf(stderr, "Error %d from aio_suspend %d\n", errno, OpIndex); |
---|
| 910 | exit(1); |
---|
| 911 | } |
---|
| 912 | } while (rc != 0); |
---|
| 913 | len = AIO_RETURN(accP[OpIndex]); |
---|
| 914 | if (len < 0) |
---|
| 915 | { |
---|
| 916 | fprintf(stderr, "Error %d from AIO %d\n", |
---|
| 917 | AIO_ERROR(accP[OpIndex]), OpIndex); |
---|
| 918 | exit(1); |
---|
| 919 | } |
---|
| 920 | if (len != RecordSize) |
---|
| 921 | { |
---|
| 922 | fprintf(stderr, "AIO %d moved %d bytes instead of %d\n", |
---|
| 923 | OpIndex, len, RecordSize); |
---|
| 924 | exit(1); |
---|
| 925 | } |
---|
| 926 | PrefetchIndex = AIODepth; /* don't overflow PrefetchIndex */ |
---|
| 927 | } |
---|
| 928 | |
---|
| 929 | /* Read or write next record */ |
---|
| 930 | accP[OpIndex].aio_offset = (offset_t)nextRecordNum * (offset_t)RecordSize; |
---|
| 931 | if (Verbosity >= 2) |
---|
| 932 | printf(" th %d: %s record %lld\n", |
---|
| 933 | parmP->threadIndex, labelP, nextRecordNum); |
---|
| 934 | /* Start the next read or write */ |
---|
| 935 | do |
---|
| 936 | { |
---|
| 937 | if (TestType == ReadTest) |
---|
| 938 | rc = AIO_READ(handle, &accP[OpIndex]); |
---|
| 939 | else |
---|
| 940 | rc = AIO_WRITE(handle, &accP[OpIndex]); |
---|
| 941 | if (rc < 0) |
---|
| 942 | { |
---|
| 943 | if (errno != EAGAIN) |
---|
| 944 | { |
---|
| 945 | fprintf(stderr, "Error %d from aio_read/write %d\n", errno, OpIndex); |
---|
| 946 | exit(1); |
---|
| 947 | } |
---|
| 948 | } |
---|
| 949 | } while (rc != 0); |
---|
| 950 | |
---|
| 951 | /* If next record would be outside of the area to be processed by this |
---|
| 952 | thread, seek back to the beginning of the area */ |
---|
| 953 | nextRecordNum += 1; |
---|
| 954 | if (nextRecordNum > maxRecordNum) |
---|
| 955 | nextRecordNum = parmP->restartRecordNum; |
---|
| 956 | nRecordsLeft -= 1; |
---|
| 957 | OpIndex++; |
---|
| 958 | if (OpIndex >= AIODepth) OpIndex = 0; |
---|
| 959 | PrefetchIndex++; |
---|
| 960 | } |
---|
| 961 | #endif /* ! NO_AIO */ |
---|
| 962 | } |
---|
| 963 | |
---|
| 964 | |
---|
| 965 | /* Do a series of random accesses to a file that has already been opened. |
---|
| 966 | Issue AIO calls to prefetch blocks before they are needed. */ |
---|
| 967 | void DoRandAIOTest(int handle, struct ThreadParm * parmP, const char* labelP) |
---|
| 968 | { |
---|
| 969 | #ifndef NO_AIO |
---|
| 970 | timebasestruct_t time; |
---|
| 971 | uint seed; |
---|
| 972 | struct random_data rState; |
---|
| 973 | char randState[256]; |
---|
| 974 | char* randP; |
---|
| 975 | Int64 nRecordsLeft; |
---|
| 976 | RANDRET_T randRet; |
---|
| 977 | Int64 nextRecordNum; |
---|
| 978 | int i, rc; |
---|
| 979 | int len; |
---|
| 980 | char *curbufP; |
---|
| 981 | int OpIndex, PrefetchIndex; |
---|
| 982 | struct aiocb *accP; |
---|
| 983 | |
---|
| 984 | /* Allocate space to describe a batch of random accesses */ |
---|
| 985 | accP = (struct aiocb *)malloc(AIODepth*sizeof(struct aiocb)); |
---|
| 986 | if (accP == NULL) |
---|
| 987 | { |
---|
| 988 | fprintf(stderr, "Cannot allocate AIO descriptors\n"); |
---|
| 989 | exit(1); |
---|
| 990 | } |
---|
| 991 | |
---|
| 992 | /* Set pointers to the buffers to be read into */ |
---|
| 993 | curbufP = parmP->bufP; |
---|
| 994 | for (i = 0; i < AIODepth; i++) |
---|
| 995 | { |
---|
| 996 | accP[i].aio_buf = curbufP; |
---|
| 997 | accP[i].aio_nbytes = RecordSize; |
---|
| 998 | /*accP[i].aio_offset = 0; set later */ |
---|
| 999 | curbufP += RecordSize; |
---|
| 1000 | #ifdef GPFS_AIX |
---|
| 1001 | accP[i].aio_whence = SEEK_SET; |
---|
| 1002 | accP[i].aio_flag = 0; /* not AIO_SIGNAL */ |
---|
| 1003 | accP[i].aio_handle = 0; |
---|
| 1004 | #else |
---|
| 1005 | accP[i].aio_fildes = handle; |
---|
| 1006 | accP[i].aio_lio_opcode = 0; |
---|
| 1007 | accP[i].aio_reqprio = 0; |
---|
| 1008 | accP[i].aio_sigevent.sigev_notify = SIGEV_NONE; |
---|
| 1009 | #endif |
---|
| 1010 | } |
---|
| 1011 | |
---|
| 1012 | /* Set the seed of the random number generator so each thread accesses |
---|
| 1013 | a different sequence of records */ |
---|
| 1014 | GetTOD(&time); |
---|
| 1015 | seed = (time.tb_high ^ time.tb_low) + (parmP->threadIndex+1)*getpid(); |
---|
| 1016 | memset(&rState, 0, sizeof(rState)); |
---|
| 1017 | |
---|
| 1018 | INITSTATE_R(seed, randState, sizeof(randState), &randP, &rState); |
---|
| 1019 | SETSTATE_R(randState, &randP, &rState); |
---|
| 1020 | |
---|
| 1021 | /* Read or write random records */ |
---|
| 1022 | OpIndex = 0; |
---|
| 1023 | PrefetchIndex = 0; |
---|
| 1024 | nRecordsLeft = parmP->nRecords; |
---|
| 1025 | while (nRecordsLeft > 0) |
---|
| 1026 | { |
---|
| 1027 | if (PrefetchIndex >= AIODepth) |
---|
| 1028 | { |
---|
| 1029 | CONST struct aiocb *accPP[] = { &accP[OpIndex] }; |
---|
| 1030 | /* All prefetches done, wait for next buffer before starting next |
---|
| 1031 | prefetch */ |
---|
| 1032 | do |
---|
| 1033 | { |
---|
| 1034 | rc = AIO_SUSPEND(1, accPP); |
---|
| 1035 | if (rc < 0 && errno != EINTR) |
---|
| 1036 | { |
---|
| 1037 | fprintf(stderr, "Error %d from aio_suspend %d\n", errno, OpIndex); |
---|
| 1038 | exit(1); |
---|
| 1039 | } |
---|
| 1040 | } while (rc != 0); |
---|
| 1041 | len = AIO_RETURN(accP[OpIndex]); |
---|
| 1042 | if (len < 0) |
---|
| 1043 | { |
---|
| 1044 | fprintf(stderr, "Error %d from AIO %d\n", |
---|
| 1045 | AIO_ERROR(accP[OpIndex]), OpIndex); |
---|
| 1046 | exit(1); |
---|
| 1047 | } |
---|
| 1048 | if (len != RecordSize) |
---|
| 1049 | { |
---|
| 1050 | fprintf(stderr, "AIO %d moved %d bytes instead of %d\n", |
---|
| 1051 | OpIndex, len, RecordSize); |
---|
| 1052 | exit(1); |
---|
| 1053 | } |
---|
| 1054 | PrefetchIndex = AIODepth; /* don't overflow PrefetchIndex */ |
---|
| 1055 | } |
---|
| 1056 | |
---|
| 1057 | /* Generate the next record number */ |
---|
| 1058 | RANDOM_R(&randRet, &rState); |
---|
| 1059 | nextRecordNum = randRet % RecordsInFile; |
---|
| 1060 | accP[OpIndex].aio_offset = (Int64)nextRecordNum * (Int64)RecordSize; |
---|
| 1061 | |
---|
| 1062 | /* Start the next read or write */ |
---|
| 1063 | do |
---|
| 1064 | { |
---|
| 1065 | if (TestType == ReadTest) |
---|
| 1066 | rc = AIO_READ(handle, &accP[OpIndex]); |
---|
| 1067 | else |
---|
| 1068 | rc = AIO_WRITE(handle, &accP[OpIndex]); |
---|
| 1069 | if (rc < 0) |
---|
| 1070 | { |
---|
| 1071 | if (errno != EAGAIN) |
---|
| 1072 | { |
---|
| 1073 | fprintf(stderr, "Error %d from aio_read/write %d\n", errno, OpIndex); |
---|
| 1074 | exit(1); |
---|
| 1075 | } |
---|
| 1076 | } |
---|
| 1077 | } while (rc != 0); |
---|
| 1078 | |
---|
| 1079 | /* Decrement number of records remaining and loop */ |
---|
| 1080 | nRecordsLeft--; |
---|
| 1081 | OpIndex++; |
---|
| 1082 | if (OpIndex >= AIODepth) OpIndex = 0; |
---|
| 1083 | PrefetchIndex++; |
---|
| 1084 | } |
---|
| 1085 | |
---|
| 1086 | /* Clean up aiocbs */ |
---|
| 1087 | free(accP); |
---|
| 1088 | #endif /* ! NO_AIO */ |
---|
| 1089 | } |
---|
| 1090 | |
---|
| 1091 | |
---|
| 1092 | /* Start up data shipping. The second parameter is the total number of |
---|
| 1093 | open instances on all nodes that will be operating on the file. Must be |
---|
| 1094 | called for every such instance with the same value of nInsts. */ |
---|
| 1095 | void StartDataShipping(int handle, int nInsts) |
---|
| 1096 | { |
---|
| 1097 | struct |
---|
| 1098 | { |
---|
| 1099 | gpfsFcntlHeader_t hdr; |
---|
| 1100 | gpfsDataShipStart_t start; |
---|
| 1101 | } dsStart; |
---|
| 1102 | int rc; |
---|
| 1103 | |
---|
| 1104 | dsStart.hdr.totalLength = sizeof(dsStart); |
---|
| 1105 | dsStart.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; |
---|
| 1106 | dsStart.hdr.fcntlReserved = 0; |
---|
| 1107 | dsStart.start.structLen = sizeof(gpfsDataShipStart_t); |
---|
| 1108 | dsStart.start.structType = GPFS_DATA_SHIP_START; |
---|
| 1109 | dsStart.start.numInstances = nInsts; |
---|
| 1110 | dsStart.start.reserved = 0; |
---|
| 1111 | rc = gpfs_fcntl(handle, &dsStart); |
---|
| 1112 | if (rc != 0) |
---|
| 1113 | { |
---|
| 1114 | fprintf(stderr, "gpfs_fcntl DS start directive failed. " |
---|
| 1115 | "errno=%d errorOffset=%d\n", errno, dsStart.hdr.errorOffset); |
---|
| 1116 | exit(1); |
---|
| 1117 | } |
---|
| 1118 | } |
---|
| 1119 | |
---|
| 1120 | |
---|
| 1121 | /* Shut down data shipping. Must be called for every handle for which |
---|
| 1122 | StartDataShipping was called. */ |
---|
| 1123 | void StopDataShipping(int handle) |
---|
| 1124 | { |
---|
| 1125 | struct |
---|
| 1126 | { |
---|
| 1127 | gpfsFcntlHeader_t hdr; |
---|
| 1128 | gpfsDataShipStop_t stop; |
---|
| 1129 | } dsStop; |
---|
| 1130 | int rc; |
---|
| 1131 | |
---|
| 1132 | dsStop.hdr.totalLength = sizeof(dsStop); |
---|
| 1133 | dsStop.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; |
---|
| 1134 | dsStop.hdr.fcntlReserved = 0; |
---|
| 1135 | dsStop.stop.structLen = sizeof(dsStop.stop); |
---|
| 1136 | dsStop.stop.structType = GPFS_DATA_SHIP_STOP; |
---|
| 1137 | rc = gpfs_fcntl(handle, &dsStop); |
---|
| 1138 | if (rc != 0) |
---|
| 1139 | printf("gpfs_fcntl DS stop directive failed. errno=%d errorOffset=%d\n", |
---|
| 1140 | errno, dsStop.hdr.errorOffset); |
---|
| 1141 | } |
---|
| 1142 | |
---|
| 1143 | |
---|
| 1144 | /* Body of one worker thread. Parameter is the local thread index. */ |
---|
| 1145 | void* TestThreadBody(void* parm) |
---|
| 1146 | { |
---|
| 1147 | long th = (long)parm; |
---|
| 1148 | struct ThreadParm * parmP= &ParmArray[th]; |
---|
| 1149 | int openFlags; |
---|
| 1150 | timebasestruct_t afterOpenTime, beforeCloseTime; |
---|
| 1151 | int handle; |
---|
| 1152 | char* labelP; |
---|
| 1153 | int rc; |
---|
| 1154 | |
---|
| 1155 | /* Dump thread info if verbose tracing enabled */ |
---|
| 1156 | if (Verbosity >= 1) |
---|
| 1157 | printf("%d: th %d starting startRecordNum %lld restartRecordNum %lld maxRecordNum %lld nRecords %lld\n", |
---|
| 1158 | ProcNum, parmP->threadIndex, parmP->startRecordNum, |
---|
| 1159 | parmP->restartRecordNum, parmP->maxRecordNum, parmP->nRecords); |
---|
| 1160 | |
---|
| 1161 | /* Open the file with the appropriate flags */ |
---|
| 1162 | switch (TestType) |
---|
| 1163 | { |
---|
| 1164 | case CreateTest: |
---|
| 1165 | if (ProcNum == 0) |
---|
| 1166 | openFlags = O_CREAT | O_WRONLY; |
---|
| 1167 | else |
---|
| 1168 | openFlags = O_WRONLY; |
---|
| 1169 | break; |
---|
| 1170 | |
---|
| 1171 | case WriteTest: |
---|
| 1172 | openFlags = O_WRONLY; |
---|
| 1173 | break; |
---|
| 1174 | |
---|
| 1175 | case ReadTest: |
---|
| 1176 | openFlags = O_RDONLY; |
---|
| 1177 | break; |
---|
| 1178 | |
---|
| 1179 | default: |
---|
| 1180 | openFlags = 0; |
---|
| 1181 | break; |
---|
| 1182 | } |
---|
| 1183 | if (UseDirectIO) |
---|
| 1184 | openFlags |= O_DIRECT; |
---|
| 1185 | if (UseOsync) |
---|
| 1186 | openFlags |= O_SYNC; |
---|
| 1187 | #ifdef MULTI_NODE |
---|
| 1188 | if (TestType == CreateTest) |
---|
| 1189 | { |
---|
| 1190 | /* Create file on process 0 node first, then open on the rest */ |
---|
| 1191 | if (ProcNum == 0) |
---|
| 1192 | { |
---|
| 1193 | handle = open(FileNameP, openFlags, 0666); |
---|
| 1194 | if (DoFreeAll) |
---|
| 1195 | FreeAllRanges(handle); |
---|
| 1196 | } |
---|
| 1197 | rc = MPI_Barrier(MPI_COMM_WORLD); |
---|
| 1198 | CheckRC(rc, "start MPI_Barrier"); |
---|
| 1199 | if (ProcNum != 0) |
---|
| 1200 | handle = open(FileNameP, openFlags, 0666); |
---|
| 1201 | } |
---|
| 1202 | else |
---|
| 1203 | handle = open(FileNameP, openFlags, 0666); |
---|
| 1204 | #else |
---|
| 1205 | handle = open(FileNameP, openFlags, 0666); |
---|
| 1206 | #endif |
---|
| 1207 | if (handle == -1) |
---|
| 1208 | { |
---|
| 1209 | fprintf(stderr, "Could not open file %s, errno=%d\n", |
---|
| 1210 | FileNameP, errno); |
---|
| 1211 | exit(1); |
---|
| 1212 | } |
---|
| 1213 | |
---|
| 1214 | /* Set up data shipping if necessary */ |
---|
| 1215 | if (UseDataShipping) |
---|
| 1216 | StartDataShipping(handle, NThreadsPerProcess * NProcs); |
---|
| 1217 | |
---|
| 1218 | /* Capture start timestamp for transfers by this thread */ |
---|
| 1219 | GetTOD(&afterOpenTime); |
---|
| 1220 | |
---|
| 1221 | /* Read or write the requested number of records according to the correct |
---|
| 1222 | access pattern */ |
---|
| 1223 | if (TestType == ReadTest) |
---|
| 1224 | labelP = "read"; |
---|
| 1225 | else |
---|
| 1226 | labelP = "write"; |
---|
| 1227 | switch (TestPattern) |
---|
| 1228 | { |
---|
| 1229 | case RandPattern: |
---|
| 1230 | if (UseAsyncIO) |
---|
| 1231 | DoRandAIOTest(handle, parmP, labelP); |
---|
| 1232 | else |
---|
| 1233 | DoRandTest(handle, parmP, labelP); |
---|
| 1234 | break; |
---|
| 1235 | |
---|
| 1236 | case RandPatternWithHint: |
---|
| 1237 | DoRandTestWithHint(handle, parmP, labelP); |
---|
| 1238 | break; |
---|
| 1239 | |
---|
| 1240 | case StridedPattern: |
---|
| 1241 | if (UseAsyncIO) |
---|
| 1242 | DoStridedAIOTest(handle, parmP, labelP); |
---|
| 1243 | else |
---|
| 1244 | DoStridedTest(handle, parmP, labelP); |
---|
| 1245 | break; |
---|
| 1246 | |
---|
| 1247 | case SeqPattern: |
---|
| 1248 | if (UseAsyncIO) |
---|
| 1249 | DoSeqAIOTest(handle, parmP, labelP); |
---|
| 1250 | else |
---|
| 1251 | DoSeqTest(handle, parmP, labelP); |
---|
| 1252 | break; |
---|
| 1253 | |
---|
| 1254 | default: |
---|
| 1255 | break; |
---|
| 1256 | } |
---|
| 1257 | |
---|
| 1258 | /* Timestamp end of transfers and remember how long the thread ran */ |
---|
| 1259 | GetTOD(&beforeCloseTime); |
---|
| 1260 | parmP->xferInterval = deltaTime(&afterOpenTime, &beforeCloseTime); |
---|
| 1261 | |
---|
| 1262 | /* Shut down data shipping if it was in effect */ |
---|
| 1263 | if (UseDataShipping) |
---|
| 1264 | StopDataShipping(handle); |
---|
| 1265 | |
---|
| 1266 | /* Close the file and print progress indicator */ |
---|
| 1267 | rc = close(handle); |
---|
| 1268 | CheckRC(rc, "closing file"); |
---|
| 1269 | if (Verbosity >= 1) |
---|
| 1270 | printf("%d: th %d finished, did all transfers in %.2f seconds\n", |
---|
| 1271 | ProcNum, parmP->threadIndex, parmP->xferInterval); |
---|
| 1272 | pthread_exit(NULL); |
---|
| 1273 | } |
---|
| 1274 | |
---|
| 1275 | |
---|
| 1276 | /* Invalidate all cached data held on behalf of a file on this node. */ |
---|
| 1277 | void InvalidateFileCache(char* invFileNameP) |
---|
| 1278 | { |
---|
| 1279 | int handle; |
---|
| 1280 | struct |
---|
| 1281 | { |
---|
| 1282 | gpfsFcntlHeader_t hdr; |
---|
| 1283 | gpfsClearFileCache_t inv; |
---|
| 1284 | } invCacheHint; |
---|
| 1285 | int rc; |
---|
| 1286 | |
---|
| 1287 | /* Open the file. If the open fails, the file cannot be cached. */ |
---|
| 1288 | handle = open(invFileNameP, O_RDONLY, 0); |
---|
| 1289 | if (handle == -1) |
---|
| 1290 | return; |
---|
| 1291 | |
---|
| 1292 | /* Issue the invalidate hint */ |
---|
| 1293 | invCacheHint.hdr.totalLength = sizeof(invCacheHint); |
---|
| 1294 | invCacheHint.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; |
---|
| 1295 | invCacheHint.hdr.fcntlReserved = 0; |
---|
| 1296 | invCacheHint.inv.structLen = sizeof(gpfsClearFileCache_t); |
---|
| 1297 | invCacheHint.inv.structType = GPFS_CLEAR_FILE_CACHE; |
---|
| 1298 | rc = gpfs_fcntl(handle, &invCacheHint); |
---|
| 1299 | if (rc != 0) |
---|
| 1300 | { |
---|
| 1301 | fprintf(stderr, "gpfs_fcntl clear cache hint failed for file '%s'. " |
---|
| 1302 | "errno=%d errorOffset=%d\n", |
---|
| 1303 | invFileNameP, errno, invCacheHint.hdr.errorOffset); |
---|
| 1304 | exit(1); |
---|
| 1305 | } |
---|
| 1306 | |
---|
| 1307 | /* Close the file */ |
---|
| 1308 | rc = close(handle); |
---|
| 1309 | if (rc == -1) |
---|
| 1310 | { |
---|
| 1311 | fprintf(stderr, "Could not close file '%s' after flushing file cache, errno=%d\n", |
---|
| 1312 | invFileNameP, errno); |
---|
| 1313 | exit(1); |
---|
| 1314 | } |
---|
| 1315 | } |
---|
| 1316 | |
---|
| 1317 | |
---|
| 1318 | /* This routine is called at exit to clean up any shared memory segments |
---|
| 1319 | that might have been created if the UseSharedMem option was selected. */ |
---|
| 1320 | static void cleanupSegs() |
---|
| 1321 | { |
---|
| 1322 | int th; |
---|
| 1323 | for (th=0; th<NThreadsPerProcess; th++) |
---|
| 1324 | if (ParmArray[th].shmid >= 0) |
---|
| 1325 | { |
---|
| 1326 | shmdt(ParmArray[th].origbufP); |
---|
| 1327 | shmctl(ParmArray[th].shmid, IPC_RMID, NULL); |
---|
| 1328 | } |
---|
| 1329 | } |
---|
| 1330 | |
---|
| 1331 | |
---|
| 1332 | /* Main routine */ |
---|
| 1333 | int main(int argc, char *argvPP[]) |
---|
| 1334 | { |
---|
| 1335 | int i, j, rc, len; |
---|
| 1336 | struct stat statBuf; |
---|
| 1337 | int handle; |
---|
| 1338 | long th; |
---|
| 1339 | Int64 baseRecordsPerThreadToXfer; |
---|
| 1340 | Int64 threadsWithExtraRecordToXfer; |
---|
| 1341 | Int64 baseRecordsPerThreadInFile; |
---|
| 1342 | Int64 threadsWithExtraRecordInFile; |
---|
| 1343 | int gblThreadIndex; |
---|
| 1344 | char* p; |
---|
| 1345 | char buf1[32], buf2[32], buf3[32]; |
---|
| 1346 | pthread_t threadIds[MAX_THREADS]; |
---|
| 1347 | pthread_attr_t threadAttr; |
---|
| 1348 | timebasestruct_t startTime, endTime; |
---|
| 1349 | double totalSeconds; |
---|
| 1350 | double threadTimes[MAX_THREADS]; |
---|
| 1351 | double* allXferTimesP = NULL; |
---|
| 1352 | double totalXferTimes; |
---|
| 1353 | double threadUtil; |
---|
| 1354 | double dataRate; |
---|
| 1355 | double* dP; |
---|
| 1356 | |
---|
| 1357 | /* Initialize MPI communication */ |
---|
| 1358 | #ifdef MULTI_NODE |
---|
| 1359 | MPI_Init(&argc, &argvPP); |
---|
| 1360 | MPI_Comm_rank(MPI_COMM_WORLD, &ProcNum); |
---|
| 1361 | MPI_Comm_size(MPI_COMM_WORLD, &NProcs); |
---|
| 1362 | #else |
---|
| 1363 | ProcNum = 0; |
---|
| 1364 | NProcs = 1; |
---|
| 1365 | #endif |
---|
| 1366 | |
---|
| 1367 | /* Parse arguments */ |
---|
| 1368 | for (i=1; i<argc; i++) |
---|
| 1369 | { |
---|
| 1370 | if (strcmp(argvPP[i], "-r") == 0 && i+1 < argc) |
---|
| 1371 | { |
---|
| 1372 | i += 1; |
---|
| 1373 | RecordSize = GetNum(argvPP[i]); |
---|
| 1374 | } |
---|
| 1375 | else if (strcmp(argvPP[i], "-n") == 0 && i+1<argc) |
---|
| 1376 | { |
---|
| 1377 | i += 1; |
---|
| 1378 | BytesToTransfer = GetNum(argvPP[i]); |
---|
| 1379 | } |
---|
| 1380 | else if (strcmp(argvPP[i], "-s") == 0 && i+1<argc) |
---|
| 1381 | { |
---|
| 1382 | i += 1; |
---|
| 1383 | Stride = GetNum(argvPP[i]); |
---|
| 1384 | } |
---|
| 1385 | else if (strcmp(argvPP[i], "-th") == 0 && i+1<argc) |
---|
| 1386 | { |
---|
| 1387 | i += 1; |
---|
| 1388 | NThreadsPerProcess = GetNum(argvPP[i]); |
---|
| 1389 | } |
---|
| 1390 | else if (strcmp(argvPP[i], "create") == 0) |
---|
| 1391 | TestType = CreateTest; |
---|
| 1392 | else if (strcmp(argvPP[i], "write") == 0) |
---|
| 1393 | TestType = WriteTest; |
---|
| 1394 | else if (strcmp(argvPP[i], "read") == 0) |
---|
| 1395 | TestType = ReadTest; |
---|
| 1396 | else if (strcmp(argvPP[i], "uncache") == 0) |
---|
| 1397 | TestType = UncacheTest; |
---|
| 1398 | else if (strcmp(argvPP[i], "rand") == 0) |
---|
| 1399 | TestPattern = RandPattern; |
---|
| 1400 | else if (strcmp(argvPP[i], "randhint") == 0) |
---|
| 1401 | TestPattern = RandPatternWithHint; |
---|
| 1402 | else if (strcmp(argvPP[i], "strided") == 0) |
---|
| 1403 | TestPattern = StridedPattern; |
---|
| 1404 | else if (strcmp(argvPP[i], "seq") == 0) |
---|
| 1405 | TestPattern = SeqPattern; |
---|
| 1406 | else if (strcmp(argvPP[i], "-nolabels") == 0) |
---|
| 1407 | LabelledOutput = false; |
---|
| 1408 | else if (strcmp(argvPP[i], "-noinv") == 0) |
---|
| 1409 | DoInvalidate = false; |
---|
| 1410 | else if (strcmp(argvPP[i], "-ds") == 0) |
---|
| 1411 | UseDataShipping = true; |
---|
| 1412 | else if (strcmp(argvPP[i], "-aio") == 0) |
---|
| 1413 | { |
---|
| 1414 | i += 1; |
---|
| 1415 | #ifdef NO_AIO |
---|
| 1416 | fprintf(stderr, "aio option ignored.\n"); |
---|
| 1417 | #else |
---|
| 1418 | AIODepth = GetNum(argvPP[i]); |
---|
| 1419 | UseAsyncIO = true; |
---|
| 1420 | #endif /* ! NO_AIO */ |
---|
| 1421 | } |
---|
| 1422 | else if (strcmp(argvPP[i], "-dio") == 0) |
---|
| 1423 | UseDirectIO = true; |
---|
| 1424 | else if (strcmp(argvPP[i], "-shm") == 0) |
---|
| 1425 | UseSharedMem = true; |
---|
| 1426 | else if (strcmp(argvPP[i], "-reltoken") == 0) |
---|
| 1427 | DoFreeAll = true; |
---|
| 1428 | else if (strcmp(argvPP[i], "-fsync") == 0) |
---|
| 1429 | DoFsync = true; |
---|
| 1430 | else if (strcmp(argvPP[i], "-nocycle") == 0) |
---|
| 1431 | CycleData = false; |
---|
| 1432 | else if (strcmp(argvPP[i], "-tTimes") == 0) |
---|
| 1433 | TaskTimes = true; |
---|
| 1434 | else if (strcmp(argvPP[i], "-osync") == 0) |
---|
| 1435 | UseOsync = true; |
---|
| 1436 | else if (strcmp(argvPP[i], "-v") == 0) |
---|
| 1437 | Verbosity = 1; |
---|
| 1438 | else if (strcmp(argvPP[i], "-V") == 0) |
---|
| 1439 | Verbosity = 2; |
---|
| 1440 | else |
---|
| 1441 | { |
---|
| 1442 | if (FileNameP == NULL) |
---|
| 1443 | FileNameP = argvPP[i]; |
---|
| 1444 | else |
---|
| 1445 | { |
---|
| 1446 | if (ProcNum == 0) |
---|
| 1447 | fprintf(stderr, "File name '%s' already specified.\n", FileNameP); |
---|
| 1448 | exit(1); |
---|
| 1449 | } |
---|
| 1450 | } |
---|
| 1451 | } |
---|
| 1452 | |
---|
| 1453 | /* Validate arguments */ |
---|
| 1454 | if (FileNameP == NULL) |
---|
| 1455 | { |
---|
| 1456 | if (ProcNum == 0) |
---|
| 1457 | fprintf(stderr, "No file name given\n"); |
---|
| 1458 | Usage(); |
---|
| 1459 | } |
---|
| 1460 | |
---|
| 1461 | /* Stat the file to compute its size. If the file will be created by |
---|
| 1462 | this test, use the size given by the -n parameter. */ |
---|
| 1463 | rc = stat(FileNameP, &statBuf); |
---|
| 1464 | if (rc == -1) |
---|
| 1465 | { |
---|
| 1466 | if (TestType == CreateTest) |
---|
| 1467 | { |
---|
| 1468 | statBuf.st_blksize = RecordSize; |
---|
| 1469 | statBuf.st_size = BytesToTransfer; |
---|
| 1470 | } |
---|
| 1471 | else |
---|
| 1472 | { |
---|
| 1473 | if (ProcNum == 0) |
---|
| 1474 | fprintf(stderr, "Could not stat file '%s', errno=%d\n", |
---|
| 1475 | FileNameP, errno); |
---|
| 1476 | exit(1); |
---|
| 1477 | } |
---|
| 1478 | } |
---|
| 1479 | if (UseAsyncIO) |
---|
| 1480 | { |
---|
| 1481 | if (TestPattern==RandPatternWithHint) |
---|
| 1482 | { |
---|
| 1483 | fprintf(stderr, "Cannot use -aio with randhint\n"); |
---|
| 1484 | exit(1); |
---|
| 1485 | } |
---|
| 1486 | if (NThreadsPerProcess > 1) |
---|
| 1487 | { |
---|
| 1488 | fprintf(stderr, "Multiple threads per process cannot be used with -aio\n"); |
---|
| 1489 | exit(1); |
---|
| 1490 | } |
---|
| 1491 | if (AIODepth <= 0 || AIODepth > 1000) |
---|
| 1492 | { |
---|
| 1493 | fprintf(stderr, "-aio depth must be greater than 0 and less than 1000\n"); |
---|
| 1494 | exit(1); |
---|
| 1495 | } |
---|
| 1496 | } |
---|
| 1497 | |
---|
| 1498 | /* If the file already exists, issue the GPFS hint that flushes and |
---|
| 1499 | invalidates all cached data belonging to the file so that each |
---|
| 1500 | test begins in the same state */ |
---|
| 1501 | if (rc == 0 && (DoInvalidate || TestType == UncacheTest)) |
---|
| 1502 | { |
---|
| 1503 | InvalidateFileCache(FileNameP); |
---|
| 1504 | if (TestType == UncacheTest) |
---|
| 1505 | { |
---|
| 1506 | /* Nothing else to do. Wait until all processes have reached this |
---|
| 1507 | point, then exit */ |
---|
| 1508 | #ifdef MULTI_NODE |
---|
| 1509 | rc = MPI_Barrier(MPI_COMM_WORLD); |
---|
| 1510 | CheckRC(rc, "finish MPI_Barrier"); |
---|
| 1511 | #endif |
---|
| 1512 | if (ProcNum == 0) |
---|
| 1513 | printf("uncache %s\n", FileNameP); |
---|
| 1514 | #ifdef MULTI_NODE |
---|
| 1515 | MPI_Finalize(); |
---|
| 1516 | #endif |
---|
| 1517 | return 0; |
---|
| 1518 | } |
---|
| 1519 | } |
---|
| 1520 | |
---|
| 1521 | /* Continue validating arguments after cache invalidation */ |
---|
| 1522 | if (TestType == NoTest) |
---|
| 1523 | { |
---|
| 1524 | if (ProcNum == 0) |
---|
| 1525 | fprintf(stderr, "No test type given\n"); |
---|
| 1526 | Usage(); |
---|
| 1527 | } |
---|
| 1528 | if (TestPattern == NoPattern) |
---|
| 1529 | { |
---|
| 1530 | if (ProcNum == 0) |
---|
| 1531 | fprintf(stderr, "No access pattern given\n"); |
---|
| 1532 | Usage(); |
---|
| 1533 | } |
---|
| 1534 | if (Stride != 0 && |
---|
| 1535 | TestPattern != StridedPattern) |
---|
| 1536 | { |
---|
| 1537 | if (ProcNum == 0) |
---|
| 1538 | fprintf(stderr, "Cannot specify -s unless access pattern is strided\n"); |
---|
| 1539 | Usage(); |
---|
| 1540 | } |
---|
| 1541 | if (NThreadsPerProcess <= 0 || NThreadsPerProcess > MAX_THREADS) |
---|
| 1542 | { |
---|
| 1543 | if (ProcNum == 0) |
---|
| 1544 | fprintf(stderr, "Invalid number of threads\n"); |
---|
| 1545 | Usage(); |
---|
| 1546 | } |
---|
| 1547 | |
---|
| 1548 | /* If record size or number of bytes to transfer are not known, use the |
---|
| 1549 | information from the stat */ |
---|
| 1550 | if (RecordSize == -1) |
---|
| 1551 | RecordSize = statBuf.st_blksize; |
---|
| 1552 | if (BytesToTransfer == -1) |
---|
| 1553 | BytesToTransfer = (Int64)statBuf.st_size; |
---|
| 1554 | |
---|
| 1555 | /* Make sure that -r and -n are known */ |
---|
| 1556 | if (RecordSize == -1) |
---|
| 1557 | { |
---|
| 1558 | if (ProcNum == 0) |
---|
| 1559 | fprintf(stderr, "Record size not known; use -r option\n"); |
---|
| 1560 | exit(1); |
---|
| 1561 | } |
---|
| 1562 | if (BytesToTransfer == -1) |
---|
| 1563 | { |
---|
| 1564 | if (ProcNum == 0) |
---|
| 1565 | fprintf(stderr, "Bytes to transfer not known; use -n option\n"); |
---|
| 1566 | exit(1); |
---|
| 1567 | } |
---|
| 1568 | |
---|
| 1569 | /* Make sure -s is legal */ |
---|
| 1570 | if (Stride != 0 && |
---|
| 1571 | (Stride%RecordSize) != 0) |
---|
| 1572 | { |
---|
| 1573 | if (ProcNum == 0) |
---|
| 1574 | fprintf(stderr, "Stride must be a multiple of record size\n"); |
---|
| 1575 | exit(1); |
---|
| 1576 | } |
---|
| 1577 | |
---|
| 1578 | /* Compute number of records to read and write as well as number of |
---|
| 1579 | records in the file */ |
---|
| 1580 | RecordsInFile = (Int64)statBuf.st_size / RecordSize; |
---|
| 1581 | RecordsToTransfer = BytesToTransfer / RecordSize; |
---|
| 1582 | BytesToTransfer = RecordsToTransfer * RecordSize; |
---|
| 1583 | |
---|
| 1584 | /* Make sure the file is big enough to allow non-overlapping accesses */ |
---|
| 1585 | if (NThreadsPerProcess*NProcs > RecordsInFile && |
---|
| 1586 | (TestPattern==StridedPattern || TestPattern==SeqPattern)) |
---|
| 1587 | { |
---|
| 1588 | if (ProcNum == 0) |
---|
| 1589 | fprintf(stderr, "File too small to allow non-overlapping accesses\n"); |
---|
| 1590 | exit(1); |
---|
| 1591 | } |
---|
| 1592 | |
---|
| 1593 | /* If using shared memory, register a routine to clean up segments on exit */ |
---|
| 1594 | if (UseSharedMem) |
---|
| 1595 | { |
---|
| 1596 | for (th=0; th<NThreadsPerProcess; th++) |
---|
| 1597 | ParmArray[th].shmid = -1; |
---|
| 1598 | |
---|
| 1599 | if (atexit(cleanupSegs) < 0) |
---|
| 1600 | { |
---|
| 1601 | fprintf(stderr, "Cannot register exit routine\n"); |
---|
| 1602 | exit(1); |
---|
| 1603 | } |
---|
| 1604 | } |
---|
| 1605 | |
---|
| 1606 | /* Compute parameters for each worker thread, allocate each thread a |
---|
| 1607 | buffer, and touch all pages of the buffer to avoid page faults after we |
---|
| 1608 | begin timing. The calculation of which records are processed by each |
---|
| 1609 | thread is complicated by the possibility that the number of records in |
---|
| 1610 | the file and/or the number of records to be processed may not be an |
---|
| 1611 | integral multiple of the total number of threads. |
---|
| 1612 | For example, suppose there are 2 threads on each of 3 nodes, there |
---|
| 1613 | are a total of 14 records in the file, and we wish to process a total |
---|
| 1614 | of 17 records. Using a strided access pattern, the records to be |
---|
| 1615 | processed by each thread are as follows: |
---|
| 1616 | proc 0 thread 0: 0 6 12 |
---|
| 1617 | proc 0 thread 1: 1 7 13 |
---|
| 1618 | proc 1 thread 0: 2 8 2 |
---|
| 1619 | proc 1 thread 1: 3 9 3 |
---|
| 1620 | proc 2 thread 0: 4 10 4 |
---|
| 1621 | proc 2 thread 1: 5 11 |
---|
| 1622 | Under the same assumptions, a sequential access pattern would do the |
---|
| 1623 | following accesses: |
---|
| 1624 | proc 0 thread 0: 0 1 2 |
---|
| 1625 | proc 0 thread 1: 3 4 5 |
---|
| 1626 | proc 1 thread 0: 6 7 6 |
---|
| 1627 | proc 1 thread 1: 8 9 8 |
---|
| 1628 | proc 2 thread 0: 10 11 10 |
---|
| 1629 | proc 2 thread 1: 12 13 |
---|
| 1630 | */ |
---|
| 1631 | baseRecordsPerThreadToXfer = RecordsToTransfer / (NThreadsPerProcess*NProcs); |
---|
| 1632 | threadsWithExtraRecordToXfer = RecordsToTransfer - |
---|
| 1633 | (baseRecordsPerThreadToXfer*NThreadsPerProcess*NProcs); |
---|
| 1634 | baseRecordsPerThreadInFile = RecordsInFile / (NThreadsPerProcess*NProcs); |
---|
| 1635 | threadsWithExtraRecordInFile = RecordsInFile - |
---|
| 1636 | (baseRecordsPerThreadInFile*NThreadsPerProcess*NProcs); |
---|
| 1637 | for (th=0; th<NThreadsPerProcess; th++) |
---|
| 1638 | { |
---|
| 1639 | gblThreadIndex = ProcNum*NThreadsPerProcess + th; |
---|
| 1640 | ParmArray[th].nRecords = baseRecordsPerThreadToXfer; |
---|
| 1641 | if (gblThreadIndex < threadsWithExtraRecordToXfer) |
---|
| 1642 | ParmArray[th].nRecords += 1; |
---|
| 1643 | if (TestPattern == StridedPattern) |
---|
| 1644 | { |
---|
| 1645 | ParmArray[th].startRecordNum = gblThreadIndex; |
---|
| 1646 | ParmArray[th].restartRecordNum = gblThreadIndex; |
---|
| 1647 | ParmArray[th].maxRecordNum = RecordsInFile - 1; |
---|
| 1648 | } |
---|
| 1649 | else if (TestPattern == SeqPattern) |
---|
| 1650 | { |
---|
| 1651 | if (gblThreadIndex < threadsWithExtraRecordInFile) |
---|
| 1652 | ParmArray[th].startRecordNum = gblThreadIndex * |
---|
| 1653 | (baseRecordsPerThreadInFile+1); |
---|
| 1654 | else |
---|
| 1655 | ParmArray[th].startRecordNum = gblThreadIndex*baseRecordsPerThreadInFile + |
---|
| 1656 | threadsWithExtraRecordInFile; |
---|
| 1657 | if (CycleData) |
---|
| 1658 | { |
---|
| 1659 | ParmArray[th].restartRecordNum = ParmArray[th].startRecordNum; |
---|
| 1660 | ParmArray[th].maxRecordNum = ParmArray[th].startRecordNum + |
---|
| 1661 | baseRecordsPerThreadInFile - 1; |
---|
| 1662 | if (gblThreadIndex < threadsWithExtraRecordInFile) |
---|
| 1663 | ParmArray[th].maxRecordNum += 1; |
---|
| 1664 | } |
---|
| 1665 | else |
---|
| 1666 | { |
---|
| 1667 | ParmArray[th].restartRecordNum = 0; |
---|
| 1668 | ParmArray[th].maxRecordNum = RecordsInFile - 1; |
---|
| 1669 | } |
---|
| 1670 | } |
---|
| 1671 | else /* random */ |
---|
| 1672 | { |
---|
| 1673 | ParmArray[th].startRecordNum = 0; |
---|
| 1674 | ParmArray[th].restartRecordNum = 0; |
---|
| 1675 | ParmArray[th].maxRecordNum = RecordsInFile - 1; |
---|
| 1676 | } |
---|
| 1677 | |
---|
| 1678 | len = RecordSize; |
---|
| 1679 | if (UseAsyncIO) |
---|
| 1680 | len *= AIODepth; |
---|
| 1681 | len += PAGE_SIZE; |
---|
| 1682 | |
---|
| 1683 | if (UseSharedMem) |
---|
| 1684 | { |
---|
| 1685 | int shmid = shmget(IPC_PRIVATE, len, IPC_CREAT | S_IRUSR | S_IWUSR |
---|
| 1686 | #ifdef GPFS_AIX |
---|
| 1687 | | SHM_LGPAGE | SHM_PIN |
---|
| 1688 | #endif |
---|
| 1689 | ); |
---|
| 1690 | if (shmid < 0) |
---|
| 1691 | { |
---|
| 1692 | fprintf(stderr, "Cannot allocate shared memory buffer for thread %d, errno %d\n", |
---|
| 1693 | gblThreadIndex, errno); |
---|
| 1694 | exit(1); |
---|
| 1695 | } |
---|
| 1696 | p = (char *)shmat(shmid, 0, 0); |
---|
| 1697 | if (p == (char *)-1) |
---|
| 1698 | { |
---|
| 1699 | fprintf(stderr, "Cannot attach shared memory buffer for thread %d, errno %d\n", |
---|
| 1700 | gblThreadIndex, errno); |
---|
| 1701 | shmctl(shmid, IPC_RMID, NULL); |
---|
| 1702 | exit(1); |
---|
| 1703 | } |
---|
| 1704 | ParmArray[th].shmid = shmid; |
---|
| 1705 | } |
---|
| 1706 | else |
---|
| 1707 | { |
---|
| 1708 | p = (char*) malloc(len); |
---|
| 1709 | if (p == NULL) |
---|
| 1710 | { |
---|
| 1711 | fprintf(stderr, "Cannot allocate buffer for thread %d\n", gblThreadIndex); |
---|
| 1712 | exit(1); |
---|
| 1713 | } |
---|
| 1714 | } |
---|
| 1715 | ParmArray[th].origbufP = p; |
---|
| 1716 | ParmArray[th].bufP = (char*)(((ulong)p + PAGE_SIZE - 1) & ~(PAGE_SIZE-1)); |
---|
| 1717 | memset(ParmArray[th].bufP, (char)gblThreadIndex, RecordSize); |
---|
| 1718 | ParmArray[th].threadIndex = th; |
---|
| 1719 | } |
---|
| 1720 | |
---|
| 1721 | if (UseAsyncIO) |
---|
| 1722 | { |
---|
| 1723 | #ifdef GPFS_LINUX |
---|
| 1724 | #ifndef NO_AIO |
---|
| 1725 | memset(&st_aioinit, '\0', sizeof(st_aioinit)); |
---|
| 1726 | st_aioinit.aio_threads = AIODepth; |
---|
| 1727 | st_aioinit.aio_num = AIODepth; |
---|
| 1728 | st_aioinit.aio_idle_time = 10; |
---|
| 1729 | aio_init(&st_aioinit); |
---|
| 1730 | #endif |
---|
| 1731 | #endif |
---|
| 1732 | } |
---|
| 1733 | |
---|
| 1734 | /* Print message describing test */ |
---|
| 1735 | if (LabelledOutput && |
---|
| 1736 | ProcNum == 0) |
---|
| 1737 | { |
---|
| 1738 | printf("%s %s %s %s\n", |
---|
| 1739 | argvPP[0], |
---|
| 1740 | TestType==CreateTest ? "create" : |
---|
| 1741 | TestType==WriteTest ? "write" : |
---|
| 1742 | TestType==ReadTest ? "read" : |
---|
| 1743 | "??", |
---|
| 1744 | TestPattern==RandPattern ? "rand" : |
---|
| 1745 | TestPattern==RandPatternWithHint ? "randhint" : |
---|
| 1746 | TestPattern==StridedPattern ? "strided" : |
---|
| 1747 | TestPattern==SeqPattern ? "seq" : |
---|
| 1748 | "??", |
---|
| 1749 | FileNameP); |
---|
| 1750 | printf(" recSize %s nBytes %s fileSize %s\n", |
---|
| 1751 | int64ToString(RecordSize, buf1), |
---|
| 1752 | int64ToString(BytesToTransfer, buf2), |
---|
| 1753 | int64ToString((Int64)statBuf.st_size, buf3)); |
---|
| 1754 | printf(" nProcesses %d nThreadsPerProcess %d\n", |
---|
| 1755 | NProcs, |
---|
| 1756 | NThreadsPerProcess); |
---|
| 1757 | if (TestPattern == StridedPattern && |
---|
| 1758 | Stride != 0) |
---|
| 1759 | printf(" stride %lld records\n", Stride/RecordSize); |
---|
| 1760 | printf(" file cache %sflushed before test\n", |
---|
| 1761 | DoInvalidate ? "" : "not "); |
---|
| 1762 | printf(" %susing data shipping\n", |
---|
| 1763 | UseDataShipping ? "" : "not "); |
---|
| 1764 | printf(" %susing direct I/O\n", |
---|
| 1765 | UseDirectIO ? "" : "not "); |
---|
| 1766 | printf(" offsets accessed will%s cycle through the same file segment\n", |
---|
| 1767 | CycleData ? "" : " not"); |
---|
| 1768 | printf(" %susing shared memory buffer\n", |
---|
| 1769 | UseSharedMem ? "" : "not "); |
---|
| 1770 | if (UseOsync) |
---|
| 1771 | printf(" using O_SYNC option\n"); |
---|
| 1772 | if (UseAsyncIO) |
---|
| 1773 | printf(" using AIO to depth %d\n", AIODepth); |
---|
| 1774 | printf(" %sreleasing byte-range token after open\n", |
---|
| 1775 | DoFreeAll ? "" : "not "); |
---|
| 1776 | if (TestType != ReadTest) |
---|
| 1777 | printf(" %sfsync at end of test\n", |
---|
| 1778 | DoFsync ? "" : "no "); |
---|
| 1779 | } |
---|
| 1780 | |
---|
| 1781 | #ifdef MULTI_NODE |
---|
| 1782 | /* Synchronize all processes before continuing */ |
---|
| 1783 | rc = MPI_Barrier(MPI_COMM_WORLD); |
---|
| 1784 | CheckRC(rc, "start MPI_Barrier"); |
---|
| 1785 | #endif |
---|
| 1786 | |
---|
| 1787 | /* Get start time */ |
---|
| 1788 | GetTOD(&startTime); |
---|
| 1789 | |
---|
| 1790 | #ifdef MULTI_NODE |
---|
| 1791 | /* Insure that no process does any work until after the master gets the |
---|
| 1792 | start timestamp */ |
---|
| 1793 | rc = MPI_Barrier(MPI_COMM_WORLD); |
---|
| 1794 | CheckRC(rc, "startTime MPI_Barrier"); |
---|
| 1795 | #endif |
---|
| 1796 | |
---|
| 1797 | /* Fork off worker threads, then wait for them all to finish */ |
---|
| 1798 | pthread_attr_init(&threadAttr); |
---|
| 1799 | pthread_attr_setdetachstate(&threadAttr, PTHREAD_CREATE_JOINABLE); |
---|
| 1800 | for (th=0; th<NThreadsPerProcess; th++) |
---|
| 1801 | { |
---|
| 1802 | rc = pthread_create(&threadIds[th], &threadAttr, TestThreadBody, |
---|
| 1803 | (void *)th); |
---|
| 1804 | CheckRC(rc, "pthread_create"); |
---|
| 1805 | } |
---|
| 1806 | for (th=0; th<NThreadsPerProcess; th++) |
---|
| 1807 | { |
---|
| 1808 | rc = pthread_join(threadIds[th], NULL); |
---|
| 1809 | CheckRC(rc, "pthread_join"); |
---|
| 1810 | } |
---|
| 1811 | |
---|
| 1812 | #ifdef MULTI_NODE |
---|
| 1813 | /* Synchronize all processes (wait for all threads on all nodes to |
---|
| 1814 | finish) */ |
---|
| 1815 | rc = MPI_Barrier(MPI_COMM_WORLD); |
---|
| 1816 | CheckRC(rc, "finish MPI_Barrier"); |
---|
| 1817 | #endif |
---|
| 1818 | |
---|
| 1819 | /* Flush the file to disk if -fsync option was given and this test may |
---|
| 1820 | have changed the file. On version 1.3 of GPFS and later, this only |
---|
| 1821 | needs to be done by one process, since fsync by one node flushes the |
---|
| 1822 | file globally. Earlier versions of GPFS must perform the fsync on all |
---|
| 1823 | nodes to insure that all data and metadata have been written to disk. |
---|
| 1824 | The file has already been closed by the threads on this node, so we |
---|
| 1825 | need to reopen the file just to do the fsync. */ |
---|
| 1826 | if (DoFsync && |
---|
| 1827 | ProcNum == 0 && |
---|
| 1828 | TestType != ReadTest) |
---|
| 1829 | { |
---|
| 1830 | handle = open(FileNameP, O_WRONLY, 0666); |
---|
| 1831 | if (handle == -1) |
---|
| 1832 | { |
---|
| 1833 | fprintf(stderr, "Could not open file '%s' to fsync it, errno=%d\n", |
---|
| 1834 | FileNameP, errno); |
---|
| 1835 | exit(1); |
---|
| 1836 | } |
---|
| 1837 | rc = fsync(handle); |
---|
| 1838 | if (rc == -1) |
---|
| 1839 | { |
---|
| 1840 | fprintf(stderr, "Error %d from fsync\n", errno); |
---|
| 1841 | exit(1); |
---|
| 1842 | } |
---|
| 1843 | rc = close(handle); |
---|
| 1844 | if (rc == -1) |
---|
| 1845 | { |
---|
| 1846 | fprintf(stderr, "Could not close file '%s' after fsync, errno=%d\n", |
---|
| 1847 | FileNameP, errno); |
---|
| 1848 | exit(1); |
---|
| 1849 | } |
---|
| 1850 | } |
---|
| 1851 | #ifdef MULTI_NODE |
---|
| 1852 | if (DoFsync && TestType != ReadTest) |
---|
| 1853 | { |
---|
| 1854 | rc = MPI_Barrier(MPI_COMM_WORLD); |
---|
| 1855 | CheckRC(rc, "fsync MPI_Barrier"); |
---|
| 1856 | } |
---|
| 1857 | #endif |
---|
| 1858 | |
---|
| 1859 | /* Get stop time and compute test interval */ |
---|
| 1860 | GetTOD(&endTime); |
---|
| 1861 | totalSeconds = deltaTime(&startTime, &endTime); |
---|
| 1862 | if (Verbosity >= 1) |
---|
| 1863 | printf("%d: totalSeconds %f\n", ProcNum, totalSeconds); |
---|
| 1864 | |
---|
| 1865 | /* Thread utilization or efficiency: we would like to know if all threads |
---|
| 1866 | took the same amount of time or if some threads finished significantly |
---|
| 1867 | before others. For example, suppose the test involved 4 threads and |
---|
| 1868 | ran for a total of 10 seconds. If all four threads required the full |
---|
| 1869 | 10 seconds to do all of their reads or writes, the utilization would be |
---|
| 1870 | (10+10+10+10)/(4*10) = 1.000. On the other hand, if 3 of the threads |
---|
| 1871 | took 4 seconds each and the last one took 9 seconds, the utilization |
---|
| 1872 | would be (4+4+4+9)/(4*10) = 0.525. Gather the thread run time |
---|
| 1873 | information from all processes to process 0. */ |
---|
| 1874 | for (th=0; th<NThreadsPerProcess; th++) |
---|
| 1875 | threadTimes[th] = ParmArray[th].xferInterval; |
---|
| 1876 | if (ProcNum == 0) |
---|
| 1877 | allXferTimesP = (double*)malloc(NProcs*NThreadsPerProcess*sizeof(double)); |
---|
| 1878 | #ifdef MULTI_NODE |
---|
| 1879 | rc = MPI_Gather((void*)threadTimes, NThreadsPerProcess*sizeof(double), MPI_BYTE, |
---|
| 1880 | (void*)allXferTimesP, NThreadsPerProcess*sizeof(double), MPI_BYTE, |
---|
| 1881 | 0, MPI_COMM_WORLD); |
---|
| 1882 | CheckRC(rc, "MPI_Gather"); |
---|
| 1883 | #else |
---|
| 1884 | for (i=0; i<NThreadsPerProcess; i++) |
---|
| 1885 | allXferTimesP[i] = threadTimes[i]; |
---|
| 1886 | #endif |
---|
| 1887 | |
---|
| 1888 | if (ProcNum == 0) |
---|
| 1889 | { |
---|
| 1890 | /* Compute the thread utilization according to the definition above */ |
---|
| 1891 | totalXferTimes = 0.0; |
---|
| 1892 | for (i=0; i<NProcs*NThreadsPerProcess; i++) |
---|
| 1893 | totalXferTimes += allXferTimesP[i]; |
---|
| 1894 | threadUtil = totalXferTimes / (NProcs*NThreadsPerProcess*totalSeconds); |
---|
| 1895 | |
---|
| 1896 | /* Print detailed task and thread times */ |
---|
| 1897 | if (TaskTimes) |
---|
| 1898 | { |
---|
| 1899 | printf("\n Task times\n"); |
---|
| 1900 | printf(" task thd time fract\n"); |
---|
| 1901 | dP = allXferTimesP; |
---|
| 1902 | for (i=0; i<NProcs; i++) |
---|
| 1903 | for (j=0; j<NThreadsPerProcess; j++) |
---|
| 1904 | { |
---|
| 1905 | printf(" %5d %5d %8.2f %5.3f\n", |
---|
| 1906 | i, j, *dP, *dP/totalSeconds); |
---|
| 1907 | dP += 1; |
---|
| 1908 | } |
---|
| 1909 | } |
---|
| 1910 | |
---|
| 1911 | /* Compute data rate and print results */ |
---|
| 1912 | dataRate = (BytesToTransfer / totalSeconds) / 1000.0; |
---|
| 1913 | if (LabelledOutput) |
---|
| 1914 | printf(" Data rate was %.2f Kbytes/sec, thread utilization %.3f\n", |
---|
| 1915 | dataRate, threadUtil); |
---|
| 1916 | else |
---|
| 1917 | printf("%s %s %s %d %lld %lld %d %d %lld %d %d %d %d %d %d %d %d %d %.2f %.3f\n", |
---|
| 1918 | TestType==CreateTest ? "create" : |
---|
| 1919 | TestType==WriteTest ? "write" : |
---|
| 1920 | TestType==ReadTest ? "read" : |
---|
| 1921 | "??", |
---|
| 1922 | TestPattern==RandPattern ? "rand" : |
---|
| 1923 | TestPattern==RandPatternWithHint ? "randhint" : |
---|
| 1924 | TestPattern==StridedPattern ? "strided" : |
---|
| 1925 | TestPattern==SeqPattern ? "seq" : |
---|
| 1926 | "??", |
---|
| 1927 | FileNameP, |
---|
| 1928 | RecordSize, |
---|
| 1929 | BytesToTransfer, |
---|
| 1930 | (Int64)statBuf.st_size, |
---|
| 1931 | NProcs, |
---|
| 1932 | NThreadsPerProcess, |
---|
| 1933 | Stride/RecordSize, |
---|
| 1934 | DoInvalidate, |
---|
| 1935 | UseDataShipping, |
---|
| 1936 | UseDirectIO, |
---|
| 1937 | UseSharedMem, |
---|
| 1938 | DoFsync, |
---|
| 1939 | CycleData, |
---|
| 1940 | DoFreeAll, |
---|
| 1941 | AIODepth, |
---|
| 1942 | UseOsync, |
---|
| 1943 | dataRate, |
---|
| 1944 | threadUtil); |
---|
| 1945 | |
---|
| 1946 | } |
---|
| 1947 | |
---|
| 1948 | #ifdef MULTI_NODE |
---|
| 1949 | /* Terminate MPI */ |
---|
| 1950 | MPI_Finalize(); |
---|
| 1951 | #endif |
---|
| 1952 | |
---|
| 1953 | /* Return success */ |
---|
| 1954 | return 0; |
---|
| 1955 | } |
---|
| 1956 | |
---|
| 1957 | #ifdef GPFS_LINUX |
---|
| 1958 | void |
---|
| 1959 | GetTOD(timebasestruct_t *tP) |
---|
| 1960 | { |
---|
| 1961 | struct timeval time; |
---|
| 1962 | |
---|
| 1963 | gettimeofday(&time, NULL); |
---|
| 1964 | tP->tb_high = time.tv_sec; |
---|
| 1965 | tP->tb_low = time.tv_usec * 1000; |
---|
| 1966 | } |
---|
| 1967 | #endif |
---|