## Sample configuration file

## Numbers may end with a single letter:
## k or K meaning 1024
## m or M meaning 1048576 (1024*1024)
##
## The '#' character is the comment character. Any parameter
## modified herein should have any preceding '#' removed.
##

######## Memory / Shared Segment Configuration ########

## The pagepool is used for I/O buffers. It is always pinned.
## The allowable range is 4M to 512M (AIX).
## The allowable range is 4M to 1300M (LINUX).
#pagepool 64M

## maxblocksize controls the maximum file system block size allowed.
## File systems with larger block sizes cannot be mounted or created
## unless the value of maxblocksize is increased.
## The allowable range is 16K to 16M
## default: maxblocksize 1M
#maxblocksize
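## For example, to allow file systems with a 4M block size to be created
## or mounted, the limit could be raised (value shown is only illustrative):
#maxblocksize 4M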

## Maximum number of files to cache. If the number of concurrently open
## files is larger, the number of cached files will exceed this value.
## The allowable range is 1 to 100000
#maxFilesToCache 1000

## The maximum number of stat cache entries.
## The default is 4 times the value of the maxFilesToCache parameter.
## The allowable range is 0 to 10000000
#maxStatCache
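## For example, with maxFilesToCache at its default of 1000, the default
## maxStatCache works out to 4 * 1000 = 4000 entries.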

######## DMAPI configuration ########

## The dmapiEventTimeout parameter controls the blocking of file operation
## threads of NFS and DFS, while in the kernel waiting for the handling of
## a DMAPI synchronous event. The parameter value is the maximum time, in
## milliseconds, the thread will block. When this time expires, the file
## operation returns ENOTREADY, and the event continues asynchronously.
## The NFS/DFS server is expected to repeatedly retry the operation, which
## will eventually find the response of the original event and continue.
## This mechanism applies only to read, write and truncate events, and only
## when such events come from NFS and DFS server threads. The value 0
## indicates immediate timeout (fully asynchronous event). A value greater
## than or equal to 86400000 (which is 24 hours) is considered "infinity"
## (no timeout, fully synchronous event).
## The default value is 86400000.
#dmapiEventTimeout 86400000
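## For example, to let an NFS or DFS server thread block for at most
## 10 seconds (10000 milliseconds) before the event continues
## asynchronously (value shown is only illustrative):
#dmapiEventTimeout 10000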

## The dmapiSessionFailureTimeout parameter controls the blocking of file
## operation threads, while in the kernel, waiting for the handling of a DMAPI
## synchronous event that is enqueued on a session that has suffered a failure.
## The parameter value is the maximum time, in seconds, the thread will wait
## for the recovery of the failed session. When this time expires and the
## session has not yet recovered, the event is aborted and the file operation
## fails, returning the EIO error. The timeout value is given in full seconds.
## The value 0 indicates immediate timeout (immediate failure of the file
## operation). A value greater than or equal to 86400 (which is 24 hours) is
## considered "infinity" (no timeout, indefinite blocking until the session
## recovers). The default value is 0.
#dmapiSessionFailureTimeout 0

## The dmapiMountTimeout parameter controls the blocking of mount operations,
## waiting for a disposition for the mount event to be set. This timeout is
## activated at most once on each node, by the first external mount of a
## file system which has DMAPI enabled, and only if there has never before
## been a mount disposition. Any mount operation on this node that starts
## while the timeout period is active will wait for the mount disposition. The
## parameter value is the maximum time, in seconds, that the mount operation
## will wait for a disposition. When this time expires and there is still no
## disposition for the mount event, the mount operation fails, returning the
## EIO error. The timeout value is given in full seconds. The value 0 indicates
## immediate timeout (immediate failure of the mount operation). A value
## greater than or equal to 86400 (which is 24 hours) is considered "infinity"
## (no timeout, indefinite blocking until there is a disposition).
## The default value is 60.
#dmapiMountTimeout 60

######## Prefetch tuning ########

## The value of the 'prefetchThreads' parameter controls the maximum
## possible number of threads dedicated to prefetching data for
## files that are read sequentially, or to handle sequential writebehind.
## The actual degree of parallelism for prefetching is determined
## dynamically in the daemon.
## (minimum 2, maximum 104)
#prefetchThreads 72

## The 'worker1Threads' parameter controls the maximum number of threads
## that are used to handle other operations associated with data access.
## The primary use is for random read/write requests that cannot be
## prefetched, random IO requests, or small file activity.
## (minimum 1, maximum 64)
#worker1Threads 48

## maxMBpS is an estimate of how many MB per sec of data can be transferred
## in or out of a single node. The value is used in calculating the
## amount of IO that can be done to effectively prefetch data for readers
## and/or write-behind data from writers. The maximum number of IOs in
## progress concurrently will be 2 * min(nDisks, maxMBpS*avgIOtime/blockSize),
## where nDisks is the number of disks that make up a filesystem,
## avgIOtime is a measured average of the last 16 full block IO times, and
## blockSize is the block size for a full block in the filesystem (e.g. 256K).
## By lowering this value, you can artificially limit how much IO one node
## can put on all the VSD servers, if there are lots of nodes that
## can overrun a few VSD servers. Setting this too high will usually
## not hurt because of other limiting factors such as the size of the
## pagepool, or the number of prefetchThreads or worker1Threads.
#maxMBpS 150
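## As a worked example (all numbers illustrative): with maxMBpS 150, an
## average full-block IO time of 0.02 seconds, a 0.25MB (256K) block size,
## and a filesystem made up of 32 disks,
##   maxMBpS*avgIOtime/blockSize = 150 * 0.02 / 0.25 = 12
## so the maximum number of IOs in progress would be 2 * min(32, 12) = 24.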

######## Problem determination Configuration ########

## Tracing of individual classes of events/operations can be activated by
## adding "trace <trace-class> <trace level>" lines below.
trace all 0
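## For example, to raise the trace level for all classes, a line like the
## following could be added (level shown is only illustrative):
#trace all 1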

## The 'unmountOnDiskFail' keyword controls how the daemon will respond when
## a disk failure is detected.
##
## When it is set to "no", the daemon will mark the disk as failed and
## continue as long as it can without using the disk. All nodes that are
## using this disk will be notified of the disk failure. The disk can be
## made active again by using the "mmchdisk" command. This is the
## suggested setting when metadata and data replication is used because
## the replica can be used until the disk can be brought online again.
##
## When it is set to "yes", any disk failure will cause only the local
## node to panic (force-unmount) the filesystem that contains that disk.
## Other filesystems on this node and other nodes will continue to function
## normally (if they can). The local node can try to remount the filesystem
## when the disk problem has been resolved. This is the suggested setting
## when using VSD disks in large multinode configurations and replication is
## not being used.
##
## When it is set to "meta", the daemon will mark the disk as failed and
## continue as long as it can without using the disk. All nodes that are
## using this disk will be notified of the disk failure. The disk can be
## made active again by using the "mmchdisk" command. This is the
## suggested setting when metadata replication is used and there are lots of
## dataOnly disks because the replica can be used until the disk can be
## brought online again. The filesystem will remain mounted over dataOnly disk
## failures, at the expense of user applications getting EIO errors when
## trying to use disks that have been marked down.
#unmountOnDiskFail no

## The 'dataStructureDump' keyword controls whether mmfs will produce a
## formatted dump of its internal data structures into a file named
## internaldump.<daemon pid>.signal whenever it aborts.
## The following entry can either be a directory name in which the file
## will reside, or otherwise a boolean value. When given a positive
## boolean value the directory defaults to /tmp/mmfs.
#dataStructureDump yes
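## For example, a directory name can be given instead of a boolean; the
## path shown here is the documented default, used only for illustration:
#dataStructureDump /tmp/mmfs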

######## Node Override Configuration ########
##
## In a multi-node configuration, it may be desirable to configure some
## nodes differently than others. This can be accomplished by placing
## separate, potentially different, copies of the mmfs.cfg file on each
## node. However, since maintaining separate copies of the configuration
## file on each node will likely be more difficult and error prone,
## the same effect can be achieved via node overrides wherein a single
## mmfs.cfg file is replicated on every node.
##
## A node override is introduced by a line containing a node name or list
## of node names in square brackets. All parameter specifications
## that follow will apply only to the listed nodes. A "[common]" line
## ends a section of node overrides. For example, the following fragment:
##
## pagepool 30M
##
## [tiger5,tiger6]
## pagepool 10M
##
## [tiger9]
## pagepool 64M
##
## [common]
## maxFilesToCache 200
##
## configures the page pool on most nodes as 30 megabytes. However,
## on tiger5 and tiger6 the page pool is configured with a smaller
## value of 10 megabytes, and on tiger9 it is configured with a larger
## value of 64 megabytes. Lines after the "[common]" line
## again apply to all nodes, i.e. every node will have a maxFilesToCache
## of 200.