source: nutchez-0.1/conf/hadoop-default.xml @ 95

Last change on this file since 95 was 66, checked in by waue, 16 years ago

NutchEz - an easy way to nutch

  • Property svn:executable set to *
File size: 38.3 KB
Line 
1<?xml version="1.0"?>
2<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
4<!-- Do not modify this file directly.  Instead, copy entries that you -->
5<!-- wish to modify from this file into hadoop-site.xml and change them -->
6<!-- there.  If hadoop-site.xml does not already exist, create it.      -->
7
8<configuration>
9
10<!-- global properties -->
11
12<property>
13  <name>hadoop.tmp.dir</name>
14  <value>/tmp/hadoop-${user.name}</value>
15  <description>A base for other temporary directories.</description>
16</property>
17
18<property>
19  <name>hadoop.native.lib</name>
20  <value>true</value>
21  <description>Should native hadoop libraries, if present, be used.</description>
22</property>
23
24<!-- logging properties -->
25
26<property>
27  <name>hadoop.logfile.size</name>
28  <value>10000000</value>
29  <description>The max size of each log file</description>
30</property>
31
32<property>
33  <name>hadoop.logfile.count</name>
34  <value>10</value>
35  <description>The max number of log files</description>
36</property>
37
38<property>
39  <name>hadoop.job.history.location</name>
40  <value></value>
41  <description> If job tracker is static the history files are stored
42  in this single well known place. If no value is set here, by default,
43  it is in the local file system at ${hadoop.log.dir}/history.
44  </description>
45</property>
46
47<property>
48  <name>hadoop.job.history.user.location</name>
49  <value></value>
50  <description> User can specify a location to store the history files of
51  a particular job. If nothing is specified, the logs are stored in
52  output directory. The files are stored in "_logs/history/" in the directory.
53  User can stop logging by giving the value "none".
54  </description>
55</property>
56
57<property>
58  <name>dfs.namenode.logging.level</name>
59  <value>info</value>
60  <description>The logging level for dfs namenode. Other values are "dir" (trace
61  namespace mutations), "block" (trace block under/over replications and block
62  creations/deletions), or "all".</description>
63</property>
64
65<!-- i/o properties -->
66
67<property>
68  <name>io.sort.factor</name>
69  <value>10</value>
70  <description>The number of streams to merge at once while sorting
71  files.  This determines the number of open file handles.</description>
72</property>
73
74<property>
75  <name>io.sort.mb</name>
76  <value>100</value>
77  <description>The total amount of buffer memory to use while sorting
78  files, in megabytes.  By default, gives each merge stream 1MB, which
79  should minimize seeks.</description>
80</property>
81
82<property>
83  <name>io.sort.record.percent</name>
84  <value>0.05</value>
85  <description>The percentage of io.sort.mb dedicated to tracking record
86  boundaries. Let this value be r, io.sort.mb be x. The maximum number
87  of records collected before the collection thread must block is equal
88  to (r * x) / 4</description>
89</property>
90
91<property>
92  <name>io.sort.spill.percent</name>
93  <value>0.80</value>
94  <description>The soft limit in either the buffer or record collection
95  buffers. Once reached, a thread will begin to spill the contents to disk
96  in the background. Note that this does not imply any chunking of data to
97  the spill. A value less than 0.5 is not recommended.</description>
98</property>
99
100<property>
101  <name>io.file.buffer.size</name>
102  <value>4096</value>
103  <description>The size of buffer for use in sequence files.
104  The size of this buffer should probably be a multiple of hardware
105  page size (4096 on Intel x86), and it determines how much data is
106  buffered during read and write operations.</description>
107</property>
108 
109<property>
110  <name>io.bytes.per.checksum</name>
111  <value>512</value>
112  <description>The number of bytes per checksum.  Must not be larger than
113  io.file.buffer.size.</description>
114</property>
115
116<property>
117  <name>io.skip.checksum.errors</name>
118  <value>false</value>
119  <description>If true, when a checksum error is encountered while
120  reading a sequence file, entries are skipped, instead of throwing an
121  exception.</description>
122</property>
123 
124<property>
125  <name>io.map.index.skip</name>
126  <value>0</value>
127  <description>Number of index entries to skip between each entry.
128  Zero by default. Setting this to values larger than zero can
129  facilitate opening large map files using less memory.</description>
130</property>
131
132<property>
133  <name>io.compression.codecs</name>
134  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
135  <description>A list of the compression codec classes that can be used
136               for compression/decompression.</description>
137</property>
138
139<property>
140  <name>io.serializations</name>
141  <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
142  <description>A list of serialization classes that can be used for
143  obtaining serializers and deserializers.</description>
144</property>
145
146<!-- file system properties -->
147
148<property>
149  <name>fs.default.name</name>
150  <value>file:///</value>
151  <description>The name of the default file system.  A URI whose
152  scheme and authority determine the FileSystem implementation.  The
153  uri's scheme determines the config property (fs.SCHEME.impl) naming
154  the FileSystem implementation class.  The uri's authority is used to
155  determine the host, port, etc. for a filesystem.</description>
156</property>
157
158<property>
159  <name>fs.trash.interval</name>
160  <value>0</value>
161  <description>Number of minutes between trash checkpoints.
162  If zero, the trash feature is disabled.
163  </description>
164</property>
165
166<property>
167  <name>fs.file.impl</name>
168  <value>org.apache.hadoop.fs.LocalFileSystem</value>
169  <description>The FileSystem for file: uris.</description>
170</property>
171
172<property>
173  <name>fs.hdfs.impl</name>
174  <value>org.apache.hadoop.dfs.DistributedFileSystem</value>
175  <description>The FileSystem for hdfs: uris.</description>
176</property>
177
178<property>
179  <name>fs.s3.impl</name>
180  <value>org.apache.hadoop.fs.s3.S3FileSystem</value>
181  <description>The FileSystem for s3: uris.</description>
182</property>
183
184<property>
185  <name>fs.s3n.impl</name>
186  <value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
187  <description>The FileSystem for s3n: (Native S3) uris.</description>
188</property>
189
190<property>
191  <name>fs.kfs.impl</name>
192  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
193  <description>The FileSystem for kfs: uris.</description>
194</property>
195
196<property>
197  <name>fs.hftp.impl</name>
198  <value>org.apache.hadoop.dfs.HftpFileSystem</value>
199</property>
200
201<property>
202  <name>fs.hsftp.impl</name>
203  <value>org.apache.hadoop.dfs.HsftpFileSystem</value>
204</property>
205
206<property>
207  <name>fs.ftp.impl</name>
208  <value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
209  <description>The FileSystem for ftp: uris.</description>
210</property>
211
212<property>
213  <name>fs.ramfs.impl</name>
214  <value>org.apache.hadoop.fs.InMemoryFileSystem</value>
215  <description>The FileSystem for ramfs: uris.</description>
216</property>
217
218<property>
219  <name>fs.har.impl</name>
220  <value>org.apache.hadoop.fs.HarFileSystem</value>
221  <description>The filesystem for Hadoop archives. </description>
222</property>
223
224<property>
225  <name>fs.inmemory.size.mb</name>
226  <value>75</value>
227  <description>The size of the in-memory filesystem instance in MB</description>
228</property>
229
230<property>
231  <name>fs.checkpoint.dir</name>
232  <value>${hadoop.tmp.dir}/dfs/namesecondary</value>
233  <description>Determines where on the local filesystem the DFS secondary
234      name node should store the temporary images and edits to merge.
235      If this is a comma-delimited list of directories then the image is
236      replicated in all of the directories for redundancy.
237  </description>
238</property>
239
240<property>
241  <name>fs.checkpoint.period</name>
242  <value>3600</value>
243  <description>The number of seconds between two periodic checkpoints.
244  </description>
245</property>
246
247<property>
248  <name>fs.checkpoint.size</name>
249  <value>67108864</value>
250  <description>The size of the current edit log (in bytes) that triggers
251       a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
252  </description>
253</property>
254
255<property>
256  <name>dfs.secondary.http.address</name>
257  <value>0.0.0.0:50090</value>
258  <description>
259    The secondary namenode http server address and port.
260    If the port is 0 then the server will start on a free port.
261  </description>
262</property>
263
264<property>
265  <name>dfs.datanode.address</name>
266  <value>0.0.0.0:50010</value>
267  <description>
268    The address on which the datanode server will listen.
269    If the port is 0 then the server will start on a free port.
270  </description>
271</property>
272
273<property>
274  <name>dfs.datanode.http.address</name>
275  <value>0.0.0.0:50075</value>
276  <description>
277    The datanode http server address and port.
278    If the port is 0 then the server will start on a free port.
279  </description>
280</property>
281
282<property>
283  <name>dfs.datanode.ipc.address</name>
284  <value>0.0.0.0:50020</value>
285  <description>
286    The datanode ipc server address and port.
287    If the port is 0 then the server will start on a free port.
288  </description>
289</property>
290
291<property>
292  <name>dfs.datanode.handler.count</name>
293  <value>3</value>
294  <description>The number of server threads for the datanode.</description>
295</property>
296
297<property>
298  <name>dfs.http.address</name>
299  <value>0.0.0.0:50070</value>
300  <description>
301    The address and the base port where the dfs namenode web ui will listen on.
302    If the port is 0 then the server will start on a free port.
303  </description>
304</property>
305
306<property>
307  <name>dfs.datanode.https.address</name>
308  <value>0.0.0.0:50475</value>
309</property>
310
311<property>
312  <name>dfs.https.address</name>
313  <value>0.0.0.0:50470</value>
314</property>
315
316<property>
317  <name>https.keystore.info.rsrc</name>
318  <value>sslinfo.xml</value>
319  <description>The name of the resource from which ssl keystore information
320  will be extracted
321  </description>
322</property>
323
324 <property>
325  <name>dfs.datanode.dns.interface</name>
326  <value>default</value>
327  <description>The name of the Network Interface from which a data node should
328  report its IP address.
329  </description>
330 </property>
331 
332<property>
333  <name>dfs.datanode.dns.nameserver</name>
334  <value>default</value>
335  <description>The host name or IP address of the name server (DNS)
336  which a DataNode should use to determine the host name used by the
337  NameNode for communication and display purposes.
338  </description>
339 </property>
340 
341<property>
342  <name>dfs.replication.considerLoad</name>
343  <value>true</value>
344  <description>Decide if chooseTarget considers the target's load or not
345  </description>
346</property>
347<property>
348  <name>dfs.default.chunk.view.size</name>
349  <value>32768</value>
350  <description>The number of bytes to view for a file on the browser.
351  </description>
352</property>
353
354<property>
355  <name>dfs.datanode.du.reserved</name>
356  <value>0</value>
357  <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
358  </description>
359</property>
360
361<property>
362  <name>dfs.datanode.du.pct</name>
363  <value>0.98f</value>
364  <description>When calculating remaining space, only use this percentage of the real available space
365  </description>
366</property>
367
368<property>
369  <name>dfs.name.dir</name>
370  <value>${hadoop.tmp.dir}/dfs/name</value>
371  <description>Determines where on the local filesystem the DFS name node
372      should store the name table.  If this is a comma-delimited list
373      of directories then the name table is replicated in all of the
374      directories, for redundancy. </description>
375</property>
376
377<property>
378  <name>dfs.web.ugi</name>
379  <value>webuser,webgroup</value>
380  <description>The user account used by the web interface.
381    Syntax: USERNAME,GROUP1,GROUP2, ...
382  </description>
383</property>
384
385<property>
386  <name>dfs.permissions</name>
387  <value>true</value>
388  <description>
389    If "true", enable permission checking in HDFS.
390    If "false", permission checking is turned off,
391    but all other behavior is unchanged.
392    Switching from one parameter value to the other does not change the mode,
393    owner or group of files or directories.
394  </description>
395</property>
396
397<property>
398  <name>dfs.permissions.supergroup</name>
399  <value>supergroup</value>
400  <description>The name of the group of super-users.</description>
401</property>
402
403<property>
404  <name>dfs.client.buffer.dir</name>
405  <value>${hadoop.tmp.dir}/dfs/tmp</value>
406  <description>Determines where on the local filesystem a DFS client
407  should store its blocks before it sends them to the datanode.
408  </description>
409</property>
410
411<property>
412  <name>dfs.data.dir</name>
413  <value>${hadoop.tmp.dir}/dfs/data</value>
414  <description>Determines where on the local filesystem a DFS data node
415  should store its blocks.  If this is a comma-delimited
416  list of directories, then data will be stored in all named
417  directories, typically on different devices.
418  Directories that do not exist are ignored.
419  </description>
420</property>
421
422<property>
423  <name>dfs.replication</name>
424  <value>3</value>
425  <description>Default block replication.
426  The actual number of replications can be specified when the file is created.
427  The default is used if replication is not specified at create time.
428  </description>
429</property>
430
431<property>
432  <name>dfs.replication.max</name>
433  <value>512</value>
434  <description>Maximal block replication.
435  </description>
436</property>
437
438<property>
439  <name>dfs.replication.min</name>
440  <value>1</value>
441  <description>Minimal block replication.
442  </description>
443</property>
444
445<property>
446  <name>dfs.block.size</name>
447  <value>67108864</value>
448  <description>The default block size for new files.</description>
449</property>
450
451<property>
452  <name>dfs.df.interval</name>
453  <value>60000</value>
454  <description>Disk usage statistics refresh interval in msec.</description>
455</property>
456
457<property>
458  <name>dfs.client.block.write.retries</name>
459  <value>3</value>
460  <description>The number of retries for writing blocks to the data nodes,
461  before we signal failure to the application.
462  </description>
463</property>
464
465<property>
466  <name>dfs.blockreport.intervalMsec</name>
467  <value>3600000</value>
468  <description>Determines block reporting interval in milliseconds.</description>
469</property>
470
471<property>
472  <name>dfs.blockreport.initialDelay</name>  <value>0</value>
473  <description>Delay for first block report in seconds.</description>
474</property>
475
476<property>
477  <name>dfs.heartbeat.interval</name>
478  <value>3</value>
479  <description>Determines datanode heartbeat interval in seconds.</description>
480</property>
481
482<property>
483  <name>dfs.namenode.handler.count</name>
484  <value>10</value>
485  <description>The number of server threads for the namenode.</description>
486</property>
487
488<property>
489  <name>dfs.safemode.threshold.pct</name>
490  <value>0.999f</value>
491  <description>
492    Specifies the percentage of blocks that should satisfy
493    the minimal replication requirement defined by dfs.replication.min.
494    Values less than or equal to 0 mean not to start in safe mode.
495    Values greater than 1 will make safe mode permanent.
496  </description>
497</property>
498
499<property>
500  <name>dfs.safemode.extension</name>
501  <value>30000</value>
502  <description>
503    Determines extension of safe mode in milliseconds
504    after the threshold level is reached.
505  </description>
506</property>
507
508<property>
509  <name>dfs.balance.bandwidthPerSec</name>
510  <value>1048576</value>
511  <description>
512        Specifies the maximum amount of bandwidth that each datanode
513        can utilize for the balancing purpose in terms of
514        the number of bytes per second.
515  </description>
516</property>
517
518<property>
519  <name>dfs.hosts</name>
520  <value></value>
521  <description>Names a file that contains a list of hosts that are
522  permitted to connect to the namenode. The full pathname of the file
523  must be specified.  If the value is empty, all hosts are
524  permitted.</description>
525</property>
526
527<property>
528  <name>dfs.hosts.exclude</name>
529  <value></value>
530  <description>Names a file that contains a list of hosts that are
531  not permitted to connect to the namenode.  The full pathname of the
532  file must be specified.  If the value is empty, no hosts are
533  excluded.</description>
534</property> 
535
536<property>
537  <name>dfs.max.objects</name>
538  <value>0</value>
539  <description>The maximum number of files, directories and blocks
540  dfs supports. A value of zero indicates no limit to the number
541  of objects that dfs supports.
542  </description>
543</property>
544
545<property>
546  <name>dfs.namenode.decommission.interval</name>
547  <value>30</value>
548  <description>Namenode periodicity in seconds to check if decommission is complete.</description>
549</property>
550
551<property>
552  <name>dfs.namenode.decommission.nodes.per.interval</name>
553  <value>5</value>
554  <description>The number of nodes namenode checks if decommission is complete
555  in each dfs.namenode.decommission.interval.</description>
556</property>
557
558<property>
559  <name>dfs.replication.interval</name>
560  <value>3</value>
561  <description>The periodicity in seconds with which the namenode computes replication work for datanodes. </description>
562</property>
563
564<property>
565  <name>fs.s3.block.size</name>
566  <value>67108864</value>
567  <description>Block size to use when writing files to S3.</description>
568</property>
569
570<property>
571  <name>fs.s3.buffer.dir</name>
572  <value>${hadoop.tmp.dir}/s3</value>
573  <description>Determines where on the local filesystem the S3 filesystem
574  should store files before sending them to S3
575  (or after retrieving them from S3).
576  </description>
577</property>
578
579<property>
580  <name>fs.s3.maxRetries</name>
581  <value>4</value>
582  <description>The maximum number of retries for reading or writing files to S3,
583  before we signal failure to the application.
584  </description>
585</property>
586
587<property>
588  <name>fs.s3.sleepTimeSeconds</name>
589  <value>10</value>
590  <description>The number of seconds to sleep between each S3 retry.
591  </description>
592</property>
593
594<!-- map/reduce properties -->
595
596<property>
597  <name>mapred.job.tracker</name>
598  <value>local</value>
599  <description>The host and port that the MapReduce job tracker runs
600  at.  If "local", then jobs are run in-process as a single map
601  and reduce task.
602  </description>
603</property>
604
605<property>
606  <name>mapred.job.tracker.http.address</name>
607  <value>0.0.0.0:50030</value>
608  <description>
609    The job tracker http server address and port the server will listen on.
610    If the port is 0 then the server will start on a free port.
611  </description>
612</property>
613
614<property>
615  <name>mapred.job.tracker.handler.count</name>
616  <value>10</value>
617  <description>
618    The number of server threads for the JobTracker. This should be roughly
619    4% of the number of tasktracker nodes.
620  </description>
621</property>
622
623<property>
624  <name>mapred.task.tracker.report.address</name>
625  <value>127.0.0.1:0</value>
626  <description>The interface and port that task tracker server listens on.
627  Since it is only connected to by the tasks, it uses the local interface.
628  EXPERT ONLY. Should only be changed if your host does not have the loopback
629  interface.</description>
630</property>
631
632<property>
633  <name>mapred.local.dir</name>
634  <value>${hadoop.tmp.dir}/mapred/local</value>
635  <description>The local directory where MapReduce stores intermediate
636  data files.  May be a comma-separated list of
637  directories on different devices in order to spread disk i/o.
638  Directories that do not exist are ignored.
639  </description>
640</property>
641
642<property>
643  <name>local.cache.size</name>
644  <value>10737418240</value>
645  <description>The limit on the size of cache you want to keep, set by default
646  to 10GB. This will act as a soft limit on the cache directory for out of band data.
647  </description>
648</property>
649           
650<property>
651  <name>mapred.system.dir</name>
652  <value>${hadoop.tmp.dir}/mapred/system</value>
653  <description>The shared directory where MapReduce stores control files.
654  </description>
655</property>
656
657<property>
658  <name>mapred.temp.dir</name>
659  <value>${hadoop.tmp.dir}/mapred/temp</value>
660  <description>A shared directory for temporary files.
661  </description>
662</property>
663
664<property>
665  <name>mapred.local.dir.minspacestart</name>
666  <value>0</value>
667  <description>If the space in mapred.local.dir drops under this,
668  do not ask for more tasks.
669  Value in bytes.
670  </description>
671</property>
672
673<property>
674  <name>mapred.local.dir.minspacekill</name>
675  <value>0</value>
676  <description>If the space in mapred.local.dir drops under this,
677    do not ask more tasks until all the current ones have finished and
678    cleaned up. Also, to save the rest of the tasks we have running,
679    kill one of them, to clean up some space. Start with the reduce tasks,
680    then go with the ones that have finished the least.
681    Value in bytes.
682  </description>
683</property>
684
685<property>
686  <name>mapred.tasktracker.expiry.interval</name>
687  <value>600000</value>
688  <description>Expert: The time-interval, in milliseconds, after which
689  a tasktracker is declared 'lost' if it doesn't send heartbeats.
690  </description>
691</property>
692
693<property>
694  <name>mapred.map.tasks</name>
695  <value>2</value>
696  <description>The default number of map tasks per job.  Typically set
697  to a prime several times greater than number of available hosts.
698  Ignored when mapred.job.tracker is "local". 
699  </description>
700</property>
701
702<property>
703  <name>mapred.reduce.tasks</name>
704  <value>1</value>
705  <description>The default number of reduce tasks per job.  Typically set
706  to a prime close to the number of available hosts.  Ignored when
707  mapred.job.tracker is "local".
708  </description>
709</property>
710
711<property>
712  <name>mapred.map.max.attempts</name>
713  <value>4</value>
714  <description>Expert: The maximum number of attempts per map task.
715  In other words, the framework will try to execute a map task this many
716  times before giving up on it.
717  </description>
718</property>
719
720<property>
721  <name>mapred.reduce.max.attempts</name>
722  <value>4</value>
723  <description>Expert: The maximum number of attempts per reduce task.
724  In other words, the framework will try to execute a reduce task this many
725  times before giving up on it.
726  </description>
727</property>
728
729<property>
730  <name>mapred.reduce.parallel.copies</name>
731  <value>5</value>
732  <description>The default number of parallel transfers run by reduce
733  during the copy(shuffle) phase.
734  </description>
735</property>
736
737<property>
738  <name>mapred.reduce.copy.backoff</name>
739  <value>300</value>
740  <description>The maximum amount of time (in seconds) a reducer spends on
741  fetching one map output before declaring it as failed.
742  </description>
743</property>
744
745<property>
746  <name>mapred.task.timeout</name>
747  <value>600000</value>
748  <description>The number of milliseconds before a task will be
749  terminated if it neither reads an input, writes an output, nor
750  updates its status string.
751  </description>
752</property>
753
754<property>
755  <name>mapred.tasktracker.map.tasks.maximum</name>
756  <value>2</value>
757  <description>The maximum number of map tasks that will be run
758  simultaneously by a task tracker.
759  </description>
760</property>
761
762<property>
763  <name>mapred.tasktracker.reduce.tasks.maximum</name>
764  <value>2</value>
765  <description>The maximum number of reduce tasks that will be run
766  simultaneously by a task tracker.
767  </description>
768</property>
769
770<property>
771  <name>mapred.jobtracker.completeuserjobs.maximum</name>
772  <value>100</value>
773  <description>The maximum number of complete jobs per user to keep around before delegating them to the job history.
774  </description>
775</property>
776
777<property>
778  <name>mapred.child.java.opts</name>
779  <value>-Xmx200m</value>
780  <description>Java opts for the task tracker child processes. 
781  The following symbol, if present, will be interpolated: @taskid@ is replaced
782  by current TaskID. Any other occurrences of '@' will go unchanged.
783  For example, to enable verbose gc logging to a file named for the taskid in
784  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
785        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
786 
787  The configuration variable mapred.child.ulimit can be used to control the
788  maximum virtual memory of the child processes.
789  </description>
790</property>
791
792<property>
793  <name>mapred.child.ulimit</name>
794  <value></value>
795  <description>The maximum virtual memory, in KB, of a process launched by the
796  Map-Reduce framework. This can be used to control both the Mapper/Reducer
797  tasks and applications using Hadoop Pipes, Hadoop Streaming etc.
798  By default it is left unspecified to let cluster admins control it via
799  limits.conf and other such relevant mechanisms.
800 
801  Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
802  JavaVM, else the VM might not start.
803  </description>
804</property>
805
806<property>
807  <name>mapred.child.tmp</name>
808  <value>./tmp</value>
809  <description> To set the value of tmp directory for map and reduce tasks.
810  If the value is an absolute path, it is directly assigned. Otherwise, it is
811  prepended with task's working directory. The java tasks are executed with
812  option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
813  streaming are set with environment variable,
814   TMPDIR='the absolute path of the tmp dir'
815  </description>
816</property>
817
818<property>
819  <name>mapred.inmem.merge.threshold</name>
820  <value>1000</value>
821  <description>The threshold, in terms of the number of files
822  for the in-memory merge process. When we accumulate threshold number of files
823  we initiate the in-memory merge and spill to disk. A value of 0 or less
824  indicates that we do not want any threshold and instead depend only on
825  the ramfs's memory consumption to trigger the merge.
826  </description>
827</property>
828
829<property>
830  <name>mapred.map.tasks.speculative.execution</name>
831  <value>true</value>
832  <description>If true, then multiple instances of some map tasks
833               may be executed in parallel.</description>
834</property>
835
836<property>
837  <name>mapred.reduce.tasks.speculative.execution</name>
838  <value>true</value>
839  <description>If true, then multiple instances of some reduce tasks
840               may be executed in parallel.</description>
841</property>
842
843<property>
844  <name>mapred.min.split.size</name>
845  <value>0</value>
846  <description>The minimum size chunk that map input should be split
847  into.  Note that some file formats may have minimum split sizes that
848  take priority over this setting.</description>
849</property>
850
851<property>
852  <name>mapred.submit.replication</name>
853  <value>10</value>
854  <description>The replication level for submitted job files.  This
855  should be around the square root of the number of nodes.
856  </description>
857</property>
858
859
860<property>
861  <name>mapred.tasktracker.dns.interface</name>
862  <value>default</value>
863  <description>The name of the Network Interface from which a task
864  tracker should report its IP address.
865  </description>
866 </property>
867 
868<property>
869  <name>mapred.tasktracker.dns.nameserver</name>
870  <value>default</value>
871  <description>The host name or IP address of the name server (DNS)
872  which a TaskTracker should use to determine the host name used by
873  the JobTracker for communication and display purposes.
874  </description>
875 </property>
876 
877<property>
878  <name>tasktracker.http.threads</name>
879  <value>40</value>
880  <description>The number of worker threads for the http server. This is
881               used for map output fetching
882  </description>
883</property>
884
885<property>
886  <name>mapred.task.tracker.http.address</name>
887  <value>0.0.0.0:50060</value>
888  <description>
889    The task tracker http server address and port.
890    If the port is 0 then the server will start on a free port.
891  </description>
892</property>
893
894<property>
895  <name>keep.failed.task.files</name>
896  <value>false</value>
897  <description>Should the files for failed tasks be kept. This should only be
898               used on jobs that are failing, because the storage is never
899               reclaimed. It also prevents the map outputs from being erased
900               from the reduce directory as they are consumed.</description>
901</property>
902
903<!--
904  <property>
905  <name>keep.task.files.pattern</name>
906  <value>.*_m_123456_0</value>
907  <description>Keep all files from tasks whose task names match the given
908               regular expression. Defaults to none.</description>
909  </property>
910-->
911
912<property>
913  <name>mapred.output.compress</name>
914  <value>false</value>
915  <description>Should the job outputs be compressed?
916  </description>
917</property>
918
919<property>
920  <name>mapred.output.compression.type</name>
921  <value>RECORD</value>
922  <description>If the job outputs are to be compressed as SequenceFiles, how should
923               they be compressed? Should be one of NONE, RECORD or BLOCK.
924  </description>
925</property>
926
927<property>
928  <name>mapred.output.compression.codec</name>
929  <value>org.apache.hadoop.io.compress.DefaultCodec</value>
930  <description>If the job outputs are compressed, how should they be compressed?
931  </description>
932</property>
933
934<property>
935  <name>mapred.compress.map.output</name>
936  <value>false</value>
937  <description>Should the outputs of the maps be compressed before being
938               sent across the network. Uses SequenceFile compression.
939  </description>
940</property>
941
942<property>
943  <name>mapred.map.output.compression.codec</name>
944  <value>org.apache.hadoop.io.compress.DefaultCodec</value>
945  <description>If the map outputs are compressed, how should they be
946               compressed?
947  </description>
948</property>
949
950<property>
951  <name>io.seqfile.compress.blocksize</name>
952  <value>1000000</value>
953  <description>The minimum block size for compression in block compressed
954          SequenceFiles.
955  </description>
956</property>
957
958<property>
959  <name>io.seqfile.lazydecompress</name>
960  <value>true</value>
961  <description>Should values of block-compressed SequenceFiles be decompressed
962          only when necessary.
963  </description>
964</property>
965
966<property>
967  <name>io.seqfile.sorter.recordlimit</name>
968  <value>1000000</value>
969  <description>The limit on number of records to be kept in memory in a spill
970          in SequenceFile.Sorter.
971  </description>
972</property>
973
974<property>
975  <name>map.sort.class</name>
976  <value>org.apache.hadoop.util.QuickSort</value>
977  <description>The default sort class for sorting keys.
978  </description>
979</property>
980
981<property>
982  <name>mapred.userlog.limit.kb</name>
983  <value>0</value>
984  <description>The maximum size of user-logs of each task in KB. 0 disables the cap.
985  </description>
986</property>
987
988<property>
989  <name>mapred.userlog.retain.hours</name>
990  <value>24</value>
991  <description>The maximum time, in hours, for which the user-logs are to be
992          retained.
993  </description>
994</property>
995
996<property>
997  <name>mapred.hosts</name>
998  <value></value>
999  <description>Names a file that contains the list of nodes that may
1000  connect to the jobtracker.  If the value is empty, all hosts are
1001  permitted.</description>
1002</property>
1003
1004<property>
1005  <name>mapred.hosts.exclude</name>
1006  <value></value>
1007  <description>Names a file that contains the list of hosts that
1008  should be excluded by the jobtracker.  If the value is empty, no
1009  hosts are excluded.</description>
1010</property> 
1011
1012<property>
1013  <name>mapred.max.tracker.failures</name>
1014  <value>4</value>
1015  <description>The number of task-failures on a tasktracker of a given job
1016               after which new tasks of that job aren't assigned to it.
1017  </description>
1018</property>
1019
1020<property>
1021  <name>jobclient.output.filter</name>
1022  <value>FAILED</value>
1023  <description>The filter for controlling the output of the task's userlogs sent
1024               to the console of the JobClient.
1025               The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and
1026               ALL.
1027  </description>
1028</property>
1029
1030  <property>
1031    <name>mapred.job.tracker.persist.jobstatus.active</name>
1032    <value>false</value>
1033    <description>Indicates if persistency of job status information is
1034      active or not.
1035    </description>
1036  </property>
1037
1038  <property>
1039  <name>mapred.job.tracker.persist.jobstatus.hours</name>
1040  <value>0</value>
1041  <description>The number of hours job status information is persisted in DFS.
1042    The job status information will be available after it drops off the memory
1043    queue and between jobtracker restarts. With a zero value the job status
1044    information is not persisted at all in DFS.
1045  </description>
1046</property>
1047
1048  <property>
1049    <name>mapred.job.tracker.persist.jobstatus.dir</name>
1050    <value>/jobtracker/jobsInfo</value>
1051    <description>The directory where the job status information is persisted
1052      in a file system to be available after it drops off the memory queue and
1053      between jobtracker restarts.
1054    </description>
1055  </property>
1056
1057  <property>
1058    <name>mapred.task.profile</name>
1059    <value>false</value>
1060    <description>Whether the system should collect profiler
1061     information for some of the tasks in this job. The information is stored
1062     in the user log directory. Set the value to "true" to enable task
1063     profiling.</description>
1064  </property>
1065
1066  <property>
1067    <name>mapred.task.profile.maps</name>
1068    <value>0-2</value>
1069    <description> The ranges of map tasks to profile.
1070    mapred.task.profile has to be set to true for this value to take effect.
1071    </description>
1072  </property>
1073
1074  <property>
1075    <name>mapred.task.profile.reduces</name>
1076    <value>0-2</value>
1077    <description> The ranges of reduce tasks to profile.
1078    mapred.task.profile has to be set to true for this value to take effect.
1079    </description>
1080  </property>
1081
1082  <property>
1083    <name>mapred.line.input.format.linespermap</name>
1084    <value>1</value>
1085    <description> Number of lines per split in NLineInputFormat.
1086    </description>
1087  </property>
1088
1089<!-- ipc properties -->
1090
1091<property>
1092  <name>ipc.client.idlethreshold</name>
1093  <value>4000</value>
1094  <description>Defines the threshold number of connections after which
1095               connections will be inspected for idleness.
1096  </description>
1097</property>
1098
1099<property>
1100  <name>ipc.client.kill.max</name>
1101  <value>10</value>
1102  <description>Defines the maximum number of clients to disconnect in one go.
1103  </description>
1104</property>
1105
1106<property>
1107  <name>ipc.client.connection.maxidletime</name>
1108  <value>10000</value>
1109  <description>The maximum time in msec after which a client will bring down the
1110               connection to the server.
1111  </description>
1112</property>
1113
1114<property>
1115  <name>ipc.client.connect.max.retries</name>
1116  <value>10</value>
1117  <description>Indicates the number of retries a client will make to establish
1118               a server connection.
1119  </description>
1120</property>
1121
1122<property>
1123  <name>ipc.server.listen.queue.size</name>
1124  <value>128</value>
1125  <description>Indicates the length of the listen queue for servers accepting
1126               client connections.
1127  </description>
1128</property>
1129
1130<property>
1131  <name>ipc.server.tcpnodelay</name>
1132  <value>false</value>
1133  <description>Turn on/off Nagle's algorithm for the TCP socket connection on
1134  the server. Setting to true disables the algorithm and may decrease latency
1135  with a cost of more/smaller packets.
1136  </description>
1137</property>
1138
1139<property>
1140  <name>ipc.client.tcpnodelay</name>
1141  <value>false</value>
1142  <description>Turn on/off Nagle's algorithm for the TCP socket connection on
1143  the client. Setting to true disables the algorithm and may decrease latency
1144  with a cost of more/smaller packets.
1145  </description>
1146</property>
1147
1148<!-- Job Notification Configuration -->
1149
1150<!--
1151<property>
1152 <name>job.end.notification.url</name>
1153 <value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>
1154 <description>Indicates the URL which will be called on completion of a job to
1155              report the end status of the job.
1156              The URI may contain at most two variables: $jobId and $jobStatus.
1157              If they are present in the URI, they will be replaced by their
1158              respective values.
1159</description>
1160</property>
1161-->
1162
1163<property>
1164  <name>job.end.retry.attempts</name>
1165  <value>0</value>
1166  <description>Indicates how many times hadoop should attempt to contact the
1167               notification URL </description>
1168</property>
1169
1170<property>
1171  <name>job.end.retry.interval</name>
1172   <value>30000</value>
1173   <description>Indicates time in milliseconds between notification URL retry
1174                calls</description>
1175</property>
1176
1177<!-- Web Interface Configuration -->
1178
1179<property>
1180  <name>webinterface.private.actions</name>
1181  <value>false</value>
1182  <description> If set to true, the web interfaces of JT and NN may contain
1183                actions, such as kill job, delete file, etc., that should
1184                not be exposed to the public. Enable this option if the interfaces
1185                are only reachable by those who have the right authorization.
1186  </description>
1187</property>
1188
1189<!-- Proxy Configuration -->
1190
1191<property>
1192  <name>hadoop.rpc.socket.factory.class.default</name>
1193  <value>org.apache.hadoop.net.StandardSocketFactory</value>
1194  <description> Default SocketFactory to use. This parameter is expected to be
1195    formatted as "package.FactoryClassName".
1196  </description>
1197</property>
1198
1199<property>
1200  <name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
1201  <value></value>
1202  <description> SocketFactory to use to connect to a DFS. If null or empty, use
1203    hadoop.rpc.socket.factory.class.default. This socket factory is also used by
1204    DFSClient to create sockets to DataNodes.
1205  </description>
1206</property>
1207
1208<property>
1209  <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
1210  <value></value>
1211  <description> SocketFactory to use to connect to a Map/Reduce master
1212    (JobTracker). If null or empty, then use hadoop.rpc.socket.factory.class.default.
1213  </description>
1214</property>
1215
1216<property>
1217  <name>hadoop.socks.server</name>
1218  <value></value>
1219  <description> Address (host:port) of the SOCKS server to be used by the
1220    SocksSocketFactory.
1221  </description>
1222</property>
1223
1224<!-- Rack Configuration -->
1225
1226<property>
1227  <name>topology.node.switch.mapping.impl</name>
1228  <value>org.apache.hadoop.net.ScriptBasedMapping</value>
1229  <description> The default implementation of the DNSToSwitchMapping. It
1230    invokes a script specified in topology.script.file.name to resolve
1231    node names. If the value for topology.script.file.name is not set, the
1232    default value of DEFAULT_RACK is returned for all node names.
1233  </description>
1234</property>
1235
1236<property>
1237  <name>topology.script.file.name</name>
1238  <value></value>
1239  <description> The script name that should be invoked to resolve DNS names to
1240    NetworkTopology names. Example: the script would take host.foo.bar as an
1241    argument, and return /rack1 as the output.
1242  </description>
1243</property>
1244
1245<property>
1246  <name>topology.script.number.args</name>
1247  <value>20</value>
1248  <description> The max number of args that the script configured with
1249    topology.script.file.name should be run with. Each arg is an
1250    IP address.
1251  </description>
1252</property>
1253
1254<property>
1255  <name>mapred.task.cache.levels</name>
1256  <value>2</value>
1257  <description> This is the max level of the task cache. For example, if
1258    the level is 2, the tasks cached are at the host level and at the rack
1259    level.
1260  </description>
1261</property>
1262
1263<property>
1264  <name>mapred.merge.recordsBeforeProgress</name>
1265  <value>10000</value>
1266  <description> The number of records to process during merge before
1267   sending a progress notification to the TaskTracker.
1268  </description>
1269</property>
1270
1271</configuration>
Note: See TracBrowser for help on using the repository browser.