Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

hadoop-default.xml @ 95

Last change on this file since 95 was 66, checked in by waue, 16 years ago
NutchEz - an easy way to nutch
Property svn:executable set to ``*
File size: 38.3 KB

Line
1	<?xml version="1.0"?>
2	<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
4	<!-- Do not modify this file directly. Instead, copy entries that you -->
5	<!-- wish to modify from this file into hadoop-site.xml and change them -->
6	<!-- there. If hadoop-site.xml does not already exist, create it. -->
7
8	<configuration>
9
10	<!-- global properties -->
11
12	<property>
13	<name>hadoop.tmp.dir</name>
14	<value>/tmp/hadoop-${user.name}</value>
15	<description>A base for other temporary directories.</description>
16	</property>
17
18	<property>
19	<name>hadoop.native.lib</name>
20	<value>true</value>
21	<description>Should native hadoop libraries, if present, be used.</description>
22	</property>
23
24	<!-- logging properties -->
25
26	<property>
27	<name>hadoop.logfile.size</name>
28	<value>10000000</value>
29	<description>The max size of each log file</description>
30	</property>
31
32	<property>
33	<name>hadoop.logfile.count</name>
34	<value>10</value>
35	<description>The max number of log files</description>
36	</property>
37
38	<property>
39	<name>hadoop.job.history.location</name>
40	<value></value>
41	<description> If job tracker is static the history files are stored
42	in this single well known place. If No value is set here, by default,
43	it is in the local file system at ${hadoop.log.dir}/history.
44	</description>
45	</property>
46
47	<property>
48	<name>hadoop.job.history.user.location</name>
49	<value></value>
50	<description> User can specify a location to store the history files of
51	a particular job. If nothing is specified, the logs are stored in
52	output directory. The files are stored in "_logs/history/" in the directory.
53	User can stop logging by giving the value "none".
54	</description>
55	</property>
56
57	<property>
58	<name>dfs.namenode.logging.level</name>
59	<value>info</value>
60	<description>The logging level for dfs namenode. Other values are "dir"(trace
61	namespace mutations), "block"(trace block under/over replications and block
62	creations/deletions), or "all".</description>
63	</property>
64
65	<!-- i/o properties -->
66
67	<property>
68	<name>io.sort.factor</name>
69	<value>10</value>
70	<description>The number of streams to merge at once while sorting
71	files. This determines the number of open file handles.</description>
72	</property>
73
74	<property>
75	<name>io.sort.mb</name>
76	<value>100</value>
77	<description>The total amount of buffer memory to use while sorting
78	files, in megabytes. By default, gives each merge stream 1MB, which
79	should minimize seeks.</description>
80	</property>
81
82	<property>
83	<name>io.sort.record.percent</name>
84	<value>0.05</value>
85	<description>The percentage of io.sort.mb dedicated to tracking record
86	boundaries. Let this value be r, io.sort.mb be x. The maximum number
87	of records collected before the collection thread must block is equal
88	to (r * x) / 4</description>
89	</property>
90
91	<property>
92	<name>io.sort.spill.percent</name>
93	<value>0.80</value>
94	<description>The soft limit in either the buffer or record collection
95	buffers. Once reached, a thread will begin to spill the contents to disk
96	in the background. Note that this does not imply any chunking of data to
97	the spill. A value less than 0.5 is not recommended.</description>
98	</property>
99
100	<property>
101	<name>io.file.buffer.size</name>
102	<value>4096</value>
103	<description>The size of buffer for use in sequence files.
104	The size of this buffer should probably be a multiple of hardware
105	page size (4096 on Intel x86), and it determines how much data is
106	buffered during read and write operations.</description>
107	</property>
108
109	<property>
110	<name>io.bytes.per.checksum</name>
111	<value>512</value>
112	<description>The number of bytes per checksum. Must not be larger than
113	io.file.buffer.size.</description>
114	</property>
115
116	<property>
117	<name>io.skip.checksum.errors</name>
118	<value>false</value>
119	<description>If true, when a checksum error is encountered while
120	reading a sequence file, entries are skipped, instead of throwing an
121	exception.</description>
122	</property>
123
124	<property>
125	<name>io.map.index.skip</name>
126	<value>0</value>
127	<description>Number of index entries to skip between each entry.
128	Zero by default. Setting this to values larger than zero can
129	facilitate opening large map files using less memory.</description>
130	</property>
131
132	<property>
133	<name>io.compression.codecs</name>
134	<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
135	<description>A list of the compression codec classes that can be used
136	for compression/decompression.</description>
137	</property>
138
139	<property>
140	<name>io.serializations</name>
141	<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
142	<description>A list of serialization classes that can be used for
143	obtaining serializers and deserializers.</description>
144	</property>
145
146	<!-- file system properties -->
147
148	<property>
149	<name>fs.default.name</name>
150	<value>file:///</value>
151	<description>The name of the default file system. A URI whose
152	scheme and authority determine the FileSystem implementation. The
153	uri's scheme determines the config property (fs.SCHEME.impl) naming
154	the FileSystem implementation class. The uri's authority is used to
155	determine the host, port, etc. for a filesystem.</description>
156	</property>
157
158	<property>
159	<name>fs.trash.interval</name>
160	<value>0</value>
161	<description>Number of minutes between trash checkpoints.
162	If zero, the trash feature is disabled.
163	</description>
164	</property>
165
166	<property>
167	<name>fs.file.impl</name>
168	<value>org.apache.hadoop.fs.LocalFileSystem</value>
169	<description>The FileSystem for file: uris.</description>
170	</property>
171
172	<property>
173	<name>fs.hdfs.impl</name>
174	<value>org.apache.hadoop.dfs.DistributedFileSystem</value>
175	<description>The FileSystem for hdfs: uris.</description>
176	</property>
177
178	<property>
179	<name>fs.s3.impl</name>
180	<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
181	<description>The FileSystem for s3: uris.</description>
182	</property>
183
184	<property>
185	<name>fs.s3n.impl</name>
186	<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
187	<description>The FileSystem for s3n: (Native S3) uris.</description>
188	</property>
189
190	<property>
191	<name>fs.kfs.impl</name>
192	<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
193	<description>The FileSystem for kfs: uris.</description>
194	</property>
195
196	<property>
197	<name>fs.hftp.impl</name>
198	<value>org.apache.hadoop.dfs.HftpFileSystem</value>
199	</property>
200
201	<property>
202	<name>fs.hsftp.impl</name>
203	<value>org.apache.hadoop.dfs.HsftpFileSystem</value>
204	</property>
205
206	<property>
207	<name>fs.ftp.impl</name>
208	<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
209	<description>The FileSystem for ftp: uris.</description>
210	</property>
211
212	<property>
213	<name>fs.ramfs.impl</name>
214	<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
215	<description>The FileSystem for ramfs: uris.</description>
216	</property>
217
218	<property>
219	<name>fs.har.impl</name>
220	<value>org.apache.hadoop.fs.HarFileSystem</value>
221	<description>The filesystem for Hadoop archives. </description>
222	</property>
223
224	<property>
225	<name>fs.inmemory.size.mb</name>
226	<value>75</value>
227	<description>The size of the in-memory filesystem instance in MB</description>
228	</property>
229
230	<property>
231	<name>fs.checkpoint.dir</name>
232	<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
233	<description>Determines where on the local filesystem the DFS secondary
234	name node should store the temporary images and edits to merge.
235	If this is a comma-delimited list of directories then the image is
236	replicated in all of the directories for redundancy.
237	</description>
238	</property>
239
240	<property>
241	<name>fs.checkpoint.period</name>
242	<value>3600</value>
243	<description>The number of seconds between two periodic checkpoints.
244	</description>
245	</property>
246
247	<property>
248	<name>fs.checkpoint.size</name>
249	<value>67108864</value>
250	<description>The size of the current edit log (in bytes) that triggers
251	a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
252	</description>
253	</property>
254
255	<property>
256	<name>dfs.secondary.http.address</name>
257	<value>0.0.0.0:50090</value>
258	<description>
259	The secondary namenode http server address and port.
260	If the port is 0 then the server will start on a free port.
261	</description>
262	</property>
263
264	<property>
265	<name>dfs.datanode.address</name>
266	<value>0.0.0.0:50010</value>
267	<description>
268	The address that the datanode server will listen on.
269	If the port is 0 then the server will start on a free port.
270	</description>
271	</property>
272
273	<property>
274	<name>dfs.datanode.http.address</name>
275	<value>0.0.0.0:50075</value>
276	<description>
277	The datanode http server address and port.
278	If the port is 0 then the server will start on a free port.
279	</description>
280	</property>
281
282	<property>
283	<name>dfs.datanode.ipc.address</name>
284	<value>0.0.0.0:50020</value>
285	<description>
286	The datanode ipc server address and port.
287	If the port is 0 then the server will start on a free port.
288	</description>
289	</property>
290
291	<property>
292	<name>dfs.datanode.handler.count</name>
293	<value>3</value>
294	<description>The number of server threads for the datanode.</description>
295	</property>
296
297	<property>
298	<name>dfs.http.address</name>
299	<value>0.0.0.0:50070</value>
300	<description>
301	The address and the base port where the dfs namenode web ui will listen on.
302	If the port is 0 then the server will start on a free port.
303	</description>
304	</property>
305
306	<property>
307	<name>dfs.datanode.https.address</name>
308	<value>0.0.0.0:50475</value>
309	</property>
310
311	<property>
312	<name>dfs.https.address</name>
313	<value>0.0.0.0:50470</value>
314	</property>
315
316	<property>
317	<name>https.keystore.info.rsrc</name>
318	<value>sslinfo.xml</value>
319	<description>The name of the resource from which ssl keystore information
320	will be extracted
321	</description>
322	</property>
323
324	<property>
325	<name>dfs.datanode.dns.interface</name>
326	<value>default</value>
327	<description>The name of the Network Interface from which a data node should
328	report its IP address.
329	</description>
330	</property>
331
332	<property>
333	<name>dfs.datanode.dns.nameserver</name>
334	<value>default</value>
335	<description>The host name or IP address of the name server (DNS)
336	which a DataNode should use to determine the host name used by the
337	NameNode for communication and display purposes.
338	</description>
339	</property>
340
341	<property>
342	<name>dfs.replication.considerLoad</name>
343	<value>true</value>
344	<description>Decide if chooseTarget considers the target's load or not
345	</description>
346	</property>
347	<property>
348	<name>dfs.default.chunk.view.size</name>
349	<value>32768</value>
350	<description>The number of bytes to view for a file on the browser.
351	</description>
352	</property>
353
354	<property>
355	<name>dfs.datanode.du.reserved</name>
356	<value>0</value>
357	<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
358	</description>
359	</property>
360
361	<property>
362	<name>dfs.datanode.du.pct</name>
363	<value>0.98f</value>
364	<description>When calculating remaining space, only use this percentage of the real available space
365	</description>
366	</property>
367
368	<property>
369	<name>dfs.name.dir</name>
370	<value>${hadoop.tmp.dir}/dfs/name</value>
371	<description>Determines where on the local filesystem the DFS name node
372	should store the name table. If this is a comma-delimited list
373	of directories then the name table is replicated in all of the
374	directories, for redundancy. </description>
375	</property>
376
377	<property>
378	<name>dfs.web.ugi</name>
379	<value>webuser,webgroup</value>
380	<description>The user account used by the web interface.
381	Syntax: USERNAME,GROUP1,GROUP2, ...
382	</description>
383	</property>
384
385	<property>
386	<name>dfs.permissions</name>
387	<value>true</value>
388	<description>
389	If "true", enable permission checking in HDFS.
390	If "false", permission checking is turned off,
391	but all other behavior is unchanged.
392	Switching from one parameter value to the other does not change the mode,
393	owner or group of files or directories.
394	</description>
395	</property>
396
397	<property>
398	<name>dfs.permissions.supergroup</name>
399	<value>supergroup</value>
400	<description>The name of the group of super-users.</description>
401	</property>
402
403	<property>
404	<name>dfs.client.buffer.dir</name>
405	<value>${hadoop.tmp.dir}/dfs/tmp</value>
406	<description>Determines where on the local filesystem a DFS client
407	should store its blocks before it sends them to the datanode.
408	</description>
409	</property>
410
411	<property>
412	<name>dfs.data.dir</name>
413	<value>${hadoop.tmp.dir}/dfs/data</value>
414	<description>Determines where on the local filesystem a DFS data node
415	should store its blocks. If this is a comma-delimited
416	list of directories, then data will be stored in all named
417	directories, typically on different devices.
418	Directories that do not exist are ignored.
419	</description>
420	</property>
421
422	<property>
423	<name>dfs.replication</name>
424	<value>3</value>
425	<description>Default block replication.
426	The actual number of replications can be specified when the file is created.
427	The default is used if replication is not specified at create time.
428	</description>
429	</property>
430
431	<property>
432	<name>dfs.replication.max</name>
433	<value>512</value>
434	<description>Maximal block replication.
435	</description>
436	</property>
437
438	<property>
439	<name>dfs.replication.min</name>
440	<value>1</value>
441	<description>Minimal block replication.
442	</description>
443	</property>
444
445	<property>
446	<name>dfs.block.size</name>
447	<value>67108864</value>
448	<description>The default block size for new files.</description>
449	</property>
450
451	<property>
452	<name>dfs.df.interval</name>
453	<value>60000</value>
454	<description>Disk usage statistics refresh interval in msec.</description>
455	</property>
456
457	<property>
458	<name>dfs.client.block.write.retries</name>
459	<value>3</value>
460	<description>The number of retries for writing blocks to the data nodes,
461	before we signal failure to the application.
462	</description>
463	</property>
464
465	<property>
466	<name>dfs.blockreport.intervalMsec</name>
467	<value>3600000</value>
468	<description>Determines block reporting interval in milliseconds.</description>
469	</property>
470
471	<property>
472	<name>dfs.blockreport.initialDelay</name> <value>0</value>
473	<description>Delay for first block report in seconds.</description>
474	</property>
475
476	<property>
477	<name>dfs.heartbeat.interval</name>
478	<value>3</value>
479	<description>Determines datanode heartbeat interval in seconds.</description>
480	</property>
481
482	<property>
483	<name>dfs.namenode.handler.count</name>
484	<value>10</value>
485	<description>The number of server threads for the namenode.</description>
486	</property>
487
488	<property>
489	<name>dfs.safemode.threshold.pct</name>
490	<value>0.999f</value>
491	<description>
492	Specifies the percentage of blocks that should satisfy
493	the minimal replication requirement defined by dfs.replication.min.
494	Values less than or equal to 0 mean not to start in safe mode.
495	Values greater than 1 will make safe mode permanent.
496	</description>
497	</property>
498
499	<property>
500	<name>dfs.safemode.extension</name>
501	<value>30000</value>
502	<description>
503	Determines extension of safe mode in milliseconds
504	after the threshold level is reached.
505	</description>
506	</property>
507
508	<property>
509	<name>dfs.balance.bandwidthPerSec</name>
510	<value>1048576</value>
511	<description>
512	Specifies the maximum amount of bandwidth that each datanode
513	can utilize for the balancing purpose in terms of
514	the number of bytes per second.
515	</description>
516	</property>
517
518	<property>
519	<name>dfs.hosts</name>
520	<value></value>
521	<description>Names a file that contains a list of hosts that are
522	permitted to connect to the namenode. The full pathname of the file
523	must be specified. If the value is empty, all hosts are
524	permitted.</description>
525	</property>
526
527	<property>
528	<name>dfs.hosts.exclude</name>
529	<value></value>
530	<description>Names a file that contains a list of hosts that are
531	not permitted to connect to the namenode. The full pathname of the
532	file must be specified. If the value is empty, no hosts are
533	excluded.</description>
534	</property>
535
536	<property>
537	<name>dfs.max.objects</name>
538	<value>0</value>
539	<description>The maximum number of files, directories and blocks
540	dfs supports. A value of zero indicates no limit to the number
541	of objects that dfs supports.
542	</description>
543	</property>
544
545	<property>
546	<name>dfs.namenode.decommission.interval</name>
547	<value>30</value>
548	<description>Namenode periodicity in seconds to check if decommission is complete.</description>
549	</property>
550
551	<property>
552	<name>dfs.namenode.decommission.nodes.per.interval</name>
553	<value>5</value>
554	<description>The number of nodes the namenode checks for decommission completion
555	in each dfs.namenode.decommission.interval.</description>
556	</property>
557
558	<property>
559	<name>dfs.replication.interval</name>
560	<value>3</value>
561	<description>The periodicity in seconds with which the namenode computes replication work for datanodes. </description>
562	</property>
563
564	<property>
565	<name>fs.s3.block.size</name>
566	<value>67108864</value>
567	<description>Block size to use when writing files to S3.</description>
568	</property>
569
570	<property>
571	<name>fs.s3.buffer.dir</name>
572	<value>${hadoop.tmp.dir}/s3</value>
573	<description>Determines where on the local filesystem the S3 filesystem
574	should store files before sending them to S3
575	(or after retrieving them from S3).
576	</description>
577	</property>
578
579	<property>
580	<name>fs.s3.maxRetries</name>
581	<value>4</value>
582	<description>The maximum number of retries for reading or writing files to S3,
583	before we signal failure to the application.
584	</description>
585	</property>
586
587	<property>
588	<name>fs.s3.sleepTimeSeconds</name>
589	<value>10</value>
590	<description>The number of seconds to sleep between each S3 retry.
591	</description>
592	</property>
593
594	<!-- map/reduce properties -->
595
596	<property>
597	<name>mapred.job.tracker</name>
598	<value>local</value>
599	<description>The host and port that the MapReduce job tracker runs
600	at. If "local", then jobs are run in-process as a single map
601	and reduce task.
602	</description>
603	</property>
604
605	<property>
606	<name>mapred.job.tracker.http.address</name>
607	<value>0.0.0.0:50030</value>
608	<description>
609	The job tracker http server address and port the server will listen on.
610	If the port is 0 then the server will start on a free port.
611	</description>
612	</property>
613
614	<property>
615	<name>mapred.job.tracker.handler.count</name>
616	<value>10</value>
617	<description>
618	The number of server threads for the JobTracker. This should be roughly
619	4% of the number of tasktracker nodes.
620	</description>
621	</property>
622
623	<property>
624	<name>mapred.task.tracker.report.address</name>
625	<value>127.0.0.1:0</value>
626	<description>The interface and port that task tracker server listens on.
627	Since it is only connected to by the tasks, it uses the local interface.
628	EXPERT ONLY. Should only be changed if your host does not have the loopback
629	interface.</description>
630	</property>
631
632	<property>
633	<name>mapred.local.dir</name>
634	<value>${hadoop.tmp.dir}/mapred/local</value>
635	<description>The local directory where MapReduce stores intermediate
636	data files. May be a comma-separated list of
637	directories on different devices in order to spread disk i/o.
638	Directories that do not exist are ignored.
639	</description>
640	</property>
641
642	<property>
643	<name>local.cache.size</name>
644	<value>10737418240</value>
645	<description>The limit on the size of cache you want to keep, set by default
646	to 10GB. This will act as a soft limit on the cache directory for out of band data.
647	</description>
648	</property>
649
650	<property>
651	<name>mapred.system.dir</name>
652	<value>${hadoop.tmp.dir}/mapred/system</value>
653	<description>The shared directory where MapReduce stores control files.
654	</description>
655	</property>
656
657	<property>
658	<name>mapred.temp.dir</name>
659	<value>${hadoop.tmp.dir}/mapred/temp</value>
660	<description>A shared directory for temporary files.
661	</description>
662	</property>
663
664	<property>
665	<name>mapred.local.dir.minspacestart</name>
666	<value>0</value>
667	<description>If the space in mapred.local.dir drops under this,
668	do not ask for more tasks.
669	Value in bytes.
670	</description>
671	</property>
672
673	<property>
674	<name>mapred.local.dir.minspacekill</name>
675	<value>0</value>
676	<description>If the space in mapred.local.dir drops under this,
677	do not ask for more tasks until all the current ones have finished and
678	cleaned up. Also, to save the rest of the tasks we have running,
679	kill one of them, to clean up some space. Start with the reduce tasks,
680	then go with the ones that have finished the least.
681	Value in bytes.
682	</description>
683	</property>
684
685	<property>
686	<name>mapred.tasktracker.expiry.interval</name>
687	<value>600000</value>
688	<description>Expert: The time-interval, in milliseconds, after which
689	a tasktracker is declared 'lost' if it doesn't send heartbeats.
690	</description>
691	</property>
692
693	<property>
694	<name>mapred.map.tasks</name>
695	<value>2</value>
696	<description>The default number of map tasks per job. Typically set
697	to a prime several times greater than number of available hosts.
698	Ignored when mapred.job.tracker is "local".
699	</description>
700	</property>
701
702	<property>
703	<name>mapred.reduce.tasks</name>
704	<value>1</value>
705	<description>The default number of reduce tasks per job. Typically set
706	to a prime close to the number of available hosts. Ignored when
707	mapred.job.tracker is "local".
708	</description>
709	</property>
710
711	<property>
712	<name>mapred.map.max.attempts</name>
713	<value>4</value>
714	<description>Expert: The maximum number of attempts per map task.
715	In other words, the framework will try to execute a map task this many
716	times before giving up on it.
717	</description>
718	</property>
719
720	<property>
721	<name>mapred.reduce.max.attempts</name>
722	<value>4</value>
723	<description>Expert: The maximum number of attempts per reduce task.
724	In other words, the framework will try to execute a reduce task this many
725	times before giving up on it.
726	</description>
727	</property>
728
729	<property>
730	<name>mapred.reduce.parallel.copies</name>
731	<value>5</value>
732	<description>The default number of parallel transfers run by reduce
733	during the copy(shuffle) phase.
734	</description>
735	</property>
736
737	<property>
738	<name>mapred.reduce.copy.backoff</name>
739	<value>300</value>
740	<description>The maximum amount of time (in seconds) a reducer spends on
741	fetching one map output before declaring it as failed.
742	</description>
743	</property>
744
745	<property>
746	<name>mapred.task.timeout</name>
747	<value>600000</value>
748	<description>The number of milliseconds before a task will be
749	terminated if it neither reads an input, writes an output, nor
750	updates its status string.
751	</description>
752	</property>
753
754	<property>
755	<name>mapred.tasktracker.map.tasks.maximum</name>
756	<value>2</value>
757	<description>The maximum number of map tasks that will be run
758	simultaneously by a task tracker.
759	</description>
760	</property>
761
762	<property>
763	<name>mapred.tasktracker.reduce.tasks.maximum</name>
764	<value>2</value>
765	<description>The maximum number of reduce tasks that will be run
766	simultaneously by a task tracker.
767	</description>
768	</property>
769
770	<property>
771	<name>mapred.jobtracker.completeuserjobs.maximum</name>
772	<value>100</value>
773	<description>The maximum number of complete jobs per user to keep around before delegating them to the job history.
774	</description>
775	</property>
776
777	<property>
778	<name>mapred.child.java.opts</name>
779	<value>-Xmx200m</value>
780	<description>Java opts for the task tracker child processes.
781	The following symbol, if present, will be interpolated: @taskid@ is replaced
782	by current TaskID. Any other occurrences of '@' will go unchanged.
783	For example, to enable verbose gc logging to a file named for the taskid in
784	/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
785	-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
786
787	The configuration variable mapred.child.ulimit can be used to control the
788	maximum virtual memory of the child processes.
789	</description>
790	</property>
791
792	<property>
793	<name>mapred.child.ulimit</name>
794	<value></value>
795	<description>The maximum virtual memory, in KB, of a process launched by the
796	Map-Reduce framework. This can be used to control both the Mapper/Reducer
797	tasks and applications using Hadoop Pipes, Hadoop Streaming etc.
798	By default it is left unspecified to let cluster admins control it via
799	limits.conf and other such relevant mechanisms.
800
801	Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
802	JavaVM, else the VM might not start.
803	</description>
804	</property>
805
806	<property>
807	<name>mapred.child.tmp</name>
808	<value>./tmp</value>
809	<description> To set the value of tmp directory for map and reduce tasks.
810	If the value is an absolute path, it is directly assigned. Otherwise, it is
811	prepended with task's working directory. The java tasks are executed with
812	option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
813	streaming are set with environment variable,
814	TMPDIR='the absolute path of the tmp dir'
815	</description>
816	</property>
817
818	<property>
819	<name>mapred.inmem.merge.threshold</name>
820	<value>1000</value>
821	<description>The threshold, in terms of the number of files
822	for the in-memory merge process. When we accumulate threshold number of files
823	we initiate the in-memory merge and spill to disk. A value of 0 or less than
824	0 indicates we DON'T want to have any threshold and instead depend only on
825	the ramfs's memory consumption to trigger the merge.
826	</description>
827	</property>
828
829	<property>
830	<name>mapred.map.tasks.speculative.execution</name>
831	<value>true</value>
832	<description>If true, then multiple instances of some map tasks
833	may be executed in parallel.</description>
834	</property>
835
836	<property>
837	<name>mapred.reduce.tasks.speculative.execution</name>
838	<value>true</value>
839	<description>If true, then multiple instances of some reduce tasks
840	may be executed in parallel.</description>
841	</property>
842
843	<property>
844	<name>mapred.min.split.size</name>
845	<value>0</value>
846	<description>The minimum size chunk that map input should be split
847	into. Note that some file formats may have minimum split sizes that
848	take priority over this setting.</description>
849	</property>
850
851	<property>
852	<name>mapred.submit.replication</name>
853	<value>10</value>
854	<description>The replication level for submitted job files. This
855	should be around the square root of the number of nodes.
856	</description>
857	</property>
858
859
860	<property>
861	<name>mapred.tasktracker.dns.interface</name>
862	<value>default</value>
863	<description>The name of the Network Interface from which a task
864	tracker should report its IP address.
865	</description>
866	</property>
867
868	<property>
869	<name>mapred.tasktracker.dns.nameserver</name>
870	<value>default</value>
871	<description>The host name or IP address of the name server (DNS)
872	which a TaskTracker should use to determine the host name used by
873	the JobTracker for communication and display purposes.
874	</description>
875	</property>
876
877	<property>
878	<name>tasktracker.http.threads</name>
879	<value>40</value>
880	<description>The number of worker threads for the http server. This is
881	used for map output fetching
882	</description>
883	</property>
884
885	<property>
886	<name>mapred.task.tracker.http.address</name>
887	<value>0.0.0.0:50060</value>
888	<description>
889	The task tracker http server address and port.
890	If the port is 0 then the server will start on a free port.
891	</description>
892	</property>
893
894	<property>
895	<name>keep.failed.task.files</name>
896	<value>false</value>
897	<description>Should the files for failed tasks be kept. This should only be
898	used on jobs that are failing, because the storage is never
899	reclaimed. It also prevents the map outputs from being erased
900	from the reduce directory as they are consumed.</description>
901	</property>
902
903	<!--
904	<property>
905	<name>keep.task.files.pattern</name>
906	<value>.*_m_123456_0</value>
907	<description>Keep all files from tasks whose task names match the given
908	regular expression. Defaults to none.</description>
909	</property>
910	-->
911
912	<property>
913	<name>mapred.output.compress</name>
914	<value>false</value>
915	<description>Should the job outputs be compressed?
916	</description>
917	</property>
918
919	<property>
920	<name>mapred.output.compression.type</name>
921	<value>RECORD</value>
922	<description>If the job outputs are to be compressed as SequenceFiles, how should
923	they be compressed? Should be one of NONE, RECORD or BLOCK.
924	</description>
925	</property>
926
927	<property>
928	<name>mapred.output.compression.codec</name>
929	<value>org.apache.hadoop.io.compress.DefaultCodec</value>
930	<description>If the job outputs are compressed, how should they be compressed?
931	</description>
932	</property>
933
934	<property>
935	<name>mapred.compress.map.output</name>
936	<value>false</value>
937	<description>Should the outputs of the maps be compressed before being
938	sent across the network. Uses SequenceFile compression.
939	</description>
940	</property>
941
942	<property>
943	<name>mapred.map.output.compression.codec</name>
944	<value>org.apache.hadoop.io.compress.DefaultCodec</value>
945	<description>If the map outputs are compressed, how should they be
946	compressed?
947	</description>
948	</property>
949
950	<property>
951	<name>io.seqfile.compress.blocksize</name>
952	<value>1000000</value>
953	<description>The minimum block size for compression in block compressed
954	SequenceFiles.
955	</description>
956	</property>
957
958	<property>
959	<name>io.seqfile.lazydecompress</name>
960	<value>true</value>
961	<description>Should values of block-compressed SequenceFiles be decompressed
962	only when necessary.
963	</description>
964	</property>
965
966	<property>
967	<name>io.seqfile.sorter.recordlimit</name>
968	<value>1000000</value>
969	<description>The limit on number of records to be kept in memory in a spill
970	in SequenceFile.Sorter.
971	</description>
972	</property>
973
974	<property>
975	<name>map.sort.class</name>
976	<value>org.apache.hadoop.util.QuickSort</value>
977	<description>The default sort class for sorting keys.
978	</description>
979	</property>
980
981	<property>
982	<name>mapred.userlog.limit.kb</name>
983	<value>0</value>
984	<description>The maximum size of user-logs of each task in KB. 0 disables the cap.
985	</description>
986	</property>
987
988	<property>
989	<name>mapred.userlog.retain.hours</name>
990	<value>24</value>
991	<description>The maximum time, in hours, for which the user-logs are to be
992	retained.
993	</description>
994	</property>
995
996	<property>
997	<name>mapred.hosts</name>
998	<value></value>
999	<description>Names a file that contains the list of nodes that may
1000	connect to the jobtracker. If the value is empty, all hosts are
1001	permitted.</description>
1002	</property>
1003
1004	<property>
1005	<name>mapred.hosts.exclude</name>
1006	<value></value>
1007	<description>Names a file that contains the list of hosts that
1008	should be excluded by the jobtracker. If the value is empty, no
1009	hosts are excluded.</description>
1010	</property>
1011
1012	<property>
1013	<name>mapred.max.tracker.failures</name>
1014	<value>4</value>
1015	<description>The number of task-failures on a tasktracker of a given job
1016	after which new tasks of that job aren't assigned to it.
1017	</description>
1018	</property>
1019
1020	<property>
1021	<name>jobclient.output.filter</name>
1022	<value>FAILED</value>
1023	<description>The filter for controlling the output of the task's userlogs sent
1024	to the console of the JobClient.
1025	The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and
1026	ALL.
1027	</description>
1028	</property>
1029
1030	<property>
1031	<name>mapred.job.tracker.persist.jobstatus.active</name>
1032	<value>false</value>
1033	<description>Indicates if persistency of job status information is
1034	active or not.
1035	</description>
1036	</property>
1037
1038	<property>
1039	<name>mapred.job.tracker.persist.jobstatus.hours</name>
1040	<value>0</value>
1041	<description>The number of hours job status information is persisted in DFS.
1042	The job status information will be available after it drops off the memory
1043	queue and between jobtracker restarts. With a zero value the job status
1044	information is not persisted at all in DFS.
1045	</description>
1046	</property>
1047
1048	<property>
1049	<name>mapred.job.tracker.persist.jobstatus.dir</name>
1050	<value>/jobtracker/jobsInfo</value>
1051	<description>The directory where the job status information is persisted
1052	in a file system to be available after it drops off the memory queue and
1053	between jobtracker restarts.
1054	</description>
1055	</property>
1056
1057	<property>
1058	<name>mapred.task.profile</name>
1059	<value>false</value>
1060	<description>Sets whether the system should collect profiler
1061	information for some of the tasks in this job. The information is stored
1062	in the user log directory. The value is "true" if task profiling
1063	is enabled.</description>
1064	</property>
1065
1066	<property>
1067	<name>mapred.task.profile.maps</name>
1068	<value>0-2</value>
1069	<description> Sets the ranges of map tasks to profile.
1070	mapred.task.profile has to be set to true for this value to take effect.
1071	</description>
1072	</property>
1073
1074	<property>
1075	<name>mapred.task.profile.reduces</name>
1076	<value>0-2</value>
1077	<description> Sets the ranges of reduce tasks to profile.
1078	mapred.task.profile has to be set to true for this value to take effect.
1079	</description>
1080	</property>
1081
1082	<property>
1083	<name>mapred.line.input.format.linespermap</name>
1084	<value>1</value>
1085	<description> Number of lines per split in NLineInputFormat.
1086	</description>
1087	</property>
1088
1089	<!-- ipc properties -->
1090
1091	<property>
1092	<name>ipc.client.idlethreshold</name>
1093	<value>4000</value>
1094	<description>Defines the threshold number of connections after which
1095	connections will be inspected for idleness.
1096	</description>
1097	</property>
1098
1099	<property>
1100	<name>ipc.client.kill.max</name>
1101	<value>10</value>
1102	<description>Defines the maximum number of clients to disconnect in one go.
1103	</description>
1104	</property>
1105
1106	<property>
1107	<name>ipc.client.connection.maxidletime</name>
1108	<value>10000</value>
1109	<description>The maximum time in msec after which a client will bring down the
1110	connection to the server.
1111	</description>
1112	</property>
1113
1114	<property>
1115	<name>ipc.client.connect.max.retries</name>
1116	<value>10</value>
1117	<description>Indicates the number of retries a client will make to establish
1118	a server connection.
1119	</description>
1120	</property>
1121
1122	<property>
1123	<name>ipc.server.listen.queue.size</name>
1124	<value>128</value>
1125	<description>Indicates the length of the listen queue for servers accepting
1126	client connections.
1127	</description>
1128	</property>
1129
1130	<property>
1131	<name>ipc.server.tcpnodelay</name>
1132	<value>false</value>
1133	<description>Turn on/off Nagle's algorithm for the TCP socket connection on
1134	the server. Setting to true disables the algorithm and may decrease latency
1135	with a cost of more/smaller packets.
1136	</description>
1137	</property>
1138
1139	<property>
1140	<name>ipc.client.tcpnodelay</name>
1141	<value>false</value>
1142	<description>Turn on/off Nagle's algorithm for the TCP socket connection on
1143	the client. Setting to true disables the algorithm and may decrease latency
1144	with a cost of more/smaller packets.
1145	</description>
1146	</property>
1147
1148	<!-- Job Notification Configuration -->
1149
1150	<!--
1151	<property>
1152	<name>job.end.notification.url</name>
1153	<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>
1154	<description>Indicates url which will be called on completion of job to inform
1155	end status of job.
1156	User can give at most 2 variables with URI : $jobId and $jobStatus.
1157	If they are present in URI, then they will be replaced by their
1158	respective values.
1159	</description>
1160	</property>
1161	-->
1162
1163	<property>
1164	<name>job.end.retry.attempts</name>
1165	<value>0</value>
1166	<description>Indicates how many times hadoop should attempt to contact the
1167	notification URL </description>
1168	</property>
1169
1170	<property>
1171	<name>job.end.retry.interval</name>
1172	<value>30000</value>
1173	<description>Indicates time in milliseconds between notification URL retry
1174	calls</description>
1175	</property>
1176
1177	<!-- Web Interface Configuration -->
1178
1179	<property>
1180	<name>webinterface.private.actions</name>
1181	<value>false</value>
1182	<description> If set to true, the web interfaces of the JobTracker (JT) and NameNode (NN) may contain
1183	actions, such as kill job, delete file, etc., that should
1184	not be exposed to public. Enable this option if the interfaces
1185	are only reachable by those who have the right authorization.
1186	</description>
1187	</property>
1188
1189	<!-- Proxy Configuration -->
1190
1191	<property>
1192	<name>hadoop.rpc.socket.factory.class.default</name>
1193	<value>org.apache.hadoop.net.StandardSocketFactory</value>
1194	<description> Default SocketFactory to use. This parameter is expected to be
1195	formatted as "package.FactoryClassName".
1196	</description>
1197	</property>
1198
1199	<property>
1200	<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
1201	<value></value>
1202	<description> SocketFactory to use to connect to a DFS. If null or empty, use
1203	hadoop.rpc.socket.factory.class.default. This socket factory is also used by
1204	DFSClient to create sockets to DataNodes.
1205	</description>
1206	</property>
1207
1208	<property>
1209	<name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
1210	<value></value>
1211	<description> SocketFactory to use to connect to a Map/Reduce master
1212	(JobTracker). If null or empty, then use hadoop.rpc.socket.factory.class.default.
1213	</description>
1214	</property>
1215
1216	<property>
1217	<name>hadoop.socks.server</name>
1218	<value></value>
1219	<description> Address (host:port) of the SOCKS server to be used by the
1220	SocksSocketFactory.
1221	</description>
1222	</property>
1223
1224	<!-- Rack Configuration -->
1225
1226	<property>
1227	<name>topology.node.switch.mapping.impl</name>
1228	<value>org.apache.hadoop.net.ScriptBasedMapping</value>
1229	<description> The default implementation of the DNSToSwitchMapping. It
1230	invokes a script specified in topology.script.file.name to resolve
1231	node names. If the value for topology.script.file.name is not set, the
1232	default value of DEFAULT_RACK is returned for all node names.
1233	</description>
1234	</property>
1235
1236	<property>
1237	<name>topology.script.file.name</name>
1238	<value></value>
1239	<description> The script name that should be invoked to resolve DNS names to
1240	NetworkTopology names. Example: the script would take host.foo.bar as an
1241	argument, and return /rack1 as the output.
1242	</description>
1243	</property>
1244
1245	<property>
1246	<name>topology.script.number.args</name>
1247	<value>20</value>
1248	<description> The max number of args that the script configured with
1249	topology.script.file.name should be run with. Each arg is an
1250	IP address.
1251	</description>
1252	</property>
1253
1254	<property>
1255	<name>mapred.task.cache.levels</name>
1256	<value>2</value>
1257	<description> This is the max level of the task cache. For example, if
1258	the level is 2, the tasks cached are at the host level and at the rack
1259	level.
1260	</description>
1261	</property>
1262
1263	<property>
1264	<name>mapred.merge.recordsBeforeProgress</name>
1265	<value>10000</value>
1266	<description> The number of records to process during merge before
1267	sending a progress notification to the TaskTracker.
1268	</description>
1269	</property>
1270
1271	</configuration>

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: nutchez-0.1/conf/hadoop-default.xml @ 95

Download in other formats: