Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

hadoop-default.xml @ 88

Last change on this file since 88 was 66, checked in by waue, 15 years ago
NutchEz - an easy way to nutch
Property svn:executable set to ``*
File size: 38.3 KB

Rev	Line
[66]	1	<?xml version="1.0"?>
	2	<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
	3
	4	<!-- Do not modify this file directly. Instead, copy entries that you -->
	5	<!-- wish to modify from this file into hadoop-site.xml and change them -->
	6	<!-- there. If hadoop-site.xml does not already exist, create it. -->
	7
	8	<configuration>
	9
	10	<!-- global properties -->
	11
	12	<property>
	13	<name>hadoop.tmp.dir</name>
	14	<value>/tmp/hadoop-${user.name}</value>
	15	<description>A base for other temporary directories.</description>
	16	</property>
	17
	18	<property>
	19	<name>hadoop.native.lib</name>
	20	<value>true</value>
	21	<description>Should native hadoop libraries, if present, be used.</description>
	22	</property>
	23
	24	<!-- logging properties -->
	25
	26	<property>
	27	<name>hadoop.logfile.size</name>
	28	<value>10000000</value>
	29	<description>The max size of each log file</description>
	30	</property>
	31
	32	<property>
	33	<name>hadoop.logfile.count</name>
	34	<value>10</value>
	35	<description>The max number of log files</description>
	36	</property>
	37
	38	<property>
	39	<name>hadoop.job.history.location</name>
	40	<value></value>
	41	<description> If job tracker is static the history files are stored
	42	in this single well known place. If no value is set here, by default,
	43	it is in the local file system at ${hadoop.log.dir}/history.
	44	</description>
	45	</property>
	46
	47	<property>
	48	<name>hadoop.job.history.user.location</name>
	49	<value></value>
	50	<description> User can specify a location to store the history files of
	51	a particular job. If nothing is specified, the logs are stored in
	52	output directory. The files are stored in "_logs/history/" in the directory.
	53	User can stop logging by giving the value "none".
	54	</description>
	55	</property>
	56
	57	<property>
	58	<name>dfs.namenode.logging.level</name>
	59	<value>info</value>
	60	<description>The logging level for dfs namenode. Other values are "dir"(trace
	61	namespace mutations), "block"(trace block under/over replications and block
	62	creations/deletions), or "all".</description>
	63	</property>
	64
	65	<!-- i/o properties -->
	66
	67	<property>
	68	<name>io.sort.factor</name>
	69	<value>10</value>
	70	<description>The number of streams to merge at once while sorting
	71	files. This determines the number of open file handles.</description>
	72	</property>
	73
	74	<property>
	75	<name>io.sort.mb</name>
	76	<value>100</value>
	77	<description>The total amount of buffer memory to use while sorting
	78	files, in megabytes. By default, gives each merge stream 1MB, which
	79	should minimize seeks.</description>
	80	</property>
	81
	82	<property>
	83	<name>io.sort.record.percent</name>
	84	<value>0.05</value>
	85	<description>The percentage of io.sort.mb dedicated to tracking record
	86	boundaries. Let this value be r, io.sort.mb be x. The maximum number
	87	of records collected before the collection thread must block is equal
	88	to (r * x) / 4</description>
	89	</property>
	90
	91	<property>
	92	<name>io.sort.spill.percent</name>
	93	<value>0.80</value>
	94	<description>The soft limit in either the buffer or record collection
	95	buffers. Once reached, a thread will begin to spill the contents to disk
	96	in the background. Note that this does not imply any chunking of data to
	97	the spill. A value less than 0.5 is not recommended.</description>
	98	</property>
	99
	100	<property>
	101	<name>io.file.buffer.size</name>
	102	<value>4096</value>
	103	<description>The size of buffer for use in sequence files.
	104	The size of this buffer should probably be a multiple of hardware
	105	page size (4096 on Intel x86), and it determines how much data is
	106	buffered during read and write operations.</description>
	107	</property>
	108
	109	<property>
	110	<name>io.bytes.per.checksum</name>
	111	<value>512</value>
	112	<description>The number of bytes per checksum. Must not be larger than
	113	io.file.buffer.size.</description>
	114	</property>
	115
	116	<property>
	117	<name>io.skip.checksum.errors</name>
	118	<value>false</value>
	119	<description>If true, when a checksum error is encountered while
	120	reading a sequence file, entries are skipped, instead of throwing an
	121	exception.</description>
	122	</property>
	123
	124	<property>
	125	<name>io.map.index.skip</name>
	126	<value>0</value>
	127	<description>Number of index entries to skip between each entry.
	128	Zero by default. Setting this to values larger than zero can
	129	facilitate opening large map files using less memory.</description>
	130	</property>
	131
	132	<property>
	133	<name>io.compression.codecs</name>
	134	<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
	135	<description>A list of the compression codec classes that can be used
	136	for compression/decompression.</description>
	137	</property>
	138
	139	<property>
	140	<name>io.serializations</name>
	141	<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
	142	<description>A list of serialization classes that can be used for
	143	obtaining serializers and deserializers.</description>
	144	</property>
	145
	146	<!-- file system properties -->
	147
	148	<property>
	149	<name>fs.default.name</name>
	150	<value>file:///</value>
	151	<description>The name of the default file system. A URI whose
	152	scheme and authority determine the FileSystem implementation. The
	153	uri's scheme determines the config property (fs.SCHEME.impl) naming
	154	the FileSystem implementation class. The uri's authority is used to
	155	determine the host, port, etc. for a filesystem.</description>
	156	</property>
	157
	158	<property>
	159	<name>fs.trash.interval</name>
	160	<value>0</value>
	161	<description>Number of minutes between trash checkpoints.
	162	If zero, the trash feature is disabled.
	163	</description>
	164	</property>
	165
	166	<property>
	167	<name>fs.file.impl</name>
	168	<value>org.apache.hadoop.fs.LocalFileSystem</value>
	169	<description>The FileSystem for file: uris.</description>
	170	</property>
	171
	172	<property>
	173	<name>fs.hdfs.impl</name>
	174	<value>org.apache.hadoop.dfs.DistributedFileSystem</value>
	175	<description>The FileSystem for hdfs: uris.</description>
	176	</property>
	177
	178	<property>
	179	<name>fs.s3.impl</name>
	180	<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
	181	<description>The FileSystem for s3: uris.</description>
	182	</property>
	183
	184	<property>
	185	<name>fs.s3n.impl</name>
	186	<value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
	187	<description>The FileSystem for s3n: (Native S3) uris.</description>
	188	</property>
	189
	190	<property>
	191	<name>fs.kfs.impl</name>
	192	<value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
	193	<description>The FileSystem for kfs: uris.</description>
	194	</property>
	195
	196	<property>
	197	<name>fs.hftp.impl</name>
	198	<value>org.apache.hadoop.dfs.HftpFileSystem</value>
	199	</property>
	200
	201	<property>
	202	<name>fs.hsftp.impl</name>
	203	<value>org.apache.hadoop.dfs.HsftpFileSystem</value>
	204	</property>
	205
	206	<property>
	207	<name>fs.ftp.impl</name>
	208	<value>org.apache.hadoop.fs.ftp.FTPFileSystem</value>
	209	<description>The FileSystem for ftp: uris.</description>
	210	</property>
	211
	212	<property>
	213	<name>fs.ramfs.impl</name>
	214	<value>org.apache.hadoop.fs.InMemoryFileSystem</value>
	215	<description>The FileSystem for ramfs: uris.</description>
	216	</property>
	217
	218	<property>
	219	<name>fs.har.impl</name>
	220	<value>org.apache.hadoop.fs.HarFileSystem</value>
	221	<description>The filesystem for Hadoop archives. </description>
	222	</property>
	223
	224	<property>
	225	<name>fs.inmemory.size.mb</name>
	226	<value>75</value>
	227	<description>The size of the in-memory filesystem instance in MB</description>
	228	</property>
	229
	230	<property>
	231	<name>fs.checkpoint.dir</name>
	232	<value>${hadoop.tmp.dir}/dfs/namesecondary</value>
	233	<description>Determines where on the local filesystem the DFS secondary
	234	name node should store the temporary images and edits to merge.
	235	If this is a comma-delimited list of directories then the image is
	236	replicated in all of the directories for redundancy.
	237	</description>
	238	</property>
	239
	240	<property>
	241	<name>fs.checkpoint.period</name>
	242	<value>3600</value>
	243	<description>The number of seconds between two periodic checkpoints.
	244	</description>
	245	</property>
	246
	247	<property>
	248	<name>fs.checkpoint.size</name>
	249	<value>67108864</value>
	250	<description>The size of the current edit log (in bytes) that triggers
	251	a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
	252	</description>
	253	</property>
	254
	255	<property>
	256	<name>dfs.secondary.http.address</name>
	257	<value>0.0.0.0:50090</value>
	258	<description>
	259	The secondary namenode http server address and port.
	260	If the port is 0 then the server will start on a free port.
	261	</description>
	262	</property>
	263
	264	<property>
	265	<name>dfs.datanode.address</name>
	266	<value>0.0.0.0:50010</value>
	267	<description>
	268	The address where the datanode server will listen on.
	269	If the port is 0 then the server will start on a free port.
	270	</description>
	271	</property>
	272
	273	<property>
	274	<name>dfs.datanode.http.address</name>
	275	<value>0.0.0.0:50075</value>
	276	<description>
	277	The datanode http server address and port.
	278	If the port is 0 then the server will start on a free port.
	279	</description>
	280	</property>
	281
	282	<property>
	283	<name>dfs.datanode.ipc.address</name>
	284	<value>0.0.0.0:50020</value>
	285	<description>
	286	The datanode ipc server address and port.
	287	If the port is 0 then the server will start on a free port.
	288	</description>
	289	</property>
	290
	291	<property>
	292	<name>dfs.datanode.handler.count</name>
	293	<value>3</value>
	294	<description>The number of server threads for the datanode.</description>
	295	</property>
	296
	297	<property>
	298	<name>dfs.http.address</name>
	299	<value>0.0.0.0:50070</value>
	300	<description>
	301	The address and the base port where the dfs namenode web ui will listen on.
	302	If the port is 0 then the server will start on a free port.
	303	</description>
	304	</property>
	305
	306	<property>
	307	<name>dfs.datanode.https.address</name>
	308	<value>0.0.0.0:50475</value>
	309	</property>
	310
	311	<property>
	312	<name>dfs.https.address</name>
	313	<value>0.0.0.0:50470</value>
	314	</property>
	315
	316	<property>
	317	<name>https.keystore.info.rsrc</name>
	318	<value>sslinfo.xml</value>
	319	<description>The name of the resource from which ssl keystore information
	320	will be extracted
	321	</description>
	322	</property>
	323
	324	<property>
	325	<name>dfs.datanode.dns.interface</name>
	326	<value>default</value>
	327	<description>The name of the Network Interface from which a data node should
	328	report its IP address.
	329	</description>
	330	</property>
	331
	332	<property>
	333	<name>dfs.datanode.dns.nameserver</name>
	334	<value>default</value>
	335	<description>The host name or IP address of the name server (DNS)
	336	which a DataNode should use to determine the host name used by the
	337	NameNode for communication and display purposes.
	338	</description>
	339	</property>
	340
	341	<property>
	342	<name>dfs.replication.considerLoad</name>
	343	<value>true</value>
	344	<description>Decide if chooseTarget considers the target's load or not
	345	</description>
	346	</property>
	347	<property>
	348	<name>dfs.default.chunk.view.size</name>
	349	<value>32768</value>
	350	<description>The number of bytes to view for a file on the browser.
	351	</description>
	352	</property>
	353
	354	<property>
	355	<name>dfs.datanode.du.reserved</name>
	356	<value>0</value>
	357	<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
	358	</description>
	359	</property>
	360
	361	<property>
	362	<name>dfs.datanode.du.pct</name>
	363	<value>0.98f</value>
	364	<description>When calculating remaining space, only use this percentage of the real available space
	365	</description>
	366	</property>
	367
	368	<property>
	369	<name>dfs.name.dir</name>
	370	<value>${hadoop.tmp.dir}/dfs/name</value>
	371	<description>Determines where on the local filesystem the DFS name node
	372	should store the name table. If this is a comma-delimited list
	373	of directories then the name table is replicated in all of the
	374	directories, for redundancy. </description>
	375	</property>
	376
	377	<property>
	378	<name>dfs.web.ugi</name>
	379	<value>webuser,webgroup</value>
	380	<description>The user account used by the web interface.
	381	Syntax: USERNAME,GROUP1,GROUP2, ...
	382	</description>
	383	</property>
	384
	385	<property>
	386	<name>dfs.permissions</name>
	387	<value>true</value>
	388	<description>
	389	If "true", enable permission checking in HDFS.
	390	If "false", permission checking is turned off,
	391	but all other behavior is unchanged.
	392	Switching from one parameter value to the other does not change the mode,
	393	owner or group of files or directories.
	394	</description>
	395	</property>
	396
	397	<property>
	398	<name>dfs.permissions.supergroup</name>
	399	<value>supergroup</value>
	400	<description>The name of the group of super-users.</description>
	401	</property>
	402
	403	<property>
	404	<name>dfs.client.buffer.dir</name>
	405	<value>${hadoop.tmp.dir}/dfs/tmp</value>
	406	<description>Determines where on the local filesystem a DFS client
	407	should store its blocks before it sends them to the datanode.
	408	</description>
	409	</property>
	410
	411	<property>
	412	<name>dfs.data.dir</name>
	413	<value>${hadoop.tmp.dir}/dfs/data</value>
	414	<description>Determines where on the local filesystem a DFS data node
	415	should store its blocks. If this is a comma-delimited
	416	list of directories, then data will be stored in all named
	417	directories, typically on different devices.
	418	Directories that do not exist are ignored.
	419	</description>
	420	</property>
	421
	422	<property>
	423	<name>dfs.replication</name>
	424	<value>3</value>
	425	<description>Default block replication.
	426	The actual number of replications can be specified when the file is created.
	427	The default is used if replication is not specified at create time.
	428	</description>
	429	</property>
	430
	431	<property>
	432	<name>dfs.replication.max</name>
	433	<value>512</value>
	434	<description>Maximal block replication.
	435	</description>
	436	</property>
	437
	438	<property>
	439	<name>dfs.replication.min</name>
	440	<value>1</value>
	441	<description>Minimal block replication.
	442	</description>
	443	</property>
	444
	445	<property>
	446	<name>dfs.block.size</name>
	447	<value>67108864</value>
	448	<description>The default block size for new files.</description>
	449	</property>
	450
	451	<property>
	452	<name>dfs.df.interval</name>
	453	<value>60000</value>
	454	<description>Disk usage statistics refresh interval in msec.</description>
	455	</property>
	456
	457	<property>
	458	<name>dfs.client.block.write.retries</name>
	459	<value>3</value>
	460	<description>The number of retries for writing blocks to the data nodes,
	461	before we signal failure to the application.
	462	</description>
	463	</property>
	464
	465	<property>
	466	<name>dfs.blockreport.intervalMsec</name>
	467	<value>3600000</value>
	468	<description>Determines block reporting interval in milliseconds.</description>
	469	</property>
	470
	471	<property>
	472	<name>dfs.blockreport.initialDelay</name> <value>0</value>
	473	<description>Delay for first block report in seconds.</description>
	474	</property>
	475
	476	<property>
	477	<name>dfs.heartbeat.interval</name>
	478	<value>3</value>
	479	<description>Determines datanode heartbeat interval in seconds.</description>
	480	</property>
	481
	482	<property>
	483	<name>dfs.namenode.handler.count</name>
	484	<value>10</value>
	485	<description>The number of server threads for the namenode.</description>
	486	</property>
	487
	488	<property>
	489	<name>dfs.safemode.threshold.pct</name>
	490	<value>0.999f</value>
	491	<description>
	492	Specifies the percentage of blocks that should satisfy
	493	the minimal replication requirement defined by dfs.replication.min.
	494	Values less than or equal to 0 mean not to start in safe mode.
	495	Values greater than 1 will make safe mode permanent.
	496	</description>
	497	</property>
	498
	499	<property>
	500	<name>dfs.safemode.extension</name>
	501	<value>30000</value>
	502	<description>
	503	Determines extension of safe mode in milliseconds
	504	after the threshold level is reached.
	505	</description>
	506	</property>
	507
	508	<property>
	509	<name>dfs.balance.bandwidthPerSec</name>
	510	<value>1048576</value>
	511	<description>
	512	Specifies the maximum amount of bandwidth that each datanode
	513	can utilize for the balancing purpose in terms of
	514	the number of bytes per second.
	515	</description>
	516	</property>
	517
	518	<property>
	519	<name>dfs.hosts</name>
	520	<value></value>
	521	<description>Names a file that contains a list of hosts that are
	522	permitted to connect to the namenode. The full pathname of the file
	523	must be specified. If the value is empty, all hosts are
	524	permitted.</description>
	525	</property>
	526
	527	<property>
	528	<name>dfs.hosts.exclude</name>
	529	<value></value>
	530	<description>Names a file that contains a list of hosts that are
	531	not permitted to connect to the namenode. The full pathname of the
	532	file must be specified. If the value is empty, no hosts are
	533	excluded.</description>
	534	</property>
	535
	536	<property>
	537	<name>dfs.max.objects</name>
	538	<value>0</value>
	539	<description>The maximum number of files, directories and blocks
	540	dfs supports. A value of zero indicates no limit to the number
	541	of objects that dfs supports.
	542	</description>
	543	</property>
	544
	545	<property>
	546	<name>dfs.namenode.decommission.interval</name>
	547	<value>30</value>
	548	<description>Namenode periodicity in seconds to check if decommission is complete.</description>
	549	</property>
	550
	551	<property>
	552	<name>dfs.namenode.decommission.nodes.per.interval</name>
	553	<value>5</value>
	554	<description>The number of nodes the namenode checks for completed decommission
	555	in each dfs.namenode.decommission.interval.</description>
	556	</property>
	557
	558	<property>
	559	<name>dfs.replication.interval</name>
	560	<value>3</value>
	561	<description>The periodicity in seconds with which the namenode computes replication work for datanodes. </description>
	562	</property>
	563
	564	<property>
	565	<name>fs.s3.block.size</name>
	566	<value>67108864</value>
	567	<description>Block size to use when writing files to S3.</description>
	568	</property>
	569
	570	<property>
	571	<name>fs.s3.buffer.dir</name>
	572	<value>${hadoop.tmp.dir}/s3</value>
	573	<description>Determines where on the local filesystem the S3 filesystem
	574	should store files before sending them to S3
	575	(or after retrieving them from S3).
	576	</description>
	577	</property>
	578
	579	<property>
	580	<name>fs.s3.maxRetries</name>
	581	<value>4</value>
	582	<description>The maximum number of retries for reading or writing files to S3,
	583	before we signal failure to the application.
	584	</description>
	585	</property>
	586
	587	<property>
	588	<name>fs.s3.sleepTimeSeconds</name>
	589	<value>10</value>
	590	<description>The number of seconds to sleep between each S3 retry.
	591	</description>
	592	</property>
	593
	594	<!-- map/reduce properties -->
	595
	596	<property>
	597	<name>mapred.job.tracker</name>
	598	<value>local</value>
	599	<description>The host and port that the MapReduce job tracker runs
	600	at. If "local", then jobs are run in-process as a single map
	601	and reduce task.
	602	</description>
	603	</property>
	604
	605	<property>
	606	<name>mapred.job.tracker.http.address</name>
	607	<value>0.0.0.0:50030</value>
	608	<description>
	609	The job tracker http server address and port the server will listen on.
	610	If the port is 0 then the server will start on a free port.
	611	</description>
	612	</property>
	613
	614	<property>
	615	<name>mapred.job.tracker.handler.count</name>
	616	<value>10</value>
	617	<description>
	618	The number of server threads for the JobTracker. This should be roughly
	619	4% of the number of tasktracker nodes.
	620	</description>
	621	</property>
	622
	623	<property>
	624	<name>mapred.task.tracker.report.address</name>
	625	<value>127.0.0.1:0</value>
	626	<description>The interface and port that task tracker server listens on.
	627	Since it is only connected to by the tasks, it uses the local interface.
	628	EXPERT ONLY. Should only be changed if your host does not have the loopback
	629	interface.</description>
	630	</property>
	631
	632	<property>
	633	<name>mapred.local.dir</name>
	634	<value>${hadoop.tmp.dir}/mapred/local</value>
	635	<description>The local directory where MapReduce stores intermediate
	636	data files. May be a comma-separated list of
	637	directories on different devices in order to spread disk i/o.
	638	Directories that do not exist are ignored.
	639	</description>
	640	</property>
	641
	642	<property>
	643	<name>local.cache.size</name>
	644	<value>10737418240</value>
	645	<description>The limit on the size of cache you want to keep, set by default
	646	to 10GB. This will act as a soft limit on the cache directory for out of band data.
	647	</description>
	648	</property>
	649
	650	<property>
	651	<name>mapred.system.dir</name>
	652	<value>${hadoop.tmp.dir}/mapred/system</value>
	653	<description>The shared directory where MapReduce stores control files.
	654	</description>
	655	</property>
	656
	657	<property>
	658	<name>mapred.temp.dir</name>
	659	<value>${hadoop.tmp.dir}/mapred/temp</value>
	660	<description>A shared directory for temporary files.
	661	</description>
	662	</property>
	663
	664	<property>
	665	<name>mapred.local.dir.minspacestart</name>
	666	<value>0</value>
	667	<description>If the space in mapred.local.dir drops under this,
	668	do not ask for more tasks.
	669	Value in bytes.
	670	</description>
	671	</property>
	672
	673	<property>
	674	<name>mapred.local.dir.minspacekill</name>
	675	<value>0</value>
	676	<description>If the space in mapred.local.dir drops under this,
	677	do not ask for more tasks until all the current ones have finished and
	678	cleaned up. Also, to save the rest of the tasks we have running,
	679	kill one of them, to clean up some space. Start with the reduce tasks,
	680	then go with the ones that have finished the least.
	681	Value in bytes.
	682	</description>
	683	</property>
	684
	685	<property>
	686	<name>mapred.tasktracker.expiry.interval</name>
	687	<value>600000</value>
	688	<description>Expert: The time-interval, in milliseconds, after which
	689	a tasktracker is declared 'lost' if it doesn't send heartbeats.
	690	</description>
	691	</property>
	692
	693	<property>
	694	<name>mapred.map.tasks</name>
	695	<value>2</value>
	696	<description>The default number of map tasks per job. Typically set
	697	to a prime several times greater than number of available hosts.
	698	Ignored when mapred.job.tracker is "local".
	699	</description>
	700	</property>
	701
	702	<property>
	703	<name>mapred.reduce.tasks</name>
	704	<value>1</value>
	705	<description>The default number of reduce tasks per job. Typically set
	706	to a prime close to the number of available hosts. Ignored when
	707	mapred.job.tracker is "local".
	708	</description>
	709	</property>
	710
	711	<property>
	712	<name>mapred.map.max.attempts</name>
	713	<value>4</value>
	714	<description>Expert: The maximum number of attempts per map task.
	715	In other words, the framework will try to execute a map task this many
	716	times before giving up on it.
	717	</description>
	718	</property>
	719
	720	<property>
	721	<name>mapred.reduce.max.attempts</name>
	722	<value>4</value>
	723	<description>Expert: The maximum number of attempts per reduce task.
	724	In other words, the framework will try to execute a reduce task this many
	725	times before giving up on it.
	726	</description>
	727	</property>
	728
	729	<property>
	730	<name>mapred.reduce.parallel.copies</name>
	731	<value>5</value>
	732	<description>The default number of parallel transfers run by reduce
	733	during the copy(shuffle) phase.
	734	</description>
	735	</property>
	736
	737	<property>
	738	<name>mapred.reduce.copy.backoff</name>
	739	<value>300</value>
	740	<description>The maximum amount of time (in seconds) a reducer spends on
	741	fetching one map output before declaring it as failed.
	742	</description>
	743	</property>
	744
	745	<property>
	746	<name>mapred.task.timeout</name>
	747	<value>600000</value>
	748	<description>The number of milliseconds before a task will be
	749	terminated if it neither reads an input, writes an output, nor
	750	updates its status string.
	751	</description>
	752	</property>
	753
	754	<property>
	755	<name>mapred.tasktracker.map.tasks.maximum</name>
	756	<value>2</value>
	757	<description>The maximum number of map tasks that will be run
	758	simultaneously by a task tracker.
	759	</description>
	760	</property>
	761
	762	<property>
	763	<name>mapred.tasktracker.reduce.tasks.maximum</name>
	764	<value>2</value>
	765	<description>The maximum number of reduce tasks that will be run
	766	simultaneously by a task tracker.
	767	</description>
	768	</property>
	769
	770	<property>
	771	<name>mapred.jobtracker.completeuserjobs.maximum</name>
	772	<value>100</value>
	773	<description>The maximum number of complete jobs per user to keep around before delegating them to the job history.
	774	</description>
	775	</property>
	776
	777	<property>
	778	<name>mapred.child.java.opts</name>
	779	<value>-Xmx200m</value>
	780	<description>Java opts for the task tracker child processes.
	781	The following symbol, if present, will be interpolated: @taskid@ is replaced
	782	by current TaskID. Any other occurrences of '@' will go unchanged.
	783	For example, to enable verbose gc logging to a file named for the taskid in
	784	/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
	785	-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
	786
	787	The configuration variable mapred.child.ulimit can be used to control the
	788	maximum virtual memory of the child processes.
	789	</description>
	790	</property>
	791
	792	<property>
	793	<name>mapred.child.ulimit</name>
	794	<value></value>
	795	<description>The maximum virtual memory, in KB, of a process launched by the
	796	Map-Reduce framework. This can be used to control both the Mapper/Reducer
	797	tasks and applications using Hadoop Pipes, Hadoop Streaming etc.
	798	By default it is left unspecified to let cluster admins control it via
	799	limits.conf and other such relevant mechanisms.
	800
	801	Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
	802	JavaVM, else the VM might not start.
	803	</description>
	804	</property>
	805
	806	<property>
	807	<name>mapred.child.tmp</name>
	808	<value>./tmp</value>
	809	<description> To set the value of tmp directory for map and reduce tasks.
	810	If the value is an absolute path, it is directly assigned. Otherwise, it is
	811	prepended with task's working directory. The java tasks are executed with
	812	option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
	813	streaming are set with environment variable,
	814	TMPDIR='the absolute path of the tmp dir'
	815	</description>
	816	</property>
	817
	818	<property>
	819	<name>mapred.inmem.merge.threshold</name>
	820	<value>1000</value>
	821	<description>The threshold, in terms of the number of files
	822	for the in-memory merge process. When we accumulate threshold number of files
	823	we initiate the in-memory merge and spill to disk. A value of 0 or less than
	824	0 indicates we DON'T want to have any threshold and instead depend only on
	825	the ramfs's memory consumption to trigger the merge.
	826	</description>
	827	</property>
	828
	829	<property>
	830	<name>mapred.map.tasks.speculative.execution</name>
	831	<value>true</value>
	832	<description>If true, then multiple instances of some map tasks
	833	may be executed in parallel.</description>
	834	</property>
	835
	836	<property>
	837	<name>mapred.reduce.tasks.speculative.execution</name>
	838	<value>true</value>
	839	<description>If true, then multiple instances of some reduce tasks
	840	may be executed in parallel.</description>
	841	</property>
	842
	843	<property>
	844	<name>mapred.min.split.size</name>
	845	<value>0</value>
	846	<description>The minimum size chunk that map input should be split
	847	into. Note that some file formats may have minimum split sizes that
	848	take priority over this setting.</description>
	849	</property>
	850
	851	<property>
	852	<name>mapred.submit.replication</name>
	853	<value>10</value>
	854	<description>The replication level for submitted job files. This
	855	should be around the square root of the number of nodes.
	856	</description>
	857	</property>
	858
	859
	860	<property>
	861	<name>mapred.tasktracker.dns.interface</name>
	862	<value>default</value>
	863	<description>The name of the Network Interface from which a task
	864	tracker should report its IP address.
	865	</description>
	866	</property>
	867
	868	<property>
	869	<name>mapred.tasktracker.dns.nameserver</name>
	870	<value>default</value>
	871	<description>The host name or IP address of the name server (DNS)
	872	which a TaskTracker should use to determine the host name used by
	873	the JobTracker for communication and display purposes.
	874	</description>
	875	</property>
	876
	877	<property>
	878	<name>tasktracker.http.threads</name>
	879	<value>40</value>
	880	<description>The number of worker threads for the http server. This is
	881	used for map output fetching
	882	</description>
	883	</property>
	884
	885	<property>
	886	<name>mapred.task.tracker.http.address</name>
	887	<value>0.0.0.0:50060</value>
	888	<description>
	889	The task tracker http server address and port.
	890	If the port is 0 then the server will start on a free port.
	891	</description>
	892	</property>
	893
	894	<property>
	895	<name>keep.failed.task.files</name>
	896	<value>false</value>
	897	<description>Should the files for failed tasks be kept. This should only be
	898	used on jobs that are failing, because the storage is never
	899	reclaimed. It also prevents the map outputs from being erased
	900	from the reduce directory as they are consumed.</description>
	901	</property>
	902
	903	<!--
	904	<property>
	905	<name>keep.task.files.pattern</name>
	906	<value>.*_m_123456_0</value>
	907	<description>Keep all files from tasks whose task names match the given
	908	regular expression. Defaults to none.</description>
	909	</property>
	910	-->
	911
	912	<property>
	913	<name>mapred.output.compress</name>
	914	<value>false</value>
	915	<description>Should the job outputs be compressed?
	916	</description>
	917	</property>
	918
	919	<property>
	920	<name>mapred.output.compression.type</name>
	921	<value>RECORD</value>
	922	<description>If the job outputs are to be compressed as SequenceFiles, how should
	923	they be compressed? Should be one of NONE, RECORD or BLOCK.
	924	</description>
	925	</property>
	926
	927	<property>
	928	<name>mapred.output.compression.codec</name>
	929	<value>org.apache.hadoop.io.compress.DefaultCodec</value>
	930	<description>If the job outputs are compressed, how should they be compressed?
	931	</description>
	932	</property>
	933
	934	<property>
	935	<name>mapred.compress.map.output</name>
	936	<value>false</value>
	937	<description>Should the outputs of the maps be compressed before being
	938	sent across the network. Uses SequenceFile compression.
	939	</description>
	940	</property>
	941
	942	<property>
	943	<name>mapred.map.output.compression.codec</name>
	944	<value>org.apache.hadoop.io.compress.DefaultCodec</value>
	945	<description>If the map outputs are compressed, how should they be
	946	compressed?
	947	</description>
	948	</property>
	949
	950	<property>
	951	<name>io.seqfile.compress.blocksize</name>
	952	<value>1000000</value>
	953	<description>The minimum block size for compression in block compressed
	954	SequenceFiles.
	955	</description>
	956	</property>
	957
	958	<property>
	959	<name>io.seqfile.lazydecompress</name>
	960	<value>true</value>
	961	<description>Should values of block-compressed SequenceFiles be decompressed
	962	only when necessary.
	963	</description>
	964	</property>
	965
	966	<property>
	967	<name>io.seqfile.sorter.recordlimit</name>
	968	<value>1000000</value>
	969	<description>The limit on number of records to be kept in memory in a spill
	970	in SequenceFile.Sorter.
	971	</description>
	972	</property>
	973
	974	<property>
	975	<name>map.sort.class</name>
	976	<value>org.apache.hadoop.util.QuickSort</value>
	977	<description>The default sort class for sorting keys.
	978	</description>
	979	</property>
	980
	981	<property>
	982	<name>mapred.userlog.limit.kb</name>
	983	<value>0</value>
	984	<description>The maximum size of user-logs of each task in KB. 0 disables the cap.
	985	</description>
	986	</property>
	987
	988	<property>
	989	<name>mapred.userlog.retain.hours</name>
	990	<value>24</value>
	991	<description>The maximum time, in hours, for which the user-logs are to be
	992	retained.
	993	</description>
	994	</property>
	995
	996	<property>
	997	<name>mapred.hosts</name>
	998	<value></value>
	999	<description>Names a file that contains the list of nodes that may
	1000	connect to the jobtracker. If the value is empty, all hosts are
	1001	permitted.</description>
	1002	</property>
	1003
	1004	<property>
	1005	<name>mapred.hosts.exclude</name>
	1006	<value></value>
	1007	<description>Names a file that contains the list of hosts that
	1008	should be excluded by the jobtracker. If the value is empty, no
	1009	hosts are excluded.</description>
	1010	</property>
	1011
	1012	<property>
	1013	<name>mapred.max.tracker.failures</name>
	1014	<value>4</value>
	1015	<description>The number of task-failures on a tasktracker of a given job
	1016	after which new tasks of that job aren't assigned to it.
	1017	</description>
	1018	</property>
	1019
	1020	<property>
	1021	<name>jobclient.output.filter</name>
	1022	<value>FAILED</value>
	1023	<description>The filter for controlling the output of the task's userlogs sent
	1024	to the console of the JobClient.
	1025	The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and
	1026	ALL.
	1027	</description>
	1028	</property>
	1029
	1030	<property>
	1031	<name>mapred.job.tracker.persist.jobstatus.active</name>
	1032	<value>false</value>
	1033	<description>Indicates if persistency of job status information is
	1034	active or not.
	1035	</description>
	1036	</property>
	1037
	1038	<property>
	1039	<name>mapred.job.tracker.persist.jobstatus.hours</name>
	1040	<value>0</value>
	1041	<description>The number of hours job status information is persisted in DFS.
	1042	The job status information will be available after it drops off the memory
	1043	queue and between jobtracker restarts. With a zero value the job status
	1044	information is not persisted at all in DFS.
	1045	</description>
	1046	</property>
	1047
	1048	<property>
	1049	<name>mapred.job.tracker.persist.jobstatus.dir</name>
	1050	<value>/jobtracker/jobsInfo</value>
	1051	<description>The directory where the job status information is persisted
	1052	in a file system to be available after it drops off the memory queue and
	1053	between jobtracker restarts.
	1054	</description>
	1055	</property>
	1056
	1057	<property>
	1058	<name>mapred.task.profile</name>
	1059	<value>false</value>
	1060	<description>Whether the system should collect profiler
	1061	information for some of the tasks in this job. The information is stored
	1062	in the user log directory. The value is "true" if task profiling
	1063	is enabled.</description>
	1064	</property>
	1065
	1066	<property>
	1067	<name>mapred.task.profile.maps</name>
	1068	<value>0-2</value>
	1069	<description> To set the ranges of map tasks to profile.
	1070	mapred.task.profile has to be set to true for this value to take effect.
	1071	</description>
	1072	</property>
	1073
	1074	<property>
	1075	<name>mapred.task.profile.reduces</name>
	1076	<value>0-2</value>
	1077	<description> To set the ranges of reduce tasks to profile.
	1078	mapred.task.profile has to be set to true for this value to take effect.
	1079	</description>
	1080	</property>
	1081
	1082	<property>
	1083	<name>mapred.line.input.format.linespermap</name>
	1084	<value>1</value>
	1085	<description> Number of lines per split in NLineInputFormat.
	1086	</description>
	1087	</property>
	1088
	1089	<!-- ipc properties -->
	1090
	1091	<property>
	1092	<name>ipc.client.idlethreshold</name>
	1093	<value>4000</value>
	1094	<description>Defines the threshold number of connections after which
	1095	connections will be inspected for idleness.
	1096	</description>
	1097	</property>
	1098
	1099	<property>
	1100	<name>ipc.client.kill.max</name>
	1101	<value>10</value>
	1102	<description>Defines the maximum number of clients to disconnect in one go.
	1103	</description>
	1104	</property>
	1105
	1106	<property>
	1107	<name>ipc.client.connection.maxidletime</name>
	1108	<value>10000</value>
	1109	<description>The maximum time in msec after which a client will bring down the
	1110	connection to the server.
	1111	</description>
	1112	</property>
	1113
	1114	<property>
	1115	<name>ipc.client.connect.max.retries</name>
	1116	<value>10</value>
	1117	<description>Indicates the number of retries a client will make to establish
	1118	a server connection.
	1119	</description>
	1120	</property>
	1121
	1122	<property>
	1123	<name>ipc.server.listen.queue.size</name>
	1124	<value>128</value>
	1125	<description>Indicates the length of the listen queue for servers accepting
	1126	client connections.
	1127	</description>
	1128	</property>
	1129
	1130	<property>
	1131	<name>ipc.server.tcpnodelay</name>
	1132	<value>false</value>
	1133	<description>Turn on/off Nagle's algorithm for the TCP socket connection on
	1134	the server. Setting to true disables the algorithm and may decrease latency
	1135	with a cost of more/smaller packets.
	1136	</description>
	1137	</property>
	1138
	1139	<property>
	1140	<name>ipc.client.tcpnodelay</name>
	1141	<value>false</value>
	1142	<description>Turn on/off Nagle's algorithm for the TCP socket connection on
	1143	the client. Setting to true disables the algorithm and may decrease latency
	1144	with a cost of more/smaller packets.
	1145	</description>
	1146	</property>
	1147
	1148	<!-- Job Notification Configuration -->
	1149
	1150	<!--
	1151	<property>
	1152	<name>job.end.notification.url</name>
	1153	<value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value>
	1154	<description>Indicates url which will be called on completion of job to inform
	1155	end status of job.
	1156	User can give at most 2 variables with URI : $jobId and $jobStatus.
	1157	If they are present in URI, then they will be replaced by their
	1158	respective values.
	1159	</description>
	1160	</property>
	1161	-->
	1162
	1163	<property>
	1164	<name>job.end.retry.attempts</name>
	1165	<value>0</value>
	1166	<description>Indicates how many times hadoop should attempt to contact the
	1167	notification URL </description>
	1168	</property>
	1169
	1170	<property>
	1171	<name>job.end.retry.interval</name>
	1172	<value>30000</value>
	1173	<description>Indicates time in milliseconds between notification URL retry
	1174	calls</description>
	1175	</property>
	1176
	1177	<!-- Web Interface Configuration -->
	1178
	1179	<property>
	1180	<name>webinterface.private.actions</name>
	1181	<value>false</value>
	1182	<description> If set to true, the web interfaces of JT and NN may contain
	1183	actions, such as kill job, delete file, etc., that should
	1184	not be exposed to public. Enable this option if the interfaces
	1185	are only reachable by those who have the right authorization.
	1186	</description>
	1187	</property>
	1188
	1189	<!-- Proxy Configuration -->
	1190
	1191	<property>
	1192	<name>hadoop.rpc.socket.factory.class.default</name>
	1193	<value>org.apache.hadoop.net.StandardSocketFactory</value>
	1194	<description> Default SocketFactory to use. This parameter is expected to be
	1195	formatted as "package.FactoryClassName".
	1196	</description>
	1197	</property>
	1198
	1199	<property>
	1200	<name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
	1201	<value></value>
	1202	<description> SocketFactory to use to connect to a DFS. If null or empty, use
	1203	hadoop.rpc.socket.factory.class.default. This socket factory is also used by
	1204	DFSClient to create sockets to DataNodes.
	1205	</description>
	1206	</property>
	1207
	1208	<property>
	1209	<name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
	1210	<value></value>
	1211	<description> SocketFactory to use to connect to a Map/Reduce master
	1212	(JobTracker). If null or empty, then use hadoop.rpc.socket.factory.class.default.
	1213	</description>
	1214	</property>
	1215
	1216	<property>
	1217	<name>hadoop.socks.server</name>
	1218	<value></value>
	1219	<description> Address (host:port) of the SOCKS server to be used by the
	1220	SocksSocketFactory.
	1221	</description>
	1222	</property>
	1223
	1224	<!-- Rack Configuration -->
	1225
	1226	<property>
	1227	<name>topology.node.switch.mapping.impl</name>
	1228	<value>org.apache.hadoop.net.ScriptBasedMapping</value>
	1229	<description> The default implementation of the DNSToSwitchMapping. It
	1230	invokes a script specified in topology.script.file.name to resolve
	1231	node names. If the value for topology.script.file.name is not set, the
	1232	default value of DEFAULT_RACK is returned for all node names.
	1233	</description>
	1234	</property>
	1235
	1236	<property>
	1237	<name>topology.script.file.name</name>
	1238	<value></value>
	1239	<description> The script name that should be invoked to resolve DNS names to
	1240	NetworkTopology names. Example: the script would take host.foo.bar as an
	1241	argument, and return /rack1 as the output.
	1242	</description>
	1243	</property>
	1244
	1245	<property>
	1246	<name>topology.script.number.args</name>
	1247	<value>20</value>
	1248	<description> The max number of args that the script configured with
	1249	topology.script.file.name should be run with. Each arg is an
	1250	IP address.
	1251	</description>
	1252	</property>
	1253
	1254	<property>
	1255	<name>mapred.task.cache.levels</name>
	1256	<value>2</value>
	1257	<description> This is the max level of the task cache. For example, if
	1258	the level is 2, the tasks cached are at the host level and at the rack
	1259	level.
	1260	</description>
	1261	</property>
	1262
	1263	<property>
	1264	<name>mapred.merge.recordsBeforeProgress</name>
	1265	<value>10000</value>
	1266	<description> The number of records to process during merge before
	1267	sending a progress notification to the TaskTracker.
	1268	</description>
	1269	</property>
	1270
	1271	</configuration>

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: nutchez-0.1/conf/hadoop-default.xml @ 88

Download in other formats: