source: nutchez-0.1/bin/hadoop @ 156

Last change on this file since 156 was 66, checked in by waue, 16 years ago

NutchEz - an easy way to nutch

  • Property svn:executable set to *
File size: 9.2 KB
Line 
1#!/usr/bin/env bash
2
3# Licensed to the Apache Software Foundation (ASF) under one or more
4# contributor license agreements.  See the NOTICE file distributed with
5# this work for additional information regarding copyright ownership.
6# The ASF licenses this file to You under the Apache License, Version 2.0
7# (the "License"); you may not use this file except in compliance with
8# the License.  You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18
19# The Hadoop command script
20#
21# Environment Variables
22#
23#   JAVA_HOME        The java implementation to use.  Required.
24#
25#   HADOOP_CLASSPATH Extra Java CLASSPATH entries.
26#
27#   HADOOP_HEAPSIZE  The maximum amount of heap to use, in MB.
28#                    Default is 1000.
29#
30#   HADOOP_OPTS      Extra Java runtime options.
31#   
32#   HADOOP_NAMENODE_OPTS       These options are added to HADOOP_OPTS
33#   HADOOP_CLIENT_OPTS         when the respective command is run.
34#   HADOOP_{COMMAND}_OPTS etc  E.g. HADOOP_JOBTRACKER_OPTS applies to the
35#                              jobtracker command; HADOOP_CLIENT_OPTS
36#                              applies to more than one command (fs, dfs,
37#                              fsck, dfsadmin etc)
38#
39#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
40#
41#   HADOOP_ROOT_LOGGER The root appender. Default is INFO,console
42#
43
44bin=`dirname "$0"`
45bin=`cd "$bin"; pwd`
46
47. "$bin"/hadoop-config.sh
48
49cygwin=false
50case "`uname`" in
51CYGWIN*) cygwin=true;;
52esac
53
54# if no args specified, show usage
55if [ $# = 0 ]; then
56  echo "Usage: hadoop [--config confdir] COMMAND"
57  echo "where COMMAND is one of:"
58  echo "  namenode -format     format the DFS filesystem"
59  echo "  secondarynamenode    run the DFS secondary namenode"
60  echo "  namenode             run the DFS namenode"
61  echo "  datanode             run a DFS datanode"
62  echo "  dfsadmin             run a DFS admin client"
63  echo "  fsck                 run a DFS filesystem checking utility"
64  echo "  fs                   run a generic filesystem user client"
65  echo "  balancer             run a cluster balancing utility"
66  echo "  jobtracker           run the MapReduce job Tracker node" 
67  echo "  pipes                run a Pipes job"
68  echo "  tasktracker          run a MapReduce task Tracker node" 
69  echo "  job                  manipulate MapReduce jobs"
70  echo "  queue                get information regarding JobQueues" 
71  echo "  version              print the version"
72  echo "  jar <jar>            run a jar file"
73  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
74  echo "  archive -archiveName NAME <src>* <dest> create a hadoop archive"
75  echo "  daemonlog            get/set the log level for each daemon"
76  echo " or"
77  echo "  CLASSNAME            run the class named CLASSNAME"
78  echo "Most commands print help when invoked w/o parameters."
79  exit 1
80fi
81
82# get arguments
83COMMAND=$1
84shift
85
86if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
87  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
88fi
89
90# some Java parameters
91if [ "$JAVA_HOME" != "" ]; then
92  #echo "run java in $JAVA_HOME"
93  JAVA_HOME=$JAVA_HOME
94fi
95 
96if [ "$JAVA_HOME" = "" ]; then
97  echo "Error: JAVA_HOME is not set."
98  exit 1
99fi
100
101JAVA=$JAVA_HOME/bin/java
102JAVA_HEAP_MAX=-Xmx1000m
103
104# check envvars which might override default args
105if [ "$HADOOP_HEAPSIZE" != "" ]; then
106  #echo "run with heapsize $HADOOP_HEAPSIZE"
107  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
108  #echo $JAVA_HEAP_MAX
109fi
110
111# CLASSPATH initially contains $HADOOP_CONF_DIR
112CLASSPATH="${HADOOP_CONF_DIR}"
113CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
114
115# for developers, add Hadoop classes to CLASSPATH
116if [ -d "$HADOOP_HOME/build/classes" ]; then
117  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
118fi
119if [ -d "$HADOOP_HOME/build/webapps" ]; then
120  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
121fi
122if [ -d "$HADOOP_HOME/build/test/classes" ]; then
123  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
124fi
125if [ -d "$HADOOP_HOME/build/tools" ]; then
126  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/tools
127fi
128
129# so that filenames w/ spaces are handled correctly in loops below
130IFS=
131
132# for releases, add core hadoop jar & webapps to CLASSPATH
133if [ -d "$HADOOP_HOME/webapps" ]; then
134  CLASSPATH=${CLASSPATH}:$HADOOP_HOME
135fi
136for f in $HADOOP_HOME/hadoop-*-core.jar; do
137  CLASSPATH=${CLASSPATH}:$f;
138done
139
140# add libs to CLASSPATH
141for f in $HADOOP_HOME/lib/*.jar; do
142  CLASSPATH=${CLASSPATH}:$f;
143done
144
145for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
146  CLASSPATH=${CLASSPATH}:$f;
147done
148
149for f in $HADOOP_HOME/hadoop-*-tools.jar; do
150  TOOL_PATH=${TOOL_PATH}:$f;
151done
152for f in $HADOOP_HOME/build/hadoop-*-tools.jar; do
153  TOOL_PATH=${TOOL_PATH}:$f;
154done
155
156# add user-specified CLASSPATH last
157if [ "$HADOOP_CLASSPATH" != "" ]; then
158  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
159fi
160
161# default log directory & file
162if [ "$HADOOP_LOG_DIR" = "" ]; then
163  HADOOP_LOG_DIR="$HADOOP_HOME/logs"
164fi
165if [ "$HADOOP_LOGFILE" = "" ]; then
166  HADOOP_LOGFILE='hadoop.log'
167fi
168
169# restore ordinary behaviour
170unset IFS
171
172# figure out which class to run
173if [ "$COMMAND" = "namenode" ] ; then
174  CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
175  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
176elif [ "$COMMAND" = "secondarynamenode" ] ; then
177  CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
178  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
179elif [ "$COMMAND" = "datanode" ] ; then
180  CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
181  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_DATANODE_OPTS"
182elif [ "$COMMAND" = "fs" ] ; then
183  CLASS=org.apache.hadoop.fs.FsShell
184  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
185elif [ "$COMMAND" = "dfs" ] ; then
186  CLASS=org.apache.hadoop.fs.FsShell
187  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
188elif [ "$COMMAND" = "dfsadmin" ] ; then
189  CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
190  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
191elif [ "$COMMAND" = "fsck" ] ; then
192  CLASS=org.apache.hadoop.hdfs.tools.DFSck
193  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
194elif [ "$COMMAND" = "balancer" ] ; then
195  CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
196  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
197elif [ "$COMMAND" = "jobtracker" ] ; then
198  CLASS=org.apache.hadoop.mapred.JobTracker
199  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOBTRACKER_OPTS"
200elif [ "$COMMAND" = "tasktracker" ] ; then
201  CLASS=org.apache.hadoop.mapred.TaskTracker
202  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_TASKTRACKER_OPTS"
203elif [ "$COMMAND" = "job" ] ; then
204  CLASS=org.apache.hadoop.mapred.JobClient
205elif [ "$COMMAND" = "queue" ] ; then
206  CLASS=org.apache.hadoop.mapred.JobQueueClient
207elif [ "$COMMAND" = "pipes" ] ; then
208  CLASS=org.apache.hadoop.mapred.pipes.Submitter
209  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
210elif [ "$COMMAND" = "version" ] ; then
211  CLASS=org.apache.hadoop.util.VersionInfo
212  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
213elif [ "$COMMAND" = "jar" ] ; then
214  CLASS=org.apache.hadoop.mapred.JobShell
215elif [ "$COMMAND" = "distcp" ] ; then
216  CLASS=org.apache.hadoop.tools.DistCp
217  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
218  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
219elif [ "$COMMAND" = "daemonlog" ] ; then
220  CLASS=org.apache.hadoop.log.LogLevel
221  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
222elif [ "$COMMAND" = "archive" ] ; then
223  CLASS=org.apache.hadoop.tools.HadoopArchives
224  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
225  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
226elif [ "$COMMAND" = "sampler" ] ; then
227  CLASS=org.apache.hadoop.mapred.lib.InputSampler
228  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
229else
230  CLASS=$COMMAND
231fi
232
233# cygwin path translation
234if $cygwin; then
235  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
236  HADOOP_HOME=`cygpath -d "$HADOOP_HOME"`
237  HADOOP_LOG_DIR=`cygpath -d "$HADOOP_LOG_DIR"`
238  TOOL_PATH=`cygpath -p -w "$TOOL_PATH"`
239fi
240# setup 'java.library.path' for native-hadoop code if necessary
241JAVA_LIBRARY_PATH=''
242if [ -d "${HADOOP_HOME}/build/native" -o -d "${HADOOP_HOME}/lib/native" ]; then
243  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g' | sed -e "s/ /_/g"`
244 
245  if [ -d "$HADOOP_HOME/build/native" ]; then
246    JAVA_LIBRARY_PATH=${HADOOP_HOME}/build/native/${JAVA_PLATFORM}/lib
247  fi
248 
249  if [ -d "${HADOOP_HOME}/lib/native" ]; then
250    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
251      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
252    else
253      JAVA_LIBRARY_PATH=${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
254    fi
255  fi
256fi
257
258# cygwin path translation
259if $cygwin; then
260  JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
261fi
262
263HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
264HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
265HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_HOME"
266HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
267HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
268if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
269  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
270fi 
271
272# run it
273exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
Note: See TracBrowser for help on using the repository browser.