指令
$ /opt/hadoop/bin/hadoop dfs -mkdir input
$ /opt/hadoop/bin/hadoop dfs -put /opt/hadoop/README.txt /user/hadoop/input
$ /opt/hadoop/bin/hadoop dfs -ls input
Found 1 items
-rw-r--r-- 1 hadoop supergroup 1366 2010-09-28 11:13 /user/hadoop/input/README.txt
程式碼
package org.nchc.hadoop;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class HelloHadoop {
static public class HelloMapper extends
Mapper<LongWritable, Text, LongWritable, Text> {
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// 將出入資料 原封不動的寫入 輸出
context.write((LongWritable) key, (Text) value);
}
}
static public class HelloReducer extends
Reducer<LongWritable, Text, LongWritable, Text> {
public void reduce(LongWritable key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
Text val = new Text();
// 取回 val 的資料
for (Text str : values) {
val.set(str.toString());
}
// 將取回的資料引入輸出
context.write(key, val);
}
}
public static void main(String[] args) throws IOException,
InterruptedException, ClassNotFoundException {
// 引入 $HADOOP_HOME/conf 內控制檔內的資料
Configuration conf = new Configuration();
// 宣告job 取得conf 並設定名稱 Hadoop Hello World
Job job = new Job(conf, "Hadoop Hello World");
// 設定此運算的主程式
job.setJarByClass(HelloHadoop.class);
// 設定輸入路徑
FileInputFormat.setInputPaths(job, "/user/hadoop/input");
// 設定輸出路徑
FileOutputFormat.setOutputPath(job, new Path("/user/hadoop/output-hh1"));
// 指定定map class
job.setMapperClass(HelloMapper.class);
// 指定reduce class
job.setReducerClass(HelloReducer.class);
// 開使運算
job.waitForCompletion(true);
}
}