Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

SequenceFileProcessor.java @ 53

Last change on this file since 53 was 20, checked in by waue, 16 years ago
將改完的 hadoop 0.17版package 放來備份目前繼續開發 hadoop 0.16 + hbase 1.3
File size: 4.3 KB

Line
1	/**
2	* Program: SequenceFileProcessor.java
3	* Editor: Waue Chen
4	* From : NCHC. Taiwan
5	* Last Update Date: 07/02/2008
6	* Upgrade to 0.17
7	* Re-code from : Cloud9: A MapReduce Library for Hadoop
8	*/
9
10
11	package tw.org.nchc.util;
12
13	import java.io.IOException;
14
15	import org.apache.hadoop.fs.FileStatus;
16	import org.apache.hadoop.fs.FileSystem;
17	import org.apache.hadoop.fs.Path;
18	import org.apache.hadoop.io.SequenceFile;
19	import org.apache.hadoop.io.Writable;
20	import org.apache.hadoop.io.WritableComparable;
21	import org.apache.hadoop.mapred.JobConf;
22
23	/**
24	* Upgrade from hadoop 0.16 to 0.17
25	* <p>
26	* Harness for processing one or more {@link SequenceFile}s within a single
27	* process. This class is useful when you want to iterate through all key-value
28	* pairs in a SequenceFile outside the context of a MapReduce task (or where
29	* writing the computation as a MapReduce would be overkill). One example usage
30	* case is to sum up all the values in a SequenceFile — this may be useful
31	* if you want to make sure probabilities sum to one. Here's the code fragment
32	* that would accomplish this:
33	* </p>
34	*
35	* <pre>
36	* KeyValueProcess<Tuple, FloatWritable> process = SequenceFileProcessor
37	* .<Tuple, FloatWritable> process("foo",
38	* new KeyValueProcess<Tuple, FloatWritable>() {
39	* public float sum = 0.0f;
40	*
41	* public void process(Tuple tuple, FloatWritable f) {
42	* sum += f.get();
43	* }
44	*
45	* public void report() {
46	* setProperty("sum", sum);
47	* }
48	* });
49	*
50	* float sum = (Float) process.getProperty("sum");
51	* </pre>
52	*
53	* <p>
54	* The static method takes a path and and a {@link KeyValueProcess}. This
55	* example uses an anonymous inner class to make the code more concise; the
56	* static method returns the <code>KeyValueProcess</code> so that you can
57	* retrieve results from it. The path can either be a file or a directory; if it
58	* is a directory, all files in that directory are processed.
59	* </p>
60	*
61	* @param <K>
62	* type of key
63	* @param <V>
64	* type of value
65	*/
66	public class SequenceFileProcessor<K extends WritableComparable, V extends Writable> {
67
68	private Path mPath;
69	private JobConf conf;
70	private KeyValueProcess<K, V> mProcessor;
71	private SequenceFile.Reader mReader;
72	private K mKey;
73	private V mValue;
74
75	/**
76	* Processes one or more <code>SequenceFile</code>s. The
77	* {@link KeyValueProcess} is applied to every key-value pair in the file if
78	* <code>path</code> denotes a file, or all files in the directory if
79	* <code>path</code> denotes a directory.
80	*
81	* @param <K1>
82	* type of key
83	* @param <V1>
84	* type of value
85	* @param path
86	* either a file or a directory
87	* @param p
88	* the KeyValueProcess to apply
89	* @return the KeyValueProcess applied
90	*/
91	public static <K1 extends WritableComparable, V1 extends Writable> KeyValueProcess<K1, V1> process(
92	String path, KeyValueProcess<K1, V1> p) {
93
94	try {
95	SequenceFileProcessor<K1, V1> processor = new SequenceFileProcessor<K1, V1>(
96	path, p);
97	processor.run();
98	} catch (Exception e) {
99	e.printStackTrace();
100	}
101
102	return p;
103	}
104
105	private SequenceFileProcessor(String location, KeyValueProcess<K, V> p)
106	throws IOException {
107
108	mPath = new Path(location);
109	conf = new JobConf();
110
111	mProcessor = p;
112
113	}
114
115	private void run() throws IOException {
116	if (!FileSystem.get(conf).isFile(mPath)) {
117	Path[] pa = new Path[] { mPath };
118	Path p;
119	// hadoop 0.17 -> listStatus();
120	FileStatus[] fi = FileSystem.get(conf).listStatus(pa);
121	for (int i =0 ; i<fi.length ; i++) {
122	p = fi[i].getPath();
123	// System.out.println("Applying to " + p);
124	applyToFile(p);
125	}
126	} else {
127	applyToFile(mPath);
128	}
129
130	}
131
132	@SuppressWarnings("unchecked")
133	private void applyToFile(Path path) throws IOException {
134	mReader = new SequenceFile.Reader(FileSystem.get(conf), path, conf);
135
136	try {
137	mKey = (K) mReader.getKeyClass().newInstance();
138	mValue = (V) mReader.getValueClass().newInstance();
139	} catch (Exception e) {
140	e.printStackTrace();
141	}
142
143	while (mReader.next(mKey, mValue) == true) {
144	mProcessor.process(mKey, mValue);
145	}
146
147	mReader.close();
148	mProcessor.report();
149	}
150	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: sample/hadoop-0.17/tw/org/nchc/util/SequenceFileProcessor.java @ 53

Download in other formats: