source: sample/hadoop-0.17/tw/org/nchc/demo/DemoPackRecords.java @ 20

Last change on this file was in revision 20, checked in by waue, 16 years ago

Backed up the modified hadoop 0.17 package here.
Development currently continues on hadoop 0.16 + hbase 1.3.

File size: 2.6 KB
/*
 * Cloud9: A MapReduce Library for Hadoop
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package tw.org.nchc.demo;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import tw.org.nchc.tuple.Schema;
import tw.org.nchc.tuple.Tuple;
import tw.org.nchc.util.LocalTupleRecordWriter;

/**
 * <p>
 * Demo that packs the sample collection into records using the tuple library,
 * illustrating the use of the {@link tw.org.nchc.tuple.Tuple} class. The
 * records are stored in a local SequenceFile; this file can then be transferred
 * over to HDFS to serve as the starting point for a MapReduce operation.
 * </p>
 *
 * <p>
 * Each record is a tuple; the first field of the tuple is a String with the
 * field name "text", which holds the raw text of the record.
 * </p>
 *
 * @see DemoPackRecords2
 * @see DemoReadPackedRecords
 */
public class DemoPackRecords {
  // private constructor: this is a utility class and is never instantiated
  private DemoPackRecords() {
  }

  // define the tuple schema for the input record: a single String field
  // named "text", with the empty string as its default value
  private static final Schema RECORD_SCHEMA = new Schema();
  static {
    RECORD_SCHEMA.addField("text", String.class, "");
  }

  // instantiate a single tuple; the same instance is reused for every record
  private static Tuple tuple = RECORD_SCHEMA.instantiate();

  /**
   * Runs the demo.
   */
  public static void main(String[] args) throws IOException {
    String infile = "../umd-hadoop-dist/sample-input/bible+shakes.nopunc";
    String outfile = "../umd-hadoop-dist/sample-input/bible+shakes.nopunc.packed";

    // create LocalTupleRecordWriter to write tuples to a local SequenceFile
    LocalTupleRecordWriter writer = new LocalTupleRecordWriter(outfile);

    // read in raw text records, line separated
    BufferedReader data = new BufferedReader(new InputStreamReader(
        new FileInputStream(infile)));
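    // note: no charset is given, so InputStreamReader falls back to the
    // platform default; the sample input is plain text, so that is fine here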

    String line;
    while ((line = data.readLine()) != null) {
      // set field 0 ("text") to the current line and write out the record
      tuple.set(0, line);
      writer.add(tuple);
    }

    data.close();
    writer.close();
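
    // (sketch, not part of the original demo) the Javadoc above notes that the
    // packed file can then be transferred to HDFS; one way to do that, assuming
    // the standard Hadoop FileSystem API and an illustrative destination path:
    //
    //   FileSystem fs = FileSystem.get(new Configuration());
    //   fs.copyFromLocalFile(new Path(outfile), new Path("sample-input/bible+shakes.nopunc.packed"));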

    System.out.println("Wrote " + writer.getRecordCount() + " records.");
  }
}