/*
 * Cloud9: A MapReduce Library for Hadoop
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
---|

package tw.org.nchc.demo;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import tw.org.nchc.tuple.Schema;
import tw.org.nchc.tuple.Tuple;
import tw.org.nchc.util.LocalTupleRecordWriter;

---|
| 28 | /** |
---|
| 29 | * <p> |
---|
| 30 | * Demo that packs the sample collection into records using the tuple library, |
---|
| 31 | * illustrating the use of the {@link tw.org.nchc.tuple.Tuple} class. The |
---|
| 32 | * records are stored in a local SequenceFile; this file can then be transfered |
---|
| 33 | * over to HDFS to serve as the starting point for a MapReduce operation. |
---|
| 34 | * </p> |
---|
| 35 | * |
---|
| 36 | * <p> |
---|
| 37 | * Each record is a tuple; the first field of the tuple is a String with the |
---|
| 38 | * field name "text", which consists of the raw text of the record. |
---|
| 39 | * </p> |
---|
| 40 | * |
---|
| 41 | * @see DemoPackRecords2 |
---|
| 42 | * @see DemoReadPackedRecords |
---|
| 43 | */ |
---|
| 44 | public class DemoPackRecords { |
---|
| 45 | private DemoPackRecords() { |
---|
| 46 | } |
---|
| 47 | |
---|
| 48 | // define the tuple schema for the input record |
---|
| 49 | private static final Schema RECORD_SCHEMA = new Schema(); |
---|
| 50 | static { |
---|
| 51 | RECORD_SCHEMA.addField("text", String.class, ""); |
---|
| 52 | } |
---|
| 53 | |
---|
| 54 | // instantiate a single tuple |
---|
| 55 | private static Tuple tuple = RECORD_SCHEMA.instantiate(); |
---|
| 56 | |
---|
| 57 | /** |
---|
| 58 | * Runs the demo. |
---|
| 59 | */ |
---|
| 60 | public static void main(String[] args) throws IOException { |
---|
| 61 | String infile = "../umd-hadoop-dist/sample-input/bible+shakes.nopunc"; |
---|
| 62 | String outfile = "../umd-hadoop-dist/sample-input/bible+shakes.nopunc.packed"; |
---|
| 63 | |
---|
| 64 | // create LocalTupleRecordWriter to write tuples to a local SequenceFile |
---|
| 65 | LocalTupleRecordWriter writer = new LocalTupleRecordWriter(outfile); |
---|
| 66 | |
---|
| 67 | // read in raw text records, line separated |
---|
| 68 | BufferedReader data = new BufferedReader(new InputStreamReader( |
---|
| 69 | new FileInputStream(infile))); |
---|
| 70 | |
---|
| 71 | String line; |
---|
| 72 | while ((line = data.readLine()) != null) { |
---|
| 73 | // write the record |
---|
| 74 | tuple.set(0, line); |
---|
| 75 | writer.add(tuple); |
---|
| 76 | } |
---|
| 77 | |
---|
| 78 | data.close(); |
---|
| 79 | writer.close(); |
---|
| 80 | |
---|
| 81 | System.out.println("Wrote " + writer.getRecordCount() + " records."); |
---|
| 82 | } |
---|
| 83 | } |
---|