source: sample/hadoop-0.17/tw/org/nchc/demo/DemoPackRecords.java @ 20

Last change on this file was in revision 20, checked in by waue, 16 years ago

Backed up the modified hadoop 0.17 package here.
Development currently continues on hadoop 0.16 + hbase 1.3.

File size: 2.6 KB
/*
 * Cloud9: A MapReduce Library for Hadoop
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package tw.org.nchc.demo;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import tw.org.nchc.tuple.Schema;
import tw.org.nchc.tuple.Tuple;
import tw.org.nchc.util.LocalTupleRecordWriter;

/**
 * <p>
 * Demo that packs the sample collection into records using the tuple library,
 * illustrating the use of the {@link tw.org.nchc.tuple.Tuple} class. The
 * records are stored in a local SequenceFile; this file can then be transferred
 * over to HDFS to serve as the starting point for a MapReduce operation.
 * </p>
 *
 * <p>
 * Each record is a tuple; the first field of the tuple is a String with the
 * field name "text", which holds the raw text of the record.
 * </p>
 *
 * @see DemoPackRecords2
 * @see DemoReadPackedRecords
 */
public class DemoPackRecords {
  // private constructor: this is a utility class and is never instantiated
  private DemoPackRecords() {
  }

  // define the tuple schema for the input record: a single String field
  // named "text", with the empty string as its default value
  private static final Schema RECORD_SCHEMA = new Schema();
  static {
    RECORD_SCHEMA.addField("text", String.class, "");
  }

  // instantiate a single tuple; the same instance is reused for every record
  private static Tuple tuple = RECORD_SCHEMA.instantiate();

  /**
   * Runs the demo.
   */
  public static void main(String[] args) throws IOException {
    String infile = "../umd-hadoop-dist/sample-input/bible+shakes.nopunc";
    String outfile = "../umd-hadoop-dist/sample-input/bible+shakes.nopunc.packed";

    // create LocalTupleRecordWriter to write tuples to a local SequenceFile
    LocalTupleRecordWriter writer = new LocalTupleRecordWriter(outfile);

    // read in raw text records, line separated
    BufferedReader data = new BufferedReader(new InputStreamReader(
        new FileInputStream(infile)));
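    // note: no charset is given, so InputStreamReader falls back to the
    // platform default; the sample input is plain text, so that is fine here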

    String line;
    while ((line = data.readLine()) != null) {
      // set field 0 ("text") to the current line and write out the record
      tuple.set(0, line);
      writer.add(tuple);
    }

    data.close();
    writer.close();
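
    // (sketch, not part of the original demo) the Javadoc above notes that the
    // packed file can then be transferred to HDFS; one way to do that, assuming
    // the standard Hadoop FileSystem API and an illustrative destination path:
    //
    //   FileSystem fs = FileSystem.get(new Configuration());
    //   fs.copyFromLocalFile(new Path(outfile), new Path("sample-input/bible+shakes.nopunc.packed"));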

    System.out.println("Wrote " + writer.getRecordCount() + " records.");
  }
}