/* * Cloud9: A MapReduce Library for Hadoop * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package tw.org.nchc.tuple; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.DataInputStream; import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; /** *
* <p>
* Class that represents a tuple in Hadoop's data type system. Tuples are
* instantiated from a {@link Schema}. The Tuple class implements
* WritableComparable, so tuples can be used directly as MapReduce keys and
* values. The natural sort order of tuples is defined by an
* internally-generated byte representation and is not based on field values.
* This class, combined with {@link ListWritable}, allows the user to define
* arbitrarily complex data structures.
* </p>
*
* *
* <p>
* Each field can be accessed either by its integer position or by its field
* name. Each field is typed, and its type can be determined via
* {@link #getFieldType(int)}. A field contains either an object of the
* specified type or a special symbol, which is a String. The method
* {@link #containsSymbol(int)} can be used to check whether a field contains a
* special symbol. If the field contains a special symbol, {@link #get(int)}
* will return <code>null</code>. If the field does not contain a special
* symbol, {@link #getSymbol(int)} will return <code>null</code>.
* </p>
*
* <p>
* Here is a typical usage scenario for special symbols: say you had tuples that
* represented <code>count(a, b)</code>, where <code>a</code> and
* <code>b</code> are tokens you observe. There is often a need to compute
* <code>count(a, *)</code>, for example, to derive conditional
* probabilities. In this case, you can use a special symbol to represent the
* <code>*</code>, and distinguish it from the lexical token
* '<code>*</code>'.
* </p>
*
* <p>
* The natural sort order of the Tuple is defined by {@link #compareTo(Object)}.
* Tuples are sorted by field, with special symbols always appearing first
* within each field.
* </p>
* * @see ListWritable * @see Schema * */ public class Tuple implements WritableComparable { protected static final byte SYMBOL = 0; protected static final byte INT = 1; protected static final byte BOOLEAN = 2; protected static final byte LONG = 3; protected static final byte FLOAT = 4; protected static final byte DOUBLE = 5; protected static final byte STRING = 6; protected static final byte WRITABLE = 7; private Object[] mObjects; private String[] mSymbols; private String[] mFields; private Class>[] mTypes; private Mapnull
if the field contains a special symbol.
*
* @param i
* field position
* @return object at field, or <code>null</code> if the field contains a
* special symbol
*/
public Object get(int i) {
    // A null slot is how this class marks a field that holds a special
    // symbol rather than a regular value, so null is passed through as-is.
    final Object value = mObjects[i];
    return value;
}
/**
 * Looks up a field by name and returns the object stored in it. The result
 * is <code>null</code> when the field contains a special symbol.
 *
 * @param field
 *            field name
 * @return object at field, or <code>null</code> if the field contains a
 *         special symbol
 * @throws TupleException
 *             if no field with the given name exists
 */
public Object get(String field) {
    // The name-to-position index is only built the first time it is needed.
    if (mFieldLookup == null) {
        initLookup();
    }

    if (mFieldLookup.containsKey(field)) {
        final int position = mFieldLookup.get(field);
        return get(position);
    }

    throw new TupleException("Field '" + field + "' does not exist!");
}
/**
 * Returns the special symbol stored at the given field position, or
 * <code>null</code> if the field does not contain a special symbol.
 *
 * @param i
 *            field position
 * @return special symbol at field, or <code>null</code> if the field does
 *         not contain a special symbol
 */
public String getSymbol(int i) {
    // A non-null object means the field carries a regular value, in which
    // case no symbol is reported for it.
    return (mObjects[i] == null) ? mSymbols[i] : null;
}
/**
 * Returns the special symbol stored at the named field, or
 * <code>null</code> if the field does not contain a special symbol.
 *
 * @param field
 *            field name
 * @return special symbol at field, or <code>null</code> if the field does
 *         not contain a special symbol
 * @throws TupleException
 *             if no field with the given name exists
 */
public String getSymbol(String field) {
    // The name-to-position index is only built the first time it is needed.
    if (mFieldLookup == null) {
        initLookup();
    }

    if (mFieldLookup.containsKey(field)) {
        final int position = mFieldLookup.get(field);
        return getSymbol(position);
    }

    throw new TupleException("Field '" + field + "' does not exist!");
}
/**
 * Reports whether the field at the given position contains a special
 * symbol.
 *
 * @param i
 *            field position
 * @return <code>true</code> if the field contains a special symbol, or
 *         <code>false</code> otherwise
 */
public boolean containsSymbol(int i) {
    // A field counts as holding a special symbol exactly when its object
    // slot is null.
    final Object value = mObjects[i];
    return value == null;
}
/**
 * Reports whether the named field contains a special symbol.
 *
 * @param field
 *            field name
 * @return <code>true</code> if the field contains a special symbol, or
 *         <code>false</code> otherwise
 * @throws TupleException
 *             if no field with the given name exists
 */
public boolean containsSymbol(String field) {
    // The name-to-position index is only built the first time it is needed.
    if (mFieldLookup == null) {
        initLookup();
    }

    if (mFieldLookup.containsKey(field)) {
        final int position = mFieldLookup.get(field);
        return containsSymbol(position);
    }

    throw new TupleException("Field '" + field + "' does not exist!");
}
/**
* Returns the type of a particular field (by position).
*
* @param i
* field position
* @return type of the field
*/
public Class> getFieldType(int i) {
return mTypes[i];
}
/**
* Returns the type of a particular field (by name).
*
* @param field
* field name
* @return type of the field
*/
public Class> getFieldType(String field) {
if (mFieldLookup == null)
initLookup();
if (!mFieldLookup.containsKey(field)) {
throw new TupleException("Field '" + field + "' does not exist!");
}
return getFieldType(mFieldLookup.get(field));
}
/**
 * Returns the number of fields in this Tuple.
 *
 * @return number of fields, taken from the length of the field-name array
 */
public int getFieldCount() {
    final int fieldCount = mFields.length;
    return fieldCount;
}
/**
* Lazily construct the lookup table for this schema. Used to accelerate
* name-based lookups of schema information.
*/
private void initLookup() {
mFieldLookup = new HashMap
* Defines a natural sort order for the Tuple class. Following standard
* convention, this method returns a value less than zero, a value greater
* than zero, or zero if this Tuple should be sorted before, sorted after,
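* or is equal to <code>obj</code>. The sort order is defined as follows:
* <ul>
* <li>Fields are compared one at a time, from the first field to the last.</li>
* <li>Within a field, a special symbol always sorts before a regular value.</li>
* <li>Two special symbols in the same field are compared as Strings; if they
* are equal, the next field is considered.</li>
* </ul>
*
* @return a value less than zero, a value greater than zero, or zero if this
* Tuple should be sorted before, sorted after, or is equal to
* <code>obj</code>.
*/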
*/
public int compareTo(Object obj) {
Tuple that = (Tuple) obj;
// iterate through the fields
for (int i = 0; i < this.getFieldCount(); i++) {
// if both contain special symbol, then sort special symbols
if (this.containsSymbol(i) && that.containsSymbol(i)) {
String thisSymbol = this.getSymbol(i);
String thatSymbol = that.getSymbol(i);
// special symbols identical; move to next field
if (!thisSymbol.equals(thatSymbol)) {
return thisSymbol.compareTo(thatSymbol);
}
} else {
// special symbols always come first
if (this.containsSymbol(i))
return -1;
if (that.containsSymbol(i))
return 1;
@SuppressWarnings("unchecked")
Comparable