/*
* Cloud9: A MapReduce Library for Hadoop
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package tw.org.nchc.tuple;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.Writable;
/**
 * <p>
 * Description of a Tuple's structure. The Schema class keeps track of column
 * names, data types, and default values. The following code fragment
 * illustrates the use of this class:
 * </p>
 *
 * <pre>
 * public static final Schema MYSCHEMA = new Schema();
 * static {
 *     MYSCHEMA.addField("token", String.class, "");
 *     MYSCHEMA.addField("int", Integer.class, new Integer(1));
 * }
 * </pre>
 *
 * <p>
 * The following field types are allowed:
 * </p>
 *
 * <ul>
 * <li>Basic Java types (boxed primitives and strings): Boolean, Integer,
 * Long, Float, Double, String</li>
 * <li>Classes that implement Writable</li>
 * </ul>
 *
 * <p>
 * Schema instances can be locked to prevent further changes. Any attempt to
 * alter a locked Schema will result in a runtime exception being thrown. If a
 * Schema is not locked, callers are free to add new fields and edit default
 * values.
 * </p>
 *
 * <p>
 * New Tuple instances can be created directly from Schema objects through the
 * use of the {@link #instantiate()} method. A call to that method implicitly
 * locks the Schema.
 * </p>
 *
 * <p>
 * Acknowledgments: much of this code was adapted from the Prefuse
 * Visualization Toolkit.
 * </p>
 */
public class Schema implements Cloneable {
private String[] mFieldNames;
private Class>[] mFieldTypes;
private Object[] mDefaultValues;
private Map mFieldLookup;
private int mFieldCount;
private boolean mLocked;
// ------------------------------------------------------------------------
// Constructors
/**
* Creates a new empty Schema.
*/
public Schema() {
this(10);
}
/**
* Creates a new empty Schema with a starting capacity for a given number of
* fields.
*
* @param n
* the number of columns in this schema
*/
public Schema(int n) {
mFieldNames = new String[n];
mFieldTypes = new Class>[n];
mDefaultValues = new Object[n];
mFieldCount = 0;
mLocked = false;
}
/**
* Create a new Schema consisting of the given field names and types.
*
* @param names
* the field names
* @param types
* the field types (as Class instances)
*/
public Schema(String[] names, Class>[] types) {
this(names.length);
// check the schema validity
if (names.length != types.length) {
throw new IllegalArgumentException(
"Input arrays should be the same length");
}
for (int i = 0; i < names.length; ++i) {
addField(names[i], types[i], null);
}
}
/**
* Create a new Schema consisting of the given field names, types, and
* default field values.
*
* @param names
* the field names
* @param types
* the field types (as Class instances)
* @param defaults
* the default values for each field
*/
public Schema(String[] names, Class>[] types, Object[] defaults) {
this(names.length);
// check the schema validity
if (names.length != types.length || types.length != defaults.length) {
throw new IllegalArgumentException(
"Input arrays should be the same length");
}
for (int i = 0; i < names.length; ++i) {
addField(names[i], types[i], defaults[i]);
}
}
/**
* Creates a copy of this Schema. Cloned copies of a locked Schema will not
* inherit the locked status.
*
* @see java.lang.Object#clone()
*/
public Object clone() {
Schema s = new Schema(mFieldCount);
for (int i = 0; i < mFieldCount; ++i) {
s.addField(mFieldNames[i], mFieldTypes[i], mDefaultValues[i]);
}
return s;
}
/**
* Lazily construct the lookup table for this schema. Used to accelerate
* name-based lookups of schema information.
*/
protected void initLookup() {
mFieldLookup = new HashMap();
for (int i = 0; i < mFieldNames.length; ++i) {
mFieldLookup.put(mFieldNames[i], new Integer(i));
}
}
// ------------------------------------------------------------------------
// Accessors / Mutators
/**
* Locks the Schema, preventing any additional changes. Locked Schemas
* cannot be unlocked! Cloned copies of a locked schema will not inherit
* this locked status.
*
* @return a reference to this schema
*/
public Schema lockSchema() {
mLocked = true;
return this;
}
/**
* Checks if this schema is locked. Locked Schemas can not be edited.
*
* @return true if this Schema is locked, false otherwise
*/
public boolean isLocked() {
return mLocked;
}
/**
* Adds a field to this Schema.
*
* @param name
* the field name
* @param type
* the field type (as a Class instance)
* @throws IllegalArgumentException
* if either name or type are null or the name already exists in
* this schema.
*/
public void addField(String name, Class> type) {
addField(name, type, null);
}
/**
* Adds a field to this schema.
*
* @param name
* the field name
* @param type
* the field type (as a Class instance)
* @throws IllegalArgumentException
* if either name or type are null or the name already exists in
* this schema.
*/
public void addField(String name, Class> type, Object defaultValue) {
if (!(type == Integer.class || type == Boolean.class
|| type == Long.class || type == Float.class
|| type == Double.class || type == String.class || (!type
.isInterface() && Writable.class.isAssignableFrom(type)))) {
throw new SchemaException("Illegal field type: "
+ type.getCanonicalName());
}
// check lock status
if (mLocked) {
throw new IllegalStateException(
"Can not add column to a locked Schema.");
}
// check for validity
if (name == null) {
throw new IllegalArgumentException(
"Null column names are not allowed.");
}
if (type == null) {
throw new IllegalArgumentException(
"Null column types are not allowed.");
}
for (int i = 0; i < mFieldCount; ++i) {
if (mFieldNames[i].equals(name)) {
throw new IllegalArgumentException(
"Duplicate column names are not allowed: "
+ mFieldNames[i]);
}
}
// resize if necessary
if (mFieldNames.length == mFieldCount) {
int capacity = (3 * mFieldNames.length) / 2 + 1;
String[] names = new String[capacity];
Class>[] types = new Class[capacity];
Object[] dflts = new Object[capacity];
System.arraycopy(mFieldNames, 0, names, 0, mFieldCount);
System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount);
System.arraycopy(mDefaultValues, 0, dflts, 0, mFieldCount);
mFieldNames = names;
mFieldTypes = types;
mDefaultValues = dflts;
}
mFieldNames[mFieldCount] = name;
mFieldTypes[mFieldCount] = type;
mDefaultValues[mFieldCount] = defaultValue;
if (mFieldLookup != null)
mFieldLookup.put(name, new Integer(mFieldCount));
mFieldCount++;
}
/**
* Returns the number of fields in this Schema.
*
* @return the number of fields in this Schema
*/
public int getFieldCount() {
return mFieldCount;
}
/**
* Returns the name of the field at the given position.
*
* @param index
* the field index
* @return the field name
*/
public String getFieldName(int index) {
return mFieldNames[index];
}
/**
* Returns the position of a field given its name.
*
* @param field
* the field name
* @return the field position index
*/
public int getFieldIndex(String field) {
if (mFieldLookup == null)
initLookup();
Integer idx = (Integer) mFieldLookup.get(field);
return (idx == null ? -1 : idx.intValue());
}
/**
* Returns the type of the field at the given position.
*
* @param index
* the column index
* @return the column type
*/
public Class> getFieldType(int index) {
return mFieldTypes[index];
}
/**
* Returns the type of the field given its name.
*
* @param field
* the field name
* @return the field type
*/
public Class> getFieldType(String field) {
int idx = getFieldIndex(field);
return (idx < 0 ? null : mFieldTypes[idx]);
}
/**
* Returns the default value of the field at the given position.
*
* @param index
* the field index
* @return the field's default value
*/
public Object getDefault(int index) {
return mDefaultValues[index];
}
/**
* Returns the default value of the field with the given name.
*
* @param field
* the field name
* @return the field's default value
*/
public Object getDefault(String field) {
int idx = getFieldIndex(field);
return (idx < 0 ? null : mDefaultValues[idx]);
}
/**
* Sets the default value for the given field.
*
* @param index
* the index position of the field to set the default for
* @param val
* the new default value
*/
public void setDefault(int index, Object val) {
// check lock status
if (mLocked) {
throw new IllegalStateException(
"Can not update default values of a locked Schema.");
}
mDefaultValues[index] = val;
}
/**
* Sets the default value for the given field.
*
* @param field
* the name of field to set the default for
* @param val
* the new default value
*/
public void setDefault(String field, Object val) {
// check lock status
if (mLocked) {
throw new IllegalStateException(
"Can not update default values of a locked Schema.");
}
int idx = getFieldIndex(field);
mDefaultValues[idx] = val;
}
/**
* Sets the default value for the given field as an int
.
*
* @param field
* the name of field to set the default for
* @param val
* the new default value
*/
public void setDefault(String field, int val) {
setDefault(field, new Integer(val));
}
/**
* Set the default value for the given field as a long
.
*
* @param field
* the name of field to set the default for
* @param val
* the new default value
*/
public void setDefault(String field, long val) {
setDefault(field, new Long(val));
}
/**
* Set the default value for the given field as a float
.
*
* @param field
* the name of field to set the default for
* @param val
* the new default value
*/
public void setDefault(String field, float val) {
setDefault(field, new Float(val));
}
/**
* Set the default value for the given field as a double
.
*
* @param field
* the name of field to set the default for
* @param val
* the new default value
*/
public void setDefault(String field, double val) {
setDefault(field, new Double(val));
}
/**
* Set the default value for the given field as a boolean
.
*
* @param field
* the name of field to set the default for
* @param val
* the new default value
*/
public void setDefault(String field, boolean val) {
setDefault(field, val ? Boolean.TRUE : Boolean.FALSE);
}
// ------------------------------------------------------------------------
// Comparison and Hashing
/**
* Compares this Schema with another one for equality.
*/
public boolean equals(Object o) {
if (!(o instanceof Schema))
return false;
Schema s = (Schema) o;
if (mFieldCount != s.getFieldCount())
return false;
for (int i = 0; i < mFieldCount; ++i) {
if (!(mFieldNames[i].equals(s.getFieldName(i))
&& mFieldTypes[i].equals(s.getFieldType(i)) && mDefaultValues[i]
.equals(s.getDefault(i)))) {
return false;
}
}
return true;
}
/**
* Computes a hashcode for this schema.
*/
public int hashCode() {
int hashcode = 0;
for (int i = 0; i < mFieldCount; ++i) {
int idx = i + 1;
int code = idx * mFieldNames[i].hashCode();
code ^= idx * mFieldTypes[i].hashCode();
if (mDefaultValues[i] != null)
code ^= mDefaultValues[i].hashCode();
hashcode ^= code;
}
return hashcode;
}
/**
* Returns a descriptive String for this schema.
*/
public String toString() {
StringBuffer sbuf = new StringBuffer();
sbuf.append("Schema[");
for (int i = 0; i < mFieldCount; ++i) {
if (i > 0)
sbuf.append(' ');
sbuf.append('(').append(mFieldNames[i]).append(", ");
sbuf.append(mFieldTypes[i].getName()).append(", ");
sbuf.append(mDefaultValues[i]).append(')');
}
sbuf.append(']');
return sbuf.toString();
}
// ------------------------------------------------------------------------
// Tuple Operations
/**
* Instantiate a new Tuple instance with this Schema. Fields of the newly
* instantiated Tuple are set to default value.
*
* @return a new Tuple with this Schema
*/
public Tuple instantiate() {
lockSchema();
Object[] objects = new Object[mFieldCount];
System.arraycopy(mDefaultValues, 0, objects, 0, mFieldCount);
String[] symbols = new String[mFieldCount];
String[] fields = new String[mFieldCount];
System.arraycopy(mFieldNames, 0, fields, 0, mFieldCount);
Class>[] types = new Class>[mFieldCount];
System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount);
return new Tuple(objects, symbols, fields, types);
}
/**
* Instantiate a new Tuple instance with this Schema.
*
* @param objects
* values of each field
* @return a new Tuple with this Schema
*/
public Tuple instantiate(Object... objects) {
lockSchema();
String[] symbols = new String[mFieldCount];
String[] fields = new String[mFieldCount];
System.arraycopy(mFieldNames, 0, fields, 0, mFieldCount);
Class>[] types = new Class[mFieldCount];
System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount);
return new Tuple(objects, symbols, fields, types);
}
} // end of class Schema