source: sample/hadoop-0.16/tw/org/nchc/util/InstanceCounter.java @ 243

Last change on this file since 243 was 21, checked in by waue, 16 years ago

hadoop 0.16

File size: 3.9 KB
Line 
1/*
2 * Cloud9: A MapReduce Library for Hadoop
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you
5 * may not use this file except in compliance with the License. You may
6 * obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 * implied. See the License for the specific language governing
14 * permissions and limitations under the License.
15 */
16
17package tw.org.nchc.util;
18
19import java.util.ArrayList;
20import java.util.Collections;
21import java.util.List;
22import java.util.Map;
23import java.util.SortedSet;
24import java.util.TreeSet;
25
26/**
27 * A class for keeping track of the number of times an object has been
28 * encountered. This is useful for counting things in a stream, e.g., POS tags,
29 * terms, etc.
30 */
31public class InstanceCounter<T extends Comparable<T>> {
32
33  // internal representation---although the scores are doubles, counts are
34  // obviously integers
35  private ScoreSortedMap<T> mMap;
36
37  private int mTotalCount = 0;
38
39  /**
40   * Constructs an <code>InstanceCounter</code>.
41   */
42  public InstanceCounter() {
43    mMap = new ScoreSortedMap<T>();
44  }
45
46  /**
47   * Adds an instance to the set of observations.
48   *
49   * @param instance
50   *            the instance observed
51   */
52  public void count(T instance) {
53    if (mMap.containsKey(instance)) {
54      mMap.put(instance, mMap.get(instance) + 1);
55    } else {
56      mMap.put(instance, 1.0);
57    }
58    mTotalCount++;
59  }
60
61  /**
62   * Prints each instance and how many times its been observed, sorted by the
63   * counts.
64   */
65  public void printCounts() {
66    for (Map.Entry<T, Double> map : mMap.getSortedEntries()) {
67      System.out.println(map.getValue().intValue() + "\t" + map.getKey());
68    }
69  }
70
71  /**
72   * Returns a list of <code>InstanceCount</code> objects, sorted by count.
73   */
74  public List<InstanceCount> getCounts() {
75    List<InstanceCount> l = new ArrayList<InstanceCount>();
76
77    for (Map.Entry<T, Double> map : mMap.getSortedEntries()) {
78      l.add(new InstanceCount(map.getKey(), map.getValue().intValue(),
79          map.getValue() / (double) mTotalCount));
80    }
81
82    return Collections.unmodifiableList(l);
83  }
84
85  /**
86   * Returns the total number of observations.
87   *
88   * @return the total number of observations
89   */
90  public int getTotalCount() {
91    return mTotalCount;
92  }
93
94  /**
95   * Returns the number of times a particular instance has been observed.
96   *
97   * @param inst
98   *            the instance
99   * @return the count of the instance
100   */
101  public int getCount(T inst) {
102    if (mMap.containsKey(inst)) {
103      return mMap.get(inst).intValue();
104    }
105
106    return 0;
107  }
108
109  /**
110   * Returns a collection of all objects observed, sorted by their natural
111   * order.
112   *
113   * @return a collection of all objects observed, sorted by their natural
114   *         order.
115   */
116  public SortedSet<T> getObservedObjects() {
117    SortedSet<T> t = new TreeSet<T>();
118
119    for (T obj : mMap.keySet()) {
120      t.add(obj);
121    }
122
123    return t;
124  }
125
126  /**
127   * A class that holds an instance, its count, and its frequency.
128   */
129  public class InstanceCount {
130    private T mInstance;
131
132    private int mCount;
133
134    private double mFreq;
135
136    private InstanceCount(T instance, int cnt, double freq) {
137      mInstance = instance;
138      mCount = cnt;
139      mFreq = freq;
140    }
141
142    /**
143     * Returns the instance.
144     */
145    public T getInstance() {
146      return mInstance;
147    }
148
149    /**
150     * Returns the number of times the instance has been observed.
151     */
152    public int getCount() {
153      return mCount;
154    }
155
156    /**
157     * Returns the frequency that this instance has been observed. Frequency
158     * is the count divided by the total number of observed instances.
159     */
160    public double getFrequency() {
161      return mFreq;
162    }
163  }
164
165  public void clear() {
166    mMap.clear();
167  }
168
169}
Note: See TracBrowser for help on using the repository browser.