[20] | 1 | /* |
---|
| 2 | * Cloud9: A MapReduce Library for Hadoop |
---|
| 3 | * |
---|
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you |
---|
| 5 | * may not use this file except in compliance with the License. You may |
---|
| 6 | * obtain a copy of the License at |
---|
| 7 | * |
---|
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 9 | * |
---|
| 10 | * Unless required by applicable law or agreed to in writing, software |
---|
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
---|
| 13 | * implied. See the License for the specific language governing |
---|
| 14 | * permissions and limitations under the License. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | package tw.org.nchc.util; |
---|
| 18 | |
---|
| 19 | import java.util.ArrayList; |
---|
| 20 | import java.util.Collections; |
---|
| 21 | import java.util.Comparator; |
---|
| 22 | import java.util.HashMap; |
---|
| 23 | import java.util.Iterator; |
---|
| 24 | import java.util.List; |
---|
| 25 | import java.util.Map; |
---|
| 26 | import java.util.NoSuchElementException; |
---|
| 27 | import java.util.SortedSet; |
---|
| 28 | import java.util.TreeSet; |
---|
| 29 | |
---|
| 30 | /** |
---|
| 31 | * A Map that holds scores (doubles) associated with each object (key) and |
---|
| 32 | * supports iteration by score. Many applications call for this type of |
---|
| 33 | * functionality: the ability to associate scores with objects coupled with the |
---|
| 34 | * ability to sort entries by their scores. |
---|
| 35 | * |
---|
| 36 | * @param <K> |
---|
| 37 | * type of key |
---|
| 38 | */ |
---|
| 39 | public class ScoreSortedMap<K extends Comparable<K>> extends HashMap<K, Double> { |
---|
| 40 | |
---|
| 41 | private static final long serialVersionUID = 2983410765L; |
---|
| 42 | |
---|
| 43 | /** |
---|
| 44 | * Constructs a <code>ScoreSortedMap</code>. |
---|
| 45 | */ |
---|
| 46 | public ScoreSortedMap() { |
---|
| 47 | super(); |
---|
| 48 | } |
---|
| 49 | |
---|
| 50 | /** |
---|
| 51 | * Returns the all entries sorted by scores. |
---|
| 52 | * |
---|
| 53 | * @return a sorted set view of the entries sorted by scores |
---|
| 54 | */ |
---|
| 55 | public SortedSet<Map.Entry<K, Double>> getSortedEntries() { |
---|
| 56 | SortedSet<Map.Entry<K, Double>> entries = new TreeSet<Map.Entry<K, Double>>( |
---|
| 57 | new Comparator<Map.Entry<K, Double>>() { |
---|
| 58 | public int compare(Map.Entry<K, Double> e1, |
---|
| 59 | Map.Entry<K, Double> e2) { |
---|
| 60 | if (e1.getValue() > e2.getValue()) { |
---|
| 61 | return -1; |
---|
| 62 | } else if (e1.getValue() < e2.getValue()) { |
---|
| 63 | return 1; |
---|
| 64 | } |
---|
| 65 | return e1.getKey().compareTo(e2.getKey()); |
---|
| 66 | } |
---|
| 67 | }); |
---|
| 68 | |
---|
| 69 | for (Map.Entry<K, Double> entry : this.entrySet()) { |
---|
| 70 | entries.add(entry); |
---|
| 71 | } |
---|
| 72 | |
---|
| 73 | return Collections.unmodifiableSortedSet(entries); |
---|
| 74 | } |
---|
| 75 | |
---|
| 76 | /** |
---|
| 77 | * Returns the <i>n</i> top entries sorted by scores. |
---|
| 78 | * |
---|
| 79 | * @param n |
---|
| 80 | * number of entries to retrieve |
---|
| 81 | * @return a Set view of the entries sorted by scores |
---|
| 82 | */ |
---|
| 83 | public SortedSet<Map.Entry<K, Double>> getSortedEntries(int n) { |
---|
| 84 | |
---|
| 85 | SortedSet<Map.Entry<K, Double>> entries = new TreeSet<Map.Entry<K, Double>>( |
---|
| 86 | new Comparator<Map.Entry<K, Double>>() { |
---|
| 87 | public int compare(Map.Entry<K, Double> e1, |
---|
| 88 | Map.Entry<K, Double> e2) { |
---|
| 89 | if (e1.getValue() > e2.getValue()) { |
---|
| 90 | return -1; |
---|
| 91 | } else if (e1.getValue() < e2.getValue()) { |
---|
| 92 | return 1; |
---|
| 93 | } |
---|
| 94 | return e1.getKey().compareTo(e2.getKey()); |
---|
| 95 | } |
---|
| 96 | }); |
---|
| 97 | |
---|
| 98 | int cnt = 0; |
---|
| 99 | for (Map.Entry<K, Double> entry : getSortedEntries()) { |
---|
| 100 | entries.add(entry); |
---|
| 101 | cnt++; |
---|
| 102 | if (cnt >= n) |
---|
| 103 | break; |
---|
| 104 | } |
---|
| 105 | |
---|
| 106 | return Collections.unmodifiableSortedSet(entries); |
---|
| 107 | } |
---|
| 108 | |
---|
| 109 | /** |
---|
| 110 | * Returns the top-scoring entry. |
---|
| 111 | * |
---|
| 112 | * @return the top-scoring entry |
---|
| 113 | */ |
---|
| 114 | public Map.Entry<K, Double> getTopEntry() { |
---|
| 115 | return getSortedEntries().first(); |
---|
| 116 | } |
---|
| 117 | |
---|
| 118 | /** |
---|
| 119 | * Returns the <i>i</i>th scoring entry. |
---|
| 120 | * |
---|
| 121 | * @param i |
---|
| 122 | * the rank |
---|
| 123 | * @return the <i>i</i>th scoring entry |
---|
| 124 | */ |
---|
| 125 | public Map.Entry<K, Double> getEntryByRank(int i) { |
---|
| 126 | if (i > this.size()) |
---|
| 127 | throw new NoSuchElementException("Error: index out of bounds"); |
---|
| 128 | |
---|
| 129 | Iterator<Map.Entry<K, Double>> iter = getSortedEntries().iterator(); |
---|
| 130 | |
---|
| 131 | int n = 0; |
---|
| 132 | while (n++ < i - 1) |
---|
| 133 | iter.next(); |
---|
| 134 | |
---|
| 135 | return iter.next(); |
---|
| 136 | } |
---|
| 137 | |
---|
| 138 | /** |
---|
| 139 | * Returns a list of the keys, sorted by score. |
---|
| 140 | * |
---|
| 141 | * @return a list of the keys, sorted by score |
---|
| 142 | */ |
---|
| 143 | public List<K> getSortedKeys() { |
---|
| 144 | List<K> list = new ArrayList<K>(); |
---|
| 145 | |
---|
| 146 | for (Map.Entry<K, Double> entry : getSortedEntries()) { |
---|
| 147 | list.add(entry.getKey()); |
---|
| 148 | } |
---|
| 149 | |
---|
| 150 | return list; |
---|
| 151 | } |
---|
| 152 | |
---|
| 153 | /** |
---|
| 154 | * Normalizes all scores to a value between zero and one. Note that if all |
---|
| 155 | * keys have a single score, no action is performed. |
---|
| 156 | */ |
---|
| 157 | public void normalizeScores() { |
---|
| 158 | double max = Double.NEGATIVE_INFINITY; |
---|
| 159 | double min = Double.POSITIVE_INFINITY; |
---|
| 160 | |
---|
| 161 | for (Map.Entry<K, Double> entry : this.entrySet()) { |
---|
| 162 | double score = entry.getValue(); |
---|
| 163 | |
---|
| 164 | if (score > max) |
---|
| 165 | max = score; |
---|
| 166 | |
---|
| 167 | if (score < min) |
---|
| 168 | min = score; |
---|
| 169 | |
---|
| 170 | } |
---|
| 171 | |
---|
| 172 | // if there's only one value, then meaningless to normalize |
---|
| 173 | if (max == min) |
---|
| 174 | return; |
---|
| 175 | |
---|
| 176 | for (Map.Entry<K, Double> entry : this.entrySet()) { |
---|
| 177 | K cur = entry.getKey(); |
---|
| 178 | double score = entry.getValue(); |
---|
| 179 | |
---|
| 180 | this.put(cur, (score - min) / (max - min)); |
---|
| 181 | } |
---|
| 182 | |
---|
| 183 | } |
---|
| 184 | |
---|
| 185 | /** |
---|
| 186 | * Returns a new <code>ScoreSortedMap</code> where the score of each key |
---|
| 187 | * in this object has been linearly interpolated with scores drawn from |
---|
| 188 | * another <code>ScoreSortedMap</code>. A weight of <code>lambda</code> |
---|
| 189 | * is given to the score from this object, and a weight of (1-<code>lambda</code>) |
---|
| 190 | * is given to the score from the other <code>ScoreSortedMap</code>. Both |
---|
| 191 | * <code>ScoreSortedMap</code>s are first normalized. Note that if a key |
---|
| 192 | * is not contained in this object, but present in the other |
---|
| 193 | * <code>ScoreSortedMap</code>, it will <b>not</b> be present in the new |
---|
| 194 | * <code>ScoreSortedMap</code>. |
---|
| 195 | * |
---|
| 196 | * @param s |
---|
| 197 | * the other <code>ScoreSortedMap</code> |
---|
| 198 | * @param lambda |
---|
| 199 | * weight assigned to scores from this object |
---|
| 200 | * @return a new <code>ScoreSortedMap</code> with linearly-interpolated |
---|
| 201 | * scores |
---|
| 202 | */ |
---|
| 203 | public ScoreSortedMap<K> linearInterpolationWith(ScoreSortedMap<K> s, |
---|
| 204 | double lambda) { |
---|
| 205 | this.normalizeScores(); |
---|
| 206 | s.normalizeScores(); |
---|
| 207 | |
---|
| 208 | ScoreSortedMap<K> entries = new ScoreSortedMap<K>(); |
---|
| 209 | |
---|
| 210 | for (Map.Entry<K, Double> entry : getSortedEntries()) { |
---|
| 211 | double score1 = entry.getValue(); |
---|
| 212 | double score2 = 0.0d; |
---|
| 213 | |
---|
| 214 | if (s.containsKey(entry.getKey())) { |
---|
| 215 | score2 = s.get(entry.getKey()); |
---|
| 216 | } |
---|
| 217 | |
---|
| 218 | double newscore = lambda * score1 + (1 - lambda) * score2; |
---|
| 219 | // System.out.println(lambda + " * " + score1 + " + (1-" + lambda + |
---|
| 220 | // ") * " + score2 + " = " + newscore); |
---|
| 221 | entries.put(entry.getKey(), newscore); |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | return entries; |
---|
| 225 | } |
---|
| 226 | |
---|
| 227 | } |
---|