<%--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
--%>
<%

// @author Dawid Weiss
//
// Renders a short list of result clusters: for each of the first few
// clusters, its description labels followed by a few sample documents.
//
// This fragment does not declare the following names; it expects them to be
// in scope already: clusterer, details, summaries, bean, plus the JSP implicit
// objects "out" and "application".
//
// PERFORMANCE/USER INTERFACE NOTE:
//
// What I do here is merely a demonstration. In real life the clustering
// process should be done in a separate "processing" stream, most likely
// a separate HTML frame that the user's browser requests data from.
// We don't want the user to wait with plain snippets until the clusters
// are created.
//
// Also: clustering is resource consuming, so a cache of recent queries
// would be in order. Besides, such a cache would also be beneficial for the
// purpose of re-querying existing clusters (remember that the
// clustering extension may be a heuristic returning a DIFFERENT set of
// clusters for an identical input).
// See www.vivisimo.com for details of how this can be done using frames, or
// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.

// cluster the hits
HitsCluster [] clusters = null;
if (clusterer != null) {
  final long clusteringStart = System.currentTimeMillis();
  try {
    clusters = clusterer.clusterHits( details, Summary.toStrings(summaries) );
    final long clusteringDuration = System.currentTimeMillis() - clusteringStart;
    bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
  } catch (Exception e) {
    // Clustering is best-effort: the page still renders and reports
    // "Unable to do clustering." below. Record the cause so the failure
    // can be diagnosed instead of vanishing silently.
    application.log("Clustering failed.", e);
  }
}

if (clusterer == null) {
  %>No clustering extension found.<%
} else {
  if (clusters == null) {
    %>Unable to do clustering.<%
  } else if (clusters.length == 0) {
    %>No clusters found.<%
  } else {
    // display the top clusters and the top documents inside them.
    final int maxClusters = 10;         // upper bound on clusters shown
    final int maxDocsPerCluster = 3;    // upper bound on sample documents per cluster
    final int maxLabels = 2;            // upper bound on labels printed per cluster
    final int maxTitleLength = 35;      // longer titles are cut and suffixed with "..."

    final int clustersToShow = Math.min(maxClusters, clusters.length);

    for (int clusterIndex = 0; clusterIndex < clustersToShow; clusterIndex++) {
      HitsCluster cluster = clusters[ clusterIndex ];
      String [] clusterLabels = cluster.getDescriptionLabels();

      // probably leave it on for now
      //if (cluster.isJunkCluster()) continue;

      // output cluster label(s), comma separated.
      %><div style="margin: 0px; padding: 0px; font-weight: bold;"><%
      for (int k = 0; k < maxLabels && k < clusterLabels.length; k++) {
        if (k > 0) {
          out.print(", ");
        }
        out.print( Entities.encode(clusterLabels[k]) );
      }
      %></div><%

      // now output sample documents from the inside
      HitDetails[] documents = cluster.getHits();
      if (documents.length > 0) {
        %><ul style="font-size: 90%; margin-top: .5em;"><%
        for (int k = 0; k < maxDocsPerCluster && k < documents.length; k++) {
          HitDetails detail = documents[ k ];
          String url = detail.getValue("url");
          if (url == null) {
            // Avoid printing the literal "null" and avoid a
            // NullPointerException when the URL is used as the title.
            url = "";
          }
          String title = detail.getValue("title");
          if (title == null || title.equals("")) {
            // fall back to the URL when the document has no title
            title = url;
          }
          if (title.length() > maxTitleLength) {
            title = title.substring(0, maxTitleLength) + "...";
          }
          // The URL comes from indexed content, so it is escaped before
          // being placed in the href attribute, just like the title.
          %>
          <li><a href="<%= Entities.encode(url) %>"><%= Entities.encode(title) %></a></li>
          <%
        }
        %></ul><%
      }

      // ignore subclusters for now, ALTHOUGH HIERARCHICAL CLUSTERING
      // METHODS DO EXIST AND ARE VERY USEFUL
      // HitsCluster [] subclusters = cluster.getSubclusters();
    }
  }
}

%>