| [66] | 1 | <%-- | 
|---|
|  | 2 | Licensed to the Apache Software Foundation (ASF) under one or more | 
|---|
|  | 3 | contributor license agreements.  See the NOTICE file distributed with | 
|---|
|  | 4 | this work for additional information regarding copyright ownership. | 
|---|
|  | 5 | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|---|
|  | 6 | (the "License"); you may not use this file except in compliance with | 
|---|
|  | 7 | the License.  You may obtain a copy of the License at | 
|---|
|  | 8 |  | 
|---|
|  | 9 | http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
|  | 10 |  | 
|---|
|  | 11 | Unless required by applicable law or agreed to in writing, software | 
|---|
|  | 12 | distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
|  | 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
|  | 14 | See the License for the specific language governing permissions and | 
|---|
|  | 15 | limitations under the License. | 
|---|
|  | 16 | --%> | 
|---|
<%

// @author Dawid Weiss
//
// Purpose: cluster the current search hits and render, for each of the top
// clusters, its description labels followed by a short list of sample
// documents.
//
// This fragment relies on names declared outside of it: `clusterer`,
// `details`, `summaries` and `bean` (plus the JSP implicit object `out`).
//
// PERFORMANCE/USER INTERFACE NOTE:
//
// What I do here is merely a demonstration. In real life the clustering
// process should be done in a separate "processing" stream, most likely
// a separate HTML frame that the user's browser requests data from.
// We don't want the user to wait with plain snippets until the clusters
// are created.
//
// Also: clustering is resource consuming, so a cache of recent queries
// would be in place. Besides, such a cache would also be beneficial for the
// purpose of re-querying existing clusters (remember that the
// clustering extension may be a heuristic returning a DIFFERENT set of
// clusters for an identical input).
// See www.vivisimo.com for details of how this can be done using frames, or
// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.

// Cluster the hits. `clusters` stays null when there is no clusterer or when
// clustering fails; the rendering code below distinguishes the two cases.
HitsCluster[] clusters = null;
if (clusterer != null) {
  final long clusteringStart = System.currentTimeMillis();
  try {
    clusters = clusterer.clusterHits(details, Summary.toStrings(summaries));
    final long clusteringDuration = System.currentTimeMillis() - clusteringStart;
    bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
  } catch (Exception e) {
    // Clustering is best-effort: the page still renders and reports
    // "Unable to do clustering." below. Record the cause instead of
    // dropping it silently. Only LOG.info(String) is used because that is
    // the one logger method this fragment is known to have available.
    // NOTE(review): switch to a warn/error call that takes the Throwable if
    // the logger type supports it, so the stack trace is preserved.
    bean.LOG.info("Clustering failed: " + e);
  }
}

if (clusterer == null) {
  %>No clustering extension found.<%
} else if (clusters == null) {
  %>Unable to do clustering.<%
} else if (clusters.length == 0) {
  %>No clusters found.<%
} else {
  // Display limits: at most this many clusters, labels per cluster, and
  // sample documents per cluster; titles longer than the limit are cut.
  final int maxClusters = 10;
  final int maxDocsPerCluster = 3;
  final int maxLabels = 2;
  final int maxTitleLength = 35;

  final int clusterCount = Math.min(maxClusters, clusters.length);

  for (int clusterIndex = 0; clusterIndex < clusterCount; clusterIndex++) {
    final HitsCluster cluster = clusters[clusterIndex];
    final String[] clusterLabels = cluster.getDescriptionLabels();

    // probably leave it on for now
    //if (cluster.isJunkCluster()) continue;

    // Output the cluster label(s), comma separated.
    %><div style="margin: 0px; padding: 0px; font-weight: bold;"><%
    if (clusterLabels != null) {
      for (int k = 0; k < maxLabels && k < clusterLabels.length; k++) {
        if (k > 0) out.print(", ");
        out.print(Entities.encode(clusterLabels[k]));
      }
    }
    %></div><%

    // Now output sample documents from inside the cluster.
    final HitDetails[] documents = cluster.getHits();
    if (documents != null && documents.length > 0) {
      %><ul style="font-size: 90%; margin-top: .5em;"><%
      for (int k = 0; k < maxDocsPerCluster && k < documents.length; k++) {
        final HitDetails detail = documents[k];
        String title = detail.getValue("title");
        String url = detail.getValue("url");

        // A missing url would otherwise be rendered as the literal text
        // "null" and, combined with a missing title, cause a
        // NullPointerException on title.length() below.
        if (url == null) url = "";
        // Fall back to the url when the document has no usable title.
        if (title == null || title.length() == 0) title = url;
        if (title.length() > maxTitleLength) {
          title = title.substring(0, maxTitleLength) + "...";
        }
        // The url comes from indexed (external) content, so it is escaped
        // before being written into the href attribute, the same way the
        // title is escaped for the link text.
        // NOTE(review): assumes Entities.encode also escapes double quotes;
        // confirm against its implementation.
        %>
<li><a href="<%= Entities.encode(url) %>"><%= Entities.encode(title) %></a></li>
<%
      }
      %></ul><%
    }

    // ignore subclusters for now, ALTHOUGH HIERARCHICAL CLUSTERING
    // METHODS DO EXIST AND ARE VERY USEFUL
    // HitsCluster [] subclusters = cluster.getSubclusters();
  }
}

%>
|---|