<%--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
--%>
<%

// @author Dawid Weiss
//
// Clusters the current search hits and renders the top clusters, each with
// up to two description labels and a few sample documents.
//
// This fragment does not declare `clusterer`, `details`, `summaries` or
// `bean`; it relies on the surrounding page to have them in scope.
//
// PERFORMANCE/USER INTERFACE NOTE:
//
// What is done here is merely a demonstration. In real life the clustering
// process should run in a separate "processing" stream, most likely
// a separate HTML frame that the user's browser requests data from.
// We don't want the user to wait for plain snippets until the clusters
// are created.
//
// Also: clustering is resource consuming, so a cache of recent queries
// would be appropriate. Such a cache would also be beneficial for the
// purpose of re-querying existing clusters (remember that the
// clustering extension may be a heuristic returning a DIFFERENT set of
// clusters for an identical input).
// See www.vivisimo.com for details of how this can be done using frames, or
// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.

// cluster the hits
HitsCluster[] clusters = null;
if (clusterer != null) {
  final long clusteringStart = System.currentTimeMillis();
  try {
    clusters = clusterer.clusterHits( details, Summary.toStrings(summaries) );
    final long clusteringDuration = System.currentTimeMillis() - clusteringStart;
    bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
  } catch (Exception e) {
    // Clustering is best-effort: the page still renders and reports
    // "Unable to do clustering." because `clusters` stays null. Record the
    // cause rather than dropping it. info(String) is used because it is the
    // only logger method this fragment is known to have available.
    bean.LOG.info("Clustering failed: " + e);
  }
}

if (clusterer == null) {
  %>No clustering extension found.<%
} else if (clusters == null) {
  %>Unable to do clustering.<%
} else if (clusters.length == 0) {
  %>No clusters found.<%
} else {
  // Display limits: top clusters, sample documents per cluster, labels per
  // cluster, and the length at which a document title is truncated.
  final int maxClusters = 10;
  final int docsPerCluster = 3;
  final int maxLabels = 2;
  final int maxTitleChars = 35;

  final int shownClusters = Math.min(maxClusters, clusters.length);

  for (int clusterIndex = 0; clusterIndex < shownClusters; clusterIndex++) {
    HitsCluster cluster = clusters[ clusterIndex ];
    String[] clusterLabels = cluster.getDescriptionLabels();

    // Junk clusters are still displayed for now.
    //if (cluster.isJunkCluster()) continue;

    // output the cluster label (at most maxLabels labels, comma separated)
    %><div style="margin: 0px; padding: 0px; font-weight: bold;"><%
    for (int k = 0; k < maxLabels && k < clusterLabels.length; k++) {
      if (k > 0) out.print(", ");
      out.print( Entities.encode(clusterLabels[k]) );
    }
    %></div><%

    // now output sample documents from inside the cluster
    HitDetails[] documents = cluster.getHits();
    if (documents.length > 0) {
      %><ul style="font-size: 90%; margin-top: .5em;"><%
      for (int k = 0; k < docsPerCluster && k < documents.length; k++) {
        HitDetails detail = documents[ k ];
        String title = detail.getValue("title");
        String url = detail.getValue("url");

        // Fall back to the URL when the document has no usable title.
        if (title == null || title.equals("")) title = url;
        // Neither a title nor a URL: there is nothing to show for this hit.
        if (title == null) continue;
        if (title.length() > maxTitleChars) {
          title = title.substring(0, maxTitleChars) + "...";
        }

        if (url != null) {
          // The URL comes from indexed content, so it is escaped before
          // being written into the href attribute, just like the title.
          %>
            <li><a href="<%= Entities.encode(url) %>"><%= Entities.encode(title) %></a></li>
          <%
        } else {
          // No URL available: show the title as plain text, not a dead link.
          %>
            <li><%= Entities.encode(title) %></li>
          <%
        }
      }
      %></ul><%
    }

    // Subclusters are ignored for now, ALTHOUGH HIERARCHICAL CLUSTERING
    // METHODS DO EXIST AND ARE VERY USEFUL.
    // HitsCluster [] subclusters = cluster.getSubclusters();
  }
}

%>