| [66] | 1 | <%-- | 
|---|
 | 2 |   Licensed to the Apache Software Foundation (ASF) under one or more | 
|---|
 | 3 |   contributor license agreements.  See the NOTICE file distributed with | 
|---|
 | 4 |   this work for additional information regarding copyright ownership. | 
|---|
 | 5 |   The ASF licenses this file to You under the Apache License, Version 2.0 | 
|---|
 | 6 |   (the "License"); you may not use this file except in compliance with | 
|---|
 | 7 |   the License.  You may obtain a copy of the License at | 
|---|
 | 8 |    | 
|---|
 | 9 |   http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
 | 10 |    | 
|---|
 | 11 |   Unless required by applicable law or agreed to in writing, software | 
|---|
 | 12 |   distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
 | 13 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
 | 14 |   See the License for the specific language governing permissions and | 
|---|
 | 15 |   limitations under the License. | 
|---|
 | 16 | --%> | 
|---|
 | 17 | <% | 
|---|
 | 18 |  | 
|---|
 | 19 | // @author Dawid Weiss | 
|---|
 | 20 | // | 
|---|
 | 21 | // PERFORMANCE/USER INTERFACE NOTE: | 
|---|
 | 22 | // | 
|---|
 | 23 | // What I do here is merely a demonstration. In real life the clustering | 
|---|
 | 24 | // process should be done in a separate "processing" stream, most likely | 
|---|
 | 25 | // a separate HTML frame that the user's browser requests data for. | 
|---|
 | 26 | // We don't want the user to wait with plain snippets until the clusters | 
|---|
 | 27 | // are created. | 
|---|
 | 28 | // | 
|---|
 | 29 | // Also: clustering is resource consuming, so a cache of recent queries  | 
|---|
 | 30 | // would be in order. Besides, such a cache would also be beneficial for the | 
|---|
 | 31 | // purpose of re-querying existing clusters (remember that the | 
|---|
 | 32 | // clustering extension may be a heuristic returning a DIFFERENT set of | 
|---|
 | 33 | // clusters for an identical input). | 
|---|
 | 34 | // See www.vivisimo.com for details of how this can be done using frames, or | 
|---|
 | 35 | // http://carrot.cs.put.poznan.pl for an example of a Javascript solution. | 
|---|
 | 36 |  | 
|---|
 | 37 | // cluster the hits | 
|---|
 | 38 | HitsCluster [] clusters = null; | 
|---|
 | 39 | if (clusterer != null) { | 
|---|
 | 40 |   final long clusteringStart = System.currentTimeMillis(); | 
|---|
 | 41 |   try { | 
|---|
 | 42 |     clusters = clusterer.clusterHits( details, Summary.toStrings(summaries) ); | 
|---|
 | 43 |     final long clusteringDuration = System.currentTimeMillis() - clusteringStart; | 
|---|
 | 44 |     bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds."); | 
|---|
 | 45 |   } catch (Exception e) { | 
|---|
 | 46 |     // failed to do clustering (see below) | 
|---|
 | 47 |   } | 
|---|
 | 48 | } | 
|---|
 | 49 |  | 
|---|
 | 50 | if (clusterer == null) { | 
|---|
 | 51 |   %>No clustering extension found.<% | 
|---|
 | 52 | } else { | 
|---|
 | 53 |   if (clusters == null) { | 
|---|
 | 54 |     %>Unable to do clustering.<% | 
|---|
 | 55 |   } else if (clusters.length == 0) { | 
|---|
 | 56 |     %>No clusters found.<% | 
|---|
 | 57 |   } else { | 
|---|
 | 58 |     // display top N clusters and top Q documents inside them. | 
|---|
 | 59 |     int N = 10; | 
|---|
 | 60 |     int Q = 3; | 
|---|
 | 61 |     int maxLabels = 2; | 
|---|
 | 62 |      | 
|---|
 | 63 |     int displayCounter = 0; | 
|---|
 | 64 |     N = Math.min(N, clusters.length ); | 
|---|
 | 65 |  | 
|---|
 | 66 |     for (int clusterIndex = 0 ; clusterIndex < N ; clusterIndex++) { | 
|---|
 | 67 |       HitsCluster cluster = clusters[ clusterIndex ]; | 
|---|
 | 68 |       String [] clusterLabels = cluster.getDescriptionLabels(); | 
|---|
 | 69 |        | 
|---|
 | 70 |       // probably leave it on for now | 
|---|
 | 71 |       //if (cluster.isJunkCluster()) continue; | 
|---|
 | 72 |  | 
|---|
 | 73 |       // output cluster label. | 
|---|
 | 74 |       %><div style="margin: 0px; padding: 0px; font-weight: bold;"><% | 
|---|
 | 75 |       for (int k=0;k<maxLabels && k<clusterLabels.length;k++) { | 
|---|
 | 76 |         if (k>0) out.print(", "); | 
|---|
 | 77 |         out.print( Entities.encode(clusterLabels[k]) ); | 
|---|
 | 78 |       } | 
|---|
 | 79 |       %></div><% | 
|---|
 | 80 |         | 
|---|
 | 81 |       // now output sample documents from the inside | 
|---|
 | 82 |       HitDetails[] documents = cluster.getHits(); | 
|---|
 | 83 |       if (documents.length > 0) { | 
|---|
 | 84 |         %><ul style="font-size: 90%; margin-top: .5em;"><% | 
|---|
 | 85 |         for (int k = 0; k < Q && k < documents.length; k++) { | 
|---|
 | 86 |           HitDetails detail = documents[ k ]; | 
|---|
 | 87 |           String title = detail.getValue("title"); | 
|---|
 | 88 |           String url = detail.getValue("url"); | 
|---|
 | 89 |           if (title == null || title.equals("")) title = url; | 
|---|
 | 90 |           if (title.length() > 35) title = title.substring(0,35) + "..."; | 
|---|
 | 91 |           %> | 
|---|
 | 92 |             <li><a href="<%=url%>"><%= Entities.encode(title) %></a></li> | 
|---|
 | 93 |           <% | 
|---|
 | 94 |         } | 
|---|
 | 95 |         %></ul><% | 
|---|
 | 96 |       } | 
|---|
 | 97 |         | 
|---|
 | 98 |       // ignore subclusters for now, ALTHOUGH HIERARCHICAL CLUSTERING | 
|---|
 | 99 |       // METHODS DO EXIST AND ARE VERY USEFUL | 
|---|
 | 100 |       // HitsCluster [] subclusters = cluster.getSubclusters(); | 
|---|
 | 101 |     } | 
|---|
 | 102 |   } | 
|---|
 | 103 | } | 
|---|
 | 104 |  | 
|---|
 | 105 | %> | 
|---|