<%--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
--%>
<%

// @author Dawid Weiss
//
// This fragment relies on variables that are not declared here and must be
// provided by the enclosing page: clusterer, details, summaries and bean.
//
// PERFORMANCE/USER INTERFACE NOTE:
//
// What is done here is merely a demonstration. In real life the clustering
// process should run in a separate "processing" stream, most likely
// a separate HTML frame for which the user's browser requests the data.
// We don't want the user to wait for the plain snippets until the clusters
// have been created.
//
// Also: clustering is resource consuming, so a cache of recent queries
// would be appropriate. Such a cache would also be beneficial for
// re-querying existing clusters (remember that the clustering extension
// may be a heuristic returning a DIFFERENT set of clusters for an
// identical input).
// See www.vivisimo.com for details of how this can be done using frames, or
// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.

// Cluster the hits. `clusters` stays null when no clusterer is available or
// when clustering fails; the rendering code distinguishes the two cases by
// checking `clusterer` first.
HitsCluster[] clusters = null;
if (clusterer != null) {
  final long clusteringStart = System.currentTimeMillis();
  try {
    clusters = clusterer.clusterHits(details, Summary.toStrings(summaries));
    final long clusteringDuration = System.currentTimeMillis() - clusteringStart;
    bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
  } catch (Exception e) {
    // Clustering is best-effort: the page must still render, so the failure
    // is reported to the user as "Unable to do clustering." further down.
    // Record the cause instead of discarding it so the failure can be diagnosed.
    // NOTE(review): assumes bean.LOG offers warn(message, Throwable)
    // (commons-logging / slf4j style) - confirm against the bean's logger type.
    clusters = null;
    bean.LOG.warn("Clustering failed: " + e, e);
  }
}

// Render the clustering outcome. Three states are reported as plain text:
// no clustering extension, clustering produced no result (clusters == null),
// and an empty result. Otherwise the top clusters are listed, each with a few
// sample documents.
if (clusterer == null) {
  %>No clustering extension found.<%
} else if (clusters == null) {
  %>Unable to do clustering.<%
} else if (clusters.length == 0) {
  %>No clusters found.<%
} else {
  // Display limits: at most N clusters, Q sample documents per cluster and
  // maxLabels description labels in each cluster heading.
  final int N = Math.min(10, clusters.length);
  final int Q = 3;
  final int maxLabels = 2;
  // Titles longer than this many chars are cut and suffixed with "...".
  final int maxTitleLength = 35;

  for (int clusterIndex = 0; clusterIndex < N; clusterIndex++) {
    HitsCluster cluster = clusters[clusterIndex];
    String[] clusterLabels = cluster.getDescriptionLabels();

    // probably leave it on for now
    //if (cluster.isJunkCluster()) continue;

    // output cluster label.
    %><div style="margin: 0px; padding: 0px; font-weight: bold;"><%
    for (int k = 0; k < maxLabels && k < clusterLabels.length; k++) {
      if (k > 0) {
        out.print(", ");
      }
      out.print(Entities.encode(clusterLabels[k]));
    }
    %></div><%

    // now output sample documents from the inside
    // NOTE(review): the null guard assumes getHits() may return null for a
    // cluster without directly attached documents - confirm against the
    // HitsCluster contract.
    HitDetails[] documents = cluster.getHits();
    if (documents != null && documents.length > 0) {
      %><ul style="font-size: 90%; margin-top: .5em;"><%
      for (int k = 0; k < Q && k < documents.length; k++) {
        HitDetails detail = documents[k];
        String url = detail.getValue("url");
        if (url == null) {
          // Without a URL there is nothing to link to, and the title
          // fallback below would otherwise dereference null.
          continue;
        }

        String title = detail.getValue("title");
        if (title == null || title.equals("")) {
          title = url;
        }
        if (title.length() > maxTitleLength) {
          int cut = maxTitleLength;
          // Do not cut between the two halves of a surrogate pair.
          if (Character.isHighSurrogate(title.charAt(cut - 1))) {
            cut--;
          }
          title = title.substring(0, cut) + "...";
        }
        // Both values originate from indexed content, so both are
        // entity-encoded before being written into the markup; the URL sits
        // inside a double-quoted attribute.
        %>
        <li><a href="<%= Entities.encode(url) %>"><%= Entities.encode(title) %></a></li>
        <%
      }
      %></ul><%
    }

    // ignore subclusters for now, ALTHOUGH HIERARCHICAL CLUSTERING
    // METHODS DO EXIST AND ARE VERY USEFUL
    // HitsCluster [] subclusters = cluster.getSubclusters();
  }
}

%>