Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

cluster.jsp @ 186

Last change on this file since 186 was 66, checked in by waue, 17 years ago
NutchEz - an easy way to nutch
File size: 3.8 KB

Rev	Line
[66]	1	<%--
	2	Licensed to the Apache Software Foundation (ASF) under one or more
	3	contributor license agreements. See the NOTICE file distributed with
	4	this work for additional information regarding copyright ownership.
	5	The ASF licenses this file to You under the Apache License, Version 2.0
	6	(the "License"); you may not use this file except in compliance with
	7	the License. You may obtain a copy of the License at
	8
	9	http://www.apache.org/licenses/LICENSE-2.0
	10
	11	Unless required by applicable law or agreed to in writing, software
	12	distributed under the License is distributed on an "AS IS" BASIS,
	13	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	14	See the License for the specific language governing permissions and
	15	limitations under the License.
	16	--%>
	17	<%
	18
	19	// @author Dawid Weiss
	20	//
	21	// PERFORMANCE/USER INTERFACE NOTE:
	22	//
	23	// What I do here is merely a demonstration. In real life the clustering
	24	// process should be done in a separate "processing" stream, most likely
	25	// a separate HTML frame for which the user's browser requests the data.
	26	// We don't want the user to wait for the plain snippets until the clusters
	27	// are created.
	28	//
	29	// Also: clustering is resource-consuming, so a cache of recent queries
	30	// would be appropriate. Besides, such a cache would also be beneficial for
	31	// the purpose of re-querying existing clusters (remember that the
	32	// clustering extension may be a heuristic returning a DIFFERENT set of
	33	// clusters for an identical input).
	34	// See www.vivisimo.com for details of how this can be done using frames, or
	35	// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.
	36
	37	// Cluster the hits. Best-effort: if clustering throws, clusters stays null and the page reports it.
	38	HitsCluster [] clusters = null;
	39	if (clusterer != null) {
	40	  final long clusteringStart = System.currentTimeMillis();
	41	  try {
	42	    clusters = clusterer.clusterHits( details, Summary.toStrings(summaries) );
	43	    final long clusteringDuration = System.currentTimeMillis() - clusteringStart;
	44	    bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
	45	  } catch (Exception e) { // deliberately not rethrown: the rest of the page must still render
	46	    bean.LOG.info("Clustering failed: " + e); // record the cause instead of silently dropping it
	47	  }
	48	}
	49
	50	if (clusterer == null) {
	51	%>No clustering extension found.<%
	52	} else {
	53	  if (clusters == null) {
	54	    %>Unable to do clustering.<%
	55	  } else if (clusters.length == 0) {
	56	    %>No clusters found.<%
	57	  } else {
	58	    // Display the top N clusters (at most maxLabels labels each) and the top Q documents inside them.
	59	    final int N = Math.min(10, clusters.length);
	60	    final int Q = 3;
	61	    final int maxLabels = 2;
	62	    final int maxTitleLength = 35; // longer link texts are cut to this length and suffixed with "..."
	63	
	64	    for (int clusterIndex = 0; clusterIndex < N; clusterIndex++) {
	65	      HitsCluster cluster = clusters[clusterIndex];
	66	      String[] clusterLabels = cluster.getDescriptionLabels();
	67	
	68	      // probably leave it on for now
	69	      //if (cluster.isJunkCluster()) continue;
	70	
	71	      // Output the cluster labels, comma-separated and HTML-escaped.
	72	      %><div style="margin: 0px; padding: 0px; font-weight: bold;"><%
	73	      for (int k = 0; k < maxLabels && k < clusterLabels.length; k++) {
	74	        if (k > 0) out.print(", ");
	75	        out.print(Entities.encode(clusterLabels[k]));
	76	      }
	77	      %></div><%
	78	
	79	      // Now output sample documents from inside the cluster.
	80	      HitDetails[] documents = cluster.getHits();
	81	      if (documents.length > 0) {
	82	        %><ul style="font-size: 90%; margin-top: .5em;"><%
	83	        for (int k = 0; k < Q && k < documents.length; k++) {
	84	          HitDetails detail = documents[k];
	85	          String title = detail.getValue("title");
	86	          String url = detail.getValue("url");
	87	          if (url == null) url = ""; // guard a missing "url" value so the fallback and escaping below never see null
	88	          if (title == null || title.equals("")) title = url; // no usable title: show the URL instead
	89	          if (title.length() > maxTitleLength) title = title.substring(0, maxTitleLength) + "...";
	90	          // The URL is HTML-escaped as well, because it is written into an attribute value.
	91	%>
	92	<li><a href="<%= Entities.encode(url) %>"><%= Entities.encode(title) %></a></li>
	93	<%
	94	        }
	95	        %></ul><%
	96	      }
	97	
	98	      // ignore subclusters for now, ALTHOUGH HIERARCHICAL CLUSTERING
	99	      // METHODS DO EXIST AND ARE VERY USEFUL
	100	      // HitsCluster [] subclusters = cluster.getSubclusters();
	101	    }
	102	  }
	103	}
	104
	105	%>

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: nutchez-0.1/tomcat/webapps/ROOT/cluster.jsp @ 186

Download in other formats: