source: nutchez-0.1/tomcat/webapps/ROOT/search.jsp

Last change on this file was 66, checked in by waue, 16 years ago

NutchEz - an easy way to nutch

File size: 11.9 KB
Line 
1<%--
2  Licensed to the Apache Software Foundation (ASF) under one or more
3  contributor license agreements.  See the NOTICE file distributed with
4  this work for additional information regarding copyright ownership.
5  The ASF licenses this file to You under the Apache License, Version 2.0
6  (the "License"); you may not use this file except in compliance with
7  the License.  You may obtain a copy of the License at
8 
9  http://www.apache.org/licenses/LICENSE-2.0
10 
11  Unless required by applicable law or agreed to in writing, software
12  distributed under the License is distributed on an "AS IS" BASIS,
13  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  See the License for the specific language governing permissions and
15  limitations under the License.
16--%>
17<%@ page
18  session="false"
19  contentType="text/html; charset=UTF-8"
20  pageEncoding="UTF-8"
21
22  import="java.io.*"
23  import="java.util.*"
24  import="java.net.*"
25  import="javax.servlet.http.*"
26  import="javax.servlet.*"
27
28  import="org.apache.nutch.html.Entities"
29  import="org.apache.nutch.metadata.Nutch"
30  import="org.apache.nutch.searcher.*"
31  import="org.apache.nutch.plugin.*"
32  import="org.apache.nutch.clustering.*"
33  import="org.apache.hadoop.conf.*"
34  import="org.apache.nutch.util.NutchConfiguration"
35%><%!
36  /**
37   * Number of hits to retrieve and cluster if clustering extension is available
38   * and clustering is on. By default, 100. Configurable via nutch-conf.xml.
39   */
40  private int HITS_TO_CLUSTER;
41
42  /**
43   * Maximum hits per page to be displayed.
44   */
45  private int MAX_HITS_PER_PAGE;
46
47  /**
48   * An instance of the clustering extension, if available.
49   */
50  private OnlineClusterer clusterer;
51 
52  /**
53   * Nutch configuration for this servlet.
54   */
55  private Configuration nutchConf;
56
57  /**
58   * Initialize search bean.
59   */
60  public void jspInit() {
61    super.jspInit();
62   
63    final ServletContext application = getServletContext(); 
64    nutchConf = NutchConfiguration.get(application);
65    HITS_TO_CLUSTER = nutchConf.getInt("extension.clustering.hits-to-cluster", 100);
66    MAX_HITS_PER_PAGE = nutchConf.getInt("searcher.max.hits.per.page", -1);
67
68    try {
69      clusterer = new OnlineClustererFactory(nutchConf).getOnlineClusterer();
70    } catch (PluginRuntimeException e) {
71      super.log("Could not initialize online clusterer: " + e.toString());
72    }
73  }
74%>
75
76<%--
77// Uncomment this to enable query refinement.
78// Do the same to "refine-query.jsp" below.
79<%@ include file="./refine-query-init.jsp" %>
80--%>
81
82<%
83  // The Nutch bean instance is initialized through a ServletContextListener
84  // that is setup in the web.xml file
85  NutchBean bean = NutchBean.get(application, nutchConf);
86  // set the character encoding to use when interpreting request values
87  request.setCharacterEncoding("UTF-8");
88
89  bean.LOG.info("query request from " + request.getRemoteAddr());
90
91  // get query from request
92  String queryString = request.getParameter("query");
93  if (queryString == null)
94    queryString = "";
95  String htmlQueryString = Entities.encode(queryString);
96 
97  // a flag to make the code cleaner a bit.
98  boolean clusteringAvailable = (clusterer != null);
99
100  String clustering = "";
101  if (clusteringAvailable && "yes".equals(request.getParameter("clustering")))
102    clustering = "yes";
103
104  int start = 0;          // first hit to display
105  String startString = request.getParameter("start");
106  if (startString != null)
107    start = Integer.parseInt(startString);
108
109  int hitsPerPage = 10;          // number of hits to display
110  String hitsString = request.getParameter("hitsPerPage");
111  if (hitsString != null)
112    hitsPerPage = Integer.parseInt(hitsString);
113  if(MAX_HITS_PER_PAGE > 0 && hitsPerPage > MAX_HITS_PER_PAGE)
114    hitsPerPage = MAX_HITS_PER_PAGE;
115
116  int hitsPerSite = 2;                            // max hits per site
117  String hitsPerSiteString = request.getParameter("hitsPerSite");
118  if (hitsPerSiteString != null)
119    hitsPerSite = Integer.parseInt(hitsPerSiteString);
120
121  String sort = request.getParameter("sort");
122  boolean reverse =
123    sort!=null && "true".equals(request.getParameter("reverse"));
124
125  String params = "&hitsPerPage="+hitsPerPage
126     +(sort==null ? "" : "&sort="+sort+(reverse?"&reverse=true":""));
127
128  int hitsToCluster = HITS_TO_CLUSTER;            // number of hits to cluster
129
130  // get the lang from request
131  String queryLang = request.getParameter("lang");
132  if (queryLang == null) { queryLang = ""; }
133  Query query = Query.parse(queryString, queryLang, nutchConf);
134  bean.LOG.info("query: " + queryString);
135  bean.LOG.info("lang: " + queryLang);
136
137  String language =
138    ResourceBundle.getBundle("org.nutch.jsp.search", request.getLocale())
139    .getLocale().getLanguage();
140  String requestURI = HttpUtils.getRequestURL(request).toString();
141  String base = requestURI.substring(0, requestURI.lastIndexOf('/'));
142  String rss = "../opensearch?query="+htmlQueryString
143    +"&hitsPerSite="+hitsPerSite+"&lang="+queryLang+params;
144%><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
145<%
146  // To prevent the character encoding declared with 'contentType' page
147  // directive from being overriden by JSTL (apache i18n), we freeze it
148  // by flushing the output buffer.
149  // see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/
150  out.flush();
151%>
<%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %>
<i18n:bundle baseName="org.nutch.jsp.search"/>
<html lang="<%= language %>">
<head>
<%-- The charset declaration is document metadata and belongs inside <head>. --%>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Nutch: <i18n:message key="title"/></title>
<link rel="icon" href="img/favicon.ico" type="image/x-icon"/>
<link rel="shortcut icon" href="img/favicon.ico" type="image/x-icon"/>
<link rel="alternate" type="application/rss+xml" title="RSS" href="<%=rss%>"/>
<jsp:include page="include/style.html"/>
<%-- Relative URLs below this point resolve against the per-language directory. --%>
<base href="<%= base  + "/" + language %>/">
<script type="text/javascript">
<!--
function queryfocus() { document.search.query.focus(); }
// -->
</script>
</head>

<body onLoad="queryfocus();">

<jsp:include page="<%= language + \"/include/header.html\"%>"/>

 <%-- Main search form; the query is echoed back HTML-escaped. --%>
 <form name="search" action="../search.jsp" method="get">
 <input name="query" size="44" value="<%=htmlQueryString%>">
 <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
 <%-- NOTE(review): this submits the UI language, not the requested query
      language (queryLang) - confirm that this is intended. --%>
 <input type="hidden" name="lang" value="<%=language%>">
 <input type="submit" value="<i18n:message key="search"/>">
 <% if (clusteringAvailable) { %>
   <input id="clustbox" type="checkbox" name="clustering" value="yes" <% if (clustering.equals("yes")) { %>CHECKED<% } %>>
    <label for="clustbox"><i18n:message key="clustering"/></label>
 <% } %>
 <a href="help.html">help</a>
 </form>
185
186<%--
187// Uncomment this to enable query refinement.
188// Do the same to "refine-query-init.jsp" above.
189<%@ include file="./refine-query.jsp" %>
190--%>
191
192<%
193   // how many hits to retrieve? if clustering is on and available,
194   // take "hitsToCluster", otherwise just get hitsPerPage
195   int hitsToRetrieve = (clusteringAvailable && clustering.equals("yes") ? hitsToCluster : hitsPerPage);
196
197   if (clusteringAvailable && clustering.equals("yes")) {
198     bean.LOG.info("Clustering is on, hits to retrieve: " + hitsToRetrieve);
199   }
200
201   // perform query
202    // NOTE by Dawid Weiss:
203    // The 'clustering' window actually moves with the start
204    // position.... this is good, bad?... ugly?....
205   Hits hits;
206   try{
207     hits = bean.search(query, start + hitsToRetrieve, hitsPerSite, "site",
208                        sort, reverse);
209   } catch (IOException e){
210     hits = new Hits(0,new Hit[0]); 
211   }
212   int end = (int)Math.min(hits.getLength(), start + hitsPerPage);
213   int length = end-start;
214   int realEnd = (int)Math.min(hits.getLength(), start + hitsToRetrieve);
215
216   Hit[] show = hits.getHits(start, realEnd-start);
217   HitDetails[] details = bean.getDetails(show);
218   Summary[] summaries = bean.getSummary(details, query);
219   bean.LOG.info("total hits: " + hits.getTotal());
220%>
221
222<i18n:message key="hits">
223  <i18n:messageArg value="<%=new Long((end==0)?0:(start+1))%>"/>
224  <i18n:messageArg value="<%=new Long(end)%>"/>
225  <i18n:messageArg value="<%=new Long(hits.getTotal())%>"/>
226</i18n:message>
227
228<%
229// be responsive
230out.flush();
231%>
232
233<br><br>
234
235<% if (clustering.equals("yes") && length != 0) { %>
236<table border=0 cellspacing="3" cellpadding="0">
237
238<tr>
239
240<td valign="top">
241
242<% } %>
243
244<%
245  for (int i = 0; i < length; i++) {      // display the hits
246    Hit hit = show[i];
247    HitDetails detail = details[i];
248    String title = detail.getValue("title");
249    String url = detail.getValue("url");
250    String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getUniqueKey();
251    String summary = summaries[i].toHtml(true);
252    String caching = detail.getValue("cache");
253    boolean showSummary = true;
254    boolean showCached = true;
255    if (caching != null) {
256      showSummary = !caching.equals(Nutch.CACHING_FORBIDDEN_ALL);
257      showCached = !caching.equals(Nutch.CACHING_FORBIDDEN_NONE);
258    }
259
260    if (title == null || title.equals("")) {      // use url for docs w/o title
261      title = url;
262    }
263    %>
264    <b><a href="<%=url%>"><%=Entities.encode(title)%></a></b>
265    <%@ include file="more.jsp" %>
266    <% if (!"".equals(summary) && showSummary) { %>
267    <br><%=summary%>
268    <% } %>
269    <br>
270    <span class="url"><%=Entities.encode(url)%></span>
271    <%
272      if (showCached) {
273        %>(<a href="../cached.jsp?<%=id%>"><i18n:message key="cached"/></a>) <%
274    }
275    %>
276    (<a href="../explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString, "UTF-8")%>&lang=<%=queryLang%>"><i18n:message key="explain"/></a>)
277    (<a href="../anchors.jsp?<%=id%>"><i18n:message key="anchors"/></a>)
278    <% if (hit.moreFromDupExcluded()) {
279    String more =
280    "query="+URLEncoder.encode("site:"+hit.getDedupValue()+" "+queryString, "UTF8")
281    +params+"&hitsPerSite="+0
282    +"&lang="+queryLang
283    +"&clustering="+clustering;%>
284    (<a href="../search.jsp?<%=more%>"><i18n:message key="moreFrom"/>
285     <%=hit.getDedupValue()%></a>)
286    <% } %>
287    <br><br>
288<% } %>
289
<%-- Closes the hits column and adds the clusters column; emitted only when
     clustering was requested and at least one hit is displayed. --%>
<% if (clustering.equals("yes") && length != 0) { %>

</td>

<!-- clusters -->
<td style="border-right: 1px dotted gray;">&#160;</td>
<td align="left" valign="top" width="25%">
<%@ include file="cluster.jsp" %>
</td>

</tr>
</table>

<% } %>
304
305<%
306
307if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show
308    || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) {
309%>
310    <form name="next" action="../search.jsp" method="get">
311    <input type="hidden" name="query" value="<%=htmlQueryString%>">
312    <input type="hidden" name="lang" value="<%=queryLang%>">
313    <input type="hidden" name="start" value="<%=end%>">
314    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
315    <input type="hidden" name="hitsPerSite" value="<%=hitsPerSite%>">
316    <input type="hidden" name="clustering" value="<%=clustering%>">
317    <input type="submit" value="<i18n:message key="next"/>">
318<% if (sort != null) { %>
319    <input type="hidden" name="sort" value="<%=sort%>">
320    <input type="hidden" name="reverse" value="<%=reverse%>">
321<% } %>
322    </form>
323<%
324    }
325
326if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) {
327%>
328    <form name="showAllHits" action="../search.jsp" method="get">
329    <input type="hidden" name="query" value="<%=htmlQueryString%>">
330    <input type="hidden" name="lang" value="<%=queryLang%>">
331    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
332    <input type="hidden" name="hitsPerSite" value="0">
333    <input type="hidden" name="clustering" value="<%=clustering%>">
334    <input type="submit" value="<i18n:message key="showAllHits"/>">
335<% if (sort != null) { %>
336    <input type="hidden" name="sort" value="<%=sort%>">
337    <input type="hidden" name="reverse" value="<%=reverse%>">
338<% } %>
339    </form>
340<%
341    }
342%>
343
344<table bgcolor="3333ff" align="right">
345<tr><td bgcolor="ff9900"><a href="<%=rss%>"><font color="ffffff"><b>RSS</b>
346</font></a></td></tr>
347</table>
348
349<p>
350<a href="http://wiki.apache.org/nutch/FAQ">
351<img border="0" src="../img/poweredbynutch_01.gif">
352</a>
353
354<jsp:include page="/include/footer.html"/>
355
356</body>
357</html>
Note: See TracBrowser for help on using the repository browser.