1 | <%-- |
---|
2 | Licensed to the Apache Software Foundation (ASF) under one or more |
---|
3 | contributor license agreements. See the NOTICE file distributed with |
---|
4 | this work for additional information regarding copyright ownership. |
---|
5 | The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
6 | (the "License"); you may not use this file except in compliance with |
---|
7 | the License. You may obtain a copy of the License at |
---|
8 | |
---|
9 | http://www.apache.org/licenses/LICENSE-2.0 |
---|
10 | |
---|
11 | Unless required by applicable law or agreed to in writing, software |
---|
12 | distributed under the License is distributed on an "AS IS" BASIS, |
---|
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
14 | See the License for the specific language governing permissions and |
---|
15 | limitations under the License. |
---|
16 | --%> |
---|
17 | <%@ page |
---|
18 | session="false" |
---|
19 | contentType="text/html; charset=UTF-8" |
---|
20 | pageEncoding="UTF-8" |
---|
21 | |
---|
22 | import="java.io.*" |
---|
23 | import="java.util.*" |
---|
24 | import="java.net.*" |
---|
25 | import="javax.servlet.http.*" |
---|
26 | import="javax.servlet.*" |
---|
27 | |
---|
28 | import="org.apache.nutch.html.Entities" |
---|
29 | import="org.apache.nutch.metadata.Nutch" |
---|
30 | import="org.apache.nutch.searcher.*" |
---|
31 | import="org.apache.nutch.plugin.*" |
---|
32 | import="org.apache.nutch.clustering.*" |
---|
33 | import="org.apache.hadoop.conf.*" |
---|
34 | import="org.apache.nutch.util.NutchConfiguration" |
---|
35 | %><%! |
---|
/**
 * Number of hits to retrieve and cluster if the clustering extension is
 * available and clustering is on. Read in jspInit() from the
 * "extension.clustering.hits-to-cluster" configuration property; 100 when
 * the property is not set.
 */
private int HITS_TO_CLUSTER;

/**
 * Maximum hits per page to be displayed. Read in jspInit() from the
 * "searcher.max.hits.per.page" configuration property; -1 when the property
 * is not set. The page only applies the cap when this value is positive.
 */
private int MAX_HITS_PER_PAGE;

/**
 * An instance of the clustering extension, if available. Stays null when the
 * clusterer could not be created, which the page treats as "clustering
 * unavailable".
 */
private OnlineClusterer clusterer;

/**
 * Nutch configuration for this servlet, obtained from the servlet context
 * in jspInit().
 */
private Configuration nutchConf;

/**
 * Initializes the page: loads the Nutch configuration from the servlet
 * context, reads the paging/clustering limits from it and tries to create
 * the online clusterer. A clusterer failure is logged and otherwise ignored.
 */
public void jspInit() {
  super.jspInit();

  final ServletContext application = getServletContext();
  nutchConf = NutchConfiguration.get(application);
  HITS_TO_CLUSTER = nutchConf.getInt("extension.clustering.hits-to-cluster", 100);
  MAX_HITS_PER_PAGE = nutchConf.getInt("searcher.max.hits.per.page", -1);

  try {
    clusterer = new OnlineClustererFactory(nutchConf).getOnlineClusterer();
  } catch (PluginRuntimeException e) {
    // Clustering is optional, so do not fail initialization; pass the
    // exception itself to the log so the stack trace and cause are kept.
    super.log("Could not initialize online clusterer: " + e.toString(), e);
  }
}
---|
74 | %> |
---|
75 | |
---|
76 | <%-- |
---|
77 | // Uncomment this to enable query refinement. |
---|
78 | // Do the same to "refine-query.jsp" below. |
---|
79 | <%@ include file="./refine-query-init.jsp" %> |
---|
80 | --%> |
---|
81 | |
---|
82 | <% |
---|
83 | // The Nutch bean instance is initialized through a ServletContextListener |
---|
84 | // that is setup in the web.xml file |
---|
85 | NutchBean bean = NutchBean.get(application, nutchConf); |
---|
86 | // set the character encoding to use when interpreting request values |
---|
87 | request.setCharacterEncoding("UTF-8"); |
---|
88 | |
---|
89 | bean.LOG.info("query request from " + request.getRemoteAddr()); |
---|
90 | |
---|
91 | // get query from request |
---|
92 | String queryString = request.getParameter("query"); |
---|
93 | if (queryString == null) |
---|
94 | queryString = ""; |
---|
95 | String htmlQueryString = Entities.encode(queryString); |
---|
96 | |
---|
97 | // a flag to make the code cleaner a bit. |
---|
98 | boolean clusteringAvailable = (clusterer != null); |
---|
99 | |
---|
100 | String clustering = ""; |
---|
101 | if (clusteringAvailable && "yes".equals(request.getParameter("clustering"))) |
---|
102 | clustering = "yes"; |
---|
103 | |
---|
104 | int start = 0; // first hit to display |
---|
105 | String startString = request.getParameter("start"); |
---|
106 | if (startString != null) |
---|
107 | start = Integer.parseInt(startString); |
---|
108 | |
---|
109 | int hitsPerPage = 10; // number of hits to display |
---|
110 | String hitsString = request.getParameter("hitsPerPage"); |
---|
111 | if (hitsString != null) |
---|
112 | hitsPerPage = Integer.parseInt(hitsString); |
---|
113 | if(MAX_HITS_PER_PAGE > 0 && hitsPerPage > MAX_HITS_PER_PAGE) |
---|
114 | hitsPerPage = MAX_HITS_PER_PAGE; |
---|
115 | |
---|
116 | int hitsPerSite = 2; // max hits per site |
---|
117 | String hitsPerSiteString = request.getParameter("hitsPerSite"); |
---|
118 | if (hitsPerSiteString != null) |
---|
119 | hitsPerSite = Integer.parseInt(hitsPerSiteString); |
---|
120 | |
---|
121 | String sort = request.getParameter("sort"); |
---|
122 | boolean reverse = |
---|
123 | sort!=null && "true".equals(request.getParameter("reverse")); |
---|
124 | |
---|
125 | String params = "&hitsPerPage="+hitsPerPage |
---|
126 | +(sort==null ? "" : "&sort="+sort+(reverse?"&reverse=true":"")); |
---|
127 | |
---|
128 | int hitsToCluster = HITS_TO_CLUSTER; // number of hits to cluster |
---|
129 | |
---|
130 | // get the lang from request |
---|
131 | String queryLang = request.getParameter("lang"); |
---|
132 | if (queryLang == null) { queryLang = ""; } |
---|
133 | Query query = Query.parse(queryString, queryLang, nutchConf); |
---|
134 | bean.LOG.info("query: " + queryString); |
---|
135 | bean.LOG.info("lang: " + queryLang); |
---|
136 | |
---|
137 | String language = |
---|
138 | ResourceBundle.getBundle("org.nutch.jsp.search", request.getLocale()) |
---|
139 | .getLocale().getLanguage(); |
---|
140 | String requestURI = HttpUtils.getRequestURL(request).toString(); |
---|
141 | String base = requestURI.substring(0, requestURI.lastIndexOf('/')); |
---|
142 | String rss = "../opensearch?query="+htmlQueryString |
---|
143 | +"&hitsPerSite="+hitsPerSite+"&lang="+queryLang+params; |
---|
144 | %><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> |
---|
145 | <% |
---|
146 | // To prevent the character encoding declared with 'contentType' page |
---|
147 | // directive from being overridden by JSTL (apache i18n), we freeze it |
---|
148 | // by flushing the output buffer. |
---|
149 | // see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/ |
---|
150 | out.flush(); |
---|
151 | %> |
---|
152 | <%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %> |
---|
153 | <i18n:bundle baseName="org.nutch.jsp.search"/> |
---|
<html lang="<%= language %>">
<head>
<%-- The charset declaration belongs inside <head>; it was previously emitted before it. --%>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Nutch: <i18n:message key="title"/></title>
<link rel="icon" href="img/favicon.ico" type="image/x-icon"/>
<link rel="shortcut icon" href="img/favicon.ico" type="image/x-icon"/>
<link rel="alternate" type="application/rss+xml" title="RSS" href="<%=rss%>"/>
<jsp:include page="include/style.html"/>
<base href="<%= base + "/" + language %>/">
<script type="text/javascript">
<!--
function queryfocus() { document.search.query.focus(); }
// -->
</script>
</head>
---|
169 | |
---|
170 | <body onLoad="queryfocus();"> |
---|
171 | |
---|
172 | <jsp:include page="<%= language + \"/include/header.html\"%>"/> |
---|
173 | |
---|
174 | <form name="search" action="../search.jsp" method="get"> |
---|
175 | <input name="query" size=44 value="<%=htmlQueryString%>"> |
---|
176 | <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> |
---|
177 | <input type="hidden" name="lang" value="<%=language%>"> |
---|
178 | <input type="submit" value="<i18n:message key="search"/>"> |
---|
179 | <% if (clusteringAvailable) { %> |
---|
180 | <input id="clustbox" type="checkbox" name="clustering" value="yes" <% if (clustering.equals("yes")) { %>CHECKED<% } %>> |
---|
181 | <label for="clustbox"><i18n:message key="clustering"/></label> |
---|
182 | <% } %> |
---|
183 | <a href="help.html">help</a> |
---|
184 | </form> |
---|
185 | |
---|
186 | <%-- |
---|
187 | // Uncomment this to enable query refinement. |
---|
188 | // Do the same to "refine-query-init.jsp" above. |
---|
189 | <%@ include file="./refine-query.jsp" %> |
---|
190 | --%> |
---|
191 | |
---|
192 | <% |
---|
193 | // how many hits to retrieve? if clustering is on and available, |
---|
194 | // take "hitsToCluster", otherwise just get hitsPerPage |
---|
195 | int hitsToRetrieve = (clusteringAvailable && clustering.equals("yes") ? hitsToCluster : hitsPerPage); |
---|
196 | |
---|
197 | if (clusteringAvailable && clustering.equals("yes")) { |
---|
198 | bean.LOG.info("Clustering is on, hits to retrieve: " + hitsToRetrieve); |
---|
199 | } |
---|
200 | |
---|
201 | // perform query |
---|
202 | // NOTE by Dawid Weiss: |
---|
203 | // The 'clustering' window actually moves with the start |
---|
204 | // position.... this is good, bad?... ugly?.... |
---|
205 | Hits hits; |
---|
206 | try{ |
---|
207 | hits = bean.search(query, start + hitsToRetrieve, hitsPerSite, "site", |
---|
208 | sort, reverse); |
---|
209 | } catch (IOException e){ |
---|
210 | hits = new Hits(0,new Hit[0]); |
---|
211 | } |
---|
212 | int end = (int)Math.min(hits.getLength(), start + hitsPerPage); |
---|
213 | int length = end-start; |
---|
214 | int realEnd = (int)Math.min(hits.getLength(), start + hitsToRetrieve); |
---|
215 | |
---|
216 | Hit[] show = hits.getHits(start, realEnd-start); |
---|
217 | HitDetails[] details = bean.getDetails(show); |
---|
218 | Summary[] summaries = bean.getSummary(details, query); |
---|
219 | bean.LOG.info("total hits: " + hits.getTotal()); |
---|
220 | %> |
---|
221 | |
---|
222 | <i18n:message key="hits"> |
---|
223 | <i18n:messageArg value="<%=new Long((end==0)?0:(start+1))%>"/> |
---|
224 | <i18n:messageArg value="<%=new Long(end)%>"/> |
---|
225 | <i18n:messageArg value="<%=new Long(hits.getTotal())%>"/> |
---|
226 | </i18n:message> |
---|
227 | |
---|
228 | <% |
---|
229 | // be responsive |
---|
230 | out.flush(); |
---|
231 | %> |
---|
232 | |
---|
233 | <br><br> |
---|
234 | |
---|
235 | <% if (clustering.equals("yes") && length != 0) { %> |
---|
236 | <table border=0 cellspacing="3" cellpadding="0"> |
---|
237 | |
---|
238 | <tr> |
---|
239 | |
---|
240 | <td valign="top"> |
---|
241 | |
---|
242 | <% } %> |
---|
243 | |
---|
<%
  // Render one entry per hit on the current page. Values that come from the
  // index (url, title, dedup value) or from the request (query, lang) are
  // escaped for the context they are written into: HTML-escaped in markup and
  // attribute values, URL-encoded when used as URL parameters.
  for (int i = 0; i < length; i++) {      // display the hits
    Hit hit = show[i];
    HitDetails detail = details[i];
    String title = detail.getValue("title");
    String url = detail.getValue("url");
    String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getUniqueKey();
    String summary = summaries[i].toHtml(true);
    String caching = detail.getValue("cache");
    boolean showSummary = true;
    boolean showCached = true;
    if (caching != null) {
      showSummary = !caching.equals(Nutch.CACHING_FORBIDDEN_ALL);
      showCached = !caching.equals(Nutch.CACHING_FORBIDDEN_NONE);
    }

    if (title == null || title.equals("")) {      // use url for docs w/o title
      title = url;
    }
    %>
    <b><a href="<%=Entities.encode(url)%>"><%=Entities.encode(title)%></a></b>
    <%@ include file="more.jsp" %>
    <% if (!"".equals(summary) && showSummary) { %>
    <br><%=summary%>
    <% } %>
    <br>
    <span class="url"><%=Entities.encode(url)%></span>
    <%
      if (showCached) {
        %>(<a href="../cached.jsp?<%=id%>"><i18n:message key="cached"/></a>) <%
      }
    %>
    (<a href="../explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString, "UTF-8")%>&lang=<%=URLEncoder.encode(queryLang, "UTF-8")%>"><i18n:message key="explain"/></a>)
    (<a href="../anchors.jsp?<%=id%>"><i18n:message key="anchors"/></a>)
    <% if (hit.moreFromDupExcluded()) {
      // link that re-runs the query restricted to this hit's site, with
      // per-site de-duplication switched off (hitsPerSite=0)
      String more =
        "query="+URLEncoder.encode("site:"+hit.getDedupValue()+" "+queryString, "UTF-8")
        +params+"&hitsPerSite="+0
        +"&lang="+URLEncoder.encode(queryLang, "UTF-8")
        +"&clustering="+clustering;%>
    (<a href="../search.jsp?<%=more%>"><i18n:message key="moreFrom"/>
     <%=Entities.encode(hit.getDedupValue())%></a>)
    <% } %>
    <br><br>
<% } %>
---|
289 | |
---|
290 | <% if (clustering.equals("yes") && length != 0) { %> |
---|
291 | |
---|
292 | </td> |
---|
293 | |
---|
294 | <!-- clusters --> |
---|
295 | <td style="border-right: 1px dotted gray;" /> </td> |
---|
296 | <td align="left" valign="top" width="25%"> |
---|
297 | <%@ include file="cluster.jsp" %> |
---|
298 | </td> |
---|
299 | |
---|
300 | </tr> |
---|
301 | </table> |
---|
302 | |
---|
303 | <% } %> |
---|
304 | |
---|
<%
  // Offer a "next" page while more hits remain: either the total is exact and
  // this page stops short of it, or the total is inexact and more hits were
  // returned than fit on this page.
  // The lang and sort values are request-supplied, so they are HTML-escaped
  // before being written into attribute values.
  if ((hits.totalIsExact() && end < hits.getTotal())   // more hits to show
      || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) {
%>
    <form name="next" action="../search.jsp" method="get">
    <input type="hidden" name="query" value="<%=htmlQueryString%>">
    <input type="hidden" name="lang" value="<%=Entities.encode(queryLang)%>">
    <input type="hidden" name="start" value="<%=end%>">
    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
    <input type="hidden" name="hitsPerSite" value="<%=hitsPerSite%>">
    <input type="hidden" name="clustering" value="<%=clustering%>">
    <input type="submit" value="<i18n:message key="next"/>">
<% if (sort != null) { %>
    <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>">
    <input type="hidden" name="reverse" value="<%=reverse%>">
<% } %>
    </form>
<%
  }

  // The total is inexact and every returned hit already fits on this page:
  // offer to re-run the query with hitsPerSite=0.
  if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) {
%>
    <form name="showAllHits" action="../search.jsp" method="get">
    <input type="hidden" name="query" value="<%=htmlQueryString%>">
    <input type="hidden" name="lang" value="<%=Entities.encode(queryLang)%>">
    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
    <input type="hidden" name="hitsPerSite" value="0">
    <input type="hidden" name="clustering" value="<%=clustering%>">
    <input type="submit" value="<i18n:message key="showAllHits"/>">
<% if (sort != null) { %>
    <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>">
    <input type="hidden" name="reverse" value="<%=reverse%>">
<% } %>
    </form>
<%
  }
%>
---|
343 | |
---|
344 | <table bgcolor="3333ff" align="right"> |
---|
345 | <tr><td bgcolor="ff9900"><a href="<%=rss%>"><font color="ffffff"><b>RSS</b> |
---|
346 | </font></a></td></tr> |
---|
347 | </table> |
---|
348 | |
---|
349 | <p> |
---|
350 | <a href="http://wiki.apache.org/nutch/FAQ"> |
---|
351 | <img border="0" src="../img/poweredbynutch_01.gif"> |
---|
352 | </a> |
---|
353 | |
---|
354 | <jsp:include page="/include/footer.html"/> |
---|
355 | |
---|
356 | </body> |
---|
357 | </html> |
---|