| [66] | 1 | <%-- | 
|---|
|  | 2 | Licensed to the Apache Software Foundation (ASF) under one or more | 
|---|
|  | 3 | contributor license agreements.  See the NOTICE file distributed with | 
|---|
|  | 4 | this work for additional information regarding copyright ownership. | 
|---|
|  | 5 | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|---|
|  | 6 | (the "License"); you may not use this file except in compliance with | 
|---|
|  | 7 | the License.  You may obtain a copy of the License at | 
|---|
|  | 8 |  | 
|---|
|  | 9 | http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
|  | 10 |  | 
|---|
|  | 11 | Unless required by applicable law or agreed to in writing, software | 
|---|
|  | 12 | distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
|  | 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
|  | 14 | See the License for the specific language governing permissions and | 
|---|
|  | 15 | limitations under the License. | 
|---|
|  | 16 | --%> | 
|---|
|  | 17 | <%@ page | 
|---|
|  | 18 | session="false" | 
|---|
|  | 19 | contentType="text/html; charset=UTF-8" | 
|---|
|  | 20 | pageEncoding="UTF-8" | 
|---|
|  | 21 |  | 
|---|
|  | 22 | import="java.io.*" | 
|---|
|  | 23 | import="java.util.*" | 
|---|
|  | 24 | import="java.net.*" | 
|---|
|  | 25 | import="javax.servlet.http.*" | 
|---|
|  | 26 | import="javax.servlet.*" | 
|---|
|  | 27 |  | 
|---|
|  | 28 | import="org.apache.nutch.html.Entities" | 
|---|
|  | 29 | import="org.apache.nutch.metadata.Nutch" | 
|---|
|  | 30 | import="org.apache.nutch.searcher.*" | 
|---|
|  | 31 | import="org.apache.nutch.plugin.*" | 
|---|
|  | 32 | import="org.apache.nutch.clustering.*" | 
|---|
|  | 33 | import="org.apache.hadoop.conf.*" | 
|---|
|  | 34 | import="org.apache.nutch.util.NutchConfiguration" | 
|---|
%><%!
  /**
   * Number of hits to retrieve and cluster when the clustering extension is
   * available and clustering is on. Read from the
   * "extension.clustering.hits-to-cluster" property; defaults to 100.
   */
  private int HITS_TO_CLUSTER;

  /**
   * Maximum hits per page to be displayed. Read from the
   * "searcher.max.hits.per.page" property; defaults to -1, and the page only
   * applies the cap when the value is positive.
   */
  private int MAX_HITS_PER_PAGE;

  /**
   * An instance of the clustering extension; remains null when it could not
   * be created in jspInit().
   */
  private OnlineClusterer clusterer;

  /**
   * Nutch configuration for this servlet.
   */
  private Configuration nutchConf;

  /**
   * Loads the Nutch configuration, reads the clustering and paging limits
   * from it, and tries to create the online clusterer.
   */
  public void jspInit() {
    super.jspInit();

    final ServletContext application = getServletContext();
    nutchConf = NutchConfiguration.get(application);
    HITS_TO_CLUSTER = nutchConf.getInt("extension.clustering.hits-to-cluster", 100);
    MAX_HITS_PER_PAGE = nutchConf.getInt("searcher.max.hits.per.page", -1);

    try {
      clusterer = new OnlineClustererFactory(nutchConf).getOnlineClusterer();
    } catch (PluginRuntimeException e) {
      // Clustering is optional, so the page keeps working without it; the
      // exception is handed to the servlet log so the stack trace is kept.
      super.log("Could not initialize online clusterer: " + e.toString(), e);
    }
  }
%>
|---|
|  | 75 |  | 
|---|
|  | 76 | <%-- | 
|---|
|  | 77 | // Uncomment this to enable query refinement. | 
|---|
|  | 78 | // Do the same to "refine-query.jsp" below. | 
|---|
|  | 79 | <%@ include file="./refine-query-init.jsp" %> | 
|---|
|  | 80 | --%> | 
|---|
|  | 81 |  | 
|---|
<%
  // The Nutch bean instance is initialized through a ServletContextListener
  // that is set up in the web.xml file.
  NutchBean bean = NutchBean.get(application, nutchConf);
  // Set the character encoding to use when interpreting request values.
  request.setCharacterEncoding("UTF-8");

  bean.LOG.info("query request from " + request.getRemoteAddr());

  // Get the query from the request; a missing parameter means an empty query.
  String queryString = request.getParameter("query");
  if (queryString == null) {
    queryString = "";
  }
  // HTML-escaped form of the query, for echoing into attribute values.
  String htmlQueryString = Entities.encode(queryString);

  // A flag to make the code a bit cleaner.
  boolean clusteringAvailable = (clusterer != null);

  String clustering = "";
  if (clusteringAvailable && "yes".equals(request.getParameter("clustering"))) {
    clustering = "yes";
  }

  // The numeric parameters below come straight from the URL. Malformed or
  // negative values fall back to the defaults instead of failing the request
  // with a NumberFormatException.
  int start = 0;                                  // first hit to display
  String startString = request.getParameter("start");
  if (startString != null) {
    try {
      start = Math.max(0, Integer.parseInt(startString.trim()));
    } catch (NumberFormatException e) {
      start = 0;
    }
  }

  int hitsPerPage = 10;                           // number of hits to display
  String hitsString = request.getParameter("hitsPerPage");
  if (hitsString != null) {
    try {
      int requested = Integer.parseInt(hitsString.trim());
      if (requested >= 0) {
        hitsPerPage = requested;
      }
    } catch (NumberFormatException e) {
      // keep the default
    }
  }
  if (MAX_HITS_PER_PAGE > 0 && hitsPerPage > MAX_HITS_PER_PAGE) {
    hitsPerPage = MAX_HITS_PER_PAGE;
  }

  int hitsPerSite = 2;                            // max hits per site
  String hitsPerSiteString = request.getParameter("hitsPerSite");
  if (hitsPerSiteString != null) {
    try {
      int requested = Integer.parseInt(hitsPerSiteString.trim());
      if (requested >= 0) {
        hitsPerSite = requested;
      }
    } catch (NumberFormatException e) {
      // keep the default
    }
  }

  String sort = request.getParameter("sort");
  boolean reverse =
    sort != null && "true".equals(request.getParameter("reverse"));

  // Query-string fragment shared by the links built on this page. The sort
  // field is user input, so it is URL-encoded before being appended.
  String params = "&hitsPerPage=" + hitsPerPage
    + (sort == null
        ? ""
        : "&sort=" + URLEncoder.encode(sort, "UTF-8") + (reverse ? "&reverse=true" : ""));

  int hitsToCluster = HITS_TO_CLUSTER;            // number of hits to cluster

  // Get the query language from the request.
  String queryLang = request.getParameter("lang");
  if (queryLang == null) {
    queryLang = "";
  }
  Query query = Query.parse(queryString, queryLang, nutchConf);
  bean.LOG.info("query: " + queryString);
  bean.LOG.info("lang: " + queryLang);

  String language =
    ResourceBundle.getBundle("org.nutch.jsp.search", request.getLocale())
      .getLocale().getLanguage();
  String requestURI = HttpUtils.getRequestURL(request).toString();
  String base = requestURI.substring(0, requestURI.lastIndexOf('/'));

  // Link to the OpenSearch/RSS view of the same query. User-supplied values
  // are URL-encoded first; the finished URL is then HTML-escaped because the
  // page writes it directly into href attributes.
  String rss = Entities.encode(
    "../opensearch?query=" + URLEncoder.encode(queryString, "UTF-8")
    + "&hitsPerSite=" + hitsPerSite
    + "&lang=" + URLEncoder.encode(queryLang, "UTF-8")
    + params);
%><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|---|
<%
  // To prevent the character encoding declared with the 'contentType' page
  // directive from being overridden by JSTL (apache i18n), we freeze it
  // by flushing the output buffer.
  // see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/
  out.flush();
%>
<%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %>
<i18n:bundle baseName="org.nutch.jsp.search"/>
<html lang="<%= language %>">
<head>
<%-- The charset declaration is head content, so it lives inside <head>. --%>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Nutch: <i18n:message key="title"/></title>
<link rel="icon" href="img/favicon.ico" type="image/x-icon"/>
<link rel="shortcut icon" href="img/favicon.ico" type="image/x-icon"/>
<link rel="alternate" type="application/rss+xml" title="RSS" href="<%=rss%>"/>
<jsp:include page="include/style.html"/>
<base href="<%= base  + "/" + language %>/">
<script type="text/javascript">
<!--
function queryfocus() { document.search.query.focus(); }
// -->
</script>
</head>
|---|
|  | 169 |  | 
|---|
<body onLoad="queryfocus();">

<jsp:include page="<%= language + \"/include/header.html\"%>"/>

<%-- Search form: echoes the current query and page size; the clustering
     checkbox is rendered only when a clusterer is available. --%>
<form name="search" action="../search.jsp" method="get">
<input name="query" size=44 value="<%=htmlQueryString%>">
<input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
<input type="hidden" name="lang" value="<%=language%>">
<input type="submit" value="<i18n:message key="search"/>">
<% if (clusteringAvailable) { %>
<input id="clustbox" type="checkbox" name="clustering" value="yes" <%= "yes".equals(clustering) ? "CHECKED" : "" %>>
<label for="clustbox"><i18n:message key="clustering"/></label>
<% } %>
<a href="help.html">help</a>
</form>
|---|
|  | 185 |  | 
|---|
|  | 186 | <%-- | 
|---|
|  | 187 | // Uncomment this to enable query refinement. | 
|---|
|  | 188 | // Do the same to "refine-query-init.jsp" above. | 
|---|
|  | 189 | <%@ include file="./refine-query.jsp" %> | 
|---|
|  | 190 | --%> | 
|---|
|  | 191 |  | 
|---|
<%
   // How many hits to retrieve? If clustering is on and available,
   // take "hitsToCluster", otherwise just get hitsPerPage.
   int hitsToRetrieve = (clusteringAvailable && clustering.equals("yes") ? hitsToCluster : hitsPerPage);

   if (clusteringAvailable && clustering.equals("yes")) {
     bean.LOG.info("Clustering is on, hits to retrieve: " + hitsToRetrieve);
   }

   // The paging values originate from request parameters. Negative values
   // would make the window arithmetic below yield negative array lengths.
   if (start < 0) {
     start = 0;
   }
   if (hitsToRetrieve < 0) {
     hitsToRetrieve = 0;
   }

   // perform query
   // NOTE by Dawid Weiss:
   // The 'clustering' window actually moves with the start
   // position.... this is good, bad?... ugly?....
   Hits hits;
   try {
     hits = bean.search(query, start + hitsToRetrieve, hitsPerSite, "site",
                        sort, reverse);
   } catch (IOException e) {
     // Degrade to an empty result list, but leave a trace of the failure.
     bean.LOG.warn("search failed, showing an empty result list", e);
     hits = new Hits(0, new Hit[0]);
   }

   // A start offset beyond the last returned hit would make the lengths
   // computed below negative, so clamp it to the end of the result list.
   if (start > hits.getLength()) {
     start = (int) hits.getLength();
   }

   // [start, end) is the window displayed on this page; [start, realEnd) is
   // the window fetched, which is larger when clustering is on.
   int end = (int) Math.max(start, Math.min(hits.getLength(), start + hitsPerPage));
   int length = end - start;
   int realEnd = (int) Math.max(end, Math.min(hits.getLength(), start + hitsToRetrieve));

   Hit[] show = hits.getHits(start, realEnd - start);
   HitDetails[] details = bean.getDetails(show);
   Summary[] summaries = bean.getSummary(details, query);
   bean.LOG.info("total hits: " + hits.getTotal());
%>

<i18n:message key="hits">
<i18n:messageArg value="<%=Long.valueOf((end==0)?0:(start+1))%>"/>
<i18n:messageArg value="<%=Long.valueOf(end)%>"/>
<i18n:messageArg value="<%=Long.valueOf(hits.getTotal())%>"/>
</i18n:message>

<%
// be responsive
out.flush();
%>
|---|
|  | 232 |  | 
|---|
<br><br>

<%-- With clustering on, the hits go into the left cell of a two-column
     table and the clusters into the right one. --%>
<% if (clustering.equals("yes") && length != 0) { %>
<table border=0 cellspacing="3" cellpadding="0">

<tr>

<td valign="top">

<% } %>

<%
  for (int i = 0; i < length; i++) {      // display the hits
    Hit hit = show[i];
    HitDetails detail = details[i];
    String title = detail.getValue("title");
    String url = detail.getValue("url");
    String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getUniqueKey();
    String summary = summaries[i].toHtml(true);
    String caching = detail.getValue("cache");
    boolean showSummary = true;
    boolean showCached = true;
    if (caching != null) {
      showSummary = !caching.equals(Nutch.CACHING_FORBIDDEN_ALL);
      showCached = !caching.equals(Nutch.CACHING_FORBIDDEN_NONE);
    }

    if (title == null || title.equals("")) {      // use url for docs w/o title
      title = url;
    }
    // Index fields and request parameters are untrusted here: every value
    // written into the markup below is HTML-escaped, and values placed in
    // query strings are URL-encoded.
    %>
    <b><a href="<%=Entities.encode(url)%>"><%=Entities.encode(title)%></a></b>
    <%@ include file="more.jsp" %>
    <% if (!"".equals(summary) && showSummary) { %>
    <br><%=summary%>
    <% } %>
    <br>
    <span class="url"><%=Entities.encode(url)%></span>
    <%
      if (showCached) {
        %>(<a href="../cached.jsp?<%=id%>"><i18n:message key="cached"/></a>) <%
      }
    %>
    (<a href="../explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString, "UTF-8")%>&lang=<%=URLEncoder.encode(queryLang, "UTF-8")%>"><i18n:message key="explain"/></a>)
    (<a href="../anchors.jsp?<%=id%>"><i18n:message key="anchors"/></a>)
    <% if (hit.moreFromDupExcluded()) {
    // Link that repeats the query restricted to this hit's site, with the
    // per-site limit switched off.
    String more =
    "query="+URLEncoder.encode("site:"+hit.getDedupValue()+" "+queryString, "UTF-8")
    +params+"&hitsPerSite="+0
    +"&lang="+URLEncoder.encode(queryLang, "UTF-8")
    +"&clustering="+clustering;%>
    (<a href="../search.jsp?<%=Entities.encode(more)%>"><i18n:message key="moreFrom"/>
     <%=Entities.encode(hit.getDedupValue())%></a>)
    <% } %>
    <br><br>
<% } %>

<% if (clustering.equals("yes") && length != 0) { %>

</td>

<!-- clusters -->
<td style="border-right: 1px dotted gray;"> </td>
<td align="left" valign="top" width="25%">
<%@ include file="cluster.jsp" %>
</td>

</tr>
</table>

<% } %>
|---|
|  | 304 |  | 
|---|
<%
// "Next" is offered when more hits exist beyond this page: either the total
// is exact and larger than 'end', or it is not exact and more hits than fit
// on this page were returned. The 'lang' and 'sort' request parameters are
// HTML-escaped before being echoed into the hidden fields.
if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show
    || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) {
%>
    <form name="next" action="../search.jsp" method="get">
    <input type="hidden" name="query" value="<%=htmlQueryString%>">
    <input type="hidden" name="lang" value="<%=Entities.encode(queryLang)%>">
    <input type="hidden" name="start" value="<%=end%>">
    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
    <input type="hidden" name="hitsPerSite" value="<%=hitsPerSite%>">
    <input type="hidden" name="clustering" value="<%=clustering%>">
    <input type="submit" value="<i18n:message key="next"/>">
<% if (sort != null) { %>
    <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>">
    <input type="hidden" name="reverse" value="<%=reverse%>">
<% } %>
    </form>
<%
    }

// "Show all hits" repeats the query with hitsPerSite=0. It is offered when
// the total is not exact and no further page of returned hits remains.
if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) {
%>
    <form name="showAllHits" action="../search.jsp" method="get">
    <input type="hidden" name="query" value="<%=htmlQueryString%>">
    <input type="hidden" name="lang" value="<%=Entities.encode(queryLang)%>">
    <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>">
    <input type="hidden" name="hitsPerSite" value="0">
    <input type="hidden" name="clustering" value="<%=clustering%>">
    <input type="submit" value="<i18n:message key="showAllHits"/>">
<% if (sort != null) { %>
    <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>">
    <input type="hidden" name="reverse" value="<%=reverse%>">
<% } %>
    </form>
<%
    }
%>
|---|
|  | 343 |  | 
|---|
<%-- RSS badge linking to the OpenSearch view of the current query. --%>
<table bgcolor="3333ff" align="right">
<tr><td bgcolor="ff9900"><a href="<%=rss%>"><font color="ffffff"><b>RSS</b>
</font></a></td></tr>
</table>

<p>
<a href="http://wiki.apache.org/nutch/FAQ">
<img border="0" src="../img/poweredbynutch_01.gif" alt="Powered by Nutch">
</a>
</p>

<jsp:include page="/include/footer.html"/>

</body>
</html>
|---|