| 1 | <%-- | 
|---|
| 2 | Licensed to the Apache Software Foundation (ASF) under one or more | 
|---|
| 3 | contributor license agreements.  See the NOTICE file distributed with | 
|---|
| 4 | this work for additional information regarding copyright ownership. | 
|---|
| 5 | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|---|
| 6 | (the "License"); you may not use this file except in compliance with | 
|---|
| 7 | the License.  You may obtain a copy of the License at | 
|---|
| 8 |  | 
|---|
| 9 | http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 10 |  | 
|---|
| 11 | Unless required by applicable law or agreed to in writing, software | 
|---|
| 12 | distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 14 | See the License for the specific language governing permissions and | 
|---|
| 15 | limitations under the License. | 
|---|
| 16 | --%> | 
|---|
| 17 | <%@ page | 
|---|
| 18 | session="false" | 
|---|
| 19 | contentType="text/html; charset=UTF-8" | 
|---|
| 20 | pageEncoding="UTF-8" | 
|---|
| 21 |  | 
|---|
| 22 | import="java.io.*" | 
|---|
| 23 | import="java.util.*" | 
|---|
| 24 | import="java.net.*" | 
|---|
| 25 | import="javax.servlet.http.*" | 
|---|
| 26 | import="javax.servlet.*" | 
|---|
| 27 |  | 
|---|
| 28 | import="org.apache.nutch.html.Entities" | 
|---|
| 29 | import="org.apache.nutch.metadata.Nutch" | 
|---|
| 30 | import="org.apache.nutch.searcher.*" | 
|---|
| 31 | import="org.apache.nutch.plugin.*" | 
|---|
| 32 | import="org.apache.nutch.clustering.*" | 
|---|
| 33 | import="org.apache.hadoop.conf.*" | 
|---|
| 34 | import="org.apache.nutch.util.NutchConfiguration" | 
|---|
| 35 | %><%! | 
|---|
| 36 | /** | 
|---|
| 37 | * Number of hits to retrieve and cluster if clustering extension is available | 
|---|
| 38 | * and clustering is on. By default, 100. Configurable via nutch-conf.xml (property extension.clustering.hits-to-cluster, read in jspInit()). | 
|---|
| 39 | */ | 
|---|
| 40 | private int HITS_TO_CLUSTER; | 
|---|
| 41 |  | 
|---|
| 42 | /** | 
|---|
| 43 | * Maximum hits per page to be displayed, read from searcher.max.hits.per.page (default -1); the cap is only applied when the value is positive. | 
|---|
| 44 | */ | 
|---|
| 45 | private int MAX_HITS_PER_PAGE; | 
|---|
| 46 |  | 
|---|
| 47 | /** | 
|---|
| 48 | * An instance of the clustering extension, if available; left unset (null) when jspInit() fails to initialize one. | 
|---|
| 49 | */ | 
|---|
| 50 | private OnlineClusterer clusterer; | 
|---|
| 51 |  | 
|---|
| 52 | /** | 
|---|
| 53 | * Nutch configuration for this servlet, obtained from the servlet context in jspInit(). | 
|---|
| 54 | */ | 
|---|
| 55 | private Configuration nutchConf; | 
|---|
| 56 |  | 
|---|
| 57 | /** | 
|---|
| 58 | * Loads the Nutch configuration, the clustering/paging limits and the optional online clusterer when the page is initialized. | 
|---|
| 59 | */ | 
|---|
| 60 | public void jspInit() { | 
|---|
| 61 | super.jspInit(); | 
|---|
| 62 | // The configuration is looked up through the servlet context. | 
|---|
| 63 | final ServletContext application = getServletContext(); | 
|---|
| 64 | nutchConf = NutchConfiguration.get(application); | 
|---|
| 65 | HITS_TO_CLUSTER = nutchConf.getInt("extension.clustering.hits-to-cluster", 100); | 
|---|
| 66 | MAX_HITS_PER_PAGE = nutchConf.getInt("searcher.max.hits.per.page", -1); | 
|---|
| 67 | // Clustering is optional: on failure the clusterer stays unset and the cause is logged with its stack trace. | 
|---|
| 68 | try { | 
|---|
| 69 | clusterer = new OnlineClustererFactory(nutchConf).getOnlineClusterer(); | 
|---|
| 70 | } catch (PluginRuntimeException e) { | 
|---|
| 71 | super.log("Could not initialize online clusterer: " + e.toString(), e); | 
|---|
| 72 | } | 
|---|
| 73 | } | 
|---|
| 74 | %> | 
|---|
| 75 |  | 
|---|
| 76 | <%-- | 
|---|
| 77 | // Uncomment this to enable query refinement. | 
|---|
| 78 | // Do the same to "refine-query.jsp" below. | 
|---|
| 79 | <%@ include file="./refine-query-init.jsp" %> | 
|---|
| 80 | --%> | 
|---|
| 81 |  | 
|---|
| 82 | <% // Request setup: reads and validates the query parameters and derives the values used by the markup below. | 
|---|
| 83 | // The Nutch bean instance is initialized through a ServletContextListener | 
|---|
| 84 | // that is setup in the web.xml file | 
|---|
| 85 | NutchBean bean = NutchBean.get(application, nutchConf); | 
|---|
| 86 | // set the character encoding to use when interpreting request values | 
|---|
| 87 | request.setCharacterEncoding("UTF-8"); | 
|---|
| 88 |  | 
|---|
| 89 | bean.LOG.info("query request from " + request.getRemoteAddr()); | 
|---|
| 90 |  | 
|---|
| 91 | // get query from request | 
|---|
| 92 | String queryString = request.getParameter("query"); | 
|---|
| 93 | if (queryString == null) | 
|---|
| 94 | queryString = ""; | 
|---|
| 95 | String htmlQueryString = Entities.encode(queryString); | 
|---|
| 96 |  | 
|---|
| 97 | // a flag to make the code cleaner a bit. | 
|---|
| 98 | boolean clusteringAvailable = (clusterer != null); | 
|---|
| 99 |  | 
|---|
| 100 | String clustering = ""; | 
|---|
| 101 | if (clusteringAvailable && "yes".equals(request.getParameter("clustering"))) | 
|---|
| 102 | clustering = "yes"; | 
|---|
| 103 | // Numeric parameters are client-supplied: malformed values keep their defaults instead of failing the page. | 
|---|
| 104 | int start = 0;          // first hit to display | 
|---|
| 105 | String startString = request.getParameter("start"); | 
|---|
| 106 | if (startString != null) { | 
|---|
| 107 | try { start = Math.max(0, Integer.parseInt(startString)); } catch (NumberFormatException ignored) { /* keep the default */ } | 
|---|
| 108 | } | 
|---|
| 109 | int hitsPerPage = 10;          // number of hits to display | 
|---|
| 110 | String hitsString = request.getParameter("hitsPerPage"); | 
|---|
| 111 | if (hitsString != null) { | 
|---|
| 112 | try { hitsPerPage = Integer.parseInt(hitsString); } catch (NumberFormatException ignored) { /* keep the default */ } | 
|---|
| 113 | } | 
|---|
| 114 | if (hitsPerPage < 1) { hitsPerPage = 10; }          // a page must show at least one hit | 
|---|
| 115 | if (MAX_HITS_PER_PAGE > 0 && hitsPerPage > MAX_HITS_PER_PAGE) { hitsPerPage = MAX_HITS_PER_PAGE; } | 
|---|
| 116 | int hitsPerSite = 2;                            // max hits per site | 
|---|
| 117 | String hitsPerSiteString = request.getParameter("hitsPerSite"); | 
|---|
| 118 | if (hitsPerSiteString != null) { | 
|---|
| 119 | try { hitsPerSite = Math.max(0, Integer.parseInt(hitsPerSiteString)); } catch (NumberFormatException ignored) { /* keep the default */ } | 
|---|
| 120 | } | 
|---|
| 121 | String sort = request.getParameter("sort"); | 
|---|
| 122 | boolean reverse = | 
|---|
| 123 | sort!=null && "true".equals(request.getParameter("reverse")); | 
|---|
| 124 | // Client-supplied text (sort, query, lang) is URL-encoded before it is placed into generated link parameters. | 
|---|
| 125 | String params = "&hitsPerPage="+hitsPerPage | 
|---|
| 126 | +(sort==null ? "" : "&sort="+URLEncoder.encode(sort, "UTF-8")+(reverse?"&reverse=true":"")); | 
|---|
| 127 |  | 
|---|
| 128 | int hitsToCluster = HITS_TO_CLUSTER;            // number of hits to cluster | 
|---|
| 129 |  | 
|---|
| 130 | // get the lang from request | 
|---|
| 131 | String queryLang = request.getParameter("lang"); | 
|---|
| 132 | if (queryLang == null) { queryLang = ""; } | 
|---|
| 133 | Query query = Query.parse(queryString, queryLang, nutchConf); | 
|---|
| 134 | bean.LOG.info("query: " + queryString); | 
|---|
| 135 | bean.LOG.info("lang: " + queryLang); | 
|---|
| 136 |  | 
|---|
| 137 | String language = | 
|---|
| 138 | ResourceBundle.getBundle("org.nutch.jsp.search", request.getLocale()) | 
|---|
| 139 | .getLocale().getLanguage(); | 
|---|
| 140 | String requestURI = HttpUtils.getRequestURL(request).toString(); | 
|---|
| 141 | String base = requestURI.substring(0, requestURI.lastIndexOf('/')); | 
|---|
| 142 | String rss = "../opensearch?query="+URLEncoder.encode(queryString, "UTF-8") | 
|---|
| 143 | +"&hitsPerSite="+hitsPerSite+"&lang="+URLEncoder.encode(queryLang, "UTF-8")+params; | 
|---|
| 144 | %><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> | 
|---|
| 145 | <% | 
|---|
| 146 | // To prevent the character encoding declared with 'contentType' page | 
|---|
| 147 | // directive from being overridden by JSTL (apache i18n), we freeze it | 
|---|
| 148 | // by flushing the output buffer. | 
|---|
| 149 | // see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/ | 
|---|
| 150 | out.flush(); | 
|---|
| 151 | %> | 
|---|
| 152 | <%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %> | 
|---|
| 153 | <i18n:bundle baseName="org.nutch.jsp.search"/> | 
|---|
| 154 | <html lang="<%= language %>"> | 
|---|
| 155 | <head><%-- metadata elements such as the Content-Type meta belong inside head --%> | 
|---|
| 156 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | 
|---|
| 157 | <title>Nutch: <i18n:message key="title"/></title> | 
|---|
| 158 | <link rel="icon" href="img/favicon.ico" type="image/x-icon"/> | 
|---|
| 159 | <link rel="shortcut icon" href="img/favicon.ico" type="image/x-icon"/> | 
|---|
| 160 | <link rel="alternate" type="application/rss+xml" title="RSS" href="<%=rss%>"/> | 
|---|
| 161 | <jsp:include page="include/style.html"/> | 
|---|
| 162 | <base href="<%= base  + "/" + language %>/"> | 
|---|
| 163 | <script type="text/javascript"> | 
|---|
| 164 | <!-- | 
|---|
| 165 | function queryfocus() { document.search.query.focus(); } | 
|---|
| 166 | // --> | 
|---|
| 167 | </script> | 
|---|
| 168 | </head> | 
|---|
| 169 |  | 
|---|
| 170 | <body onLoad="queryfocus();"> | 
|---|
| 171 |  | 
|---|
| 172 | <jsp:include page="<%= language + \"/include/header.html\"%>"/> | 
|---|
| 173 |  | 
|---|
| 174 | <form name="search" action="../search.jsp" method="get"> | 
|---|
| 175 | <input name="query" size=44 value="<%=htmlQueryString%>"> | 
|---|
| 176 | <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> | 
|---|
| 177 | <input type="hidden" name="lang" value="<%=language%>"> | 
|---|
| 178 | <input type="submit" value="<i18n:message key="search"/>"> | 
|---|
| 179 | <% if (clusteringAvailable) { %> | 
|---|
| 180 | <input id="clustbox" type="checkbox" name="clustering" value="yes" <% if (clustering.equals("yes")) { %>CHECKED<% } %>> | 
|---|
| 181 | <label for="clustbox"><i18n:message key="clustering"/></label> | 
|---|
| 182 | <% } %> | 
|---|
| 183 | <a href="help.html">help</a> | 
|---|
| 184 | </form> | 
|---|
| 185 |  | 
|---|
| 186 | <%-- | 
|---|
| 187 | // Uncomment this to enable query refinement. | 
|---|
| 188 | // Do the same to "refine-query-init.jsp" above. | 
|---|
| 189 | <%@ include file="./refine-query.jsp" %> | 
|---|
| 190 | --%> | 
|---|
| 191 |  | 
|---|
| 192 | <% | 
|---|
| 193 | // how many hits to retrieve? if clustering is on and available, | 
|---|
| 194 | // take "hitsToCluster", otherwise just get hitsPerPage | 
|---|
| 195 | int hitsToRetrieve = (clusteringAvailable && clustering.equals("yes") ? hitsToCluster : hitsPerPage); | 
|---|
| 196 |  | 
|---|
| 197 | if (clusteringAvailable && clustering.equals("yes")) { | 
|---|
| 198 | bean.LOG.info("Clustering is on, hits to retrieve: " + hitsToRetrieve); | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 | // perform query | 
|---|
| 202 | // NOTE by Dawid Weiss: | 
|---|
| 203 | // The 'clustering' window actually moves with the start | 
|---|
| 204 | // position.... this is good, bad?... ugly?.... | 
|---|
| 205 | Hits hits; | 
|---|
| 206 | try{ | 
|---|
| 207 | hits = bean.search(query, start + hitsToRetrieve, hitsPerSite, "site", | 
|---|
| 208 | sort, reverse); | 
|---|
| 209 | } catch (IOException e){ | 
|---|
| 210 | application.log("Search failed for query: " + queryString, e); // record the failure, then render an empty result | 
|---|
| 211 | hits = new Hits(0,new Hit[0]); | 
|---|
| 212 | } | 
|---|
| 213 | int end = (int)Math.min(hits.getLength(), start + hitsPerPage); | 
|---|
| 214 | int length = Math.max(0, end-start); // never negative, even when start lies past the last hit | 
|---|
| 215 | int realEnd = (int)Math.min(hits.getLength(), start + hitsToRetrieve); | 
|---|
| 216 | Hit[] show = (realEnd > start) ? hits.getHits(start, realEnd-start) : new Hit[0]; // avoid requesting a negative count | 
|---|
| 217 | HitDetails[] details = bean.getDetails(show); | 
|---|
| 218 | Summary[] summaries = bean.getSummary(details, query); | 
|---|
| 219 | bean.LOG.info("total hits: " + hits.getTotal()); | 
|---|
| 220 | %> | 
|---|
| 221 |  | 
|---|
| 222 | <i18n:message key="hits"> | 
|---|
| 223 | <i18n:messageArg value="<%=new Long((end==0)?0:(start+1))%>"/> | 
|---|
| 224 | <i18n:messageArg value="<%=new Long(end)%>"/> | 
|---|
| 225 | <i18n:messageArg value="<%=new Long(hits.getTotal())%>"/> | 
|---|
| 226 | </i18n:message> | 
|---|
| 227 |  | 
|---|
| 228 | <% | 
|---|
| 229 | // be responsive | 
|---|
| 230 | out.flush(); | 
|---|
| 231 | %> | 
|---|
| 232 |  | 
|---|
| 233 | <br><br> | 
|---|
| 234 |  | 
|---|
| 235 | <% if (clustering.equals("yes") && length != 0) { %> | 
|---|
| 236 | <table border=0 cellspacing="3" cellpadding="0"> | 
|---|
| 237 |  | 
|---|
| 238 | <tr> | 
|---|
| 239 |  | 
|---|
| 240 | <td valign="top"> | 
|---|
| 241 |  | 
|---|
| 242 | <% } %> | 
|---|
| 243 |  | 
|---|
| 244 | <% | 
|---|
| 245 | for (int i = 0; i < length; i++) {      // display the hits | 
|---|
| 246 | Hit hit = show[i]; | 
|---|
| 247 | HitDetails detail = details[i]; | 
|---|
| 248 | String title = detail.getValue("title"); | 
|---|
| 249 | String url = detail.getValue("url"); | 
|---|
| 250 | String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getUniqueKey(); | 
|---|
| 251 | String summary = summaries[i].toHtml(true); | 
|---|
| 252 | String caching = detail.getValue("cache"); | 
|---|
| 253 | boolean showSummary = true; | 
|---|
| 254 | boolean showCached = true; | 
|---|
| 255 | if (caching != null) { | 
|---|
| 256 | showSummary = !caching.equals(Nutch.CACHING_FORBIDDEN_ALL); | 
|---|
| 257 | showCached = !caching.equals(Nutch.CACHING_FORBIDDEN_NONE); | 
|---|
| 258 | } | 
|---|
| 259 | // Document and request values are escaped below (HTML entities in markup, URL encoding in link parameters). | 
|---|
| 260 | if (title == null || title.equals("")) {      // use url for docs w/o title | 
|---|
| 261 | title = url; | 
|---|
| 262 | } | 
|---|
| 263 | %> | 
|---|
| 264 | <b><a href="<%=Entities.encode(url)%>"><%=Entities.encode(title)%></a></b> | 
|---|
| 265 | <%@ include file="more.jsp" %> | 
|---|
| 266 | <% if (!"".equals(summary) && showSummary) { %> | 
|---|
| 267 | <br><%=summary%> | 
|---|
| 268 | <% } %> | 
|---|
| 269 | <br> | 
|---|
| 270 | <span class="url"><%=Entities.encode(url)%></span> | 
|---|
| 271 | <% | 
|---|
| 272 | if (showCached) { | 
|---|
| 273 | %>(<a href="../cached.jsp?<%=id%>"><i18n:message key="cached"/></a>) <% | 
|---|
| 274 | } | 
|---|
| 275 | %> | 
|---|
| 276 | (<a href="../explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString, "UTF-8")%>&lang=<%=URLEncoder.encode(queryLang, "UTF-8")%>"><i18n:message key="explain"/></a>) | 
|---|
| 277 | (<a href="../anchors.jsp?<%=id%>"><i18n:message key="anchors"/></a>) | 
|---|
| 278 | <% if (hit.moreFromDupExcluded()) { | 
|---|
| 279 | String more = | 
|---|
| 280 | "query="+URLEncoder.encode("site:"+hit.getDedupValue()+" "+queryString, "UTF-8") | 
|---|
| 281 | +params+"&hitsPerSite="+0 | 
|---|
| 282 | +"&lang="+URLEncoder.encode(queryLang, "UTF-8") | 
|---|
| 283 | +"&clustering="+clustering;%> | 
|---|
| 284 | (<a href="../search.jsp?<%=Entities.encode(more)%>"><i18n:message key="moreFrom"/> | 
|---|
| 285 | <%=Entities.encode(String.valueOf(hit.getDedupValue()))%></a>) | 
|---|
| 286 | <% } %> | 
|---|
| 287 | <br><br> | 
|---|
| 288 | <% } %> | 
|---|
| 289 |  | 
|---|
| 290 | <% if (clustering.equals("yes") && length != 0) { %> | 
|---|
| 291 |  | 
|---|
| 292 | </td> | 
|---|
| 293 |  | 
|---|
| 294 | <!-- clusters: a dotted separator cell, then the cluster list included from cluster.jsp --> | 
|---|
| 295 | <td style="border-right: 1px dotted gray;"> </td> | 
|---|
| 296 | <td align="left" valign="top" width="25%"> | 
|---|
| 297 | <%@ include file="cluster.jsp" %> | 
|---|
| 298 | </td> | 
|---|
| 299 |  | 
|---|
| 300 | </tr> | 
|---|
| 301 | </table> | 
|---|
| 302 |  | 
|---|
| 303 | <% } %> | 
|---|
| 304 |  | 
|---|
| 305 | <% | 
|---|
| 306 | // "next" form: request-supplied values (lang, sort) are HTML-escaped before being echoed into attributes. | 
|---|
| 307 | if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show | 
|---|
| 308 | || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) { | 
|---|
| 309 | %> | 
|---|
| 310 | <form name="next" action="../search.jsp" method="get"> | 
|---|
| 311 | <input type="hidden" name="query" value="<%=htmlQueryString%>"> | 
|---|
| 312 | <input type="hidden" name="lang" value="<%=Entities.encode(queryLang)%>"> | 
|---|
| 313 | <input type="hidden" name="start" value="<%=end%>"> | 
|---|
| 314 | <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> | 
|---|
| 315 | <input type="hidden" name="hitsPerSite" value="<%=hitsPerSite%>"> | 
|---|
| 316 | <input type="hidden" name="clustering" value="<%=clustering%>"> | 
|---|
| 317 | <input type="submit" value="<i18n:message key="next"/>"> | 
|---|
| 318 | <% if (sort != null) { %> | 
|---|
| 319 | <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>"> | 
|---|
| 320 | <input type="hidden" name="reverse" value="<%=reverse%>"> | 
|---|
| 321 | <% } %> | 
|---|
| 322 | </form> | 
|---|
| 323 | <% | 
|---|
| 324 | } | 
|---|
| 325 | // "showAllHits" form: when the total is not exact and no further page exists, offer the same search with hitsPerSite=0. | 
|---|
| 326 | if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) { | 
|---|
| 327 | %> | 
|---|
| 328 | <form name="showAllHits" action="../search.jsp" method="get"> | 
|---|
| 329 | <input type="hidden" name="query" value="<%=htmlQueryString%>"> | 
|---|
| 330 | <input type="hidden" name="lang" value="<%=Entities.encode(queryLang)%>"> | 
|---|
| 331 | <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> | 
|---|
| 332 | <input type="hidden" name="hitsPerSite" value="0"> | 
|---|
| 333 | <input type="hidden" name="clustering" value="<%=clustering%>"> | 
|---|
| 334 | <input type="submit" value="<i18n:message key="showAllHits"/>"> | 
|---|
| 335 | <% if (sort != null) { %> | 
|---|
| 336 | <input type="hidden" name="sort" value="<%=Entities.encode(sort)%>"> | 
|---|
| 337 | <input type="hidden" name="reverse" value="<%=reverse%>"> | 
|---|
| 338 | <% } %> | 
|---|
| 339 | </form> | 
|---|
| 340 | <% | 
|---|
| 341 | } | 
|---|
| 342 | %> | 
|---|
| 343 |  | 
|---|
| 344 | <table bgcolor="3333ff" align="right"> | 
|---|
| 345 | <tr><td bgcolor="ff9900"><a href="<%=rss%>"><font color="ffffff"><b>RSS</b> | 
|---|
| 346 | </font></a></td></tr> | 
|---|
| 347 | </table> | 
|---|
| 348 |  | 
|---|
| 349 | <p> | 
|---|
| 350 | <a href="http://wiki.apache.org/nutch/FAQ"> | 
|---|
| 351 | <img border="0" src="../img/poweredbynutch_01.gif"> | 
|---|
| 352 | </a> | 
|---|
| 353 |  | 
|---|
| 354 | <jsp:include page="/include/footer.html"/> | 
|---|
| 355 |  | 
|---|
| 356 | </body> | 
|---|
| 357 | </html> | 
|---|