source: nutchez-0.1/tomcat/webapps/ROOT/cached.jsp @ 66

Last change on this file since 66 was 66, checked in by waue, 15 years ago

NutchEz - an easy way to nutch

File size: 3.8 KB
Line 
1<%--
2  Licensed to the Apache Software Foundation (ASF) under one or more
3  contributor license agreements.  See the NOTICE file distributed with
4  this work for additional information regarding copyright ownership.
5  The ASF licenses this file to You under the Apache License, Version 2.0
6  (the "License"); you may not use this file except in compliance with
7  the License.  You may obtain a copy of the License at
8 
9  http://www.apache.org/licenses/LICENSE-2.0
10 
11  Unless required by applicable law or agreed to in writing, software
12  distributed under the License is distributed on an "AS IS" BASIS,
13  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  See the License for the specific language governing permissions and
15  limitations under the License.
16--%>
17<%@ page
18  session="false"
19  contentType="text/html; charset=UTF-8"
20  import="java.io.*"
21  import="java.util.*"
22
23  import="org.apache.nutch.searcher.*"
24  import="org.apache.nutch.parse.ParseData"
25  import="org.apache.nutch.metadata.Metadata"
26  import="org.apache.nutch.metadata.Nutch"
27  import="org.apache.hadoop.conf.Configuration"
28  import="org.apache.nutch.util.NutchConfiguration"
%><%
  // Request setup for the cached-page view.
  //
  // Reads the 'idx' (index number) and 'id' (unique key) request parameters,
  // looks up the corresponding hit details through the NutchBean, and, when
  // the stored content type is text/html, decodes the cached bytes into
  // 'content' for the markup further down the page.
  //
  // Variables left in scope for the rest of the page:
  //   details     - HitDetails for the requested hit
  //   id          - query string ("idx=..&id=..") identifying the hit
  //   contentType - stored content type, never null after this scriptlet
  //   content     - decoded HTML, or null when unavailable / not text/html
  Configuration nutchConf = NutchConfiguration.get(application);
  NutchBean bean = NutchBean.get(application, nutchConf);
  bean.LOG.info("cache request from " + request.getRemoteAddr());

  // Reject malformed requests with 400 instead of letting a
  // NumberFormatException surface as a 500. Integer.parseInt(null) also
  // throws NumberFormatException, so a missing 'idx' is covered here too.
  // The parameter value is deliberately not echoed in the error message.
  int indexNo;
  try {
    indexNo = Integer.parseInt(request.getParameter("idx"));
  } catch (NumberFormatException e) {
    response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                       "Missing or invalid 'idx' parameter");
    return;
  }
  String uniqueKey = request.getParameter("id");
  if (uniqueKey == null) {
    response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                       "Missing 'id' parameter");
    return;
  }

  Hit hit = new Hit(indexNo, uniqueKey);
  HitDetails details = bean.getDetails(hit);

  // The unique key originates from the request, so URL-encode it before it
  // is placed in a query string that is later written into an href.
  String id = "idx=" + hit.getIndexNo() + "&id="
    + java.net.URLEncoder.encode(String.valueOf(hit.getUniqueKey()), "UTF-8");

  String language =
    ResourceBundle.getBundle("org.nutch.jsp.cached", request.getLocale())
    .getLocale().getLanguage();

  Metadata metaData = bean.getParseData(details).getContentMeta();

  String content = null;
  String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
  if (contentType == null) {
    // No content type was stored for this document. Treat it as opaque
    // binary data so the page offers the download link instead of failing
    // with a NullPointerException.
    contentType = "application/octet-stream";
  }
  if (contentType.startsWith("text/html")) {
    // FIXME : it's better to emit the original 'byte' sequence
    // with 'charset' set to the value of 'CharEncoding',
    // but I don't know how to emit 'byte sequence' in JSP.
    // out.getOutputStream().write(bean.getContent(details)) may work,
    // but I'm not sure.

    // Fetch the cached bytes once and reuse them on the fallback path.
    byte[] raw = bean.getContent(details);
    if (raw != null) {
      String encoding = (String) metaData.get("CharEncodingForConversion");
      if (encoding == null) {
        // No encoding recorded: decode with the platform default charset
        // (unchanged behaviour; the result depends on the server's locale).
        content = new String(raw);
      } else {
        try {
          content = new String(raw, encoding);
        } catch (UnsupportedEncodingException e) {
          // fallback to windows-1252
          content = new String(raw, "windows-1252");
        }
      }
    }
    // When raw is null, 'content' stays null and the page shows the
    // "noContent" message.
  }
%>
<%--
  Page header: declares the response charset, then prints the localized
  title and the "page" message with the original URL as its argument.

  The <base> element below is disabled. It is kept in a JSP comment rather
  than an HTML comment so that the crawled URL is not evaluated and written
  to the response: inside an HTML comment an unescaped URL containing "-->"
  would close the comment and inject markup into the page.

<base href="<%=details.getValue("url")%>">
--%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<%
  out.flush();
%>
<%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %>
<i18n:bundle baseName="org.nutch.jsp.cached"/>
<%-- An inline style attribute takes bare declarations; wrapping them in
     braces is invalid CSS. --%>
<h2 style="color: rgb(255, 153, 0)"><i18n:message key="title"/></h2>
<h3>
<i18n:message key="page">
  <i18n:messageArg value="<%=details.getValue(\"url\")%>"/>
</i18n:message>
</h3>
<hr>
<!--
   FIXME: have to sanitize 'content' : e.g. removing unnecessary part
        of head element
-->
<%
   // Caching-policy check: when the hit's "cache" field is present and is
   // anything other than Nutch.CACHING_FORBIDDEN_NONE, the cached copy is
   // not shown; the visitor gets a link to the original page and the
   // request ends here.
   String caching = details.getValue("cache");
   String url = details.getValue("url");
   if (caching != null && !caching.equals(Nutch.CACHING_FORBIDDEN_NONE)) {
     // The URL comes from the index, so escape it before writing it into
     // an attribute value and element text. '&' must be replaced first so
     // the entities produced by the later replacements are not re-escaped.
     String safeUrl = (url == null) ? "" : url
       .replace("&", "&amp;")
       .replace("<", "&lt;")
       .replace(">", "&gt;")
       .replace("\"", "&quot;")
       .replace("'", "&#39;");
%>
Display of this content was administratively prohibited by the webmaster.
You may visit the original page instead: <a href="<%=safeUrl%>"><%=safeUrl%></a>.
<%
     return;
   }
%>
<%
  // Final rendering step:
  //  - text/html documents: write the decoded cached page, or the localized
  //    "noContent" message when nothing could be decoded;
  //  - anything else: name the MIME type and link to the download servlet.
  //
  // The MIME type comes from stored document metadata and 'id' is derived
  // from request parameters, so both are HTML-escaped before being written
  // into text or an attribute value. '&' is replaced first so the entities
  // produced by the later replacements are not re-escaped.
  String safeContentType = (contentType == null) ? "" : contentType
    .replace("&", "&amp;")
    .replace("<", "&lt;")
    .replace(">", "&gt;")
    .replace("\"", "&quot;")
    .replace("'", "&#39;");
  String safeId = id
    .replace("&", "&amp;")
    .replace("<", "&lt;")
    .replace(">", "&gt;")
    .replace("\"", "&quot;")
    .replace("'", "&#39;");

  // A missing content type is treated as "not HTML" rather than throwing
  // a NullPointerException.
  if (contentType != null && contentType.startsWith("text/html")) {
%>

<% if (content != null && !content.equals("")) {%>
<%-- The cached page is written verbatim; it is NOT sanitized here. --%>
<%= content %>
<% } else { %>
<i18n:message key="noContent"/>
<% } %>

<% } else { %>

The cached content has mime type "<%=safeContentType%>",
click this <a href="./servlet/cached?<%=safeId%>">link</a> to download it directly.

<% } %>
Note: See TracBrowser for help on using the repository browser.