1 | <%-- |
---|
2 | Licensed to the Apache Software Foundation (ASF) under one or more |
---|
3 | contributor license agreements. See the NOTICE file distributed with |
---|
4 | this work for additional information regarding copyright ownership. |
---|
5 | The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
6 | (the "License"); you may not use this file except in compliance with |
---|
7 | the License. You may obtain a copy of the License at |
---|
8 | |
---|
9 | http://www.apache.org/licenses/LICENSE-2.0 |
---|
10 | |
---|
11 | Unless required by applicable law or agreed to in writing, software |
---|
12 | distributed under the License is distributed on an "AS IS" BASIS, |
---|
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
14 | See the License for the specific language governing permissions and |
---|
15 | limitations under the License. |
---|
16 | --%> |
---|
17 | <%@ page |
---|
18 | session="false" |
---|
19 | contentType="text/html; charset=UTF-8" |
---|
20 | import="java.io.*" |
---|
21 | import="java.util.*" |
---|
22 | |
---|
23 | import="org.apache.nutch.searcher.*" |
---|
24 | import="org.apache.nutch.parse.ParseData" |
---|
25 | import="org.apache.nutch.metadata.Metadata" |
---|
26 | import="org.apache.nutch.metadata.Nutch" |
---|
27 | import="org.apache.hadoop.conf.Configuration" |
---|
28 | import="org.apache.nutch.util.NutchConfiguration" |
---|
29 | %><% |
---|
// Look up the Nutch configuration and search bean via the servlet context.
Configuration nutchConf = NutchConfiguration.get(application);
NutchBean bean = NutchBean.get(application, nutchConf);
bean.LOG.info("cache request from " + request.getRemoteAddr());

// "idx" and "id" come straight from the query string. Reject requests where
// they are missing or malformed with a 400 instead of letting
// Integer.parseInt() raise an uncaught NumberFormatException (HTTP 500).
String idxParam = request.getParameter("idx");
String idParam = request.getParameter("id");
int indexNo;
try {
  indexNo = Integer.parseInt(idxParam);
} catch (NumberFormatException e) {
  response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                     "Missing or invalid 'idx' parameter");
  return;
}
if (idParam == null) {
  response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                     "Missing 'id' parameter");
  return;
}

Hit hit = new Hit(indexNo, idParam);
HitDetails details = bean.getDetails(hit);

// Query string used further down for the direct-download link. The unique
// key originates from the request, so it is URL-encoded before being placed
// into a URL.
String id = "idx=" + hit.getIndexNo() + "&id="
    + java.net.URLEncoder.encode(String.valueOf(hit.getUniqueKey()), "UTF-8");

// Language of the resource bundle that was actually resolved for the
// request locale.
String language =
  ResourceBundle.getBundle("org.nutch.jsp.cached", request.getLocale())
    .getLocale().getLanguage();

Metadata metaData = bean.getParseData(details).getContentMeta();

String content = null;
String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);
if (contentType == null) {
  // No Content-Type recorded: treat it as "not HTML" rather than failing
  // with a NullPointerException on startsWith() here and further down.
  contentType = "";
}
if (contentType.startsWith("text/html")) {
  // FIXME : it's better to emit the original 'byte' sequence
  // with 'charset' set to the value of 'CharEncoding',
  // but I don't know how to emit 'byte sequence' in JSP.
  // out.getOutputStream().write(bean.getContent(details)) may work,
  // but I'm not sure.
  String encoding = (String) metaData.get("CharEncodingForConversion");
  // Fetch the raw bytes once; they are reused by the fallback path.
  byte[] rawContent = bean.getContent(details);
  if (rawContent != null) {
    if (encoding != null) {
      try {
        content = new String(rawContent, encoding);
      } catch (UnsupportedEncodingException e) {
        // fallback to windows-1252
        content = new String(rawContent, "windows-1252");
      }
    } else {
      // No encoding recorded: decode with the platform default charset.
      content = new String(rawContent);
    }
  }
  // When no bytes are available 'content' stays null and the page shows
  // the "noContent" message.
}
---|
65 | %> |
---|
<%--
  Page prelude: a disabled <base> tag recording the original URL, the
  charset declaration for this page, and an explicit flush of what has been
  written so far. The URL comes from the index, so it is HTML-escaped to keep
  a value containing "-->" from closing the comment and injecting markup.
--%>
<!--
<base href="<%=String.valueOf(details.getValue("url")).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")%>">
-->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<%
  out.flush();
%>
---|
<%--
  Localised header: loads the "org.nutch.jsp.cached" bundle and prints the
  page title plus a line naming the URL of the cached page. The inline style
  is written without braces; "{...}" is not valid inside a style attribute
  and causes the colour declaration to be ignored.
--%>
<%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %>
<i18n:bundle baseName="org.nutch.jsp.cached"/>
<h2 style="color: rgb(255, 153, 0)"><i18n:message key="title"/></h2>
<h3>
<i18n:message key="page">
  <i18n:messageArg value="<%=details.getValue(\"url\")%>"/>
</i18n:message>
</h3>
<hr>
---|
<!--
FIXME: have to sanitize 'content' : e.g. removing unnecessary parts
of the head element
-->
---|
<%
  // If the document carries a "cache" value other than
  // Nutch.CACHING_FORBIDDEN_NONE, do not show the cached copy: print a notice
  // with a link to the original page and stop rendering.
  String caching = details.getValue("cache");
  String url = details.getValue("url");
  if (caching != null && !caching.equals(Nutch.CACHING_FORBIDDEN_NONE)) {
    // The URL comes from crawled data; escape it before writing it into an
    // attribute value and into element text.
    String escapedUrl = String.valueOf(url)
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;");
%>
Display of this content was administratively prohibited by the webmaster.
You may visit the original page instead: <a href="<%=escapedUrl%>"><%=escapedUrl%></a>.
<%
    return;
  }
%>
---|
<%--
  Body of the page: for text/html documents the decoded cached content is
  written out as-is (or the "noContent" message when there is none); for any
  other type a link to the raw-download servlet is shown instead.
--%>
<% if (contentType.startsWith("text/html")) {%>

<% if (content != null && !content.equals("")) {%>
<%= content %>
<% } else { %>
<i18n:message key="noContent"/>
<% } %>

<% } else {
     // The MIME type is taken from stored content metadata and the query
     // string is built from request parameters; neither is trusted, so both
     // are HTML-escaped before being written into the page.
     String escapedType = String.valueOf(contentType)
         .replace("&", "&amp;")
         .replace("<", "&lt;")
         .replace(">", "&gt;")
         .replace("\"", "&quot;");
     String escapedQuery = String.valueOf(id)
         .replace("&", "&amp;")
         .replace("<", "&lt;")
         .replace(">", "&gt;")
         .replace("\"", "&quot;");
%>

The cached content has mime type "<%=escapedType%>",
click this <a href="./servlet/cached?<%=escapedQuery%>">link</a> to download it directly.

<% } %>
---|