| [66] | 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> | 
|---|
 | 2 | <html><!--This page is automatically generated.  Do not edit!--> | 
|---|
 | 3 | <head> | 
|---|
 | 4 | <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> | 
|---|
 | 5 | <title>Nutch: about</title> | 
|---|
 | 6 | <style type="text/css"> | 
|---|
 | 7 |  | 
|---|
 | 8 | .menuTd { | 
|---|
 | 9 |   background-color: #F9F7F4; | 
|---|
 | 10 |   height: 25px; | 
|---|
 | 11 | } | 
|---|
 | 12 |  | 
|---|
 | 13 | .menuTdhover { | 
|---|
 | 14 |   background-color: #ECE5DC; | 
|---|
 | 15 |   height: 25px; | 
|---|
 | 16 | } | 
|---|
 | 17 |  | 
|---|
 | 18 | .menuEntry { | 
|---|
 | 19 |   font-family: Arial, Helvetica, sans-serif; | 
|---|
 | 20 |   font-size: 12px; | 
|---|
 | 21 |   color: #000000; | 
|---|
 | 22 |   text-decoration: none; | 
|---|
 | 23 | } | 
|---|
 | 24 |  | 
|---|
 | 25 | .body { | 
|---|
 | 26 |   background-color: #F9F7F4; | 
|---|
 | 27 | } | 
|---|
 | 28 |  | 
|---|
 | 29 | .bodytext { | 
|---|
 | 30 |   font-family: Arial, Helvetica, sans-serif; | 
|---|
 | 31 |   font-size: 12px; | 
|---|
 | 32 |   color: #000000; | 
|---|
 | 33 |   text-decoration: none; | 
|---|
 | 34 | } | 
|---|
 | 35 |  | 
|---|
 | 36 | .title { | 
|---|
 | 37 |   font-family: Arial, Helvetica, sans-serif; | 
|---|
 | 38 |   font-size: 26px; | 
|---|
 | 39 |   color: #FF9900; | 
|---|
 | 40 |   text-decoration: none; | 
|---|
 | 41 | } | 
|---|
 | 42 |  | 
|---|
 | 43 | .intro { | 
|---|
 | 44 |   font-family: Arial, Helvetica, sans-serif; | 
|---|
 | 45 |   font-size: 12px; | 
|---|
 | 46 |   color: #FF9900; | 
|---|
 | 47 |   text-decoration: none; | 
|---|
 | 48 | } | 
|---|
 | 49 |  | 
|---|
 | 50 | .orangeTd { | 
|---|
 | 51 |   background-color: #FF9900 | 
|---|
 | 52 | } | 
|---|
 | 53 |  | 
|---|
 | 54 | ul { | 
|---|
 | 55 |   list-style-image: url(../img/reiter/ul.gif) | 
|---|
 | 56 | } | 
|---|
 | 57 |  | 
|---|
 | 58 | h3 { | 
|---|
 | 59 |   font-family: Arial, Helvetica, sans-serif; | 
|---|
 | 60 |   font-size: 16px; | 
|---|
 | 61 |   color: #000000; | 
|---|
 | 62 | } | 
|---|
 | 63 |  | 
|---|
 | 64 | h4 { | 
|---|
 | 65 |   font-family: Arial, Helvetica, sans-serif; | 
|---|
 | 66 |   font-size: 14px; | 
|---|
 | 67 |   color: #000000; | 
|---|
 | 68 | } | 
|---|
 | 69 |  | 
|---|
 | 70 | .url { | 
|---|
 | 71 |   color: #996600; | 
|---|
 | 72 | } | 
|---|
 | 73 |  | 
|---|
 | 74 | .highlight { | 
|---|
 | 75 |   font-weight: bold; | 
|---|
 | 76 | } | 
|---|
 | 77 |  | 
|---|
 | 78 | .ellipsis { | 
|---|
 | 79 |   font-weight: bold; | 
|---|
 | 80 | } | 
|---|
 | 81 |  | 
|---|
 | 82 | </style> | 
|---|
 | 83 | <link rel="icon" href="../img/favicon.ico" type="image/x-icon"> | 
|---|
 | 84 | <link rel="shortcut icon" href="../img/favicon.ico" type="image/x-icon"> | 
|---|
 | 85 | <script type="text/javascript"> | 
|---|
 | 86 | <!-- | 
|---|
 | 87 | function queryfocus() { | 
|---|
 | 88 |   search = document.search; | 
|---|
 | 89 |   if (search != null) { search.query.focus(); } | 
|---|
 | 90 | } | 
|---|
 | 91 | // --></script> | 
|---|
 | 92 | </head> | 
|---|
 | 93 | <body onLoad="queryfocus();"> | 
|---|
 | 94 | <!--This file is automatically generated.  Do not edit!--> | 
|---|
 | 95 | <!-- Page header: logo linking to ./ plus the About and FAQ links. Purely decorative images carry an empty alt. --><table width="635" border="0" cellpadding="0" cellspacing="0"> | 
|---|
 | 96 | <tr> | 
|---|
 | 97 | <td valign="bottom" width="140" rowspan="2"><a href="./"><img src="../img/reiter/logo_nutch.gif" border="0" alt="Nutch"></a><img src="../img/reiter/spacer_666666.gif" width="140" height="1" alt=""></td> | 
|---|
 | 98 | </tr> | 
|---|
 | 99 | <tr> | 
|---|
 | 100 | <td width="495" valign="bottom" align="right"> | 
|---|
 | 101 | <table border="0" cellpadding="0" cellspacing="0" width="495"> | 
|---|
 | 102 | <tr> | 
|---|
 | 103 | <td background="../img/reiter/_bg_reiter.gif" width="400"> </td><td height="28" valign="bottom" width="10"><img src="../img/reiter/reiter_inactive_le1.gif" border="0" alt=""></td><td background="../img/reiter/_bg_reiter_inactive.gif" valign="bottom" nowrap="nowrap"><a class="bodytext" href="about.html">About</a></td><td height="28" valign="bottom" width="10"><img src="../img/reiter/reiter_inactive_ri.gif" border="0" alt=""></td><td height="28" valign="bottom" width="10"><img src="../img/reiter/reiter_inactive_le.gif" border="0" alt=""></td><td background="../img/reiter/_bg_reiter_inactive.gif" valign="bottom" nowrap="nowrap"><a class="bodytext" href="http://wiki.apache.org/nutch/FAQ">FAQ</a></td><td height="28" valign="bottom" width="10"><img src="../img/reiter/reiter_inactive_ri.gif" border="0" alt=""></td> | 
|---|
 | 104 | </tr> | 
|---|
 | 105 | </table> | 
|---|
 | 106 | </td> | 
|---|
 | 107 | </tr> | 
|---|
 | 108 | </table> | 
|---|
 | 109 | <table width="635" border="0" cellpadding="0" cellspacing="0"> | 
|---|
 | 110 | <tr valign="top"> | 
|---|
 | 111 | <td width="140"> | 
|---|
 | 112 | <table width="100%" cellpadding="0" cellspacing="0"> | 
|---|
 | 113 | <tr class="menuTd" height="25"> | 
|---|
 | 114 | <td class="menuTd" onmouseover="this.className='menuTdhover';" onmouseout="this.className='menuTd'" width="100%"> :: <a href="http://lucene.apache.org/nutch/credits.html" class="menuEntry">Credits</a></td> | 
|---|
 | 115 | </tr> | 
|---|
 | 116 | <tr height="1px"><!-- 1px divider line under the Credits menu entry; the image is decorative, hence the empty alt --> | 
|---|
 | 117 | <td><img src="../img/reiter/spacer_666666.gif" height="1" width="100%" alt=""></td> | 
|---|
 | 118 | </tr> | 
|---|
 | 119 | <tr class="menuTd" height="25"> | 
|---|
 | 120 | <td class="menuTd" onmouseover="this.className='menuTdhover';" onmouseout="this.className='menuTd'" width="100%"> :: <a href="http://lucene.apache.org/nutch/index.html" class="menuEntry">Status</a></td> | 
|---|
 | 121 | </tr> | 
|---|
 | 122 | <tr height="1px"><!-- 1px divider line under the Status menu entry; the image is decorative, hence the empty alt --> | 
|---|
 | 123 | <td><img src="../img/reiter/spacer_666666.gif" height="1" width="100%" alt=""></td> | 
|---|
 | 124 | </tr> | 
|---|
 | 125 | <tr> | 
|---|
 | 126 | <td> </td> | 
|---|
 | 127 | </tr> | 
|---|
 | 128 | </table> | 
|---|
 | 129 | </td><td width="20" background="../img/reiter/_spacer_cccccc.gif"> </td><td width="475" class="body"> | 
|---|
 | 130 | <!-- Page title banner: the heading text next to robots.gif. NOTE(review): robots.gif is assumed decorative (empty alt); give it descriptive text if it conveys content. --><table width="475" border="0" cellpadding="0" cellspacing="0"> | 
|---|
 | 131 | <tr> | 
|---|
 | 132 | <td class="title" height="125" width="275" valign="bottom">about</td><td height="125" width="200" valign="bottom"><img src="../img/reiter/robots.gif" alt=""></td> | 
|---|
 | 133 | </tr> | 
|---|
 | 134 | </table> | 
|---|
 | 135 | <br class="br"> | 
|---|
 | 136 | <span class="bodytext"> | 
|---|
 | 137 |  | 
|---|
 | 138 | </span><span class="intro"> | 
|---|
 | 139 | <p>Nutch is a nascent effort to implement an open-source web search | 
|---|
 | 140 | engine.</p> | 
|---|
 | 141 | </span><span class="bodytext"> | 
|---|
 | 142 |  | 
|---|
 | 143 | </span><span class="bodytext"> | 
|---|
 | 144 | <p>Web search is a basic requirement for internet navigation, yet the | 
|---|
 | 145 | number of web search engines is decreasing. Today's oligopoly could | 
|---|
 | 146 | soon be a monopoly, with a single company controlling nearly all web | 
|---|
 | 147 | search for its commercial gain.  That would not be good for users of | 
|---|
 | 148 | the internet.</p> | 
|---|
 | 149 | </span><span class="bodytext"> | 
|---|
 | 150 |  | 
|---|
 | 151 | </span><span class="bodytext"> | 
|---|
 | 152 | <p>Nutch provides a transparent alternative to commercial web search | 
|---|
 | 153 | engines.  Only open source search results can be fully trusted to be | 
|---|
 | 154 | without bias.  (Or at least their bias is public.)  All existing major | 
|---|
 | 155 | search engines have proprietary ranking formulas, and will not explain | 
|---|
 | 156 | why a given page ranks as it does.  Additionally, some search engines | 
|---|
 | 157 | determine which sites to index based on payments, rather than on the | 
|---|
 | 158 | merits of the sites themselves.  Nutch, on the other hand, has nothing | 
|---|
 | 159 | to hide and no motive to bias its results or its crawler in any way | 
|---|
 | 160 | other than to try to give each user the best results possible.</p> | 
|---|
 | 161 | </span><span class="bodytext"> | 
|---|
 | 162 |  | 
|---|
 | 163 | </span><span class="bodytext"> | 
|---|
 | 164 | <p>Nutch aims to enable anyone to easily and cost-effectively deploy a | 
|---|
 | 165 | world-class web search engine.  This is a substantial challenge.  To | 
|---|
 | 166 | succeed, Nutch software must be able to:</p> | 
|---|
 | 167 | </span><span class="bodytext"> | 
|---|
 | 168 | </span><span class="bodytext"> | 
|---|
 | 169 | <ul> | 
|---|
 | 170 |    | 
|---|
 | 171 | <li>fetch several billion pages per month</li> | 
|---|
 | 172 |    | 
|---|
 | 173 | <li>maintain an index of these pages</li> | 
|---|
 | 174 |    | 
|---|
 | 175 | <li>search that index up to 1000 times per second</li> | 
|---|
 | 176 |    | 
|---|
 | 177 | <li>provide very high quality search results</li> | 
|---|
 | 178 |    | 
|---|
 | 179 | <li>operate at minimal cost</li> | 
|---|
 | 180 |  | 
|---|
 | 181 | </ul> | 
|---|
 | 182 | </span><span class="bodytext"> | 
|---|
 | 183 |  | 
|---|
 | 184 | </span><span class="bodytext"> | 
|---|
 | 185 | <p>This is a challenging proposition.  If you believe in the merits of | 
|---|
 | 186 | this project, please help out, either as a <a href="http://www.apache.org/dev/">developer</a> or with a <a href="http://www.apache.org/foundation/contributing.html">donation</a>. | 
|---|
 | 187 |  | 
|---|
 | 188 | </p> | 
|---|
 | 189 | </span><span class="bodytext"> | 
|---|
 | 190 |  | 
|---|
 | 191 | </span> | 
|---|
 | 192 | <br class="br"> | 
|---|
 | 193 | <br class="br"> | 
|---|
 | 194 | </td> | 
|---|
 | 195 | </tr> | 
|---|
 | 196 | </table> | 
|---|
 | 197 | <table width="635" border="0" cellpadding="0" cellspacing="0"> | 
|---|
 | 198 |    | 
|---|
 | 199 | <tr> | 
|---|
 | 200 |      | 
|---|
 | 201 | <td width="140"> </td> | 
|---|
 | 202 |     <td width="20"> </td> | 
|---|
 | 203 |     <td width="475" align="center"> | 
|---|
 | 204 |     <span class="bodytext"> | 
|---|
 | 205 |      | 
|---|
 | 206 | <br> | 
|---|
 | 207 |        | 
|---|
 | 208 | <a href="../ca/">ca</a> | | 
|---|
 | 209 |       <a href="../de/">de</a> | | 
|---|
 | 210 |       <a href="../en/">en</a> | | 
|---|
 | 211 |       <a href="../es/">es</a> | | 
|---|
 | 212 |       <a href="../fi/">fi</a> | | 
|---|
 | 213 |       <a href="../fr/">fr</a> | | 
|---|
 | 214 |       <a href="../hu/">hu</a> | | 
|---|
 | 215 |       <a href="../it/">it</a> | | 
|---|
 | 216 |       <a href="../jp/">jp</a> | | 
|---|
 | 217 |       <a href="../ms/">ms</a> | | 
|---|
 | 218 |       <a href="../nl/">nl</a> | | 
|---|
 | 219 |       <a href="../pl/">pl</a> | | 
|---|
 | 220 |       <a href="../pt/">pt</a> | | 
|---|
 | 221 |       <a href="../sh/">sh</a> | | 
|---|
 | 222 |       <a href="../sr/">sr</a> | | 
|---|
 | 223 |       <a href="../sv/">sv</a> | | 
|---|
 | 224 |       <a href="../th/">th</a> | | 
|---|
 | 225 |       <a href="../zh/">zh</a> | 
|---|
 | 226 |     </span> | 
|---|
 | 227 |     </td> | 
|---|
 | 228 |    | 
|---|
 | 229 | </tr> | 
|---|
 | 230 |    | 
|---|
 | 231 | <tr> | 
|---|
 | 232 |    | 
|---|
 | 233 | </tr> | 
|---|
 | 234 |  | 
|---|
 | 235 | </table> | 
|---|
 | 236 | </body> | 
|---|
 | 237 | </html> | 
|---|