Changes between Version 8 and Version 9 of waue/2009/nutch_install


Ignore:
Timestamp:
Apr 24, 2009, 4:06:14 PM (15 years ago)
Author:
waue
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • waue/2009/nutch_install

    v8 v9  
    84 84  <name>http.agent.name</name>
    85 85  <value>nutch</value>
    86   <description>HTTP 'User-Agent' request header. </description>
     86  <description>HTTP 'User-Agent' request header. </description> 
    87 87</property>
    88 88<property>
    89 89  <name>http.agent.description</name>
    90   <value>nutch-crawl</value>
    91   <description>Further description</description>
    92 </property>
    93 <property>
    94   <name>http.agent.url</name>
    95   <value>localhost</value>
    96   <description>A URL to advertise in the User-Agent header. </description>
     90  <value>MyTest</value>
     91  <description>Further description</description> 
     92</property>
     93<property>
     94  <name>http.agent.url</name> 
     95  <value>localhost</value> 
     96  <description>A URL to advertise in the User-Agent header. </description> 
    97 97</property>
    98 98<property>
    99 99  <name>http.agent.email</name>
    100   <value>user@nchc.org.tw</value>
    101   <description>An email address
    102   </description>
     100  <value>test@test.org.tw</value>
     101  <description>An email address 
     102  </description> 
    103 103</property>
    104 104<property>
     
    122 122   <value>-1</value>
    123 123   <description> </description>
    124  </property>
     124 </property> 
    125 125 <property>
    126    <name>http.content.limit</name>
     126   <name>http.content.limit</name> 
    127 127   <value>-1</value>
    128 128 </property>
    129 129<property>
    130 <property>
    131 130  <name>indexer.mergeFactor</name>
    132 131  <value>500</value>
    133   <description>The factor that determines the frequency of Lucene segment merges. </description>
    134 </property>
     132  <description>The factor that determines the frequency of Lucene segment
     133  merges. This must not be less than 2, higher values increase indexing
     134  speed but lead to increased RAM usage, and increase the number of
     135  open file handles (which may lead to "Too many open files" errors).
     136  NOTE: the "segments" here have nothing to do with Nutch segments, they
     137  are a low-level data unit used by Lucene.
     138  </description>
     139</property>
     140
    135 141<property>
    136 142  <name>indexer.minMergeDocs</name>
    137 143  <value>500</value>
    138   <description>This number determines the minimum number of Lucene. </description>
     144  <description>This number determines the minimum number of Lucene
     145  Documents buffered in memory between Lucene segment merges. Larger
     146  values increase indexing speed and increase RAM usage.
     147  </description>
    139 148</property>
    140 149</configuration>