close
Warning:
Can't synchronize with repository "(default)" (Unsupported version control system "svn": /usr/lib/python2.7/dist-packages/libsvn/_delta.so: failed to map segment from shared object: Cannot allocate memory). Look in the Trac log for more information.
- Timestamp: Jul 15, 2009, 3:59:30 PM (16 years ago)
- Author: waue
- Comment: --
Legend:
- Unmodified
- Added
- Removed
- Modified
-
| v1 | v2 | |
| 1 | 1 | {{{ |
| 2 | | public void map(LongWritable key, Text value, |
| 3 | | OutputCollector<Text, IntWritable> output, Reporter reporter) |
| 4 | | throws IOException { |
| 5 | | String line = (caseSensitive) ? value.toString() : value.toString() |
| 6 | | .toLowerCase(); |
| | 2 | public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { |
| | 3 | public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { |
| 7 | 4 | |
| 8 | | for (String pattern : patternsToSkip) { |
| 9 | | line = line.replaceAll(pattern, ""); |
| 10 | | } |
| | 5 | while (tokenizer.hasMoreTokens()) { |
| | 6 | private final static IntWritable one = new IntWritable(1); |
| | 7 | private Text word = new Text(); |
| | 8 | output.collect(word, one); |
| 11 | 9 | |
| 12 | | StringTokenizer tokenizer = new StringTokenizer(line); |
| 13 | | while (tokenizer.hasMoreTokens()) { |
| 14 | | word.set(tokenizer.nextToken()); |
| 15 | | output.collect(word, one); |
| 16 | | reporter.incrCounter(Counters.INPUT_WORDS, 1); |
| 17 | | } |
| | 10 | }}} |
| 18 | 11 | |
| 19 | | if ((++numRecords % 100) == 0) { |
| 20 | | reporter.setStatus("Finished processing " + numRecords |
| 21 | | + " records " + "from the input file: " + inputFile); |
| 22 | | } |
| 23 | | } |
| 24 | | } |
| | 12 | {{{ |
| | 13 | public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { |
| | 14 | public void reduce(Text key, Iterator<IntWritable> values,OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { |
| | 15 | int sum = 0; |
| | 16 | while (values.hasNext()) { |
| | 17 | sum += values.next().get(); |
| | 18 | } |
| | 19 | output.collect(key, new IntWritable(sum)); |
| 25 | 20 | |
| 26 | | public static class Reduce extends MapReduceBase implements |
| 27 | | Reducer<Text, IntWritable, Text, IntWritable> { |
| 28 | | public void reduce(Text key, Iterator<IntWritable> values, |
| 29 | | OutputCollector<Text, IntWritable> output, Reporter reporter) |
| 30 | | throws IOException { |
| 31 | | int sum = 0; |
| 32 | | while (values.hasNext()) { |
| 33 | | sum += values.next().get(); |
| 34 | | } |
| 35 | | output.collect(key, new IntWritable(sum)); |
| 36 | | } |
| 37 | | } |
| 38 | 21 | }}} |