= 2013-08-14 =
 * Register the piggybank jar and define aliases for the log loader and the date extractor:
{{{
#!html
 -- Register the piggybank jar so the classes named below can be resolved.
 -- NOTE(review): the path points into one user's home directory; adjust per machine.
 register /home/dvryaboy/src/pig/trunk/piggybank.jar;
 -- LogLoader: short alias for piggybank's CombinedLogLoader, constructed without arguments.
 DEFINE LogLoader
 org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();
 -- DayExtractor: short alias for piggybank's DateExtractor, constructed with the
 -- pattern 'yyyy-MM-dd'.
 DEFINE DayExtractor
 org.apache.pig.piggybank.evaluation.util.apachelogparser.DateExtractor('yyyy-MM-dd');
 
}}}
 * Default values for the LOGS and GEO parameters:
{{{
#!html
 -- Fallback values for the $LOGS and $GEO parameters, used when the caller
 -- does not supply them.
 %default LOGS 'access_log.small'
 %default GEO 'GeoLiteCity.dat'
 
}}}
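 * Perl filter that prefixes each tab-separated record with GeoIP fields looked up from its leading IP address: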
{{{
#!html
 #!/usr/bin/env perl
 # Streaming filter: reads tab-separated records whose first field is an IP
 # address and writes each one back out prefixed with GeoIP lookup fields.
 #
 # Usage: <this script> GEO_DATABASE [INPUT_FILE ...]
 #   GEO_DATABASE  path handed to Geo::IP::PurePerl->new
 #   Input comes from the remaining arguments, or STDIN when there are none.
 #
 # Output columns (tab-separated):
 #   country_code, country_name, region, city, ip, <rest of the input line>
 # Input lines that contain no tab are skipped without producing output.
 use warnings;
 use strict;
 use Geo::IP::PurePerl;

 # Fail with a usage message instead of handing undef to the constructor
 # when the database path argument is missing.
 my $path = shift;
 die "usage: $0 GEO_DATABASE [INPUT_FILE ...]\n"
     unless defined $path && length $path;

 my $gi = Geo::IP::PurePerl->new($path);

 while (my $line = <>) {
     chomp $line;

     # Split on the first tab only: the IP is everything before it, and the
     # remainder of the record is passed through untouched.
     next unless $line =~ /\A([^\t]*)\t(.*)/;
     my ($ip, $rest) = ($1, $2);

     # Only four elements of the returned list are used; the second one is
     # deliberately discarded.
     my ($country_code, undef, $country_name, $region, $city)
         = $gi->get_city_record($ip);

     # Substitute '' only for undefined fields, so that a defined value such
     # as "0" is emitted as-is rather than blanked out.
     print join("\t",
         (map { defined $_ ? $_ : '' }
             $country_code, $country_name, $region, $city),
         $ip, $rest), "\n";
 }
 
}}}
 * Stream the records through the GeoIP wrapper, then count records and sum bytes per country code:
{{{
#!html
 -- Pipe every record of notbots (a relation defined outside this snippet) through the
 -- external command `ipwrapper.sh`, passing the $GEO parameter as its argument, and
 -- name the nine fields of the tuples that come back.
 with_country = STREAM notbots THROUGH `ipwrapper.sh $GEO`
 AS (country_code, country, state, city, ip, time, uri, bytes, userAgent);
-- Collect the streamed records into one group per country_code.
geo_uri_groups = GROUP with_country BY country_code;
-- For each country_code emit: the code itself, the number of records, and the
-- sum of their bytes field.
geo_uri_group_counts = FOREACH geo_uri_groups GENERATE
 group,
 COUNT(with_country) AS cnt,
 SUM(with_country.bytes) AS total_bytes;
-- Rank the per-country rows by record count, largest first (the alias is reused).
geo_uri_group_counts = ORDER geo_uri_group_counts BY cnt DESC;
-- Write the ranked rows to 'by_country.tsv'.
STORE geo_uri_group_counts INTO 'by_country.tsv';
 
}}}