wiki:jazz/13-08-14

Version 6 (modified by jazz, 11 years ago) (diff)

--

2013-08-14


  • %default LOGS 'access_log.small'
    -- Default parameter values. GEO is substituted into the `ipwrapper.sh $GEO` STREAM command below;
    -- LOGS is presumably the input access-log path, but its use is not shown here (TODO confirm).
    %default GEO 'GeoLiteCity.dat'

  • #!/usr/bin/env perl
    use warnings;
    use strict;
    use Geo::IP::PurePerl;

    # The first command-line argument names the GeoIP database file to load.
    my $db_file = shift;
    my $geoip   = Geo::IP::PurePerl->new($db_file);

    # Each input line is expected to look like "<ip><TAB><rest>". The same
    # line is written back out, prefixed with four tab-separated columns:
    # country code, country name, region and city.
    RECORD:
    while (my $record = <>) {
        chomp $record;

        # Lines without a tab separator produce no output at all.
        next RECORD unless $record =~ /([^\t]*)\t(.*)/;
        my ($ip, $rest) = ($1, $2);

        # The second element of the returned list is deliberately discarded.
        my ($code, undef, $name, $region, $city)
            = $geoip->get_city_record($ip);

        # Any lookup value that is undefined (or otherwise false) is
        # emitted as an empty string so the column count stays constant.
        my @columns = (
            $code   || '',
            $name   || '',
            $region || '',
            $city   || '',
            $ip,
            $rest,
        );
        print join("\t", @columns), "\n";
    }


  • #!/usr/bin/env bash
    # Unpack geo-pack.tgz into the current directory (presumably it provides
    # geostream.pl and the Perl modules it needs -- confirm against the archive).
    tar -xzf geo-pack.tgz
    # Run the lookup script with the current directory on Perl's module path.
    #  - ${PERL5LIB:+...} only prepends the existing value (and the ':')
    #    when PERL5LIB is set and non-empty, avoiding an empty path entry.
    #  - "$1" (the database path) is quoted so a path containing spaces or
    #    glob characters reaches the script as a single argument.
    PERL5LIB="${PERL5LIB:+$PERL5LIB:}$(pwd)" ./geostream.pl "$1"
    *
    -- Pipe every notbots record through the external wrapper, which puts
    -- four location columns in front of the original fields.
    located = STREAM notbots THROUGH `ipwrapper.sh $GEO`
        AS (country_code, country, state, city, ip, time, uri, bytes, userAgent);

    -- Bucket the located records by country code.
    per_country = GROUP located BY country_code;

    -- For each country: number of records and the sum of their bytes field.
    country_totals = FOREACH per_country GENERATE
        group,
        COUNT(located) AS cnt,
        SUM(located.bytes) AS total_bytes;

    -- Countries with the most records come first.
    ranked_totals = ORDER country_totals BY cnt DESC;

    STORE ranked_totals INTO 'by_country.tsv';