| | 41 | * |
| | 42 | {{{ |
| | 43 | #!html |
| | 44 | <br /> DEFINE iplookup `ipwrapper.sh $GEO`<br /> ship ('ipwrapper.sh')<br /> cache('/home/dvryaboy/tmp/$GEO#$GEO');<br /> |
| | 45 | }}} |
| | 46 | |
| | 47 | * |
| | 48 | {{{ |
| | 49 | #!html |
| | 50 | <br /> logs = LOAD '$LOGS' USING LogLoader as<br /> (remoteAddr, remoteLogname, user, time, method,<br /> uri, proto, status, bytes, referer, userAgent);<br /> |
| | 51 | }}} |
| | 52 | |
| | 53 | * |
| | 54 | {{{ |
| | 55 | #!html |
| | 56 | <p><br /> logs = FILTER logs BY bytes != '-' AND uri matches '/apache.*';</p> |
| | 57 | <p>-- project just the columns we will need<br /> logs = FOREACH logs GENERATE<br /> remoteAddr,<br /> DayExtractor(time) as day, uri, bytes, userAgent;</p> |
| | 58 | <p>-- The filtering function is not actually in the PiggyBank.<br /> -- We plan on contributing it soon.<br /> notbots = FILTER logs BY (NOT<br /> org.apache.pig.piggybank.filtering.IsBotUA(userAgent));<br /></p> |
| | 59 | }}} |
| | 60 | |