| 41 | * |
| 42 | {{{ |
| 43 | #!html |
| 44 | <br /> DEFINE iplookup `ipwrapper.sh $GEO`<br /> ship ('ipwrapper.sh')<br /> cache('/home/dvryaboy/tmp/$GEO#$GEO');<br /> |
| 45 | }}} |
| 46 | |
| 47 | * |
| 48 | {{{ |
| 49 | #!html |
| 50 | <br /> logs = LOAD '$LOGS' USING LogLoader as<br /> (remoteAddr, remoteLogname, user, time, method,<br /> uri, proto, status, bytes, referer, userAgent);<br /> |
| 51 | }}} |
| 52 | |
| 53 | * |
| 54 | {{{ |
| 55 | #!html |
| 56 | <br /> logs = FILTER logs BY bytes != '-' AND uri matches '/apache.*';<br /> |
| 57 | <br /> -- project just the columns we will need<br /> logs = FOREACH logs GENERATE<br /> remoteAddr,<br /> DayExtractor(time) as day, uri, bytes, userAgent;<br /> |
| 58 | <br /> -- The filtering function is not actually in the PiggyBank.<br /> -- We plan on contributing it soon.<br /> notbots = FILTER logs BY (NOT<br /> org.apache.pig.piggybank.filtering.IsBotUA(userAgent));<br /> |
| 59 | }}} |
| 60 | |