| 15 | |
| 16 | == Regular Expression == |
| 17 | |
| 18 | === 利用Regular Expression統計每小時網站瀏覽人次 === |
| 19 | access_log 的"部分"內容： |
| 20 | {{{ |
| 21 | #!text |
| 22 | 202.160.180.62 - - [01/Dec/2005:10:40:47 +0800] "GET /~ckhung/z/exp/suexec.pl HTTP/1.0" 200 3 "-" "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)" |
| 23 | 140.117.92.69 - - [01/Dec/2005:14:41:36 +0800] "GET /mysql/ireland.gif HTTP/1.1" 304 - "http://penguin.im.cyut.edu.tw/mysql/manual_Installing.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)" |
| 24 | 66.249.65.210 - - [01/Dec/2005:18:56:17 +0800] "GET /~imnsa/plus/calendar_scheduler.php?mode=hour&d=1131570000&sid=c1740e65b9c0689bb182f94101633a0b HTTP/1.1" 200 33343 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" |
| 25 | 66.249.65.210 - - [01/Dec/2005:20:47:30 +0800] "GET /~imnsa/plus/calendar_scheduler.php?mode=hour&d=1127426400&sid=2bf2e41933fe9a143879042ec7edb982 HTTP/1.1" 200 33343 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" |
| 26 | 202.108.11.235 - - [01/Dec/2005:21:57:55 +0800] "GET /~imnsa/plus/portal.php?month=82 HTTP/1.1" 200 58368 "-" "Baiduspider+(+http://www.baidu.com/search/spider.htm)" |
| 27 | 220.139.212.11 - - [01/Dec/2005:23:12:10 +0800] "GET /~cloudcolors/blog/wp-content/images/bb/css.png HTTP/1.1" 200 299 "http://penguin.im.cyut.edu.tw/~cloudcolors/blog/?p=23" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)" |
| 28 | 66.249.65.210 - - [02/Dec/2005:01:55:53 +0800] "GET /~imnsa/plus/calendar_scheduler.php?mode=hour&d=1129676400&sid=31ec5649ed6354ca26737202d6059152 HTTP/1.1" 200 33977 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" |
| 29 | 66.249.65.210 - - [02/Dec/2005:07:46:36 +0800] "GET /~imnsa/plus/calendar_scheduler.php?mode=hour&d=1137466800&sid=c363ab24791116721ed8fe3014584979 HTTP/1.1" 200 33979 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" |
| 30 | }}} |
| 31 | |
| 32 | * Step1: 利用 Regular Expression 擷取"小時"資訊並存到t1（每列第一個符合「:兩位數字:」的位置，就是時間戳記中的小時欄位） |
| 33 | {{{ |
| 34 | perl -ne 'print "$1\n" if /:([0-9][0-9]):/' < access_log > t1 |
| 35 | }}} |
| 36 | |
| 37 | * Step2: 排序t1並存到t2（uniq 只會合併相鄰的重複列，所以必須先排序） |
| 38 | {{{ |
| 39 | sort < t1 > t2 |
| 40 | }}} |
| 41 | |
| 42 | * Step3: 合併重複的列並統計每個小時出現的次數, done! |
| 43 | {{{ |
| 44 | uniq -c < t2 |
| 45 | }}} |
| 46 | |
| 47 | * 一行指令解(不會產生中間暫存檔) |
| 48 | {{{ |
| 49 | perl -ne 'print "$1\n" if /:([0-9][0-9]):/' < access_log | sort |uniq -c |
| 50 | }}} |