Pig 统计web日志ip 访问数量

浏览: 1712

pig脚本

-- Count how many access-log records each client IP produced.

-- Load the log, splitting every line on a single space character.
-- The schema names eight chararray fields; only `ip` (the first one) is used below.
records = load 'access_log.txt' USING PigStorage(' ') as(ip :chararray, v2:chararray,v3:chararray,v4:chararray,v5:chararray,v6:chararray,v7:chararray,v8:chararray);

-- Collect all records that share the same ip into one group.
grouped_records = group records by ip;

-- Print the schema of the grouped relation: a `group` key (the ip) plus a bag named `records`.
describe grouped_records;

-- For every group, emit the ip and the number of records in its bag.
ipclicks = foreach grouped_records generate group,COUNT(records);

-- Run the pipeline and print the (ip, count) tuples.
dump ipclicks;

脚本解释:

USING PigStorage(' ') 每行内容分隔符是空格
as(ip :chararray, v2:chararray,v3:chararray,v4:chararray,v5:chararray,v6:chararray,v7:chararray,v8:chararray)
这里为每行按空格切分后的前8个字段命名(ip、v2~v8),都是字符类型
describe grouped_records;的结果是
grouped_records: {group: chararray,records: {(ip: chararray,v2: chararray,v3: chararray,v4: chararray,v5: chararray,v6: chararray,v7: chararray,v8: chararray)}}
group 代表ip,records 代表该ip的所有记录
ipclicks = foreach grouped_records generate group,COUNT(records);
遍历分组后的记录,输出每个ip及其对应的记录数,即可得到每个ip的访问次数

web 日志内容:

220.181.108.151 - - [31/Jan/2012:00:02:32 +0800] "GET /home.php?mod=space&uid=158&do=album&view=me&from=space HTTP/1.1" 200 8784 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
208.115.113.82 - - [31/Jan/2012:00:07:54 +0800] "GET /robots.txt HTTP/1.1" 200 582 "-" "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)"
220.181.94.221 - - [31/Jan/2012:00:09:24 +0800] "GET /home.php?mod=spacecp&ac=pm&op=showmsg&handlekey=showmsg_3&touid=3&pmid=0&daterange=2&pid=398&tid=66 HTTP/1.1" 200 10070 "-" "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
112.97.24.243 - - [31/Jan/2012:00:14:48 +0800] "GET /data/cache/style_2_common.css?AZH HTTP/1.1" 200 57752 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:48 +0800] "GET /data/cache/style_2_widthauto.css?AZH HTTP/1.1" 200 1024 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:48 +0800] "GET /data/cache/style_2_forum_forumdisplay.css?AZH HTTP/1.1" 200 11486 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:47 +0800] "GET /forum-58-1.html HTTP/1.1" 200 67288 "http://googleads.g.doubleclick.net/mads/gma?u_audio=1&hl=zh-CN&preqs=83&app_name=2.1.2.iphone.com.cronlygames.fruitlink&prl=2434&u_h=480&u_so=p&net=ed&u_w=320&ptime=8345648&js=afma-sdk-i-v4.1.1&slotname=a14d85daa45532a&platform=iPhone&submodel=iPhone4%2C1&u_sd=2&format=320x50_mb&msid=420318724&output=html&region=mobile_app&u_tz=-480&ex=1&client_sdk=1&askip=3&jsv=1&ms=nggfgZVyqr7fynMZlRDjb_CUekl2d9biK2ETenLFUCv-utKybszxHFS1rjb-R44CDljkS45J75M5GDcbzU58WqQ1eiAv0qYfIgsABBIgQaLSKhSuenmu9pAX9GAmvpDDNp5wabc_Z45ldp4XBm8avXp4tzeTO_xkknIknYiLOVfTT2Bd3UsKiDH0GMGvksEsVoOH3-5X4Q_qPtx_lfTWA8Ok4GN0DV7BJzXWHfDkFqY37XI9x4V4WqfyTaKIIO6kQS6Z2h1_Ga55BAdidONW7-l6kIP9tCF21tuZFJnnYVs-lwej9yfpKIfQKr6y56AeG7Ep5EKQCMmYhe49RbC_1w" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:49 +0800] "GET /static/js/forum.js?AZH HTTP/1.1" 200 15331 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:49 +0800] "GET /static/js/jquery-1.6.js HTTP/1.1" 404 299 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:49 +0800] "GET /popwin_js.php?fid=58 HTTP/1.1" 404 289 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:49 +0800] "GET /static/js/floating-jf.js HTTP/1.1" 404 300 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:49 +0800] "GET /static/js/common.js?AZH HTTP/1.1" 200 63947 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:52 +0800] "GET /static/image/common/folder_common.gif HTTP/1.1" 200 347 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:52 +0800] "GET /static/image/common/logo.png HTTP/1.1" 200 4746 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:52 +0800] "GET /static/image/filetype/image_s.gif HTTP/1.1" 200 338 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:52 +0800] "GET /static/image/common/pin_3.gif HTTP/1.1" 200 189 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:52 +0800] "GET /ads/banner-01.gif HTTP/1.1" 200 34710 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:52 +0800] "GET /static/image/common/pn_post.png HTTP/1.1" 200 2789 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:53 +0800] "GET /static/image/common/folder_new.gif HTTP/1.1" 200 608 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:53 +0800] "GET /static/image/filetype/common.gif HTTP/1.1" 200 90 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
112.97.24.243 - - [31/Jan/2012:00:14:53 +0800] "GET /home.php?mod=misc&ac=sendmail&rand=1327940087 HTTP/1.1" 200 - "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
220.181.108.175 - - [31/Jan/2012:00:16:54 +0800] "GET /home.php?mod=space&uid=203&do=album&view=me&from=space HTTP/1.1" 200 8784 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
220.181.94.221 - - [31/Jan/2012:00:19:15 +0800] "GET /?72 HTTP/1.1" 200 13614 "-" "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
218.5.72.173 - - [31/Jan/2012:00:21:39 +0800] "GET /forum.php?mod=viewthread&tid=89 HTTP/1.0" 200 69798 "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322)"
65.52.109.151 - - [31/Jan/2012:00:24:47 +0800] "GET /robots.txt HTTP/1.1" 200 582 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
220.181.94.221 - - [31/Jan/2012:00:26:12 +0800] "GET /?67 HTTP/1.1" 200 13354 "-" "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
218.205.245.7 - - [31/Jan/2012:00:27:16 +0800] "GET /forum-58-1.html HTTP/1.0" 200 67288 "-" "-"
123.147.244.39 - - [31/Jan/2012:00:29:32 +0800] "GET /data/cache/style_2_common.css?AZH HTTP/1.1" 200 57752 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:32 +0800] "GET /data/cache/style_2_forum_forumdisplay.css?AZH HTTP/1.1" 200 11486 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:31 +0800] "GET /forum-58-1.html HTTP/1.1" 200 67288 "-" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:32 +0800] "GET /data/cache/style_2_widthauto.css?AZH HTTP/1.1" 200 1024 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:33 +0800] "GET /static/js/jquery-1.6.js HTTP/1.1" 404 299 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:33 +0800] "GET /static/js/floating-jf.js HTTP/1.1" 404 300 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:33 +0800] "GET /static/js/common.js?AZH HTTP/1.1" 200 63947 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:33 +0800] "GET /popwin_js.php?fid=58 HTTP/1.1" 404 289 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:33 +0800] "GET /static/js/forum.js?AZH HTTP/1.1" 200 15331 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:33 +0800] "GET /home.php?mod=misc&ac=sendmail&rand=1327940971 HTTP/1.1" 200 - "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/logo.png HTTP/1.1" 200 4746 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/pin_3.gif HTTP/1.1" 200 189 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/folder_common.gif HTTP/1.1" 200 347 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/pn_post.png HTTP/1.1" 200 2789 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /ads/banner-01.gif HTTP/1.1" 200 34710 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/filetype/image_s.gif HTTP/1.1" 200 338 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/filetype/common.gif HTTP/1.1" 200 90 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/fav.gif HTTP/1.1" 200 596 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/house.gif HTTP/1.1" 200 538 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/qmenu.png HTTP/1.1" 200 1744 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/login.gif HTTP/1.1" 200 420 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/arw_l.gif HTTP/1.1" 200 844 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"
123.147.244.39 - - [31/Jan/2012:00:29:34 +0800] "GET /static/image/common/folder_new.gif HTTP/1.1" 200 608 "http://f.dataguru.cn/forum-58-1.html" "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Mobile/9A406"

推荐 0
本文由 策马行空 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

0 个评论

要回复文章请先登录注册