code:从apache日志文件中获取所有的关于google的useragent[shell应用]
tags:shell,cat,awk,sort,uniq
从apache日志文件中获取所有的关于google的useragent
# List every Google-related User-Agent string found in the 2006 Apache access
# logs, most frequent first. Replace [dir] with the log directory before running.
#   1. awk: keep lines that mention Google, blank out fields 1-11 and print
#      what is left. Assumes the combined log format, where the User-Agent
#      starts at field 12 -- TODO confirm against the server's LogFormat.
#   2. awk: test for Google again, because step 1 matched the whole line and
#      the match may have been inside one of the fields that were just blanked.
#   3. sort | uniq -c: sort so identical lines are adjacent, then count them.
#   4. sort -rn: order by count, descending. The option must start with an
#      ASCII hyphen; a typographic dash makes sort treat it as a file name.
cat [dir]/access_log.2006*| awk '/Google/{sub(".*","",$1); sub(".*","",$2); sub(".*","",$3); sub(".*","",$4); sub(".*","",$5); sub(".*","",$6); sub(".*","",$7); sub(".*","",$8); sub(".*","",$9); sub(".*","",$10); sub(".*","",$11); print ; }'| awk '/Google/'|sort|uniq -c|sort -rn
分析:
cat [dir]/access_log.2006* # 输出所有dir指定的目录下以access_log.2006开头的文件的内容
awk '/Google/ # 过滤出所有包含Google的记录(按行)
{sub(".*","",$1); sub(".*","",$2); sub(".*","",$3); sub(".*","",$4); sub(".*","",$5); sub(".*","",$6); sub(".*","",$7); sub(".*","",$8); sub(".*","",$9); sub(".*","",$10); sub(".*","",$11); print ; }' # 通过 sub 对前11项(按空格进行区分)进行清除 ,print输出剩下的内容。
awk '/Google/' # 再次过滤:只保留剩余内容中仍包含Google的行(第一次过滤匹配的是整行,Google可能只出现在已被清除的字段中)
sort|uniq -c # 排序并过滤重复,-c 输出重复次数
sort -rn # 按数字(-n)进行反向(-r)排序,即按重复次数从大到小排列
数据样式
20060926 – 20061007 关于google的useragent列表
重复次数 useragent
167822 "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)"
4105 "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
2017 "Mozilla/5.0 (compatible; Google Desktop)"
1933 "Mozilla/4.0 (compatible; Google Desktop)"
257 "Planet Google (
35 "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)"
13 "Googlebot/2.1+(+http://www.googlebot.com/bot.html)"
12 "Mediapartners-Google/2.1"
10 "Nokia6820/2.0 (4.83) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)"
8 "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:
7 "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;)"
4 "Mozilla/4.0 (compatible; GoogleToolbar 2.0.114-big; Windows XP 5.1)"
2 "GoogleSpider2"
1 "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Google-TR-4)"
没有评论:
发表评论