这是《Python for Unix/Linux 系统管理》中的一个例子,对最后的 if 做了修改,简单修改了异常处理语句,并增加了每个 IP 的流量统计。目前只能对单个日志文件做统计。
- #!/usr/bin/env python
- '''
- USEAGE:
- apache_log_split.py some_log_file
- '''
- import sys
def formatlogline(line):
    """Extract the fields of interest from one access-log line.

    The line is split on whitespace; field 0 is taken as the remote host,
    field 8 as the status and field 9 as the bytes sent. All values are
    returned as the raw strings found in the line.

    Raises:
        IndexError: if the line has fewer than ten whitespace-separated
            fields.
    """
    fields = line.split()
    remote_host, status, bytes_sent = fields[0], fields[8], fields[9]
    return dict(remote_host=remote_host, status=status, bytes_sent=bytes_sent)
def generate_log_report(logfile):
    """Collect the bytes sent to each remote host in an access log.

    Args:
        logfile: An iterable of log lines, e.g. an open file object.

    Returns:
        A dict mapping each remote host to a list of the integer byte
        counts of its requests, in the order they were read.

    Lines that cannot be parsed are skipped rather than aborting the
    whole report.
    """
    report_dict = {}
    for line in logfile:
        try:
            line_dict = formatlogline(line)
            bytes_sent = int(line_dict['bytes_sent'])
        except (IndexError, ValueError):
            # IndexError: blank or truncated line with too few fields.
            # ValueError: the byte-count field is not an integer.
            continue
        report_dict.setdefault(line_dict['remote_host'], []).append(bytes_sent)
    return report_dict
if __name__ == '__main__':
    # The log file path is required as the first command-line argument.
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)
    infile_name = sys.argv[1]
    try:
        infile = open(infile_name, 'r')
    except IOError:
        # Report the actual path that failed, not the variable's name.
        print("%s not found!" % infile_name)
        print(__doc__)
        sys.exit(1)
    # The with-block closes the file even if report generation raises.
    with infile:
        log_report = generate_log_report(infile)
    # One line per remote host with the total of its byte counts.
    for host in log_report:
        print("IP: %s => byte_total: %s" % (host, sum(log_report[host])))
这个程序在解析日志时有个小 BUG,处理后的数据中会出现:
{'status': '304', 'bytes_sent': '-', 'remote_host': '192.168.1.1'}
bytes_sent 为 '-',这会造成后续的处理错误。解决办法是修改函数
formatlogline:
def formatlogline(line):
    """Extract the fields of interest from one access-log line.

    The line is split on whitespace; field 0 is taken as the remote host,
    field 8 as the status and field 9 as the bytes sent. A bytes-sent value
    of '-' is replaced by the string '0' so that callers can always convert
    it to an integer. All values are returned as strings.

    Raises:
        IndexError: if the line has fewer than ten whitespace-separated
            fields.
    """
    fields = line.split()
    remote_host, status, sent = fields[0], fields[8], fields[9]
    if sent == '-':
        sent = '0'
    return {
        'remote_host': remote_host,
        'status': status,
        'bytes_sent': sent,
    }