python日志收集

发布时间: 2022-04-26 04:26:50

① python处理日志的包有哪些

#coding:utf-8
#file: FileSplit.py

import os,os.path,time

def FileSplit(sourceFile, targetFolder, number=100000):
    """Split a text file into numbered chunk files of at most ``number`` lines.

    Each chunk is written to ``targetFolder`` as
    ``<basename of sourceFile><n>.txt`` with ``n`` counting up from 1, and a
    line is printed for every chunk created.

    Args:
        sourceFile: Path of the text file to split.
        targetFolder: Directory that receives the chunks; created if missing.
        number: Maximum number of lines per chunk (default 100000).

    Raises:
        ValueError: If ``number`` is smaller than 1.
    """
    from itertools import islice

    if number < 1:
        raise ValueError("number must be a positive integer")

    # makedirs also creates missing parent directories, which os.mkdir cannot.
    if not os.path.isdir(targetFolder):
        os.makedirs(targetFolder)

    baseName = os.path.split(sourceFile)[1]
    fileNum = 1  # chunk counter used in the output file name
    with open(sourceFile, 'r') as sFile:
        while True:
            # Pull the next (up to) `number` lines straight from the file iterator.
            tempData = list(islice(sFile, number))
            if not tempData:
                break
            tFilename = os.path.join(targetFolder, baseName + str(fileNum) + ".txt")
            # 'w' rather than 'a+': re-running the split must not append the
            # same lines to chunks left over from a previous run.
            with open(tFilename, 'w') as tFile:
                tFile.writelines(tempData)
            print(tFilename + " 创建于: " + str(time.ctime()))
            fileNum += 1


if __name__ == "__main__":
    FileSplit("access.log", "access")
#coding:utf-8
#file: Map.py

import os,os.path,re

def Map(sourceFile, targetFolder):
    """Count requested URLs in one access-log chunk and write the tally.

    Every line containing ``GET``/``POST <url> HTTP/1.0|1.1`` contributes one
    hit for ``<url>``. The result is written to
    ``<targetFolder>/<basename of sourceFile>_map.txt`` as ``"<url> <count>"``
    lines, ordered by count from highest to lowest.

    Args:
        sourceFile: Path of the log chunk to scan.
        targetFolder: Directory that receives the ``_map.txt`` file; created
            if missing.
    """
    # Compiled once instead of once per line; the dot in "HTTP/1." is escaped
    # so it only matches a literal dot.
    p_re = re.compile(r'(GET|POST)\s(.*?)\sHTTP/1\.[01]', re.IGNORECASE)

    if not os.path.isdir(targetFolder):
        os.makedirs(targetFolder)

    tempData = {}  # url -> number of hits
    with open(sourceFile, 'r') as sFile:
        for dataLine in sFile:
            match = p_re.search(dataLine)
            if match:
                visitUrl = match.group(2)
                tempData[visitUrl] = tempData.get(visitUrl, 0) + 1

    tList = [
        key + " " + str(value) + '\n'
        for key, value in sorted(tempData.items(), key=lambda k: k[1], reverse=True)
    ]

    tFilename = os.path.join(targetFolder, os.path.split(sourceFile)[1] + "_map.txt")
    # 'w' rather than 'a+': appending on a re-run would duplicate every URL
    # and inflate the totals computed by the later reduce step.
    with open(tFilename, 'w') as tFile:
        tFile.writelines(tList)


if __name__ == "__main__":
    # os.path.join keeps the example runnable outside Windows as well.
    for chunkIndex in (1, 2, 3):
        Map(os.path.join("access", "access.log%d.txt" % chunkIndex), "access")
#coding:utf-8
#file: Rece.py

import os,os.path,re

def Rece(sourceFolder, targetFile):
    """Merge every ``*_map.txt`` tally under ``sourceFolder`` into one total.

    Each input line is expected to look like ``"<url> <count>"``. Counts for
    the same URL are summed across all files, and the totals are written to
    ``<sourceFolder>/<targetFile>_rece.txt`` as ``"<url> <count>"`` lines,
    ordered by count from highest to lowest.

    Args:
        sourceFolder: Directory tree searched for ``*_map.txt`` files; also
            receives the output file.
        targetFile: Base name of the output file (``_rece.txt`` is appended).
    """
    tempData = {}  # url -> summed count
    for root, _dirs, files in os.walk(sourceFolder):
        for fil in files:
            if not fil.endswith('_map.txt'):  # only map-step output files
                continue
            with open(os.path.abspath(os.path.join(root, fil)), 'r') as sFile:
                for dataLine in sFile:
                    # Split off the trailing count. Splitting on whitespace
                    # keeps the separator out of the key, so the output does
                    # not end up with a doubled space.
                    parts = dataLine.rsplit(None, 1)
                    if len(parts) != 2 or not parts[1].isdigit():
                        continue  # blank or malformed line: skip, don't crash
                    url = parts[0]
                    tempData[url] = tempData.get(url, 0) + int(parts[1])

    tList = [
        key + " " + str(value) + '\n'
        for key, value in sorted(tempData.items(), key=lambda k: k[1], reverse=True)
    ]

    tFilename = os.path.join(sourceFolder, targetFile + "_rece.txt")
    # 'w' rather than 'a+': a re-run must replace the totals, not append to them.
    with open(tFilename, 'w') as tFile:
        tFile.writelines(tList)


if __name__ == "__main__":
    Rece("access", "access")

② 如何用 python 分析网站日志

1. 日志的记录

Python有一个logging模块，可以用来产生日志。
（1）学习资料
http://blog.sina.com.cn/s/blog_4b5039210100f1wv.html

http://blog.donews.com/limodou/archive/2005/02/16/278699.aspx
http://kenby.iteye.com/blog/1162698
http://blog.csdn.NET/fxjtoday/article/details/6307285
前边几篇文章仅仅是其它人的简单学习经验，下边这个链接中的内容比较全面。

http://www.red-dove.com/logging/index.html

（2）我需要关注内容
日志信息输出级别
logging模块提供了多种日志级别，如：NOTSET（0），DEBUG（10），
INFO（20），WARNING（30），ERROR（40），CRITICAL（50）。
设置方法：
# Fetch the root logger and let records of level DEBUG and above through.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

日志数据格式
使用Formatter设置日志的输出格式。
设置方法：
logger = logging.getLogger()
handler = logging.FileHandler(XXX)  # XXX is a placeholder for the log file path
# Every %(name) field needs a conversion type; "%(levelname)s" renders the level name.
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s","%Y-%m-%d,%H:%M:%S")

%(asctime)s表示记录日志写入时间，"%Y-%m-%d,%H:%M:%S"设定了时间的具体写入格式。
%(levelname)s表示记录日志的级别。
%(message)s表示记录日志的具体内容。

日志对象初始化
def initLog(logPath="日志保存路径", level=None):
    """Attach a file handler to the root logger and return that logger.

    Records are written as ``"<time> <LEVEL> <message>"`` with the time
    rendered as ``%Y-%m-%d,%H:%M:%S``.

    Args:
        logPath: Path of the log file. The default is the placeholder string
            from the original example and should be replaced by a real path.
        level: Threshold set on the root logger; ``logging.DEBUG`` when omitted.

    Returns:
        The root logger.
    """
    import logging  # local import: the snippet has no import block of its own

    logger = logging.getLogger()
    handler = logging.FileHandler(logPath)
    # "%(levelname)s" needs the trailing "s"; without it the level name is
    # not rendered in the output.
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s", "%Y-%m-%d,%H:%M:%S")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # The original only referenced logger.setLevel without calling it, so the
    # threshold was never changed.
    logger.setLevel(logging.DEBUG if level is None else level)
    return logger

写日志
logging.getLogger().info(), logging.getLogger().debug()......

2. 日志的分析。
（1）我的日志的内容。(log.txt)
2011-12-12,12:11:31 INFO Client1: 4356175.0 1.32366309133e+12 1.32366309134e+12
2011-12-12,12:11:33 INFO Client1: 4361320.0 1.32366309334e+12 1.32366309336e+12
2011-12-12,12:11:33 INFO Client0: 4361320.0 1.32366309389e+12 1.32366309391e+12
2011-12-12,12:11:39 INFO Client1: 4366364.0 1.32366309934e+12 1.32366309936e+12
2011-12-12,12:11:39 INFO Client0: 4366364.0 1.32366309989e+12 1.32366309991e+12
2011-12-12,12:11:43 INFO Client1: 4371416.0 1.32366310334e+12 1.32366310336e+12
2011-12-12,12:11:43 INFO Client0: 4371416.0 1.32366310389e+12 1.32366310391e+12
2011-12-12,12:11:49 INFO Client1: 4376450.0 1.32366310934e+12 1.32366310936e+12
我需要将上述内容逐行读出，并将三个时间戳提取出来，然后将其图形化。

(2) 文件操作以及字符串的分析。
打开文件，读取出一行日志。
# Read the log line by line and print each one. The with-block closes the
# file even if printing fails. "日志路径" is a placeholder for the real path.
with open("日志路径", "r") as logFile:
    for line in logFile:
        print(line)

从字符串中提取数据。
字符串操作学习资料：

http://reader.you.com/sharelite?itemId=-4646262544179865983&method=viewSharedItemThroughLink&sharedBy=-1137845767117085734
从上面展示出来的日志内容可见，主要数据都是用空格分隔，所以需要使用字符串的
split函数对字符串进行分割：
paraList = line.split()，该函数不带参数时按任意空白字符（空格、制表符、换行等）分割，返回值为一个list。
paraList[3], paraList[4], paraList[5]中分别以字符串形式存储着我需要的时间戳。

使用float(paraList[3])将字符串转化为浮点数。
（3）将日志图形化。
matplotlib是python的一个绘图库。我打算用它来将日志图形化。
matplotlib学习资料。
matplotlib的下载与安装：
http://yexin218.iteye.com/blog/645894
http://blog.csdn.Net/sharkw/article/details/1924949

对matplotlib的宏观介绍：
http://apps.hi.baidu.com/share/detail/21928578
对matplotlib具体使用的详细介绍：

http://blog.sina.com.cn/s/blog_4b5039210100ie6a.html
在matplotlib中设置线条的颜色和形状：
http://blog.csdn.net/kkxgx/article/details/python

如果想对matplotlib有一个全面的了解，就需要阅读教程《Matplotlib for Python developers》,教程下载地址:
http://download.csdn.net/detail/nmgfrank/4006691

使用实例
import matplotlib.pyplot as plt

listX = []   # X-axis values: the first timestamp of each Client0 record
listY = []   # Y-axis values: third timestamp minus first timestamp
listY1 = []  # Y-axis values: second timestamp minus first timestamp

# Open the log file; the with-block closes it when the loop is done.
with open("../log.txt", "r") as logFile:
    for line in logFile:
        paraList = line.split()
        if len(paraList) < 6:
            # Blank or truncated line: the fields indexed below do not exist.
            continue
        print(paraList[2])
        print(paraList[3])
        print(paraList[4])
        print(paraList[5])
        if paraList[2] == "Client0:":
            # Add two points that share the same X value.
            listX.append(float(paraList[3]))
            listY.append(float(paraList[5]) - float(paraList[3]))
            listY1.append(float(paraList[4]) - float(paraList[3]))

plt.plot(listX, listY, 'bo-', listX, listY1, 'ro')  # draw both series
plt.title('tile')             # title of the figure
plt.xlabel('time in sec')     # X-axis label
plt.ylabel('delays in ms')    # Y-axis label

plt.show()

③ python有没有通用的日志统计系统

logging模块

import logging

# Configure logging: records go to the console.
logging.basicConfig(
    level=logging.DEBUG,  # minimum level that is recorded
    format="[%(asctime)s]%(name)s:%(levelname)s:%(message)s",  # layout of each record
)

# Emit one record per level. The spaces inside the messages were lost when
# the page was extracted and are restored here.
logging.debug("This is a debug")
logging.info("This is an info")
logging.warning("This is a warning")
logging.error("This is an error")
logging.critical("The system is down")

④ php 有没有类似 python 的 sentry 日志收集系统

php 有没有类似 python 的 sentry 日志收集系统
phpserialize 可以作为单纯的 Python 扩展件来使用，不过，通常还是经常应用在 Python 编程环境和 PHP 编程环境相互之间需要进行数据交换时。
phpserialize 安装很简单，在下载后，解压，然后 # python setup.py install 即可。
phpserialize 使用起来也很简单。
先导入该库： import phpserialize
利用 dumps 进行序列化（变量 -> 格式化文本）： phpserialize.dumps(vary)
使用 loads 进行反序列化（格式化文本 -> 变量）：phpserialize.loads(formated_string)

⑤ python里如何提取日志中的错误信息

要提取日志中的错误信息，可以编写一段程序逐行读取日志文件，并筛选出包含错误标记（例如 ERROR）的行。

⑥ 用python怎么实现自动记日志的功能

功能
[root@skatedb55 ~]# vi op_log_file.py
#!/usr/bin/env python
#-*- coding: utf-8 -*-
#Author：Skate
import os,time
def op_log(log, path=None):
    """Append ``log`` to a log file, prefixed with the current local time.

    Each record is written as ``"YYYY-mm-dd HH:MM:SS <log>\\n"``.

    Args:
        log: Text of the record.
        path: File to append to. When omitted, the module-level name
            ``log_file`` is used, as in the original version.
            NOTE(review): ``log_file`` is not defined in the visible snippet;
            it must be set elsewhere before calling without ``path``.
    """
    target = log_file if path is None else path
    date = time.strftime('%Y-%m-%d %H:%M:%S')
    record = '%s %s\n' % (date, log)
    # The with-block closes (and therefore flushes) the file after each
    # record; the original never closed the handle it opened.
    with open(target, 'a') as f:
        f.write(record)

⑦ python 读取日志文件

#-*-coding:utf-8-*-


from datetime import datetime as dt

now = dt.now()
today = now.strftime('%Y-%m-%d')

with open('log.txt', 'r') as f:
    for i in f:
        # Only lines that carry today's date.
        if today not in i:
            continue
        # Only lines containing both 'ERROR' and 'at com.mytijian'.
        # NOTE(review): the source page shows 'atcom.mytijian' with its
        # whitespace stripped; 'at com.mytijian' is the presumed original.
        if 'ERROR' not in i or 'at com.mytijian' not in i:
            continue
        try:
            # The text before the first comma is parsed as the timestamp.
            stamp = dt.strptime(i.split(',')[0], '%Y-%m-%d %H:%M:%S')
        except ValueError:
            continue  # line does not start with a parseable timestamp
        # total_seconds() rather than .seconds: .seconds drops the days part
        # and wraps around for negative differences.
        if (now - stamp).total_seconds() < 45 * 60:
            # Within the last 45 minutes.
            print(i)

⑧ Python记录详细调用堆栈日志的方法

Python记录详细调用堆栈日志的方法
这篇文章主要介绍了Python记录详细调用堆栈日志的方法,涉及Python调用堆栈日志的相关技巧,具有一定参考借鉴价值,需要的朋友可以参考下
import sys
import os
def detailtrace(info):
    """Print the current call chain, outermost caller first, then ``info``.

    Every frame is rendered as ``<file basename>(<function>:<line>)->`` and
    the frames are joined in call order, so the output reads
    ``a.py(<module>:27)->a.py(main:24)->...-><info>``.

    Args:
        info: Text appended after the call chain.
    """
    retStr = ""
    # Start at the caller: the first frame is detailtrace itself.
    f = sys._getframe().f_back
    while hasattr(f, "f_code"):  # f becomes None past the outermost frame
        co = f.f_code
        # Prepend, because the walk goes from the innermost frame outwards.
        retStr = "%s(%s:%s)->" % (os.path.basename(co.co_filename),
                                  co.co_name,
                                  f.f_lineno) + retStr
        f = f.f_back
    print(retStr + info)


def foo():
    """Innermost demo function: prints the call chain that led here."""
    detailtrace("hello world")


def bar():
    """Middle link of the demo call chain."""
    foo()


def main():
    """Entry point of the demo call chain."""
    bar()


if __name__ == "__main__":
    main()

输出：

aaa1.py(<module>:27)->aaa1.py(main:24)->aaa1.py(bar:21)->aaa1.py(foo:18)->hello world

希望本文所述对大家的Python程序设计有所帮助。

⑨ Python语言扫描日志并统计

修复了一些小的拼写错误
修复了出现无效数据行会出现错误的BUG
修复了最小值统计方法的错误

===================下面开始咯log.py========
# -*- coding: cp936 -*-
#上一句不可以删！表示中文路径是GBK编码
import datetime
# datetime is used to parse the timestamps in the log.


def sparse(target='log.txt'):
    """Parse a request log into per-id begin/end records.

    A valid line has at least seven whitespace-separated fields, e.g.
    ``#1 0.0.0.0 [2007-06-12 23:27:08] request begin: OK``: field 0 is the
    event id, fields 2 and 3 hold the bracketed timestamp, field 5 is the
    action and field 6 starts with the status.

    Args:
        target: Path of the log file.

    Returns:
        A dict mapping each event id to a list of records
        ``[still_open, begin_time, end_time]``. ``still_open`` is 1 until a
        matching ``end: OK`` line is seen and 0 afterwards; ``end_time`` is 0
        while the record is still open.
    """
    event = {}
    with open(target, "r") as tgfile:
        for linelog in tgfile:
            data = linelog.split()
            # data[6] is read below, so anything shorter is not a valid line.
            if len(data) < 7:
                continue
            # Strip the brackets: "[2007-06-12" + "23:27:08]" -> "2007-06-12 23:27:08"
            time1 = data[2][1:] + ' ' + data[3][:-1]
            try:
                time2 = datetime.datetime.strptime(time1, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                continue  # long enough, but not a timestamped record
            if data[5] == "begin:" and data[6][:2] == "OK":
                # First record for an id creates its list; later ones append.
                event.setdefault(data[0], []).append([1, time2, 0])
            if data[5] == "end:" and data[6][:2] == "OK":
                records = event.get(data[0])
                # An "end" whose id never had a "begin" is skipped, not a KeyError.
                if records:
                    records[-1][0] = 0      # latest record: finished
                    records[-1][2] = time2  # latest record: end time
            # Handling for other actions can be added here.
    return event

def analysis(target='log.txt'):
    """Compute per-id statistics from the records returned by ``sparse``.

    Args:
        target: Path of the log file, passed through to ``sparse``.

    Returns:
        A dict with the same keys as the ``sparse`` result. Each value is
        ``[total, finished, total_seconds, max_seconds, min_seconds]``:
        ``total`` counts every record, ``finished`` counts records whose
        first field is 0, and the three durations (in whole seconds) cover
        finished records only. ``min_seconds`` stays -1 when no record of
        that id finished.
    """
    event = sparse(target)
    static = {}
    for oneevent in event:
        # [total count, finished count, total duration, max duration, min duration]
        static[oneevent] = [0, 0, 0, 0, -1]
        for onerecord in event[oneevent]:
            static[oneevent][0] += 1  # every record counts towards the total
            if onerecord[0] == 0:  # finished record
                static[oneevent][1] += 1
                time_delta = onerecord[2] - onerecord[1]  # a timedelta
                # Convert the difference to whole seconds.
                inttimedelta = time_delta.days * 24 * 60 * 60 + time_delta.seconds
                if inttimedelta > static[oneevent][3]:
                    static[oneevent][3] = inttimedelta  # new maximum
                # A negative stored minimum is the "not set yet" sentinel.
                if inttimedelta < static[oneevent][4] or static[oneevent][4] < 0:
                    static[oneevent][4] = inttimedelta  # new minimum
                static[oneevent][2] += inttimedelta
    return static

===================下面是log.txt===========
#1 0.0.0.0 [2007-06-12 23:27:08] request begin: OK
#3 0.0.0.0 [2007-06-12 23:28:08] request begin: fail
#1 0.0.0.0 [2007-06-12 23:37:08] request begin: OK
#1 0.0.0.0 [2007-06-12 23:37:18] request for a data: OK
#1 0.0.0.0 [2007-06-12 23:37:19] received some data: OK
#1 0.0.0.0 [2007-06-13 00:27:08] request end: OK
#2 0.0.0.0 [2007-06-13 00:37:08] request begin: OK
#2 0.0.0.0 [2007-06-13 00:47:08] request end: OK
system ERROR: reboot
Another Invalid Line

#1 0.0.0.0 [2007-06-13 23:28:18] request begin: OK
#7 0.0.0.0 [2007-06-13 23:29:08] request begin: OK
#7 0.0.0.0 [2007-06-13 23:30:18] request end: OK
#4 0.0.0.0 [2007-06-13 23:33:08] request begin: OK
#4 0.0.0.0 [2007-06-13 23:35:23] request end: OK
#4 0.0.0.0 [2007-06-13 23:37:08] request begin: OK
#4 0.0.0.0 [2007-06-13 23:43:38] request end: OK
#5 0.0.0.0 [2007-06-13 23:47:08] request begin: OK
#1 0.0.0.0 [2007-06-13 23:57:48] request begin: OK
#5 0.0.0.0 [2007-06-13 23:59:08] request end: OK

===================下面是使用和输出========
import log

# Run the analysis on the default 'log.txt' and keep the statistics dict.
output = log.analysis()
#或者直接log.analysis()

=============输出============
{'#2': [1, 1, 600, 600, 600], '#1': [4, 1, 3000, 3000, 3000], '#7': [1, 1, 70, 70, 70], '#5': [1, 1, 720, 720, 720], '#4': [2, 2, 525, 390, 135]}

比如#1事件，总次数output['#1'][0]==4次
成功次output['#1'][1]==1次
失败次output['#1'][0]-output['#1'][1]==3次
总时间output['#1'][2]==3000秒
平均时间output['#1'][2]/output['#1'][1]==3000/1==3000秒
最大时间output['#1'][3]==3000秒
最小时间output['#1'][4]==3000秒
共有len(output)==5种ID事件

阅读全文

热点内容

标记c语言发布：2025-09-17 22:28:38 浏览：591

编译原理语义检查发布：2025-09-17 22:26:07 浏览：660

uint哪个配置有行车记录仪发布：2025-09-17 22:15:38 浏览：429

设计php框架发布：2025-09-17 21:50:05 浏览：177

sql统计条数发布：2025-09-17 21:49:51 浏览：708

javastatic和发布：2025-09-17 21:35:35 浏览：365

星星算法发布：2025-09-17 21:34:19 浏览：139

杭州版式文件服务器地址怎么填写发布：2025-09-17 21:17:42 浏览：988

linux的dns怎么配置发布：2025-09-17 21:17:24 浏览：906

如何把安卓的软件放到苹果平板上发布：2025-09-17 21:09:38 浏览：492

python日志收集

与python日志收集相关的资讯