4 回答
TA贡献1856条经验 获得超11个赞
试试这个我的朋友
import re
def logs(path="assets/logdata.txt"):
    """Parse an access log into a list of dicts, one per matched entry.

    Args:
        path: Location of the log file. Defaults to the original
            hard-coded ``assets/logdata.txt``.

    Returns:
        A list of dicts with the keys ``host``, ``user_name``, ``time`` and
        ``request``. Text that does not match the pattern is skipped.
    """
    # Raw string: in a normal literal, \d, \s, \S and \[ are invalid escape
    # sequences and trigger warnings on current Python versions.
    pattern = (
        r'(?P<host>(?:\d+\.){3}\d+)\s+'    # dotted-quad host
        r'(?:\S+)\s+'                      # second field, not captured
        r'(?P<user_name>\S+)\s+'           # user name, may be "-"
        r'\[(?P<time>[-+\w\s:/]+)\]\s+'    # timestamp inside literal brackets
        # A single lazy .+? is enough; the former ".+?.+?" required at least
        # two characters and so ran past the closing quote of a one-character
        # request such as "-".
        r'"(?P<request>.+?)"'
    )
    with open(path, "r", encoding="utf-8") as f:
        logdata = f.read()
    # The result is no longer stored in a local called "logs", which used to
    # shadow this function's own name.
    return [m.groupdict() for m in re.finditer(pattern, logdata)]
TA贡献1784条经验 获得超9个赞
请看下面的代码:
import re
# Matcher for one access-log entry: host, a literal "-" second field, an
# optional user name, a bracketed timestamp and a quoted request.
regex = re.compile(
    # Exactly four dot-separated numbers; the former {1,3} also accepted
    # truncated hosts such as "1.2".
    r'(?P<host>(?:\d+\.){3}\d+)\s+-\s+'
    # Optional, so the group is None when the user field is missing.
    r'(?P<user_name>[\w+\-]+)?\s+'
    # "+" added to the class so positive UTC offsets (e.g. "+0100") match too.
    r'\[(?P<time>[-+\w\s:/]+)\]\s+'
    # Request must start with a word character (the method) and ends at the
    # first closing double quote.
    r'"(?P<request>\w+.+?)"'
)
def logs(path="assets/logdata.txt", pattern=None):
    """Parse an access log into a list of dicts, one per matched entry.

    Args:
        path: Location of the log file. Defaults to the original
            hard-coded ``assets/logdata.txt``.
        pattern: Compiled regular expression with a ``user_name`` named
            group. Defaults to the module-level ``regex``.

    Returns:
        A list of ``groupdict()`` results. A ``user_name`` that did not
        participate in the match (``None``) is replaced by ``"-"``.
    """
    if pattern is None:
        pattern = regex
    with open(path, "r", encoding="utf-8") as f:
        logdata = f.read()
    data = []
    for match in pattern.finditer(logdata):
        entry = match.groupdict()
        # The user_name group is optional in the default pattern; normalise
        # a missing value to the "-" placeholder.
        if entry["user_name"] is None:
            entry["user_name"] = "-"
        data.append(entry)
    return data
# Run the parser once; the returned list is not stored or printed here.
logs()
请在下面找到输出部分:
[{'host': '146.204.224.152', 'user_name': 'feest6811', 'time': '21/Jun/2019:15:45:24 -0700', 'request': 'POST /incentivize HTTP/1.1'}, {'host': '197.109.77.178', 'user_name': 'kertzmann3129', 'time': '21/Jun/2019:15:45:25 -0700', 'request': 'DELETE /virtual/solutions/target/web+services HTTP/2.0'}, {'host': '156.127.178.177', 'user_name': 'okuneva5222', 'time': '21/Jun/2019:15:45:27 -0700', 'request': 'DELETE /interactive/transparent/niches/revolutionize HTTP/1.1'}, {'host': '100.32.205.59', 'user_name': 'ortiz8891', 'time': '21/Jun/2019:15:45:28 -0700', 'request': 'PATCH /architectures HTTP/1.0'}, {'host': '168.95.156.240', 'user_name': 'stark2413', 'time': '21/Jun/2019:15:45:31 -0700', 'request': 'GET /engage HTTP/2.0'}, .....] 文本文件的每一行对应一个字典,共 979 个字典。
TA贡献1862条经验 获得超6个赞
import re
def logs(path="assets/logdata.txt"):
    """Parse an access log into a list of dicts, one per matched entry.

    Args:
        path: Location of the log file. Defaults to the original
            hard-coded ``assets/logdata.txt``.

    Returns:
        A list of dicts with the keys ``host``, ``user_name``, ``time`` and
        ``request``. The request value does not include the surrounding
        double quotes.
    """
    # Raw string, compiled with re.VERBOSE so whitespace and "#" comments
    # inside the pattern are ignored. Each field uses a bounded class rather
    # than a greedy ".*", so a match cannot spill into later quoted fields
    # on the same line.
    pattern = r"""
        (?P<host>\S+)              # first field
        \s+
        \S+                        # second field, not captured
        \s+
        (?P<user_name>\S+)         # user name, may be "-"
        \s+
        \[(?P<time>[^\]]+)\]       # timestamp inside literal brackets
        \s+
        "(?P<request>[^"]*)"       # request text; quotes stay outside the group
    """
    with open(path, "r", encoding="utf-8") as file:
        logdata = file.read()
    return [
        item.groupdict()
        for item in re.finditer(pattern, logdata, re.VERBOSE)
    ]
TA贡献1775条经验 获得超11个赞
您使用 \w 来匹配 user_name,但 \w 不包括 -,而 - 可能出现在日志中(参见通用日志格式 CLF),因此可以改用 \S+(一个或多个非空白字符)。
对于 time,可以创建一个捕获组,只允许该字段预期出现的字符(字符类:\w、\s,用于时区的 - 和 +,用于日期的 /,以及用于时间的 :),并用字面量方括号括起来。
对于 request,可以匹配一对双引号 " 之间的内容。
import re
# Access-log entry matcher, assembled from one fragment per field.
# Only entries whose quoted request text begins with "POST" are matched.
regex = re.compile(
    "".join(
        (
            r'(?P<host>(?:\d+\.){3}\d+)\s+',     # four dot-separated numbers
            r'(?:\S+)\s+',                       # second field, not captured
            r'(?P<user_name>\S+)\s+',            # any non-whitespace run, e.g. "-"
            r'\[(?P<time>[-+\w\s:/]+)\]\s+',     # timestamp inside literal brackets
            r'"(?P<request>POST.+?)"',           # lazy up to the next double quote
        )
    )
)
def logs(path="sample.txt", pattern=None):
    """Parse an access log into a list of dicts, one per matched entry.

    Args:
        path: Location of the log file. Defaults to the original
            hard-coded ``sample.txt``.
        pattern: Compiled regular expression with named groups. Defaults to
            the module-level ``regex``.

    Returns:
        A list with one ``groupdict()`` per match, in file order. Text that
        does not match the pattern is skipped.
    """
    if pattern is None:
        pattern = regex
    with open(path, "r", encoding="utf-8") as f:
        logdata = f.read()
    return [m.groupdict() for m in pattern.finditer(logdata)]
# Parse the log file and print the resulting list of dicts.
print(logs())
输出(为了测试,第二条记录是把第一行的 user_name 替换为“-”后得到的):
[
{
"host":"146.204.224.152",
"user_name":"feest6811",
"time":"21/Jun/2019:15:45:24 -0700",
"request":"POST /incentivize HTTP/1.1"
},
{
"host":"146.204.224.152",
"user_name":"-",
"time":"21/Jun/2019:15:45:24 -0700",
"request":"POST /incentivize HTTP/1.1"
},
{
"host":"144.23.247.108",
"user_name":"auer7552",
"time":"21/Jun/2019:15:45:35 -0700",
"request":"POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1"
},
...
添加回答
举报