Python爬虫headers处理小工具源码优化(支持浏览器、Charles、Fiddler)
基于之前的源代码进行优化(支持浏览器、Charles、Fiddler):https://www.52pojie.cn/thread-1144758-1-1.html
```python
import tkinter as tk
import tkinter.messagebox
class MyApp(tk.Tk):
    """Fixed-size window that converts pasted HTTP headers to a dict literal.

    The left text box receives raw header lines, either ``Name: value`` or
    tab-separated ``Name<TAB>value``. Pressing the button writes a
    ``headers = {...}`` snippet into the right text box.
    """

    def __init__(self):
        """Build the window, centre it on the screen and lay out the widgets."""
        super().__init__()
        self.title('header处理工具')
        my_width = 800
        my_height = 560
        # Centre the window; integer division keeps the geometry string clean.
        x = (self.winfo_screenwidth() - my_width) // 2
        y = (self.winfo_screenheight() - my_height) // 2
        self.geometry("%dx%d+%d+%d" % (my_width, my_height, x, y))
        # Identical min and max sizes make the window non-resizable.
        self.maxsize(my_width, my_height)
        self.minsize(my_width, my_height)

        self.button = tk.Button(
            self, text='转换', font=('宋体', 12), command=self.update
        )
        self.button.place(x=380, y=300)

        self.label1 = tk.Label(self, text='原始文本', font=('宋体', 10))
        self.label1.place(x=160, y=5)
        # Pass the parent explicitly instead of relying on the default root.
        self.text1 = tk.Text(self, width=50, height=40)
        self.text1.place(x=5, y=30)

        self.label2 = tk.Label(self, text='结果文本', font=('宋体', 10))
        self.label2.place(x=600, y=5)
        self.text2 = tk.Text(self, width=50, height=40)
        self.text2.place(x=440, y=30)

    # NOTE(review): this name shadows ``tkinter.Misc.update`` on the window
    # object; it is kept so existing callers of ``app.update()`` still work.
    def update(self):
        """Convert the input box contents and show the result.

        Shows an information dialog instead when the input box is blank.
        """
        raw = self.text1.get('1.0', 'end')
        if not raw.strip():
            tkinter.messagebox.showinfo('提示', '请先填入内容继续')
            return
        self.text2.delete('1.0', 'end')
        self.text2.insert('insert', self.format_headers(raw))
        self.text2.update()

    @staticmethod
    def format_headers(raw):
        """Return a ``headers = {...}`` snippet built from raw header text.

        Args:
            raw: Header lines separated by newlines.

        Returns:
            The snippet as a string, one ``'name': 'value'`` entry per line.
            Lines that are blank or have no separator are skipped.
        """
        entries = []
        for line in raw.splitlines():
            pair = MyApp._split_header(line)
            if pair is None:
                continue
            name, value = pair
            # repr() quotes and escapes, so values containing quotes or
            # backslashes still yield a valid Python literal.
            entries.append(f"{name!r}: {value!r}")
        return "headers = {\n" + ",\n".join(entries) + "\n}"

    @staticmethod
    def _split_header(line):
        """Split one header line into ``(name, value)``.

        Returns ``None`` for lines that are blank or carry no separator.
        """
        if not line.strip():
            return None
        if '\t' in line:
            # Tab-separated form: split on the first tab only.
            name, value = line.split('\t', 1)
        else:
            body = line.lstrip()
            # Pseudo-headers such as ``:authority: host`` start with a colon;
            # drop it so the split lands on the real separator.
            if body.startswith(':'):
                body = body[1:]
            if ':' not in body:
                return None
            name, value = body.split(':', 1)
        name = name.strip()
        if not name:
            return None
        # Strip only surrounding whitespace; spaces inside the value stay.
        return name, value.strip()
def main():
    """Create the header-conversion window and run its Tk event loop."""
    MyApp().mainloop()


# Only launch the GUI when this file is run as a script.
if __name__ == "__main__":
    main()
```
爬虫工具库,你值得拥有:https://spidertools.cn/ [本帖最后由 塞北的雪 于 2023-4-16 02:13 编辑]

# coding=utf8
# Rewrite raw "Name: value" header lines as "Name":"value" pairs using a regex.
import re

# Group 1 is the header name (everything up to the first colon) and group 2
# is the value. Only a space or tab is allowed after the colon: ``\s`` would
# also match a newline and merge an empty-valued header with the next line.
regex = r"^(.+?):[ \t]?(.*)$"
test_str = ("Host: static.52pojie.cn\n"
            "Connection: keep-alive\n"
            "Pragma: no-cache\n"
            "Cache-Control: no-cache\n"
            "User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36\n"
            "DNT: 1\n"
            "Accept: */*\n"
            "Sec-Fetch-Site: same-site\n"
            "Sec-Fetch-Mode: no-cors\n"
            "Sec-Fetch-Dest: script\n"
            "Referer: https://www.52pojie.cn//\n"
            "Accept-Encoding: gzip, deflate, br\n"
            "Accept-Language: zh-CN,zh;q=0.9\n"
            "Cookie: wzws_sessionid=D4C96D0943C796290377FE9F80432C16")
# Python's ``re`` writes backreferences as \1 and \2; ``$1``/``$2`` would be
# inserted literally.
subst = r'"\1":"\2"'
# ``count`` and ``flags`` are passed by keyword; passing them positionally is
# deprecated in recent Python versions.
result = re.sub(regex, subst, test_str, count=0, flags=re.MULTILINE)
if result:
    print(result)
"Host":"static.52pojie.cn"
"Connection":"keep-alive"
"Pragma":"no-cache"
"Cache-Control":"no-cache"
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
"DNT":"1"
"Accept":"*/*"
"Sec-Fetch-Site":"same-site"
"Sec-Fetch-Mode":"no-cors"
"Sec-Fetch-Dest":"script"
"Referer":"https://www.52pojie.cn//"
"Accept-Encoding":"gzip, deflate, br"
"Accept-Language":"zh-CN,zh;q=0.9"
"Cookie":"wzws_sessionid=D4C96D0943C796290377FE9F80432C16"
谢谢大佬分享,太牛了 常处理headers的有用 用正则好很多,(.*)\s(.*) 学习python。 支持一下 感谢分享 学习了!感谢分享
页:
[1]
2