python爬虫headers处理小工具源码优化（支持浏览器、Charles、Fiddler）

Clinton 发表于 2023-4-16 00:27

基于之前的源代码进行优化（支持浏览器、Charles、Fiddler），原帖：https://www.52pojie.cn/thread-1144758-1-1.html

```
import tkinter as tk
import tkinter.messagebox

class MyApp(tk.Tk):
    """Fixed-size Tk window that converts pasted raw headers into a dict literal.

    The left text box receives the raw header text, the button triggers the
    conversion, and the right text box shows the resulting
    ``headers = {...}`` source text.
    """

    def __init__(self):
        """Create the 800x560 window, centre it, and lay out its widgets."""
        super().__init__()
        self.title('header处理工具')
        win_width = self.winfo_screenwidth()
        win_height = self.winfo_screenheight()
        my_width = 800
        my_height = 560
        # Centre the window on the screen.
        x = (win_width - my_width) // 2
        y = (win_height - my_height) // 2
        self.geometry("%dx%d+%d+%d" % (my_width, my_height, x, y))
        # Identical min and max sizes make the window non-resizable.
        self.maxsize(my_width, my_height)
        self.minsize(my_width, my_height)
        self.button = tk.Button(self, text='转换', font=('宋体', 12), command=self.update)
        self.button.place(x=380, y=300)
        self.label1 = tk.Label(self, text='原始文本', font=('宋体', 10))
        self.label1.place(x=160, y=5)
        # Pass the master explicitly instead of relying on the default root.
        self.text1 = tk.Text(self, width=50, height=40)
        self.text1.place(x=5, y=30)
        self.label2 = tk.Label(self, text='结果文本', font=('宋体', 10))
        self.label2.place(x=600, y=5)
        self.text2 = tk.Text(self, width=50, height=40)
        self.text2.place(x=440, y=30)

    @staticmethod
    def format_headers(raw):
        """Return ``headers = {...}`` source text built from raw header lines.

        Each non-blank line of *raw* is parsed as one header:

        * ``name<TAB>value`` - split at the first tab (presumably the
          table-copy format of Charles/Fiddler; confirm against real input).
        * ``:name: value`` - a name with a leading colon (HTTP/2 style
          pseudo-header); the leading colon is dropped from the key.
        * ``name: value`` - split at the first colon.

        Surrounding whitespace is stripped from names and values. Lines
        without a separator or without a name are skipped. Names and values
        are rendered with ``repr`` so embedded quotes and backslashes still
        yield a valid Python literal.
        """
        entries = []
        for line in raw.split('\n'):
            if not line.strip():
                continue
            if '\t' in line:
                key, _, value = line.partition('\t')
            else:
                text = line.strip()
                # Drop the leading colon of a pseudo-header so the split
                # below happens at the name/value colon, not the prefix.
                if text.startswith(':'):
                    text = text[1:]
                key, sep, value = text.partition(':')
                if not sep:
                    # No separator at all: not a header line.
                    continue
            key = key.strip()
            value = value.strip()
            if not key:
                continue
            entries.append(f"{key!r}: {value!r}")
        # Joining avoids index-based comma placement, which misplaces commas
        # when the same line appears more than once.
        return "headers = {\n" + ",\n".join(entries) + "\n}"

    def update(self):
        """Convert the left text box's content and show it in the right one.

        Shows an info dialog instead when the input is empty or whitespace.

        NOTE(review): this method shadows ``tk.Tk.update``; the name is kept
        because the button callback (and possibly other callers) use it.
        """
        # 'end-1c' excludes the newline the Text widget always appends.
        raw = self.text1.get('1.0', 'end-1c')
        if not raw.strip():
            tkinter.messagebox.showinfo('提示', '请先填入内容继续')
            return
        self.text2.delete('1.0', 'end')
        self.text2.insert('insert', self.format_headers(raw))
        self.text2.update()

def main():
    """Create the header-conversion window and enter the Tk event loop."""
    app = MyApp()
    app.mainloop()

# Launch the GUI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
```

mainblog 发表于 2023-4-16 10:46

爬虫工具库，你值得拥有https://spidertools.cn/

塞北的雪 发表于 2023-4-16 02:08

[ 本帖最后由 塞北的雪 于 2023-4-16 02:13 编辑 ]
# coding=utf8
# Turn raw "Name: value" header lines into '"Name":"value"' pairs using a
# single multi-line regex substitution.
import re

# Group 1: header name (lazy, up to the first colon); group 2: the value.
# Only one optional space/tab is consumed after the colon; "\s?" could also
# eat the newline of an empty-valued header and merge it with the next line.
regex = r"^(.+?):[ \t]?(.*)"
test_str = ("Host: static.52pojie.cn\n"
            "Connection: keep-alive\n"
            "Pragma: no-cache\n"
            "Cache-Control: no-cache\n"
            "User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36\n"
            "DNT: 1\n"
            "Accept: */*\n"
            "Sec-Fetch-Site: same-site\n"
            "Sec-Fetch-Mode: no-cors\n"
            "Sec-Fetch-Dest: script\n"
            "Referer: https://www.52pojie.cn//\n"
            "Accept-Encoding: gzip, deflate, br\n"
            "Accept-Language: zh-CN,zh;q=0.9\n"
            "Cookie: wzws_sessionid=D4C96D0943C796290377FE9F80432C16")
# Python's re module references groups as \1 / \2 (or \g<1>); "$1" would be
# inserted literally instead of the captured text.
subst = r'"\1":"\2"'
# count/flags are passed by keyword: positional use is deprecated.
result = re.sub(regex, subst, test_str, count=0, flags=re.MULTILINE)
if result:
    print(result)

"Host":"static.52pojie.cn"
"Connection":"keep-alive"
"Pragma":"no-cache"
"Cache-Control":"no-cache"
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
"DNT":"1"
"Accept":"*/*"
"Sec-Fetch-Site":"same-site"
"Sec-Fetch-Mode":"no-cors"
"Sec-Fetch-Dest":"script"
"Referer":"https://www.52pojie.cn//"
"Accept-Encoding":"gzip, deflate, br"
"Accept-Language":"zh-CN,zh;q=0.9"
"Cookie":"wzws_sessionid=D4C96D0943C796290377FE9F80432C16"

why3316 发表于 2023-4-16 07:34

谢谢大佬分享，太牛了

gst 发表于 2023-4-16 07:41

常处理headers的有用

梦汐 发表于 2023-4-16 08:36

用正则好很多，(.*)\s(.*)

ccskcq 发表于 2023-4-16 09:06

学习python。

n1ghtc4t 发表于 2023-4-16 11:55

支持一下

LaoKuang 发表于 2023-4-16 13:35

感谢分享

Alice27 发表于 2023-4-16 15:26

学习了！感谢分享

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

python爬虫headers处理小工具源码优化（支持浏览器、Charles、Fiddler）