可运行程序在 dist 文件夹下,如下所示。由于我这里使用的是 Ubuntu,所以可执行文件没有显示 .exe 后缀。
运行该程序会打开一个图形界面,输入相关信息后点击爬取,便可爬取图片壁纸。需要说明的是,程序开始爬取后,要等爬取完毕才可以关闭程序,
所以爬取页数尽量不要太大;当然,你也可以直接结束进程。
代码,附源码文件
import tkinter as tk
import time
import requests
import re
from bs4 import BeautifulSoup
def click_button():
    """Download wallpapers for the category and page range entered in the GUI.

    Reads the start page from ``message1``, the end page from ``message2``
    and the category slug from ``message``. These are module-level objects
    exposing ``.get()`` that are defined outside this block (presumably
    tkinter ``Entry`` widgets -- confirm against the GUI setup code).

    For every listing page in ``range(start, end)`` the function extracts
    each wallpaper's id and title with a regex, opens the wallpaper's
    1920x1080 detail page, locates the ``<img>`` inside the first
    ``<td align="left">`` and saves it as ``img/<category>/<title>.jpg``.

    The whole crawl runs synchronously inside this call; nothing is
    returned.

    Raises:
        ValueError: if either page field does not contain an integer.
    """
    import os  # local import: only this function needs it

    start_page = int(message1.get())
    end_page = int(message2.get())
    imgtype = message.get()

    # Seconds to wait on each HTTP request; without a timeout a stalled
    # connection would block this callback indefinitely.
    timeout = 10

    # Compiled once instead of once per listing page.
    link_pattern = re.compile(
        r'<a href="/desk/(?P<id>.*?).htm" title="(?P<name>.*?)" target', re.S)

    # open() does not create missing directories, so make sure the target
    # folder exists before the first image is written.
    out_dir = os.path.join('img', imgtype)
    os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): the end page is exclusive, exactly as in the original
    # code -- confirm whether the user expects it to be included.
    # NOTE(review): confirm the site really serves the first listing page
    # as index_1.htm.
    for page in range(start_page, end_page):
        print(f"正在爬取第{page}页")
        print(imgtype)
        list_url = f"http://www.netbian.com/{imgtype}/index_{page}.htm"
        try:
            resp = requests.get(list_url, timeout=timeout)
        except requests.RequestException as err:
            print(f"第{page}页请求失败: {err}")
            continue
        print(resp)
        resp.encoding = 'gbk'

        for match in link_pattern.finditer(resp.text):
            wallpaper_id = match.group("id")
            name = match.group("name")
            detail_url = (
                f'http://www.netbian.com/desk/{wallpaper_id}-1920x1080.htm')
            try:
                detail_resp = requests.get(detail_url, timeout=timeout)
                detail_page = BeautifulSoup(detail_resp.text, "html.parser")
                cell = detail_page.find("td", align="left")
                img = cell.find("img") if cell is not None else None
                src = img.get("src") if img is not None else None
                if not src:
                    # Detail page lacks the expected markup; skip this
                    # wallpaper instead of aborting the whole crawl.
                    print(f"跳过 {name}: 未找到图片地址")
                    continue
                img_resp = requests.get(src, timeout=timeout)
                # Do not save an HTTP error body as a .jpg file.
                img_resp.raise_for_status()
            except requests.RequestException as err:
                print(f"跳过 {name}: {err}")
                continue

            # Titles come straight from the page and may contain characters
            # that are path separators or illegal in file names.
            safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
            with open(os.path.join(out_dir, safe_name + '.jpg'),
                      mode="wb") as f:
                f.write(img_resp.content)
            # NOTE(review): the source lost its indentation; reporting and
            # pausing once per image is assumed here.
            print("oveer!")
            time.sleep(0.1)