# 本帖最后由 苏紫方璇 于 2023-8-28 00:37 编辑
# [Python] 纯文本查看 复制代码
import urllib.request
from lxml import etree
# https://www.sanguosha.cn/pc/wallpaper.html?type=1
# https://www.sanguosha.cn/pc/wallpaper.html?type=1&page=2
# https://www.sanguosha.cn/pc/wallpaper.html?type=2
# https://www.sanguosha.cn/pc/wallpaper.html?type=2&page=2
# https://www.sanguosha.cn/pc/wallpaper.html?type=2&page=3
# https://www.sanguosha.cn/pc/wallpaper.html?type=3
def create_request(page, wallpaper_type=1):
    """Build the HTTP request for one wallpaper listing page.

    Args:
        page: Page number of the listing. Page 1 is requested without a
            ``page`` query parameter; any other value is appended as
            ``&page=<page>``.
        wallpaper_type: Value of the ``type`` query parameter. Defaults to
            1, which is the category this function always requested before
            the parameter existed.

    Returns:
        A ``urllib.request.Request`` for the listing URL, carrying a
        desktop-browser ``User-Agent`` header.
    """
    url = "https://www.sanguosha.cn/pc/wallpaper.html?type=" + str(wallpaper_type)
    if page != 1:
        # The first page is addressed without an explicit page parameter.
        url += "&page=" + str(page)
    # Browser-like User-Agent; presumably the site rejects urllib's default
    # agent string -- TODO confirm.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/115.0.0.0 Safari/537.36"}
    return urllib.request.Request(url=url, headers=headers)
def response_url(request):
    """Send *request* and return the response body as text.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the connection even if read/decode fails;
    # without it the response object was left open.
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")
def data_lod(content):
    """Download every wallpaper referenced in one listing page.

    The page is parsed with lxml; ``data-cross`` attribute values and ``alt``
    attribute values found under the second ``<ul>`` are paired in document
    order. Each image is saved as ``<alt><counter>.jpg`` in the fixed target
    directory.

    Args:
        content: HTML text of a wallpaper listing page.

    Side effects:
        Increments the module-level ``num`` counter once per image (the
        caller must have defined ``num`` before the first call), creates the
        target directory if needed, and writes the image files.
    """
    import os
    from urllib.parse import urljoin

    global num
    save_dir = "D:/三国杀节气皮肤/"
    # urlretrieve does not create missing directories, so make sure the
    # destination exists before the first download.
    os.makedirs(save_dir, exist_ok=True)

    tree = etree.HTML(content)
    https_list = tree.xpath("//ul[2]//@data-cross")
    name_list = tree.xpath("//ul[2]//@alt")
    # zip pairs the two attribute lists positionally and stops at the shorter
    # one, instead of raising IndexError when the counts differ.
    for base_url, name in zip(https_list, name_list):
        num += 1
        # urljoin yields the same URL as plain concatenation for paths that
        # start with "/", and also copes with absolute or slash-less values.
        data_url = urljoin("https://www.sanguosha.cn", base_url)
        urllib.request.urlretrieve(url=data_url, filename=save_dir + name + str(num) + ".jpg")
if __name__ == '__main__':
    # Running image counter; data_lod() updates it through ``global num`` and
    # appends it to every saved file name.
    num = 0
    begin_page = int(input("请输入起始页码:"))
    # Prompt corrected: it asks for the end page (it previously read
    # "起始结束", a typo).
    end_page = int(input("请输入结束页码:"))
    # end_page is inclusive, hence the + 1.
    for page in range(begin_page, end_page + 1):
        request = create_request(page)
        content = response_url(request)
        data_lod(content)