Upgraded the script a bit: you can now pick the starting page. The toplist runs to 100+ pages, so you can crawl them all in one go. If anyone who knows async or thread pools could give some pointers on downloading faster, please do!
[Python]
import requests
from lxml import etree
import os

if __name__ == "__main__":
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Mobile Safari/537.36'
    }
    url = 'https://wallhaven.cc/toplist?page=%d'
    # Choose the page range to crawl
    page_start = int(input('Enter the start page: '))
    page_end = int(input('Enter the end page: '))
    if not os.path.exists('./wallhaven热图'):
        os.mkdir('./wallhaven热图')
    for page in range(page_start, page_end + 1):
        print('Downloading images from page %d' % page)
        new_url = url % page
        page_text = requests.get(url=new_url, headers=headers).text
        tree = etree.HTML(page_text)
        li_list = tree.xpath('//*[@id="thumbs"]/section/ul/li')
        for li in li_list:
            # Each thumbnail links to a detail page that holds the full-size image
            detail_url = li.xpath('./figure/a/@href')[0]
            detail_page_text = requests.get(url=detail_url, headers=headers).text
            tree2 = etree.HTML(detail_page_text)
            img_src = tree2.xpath('//*[@id="wallpaper"]/@src')[0]
            img_name = img_src.split('/')[-1]
            img_path = 'wallhaven热图/' + img_name
            img_data = requests.get(url=img_src, headers=headers).content
            with open(img_path, 'wb') as fp:
                fp.write(img_data)
            print(img_name, 'downloaded!')
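Since the question above is about speeding this up with async or a thread pool, here is a minimal thread-pool sketch using concurrent.futures.ThreadPoolExecutor. It keeps the same headers, XPath expressions, and 'wallhaven热图' folder as the script above; max_workers=8 and the timeouts are arbitrary guesses, and heavy concurrency may run into wallhaven's rate limiting, so treat it as a starting point rather than a drop-in replacement.

[Python]
import os
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Mobile Safari/537.36'
}

def download_one(detail_url):
    """Fetch one detail page, then save the full-size wallpaper it points to."""
    detail_text = requests.get(detail_url, headers=headers, timeout=10).text
    tree = etree.HTML(detail_text)
    img_src = tree.xpath('//*[@id="wallpaper"]/@src')[0]
    img_name = img_src.split('/')[-1]
    img_data = requests.get(img_src, headers=headers, timeout=30).content
    with open(os.path.join('wallhaven热图', img_name), 'wb') as fp:
        fp.write(img_data)
    return img_name

if __name__ == '__main__':
    os.makedirs('wallhaven热图', exist_ok=True)
    page_start = int(input('Enter the start page: '))
    page_end = int(input('Enter the end page: '))

    # Collect every detail-page URL first (the listing pages are cheap to fetch)
    detail_urls = []
    for page in range(page_start, page_end + 1):
        page_text = requests.get('https://wallhaven.cc/toplist?page=%d' % page,
                                 headers=headers, timeout=10).text
        tree = etree.HTML(page_text)
        detail_urls += tree.xpath('//*[@id="thumbs"]/section/ul/li/figure/a/@href')

    # Then fetch detail pages and images concurrently; 8 workers is an arbitrary pick
    with ThreadPoolExecutor(max_workers=8) as pool:
        for name in pool.map(download_one, detail_urls):
            print(name, 'downloaded!')

A thread pool fits here because the work is I/O-bound (mostly waiting on HTTP responses); an asyncio + aiohttp version would look similar, but the parsing and file writes would need to be kept off the event loop or handled with async file I/O.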