本帖最后由 lihu5841314 于 2021-6-19 18:14 编辑
[Asm] 纯文本查看 复制代码 #爬取4k美图
import requests,os,time
from lxml import etree
import asyncio
import aiofile
from aiohttp import ClientSession
"""
url = "https://desk.zol.com.cn/meinv/yongzhuangmeinv/4096x2160/" #泳装
url2 = "https://desk.zol.com.cn/meinv/qingchunmeinv/4096x2160/" #清纯
url3 = "https://desk.zol.com.cn/meinv/yangyanmeinv/4096x2160/" #养眼
"""
print("可以下载的高清壁纸的类型1**为泳装,2**为清纯,3**为养眼,4**为古典,5**为长腿,6**为制服,7**为日本 ,8**为动漫 输入1 2 3 4 5 6 7 8 9 选择")
dic ={
1:"yongzhuangmeinv",
2:"qingchunmeinv",
3:"yangyanmeinv",
4:"gudianmeinv",
5:"changtuimeinv",
6:"zhifumeinv",
7:"/ribenmeinv",
8:"dongmanmeinv",
}
name = int(input("请选择你需要下载的高清壁纸的类型:"))
name = dic[name]
url = f"https://desk.zol.com.cn/meinv/{name}/"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
"Referer": "https://desk.zol.com.cn/"
}
if not os.path.exists("img"):
os.mkdir("img")
#1.获取下一页url
def get_response(url):
    """GET *url* with the shared headers and return the decoded Response."""
    response = requests.get(url, headers=headers)
    # Let requests guess the page encoding so the Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    return response
def get_tree(resp):
    """Parse the category index page and return the per-picture detail-page URLs."""
    tree = etree.HTML(resp.text)
    items = tree.xpath('//ul[@class="pic-list2 clearfix"]/li')
    # First anchor of each <li> links to that picture's detail page.
    return ["https://desk.zol.com.cn/" + item.xpath('./a/@href')[0] for item in items]
def get_detail(url):
    """Collect the full-resolution image URLs for every picture on a detail page.

    Always returns a (possibly empty) list. Fixes two defects in the original:
    it returned from inside the loop, so only the first image was ever
    collected, and on an IndexError it fell through to an implicit ``None``
    return, crashing the caller's iteration. Parse failures are now skipped
    per item instead of aborting the whole page.
    """
    resp = get_response(url)
    tree = etree.HTML(resp.text)
    li_list = tree.xpath('//ul[@id="showImg"]/li')
    img_down_urls = []
    for li in li_list:
        try:
            next_page_url = "https://desk.zol.com.cn/" + li.xpath('./a/@href')[0]
            print("图片下载页**url**解析完成")
            resp1 = get_response(next_page_url)
            tree1 = etree.HTML(resp1.text)
            # First entry of the resolution list links to the highest resolution.
            img_detail_url = "https://desk.zol.com.cn/" + tree1.xpath('//dd[@id="tagfbl"]/a[1]/@href')[0]
            print("高清图片**下载地址**解析完成")
            resp2 = get_response(img_detail_url)
            tree2 = etree.HTML(resp2.text)
            img_down_urls.append(tree2.xpath('//body/img[1]/@src')[0])
        except IndexError:
            # Markup did not match the expected structure for this item; skip it.
            print("list index out of range")
    return img_down_urls
async def img_save(url):
    """Download one image asynchronously and write it under img/."""
    file_name = url.split("/")[-1]
    target = "img/" + file_name
    print(file_name, "****协程正在下载****")
    async with ClientSession() as session:
        async with await session.get(url, headers=headers) as resp:
            async with aiofile.async_open(target, "wb") as f:
                payload = await resp.read()
                await f.write(payload)
    print(file_name, "****下载完成****")
def main():
    """Crawl the chosen category and download every image concurrently."""
    start = time.time()
    resp = get_response(url)
    img_urls = get_tree(resp)
    tasks = []
    for page_url in img_urls:
        img_down_urls = get_detail(page_url)
        # get_detail may return None/empty when parsing fails; skip such pages
        # instead of raising TypeError on iteration.
        if not img_down_urls:
            continue
        for img_down_url in img_down_urls:
            tasks.append(asyncio.ensure_future(img_save(img_down_url)))
    # asyncio.wait() raises ValueError when given an empty task set.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
    end = time.time()
    print("一共消耗了", end - start)
if __name__ == '__main__':
    # Event loop is module-global because main() schedules tasks onto it.
    loop = asyncio.get_event_loop()  # 创建事件循环
    try:
        main()
    finally:
        # Fixed: close the loop even when main() raises, so the resource
        # is not leaked on error.
        loop.close()
加点品种,提升下速度。下面是改进后的版本: [Asm] 纯文本查看 复制代码 #爬取4k美图
import requests,os,time
from lxml import etree
import asyncio
import aiofile
from aiohttp import ClientSession
"""
url = "https://desk.zol.com.cn/meinv/yongzhuangmeinv/4096x2160/" #泳装
url2 = "https://desk.zol.com.cn/meinv/qingchunmeinv/4096x2160/" #清纯
url3 = "https://desk.zol.com.cn/meinv/yangyanmeinv/4096x2160/" #养眼
"""
print("可以下载的高清壁纸的类型1**为泳装,1**为清纯,3**为养眼 输入1 2 3 选择")
dic ={
1:"yongzhuangmeinv",
2:"qingchunmeinv",
3:"yangyanmeinv"
}
name = int(input("请选择你需要下载的高清壁纸的类型:"))
name = dic[name]
url = f"https://desk.zol.com.cn/meinv/{name}/4096x2160/"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
"Referer": "https://desk.zol.com.cn/"
}
if not os.path.exists("img"):
os.mkdir("img")
#1.获取下一页url
def get_response(url):
    """GET *url* with the shared headers and return the decoded Response."""
    response = requests.get(url, headers=headers)
    # Let requests guess the page encoding so the Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    return response
def get_tree(resp):
    """Parse the category index page and return the per-picture detail-page URLs."""
    tree = etree.HTML(resp.text)
    items = tree.xpath('//ul[@class="pic-list2 clearfix"]/li')
    # First anchor of each <li> links to that picture's detail page.
    return ["https://desk.zol.com.cn/" + item.xpath('./a/@href')[0] for item in items]
def get_detail(url):
    """Collect the full-resolution image URLs for every picture on a detail page.

    Always returns a (possibly empty) list. Fixes two defects in the original:
    it returned from inside the loop, so only the first image was ever
    collected, and on an IndexError it fell through to an implicit ``None``
    return, crashing the caller's iteration. Parse failures are now skipped
    per item instead of aborting the whole page.
    """
    resp = get_response(url)
    tree = etree.HTML(resp.text)
    li_list = tree.xpath('//ul[@id="showImg"]/li')
    img_down_urls = []
    for li in li_list:
        try:
            next_page_url = "https://desk.zol.com.cn/" + li.xpath('./a/@href')[0]
            print("图片下载页**url**解析完成")
            resp1 = get_response(next_page_url)
            tree1 = etree.HTML(resp1.text)
            # First entry of the resolution list links to the highest resolution.
            img_detail_url = "https://desk.zol.com.cn/" + tree1.xpath('//dd[@id="tagfbl"]/a[1]/@href')[0]
            print("高清图片**下载地址**解析完成")
            resp2 = get_response(img_detail_url)
            tree2 = etree.HTML(resp2.text)
            img_down_urls.append(tree2.xpath('//body/img[1]/@src')[0])
        except IndexError:
            # Markup did not match the expected structure for this item; skip it.
            print("list index out of range")
    return img_down_urls
async def img_save(url):
    """Download one image asynchronously and write it under img/."""
    file_name = url.split("/")[-1]
    target = "img/" + file_name
    print(file_name, "****协程正在下载****")
    async with ClientSession() as session:
        async with await session.get(url, headers=headers) as resp:
            async with aiofile.async_open(target, "wb") as f:
                payload = await resp.read()
                await f.write(payload)
    print(file_name, "****下载完成****")
def main():
    """Crawl the chosen category and download every image concurrently."""
    start = time.time()
    resp = get_response(url)
    img_urls = get_tree(resp)
    tasks = []
    for page_url in img_urls:
        img_down_urls = get_detail(page_url)
        # get_detail may return None/empty when parsing fails; skip such pages
        # instead of raising TypeError on iteration.
        if not img_down_urls:
            continue
        for img_down_url in img_down_urls:
            tasks.append(asyncio.ensure_future(img_save(img_down_url)))
    # asyncio.wait() raises ValueError when given an empty task set.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
    end = time.time()
    print("一共消耗了", end - start)
if __name__ == '__main__':
    # Event loop is module-global because main() schedules tasks onto it.
    loop = asyncio.get_event_loop()  # 创建事件循环
    try:
        main()
    finally:
        # Fixed: close the loop even when main() raises, so the resource
        # is not leaked on error.
        loop.close()
异步协程速度快些。下面再附一个线程池(multiprocessing.dummy)版本: [Asm] 纯文本查看 复制代码 #爬取4k美图
import requests,os,time
from lxml import etree
from multiprocessing.dummy import Pool
"""
url = "https://desk.zol.com.cn/meinv/yongzhuangmeinv/4096x2160/" #泳装
url2 = "https://desk.zol.com.cn/meinv/qingchunmeinv/4096x2160/" #清纯
url3 = "https://desk.zol.com.cn/meinv/yangyanmeinv/4096x2160/" #养眼
"""
print("可以下载的高清壁纸的类型1**为泳装,1**为清纯,3**为养眼 输入1 2 3 选择")
dic ={
1:"yongzhuangmeinv",
2:"qingchunmeinv",
3:"yangyanmeinv"
}
name = int(input("请选择你需要下载的高清壁纸的类型:"))
name = dic[name]
url = f"https://desk.zol.com.cn/meinv/{name}/4096x2160/"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
"Referer": "https://desk.zol.com.cn/"
}
if not os.path.exists("img"):
os.mkdir("img")
#1.获取下一页url
def get_response(url):
    """GET *url* with the shared headers and return the decoded Response."""
    response = requests.get(url, headers=headers)
    # Let requests guess the page encoding so the Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    return response
def get_tree(resp):
    """Parse the category index page and return the per-picture detail-page URLs."""
    tree = etree.HTML(resp.text)
    items = tree.xpath('//ul[@class="pic-list2 clearfix"]/li')
    # First anchor of each <li> links to that picture's detail page.
    return ["https://desk.zol.com.cn/" + item.xpath('./a/@href')[0] for item in items]
def get_detail(url):
    """Collect the full-resolution image URLs for every picture on a detail page.

    Always returns a (possibly empty) list. Fixes two defects in the original:
    it returned from inside the loop, so only the first image was ever
    collected, and on an IndexError it fell through to an implicit ``None``
    return, crashing the caller. Parse failures are now skipped per item
    instead of aborting the whole page.
    """
    resp = get_response(url)
    tree = etree.HTML(resp.text)
    li_list = tree.xpath('//ul[@id="showImg"]/li')
    img_down_urls = []
    for li in li_list:
        try:
            next_page_url = "https://desk.zol.com.cn/" + li.xpath('./a/@href')[0]
            print("图片下载页**url**解析完成")
            resp1 = get_response(next_page_url)
            tree1 = etree.HTML(resp1.text)
            # First entry of the resolution list links to the highest resolution.
            img_detail_url = "https://desk.zol.com.cn/" + tree1.xpath('//dd[@id="tagfbl"]/a[1]/@href')[0]
            print("高清图片**下载地址**解析完成")
            resp2 = get_response(img_detail_url)
            tree2 = etree.HTML(resp2.text)
            img_down_urls.append(tree2.xpath('//body/img[1]/@src')[0])
        except IndexError:
            # Markup did not match the expected structure for this item; skip it.
            print("list index out of range")
    return img_down_urls
def img_save(url):
    """Download one image synchronously and save it under img/."""
    response = get_response(url)
    file_name = url.split("/")[-1]
    print(file_name, "****正在下载****")
    with open("img/" + file_name, "wb") as f:
        f.write(response.content)
    print(file_name, "****下载完成****")
def main():
    """Crawl the chosen category, downloading images with a 4-thread pool."""
    pool = Pool(4)
    start = time.time()
    resp = get_response(url)
    img_urls = get_tree(resp)
    for page_url in img_urls:
        img_down_urls = get_detail(page_url)
        # get_detail may return None/empty on parse failure; skip instead of
        # crashing pool.map with a non-iterable.
        if img_down_urls:
            pool.map(img_save, img_down_urls)
    # Close and join ONCE, after all pages are processed: closing inside the
    # page loop would make the second iteration fail on a closed pool.
    pool.close()
    pool.join()
    end = time.time()
    print("一共消耗了", end - start)
# Script entry point: run the crawler only when executed directly.
if __name__ == '__main__':
    main()
封装成EXE的在网盘
链接:https://pan.baidu.com/s/1rw8KZ39dS90hm9HjrnExaA
提取码:1234
复制这段内容后打开百度网盘手机App,操作更方便哦 |