import requests
from multiprocessing.dummy import Pool
import os
from lxml import etree
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    "Referer": "https://www.vmgirls.com/",
}
if not os.path.exists("./mv_img"):
    os.mkdir("./mv_img")
def resp(url):
    # fetch a page and return it as a parsed lxml tree
    rep = requests.get(url=url, headers=headers)
    rep.encoding = rep.apparent_encoding
    tree = etree.HTML(rep.text)
    return tree
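
# Sketch of a slightly more defensive fetch helper (not part of the original post): a timeout
# plus raise_for_status() keeps a dead or slow page from hanging a pool worker or being parsed
# as if it were a normal gallery page. The name resp_safe is only illustrative.
def resp_safe(url):
    rep = requests.get(url=url, headers=headers, timeout=10)
    rep.raise_for_status()
    rep.encoding = rep.apparent_encoding
    return etree.HTML(rep.text)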
def img_detail(tree):
    # collect the detail-page URL of every gallery card on the category index page
    lis = tree.xpath('//div[@class="container"]/div/div')
    # print(len(lis))  # returns 13 nodes here, yet only 10 galleries are visible on the page,
    # so indexing [0] below raises an out-of-range error for the extras; the try/except skips
    # them for now (an alternative selector is sketched right after this function)
    urls = []
    for li in lis:
        try:
            next_url = 'https://www.vmgirls.com' + li.xpath('.//a[@class="media-content"]/@href')[0]
            title = li.xpath('.//a[@class="media-content"]/@title')[0]
            urls.append(next_url)
            # print(next_url)
        except IndexError:
            print('Failed to process node {0}'.format(li))
    # print(urls)
    return urls
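
# Sketch of an alternative selector (not from the original post): grab the media-content anchors
# directly, so container divs without a link never enter the loop and the IndexError above
# disappears. It assumes the same class names already used in this script.
def img_detail_direct(tree):
    hrefs = tree.xpath('//div[@class="container"]//a[@class="media-content"]/@href')
    return ['https://www.vmgirls.com' + h for h in hrefs]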
def img_dow(url):
    # download every image on one gallery detail page
    tree = resp(url)
    a_lis = tree.xpath('//div[@class="nc-light-gallery"]/a')
    i = 0
    for a in a_lis:
        # the hrefs here mix in unrelated links; for now the try/except below filters them out,
        # so if you know a cleaner way please leave a comment (one idea is sketched after this function)
        img_down_url = 'https:' + a.xpath('./@href')[0]
        img_name = a.xpath('./@title')[0]
        try:
            i += 1  # append a running number, otherwise images on the same page overwrite each other (see the naming sketch after the script)
            path = "./mv_img/" + img_name + str(i) + ".webp"  # no image-processing library installed, so the .webp files are saved as-is
            rep1 = requests.get(url=img_down_url, headers=headers).content
            with open(path, "wb") as pf:
                print(img_name + str(i), "downloading")
                pf.write(rep1)
            print(img_name + str(i), "download finished")
        except Exception:
            print('Failed to process an item on {0}'.format(url))
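
# Sketch of one way to drop the unrelated links without relying on try/except, assuming the real
# image links end in a common image extension (not verified against the site; adjust as needed):
def is_image_href(href):
    return href.lower().endswith(('.webp', '.jpg', '.jpeg', '.png'))
# inside img_dow the loop body could then start with:
#     if not is_image_href(a.xpath('./@href')[0]):
#         continue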
if __name__ == '__main__':
    pool = Pool(4)
    # category index page to start from
    dic = {
        "小姐姐": "beauty",
        "少女情怀": "bilitis",
        "轻私房": "urllittlesex",
    }
    print("Pick a style (小姐姐, 少女情怀, 轻私房)")
    fen = input("Type the style exactly as listed: ")
    fenge = dic[fen]
    url = f'https://www.vmgirls.com/special/{fenge}/'
    tree = resp(url)
    urls = img_detail(tree)
    img = pool.map(img_dow, urls)  # download the galleries on 4 threads
    pool.close()
    pool.join()
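
On the duplicate-name problem: instead of appending a counter, the file name could be taken from the image URL itself. A minimal sketch, assuming the last path segment of each image URL is unique (unique_path is just an illustrative name):

from urllib.parse import urlparse
import os

def unique_path(img_down_url, save_dir="./mv_img"):
    # e.g. https://.../2021/06/abc123.webp -> ./mv_img/abc123.webp
    name = os.path.basename(urlparse(img_down_url).path)
    return os.path.join(save_dir, name)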