Everyone is scraping pretty-girl pictures, so I scraped some for practice too. A senior classmate of mine did it with regex; I did it with XPath. Posting here to compare notes.
Last edited by lihu5841314 on 2021-6-13 10:04

import requests
from multiprocessing.dummy import Pool
import os
from lxml import etree

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    "Referer": "https://www.vmgirls.com/"
}

if not os.path.exists("./mv_img"):
    os.mkdir("./mv_img")


def resp(url):
    # fetch a page and return the parsed lxml tree
    rep = requests.get(url=url, headers=headers)
    rep.encoding = rep.apparent_encoding
    tree = etree.HTML(rep.text)
    return tree


def img_detail(tree):
    # collect the detail-page URLs from the category listing
    lis = tree.xpath('//div[@class="container"]/div/div')
    # print(len(lis))  # returns 13 cards, but only 10 show on the page; no idea why, and it made the loop below throw out-of-range errors
    urls = []
    for li in lis:
        try:
            next_url = 'https://www.vmgirls.com' + li.xpath('.//a[@class="media-content"]/@href')[0]
            title = li.xpath('.//a[@class="media-content"]/@title')[0]
            urls.append(next_url)
            # print(next_url)
        except IndexError:
            print('Failed to process card {0}'.format(li))
    # print(urls)
    return urls


def img_dow(url):
    # download every image linked on one detail page
    tree = resp(url)
    a_lis = tree.xpath('//div[@class="nc-light-gallery"]/a')
    i = 0
    for a in a_lis:
        # some non-image URLs are mixed in with the hrefs; handled with try/except for now --
        # leave a comment if you know a better way (there is a filtering sketch right after the code)
        img_down_url = 'https:' + a.xpath('./@href')[0]
        img_name = a.xpath('./@title')[0]
        try:
            i += 1  # without a counter, images sharing a title would overwrite each other (the sketch after the code shows an alternative)
            path = "./mv_img/" + img_name + str(i) + ".webp"  # no image-processing library installed, so the .webp files are saved as-is
            rep1 = requests.get(url=img_down_url, headers=headers).content
            with open(path, "wb") as pf:
                print(img_name + str(i), "downloading")
                pf.write(rep1)
            print(img_name + str(i), "download finished")
        except Exception:
            print('Failed to process {0}'.format(url))


if __name__ == '__main__':
    pool = Pool(4)
    # start page: map the style the user picks to the site's URL slug
    dic = {
        "小姐姐": "beauty",
        "少女情怀": "bilitis",
        "轻私房": "urllittlesex",
    }
    print("Pick the style you like (小姐姐, 少女情怀, 轻私房)")
    fen = input("Enter the style: ")
    fenge = dic[fen]
    url = f'https://www.vmgirls.com/special/{fenge}/'
    tree = resp(url)
    urls = img_detail(tree)
    pool.map(img_dow, urls)
    pool.close()
    pool.join()
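Two of the open questions in the comments above (the non-image URLs currently caught with try/except, and the i counter added to avoid duplicate file names) can both be handled by looking at the href itself. This is only a sketch, assuming the real image links end in a normal image suffix; the selector is the same one img_dow already uses, and IMG_SUFFIXES is just an assumed list:

import os

IMG_SUFFIXES = ('.webp', '.jpg', '.jpeg', '.png')  # assumed set of image suffixes

def image_links(tree):
    # keep only gallery hrefs that look like image files, instead of relying on a bare try/except
    links = []
    for a in tree.xpath('//div[@class="nc-light-gallery"]/a'):
        hrefs = a.xpath('./@href')
        if hrefs and hrefs[0].lower().endswith(IMG_SUFFIXES):
            links.append('https:' + hrefs[0] if hrefs[0].startswith('//') else hrefs[0])
    return links

def save_path(img_down_url, folder="./mv_img"):
    # the basename of the image URL is already unique, so reusing it avoids duplicate names without a counter
    return os.path.join(folder, os.path.basename(img_down_url))

img_dow could then loop over image_links(tree), download each URL, and write the bytes to save_path(img_down_url).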
cptw posted on 2021-6-12 22:21:
I don't really get it.

lihu5841314 (OP), replying to cptw:
Just run it once and you'll see -- the images do get downloaded. I just can't find the few cards whose tags seem to have disappeared.
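Since the "missing" cards come up again here: a quick way to see which of the 13 container divs actually lack the media-content link is to print them all, reusing the resp helper from the script above. This is only a debugging sketch, and the category URL is just an example:

# list every card under the container and show whether it contains an <a class="media-content"> link
tree = resp('https://www.vmgirls.com/special/beauty/')
cards = tree.xpath('//div[@class="container"]/div/div')
for idx, card in enumerate(cards, 1):
    hrefs = card.xpath('.//a[@class="media-content"]/@href')
    print(idx, card.get('class'), hrefs[0] if hrefs else 'no media-content link')

The cards that print no link are most likely non-article blocks (ads, pagination, or similar) sitting in the same grid; skipping any card whose href list is empty avoids the out-of-range errors without the bare try/except.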
Reply to cptw (2021-6-12 22:21):
The pictures on this site are really nice -- very easy on the eyes once they are downloaded.

Support, nice site.

Nice site.

Bro, how do you actually use this?

What does it mean... I didn't understand any of it...

Skipped the details entirely and went straight to the girls' site... hehe.

Hope the OP can spell out the steps in more detail so newbies like me can learn from it.
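For the questions about how to use it: save the code as a .py file, install requests and lxml, run it with Python, and type one of the three style names at the prompt; the images end up in ./mv_img. If the interactive prompt is confusing, a small variation (only a sketch, with a made-up script name) is to take the style from the command line instead:

import sys

dic = {"小姐姐": "beauty", "少女情怀": "bilitis", "轻私房": "urllittlesex"}

if __name__ == '__main__':
    # e.g.  python vmgirls_spider.py 小姐姐
    fen = sys.argv[1] if len(sys.argv) > 1 else "小姐姐"
    fenge = dic.get(fen, "beauty")  # fall back to the beauty section on an unknown style
    print(f'https://www.vmgirls.com/special/{fenge}/')  # the rest of the script would fetch this URL exactly as before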