目标网址:http://www.netbian.com/
调用模块
import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree
# Browser-like User-Agent sent with every request (listing, detail page, image).
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 "
                  "Safari/537.36"}

BASE_URL = 'http://www.netbian.com/'
# Directory the images are written to (created on demand).
SAVE_DIR = r"D:\代码保存\图片"
# 1-based <li> positions scraped from each listing page. The script starts at
# the 4th entry because, per the author's note, the 3rd entry has a different
# structure and does not match the xpath below.
FIRST_ITEM = 4
LAST_ITEM = 20
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Location of the full-size <img> element on a wallpaper detail page.
IMG_XPATH = '//*[@id="main"]/div[3]/div/p/a/img'


def build_page_url(page):
    """Return the listing URL for 1-based *page*.

    The first page has no ``index_N`` suffix; every later page does, which is
    why the two cases cannot share a single URL template.
    """
    if page == 1:
        return BASE_URL + 'meinv/'
    return BASE_URL + 'meinv/index_' + str(page) + '.htm'


def sanitize_filename(name):
    """Replace characters that are illegal in Windows file names with ``_``."""
    return re.sub(r'[\\/:*?"<>|]', '_', str(name))


def download_page(page, total_pages):
    """Download the wallpapers listed on one listing page into ``SAVE_DIR``.

    Args:
        page: 1-based listing page number.
        total_pages: Total number of pages requested; only used to pick the
            progress message (single-page runs omit the page number).

    Entries whose link or image cannot be located, or whose image request
    does not succeed, are reported and skipped. Network errors raised by
    ``requests`` propagate to the caller.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)

    # NOTE(review): `.text` relies on requests' charset detection; if saved
    # names come out garbled, the page encoding should be set explicitly.
    listing = requests.get(build_page_url(page), headers=header,
                           timeout=REQUEST_TIMEOUT).text
    html = etree.HTML(listing)

    for a in range(FIRST_ITEM, LAST_ITEM + 1):
        number = a - FIRST_ITEM + 1
        if total_pages == 1:
            print('正在下载第', number, '张!')
        else:
            print('正在下载第', page, '页!第', number, '张!')

        hrefs = html.xpath('//*[@id="main"]/div[3]/ul/li[' + str(a) + ']/a/@href')
        if not hrefs:
            # Fewer entries than expected, or the layout differs: skip.
            print('未找到第', number, '张的详情页链接,已跳过。')
            continue

        # urljoin copes with both "/desk/1.htm" and "desk/1.htm" hrefs without
        # producing a doubled slash.
        detail_url = urljoin(BASE_URL, str(hrefs[0]))
        detail = requests.get(detail_url, headers=header,
                              timeout=REQUEST_TIMEOUT).text
        html2 = etree.HTML(detail)

        srcs = html2.xpath(IMG_XPATH + '/@src')
        names = html2.xpath(IMG_XPATH + '/@alt')
        if not srcs or not names:
            print('未找到第', number, '张的图片地址,已跳过。')
            continue

        # Send the same headers as the page requests.
        image = requests.get(urljoin(detail_url, str(srcs[0])), headers=header,
                             timeout=REQUEST_TIMEOUT)
        if not image.ok:
            # Do not save an error body under a .jpg name.
            print('第', number, '张图片下载失败,状态码:', image.status_code)
            continue

        target = os.path.join(SAVE_DIR, sanitize_filename(names[0]) + '.jpg')
        with open(target, "wb") as file:
            file.write(image.content)


if __name__ == "__main__":
    Page = int(input('请输入下载页数:'))
    # Start at page 1 so that asking for N pages really downloads N pages.
    for page in range(1, Page + 1):
        download_page(page, Page)
翻页网址没办法通用(第一页是 /meinv/,之后的页是 /meinv/index_N.htm),所以使用了 if 语句分开处理。网站好像设置了反爬。一页有 20 张图,但第三张的 xpath 跟其他的不一样,所以我是从第四张开始下载的。师兄们不忙的话可以看一下,顺便指点指点小弟,差异图我贴上。
想发几张美女图片,太大了传不上来,哎!
|