Total beginner here, please go easy on me... The past couple of days I suddenly felt like learning web scraping, mainly because my hard drive has a lot of free space sitting idle, so I figured I'd fill it with some eye-pleasing files.
[Python]
import requests
import os
from lxml import html

etree = html.etree

url = "http://www.netbian.com/meinv"
domain = "http://www.netbian.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63"
}

# Create the output folder ./img next to the script if it does not exist yet.
pwd = os.getcwd()
img_dir = os.path.join(pwd, "img")
if not os.path.exists(img_dir):
    print(f"Luxury suite for the girls: {img_dir}")
    os.mkdir(img_dir)
def down(response):
    # Parse the list page and collect the links to the detail pages.
    html_data = etree.HTML(response.text)
    lis = html_data.xpath('//*[@id="main"]/div[3]/ul/li')
    for li in lis:
        href = li.xpath(".//a/@href")[0]
        # Ad entries link out with absolute URLs, so skip anything containing "http".
        if "http" in href:
            print("Ad link detected, skipping...")
            continue
        # Request the detail page for this wallpaper.
        child_html = requests.get(domain + href, headers=headers)
        child_html.encoding = 'gb2312'
        child_html_data = etree.HTML(child_html.text)
        # Extract the image URL and its title.
        photo_url = child_html_data.xpath("/html/body/div[2]/div[2]/div[3]/div/p/a/img/@src")[0]
        photo_name = child_html_data.xpath("/html/body/div[2]/div[2]/div[3]/div/p/a/img/@title")[0]
        # Download the image and save it under ./img.
        print(f"Downloading: {photo_name}")
        photo_data = requests.get(url=photo_url, headers=headers)
        with open(os.path.join(img_dir, photo_name + ".jpg"), mode='wb') as f:
            f.write(photo_data.content)
# Crawl list pages 1 through 63; page 1 has no index suffix.
for i in range(1, 64):
    if i < 2:
        response = requests.get(url=url, headers=headers)
    else:
        response = requests.get(url=url + f"/index_{i}.htm", headers=headers)
    down(response)

print("All done! Time to enjoy...")
Finished result: https://www.aliyundrive.com/s/3hjWQfVwbxw