import requests
import os
import time
import re
class meinuimages:
    """Download image galleries ("beauty pictures") from vmgirls.com.

    The pipeline has three stages, each exposed as a public method:
    ``get_url_list`` scrapes the list pages for gallery links,
    ``get_url_img`` scrapes every gallery page for image links, and
    ``save_images`` downloads the images into one folder per gallery.
    """

    # Matches one gallery link on a list page; groups are (url, title).
    _LIST_ITEM_RE = re.compile(
        r' <a class="media-content" target="_blank" href="(?P<url>.*?)" title="(?P<title>.*?)"',
        re.S)
    # Matches one image link on a gallery page; groups are (img, title).
    _IMAGE_RE = re.compile(
        r'<a rel="nofollow" href="(?P<img>.*?)" alt="(?P<title>.*?)"', re.S)
    # Characters that Windows does not allow in file or directory names.
    _FORBIDDEN_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, save_dir=r"D:\美图爬虫", timeout=10):
        """Set up the request headers and download settings.

        Args:
            save_dir: Root directory under which gallery folders are created.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
        }
        self.save_dir = save_dir
        self.timeout = timeout

    @classmethod
    def _parse_list_page(cls, html):
        """Return the ``(url, title)`` tuples of all galleries in a list page."""
        return cls._LIST_ITEM_RE.findall(html)

    @classmethod
    def _parse_gallery_page(cls, html):
        """Return the ``(image_url, alt_text)`` tuples found in a gallery page."""
        return cls._IMAGE_RE.findall(html)

    @classmethod
    def _safe_name(cls, name):
        """Turn scraped text into a name that is valid on the file system.

        Forbidden characters become ``_``; trailing spaces and dots are
        dropped because Windows rejects them. An empty result falls back to
        ``'untitled'``.
        """
        cleaned = cls._FORBIDDEN_RE.sub('_', name).rstrip(' .')
        return cleaned or 'untitled'

    def get_url_list(self):
        """Fetch the list pages and collect the gallery links on each.

        Returns:
            A list with one dict per successfully fetched page, holding
            ``'page'`` (page number), ``'url'`` (list page URL) and
            ``'list_title'`` (list of ``(gallery_url, title)`` tuples).
            Pages that cannot be fetched are reported and skipped.
        """
        pages = []
        for page_no in range(1, 3):
            page_url = "https://www.vmgirls.com/fresh/page/{}/".format(page_no)
            try:
                # headers must be passed by keyword: the second positional
                # argument of requests.get is `params`, not `headers`.
                response = requests.get(
                    page_url, headers=self.headers, timeout=self.timeout)
                response.raise_for_status()
            except requests.RequestException:
                print('请示错误当前第%s页=url:%s' % (page_no, page_url))
                continue
            # Pause between list-page requests to avoid hammering the site.
            time.sleep(2)
            pages.append({
                'page': page_no,
                'url': page_url,
                'list_title': self._parse_list_page(response.text),
            })
        return pages

    def get_url_img(self):
        """Fetch every gallery page and collect its image links.

        Returns:
            A list with one entry per list page; each entry is a list of
            dicts holding ``'title'`` (gallery title) and ``'img'`` (list of
            ``(image_url, alt_text)`` tuples). Galleries whose page cannot be
            fetched are reported and skipped.
        """
        datas = []
        for page in self.get_url_list():
            galleries = []
            for detail_url, title in page['list_title']:
                try:
                    response = requests.get(
                        url=detail_url, headers=self.headers,
                        timeout=self.timeout)
                    response.raise_for_status()
                except requests.RequestException:
                    # One broken gallery must not abort the whole crawl.
                    print('请求失败:{}'.format(detail_url))
                    continue
                galleries.append({
                    'title': title,
                    'img': self._parse_gallery_page(response.text),
                })
            datas.append(galleries)
        return datas

    def _save_gallery(self, gallery):
        """Download all images of one gallery dict into its own folder."""
        title = gallery['title']
        folder = os.path.join(self.save_dir, self._safe_name(title))
        try:
            os.makedirs(folder)
            print("已创建文件夹")
        except FileExistsError:
            print("已经有该文件夹")
        except OSError:
            # The folder could not be created at all; nothing can be saved.
            print("保存失败:{}".format(title))
            return
        for img_url, _alt in gallery['img']:
            img_name = self._safe_name(img_url.split('/')[-1])
            try:
                img_resp = requests.get(
                    url=img_url, headers=self.headers, timeout=self.timeout)
                # Refuse to store an HTTP error page as if it were an image.
                img_resp.raise_for_status()
                with open(os.path.join(folder, img_name), mode='wb') as f:
                    f.write(img_resp.content)
            except (requests.RequestException, OSError):
                print("保存失败:{}".format(title))
                continue
            print(title, img_name + ' 保存成功')

    def save_images(self):
        """Scrape the site and save every image found to ``self.save_dir``.

        All scraped list pages are processed, and an empty scrape result is
        handled as a no-op instead of raising ``IndexError``.
        """
        for page in self.get_url_img():
            for gallery in page:
                self._save_gallery(gallery)
if __name__ == '__main__':
    # Script entry point: build the scraper and run the full
    # scrape-and-save pipeline.
    scraper = meinuimages()
    scraper.save_images()