# Forum post header: last edited by Teachers on 2021-10-17 13:45.
import os.path
import time
from lxml import etree
import requests
# HTTP request headers passed to every requests.get() call in this script
# (the listing page fetch and each image download).
# NOTE(review): presumably these make the requests look like a browser image
# fetch coming from the site itself -- confirm whether the site requires them.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/86.0.4240.198 Safari/537.36"
    ),
    "referer": "https://wallroom.io/",
    "sec-fetch-dest": "image",
}
def get_images_list(timeout=10):
    """Fetch the wallroom.io home page and return the image links found on it.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would freeze the polling loop that calls this.

    Returns:
        A list of ``href`` attribute strings selected by the XPath
        ``//div[@class="image-list"]/div/a/@href``. An empty list is returned
        when the page cannot be fetched, the server answers with an HTTP
        error status, the body is empty, or no link matches.
    """
    try:
        response = requests.get(
            'https://wallroom.io/',
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.RequestException:
        # run() already treats an empty list as "listing failed", so report
        # network/HTTP failures through that path instead of crashing.
        return []

    page = response.text
    if not page.strip():
        # Nothing to parse; avoid handing an empty document to lxml.
        return []

    html = etree.HTML(page)
    if html is None:
        # Defensive: etree.HTML may yield None when no document tree could
        # be built, and None has no .xpath().
        return []
    return html.xpath('//div[@class="image-list"]/div/a/@href')
def run():
    """Download every listed wallpaper that is not already saved locally.

    Each link returned by ``get_images_list()`` is split on ``/``; the second
    and third pieces are used as the resolution folder and the image name.
    The image is requested from
    ``https://wallroom.io/img/<resolution>/<name>.jpg`` and written to
    ``图片采集/<resolution>-<name>.jpg`` unless that file already exists.

    Links with an unexpected shape, failed requests, HTTP error responses and
    non-image responses are skipped so that one bad entry does not abort the
    whole pass.

    Returns:
        ``False`` when the image list is empty (after printing a failure
        message); otherwise ``None`` once all links have been processed.
    """
    images_list = get_images_list()
    if not images_list:
        print('获取图片列表失败')
        return False

    save_dir = '图片采集'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_dir, exist_ok=True)

    url = 'https://wallroom.io/img/'
    for images in images_list:
        splits = images.split('/')
        if len(splits) < 3:
            # Link does not have at least two path pieces after the first
            # separator; indexing below would raise IndexError.
            continue
        resolving = splits[1]
        name_split = splits[2]
        if not resolving or not name_split:
            continue

        file_name = save_dir + '/' + resolving + '-' + name_split + '.jpg'
        if os.path.exists(file_name):
            # Already downloaded on an earlier pass.
            continue

        try:
            images_content = requests.get(
                url + resolving + '/' + name_split + '.jpg',
                headers=headers,
                timeout=30,
            )
        except requests.RequestException:
            # Transient network problem: leave the file absent so a later
            # pass retries it.
            continue

        # .get() because a response is not guaranteed to carry Content-Type.
        content_type = images_content.headers.get('Content-Type', '')
        if not images_content.ok or 'image' not in content_type:
            continue

        print(name_split + '.jpg')
        with open(file_name, 'wb') as f:
            f.write(images_content.content)
if __name__ == '__main__':
    # Script entry point: repeat a download pass forever, pausing between
    # passes. The loop has no exit condition; stop it by interrupting the
    # process.
    pause_seconds = 10
    while True:
        run()
        time.sleep(pause_seconds)
# End of script.