import re
import requests
import html
import os
# Make sure the download directory exists. exist_ok=True avoids the
# check-then-create race and makes the call a no-op when 'img' is already there.
os.makedirs('img', exist_ok=True)
# Request headers sent with every HTTP request made by this script:
# a desktop browser user-agent, a fixed cookie string and a Referer that
# points at the site root.
ha = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47'
    ),
    # Hard-coded cookie; presumably copied from a browser session and may
    # have expired -- TODO confirm it is still needed.
    'cookie': (
        '__cfduid=d6f5cb5a6c3911f07bd669ec3173736881612161329; '
        'xygkqecookieclassrecord=%2C7%2C; '
        'xygkqecookieztrecord=%2C6%2C; '
        'xygkqecookieinforecord=%2C7-22308%2C4-22305%2C'
    ),
    'Referer': 'http://www.netbian.com',
}

# Proxy mapping passed as ``proxies=`` to the page requests.
# NOTE(review): requests appears to look proxies up by lowercase URL scheme
# ('http' / 'https'), so this uppercase 'HTTP' key likely never matches and
# the requests go out directly -- confirm before changing the key.
ip = {'HTTP': '117.69.13.250:9999'}
# Crawl listing pages 1-9 of the category and download the 1920x1080
# wallpaper linked from every entry into the 'img' directory.

# The patterns are written without spaces because all spaces are stripped
# from the fetched HTML before matching (see _fetch_page).
_LISTING_LINK_RE = re.compile(r'<li><ahref="(.*?)\.htm"title="')
_IMAGE_LINK_RE = re.compile(r'<ahref="(.*?)\.jpg"title="')
_TITLE_RE = re.compile('<title>(.*?)高清大图')
# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 30


def _fetch_page(url):
    """Return the HTML of *url* decoded as gb2312, unescaped, spaces removed.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    reply = requests.get(url, headers=ha, proxies=ip, timeout=_REQUEST_TIMEOUT)
    reply.raise_for_status()
    reply.encoding = 'gb2312'
    return html.unescape(reply.text).replace(' ', '')


def _safe_filename(title):
    """Return *title* with characters that are illegal in file names replaced."""
    cleaned = _UNSAFE_FILENAME_CHARS_RE.sub('_', title).strip('. ')
    return cleaned or 'untitled'


for page_no in range(1, 10):
    # The first listing page has no numeric suffix.
    if page_no == 1:
        listing_url = 'http://www.netbian.com/meinv/index.htm'
    else:
        listing_url = 'http://www.netbian.com/meinv/index_{}.htm'.format(page_no)

    try:
        listing_html = _fetch_page(listing_url)
    except requests.RequestException as exc:
        print('skipping listing page {}: {}'.format(listing_url, exc))
        continue

    # Captured paths containing 'target' are dropped; every remaining path is
    # turned into the URL of its 1920x1080 detail page.
    detail_urls = [
        'http://www.netbian.com' + path + '-1920x1080.htm'
        for path in _LISTING_LINK_RE.findall(listing_html)
        if 'target' not in path
    ]

    # Download the wallpapers found on this listing page.
    for detail_url in detail_urls:
        try:
            detail_html = _fetch_page(detail_url)
            image_match = _IMAGE_LINK_RE.search(detail_html)
            title_match = _TITLE_RE.search(detail_html)
            if image_match is None or title_match is None:
                # Page layout differs from what the patterns expect; skip it
                # instead of crashing on a missing match.
                print('skipping {}: image link or title not found'.format(detail_url))
                continue
            image_url = image_match.group(1) + '.jpg'
            file_name = _safe_filename(title_match.group(1)) + '.jpg'
            # The image itself is requested without the proxies mapping.
            image_reply = requests.get(image_url, headers=ha, timeout=_REQUEST_TIMEOUT)
            image_reply.raise_for_status()
        except requests.RequestException as exc:
            print('skipping {}: {}'.format(detail_url, exc))
            continue

        # os.path.join keeps the path valid on every platform.
        with open(os.path.join('img', file_name), 'wb') as out_file:
            out_file.write(image_reply.content)
        print(file_name)