import requests
import re,os
from lxml import etree
########################下载主函数########################
# First run of digits in a result link; used as the image id.
_IMAGE_ID_RE = re.compile(r"\d+\.?\d*")
# "https://<host>/<numeric folder>/" prefix of a thumbnail URL (dots escaped
# so they only match the literal host separators).
_IMAGE_BASE_RE = re.compile(r"https://[a-z0-9]*\.[a-z0-9]*\.[a-z0-9]*/[0-9]*/")


def _pair_ids_with_bases(hrefs, srcs):
    """Return ``(image_id, base_url)`` for every link/thumbnail pair that parses."""
    pairs = []
    # zip() keeps link and thumbnail aligned by position and stops at the
    # shorter list, so a mismatch in counts cannot raise IndexError.
    for href, src in zip(hrefs, srcs):
        id_match = _IMAGE_ID_RE.search(href)
        base_match = _IMAGE_BASE_RE.search(src)
        if id_match is None or base_match is None:
            # Skip the whole pair so later pairs stay aligned.
            continue
        pairs.append((id_match.group(0), base_match.group(0)))
    return pairs


def download(page, img_path, search):
    """Download every wallpaper found on one search-result page.

    Posts the search form to wall.alphacoders.com, pairs each result link
    with its thumbnail, rebuilds the image URL as
    ``<thumbnail host and folder>/<image id>.jpg`` and saves each image as
    ``<image id>.jpg`` inside ``img_path``.  Files that already exist are
    skipped.  Failed requests are reported and skipped instead of being
    written to disk.

    Args:
        page: Result page number sent to the site.
        img_path: Directory the images are written to; it must already exist.
        search: Search keyword sent to the site.

    Returns:
        None.  Progress is reported with ``print``.
    """
    url = "https://wall.alphacoders.com/search.php?"
    timeout = 30  # seconds; without it a stalled connection blocks forever
    print(page)
    data = {
        "page": page,  # page number
        "lang": "Chinese",  # language (has little effect)
        "search": search,  # keyword, e.g. space landscape sea sunset computer
        "view": "paged",  # paged display
        "min_resolution": "3840x2160",  # resolution, e.g. 3840x2160 1920x1080
        "resolution_equals": "=",  # exact match
        "sort_search": "relevance",  # sort by relevance
    }
    try:
        response_main = requests.post(url, data=data, timeout=timeout)
        response_main.raise_for_status()
    except requests.RequestException as exc:
        print("第%s页请求失败:%s" % (page, exc))
        return
    html = etree.HTML(response_main.text)
    if html is None:
        # NOTE(review): lxml may hand back None for an empty document.
        return
    # Relative links to each image's own page.
    p_url_list = html.xpath('//div[@class="center"]//div[@class="boxgrid"]//a//@href')
    # Thumbnail URLs; their host and numeric folder are reused for the full image.
    n_img_list = html.xpath('//div[@class="center"]//div//div[@class="boxgrid"]//img//@src')
    pairs = _pair_ids_with_bases(p_url_list, n_img_list)
    for _, base_url in pairs:
        print(base_url)
    print([base_url for _, base_url in pairs])
    print([image_id for image_id, _ in pairs])
    # Request each image URL and store the original picture.
    for index, (image_id, base_url) in enumerate(pairs):
        file_path = os.path.join(img_path, image_id + ".jpg")
        if os.path.exists(file_path):
            print("文件已存在%s" % (file_path))
            continue
        image_url = base_url + image_id + ".jpg"
        try:
            response_img = requests.get(image_url, timeout=timeout)
            response_img.raise_for_status()
        except requests.RequestException as exc:
            # Do not save an error page under a .jpg name.
            print("下载失败%s:%s" % (image_url, exc))
            continue
        with open(file_path, 'wb') as f:
            f.write(response_img.content)
        print('正在下载第{}张图片!'.format(index), img_path)
    if pairs:
        print("%s图片下载完成!" % (img_path))
########################下载主函数########################
########################判断文件是否存在,不存在新建文件########################
def mkdir(path):
    """Create directory *path*, including missing parents, and report the result.

    Prints one message saying whether the folder was created or was already
    there.

    Args:
        path: Directory to create.

    Returns:
        None.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
    """
    if os.path.isdir(path):
        print("文件夹已存在%s" % (path))
        return
    # exist_ok=True tolerates the folder appearing between the check above
    # and this call; a regular file at *path* still raises FileExistsError.
    os.makedirs(path, exist_ok=True)
    print("创建文件夹%s" % (path))
# ---- Settings: output folder name, search keyword, page range ----
Newfile = 'Sky'
search = 'sky'  # keyword, e.g. space landscape sea sunset computer particle
start_page = 1
end_page = 20
# ---- Build the output folder path and make sure the folder exists ----
img_path = "D:\\workplace\\Django\\Project\\Utils\\Wallper\\%s\\" % Newfile
print("文件路径:%s" % img_path)
mkdir(img_path)
########################判断文件是否存在,不存在新建文件########################
########################下载多页循环########################
# Fetch the result pages one by one; range() stops before end_page itself.
for num in range(start_page, end_page):
    print("正在打印第{}页".format(num))
    download(num, img_path, search)
########################下载多页循环########################
########################字节转换########################
def hum_convert(value):
    """Format a byte count as a string with two decimals and a binary unit.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PB) is reached, so very large values are reported in PB instead
    of producing no result.

    Args:
        value: Size in bytes.

    Returns:
        A string such as ``"1.50KB"``.
    """
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    size = 1024.0
    for unit in units[:-1]:
        if (value / size) < 1:
            return "%.2f%s" % (value, unit)
        value = value / size
    # Anything that survived every division is expressed in the last unit.
    return "%.2f%s" % (value, units[-1])
rootdir = img_path
# Every entry in the output folder (files and sub-folders alike).
file_names = os.listdir(rootdir)
# ---- Report the name and size of every regular file ----
for i, name in enumerate(file_names):
    path = os.path.join(rootdir, name)
    if os.path.isfile(path):
        size_in_bytes = os.path.getsize(path)
        print('%s:' % (i), '文件名称:' + os.path.basename(path), '文件大小:%s' % (size_in_bytes), '文件大小:%s' % hum_convert(size_in_bytes))
# ---- Delete files that are too small ----
# NOTE(review): 163 bytes is presumably the size of a failed/placeholder
# download -- confirm before changing the threshold.
min_valid_size = 163
for name in file_names:
    path = os.path.join(rootdir, name)
    if not os.path.isfile(path):
        # os.remove() cannot delete directories, so leave them alone.
        continue
    if os.path.getsize(path) < min_valid_size:
        print('正在删除:%s' % (path))
        os.remove(path)