Scraping HD wallpapers from Wallpaper Abyss
I've only been learning Python for a little while, so the code is a bit rough. If anything could be done better, please point it out! Thanks.
PS: you can replace that path with your own.
import requests
import re
from lxml import etree

url = "https://wall.alphacoders.com/search.php?"
p_main_url = "https://wall.alphacoders.com"
data = {
    "page": "2",                    # page number
    "lang": "Chinese",              # language (doesn't matter much)
    "search": "landscape",          # search keyword
    "view": "paged",                # paged view
    "min_resolution": "3840x2160",  # minimum resolution
    "resolution_equals": "=",       # exact resolution match
    "sort_search": "relevance",     # sort by relevance
}
p_main_url_list = []
img_id_list = []
n_img_url_list = []
img_url_list = []

response_main = requests.post(url, data=data).text
html = etree.HTML(response_main)
# Get each wallpaper's detail-page suffix
p_url_list = html.xpath('//div[@class="center"]//div[@class="boxgrid"]//a//@href')
# Build each wallpaper's full detail-page URL
for a in p_url_list:
    img_main_url = p_main_url + a
    # print(img_main_url)
    p_main_url_list.append(img_main_url)
# Extract each wallpaper's numeric ID from the detail-page suffix
for b in p_url_list:
    img_id = re.findall(r"\d+", b)[0]
    img_id_list.append(img_id)
# Extract the CDN directory of each thumbnail (everything up to the last slash)
n_img_list = html.xpath('//div[@class="center"]//div//div[@class="boxgrid"]//img//@src')
for c in n_img_list:
    n_img = re.findall(r"https://.*/", c)[0]
    # print(n_img)
    n_img_url_list.append(n_img)
# Assemble the full-size image URLs: CDN directory + image ID + ".jpg"
# print(n_img_url_list)
# print(img_id_list)
for d in range(len(n_img_url_list)):
    img_url = n_img_url_list[d] + img_id_list[d] + ".jpg"
    img_url_list.append(img_url)
# Request each image URL and save the original image
n = 0
for e in img_url_list:
    response_img = requests.get(e).content
    with open("D:\\其他\\233\\Python\\爬虫入门到精通\\实战项目\\wallpaper\\img\\" + img_id_list[n] + ".jpg", 'wb') as f:
        f.write(response_img)
    print('Downloading image {}!'.format(n))
    n = n + 1
    if n == len(img_url_list):
        print("All images downloaded!")
喻晓生 posted on 2021-11-9 08:42
Tried it out, works great.
import requests
import re, os
from lxml import etree

######################## main download function ########################
def download(page, img_path, search):
    url = "https://wall.alphacoders.com/search.php?"  # alternative: https://wall.alphacoders.com/tag/china-wallpapers
    p_main_url = "https://wall.alphacoders.com"
    print(page)
    data = {
        "page": page,                   # page number
        "lang": "Chinese",              # language (doesn't matter much)
        "search": search,               # search keyword: space landscape sea sunset computer
        "view": "paged",                # paged view
        "min_resolution": "3840x2160",  # minimum resolution: 3840x2160 or 1920x1080
        "resolution_equals": "=",       # exact resolution match
        "sort_search": "relevance",     # sort by relevance
    }
    p_main_url_list = []
    img_id_list = []
    n_img_url_list = []
    img_url_list = []
    response_main = requests.post(url, data=data).text
    html = etree.HTML(response_main)
    # Get each wallpaper's detail-page suffix
    p_url_list = html.xpath('//div[@class="center"]//div[@class="boxgrid"]//a//@href')
    # Build each wallpaper's full detail-page URL
    for a in p_url_list:
        img_main_url = p_main_url + a
        # print(img_main_url)
        p_main_url_list.append(img_main_url)
    # Extract each wallpaper's numeric ID
    for b in p_url_list:
        img_id = re.findall(r"\d+", b)[0]
        img_id_list.append(img_id)
    # Extract the CDN directory of each thumbnail
    n_img_list = html.xpath('//div[@class="center"]//div//div[@class="boxgrid"]//img//@src')
    for c in n_img_list:
        n_img = re.findall(r"https://.*/", c)[0]
        print(n_img)
        n_img_url_list.append(n_img)
    # Assemble the full-size image URLs
    print(n_img_url_list)
    print(img_id_list)
    for d in range(len(n_img_url_list)):
        img_url = n_img_url_list[d] + img_id_list[d] + ".jpg"
        img_url_list.append(img_url)
    # Request each image URL and save the original, skipping files we already have
    n = 0
    for e in img_url_list:
        img_file = img_path + img_id_list[n] + ".jpg"
        if os.path.exists(img_file):
            print("File already exists: %s" % img_file)
        else:
            response_img = requests.get(e).content
            # print("Response: %s" % response_img)
            with open(img_file, 'wb') as f:
                f.write(response_img)
            print('Downloading image {}!'.format(n), img_path)
        n = n + 1
        if n == len(img_url_list):
            print("%s: all images downloaded!" % img_path)
######################## main download function ########################
######################## create the folder if it doesn't exist ########################
def mkdir(path):
    folder = os.path.exists(path)
    if not folder:  # create the folder only when it doesn't exist yet
        os.makedirs(path)  # makedirs also creates any missing parent directories
        print("Created folder %s" % path)
    else:
        print("Folder already exists: %s" % path)

################# save folder, search keyword, page range
Newfile = 'Sky'
search = 'sky'  # search keyword: space landscape sea sunset computer particle
start_page = 1
end_page = 20
#################
img_path = "D:\\workplace\\Django\\Project\\Utils\\Wallper\\%s\\" % Newfile
print("File path: %s" % img_path)
mkdir(img_path)
######################## create the folder if it doesn't exist ########################
######################## loop over the pages ########################
for num in range(start_page, end_page):  # note: end_page itself is excluded
    print("Fetching page {}".format(num))
    download(num, img_path, search)
######################## loop over the pages ########################
######################## byte-size formatting ########################
def hum_convert(value):
    units = ["B", "KB", "MB", "GB", "TB", "PB"]
    size = 1024.0
    for i in range(len(units)):
        if (value / size) < 1:
            return "%.2f%s" % (value, units[i])
        value = value / size

rootdir = img_path
file_list = os.listdir(rootdir)  # list every entry in the download folder
######################## byte-size formatting ########################
for i in range(0, len(file_list)):
    path = os.path.join(rootdir, file_list[i])
    if os.path.isfile(path):
        print('%s:' % i, 'file name: ' + os.path.basename(path), 'size: %s bytes' % os.path.getsize(path), 'size: %s' % hum_convert(os.path.getsize(path)))
# Delete tiny files (under 163 bytes): these are failed downloads, not real images
for file in file_list:
    filesize = os.path.getsize(img_path + file)
    if filesize < 163:
        print('Deleting: %s' % (img_path + file))
        os.remove(img_path + file)
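The 163-byte cleanup works because failed downloads come back as tiny error bodies saved under a .jpg name. An alternative worth considering (a minimal sketch of my own, not part of the posted script) is to validate the response before writing, so bad downloads never reach the disk; status_code and the Content-Type header are standard requests response attributes:

import requests

def fetch_image(img_url, timeout=10):
    """Return the image bytes, or None if the server didn't send an image.

    Checking the status code and Content-Type up front means error pages
    are never written to disk, so no size-based cleanup pass is needed.
    """
    resp = requests.get(img_url, timeout=timeout)
    if resp.status_code != 200:
        return None
    if not resp.headers.get("Content-Type", "").startswith("image/"):
        return None
    return resp.content

In the download loop, a None result can simply be skipped instead of being written out.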