It's been a while since I studied anything, so I messed around with a few super-simple scrapers. The code isn't necessarily the tidiest and hasn't been optimized... I'll pick up multithreading later (there's a small sketch of that after the first script).
All of these wallpaper site addresses were found on 书签地球 (Bookmark Earth).
First up: a 4K wallpaper scraper. The images on this site seem to come from 360, but never mind that.
[Python]
# -*- coding: utf-8 -*-
import os
import requests  # required modules

def get():
    for ppp in range(1, 100, 12):  # "start" offset, 12 images per page
        url = "https://bird.ioliu.cn/v2?url=http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={}&count=12&from=360chrome&a=getAppsByCategory&cid=36".format(ppp)
        req = requests.get(url)  # request the listing
        text = req.json()  # decode the JSON response
        for i in text["data"]:  # walk the image entries
            img_id = i["id"]
            res = i["resolution"]  # resolution string
            name = str(img_id) + "---" + str(res)  # file name to save under
            root = "./壁纸/"  # download directory
            try:
                url_m = i["url"]
                url = url_m.replace("__85", "__100")  # "__100" is the 4K variant; the API only hands out "__85", so patch it manually
                path = root + name + ".jpg"
                if not os.path.exists(root):
                    os.mkdir(root)  # create the folder
                if not os.path.exists(path):
                    r = requests.get(url)  # fetch the binary image data
                    with open(path, 'wb') as f:  # write the image
                        f.write(r.content)
                    print('{} saved successfully'.format(name))
            except Exception as e:
                print(name, "failed:", e)

get()
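Since I mentioned multithreading up top, here's a minimal sketch (not part of the original script) of parallelizing just the download step with the standard-library concurrent.futures. The download_one helper and the tasks list are hypothetical stand-ins for the inner-loop body and the (url, path) pairs collected from the JSON above:
[Python]
# Minimal sketch, assuming a hypothetical download_one(url, path) helper
# wrapping the requests.get / file-write logic from the script above.
from concurrent.futures import ThreadPoolExecutor
import requests

def download_one(url, path):
    r = requests.get(url)
    with open(path, 'wb') as f:
        f.write(r.content)

tasks = [("http://example.com/a.jpg", "./壁纸/a.jpg")]  # illustrative (url, path) pairs only
with ThreadPoolExecutor(max_workers=8) as pool:  # up to 8 downloads in flight
    for u, p in tasks:
        pool.submit(download_one, u, p)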
The second one seems to be an overseas wallpaper site.
[Python]
# -*- coding: utf-8 -*-
import os
import requests
from lxml import etree

url = "https://wallpaperscraft.com"
n = 0
for ppp in range(1, 101):  # catalog pages
    req = requests.get(url + "/catalog/anime/page{}".format(ppp))
    req_html = etree.HTML(req.text)
    links = req_html.xpath('//*[@class="wallpapers__link"]//@href')  # links to individual wallpaper pages
    for i in links:
        req = requests.get(url + i)
        req_html = etree.HTML(req.text)
        sizes = req_html.xpath('//*[@class="wallpaper-table__cell"]//@href')  # available resolutions
        req = requests.get(url + sizes[0])
        req_html = etree.HTML(req.text)
        dl = req_html.xpath('//*[@class="gui-button gui-button_full-height"]//@href')  # direct download button
        root = "./壁纸/"
        path = root + str(n) + ".jpg"
        try:
            if not os.path.exists(root):
                os.mkdir(root)  # create the folder
            if not os.path.exists(path):
                r = requests.get(dl[0])  # fetch the binary image data
                with open(path, 'wb') as f:  # write the image
                    f.write(r.content)
                print('{} saved successfully'.format(n))
        except Exception as e:
            print(n, "failed:", e)
        n += 1
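The full-size files here can run to several MB, and r.content holds the whole body in memory before writing. A minimal alternative sketch using requests' streaming support (stream=True plus iter_content, both real requests features; save_streamed is just an illustrative helper name):
[Python]
# Streaming sketch; drop-in for the r.content write in the loop above.
import requests

def save_streamed(img_url, path, chunk_size=8192):
    # stream=True defers downloading the body; iter_content yields it in chunks
    with requests.get(img_url, stream=True) as r:
        with open(path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)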
Third: an anime (二次元) site, and some of it is on the racy side~~~~
[Python]
# -*- coding: utf-8 -*-
import os
import requests
from lxml import etree

url = "https://anime-pictures.net"
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    "content-type": "text/html; charset=UTF-8",
    "cookie": "sitelang=zh_CN; __cfduid=d35094b211350abbf73c74accaf4e60a41618399143; cookieconsent_status=dismiss",
    "if-none-match": "W/\"85073814374d01fb84f26b772f5f26ff4fee05a8\"",
    "referer": "https://anime-pictures.net/pictures/view_posts/0?lang=zh_CN",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}

def get():
    j = 1
    for ppppp in range(0, 5000):  # listing pages
        req = requests.get(url + "/pictures/view_posts/{}?lang=zh_CN".format(ppppp), headers=headers)  # request a listing page
        req_text = req.text
        req_html = etree.HTML(req_text)  # parse into an HTML tree
        a = req_html.xpath('//*[@class="posts_block"]/span/a//@href')  # pick out the post links
        for i in a:
            req = requests.get(url + i, headers=headers)  # open the post to find the download link
            wc = req.text
            wc_html = etree.HTML(wc)
            b = wc_html.xpath('//*[@class="download_icon"]//@href')
            root = "./美图/"
            path = root + str(j) + ".jpg"
            if not os.path.exists(root):
                os.mkdir(root)  # create the folder
            if not os.path.exists(path):
                r = requests.get(url + b[0])  # fetch the binary image data
                with open(path, 'wb') as f:  # write the image
                    f.write(r.content)
                print('Image {} saved successfully'.format(j))
            else:
                print('Image already exists')
            j += 1

get()
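All three scripts fire dozens of separate requests.get calls; a requests.Session reuses the underlying connection and sends the same headers (and keeps cookies up to date) automatically, so you set them once. A minimal sketch of that variant, not the original code:
[Python]
# Session sketch: headers set once, then every s.get() carries them along.
import requests

s = requests.Session()
s.headers.update({"user-agent": "Mozilla/5.0"})  # or the full headers dict from the script above
r = s.get("https://anime-pictures.net/pictures/view_posts/0?lang=zh_CN")  # headers/cookies ride along automatically
print(r.status_code)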
Feedback from the pros on anything that could be optimized is welcome!