初学Python,根据网上的爬虫教程结合爱好写了个爬虫,能爬取LOL国服官网资料库里的图片
运行截图
运行结果
下面是源码
[Python] 纯文本查看 复制代码 # coding=utf-8
import requests
import re
import json
import os
import urllib
import time
import threading
from queue import Queue
def getHttpStatusCode(url):
try:
request = requests.get(url)
httpStatusCode = request.status_code
return httpStatusCode
except requests.exceptions.HTTPError as e:
return e
def Get_Hero_Jsurl(queue):
url = 'https://lol.qq.com/biz/hero/champion.js'
response = requests.get(url).content.decode('gb2312')
regular = r'data":(.*?),"version":"'
dict_js = json.loads(re.findall(regular, response)[0])
for _item in dict_js:
EN = dict_js[_item]['id']
queue.put(EN.format(id=_item))
def Stitching_JS_links(name_en):
link = 'https://lol.qq.com/biz/hero/{}.js'.format(name_en)
return link
def Parsing_hero_JS(url):
response = requests.get(url).content.decode('gb2312')
regular = r'data":(.*?),"version":"'
js_data = json.loads(re.findall(regular, response)[0])
result = {
'name_CN': js_data['name'] + ' ' + js_data['title'],
'name_EN': js_data['id'],
'skins': js_data['skins'],
'skill': js_data['spells'],
'passive': js_data['passive'],
}
return result
def Download_hero_resources(detail_url_list: Queue, id):
while len(detail_url_list.queue):
hero_name = detail_url_list.get()
Metadata = Parsing_hero_JS(Stitching_JS_links(hero_name))
path = os.getcwd() + '\\英雄相关\\' + Metadata['name_CN']
if not os.path.exists(path + '\\头像'):
os.makedirs(path + '\\头像')
if not os.path.exists(path + '\\皮肤'):
os.makedirs(path + '\\皮肤')
if not os.path.exists(path + '\\技能'):
os.makedirs(path + '\\技能')
# 下载皮肤和头像图 [img]https://game.gtimg.cn/images/lol/act/img/skin/big1000.jpg[/img]
for item_skin in Metadata['skins']:
url_0 = 'https://game.gtimg.cn/images/lol/act/img/skin/big{}.jpg'.format(
item_skin['id'])
url_1 = 'https://game.gtimg.cn/images/lol/act/img/skin/small{}.jpg'.format(
item_skin['id'])
urllib.request.urlretrieve(
url_0,
path + '\\皮肤\\' + item_skin['name'].replace('/', '') + '.jpg')
urllib.request.urlretrieve(
url_1,
path + '\\头像\\' + item_skin['name'].replace('/', '') + '.jpg')
pass
# 下载技能图 [img]https://game.gtimg.cn/images/lol/act/img/spell/AnnieR.png[/img]
for item_skill in Metadata['skill']:
url_3 = 'https://game.gtimg.cn/images/lol/act/img/spell/{}.png'.format(
item_skill['id'])
save_path = path + '\\技能\\' + item_skill[
'name'] + '_' + item_skill['id'].replace(hero_name,
'') + '.png'
if (getHttpStatusCode(url_3) != 404):
urllib.request.urlretrieve(url_3, save_path.replace('/', '-'))
url_3 = 'https://game.gtimg.cn/images/lol/act/img/passive/{}'.format(
Metadata['passive']['image']['full'])
if (getHttpStatusCode(url_3) == 404):
url_3 = 'https://game.gtimg.cn/images/lol/act/img/spell/{}'.format(
Metadata['passive']['image']['full'])
save_path = path + '\\技能\\' + Metadata['passive'][
'name'] + '_P' + '.png'
# test = getHttpStatusCode(url_3)
urllib.request.urlretrieve(url_3, save_path.replace('/', '-'))
def Features_1():
detail_url_queue = Queue(maxsize=1000)
thread = threading.Thread(target=Get_Hero_Jsurl, args=(detail_url_queue, ))
html_thread = []
thread.start()
for i in range(20):
thread2 = threading.Thread(target=Download_hero_resources,
args=(detail_url_queue, i))
html_thread.append(thread2)
start_time = time.time()
print('将在2秒后启动多线程下载')
time.sleep(2)
for i in range(20):
html_thread[i].start()
sum_num = len(detail_url_queue.queue)
thread.join()
for i in range(20):
html_thread[i].join()
time_ = time.time() - start_time
print('共下载了{}位英雄资源'.format(sum_num))
print("用时: {}分{}秒".format(int(time_ // 60), int(time_ % 60) + 1))
def Features_2():
hero_js = 'https://lol.qq.com/biz/hero/item.js'
response = requests.get(hero_js).content.decode('gb2312')
正则 = r'data":(.*?),"tree'
list_js = re.findall(正则, response)
dict_js = json.loads(list_js[0])
Item_name = {}
Item_url = {}
if not os.path.exists(os.getcwd() + '\\装备'):
os.makedirs(os.getcwd() + '\\装备')
for i in dict_js:
Item_name[i] = dict_js[i]['name']
url0 = 'https://game.gtimg.cn/images/lol/act/img/item/'
url1 = '.png'
Item_url[i] = url0 + i + url1
p = 0
for i in Item_url:
urllib.request.urlretrieve(
Item_url[i],
os.getcwd() + '\\装备\\' + Item_name[i] + '.png')
p += 1
_JD = (p / len(Item_url)) * 100
print('已完成 ', end='')
print('%.2lf' % _JD, end=' %\n')
def main():
print('LOL爬虫小工具 作者:艾斯托维亚')
print('———————————————————————————————————————')
print(' 1 | 从官网下载最新英雄头像、皮肤原画、技能图标')
print(' 2 | 从官网下载最新最新装备图 ')
print('———————————————————————————————————————')
n = input('请输入数字以选择功能\n')
n = int(n)
if n > 0 and n < 3:
swicth = {
1: Features_1,
2: Features_2,
}
swicth[n]()
input('数据保存在软件同目录\n按任意键退出\n')
else:
print('错误输入\n程序即将退出')
time.sleep(2)
if __name__ == "__main__":
main()
欢迎各位大佬指点
成品下载链接:https://www.lanzoux.com/iNUnbf71utc
|