工欲善其事 必先利其器
要想玩好LOL,那了解所有英雄的技能必然是其最基本的。所以此爬虫就应运而生
运行环境
python 3.7
此爬虫所用的库有
- requests (获取网页信息)
- openpyxl (Excel相关操作)
- pymysql (MySQL数据库相关操作)
- re (正则)
代码
下面有已打包为EXE的程序,可直接使用
主要代码
import requests
import re
import openpyxl
import pymysql
def get_html(hero, timeout=10):
    """Download the raw JavaScript data file for a single hero.

    Args:
        hero: Hero identifier inserted into the request URL and the
            ``Referer`` header (for example ``'Aatrox'``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when the timeout expires.
    """
    headers = {
        'Referer': 'http://lol.qq.com/web201310/info-defail.shtml?id=' + hero,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    url = 'http://lol.qq.com/biz/hero/' + hero + '.js'
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def hero_info(response):
    """Pull a hero's name, title, passive and four active spells out of text.

    The data is located with regular expressions rather than a JSON parser,
    so the fields must appear in the order the patterns expect.

    Args:
        response: Text of the hero data file.

    Returns:
        A four-item list ``[name, title, passive, spells]``. ``passive`` is
        formatted as ``name:description|group``. ``spells`` holds one line
        per active spell (Q, W, E, R), each formatted as
        ``name:description|resource|group``, joined with newlines.

    Raises:
        IndexError: If any expected field is missing, or fewer than four
            active spells are found.
    """
    name_pattern = r'"name":"(.*?)","description"'
    description_pattern = r'"description":"(.*?)","image"'
    group_pattern = r'"group":"(.*?)","x"'

    # Hero name and title.
    hero_name = re.findall(r'"name":"(.*?)","title"', response, re.S)[0]
    hero_title = re.findall(r'"title":"(.*?)","tags"', response, re.S)[0]

    # Active spells: isolate the "spells" section, then collect each field.
    spells_block = re.findall(r'"spells":(.*?),"passive"', response, re.S)[0]
    names = re.findall(name_pattern, spells_block, re.S)
    descriptions = re.findall(description_pattern, spells_block, re.S)
    resources = re.findall(r'"resource":"(.*?)"}', spells_block, re.S)
    groups = re.findall(group_pattern, spells_block, re.S)

    spell_lines = []
    for slot in range(4):  # Q, W, E, R
        spell_lines.append(
            f'{names[slot]}:{descriptions[slot]}'
            f'|{resources[slot]}|{groups[slot]}'
        )
    spells = '\n'.join(spell_lines)

    # Passive: same field layout, but without a resource entry.
    passive_block = re.findall(r'"passive":(.*?),"lore"', response, re.S)[0]
    passive_name = re.findall(name_pattern, passive_block, re.S)[0]
    passive_description = re.findall(
        description_pattern, passive_block, re.S)[0]
    passive_group = re.findall(group_pattern, passive_block, re.S)[0]
    passive = f'{passive_name}:{passive_description}|{passive_group}'

    return [hero_name, hero_title, passive, spells]
def get_hero():
    """Read the ``hero`` data file from the current working directory.

    Returns:
        A list with one string per line of the file; each string keeps its
        trailing newline.
    """
    with open('hero', 'r') as hero_file:
        return list(hero_file)
def save_to_excel(her):
    """Write the collected hero rows to ``herotest.xlsx``.

    A header row is written first; every item of ``her`` is then appended as
    one worksheet row beneath it.

    Args:
        her: Iterable of rows, each a sequence of cell values.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    header_cells = (
        ('A1', '英雄称号'),
        ('B1', '英雄名称'),
        ('C1', '被动技能'),
        ('D1', '主动技能'),
    )
    for cell, title in header_cells:
        sheet[cell] = title

    for row in her:
        sheet.append(row)

    workbook.save('herotest.xlsx')
def save_to_mysql(her):
    """Insert the collected hero rows into the ``lolheroinfo`` MySQL table.

    Values are passed to the driver as query parameters instead of being
    spliced into the SQL text, so quotes or backslashes inside a skill
    description can no longer break (or alter) the statement.

    Args:
        her: Iterable of four-item sequences of strings, in the same order as
            the table's columns.

    Returns:
        Always ``True``. A row that fails to insert is rolled back and
        reported on stdout; the remaining rows are still attempted.
    """
    rows = list(her)
    if not rows:
        # Nothing to insert, so do not open a connection at all.
        return True

    sql = 'insert into lolheroinfo values (%s, %s, %s, %s)'
    # One connection is shared by all rows instead of reconnecting per row.
    db = pymysql.connect(host='localhost', user='root',
                         password='123456', database='python_mysql',
                         charset='utf8')
    try:
        cursor = db.cursor()
        for row in rows:
            # Quoted form of the first column, used only for log output.
            label = '"' + row[0] + '"'
            try:
                cursor.execute(sql, (row[0], row[1], row[2], row[3]))
                db.commit()
            except pymysql.MySQLError as err:
                # Best effort: undo this row, report it, and carry on.
                db.rollback()
                print(label, ' insert failed:', err)
            else:
                print(label, ' insert into success!')
    finally:
        db.close()
    return True
def main():
    """Scrape every hero listed in the ``hero`` file and export to Excel."""
    her = []
    for line in get_hero():
        # The hero id is the fourth piece when the line is split on '"'.
        hero_id = line.split('"')[3]
        fields = hero_info(get_html(hero_id))
        # Turn literal escape sequences (e.g. \uXXXX) in the scraped text
        # into the characters they stand for.
        her.append([
            field.encode("latin-1").decode("unicode_escape")
            for field in fields
        ])
    save_to_excel(her)
    # Uncomment the next line to store the rows in MySQL as well.
    # save_to_mysql(her)


if __name__ == '__main__':
    main()
获取英雄数据文件
运行此文件后会在当前目录下生成名为 hero 的数据文件
import requests
import re
# Headers sent with the champion-list request.
headers = {
    'Referer': 'https://lol.qq.com/data/info-defail.shtml?id=Aatrox',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get('https://lol.qq.com/biz/hero/champion.js',
                        headers=headers, timeout=10)
# Take the body of the "keys" object and split it into comma-separated entries.
keys = re.findall(r'"keys":{(.*?)},"data"', response.text, re.S)
keys = keys[0].split(',')
# Write one entry per line to the 'hero' file and echo it to stdout.
with open('hero', 'w') as f:
    for key in keys:
        f.write(key + '\n')
        print(key)
如果想要保存到MySQL,请先创建MySQL数据库,然后用下面的代码创建表,最后取消主代码中 save_to_mysql(her) 的注释即可。
创建数据表代码
#!/usr/bin/python3
import pymysql
# Statement that creates the table the scraper inserts into.
sql = """CREATE TABLE lolheroinfo (
英雄称号 CHAR(255),
英雄名称 CHAR(255),
被动技能 CHAR(255),
主动技能 varchar(999)
)
"""
# NOTE(review): CHAR(255) / varchar(999) may be too short for long skill
# descriptions -- confirm against real data before relying on these sizes.

# Open the database connection. Keyword arguments (and the utf8 charset) match
# the connect() call used by the main script; newer PyMySQL releases do not
# accept positional arguments here.
db = pymysql.connect(host="localhost", user="root", password="123456",
                     database="python_mysql", charset="utf8")
try:
    # Create a cursor object for executing statements.
    cursor = db.cursor()
    # Drop the table this script is about to create, so it can be re-run.
    cursor.execute("DROP TABLE IF EXISTS lolheroinfo")
    cursor.execute(sql)
finally:
    # Close the database connection even if a statement failed.
    db.close()
Excel版:
MySQL版:
下面是上文代码打包为EXE版(pyinstaller)
主文件(Excel版):https://www.lanzouj.com/i2dnmvg
主文件(MySQL版):https://www.lanzouj.com/i2dnmej
数据文件:https://www.lanzouj.com/i2dnn9a (必要文件,运行主文件时需要读取它的信息。也可以运行下面的 获取数据文件 自动生成)
获取数据文件:https://www.lanzouj.com/i2dnm7c (运行后会自动生成最新的数据文件)
最后温馨提示请合理使用爬虫