好友
阅读权限10
听众
最后登录1970-1-1
|
本帖最后由 Hao_Tian22 于 2020-3-28 17:22 编辑
py瞎写的酷狗爬虫,函数调用有点乱,带简单的GUI
有什么改进意见或者bug什么的可以提出来
第一次使用需要先自行设置cookies,没有cookies很容易被拒绝
一次性爬取太多也会出问题
爬取过程中会在当前目录下生成"音乐"和"数据"文件夹用来储存,请选择好目录再使用
还有……貌似大多数歌曲只能爬标准品质
还有个在下载界面中点关闭会报错的bug,懒得修了
下面是打包好的文件(没安装的模块会自动安装,调用pip在线安装)
GitHub地址:https://github.com/HaoTian22/kugou-spider/releases (提供免配置Python环境、可直接运行的exe文件)
v0.1.0:更新GUI界面,更好操作
[python]酷狗音乐下载GUI版.7z
(5.47 KB, 下载次数: 189)
v0.1.1:支持酷狗分享码下载,修复若干bug,重写代码结构
v3重置版.7z
(4.24 KB, 下载次数: 125)
源代码:
第一个文件(主文件)
[Python] 纯文本查看 复制代码
import time
from 下载组件 import *
import os
import easygui
# Make sure the working folders exist in the current directory.
# Only "already exists" is treated as benign; any other OSError (for example
# a permission problem) now surfaces instead of being reported as
# "folder already exists".
for folder in ('音乐', '数据'):
    try:
        os.mkdir(folder)
    except FileExistsError:
        print('检测到' + folder + '文件夹已存在')

# Opening in append mode creates each data file when missing and leaves
# existing content untouched.
for data_file in ('数据/cookies.txt', '数据/歌单列表.txt', '数据/歌单哈希值列表.txt'):
    with open(data_file, 'a'):
        pass

# Shared downloader instance used by download(); it reads 数据/cookies.txt
# on construction, so it must be created after the file exists.
song_download = kugou_download()
def download():
    """Run the interactive download menu until the user chooses to stop.

    Each round asks for a mode, performs it, then asks whether to continue,
    open the music folder, or close. Relies on the module-level
    ``song_download`` instance and on ``kugou_code`` from the download
    component.

    Returns:
        None. Results are shown in easygui dialogs or printed to the console.
    """
    mode_list = ['输入酷狗码', '根据歌曲名称下载', '根据哈希值下载',
                 '导入文件批量下载', '转换utf-8为gbk', '更新cookies']

    # A loop replaces the original self-recursion for "continue", so a long
    # session cannot grow the call stack.
    while True:
        mode = easygui.choicebox(msg='请选择下载模式', title='选择模式', choices=mode_list)

        if mode == '输入酷狗码':
            code = easygui.enterbox('请输入酷狗码', '输入酷狗码')
            # enterbox returns None when the dialog is cancelled.
            code_return = kugou_code(code) if code else None
            if isinstance(code_return, list):
                # Playlist code: persist names and hashes, then let the user pick.
                song_list = []
                with open("数据/歌单列表.txt", "w", encoding="utf-8") as f, \
                        open("数据/歌单哈希值列表.txt", "w") as d:
                    for num, item in enumerate(code_return, start=1):
                        song_name = item['filename']
                        f.write(song_name + '\n')
                        d.write(item['hash'] + '\n')
                        song_list.append(str(num) + ' ' + song_name)
                song_choice_name = easygui.multchoicebox(
                    msg='选择你要下载的歌曲(可多选,按Cancel退出下载操作)',
                    title='选择歌曲', choices=song_list)
                if song_choice_name is not None:
                    lyrics_mode = easygui.boolbox(msg='是否需要一键下载全部歌词?', choices=['是', '否'])
                    for choice in song_choice_name:
                        # Each entry starts with its 1-based position in the playlist.
                        index = int(choice.split(' ')[0]) - 1
                        print(song_download.download_main(code_return[index]['hash'], lyrics_mode))
                        time.sleep(1)
            elif code_return is not None:
                # Single-song code: kugou_code returned the hash directly.
                lyrics_mode = easygui.boolbox('是否下载歌词?', choices=['是', '否'])
                easygui.msgbox(msg=song_download.download_main(code_return, lyrics_mode),
                               ok_button='继续')
            elif code:
                # The code resolved to neither a playlist nor a single song.
                easygui.msgbox(msg='无法识别该酷狗码', ok_button='继续')

        elif mode == '根据歌曲名称下载':
            song_name = easygui.enterbox(msg='请输入歌曲名称')
            if song_name:
                song_name_json = song_download.download_name(song_name)
                results = song_name_json['data']['lists']
                song_list = []
                for num, song in enumerate(results, start=1):
                    # Strip the <em> highlight markers the search API puts in names.
                    clean_name = song['FileName'].replace('<em>', '').replace('</em>', '').replace('<\\/em>', '')
                    song_list.append(str(num) + ' ' + clean_name)
                choice = easygui.choicebox(msg='请在以上结果中选择你要下载的歌曲', choices=song_list)
                if choice is not None:  # None means the user cancelled
                    num = int(choice.split(" ")[0])
                    lyrics_mode = easygui.boolbox('是否下载歌词?', choices=['是', '否'])
                    easygui.msgbox(
                        msg=song_download.download_main(results[num - 1]['FileHash'], lyrics_mode),
                        ok_button='继续')

        elif mode == '根据哈希值下载':
            song_hash = easygui.enterbox(msg='输入哈希值', title='哈希值')
            if song_hash:
                lyrics_mode = easygui.boolbox('是否下载歌词?', choices=['是', '否'])
                # Show the outcome like the other single-song modes do; the
                # result message used to be discarded here.
                easygui.msgbox(msg=song_download.download_main(song_hash, lyrics_mode),
                               ok_button='继续')

        elif mode == '导入文件批量下载':
            with open('数据/歌单哈希值列表.txt', 'r') as f:
                song_hash_list = f.read().split()
            lyrics_mode = easygui.boolbox(msg='是否需要一键下载全部歌词?', choices=['是', '否'])
            for song_hash in song_hash_list:
                print(song_download.download_main(song_hash, lyrics_mode))
                time.sleep(1)  # pause between requests

        elif mode == '更新cookies':
            with open('数据/cookies.txt', 'r') as f:
                cookies_old = f.read()
            cookies = easygui.textbox(
                '输入cookies,可在浏览器酷狗音乐页面按f12寻找\n下面的是原来的cookies,请删除后更改', '更新cookies', cookies_old)
            if cookies:
                with open('数据/cookies.txt', 'w') as f:
                    f.write(cookies)
                # The downloader reads the cookie file only in __init__;
                # re-run it so the new cookies apply without a restart.
                song_download.__init__()

        else_mode = easygui.choicebox(msg='本次操作已完成,是否进行其他操作', choices=[
            '继续使用', '打开文件夹', '关闭程序'])
        if else_mode == '打开文件夹':
            os.system("explorer 音乐\n")
        if else_mode != '继续使用':
            return
# Entry point: run the menu. Set debug_mode to True to let exceptions
# propagate with a full traceback instead of showing the generic dialog.
debug_mode = False
if debug_mode:
    download()
else:
    try:
        download()
    # SystemExit is included because the downloader exits that way on a
    # cookie/API error; KeyboardInterrupt is deliberately left alone.
    except (Exception, SystemExit):
        easygui.msgbox(msg='出现bug,程序将退出\n如有需要,请把debug_mode的值更改为True来打开调试模式以查看详情')
# Keep the console window open so printed messages remain readable.
os.system('pause')
第二个文件(下载组件)
[Python] 纯文本查看 复制代码
from urllib import parse
from lxml import etree
import requests
import hashlib
def kugou_code(code):
    """Resolve a Kugou share code into playlist entries or a song hash.

    Args:
        code: The share code entered by the user (a digit string).

    Returns:
        For a playlist code (info type 4): the ``data`` field of the playlist
        response, which callers treat as a list of dicts with ``filename``
        and ``hash`` keys.
        For a single-song code (info type 1): the song hash.
        ``None`` for any other code type.
    """
    import json  # local import so this function does not depend on file-level imports

    # The endpoints take the JSON document itself as the raw request body.
    # json.dumps emits the same double-quoted text the hand-built strings
    # did, and additionally escapes any quotes inside the values.
    base_payload = {"appid": 1001, "clientver": 8392, "mid": "b1422385bca909d7ac9aadb285f05541",
                    "clienttime": 636307277, "key": "1bb5ba48267c0a4750ecda8d7b10368c"}

    # ---------------- Step 1: look up what the code refers to ----------------
    lookup_body = json.dumps(dict(base_payload, data=str(code)), separators=(',', ':'))
    response_text = requests.post(url="http://t.kugou.com/command/", data=lookup_body).text
    # SECURITY: this used to be eval(), which executes whatever the server
    # (or anyone tampering with the plain-HTTP traffic) sends back and also
    # fails on JSON true/false/null. json.loads only parses data.
    page = json.loads(response_text)
    info = page["data"]['info']

    if info["type"] == 4:  # playlist share code
        # Drop display-only fields before echoing the info back.
        for field in ('name', 'username', 'img', 'img_size'):
            info.pop(field, None)
        # Request the whole playlist as a single page.
        info['page'] = 1
        info['pagesize'] = info.pop('count')
        # NOTE(review): the meaning of type 3 is unknown; the value mirrors
        # the request the original client sends.
        info['type'] = 3
        print('共有' + str(info['pagesize']) + '首歌')
        # Reference request body captured from the original client:
        # {"appid":1001,"clientver":8392,"mid":"b1422385bca909d7ac9aadb285f05541","clienttime":636307277,"key":"1bb5ba48267c0a4750ecda8d7b10368c","data":{"id":8,"type":3,"userid":"399348742","collect_type":0,"page":1,"pagesize":81}}
        list_body = json.dumps(dict(base_payload, data=info))

        # ------------- Step 2: fetch the playlist described by info -------------
        playlist_text = requests.post(url='http://www2.kugou.kugou.com/apps/kucodeAndShare/app/',
                                      data=list_body).text
        return json.loads(playlist_text)['data']

    if info["type"] == 1:  # single-song share code
        return page['data']['list']['hash']

    return None
def lyrics(json_list):
    """Write a song's lyrics to ``音乐/<audio_name>.lrc`` when it has any.

    Args:
        json_list: Parsed song-info response (a dict whose ``data`` entry
            holds ``lyrics`` and ``audio_name``), or ``None``.

    Returns:
        None. A status line is printed for every outcome.
    """
    if json_list is None:
        print('❌此歌曲无歌词')
    elif '纯音乐,请欣赏' in str(json_list):
        # Instrumental tracks carry this placeholder instead of real lyrics.
        print('✔已检测到纯音乐,不需要歌词')
    elif json_list['data']['lyrics'] == '':
        print('❌此歌曲无歌词')
    else:
        text = json_list['data']['lyrics']
        # Remove line feeds, the BOM and the placeholder id tag, then the
        # first carriage return (applied in this order).
        text = text.replace('\n', '').replace('\ufeff', '').replace('[id:$00000000]', '')
        text = text.replace('\r', '', 1)
        with open('音乐/' + json_list['data']['audio_name'] + '.lrc', 'w', encoding='gb18030') as f:
            f.write(text)
        print('歌词下载完成')
class kugou_download:
    """Downloader for Kugou songs by hash, plus a song-name search.

    Attributes:
        headers: HTTP headers (browser user agent) sent with every request.
        cookies: Cookie dict read from ``数据/cookies.txt``; the built-in
            defaults are used when that file yields no cookies.
    """

    # Fallback cookies for when the cookie file is missing, empty or unparsable.
    _DEFAULT_COOKIES = 'kg_mid=b434c13fcd475da311e141a0cf532557; _WCMID=16477e145e53a4a7e38ece94; kg_dfid=1aJRd418KcGl0dnFZB3ucZDk; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1582544353; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e'

    # Characters replaced with a space so audio names are usable as file names.
    _FORBIDDEN_FILENAME_CHARS = str.maketrans({char: ' ' for char in '"?/*:\\|<>'})

    def __init__(self):
        """Set the request headers and load cookies from ``数据/cookies.txt``."""
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4023.0 Safari/537.36 Edg/81.0.396.0'}
        try:
            with open('数据/cookies.txt', 'r') as f:
                raw_cookies = f.read()
        except FileNotFoundError:
            raw_cookies = ''
        # Fall back to the defaults only when the file produced nothing.
        # Previously a trailing ';' or newline raised inside a bare except
        # and merged the defaults into the user's own cookies.
        self.cookies = (self._parse_cookies(raw_cookies)
                        or self._parse_cookies(self._DEFAULT_COOKIES))

    @staticmethod
    def _parse_cookies(raw):
        """Turn a ``k1=v1; k2=v2`` cookie string into a dict, skipping bad fragments."""
        cookies = {}
        for fragment in raw.replace(' ', '').split(';'):
            key, separator, value = fragment.strip().partition('=')
            if separator and key:
                cookies[key] = value
        return cookies

    def download_main(self, song_hash, is_lyrics):
        """Download one song by hash into ``音乐/`` and optionally its lyrics.

        Args:
            song_hash: Kugou file hash of the song.
            is_lyrics: When truthy, the lyrics are saved via ``lyrics()``.

        Returns:
            A human-readable status message (success, already exists,
            unavailable, or failed).

        Raises:
            SystemExit: With code 1 when the API reports ``status == 0``
                (expired cookies or another server-side error).
        """
        import json
        import os

        hash_url = 'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191044011229047114075_1566198263706&hash={}'.format(
            song_hash)
        response_text = requests.get(url=hash_url, headers=self.headers, cookies=self.cookies).text
        # The reply is JSONP: drop the 42-character "callback(" prefix and the
        # trailing ");".
        # SECURITY: parsed with json.loads instead of the former eval(), which
        # would execute arbitrary expressions from the response.
        main_json = json.loads(response_text[42:-2])
        with open('数据/log.txt', 'w', encoding='utf-8') as log:
            log.write(str(main_json))
        if main_json['status'] == 0:
            print('cookies过期或发生其他错误,请重试')
            print('以下是错误代码:' + str(main_json))
            # Same exception quit(1) raised, without relying on the
            # site-provided quit() builtin.
            raise SystemExit(1)

        data = main_json['data']
        # Sanitised in place because lyrics() reads the same key for the
        # .lrc file name.
        data['audio_name'] = data['audio_name'].translate(self._FORBIDDEN_FILENAME_CHARS)
        song_url = data['play_url'].replace('\\', '')
        song_name = data['audio_name']
        song_length = int(data['timelength'])
        song_free = data.get('is_free_part', 0)  # 1 = preview-only track, 0 = full track

        if song_url == '':  # no playable URL
            return '❌歌曲<{}>无数据或需要付费下载'.format(song_name)

        notice = ''
        notice_file_name = ''
        if song_free == 1:
            notice = '⚠歌曲为试听版,请核实'
            notice_file_name = '[试听]'
        file_path = '音乐/' + notice_file_name + song_name + '.mp3'
        exists_message = '⚠歌曲<' + song_name + '>已存在'

        # Check before downloading so an existing song costs no traffic.
        if os.path.exists(file_path):
            return exists_message
        # Fetch before creating the file so a failed request cannot leave an
        # empty .mp3 that later looks like an existing download.
        try:
            song = requests.get(url=song_url, headers=self.headers, cookies=self.cookies)
            song.raise_for_status()
        except requests.RequestException as error:
            return '❌歌曲<{}>下载失败:{}'.format(song_name, error)
        try:
            with open(file_path, 'xb') as f:
                f.write(song.content)
        except FileExistsError:
            return exists_message
        except OSError as error:
            return '❌歌曲<{}>保存失败:{}'.format(song_name, error)

        minutes, seconds = divmod(song_length // 1000, 60)  # timelength is treated as milliseconds
        song_length_format = '{}:{:02d}'.format(minutes, seconds)
        if is_lyrics:
            lyrics(main_json)
        return '✔歌曲<{}>下载完成\n歌曲时长{}\n'.format(song_name, song_length_format) + notice

    def download_name(self, name):
        """Search Kugou for ``name`` and return the parsed search response.

        Args:
            name: Song name or keywords to search for.

        Returns:
            The decoded response dict; callers read ``['data']['lists']``,
            whose items carry ``FileName`` and ``FileHash``.
        """
        import json

        url_name = parse.quote(name).replace('%20', '+')
        url_json1 = 'https://songsearch.kugou.com/song_search_v2?callback=jQuery11240770641348037286_1566198223730' \
                    '&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection' \
                    '=1&privilege_filter=0&_=1566198223734'.format(url_name)
        page1 = requests.get(url=url_json1, headers=self.headers).text
        # JSONP again: a 41-character "callback(" prefix and a trailing ");".
        # NOTE(review): an earlier, removed draft read HQFileHash, SQFileHash
        # and ResFileHash from each result for higher quality tiers; only
        # FileHash is used by callers today.
        return json.loads(page1[41:-2])
效果展示
功能选择
下载界面
使用教程
1.配置好python3环境
2.下载并解压
3.点击“点击启动.bat”
酷狗码:在酷狗客户端中右键歌单或歌曲,点击“分享”,“酷狗码”,把纯数字粘贴到该软件即可
歌名搜索:直接输入歌名即可
哈希值下载:酷狗客户端右键,"歌曲信息",里面有个哈希值复制即可(可精准下载某些要下载vip的歌曲)
导入文件:填写哈希值文件列表批量下载,一行一个哈希值
|
免费评分
-
查看全部评分
|