酷狗音乐网页歌曲爬取优化
本帖最后由 daofengyi 于 2022-11-28 14:43 编辑
import re
import time
import json
import aiohttp
import aiofiles
import asyncio
import hashlib
import os.path
import argparse
import logging
# Changelog and usage banner for the script; the __main__ section writes this
# text to the log once at startup via logging.info(read_me).
read_me = """
# 日期: 20221128
# 功能: 酷狗网页版MP3音乐下载
1.增加日志模块
2.对mp3_url为空的异常处理
3.增加歌手过滤
4.增加页数选择
# 用法: python kugouDownload.py keyword -s singer -p page
# 示例: 搜索歌曲名:给你给我
python kugouDownload.py 给你给我
搜索歌曲名:给你给我 歌手: 毛不易
python kugouDownload.py 给你给我 -s 毛不易
搜索歌曲名:给你给我 歌手: 毛不易 两页结果: 2
python kugouDownload.py 给你给我 -s 毛不易 -p 2
# 来源: https://www.52pojie.cn/thread-1638865-1-1.html
"""
# Request headers shared by every HTTP call in this script: a mobile Chrome
# user-agent string (the trailing space is part of the original value).
headers = {
    "user-agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/89.0.4389.114 Mobile Safari/537.36 "
    ),
}
def generate_signature(t, search_keyword, page):
    """Return the ``signature`` query value for a Kugou song-search request.

    The signature is the hex MD5 digest of a fixed key string, followed by the
    request's ``name=value`` pairs (listed in alphabetical order of the name
    and concatenated without any separator), followed by the same key string
    again. The pairs must carry the same values as the query parameters that
    are actually sent with the request.

    Args:
        t: Timestamp used for ``clienttime``, ``mid`` and ``uuid``; it is
            formatted with ``str()``.
        search_keyword: The search keyword, used verbatim.
        page: The result page number.

    Returns:
        A 32-character lowercase hexadecimal MD5 digest.
    """
    key = 'NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt'
    sign_params = [
        key,
        'bitrate=0',
        'callback=callback123',
        f'clienttime={t}',
        'clientver=2000',
        'dfid=-',
        'inputtype=0',
        'iscorrection=1',
        'isfuzzy=0',
        f'keyword={search_keyword}',
        f'mid={t}',
        f'page={page}',
        'pagesize=30',
        'platform=WebFilter',
        'privilege_filter=0',
        'srcappid=2919',
        'token=',
        'userid=0',
        f'uuid={t}',
        key,
    ]
    return hashlib.md5(''.join(sign_params).encode('UTF-8')).hexdigest()
async def main(search_keyword, singer_name, page):
    """Search Kugou for songs and download every match on one result page.

    Sends a signed request to the song-search endpoint, builds a list of song
    records from the response, optionally keeps only the songs whose
    (filename-sanitised) singer name equals ``singer_name``, and then runs
    ``get_song_play_addr`` for all remaining songs concurrently.

    Args:
        search_keyword: Keyword to search for.
        singer_name: Exact singer name to keep, or ``None`` for no filtering.
        page: Result page number to request (30 results per page).

    Returns:
        None. Failures are written to the log instead of being raised.
    """
    url = 'https://complexsearch.kugou.com/v2/search/song'
    t = time.time()
    params = {
        'callback': 'callback123',
        'page': page,
        'keyword': search_keyword,
        'pagesize': '30',
        'bitrate': '0',
        'isfuzzy': '0',
        'inputtype': '0',
        'platform': 'WebFilter',
        'userid': '0',
        'clientver': '2000',
        'iscorrection': '1',
        'privilege_filter': '0',
        'token': '',
        'srcappid': '2919',
        'clienttime': str(t),
        'mid': str(t),
        'uuid': str(t),
        'dfid': '-',
        'signature': generate_signature(t, search_keyword, page),
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url, headers=headers, params=params) as resp:
            if resp.status != 200:
                logging.error(f'错误代码: {resp.status}, 稍后重试')
                return
            resp_text = await resp.text()

    # The body is JSONP ("callback123(<json>)"); take what sits between the
    # first "(" and the last ")" rather than relying on fixed offsets.
    try:
        payload = resp_text[resp_text.index('(') + 1:resp_text.rindex(')')]
        json_data = json.loads(payload)
    except ValueError as exc:  # no parentheses found, or invalid JSON
        logging.error(f'获取歌曲列表失败: {exc}')
        return

    if json_data['status'] != 1:
        logging.error(f'获取歌曲列表失败: {json_data.get("error_msg")}')
        return

    # Characters that are not allowed in file names are replaced with "_",
    # because song and singer names later become part of the saved file name.
    illegal_chars = re.compile(r"[/\\:*?\"<>|]")
    song_list = [
        {
            'SongName': illegal_chars.sub("_", item['SongName']),
            'AlbumID': item['AlbumID'],
            'FileHash': item['FileHash'],
            'SQFileHash': item['SQFileHash'],
            'HQFileHash': item['HQFileHash'],
            'MvHash': item['MvHash'],
            'Audioid': item['Audioid'],
            'SingerName': illegal_chars.sub("_", item['SingerName']),
        }
        for item in json_data['data']['lists']
    ]

    if singer_name is not None:
        # Filter by singer name (exact match against the sanitised name).
        song_list = [song_info for song_info in song_list
                     if song_info["SingerName"] == singer_name]

    n = len(song_list)
    if n == 0:
        logging.error(f'歌曲搜索结果为: {n}, 请更换关键词或者歌手重试')
        return

    logging.info(f'获取歌曲列表成功,总共{n}首, 准备下载...')
    # gather() accepts coroutines directly (asyncio.wait() no longer does on
    # recent Python versions); return_exceptions=True keeps one failed song
    # from aborting the rest and lets each failure be reported.
    outcomes = await asyncio.gather(
        *(get_song_play_addr(song) for song in song_list),
        return_exceptions=True,
    )
    for song, outcome in zip(song_list, outcomes):
        if isinstance(outcome, BaseException):
            logging.error(f'{song["SongName"]}--{song["SingerName"]}--下载失败: {outcome!r}')
async def get_song_play_addr(song_info):
    """Look up the play URL of one song and download it.

    Requests ``play/getdata`` for the song's ``FileHash`` and ``AlbumID``,
    reads ``data.play_url`` from the JSONP response and, when it is present
    and non-empty, passes it to ``save_mp3`` together with the song and
    singer names.

    Args:
        song_info: Mapping with at least the keys ``FileHash``, ``AlbumID``,
            ``SongName`` and ``SingerName``.

    Returns:
        None. A missing URL, an unparsable body or a non-200 status is logged.
    """
    url = 'https://wwwapi.kugou.com/yy/index.php'
    params = {
        'r': 'play/getdata',
        'callback': 'jQuery191035601158181920933_1653052693184',
        'hash': song_info['FileHash'],
        'dfid': '2mSZvv2GejpK2VDsgh0K7U0O',
        'appid': '1014',
        'mid': 'c18aeb062e34929c6e90e3af8f7e2512',
        'platid': '4',
        'album_id': song_info['AlbumID'],
        '_': '1653050047389',
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url, headers=headers, params=params) as resp:
            if resp.status != 200:
                logging.error(f'错误代码: {resp.status}, 请稍后再试')
                return
            resp_text = await resp.text()

    # The body is JSONP ("<callback>(<json>);"). Parse the JSON between the
    # first "(" and the last ")" as-is: json.loads already understands "\/"
    # and "\uXXXX", whereas stripping backslashes by hand corrupts escaped
    # quotes and non-ASCII text.
    try:
        payload = resp_text[resp_text.index('(') + 1:resp_text.rindex(')')]
        json_data = json.loads(payload)
    except ValueError as exc:  # no parentheses found, or invalid JSON
        logging.error(f'未找到mp3_url下载链接: {exc}')
        return

    # "data" is only usable when it is a JSON object; anything else (missing,
    # null, a list, ...) is treated as "no URL".
    data = json_data.get('data') if isinstance(json_data, dict) else None
    mp3_url = data.get('play_url') if isinstance(data, dict) else None
    if not mp3_url:
        logging.error("未找到mp3_url下载链接")
        return

    await save_mp3(mp3_url, song_info['SongName'], song_info['SingerName'])
async def save_mp3(url, song_name, singer_name, default_sv_dir="music"):
    """Download ``url`` and save it as ``<song_name>-<singer_name>.mp3``.

    Args:
        url: Direct URL of the audio file.
        song_name: Song name used in the file name.
        singer_name: Singer name used in the file name.
        default_sv_dir: Directory to save into; created if it does not exist.

    Returns:
        None. A non-200 response is logged and nothing is written.
    """
    os.makedirs(default_sv_dir, exist_ok=True)
    target = os.path.join(default_sv_dir, f'{song_name}-{singer_name}.mp3')
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url, headers=headers) as resp:
            # Do not store an error page under an .mp3 name.
            if resp.status != 200:
                logging.error(f'{song_name}--{singer_name}--下载失败, 错误代码: {resp.status}')
                return
            async with aiofiles.open(target, mode='wb') as f:
                # Stream to disk in chunks instead of buffering the whole
                # file in memory.
                async for chunk in resp.content.iter_chunked(64 * 1024):
                    await f.write(chunk)
    logging.info(f'{song_name}--{singer_name}--下载完成')
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, then search and download
    # the requested number of result pages, one page after another.
    logging.basicConfig(filename="kgDownloader.log", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
    logging.info(read_me)
    parser = argparse.ArgumentParser(
        prog='kuGouDownloader',
        description='酷狗音乐下载')
    parser.add_argument('keyword', help='歌曲关键词')
    parser.add_argument('-s', '--singer', help='歌手名')
    parser.add_argument('-p', '--page', help='页数', type=int)
    args = parser.parse_args()
    keyword, singer = args.keyword, args.singer
    # Without -p only the first result page (up to 30 songs) is downloaded.
    total_page = args.page if args.page is not None else 1
    if total_page < 1:
        # A page count below 1 would otherwise silently download nothing.
        parser.error('页数必须大于等于 1')

    async def _download_pages():
        """Run main() for page 1..total_page in order."""
        for i in range(1, total_page + 1):
            logging.info(f"搜索: {keyword}, 歌手: {singer if singer is not None else '' }, 搜索页数: {i}")
            await main(keyword, singer, i)

    # asyncio.run() creates and closes the event loop itself; calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    asyncio.run(_download_pages())
基于原贴:https://www.52pojie.cn/thread-1638865-1-1.html,优化修改 很不错,但是下载的有的只有一分钟。还有不会用的,可以参考一下。(以下针对 小小白)
1。安装python 3.8或更高版本
2。如果运行 .py 文件时 Python 提示缺少 aiofiles 和 aiohttp 模块,请按下面第 4-6 步安装
3。添加环境变量运行:CMD 输入sysdm.cpl ,变量:path 路径就是python 的路径
4.python -m pip install --upgrade pip
5.pip3 install aiohttp
6.pip3 install aiofiles 可以的,很棒 支持楼主!感谢分享! 谢谢分享,学习一下 酷狗哭了T﹏T 酷狗哭了T﹏T 协程爬虫的时候,下载数据有的不是很完整 ,怎么解决这个问题? 可以爬取vip整首歌吗, 不能下高音质的吗?只能下载普通音质的啊
页:
[1]
2