某我音乐爬虫
用户故事
作为一位热爱听歌的用户,我经常会在各大音乐网站和视频网站听最新流行的歌曲。但是这些网站的歌曲和歌词都是在线的,无法下载到本地。我想要一个应用程序,能够帮我爬取这些网站上的流行歌曲和歌词,下载到本地,这样我就可以把歌曲和歌词存入U盘,在车上听和唱了。
请求头
# Request headers sent with the ranking-list API call.
# NOTE(review): 'Cookie' and 'csrf' are placeholders and must be replaced with
# real values (presumably copied from a browser session on www.kuwo.cn —
# confirm) before the script is run.
headers = {
    'Host': 'www.kuwo.cn',
    'Referer': 'http://www.kuwo.cn/rankList',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/76.0.3809.87 Safari/537.36',
    'Cookie': 'XXX',
    'csrf': 'XXx',
}
获取抖音排行榜的歌曲
def get_music_list(page=1):
    """Fetch one page of songs from the ranking list identified by ``BANGID``.

    Args:
        page: Page number sent to the API as the ``pn`` query parameter.

    Returns:
        The ``data.musicList`` value of the JSON response. An empty list is
        returned when the request fails, the HTTP status is not 200, the body
        is not valid JSON, or the payload's ``code`` field is not 200, so
        callers can always iterate over the result.
    """
    url = 'http://www.kuwo.cn/api/www/bang/bang/musicList'
    param = {
        "bangId": BANGID,
        "pn": page,
        "rn": "20",
        "httpsStatus": "1",
        "reqId": "2d9ff400-cf89-11ed-90ce-6f5d3667ff22",
    }
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(url=url, headers=headers, params=param, timeout=10)
        if response.status_code != 200:
            return []
        res = response.json()
    except (requests.RequestException, ValueError) as e:
        print("error:<get_music_list>", e)
        return []
    if not isinstance(res, dict) or res.get('code') != 200:
        return []
    data = res.get('data') or {}
    return data.get('musicList') or []
获取音乐下载地址
def get_music_url(music_list):
    """Look up the download URL of every song in ``music_list``.

    Args:
        music_list: Iterable of song dicts; each must provide the keys
            ``'name'`` and ``'rid'``.

    Returns:
        A tuple ``(url_list, name_list)`` of equal-length lists. Entry ``i``
        of ``url_list`` is the download URL of the song named
        ``name_list[i]``. Songs whose lookup fails are reported on stdout and
        left out of both lists.
    """
    # A single request id is generated up front and reused for every lookup.
    req_id = str(uuid.uuid1())
    music_info_url = "http://www.kuwo.cn/api/v1/www/music/playUrl"
    url_list = []
    name_list = []
    for music in music_list:
        name = music['name']
        param = {
            'mid': music['rid'],
            'type': 'convert_url3',
            'httpsStatus': 1,
            'reqId': req_id,
        }
        try:
            # The request itself is inside the try so that one network
            # failure only skips this song instead of aborting the batch.
            response = requests.get(url=music_info_url, params=param, timeout=10)
            if response.status_code != 200:
                print(f"error:<{name}>", f"HTTP {response.status_code}")
                continue
            down_url = response.json()['data']['url']
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
            print(f"error:<{name}>", e)
            continue
        url_list.append(down_url)
        name_list.append(name)
    return url_list, name_list
下载音乐
def save_music(url_list, name_list, path='D:\\Music\\抖音'):
    """Download every song and store it as ``<path>/<name>.mp3``.

    Args:
        url_list: Download URLs, one per song.
        name_list: Song names, parallel to ``url_list``; used as file names.
        path: Target directory. It is created when it does not exist yet.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    os.makedirs(path, exist_ok=True)
    # Characters that are not allowed in Windows file names are replaced so a
    # song title such as "A/B?" cannot break the open() call below.
    invalid_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    # Start from the first entry; enumerate(start=1) only affects the number
    # shown in the progress message.
    for i, (url, name) in enumerate(zip(url_list, name_list), start=1):
        safe_name = name.translate(invalid_chars)
        try:
            # NOTE(review): verify=False disables TLS certificate checks; it is
            # kept from the original code — confirm it is really required.
            music = requests.get(url=url, verify=False, timeout=30)
        except requests.RequestException as e:
            print('下载歌曲失败', e)
            continue
        if music.status_code == 200:
            with open(os.path.join(path, '{}.mp3'.format(safe_name)), 'wb') as f:
                f.write(music.content)
            print('{}:Download successful: {}.mp3'.format(i, safe_name))
        else:
            print('下载歌曲失败')
do_main
def do_main():
    """Run the whole pipeline: fetch the ranking list, resolve URLs, download.

    Pages in ``range(1, 2)`` (currently only page 1) are processed one after
    another. Widen the range to crawl more pages.
    """
    for page in range(1, 2):
        # Pass the loop variable on so that widening the range really pages.
        music_list = get_music_list(page)
        if not music_list:
            # Nothing fetched (request failed or empty page): skip this page.
            continue
        url_list, name_list = get_music_url(music_list)
        save_music(url_list, name_list)