python之爬取酷狗听书--新手小白练习
最近在逛酷狗听书,这个小说我挺喜欢的,想爬取下来,但是水平不够,不会翻页下载。小说有33页,我现在只会一页一页地下载。如果有大佬会设置翻页,麻烦设置一下给我看看,感激不尽~~~
import requests
import re
# Downloads every chapter listed on ONE album page of Kugou "听书" into ./video.
# Relies on the file-level `import requests` / `import re`.

ALBUM_URL = 'https://m.kugou.com/ts/album/16t1qk50/'
PLAY_API_URL = 'https://wwwapi.kugou.com/yy/index.php'
OUTPUT_DIR = 'video'
# Seconds. requests has no default timeout, so a stalled connection would
# otherwise block the script forever.
REQUEST_TIMEOUT = 30

# The album page, the play-info API and the audio download all used the same
# header values, so one shared dict replaces the two duplicated copies.
HEADERS = {
    "Cookie": "kg_mid=dacec04082d61858ea782e762e992a31; kg_dfid=34cJIh1FZO1y1rTg4V4UTHmK; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1694486748; kg_mid_temp=dacec04082d61858ea782e762e992a31; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1694494615",
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
    "Referer": "https://m.kugou.com/",
}

# Captures the last path segment of each chapter link; that segment is what the
# script sends to the API as "encode_album_audio_id".
AUDIO_ID_PATTERN = re.compile(
    r'<span><a style="color: inherit" href="/ts/album/.*?/(.*?)\.html">'
)
# Characters that are not allowed in Windows file names.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def extract_audio_ids(html):
    """Return the chapter ids found in an album page's HTML, in page order."""
    return AUDIO_ID_PATTERN.findall(html)


def safe_filename(name):
    """Return *name* with file-name-hostile characters replaced by '_'.

    Falls back to 'untitled' when nothing usable is left.
    """
    cleaned = INVALID_FILENAME_CHARS.sub('_', name).strip()
    return cleaned or 'untitled'


def fetch_track(audio_id):
    """Query the play/getdata API for one chapter.

    Returns a ``(song_name, play_url)`` tuple, or ``None`` when the response
    has no usable ``data`` object or no ``play_url``.
    Raises ``requests.RequestException`` on network/HTTP errors.
    """
    params = {
        "r": "play/getdata",
        "appid": "1014",
        "dfid": "34cJIh1FZO1y1rTg4V4UTHmK",
        "mid": "dacec04082d61858ea782e762e992a31",
        "platid": "4",
        "from": "112",
        "encode_album_audio_id": audio_id,
        "_": "1694494617474",
    }
    response = requests.get(
        PLAY_API_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    payload = response.json()
    data = payload.get("data") if isinstance(payload, dict) else None
    # NOTE(review): presumably the API returns an empty play_url (or a non-dict
    # "data") for chapters that cannot be played with these cookies -- confirm.
    if not isinstance(data, dict) or not data.get("play_url"):
        return None
    return data.get("song_name") or audio_id, data["play_url"]


def download_track(audio_id):
    """Download one chapter into OUTPUT_DIR; return True if a file was written."""
    import os

    track = fetch_track(audio_id)
    if track is None:
        print('跳过(没有可用的播放地址):', audio_id)
        return False
    song_name, play_url = track
    print(song_name, play_url)
    audio = requests.get(url=play_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Check the status before writing so an HTTP error page is never saved
    # under an .mp3 name.
    audio.raise_for_status()
    # Save the audio file into the local 'video' directory.
    target = os.path.join(OUTPUT_DIR, safe_filename(song_name) + '.mp3')
    with open(target, mode='wb') as f:
        f.write(audio.content)
    print('音频下载完成:', song_name)
    return True


def main():
    """Download every chapter linked from the album page at ALBUM_URL."""
    import os

    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    page = requests.get(url=ALBUM_URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    for audio_id in extract_audio_ids(page.text):
        print(audio_id)
        try:
            download_track(audio_id)
        except (requests.RequestException, OSError, ValueError) as exc:
            # One broken chapter should not abort the remaining downloads.
            print('下载失败:', audio_id, exc)


if __name__ == "__main__":
    main()

# Forum footer: this post was last edited by AiSwcm on 2023-9-13 16:19.
最外层加个for循环就好了
link = 'https://m.kugou.com/ts/album/16t1qk50/'
改成
https://www.kugou.com/ts/album/16t1qk50/p{页码}-1.html
import requests
代码
import re
# Downloads every chapter of the album, walking all of its listing pages.
# Relies on the file-level `import requests` / `import re`.

# Listing pages are addressed as p1-1.html, p2-1.html, ...
PAGE_URL_TEMPLATE = 'https://www.kugou.com/ts/album/16t1qk50/p{page}-1.html'
PAGE_COUNT = 33  # the album has 33 listing pages (was hard-coded as range(1,34))
PLAY_API_URL = 'https://wwwapi.kugou.com/yy/index.php'
OUTPUT_DIR = 'video'
# Seconds. requests has no default timeout, so a stalled connection would
# otherwise block the script forever.
REQUEST_TIMEOUT = 30

# The listing pages, the play-info API and the audio download all used the same
# header values, so one shared dict replaces the two duplicated copies.
HEADERS = {
    "Cookie": "kg_mid=dacec04082d61858ea782e762e992a31; kg_dfid=34cJIh1FZO1y1rTg4V4UTHmK; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1694486748; kg_mid_temp=dacec04082d61858ea782e762e992a31; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1694494615",
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
    "Referer": "https://m.kugou.com/",
}

# Captures the last path segment of each chapter link; that segment is what the
# script sends to the API as "encode_album_audio_id".
AUDIO_ID_PATTERN = re.compile(
    r'<span><a style="color: inherit" href="/ts/album/.*?/(.*?)\.html">'
)
# Characters that are not allowed in Windows file names.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def extract_audio_ids(html):
    """Return the chapter ids found in a listing page's HTML, in page order."""
    return AUDIO_ID_PATTERN.findall(html)


def safe_filename(name):
    """Return *name* with file-name-hostile characters replaced by '_'.

    Falls back to 'untitled' when nothing usable is left.
    """
    cleaned = INVALID_FILENAME_CHARS.sub('_', name).strip()
    return cleaned or 'untitled'


def fetch_track(audio_id):
    """Query the play/getdata API for one chapter.

    Returns a ``(song_name, play_url)`` tuple, or ``None`` when the response
    has no usable ``data`` object or no ``play_url``.
    Raises ``requests.RequestException`` on network/HTTP errors.
    """
    params = {
        "r": "play/getdata",
        "appid": "1014",
        "dfid": "34cJIh1FZO1y1rTg4V4UTHmK",
        "mid": "dacec04082d61858ea782e762e992a31",
        "platid": "4",
        "from": "112",
        "encode_album_audio_id": audio_id,
        "_": "1694494617474",
    }
    response = requests.get(
        PLAY_API_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    payload = response.json()
    data = payload.get("data") if isinstance(payload, dict) else None
    # NOTE(review): presumably the API returns an empty play_url (or a non-dict
    # "data") for chapters that cannot be played with these cookies -- confirm.
    if not isinstance(data, dict) or not data.get("play_url"):
        return None
    return data.get("song_name") or audio_id, data["play_url"]


def download_track(audio_id):
    """Download one chapter into OUTPUT_DIR; return True if a file was written."""
    import os

    track = fetch_track(audio_id)
    if track is None:
        print('跳过(没有可用的播放地址):', audio_id)
        return False
    song_name, play_url = track
    print(song_name, play_url)
    audio = requests.get(url=play_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Check the status before writing so an HTTP error page is never saved
    # under an .mp3 name.
    audio.raise_for_status()
    # Save the audio file into the local 'video' directory.
    target = os.path.join(OUTPUT_DIR, safe_filename(song_name) + '.mp3')
    with open(target, mode='wb') as f:
        f.write(audio.content)
    print('音频下载完成:', song_name)
    return True


def download_page(page_url):
    """Download every chapter linked from a single listing page."""
    page = requests.get(url=page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    for audio_id in extract_audio_ids(page.text):
        print(audio_id)
        try:
            download_track(audio_id)
        except (requests.RequestException, OSError, ValueError) as exc:
            # One broken chapter should not abort the remaining downloads.
            print('下载失败:', audio_id, exc)


def main():
    """Walk listing pages 1..PAGE_COUNT and download all their chapters."""
    import os

    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for page_number in range(1, PAGE_COUNT + 1):
        page_url = PAGE_URL_TEMPLATE.format(page=page_number)
        try:
            download_page(page_url)
        except requests.RequestException as exc:
            # A listing page that fails to load is reported and skipped so the
            # remaining pages are still processed.
            print('页面获取失败:', page_url, exc)


if __name__ == "__main__":
    main()
AiSwcm 发表于 2023-9-13 16:17
最外层加个for循环就好了
link = 'https://m.kugou.com/ts/album/16t1qk50/'
谢谢大佬 小白直接来拿走复制粘贴修改{:1_918:} 照着改改就可用了 多谢分享。 同为新手,学习了 感谢楼主分享,学习一下谢谢 感谢分享学习一下