网易云热门评论爬取
爬取单曲评论请使用单曲模式(输入 2),爬取歌单评论请使用完整歌单模式(输入 3)。ID 为歌曲或歌单网址末尾的那一串数字。
运行前需要在代码的 headers 中填入自己账号的 cookie。
from Crypto.Cipher import AES
from binascii import b2a_base64
import json
import requests
from lxml import etree
import csv
import time
# HTTP headers sent with every request in this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
    # Placeholder: replace with the cookie string of your own logged-in account.
    "cookie": "##################################################################################",
}

# The values below were copied from the web page's JavaScript call
#   window.asrsea(JSON.stringify(i5n), buV1x(["流泪", "强"]), buV1x(Rg2x.md), buV1x(["爱心", "女孩", "惊恐", "大笑"]))
# which has the shape window.asrsea(data, e, f, g).

# Second argument, buV1x(["流泪", "强"]). Not read anywhere in this script.
# NOTE(review): presumably the RSA public exponent behind encSecKey -- confirm.
e = '010001'

# Third argument, buV1x(Rg2x.md). Not read anywhere in this script.
# NOTE(review): presumably the RSA modulus behind encSecKey -- confirm.
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'

# Fourth argument, buV1x(["爱心", "女孩", "惊恐", "大笑"]); used as the AES key
# of the first encryption pass.
g = '0CoJUm6Qyw8W8jud'

# Captured "random" keys for the second AES pass.
# NOTE(review): each one presumably has to match the hard-coded encSecKey that
# is posted alongside it -- confirm before changing either.
i_detail = "zfow3tEY3ru4uybG"      # random value 1, used for playlist-detail requests
i_hotcommon = "vDIsXMJJZqADRVBP"   # random value 2, used for hot-comment requests
def enconda_params(data, key):
    """Encrypt ``data`` with AES in CBC mode and return it as Base64 text.

    This reproduces one pass of the encryption the web page applies to a
    request body. The IV is the fixed string ``0102030405060708``.

    Args:
        data: Plain text to encrypt.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The Base64-encoded ciphertext as a ``str`` with no trailing newline.
    """
    raw = data.encode('utf-8')
    # Pad the *encoded* bytes: a character count differs from the byte count as
    # soon as the text contains non-ASCII characters, which would leave the
    # buffer misaligned to the 16-byte AES block size.
    pad_len = 16 - len(raw) % 16
    raw += bytes([pad_len]) * pad_len
    cipher = AES.new(
        key=key.encode('utf-8'),
        IV="0102030405060708".encode("utf-8"),
        mode=AES.MODE_CBC,
    )
    encrypted = cipher.encrypt(raw)
    # newline=False: b2a_base64 appends "\n" by default, which would otherwise
    # be encrypted by the second pass and end up inside the posted params.
    return b2a_base64(encrypted, newline=False).decode('utf-8')
def get_params(data, mode_):
    """Build the ``params`` form value by encrypting ``data`` twice.

    The first pass uses the fixed key ``g``; the second pass uses the key that
    belongs to the request type selected by ``mode_``.

    Args:
        data: JSON text of the request body.
        mode_: ``"2"`` for a hot-comment request (key ``i_hotcommon``) or
            ``"3"`` for a playlist-detail request (key ``i_detail``).

    Returns:
        The doubly encrypted, Base64-encoded request body.

    Raises:
        ValueError: If ``mode_`` is neither ``"2"`` nor ``"3"``.
    """
    # Reject unknown modes up front instead of silently returning None, which
    # would be posted to the server as the params value.
    if mode_ not in ("2", "3"):
        raise ValueError(f"unsupported mode: {mode_!r} (expected '2' or '3')")
    second_key = i_hotcommon if mode_ == "2" else i_detail
    first = enconda_params(data, g)
    return enconda_params(first, second_key)
def get_hotcommon(id, mode_, song_name):
    """Fetch the hot comments of one song and append them to ``网易云热评.csv``.

    One CSV row is written per hot comment, with the columns
    ``song_name, nickname, comment text, like count``.

    Args:
        id: Song ID as text; used to build the ``R_SO_4_<id>`` thread id.
        mode_: Mode string forwarded to ``get_params`` to pick the second key.
        song_name: Song title; written to the first CSV column and printed.

    Returns:
        A dict mapping each hot comment's text to its like count. It is empty
        when the response contains no usable hot-comment data.
    """
    data = json.dumps({
        'cursor': '-1',
        'offset': '0',
        'orderType': '1',
        'pageNo': '1',
        'pageSize': '20',
        'rid': f'R_SO_4_{id}',
        'threadId': f'R_SO_4_{id}',
    })
    post_code = {
        "params": get_params(data, mode_),
        "encSecKey": "516070c7404b42f34c24ef20b659add657c39e9c52125e9e9f7f5441b4381833a407e5ed302cac5d24beea1c1629b17ccb86e0d9d57f6508db5fb7a6df660089ac57b093d19421d386101676a1c8d1e312e099a3463f81fbe91f28211f9eccccfbfc64148fdd65e2b9f5fcf439a865b95fb656e36f75091957f0a1d39ca8ddd3",
    }
    song_url = 'https://music.163.com/weapi/comment/resource/comments/get?csrf_token=0e94b5c95607da5d1273d8456d611f5f'
    res = requests.post(url=song_url, data=post_code, headers=headers).content
    res_dict = json.loads(res)

    hotcontent = {}
    try:
        comments = res_dict['data']['hotComments'] or []
        rows = [
            (item['user']['nickname'], item['content'], item['likedCount'])
            for item in comments
        ]
    except (KeyError, TypeError):
        # The payload did not have the expected shape; report it instead of
        # hiding the failure.
        print(f"未获取到热评数据: {song_name} {id}")
        return hotcontent

    if rows:
        # Open the file once per song rather than once per comment.
        with open("网易云热评.csv", "a+", encoding="utf-8", newline='') as c:
            csvwrite = csv.writer(c)
            for nickname, content_text, likedcount in rows:
                hotcontent[content_text] = likedcount
                csvwrite.writerow([song_name, nickname, content_text, likedcount])
    print(song_name, id)
    return hotcontent
# Fetch the playlist detail page information.
def get_detail(id, mode_):
    """Fetch every track of a playlist and save each track's hot comments.

    Args:
        id: Playlist ID (the trailing number of the playlist URL).
        mode_: Mode string forwarded to ``get_params`` for the detail request.

    Returns:
        None. Results are written by ``get_hotcommon``.
    """
    detail_url = "https://music.163.com/weapi/v6/playlist/detail?csrf_token=0e94b5c95607da5d1273d8456d611f5f"
    data_1 = json.dumps({
        "csrf_token": "0e94b5c95607da5d1273d8456d611f5f",
        "id": f"{id}",
        "limit": "1000",
        "n": "1000",
        "offset": "0",
        "total": "true",
    })
    post_code_1 = {
        "params": get_params(data_1, mode_),
        "encSecKey": "b5c805263ba9329ac40a910c3c80af59f269b2542ef8920cfd456f91e668ea097b671f9e212c45fa195faea3dc56d3b25ebe3d875d771363af59220e916e18d2dd965f937353b8f1e1dc114cf613d134777628bd3dab5072f4c3fbb01f0b5cb1ddce3ff91dacb93644737225c3d4799956f1cbd40a5f691a80d508ee1a63f7e8",
    }
    res = requests.post(url=detail_url, data=post_code_1, headers=headers).content
    res_dict = json.loads(res)

    try:
        tracks = res_dict['playlist']['tracks'] or []
    except (KeyError, TypeError):
        print(f"未获取到歌单详情: {id}")
        return

    for song_message in tracks:
        # Handle failures per track so one bad song does not silently abort
        # the rest of the playlist.
        try:
            # Comment requests always use mode "2", whatever mode fetched the list.
            get_hotcommon(str(song_message['id']), "2", song_message['name'])
        except (KeyError, TypeError, ValueError, requests.RequestException) as err:
            print(f"歌曲热评获取失败: {song_message!r} ({err})")
def get_163(list_id):
    """Scrape a toplist page and save the hot comments of every song on it.

    Song links are read from the ``song-list-pre-cache`` block of the page at
    ``https://music.163.com/discover/toplist?id=<list_id>``.

    Args:
        list_id: ID of the list, as typed by the user.

    Returns:
        None. Results are written by ``get_hotcommon``.
    """
    toplist_url = f'https://music.163.com/discover/toplist?id={list_id}'
    response = requests.get(toplist_url, headers=headers)
    html = etree.HTML(response.content.decode())
    if html is None:
        print(f"未获取到榜单页面: {list_id}")
        return

    namelist = html.xpath("//div[@id='song-list-pre-cache']/ul[@class='f-hide']/li")
    for name in namelist:
        # xpath() returns lists, so take the first match of each query.
        titles = name.xpath('./a/text()')
        hrefs = name.xpath('./a/@href')
        if not titles or not hrefs:
            continue
        song_name = titles[0]
        # The song ID is whatever follows the last "=" in the link target.
        song_id = hrefs[0].rpartition('=')[2]
        try:
            get_hotcommon(str(song_id), "2", song_name)
        except (KeyError, TypeError, ValueError, requests.RequestException) as err:
            print(f"歌曲热评获取失败: {song_name} {song_id} ({err})")
def get_model():
    """Ask the user for a mode and an ID on the console, then run that mode.

    Accepted answers to the mode prompt:
        ``"1"``: scrape a toplist page via ``get_163``.
        ``"2"``: fetch the hot comments of a single song.
        ``"3"``: fetch every track of a playlist via ``get_detail``.

    Any other answer prints an error and asks again.
    """
    print("作者:FUEL\n版本:1.0\n网易云热评爬取程序已启动...\n")
    # Retry in a loop rather than by recursion, so the closing message below is
    # printed exactly once however many invalid answers were given.
    while True:
        mode_ = input("歌单模式or单曲模式or完整歌单模式:\n")
        if mode_ == "1":
            list_id = input("请输入歌单ID:\n")
            get_163(list_id=list_id)
        elif mode_ == "2":
            asong_id = input("请输入单曲ID:\n")
            song_name = input("请输入歌曲名称:\n")
            get_hotcommon(asong_id, mode_, song_name)
        elif mode_ == "3":
            list_id_complete = input("请输入歌单ID:\n")
            get_detail(list_id_complete, mode_)
        else:
            print("输入错误,请重新输入!!!\n")
            continue
        break
    print("\n恭喜你,歌曲热评爬取成功!!!")
# Script entry point: start the interactive prompt only when run directly.
if __name__ == "__main__":
    get_model()
感谢分享 感谢分享
页:
[1]