FUEL 发表于 2022-9-14 09:35

网易云热门评论爬取

爬取单曲评论请使用单曲模式,爬取歌单评论请使用完整歌单模式。
运行后按提示输入模式编号:1 为歌单模式,2 为单曲模式,3 为完整歌单模式。
ID 为歌单(或单曲)网址末尾的一串数字。
运行前需要在代码的 headers 中补全自己的 cookie。
from Crypto.Cipher import AES
from binascii import b2a_base64
import json
import requests
from lxml import etree
import csv
import time
# HTTP headers sent with every request made by this script.
# The "cookie" value is a placeholder: replace it with the cookie string of
# your own logged-in account before running.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4638.69 Safari/537.36"
    ),
    "cookie": "##################################################################################",
}

# The web page encrypts its request body with the JavaScript call
#   window.asrsea(JSON.stringify(i5n), buV1x(["流泪", "强"]), buV1x(Rg2x.md), buV1x(["爱心", "女孩", "惊恐", "大笑"]))
# which has the shape window.asrsea(data, e, f, g).  The constants below are
# the resolved values of those arguments.

# e: value of buV1x(["流泪", "强"]).  Not referenced by the functions in this file.
e = "010001"

# f: value of buV1x(Rg2x.md).  Not referenced by the functions in this file.
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"

# g: value of buV1x(["爱心", "女孩", "惊恐", "大笑"]); used as the key of the
# first AES pass in get_params.
g = "0CoJUm6Qyw8W8jud"

# Fixed "random" 16-character keys for the second AES pass.
# NOTE(review): each one presumably matches the hard-coded encSecKey sent next
# to it (i_detail -> get_detail, i_hotcommon -> get_hotcommon) - confirm
# before changing either value.
i_detail = "zfow3tEY3ru4uybG"  # random value 1
i_hotcommon = "vDIsXMJJZqADRVBP"  # random value 2


def enconda_params(data,key):
    """Encrypt *data* with AES-CBC under *key* and return base64 text.

    Mirrors one encryption pass of the web page: the text is padded to a
    multiple of 16 bytes, encrypted in CBC mode with the fixed IV
    ``0102030405060708``, and base64-encoded.

    Args:
        data: Plain text to encrypt (a JSON string or a previous pass's output).
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The ciphertext as a base64 ``str``.
    """
    raw = data.encode('utf-8')
    # The padding length must be derived from the *encoded* byte length.
    # Counting characters (as before) under-pads any non-ASCII input, leaving
    # a byte string that is not a multiple of the 16-byte block size.
    pad_len = 16 - len(raw) % 16
    raw += bytes([pad_len]) * pad_len
    aes = AES.new(
        key=key.encode('utf-8'),
        IV="0102030405060708".encode("utf-8"),
        mode=AES.MODE_CBC,
    )
    encrypted = aes.encrypt(raw)
    # NOTE(review): b2a_base64 appends a trailing "\n" by default, which is
    # therefore part of the returned text (and of the input to a second
    # pass).  Kept unchanged so the produced params stay identical for ASCII
    # input - confirm whether the server needs it stripped.
    return b2a_base64(encrypted).decode('utf-8')

def get_params(data,mode_):
    """Build the final ``params`` value by encrypting *data* twice.

    The first pass always uses the fixed key ``g``.  The second pass uses
    ``i_hotcommon`` when *mode_* is ``"2"`` and ``i_detail`` when it is
    ``"3"``.

    Args:
        data: JSON text of the request payload.
        mode_: ``"2"`` or ``"3"``; selects the key of the second pass.

    Returns:
        The doubly encrypted base64 text, or ``None`` for any other *mode_*.
    """
    inner = enconda_params(data, g)
    if mode_ not in ("2", "3"):
        return None
    outer_key = i_hotcommon if mode_ == "2" else i_detail
    return enconda_params(inner, outer_key)

def get_hotcommon(id,mode_,song_name):
    """Fetch the hot comments of one song and append them to the CSV file.

    Posts the encrypted comment query for thread ``R_SO_4_<id>`` and, for
    every entry under ``data.hotComments`` in the JSON response, appends one
    row to ``网易云热评.csv`` with the columns
    ``song_name, id, nickname, content, likedCount``.

    Args:
        id: Song id, interpolated into the ``R_SO_4_<id>`` thread identifier.
        mode_: Mode string forwarded to ``get_params`` to pick the outer key.
        song_name: Song title; written into each row and printed on success.

    Returns:
        A dict mapping each hot comment's text to its like count.  Empty when
        the response has no usable ``hotComments`` list.

    Raises:
        Errors from the HTTP request, JSON decoding, or opening the CSV file
        are not caught here and propagate to the caller.
    """
    data = {
        'cursor': '-1',
        'offset': '0',
        'orderType': '1',
        'pageNo': '1',
        'pageSize': '20',
        'rid': f'R_SO_4_{id}',
        'threadId': f'R_SO_4_{id}'
    }
    data = json.dumps(data)
    post_code = {
        "params": get_params(data, mode_),
        # NOTE(review): presumably the pre-computed counterpart of the
        # i_hotcommon key used by get_params for mode "2" - confirm.
        "encSecKey": "516070c7404b42f34c24ef20b659add657c39e9c52125e9e9f7f5441b4381833a407e5ed302cac5d24beea1c1629b17ccb86e0d9d57f6508db5fb7a6df660089ac57b093d19421d386101676a1c8d1e312e099a3463f81fbe91f28211f9eccccfbfc64148fdd65e2b9f5fcf439a865b95fb656e36f75091957f0a1d39ca8ddd3"
    }
    song_url = 'https://music.163.com/weapi/comment/resource/comments/get?csrf_token=0e94b5c95607da5d1273d8456d611f5f'
    res = requests.post(url=song_url, data=post_code, headers=headers).content
    res_dict = json.loads(res)

    hotcontent = {}
    rows = []
    try:
        for content in res_dict['data']['hotComments']:
            content_text = content['content']
            nickname = content['user']['nickname']
            likedcount = content['likedCount']
            rows.append([song_name, id, nickname, content_text, likedcount])
            hotcontent[content_text] = likedcount
    except (KeyError, TypeError) as err:
        # The response lacks the expected structure (for example an error
        # payload without "data"/"hotComments").  Report it instead of
        # silently doing nothing.
        print(f"未获取到热评: {song_name} {id} ({err!r})")
        return {}

    # The previous code called writerow() with no argument, which raised
    # TypeError inside a bare except, so no row was ever written.
    with open("网易云热评.csv", "a", encoding="utf-8", newline='') as c:
        csv.writer(c).writerows(rows)
    print(song_name, id)
    return hotcontent

# Fetch the playlist detail information.
def get_detail(id,mode_):
    """Fetch a playlist's track list and collect hot comments for each track.

    Posts the encrypted playlist-detail query, reads ``playlist.tracks`` from
    the JSON response and calls ``get_hotcommon`` for every track (always
    with mode ``"2"``).

    Args:
        id: Playlist id.
        mode_: Mode string forwarded to ``get_params`` for the detail request.

    Returns:
        None.  A failure for one track is reported and the loop continues
        with the next track.
    """
    detail_url = "https://music.163.com/weapi/v6/playlist/detail?csrf_token=0e94b5c95607da5d1273d8456d611f5f"
    data_1 = {"csrf_token": "0e94b5c95607da5d1273d8456d611f5f",
              "id": f"{id}",
              "limit": "1000",
              "n": "1000",
              "offset": "0",
              "total": "true"}
    data_1 = json.dumps(data_1)
    post_code_1 = {
        "params": get_params(data_1, mode_),
        # NOTE(review): presumably the pre-computed counterpart of the
        # i_detail key used by get_params for mode "3" - confirm.
        "encSecKey": "b5c805263ba9329ac40a910c3c80af59f269b2542ef8920cfd456f91e668ea097b671f9e212c45fa195faea3dc56d3b25ebe3d875d771363af59220e916e18d2dd965f937353b8f1e1dc114cf613d134777628bd3dab5072f4c3fbb01f0b5cb1ddce3ff91dacb93644737225c3d4799956f1cbd40a5f691a80d508ee1a63f7e8"
    }
    res = requests.post(url=detail_url, data=post_code_1, headers=headers).content
    res_dict = json.loads(res)

    try:
        tracks = list(res_dict['playlist']['tracks'])
    except (KeyError, TypeError) as err:
        # Response does not contain a track list; say so instead of
        # returning silently.
        print(f"未获取到歌单详情: {id} ({err!r})")
        return

    for song_message in tracks:
        # Handle failures per track: previously a single bare except wrapped
        # the whole loop, so one bad track silently dropped all later ones.
        try:
            song_id = song_message['id']
            song_name = song_message['name']
            get_hotcommon(str(song_id), str(2), song_name)
        except (requests.RequestException, OSError, ValueError, KeyError, TypeError) as err:
            print(f"歌曲处理失败,已跳过: {song_message!r} ({err!r})")

def get_163(list_id):
    """Scrape a toplist page and collect hot comments for each listed song.

    Downloads ``https://music.163.com/discover/toplist?id=<list_id>``, reads
    the ``<li>`` entries of the hidden ``song-list-pre-cache`` list and calls
    ``get_hotcommon`` for every song (always with mode ``"2"``).

    Args:
        list_id: Id of the toplist to scrape.

    Returns:
        None.  A failure for one song is reported and the loop continues
        with the next song.
    """
    toplist_url = f'https://music.163.com/discover/toplist?id={list_id}'
    response = requests.get(toplist_url, headers=headers)
    html = etree.HTML(response.content.decode())
    if html is None:
        print(f"页面解析失败: {toplist_url}")
        return

    namelist = html.xpath("//div[@id='song-list-pre-cache']/ul[@class='f-hide']/li")
    if not namelist:
        print(f"未解析到歌曲列表: {toplist_url}")
        return

    for name in namelist:
        # xpath() returns a list of matches.  The previous code called
        # .split() on that list, which raised AttributeError inside a bare
        # except, so this mode never processed any song.
        hrefs = name.xpath('./a/@href')
        titles = name.xpath('./a/text()')
        if not hrefs:
            continue
        # NOTE(review): assumes the href ends with "=<song id>" (for example
        # "/song?id=123") - confirm against the live page.
        song_id = str(hrefs[0]).split('=')[-1]
        song_name = str(titles[0]) if titles else ''
        try:
            get_hotcommon(song_id, str(2), song_name)
        except (requests.RequestException, OSError, ValueError, KeyError, TypeError) as err:
            print(f"歌曲处理失败,已跳过: {song_name} {song_id} ({err!r})")

def get_model():
    """Interactive entry point: ask for a mode and run the matching scraper.

    Accepted mode inputs:
        ``"1"`` - toplist mode: asks for a list id and calls ``get_163``.
        ``"2"`` - single-song mode: asks for a song id and name and calls
        ``get_hotcommon``.
        ``"3"`` - full-playlist mode: asks for a playlist id and calls
        ``get_detail``.

    Any other input prints an error and asks again.  The banner is printed
    once and the closing message once, after the selected scraper returns.
    """
    print("作者:FUEL\n版本:1.0\n网易云热评爬取程序已启动...\n")

    # Retry in a loop rather than recursing: with recursion every invalid
    # input added a stack frame that printed the closing message again on
    # its way out, and the banner was repeated on each retry.
    while True:
        mode_ = input("歌单模式or单曲模式or完整歌单模式:\n").strip()
        if mode_ in ("1", "2", "3"):
            break
        print("输入错误,请重新输入!!!\n")

    if mode_ == "1":
        list_id = input("请输入歌单ID:\n")
        get_163(list_id=list_id)
    elif mode_ == "2":
        asong_id = input("请输入单曲ID:\n")
        song_name = input("请输入歌曲名称:\n")
        get_hotcommon(asong_id, mode_, song_name)
    else:
        list_id_complete = input("请输入歌单ID:\n")
        get_detail(list_id_complete, mode_)

    print("\n恭喜你,歌曲热评爬取成功!!!")

# Start the interactive prompt only when this file is run as a script.
if __name__ == "__main__":
    get_model()

Lect1R 发表于 2022-9-14 11:43

深夜悲伤故事集锦,想起了一张表情包{:301_1008:}

hgbj888 发表于 2022-9-14 13:51

感谢分享

tianya0908 发表于 2022-10-28 19:15

感谢分享

aac0007 发表于 2023-12-11 20:15

感谢分享

lovekk 发表于 2024-7-27 10:44

感谢分享
页: [1]
查看完整版本: 网易云热门评论爬取