import requests
import pprint
import json
from collections import Counter
from bs4 import BeautifulSoup
def get_video(av):
    """Fetch the danmaku (bullet comments) of one video and count them.

    The video's ``cid`` is looked up through the jijidown API and then used
    to download the comment list from the bilibili danmaku endpoint.

    Args:
        av: The av number of the video, interpolated into the lookup URL.

    Returns:
        A ``Counter`` mapping each distinct comment text to the number of
        times it occurs. An empty ``Counter`` is returned when the cid
        cannot be extracted from the lookup response.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36"}
    info = requests.get(
        'https://www.jijidown.com/api/v1/video/get_download_info?id=%s' % av,
        headers=headers,
    )
    try:
        # Resolve the cid via the jijidown lookup service.
        cid = json.loads(info.text)['res'][0]['cid']
    except (IndexError, KeyError, TypeError, ValueError):
        # No usable cid: skip this video instead of querying the danmaku
        # endpoint with a meaningless oid (previously the Response object
        # itself was formatted into the URL).
        print('pass')
        return Counter()

    # Danmaku endpoint found by inspecting the site's network traffic.
    reply = requests.get('https://api.bilibili.com/x/v1/dm/list.so?oid=%s' % cid)
    reply.encoding = 'UTF-8'
    document = BeautifulSoup(reply.text, features="html.parser")

    # Every <d> element holds one comment; count identical texts together.
    return Counter(node.text for node in document.find_all('d'))
def get_av():
    """Collect the av numbers of all videos listed on the ranking page.

    Returns:
        A list of strings in page order. Each string is the part of a
        ranking entry's link that follows the first ``"av"`` marker.
        Entries without a link, or whose link has no ``"av"`` marker,
        are skipped.
    """
    av = []
    page = requests.get('https://www.bilibili.com/ranking')
    page.encoding = 'UTF-8'
    soup = BeautifulSoup(page.text, features="html.parser")
    for cover in soup.find_all('div', class_='img'):
        anchor = cover.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # No anchor or no href attribute: nothing to extract.
            continue
        # partition() reports whether the marker was found, unlike
        # str.find() whose -1 result would silently slice from index 1.
        _, marker, video_id = href.partition('av')
        if not marker:
            continue
        av.append(video_id)
    return av
# Driver: gather the danmaku of every ranked video and print the 50 most
# frequent comments overall.
av = get_av()
cap = Counter()  # running total of comment frequencies across all videos
total = len(av)
for position, data in enumerate(av, start=1):
    # Progress indicator "current/total"; enumerate keeps it correct even
    # if the same av number appears more than once in the list.
    print(str(position) + '/' + str(total))
    cap.update(get_video(data))
print(cap.most_common(50))