好友
阅读权限20
听众
最后登录1970-1-1
|
麦田孤望者
发表于 2020-1-28 17:47
本帖最后由 麦田孤望者 于 2020-1-28 18:45 编辑
这几天实在是太闲了...于是码了一个爬弹幕的程序
用的第三方库有: requests , pysimplegui , matplotlib
版本:必须是 Python 3.8 及以上,因为第42行用了海象操作符(:=)...当然那里改成return (cid_tuple,part,re.findall(re.compile('aid=\d{8}',re.S),url)[0][-8:])应该也是没问题的
生成效果
上代码
[Python] 纯文本查看 复制代码
import json
import os
import re
import sys
import time
from fnmatch import fnmatch
import matplotlib.pyplot as plt
import PySimpleGUI as sg
import requests
from pylab import mpl
# Make SimHei the sans-serif font; presumably so matplotlib can render the
# Chinese legend, axis labels and title used when plotting.
mpl.rcParams['font.sans-serif'] = ['SimHei']
def findall(tx1, tx2, tx3):
    """Return every shortest span of ``tx3`` enclosed by ``tx1`` and ``tx2``.

    ``tx1`` and ``tx2`` are spliced into the regular expression unescaped,
    so they act as regex fragments rather than literal text. The pattern is
    compiled with ``re.S``, which lets a span run across line breaks.
    """
    pattern = re.compile(f'{tx1}(.*?){tx2}', re.S)
    return pattern.findall(tx3)
def parse_1(url):
    """Resolve a ``/video/av<id>`` page URL to a single cid.

    The av id is read from the URL, then the player ``pagelist`` API is
    queried and the ``cid`` of the first entry in its ``data`` list is
    returned.

    Args:
        url: Video page URL containing the av id.

    Returns:
        The ``cid`` value of the first page-list entry.

    Raises:
        ValueError: If no av id can be found in ``url``.
    """
    found = re.search(r'/video/av(\d+)', url)
    if found is not None:
        # Accept an id of any length, regardless of what follows it
        # (query string, trailing slash, nothing at all).
        aid = found.group(1)
    else:
        # Heuristic kept from the original code: an 8-digit id that forms
        # the very end of the URL.
        tail = re.search(r'\d{8}', url[-8:])
        if tail is None:
            raise ValueError('无法从链接中解析av号: {}'.format(url))
        aid = tail.group(0)
    res = requests.get(
        'https://api.bilibili.com/x/player/pagelist?aid={}&jsonp=jsonp'.format(aid),
        timeout=10,  # do not hang forever on a stalled connection
    )
    cid = res.json()['data'][0]['cid']
    return cid
def parse_2(url):
    """Collect the cids and part names embedded in a ``/blackboard/`` page.

    The page is downloaded and the JSON assigned to
    ``window.__kanzaki_ranko`` is decoded; its ``main.pages`` list supplies
    one ``cid`` and one ``part`` name per entry.

    Args:
        url: Page URL; it must contain ``aid=`` followed by 8 digits.

    Returns:
        A tuple ``(cid_list, part_list, aid)`` where ``aid`` is the 8-digit
        string taken from the URL.

    Raises:
        IndexError: If the embedded JSON or the ``aid=`` parameter is missing.
    """
    res = requests.get(url, timeout=10)  # do not hang on a stalled connection
    html = findall('window.__kanzaki_ranko=', '</script>', res.text)[0]
    pages = json.loads(html)['main']['pages']
    cid_list = [page['cid'] for page in pages]
    part = [page['part'] for page in pages]
    # A capture group yields the digits directly; no assignment expression
    # is needed, so this function no longer requires Python 3.8.
    aid = re.findall(r'aid=(\d{8})', url)[0]
    return (cid_list, part, aid)
def parse_3(url):
    """Collect the cids and episode titles embedded in a ``/bangumi/`` page.

    The page is downloaded and the JSON assigned to ``__INITIAL_STATE__``
    is decoded; every entry of its ``epList`` supplies a ``cid`` and a
    title built from ``titleFormat`` and ``longTitle``.

    Args:
        url: Bangumi page URL.

    Returns:
        A tuple ``(cid_list, title_list, '')``; the third item is always an
        empty string.

    Raises:
        IndexError: If the embedded JSON cannot be located in the page.
    """
    res = requests.get(url, timeout=10).text  # do not hang on a stalled connection
    # Raw string: the end marker is a regex with escaped parentheses.
    html = findall('.__INITIAL_STATE__=', r';\(function\(\)', res)[0]
    episodes = json.loads(html)['epList']
    cid_list = [ep['cid'] for ep in episodes]
    title_list = [ep['titleFormat'] + ' ' + ep['longTitle'] for ep in episodes]
    return (cid_list, title_list, '')
def check(url, mode=''):
    """Validate ``url`` and dispatch it to the matching page parser.

    Args:
        url: Address entered by the user.
        mode: Unused; kept so existing callers keep working.

    Returns:
        Whatever the selected parser returns: ``parse_1`` for ``/video/``
        pages, ``parse_2`` for ``/blackboard/`` pages and ``parse_3`` for
        ``/bangumi/`` pages.

    Raises:
        SystemExit: After printing a message, when ``url`` is not a
            bilibili address or belongs to a section without a parser.
    """
    if not fnmatch(url, 'https://www.bilibili.com/*'):
        print('请输入正确视频地址!')
        sys.exit()
    if fnmatch(url, 'https://www.bilibili.com/video/*'):
        return parse_1(url)
    if fnmatch(url, 'https://www.bilibili.com/blackboard/*'):
        return parse_2(url)
    if fnmatch(url, 'https://www.bilibili.com/bangumi/*'):
        return parse_3(url)
    # No parser handles this section; stop here instead of silently
    # returning None and failing later with an unrelated error.
    print('请输入正确视频地址!')
    sys.exit()
def get_comment(cid):
    """Download the comment XML for ``cid`` and split it into two lists.

    Every ``<d ...>...</d>`` element contributes one entry to each list:
    the comma-separated fields of its ``p`` attribute, and its text.

    Args:
        cid: Identifier formatted into the comment XML URL.

    Returns:
        A tuple ``(params_list, text_list)`` with matching indices, where
        each item of ``params_list`` is a list of strings.
    """
    response = requests.get('http://comment.bilibili.com/{}.xml'.format(cid))
    response.encoding = 'utf-8'
    attribute_rows = []
    texts = []
    for element in findall('<d', '</d>', response.text):
        raw_params = findall('p="', '">', element)[0]
        # Removing the attribute value and its closing '">' leaves ' p="'
        # in front of the text; the slice drops those four characters.
        texts.append(re.sub(raw_params + '">', '', element)[4:])
        attribute_rows.append(raw_params.split(','))
    return (attribute_rows, texts)
def check_again(tup, url):
    """Turn the result of ``check`` into a one-element cid list and a title.

    When ``tup`` is a tuple (multi-part or multi-episode page), a window
    with one button per title is shown and the clicked entry is used. If
    the window yields an event that matches no title (for example when it
    is closed), the last entry is used, as in the original implementation.

    When ``tup`` is not a tuple it is treated as the cid itself and the
    title is scraped from the video page at ``url``.

    Args:
        tup: Either ``(cid_list, title_list, ...)`` or a single cid.
        url: Video page URL, fetched only in the single-cid case.

    Returns:
        A tuple ``(cid_list, title)`` where ``cid_list`` has one element.
    """
    if isinstance(tup, tuple):
        cids, titles = tup[0], tup[1]
        layout = [[sg.Button(name, font='宋体.ttf')] for name in titles]
        window2 = sg.Window('请确认第几话', layout=layout)
        event, _values = window2.Read()
        window2.close()
        try:
            chosen = titles.index(event)
        except ValueError:
            # No button matched the event: keep the historical fallback
            # of selecting the last entry.
            chosen = len(titles) - 1
        return ([str(cids[chosen])], str(titles[chosen]))

    res = requests.get(url, timeout=10)  # do not hang on a stalled connection
    html = findall('<div id="viewbox_report" class="video-info report-wrap-module report-scroll-module">','class="video-title">',res.text)[0]
    title = findall('title="', '"', html)[0]
    return ([tup], title)
def main():
    """Ask for a video URL, resolve it to a cid and download its comments.

    Side effect: stores the entered URL in the module-level ``video_url``.

    Returns:
        A tuple ``(comments, title)`` where ``comments`` is the value
        returned by ``get_comment`` and ``title`` comes from ``check_again``.
    """
    global video_url
    tx1 = sg.Text('视频链接:', font='宋体.ttf')
    txt = sg.Input('')
    bt1 = sg.Button('确定', font='宋体.ttf')
    window = sg.Window('哔哩哔哩弹幕', layout=[[tx1, txt, bt1]])
    _event, values = window.Read()
    window.close()
    # Closing the window can leave ``values`` empty or None; use an empty
    # URL then, so that ``check`` rejects it with its usual message
    # instead of this line raising a TypeError.
    video_url = values[0] if values else ''
    cids, title = check_again(check(video_url), video_url)
    # ``check_again`` returns a one-element list; the original loop kept
    # only the result for the last cid, which is what is fetched here.
    comments = get_comment(int(cids[-1]))
    return (comments, title)
def bubbleSort(arr):
    """Sort ``arr`` in place by the first item of each element and return it.

    The name is historical: the hand-written bubble sort was replaced by
    the built-in sort. Both are stable, so elements with equal first items
    keep their relative order and the result is unchanged.

    Args:
        arr: List of indexable elements (e.g. ``(time, text)`` tuples).

    Returns:
        The same list object, now sorted in ascending order.
    """
    arr.sort(key=lambda item: item[0])
    return arr
def _bucket_width(max_time):
    """Return the bucket width in seconds for a timeline ending at ``max_time``."""
    # Largest threshold first; testing ``> 500`` first made the wider
    # widths unreachable.
    if max_time > 3000:
        return 300
    if max_time > 1000:
        return 120
    if max_time > 500:
        return 30
    return 10


def _bucket_counts(times, width):
    """Count the integer ``times`` per bucket and build the tick labels.

    Bucket ``i`` covers ``(i*width, (i+1)*width]`` and is labelled
    ``i*width``; time 0 belongs to the first bucket. Times past the last
    full bucket go into a final bucket labelled with the maximum time.

    Returns:
        A tuple ``(counts, labels)`` of equal length.
    """
    max_time = max(times)
    full_buckets = max_time // width
    counts = [0] * (full_buckets + 1)
    for second in times:
        index = 0 if second <= 0 else (second - 1) // width
        counts[index] += 1
    labels = [i * width for i in range(full_buckets)] + [max_time]
    return counts, labels


def draw_():
    """Run ``main`` and plot how many danmaku appear in each time bucket.

    The first field of every comment's parameter list is read as the
    time in seconds and truncated to an integer before bucketing.

    Raises:
        ValueError: If no danmaku were retrieved.
    """
    comments, title = main()
    params = comments[0]
    if not params:
        raise ValueError('没有获取到弹幕')
    times = [int(float(fields[0])) for fields in params]
    counts, labels = _bucket_counts(times, _bucket_width(max(times)))

    x = range(len(labels))
    # Grow the figure with the number of ticks, but never let the width
    # reach zero or below, which matplotlib rejects.
    plt.figure(figsize=(max(len(labels) - 6, 6), 6.5))
    plt.plot(x, counts, marker='o', mec='r', mfc='w', label=u'弹幕数量曲线图')
    plt.legend()  # show the legend
    plt.xticks(x, labels, rotation=20)
    plt.margins(0)
    plt.subplots_adjust(bottom=0.15)
    plt.xlabel(u"时间/s ")  # x-axis label
    plt.ylabel("数量")  # y-axis label
    plt.title(title)  # chart title
    plt.show()
if __name__ == '__main__':
    try:
        draw_()
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        # SystemExit and KeyboardInterrupt are deliberately not caught, so
        # an intentional exit does not print a spurious "ERROR:" line.
        print('ERROR:', e)
不写注释真的不是好习惯...还有...不要在意我的变量名...没想到合适的就用的abcde
看不懂的可以评论区问我
还有...求评分 |
-
免费评分
-
参与人数 3 | 吾爱币 +6 |
热心值 +3 |
收起
理由
|
溺水蛙
| + 1 |
+ 1 |
用心讨论,共获提升! |
苏紫方璇
| + 5 |
+ 1 |
欢迎分析讨论交流,吾爱破解论坛有你更精彩! |
netspirit
| |
+ 1 |
求bilibili视频和弹幕一起下载的工具 然后把弹幕转换成ass格式的 最好能屏. |
查看全部评分
|