# [Python] 纯文本查看 复制代码  (forum "view as plain text / copy code" header left over from the paste)
import pandas
from selenium import webdriver
import matplotlib.pyplot as plt
from selenium.webdriver.common.by import By
import pinyin
# Configure matplotlib once for the whole script: use the SimHei font for the
# sans-serif family and turn off the Unicode minus glyph on axes.
plt.rcParams.update({
    "font.sans-serif": ["SimHei"],
    "axes.unicode_minus": False,
})
def getStrAllAplha(str):
    """Return ``pinyin.get_initial(str)`` with no delimiter, upper-cased.

    The initials produced for every character of ``str`` are joined without
    any separator and the whole result is converted to upper case.
    """
    initials = pinyin.get_initial(str, delimiter="")
    return initials.upper()
def getStrFirstAplha(str):
    """Return the first character of ``getStrAllAplha(str)``, upper-cased.

    The ``[0:1]`` slice (rather than ``[0]``) means an empty initials string
    yields ``""`` instead of raising ``IndexError``.
    """
    return getStrAllAplha(str)[0:1].upper()
# Search-results page that gets scraped; the query string asks for the
# keyword "mc" with order=fans.
url = "https://search.bilibili.com/upuser?keyword=mc&from_source=webtop_search&spm_id_from=333.1007&order=fans"
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/41.0.2227.1 Safari/537.36'}
opt = webdriver.EdgeOptions()
# Pass only the User-Agent string. Formatting the whole ``header`` dict into
# the switch would hand the browser the dict's repr
# ("{'User-Agent': '...'}") as its user agent.
opt.add_argument('--user-agent=%s' % header['User-Agent'])
edge = webdriver.Edge(options=opt)
# Let element lookups wait up to 10 seconds before failing.
edge.implicitly_wait(10)
edge.get(url)
# Collect uploader names and follower counts from the result list.
# ``name_l`` and ``people_l`` are parallel lists: people_l[k] belongs to
# name_l[k]. Later sections of the script rely on that alignment.
page = 2  # index of the pager <li> whose button is clicked on the next page switch
t = 1     # XPath (1-based) index of the result <li> to read next
name_l = []
people_l = []
for _ in range(1, 101):
    try:
        if t == 21:
            # NOTE(review): the row index wraps back to 1 here without a page
            # change; the only page switch in this loop is in the except
            # branch below. If the same page is still displayed, its rows are
            # read again -- confirm that this is the intended paging flow.
            t = 1
        li = edge.find_element(
            by=By.XPATH,
            value='//*[@id="user-list"]/div[1]/ul/li[' + str(t) + ']',
        )
        t += 1
        txt = li.text
        lines = txt.split("\n")
        # First line of the entry, up to the first "+".
        name = lines[0].split("+")[0]
        # Third line: the text after the first ":" and before "万".
        # Assumes that line looks like "<label>:<number>万" -- TODO confirm.
        people = lines[2].split(":")[1].split("万")[0]
        # Convert before appending anything: if the number cannot be parsed,
        # neither list is touched, so the two lists stay the same length.
        fans = float(people)
        name_l.append(name)
        people_l.append(fans)
        print(name, people)
    except Exception:
        # Best effort: any failure while reading a row is treated as
        # "nothing more to read here" and answered by switching page.
        # ``Exception`` (not a bare ``except``) lets Ctrl-C / SystemExit
        # still stop the script.
        print("切页")
        edge.find_element(
            by=By.XPATH,
            value='//*[@id="user-list"]/div[1]/div[2]/div/ul/li[' + str(page) + ']/button',
        ).click()
        page += 1
# Re-order the scraped names by the first letter returned by
# getStrFirstAplha (A..Z), keeping the scrape order within each letter.
# Names whose first letter is not one of A..Z are left out.
abc_name = []     # names grouped A..Z, duplicates still included
abc_name2 = []    # same order as abc_name, each name only once
abc_people = []   # never filled in this section; kept so the name stays defined
abc_people2 = []  # follower count for each entry of abc_name2

# Compute every name's first letter once instead of once per alphabet letter.
first_letters = [getStrFirstAplha(name) for name in name_l]
for code in range(ord("A"), ord("Z") + 1):
    letter = chr(code)
    abc_name.extend(
        name for name, first in zip(name_l, first_letters) if first == letter
    )

# Drop repeated names while keeping the first occurrence's position.
abc_name2 = list(dict.fromkeys(abc_name))

# Index of each name's first occurrence in name_l, i.e. what
# name_l.index(name) would return, built in a single pass.
first_index = {}
for idx, name in enumerate(name_l):
    first_index.setdefault(name, idx)
abc_people2 = [people_l[first_index[name]] for name in abc_name2]

print(len(abc_name2),abc_name2)
print(len(abc_people2),abc_people2)
# Write both tables to disk: csv1.csv holds the rows in scrape order,
# csv2.csv holds the de-duplicated rows ordered by first letter.
for csv_path, names, fans in (
    ("csv1.csv", name_l, people_l),
    ("csv2.csv", abc_name2, abc_people2),
):
    data = pandas.DataFrame({"up名字": names, "粉丝数量/万": fans})
    data.to_csv(csv_path)
# Bar colours shared by both panels: the first three entries are red, the
# remaining ones blue.
_BAR_COLORS = ['r','r','r','b','b','b','b','b','b','b','b','b','b','b','b','b','b','b','b','b']


def _draw_fans_panel(position, labels, heights):
    """Draw one labelled bar chart in slot ``position`` of a 1x2 grid.

    Returns the bar container created by ``plt.bar``.
    """
    plt.subplot(1, 2, position)
    bars = plt.bar(labels, heights, color=_BAR_COLORS)
    # Print each bar's value at its outer edge.
    plt.bar_label(bars, label_type='edge')
    # Rotate the tick labels by 90 degrees.
    plt.xticks(rotation=90, fontsize=13)
    return bars


# Left panel: rows in scrape order. Right panel: rows ordered by first letter.
bar1 = _draw_fans_panel(1, name_l, people_l)
bar2 = _draw_fans_panel(2, abc_name2, abc_people2)
plt.show()