全球大学排名分析
Python初学者,大佬请轻喷。
数据爬取代码:
from lxml import etree
import requests
import pandas as pd
import time
# Scrape the QS undergraduate ranking page for each year 2015-2021 and write
# one CSV per year ("世界大学排名_<year>.csv") into the current directory.

# One session and one header dict are enough for every request, so they are
# built once instead of once per year.
session = requests.Session()
headers = {
    'User-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3756.400 QQBrowser/10.5.4039.400'
}

for i in range(2015, 2022):
    urls = ["https://ranking.promisingedu.com/%d-qs-all-undergraduate" % i]
    for url in urls:
        # A timeout keeps an unresponsive server from hanging the script.
        response = session.get(url, headers=headers, timeout=30)
        # Stop with the real HTTP error (e.g. 400) instead of parsing an
        # error page and failing later with an unrelated DataFrame error.
        response.raise_for_status()
        content = response.text
        html = etree.HTML(content)
        # Pause between requests to go easy on the server.
        time.sleep(1)

        # NOTE(review): the four XPath expressions below are identical in the
        # code as received, so every column gets the same text nodes. The
        # per-column selectors were presumably lost when the code was pasted;
        # restore them against the live page markup before trusting the CSVs.
        University = html.xpath('//table[@id="rk"]//td//text()')[:200]
        Country = html.xpath('//table[@id="rk"]//td//text()')[:200]
        Rank = html.xpath('//table[@id="rk"]//td//text()')[:200]
        Overall_Score = html.xpath('//table[@id="rk"]//td//text()')[:200]

        # Size the Year column from the scraped data rather than a fixed 200,
        # so a page with fewer entries still produces equal-length columns.
        Year = [str(i)] * len(Rank)

        Content = {"Rank":Rank,"University":University,"Country":Country,"Overall_Score":Overall_Score,"Year":Year}
        New = pd.DataFrame(Content)
        New.to_csv("世界大学排名_%d.csv" % i)
        print(New)
数据展示代码
import pandas as pd
from pyecharts.charts import Funnel,Pie,Timeline
from pyecharts import options as opts
import os
# Build two pyecharts timelines from the per-year CSV files in ./data/:
#   * university.html - funnel chart of the ten best-ranked universities
#   * region.html     - rose pie chart of how many entries each country has
# and write all years combined to Gather.csv.

data_dir = r"./data/"
# os.listdir returns names in arbitrary order; sorting keeps the timeline in
# filename order, and the suffix filter skips anything that is not a CSV.
listfile = sorted(name for name in os.listdir(data_dir) if name.endswith(".csv"))

# Collected once per file. The code as received appended each frame in both
# loops, which duplicated every row in Gather.csv, and named this list
# `list`, shadowing the builtin.
frames = []
t1 = Timeline()
t2 = Timeline()

for i in listfile:
    df = pd.read_csv(data_dir + i, encoding='utf-8', index_col=0)
    frames.append(df)
    # File names end in "<year>.csv", so these four characters are the year.
    year = i[-8:-4]

    # --- Funnel: ten rows with the smallest Rank value ---
    # NOTE(review): the first concat operand and the zip(...) wrapper were
    # truncated in the code as received; "University" is reconstructed from
    # the surviving fragments and the chart title - confirm.
    Score_Data = pd.concat([df["University"], df["Rank"]], axis=1)
    Rank = Score_Data.sort_values("Rank", ascending=True)[:10]
    funnel = (
        Funnel()
        .add(
            "",
            [list(z) for z in zip(Rank["University"].tolist(), Rank["Rank"].tolist())],
            sort_='descending',
            label_opts=opts.LabelOpts(position="inside"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="大学排名(日期:{})".format(year), pos_bottom=True))
    )
    t1.add(funnel, "{}".format(year))

    # --- Pie: number of entries per country, largest first ---
    classfy_Country = df.groupby(df["Country"])
    group_Country = classfy_Country.size().sort_values(ascending=False)
    # NOTE(review): the data-pair argument was missing in the code as
    # received; (country, count) pairs are the presumed intent - confirm.
    pie = (
        Pie()
        .add(
            '',
            [list(z) for z in zip(group_Country.index.tolist(), group_Country.values.tolist())],
            radius=["30%", "75%"],
            rosetype="radius",
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="地区分布(日期:{})".format(year), pos_bottom=True))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
    )
    t2.add(pie, "{}".format(year))

t1.render("university.html")
t2.render("region.html")

# All years stacked into one table with a fresh 0..n-1 index.
Gather = pd.concat(frames, ignore_index=True)
Gather.to_csv("Gather.csv")
老子就是拽 发表于 2020-12-23 14:46
兄弟,你能运行出来不?
https://ranking.promisingedu.com/%d-qs-all-undergraduate
我试了下它里面的这个排名的信息源网址进不去,就没去试了。 blannk 发表于 2020-12-23 10:00
牛逼啊,我也是初学者,但是你这个更加高级啊。能不能出个视频,在B站讲解下,这里面函数的意思吗?
兄弟,你能运行出来不? 来学习一下 弄得挺好的,学习下 为什么没有家里蹲大学 数据展示代码真的不错 功能强大,学习了 大佬,敢问咋运行不出来啊? 牛逼啊,我也是初学者,但是你这个更加高级啊。能不能出个视频,在B站讲解下,这里面函数的意思吗? https://ranking.promisingedu.com/%d-qs-all-undergraduate
这个网页目前输入进去,提示:400错误。
是不是意味着目前的这个代码暂时无效了?
页:
[1]
2