# [Python] View as plain text | Copy code  (forum code-box header left over from copy-paste)
from pyecharts.charts import *
import pandas as pd
from pyecharts import options as opts
from pyecharts.globals import CurrentConfig
import jieba
from collections import Counter
# Canvas size used by the charts below that are constructed with `init_opts`.
init_opts = opts.InitOpts(height="800px", width="1200px")
# Host the chart assets are loaded from (original author's note: chosen to
# improve loading speed).
CurrentConfig.ONLINE_HOST = "https://cdn.kesci.com/lib/pyecharts_assets/"
# Load the exported comments; adjust the path to wherever the file was saved.
datas = pd.read_excel('某音评论.xlsx')
# Drop rows that contain any missing value. The explicit copy makes `df` an
# independent frame, so adding columns below never targets a view of `datas`.
df = datas.dropna().copy()

# Number of comments per IP location. `value_counts()` orders its result by
# descending frequency, so the labels are taken from that same Series' index.
# (Using `unique()` for the labels would give order of first appearance and
# pair each location with another location's count when the two are zipped.)
ip_count = df['ip属地'].value_counts()
ip = ip_count.index.tolist()

# Split the timestamp into a date part and a time-of-day part.
# Assumes values look like "<date> <time>" -- TODO confirm against the export.
# `n=1` splits on the first space only, so the result has at most two columns
# even if the remainder contains further spaces.
df[['日期', '时间']] = df['时间'].str.split(' ', n=1, expand=True)
data_time = df['日期']
# Comments per day, ordered by date label (chronological for ISO-style
# "YYYY-MM-DD" strings). The label list is derived from the same Series so
# x-axis labels and y-axis counts stay aligned position by position.
data_time_count = data_time.value_counts().sort_index()
data_time_list = data_time_count.index.tolist()
# Stop words to exclude from the word cloud; extend to taste.
stopwords = ['的', '了', '是', '在', '有', '和', '与', '一个', '这', '那', '他', '她', '我们', '你们', '它们', '1', '2',
             '3', '6', ]
# Set copy of the stop words for O(1) membership tests while filtering.
stopword_set = set(stopwords)

# Tokenise every comment with jieba.
# NOTE: the text must be taken from the column's values. `str(df['内容'])` is
# the printed summary of the Series (index labels, a truncated middle for long
# data, and a Name/dtype footer), not the comments themselves.
word = []
for comment in df['内容'].astype(str):
    word.extend(jieba.cut(comment))

# Frequency of each token across all comments.
word_counts = Counter(word)

# Keep (token, count) pairs that are neither stop words nor pure whitespace
# (jieba emits spaces and newlines as tokens of their own).
filtered_words = [
    (token, count)
    for token, count in word_counts.items()
    if token.strip() and token not in stopword_set
]
# Word-cloud chart built from the (word, count) pairs in `filtered_words`;
# word_size_range is passed as [20, 100].
wordcloud = WordCloud()
wordcloud.add("", filtered_words, word_size_range=[20, 100])
wordcloud.set_global_opts(title_opts=opts.TitleOpts(title="评论区词云图"))
# Pie chart of comment counts per IP location (radius given as an
# inner/outer pair).
# NOTE(review): the [label, count] pairs are formed positionally, so `ip` and
# `ip_count` must be in the same order for the slices to be labelled correctly.
ip_pairs = [[location, count] for location, count in zip(ip, ip_count)]
pie = Pie(init_opts=init_opts)
pie.add("ip分布", ip_pairs, radius=["40%", "75%"])
pie.set_global_opts(
    title_opts=opts.TitleOpts(title="评论区分布图"),
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_right="2%"),
)
# Line chart of comment counts per date.
# NOTE(review): x labels (`data_time_list`) and y values (`data_time_count`)
# are matched by position, so both must be in the same order.
line = Line(init_opts)
line.add_xaxis(data_time_list)
line.add_yaxis('时间(年-月-日)', data_time_count)
line.set_global_opts(
    title_opts=opts.TitleOpts(title='活动时间分析'),
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
)
# Bar chart of comment counts per date, using the default chart size.
# NOTE(review): x labels and y values are matched by position, so
# `data_time_list` and `data_time_count` must be in the same order.
daily_counts = data_time_count.tolist()
bar = Bar()
bar.add_xaxis(data_time_list)
bar.add_yaxis('日期(年-月-日)', daily_counts)
bar.set_global_opts(title_opts=opts.TitleOpts(title='柱状图'))
# Collect the four charts on one Page, in display order, and write the
# result to an HTML file.
page = Page().add(wordcloud, pie, line, bar)
page.render('评论区.html')