好友
阅读权限10
听众
最后登录1970-1-1
|
Fujj
发表于 2020-6-8 08:46
本帖最后由 Fujj 于 2020-6-8 12:43 编辑
大佬勿喷!!!
主要使用的是requests,SnowNLP,WordCloud这三个模块:
requests:主要用来发送接收请求
SnowNLP:用于情感分析,也可以自己训练之后,使用自己的情感库进行分析
WordCloud:用来生成词云
首先利用fiddler抓取app评论链接api,分析组成参数(这里就不详细说明怎么抓包了),然后利用requests模拟请求该url,对返回的数据进行遍历,写入excel,写入的同时进行情感分析
[Python] 纯文本查看 复制代码
import requests
import json
import xlwt
from snownlp import SnowNLP
from snownlp import sentiment
import time
import xlrd
import jieba
import pymysql
import matplotlib.pylab as plt
from wordcloud import WordCloud
from collections import Counter
import numpy as np
from PIL import Image
# Module-level workbook shared by the scraper functions in this script:
# each one adds its own sheet, and the workbook is saved once at the end.
excel = xlwt.Workbook()
print("正在创建excel……")
# 拉取华为评论
def get_huawei():
    """Pull every comment page from the Huawei app market into a worksheet.

    Adds a sheet named "华为应用市场" to the module-level ``excel`` workbook,
    writes a header row, then writes one row per comment: the five fields
    taken from the API response plus a sentiment label derived from the
    SnowNLP score of the comment text (< 0.4 negative, > 0.6 positive,
    otherwise neutral).
    """
    print("准备拉取【华为应用市场】的评论")
    sheet = excel.add_sheet("华为应用市场")
    column_titles = ['用户名', '手机版本', '评论内容', '评分(星)', '评论时间', '正/负面评价']
    for col, title in enumerate(column_titles):
        sheet.write(0, col, title)

    url_prefix = "https://wap1.hispace.hicloud.com/uowap/index?method=internal.user.commenList3&serviceType=20&reqPageNum="
    url_suffix = "&maxResults=25&appid=C10523283&version=10.0.0&zone=&locale=zh_CN"
    # JSON keys written to columns 0-4, in column order.
    fields = ("accountName", "phone", "commentInfo", "stars", "operTime")

    # Probe request: only "totalPages" is read from it.
    # NOTE(review): page 15 is simply what the original request used.
    probe = requests.get(url=url_prefix + "15" + url_suffix, timeout=10)
    total_pages = json.loads(probe.text)["totalPages"]

    row = 1  # row 0 holds the header
    for page in range(1, total_pages + 1):
        response = requests.get(url_prefix + str(page) + url_suffix, timeout=10)
        comments = json.loads(response.text)["list"]
        for comment in comments:
            for col, key in enumerate(fields):
                sheet.write(row, col, comment[key])
            # Score once per comment; the classifier call is the expensive part.
            score = SnowNLP(comment["commentInfo"]).sentiments
            if score < 0.4:
                label = "负面评价"
            elif score > 0.6:
                label = "正面评价"
            else:
                label = "中性评价"
            sheet.write(row, 5, label)
            row += 1
        # Short pause between pages to avoid hammering the server.
        time.sleep(0.1)
# 拉取appstore评论
def get_app_store():
    """Pull App Store reviews page by page into a worksheet.

    Adds a sheet named "苹果app store" to the module-level ``excel``
    workbook, writes a header row, then requests the reviews endpoint with
    an increasing ``offset`` (starting at 10, step 10) until a response no
    longer contains a "next" key. Each review becomes one row: user name,
    title, text, rating, date and a sentiment label derived from the SnowNLP
    score of the review text (< 0.4 negative, > 0.6 positive, else neutral).
    """
    print("准备拉取【app store】的评论")
    sheet = excel.add_sheet("苹果app store")
    column_titles = ['用户名', '标题', '评论内容', '评分(星)', '评论时间', '正/负面评价']
    for col, title in enumerate(column_titles):
        sheet.write(0, col, title)

    # NOTE(review): the bearer token is hard-coded; its JWT payload carries
    # an "exp" claim (1606596641), so it presumably has to be refreshed once
    # that time has passed -- confirm before relying on this function.
    header = {
        "authority": "amp-api.apps.apple.com",
        "method": "OPTIONS",
        "path": "/v1/catalog/cn/apps/1112929490/reviews?l=zh-Hans-CN&offset=20&platform=web&additionalPlatforms=appletv%2Cipad%2Ciphone%2Cmac",
        "scheme": "https",
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "authorization": "Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IldlYlBsYXlLaWQifQ.eyJpc3MiOiJBTVBXZWJQbGF5IiwiaWF0IjoxNTkxMDQ0NjQxLCJleHAiOjE2MDY1OTY2NDF9.qJ4vlF4w9iHGLoJLsT9gjY2RyEv510XpwDVonTY6GhxbsHD8__dpC9O7p8naB_bRFqqHYfBLzatMbwNc85iB5Q",
        "cache-control": "no-cache",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://apps.apple.com",
        "pragma": "no-cache",
        "referer": "https://apps.apple.com/cn/app/%E5%8F%AE%E5%97%92%E5%87%BA%E8%A1%8C-%E5%85%A8%E5%9B%BD%E8%BD%BB%E6%9D%BE%E7%95%85%E9%AA%91/id1112929490",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    }
    url_prefix = "https://amp-api.apps.apple.com/v1/catalog/cn/apps/1112929490/reviews?l=zh-Hans-CN&offset="
    url_suffix = "&platform=web&additionalPlatforms=appletv%2Cipad%2Ciphone%2Cmac"
    # Keys under each review's "attributes" written to columns 0-4, in order.
    fields = ("userName", "title", "review", "rating", "date")

    offset = 10
    row = 1  # row 0 holds the header
    while True:
        url = url_prefix + str(offset) + url_suffix
        response = requests.get(url=url, headers=header, timeout=10)
        payload = json.loads(response.text)
        for review in payload["data"]:
            attributes = review["attributes"]
            for col, key in enumerate(fields):
                sheet.write(row, col, attributes[key])
            # Score once per review; the classifier call is the expensive part.
            score = SnowNLP(attributes["review"]).sentiments
            if score < 0.4:
                label = "负面评价"
            elif score > 0.6:
                label = "正面评价"
            else:
                label = "中性评价"
            sheet.write(row, 5, label)
            row += 1
        if "next" not in payload:
            # ``row`` is the next free row index and row 0 is the header,
            # so the number of reviews written is row - 1.
            print("共拉取【app store】评论" + str(row - 1) + "条")
            break
        offset += 10
        # Short pause between pages to avoid hammering the server.
        time.sleep(0.1)
if __name__ == '__main__':
    # Fill one worksheet per app store, then write the workbook to disk.
    for pull_comments in (get_huawei, get_app_store):
        pull_comments()
    excel.save('各终端应用市场评论.xls')
接下来提取excel里面的评论,生成词云
[Python] 纯文本查看 复制代码
# 提取excel评论里面的词频
def getExcelData(excel, txt):
    """Count word frequencies in the comment column of the first two sheets.

    Reads the third column (index 2) of the second and first worksheets,
    segments the combined text with jieba and counts every token longer than
    one character.

    Args:
        excel: Path of the .xls workbook to read.
        txt: Text placed in front of the extracted comments before
            segmentation (the caller in this script passes '').

    Returns:
        dict mapping word -> occurrence count, ordered from most to least
        frequent; used as the frequency source for the word cloud.
    """
    readbook = xlrd.open_workbook(excel)
    pieces = [txt] if txt else []
    # Second sheet first, then the first sheet, so the text keeps its order.
    for sheet_index in (1, 0):
        sheet = readbook.sheet_by_index(sheet_index)
        # Row 0 is the header row written by the scraper (column titles),
        # not a comment, so it is left out of the counts.
        for row in range(1, sheet.nrows):
            pieces.append(str(sheet.cell(row, 2).value))
    # Join with a newline so the end of one comment and the start of the
    # next cannot be fused into a single word by the segmenter.
    text = "\n".join(pieces)

    # Segment the combined text once and count in a single pass.
    counts = Counter(
        word for word in jieba.cut(text)
        if len(word) > 1 and word != '\r\n'
    )
    return dict(counts.most_common())
# 根据词频生成词云
def makeWordCloud(txt, mask_path='dingda.jpg',
                  font_path=r"C:\Windows\Fonts\STXINGKA.TTF",
                  out_path='评论词频提取.png'):
    """Render a word cloud image from a word-frequency mapping.

    Args:
        txt: Mapping of word -> frequency (despite the name, not raw text);
            passed to ``WordCloud.generate_from_frequencies``.
        mask_path: Image file whose pixels are used as the cloud's mask.
        font_path: Font file used to draw the words. The default is a
            Windows system font path, so other platforms must override it.
        out_path: Path of the PNG file that is written.
    """
    # Load the mask inside a context manager so the file handle is closed;
    # np.array() copies the pixel data, so the array outlives the image.
    with Image.open(mask_path) as image:
        graph = np.array(image)

    wc = WordCloud(background_color="white",
                   max_words=500,
                   mask=graph,
                   repeat=True,
                   width=1000,
                   height=1000,
                   scale=10,  # larger values give a higher-resolution image
                   font_path=font_path)
    wc.generate_from_frequencies(txt)
    wc.to_file(out_path)
if __name__ == '__main__':
    # Build the word-frequency table from the saved workbook, then render it.
    txt = ''
    frequencies = getExcelData("各终端应用市场评论.xls", txt)
    makeWordCloud(frequencies)
    print("评论词频提取.png 已经保存在文件夹")
|
免费评分
-
查看全部评分
本帖被以下淘专辑推荐:
- · 编程语言类|主题: 110, 订阅: 42
- · 源码|主题: 47, 订阅: 1
|