使用Requests爬取某租车软件应用商店上的评论，并进行情感分析，同时生成词云

Fujj · 发表于 2020-6-8 08:46

本帖最后由 Fujj 于 2020-6-8 12:43 编辑

大佬勿喷！！！

主要使用的是requests，SnowNLP，WordCloud这三个模块：

requests：主要用来发送接收请求
SnowNLP：用于情感分析，也可以自己训练之后，使用自己的情感库进行分析
WordCloud：用来生成词云

首先利用fiddler抓取app评论链接api，分析组成参数（这里就不详细说明怎么抓包了），然后利用requests模拟请求该url，对返回的数据进行遍历，写入excel，写入的同时进行情感分析

[Python] 纯文本查看 复制代码

import requests
import json
import xlwt
from snownlp import SnowNLP
from snownlp import sentiment
import time
import xlrd
import jieba
import pymysql
import matplotlib.pylab as plt
from wordcloud import WordCloud
from collections import Counter
import numpy as np
from PIL import Image

# Shared workbook: get_huawei() and get_app_store() each add a sheet to it,
# and the __main__ block saves it to disk once both have finished.
excel = xlwt.Workbook()
print("正在创建excel……")

# Fetch reviews from the Huawei app market
def get_huawei():
    """Download the Huawei app-market reviews into a new workbook sheet.

    Adds a sheet named "华为应用市场" to the module-level ``excel`` workbook,
    writes a header row, then requests every review page in turn and writes
    one row per review. Each row also gets a sentiment label derived from the
    SnowNLP score of the review text: below 0.4 is negative, above 0.6 is
    positive, anything else is neutral.

    Raises:
        requests.RequestException: if a request fails, times out, or the
            server answers with an HTTP error status.
    """
    print("准备拉取【华为应用市场】的评论")
    sheet = excel.add_sheet("华为应用市场")
    column_titles = ['用户名', '手机版本', '评论内容', '评分（星）', '评论时间','正/负面评价']
    for col, title in enumerate(column_titles):
        sheet.write(0, col, title)

    page_url = ("https://wap1.hispace.hicloud.com/uowap/index?method=internal.user.commenList3"
                "&serviceType=20&reqPageNum={page}&maxResults=25&appid=C10523283"
                "&version=10.0.0&zone=&locale=zh_CN")

    def fetch_page(page):
        # A timeout keeps a stalled connection from hanging the whole run, and
        # raise_for_status surfaces HTTP errors instead of a confusing KeyError.
        response = requests.get(url=page_url.format(page=page), timeout=30)
        response.raise_for_status()
        return json.loads(response.text)

    # Page 15 is requested only to read the page count.
    # NOTE(review): confirm the API reports totalPages for any page number.
    total_pages = fetch_page(15)["totalPages"]

    row = 1  # row 0 holds the header
    for page in range(1, total_pages + 1):
        for review in fetch_page(page)["list"]:
            comment = review["commentInfo"]
            sheet.write(row, 0, review["accountName"])
            sheet.write(row, 1, review["phone"])
            sheet.write(row, 2, comment)
            sheet.write(row, 3, review["stars"])
            sheet.write(row, 4, review["operTime"])

            # Score the comment once and reuse the value for both thresholds.
            score = SnowNLP(comment).sentiments
            if score < 0.4:
                label = "负面评价"
            elif score > 0.6:
                label = "正面评价"
            else:
                label = "中性评价"
            sheet.write(row, 5, label)

            row += 1
            # NOTE(review): this pause runs once per review, not once per
            # request; kept as-is so the request rate does not change.
            time.sleep(0.1)

# Fetch reviews from the Apple App Store
def get_app_store():
    """Download the App Store reviews into a new workbook sheet.

    Adds a sheet named "苹果app store" to the module-level ``excel`` workbook,
    writes a header row, then pages through the reviews endpoint ten offsets
    at a time until a response no longer contains a "next" key. Each review
    becomes one row, with a sentiment label derived from the SnowNLP score of
    the review text: below 0.4 is negative, above 0.6 is positive, anything
    else is neutral.

    Raises:
        requests.RequestException: if a request fails, times out, or the
            server answers with an HTTP error status.
    """
    print("准备拉取【app store】的评论")
    sheet = excel.add_sheet("苹果app store")
    column_titles = ['用户名', '标题', '评论内容', '评分（星）', '评论时间','正/负面评价']
    for col, title in enumerate(column_titles):
        sheet.write(0, col, title)

    # NOTE(review): the bearer token below is hard-coded and its JWT payload
    # carries an "exp" claim, so requests will be rejected once it lapses.
    # A credential should not live in source; load it from configuration.
    header = {
        "authority": "amp-api.apps.apple.com",
        "method": "OPTIONS",
        "path": "/v1/catalog/cn/apps/1112929490/reviews?l=zh-Hans-CN&offset=20&platform=web&additionalPlatforms=appletv%2Cipad%2Ciphone%2Cmac",
        "scheme": "https",
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "authorization": "Bearer eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IldlYlBsYXlLaWQifQ.eyJpc3MiOiJBTVBXZWJQbGF5IiwiaWF0IjoxNTkxMDQ0NjQxLCJleHAiOjE2MDY1OTY2NDF9.qJ4vlF4w9iHGLoJLsT9gjY2RyEv510XpwDVonTY6GhxbsHD8__dpC9O7p8naB_bRFqqHYfBLzatMbwNc85iB5Q",
        "cache-control": "no-cache",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://apps.apple.com",
        "pragma": "no-cache",
        "referer": "https://apps.apple.com/cn/app/%E5%8F%AE%E5%97%92%E5%87%BA%E8%A1%8C-%E5%85%A8%E5%9B%BD%E8%BD%BB%E6%9D%BE%E7%95%85%E9%AA%91/id1112929490",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    }
    page_url = ("https://amp-api.apps.apple.com/v1/catalog/cn/apps/1112929490/reviews"
                "?l=zh-Hans-CN&offset={offset}"
                "&platform=web&additionalPlatforms=appletv%2Cipad%2Ciphone%2Cmac")

    # NOTE(review): paging starts at offset 10, so offsets 0-9 are never
    # requested. Confirm whether skipping them is intended.
    offset = 10
    row = 1  # row 0 holds the header
    while True:
        # A timeout keeps a stalled connection from hanging the whole run, and
        # raise_for_status surfaces HTTP errors (e.g. a rejected token)
        # instead of a confusing KeyError on the missing "data" key.
        response = requests.get(url=page_url.format(offset=offset), headers=header, timeout=30)
        response.raise_for_status()
        payload = json.loads(response.text)

        for review in payload["data"]:
            attrs = review["attributes"]
            text = attrs["review"]
            sheet.write(row, 0, attrs["userName"])
            sheet.write(row, 1, attrs["title"])
            sheet.write(row, 2, text)
            sheet.write(row, 3, attrs["rating"])
            sheet.write(row, 4, attrs["date"])

            # Score the review once and reuse the value for both thresholds.
            score = SnowNLP(text).sentiments
            if score < 0.4:
                label = "负面评价"
            elif score > 0.6:
                label = "正面评价"
            else:
                label = "中性评价"
            sheet.write(row, 5, label)
            row += 1

        if "next" not in payload:
            # row is the next free row index, so the review count is row - 1.
            print("共拉取【app store】评论"+str(row - 1)+"条")
            break
        offset += 10
        time.sleep(0.1)



if __name__ == '__main__':
    # Fill one sheet per store, then write the shared workbook to disk.
    get_huawei()
    get_app_store()
    excel.save('各终端应用市场评论.xls')

接下来提取excel里面的评论，生成词云

[Python] 纯文本查看 复制代码

# Extract word frequencies from the review text stored in the Excel workbook
def getExcelData(excel, txt):
    """Return word frequencies for the review text in the first two sheets.

    Reads the third column of the second and first sheets, skipping each
    sheet's header row, segments the combined text with jieba and counts
    every token longer than one character.

    Args:
        excel: Path of the .xls workbook to read.
        txt: Text to count ahead of the workbook's reviews (normally "").

    Returns:
        A dict mapping each counted word to its number of occurrences,
        inserted in descending order of count.
    """
    readbook = xlrd.open_workbook(excel)

    pieces = [txt] if txt else []
    for sheet_index in (1, 0):  # same sheet order as the text was built before
        sheet = readbook.sheet_by_index(sheet_index)
        # Start at row 1: row 0 is the header, not a review.
        for row in range(1, sheet.nrows):
            pieces.append(sheet.cell(row, 2).value)  # third column = review text

    # Join with newlines so the last word of one review cannot fuse with the
    # first word of the next during segmentation; single-character tokens
    # (including the newline itself) are dropped by the length filter below.
    text = "\n".join(pieces)

    counts = Counter(
        word for word in jieba.cut(text)
        if len(word) > 1 and word != '\r\n'
    )
    return dict(counts.most_common())

# Render a word cloud from word frequencies
def makeWordCloud(txt):
    """Render a word cloud image from a word-frequency mapping.

    Uses 'dingda.jpg' from the working directory as the mask image and writes
    the result to '评论词频提取.png'.

    Args:
        txt: Mapping of word -> frequency, as passed to
            ``WordCloud.generate_from_frequencies``.
    """
    # Close the image file as soon as its pixels are copied into the array.
    with Image.open('dingda.jpg') as image:
        graph = np.array(image)

    wc = WordCloud(background_color="white",
                   max_words=500,
                   mask=graph,
                   repeat=True,
                   # NOTE(review): per the WordCloud docs, width/height are
                   # ignored when a mask is supplied; kept for reference.
                   width=1000,
                   height=1000,
                   scale=10,  # larger values give a higher-resolution, sharper image
                   # Raw string: "\W", "\F" and "\S" are invalid escape
                   # sequences in a normal string literal.
                   font_path=r"C:\Windows\Fonts\STXINGKA.TTF")
    wc.generate_from_frequencies(txt)
    wc.to_file('评论词频提取.png')




if __name__ == '__main__':
    # Build the frequency table from the saved workbook and render it.
    txt = ''  # initial text handed to getExcelData; empty here
    makeWordCloud(getExcelData("各终端应用市场评论.xls", txt))
    print("评论词频提取.png 已经保存在文件夹")

a186che · 发表于 2020-8-18 15:58

https://wap1.hispace.hicloud.com/uowap/index?method=internal.user.commenList3 这个是在华为手机市场里抓包到的吗？我怎么没看到

FANT456 · 发表于 2020-6-8 08:55

帮顶，前两天想用matlab生成词云，结果失败了

凌晨四点半 · 发表于 2020-6-8 08:57

可以，很有用

ysdy · 发表于 2020-6-8 09:12

不明觉厉

云岛鹤川 · 发表于 2020-6-8 09:14

情感分析...是分析自己喜欢什么样的司机吗

wangqing1116 · 发表于 2020-6-8 09:20

不错，感谢分享

MinxArrix · 发表于 2020-6-8 10:22

这些导入看的头大

zucker · 发表于 2020-6-8 10:26

发一张词云图看看效果吧

处女-大龙猫 · 发表于 2020-6-8 10:50

词云效果图..........

王星星 · 发表于 2020-6-8 10:57

提示: 作者被禁止或删除内容自动屏蔽

帐号		自动登录	找回密码
密码			注册[Register]

王星星王星星当前离线好友阅读权限 0 听众最后登录 1970-1-1 头像被屏蔽	王星星发表于 2020-6-8 10:57 提示: 作者被禁止或删除内容自动屏蔽
王星星王星星当前离线好友阅读权限 0 听众最后登录 1970-1-1 头像被屏蔽
	回复支持举报

[Python 转载] 使用Requests爬取某租车软件应用商店上的评论，并进行情感分析，同时生成词云

免费评分

本帖被以下淘专辑推荐: