从慢慢买网站中爬取京东和天猫的商品历史价格
从CSDN付费下载的代码,修改之后可以在IDLE中正常运行;数据会生成为表格,可以自行改善、学习。
#!/usr/bin/python
import datetime
import json
import random
import re
import socket
import time
from time import ctime
from tkinter import *
from urllib import error
from urllib.parse import *

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import requests
import xlrd
from lxml import etree
from matplotlib import pyplot
from requests.packages import urllib3
# Global matplotlib settings: select the SimHei font for sans-serif text and
# turn off the Unicode minus glyph (presumably so Chinese labels and negative
# ticks render with that font).
mpl.rcParams.update({
    "font.sans-serif": ["SimHei"],
    "axes.unicode_minus": False,
})
# Timestamps captured once, when the module is loaded (local time).
now_date = time.strftime("%m-%d")
now_time = time.strftime("%Y-%m-%d-%H-%M-%S")
# Pool of 11-digit numbers; HistoryPriceSearch picks one at random as the
# "username" form field of its promotion request.
# NOTE(review): these look like real subscriber numbers - consider replacing
# them with synthetic values.
phone_list = [
    18303517744,
    13613416611,
    15219466201,
    15036222256,
    18438888133,
    18876941131,
    18876622089,
    18889262767,
    13715150077,
    13717033838,
    18351078990,
    13467719111,
    15997693333,
    13730600607,
    13699051071,
    13849038741,
    18352936688,
    13880888292,
    18822441999,
    15777770130,
    15777772845,
    13727693111,
    13632577333,
    15976856868,
    18222167181,
    13512960022,
    13530102266,
    18300666187,
    15824817777,
    18349333171,
    13838555227,
    15890005577,
    15890008887,
    13838200888,
    13924853168,
    18822488887,
    13972911999,
    13428334566,
    13566102222,
    13732097555,
    15233333323,
    13682987828,
    13923918859,
    18859981392,
    15818692899,
    15012563066,
    18222522000,
    13828716737,
    13692298935,
    13706053195,
    13887441413,
    18322040999,
    13911336673,
    13801391870,
    13433196988,
    13702485588,
    13924578588,
    13924852345,
    18823143456,
    13637666699,
    13755630022,
    13920593529,
    18702888838,
    15198120000,
    13908057178,
    18844227188,
    18750468844,
    13505952075,
    15768179999,
    18356194521,
    13696754521,
    13788829706,
    15208275054,
    18777770214,
    13551275898,
    18280151115,
    13677777254,
    18769721000,
    18897777726,
    15814226133,
    15918128980,
    15918129083,
    15918129282,
    15918129090,
    18300077779,
    15022277000,
    15875766666,
    18428088892,
    15703382298,
    15131712232,
    15732922520,
    13874677777,
    18255555551,
    18393897777,
    15180222225,
    13505740467,
    13780390000,
    18859567892,
    15277775445,
    13662688881,
    18213777222,
    13761746746,
    15000505062,
    14761188884,
    13809070207,
    13818357698,
    13873179698,
    18817871288,
    15112998888,
    15703361816,
    15290911121,
    15107555885,
    18396217171,
    13825876548,
    13619870320,
    13778891234,
    13548291222,
    18282200022,
    18402898980,
    18328025788,
    15228886138,
    17878781118,
    15123888444,
    15837182792,
    15838125087,
    18703896718,
    18736011629,
    18839781750,
    18837170569,
    15777776964,
    18761755000,
    18751373210,
    15962711155,
    15962792088,
    18761755088,
    13656291113,
    18862779378,
    15190971978,
    13777888585,
    15068936333,
    15204025988,
    13654059991,
    15775677700,
    13684218789,
    15281898765,
    13616202666,
    18751126999,
    13812920788,
    13809055222,
    13962350777,
    18353240966,
    18853296464,
    17839929705,
    18838967382,
    18749418806,
    15093239328,
    15188349522,
    18236956924,
    18348405579,
    15093334268,
    13505647555,
    15220525678,
    15020050513,
    15020030417,
    15267701717,
    15088931331,
    15906878938,
    13646514938,
    13706636314,
    18867793298,
    13739742666,
    15731102345,
    13859652222,
    18232102678,
    13601261337,
    15231099666,
    18337728521,
    15203802168,
    18331758666,
    18736599499,
    13930109099,
    15738888289,
    15738888538,
    15738888576,
    15738888697,
    15738888963,
    13797904444,
    15243191111,
    18405311888,
    18405311888,
    13791080000,
    13791080000,
    13908376207,
    13908335110,
    13908374332,
    18702397333,
    18702379555,
    15922584000,
    13783666664,
    18335392777,
    15217430000,
    15992225679,
    13585510688,
    15818991889,
    17806722226,
    13536565653,
    18738651999,
    18388555511,
    15825022222,
    15882234084,
    13776268888,
    15018310888,
    15113133313,
    13701097729,
    15726835666,
    15058299222,
    15118444415,
    18820300009,
    18825700007,
    13829111788,
    13825766788,
    13480423333,
    13711888886,
    13532923333,
    13825737888,
    13537328888,
    13686678888,
    13538345678,
    15016967488,
    15917735557,
    15217104555,
    15917669777,
    15017888444,
    15931390000,
    15267180777,
    15068793333,
    18335156789,
    13835175177,
    18202468383,
    13926787833,
    15815100303,
    15892056631,
    13599305858,
    13616979898,
    13511100900,
    13786766667,
    13686868538,
    13632878899,
    13883038222,
    18838200011,
    13911672661,
    13521935222,
    13802289678,
    13728888822,
    13801507158,
    15093939323,
    15160299539,
    18831119031,
    13974259999,
    15807539093,
    15023669066,
    13785811099,
    18716433334,
    18834845999,
    13507170130,
    13507115301,
    13995588392,
    13657247111,
    17839999122,
    17839993883,
    13807196657,
    13807197319,
    13807198517,
    13807153256,
    13807190231,
    13908631578,
    13908863082,
    18822858108,
    13510308789,
    13510102070,
    18419521214,
    13877853333,
    18351203222,
    18261197555,
    15815285757,
    15261115522,
    13903173981,
    15132755552,
    15019677099,
    18862192899,
    13678863811,
    13983652278,
    13856977511,
    13589966223,
    18337623210,
    13979673333,
    15007927777,
    18837744446,
    13950654999,
    13861186488,
    18870000005,
    15158172221,
    15824107733,
    13790746666,
    15802648889,
    13808322226,
    15823513000,
    18883190766,
    18883298278,
    18375801115,
    18375702233,
    15023871222,
    13779033333,
    18872855555,
    18270003333,
    18886889988,
    15777777783,
    18881111115,
    18882888802,
    14799448888,
    13688819128,
    13688819693,
]
# Android WeChat-browser User-Agent strings; HistoryPriceSearch picks one at
# random for its promotion-request headers.
user_agent_m = [
    'Mozilla/5.0 (Linux; Android 8.1; PAR-AL00 Build/HUAWEIPAR-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/tools',
    'Mozilla/5.0 (Linux; Android 8.1; EML-AL00 Build/HUAWEIEML-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.143 Crosswalk/24.53.595.0 XWEB/358 MMWEBSDK/23 Mobile Safari/537.36 MicroMessenger/6.7.2.1340(0x2607023A) NetType/4G Language/zh_CN',
    'Mozilla/5.0 (Linux; Android 8.0; MHA-AL00 Build/HUAWEIMHA-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/4G Language/zh_CN Process/tools',
    'Mozilla/5.0 (Linux; Android 5.1.1; vivo X6S A Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044207 Mobile Safari/537.36 MicroMessenger/6.7.3.1340(0x26070332) NetType/4G Language/zh_CN Process/tools',
]

# Carrier names; one is sent at random as the "c_operator" form field.
ip_origin = ["中国联通", "中国移动", "中国电信"]

# Handset model names; one is sent at random as the "c_devmodel" form field.
c_devmodel_list = [
    'Mate10',
    'P8青春',
    '荣耀7i',
    '畅玩7A',
    '荣耀8XMax',
    'Mate10Pro',
    '荣耀10',
    'M3青春',
    '荣耀8青春',
]
class CrawlCompareWeb:
    """Query the manmanbuy search endpoint for a keyword and export the hits.

    Notes from the original author (translated): the price-comparison site
    has strict anti-scraping measures; rotating IPs might get past them, but
    that is unverified. Another history-price endpoint:
    http://tool.manmanbuy.com/history.aspx?DA=1&action=gethistory&url=http%3a%2
    f%2fitem.tmall.com%2fitem.htm%3fid%3d532034800285&bjid=&spbh=&cxid=&zkid=&w=350&token=yva7088d209cdc
    bbbf30e6af9cf24005ce2dx
    It becomes usable once the token scheme is worked out.
    """

    # Seconds to wait for a response before giving up on a request.
    REQUEST_TIMEOUT = 10
    # Longest worksheet name Excel accepts.
    _MAX_SHEET_NAME = 31

    def __init__(self, search_words, writer):
        """Store the keyword, the Excel writer and the request templates.

        Args:
            search_words: Keyword to search for; kept raw in ``words`` and
                URL-quoted in ``search_words``.
            writer: Excel writer object later passed to ``DataFrame.to_excel``.
        """
        self.start_url = "https://apapia-search.manmanbuy.com/index_json.ashx"
        self.decode_type = "utf-8"
        self.total_page = None
        self.writer = writer
        self.words = search_words
        self.search_words = quote(
            search_words, encoding=self.decode_type, errors="replace"
        )
        # NOTE(review): the fixed Content-Length is presumably replaced by the
        # HTTP client with the real body length - confirm before relying on it.
        self.headers = {
            "Host": "apapia-search.manmanbuy.com",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "Proxy-Connection": "close",
            "Cookie": "ASP.NET_SessionId=5nm1vf35xt2eisuhe2k0rm33; jjkcpnew111=cp98576765_1063811521_2018/9/26",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) AppleWebKit/605.1.15 "
            "(KHTML, like Gecko) Mobile/15F79 mmbWebBrowse",
            "Content-Length": "523",
            "Accept-Encoding": "gzip",
            "Connection": "close",
        }
        # Form body template; the four "{}" slots are, in order:
        # key (quoted keyword), page, sign and t.
        self.data = (
            "jsoncallback=%3F&c_devmodel=iPhone%207&f1=&c_win=w_375_h_667&c_devid=C5707B0E-7A25-4BDF-BDF4-C64F8"
            "1711CAB&c_devtype=phone&f2=&key={}&iszy=&f3=&c_dp=2&f4=&c_devtoken=&c_channel=AppStore&f5=&"
            "smallclass=&f6=&methodName=getsearchkeylist&username=&c_operator=%E4%B8%AD%E5%9B%BD%E8%81%94%E"
            "9%80%9A&price2=&c_ostype=ios&c_engver=1.2.81&c_ctrl=w_search_form_f_search_product_content&page={}"
            "&sign={}&ppid=&price1=&c_contype=wifi&t={}&orderby=&c_osver=11.4&siteid=&c_appver=3.0.2"
        )
        # Parallel result columns, one entry per product found.
        self.title_list = []
        self.mall_list = []
        self.iszy_list = []
        self.price_list = []
        self.sales_list = []
        self.prourl_list = []
        self.skuid_list = []
        self.itemid_list = []
        self.crawl_time_list = []
        self.comment_list = []

    @staticmethod
    def _iter_items(result_data):
        """Yield one dict per product object found in the raw response text.

        Quotes and square brackets are removed, the text is split on "}" and
        every fragment mentioning "img" is re-closed and parsed as JSON.

        Raises:
            ValueError: If such a fragment is not valid JSON.
        """
        cleaned = result_data.replace("'", "").replace("[", "").replace("]", "")
        for fragment in cleaned.split("}"):
            candidate = fragment.strip(",").strip("\n") + "}"
            if "img" in candidate:
                yield json.loads(candidate)

    def _record_item(self, item):
        """Append the exported fields of one product dict to the result lists.

        Raises:
            KeyError: If the product lacks one of the exported fields.
        """
        self.title_list.append(item["title"])
        self.mall_list.append(item["mall"])
        self.iszy_list.append(item["iszy"])
        self.price_list.append(item["price"])
        self.sales_list.append(item["sales"])
        self.prourl_list.append(item["prourl"])
        self.skuid_list.append(item["skuid"])
        self.itemid_list.append(item["itemid"])
        self.comment_list.append(item["comment"])
        self.crawl_time_list.append(time.strftime("%Y%m%d%H%M%S"))

    def turn_page_get_info(self, max_pages=9):
        """Fetch result pages 1..max_pages and collect every product found.

        Stops early when a page comes back with an empty body and sleeps 2-3
        seconds between pages.

        Args:
            max_pages: Number of result pages to request (default 9).
        """
        for page in range(1, max_pages + 1):
            t_1 = int(round(time.time() * 1000))
            t_2 = t_1 + random.randint(1, 5)
            # t_1 fills the "sign" slot and t_2 the "t" slot of the template.
            payload = self.data.format(self.search_words, page, t_1, t_2)
            response = requests.post(
                self.start_url,
                data=payload,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            result_data = response.content.decode(self.decode_type)
            if not result_data:
                break
            for item in self._iter_items(result_data):
                self._record_item(item)
            time.sleep(random.uniform(2, 3))

    def download_file(self):
        """Write the collected products to a worksheet of ``self.writer``."""
        dataframe = pd.DataFrame(
            {
                "商品标题": self.title_list,
                "平台": self.mall_list,
                "店铺": self.iszy_list,
                "价格": self.price_list,
                "销量": self.sales_list,
                "评论量": self.comment_list,
                "地址": self.prourl_list,
                "sku": self.skuid_list,
            }
        )
        suffix = "_" + "全网价格数据" + "_" + time.strftime("%m%d")
        # Trim only the keyword so the name fits Excel's worksheet-name limit.
        to_c_sheet = self.words[: self._MAX_SHEET_NAME - len(suffix)] + suffix
        dataframe.to_excel(self.writer, index=False, sheet_name=to_c_sheet)
        print("数据写入完成,进程结束")
class HistoryPriceSearch:
    """Collect promotion history and price history for one product URL.

    Promotion records are requested from the manmanbuy widget service
    (``methodName=getZhekou``) and price points from the henzanapp API. The
    results are written to the Excel writer passed in and plotted with
    matplotlib.
    """

    # Seconds to wait for a response on the first-page and price requests.
    REQUEST_TIMEOUT = 10
    # First run of digits inside a promotion record's "dt" value.
    _DIGITS = re.compile(r"\d+")

    def __init__(self, search_url, writer):
        """Prepare endpoints, headers, form payloads and result containers.

        Args:
            search_url: Product page URL whose history is looked up.
            writer: Excel writer object that receives the results.
        """
        self.search_preferential_url = "https://apapia-history.manmanbuy.com/ChromeWidgetServices/WidgetServices.ashx"
        self.search_price_url = "https://ext.henzanapp.com/api.html"
        self.t = int(time.time() * 1000)
        # NOTE(review): "charset:utf-8" and the ":" inside the Cookie value
        # look like typos for "=", but are kept as sent - confirm.
        self.preferential_headers = {
            "Host": "apapia-history.manmanbuy.com",
            "Content-Type": "application/x-www-form-urlencoded; charset:utf-8",
            "Proxy-Connection": "close",
            "Cookie": "jjkcpnew111:cp44979114_1063811528_2018/10/18",
            "User-Agent": random.choice(user_agent_m),
            "Content-Length": "548",
            "Accept-Encoding": "gzip",
            "Connection": "close",
        }
        self.price_headers = {
            "Host": "ext.henzanapp.com",
            "Proxy-Connection": "close",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
            "Content-Length": "4550",
            "Accept-Encoding": "gzip",
            "Connection": "close",
            "Cookie": "mmzdd=482ef902b98b228c76a0f748e7deaa79",
        }
        # Form fields of the promotion request; "ipage" is set per request.
        self.preferential_data = {
            "c_devid": "C5707B0E-7A25-4BDF-BDF4-C64F81711CAB",
            "username": random.choice(phone_list),
            "ipage": "",
            "c_dp": "2",
            "c_engver": "1.2.83",
            "c_devtoken": "",
            "c_devmodel": random.choice(c_devmodel_list),
            "c_contype": "wifi",
            "c_win": "w_375_h_667",
            "t": self.t,
            "c_firstchannel": "AppStore_update",
            "p_url": search_url,
            "sign": "07E0CB3EF0B16E74",
            "c_ostype": "Android",
            "jsoncallback": "%3F",
            "c_ctrl": "w_search_trend0_f_content",
            "methodName": "getZhekou",
            "c_channel": "Google Play",
            "c_devtype": "Android",
            "c_operator": random.choice(ip_origin),
            "c_appver": "3.0.5",
            "c_firstquerendate": "1540799598929",
            "ipagesize": "6",
            "c_osver": "11.4",
        }
        # Form fields of the price request; "checkinfo" is an opaque captured
        # token, split here only to keep the literal manageable.
        self.price_data = {
            "tPrice": "",
            "toolbar_state": "open",
            "path1": "qihoo-mall-goodsinfo",
            "mid": "",
            "tSale": "",
            "fromTp": "0",
            "checkinfo": (
                "c9f8d7a8a8d7e899d7c9a9d709d9d71999d71909d7f8d9d7c999d7c8a9d709d9d7d899d7d809d7d8d9d78899d79909d7d8d9d7c909d71909d7d8d9d78819d7e909d7e8d9d7f8a9d7e999d709d9d7b909d7b9a9d7e9d9d7e819d7c909d7d8d9d78809d7b9a9d7d8d9d7d899d7f819d7e8d9d7e8980909d7b919d7e8d9d7f89809d7b819d7d8d9d7e809d7d819d7d8d9d7c899d7c999d7e8d9d7a8a8d799b8d7a8a8d7db5c1ccc7bdbfbcb9baba8a8d7b9a8d7a8a8d7888868e89898a8a8d799b8d7a8a8d7dbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f8a9d7a9a9d7c8d9d79819d79919d7e8d9d7a809d7a909d7f8d9d7a8a8d799b8d7a8a8d7db5c9b6c7bdbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f819d7c909d7d8d9d7d909d7d8a9d7e8d9d7b919d709a9d7c8d9d798a9d798a9d7d8d9d7a8a8d799b8d7a8a8d7accbcb9b7b1dacdbec1c4cdbcba8a8d7b9a8d7a8a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0b0bbb0dbb0b0bfbe9881d2c6adca91b19ababec985c8aa8a9cae9a8d7f8d819f89809b8e8a8e9a8d7c81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0bea2d0c9b0b0bdb2d981d2c6adba91b2a6cab68ca3a1ba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0bead82d9b0b0bdb8cbcba2c6adca9fa19f85c0dac0aeba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0b8cb899bb0b0b1daa5cba3c6adca91bdc1adb8cfadc4ca8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7a8a8d799b8d7a8a8d7fb5c1c1ccc4cdc5ca8a8d7b9a8d7a8a8d78809d7c819d719d9d7c8a9d7a919d7f8d9d7b909d71909d7f8d9d79809d7b819d7d8d9d7a8"
                "a8d799b8d7a8a8d7fb9bcc8c7c0cbca8a8d7b9a8d7a8a8d719e898a8a8d799b8d7a8a8d75cdc6ccc5cbba8a8d7b9a8d7a8a8d78899d7b809d7e8d9d70919d7b9a9d7c8d9d7b9a9d78809d7d8d9d7d809d7e8a9d709d9d7a8a8d799b8d7a8a8d7dbbb1cac8c4c9bdbaca8a8d7b9a8d7a8a8d7889898a8a8d799b8d7a8a8d75cdc6cdb4c9bbca8a8d7b9a8d7a8a8d7f819d799a9d7d8d9d788a9d70909d709d9d7f819d7f819d7e8d9d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ccc6c9b0cbbacdb5ca8a8d7b9a8d7a8a8d768f809c8f8dac988a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7db5c9b6c7bccbbdccb7cac8ca8a8d7b9a8d7a8a8d768f809c8f8dac9a8a8d799b8d7a8a8d7acdbab5cdc6c7b5cdbcc1ca8a8d7b9a8d7a8a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7cb6c9bacaba8a8d7b9a8d7a8a8d7f8d819f89809b8e8a8a8a8d799b8d7a8a8d7cb1cacdb4c4cdbbc7c9bab7c9bcca8a8d7b9a8d7a8a8d7c88888e898a8b8a898a8a8d799b8d7a8a8d7cb1ccc9bbba8a8d7b9a8d7a8a8d7e898b9f8d7b9f8d7b9f8d7e898a8a8d799b8d7a8a8d7cb1cbba8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ac9bec7bdc3cbca8a8d7b9a8d78899b8d7a8a8d7db4c9bba7c6ca8a8d7b9a8d7a8a8d7886898a8a8d799b8d7a8a8d76c7c1cbcacdbeca8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ccbb1cacccbc1ccba8a8d7b9a8d79899b8d7a8a8d7db4c9bbabc1ca8a8d7b9a8d7a8a8d7fb8c2c6888199c88b8c80d88b8c87bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d7bc8ccccc0ca8a8d799b8d7a8a8d7bb1c8ca8a8d7b9a8d7a8a8d798a9d70999d7e8d9d7f809c8f8dac9d809d7b899d709d9d7f819d7d899d7d8d9d79919d788a9d7d8d9d7e8a9d7f819d7e8d9d7f819d7d899d7d8d9d7e8a9d7a9a9d7c8d9d7c899d709a9d7c8d9d7e819d7a899d709d9d7c999d7e919d7f8d9d719a9d799a9d7f8d9d79899d7c919d7e8d9d7d9a9d7b999d7e8d9d788a9d7e819d7e8d9d7d809d7b899d709d9d7e909d7c899d7d8d9d7d809d7b899d709d9d7b8a9d7d899d7d8d9d70909d71909d7f8d9d71999d7e919d719d9d7099888a8c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809"
                "d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ca8a8d7b9a8d7c8b89899b8d7a8a8d7dbbb1cac8ca8a8d7b9a8d79809c8c8c899b8d7a8a8d7cb1a8c7c0cbca8a8d7b9a8d7a8a8d7a999d7b909d7f8d9d71999d7c899d7d8d9d7a8a8d799b8d7a8a8d7db5c9b6a8c7c0cbca8a8d7a9f8"
            ),
            "prevpop": "",
            "bfrom": "normal",
            "url": search_url,
            "path2": "goodspricecmp",
            "tplmd5": "7330361958732444829",
            "hisOpn": "0",
            "isGulike": "0",
            "cv": "4.2.1.0",
            "ref": search_url,
            "v": "v5",
            "pop": "1",
        }
        self.writer = writer
        # Parallel promotion columns, one entry per promotion record.
        self.spname_list = []
        self.spprice_list = []
        self.dt_list = []
        self.infoid_list = []
        self.infotype_list = []
        self.sppic_list = []
        # Price history, filled by parser_history_price_info().
        self.history_price_dict = {}
        self.search_price_start_date = None
        self.search_price_end_date = None

    @classmethod
    def _format_dt(cls, raw_dt, fmt):
        """Format the millisecond timestamp embedded in ``raw_dt`` as local time.

        Args:
            raw_dt: Record's "dt" value; its first run of digits is read as
                milliseconds since the epoch (presumably an ASP.NET-style
                "/Date(...)/" string - confirm against a live response).
            fmt: ``time.strftime`` format string.

        Returns:
            The formatted time, or None when the value is empty or has no digits.
        """
        if not raw_dt:
            return None
        match = cls._DIGITS.search(str(raw_dt))
        if match is None:
            return None
        return time.strftime(fmt, time.localtime(int(match.group()) / 1000))

    def _post_preferential(self, page, timeout):
        """POST the promotion form for result page ``page`` and return the response."""
        self.preferential_data["ipage"] = page
        return requests.post(
            url=self.search_preferential_url,
            headers=self.preferential_headers,
            data=self.preferential_data,
            verify=False,
            timeout=timeout,
        )

    def _record_deals(self, deals, dt_format):
        """Append every promotion record in ``deals`` to the result lists.

        Args:
            deals: Iterable of record dicts (the response's "zklist"); None is
                treated as empty.
            dt_format: ``time.strftime`` format used for the record date.
        """
        for deal in deals or []:
            spname = deal.get("spname")
            # Drop the paragraph tags wrapped around the promotion text.
            spprice = (deal.get("spprice") or "").replace("<p>", "").replace("</p>", "")
            dt = self._format_dt(deal.get("dt"), dt_format)
            infoid = deal.get("infoid")
            infotype = deal.get("infotype")
            sppic = deal.get("sppic")
            self.spname_list.append(spname)
            self.spprice_list.append(spprice)
            self.dt_list.append(dt)
            self.infoid_list.append(infoid)
            self.infotype_list.append(infotype)
            self.sppic_list.append(sppic)
            print(spname, spprice, dt, infoid, infotype, sppic)

    def _crawl_fixed_pages(self, pages):
        """Fetch promotion pages 1..pages, reporting each failed page."""
        for page in range(1, pages + 1):
            response = self._post_preferential(page, self.REQUEST_TIMEOUT)
            body = response.content.decode("utf-8")
            print(body)
            # The body is only parsed when the status is 200.
            json_data = json.loads(body) if response.status_code == 200 else {}
            if json_data.get("ok") == 1:
                self._record_deals(json_data.get("zklist"), "%Y-%m-%d %H:%M:%S")
            else:
                error_status_code = response.status_code
                print("凉了,被ban了~ 状态码:%s,自己看的办吧" % error_status_code)

    def _crawl_all_pages(self):
        """Fetch promotion pages from 1 upward until one has no records."""
        page = 1
        response = self._post_preferential(page, self.REQUEST_TIMEOUT)
        json_data = json.loads(response.content.decode("utf-8"))
        print(self.search_preferential_url)
        print(self.preferential_headers)
        print(self.preferential_data)
        print(json_data)
        if not (response.status_code == 200 and json_data.get("ok") == 1):
            print("该商品无历史优惠信息或User-Agent错误或触发反爬,请重试")
            return
        while json_data.get("ok") == 1:
            self._record_deals(json_data.get("zklist"), "%m-%d")
            page += 1
            response = self._post_preferential(page, 5)
            print(response.status_code)
            print(self.preferential_data["ipage"])
            json_data = json.loads(response.content.decode("utf-8"))
            if not json_data.get("zklist"):
                break
            print(json_data)
            time.sleep(random.uniform(0.5, 1.0))

    def parser_history_preferential_info(self, pages=None):
        """Collect promotion records into the ``*_list`` attributes.

        Args:
            pages: Number of pages to fetch. None keeps paging until a page
                has no records; any non-integer value only prints a failure
                message.
        """
        if pages is None:
            self._crawl_all_pages()
        elif isinstance(pages, int):
            self._crawl_fixed_pages(pages)
        else:
            print("数据抓取失败,洗洗睡吧")

    def parser_history_price_info(self):
        """Fetch the price history into ``history_price_dict``.

        Also stores the response's "bd" and "ed" values as the start and end
        dates of the queried range.
        """
        response = requests.post(
            url=self.search_price_url,
            data=self.price_data,
            headers=self.price_headers,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        body = response.content.decode("utf-8")
        print(body)
        json_data = json.loads(body) if response.status_code == 200 else {}
        pcinfo = json_data.get("pcinfo")
        if not pcinfo:
            print("数据为空,或者被ban~~")
            return
        print(json_data)
        self.search_price_start_date = pcinfo["bd"]
        self.search_price_end_date = pcinfo["ed"]
        for point in pcinfo["info"]:
            # NOTE(review): the key expression was lost in the published
            # listing; each point is assumed to carry its date under "dt" and
            # its price under "pr" - confirm against a live response.
            self.history_price_dict[point["dt"]] = point["pr"]

    def download_preferential_info_data(self):
        """Write the promotion records to a worksheet, then plot and save them."""
        dataframe = pd.DataFrame(
            {
                "名称": self.spname_list,
                "优惠信息": self.spprice_list,
                "日期": self.dt_list,
                "infoid": self.infoid_list,
                "infotype": self.infotype_list,
                "商品主图": self.sppic_list,
            }
        )
        to_c_sheet = "商品历史查询数据" + "_" + time.strftime("%Y%m%d%H%M%S")
        dataframe.to_excel(self.writer, index=False, sheet_name=to_c_sheet)
        print("数据写入完成,进程结束")
        pyplot.plot(self.dt_list, self.spprice_list)
        pyplot.xlabel('日期')
        pyplot.ylabel('价格')
        # NOTE(review): the title receives the whole list of names, not one name.
        pyplot.title(self.spname_list)
        # Fill arguments: x values, y values, fill baseline, fill colour.
        pyplot.fill_between(self.dt_list, self.spprice_list, 10, color='white')
        # Show the x axis in reverse order.
        pyplot.gca().invert_xaxis()
        # Save the chart under a timestamped name, then display it.
        now_time1 = time.strftime("%Y-%m-%d-%H-%M-%S")
        pyplot.savefig("./" + now_time1 + ".jpg")
        print("已经保存")
        pyplot.show()

    def download_price_info_data(self):
        """Plot the price history and embed the chart in a new worksheet.

        The chart is saved as "foo.png" and inserted at the top-left cell of
        a worksheet added through ``self.writer.book``; nothing is written
        when no price history was collected.
        """
        fig = plt.figure()
        mpl.rcParams["font.sans-serif"] = ["SimHei"]
        mpl.rcParams["axes.unicode_minus"] = False
        plt.rcParams["figure.figsize"] = (8.0, 4.0)
        print(self.history_price_dict)
        if not self.history_price_dict:
            print('数据为空,该商品未被收录')
            return
        # Date with the lowest recorded price, and that price.
        min_date = min(self.history_price_dict, key=self.history_price_dict.get)
        min_price = self.history_price_dict[min_date]
        plt.plot(
            list(self.history_price_dict.keys()),
            list(self.history_price_dict.values()),
        )
        # Label the minimum: its date to the left, its price to the right.
        plt.text(
            min_date, min_price, min_date, ha="right", va="bottom", fontsize=10
        )
        plt.text(
            min_date, min_price, min_price, ha="left", va="bottom", fontsize=10
        )
        plt.title("历史价格分布")
        plt.xlabel("日期")
        plt.ylabel("金额")
        fig.savefig("foo.png")
        fig_title = time.strftime("%m%d") + "历史价格查询"
        # NOTE(review): the worksheet title and image file name are the same
        # for every product handled on one day - check multi-URL runs.
        sheet = self.writer.book.add_worksheet(fig_title)
        sheet.insert_image(0, 0, "foo.png")
class Application:
    """Tkinter window that runs a history lookup for each URL typed in.

    The user enters one or more comma-separated product URLs and presses the
    button; results go to an Excel workbook next to the program and progress
    messages to the log pane.
    """

    def __init__(self):
        """Build the window: input box, lookup button, log pane and footer."""
        self.window = Tk()
        self.text = Text(self.window)
        # Window title, size and position.
        self.window.title("阳光价格")
        self.window.geometry("290x430+500+280")
        self.window.minsize(290, 380)
        # Input box for the product URLs.
        self.entry = Text(self.window)
        self.entry.place(x=10, y=10, width=200, height=165)
        # Button that starts the history lookup.
        self.submit_btn5 = Button(self.window, text=u"历史溯源", command=self.submit_5)
        self.submit_btn5.place(x=220, y=150, width=60, height=25)
        # Heading above the log pane.
        self.title_label = Label(self.window, text=u"运行日志:")
        self.title_label.place(x=10, y=180)
        # Log pane.
        self.result_text = Text(self.window, background="#ccc")
        self.result_text.place(x=10, y=205, width=270, height=205)
        # Footer label.
        self.footer_label = Label(self.window, text=u"2019_a7_price ")
        self.footer_label.place(x=60, y=410)
        self.file_path = None
        self.writer = None

    @staticmethod
    def _timestamp():
        """Return the current local time as used in log lines."""
        return time.strftime("%Y-%m-%d-%H-%M-%S")

    @staticmethod
    def _split_keys(raw_text):
        """Split the input box text into a list of non-empty, trimmed keys.

        Newlines become spaces, full-width commas are treated like ASCII
        commas, and the text is split on commas.
        """
        normalized = raw_text.strip().replace("\n", " ").replace("\uff0c", ",")
        return [key.strip() for key in normalized.split(",") if key.strip()]

    def _show_error(self, message):
        """Replace the log pane's content with ``message``."""
        self.result_text.delete(0.0, END)
        self.result_text.insert(END, message)

    def submit_5(self):
        """Run the history lookup for every key in the input box.

        Opens a workbook named after today's date, processes each key with
        HistoryPriceSearch, reports failures in the log pane and always
        closes the workbook afterwards.
        """
        self.result_text.delete(0.0, END)
        self.file_path = "./" + time.strftime("%m-%d") + "-" + "历史溯源" + ".xlsx"
        self.writer = pd.ExcelWriter(self.file_path)
        try:
            for key in self._split_keys(self.entry.get(0.0, END)):
                self.result_text.delete(0.0, END)
                search_history = HistoryPriceSearch(key, self.writer)
                # Fetch promotion and price history, then export both.
                search_history.parser_history_preferential_info()
                search_history.parser_history_price_info()
                search_history.download_preferential_info_data()
                search_history.download_price_info_data()
                log_1 = (
                    "历史价格搜索中"
                    + "\n"
                    + "开始下载数据中…………"
                    + "\n"
                    + "下载数据请在跟程序处于相同位置查找,文件名为【当前时间+历史趋势】"
                )
                self.result_text.insert(END, log_1)
        except ValueError as e:
            self._show_error(
                "log: " + self._timestamp() + "" + "查询异常 " + str(e) + "\n" + "请检查键入格式:国产红富士"
            )
        except KeyError as e:
            self._show_error(
                "log: " + self._timestamp() + "" + "查询异常 " + str(e) + "\n" + "请检查键入格式:烟台红富士"
            )
        except error.HTTPError as e:
            self._show_error(
                "log: " + self._timestamp() + "" + "URL异常 " + str(e) + "更换关键词重试"
            )
        except (error.URLError, requests.RequestException) as e:
            # The lookups use requests, so its errors are reported here too.
            self._show_error(
                "log: "
                + self._timestamp()
                + ""
                + "请求异常 "
                + str(e)
                + "\n"
                + "查询太频繁啦~请稍后重新或换IP重试"
            )
        finally:
            # close() writes the workbook to disk.
            self.writer.close()
            log = "\n" + "log: " + self._timestamp() + "" + "数据下载结束,请在程序所处位置查收Excel"
            self.result_text.insert(END, log)

    def run(self):
        """Enter the Tk main loop (blocks until the window is closed)."""
        self.window.mainloop()
if __name__ == "__main__":
    # Start the GUI only when the file is run as a script, so importing the
    # module does not open a window.
    app = Application()
    app.run()
后续可能会陆续更新Python服务器与Android客户端的交互;别问为什么,因为作业需要。
看不懂,但支持下。
从最早的惠惠购到后来的购物党、比价网,经过几次双十一、618的对比,目前最好的比价工具应该就是慢慢买了。
川哥 发表于 2019-11-15 11:09
小白一枚,不知道这串代码怎么使用?需要下载软件吗还是直接在网页F12中用?
需要下载Python的运行环境,轻量级的IDLE即可;但不推荐没接触过编程的人使用,因为需要安装各种包,还要花一些时间调试。
感谢分享!
小白一名。
很好,学习了。
厉害了。不过历史价格之外,人家还有优惠券之类的。
完全看不懂,谢谢。
略长的代码,感谢分享!
小白一枚,不知道这串代码怎么使用?需要下载软件吗,还是直接在网页F12中用?