Python 爬取 联联周边游,优惠信息表格筛选
# 爬取联联周边游,获取优惠信息

## 简介
这个脚本用于爬取联联周边游的吃喝玩乐信息,并以 CSV 格式保存下来。程序每次请求前都会先睡眠 5 秒,因为请求过于频繁会被拉入黑名单。
## 各个区域代码id
https://api.lianlianlvyou.com/wx/city/list?timestamp=1607060901447&ll_client=2&ll_versionCode=1&ll_version=1&Authorization=oo25swO-KY0lqBxiCxp2h75MdE8E&i=wx3623dfa9e7270632&locationid=0
### 直接上代码
```python
import requests
import time
import json
import csv
import sys
import time
# Unix timestamp truncated to whole seconds, captured once when the module
# is loaded (online converter: https://tool.lu/timestamp/).
# Aside: time.localtime(time.time()) converts a timestamp into a date struct.
now = int(time.time())

# Area code for Kunshan (124).
locationID = "124"

# Desktop Chrome User-Agent header.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/73.0.3683.103 Safari/537.36'
    ),
}
def isNullOrEmpty(obj):
    """Return True when *obj* is None or has length zero.

    Args:
        obj: None, or any object that supports len() (str, list, dict, ...).

    Returns:
        bool: True for None or an empty sized object, False otherwise.

    Raises:
        TypeError: if *obj* is not None and does not support len().
    """
    return obj is None or len(obj) == 0
def getShopListByPages(pages=10):
    """Fetch the product list page by page and return every row found.

    Pages 1..pages are requested in order, sleeping 5 seconds before each
    request because frequent requests get the client blacklisted.  A page
    that cannot be fetched or parsed is reported and skipped, so one bad
    response does not abort the whole crawl.

    Args:
        pages: Number of pages to request (10 rows per page). Defaults to 10.

    Returns:
        list: The row objects found under data -> list -> data of each
        successfully parsed page, in page order.
    """
    shopList = []
    # Product-list endpoint; the page index is appended per request.
    base_shopUrl = (
        "https://api.lianlianlvyou.com/v1/wx/list?timestamp="
        + str(now)
        + "&locationid=" + locationID
        + "&ll_client=2&i=wx3623dfa9e7270632&ll_versionCode=1&ll_version=1&Authorization=oo25swO-KY0lqBxiCxp2h75MdE8E&longtitude=&latitude=&sortType=1&pageSize=10&pageIndex="
    )
    # NOTE(review): the module-level `header` (User-Agent) is defined but is
    # not sent with this request -- confirm whether the API needs it.
    for pageIndex in range(1, pages + 1):
        # Throttle: wait before every request to avoid being blacklisted.
        time.sleep(5)
        try:
            # A timeout keeps a stalled connection from hanging forever.
            rsp = requests.get(base_shopUrl + str(pageIndex), timeout=30)
        except requests.RequestException as exc:
            print("page %d: request failed: %s" % (pageIndex, exc))
            continue
        if isNullOrEmpty(rsp.text):
            continue
        try:
            # Deserialize the JSON body and walk down to the row list.
            rows = json.loads(rsp.text)['data']['list']['data']
        except (ValueError, KeyError, TypeError):
            # Non-JSON body or an error payload without the expected keys.
            print("page %d: unexpected response, skipped" % pageIndex)
            continue
        shopList.extend(rows or [])
    return shopList
def getShopDetailById(id):
    """Fetch one product's detail and flatten it into a summary dict.

    Sleeps 5 seconds before the request because frequent requests get the
    client blacklisted.

    Args:
        id: Product id; it is appended to both the API URL and the
            display link.

    Returns:
        dict | None: A dict with the keys "标题" (title), "地址列表" (list of
        shop name/address dicts), "价格列表" (list of sale/original price
        dicts) and "商品链接" (display URL).  None when the request fails,
        the body is empty, or the payload lacks the expected product data.
    """
    # Throttle: wait before every request to avoid being blacklisted.
    time.sleep(5)
    # Page where the product is displayed (used for the returned link).
    base_showDetailUrl = "https://cd-nt.lianlianlvyou.com/?v=335&i=wx3623dfa9e7270632#/zhoubian/"
    # API endpoint that returns the product detail.
    base_detailUrl = (
        "https://api.lianlianlvyou.com/v1/wx/product2?timestamp=" + str(now)
        + "&ll_client=2&ll_versionCode=1&ll_version=1&Authorization=oo25swO-KY0lqBxiCxp2h75MdE8E&"
        + "locationid=" + locationID
        + "&a=&c=&i=wx3623dfa9e7270632&miniProgram=0&id="
    )
    try:
        # A timeout keeps a stalled connection from hanging forever.
        rsp = requests.get(base_detailUrl + str(id), timeout=30)
    except requests.RequestException as exc:
        print("product %s: request failed: %s" % (id, exc))
        return None
    if isNullOrEmpty(rsp.text):
        return None
    try:
        product = json.loads(rsp.text)['data']['bizProduct']
        title = product["title"]
    except (ValueError, KeyError, TypeError):
        # Non-JSON body or an error payload without product data.
        print("product %s: unexpected response, skipped" % id)
        return None
    # "shops" and "items" may be absent or null; treat both as empty.
    shopAddressList = [
        {"店铺名": shop.get("name"), "店铺地址": shop.get("address")}
        for shop in product.get("shops") or []
    ]
    priceList = [
        {"现销售价": item.get("salePriceStr"), "门市价": item.get("originPriceStr")}
        for item in product.get("items") or []
    ]
    return {
        "标题": title,
        "地址列表": shopAddressList,
        "价格列表": priceList,
        "商品链接": base_showDetailUrl + str(id),
    }
def saveCsv(path, columns, rows):
    """Write a header row followed by data rows to a CSV file.

    The file is opened as UTF-8 with a BOM ('utf-8-sig') and with
    newline='' as the csv module requires.  Any existing file at *path*
    is overwritten.

    Args:
        path: Destination file path.
        columns: Iterable of column names, written as the first row.
        rows: Iterable of rows, each an iterable of cell values.

    Returns:
        None
    """
    # The context manager guarantees the handle is flushed and closed,
    # even if writing raises.
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        csv_write = csv.writer(f)
        csv_write.writerow(columns)
        csv_write.writerows(rows)
def main():
    """Crawl the product list, fetch each product's detail, and save a CSV.

    The CSV is written as '昆山.csv' in the directory given by sys.path[0].
    The column names are the keys of the first detail dict.  If no detail
    could be fetched, nothing is written.
    """
    # Local import: `os` is not in this file's top-level import list.
    import os

    shopList = getShopListByPages()

    # Keep only the products whose detail was actually retrieved.
    result = []
    for shop in shopList:
        row = getShopDetailById(shop["id"])
        if not isNullOrEmpty(row):
            result.append(row)

    # Without this guard result[0] below raises IndexError on an empty crawl.
    if not result:
        print("No product details were fetched; nothing to write.")
        return

    # Write the CSV: header from the first row's keys, then one line per row.
    cols = list(result[0].keys())
    rows = []
    for row in result:
        print(row)
        rows.append(list(row.values()))
    # sys.path is a list; its first entry is the directory used here.
    saveCsv(os.path.join(sys.path[0], '昆山.csv'), cols, rows)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
```

本帖最后由 a186che 于 2020-12-4 16:07 编辑
Alex.Merceryj 发表于 2020-12-4 16:02
程序睡眠5秒,有详细报错信息吗
应该具体在72行和29行 吧
runfile('C:/Users/Administrator/Documents/DM/llzbycs.py', wdir='C:/Users/Administrator/Documents/DM')
Traceback (most recent call last):
File "C:\Users\Administrator\Documents\DM\llzbycs.py", line 92, in <module>
main()
File "C:\Users\Administrator\Documents\DM\llzbycs.py", line 72, in main
shopList=getShopListByPages()
File "C:\Users\Administrator\Documents\DM\llzbycs.py", line 29, in getShopListByPages
time.sleep(5)

Traceback (most recent call last):
File "*******/lianlian.py", line 113, in <module>
main()
File "*******/lianlian.py", line 104, in main
cols = list(result.keys())
IndexError: list index out of range
IndexError: list index out of range,这个问题怎么解决啊?求大佬指教

顺便请问一下各位帅气美丽的小伙伴,有什么网址适合薅羊毛或者优惠信息活动的,我想趁着年轻多去玩玩

学习学习,感谢大佬分享

不错,这个挺好,感谢分享

很有用,谢谢分享

向大佬学习!!!!

感谢分享

厉害呀,大佬

厉害厉害