本帖最后由 wjl 于 2022-7-17 17:09 编辑
如何使用scrapy的POST请求获取json响应
试验程序见 https://www.52pojie.cn/forum.php ... &page=2#pid43226781 。这个程序用 requests 实现起来非常简单,但在 scrapy 中试了很多种方法,都得不到 json 返回数据。
[Python] 纯文本查看 复制代码
import scrapy
import random
import json
from ..useragents import user_agent_pool
class QdcypfjSpider(scrapy.Spider):
    """Spider that POSTs a JSON-encoded body to pages.php for pages 0, 1 and 2.

    Each request carries ``pageNum``, ``pname`` and ``reltime`` serialized with
    ``json.dumps`` while declaring a form-urlencoded ``Content-Type``.

    NOTE(review): the body encoding (JSON) does not match the declared
    Content-Type (form-urlencoded); this mismatch is presumably why the
    endpoint does not answer with JSON -- confirm against the server.
    """

    name = 'qdcypfj'
    allowed_domains = ['www.cncyms.cn']
    # ``start_urls`` is intentionally not defined; start_requests() builds
    # the POST requests explicitly.

    def start_requests(self):
        """Yield one POST request per page index in 0..2."""
        endpoint = 'http://www.cncyms.cn/pages.php'
        for page_index in range(3):
            payload = {"pageNum": page_index, "pname": None, "reltime": "副食品"}
            # Other variants that were tried here: a JSON Content-Type
            # (with and without a random user agent), and
            # scrapy.http.JsonRequest with and without a body.
            yield scrapy.Request(
                url=endpoint,
                method='POST',
                body=json.dumps(payload),
                headers={'Content-Type': 'application/x-www-form-urlencoded'},
                callback=self.parse,
            )

    def parse(self, response):
        """Print the response URL, its status, and the decoded request body."""
        print(response.url)
        print(response.status)
        # Echo what was actually sent, decoded back from the request body.
        sent_payload = json.loads(response.request.body)
        print(sent_payload)
自己找到解决办法了:改用 scrapy.FormRequest 以表单方式提交参数(formdata 中的值都要是字符串),就能得到 json 响应。
[Python] 纯文本查看 复制代码
import scrapy
import random
import json
from ..useragents import user_agent_pool
class QdcypfjSpider(scrapy.Spider):
    """Spider that fetches pages 0-2 from pages.php via form-encoded POST.

    The parameters are submitted with ``scrapy.FormRequest``, so they are sent
    as form data rather than as a JSON body. All form values are strings
    (``pageNum`` is converted with ``str`` and ``pname`` is an empty string).
    """

    name = 'qdcypfj'
    allowed_domains = ['www.cncyms.cn']

    def start_requests(self):
        """Yield one form-encoded POST request per page number in 0..2.

        Each request carries a user agent picked at random from
        ``user_agent_pool``.
        """
        url = 'http://www.cncyms.cn/pages.php'
        for pagenum in range(3):
            post_data = {"pageNum": str(pagenum), "pname": "", "reltime": "副食品"}
            # The HTTP header is named "User-Agent" (singular); the previous
            # key "User-Agents" was sent as an unrelated custom header, so the
            # randomly chosen user agent never took effect.
            headers = {'User-Agent': random.choice(user_agent_pool)}
            yield scrapy.FormRequest(
                url=url,
                formdata=post_data,
                headers=headers,
                callback=self.parse,
            )

    def parse(self, response):
        """Print the URL, status, raw text and decoded JSON of *response*."""
        print(response.url)
        print(response.status)
        print(response.text)
        # Three ways of decoding the same JSON payload, kept side by side for
        # comparison. NOTE: response.json() needs Scrapy 2.2 or newer.
        print(response.json())
        print(json.loads(response.text))
        print(json.loads(response.body))
|