wjl 发表于 2022-7-15 11:17

如何使用scrapy的POST请求获取json响应?

本帖最后由 wjl 于 2022-7-17 17:09 编辑

如何使用scrapy的POST请求获取json响应

试验程序在 https://www.52pojie.cn/forum.php ... &page=2#pid43226781 。用requests实现起来非常简单的程序,在scrapy中试了很多种方法,都得不到json返回数据。


import scrapy
import random
import json
from ..useragents import user_agent_pool

class QdcypfjSpider(scrapy.Spider):
    """Spider that POSTs page queries to http://www.cncyms.cn/pages.php.

    This is the non-working attempt from the question. The request body is
    JSON text produced by ``json.dumps`` while the ``Content-Type`` header
    declares ``application/x-www-form-urlencoded``.
    NOTE(review): that mismatch is presumably why no JSON comes back --
    confirm against the server's behaviour.
    """
    name = 'qdcypfj'
    allowed_domains = ['www.cncyms.cn']
    # start_urls = ['http://www.cncyms.cn/']
    def start_requests(self):
      """Yield one POST request for each page number 0, 1 and 2."""
      url = 'http://www.cncyms.cn/pages.php'
      for pagenum in range(0, 3):
            # pageNum is sent as an int and pname as None, which json.dumps
            # serialises as a JSON number and ``null`` respectively.
            post_data = {"pageNum": pagenum, "pname": None, "reltime": "副食品"}
            # headers = {'User-Agents': random.choice(user_agent_pool)}
            # headers = {'User-Agents': random.choice(user_agent_pool), 'Content-Type': 'application/json'}
            # headers = {'Content-Type': 'application/json'}
            # Declares a form-encoded body even though the body below is JSON.
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            # headers = {'User-Agents': random.choice(user_agent_pool),'Content-Type': 'application/json; charset=UTF-8'}
            yield scrapy.Request(url=url,method='POST',body=json.dumps(post_data),headers=headers, callback=self.parse)
            # yield scrapy.http.JsonRequest(url=url, headers=headers, callback=self.parse)
            # yield scrapy.http.JsonRequest(url=url, body=post_data, headers=headers, callback=self.parse)
            # yield scrapy.http.JsonRequest(url=url, body=post_data, callback=self.parse)



    def parse(self, response):
      """Print the response URL, its status and the decoded request body."""
      print(response.url)
      print(response.status)
      # This decodes the body of the *request* that was sent, not the
      # payload returned by the server.
      print(json.loads(response.request.body))
      # print(response.body['list'])
      # print(response.body)
      # print(response.body.decode())
      # print(response.request.body)
      # print(response.text)
      # print(response.json())
      # print(json.loads(response.text))
      # yield json.loads(response.text)
      # prices = json.loads(response.body_as_unicode())
      # prices = prices['list']
      # print(prices)

自己找到解决办法了:改用 scrapy.FormRequest 以表单方式提交,并把 formdata 中的值全部改成字符串(pageNum 用 str() 转换,pname 由 None 改为空字符串)。

import scrapy
import random
import json
from ..useragents import user_agent_pool

class QdcypfjSpider(scrapy.Spider):
    """Spider that queries http://www.cncyms.cn/pages.php page by page.

    Each page is requested with a form-encoded POST built by
    ``scrapy.FormRequest`` and the reply is decoded as JSON in ``parse``.
    """

    name = 'qdcypfj'
    allowed_domains = ['www.cncyms.cn']

    def start_requests(self):
        """Yield one form-encoded POST request for each of pages 0, 1 and 2."""
        url = 'http://www.cncyms.cn/pages.php'
        for pagenum in range(3):
            # FormRequest url-encodes ``formdata``, so every value has to be
            # a string: the page number is converted with str() and the
            # empty filter is sent as "" rather than None.
            post_data = {"pageNum": str(pagenum), "pname": "", "reltime": "副食品"}
            # The HTTP header is spelled "User-Agent"; a "User-Agents" key
            # is just an unknown extra header and leaves the default agent
            # in place.
            headers = {'User-Agent': random.choice(user_agent_pool)}
            yield scrapy.FormRequest(url=url, formdata=post_data, headers=headers, callback=self.parse)

    def parse(self, response):
        """Print the URL, status, raw text and decoded JSON of a response.

        Raises whatever ``response.json()`` raises when the body is not
        valid JSON.
        """
        print(response.url)
        print(response.status)
        print(response.text)
        # Decode once; json.loads(response.text) and json.loads(response.body)
        # would produce the same object again.
        print(response.json())

外酥内嫩 发表于 2022-7-15 14:02

打断点看看有没有返回响应数据
页: [1]
查看完整版本: 如何使用scrapy的POST请求获取json响应?