cqwcns 发表于 2022-7-25 15:52

python urllib post请求负载的问题

本帖最后由 cqwcns 于 2022-7-25 15:55 编辑

查看浏览器负载是这样的:


我的代码是这样的:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib.request

# Basic configuration
# Endpoint URL; func, sid, comefrom and cguid are already carried in its query string.
baseUrl = "https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003&&comefrom=54&cguid=1537104900668"
# Fields that get_html() form-encodes into the POST body; they repeat the
# query-string parameters of baseUrl above.
post_data = {
    'func': 'mbox:listMessages',
    'sid': '00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003',
    'comefrom': 54,
    'cguid': 1537104900668
}
# HTTP request headers sent with the POST: the cookie string and a browser user-agent.
header = {
    'cookie': 'CmLocation=200|200; CmProvid=gd; WT_FPC=id=2a77ac12a84388a29ce1656819455873:lv=1656819466795:ss=1656819855873; UM_distinctid=181c6f0bd88211-08db9745174c56-4a617f5c-15f900-181c6f0bdcc16b; _139_login_agreement=1; Login_UserNumber=MTM1NTM2ODcyOTk=; _139_login_version=60; S_DEVICE_TOKEN=5374a4b7a37c442aa88a984b564f8815; DEVICE_INFO_DIGEST=a52b58f888209e853dceb76e71c6b9793; a_l=1673050632000|4995576828; a_l2=1673050632000|12|MTM1NTM2O88yOTl8MjAyMy0wMS0wNyAw88oxNzoxMnxvL1Yrc0JTQ1IybXh2SzM1OEd4L0VMdTQ0U0lwNTd2L0VvdnJrbU1wcHl3OTNlYXNwSEl88Wo5TjRUbWRwRXVPbTB6QVUrb0NWWk0xTE5Bb25LZEFwdz09fGI4MjQwY2Y5MWFiOWUxMGZlYjM5MDI2Nzg1ODA4OTI2; RMKEY=409cedd23bd32b49; cookiepartid1688=12; ut1688=2; cookiepartid=12; UserData={}; SkinPath21688=skin_skyBlue; rmUin1688=479121018; provCode1688=1; areaCode1688=16; loginProcessFlag=; Os_SSo_Sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FCCAE7000003; welcome=s%3AKdftUXiorBuX5blKxNyLzlfuljVvYcXF.yJaX8CZrQVW48O%2BA2xb9LPuT2GipD2Yvn3IaiAlxgaE',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62'
}

def get_html(url):
    """POST the module-level ``post_data`` to *url* and print the reply.

    ``post_data`` is form-encoded (``key=value&...``) and sent as the request
    body together with the module-level ``header`` dict. The response body is
    decoded as UTF-8 and printed; nothing is returned.
    """
    form_body = urllib.parse.urlencode(post_data)
    request = urllib.request.Request(
        url,
        data=form_body.encode('utf-8'),
        headers=header,
        method='POST',
    )
    response = urllib.request.urlopen(request)
    raw_bytes = response.read()
    print(raw_bytes.decode('utf-8'))


# Script entry point: send the request to the configured URL and print the reply.
if __name__ == '__main__':
    get_html(baseUrl)

我的代码返回错误是这样的:


从请求负载的信息来看,这个非常关键,决定了从第几条开始读和读多少条等。
<object>
<int name="fid">1</int>
<string name="order">receiveDate</string>
<string name="desc">1</string>
<int name="start">101</int>
<int name="total">100</int>
<string name="topFlag">curr_task</string>
<int name="sessionEnable">2</int>
<int name="encodeflag">1</int>
</object>

但这个又不是一个键值对,我不知道怎么传值。


所以,请教一下,这种情况,传递这个请求负载代码应该怎么写?感谢指教。

神枪泡泡丶 发表于 2022-7-25 16:12

post_data 里应该放的是“请求负载”里的内容,而 URL 里的参数对应的是“查询字符串参数”里的内容。你的 post_data 放错了东西。请求体的构造方式请参考请求标头里的 Content-Type。

xiaojie96528 发表于 2022-7-25 16:42

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib.request

# Basic configuration
# Endpoint URL; the func/sid/comefrom/cguid parameters travel in the query string.
baseUrl = "https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003&&comefrom=54&cguid=1537104900668"
# Request payload: the XML document itself. A triple-quoted literal is required
# because the text spans several lines and contains double quotes.
post_data = """<object>
<int name="fid">1</int>
<string name="order">receiveDate</string>
<string name="desc">1</string>
<int name="start">101</int>
<int name="total">100</int>
<string name="topFlag">curr_task</string>
<int name="sessionEnable">2</int>
<int name="encodeflag">1</int>
</object>"""
# HTTP request headers sent with the POST.
header = {
    'cookie': 'CmLocation=200|200; CmProvid=gd; WT_FPC=id=2a77ac12a84388a29ce1656819455873:lv=1656819466795:ss=1656819855873; UM_distinctid=181c6f0bd88211-08db9745174c56-4a617f5c-15f900-181c6f0bdcc16b; _139_login_agreement=1; Login_UserNumber=MTM1NTM2ODcyOTk=; _139_login_version=60; S_DEVICE_TOKEN=5374a4b7a37c442aa88a984b564f8815; DEVICE_INFO_DIGEST=a52b58f888209e853dceb76e71c6b9793; a_l=1673050632000|4995576828; a_l2=1673050632000|12|MTM1NTM2O88yOTl8MjAyMy0wMS0wNyAw88oxNzoxMnxvL1Yrc0JTQ1IybXh2SzM1OEd4L0VMdTQ0U0lwNTd2L0VvdnJrbU1wcHl3OTNlYXNwSEl88Wo5TjRUbWRwRXVPbTB6QVUrb0NWWk0xTE5Bb25LZEFwdz09fGI4MjQwY2Y5MWFiOWUxMGZlYjM5MDI2Nzg1ODA4OTI2; RMKEY=409cedd23bd32b49; cookiepartid1688=12; ut1688=2; cookiepartid=12; UserData={}; SkinPath21688=skin_skyBlue; rmUin1688=479121018; provCode1688=1; areaCode1688=16; loginProcessFlag=; Os_SSo_Sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FCCAE7000003; welcome=s%3AKdftUXiorBuX5blKxNyLzlfuljVvYcXF.yJaX8CZrQVW48O%2BA2xb9LPuT2GipD2Yvn3IaiAlxgaE',
    # The trailing comma below is required; without it the next key is glued
    # onto this string literal and the dict no longer parses.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62',
    # Placeholder (reads "fill in the value from the page's original request
    # header here"): replace it with the real Content-Type before running.
    'Content-Type':'这里请填写网页原先请求头内的值'
}

def get_html(url):
    """POST the raw payload in ``post_data`` to *url* and print the reply.

    The module-level ``post_data`` string is sent unchanged as the request
    body (UTF-8 encoded) together with the module-level ``header`` dict.
    The response body is decoded as UTF-8 and printed; nothing is returned.

    Raises:
        urllib.error.URLError: if the request cannot be completed
            (``HTTPError`` for non-success status codes).
    """
    # post_data is a document, not key/value pairs: urlencode() raises
    # TypeError on a plain string, so send the text's bytes as-is.
    data = post_data.encode('utf-8')
    request = urllib.request.Request(
      url, headers=header, data=data, method='POST')
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        res = response.read().decode('utf-8')
    print(res)


# Script entry point: send the request to the configured URL and print the reply.
if __name__ == '__main__':
    get_html(baseUrl)

UndCover 发表于 2022-7-25 17:12

请求负载功能还没有用过,学习一下

很快再相见123 发表于 2022-7-25 17:33

本帖最后由 很快再相见123 于 2022-7-25 17:36 编辑

配置一下请求头,是可以的
'Cookie': '你的Cookie',
'Content-Type': 'application/xml'


```python
from requests import post as requests_post


def request_mail_receiver():
    """POST an XML query to the appmail.mail.10086.cn endpoint and print the reply.

    The XML document is sent as the request body with an
    ``application/xml`` Content-Type; the response bytes are decoded as
    UTF-8 and printed. Nothing is returned.
    """
    endpoint = 'https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003&&comefrom=54&cguid=153710490066'
    # Request payload, kept exactly as written (including surrounding whitespace).
    xml_payload = '''
    <object>
      <int name="fid">1</int>
      <string name="order">receiveDate</string>
      <string name="desc">1</string>
      <int name="start">1</int>
      <int name="total">20</int>
      <string name="topFlag">curr_task</string>
      <int name="sessionEnable">2</int>
      <int name="encodeflag">1</int>
    </object>
    '''
    request_headers = {
        'Cookie': '你的Cookie',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        'Content-Type': 'application/xml',
    }
    reply = requests_post(endpoint, data=xml_payload, headers=request_headers)
    body_text = reply.content.decode('UTF-8')
    print(body_text)


# Script entry point: print a greeting, then send the request and print the reply.
if __name__ == '__main__':
    print('Hello')
    request_mail_receiver()
```

xd223 发表于 2022-7-25 19:49

学习了,增加经验

Ylvan 发表于 2022-7-25 21:38

刚开始学爬虫,看不懂,顶一下

三滑稽甲苯 发表于 2022-7-26 06:47

data="<数据>"
这样子就好了

JKTeller 发表于 2022-7-26 19:48

学到了!!
页: [1]
查看完整版本: python urllib post请求负载的问题