python urllib post请求负载的问题
本帖最后由 cqwcns 于 2022-7-25 15:55 编辑
在浏览器里查看到的请求负载是这样的:
我的代码是这样的:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib.request
# Basic configuration
# Endpoint URL; func, sid, comefrom and cguid are carried as query-string
# parameters.
baseUrl = "https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003&&comefrom=54&cguid=1537104900668"
# Fields that get_html() url-encodes into the POST body. They repeat the values
# already present in the URL's query string.
post_data = {
'func': 'mbox:listMessages',
'sid': '00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003',
'comefrom': 54,
'cguid': 1537104900668
}
# Request headers sent with every call.
# NOTE(review): the cookie looks like a real logged-in session (presumably
# copied from the browser) -- avoid publishing or committing live values.
header = {
'cookie': 'CmLocation=200|200; CmProvid=gd; WT_FPC=id=2a77ac12a84388a29ce1656819455873:lv=1656819466795:ss=1656819855873; UM_distinctid=181c6f0bd88211-08db9745174c56-4a617f5c-15f900-181c6f0bdcc16b; _139_login_agreement=1; Login_UserNumber=MTM1NTM2ODcyOTk=; _139_login_version=60; S_DEVICE_TOKEN=5374a4b7a37c442aa88a984b564f8815; DEVICE_INFO_DIGEST=a52b58f888209e853dceb76e71c6b9793; a_l=1673050632000|4995576828; a_l2=1673050632000|12|MTM1NTM2O88yOTl8MjAyMy0wMS0wNyAw88oxNzoxMnxvL1Yrc0JTQ1IybXh2SzM1OEd4L0VMdTQ0U0lwNTd2L0VvdnJrbU1wcHl3OTNlYXNwSEl88Wo5TjRUbWRwRXVPbTB6QVUrb0NWWk0xTE5Bb25LZEFwdz09fGI4MjQwY2Y5MWFiOWUxMGZlYjM5MDI2Nzg1ODA4OTI2; RMKEY=409cedd23bd32b49; cookiepartid1688=12; ut1688=2; cookiepartid=12; UserData={}; SkinPath21688=skin_skyBlue; rmUin1688=479121018; provCode1688=1; areaCode1688=16; loginProcessFlag=; Os_SSo_Sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FCCAE7000003; welcome=s%3AKdftUXiorBuX5blKxNyLzlfuljVvYcXF.yJaX8CZrQVW48O%2BA2xb9LPuT2GipD2Yvn3IaiAlxgaE',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62'
}
def get_html(url):
    """POST the module-level ``post_data`` to *url* and print the response text.

    ``post_data`` is url-encoded as form fields and sent as the request body
    together with the module-level ``header`` dict. The response body is
    decoded as UTF-8 and printed; nothing is returned.

    NOTE(review): according to the discussion this snippet belongs to, the
    endpoint appears to expect the raw XML "request payload" as the body
    rather than these form fields -- confirm before relying on the result.

    Args:
        url: Full endpoint URL, including its query string.

    Raises:
        urllib.error.URLError: If the request cannot be completed (HTTP error
            statuses surface as its subclass ``HTTPError``).
    """
    # Imported explicitly: the file only imports urllib.request, and relying
    # on that module to expose urllib.parse is an implementation detail.
    from urllib.parse import urlencode

    data = urlencode(post_data).encode('utf-8')
    request = urllib.request.Request(
        url, headers=header, data=data, method='POST')
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        res = response.read().decode('utf-8')
    print(res)


if __name__ == '__main__':
    get_html(baseUrl)
我的代码返回错误是这样的:
从请求负载的信息来看,这个非常关键,决定了从第几条开始读和读多少条等。
<object>
<int name="fid">1</int>
<string name="order">receiveDate</string>
<string name="desc">1</string>
<int name="start">101</int>
<int name="total">100</int>
<string name="topFlag">curr_task</string>
<int name="sessionEnable">2</int>
<int name="encodeflag">1</int>
</object>
但这个又不是一个键值对,我不知道怎么传值。
所以,请教一下,这种情况,传递这个请求负载代码应该怎么写?感谢指教。
post_data 里应该放的是"请求负载"里的内容,URL 里的参数对应的才是"查询字符串参数"里的内容,你把前者放错了。请求体的构造方式请参考请求标头里的 Content-Type:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib.request
# Basic configuration
# Endpoint URL; func, sid, comefrom and cguid stay in the query string.
baseUrl = "https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003&&comefrom=54&cguid=1537104900668"

# Raw XML request payload, sent verbatim as the POST body. It is triple-quoted
# because it spans several lines and contains double quotes.
post_data = """<object>
<int name="fid">1</int>
<string name="order">receiveDate</string>
<string name="desc">1</string>
<int name="start">101</int>
<int name="total">100</int>
<string name="topFlag">curr_task</string>
<int name="sessionEnable">2</int>
<int name="encodeflag">1</int>
</object>"""

# Request headers sent with every call.
# NOTE(review): the cookie looks like a real logged-in session -- avoid
# publishing or committing live values.
header = {
    'cookie': 'CmLocation=200|200; CmProvid=gd; WT_FPC=id=2a77ac12a84388a29ce1656819455873:lv=1656819466795:ss=1656819855873; UM_distinctid=181c6f0bd88211-08db9745174c56-4a617f5c-15f900-181c6f0bdcc16b; _139_login_agreement=1; Login_UserNumber=MTM1NTM2ODcyOTk=; _139_login_version=60; S_DEVICE_TOKEN=5374a4b7a37c442aa88a984b564f8815; DEVICE_INFO_DIGEST=a52b58f888209e853dceb76e71c6b9793; a_l=1673050632000|4995576828; a_l2=1673050632000|12|MTM1NTM2O88yOTl8MjAyMy0wMS0wNyAw88oxNzoxMnxvL1Yrc0JTQ1IybXh2SzM1OEd4L0VMdTQ0U0lwNTd2L0VvdnJrbU1wcHl3OTNlYXNwSEl88Wo5TjRUbWRwRXVPbTB6QVUrb0NWWk0xTE5Bb25LZEFwdz09fGI4MjQwY2Y5MWFiOWUxMGZlYjM5MDI2Nzg1ODA4OTI2; RMKEY=409cedd23bd32b49; cookiepartid1688=12; ut1688=2; cookiepartid=12; UserData={}; SkinPath21688=skin_skyBlue; rmUin1688=479121018; provCode1688=1; areaCode1688=16; loginProcessFlag=; Os_SSo_Sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FCCAE7000003; welcome=s%3AKdftUXiorBuX5blKxNyLzlfuljVvYcXF.yJaX8CZrQVW48O%2BA2xb9LPuT2GipD2Yvn3IaiAlxgaE',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62',
    # The body is XML, so it is declared as such. If the browser's original
    # request headers show a different Content-Type, copy that value instead.
    'Content-Type': 'application/xml',
}
def get_html(url):
    """POST the module-level XML ``post_data`` to *url* and print the reply.

    The payload is sent unchanged as the request body together with the
    module-level ``header`` dict. The response body is decoded as UTF-8 and
    printed; nothing is returned.

    Args:
        url: Full endpoint URL, including its query string.

    Raises:
        urllib.error.URLError: If the request cannot be completed (HTTP error
            statuses surface as its subclass ``HTTPError``).
    """
    # post_data is a raw XML string, not key/value pairs. urlencode() only
    # accepts a mapping or a sequence of pairs and raises TypeError on a plain
    # string, so the text is encoded to bytes directly.
    data = post_data.encode('utf-8')
    request = urllib.request.Request(
        url, headers=header, data=data, method='POST')
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        res = response.read().decode('utf-8')
    print(res)
if __name__ == '__main__':
    get_html(baseUrl)

请求负载功能还没有用过,学习一下

本帖最后由 很快再相见123 于 2022-7-25 17:36 编辑
配置一下请求头,是可以的
'Cookie': '你的Cookie',
'Content-Type': 'application/xml'
```python
from requests import post as requests_post
def request_mail_receiver(start=1, total=20, timeout=30):
    """POST an XML ``listMessages`` payload to the mail endpoint and print the reply.

    Called with no arguments, the request body is identical to the original
    hard-coded payload (start=1, total=20).

    Args:
        start: Value written into the payload's ``start`` element; per the
            surrounding discussion this is the index of the first message to
            read.
        total: Value written into the payload's ``total`` element; per the
            surrounding discussion this is how many messages to read.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the call can block indefinitely.

    Raises:
        ValueError: If ``start`` or ``total`` cannot be converted to ``int``.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # NOTE(review): cguid here is one digit shorter than in the URL of the
    # original question (...900668) -- confirm which value is intended.
    base_url = 'https://appmail.mail.10086.cn/s?func=mbox:listMessages&sid=00Y1ODcyNTQ5OTAwMTYzMjQ402FAAAE7000003&&comefrom=54&cguid=153710490066'
    # int() guarantees that only digits are interpolated into the XML.
    application_xml_data = '''
<object>
<int name="fid">1</int>
<string name="order">receiveDate</string>
<string name="desc">1</string>
<int name="start">{start}</int>
<int name="total">{total}</int>
<string name="topFlag">curr_task</string>
<int name="sessionEnable">2</int>
<int name="encodeflag">1</int>
</object>
'''.format(start=int(start), total=int(total))
    cr_header = {
        # Placeholder: replace with the cookie of a logged-in session.
        'Cookie': '你的Cookie',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        # Tells the server the body is XML rather than form fields.
        'Content-Type': 'application/xml'
    }
    cr_response = requests_post(
        base_url,
        data=application_xml_data,
        headers=cr_header,
        timeout=timeout,
    ).content.decode('UTF-8')
    print(cr_response)


if __name__ == '__main__':
    print('Hello')
    request_mail_receiver()
```
学习了,增加经验
刚开始学爬虫,看不懂,顶一下
data="<数据>"
这样子就好了
学到了!!
页:
[1]