看到论坛里一个12306的Python爬虫帖子,很感兴趣,
于是去百度搜了一下,写了个练手的,目前只能做到登录,后面的功能慢慢加(不过要期末考试了……)
[Python] 纯文本查看 复制代码
# coding:utf-8
import requests
import urllib3
import os
# Account credentials: replace both placeholders with your own 12306 login.
USER = '你的12306账号'
PWD = '你的12306密码'

# Header fields attached to every request made through the shared session.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Host": "kyfw.12306.cn",
    "Referer": "https://kyfw.12306.cn/otn/passport?redirect=/otn/",
}

# Certificate verification is switched off on the session below; without this
# call the SSL-related warnings keep being reported.
urllib3.disable_warnings()

# A single Session shared by all functions, so the headers only need to be set
# once instead of being passed on every request.
# (Upper- or lower-case names both work; upper-case is used for module globals.)
SESSION = requests.Session()
SESSION.verify = False
SESSION.headers.update(HEADERS)

# Fetch the login page first so the session obtains a cookie.
SESSION.get('https://kyfw.12306.cn/otn/login/init')
#验证码进行验证的函数,通过图片的序列1-4,5-8来确定哪一张图片
def check_captcha():
    """Ask the user which captcha pictures match and verify the answer.

    The user enters the 1-based numbers of the matching pictures separated by
    commas (for example ``1,2``). Each number is translated into a fixed
    ``x,y`` click coordinate from ``img_position`` (numbers 1-4 use the
    smaller y values, 5-8 the larger ones) and the joined coordinates are
    posted to the captcha-check endpoint through the shared ``SESSION``.

    The function only returns once the server reports ``result_code == '4'``.
    Invalid local input is re-prompted; a rejected answer triggers a fresh
    captcha download followed by another prompt.

    Returns:
        None
    """
    check_captcha_url = 'https://kyfw.12306.cn/passport/captcha/captcha-check'
    img_position = {
        '1': '50,50',
        '2': '120,58',
        '3': '196,50',
        '4': '271,44',
        '5': '50,124',
        '6': '124,116',
        '7': '197,127',
        '8': '275,120',
    }

    # Retry in a loop rather than re-entering main(): a nested main() would
    # run the full flow, and once it returned the outer main() would carry on
    # and call login()/uamtk() a second time.
    while True:
        raw = input('输入图片的序列号,使用逗号隔开\n')
        # Accept full-width commas and stray spaces, e.g. "1, 2" or "1,2".
        pieces = raw.replace(',', ',').split(',')
        try:
            # Only the lookup is guarded, so a KeyError can only mean the
            # user typed something outside 1-8.
            answer = ','.join(img_position[piece.strip()] for piece in pieces)
        except KeyError:
            print('验证码序列输入错误,请重新输入验证码序列!')
            # Nothing was submitted, so the captcha already downloaded is
            # still the one to answer; just ask again.
            continue

        data = {
            'answer': answer,
            'login_site': 'E',
            'rand': 'sjrand',
        }
        response = SESSION.post(check_captcha_url, data=data)
        # The endpoint replies with JSON; '4' is the success code.
        check_data = response.json()
        if check_data.get("result_code") == '4':
            print('验证码校验成功')
            return

        print('验证码校验失败,请重新验证')
        # Fetch a new picture before prompting again, as the original retry
        # (a full re-run of main()) did.
        download_capcha()
#下载验证码,并且打开在最前,就不用再切换了
def download_capcha():
    """Download the login captcha to ``img.jpg`` and open it for the user.

    The image is requested through the shared ``SESSION`` and written to
    ``img.jpg`` in the current working directory. It is then handed to the
    platform's default viewer so the user can read it.

    Returns:
        None

    Raises:
        SystemExit: if the HTTP request fails or the file cannot be written.
    """
    import subprocess
    import sys

    params = {
        "login_site": "E",
        "module": "login",
        "rand": "sjrand",
        # Presumably a cache-busting random key copied from the site's own
        # request; it is sent with an empty value.
        "0.0263208596679263": "",
    }
    captcha_url = 'https://kyfw.12306.cn/passport/captcha/captcha-image?'
    img_name = 'img.jpg'

    try:
        response = SESSION.get(captcha_url, params=params)
        response.raise_for_status()
        with open(img_name, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError):
        # Only network/HTTP and file errors are handled here; a bare except
        # would also swallow KeyboardInterrupt and programming errors.
        sys.exit('获取验证码失败')

    # Running the bare file name as a shell command only opens the image on
    # Windows, so pick the opener that matches the platform.
    try:
        if sys.platform.startswith('win'):
            os.startfile(img_name)
        elif sys.platform == 'darwin':
            subprocess.run(['open', img_name], check=False)
        else:
            subprocess.run(['xdg-open', img_name], check=False)
    except OSError:
        # No viewer available: the file is still on disk, so let the user
        # open it manually instead of aborting the login.
        print('无法自动打开验证码图片,请手动打开 ' + img_name)

    print('验证码下载完成')
    return None
#进行登录
def login():
    """Submit the account credentials to the passport login endpoint.

    Posts ``USER`` and ``PWD`` together with ``appid='otn'`` through the
    shared ``SESSION`` and reads ``result_code`` from the JSON reply. A value
    of ``0`` prints a confirmation; any other value terminates the program
    via ``exit`` with an error message.
    """
    reply = SESSION.post(
        'https://kyfw.12306.cn/passport/web/login',
        data={
            'username': USER,
            'password': PWD,
            'appid': 'otn',
        },
    ).json()

    # Guard clause: bail out on anything other than the success code.
    if reply["result_code"] != 0:
        exit('账号密码错误,请确定后重新登录')
    print('账号密码正确')
#还需要获取权限
def uamtk():
    """Obtain the authorisation token and confirm it to finish logging in.

    Two requests are made through the shared ``SESSION``:

    1. POST to the ``uamtk`` endpoint with ``appid='otn'``; the JSON reply
       supplies ``newapptk``.
    2. POST that token as ``tk`` to the ``uamauthclient`` endpoint.

    A ``result_code`` of ``0`` in the second reply prints a success message;
    any other value terminates the program via ``exit``.
    """
    token_reply = SESSION.post(
        'https://kyfw.12306.cn/passport/web/auth/uamtk',
        data={'appid': 'otn'},
    )
    token = token_reply.json()['newapptk']

    auth_reply = SESSION.post(
        'https://kyfw.12306.cn/otn/uamauthclient',
        data={'tk': token},
    )
    outcome = auth_reply.json()

    # Guard clause: bail out on anything other than the success code.
    if outcome['result_code'] != 0:
        exit('未知错误!')
    print('登录成功')
def main():
    """Run the login flow: fetch captcha, verify it, log in, authorise."""
    # The steps take no arguments and must run in exactly this order.
    for step in (download_capcha, check_captcha, login, uamtk):
        step()


if __name__ == '__main__':
    main()
借鉴了 https://blog.csdn.net/r244925932/article/details/81225884
这个讲得很详细,实在不懂就看这个。