# Python + Selenium:识别验证码并自动登录。新学的,拿来练练手;这个网站明天就关闭了。
import base64
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# Page that carries the login form; img() fetches the same URL again with
# requests to locate the captcha image.
url = 'https://zy.hebeea.edu.cn:7001/hebgkzyfz/zyfz/main/index'
# NOTE: these statements run at import time. They start a Chrome session that
# main() and login() use through the module-level name `browser`.
browser = webdriver.Chrome()
browser.maximize_window()
browser.get(url)
def main():
    """Print the text of one fixed page element, then run the login flow.

    Uses the module-level ``browser`` (already pointed at ``url``). The
    element is addressed by an absolute XPath, so any change to the page
    layout will make the lookup raise ``NoSuchElementException``.
    """
    # find_element(by, value) is available in both Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    html = browser.find_element('xpath', '/html/body/div[1]/div[2]/div[2]')
    # textContent also returns text that is not currently rendered/visible.
    print(html.get_attribute('textContent'))
    login()
def login(username='你的账号', password='密码'):
    """Fill in and submit the login form in the module-level ``browser``.

    The captcha is recognised first via ``img()``; then the account,
    password and captcha fields are typed with a short pause between them
    and the submit button is clicked.

    Args:
        username: Text typed into the ``zjhm`` field. The default is the
            placeholder the script shipped with.
        password: Text typed into the ``mm`` field. The default is the
            placeholder the script shipped with.
    """
    code = img()

    # find_element(by, value) works on Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    un = browser.find_element('xpath', '//*[@id="zjhm"]')
    pw = browser.find_element('xpath', '//*[@id="mm"]')
    yzm = browser.find_element('xpath', '//*[@id="kaptchaCode"]')
    submit = browser.find_element('xpath', '//*[@id="submitBtn"]')

    un.send_keys(username)
    time.sleep(0.7)
    pw.send_keys(password)
    time.sleep(0.7)
    # img() returns a list of matched strings; join them so the field
    # receives plain text (an empty list types nothing).
    yzm.send_keys(''.join(code))
    time.sleep(0.7)
    submit.click()
def img():
    """Download the login page's captcha image and have it recognised.

    Fetches the login page with ``requests``, locates the
    ``<img id="VerificationCode">`` tag, downloads the image it points to
    (also saved as ``img1.jpg`` in the working directory) and posts it,
    base64-encoded, to the recognition API.

    Returns:
        list[str]: The text captured between ``|`` delimiters in the API's
        response; empty when the response contains no such text.

    Raises:
        RuntimeError: If the captcha ``<img>`` tag or its ``src`` attribute
            is missing from the login page.
        requests.RequestException: On network failure, timeout, or an HTTP
            error status from the login page or the captcha image.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Mobile Safari/537.36 Edg/91.0.864.48'}
    timeout = 10  # seconds; requests never times out unless told to

    # NOTE(review): the page and image are fetched with requests, not with the
    # Selenium browser. If the server ties the captcha to a session cookie,
    # the recognised code may not match the one the browser shows - confirm.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    res = soup.find('img', id='VerificationCode')
    # Read the attribute from the parsed tag instead of regex-matching its
    # string form, which breaks when other attributes follow src.
    src = res.get('src') if res is not None else None
    if not src:
        raise RuntimeError(
            'captcha <img id="VerificationCode"> with a src attribute '
            'was not found on the login page'
        )

    # Resolve src against the page URL so relative and absolute values work.
    img_url = urljoin(url, src)
    img_response = requests.get(img_url, timeout=timeout)
    img_response.raise_for_status()
    img_bytes = img_response.content
    # Keep a copy on disk so the downloaded captcha can be inspected.
    with open('img1.jpg', 'wb') as f:
        f.write(img_bytes)

    finally_img = base64.b64encode(img_bytes)  # the API expects base64 data
    break_img_url = 'http://api.95man.com:8888/api/Http/Recog?Taken=你的taken&imgtype=1&len=4'
    data = {'ImgBase64': finally_img}
    img_content = requests.post(break_img_url, data=data, timeout=timeout)
    # Raw string: '\|' in a normal literal is an invalid escape sequence.
    code = re.findall(r'\|(.*)\|', img_content.text)
    return code
# Run the login flow only when executed as a script, not when imported.
if __name__ == '__main__':
    main()