使用Selenium自动答题问卷星(有题库,题目顺序随机)
本帖最后由 icer233 于 2024-10-16 00:02 编辑。
本文是作者为了完成学校作业(古诗文上下句填写),针对已知的 4 个问卷星链接编写的自动答题程序。由于问卷星的题目顺序随机,程序使用正则表达式在本地题库中搜索答案。
为了防止一些恶意行为,我不会放出问卷星链接,但是代码中关于与问卷星交互的代码部分还是可以参考的
本程序中有一部分代码是针对问卷出题者题目不规范而作的特判,可能不太容易看懂。
第一个(解决其中三个问卷星)
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.edge.service import Service
import re
import os
import shutil
import time
import configparser as cp
# Load the local question bank: the whole poem text that answers are searched in.
fp = './poems.txt'
with open(fp, 'r', encoding='utf-8') as file:
    poems = file.read()

# Read the run settings from the [db] section of config.ini.
config_path = './config.ini'
inifile = cp.ConfigParser()
# ConfigParser.read() silently skips unreadable files and returns the list of
# files it did parse, so fail here instead of with a bare KeyError('db') below.
if not inifile.read(config_path, 'UTF-8'):
    raise FileNotFoundError(config_path)
url = inifile['db']['url']          # questionnaire link
name = inifile['db']['name']        # typed into the name field
wtime = int(inifile['db']['time'])  # seconds to sleep before the final submit click
browser = inifile['db']['browser']  # 'chrome' or 'edge'
print('成功读取配置')
# Start the browser named in config.ini and bind it to `driver`.
if browser == 'chrome':
    # The file imports `Service` from both the chrome and the edge package, so
    # the bare name refers to whichever import ran last (edge). Import under an
    # explicit alias to be sure the Chrome service class is used here.
    from selenium.webdriver.chrome.service import Service as ChromeService

    chromedriver_path = './'
    chrome_options = Options()
    chrome_options.add_experimental_option('useAutomationExtension', False)  # do not use the automation extension
    # Chrome start-up arguments
    chrome_options.add_argument("--disable-popup-blocking")
    chrome_options.add_argument("--allow-running-insecure-content")
    chrome_options.add_argument("--disable-infobars")
    chrome_options.add_argument("--no-default-browser-check")
    chrome_options.add_argument("--no-first-run")
    chrome_options.add_argument("--disable-background-networking")
    chrome_options.add_argument("--remote-debugging-port=9255")
    # NOTE(review): these two look like ChromeOptions capabilities spelled as
    # command-line switches; confirm they have any effect.
    chrome_options.add_argument("--excludeswitches=enable-automation")
    chrome_options.add_argument("--useautomationextension=false")

    # Check whether the driver already exists at the expected path.
    chromedriver_executable_path = chromedriver_path + 'chromedriver.exe'
    if not os.path.exists(chromedriver_executable_path):
        # 1. Let ChromeDriverManager download a driver if none is present.
        driver_path = ChromeDriverManager().install()
        # 2. Copy it to exactly the path that was checked above, so the check
        #    succeeds next time whatever the downloaded file is called.
        shutil.copy(driver_path, chromedriver_executable_path)

    # Configure and launch ChromeDriver.
    service = ChromeService(chromedriver_executable_path)
    print('成功初始化Chrome设置')
    driver = webdriver.Chrome(service=service, options=chrome_options)
elif browser == 'edge':
    from selenium.webdriver.edge.service import Service as EdgeService

    edgedriver_path = './'
    edgedriver_executable_path = edgedriver_path + 'msedgedriver.exe'
    if not os.path.exists(edgedriver_executable_path):
        driver_path = EdgeChromiumDriverManager().install()
        shutil.copy(driver_path, edgedriver_executable_path)
    service = EdgeService(edgedriver_executable_path)
    print('成功初始化Edge')
    # Pass the service so the driver prepared above is the one actually used.
    driver = webdriver.Edge(service=service)
else:
    # Without this branch `driver` would simply be undefined further down.
    raise ValueError(f'不支持的浏览器: {browser!r} (可选: chrome / edge)')
# Open the questionnaire page.
driver.get(url)
print('成功打开链接')
# NOTE(review): implicitly_wait() configures the element-lookup timeout; it is
# presumably meant here as a short pause — confirm.
driver.implicitly_wait(1)
# Page 1: choose the grade.
select_element = driver.find_element(By.ID, 'q1') # locate the drop-down
select = Select(select_element)
select.select_by_value("4") # option value "4" is "ninth grade"
selected_option = select.first_selected_option
# show_next_page() is a JavaScript function of the page itself, called through the driver.
driver.execute_script('show_next_page()')
print('成功设置年级')
driver.implicitly_wait(2)
# Page 2: type the name read from config.ini.
name_input_box = driver.find_element(By.ID, 'q2')
name_input_box.send_keys(name)
driver.execute_script('show_next_page()')
print('成功设置名字', name)
driver.implicitly_wait(2)
def get_hint_type(poem_hint: str):
    """Classify a fill-in-the-blank hint by its runs of adjacent punctuation.

    The hint is the question text with the blanks missing, so a blank shows
    up as punctuation marks standing directly next to each other (a single
    space between two marks is tolerated).

    Args:
        poem_hint: text of the question as read from the page.

    Returns:
        2 when three punctuation marks stand in a row (multi-line question);
        otherwise the number of two-mark runs found: 0 for a single-line
        question whose blank is at the start, 1 for a single-line question
        whose blank is not at the start.
    """
    punct = r'[。\??!;,、]'
    if re.search(punct + ' ?' + punct + ' ?' + punct, poem_hint):
        return 2
    return len(re.findall(punct + ' ?' + punct, poem_hint))
# 找答案
def find_poem_ans(poem_hint: str, hint_type: int, corpus=None):
    """Look up the missing line(s) of a hint in the question bank.

    The hint is turned into a regular expression in which every blank is a
    capture group of CJK characters, and that expression is searched in the
    question bank.

    Args:
        poem_hint: question text as read from the page; all whitespace is
            removed before matching.
        hint_type: value from get_hint_type(). 0: the blank precedes the
            hint; 2: two blanks, marked by three punctuation marks in a row;
            any other value: one blank, marked by two marks in a row.
        corpus: text to search; defaults to the module-level ``poems``.

    Returns:
        The ``re.findall`` result: a list of strings for one blank, a list of
        2-tuples for two blanks. Empty when nothing matches or when the
        expected punctuation run is not present in the hint.
    """
    if corpus is None:
        corpus = poems
    ex_ans = '([\u4E00-\u9FA5]+)'
    hint = re.sub(r'\s+', '', poem_hint)

    if hint_type == 0:
        # re.escape covers '?' and every other regex metacharacter, so the
        # pattern can no longer fail to compile on unusual hint text.
        return re.findall(ex_ans + re.escape(hint), corpus)

    run_len = 3 if hint_type == 2 else 2
    gap = re.search(r'[。\??!;,、]{%d}' % run_len, hint)
    if gap is None:
        return []

    # A blank sits between each pair of adjacent marks in the run, so cut the
    # hint after the first mark and before the last one.
    first_cut = gap.start() + 1
    last_cut = gap.end() - 1
    pieces = [hint[:first_cut]]
    if hint_type == 2:
        pieces.append(hint[first_cut:last_cut])
    pieces.append(hint[last_cut:])
    return re.findall(ex_ans.join(re.escape(piece) for piece in pieces), corpus)
# Answer the questions, one fieldset (page) at a time.
try:
    fieldsets = driver.find_elements(By.CLASS_NAME, 'fieldset')
    # The grade and name pages were filled in before this point, so answering
    # starts at the third fieldset; `pg` is the 1-based page number.
    # NOTE(review): the original index expression is missing from the posted
    # code; this assumes one fieldset per page in document order — confirm.
    for pg, fs in enumerate(fieldsets[2:], start=3):
        poem_hint = fs.text
        print(pg, 'problem:', poem_hint)
        hint_type = get_hint_type(poem_hint)
        ans = find_poem_ans(poem_hint, hint_type)
        print('ans:', ans)
        if not ans:
            # Nothing found in the question bank: leave the blank empty.
            print('未找到答案,跳过')
        elif hint_type != 2:
            poem_input_box = fs.find_element(By.CSS_SELECTOR, 'div.field.ui-field-contain > div.field-label.gapfilltitle > div.topichtml > div.topictext > label.textEdit.initStyle > span.textCont')
            # Use the first match only; the list may contain several.
            poem_input_box.send_keys(ans[0])
        else:
            # Two blanks: the first match is an (answer 1, answer 2) tuple.
            ans1, ans2 = ans[0]
            poem_input_box_list = fs.find_elements(By.CSS_SELECTOR, '.textCont')
            if len(poem_input_box_list) >= 2:
                poem_input_box_list[0].send_keys(ans1)
                poem_input_box_list[1].send_keys(ans2)
        driver.execute_script('show_next_page()')
        driver.implicitly_wait(1)

    # Wait the configured number of seconds, then press the submit button.
    time.sleep(wtime)
    driver.find_element(By.XPATH, '//*[@id="ctlNext"]').click()
    # Keep the browser open until a key is pressed (Windows `pause`).
    os.system('pause')
finally:
    # Close the browser and driver process even if a step above failed.
    driver.quit()
print('over')
第二个(解决最后一个问卷星)
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.edge.service import Service
import re
import os
import shutil
import configparser as cp
# Load the local question bank: the whole poem text that answers are searched in.
fp = './poems.txt'
with open(fp, 'r', encoding='utf-8') as file:
    poems = file.read()

# Read the run settings from the [db] section of config.ini.
config_path = './config.ini'
inifile = cp.ConfigParser()
# ConfigParser.read() silently skips unreadable files and returns the list of
# files it did parse, so fail here instead of with a bare KeyError('db') below.
if not inifile.read(config_path, 'UTF-8'):
    raise FileNotFoundError(config_path)
# The questionnaire link is hard-coded here and was left blank by the author
# on purpose; fill it in before running.
url = ''
name = inifile['db']['name']        # typed into the name field
browser = inifile['db']['browser']  # read, but this script always starts Chrome
print('成功读取配置')
# The file imports `Service` from both the chrome and the edge package, so the
# bare name refers to whichever import ran last (edge). Import under an
# explicit alias to be sure the Chrome service class is used here.
from selenium.webdriver.chrome.service import Service as ChromeService

chromedriver_path = './'
chrome_options = Options()
chrome_options.add_experimental_option('useAutomationExtension', False)  # do not use the automation extension
# Chrome start-up arguments
chrome_options.add_argument("--disable-popup-blocking")
chrome_options.add_argument("--allow-running-insecure-content")
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("--no-default-browser-check")
chrome_options.add_argument("--no-first-run")
chrome_options.add_argument("--disable-background-networking")
chrome_options.add_argument("--remote-debugging-port=9255")
# NOTE(review): these two look like ChromeOptions capabilities spelled as
# command-line switches; confirm they have any effect.
chrome_options.add_argument("--excludeswitches=enable-automation")
chrome_options.add_argument("--useautomationextension=false")

# Check whether the driver already exists at the expected path.
chromedriver_executable_path = chromedriver_path + 'chromedriver.exe'
if not os.path.exists(chromedriver_executable_path):
    # 1. Let ChromeDriverManager download a driver if none is present.
    driver_path = ChromeDriverManager().install()
    # 2. Copy it to exactly the path that was checked above, so the check
    #    succeeds next time whatever the downloaded file is called.
    shutil.copy(driver_path, chromedriver_executable_path)

# Configure and launch ChromeDriver.
service = ChromeService(chromedriver_executable_path)
print('成功初始化Chrome设置')
driver = webdriver.Chrome(service=service, options=chrome_options)
# Register a script that runs in every new document and makes
# navigator.webdriver read as undefined.
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'},
)
# Open the questionnaire page.
driver.get(url)
print('成功打开链接')
# NOTE(review): implicitly_wait() configures the element-lookup timeout; it is
# presumably meant here as a short pause — confirm.
driver.implicitly_wait(1)
# Click the element inside #form1 first (presumably a start/confirm control — verify on the page).
driver.find_element(By.XPATH, '//*[@id="form1"]/div/span').click()
# The "next" button is looked up once and the same element is clicked for every page below.
next_btn = driver.find_element(By.XPATH, '//*[@id="lxNextBtn"]')
# Page 1: choose the grade.
select_element = driver.find_element(By.ID, 'q1') # locate the drop-down
select = Select(select_element)
select.select_by_value("4") # option value "4" is "ninth grade"
selected_option = select.first_selected_option
next_btn.click()
print('成功设置年级')
driver.implicitly_wait(2)
# Page 2: type the name read from config.ini.
name_input_box = driver.find_element(By.ID, 'q2')
name_input_box.send_keys(name)
next_btn.click()
print('成功设置名字', name)
driver.implicitly_wait(2)
# 找答案
def find_poem_ans(poem_hint: str, corpus=None):
    """Look up the text that precedes a hint in the question bank.

    Args:
        poem_hint: question text as read from the page; the blank is assumed
            to come directly before it.
        corpus: text to search; defaults to the module-level ``poems``.

    Returns:
        A fixed string for the one special-cased question, otherwise the
        ``re.findall`` result: a list with the run of CJK characters that
        stands immediately before each occurrence of the hint (empty when
        the hint is not found).
    """
    # Special case: this answer contains a full stop, which the CJK-only
    # character class below cannot match.
    if poem_hint == ',又何妨!':
        return '酒酣胸胆尚开张。鬓微霜'
    if corpus is None:
        corpus = poems
    # Capture only the answer so the hint itself is not returned with it,
    # consistent with the special case above.
    ex_ans = '([\u4E00-\u9FA5]+)'
    # re.escape covers '?' and every other regex metacharacter, so the
    # pattern can no longer fail to compile on unusual hint text.
    return re.findall(ex_ans + re.escape(poem_hint), corpus)
# Answer the questions, one fieldset (page) at a time.
from selenium.common.exceptions import WebDriverException

try:
    fieldsets = driver.find_elements(By.CLASS_NAME, 'fieldset')
    # The grade and name pages were filled in before this point, so answering
    # starts at the third fieldset; `pg` is the 1-based page number.
    # NOTE(review): the original index expression is missing from the posted
    # code; this assumes one fieldset per page in document order — confirm.
    for pg, fs in enumerate(fieldsets[2:], start=3):
        poem_hint = fs.text
        print(pg, 'problem:', poem_hint)
        ans = find_poem_ans(poem_hint)
        print('ans:', ans)
        # find_poem_ans returns a plain string for its special case and a
        # list of matches otherwise; type the string or the first match.
        if isinstance(ans, str):
            answer_text = ans
        else:
            answer_text = ans[0] if ans else ''
        if answer_text:
            poem_input_box = fs.find_element(By.CSS_SELECTOR, 'div.field.ui-field-contain > div.field-label.gapfilltitle > div.topichtml > div.topictext > label.textEdit.initStyle > span.textCont')
            poem_input_box.send_keys(answer_text)
        else:
            # Nothing found in the question bank: leave the blank empty.
            print('未找到答案,跳过')
        next_btn.click()
        driver.implicitly_wait(1)

    # One question is answered with a fixed string through its own element id.
    driver.find_element(By.XPATH, '//*[@id="div75"]/div/div/div/label/span').send_keys('心非木石岂无感')
    next_btn.click()

    # Best effort: dismiss the pop-up layer and locate the verification widget
    # if they are present. Only Selenium errors are ignored.
    try:
        driver.find_element(By.XPATH,'//*[@id="layui-layer1"]/div/a').click()
        # NOTE(review): the element is located but nothing is done with it;
        # a trailing action may be missing from the posted code — confirm.
        driver.find_element(By.XPATH,'//*[@id="SM_TXT_1"]')
    except WebDriverException:
        pass

    # The "smart verification" may have to be completed by hand; keep the
    # browser open until a key is pressed (Windows `pause`).
    os.system('pause')
finally:
    # Close the browser and driver process even if a step above failed.
    driver.quit()
print('over')
建议尝试一下 Playwright 的 Python 1.43 版本(我只写了 1.43 版本的 JS 隔离脚本)。以下是改进的步骤:
1. 在 GitHub 上搜索并使用 HideMyPlaywright 来修复 Playwright 的 JS 隔离环境问题。
2. 在 `initScript` 阶段为每个 `page` 和 `context` 执行 `delete Object.getPrototypeOf(navigator).webdriver;`,删除 `webdriver` 标识。
3. 初始化 Playwright 时,为浏览器添加 `--disable-automation` 参数,以防止自动化检测。
4. 设置 Playwright 启动浏览器的 `Locale`、`TimezoneId` 和 `UserAgent` 为真实值,必须手动获取这些真实信息进行配置。
5. 为浏览器配置 WebRTC Network Limiter 以防止 DNS 泄露真实 IP(此步骤可选)。
6. 不要将 `Headless` 设置为 `true`,因为这样会导致 GPU 检测无法通过(特别提醒)。
通过这种简单的方法,可以快速通过 Cloudflare 的 5 秒盾检测(针对 HTTP L7 DDoS)。理论上,只要没有复杂的验证码,第一步检测不到异常环境,第二步就不会被拦截。
适合做程序的人,高
厉害啊!
谢谢分享
谢谢分享,给了一定的思路
MIAIONE 发表于 2024-10-13 12:10
建议尝试一下 Playwright 的 Python 1.43 版本(我只写了 1.43 版本的 JS 隔离脚本)。以下是改进的步骤:
...
感谢建议
路过,看看,下载试试,谢谢楼主分享
多谢分享,第二篇的 url 暴露了
可以试试 DrissionPage,比 Selenium 好用,不用搞驱动或者升级什么的