# This post was last edited by 苏紫方璇 on 2023-8-20 15:17.
import os
import time
from urllib.parse import urlparse

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Replace these with your own Google account e-mail and password.
# NOTE: do not commit real credentials to version control.
USERNAME = 'xxx@gmail.com'
PASSWORD = 'xxx'
# Site URL that is typed into the Search Console form further down.
# It must be a complete URL ("scheme://host/"): the download directory is
# derived from urlparse(URL).netloc, which is empty when the "//" after the
# scheme is missing.
URL = 'https://xxxx.com/'
# Location of the chromedriver executable. Replace it with the real path on
# your machine; the chromedriver version has to match the installed Chrome.
chromedriver_path = r'C:\Program Files\Google\Chrome\Application\chromedriver.exe'

# Command-line switches passed to Chrome, applied in the order listed.
# Optional extras (left out by default):
#   '--headless' and '--disable-gpu' run Chrome headless, hiding the window;
#   add_experimental_option('excludeSwitches', ['enable-automation']) is a
#   further automation-related tweak.
_CHROME_ARGUMENTS = (
    # Custom user-agent string.
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
    # English UI; the script later matches English page text such as
    # "Continue" and "Welcome to Google Search Console".
    '--lang=en',
    # Turn off the Blink "AutomationControlled" feature.
    "--disable-blink-features=AutomationControlled",
)

# Build the Chrome options object and register every switch on it.
chrome_options = webdriver.ChromeOptions()
for _argument in _CHROME_ARGUMENTS:
    chrome_options.add_argument(_argument)
# Directory Chrome saves downloads into: <home>/Downloads/<host part of URL>.
# os.path.join is used so the path separators are correct on every platform
# instead of hard-coding Windows backslashes.
# Example of a fixed alternative: r"C:\Users\X\Downloads\Telegram Desktop"
parsed_url = urlparse(URL)
domain = parsed_url.netloc
user_path = os.path.expanduser("~")
download_directory = os.path.join(user_path, "Downloads", domain)

# Register the download preferences on the Chrome options.
chrome_options.add_experimental_option("prefs", {
    "download.default_directory": download_directory,
    "download.prompt_for_download": False,  # do not ask where to save each download
    # Original author's note: "allow overwriting downloads" — TODO confirm
    # against Chrome's preference documentation.
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,  # keep Safe Browsing switched on
})
# Start Chrome through an explicit driver Service. The `executable_path`
# keyword of webdriver.Chrome was deprecated in Selenium 4 and removed in later
# 4.x releases; handing the chromedriver path to Service is the supported form.
driver = webdriver.Chrome(
    service=Service(executable_path=chromedriver_path),
    options=chrome_options,
)
try:
    # Shared explicit wait for element lookups that follow a page transition.
    wait = WebDriverWait(driver, 10)

    # --- Sign in to the Google account ---------------------------------
    driver.get("https://accounts.google.com")
    time.sleep(2)
    # Wait for the e-mail field instead of assuming the fixed sleep sufficed.
    username_input = wait.until(
        EC.presence_of_element_located((By.ID, "identifierId"))
    )
    username_input.send_keys(USERNAME)
    username_input.send_keys(Keys.ENTER)
    time.sleep(2)
    # The password field appears after the e-mail step; wait until it is visible.
    password_input = wait.until(
        EC.visibility_of_element_located((By.NAME, "Passwd"))
    )
    password_input.send_keys(PASSWORD)
    password_input.send_keys(Keys.ENTER)
    time.sleep(3)

    # --- Open the Search Console welcome page --------------------------
    driver.get("https://search.google.com/u/0/search-console/welcome")
    time.sleep(2)
    # The page counts as loaded once its title matches exactly.
    expected_title = "Welcome to Google Search Console"
    WebDriverWait(driver, 6).until(EC.title_is(expected_title))

    # Select the panel whose data-input-type attribute is "2".
    div_element = driver.find_element(By.XPATH, '//div[@data-input-type="2"]')
    div_element.click()
    time.sleep(1)

    # Fill the URL input inside that panel. The leading "." keeps the XPath
    # relative to div_element; a bare "//" would search the whole document
    # even when called on an element.
    input_element = div_element.find_element(
        By.XPATH, ".//input[@aria-label='https://www.example.com']"
    )
    input_element.clear()  # remove any value already present in the field
    input_element.send_keys(URL)
    time.sleep(1)

    # Click the "Continue" button of the same panel.
    continue_button = driver.find_element(
        By.XPATH, "//div[@data-input-type='2']//span[text()='Continue']"
    )
    print(continue_button.get_attribute('outerHTML'))
    continue_button.click()
    time.sleep(5)

    # --- Download the HTML verification file ---------------------------
    download_link = wait.until(
        EC.presence_of_element_located(
            (By.XPATH, '//div[contains(text(), "google")][contains(text(), ".html")]')
        )
    )
    link_text = download_link.text
    # Cut the "google....html" file name out of the link text.
    start_index = link_text.find('google')
    end_index = link_text.find('.html')
    verification_filename = link_text[start_index:end_index + len('.html')]
    download_link.click()
    time.sleep(2)

    # Give the download extra time to land before the browser is closed.
    # Skipped when the file name could not be extracted from the link text.
    if start_index != -1 and end_index != -1:
        target_path = os.path.join(download_directory, verification_filename)
        deadline = time.time() + 10
        while not os.path.exists(target_path) and time.time() < deadline:
            time.sleep(0.5)
        if not os.path.exists(target_path):
            print(f"Verification file not found after download: {target_path}")
except Exception as e:
    # Top-level boundary of the script: report the failure; the browser is
    # closed in the finally clause below.
    print(e)
finally:
    # Always close the browser, whether or not the flow succeeded.
    driver.quit()