事情的起因是最近运营的一些站点被挂马了,一个一个查实在费事,于是便有了此物
实用性不强,毕竟我也只用过一次
适用于64位版本号为130.0.6723.92的chromedriver:https://www.123865.com/s/TKR5Vv-RYu5v
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import os
from tkinter import Tk
from tkinter.filedialog import askopenfilename
# 隐藏Tkinter主窗口
Tk().withdraw()
# 打开文件选择对话框
file_path = askopenfilename(filetypes=[("Excel files", "*.xlsx;*.xls")])
# 检查是否选择了文件
if not file_path:
print("没有选择文件。")
exit()
# 读取Excel文件
df = pd.read_excel(file_path)
# 确保文件夹存在
screenshot_dir = r"C:\Users\zmn\Desktop\屏幕截图"
os.makedirs(screenshot_dir, exist_ok=True)
# 设置Chrome选项
chrome_options = Options()
chrome_options.add_argument("--headless") # 无头模式
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-gpu")
# 设置ChromeDriver路径
chrome_driver_path = r"D:\chromedriver-win64\chromedriver.exe"
service = Service(chrome_driver_path)
# 初始化WebDriver
driver = webdriver.Chrome(service=service, options=chrome_options)
# 遍历每个域名并进行搜索和截图
for index, row in df.iterrows():
domain = row[0] # 假设域名在第一列
search_url = f"https://www.baidu.com/s?wd=site%3A{domain}%20%E5%94%AE%E5%90%8E%E7%94%B5%E8%AF%9D"
driver.get(search_url)
time.sleep(2) # 等待页面加载
# 截图并保存
screenshot_path = os.path.join(screenshot_dir, f"{domain}.png")
driver.save_screenshot(screenshot_path)
print(f"已保存截图: {screenshot_path}")
# 关闭WebDriver
driver.quit() |