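# Works with various image-based document libraries. While the page is being
# scrolled automatically you can still operate it manually. Once the bottom of
# the page is reached, the image links are collected and filtered.
from selenium import webdriver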
from selenium.webdriver.common.by import By
# Explicit-wait helpers, used to wait until the <img> elements are present
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pyautogui
from time import sleep
# Page whose images are harvested.
TARGET_URL = 'https://max.book118.com/html/2024/0309/6134131010010100.shtm'
# Fixed pause after navigation so the page can finish its initial load.
PAGE_LOAD_WAIT_S = 10
# Upper bound on PageDown presses so the scroll loop always terminates.
MAX_SCROLL_STEPS = 2000
# Pause after each PageDown so newly revealed content has time to load.
SCROLL_PAUSE_S = 2
# Slack (in pixels) for rounding differences between scroll offset and height.
BOTTOM_TOLERANCE_PX = 2


def is_at_bottom(scroll_position, page_height, viewport_height,
                 tolerance=BOTTOM_TOLERANCE_PX) -> bool:
    """Return True when the visible viewport reaches the end of the page.

    Args:
        scroll_position: Current vertical scroll offset in pixels.
        page_height: Total scrollable height of the page in pixels.
        viewport_height: Height of the visible browser viewport in pixels.
        tolerance: Pixels of slack allowed before the exact bottom.

    The viewport height is passed in rather than hard-coded, so the check
    works for any window size instead of only one specific screen.
    """
    return scroll_position + viewport_height >= page_height - tolerance


def filter_links(links, keyword: str) -> list:
    """Return the links that contain *keyword*, preserving order.

    Entries that are not strings (for example ``None`` for an ``<img>``
    element without a ``src`` attribute) are skipped explicitly instead of
    being hidden behind a catch-all exception handler.
    """
    return [link for link in links if isinstance(link, str) and keyword in link]


def focus_browser_window():
    """Click the centre of the screen so key presses go to the browser."""
    screen_width, screen_height = pyautogui.size()
    pyautogui.moveTo(screen_width // 2, screen_height // 2)
    sleep(1)
    pyautogui.click()


def scroll_to_bottom(driver, max_steps: int = MAX_SCROLL_STEPS) -> bool:
    """Press PageDown until the page bottom is reached.

    Args:
        driver: The Selenium WebDriver controlling the browser.
        max_steps: Maximum number of PageDown presses before giving up.

    Returns:
        True if the bottom was reached, False if *max_steps* ran out first.
    """
    for _ in range(max_steps):
        page_height = driver.execute_script("return document.body.scrollHeight;")
        scroll_position = driver.execute_script(
            "return window.pageYOffset || document.documentElement.scrollTop;"
        )
        # Queried on every pass because the window may be resized by the user.
        viewport_height = driver.execute_script("return window.innerHeight;")
        if is_at_bottom(scroll_position, page_height, viewport_height):
            print("已经滑动到底部")
            return True
        print("还未滑动到底部")
        # Simulate a PageDown key press, then give the page time to load.
        pyautogui.press('pagedown')
        sleep(SCROLL_PAUSE_S)
    return False


def collect_image_links(driver, timeout: int = 10) -> list:
    """Return the ``src`` attribute of every ``<img>`` element on the page.

    Waits up to *timeout* seconds for at least one ``<img>`` to be present.
    Elements without a ``src`` contribute ``None`` to the result.
    """
    wait = WebDriverWait(driver, timeout)
    images = wait.until(EC.presence_of_all_elements_located((By.TAG_NAME, 'img')))
    return [img.get_attribute('src') for img in images]


def main():
    """Open the page, scroll to its bottom, then list and filter image links."""
    driver = webdriver.Edge()
    try:
        driver.maximize_window()
        driver.get(TARGET_URL)
        sleep(PAGE_LOAD_WAIT_S)

        focus_browser_window()
        scroll_to_bottom(driver)

        image_links = collect_image_links(driver)
        for link in image_links:
            print(link)
        print('--------------------------------------------------------------------------------------------')

        keyword = input("请输入需要下载的图片链接的字符串:")
        for link in filter_links(image_links, keyword):
            print(link)

        # The answer is not acted upon (no download step exists); the prompt
        # only keeps the browser open until the user presses Enter.
        input("是否下载图片?(y/n)")
    finally:
        # Always release the browser, even after an error or Ctrl-C.
        driver.quit()


if __name__ == "__main__":
    main()