爬取文库中可预览的页面;各页预览图片的链接有规律,可由第一页的链接推出后续各页。
import os
from PIL import Image
from fpdf import FPDF
import shutil
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
import requests
def download_image_with_requests(image_url, save_path, timeout=30):
    """Download an image with ``requests`` and write it to ``save_path``.

    Errors raised by ``requests`` (HTTP status, connection, timeout, or any
    other ``RequestException``) are printed to stdout and not re-raised, so a
    single failed URL does not stop the caller. Errors from opening or writing
    the local file are not caught here.

    :param image_url: URL of the image to fetch.
    :param save_path: Destination path, including the file name.
    :param timeout: Seconds to wait for the server before giving up. Without
        a timeout ``requests`` may block indefinitely and the ``Timeout``
        handler below would never run.
    :return: ``True`` if the image was written, ``False`` if a request error
        was reported.
    """
    saved = False
    try:
        # With stream=True the connection stays checked out until the body is
        # fully read or the response is closed; the context manager guarantees
        # it is released even when an error interrupts the download.
        with requests.get(image_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # raises HTTPError on 4xx/5xx
            with open(save_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        saved = True
        print(f"Image downloaded successfully to {save_path}")
    except requests.exceptions.HTTPError as errh:
        print(f"Http Error: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"OOps: Something Else: {err}")
    return saved
def merge_jpg_to_pdf_and_delete_folder(folder_path):
    """Merge the JPEG files in ``folder_path`` into one PDF, then delete the folder.

    Each ``.jpg``/``.jpeg`` file (extension matched case-insensitively) becomes
    one page. Files whose names are plain numbers (``1.jpg``, ``2.jpg``, ...)
    are placed in numeric order; any other names follow, sorted alphabetically.
    The PDF is written to the parent directory of ``folder_path`` and named
    after the current local time (``YYYYMMDDHHMMSS.pdf``). Afterwards the
    folder and everything in it is removed.

    :param folder_path: Directory containing the images to merge.
    """
    # FPDF() defaults to A4 portrait with millimetre units.
    page_width_mm, page_height_mm = 210, 297

    def page_order(name):
        """Sort key: numerically named files first, in numeric order."""
        stem = os.path.splitext(name)[0]
        if stem.isdecimal():
            return (0, int(stem), name)
        return (1, 0, name)

    pdf = FPDF()

    # os.listdir() returns names in arbitrary order, and a plain string sort
    # would put "10.jpg" before "2.jpg", so sort explicitly.
    jpg_names = [
        name for name in os.listdir(folder_path)
        if name.lower().endswith((".jpg", ".jpeg"))
    ]
    for name in sorted(jpg_names, key=page_order):
        image_path = os.path.join(folder_path, name)
        # Read the pixel size to decide which page dimension limits the image.
        with Image.open(image_path) as img:
            width_px, height_px = img.size
        pdf.add_page()
        if height_px * page_width_mm > width_px * page_height_mm:
            # Proportionally taller than the page: fit to the page height and
            # let FPDF derive the width, otherwise the bottom would overflow.
            pdf.image(image_path, 0, 0, h=page_height_mm)
        else:
            pdf.image(image_path, 0, 0, page_width_mm)

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = f"{timestamp}.pdf"
    # Normalise before taking the parent: with a trailing separator
    # os.path.dirname() would return the folder itself, and the PDF would be
    # written into the directory that is deleted below.
    parent_dir = os.path.dirname(os.path.normpath(folder_path))
    pdf_path = os.path.join(parent_dir, filename)
    pdf.output(pdf_path, "F")

    # Remove the image folder and all of its contents.
    shutil.rmtree(folder_path)
    print(f"PDF file {pdf_path} created and folder {folder_path} deleted.")
# Working folder for the downloaded page images; replace with your own path.
folder_path = 'D:/图片'
# Only this many leading pages are fetched (the cap used by the original
# script; presumably the number of pages the site exposes as a free preview).
max_preview_pages = 5

# exist_ok=True: do not fail if the folder is already there.
os.makedirs(folder_path, exist_ok=True)

driver = webdriver.Edge()
try:
    driver.maximize_window()
    print("网址格式示例:https://www.renrendoc.com/paper/239626450.html")
    url = input("请输入类似文章网址:")
    driver.get(url)
    image = driver.find_element(By.XPATH, '//*[@id="page"]/div[2]/img')
    # Scroll the first page image into view and give the page a moment before
    # reading its src attribute.
    driver.execute_script("arguments[0].scrollIntoView();", image)
    sleep(1)
    lk = image.get_attribute("src")
    # NOTE(review): assumes the "total" element holds the document's page count.
    n = int(driver.find_element(By.CLASS_NAME, "total").text)
finally:
    # Everything needed has been read from the page; always close the browser
    # so the WebDriver process is not left running, even after an error.
    driver.quit()

ims = [lk]
print(ims[0])
# Pages 2..last_page inclusive. The previous range(2, n) only reached the last
# page when n had been bumped to 6, so documents with five or fewer pages lost
# their final page.
last_page = min(n, max_preview_pages)
for i in range(2, last_page + 1):
    # NOTE(review): assumes the first image URL ends in "1.gif" (5 characters),
    # so later pages differ only in that page number.
    ims.append(lk[:-5] + str(i) + '.gif')
    print(ims[i - 1])

# Save each image as <page number>.jpg inside the working folder.
for page_number, im in enumerate(ims, start=1):
    save_path = os.path.join(folder_path, f"{page_number}.jpg")
    download_image_with_requests(im, save_path)

# Merge the downloaded pages into a PDF and remove the working folder.
merge_jpg_to_pdf_and_delete_folder(folder_path)