本帖最后由 858983646 于 2024-6-1 10:05 编辑
用ai改了下,可以多文件同时处理,但是就是编译exe后反复输入文件号,py代码直接运行就没这个问题,Python没学过搞不定了。求教大佬[Asm] 纯文本查看 复制代码 import os
import concurrent.futures
from pypdf import PdfReader, PdfWriter
from tqdm import tqdm
from PIL import Image, ImageEnhance
from io import BytesIO
# 将图片转换为黑白,并增强对比度、亮度、锐度和色彩
def blacky(im):
im = im.convert('L')
im = ImageEnhance.Contrast(im).enhance(3)
im = ImageEnhance.Brightness(im).enhance(1.5)
im = ImageEnhance.Sharpness(im).enhance(2)
im = ImageEnhance.Color(im).enhance(1.5)
threshold = 128
table = [0 if i < threshold else 1 for i in range(256)]
new_image = im.point(table, '1')
imgbuffer = BytesIO()
new_image.save(imgbuffer, format="TIFF", compression='group4', optimize=True, dpi=[300, 300])
return Image.open(imgbuffer)
def get_page_number_from_indirect(reader, indirect_reference):
for i, page in enumerate(reader.pages):
if page.indirect_reference == indirect_reference:
return i
return None
def add_bookmarks_to_writer(writer, reader, outlines, parent=None):
parents = [] # 初始化parents列表
for item in outlines:
if isinstance(item, list):
# 如果书签有子项(嵌套的书签),递归处理
add_bookmarks_to_writer(writer, reader, item, parent=parent)
parents.append(parent) # 将当前父书签添加到parents列表中
else:
title = item.get('/Title')
indirect_reference = item.get('/Page')
page_num = get_page_number_from_indirect(reader, indirect_reference)
if '/Count' in item and item['/Count'] < 0:
new_parent = writer.add_outline_item(title, page_num, parent=parent)
parents.append(new_parent) # 将新创建的书签作为父书签添加到parents列表中
else:
writer.add_outline_item(title, page_num, parent=parent)
def process_pdf(file_index, choice, pdf_file, quality=None):
try:
reader = PdfReader(pdf_file)
writer = PdfWriter()
for page in tqdm(reader.pages, desc=f"Processing {pdf_file}"):
writer.add_page(page)
if choice == 1:
if reader.metadata is not None:
writer.add_metadata(reader.metadata)
else:
print("没有元数据可以添加。")
elif choice == 2:
writer.remove_images()
elif choice == 3:
if quality is None:
raise ValueError("图片质量值未提供")
for page in tqdm(writer.pages, desc=f"Compressing images {pdf_file}"):
for img in page.images:
img.replace(img.image, quality=quality)
elif choice == 4:
for page in tqdm(writer.pages, desc=f"Applying lossless compression {pdf_file}"):
page.compress_content_streams()
elif choice == 5:
for page in tqdm(writer.pages, desc=f"Binarizing images {pdf_file}"):
for img in page.images:
img.replace(blacky(img.image))
if reader.outline:
add_bookmarks_to_writer(writer, reader, reader.outline)
output_file = f"reduced_{os.path.splitext(pdf_file)[0]}.pdf"
with open(output_file, "wb") as f:
writer.write(f)
print(f"Processed file saved as {output_file}")
except Exception as e:
print(f"处理文件 {pdf_file} 时发生错误:{e}")
def main():
pdf_files = [f for f in os.listdir('.') if f.endswith('.pdf')]
if not pdf_files:
print("当前目录下没有找到PDF文件。")
return
# 用户交互部分,一次性获取所有输入
indices = input_indices(pdf_files)
choice = input_choice()
quality = input_quality(choice)
# 处理文件之前,先检查是否有输入错误,如果有则不执行任何操作
if indices is None or choice is None or (choice == 3 and quality is None):
return # 退出程序
# 获取CPU核心数
cpu_cores = os.cpu_count() or 1
# 使用ProcessPoolExecutor并行处理PDF文件
with concurrent.futures.ProcessPoolExecutor(max_workers=cpu_cores) as executor:
futures = [executor.submit(process_pdf, idx, choice, pdf_files[idx], quality if choice == 3 else None) for idx in indices]
for future in concurrent.futures.as_completed(futures):
try:
future.result()
except Exception as exc:
print(f"处理文件 {pdf_files[indices.index(idx)]} 时发生错误:{exc}")
# List all PDF files in the current directory
pdf_files = [f for f in os.listdir('.') if f.endswith('.pdf')]
for idx, file in enumerate(pdf_files):
print(f"{idx}: {file}")
def input_indices(pdf_files):
while True:
selection = input("输入'all'以处理所有PDF文件,或者输入用逗号分隔的文件索引:").strip().lower()
if selection == 'all':
return range(len(pdf_files))
else:
try:
indices = [int(idx) for idx in selection.split(',')]
if all(0 <= idx < len(pdf_files) for idx in indices):
return indices
else:
print("所有索引必须在0到{}之间。".format(len(pdf_files)-1))
except ValueError:
print("输入无效。请输入'all'或者用逗号分隔的索引。")
def input_choice():
while True:
try:
print("选择压缩PDF的方式")
print("1: 删除重复对象")
print("2: 删除图像")
print("3: 降低图片质量")
print("4: 使用无损压缩")
print("5: 二值化压缩为tif")
choice = int(input("输入选择 (1-5): "))
if choice < 1 or choice > 5:
raise ValueError
return choice
except ValueError:
print("输入无效。请输入1到5之间的数字。")
def input_quality(choice):
if choice == 3:
while True:
quality = input("请输入图片质量(1-100):")
if quality.isdigit() and 1 <= int(quality) <= 100:
return int(quality)
else:
print("输入错误,请重新输入图片质量(1-100)。")
return None
if __name__ == "__main__":
main() |