合并效果:
针对新版PyPDF2 更新的代码为:
[Python] 纯文本查看 复制代码
from PIL import Image, ImageFont, ImageDraw
import re
import os
from PyPDF2 import PdfReader, PdfWriter
class PDFMerge:
    """Merge the PDFs of a directory tree into one PDF with nested bookmarks.

    PDFs found directly in a directory are appended in natural order, each
    with an outline (bookmark) entry. Every sub-directory contributes one
    generated title page showing the directory name, followed by its own
    content; its outline entry becomes the parent of the entries beneath it.

    Author: 小小明, blog: https://xxmdmst.blog.csdn.net/
    """

    # Scratch file used to hand the rendered title page from PIL to PyPDF2.
    # It is created in the current working directory and removed after use.
    _WATERMARK_PATH = "watermark.pdf"

    # Matches a run of non-digits followed by a run of digits. Must be a raw
    # string: '\D' and '\d' are invalid escapes in a normal string literal.
    _NATURAL_CHUNK = re.compile(r'(\D+)(\d+)')

    @staticmethod
    def get_mark_img(text, size=30):
        """Render *text* as a black-on-white image and save it as a PDF page.

        The image is saved to ``watermark.pdf`` in the current working
        directory (overwriting any existing file of that name).

        Args:
            text: The text to draw.
            size: Font size; also used to derive the image dimensions.

        Returns:
            The PIL image that was drawn and saved.
        """
        # One `size`-wide cell per character; presumably sized for
        # full-width CJK glyphs -- verify for other fonts/scripts.
        width = len(text) * size
        mark = Image.new(mode='RGB', size=(width, size + 20),
                         color=(255, 255, 255))
        # The 'msyhbd.ttc' font file must be resolvable by PIL on this system.
        font = ImageFont.truetype('msyhbd.ttc', size=size)
        ImageDraw.Draw(im=mark).text(
            xy=(0, 0), text=text, fill="black", font=font)
        mark.save(PDFMerge._WATERMARK_PATH, "PDF",
                  resolution=100.0, save_all=True)
        return mark

    @staticmethod
    def windows_files_sort(files):
        """Sort *files* in place so that digit runs compare numerically.

        For example ``a2.pdf`` sorts before ``a10.pdf``. Returns ``None``,
        like ``list.sort``.
        """
        def natural_key(name):
            # Wrapping the name as 'a<name>0' guarantees it starts with a
            # non-digit and ends with a digit, so the pattern consumes the
            # whole string as (text, number) pairs.
            return [(text, int(number))
                    for text, number
                    in PDFMerge._NATURAL_CHUNK.findall(f'a{name}0')]

        files.sort(key=natural_key)

    @staticmethod
    def _scan_dir(path):
        """Return ``(pdf_files, sub_dirs)`` found directly under *path*.

        Both lists hold paths joined with *path* and are naturally sorted.
        The ``.pdf`` extension is matched case-insensitively, and the
        ``__MACOSX`` directory is skipped.
        """
        files = []
        dirs = []
        for entry in os.listdir(path):
            full_path = os.path.join(path, entry)
            if os.path.isfile(full_path):
                # Case-insensitive so that e.g. 'REPORT.PDF' is not dropped.
                if entry.lower().endswith(".pdf"):
                    files.append(full_path)
            elif os.path.isdir(full_path) and entry != "__MACOSX":
                dirs.append(full_path)
        PDFMerge.windows_files_sort(files)
        PDFMerge.windows_files_sort(dirs)
        return files, dirs

    def __merge_pdf_in(self, path, parent=None):
        """Append everything under *path* to ``self.pdf_writer``, recursively.

        Args:
            path: Directory to process.
            parent: Outline item under which new outline entries are nested,
                or ``None`` for top-level entries.

        Updates ``self.pagenum_total`` as pages are appended.
        """
        files, dirs = PDFMerge._scan_dir(path)

        for pdf_path in files:
            pdf_reader = PdfReader(pdf_path)
            pdf_file = os.path.basename(pdf_path)
            page_count = len(pdf_reader.pages)
            print(pdf_file, page_count, self.pagenum_total)
            for page in pdf_reader.pages:
                page.compress_content_streams()
                self.pdf_writer.add_page(page)
            # The bookmark points at the first page of this file, i.e. the
            # running total before its pages are counted.
            self.pdf_writer.add_outline_item(
                os.path.splitext(pdf_file)[0], self.pagenum_total,
                parent=parent)
            self.pagenum_total += page_count

        for sub_dir in dirs:
            title = os.path.basename(sub_dir)
            print(title, self.pagenum_total)
            PDFMerge.get_mark_img(title)
            try:
                watermark = PdfReader(PDFMerge._WATERMARK_PATH).pages[0]
                self.pdf_writer.add_page(watermark)
            finally:
                # Never leave the scratch file behind, even on failure.
                os.remove(PDFMerge._WATERMARK_PATH)
            parent_id = self.pdf_writer.add_outline_item(
                title, self.pagenum_total, parent=parent)
            self.pagenum_total += 1  # the generated title page
            self.__merge_pdf_in(sub_dir, parent=parent_id)

    def merge_pdf(self, path, out_name):
        """Merge all PDFs under *path* and write the result to *out_name*.

        Args:
            path: Root directory to scan.
            out_name: Path of the merged PDF to write.
        """
        self.pagenum_total = 0
        self.pdf_writer = PdfWriter()
        self.__merge_pdf_in(path)
        print("总页数:", self.pagenum_total)
        print("开始写出到文件")
        with open(out_name, "wb") as outputfile:
            self.pdf_writer.write(outputfile)
        print("PDF文件合并完成")
if __name__ == '__main__':
    # Example run: merge every PDF below the source directory into a single
    # bookmarked file written to the current working directory.
    source_dir = r"D:\tmpVoice\pdf"
    output_pdf = "test.pdf"
    pdfmerge = PDFMerge()
    pdfmerge.merge_pdf(source_dir, output_pdf)
现成工具下载地址:https://gitcode.net/as604049322/python_gui/-/raw/master/pdf_tools_v0.2.exe?inline=false
原帖:https://blog.csdn.net/as604049322/article/details/126647906