import os
import re
from contextlib import ExitStack

from PyPDF4.pdf import PdfFileReader as pr, PdfFileWriter as pw
def _first_number_in_path(pdf_path):
    """Return the first run of digits in *pdf_path* as an int, or -1 if none."""
    match = re.search(r'\d+', pdf_path)
    return int(match.group()) if match else -1


def _decrypt_interactively(pdf, pdf_file):
    """Prompt for a password until *pdf* is unlocked or the user gives up.

    Returns True once a password is accepted, False when the user submits
    an empty line to skip the file.
    """
    print(f'{pdf_file} 是加密文件')
    res = input('输入密码(回车键跳过)')
    while res:
        try:
            # decrypt() reports a wrong password by returning 0 rather than
            # raising, so the return value has to be checked explicitly.
            unlocked = bool(pdf.decrypt(res))
        except Exception:
            # Any failure while decrypting (e.g. an unsupported encryption
            # scheme) is reported to the user the same way as a bad password.
            unlocked = False
        if unlocked:
            return True
        print('密码错误')
        res = input('输入密码(回车键跳过)')
    return False


def MergePDF(dir_path):
    """Merge every PDF found under *dir_path* into ``merged.pdf``.

    The directory tree is walked recursively. Files are ordered by the first
    number appearing in their path (paths without any number come first,
    the rest ascending). Unreadable files are reported and skipped;
    encrypted files trigger an interactive password prompt and are skipped
    when the prompt is left empty. The result is written to ``merged.pdf``
    in the current working directory, after which the function prints
    ``Done`` and waits for the user to press Enter.

    Args:
        dir_path: Root directory to search for ``.pdf`` files.
    """
    output_path = 'merged.pdf'
    output_id = os.path.normcase(os.path.realpath(output_path))

    # Collect every PDF below dir_path as a path joined onto the walk root.
    pdf_files = []
    for path, _, files in os.walk(dir_path):
        for name in files:
            if not name.lower().endswith('.pdf'):
                continue
            candidate = os.path.join(path, name)
            # A merged.pdf left over from a previous run must not be used as
            # an input: it is truncated when opened for writing below, while
            # its pages would still be read lazily from the same file.
            if os.path.normcase(os.path.realpath(candidate)) == output_id:
                continue
            pdf_files.append(candidate)
    pdf_files.sort(key=_first_number_in_path)

    merged_file = pw()
    # Source streams must stay open until the writer has produced the output
    # (pages are read from them at write time), so they are closed together
    # when the ExitStack exits.
    with ExitStack() as open_sources:
        for pdf_file in pdf_files:
            print(pdf_file)
            try:
                stream = open(pdf_file, "rb")
            except OSError:
                print(f'{pdf_file}无法解析')
                continue
            try:
                pdf = pr(stream)
            except Exception:
                stream.close()
                print(f'{pdf_file}无法解析')
                continue
            if pdf.isEncrypted and not _decrypt_interactively(pdf, pdf_file):
                stream.close()
                continue
            open_sources.callback(stream.close)
            # Append each page of this document to the merged output.
            for page_index in range(pdf.getNumPages()):
                merged_file.addPage(pdf.getPage(page_index))
        with open(output_path, "wb") as outputfile:
            # Note the direction: the writer writes itself into the file
            # object, the reverse of the usual ``file.write(data)`` call.
            merged_file.write(outputfile)
    print('Done')
    input()
if __name__ == '__main__':
    # Folder that holds the PDF files to merge: the current working directory.
    dir_path = '.'
    MergePDF(dir_path=dir_path)