吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 382|回复: 15
上一主题 下一主题
收起左侧

[Python 原创] 乱码名的pdf自动重命名

[复制链接]
跳转到指定楼层
楼主
barnett2016 发表于 2024-11-18 16:01 回帖奖励
本帖最后由 barnett2016 于 2024-11-18 16:05 编辑

网站下载出来的 PDF,很多是由数字、字母组成的乱码名,文件一多,手动改名很麻烦。
这个脚本就是从 PDF 里提取文字,再将 PDF 改成中文名,方便以后搜索查看。
[Python] 纯文本查看 复制代码
import os
import re
import random
from pdfplumber import open as pdf_open
from tkinter import Tk, filedialog, Text, Button, Scrollbar, END
import threading
import time  # 提示缺模块,pip安装即可,python3.7以上皆可正常用

def extract_title_efficiently(pdf_path):
    """Return the title announced on the first page of a PDF, if any.

    The first page's text is searched for the marker ``标题:``; whatever
    follows the marker on that line is returned with surrounding
    whitespace stripped.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The stripped title string, or None when the marker is absent or
        when anything goes wrong while opening or reading the file.
    """
    title = None
    try:
        with pdf_open(pdf_path) as document:
            first_page_text = document.pages[0].extract_text()
            found = re.search(r'标题:(.+)', first_page_text)
            if found is not None:
                title = found.group(1).strip()
    except Exception:
        # Best effort: any failure simply means "no title available".
        title = None
    return title

def extract_chinese_text_from_first_four_lines(pdf_path):
    """Build a name candidate from the top of a PDF's first page.

    The first four lines of the first page are joined with spaces, and
    every character that is neither a CJK ideograph in the range
    U+4E00..U+9FFF nor a digit is removed.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The filtered text (possibly an empty string), or None when
        anything goes wrong while opening or reading the file.
    """
    try:
        with pdf_open(pdf_path) as document:
            page_text = document.pages[0].extract_text().strip()
            leading_lines = page_text.split('\n')[:4]
            joined = ' '.join(leading_lines).strip()
            return re.sub(r'[^\u4e00-\u9fff\d]+', '', joined)
    except Exception:
        # Best effort: any failure simply means "no text available".
        return None

def generate_valid_filename(text, max_length=50):
    """Turn arbitrary text into something usable as a file name.

    The text is first cut to ``max_length`` characters; afterwards each
    run of reserved characters (``< > : " / \\ | ? *``) or ASCII control
    characters is collapsed into a single ``-``.

    Args:
        text: Raw text to convert.
        max_length: Maximum number of characters kept from ``text``.

    Returns:
        The truncated, sanitised string.
    """
    truncated = text[:max_length]
    forbidden_run = re.compile(r'[<>:"/\\|?*\x00-\x1F\x7F]+')
    return forbidden_run.sub('-', truncated)

def generate_unique_filename(folder_path, filename, new_filename):
    """Return a file name that does not yet exist in ``folder_path``.

    The preferred name is ``<base>-<new_filename><ext>``, where ``base``
    and ``ext`` come from the original ``filename``. If that name is
    already taken, a numeric suffix is appended
    (``<base>-<new_filename>-1<ext>``, ``-2``, ...) until a free name is
    found.

    Args:
        folder_path: Directory in which the name must be unique.
        filename: Current file name; supplies the base name and extension.
        new_filename: Descriptive text to append to the base name.

    Returns:
        The unique file name (not a full path), including the extension.
    """
    base, extension = os.path.splitext(filename)
    stem = f"{base}-{new_filename}"
    candidate = stem + extension
    counter = 1
    # Keep the stem fixed and vary only the counter, so a collision never
    # makes the name grow by re-prefixing the base over and over.
    while os.path.exists(os.path.join(folder_path, candidate)):
        candidate = f"{stem}-{counter}{extension}"
        counter += 1
    return candidate

def load_skipped_files(skipped_file_path):
    """Read the set of file names recorded as skipped.

    Args:
        skipped_file_path: Path of the UTF-8 text file that holds one
            file name per line.

    Returns:
        A set with every line of the file, whitespace-stripped; an empty
        set when the file does not exist.
    """
    if not os.path.exists(skipped_file_path):
        return set()
    with open(skipped_file_path, 'r', encoding='utf-8') as handle:
        return set(map(str.strip, handle.readlines()))

def record_skipped_file(skipped_file_path, filename):
    """Append one file name to the skipped-files list.

    Args:
        skipped_file_path: Path of the UTF-8 text file to append to; it
            is created when missing.
        filename: Name to record; written on its own line.
    """
    with open(skipped_file_path, 'a', encoding='utf-8') as handle:
        handle.writelines((filename + '\n',))

def rename_pdf_with_title_or_extracted_text(folder_path, text_box, log_file_path, skipped_file_path):
    """Run one rename pass over a folder and report progress.

    Args:
        folder_path: Directory whose PDF files should be renamed.
        text_box: Widget that receives progress messages.
        log_file_path: File to which successful renames are appended.
        skipped_file_path: File listing names to skip; also receives the
            names skipped during this pass.
    """
    if os.path.isdir(folder_path):
        previously_skipped = load_skipped_files(skipped_file_path)
        update_text_box(text_box, f"正在处理文件夹:{folder_path}")
        walk_folder(folder_path, previously_skipped, text_box, log_file_path, skipped_file_path)
        update_text_box(text_box, "所有文件改名已完成。")
    else:
        # Nothing to walk: report the problem and stop.
        update_text_box(text_box, "错误:提供的路径不是一个目录。")

def walk_folder(folder_path, skipped_files, text_box, log_file_path, skipped_file_path):
    """Rename every PDF under ``folder_path`` after its extracted title.

    For each ``.pdf`` file (case-insensitive) whose name is not in
    ``skipped_files``, a title is looked up first via
    ``extract_title_efficiently`` and then via
    ``extract_chinese_text_from_first_four_lines``. When a title is found
    the file is renamed in place, the rename is appended to the log file
    and reported in the text box. Otherwise, or when any step raises, the
    file name is appended to the skipped list and the skip is reported.

    A file is never reported as skipped for slowness once it has been
    renamed, and the slow-file notice does not write a second entry to
    the skipped list.

    Args:
        folder_path: Root directory to walk recursively.
        skipped_files: Collection of bare file names to ignore.
        text_box: Widget that receives progress messages.
        log_file_path: UTF-8 file to which successful renames are appended.
        skipped_file_path: UTF-8 file to which skipped names are appended.
    """
    for root, _dirs, files in os.walk(folder_path):
        for filename in sorted(files):
            if not filename.lower().endswith('.pdf') or filename in skipped_files:
                continue

            pdf_path = os.path.join(root, filename)
            # Monotonic clock: elapsed time must not be affected by
            # system clock adjustments.
            started = time.monotonic()
            renamed = False
            try:
                title = extract_title_efficiently(pdf_path)
                if not title:
                    title = extract_chinese_text_from_first_four_lines(pdf_path)
                if title:
                    new_filename = generate_valid_filename(title)
                    new_filename = generate_unique_filename(root, filename, new_filename)
                    os.rename(pdf_path, os.path.join(root, new_filename))
                    renamed = True
                    with open(log_file_path, 'a', encoding='utf-8') as log_file:
                        log_file.write(f"文件 '{filename}' 已重命名为 '{new_filename}'\n")
                    update_text_box(text_box, f"文件 '{filename}' 已重命名为 '{new_filename}'")
                else:
                    record_skipped_file(skipped_file_path, filename)
                    update_text_box(text_box, f"跳过文件:'{filename}'(未找到标题或中文文本)")
            except Exception:
                # Best effort: one bad file must not abort the whole pass.
                record_skipped_file(skipped_file_path, filename)
                update_text_box(text_box, f"跳过文件:'{filename}'(处理时发生错误)")

            elapsed = time.monotonic() - started
            # Only files that were NOT renamed can meaningfully be called
            # skipped; those were already recorded above, so this is a
            # notice only and writes no duplicate or stale entry.
            if elapsed > 10 and not renamed:
                update_text_box(text_box, f"跳过文件:'{filename}'(处理时间超过10s)")

def update_text_box(text_box, message):
    """Append one line to the read-only message box and scroll to it.

    Args:
        text_box: Text widget that displays messages; it is switched to
            'normal' for the insertion and back to 'disabled' afterwards.
        message: Text to append; a newline is added after it.
    """
    def set_state(value):
        text_box.config(state=value)

    set_state('normal')
    text_box.insert(END, message + '\n')
    text_box.yview(END)  # keep the newest line in view
    set_state('disabled')

def do_renaming_thread(folder_path, text_box, log_file_path, skipped_file_path):
    """Thread target that forwards its arguments to the rename pass.

    Args:
        folder_path: Directory whose PDF files should be renamed.
        text_box: Widget that receives progress messages.
        log_file_path: File to which successful renames are appended.
        skipped_file_path: File listing skipped file names.
    """
    job_args = (folder_path, text_box, log_file_path, skipped_file_path)
    rename_pdf_with_title_or_extracted_text(*job_args)

# Create the main window.
root = Tk()
root.title("PDF Renamer by barnett2016")

# Create the text box used to display status messages.
text_box = Text(root, height=10)
text_box.pack()

# Create a vertical scrollbar that drives the text box's y-view.
scrollbar = Scrollbar(root, orient='vertical', command=text_box.yview)
scrollbar.pack(side='right', fill='y')

# Connect the text box back to the scrollbar so it tracks scrolling.
text_box.config(yscrollcommand=scrollbar.set)

# Folder chosen by the user; stays None until browse_folder() assigns it.
selected_folder = None

def browse_folder():
    """Ask the user for a folder and remember it in ``selected_folder``.

    When the dialog is cancelled (an empty value is returned), the
    previously selected folder is kept instead of being overwritten.
    A confirmation line is written to the message box after a folder
    has been chosen.
    """
    global selected_folder
    chosen = filedialog.askdirectory()
    if not chosen:
        # Dialog cancelled: do not discard an earlier selection.
        return
    selected_folder = chosen
    update_text_box(text_box, f"已选择的文件夹:{selected_folder}")

# Worker thread started by the most recent do_renaming() call, if any.
_renaming_thread = None


def do_renaming():
    """Start the rename pass for the selected folder in a worker thread.

    The log file (``改名记录.txt``) and the skipped list (``skipped.txt``)
    are placed inside the selected folder. An error line is shown when no
    folder has been selected yet. While a previous pass is still running,
    a second one is refused so that two workers never rename the same
    files concurrently.
    """
    global _renaming_thread
    if not selected_folder:
        update_text_box(text_box, "错误:请先选择文件夹。")
        return
    if _renaming_thread is not None and _renaming_thread.is_alive():
        update_text_box(text_box, "错误:重命名正在进行中,请稍候。")
        return

    log_file_path = os.path.join(selected_folder, "改名记录.txt")
    skipped_file_path = os.path.join(selected_folder, "skipped.txt")
    _renaming_thread = threading.Thread(
        target=do_renaming_thread,
        args=(selected_folder, text_box, log_file_path, skipped_file_path),
    )
    _renaming_thread.start()

# Button that opens the folder chooser.
browse_button = Button(root, text="打开PDF所在文件夹", command=browse_folder)
browse_button.pack()

# Button that starts the rename operation.
rename_button = Button(root, text="开始重命名PDF文件", command=do_renaming)
rename_button.pack()

# Enter the GUI main loop.
root.mainloop()

免费评分

参与人数 3吾爱币 +9 热心值 +3 收起 理由
feilon5555 + 1 + 1 热心回复!
苏紫方璇 + 7 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
niandon + 1 + 1 我很赞同!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

沙发
cku 发表于 2024-11-18 16:39
发票也可重命名吗
3#
tyy474 发表于 2024-11-18 17:07
最麻烦就是高速发票,加油发票,数量多了,下载回来都是乱七八糟的名字,头都大了几圈
4#
YiRan777 发表于 2024-11-18 17:24
5#
HjiaLe 发表于 2024-11-18 17:45
tyy474 发表于 2024-11-18 17:07
最麻烦就是高速发票,加油发票,数量多了,下载回来都是乱七八糟的名字,头都大了几圈

那些用的话可能会乱码
6#
loveaqc 发表于 2024-11-18 18:01
HjiaLe 发表于 2024-11-18 17:45
那些用的话可能会乱码

好的谢谢
7#
qcf 发表于 2024-11-18 18:02
好厉害。。。。。
8#
cxx0515 发表于 2024-11-18 18:26
cku 发表于 2024-11-18 16:39
发票也可重命名吗

能获取文件名的应该都可以,发票可以根据字段来组合重命名
9#
x3622691 发表于 2024-11-18 19:20
非常好,改名方便了!
10#
3291952274 发表于 2024-11-18 19:58
谢谢大佬
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-24 16:00

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表