# Forum code-viewer header ("[Python] plain-text view / copy code") -- paste residue, not part of the program.
import tkinter as tk
from tkinter import filedialog, messagebox
from openpyxl import load_workbook
import os
import threading
from docx import Document
class WordDocumentGenerator:
    """Tkinter front end that batch-fills a .docx template from Excel rows.

    The first worksheet row is read as field names. A template placeholder
    is the field name wrapped in braces, e.g. ``{交易卡号}``. Rows are first
    bucketed by the value in the first column (which also names the output
    file and, optionally, a sub-folder) and then by the user-selected group
    fields. Template paragraphs that mention a non-group field are repeated
    once per row of a group.

    NOTE(review): generation runs in a worker thread that writes to the log
    widget and opens message boxes directly, as the original code did.
    Tkinter is not guaranteed to be thread-safe on every build -- confirm
    on the target platform or marshal these calls to the main thread.
    """

    def __init__(self, root):
        """Build all widgets inside *root* and show the usage notes."""
        self.root = root
        self.root.title("Docx文件批量生成器")
        self.root.geometry("560x750")
        self.root.resizable(False, False)
        self.stop_flag = False
        # Worker thread of the generation currently in flight, if any.
        self._worker = None
        # Excel file picker
        tk.Label(self.root, text="选择 Excel 文件:").grid(row=0, column=0, padx=10, pady=10, sticky="e")
        self.excel_entry = tk.Entry(self.root, width=50)
        self.excel_entry.grid(row=0, column=1, padx=10, pady=10)
        tk.Button(self.root, text="浏览", command=self.select_excel_file).grid(row=0, column=2, padx=10, pady=10)
        # Word template picker
        tk.Label(self.root, text="选择 Word 模板:").grid(row=1, column=0, padx=10, pady=10, sticky="e")
        self.word_entry = tk.Entry(self.root, width=50)
        self.word_entry.grid(row=1, column=1, padx=10, pady=10)
        tk.Button(self.root, text="浏览", command=self.select_word_template).grid(row=1, column=2, padx=10, pady=10)
        # Output directory picker
        tk.Label(self.root, text="选择输出目录:").grid(row=2, column=0, padx=10, pady=10, sticky="e")
        self.output_entry = tk.Entry(self.root, width=50)
        self.output_entry.grid(row=2, column=1, padx=10, pady=10)
        tk.Button(self.root, text="浏览", command=self.select_output_directory).grid(row=2, column=2, padx=10, pady=10)
        # "Create one folder per document" option
        self.folder_check_var = tk.BooleanVar()
        tk.Checkbutton(self.root, text="是否生成文件夹", variable=self.folder_check_var).grid(row=3, column=0,
                                                                                       columnspan=3)
        # File-name prefix / suffix; both entries and the suffix label share
        # grid cell (4, 1) and are separated only by their sticky side.
        tk.Label(self.root, text="文件名前缀:").grid(row=4, column=0, padx=10, pady=10, sticky="e")
        self.prefix_entry = tk.Entry(self.root, width=20)
        self.prefix_entry.grid(row=4, column=1, padx=10, pady=10, sticky="w")
        tk.Label(self.root, text="文件名后缀:").grid(row=4, column=1, padx=10, pady=10)
        self.suffix_entry = tk.Entry(self.root, width=20)
        self.suffix_entry.grid(row=4, column=1, padx=10, pady=10, sticky="e")
        # Multi-select list of header fields used for grouping
        self.field_listbox_label = tk.Label(self.root, text="选择分组字段 \n(可以多选):")
        self.field_listbox_label.grid(row=5, column=0, padx=10, pady=10, sticky="e")
        self.field_listbox = tk.Listbox(self.root, selectmode=tk.MULTIPLE, height=10, width=50)
        self.field_listbox.grid(row=5, column=1, padx=10, pady=10)
        # Generate / stop buttons, centred in their own frame
        button_frame = tk.Frame(self.root)
        button_frame.grid(row=6, column=0, columnspan=3)
        # Equal column weights so the two buttons are spread evenly
        button_frame.grid_columnconfigure(0, weight=1)
        button_frame.grid_columnconfigure(1, weight=1)
        tk.Button(button_frame, text="生成文档", command=self.generate_word_docs).grid(row=0, column=0, padx=10,
                                                                                   pady=10)
        tk.Button(button_frame, text="停止生成", command=self.stop_generating).grid(row=0, column=1, padx=10, pady=10)
        # Log box, pre-filled with the usage notes
        self.log_text = tk.Text(self.root, height=19, width=70)
        self.log_text.grid(row=7, column=0, columnspan=3, padx=10, pady=10)
        self.log_text.config(state=tk.NORMAL)
        self.log_text.insert(tk.END,
                             "1.Excel中的字段名要加上{}放到Word文件中。例如字段 Excel中要填充的字段为:交易卡号,在Word中以{交易卡号}填写。\n\n")
        self.log_text.insert(tk.END,
                             "2.EXCEL第一列字段的值为生成的文件夹名字和默认的Word文档名字,Word文档名可以后面添加前缀和后缀。\n\n")
        self.log_text.insert(tk.END,
                             "3.如有多行数据需要分组处理,指定分组字段,对数据进行多行分组处理,从而生成具有特定分组内容的文档。\n\n")
        self.log_text.insert(tk.END,
                             "例如:张三有3个账号,那么和张三相关的字段(每行都不变的)需要选择分组,比如身份证号,姓名等,未分组的字段会自动根据模板生成多段内容。比如账号1、账号2、账号3\n")

    # ------------------------------------------------------------------
    # Widget callbacks
    # ------------------------------------------------------------------
    def select_excel_file(self):
        """Ask for an .xlsx file, store its path and list its header fields."""
        file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx")])
        if not file_path:
            # Dialog cancelled: keep the previous selection untouched.
            return
        self.excel_entry.delete(0, tk.END)
        self.excel_entry.insert(0, file_path)
        self.load_excel_fields(file_path)

    def select_word_template(self):
        """Ask for a .docx template and store its path."""
        file_path = filedialog.askopenfilename(filetypes=[("Word files", "*.docx")])
        if not file_path:
            return
        self.word_entry.delete(0, tk.END)
        self.word_entry.insert(0, file_path)

    def select_output_directory(self):
        """Ask for the output directory and store its path."""
        directory = filedialog.askdirectory()
        if not directory:
            return
        self.output_entry.delete(0, tk.END)
        self.output_entry.insert(0, directory)

    def load_excel_fields(self, excel_file):
        """Fill the group-field list box with the header names of *excel_file*.

        A workbook that cannot be opened is reported in an error dialog and
        leaves the list box empty instead of raising into the Tk event loop.
        """
        self.field_listbox.delete(0, tk.END)
        try:
            wb = load_workbook(excel_file, data_only=True)
            try:
                header = self._header_columns(wb.active)
            finally:
                wb.close()
        except Exception as exc:  # UI boundary: surface any load failure
            messagebox.showerror("错误", f"无法读取 Excel 文件:\n{exc}")
            return
        for field_name, _column in header:
            self.field_listbox.insert(tk.END, field_name)

    def generate_word_docs(self):
        """Validate the form and start document generation in a worker thread."""
        excel_file = self.excel_entry.get()
        word_template = self.word_entry.get()
        output_dir = self.output_entry.get()
        create_folders = self.folder_check_var.get()
        prefix = self.prefix_entry.get()
        suffix = self.suffix_entry.get()
        # Group fields currently highlighted by the user
        selected_fields = [self.field_listbox.get(i) for i in self.field_listbox.curselection()]
        if not (excel_file and word_template and output_dir):
            messagebox.showwarning("警告", "请先选择 Excel 文件、Word 模板和输出目录!")
            return
        if self._worker is not None and self._worker.is_alive():
            # A second worker would write the same files concurrently.
            messagebox.showwarning("警告", "文档正在生成中,请等待完成或先点击“停止生成”。")
            return
        self.stop_flag = False
        self.log_text.delete("1.0", tk.END)
        self._worker = threading.Thread(
            target=self.fill_word_from_excel,
            args=(excel_file, word_template, output_dir, create_folders, prefix, suffix,
                  selected_fields))
        self._worker.start()

    def stop_generating(self):
        """Request the running generation to stop before its next document."""
        self.stop_flag = True

    # ------------------------------------------------------------------
    # Generation
    # ------------------------------------------------------------------
    def fill_word_from_excel(self, excel_file, word_template, output_dir, create_folders, prefix, suffix,
                             selected_fields):
        """Generate every document and report the outcome to the user.

        Intended to run in the worker thread started by
        ``generate_word_docs``. Any failure is written to the log box and
        shown in an error dialog rather than silently ending the thread.

        Args:
            excel_file: Path of the .xlsx data source.
            word_template: Path of the .docx template.
            output_dir: Directory that receives the generated files.
            create_folders: When true, each first-column value gets its own
                sub-folder below *output_dir*.
            prefix: Text placed before the first-column value in file names.
            suffix: Text placed after the first-column value in file names.
            selected_fields: Header names chosen as group fields; when empty
                the first header field is used as the group key.
        """
        try:
            document_count = self._generate_documents(
                excel_file, word_template, output_dir, create_folders, prefix, suffix, selected_fields)
        except Exception as exc:  # thread boundary: nothing above us would report it
            self._log(f"生成失败:{exc}\n")
            messagebox.showerror("错误", f"生成文档时出错:\n{exc}")
            return
        if self.stop_flag:
            self._log("已停止生成。\n")
            messagebox.showinfo("已停止", f"已停止生成,共生成了 {document_count} 个文档。")
        else:
            messagebox.showinfo("完成", f"所有 Word 文档已生成完成!共生成了 {document_count} 个文档。")
        if messagebox.askyesno("打开输出目录", "是否打开输出目录?"):
            self._open_directory(output_dir)

    def _generate_documents(self, excel_file, word_template, output_dir, create_folders, prefix, suffix,
                            selected_fields):
        """Read the workbook, group its rows and save one document per group.

        Returns:
            The number of documents written.

        Raises:
            ValueError: The template is not a .docx file, the sheet has no
                header, or a selected group field is missing from the header.
        """
        self._log("数据读取中...\n")
        if not word_template.lower().endswith('.docx'):
            raise ValueError("仅支持 .docx 格式的 Word 模板。")

        wb = load_workbook(excel_file, data_only=True)
        try:
            sheet = wb.active
            header = self._header_columns(sheet)
            if not header:
                raise ValueError("Excel 文件第一行没有表头字段。")
            # Single pass over the sheet: (first-column value, {field: value}).
            labelled_rows = []
            for row in range(2, sheet.max_row + 1):
                label = sheet.cell(row=row, column=1).value
                row_data = {name: sheet.cell(row=row, column=col).value for name, col in header}
                labelled_rows.append((label, row_data))
        finally:
            wb.close()

        field_names = [name for name, _column in header]
        # With no selection the first header field acts as the group key.
        group_field_names = selected_fields if selected_fields else [field_names[0]]
        missing = [field for field in group_field_names if field not in field_names]
        if missing:
            raise ValueError("Excel 中不存在分组字段:" + "、".join(str(field) for field in missing))
        non_group_fields = [field for field in field_names if field not in selected_fields]

        documents = self._group_rows(labelled_rows, group_field_names)
        document_count = 0
        for label, groups in documents.items():
            if self.stop_flag:
                break
            folder_path = os.path.join(output_dir, label) if create_folders else output_dir
            os.makedirs(folder_path, exist_ok=True)
            for index, rows in enumerate(groups.values(), start=1):
                if self.stop_flag:
                    break
                # Fresh template per group: a filled document has no
                # placeholders left for the next group to replace.
                doc = self._render_document(word_template, rows, non_group_fields)
                # Further groups under the same label get a numeric tail so
                # they do not overwrite the first file.
                name_tail = "" if index == 1 else f"_{index}"
                output_file = os.path.join(folder_path, f"{prefix}{label}{suffix}{name_tail}.docx")
                doc.save(output_file)
                self._log(f"已生成文件:{output_file}\n")
                document_count += 1
        return document_count

    def _render_document(self, word_template, rows, non_group_fields):
        """Return a filled copy of the template for one group of rows.

        Body paragraphs that contain a non-group placeholder are filled from
        ``rows[0]`` and cloned once for every further row, in row order,
        directly below the original paragraph. Table cells are filled from
        ``rows[0]`` only.
        """
        doc = Document(word_template)
        repeated = [paragraph for paragraph in doc.paragraphs
                    if self._has_placeholder(paragraph.text, non_group_fields)]
        for template_paragraph in repeated:
            anchor = template_paragraph
            for row_data in rows[1:]:
                clone = self._clone_paragraph(doc, template_paragraph)
                self._replace_placeholders(clone, row_data)
                # add_paragraph() appended the clone at the end of the body;
                # addnext() moves it behind the previous clone so the rows
                # keep their sheet order.
                anchor._element.addnext(clone._element)
                anchor = clone

        first_row_data = rows[0]
        for paragraph in doc.paragraphs:
            self._replace_placeholders(paragraph, first_row_data)
        for table in doc.tables:
            for table_row in table.rows:
                for cell in table_row.cells:
                    for paragraph in cell.paragraphs:
                        self._replace_placeholders(paragraph, first_row_data)

        # Drop body paragraphs that still show an unreplaced non-group placeholder.
        for paragraph in doc.paragraphs:
            if self._has_placeholder(paragraph.text, non_group_fields):
                paragraph._element.getparent().remove(paragraph._element)
        return doc

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _log(self, message):
        """Append *message* to the log box and scroll it into view."""
        self.log_text.insert(tk.END, message)
        self.log_text.see(tk.END)

    @staticmethod
    def _open_directory(path):
        """Open *path* in the platform file manager."""
        if hasattr(os, "startfile"):  # os.startfile exists on Windows only
            os.startfile(path)
            return
        import subprocess
        import sys
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.run([opener, path], check=False)

    @staticmethod
    def _header_columns(sheet):
        """Return ``(field_name, column_index)`` for each non-empty cell of row 1."""
        header = []
        for col in range(1, sheet.max_column + 1):
            field_name = sheet.cell(row=1, column=col).value
            if field_name:
                header.append((field_name, col))
        return header

    @staticmethod
    def _group_rows(labelled_rows, group_fields):
        """Bucket rows by first-column label, then by group-field values.

        Args:
            labelled_rows: Iterable of ``(label, row_data)`` pairs; *label*
                is the raw first-column value, *row_data* maps field names
                to cell values.
            group_fields: Field names whose values form the inner group key.

        Returns:
            ``{str(label): {group_key_tuple: [row_data, ...]}}`` with both
            levels in first-seen order. Rows whose label is ``None`` are
            skipped because they cannot name a file.
        """
        documents = {}
        for label, row_data in labelled_rows:
            if label is None:
                continue
            group_key = tuple(row_data[field] for field in group_fields)
            documents.setdefault(str(label), {}).setdefault(group_key, []).append(row_data)
        return documents

    @staticmethod
    def _has_placeholder(text, fields):
        """Return True when *text* contains ``{field}`` for any of *fields*."""
        return any(f"{{{field}}}" in text for field in fields)

    @staticmethod
    def _replace_placeholders(paragraph, row_data):
        """Replace each ``{field}`` in the runs of *paragraph* with its value.

        ``None`` is written as ``'null'``; other values are stringified and
        stripped. A placeholder that sits entirely inside one run is replaced
        there. One that is split across several runs is replaced across the
        runs it spans (first split occurrence only).
        """
        runs = paragraph.runs
        for field_name, value in row_data.items():
            placeholder = f"{{{field_name}}}"
            field_value = 'null' if value is None else str(value).strip()
            matching = [run for run in runs if placeholder in run.text]
            if matching:
                for run in matching:
                    run.text = run.text.replace(placeholder, field_value)
            else:
                WordDocumentGenerator._replace_split_placeholder(runs, placeholder, field_value)

    @staticmethod
    def _replace_split_placeholder(runs, placeholder, replacement):
        """Replace the first *placeholder* that spans several of *runs*.

        The replacement goes into the first run that overlaps the match and
        the matched fragment is cut out of the remaining overlapping runs.
        Runs outside the match, and runs without text, are left untouched.
        """
        texts = [run.text for run in runs]
        start = "".join(texts).find(placeholder)
        if start < 0:
            return
        end = start + len(placeholder)
        offset = 0
        pending = replacement
        for run, text in zip(runs, texts):
            run_start = offset
            offset += len(text)
            if not text or offset <= start or run_start >= end:
                continue
            cut_from = max(start - run_start, 0)
            cut_to = min(end - run_start, len(text))
            run.text = text[:cut_from] + pending + text[cut_to:]
            pending = ""

    @staticmethod
    def _clone_paragraph(doc, template_paragraph):
        """Append a copy of *template_paragraph* to *doc* and return it.

        Copies alignment, style, the indent/spacing settings and, per run,
        the text plus bold, italic, underline, font size and RGB colour.
        """
        new_paragraph = doc.add_paragraph()
        new_paragraph.alignment = template_paragraph.alignment
        new_paragraph.style = template_paragraph.style
        source_format = template_paragraph.paragraph_format
        target_format = new_paragraph.paragraph_format
        for attribute in ("left_indent", "right_indent", "first_line_indent",
                          "space_after", "space_before", "line_spacing"):
            setattr(target_format, attribute, getattr(source_format, attribute))
        for run in template_paragraph.runs:
            new_run = new_paragraph.add_run(run.text)
            new_run.bold = run.bold
            new_run.italic = run.italic
            new_run.underline = run.underline
            new_run.font.size = run.font.size
            new_run.font.color.rgb = run.font.color.rgb
        return new_paragraph
# Script entry point: build the main window and hand control to Tk's event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = WordDocumentGenerator(root)
    root.mainloop()