[Python] 纯文本查看 复制代码
import os
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox, Listbox, Scrollbar
from openpyxl import load_workbook
from openpyxl.styles import Alignment, PatternFill
# 定义处理Excel文件的函数
def process_excel_files(input_files, output_file):
# 初始化一个空字典,用于存储所有行索引和列索引
unique_rows = set()
unique_cols = set()
# 遍历所有Excel文件以收集所有唯一的行索引和列索引
for excel_file in input_files:
excel_file_path = os.path.join(os.getcwd(), excel_file)
try:
df = pd.read_excel(excel_file_path, index_col=0)
unique_rows.update(df.index)
unique_cols.update(df.columns)
except Exception as e:
print(f"Error reading {excel_file}: {e}")
continue
# 将集合转换为列表,以便可以作为DataFrame的索引和列
unique_rows = list(unique_rows)
unique_cols = list(unique_cols)
# 创建一个空的DataFrame,行索引和列索引分别设置为所有唯一的行索引和列索引
summary_df = pd.DataFrame(index=unique_rows, columns=unique_cols)
# 再次遍历所有Excel文件,填充汇总表
for excel_file in input_files:
excel_file_path = os.path.join(os.getcwd(), excel_file)
try:
df = pd.read_excel(excel_file_path, index_col=0)
except Exception as e:
print(f"Error reading {excel_file}: {e}")
continue
# 获取带扩展名的文件名
file_name_without_ext, file_extension = os.path.splitext(os.path.basename(excel_file))
file_name_with_ext = file_name_without_ext + file_extension
# 填充汇总表,为每个单元格的值添加文件名后缀
for idx in df.index:
for col in df.columns:
current_value = summary_df.at[idx, col]
new_value = df.at[idx, col]
if pd.isna(new_value):
continue
new_value_with_suffix = f"{new_value} * {file_name_with_ext}"
if pd.isna(current_value) or current_value == new_value_with_suffix:
summary_df.at[idx, col] = new_value_with_suffix
elif current_value != new_value_with_suffix:
summary_df.at[idx, col] = f"{current_value}‖r4kh5nV5zxSBDY92‖{new_value_with_suffix}"
# 处理DataFrame中的每个单元格
for idx in summary_df.index:
for col in summary_df.columns:
cell_value = str(summary_df.at[idx, col])
if '‖r4kh5nV5zxSBDY92‖' in cell_value:
values = cell_value.split('‖r4kh5nV5zxSBDY92‖')
unique_values = {v.rsplit(' * ', 1)[0] if ' * ' in v else v for v in values}
if len(unique_values) == 1:
summary_df.at[idx, col] = list(unique_values)[0]
# 去除不带‖r4kh5nV5zxSBDY92‖的单元格的文件名后缀
for idx in summary_df.index:
for col in summary_df.columns:
cell_value = summary_df.at[idx, col]
if pd.isna(cell_value):
continue
if isinstance(cell_value, str) and '‖r4kh5nV5zxSBDY92‖' not in cell_value:
parts = cell_value.rsplit(' * ', 1)
if len(parts) == 2:
summary_df.at[idx, col] = parts[0]
# 写入Excel文件,不包括格式
summary_df.to_excel(output_file, index=True, engine='openpyxl')
# 加载工作簿和工作表
workbook = load_workbook(output_file)
worksheet = workbook.active
# 遍历工作表中的所有单元格
for row in worksheet.iter_rows():
for cell in row:
if '‖r4kh5nV5zxSBDY92‖' in str(cell.value):
cell_value_list = cell.value.split('‖r4kh5nV5zxSBDY92‖')
cell.value = '\n'.join(cell_value_list)
cell.alignment = Alignment(wrap_text=True)
cell.fill = PatternFill(start_color="FFFF0000", end_color="FFFF0000", fill_type="solid")
# 保存工作簿
workbook.save(filename=output_file)
def process_excel_files_all(input_files, output_file):
# 初始化一个空字典,用于存储所有行索引和列索引
unique_rows = set()
unique_cols = set()
# 遍历所有Excel文件以收集所有唯一的行索引和列索引
for excel_file in input_files:
excel_file_path = os.path.join(os.getcwd(), excel_file)
try:
# 加载工作簿
workbook = load_workbook(excel_file_path)
# 遍历工作簿中的所有工作表
for sheet in workbook.sheetnames:
# 获取工作表数据
df = pd.read_excel(excel_file_path, sheet_name=sheet, index_col=0)
unique_rows.update(df.index)
unique_cols.update(df.columns)
except Exception as e:
print(f"Error reading {excel_file}: {e}")
continue
# 将集合转换为列表,以便可以作为DataFrame的索引和列
unique_rows = list(unique_rows)
unique_cols = list(unique_cols)
# 创建一个空的DataFrame,行索引和列索引分别设置为所有唯一的行索引和列索引
summary_df = pd.DataFrame(index=unique_rows, columns=unique_cols)
# 遍历所有Excel文件和工作表,填充汇总表
for excel_file in input_files:
excel_file_path = os.path.join(os.getcwd(), excel_file)
try:
workbook = load_workbook(excel_file_path)
for sheet in workbook.sheetnames:
# 重新读取当前工作表的数据
df = pd.read_excel(excel_file_path, sheet_name=sheet, index_col=0)
# 获取带扩展名的文件名和工作表名称
file_name_without_ext, file_extension = os.path.splitext(os.path.basename(excel_file))
sheet_name = sheet # 直接使用工作表名称
file_name_with_ext = f"{file_name_without_ext} - {sheet_name}{file_extension}"
# 填充汇总表,为每个单元格的值添加文件名和工作表名后缀
for idx in df.index:
for col in df.columns:
current_value = summary_df.at[idx, col]
new_value = df.at[idx, col]
if pd.isna(new_value):
continue
new_value_with_suffix = f"{new_value} * {file_name_with_ext}"
if pd.isna(current_value) or current_value == new_value_with_suffix:
summary_df.at[idx, col] = new_value_with_suffix
elif current_value != new_value_with_suffix:
summary_df.at[idx, col] = f"{current_value}‖r4kh5nV5zxSBDY92‖{new_value_with_suffix}"
except Exception as e:
print(f"Error reading {excel_file}: {e}")
continue
# 处理DataFrame中的每个单元格
for idx in summary_df.index:
for col in summary_df.columns:
cell_value = str(summary_df.at[idx, col])
if '‖r4kh5nV5zxSBDY92‖' in cell_value:
values = cell_value.split('‖r4kh5nV5zxSBDY92‖')
unique_values = {v.rsplit(' * ', 1)[0] if ' * ' in v else v for v in values}
if len(unique_values) == 1:
summary_df.at[idx, col] = list(unique_values)[0]
# 去除不带‖r4kh5nV5zxSBDY92‖的单元格的文件名后缀
for idx in summary_df.index:
for col in summary_df.columns:
cell_value = summary_df.at[idx, col]
if pd.isna(cell_value):
continue
if isinstance(cell_value, str) and '‖r4kh5nV5zxSBDY92‖' not in cell_value:
parts = cell_value.rsplit(' * ', 1)
if len(parts) == 2:
summary_df.at[idx, col] = parts[0]
# 写入Excel文件,不包括格式
summary_df.to_excel(output_file, index=True, engine='openpyxl')
# 加载工作簿和工作表
workbook = load_workbook(output_file)
worksheet = workbook.active
# 遍历工作表中的所有单元格
for row in worksheet.iter_rows():
for cell in row:
if '‖r4kh5nV5zxSBDY92‖' in str(cell.value):
cell_value_list = cell.value.split('‖r4kh5nV5zxSBDY92‖')
cell.value = '\n'.join(cell_value_list)
cell.alignment = Alignment(wrap_text=True)
cell.fill = PatternFill(start_color="FFFF0000", end_color="FFFF0000", fill_type="solid")
# 保存工作簿
workbook.save(filename=output_file)
# 创建GUI窗口
root = tk.Tk()
root.title("Excel 文件合并器1.0")
# 获取屏幕的尺寸,并计算窗口位置使其居中
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()
width = 600
height = 300
x = (screen_width - width) // 2
y = (screen_height - height) // 2
root.geometry(f"{width}x{height}+{x}+{y}")
# 存储输入文件的全局变量
input_files = None
process_all_sheets = False # 新增变量,用于记录复选框状态
# 创建用于显示已选文件的Listbox和滚动条
file_listbox = Listbox(root, width=40, height=4) # 根据需要调整宽度和高度
file_scroll = Scrollbar(root, orient="vertical", command=file_listbox.yview) # 滚动条的command绑定到Listbox的yview
file_listbox.config(yscrollcommand=file_scroll.set) # Listbox的yscrollcommand绑定到滚动条的set
file_listbox.pack(side="left", fill="both", expand=True)
file_scroll.pack(side="right", fill="y")
# 定义选择输入文件的函数
def select_input_files():
global input_files
file_path = filedialog.askopenfilenames(filetypes=[("Excel files", "*.xlsx;*.xls")])
input_files = file_path if file_path else []
if input_files:
start_button.config(state='normal')
# 更新Listbox显示选中的文件
file_listbox.delete(0, tk.END) # 清空Listbox
for file in input_files:
file_listbox.insert(tk.END, file) # 插入文件路径到Listbox
# 创建输入文件选择按钮
input_button = tk.Button(root, text="选择输入文件", command=select_input_files)
input_button.pack()
# 创建复选框,让用户选择是否处理所有工作表
def toggle_checkbox():
global process_all_sheets
process_all_sheets = not process_all_sheets # 切换复选框状态
include_checkbox = tk.Checkbutton(root, text="处理所有工作表。不选处理每个excel的第一张表", command=toggle_checkbox)
include_checkbox.pack()
# 定义开始处理文件的函数
def start_process():
global input_files
if input_files:
output_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel files", "*.xlsx")])
if output_path:
# 根据复选框状态选择处理函数
if process_all_sheets:
process_excel_files_all(input_files, output_path)
else:
process_excel_files(input_files, output_path)
# 弹出完成消息框后重置状态
messagebox.showinfo("完成", "所有Excel文件已处理并汇总到一个Excel文件中。")
# 重置状态
input_files = None # 重置输入文件列表
file_listbox.delete(0, tk.END) # 清空Listbox
start_button.config(state='disabled') # 禁用开始处理按钮
include_checkbox.deselect() # 直接将复选框设置为未选中状态
else:
messagebox.showerror("错误", "请先选择输入文件。")
# 创建开始处理按钮(初始状态为禁用)
start_button = tk.Button(root, text="开始处理", command=start_process, state='disabled')
start_button.pack()
# 运行GUI主循环
root.mainloop()