一个合并Excel的小工具，把所有表行列标题内容合并到新表格

858983646 · 发表于 2024-6-5 23:00

本帖最后由 858983646 于 2024-6-10 11:04 编辑

完全用kimi写的，Python基础都还没看几个，编译出来40m有点大不会继续缩小了

链接:https://pan.baidu.com/s/1c7wkjZ0xCd5n3OowY5017A 提取码:c9wm 密码52pojie
32位的exe https://wwz.lanzn.com/il2bh20yzfli 密码:gz4b

更新了下，加了个运行完打开输出文件夹，链接: https://pan.baidu.com/s/1IqBMvY2tuibtzbplveZv_w?pwd=ycwn 提取码: ycwn
可以把多个Excel表合并到一张表，先是把所有行标题和列标题汇总，做成一个新表，
然后每个单元格内容根据对应行列标题名字填入对应总表单元格
要求每个表都是第一行和第一列是名标题，不同表每一个第一行和列标题同名就表示一个东西
ps1合并后有多个完全相同内容，则只显示一个，若多个内容不完全相同，则全部显示，并标明来源表且标红
2.打钩表示每个文件里的每张表都合并，不打勾表示只有每个文件的第一张才合并
Screenshot_2024-06-06-00-23-16-105_com.realvnc.viewer.android-edit.jpg

Screenshot_2024-06-06-00-23-16-105_com.realvnc.viewer.android-edit.jpg

效果如图

源码见下面

[Python] 纯文本查看 复制代码

import os
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox, Listbox, Scrollbar
from openpyxl import load_workbook
from openpyxl.styles import Alignment, PatternFill

# 定义处理Excel文件的函数
def process_excel_files(input_files, output_file):
    # 初始化一个空字典，用于存储所有行索引和列索引
    unique_rows = set()
    unique_cols = set()

    # 遍历所有Excel文件以收集所有唯一的行索引和列索引
    for excel_file in input_files:
        excel_file_path = os.path.join(os.getcwd(), excel_file)
        try:
            df = pd.read_excel(excel_file_path, index_col=0)
            unique_rows.update(df.index)
            unique_cols.update(df.columns)
        except Exception as e:
            print(f"Error reading {excel_file}: {e}")
            continue

    # 将集合转换为列表，以便可以作为DataFrame的索引和列
    unique_rows = list(unique_rows)
    unique_cols = list(unique_cols)

    # 创建一个空的DataFrame，行索引和列索引分别设置为所有唯一的行索引和列索引
    summary_df = pd.DataFrame(index=unique_rows, columns=unique_cols)

    # 再次遍历所有Excel文件，填充汇总表
    for excel_file in input_files:
        excel_file_path = os.path.join(os.getcwd(), excel_file)
        try:
            df = pd.read_excel(excel_file_path, index_col=0)
        except Exception as e:
            print(f"Error reading {excel_file}: {e}")
            continue

        # 获取带扩展名的文件名
        file_name_without_ext, file_extension = os.path.splitext(os.path.basename(excel_file))
        file_name_with_ext = file_name_without_ext + file_extension
        # 填充汇总表，为每个单元格的值添加文件名后缀
        for idx in df.index:
            for col in df.columns:
                current_value = summary_df.at[idx, col]
                new_value = df.at[idx, col]

                if pd.isna(new_value):
                    continue

                new_value_with_suffix = f"{new_value} * {file_name_with_ext}"

                if pd.isna(current_value) or current_value == new_value_with_suffix:
                    summary_df.at[idx, col] = new_value_with_suffix
                elif current_value != new_value_with_suffix:
                    summary_df.at[idx, col] = f"{current_value}‖r4kh5nV5zxSBDY92‖{new_value_with_suffix}"

    # 处理DataFrame中的每个单元格
    for idx in summary_df.index:
        for col in summary_df.columns:
            cell_value = str(summary_df.at[idx, col])
            if '‖r4kh5nV5zxSBDY92‖' in cell_value:
                values = cell_value.split('‖r4kh5nV5zxSBDY92‖')
                unique_values = {v.rsplit(' * ', 1)[0] if ' * ' in v else v for v in values}
                if len(unique_values) == 1:
                    summary_df.at[idx, col] = list(unique_values)[0]

    # 去除不带‖r4kh5nV5zxSBDY92‖的单元格的文件名后缀
    for idx in summary_df.index:
        for col in summary_df.columns:
            cell_value = summary_df.at[idx, col]
            if pd.isna(cell_value):
                continue
            if isinstance(cell_value, str) and '‖r4kh5nV5zxSBDY92‖' not in cell_value:
                parts = cell_value.rsplit(' * ', 1)
                if len(parts) == 2:
                    summary_df.at[idx, col] = parts[0]

    # 写入Excel文件，不包括格式
    summary_df.to_excel(output_file, index=True, engine='openpyxl')

    # 加载工作簿和工作表
    workbook = load_workbook(output_file)
    worksheet = workbook.active

    # 遍历工作表中的所有单元格
    for row in worksheet.iter_rows():
        for cell in row:
            if '‖r4kh5nV5zxSBDY92‖' in str(cell.value):
                cell_value_list = cell.value.split('‖r4kh5nV5zxSBDY92‖')
                cell.value = '\n'.join(cell_value_list)
                cell.alignment = Alignment(wrap_text=True)
                cell.fill = PatternFill(start_color="FFFF0000", end_color="FFFF0000", fill_type="solid")

    # 保存工作簿
    workbook.save(filename=output_file)
    
def process_excel_files_all(input_files, output_file):
    # 初始化一个空字典，用于存储所有行索引和列索引
    unique_rows = set()
    unique_cols = set()

    # 遍历所有Excel文件以收集所有唯一的行索引和列索引
    for excel_file in input_files:
        excel_file_path = os.path.join(os.getcwd(), excel_file)
        try:
            # 加载工作簿
            workbook = load_workbook(excel_file_path)
            # 遍历工作簿中的所有工作表
            for sheet in workbook.sheetnames:
                # 获取工作表数据
                df = pd.read_excel(excel_file_path, sheet_name=sheet, index_col=0)
                unique_rows.update(df.index)
                unique_cols.update(df.columns)
        except Exception as e:
            print(f"Error reading {excel_file}: {e}")
            continue

    # 将集合转换为列表，以便可以作为DataFrame的索引和列
    unique_rows = list(unique_rows)
    unique_cols = list(unique_cols)

    # 创建一个空的DataFrame，行索引和列索引分别设置为所有唯一的行索引和列索引
    summary_df = pd.DataFrame(index=unique_rows, columns=unique_cols)

    # 遍历所有Excel文件和工作表，填充汇总表
    for excel_file in input_files:
        excel_file_path = os.path.join(os.getcwd(), excel_file)
        try:
            workbook = load_workbook(excel_file_path)
            for sheet in workbook.sheetnames:
                # 重新读取当前工作表的数据
                df = pd.read_excel(excel_file_path, sheet_name=sheet, index_col=0)

                # 获取带扩展名的文件名和工作表名称
                file_name_without_ext, file_extension = os.path.splitext(os.path.basename(excel_file))
                sheet_name = sheet  # 直接使用工作表名称
                file_name_with_ext = f"{file_name_without_ext} - {sheet_name}{file_extension}"

                # 填充汇总表，为每个单元格的值添加文件名和工作表名后缀
                for idx in df.index:
                    for col in df.columns:
                        current_value = summary_df.at[idx, col]
                        new_value = df.at[idx, col]

                        if pd.isna(new_value):
                            continue

                        new_value_with_suffix = f"{new_value} * {file_name_with_ext}"

                        if pd.isna(current_value) or current_value == new_value_with_suffix:
                            summary_df.at[idx, col] = new_value_with_suffix
                        elif current_value != new_value_with_suffix:
                            summary_df.at[idx, col] = f"{current_value}‖r4kh5nV5zxSBDY92‖{new_value_with_suffix}"

        except Exception as e:
            print(f"Error reading {excel_file}: {e}")
            continue

    # 处理DataFrame中的每个单元格
    for idx in summary_df.index:
        for col in summary_df.columns:
            cell_value = str(summary_df.at[idx, col])
            if '‖r4kh5nV5zxSBDY92‖' in cell_value:
                values = cell_value.split('‖r4kh5nV5zxSBDY92‖')
                unique_values = {v.rsplit(' * ', 1)[0] if ' * ' in v else v for v in values}
                if len(unique_values) == 1:
                    summary_df.at[idx, col] = list(unique_values)[0]

    # 去除不带‖r4kh5nV5zxSBDY92‖的单元格的文件名后缀
    for idx in summary_df.index:
        for col in summary_df.columns:
            cell_value = summary_df.at[idx, col]
            if pd.isna(cell_value):
                continue
            if isinstance(cell_value, str) and '‖r4kh5nV5zxSBDY92‖' not in cell_value:
                parts = cell_value.rsplit(' * ', 1)
                if len(parts) == 2:
                    summary_df.at[idx, col] = parts[0]

    # 写入Excel文件，不包括格式
    summary_df.to_excel(output_file, index=True, engine='openpyxl')

    # 加载工作簿和工作表
    workbook = load_workbook(output_file)
    worksheet = workbook.active

    # 遍历工作表中的所有单元格
    for row in worksheet.iter_rows():
        for cell in row:
            if '‖r4kh5nV5zxSBDY92‖' in str(cell.value):
                cell_value_list = cell.value.split('‖r4kh5nV5zxSBDY92‖')
                cell.value = '\n'.join(cell_value_list)
                cell.alignment = Alignment(wrap_text=True)
                cell.fill = PatternFill(start_color="FFFF0000", end_color="FFFF0000", fill_type="solid")

    # 保存工作簿
    workbook.save(filename=output_file)


# 创建GUI窗口
root = tk.Tk()
root.title("Excel 文件合并器1.0")

# 获取屏幕的尺寸，并计算窗口位置使其居中
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()
width = 600
height = 300
x = (screen_width - width) // 2
y = (screen_height - height) // 2
root.geometry(f"{width}x{height}+{x}+{y}")

# 存储输入文件的全局变量
input_files = None
process_all_sheets = False  # 新增变量，用于记录复选框状态

# 创建用于显示已选文件的Listbox和滚动条
file_listbox = Listbox(root, width=40, height=4)  # 根据需要调整宽度和高度
file_scroll = Scrollbar(root, orient="vertical", command=file_listbox.yview)  # 滚动条的command绑定到Listbox的yview
file_listbox.config(yscrollcommand=file_scroll.set)  # Listbox的yscrollcommand绑定到滚动条的set
file_listbox.pack(side="left", fill="both", expand=True)
file_scroll.pack(side="right", fill="y")

# 定义选择输入文件的函数
def select_input_files():
    global input_files
    file_path = filedialog.askopenfilenames(filetypes=[("Excel files", "*.xlsx;*.xls")])
    input_files = file_path if file_path else []
    if input_files:
        start_button.config(state='normal')
        # 更新Listbox显示选中的文件
        file_listbox.delete(0, tk.END)  # 清空Listbox
        for file in input_files:
            file_listbox.insert(tk.END, file)  # 插入文件路径到Listbox

# 创建输入文件选择按钮
input_button = tk.Button(root, text="选择输入文件", command=select_input_files)
input_button.pack()

# 创建复选框，让用户选择是否处理所有工作表
def toggle_checkbox():
    global process_all_sheets
    process_all_sheets = not process_all_sheets  # 切换复选框状态

include_checkbox = tk.Checkbutton(root, text="处理所有工作表。不选处理每个excel的第一张表", command=toggle_checkbox)
include_checkbox.pack()

# 定义开始处理文件的函数
def start_process():
    global input_files
    if input_files:
        output_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel files", "*.xlsx")])
        if output_path:
            # 根据复选框状态选择处理函数
            if process_all_sheets:
                process_excel_files_all(input_files, output_path)
            else:
                process_excel_files(input_files, output_path)
            # 弹出完成消息框后重置状态
            messagebox.showinfo("完成", "所有Excel文件已处理并汇总到一个Excel文件中。")
            # 重置状态
            input_files = None  # 重置输入文件列表
            file_listbox.delete(0, tk.END)  # 清空Listbox
            start_button.config(state='disabled')  # 禁用开始处理按钮
            include_checkbox.deselect()  # 直接将复选框设置为未选中状态
    else:
        messagebox.showerror("错误", "请先选择输入文件。")

# 创建开始处理按钮（初始状态为禁用）
start_button = tk.Button(root, text="开始处理", command=start_process, state='disabled')
start_button.pack()

# 运行GUI主循环
root.mainloop()

858983646 · 发表于 2024-6-14 20:28

xinke-qd 发表于 2024-6-14 14:22
我录了屏，您看一下什么问题：https://www.alipan.com/s/LBDKkhVLrt3

我也不太懂，都ai写的，你直接运行源码试试，1234依次安装运行就好，exe安装要Add python.exe to PATH也打钩
 链接:https://pan.baidu.com/s/1MptQ4O5W6vg1mnILqzWQMA 提取码:099v--来自百度网盘超级会员的分享

858983646 · 发表于 2024-6-6 16:52

freedover 发表于 2024-6-6 16:19
这个是什么场景的需求

我工作中中遇到这边发来的表格是某些部门的某些数据，另一边发来的是另外一些部门的另外某些数据，有重复也有不重复的，很多张看的要晕了，就做了这个直接合并

showhand · 发表于 2024-6-6 16:17

感谢分享，功能不错！

freedover · 发表于 2024-6-6 16:19

这个是什么场景的需求

12394613643 · 发表于 2024-6-6 16:51

原创的看看好不好去

xuming0315 · 发表于 2024-6-6 17:21

挺牛的表格合并啊。

PEKO16 · 发表于 2024-6-6 17:32

PQ就可以实现了 1行M语言完事

858983646 · 发表于 2024-6-6 17:50

PEKO16 发表于 2024-6-6 17:32
PQ就可以实现了 1行M语言完事

谢谢，pq没了解过，有空去看看

psj736213 · 发表于 2024-6-7 04:21

提示: 作者被禁止或删除内容自动屏蔽

zd8028122 · 发表于 2024-6-7 08:14

楼主，打开提示No error messages generated。F0rmatMessageW：参数错误

帐号		自动登录	找回密码
密码			注册[Register]

[原创工具] 一个合并Excel的小工具，把所有表行列标题内容合并到新表格

免费评分

psj736213 psj736213 当前离线好友阅读权限 0 听众最后登录 1970-1-1 头像被屏蔽	psj736213 发表于 2024-6-7 04:21 提示: 作者被禁止或删除内容自动屏蔽

	回复支持举报