考勤数据导出来后，要一个个删除重复数据，Python搞定

Dontp · 发表于 2024-11-25 15:55

本帖最后由 Dontp 于 2024-11-28 20:36 编辑

中控考勤导出来以后，数据很多重复的，就用Python做了一个小工具，代码贴出来，可以随便更改，改字段就行：

修改了一下，支持了 xls，打包成了一个exe文件，

https://wwis.lanzouq.com/iK4XR2gkakih
密码:1jp2

[Asm] 纯文本查看 复制代码

import pandas as pd
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Alignment
from datetime import datetime, timedelta
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import os

def select_file():
    file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx;*.xls")])
    if not file_path:
        messagebox.showinfo("提示", "未选择文件，程序退出。")
        return None
    return file_path

def process_data(file_path):
    # 根据文件扩展名选择合适的引擎
    if file_path.endswith('.xlsx'):
        engine = 'openpyxl'
    elif file_path.endswith('.xls'):
        engine = 'xlrd'
    else:
        raise ValueError(f"不支持的文件格式: {file_path}")

    df = pd.read_excel(file_path, engine=engine, parse_dates=['日期时间'])

    # 获取输入文件的名称（不包括扩展名）
    input_file_name = os.path.splitext(os.path.basename(file_path))[0]

    # 处理数据
    processed_data = []

    def format_timedelta(td):
        """将timedelta格式化为'小时:分钟:秒'"""
        hours, remainder = divmod(td.total_seconds(), 3600)
        minutes, seconds = divmod(remainder, 60)
        return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02}"

    def calculate_time_with_half_hour_rule(td):
        """计算时间，30分钟以内四舍五入到0.5小时"""
        hours = td.total_seconds() // 3600
        minutes = (td.total_seconds() % 3600) // 60
        
        if minutes < 30:
            return hours
        elif 30 <= minutes < 60:
            return hours + 0.5
        else:
            return (hours + 1) + 0.5

    def calculate_overtime(start_time, end_time):
        """计算加班时间，返回小时数"""
        end_of_workday = datetime.combine(start_time.date(), datetime.strptime("19:00:00", "%H:%M:%S").time())
        midnight = datetime.combine(start_time.date(), datetime.strptime("00:00:00", "%H:%M:%S").time())

        if end_time > end_of_workday:
            overtime_hours = 0
            if end_time > midnight:
                # 跨越两天的情况
                first_part = max(timedelta(0), midnight - end_of_workday)
                second_part = max(timedelta(0), end_time - (midnight + timedelta(days=1)))
                total_overtime = first_part + second_part
            else:
                # 没有跨越两天的情况
                total_overtime = max(timedelta(0), end_time - end_of_workday)
            overtime_hours = calculate_time_with_half_hour_rule(total_overtime)
            return max(0, overtime_hours)  # 确保加班时间非负
        return 0

    def calculate_adjustment(start_time):
        """计算调休时间"""
        if start_time.time() > datetime.strptime("10:00:00", "%H:%M:%S").time():
            start_of_workday = datetime.combine(start_time.date(), datetime.strptime("10:00:00", "%H:%M:%S").time())
            adjustment = (start_time - start_of_workday).total_seconds() / 3600
            adjustment = max(1, int(adjustment))  # 不满1小时按1小时计算
            if adjustment > 3:
                adjustment -= 1
            return adjustment
        return 0

    def is_weekend(date):
        """判断日期是否为周末"""
        return date.weekday() >= 5  # 5表示周六，6表示周日

    for name, group in df.groupby('姓名'):
        group_list = group.sort_values(by='日期时间').values.tolist()
        if len(group_list) > 1:
            start_time = group_list[0][2]
            end_time = group_list[-1][2]
            
            # 处理跨天的情况
            if end_time < start_time:
                end_time += timedelta(days=1)
            
            hours_worked = (end_time - start_time)
            hours_worked_hours = calculate_time_with_half_hour_rule(hours_worked)
            standard_hours = max(0, hours_worked_hours - 1)
            
            # 判断是否为周末
            if is_weekend(start_time.date()):
                overtime_hours = standard_hours
                overtime_hours_end = standard_hours
                excess_hours = standard_hours
                adjustment_hours = 0  # 周末不计算调休时间
            else:
                overtime_hours = max(0, standard_hours - 8)
                overtime_hours_end = calculate_overtime(start_time, end_time)
                excess_hours = standard_hours - 8  # 保留负值
                adjustment_hours = calculate_adjustment(start_time)
            
            # 合并所有数据到一个字典
            data_dict = {
                "姓名": name,
                "考勤号码": group_list[0][1],
                "开始时间": start_time,
                "结束时间": end_time,
                "工时(小时)": hours_worked_hours,
                "统计时间(小时:分钟:秒)": format_timedelta(hours_worked),
                "标准工时": standard_hours,
                "超标时间": excess_hours,
                "加班时间(小时)": overtime_hours_end,
                "调休": adjustment_hours
            }
            
            # 添加合并后的数据
            processed_data.append(data_dict)
        else:
            start_time = group_list[0][2]
            end_time = group_list[0][2]
            
            hours_worked = timedelta(seconds=0)
            hours_worked_hours = 0
            standard_hours = 0
            overtime_hours = 0
            overtime_hours_end = 0
            excess_hours = 0
            
            # 判断是否为周末
            if is_weekend(start_time.date()):
                adjustment_hours = 0  # 周末不计算调休时间
            else:
                adjustment_hours = calculate_adjustment(start_time)
            
            # 合并所有数据到一个字典
            data_dict = {
                "姓名": name,
                "考勤号码": group_list[0][1],
                "开始时间": start_time,
                "结束时间": end_time,
                "工时(小时)": hours_worked_hours,
                "统计时间(小时:分钟:秒)": "00:00:00",
                "标准工时": standard_hours,
                "超标时间": excess_hours,
                "加班时间(小时)": overtime_hours_end,
                "调休": adjustment_hours
            }
            
            # 添加合并后的数据
            processed_data.append(data_dict)

    # 转换为DataFrame
    result_df = pd.DataFrame(processed_data)

    # 创建一个新的Excel工作簿
    wb = Workbook()
    ws = wb.active

    # 写入表头
    header = ["姓名", "考勤号码", "开始时间", "结束时间", "工时(小时)", "统计时间(小时:分钟:秒)", "标准工时", "超标时间", "加班时间(小时)", "调休"]
    ws.append(header)

    # 写入数据
    current_row = 2
    for index, row in result_df.iterrows():
        ws.cell(row=current_row, column=1, value=row["姓名"])
        ws.cell(row=current_row, column=2, value=row["考勤号码"])
        ws.cell(row=current_row, column=3, value=row["开始时间"])
        ws.cell(row=current_row, column=4, value=row["结束时间"])
        ws.cell(row=current_row, column=5, value=row["工时(小时)"])
        ws.cell(row=current_row, column=6, value=row["统计时间(小时:分钟:秒)"])
        ws.cell(row=current_row, column=7, value=row["标准工时"])
        ws.cell(row=current_row, column=8, value=row["超标时间"])
        ws.cell(row=current_row, column=9, value=row["加班时间(小时)"])
        ws.cell(row=current_row, column=10, value=row["调休"])
        
        current_row += 1

    # 合并单元格
    merge_ranges = {}
    for row in ws.iter_rows(min_row=2, max_col=1, max_row=ws.max_row):
        for cell in row:
            if cell.value and cell.value == ws[cell.row - 1][0].value:
                if cell.value not in merge_ranges:
                    merge_ranges[cell.value] = [ws[cell.row - 1][0].row, cell.row]
                else:
                    merge_ranges[cell.value][1] = cell.row

    for name, (start_row, end_row) in merge_ranges.items():
        ws.merge_cells(start_row=start_row, start_column=1, end_row=end_row, end_column=1)
        ws.merge_cells(start_row=start_row, start_column=2, end_row=end_row, end_column=2)

    # 设置单元格对齐方式
    for row in ws.iter_rows(min_row=1, max_col=10, max_row=ws.max_row):
        for cell in row:
            cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center')

    # 保存Excel文件
    output_file_path = f'{input_file_name}_processed_data_merged.xlsx'
    wb.save(output_file_path)

    # 打印结果
    print(result_df.to_string(index=False))
    return output_file_path

def on_process_button_click():
    file_path = select_file()
    if file_path:
        try:
            output_file_path = process_data(file_path)
            messagebox.showinfo("成功", f"处理完成，输出文件已保存为: {output_file_path}")
        except Exception as e:
            messagebox.showerror("错误", f"处理过程中发生错误: {str(e)}")

# 创建主窗口
root = tk.Tk()
root.title("考勤数据处理工具")
root.geometry("280x100")  # 设置窗口大小为280x100

# 设置窗口图标
icon_path = 'icon.ico'  # 图标文件路径
if os.path.exists(icon_path):
    root.iconphoto(False, tk.PhotoImage(file=icon_path))
else:
    print(f"警告: 图标文件 '{icon_path}' 不存在，将使用默认图标。")

# 创建选择文件按钮
select_file_button = ttk.Button(root, text="选择文件", command=on_process_button_click)
select_file_button.pack(pady=20)

# 添加作者信息
author_label = tk.Label(root, text="by Donpt", fg="gray")
author_label.pack(side=tk.BOTTOM, anchor=tk.SE, pady=5, padx=5)

# 运行主循环
root.mainloop()

结果是

jun269 · 发表于 2024-11-25 16:05

强大的楼主能给打包成UI界面的就更给力了，要不然这个玩意一般人玩不来的。

tryEverything · 发表于 2024-11-25 16:08

好用，方便多了。Ctrl+C

a7512482 · 发表于 2024-11-25 16:24

不错，省事

dazhaorpa · 发表于 2024-11-25 16:54

很实用啊，楼主给力

xhawkjw · 发表于 2024-11-25 16:56

很实用，不错

hhpurple · 发表于 2024-11-25 17:02

优秀，谢谢楼主分享

gc1368 · 发表于 2024-11-25 17:08

楼主威武，好东西多分享

JJarvis · 发表于 2024-11-25 17:10

考勤机上导出的数据怎么会重复呢

andyle · 发表于 2024-11-25 17:24

考勤数据重复有点无法理解

帐号		自动登录	找回密码
密码			注册[Register]

[Python 原创] 考勤数据导出来后，要一个个删除重复数据，Python搞定

免费评分