python完全拷贝源EXCEL的内容和格式到目标EXCEL的尾部

dreamrise 发表于 2024-5-20 08:57

# 完全拷贝源EXCEL的内容和格式到目标EXCEL的尾部

```python
import openpyxl as op
import copy
import re
from collections import OrderedDict
from openpyxl.utils import get_column_letter, column_index_from_string
from openpyxl.utils.cell import coordinate_from_string

def fullcopy_sheet_s2t(s_wb, t_wb, s_ws=None, t_ws=None):
"""完全拷贝源EXCEL的内容和格式到目标EXCEL的尾部

Args:
   s_wb (_type_): 源工作簿
   t_wb (_type_): 目标工作簿
   s_ws (_type_, optional): 源工作表. Defaults to None.
   t_ws (_type_, optional): 目标工作表. Defaults to None.
"""

def get_effective_data_range(wb, sheet_name=None):
   """获取工作簿(第一个或指定)工作表的数据范围

   Args:
         wb (_type_): 工作表
         sheet_name (_type_, optional): 工作表名. Defaults to None.

   Returns:
         _type_: 开始CELL坐标,结束CELL坐标,有效数据行数
   """
   # 获取指定工作表
   if sheet_name is None:
         ws = wb]
   else:
         ws = wb

   cells =
   # print("🚀 ~ file: copyxlsx240517.py:199 ~ _list:", cells)
   if cells:
         start = cells
         # print("🚀 ~ file: copyxlsx240517.py:201 ~ cells:", cells)
         end = cells[-1]
         # print("🚀 ~ file: copyxlsx240517.py:203 ~ cells[-1]:", cells[-1])
         start_coordinate = start.coordinate
         end_coordinate = end.coordinate
         # 计算有效数据行数（不包括空行）
         data_row_count = end.row - start.row + 1
   else:
         # 空表格
         start_coordinate = None
         end_coordinate = None
         data_row_count = 0

   # 返回结果
   return (start_coordinate, end_coordinate, data_row_count)

def get_newarea(start_coordinate, end_coordinate, current_row, newrows):
   """根据原始坐标, 开始行, 新增行数，生成新增数据的目标区域

   Args:
         start_coordinate (_type_): 开始坐标
         end_coordinate (_type_): 结束坐标
         current_row (_type_): 开始行
         newrows (_type_): 新增行数

   Returns:
         _type_: _description_
   """
   start_col, start_row = coordinate_from_string(start_coordinate)
   end_col, end_row = coordinate_from_string(end_coordinate)
   new_start_row = start_row + current_row
   new_end_row = end_row + current_row
   new_start_col = start_col
   new_end_col = end_col
   # print(new_start_col + str(new_start_row), new_end_col + str(new_end_row))
   return new_start_col + str(new_start_row), new_end_col + str(new_end_row)

if s_ws is None:
   s_ws = s_wb.sheetnames
if t_ws is None:
   t_ws = s_ws

ws_from = s_wb
if t_ws not in t_wb.sheetnames:
   ws_to = t_wb.create_sheet(t_ws)
else:
   ws_to = t_wb

# 删除默认创建的空sheet
if s_ws != "Sheet" and "Sheet" in t_wb.sheetnames:
   del t_wb["Sheet"]

_, _, lastrow = get_effective_data_range(t_wb)

start_coordinate, end_coordinate, data_row_count = get_effective_data_range(s_wb)

s_range = f"{start_coordinate}:{end_coordinate}"

targe_start_coordinate, targe_end_coordinate = get_newarea(
   start_coordinate, end_coordinate, lastrow, data_row_count
)

t_range = f"{targe_start_coordinate}:{targe_end_coordinate}"

ws_to.sheet_properties.tabColor = ws_from.sheet_properties.tabColor
if s_range is not None:
   source_area = ws_from
else:
   source_area = ws_from

merge_cell_dict = OrderedDict()
merged_ranges = ws_from.merged_cells.ranges
for source_row in source_area:
   for source_cell in source_row:
         sc_str = str(source_cell)
         point_time = sc_str.count(".")
         sc_str = sc_str.replace(".", "", point_time - 1)
         start = sc_str.find(".")
         sc_str = sc_str
         for merged_range in merged_ranges:
            if source_cell.coordinate in merged_range:
               _cell_value = ws_from.cell(
                     row=merged_range.min_row, column=merged_range.min_col
               )
               merge_cell_dict = (
                     merged_range.min_row,
                     merged_range.min_col,
                     _cell_value,
               )
               continue
range_li = []
for val in set(merge_cell_dict.values()):
   tmp = []
   for x, y in merge_cell_dict.items():
         if y == val:
            tmp.append(x)
   if len(tmp):
         range_li.append(min(tmp) + ":" + max(tmp))

for i in range_li:
   # print(i)
   if s_range is not None:
         base_point_letter = s_range.split(":")
         base_point = ws_from
         base_row = base_point.row
         base_col = base_point.column
   else:
         base_point_letter = i.split(":")
         base_point = ws_from
         base_row = base_point.row
         base_col = base_point.column
   s = i.split(":")
   e = i.split(":")
   # 模板区间第一个点相对顶点距离
   base_delta_row = ws_from.row - base_row
   base_delta_col = ws_from.column - base_col
   # 模板区间两个端点距离
   delta_row = ws_from.row - ws_from.row
   delta_col = ws_from.column - ws_from.column
   # print(base_delta_row, base_delta_col, delta_row, delta_col)
   if t_range is not None:
         tar_s = t_range.split(":")
         tar_s_letter = re.findall(r"(+)", tar_s)
         tar_base_col_idx = column_index_from_string(tar_s_letter)
         tar_base_row_idx = int(re.findall(r"(\d+)", tar_s))
   else:
         tar_s = s
         tar_s_letter = re.findall(r"(+)", tar_s)
         tar_base_col_idx = column_index_from_string(tar_s_letter)
         tar_base_row_idx = int(re.findall(r"(\d+)", tar_s))
   # print(tar_base_row_idx, tar_base_col_idx)
   tar_range_s_col = get_column_letter(tar_base_col_idx + base_delta_col)
   tar_range_s_idx = tar_base_row_idx + base_delta_row
   tar_range_e_col = get_column_letter(
         tar_base_col_idx + base_delta_col + delta_col
   )
   tar_range_e_idx = tar_base_row_idx + base_delta_row + delta_row
   tar_merge = (
         tar_range_s_col
         + str(tar_range_s_idx)
         + ":"
         + tar_range_e_col
         + str(tar_range_e_idx)
   )
   # print('tar merge:', tar_merge)
   ws_to.merge_cells(tar_merge)

if s_range is not None and t_range is not None:
   source_point_letter = s_range.split(":")
   source_point = ws_from
   source_row = source_point.row
   source_col = source_point.column

   tar_point_letter = t_range.split(":")
   tar_point = ws_from
   tar_row = tar_point.row
   tar_col = tar_point.column

   delta_row = tar_row - source_row
   delta_col = tar_col - source_col
   print("ROW:", tar_row, source_row)
   print("COL:", tar_col, source_col)
else:
   delta_row = 0
   delta_col = 0
# print("DELTA ROW COL:", delta_row, delta_col)

for source_row in source_area:
   update_row_h = False
   for source_cell in source_row:
         source_x = source_cell.row
         new_x = source_x + delta_row
         source_y = source_cell.column
         new_y = source_y + delta_col

         if not update_row_h:
            ws_to.row_dimensions.height = ws_from.row_dimensions[
               source_x
            ].height
            update_row_h = True

         ws_to.column_dimensions[
            get_column_letter(new_y)
         ].width = ws_from.column_dimensions.width

         ws_to.cell(row=new_x, column=new_y, value=source_cell.value)

         # 设置单元格格式
         target_cell = ws_to.cell(new_x, new_y)
         target_cell.fill = copy.copy(source_cell.fill)
         if source_cell.has_style:
            target_cell._style = copy.copy(source_cell._style)
            target_cell.font = copy.copy(source_cell.font)
            target_cell.border = copy.copy(source_cell.border)
            target_cell.fill = copy.copy(source_cell.fill)
            target_cell.number_format = copy.copy(source_cell.number_format)
            target_cell.protection = copy.copy(source_cell.protection)
            target_cell.alignment = copy.copy(source_cell.alignment)

s_wb.close()
t_wb.close()

targe_wb = op.Workbook()# 新建一个EXCEL
source_wb1 = op.load_workbook("head.xlsx")
source_wb2 = op.load_workbook("data.xlsx")
source_wb3 = op.load_workbook("tail.xlsx")

fullcopy_sheet_s2t(source_wb1, targe_wb)
fullcopy_sheet_s2t(source_wb2, targe_wb)
fullcopy_sheet_s2t(source_wb3, targe_wb)

targe_wb.save(filename="all_V2.xlsx")
```

dreamrise 发表于 2024-5-20 09:48

XLWINGS 版本，缺点是需要安装OFFICE，不能用于LINUX服务器。优点是代码简单，内部有函数，还支持自动列宽。

def merge_excel_parts(
head_file_path: str, data_file_path: str, tail_file_path: str, output_file_path: str, sheetname: str = "Sheet1"
) -> str:
"""
将三个Excel文件（表头、数据、表尾）合并成一个新文件，并返回合并后文件的路径。

参数:
   head_file_path (str): 表头部分的Excel文件路径。
   data_file_path (str): 数据部分的Excel文件路径。
   tail_file_path (str): 表尾部分的Excel文件路径。
   output_file_path (str): 合并后Excel文件的输出路径。

返回:
   str: 合并后Excel文件的路径。
"""
import xlwings as xw

try:
   xwapp = xw.App(visible=False)

   # 创建一个新的Excel工作簿用于保存合并结果
   wb_output = xw.Book()
   # 不支持中文
   ws_output = wb_output.sheets

   # 头部和数据
   for part_path in :
         try:
            check_file_exists(part_path)
         except FileNotFoundError:
            print(f"文件{part_path}不存在，退出处理")
            exit()
         # 打开源Excel文件
         wb_source = xw.Book(part_path)
         ws_source = wb_source.sheets

         # 复制源工作簿的格式和数据
         # 使用 range.copy() 方法以保留原始格式
         ws_source.used_range.copy(ws_output.cells(1 + ws_output.used_range.last_cell.row, 1))

         # 关闭源Excel文件
         wb_source.close()

   # 尾部：如果没有尾部文件可以忽略
   try:
         check_file_exists(tail_file_path)
         wb_source = xw.Book(tail_file_path)
         ws_source = wb_source.sheets
         ws_source.used_range.copy(ws_output.cells(1 + ws_output.used_range.last_cell.row, 1))
         wb_source.close()
   except FileNotFoundError:
         """如果没有尾部文件可以忽略"""

   # 设置所有列的宽度为自动列宽
   for col in ws_output.used_range.columns:
         col.autofit()

   wb_output.save(output_file_path)
   wb_output.close()
   xwapp.quit()
   result = {"status": "success", "data": output_file_path}
except Exception as e:
   result = {"status": "fail", "data": e}

return result

q546624527 发表于 2024-5-20 09:12

我记得xlwings这个库比较好用，还能嵌入excel内部

Moinul 发表于 2024-5-20 09:24

大神，能支持识别每一列的title然后sheet2拷贝到sheet1对应的那一行吗？

dreamrise 发表于 2024-5-20 09:39

q546624527 发表于 2024-5-20 09:12
我记得xlwings这个库比较好用，还能嵌入excel内部

XLWINGS需要安装OFFICE，而且只支持WINDOWS、MAC OS。
因为服务器LINUX部署，而且不装OFFICE，所以用openpyxl写了一遍。

shiltonlace 发表于 2024-5-20 09:39

代码中的小火箭是啥意思？

dreamrise 发表于 2024-5-20 09:43

def merge_excel_parts(
head_file_path: str, data_file_path: str, tail_file_path: str, output_file_path: str
) -> str:
"""
将三个Excel文件（表头、数据、表尾）合并成一个新文件，并返回合并后文件的路径。

参数:
   head_file_path (str): 表头部分的Excel文件路径。
   data_file_path (str): 数据部分的Excel文件路径。
   tail_file_path (str): 表尾部分的Excel文件路径。
   output_file_path (str): 合并后Excel文件的输出路径。

返回:
   DICT
"""

targe_wb = op.Workbook()# 新建一个EXCEL
source_wb1 = op.load_workbook(head_file_path)
source_wb2 = op.load_workbook(data_file_path)
source_wb3 = op.load_workbook(tail_file_path)

fullcopy_sheet_s2t(source_wb1, targe_wb)
fullcopy_sheet_s2t(source_wb2, targe_wb)
fullcopy_sheet_s2t(source_wb3, targe_wb)

targe_wb.save(filename=output_file_path)

result = {"status": "success", "data": output_file_path}

return result

# 只传文件路径的方法
merge_excel_parts(
"head.xlsx",
"data.xlsx",
"tail.xlsx",
"all_v3.xlsx",
)

w1066602520 发表于 2024-5-20 09:46

这个真的是有用马上一试

wangyp0506c 发表于 2024-5-20 09:56

先收藏下{:1_893:} 后边再试试

space218 发表于 2024-5-20 10:04

没看懂是干啥用的！{:1_907:}

页: [1] 2 3

吾爱破解 - 52pojie.cn's Archiver

python完全拷贝源EXCEL的内容和格式到目标EXCEL的尾部