import os
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor
# Work in the directory the script is launched from
current_dir = os.getcwd()
# Collect the Excel workbooks (.xlsx / .xls, any letter case) in that directory.
# Names starting with "~$" are the lock files Excel creates while a workbook
# is open; they are not readable workbooks, so they are skipped. Directories
# whose names happen to end in an Excel extension are skipped as well.
excel_files = [
    name
    for name in os.listdir(current_dir)
    if name.lower().endswith(('.xlsx', '.xls'))
    and not name.startswith('~$')
    and os.path.isfile(os.path.join(current_dir, name))
]
# Best-effort pandas settings:
#   mode.chained_assignment = None -> silence the chained-assignment warning
#   mode.use_inf_as_na = True      -> treat inf as a missing value
# mode.use_inf_as_na is deprecated since pandas 2.1 and is not available in
# every pandas release. Setting an unknown option raises OptionError (a
# subclass of both AttributeError and KeyError); these settings are cosmetic,
# so a missing option must not abort the script at start-up.
for option_name, option_value in (
    ('mode.chained_assignment', None),
    ('mode.use_inf_as_na', True),
):
    try:
        pd.set_option(option_name, option_value)
    except (AttributeError, KeyError):
        pass  # option does not exist in the installed pandas version
# Worker that converts a single sheet of a workbook into a CSV file
def process_excel_sheet(args):
    """Export one worksheet of an Excel workbook to a CSV file.

    The CSV is written into a folder named after the workbook (its path
    without the extension) and is named ``<sheet_name>.csv``. The first two
    rows of the sheet are skipped when reading.

    Args:
        args: A ``(excel_file, sheet_name)`` tuple, where ``excel_file`` is
            the path of the workbook and ``sheet_name`` the sheet to export.
            A single tuple is taken so the function can be handed directly
            to an executor together with a list of such tuples.

    Returns:
        None. The result is the CSV file on disk and a line printed to stdout.
    """
    excel_file, sheet_name = args
    # Output folder named after the workbook. exist_ok=True is required
    # because several threads may process sheets of the same workbook at the
    # same time: a separate "exists?" check followed by makedirs() can race
    # and raise FileExistsError.
    excel_folder = os.path.splitext(excel_file)[0]
    os.makedirs(excel_folder, exist_ok=True)
    # openpyxl only reads the .xlsx format. For any other extension (legacy
    # .xls) pass engine=None so pandas selects a suitable reader itself.
    engine = 'openpyxl' if excel_file.lower().endswith('.xlsx') else None
    # Read the requested sheet, skipping the first two rows
    df_sheet = pd.read_excel(excel_file, sheet_name=sheet_name, skiprows=2, engine=engine)
    # Use the sheet name as the CSV file name.
    # NOTE: the file is encoded as GBK; text that GBK cannot represent makes
    # to_csv raise UnicodeEncodeError.
    csv_file_name = os.path.join(excel_folder, f'{sheet_name}.csv')
    df_sheet.to_csv(csv_file_name, index=False, encoding='gbk')
    print(f'Saved {csv_file_name} successfully.')
# Build the task list: one (workbook, sheet name) pair per sheet
tasks = []
for excel_file in excel_files:
    # The workbook is opened here only to enumerate its sheet names; the
    # context manager closes the file handle again instead of leaving it open
    # for the rest of the run.
    with pd.ExcelFile(excel_file) as workbook:
        tasks.extend((excel_file, sheet_name) for sheet_name in workbook.sheet_names)
# Convert the sheets on a thread pool
failed_count = 0
with ThreadPoolExecutor(max_workers=16) as executor:  # adjust the thread limit as needed
    # submit() + result() is used instead of a bare executor.map(): map() only
    # re-raises a worker's exception when its results are iterated, so
    # discarding them would hide every failed sheet.
    futures = [(task, executor.submit(process_excel_sheet, task)) for task in tasks]
    for (excel_file, sheet_name), future in futures:
        try:
            future.result()
        except Exception as exc:  # report the failure and continue with the other sheets
            failed_count += 1
            print(f'Failed to convert sheet {sheet_name!r} of {excel_file}: {exc}')
# Only announce completion when every sheet was actually converted
if failed_count:
    print(f'{failed_count} of {len(tasks)} sheets could not be converted.')
else:
    print('拆分完成!')