吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 23116|回复: 115
收起左侧

[原创工具] 拆分EXCEL文件工具

    [复制链接]
meforeg 发表于 2019-10-10 21:34
本帖最后由 meforeg 于 2020-3-3 19:15 编辑

根据某一列,拆分成几个不同的EXCEL文件
操作方法:将EXCEL文件拖至EXE文件上,根据提示操作。

链接:

https://www.lanzouj.com/b00za3j6j
工具二:应网友要求,增加一个将EXCEL文件里的多个工作表保存为单独工作薄的小工具:
https://www.lanzouj.com/b00za3j6j
其它工具链接:

【V2.3】批量合并EXCEL工作表小工具  https://www.52pojie.cn/thread-1029677-1-1.html

1.gif

PYTHON源码:
[Python] 纯文本查看 复制代码
import openpyxl
from openpyxl.utils import get_column_letter
import xlrd
import sys, os, collections
from pprint import pprint
from copy import copy


class splitExcel(object):

    def __init__(self, sourceFile, titleLine=None, splitColumn=None):
        self.sourceFile = sourceFile
        self.sourceWorkbook = openpyxl.load_workbook(sourceFile)
        self.targetWorkbook = openpyxl.Workbook()
        self.targetWorkbook.remove(self.targetWorkbook.active)

        # 源工作表(object对象)
        self.sourceWorksheet = None
        # 最大行数
        self.sourceWorkbookMaxRow = None
        # 最大列数
        self.sourceWorkbookMaxColumn = None
        # 源工作表索引号
        self.sourceWorksheetIndex = None
        # 标题所在行号,用户输入时索引从1开始,内部处理时请留意索引数
        # if titleLine == None:
        #     titleLine = int(input('请输入标题所在行[2]: ').strip())
        self.titleLine = titleLine
        # 根据哪个列进行拆分,用户输入时索引从1开始,内部处理时请留意索引数
        self.splitColumn = splitColumn
        # 源工作薄当前行号
        self.sbCurrectLine = None
        # 目标工作薄各表当前数据行号
        self.tbCurrectLines = {}
        # 表头文字
        self.header = []
        # 各表数据
        self.data = collections.OrderedDict()
        # 保存列宽
        self.columnsWidth = collections.OrderedDict()
        # 格式
        self.formats = {}

    def readData(self):
        wb = xlrd.open_workbook(self.sourceFile)
        ws = wb.sheet_by_index(self.sourceWorksheetIndex)
        for x in range(ws.nrows):
            if x < self.titleLine:
                # 表头
                self.header.append(ws.row_values(x))
            else:
                v = ws.cell(x, self.splitColumn - 1).value
                sheetName = self.clearSheetName(v)
                # 将表名加入data字典
                if sheetName not in self.data.keys():
                    self.data[sheetName] = []
                # 添加数据
                self.data[sheetName].append(ws.row_values(x))

    def selectSplitSheet(self):
        if len(self.sourceWorkbook.sheetnames) == 1:
            self.sourceWorksheet = self.sourceWorkbook.active
            self.sourceWorksheetIndex = 0
        else:
            _n_ = 0
            print('在工作薄中找到以下工作表:')
            for SheetName in self.sourceWorkbook.sheetnames:
                print(_n_, SheetName)
                _n_ += 1

            n = 0
            _input = input('请输入要拆分表的序号[0]: ').strip()
            if _input != '':
                n = int(_input)
            self.sourceWorksheet = self.sourceWorkbook.worksheets[n]
            self.sourceWorksheetIndex = n

    def selectSplitColumn(self):
        wb = xlrd.open_workbook(self.sourceFile)
        ws = wb.sheet_by_index(self.sourceWorksheetIndex)

        # 保存一下最大行数、列数,以供其它(属性)地方使用
        self.sourceWorkbookMaxRow = ws.nrows
        self.sourceWorkbookMaxColumn = ws.ncols

        print('\n在工作表的标题行(第 %s 行)找到以下列: ' % self.titleLine)

        for y in range(1, ws.ncols + 1):
            print(y, ws.cell(self.titleLine - 1, y - 1).value)

        columnNum = input('请输入拆分列号[2]: ').strip()
        if columnNum == '':
            columnNum = 2
        else:
            columnNum = int(columnNum)
        self.splitColumn = columnNum

    def readCellsStyle(self):
        ws = self.sourceWorksheet
        maxColumn = self.sourceWorkbookMaxColumn
        styles = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        fonts = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        borders = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        fills = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        alignments = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        number_formats = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        protections = [[None] * (maxColumn + 11) for i in range(self.titleLine + 11)]
        heights = [None] * (self.titleLine + 11)
        widths = [None] * (maxColumn + 11)
        # isDates = [None] * (maxColumn + 11)

        for x in range(1, self.titleLine + 2):
            heights[x] = ws.row_dimensions[x].height
            # print(x,'height',heights[x])
            # for y in range(1, ws.max_column + 1):
            for y in range(1, maxColumn + 1):
                styles[x][y] = copy(ws.cell(x, y).style.replace('常规', 'Normal'))
                fonts[x][y] = copy(ws.cell(x, y).font)
                borders[x][y] = copy(ws.cell(x, y).border)
                fills[x][y] = copy(ws.cell(x, y).fill)
                alignments[x][y] = copy(ws.cell(x, y).alignment)
                number_formats[x][y] = copy(ws.cell(x, y).number_format)
                protections[x][y] = copy(ws.cell(x, y).protection)
                if y not in widths:
                    widths[y] = ws.column_dimensions[get_column_letter(y)].width
                # print(y, get_column_letter(y), ws.column_dimensions[get_column_letter(y)].width)
                # if y not in isDates:
                #     isDates[y] = ws.cell(x, y).is_date

        self.formats['heights'] = heights
        self.formats['styles'] = styles
        self.formats['fonts'] = fonts
        self.formats['borders'] = borders
        self.formats['fills'] = fills
        self.formats['alignments'] = alignments
        self.formats['number_formats'] = number_formats
        self.formats['protections'] = protections
        self.formats['widths'] = widths
        # self.formats['isDates'] = isDates

    def writeFormatToNewWorkbook(self):
        for sheetName in self.data.keys():
            ws = self.targetWorkbook[sheetName]
            # for x in range(1,self.titleLine+2):
            for x in range(1, ws.max_row + 1):
                # 表头
                if x <= self.titleLine:
                    xx = x
                    # ws.row_dimensions[x].height = self.formats['heights'][x]
                else:
                    # 表数据
                    xx = self.titleLine + 1
                height = self.formats['heights'][xx]
                if not height == None and height > 0:
                    ws.row_dimensions[x].height = self.formats['heights'][xx]

                for y in range(1, ws.max_column + 1):
                    # 表头
                    if x <= self.titleLine:
                        # 路过空白单元格
                        # if ws.cell(x, y).value == '':
                        #     continue
                        width = self.formats['widths'][y]
                        if not width == None and width > 0:
                            ws.column_dimensions[get_column_letter(y)].width = self.formats['widths'][y]
                        xx = x
                    else:
                        # 表数据
                        xx = self.titleLine + 1
                    ws.cell(x, y).style = self.formats['styles'][xx][y]
                    ws.cell(x, y).font = self.formats['fonts'][xx][y]
                    ws.cell(x, y).border = self.formats['borders'][xx][y]
                    ws.cell(x, y).fill = self.formats['fills'][xx][y]
                    ws.cell(x, y).alignment = self.formats['alignments'][xx][y]
                    ws.cell(x, y).number_format = self.formats['number_formats'][xx][y]
                    ws.cell(x, y).protection = self.formats['protections'][xx][y]

                    # if x>self.titleLine and self.formats['isDates'][y]:
                    #     ws.cell(x, y).number_format = 'yyyy/mm/dd'

    def writeDataToNewWorkbook(self):
        for sheetName in self.data.keys():
            ws = self.targetWorkbook.create_sheet(sheetName)
            # 写入头文字
            x = 0  # 行号
            for row in self.header:
                x += 1
                y = 0  # 列号
                for cellValue in row:
                    y += 1
                    if not cellValue == '':
                        ws.cell(x, y).value = cellValue
            # 写入数据
            for row in self.data[sheetName]:
                ws.append(row)

    def clearSheetName(self, name, replaceAs='-'):
        invalidChars = r':\/?*[]:'
        for c in invalidChars:
            name = name.replace(c, replaceAs).strip()
        return name

    def selectTitleLine(self):
        wb = xlrd.open_workbook(self.sourceFile)
        ws = wb.sheet_by_index(self.sourceWorksheetIndex)

        # 保存一下最大行数、列数,以供其它(属性)地方使用
        self.sourceWorkbookMaxRow = ws.nrows
        self.sourceWorkbookMaxColumn = ws.ncols

        print('打印所拆分工作表前10行,前5列数据:')
        maxY = ws.ncols
        if ws.ncols > 5:
            maxY = 5
        for x in range(10):
            tempList = []
            for y in range(maxY):
                tempList.append(ws.cell(x, y).value)
            print('第%s行:' % (x + 1), tempList)

        titleLine = 2
        n = input('\n请输入标题行所在行号[2]:').strip()
        if not n == '':
            titleLine = int(n)

        self.titleLine = titleLine

    def make(self):
        self.selectSplitSheet()
        self.selectTitleLine()
        self.selectSplitColumn()

        print('开始读取数据...')
        self.readData()
        print('开始读取格式...')
        self.readCellsStyle()
        print('开始写入数据至分表...')
        self.writeDataToNewWorkbook()
        print('开始写入格式至分表...')
        self.writeFormatToNewWorkbook()

    def save(self, filename=None):
        if filename == None:
            splitPath = os.path.split(self.sourceFile)
            filename = splitPath[0] + '/拆分_' + splitPath[1]
        self.targetWorkbook.save(filename)
        self.sourceWorkbook.close()
        self.targetWorkbook.close()
        return filename


class saveWorksheetToWorkbook(object):
    def __init__(self, excelFile):
        self.excelFile = excelFile

    def saveTo(self, savePath=None, addNumToFilename=True):
        if savePath == None:
            splitPath = os.path.splitext(self.excelFile)
            savePath = splitPath[0]
            if not os.path.exists(savePath):
                os.makedirs(savePath, exist_ok=True)

        wb = openpyxl.load_workbook(self.excelFile)
        sheetNames = wb.sheetnames
        wb.close()

        #
        # _input=input('正在保存各表至独立工作薄,文件名前是否加序号?[Y] Y/N:').strip().lower()
        # if _input=='n':
        #     addNumToFilename=False

        n=0
        for sheetName in sheetNames:
            n+=1
            print('保存', n, sheetName)
            wb = openpyxl.load_workbook(self.excelFile)
            for ws in wb.worksheets:
                if not sheetName == ws.title:
                    wb.remove(ws)

            xh=''
            if addNumToFilename:
                xh=str(n)
            filename='%s/%s%s.xlsx' % (savePath, xh,sheetName)
            wb.save(filename)
            wb.close()

        return savePath


if __name__ == '__main__':
    file = r'C:\Users\Eyes\Desktop\汇总.xlsx'
    file = sys.argv[1]
    se = splitExcel(file)
    # file = r'C:\Users\Eyes\Desktop\2019年积分排名-2019-07-09.xlsx'
    # se = splitExcel(file, 1, 3)
    se.make()
    f = se.save()
    print('拆分汇总文件:', f)

    # f=r'C:\Users\Eyes\Desktop\汇总.拆分.xlsx'
    saveTo = saveWorksheetToWorkbook(f)
    p = saveTo.saveTo()
    print('拆分表保存文件夹:', p)

    input('\n完成,按回车键退出。')

免费评分

参与人数 32吾爱币 +32 热心值 +25 收起 理由
Jue0.0 + 1 谢谢@Thanks!
xgr2002 + 1 + 1 我很赞同!
hakuei1942 + 1 谢谢@Thanks!
hyebox + 1 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!
RRRRRRRRRRRRRRR + 1 我很赞同!
哆啦bug梦 + 1 + 1 我很赞同!
18032137867 + 1 + 1 我很赞同!
pibyaya + 1 支持原创,楼主加油!
jsnu1009 + 1 谢谢@Thanks!
lazy0lazy + 1 谢谢@Thanks!
ymhld + 1 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!
vigoss_98 + 1 我很赞同!
zerowx + 1 + 1 热心回复!
浓茶 + 1 + 1 拆分excel1.0工具,在win10下,不能用
rmyyzf + 1 + 1 谢谢@Thanks!
lyqjqly + 1 + 1 谢谢@Thanks!
云深不知处丶 + 1 + 1 谢谢@Thanks!
Pony21 + 1 + 1 谢谢@Thanks!
wjr0060 + 1 谢谢@Thanks!
向往的歌 + 1 + 1 虽不如某汇总大师好,但能感觉到楼主很用心!感谢分享啦!期待以后能分享到.
eli_baba + 1 + 1 用心讨论,共获提升!
夏夜吉他 + 1 + 1 我很赞同!
king_88888 + 1 + 1 我很赞同!
melodyduo + 1 谢谢@Thanks!
pguan + 1 + 1 谢谢@Thanks!
jingguojing + 1 我很赞同!
emin199 + 1 + 1 谢谢@Thanks!
caituaner + 1 谢谢@Thanks!
sora7 + 1 谢谢@Thanks!
Hmily + 6 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!
z1871691387 + 1 + 1 热心回复!
Hnldxjh + 1 + 1 谢老师无私奉献!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

huangitunes 发表于 2019-10-16 07:39
支持原创,感谢分享
silence05 发表于 2020-3-2 13:36
用VBA根据列内容自动拆分到同一工作簿,代码如下:
Sub chaifenshuju()


Dim SHT As Worksheet
Dim K, i, j As Integer
Dim irow As Integer '这个说的是一共多少行


l = InputBox("请输入你要按哪列分")
'判定输入是否正确
If IsNumeric(l) = False Or l < 1 Then
Exit Sub
End If

l = Val(l)


'删除无意义的表
Application.DisplayAlerts = False
If Sheets.Count > 1 Then
    For Each sht1 In Sheets
        If sht1.Name <> "数据" Then
            sht1.Delete
        End If
    Next
End If
Application.DisplayAlerts = True





irow = Sheet1.Range("a65536").End(xlUp).Row
'拆分表
For i = 2 To irow
    K = 0
    For Each SHT In Sheets
        If SHT.Name = Sheet1.Cells(i, l) Then
            K = 1
        End If
    Next
   
   
    If K = 0 Then
        Sheets.Add AFTER:=Sheets(Sheets.Count)
        Sheets(Sheets.Count).Name = Sheet1.Cells(i, l)
    End If

Next
'拷贝数据

For j = 2 To Sheets.Count
    Sheet1.Range("a1:f" & irow).AutoFilter Field:=l, Criteria1:=Sheets(j).Name
    Sheet1.Range("a1:f" & irow).Copy Sheets(j).Range("a1")
Next

Sheet1.Range("a1:f" & irow).AutoFilter

Sheet1.Select

MsgBox "已处理完毕"

End Sub



复制到宏里面直接运行就可以
 楼主| meforeg 发表于 2019-10-10 21:37
15774211127 发表于 2019-10-10 21:49
IMG_20191010_214903.jpg
那把锁点一下
 楼主| meforeg 发表于 2019-10-10 21:56

没有那把锁,可能级别还不够
Hnldxjh 发表于 2019-10-10 22:03
谢老师无私奉献!
cdwdz 发表于 2019-10-10 22:43
感谢分享   谢谢
罪不至挂机 发表于 2019-10-10 22:56
感谢分享
mikezhql 发表于 2019-10-11 07:43
感谢分享
abs 发表于 2019-10-11 08:38
这个可以,数据透视表也可以达成。
abs 发表于 2019-10-11 08:41
说错了,数据透视表只能拆分成在同一个簿里的多个表。需要VBA辅助才能拆成一个个的文件
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-25 05:12

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表