吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 9038|回复: 32
收起左侧

[Python 转载] Word 表格汇总到 Excel

  [复制链接]
lijt16 发表于 2021-2-2 18:55
本帖最后由 lijt16 于 2021-12-5 14:47 编辑

Word 表格汇总到Excel
Word 表格汇总到Excel
Word 表格汇总到Excel

具体使用看Word 表格汇总 Excel成品。

别再发看不懂了,看不懂的麻烦Ctrl+W关闭此贴,
别浪费你我的时间,别发无意义的评论,谢谢。


昨天发了Word 表格汇总 Excel,看评论大家还是需要源码,这样可以根据自己需求进行修改,所以还是上传上来吧,不过本人也是业余的,很多地方还有待改进。

最初的版本只用了tkinter的目录选择对话框,没有完整的图形界面;想根据自己需要修改功能的,只需要看下面这份代码就行了:
[Python] 纯文本查看 复制代码
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: UTF-8 -*-
import tkinter as tk
import win32com.client as wc
import os, re, glob, time, shutil
from tkinter import filedialog
from bs4 import BeautifulSoup
from pydocx import PyDocX
 
class Doc2Xlsx(object):
    """Collect table cells from Word documents into one Excel sheet.

    Console workflow: ``getPath`` asks for a folder and converts legacy
    ``.doc`` files to ``.docx``; ``docx2xlsx`` copies the cells selected by
    ``model.docx`` into a new workbook; ``docxCategory`` optionally groups
    the documents by table layout.  Word and Excel are driven through COM
    (``win32com``).
    """

    def __init__(self):
        super(Doc2Xlsx, self).__init__()

    def doc2docx(self, path, file):
        """Save the ``.doc`` file ``path/file`` next to itself as ``<file>x``.

        Word is always asked to quit, even when opening or saving fails, so
        no background WINWORD process is left behind.
        """
        word = wc.Dispatch("Word.Application")
        try:
            doc = word.Documents.Open(path + "/" + file)
            try:
                # 12 selects the .docx output format.
                doc.SaveAs(path + "/%sx" % file, 12)
            finally:
                doc.Close()
        finally:
            word.Quit()  # end the background process

    def delDoc(self, path, file):
        """Delete ``path/file`` once no WINWORD.EXE process is running.

        The process list is polled every 3 seconds.  After roughly 20
        seconds the method gives up, prints a timeout message and leaves the
        file in place.
        """
        wmi = wc.GetObject("winmgmts:")
        deadline = time.time() + 20
        while True:
            running = wmi.ExecQuery('select * from Win32_Process where Name="WINWORD.EXE"')
            if len(running) <= 0:
                os.remove(path + "/" + file)
                return
            if time.time() > deadline:
                print('删除 【%s】 文档超时'%file)
                return
            time.sleep(3)

    def getPath(self):
        """Ask for the document folder and convert every ``.doc`` in it.

        Returns:
            ``(path, docxs)`` where ``docxs`` lists the ``.docx`` file names
            found in ``path`` after conversion.  When the dialog is
            cancelled, ``path`` is empty and ``docxs`` is ``[]``.
        """
        root = tk.Tk()
        root.withdraw()  # only the directory dialog should be visible
        try:
            path = filedialog.askdirectory(title = "选择Word登记表所在目录")
        finally:
            root.destroy()
        if not path:
            return path, []

        docs = [i for i in os.listdir(path) if i.endswith(".doc")]
        if docs:
            print("==================Doc2Docx==================")
            for doc in docs:
                self.doc2docx(path, doc)
                self.delDoc(path, doc)
                print("%s 已转换为: %s"%(doc, doc+"x"))
            print("==================Doc2Docx==================\n")
        docxs = [i for i in os.listdir(path) if i.endswith(".docx")]
        return path, docxs

    def docxCategory(self, path, docxs):
        """Group documents by table layout and copy them to ``_cate/类别N``.

        Each document is rendered to HTML; the text between ``<td`` and
        ``</td>`` is removed from its ``<tr>`` rows and the remaining string
        is used as the grouping key.
        """
        docx_cate = {}
        for docx in docxs:
            html = PyDocX.to_html(path + "/%s" % docx)
            soup = BeautifulSoup(html, 'html.parser')
            rows = soup.find_all("tr")  # one <tr> per table row
            # Drop everything between "<td" and "</td>" so only the layout remains.
            layout = re.sub(r"(?<=<td).+?(?=</td>)", "", str(rows))
            docx_cate.setdefault(layout, []).append(docx)

        print("==================docxCategory==================")
        for c, names in enumerate(docx_cate.values(), start=1):
            path_cate = path + "/_cate/类别%d" % c
            os.makedirs(path_cate, exist_ok=True)

            print("类别%d: " % c)
            for name in names:
                print(name, end=" ")
                # Copy rather than move so the source folder stays intact.
                shutil.copyfile("%s/%s" % (path, name), "%s/%s" % (path_cate, name))
            print()
        print("==================docxCategory==================")

    def docx2xlsx(self, path, docxs):
        """Write the cells selected by ``model.docx`` into ``result.xlsx``.

        ``model.docx`` must sit in ``path``.  Every non-empty cell of the
        model holds the Excel column number that the same cell of each
        document is copied to; document ``i`` of ``docxs`` fills row
        ``i + 1``.

        ``"model.docx"`` is removed from ``docxs`` in place.

        Raises:
            ValueError: if ``"model.docx"`` is not listed in ``docxs``.
        """
        if "model.docx" not in docxs:
            raise ValueError("model.docx not found in %r" % path)
        docxs.remove("model.docx")

        # Locate the wanted cells: {row index: {cell index: column text}}.
        html = PyDocX.to_html(path + "/model.docx")
        soup = BeautifulSoup(html, 'html.parser')
        data = {}
        for row, tr in enumerate(soup.find_all("tr")):
            if tr.text:  # this row contains at least one marker
                val = {}
                for unit, td in enumerate(tr.find_all("td")):
                    if td.text:
                        val[unit] = td.text
                data[row] = val

        # Use the workbook returned by Add(); index 1 may belong to another
        # workbook when Excel was already running.
        xl = wc.Dispatch('Excel.Application')
        xl.Visible = True
        xlBook = xl.Workbooks.Add()
        xlSheet = xlBook.Worksheets(1)

        for c, docx in enumerate(docxs):
            # Per-file boundary: a document that cannot be parsed or does not
            # match the model is reported and the remaining ones still run.
            try:
                html = PyDocX.to_html(path + "/%s" % docx)
                soup = BeautifulSoup(html, 'html.parser')
                elem = soup.find_all("tr")
                for r, cells in data.items():
                    for u, column in cells.items():
                        value = elem[r].find_all("td")[int(u)].text.strip()
                        xlSheet.Cells(c+1, int(column)).Value = value
            except Exception:
                print(docx,"出错!")

        # SaveAs is given a backslash-separated path.
        xlBook.SaveAs(path.replace(r"/", "\\") + "\\result.xlsx")
        # Excel is left open so the result can be inspected; call xl.Quit()
        # here to close it instead.
 
 
if __name__ == "__main__":
    # Console entry point: choose a folder, then summarise it into Excel.
    converter = Doc2Xlsx()
    folder, documents = converter.getPath()
    converter.docx2xlsx(folder, documents)
    # Set to a truthy value to also group the documents by table layout.
    classNeeded = 0
    if classNeeded:
        converter.docxCategory(folder, documents)



后来因为自己之前就一直想学一下qt模块,看了下pyqt5和pyside2,还是用了后者。
提一下这两者都可以用自带的qtdesigner设计个大概的样式再进行修改的,不过因为第一次用,很多地方不知道怎么用。
首先是转化生成并修改的引用文件,可以直接引用.UI文件,不过因为要打包就转化成py文件了:

[Python] 纯文本查看 复制代码
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# -*- coding: utf-8 -*-
from PySide2.QtCore import *
from PySide2.QtGui import *
from PySide2.QtWidgets import *
import qtawesome
 
class d2xUI(object):
    """Widget layout of the Doc2Xlsx main window.

    Intended to be mixed into a ``QMainWindow`` subclass that calls
    ``setupUi(self)`` once; every widget is stored as an attribute so the
    subclass can connect its signals.
    """

    @staticmethod
    def _place(widget, name, x, y, width, height):
        """Set *widget*'s object name and fixed geometry, then return it."""
        widget.setObjectName(name)
        widget.setGeometry(QRect(x, y, width, height))
        return widget

    def setupUi(self, MainWindow):
        """Create all child widgets of *MainWindow* with absolute geometry."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(540, 630)  # initial window size
        sizePolicy = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(MainWindow.sizePolicy().hasHeightForWidth())
        MainWindow.setSizePolicy(sizePolicy)
        # To enforce a minimum size: MainWindow.setMinimumSize(QSize(540, 630))
        icon = QIcon()  # window icon, given as a relative file name
        icon.addFile("Trans.ico", QSize(), QIcon.Normal, QIcon.Off)
        MainWindow.setWindowIcon(icon)

        self.centralwidget = QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")

        # "Help" button.
        self.pushButton = self._place(
            QPushButton(self.centralwidget), "pushButton", 400, 320, 100, 40)
        # "Start" button; disabled until the paths are chosen.
        self.pushButton_2 = self._place(
            QPushButton(self.centralwidget), "pushButton_2", 400, 370, 100, 40)
        self.pushButton_2.setEnabled(False)
        # "Open result" button; disabled until a run has finished.
        self.pushButton_5 = self._place(
            QPushButton(self.centralwidget), "pushButton_5", 400, 420, 100, 40)
        self.pushButton_5.setEnabled(False)
        # "Error list" button; disabled until a run reported an error.
        self.pushButton_6 = self._place(
            QPushButton(self.centralwidget), "pushButton_6", 400, 470, 100, 40)
        self.pushButton_6.setEnabled(False)

        # Progress bar.
        self.progressBar = self._place(
            QProgressBar(self.centralwidget), "progressBar", 40, 550, 470, 30)
        self.progressBar.setStyleSheet("\n"
                        "QProgressBar::chunk\n"
                        "{\n"
                        "border-radius:15px;\n"
                        "background:qlineargradient(spread:pad,x1:0,y1:0,x2:1,y2:0,stop:0 #70649A,stop:1  #26B4FF);\n"
                        "border-radius:4px;/*\u6591\u9a6c\u7ebf \u5706\u89d2*/\n"
                        "border:1px solid black;\n"
                        "background-color:skyblue;\n"
                        "width:6px;margin:0.5px;/*\u5bbd\u5ea6\u548c\u5916\u8fb9\u8ddd*/\n"
                        " \n"
                        "}\n"
                        "QProgressBar#progressBar\n"
                        "{\n"
                        "height:20px;\n"
                        "text-align:center;\n"
                        "font-size:14px;\n"
                        "color:white;\n"
                        "border-radius:11px;\n"
                        "background: #0f1423 ;\n"
                        "}")
        self.progressBar.setValue(0)

        # ---- "Operation" group: summarise or classify -------------------
        self.groupBox = self._place(
            QGroupBox(self.centralwidget), "groupBox", 40, 20, 470, 100)
        self.buttonGroup = QButtonGroup(MainWindow)
        self.buttonGroup.setObjectName("buttonGroup")
        # "Summarise" radio button, selected by default.
        self.radioButton = QRadioButton(self.groupBox)
        self.radioButton.setChecked(True)
        self._place(self.radioButton, "radioButton", 50, 30, 185, 50)
        # "Classify" radio button.
        self.radioButton_2 = self._place(
            QRadioButton(self.groupBox), "radioButton_2", 235, 30, 185, 50)
        self.buttonGroup.addButton(self.radioButton)
        self.buttonGroup.addButton(self.radioButton_2)

        # ---- "Path" group: document folder and model file ---------------
        self.groupBox_2 = self._place(
            QGroupBox(self.centralwidget), "groupBox_2", 40, 140, 470, 150)
        self.comboBox = self._place(
            QComboBox(self.groupBox_2), "comboBox", 20, 30, 350, 40)
        self.comboBox_2 = self._place(
            QComboBox(self.groupBox_2), "comboBox_2", 20, 90, 350, 40)
        self.pushButton_3 = self._place(
            QPushButton(self.groupBox_2), "pushButton_3", 380, 30, 70, 40)
        self.pushButton_4 = self._place(
            QPushButton(self.groupBox_2), "pushButton_4", 380, 90, 70, 40)

        # ---- "Other" group: output options -------------------------------
        self.groupBox_3 = self._place(
            QGroupBox(self.centralwidget), "groupBox_3", 40, 310, 300, 210)
        # Serial number in first column / quote before numbers.
        self.checkBox = self._place(
            QCheckBox(self.groupBox_3), "checkBox", 30, 30, 250, 40)
        self.checkBox_2 = self._place(
            QCheckBox(self.groupBox_3), "checkBox_2", 30, 60, 250, 40)
        # Save result file, checked by default.
        self.checkBox_3 = QCheckBox(self.groupBox_3)
        self.checkBox_3.setChecked(True)
        self._place(self.checkBox_3, "checkBox_3", 30, 90, 250, 40)
        # Run Excel in the background, checked by default.
        self.checkBox_4 = QCheckBox(self.groupBox_3)
        self.checkBox_4.setChecked(True)
        self._place(self.checkBox_4, "checkBox_4", 30, 120, 250, 40)
        # Normalise dates.
        self.checkBox_5 = self._place(
            QCheckBox(self.groupBox_3), "checkBox_5", 30, 150, 250, 40)

        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Assign the window title and every user-visible widget caption."""
        _translate = QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "Doc2Xlsx", None))
        self.pushButton_2.setText(_translate("MainWindow", "开始运行", None))
        self.groupBox.setTitle(_translate("MainWindow", "操作", None))
        self.radioButton.setText(_translate("MainWindow", "汇总", None))
        self.radioButton_2.setText(_translate("MainWindow", "分类", None))
        self.pushButton.setText(_translate("MainWindow", "相关说明", None))
        self.groupBox_3.setTitle(_translate("MainWindow", "其他", None))
        self.checkBox.setText(_translate("MainWindow", "首列添加序号", None))
        self.checkBox_2.setText(_translate("MainWindow", "数字前添加单引号", None))
        self.checkBox_3.setText(_translate("MainWindow", "保存结果文件", None))
        self.checkBox_4.setText(_translate("MainWindow", "Excel后台运行", None))
        self.checkBox_5.setText(_translate("MainWindow", "对日期格式化", None))
        self.groupBox_2.setTitle(_translate("MainWindow", "路径", None))
        self.pushButton_3.setText(_translate("MainWindow", "文件", None))
        self.pushButton_4.setText(_translate("MainWindow", "模板", None))
        self.pushButton_5.setText(_translate("MainWindow", "打开结果", None))
        self.pushButton_6.setText(_translate("MainWindow", "出错列表", None))


然后是引用它的主功能文件,如果不用图形界面只要修改功能的话,只需要看下面的就好了:
[Python] 纯文本查看 复制代码
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# -*- coding: UTF-8 -*-
import win32com.client as wc
import os, re, glob, time, shutil
from bs4 import BeautifulSoup
from pydocx import PyDocX
from PySide2.QtWidgets import *
from PySide2.QtCore import Slot
import D2XUI
 
class Doc2Xlsx(QMainWindow, D2XUI.d2xUI):
    """Main window: summarise Word tables into Excel or classify documents.

    The widgets come from ``D2XUI.d2xUI.setupUi``.  The slots below mirror
    the widget state into plain attributes, and the worker methods at the
    bottom drive Word and Excel through COM (``win32com``).
    """

    def __init__(self):
        super(Doc2Xlsx, self).__init__()
        # Options mirrored from the check boxes and radio buttons.
        self.addSerial = False      # write a running number into column 1
        self.addQuotaMark = False   # prefix integer-looking text with "'"
        self.saveResult = True      # save result.xlsx into the Word folder
        self.excelHidden = True     # keep Excel invisible and quit it afterwards
        self.regularDate = False    # normalise dates in the columns of dateLine
        self.dateLine = []          # Excel column numbers that hold dates
        self.pattern = re.compile(r"(\d{2,4})\D*(\d{0,2})\D*(\d{0,2})")
        self.operate = "summary"    # "summary" or "classify"
        self.wordPath = ""
        self.modelPath = ""
        self.openResult = ""

        # Build the widgets defined in D2XUI.
        self.setupUi(self)
        # Operation selection.
        self.buttonGroup.buttonClicked.connect(self.Operate)
        # Path selection.
        self.pushButton_3.clicked.connect(self.FilePath)
        self.comboBox.currentIndexChanged.connect(self.FilePathRe)
        self.pushButton_4.clicked.connect(self.ModelPath)
        self.comboBox_2.currentIndexChanged.connect(self.ModelPathRe)
        # Other options.
        self.checkBox.clicked.connect(self.Others)
        self.checkBox_2.clicked.connect(self.Others)
        self.checkBox_3.clicked.connect(self.ResHidden)
        self.checkBox_4.clicked.connect(self.ResHidden)
        self.checkBox_5.clicked.connect(self.RegularDate)
        # Help, start, open result, error list.
        self.pushButton.clicked.connect(self.Explain)
        self.pushButton_2.clicked.connect(self.Start)
        self.pushButton_5.clicked.connect(self.Result)
        self.pushButton_6.clicked.connect(self.Errorlist)

    @Slot()
    def Start(self):
        """Convert every ``.doc`` to ``.docx``, then run the selected operation."""
        self.progressBar.reset()
        # Each .doc is converted and the original is then deleted.
        for doc in glob.glob(self.wordPath + "/*.doc"):
            self.doc2docx(doc)
            self.delDoc(doc)

        if self.operate == "summary":
            self.docx2xlsx(self.wordPath, self.modelPath)
        elif self.operate == "classify":
            self.docxCategory(self.wordPath)

    @Slot()
    def Operate(self):
        """Switch between summarising and classifying."""
        if self.radioButton.isChecked():
            self.operate = "summary"
            self.pushButton_4.setEnabled(True)  # a model file is needed
        else:
            self.operate = "classify"
            self.pushButton_4.setEnabled(False)
        self.changeStart()
        self.progressBar.setValue(0)

    @Slot()
    def FilePath(self):
        """Let the user pick the folder that holds the Word documents."""
        chosen = QFileDialog.getExistingDirectory(self, "选择Word文件所在文件夹")
        if chosen:  # an empty string means the dialog was cancelled
            self.wordPath = chosen
            if self.comboBox.findText(chosen) < 0:  # keep the history free of duplicates
                self.comboBox.addItem(chosen)
            self.comboBox.setCurrentText(chosen)
        self.changeStart()
        self.progressBar.setValue(0)

    @Slot()
    def FilePathRe(self):
        """Take the document folder from the history combo box."""
        self.wordPath = self.comboBox.currentText()
        self.changeStart()
        self.progressBar.setValue(0)

    @Slot()
    def ModelPath(self):
        """Let the user pick the model ``.docx`` file."""
        chosen = QFileDialog.getOpenFileName(self, "选择模板文件","","Docx文件(*.docx)")[0]
        if chosen:  # an empty string means the dialog was cancelled
            self.modelPath = chosen
            if self.comboBox_2.findText(chosen) < 0:
                self.comboBox_2.addItem(chosen)
            self.comboBox_2.setCurrentText(chosen)
        self.changeStart()
        self.progressBar.setValue(0)

    @Slot()
    def ModelPathRe(self):
        """Take the model file from the history combo box."""
        self.modelPath = self.comboBox_2.currentText()
        self.changeStart()
        self.progressBar.setValue(0)

    @Slot()
    def Others(self):
        """Mirror the "serial number" and "quote before numbers" check boxes."""
        self.addSerial = self.checkBox.isChecked()
        self.addQuotaMark = self.checkBox_2.isChecked()
        self.progressBar.setValue(0)

    @staticmethod
    def _parseColumns(text):
        """Return the integers listed one per line in *text*.

        Blank lines are skipped; ``[]`` is returned when any remaining line
        is not an integer.
        """
        try:
            return [int(line) for line in text.splitlines() if line.strip()]
        except ValueError:
            return []

    @Slot()
    def RegularDate(self):
        """Ask which Excel columns contain dates when the option is ticked.

        The option is switched off again when the dialog is cancelled or no
        valid column number was entered.
        """
        self.regularDate = False
        if self.checkBox_5.isChecked():
            dline, okPressed = QInputDialog.getMultiLineText(self, "日期格式化","日期所在Excel表列数(多列换行表示):")
            columns = self._parseColumns(dline) if okPressed else []
            if columns:
                self.dateLine = columns
                self.regularDate = True
            else:
                self.checkBox_5.setChecked(False)
        self.progressBar.setValue(0)

    @Slot()
    def ResHidden(self):
        """Mirror "save result" and "Excel in background".

        Background mode is only available while the result is saved.
        """
        if self.checkBox_3.isChecked():
            self.saveResult = True
            self.checkBox_4.setEnabled(True)
            self.excelHidden = self.checkBox_4.isChecked()
        else:
            self.saveResult = False
            self.checkBox_4.setChecked(False)
            self.checkBox_4.setEnabled(False)
            self.excelHidden = False
        self.progressBar.setValue(0)

    @Slot()
    def Result(self):
        """Open the ``_cate`` folder or ``result.xlsx``, whichever applies."""
        target = None
        if self.operate == "classify":
            target = self.wordPath + "/_cate/"
        elif self.operate == "summary" and self.saveResult:
            target = self.wordPath + "/result.xlsx"
        if target is not None:
            try:
                os.startfile(target)
            except OSError:
                pass  # best effort: the result may not exist (any more)
        self.progressBar.setValue(0)

    @Slot()
    def Explain(self):
        """Show the usage notes in a message box."""
        msgBox = QMessageBox()
        msgBox.setWindowTitle ("说明")
        msgBox.addButton ("我已知晓=。=", QMessageBox.AcceptRole)
        msgBox.setStyleSheet('''
            QMessageBox { width:140px;height:20px;text-align:center;font-size:20px; };
            QPushButton { background-color: rgb(255, 85, 0); color: rgb(85, 255, 0); }''')
        message = '''
        使用前请仔细阅读以下说明:
        0. 注意数据备份;
        1. 表格无变化或提取内容位于变化部分前则无需分类;
        2. 分类后每个类别需分别制作模板文件进行汇总;
        3. [模板]:在相应单元格填Excel列数并删除所有内容;
        4. [数字前加单引号]:可避免手机号等科学记数法;
        5. [首列添加序号]:首列为序号而非数据可勾选;
        6. [保存结果文件]:后台运行保存否则前台显示结果;
        7. [对日期格式化]:统一为2021.01.01格式;
        8. [Excel后台运行]:不显示Excel操作窗口;
        9. doc会转换为docx并删除原doc;
        10. 数据出错与否最好都手动核查.'''
        msgBox.setText(message)
        msgBox.exec_()

    @Slot()
    def Errorlist(self):
        """Open ``error.txt``; disable the button when it cannot be opened."""
        try:
            os.startfile(self.wordPath + "/error.txt")
        except OSError:
            self.pushButton_6.setEnabled(False)
        self.progressBar.setValue(0)

    @Slot()
    def changeStart(self):
        """Enable the start button only when all required paths are set.

        Classifying needs the document folder; summarising also needs the
        model file.
        """
        ready = bool(self.wordPath) and (
            self.operate == "classify" or bool(self.modelPath))
        self.pushButton_2.setEnabled(ready)
        self.progressBar.setValue(0)

    # Worker methods ======================================================
    def doc2docx(self, file):
        """Save the ``.doc`` at *file* next to itself as ``<file>x``.

        Word is always asked to quit, even when opening or saving fails.
        """
        word = wc.Dispatch("Word.Application")
        try:
            doc = word.Documents.Open(file)
            try:
                # 12 selects the .docx output format.
                doc.SaveAs(file + "x", 12)
            finally:
                doc.Close()
        finally:
            word.Quit()  # end the background process

    def delDoc(self, file):
        """Delete *file* once no WINWORD.EXE process is running.

        The process list is polled every 3 seconds; after roughly 20 seconds
        the method returns without deleting anything.
        """
        wmi = wc.GetObject("winmgmts:")
        deadline = time.time() + 20
        while True:
            running = wmi.ExecQuery('select * from Win32_Process where Name="WINWORD.EXE"')
            if len(running) <= 0:
                os.remove(file)
                return
            if time.time() > deadline:
                return
            time.sleep(3)

    def isNumber(self, s):
        """Return True when ``int(s)`` succeeds (ID or phone numbers, etc.).

        ``int`` is used rather than ``float``, so decimal strings do not
        count as numbers.
        """
        try:
            int(s)
            return True
        except ValueError:
            return False

    def docxCategory(self, path):
        """Group the ``.docx`` files in *path* by table layout.

        Each document is rendered to HTML; the text between ``<td`` and
        ``</td>`` is removed from its ``<tr>`` rows and the remaining string
        is the grouping key.  Every group is copied to
        ``path/_cate/类别N/``.
        """
        docxs = glob.glob(path + "/*.docx")
        self.progressBar.setRange(0, len(docxs) + 1)
        self.progressBar.setValue(0)
        docx_cate = {}

        for c, docx in enumerate(docxs):
            self.progressBar.setValue(c + 2)
            html = PyDocX.to_html(docx)
            soup = BeautifulSoup(html, 'html.parser')
            rows = soup.find_all("tr")  # one <tr> per table row
            # Drop everything between "<td" and "</td>" so only the layout remains.
            layout = re.sub(r"(?<=<td).+?(?=</td>)", "", str(rows))
            docx_cate.setdefault(layout, []).append(docx)
        self.progressBar.setValue(0)

        progress = 1
        for c, group in enumerate(docx_cate.values(), start=1):
            path_cate = path + "/_cate/类别%d/" % c
            os.makedirs(path_cate, exist_ok=True)
            for source in group:
                # Copy rather than move so the source folder stays intact.
                shutil.copyfile(source, path_cate + os.path.basename(source))
                progress += 1
                self.progressBar.setValue(progress)
        self.pushButton_5.setEnabled(True)  # "open result" is now meaningful

    def _readModel(self, model):
        """Return ``{row index: {cell index: column text}}`` for *model*.

        Only rows and cells with non-empty text are recorded.
        """
        soup = BeautifulSoup(PyDocX.to_html(model), 'html.parser')
        data = {}
        for row, tr in enumerate(soup.find_all("tr")):
            if tr.text:
                val = {}
                for unit, td in enumerate(tr.find_all("td")):
                    if td.text:
                        val[unit] = td.text
                data[row] = val
        return data

    def _formatDate(self, text):
        """Rewrite the first date-like run in *text* as ``Y.MM[.DD]``.

        *text* is returned unchanged when ``self.pattern`` does not match.
        """
        match = self.pattern.search(text)
        if match is None:
            return text
        year, month, day = match.groups()
        if day:
            return "{}.{:0>2s}.{:0>2s}".format(year, month, day)
        return "{}.{:0>2s}".format(year, month)

    def docx2xlsx(self, path, model):
        """Copy the cells selected by *model* from every docx into Excel.

        Every non-empty cell of the model holds the Excel column number that
        the same cell of each document is copied to; document ``i`` fills
        row ``i + 1``.  Documents that cannot be processed are listed in
        ``path/error.txt`` and the run continues with the next one.
        """
        error_log = path + "/error.txt"
        if os.path.exists(error_log):  # drop the list of the previous run
            os.remove(error_log)
        docxs = [i.replace("\\", "/") for i in glob.glob(path + "/*.docx")]
        if model in docxs:
            docxs.remove(model)

        self.progressBar.setRange(0, len(docxs) + 1)
        self.progressBar.setValue(0)

        data = self._readModel(model)

        # Use the workbook returned by Add(); index 1 may belong to another
        # workbook when Excel was already running.
        xl = wc.Dispatch('Excel.Application')
        xl.Visible = not (self.saveResult and self.excelHidden)
        xlBook = xl.Workbooks.Add()
        xlSheet = xlBook.Worksheets(1)

        for c, docx in enumerate(docxs):
            if self.addSerial:
                xlSheet.Cells(c+1, 1).Value = c + 1
            # Per-file boundary: parsing is inside the try as well, so a
            # corrupt document is logged instead of aborting the whole run.
            try:
                soup = BeautifulSoup(PyDocX.to_html(docx), 'html.parser')
                elem = soup.find_all("tr")
                for r, cells in data.items():
                    for u, target in cells.items():
                        text = elem[r].find_all("td")[int(u)].text.strip()
                        column = int(target)
                        if self.regularDate and column in self.dateLine:
                            text = self._formatDate(text)
                        if self.addQuotaMark and self.isNumber(text):
                            text = "'" + text  # keeps Excel from reformatting long numbers
                        xlSheet.Cells(c+1, column).Value = text
            except Exception:
                self.pushButton_6.setEnabled(True)  # the error list can now be opened
                with open(error_log, "a", encoding="utf-8") as g:
                    g.write(os.path.basename(docx) + "\n")
            self.progressBar.setValue(c + 2)

        if self.saveResult:
            # SaveAs is given a backslash-separated path.
            xlBook.SaveAs(path.replace(r"/", "\\") + "\\result.xlsx")
            self.pushButton_5.setEnabled(True)  # "open result" is now meaningful
            if self.excelHidden:
                xl.Quit()
 
 
def main():
    """Create the Qt application, show the main window and run the event loop."""
    application = QApplication([])
    window = Doc2Xlsx()
    window.show()
    application.exec_()


if __name__ == "__main__":
    main()



有评论说Excel催化剂能做到,我去下载了,但功能实在太多,不知道怎么用,有需要的朋友可以下载。
还有评论要的VBA版我也传上来了,我记得有官网的,程序里面应该就有,出错可以去看一下。

【链接】
https://pan.lanzouj.com/b00u7yupc
密码:9idc


https://wwa.lanzouj.com/b00u8190j
密码:bgm9




链接打不开的,是蓝奏的问题,把链接中的lanzous改为lanzoui。

免费评分

参与人数 11吾爱币 +20 热心值 +9 收起 理由
diliuyue + 1 谢谢@Thanks!
冷月1324 + 1 谢谢@Thanks!
ylixx + 1 我很赞同!
人二 + 2 + 1 感谢分享,学习一下
feiing + 1 + 1 谢谢@Thanks!
Jacobian + 1 + 1 谢谢@Thanks!
ladybug2020 + 1 + 1 谢谢@Thanks!
苏紫方璇 + 10 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!
www.52pojie.cn + 1 + 1 我很赞同!
hj170520 + 1 + 1 热心回复!
洋丶小洋丶 + 1 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!

查看全部评分

本帖被以下淘专辑推荐:

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

hxw139 发表于 2021-2-2 19:39
lijt16 发表于 2021-2-2 19:35
word表格汇总到excel,很难理解?

额   懂起了  不好意思  平时都是excel表格到word  这个还没操作过  感谢分享
 楼主| lijt16 发表于 2021-12-2 22:21
eersoft 发表于 2021-12-2 21:45
楼主分享的vba版是我的,楼主不错原样分享了。前段时间看到某知名大V 王D田,在视频中将我的工具 ...

牛,我是在做的过程中不经意发现你的工具的,真的厉害
blueczw 发表于 2021-2-2 19:01
 楼主| lijt16 发表于 2021-2-2 19:25
blueczw 发表于 2021-2-2 19:01
不明白这是嘛意思

emmmm你这。。
hxw139 发表于 2021-2-2 19:33
干嘛用的啊?不明所以
 楼主| lijt16 发表于 2021-2-2 19:35
hxw139 发表于 2021-2-2 19:33
干嘛用的啊?不明所以

word表格汇总到excel,很难理解?
ciker_li 发表于 2021-2-2 20:56
感谢楼主分享源码
bsjasd 发表于 2021-2-2 21:53
多谢楼主分享
hebeijianke 发表于 2021-2-2 23:45
感谢楼主分享,向楼主学习
Wapj_Wolf 发表于 2021-2-3 08:21
近700行的代码看着有些眼晕,够我消化一阵子的了,谢谢大佬无私分享……
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2025-4-11 13:50

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表