import
os
import
sys
from
collections
import
defaultdict
from
docx
import
Document
from
PyQt5.QtWidgets
import
(QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget,
QLabel, QPushButton, QTextEdit, QFileDialog, QSpinBox,
QGroupBox, QProgressBar)
from
PyQt5.QtCore
import
Qt
from
PyQt5.QtGui
import
QFont, QIcon
class TitleExtractorApp(QMainWindow):
    """Main window of a tool that collects heading text from Word documents.

    The user picks a folder; every ``.docx`` file found below it is opened
    with python-docx, and the text of each paragraph whose style name has the
    form ``Heading <n>`` (with ``n`` not above the selected maximum level) is
    gathered. The result is shown in a read-only text box and can be written
    to a UTF-8 text file.

    Attributes:
        selected_folder: Folder chosen by the user, or ``""`` if none yet.
        extracted_data: Maps a document's path (relative to
            ``selected_folder``) to a ``{level: [heading text, ...]}`` dict.
    """

    def __init__(self):
        """Create the window, initialise its state and build the widgets."""
        super().__init__()

        # State is created before the widgets so every slot can rely on it.
        self.selected_folder = ""
        self.extracted_data = {}

        self.setWindowTitle("Word文档标题提取工具")
        self.setWindowIcon(QIcon('icon.png'))
        self.setGeometry(100, 100, 800, 600)
        # NOTE(review): setStyleSheet() was called without an argument, which
        # raises TypeError in PyQt5. The intended stylesheet text is not
        # present in this file; an empty sheet is applied until it is restored.
        self.setStyleSheet("")
        self.init_ui()

    def init_ui(self):
        """Build all widgets and install them as the central widget."""
        title_label = QLabel("Word文档标题提取工具")
        title_label.setFont(QFont('Arial', 16, QFont.Bold))
        title_label.setAlignment(Qt.AlignCenter)
        title_label.setStyleSheet("color: #333; margin-bottom: 20px;")

        settings_group = self._build_settings_group()

        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        self.progress_bar.setTextVisible(False)

        button_layout = self._build_button_row()
        result_group = self._build_result_group()

        main_layout = QVBoxLayout()
        main_layout.addWidget(title_label)
        main_layout.addWidget(settings_group)
        main_layout.addWidget(self.progress_bar)
        main_layout.addLayout(button_layout)
        main_layout.addWidget(result_group)

        main_widget = QWidget()
        main_widget.setLayout(main_layout)
        self.setCentralWidget(main_widget)

        self.statusBar().showMessage("准备就绪")

    def _build_settings_group(self):
        """Return the group box holding the folder picker and level spinner."""
        self.folder_label = QLabel("未选择文件夹")
        self.folder_label.setStyleSheet("color: #666;")
        browse_button = QPushButton("选择文件夹")
        browse_button.clicked.connect(self.select_folder)

        folder_layout = QVBoxLayout()
        folder_layout.addWidget(QLabel("文档文件夹:"))
        folder_layout.addWidget(self.folder_label)
        folder_layout.addWidget(browse_button)

        self.level_spin = QSpinBox()
        self.level_spin.setRange(1, 6)
        self.level_spin.setValue(3)

        level_layout = QVBoxLayout()
        level_layout.addWidget(QLabel("最大标题级别:"))
        level_layout.addWidget(self.level_spin)

        # Stretch factors 70/30 give the folder column most of the width.
        settings_layout = QHBoxLayout()
        settings_layout.addLayout(folder_layout, 70)
        settings_layout.addLayout(level_layout, 30)

        settings_group = QGroupBox("提取设置")
        settings_group.setLayout(settings_layout)
        return settings_group

    def _build_button_row(self):
        """Return the layout with the extract / save / clear buttons."""
        button_specs = (
            ("提取标题", self.extract_titles, "background-color: #2196F3;"),
            ("保存结果", self.save_results, "background-color: #FF9800;"),
            ("清空结果", self.clear_results, "background-color: #f44336;"),
        )
        button_layout = QHBoxLayout()
        for caption, slot, style in button_specs:
            button = QPushButton(caption)
            button.clicked.connect(slot)
            button.setStyleSheet(style)
            button_layout.addWidget(button)
        return button_layout

    def _build_result_group(self):
        """Return the group box containing the read-only result view."""
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)

        result_layout = QVBoxLayout()
        result_layout.addWidget(self.result_text)

        result_group = QGroupBox("提取结果")
        result_group.setLayout(result_layout)
        return result_group

    def select_folder(self):
        """Ask the user for a folder and remember it if one was chosen."""
        folder = QFileDialog.getExistingDirectory(self, "选择包含Word文档的文件夹")
        if folder:
            self.selected_folder = folder
            self.folder_label.setText(folder)
            self.statusBar().showMessage(f"已选择文件夹: {folder}")

    @staticmethod
    def _find_docx_files(folder):
        """Return the paths of all Word documents found below *folder*.

        The extension is matched case-insensitively. Names starting with
        ``~$`` are skipped: Word creates such lock files next to documents
        that are open, and they are not readable documents themselves.
        """
        return [
            os.path.join(root, filename)
            for root, _, files in os.walk(folder)
            for filename in files
            if filename.lower().endswith('.docx')
            and not filename.startswith('~$')
        ]

    @staticmethod
    def _read_headings(file_path, max_level):
        """Return ``{level: [text, ...]}`` for the headings in one document.

        Only paragraphs whose style name starts with ``Heading`` and whose
        second whitespace-separated token is an integer not greater than
        *max_level* are included. Any error raised while opening or reading
        the document propagates to the caller.
        """
        headings = defaultdict(list)
        for paragraph in Document(file_path).paragraphs:
            style_name = paragraph.style.name or ""
            if not style_name.startswith('Heading'):
                continue
            try:
                level = int(style_name.split()[1])
            except (IndexError, ValueError):
                # e.g. a style called just "Heading" or "Heading Foo"
                continue
            if level <= max_level:
                headings[level].append(paragraph.text)
        # A plain dict avoids creating empty levels on later lookups.
        return dict(headings)

    def extract_titles(self):
        """Scan the selected folder and collect headings from each document.

        Updates ``extracted_data``, the progress bar, the result view and the
        status bar. Documents that cannot be read are reported below the
        results instead of aborting the run.
        """
        if not self.selected_folder:
            self.statusBar().showMessage("请先选择文件夹!", 3000)
            return

        max_level = self.level_spin.value()
        self.result_text.clear()
        self.extracted_data = {}
        self.progress_bar.setValue(0)

        # One walk serves both the total count and the processing loop.
        docx_paths = self._find_docx_files(self.selected_folder)
        total_files = len(docx_paths)
        if total_files == 0:
            self.statusBar().showMessage("所选文件夹中没有找到Word文档!", 3000)
            return

        errors = []
        processed_files = 0
        for attempted, file_path in enumerate(docx_paths, 1):
            # The relative path keeps same-named files in different
            # sub-folders apart; for top-level files it is the bare name.
            display_name = os.path.relpath(file_path, self.selected_folder)
            try:
                file_data = self._read_headings(file_path, max_level)
            except Exception as e:
                # Deliberately broad: one unreadable document must not stop
                # the whole batch; the failure is reported to the user.
                errors.append(f"处理文件 {display_name} 时出错: {str(e)}\n")
            else:
                if file_data:
                    self.extracted_data[display_name] = file_data
                processed_files += 1
            # Advance on every attempted file so failures do not stall the bar.
            self.progress_bar.setValue(int((attempted / total_files) * 100))

        self.display_results()
        # display_results() clears the view first, so the error messages are
        # added afterwards; otherwise they would be wiped immediately.
        for message in errors:
            self.result_text.append(message)

        self.progress_bar.setValue(100)
        self.statusBar().showMessage(f"提取完成!共处理 {processed_files} 个文件", 5000)

    def display_results(self):
        """Replace the result view's content with ``extracted_data``."""
        self.result_text.clear()
        if not self.extracted_data:
            self.result_text.append("没有提取到任何标题数据")
            return

        for filename, levels in self.extracted_data.items():
            self.result_text.append(f"=== {filename} ===")
            for level in sorted(levels):
                self.result_text.append(f"\n[标题 {level}]")
                for i, title in enumerate(levels[level], 1):
                    self.result_text.append(f"{i}. {title}")
            self.result_text.append("\n")

    def save_results(self):
        """Write ``extracted_data`` to a UTF-8 text file chosen by the user.

        Does nothing except show a status message when there is no data or
        the dialog is cancelled. A failure to write is reported in the status
        bar rather than raised.
        """
        if not self.extracted_data:
            self.statusBar().showMessage("没有可保存的数据!", 3000)
            return

        file_path, _ = QFileDialog.getSaveFileName(
            self, "保存结果", "", "文本文件 (*.txt)")
        if not file_path:
            return

        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                for filename, levels in self.extracted_data.items():
                    f.write(f"=== {filename} ===\n")
                    for level in sorted(levels):
                        f.write(f"\n[标题 {level}]\n")
                        for i, title in enumerate(levels[level], 1):
                            f.write(f"{i}. {title}\n")
                    f.write("\n")
        except (OSError, UnicodeError) as e:
            self.statusBar().showMessage(f"保存失败: {str(e)}", 5000)
        else:
            self.statusBar().showMessage(f"结果已保存到: {file_path}", 5000)

    def clear_results(self):
        """Empty the result view, drop the collected data, reset progress."""
        self.result_text.clear()
        self.extracted_data = {}
        self.progress_bar.setValue(0)
        self.statusBar().showMessage("已清空结果", 3000)
if __name__ == "__main__":
    # Script entry point: create the Qt application, apply a default font,
    # show the main window and hand control to the event loop.
    application = QApplication(sys.argv)

    default_font = QFont()
    default_font.setFamily("Segoe UI")
    default_font.setPointSize(10)
    application.setFont(default_font)

    main_window = TitleExtractorApp()
    main_window.show()

    # exec_() blocks until the application quits; its return value becomes
    # the process exit status.
    exit_code = application.exec_()
    sys.exit(exit_code)