# [Python] 纯文本查看 复制代码  (forum copy-paste header, "view as plain text / copy code"; not part of the program)
# -*- coding: utf-8 -*-
import tkinter as tk
from tkinter import filedialog
from tkinter import scrolledtext # 滚动文本框组件
from tkinter.messagebox import showerror,showinfo,showwarning
from tkinter import ttk
import re
import os
import os.path as osp
from glob import glob
import pandas as pd
class MyPage(tk.Frame):
    """Page that fuzzy-matches one Excel column against another.

    The user picks two workbooks, names one column in each, and enters a
    score threshold.  For every row of the first workbook the best-scoring
    value from the second workbook's column is written to a new ``matches``
    column (empty string when no candidate reaches the threshold), and the
    result is saved next to the first workbook.
    """

    # Number of top-scoring candidates requested per row from process.extract.
    _MATCH_LIMIT = 2

    def __init__(self, parent):
        """Create the bordered container frame inside *parent* and fill it."""
        super().__init__(parent)
        self.frame = tk.Frame(self, width=615, height=520, highlightbackground="black",
                              highlightthickness=1, bd=3)
        self.frame.pack(side='right', fill='y')
        self.createWidget()

    def createWidget(self):
        """Create and absolutely position every label, entry and button."""
        label_font = ("楷体", 11)
        # Options shared by every button on the page.
        button_style = {
            'height': 1,
            'font': label_font,
            'bg': '#BFEFFA',
            'relief': 'sunken',
            'activebackground': 'green',
        }

        tk.Label(self.frame, text="****Excel两列字符串模糊匹配子系统****", width=40, height=2,
                 font=("Calibri", 15, 'bold'), fg='red').place(relx=0.5, rely=0.0, anchor='n')

        # --- first workbook -------------------------------------------------
        tk.Button(self.frame, text="excel(df_1):", width=12,
                  command=lambda: self.select_xlsx(self.input_file1_entry),
                  **button_style).place(x=162, y=60)
        self.var_input_file1 = tk.StringVar()
        self.input_file1_entry = tk.Entry(self.frame, width=30, textvariable=self.var_input_file1)
        self.input_file1_entry.place(x=268, y=62)
        # Tk calls a bound handler with the event as its only argument, so the
        # target entry has to be supplied explicitly; binding the method
        # directly would pass the event object in place of the entry.
        self.input_file1_entry.bind(
            "<Return>", lambda event: self.select_xlsx(self.input_file1_entry, event))

        tk.Label(self.frame, text="df_1待匹配的列名:", width=17, height=1,
                 font=label_font, fg='red').place(x=162, y=110)
        self.var_df_1 = tk.StringVar()
        self.input_df_1_entry = tk.Entry(self.frame, width=25, textvariable=self.var_df_1,
                                         justify='center')
        self.input_df_1_entry.place(x=300, y=112)

        # --- second workbook ------------------------------------------------
        tk.Button(self.frame, text="excel(df_2):", width=12, anchor='w',
                  command=lambda: self.select_xlsx(self.input_file2_entry),
                  **button_style).place(x=162, y=160)
        self.var_input_file2 = tk.StringVar()
        self.input_file2_entry = tk.Entry(self.frame, width=30, textvariable=self.var_input_file2)
        self.input_file2_entry.place(x=268, y=162)
        self.input_file2_entry.bind(
            "<Return>", lambda event: self.select_xlsx(self.input_file2_entry, event))

        tk.Label(self.frame, text="df_2待匹配的列名:", width=17, height=1,
                 font=label_font, fg='blue').place(x=162, y=210)
        self.var_df_2 = tk.StringVar()
        self.input_df_2_entry = tk.Entry(self.frame, width=25, textvariable=self.var_df_2,
                                         justify='center')
        self.input_df_2_entry.place(x=300, y=212)

        # --- threshold ------------------------------------------------------
        tk.Label(self.frame, text="匹配阈值(最大100):", width=18, height=1,
                 font=label_font, fg='green').place(x=162, y=260)
        self.var_yuzhi = tk.StringVar()
        self.input_yuzhi_entry = tk.Entry(self.frame, width=24, textvariable=self.var_yuzhi,
                                          justify='center')
        self.input_yuzhi_entry.place(x=308, y=262)

        # --- action buttons -------------------------------------------------
        tk.Button(self.frame, text="匹配结果", width=8, command=self.fuzzy_merge,
                  **button_style).place(x=162, y=310)
        tk.Button(self.frame, text="系统重置", width=8, command=self.reset_system,
                  **button_style).place(x=282, y=310)
        tk.Button(self.frame, text="系统退出", width=8, command=self.exit_system,
                  **button_style).place(x=402, y=310)

    def select_xlsx(self, entry, event=None):
        """Ask for an Excel file and put the chosen path into *entry*.

        Args:
            entry: The ``tk.Entry`` whose content is replaced by the path.
            event: Tk event when triggered from a key binding; unused.

        The entry is left untouched when the dialog is cancelled.
        """
        chosen_path = filedialog.askopenfilename(
            initialdir=None, title="选择xlsx文件",
            filetypes=[('Excel Files', ['.xlsx', '.xls'])], defaultextension=".xlsx")
        if chosen_path:
            entry.delete(0, tk.END)  # clear the current content
            entry.insert(tk.END, chosen_path)  # then show the selected path

    @staticmethod
    def _best_match(candidates, threshold):
        """Return the first candidate string scoring >= *threshold*, else ''.

        *candidates* is a sequence whose items carry the matched string at
        index 0 and its similarity score at index 1.
        """
        return next((item[0] for item in candidates if item[1] >= threshold), '')

    @staticmethod
    def _result_path(input_path, threshold):
        """Build the output path: same directory and extension as *input_path*."""
        stem, extension = osp.splitext(input_path)
        return osp.join(osp.dirname(stem), f'匹配分大于{threshold}结果' + extension)

    # Fuzzy matching
    def fuzzy_merge(self):
        """Match the chosen df_1 column against the df_2 column and save the result.

        Shows a warning when any field is empty and an error dialog when the
        threshold is not an integer, a workbook cannot be read or written, or
        a column name does not exist.  On success the result workbook is
        written beside the first input file and opened (or, where
        ``os.startfile`` is unavailable, its path is shown).
        """
        required = (self.var_input_file1, self.var_input_file2,
                    self.var_df_1, self.var_df_2, self.var_yuzhi)
        if not all(var.get() for var in required):
            showwarning('警告!', '请填写空白!')
            return

        try:
            threshold = int(self.var_yuzhi.get())
        except ValueError:
            showerror('错误!', '匹配阈值必须是整数!')
            return

        try:
            df_1 = pd.read_excel(self.input_file1_entry.get(), header=0)
            df_2 = pd.read_excel(self.input_file2_entry.get(), header=0)
        except Exception as err:  # UI boundary: report instead of failing silently in the Tk callback
            showerror('错误!', f'读取Excel文件失败: {err}')
            return

        key1 = self.var_df_1.get()
        key2 = self.var_df_2.get()
        for frame_name, frame, key in (('df_1', df_1, key1), ('df_2', df_2, key2)):
            if key not in frame.columns:
                showerror('错误!', f'{frame_name}中不存在列: {key}')
                return

        # Blank cells cannot be compared as text, and non-text cells are
        # compared through their string form.
        choices = df_2[key2].dropna().astype(str).tolist()
        limit = self._MATCH_LIMIT
        # NOTE(review): `process` is not imported in the visible part of this
        # file; it is presumably fuzzywuzzy/thefuzz `process`, whose extract()
        # yields (string, score) results -- confirm and add the import.
        df_1['matches'] = [
            '' if pd.isna(value)
            else self._best_match(process.extract(str(value), choices, limit=limit), threshold)
            for value in df_1[key1]
        ]

        result_path = self._result_path(self.var_input_file1.get(), threshold)
        try:
            df_1.to_excel(result_path, index=False)
        except Exception as err:  # UI boundary: e.g. file locked or no writer for the extension
            showerror('错误!', f'保存结果失败: {err}')
            return

        if hasattr(os, 'startfile'):  # os.startfile exists only on Windows
            os.startfile(result_path)
        else:
            showinfo('提示', f'结果已保存至: {result_path}')

    def reset_system(self):
        """Clear every input field on the page."""
        for var in (self.var_input_file1, self.var_input_file2,
                    self.var_df_1, self.var_df_2, self.var_yuzhi):
            var.set('')

    def exit_system(self):
        """Switch back to the start page via the master widget."""
        # Imported here rather than at module level, presumably to avoid a
        # circular import with the start-page module.
        # NOTE(review): module name `mian` may be a typo for `main` -- confirm
        # against the project's actual file name before changing it.
        from mian import StartPage
        self.master.switch_frame(StartPage, "主界面")  # return to the main screen