# [Python] listing copied from a forum code box ("view as plain text" / "copy code" caption)
# -*- coding: utf-8 -*-
import re
import urllib.request
import urllib.parse
import time
import os
from lxml import etree
import pandas as pd
from openpyxl import load_workbook
# Converts SVG path data into PNG images
from cairosvg import svg2png
import tkinter as tk
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
from tkinter.filedialog import asksaveasfilename
from tkinter.ttk import *
# Root folder for generated output: one sub-folder per character is created
# beneath it, and it is the initial directory of the save-folder dialog.
OUTPUTADDRESS = 'D:/汉字笔画/'
def select_db_file(self):
    """Ask the user for the .xlsx character list and remember its path.

    Args:
        self: Object exposing a ``db`` variable with a ``set`` method; the
            GUI passes its Tk root window, whose ``db`` is a ``tk.StringVar``.

    The stored path is left untouched when the dialog is cancelled, so an
    earlier selection is not wiped by dismissing the dialog.
    """
    db_file = askopenfilename(title="请选择汉字文件", filetypes=[('xlsx', '*.xlsx')])
    # The dialog yields an empty value when it is dismissed without a choice.
    if db_file:
        self.db.set(db_file)
def select_save_file(self):
    """Ask the user for the output folder and remember it with a trailing '/'.

    Args:
        self: Object exposing an ``f`` variable with a ``set`` method; the
            GUI passes its Tk root window, whose ``f`` is a ``tk.StringVar``.

    Nothing is stored when the dialog is cancelled; previously an empty
    result was turned into ``'/'`` and used as the save folder.
    """
    save_directory = askdirectory(initialdir=OUTPUTADDRESS)
    if not save_directory:
        return
    # Normalise to exactly one trailing slash (a drive root such as 'D:/'
    # already ends with one).
    self.f.set(save_directory.rstrip('/') + '/')
class SelectPage:
    """Main window of the stroke-order generator.

    Creating an instance destroys the window handed in, builds a new Tk root
    holding all controls and runs that root's main loop.
    """

    def __init__(self, parent_window):
        """Dispose of ``parent_window`` and show the tool's own window.

        The call only returns once the main loop started by
        ``createWidgets`` has ended.
        """
        parent_window.destroy()
        super().__init__()
        self.createWidgets()

    def createWidgets(self):
        """Create the window, place every control on a grid and run the loop.

        The Tk variables read elsewhere in this module (``font_color``,
        ``font_color_last``, ``db`` and ``f``) are stored as attributes on
        the root window itself.
        """
        root = tk.Tk()
        self.window = root
        root.winfo_toplevel()
        root.title('笔顺生成工具------Design By xxx')

        caption_font = ('微软雅黑', 10)
        hint_font = ('微软雅黑', 6)
        # Grid options shared by the cells of each column group.
        label_cell = {'padx': (2, 0), 'pady': (2, 0), 'sticky': 'WE'}
        button_cell = {'column': 1, 'columnspan': 2, 'sticky': 'W',
                       'padx': (2, 0), 'pady': (2, 0)}
        entry_cell = {'column': 3, 'padx': 3, 'pady': 3, 'sticky': 'WE'}

        # Row 1: colour of every stroke except the last one.
        Label(root, font=caption_font, text='字体颜色',
              justify='right').grid(row=1, column=1, **label_cell)
        Label(root, font=hint_font, text='(黑色:#000000;灰色:#B8B8B8)',
              justify='right',
              background='lightskyblue').grid(row=1, column=2, **label_cell)
        root.font_color = tk.StringVar(value='#B8B8B8')
        Entry(root, width=80,
              textvariable=root.font_color).grid(row=1, **entry_cell)

        # Row 2: colour of the last stroke.
        Label(root, font=caption_font, text='最后一笔颜色',
              justify='right').grid(row=2, column=1, **label_cell)
        Label(root, font=hint_font, text='(白色:#FFFFFF;红色:#FF1111)',
              justify='right',
              background='lightskyblue').grid(row=2, column=2, **label_cell)
        root.font_color_last = tk.StringVar(value='#B8B8B8')
        Entry(root, width=80,
              textvariable=root.font_color_last).grid(row=2, **entry_cell)

        # Row 3: workbook listing the characters to look up.
        root.db = tk.StringVar()
        Button(root, text='汉字文件',
               command=lambda: select_db_file(root)).grid(row=3, **button_cell)
        workbook_entry = Entry(root, width=80, textvariable=root.db)
        workbook_entry.configure(state='readonly')
        workbook_entry.grid(row=3, **entry_cell)

        # Row 4: folder that receives the generated files.
        root.f = tk.StringVar()
        Button(root, text='笔顺保存地址',
               command=lambda: select_save_file(root)).grid(row=4, **button_cell)
        folder_entry = Entry(root, width=80, textvariable=root.f)
        folder_entry.configure(state='readonly')
        folder_entry.grid(row=4, **entry_cell)

        # Row 5: start the generation with the values currently entered.
        sheet_name = '生字表(一上)'

        def start_generation():
            return createBiShun(root, root.db.get(), sheet_name, root.f.get())

        Button(root, text='生成笔顺', command=start_generation).grid(
            row=5, column=1, columnspan=3, pady=(0, 2))

        root.columnconfigure(2, weight=1)
        root.mainloop()

    def get_font_color(self):
        """Return the colour string currently held by the stroke-colour field."""
        colour_var = self.window.font_color
        return colour_var.get()
# Render the complete character (all strokes together) as a single PNG.
def ChineseChangeSVG2png(windows, svg_path, chinese):
    """Render all given strokes of a character into one black PNG.

    The image is written to ``<OUTPUTADDRESS><chinese>/<chinese>0.png``; the
    ``0`` suffix marks the picture of the complete character.

    Args:
        windows: Not used by this function.
        svg_path: Iterable of SVG path ``d`` strings, one per stroke.
        chinese: The character; used as folder name and file-name prefix.

    Rendering errors are reported on stdout and otherwise ignored.
    """
    output_dir = OUTPUTADDRESS + chinese
    # makedirs also creates a missing OUTPUTADDRESS root, which os.mkdir cannot.
    os.makedirs(output_dir, exist_ok=True)

    fill = '#000000'  # black
    svg_lines = [
        '<svg style="width: 83px; height:83px;" xmlns="http://www.w3.org/2000/svg">',
        ' <g transform="translate(3, 70) scale(0.07, -0.07)">',
    ]
    svg_lines.extend(
        ' <path d="' + stroke + '" style="fill: ' + fill + ';"></path>'
        for stroke in svg_path
    )
    svg_lines.append(' </g>')
    svg_lines.append('</svg>')

    output_filename = output_dir + '/' + chinese + '0.png'
    try:
        svg2png(bytestring='\n'.join(svg_lines), write_to=output_filename)
    except Exception as e:
        # str() is required: concatenating the exception object itself raised
        # a TypeError inside this handler.
        print('error:' + str(e))
def craw_pinyin(chinese, max_retries=5):
    """Fetch the pinyin of ``chinese`` from hanyu.baidu.com.

    NOTE: this module defines ``craw_pinyin`` a second time further down; the
    later definition is the one bound to the name at runtime.

    Args:
        chinese: The character (or word) to look up.
        max_retries: Number of page requests attempted before giving up.
            Failed attempts are separated by a 2 second pause.

    Returns:
        dict: ``{"chinese": <input>, "pinyin": <readings joined with '、'>}``;
        ``"pinyin"`` stays ``""`` when the page could not be fetched or has
        no pinyin block.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character
        "pinyin": "",  # its pinyin
    }
    for attempt in range(1, max_retries + 1):
        try:
            request = urllib.request.Request(url, headers=header)
            # The timeout keeps a stalled connection from blocking forever.
            with urllib.request.urlopen(request, timeout=15) as response:
                web_html = etree.HTML(str(response.read(), 'utf-8'))
        except Exception as e:
            print(chinese + 'server error', e)
            if attempt < max_retries:
                time.sleep(2)
            continue
        try:
            pinyin = web_html.xpath('//div[@id="pinyin"]')[0]
            chinese_details['pinyin'] = '、'.join(pinyin.xpath('span/b/text()'))
        except (IndexError, AttributeError):
            print("无法获得" + chinese + "的属性:pinyin")
        break
    # Read the value from the dict: a local would be unbound whenever the
    # extraction above failed.
    print('读取汉字信息:' + chinese, chinese_details['pinyin'])
    return chinese_details
# Render one stroke-order step as a PNG (the stroke data itself is read from Baidu by craw).
def ChangeSVG2png(windows, svg_path, chinese):
    """Render one stroke-order step: the given strokes, last one highlighted.

    The image is written to ``<OUTPUTADDRESS><chinese>/<chinese><n>.png``
    where ``n`` is the number of strokes in ``svg_path``.

    Args:
        windows: Object whose ``font_color`` and ``font_color_last``
            attributes provide ``get()``; they supply the fill colour of the
            earlier strokes and of the last stroke respectively.
        svg_path: Sequence of SVG path ``d`` strings, in writing order.
        chinese: The character; used as folder name and file-name prefix.

    Rendering errors are reported on stdout and otherwise ignored.
    """
    output_dir = OUTPUTADDRESS + chinese
    # makedirs also creates a missing OUTPUTADDRESS root, which os.mkdir cannot.
    os.makedirs(output_dir, exist_ok=True)

    body_fill = windows.font_color.get()
    last_fill = windows.font_color_last.get()

    svg_lines = [
        '<svg style="width: 83px; height:83px;" xmlns="http://www.w3.org/2000/svg">',
        ' <g transform="translate(3, 70) scale(0.07, -0.07)">',
    ]
    last_position = len(svg_path) - 1
    # Decide by position, not by list.index(): index() returns the first
    # match, so a repeated path string would be coloured incorrectly.
    for position, stroke in enumerate(svg_path):
        fill = last_fill if position == last_position else body_fill
        svg_lines.append(' <path d="' + stroke + '" style="fill: ' + fill + ';"></path>')
    svg_lines.append(' </g>')
    svg_lines.append('</svg>')

    output_filename = output_dir + '/' + chinese + str(len(svg_path)) + '.png'
    try:
        svg2png(bytestring='\n'.join(svg_lines), write_to=output_filename)
    except Exception as e:
        # str() is required: concatenating the exception object itself raised
        # a TypeError inside this handler.
        print('error:' + str(e))
def craw_pinyin(chinese, max_retries=5):
    """Fetch the pinyin of ``chinese`` from hanyu.baidu.com.

    NOTE: an earlier definition of ``craw_pinyin`` exists in this module;
    this later one replaces it when the module is loaded.

    Args:
        chinese: The character (or word) to look up.
        max_retries: Number of page requests attempted before giving up.
            Failed attempts are separated by a 2 second pause.

    Returns:
        dict: ``{"chinese": <input>, "pinyin": <readings joined with '、'>}``;
        ``"pinyin"`` stays ``""`` when the page could not be fetched or has
        no pinyin block.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character
        "pinyin": "",  # its pinyin
    }
    for attempt in range(1, max_retries + 1):
        try:
            request = urllib.request.Request(url, headers=header)
            # The timeout keeps a stalled connection from blocking forever.
            with urllib.request.urlopen(request, timeout=15) as response:
                web_html = etree.HTML(str(response.read(), 'utf-8'))
        except Exception as e:
            print(chinese + 'server error', e)
            if attempt < max_retries:
                time.sleep(2)
            continue
        try:
            pinyin = web_html.xpath('//div[@id="pinyin"]')[0]
            chinese_details['pinyin'] = '、'.join(pinyin.xpath('span/b/text()'))
        except (IndexError, AttributeError):
            print("无法获得" + chinese + "的属性:pinyin")
        break
    # Read the value from the dict: a local would be unbound whenever the
    # extraction above failed.
    print('读取汉字信息:' + chinese, chinese_details['pinyin'])
    return chinese_details
def craw(windows, chinese, ExcelOutputFile_path, max_retries=5):
    """Scrape the hanyu.baidu.com entry of one character.

    Besides collecting text attributes, the function downloads the
    stroke-order GIF and the pronunciation MP3 into
    ``<ExcelOutputFile_path><chinese>/`` and has ``ChangeSVG2png`` /
    ``ChineseChangeSVG2png`` render the stroke-order PNGs.

    Args:
        windows: Passed through unchanged to the two PNG renderers.
        chinese: The character (or word) to look up.
        ExcelOutputFile_path: Directory prefix for the downloaded media. It
            is concatenated directly with ``chinese``, so it should end with
            ``/``. When empty, ``OUTPUTADDRESS`` is used instead.
        max_retries: Number of page requests attempted before giving up.
            Failed attempts are separated by a 2 second pause.

    Returns:
        dict: The keys initialised below. An attribute that could not be
        extracted keeps its initial ``""``; ``"bishun_svg"`` becomes a list
        of path ``d`` values when the stroke block is found.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character
        "pinyin": "",  # pinyin
        "stroke_count": "",  # number of strokes
        "img_address": "",  # stroke-order animation URL
        "mp3_address": "",  # pronunciation audio URL
        "zimi": "",  # riddle
        "basicmean": "",  # basic meaning
        "zuci": "",  # related words
        "synonym": "",  # synonyms
        "antonym": "",  # antonyms
        "bishun_svg": "",  # stroke-order SVG paths
        "baikemean": ""  # encyclopedia meaning
    }
    media_dir = (ExcelOutputFile_path or OUTPUTADDRESS) + chinese + '/'

    for attempt in range(1, max_retries + 1):
        try:
            request = urllib.request.Request(url, headers=header)
            # The timeout keeps a stalled connection from blocking forever.
            with urllib.request.urlopen(request, timeout=15) as response:
                # Decode once and use the text for both lxml and the regexes.
                html = str(response.read(), 'utf-8')
            web_html = etree.HTML(html)
            # One folder per character. makedirs also creates missing parents;
            # the download folder may differ from OUTPUTADDRESS and was never
            # created before.
            os.makedirs(OUTPUTADDRESS + chinese + '/', exist_ok=True)
            os.makedirs(media_dir, exist_ok=True)
            break
        except Exception as e:
            print(chinese + 'server error', e)
            if attempt < max_retries:
                time.sleep(2)
    else:
        # Every attempt failed: hand back the empty record instead of
        # looping forever.
        return chinese_details

    # Raised when an expected node is absent, or when the page parsed to None.
    lookup_errors = (IndexError, AttributeError, TypeError)

    def report_missing(attribute):
        print("无法获得" + chinese + "的属性:" + attribute)

    def fill_joined(key, container_xpath, item_xpath, separator,
                    quiet=False, drop_last=False):
        """Store the joined text items found under the first matching node."""
        try:
            container = web_html.xpath(container_xpath)[0]
            items = container.xpath(item_xpath)
        except lookup_errors:
            if not quiet:
                report_missing(key)
            return
        chinese_details[key] = separator.join(items[:-1] if drop_last else items)

    # Pinyin
    fill_joined('pinyin', '//div[@id="pinyin"]', 'span/b/text()', '、')

    # Stroke count
    try:
        stroke_count = web_html.xpath('//li[@id="stroke_count"]')[0]
        chinese_details['stroke_count'] = stroke_count.xpath('span')[0].text
    except lookup_errors:
        report_missing('stroke_count')

    # Stroke-order animation and pronunciation audio
    gif_urls = re.findall(r'data-gif="(.+?\.gif)"', html)
    mp3_urls = re.findall(r'url="(.+?\.mp3)"', html)
    for gif_url, mp3_url in zip(gif_urls, mp3_urls):
        chinese_details['img_address'] = gif_url
        chinese_details['mp3_address'] = mp3_url
        try:
            urllib.request.urlretrieve(gif_url, filename=media_dir + chinese + '.gif')
            urllib.request.urlretrieve(mp3_url, filename=media_dir + chinese + '.mp3')
        except Exception:
            print(chinese + ' failure')

    # Riddle, basic meaning, related words
    fill_joined('zimi', '//div[@id="miyu-wrapper"]', 'div/p/text()', ' ')
    fill_joined('basicmean', '//div[@id="basicmean-wrapper"]', 'div/dl/dd/p/text()', '。')
    # The last link is dropped, as before; presumably it is not a word
    # (e.g. a "more" link) - TODO confirm against the page.
    fill_joined('zuci', '//div[@id="zuci-wrapper"]', 'div/a/text()', '、', drop_last=True)

    # Synonyms / antonyms: a missing block is not reported.
    fill_joined('synonym', '//div[@id="synonym"]', 'div/a/text()', '、', quiet=True)
    fill_joined('antonym', '//div[@id="antonym"]', 'div/a/text()', '、', quiet=True)

    # Stroke-order SVG paths and the PNGs rendered from them
    try:
        bishun = web_html.xpath('//div[@class="word-stroke-wrap"]')[0]
        bishun_svg = [x.get('d') for x in bishun.xpath('./div[1]/div/div/svg/g/path')]
        chinese_details['bishun_svg'] = bishun_svg
        # The final <path> is excluded from rendering, as before; presumably
        # it is not a stroke - TODO confirm against the page.
        svgpaths = bishun_svg[0:-1]
        # One picture per step: the first stroke, the first two, and so on.
        for step in range(1, len(svgpaths) + 1):
            ChangeSVG2png(windows, svgpaths[:step], chinese)
        ChineseChangeSVG2png(windows, svgpaths, chinese)
    except Exception:
        # Broad on purpose: rendering may fail in ways unrelated to the page.
        report_missing('bishun')

    # Encyclopedia meaning
    try:
        baikemean = web_html.xpath('//div[@id="baike-wrapper"]')[0]
        chinese_details['baikemean'] = str(baikemean.xpath('normalize-space(div[2]/p/text())'))
    except lookup_errors:
        report_missing('baikemean')

    # Read the value from the dict: a local would be unbound whenever the
    # pinyin extraction failed.
    print('读取汉字信息:' + chinese, chinese_details['pinyin'])
    return chinese_details
def createBiShun(windows, ExcelFile_path, ExcelFile_sheetName, ExcelOutputFile_path):
    """Look up every character of the workbook and write the results back.

    The first sheet of ``ExcelFile_path`` is read and each distinct,
    non-blank value of its ``内容`` column is passed to ``craw``. The
    returned attributes are stored in the matching rows, then the table is
    written back to ``ExcelFile_path`` as sheet ``ExcelFile_sheetName``.

    Args:
        windows: Passed through to ``craw``.
        ExcelFile_path: Path of the .xlsx workbook; it is overwritten.
        ExcelFile_sheetName: Name given to the sheet that is written.
        ExcelOutputFile_path: Output directory prefix passed to ``craw``.
    """
    # sheet_name=0 reads the first sheet; keep_default_na=False keeps blank
    # cells as '' instead of NaN.
    df = pd.read_excel(ExcelFile_path, sheet_name=0, keep_default_na=False,
                       engine='openpyxl')

    # Target column -> key of the dict returned by craw. The order is kept
    # because new columns are created in assignment order.
    column_keys = [
        ('拼音', 'pinyin'),
        ('笔画数', 'stroke_count'),
        ('笔顺动画地址', 'img_address'),
        ('读音地址', 'mp3_address'),
        ('字谜', 'zimi'),
        ('基础释义', 'basicmean'),
        ('组词', 'zuci'),
        ('近义词', 'synonym'),
        ('反义词', 'antonym'),
        ('笔画SVG路径PATH', 'bishun_svg'),
        ('百科释义', 'baikemean'),
    ]

    # dict.fromkeys removes duplicates while keeping first-seen order, so a
    # repeated character is fetched only once.
    for st in dict.fromkeys(df['内容']):
        if st == '':
            continue  # blank cell: nothing to look up
        chinese = craw(windows, st, ExcelOutputFile_path)
        values = dict(chinese)
        # bishun_svg is '' or a list of path strings; skip empty/None entries
        # so the join cannot fail.
        values['bishun_svg'] = '\n'.join(p for p in chinese['bishun_svg'] if p)
        rows = df['内容'] == st
        for column, key in column_keys:
            df.loc[rows, column] = values[key]

    # 'engine' was misspelled 'engin', which ExcelWriter does not accept.
    with pd.ExcelWriter(ExcelFile_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=ExcelFile_sheetName, index=False)
    print("写入完毕")
if __name__ == '__main__':
    # SelectPage destroys the window it receives and opens its own, so this
    # first Tk instance only serves as a throw-away parent.
    app = SelectPage(tk.Tk())