获取汉字笔顺并形成笔顺图片

JaySun520 发表于 2023-7-24 14:01

由于不熟悉汉字笔顺，所以利用 Python 爬取百度汉语上的笔顺信息，并生成对应的笔顺图片。

# -*- coding: utf-8 -*-
import re
import urllib.request
import urllib.parse
import time
import os

from lxml import etree
import pandas as pd
from openpyxl import load_workbook

#将SVG路径转换为png图片
from cairosvg import svg2png
import tkinter as tk
from tkinter.filedialog import askopenfilename
from tkinter.filedialog import askdirectory
from tkinter.filedialog import asksaveasfilename
from tkinter.ttk import *

OUTPUTADDRESS = 'D:/汉字笔画/'

def select_db_file(self):
    """Ask the user for the Excel workbook that lists the characters.

    Args:
        self: Object exposing a ``db`` variable with a ``set`` method. The
            caller in this file passes the Tk root window, whose ``db``
            attribute is a ``tk.StringVar``.
    """
    db_file = askopenfilename(title="请选择汉字文件", filetypes=[('xlsx', '*.xlsx')])
    # A cancelled dialog yields an empty value; keep the previously chosen
    # path in that case instead of blanking it.
    if db_file:
        self.db.set(db_file)

def select_save_file(self):
    """Ask the user for the folder that receives the generated files.

    The chosen folder is stored with a trailing ``/`` because the rest of
    the script builds paths by plain string concatenation.

    Args:
        self: Object exposing an ``f`` variable with a ``set`` method. The
            caller in this file passes the Tk root window, whose ``f``
            attribute is a ``tk.StringVar``.
    """
    save_directory = askdirectory(initialdir=OUTPUTADDRESS)
    # A cancelled dialog yields an empty value; storing '' + '/' would point
    # the output at the filesystem root, so leave the old value untouched.
    if save_directory:
        self.f.set(save_directory + '/')

class SelectPage:
    """Main window of the stroke-order image generator.

    Creating an instance destroys ``parent_window``, builds a fresh Tk root
    stored in ``self.window`` and enters its main loop, so the constructor
    only returns once that window has been closed.

    The user's choices are kept as Tk variables on ``self.window``:
    ``font_color`` and ``font_color_last`` (fill colours for the earlier
    strokes and for the final stroke), ``db`` (path of the input workbook)
    and ``f`` (output folder).
    """

    def __init__(self, parent_window):
        """Replace ``parent_window`` with the tool's own window and run it."""
        parent_window.destroy()  # the caller's window is not reused
        super(SelectPage, self).__init__()
        self.createWidgets()

    def createWidgets(self):
        """Build every widget, wire the buttons and start the Tk main loop."""
        self.window = tk.Tk()
        self.window.title('笔顺生成工具------Design By xxx')

        # Row 1: fill colour of every stroke except the last one.
        font_color_label = Label(
            self.window, font=('微软雅黑', 10), text='字体颜色', justify='right')
        font_color_label.grid(row=1, column=1, padx=(2, 0), pady=(2, 0), sticky='WE')

        font_color_label2 = Label(
            self.window, font=('微软雅黑', 6), text='(黑色:#000000；灰色:#B8B8B8)',
            justify='right', background='lightskyblue')
        font_color_label2.grid(row=1, column=2, padx=(2, 0), pady=(2, 0), sticky='WE')

        self.window.font_color = tk.StringVar(value='#B8B8B8')
        font_color_entry = Entry(
            self.window, width=80, textvariable=self.window.font_color)
        font_color_entry.grid(row=1, column=3, padx=3, pady=3, sticky='WE')

        # Row 2: fill colour of the last stroke of each step image.
        font_color_last_label = Label(
            self.window, font=('微软雅黑', 10), text='最后一笔颜色', justify='right')
        font_color_last_label.grid(row=2, column=1, padx=(2, 0), pady=(2, 0), sticky='WE')

        font_color_last_label2 = Label(
            self.window, font=('微软雅黑', 6), text='(白色:#FFFFFF；红色:#FF1111)',
            justify='right', background='lightskyblue')
        font_color_last_label2.grid(row=2, column=2, padx=(2, 0), pady=(2, 0), sticky='WE')

        self.window.font_color_last = tk.StringVar(value='#B8B8B8')
        font_color_last_entry = Entry(
            self.window, width=80, textvariable=self.window.font_color_last)
        font_color_last_entry.grid(row=2, column=3, padx=3, pady=3, sticky='WE')

        # Row 3: workbook listing the characters to look up.
        self.window.db = tk.StringVar()
        db_select = Button(
            self.window, text='汉字文件',
            command=lambda: select_db_file(self.window))
        db_select.grid(row=3, column=1, columnspan=2, sticky='W', padx=(2, 0), pady=(2, 0))

        ExcelFile_path = Entry(self.window, width=80, textvariable=self.window.db)
        ExcelFile_path['state'] = 'readonly'  # only changed through the dialog
        ExcelFile_path.grid(row=3, column=3, padx=3, pady=3, sticky='WE')

        # Row 4: folder that receives the downloaded and generated files.
        self.window.f = tk.StringVar()
        save_path = Button(
            self.window, text='笔顺保存地址',
            command=lambda: select_save_file(self.window))
        save_path.grid(row=4, column=1, columnspan=2, sticky='W', padx=(2, 0), pady=(2, 0))

        ExcelOutputFile_path = Entry(self.window, width=80, textvariable=self.window.f)
        ExcelOutputFile_path['state'] = 'readonly'  # only changed through the dialog
        ExcelOutputFile_path.grid(row=4, column=3, padx=3, pady=3, sticky='WE')

        # Row 5: start the run. The sheet name is passed to createBiShun
        # together with the two paths chosen above.
        ExcelFile_sheetName = '生字表（一上）'
        create_btn = Button(
            self.window, text='生成笔顺',
            command=lambda: createBiShun(
                self.window, self.window.db.get(), ExcelFile_sheetName,
                self.window.f.get()))
        create_btn.grid(row=5, column=1, columnspan=3, pady=(0, 2))

        self.window.columnconfigure(2, weight=1)
        self.window.mainloop()

    def get_font_color(self):
        """Return the colour string currently entered for the earlier strokes."""
        return self.window.font_color.get()

# Render the complete character (all strokes in one colour) to a PNG image.
def ChineseChangeSVG2png(windows, svg_path, chinese):
    """Render the complete character, every stroke in black, to a PNG file.

    The image is written to ``OUTPUTADDRESS/<chinese>/<chinese>0.png``; the
    ``0`` suffix marks the finished character.

    Args:
        windows: Unused here; kept so the signature matches ``ChangeSVG2png``.
        svg_path: Iterable of SVG path-data strings, one per stroke. Each is
            inserted verbatim as the ``d`` attribute of a ``<path>``.
        chinese: The character; used as folder name and file-name prefix.
    """
    font_color = '#000000'  # black
    output_dir = OUTPUTADDRESS + chinese
    # makedirs also creates OUTPUTADDRESS itself when it does not exist yet,
    # and does not fail when the character folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    svg_lines = [
        '<svg style="width: 83px; height:83px;" xmlns="http://www.w3.org/2000/svg">',
        # The negative y scale mirrors the paths vertically; presumably the
        # source path data uses a y-up coordinate system (TODO confirm).
        ' <g transform="translate(3, 70) scale(0.07, -0.07)">',
    ]
    svg_lines.extend(
        '    <path d="' + stroke + '" style="fill: ' + font_color + ';"></path>'
        for stroke in svg_path
    )
    svg_lines.append(' </g>')
    svg_lines.append('</svg>')
    svgcode = '\n'.join(svg_lines)

    output_filename = output_dir + '/' + chinese + '0.png'
    try:
        svg2png(bytestring=svgcode, write_to=output_filename)
    except Exception as e:
        # Rendering is best-effort: report and carry on with the next image.
        # str() is required because str + Exception raises TypeError.
        print('error:' + str(e))

def craw_pinyin(chinese, max_retries=5):
    """Fetch the pinyin reading(s) of one character from Baidu Hanyu.

    NOTE: this file defines ``craw_pinyin`` a second time further down with
    the same body; the later definition is the one in effect after import.

    Args:
        chinese: The character to look up.
        max_retries: How many times the page download is attempted before
            giving up. Must be at least 1 for a request to be made.

    Returns:
        dict with keys ``"chinese"`` (the input) and ``"pinyin"`` (readings
        joined with ``、``, or ``""`` when they could not be obtained).
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character
        "pinyin": "",  # its reading(s)
    }

    for attempt in range(max_retries):
        try:
            request = urllib.request.Request(url, headers=header)
            reponse = urllib.request.urlopen(request, timeout=10).read()
            web_html = etree.HTML(str(reponse, 'utf-8'))
        except Exception as e:
            # Retry boundary: network, decoding and parsing failures are all
            # treated as "try again", but only a bounded number of times.
            print(chinese + 'server error', e)
            if attempt + 1 < max_retries:
                time.sleep(2)
            continue

        # xpath() returns a list of matching elements, so the first match
        # has to be picked before querying below it.
        pinyin_nodes = web_html.xpath('//div[@id="pinyin"]')
        if pinyin_nodes:
            chinese_details['pinyin'] = '、'.join(pinyin_nodes[0].xpath('span/b/text()'))
        else:
            print("无法获得" + chinese + "的属性:pinyin")
        break

    print('读取汉字信息：' + chinese, chinese_details['pinyin'])
    return chinese_details

# Render one stroke-order step (the strokes written so far) to a PNG image.
def ChangeSVG2png(windows, svg_path, chinese):
    """Render one stroke-order step to a PNG file.

    Every stroke in ``svg_path`` is drawn; the final one is filled with the
    "last stroke" colour and all earlier ones with the regular colour. The
    image is written to ``OUTPUTADDRESS/<chinese>/<chinese><N>.png`` where
    ``N`` is the number of strokes in ``svg_path``.

    Args:
        windows: Object whose ``font_color`` and ``font_color_last``
            attributes provide ``get()`` returning a colour string (the Tk
            root window in this file).
        svg_path: Sequence of SVG path-data strings, one per stroke, in
            writing order. Each is inserted verbatim as a ``d`` attribute.
        chinese: The character; used as folder name and file-name prefix.
    """
    output_dir = OUTPUTADDRESS + chinese
    # makedirs also creates OUTPUTADDRESS itself when it does not exist yet,
    # and does not fail when the character folder is already there.
    os.makedirs(output_dir, exist_ok=True)

    regular_color = windows.font_color.get()
    last_color = windows.font_color_last.get()
    last_position = len(svg_path) - 1

    svg_lines = [
        '<svg style="width: 83px; height:83px;" xmlns="http://www.w3.org/2000/svg">',
        # The negative y scale mirrors the paths vertically; presumably the
        # source path data uses a y-up coordinate system (TODO confirm).
        ' <g transform="translate(3, 70) scale(0.07, -0.07)">',
    ]
    # Decide by position, not by list.index(): two strokes with identical
    # path data would otherwise both be treated as the first occurrence.
    for position, stroke in enumerate(svg_path):
        fill = regular_color if position < last_position else last_color
        svg_lines.append(
            '    <path d="' + stroke + '" style="fill: ' + fill + ';"></path>')
    svg_lines.append(' </g>')
    svg_lines.append('</svg>')
    svgcode = '\n'.join(svg_lines)

    output_filename = output_dir + '/' + chinese + str(len(svg_path)) + '.png'
    try:
        svg2png(bytestring=svgcode, write_to=output_filename)
    except Exception as e:
        # Rendering is best-effort: report and carry on with the next image.
        # str() is required because str + Exception raises TypeError.
        print('error:' + str(e))

def craw_pinyin(chinese, max_retries=5):
    """Fetch the pinyin reading(s) of one character from Baidu Hanyu.

    NOTE: this is the second definition of ``craw_pinyin`` in this file and
    therefore the one in effect after import; the earlier one is shadowed.
    Consider deleting one of the two.

    Args:
        chinese: The character to look up.
        max_retries: How many times the page download is attempted before
            giving up. Must be at least 1 for a request to be made.

    Returns:
        dict with keys ``"chinese"`` (the input) and ``"pinyin"`` (readings
        joined with ``、``, or ``""`` when they could not be obtained).
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character
        "pinyin": "",  # its reading(s)
    }

    for attempt in range(max_retries):
        try:
            request = urllib.request.Request(url, headers=header)
            reponse = urllib.request.urlopen(request, timeout=10).read()
            web_html = etree.HTML(str(reponse, 'utf-8'))
        except Exception as e:
            # Retry boundary: network, decoding and parsing failures are all
            # treated as "try again", but only a bounded number of times.
            print(chinese + 'server error', e)
            if attempt + 1 < max_retries:
                time.sleep(2)
            continue

        # xpath() returns a list of matching elements, so the first match
        # has to be picked before querying below it.
        pinyin_nodes = web_html.xpath('//div[@id="pinyin"]')
        if pinyin_nodes:
            chinese_details['pinyin'] = '、'.join(pinyin_nodes[0].xpath('span/b/text()'))
        else:
            print("无法获得" + chinese + "的属性:pinyin")
        break

    print('读取汉字信息：' + chinese, chinese_details['pinyin'])
    return chinese_details

def _joined_text(root, container_xpath, text_xpath, separator):
    """Join the text nodes under the first element matching ``container_xpath``.

    Returns ``None`` when no element matches, so the caller can tell a
    missing page section from one that is present but empty.
    """
    containers = root.xpath(container_xpath)  # xpath() returns a list
    if not containers:
        return None
    return separator.join(containers[0].xpath(text_xpath))


def craw(windows, chinese, ExcelOutputFile_path, max_retries=5):
    """Scrape one character's details from Baidu Hanyu and save its media.

    Besides filling the returned dict, this downloads the stroke animation
    (``.gif``) and pronunciation (``.mp3``) into
    ``<ExcelOutputFile_path><chinese>/`` and calls ``ChangeSVG2png`` /
    ``ChineseChangeSVG2png`` to render the stroke-order PNGs (those helpers
    write under ``OUTPUTADDRESS``). Every field is best-effort: a missing
    page section is reported on stdout and leaves the field empty.

    Args:
        windows: Passed through to the PNG helpers, which read the stroke
            colours from it.
        chinese: The character to look up.
        ExcelOutputFile_path: Folder prefix (ending in ``/``) for the
            downloads; ``OUTPUTADDRESS`` is used when it is empty.
        max_retries: How many times the page download is attempted before
            giving up and returning the empty record.

    Returns:
        dict with the keys initialised below. ``bishun_svg`` is a list of
        path-data strings when strokes were found, otherwise ``""``.
    """
    url = 'http://hanyu.baidu.com/s?wd=' + urllib.parse.quote(chinese) + '&ptype=zici'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    chinese_details = {
        "chinese": chinese,  # the character
        "pinyin": "",  # reading(s)
        "stroke_count": "",  # number of strokes
        "img_address": "",  # URL of the stroke animation
        "mp3_address": "",  # URL of the pronunciation audio
        "zimi": "",  # riddle
        "basicmean": "",  # basic meaning
        "zuci": "",  # words containing the character
        "synonym": "",  # synonyms
        "antonym": "",  # antonyms
        "bishun_svg": "",  # SVG path data of the strokes
        "baikemean": ""  # encyclopedia summary
    }

    # --- download and parse the page (bounded retries) ---
    html = ''
    web_html = None
    for attempt in range(max_retries):
        try:
            request = urllib.request.Request(url, headers=header)
            reponse = urllib.request.urlopen(request, timeout=10).read()
            html = str(reponse, 'utf-8')
            web_html = etree.HTML(html)
            break
        except Exception as e:
            # Retry boundary: network, decoding and parsing failures are all
            # treated as "try again", but only a bounded number of times.
            print(chinese + 'server error', e)
            if attempt + 1 < max_retries:
                time.sleep(2)
    if web_html is None:
        return chinese_details

    # --- make sure the target folders exist ---
    png_dir = OUTPUTADDRESS + chinese + '/'  # where the PNG helpers write
    media_dir = (ExcelOutputFile_path or OUTPUTADDRESS) + chinese + '/'
    for folder in dict.fromkeys((png_dir, media_dir)):
        try:
            os.makedirs(folder, exist_ok=True)
        except OSError as e:
            # Keep going: the text fields can still be collected even if
            # files cannot be written.
            print("无法创建文件夹:" + folder, e)

    # --- pinyin ---
    pinyin_text = _joined_text(web_html, '//div[@id="pinyin"]', 'span/b/text()', '、')
    if pinyin_text is None:
        print("无法获得" + chinese + "的属性:pinyin")
    else:
        chinese_details['pinyin'] = pinyin_text

    # --- stroke count: text of the first <span> under the list item ---
    stroke_items = web_html.xpath('//li[@id="stroke_count"]')
    stroke_spans = stroke_items[0].xpath('span') if stroke_items else []
    if stroke_spans:
        chinese_details['stroke_count'] = stroke_spans[0].text or ''
    else:
        print("无法获得" + chinese + "的属性:stroke_count")

    # --- stroke animation and pronunciation ---
    imgs = re.findall(r'data-gif="(.+?\.gif)"', html)
    mp3s = re.findall(r'url="(.+?\.mp3)"', html)
    if not imgs or not mp3s:
        print("无法获得" + chinese + "的属性:img,mp3")
    imagename = media_dir + chinese + '.gif'
    mp3name = media_dir + chinese + '.mp3'
    # Each pair is saved under the same two file names, so when the page
    # offers several the last pair is what remains on disk.
    for imageurl, mp3url in zip(imgs, mp3s):
        chinese_details['img_address'] = imageurl
        chinese_details['mp3_address'] = mp3url
        try:
            urllib.request.urlretrieve(imageurl, filename=imagename)
            urllib.request.urlretrieve(mp3url, filename=mp3name)
        except Exception as e:
            print(chinese + ' failure', e)

    # --- plain text sections: (key, container, text nodes, separator, report) ---
    text_sections = (
        ('zimi', '//div[@id="miyu-wrapper"]', 'div/p/text()', ' ', True),
        ('basicmean', '//div[@id="basicmean-wrapper"]', 'div/dl/dd/p/text()', '。', True),
        ('zuci', '//div[@id="zuci-wrapper"]', 'div/a/text()', '、', True),
        # Synonyms and antonyms are skipped silently when the page has none.
        ('synonym', '//div[@id="synonym"]', 'div/a/text()', '、', False),
        ('antonym', '//div[@id="antonym"]', 'div/a/text()', '、', False),
    )
    for key, container_xpath, text_xpath, separator, report in text_sections:
        section_text = _joined_text(web_html, container_xpath, text_xpath, separator)
        if section_text is not None:
            chinese_details[key] = section_text
        elif report:
            print("无法获得" + chinese + "的属性:" + key)

    # --- stroke paths and the PNG series ---
    # NOTE(review): the original statement here was garbled in extraction
    # (only "/div/div/svg/g/path')]" survived). This reads the "d" attribute
    # of each <path> under the stroke wrapper; verify against the live page.
    bishun = web_html.xpath('//div[@class="word-stroke-wrap"]')
    stroke_nodes = bishun[0].xpath('div/div/svg/g/path') if bishun else []
    bishun_svg = [node.get('d') for node in stroke_nodes if node.get('d')]
    if bishun_svg:
        chinese_details['bishun_svg'] = bishun_svg
        try:
            # One image per step: the first stroke, the first two, and so on.
            for step in range(1, len(bishun_svg) + 1):
                ChangeSVG2png(windows, bishun_svg[:step], chinese)
            ChineseChangeSVG2png(windows, bishun_svg, chinese)
        except Exception as e:
            print("无法获得" + chinese + "的属性:bishun", e)
    else:
        print("无法获得" + chinese + "的属性:bishun")

    # --- encyclopedia summary ---
    baike = web_html.xpath('//div[@id="baike-wrapper"]')
    if baike:
        chinese_details['baikemean'] = str(baike[0].xpath('normalize-space(div/p/text())'))
    else:
        print("无法获得" + chinese + "的属性:baikemean")

    print('读取汉字信息：' + chinese, chinese_details['pinyin'])
    return chinese_details

def createBiShun(windows, ExcelFile_path, ExcelFile_sheetName, ExcelOutputFile_path):
    """Look up every character listed in the workbook and write the results back.

    The first sheet of ``ExcelFile_path`` is read, each distinct non-blank
    value of its ``内容`` column is passed to ``craw``, and the returned
    fields are stored in the matching rows. The table is then written to
    ``ExcelFile_path`` as a sheet named ``ExcelFile_sheetName``.

    Args:
        windows: Passed through to ``craw``.
        ExcelFile_path: Path of the ``.xlsx`` workbook; nothing is done when
            it is empty (no file chosen yet).
        ExcelFile_sheetName: Name given to the sheet that is written.
        ExcelOutputFile_path: Output folder prefix, passed through to ``craw``.

    Raises:
        KeyError: If the first sheet has no ``内容`` column.
    """
    if not ExcelFile_path:
        print("请先选择汉字文件")
        return

    # keep_default_na=False keeps blank cells as '' instead of NaN.
    df = pd.read_excel(ExcelFile_path, sheet_name=0, keep_default_na=False,
                       engine='openpyxl')

    # Target column -> key in the dict returned by craw, in output order.
    column_keys = (
        ('拼音', 'pinyin'),
        ('笔画数', 'stroke_count'),
        ('笔顺动画地址', 'img_address'),
        ('读音地址', 'mp3_address'),
        ('字谜', 'zimi'),
        ('基础释义', 'basicmean'),
        ('组词', 'zuci'),
        ('近义词', 'synonym'),
        ('反义词', 'antonym'),
        ('笔画SVG路径PATH', 'bishun_svg'),
        ('百科释义', 'baikemean'),
    )

    # dict.fromkeys de-duplicates while keeping order, so a character that
    # appears in several rows is only crawled once.
    for st in dict.fromkeys(df['内容']):
        if isinstance(st, str) and not st.strip():
            continue  # blank cell: nothing to look up
        chinese = craw(windows, st, ExcelOutputFile_path)
        values = dict(chinese)
        # The stroke paths come back as a sequence; store one path per line.
        values['bishun_svg'] = '\n'.join(chinese['bishun_svg'])
        rows = df['内容'] == st
        for column, key in column_keys:
            df.loc[rows, column] = values[key]

    # NOTE(review): ExcelWriter opens the file in write mode by default, so
    # this replaces the input workbook with this single sheet - confirm that
    # overwriting the source file is intended.
    with pd.ExcelWriter(ExcelFile_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=ExcelFile_sheetName, index=False)

    print("写入完毕")

if __name__ == '__main__':
    # SelectPage destroys the window it is given and creates its own, so
    # this root exists only to satisfy the constructor. The call blocks
    # until the tool's window is closed.
    window = tk.Tk()
    app = SelectPage(window)

freesaber 发表于 2023-7-24 16:44

之前自己也搞过，爬百度的汉字笔画gif。后来，爬下来的内容，实际用处不大。娃现在练字，直接用的这个，可以生成+打印的：https://young.hao.360.com/writepaper

xiatongxue 发表于 2023-7-24 16:10

厉害，等我会了我也要出个教程，学习学习。

xinxiu 发表于 2023-7-24 14:30

厉害了，感谢分享。

SU150228 发表于 2023-7-24 14:36

图片是田字格样式就好了

testwind 发表于 2023-7-24 14:39

学习研究下，也许以后能用上

LAML 发表于 2023-7-24 14:42

虽然现在用不上，但是不妨碍点赞收藏（收藏癖路过），可以整个米字格的版本

wub88 发表于 2023-7-24 14:43

厉害了，感谢分享。

llamb 发表于 2023-7-24 15:02

好东西收藏一下

鹿鸣 发表于 2023-7-24 15:17

这个实现起来这么多代码的，学习一下

额微粒波地 发表于 2023-7-24 16:21

赞一个，这个小工具不错

页: [1] 2 3 4

吾爱破解 - 52pojie.cn's Archiver

获取汉字笔顺并形成笔顺图片